From 5083b2c53e53e1d38aee4806a0ac60ffec7570ac Mon Sep 17 00:00:00 2001 From: Omar Salman Date: Fri, 25 Aug 2023 17:11:22 +0500 Subject: [PATCH 1/6] FIX f1_score with zero_division=1 on binary classes --- sklearn/metrics/_classification.py | 10 +++++++--- sklearn/metrics/tests/test_classification.py | 21 +++++++++++++++++++- 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 971ea5a25ffe3..dd26304358690 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -1758,10 +1758,14 @@ def precision_recall_fscore_support( # score = (1 + beta**2) * precision * recall / (beta**2 * precision + recall) # We set to `zero_division_value` if the denominator is 0 **or** if **both** # precision and recall are ill-defined. + numer = (1 + beta2) * precision * recall denom = beta2 * precision + recall - mask = np.isclose(denom, 0) | np.isclose(pred_sum + true_sum, 0) - denom[mask] = 1 # avoid division by 0 - f_score = (1 + beta2) * precision * recall / denom + denom_mask = np.isclose(denom, 0) + mask = (np.not_equal(numer, 0) & np.isclose(denom, 0)) | np.isclose( + pred_sum + true_sum, 0 + ) + denom[denom_mask] = 1 # avoid division by 0 + f_score = numer / denom f_score[mask] = zero_division_value # Average the results diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index cfcb08a312443..5919d91bb080a 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -1863,7 +1863,7 @@ def test_precision_recall_f1_score_with_an_empty_prediction( assert_almost_equal(r, 1 / 3) assert_almost_equal(f, 1 / 3) assert s is None - expected_result = {1: 0.666, np.nan: 1.0} + expected_result = {1: 0.333, np.nan: 0.5} assert_almost_equal( fbeta_score( y_true, y_pred, beta=2, average="samples", zero_division=zero_division @@ -2802,3 +2802,22 @@ def test_classification_metric_pos_label_types(metric, classes): y_pred = y_true.copy() result = metric(y_true, y_pred, pos_label=pos_label) assert not np.any(np.isnan(result)) + + +def test_f1_for_small_binary_inputs_with_zero_division(): + """Non-regression test for gh-26965""" + y_true = np.array([0, 1]) + y_pred = np.array([1, 0]) + assert f1_score(y_true, y_pred, zero_division=1) == 0.0 + + y_true = np.array([0, 1]) + y_pred = np.array([0, 1]) + assert f1_score(y_true, y_pred, zero_division=1) == 1.0 + + y_true = np.array([0, 1]) + y_pred = np.array([0, 0]) + assert f1_score(y_true, y_pred, zero_division=1) == 0.0 + + y_true = np.array([0, 0]) + y_pred = np.array([0, 0]) + assert f1_score(y_true, y_pred, zero_division=1) == 1.0 From fdbfa71ab5fde711553166c6f8cb37665495254e Mon Sep 17 00:00:00 2001 From: Omar Salman Date: Mon, 18 Sep 2023 12:26:01 +0500 Subject: [PATCH 2/6] Fix f1 score using a formulation which does not require precision and recall --- sklearn/metrics/_classification.py | 23 ++++++++++---------- sklearn/metrics/tests/test_classification.py | 10 ++++----- 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 6876e1fa87e10..f1d3f4b67c0d5 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -1711,7 +1711,7 @@ def precision_recall_fscore_support( array([0., 0., 1.]), array([0. , 0. 
, 0.8]), array([2, 2, 2])) """ - zero_division_value = _check_zero_division(zero_division) + _check_zero_division(zero_division) labels = _check_set_wise_labels(y_true, y_pred, average, labels, pos_label) # Calculate tp_sum, pred_sum, true_sum ### @@ -1757,17 +1757,18 @@ def precision_recall_fscore_support( else: # The score is defined as: # score = (1 + beta**2) * precision * recall / (beta**2 * precision + recall) - # We set to `zero_division_value` if the denominator is 0 **or** if **both** - # precision and recall are ill-defined. - numer = (1 + beta2) * precision * recall - denom = beta2 * precision + recall - denom_mask = np.isclose(denom, 0) - mask = (np.not_equal(numer, 0) & np.isclose(denom, 0)) | np.isclose( - pred_sum + true_sum, 0 + # Therefore, we can express the score in terms of confusion matrix entries as: + # score = (1 + beta**2) * tp / ((1 + beta**2) * tp + beta**2 * fn + fp) + denom = beta2 * true_sum + pred_sum + f_score = _prf_divide( + (1 + beta2) * tp_sum, + denom, + "fscore", + "true nor predicted", + average, + warn_for, + zero_division, ) - denom[denom_mask] = 1 # avoid division by 0 - f_score = numer / denom - f_score[mask] = zero_division_value # Average the results if average == "weighted": diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 0e33b64a86a64..256cffd9358d3 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -1797,7 +1797,7 @@ def test_precision_recall_f1_score_with_an_empty_prediction( assert_array_almost_equal(p, [zero_division_expected, 1.0, 1.0, 0.0], 2) assert_array_almost_equal(r, [0.0, 0.5, 1.0, zero_division_expected], 2) - expected_f = 0 if not np.isnan(zero_division_expected) else np.nan + expected_f = 0 assert_array_almost_equal(f, [expected_f, 1 / 1.5, 1, expected_f], 2) assert_array_almost_equal(s, [1, 2, 1, 0], 2) @@ -1814,7 +1814,7 @@ def test_precision_recall_f1_score_with_an_empty_prediction( assert_almost_equal(p, (2 + value_to_sum) / values_to_average) assert_almost_equal(r, (1.5 + value_to_sum) / values_to_average) - expected_f = (2 / 3 + 1) / (4 if not np.isnan(zero_division_expected) else 2) + expected_f = (2 / 3 + 1) / 4 assert_almost_equal(f, expected_f) assert s is None assert_almost_equal( @@ -1847,7 +1847,7 @@ def test_precision_recall_f1_score_with_an_empty_prediction( ) assert_almost_equal(p, 3 / 4 if zero_division_expected == 0 else 1.0) assert_almost_equal(r, 0.5) - values_to_average = 4 if not np.isnan(zero_division_expected) else 3 + values_to_average = 4 assert_almost_equal(f, (2 * 2 / 3 + 1) / values_to_average) assert s is None assert_almost_equal( @@ -1865,12 +1865,12 @@ def test_precision_recall_f1_score_with_an_empty_prediction( assert_almost_equal(r, 1 / 3) assert_almost_equal(f, 1 / 3) assert s is None - expected_result = {1: 0.333, np.nan: 0.5} + expected_result = 0.333 assert_almost_equal( fbeta_score( y_true, y_pred, beta=2, average="samples", zero_division=zero_division ), - expected_result.get(zero_division, 0.333), + expected_result, 2, ) From ae01c7ac034deba19d20ff8c1edc2ab9bec492ff Mon Sep 17 00:00:00 2001 From: Omar Salman Date: Mon, 18 Sep 2023 12:32:37 +0500 Subject: [PATCH 3/6] Add changlog --- doc/whats_new/v1.4.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/doc/whats_new/v1.4.rst b/doc/whats_new/v1.4.rst index 8dbd867b0c9ba..5a356d7a339a4 100644 --- a/doc/whats_new/v1.4.rst +++ b/doc/whats_new/v1.4.rst @@ -232,6 +232,12 @@ Changelog support SciPy sparse arrays. 
:pr:`27239` by :user:`Yaroslav Korobko `. +- |Fix| :func:`f1_score` now provides correct values when handling various + cases in which zero division occurs by using a formulation that does not + depend on the precision and recall values. + :pr:`27165` by :user:`Omar Salman ` and + :user:`Guillaume Lemaitre `. + :mod:`sklearn.preprocessing` ............................ From 66785376108e2587ba0ccec90d7f554415d21080 Mon Sep 17 00:00:00 2001 From: Omar Salman Date: Mon, 18 Sep 2023 12:49:01 +0500 Subject: [PATCH 4/6] Remove f1 warning that does not apply anymore --- sklearn/metrics/_classification.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index f1d3f4b67c0d5..6347d59dc93e8 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -1744,12 +1744,6 @@ def precision_recall_fscore_support( tp_sum, true_sum, "recall", "true", average, warn_for, zero_division ) - # warn for f-score only if zero_division is warn, it is in warn_for - # and BOTH prec and rec are ill-defined - if zero_division == "warn" and ("f-score",) == warn_for: - if (pred_sum[true_sum == 0] == 0).any(): - _warn_prf(average, "true nor predicted", "F-score is", len(true_sum)) - if np.isposinf(beta): f_score = recall elif beta == 0: From b5d1f3e8e76f6a88fa535d8fe0a7e13c32136249 Mon Sep 17 00:00:00 2001 From: Omar Salman Date: Mon, 18 Sep 2023 13:38:36 +0500 Subject: [PATCH 5/6] Fix warnings and tests that failed because of warning change --- fbeta.py | 7 +++++++ sklearn/metrics/_classification.py | 9 +++++---- sklearn/metrics/tests/test_classification.py | 6 ++++++ 3 files changed, 18 insertions(+), 4 deletions(-) create mode 100644 fbeta.py diff --git a/fbeta.py b/fbeta.py new file mode 100644 index 0000000000000..77f539fd83c3a --- /dev/null +++ b/fbeta.py @@ -0,0 +1,7 @@ +import numpy as np + +from sklearn.metrics import fbeta_score + +y_true = [0, 1, 2, 0, 1, 2] +y_pred_empty = [0, 0, 0, 0, 0, 0] +fbeta_score(y_true, y_pred_empty, average="macro", zero_division=np.nan, beta=0.5) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 6347d59dc93e8..29473a69edca7 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -1451,12 +1451,13 @@ def _prf_divide( # labels with no predicted samples. Use ``zero_division`` parameter to # control this behavior." 
- if metric in warn_for and "f-score" in warn_for: + if metric in warn_for and "f-score" in warn_for and metric != "f-score": msg_start = "{0} and F-score are".format(metric.title()) - elif metric in warn_for: - msg_start = "{0} is".format(metric.title()) elif "f-score" in warn_for: msg_start = "F-score is" + elif metric in warn_for: + msg_start = "{0} is".format(metric.title()) + else: return result @@ -1757,7 +1758,7 @@ def precision_recall_fscore_support( f_score = _prf_divide( (1 + beta2) * tp_sum, denom, - "fscore", + "f-score", "true nor predicted", average, warn_for, diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 256cffd9358d3..3570928ea3694 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -2077,6 +2077,12 @@ def test_prf_warnings(): with warnings.catch_warnings(record=True) as record: warnings.simplefilter("always") precision_recall_fscore_support([0, 0], [0, 0], average="binary") + msg = ( + "F-score is ill-defined and being set to 0.0 due to no true nor " + "predicted samples. Use `zero_division` parameter to control this" + " behavior." + ) + assert str(record.pop().message) == msg msg = ( "Recall and F-score are ill-defined and " "being set to 0.0 due to no true samples." From 9b671070142cb5213d51e066e32b858e3cdcdfae Mon Sep 17 00:00:00 2001 From: Omar Salman Date: Mon, 18 Sep 2023 13:41:45 +0500 Subject: [PATCH 6/6] Correct value in doctest --- sklearn/metrics/_classification.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 29473a69edca7..87a0f182209f9 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -1399,7 +1399,7 @@ def fbeta_score( >>> y_pred_empty = [0, 0, 0, 0, 0, 0] >>> fbeta_score(y_true, y_pred_empty, ... average="macro", zero_division=np.nan, beta=0.5) - 0.38... + 0.12... """ _, _, f, _ = precision_recall_fscore_support(
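
For reference, patch 2 rewrites the F-beta computation directly in terms of
confusion-matrix entries, score = (1 + beta**2) * tp / ((1 + beta**2) * tp +
beta**2 * fn + fp); since true_sum = tp + fn and pred_sum = tp + fp, the
`denom = beta2 * true_sum + pred_sum` used in the hunk is exactly that
denominator, so for finite nonzero beta the zero_division branch is reached
only when tp, fp and fn are all zero. The snippet below is a small
illustrative check of the behaviour the series establishes, not part of the
patches themselves: it assumes a scikit-learn checkout with this series
applied, and the expected values are taken from the added gh-26965
non-regression test and the updated fbeta_score doctest.

    import numpy as np

    from sklearn.metrics import f1_score, fbeta_score

    # gh-26965 case: both predictions are wrong, so fp + fn > 0 and no
    # division by zero occurs; F1 must be 0.0 even with zero_division=1.
    print(f1_score([0, 1], [1, 0], zero_division=1))  # expected: 0.0

    # No true and no predicted positives for pos_label=1: the score is
    # genuinely ill-defined, so the zero_division value is returned.
    print(f1_score([0, 0], [0, 0], zero_division=1))  # expected: 1.0

    # Updated doctest: classes 1 and 2 are never predicted but do have true
    # samples, so their F-beta becomes 0 instead of NaN and the macro
    # average drops from ~0.38 to ~0.12.
    y_true = [0, 1, 2, 0, 1, 2]
    y_pred_empty = [0, 0, 0, 0, 0, 0]
    print(
        fbeta_score(
            y_true, y_pred_empty, average="macro", zero_division=np.nan, beta=0.5
        )
    )  # expected: ~0.12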