From 5083b2c53e53e1d38aee4806a0ac60ffec7570ac Mon Sep 17 00:00:00 2001 From: Omar Salman Date: Fri, 25 Aug 2023 17:11:22 +0500 Subject: [PATCH 1/6] FIX f1_score with zero_division=1 on binary classes --- sklearn/metrics/_classification.py | 10 +++++++--- sklearn/metrics/tests/test_classification.py | 21 +++++++++++++++++++- 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 971ea5a25ffe3..dd26304358690 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -1758,10 +1758,14 @@ def precision_recall_fscore_support( # score = (1 + beta**2) * precision * recall / (beta**2 * precision + recall) # We set to `zero_division_value` if the denominator is 0 **or** if **both** # precision and recall are ill-defined. + numer = (1 + beta2) * precision * recall denom = beta2 * precision + recall - mask = np.isclose(denom, 0) | np.isclose(pred_sum + true_sum, 0) - denom[mask] = 1 # avoid division by 0 - f_score = (1 + beta2) * precision * recall / denom + denom_mask = np.isclose(denom, 0) + mask = (np.not_equal(numer, 0) & np.isclose(denom, 0)) | np.isclose( + pred_sum + true_sum, 0 + ) + denom[denom_mask] = 1 # avoid division by 0 + f_score = numer / denom f_score[mask] = zero_division_value # Average the results diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index cfcb08a312443..5919d91bb080a 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -1863,7 +1863,7 @@ def test_precision_recall_f1_score_with_an_empty_prediction( assert_almost_equal(r, 1 / 3) assert_almost_equal(f, 1 / 3) assert s is None - expected_result = {1: 0.666, np.nan: 1.0} + expected_result = {1: 0.333, np.nan: 0.5} assert_almost_equal( fbeta_score( y_true, y_pred, beta=2, average="samples", zero_division=zero_division @@ -2802,3 +2802,22 @@ def test_classification_metric_pos_label_types(metric, classes): y_pred = y_true.copy() result = metric(y_true, y_pred, pos_label=pos_label) assert not np.any(np.isnan(result)) + + +def test_f1_for_small_binary_inputs_with_zero_division(): + """Non-regression test for gh-26965""" + y_true = np.array([0, 1]) + y_pred = np.array([1, 0]) + assert f1_score(y_true, y_pred, zero_division=1) == 0.0 + + y_true = np.array([0, 1]) + y_pred = np.array([0, 1]) + assert f1_score(y_true, y_pred, zero_division=1) == 1.0 + + y_true = np.array([0, 1]) + y_pred = np.array([0, 0]) + assert f1_score(y_true, y_pred, zero_division=1) == 0.0 + + y_true = np.array([0, 0]) + y_pred = np.array([0, 0]) + assert f1_score(y_true, y_pred, zero_division=1) == 1.0 From fdbfa71ab5fde711553166c6f8cb37665495254e Mon Sep 17 00:00:00 2001 From: Omar Salman Date: Mon, 18 Sep 2023 12:26:01 +0500 Subject: [PATCH 2/6] Fix f1 score using a formulation which does not require precision and recall --- sklearn/metrics/_classification.py | 23 ++++++++++---------- sklearn/metrics/tests/test_classification.py | 10 ++++----- 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 6876e1fa87e10..f1d3f4b67c0d5 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -1711,7 +1711,7 @@ def precision_recall_fscore_support( array([0., 0., 1.]), array([0. , 0. 
, 0.8]), array([2, 2, 2])) """ - zero_division_value = _check_zero_division(zero_division) + _check_zero_division(zero_division) labels = _check_set_wise_labels(y_true, y_pred, average, labels, pos_label) # Calculate tp_sum, pred_sum, true_sum ### @@ -1757,17 +1757,18 @@ def precision_recall_fscore_support( else: # The score is defined as: # score = (1 + beta**2) * precision * recall / (beta**2 * precision + recall) - # We set to `zero_division_value` if the denominator is 0 **or** if **both** - # precision and recall are ill-defined. - numer = (1 + beta2) * precision * recall - denom = beta2 * precision + recall - denom_mask = np.isclose(denom, 0) - mask = (np.not_equal(numer, 0) & np.isclose(denom, 0)) | np.isclose( - pred_sum + true_sum, 0 + # Therefore, we can express the score in terms of confusion matrix entries as: + # score = (1 + beta**2) * tp / ((1 + beta**2) * tp + beta**2 * fn + fp) + denom = beta2 * true_sum + pred_sum + f_score = _prf_divide( + (1 + beta2) * tp_sum, + denom, + "fscore", + "true nor predicted", + average, + warn_for, + zero_division, ) - denom[denom_mask] = 1 # avoid division by 0 - f_score = numer / denom - f_score[mask] = zero_division_value # Average the results if average == "weighted": diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 0e33b64a86a64..256cffd9358d3 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -1797,7 +1797,7 @@ def test_precision_recall_f1_score_with_an_empty_prediction( assert_array_almost_equal(p, [zero_division_expected, 1.0, 1.0, 0.0], 2) assert_array_almost_equal(r, [0.0, 0.5, 1.0, zero_division_expected], 2) - expected_f = 0 if not np.isnan(zero_division_expected) else np.nan + expected_f = 0 assert_array_almost_equal(f, [expected_f, 1 / 1.5, 1, expected_f], 2) assert_array_almost_equal(s, [1, 2, 1, 0], 2) @@ -1814,7 +1814,7 @@ def test_precision_recall_f1_score_with_an_empty_prediction( assert_almost_equal(p, (2 + value_to_sum) / values_to_average) assert_almost_equal(r, (1.5 + value_to_sum) / values_to_average) - expected_f = (2 / 3 + 1) / (4 if not np.isnan(zero_division_expected) else 2) + expected_f = (2 / 3 + 1) / 4 assert_almost_equal(f, expected_f) assert s is None assert_almost_equal( @@ -1847,7 +1847,7 @@ def test_precision_recall_f1_score_with_an_empty_prediction( ) assert_almost_equal(p, 3 / 4 if zero_division_expected == 0 else 1.0) assert_almost_equal(r, 0.5) - values_to_average = 4 if not np.isnan(zero_division_expected) else 3 + values_to_average = 4 assert_almost_equal(f, (2 * 2 / 3 + 1) / values_to_average) assert s is None assert_almost_equal( @@ -1865,12 +1865,12 @@ def test_precision_recall_f1_score_with_an_empty_prediction( assert_almost_equal(r, 1 / 3) assert_almost_equal(f, 1 / 3) assert s is None - expected_result = {1: 0.333, np.nan: 0.5} + expected_result = 0.333 assert_almost_equal( fbeta_score( y_true, y_pred, beta=2, average="samples", zero_division=zero_division ), - expected_result.get(zero_division, 0.333), + expected_result, 2, ) From ae01c7ac034deba19d20ff8c1edc2ab9bec492ff Mon Sep 17 00:00:00 2001 From: Omar Salman Date: Mon, 18 Sep 2023 12:32:37 +0500 Subject: [PATCH 3/6] Add changlog --- doc/whats_new/v1.4.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/doc/whats_new/v1.4.rst b/doc/whats_new/v1.4.rst index 8dbd867b0c9ba..5a356d7a339a4 100644 --- a/doc/whats_new/v1.4.rst +++ b/doc/whats_new/v1.4.rst @@ -232,6 +232,12 @@ Changelog support SciPy sparse arrays. 
:pr:`27239` by :user:`Yaroslav Korobko `. +- |Fix| :func:`f1_score` now provides correct values when handling various + cases in which zero division occurs by using a formulation that does not + depend on the precision and recall values. + :pr:`27165` by :user:`Omar Salman ` and + :user:`Guillaume Lemaitre `. + :mod:`sklearn.preprocessing` ............................ From 66785376108e2587ba0ccec90d7f554415d21080 Mon Sep 17 00:00:00 2001 From: Omar Salman Date: Mon, 18 Sep 2023 12:49:01 +0500 Subject: [PATCH 4/6] Remove f1 warning that does not apply anymore --- sklearn/metrics/_classification.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index f1d3f4b67c0d5..6347d59dc93e8 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -1744,12 +1744,6 @@ def precision_recall_fscore_support( tp_sum, true_sum, "recall", "true", average, warn_for, zero_division ) - # warn for f-score only if zero_division is warn, it is in warn_for - # and BOTH prec and rec are ill-defined - if zero_division == "warn" and ("f-score",) == warn_for: - if (pred_sum[true_sum == 0] == 0).any(): - _warn_prf(average, "true nor predicted", "F-score is", len(true_sum)) - if np.isposinf(beta): f_score = recall elif beta == 0: From b5d1f3e8e76f6a88fa535d8fe0a7e13c32136249 Mon Sep 17 00:00:00 2001 From: Omar Salman Date: Mon, 18 Sep 2023 13:38:36 +0500 Subject: [PATCH 5/6] Fix warnings and tests that failed because of warning change --- fbeta.py | 7 +++++++ sklearn/metrics/_classification.py | 9 +++++---- sklearn/metrics/tests/test_classification.py | 6 ++++++ 3 files changed, 18 insertions(+), 4 deletions(-) create mode 100644 fbeta.py diff --git a/fbeta.py b/fbeta.py new file mode 100644 index 0000000000000..77f539fd83c3a --- /dev/null +++ b/fbeta.py @@ -0,0 +1,7 @@ +import numpy as np + +from sklearn.metrics import fbeta_score + +y_true = [0, 1, 2, 0, 1, 2] +y_pred_empty = [0, 0, 0, 0, 0, 0] +fbeta_score(y_true, y_pred_empty, average="macro", zero_division=np.nan, beta=0.5) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 6347d59dc93e8..29473a69edca7 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -1451,12 +1451,13 @@ def _prf_divide( # labels with no predicted samples. Use ``zero_division`` parameter to # control this behavior." 
- if metric in warn_for and "f-score" in warn_for: + if metric in warn_for and "f-score" in warn_for and metric != "f-score": msg_start = "{0} and F-score are".format(metric.title()) - elif metric in warn_for: - msg_start = "{0} is".format(metric.title()) elif "f-score" in warn_for: msg_start = "F-score is" + elif metric in warn_for: + msg_start = "{0} is".format(metric.title()) + else: return result @@ -1757,7 +1758,7 @@ def precision_recall_fscore_support( f_score = _prf_divide( (1 + beta2) * tp_sum, denom, - "fscore", + "f-score", "true nor predicted", average, warn_for, diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 256cffd9358d3..3570928ea3694 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -2077,6 +2077,12 @@ def test_prf_warnings(): with warnings.catch_warnings(record=True) as record: warnings.simplefilter("always") precision_recall_fscore_support([0, 0], [0, 0], average="binary") + msg = ( + "F-score is ill-defined and being set to 0.0 due to no true nor " + "predicted samples. Use `zero_division` parameter to control this" + " behavior." + ) + assert str(record.pop().message) == msg msg = ( "Recall and F-score are ill-defined and " "being set to 0.0 due to no true samples." From 9b671070142cb5213d51e066e32b858e3cdcdfae Mon Sep 17 00:00:00 2001 From: Omar Salman Date: Mon, 18 Sep 2023 13:41:45 +0500 Subject: [PATCH 6/6] Correct value in doctest --- sklearn/metrics/_classification.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 29473a69edca7..87a0f182209f9 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -1399,7 +1399,7 @@ def fbeta_score( >>> y_pred_empty = [0, 0, 0, 0, 0, 0] >>> fbeta_score(y_true, y_pred_empty, ... average="macro", zero_division=np.nan, beta=0.5) - 0.38... + 0.12... """ _, _, f, _ = precision_recall_fscore_support(
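
For reference, patch 2 rewrites the F-beta computation directly in terms of
confusion-matrix entries, score = (1 + beta**2) * tp / ((1 + beta**2) * tp +
beta**2 * fn + fp); since true_sum = tp + fn and pred_sum = tp + fp, the
`denom = beta2 * true_sum + pred_sum` used in the hunk is exactly that
denominator, so for finite nonzero beta the zero_division branch is reached
only when tp, fp and fn are all zero. The snippet below is a small
illustrative check of the behaviour the series establishes, not part of the
patches themselves: it assumes a scikit-learn checkout with this series
applied, and the expected values are taken from the added gh-26965
non-regression test and the updated fbeta_score doctest.

    import numpy as np

    from sklearn.metrics import f1_score, fbeta_score

    # gh-26965 case: both predictions are wrong, so fp + fn > 0 and no
    # division by zero occurs; F1 must be 0.0 even with zero_division=1.
    print(f1_score([0, 1], [1, 0], zero_division=1))  # expected: 0.0

    # No true and no predicted positives for pos_label=1: the score is
    # genuinely ill-defined, so the zero_division value is returned.
    print(f1_score([0, 0], [0, 0], zero_division=1))  # expected: 1.0

    # Updated doctest: classes 1 and 2 are never predicted but do have true
    # samples, so their F-beta becomes 0 instead of NaN and the macro
    # average drops from ~0.38 to ~0.12.
    y_true = [0, 1, 2, 0, 1, 2]
    y_pred_empty = [0, 0, 0, 0, 0, 0]
    print(
        fbeta_score(
            y_true, y_pred_empty, average="macro", zero_division=np.nan, beta=0.5
        )
    )  # expected: ~0.12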