FIX f1_score with zero_division=1 uses directly confusion matrix statistic · punndcoder28/scikit-learn@3b06962 · GitHub
[go: up one dir, main page]

Skip to content

Commit 3b06962

Browse files
glemaitre, OmarManzoor, and betatim
authored
FIX f1_score with zero_division=1 uses directly confusion matrix statistic (scikit-learn#27577)
Co-authored-by: Omar Salman <omar.salman@arbisoft.com> Co-authored-by: Tim Head <betatim@gmail.com>
1 parent cf56e95 commit 3b06962

File tree

3 files changed

+61
-44
lines changed

3 files changed

+61
-44
lines changed

doc/whats_new/v1.4.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -544,6 +544,12 @@ Changelog
544544
:func:`sklearn.metrics.zero_one_loss` now support Array API compatible inputs.
545545
:pr:`27137` by :user:`Edoardo Abati <EdAbati>`.
546546

547+
- |Fix| :func:`f1_score` now provides correct values when handling various
548+
cases in which division by zero occurs by using a formulation that does not
549+
depend on the precision and recall values.
550+
:pr:`27577` by :user:`Omar Salman <OmarManzoor>` and
551+
:user:`Guillaume Lemaitre <glemaitre>`.
552+
547553
- |API| Deprecated `needs_threshold` and `needs_proba` from :func:`metrics.make_scorer`.
548554
These parameters will be removed in version 1.6. Instead, use `response_method` that
549555
accepts `"predict"`, `"predict_proba"` or `"decision_function"` or a list of such

sklearn/metrics/_classification.py

Lines changed: 16 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1434,7 +1434,7 @@ def fbeta_score(
14341434
>>> y_pred_empty = [0, 0, 0, 0, 0, 0]
14351435
>>> fbeta_score(y_true, y_pred_empty,
14361436
... average="macro", zero_division=np.nan, beta=0.5)
1437-
0.38...
1437+
0.12...
14381438
"""
14391439

14401440
_, _, f, _ = precision_recall_fscore_support(
@@ -1482,20 +1482,8 @@ def _prf_divide(
14821482
return result
14831483

14841484
# build appropriate warning
1485-
# E.g. "Precision and F-score are ill-defined and being set to 0.0 in
1486-
# labels with no predicted samples. Use ``zero_division`` parameter to
1487-
# control this behavior."
1488-
1489-
if metric in warn_for and "f-score" in warn_for:
1490-
msg_start = "{0} and F-score are".format(metric.title())
1491-
elif metric in warn_for:
1492-
msg_start = "{0} is".format(metric.title())
1493-
elif "f-score" in warn_for:
1494-
msg_start = "F-score is"
1495-
else:
1496-
return result
1497-
1498-
_warn_prf(average, modifier, msg_start, len(result))
1485+
if metric in warn_for:
1486+
_warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
14991487

15001488
return result
15011489

@@ -1751,7 +1739,7 @@ def precision_recall_fscore_support(
17511739
array([0., 0., 1.]), array([0. , 0. , 0.8]),
17521740
array([2, 2, 2]))
17531741
"""
1754-
zero_division_value = _check_zero_division(zero_division)
1742+
_check_zero_division(zero_division)
17551743
labels = _check_set_wise_labels(y_true, y_pred, average, labels, pos_label)
17561744

17571745
# Calculate tp_sum, pred_sum, true_sum ###
@@ -1784,26 +1772,25 @@ def precision_recall_fscore_support(
17841772
tp_sum, true_sum, "recall", "true", average, warn_for, zero_division
17851773
)
17861774

1787-
# warn for f-score only if zero_division is warn, it is in warn_for
1788-
# and BOTH prec and rec are ill-defined
1789-
if zero_division == "warn" and ("f-score",) == warn_for:
1790-
if (pred_sum[true_sum == 0] == 0).any():
1791-
_warn_prf(average, "true nor predicted", "F-score is", len(true_sum))
1792-
17931775
if np.isposinf(beta):
17941776
f_score = recall
17951777
elif beta == 0:
17961778
f_score = precision
17971779
else:
17981780
# The score is defined as:
17991781
# score = (1 + beta**2) * precision * recall / (beta**2 * precision + recall)
1800-
# We set to `zero_division_value` if the denominator is 0 **or** if **both**
1801-
# precision and recall are ill-defined.
1802-
denom = beta2 * precision + recall
1803-
mask = np.isclose(denom, 0) | np.isclose(pred_sum + true_sum, 0)
1804-
denom[mask] = 1 # avoid division by 0
1805-
f_score = (1 + beta2) * precision * recall / denom
1806-
f_score[mask] = zero_division_value
1782+
# Therefore, we can express the score in terms of confusion matrix entries as:
1783+
# score = (1 + beta**2) * tp / ((1 + beta**2) * tp + beta**2 * fn + fp)
1784+
denom = beta2 * true_sum + pred_sum
1785+
f_score = _prf_divide(
1786+
(1 + beta2) * tp_sum,
1787+
denom,
1788+
"f-score",
1789+
"true nor predicted",
1790+
average,
1791+
warn_for,
1792+
zero_division,
1793+
)
18071794

18081795
# Average the results
18091796
if average == "weighted":

sklearn/metrics/tests/test_classification.py

Lines changed: 39 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1809,7 +1809,7 @@ def test_precision_recall_f1_score_with_an_empty_prediction(
18091809

18101810
assert_array_almost_equal(p, [zero_division_expected, 1.0, 1.0, 0.0], 2)
18111811
assert_array_almost_equal(r, [0.0, 0.5, 1.0, zero_division_expected], 2)
1812-
expected_f = 0 if not np.isnan(zero_division_expected) else np.nan
1812+
expected_f = 0
18131813
assert_array_almost_equal(f, [expected_f, 1 / 1.5, 1, expected_f], 2)
18141814
assert_array_almost_equal(s, [1, 2, 1, 0], 2)
18151815

@@ -1826,7 +1826,7 @@ def test_precision_recall_f1_score_with_an_empty_prediction(
18261826

18271827
assert_almost_equal(p, (2 + value_to_sum) / values_to_average)
18281828
assert_almost_equal(r, (1.5 + value_to_sum) / values_to_average)
1829-
expected_f = (2 / 3 + 1) / (4 if not np.isnan(zero_division_expected) else 2)
1829+
expected_f = (2 / 3 + 1) / 4
18301830
assert_almost_equal(f, expected_f)
18311831
assert s is None
18321832
assert_almost_equal(
@@ -1859,7 +1859,7 @@ def test_precision_recall_f1_score_with_an_empty_prediction(
18591859
)
18601860
assert_almost_equal(p, 3 / 4 if zero_division_expected == 0 else 1.0)
18611861
assert_almost_equal(r, 0.5)
1862-
values_to_average = 4 if not np.isnan(zero_division_expected) else 3
1862+
values_to_average = 4
18631863
assert_almost_equal(f, (2 * 2 / 3 + 1) / values_to_average)
18641864
assert s is None
18651865
assert_almost_equal(
@@ -1877,12 +1877,12 @@ def test_precision_recall_f1_score_with_an_empty_prediction(
18771877
assert_almost_equal(r, 1 / 3)
18781878
assert_almost_equal(f, 1 / 3)
18791879
assert s is None
1880-
expected_result = {1: 0.666, np.nan: 1.0}
1880+
expected_result = 0.333
18811881
assert_almost_equal(
18821882
fbeta_score(
18831883
y_true, y_pred, beta=2, average="samples", zero_division=zero_division
18841884
),
1885-
expected_result.get(zero_division, 0.333),
1885+
expected_result,
18861886
2,
18871887
)
18881888

@@ -2012,7 +2012,7 @@ def test_prf_warnings():
20122012
f, w = precision_recall_fscore_support, UndefinedMetricWarning
20132013
for average in [None, "weighted", "macro"]:
20142014
msg = (
2015-
"Precision and F-score are ill-defined and "
2015+
"Precision is ill-defined and "
20162016
"being set to 0.0 in labels with no predicted samples."
20172017
" Use `zero_division` parameter to control"
20182018
" this behavior."
@@ -2021,7 +2021,7 @@ def test_prf_warnings():
20212021
f([0, 1, 2], [1, 1, 2], average=average)
20222022

20232023
msg = (
2024-
"Recall and F-score are ill-defined and "
2024+
"Recall is ill-defined and "
20252025
"being set to 0.0 in labels with no true samples."
20262026
" Use `zero_division` parameter to control"
20272027
" this behavior."
@@ -2031,7 +2031,7 @@ def test_prf_warnings():
20312031

20322032
# average of per-sample scores
20332033
msg = (
2034-
"Precision and F-score are ill-defined and "
2034+
"Precision is ill-defined and "
20352035
"being set to 0.0 in samples with no predicted labels."
20362036
" Use `zero_division` parameter to control"
20372037
" this behavior."
@@ -2040,7 +2040,7 @@ def test_prf_warnings():
20402040
f(np.array([[1, 0], [1, 0]]), np.array([[1, 0], [0, 0]]), average="samples")
20412041

20422042
msg = (
2043-
"Recall and F-score are ill-defined and "
2043+
"Recall is ill-defined and "
20442044
"being set to 0.0 in samples with no true labels."
20452045
" Use `zero_division` parameter to control"
20462046
" this behavior."
@@ -2050,7 +2050,7 @@ def test_prf_warnings():
20502050

20512051
# single score: micro-average
20522052
msg = (
2053-
"Precision and F-score are ill-defined and "
2053+
"Precision is ill-defined and "
20542054
"being set to 0.0 due to no predicted samples."
20552055
" Use `zero_division` parameter to control"
20562056
" this behavior."
@@ -2059,7 +2059,7 @@ def test_prf_warnings():
20592059
f(np.array([[1, 1], [1, 1]]), np.array([[0, 0], [0, 0]]), average="micro")
20602060

20612061
msg = (
2062-
"Recall and F-score are ill-defined and "
2062+
"Recall is ill-defined and "
20632063
"being set to 0.0 due to no true samples."
20642064
" Use `zero_division` parameter to control"
20652065
" this behavior."
@@ -2069,7 +2069,7 @@ def test_prf_warnings():
20692069

20702070
# single positive label
20712071
msg = (
2072-
"Precision and F-score are ill-defined and "
2072+
"Precision is ill-defined and "
20732073
"being set to 0.0 due to no predicted samples."
20742074
" Use `zero_division` parameter to control"
20752075
" this behavior."
@@ -2078,7 +2078,7 @@ def test_prf_warnings():
20782078
f([1, 1], [-1, -1], average="binary")
20792079

20802080
msg = (
2081-
"Recall and F-score are ill-defined and "
2081+
"Recall is ill-defined and "
20822082
"being set to 0.0 due to no true samples."
20832083
" Use `zero_division` parameter to control"
20842084
" this behavior."
@@ -2090,14 +2090,20 @@ def test_prf_warnings():
20902090
warnings.simplefilter("always")
20912091
precision_recall_fscore_support([0, 0], [0, 0], average="binary")
20922092
msg = (
2093-
"Recall and F-score are ill-defined and "
2093+
"F-score is ill-defined and being set to 0.0 due to no true nor "
2094+
"predicted samples. Use `zero_division` parameter to control this"
2095+
" behavior."
2096+
)
2097+
assert str(record.pop().message) == msg
2098+
msg = (
2099+
"Recall is ill-defined and "
20942100
"being set to 0.0 due to no true samples."
20952101
" Use `zero_division` parameter to control"
20962102
" this behavior."
20972103
)
20982104
assert str(record.pop().message) == msg
20992105
msg = (
2100-
"Precision and F-score are ill-defined and "
2106+
"Precision is ill-defined and "
21012107
"being set to 0.0 due to no predicted samples."
21022108
" Use `zero_division` parameter to control"
21032109
" this behavior."
@@ -2818,6 +2824,24 @@ def test_classification_metric_pos_label_types(metric, classes):
28182824
assert not np.any(np.isnan(result))
28192825

28202826

2827+
@pytest.mark.parametrize(
2828+
"y_true, y_pred, expected_score",
2829+
[
2830+
(np.array([0, 1]), np.array([1, 0]), 0.0),
2831+
(np.array([0, 1]), np.array([0, 1]), 1.0),
2832+
(np.array([0, 1]), np.array([0, 0]), 0.0),
2833+
(np.array([0, 0]), np.array([0, 0]), 1.0),
2834+
],
2835+
)
2836+
def test_f1_for_small_binary_inputs_with_zero_division(y_true, y_pred, expected_score):
2837+
"""Check the behaviour of `zero_division` for f1-score.
2838+
2839+
Non-regression test for:
2840+
https://github.com/scikit-learn/scikit-learn/issues/26965
2841+
"""
2842+
assert f1_score(y_true, y_pred, zero_division=1.0) == pytest.approx(expected_score)
2843+
2844+
28212845
@pytest.mark.parametrize(
28222846
"scoring",
28232847
[

0 commit comments

Comments (0)