FIX f1_score with zero_division=1 on binary classes by OmarManzoor · Pull Request #27165 · scikit-learn/scikit-learn · GitHub

FIX f1_score with zero_division=1 on binary classes #27165

Closed
OmarManzoor wants to merge 9 commits
8 changes: 8 additions & 0 deletions doc/whats_new/v1.4.rst
@@ -336,6 +336,14 @@ Changelog
for CSR × CSR, Dense × CSR, and CSR × Dense datasets is now 1.5x faster.
:pr:`26765` by :user:`Meekail Zain <micky774>`

- |Fix| :func:`f1_score` now provides correct values when handling various
cases in which zero division occurs by using a formulation that does not
depend on the precision and recall values.
:pr:`27165` by :user:`Omar Salman <OmarManzoor>` and
:user:`Guillaume Lemaitre <glemaitre>`.

:mod:`sklearn.preprocessing`
............................
- |Efficiency| Computing distances via :class:`metrics.DistanceMetric`
for CSR × CSR, Dense × CSR, and CSR × Dense now uses ~50% less memory,
and outputs distances in the same dtype as the provided data.
7 changes: 7 additions & 0 deletions fbeta.py
@@ -0,0 +1,7 @@
import numpy as np

from sklearn.metrics import fbeta_score

y_true = [0, 1, 2, 0, 1, 2]
y_pred_empty = [0, 0, 0, 0, 0, 0]
fbeta_score(y_true, y_pred_empty, average="macro", zero_division=np.nan, beta=0.5)
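As an editorial aside (not part of the PR diff): a rough sketch of what this reproduction script exercises under the confusion-matrix formulation adopted in _classification.py below. multilabel_confusion_matrix is used only to obtain per-class counts, and nanmean mimics how zero_division=np.nan drops ill-defined scores from the macro average.

import numpy as np
from sklearn.metrics import multilabel_confusion_matrix

y_true = [0, 1, 2, 0, 1, 2]
y_pred_empty = [0, 0, 0, 0, 0, 0]
beta2 = 0.5 ** 2

# Per-class tp, fp, fn from the (n_classes, 2, 2) confusion matrices.
mcm = multilabel_confusion_matrix(y_true, y_pred_empty)
tp, fp, fn = mcm[:, 1, 1], mcm[:, 0, 1], mcm[:, 1, 0]

# F-beta written directly in terms of confusion-matrix entries.
denom = (1 + beta2) * tp + beta2 * fn + fp
safe = np.where(denom == 0, 1, denom)  # entries with denom == 0 become nan below
fbeta = np.where(denom == 0, np.nan, (1 + beta2) * tp / safe)

print(np.nanmean(fbeta))  # ~0.128, matching the updated docstring value 0.12... below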
36 changes: 18 additions & 18 deletions sklearn/metrics/_classification.py
@@ -1399,7 +1399,7 @@ def fbeta_score(
>>> y_pred_empty = [0, 0, 0, 0, 0, 0]
>>> fbeta_score(y_true, y_pred_empty,
... average="macro", zero_division=np.nan, beta=0.5)
0.38...
0.12...
"""

_, _, f, _ = precision_recall_fscore_support(
@@ -1451,12 +1451,13 @@ def _prf_divide(
# labels with no predicted samples. Use ``zero_division`` parameter to
# control this behavior."

if metric in warn_for and "f-score" in warn_for:
if metric in warn_for and "f-score" in warn_for and metric != "f-score":
msg_start = "{0} and F-score are".format(metric.title())
elif metric in warn_for:
msg_start = "{0} is".format(metric.title())
elif "f-score" in warn_for:
msg_start = "F-score is"
elif metric in warn_for:
msg_start = "{0} is".format(metric.title())

else:
return result
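Side note (illustration, not part of the diff): with the confusion-matrix formulation the F-score itself is now divided through _prf_divide, so the message-selection logic above has to handle metric == "f-score" directly rather than only "precision"/"recall"; hence the extra guard and the reordered branches. A minimal way to observe the F-score-only warning, assuming the default zero_division="warn":

import warnings
from sklearn.metrics import precision_recall_fscore_support

with warnings.catch_warnings(record=True) as record:
    warnings.simplefilter("always")
    # The positive class (1) is neither present in y_true nor predicted,
    # so the F-score is ill-defined for the binary average.
    precision_recall_fscore_support([0, 0], [0, 0], average="binary")

# Among the captured warnings is:
# "F-score is ill-defined and being set to 0.0 due to no true nor predicted
#  samples. Use `zero_division` parameter to control this behavior."
print([str(w.message) for w in record])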

@@ -1711,7 +1712,7 @@ def precision_recall_fscore_support(
array([0., 0., 1.]), array([0. , 0. , 0.8]),
array([2, 2, 2]))
"""
zero_division_value = _check_zero_division(zero_division)
_check_zero_division(zero_division)
labels = _check_set_wise_labels(y_true, y_pred, average, labels, pos_label)

# Calculate tp_sum, pred_sum, true_sum ###
@@ -1744,26 +1745,25 @@
tp_sum, true_sum, "recall", "true", average, warn_for, zero_division
)

# warn for f-score only if zero_division is warn, it is in warn_for
# and BOTH prec and rec are ill-defined
if zero_division == "warn" and ("f-score",) == warn_for:
if (pred_sum[true_sum == 0] == 0).any():
_warn_prf(average, "true nor predicted", "F-score is", len(true_sum))

if np.isposinf(beta):
f_score = recall
elif beta == 0:
f_score = precision
else:
# The score is defined as:
# score = (1 + beta**2) * precision * recall / (beta**2 * precision + recall)
# We set to `zero_division_value` if the denominator is 0 **or** if **both**
# precision and recall are ill-defined.
denom = beta2 * precision + recall
mask = np.isclose(denom, 0) | np.isclose(pred_sum + true_sum, 0)
denom[mask] = 1 # avoid division by 0
f_score = (1 + beta2) * precision * recall / denom
f_score[mask] = zero_division_value
# Therefore, we can express the score in terms of confusion matrix entries as:
# score = (1 + beta**2) * tp / ((1 + beta**2) * tp + beta**2 * fn + fp)
denom = beta2 * true_sum + pred_sum
f_score = _prf_divide(
(1 + beta2) * tp_sum,
denom,
"f-score",
"true nor predicted",
average,
warn_for,
zero_division,
)

# Average the results
if average == "weighted":
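Editorial note on the rewritten block above (a sketch, not part of the diff): substituting P = tp / (tp + fp) and R = tp / (tp + fn) into the precision-recall form shows the two comments describe the same quantity, which is why a single _prf_divide call suffices:

F_\beta = \frac{(1+\beta^2)\, P R}{\beta^2 P + R}
        = \frac{(1+\beta^2)\, tp^2 / [(tp+fp)(tp+fn)]}{\beta^2\, tp/(tp+fp) + tp/(tp+fn)}
        = \frac{(1+\beta^2)\, tp}{\beta^2 (tp+fn) + (tp+fp)}
        = \frac{(1+\beta^2)\, tp}{(1+\beta^2)\, tp + \beta^2\, fn + fp}

(cancelling a common factor of tp; when tp = 0 both forms are 0 as long as their denominators are non-zero). The denominator \beta^2 (tp+fn) + (tp+fp) is exactly beta2 * true_sum + pred_sum, the denom computed above, and it vanishes only when tp = fp = fn = 0, i.e. when there are neither true nor predicted samples for the class — the only situation in which zero_division should apply.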
35 changes: 30 additions & 5 deletions sklearn/metrics/tests/test_classification.py
@@ -1797,7 +1797,7 @@ def test_precision_recall_f1_score_with_an_empty_prediction(

assert_array_almost_equal(p, [zero_division_expected, 1.0, 1.0, 0.0], 2)
assert_array_almost_equal(r, [0.0, 0.5, 1.0, zero_division_expected], 2)
expected_f = 0 if not np.isnan(zero_division_expected) else np.nan
expected_f = 0
assert_array_almost_equal(f, [expected_f, 1 / 1.5, 1, expected_f], 2)
assert_array_almost_equal(s, [1, 2, 1, 0], 2)

@@ -1814,7 +1814,7 @@ def test_precision_recall_f1_score_with_an_empty_prediction(

assert_almost_equal(p, (2 + value_to_sum) / values_to_average)
assert_almost_equal(r, (1.5 + value_to_sum) / values_to_average)
expected_f = (2 / 3 + 1) / (4 if not np.isnan(zero_division_expected) else 2)
expected_f = (2 / 3 + 1) / 4
assert_almost_equal(f, expected_f)
assert s is None
assert_almost_equal(
@@ -1847,7 +1847,7 @@ def test_precision_recall_f1_score_with_an_empty_prediction(
)
assert_almost_equal(p, 3 / 4 if zero_division_expected == 0 else 1.0)
assert_almost_equal(r, 0.5)
values_to_average = 4 if not np.isnan(zero_division_expected) else 3
values_to_average = 4
assert_almost_equal(f, (2 * 2 / 3 + 1) / values_to_average)
assert s is None
assert_almost_equal(
@@ -1865,12 +1865,12 @@ def test_precision_recall_f1_score_with_an_empty_prediction(
assert_almost_equal(r, 1 / 3)
assert_almost_equal(f, 1 / 3)
assert s is None
expected_result = {1: 0.666, np.nan: 1.0}
expected_result = 0.333
assert_almost_equal(
fbeta_score(
y_true, y_pred, beta=2, average="samples", zero_division=zero_division
),
expected_result.get(zero_division, 0.333),
expected_result,
2,
)

@@ -2077,6 +2077,12 @@ def test_prf_warnings():
with warnings.catch_warnings(record=True) as record:
warnings.simplefilter("always")
precision_recall_fscore_support([0, 0], [0, 0], average="binary")
msg = (
"F-score is ill-defined and being set to 0.0 due to no true nor "
"predicted samples. Use `zero_division` parameter to control this"
" behavior."
)
assert str(record.pop().message) == msg
msg = (
"Recall and F-score are ill-defined and "
"being set to 0.0 due to no true samples."
@@ -2804,3 +2810,22 @@ def test_classification_metric_pos_label_types(metric, classes):
y_pred = y_true.copy()
result = metric(y_true, y_pred, pos_label=pos_label)
assert not np.any(np.isnan(result))


def test_f1_for_small_binary_inputs_with_zero_division():
"""Non-regression test for gh-26965"""
y_true = np.array([0, 1])
y_pred = np.array([1, 0])
assert f1_score(y_true, y_pred, zero_division=1) == 0.0

y_true = np.array([0, 1])
y_pred = np.array([0, 1])
assert f1_score(y_true, y_pred, zero_division=1) == 1.0

y_true = np.array([0, 1])
y_pred = np.array([0, 0])
assert f1_score(y_true, y_pred, zero_division=1) == 0.0

y_true = np.array([0, 0])
y_pred = np.array([0, 0])
assert f1_score(y_true, y_pred, zero_division=1) == 1.0
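For clarity, a rough standalone check of these binary cases (illustration only; f1_from_counts is a hypothetical helper, not scikit-learn API), using the same confusion-matrix formulation with pos_label=1 and beta=1 so that F1 = 2*tp / (2*tp + fn + fp):

import numpy as np

def f1_from_counts(y_true, y_pred, zero_division):
    # F1 via confusion-matrix entries of the positive class (label 1).
    y_true, y_pred = np.asarray(y_true), np.asarray(y_pred)
    tp = np.sum((y_true == 1) & (y_pred == 1))
    fp = np.sum((y_true != 1) & (y_pred == 1))
    fn = np.sum((y_true == 1) & (y_pred != 1))
    denom = 2 * tp + fn + fp
    return zero_division if denom == 0 else 2 * tp / denom

# Swapped labels: tp = 0 but fp = fn = 1, so F1 is a well-defined 0.0.  The old
# precision/recall formulation had P = R = 0 and fell back to zero_division (1.0).
assert f1_from_counts([0, 1], [1, 0], zero_division=1) == 0.0
# Positive class absent from both y_true and y_pred: genuinely ill-defined,
# so the zero_division value is returned.
assert f1_from_counts([0, 0], [0, 0], zero_division=1) == 1.0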