From 1a3203f6245183cedd2d37a4912d7cb081e3b6ae Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Fri, 7 Jun 2024 12:01:55 +0200 Subject: [PATCH 01/14] zero_division=nans for cohen_kappa_score --- doc/modules/model_evaluation.rst | 8 ++--- sklearn/metrics/_classification.py | 31 ++++++++++++++++++-- sklearn/metrics/tests/test_classification.py | 6 +++- 3 files changed, 37 insertions(+), 8 deletions(-) diff --git a/doc/modules/model_evaluation.rst b/doc/modules/model_evaluation.rst index eff6684458deb..080ed0c63a58c 100644 --- a/doc/modules/model_evaluation.rst +++ b/doc/modules/model_evaluation.rst @@ -605,7 +605,7 @@ The function :func:`cohen_kappa_score` computes `Cohen's kappa This measure is intended to compare labelings by different human annotators, not a classifier versus a ground truth. -The kappa score (see docstring) is a number between -1 and 1. +The kappa score is a number between -1 and 1. Scores above .8 are generally considered good agreement; zero or lower means no agreement (practically random labels). @@ -614,9 +614,9 @@ but not for multilabel problems (except by manually computing a per-label score) and not for more than two annotators. >>> from sklearn.metrics import cohen_kappa_score - >>> y_true = [2, 0, 2, 2, 0, 1] - >>> y_pred = [0, 0, 2, 2, 0, 2] - >>> cohen_kappa_score(y_true, y_pred) + >>> labeling1 = [2, 0, 2, 2, 0, 1] + >>> labeling2 = [0, 0, 2, 2, 0, 2] + >>> cohen_kappa_score(labeling1, labeling2) 0.4285714285714286 .. 
_confusion_matrix: diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 1fb4c1d694be0..c4feebf69cc75 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -629,10 +629,17 @@ def multilabel_confusion_matrix( "labels": ["array-like", None], "weights": [StrOptions({"linear", "quadratic"}), None], "sample_weight": ["array-like", None], + "zero_division": [ + StrOptions({"warn"}), + Options(int, {0, 1}), + Options(float, {np.nan}), + ], }, prefer_skip_nested_validation=True, ) -def cohen_kappa_score(y1, y2, *, labels=None, weights=None, sample_weight=None): +def cohen_kappa_score( + y1, y2, *, labels=None, weights=None, sample_weight=None, zero_division="warn" +): r"""Compute Cohen's kappa: a statistic that measures inter-annotator agreement. This function computes Cohen's kappa [1]_, a score that expresses the level @@ -665,12 +672,21 @@ class labels [2]_. ``y1`` or ``y2`` are used. weights : {'linear', 'quadratic'}, default=None - Weighting type to calculate the score. `None` means no weighted; - "linear" means linear weighted; "quadratic" means quadratic weighted. + Weighting type to calculate the score. `None` means not weighted; + "linear" means linear weighting; "quadratic" means quadratic weighting. sample_weight : array-like of shape (n_samples,), default=None Sample weights. + zero_division : {"warn", 0.0, 1.0, np.nan}, default="warn" + Sets the return value when there is a zero division, e.g. when + `y1=y2={np.ones, np.zeros}`. + + - If set to "warn", this acts like 0.0, but a warning is also raised. + - If set to `np.nan`, such values will be excluded from the average. + + .. versionadded:: 1.6 + Returns ------- kappa : float @@ -713,6 +729,15 @@ class labels [2]_. else: w_mat = (w_mat - w_mat.T) ** 2 + if np.sum(w_mat * expected) == 0: + if zero_division == "warn": + msg = ( + "`cohen_kappa_score()` is ill-defined and being set to 0.0. 
Use " + "`zero_division` to control this behaviour." + ) + warnings.warn(msg, UndefinedMetricWarning, stacklevel=2) + return _check_zero_division(zero_division) + k = np.sum(w_mat * confusion) / np.sum(w_mat * expected) return 1 - k diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index b87e76ba2fb42..a26185820f281 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -810,6 +810,7 @@ def test_matthews_corrcoef_nan(): partial(fbeta_score, beta=1), precision_score, recall_score, + cohen_kappa_score, ], ) def test_zero_division_nan_no_warning(metric, y_true, y_pred, zero_division): @@ -818,7 +819,9 @@ def test_zero_division_nan_no_warning(metric, y_true, y_pred, zero_division): """ with warnings.catch_warnings(): warnings.simplefilter("error") - result = metric(y_true, y_pred, zero_division=zero_division) + # Ignore warnings related to wrong shape of confusion_matrix: + with ignore_warnings(category=UserWarning): + result = metric(y_true, y_pred, zero_division=zero_division) if np.isnan(zero_division): assert np.isnan(result) @@ -834,6 +837,7 @@ def test_zero_division_nan_no_warning(metric, y_true, y_pred, zero_division): partial(fbeta_score, beta=1), precision_score, recall_score, + cohen_kappa_score, ], ) def test_zero_division_nan_warning(metric, y_true, y_pred): From a77375a2e400cdc79bfdb0adbb06ba57687152fd Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Fri, 7 Jun 2024 12:09:35 +0200 Subject: [PATCH 02/14] changelog --- doc/whats_new/v1.6.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/doc/whats_new/v1.6.rst b/doc/whats_new/v1.6.rst index 3fd07fd51578e..85735110ced20 100644 --- a/doc/whats_new/v1.6.rst +++ b/doc/whats_new/v1.6.rst @@ -103,6 +103,10 @@ Changelog whether to raise an exception if a subset of the scorers in multimetric scoring fails or to return an error code. :pr:`28992` by :user:`Stefanie Senger `. 
+- |Enhancement| Adds `zero_division` to :func:`cohen_kappa_score`. When there is a zero + division, the metric is undefined and this value is returned. :pr:`29210` by + :user:`Marc Torrellas Socastro `. + :mod:`sklearn.model_selection` .............................. From 2e0dd7a2628de5efe9c9dacb2b673e451d080b96 Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Fri, 7 Jun 2024 15:13:18 +0200 Subject: [PATCH 03/14] add comment --- sklearn/metrics/_classification.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index c4feebf69cc75..fdb2a5482ccb6 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -729,6 +729,7 @@ class labels [2]_. else: w_mat = (w_mat - w_mat.T) ** 2 + # Handle `zero_division` for an upcoming invalid divide: if np.sum(w_mat * expected) == 0: if zero_division == "warn": msg = ( From 8de166d0104a6714af76e74e74fac89af829158f Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Mon, 10 Jun 2024 11:10:41 +0200 Subject: [PATCH 04/14] little improvements --- sklearn/metrics/_classification.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index fdb2a5482ccb6..39ec2362b207a 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -631,8 +631,7 @@ def multilabel_confusion_matrix( "sample_weight": ["array-like", None], "zero_division": [ StrOptions({"warn"}), - Options(int, {0, 1}), - Options(float, {np.nan}), + Options(Real, {0.0, 1.0, np.nan}), ], }, prefer_skip_nested_validation=True, @@ -682,7 +681,7 @@ class labels [2]_. Sets the return value when there is a zero division, e.g. when `y1=y2={np.ones, np.zeros}`. - - If set to "warn", this acts like 0.0, but a warning is also raised. + - If set to "warn", this acts like a 0.0 input, but a warning is also raised. 
- If set to `np.nan`, such values will be excluded from the average. .. versionadded:: 1.6 @@ -729,7 +728,8 @@ class labels [2]_. else: w_mat = (w_mat - w_mat.T) ** 2 - # Handle `zero_division` for an upcoming invalid divide: + # Handle `zero_division` case if either "RuntimeWarning: invalid value encountered + # in scalar divide" or np.isfinite(k) == False: if np.sum(w_mat * expected) == 0: if zero_division == "warn": msg = ( From 6d3633ca0c948dd2ec41f9cd857f5e9a9a0494f5 Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Mon, 10 Jun 2024 12:17:17 +0200 Subject: [PATCH 05/14] more little improvements --- sklearn/metrics/_classification.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 39ec2362b207a..7b3a52deeebbf 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -62,9 +62,11 @@ def _check_zero_division(zero_division): if isinstance(zero_division, str) and zero_division == "warn": - return np.float64(0.0) + return np.float64(0.0) # question to reviewers: could this be np.float16(0.0)? elif isinstance(zero_division, (int, float)) and zero_division in [0, 1]: - return np.float64(zero_division) + return np.float64( + zero_division + ) # question to reviewers: could this be np.float16(zero_division)? else: # np.isnan(zero_division) return np.nan @@ -679,10 +681,8 @@ class labels [2]_. zero_division : {"warn", 0.0, 1.0, np.nan}, default="warn" Sets the return value when there is a zero division, e.g. when - `y1=y2={np.ones, np.zeros}`. - - - If set to "warn", this acts like a 0.0 input, but a warning is also raised. - - If set to `np.nan`, such values will be excluded from the average. + `y1=y2={np.ones, np.zeros}`. If set to "warn", returns 0.0 input, but a + warning is also raised. .. versionadded:: 1.6 @@ -729,12 +729,12 @@ class labels [2]_. 
w_mat = (w_mat - w_mat.T) ** 2 # Handle `zero_division` case if either "RuntimeWarning: invalid value encountered - # in scalar divide" or np.isfinite(k) == False: + # in scalar divide" would be raised or np.isfinite(k) == False: if np.sum(w_mat * expected) == 0: if zero_division == "warn": msg = ( - "`cohen_kappa_score()` is ill-defined and being set to 0.0. Use " - "`zero_division` to control this behaviour." + "`cohen_kappa_score()` is ill-defined and is set to 0.0. Use the " + "`zero_division` param to control this behavior." ) warnings.warn(msg, UndefinedMetricWarning, stacklevel=2) return _check_zero_division(zero_division) @@ -1551,7 +1551,7 @@ def _warn_prf(average, modifier, msg_start, result_size): axis0, axis1 = axis1, axis0 msg = ( "{0} ill-defined and being set to 0.0 {{0}} " - "no {1} {2}s. Use `zero_division` parameter to control" + "no {1} {2}s. Use the `zero_division` parameter to control" " this behavior.".format(msg_start, modifier, axis0) ) if result_size == 1: From 4fcf177207b80f9a51060af5346a0388d8231976 Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Mon, 10 Jun 2024 12:34:42 +0200 Subject: [PATCH 06/14] revert error message modification for now --- sklearn/metrics/_classification.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 7b3a52deeebbf..c20d36fbefd26 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -1551,7 +1551,7 @@ def _warn_prf(average, modifier, msg_start, result_size): axis0, axis1 = axis1, axis0 msg = ( "{0} ill-defined and being set to 0.0 {{0}} " - "no {1} {2}s. Use the `zero_division` parameter to control" + "no {1} {2}s. 
Use `zero_division` parameter to control" " this behavior.".format(msg_start, modifier, axis0) ) if result_size == 1: From 5ae0e88a9b3eaffa245187ce66b8fb7132b79918 Mon Sep 17 00:00:00 2001 From: Stefanie Senger <91849487+StefanieSenger@users.noreply.github.com> Date: Tue, 11 Jun 2024 14:09:40 +0200 Subject: [PATCH 07/14] Apply suggestions from code review Co-authored-by: Guillaume Lemaitre --- sklearn/metrics/_classification.py | 7 +++---- sklearn/metrics/tests/test_classification.py | 2 +- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index c20d36fbefd26..59bcebf4954fd 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -728,9 +728,8 @@ class labels [2]_. else: w_mat = (w_mat - w_mat.T) ** 2 - # Handle `zero_division` case if either "RuntimeWarning: invalid value encountered - # in scalar divide" would be raised or np.isfinite(k) == False: - if np.sum(w_mat * expected) == 0: + denominator = np.sum(w_mat * expected) + if np.isclose(denominator, 0): if zero_division == "warn": msg = ( "`cohen_kappa_score()` is ill-defined and is set to 0.0. Use the " @@ -739,7 +738,7 @@ class labels [2]_. 
warnings.warn(msg, UndefinedMetricWarning, stacklevel=2) return _check_zero_division(zero_division) - k = np.sum(w_mat * confusion) / np.sum(w_mat * expected) + k = np.sum(w_mat * confusion) / denominator return 1 - k diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index a26185820f281..8838c362c95c1 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -810,7 +810,7 @@ def test_matthews_corrcoef_nan(): partial(fbeta_score, beta=1), precision_score, recall_score, - cohen_kappa_score, + partial(cohen_kappa_score, labels=[0, 1]), ], ) def test_zero_division_nan_no_warning(metric, y_true, y_pred, zero_division): From bd5720307c2cc80e33805e8e22dfbfb575dbde9e Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Tue, 11 Jun 2024 15:29:41 +0200 Subject: [PATCH 08/14] add helper function like suggested --- sklearn/metrics/_classification.py | 38 +++++++++++++------- sklearn/metrics/tests/test_classification.py | 2 +- 2 files changed, 27 insertions(+), 13 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 59bcebf4954fd..77c9ffef8a121 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -62,11 +62,9 @@ def _check_zero_division(zero_division): if isinstance(zero_division, str) and zero_division == "warn": - return np.float64(0.0) # question to reviewers: could this be np.float16(0.0)? + return np.float64(0.0) elif isinstance(zero_division, (int, float)) and zero_division in [0, 1]: - return np.float64( - zero_division - ) # question to reviewers: could this be np.float16(zero_division)? + return np.float64(zero_division) else: # np.isnan(zero_division) return np.nan @@ -715,7 +713,15 @@ class labels [2]_. 
n_classes = confusion.shape[0] sum0 = np.sum(confusion, axis=0) sum1 = np.sum(confusion, axis=1) - expected = np.outer(sum0, sum1) / np.sum(sum0) + + numerator = np.outer(sum0, sum1) + denominator = np.sum(sum0) + expected = _metric_handle_division( + numerator, denominator, "cohen_kappa_score()", zero_division + ) + + if np.isclose(denominator, 0): + return _check_zero_division(zero_division) if weights is None: w_mat = np.ones([n_classes, n_classes], dtype=int) @@ -728,18 +734,26 @@ class labels [2]_. else: w_mat = (w_mat - w_mat.T) ** 2 + numerator = np.sum(w_mat * confusion) denominator = np.sum(w_mat * expected) + score = _metric_handle_division( + numerator, denominator, "cohen_kappa_score()", zero_division + ) + + if np.isclose(denominator, 0): + return _check_zero_division(zero_division) + else: + return 1 - score + + +def _metric_handle_division(numerator, denominator, metric, zero_division): if np.isclose(denominator, 0): if zero_division == "warn": - msg = ( - "`cohen_kappa_score()` is ill-defined and is set to 0.0. Use the " - "`zero_division` param to control this behavior." - ) + msg = f"{metric} is ill-defined and set to 0.0. Use the `zero_division` " + "param to control this behavior." 
warnings.warn(msg, UndefinedMetricWarning, stacklevel=2) return _check_zero_division(zero_division) - - k = np.sum(w_mat * confusion) / denominator - return 1 - k + return numerator / denominator @validate_params( diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 8838c362c95c1..4569b18b8d707 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -810,7 +810,7 @@ def test_matthews_corrcoef_nan(): partial(fbeta_score, beta=1), precision_score, recall_score, - partial(cohen_kappa_score, labels=[0, 1]), + partial(cohen_kappa_score, labels=[0, 1]), ], ) def test_zero_division_nan_no_warning(metric, y_true, y_pred, zero_division): From 0fb0173017ea7abd74d08bbf258ec891e8d3970c Mon Sep 17 00:00:00 2001 From: Stefanie Senger <91849487+StefanieSenger@users.noreply.github.com> Date: Tue, 11 Jun 2024 15:31:00 +0200 Subject: [PATCH 09/14] Update doc/whats_new/v1.6.rst Co-authored-by: Guillaume Lemaitre --- doc/whats_new/v1.6.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/whats_new/v1.6.rst b/doc/whats_new/v1.6.rst index 85735110ced20..fd512da69ec82 100644 --- a/doc/whats_new/v1.6.rst +++ b/doc/whats_new/v1.6.rst @@ -104,8 +104,9 @@ Changelog or to return an error code. :pr:`28992` by :user:`Stefanie Senger `. - |Enhancement| Adds `zero_division` to :func:`cohen_kappa_score`. When there is a zero - division, the metric is undefined and this value is returned. :pr:`29210` by - :user:`Marc Torrellas Socastro `. + division, the metric is undefined and this value is returned. + :pr:`29210` by :user:`Marc Torrellas Socastro ` and + :user:`Stefanie Senger `. :mod:`sklearn.model_selection` .............................. 
From 2b6f669ee4e5712ee6711904486aa44128afd316 Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Thu, 13 Jun 2024 10:38:33 +0200 Subject: [PATCH 10/14] changes after review --- sklearn/metrics/_classification.py | 7 ++++--- sklearn/metrics/tests/test_classification.py | 4 +--- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 77c9ffef8a121..fa31ebf914ae8 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -678,9 +678,10 @@ class labels [2]_. Sample weights. zero_division : {"warn", 0.0, 1.0, np.nan}, default="warn" - Sets the return value when there is a zero division, e.g. when - `y1=y2={np.ones, np.zeros}`. If set to "warn", returns 0.0 input, but a - warning is also raised. + Sets the return value when there is a zero division. This is the case when both + labelings `y1` and `y2` both exclusively contain the class 0 (e. g. `[0,0,0,0]`) + (or if both are empty). If set to "warn", returns `0.0`, but a warning is also + raised. .. 
versionadded:: 1.6 diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 4569b18b8d707..aa612f73ef5c7 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -819,9 +819,7 @@ def test_zero_division_nan_no_warning(metric, y_true, y_pred, zero_division): """ with warnings.catch_warnings(): warnings.simplefilter("error") - # Ignore warnings related to wrong shape of confusion_matrix: - with ignore_warnings(category=UserWarning): - result = metric(y_true, y_pred, zero_division=zero_division) + result = metric(y_true, y_pred, zero_division=zero_division) if np.isnan(zero_division): assert np.isnan(result) From 2b064d82ceabd1457e6b34e13e1a37cd1be2cd01 Mon Sep 17 00:00:00 2001 From: Stefanie Senger <91849487+StefanieSenger@users.noreply.github.com> Date: Thu, 13 Jun 2024 10:40:06 +0200 Subject: [PATCH 11/14] Apply suggestions from code review Co-authored-by: Guillaume Lemaitre --- sklearn/metrics/_classification.py | 42 ++++++++++++++++++++++-------- 1 file changed, 31 insertions(+), 11 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 77c9ffef8a121..096b2bbd295a5 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -716,12 +716,12 @@ class labels [2]_. numerator = np.outer(sum0, sum1) denominator = np.sum(sum0) - expected = _metric_handle_division( + expected, is_zero_division = _metric_handle_division( numerator, denominator, "cohen_kappa_score()", zero_division ) - if np.isclose(denominator, 0): - return _check_zero_division(zero_division) + if is_zero_division: + return expected if weights is None: w_mat = np.ones([n_classes, n_classes], dtype=int) @@ -736,24 +736,44 @@ class labels [2]_. 
numerator = np.sum(w_mat * confusion) denominator = np.sum(w_mat * expected) - score = _metric_handle_division( + score, is_zero_division = _metric_handle_division( numerator, denominator, "cohen_kappa_score()", zero_division ) - if np.isclose(denominator, 0): - return _check_zero_division(zero_division) - else: - return 1 - score + if is_zero_division + return score + return 1 - score + +def _metric_handle_division(*, numerator, denominator, metric, zero_division): + """Helper to handle zero-division. -def _metric_handle_division(numerator, denominator, metric, zero_division): + Parameters + ---------- + numerator : numbers.Real + The numerator of the division. + denominator : numbers.Real + The denominator of the division. + metric : str + Name of the caller metric function. + zero_division : {0.0, 1.0, "warn"} + The strategy to use when encountering 0-denominator. + + Returns + ------- + result : numbers.Real + The resulting of the division + is_zero_division : bool + Whether or not we encountered a zero division. This value could be + required to early return `result` in the "caller" function. + """ if np.isclose(denominator, 0): if zero_division == "warn": msg = f"{metric} is ill-defined and set to 0.0. Use the `zero_division` " "param to control this behavior." 
warnings.warn(msg, UndefinedMetricWarning, stacklevel=2) - return _check_zero_division(zero_division) - return numerator / denominator + return _check_zero_division(zero_division), True + return numerator / denominator, False @validate_params( From d562cf35efd77d34198be1585feaed9b2e5a4b27 Mon Sep 17 00:00:00 2001 From: Stefanie Senger <91849487+StefanieSenger@users.noreply.github.com> Date: Fri, 14 Jun 2024 13:30:45 +0200 Subject: [PATCH 12/14] Apply suggestions from code review Co-authored-by: Guillaume Lemaitre --- sklearn/metrics/_classification.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index a36c51c4fb48e..043f0e22e8fdc 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -622,7 +622,7 @@ def multilabel_confusion_matrix( return np.array([tn, fp, fn, tp]).T.reshape(-1, 2, 2) -def _metric_handle_division(numerator, denominator, metric, zero_division): +def _metric_handle_division(*, numerator, denominator, metric, zero_division): """Helper to handle zero-division. Parameters @@ -710,7 +710,7 @@ class labels [2]_. zero_division : {"warn", 0.0, 1.0, np.nan}, default="warn" Sets the return value when there is a zero division. This is the case when both - labelings `y1` and `y2` both exclusively contain the 0 class (e. g. `[0,0,0,0]`) + labelings `y1` and `y2` both exclusively contain the 0 class (e. g. `[0, 0, 0, 0]`) (or if both are empty). If set to "warn", returns `0.0`, but a warning is also raised. @@ -749,7 +749,10 @@ class labels [2]_. numerator = np.outer(sum0, sum1) denominator = np.sum(sum0) expected, is_zero_division = _metric_handle_division( - numerator, denominator, "cohen_kappa_score()", zero_division + numerator=numerator, + denominator=denominator, + metric="cohen_kappa_score()", + zero_division=zero_division, ) if is_zero_division: @@ -769,7 +772,10 @@ class labels [2]_. 
numerator = np.sum(w_mat * confusion) denominator = np.sum(w_mat * expected) score, is_zero_division = _metric_handle_division( - numerator, denominator, "cohen_kappa_score()", zero_division + numerator=numerator, + denominator=denominator, + metric="cohen_kappa_score()", + zero_division=zero_division, ) if is_zero_division: From 4989216c72911c8b31ae9e8f55436db3f7e4cc76 Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Fri, 14 Jun 2024 13:36:19 +0200 Subject: [PATCH 13/14] fix linting --- sklearn/metrics/_classification.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 043f0e22e8fdc..789c08b79b43d 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -710,9 +710,9 @@ class labels [2]_. zero_division : {"warn", 0.0, 1.0, np.nan}, default="warn" Sets the return value when there is a zero division. This is the case when both - labelings `y1` and `y2` both exclusively contain the 0 class (e. g. `[0, 0, 0, 0]`) - (or if both are empty). If set to "warn", returns `0.0`, but a warning is also - raised. + labelings `y1` and `y2` exclusively contain the 0 class (e.g. + `[0, 0, 0, 0]`) (or if both are empty). If set to "warn", returns `0.0`, but a + warning is also raised. .. 
versionadded:: 1.6 From 4705d57c409f4af3867f460f416dae4d8cc84d99 Mon Sep 17 00:00:00 2001 From: Stefanie Senger <91849487+StefanieSenger@users.noreply.github.com> Date: Tue, 18 Jun 2024 10:15:31 +0200 Subject: [PATCH 14/14] Update doc/whats_new/v1.6.rst Co-authored-by: Adrin Jalali --- doc/whats_new/v1.6.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/whats_new/v1.6.rst b/doc/whats_new/v1.6.rst index 7d7bb9f4bb4e1..35e19101aeaab 100644 --- a/doc/whats_new/v1.6.rst +++ b/doc/whats_new/v1.6.rst @@ -130,8 +130,8 @@ Changelog whether to raise an exception if a subset of the scorers in multimetric scoring fails or to return an error code. :pr:`28992` by :user:`Stefanie Senger `. -- |Enhancement| Adds `zero_division` to :func:`cohen_kappa_score`. When there is a zero - division, the metric is undefined and this value is returned. +- |Enhancement| Adds `zero_division` to :func:`cohen_kappa_score`. When there is a + division by zero, the metric is undefined and this value is returned. :pr:`29210` by :user:`Marc Torrellas Socastro ` and :user:`Stefanie Senger `.