From 172ac47507d75de61ff418360b05b5c8a8c113be Mon Sep 17 00:00:00 2001 From: Carlo Date: Sat, 18 Feb 2023 01:47:57 -0300 Subject: [PATCH 01/42] initial proposal with preliminary tests --- sklearn/inspection/__init__.py | 3 + sklearn/inspection/_metric_threshold_curve.py | 176 ++++++++++++++++++ .../tests/test_metric_threshold_curve.py | 73 ++++++++ 3 files changed, 252 insertions(+) create mode 100644 sklearn/inspection/_metric_threshold_curve.py create mode 100644 sklearn/inspection/tests/test_metric_threshold_curve.py diff --git a/sklearn/inspection/__init__.py b/sklearn/inspection/__init__.py index f73ffe8cff26f..09173c30ec1b0 100644 --- a/sklearn/inspection/__init__.py +++ b/sklearn/inspection/__init__.py @@ -7,10 +7,13 @@ from ._partial_dependence import partial_dependence from ._plot.partial_dependence import PartialDependenceDisplay +from ._metric_threshold_curve import metric_threshold_curve + __all__ = [ "partial_dependence", "permutation_importance", "PartialDependenceDisplay", "DecisionBoundaryDisplay", + "metric_threshold_curve", ] diff --git a/sklearn/inspection/_metric_threshold_curve.py b/sklearn/inspection/_metric_threshold_curve.py new file mode 100644 index 0000000000000..5bad0f33213ae --- /dev/null +++ b/sklearn/inspection/_metric_threshold_curve.py @@ -0,0 +1,176 @@ +"""Metrics per threshold curves are used to assess performance on binary +classification task given threshold grid. One can undestand the behaviour of +threshold-dependent metrics when changing the threshold. +""" + +# Authors: ######## +# License: BSD 3 clause + +from numbers import Real, Integral + +import numpy as np + +from ..utils import assert_all_finite +from ..utils import check_consistent_length +from ..utils.validation import _check_sample_weight +from ..utils import column_or_1d +from ..utils.multiclass import type_of_target +from ..utils._param_validation import validate_params, Interval + +from ..metrics._base import _check_pos_label_consistency + + +@validate_params( + { + "y_true": ["array-like"], + "y_score": ["array-like"], + "score_func": [callable], + "threshold_grid": [Interval(Integral, 3, None, closed="left"), + "array-like", + None], + "pos_label": [Real, str, "boolean", None], + "sample_weight": ["array-like", None], + } +) +def metric_threshold_curve( + y_true, + y_score, + score_func, + *, + threshold_grid=101, + pos_label=None, + sample_weight=None): + """Compute the threshold-dependent metric of interest per threshold. + + Note: this implementation is restricted to the binary classification task. + + Parameters + ---------- + y_true : array-like of shape (n_samples,), default=None + True targets of binary classification. + + y_score : array-like of shape (n_samples,), default=None + Estimated probabilities or output of a decision function. + + score_func : callable + Threshold dependent score function (or loss function) with signature + `score_func(y, y_pred, sample_weight, **kwargs)`. + + threshold_grid : array-like, int or None, default=101 + Values of threhsold for each score calculation. If int then + `threshold_grid` percentiles of `y_score` are selected. If `None` then + all possible thresholds are selected. If int is lower then + `len(set(y_score))` then all possible thresholds are selected. + + pos_label : int, float, bool or str, default=None + The label of the positive class. + + sample_weight : array-like of shape (n_samples,), default=None + Sample weights. + + Returns + ------- + metric_values : ndarray of shape (n_thresholds,) + Score value for each threshold. At index i being the value of the + theshold-dependent metric for predictions score >= thresholds[i]. + + thresholds : ndarray of shape (n_thresholds,) + Ascending score values used as thresholds. + + See Also + -------- + precision_recall_curve : Compute precision-recall pairs for different + probability thresholds. + det_curve: Compute error rates for different probability thresholds. + roc_curve : Compute Receiver operating characteristic (ROC) curve. + + Examples + -------- + >>> import numpy as np + >>> from sklearn.metrics import accuracy_score + >>> y_true = np.array([0, 0, 1, 1]) + >>> y_scores = np.array([0.1, 0.4, 0.35, 0.8]) + >>> accuracy_values, thresholds = metric_threshold_curve( + ... y_true, y_scores, accuracy_score) + >>> thresholds + array([0.1 , 0.35, 0.4 , 0.8 ]) + >>> accuracy_values + array([0.75, 0.5 , 0.75, 0.5 ]) + """ + # Check to make sure y_true is valid + y_type = type_of_target(y_true, input_name="y_true") + if not (y_type == "binary" or (y_type == "multiclass" and pos_label is not None)): + raise ValueError("{0} format is not supported".format(y_type)) + + check_consistent_length(y_true, y_score, sample_weight) + y_true = column_or_1d(y_true) + y_score = column_or_1d(y_score) + assert_all_finite(y_true) + assert_all_finite(y_score) + + # Check if we have enough values to create the curve + if isinstance(threshold_grid, int): + if threshold_grid <= 2: + raise ValueError( + f"threshold_grid={threshold_grid} should be > 2." + ) + + # Filter out zero-weighted samples, as they should not impact the result + if sample_weight is not None: + sample_weight = column_or_1d(sample_weight) + sample_weight = _check_sample_weight(sample_weight, y_true) + nonzero_weight_mask = sample_weight != 0 + y_true = y_true[nonzero_weight_mask] + y_score = y_score[nonzero_weight_mask] + sample_weight = sample_weight[nonzero_weight_mask] + + pos_label = _check_pos_label_consistency(pos_label, y_true) + + # make y_true a boolean vector + y_true = y_true == pos_label + + # sort scores and corresponding truth values + desc_score_indices = np.argsort(y_score, kind="mergesort")[::-1] + y_score = y_score[desc_score_indices] + y_true = y_true[desc_score_indices] + if sample_weight is not None: + sample_weight = sample_weight[desc_score_indices] + + # logic to see if we need to use all possible thresholds (distinct values) + all_thresholds = False + if threshold_grid is None: + all_thresholds = True + elif isinstance(threshold_grid, int): + if len(set(y_score)) < threshold_grid: + all_thresholds = True + + if all_thresholds: + # y_score typically has many tied values. Here we extract + # the indices associated with the distinct values. We also + # concatenate a value for the end of the curve. + distinct_value_indices = np.where(np.diff(y_score))[0] + threshold_idxs = np.r_[distinct_value_indices, y_true.size - 1] + thresholds = y_score[threshold_idxs[::-1]] + elif isinstance(threshold_grid, int): + # takes representative score points to calculate the metric + # with these thresholds + thresholds = np.percentile( + list(set(y_score)), np.linspace(0, 100, threshold_grid) + ) + else: + # if threshold_grid is an array then run some checks and sort + # it for consistency + threshold_grid = column_or_1d(threshold_grid) + assert_all_finite(threshold_grid) + thresholds = np.sort(threshold_grid) + + # for each threshold calculates the metric + metric_values = [] + for threshold in thresholds: + preds_threshold = (y_score > threshold).astype(int) + metric_values.append( + score_func(y_true, preds_threshold, sample_weight=sample_weight) + ) + # TODO: should we multithread the metric calculations? + + return np.array(metric_values), thresholds diff --git a/sklearn/inspection/tests/test_metric_threshold_curve.py b/sklearn/inspection/tests/test_metric_threshold_curve.py new file mode 100644 index 0000000000000..20fd7b8a3fc27 --- /dev/null +++ b/sklearn/inspection/tests/test_metric_threshold_curve.py @@ -0,0 +1,73 @@ +import pytest +import numpy as np + +from functools import partial + +from numpy.testing import assert_allclose + +from sklearn.datasets import make_classification +from sklearn.ensemble import RandomForestClassifier +from sklearn.inspection import metric_threshold_curve +from sklearn.metrics import ( + accuracy_score, + f1_score, + fbeta_score, + precision_score, + recall_score, +) +from sklearn.utils.validation import check_random_state + + +def test_grid_int_bigger_than_set_then_all(): + """When `threshold_grid` parameter is bigger than the number of unique + `y_score` then `len(thresholds)` should be equal to `len(set(y_score))` + and thresholds should be the same from what we get with + `threshold_grid=None`. + """ + + X, y = make_classification() + clf = RandomForestClassifier(n_estimators=10, random_state=42).fit(X, y) + y_score = clf.predict_proba(X)[:, 1] + + _, thresholds_big_int = metric_threshold_curve( + y, y_score, accuracy_score, threshold_grid=len(set(y_score)) + 1000 + ) + + _, thresholds_none = metric_threshold_curve( + y, y_score, accuracy_score, threshold_grid=None + ) + + assert_allclose(thresholds_big_int, thresholds_none) + assert len(thresholds_big_int) == len(set(y_score)) + + +def test_binary_clf_curve_multiclass_error(): + rng = check_random_state(404) + y_true = rng.randint(0, 3, size=10) + y_pred = rng.rand(10) + msg = "multiclass format is not supported" + with pytest.raises(ValueError, match=msg): + metric_threshold_curve(y_true, y_pred, accuracy_score) + + +@pytest.mark.parametrize( + "metric", + [ + partial(fbeta_score, beta=3), + partial(fbeta_score, beta=0.5), + f1_score, + precision_score, + recall_score, + accuracy_score, + ], +) +def test_metric_threshold_curve_end_points(metric): + rng = check_random_state(0) + y_true = np.array([0] * 50 + [1] * 50) + y_pred = rng.normal(3, size=100) + min_pred, max_pred = min(y_pred), max(y_pred) + + metric_values, _ = metric_threshold_curve(y_true, y_pred, metric) + + assert metric_values[0] == metric(y_true, (y_pred > min_pred)*1) + assert metric_values[-1] == metric(y_true, (y_pred > max_pred)*1) From d038e1153de0fe62d2ec0cd9a6d8851609a65492 Mon Sep 17 00:00:00 2001 From: Carlo Date: Sat, 18 Feb 2023 01:57:50 -0300 Subject: [PATCH 02/42] removing check that validate_params already does --- sklearn/inspection/_metric_threshold_curve.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/sklearn/inspection/_metric_threshold_curve.py b/sklearn/inspection/_metric_threshold_curve.py index 5bad0f33213ae..7bfeec58c6ab1 100644 --- a/sklearn/inspection/_metric_threshold_curve.py +++ b/sklearn/inspection/_metric_threshold_curve.py @@ -108,13 +108,6 @@ def metric_threshold_curve( assert_all_finite(y_true) assert_all_finite(y_score) - # Check if we have enough values to create the curve - if isinstance(threshold_grid, int): - if threshold_grid <= 2: - raise ValueError( - f"threshold_grid={threshold_grid} should be > 2." - ) - # Filter out zero-weighted samples, as they should not impact the result if sample_weight is not None: sample_weight = column_or_1d(sample_weight) From 322eccfc46cbf685b5973ba28418aabfd0f69bd0 Mon Sep 17 00:00:00 2001 From: Carlo Date: Sat, 18 Feb 2023 02:25:33 -0300 Subject: [PATCH 03/42] changelog and linting from CI --- doc/whats_new/v1.3.rst | 7 ++++++ sklearn/inspection/_metric_threshold_curve.py | 23 +++++++++++-------- .../tests/test_metric_threshold_curve.py | 4 ++-- 3 files changed, 22 insertions(+), 12 deletions(-) diff --git a/doc/whats_new/v1.3.rst b/doc/whats_new/v1.3.rst index 08ebf4abc92c3..c40f7618bb440 100644 --- a/doc/whats_new/v1.3.rst +++ b/doc/whats_new/v1.3.rst @@ -153,6 +153,13 @@ Changelog - |Enhancement| Added the parameter `fill_value` to :class:`impute.IterativeImputer`. :pr:`25232` by :user:`Thijs van Weezel `. +:mod:`sklearn.inspection` +......................... + +- |Feature| Implementation of :func:`inspection.metric_threshold_curve`. This + function the threshold-dependent metric of interest for each threshold in a + threshold grid. :pr:`25639` by :user:`Carlo Lemos `. + :mod:`sklearn.metrics` ...................... diff --git a/sklearn/inspection/_metric_threshold_curve.py b/sklearn/inspection/_metric_threshold_curve.py index 7bfeec58c6ab1..2f2c6df032f39 100644 --- a/sklearn/inspection/_metric_threshold_curve.py +++ b/sklearn/inspection/_metric_threshold_curve.py @@ -25,21 +25,24 @@ "y_true": ["array-like"], "y_score": ["array-like"], "score_func": [callable], - "threshold_grid": [Interval(Integral, 3, None, closed="left"), - "array-like", - None], + "threshold_grid": [ + Interval(Integral, 3, None, closed="left"), + "array-like", + None, + ], "pos_label": [Real, str, "boolean", None], "sample_weight": ["array-like", None], } ) def metric_threshold_curve( - y_true, - y_score, - score_func, - *, - threshold_grid=101, - pos_label=None, - sample_weight=None): + y_true, + y_score, + score_func, + *, + threshold_grid=101, + pos_label=None, + sample_weight=None, +): """Compute the threshold-dependent metric of interest per threshold. Note: this implementation is restricted to the binary classification task. diff --git a/sklearn/inspection/tests/test_metric_threshold_curve.py b/sklearn/inspection/tests/test_metric_threshold_curve.py index 20fd7b8a3fc27..85bd88e031a78 100644 --- a/sklearn/inspection/tests/test_metric_threshold_curve.py +++ b/sklearn/inspection/tests/test_metric_threshold_curve.py @@ -69,5 +69,5 @@ def test_metric_threshold_curve_end_points(metric): metric_values, _ = metric_threshold_curve(y_true, y_pred, metric) - assert metric_values[0] == metric(y_true, (y_pred > min_pred)*1) - assert metric_values[-1] == metric(y_true, (y_pred > max_pred)*1) + assert metric_values[0] == metric(y_true, (y_pred > min_pred) * 1) + assert metric_values[-1] == metric(y_true, (y_pred > max_pred) * 1) From 7dbbec5a48c9d6f14c1ee8b8545543b536548756 Mon Sep 17 00:00:00 2001 From: Carlo Date: Sat, 18 Feb 2023 12:33:23 -0300 Subject: [PATCH 04/42] trying to resolve doc related ci --- doc/inspection.rst | 1 + doc/modules/classes.rst | 1 + doc/modules/metric_threshold_curve.rst | 56 ++++++++++++++++++++++++++ doc/whats_new/v1.3.rst | 7 ++-- 4 files changed, 62 insertions(+), 3 deletions(-) create mode 100644 doc/modules/metric_threshold_curve.rst diff --git a/doc/inspection.rst b/doc/inspection.rst index 57c1cfc3275e8..50082983d047d 100644 --- a/doc/inspection.rst +++ b/doc/inspection.rst @@ -27,5 +27,6 @@ to diagnose issues with model performance. .. toctree:: + modules/metric_threshold_curve modules/partial_dependence modules/permutation_importance diff --git a/doc/modules/classes.rst b/doc/modules/classes.rst index e8d438e124c4f..dd8d995c8504d 100644 --- a/doc/modules/classes.rst +++ b/doc/modules/classes.rst @@ -650,6 +650,7 @@ Kernels: :toctree: generated/ :template: function.rst + inspection.metric_threshold_curve inspection.partial_dependence inspection.permutation_importance diff --git a/doc/modules/metric_threshold_curve.rst b/doc/modules/metric_threshold_curve.rst new file mode 100644 index 0000000000000..09fe90d495c92 --- /dev/null +++ b/doc/modules/metric_threshold_curve.rst @@ -0,0 +1,56 @@ + +.. _permutation_importance: + +Metric threshold curve +====================== + +.. currentmodule:: sklearn.inspection + +Metric threshold curve is a model inspection technique that can be used +for any :term:`fitted` binary classification :term:`estimator`. This is +especially useful for non-linear or opaque :term:`estimators`. The metric +threshold curve is defined to be how the threshold-dependent metric behaves +when we change the decision threshold. + +Let's consider the following trained binary classification model:: + + >>> import matplotlib.pyplot as plt + >>> from sklearn.datasets import make_classification + >>> from sklearn.ensemble import RandomForestClassifier + >>> from sklearn.model_selection import train_test_split + >>> from sklearn.metrics import fbeta_score + >>> from functools import partial + ... + >>> X, y = make_classification( + ... n_samples=10_000, weights=(0.95, ), random_state=42) + ... + >>> X_train_clf, X_test, y_train_clf, y_test = train_test_split( + ... X, y, random_state=42, stratify=y) + >>> X_train_clf, X_train_thr, y_train_clf, y_train_thr = train_test_split( + ... X_train_clf, y_train_clf, random_state=42, stratify=y_train_clf) + ... + >>> model = RandomForestClassifier(random_state=42) + >>> model.fit(X_train_clf, y_train_clf) + ... + >>> fbeta_score(y_test, model.predict(X_test), beta=2) + 0.462... + +Its validation performance, measured via the threshold-dependent metric f2 +score, is suboptimal because of the default threshold of 0.5. We can futher +look into the behaviour of that metric with:: + + >>> from sklearn.inspection import metric_threshold_curve + >>> predict_proba_thr = model.predict_proba(X_train_thr)[:, 1] + ... + >>> f2_values, thresholds = metric_threshold_curve( + ... y_train_thr, predict_proba_thr, partial(fbeta_score, beta=2)) + ... + >>> best_thr = thresholds[np.argmax(f2_values)] + >>> best_thr + ... 0.21 + ... + >>> new_predict_test = (model.predict_proba(X_test)[: ,1] > best_thr).astype(int) + >>> fbeta_score(y_test, new_predict_test, beta=2) + ... 0.719... + +Note that the new choosen threshold optimizes the f2 score in the test set. diff --git a/doc/whats_new/v1.3.rst b/doc/whats_new/v1.3.rst index c40f7618bb440..ded64f1090a41 100644 --- a/doc/whats_new/v1.3.rst +++ b/doc/whats_new/v1.3.rst @@ -156,9 +156,10 @@ Changelog :mod:`sklearn.inspection` ......................... -- |Feature| Implementation of :func:`inspection.metric_threshold_curve`. This - function the threshold-dependent metric of interest for each threshold in a - threshold grid. :pr:`25639` by :user:`Carlo Lemos `. +- |MajorFeature| :func:`inspection.metric_threshold_curve` has been added to + measure the relationship between the threshold used by a binary classifier + for a given threshold-dependent function. :pr:`25639` by + :user:`Carlo Lemos `. :mod:`sklearn.metrics` ...................... From 2a0c6b35f36d8e50d139396a02ac402fe23fe20f Mon Sep 17 00:00:00 2001 From: Carlo Date: Sat, 18 Feb 2023 12:52:42 -0300 Subject: [PATCH 05/42] duplicate label --- doc/modules/metric_threshold_curve.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/modules/metric_threshold_curve.rst b/doc/modules/metric_threshold_curve.rst index 09fe90d495c92..a5a98d06e5bec 100644 --- a/doc/modules/metric_threshold_curve.rst +++ b/doc/modules/metric_threshold_curve.rst @@ -1,5 +1,5 @@ -.. _permutation_importance: +.. _metric_threshold_curve: Metric threshold curve ====================== From fbb9b9b15fb037a0e2794a6d5c8caa78dc2eed82 Mon Sep 17 00:00:00 2001 From: Carlo Date: Sat, 18 Feb 2023 13:21:52 -0300 Subject: [PATCH 06/42] docstring example import error --- sklearn/inspection/_metric_threshold_curve.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sklearn/inspection/_metric_threshold_curve.py b/sklearn/inspection/_metric_threshold_curve.py index 2f2c6df032f39..a0114b9b19a31 100644 --- a/sklearn/inspection/_metric_threshold_curve.py +++ b/sklearn/inspection/_metric_threshold_curve.py @@ -91,6 +91,7 @@ def metric_threshold_curve( -------- >>> import numpy as np >>> from sklearn.metrics import accuracy_score + >>> from sklearn.inspection import metric_threshold_curve >>> y_true = np.array([0, 0, 1, 1]) >>> y_scores = np.array([0.1, 0.4, 0.35, 0.8]) >>> accuracy_values, thresholds = metric_threshold_curve( From acb94bef2f09959d1f8bead894836a6cdfdbdc73 Mon Sep 17 00:00:00 2001 From: Carlo Date: Sat, 18 Feb 2023 13:59:53 -0300 Subject: [PATCH 07/42] docstring typo --- doc/modules/metric_threshold_curve.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/modules/metric_threshold_curve.rst b/doc/modules/metric_threshold_curve.rst index a5a98d06e5bec..cc46e9590179a 100644 --- a/doc/modules/metric_threshold_curve.rst +++ b/doc/modules/metric_threshold_curve.rst @@ -29,8 +29,7 @@ Let's consider the following trained binary classification model:: >>> X_train_clf, X_train_thr, y_train_clf, y_train_thr = train_test_split( ... X_train_clf, y_train_clf, random_state=42, stratify=y_train_clf) ... - >>> model = RandomForestClassifier(random_state=42) - >>> model.fit(X_train_clf, y_train_clf) + >>> model = RandomForestClassifier(random_state=42).fit(X_train_clf, y_train_clf) ... >>> fbeta_score(y_test, model.predict(X_test), beta=2) 0.462... From a5cd20103c2ceb5ae0e88c4385dacce23964af02 Mon Sep 17 00:00:00 2001 From: Carlo Date: Sat, 18 Feb 2023 14:02:24 -0300 Subject: [PATCH 08/42] docstring typo --- doc/modules/metric_threshold_curve.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/modules/metric_threshold_curve.rst b/doc/modules/metric_threshold_curve.rst index cc46e9590179a..dcfeade75fe65 100644 --- a/doc/modules/metric_threshold_curve.rst +++ b/doc/modules/metric_threshold_curve.rst @@ -48,7 +48,7 @@ look into the behaviour of that metric with:: >>> best_thr ... 0.21 ... - >>> new_predict_test = (model.predict_proba(X_test)[: ,1] > best_thr).astype(int) + >>> new_predict_test = (model.predict_proba(X_test)[:, 1] > best_thr).astype(int) >>> fbeta_score(y_test, new_predict_test, beta=2) ... 0.719... From 253b3e258fd8f22346e6b50ce7e543e2963ff224 Mon Sep 17 00:00:00 2001 From: Carlo Date: Sat, 18 Feb 2023 14:23:05 -0300 Subject: [PATCH 09/42] docstring typo --- doc/modules/metric_threshold_curve.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/modules/metric_threshold_curve.rst b/doc/modules/metric_threshold_curve.rst index dcfeade75fe65..3d4d16bd7ee3c 100644 --- a/doc/modules/metric_threshold_curve.rst +++ b/doc/modules/metric_threshold_curve.rst @@ -14,6 +14,7 @@ when we change the decision threshold. Let's consider the following trained binary classification model:: + >>> import numpy as np >>> import matplotlib.pyplot as plt >>> from sklearn.datasets import make_classification >>> from sklearn.ensemble import RandomForestClassifier From cb5fee1768ad0284b064a740f2d63ba52c815c45 Mon Sep 17 00:00:00 2001 From: Carlo Date: Sat, 18 Feb 2023 14:52:56 -0300 Subject: [PATCH 10/42] docstring typo --- doc/modules/metric_threshold_curve.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/modules/metric_threshold_curve.rst b/doc/modules/metric_threshold_curve.rst index 3d4d16bd7ee3c..b8e72fabdc86b 100644 --- a/doc/modules/metric_threshold_curve.rst +++ b/doc/modules/metric_threshold_curve.rst @@ -47,10 +47,10 @@ look into the behaviour of that metric with:: ... >>> best_thr = thresholds[np.argmax(f2_values)] >>> best_thr - ... 0.21 - ... + 0.21 + >>> new_predict_test = (model.predict_proba(X_test)[:, 1] > best_thr).astype(int) >>> fbeta_score(y_test, new_predict_test, beta=2) - ... 0.719... + 0.719... Note that the new choosen threshold optimizes the f2 score in the test set. From 9e45e2ee0c82f15bf3904ac9ca4cc602c527573c Mon Sep 17 00:00:00 2001 From: Carlo Date: Sat, 18 Feb 2023 19:11:48 -0300 Subject: [PATCH 11/42] change in doc order and typos --- doc/inspection.rst | 2 +- doc/modules/classes.rst | 2 +- doc/modules/metric_threshold_curve.rst | 14 ++++++-------- sklearn/inspection/_metric_threshold_curve.py | 6 +++++- sklearn/tests/test_public_functions.py | 1 + 5 files changed, 14 insertions(+), 11 deletions(-) diff --git a/doc/inspection.rst b/doc/inspection.rst index 50082983d047d..25577fdd25c45 100644 --- a/doc/inspection.rst +++ b/doc/inspection.rst @@ -27,6 +27,6 @@ to diagnose issues with model performance. .. toctree:: - modules/metric_threshold_curve modules/partial_dependence modules/permutation_importance + modules/metric_threshold_curve diff --git a/doc/modules/classes.rst b/doc/modules/classes.rst index dd8d995c8504d..64035eed7cb73 100644 --- a/doc/modules/classes.rst +++ b/doc/modules/classes.rst @@ -650,9 +650,9 @@ Kernels: :toctree: generated/ :template: function.rst - inspection.metric_threshold_curve inspection.partial_dependence inspection.permutation_importance + inspection.metric_threshold_curve Plotting -------- diff --git a/doc/modules/metric_threshold_curve.rst b/doc/modules/metric_threshold_curve.rst index b8e72fabdc86b..d3736a1bbda47 100644 --- a/doc/modules/metric_threshold_curve.rst +++ b/doc/modules/metric_threshold_curve.rst @@ -7,8 +7,7 @@ Metric threshold curve .. currentmodule:: sklearn.inspection Metric threshold curve is a model inspection technique that can be used -for any :term:`fitted` binary classification :term:`estimator`. This is -especially useful for non-linear or opaque :term:`estimators`. The metric +for any :term:`fitted` binary classification :term:`estimator`. The metric threshold curve is defined to be how the threshold-dependent metric behaves when we change the decision threshold. @@ -21,17 +20,17 @@ Let's consider the following trained binary classification model:: >>> from sklearn.model_selection import train_test_split >>> from sklearn.metrics import fbeta_score >>> from functools import partial - ... + >>> X, y = make_classification( ... n_samples=10_000, weights=(0.95, ), random_state=42) - ... + >>> X_train_clf, X_test, y_train_clf, y_test = train_test_split( ... X, y, random_state=42, stratify=y) >>> X_train_clf, X_train_thr, y_train_clf, y_train_thr = train_test_split( ... X_train_clf, y_train_clf, random_state=42, stratify=y_train_clf) - ... + >>> model = RandomForestClassifier(random_state=42).fit(X_train_clf, y_train_clf) - ... + >>> fbeta_score(y_test, model.predict(X_test), beta=2) 0.462... @@ -41,10 +40,9 @@ look into the behaviour of that metric with:: >>> from sklearn.inspection import metric_threshold_curve >>> predict_proba_thr = model.predict_proba(X_train_thr)[:, 1] - ... >>> f2_values, thresholds = metric_threshold_curve( ... y_train_thr, predict_proba_thr, partial(fbeta_score, beta=2)) - ... + >>> best_thr = thresholds[np.argmax(f2_values)] >>> best_thr 0.21 diff --git a/sklearn/inspection/_metric_threshold_curve.py b/sklearn/inspection/_metric_threshold_curve.py index a0114b9b19a31..a448f0673f2c1 100644 --- a/sklearn/inspection/_metric_threshold_curve.py +++ b/sklearn/inspection/_metric_threshold_curve.py @@ -47,6 +47,10 @@ def metric_threshold_curve( Note: this implementation is restricted to the binary classification task. + Read more in the :ref:`User Guide `. + + .. versionadded:: 1.3 + Parameters ---------- y_true : array-like of shape (n_samples,), default=None @@ -84,7 +88,7 @@ def metric_threshold_curve( -------- precision_recall_curve : Compute precision-recall pairs for different probability thresholds. - det_curve: Compute error rates for different probability thresholds. + det_curve : Compute error rates for different probability thresholds. roc_curve : Compute Receiver operating characteristic (ROC) curve. Examples diff --git a/sklearn/tests/test_public_functions.py b/sklearn/tests/test_public_functions.py index 9b2b56cdb3eb8..2edb5d62eb503 100644 --- a/sklearn/tests/test_public_functions.py +++ b/sklearn/tests/test_public_functions.py @@ -111,6 +111,7 @@ def _check_function_param_validation( "sklearn.feature_extraction.img_to_graph", "sklearn.feature_extraction.image.extract_patches_2d", "sklearn.feature_extraction.image.reconstruct_from_patches_2d", + "sklearn.inspection.metric_threshold_curve", "sklearn.metrics.accuracy_score", "sklearn.metrics.auc", "sklearn.metrics.average_precision_score", From ad901a2be47fd8a4a8927cce2c968576c1325463 Mon Sep 17 00:00:00 2001 From: Carlo Date: Mon, 20 Feb 2023 06:16:27 -0300 Subject: [PATCH 12/42] removing example --- sklearn/inspection/_metric_threshold_curve.py | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/sklearn/inspection/_metric_threshold_curve.py b/sklearn/inspection/_metric_threshold_curve.py index a448f0673f2c1..47def30282665 100644 --- a/sklearn/inspection/_metric_threshold_curve.py +++ b/sklearn/inspection/_metric_threshold_curve.py @@ -90,20 +90,6 @@ def metric_threshold_curve( probability thresholds. det_curve : Compute error rates for different probability thresholds. roc_curve : Compute Receiver operating characteristic (ROC) curve. - - Examples - -------- - >>> import numpy as np - >>> from sklearn.metrics import accuracy_score - >>> from sklearn.inspection import metric_threshold_curve - >>> y_true = np.array([0, 0, 1, 1]) - >>> y_scores = np.array([0.1, 0.4, 0.35, 0.8]) - >>> accuracy_values, thresholds = metric_threshold_curve( - ... y_true, y_scores, accuracy_score) - >>> thresholds - array([0.1 , 0.35, 0.4 , 0.8 ]) - >>> accuracy_values - array([0.75, 0.5 , 0.75, 0.5 ]) """ # Check to make sure y_true is valid y_type = type_of_target(y_true, input_name="y_true") From 9b4febb75c6afd2ba2a614aef22fbd6dc1d9786b Mon Sep 17 00:00:00 2001 From: Carlo Lemos <55899543+vitaliset@users.noreply.github.com> Date: Sun, 14 May 2023 01:55:17 -0300 Subject: [PATCH 13/42] Update import of _check_pos_label_consistency --- sklearn/inspection/_metric_threshold_curve.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sklearn/inspection/_metric_threshold_curve.py b/sklearn/inspection/_metric_threshold_curve.py index 47def30282665..3fe0279fcc098 100644 --- a/sklearn/inspection/_metric_threshold_curve.py +++ b/sklearn/inspection/_metric_threshold_curve.py @@ -12,13 +12,14 @@ from ..utils import assert_all_finite from ..utils import check_consistent_length -from ..utils.validation import _check_sample_weight +from ..utils.validation import ( + _check_pos_label_consistency, + _check_sample_weight, +) from ..utils import column_or_1d from ..utils.multiclass import type_of_target from ..utils._param_validation import validate_params, Interval -from ..metrics._base import _check_pos_label_consistency - @validate_params( { From 119db53829c82ef1b5d3f0e053993105a0eb590a Mon Sep 17 00:00:00 2001 From: Carlo Date: Sun, 14 May 2023 12:45:01 -0300 Subject: [PATCH 14/42] codecov --- .../tests/test_metric_threshold_curve.py | 61 +++++++++++++++++-- 1 file changed, 56 insertions(+), 5 deletions(-) diff --git a/sklearn/inspection/tests/test_metric_threshold_curve.py b/sklearn/inspection/tests/test_metric_threshold_curve.py index 85bd88e031a78..1c68fed494378 100644 --- a/sklearn/inspection/tests/test_metric_threshold_curve.py +++ b/sklearn/inspection/tests/test_metric_threshold_curve.py @@ -64,10 +64,61 @@ def test_binary_clf_curve_multiclass_error(): def test_metric_threshold_curve_end_points(metric): rng = check_random_state(0) y_true = np.array([0] * 50 + [1] * 50) - y_pred = rng.normal(3, size=100) - min_pred, max_pred = min(y_pred), max(y_pred) + y_score = rng.normal(3, size=100) + min_pred, max_score = min(y_score), max(y_score) - metric_values, _ = metric_threshold_curve(y_true, y_pred, metric) + metric_values, _ = metric_threshold_curve(y_true, y_score, metric) - assert metric_values[0] == metric(y_true, (y_pred > min_pred) * 1) - assert metric_values[-1] == metric(y_true, (y_pred > max_pred) * 1) + assert metric_values[0] == metric(y_true, (y_score > min_pred) * 1) + assert metric_values[-1] == metric(y_true, (y_score > max_score) * 1) + + +@pytest.mark.parametrize( + "metric", [partial(fbeta_score, beta=3), precision_score, recall_score], +) +def test_zero_sample_weight_equals_excluding(metric): + rng = check_random_state(0) + y_true = np.array([0] * 50 + [1] * 50) + y_score = rng.normal(3, size=100) + + sample_weight = np.array([0] * 20, [1] * 8) + metric_values_sw, _ = metric_threshold_curve( + y_true, y_score, metric, sample_weight=sample_weight + ) + + y_true_exclude = y_true[sample_weight != 0] + y_score_exclude = y_score[sample_weight != 0] + metric_values_exclude, _ = metric_threshold_curve( + y_true_exclude, y_score_exclude, metric + ) + + assert_allclose(metric_values_sw, metric_values_exclude) + + +def test_len_of_threshold_when_passing_int(): + y = [0] * 500 + [1] * 500 + y_score = list[range(1000)] + _, thresholds = metric_threshold_curve( + y, y_score, accuracy_score, threshold_grid=42 + ) + + assert len(thresholds) == 42 + + +def test_passing_the_grid(): + y = [0] * 500 + [1] * 500 + y_score = list[range(1000)] + + grid_sorted = np.array(list[range(200, 300)]) + _, thresholds_sorted = metric_threshold_curve( + y, y_score, accuracy_score, threshold_grid=grid_sorted + ) + + assert_allclose(grid_sorted, thresholds_sorted) + + grid_not_sorted = np.array(list[range(200, 300)][::-1]) + _, thresholds_not_sorted = metric_threshold_curve( + y, y_score, accuracy_score, threshold_grid=grid_not_sorted + ) + + assert_allclose(grid_not_sorted, thresholds_not_sorted) From be893c8ebf75b0c6166c9a202ed6cba9ea516d0f Mon Sep 17 00:00:00 2001 From: Carlo Date: Sun, 14 May 2023 12:52:40 -0300 Subject: [PATCH 15/42] linting --- sklearn/inspection/tests/test_metric_threshold_curve.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sklearn/inspection/tests/test_metric_threshold_curve.py b/sklearn/inspection/tests/test_metric_threshold_curve.py index 1c68fed494378..3c34d3d175797 100644 --- a/sklearn/inspection/tests/test_metric_threshold_curve.py +++ b/sklearn/inspection/tests/test_metric_threshold_curve.py @@ -74,7 +74,8 @@ def test_metric_threshold_curve_end_points(metric): @pytest.mark.parametrize( - "metric", [partial(fbeta_score, beta=3), precision_score, recall_score], + "metric", + [partial(fbeta_score, beta=3), precision_score, recall_score], ) def test_zero_sample_weight_equals_excluding(metric): rng = check_random_state(0) From bd1e64f4f51c3d7e6eea85c83cb3173f1d6768c0 Mon Sep 17 00:00:00 2001 From: Carlo Date: Sun, 14 May 2023 15:07:57 -0300 Subject: [PATCH 16/42] correcting typo --- .../tests/test_metric_threshold_curve.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/sklearn/inspection/tests/test_metric_threshold_curve.py b/sklearn/inspection/tests/test_metric_threshold_curve.py index 3c34d3d175797..7dd967c7077e3 100644 --- a/sklearn/inspection/tests/test_metric_threshold_curve.py +++ b/sklearn/inspection/tests/test_metric_threshold_curve.py @@ -98,28 +98,28 @@ def test_zero_sample_weight_equals_excluding(metric): def test_len_of_threshold_when_passing_int(): y = [0] * 500 + [1] * 500 - y_score = list[range(1000)] + y_score = list(range(1000)) _, thresholds = metric_threshold_curve( - y, y_score, accuracy_score, threshold_grid=42 + y, y_score, accuracy_score, threshold_grid=13 ) - assert len(thresholds) == 42 + assert len(thresholds) == 13 def test_passing_the_grid(): y = [0] * 500 + [1] * 500 - y_score = list[range(1000)] + y_score = list(range(1000)) - grid_sorted = np.array(list[range(200, 300)]) + grid_sorted = np.array(list(range(200, 300))) _, thresholds_sorted = metric_threshold_curve( y, y_score, accuracy_score, threshold_grid=grid_sorted ) assert_allclose(grid_sorted, thresholds_sorted) - grid_not_sorted = np.array(list[range(200, 300)][::-1]) + grid_not_sorted = grid_sorted[::-1] _, thresholds_not_sorted = metric_threshold_curve( y, y_score, accuracy_score, threshold_grid=grid_not_sorted ) - assert_allclose(grid_not_sorted, thresholds_not_sorted) + assert_allclose(grid_sorted, thresholds_not_sorted) From 031895061700d88023456c1b343974647b4e8ad8 Mon Sep 17 00:00:00 2001 From: Carlo Date: Sun, 14 May 2023 15:20:06 -0300 Subject: [PATCH 17/42] test typo --- sklearn/inspection/tests/test_metric_threshold_curve.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/inspection/tests/test_metric_threshold_curve.py b/sklearn/inspection/tests/test_metric_threshold_curve.py index 7dd967c7077e3..32684d7edfb61 100644 --- a/sklearn/inspection/tests/test_metric_threshold_curve.py +++ b/sklearn/inspection/tests/test_metric_threshold_curve.py @@ -82,7 +82,7 @@ def test_zero_sample_weight_equals_excluding(metric): y_true = np.array([0] * 50 + [1] * 50) y_score = rng.normal(3, size=100) - sample_weight = np.array([0] * 20, [1] * 8) + sample_weight = np.array([0] * 20 + [1] * 80) metric_values_sw, _ = metric_threshold_curve( y_true, y_score, metric, sample_weight=sample_weight ) From efd6d721cbf0dcc43971a2a6124bc9b0d1bed354 Mon Sep 17 00:00:00 2001 From: Carlo Date: Sun, 14 May 2023 19:09:42 -0300 Subject: [PATCH 18/42] add example again to check pytest --- sklearn/inspection/_metric_threshold_curve.py | 34 +++++++++++++------ 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/sklearn/inspection/_metric_threshold_curve.py b/sklearn/inspection/_metric_threshold_curve.py index 3fe0279fcc098..6139ac78c71ae 100644 --- a/sklearn/inspection/_metric_threshold_curve.py +++ b/sklearn/inspection/_metric_threshold_curve.py @@ -91,8 +91,22 @@ def metric_threshold_curve( probability thresholds. det_curve : Compute error rates for different probability thresholds. roc_curve : Compute Receiver operating characteristic (ROC) curve. + + Examples + -------- + >>> import numpy as np + >>> from sklearn.metrics import accuracy_score + >>> from sklearn.inspection import metric_threshold_curve + >>> y_true = np.array([0, 0, 1, 1]) + >>> y_scores = np.array([0.1, 0.4, 0.35, 0.8]) + >>> accuracy_values, thresholds = metric_threshold_curve( + ... y_true, y_scores, accuracy_score) + >>> thresholds + array([0.1 , 0.35, 0.4 , 0.8 ]) + >>> accuracy_values + array([0.75, 0.5 , 0.75, 0.5 ]) """ - # Check to make sure y_true is valid + # Check to make sure y_true is valid. y_type = type_of_target(y_true, input_name="y_true") if not (y_type == "binary" or (y_type == "multiclass" and pos_label is not None)): raise ValueError("{0} format is not supported".format(y_type)) @@ -103,7 +117,7 @@ def metric_threshold_curve( assert_all_finite(y_true) assert_all_finite(y_score) - # Filter out zero-weighted samples, as they should not impact the result + # Filter out zero-weighted samples, as they should not impact the result. if sample_weight is not None: sample_weight = column_or_1d(sample_weight) sample_weight = _check_sample_weight(sample_weight, y_true) @@ -114,17 +128,17 @@ def metric_threshold_curve( pos_label = _check_pos_label_consistency(pos_label, y_true) - # make y_true a boolean vector + # Make y_true a boolean vector. y_true = y_true == pos_label - # sort scores and corresponding truth values + # Sort scores and corresponding truth values. desc_score_indices = np.argsort(y_score, kind="mergesort")[::-1] y_score = y_score[desc_score_indices] y_true = y_true[desc_score_indices] if sample_weight is not None: sample_weight = sample_weight[desc_score_indices] - # logic to see if we need to use all possible thresholds (distinct values) + # Logic to see if we need to use all possible thresholds (distinct values). all_thresholds = False if threshold_grid is None: all_thresholds = True @@ -140,19 +154,19 @@ def metric_threshold_curve( threshold_idxs = np.r_[distinct_value_indices, y_true.size - 1] thresholds = y_score[threshold_idxs[::-1]] elif isinstance(threshold_grid, int): - # takes representative score points to calculate the metric - # with these thresholds + # It takes representative score points to calculate the metric + # with these thresholds. thresholds = np.percentile( list(set(y_score)), np.linspace(0, 100, threshold_grid) ) else: - # if threshold_grid is an array then run some checks and sort - # it for consistency + # If threshold_grid is an array then run some checks and sort + # it for consistency. threshold_grid = column_or_1d(threshold_grid) assert_all_finite(threshold_grid) thresholds = np.sort(threshold_grid) - # for each threshold calculates the metric + # For each threshold calculates the metric. metric_values = [] for threshold in thresholds: preds_threshold = (y_score > threshold).astype(int) From dfa66a5912ad0320da764b47459de045a4d5bb8b Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Mon, 20 May 2024 16:27:25 +0200 Subject: [PATCH 19/42] fixing imports --- sklearn/inspection/tests/test_metric_threshold_curve.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/sklearn/inspection/tests/test_metric_threshold_curve.py b/sklearn/inspection/tests/test_metric_threshold_curve.py index 32684d7edfb61..a5db6d91c8987 100644 --- a/sklearn/inspection/tests/test_metric_threshold_curve.py +++ b/sklearn/inspection/tests/test_metric_threshold_curve.py @@ -1,9 +1,7 @@ -import pytest -import numpy as np - from functools import partial -from numpy.testing import assert_allclose +import numpy as np +import pytest from sklearn.datasets import make_classification from sklearn.ensemble import RandomForestClassifier @@ -15,6 +13,7 @@ precision_score, recall_score, ) +from sklearn.utils._testing import assert_allclose from sklearn.utils.validation import check_random_state From 1fb1c1349068f06acf2b28700190c92ec8d4adfd Mon Sep 17 00:00:00 2001 From: vitaliset Date: Wed, 22 May 2024 01:59:42 -0300 Subject: [PATCH 20/42] towards glemaitre suggestions --- doc/modules/metric_threshold_curve.rst | 4 + sklearn/inspection/__init__.py | 2 - sklearn/metrics/__init__.py | 2 + .../_decision_threshold.py} | 78 +++++++++---------- .../tests/test_decision_threshold.py} | 44 +++++------ sklearn/tests/test_public_functions.py | 2 +- 6 files changed, 64 insertions(+), 68 deletions(-) rename sklearn/{inspection/_metric_threshold_curve.py => metrics/_decision_threshold.py} (72%) rename sklearn/{inspection/tests/test_metric_threshold_curve.py => metrics/tests/test_decision_threshold.py} (66%) diff --git a/doc/modules/metric_threshold_curve.rst b/doc/modules/metric_threshold_curve.rst index d3736a1bbda47..baeb48079df9e 100644 --- a/doc/modules/metric_threshold_curve.rst +++ b/doc/modules/metric_threshold_curve.rst @@ -6,6 +6,10 @@ Metric threshold curve .. currentmodule:: sklearn.inspection +TODO: It makes much more sense to move this to +`doc\modules\classification_threshold.rst`. Let's finish the code before +that. :) + Metric threshold curve is a model inspection technique that can be used for any :term:`fitted` binary classification :term:`estimator`. The metric threshold curve is defined to be how the threshold-dependent metric behaves diff --git a/sklearn/inspection/__init__.py b/sklearn/inspection/__init__.py index 3f618634d1364..f254967f96166 100644 --- a/sklearn/inspection/__init__.py +++ b/sklearn/inspection/__init__.py @@ -1,6 +1,5 @@ """The :mod:`sklearn.inspection` module includes tools for model inspection.""" -from ._metric_threshold_curve import metric_threshold_curve from ._partial_dependence import partial_dependence from ._permutation_importance import permutation_importance from ._plot.decision_boundary import DecisionBoundaryDisplay @@ -11,5 +10,4 @@ "permutation_importance", "PartialDependenceDisplay", "DecisionBoundaryDisplay", - "metric_threshold_curve", ] diff --git a/sklearn/metrics/__init__.py b/sklearn/metrics/__init__.py index af25a219c79f1..038011e75f749 100644 --- a/sklearn/metrics/__init__.py +++ b/sklearn/metrics/__init__.py @@ -26,6 +26,7 @@ recall_score, zero_one_loss, ) +from ._decision_threshold import decision_threshold_curve from ._dist_metrics import DistanceMetric from ._plot.confusion_matrix import ConfusionMatrixDisplay from ._plot.det_curve import DetCurveDisplay @@ -117,6 +118,7 @@ "d2_log_loss_score", "d2_pinball_score", "dcg_score", + "decision_threshold_curve", "davies_bouldin_score", "DetCurveDisplay", "det_curve", diff --git a/sklearn/inspection/_metric_threshold_curve.py b/sklearn/metrics/_decision_threshold.py similarity index 72% rename from sklearn/inspection/_metric_threshold_curve.py rename to sklearn/metrics/_decision_threshold.py index 6592323170fda..ac042d18a18d2 100644 --- a/sklearn/inspection/_metric_threshold_curve.py +++ b/sklearn/metrics/_decision_threshold.py @@ -23,25 +23,23 @@ { "y_true": ["array-like"], "y_score": ["array-like"], - "score_func": [callable], - "threshold_grid": [ + "scoring": [callable], + "thresholds": [ Interval(Integral, 3, None, closed="left"), "array-like", None, ], - "pos_label": [Real, str, "boolean", None], - "sample_weight": ["array-like", None], + "scoring_kwargs": [dict, None], }, prefer_skip_nested_validation=True, ) -def metric_threshold_curve( +def decision_threshold_curve( y_true, y_score, - score_func, + scoring, *, - threshold_grid=101, - pos_label=None, - sample_weight=None, + thresholds=101, + scoring_kwargs={}, ): """Compute the threshold-dependent metric of interest per threshold. @@ -59,21 +57,17 @@ def metric_threshold_curve( y_score : array-like of shape (n_samples,), default=None Estimated probabilities or output of a decision function. - score_func : callable - Threshold dependent score function (or loss function) with signature - `score_func(y, y_pred, sample_weight, **kwargs)`. + scoring : callable + Threshold-dependent score function (or loss function) with signature + `scoring(y, y_pred, **scoring_kwargs)`. - threshold_grid : array-like, int or None, default=101 + thresholds : array-like or int, default=101 Values of threhsold for each score calculation. If int then - `threshold_grid` percentiles of `y_score` are selected. If `None` then - all possible thresholds are selected. If int is lower then - `len(set(y_score))` then all possible thresholds are selected. + `thresholds` percentiles of `y_score` are selected. If int is lower + then `len(set(y_score))` then all possible thresholds are selected. - pos_label : int, float, bool or str, default=None - The label of the positive class. - - sample_weight : array-like of shape (n_samples,), default=None - Sample weights. + scoring_kwargs : dict, default=None + Keyword arguments to pass to specified `scoring` function. Returns ------- @@ -94,11 +88,10 @@ def metric_threshold_curve( Examples -------- >>> import numpy as np - >>> from sklearn.metrics import accuracy_score - >>> from sklearn.inspection import metric_threshold_curve + >>> from sklearn.metrics import accuracy_score, decision_threshold_curve >>> y_true = np.array([0, 0, 1, 1]) >>> y_scores = np.array([0.1, 0.4, 0.35, 0.8]) - >>> accuracy_values, thresholds = metric_threshold_curve( + >>> accuracy_values, thresholds = decision_threshold_curve( ... y_true, y_scores, accuracy_score) >>> thresholds array([0.1 , 0.35, 0.4 , 0.8 ]) @@ -107,16 +100,23 @@ def metric_threshold_curve( """ # Check to make sure y_true is valid. y_type = type_of_target(y_true, input_name="y_true") + pos_label = scoring_kwargs.get("pos_label") if not (y_type == "binary" or (y_type == "multiclass" and pos_label is not None)): + if y_type == "multiclass": + raise ValueError( + "In a multiclass scenario, you must pass a `pos_label` to `scoring_kwargs`." + ) raise ValueError("{0} format is not supported".format(y_type)) - check_consistent_length(y_true, y_score, sample_weight) + sample_weight = scoring_kwargs.get("sample_weight") + check_consistent_length( y_true, y_score, sample_weight) y_true = column_or_1d(y_true) y_score = column_or_1d(y_score) assert_all_finite(y_true) assert_all_finite(y_score) # Filter out zero-weighted samples, as they should not impact the result. + if sample_weight is not None: sample_weight = column_or_1d(sample_weight) sample_weight = _check_sample_weight(sample_weight, y_true) @@ -124,7 +124,7 @@ def metric_threshold_curve( y_true = y_true[nonzero_weight_mask] y_score = y_score[nonzero_weight_mask] sample_weight = sample_weight[nonzero_weight_mask] - + pos_label = _check_pos_label_consistency(pos_label, y_true) # Make y_true a boolean vector. @@ -136,14 +136,12 @@ def metric_threshold_curve( y_true = y_true[desc_score_indices] if sample_weight is not None: sample_weight = sample_weight[desc_score_indices] - + + if "sample_weight" in scoring_kwargs: + scoring_kwargs["sample_weight"] = sample_weight + # Logic to see if we need to use all possible thresholds (distinct values). - all_thresholds = False - if threshold_grid is None: - all_thresholds = True - elif isinstance(threshold_grid, int): - if len(set(y_score)) < threshold_grid: - all_thresholds = True + all_thresholds = isinstance(thresholds, int) and len(set(y_score)) < thresholds if all_thresholds: # y_score typically has many tied values. Here we extract @@ -152,25 +150,25 @@ def metric_threshold_curve( distinct_value_indices = np.where(np.diff(y_score))[0] threshold_idxs = np.r_[distinct_value_indices, y_true.size - 1] thresholds = y_score[threshold_idxs[::-1]] - elif isinstance(threshold_grid, int): + elif isinstance(thresholds, int): # It takes representative score points to calculate the metric # with these thresholds. thresholds = np.percentile( - list(set(y_score)), np.linspace(0, 100, threshold_grid) + list(set(y_score)), np.linspace(0, 100, thresholds) ) else: - # If threshold_grid is an array then run some checks and sort + # If thresholds is an array then run some checks and sort # it for consistency. - threshold_grid = column_or_1d(threshold_grid) - assert_all_finite(threshold_grid) - thresholds = np.sort(threshold_grid) + thresholds = column_or_1d(thresholds) + assert_all_finite(thresholds) + thresholds = np.sort(thresholds) # For each threshold calculates the metric. metric_values = [] for threshold in thresholds: preds_threshold = (y_score > threshold).astype(int) metric_values.append( - score_func(y_true, preds_threshold, sample_weight=sample_weight) + scoring(y_true, preds_threshold, **scoring_kwargs) ) # TODO: should we multithread the metric calculations? diff --git a/sklearn/inspection/tests/test_metric_threshold_curve.py b/sklearn/metrics/tests/test_decision_threshold.py similarity index 66% rename from sklearn/inspection/tests/test_metric_threshold_curve.py rename to sklearn/metrics/tests/test_decision_threshold.py index a5db6d91c8987..e7e21acf42d1a 100644 --- a/sklearn/inspection/tests/test_metric_threshold_curve.py +++ b/sklearn/metrics/tests/test_decision_threshold.py @@ -5,7 +5,7 @@ from sklearn.datasets import make_classification from sklearn.ensemble import RandomForestClassifier -from sklearn.inspection import metric_threshold_curve +from sklearn.metrics import decision_threshold_curve from sklearn.metrics import ( accuracy_score, f1_score, @@ -18,25 +18,18 @@ def test_grid_int_bigger_than_set_then_all(): - """When `threshold_grid` parameter is bigger than the number of unique - `y_score` then `len(thresholds)` should be equal to `len(set(y_score))` - and thresholds should be the same from what we get with - `threshold_grid=None`. + """When `thresholds` parameter is bigger than the number of unique + `y_score` then `len(thresholds)` should be equal to `len(set(y_score))`. """ X, y = make_classification() clf = RandomForestClassifier(n_estimators=10, random_state=42).fit(X, y) y_score = clf.predict_proba(X)[:, 1] - _, thresholds_big_int = metric_threshold_curve( - y, y_score, accuracy_score, threshold_grid=len(set(y_score)) + 1000 + _, thresholds_big_int = decision_threshold_curve( + y, y_score, accuracy_score, thresholds=len(set(y_score)) + 1000 ) - _, thresholds_none = metric_threshold_curve( - y, y_score, accuracy_score, threshold_grid=None - ) - - assert_allclose(thresholds_big_int, thresholds_none) assert len(thresholds_big_int) == len(set(y_score)) @@ -44,9 +37,9 @@ def test_binary_clf_curve_multiclass_error(): rng = check_random_state(404) y_true = rng.randint(0, 3, size=10) y_pred = rng.rand(10) - msg = "multiclass format is not supported" + msg = "you must pass a `pos_label` to `scoring_kwargs`." with pytest.raises(ValueError, match=msg): - metric_threshold_curve(y_true, y_pred, accuracy_score) + decision_threshold_curve(y_true, y_pred, accuracy_score) @pytest.mark.parametrize( @@ -60,13 +53,13 @@ def test_binary_clf_curve_multiclass_error(): accuracy_score, ], ) -def test_metric_threshold_curve_end_points(metric): +def test_decision_threshold_curve_end_points(metric): rng = check_random_state(0) y_true = np.array([0] * 50 + [1] * 50) y_score = rng.normal(3, size=100) min_pred, max_score = min(y_score), max(y_score) - metric_values, _ = metric_threshold_curve(y_true, y_score, metric) + metric_values, _ = decision_threshold_curve(y_true, y_score, metric) assert metric_values[0] == metric(y_true, (y_score > min_pred) * 1) assert metric_values[-1] == metric(y_true, (y_score > max_score) * 1) @@ -82,13 +75,14 @@ def test_zero_sample_weight_equals_excluding(metric): y_score = rng.normal(3, size=100) sample_weight = np.array([0] * 20 + [1] * 80) - metric_values_sw, _ = metric_threshold_curve( - y_true, y_score, metric, sample_weight=sample_weight + scoring_kwargs = {"sample_weight": sample_weight} + metric_values_sw, _ = decision_threshold_curve( + y_true, y_score, metric, scoring_kwargs=scoring_kwargs ) y_true_exclude = y_true[sample_weight != 0] y_score_exclude = y_score[sample_weight != 0] - metric_values_exclude, _ = metric_threshold_curve( + metric_values_exclude, _ = decision_threshold_curve( y_true_exclude, y_score_exclude, metric ) @@ -98,8 +92,8 @@ def test_zero_sample_weight_equals_excluding(metric): def test_len_of_threshold_when_passing_int(): y = [0] * 500 + [1] * 500 y_score = list(range(1000)) - _, thresholds = metric_threshold_curve( - y, y_score, accuracy_score, threshold_grid=13 + _, thresholds = decision_threshold_curve( + y, y_score, accuracy_score, thresholds=13 ) assert len(thresholds) == 13 @@ -110,15 +104,15 @@ def test_passing_the_grid(): y_score = list(range(1000)) grid_sorted = np.array(list(range(200, 300))) - _, thresholds_sorted = metric_threshold_curve( - y, y_score, accuracy_score, threshold_grid=grid_sorted + _, thresholds_sorted = decision_threshold_curve( + y, y_score, accuracy_score, thresholds=grid_sorted ) assert_allclose(grid_sorted, thresholds_sorted) grid_not_sorted = grid_sorted[::-1] - _, thresholds_not_sorted = metric_threshold_curve( - y, y_score, accuracy_score, threshold_grid=grid_not_sorted + _, thresholds_not_sorted = decision_threshold_curve( + y, y_score, accuracy_score, thresholds=grid_not_sorted ) assert_allclose(grid_sorted, thresholds_not_sorted) diff --git a/sklearn/tests/test_public_functions.py b/sklearn/tests/test_public_functions.py index 965f5221526e4..1ddb6421e4834 100644 --- a/sklearn/tests/test_public_functions.py +++ b/sklearn/tests/test_public_functions.py @@ -196,7 +196,6 @@ def _check_function_param_validation( "sklearn.feature_selection.mutual_info_classif", "sklearn.feature_selection.mutual_info_regression", "sklearn.feature_selection.r_regression", - "sklearn.inspection.metric_threshold_curve", "sklearn.inspection.partial_dependence", "sklearn.inspection.permutation_importance", "sklearn.isotonic.check_increasing", @@ -240,6 +239,7 @@ def _check_function_param_validation( "sklearn.metrics.d2_tweedie_score", "sklearn.metrics.davies_bouldin_score", "sklearn.metrics.dcg_score", + "sklearn.metrics.decision_threshold_curve", "sklearn.metrics.det_curve", "sklearn.metrics.explained_variance_score", "sklearn.metrics.f1_score", From e7bb2a7571c3357c4237231aec51f8574bf526df Mon Sep 17 00:00:00 2001 From: vitaliset Date: Sat, 8 Jun 2024 00:28:30 -0300 Subject: [PATCH 21/42] applying black suggestions --- sklearn/metrics/_decision_threshold.py | 21 ++++++++----------- .../metrics/tests/test_decision_threshold.py | 6 ++---- 2 files changed, 11 insertions(+), 16 deletions(-) diff --git a/sklearn/metrics/_decision_threshold.py b/sklearn/metrics/_decision_threshold.py index ac042d18a18d2..bb46e6a2378d2 100644 --- a/sklearn/metrics/_decision_threshold.py +++ b/sklearn/metrics/_decision_threshold.py @@ -6,7 +6,7 @@ # Authors: ######## # License: BSD 3 clause -from numbers import Integral, Real +from numbers import Integral import numpy as np @@ -104,12 +104,13 @@ def decision_threshold_curve( if not (y_type == "binary" or (y_type == "multiclass" and pos_label is not None)): if y_type == "multiclass": raise ValueError( - "In a multiclass scenario, you must pass a `pos_label` to `scoring_kwargs`." + "In a multiclass scenario, you must pass a `pos_label` \ + to `scoring_kwargs`." ) raise ValueError("{0} format is not supported".format(y_type)) sample_weight = scoring_kwargs.get("sample_weight") - check_consistent_length( y_true, y_score, sample_weight) + check_consistent_length(y_true, y_score, sample_weight) y_true = column_or_1d(y_true) y_score = column_or_1d(y_score) assert_all_finite(y_true) @@ -124,7 +125,7 @@ def decision_threshold_curve( y_true = y_true[nonzero_weight_mask] y_score = y_score[nonzero_weight_mask] sample_weight = sample_weight[nonzero_weight_mask] - + pos_label = _check_pos_label_consistency(pos_label, y_true) # Make y_true a boolean vector. @@ -136,10 +137,10 @@ def decision_threshold_curve( y_true = y_true[desc_score_indices] if sample_weight is not None: sample_weight = sample_weight[desc_score_indices] - + if "sample_weight" in scoring_kwargs: scoring_kwargs["sample_weight"] = sample_weight - + # Logic to see if we need to use all possible thresholds (distinct values). all_thresholds = isinstance(thresholds, int) and len(set(y_score)) < thresholds @@ -153,9 +154,7 @@ def decision_threshold_curve( elif isinstance(thresholds, int): # It takes representative score points to calculate the metric # with these thresholds. - thresholds = np.percentile( - list(set(y_score)), np.linspace(0, 100, thresholds) - ) + thresholds = np.percentile(list(set(y_score)), np.linspace(0, 100, thresholds)) else: # If thresholds is an array then run some checks and sort # it for consistency. @@ -167,9 +166,7 @@ def decision_threshold_curve( metric_values = [] for threshold in thresholds: preds_threshold = (y_score > threshold).astype(int) - metric_values.append( - scoring(y_true, preds_threshold, **scoring_kwargs) - ) + metric_values.append(scoring(y_true, preds_threshold, **scoring_kwargs)) # TODO: should we multithread the metric calculations? return np.array(metric_values), thresholds diff --git a/sklearn/metrics/tests/test_decision_threshold.py b/sklearn/metrics/tests/test_decision_threshold.py index e7e21acf42d1a..a5bc14e67d87d 100644 --- a/sklearn/metrics/tests/test_decision_threshold.py +++ b/sklearn/metrics/tests/test_decision_threshold.py @@ -5,9 +5,9 @@ from sklearn.datasets import make_classification from sklearn.ensemble import RandomForestClassifier -from sklearn.metrics import decision_threshold_curve from sklearn.metrics import ( accuracy_score, + decision_threshold_curve, f1_score, fbeta_score, precision_score, @@ -92,9 +92,7 @@ def test_zero_sample_weight_equals_excluding(metric): def test_len_of_threshold_when_passing_int(): y = [0] * 500 + [1] * 500 y_score = list(range(1000)) - _, thresholds = decision_threshold_curve( - y, y_score, accuracy_score, thresholds=13 - ) + _, thresholds = decision_threshold_curve(y, y_score, accuracy_score, thresholds=13) assert len(thresholds) == 13 From 5a8f0c5d28863cc02fbba12969be90a90e705959 Mon Sep 17 00:00:00 2001 From: vitaliset Date: Sat, 8 Jun 2024 01:04:10 -0300 Subject: [PATCH 22/42] update extra stuff for consistency --- sklearn/metrics/_decision_threshold.py | 7 +++++-- sklearn/metrics/tests/test_decision_threshold.py | 15 ++++++++++++++- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/sklearn/metrics/_decision_threshold.py b/sklearn/metrics/_decision_threshold.py index bb46e6a2378d2..6ce3bf1c84302 100644 --- a/sklearn/metrics/_decision_threshold.py +++ b/sklearn/metrics/_decision_threshold.py @@ -38,8 +38,8 @@ def decision_threshold_curve( y_score, scoring, *, - thresholds=101, - scoring_kwargs={}, + thresholds=100, + scoring_kwargs=None, ): """Compute the threshold-dependent metric of interest per threshold. @@ -98,6 +98,9 @@ def decision_threshold_curve( >>> accuracy_values array([0.75, 0.5 , 0.75, 0.5 ]) """ + if scoring_kwargs is None: + scoring_kwargs = {} + # Check to make sure y_true is valid. y_type = type_of_target(y_true, input_name="y_true") pos_label = scoring_kwargs.get("pos_label") diff --git a/sklearn/metrics/tests/test_decision_threshold.py b/sklearn/metrics/tests/test_decision_threshold.py index a5bc14e67d87d..12f17fb880397 100644 --- a/sklearn/metrics/tests/test_decision_threshold.py +++ b/sklearn/metrics/tests/test_decision_threshold.py @@ -37,7 +37,7 @@ def test_binary_clf_curve_multiclass_error(): rng = check_random_state(404) y_true = rng.randint(0, 3, size=10) y_pred = rng.rand(10) - msg = "you must pass a `pos_label` to `scoring_kwargs`." + msg = "In a multiclass scenario, you must pass " with pytest.raises(ValueError, match=msg): decision_threshold_curve(y_true, y_pred, accuracy_score) @@ -97,6 +97,19 @@ def test_len_of_threshold_when_passing_int(): assert len(thresholds) == 13 +@pytest.mark.parametrize( + "metric, scoring_kwargs", + [ + (f1_score, None), + (f1_score, {}), + (fbeta_score, {"beta": 4}), + ], +) +def test_scoring_kwargs(metric, scoring_kwargs): + y_true = np.array([0] * 50 + [1] * 50) + decision_threshold_curve(y_true, y_true, metric, scoring_kwargs=scoring_kwargs) + + def test_passing_the_grid(): y = [0] * 500 + [1] * 500 y_score = list(range(1000)) From 4fab2a3db9b7524d6fccbbca91f5c335693f9960 Mon Sep 17 00:00:00 2001 From: vitaliset Date: Sat, 8 Jun 2024 01:26:51 -0300 Subject: [PATCH 23/42] removing doc files for now as we need to adapt to pr 29038 --- doc/inspection.rst | 1 - doc/modules/classes.rst | 1917 ------------------------ doc/modules/metric_threshold_curve.rst | 58 - 3 files changed, 1976 deletions(-) delete mode 100644 doc/modules/classes.rst delete mode 100644 doc/modules/metric_threshold_curve.rst diff --git a/doc/inspection.rst b/doc/inspection.rst index 25577fdd25c45..57c1cfc3275e8 100644 --- a/doc/inspection.rst +++ b/doc/inspection.rst @@ -29,4 +29,3 @@ to diagnose issues with model performance. modules/partial_dependence modules/permutation_importance - modules/metric_threshold_curve diff --git a/doc/modules/classes.rst b/doc/modules/classes.rst deleted file mode 100644 index 06eef55f0fa69..0000000000000 --- a/doc/modules/classes.rst +++ /dev/null @@ -1,1917 +0,0 @@ -.. _api_ref: - -============= -API Reference -============= - -This is the class and function reference of scikit-learn. Please refer to -the :ref:`full user guide ` for further details, as the class and -function raw specifications may not be enough to give full guidelines on their -uses. -For reference on concepts repeated across the API, see :ref:`glossary`. - -:mod:`sklearn`: Settings and information tools -============================================== - -.. automodule:: sklearn - :no-members: - :no-inherited-members: - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - config_context - get_config - set_config - show_versions - -:mod:`sklearn.base`: Base classes and utility functions -======================================================= - -.. automodule:: sklearn.base - :no-members: - :no-inherited-members: - -Base classes ------------- -.. currentmodule:: sklearn - -.. autosummary:: - :nosignatures: - :toctree: generated/ - :template: class.rst - - base.BaseEstimator - base.BiclusterMixin - base.ClassifierMixin - base.ClusterMixin - base.DensityMixin - base.RegressorMixin - base.TransformerMixin - base.MetaEstimatorMixin - base.OneToOneFeatureMixin - base.OutlierMixin - base.ClassNamePrefixFeaturesOutMixin - feature_selection.SelectorMixin - -Functions ---------- -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - base.clone - base.is_classifier - base.is_regressor - -.. _calibration_ref: - -:mod:`sklearn.calibration`: Probability Calibration -=================================================== - -.. automodule:: sklearn.calibration - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`calibration` section for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - calibration.CalibratedClassifierCV - - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - calibration.calibration_curve - -.. _cluster_ref: - -:mod:`sklearn.cluster`: Clustering -================================== - -.. automodule:: sklearn.cluster - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`clustering` and :ref:`biclustering` sections for -further details. - -Classes -------- -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - cluster.AffinityPropagation - cluster.AgglomerativeClustering - cluster.Birch - cluster.DBSCAN - cluster.HDBSCAN - cluster.FeatureAgglomeration - cluster.KMeans - cluster.BisectingKMeans - cluster.MiniBatchKMeans - cluster.MeanShift - cluster.OPTICS - cluster.SpectralClustering - cluster.SpectralBiclustering - cluster.SpectralCoclustering - -Functions ---------- -.. autosummary:: - :toctree: generated/ - :template: function.rst - - cluster.affinity_propagation - cluster.cluster_optics_dbscan - cluster.cluster_optics_xi - cluster.compute_optics_graph - cluster.dbscan - cluster.estimate_bandwidth - cluster.k_means - cluster.kmeans_plusplus - cluster.mean_shift - cluster.spectral_clustering - cluster.ward_tree - -.. _compose_ref: - -:mod:`sklearn.compose`: Composite Estimators -============================================ - -.. automodule:: sklearn.compose - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`combining_estimators` section for further -details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - compose.ColumnTransformer - compose.TransformedTargetRegressor - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - compose.make_column_transformer - compose.make_column_selector - -.. _covariance_ref: - -:mod:`sklearn.covariance`: Covariance Estimators -================================================ - -.. automodule:: sklearn.covariance - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`covariance` section for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - covariance.EmpiricalCovariance - covariance.EllipticEnvelope - covariance.GraphicalLasso - covariance.GraphicalLassoCV - covariance.LedoitWolf - covariance.MinCovDet - covariance.OAS - covariance.ShrunkCovariance - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - covariance.empirical_covariance - covariance.graphical_lasso - covariance.ledoit_wolf - covariance.ledoit_wolf_shrinkage - covariance.oas - covariance.shrunk_covariance - -.. _cross_decomposition_ref: - -:mod:`sklearn.cross_decomposition`: Cross decomposition -======================================================= - -.. automodule:: sklearn.cross_decomposition - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`cross_decomposition` section for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - cross_decomposition.CCA - cross_decomposition.PLSCanonical - cross_decomposition.PLSRegression - cross_decomposition.PLSSVD - -.. _datasets_ref: - -:mod:`sklearn.datasets`: Datasets -================================= - -.. automodule:: sklearn.datasets - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`datasets` section for further details. - -Loaders -------- - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - datasets.clear_data_home - datasets.dump_svmlight_file - datasets.fetch_20newsgroups - datasets.fetch_20newsgroups_vectorized - datasets.fetch_california_housing - datasets.fetch_covtype - datasets.fetch_kddcup99 - datasets.fetch_lfw_pairs - datasets.fetch_lfw_people - datasets.fetch_olivetti_faces - datasets.fetch_openml - datasets.fetch_rcv1 - datasets.fetch_species_distributions - datasets.get_data_home - datasets.load_breast_cancer - datasets.load_diabetes - datasets.load_digits - datasets.load_files - datasets.load_iris - datasets.load_linnerud - datasets.load_sample_image - datasets.load_sample_images - datasets.load_svmlight_file - datasets.load_svmlight_files - datasets.load_wine - -Samples generator ------------------ - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - datasets.make_biclusters - datasets.make_blobs - datasets.make_checkerboard - datasets.make_circles - datasets.make_classification - datasets.make_friedman1 - datasets.make_friedman2 - datasets.make_friedman3 - datasets.make_gaussian_quantiles - datasets.make_hastie_10_2 - datasets.make_low_rank_matrix - datasets.make_moons - datasets.make_multilabel_classification - datasets.make_regression - datasets.make_s_curve - datasets.make_sparse_coded_signal - datasets.make_sparse_spd_matrix - datasets.make_sparse_uncorrelated - datasets.make_spd_matrix - datasets.make_swiss_roll - - -.. _decomposition_ref: - -:mod:`sklearn.decomposition`: Matrix Decomposition -================================================== - -.. automodule:: sklearn.decomposition - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`decompositions` section for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - decomposition.DictionaryLearning - decomposition.FactorAnalysis - decomposition.FastICA - decomposition.IncrementalPCA - decomposition.KernelPCA - decomposition.LatentDirichletAllocation - decomposition.MiniBatchDictionaryLearning - decomposition.MiniBatchSparsePCA - decomposition.NMF - decomposition.MiniBatchNMF - decomposition.PCA - decomposition.SparsePCA - decomposition.SparseCoder - decomposition.TruncatedSVD - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - decomposition.dict_learning - decomposition.dict_learning_online - decomposition.fastica - decomposition.non_negative_factorization - decomposition.sparse_encode - -.. _lda_ref: - -:mod:`sklearn.discriminant_analysis`: Discriminant Analysis -=========================================================== - -.. automodule:: sklearn.discriminant_analysis - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`lda_qda` section for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - discriminant_analysis.LinearDiscriminantAnalysis - discriminant_analysis.QuadraticDiscriminantAnalysis - -.. _dummy_ref: - -:mod:`sklearn.dummy`: Dummy estimators -====================================== - -.. automodule:: sklearn.dummy - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`model_evaluation` section for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - dummy.DummyClassifier - dummy.DummyRegressor - -.. autosummary:: - :toctree: generated/ - :template: function.rst - -.. _ensemble_ref: - -:mod:`sklearn.ensemble`: Ensemble Methods -========================================= - -.. automodule:: sklearn.ensemble - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`ensemble` section for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - ensemble.AdaBoostClassifier - ensemble.AdaBoostRegressor - ensemble.BaggingClassifier - ensemble.BaggingRegressor - ensemble.ExtraTreesClassifier - ensemble.ExtraTreesRegressor - ensemble.GradientBoostingClassifier - ensemble.GradientBoostingRegressor - ensemble.IsolationForest - ensemble.RandomForestClassifier - ensemble.RandomForestRegressor - ensemble.RandomTreesEmbedding - ensemble.StackingClassifier - ensemble.StackingRegressor - ensemble.VotingClassifier - ensemble.VotingRegressor - ensemble.HistGradientBoostingRegressor - ensemble.HistGradientBoostingClassifier - - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - -.. _exceptions_ref: - -:mod:`sklearn.exceptions`: Exceptions and warnings -================================================== - -.. automodule:: sklearn.exceptions - :no-members: - :no-inherited-members: - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - exceptions.ConvergenceWarning - exceptions.DataConversionWarning - exceptions.DataDimensionalityWarning - exceptions.EfficiencyWarning - exceptions.FitFailedWarning - exceptions.InconsistentVersionWarning - exceptions.NotFittedError - exceptions.UndefinedMetricWarning - - -:mod:`sklearn.experimental`: Experimental -========================================= - -.. automodule:: sklearn.experimental - :no-members: - :no-inherited-members: - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - - experimental.enable_iterative_imputer - experimental.enable_halving_search_cv - - -.. _feature_extraction_ref: - -:mod:`sklearn.feature_extraction`: Feature Extraction -===================================================== - -.. automodule:: sklearn.feature_extraction - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`feature_extraction` section for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - feature_extraction.DictVectorizer - feature_extraction.FeatureHasher - -From images ------------ - -.. automodule:: sklearn.feature_extraction.image - :no-members: - :no-inherited-members: - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - feature_extraction.image.extract_patches_2d - feature_extraction.image.grid_to_graph - feature_extraction.image.img_to_graph - feature_extraction.image.reconstruct_from_patches_2d - - :template: class.rst - - feature_extraction.image.PatchExtractor - -.. _text_feature_extraction_ref: - -From text ---------- - -.. automodule:: sklearn.feature_extraction.text - :no-members: - :no-inherited-members: - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - feature_extraction.text.CountVectorizer - feature_extraction.text.HashingVectorizer - feature_extraction.text.TfidfTransformer - feature_extraction.text.TfidfVectorizer - - -.. _feature_selection_ref: - -:mod:`sklearn.feature_selection`: Feature Selection -=================================================== - -.. automodule:: sklearn.feature_selection - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`feature_selection` section for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - feature_selection.GenericUnivariateSelect - feature_selection.SelectPercentile - feature_selection.SelectKBest - feature_selection.SelectFpr - feature_selection.SelectFdr - feature_selection.SelectFromModel - feature_selection.SelectFwe - feature_selection.SequentialFeatureSelector - feature_selection.RFE - feature_selection.RFECV - feature_selection.VarianceThreshold - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - feature_selection.chi2 - feature_selection.f_classif - feature_selection.f_regression - feature_selection.r_regression - feature_selection.mutual_info_classif - feature_selection.mutual_info_regression - - -.. _gaussian_process_ref: - -:mod:`sklearn.gaussian_process`: Gaussian Processes -=================================================== - -.. automodule:: sklearn.gaussian_process - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`gaussian_process` section for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - gaussian_process.GaussianProcessClassifier - gaussian_process.GaussianProcessRegressor - -Kernels -------- - -.. automodule:: sklearn.gaussian_process.kernels - :no-members: - :no-inherited-members: - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class_with_call.rst - - gaussian_process.kernels.CompoundKernel - gaussian_process.kernels.ConstantKernel - gaussian_process.kernels.DotProduct - gaussian_process.kernels.ExpSineSquared - gaussian_process.kernels.Exponentiation - gaussian_process.kernels.Hyperparameter - gaussian_process.kernels.Kernel - gaussian_process.kernels.Matern - gaussian_process.kernels.PairwiseKernel - gaussian_process.kernels.Product - gaussian_process.kernels.RBF - gaussian_process.kernels.RationalQuadratic - gaussian_process.kernels.Sum - gaussian_process.kernels.WhiteKernel - - -.. _impute_ref: - -:mod:`sklearn.impute`: Impute -============================= - -.. automodule:: sklearn.impute - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`Impute` section for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - impute.SimpleImputer - impute.IterativeImputer - impute.MissingIndicator - impute.KNNImputer - - -.. _inspection_ref: - -:mod:`sklearn.inspection`: Inspection -===================================== - -.. automodule:: sklearn.inspection - :no-members: - :no-inherited-members: - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - inspection.partial_dependence - inspection.permutation_importance - inspection.metric_threshold_curve - -Plotting --------- - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: display_only_from_estimator.rst - - inspection.DecisionBoundaryDisplay - inspection.PartialDependenceDisplay - -.. _isotonic_ref: - -:mod:`sklearn.isotonic`: Isotonic regression -============================================ - -.. automodule:: sklearn.isotonic - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`isotonic` section for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - isotonic.IsotonicRegression - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - isotonic.check_increasing - isotonic.isotonic_regression - - -.. _kernel_approximation_ref: - -:mod:`sklearn.kernel_approximation`: Kernel Approximation -========================================================= - -.. automodule:: sklearn.kernel_approximation - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`kernel_approximation` section for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - kernel_approximation.AdditiveChi2Sampler - kernel_approximation.Nystroem - kernel_approximation.PolynomialCountSketch - kernel_approximation.RBFSampler - kernel_approximation.SkewedChi2Sampler - -.. _kernel_ridge_ref: - -:mod:`sklearn.kernel_ridge`: Kernel Ridge Regression -==================================================== - -.. automodule:: sklearn.kernel_ridge - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`kernel_ridge` section for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - kernel_ridge.KernelRidge - -.. _linear_model_ref: - -:mod:`sklearn.linear_model`: Linear Models -========================================== - -.. automodule:: sklearn.linear_model - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`linear_model` section for further details. - -The following subsections are only rough guidelines: the same estimator can -fall into multiple categories, depending on its parameters. - -.. currentmodule:: sklearn - -Linear classifiers ------------------- -.. autosummary:: - :toctree: generated/ - :template: class.rst - - linear_model.LogisticRegression - linear_model.LogisticRegressionCV - linear_model.PassiveAggressiveClassifier - linear_model.Perceptron - linear_model.RidgeClassifier - linear_model.RidgeClassifierCV - linear_model.SGDClassifier - linear_model.SGDOneClassSVM - -Classical linear regressors ---------------------------- - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - linear_model.LinearRegression - linear_model.Ridge - linear_model.RidgeCV - linear_model.SGDRegressor - -Regressors with variable selection ----------------------------------- - -The following estimators have built-in variable selection fitting -procedures, but any estimator using a L1 or elastic-net penalty also -performs variable selection: typically :class:`~linear_model.SGDRegressor` -or :class:`~sklearn.linear_model.SGDClassifier` with an appropriate penalty. - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - linear_model.ElasticNet - linear_model.ElasticNetCV - linear_model.Lars - linear_model.LarsCV - linear_model.Lasso - linear_model.LassoCV - linear_model.LassoLars - linear_model.LassoLarsCV - linear_model.LassoLarsIC - linear_model.OrthogonalMatchingPursuit - linear_model.OrthogonalMatchingPursuitCV - -Bayesian regressors -------------------- - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - linear_model.ARDRegression - linear_model.BayesianRidge - -Multi-task linear regressors with variable selection ----------------------------------------------------- - -These estimators fit multiple regression problems (or tasks) jointly, while -inducing sparse coefficients. While the inferred coefficients may differ -between the tasks, they are constrained to agree on the features that are -selected (non-zero coefficients). - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - linear_model.MultiTaskElasticNet - linear_model.MultiTaskElasticNetCV - linear_model.MultiTaskLasso - linear_model.MultiTaskLassoCV - -Outlier-robust regressors -------------------------- - -Any estimator using the Huber loss would also be robust to outliers, e.g. -:class:`~linear_model.SGDRegressor` with ``loss='huber'``. - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - linear_model.HuberRegressor - linear_model.QuantileRegressor - linear_model.RANSACRegressor - linear_model.TheilSenRegressor - -Generalized linear models (GLM) for regression ----------------------------------------------- - -These models allow for response variables to have error distributions other -than a normal distribution: - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - linear_model.PoissonRegressor - linear_model.TweedieRegressor - linear_model.GammaRegressor - - -Miscellaneous -------------- - -.. autosummary:: - :toctree: generated/ - :template: classes.rst - - linear_model.PassiveAggressiveRegressor - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - linear_model.enet_path - linear_model.lars_path - linear_model.lars_path_gram - linear_model.lasso_path - linear_model.orthogonal_mp - linear_model.orthogonal_mp_gram - linear_model.ridge_regression - - -.. _manifold_ref: - -:mod:`sklearn.manifold`: Manifold Learning -========================================== - -.. automodule:: sklearn.manifold - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`manifold` section for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated - :template: class.rst - - manifold.Isomap - manifold.LocallyLinearEmbedding - manifold.MDS - manifold.SpectralEmbedding - manifold.TSNE - -.. autosummary:: - :toctree: generated - :template: function.rst - - manifold.locally_linear_embedding - manifold.smacof - manifold.spectral_embedding - manifold.trustworthiness - - -.. _metrics_ref: - -:mod:`sklearn.metrics`: Metrics -=============================== - -See the :ref:`model_evaluation` section and the :ref:`metrics` section of the -user guide for further details. - -.. automodule:: sklearn.metrics - :no-members: - :no-inherited-members: - -.. currentmodule:: sklearn - -Model Selection Interface -------------------------- -See the :ref:`scoring_parameter` section of the user guide for further -details. - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - metrics.check_scoring - metrics.get_scorer - metrics.get_scorer_names - metrics.make_scorer - -Classification metrics ----------------------- - -See the :ref:`classification_metrics` section of the user guide for further -details. - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - metrics.accuracy_score - metrics.auc - metrics.average_precision_score - metrics.balanced_accuracy_score - metrics.brier_score_loss - metrics.class_likelihood_ratios - metrics.classification_report - metrics.cohen_kappa_score - metrics.confusion_matrix - metrics.d2_log_loss_score - metrics.dcg_score - metrics.det_curve - metrics.f1_score - metrics.fbeta_score - metrics.hamming_loss - metrics.hinge_loss - metrics.jaccard_score - metrics.log_loss - metrics.matthews_corrcoef - metrics.multilabel_confusion_matrix - metrics.ndcg_score - metrics.precision_recall_curve - metrics.precision_recall_fscore_support - metrics.precision_score - metrics.recall_score - metrics.roc_auc_score - metrics.roc_curve - metrics.top_k_accuracy_score - metrics.zero_one_loss - -Regression metrics ------------------- - -See the :ref:`regression_metrics` section of the user guide for further -details. - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - metrics.explained_variance_score - metrics.max_error - metrics.mean_absolute_error - metrics.mean_squared_error - metrics.mean_squared_log_error - metrics.median_absolute_error - metrics.mean_absolute_percentage_error - metrics.r2_score - metrics.root_mean_squared_log_error - metrics.root_mean_squared_error - metrics.mean_poisson_deviance - metrics.mean_gamma_deviance - metrics.mean_tweedie_deviance - metrics.d2_tweedie_score - metrics.mean_pinball_loss - metrics.d2_pinball_score - metrics.d2_absolute_error_score - -Multilabel ranking metrics --------------------------- -See the :ref:`multilabel_ranking_metrics` section of the user guide for further -details. - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - metrics.coverage_error - metrics.label_ranking_average_precision_score - metrics.label_ranking_loss - - -Clustering metrics ------------------- - -See the :ref:`clustering_evaluation` section of the user guide for further -details. - -.. automodule:: sklearn.metrics.cluster - :no-members: - :no-inherited-members: - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - metrics.adjusted_mutual_info_score - metrics.adjusted_rand_score - metrics.calinski_harabasz_score - metrics.davies_bouldin_score - metrics.completeness_score - metrics.cluster.contingency_matrix - metrics.cluster.pair_confusion_matrix - metrics.fowlkes_mallows_score - metrics.homogeneity_completeness_v_measure - metrics.homogeneity_score - metrics.mutual_info_score - metrics.normalized_mutual_info_score - metrics.rand_score - metrics.silhouette_score - metrics.silhouette_samples - metrics.v_measure_score - -Biclustering metrics --------------------- - -See the :ref:`biclustering_evaluation` section of the user guide for -further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - metrics.consensus_score - -Distance metrics ----------------- - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - metrics.DistanceMetric - -Pairwise metrics ----------------- - -See the :ref:`metrics` section of the user guide for further details. - -.. automodule:: sklearn.metrics.pairwise - :no-members: - :no-inherited-members: - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - metrics.pairwise.additive_chi2_kernel - metrics.pairwise.chi2_kernel - metrics.pairwise.cosine_similarity - metrics.pairwise.cosine_distances - metrics.pairwise.distance_metrics - metrics.pairwise.euclidean_distances - metrics.pairwise.haversine_distances - metrics.pairwise.kernel_metrics - metrics.pairwise.laplacian_kernel - metrics.pairwise.linear_kernel - metrics.pairwise.manhattan_distances - metrics.pairwise.nan_euclidean_distances - metrics.pairwise.pairwise_kernels - metrics.pairwise.polynomial_kernel - metrics.pairwise.rbf_kernel - metrics.pairwise.sigmoid_kernel - metrics.pairwise.paired_euclidean_distances - metrics.pairwise.paired_manhattan_distances - metrics.pairwise.paired_cosine_distances - metrics.pairwise.paired_distances - metrics.pairwise_distances - metrics.pairwise_distances_argmin - metrics.pairwise_distances_argmin_min - metrics.pairwise_distances_chunked - - -Plotting --------- - -See the :ref:`visualizations` section of the user guide for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: display_all_class_methods.rst - - metrics.ConfusionMatrixDisplay - metrics.DetCurveDisplay - metrics.PrecisionRecallDisplay - metrics.PredictionErrorDisplay - metrics.RocCurveDisplay - calibration.CalibrationDisplay - -.. _mixture_ref: - -:mod:`sklearn.mixture`: Gaussian Mixture Models -=============================================== - -.. automodule:: sklearn.mixture - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`mixture` section for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - mixture.BayesianGaussianMixture - mixture.GaussianMixture - -.. _modelselection_ref: - -:mod:`sklearn.model_selection`: Model Selection -=============================================== - -.. automodule:: sklearn.model_selection - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`cross_validation`, :ref:`grid_search` and -:ref:`learning_curve` sections for further details. - -Splitter Classes ----------------- - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - model_selection.GroupKFold - model_selection.GroupShuffleSplit - model_selection.KFold - model_selection.LeaveOneGroupOut - model_selection.LeavePGroupsOut - model_selection.LeaveOneOut - model_selection.LeavePOut - model_selection.PredefinedSplit - model_selection.RepeatedKFold - model_selection.RepeatedStratifiedKFold - model_selection.ShuffleSplit - model_selection.StratifiedKFold - model_selection.StratifiedShuffleSplit - model_selection.StratifiedGroupKFold - model_selection.TimeSeriesSplit - -Splitter Functions ------------------- - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - model_selection.check_cv - model_selection.train_test_split - -.. _hyper_parameter_optimizers: - -Hyper-parameter optimizers --------------------------- - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - model_selection.GridSearchCV - model_selection.HalvingGridSearchCV - model_selection.ParameterGrid - model_selection.ParameterSampler - model_selection.RandomizedSearchCV - model_selection.HalvingRandomSearchCV - -Post-fit model tuning ---------------------- - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - model_selection.FixedThresholdClassifier - model_selection.TunedThresholdClassifierCV - -Model validation ----------------- - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - model_selection.cross_validate - model_selection.cross_val_predict - model_selection.cross_val_score - model_selection.learning_curve - model_selection.permutation_test_score - model_selection.validation_curve - -Visualization -------------- - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: display_only_from_estimator.rst - - model_selection.LearningCurveDisplay - model_selection.ValidationCurveDisplay - -.. _multiclass_ref: - -:mod:`sklearn.multiclass`: Multiclass classification -==================================================== - -.. automodule:: sklearn.multiclass - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`multiclass_classification` section for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - multiclass.OneVsRestClassifier - multiclass.OneVsOneClassifier - multiclass.OutputCodeClassifier - -.. _multioutput_ref: - -:mod:`sklearn.multioutput`: Multioutput regression and classification -===================================================================== - -.. automodule:: sklearn.multioutput - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`multilabel_classification`, -:ref:`multiclass_multioutput_classification`, and -:ref:`multioutput_regression` sections for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated - :template: class.rst - - multioutput.ClassifierChain - multioutput.MultiOutputRegressor - multioutput.MultiOutputClassifier - multioutput.RegressorChain - -.. _naive_bayes_ref: - -:mod:`sklearn.naive_bayes`: Naive Bayes -======================================= - -.. automodule:: sklearn.naive_bayes - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`naive_bayes` section for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - naive_bayes.BernoulliNB - naive_bayes.CategoricalNB - naive_bayes.ComplementNB - naive_bayes.GaussianNB - naive_bayes.MultinomialNB - - -.. _neighbors_ref: - -:mod:`sklearn.neighbors`: Nearest Neighbors -=========================================== - -.. automodule:: sklearn.neighbors - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`neighbors` section for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - neighbors.BallTree - neighbors.KDTree - neighbors.KernelDensity - neighbors.KNeighborsClassifier - neighbors.KNeighborsRegressor - neighbors.KNeighborsTransformer - neighbors.LocalOutlierFactor - neighbors.RadiusNeighborsClassifier - neighbors.RadiusNeighborsRegressor - neighbors.RadiusNeighborsTransformer - neighbors.NearestCentroid - neighbors.NearestNeighbors - neighbors.NeighborhoodComponentsAnalysis - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - neighbors.kneighbors_graph - neighbors.radius_neighbors_graph - neighbors.sort_graph_by_row_values - -.. _neural_network_ref: - -:mod:`sklearn.neural_network`: Neural network models -==================================================== - -.. automodule:: sklearn.neural_network - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`neural_networks_supervised` and :ref:`neural_networks_unsupervised` sections for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - neural_network.BernoulliRBM - neural_network.MLPClassifier - neural_network.MLPRegressor - -.. _pipeline_ref: - -:mod:`sklearn.pipeline`: Pipeline -================================= - -.. automodule:: sklearn.pipeline - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`combining_estimators` section for further -details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - pipeline.FeatureUnion - pipeline.Pipeline - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - pipeline.make_pipeline - pipeline.make_union - -.. _preprocessing_ref: - -:mod:`sklearn.preprocessing`: Preprocessing and Normalization -============================================================= - -.. automodule:: sklearn.preprocessing - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`preprocessing` section for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - preprocessing.Binarizer - preprocessing.FunctionTransformer - preprocessing.KBinsDiscretizer - preprocessing.KernelCenterer - preprocessing.LabelBinarizer - preprocessing.LabelEncoder - preprocessing.MultiLabelBinarizer - preprocessing.MaxAbsScaler - preprocessing.MinMaxScaler - preprocessing.Normalizer - preprocessing.OneHotEncoder - preprocessing.OrdinalEncoder - preprocessing.PolynomialFeatures - preprocessing.PowerTransformer - preprocessing.QuantileTransformer - preprocessing.RobustScaler - preprocessing.SplineTransformer - preprocessing.StandardScaler - preprocessing.TargetEncoder - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - preprocessing.add_dummy_feature - preprocessing.binarize - preprocessing.label_binarize - preprocessing.maxabs_scale - preprocessing.minmax_scale - preprocessing.normalize - preprocessing.quantile_transform - preprocessing.robust_scale - preprocessing.scale - preprocessing.power_transform - - -.. _random_projection_ref: - -:mod:`sklearn.random_projection`: Random projection -=================================================== - -.. automodule:: sklearn.random_projection - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`random_projection` section for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - random_projection.GaussianRandomProjection - random_projection.SparseRandomProjection - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - random_projection.johnson_lindenstrauss_min_dim - - -.. _semi_supervised_ref: - -:mod:`sklearn.semi_supervised`: Semi-Supervised Learning -======================================================== - -.. automodule:: sklearn.semi_supervised - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`semi_supervised` section for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - semi_supervised.LabelPropagation - semi_supervised.LabelSpreading - semi_supervised.SelfTrainingClassifier - - -.. _svm_ref: - -:mod:`sklearn.svm`: Support Vector Machines -=========================================== - -.. automodule:: sklearn.svm - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`svm` section for further details. - -Estimators ----------- - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - svm.LinearSVC - svm.LinearSVR - svm.NuSVC - svm.NuSVR - svm.OneClassSVM - svm.SVC - svm.SVR - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - svm.l1_min_c - -.. _tree_ref: - -:mod:`sklearn.tree`: Decision Trees -=================================== - -.. automodule:: sklearn.tree - :no-members: - :no-inherited-members: - -**User guide:** See the :ref:`tree` section for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - tree.DecisionTreeClassifier - tree.DecisionTreeRegressor - tree.ExtraTreeClassifier - tree.ExtraTreeRegressor - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - tree.export_graphviz - tree.export_text - -Plotting --------- - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - tree.plot_tree - -.. _utils_ref: - -:mod:`sklearn.utils`: Utilities -=============================== - -.. automodule:: sklearn.utils - :no-members: - :no-inherited-members: - -**Developer guide:** See the :ref:`developers-utils` page for further details. - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - utils.Bunch - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - utils.as_float_array - utils.assert_all_finite - utils.deprecated - utils.estimator_html_repr - utils.gen_batches - utils.gen_even_slices - utils.indexable - utils.murmurhash3_32 - utils.resample - utils._safe_indexing - utils.safe_mask - utils.safe_sqr - utils.shuffle - -Input and parameter validation ------------------------------- - -.. automodule:: sklearn.utils.validation - :no-members: - :no-inherited-members: - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - utils.check_X_y - utils.check_array - utils.check_scalar - utils.check_consistent_length - utils.check_random_state - utils.validation.check_is_fitted - utils.validation.check_memory - utils.validation.check_symmetric - utils.validation.column_or_1d - utils.validation.has_fit_parameter - -Utilities used in meta-estimators ---------------------------------- - -.. automodule:: sklearn.utils.metaestimators - :no-members: - :no-inherited-members: - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - utils.metaestimators.available_if - -Utilities to handle weights based on class labels -------------------------------------------------- - -.. automodule:: sklearn.utils.class_weight - :no-members: - :no-inherited-members: - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - utils.class_weight.compute_class_weight - utils.class_weight.compute_sample_weight - -Utilities to deal with multiclass target in classifiers -------------------------------------------------------- - -.. automodule:: sklearn.utils.multiclass - :no-members: - :no-inherited-members: - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - utils.multiclass.type_of_target - utils.multiclass.is_multilabel - utils.multiclass.unique_labels - -Utilities for optimal mathematical operations ---------------------------------------------- - -.. automodule:: sklearn.utils.extmath - :no-members: - :no-inherited-members: - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - utils.extmath.safe_sparse_dot - utils.extmath.randomized_range_finder - utils.extmath.randomized_svd - utils.extmath.fast_logdet - utils.extmath.density - utils.extmath.weighted_mode - -Utilities to work with sparse matrices and arrays -------------------------------------------------- - -.. automodule:: sklearn.utils.sparsefuncs - :no-members: - :no-inherited-members: - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - utils.sparsefuncs.incr_mean_variance_axis - utils.sparsefuncs.inplace_column_scale - utils.sparsefuncs.inplace_row_scale - utils.sparsefuncs.inplace_swap_row - utils.sparsefuncs.inplace_swap_column - utils.sparsefuncs.mean_variance_axis - utils.sparsefuncs.inplace_csr_column_scale - -.. automodule:: sklearn.utils.sparsefuncs_fast - :no-members: - :no-inherited-members: - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - utils.sparsefuncs_fast.inplace_csr_row_normalize_l1 - utils.sparsefuncs_fast.inplace_csr_row_normalize_l2 - -Utilities to work with graphs ------------------------------ - -.. automodule:: sklearn.utils.graph - :no-members: - :no-inherited-members: - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - utils.graph.single_source_shortest_path_length - -Utilities for random sampling ------------------------------ - -.. automodule:: sklearn.utils.random - :no-members: - :no-inherited-members: - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - utils.random.sample_without_replacement - - -Utilities to operate on arrays ------------------------------- - -.. automodule:: sklearn.utils.arrayfuncs - :no-members: - :no-inherited-members: - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - utils.arrayfuncs.min_pos - -Metadata routing ----------------- - -.. automodule:: sklearn.utils.metadata_routing - :no-members: - :no-inherited-members: - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - utils.metadata_routing.get_routing_for_object - utils.metadata_routing.process_routing - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - utils.metadata_routing.MetadataRouter - utils.metadata_routing.MetadataRequest - utils.metadata_routing.MethodMapping - -Scikit-learn object discovery ------------------------------ - -.. automodule:: sklearn.utils.discovery - :no-members: - :no-inherited-members: - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - utils.discovery.all_estimators - utils.discovery.all_displays - utils.discovery.all_functions - -Scikit-learn compatibility checker ----------------------------------- - -.. automodule:: sklearn.utils.estimator_checks - :no-members: - :no-inherited-members: - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - utils.estimator_checks.check_estimator - utils.estimator_checks.parametrize_with_checks - -Utilities for parallel computing --------------------------------- - -.. automodule:: sklearn.utils.parallel - :no-members: - :no-inherited-members: - -.. currentmodule:: sklearn - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - utils.parallel.delayed - utils.parallel_backend - utils.register_parallel_backend - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - utils.parallel.Parallel - - -Recently deprecated -=================== diff --git a/doc/modules/metric_threshold_curve.rst b/doc/modules/metric_threshold_curve.rst deleted file mode 100644 index baeb48079df9e..0000000000000 --- a/doc/modules/metric_threshold_curve.rst +++ /dev/null @@ -1,58 +0,0 @@ - -.. _metric_threshold_curve: - -Metric threshold curve -====================== - -.. currentmodule:: sklearn.inspection - -TODO: It makes much more sense to move this to -`doc\modules\classification_threshold.rst`. Let's finish the code before -that. :) - -Metric threshold curve is a model inspection technique that can be used -for any :term:`fitted` binary classification :term:`estimator`. The metric -threshold curve is defined to be how the threshold-dependent metric behaves -when we change the decision threshold. - -Let's consider the following trained binary classification model:: - - >>> import numpy as np - >>> import matplotlib.pyplot as plt - >>> from sklearn.datasets import make_classification - >>> from sklearn.ensemble import RandomForestClassifier - >>> from sklearn.model_selection import train_test_split - >>> from sklearn.metrics import fbeta_score - >>> from functools import partial - - >>> X, y = make_classification( - ... n_samples=10_000, weights=(0.95, ), random_state=42) - - >>> X_train_clf, X_test, y_train_clf, y_test = train_test_split( - ... X, y, random_state=42, stratify=y) - >>> X_train_clf, X_train_thr, y_train_clf, y_train_thr = train_test_split( - ... X_train_clf, y_train_clf, random_state=42, stratify=y_train_clf) - - >>> model = RandomForestClassifier(random_state=42).fit(X_train_clf, y_train_clf) - - >>> fbeta_score(y_test, model.predict(X_test), beta=2) - 0.462... - -Its validation performance, measured via the threshold-dependent metric f2 -score, is suboptimal because of the default threshold of 0.5. We can futher -look into the behaviour of that metric with:: - - >>> from sklearn.inspection import metric_threshold_curve - >>> predict_proba_thr = model.predict_proba(X_train_thr)[:, 1] - >>> f2_values, thresholds = metric_threshold_curve( - ... y_train_thr, predict_proba_thr, partial(fbeta_score, beta=2)) - - >>> best_thr = thresholds[np.argmax(f2_values)] - >>> best_thr - 0.21 - - >>> new_predict_test = (model.predict_proba(X_test)[:, 1] > best_thr).astype(int) - >>> fbeta_score(y_test, new_predict_test, beta=2) - 0.719... - -Note that the new choosen threshold optimizes the f2 score in the test set. From f1dc0e8f391300714698dbec7a0b038e948cae57 Mon Sep 17 00:00:00 2001 From: Carlo Lemos <55899543+vitaliset@users.noreply.github.com> Date: Mon, 29 Jul 2024 22:53:09 -0300 Subject: [PATCH 24/42] Update _decision_threshold.py to add authors --- sklearn/metrics/_decision_threshold.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/sklearn/metrics/_decision_threshold.py b/sklearn/metrics/_decision_threshold.py index 6ce3bf1c84302..ed3283c22e35e 100644 --- a/sklearn/metrics/_decision_threshold.py +++ b/sklearn/metrics/_decision_threshold.py @@ -1,10 +1,14 @@ -"""Metrics per threshold curves are used to assess performance on binary -classification task given threshold grid. One can undestand the behaviour of -threshold-dependent metrics when changing the threshold. +"""Metric per threshold curve to assess binary classification performance. + +Given threshold grid, one can undestand the behaviour of threshold-dependent +metrics when changing the threshold. In imbalanced scenarios or +cost-sensitive learning, a 0.5 threshold may not be optimal and tools like +this can help you visualize how the performance changes. """ -# Authors: ######## -# License: BSD 3 clause +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + from numbers import Integral From 02842517d13db191851262f055bdf3ef8b0de080 Mon Sep 17 00:00:00 2001 From: vitaliset Date: Tue, 30 Jul 2024 01:18:36 -0300 Subject: [PATCH 25/42] towards using _curvescorer in the new decision threshold function. missing TODOs --- sklearn/metrics/_decision_threshold.py | 124 +++-------------- sklearn/metrics/_scorer.py | 95 +++++++++---- .../metrics/tests/test_decision_threshold.py | 130 +++++++++--------- .../_classification_threshold.py | 2 +- 4 files changed, 161 insertions(+), 190 deletions(-) diff --git a/sklearn/metrics/_decision_threshold.py b/sklearn/metrics/_decision_threshold.py index ed3283c22e35e..fab20e2730fe0 100644 --- a/sklearn/metrics/_decision_threshold.py +++ b/sklearn/metrics/_decision_threshold.py @@ -9,18 +9,10 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause - from numbers import Integral -import numpy as np - -from ..utils import assert_all_finite, check_consistent_length, column_or_1d from ..utils._param_validation import Interval, validate_params -from ..utils.multiclass import type_of_target -from ..utils.validation import ( - _check_pos_label_consistency, - _check_sample_weight, -) +from ._scorer import _CurveScorer @validate_params( @@ -33,7 +25,6 @@ "array-like", None, ], - "scoring_kwargs": [dict, None], }, prefer_skip_nested_validation=True, ) @@ -41,9 +32,7 @@ def decision_threshold_curve( y_true, y_score, scoring, - *, thresholds=100, - scoring_kwargs=None, ): """Compute the threshold-dependent metric of interest per threshold. @@ -51,7 +40,7 @@ def decision_threshold_curve( Read more in the :ref:`User Guide `. - .. versionadded:: 1.3 + .. versionadded:: 1.6 Parameters ---------- @@ -61,23 +50,24 @@ def decision_threshold_curve( y_score : array-like of shape (n_samples,), default=None Estimated probabilities or output of a decision function. - scoring : callable - Threshold-dependent score function (or loss function) with signature - `scoring(y, y_pred, **scoring_kwargs)`. + scoring : callable, default=None + The objective metric to be estimated. It should be a callable object created + with :func:`~sklearn.metrics.make_scorer`. + # TODO(Carlo): Change it to also just be a function callable. In this case, + # transform it in a scorer inside the function. - thresholds : array-like or int, default=101 - Values of threhsold for each score calculation. If int then - `thresholds` percentiles of `y_score` are selected. If int is lower - then `len(set(y_score))` then all possible thresholds are selected. - - scoring_kwargs : dict, default=None - Keyword arguments to pass to specified `scoring` function. + thresholds : int or array-like, default=100 + Related to the number of decision thresholds for which we want to compute the + score. If an integer, it will be used to generate `thresholds` thresholds + uniformly distributed between the minimum and maximum of `y_score`. If an + array-like, it will be used as the thresholds. Returns ------- metric_values : ndarray of shape (n_thresholds,) - Score value for each threshold. At index i being the value of the + The scores associated to each threshold. At index i being the value of the theshold-dependent metric for predictions score >= thresholds[i]. + # TODO(Carlo) Check if > or >= thresholds : ndarray of shape (n_thresholds,) Ascending score values used as thresholds. @@ -89,91 +79,21 @@ def decision_threshold_curve( det_curve : Compute error rates for different probability thresholds. roc_curve : Compute Receiver operating characteristic (ROC) curve. - Examples + Examples #TODO(Carlo) change the example and fix threshold. -------- >>> import numpy as np >>> from sklearn.metrics import accuracy_score, decision_threshold_curve >>> y_true = np.array([0, 0, 1, 1]) - >>> y_scores = np.array([0.1, 0.4, 0.35, 0.8]) + >>> y_score = np.array([0.1, 0.4, 0.35, 0.8]) >>> accuracy_values, thresholds = decision_threshold_curve( - ... y_true, y_scores, accuracy_score) + ... y_true, y_score, accuracy_score) >>> thresholds array([0.1 , 0.35, 0.4 , 0.8 ]) >>> accuracy_values array([0.75, 0.5 , 0.75, 0.5 ]) """ - if scoring_kwargs is None: - scoring_kwargs = {} - - # Check to make sure y_true is valid. - y_type = type_of_target(y_true, input_name="y_true") - pos_label = scoring_kwargs.get("pos_label") - if not (y_type == "binary" or (y_type == "multiclass" and pos_label is not None)): - if y_type == "multiclass": - raise ValueError( - "In a multiclass scenario, you must pass a `pos_label` \ - to `scoring_kwargs`." - ) - raise ValueError("{0} format is not supported".format(y_type)) - - sample_weight = scoring_kwargs.get("sample_weight") - check_consistent_length(y_true, y_score, sample_weight) - y_true = column_or_1d(y_true) - y_score = column_or_1d(y_score) - assert_all_finite(y_true) - assert_all_finite(y_score) - - # Filter out zero-weighted samples, as they should not impact the result. - - if sample_weight is not None: - sample_weight = column_or_1d(sample_weight) - sample_weight = _check_sample_weight(sample_weight, y_true) - nonzero_weight_mask = sample_weight != 0 - y_true = y_true[nonzero_weight_mask] - y_score = y_score[nonzero_weight_mask] - sample_weight = sample_weight[nonzero_weight_mask] - - pos_label = _check_pos_label_consistency(pos_label, y_true) - - # Make y_true a boolean vector. - y_true = y_true == pos_label - - # Sort scores and corresponding truth values. - desc_score_indices = np.argsort(y_score, kind="mergesort")[::-1] - y_score = y_score[desc_score_indices] - y_true = y_true[desc_score_indices] - if sample_weight is not None: - sample_weight = sample_weight[desc_score_indices] - - if "sample_weight" in scoring_kwargs: - scoring_kwargs["sample_weight"] = sample_weight - - # Logic to see if we need to use all possible thresholds (distinct values). - all_thresholds = isinstance(thresholds, int) and len(set(y_score)) < thresholds - - if all_thresholds: - # y_score typically has many tied values. Here we extract - # the indices associated with the distinct values. We also - # concatenate a value for the end of the curve. - distinct_value_indices = np.where(np.diff(y_score))[0] - threshold_idxs = np.r_[distinct_value_indices, y_true.size - 1] - thresholds = y_score[threshold_idxs[::-1]] - elif isinstance(thresholds, int): - # It takes representative score points to calculate the metric - # with these thresholds. - thresholds = np.percentile(list(set(y_score)), np.linspace(0, 100, thresholds)) - else: - # If thresholds is an array then run some checks and sort - # it for consistency. - thresholds = column_or_1d(thresholds) - assert_all_finite(thresholds) - thresholds = np.sort(thresholds) - - # For each threshold calculates the metric. - metric_values = [] - for threshold in thresholds: - preds_threshold = (y_score > threshold).astype(int) - metric_values.append(scoring(y_true, preds_threshold, **scoring_kwargs)) - # TODO: should we multithread the metric calculations? - - return np.array(metric_values), thresholds + # if scoring is function ... transform into scorer (do I need an estimator?) + curve_scorer = _CurveScorer.from_scorer(scoring, thresholds) + thresholds, metric_values = curve_scorer._score_given_prediction(y_score) + + return metric_values, thresholds diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py index 76ad55514b8c2..853bc3b7f6f72 100644 --- a/sklearn/metrics/_scorer.py +++ b/sklearn/metrics/_scorer.py @@ -28,6 +28,7 @@ from ..base import is_regressor from ..utils import Bunch +from ..utils._encode import _unique from ..utils._param_validation import HasMethods, Hidden, StrOptions, validate_params from ..utils._response import _get_response_values from ..utils.metadata_routing import ( @@ -1122,11 +1123,12 @@ class _CurveScorer(_BaseScorer): uniformly distributed between the minimum and maximum predicted scores. If an array-like, it will be used as the thresholds. - response_method : str - The method to call on the estimator to get the response values. + response_method : str, default=None + The method to call on the estimator to get the response values. If value is set + to `None`, then """ - def __init__(self, score_func, sign, kwargs, thresholds, response_method): + def __init__(self, score_func, sign, kwargs, thresholds, response_method=None): super().__init__( score_func=score_func, sign=sign, @@ -1136,19 +1138,72 @@ def __init__(self, score_func, sign, kwargs, thresholds, response_method): self._thresholds = thresholds @classmethod - def from_scorer(cls, scorer, response_method, thresholds): + def from_scorer(cls, scorer, thresholds, response_method=None): """Create a continuous scorer from a normal scorer.""" instance = cls( score_func=scorer._score_func, sign=scorer._sign, - response_method=response_method, thresholds=thresholds, + response_method=response_method, kwargs=scorer._kwargs, ) # transfer the metadata request instance._metadata_request = scorer._get_metadata_request() return instance + # TODO(Carlo): Create tests for this functions. + def _score_given_prediction( + self, y_score, y_true, classes=None, pos_label=None, **kwargs + ): + """Calculate the scores for given prediction values and true labels. + + Parameters + ---------- + y_score : array-like of shape (n_samples,) + Predicted target scores. + + y_true : array-like of shape (n_samples,) + Gold standard target values. + + classes: TODO(Carlo) + ... + + pos_label: TODO(Carlo) + ... + + **kwargs : dict + Other parameters passed to the scorer. + + Returns + ------- + score_thresholds : ndarray of shape (thresholds,) + The scores associated with each threshold. + + potential_thresholds : ndarray of shape (thresholds,) + The potential thresholds used to compute the scores. + """ + if classes is None: + classes = _unique(y_true) + if pos_label is None: + pos_label = self._get_pos_label() + scoring_kwargs = {**self._kwargs, **kwargs} + if isinstance(self._thresholds, Integral): + potential_thresholds = np.linspace( + np.min(y_score), np.max(y_score), self._thresholds + ) + else: + potential_thresholds = np.asarray(self._thresholds) + score_thresholds = [ + self._sign + * self._score_func( + y_true, + _threshold_scores_to_class_labels(y_score, th, classes, pos_label), + **scoring_kwargs, + ) + for th in potential_thresholds + ] + return np.array(score_thresholds), potential_thresholds + def _score(self, method_caller, estimator, X, y_true, **kwargs): """Evaluate predicted target values for X relative to y_true. @@ -1179,27 +1234,19 @@ def _score(self, method_caller, estimator, X, y_true, **kwargs): potential_thresholds : ndarray of shape (thresholds,) The potential thresholds used to compute the scores. """ + if self._response_method is None: + raise ValueError( + "If response_method is set to `None`, you can't use this method. " + "Use `_score_given_prediction` instead." + ) pos_label = self._get_pos_label() y_score = method_caller( estimator, self._response_method, X, pos_label=pos_label ) + classes = estimator.classes_ - scoring_kwargs = {**self._kwargs, **kwargs} - if isinstance(self._thresholds, Integral): - potential_thresholds = np.linspace( - np.min(y_score), np.max(y_score), self._thresholds - ) - else: - potential_thresholds = np.asarray(self._thresholds) - score_thresholds = [ - self._sign - * self._score_func( - y_true, - _threshold_scores_to_class_labels( - y_score, th, estimator.classes_, pos_label - ), - **scoring_kwargs, - ) - for th in potential_thresholds - ] - return np.array(score_thresholds), potential_thresholds + scores, potential_thresholds = self._score_given_prediction( + y_score, y_true, classes, pos_label, **kwargs + ) + + return scores, potential_thresholds diff --git a/sklearn/metrics/tests/test_decision_threshold.py b/sklearn/metrics/tests/test_decision_threshold.py index 12f17fb880397..950ea9e28c916 100644 --- a/sklearn/metrics/tests/test_decision_threshold.py +++ b/sklearn/metrics/tests/test_decision_threshold.py @@ -1,52 +1,49 @@ from functools import partial -import numpy as np import pytest -from sklearn.datasets import make_classification -from sklearn.ensemble import RandomForestClassifier from sklearn.metrics import ( accuracy_score, - decision_threshold_curve, f1_score, fbeta_score, precision_score, recall_score, ) -from sklearn.utils._testing import assert_allclose -from sklearn.utils.validation import check_random_state +# TODO(Carlo): Update tests. def test_grid_int_bigger_than_set_then_all(): - """When `thresholds` parameter is bigger than the number of unique - `y_score` then `len(thresholds)` should be equal to `len(set(y_score))`. - """ + # """When `thresholds` parameter is bigger than the number of unique + # `y_score` then `len(thresholds)` should be equal to `len(set(y_score))`. + # """ - X, y = make_classification() - clf = RandomForestClassifier(n_estimators=10, random_state=42).fit(X, y) - y_score = clf.predict_proba(X)[:, 1] + # X, y = make_classification() + # clf = RandomForestClassifier(n_estimators=10, random_state=42).fit(X, y) + # y_score = clf.predict_proba(X)[:, 1] - _, thresholds_big_int = decision_threshold_curve( - y, y_score, accuracy_score, thresholds=len(set(y_score)) + 1000 - ) + # _, thresholds_big_int = decision_threshold_curve( + # y, y_score, accuracy_score, thresholds=len(set(y_score)) + 1000 + # ) - assert len(thresholds_big_int) == len(set(y_score)) + # assert len(thresholds_big_int) == len(set(y_score)) + assert True def test_binary_clf_curve_multiclass_error(): - rng = check_random_state(404) - y_true = rng.randint(0, 3, size=10) - y_pred = rng.rand(10) - msg = "In a multiclass scenario, you must pass " - with pytest.raises(ValueError, match=msg): - decision_threshold_curve(y_true, y_pred, accuracy_score) + # rng = check_random_state(404) + # y_true = rng.randint(0, 3, size=10) + # y_pred = rng.rand(10) + # msg = "In a multiclass scenario, you must pass " + # with pytest.raises(ValueError, match=msg): + # decision_threshold_curve(y_true, y_pred, accuracy_score) + assert True @pytest.mark.parametrize( "metric", [ - partial(fbeta_score, beta=3), - partial(fbeta_score, beta=0.5), + # make_scorer(fbeta_score, beta=3), + # make_scorer(fbeta_score, beta=0.5), f1_score, precision_score, recall_score, @@ -54,15 +51,16 @@ def test_binary_clf_curve_multiclass_error(): ], ) def test_decision_threshold_curve_end_points(metric): - rng = check_random_state(0) - y_true = np.array([0] * 50 + [1] * 50) - y_score = rng.normal(3, size=100) - min_pred, max_score = min(y_score), max(y_score) + # rng = check_random_state(0) + # y_true = np.array([0] * 50 + [1] * 50) + # y_score = rng.normal(3, size=100) + # min_pred, max_score = min(y_score), max(y_score) - metric_values, _ = decision_threshold_curve(y_true, y_score, metric) + # metric_values, _ = decision_threshold_curve(y_true, y_score, metric) - assert metric_values[0] == metric(y_true, (y_score > min_pred) * 1) - assert metric_values[-1] == metric(y_true, (y_score > max_score) * 1) + # assert metric_values[0] == metric(y_true, (y_score > min_pred) * 1) + # assert metric_values[-1] == metric(y_true, (y_score > max_score) * 1) + assert True @pytest.mark.parametrize( @@ -70,31 +68,35 @@ def test_decision_threshold_curve_end_points(metric): [partial(fbeta_score, beta=3), precision_score, recall_score], ) def test_zero_sample_weight_equals_excluding(metric): - rng = check_random_state(0) - y_true = np.array([0] * 50 + [1] * 50) - y_score = rng.normal(3, size=100) + # rng = check_random_state(0) + # y_true = np.array([0] * 50 + [1] * 50) + # y_score = rng.normal(3, size=100) - sample_weight = np.array([0] * 20 + [1] * 80) - scoring_kwargs = {"sample_weight": sample_weight} - metric_values_sw, _ = decision_threshold_curve( - y_true, y_score, metric, scoring_kwargs=scoring_kwargs - ) + # sample_weight = np.array([0] * 20 + [1] * 80) + # scoring_kwargs = {"sample_weight": sample_weight} + # metric_values_sw, _ = decision_threshold_curve( + # y_true, y_score, metric, scoring_kwargs=scoring_kwargs + # ) - y_true_exclude = y_true[sample_weight != 0] - y_score_exclude = y_score[sample_weight != 0] - metric_values_exclude, _ = decision_threshold_curve( - y_true_exclude, y_score_exclude, metric - ) + # y_true_exclude = y_true[sample_weight != 0] + # y_score_exclude = y_score[sample_weight != 0] + # metric_values_exclude, _ = decision_threshold_curve( + # y_true_exclude, y_score_exclude, metric + # ) - assert_allclose(metric_values_sw, metric_values_exclude) + # assert_allclose(metric_values_sw, metric_values_exclude) + assert True def test_len_of_threshold_when_passing_int(): - y = [0] * 500 + [1] * 500 - y_score = list(range(1000)) - _, thresholds = decision_threshold_curve(y, y_score, accuracy_score, thresholds=13) + # y = [0] * 500 + [1] * 500 + # y_score = list(range(1000)) + # _, thresholds = decision_threshold_curve( + # y, y_score, accuracy_score, thresholds=13 + # ) - assert len(thresholds) == 13 + # assert len(thresholds) == 13 + assert True @pytest.mark.parametrize( @@ -106,24 +108,26 @@ def test_len_of_threshold_when_passing_int(): ], ) def test_scoring_kwargs(metric, scoring_kwargs): - y_true = np.array([0] * 50 + [1] * 50) - decision_threshold_curve(y_true, y_true, metric, scoring_kwargs=scoring_kwargs) + # y_true = np.array([0] * 50 + [1] * 50) + # decision_threshold_curve(y_true, y_true, metric, scoring_kwargs=scoring_kwargs) + assert True def test_passing_the_grid(): - y = [0] * 500 + [1] * 500 - y_score = list(range(1000)) + # y = [0] * 500 + [1] * 500 + # y_score = list(range(1000)) - grid_sorted = np.array(list(range(200, 300))) - _, thresholds_sorted = decision_threshold_curve( - y, y_score, accuracy_score, thresholds=grid_sorted - ) + # grid_sorted = np.array(list(range(200, 300))) + # _, thresholds_sorted = decision_threshold_curve( + # y, y_score, accuracy_score, thresholds=grid_sorted + # ) - assert_allclose(grid_sorted, thresholds_sorted) + # assert_allclose(grid_sorted, thresholds_sorted) - grid_not_sorted = grid_sorted[::-1] - _, thresholds_not_sorted = decision_threshold_curve( - y, y_score, accuracy_score, thresholds=grid_not_sorted - ) + # grid_not_sorted = grid_sorted[::-1] + # _, thresholds_not_sorted = decision_threshold_curve( + # y, y_score, accuracy_score, thresholds=grid_not_sorted + # ) - assert_allclose(grid_sorted, thresholds_not_sorted) + # assert_allclose(grid_sorted, thresholds_not_sorted) + assert True diff --git a/sklearn/model_selection/_classification_threshold.py b/sklearn/model_selection/_classification_threshold.py index bd30a98ac7cc9..8a6015a9bd46a 100644 --- a/sklearn/model_selection/_classification_threshold.py +++ b/sklearn/model_selection/_classification_threshold.py @@ -903,6 +903,6 @@ def _get_curve_scorer(self): """Get the curve scorer based on the objective metric used.""" scoring = check_scoring(self.estimator, scoring=self.scoring) curve_scorer = _CurveScorer.from_scorer( - scoring, self._get_response_method(), self.thresholds + scoring, self.thresholds, self._get_response_method() ) return curve_scorer From d46bc1a08862e3a43d936be8c22a511a33ed4c9e Mon Sep 17 00:00:00 2001 From: vitaliset Date: Tue, 30 Jul 2024 11:19:54 -0300 Subject: [PATCH 26/42] correcting circular dependences --- sklearn/metrics/_decision_threshold.py | 2 +- sklearn/metrics/_scorer.py | 11 +++-------- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/sklearn/metrics/_decision_threshold.py b/sklearn/metrics/_decision_threshold.py index fab20e2730fe0..8bf61e08e0653 100644 --- a/sklearn/metrics/_decision_threshold.py +++ b/sklearn/metrics/_decision_threshold.py @@ -94,6 +94,6 @@ def decision_threshold_curve( """ # if scoring is function ... transform into scorer (do I need an estimator?) curve_scorer = _CurveScorer.from_scorer(scoring, thresholds) - thresholds, metric_values = curve_scorer._score_given_prediction(y_score) + metric_values, thresholds = curve_scorer._score_given_prediction(y_score) return metric_values, thresholds diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py index 853bc3b7f6f72..45b3dec796d40 100644 --- a/sklearn/metrics/_scorer.py +++ b/sklearn/metrics/_scorer.py @@ -1168,9 +1168,6 @@ def _score_given_prediction( classes: TODO(Carlo) ... - pos_label: TODO(Carlo) - ... - **kwargs : dict Other parameters passed to the scorer. @@ -1184,8 +1181,7 @@ def _score_given_prediction( """ if classes is None: classes = _unique(y_true) - if pos_label is None: - pos_label = self._get_pos_label() + pos_label = self._get_pos_label() scoring_kwargs = {**self._kwargs, **kwargs} if isinstance(self._thresholds, Integral): potential_thresholds = np.linspace( @@ -1239,14 +1235,13 @@ def _score(self, method_caller, estimator, X, y_true, **kwargs): "If response_method is set to `None`, you can't use this method. " "Use `_score_given_prediction` instead." ) - pos_label = self._get_pos_label() y_score = method_caller( - estimator, self._response_method, X, pos_label=pos_label + estimator, self._response_method, X, pos_label=self._get_pos_label() ) classes = estimator.classes_ scores, potential_thresholds = self._score_given_prediction( - y_score, y_true, classes, pos_label, **kwargs + y_score, y_true, classes, **kwargs ) return scores, potential_thresholds From a424c3ea2fd0e7aea509ded99794948b9c5a8a3e Mon Sep 17 00:00:00 2001 From: vitaliset Date: Sun, 29 Sep 2024 21:23:45 -0300 Subject: [PATCH 27/42] trying to solve the circular imports. looks like the order of init is important --- sklearn/metrics/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/__init__.py b/sklearn/metrics/__init__.py index fb858497e45e8..25f7b62149884 100644 --- a/sklearn/metrics/__init__.py +++ b/sklearn/metrics/__init__.py @@ -26,7 +26,6 @@ recall_score, zero_one_loss, ) -from ._decision_threshold import decision_threshold_curve from ._dist_metrics import DistanceMetric from ._plot.confusion_matrix import ConfusionMatrixDisplay from ._plot.det_curve import DetCurveDisplay @@ -67,6 +66,7 @@ root_mean_squared_log_error, ) from ._scorer import check_scoring, get_scorer, get_scorer_names, make_scorer +from ._decision_threshold import decision_threshold_curve from .cluster import ( adjusted_mutual_info_score, adjusted_rand_score, From bd256a85c8275bb6875af12a6faa5d4a0407f064 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Thu, 8 May 2025 15:38:51 +1000 Subject: [PATCH 28/42] first commit, original tests pass --- .../sklearn.metrics/25639.major-feature.rst | 4 + sklearn/metrics/_decision_threshold.py | 97 ++++++++---- sklearn/metrics/_scorer.py | 145 +++++++++++++----- 3 files changed, 176 insertions(+), 70 deletions(-) create mode 100644 doc/whats_new/upcoming_changes/sklearn.metrics/25639.major-feature.rst diff --git a/doc/whats_new/upcoming_changes/sklearn.metrics/25639.major-feature.rst b/doc/whats_new/upcoming_changes/sklearn.metrics/25639.major-feature.rst new file mode 100644 index 0000000000000..52ebf021f0a7f --- /dev/null +++ b/doc/whats_new/upcoming_changes/sklearn.metrics/25639.major-feature.rst @@ -0,0 +1,4 @@ +- :func:`inspection.metric_threshold_curve` has been added to + assess performance over thresholds by computing a threshold-dependent + metric of interest per threshold. By + :user:`Carlo Lemos ` and :user:`Lucy Liu `. diff --git a/sklearn/metrics/_decision_threshold.py b/sklearn/metrics/_decision_threshold.py index 8bf61e08e0653..7f1b0a5651443 100644 --- a/sklearn/metrics/_decision_threshold.py +++ b/sklearn/metrics/_decision_threshold.py @@ -9,58 +9,83 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause -from numbers import Integral +from numbers import Integral, Real -from ..utils._param_validation import Interval, validate_params -from ._scorer import _CurveScorer +from ..utils._param_validation import Interval, Options, validate_params @validate_params( { + "scoring_function": [callable], "y_true": ["array-like"], "y_score": ["array-like"], - "scoring": [callable], "thresholds": [ - Interval(Integral, 3, None, closed="left"), + Interval(Integral, 2, None, closed="left"), "array-like", - None, ], + "sign": Options(Real, {0, 1}), + "labels": ["array-like", None], + "pos_label": [Real, str, "boolean", None], }, prefer_skip_nested_validation=True, ) def decision_threshold_curve( + scoring_function, y_true, y_score, - scoring, - thresholds=100, + # Should below 2 have a default value? + thresholds=20, + sign=1, + labels=None, + pos_label=None, + **kwargs, ): - """Compute the threshold-dependent metric of interest per threshold. + """Compute threshold-dependent metric of interest per threshold. Note: this implementation is restricted to the binary classification task. Read more in the :ref:`User Guide `. - .. versionadded:: 1.6 + .. versionadded:: 1.8 Parameters ---------- - y_true : array-like of shape (n_samples,), default=None - True targets of binary classification. - - y_score : array-like of shape (n_samples,), default=None - Estimated probabilities or output of a decision function. - - scoring : callable, default=None - The objective metric to be estimated. It should be a callable object created - with :func:`~sklearn.metrics.make_scorer`. - # TODO(Carlo): Change it to also just be a function callable. In this case, - # transform it in a scorer inside the function. - - thresholds : int or array-like, default=100 - Related to the number of decision thresholds for which we want to compute the - score. If an integer, it will be used to generate `thresholds` thresholds - uniformly distributed between the minimum and maximum of `y_score`. If an - array-like, it will be used as the thresholds. + scoring_function : callable + The score function to use. It will be called as + `score_func(y_true, y_pred, **kwargs)`. + TODO: decided on `scoring_function` as term also used in forest estimators + + y_true : array-like of shape (n_samples,) + Ground truth (correct) target labels. + + y_score : array-like of shape (n_samples,) + Continuous response scores. + + thresholds : int or array-like, default=20 + Specifies number of decision thresholds to compute score for. If an integer, + it will be used to generate `thresholds` thresholds uniformly distributed + between the minimum and maximum of `y_score`. If an array-like, it will be + used as the thresholds. + + sign : int, default=1 + Either 1 or -1. Score is computed as `sign * score_func(estimator, X, y)`. + Thus, `sign` defines whether higher scores are better or worse. + + labels: array-like, default=None + Class labels. If `None`, inferred from `y_true`. + + pos_label : int, float, bool or str, default=None + The label of the positive class, used when thresholding `y_score`. + If `score_func` also has a `pos_label` parameter, this value will also + be passed `score_func`. + If `None`, the default value of `score_func(pos_label)`, if present, is + used. If not present, `1` is used. + TODO: do we need to allow the user to set this even when `score_func` + does not take `pos_label`? I think yes, so user can control + output of `_threshold_scores_to_class_labels`. + + **kwargs : dict + Parameters to pass to `score_func`. Returns ------- @@ -92,8 +117,16 @@ def decision_threshold_curve( >>> accuracy_values array([0.75, 0.5 , 0.75, 0.5 ]) """ - # if scoring is function ... transform into scorer (do I need an estimator?) - curve_scorer = _CurveScorer.from_scorer(scoring, thresholds) - metric_values, thresholds = curve_scorer._score_given_prediction(y_score) - - return metric_values, thresholds + # To prevent circular import + from ._scorer import _CurveScorer + + return _CurveScorer._scores_from_prediction( + scoring_function, + thresholds, + y_true, + y_score, + sign, + labels, + pos_label, + **kwargs, + ) diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py index c78a50aa6ff95..74b987a0e2d46 100644 --- a/sklearn/metrics/_scorer.py +++ b/sklearn/metrics/_scorer.py @@ -28,9 +28,9 @@ from ..base import is_regressor from ..utils import Bunch -from ..utils._encode import _unique from ..utils._param_validation import HasMethods, Hidden, StrOptions, validate_params from ..utils._response import _get_response_values +from ..utils._unique import cached_unique from ..utils.metadata_routing import ( MetadataRequest, MetadataRouter, @@ -1072,21 +1072,21 @@ class _CurveScorer(_BaseScorer): `score_func(y_true, y_pred, **kwargs)`. sign : int - Either 1 or -1 to returns the score with `sign * score_func(estimator, X, y)`. - Thus, `sign` defined if higher scores are better or worse. + Either 1 or -1. Score is returned as `sign * score_func(estimator, X, y)`. + Thus, `sign` defines whether higher scores are better or worse. kwargs : dict Additional parameters to pass to the score function. thresholds : int or array-like - Related to the number of decision thresholds for which we want to compute the - score. If an integer, it will be used to generate `thresholds` thresholds - uniformly distributed between the minimum and maximum predicted scores. If an - array-like, it will be used as the thresholds. + Specifies number of decision thresholds to compute score for. If an integer, + it will be used to generate `thresholds` thresholds uniformly distributed + between the minimum and maximum of `y_score`. If an array-like, it will be + used as the thresholds. response_method : str, default=None - The method to call on the estimator to get the response values. If value is set - to `None`, then + The method to call on the estimator to get the response values. + If set to `None`, the `_scores_from_estimator` method cannot be used. """ def __init__(self, score_func, sign, kwargs, thresholds, response_method=None): @@ -1112,57 +1112,113 @@ def from_scorer(cls, scorer, thresholds, response_method=None): instance._metadata_request = scorer._get_metadata_request() return instance - # TODO(Carlo): Create tests for this functions. - def _score_given_prediction( - self, y_score, y_true, classes=None, pos_label=None, **kwargs + @staticmethod + def _scores_from_prediction( + scoring_function, + thresholds, + y_true, + y_score, + sign, + classes=None, + pos_label=None, + **kwargs, ): - """Calculate the scores for given prediction values and true labels. + """Computes scores per threshold, given continuous response and true labels. Parameters ---------- - y_score : array-like of shape (n_samples,) - Predicted target scores. + scoring_function : callable + The score function to use. It will be called as + `score_func(y_true, y_pred, **kwargs)`. + + thresholds : int or array-like + Specifies number of decision thresholds to compute score for. If an integer, + it will be used to generate `thresholds` thresholds uniformly distributed + between the minimum and maximum of `y_score`. If an array-like, it will be + used as the thresholds. y_true : array-like of shape (n_samples,) - Gold standard target values. + Ground truth (correct) target labels. - classes: TODO(Carlo) - ... + y_score : array-like of shape (n_samples,) + Continuous response scores. + + sign : int + Either 1 or -1. Score is computed as `sign * score_func(estimator, X, y)`. + Thus, `sign` defines whether higher scores are better or worse. + + classes: array-like, default=None + Class labels. If `None`, inferred from `y_true`. + + pos_label : int, float, bool or str, default=None + The label of the positive class, used when thresholding `y_score`. + If `score_func` also has a `pos_label` parameter, this value will also + be passed `score_func`. + If `None`, the default value of `score_func(pos_label)`, if present, is + used. If not present, `1` is used. + TODO: do we need to allow the user to set this even when `score_func` + does not take `pos_label`? I think yes, so user can control + output of `_threshold_scores_to_class_labels`. **kwargs : dict - Other parameters passed to the scorer. + Parameters to pass to `score_func`. Returns ------- score_thresholds : ndarray of shape (thresholds,) The scores associated with each threshold. - potential_thresholds : ndarray of shape (thresholds,) - The potential thresholds used to compute the scores. + thresholds : ndarray of shape (thresholds,) + The thresholds used to compute the scores. """ + # This could also be done in `decision_threshold_curve`, not sure which + # is better + y_true_unique = cached_unique(y_true) if classes is None: - classes = _unique(y_true) - pos_label = self._get_pos_label() - scoring_kwargs = {**self._kwargs, **kwargs} - if isinstance(self._thresholds, Integral): + classes = y_true_unique + score_func_params = signature(scoring_function).parameters + if "pos_label" in score_func_params: + # Should I avoid over-writing kwargs? + kwargs = {"pos_label": pos_label, **kwargs} + if pos_label is None: + pos_label = score_func_params["pos_label"].default + + # Check param values that are used in this function, other checks left to + # score func + if pos_label is not None and pos_label not in classes: + raise ValueError( + f"`pos_label` ({pos_label}) not present in `classes` ({classes})." + ) + # not sure if this separate error msg needed. + # there is the possibility that set(classes) != set(y_true_unique) fails + # because `y_true` only contains one class. + if len(y_true_unique) == 1: + raise ValueError("`y_true` only contains one class label.") + if set(classes) != set(y_true_unique): + raise ValueError( + f"`classes` ({classes}) is not equal to the unique values in `y_true` " + f"({y_true_unique})." + ) + + if isinstance(thresholds, Integral): potential_thresholds = np.linspace( - np.min(y_score), np.max(y_score), self._thresholds + np.min(y_score), np.max(y_score), thresholds ) else: - potential_thresholds = np.asarray(self._thresholds) + potential_thresholds = np.asarray(thresholds) score_thresholds = [ - self._sign - * self._score_func( + sign + * scoring_function( y_true, _threshold_scores_to_class_labels(y_score, th, classes, pos_label), - **scoring_kwargs, + **kwargs, ) for th in potential_thresholds ] return np.array(score_thresholds), potential_thresholds def _score(self, method_caller, estimator, X, y_true, **kwargs): - """Evaluate predicted target values for X relative to y_true. + """Computes scores per threshold, given estimator, X and true labels. Parameters ---------- @@ -1193,16 +1249,29 @@ def _score(self, method_caller, estimator, X, y_true, **kwargs): """ if self._response_method is None: raise ValueError( - "If response_method is set to `None`, you can't use this method. " - "Use `_score_given_prediction` instead." + "This method cannot be used when `_CurveScorer` initialized with " + "`response_method=None`" ) + + pos_label = self._get_pos_label() y_score = method_caller( - estimator, self._response_method, X, pos_label=self._get_pos_label() + estimator, self._response_method, X, pos_label=pos_label ) - classes = estimator.classes_ - scores, potential_thresholds = self._score_given_prediction( - y_score, y_true, classes, **kwargs + # Remove `pos_label` from `self.kwargs` to prevent passing multiple values + self_kwargs_ = self._kwargs.copy() + self_kwargs_.pop("pos_label", None) + # why 'potential' ? + score_thresholds, potential_thresholds = self._scores_from_prediction( + # Should I make all these params keyword only? + self._score_func, + self._thresholds, + y_true, + y_score, + self._sign, + estimator.classes_, + pos_label, + **{**self_kwargs_, **kwargs}, ) - return scores, potential_thresholds + return score_thresholds, potential_thresholds From a386ded7a8ea4e075a56197638c2bd42b8c1ca43 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Thu, 8 May 2025 15:49:13 +1000 Subject: [PATCH 29/42] min test to check func runs --- sklearn/metrics/tests/test_score_objects.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index 672ed8ae7eecc..7162ae3df3042 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -1653,6 +1653,21 @@ def test_curve_scorer_pos_label(global_random_seed): assert scores_pos_label_1.max() == pytest.approx(1.0) +def test_curve_scorer_scores_from_prediction(): + """Check behavior of `_CurveScorer._scores_from_prediction`.""" + X, y = make_classification(random_state=0) + lr = LogisticRegression().fit(X, y) + y_score = lr.predict_proba(X) + + score_thresholds, potential_thresholds = _CurveScorer._scores_from_prediction( + balanced_accuracy_score, + thresholds=10, + y_true=y, + y_score=y_score[:, 1], + sign=1, + ) + + # TODO(1.8): remove def test_make_scorer_reponse_method_default_warning(): with pytest.warns(FutureWarning, match="response_method=None is deprecated"): From eae5846639917d4755d2640a8b9d30c809dbba61 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Thu, 8 May 2025 18:51:15 +1000 Subject: [PATCH 30/42] nits --- sklearn/metrics/_decision_threshold.py | 10 ++++------ sklearn/metrics/_scorer.py | 10 +++++----- sklearn/metrics/tests/test_score_objects.py | 6 +++--- 3 files changed, 12 insertions(+), 14 deletions(-) diff --git a/sklearn/metrics/_decision_threshold.py b/sklearn/metrics/_decision_threshold.py index 7f1b0a5651443..fb53d97ccc762 100644 --- a/sklearn/metrics/_decision_threshold.py +++ b/sklearn/metrics/_decision_threshold.py @@ -89,13 +89,11 @@ def decision_threshold_curve( Returns ------- - metric_values : ndarray of shape (n_thresholds,) - The scores associated to each threshold. At index i being the value of the - theshold-dependent metric for predictions score >= thresholds[i]. - # TODO(Carlo) Check if > or >= + score_thresholds : ndarray of shape (n_thresholds,) + The scores associated with each threshold. thresholds : ndarray of shape (n_thresholds,) - Ascending score values used as thresholds. + The thresholds used to compute the scores. See Also -------- @@ -120,7 +118,7 @@ def decision_threshold_curve( # To prevent circular import from ._scorer import _CurveScorer - return _CurveScorer._scores_from_prediction( + return _CurveScorer._scores_from_predictions( scoring_function, thresholds, y_true, diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py index 74b987a0e2d46..d89cccceea96c 100644 --- a/sklearn/metrics/_scorer.py +++ b/sklearn/metrics/_scorer.py @@ -1099,13 +1099,13 @@ def __init__(self, score_func, sign, kwargs, thresholds, response_method=None): self._thresholds = thresholds @classmethod - def from_scorer(cls, scorer, thresholds, response_method=None): + def from_scorer(cls, scorer, response_method, thresholds): """Create a continuous scorer from a normal scorer.""" instance = cls( score_func=scorer._score_func, sign=scorer._sign, - thresholds=thresholds, response_method=response_method, + thresholds=thresholds, kwargs=scorer._kwargs, ) # transfer the metadata request @@ -1113,7 +1113,7 @@ def from_scorer(cls, scorer, thresholds, response_method=None): return instance @staticmethod - def _scores_from_prediction( + def _scores_from_predictions( scoring_function, thresholds, y_true, @@ -1127,7 +1127,7 @@ def _scores_from_prediction( Parameters ---------- - scoring_function : callable + score_func : callable The score function to use. It will be called as `score_func(y_true, y_pred, **kwargs)`. @@ -1262,7 +1262,7 @@ def _score(self, method_caller, estimator, X, y_true, **kwargs): self_kwargs_ = self._kwargs.copy() self_kwargs_.pop("pos_label", None) # why 'potential' ? - score_thresholds, potential_thresholds = self._scores_from_prediction( + score_thresholds, potential_thresholds = self._scores_from_predictions( # Should I make all these params keyword only? self._score_func, self._thresholds, diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index 7162ae3df3042..26ad5eb40f88d 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -1653,13 +1653,13 @@ def test_curve_scorer_pos_label(global_random_seed): assert scores_pos_label_1.max() == pytest.approx(1.0) -def test_curve_scorer_scores_from_prediction(): - """Check behavior of `_CurveScorer._scores_from_prediction`.""" +def test_curve_scorer_scores_from_predictions(): + """Check behavior of `_CurveScorer._scores_from_predictions`.""" X, y = make_classification(random_state=0) lr = LogisticRegression().fit(X, y) y_score = lr.predict_proba(X) - score_thresholds, potential_thresholds = _CurveScorer._scores_from_prediction( + score_thresholds, potential_thresholds = _CurveScorer._scores_from_predictions( balanced_accuracy_score, thresholds=10, y_true=y, From 5e7fd4984d11a1cf2bdb1d9aae8f4a60bd974062 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Thu, 8 May 2025 18:55:21 +1000 Subject: [PATCH 31/42] amend whats new; --- .../{25639.major-feature.rst => 31338.major-feature.rst} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename doc/whats_new/upcoming_changes/sklearn.metrics/{25639.major-feature.rst => 31338.major-feature.rst} (100%) diff --git a/doc/whats_new/upcoming_changes/sklearn.metrics/25639.major-feature.rst b/doc/whats_new/upcoming_changes/sklearn.metrics/31338.major-feature.rst similarity index 100% rename from doc/whats_new/upcoming_changes/sklearn.metrics/25639.major-feature.rst rename to doc/whats_new/upcoming_changes/sklearn.metrics/31338.major-feature.rst From 8efaaca6c34733f7db9bc83041f4197ad5431bd9 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Thu, 8 May 2025 19:18:33 +1000 Subject: [PATCH 32/42] revert from scorer order --- sklearn/model_selection/_classification_threshold.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/model_selection/_classification_threshold.py b/sklearn/model_selection/_classification_threshold.py index fcf27f1c32189..a5a898abdd1da 100644 --- a/sklearn/model_selection/_classification_threshold.py +++ b/sklearn/model_selection/_classification_threshold.py @@ -889,6 +889,6 @@ def _get_curve_scorer(self): """Get the curve scorer based on the objective metric used.""" scoring = check_scoring(self.estimator, scoring=self.scoring) curve_scorer = _CurveScorer.from_scorer( - scoring, self.thresholds, self._get_response_method() + scoring, self._get_response_method(), self.thresholds ) return curve_scorer From 6e0b5e0b27e40ec24ba749457e779d71e9750b65 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Thu, 8 May 2025 19:44:46 +1000 Subject: [PATCH 33/42] amend to method --- sklearn/metrics/_decision_threshold.py | 7 ++- sklearn/metrics/_scorer.py | 49 ++++++--------------- sklearn/metrics/tests/test_score_objects.py | 8 ++-- 3 files changed, 21 insertions(+), 43 deletions(-) diff --git a/sklearn/metrics/_decision_threshold.py b/sklearn/metrics/_decision_threshold.py index fb53d97ccc762..bc6e8c49596a3 100644 --- a/sklearn/metrics/_decision_threshold.py +++ b/sklearn/metrics/_decision_threshold.py @@ -73,6 +73,7 @@ def decision_threshold_curve( labels: array-like, default=None Class labels. If `None`, inferred from `y_true`. + TODO: used `labels` instead of `classes` to be consistent with other metrics. pos_label : int, float, bool or str, default=None The label of the positive class, used when thresholding `y_score`. @@ -118,12 +119,10 @@ def decision_threshold_curve( # To prevent circular import from ._scorer import _CurveScorer - return _CurveScorer._scores_from_predictions( - scoring_function, - thresholds, + curve_scorer = _CurveScorer(scoring_function, sign, {}, thresholds) + return curve_scorer._scores_from_predictions( y_true, y_score, - sign, labels, pos_label, **kwargs, diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py index d89cccceea96c..9b1424d02c0c8 100644 --- a/sklearn/metrics/_scorer.py +++ b/sklearn/metrics/_scorer.py @@ -1112,13 +1112,10 @@ def from_scorer(cls, scorer, response_method, thresholds): instance._metadata_request = scorer._get_metadata_request() return instance - @staticmethod def _scores_from_predictions( - scoring_function, - thresholds, + self, y_true, y_score, - sign, classes=None, pos_label=None, **kwargs, @@ -1127,26 +1124,12 @@ def _scores_from_predictions( Parameters ---------- - score_func : callable - The score function to use. It will be called as - `score_func(y_true, y_pred, **kwargs)`. - - thresholds : int or array-like - Specifies number of decision thresholds to compute score for. If an integer, - it will be used to generate `thresholds` thresholds uniformly distributed - between the minimum and maximum of `y_score`. If an array-like, it will be - used as the thresholds. - y_true : array-like of shape (n_samples,) Ground truth (correct) target labels. y_score : array-like of shape (n_samples,) Continuous response scores. - sign : int - Either 1 or -1. Score is computed as `sign * score_func(estimator, X, y)`. - Thus, `sign` defines whether higher scores are better or worse. - classes: array-like, default=None Class labels. If `None`, inferred from `y_true`. @@ -1154,14 +1137,14 @@ def _scores_from_predictions( The label of the positive class, used when thresholding `y_score`. If `score_func` also has a `pos_label` parameter, this value will also be passed `score_func`. - If `None`, the default value of `score_func(pos_label)`, if present, is - used. If not present, `1` is used. + If `None`, use the default value of `self.score_func(pos_label)` if + present. If not present, `1` is used. TODO: do we need to allow the user to set this even when `score_func` does not take `pos_label`? I think yes, so user can control output of `_threshold_scores_to_class_labels`. **kwargs : dict - Parameters to pass to `score_func`. + Parameters to pass to `self.score_func`. Returns ------- @@ -1176,14 +1159,14 @@ def _scores_from_predictions( y_true_unique = cached_unique(y_true) if classes is None: classes = y_true_unique - score_func_params = signature(scoring_function).parameters + score_func_params = signature(self._score_func).parameters if "pos_label" in score_func_params: - # Should I avoid over-writing kwargs? + # Should I avoid changing kwargs var? kwargs = {"pos_label": pos_label, **kwargs} if pos_label is None: pos_label = score_func_params["pos_label"].default - # Check param values that are used in this function, other checks left to + # TODO Check param values that are used in this function, other checks left to # score func if pos_label is not None and pos_label not in classes: raise ValueError( @@ -1196,19 +1179,19 @@ def _scores_from_predictions( raise ValueError("`y_true` only contains one class label.") if set(classes) != set(y_true_unique): raise ValueError( - f"`classes` ({classes}) is not equal to the unique values in `y_true` " - f"({y_true_unique})." + f"`classes` ({classes}) is not equal to the unique values found in " + f"`y_true` ({y_true_unique})." ) - if isinstance(thresholds, Integral): + if isinstance(self._thresholds, Integral): potential_thresholds = np.linspace( - np.min(y_score), np.max(y_score), thresholds + np.min(y_score), np.max(y_score), self._thresholds ) else: - potential_thresholds = np.asarray(thresholds) + potential_thresholds = np.asarray(self._thresholds) score_thresholds = [ - sign - * scoring_function( + self._sign + * self._score_func( y_true, _threshold_scores_to_class_labels(y_score, th, classes, pos_label), **kwargs, @@ -1263,12 +1246,8 @@ def _score(self, method_caller, estimator, X, y_true, **kwargs): self_kwargs_.pop("pos_label", None) # why 'potential' ? score_thresholds, potential_thresholds = self._scores_from_predictions( - # Should I make all these params keyword only? - self._score_func, - self._thresholds, y_true, y_score, - self._sign, estimator.classes_, pos_label, **{**self_kwargs_, **kwargs}, diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index 26ad5eb40f88d..8d4a8574f4a77 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -1659,12 +1659,12 @@ def test_curve_scorer_scores_from_predictions(): lr = LogisticRegression().fit(X, y) y_score = lr.predict_proba(X) - score_thresholds, potential_thresholds = _CurveScorer._scores_from_predictions( - balanced_accuracy_score, - thresholds=10, + curve_scorer = _CurveScorer( + balanced_accuracy_score, sign=1, kwargs={}, thresholds=10 + ) + score_thresholds, potential_thresholds = curve_scorer._scores_from_predictions( y_true=y, y_score=y_score[:, 1], - sign=1, ) From 0ac8d1d1ec1e6b26501c459cb52790a6f8cf07eb Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Thu, 8 May 2025 20:43:50 +1000 Subject: [PATCH 34/42] fix param valid, use greater_is_better --- sklearn/metrics/_decision_threshold.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/sklearn/metrics/_decision_threshold.py b/sklearn/metrics/_decision_threshold.py index bc6e8c49596a3..a799ed789b009 100644 --- a/sklearn/metrics/_decision_threshold.py +++ b/sklearn/metrics/_decision_threshold.py @@ -11,31 +11,31 @@ from numbers import Integral, Real -from ..utils._param_validation import Interval, Options, validate_params +from ..utils._param_validation import Interval, validate_params @validate_params( { - "scoring_function": [callable], + "score_func": [callable], "y_true": ["array-like"], "y_score": ["array-like"], "thresholds": [ Interval(Integral, 2, None, closed="left"), "array-like", ], - "sign": Options(Real, {0, 1}), + "greater_is_better": ["boolean"], "labels": ["array-like", None], "pos_label": [Real, str, "boolean", None], }, prefer_skip_nested_validation=True, ) def decision_threshold_curve( - scoring_function, + score_func, y_true, y_score, # Should below 2 have a default value? thresholds=20, - sign=1, + greater_is_better=True, labels=None, pos_label=None, **kwargs, @@ -50,7 +50,7 @@ def decision_threshold_curve( Parameters ---------- - scoring_function : callable + score_func : callable The score function to use. It will be called as `score_func(y_true, y_pred, **kwargs)`. TODO: decided on `scoring_function` as term also used in forest estimators @@ -67,9 +67,10 @@ def decision_threshold_curve( between the minimum and maximum of `y_score`. If an array-like, it will be used as the thresholds. - sign : int, default=1 - Either 1 or -1. Score is computed as `sign * score_func(estimator, X, y)`. - Thus, `sign` defines whether higher scores are better or worse. + greater_is_better : bool, default=True + Whether `score_func` is a score function (default), meaning high is + good, or a loss function, meaning low is good. In the latter case, the + the output of `score_func` will be sign-flipped. labels: array-like, default=None Class labels. If `None`, inferred from `y_true`. @@ -119,7 +120,8 @@ def decision_threshold_curve( # To prevent circular import from ._scorer import _CurveScorer - curve_scorer = _CurveScorer(scoring_function, sign, {}, thresholds) + sign = 1 if greater_is_better else -1 + curve_scorer = _CurveScorer(score_func, sign, {}, thresholds) return curve_scorer._scores_from_predictions( y_true, y_score, From 8a8e2408e105766320a87b1df75a1b86efd8e544 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Mon, 12 May 2025 12:02:22 +1000 Subject: [PATCH 35/42] fix example --- sklearn/metrics/_decision_threshold.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/sklearn/metrics/_decision_threshold.py b/sklearn/metrics/_decision_threshold.py index a799ed789b009..0399330c90ab9 100644 --- a/sklearn/metrics/_decision_threshold.py +++ b/sklearn/metrics/_decision_threshold.py @@ -53,7 +53,6 @@ def decision_threshold_curve( score_func : callable The score function to use. It will be called as `score_func(y_true, y_pred, **kwargs)`. - TODO: decided on `scoring_function` as term also used in forest estimators y_true : array-like of shape (n_samples,) Ground truth (correct) target labels. @@ -104,18 +103,17 @@ def decision_threshold_curve( det_curve : Compute error rates for different probability thresholds. roc_curve : Compute Receiver operating characteristic (ROC) curve. - Examples #TODO(Carlo) change the example and fix threshold. -------- >>> import numpy as np >>> from sklearn.metrics import accuracy_score, decision_threshold_curve >>> y_true = np.array([0, 0, 1, 1]) >>> y_score = np.array([0.1, 0.4, 0.35, 0.8]) - >>> accuracy_values, thresholds = decision_threshold_curve( - ... y_true, y_score, accuracy_score) + >>> score_thresholds, thresholds = decision_threshold_curve( + ... accuracy_score, y_true, y_score, thresholds=4) >>> thresholds - array([0.1 , 0.35, 0.4 , 0.8 ]) - >>> accuracy_values - array([0.75, 0.5 , 0.75, 0.5 ]) + array([0.1, 0.33333333, 0.56666667, 0.8 ]) + >>> score_thresholds + array([0.5, 0.75, 0.75, 0.75]) """ # To prevent circular import from ._scorer import _CurveScorer From bf887aa7eda1d2d1c1002d999f52eae69b281708 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Mon, 12 May 2025 13:25:01 +1000 Subject: [PATCH 36/42] fix examples --- sklearn/metrics/_decision_threshold.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sklearn/metrics/_decision_threshold.py b/sklearn/metrics/_decision_threshold.py index 0399330c90ab9..dc7f86e1fe7b5 100644 --- a/sklearn/metrics/_decision_threshold.py +++ b/sklearn/metrics/_decision_threshold.py @@ -103,6 +103,7 @@ def decision_threshold_curve( det_curve : Compute error rates for different probability thresholds. roc_curve : Compute Receiver operating characteristic (ROC) curve. + Examples -------- >>> import numpy as np >>> from sklearn.metrics import accuracy_score, decision_threshold_curve From edf99f0ed4ec3b72d672a546a20a63fb2d005a19 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Wed, 14 May 2025 21:25:38 +1000 Subject: [PATCH 37/42] rm pos label as param --- sklearn/metrics/_decision_threshold.py | 12 ----------- sklearn/metrics/_scorer.py | 28 +++----------------------- 2 files changed, 3 insertions(+), 37 deletions(-) diff --git a/sklearn/metrics/_decision_threshold.py b/sklearn/metrics/_decision_threshold.py index dc7f86e1fe7b5..bac04ad297bee 100644 --- a/sklearn/metrics/_decision_threshold.py +++ b/sklearn/metrics/_decision_threshold.py @@ -37,7 +37,6 @@ def decision_threshold_curve( thresholds=20, greater_is_better=True, labels=None, - pos_label=None, **kwargs, ): """Compute threshold-dependent metric of interest per threshold. @@ -75,16 +74,6 @@ def decision_threshold_curve( Class labels. If `None`, inferred from `y_true`. TODO: used `labels` instead of `classes` to be consistent with other metrics. - pos_label : int, float, bool or str, default=None - The label of the positive class, used when thresholding `y_score`. - If `score_func` also has a `pos_label` parameter, this value will also - be passed `score_func`. - If `None`, the default value of `score_func(pos_label)`, if present, is - used. If not present, `1` is used. - TODO: do we need to allow the user to set this even when `score_func` - does not take `pos_label`? I think yes, so user can control - output of `_threshold_scores_to_class_labels`. - **kwargs : dict Parameters to pass to `score_func`. @@ -125,6 +114,5 @@ def decision_threshold_curve( y_true, y_score, labels, - pos_label, **kwargs, ) diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py index 9b1424d02c0c8..93d0d1b4b12f8 100644 --- a/sklearn/metrics/_scorer.py +++ b/sklearn/metrics/_scorer.py @@ -1117,7 +1117,6 @@ def _scores_from_predictions( y_true, y_score, classes=None, - pos_label=None, **kwargs, ): """Computes scores per threshold, given continuous response and true labels. @@ -1133,16 +1132,6 @@ def _scores_from_predictions( classes: array-like, default=None Class labels. If `None`, inferred from `y_true`. - pos_label : int, float, bool or str, default=None - The label of the positive class, used when thresholding `y_score`. - If `score_func` also has a `pos_label` parameter, this value will also - be passed `score_func`. - If `None`, use the default value of `self.score_func(pos_label)` if - present. If not present, `1` is used. - TODO: do we need to allow the user to set this even when `score_func` - does not take `pos_label`? I think yes, so user can control - output of `_threshold_scores_to_class_labels`. - **kwargs : dict Parameters to pass to `self.score_func`. @@ -1159,19 +1148,6 @@ def _scores_from_predictions( y_true_unique = cached_unique(y_true) if classes is None: classes = y_true_unique - score_func_params = signature(self._score_func).parameters - if "pos_label" in score_func_params: - # Should I avoid changing kwargs var? - kwargs = {"pos_label": pos_label, **kwargs} - if pos_label is None: - pos_label = score_func_params["pos_label"].default - - # TODO Check param values that are used in this function, other checks left to - # score func - if pos_label is not None and pos_label not in classes: - raise ValueError( - f"`pos_label` ({pos_label}) not present in `classes` ({classes})." - ) # not sure if this separate error msg needed. # there is the possibility that set(classes) != set(y_true_unique) fails # because `y_true` only contains one class. @@ -1193,7 +1169,9 @@ def _scores_from_predictions( self._sign * self._score_func( y_true, - _threshold_scores_to_class_labels(y_score, th, classes, pos_label), + _threshold_scores_to_class_labels( + y_score, th, classes, self._get_pos_label() + ), **kwargs, ) for th in potential_thresholds From 9c624c5a0316028268c002d16173a6d4d715afd2 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Wed, 14 May 2025 21:26:31 +1000 Subject: [PATCH 38/42] typo --- sklearn/metrics/_decision_threshold.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/_decision_threshold.py b/sklearn/metrics/_decision_threshold.py index bac04ad297bee..d5949979080d9 100644 --- a/sklearn/metrics/_decision_threshold.py +++ b/sklearn/metrics/_decision_threshold.py @@ -70,7 +70,7 @@ def decision_threshold_curve( good, or a loss function, meaning low is good. In the latter case, the the output of `score_func` will be sign-flipped. - labels: array-like, default=None + labels : array-like, default=None Class labels. If `None`, inferred from `y_true`. TODO: used `labels` instead of `classes` to be consistent with other metrics. From 7643942c3cf2c9a96c72f36d9478d512e5975a02 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Wed, 14 May 2025 22:33:48 +1000 Subject: [PATCH 39/42] pos label fixes --- sklearn/metrics/_scorer.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py index 93d0d1b4b12f8..47422799021f4 100644 --- a/sklearn/metrics/_scorer.py +++ b/sklearn/metrics/_scorer.py @@ -1214,21 +1214,16 @@ def _score(self, method_caller, estimator, X, y_true, **kwargs): "`response_method=None`" ) - pos_label = self._get_pos_label() y_score = method_caller( - estimator, self._response_method, X, pos_label=pos_label + estimator, self._response_method, X, pos_label=self._get_pos_label() ) - # Remove `pos_label` from `self.kwargs` to prevent passing multiple values - self_kwargs_ = self._kwargs.copy() - self_kwargs_.pop("pos_label", None) # why 'potential' ? score_thresholds, potential_thresholds = self._scores_from_predictions( y_true, y_score, estimator.classes_, - pos_label, - **{**self_kwargs_, **kwargs}, + **kwargs, ) return score_thresholds, potential_thresholds From 13c8545d9d5dc7534fc0ef53bfcffda3382f7dff Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Thu, 15 May 2025 11:38:04 +1000 Subject: [PATCH 40/42] fix kwargs --- sklearn/metrics/_decision_threshold.py | 3 +-- sklearn/metrics/_scorer.py | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/sklearn/metrics/_decision_threshold.py b/sklearn/metrics/_decision_threshold.py index d5949979080d9..5a2691668ce5c 100644 --- a/sklearn/metrics/_decision_threshold.py +++ b/sklearn/metrics/_decision_threshold.py @@ -9,7 +9,7 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause -from numbers import Integral, Real +from numbers import Integral from ..utils._param_validation import Interval, validate_params @@ -25,7 +25,6 @@ ], "greater_is_better": ["boolean"], "labels": ["array-like", None], - "pos_label": [Real, str, "boolean", None], }, prefer_skip_nested_validation=True, ) diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py index 47422799021f4..14391314356fa 100644 --- a/sklearn/metrics/_scorer.py +++ b/sklearn/metrics/_scorer.py @@ -1165,6 +1165,7 @@ def _scores_from_predictions( ) else: potential_thresholds = np.asarray(self._thresholds) + score_thresholds = [ self._sign * self._score_func( @@ -1172,7 +1173,7 @@ def _scores_from_predictions( _threshold_scores_to_class_labels( y_score, th, classes, self._get_pos_label() ), - **kwargs, + **{**self._kwargs, **kwargs}, ) for th in potential_thresholds ] @@ -1225,5 +1226,4 @@ def _score(self, method_caller, estimator, X, y_true, **kwargs): estimator.classes_, **kwargs, ) - return score_thresholds, potential_thresholds From b58950570f41f16f777f615639ebfd3bec96c005 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Thu, 15 May 2025 12:03:09 +1000 Subject: [PATCH 41/42] add user guide section --- doc/modules/classification_threshold.rst | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/doc/modules/classification_threshold.rst b/doc/modules/classification_threshold.rst index ee7028f469b5f..d4327487358a3 100644 --- a/doc/modules/classification_threshold.rst +++ b/doc/modules/classification_threshold.rst @@ -63,7 +63,7 @@ Post-tuning the decision threshold One solution to address the problem stated in the introduction is to tune the decision threshold of the classifier once the model has been trained. The -:class:`~sklearn.model_selection.TunedThresholdClassifierCV` tunes this threshold using +:class:`TunedThresholdClassifierCV` tunes this threshold using an internal cross-validation. The optimum threshold is chosen to maximize a given metric. @@ -80,6 +80,17 @@ a utility metric defined by the business (in this case an insurance company). :target: ../auto_examples/model_selection/plot_cost_sensitive_learning.html :align: center +.. _metric_threshold_curve: + +Plotting metric across thresholds +--------------------------------- + +The final plot above shows the value of a utility metric of interest across a range +of threshold values. This can be a useful visualization when tuning decision +threshold, especially if there is more than one metric of interest. The +:func:`decision_threshold_curve` allows you to easily generate such plots as it +computes the values required for each axis, scores per threshold and threshold values. + Options to tune the decision threshold -------------------------------------- @@ -120,7 +131,7 @@ a meaningful metric for their use case. Important notes regarding the internal cross-validation ------------------------------------------------------- -By default :class:`~sklearn.model_selection.TunedThresholdClassifierCV` uses a 5-fold +By default :class:`TunedThresholdClassifierCV` uses a 5-fold stratified cross-validation to tune the decision threshold. The parameter `cv` allows to control the cross-validation strategy. It is possible to bypass cross-validation by setting `cv="prefit"` and providing a fitted classifier. In this case, the decision @@ -143,7 +154,7 @@ Manually setting the decision threshold The previous sections discussed strategies to find an optimal decision threshold. It is also possible to manually set the decision threshold using the class -:class:`~sklearn.model_selection.FixedThresholdClassifier`. In case that you don't want +:class:`FixedThresholdClassifier`. In case that you don't want to refit the model when calling `fit`, wrap your sub-estimator with a :class:`~sklearn.frozen.FrozenEstimator` and do ``FixedThresholdClassifier(FrozenEstimator(estimator), ...)``. From 577ea24edfd5636cbfdb1f7c034d21b804a58911 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Thu, 15 May 2025 12:48:03 +1000 Subject: [PATCH 42/42] change ref label --- doc/modules/classification_threshold.rst | 4 +--- doc/modules/model_evaluation.rst | 2 +- sklearn/metrics/_decision_threshold.py | 10 +++++----- sklearn/model_selection/_classification_threshold.py | 2 +- 4 files changed, 8 insertions(+), 10 deletions(-) diff --git a/doc/modules/classification_threshold.rst b/doc/modules/classification_threshold.rst index d4327487358a3..a6c0f430c2638 100644 --- a/doc/modules/classification_threshold.rst +++ b/doc/modules/classification_threshold.rst @@ -1,6 +1,6 @@ .. currentmodule:: sklearn.model_selection -.. _TunedThresholdClassifierCV: +.. _threshold_tunning: ================================================== Tuning the decision threshold for class prediction @@ -80,8 +80,6 @@ a utility metric defined by the business (in this case an insurance company). :target: ../auto_examples/model_selection/plot_cost_sensitive_learning.html :align: center -.. _metric_threshold_curve: - Plotting metric across thresholds --------------------------------- diff --git a/doc/modules/model_evaluation.rst b/doc/modules/model_evaluation.rst index cf168295a6024..cc15b4ecce185 100644 --- a/doc/modules/model_evaluation.rst +++ b/doc/modules/model_evaluation.rst @@ -63,7 +63,7 @@ The most common decisions are done on binary classification tasks, where the res probability of rain a decision is made on how to act (whether to take mitigating measures like an umbrella or not). For classifiers, this is what :term:`predict` returns. -See also :ref:`TunedThresholdClassifierCV`. +See also :ref:`threshold_tunning`. There are many scoring functions which measure different aspects of such a decision, most of them are covered with or derived from the :func:`metrics.confusion_matrix`. diff --git a/sklearn/metrics/_decision_threshold.py b/sklearn/metrics/_decision_threshold.py index 5a2691668ce5c..9dbadb22edc38 100644 --- a/sklearn/metrics/_decision_threshold.py +++ b/sklearn/metrics/_decision_threshold.py @@ -1,9 +1,9 @@ """Metric per threshold curve to assess binary classification performance. -Given threshold grid, one can undestand the behaviour of threshold-dependent -metrics when changing the threshold. In imbalanced scenarios or -cost-sensitive learning, a 0.5 threshold may not be optimal and tools like -this can help you visualize how the performance changes. +Compute metric per threshold, over a range of threshold values to aid visualization +of threshold-dependent metric behavior. + +Utilizes `_CurveScorer` methods to do all the computation. """ # Authors: The scikit-learn developers @@ -42,7 +42,7 @@ def decision_threshold_curve( Note: this implementation is restricted to the binary classification task. - Read more in the :ref:`User Guide `. + Read more in the :ref:`User Guide `. .. versionadded:: 1.8 diff --git a/sklearn/model_selection/_classification_threshold.py b/sklearn/model_selection/_classification_threshold.py index c68ed38b8819d..707f28c3bc64e 100644 --- a/sklearn/model_selection/_classification_threshold.py +++ b/sklearn/model_selection/_classification_threshold.py @@ -510,7 +510,7 @@ class TunedThresholdClassifierCV(BaseThresholdClassifier): into a class label. The tuning is done by optimizing a binary metric, potentially constrained by a another metric. - Read more in the :ref:`User Guide `. + Read more in the :ref:`User Guide `. .. versionadded:: 1.5