From 8a2313ae154f75bdecf816cad18d36f2f71b1ec0 Mon Sep 17 00:00:00 2001
From: Théophile Baranger
Date: Fri, 31 Mar 2023 21:51:39 +0200
Subject: [PATCH 1/9] add parameters validation for metrics.check_scoring

---
 sklearn/metrics/_scorer.py             | 9 ++++++++-
 sklearn/tests/test_public_functions.py | 1 +
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py
index 9df1b482bdeb3..a6f474fd1a5dd 100644
--- a/sklearn/metrics/_scorer.py
+++ b/sklearn/metrics/_scorer.py
@@ -65,7 +65,7 @@
 from ..utils.multiclass import type_of_target
 from ..base import is_regressor
 
-from ..utils._param_validation import validate_params
+from ..utils._param_validation import HasMethods, validate_params
 
 
 def _cached_call(cache, estimator, method, *args, **kwargs):
@@ -451,6 +451,13 @@ def _passthrough_scorer(estimator, *args, **kwargs):
     return estimator.score(*args, **kwargs)
 
 
+@validate_params(
+    {
+        "estimator": [HasMethods("fit")],
+        "scoring": [str, callable, None],
+        "allow_none": ["boolean"],
+    }
+)
 def check_scoring(estimator, scoring=None, *, allow_none=False):
     """Determine scorer from user options.
 
diff --git a/sklearn/tests/test_public_functions.py b/sklearn/tests/test_public_functions.py
index e127369072828..fe6d503c977eb 100644
--- a/sklearn/tests/test_public_functions.py
+++ b/sklearn/tests/test_public_functions.py
@@ -166,6 +166,7 @@ def _check_function_param_validation(
     "sklearn.metrics.average_precision_score",
     "sklearn.metrics.balanced_accuracy_score",
     "sklearn.metrics.brier_score_loss",
+    "sklearn.metrics.check_scoring",
     "sklearn.metrics.class_likelihood_ratios",
     "sklearn.metrics.classification_report",
     "sklearn.metrics.cluster.adjusted_mutual_info_score",

From cf75eeb3bf7aa93c76ce947857c025ebcebd7198 Mon Sep 17 00:00:00 2001
From: Théophile Baranger
Date: Fri, 31 Mar 2023 22:03:52 +0200
Subject: [PATCH 2/9] remove in-function estimator validation

---
 sklearn/metrics/_scorer.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py
index a6f474fd1a5dd..bebd4b952f32a 100644
--- a/sklearn/metrics/_scorer.py
+++ b/sklearn/metrics/_scorer.py
@@ -484,11 +484,6 @@ def check_scoring(estimator, scoring=None, *, allow_none=False):
         A scorer callable object / function with signature
         ``scorer(estimator, X, y)``.
""" - if not hasattr(estimator, "fit"): - raise TypeError( - "estimator should be an estimator implementing 'fit' method, %r was passed" - % estimator - ) if isinstance(scoring, str): return get_scorer(scoring) elif callable(scoring): From 778841fdb61d65194ae28008dbfbf0c36e9f95c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9ophile=20Baranger?= Date: Fri, 31 Mar 2023 22:08:32 +0200 Subject: [PATCH 3/9] remove in-function scoring validation --- sklearn/metrics/_scorer.py | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py index bebd4b952f32a..4335671fddb23 100644 --- a/sklearn/metrics/_scorer.py +++ b/sklearn/metrics/_scorer.py @@ -18,7 +18,6 @@ # Arnaud Joly # License: Simplified BSD -from collections.abc import Iterable from functools import partial from collections import Counter from traceback import format_exc @@ -486,7 +485,7 @@ def check_scoring(estimator, scoring=None, *, allow_none=False): """ if isinstance(scoring, str): return get_scorer(scoring) - elif callable(scoring): + if callable(scoring): # Heuristic to ensure user has not passed a metric module = getattr(scoring, "__module__", None) if ( @@ -503,7 +502,7 @@ def check_scoring(estimator, scoring=None, *, allow_none=False): "to a scorer." % scoring ) return get_scorer(scoring) - elif scoring is None: + if scoring is None: if hasattr(estimator, "score"): return _passthrough_scorer elif allow_none: @@ -513,17 +512,6 @@ def check_scoring(estimator, scoring=None, *, allow_none=False): "If no scoring is specified, the estimator passed should " "have a 'score' method. The estimator %r does not." % estimator ) - elif isinstance(scoring, Iterable): - raise ValueError( - "For evaluating multiple scores, use " - "sklearn.model_selection.cross_validate instead. " - "{0} was passed.".format(scoring) - ) - else: - raise ValueError( - "scoring value should either be a callable, string or None. %r was passed" - % scoring - ) def _check_multimetric_scoring(estimator, scoring): From 67d3bb4e32b017f0063d64af7a7e32053545ce0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9ophile=20Baranger?= Date: Fri, 31 Mar 2023 22:16:23 +0200 Subject: [PATCH 4/9] add note regarding evaluating multiple scores in docstring --- sklearn/metrics/_scorer.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py index 4335671fddb23..2194a43366cde 100644 --- a/sklearn/metrics/_scorer.py +++ b/sklearn/metrics/_scorer.py @@ -482,6 +482,11 @@ def check_scoring(estimator, scoring=None, *, allow_none=False): scoring : callable A scorer callable object / function with signature ``scorer(estimator, X, y)``. + + Notes + ----- + For evaluating multiple scores, use + :func:`~sklearn.model_selection.cross_validate` instead. 
""" if isinstance(scoring, str): return get_scorer(scoring) From 190d86f543dec80a14947830375f4eb84b4dce05 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9ophile=20Baranger?= Date: Fri, 31 Mar 2023 22:34:05 +0200 Subject: [PATCH 5/9] remove obsolete estimator validation unit test and class --- sklearn/metrics/tests/test_score_objects.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index d39db7fc894c4..f582e51d729e1 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -179,12 +179,6 @@ def teardown_module(): shutil.rmtree(TEMP_FOLDER) -class EstimatorWithoutFit: - """Dummy estimator to test scoring validators""" - - pass - - class EstimatorWithFit(BaseEstimator): """Dummy estimator to test scoring validators""" @@ -228,13 +222,6 @@ def test_all_scorers_repr(): def check_scoring_validator_for_single_metric_usecases(scoring_validator): # Test all branches of single metric usecases - estimator = EstimatorWithoutFit() - pattern = ( - r"estimator should be an estimator implementing 'fit' method," r" .* was passed" - ) - with pytest.raises(TypeError, match=pattern): - scoring_validator(estimator) - estimator = EstimatorWithFitAndScore() estimator.fit([[1]], [1]) scorer = scoring_validator(estimator) From cc4053c805cb00225843e8bc526913b99d421b34 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9ophile=20Baranger?= Date: Mon, 3 Apr 2023 19:22:46 +0200 Subject: [PATCH 6/9] remove note regarding multiple scores --- sklearn/metrics/_scorer.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py index 2194a43366cde..4335671fddb23 100644 --- a/sklearn/metrics/_scorer.py +++ b/sklearn/metrics/_scorer.py @@ -482,11 +482,6 @@ def check_scoring(estimator, scoring=None, *, allow_none=False): scoring : callable A scorer callable object / function with signature ``scorer(estimator, X, y)``. - - Notes - ----- - For evaluating multiple scores, use - :func:`~sklearn.model_selection.cross_validate` instead. """ if isinstance(scoring, str): return get_scorer(scoring) From a124b79ad278af9b26a5f41f2be9fb40882b7a7d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9ophile=20Baranger?= Date: Mon, 3 Apr 2023 20:29:35 +0200 Subject: [PATCH 7/9] update scoring constraint to rely on get_scorer_names function In order for the function 'get_scorer_names' to be callable within the parameters validation, I had to move the 'check_scoring' function to the end of the file. --- sklearn/metrics/_scorer.py | 130 ++++++++++++++++++------------------- 1 file changed, 65 insertions(+), 65 deletions(-) diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py index 4335671fddb23..c0269d8f4718e 100644 --- a/sklearn/metrics/_scorer.py +++ b/sklearn/metrics/_scorer.py @@ -64,7 +64,7 @@ from ..utils.multiclass import type_of_target from ..base import is_regressor -from ..utils._param_validation import HasMethods, validate_params +from ..utils._param_validation import HasMethods, StrOptions, validate_params def _cached_call(cache, estimator, method, *args, **kwargs): @@ -450,70 +450,6 @@ def _passthrough_scorer(estimator, *args, **kwargs): return estimator.score(*args, **kwargs) -@validate_params( - { - "estimator": [HasMethods("fit")], - "scoring": [str, callable, None], - "allow_none": ["boolean"], - } -) -def check_scoring(estimator, scoring=None, *, allow_none=False): - """Determine scorer from user options. 
-
-    A TypeError will be thrown if the estimator cannot be scored.
-
-    Parameters
-    ----------
-    estimator : estimator object implementing 'fit'
-        The object to use to fit the data.
-
-    scoring : str or callable, default=None
-        A string (see model evaluation documentation) or
-        a scorer callable object / function with signature
-        ``scorer(estimator, X, y)``.
-        If None, the provided estimator object's `score` method is used.
-
-    allow_none : bool, default=False
-        If no scoring is specified and the estimator has no score function, we
-        can either return None or raise an exception.
-
-    Returns
-    -------
-    scoring : callable
-        A scorer callable object / function with signature
-        ``scorer(estimator, X, y)``.
-    """
-    if isinstance(scoring, str):
-        return get_scorer(scoring)
-    if callable(scoring):
-        # Heuristic to ensure user has not passed a metric
-        module = getattr(scoring, "__module__", None)
-        if (
-            hasattr(module, "startswith")
-            and module.startswith("sklearn.metrics.")
-            and not module.startswith("sklearn.metrics._scorer")
-            and not module.startswith("sklearn.metrics.tests.")
-        ):
-            raise ValueError(
-                "scoring value %r looks like it is a metric "
-                "function rather than a scorer. A scorer should "
-                "require an estimator as its first parameter. "
-                "Please use `make_scorer` to convert a metric "
-                "to a scorer." % scoring
-            )
-        return get_scorer(scoring)
-    if scoring is None:
-        if hasattr(estimator, "score"):
-            return _passthrough_scorer
-        elif allow_none:
-            return None
-        else:
-            raise TypeError(
-                "If no scoring is specified, the estimator passed should "
-                "have a 'score' method. The estimator %r does not." % estimator
-            )
-
-
 def _check_multimetric_scoring(estimator, scoring):
     """Check the scoring parameter in cases when multiple metrics are allowed.
 
@@ -870,3 +806,67 @@
     _SCORERS[qualified_name] = make_scorer(metric, pos_label=None, average=average)
 
 SCORERS = _DeprecatedScorers(_SCORERS)
+
+
+@validate_params(
+    {
+        "estimator": [HasMethods("fit")],
+        "scoring": [StrOptions(set(get_scorer_names())), callable, None],
+        "allow_none": ["boolean"],
+    }
+)
+def check_scoring(estimator, scoring=None, *, allow_none=False):
+    """Determine scorer from user options.
+
+    A TypeError will be thrown if the estimator cannot be scored.
+
+    Parameters
+    ----------
+    estimator : estimator object implementing 'fit'
+        The object to use to fit the data.
+
+    scoring : str or callable, default=None
+        A string (see model evaluation documentation) or
+        a scorer callable object / function with signature
+        ``scorer(estimator, X, y)``.
+        If None, the provided estimator object's `score` method is used.
+
+    allow_none : bool, default=False
+        If no scoring is specified and the estimator has no score function, we
+        can either return None or raise an exception.
+
+    Returns
+    -------
+    scoring : callable
+        A scorer callable object / function with signature
+        ``scorer(estimator, X, y)``.
+    """
+    if isinstance(scoring, str):
+        return get_scorer(scoring)
+    if callable(scoring):
+        # Heuristic to ensure user has not passed a metric
+        module = getattr(scoring, "__module__", None)
+        if (
+            hasattr(module, "startswith")
+            and module.startswith("sklearn.metrics.")
+            and not module.startswith("sklearn.metrics._scorer")
+            and not module.startswith("sklearn.metrics.tests.")
+        ):
+            raise ValueError(
+                "scoring value %r looks like it is a metric "
+                "function rather than a scorer. A scorer should "
+                "require an estimator as its first parameter. "
" + "Please use `make_scorer` to convert a metric " + "to a scorer." % scoring + ) + return get_scorer(scoring) + if scoring is None: + if hasattr(estimator, "score"): + return _passthrough_scorer + elif allow_none: + return None + else: + raise TypeError( + "If no scoring is specified, the estimator passed should " + "have a 'score' method. The estimator %r does not." % estimator + ) From 7847b01babb90735952adadf61cd4f1bb8ad6add Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9ophile=20Baranger?= Date: Tue, 4 Apr 2023 21:07:13 +0200 Subject: [PATCH 8/9] remove obsolete invalid scoring str parameter unit test --- sklearn/model_selection/tests/test_validation.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/sklearn/model_selection/tests/test_validation.py b/sklearn/model_selection/tests/test_validation.py index 863a366eb4410..19189b213b1bd 100644 --- a/sklearn/model_selection/tests/test_validation.py +++ b/sklearn/model_selection/tests/test_validation.py @@ -382,9 +382,6 @@ def test_cross_validate_invalid_scoring_param(): with pytest.warns(UserWarning, match=warning_message): cross_validate(estimator, X, y, scoring={"foo": multiclass_scorer}) - with pytest.raises(ValueError, match="'mse' is not a valid scoring value."): - cross_validate(SVC(), X, y, scoring="mse") - def test_cross_validate_nested_estimator(): # Non-regression test to ensure that nested From 5f95df1dc81ef40ea26d7648563b04de17eb54cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=A9mie=20du=20Boisberranger?= <34657725+jeremiedbb@users.noreply.github.com> Date: Wed, 5 Apr 2023 15:08:18 +0200 Subject: [PATCH 9/9] Update model_evaluation.rst --- doc/modules/model_evaluation.rst | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/doc/modules/model_evaluation.rst b/doc/modules/model_evaluation.rst index 537f23e49d2dc..926e4d240fdc6 100644 --- a/doc/modules/model_evaluation.rst +++ b/doc/modules/model_evaluation.rst @@ -115,17 +115,11 @@ Usage examples: >>> clf = svm.SVC(random_state=0) >>> cross_val_score(clf, X, y, cv=5, scoring='recall_macro') array([0.96..., 0.96..., 0.96..., 0.93..., 1. ]) - >>> model = svm.SVC() - >>> cross_val_score(model, X, y, cv=5, scoring='wrong_choice') - Traceback (most recent call last): - ValueError: 'wrong_choice' is not a valid scoring value. Use - sklearn.metrics.get_scorer_names() to get valid options. .. note:: - The values listed by the ``ValueError`` exception correspond to the - functions measuring prediction accuracy described in the following - sections. You can retrieve the names of all available scorers by calling + If a wrong scoring name is passed, an ``InvalidParameterError`` is raised. + You can retrieve the names of all available scorers by calling :func:`~sklearn.metrics.get_scorer_names`. .. currentmodule:: sklearn.metrics