From 1fbda386e8e647ff1e59067c2bd73ea2dbeb172f Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Thu, 10 Mar 2022 22:25:48 +0100 Subject: [PATCH 01/37] FEAT add metadata routing support to scorers --- sklearn/metrics/_scorer.py | 159 ++++++++++++++------ sklearn/metrics/tests/test_score_objects.py | 22 ++- sklearn/utils/metadata_routing.py | 1 + 3 files changed, 136 insertions(+), 46 deletions(-) diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py index 1e8e330d1af81..4418f3637be8b 100644 --- a/sklearn/metrics/_scorer.py +++ b/sklearn/metrics/_scorer.py @@ -18,6 +18,7 @@ # Arnaud Joly # License: Simplified BSD +import copy from collections.abc import Iterable from functools import partial from collections import Counter @@ -61,6 +62,10 @@ from ..utils.multiclass import type_of_target from ..base import is_regressor +from ..utils.metadata_routing import _MetadataRequester +from ..utils.metadata_routing import MetadataRequest +from ..utils.metadata_routing import MetadataRouter +from ..utils.metadata_routing import process_routing def _cached_call(cache, estimator, method, *args, **kwargs): @@ -99,11 +104,15 @@ def __call__(self, estimator, *args, **kwargs): cache = {} if self._use_cache(estimator) else None cached_call = partial(_cached_call, cache) + params = process_routing(self, "score", kwargs) + for name, scorer in self._scorers.items(): if isinstance(scorer, _BaseScorer): - score = scorer._score(cached_call, estimator, *args, **kwargs) + score = scorer._score( + cached_call, estimator, *args, **params.get(name).score + ) else: - score = scorer(estimator, *args, **kwargs) + score = scorer(estimator, *args, **params.get(name).score) scores[name] = score return scores @@ -138,8 +147,23 @@ def _use_cache(self, estimator): return True return False + def get_metadata_routing(self): + """Get metadata routing of this object. + + Please check :ref:`User Guide ` on how the routing + mechanism works. + + Returns + ------- + routing : dict + A dict representing a MetadataRouter. + """ + return MetadataRouter(owner=self.__class__.__name__).add( + **self._scorers, method_mapping="score" + ) + -class _BaseScorer: +class _BaseScorer(_MetadataRequester): def __init__(self, score_func, sign, kwargs): self._kwargs = kwargs self._score_func = score_func @@ -191,7 +215,7 @@ def __repr__(self): kwargs_string, ) - def __call__(self, estimator, X, y_true, sample_weight=None): + def __call__(self, estimator, X, y_true, **kwargs): """Evaluate predicted target values for X relative to y_true. Parameters @@ -206,29 +230,45 @@ def __call__(self, estimator, X, y_true, sample_weight=None): y_true : array-like Gold standard target values for X. - sample_weight : array-like of shape (n_samples,), default=None - Sample weights. + **kwargs : dict + Other parameters passed to the scorer. + + .. versionadded:: 1.1 Returns ------- score : float Score function applied to prediction of estimator on X. """ - return self._score( - partial(_cached_call, None), - estimator, - X, - y_true, - sample_weight=sample_weight, - ) + return self._score(partial(_cached_call, None), estimator, X, y_true, **kwargs) def _factory_args(self): """Return non-default make_scorer arguments for repr.""" return "" + def set_score_request(self, **kwargs): + """Set requested parameters by the scorer. + + Note that this method returns a new instance of the scorer, and does + **not** change the original scorer object. + + .. 
versionadded:: 1.1 + + Parameters + ---------- + kwargs : dict + Arguments should be of the form param_name={True, False, None, str}. + The value can also be of the form RequestType + """ + res = copy.deepcopy(self) + res._metadata_request = MetadataRequest(owner=self.__class__.__name__) + for param, alias in kwargs.items(): + res._metadata_request.score.add_request(param=param, alias=alias) + return res + class _PredictScorer(_BaseScorer): - def _score(self, method_caller, estimator, X, y_true, sample_weight=None): + def _score(self, method_caller, estimator, X, y_true, **kwargs): """Evaluate predicted target values for X relative to y_true. Parameters @@ -247,8 +287,10 @@ def _score(self, method_caller, estimator, X, y_true, sample_weight=None): y_true : array-like Gold standard target values for X. - sample_weight : array-like of shape (n_samples,), default=None - Sample weights. + **kwargs : dict + Other parameters passed to the scorer. + + .. versionadded:: 1.1 Returns ------- @@ -257,16 +299,13 @@ def _score(self, method_caller, estimator, X, y_true, sample_weight=None): """ y_pred = method_caller(estimator, "predict", X) - if sample_weight is not None: - return self._sign * self._score_func( - y_true, y_pred, sample_weight=sample_weight, **self._kwargs - ) - else: - return self._sign * self._score_func(y_true, y_pred, **self._kwargs) + scoring_kwargs = copy.deepcopy(self._kwargs) + scoring_kwargs.update(kwargs) + return self._sign * self._score_func(y_true, y_pred, **scoring_kwargs) class _ProbaScorer(_BaseScorer): - def _score(self, method_caller, clf, X, y, sample_weight=None): + def _score(self, method_caller, clf, X, y, **kwargs): """Evaluate predicted probabilities for X relative to y_true. Parameters @@ -286,8 +325,10 @@ def _score(self, method_caller, clf, X, y, sample_weight=None): Gold standard target values for X. These must be class labels, not probabilities. - sample_weight : array-like, default=None - Sample weights. + **kwargs : dict + Other parameters passed to the scorer. + + .. versionadded:: 1.1 Returns ------- @@ -302,19 +343,23 @@ def _score(self, method_caller, clf, X, y, sample_weight=None): # problem: (when only 2 class are given to `y_true` during scoring) # Thus, we need to check for the shape of `y_pred`. y_pred = self._select_proba_binary(y_pred, clf.classes_) - if sample_weight is not None: - return self._sign * self._score_func( - y, y_pred, sample_weight=sample_weight, **self._kwargs - ) - else: - return self._sign * self._score_func(y, y_pred, **self._kwargs) + + scoring_kwargs = copy.deepcopy(self._kwargs) + scoring_kwargs.update(kwargs) + # this is for backward compatibility to avoid passing sample_weight + # to the scorer if it's None + # probably remove in v1.3 + if scoring_kwargs.get("sample_weight", -1) is None: + del scoring_kwargs["sample_weight"] + + return self._sign * self._score_func(y, y_pred, **scoring_kwargs) def _factory_args(self): return ", needs_proba=True" class _ThresholdScorer(_BaseScorer): - def _score(self, method_caller, clf, X, y, sample_weight=None): + def _score(self, method_caller, clf, X, y, **kwargs): """Evaluate decision function output for X relative to y_true. Parameters @@ -336,8 +381,10 @@ def _score(self, method_caller, clf, X, y, sample_weight=None): Gold standard target values for X. These must be class labels, not decision function values. - sample_weight : array-like, default=None - Sample weights. + **kwargs : dict + Other parameters passed to the scorer. + + .. 
versionadded:: 1.1 Returns ------- @@ -374,12 +421,14 @@ def _score(self, method_caller, clf, X, y, sample_weight=None): elif isinstance(y_pred, list): y_pred = np.vstack([p[:, -1] for p in y_pred]).T - if sample_weight is not None: - return self._sign * self._score_func( - y, y_pred, sample_weight=sample_weight, **self._kwargs - ) - else: - return self._sign * self._score_func(y, y_pred, **self._kwargs) + scoring_kwargs = copy.deepcopy(self._kwargs) + scoring_kwargs.update(kwargs) + # this is for backward compatibility to avoid passing sample_weight + # to the scorer if it's None + # probably remove in v1.3 + if scoring_kwargs.get("sample_weight", -1) is None: + del scoring_kwargs["sample_weight"] + return self._sign * self._score_func(y, y_pred, **scoring_kwargs) def _factory_args(self): return ", needs_threshold=True" @@ -414,9 +463,29 @@ def get_scorer(scoring): return scorer -def _passthrough_scorer(estimator, *args, **kwargs): - """Function that wraps estimator.score""" - return estimator.score(*args, **kwargs) +class _passthrough_scorer: + def __init__(self, estimator): + self._estimator = estimator + + def __call__(self, estimator, *args, **kwargs): + """Method that wraps estimator.score""" + return estimator.score(*args, **kwargs) + + def get_metadata_request(self): + """Get requested data properties. + + .. versionadded:: 1.1 + + Returns + ------- + request : dict + A dict of dict of str->value. The key to the first dict is the name + of the method, and the key to the second dict is the name of the + argument requested by the method. + """ + return MetadataRouter(owner=self.__class__.__name__).add( + estimator=self._estimator, method_mapping="score" + ) def check_scoring(estimator, scoring=None, *, allow_none=False): @@ -471,7 +540,7 @@ def check_scoring(estimator, scoring=None, *, allow_none=False): return get_scorer(scoring) elif scoring is None: if hasattr(estimator, "score"): - return _passthrough_scorer + return _passthrough_scorer(estimator) elif allow_none: return None else: @@ -604,7 +673,7 @@ def make_scorer( ---------- score_func : callable Score function (or loss function) with signature - `score_func(y, y_pred, **kwargs)`. + ``score_func(y, y_pred, **kwargs)``. greater_is_better : bool, default=True Whether `score_func` is a score function (default), meaning high is diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index 3b3caceb9970a..7b21fac04c02c 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -236,7 +236,7 @@ def check_scoring_validator_for_single_metric_usecases(scoring_validator): estimator = EstimatorWithFitAndScore() estimator.fit([[1]], [1]) scorer = scoring_validator(estimator) - assert scorer is _passthrough_scorer + assert isinstance(scorer, _passthrough_scorer) assert_almost_equal(scorer(estimator, [[1]], [1]), 1.0) estimator = EstimatorWithFitAndPredict() @@ -629,11 +629,14 @@ def test_classification_scorer_sample_weight(): else: target = y_test try: + scorer = scorer.set_score_request(sample_weight=True) weighted = scorer( estimator[name], X_test, target, sample_weight=sample_weight ) ignored = scorer(estimator[name], X_test[10:], target[10:]) unweighted = scorer(estimator[name], X_test, target) + # this should not raise. sample_weight should be ignored if None. 
+ _ = scorer(estimator[name], X_test[:10], target[:10], sample_weight=None) assert weighted != unweighted, ( f"scorer {name} behaves identically when called with " f"sample weights: {weighted} vs {unweighted}" @@ -677,6 +680,7 @@ def test_regression_scorer_sample_weight(): # skip classification scorers continue try: + scorer = scorer.set_score_request(sample_weight=True) weighted = scorer(reg, X_test, y_test, sample_weight=sample_weight) ignored = scorer(reg, X_test[11:], y_test[11:]) unweighted = scorer(reg, X_test, y_test) @@ -1140,3 +1144,19 @@ def test_scorer_no_op_multiclass_select_proba(): labels=lr.classes_, ) scorer(lr, X_test, y_test) + + +@pytest.mark.parametrize("name, scorer", SCORERS.items()) +def test_scorer_metadata_request(name, scorer): + assert hasattr(scorer, "set_score_request") + assert hasattr(scorer, "get_metadata_routing") + + weighted_scorer = scorer.set_score_request(sample_weight=True) + # set_score_request shouldn't mutate the instance + assert weighted_scorer is not scorer + + assert str(scorer.get_metadata_routing()) == "{}" + assert ( + str(weighted_scorer.get_metadata_routing()) + == "{'score': {'sample_weight': }}" + ) diff --git a/sklearn/utils/metadata_routing.py b/sklearn/utils/metadata_routing.py index 1b3ba6e33b1f7..89ba5b5a9253c 100644 --- a/sklearn/utils/metadata_routing.py +++ b/sklearn/utils/metadata_routing.py @@ -14,3 +14,4 @@ from ._metadata_requests import MetadataRequest # noqa from ._metadata_requests import MethodMapping # noqa from ._metadata_requests import process_routing # noqa +from ._metadata_requests import _MetadataRequester # noqa From 51baf9d277bc0498d104880e9a54cf3c598d1eef Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Thu, 10 Mar 2022 22:29:01 +0100 Subject: [PATCH 02/37] clarify doc on not mutating scorers --- doc/metadata_routing.rst | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/doc/metadata_routing.rst b/doc/metadata_routing.rst index c6641ada487db..0e20f73966bf7 100644 --- a/doc/metadata_routing.rst +++ b/doc/metadata_routing.rst @@ -46,8 +46,10 @@ Weighted scoring and fitting ---------------------------- Here ``GroupKFold`` requests ``groups`` by default. However, we need to -explicitly request weights in ``make_scorer`` and for ``LogisticRegressionCV``. -Both of these *consumers* know how to use metadata called ``"sample_weight"``:: +explicitly request weights for our scorer and for ``LogisticRegressionCV``. +Note that ``set_score_request`` on a scorer does not mutate the instance and +instead returns a new instance with the metadata request attached to it. Both +of these *consumers* know how to use metadata called ``"sample_weight"``:: >>> weighted_acc = make_scorer(accuracy_score).set_score_request( ... 
sample_weight=True From 78638c40a0d4161c7db91131fbe1ae1bbd386b67 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Thu, 10 Mar 2022 22:40:44 +0100 Subject: [PATCH 03/37] improve tests --- sklearn/metrics/tests/test_score_objects.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index 7b21fac04c02c..79e49bc68b4d1 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -15,6 +15,8 @@ from sklearn.utils._testing import assert_almost_equal from sklearn.utils._testing import assert_array_equal from sklearn.utils._testing import ignore_warnings +from sklearn.utils.metadata_routing import MetadataRouter +from sklearn.utils.metadata_routing import process_routing from sklearn.base import BaseEstimator from sklearn.metrics import ( @@ -1160,3 +1162,18 @@ def test_scorer_metadata_request(name, scorer): str(weighted_scorer.get_metadata_routing()) == "{'score': {'sample_weight': }}" ) + + # make sure putting the scorer in a router doesn't request anything + router = MetadataRouter(owner="test").add(scorer=scorer, method_mapping="score") + with pytest.raises(TypeError, match="got unexpected argument"): + router.validate_metadata(params={"sample_weight": 1}, method="score") + routed_params = router.route_params(params={"sample_weight": 1}, caller="score") + assert not routed_params.scorer.score + + # make sure putting weighted_scorer in a router requests sample_weight + router = MetadataRouter(owner="test").add( + scorer=weighted_scorer, method_mapping="score" + ) + router.validate_metadata(params={"sample_weight": 1}, method="score") + routed_params = router.route_params(params={"sample_weight": 1}, caller="score") + assert list(routed_params.scorer.score.keys()) == ["sample_weight"] From ea317bc27e5bab252dd8304ddea1f9b16fb0d789 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Thu, 10 Mar 2022 22:44:10 +0100 Subject: [PATCH 04/37] remove unused import --- sklearn/metrics/tests/test_score_objects.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index 79e49bc68b4d1..02ca8352d477b 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -16,7 +16,6 @@ from sklearn.utils._testing import assert_array_equal from sklearn.utils._testing import ignore_warnings from sklearn.utils.metadata_routing import MetadataRouter -from sklearn.utils.metadata_routing import process_routing from sklearn.base import BaseEstimator from sklearn.metrics import ( From 6c6c2f1044e74283a70c1fd720baa09effad5e46 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Fri, 11 Mar 2022 10:49:00 +0100 Subject: [PATCH 05/37] fix tests --- sklearn/inspection/_permutation_importance.py | 2 +- sklearn/model_selection/tests/test_search.py | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/sklearn/inspection/_permutation_importance.py b/sklearn/inspection/_permutation_importance.py index 7ca586efe8630..70b9c89c62126 100644 --- a/sklearn/inspection/_permutation_importance.py +++ b/sklearn/inspection/_permutation_importance.py @@ -15,7 +15,7 @@ def _weights_scorer(scorer, estimator, X, y, sample_weight): if sample_weight is not None: - return scorer(estimator, X, y, sample_weight) + return scorer(estimator, X, y, sample_weight=sample_weight) return scorer(estimator, X, y) diff --git 
a/sklearn/model_selection/tests/test_search.py b/sklearn/model_selection/tests/test_search.py index a4dcc201c0bb1..4611669b2b974 100644 --- a/sklearn/model_selection/tests/test_search.py +++ b/sklearn/model_selection/tests/test_search.py @@ -1902,7 +1902,13 @@ def _run_search(self, evaluate): attr[0].islower() and attr[-1:] == "_" and attr - not in {"cv_results_", "best_estimator_", "refit_time_", "classes_"} + not in { + "cv_results_", + "best_estimator_", + "refit_time_", + "classes_", + "scorer_", + } ): assert getattr(gscv, attr) == getattr(mycv, attr), ( "Attribute %s not equal" % attr From b24c3addec0b2b7611b4ff656772eebbf7d3f886 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Fri, 11 Mar 2022 16:05:06 +0100 Subject: [PATCH 06/37] Christian's comments --- sklearn/metrics/_scorer.py | 22 ++++++++++----------- sklearn/metrics/tests/test_score_objects.py | 3 +-- sklearn/utils/_metadata_requests.py | 5 +++-- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py index 4418f3637be8b..290ec24cf2141 100644 --- a/sklearn/metrics/_scorer.py +++ b/sklearn/metrics/_scorer.py @@ -155,8 +155,9 @@ def get_metadata_routing(self): Returns ------- - routing : dict - A dict representing a MetadataRouter. + routing : MetadataRouter + A :class:`~utils.metadata_routing.MetadataRouter` encapsulating + routing information. """ return MetadataRouter(owner=self.__class__.__name__).add( **self._scorers, method_mapping="score" @@ -231,7 +232,7 @@ def __call__(self, estimator, X, y_true, **kwargs): Gold standard target values for X. **kwargs : dict - Other parameters passed to the scorer. + Other parameters passed to the scorer, e.g. sample_weight. .. versionadded:: 1.1 @@ -288,7 +289,7 @@ def _score(self, method_caller, estimator, X, y_true, **kwargs): Gold standard target values for X. **kwargs : dict - Other parameters passed to the scorer. + Other parameters passed to the scorer, e.g. sample_weight. .. versionadded:: 1.1 @@ -326,7 +327,7 @@ def _score(self, method_caller, clf, X, y, **kwargs): not probabilities. **kwargs : dict - Other parameters passed to the scorer. + Other parameters passed to the scorer, e.g. sample_weight. .. versionadded:: 1.1 @@ -382,7 +383,7 @@ def _score(self, method_caller, clf, X, y, **kwargs): not decision function values. **kwargs : dict - Other parameters passed to the scorer. + Other parameters passed to the scorer, e.g. sample_weight. .. versionadded:: 1.1 @@ -471,17 +472,16 @@ def __call__(self, estimator, *args, **kwargs): """Method that wraps estimator.score""" return estimator.score(*args, **kwargs) - def get_metadata_request(self): + def get_metadata_routing(self): """Get requested data properties. .. versionadded:: 1.1 Returns ------- - request : dict - A dict of dict of str->value. The key to the first dict is the name - of the method, and the key to the second dict is the name of the - argument requested by the method. + routing : MetadataRouter + A :class:`~utils.metadata_routing.MetadataRouter` encapsulating + routing information. 
""" return MetadataRouter(owner=self.__class__.__name__).add( estimator=self._estimator, method_mapping="score" diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index 02ca8352d477b..749cedc25bff5 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -681,7 +681,6 @@ def test_regression_scorer_sample_weight(): # skip classification scorers continue try: - scorer = scorer.set_score_request(sample_weight=True) weighted = scorer(reg, X_test, y_test, sample_weight=sample_weight) ignored = scorer(reg, X_test[11:], y_test[11:]) unweighted = scorer(reg, X_test, y_test) @@ -1163,7 +1162,7 @@ def test_scorer_metadata_request(name, scorer): ) # make sure putting the scorer in a router doesn't request anything - router = MetadataRouter(owner="test").add(scorer=scorer, method_mapping="score") + router = MetadataRouter(owner="test").add(method_mapping="score", scorer=scorer) with pytest.raises(TypeError, match="got unexpected argument"): router.validate_metadata(params={"sample_weight": 1}, method="score") routed_params = router.route_params(params={"sample_weight": 1}, caller="score") diff --git a/sklearn/utils/_metadata_requests.py b/sklearn/utils/_metadata_requests.py index 5719750ee5d58..9f6527f6809bf 100644 --- a/sklearn/utils/_metadata_requests.py +++ b/sklearn/utils/_metadata_requests.py @@ -1112,8 +1112,9 @@ def get_metadata_routing(self): Returns ------- - routing : dict - A dict representing a MetadataRouter. + routing : MetadataRequest + A :class:`~utils.metadata_routing.MetadataRequest` encapsulating + routing information. """ return self._get_metadata_request() From a663b869f7455ead8bc144dbeb3e5d9d10665573 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Tue, 15 Mar 2022 15:15:47 +0100 Subject: [PATCH 07/37] set_score_request -> with_score_request on scorers --- doc/metadata_routing.rst | 21 ++++++++++++--------- sklearn/metrics/_scorer.py | 4 ++-- sklearn/metrics/tests/test_score_objects.py | 8 ++++---- 3 files changed, 18 insertions(+), 15 deletions(-) diff --git a/doc/metadata_routing.rst b/doc/metadata_routing.rst index 0e20f73966bf7..0ffb21c2fc9bd 100644 --- a/doc/metadata_routing.rst +++ b/doc/metadata_routing.rst @@ -12,9 +12,10 @@ meta-estimators such as ``Pipeline`` and ``GridSearchCV``. In order to pass metadata to a method such as ``fit`` or ``score``, the object accepting the metadata, must *request* it. For estimators and splitters this is done via ``set_*_request`` methods, e.g. ``set_fit_request(...)``, and for scorers this -is done via ``set_score_request`` method. For grouped splitters such as -``GroupKFold`` a ``groups`` parameter is requested by default. This is best -demonstrated by the following examples. +is done via ``with_score_request`` method which creates a new scorer with the +requested metadata. For grouped splitters such as ``GroupKFold`` a ``groups`` +parameter is requested by default. This is best demonstrated by the following +examples. If you are developing a scikit-learn compatible estimator or meta-estimator, you can check our related developer guide: @@ -47,11 +48,11 @@ Weighted scoring and fitting Here ``GroupKFold`` requests ``groups`` by default. However, we need to explicitly request weights for our scorer and for ``LogisticRegressionCV``. 
-Note that ``set_score_request`` on a scorer does not mutate the instance and +Note that ``with_score_request`` on a scorer does not mutate the instance and instead returns a new instance with the metadata request attached to it. Both of these *consumers* know how to use metadata called ``"sample_weight"``:: - >>> weighted_acc = make_scorer(accuracy_score).set_score_request( + >>> weighted_acc = make_scorer(accuracy_score).with_score_request( ... sample_weight=True ... ) >>> lr = LogisticRegressionCV( @@ -83,7 +84,7 @@ configure :class:`~linear_model.LogisticRegressionCV` to not request sample weights, so that :func:`~model_selection.cross_validate` does not pass the weights along:: - >>> weighted_acc = make_scorer(accuracy_score).set_score_request( + >>> weighted_acc = make_scorer(accuracy_score).with_score_request( ... sample_weight=True ... ) >>> lr = LogisticRegressionCV( @@ -113,7 +114,7 @@ Unweighted feature selection Unlike ``LogisticRegressionCV``, ``SelectKBest`` doesn't accept weights and therefore `"sample_weight"` is not routed to it:: - >>> weighted_acc = make_scorer(accuracy_score).set_score_request( + >>> weighted_acc = make_scorer(accuracy_score).with_score_request( ... sample_weight=True ... ) >>> lr = LogisticRegressionCV( @@ -138,7 +139,7 @@ Despite ``make_scorer`` and ``LogisticRegressionCV`` both expecting the key consumers. In this example, we pass ``scoring_weight`` to the scorer, and ``fitting_weight`` to ``LogisticRegressionCV``:: - >>> weighted_acc = make_scorer(accuracy_score).set_score_request( + >>> weighted_acc = make_scorer(accuracy_score).with_score_request( ... sample_weight="scoring_weight" ... ) >>> lr = LogisticRegressionCV( @@ -190,7 +191,9 @@ supports ``sample_weight`` in ``fit`` and ``score``, it exposes ``my_weights`` is done at the router level, and not by the object, e.g. estimator, itself. -For the scorers, this is done the same way, using ``set_score_request`` method. +For the scorers, this is done using ``with_score_request`` method which returns +a new instance of the scorer with the requested metadata and does not change +the original scorer. If a metadata, e.g. ``sample_weight``, is passed by the user, the metadata request for all objects which potentially can accept ``sample_weight`` should diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py index 290ec24cf2141..34f8562b6b301 100644 --- a/sklearn/metrics/_scorer.py +++ b/sklearn/metrics/_scorer.py @@ -247,8 +247,8 @@ def _factory_args(self): """Return non-default make_scorer arguments for repr.""" return "" - def set_score_request(self, **kwargs): - """Set requested parameters by the scorer. + def with_score_request(self, **kwargs): + """Create a scorer that requests metadata parameters. Note that this method returns a new instance of the scorer, and does **not** change the original scorer object. 
diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index 749cedc25bff5..743701cc5fb85 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -630,7 +630,7 @@ def test_classification_scorer_sample_weight(): else: target = y_test try: - scorer = scorer.set_score_request(sample_weight=True) + scorer = scorer.with_score_request(sample_weight=True) weighted = scorer( estimator[name], X_test, target, sample_weight=sample_weight ) @@ -1148,11 +1148,11 @@ def test_scorer_no_op_multiclass_select_proba(): @pytest.mark.parametrize("name, scorer", SCORERS.items()) def test_scorer_metadata_request(name, scorer): - assert hasattr(scorer, "set_score_request") + assert hasattr(scorer, "with_score_request") assert hasattr(scorer, "get_metadata_routing") - weighted_scorer = scorer.set_score_request(sample_weight=True) - # set_score_request shouldn't mutate the instance + weighted_scorer = scorer.with_score_request(sample_weight=True) + # with_score_request shouldn't mutate the instance assert weighted_scorer is not scorer assert str(scorer.get_metadata_routing()) == "{}" From e838a714d4b262932d804058e640464803c9b702 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Tue, 15 Mar 2022 18:38:05 +0100 Subject: [PATCH 08/37] warn on overlapping kwargs and metadata --- sklearn/metrics/_scorer.py | 53 +++++++++++++++++++-- sklearn/metrics/tests/test_score_objects.py | 19 ++++++++ 2 files changed, 69 insertions(+), 3 deletions(-) diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py index 34f8562b6b301..58be34268750b 100644 --- a/sklearn/metrics/_scorer.py +++ b/sklearn/metrics/_scorer.py @@ -19,6 +19,7 @@ # License: Simplified BSD import copy +import warnings from collections.abc import Iterable from functools import partial from collections import Counter @@ -247,6 +248,19 @@ def _factory_args(self): """Return non-default make_scorer arguments for repr.""" return "" + def _warn_overlap(self, message, kwargs): + """Warn if there is any overlap between ``self._kwargs`` and kwargs. + + This method is intended to be used to check for overlap between + ``self._kwargs`` and ``kwargs`` passed as metadata. + """ + _kwargs = set() if self._kwargs is None else set(self._kwargs.keys()) + overlap = _kwargs.intersection(kwargs.keys()) + if overlap: + warnings.warn( + f"{message} Overlapping parameters are: {overlap}", UserWarning + ) + def with_score_request(self, **kwargs): """Create a scorer that requests metadata parameters. @@ -258,9 +272,19 @@ def with_score_request(self, **kwargs): Parameters ---------- kwargs : dict - Arguments should be of the form param_name={True, False, None, str}. - The value can also be of the form RequestType + Arguments should be of the form ``param_name=alias``, and `alias` + can be either one of ``{True, False, None, str}`` or an instance of + RequestType. """ + self._warn_overlap( + message=( + "You are setting metadata request for parameters which are " + "already set as kwargs for this metric. These set values will be " + "overridden by passed metadata if provided. Please pass them either " + "as metadata or kwargs to `make_scorer`." + ), + kwargs=kwargs, + ) res = copy.deepcopy(self) res._metadata_request = MetadataRequest(owner=self.__class__.__name__) for param, alias in kwargs.items(): @@ -298,7 +322,14 @@ def _score(self, method_caller, estimator, X, y_true, **kwargs): score : float Score function applied to prediction of estimator on X. 
""" - + self._warn_overlap( + message=( + "There is an overlap between set kwargs of this scorer instance and" + " passed metadata. Please pass them either as kwargs to `make_scorer`" + " or metadata, but not both." + ), + kwargs=kwargs, + ) y_pred = method_caller(estimator, "predict", X) scoring_kwargs = copy.deepcopy(self._kwargs) scoring_kwargs.update(kwargs) @@ -336,6 +367,14 @@ def _score(self, method_caller, clf, X, y, **kwargs): score : float Score function applied to prediction of estimator on X. """ + self._warn_overlap( + message=( + "There is an overlap between set kwargs of this scorer instance and" + " passed metadata. Please pass them either as kwargs to `make_scorer`" + " or metadata, but not both." + ), + kwargs=kwargs, + ) y_type = type_of_target(y) y_pred = method_caller(clf, "predict_proba", X) @@ -392,6 +431,14 @@ def _score(self, method_caller, clf, X, y, **kwargs): score : float Score function applied to prediction of estimator on X. """ + self._warn_overlap( + message=( + "There is an overlap between set kwargs of this scorer instance and" + " passed metadata. Please pass them either as kwargs to `make_scorer`" + " or metadata, but not both." + ), + kwargs=kwargs, + ) y_type = type_of_target(y) if y_type not in ("binary", "multilabel-indicator"): diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index 743701cc5fb85..2962fff8537db 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -1175,3 +1175,22 @@ def test_scorer_metadata_request(name, scorer): router.validate_metadata(params={"sample_weight": 1}, method="score") routed_params = router.route_params(params={"sample_weight": 1}, caller="score") assert list(routed_params.scorer.score.keys()) == ["sample_weight"] + + +def test_metadata_kwarg_conflict(): + X, y = make_classification( + n_classes=3, n_informative=3, n_samples=20, random_state=0 + ) + lr = LogisticRegression().fit(X, y) + + scorer = make_scorer( + roc_auc_score, + needs_proba=True, + multi_class="ovo", + labels=lr.classes_, + ) + with pytest.warns(UserWarning, match="already set as kwargs"): + scorer = scorer.with_score_request(labels=True) + + with pytest.warns(UserWarning, match="There is an overlap"): + scorer(lr, X, y, labels=lr.classes_) From 0c6e2c454df2c2a58c904c8e9dee42f8e38ea3e8 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Tue, 15 Mar 2022 18:45:48 +0100 Subject: [PATCH 09/37] add references to docs --- sklearn/metrics/_scorer.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py index 58be34268750b..4b1f3a093b68a 100644 --- a/sklearn/metrics/_scorer.py +++ b/sklearn/metrics/_scorer.py @@ -234,6 +234,7 @@ def __call__(self, estimator, X, y_true, **kwargs): **kwargs : dict Other parameters passed to the scorer, e.g. sample_weight. + Refer to :func:`with_score_request` for more details. .. versionadded:: 1.1 @@ -267,6 +268,9 @@ def with_score_request(self, **kwargs): Note that this method returns a new instance of the scorer, and does **not** change the original scorer object. + Please see :ref:`User Guide ` on how the routing + mechanism works. + .. versionadded:: 1.1 Parameters @@ -314,6 +318,7 @@ def _score(self, method_caller, estimator, X, y_true, **kwargs): **kwargs : dict Other parameters passed to the scorer, e.g. sample_weight. + Refer to :func:`with_score_request` for more details. .. 
versionadded:: 1.1 @@ -359,6 +364,7 @@ def _score(self, method_caller, clf, X, y, **kwargs): **kwargs : dict Other parameters passed to the scorer, e.g. sample_weight. + Refer to :func:`with_score_request` for more details. .. versionadded:: 1.1 @@ -423,6 +429,7 @@ def _score(self, method_caller, clf, X, y, **kwargs): **kwargs : dict Other parameters passed to the scorer, e.g. sample_weight. + Refer to :func:`with_score_request` for more details. .. versionadded:: 1.1 From 0c61c05d3c66961d49b94129ccefc60afa955fb7 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Tue, 15 Mar 2022 19:08:42 +0100 Subject: [PATCH 10/37] add a note on custom scorers --- doc/modules/model_evaluation.rst | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/doc/modules/model_evaluation.rst b/doc/modules/model_evaluation.rst index 5b3604d403f3b..4a194adf1fa7d 100644 --- a/doc/modules/model_evaluation.rst +++ b/doc/modules/model_evaluation.rst @@ -224,6 +224,13 @@ the following two rules: Again, by convention higher numbers are better, so if your scorer returns loss, that value should be negated. +- If it requires extra metadata to be passed to it, it should expose a + ``get_metadata_routing`` method returning the requested metadata. The user + should be able to set the requested metadata via a ``with_score_request`` + method returning a copy of the scorer with the appropriate requests set. + Please see :ref:`User Guide ` for more details. + + .. note:: **Using custom scorers in functions where n_jobs > 1** While defining the custom scoring function alongside the calling function @@ -563,8 +570,8 @@ or *informedness*. Machine Learning for Predictive Data Analytics: Algorithms, Worked Examples, and Case Studies `_, 2015. - .. [Urbanowicz2015] Urbanowicz R.J., Moore, J.H. :doi:`ExSTraCS 2.0: description - and evaluation of a scalable learning classifier + .. [Urbanowicz2015] Urbanowicz R.J., Moore, J.H. :doi:`ExSTraCS 2.0: description + and evaluation of a scalable learning classifier system <10.1007/s12065-015-0128-8>`, Evol. Intel. (2015) 8: 89. .. _cohen_kappa: From ba36307d611d790876a7320dfe95f6441ed31ce1 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Tue, 22 Mar 2022 16:15:11 +0100 Subject: [PATCH 11/37] Revert "set_score_request -> with_score_request on scorers" This reverts commit a663b869f7455ead8bc144dbeb3e5d9d10665573. --- doc/metadata_routing.rst | 21 +++++++++------------ sklearn/metrics/_scorer.py | 6 ++---- sklearn/metrics/tests/test_score_objects.py | 8 ++++---- 3 files changed, 15 insertions(+), 20 deletions(-) diff --git a/doc/metadata_routing.rst b/doc/metadata_routing.rst index 0ffb21c2fc9bd..0e20f73966bf7 100644 --- a/doc/metadata_routing.rst +++ b/doc/metadata_routing.rst @@ -12,10 +12,9 @@ meta-estimators such as ``Pipeline`` and ``GridSearchCV``. In order to pass metadata to a method such as ``fit`` or ``score``, the object accepting the metadata, must *request* it. For estimators and splitters this is done via ``set_*_request`` methods, e.g. ``set_fit_request(...)``, and for scorers this -is done via ``with_score_request`` method which creates a new scorer with the -requested metadata. For grouped splitters such as ``GroupKFold`` a ``groups`` -parameter is requested by default. This is best demonstrated by the following -examples. +is done via ``set_score_request`` method. For grouped splitters such as +``GroupKFold`` a ``groups`` parameter is requested by default. This is best +demonstrated by the following examples. 
If you are developing a scikit-learn compatible estimator or meta-estimator, you can check our related developer guide: @@ -48,11 +47,11 @@ Weighted scoring and fitting Here ``GroupKFold`` requests ``groups`` by default. However, we need to explicitly request weights for our scorer and for ``LogisticRegressionCV``. -Note that ``with_score_request`` on a scorer does not mutate the instance and +Note that ``set_score_request`` on a scorer does not mutate the instance and instead returns a new instance with the metadata request attached to it. Both of these *consumers* know how to use metadata called ``"sample_weight"``:: - >>> weighted_acc = make_scorer(accuracy_score).with_score_request( + >>> weighted_acc = make_scorer(accuracy_score).set_score_request( ... sample_weight=True ... ) >>> lr = LogisticRegressionCV( @@ -84,7 +83,7 @@ configure :class:`~linear_model.LogisticRegressionCV` to not request sample weights, so that :func:`~model_selection.cross_validate` does not pass the weights along:: - >>> weighted_acc = make_scorer(accuracy_score).with_score_request( + >>> weighted_acc = make_scorer(accuracy_score).set_score_request( ... sample_weight=True ... ) >>> lr = LogisticRegressionCV( @@ -114,7 +113,7 @@ Unweighted feature selection Unlike ``LogisticRegressionCV``, ``SelectKBest`` doesn't accept weights and therefore `"sample_weight"` is not routed to it:: - >>> weighted_acc = make_scorer(accuracy_score).with_score_request( + >>> weighted_acc = make_scorer(accuracy_score).set_score_request( ... sample_weight=True ... ) >>> lr = LogisticRegressionCV( @@ -139,7 +138,7 @@ Despite ``make_scorer`` and ``LogisticRegressionCV`` both expecting the key consumers. In this example, we pass ``scoring_weight`` to the scorer, and ``fitting_weight`` to ``LogisticRegressionCV``:: - >>> weighted_acc = make_scorer(accuracy_score).with_score_request( + >>> weighted_acc = make_scorer(accuracy_score).set_score_request( ... sample_weight="scoring_weight" ... ) >>> lr = LogisticRegressionCV( @@ -191,9 +190,7 @@ supports ``sample_weight`` in ``fit`` and ``score``, it exposes ``my_weights`` is done at the router level, and not by the object, e.g. estimator, itself. -For the scorers, this is done using ``with_score_request`` method which returns -a new instance of the scorer with the requested metadata and does not change -the original scorer. +For the scorers, this is done the same way, using ``set_score_request`` method. If a metadata, e.g. ``sample_weight``, is passed by the user, the metadata request for all objects which potentially can accept ``sample_weight`` should diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py index 58f5f72c145dc..e6be7d34673ca 100644 --- a/sklearn/metrics/_scorer.py +++ b/sklearn/metrics/_scorer.py @@ -25,8 +25,6 @@ from collections import Counter import numpy as np -import copy -import warnings from . import ( r2_score, @@ -264,8 +262,8 @@ def _warn_overlap(self, message, kwargs): f"{message} Overlapping parameters are: {overlap}", UserWarning ) - def with_score_request(self, **kwargs): - """Create a scorer that requests metadata parameters. + def set_score_request(self, **kwargs): + """Set requested parameters by the scorer. Note that this method returns a new instance of the scorer, and does **not** change the original scorer object. 
diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index bec4e24c4d751..b9c18f4c7782d 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -631,7 +631,7 @@ def test_classification_scorer_sample_weight(): else: target = y_test try: - scorer = scorer.with_score_request(sample_weight=True) + scorer = scorer.set_score_request(sample_weight=True) weighted = scorer( estimator[name], X_test, target, sample_weight=sample_weight ) @@ -1161,11 +1161,11 @@ def test_scorer_no_op_multiclass_select_proba(): @pytest.mark.parametrize("name, scorer", SCORERS.items()) def test_scorer_metadata_request(name, scorer): - assert hasattr(scorer, "with_score_request") + assert hasattr(scorer, "set_score_request") assert hasattr(scorer, "get_metadata_routing") - weighted_scorer = scorer.with_score_request(sample_weight=True) - # with_score_request shouldn't mutate the instance + weighted_scorer = scorer.set_score_request(sample_weight=True) + # set_score_request shouldn't mutate the instance assert weighted_scorer is not scorer assert str(scorer.get_metadata_routing()) == "{}" From 2bea2036dfa8d2688046731f8474fb200f1ff629 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Wed, 23 Mar 2022 10:14:32 +0100 Subject: [PATCH 12/37] set_score_request now mutates the instance --- doc/metadata_routing.rst | 4 +--- doc/modules/model_evaluation.rst | 5 ++--- sklearn/metrics/_scorer.py | 18 +++++++----------- sklearn/metrics/tests/test_score_objects.py | 18 +++++++++++------- 4 files changed, 21 insertions(+), 24 deletions(-) diff --git a/doc/metadata_routing.rst b/doc/metadata_routing.rst index 0e20f73966bf7..de04ecf022415 100644 --- a/doc/metadata_routing.rst +++ b/doc/metadata_routing.rst @@ -47,9 +47,7 @@ Weighted scoring and fitting Here ``GroupKFold`` requests ``groups`` by default. However, we need to explicitly request weights for our scorer and for ``LogisticRegressionCV``. -Note that ``set_score_request`` on a scorer does not mutate the instance and -instead returns a new instance with the metadata request attached to it. Both -of these *consumers* know how to use metadata called ``"sample_weight"``:: +Both of these *consumers* know how to use metadata called ``"sample_weight"``:: >>> weighted_acc = make_scorer(accuracy_score).set_score_request( ... sample_weight=True diff --git a/doc/modules/model_evaluation.rst b/doc/modules/model_evaluation.rst index ea85973d5ee56..d6d21b3814918 100644 --- a/doc/modules/model_evaluation.rst +++ b/doc/modules/model_evaluation.rst @@ -227,9 +227,8 @@ the following two rules: - If it requires extra metadata to be passed to it, it should expose a ``get_metadata_routing`` method returning the requested metadata. The user - should be able to set the requested metadata via a ``with_score_request`` - method returning a copy of the scorer with the appropriate requests set. - Please see :ref:`User Guide ` for more details. + should be able to set the requested metadata via a ``set_score_request`` + method. Please see :ref:`User Guide ` for more details. .. note:: **Using custom scorers in functions where n_jobs > 1** diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py index e6be7d34673ca..72a7e2a807c8f 100644 --- a/sklearn/metrics/_scorer.py +++ b/sklearn/metrics/_scorer.py @@ -234,7 +234,7 @@ def __call__(self, estimator, X, y_true, **kwargs): **kwargs : dict Other parameters passed to the scorer, e.g. sample_weight. 
- Refer to :func:`with_score_request` for more details. + Refer to :func:`set_score_request` for more details. .. versionadded:: 1.1 @@ -265,9 +265,6 @@ def _warn_overlap(self, message, kwargs): def set_score_request(self, **kwargs): """Set requested parameters by the scorer. - Note that this method returns a new instance of the scorer, and does - **not** change the original scorer object. - Please see :ref:`User Guide ` on how the routing mechanism works. @@ -289,11 +286,10 @@ def set_score_request(self, **kwargs): ), kwargs=kwargs, ) - res = copy.deepcopy(self) - res._metadata_request = MetadataRequest(owner=self.__class__.__name__) + self._metadata_request = MetadataRequest(owner=self.__class__.__name__) for param, alias in kwargs.items(): - res._metadata_request.score.add_request(param=param, alias=alias) - return res + self._metadata_request.score.add_request(param=param, alias=alias) + return self class _PredictScorer(_BaseScorer): @@ -318,7 +314,7 @@ def _score(self, method_caller, estimator, X, y_true, **kwargs): **kwargs : dict Other parameters passed to the scorer, e.g. sample_weight. - Refer to :func:`with_score_request` for more details. + Refer to :func:`set_score_request` for more details. .. versionadded:: 1.1 @@ -364,7 +360,7 @@ def _score(self, method_caller, clf, X, y, **kwargs): **kwargs : dict Other parameters passed to the scorer, e.g. sample_weight. - Refer to :func:`with_score_request` for more details. + Refer to :func:`set_score_request` for more details. .. versionadded:: 1.1 @@ -429,7 +425,7 @@ def _score(self, method_caller, clf, X, y, **kwargs): **kwargs : dict Other parameters passed to the scorer, e.g. sample_weight. - Refer to :func:`with_score_request` for more details. + Refer to :func:`set_score_request` for more details. .. 
versionadded:: 1.1 diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index b9c18f4c7782d..85bf4f050f588 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -1159,23 +1159,27 @@ def test_scorer_no_op_multiclass_select_proba(): scorer(lr, X_test, y_test) -@pytest.mark.parametrize("name, scorer", SCORERS.items()) -def test_scorer_metadata_request(name, scorer): +@pytest.mark.parametrize("name", get_scorer_names(), ids=get_scorer_names()) +def test_scorer_metadata_request(name): + scorer = get_scorer(name) assert hasattr(scorer, "set_score_request") assert hasattr(scorer, "get_metadata_routing") + assert str(scorer.get_metadata_routing()) == "{}" + weighted_scorer = scorer.set_score_request(sample_weight=True) - # set_score_request shouldn't mutate the instance - assert weighted_scorer is not scorer + # set_score_request should mutate the instance + assert weighted_scorer is scorer - assert str(scorer.get_metadata_routing()) == "{}" assert ( str(weighted_scorer.get_metadata_routing()) == "{'score': {'sample_weight': }}" ) # make sure putting the scorer in a router doesn't request anything - router = MetadataRouter(owner="test").add(method_mapping="score", scorer=scorer) + router = MetadataRouter(owner="test").add( + method_mapping="score", scorer=get_scorer(name) + ) with pytest.raises(TypeError, match="got unexpected argument"): router.validate_metadata(params={"sample_weight": 1}, method="score") routed_params = router.route_params(params={"sample_weight": 1}, caller="score") @@ -1203,7 +1207,7 @@ def test_metadata_kwarg_conflict(): labels=lr.classes_, ) with pytest.warns(UserWarning, match="already set as kwargs"): - scorer = scorer.with_score_request(labels=True) + scorer.set_score_request(labels=True) with pytest.warns(UserWarning, match="There is an overlap"): scorer(lr, X, y, labels=lr.classes_) From dba266f3222c17de3307a158d2e145d10ff2e5dd Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Wed, 23 Mar 2022 10:17:40 +0100 Subject: [PATCH 13/37] don't test repr --- sklearn/metrics/tests/test_score_objects.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index 85bf4f050f588..90d2820d339e9 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -15,7 +15,8 @@ from sklearn.utils._testing import assert_almost_equal from sklearn.utils._testing import assert_array_equal from sklearn.utils._testing import ignore_warnings -from sklearn.utils.metadata_routing import MetadataRouter +from sklearn.utils.metadata_routing import MetadataRouter, RequestType +from sklearn.tests.test_metadata_routing import assert_request_is_empty from sklearn.base import BaseEstimator from sklearn.metrics import ( @@ -1165,15 +1166,16 @@ def test_scorer_metadata_request(name): assert hasattr(scorer, "set_score_request") assert hasattr(scorer, "get_metadata_routing") - assert str(scorer.get_metadata_routing()) == "{}" + assert_request_is_empty(scorer.get_metadata_routing()) weighted_scorer = scorer.set_score_request(sample_weight=True) # set_score_request should mutate the instance assert weighted_scorer is scorer + assert_request_is_empty(weighted_scorer.get_metadata_routing(), exclude="score") assert ( - str(weighted_scorer.get_metadata_routing()) - == "{'score': {'sample_weight': }}" + 
weighted_scorer.get_metadata_routing().score.requests["sample_weight"] + == RequestType.REQUESTED ) # make sure putting the scorer in a router doesn't request anything From 203b4e4efa410be4597db2772f3be37a50d6d128 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Wed, 23 Mar 2022 11:23:26 +0100 Subject: [PATCH 14/37] fix and test _passthrough_scorer --- sklearn/metrics/_scorer.py | 10 +++++++--- sklearn/metrics/tests/test_score_objects.py | 13 +++++++++++++ 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py index 72a7e2a807c8f..09a70cf7a393c 100644 --- a/sklearn/metrics/_scorer.py +++ b/sklearn/metrics/_scorer.py @@ -67,6 +67,7 @@ from ..utils.metadata_routing import MetadataRequest from ..utils.metadata_routing import MetadataRouter from ..utils.metadata_routing import process_routing +from ..utils.metadata_routing import get_routing_for_object def _cached_call(cache, estimator, method, *args, **kwargs): @@ -541,9 +542,12 @@ def get_metadata_routing(self): A :class:`~utils.metadata_routing.MetadataRouter` encapsulating routing information. """ - return MetadataRouter(owner=self.__class__.__name__).add( - estimator=self._estimator, method_mapping="score" - ) + # This scorer doesn't do any validation or routing, it only exposes the + # score requests to the parent object. This object behaves as a + # consumer rather than a router. + res = MetadataRequest(owner=self._estimator.__class__.__name__) + res.score = get_routing_for_object(self._estimator).score + return res def check_scoring(estimator, scoring=None, *, allow_none=False): diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index 90d2820d339e9..5aafb5c6b610d 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -1213,3 +1213,16 @@ def test_metadata_kwarg_conflict(): with pytest.warns(UserWarning, match="There is an overlap"): scorer(lr, X, y, labels=lr.classes_) + + +def test_passthrough_scorer_metadata_request(): + scorer = _passthrough_scorer( + estimator=LinearSVC() + .set_score_request(sample_weight="alias") + .set_fit_request(sample_weight=True) + ) + # test that _passthrough_scorer leaves everything other than `score` empty + assert_request_is_empty(scorer.get_metadata_routing(), exclude="score") + # test that _passthrough_scorer doesn't behave like a router and leaves + # the request as is. + assert scorer.get_metadata_routing().score.requests["sample_weight"] == "alias" From 4e32f318900ab43afb2fe28559e53be786ba21e9 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Wed, 23 Mar 2022 14:20:32 +0100 Subject: [PATCH 15/37] Joel's comments --- doc/modules/model_evaluation.rst | 4 ++-- sklearn/metrics/_scorer.py | 4 ++-- sklearn/metrics/tests/test_score_objects.py | 12 ++++++------ 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/doc/modules/model_evaluation.rst b/doc/modules/model_evaluation.rst index d6d21b3814918..1e97e3b6cab6f 100644 --- a/doc/modules/model_evaluation.rst +++ b/doc/modules/model_evaluation.rst @@ -225,8 +225,8 @@ the following two rules: Again, by convention higher numbers are better, so if your scorer returns loss, that value should be negated. -- If it requires extra metadata to be passed to it, it should expose a - ``get_metadata_routing`` method returning the requested metadata. 
The user +- Advanced: If it requires extra metadata to be passed to it, it should expose + a ``get_metadata_routing`` method returning the requested metadata. The user should be able to set the requested metadata via a ``set_score_request`` method. Please see :ref:`User Guide ` for more details. diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py index 09a70cf7a393c..ffc12add29095 100644 --- a/sklearn/metrics/_scorer.py +++ b/sklearn/metrics/_scorer.py @@ -523,7 +523,7 @@ def get_scorer(scoring): return scorer -class _passthrough_scorer: +class _PassthroughScorer: def __init__(self, estimator): self._estimator = estimator @@ -602,7 +602,7 @@ def check_scoring(estimator, scoring=None, *, allow_none=False): return get_scorer(scoring) elif scoring is None: if hasattr(estimator, "score"): - return _passthrough_scorer(estimator) + return _PassthroughScorer(estimator) elif allow_none: return None else: diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index 5aafb5c6b610d..187ffdb7368e3 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -39,7 +39,7 @@ from sklearn.metrics import check_scoring from sklearn.metrics._scorer import ( _PredictScorer, - _passthrough_scorer, + _PassthroughScorer, _MultimetricScorer, _check_multimetric_scoring, ) @@ -238,7 +238,7 @@ def check_scoring_validator_for_single_metric_usecases(scoring_validator): estimator = EstimatorWithFitAndScore() estimator.fit([[1]], [1]) scorer = scoring_validator(estimator) - assert isinstance(scorer, _passthrough_scorer) + assert isinstance(scorer, _PassthroughScorer) assert_almost_equal(scorer(estimator, [[1]], [1]), 1.0) estimator = EstimatorWithFitAndPredict() @@ -1215,14 +1215,14 @@ def test_metadata_kwarg_conflict(): scorer(lr, X, y, labels=lr.classes_) -def test_passthrough_scorer_metadata_request(): - scorer = _passthrough_scorer( +def test_PassthroughScorer_metadata_request(): + scorer = _PassthroughScorer( estimator=LinearSVC() .set_score_request(sample_weight="alias") .set_fit_request(sample_weight=True) ) - # test that _passthrough_scorer leaves everything other than `score` empty + # test that _PassthroughScorer leaves everything other than `score` empty assert_request_is_empty(scorer.get_metadata_routing(), exclude="score") - # test that _passthrough_scorer doesn't behave like a router and leaves + # test that _PassthroughScorer doesn't behave like a router and leaves # the request as is. assert scorer.get_metadata_routing().score.requests["sample_weight"] == "alias" From da3c6a64e7c9668f5916d1bc8865773e42b7b01f Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Thu, 24 Mar 2022 10:27:22 +0100 Subject: [PATCH 16/37] writing test --- sklearn/tests/test_metaestimators.py | 68 ++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/sklearn/tests/test_metaestimators.py b/sklearn/tests/test_metaestimators.py index e743741f6fa43..ba242cacf319a 100644 --- a/sklearn/tests/test_metaestimators.py +++ b/sklearn/tests/test_metaestimators.py @@ -299,3 +299,71 @@ def test_meta_estimators_delegate_data_validation(estimator): # n_features_in_ should not be defined since data is not tabular data. 
assert not hasattr(estimator, "n_features_in_") + + +def _generate_meta_estimator_instances(): + for name, Estimator in all_estimators(): + is_meta_estimator = ( + name.endswith("CV") + or getattr(Estimator, "_required_parameters", None) + or set(Estimator().get_params().keys()).intersection( + [ + "steps", + "estimator", + "estimators", + "base_estimator", + "regressor", + "classifier", + ] + ) + ) + if not is_meta_estimator: + continue + + sig = set(signature(Estimator).parameters) + + if "estimator" in sig or "base_estimator" in sig or "regressor" in sig: + if is_regressor(Estimator): + estimator = make_pipeline(TfidfVectorizer(), Ridge()) + param_grid = {"ridge__alpha": [0.1, 1.0]} + else: + estimator = make_pipeline(TfidfVectorizer(), LogisticRegression()) + param_grid = {"logisticregression__C": [0.1, 1.0]} + + if "param_grid" in sig or "param_distributions" in sig: + # SearchCV estimators + extra_params = {"n_iter": 2} if "n_iter" in sig else {} + yield Estimator(estimator, param_grid, **extra_params) + else: + yield Estimator(estimator) + + elif "transformer_list" in sig: + # FeatureUnion + transformer_list = [ + ("trans1", make_pipeline(TfidfVectorizer(), MaxAbsScaler())), + ( + "trans2", + make_pipeline(TfidfVectorizer(), StandardScaler(with_mean=False)), + ), + ] + yield Estimator(transformer_list) + + elif "estimators" in sig: + # stacking, voting + if is_regressor(Estimator): + estimator = [ + ("est1", make_pipeline(TfidfVectorizer(), Ridge(alpha=0.1))), + ("est2", make_pipeline(TfidfVectorizer(), Ridge(alpha=1))), + ] + else: + estimator = [ + ( + "est1", + make_pipeline(TfidfVectorizer(), LogisticRegression(C=0.1)), + ), + ("est2", make_pipeline(TfidfVectorizer(), LogisticRegression(C=1))), + ] + yield Estimator(estimator) + + else: + continue From 44f91cecd1c509a8b77bb30766dfd1332d55f2a8 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Thu, 24 Mar 2022 10:30:30 +0100 Subject: [PATCH 17/37] Thomas's comments --- sklearn/metrics/_scorer.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py index ffc12add29095..bf345ff6a0344 100644 --- a/sklearn/metrics/_scorer.py +++ b/sklearn/metrics/_scorer.py @@ -333,8 +333,7 @@ def _score(self, method_caller, estimator, X, y_true, **kwargs): kwargs=kwargs, ) y_pred = method_caller(estimator, "predict", X) - scoring_kwargs = copy.deepcopy(self._kwargs) - scoring_kwargs.update(kwargs) + scoring_kwargs = {**self._kwargs, **kwargs} return self._sign * self._score_func(y_true, y_pred, **scoring_kwargs) @@ -387,11 +386,10 @@ def _score(self, method_caller, clf, X, y, **kwargs): # Thus, we need to check for the shape of `y_pred`. 
y_pred = self._select_proba_binary(y_pred, clf.classes_) - scoring_kwargs = copy.deepcopy(self._kwargs) - scoring_kwargs.update(kwargs) + scoring_kwargs = {**self._kwargs, **kwargs} # this is for backward compatibility to avoid passing sample_weight # to the scorer if it's None - # probably remove in v1.3 + # TODO(1.3) Probably remove if scoring_kwargs.get("sample_weight", -1) is None: del scoring_kwargs["sample_weight"] @@ -473,11 +471,10 @@ def _score(self, method_caller, clf, X, y, **kwargs): elif isinstance(y_pred, list): y_pred = np.vstack([p[:, -1] for p in y_pred]).T - scoring_kwargs = copy.deepcopy(self._kwargs) - scoring_kwargs.update(kwargs) + scoring_kwargs = {**self._kwargs, **kwargs} # this is for backward compatibility to avoid passing sample_weight # to the scorer if it's None - # probably remove in v1.3 + # TODO(1.3) Probably remove if scoring_kwargs.get("sample_weight", -1) is None: del scoring_kwargs["sample_weight"] return self._sign * self._score_func(y, y_pred, **scoring_kwargs) From 7421fe4bc64c16656153f4620ed7aadaf93c601a Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Fri, 25 Mar 2022 16:51:48 +0100 Subject: [PATCH 18/37] for Thomas --- sklearn/tests/test_metaestimators.py | 101 ++++++++++++++++++++++++++- 1 file changed, 99 insertions(+), 2 deletions(-) diff --git a/sklearn/tests/test_metaestimators.py b/sklearn/tests/test_metaestimators.py index ba242cacf319a..4f3c255ffc9f7 100644 --- a/sklearn/tests/test_metaestimators.py +++ b/sklearn/tests/test_metaestimators.py @@ -22,6 +22,9 @@ from sklearn.semi_supervised import SelfTrainingClassifier from sklearn.linear_model import Ridge, LogisticRegression from sklearn.preprocessing import StandardScaler, MaxAbsScaler +from sklearn.tests.test_metadata_routing import assert_request_is_empty +from sklearn.tests.test_metadata_routing import check_recorded_metadata +from sklearn.tests.test_metadata_routing import record_metadata class DelegatorData: @@ -301,7 +304,81 @@ def test_meta_estimators_delegate_data_validation(estimator): assert not hasattr(estimator, "n_features_in_") -def _generate_meta_estimator_instances(): +class RegressorSubEstimator(RegressorMixin, BaseEstimator): + """A regressor consuming metadata.""" + + def __init__(self, **kwargs): + for param, value in kwargs.items(): + setattr(self, param, value) + + def fit(self, X, y, sample_weight=None, metadata=None): + record_metadata(self, "fit", sample_weight=sample_weight, metadata=metadata) + return self + + def predict(self, X, y, sample_weight=None, metadata=None): + record_metadata(self, "fit", sample_weight=sample_weight, metadata=metadata) + return np.zeros(shape=(len(X))) + + +class ClassifierSubEstimator(ClassifierMixin, BaseEstimator): + """A classifier consuming metadata.""" + + def __init__(self, **kwargs): + for param, value in kwargs.items(): + setattr(self, param, value) + + def fit(self, X, y, sample_weight=None, metadata=None): + record_metadata(self, "fit", sample_weight=sample_weight, metadata=metadata) + self.classes_ = [] + return self + + def predict(self, X, y, sample_weight=None, metadata=None): + record_metadata(self, "fit", sample_weight=sample_weight, metadata=metadata) + return np.zeros(shape=(len(X))) + + def predict_proba(self, X, y, sample_weight=None, metadata=None): + record_metadata(self, "fit", sample_weight=sample_weight, metadata=metadata) + return np.zeros(shape=(len(X))) + + def predict_log_proba(self, X, y, sample_weight=None, metadata=None): + record_metadata(self, "fit", sample_weight=sample_weight, metadata=metadata) + 
return np.zeros(shape=(len(X))) + + +class TransformerSubEstimator(TransformerMixin, BaseEstimator): + """A classifier consuming metadata.""" + + def __init__(self, **kwargs): + for param, value in kwargs.items(): + setattr(self, param, value) + + def fit(self, X, y, sample_weight=None, metadata=None): + record_metadata(self, "fit", sample_weight=sample_weight, metadata=metadata) + self.classes_ = [] + return self + + def transform(self, X, y, sample_weight=None, metadata=None): + record_metadata(self, "fit", sample_weight=sample_weight, metadata=metadata) + return np.zeros(shape=(len(X))) + + def inverse_transform(self, X, y, sample_weight=None, metadata=None): + record_metadata(self, "fit", sample_weight=sample_weight, metadata=metadata) + return np.zeros(shape=(len(X))) + + +class MetaEstimatorData: + """Stores information on how to construct and use a meta-estimator in tests.""" + + def __init__( + self, *, name, construct, routing_methods=(), fit_args=make_classification() + ): + self.name = name + self.construct = construct + self.fit_args = fit_args + self.routing_methods = routing_methods + + +def _generate_meta_estimator_data_for_metadata_routing(): for name, Estimator in all_estimators(): is_meta_estimator = ( name.endswith("CV") @@ -322,7 +399,9 @@ def _generate_meta_estimator_instances(): sig = set(signature(Estimator).parameters) - if "estimator" in sig or "base_estimator" in sig or "regressor" in sig: + if set("estimator", "base_estimator", "regressor", "classifier").intersection( + sig + ): if is_regressor(Estimator): estimator = make_pipeline(TfidfVectorizer(), Ridge()) param_grid = {"ridge__alpha": [0.1, 1.0]} @@ -367,3 +446,21 @@ def _generate_meta_estimator_instances(): else: continue + + +def check_meta_estimator_metadata_routing(MetaEstimator, Estimator, methods): + # Check that by default request is empty + est = Estimator() + meta_est = MetaEstimator(est) + assert_request_is_empty(meta_est.get_metadata_routing()) + + # Check routing of metadata + if isinstance(methods, str): + methods = [methods] + for method in methods: + est = getattr(Estimator(), f"set_{method}_request")( + sample_weight=True, metadata="alias" + ) + meta_est = MetaEstimator(est) + with pytest.raises(TypeError, match="here"): + meta_est.fit(X, y, sample_weight=[1], metadata="test") From 380b20afd3360fa2c7fd88131c97fb2e059db246 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Mon, 28 Mar 2022 13:41:00 +0200 Subject: [PATCH 19/37] ... --- sklearn/multioutput.py | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/sklearn/multioutput.py b/sklearn/multioutput.py index 24e4cc8dda7e8..6965e7626e427 100644 --- a/sklearn/multioutput.py +++ b/sklearn/multioutput.py @@ -27,6 +27,7 @@ from .utils.validation import check_is_fitted, has_fit_parameter, _check_fit_params from .utils.multiclass import check_classification_targets from .utils.fixes import delayed +from .utils.metadata_routing import MetadataRouter, MethodMapping, process_routing __all__ = [ "MultiOutputRegressor", @@ -150,7 +151,7 @@ def partial_fit(self, X, y, classes=None, sample_weight=None): return self - def fit(self, X, y, sample_weight=None, **fit_params): + def fit(self, X, y, **fit_params): """Fit the model to data, separately for each output variable. Parameters @@ -162,11 +163,6 @@ def fit(self, X, y, sample_weight=None, **fit_params): Multi-output targets. An indicator matrix turns on multilabel estimation. - sample_weight : array-like of shape (n_samples,), default=None - Sample weights. 
If `None`, then samples are equally weighted. - Only supported if the underlying regressor supports sample - weights. - **fit_params : dict of string -> object Parameters passed to the ``estimator.fit`` method of each step. @@ -192,13 +188,6 @@ def fit(self, X, y, sample_weight=None, **fit_params): "multi-output regression but has only one." ) - if sample_weight is not None and not has_fit_parameter( - self.estimator, "sample_weight" - ): - raise ValueError("Underlying estimator does not support sample weights.") - - fit_params_validated = _check_fit_params(X, fit_params) - self.estimators_ = Parallel(n_jobs=self.n_jobs)( delayed(_fit_estimator)( self.estimator, X, y[:, i], sample_weight, **fit_params_validated @@ -240,6 +229,19 @@ def predict(self, X): def _more_tags(self): return {"multioutput_only": True} + def get_metadata_routing(self): + router = ( + MetadataRouter(owner=self.__class__.__name__) + .add( + estimator=self.estimator, + method_mapping=MethodMapping() + .add(callee="partial_fit", caller="fit") + .add(callee="fit", caller="fit"), + ) + .warn_on("estimator", "fit", "partial_fit") + ) + return router + class MultiOutputRegressor(RegressorMixin, _MultiOutputEstimator): """Multi target regression. From 1c64ec01124e14c8ea45625c750f6d068bb18c13 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Mon, 28 Mar 2022 17:24:53 +0200 Subject: [PATCH 20/37] ... --- conftest.py | 3 + sklearn/conftest.py | 4 - sklearn/exceptions.py | 9 + sklearn/multioutput.py | 40 ++--- sklearn/tests/test_metadata_routing.py | 54 ++++++ sklearn/tests/test_metaestimators.py | 162 ------------------ .../test_metaestimators_metadata_routing.py | 19 ++ sklearn/utils/_metadata_requests.py | 119 ++++++++++++- 8 files changed, 220 insertions(+), 190 deletions(-) create mode 100644 sklearn/tests/test_metaestimators_metadata_routing.py diff --git a/conftest.py b/conftest.py index e4e478d2d72d7..74006980443bc 100644 --- a/conftest.py +++ b/conftest.py @@ -4,3 +4,6 @@ # details. For example, this allows to build extensions in place and run pytest # doc/modules/clustering.rst and use sklearn from the local folder rather than # the one from site-packages. + +# This plugin is necessary to define the random seed fixture +pytest_plugins = ("sklearn.tests.random_seed",) diff --git a/sklearn/conftest.py b/sklearn/conftest.py index b6bde190ad3fb..766803cf2c56e 100644 --- a/sklearn/conftest.py +++ b/sklearn/conftest.py @@ -21,10 +21,6 @@ from sklearn.datasets import fetch_rcv1 -# This plugin is necessary to define the random seed fixture -pytest_plugins = ("sklearn.tests.random_seed",) - - if parse_version(pytest.__version__) < parse_version(PYTEST_MIN_VERSION): raise ImportError( "Your version of pytest is too old, you should have " diff --git a/sklearn/exceptions.py b/sklearn/exceptions.py index d84c1f6b40526..42b10b9c529d2 100644 --- a/sklearn/exceptions.py +++ b/sklearn/exceptions.py @@ -13,9 +13,18 @@ "SkipTestWarning", "UndefinedMetricWarning", "PositiveSpectrumWarning", + "UnsetMetadataPassedError", ] +class UnsetMetadataPassedError(ValueError): + """Exception class to raise if a metadata is passed which is not explicitly \ + requested. + + .. versionadded:: 1.1 + """ + + class NotFittedError(ValueError, AttributeError): """Exception class to raise if estimator is used before fitting. 
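As an illustrative aside (not part of the patch), the intent of the new `UnsetMetadataPassedError` and of the `MultiOutputRegressor` changes introduced just below can be sketched as follows, assuming the `set_fit_request` API added earlier in this series; under the behaviour introduced here, passing unrequested metadata warns during the deprecation period and errors afterwards:

    import numpy as np
    from sklearn.linear_model import Ridge
    from sklearn.multioutput import MultiOutputRegressor

    X = np.random.rand(20, 3)
    y = np.random.rand(20, 2)
    w = np.ones(20)

    # The inner estimator has not declared whether it wants sample_weight,
    # so routing it is ambiguous: the router emits a FutureWarning during
    # the deprecation period and later raises UnsetMetadataPassedError.
    MultiOutputRegressor(Ridge()).fit(X, y, sample_weight=w)

    # Explicitly requesting the metadata on the sub-estimator removes the
    # ambiguity, and sample_weight is routed to Ridge.fit.
    MultiOutputRegressor(Ridge().set_fit_request(sample_weight=True)).fit(
        X, y, sample_weight=w
    )
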
diff --git a/sklearn/multioutput.py b/sklearn/multioutput.py index 6965e7626e427..343dbf0353fe7 100644 --- a/sklearn/multioutput.py +++ b/sklearn/multioutput.py @@ -47,21 +47,16 @@ def _fit_estimator(estimator, X, y, sample_weight=None, **fit_params): def _partial_fit_estimator( - estimator, X, y, classes=None, sample_weight=None, first_time=True + estimator, X, y, classes=None, partial_fit_params=None, first_time=True ): + partial_fit_params = {} if partial_fit_params is None else partial_fit_params if first_time: estimator = clone(estimator) - if sample_weight is not None: - if classes is not None: - estimator.partial_fit(X, y, classes=classes, sample_weight=sample_weight) - else: - estimator.partial_fit(X, y, sample_weight=sample_weight) + if classes is not None: + estimator.partial_fit(X, y, classes=classes, **partial_fit_params) else: - if classes is not None: - estimator.partial_fit(X, y, classes=classes) - else: - estimator.partial_fit(X, y) + estimator.partial_fit(X, y, **partial_fit_params) return estimator @@ -106,10 +101,10 @@ def partial_fit(self, X, y, classes=None, sample_weight=None): and can be omitted in the subsequent calls. Note that `y` doesn't need to contain all labels in `classes`. - sample_weight : array-like of shape (n_samples,), default=None - Sample weights. If `None`, then samples are equally weighted. - Only supported if the underlying regressor supports sample - weights. + **partial_fit_params : dict of string -> object + Parameters passed to the ``estimator.partial_fit`` method of each step. + + .. versionadded:: 1.1 Returns ------- @@ -125,10 +120,11 @@ def partial_fit(self, X, y, classes=None, sample_weight=None): "multi-output regression but has only one." ) - if sample_weight is not None and not has_fit_parameter( - self.estimator, "sample_weight" - ): - raise ValueError("Underlying estimator does not support sample weights.") + routed_params = process_routing( + obj=self, + method="fit", + other_params=partial_fit_params, + ) first_time = not hasattr(self, "estimators_") @@ -138,8 +134,8 @@ def partial_fit(self, X, y, classes=None, sample_weight=None): X, y[:, i], classes[i] if classes is not None else None, - sample_weight, - first_time, + partial_fit_params=routed_params.estimator.partial_fit, + first_time=first_time, ) for i in range(y.shape[1]) ) @@ -188,9 +184,11 @@ def fit(self, X, y, **fit_params): "multi-output regression but has only one." 
) + routed_params = process_routing(obj=self, method="fit", other_params=fit_params) + self.estimators_ = Parallel(n_jobs=self.n_jobs)( delayed(_fit_estimator)( - self.estimator, X, y[:, i], sample_weight, **fit_params_validated + self.estimator, X, y[:, i], **routed_params.estimator.fit ) for i in range(y.shape[1]) ) diff --git a/sklearn/tests/test_metadata_routing.py b/sklearn/tests/test_metadata_routing.py index 32d2c4e21551a..85af67bd9a900 100644 --- a/sklearn/tests/test_metadata_routing.py +++ b/sklearn/tests/test_metadata_routing.py @@ -844,3 +844,57 @@ def test_metadata_routing_get_param_names(): assert router._get_param_names( method="fit", return_alias=True, ignore_self=True ) == router._get_param_names(method="fit", return_alias=False, ignore_self=True) + + +def test_router_deprecation_warning(): + class MetaEstimator(BaseEstimator, MetaEstimatorMixin): + def __init__(self, estimator): + self.estimator = estimator + + def fit(self, X, y, **fit_params): + routed_params = process_routing(self, "fit", fit_params) + self.estimator_ = clone(self.estimator).fit( + X, y, **routed_params.estimator.fit + ) + + def predict(self, X, **predict_params): + routed_params = process_routing(self, "predict", predict_params) + return self.estimator_.predict(X, **routed_params.estimator.predict) + + def get_metadata_routing(self): + return ( + MetadataRouter(owner=self.__class__.__name__) + .add(estimator=self.estimator, method_mapping="one-to-one") + .warn_on(child="estimator", methods=["fit"], raise_on="1.3") + ) + + class Estimator(BaseEstimator): + def fit(self, X, y, sample_weight=None): + return self + + def predict(self, X, sample_weight=None): + return np.ones(shape=len(X)) + + est = MetaEstimator(estimator=Estimator()) + # the meta-estimator has est to have a warning on `fit`. + with pytest.warns( + FutureWarning, match="From version 1.3 this results in the following error" + ): + est.fit(X, y, sample_weight=my_weights) + + # but predict should raise since there is no warn_on set for it. + with pytest.raises( + ValueError, match="sample_weight is passed but is not explicitly set" + ): + est.predict(X, sample_weight=my_weights) + + # also check if passing a meta-estimator as a child object warns + est = MetaEstimator( + estimator=WeightedMetaRegressor( + estimator=RegressorMetadata().set_fit_request(sample_weight=True) + ) + ) + with pytest.warns( + FutureWarning, match="From version 1.3 this results in the following error" + ): + est.fit(X, y, sample_weight=my_weights) diff --git a/sklearn/tests/test_metaestimators.py b/sklearn/tests/test_metaestimators.py index 4f3c255ffc9f7..8faf4f4b7f154 100644 --- a/sklearn/tests/test_metaestimators.py +++ b/sklearn/tests/test_metaestimators.py @@ -302,165 +302,3 @@ def test_meta_estimators_delegate_data_validation(estimator): # n_features_in_ should not be defined since data is not tabular data. 
assert not hasattr(estimator, "n_features_in_") - - -class RegressorSubEstimator(RegressorMixin, BaseEstimator): - """A regressor consuming metadata.""" - - def __init__(self, **kwargs): - for param, value in kwargs.items(): - setattr(self, param, value) - - def fit(self, X, y, sample_weight=None, metadata=None): - record_metadata(self, "fit", sample_weight=sample_weight, metadata=metadata) - return self - - def predict(self, X, y, sample_weight=None, metadata=None): - record_metadata(self, "fit", sample_weight=sample_weight, metadata=metadata) - return np.zeros(shape=(len(X))) - - -class ClassifierSubEstimator(ClassifierMixin, BaseEstimator): - """A classifier consuming metadata.""" - - def __init__(self, **kwargs): - for param, value in kwargs.items(): - setattr(self, param, value) - - def fit(self, X, y, sample_weight=None, metadata=None): - record_metadata(self, "fit", sample_weight=sample_weight, metadata=metadata) - self.classes_ = [] - return self - - def predict(self, X, y, sample_weight=None, metadata=None): - record_metadata(self, "fit", sample_weight=sample_weight, metadata=metadata) - return np.zeros(shape=(len(X))) - - def predict_proba(self, X, y, sample_weight=None, metadata=None): - record_metadata(self, "fit", sample_weight=sample_weight, metadata=metadata) - return np.zeros(shape=(len(X))) - - def predict_log_proba(self, X, y, sample_weight=None, metadata=None): - record_metadata(self, "fit", sample_weight=sample_weight, metadata=metadata) - return np.zeros(shape=(len(X))) - - -class TransformerSubEstimator(TransformerMixin, BaseEstimator): - """A classifier consuming metadata.""" - - def __init__(self, **kwargs): - for param, value in kwargs.items(): - setattr(self, param, value) - - def fit(self, X, y, sample_weight=None, metadata=None): - record_metadata(self, "fit", sample_weight=sample_weight, metadata=metadata) - self.classes_ = [] - return self - - def transform(self, X, y, sample_weight=None, metadata=None): - record_metadata(self, "fit", sample_weight=sample_weight, metadata=metadata) - return np.zeros(shape=(len(X))) - - def inverse_transform(self, X, y, sample_weight=None, metadata=None): - record_metadata(self, "fit", sample_weight=sample_weight, metadata=metadata) - return np.zeros(shape=(len(X))) - - -class MetaEstimatorData: - """Stores information on how to construct and use a meta-estimator in tests.""" - - def __init__( - self, *, name, construct, routing_methods=(), fit_args=make_classification() - ): - self.name = name - self.construct = construct - self.fit_args = fit_args - self.routing_methods = routing_methods - - -def _generate_meta_estimator_data_for_metadata_routing(): - for name, Estimator in all_estimators(): - is_meta_estimator = ( - name.endswith("CV") - or getattr(Estimator, "_required_parameters", None) - or set(Estimator().get_params().keys()).intersection( - [ - "steps", - "estimator", - "estimators", - "base_estimator", - "regressor", - "classifier", - ] - ) - ) - if not is_meta_estimator: - continue - - sig = set(signature(Estimator).parameters) - - if set("estimator", "base_estimator", "regressor", "classifier").intersection( - sig - ): - if is_regressor(Estimator): - estimator = make_pipeline(TfidfVectorizer(), Ridge()) - param_grid = {"ridge__alpha": [0.1, 1.0]} - else: - estimator = make_pipeline(TfidfVectorizer(), LogisticRegression()) - param_grid = {"logisticregression__C": [0.1, 1.0]} - - if "param_grid" in sig or "param_distributions" in sig: - # SearchCV estimators - extra_params = {"n_iter": 2} if "n_iter" in sig else {} - 
yield Estimator(estimator, param_grid, **extra_params) - else: - yield Estimator(estimator) - - elif "transformer_list" in sig: - # FeatureUnion - transformer_list = [ - ("trans1", make_pipeline(TfidfVectorizer(), MaxAbsScaler())), - ( - "trans2", - make_pipeline(TfidfVectorizer(), StandardScaler(with_mean=False)), - ), - ] - yield Estimator(transformer_list) - - elif "estimators" in sig: - # stacking, voting - if is_regressor(Estimator): - estimator = [ - ("est1", make_pipeline(TfidfVectorizer(), Ridge(alpha=0.1))), - ("est2", make_pipeline(TfidfVectorizer(), Ridge(alpha=1))), - ] - else: - estimator = [ - ( - "est1", - make_pipeline(TfidfVectorizer(), LogisticRegression(C=0.1)), - ), - ("est2", make_pipeline(TfidfVectorizer(), LogisticRegression(C=1))), - ] - yield Estimator(estimator) - - else: - continue - - -def check_meta_estimator_metadata_routing(MetaEstimator, Estimator, methods): - # Check that by default request is empty - est = Estimator() - meta_est = MetaEstimator(est) - assert_request_is_empty(meta_est.get_metadata_routing()) - - # Check routing of metadata - if isinstance(methods, str): - methods = [methods] - for method in methods: - est = getattr(Estimator(), f"set_{method}_request")( - sample_weight=True, metadata="alias" - ) - meta_est = MetaEstimator(est) - with pytest.raises(TypeError, match="here"): - meta_est.fit(X, y, sample_weight=[1], metadata="test") diff --git a/sklearn/tests/test_metaestimators_metadata_routing.py b/sklearn/tests/test_metaestimators_metadata_routing.py new file mode 100644 index 0000000000000..89d9727cdeeb9 --- /dev/null +++ b/sklearn/tests/test_metaestimators_metadata_routing.py @@ -0,0 +1,19 @@ +import numpy as np +from sklearn.base import RegressorMixin, BaseEstimator +from sklearn.tests.test_metadata_routing import record_metadata, check_recorded_metadata + + +class RegressorSubEstimator(RegressorMixin, BaseEstimator): + """A regressor consuming metadata.""" + + def __init__(self, **kwargs): + for param, value in kwargs.items(): + setattr(self, param, value) + + def fit(self, X, y, sample_weight=None, metadata=None): + record_metadata(self, "fit", sample_weight=sample_weight, metadata=metadata) + return self + + def predict(self, X, y, sample_weight=None, metadata=None): + record_metadata(self, "fit", sample_weight=sample_weight, metadata=metadata) + return np.zeros(shape=(len(X))) diff --git a/sklearn/utils/_metadata_requests.py b/sklearn/utils/_metadata_requests.py index 9f6527f6809bf..b5d8a62df9f26 100644 --- a/sklearn/utils/_metadata_requests.py +++ b/sklearn/utils/_metadata_requests.py @@ -12,6 +12,7 @@ from collections import namedtuple from typing import Union, Optional from ._bunch import Bunch +from ..exceptions import UnsetMetadataPassedError # This namedtuple is used to store a (mapping, routing) pair. Mapping is a # MethodMapping object, and routing is the output of `get_metadata_routing`. 
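The hunks below add the machinery (`_assume_requested`, `warn_on`, `_route_warn_or_error`) that lets a meta-estimator downgrade the "unrequested metadata" error to a `FutureWarning` during the deprecation period. As a minimal sketch of how a meta-estimator is expected to opt in (illustrative only; `MyMeta` is a made-up name, and the pattern mirrors the `MultiOutputRegressor` and test code elsewhere in this series):

    from sklearn.base import BaseEstimator, MetaEstimatorMixin, clone
    from sklearn.utils.metadata_routing import (
        MetadataRouter,
        MethodMapping,
        process_routing,
    )

    class MyMeta(BaseEstimator, MetaEstimatorMixin):
        def __init__(self, estimator):
            self.estimator = estimator

        def fit(self, X, y, **fit_params):
            # Route fit_params to the sub-estimator; unrequested metadata
            # only triggers a FutureWarning here because of warn_on below.
            routed = process_routing(obj=self, method="fit", other_params=fit_params)
            self.estimator_ = clone(self.estimator).fit(X, y, **routed.estimator.fit)
            return self

        def get_metadata_routing(self):
            return (
                MetadataRouter(owner=self.__class__.__name__)
                .add(
                    estimator=self.estimator,
                    method_mapping=MethodMapping().add(callee="fit", caller="fit"),
                )
                .warn_on(child="estimator", methods=["fit"], raise_on="1.3")
            )
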
@@ -177,6 +178,17 @@ def requests(self): """Dictionary of the form: ``{key: alias}``.""" return self._requests + def _assume_requested(self): + """Convert all ERROR_IF_PASSED to REQUESTED.""" + res = MethodMetadataRequest(owner=self.owner, method=self.method) + res._requests = { + param: ( + alias if alias != RequestType.ERROR_IF_PASSED else RequestType.REQUESTED + ) + for param, alias in self._requests.items() + } + return res + def add_request( self, *, @@ -299,7 +311,7 @@ def _route_params(self, params=None): elif alias == RequestType.REQUESTED and prop in args: res[prop] = args[prop] elif alias == RequestType.ERROR_IF_PASSED and prop in args: - raise ValueError( + raise UnsetMetadataPassedError( f"{prop} is passed but is not explicitly set as " f"requested or not for {self.owner}.{self.method}" ) @@ -350,9 +362,18 @@ class MetadataRequest: _type = "metadata_request" def __init__(self, owner): + # this is used to check if the user has set any request values + self._is_default_request = False for method in METHODS: setattr(self, method, MethodMetadataRequest(owner=owner, method=method)) + def _assume_requested(self): + """Convert all ERROR_IF_PASSED to REQUESTED.""" + res = MetadataRequest(owner=None) + for method in METHODS: + setattr(self, method, getattr(self, method)._assume_requested()) + return res + def _get_param_names(self, method, return_alias, ignore_self=None): """Get names of all metadata that can be consumed or routed by specified \ method. @@ -566,6 +587,9 @@ def __init__(self, owner): # `add_self()`) is treated differently from the other objects which are # stored in _route_mappings. self._self = None + # this attribute is used to decide if there should be an error raised + # or a FutureWarning if a metadata is passed which is not requested. + self._warn_on = dict() self.owner = owner def add_self(self, obj): @@ -763,11 +787,61 @@ def route_params(self, *, caller, params): res[name] = Bunch() for _callee, _caller in mapping: if _caller == caller: - res[name][_callee] = router._route_params( - params=params, method=_callee + res[name][_callee] = self._route_warn_or_error( + child=name, router=router, params=params, method=_callee ) return res + def _route_warn_or_error(self, *, child, router, params, method): + """Route parameters wile handling error or deprecation warning choice. + + This method warns instead of raising an error if the parent object + has set ``warn_on`` for the child object's method and the user has not + set any metadata request for that child object. This is used during the + deprecation cycle for backward compatibility. + + Parameters + ---------- + child : str + The name of the child object. + + router : MetadataRouter or MetadataRequest + The router for the child object. + + params : dict + The parameters to be routed. + + method : str + The name of the callee method. + + Returns + ------- + dict + The routed parameters. + """ + try: + res = router._route_params(params=params, method=method) + except UnsetMetadataPassedError as e: + warn_on = self._warn_on.get(child, {"methods": [], "raise_on": "1.3"}) + if method not in warn_on["methods"]: + # there is no warn_on set for this method of this child object, + # we raise as usual + raise + if not getattr(router, "_is_default_request", None): + # the user has set at least one request value for this child + # object, but not for all of them. Therefore we raise as usual. 
+ raise + # otherwise raise a FutureWarning + router = router._assume_requested() + res = router._route_params(params=params, method=method) + warn( + "You are passing metadata for which the request values are not" + f" explicitly set. From version {warn_on['raise_on']} this results in" + f" the following error: {str(e)}", + FutureWarning, + ) + return res + def validate_metadata(self, *, method, params): """Validate given metadata for a method. @@ -798,6 +872,42 @@ def validate_metadata(self, *, method, params): "not requested metadata in any object." ) + def warn_on(self, child, methods, raise_on="1.3"): + """Set where deprecation warnings on no set requests should occur. + + This method is used in meta-estimators during the transition period for + backward compatibility. Expected behavior for meta-estimators on a code + such as ``RFE(Ridge()).fit(X, y, sample_weight=sample_weight)`` is to + raise a ``ValueError`` complaining about the fact that ``Ridge()`` has + not explicitly set the request values for ``sample_weight``. However, + this breaks backward compatibility for existing meta-estimators. + + Calling this method on a ``MetadataRouter`` object such as + ``warn_on(child='estimator', methods=['fit', 'score'])`` tells the + router to raise a ``FutureWarning`` instead of a ``ValueError`` if the + child object has no set requests. + + Parameters + ---------- + child : str + The name of the child object. + + methods : list of str + List of methods for which there should be a ``FutureWarning`` + instead of a ``ValueError``. + + raise_on : str, default="1.3" + The version after which there should be an error. Used in the + warning message to inform users. + + Returns + ------- + self : MetadataRouter + Returns `self`. + """ + self._warn_on[child] = {"methods": methods, "raise_on": raise_on} + return self + def _serialize(self): """Serialize the object. @@ -1055,6 +1165,9 @@ class attributes, as well as determining request keys from method signatures. """ requests = MetadataRequest(owner=cls.__name__) + # this indicates that the user has not set any request values for this + # object + requests._is_default_request = True for method in METHODS: setattr(requests, method, cls._build_request_for_signature(method=method)) From f15ef555b4bae2e9a4d0493833e6d71417fcdcc8 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Tue, 29 Mar 2022 12:06:39 +0200 Subject: [PATCH 21/37] all --- conftest.py | 3 - sklearn/conftest.py | 4 + sklearn/multioutput.py | 31 ++--- sklearn/tests/test_metadata_routing.py | 52 ++++++-- .../test_metaestimators_metadata_routing.py | 111 +++++++++++++++++- sklearn/utils/_metadata_requests.py | 76 ++++++++++-- 6 files changed, 233 insertions(+), 44 deletions(-) diff --git a/conftest.py b/conftest.py index 74006980443bc..e4e478d2d72d7 100644 --- a/conftest.py +++ b/conftest.py @@ -4,6 +4,3 @@ # details. For example, this allows to build extensions in place and run pytest # doc/modules/clustering.rst and use sklearn from the local folder rather than # the one from site-packages. 
- -# This plugin is necessary to define the random seed fixture -pytest_plugins = ("sklearn.tests.random_seed",) diff --git a/sklearn/conftest.py b/sklearn/conftest.py index 766803cf2c56e..b6bde190ad3fb 100644 --- a/sklearn/conftest.py +++ b/sklearn/conftest.py @@ -21,6 +21,10 @@ from sklearn.datasets import fetch_rcv1 +# This plugin is necessary to define the random seed fixture +pytest_plugins = ("sklearn.tests.random_seed",) + + if parse_version(pytest.__version__) < parse_version(PYTEST_MIN_VERSION): raise ImportError( "Your version of pytest is too old, you should have " diff --git a/sklearn/multioutput.py b/sklearn/multioutput.py index 343dbf0353fe7..9af6775b59767 100644 --- a/sklearn/multioutput.py +++ b/sklearn/multioutput.py @@ -24,7 +24,7 @@ from .model_selection import cross_val_predict from .utils.metaestimators import available_if from .utils import check_random_state -from .utils.validation import check_is_fitted, has_fit_parameter, _check_fit_params +from .utils.validation import check_is_fitted from .utils.multiclass import check_classification_targets from .utils.fixes import delayed from .utils.metadata_routing import MetadataRouter, MethodMapping, process_routing @@ -81,7 +81,7 @@ def __init__(self, estimator, *, n_jobs=None): self.n_jobs = n_jobs @_available_if_estimator_has("partial_fit") - def partial_fit(self, X, y, classes=None, sample_weight=None): + def partial_fit(self, X, y, classes=None, **partial_fit_params): """Incrementally fit a separate model for each class output. Parameters @@ -122,7 +122,7 @@ def partial_fit(self, X, y, classes=None, sample_weight=None): routed_params = process_routing( obj=self, - method="fit", + method="partial_fit", other_params=partial_fit_params, ) @@ -233,10 +233,10 @@ def get_metadata_routing(self): .add( estimator=self.estimator, method_mapping=MethodMapping() - .add(callee="partial_fit", caller="fit") + .add(callee="partial_fit", caller="partial_fit") .add(callee="fit", caller="fit"), ) - .warn_on("estimator", "fit", "partial_fit") + .warn_on(child="estimator", methods=["fit", "partial_fit"]) ) return router @@ -311,7 +311,7 @@ def __init__(self, estimator, *, n_jobs=None): super().__init__(estimator, n_jobs=n_jobs) @_available_if_estimator_has("partial_fit") - def partial_fit(self, X, y, sample_weight=None): + def partial_fit(self, X, y, **partial_fit_params): """Incrementally fit the model to data, for each output variable. Parameters @@ -322,17 +322,17 @@ def partial_fit(self, X, y, sample_weight=None): y : {array-like, sparse matrix} of shape (n_samples, n_outputs) Multi-output targets. - sample_weight : array-like of shape (n_samples,), default=None - Sample weights. If `None`, then samples are equally weighted. - Only supported if the underlying regressor supports sample - weights. + **partial_fit_params : dict of string -> object + Parameters passed to the ``estimator.partial_fit`` method of each step. + + .. versionadded:: 1.1 Returns ------- self : object Returns a fitted instance. """ - super().partial_fit(X, y, sample_weight=sample_weight) + super().partial_fit(X, y, **partial_fit_params) class MultiOutputClassifier(ClassifierMixin, _MultiOutputEstimator): @@ -406,7 +406,7 @@ class MultiOutputClassifier(ClassifierMixin, _MultiOutputEstimator): def __init__(self, estimator, *, n_jobs=None): super().__init__(estimator, n_jobs=n_jobs) - def fit(self, X, Y, sample_weight=None, **fit_params): + def fit(self, X, Y, **fit_params): """Fit the model to data matrix X and targets Y. 
Parameters @@ -417,11 +417,6 @@ def fit(self, X, Y, sample_weight=None, **fit_params): Y : array-like of shape (n_samples, n_classes) The target values. - sample_weight : array-like of shape (n_samples,), default=None - Sample weights. If `None`, then samples are equally weighted. - Only supported if the underlying classifier supports sample - weights. - **fit_params : dict of string -> object Parameters passed to the ``estimator.fit`` method of each step. @@ -432,7 +427,7 @@ def fit(self, X, Y, sample_weight=None, **fit_params): self : object Returns a fitted instance. """ - super().fit(X, Y, sample_weight, **fit_params) + super().fit(X, Y, **fit_params) self.classes_ = [estimator.classes_ for estimator in self.estimators_] return self diff --git a/sklearn/tests/test_metadata_routing.py b/sklearn/tests/test_metadata_routing.py index 85af67bd9a900..f3340b1d8c5ab 100644 --- a/sklearn/tests/test_metadata_routing.py +++ b/sklearn/tests/test_metadata_routing.py @@ -587,7 +587,9 @@ def fit(self, X, y, prop=None, **kwargs): def test_method_metadata_request(): - mmr = MethodMetadataRequest(owner="test", method="fit") + mmr = MethodMetadataRequest( + router=MetadataRequest(owner="test"), owner="test", method="fit" + ) with pytest.raises( ValueError, match="alias should be either a valid identifier or" @@ -654,9 +656,9 @@ class RegressorMetadataWarn(RegressorMetadata): "obj, string", [ ( - MethodMetadataRequest(owner="test", method="fit").add_request( - param="foo", alias="bar" - ), + MethodMetadataRequest( + router=MetadataRequest(owner="test"), owner="test", method="fit" + ).add_request(param="foo", alias="bar"), "{'foo': 'bar'}", ), ( @@ -888,13 +890,47 @@ def predict(self, X, sample_weight=None): ): est.predict(X, sample_weight=my_weights) - # also check if passing a meta-estimator as a child object warns + # In this case both a warning and an error are raised. The warning comes + # from the MetaEstimator, and the error from WeightedMetaRegressor since it + # doesn't have any warn_on set but sample_weight is passed. + est = MetaEstimator(estimator=WeightedMetaRegressor(estimator=RegressorMetadata())) + with pytest.raises( + ValueError, match="sample_weight is passed but is not explicitly set" + ): + with pytest.warns( + FutureWarning, match="From version 1.3 this results in the following error" + ): + est.fit(X, y, sample_weight=my_weights) + + class WarningWeightedMetaRegressor(WeightedMetaRegressor): + """A WeightedMetaRegressor which warns instead.""" + + def get_metadata_routing(self): + router = ( + MetadataRouter(owner=self.__class__.__name__) + .add_self(self) + .add(estimator=self.estimator, method_mapping="one-to-one") + .warn_on(child="estimator", methods=["fit", "score"], raise_on="1.3") + ) + return router + + # Now there's only a warning since both meta-estimators warn. 
est = MetaEstimator( - estimator=WeightedMetaRegressor( - estimator=RegressorMetadata().set_fit_request(sample_weight=True) - ) + estimator=WarningWeightedMetaRegressor(estimator=RegressorMetadata()) ) with pytest.warns( FutureWarning, match="From version 1.3 this results in the following error" ): est.fit(X, y, sample_weight=my_weights) + + # but if the inner estimator has a non-default request, we fall ack to + # raising an error + est = MetaEstimator( + estimator=WarningWeightedMetaRegressor( + estimator=RegressorMetadata().set_fit_request(sample_weight=True) + ) + ) + with pytest.raises( + ValueError, match="sample_weight is passed but is not explicitly set" + ): + est.fit(X, y, sample_weight=my_weights) diff --git a/sklearn/tests/test_metaestimators_metadata_routing.py b/sklearn/tests/test_metaestimators_metadata_routing.py index 89d9727cdeeb9..2913af6758681 100644 --- a/sklearn/tests/test_metaestimators_metadata_routing.py +++ b/sklearn/tests/test_metaestimators_metadata_routing.py @@ -1,6 +1,20 @@ import numpy as np -from sklearn.base import RegressorMixin, BaseEstimator -from sklearn.tests.test_metadata_routing import record_metadata, check_recorded_metadata +import pytest +from sklearn.base import RegressorMixin, ClassifierMixin, BaseEstimator +from sklearn.multioutput import MultiOutputRegressor, MultiOutputClassifier +from sklearn.utils.metadata_routing import MetadataRouter +from sklearn.tests.test_metadata_routing import ( + record_metadata, + check_recorded_metadata, + assert_request_is_empty, +) + +N, M = 100, 4 +X = np.random.rand(N, M) +y = np.random.randint(0, 2, size=N) +y_multi = np.random.randint(0, 2, size=(N, 3)) +metadata = np.random.randint(0, 10, size=N) +sample_weight = np.random.rand(N) class RegressorSubEstimator(RegressorMixin, BaseEstimator): @@ -10,10 +24,103 @@ def __init__(self, **kwargs): for param, value in kwargs.items(): setattr(self, param, value) + def partial_fit(self, X, y, sample_weight=None, metadata=None): + record_metadata( + self, "partial_fit", sample_weight=sample_weight, metadata=metadata + ) + return self + def fit(self, X, y, sample_weight=None, metadata=None): record_metadata(self, "fit", sample_weight=sample_weight, metadata=metadata) return self def predict(self, X, y, sample_weight=None, metadata=None): + record_metadata(self, "predict", sample_weight=sample_weight, metadata=metadata) + return np.zeros(shape=(len(X))) + + +class ClassifierSubEstimator(ClassifierMixin, BaseEstimator): + """A regressor consuming metadata.""" + + def __init__(self, **kwargs): + for param, value in kwargs.items(): + setattr(self, param, value) + + def partial_fit(self, X, y, sample_weight=None, metadata=None): + record_metadata( + self, "partial_fit", sample_weight=sample_weight, metadata=metadata + ) + self.classes_ = [1] + return self + + def fit(self, X, y, sample_weight=None, metadata=None): record_metadata(self, "fit", sample_weight=sample_weight, metadata=metadata) + self.classes_ = [1] + return self + + def predict(self, X, y, sample_weight=None, metadata=None): + record_metadata(self, "predict", sample_weight=sample_weight, metadata=metadata) + return np.zeros(shape=(len(X))) + + def predict_proba(self, X, y, sample_weight=None, metadata=None): + record_metadata( + self, "predict_proba", sample_weight=sample_weight, metadata=metadata + ) + return np.zeros(shape=(len(X))) + + def predict_log_proba(self, X, y, sample_weight=None, metadata=None): + record_metadata( + self, "predict_log_proba", sample_weight=sample_weight, metadata=metadata + ) return 
np.zeros(shape=(len(X))) + + +def get_empty_metaestimators(): + yield MultiOutputRegressor(estimator=RegressorSubEstimator()) + yield MultiOutputClassifier(estimator=ClassifierSubEstimator()) + + +@pytest.mark.parametrize( + "metaestimator", + get_empty_metaestimators(), + ids=[str(x) for x in get_empty_metaestimators()], +) +def test_default_request(metaestimator): + # Check that by default request is empty and the right type + assert_request_is_empty(metaestimator.get_metadata_routing()) + assert isinstance(metaestimator.get_metadata_routing(), MetadataRouter) + + +@pytest.mark.parametrize( + "MultiOutput, Estimator", + [ + (MultiOutputClassifier, ClassifierSubEstimator), + (MultiOutputRegressor, RegressorSubEstimator), + ], + ids=["Classifier", "Regressor"], +) +def test_multioutput_metadata_routing(MultiOutput, Estimator): + # Check routing of metadata + metaest = MultiOutput(Estimator()) + with pytest.warns( + FutureWarning, + match=( + "You are passing metadata for which the request values are not explicitly" + " set. From version 1.3 this results in the following error" + ), + ): + metaest.fit(X, y_multi, sample_weight=sample_weight, metadata=metadata) + + metaest = MultiOutput( + Estimator() + .set_fit_request(sample_weight=True, metadata="alias") + .set_partial_fit_request(sample_weight=True, metadata=True) + ).fit(X, y_multi, alias=metadata) + check_recorded_metadata( + metaest.estimators_[0], "fit", sample_weight=None, metadata=metadata + ) + + metaest.partial_fit(X, y_multi, metadata=metadata) + check_recorded_metadata( + metaest.estimators_[0], "partial_fit", sample_weight=None, metadata=metadata + ) diff --git a/sklearn/utils/_metadata_requests.py b/sklearn/utils/_metadata_requests.py index b5d8a62df9f26..b276a54693674 100644 --- a/sklearn/utils/_metadata_requests.py +++ b/sklearn/utils/_metadata_requests.py @@ -168,8 +168,9 @@ class MethodMetadataRequest: The name of the method to which these requests belong. """ - def __init__(self, owner, method): + def __init__(self, router, owner, method): self._requests = dict() + self.router = router self.owner = owner self.method = method @@ -178,9 +179,13 @@ def requests(self): """Dictionary of the form: ``{key: alias}``.""" return self._requests - def _assume_requested(self): - """Convert all ERROR_IF_PASSED to REQUESTED.""" - res = MethodMetadataRequest(owner=self.owner, method=self.method) + def _assume_requested(self, router): + """Convert all ERROR_IF_PASSED to REQUESTED. + + This method returns a new object with the above change and leaves the + original object unchanged. + """ + res = MethodMetadataRequest(router=router, owner=self.owner, method=self.method) res._requests = { param: ( alias if alias != RequestType.ERROR_IF_PASSED else RequestType.REQUESTED @@ -222,6 +227,9 @@ def add_request( "{None, True, False}, or a RequestType." ) + if alias != self._requests.get(param, None): + self.router._is_default_request = False + if alias == param: alias = RequestType.REQUESTED @@ -365,13 +373,21 @@ def __init__(self, owner): # this is used to check if the user has set any request values self._is_default_request = False for method in METHODS: - setattr(self, method, MethodMetadataRequest(owner=owner, method=method)) + setattr( + self, + method, + MethodMetadataRequest(router=self, owner=owner, method=method), + ) def _assume_requested(self): - """Convert all ERROR_IF_PASSED to REQUESTED.""" + """Convert all ERROR_IF_PASSED to REQUESTED. 
+ + This method returns a new object with the above change and leaves the + original object unchanged. + """ res = MetadataRequest(owner=None) for method in METHODS: - setattr(self, method, getattr(self, method)._assume_requested()) + setattr(self, method, getattr(self, method)._assume_requested(router=res)) return res def _get_param_names(self, method, return_alias, ignore_self=None): @@ -592,6 +608,31 @@ def __init__(self, owner): self._warn_on = dict() self.owner = owner + @property + def _is_default_request(self): + """Return ``True`` only if all sub-components have default values.""" + if self._self: + if not self._self._is_default_request: + return False + + for router_mapping in self._route_mappings.values(): + if not router_mapping.router._is_default_request: + return False + + return True + + def _assume_requested(self): + """Convert all ERROR_IF_PASSED to REQUESTED. + + This method returns a new object with the above change and leaves the + original object unchanged. + """ + res = MetadataRouter(owner=self.owner) + if self._self: + res._self = self._self._assume_requested() + res._route_mappings = deepcopy(self._route_mappings) + return res + def add_self(self, obj): """Add `self` (as a consumer) to the routing. @@ -1039,6 +1080,7 @@ def func(**kw): for prop, alias in kw.items(): if alias is not UNCHANGED: method_metadata_request.add_request(param=prop, alias=alias) + requests._is_default_request = False instance._metadata_request = requests return instance @@ -1121,7 +1163,7 @@ def __init_subclass__(cls, **kwargs): super().__init_subclass__(**kwargs) @classmethod - def _build_request_for_signature(cls, method): + def _build_request_for_signature(cls, router, method): """Build the `MethodMetadataRequest` for a method using its signature. This method takes all arguments from the method signature and uses @@ -1130,6 +1172,8 @@ def _build_request_for_signature(cls, method): Parameters ---------- + router : MetadataRequest + The parent object for the created `MethodMetadataRequest`. method : str The name of the method. @@ -1138,7 +1182,7 @@ def _build_request_for_signature(cls, method): method_request : MethodMetadataRequest The prepared request using the method's signature. """ - mmr = MethodMetadataRequest(owner=cls.__name__, method=method) + mmr = MethodMetadataRequest(router=router, owner=cls.__name__, method=method) # Here we use `isfunction` instead of `ismethod` because calling `getattr` # on a class instead of an instance returns an unbound function. if not hasattr(cls, method) or not inspect.isfunction(getattr(cls, method)): @@ -1165,11 +1209,12 @@ class attributes, as well as determining request keys from method signatures. """ requests = MetadataRequest(owner=cls.__name__) - # this indicates that the user has not set any request values for this - # object - requests._is_default_request = True for method in METHODS: - setattr(requests, method, cls._build_request_for_signature(method=method)) + setattr( + requests, + method, + cls._build_request_for_signature(router=requests, method=method), + ) # Then overwrite those defaults with the ones provided in # __metadata_request__* attributes. 
Defaults set in @@ -1197,6 +1242,11 @@ class attributes, as well as determining request keys from method method = attr[attr.index(substr) + len(substr) :] for prop, alias in value.items(): getattr(requests, method).add_request(param=prop, alias=alias) + + # this indicates that the user has not set any request values for this + # object + requests._is_default_request = True + return requests def _get_metadata_request(self): From ab9daa849bd3624cb19f51ed44f8dbce5424cf76 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Tue, 29 Mar 2022 13:53:15 +0200 Subject: [PATCH 22/37] remove unused imports --- sklearn/tests/test_metaestimators.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/sklearn/tests/test_metaestimators.py b/sklearn/tests/test_metaestimators.py index 8faf4f4b7f154..e743741f6fa43 100644 --- a/sklearn/tests/test_metaestimators.py +++ b/sklearn/tests/test_metaestimators.py @@ -22,9 +22,6 @@ from sklearn.semi_supervised import SelfTrainingClassifier from sklearn.linear_model import Ridge, LogisticRegression from sklearn.preprocessing import StandardScaler, MaxAbsScaler -from sklearn.tests.test_metadata_routing import assert_request_is_empty -from sklearn.tests.test_metadata_routing import check_recorded_metadata -from sklearn.tests.test_metadata_routing import record_metadata class DelegatorData: From bc46a2edf3ee63c1327bea8914c8bf08b441e16b Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Tue, 29 Mar 2022 18:51:29 +0200 Subject: [PATCH 23/37] can't remove sample_weight from signature since we forgot to make them kwarg --- sklearn/multioutput.py | 17 +++++--- sklearn/tests/test_multioutput.py | 68 +++++++++++------------------ sklearn/utils/_metadata_requests.py | 3 ++ 3 files changed, 38 insertions(+), 50 deletions(-) diff --git a/sklearn/multioutput.py b/sklearn/multioutput.py index 9af6775b59767..77c11e21ce651 100644 --- a/sklearn/multioutput.py +++ b/sklearn/multioutput.py @@ -81,7 +81,7 @@ def __init__(self, estimator, *, n_jobs=None): self.n_jobs = n_jobs @_available_if_estimator_has("partial_fit") - def partial_fit(self, X, y, classes=None, **partial_fit_params): + def partial_fit(self, X, y, classes=None, sample_weight=None, **partial_fit_params): """Incrementally fit a separate model for each class output. Parameters @@ -124,6 +124,7 @@ def partial_fit(self, X, y, classes=None, **partial_fit_params): obj=self, method="partial_fit", other_params=partial_fit_params, + sample_weight=sample_weight, ) first_time = not hasattr(self, "estimators_") @@ -147,7 +148,7 @@ def partial_fit(self, X, y, classes=None, **partial_fit_params): return self - def fit(self, X, y, **fit_params): + def fit(self, X, y, sample_weight=None, **fit_params): """Fit the model to data, separately for each output variable. Parameters @@ -184,7 +185,9 @@ def fit(self, X, y, **fit_params): "multi-output regression but has only one." ) - routed_params = process_routing(obj=self, method="fit", other_params=fit_params) + routed_params = process_routing( + obj=self, method="fit", other_params=fit_params, sample_weight=sample_weight + ) self.estimators_ = Parallel(n_jobs=self.n_jobs)( delayed(_fit_estimator)( @@ -311,7 +314,7 @@ def __init__(self, estimator, *, n_jobs=None): super().__init__(estimator, n_jobs=n_jobs) @_available_if_estimator_has("partial_fit") - def partial_fit(self, X, y, **partial_fit_params): + def partial_fit(self, X, y, sample_weight=None, **partial_fit_params): """Incrementally fit the model to data, for each output variable. 
Parameters @@ -332,7 +335,7 @@ def partial_fit(self, X, y, **partial_fit_params): self : object Returns a fitted instance. """ - super().partial_fit(X, y, **partial_fit_params) + super().partial_fit(X, y, sample_weight=sample_weight, **partial_fit_params) class MultiOutputClassifier(ClassifierMixin, _MultiOutputEstimator): @@ -406,7 +409,7 @@ class MultiOutputClassifier(ClassifierMixin, _MultiOutputEstimator): def __init__(self, estimator, *, n_jobs=None): super().__init__(estimator, n_jobs=n_jobs) - def fit(self, X, Y, **fit_params): + def fit(self, X, Y, sample_weight=None, **fit_params): """Fit the model to data matrix X and targets Y. Parameters @@ -427,7 +430,7 @@ def fit(self, X, Y, **fit_params): self : object Returns a fitted instance. """ - super().fit(X, Y, **fit_params) + super().fit(X, Y, sample_weight=sample_weight, **fit_params) self.classes_ = [estimator.classes_ for estimator in self.estimators_] return self diff --git a/sklearn/tests/test_multioutput.py b/sklearn/tests/test_multioutput.py index ded47c4fe2b4c..ea4cc706d968d 100644 --- a/sklearn/tests/test_multioutput.py +++ b/sklearn/tests/test_multioutput.py @@ -1,4 +1,5 @@ import pytest +import re import numpy as np import scipy.sparse as sp from joblib import cpu_count @@ -27,7 +28,6 @@ from sklearn.base import ClassifierMixin from sklearn.utils import shuffle from sklearn.model_selection import GridSearchCV -from sklearn.dummy import DummyRegressor, DummyClassifier from sklearn.pipeline import make_pipeline from sklearn.impute import SimpleImputer from sklearn.ensemble import StackingRegressor @@ -110,12 +110,14 @@ def test_multi_target_sample_weights_api(): w = [0.8, 0.6] rgr = MultiOutputRegressor(OrthogonalMatchingPursuit()) - msg = "does not support sample weights" - with pytest.raises(ValueError, match=msg): + msg = re.escape("fit got unexpected argument(s) {'sample_weight'}") + with pytest.raises(TypeError, match=msg): rgr.fit(X, y, w) # no exception should be raised if the base estimator supports weights - rgr = MultiOutputRegressor(GradientBoostingRegressor(random_state=0)) + rgr = MultiOutputRegressor( + GradientBoostingRegressor(random_state=0).set_fit_request(sample_weight=True) + ) rgr.fit(X, y, w) @@ -124,12 +126,20 @@ def test_multi_target_sample_weight_partial_fit(): X = [[1, 2, 3], [4, 5, 6]] y = [[3.141, 2.718], [2.718, 3.141]] w = [2.0, 1.0] - rgr_w = MultiOutputRegressor(SGDRegressor(random_state=0, max_iter=5)) + rgr_w = MultiOutputRegressor( + SGDRegressor(random_state=0, max_iter=5).set_partial_fit_request( + sample_weight=True + ) + ) rgr_w.partial_fit(X, y, w) # weighted with different weights w = [2.0, 2.0] - rgr = MultiOutputRegressor(SGDRegressor(random_state=0, max_iter=5)) + rgr = MultiOutputRegressor( + SGDRegressor(random_state=0, max_iter=5).set_partial_fit_request( + sample_weight=True + ) + ) rgr.partial_fit(X, y, w) assert rgr.predict(X)[0][0] != rgr_w.predict(X)[0][0] @@ -140,7 +150,9 @@ def test_multi_target_sample_weights(): Xw = [[1, 2, 3], [4, 5, 6]] yw = [[3.141, 2.718], [2.718, 3.141]] w = [2.0, 1.0] - rgr_w = MultiOutputRegressor(GradientBoostingRegressor(random_state=0)) + rgr_w = MultiOutputRegressor( + GradientBoostingRegressor(random_state=0).set_fit_request(sample_weight=True) + ) rgr_w.fit(Xw, yw, w) # unweighted, but with repeated samples @@ -363,7 +375,9 @@ def test_multi_output_classification_sample_weights(): Xw = [[1, 2, 3], [4, 5, 6]] yw = [[3, 2], [2, 3]] w = np.asarray([2.0, 1.0]) - forest = RandomForestClassifier(n_estimators=10, random_state=1) + forest = 
RandomForestClassifier(n_estimators=10, random_state=1).set_fit_request( + sample_weight=True + ) clf_w = MultiOutputClassifier(forest) clf_w.fit(Xw, yw, w) @@ -383,7 +397,9 @@ def test_multi_output_classification_partial_fit_sample_weights(): Xw = [[1, 2, 3], [4, 5, 6], [1.5, 2.5, 3.5]] yw = [[3, 2], [2, 3], [3, 2]] w = np.asarray([2.0, 1.0, 1.0]) - sgd_linear_clf = SGDClassifier(random_state=1, max_iter=20) + sgd_linear_clf = SGDClassifier(random_state=1, max_iter=20).set_fit_request( + sample_weight=True + ) clf_w = MultiOutputClassifier(sgd_linear_clf) clf_w.fit(Xw, yw, w) @@ -603,40 +619,6 @@ def test_multi_output_classes_(estimator): assert_array_equal(estimator_classes, expected_classes) -class DummyRegressorWithFitParams(DummyRegressor): - def fit(self, X, y, sample_weight=None, **fit_params): - self._fit_params = fit_params - return super().fit(X, y, sample_weight) - - -class DummyClassifierWithFitParams(DummyClassifier): - def fit(self, X, y, sample_weight=None, **fit_params): - self._fit_params = fit_params - return super().fit(X, y, sample_weight) - - -@pytest.mark.filterwarnings("ignore:`n_features_in_` is deprecated") -@pytest.mark.parametrize( - "estimator, dataset", - [ - ( - MultiOutputClassifier(DummyClassifierWithFitParams(strategy="prior")), - datasets.make_multilabel_classification(), - ), - ( - MultiOutputRegressor(DummyRegressorWithFitParams()), - datasets.make_regression(n_targets=3, random_state=0), - ), - ], -) -def test_multioutput_estimator_with_fit_params(estimator, dataset): - X, y = dataset - some_param = np.zeros_like(X) - estimator.fit(X, y, some_param=some_param) - for dummy_estimator in estimator.estimators_: - assert "some_param" in dummy_estimator._fit_params - - def test_regressor_chain_w_fit_params(): # Make sure fit_params are properly propagated to the sub-estimators rng = np.random.RandomState(0) diff --git a/sklearn/utils/_metadata_requests.py b/sklearn/utils/_metadata_requests.py index b276a54693674..214cb0e5e1a27 100644 --- a/sklearn/utils/_metadata_requests.py +++ b/sklearn/utils/_metadata_requests.py @@ -1337,6 +1337,9 @@ def process_routing(obj, method, other_params, **kwargs): # fit_params["sample_weight"] = sample_weight all_params = other_params if other_params is not None else dict() all_params.update(kwargs) + all_params = { + param: value for param, value in all_params.items() if value is not None + } request_routing = get_routing_for_object(obj) request_routing.validate_metadata(params=all_params, method=method) From fba9e179b5ccd23a7a4be1e58f776c314110640f Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Fri, 1 Apr 2022 14:27:33 +0200 Subject: [PATCH 24/37] common test sub-estimators request weight by default --- sklearn/utils/estimator_checks.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index ada83d041b544..22deb956971f6 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -374,6 +374,13 @@ def _get_check_estimator_ids(obj): return re.sub(r"\s", "", str(obj)) +def _weighted(estimator): + """Request sample_weight for fit and score.""" + return estimator.set_fit_request(sample_weight=True).set_score_request( + sample_weight=True + ) + + def _construct_instance(Estimator): """Construct Estimator instance if possible.""" required_parameters = getattr(Estimator, "_required_parameters", []) @@ -384,16 +391,19 @@ def _construct_instance(Estimator): # For common test, we can enforce using 
`LinearRegression` that # is the default estimator in `RANSACRegressor` instead of `Ridge`. if issubclass(Estimator, RANSACRegressor): - estimator = Estimator(LinearRegression()) + estimator = Estimator(_weighted(LinearRegression())) elif issubclass(Estimator, RegressorMixin): - estimator = Estimator(Ridge()) + estimator = Estimator(_weighted(Ridge())) else: - estimator = Estimator(LogisticRegression(C=1)) + estimator = Estimator(_weighted(LogisticRegression(C=1))) elif required_parameters in (["estimators"],): # Heterogeneous ensemble classes (i.e. stacking, voting) if issubclass(Estimator, RegressorMixin): estimator = Estimator( - estimators=[("est1", Ridge(alpha=0.1)), ("est2", Ridge(alpha=1))] + estimators=[ + ("est1", _weighted(Ridge(alpha=0.1))), + ("est2", _weighted(Ridge(alpha=1))), + ] ) else: estimator = Estimator( From 54b093b52868b8a985e6c57318e6b238528ef82c Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Tue, 5 Apr 2022 17:07:04 +0200 Subject: [PATCH 25/37] fix docstrings --- sklearn/multioutput.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/sklearn/multioutput.py b/sklearn/multioutput.py index 77c11e21ce651..4cd11622c7451 100644 --- a/sklearn/multioutput.py +++ b/sklearn/multioutput.py @@ -101,6 +101,11 @@ def partial_fit(self, X, y, classes=None, sample_weight=None, **partial_fit_para and can be omitted in the subsequent calls. Note that `y` doesn't need to contain all labels in `classes`. + sample_weight : array-like of shape (n_samples,), default=None + Sample weights. If `None`, then samples are equally weighted. + Only supported if the underlying regressor supports sample + weights. + **partial_fit_params : dict of string -> object Parameters passed to the ``estimator.partial_fit`` method of each step. @@ -160,6 +165,11 @@ def fit(self, X, y, sample_weight=None, **fit_params): Multi-output targets. An indicator matrix turns on multilabel estimation. + sample_weight : array-like of shape (n_samples,), default=None + Sample weights. If `None`, then samples are equally weighted. + Only supported if the underlying regressor supports sample + weights. + **fit_params : dict of string -> object Parameters passed to the ``estimator.fit`` method of each step. @@ -231,6 +241,17 @@ def _more_tags(self): return {"multioutput_only": True} def get_metadata_routing(self): + """Get metadata routing of this object. + + Please check :ref:`User Guide ` on how the routing + mechanism works. + + Returns + ------- + routing : MetadataRouter + A :class:`~utils.metadata_routing.MetadataRouter` encapsulating + routing information. + """ router = ( MetadataRouter(owner=self.__class__.__name__) .add( @@ -325,6 +346,11 @@ def partial_fit(self, X, y, sample_weight=None, **partial_fit_params): y : {array-like, sparse matrix} of shape (n_samples, n_outputs) Multi-output targets. + sample_weight : array-like of shape (n_samples,), default=None + Sample weights. If `None`, then samples are equally weighted. + Only supported if the underlying regressor supports sample + weights. + **partial_fit_params : dict of string -> object Parameters passed to the ``estimator.partial_fit`` method of each step. @@ -420,6 +446,11 @@ def fit(self, X, Y, sample_weight=None, **fit_params): Y : array-like of shape (n_samples, n_classes) The target values. + sample_weight : array-like of shape (n_samples,), default=None + Sample weights. If `None`, then samples are equally weighted. + Only supported if the underlying regressor supports sample + weights. 
+ **fit_params : dict of string -> object Parameters passed to the ``estimator.fit`` method of each step. From 268ae56234d34fc8afc1a44b924ed84b25353e86 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Thu, 12 May 2022 18:17:12 +0200 Subject: [PATCH 26/37] minor edits --- sklearn/exceptions.py | 2 +- sklearn/multioutput.py | 2 +- sklearn/tests/test_metadata_routing.py | 4 ++-- .../test_metaestimators_metadata_routing.py | 20 ++++++++++++------- sklearn/utils/_metadata_requests.py | 8 ++++---- 5 files changed, 21 insertions(+), 15 deletions(-) diff --git a/sklearn/exceptions.py b/sklearn/exceptions.py index 42b10b9c529d2..63d636e47a15c 100644 --- a/sklearn/exceptions.py +++ b/sklearn/exceptions.py @@ -21,7 +21,7 @@ class UnsetMetadataPassedError(ValueError): """Exception class to raise if a metadata is passed which is not explicitly \ requested. - .. versionadded:: 1.1 + .. versionadded:: 1.2 """ diff --git a/sklearn/multioutput.py b/sklearn/multioutput.py index 4cd11622c7451..b709dc60ca6e8 100644 --- a/sklearn/multioutput.py +++ b/sklearn/multioutput.py @@ -354,7 +354,7 @@ def partial_fit(self, X, y, sample_weight=None, **partial_fit_params): **partial_fit_params : dict of string -> object Parameters passed to the ``estimator.partial_fit`` method of each step. - .. versionadded:: 1.1 + .. versionadded:: 1.2 Returns ------- diff --git a/sklearn/tests/test_metadata_routing.py b/sklearn/tests/test_metadata_routing.py index f3340b1d8c5ab..6ac49cb865d7c 100644 --- a/sklearn/tests/test_metadata_routing.py +++ b/sklearn/tests/test_metadata_routing.py @@ -867,7 +867,7 @@ def get_metadata_routing(self): return ( MetadataRouter(owner=self.__class__.__name__) .add(estimator=self.estimator, method_mapping="one-to-one") - .warn_on(child="estimator", methods=["fit"], raise_on="1.3") + .warn_on(child="estimator", methods=["fit"], raise_on="1.4") ) class Estimator(BaseEstimator): @@ -923,7 +923,7 @@ def get_metadata_routing(self): ): est.fit(X, y, sample_weight=my_weights) - # but if the inner estimator has a non-default request, we fall ack to + # but if the inner estimator has a non-default request, we fall back to # raising an error est = MetaEstimator( estimator=WarningWeightedMetaRegressor( diff --git a/sklearn/tests/test_metaestimators_metadata_routing.py b/sklearn/tests/test_metaestimators_metadata_routing.py index 2913af6758681..8610a599dcf2e 100644 --- a/sklearn/tests/test_metaestimators_metadata_routing.py +++ b/sklearn/tests/test_metaestimators_metadata_routing.py @@ -17,7 +17,7 @@ sample_weight = np.random.rand(N) -class RegressorSubEstimator(RegressorMixin, BaseEstimator): +class ConsumingRegressor(RegressorMixin, BaseEstimator): """A regressor consuming metadata.""" def __init__(self, **kwargs): @@ -39,7 +39,7 @@ def predict(self, X, y, sample_weight=None, metadata=None): return np.zeros(shape=(len(X))) -class ClassifierSubEstimator(ClassifierMixin, BaseEstimator): +class ConsumingClassifier(ClassifierMixin, BaseEstimator): """A regressor consuming metadata.""" def __init__(self, **kwargs): @@ -76,8 +76,8 @@ def predict_log_proba(self, X, y, sample_weight=None, metadata=None): def get_empty_metaestimators(): - yield MultiOutputRegressor(estimator=RegressorSubEstimator()) - yield MultiOutputClassifier(estimator=ClassifierSubEstimator()) + yield MultiOutputRegressor(estimator=ConsumingRegressor()) + yield MultiOutputClassifier(estimator=ConsumingClassifier()) @pytest.mark.parametrize( @@ -94,8 +94,8 @@ def test_default_request(metaestimator): @pytest.mark.parametrize( "MultiOutput, Estimator", 
[ - (MultiOutputClassifier, ClassifierSubEstimator), - (MultiOutputRegressor, RegressorSubEstimator), + (MultiOutputClassifier, ConsumingClassifier), + (MultiOutputRegressor, ConsumingRegressor), ], ids=["Classifier", "Regressor"], ) @@ -106,10 +106,16 @@ def test_multioutput_metadata_routing(MultiOutput, Estimator): FutureWarning, match=( "You are passing metadata for which the request values are not explicitly" - " set. From version 1.3 this results in the following error" + " set. From version 1.4 this results in the following error" ), ): metaest.fit(X, y_multi, sample_weight=sample_weight, metadata=metadata) + check_recorded_metadata( + metaest.estimators_[0], + "fit", + sample_weight=sample_weight, + metadata=metadata, + ) metaest = MultiOutput( Estimator() diff --git a/sklearn/utils/_metadata_requests.py b/sklearn/utils/_metadata_requests.py index 214cb0e5e1a27..94688fa6eaeea 100644 --- a/sklearn/utils/_metadata_requests.py +++ b/sklearn/utils/_metadata_requests.py @@ -863,10 +863,10 @@ def _route_warn_or_error(self, *, child, router, params, method): try: res = router._route_params(params=params, method=method) except UnsetMetadataPassedError as e: - warn_on = self._warn_on.get(child, {"methods": [], "raise_on": "1.3"}) + warn_on = self._warn_on.get(child, {"methods": [], "raise_on": "1.4"}) if method not in warn_on["methods"]: # there is no warn_on set for this method of this child object, - # we raise as usual + # we raise as usual. raise if not getattr(router, "_is_default_request", None): # the user has set at least one request value for this child @@ -913,7 +913,7 @@ def validate_metadata(self, *, method, params): "not requested metadata in any object." ) - def warn_on(self, child, methods, raise_on="1.3"): + def warn_on(self, child, methods, raise_on="1.4"): """Set where deprecation warnings on no set requests should occur. This method is used in meta-estimators during the transition period for @@ -937,7 +937,7 @@ def warn_on(self, child, methods, raise_on="1.3"): List of methods for which there should be a ``FutureWarning`` instead of a ``ValueError``. - raise_on : str, default="1.3" + raise_on : str, default="1.4" The version after which there should be an error. Used in the warning message to inform users. From fb36733db359bd6a39f868dc20e17ac35866cc7e Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Mon, 16 May 2022 17:37:15 +0200 Subject: [PATCH 27/37] staking a stab at param specific deprecation --- sklearn/exceptions.py | 10 ++++ sklearn/tests/test_metadata_routing.py | 38 +++++++++++--- sklearn/utils/_metadata_requests.py | 69 ++++++++++++++++++-------- 3 files changed, 91 insertions(+), 26 deletions(-) diff --git a/sklearn/exceptions.py b/sklearn/exceptions.py index 63d636e47a15c..986bc2ac34c32 100644 --- a/sklearn/exceptions.py +++ b/sklearn/exceptions.py @@ -22,8 +22,18 @@ class UnsetMetadataPassedError(ValueError): requested. .. versionadded:: 1.2 + + Parameters + ---------- + message : str + The message """ + def __init__(self, message, unrequested_params, routed_params): + super().__init__(message) + self.unrequested_params = unrequested_params + self.routed_params = routed_params + class NotFittedError(ValueError, AttributeError): """Exception class to raise if estimator is used before fitting. 
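
As an illustration of the attributes this commit adds to ``UnsetMetadataPassedError``
(a sketch against the patched ``sklearn/exceptions.py`` shown above, not an excerpt
from the diff; the message text and values are invented for the example)::

    from sklearn.exceptions import UnsetMetadataPassedError

    try:
        raise UnsetMetadataPassedError(
            "[sample_weight] are passed but is not explicitly set as requested"
            " or not for LogisticRegression.fit",
            {"sample_weight": [1.0, 2.0]},  # unrequested_params
            {},                             # routed_params
        )
    except UnsetMetadataPassedError as exc:
        # A router can inspect these to decide, per parameter, whether to warn
        # (during the deprecation period) or re-raise.
        print(exc.unrequested_params)  # {'sample_weight': [1.0, 2.0]}
        print(exc.routed_params)       # {}
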
diff --git a/sklearn/tests/test_metadata_routing.py b/sklearn/tests/test_metadata_routing.py index 6ac49cb865d7c..4869f73cdad7a 100644 --- a/sklearn/tests/test_metadata_routing.py +++ b/sklearn/tests/test_metadata_routing.py @@ -867,11 +867,16 @@ def get_metadata_routing(self): return ( MetadataRouter(owner=self.__class__.__name__) .add(estimator=self.estimator, method_mapping="one-to-one") - .warn_on(child="estimator", methods=["fit"], raise_on="1.4") + .warn_on( + child="estimator", + method="fit", + params=["sample_weight"], + raise_on="1.4", + ) ) class Estimator(BaseEstimator): - def fit(self, X, y, sample_weight=None): + def fit(self, X, y, sample_weight=None, groups=None): return self def predict(self, X, sample_weight=None): @@ -880,10 +885,20 @@ def predict(self, X, sample_weight=None): est = MetaEstimator(estimator=Estimator()) # the meta-estimator has est to have a warning on `fit`. with pytest.warns( - FutureWarning, match="From version 1.3 this results in the following error" + FutureWarning, match="From version 1.4 this results in the following error" ): est.fit(X, y, sample_weight=my_weights) + # we should raise because there is no warn_on for groups + with pytest.raises( + ValueError, match="sample_weight is passed but is not explicitly set" + ): + # the sample_weight should still warn + with pytest.warns( + FutureWarning, match="From version 1.4 this results in the following error" + ): + est.fit(X, y, sample_weight=my_weights, groups=1) + # but predict should raise since there is no warn_on set for it. with pytest.raises( ValueError, match="sample_weight is passed but is not explicitly set" @@ -898,7 +913,7 @@ def predict(self, X, sample_weight=None): ValueError, match="sample_weight is passed but is not explicitly set" ): with pytest.warns( - FutureWarning, match="From version 1.3 this results in the following error" + FutureWarning, match="From version 1.4 this results in the following error" ): est.fit(X, y, sample_weight=my_weights) @@ -910,7 +925,18 @@ def get_metadata_routing(self): MetadataRouter(owner=self.__class__.__name__) .add_self(self) .add(estimator=self.estimator, method_mapping="one-to-one") - .warn_on(child="estimator", methods=["fit", "score"], raise_on="1.3") + .warn_on( + child="estimator", + methods="fit", + params=["sample_weight"], + raise_on="1.4", + ) + .warn_on( + child="estimator", + methods="score", + params=["sample_weight"], + raise_on="1.4", + ) ) return router @@ -919,7 +945,7 @@ def get_metadata_routing(self): estimator=WarningWeightedMetaRegressor(estimator=RegressorMetadata()) ) with pytest.warns( - FutureWarning, match="From version 1.3 this results in the following error" + FutureWarning, match="From version 1.4 this results in the following error" ): est.fit(X, y, sample_weight=my_weights) diff --git a/sklearn/utils/_metadata_requests.py b/sklearn/utils/_metadata_requests.py index 7cdf537236115..0f2ed530657fc 100644 --- a/sklearn/utils/_metadata_requests.py +++ b/sklearn/utils/_metadata_requests.py @@ -307,6 +307,7 @@ def _route_params(self, params=None): corresponding method. 
""" self._check_warnings(params=params) + unrequested = dict() params = {} if params is None else params args = {arg: value for arg, value in params.items() if value is not None} res = Bunch() @@ -319,12 +320,19 @@ def _route_params(self, params=None): elif alias == RequestType.REQUESTED and prop in args: res[prop] = args[prop] elif alias == RequestType.ERROR_IF_PASSED and prop in args: - raise UnsetMetadataPassedError( - f"{prop} is passed but is not explicitly set as " - f"requested or not for {self.owner}.{self.method}" - ) + unrequested[prop] = args[prop] elif alias in args: res[prop] = args[alias] + if unrequested: + raise UnsetMetadataPassedError( + message=( + f"[{', '.join([key for key in unrequested])} are passed but is not" + " explicitly set as requested or not for" + f" {self.owner}.{self.method}" + ), + unrequested_params=unrequested, + routed_params=res, + ) return res def _serialize(self): @@ -861,9 +869,9 @@ def _route_warn_or_error(self, *, child, router, params, method): The routed parameters. """ try: - res = router._route_params(params=params, method=method) + routed_params = router._route_params(params=params, method=method) except UnsetMetadataPassedError as e: - warn_on = self._warn_on.get(child, {"methods": [], "raise_on": "1.4"}) + warn_on = self._warn_on.get(child, {}) if method not in warn_on["methods"]: # there is no warn_on set for this method of this child object, # we raise as usual. @@ -872,16 +880,30 @@ def _route_warn_or_error(self, *, child, router, params, method): # the user has set at least one request value for this child # object, but not for all of them. Therefore we raise as usual. raise - # otherwise raise a FutureWarning - router = router._assume_requested() - res = router._route_params(params=params, method=method) + # now we move everything which has a warn_on flag from + # `unrequested_params` to routed_params, and then raise if anything + # is left. Otherwise we have a perfectly formed `routed_params` and + # we return that. + warn_on_params = warn_on.get(method, {"params": [], "raise_on": "1.4"}) + warn_keys = list(e.unrequested_params.keys()) + routed_params = e.routed_params + for param in warn_on_params["params"]: + if param in e.unrequested_params: + routed_params[param] = e.unrequested_params.pop(param) + + # check if anything is left, and if yes, we raise as usual + if e.unrequested_params: + raise + + # Finally warn before returning the routed parameters. warn( "You are passing metadata for which the request values are not" - f" explicitly set. From version {warn_on['raise_on']} this results in" - f" the following error: {str(e)}", + f" explicitly set: {', '.join(warn_keys)}. From version" + f" {warn_on_params['raise_on']} this results in the following error:" + f" {str(e)}", FutureWarning, ) - return res + return routed_params def validate_metadata(self, *, method, params): """Validate given metadata for a method. @@ -913,7 +935,7 @@ def validate_metadata(self, *, method, params): "not requested metadata in any object." ) - def warn_on(self, child, methods, raise_on="1.4"): + def warn_on(self, *, child, method, params, raise_on="1.4"): """Set where deprecation warnings on no set requests should occur. This method is used in meta-estimators during the transition period for @@ -924,18 +946,23 @@ def warn_on(self, child, methods, raise_on="1.4"): this breaks backward compatibility for existing meta-estimators. 
Calling this method on a ``MetadataRouter`` object such as - ``warn_on(child='estimator', methods=['fit', 'score'])`` tells the - router to raise a ``FutureWarning`` instead of a ``ValueError`` if the - child object has no set requests. + ``warn_on(child='estimator', method='fit', params=['sample_weight'])`` + tells the router to raise a ``FutureWarning`` instead of a + ``ValueError`` if the child object has no set requests for + ``sample_weight`` during ``fit``. Parameters ---------- child : str The name of the child object. - methods : list of str - List of methods for which there should be a ``FutureWarning`` - instead of a ``ValueError``. + method : str + The method for which there should be a ``FutureWarning`` + instead of a ``ValueError`` for given params. + + params : list of str + The list of parameters for which there should be a + ``FutureWarning`` instead of a ``ValueError``. raise_on : str, default="1.4" The version after which there should be an error. Used in the @@ -946,7 +973,9 @@ def warn_on(self, child, methods, raise_on="1.4"): self : MetadataRouter Returns `self`. """ - self._warn_on[child] = {"methods": methods, "raise_on": raise_on} + if child not in self._warn_on: + self._warn_on[child] = dict() + self._warn_on[child][method] = {"params": params, "raise_on": raise_on} return self def _serialize(self): From 1baea75bc496657ae3a52303d18dff6aee35dede Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Tue, 17 May 2022 11:00:38 +0200 Subject: [PATCH 28/37] fix tests --- sklearn/multioutput.py | 10 +++++--- sklearn/tests/test_metadata_routing.py | 34 ++++++++++++++++++++------ sklearn/utils/_metadata_requests.py | 11 ++++++--- 3 files changed, 41 insertions(+), 14 deletions(-) diff --git a/sklearn/multioutput.py b/sklearn/multioutput.py index b709dc60ca6e8..6f96905e49e03 100644 --- a/sklearn/multioutput.py +++ b/sklearn/multioutput.py @@ -253,14 +253,18 @@ def get_metadata_routing(self): routing information. """ router = ( - MetadataRouter(owner=self.__class__.__name__) - .add( + MetadataRouter(owner=self.__class__.__name__).add( estimator=self.estimator, method_mapping=MethodMapping() .add(callee="partial_fit", caller="partial_fit") .add(callee="fit", caller="fit"), ) - .warn_on(child="estimator", methods=["fit", "partial_fit"]) + # the fit method already accepts everything, therefore we don't + # specify parameters + .warn_on(child="estimator", method="fit", params=None) + # the partial_fit method at the time of this change only supports + # sample_weight, therefore we only include this metadata. 
+ .warn_on(child="estimator", method="partial_fit", params=["sample_weight"]) ) return router diff --git a/sklearn/tests/test_metadata_routing.py b/sklearn/tests/test_metadata_routing.py index 4869f73cdad7a..3598a3523ccd6 100644 --- a/sklearn/tests/test_metadata_routing.py +++ b/sklearn/tests/test_metadata_routing.py @@ -342,8 +342,10 @@ def test_simple_metadata_routing(): with pytest.raises( ValueError, match=( - "sample_weight is passed but is not explicitly set as requested or not for" - " ClassifierFitMetadata.fit" + re.escape( + "[sample_weight] are passed but is not explicitly set as requested or" + " not for ClassifierFitMetadata.fit" + ) ), ): clf.fit(X, y, sample_weight=my_weights) @@ -891,7 +893,11 @@ def predict(self, X, sample_weight=None): # we should raise because there is no warn_on for groups with pytest.raises( - ValueError, match="sample_weight is passed but is not explicitly set" + ValueError, + match=re.escape( + "[sample_weight, groups] are passed but is not explicitly set as requested" + " or not for Estimator.fit" + ), ): # the sample_weight should still warn with pytest.warns( @@ -901,7 +907,11 @@ def predict(self, X, sample_weight=None): # but predict should raise since there is no warn_on set for it. with pytest.raises( - ValueError, match="sample_weight is passed but is not explicitly set" + ValueError, + match=re.escape( + "[sample_weight] are passed but is not explicitly set as requested or not" + " for Estimator.predict" + ), ): est.predict(X, sample_weight=my_weights) @@ -910,7 +920,11 @@ def predict(self, X, sample_weight=None): # doesn't have any warn_on set but sample_weight is passed. est = MetaEstimator(estimator=WeightedMetaRegressor(estimator=RegressorMetadata())) with pytest.raises( - ValueError, match="sample_weight is passed but is not explicitly set" + ValueError, + match=re.escape( + "[sample_weight] are passed but is not explicitly set as requested or not" + " for RegressorMetadata.fit" + ), ): with pytest.warns( FutureWarning, match="From version 1.4 this results in the following error" @@ -927,13 +941,13 @@ def get_metadata_routing(self): .add(estimator=self.estimator, method_mapping="one-to-one") .warn_on( child="estimator", - methods="fit", + method="fit", params=["sample_weight"], raise_on="1.4", ) .warn_on( child="estimator", - methods="score", + method="score", params=["sample_weight"], raise_on="1.4", ) @@ -957,6 +971,10 @@ def get_metadata_routing(self): ) ) with pytest.raises( - ValueError, match="sample_weight is passed but is not explicitly set" + ValueError, + match=re.escape( + "[sample_weight] are passed but is not explicitly set as requested or not" + " for WarningWeightedMetaRegressor.fit" + ), ): est.fit(X, y, sample_weight=my_weights) diff --git a/sklearn/utils/_metadata_requests.py b/sklearn/utils/_metadata_requests.py index 0f2ed530657fc..70d839a4b4626 100644 --- a/sklearn/utils/_metadata_requests.py +++ b/sklearn/utils/_metadata_requests.py @@ -326,7 +326,7 @@ def _route_params(self, params=None): if unrequested: raise UnsetMetadataPassedError( message=( - f"[{', '.join([key for key in unrequested])} are passed but is not" + f"[{', '.join([key for key in unrequested])}] are passed but is not" " explicitly set as requested or not for" f" {self.owner}.{self.method}" ), @@ -872,7 +872,7 @@ def _route_warn_or_error(self, *, child, router, params, method): routed_params = router._route_params(params=params, method=method) except UnsetMetadataPassedError as e: warn_on = self._warn_on.get(child, {}) - if method not in 
warn_on["methods"]: + if method not in warn_on: # there is no warn_on set for this method of this child object, # we raise as usual. raise @@ -887,7 +887,12 @@ def _route_warn_or_error(self, *, child, router, params, method): warn_on_params = warn_on.get(method, {"params": [], "raise_on": "1.4"}) warn_keys = list(e.unrequested_params.keys()) routed_params = e.routed_params - for param in warn_on_params["params"]: + # if params is None, we accept and warn on everything. + warn_params = warn_on_params["params"] + if warn_params is None: + warn_params = e.unrequested_params.keys() + + for param in warn_params: if param in e.unrequested_params: routed_params[param] = e.unrequested_params.pop(param) From a985e83cc5c4108babd2dfb4b9e76429d81f0134 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Tue, 17 May 2022 16:28:47 +0200 Subject: [PATCH 29/37] add test for new code --- sklearn/tests/test_metadata_routing.py | 32 ++++++++++--------- .../test_metaestimators_metadata_routing.py | 2 +- sklearn/utils/_metadata_requests.py | 2 +- 3 files changed, 19 insertions(+), 17 deletions(-) diff --git a/sklearn/tests/test_metadata_routing.py b/sklearn/tests/test_metadata_routing.py index 3598a3523ccd6..ce84ed7fbb041 100644 --- a/sklearn/tests/test_metadata_routing.py +++ b/sklearn/tests/test_metadata_routing.py @@ -872,7 +872,7 @@ def get_metadata_routing(self): .warn_on( child="estimator", method="fit", - params=["sample_weight"], + params=None, raise_on="1.4", ) ) @@ -891,20 +891,6 @@ def predict(self, X, sample_weight=None): ): est.fit(X, y, sample_weight=my_weights) - # we should raise because there is no warn_on for groups - with pytest.raises( - ValueError, - match=re.escape( - "[sample_weight, groups] are passed but is not explicitly set as requested" - " or not for Estimator.fit" - ), - ): - # the sample_weight should still warn - with pytest.warns( - FutureWarning, match="From version 1.4 this results in the following error" - ): - est.fit(X, y, sample_weight=my_weights, groups=1) - # but predict should raise since there is no warn_on set for it. with pytest.raises( ValueError, @@ -963,6 +949,22 @@ def get_metadata_routing(self): ): est.fit(X, y, sample_weight=my_weights) + # here we should raise because there is no warn_on for groups + with pytest.raises( + ValueError, + match=re.escape( + "[sample_weight, groups] are passed but is not explicitly set as requested" + " or not for Estimator.fit" + ), + ): + # the sample_weight should still warn + with pytest.warns( + FutureWarning, match="From version 1.4 this results in the following error" + ): + WarningWeightedMetaRegressor(estimator=Estimator()).fit( + X, y, sample_weight=my_weights, groups=1 + ) + # but if the inner estimator has a non-default request, we fall back to # raising an error est = MetaEstimator( diff --git a/sklearn/tests/test_metaestimators_metadata_routing.py b/sklearn/tests/test_metaestimators_metadata_routing.py index 8610a599dcf2e..3c0fbf1618421 100644 --- a/sklearn/tests/test_metaestimators_metadata_routing.py +++ b/sklearn/tests/test_metaestimators_metadata_routing.py @@ -106,7 +106,7 @@ def test_multioutput_metadata_routing(MultiOutput, Estimator): FutureWarning, match=( "You are passing metadata for which the request values are not explicitly" - " set. From version 1.4 this results in the following error" + " set: sample_weight, metadata." 
), ): metaest.fit(X, y_multi, sample_weight=sample_weight, metadata=metadata) diff --git a/sklearn/utils/_metadata_requests.py b/sklearn/utils/_metadata_requests.py index 70d839a4b4626..a913b49340e6b 100644 --- a/sklearn/utils/_metadata_requests.py +++ b/sklearn/utils/_metadata_requests.py @@ -890,7 +890,7 @@ def _route_warn_or_error(self, *, child, router, params, method): # if params is None, we accept and warn on everything. warn_params = warn_on_params["params"] if warn_params is None: - warn_params = e.unrequested_params.keys() + warn_params = list(e.unrequested_params.keys()) for param in warn_params: if param in e.unrequested_params: From cb772126352864af760b5b91710578238bc5f020 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Tue, 17 May 2022 16:34:19 +0200 Subject: [PATCH 30/37] remove unused _assume_requested --- sklearn/utils/_metadata_requests.py | 38 ----------------------------- 1 file changed, 38 deletions(-) diff --git a/sklearn/utils/_metadata_requests.py b/sklearn/utils/_metadata_requests.py index a913b49340e6b..3b643398a9067 100644 --- a/sklearn/utils/_metadata_requests.py +++ b/sklearn/utils/_metadata_requests.py @@ -179,21 +179,6 @@ def requests(self): """Dictionary of the form: ``{key: alias}``.""" return self._requests - def _assume_requested(self, router): - """Convert all ERROR_IF_PASSED to REQUESTED. - - This method returns a new object with the above change and leaves the - original object unchanged. - """ - res = MethodMetadataRequest(router=router, owner=self.owner, method=self.method) - res._requests = { - param: ( - alias if alias != RequestType.ERROR_IF_PASSED else RequestType.REQUESTED - ) - for param, alias in self._requests.items() - } - return res - def add_request( self, *, @@ -387,17 +372,6 @@ def __init__(self, owner): MethodMetadataRequest(router=self, owner=owner, method=method), ) - def _assume_requested(self): - """Convert all ERROR_IF_PASSED to REQUESTED. - - This method returns a new object with the above change and leaves the - original object unchanged. - """ - res = MetadataRequest(owner=None) - for method in METHODS: - setattr(self, method, getattr(self, method)._assume_requested(router=res)) - return res - def _get_param_names(self, method, return_alias, ignore_self=None): """Get names of all metadata that can be consumed or routed by specified \ method. @@ -629,18 +603,6 @@ def _is_default_request(self): return True - def _assume_requested(self): - """Convert all ERROR_IF_PASSED to REQUESTED. - - This method returns a new object with the above change and leaves the - original object unchanged. - """ - res = MetadataRouter(owner=self.owner) - if self._self: - res._self = self._self._assume_requested() - res._route_mappings = deepcopy(self._route_mappings) - return res - def add_self(self, obj): """Add `self` (as a consumer) to the routing. From 30535828f9623192ff1e300df55e306a8add2ca8 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Tue, 17 May 2022 16:53:44 +0200 Subject: [PATCH 31/37] document exception parmaters --- sklearn/exceptions.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/sklearn/exceptions.py b/sklearn/exceptions.py index 986bc2ac34c32..23789dd68343e 100644 --- a/sklearn/exceptions.py +++ b/sklearn/exceptions.py @@ -27,9 +27,16 @@ class UnsetMetadataPassedError(ValueError): ---------- message : str The message + + unrequested_params : dict + A dictionary of parameters and their values which are provided but not + requested. + + routed_params : dict + A dictionary of routed parameters. 
""" - def __init__(self, message, unrequested_params, routed_params): + def __init__(self, *, message, unrequested_params, routed_params): super().__init__(message) self.unrequested_params = unrequested_params self.routed_params = routed_params From a8bc9ca1cde49029ae73c1e0d8c6f5b41312bf46 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Thu, 9 Jun 2022 19:35:54 +0200 Subject: [PATCH 32/37] try a different URL --- sklearn/multioutput.py | 10 +++++----- sklearn/tests/test_metadata_routing.py | 14 +++++--------- sklearn/utils/_metadata_requests.py | 7 +++---- 3 files changed, 13 insertions(+), 18 deletions(-) diff --git a/sklearn/multioutput.py b/sklearn/multioutput.py index 6f96905e49e03..df691c777a265 100644 --- a/sklearn/multioutput.py +++ b/sklearn/multioutput.py @@ -106,10 +106,10 @@ def partial_fit(self, X, y, classes=None, sample_weight=None, **partial_fit_para Only supported if the underlying regressor supports sample weights. - **partial_fit_params : dict of string -> object + **partial_fit_params : dict of str -> object Parameters passed to the ``estimator.partial_fit`` method of each step. - .. versionadded:: 1.1 + .. versionadded:: 1.2 Returns ------- @@ -262,8 +262,8 @@ def get_metadata_routing(self): # the fit method already accepts everything, therefore we don't # specify parameters .warn_on(child="estimator", method="fit", params=None) - # the partial_fit method at the time of this change only supports - # sample_weight, therefore we only include this metadata. + # the partial_fit method at the time of this change (v1.2) only + # supports sample_weight, therefore we only include this metadata. .warn_on(child="estimator", method="partial_fit", params=["sample_weight"]) ) return router @@ -355,7 +355,7 @@ def partial_fit(self, X, y, sample_weight=None, **partial_fit_params): Only supported if the underlying regressor supports sample weights. - **partial_fit_params : dict of string -> object + **partial_fit_params : dict of str -> object Parameters passed to the ``estimator.partial_fit`` method of each step. .. 
versionadded:: 1.2 diff --git a/sklearn/tests/test_metadata_routing.py b/sklearn/tests/test_metadata_routing.py index ce84ed7fbb041..41372b973375d 100644 --- a/sklearn/tests/test_metadata_routing.py +++ b/sklearn/tests/test_metadata_routing.py @@ -339,15 +339,11 @@ def test_simple_metadata_routing(): # If the estimator accepts the metadata but doesn't explicitly say it doesn't # need it, there's an error clf = SimpleMetaClassifier(estimator=ClassifierFitMetadata()) - with pytest.raises( - ValueError, - match=( - re.escape( - "[sample_weight] are passed but is not explicitly set as requested or" - " not for ClassifierFitMetadata.fit" - ) - ), - ): + err_message = ( + "[sample_weight] are passed but is not explicitly set as requested or" + " not for ClassifierFitMetadata.fit" + ) + with pytest.raises(ValueError, match=(re.escape(err_message))): clf.fit(X, y, sample_weight=my_weights) # Explicitly saying the estimator doesn't need it, makes the error go away, diff --git a/sklearn/utils/_metadata_requests.py b/sklearn/utils/_metadata_requests.py index 3b643398a9067..17dd7853e81fd 100644 --- a/sklearn/utils/_metadata_requests.py +++ b/sklearn/utils/_metadata_requests.py @@ -593,9 +593,8 @@ def __init__(self, owner): @property def _is_default_request(self): """Return ``True`` only if all sub-components have default values.""" - if self._self: - if not self._self._is_default_request: - return False + if self._self and not self._self._is_default_request: + return False for router_mapping in self._route_mappings.values(): if not router_mapping.router._is_default_request: @@ -804,7 +803,7 @@ def route_params(self, *, caller, params): return res def _route_warn_or_error(self, *, child, router, params, method): - """Route parameters wile handling error or deprecation warning choice. + """Route parameters while handling error or deprecation warning choice. This method warns instead of raising an error if the parent object has set ``warn_on`` for the child object's method and the user has not From 2fd59b05704fb738842ed7fc6823666b0c148bac Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Sat, 16 Jul 2022 15:18:20 +0200 Subject: [PATCH 33/37] address comments --- doc/metadata_routing.rst | 2 +- sklearn/multioutput.py | 4 +- sklearn/tests/test_metadata_routing.py | 48 +++++++++++-------- .../test_metaestimators_metadata_routing.py | 17 +++---- sklearn/utils/_metadata_requests.py | 36 ++++++++++++-- 5 files changed, 72 insertions(+), 35 deletions(-) diff --git a/doc/metadata_routing.rst b/doc/metadata_routing.rst index de04ecf022415..56df1e7f158db 100644 --- a/doc/metadata_routing.rst +++ b/doc/metadata_routing.rst @@ -205,7 +205,7 @@ or not:: ... ).fit(X, y, sample_weight=my_weights) ... except ValueError as e: ... print(e) - sample_weight is passed but is not explicitly set as requested or not for + [sample_weight] are passed but are not explicitly set as requested or not for LogisticRegression.score The issue can be fixed by explicitly setting the request value:: diff --git a/sklearn/multioutput.py b/sklearn/multioutput.py index df691c777a265..ed33fde4ecb20 100644 --- a/sklearn/multioutput.py +++ b/sklearn/multioutput.py @@ -260,7 +260,9 @@ def get_metadata_routing(self): .add(callee="fit", caller="fit"), ) # the fit method already accepts everything, therefore we don't - # specify parameters + # specify parameters. The value passed to ``child`` needs to be the + # same as what's passed to ``add`` above, in this case + # `"estimator"`. 
.warn_on(child="estimator", method="fit", params=None) # the partial_fit method at the time of this change (v1.2) only # supports sample_weight, therefore we only include this metadata. diff --git a/sklearn/tests/test_metadata_routing.py b/sklearn/tests/test_metadata_routing.py index 41372b973375d..1ffd826e91b6e 100644 --- a/sklearn/tests/test_metadata_routing.py +++ b/sklearn/tests/test_metadata_routing.py @@ -340,10 +340,10 @@ def test_simple_metadata_routing(): # need it, there's an error clf = SimpleMetaClassifier(estimator=ClassifierFitMetadata()) err_message = ( - "[sample_weight] are passed but is not explicitly set as requested or" + "[sample_weight] are passed but are not explicitly set as requested or" " not for ClassifierFitMetadata.fit" ) - with pytest.raises(ValueError, match=(re.escape(err_message))): + with pytest.raises(ValueError, match=re.escape(err_message)): clf.fit(X, y, sample_weight=my_weights) # Explicitly saying the estimator doesn't need it, makes the error go away, @@ -847,6 +847,18 @@ def test_metadata_routing_get_param_names(): def test_router_deprecation_warning(): + """This test checks the warning mechanism related to `warn_on`. + + `warn_on` is there to handle backward compatibility in cases where the + meta-estimator is already doing some routing, and SLEP006 would break + existing user code. `warn_on` helps converting some of those errors to + warnings. + + In different scenarios with a meta-estimator and a child estimator we test + if the warning is raised when it should, an error raised when it should, + and the combinations of the above cases. + """ + class MetaEstimator(BaseEstimator, MetaEstimatorMixin): def __init__(self, estimator): self.estimator = estimator @@ -881,18 +893,21 @@ def predict(self, X, sample_weight=None): return np.ones(shape=len(X)) est = MetaEstimator(estimator=Estimator()) - # the meta-estimator has est to have a warning on `fit`. + # the meta-estimator has set (using `warn_on`) to have a warning on `fit`. with pytest.warns( FutureWarning, match="From version 1.4 this results in the following error" ): est.fit(X, y, sample_weight=my_weights) + err_msg = ( + "{params} are passed but are not explicitly set as requested or not for {owner}" + ) + warn_msg = "From version 1.4 this results in the following error" # but predict should raise since there is no warn_on set for it. 
with pytest.raises( ValueError, match=re.escape( - "[sample_weight] are passed but is not explicitly set as requested or not" - " for Estimator.predict" + err_msg.format(params="[sample_weight]", owner="Estimator.predict") ), ): est.predict(X, sample_weight=my_weights) @@ -904,13 +919,10 @@ def predict(self, X, sample_weight=None): with pytest.raises( ValueError, match=re.escape( - "[sample_weight] are passed but is not explicitly set as requested or not" - " for RegressorMetadata.fit" + err_msg.format(params="[sample_weight]", owner="RegressorMetadata.fit") ), ): - with pytest.warns( - FutureWarning, match="From version 1.4 this results in the following error" - ): + with pytest.warns(FutureWarning, match=warn_msg): est.fit(X, y, sample_weight=my_weights) class WarningWeightedMetaRegressor(WeightedMetaRegressor): @@ -940,23 +952,18 @@ def get_metadata_routing(self): est = MetaEstimator( estimator=WarningWeightedMetaRegressor(estimator=RegressorMetadata()) ) - with pytest.warns( - FutureWarning, match="From version 1.4 this results in the following error" - ): + with pytest.warns(FutureWarning, match=warn_msg): est.fit(X, y, sample_weight=my_weights) # here we should raise because there is no warn_on for groups with pytest.raises( ValueError, match=re.escape( - "[sample_weight, groups] are passed but is not explicitly set as requested" - " or not for Estimator.fit" + err_msg.format(params="[sample_weight, groups]", owner="Estimator.fit") ), ): # the sample_weight should still warn - with pytest.warns( - FutureWarning, match="From version 1.4 this results in the following error" - ): + with pytest.warns(FutureWarning, match=warn_msg): WarningWeightedMetaRegressor(estimator=Estimator()).fit( X, y, sample_weight=my_weights, groups=1 ) @@ -971,8 +978,9 @@ def get_metadata_routing(self): with pytest.raises( ValueError, match=re.escape( - "[sample_weight] are passed but is not explicitly set as requested or not" - " for WarningWeightedMetaRegressor.fit" + err_msg.format( + params="[sample_weight]", owner="WarningWeightedMetaRegressor.fit" + ) ), ): est.fit(X, y, sample_weight=my_weights) diff --git a/sklearn/tests/test_metaestimators_metadata_routing.py b/sklearn/tests/test_metaestimators_metadata_routing.py index 3c0fbf1618421..615a246a8f1a3 100644 --- a/sklearn/tests/test_metaestimators_metadata_routing.py +++ b/sklearn/tests/test_metaestimators_metadata_routing.py @@ -40,7 +40,7 @@ def predict(self, X, y, sample_weight=None, metadata=None): class ConsumingClassifier(ClassifierMixin, BaseEstimator): - """A regressor consuming metadata.""" + """A classifier consuming metadata.""" def __init__(self, **kwargs): for param, value in kwargs.items(): @@ -102,13 +102,11 @@ def test_default_request(metaestimator): def test_multioutput_metadata_routing(MultiOutput, Estimator): # Check routing of metadata metaest = MultiOutput(Estimator()) - with pytest.warns( - FutureWarning, - match=( - "You are passing metadata for which the request values are not explicitly" - " set: sample_weight, metadata." - ), - ): + warn_msg = ( + "You are passing metadata for which the request values are not explicitly" + " set: sample_weight, metadata." 
+ ) + with pytest.warns(FutureWarning, match=(warn_msg)): metaest.fit(X, y_multi, sample_weight=sample_weight, metadata=metadata) check_recorded_metadata( metaest.estimators_[0], @@ -122,6 +120,9 @@ def test_multioutput_metadata_routing(MultiOutput, Estimator): .set_fit_request(sample_weight=True, metadata="alias") .set_partial_fit_request(sample_weight=True, metadata=True) ).fit(X, y_multi, alias=metadata) + # if an estimator requests a metadata but it's not passed, no errors are + # raised. Therefore here we don't pass `sample_weight` to test nothing is + # raised. check_recorded_metadata( metaest.estimators_[0], "fit", sample_weight=None, metadata=metadata ) diff --git a/sklearn/utils/_metadata_requests.py b/sklearn/utils/_metadata_requests.py index 17dd7853e81fd..ea8b2d056c7b2 100644 --- a/sklearn/utils/_metadata_requests.py +++ b/sklearn/utils/_metadata_requests.py @@ -311,8 +311,8 @@ def _route_params(self, params=None): if unrequested: raise UnsetMetadataPassedError( message=( - f"[{', '.join([key for key in unrequested])}] are passed but is not" - " explicitly set as requested or not for" + f"[{', '.join([key for key in unrequested])}] are passed but are" + " not explicitly set as requested or not for" f" {self.owner}.{self.method}" ), unrequested_params=unrequested, @@ -837,7 +837,7 @@ def _route_warn_or_error(self, *, child, router, params, method): # there is no warn_on set for this method of this child object, # we raise as usual. raise - if not getattr(router, "_is_default_request", None): + if not router._is_default_request: # the user has set at least one request value for this child # object, but not for all of them. Therefore we raise as usual. raise @@ -851,7 +851,7 @@ def _route_warn_or_error(self, *, child, router, params, method): # if params is None, we accept and warn on everything. warn_params = warn_on_params["params"] if warn_params is None: - warn_params = list(e.unrequested_params.keys()) + warn_params = warn_keys for param in warn_params: if param in e.unrequested_params: @@ -920,7 +920,8 @@ def warn_on(self, *, child, method, params, raise_on="1.4"): Parameters ---------- child : str - The name of the child object. + The name of the child object. The names come from the keyword + arguments passed to the ``add`` method. method : str The method for which there should be a ``FutureWarning`` @@ -938,6 +939,30 @@ def warn_on(self, *, child, method, params, raise_on="1.4"): ------- self : MetadataRouter Returns `self`. + + Examples + -------- + >>> router = ( + ... MetadataRouter(owner=self.__class__.__name__).add( + ... estimator=self.estimator, + ... method_mapping=MethodMapping() + ... .add(callee="partial_fit", caller="partial_fit") + ... .add(callee="fit", caller="fit"), + ... ) + ... # Assuming the fit method already accepts everything, therefore we don't + ... # specify parameters. The value passed to ``child`` needs to be the + ... # same as what's passed to ``add`` above, in this case + ... # `"estimator"`. + ... .warn_on(child="estimator", method="fit", params=None) + ... # Assuming the partial_fit method at the time of this change (v1.2) only + ... # supports sample_weight, therefore we only include this metadata. + ... .warn_on( + ... child="estimator", + ... method="partial_fit", + ... params=["sample_weight"] + ... ) + ... ) + """ if child not in self._warn_on: self._warn_on[child] = dict() @@ -1204,6 +1229,7 @@ class attributes, as well as determining request keys from method signatures. 
""" requests = MetadataRequest(owner=cls.__name__) + for method in METHODS: setattr( requests, From c904c281960e725159a74ed24bc8e4f418777a18 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Sat, 16 Jul 2022 18:02:20 +0200 Subject: [PATCH 34/37] add docs --- examples/plot_metadata_routing.py | 51 +++++++++++++++++++++++++++-- sklearn/utils/_metadata_requests.py | 28 +++------------- 2 files changed, 53 insertions(+), 26 deletions(-) diff --git a/examples/plot_metadata_routing.py b/examples/plot_metadata_routing.py index 91c3651699ee4..d4c418cb0f6b5 100644 --- a/examples/plot_metadata_routing.py +++ b/examples/plot_metadata_routing.py @@ -37,7 +37,7 @@ from sklearn.utils.metadata_routing import MethodMapping from sklearn.utils.metadata_routing import process_routing from sklearn.utils.validation import check_is_fitted -from sklearn.linear_model import LinearRegression +from sklearn.linear_model import LinearRegression, LogisticRegression N, M = 100, 4 X = np.random.rand(N, M) @@ -585,7 +585,7 @@ def get_metadata_routing(self): # %% -# When an estimator suports a metadata which wasn't supported before, the +# When an estimator supports a metadata which wasn't supported before, the # following pattern can be used to warn the users about it. @@ -605,6 +605,53 @@ def predict(self, X): for w in record: print(w.message) +# %% +# Deprecation to Give Users Time to Adapt their Code +# -------------------------------------------------- +# With the introduction of metadata routing, following user code would raise an +# error: + +try: + reg = MetaRegressor(estimator=LinearRegression()) + reg.fit(X, y, sample_weight=my_weights) +except Exception as e: + print(e) + +# %% +# You might want to give your users a period during which they see a +# ``FutureWarning`` instead in order to have time to adapt to the new API. For +# this, the :class:`~sklearn.utils.metadata_routing.MetadataRouter` provides a +# `warn_on` method`: + + +class WarningMetaRegressor(MetaEstimatorMixin, RegressorMixin, BaseEstimator): + def __init__(self, estimator): + self.estimator = estimator + + def fit(self, X, y, **fit_params): + params = process_routing(self, "fit", fit_params) + self.estimator_ = clone(self.estimator).fit(X, y, **params.estimator.fit) + + def get_metadata_routing(self): + router = ( + MetadataRouter(owner=self.__class__.__name__) + .add(estimator=self.estimator, method_mapping="one-to-one") + .warn_on(child="estimator", method="fit", params=None) + ) + return router + + +with warnings.catch_warnings(record=True) as record: + WeightedMetaRegressor(estimator=LogisticRegression()).fit( + X, y, sample_weight=my_weights + ) +for w in record: + print(w.message) + +# %% +# Note that in the above implementation, the value passed to ``child`` the same +# as the key passed to the ``add`` method, in this case ``"estimator"``. + # %% # Third Party Development and scikit-learn Dependency # --------------------------------------------------- diff --git a/sklearn/utils/_metadata_requests.py b/sklearn/utils/_metadata_requests.py index 39b961917287a..d89a667314960 100644 --- a/sklearn/utils/_metadata_requests.py +++ b/sklearn/utils/_metadata_requests.py @@ -920,6 +920,10 @@ def warn_on(self, *, child, method, params, raise_on="1.4"): ``ValueError`` if the child object has no set requests for ``sample_weight`` during ``fit``. + You can find more information on how to use this method in the + developer guide: + :ref:`sphx_glr_auto_examples_plot_metadata_routing.py`. 
+ Parameters ---------- child : str @@ -942,30 +946,6 @@ def warn_on(self, *, child, method, params, raise_on="1.4"): ------- self : MetadataRouter Returns `self`. - - Examples - -------- - >>> router = ( - ... MetadataRouter(owner=self.__class__.__name__).add( - ... estimator=self.estimator, - ... method_mapping=MethodMapping() - ... .add(callee="partial_fit", caller="partial_fit") - ... .add(callee="fit", caller="fit"), - ... ) - ... # Assuming the fit method already accepts everything, therefore we don't - ... # specify parameters. The value passed to ``child`` needs to be the - ... # same as what's passed to ``add`` above, in this case - ... # `"estimator"`. - ... .warn_on(child="estimator", method="fit", params=None) - ... # Assuming the partial_fit method at the time of this change (v1.2) only - ... # supports sample_weight, therefore we only include this metadata. - ... .warn_on( - ... child="estimator", - ... method="partial_fit", - ... params=["sample_weight"] - ... ) - ... ) - """ if child not in self._warn_on: self._warn_on[child] = dict() From 8eb85cfd1c0260ccf4e4292c9344af73d242e513 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Sat, 16 Jul 2022 18:14:54 +0200 Subject: [PATCH 35/37] fix example code --- examples/plot_metadata_routing.py | 2 +- sklearn/utils/_metadata_requests.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/examples/plot_metadata_routing.py b/examples/plot_metadata_routing.py index d4c418cb0f6b5..de2c86c91527c 100644 --- a/examples/plot_metadata_routing.py +++ b/examples/plot_metadata_routing.py @@ -642,7 +642,7 @@ def get_metadata_routing(self): with warnings.catch_warnings(record=True) as record: - WeightedMetaRegressor(estimator=LogisticRegression()).fit( + WarningMetaRegressor(estimator=LogisticRegression()).fit( X, y, sample_weight=my_weights ) for w in record: diff --git a/sklearn/utils/_metadata_requests.py b/sklearn/utils/_metadata_requests.py index d89a667314960..c8be404d7da5a 100644 --- a/sklearn/utils/_metadata_requests.py +++ b/sklearn/utils/_metadata_requests.py @@ -936,7 +936,8 @@ def warn_on(self, *, child, method, params, raise_on="1.4"): params : list of str The list of parameters for which there should be a - ``FutureWarning`` instead of a ``ValueError``. + ``FutureWarning`` instead of a ``ValueError``. If ``None``, the + rule is applied on all parameters. raise_on : str, default="1.4" The version after which there should be an error. Used in the From a056d25c5e3174b63a7d14533bfb280d27721776 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Sat, 16 Jul 2022 18:31:40 +0200 Subject: [PATCH 36/37] remove extra backtick --- examples/plot_metadata_routing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/plot_metadata_routing.py b/examples/plot_metadata_routing.py index de2c86c91527c..98895b823003b 100644 --- a/examples/plot_metadata_routing.py +++ b/examples/plot_metadata_routing.py @@ -621,7 +621,7 @@ def predict(self, X): # You might want to give your users a period during which they see a # ``FutureWarning`` instead in order to have time to adapt to the new API. 
For # this, the :class:`~sklearn.utils.metadata_routing.MetadataRouter` provides a -# `warn_on` method`: +# `warn_on` method: class WarningMetaRegressor(MetaEstimatorMixin, RegressorMixin, BaseEstimator): From 916e92b6f23028b78067520910a40116d2be8f21 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Sun, 17 Jul 2022 13:03:14 +0200 Subject: [PATCH 37/37] test _is_default_request and remove some unused lines --- sklearn/tests/test_metadata_routing.py | 34 +++++++++++++++++++ .../test_metaestimators_metadata_routing.py | 8 ----- 2 files changed, 34 insertions(+), 8 deletions(-) diff --git a/sklearn/tests/test_metadata_routing.py b/sklearn/tests/test_metadata_routing.py index 41e6d8f412256..6f0e1bf485a9b 100644 --- a/sklearn/tests/test_metadata_routing.py +++ b/sklearn/tests/test_metadata_routing.py @@ -989,6 +989,40 @@ def get_metadata_routing(self): est.fit(X, y, sample_weight=my_weights) +@pytest.mark.parametrize( + "estimator, is_default_request", + [ + (LinearRegression(), True), + (LinearRegression().set_fit_request(sample_weight=True), False), # type: ignore + (WeightedMetaRegressor(estimator=LinearRegression()), True), + ( + WeightedMetaRegressor( + estimator=LinearRegression().set_fit_request( # type: ignore + sample_weight=True + ) + ), + False, + ), + ( + WeightedMetaRegressor( + estimator=LinearRegression() + ).set_fit_request( # type: ignore + sample_weight=True + ), + False, + ), + ], +) +def test_is_default_request(estimator, is_default_request): + """Test the `_is_default_request` machinery. + + It should be `True` only if the user hasn't changed any default values. + + Applies to both `MetadataRouter` and `MetadataRequest`. + """ + assert estimator.get_metadata_routing()._is_default_request == is_default_request + + def test_method_generation(): # Test if all required request methods are generated. diff --git a/sklearn/tests/test_metaestimators_metadata_routing.py b/sklearn/tests/test_metaestimators_metadata_routing.py index 615a246a8f1a3..dae9fbf3aa03a 100644 --- a/sklearn/tests/test_metaestimators_metadata_routing.py +++ b/sklearn/tests/test_metaestimators_metadata_routing.py @@ -20,10 +20,6 @@ class ConsumingRegressor(RegressorMixin, BaseEstimator): """A regressor consuming metadata.""" - def __init__(self, **kwargs): - for param, value in kwargs.items(): - setattr(self, param, value) - def partial_fit(self, X, y, sample_weight=None, metadata=None): record_metadata( self, "partial_fit", sample_weight=sample_weight, metadata=metadata @@ -42,10 +38,6 @@ def predict(self, X, y, sample_weight=None, metadata=None): class ConsumingClassifier(ClassifierMixin, BaseEstimator): """A classifier consuming metadata.""" - def __init__(self, **kwargs): - for param, value in kwargs.items(): - setattr(self, param, value) - def partial_fit(self, X, y, sample_weight=None, metadata=None): record_metadata( self, "partial_fit", sample_weight=sample_weight, metadata=metadata
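
A minimal end-to-end sketch of the user-facing behaviour this series is building
towards (it assumes the branch state after these patches; the routing API and the
exact warning text may still change)::

    import numpy as np
    from sklearn.linear_model import Ridge
    from sklearn.multioutput import MultiOutputRegressor

    X = np.random.rand(20, 3)
    y = np.random.rand(20, 2)
    w = np.random.rand(20)

    # Unrequested metadata: the ``warn_on`` configuration added above downgrades
    # the routing error to a FutureWarning during the deprecation period.
    MultiOutputRegressor(Ridge()).fit(X, y, sample_weight=w)

    # Explicitly requesting the metadata on the sub-estimator routes it silently.
    MultiOutputRegressor(
        Ridge().set_fit_request(sample_weight=True)
    ).fit(X, y, sample_weight=w)
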