From 52309bd360a389473d7deea5fa391a8f9e6829bc Mon Sep 17 00:00:00 2001 From: Adam Li Date: Thu, 11 Jul 2024 16:29:17 -0400 Subject: [PATCH 1/3] WIP Signed-off-by: Adam Li --- doc/metadata_routing.rst | 4 +- doc/whats_new/v1.6.rst | 4 + sklearn/ensemble/_weight_boosting.py | 185 +++++++++++++++--- .../test_metaestimators_metadata_routing.py | 36 +++- 4 files changed, 195 insertions(+), 34 deletions(-) diff --git a/doc/metadata_routing.rst b/doc/metadata_routing.rst index 31dae6813bda5..927f94530e8e9 100644 --- a/doc/metadata_routing.rst +++ b/doc/metadata_routing.rst @@ -278,6 +278,8 @@ Meta-estimators and functions supporting metadata routing: - :class:`sklearn.compose.ColumnTransformer` - :class:`sklearn.compose.TransformedTargetRegressor` - :class:`sklearn.covariance.GraphicalLassoCV` +- :class:`sklearn.ensemble.AdaBoostClassifier` +- :class:`sklearn.ensemble.AdaBoostRegressor` - :class:`sklearn.ensemble.StackingClassifier` - :class:`sklearn.ensemble.StackingRegressor` - :class:`sklearn.ensemble.VotingClassifier` @@ -320,8 +322,6 @@ Meta-estimators and functions supporting metadata routing: Meta-estimators and tools not supporting metadata routing yet: -- :class:`sklearn.ensemble.AdaBoostClassifier` -- :class:`sklearn.ensemble.AdaBoostRegressor` - :class:`sklearn.feature_selection.RFE` - :class:`sklearn.feature_selection.RFECV` - :class:`sklearn.feature_selection.SequentialFeatureSelector` diff --git a/doc/whats_new/v1.6.rst b/doc/whats_new/v1.6.rst index 3971f60eb5f4b..3699b498c665d 100644 --- a/doc/whats_new/v1.6.rst +++ b/doc/whats_new/v1.6.rst @@ -95,6 +95,10 @@ more details. for the `fit` method of its estimator and for its underlying CV splitter and scorer. :pr:`29266` by :user:`Adam Li `. +- |Feature| :class:`ensemble.AdaBoostClassifier` and :class:`ensemble.AdaBoostRegressor` + now supports metadata routing. + :pr:`28494` by :user:`Adam Li `. + Dropping support for building with setuptools --------------------------------------------- diff --git a/sklearn/ensemble/_weight_boosting.py b/sklearn/ensemble/_weight_boosting.py index e18bafb450d49..65b4895e57683 100644 --- a/sklearn/ensemble/_weight_boosting.py +++ b/sklearn/ensemble/_weight_boosting.py @@ -35,12 +35,15 @@ ) from ..metrics import accuracy_score, r2_score from ..tree import DecisionTreeClassifier, DecisionTreeRegressor -from ..utils import _safe_indexing, check_random_state +from ..utils import _safe_indexing, check_random_state, Bunch from ..utils._param_validation import HasMethods, Interval, StrOptions from ..utils.extmath import softmax, stable_cumsum from ..utils.metadata_routing import ( - _raise_for_unsupported_routing, - _RoutingNotSupportedMixin, + _raise_for_params, + _routing_enabled, + process_routing, + MetadataRouter, + MethodMapping ) from ..utils.validation import ( _check_sample_weight, @@ -104,7 +107,7 @@ def _check_X(self, X): # AdaBoost*.estimator is not validated yet prefer_skip_nested_validation=False ) - def fit(self, X, y, sample_weight=None): + def fit(self, X, y, *, sample_weight=None, **fit_params): """Build a boosted classifier/regressor from the training set (X, y). Parameters @@ -120,12 +123,24 @@ def fit(self, X, y, sample_weight=None): Sample weights. If None, the sample weights are initialized to 1 / n_samples. + **fit_params : dict + Parameters to pass to the underlying estimators. + + .. versionadded:: 1.6 + + Only available if `enable_metadata_routing=True`, + which can be set by using + ``sklearn.set_config(enable_metadata_routing=True)``. 
+ See :ref:`Metadata Routing User Guide ` for + more details. + Returns ------- self : object Fitted estimator. """ - _raise_for_unsupported_routing(self, "fit", sample_weight=sample_weight) + _raise_for_params(fit_params, self, "fit") + X, y = self._validate_data( X, y, @@ -136,11 +151,30 @@ def fit(self, X, y, sample_weight=None): y_numeric=is_regressor(self), ) - sample_weight = _check_sample_weight( - sample_weight, X, np.float64, copy=True, only_non_negative=True - ) - sample_weight /= sample_weight.sum() + # sample weight should always be defined and thus is part of + # the metadata fit_params. + if sample_weight is None and _routing_enabled() and "sample_weight" in fit_params: + sample_weight = _check_sample_weight( + fit_params["sample_weight"], X, np.float64, copy=True, only_non_negative=True + ) + else: + sample_weight = _check_sample_weight( + sample_weight, X, np.float64, copy=True, only_non_negative=True + ) + sample_weight /= sample_weight.sum() + fit_params["sample_weight"] = sample_weight + + if _routing_enabled(): + routed_params = process_routing(self, "fit", **fit_params) + else: + routed_params = Bunch() + routed_params.estimator = Bunch(fit=fit_params) + if "sample_weight" in fit_params: + routed_params.estimator.fit["sample_weight"] = fit_params[ + "sample_weight" + ] + # Check parameters self._validate_estimator() @@ -200,7 +234,7 @@ def fit(self, X, y, sample_weight=None): return self @abstractmethod - def _boost(self, iboost, X, y, sample_weight, random_state): + def _boost(self, iboost, X, y, random_state, fit_params): """Implement a single boost. Warning: This method needs to be overridden by subclasses. @@ -217,11 +251,13 @@ def _boost(self, iboost, X, y, sample_weight, random_state): y : array-like of shape (n_samples,) The target values (class labels). - sample_weight : array-like of shape (n_samples,) - The current sample weights. - random_state : RandomState The current random number generator + + fit_params : dict + Parameters to pass to the underlying estimators. + + .. versionadded:: 1.6 Returns ------- @@ -239,7 +275,7 @@ def _boost(self, iboost, X, y, sample_weight, random_state): """ pass - def staged_score(self, X, y, sample_weight=None): + def staged_score(self, X, y, *, sample_weight=None, **score_params): """Return staged scores for X, y. This generator method yields the ensemble score after each iteration of @@ -257,6 +293,17 @@ def staged_score(self, X, y, sample_weight=None): sample_weight : array-like of shape (n_samples,), default=None Sample weights. + + **fit_params : dict + Parameters to pass to the underlying estimators. + + .. versionadded:: 1.6 + + Only available if `enable_metadata_routing=True`, + which can be set by using + ``sklearn.set_config(enable_metadata_routing=True)``. + See :ref:`Metadata Routing User Guide ` for + more details. Yields ------ @@ -309,6 +356,27 @@ def feature_importances_(self): "since estimator does not have a " "feature_importances_ attribute" ) from e + + def get_metadata_routing(self): + """Get metadata routing of this object. + + Please check :ref:`User Guide ` on how the routing + mechanism works. + + .. versionadded:: 1.6 + + Returns + ------- + routing : MetadataRouter + A :class:`~sklearn.utils.metadata_routing.MetadataRouter` encapsulating + routing information. 
+ """ + router = MetadataRouter(owner=self.__class__.__name__) + router.add( + estimator=self.estimator, + method_mapping=MethodMapping().add(callee="fit", caller="fit"), + ) + return router def _samme_proba(estimator, n_classes, X): @@ -333,7 +401,7 @@ def _samme_proba(estimator, n_classes, X): class AdaBoostClassifier( - _RoutingNotSupportedMixin, ClassifierMixin, BaseWeightBoosting + ClassifierMixin, BaseWeightBoosting ): """An AdaBoost classifier. @@ -547,7 +615,7 @@ def _validate_estimator(self): # "algorithm" parameter in version 1.6. Thus, a distinguishing function is # no longer needed. (Or adjust code here, if another algorithm, shall be # used instead of SAMME.R.) - def _boost(self, iboost, X, y, sample_weight, random_state): + def _boost(self, iboost, X, y, random_state, fit_params): """Implement a single boost. Perform a single boost according to the real multi-class SAMME.R @@ -565,13 +633,15 @@ def _boost(self, iboost, X, y, sample_weight, random_state): y : array-like of shape (n_samples,) The target values (class labels). - sample_weight : array-like of shape (n_samples,) - The current sample weights. - random_state : RandomState instance The RandomState instance used if the base estimator accepts a `random_state` attribute. + fit_params : dict + Parameters to pass to the underlying estimators. + + .. versionadded:: 1.6 + Returns ------- sample_weight : array-like of shape (n_samples,) or None @@ -594,12 +664,13 @@ def _boost(self, iboost, X, y, sample_weight, random_state): # TODO(1.6): Remove function. The `_boost_real` function won't be used any # longer, because the SAMME.R algorithm will be deprecated in 1.6. - def _boost_real(self, iboost, X, y, sample_weight, random_state): + def _boost_real(self, iboost, X, y, random_state, fit_params): """Implement a single boost using the SAMME.R real algorithm.""" estimator = self._make_estimator(random_state=random_state) + sample_weight = fit_params['sample_weight'] + estimator.fit(X, y, **fit_params) - estimator.fit(X, y, sample_weight=sample_weight) - + # XXX: how do we pass in score params here? y_predict_proba = estimator.predict_proba(X) if iboost == 0: @@ -653,11 +724,11 @@ def _boost_real(self, iboost, X, y, sample_weight, random_state): return sample_weight, 1.0, estimator_error - def _boost_discrete(self, iboost, X, y, sample_weight, random_state): + def _boost_discrete(self, iboost, X, y, random_state, fit_params): """Implement a single boost using the SAMME discrete algorithm.""" estimator = self._make_estimator(random_state=random_state) - - estimator.fit(X, y, sample_weight=sample_weight) + sample_weight = fit_params['sample_weight'] + estimator.fit(X, y, **fit_params) y_predict = estimator.predict(X) @@ -703,7 +774,7 @@ def _boost_discrete(self, iboost, X, y, sample_weight, random_state): return sample_weight, estimator_weight, estimator_error - def predict(self, X): + def predict(self, X, **predict_params): """Predict classes for X. The predicted class of an input sample is computed as the weighted mean @@ -714,13 +785,16 @@ def predict(self, X): X : {array-like, sparse matrix} of shape (n_samples, n_features) The training input samples. Sparse matrix can be CSC, CSR, COO, DOK, or LIL. COO, DOK, and LIL are converted to CSR. + + **predict_params : dict of str -> obj + Parameters to the `predict` called by the estimator. Returns ------- y : ndarray of shape (n_samples,) The predicted classes. 
""" - pred = self.decision_function(X) + pred = self.decision_function(X, **predict_params) if self.n_classes_ == 2: return self.classes_.take(pred > 0, axis=0) @@ -761,7 +835,7 @@ def staged_predict(self, X): for pred in self.staged_decision_function(X): yield np.array(classes.take(np.argmax(pred, axis=1), axis=0)) - def decision_function(self, X): + def decision_function(self, X, **predict_params): """Compute the decision function of ``X``. Parameters @@ -770,6 +844,9 @@ def decision_function(self, X): The training input samples. Sparse matrix can be CSC, CSR, COO, DOK, or LIL. COO, DOK, and LIL are converted to CSR. + **predict_params : dict of str -> obj + Parameters to the `predict` called by the estimator. + Returns ------- score : ndarray of shape of (n_samples, k) @@ -795,7 +872,7 @@ class in ``classes_``, respectively. else: # self.algorithm == "SAMME" pred = sum( np.where( - (estimator.predict(X) == classes).T, + (estimator.predict(X, **predict_params) == classes).T, w, -1 / (n_classes - 1) * w, ) @@ -963,8 +1040,33 @@ def predict_log_proba(self, X): """ return np.log(self.predict_proba(X)) + def get_metadata_routing(self): + """Get metadata routing of this object. + + Please check :ref:`User Guide ` on how the routing + mechanism works. + + .. versionadded:: 1.6 + + Returns + ------- + routing : MetadataRouter + A :class:`~sklearn.utils.metadata_routing.MetadataRouter` encapsulating + routing information. + """ + router = MetadataRouter(owner=self.__class__.__name__) + router.add( + estimator=self.estimator, + method_mapping=MethodMapping().add(callee="fit", caller="fit"), + method_mapping=MethodMapping().add(callee="score", caller="score"), + method_mapping=MethodMapping().add(callee="predict", caller="predict"), + method_mapping=MethodMapping().add(callee="predict_proba", caller="predict_proba"), + method_mapping=MethodMapping().add(callee="predict_log_proba", caller="predict_log_proba"), + method_mapping=MethodMapping().add(callee="decision_function", caller="decision_function"), + ) + return router -class AdaBoostRegressor(_RoutingNotSupportedMixin, RegressorMixin, BaseWeightBoosting): +class AdaBoostRegressor(RegressorMixin, BaseWeightBoosting): """An AdaBoost regressor. An AdaBoost [1] regressor is a meta-estimator that begins by fitting a @@ -1272,3 +1374,26 @@ def staged_predict(self, X): for i, _ in enumerate(self.estimators_, 1): yield self._get_median_predict(X, limit=i) + + def get_metadata_routing(self): + """Get metadata routing of this object. + + Please check :ref:`User Guide ` on how the routing + mechanism works. + + .. versionadded:: 1.6 + + Returns + ------- + routing : MetadataRouter + A :class:`~sklearn.utils.metadata_routing.MetadataRouter` encapsulating + routing information. 
+ """ + router = MetadataRouter(owner=self.__class__.__name__) + router.add( + estimator=self.estimator, + method_mapping=MethodMapping().add(callee="fit", caller="fit"), + method_mapping=MethodMapping().add(callee="score", caller="score"), + method_mapping=MethodMapping().add(callee="predict", caller="predict"), + ) + return router \ No newline at end of file diff --git a/sklearn/tests/test_metaestimators_metadata_routing.py b/sklearn/tests/test_metaestimators_metadata_routing.py index 9aca241521ca0..3607be2913dea 100644 --- a/sklearn/tests/test_metaestimators_metadata_routing.py +++ b/sklearn/tests/test_metaestimators_metadata_routing.py @@ -407,6 +407,40 @@ def enable_slep006(): ], "method_mapping": {"fit": ["fit", "score"]}, }, + { + "metaestimator": AdaBoostClassifier, + "estimator_name": "estimator", + "estimator": "classifier", + "X": X, + "y": y, + "preserves_metadata": True, + "estimator_routing_methods": [ + "fit", + "predict", + "predict_proba", + "predict_log_proba", + "decision_function", + "score", + ], + "method_mapping": {"fit": ["fit", "score"]}, + }, + { + "metaestimator": AdaBoostRegressor, + "estimator_name": "estimator", + "estimator": "regressor", + "X": X, + "y": y, + "preserves_metadata": True, + "estimator_routing_methods": [ + "fit", + "predict", + "predict_proba", + "predict_log_proba", + "decision_function", + "score", + ], + "method_mapping": {"fit": ["fit", "score"]}, + }, ] """List containing all metaestimators to be tested and their settings @@ -446,8 +480,6 @@ def enable_slep006(): METAESTIMATOR_IDS = [str(row["metaestimator"].__name__) for row in METAESTIMATORS] UNSUPPORTED_ESTIMATORS = [ - AdaBoostClassifier(), - AdaBoostRegressor(), RFE(ConsumingClassifier()), RFECV(ConsumingClassifier()), SequentialFeatureSelector(ConsumingClassifier()), From 911974cf62f6d5d38b40cafb365237e11d98e8d4 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Thu, 11 Jul 2024 19:37:07 -0400 Subject: [PATCH 2/3] Almost there Signed-off-by: Adam Li --- sklearn/ensemble/_weight_boosting.py | 320 ++++++++++++++++++++------- 1 file changed, 246 insertions(+), 74 deletions(-) diff --git a/sklearn/ensemble/_weight_boosting.py b/sklearn/ensemble/_weight_boosting.py index 65b4895e57683..d5df21289ff70 100644 --- a/sklearn/ensemble/_weight_boosting.py +++ b/sklearn/ensemble/_weight_boosting.py @@ -35,15 +35,15 @@ ) from ..metrics import accuracy_score, r2_score from ..tree import DecisionTreeClassifier, DecisionTreeRegressor -from ..utils import _safe_indexing, check_random_state, Bunch +from ..utils import Bunch, _safe_indexing, check_random_state from ..utils._param_validation import HasMethods, Interval, StrOptions from ..utils.extmath import softmax, stable_cumsum from ..utils.metadata_routing import ( + MetadataRouter, + MethodMapping, _raise_for_params, _routing_enabled, process_routing, - MetadataRouter, - MethodMapping ) from ..utils.validation import ( _check_sample_weight, @@ -125,9 +125,9 @@ def fit(self, X, y, *, sample_weight=None, **fit_params): **fit_params : dict Parameters to pass to the underlying estimators. - + .. versionadded:: 1.6 - + Only available if `enable_metadata_routing=True`, which can be set by using ``sklearn.set_config(enable_metadata_routing=True)``. @@ -153,9 +153,17 @@ def fit(self, X, y, *, sample_weight=None, **fit_params): # sample weight should always be defined and thus is part of # the metadata fit_params. 
- if sample_weight is None and _routing_enabled() and "sample_weight" in fit_params: + if ( + sample_weight is None + and _routing_enabled() + and "sample_weight" in fit_params + ): sample_weight = _check_sample_weight( - fit_params["sample_weight"], X, np.float64, copy=True, only_non_negative=True + fit_params["sample_weight"], + X, + np.float64, + copy=True, + only_non_negative=True, ) else: sample_weight = _check_sample_weight( @@ -164,7 +172,7 @@ def fit(self, X, y, *, sample_weight=None, **fit_params): sample_weight /= sample_weight.sum() fit_params["sample_weight"] = sample_weight - + if _routing_enabled(): routed_params = process_routing(self, "fit", **fit_params) else: @@ -174,7 +182,7 @@ def fit(self, X, y, *, sample_weight=None, **fit_params): routed_params.estimator.fit["sample_weight"] = fit_params[ "sample_weight" ] - + # Check parameters self._validate_estimator() @@ -186,22 +194,26 @@ def fit(self, X, y, *, sample_weight=None, **fit_params): # Initialization of the random number instance that will be used to # generate a seed at each iteration random_state = check_random_state(self.random_state) - epsilon = np.finfo(sample_weight.dtype).eps + epsilon = np.finfo(routed_params.estimator.fit["sample_weight"].dtype).eps - zero_weight_mask = sample_weight == 0.0 + zero_weight_mask = routed_params.estimator.fit["sample_weight"] == 0.0 for iboost in range(self.n_estimators): # avoid extremely small sample weight, for details see issue #20320 - sample_weight = np.clip(sample_weight, a_min=epsilon, a_max=None) + routed_params.estimator.fit["sample_weight"] = np.clip( + routed_params.estimator.fit["sample_weight"], a_min=epsilon, a_max=None + ) # do not clip sample weights that were exactly zero originally - sample_weight[zero_weight_mask] = 0.0 + routed_params.estimator.fit["sample_weight"][zero_weight_mask] = 0.0 # Boosting step - sample_weight, estimator_weight, estimator_error = self._boost( - iboost, X, y, sample_weight, random_state - ) + ( + routed_params.estimator.fit["sample_weight"], + estimator_weight, + estimator_error, + ) = self._boost(iboost, X, y, random_state, fit_params) # Early termination - if sample_weight is None: + if routed_params.estimator.fit["sample_weight"] is None: break self.estimator_weights_[iboost] = estimator_weight self.estimator_errors_[iboost] = estimator_error @@ -210,7 +222,7 @@ def fit(self, X, y, *, sample_weight=None, **fit_params): if estimator_error == 0: break - sample_weight_sum = np.sum(sample_weight) + sample_weight_sum = np.sum(routed_params.estimator.fit["sample_weight"]) if not np.isfinite(sample_weight_sum): warnings.warn( @@ -229,7 +241,7 @@ def fit(self, X, y, *, sample_weight=None, **fit_params): if iboost < self.n_estimators - 1: # Normalize - sample_weight /= sample_weight_sum + routed_params.estimator.fit["sample_weight"] /= sample_weight_sum return self @@ -253,10 +265,10 @@ def _boost(self, iboost, X, y, random_state, fit_params): random_state : RandomState The current random number generator - + fit_params : dict Parameters to pass to the underlying estimators. - + .. versionadded:: 1.6 Returns @@ -275,7 +287,7 @@ def _boost(self, iboost, X, y, random_state, fit_params): """ pass - def staged_score(self, X, y, *, sample_weight=None, **score_params): + def staged_score(self, X, y, *, sample_weight=None, **params): """Return staged scores for X, y. This generator method yields the ensemble score after each iteration of @@ -292,9 +304,9 @@ def staged_score(self, X, y, *, sample_weight=None, **score_params): Labels for X. 
sample_weight : array-like of shape (n_samples,), default=None - Sample weights. - - **fit_params : dict + Sample weights, which are passed to scorer. + + **params : dict Parameters to pass to the underlying estimators. .. versionadded:: 1.6 @@ -311,11 +323,22 @@ def staged_score(self, X, y, *, sample_weight=None, **score_params): """ X = self._check_X(X) - for y_pred in self.staged_predict(X): + _raise_for_params(params, self, "staged_score") + if _routing_enabled(): + # metadata routing is enabled. + routed_params = process_routing(self, "staged_score", **params) + else: + routed_params = Bunch() + routed_params.estimator = Bunch(staged_predict=params) + routed_params.scorer = Bunch(score={}) + if sample_weight is not None: + routed_params.scorer.score["sample_weight"] = sample_weight + + for y_pred in self.staged_predict(X, **routed_params.estimator.staged_predict): if is_classifier(self): - yield accuracy_score(y, y_pred, sample_weight=sample_weight) + yield accuracy_score(y, y_pred, **routed_params.scorer.score) else: - yield r2_score(y, y_pred, sample_weight=sample_weight) + yield r2_score(y, y_pred, **routed_params.scorer.score) @property def feature_importances_(self): @@ -356,7 +379,7 @@ def feature_importances_(self): "since estimator does not have a " "feature_importances_ attribute" ) from e - + def get_metadata_routing(self): """Get metadata routing of this object. @@ -400,9 +423,7 @@ def _samme_proba(estimator, n_classes, X): ) -class AdaBoostClassifier( - ClassifierMixin, BaseWeightBoosting -): +class AdaBoostClassifier(ClassifierMixin, BaseWeightBoosting): """An AdaBoost classifier. An AdaBoost [1]_ classifier is a meta-estimator that begins by fitting a @@ -657,17 +678,17 @@ def _boost(self, iboost, X, y, random_state, fit_params): If None then boosting has terminated early. """ if self.algorithm == "SAMME.R": - return self._boost_real(iboost, X, y, sample_weight, random_state) + return self._boost_real(iboost, X, y, random_state, fit_params) else: # elif self.algorithm == "SAMME": - return self._boost_discrete(iboost, X, y, sample_weight, random_state) + return self._boost_discrete(iboost, X, y, random_state, fit_params) # TODO(1.6): Remove function. The `_boost_real` function won't be used any # longer, because the SAMME.R algorithm will be deprecated in 1.6. def _boost_real(self, iboost, X, y, random_state, fit_params): """Implement a single boost using the SAMME.R real algorithm.""" estimator = self._make_estimator(random_state=random_state) - sample_weight = fit_params['sample_weight'] + sample_weight = fit_params["sample_weight"] estimator.fit(X, y, **fit_params) # XXX: how do we pass in score params here? @@ -727,7 +748,7 @@ def _boost_real(self, iboost, X, y, random_state, fit_params): def _boost_discrete(self, iboost, X, y, random_state, fit_params): """Implement a single boost using the SAMME discrete algorithm.""" estimator = self._make_estimator(random_state=random_state) - sample_weight = fit_params['sample_weight'] + sample_weight = fit_params["sample_weight"] estimator.fit(X, y, **fit_params) y_predict = estimator.predict(X) @@ -774,7 +795,7 @@ def _boost_discrete(self, iboost, X, y, random_state, fit_params): return sample_weight, estimator_weight, estimator_error - def predict(self, X, **predict_params): + def predict(self, X, **params): """Predict classes for X. 
The predicted class of an input sample is computed as the weighted mean @@ -785,16 +806,30 @@ def predict(self, X, **predict_params): X : {array-like, sparse matrix} of shape (n_samples, n_features) The training input samples. Sparse matrix can be CSC, CSR, COO, DOK, or LIL. COO, DOK, and LIL are converted to CSR. - - **predict_params : dict of str -> obj - Parameters to the `predict` called by the estimator. + + **params : dict of str -> obj + Parameters to pass to the `predict` method of the underlying estimator. + + .. versionadded:: 1.6 Returns ------- y : ndarray of shape (n_samples,) The predicted classes. """ - pred = self.decision_function(X, **predict_params) + _raise_for_params(params, self, "predict") + + if _routing_enabled(): + routed_params = process_routing( + self, + "predict", + **params, + ) + else: + routed_params = Bunch() + routed_params.estimator = Bunch(predict={}) + + pred = self.decision_function(X, **routed_params.estimator.predict) if self.n_classes_ == 2: return self.classes_.take(pred > 0, axis=0) @@ -835,7 +870,7 @@ def staged_predict(self, X): for pred in self.staged_decision_function(X): yield np.array(classes.take(np.argmax(pred, axis=1), axis=0)) - def decision_function(self, X, **predict_params): + def decision_function(self, X, **params): """Compute the decision function of ``X``. Parameters @@ -844,8 +879,16 @@ def decision_function(self, X, **predict_params): The training input samples. Sparse matrix can be CSC, CSR, COO, DOK, or LIL. COO, DOK, and LIL are converted to CSR. - **predict_params : dict of str -> obj - Parameters to the `predict` called by the estimator. + **params : dict of str -> object + Parameters to pass to the underlying estimator's + ``decision_function`` method. + + .. versionadded:: 1.6 + Only available if `enable_metadata_routing=True`, + which can be set by using + ``sklearn.set_config(enable_metadata_routing=True)``. + See :ref:`Metadata Routing User Guide ` for + more details. Returns ------- @@ -863,6 +906,13 @@ class in ``classes_``, respectively. n_classes = self.n_classes_ classes = self.classes_[:, np.newaxis] + _raise_for_params(params, self, "decision_function") + if _routing_enabled(): + # metadata routing is enabled. + routed_params = process_routing(self, "decision_function", **params) + else: + routed_params = Bunch(estimator=Bunch(decision_function={})) + # TODO(1.6): Remove, because "algorithm" param will be deprecated in 1.6 if self.algorithm == "SAMME.R": # The weights are all 1. for SAMME.R @@ -872,7 +922,12 @@ class in ``classes_``, respectively. else: # self.algorithm == "SAMME" pred = sum( np.where( - (estimator.predict(X, **predict_params) == classes).T, + ( + estimator.predict( + X, **routed_params.estimator.decision_function + ) + == classes + ).T, w, -1 / (n_classes - 1) * w, ) @@ -885,7 +940,7 @@ class in ``classes_``, respectively. return pred.sum(axis=1) return pred - def staged_decision_function(self, X): + def staged_decision_function(self, X, **params): """Compute decision function of ``X`` for each boosting iteration. This method allows monitoring (i.e. determine error on testing set) @@ -897,6 +952,17 @@ def staged_decision_function(self, X): The training input samples. Sparse matrix can be CSC, CSR, COO, DOK, or LIL. COO, DOK, and LIL are converted to CSR. + **params : dict of str -> object + Parameters to pass to the underlying estimator's + ``predict`` method. + + .. 
versionadded:: 1.6 + Only available if `enable_metadata_routing=True`, + which can be set by using + ``sklearn.set_config(enable_metadata_routing=True)``. + See :ref:`Metadata Routing User Guide ` for + more details. + Yields ------ score : generator of ndarray of shape (n_samples, k) @@ -910,6 +976,13 @@ class in ``classes_``, respectively. check_is_fitted(self) X = self._check_X(X) + _raise_for_params(params, self, "staged_decision_function") + if _routing_enabled(): + # metadata routing is enabled. + routed_params = process_routing(self, "staged_decision_function", **params) + else: + routed_params = Bunch(estimator=Bunch(predict={})) + n_classes = self.n_classes_ classes = self.classes_[:, np.newaxis] pred = None @@ -925,7 +998,12 @@ class in ``classes_``, respectively. current_pred = _samme_proba(estimator, n_classes, X) else: # elif self.algorithm == "SAMME": current_pred = np.where( - (estimator.predict(X) == classes).T, + ( + estimator.predict( + X, **routed_params.estimator.staged_decision_function + ) + == classes + ).T, weight, -1 / (n_classes - 1) * weight, ) @@ -961,7 +1039,7 @@ def _compute_proba_from_decision(decision, n_classes): decision /= n_classes - 1 return softmax(decision, copy=False) - def predict_proba(self, X): + def predict_proba(self, X, **params): """Predict class probabilities for X. The predicted class probabilities of an input sample is computed as @@ -974,6 +1052,17 @@ def predict_proba(self, X): The training input samples. Sparse matrix can be CSC, CSR, COO, DOK, or LIL. COO, DOK, and LIL are converted to CSR. + **params : dict of str -> object + Parameters to pass to the underlying estimator's + ``predict_proba`` method. + + .. versionadded:: 1.6 + Only available if `enable_metadata_routing=True`, + which can be set by using + ``sklearn.set_config(enable_metadata_routing=True)``. + See :ref:`Metadata Routing User Guide ` for + more details. + Returns ------- p : ndarray of shape (n_samples, n_classes) @@ -986,10 +1075,17 @@ def predict_proba(self, X): if n_classes == 1: return np.ones((_num_samples(X), 1)) - decision = self.decision_function(X) + _raise_for_params(params, self, "predict_proba") + if _routing_enabled(): + # metadata routing is enabled. + routed_params = process_routing(self, "predict_proba", **params) + else: + routed_params = Bunch(estimator=Bunch(predict_proba={})) + + decision = self.decision_function(X, **routed_params.estimator.predict_proba) return self._compute_proba_from_decision(decision, n_classes) - def staged_predict_proba(self, X): + def staged_predict_proba(self, X, **params): """Predict class probabilities for X. The predicted class probabilities of an input sample is computed as @@ -1013,13 +1109,21 @@ def staged_predict_proba(self, X): The class probabilities of the input samples. The order of outputs is the same of that of the :term:`classes_` attribute. """ + _raise_for_params(params, self, "staged_predict_proba") + if _routing_enabled(): + # metadata routing is enabled. + routed_params = process_routing(self, "staged_predict_proba", **params) + else: + routed_params = Bunch(estimator=Bunch(staged_predict_proba={})) n_classes = self.n_classes_ - for decision in self.staged_decision_function(X): + for decision in self.staged_decision_function( + X, **routed_params.estimator.staged_predict + ): yield self._compute_proba_from_decision(decision, n_classes) - def predict_log_proba(self, X): + def predict_log_proba(self, X, **params): """Predict class log-probabilities for X. 
The predicted class log-probabilities of an input sample is computed as @@ -1038,7 +1142,17 @@ def predict_log_proba(self, X): The class probabilities of the input samples. The order of outputs is the same of that of the :term:`classes_` attribute. """ - return np.log(self.predict_proba(X)) + _raise_for_params(params, self, "predict_log_proba") + + if _routing_enabled(): + # metadata routing is enabled. + routed_params = process_routing(self, "predict_log_proba", **params) + else: + routed_params = Bunch(estimator=Bunch(predict_log_proba={})) + + return np.log( + self.predict_proba(X, **routed_params.estimator.predict_log_proba) + ) def get_metadata_routing(self): """Get metadata routing of this object. @@ -1055,17 +1169,29 @@ def get_metadata_routing(self): routing information. """ router = MetadataRouter(owner=self.__class__.__name__) + router.add_self_request(self) router.add( estimator=self.estimator, - method_mapping=MethodMapping().add(callee="fit", caller="fit"), - method_mapping=MethodMapping().add(callee="score", caller="score"), - method_mapping=MethodMapping().add(callee="predict", caller="predict"), - method_mapping=MethodMapping().add(callee="predict_proba", caller="predict_proba"), - method_mapping=MethodMapping().add(callee="predict_log_proba", caller="predict_log_proba"), - method_mapping=MethodMapping().add(callee="decision_function", caller="decision_function"), + method_mapping=( + MethodMapping() + .add(callee="fit", caller="fit") + .add(callee="score", caller="fit") + .add(callee="predict", caller="predict") + .add(callee="predict_proba", caller="predict_proba") + .add(callee="decision_function", caller="decision_function") + .add(callee="predict_log_proba", caller="predict_log_proba") + .add(callee="score", caller="score") + .add(callee="staged_score", caller="staged_score") + .add(callee="staged_predict", caller="staged_predict") + .add(callee="staged_predict_proba", caller="staged_predict_proba") + .add( + callee="staged_decision_function", caller="staged_decision_function" + ) + ), ) return router + class AdaBoostRegressor(RegressorMixin, BaseWeightBoosting): """An AdaBoost regressor. @@ -1212,7 +1338,7 @@ def _validate_estimator(self): """Check the estimator and set the estimator_ attribute.""" super()._validate_estimator(default=DecisionTreeRegressor(max_depth=3)) - def _boost(self, iboost, X, y, sample_weight, random_state): + def _boost(self, iboost, X, y, random_state, params): """Implement a single boost for regression Perform a single boost according to the AdaBoost.R2 algorithm and @@ -1230,9 +1356,6 @@ def _boost(self, iboost, X, y, sample_weight, random_state): The target values (class labels in classification, real numbers in regression). - sample_weight : array-like of shape (n_samples,) - The current sample weights. - random_state : RandomState The RandomState instance used if the base estimator accepts a `random_state` attribute. @@ -1240,6 +1363,9 @@ def _boost(self, iboost, X, y, sample_weight, random_state): learner. replacement. + params : dict + Parameters to pass to the underlying estimators. + Returns ------- sample_weight : array-like of shape (n_samples,) or None @@ -1255,6 +1381,7 @@ def _boost(self, iboost, X, y, sample_weight, random_state): If None then boosting has terminated early. 
""" estimator = self._make_estimator(random_state=random_state) + sample_weight = params["sample_weight"] # Weighted sampling of the training set with replacement bootstrap_idx = random_state.choice( @@ -1269,7 +1396,7 @@ def _boost(self, iboost, X, y, sample_weight, random_state): X_ = _safe_indexing(X, bootstrap_idx) y_ = _safe_indexing(y, bootstrap_idx) estimator.fit(X_, y_) - y_predict = estimator.predict(X) + y_predict = estimator.predict(X, **params) error_vect = np.abs(y_predict - y) sample_mask = sample_weight > 0 @@ -1310,9 +1437,11 @@ def _boost(self, iboost, X, y, sample_weight, random_state): return sample_weight, estimator_weight, estimator_error - def _get_median_predict(self, X, limit): + def _get_median_predict(self, X, limit, **predict_params): # Evaluate predictions of all estimators - predictions = np.array([est.predict(X) for est in self.estimators_[:limit]]).T + predictions = np.array( + [est.predict(X, **predict_params) for est in self.estimators_[:limit]] + ).T # Sort the predictions sorted_idx = np.argsort(predictions, axis=1) @@ -1327,7 +1456,7 @@ def _get_median_predict(self, X, limit): # Return median predictions return predictions[np.arange(_num_samples(X)), median_estimators] - def predict(self, X): + def predict(self, X, **params): """Predict regression value for X. The predicted regression value of an input sample is computed @@ -1339,6 +1468,9 @@ def predict(self, X): The training input samples. Sparse matrix can be CSC, CSR, COO, DOK, or LIL. COO, DOK, and LIL are converted to CSR. + **params : dict of str -> obj + Parameters to the `predict` function of the estimator. + Returns ------- y : ndarray of shape (n_samples,) @@ -1347,9 +1479,22 @@ def predict(self, X): check_is_fitted(self) X = self._check_X(X) - return self._get_median_predict(X, len(self.estimators_)) + _raise_for_params(params, self, "predict") + if _routing_enabled(): + routed_params = process_routing( + self, + "predict", + **params, + ) + else: + routed_params = Bunch() + routed_params.estimator = Bunch(predict={}) + + return self._get_median_predict( + X, len(self.estimators_), **routed_params.estimator.predict + ) - def staged_predict(self, X): + def staged_predict(self, X, **params): """Return staged predictions for X. The predicted regression value of an input sample is computed @@ -1364,6 +1509,17 @@ def staged_predict(self, X): X : {array-like, sparse matrix} of shape (n_samples, n_features) The training input samples. + **params : dict of str -> object + Parameters to pass to the underlying estimator's + ``predict`` method. + + .. versionadded:: 1.6 + Only available if `enable_metadata_routing=True`, + which can be set by using + ``sklearn.set_config(enable_metadata_routing=True)``. + See :ref:`Metadata Routing User Guide ` for + more details. + Yields ------ y : generator of ndarray of shape (n_samples,) @@ -1372,8 +1528,17 @@ def staged_predict(self, X): check_is_fitted(self) X = self._check_X(X) + _raise_for_params(params, self, "staged_predict") + if _routing_enabled(): + # metadata routing is enabled. + routed_params = process_routing(self, "staged_predict", **params) + else: + routed_params = Bunch(estimator=Bunch(staged_predict={})) + for i, _ in enumerate(self.estimators_, 1): - yield self._get_median_predict(X, limit=i) + yield self._get_median_predict( + X, limit=i, **routed_params.estimator.staged_predict + ) def get_metadata_routing(self): """Get metadata routing of this object. @@ -1390,10 +1555,17 @@ def get_metadata_routing(self): routing information. 
""" router = MetadataRouter(owner=self.__class__.__name__) + router.add_self_request(self) router.add( estimator=self.estimator, - method_mapping=MethodMapping().add(callee="fit", caller="fit"), - method_mapping=MethodMapping().add(callee="score", caller="score"), - method_mapping=MethodMapping().add(callee="predict", caller="predict"), + method_mapping=( + MethodMapping() + .add(callee="fit", caller="fit") + .add(callee="score", caller="fit") + .add(callee="predict", caller="predict") + .add(callee="score", caller="score") + .add(callee="staged_predict", caller="staged_predict") + .add(callee="staged_score", caller="staged_score") + ), ) - return router \ No newline at end of file + return router From b2b44b9cb5b45ee3c99653d6a6c37b929fe769bd Mon Sep 17 00:00:00 2001 From: Adam Li Date: Thu, 11 Jul 2024 20:28:47 -0400 Subject: [PATCH 3/3] WIP Signed-off-by: Adam Li --- doc/whats_new/v1.6.rst | 2 +- sklearn/ensemble/_weight_boosting.py | 157 +++++++++++++++------------ 2 files changed, 87 insertions(+), 72 deletions(-) diff --git a/doc/whats_new/v1.6.rst b/doc/whats_new/v1.6.rst index 3699b498c665d..f0046ac9e0499 100644 --- a/doc/whats_new/v1.6.rst +++ b/doc/whats_new/v1.6.rst @@ -97,7 +97,7 @@ more details. - |Feature| :class:`ensemble.AdaBoostClassifier` and :class:`ensemble.AdaBoostRegressor` now supports metadata routing. - :pr:`28494` by :user:`Adam Li `. + :pr:`29472` by :user:`Adam Li `. Dropping support for building with setuptools --------------------------------------------- diff --git a/sklearn/ensemble/_weight_boosting.py b/sklearn/ensemble/_weight_boosting.py index d5df21289ff70..9914e90e739e1 100644 --- a/sklearn/ensemble/_weight_boosting.py +++ b/sklearn/ensemble/_weight_boosting.py @@ -107,7 +107,7 @@ def _check_X(self, X): # AdaBoost*.estimator is not validated yet prefer_skip_nested_validation=False ) - def fit(self, X, y, *, sample_weight=None, **fit_params): + def fit(self, X, y, *, sample_weight=None, **params): """Build a boosted classifier/regressor from the training set (X, y). Parameters @@ -123,7 +123,7 @@ def fit(self, X, y, *, sample_weight=None, **fit_params): Sample weights. If None, the sample weights are initialized to 1 / n_samples. - **fit_params : dict + **params : dict Parameters to pass to the underlying estimators. .. versionadded:: 1.6 @@ -139,7 +139,7 @@ def fit(self, X, y, *, sample_weight=None, **fit_params): self : object Fitted estimator. """ - _raise_for_params(fit_params, self, "fit") + _raise_for_params(params, self, "fit") X, y = self._validate_data( X, @@ -152,14 +152,10 @@ def fit(self, X, y, *, sample_weight=None, **fit_params): ) # sample weight should always be defined and thus is part of - # the metadata fit_params. - if ( - sample_weight is None - and _routing_enabled() - and "sample_weight" in fit_params - ): + # the metadata params. 
+ if sample_weight is None and _routing_enabled() and "sample_weight" in params: sample_weight = _check_sample_weight( - fit_params["sample_weight"], + params["sample_weight"], X, np.float64, copy=True, @@ -171,17 +167,15 @@ def fit(self, X, y, *, sample_weight=None, **fit_params): ) sample_weight /= sample_weight.sum() - fit_params["sample_weight"] = sample_weight + params["sample_weight"] = sample_weight if _routing_enabled(): - routed_params = process_routing(self, "fit", **fit_params) + routed_params = process_routing(self, "fit", **params) else: routed_params = Bunch() - routed_params.estimator = Bunch(fit=fit_params) - if "sample_weight" in fit_params: - routed_params.estimator.fit["sample_weight"] = fit_params[ - "sample_weight" - ] + routed_params.estimator = Bunch(fit=params) + if "sample_weight" in params: + routed_params.estimator.fit["sample_weight"] = params["sample_weight"] # Check parameters self._validate_estimator() @@ -210,7 +204,7 @@ def fit(self, X, y, *, sample_weight=None, **fit_params): routed_params.estimator.fit["sample_weight"], estimator_weight, estimator_error, - ) = self._boost(iboost, X, y, random_state, fit_params) + ) = self._boost(iboost, X, y, random_state, params) # Early termination if routed_params.estimator.fit["sample_weight"] is None: @@ -246,7 +240,7 @@ def fit(self, X, y, *, sample_weight=None, **fit_params): return self @abstractmethod - def _boost(self, iboost, X, y, random_state, fit_params): + def _boost(self, iboost, X, y, random_state, params): """Implement a single boost. Warning: This method needs to be overridden by subclasses. @@ -266,7 +260,7 @@ def _boost(self, iboost, X, y, random_state, fit_params): random_state : RandomState The current random number generator - fit_params : dict + params : dict Parameters to pass to the underlying estimators. .. versionadded:: 1.6 @@ -334,11 +328,19 @@ def staged_score(self, X, y, *, sample_weight=None, **params): if sample_weight is not None: routed_params.scorer.score["sample_weight"] = sample_weight + scorer = self._get_scorer() for y_pred in self.staged_predict(X, **routed_params.estimator.staged_predict): if is_classifier(self): - yield accuracy_score(y, y_pred, **routed_params.scorer.score) + yield scorer(y, y_pred, **routed_params.scorer.score) else: - yield r2_score(y, y_pred, **routed_params.scorer.score) + yield scorer(y, y_pred, **routed_params.scorer.score) + + def _get_scorer(self): + """Return the default scorer for the estimator.""" + if is_classifier(self): + return accuracy_score + else: + return r2_score @property def feature_importances_(self): @@ -397,12 +399,20 @@ def get_metadata_routing(self): router = MetadataRouter(owner=self.__class__.__name__) router.add( estimator=self.estimator, - method_mapping=MethodMapping().add(callee="fit", caller="fit"), + method_mapping=( + MethodMapping() + .add(caller="fit", callee="fit") + .add(caller="staged_score", callee="staged_predict") + ), + ) + router.add( + self._get_scorer(), + method_mapping=MethodMapping().add(caller="staged_score", callee="score"), ) return router -def _samme_proba(estimator, n_classes, X): +def _samme_proba(estimator, n_classes, X, params): """Calculate algorithm 4, step 2, equation c) of Zhu et al [1]. References @@ -410,7 +420,7 @@ def _samme_proba(estimator, n_classes, X): .. [1] J. Zhu, H. Zou, S. Rosset, T. Hastie, "Multi-class AdaBoost", 2009. """ - proba = estimator.predict_proba(X) + proba = estimator.predict_proba(X, **params) # Displace zero probabilities so the log is defined. 
# Also fix negative elements which may occur with @@ -636,7 +646,7 @@ def _validate_estimator(self): # "algorithm" parameter in version 1.6. Thus, a distinguishing function is # no longer needed. (Or adjust code here, if another algorithm, shall be # used instead of SAMME.R.) - def _boost(self, iboost, X, y, random_state, fit_params): + def _boost(self, iboost, X, y, random_state, params): """Implement a single boost. Perform a single boost according to the real multi-class SAMME.R @@ -658,7 +668,7 @@ def _boost(self, iboost, X, y, random_state, fit_params): The RandomState instance used if the base estimator accepts a `random_state` attribute. - fit_params : dict + params : dict Parameters to pass to the underlying estimators. .. versionadded:: 1.6 @@ -678,18 +688,18 @@ def _boost(self, iboost, X, y, random_state, fit_params): If None then boosting has terminated early. """ if self.algorithm == "SAMME.R": - return self._boost_real(iboost, X, y, random_state, fit_params) + return self._boost_real(iboost, X, y, random_state, params) else: # elif self.algorithm == "SAMME": - return self._boost_discrete(iboost, X, y, random_state, fit_params) + return self._boost_discrete(iboost, X, y, random_state, params) # TODO(1.6): Remove function. The `_boost_real` function won't be used any # longer, because the SAMME.R algorithm will be deprecated in 1.6. - def _boost_real(self, iboost, X, y, random_state, fit_params): + def _boost_real(self, iboost, X, y, random_state, params): """Implement a single boost using the SAMME.R real algorithm.""" estimator = self._make_estimator(random_state=random_state) - sample_weight = fit_params["sample_weight"] - estimator.fit(X, y, **fit_params) + sample_weight = params["sample_weight"] + estimator.fit(X, y, **params) # XXX: how do we pass in score params here? y_predict_proba = estimator.predict_proba(X) @@ -745,11 +755,11 @@ def _boost_real(self, iboost, X, y, random_state, fit_params): return sample_weight, 1.0, estimator_error - def _boost_discrete(self, iboost, X, y, random_state, fit_params): + def _boost_discrete(self, iboost, X, y, random_state, params): """Implement a single boost using the SAMME discrete algorithm.""" estimator = self._make_estimator(random_state=random_state) - sample_weight = fit_params["sample_weight"] - estimator.fit(X, y, **fit_params) + sample_weight = params["sample_weight"] + estimator.fit(X, y, **params) y_predict = estimator.predict(X) @@ -911,21 +921,23 @@ class in ``classes_``, respectively. # metadata routing is enabled. routed_params = process_routing(self, "decision_function", **params) else: - routed_params = Bunch(estimator=Bunch(decision_function={})) + routed_params = Bunch() + routed_params.estimator = Bunch(predict={}, predict_proba={}) # TODO(1.6): Remove, because "algorithm" param will be deprecated in 1.6 if self.algorithm == "SAMME.R": # The weights are all 1. for SAMME.R pred = sum( - _samme_proba(estimator, n_classes, X) for estimator in self.estimators_ + _samme_proba( + estimator, n_classes, X, routed_params.estimator.predict_proba + ) + for estimator in self.estimators_ ) else: # self.algorithm == "SAMME" pred = sum( np.where( ( - estimator.predict( - X, **routed_params.estimator.decision_function - ) + estimator.predict(X, **routed_params.estimator.predict) == classes ).T, w, @@ -981,7 +993,8 @@ class in ``classes_``, respectively. # metadata routing is enabled. 
routed_params = process_routing(self, "staged_decision_function", **params) else: - routed_params = Bunch(estimator=Bunch(predict={})) + routed_params = Bunch() + routed_params.estimator = Bunch(predict={}, predict_proba={}) n_classes = self.n_classes_ classes = self.classes_[:, np.newaxis] @@ -995,13 +1008,13 @@ class in ``classes_``, respectively. # 1.6 if self.algorithm == "SAMME.R": # The weights are all 1. for SAMME.R - current_pred = _samme_proba(estimator, n_classes, X) + current_pred = _samme_proba( + estimator, n_classes, X, routed_params.estimator.predict_proba + ) else: # elif self.algorithm == "SAMME": current_pred = np.where( ( - estimator.predict( - X, **routed_params.estimator.staged_decision_function - ) + estimator.predict(X, **routed_params.estimator.predict) == classes ).T, weight, @@ -1080,9 +1093,9 @@ def predict_proba(self, X, **params): # metadata routing is enabled. routed_params = process_routing(self, "predict_proba", **params) else: - routed_params = Bunch(estimator=Bunch(predict_proba={})) + routed_params = Bunch(self=Bunch(decision_function={})) - decision = self.decision_function(X, **routed_params.estimator.predict_proba) + decision = self.decision_function(X, **routed_params.self.decision_function) return self._compute_proba_from_decision(decision, n_classes) def staged_predict_proba(self, X, **params): @@ -1114,12 +1127,12 @@ def staged_predict_proba(self, X, **params): # metadata routing is enabled. routed_params = process_routing(self, "staged_predict_proba", **params) else: - routed_params = Bunch(estimator=Bunch(staged_predict_proba={})) + routed_params = Bunch(self=Bunch(staged_decision_function={})) n_classes = self.n_classes_ for decision in self.staged_decision_function( - X, **routed_params.estimator.staged_predict + X, **routed_params.self.staged_decision_function ): yield self._compute_proba_from_decision(decision, n_classes) @@ -1148,11 +1161,9 @@ def predict_log_proba(self, X, **params): # metadata routing is enabled. routed_params = process_routing(self, "predict_log_proba", **params) else: - routed_params = Bunch(estimator=Bunch(predict_log_proba={})) + routed_params = Bunch(self=Bunch(predict_proba={})) - return np.log( - self.predict_proba(X, **routed_params.estimator.predict_log_proba) - ) + return np.log(self.predict_proba(X, **routed_params.self.predict_proba)) def get_metadata_routing(self): """Get metadata routing of this object. 
@@ -1170,23 +1181,27 @@ def get_metadata_routing(self):
         """
         router = MetadataRouter(owner=self.__class__.__name__)
         router.add_self_request(self)
+        router.add(
+            scorer=self._get_scorer(),
+            method_mapping=MethodMapping().add(caller="staged_score", callee="score"),
+        )
         router.add(
             estimator=self.estimator,
             method_mapping=(
                 MethodMapping()
                 .add(callee="fit", caller="fit")
-                .add(callee="score", caller="fit")
-                .add(callee="predict", caller="predict")
-                .add(callee="predict_proba", caller="predict_proba")
-                .add(callee="decision_function", caller="decision_function")
-                .add(callee="predict_log_proba", caller="predict_log_proba")
+                # .add(callee="predict", caller="predict")
+                # .add(callee="staged_predict", caller="staged_predict")
+                .add(callee="predict", caller="decision_function")
+                .add(callee="predict_proba", caller="decision_function")
+                .add(callee="predict", caller="staged_decision_function")
+                .add(callee="predict_proba", caller="staged_decision_function")
+                # .add(callee="predict_proba", caller="predict_proba")
+                # .add(callee="decision_function", caller="predict_proba")
+                # .add(callee="staged_predict_proba", caller="staged_predict_proba")
+                # .add(callee="predict_log_proba", caller="predict_log_proba")
                 .add(callee="score", caller="score")
                 .add(callee="staged_score", caller="staged_score")
-                .add(callee="staged_predict", caller="staged_predict")
-                .add(callee="staged_predict_proba", caller="staged_predict_proba")
-                .add(
-                    callee="staged_decision_function", caller="staged_decision_function"
-                )
             ),
         )
         return router
@@ -1437,10 +1452,10 @@ def _boost(self, iboost, X, y, random_state, params):
 
         return sample_weight, estimator_weight, estimator_error
 
-    def _get_median_predict(self, X, limit, **predict_params):
+    def _get_median_predict(self, X, limit, **params):
         # Evaluate predictions of all estimators
         predictions = np.array(
-            [est.predict(X, **predict_params) for est in self.estimators_[:limit]]
+            [est.predict(X, **params) for est in self.estimators_[:limit]]
         ).T
 
         # Sort the predictions
@@ -1560,12 +1575,12 @@ def get_metadata_routing(self):
             estimator=self.estimator,
             method_mapping=(
                 MethodMapping()
-                .add(callee="fit", caller="fit")
-                .add(callee="score", caller="fit")
-                .add(callee="predict", caller="predict")
-                .add(callee="score", caller="score")
-                .add(callee="staged_predict", caller="staged_predict")
-                .add(callee="staged_score", caller="staged_score")
+                .add(caller="fit", callee="fit")
+                .add(caller="fit", callee="score")
+                .add(caller="predict", callee="predict")
+                .add(caller="score", callee="score")
+                .add(caller="staged_predict", callee="staged_predict")
+                .add(caller="staged_score", callee="staged_score")
             ),
         )
         return router
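
Usage sketch (illustrative only, not part of the diffs above): a minimal example of what this series enables, assuming the final state of the branch. `set_fit_request` is the standard SLEP006 request API, and the sub-estimator must opt in before `fit` metadata is routed to it. Only `fit` and `decision_function` are exercised here, since the prediction-path mappings are still being reworked in PATCH 3/3.

    # Illustrative sketch, not part of the patch.
    import numpy as np

    from sklearn import set_config
    from sklearn.ensemble import AdaBoostClassifier
    from sklearn.tree import DecisionTreeClassifier

    # Routing is opt-in; without this flag the legacy sample_weight path runs.
    set_config(enable_metadata_routing=True)

    X = np.array([[0.0], [1.0], [2.0], [3.0]])
    y = np.array([0, 0, 1, 1])
    weights = np.array([1.0, 2.0, 2.0, 1.0])

    # The sub-estimator must explicitly request sample_weight; otherwise
    # process_routing raises an UnsetMetadataPassedError inside fit.
    base = DecisionTreeClassifier(max_depth=1).set_fit_request(sample_weight=True)
    clf = AdaBoostClassifier(estimator=base, n_estimators=3, algorithm="SAMME")
    clf.fit(X, y, sample_weight=weights)
    print(clf.decision_function(X))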
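
In `MethodMapping`, `caller` is the meta-estimator method receiving metadata and `callee` is the sub-estimator method whose requests it consults, which is why the regressor mapping above routes `fit` metadata to both the sub-estimator's `fit` and `score` requests. A sketch of inspecting the composed router (same assumptions as the sketch above):

    # Illustrative sketch, not part of the patch.
    from sklearn import set_config
    from sklearn.ensemble import AdaBoostRegressor
    from sklearn.tree import DecisionTreeRegressor

    set_config(enable_metadata_routing=True)

    reg = AdaBoostRegressor(
        estimator=DecisionTreeRegressor().set_fit_request(sample_weight=True)
    )
    # Printing a MetadataRouter shows, per caller method, which callee
    # methods are consulted and which metadata they request.
    print(reg.get_metadata_routing())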
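
Every routed method in the series repeats one fallback pattern: call `process_routing` when the routing flag is on, otherwise hand-build a `Bunch` with the same shape so the call sites stay uniform. The pattern in isolation, with the imports taken from the patch itself (`_route_fit_params` is a hypothetical name used only for illustration; `_routing_enabled` is a private helper, imported exactly as the patch imports it):

    # Illustrative sketch, not part of the patch; _route_fit_params is hypothetical.
    from sklearn.utils import Bunch
    from sklearn.utils.metadata_routing import _routing_enabled, process_routing

    def _route_fit_params(metaestimator, **fit_params):
        if _routing_enabled():
            # Validates the requests and splits params per sub-object and method.
            return process_routing(metaestimator, "fit", **fit_params)
        # Routing disabled: mimic the shape of process_routing's output so
        # the caller can always read routed_params.estimator.fit.
        routed_params = Bunch(estimator=Bunch(fit={}))
        if "sample_weight" in fit_params:
            routed_params.estimator.fit["sample_weight"] = fit_params["sample_weight"]
        return routed_params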