diff --git a/doc/metadata_routing.rst b/doc/metadata_routing.rst index 31dae6813bda5..927f94530e8e9 100644 --- a/doc/metadata_routing.rst +++ b/doc/metadata_routing.rst @@ -278,6 +278,8 @@ Meta-estimators and functions supporting metadata routing: - :class:`sklearn.compose.ColumnTransformer` - :class:`sklearn.compose.TransformedTargetRegressor` - :class:`sklearn.covariance.GraphicalLassoCV` +- :class:`sklearn.ensemble.AdaBoostClassifier` +- :class:`sklearn.ensemble.AdaBoostRegressor` - :class:`sklearn.ensemble.StackingClassifier` - :class:`sklearn.ensemble.StackingRegressor` - :class:`sklearn.ensemble.VotingClassifier` @@ -320,8 +322,6 @@ Meta-estimators and functions supporting metadata routing: Meta-estimators and tools not supporting metadata routing yet: -- :class:`sklearn.ensemble.AdaBoostClassifier` -- :class:`sklearn.ensemble.AdaBoostRegressor` - :class:`sklearn.feature_selection.RFE` - :class:`sklearn.feature_selection.RFECV` - :class:`sklearn.feature_selection.SequentialFeatureSelector` diff --git a/doc/whats_new/v1.6.rst b/doc/whats_new/v1.6.rst index 3971f60eb5f4b..f0046ac9e0499 100644 --- a/doc/whats_new/v1.6.rst +++ b/doc/whats_new/v1.6.rst @@ -95,6 +95,10 @@ more details. for the `fit` method of its estimator and for its underlying CV splitter and scorer. :pr:`29266` by :user:`Adam Li <adam2392>`. +- |Feature| :class:`ensemble.AdaBoostClassifier` and :class:`ensemble.AdaBoostRegressor` + now support metadata routing. + :pr:`29472` by :user:`Adam Li <adam2392>`.
+ Dropping support for building with setuptools --------------------------------------------- diff --git a/sklearn/ensemble/_weight_boosting.py b/sklearn/ensemble/_weight_boosting.py index e18bafb450d49..9914e90e739e1 100644 --- a/sklearn/ensemble/_weight_boosting.py +++ b/sklearn/ensemble/_weight_boosting.py @@ -35,12 +35,15 @@ ) from ..metrics import accuracy_score, r2_score from ..tree import DecisionTreeClassifier, DecisionTreeRegressor -from ..utils import _safe_indexing, check_random_state +from ..utils import Bunch, _safe_indexing, check_random_state from ..utils._param_validation import HasMethods, Interval, StrOptions from ..utils.extmath import softmax, stable_cumsum from ..utils.metadata_routing import ( - _raise_for_unsupported_routing, - _RoutingNotSupportedMixin, + MetadataRouter, + MethodMapping, + _raise_for_params, + _routing_enabled, + process_routing, ) from ..utils.validation import ( _check_sample_weight, @@ -104,7 +107,7 @@ def _check_X(self, X): # AdaBoost*.estimator is not validated yet prefer_skip_nested_validation=False ) - def fit(self, X, y, sample_weight=None): + def fit(self, X, y, *, sample_weight=None, **params): """Build a boosted classifier/regressor from the training set (X, y). Parameters @@ -120,12 +123,24 @@ def fit(self, X, y, sample_weight=None): Sample weights. If None, the sample weights are initialized to 1 / n_samples. + **params : dict + Parameters to pass to the underlying estimators. + + .. versionadded:: 1.6 + + Only available if `enable_metadata_routing=True`, + which can be set by using + ``sklearn.set_config(enable_metadata_routing=True)``. + See :ref:`Metadata Routing User Guide ` for + more details. + Returns ------- self : object Fitted estimator. 
""" - _raise_for_unsupported_routing(self, "fit", sample_weight=sample_weight) + _raise_for_params(params, self, "fit") + X, y = self._validate_data( X, y, @@ -136,10 +151,31 @@ def fit(self, X, y, sample_weight=None): y_numeric=is_regressor(self), ) - sample_weight = _check_sample_weight( - sample_weight, X, np.float64, copy=True, only_non_negative=True - ) + # sample weight should always be defined and thus is part of + # the metadata params. + if sample_weight is None and _routing_enabled() and "sample_weight" in params: + sample_weight = _check_sample_weight( + params["sample_weight"], + X, + np.float64, + copy=True, + only_non_negative=True, + ) + else: + sample_weight = _check_sample_weight( + sample_weight, X, np.float64, copy=True, only_non_negative=True + ) + sample_weight /= sample_weight.sum() + params["sample_weight"] = sample_weight + + if _routing_enabled(): + routed_params = process_routing(self, "fit", **params) + else: + routed_params = Bunch() + routed_params.estimator = Bunch(fit=params) + if "sample_weight" in params: + routed_params.estimator.fit["sample_weight"] = params["sample_weight"] # Check parameters self._validate_estimator() @@ -152,22 +188,26 @@ def fit(self, X, y, sample_weight=None): # Initialization of the random number instance that will be used to # generate a seed at each iteration random_state = check_random_state(self.random_state) - epsilon = np.finfo(sample_weight.dtype).eps + epsilon = np.finfo(routed_params.estimator.fit["sample_weight"].dtype).eps - zero_weight_mask = sample_weight == 0.0 + zero_weight_mask = routed_params.estimator.fit["sample_weight"] == 0.0 for iboost in range(self.n_estimators): # avoid extremely small sample weight, for details see issue #20320 - sample_weight = np.clip(sample_weight, a_min=epsilon, a_max=None) + routed_params.estimator.fit["sample_weight"] = np.clip( + routed_params.estimator.fit["sample_weight"], a_min=epsilon, a_max=None + ) # do not clip sample weights that were exactly zero 
originally - sample_weight[zero_weight_mask] = 0.0 + routed_params.estimator.fit["sample_weight"][zero_weight_mask] = 0.0 # Boosting step - sample_weight, estimator_weight, estimator_error = self._boost( - iboost, X, y, sample_weight, random_state - ) + ( + routed_params.estimator.fit["sample_weight"], + estimator_weight, + estimator_error, + ) = self._boost(iboost, X, y, random_state, params) # Early termination - if sample_weight is None: + if routed_params.estimator.fit["sample_weight"] is None: break self.estimator_weights_[iboost] = estimator_weight self.estimator_errors_[iboost] = estimator_error @@ -176,7 +216,7 @@ def fit(self, X, y, sample_weight=None): if estimator_error == 0: break - sample_weight_sum = np.sum(sample_weight) + sample_weight_sum = np.sum(routed_params.estimator.fit["sample_weight"]) if not np.isfinite(sample_weight_sum): warnings.warn( @@ -195,12 +235,12 @@ def fit(self, X, y, sample_weight=None): if iboost < self.n_estimators - 1: # Normalize - sample_weight /= sample_weight_sum + routed_params.estimator.fit["sample_weight"] /= sample_weight_sum return self @abstractmethod - def _boost(self, iboost, X, y, sample_weight, random_state): + def _boost(self, iboost, X, y, random_state, params): """Implement a single boost. Warning: This method needs to be overridden by subclasses. @@ -217,12 +257,14 @@ def _boost(self, iboost, X, y, sample_weight, random_state): y : array-like of shape (n_samples,) The target values (class labels). - sample_weight : array-like of shape (n_samples,) - The current sample weights. - random_state : RandomState The current random number generator + params : dict + Parameters to pass to the underlying estimators. + + .. 
versionadded:: 1.6 + Returns ------- sample_weight : array-like of shape (n_samples,) or None @@ -239,7 +281,7 @@ def _boost(self, iboost, X, y, sample_weight, random_state): """ pass - def staged_score(self, X, y, sample_weight=None): + def staged_score(self, X, y, *, sample_weight=None, **params): """Return staged scores for X, y. This generator method yields the ensemble score after each iteration of @@ -256,7 +298,18 @@ def staged_score(self, X, y, sample_weight=None): Labels for X. sample_weight : array-like of shape (n_samples,), default=None - Sample weights. + Sample weights, which are passed to scorer. + + **params : dict + Parameters to pass to the underlying estimators. + + .. versionadded:: 1.6 + + Only available if `enable_metadata_routing=True`, + which can be set by using + ``sklearn.set_config(enable_metadata_routing=True)``. + See :ref:`Metadata Routing User Guide ` for + more details. Yields ------ @@ -264,11 +317,30 @@ def staged_score(self, X, y, sample_weight=None): """ X = self._check_X(X) - for y_pred in self.staged_predict(X): + _raise_for_params(params, self, "staged_score") + if _routing_enabled(): + # metadata routing is enabled. 
+ routed_params = process_routing(self, "staged_score", **params) + else: + routed_params = Bunch() + routed_params.estimator = Bunch(staged_predict=params) + routed_params.scorer = Bunch(score={}) + if sample_weight is not None: + routed_params.scorer.score["sample_weight"] = sample_weight + + scorer = self._get_scorer() + for y_pred in self.staged_predict(X, **routed_params.estimator.staged_predict): if is_classifier(self): - yield accuracy_score(y, y_pred, sample_weight=sample_weight) + yield scorer(y, y_pred, **routed_params.scorer.score) else: - yield r2_score(y, y_pred, sample_weight=sample_weight) + yield scorer(y, y_pred, **routed_params.scorer.score) + + def _get_scorer(self): + """Return the default scorer for the estimator.""" + if is_classifier(self): + return accuracy_score + else: + return r2_score @property def feature_importances_(self): @@ -310,8 +382,37 @@ def feature_importances_(self): "feature_importances_ attribute" ) from e + def get_metadata_routing(self): + """Get metadata routing of this object. + + Please check :ref:`User Guide ` on how the routing + mechanism works. + + .. versionadded:: 1.6 -def _samme_proba(estimator, n_classes, X): + Returns + ------- + routing : MetadataRouter + A :class:`~sklearn.utils.metadata_routing.MetadataRouter` encapsulating + routing information. + """ + router = MetadataRouter(owner=self.__class__.__name__) + router.add( + estimator=self.estimator, + method_mapping=( + MethodMapping() + .add(caller="fit", callee="fit") + .add(caller="staged_score", callee="staged_predict") + ), + ) + router.add( + self._get_scorer(), + method_mapping=MethodMapping().add(caller="staged_score", callee="score"), + ) + return router + + +def _samme_proba(estimator, n_classes, X, params): """Calculate algorithm 4, step 2, equation c) of Zhu et al [1]. References @@ -319,7 +420,7 @@ def _samme_proba(estimator, n_classes, X): .. [1] J. Zhu, H. Zou, S. Rosset, T. Hastie, "Multi-class AdaBoost", 2009. 
""" - proba = estimator.predict_proba(X) + proba = estimator.predict_proba(X, **params) # Displace zero probabilities so the log is defined. # Also fix negative elements which may occur with @@ -332,9 +433,7 @@ def _samme_proba(estimator, n_classes, X): ) -class AdaBoostClassifier( - _RoutingNotSupportedMixin, ClassifierMixin, BaseWeightBoosting -): +class AdaBoostClassifier(ClassifierMixin, BaseWeightBoosting): """An AdaBoost classifier. An AdaBoost [1]_ classifier is a meta-estimator that begins by fitting a @@ -547,7 +646,7 @@ def _validate_estimator(self): # "algorithm" parameter in version 1.6. Thus, a distinguishing function is # no longer needed. (Or adjust code here, if another algorithm, shall be # used instead of SAMME.R.) - def _boost(self, iboost, X, y, sample_weight, random_state): + def _boost(self, iboost, X, y, random_state, params): """Implement a single boost. Perform a single boost according to the real multi-class SAMME.R @@ -565,13 +664,15 @@ def _boost(self, iboost, X, y, sample_weight, random_state): y : array-like of shape (n_samples,) The target values (class labels). - sample_weight : array-like of shape (n_samples,) - The current sample weights. - random_state : RandomState instance The RandomState instance used if the base estimator accepts a `random_state` attribute. + params : dict + Parameters to pass to the underlying estimators. + + .. versionadded:: 1.6 + Returns ------- sample_weight : array-like of shape (n_samples,) or None @@ -587,19 +688,20 @@ def _boost(self, iboost, X, y, sample_weight, random_state): If None then boosting has terminated early. """ if self.algorithm == "SAMME.R": - return self._boost_real(iboost, X, y, sample_weight, random_state) + return self._boost_real(iboost, X, y, random_state, params) else: # elif self.algorithm == "SAMME": - return self._boost_discrete(iboost, X, y, sample_weight, random_state) + return self._boost_discrete(iboost, X, y, random_state, params) # TODO(1.6): Remove function. 
The `_boost_real` function won't be used any # longer, because the SAMME.R algorithm will be deprecated in 1.6. - def _boost_real(self, iboost, X, y, sample_weight, random_state): + def _boost_real(self, iboost, X, y, random_state, params): """Implement a single boost using the SAMME.R real algorithm.""" estimator = self._make_estimator(random_state=random_state) + sample_weight = params["sample_weight"] + estimator.fit(X, y, **params) - estimator.fit(X, y, sample_weight=sample_weight) - + # XXX: how do we pass in score params here? y_predict_proba = estimator.predict_proba(X) if iboost == 0: @@ -653,11 +755,11 @@ def _boost_real(self, iboost, X, y, sample_weight, random_state): return sample_weight, 1.0, estimator_error - def _boost_discrete(self, iboost, X, y, sample_weight, random_state): + def _boost_discrete(self, iboost, X, y, random_state, params): """Implement a single boost using the SAMME discrete algorithm.""" estimator = self._make_estimator(random_state=random_state) - - estimator.fit(X, y, sample_weight=sample_weight) + sample_weight = params["sample_weight"] + estimator.fit(X, y, **params) y_predict = estimator.predict(X) @@ -703,7 +805,7 @@ def _boost_discrete(self, iboost, X, y, sample_weight, random_state): return sample_weight, estimator_weight, estimator_error - def predict(self, X): + def predict(self, X, **params): """Predict classes for X. The predicted class of an input sample is computed as the weighted mean @@ -715,12 +817,29 @@ def predict(self, X): The training input samples. Sparse matrix can be CSC, CSR, COO, DOK, or LIL. COO, DOK, and LIL are converted to CSR. + **params : dict of str -> obj + Parameters to pass to the `predict` method of the underlying estimator. + + .. versionadded:: 1.6 + Returns ------- y : ndarray of shape (n_samples,) The predicted classes. 
""" - pred = self.decision_function(X) + _raise_for_params(params, self, "predict") + + if _routing_enabled(): + routed_params = process_routing( + self, + "predict", + **params, + ) + else: + routed_params = Bunch() + routed_params.estimator = Bunch(predict={}) + + pred = self.decision_function(X, **routed_params.estimator.predict) if self.n_classes_ == 2: return self.classes_.take(pred > 0, axis=0) @@ -761,7 +880,7 @@ def staged_predict(self, X): for pred in self.staged_decision_function(X): yield np.array(classes.take(np.argmax(pred, axis=1), axis=0)) - def decision_function(self, X): + def decision_function(self, X, **params): """Compute the decision function of ``X``. Parameters @@ -770,6 +889,17 @@ def decision_function(self, X): The training input samples. Sparse matrix can be CSC, CSR, COO, DOK, or LIL. COO, DOK, and LIL are converted to CSR. + **params : dict of str -> object + Parameters to pass to the underlying estimator's + ``decision_function`` method. + + .. versionadded:: 1.6 + Only available if `enable_metadata_routing=True`, + which can be set by using + ``sklearn.set_config(enable_metadata_routing=True)``. + See :ref:`Metadata Routing User Guide ` for + more details. + Returns ------- score : ndarray of shape of (n_samples, k) @@ -786,16 +916,30 @@ class in ``classes_``, respectively. n_classes = self.n_classes_ classes = self.classes_[:, np.newaxis] + _raise_for_params(params, self, "decision_function") + if _routing_enabled(): + # metadata routing is enabled. + routed_params = process_routing(self, "decision_function", **params) + else: + routed_params = Bunch() + routed_params.estimator = Bunch(predict={}, predict_proba={}) + # TODO(1.6): Remove, because "algorithm" param will be deprecated in 1.6 if self.algorithm == "SAMME.R": # The weights are all 1. 
for SAMME.R pred = sum( - _samme_proba(estimator, n_classes, X) for estimator in self.estimators_ + _samme_proba( + estimator, n_classes, X, routed_params.estimator.predict_proba + ) + for estimator in self.estimators_ ) else: # self.algorithm == "SAMME" pred = sum( np.where( - (estimator.predict(X) == classes).T, + ( + estimator.predict(X, **routed_params.estimator.predict) + == classes + ).T, w, -1 / (n_classes - 1) * w, ) @@ -808,7 +952,7 @@ class in ``classes_``, respectively. return pred.sum(axis=1) return pred - def staged_decision_function(self, X): + def staged_decision_function(self, X, **params): """Compute decision function of ``X`` for each boosting iteration. This method allows monitoring (i.e. determine error on testing set) @@ -820,6 +964,17 @@ def staged_decision_function(self, X): The training input samples. Sparse matrix can be CSC, CSR, COO, DOK, or LIL. COO, DOK, and LIL are converted to CSR. + **params : dict of str -> object + Parameters to pass to the underlying estimator's + ``predict`` method. + + .. versionadded:: 1.6 + Only available if `enable_metadata_routing=True`, + which can be set by using + ``sklearn.set_config(enable_metadata_routing=True)``. + See :ref:`Metadata Routing User Guide ` for + more details. + Yields ------ score : generator of ndarray of shape (n_samples, k) @@ -833,6 +988,14 @@ class in ``classes_``, respectively. check_is_fitted(self) X = self._check_X(X) + _raise_for_params(params, self, "staged_decision_function") + if _routing_enabled(): + # metadata routing is enabled. + routed_params = process_routing(self, "staged_decision_function", **params) + else: + routed_params = Bunch() + routed_params.estimator = Bunch(predict={}, predict_proba={}) + n_classes = self.n_classes_ classes = self.classes_[:, np.newaxis] pred = None @@ -845,10 +1008,15 @@ class in ``classes_``, respectively. # 1.6 if self.algorithm == "SAMME.R": # The weights are all 1. 
for SAMME.R - current_pred = _samme_proba(estimator, n_classes, X) + current_pred = _samme_proba( + estimator, n_classes, X, routed_params.estimator.predict_proba + ) else: # elif self.algorithm == "SAMME": current_pred = np.where( - (estimator.predict(X) == classes).T, + ( + estimator.predict(X, **routed_params.estimator.predict) + == classes + ).T, weight, -1 / (n_classes - 1) * weight, ) @@ -884,7 +1052,7 @@ def _compute_proba_from_decision(decision, n_classes): decision /= n_classes - 1 return softmax(decision, copy=False) - def predict_proba(self, X): + def predict_proba(self, X, **params): """Predict class probabilities for X. The predicted class probabilities of an input sample is computed as @@ -897,6 +1065,17 @@ def predict_proba(self, X): The training input samples. Sparse matrix can be CSC, CSR, COO, DOK, or LIL. COO, DOK, and LIL are converted to CSR. + **params : dict of str -> object + Parameters to pass to the underlying estimator's + ``predict_proba`` method. + + .. versionadded:: 1.6 + Only available if `enable_metadata_routing=True`, + which can be set by using + ``sklearn.set_config(enable_metadata_routing=True)``. + See :ref:`Metadata Routing User Guide ` for + more details. + Returns ------- p : ndarray of shape (n_samples, n_classes) @@ -909,10 +1088,17 @@ def predict_proba(self, X): if n_classes == 1: return np.ones((_num_samples(X), 1)) - decision = self.decision_function(X) + _raise_for_params(params, self, "predict_proba") + if _routing_enabled(): + # metadata routing is enabled. + routed_params = process_routing(self, "predict_proba", **params) + else: + routed_params = Bunch(self=Bunch(decision_function={})) + + decision = self.decision_function(X, **routed_params.self.decision_function) return self._compute_proba_from_decision(decision, n_classes) - def staged_predict_proba(self, X): + def staged_predict_proba(self, X, **params): """Predict class probabilities for X. 
The predicted class probabilities of an input sample is computed as @@ -936,13 +1122,21 @@ def staged_predict_proba(self, X): The class probabilities of the input samples. The order of outputs is the same of that of the :term:`classes_` attribute. """ + _raise_for_params(params, self, "staged_predict_proba") + if _routing_enabled(): + # metadata routing is enabled. + routed_params = process_routing(self, "staged_predict_proba", **params) + else: + routed_params = Bunch(self=Bunch(staged_decision_function={})) n_classes = self.n_classes_ - for decision in self.staged_decision_function(X): + for decision in self.staged_decision_function( + X, **routed_params.self.staged_decision_function + ): yield self._compute_proba_from_decision(decision, n_classes) - def predict_log_proba(self, X): + def predict_log_proba(self, X, **params): """Predict class log-probabilities for X. The predicted class log-probabilities of an input sample is computed as @@ -961,10 +1155,59 @@ def predict_log_proba(self, X): The class probabilities of the input samples. The order of outputs is the same of that of the :term:`classes_` attribute. """ - return np.log(self.predict_proba(X)) + _raise_for_params(params, self, "predict_log_proba") + + if _routing_enabled(): + # metadata routing is enabled. + routed_params = process_routing(self, "predict_log_proba", **params) + else: + routed_params = Bunch(self=Bunch(predict_proba={})) + + return np.log(self.predict_proba(X, **routed_params.self.predict_proba)) + + def get_metadata_routing(self): + """Get metadata routing of this object. + + Please check :ref:`User Guide ` on how the routing + mechanism works. + + .. versionadded:: 1.6 + + Returns + ------- + routing : MetadataRouter + A :class:`~sklearn.utils.metadata_routing.MetadataRouter` encapsulating + routing information. 
+ """ + router = MetadataRouter(owner=self.__class__.__name__) + router.add_self_request(self) + router.add( + self._get_scorer(), + method_mapping=MethodMapping().add(caller="staged_score", callee="score"), + ) + router.add( + estimator=self.estimator, + method_mapping=( + MethodMapping() + .add(callee="fit", caller="fit") + # .add(callee="predict", caller="predict") + # .add(callee="staged_predict", caller="staged_predict") + .add(callee="predict", caller="decision_function") + .add(callee="predict_proba", caller="decision_function") + .add(callee="predict", caller="staged_decision_function") + .add(callee="predict_proba", caller="staged_decision_function") + # .add(callee="predict_proba", caller="predict_proba") + # .add(callee="decision_function", caller="predict_proba") + # .add(callee="staged_predict_proba", caller="staged_predict_proba") + # .add(callee="predict_log_proba", caller="predict_log_proba") + .add(callee="score", caller="score") + .add(callee="staged_score", caller="staged_score") + ), + ) + return router -class AdaBoostRegressor(_RoutingNotSupportedMixin, RegressorMixin, BaseWeightBoosting): +class AdaBoostRegressor(RegressorMixin, BaseWeightBoosting): """An AdaBoost regressor. An AdaBoost [1] regressor is a meta-estimator that begins by fitting a @@ -1110,7 +1353,7 @@ def _validate_estimator(self): """Check the estimator and set the estimator_ attribute.""" super()._validate_estimator(default=DecisionTreeRegressor(max_depth=3)) - def _boost(self, iboost, X, y, sample_weight, random_state): + def _boost(self, iboost, X, y, random_state, params): """Implement a single boost for regression Perform a single boost according to the AdaBoost.R2 algorithm and @@ -1128,9 +1371,6 @@ def _boost(self, iboost, X, y, sample_weight, random_state): The target values (class labels in classification, real numbers in regression). - sample_weight : array-like of shape (n_samples,) - The current sample weights. 
- random_state : RandomState The RandomState instance used if the base estimator accepts a `random_state` attribute. @@ -1138,6 +1378,9 @@ def _boost(self, iboost, X, y, sample_weight, random_state): learner. replacement. + params : dict + Parameters to pass to the underlying estimators. + Returns ------- sample_weight : array-like of shape (n_samples,) or None @@ -1153,6 +1396,7 @@ def _boost(self, iboost, X, y, sample_weight, random_state): If None then boosting has terminated early. """ estimator = self._make_estimator(random_state=random_state) + sample_weight = params["sample_weight"] # Weighted sampling of the training set with replacement bootstrap_idx = random_state.choice( @@ -1167,7 +1411,7 @@ def _boost(self, iboost, X, y, sample_weight, random_state): X_ = _safe_indexing(X, bootstrap_idx) y_ = _safe_indexing(y, bootstrap_idx) estimator.fit(X_, y_) - y_predict = estimator.predict(X) + y_predict = estimator.predict(X, **params) error_vect = np.abs(y_predict - y) sample_mask = sample_weight > 0 @@ -1208,9 +1452,11 @@ def _boost(self, iboost, X, y, sample_weight, random_state): return sample_weight, estimator_weight, estimator_error - def _get_median_predict(self, X, limit): + def _get_median_predict(self, X, limit, **params): # Evaluate predictions of all estimators - predictions = np.array([est.predict(X) for est in self.estimators_[:limit]]).T + predictions = np.array( + [est.predict(X, **params) for est in self.estimators_[:limit]] + ).T # Sort the predictions sorted_idx = np.argsort(predictions, axis=1) @@ -1225,7 +1471,7 @@ def _get_median_predict(self, X, limit): # Return median predictions return predictions[np.arange(_num_samples(X)), median_estimators] - def predict(self, X): + def predict(self, X, **params): """Predict regression value for X. The predicted regression value of an input sample is computed @@ -1237,6 +1483,9 @@ def predict(self, X): The training input samples. Sparse matrix can be CSC, CSR, COO, DOK, or LIL. 
COO, DOK, and LIL are converted to CSR. + **params : dict of str -> obj + Parameters to the `predict` function of the estimator. + Returns ------- y : ndarray of shape (n_samples,) @@ -1245,9 +1494,22 @@ def predict(self, X): check_is_fitted(self) X = self._check_X(X) - return self._get_median_predict(X, len(self.estimators_)) + _raise_for_params(params, self, "predict") + if _routing_enabled(): + routed_params = process_routing( + self, + "predict", + **params, + ) + else: + routed_params = Bunch() + routed_params.estimator = Bunch(predict={}) + + return self._get_median_predict( + X, len(self.estimators_), **routed_params.estimator.predict + ) - def staged_predict(self, X): + def staged_predict(self, X, **params): """Return staged predictions for X. The predicted regression value of an input sample is computed @@ -1262,6 +1524,17 @@ def staged_predict(self, X): X : {array-like, sparse matrix} of shape (n_samples, n_features) The training input samples. + **params : dict of str -> object + Parameters to pass to the underlying estimator's + ``predict`` method. + + .. versionadded:: 1.6 + Only available if `enable_metadata_routing=True`, + which can be set by using + ``sklearn.set_config(enable_metadata_routing=True)``. + See :ref:`Metadata Routing User Guide ` for + more details. + Yields ------ y : generator of ndarray of shape (n_samples,) @@ -1270,5 +1543,44 @@ def staged_predict(self, X): check_is_fitted(self) X = self._check_X(X) + _raise_for_params(params, self, "staged_predict") + if _routing_enabled(): + # metadata routing is enabled. + routed_params = process_routing(self, "staged_predict", **params) + else: + routed_params = Bunch(estimator=Bunch(staged_predict={})) + for i, _ in enumerate(self.estimators_, 1): - yield self._get_median_predict(X, limit=i) + yield self._get_median_predict( + X, limit=i, **routed_params.estimator.staged_predict + ) + + def get_metadata_routing(self): + """Get metadata routing of this object. 
+ + Please check :ref:`User Guide ` on how the routing + mechanism works. + + .. versionadded:: 1.6 + + Returns + ------- + routing : MetadataRouter + A :class:`~sklearn.utils.metadata_routing.MetadataRouter` encapsulating + routing information. + """ + router = MetadataRouter(owner=self.__class__.__name__) + router.add_self_request(self) + router.add( + estimator=self.estimator, + method_mapping=( + MethodMapping() + .add(caller="fit", callee="fit") + .add(caller="score", callee="fit") + .add(caller="predict", callee="predict") + .add(caller="score", callee="score") + .add(caller="staged_predict", callee="staged_predict") + .add(caller="staged_score", callee="staged_score") + ), + ) + return router diff --git a/sklearn/tests/test_metaestimators_metadata_routing.py b/sklearn/tests/test_metaestimators_metadata_routing.py index 9aca241521ca0..3607be2913dea 100644 --- a/sklearn/tests/test_metaestimators_metadata_routing.py +++ b/sklearn/tests/test_metaestimators_metadata_routing.py @@ -407,6 +407,40 @@ def enable_slep006(): ], "method_mapping": {"fit": ["fit", "score"]}, }, + { + "metaestimator": AdaBoostClassifier, + "estimator_name": "estimator", + "estimator": "classifier", + "X": X, + "y": y, + "preserves_metadata": True, + "estimator_routing_methods": [ + "fit", + "predict", + "predict_proba", + "predict_log_proba", + "decision_function", + "score", + ], + "method_mapping": {"fit": ["fit", "score"]}, + }, + { + "metaestimator": AdaBoostRegressor, + "estimator_name": "estimator", + "estimator": "regressor", + "X": X, + "y": y, + "preserves_metadata": True, + "estimator_routing_methods": [ + "fit", + "predict", + "predict_proba", + "predict_log_proba", + "decision_function", + "score", + ], + "method_mapping": {"fit": ["fit", "score"]}, + }, ] """List containing all metaestimators to be tested and their settings @@ -446,8 +480,6 @@ def enable_slep006(): METAESTIMATOR_IDS = [str(row["metaestimator"].__name__) for row in METAESTIMATORS] UNSUPPORTED_ESTIMATORS = [ - 
AdaBoostClassifier(), - AdaBoostRegressor(), RFE(ConsumingClassifier()), RFECV(ConsumingClassifier()), SequentialFeatureSelector(ConsumingClassifier()),