diff --git a/doc/whats_new/v1.1.rst b/doc/whats_new/v1.1.rst
index ce2fa414d0d13..bae0243becae0 100644
--- a/doc/whats_new/v1.1.rst
+++ b/doc/whats_new/v1.1.rst
@@ -471,6 +471,11 @@ Changelog
   the output feature names.
   :pr:`21762` by :user:`Zhehao Liu ` and `Thomas Fan`_.
 
+- |API| The attribute `loss_` of :class:`ensemble.GradientBoostingClassifier` and
+  :class:`ensemble.GradientBoostingRegressor` has been deprecated and will be removed
+  in version 1.3.
+  :pr:`23079` by :user:`Christian Lorentzen `.
+
 :mod:`sklearn.feature_extraction`
 .................................
 
diff --git a/sklearn/ensemble/_gb.py b/sklearn/ensemble/_gb.py
index 443c7b4d3152f..9b776a7feab10 100644
--- a/sklearn/ensemble/_gb.py
+++ b/sklearn/ensemble/_gb.py
@@ -207,7 +207,7 @@ def _fit_stage(
         """Fit another stage of ``_n_classes`` trees to the boosting model."""
 
         assert sample_mask.dtype == bool
-        loss = self.loss_
+        loss = self._loss
         original_y = y
 
         # Need to pass a copy of raw_predictions to negative_gradient()
@@ -328,11 +328,11 @@ def _check_params(self):
             loss_class = _gb_losses.LOSS_FUNCTIONS[self.loss]
 
         if is_classifier(self):
-            self.loss_ = loss_class(self.n_classes_)
+            self._loss = loss_class(self.n_classes_)
         elif self.loss in ("huber", "quantile"):
-            self.loss_ = loss_class(self.alpha)
+            self._loss = loss_class(self.alpha)
         else:
-            self.loss_ = loss_class()
+            self._loss = loss_class()
 
         check_scalar(
             self.subsample,
@@ -346,7 +346,7 @@ def _check_params(self):
         if self.init is not None:
             # init must be an estimator or 'zero'
             if isinstance(self.init, BaseEstimator):
-                self.loss_.check_init_estimator(self.init)
+                self._loss.check_init_estimator(self.init)
             elif not (isinstance(self.init, str) and self.init == "zero"):
                 raise ValueError(
                     "The init parameter must be an estimator or 'zero'. "
@@ -439,9 +439,9 @@ def _init_state(self):
 
         self.init_ = self.init
         if self.init_ is None:
-            self.init_ = self.loss_.init_estimator()
+            self.init_ = self._loss.init_estimator()
 
-        self.estimators_ = np.empty((self.n_estimators, self.loss_.K), dtype=object)
+        self.estimators_ = np.empty((self.n_estimators, self._loss.K), dtype=object)
         self.train_score_ = np.zeros((self.n_estimators,), dtype=np.float64)
         # do oob?
         if self.subsample < 1.0:
@@ -471,7 +471,7 @@ def _resize_state(self):
             )
 
         self.estimators_ = np.resize(
-            self.estimators_, (total_n_estimators, self.loss_.K)
+            self.estimators_, (total_n_estimators, self._loss.K)
         )
         self.train_score_ = np.resize(self.train_score_, total_n_estimators)
         if self.subsample < 1 or hasattr(self, "oob_improvement_"):
@@ -607,7 +607,7 @@ def fit(self, X, y, sample_weight=None, monitor=None):
             # fit initial model and initialize raw predictions
             if self.init_ == "zero":
                 raw_predictions = np.zeros(
-                    shape=(X.shape[0], self.loss_.K), dtype=np.float64
+                    shape=(X.shape[0], self._loss.K), dtype=np.float64
                 )
             else:
                 # XXX clean this once we have a support_sample_weight tag
@@ -634,7 +634,7 @@ def fit(self, X, y, sample_weight=None, monitor=None):
                         else:  # regular estimator whose input checking failed
                             raise
 
-                raw_predictions = self.loss_.get_init_raw_predictions(X, self.init_)
+                raw_predictions = self._loss.get_init_raw_predictions(X, self.init_)
 
             begin_at_stage = 0
 
@@ -712,7 +712,7 @@ def _fit_stages(
         do_oob = self.subsample < 1.0
         sample_mask = np.ones((n_samples,), dtype=bool)
         n_inbag = max(1, int(self.subsample * n_samples))
-        loss_ = self.loss_
+        loss_ = self._loss
 
         if self.verbose:
             verbose_reporter = VerboseReporter(verbose=self.verbose)
@@ -804,10 +804,10 @@ def _raw_predict_init(self, X):
         X = self.estimators_[0, 0]._validate_X_predict(X, check_input=True)
         if self.init_ == "zero":
             raw_predictions = np.zeros(
-                shape=(X.shape[0], self.loss_.K), dtype=np.float64
+                shape=(X.shape[0], self._loss.K), dtype=np.float64
             )
         else:
-            raw_predictions = self.loss_.get_init_raw_predictions(X, self.init_).astype(
+            raw_predictions = self._loss.get_init_raw_predictions(X, self.init_).astype(
                 np.float64
             )
         return raw_predictions
@@ -978,6 +978,15 @@ def apply(self, X):
     def n_features_(self):
         return self.n_features_in_
 
+    # TODO(1.3): Remove
+    # mypy error: Decorated property not supported
+    @deprecated(  # type: ignore
+        "Attribute `loss_` was deprecated in version 1.1 and will be removed in 1.3."
+    )
+    @property
+    def loss_(self):
+        return self._loss
+
 
 class GradientBoostingClassifier(ClassifierMixin, BaseGradientBoosting):
     """Gradient Boosting for classification.
@@ -1214,6 +1223,10 @@ class GradientBoostingClassifier(ClassifierMixin, BaseGradientBoosting):
     loss_ : LossFunction
         The concrete ``LossFunction`` object.
 
+        .. deprecated:: 1.1
+            Attribute `loss_` was deprecated in version 1.1 and will be
+            removed in 1.3.
+
     init_ : estimator
         The estimator that provides the initial predictions.
         Set via the ``init`` argument or ``loss.init_estimator``.
@@ -1434,7 +1447,7 @@ def predict(self, X):
             The predicted values.
         """
         raw_predictions = self.decision_function(X)
-        encoded_labels = self.loss_._raw_prediction_to_decision(raw_predictions)
+        encoded_labels = self._loss._raw_prediction_to_decision(raw_predictions)
         return self.classes_.take(encoded_labels, axis=0)
 
     def staged_predict(self, X):
@@ -1456,7 +1469,7 @@ def staged_predict(self, X):
             The predicted value of the input samples.
         """
         for raw_predictions in self._staged_raw_predict(X):
-            encoded_labels = self.loss_._raw_prediction_to_decision(raw_predictions)
+            encoded_labels = self._loss._raw_prediction_to_decision(raw_predictions)
             yield self.classes_.take(encoded_labels, axis=0)
 
     def predict_proba(self, X):
@@ -1482,7 +1495,7 @@ def predict_proba(self, X):
         """
         raw_predictions = self.decision_function(X)
         try:
-            return self.loss_._raw_prediction_to_proba(raw_predictions)
+            return self._loss._raw_prediction_to_proba(raw_predictions)
         except NotFittedError:
             raise
         except AttributeError as e:
@@ -1534,7 +1547,7 @@ def staged_predict_proba(self, X):
         """
         try:
             for raw_predictions in self._staged_raw_predict(X):
-                yield self.loss_._raw_prediction_to_proba(raw_predictions)
+                yield self._loss._raw_prediction_to_proba(raw_predictions)
         except NotFittedError:
             raise
         except AttributeError as e:
@@ -1781,6 +1794,10 @@ class GradientBoostingRegressor(RegressorMixin, BaseGradientBoosting):
     loss_ : LossFunction
         The concrete ``LossFunction`` object.
 
+        .. deprecated:: 1.1
+            Attribute `loss_` was deprecated in version 1.1 and will be
+            removed in 1.3.
+
     init_ : estimator
         The estimator that provides the initial predictions.
         Set via the ``init`` argument or ``loss.init_estimator``.
diff --git a/sklearn/ensemble/tests/test_gradient_boosting.py b/sklearn/ensemble/tests/test_gradient_boosting.py
index dc81c4f5c9e09..5a28bed077036 100644
--- a/sklearn/ensemble/tests/test_gradient_boosting.py
+++ b/sklearn/ensemble/tests/test_gradient_boosting.py
@@ -475,7 +475,7 @@ def test_max_feature_regression():
         random_state=1,
     )
     gbrt.fit(X_train, y_train)
-    log_loss = gbrt.loss_(y_test, gbrt.decision_function(X_test))
+    log_loss = gbrt._loss(y_test, gbrt.decision_function(X_test))
     assert log_loss < 0.5, "GB failed with deviance %.4f" % log_loss
 
 
@@ -1535,3 +1535,18 @@ def test_loss_deprecated(old_loss, new_loss, Estimator):
     est2 = Estimator(loss=new_loss, random_state=0)
     est2.fit(X, y)
     assert_allclose(est1.predict(X), est2.predict(X))
+
+
+# TODO(1.3): remove
+@pytest.mark.parametrize(
+    "Estimator", [GradientBoostingClassifier, GradientBoostingRegressor]
+)
+def test_loss_attribute_deprecation(Estimator):
+    # Check that we raise the proper deprecation warning if accessing
+    # `loss_`.
+    X = np.array([[1, 2], [3, 4]])
+    y = np.array([1, 0])
+    est = Estimator().fit(X, y)
+
+    with pytest.warns(FutureWarning, match="`loss_` was deprecated"):
+        est.loss_