From e3f758fb6468424c8109cf0ea5c0916e4a1b9da2 Mon Sep 17 00:00:00 2001 From: Madhura Jayaratne Date: Thu, 3 Sep 2020 00:15:25 +1000 Subject: [PATCH 1/4] Deprecate criterion='mae' in GradientBoostingClassifier and GradientBoostingRegressor --- sklearn/ensemble/_gb.py | 11 +++++++++++ sklearn/ensemble/tests/test_gradient_boosting.py | 14 ++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/sklearn/ensemble/_gb.py b/sklearn/ensemble/_gb.py index 97bdeb6e69a55..b15201d0da3cb 100644 --- a/sklearn/ensemble/_gb.py +++ b/sklearn/ensemble/_gb.py @@ -393,6 +393,11 @@ def fit(self, X, y, sample_weight=None, monitor=None): ------- self : object """ + if self.criterion == 'mae': + # TODO: This should raise an error from 0.26 + warnings.warn("criterion='mae' was deprecated in version 0.24 and " + "will be removed in version 0.26.", FutureWarning) + # if not warmstart - clear the estimator state if not self.warm_start: self._clear_state() @@ -802,6 +807,9 @@ class GradientBoostingClassifier(ClassifierMixin, BaseGradientBoosting): some cases. .. versionadded:: 0.18 + .. deprecated:: 0.24 + `criterion='mae'` is deprecated and will be removed in version + 0.26. min_samples_split : int or float, default=2 The minimum number of samples required to split an internal node: @@ -1320,6 +1328,9 @@ class GradientBoostingRegressor(RegressorMixin, BaseGradientBoosting): some cases. .. versionadded:: 0.18 + .. deprecated:: 0.24 + `criterion='mae'` is deprecated and will be removed in version + 0.26. min_samples_split : int or float, default=2 The minimum number of samples required to split an internal node: diff --git a/sklearn/ensemble/tests/test_gradient_boosting.py b/sklearn/ensemble/tests/test_gradient_boosting.py index 835e0c5b3bab1..812ff16933758 100644 --- a/sklearn/ensemble/tests/test_gradient_boosting.py +++ b/sklearn/ensemble/tests/test_gradient_boosting.py @@ -1333,3 +1333,17 @@ def test_attr_error_raised_if_not_fitted(): ) with pytest.raises(AttributeError, match=msg): gbr.n_classes_ + + +# TODO: Update in 0.26 to check for the error raised +@pytest.mark.parametrize('estimator', [ + GradientBoostingClassifier(criterion='mae'), + GradientBoostingRegressor(criterion='mae') +]) +def test_criterion_mae_deprecation(estimator): + # checks whether a deprecation warning is issues when criterion='mae' + # is used. + msg = ("criterion='mae' was deprecated in version 0.24 and " + "will be removed in version 0.26.") + with pytest.warns(FutureWarning, match=msg): + estimator.fit(X, y) From f9f1a3e89357fa0661e44702538e3e9948dcb63a Mon Sep 17 00:00:00 2001 From: Madhura Jayaratne Date: Thu, 3 Sep 2020 08:11:43 +1000 Subject: [PATCH 2/4] Add change log entry for #18263 --- doc/whats_new/v0.24.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/doc/whats_new/v0.24.rst b/doc/whats_new/v0.24.rst index c3f3f993a2f57..9b1ab5c78ff71 100644 --- a/doc/whats_new/v0.24.rst +++ b/doc/whats_new/v0.24.rst @@ -192,6 +192,11 @@ Changelog :class:`ensemble.GradientBoostingRegressor` and returns `1`. :pr:`17702` by :user:`Simona Maggio `. +- |API|: Mean absolute error ('mae') is now deprecated for the parameter + ``criterion`` in :class:`ensemble.GradientBoostingRegressor` and + :class:`ensemble.GradientBoostingClassifier`. + :pr:`18326` by :user:`Madhura Jayaratne `. + :mod:`sklearn.exceptions` ......................... From e04af1c9dd26cac923381c72fbfdb61ef59efd39 Mon Sep 17 00:00:00 2001 From: Madhura Jayaratne Date: Fri, 4 Sep 2020 01:11:45 +1000 Subject: [PATCH 3/4] Explain the reasons for deprecation --- sklearn/ensemble/_gb.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sklearn/ensemble/_gb.py b/sklearn/ensemble/_gb.py index b15201d0da3cb..34a1ddb7c6c74 100644 --- a/sklearn/ensemble/_gb.py +++ b/sklearn/ensemble/_gb.py @@ -809,7 +809,8 @@ class GradientBoostingClassifier(ClassifierMixin, BaseGradientBoosting): .. versionadded:: 0.18 .. deprecated:: 0.24 `criterion='mae'` is deprecated and will be removed in version - 0.26. + 0.26. Use `criterion='friedman_mse'` or `'mse'` instead, as trees + should use a least-square criterion in Gradient Boosting. min_samples_split : int or float, default=2 The minimum number of samples required to split an internal node: @@ -1330,7 +1331,8 @@ class GradientBoostingRegressor(RegressorMixin, BaseGradientBoosting): .. versionadded:: 0.18 .. deprecated:: 0.24 `criterion='mae'` is deprecated and will be removed in version - 0.26. + 0.26. The correct way of minimizing the absolute error is to use + `loss='lad'` instead. min_samples_split : int or float, default=2 The minimum number of samples required to split an internal node: From 7d63e666aa1d9af0873797bc663568ef81c179d1 Mon Sep 17 00:00:00 2001 From: Madhura Jayaratne Date: Fri, 4 Sep 2020 16:46:28 +1000 Subject: [PATCH 4/4] Add explanation to the warning as well --- sklearn/ensemble/_gb.py | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/sklearn/ensemble/_gb.py b/sklearn/ensemble/_gb.py index 34a1ddb7c6c74..bbbe5053788e0 100644 --- a/sklearn/ensemble/_gb.py +++ b/sklearn/ensemble/_gb.py @@ -358,6 +358,10 @@ def _check_initialized(self): """Check that the estimator is initialized, raising an error if not.""" check_is_fitted(self) + @abstractmethod + def _warn_mae_for_criterion(self): + pass + def fit(self, X, y, sample_weight=None, monitor=None): """Fit the gradient boosting model. @@ -395,8 +399,7 @@ def fit(self, X, y, sample_weight=None, monitor=None): """ if self.criterion == 'mae': # TODO: This should raise an error from 0.26 - warnings.warn("criterion='mae' was deprecated in version 0.24 and " - "will be removed in version 0.26.", FutureWarning) + self._warn_mae_for_criterion() # if not warmstart - clear the estimator state if not self.warm_start: @@ -1111,6 +1114,14 @@ def _validate_y(self, y, sample_weight): self.n_classes_ = self._n_classes return y + def _warn_mae_for_criterion(self): + # TODO: This should raise an error from 0.26 + warnings.warn("criterion='mae' was deprecated in version 0.24 and " + "will be removed in version 0.26. Use " + "criterion='friedman_mse' or 'mse' instead, as trees " + "should use a least-square criterion in Gradient " + "Boosting.", FutureWarning) + def decision_function(self, X): """Compute the decision function of ``X``. @@ -1614,6 +1625,13 @@ def _validate_y(self, y, sample_weight=None): y = y.astype(DOUBLE) return y + def _warn_mae_for_criterion(self): + # TODO: This should raise an error from 0.26 + warnings.warn("criterion='mae' was deprecated in version 0.24 and " + "will be removed in version 0.26. The correct way of " + "minimizing the absolute error is to use loss='lad' " + "instead.", FutureWarning) + def predict(self, X): """Predict regression target for X.