From 7cf349478efbb0a0f4a06669b5f20f5c9a4bc472 Mon Sep 17 00:00:00 2001 From: "Thomas J. Fan" Date: Thu, 28 Jan 2021 21:30:08 -0500 Subject: [PATCH 1/2] API Removes tol=None option from HistGradient* --- .../_hist_gradient_boosting/gradient_boosting.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py index dd8435a2af746..d30cd030bf698 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py @@ -82,7 +82,7 @@ def _validate_parameters(self): raise ValueError( 'validation_fraction={} must be strictly ' 'positive, or None.'.format(self.validation_fraction)) - if self.tol is not None and self.tol < 0: + if self.tol < 0: raise ValueError('tol={} ' 'must not be smaller than 0.'.format(self.tol)) @@ -646,8 +646,7 @@ def _should_stop(self, scores): # harder for subsequent iteration to be considered an improvement upon # the reference score, and therefore it is more likely to early stop # because of the lack of significant improvement. - tol = 0 if self.tol is None else self.tol - reference_score = scores[-reference_position] + tol + reference_score = scores[-reference_position] + self.tol recent_scores = scores[-reference_position + 1:] recent_improvements = [score > reference_score for score in recent_scores] @@ -992,7 +991,7 @@ class HistGradientBoostingRegressor(RegressorMixin, BaseHistGradientBoosting): stopped when none of the last ``n_iter_no_change`` scores are better than the ``n_iter_no_change - 1`` -th-to-last one, up to some tolerance. Only used if early stopping is performed. - tol : float or None, default=1e-7 + tol : float, default=1e-7 The absolute tolerance to use when comparing scores during early stopping. The higher the tolerance, the more likely we are to early stop: higher tolerance means that it will be harder for subsequent @@ -1245,7 +1244,7 @@ class HistGradientBoostingClassifier(ClassifierMixin, stopped when none of the last ``n_iter_no_change`` scores are better than the ``n_iter_no_change - 1`` -th-to-last one, up to some tolerance. Only used if early stopping is performed. - tol : float or None, default=1e-7 + tol : float, default=1e-7 The absolute tolerance to use when comparing scores. The higher the tolerance, the more likely we are to early stop: higher tolerance means that it will be harder for subsequent iterations to be From ff792270018886b708614ad998c270ec03767c30 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Fri, 29 Jan 2021 14:38:05 +0100 Subject: [PATCH 2/2] Pass tol=0.0 instead of tol=None in early stopping tests --- .../_hist_gradient_boosting/tests/test_gradient_boosting.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py index 8bb9e83966fff..c501125059c8f 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py @@ -85,7 +85,7 @@ def test_invalid_classification_loss(): (None, None, True, 5, 1e-1), ('loss', .1, True, 5, 1e-7), # use loss ('loss', None, True, 5, 1e-1), # use loss on training data - (None, None, False, 5, None), # no early stopping + (None, None, False, 5, 0.0), # no early stopping ]) def test_early_stopping_regression(scoring, validation_fraction, early_stopping, n_iter_no_change, tol): @@ -126,7 +126,7 @@ def test_early_stopping_regression(scoring, validation_fraction, (None, None, True, 5, 1e-1), ('loss', .1, True, 5, 1e-7), # use loss ('loss', None, True, 5, 1e-1), # use loss on training data - (None, None, False, 5, None), # no early stopping + (None, None, False, 5, 0.0), # no early stopping ]) def test_early_stopping_classification(data, scoring, validation_fraction, early_stopping, n_iter_no_change, tol):