From 4f0247ad28949d6357eebe77544711324051ca41 Mon Sep 17 00:00:00 2001
From: Maren Westermann
Date: Sun, 14 Nov 2021 17:21:54 +0100
Subject: [PATCH 1/3] speed up grid search

---
 .../plot_gradient_boosting_quantile.py | 35 +++++++++----------
 1 file changed, 16 insertions(+), 19 deletions(-)

diff --git a/examples/ensemble/plot_gradient_boosting_quantile.py b/examples/ensemble/plot_gradient_boosting_quantile.py
index ebcf9c15f3103..67eab011aa3bb 100644
--- a/examples/ensemble/plot_gradient_boosting_quantile.py
+++ b/examples/ensemble/plot_gradient_boosting_quantile.py
@@ -62,7 +62,7 @@ def f(x):
 all_models = {}
 common_params = dict(
     learning_rate=0.05,
-    n_estimators=250,
+    n_estimators=200,
     max_depth=2,
     min_samples_leaf=9,
     min_samples_split=9,
@@ -224,7 +224,7 @@ def coverage_fraction(y, y_low, y_high):
 # underfit and could not adapt to sinusoidal shape of the signal.
 #
 # The hyper-parameters of the model were approximately hand-tuned for the
-# median regressor and there is no reason than the same hyper-parameters are
+# median regressor and there is no reason that the same hyper-parameters are
 # suitable for the 5th percentile regressor.
 #
 # To confirm this hypothesis, we tune the hyper-parameters of a new regressor
@@ -238,11 +238,11 @@ def coverage_fraction(y, y_low, y_high):
 
 
 param_grid = dict(
-    learning_rate=[0.01, 0.05, 0.1],
-    n_estimators=[100, 150, 200, 250, 300],
-    max_depth=[2, 5, 10, 15, 20],
-    min_samples_leaf=[1, 5, 10, 20, 30, 50],
-    min_samples_split=[2, 5, 10, 20, 30, 50],
+    learning_rate=[0.05, 0.1, 0.2],
+    n_estimators=[150, 200, 250],
+    max_depth=[2, 5, 10],
+    min_samples_leaf=[1, 5, 10, 20],
+    min_samples_split=[5, 10, 20, 30, 50],
 )
 alpha = 0.05
 neg_mean_pinball_loss_05p_scorer = make_scorer(
@@ -262,9 +262,9 @@ def coverage_fraction(y, y_low, y_high):
 pprint(search_05p.best_params_)
 
 # %%
-# We observe that the search procedure identifies that deeper trees are needed
-# to get a good fit for the 5th percentile regressor. Deeper trees are more
-# expressive and less likely to underfit.
+# We observe that the hyper-parameters that were hand-tuned for the median
+# regressor are in the same range as the hyper-parameters suitable for the 5th
+# percentile regressor
 #
 # Let's now tune the hyper-parameters for the 95th percentile regressor. We
 # need to redefine the `scoring` metric used to select the best model, along
@@ -286,15 +286,12 @@ def coverage_fraction(y, y_low, y_high):
 pprint(search_95p.best_params_)
 
 # %%
-# This time, shallower trees are selected and lead to a more constant piecewise
-# and therefore more robust estimation of the 95th percentile. This is
-# beneficial as it avoids overfitting the large outliers of the log-normal
-# additive noise.
-#
-# We can confirm this intuition by displaying the predicted 90% confidence
-# interval comprised by the predictions of those two tuned quantile regressors:
-# the prediction of the upper 95th percentile has a much coarser shape than the
-# prediction of the lower 5th percentile:
+# The result shows that the hyper-parameters for the 95th percentile regressor
+# identified by the grid search are roughly in the same range as the hand-
+# tuned hyper-parameters for the median regressor and the hyper-parameters
+# identified by the grid search for the 5th percentile regressor. However, the
+# hyper-parameter grid searches did lead to an improved 90% confidence
+# interval which can be seen below:
 
 y_lower = search_05p.predict(xx)
 y_upper = search_95p.predict(xx)
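Background for the hunks above: the search selects the 5th percentile regressor by cross-validating on the pinball loss at alpha=0.05, negated through make_scorer so that higher scores are better. The following is a minimal, self-contained sketch of that scoring pattern, not part of the patch; the toy data and the names gbr_05p and neg_pinball_05p are invented for illustration.

# Minimal sketch, not part of the patch: cross-validating a 5th percentile
# regressor on the (negated) pinball loss, mirroring the scorer built above.
# The toy data below is invented for illustration only.
import numpy as np
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import make_scorer, mean_pinball_loss
from sklearn.model_selection import cross_val_score

rng = np.random.RandomState(0)
X = rng.uniform(0, 10, size=(200, 1))
y = X.ravel() * np.sin(X.ravel()) + rng.lognormal(sigma=0.5, size=200)

alpha = 0.05  # target quantile: the 5th percentile
gbr_05p = GradientBoostingRegressor(loss="quantile", alpha=alpha, random_state=0)

# make_scorer negates the loss so that cross-validation can maximize it.
neg_pinball_05p = make_scorer(mean_pinball_loss, alpha=alpha, greater_is_better=False)
print(cross_val_score(gbr_05p, X, y, scoring=neg_pinball_05p, cv=5).mean())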
From b0a896998d9e655165ed19f10f74dd3b615da244 Mon Sep 17 00:00:00 2001
From: Tom Dupré la Tour
Date: Tue, 16 Nov 2021 09:57:28 -0800
Subject: [PATCH 2/3] Add a dot and trigger CircleCI build

---
 examples/ensemble/plot_gradient_boosting_quantile.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/ensemble/plot_gradient_boosting_quantile.py b/examples/ensemble/plot_gradient_boosting_quantile.py
index 67eab011aa3bb..c8104acd9c061 100644
--- a/examples/ensemble/plot_gradient_boosting_quantile.py
+++ b/examples/ensemble/plot_gradient_boosting_quantile.py
@@ -264,7 +264,7 @@ def coverage_fraction(y, y_low, y_high):
 # %%
 # We observe that the hyper-parameters that were hand-tuned for the median
 # regressor are in the same range as the hyper-parameters suitable for the 5th
-# percentile regressor
+# percentile regressor.
 #
 # Let's now tune the hyper-parameters for the 95th percentile regressor. We
 # need to redefine the `scoring` metric used to select the best model, along

From f9fc83b81ca561c1db3fc7628d2b503d8ee7c6c9 Mon Sep 17 00:00:00 2001
From: Maren Westermann
Date: Tue, 30 Nov 2021 17:08:11 +0100
Subject: [PATCH 3/3] address comments

---
 .../plot_gradient_boosting_quantile.py | 23 +++++++++++--------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/examples/ensemble/plot_gradient_boosting_quantile.py b/examples/ensemble/plot_gradient_boosting_quantile.py
index 67eab011aa3bb..bbf9b79becfff 100644
--- a/examples/ensemble/plot_gradient_boosting_quantile.py
+++ b/examples/ensemble/plot_gradient_boosting_quantile.py
@@ -97,7 +97,7 @@ def f(x):
 fig = plt.figure(figsize=(10, 10))
 plt.plot(xx, f(xx), "g:", linewidth=3, label=r"$f(x) = x\,\sin(x)$")
 plt.plot(X_test, y_test, "b.", markersize=10, label="Test observations")
-plt.plot(xx, y_med, "r-", label="Predicted median", color="orange")
+plt.plot(xx, y_med, "r-", label="Predicted median")
 plt.plot(xx, y_pred, "r-", label="Predicted mean")
 plt.plot(xx, y_upper, "k-")
 plt.plot(xx, y_lower, "k-")
@@ -232,14 +232,13 @@ def coverage_fraction(y, y_low, y_high):
 # cross-validation on the pinball loss with alpha=0.05:
 
 # %%
-from sklearn.model_selection import RandomizedSearchCV
+from sklearn.experimental import enable_halving_search_cv  # noqa
+from sklearn.model_selection import HalvingRandomSearchCV
 from sklearn.metrics import make_scorer
 from pprint import pprint
 
-
 param_grid = dict(
     learning_rate=[0.05, 0.1, 0.2],
-    n_estimators=[150, 200, 250],
     max_depth=[2, 5, 10],
     min_samples_leaf=[1, 5, 10, 20],
     min_samples_split=[5, 10, 20, 30, 50],
@@ -251,10 +250,12 @@ def coverage_fraction(y, y_low, y_high):
     greater_is_better=False,  # maximize the negative loss
 )
 gbr = GradientBoostingRegressor(loss="quantile", alpha=alpha, random_state=0)
-search_05p = RandomizedSearchCV(
+search_05p = HalvingRandomSearchCV(
     gbr,
     param_grid,
-    n_iter=10,  # increase this if computational budget allows
+    resource="n_estimators",
+    max_resources=250,
+    min_resources=50,
     scoring=neg_mean_pinball_loss_05p_scorer,
     n_jobs=2,
     random_state=0,
@@ -287,11 +288,13 @@ def coverage_fraction(y, y_low, y_high):
 
 # %%
 # The result shows that the hyper-parameters for the 95th percentile regressor
-# identified by the grid search are roughly in the same range as the hand-
+# identified by the search procedure are roughly in the same range as the hand-
 # tuned hyper-parameters for the median regressor and the hyper-parameters
-# identified by the grid search for the 5th percentile regressor. However, the
-# hyper-parameter grid searches did lead to an improved 90% confidence
-# interval which can be seen below:
+# identified by the search procedure for the 5th percentile regressor. However,
+# the hyper-parameter searches did lead to an improved 90% confidence interval
+# that is comprised by the predictions of those two tuned quantile regressors.
+# Note that the prediction of the upper 95th percentile has a much coarser shape
+# than the prediction of the lower 5th percentile because of the outliers:
 
 y_lower = search_05p.predict(xx)
 y_upper = search_95p.predict(xx)
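Background for the final patch: HalvingRandomSearchCV treats n_estimators as the successive-halving resource, so candidates are first compared with only a few trees and survivors are retrained with more. The following is a minimal, self-contained sketch of that pattern, not part of the patch; the toy data is invented, and the pinball-loss scorer that the patch passes via scoring is omitted here for brevity (the default R^2 scorer is used instead).

# Minimal sketch, not part of the patch: successive halving with
# n_estimators as the growing resource, mirroring the search built above.
import numpy as np
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.experimental import enable_halving_search_cv  # noqa
from sklearn.model_selection import HalvingRandomSearchCV

rng = np.random.RandomState(0)
X = rng.uniform(0, 10, size=(200, 1))
y = X.ravel() * np.sin(X.ravel()) + rng.lognormal(sigma=0.5, size=200)

# n_estimators is deliberately excluded: it is the halving resource below.
param_distributions = dict(
    learning_rate=[0.05, 0.1, 0.2],
    max_depth=[2, 5, 10],
)
search = HalvingRandomSearchCV(
    GradientBoostingRegressor(loss="quantile", alpha=0.05, random_state=0),
    param_distributions,
    resource="n_estimators",  # the budget that grows at each halving rung
    min_resources=50,         # first rung: 50 trees per candidate
    max_resources=250,        # cap on the number of trees in later rungs
    random_state=0,
).fit(X, y)
print(search.best_params_)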