@@ -277,10 +277,9 @@ def score_estimator(estimator, df_test):
277
277
278
278
279
279
##############################################################################
280
- # Like the Ridge regression above, the gradient boosted trees model minimizes
281
- # the conditional squared error. However, because of a higher predictive power,
282
- # it also results in a smaller Poisson deviance than the linear Poisson
283
- # regression model.
280
+ # Like the Poisson GLM above, the gradient boosted trees model minimizes
281
+ # the Poisson deviance. However, because of a higher predictive power,
282
+ # it reaches lower values of Poisson deviance.
284
283
#
285
284
# Evaluating models with a single train / test split is prone to random
286
285
# fluctuations. If computing resources allow, it should be verified that
@@ -339,7 +338,7 @@ def score_estimator(estimator, df_test):
339
338
#
340
339
# Note that we could have used the least squares loss for the
341
340
# ``HistGradientBoostingRegressor`` model. This would wrongly assume a normal
342
- # distribution the response variable as for the `Ridge` model, and possibly
341
+ # distributed response variable as does the `Ridge` model, and possibly
343
342
# also lead to slightly negative predictions. However the gradient boosted
344
343
# trees would still perform relatively well and in particular better than
345
344
# ``PoissonRegressor`` thanks to the flexibility of the trees combined with the
@@ -533,13 +532,9 @@ def lorenz_curve(y_true, y_pred, exposure):
533
532
# Main takeaways
534
533
# --------------
535
534
#
536
- # - The performance of the models can be evaluted by their ability to yield
535
+ # - The performance of the models can be evaluated by their ability to yield
537
536
# well-calibrated predictions and a good ranking.
538
537
#
539
- # - The Gini index reflects the ability of a model to rank predictions
540
- # irrespective of their absolute values, and therefore only assess their
541
- # ranking power.
542
- #
543
538
# - The calibration of the model can be assessed by plotting the mean observed
544
539
# value vs the mean predicted value on groups of test samples binned by
545
540
# predicted risk.
@@ -552,6 +547,10 @@ def lorenz_curve(y_true, y_pred, exposure):
552
547
# - Using the Poisson loss with a log-link can correct these problems and lead
553
548
# to a well-calibrated linear model.
554
549
#
550
+ # - The Gini index reflects the ability of a model to rank predictions
551
+ # irrespective of their absolute values, and therefore only assesses their
552
+ # ranking power.
553
+ #
555
554
# - Despite the improvement in calibration, the ranking power of both linear
556
555
# models is comparable and well below the ranking power of the Gradient
557
556
# Boosting Regression Trees.
0 commit comments