ENH Add custom loss support for HistGradientBoosting by gbolmier · Pull Request #16908 · scikit-learn/scikit-learn · GitHub

ENH Add custom loss support for HistGradientBoosting #16908

Merged
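
In brief: before this change, the `loss` parameter of `HistGradientBoostingClassifier` and `HistGradientBoostingRegressor` accepted only the string names listed in `_VALID_LOSSES`. The diff below widens the validation so that an instance of `BaseLoss` (the base class of the private loss module) is also accepted, and `fit()` uses such an instance as-is instead of resolving a string through `_get_loss()`.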
10 changes: 8 additions & 2 deletions sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py
@@ -23,6 +23,7 @@
 from .binning import _BinMapper
 from .grower import TreeGrower
 from .loss import _LOSSES
+from .loss import BaseLoss


 class BaseHistGradientBoosting(BaseEstimator, ABC):
@@ -58,7 +59,8 @@ def _validate_parameters(self):
         The parameters that are directly passed to the grower are checked in
         TreeGrower."""

-        if self.loss not in self._VALID_LOSSES:
+        if (self.loss not in self._VALID_LOSSES and
+                not isinstance(self.loss, BaseLoss)):
             raise ValueError(
                 "Loss {} is not supported for {}. Accepted losses: "
                 "{}.".format(self.loss, self.__class__.__name__,
@@ -150,7 +152,11 @@ def fit(self, X, y, sample_weight=None):
         # data.
         self._in_fit = True

-        self.loss_ = self._get_loss(sample_weight=sample_weight)
+        if isinstance(self.loss, str):
+            self.loss_ = self._get_loss(sample_weight=sample_weight)
+        elif isinstance(self.loss, BaseLoss):
+            self.loss_ = self.loss

         if self.early_stopping == 'auto':
             self.do_early_stopping_ = n_samples > 10000
         else:
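
This dispatch keeps string losses on the existing `_get_loss()` path and stores a `BaseLoss` instance untouched, so the fitted `loss_` attribute is the very object that was passed in. A short sketch under the same assumptions as above:

```python
# Sketch: per the isinstance() dispatch above, a BaseLoss instance
# passed as `loss` is stored on the fitted estimator as-is.
from sklearn.experimental import enable_hist_gradient_boosting  # noqa
from sklearn.datasets import make_regression
from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.ensemble._hist_gradient_boosting.loss import LeastSquares

X, y = make_regression(random_state=0)
loss = LeastSquares(sample_weight=None)
est = HistGradientBoostingRegressor(loss=loss, max_iter=20).fit(X, y)
assert est.loss_ is loss  # the exact instance: no copy, no string lookup
```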
21 changes: 21 additions & 0 deletions sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py
@@ -12,6 +12,8 @@
 from sklearn.ensemble import HistGradientBoostingRegressor
 from sklearn.ensemble import HistGradientBoostingClassifier
 from sklearn.ensemble._hist_gradient_boosting.loss import _LOSSES
+from sklearn.ensemble._hist_gradient_boosting.loss import LeastSquares
+from sklearn.ensemble._hist_gradient_boosting.loss import BinaryCrossEntropy
 from sklearn.ensemble._hist_gradient_boosting.grower import TreeGrower
 from sklearn.ensemble._hist_gradient_boosting.binning import _BinMapper
 from sklearn.utils import shuffle
@@ -681,3 +683,22 @@ def test_single_node_trees(Est):
                for predictor in est._predictors)
     # Still gives correct predictions thanks to the baseline prediction
     assert_allclose(est.predict(X), y)
+
+
+@pytest.mark.parametrize('Est, loss, X, y', [
+    (
+        HistGradientBoostingClassifier,
+        BinaryCrossEntropy(sample_weight=None),
+        X_classification,
+        y_classification
+    ),
+    (
+        HistGradientBoostingRegressor,
+        LeastSquares(sample_weight=None),
+        X_regression,
+        y_regression
+    )
+])
+def test_custom_loss(Est, loss, X, y):
+    est = Est(loss=loss, max_iter=20)
+    est.fit(X, y)
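
The new test passes instances of the built-in losses rather than subclassing; a standalone version of the classifier case might look as follows (a sketch: `make_classification` stands in for the test module's `X_classification`/`y_classification` fixtures):

```python
# Standalone sketch of what test_custom_loss exercises for the
# classifier case; make_classification is an assumed stand-in for the
# test module's X_classification/y_classification fixtures.
from sklearn.experimental import enable_hist_gradient_boosting  # noqa
from sklearn.datasets import make_classification
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.ensemble._hist_gradient_boosting.loss import BinaryCrossEntropy

X, y = make_classification(random_state=0)
est = HistGradientBoostingClassifier(
    loss=BinaryCrossEntropy(sample_weight=None), max_iter=20)
est.fit(X, y)
print(est.score(X, y))  # mean accuracy on the training data
```

A fully custom loss would subclass `BaseLoss` and implement its abstract methods; the test itself only pins down the instance-passing interface shown here.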