[MRG] Fix GBDT init parameter when it's a pipeline by NicolasHug · Pull Request #13472 · scikit-learn/scikit-learn · GitHub
[MRG] Fix GBDT init parameter when it's a pipeline #13472

Merged
merged 8 commits on Mar 27, 2019
25 changes: 15 additions & 10 deletions sklearn/ensemble/gradient_boosting.py
@@ -1476,20 +1476,25 @@ def fit(self, X, y, sample_weight=None, monitor=None):
                 raw_predictions = np.zeros(shape=(X.shape[0], self.loss_.K),
                                            dtype=np.float64)
             else:
-                try:
-                    self.init_.fit(X, y, sample_weight=sample_weight)
-                except TypeError:
-                    if sample_weight_is_none:
-                        self.init_.fit(X, y)
-                    else:
-                        raise ValueError(
-                            "The initial estimator {} does not support sample "
-                            "weights.".format(self.init_.__class__.__name__))
+                # XXX clean this once we have a support_sample_weight tag
+                if sample_weight_is_none:
+                    self.init_.fit(X, y)
+                else:
+                    msg = ("The initial estimator {} does not support sample "
+                           "weights.".format(self.init_.__class__.__name__))
+                    try:
+                        self.init_.fit(X, y, sample_weight=sample_weight)
+                    except TypeError:  # regular estimator without SW support
+                        raise ValueError(msg)
+                    except ValueError as e:
+                        if 'not enough values to unpack' in str(e):  # pipeline
Member:
I'd rather it if we improved the message for fit params missing __ in Pipeline, but okay

+                            raise ValueError(msg) from e
+                        else:  # regular estimator whose input checking failed
+                            raise
Member:
Nit: Do not need the else here

@NicolasHug (Member Author), Mar 22, 2019:
I personally prefer the whole if/else logic. It's clearer, it doesn't rely on the fact that the above block exits, and it has a more functional flavor.
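For illustration, the two shapes under discussion are behaviorally identical here, since the if-branch always raises; the choice is purely stylistic. A minimal sketch (the helper names are hypothetical, and e and msg stand in for the variables in the diff):

# Illustrative sketch only: call either helper from inside an active
# except block, otherwise the bare raise has nothing to re-raise.

def reraise_with_else(e, msg):
    # Author's preference: the two outcomes read as symmetric branches.
    if 'not enough values to unpack' in str(e):
        raise ValueError(msg) from e
    else:
        raise

def reraise_without_else(e, msg):
    # Reviewer's nit: the if-branch always raises, so the bare raise is
    # only ever reached when the condition is false.
    if 'not enough values to unpack' in str(e):
        raise ValueError(msg) from e
    raise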


 
                 raw_predictions = \
                     self.loss_.get_init_raw_predictions(X, self.init_)
 
             begin_at_stage = 0
 
             # The rng state must be preserved if warm_start is True
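For context on the reviewer's remark about fit params missing __ in Pipeline, and on the bug this PR fixes (issue #13466): Pipeline.fit routes fit parameters to its steps by splitting each keyword on '__' into a step name and a parameter name. A bare sample_weight keyword cannot be split, so the pipeline raises a ValueError ('not enough values to unpack'), not the TypeError the old code caught; that is why the new handler inspects the ValueError message. A minimal sketch of both spellings (the step name 'linearregression' is the one make_pipeline auto-generates; the unpacking message is the one current at the time of this PR and was reworded in later releases):

import numpy as np
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline

X, y = make_regression(random_state=0)
pipe = make_pipeline(LinearRegression())

# Unprefixed fit parameter: Pipeline.fit cannot tell which step it
# belongs to, and the '__' split fails with a ValueError.
try:
    pipe.fit(X, y, sample_weight=np.ones(X.shape[0]))
except ValueError as e:
    print(e)  # not enough values to unpack (expected 2, got 1)

# Supported spelling: prefix with the step name to route the parameter.
pipe.fit(X, y, linearregression__sample_weight=np.ones(X.shape[0]))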
30 changes: 30 additions & 0 deletions sklearn/ensemble/tests/test_gradient_boosting.py
@@ -39,6 +39,9 @@
 from sklearn.exceptions import DataConversionWarning
 from sklearn.exceptions import NotFittedError
 from sklearn.dummy import DummyClassifier, DummyRegressor
+from sklearn.pipeline import make_pipeline
+from sklearn.linear_model import LinearRegression
+from sklearn.svm import NuSVR
 
 
 GRADIENT_BOOSTING_ESTIMATORS = [GradientBoostingClassifier,
@@ -1366,6 +1369,33 @@ def test_gradient_boosting_with_init(gb, dataset_maker, init_estimator):
     gb(init=init_est).fit(X, y, sample_weight=sample_weight)
 
 
+def test_gradient_boosting_with_init_pipeline():
+    # Check that the init estimator can be a pipeline (see issue #13466)
+
+    X, y = make_regression(random_state=0)
+    init = make_pipeline(LinearRegression())
+    gb = GradientBoostingRegressor(init=init)
+    gb.fit(X, y)  # pipeline without sample_weight works fine
+
+    with pytest.raises(
+            ValueError,
+            match='The initial estimator Pipeline does not support sample '
+                  'weights'):
+        gb.fit(X, y, sample_weight=np.ones(X.shape[0]))
+
+    # Passing sample_weight to a pipeline raises a ValueError. This test
+    # makes sure we make the distinction between a ValueError raised by a
+    # pipeline that was passed sample_weight, and a ValueError raised by a
+    # regular estimator whose input checking failed.
+    with pytest.raises(
+            ValueError,
+            match='nu <= 0 or nu > 1'):
+        # Note that NuSVR properly supports sample_weight
+        init = NuSVR(gamma='auto', nu=1.5)
+        gb = GradientBoostingRegressor(init=init)
+        gb.fit(X, y, sample_weight=np.ones(X.shape[0]))
+
+
 @pytest.mark.parametrize('estimator, missing_method', [
     (GradientBoostingClassifier(init=LinearSVC()), 'predict_proba'),
     (GradientBoostingRegressor(init=OneHotEncoder()), 'predict')
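Taken together, the merged change gives the user-facing behavior the new test asserts. A sketch of the post-merge behavior (dataset and weights chosen arbitrarily):

import numpy as np
from sklearn.datasets import make_regression
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline

X, y = make_regression(random_state=0)

# A pipeline is now accepted as the init estimator.
gb = GradientBoostingRegressor(init=make_pipeline(LinearRegression()))
gb.fit(X, y)  # without sample_weight, the pipeline is fit as-is

# With sample_weight, the opaque unpacking error is translated into an
# informative one.
try:
    gb.fit(X, y, sample_weight=np.ones(X.shape[0]))
except ValueError as e:
    print(e)  # The initial estimator Pipeline does not support sample weights.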