fix issue 4447 : min_weight_leaf not properly passed to PresortBestSp… · lithuak/scikit-learn@2ff021f · GitHub
[go: up one dir, main page]

Skip to content

Commit 2ff021f

Browse files
pprett authored and amueller committed
fix issue 4447 : min_weight_leaf not properly passed to PresortBestSplitter
1 parent 33d132a commit 2ff021f

File tree

2 files changed

+27
-4
lines changed

2 files changed

+27
-4
lines changed

sklearn/ensemble/gradient_boosting.py

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1002,12 +1002,19 @@ def _fit_stages(self, X, y, y_pred, sample_weight, random_state,
10021002
n_inbag = max(1, int(self.subsample * n_samples))
10031003
loss_ = self.loss_
10041004

1005+
# Set min_weight_leaf from min_weight_fraction_leaf
1006+
if self.min_weight_fraction_leaf != 0. and sample_weight is not None:
1007+
min_weight_leaf = (self.min_weight_fraction_leaf *
1008+
np.sum(sample_weight))
1009+
else:
1010+
min_weight_leaf = 0.
1011+
10051012
# init criterion and splitter
10061013
criterion = FriedmanMSE(1)
10071014
splitter = PresortBestSplitter(criterion,
10081015
self.max_features_,
10091016
self.min_samples_leaf,
1010-
self.min_weight_fraction_leaf,
1017+
min_weight_leaf,
10111018
random_state)
10121019

10131020
if self.verbose:

sklearn/ensemble/tests/test_gradient_boosting.py

Lines changed: 19 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -980,10 +980,26 @@ def test_non_uniform_weights_toy_edge_case_reg():
980980
y = [0, 0, 1, 0]
981981
# ignore the first 2 training samples by setting their weight to 0
982982
sample_weight = [0, 0, 1, 1]
983-
for loss in ('ls', 'huber', 'lad', 'quantile'):
984-
gb = GradientBoostingRegressor(n_estimators=5)
983+
for loss in ('huber', 'ls', 'lad', 'quantile'):
984+
gb = GradientBoostingRegressor(learning_rate=1.0, n_estimators=2, loss=loss)
985985
gb.fit(X, y, sample_weight=sample_weight)
986-
assert_true(gb.predict([[1, 0]])[0] > 0.5)
986+
assert_greater(gb.predict([[1, 0]])[0], 0.5)
987+
988+
989+
def test_non_uniform_weights_toy_min_weight_leaf():
990+
"""Regression test for https://github.com/scikit-learn/scikit-learn/issues/4447 """
991+
X = [[1, 0],
992+
[1, 0],
993+
[1, 0],
994+
[0, 1],
995+
]
996+
y = [0, 0, 1, 0]
997+
# ignore the first 2 training samples by setting their weight to 0
998+
sample_weight = [0, 0, 1, 1]
999+
gb = GradientBoostingRegressor(n_estimators=5, min_weight_fraction_leaf=0.1)
1000+
gb.fit(X, y, sample_weight=sample_weight)
1001+
assert_true(gb.predict([[1, 0]])[0] > 0.5)
1002+
assert_almost_equal(gb.estimators_[0,0].splitter.min_weight_leaf, 0.2)
9871003

9881004

9891005
def test_non_uniform_weights_toy_edge_case_clf():

0 commit comments

Comments
 (0)
0