From b48fb08f52e4eb12e6a4f9bec0b23feb933f2905 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20P=C3=B6lsterl?= Date: Wed, 7 Dec 2016 22:58:12 +0000 Subject: [PATCH] [MRG] Set min_impurity_split in gradient boosting models self.min_impurity_split should be passed to DecisionTreeRegressor in BaseGradientBoosting._fit_stage. Fixes #8006 --- doc/whats_new.rst | 6 +++++- sklearn/ensemble/gradient_boosting.py | 1 + sklearn/ensemble/tests/test_gradient_boosting.py | 13 +++++++++++++ 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/doc/whats_new.rst b/doc/whats_new.rst index 07e26e29bf1ca..5e4c95fb7d222 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -118,6 +118,11 @@ Bug fixes when a numpy array is passed in for weights. :issue:`7983` by :user:`Vincent Pham <vincentpham1991>`. + - Fix a bug where :class:`sklearn.ensemble.GradientBoostingClassifier` and + :class:`sklearn.ensemble.GradientBoostingRegressor` ignored the + ``min_impurity_split`` parameter. + :issue:`8006` by :user:`Sebastian Pölsterl <sebp>`. + API changes summary ------------------- @@ -127,7 +132,6 @@ API changes summary now only have ``self.estimators_`` available after ``fit``. :issue:`7464` by `Lars Buitinck`_ and `Loic Esteve`_. - ..
_changes_0_18_1: Version 0.18.1 diff --git a/sklearn/ensemble/gradient_boosting.py b/sklearn/ensemble/gradient_boosting.py index a210d1c4265b0..a337ee9891437 100644 --- a/sklearn/ensemble/gradient_boosting.py +++ b/sklearn/ensemble/gradient_boosting.py @@ -765,6 +765,7 @@ def _fit_stage(self, i, X, y, y_pred, sample_weight, sample_mask, min_samples_split=self.min_samples_split, min_samples_leaf=self.min_samples_leaf, min_weight_fraction_leaf=self.min_weight_fraction_leaf, + min_impurity_split=self.min_impurity_split, max_features=self.max_features, max_leaf_nodes=self.max_leaf_nodes, random_state=random_state, diff --git a/sklearn/ensemble/tests/test_gradient_boosting.py b/sklearn/ensemble/tests/test_gradient_boosting.py index 817122338c91e..6fd55f691c26c 100644 --- a/sklearn/ensemble/tests/test_gradient_boosting.py +++ b/sklearn/ensemble/tests/test_gradient_boosting.py @@ -961,6 +961,19 @@ def test_max_leaf_nodes_max_depth(): assert_equal(tree.max_depth, 1) +def test_min_impurity_split(): + # Test if min_impurity_split of base estimators is set + # Regression test for #8006 + X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1) + all_estimators = [GradientBoostingRegressor, + GradientBoostingClassifier] + + for GBEstimator in all_estimators: + est = GBEstimator(min_impurity_split=0.1).fit(X, y) + for tree in est.estimators_.flat: + assert_equal(tree.min_impurity_split, 0.1) + + def test_warm_start_wo_nestimators_change(): # Test if warm_start does nothing if n_estimators is not changed. # Regression test for #3513.