remove min_impurity_split · scikit-learn/scikit-learn@2ccaf85 · GitHub
[go: up one dir, main page]

Skip to content

Commit 2ccaf85

Browse files
committed
remove min_impurity_split
1 parent 6aa8807 commit 2ccaf85

File tree

5 files changed

+3
-102
lines changed

5 files changed

+3
-102
lines changed

sklearn/ensemble/tests/test_forest.py

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1220,22 +1220,6 @@ def test_decision_path(name):
12201220
check_decision_path(name)
12211221

12221222

1223-
@pytest.mark.filterwarnings('ignore:The default value of n_estimators')
1224-
def test_min_impurity_split():
1225-
# Test if min_impurity_split of base estimators is set
1226-
# Regression test for #8006
1227-
X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1)
1228-
all_estimators = [RandomForestClassifier, RandomForestRegressor,
1229-
ExtraTreesClassifier, ExtraTreesRegressor]
1230-
1231-
for Estimator in all_estimators:
1232-
est = Estimator(min_impurity_split=0.1)
1233-
est = assert_warns_message(DeprecationWarning, "min_impurity_decrease",
1234-
est.fit, X, y)
1235-
for tree in est.estimators_:
1236-
assert_equal(tree.min_impurity_split, 0.1)
1237-
1238-
12391223
@pytest.mark.filterwarnings('ignore:The default value of n_estimators')
12401224
def test_min_impurity_decrease():
12411225
X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1)

sklearn/ensemble/tests/test_gradient_boosting.py

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1111,19 +1111,6 @@ def test_max_leaf_nodes_max_depth(GBEstimator):
11111111
assert_equal(tree.max_depth, 1)
11121112

11131113

1114-
@pytest.mark.parametrize('GBEstimator', GRADIENT_BOOSTING_ESTIMATORS)
1115-
def test_min_impurity_split(GBEstimator):
1116-
# Test if min_impurity_split of base estimators is set
1117-
# Regression test for #8006
1118-
X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1)
1119-
1120-
est = GBEstimator(min_impurity_split=0.1)
1121-
est = assert_warns_message(DeprecationWarning, "min_impurity_decrease",
1122-
est.fit, X, y)
1123-
for tree in est.estimators_.flat:
1124-
assert_equal(tree.min_impurity_split, 0.1)
1125-
1126-
11271114
@pytest.mark.parametrize('GBEstimator', GRADIENT_BOOSTING_ESTIMATORS)
11281115
def test_min_impurity_decrease(GBEstimator):
11291116
X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1)

sklearn/tree/_tree.pxd

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,6 @@ cdef class TreeBuilder:
9696
cdef SIZE_t min_samples_leaf # Minimum number of samples in a leaf
9797
cdef double min_weight_leaf # Minimum weight in a leaf
9898
cdef SIZE_t max_depth # Maximal tree depth
99-
cdef double min_impurity_split
10099
cdef double min_impurity_decrease # Impurity threshold for early stopping
101100

102101
cpdef build(self, Tree tree, object X, np.ndarray y,

sklearn/tree/_tree.pyx

Lines changed: 3 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -132,15 +132,13 @@ cdef class DepthFirstTreeBuilder(TreeBuilder):
132132

133133
def __cinit__(self, Splitter splitter, SIZE_t min_samples_split,
134134
SIZE_t min_samples_leaf, double min_weight_leaf,
135-
SIZE_t max_depth, double min_impurity_decrease,
136-
double min_impurity_split):
135+
SIZE_t max_depth, double min_impurity_decrease):
137136
self.splitter = splitter
138137
self.min_samples_split = min_samples_split
139138
self.min_samples_leaf = min_samples_leaf
140139
self.min_weight_leaf = min_weight_leaf
141140
self.max_depth = max_depth
142141
self.min_impurity_decrease = min_impurity_decrease
143-
self.min_impurity_split = min_impurity_split
144142

145143
cpdef build(self, Tree tree, object X, np.ndarray y,
146144
np.ndarray sample_weight=None,
@@ -171,7 +169,6 @@ cdef class DepthFirstTreeBuilder(TreeBuilder):
171169
cdef double min_weight_leaf = self.min_weight_leaf
172170
cdef SIZE_t min_samples_split = self.min_samples_split
173171
cdef double min_impurity_decrease = self.min_impurity_decrease
174-
cdef double min_impurity_split = self.min_impurity_split
175172

176173
# Recursive partition (without actual recursion)
177174
splitter.init(X, y, sample_weight_ptr, X_idx_sorted)
@@ -229,9 +226,6 @@ cdef class DepthFirstTreeBuilder(TreeBuilder):
229226
impurity = splitter.node_impurity()
230227
first = 0
231228

232-
is_leaf = (is_leaf or
233-
(impurity <= min_impurity_split))
234-
235229
if not is_leaf:
236230
splitter.node_split(impurity, &split, &n_constant_features)
237231
# If EPSILON=0 in the below comparison, float precision
@@ -303,15 +297,14 @@ cdef class BestFirstTreeBuilder(TreeBuilder):
303297
def __cinit__(self, Splitter splitter, SIZE_t min_samples_split,
304298
SIZE_t min_samples_leaf, min_weight_leaf,
305299
SIZE_t max_depth, SIZE_t max_leaf_nodes,
306-
double min_impurity_decrease, double min_impurity_split):
300+
double min_impurity_decrease):
307301
self.splitter = splitter
308302
self.min_samples_split = min_samples_split
309303
self.min_samples_leaf = min_samples_leaf
310304
self.min_weight_leaf = min_weight_leaf
311305
self.max_depth = max_depth
312306
self.max_leaf_nodes = max_leaf_nodes
313307
self.min_impurity_decrease = min_impurity_decrease
314-
self.min_impurity_split = min_impurity_split
315308

316309
cpdef build(self, Tree tree, object X, np.ndarray y,
317310
np.ndarray sample_weight=None,
@@ -438,7 +431,6 @@ cdef class BestFirstTreeBuilder(TreeBuilder):
438431
cdef SIZE_t n_constant_features = 0
439432
cdef double weighted_n_samples = splitter.weighted_n_samples
440433
cdef double min_impurity_decrease = self.min_impurity_decrease
441-
cdef double min_impurity_split = self.min_impurity_split
442434
cdef double weighted_n_node_samples
443435
cdef bint is_leaf
444436
cdef SIZE_t n_left, n_right
@@ -453,8 +445,7 @@ cdef class BestFirstTreeBuilder(TreeBuilder):
453445
is_leaf = (depth > self.max_depth or
454446
n_node_samples < self.min_samples_split or
455447
n_node_samples < 2 * self.min_samples_leaf or
456-
weighted_n_node_samples < 2 * self.min_weight_leaf or
457-
impurity <= min_impurity_split)
448+
weighted_n_node_samples < 2 * self.min_weight_leaf)
458449

459450
if not is_leaf:
460451
splitter.node_split(impurity, &split, &n_constant_features)

sklearn/tree/tests/test_tree.py

Lines changed: 0 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -527,10 +527,6 @@ def test_error():
527527
X, y)
528528
assert_raises(ValueError, TreeEstimator(max_depth=-1).fit, X, y)
529529
assert_raises(ValueError, TreeEstimator(max_features=42).fit, X, y)
530-
# min_impurity_split warning
531-
with ignore_warnings(category=DeprecationWarning):
532-
assert_raises(ValueError,
533-
TreeEstimator(min_impurity_split=-1.0).fit, X, y)
534530
assert_raises(ValueError,
535531
TreeEstimator(min_impurity_decrease=-1.0).fit, X, y)
536532

@@ -788,62 +784,6 @@ def test_min_weight_fraction_leaf_with_min_samples_leaf_on_sparse_input(name):
788784
name, "multilabel", True)
789785

790786

791-
def test_min_impurity_split():
792-
# test if min_impurity_split creates leaves with impurity
793-
# [0, min_impurity_split) when min_samples_leaf = 1 and
794-
# min_samples_split = 2.
795-
X = np.asfortranarray(iris.data, dtype=tree._tree.DTYPE)
796-
y = iris.target
797-
798-
# test both DepthFirstTreeBuilder and BestFirstTreeBuilder
799-
# by setting max_leaf_nodes
800-
for max_leaf_nodes, name in product((None, 1000), ALL_TREES.keys()):
801-
TreeEstimator = ALL_TREES[name]
802-
min_impurity_split = .5
803-
804-
# verify leaf nodes without min_impurity_split less than
805-
# impurity 1e-7
806-
est = TreeEstimator(max_leaf_nodes=max_leaf_nodes,
807-
random_state=0)
808-
assert_true(est.min_impurity_split is None,
809-
"Failed, min_impurity_split = {0} > 1e-7".format(
810-
est.min_impurity_split))
811-
try:
812-
assert_warns(DeprecationWarning, est.fit, X, y)
813-
except AssertionError:
814-
pass
815-
for node in range(est.tree_.node_count):
816-
if (est.tree_.children_left[node] == TREE_LEAF or
817-
est.tree_.children_right[node] == TREE_LEAF):
818-
assert_equal(est.tree_.impurity[node], 0.,
819-
"Failed with {0} "
820-
"min_impurity_split={1}".format(
821-
est.tree_.impurity[node],
822-
est.min_impurity_split))
823-
824-
# verify leaf nodes have impurity [0,min_impurity_split] when using
825-
# min_impurity_split
826-
est = TreeEstimator(max_leaf_nodes=max_leaf_nodes,
827-
min_impurity_split=min_impurity_split,
828-
random_state=0)
829-
assert_warns_message(DeprecationWarning,
830-
"Use the min_impurity_decrease",
831-
est.fit, X, y)
832-
for node in range(est.tree_.node_count):
833-
if (est.tree_.children_left[node] == TREE_LEAF or
834-
est.tree_.children_right[node] == TREE_LEAF):
835-
assert_greater_equal(est.tree_.impurity[node], 0,
836-
"Failed with {0}, "
837-
"min_impurity_split={1}".format(
838-
est.tree_.impurity[node],
839-
est.min_impurity_split))
840-
assert_less_equal(est.tree_.impurity[node], min_impurity_split,
841-
"Failed with {0}, "
842-
"min_impurity_split={1}".format(
843-
est.tree_.impurity[node],
844-
est.min_impurity_split))
845-
846-
847787
def test_min_impurity_decrease():
848788
# test if min_impurity_decrease ensures that a split is made only if
849789
# the impurity decrease is at least that value

0 commit comments

Comments
 (0)
0