From 9585337545c7c275eb6cc7906e37220e6230aefa Mon Sep 17 00:00:00 2001 From: Nelson Liu Date: Sun, 3 Jul 2016 06:40:45 -0700 Subject: [PATCH 01/16] feature: add beta-threshold early stopping for decision tree growth --- sklearn/tree/_tree.pxd | 2 ++ sklearn/tree/_tree.pyx | 17 +++++++++--- sklearn/tree/tests/test_tree.py | 48 +++++++++++++++++++++++++++++++++ sklearn/tree/tree.py | 24 +++++++++++++++-- 4 files changed, 85 insertions(+), 6 deletions(-) diff --git a/sklearn/tree/_tree.pxd b/sklearn/tree/_tree.pxd index 007b7a7860342..0db91ed2c87eb 100644 --- a/sklearn/tree/_tree.pxd +++ b/sklearn/tree/_tree.pxd @@ -4,6 +4,7 @@ # Joel Nothman # Arnaud Joly # Jacob Schreiber +# Nelson Liu # # License: BSD 3 clause @@ -95,6 +96,7 @@ cdef class TreeBuilder: cdef SIZE_t min_samples_leaf # Minimum number of samples in a leaf cdef double min_weight_leaf # Minimum weight in a leaf cdef SIZE_t max_depth # Maximal tree depth + cdef double beta # Impurity threshold for early stopping cpdef build(self, Tree tree, object X, np.ndarray y, np.ndarray sample_weight=*, diff --git a/sklearn/tree/_tree.pyx b/sklearn/tree/_tree.pyx index f44320a7b47ae..9f97938521fe1 100644 --- a/sklearn/tree/_tree.pyx +++ b/sklearn/tree/_tree.pyx @@ -12,6 +12,7 @@ # Joel Nothman # Fares Hedayati # Jacob Schreiber +# Nelson Liu # # License: BSD 3 clause @@ -131,12 +132,13 @@ cdef class DepthFirstTreeBuilder(TreeBuilder): def __cinit__(self, Splitter splitter, SIZE_t min_samples_split, SIZE_t min_samples_leaf, double min_weight_leaf, - SIZE_t max_depth): + SIZE_t max_depth, double beta): self.splitter = splitter self.min_samples_split = min_samples_split self.min_samples_leaf = min_samples_leaf self.min_weight_leaf = min_weight_leaf self.max_depth = max_depth + self.beta = beta cpdef build(self, Tree tree, object X, np.ndarray y, np.ndarray sample_weight=None, @@ -166,6 +168,7 @@ cdef class DepthFirstTreeBuilder(TreeBuilder): cdef SIZE_t min_samples_leaf = self.min_samples_leaf cdef double min_weight_leaf = self.min_weight_leaf cdef SIZE_t min_samples_split = self.min_samples_split + cdef double beta = self.beta # Recursive partition (without actual recursion) splitter.init(X, y, sample_weight_ptr, X_idx_sorted) @@ -223,7 +226,9 @@ cdef class DepthFirstTreeBuilder(TreeBuilder): impurity = splitter.node_impurity() first = 0 - is_leaf = is_leaf or (impurity <= MIN_IMPURITY_SPLIT) + is_leaf = (is_leaf or + (impurity <= MIN_IMPURITY_SPLIT) or + (impurity < beta)) if not is_leaf: splitter.node_split(impurity, &split, &n_constant_features) @@ -289,13 +294,15 @@ cdef class BestFirstTreeBuilder(TreeBuilder): def __cinit__(self, Splitter splitter, SIZE_t min_samples_split, SIZE_t min_samples_leaf, min_weight_leaf, - SIZE_t max_depth, SIZE_t max_leaf_nodes): + SIZE_t max_depth, SIZE_t max_leaf_nodes, + double beta): self.splitter = splitter self.min_samples_split = min_samples_split self.min_samples_leaf = min_samples_leaf self.min_weight_leaf = min_weight_leaf self.max_depth = max_depth self.max_leaf_nodes = max_leaf_nodes + self.beta = beta cpdef build(self, Tree tree, object X, np.ndarray y, np.ndarray sample_weight=None, @@ -421,6 +428,7 @@ cdef class BestFirstTreeBuilder(TreeBuilder): cdef SIZE_t n_node_samples cdef SIZE_t n_constant_features = 0 cdef double weighted_n_samples = splitter.weighted_n_samples + cdef double beta = self.beta cdef double weighted_n_node_samples cdef bint is_leaf cdef SIZE_t n_left, n_right @@ -436,7 +444,8 @@ cdef class BestFirstTreeBuilder(TreeBuilder): (n_node_samples < self.min_samples_split) or 
(n_node_samples < 2 * self.min_samples_leaf) or (weighted_n_node_samples < self.min_weight_leaf) or - (impurity <= MIN_IMPURITY_SPLIT)) + (impurity <= MIN_IMPURITY_SPLIT) or + (impurity < beta)) if not is_leaf: splitter.node_split(impurity, &split, &n_constant_features) diff --git a/sklearn/tree/tests/test_tree.py b/sklearn/tree/tests/test_tree.py index 78a35fe5becc1..3cf5808edad14 100644 --- a/sklearn/tree/tests/test_tree.py +++ b/sklearn/tree/tests/test_tree.py @@ -681,6 +681,54 @@ def test_min_weight_fraction_leaf(): yield check_min_weight_fraction_leaf, name, "multilabel", True +def test_beta(): + # Test if beta creates leaves with impurity [0, beta) when + # min_samples_leaf = 1 and min_samples_split = 2. + X = np.asfortranarray(iris.data.astype(tree._tree.DTYPE)) + y = iris.target + + # test both DepthFirstTreeBuilder and BestFirstTreeBuilder + # by setting max_leaf_nodes + # we set max leaf nodes to a number greater than the total nodes + # possible, thus ensuring that the leaves generated have impurity + # of 0 when there is no beta stopping used. + for max_leaf_nodes, name in product((None, 1000), ALL_TREES.keys()): + TreeEstimator = ALL_TREES[name] + beta = .5 + + # verify leaf nodes without beta have impurity 0 + est = TreeEstimator(max_leaf_nodes=max_leaf_nodes, + random_state=0) + est.fit(X, y) + for node in range(est.tree_.node_count): + if (est.tree_.children_left[node] == TREE_LEAF or + est.tree_.children_right[node] == TREE_LEAF): + assert_equal(est.tree_.impurity[node], 0., + "Failed with {0} " + "beta={1}".format( + est.tree_.impurity[node], + est.beta)) + + # verify leaf nodes have impurity [0,beta) when using beta + est = TreeEstimator(max_leaf_nodes=max_leaf_nodes, + beta=beta, + random_state=0) + est.fit(X, y) + for node in range(est.tree_.node_count): + if (est.tree_.children_left[node] == TREE_LEAF or + est.tree_.children_right[node] == TREE_LEAF): + assert_greater_equal(est.tree_.impurity[node], 0, + "Failed with {0} " + "beta={1}".format( + est.tree_.impurity[node], + est.beta)) + assert_less(est.tree_.impurity[node], beta, + "Failed with {0} " + "beta={1}".format( + est.tree_.impurity[node], + est.beta)) + + def test_pickle(): for name, TreeEstimator in ALL_TREES.items(): diff --git a/sklearn/tree/tree.py b/sklearn/tree/tree.py index f004d845279bc..edb387ad25778 100644 --- a/sklearn/tree/tree.py +++ b/sklearn/tree/tree.py @@ -10,6 +10,7 @@ # Satrajit Gosh # Joly Arnaud # Fares Hedayati +# Nelson Liu # # License: BSD 3 clause @@ -89,6 +90,7 @@ def __init__(self, max_features, max_leaf_nodes, random_state, + beta, class_weight=None, presort=False): self.criterion = criterion @@ -100,6 +102,7 @@ def __init__(self, self.max_features = max_features self.random_state = random_state self.max_leaf_nodes = max_leaf_nodes + self.beta = beta self.class_weight = class_weight self.presort = presort @@ -151,6 +154,7 @@ def fit(self, X, y, sample_weight=None, check_input=True, """ random_state = check_random_state(self.random_state) + beta = self.beta if check_input: X = check_array(X, dtype=DTYPE, accept_sparse="csc") y = check_array(y, ensure_2d=False, dtype=None) @@ -359,13 +363,13 @@ def fit(self, X, y, sample_weight=None, check_input=True, builder = DepthFirstTreeBuilder(splitter, min_samples_split, min_samples_leaf, min_weight_leaf, - max_depth) + max_depth, beta) else: builder = BestFirstTreeBuilder(splitter, min_samples_split, min_samples_leaf, min_weight_leaf, max_depth, - max_leaf_nodes) + max_leaf_nodes, beta) builder.build(self.tree_, X, y, sample_weight, 
X_idx_sorted) @@ -608,6 +612,10 @@ class DecisionTreeClassifier(BaseDecisionTree, ClassifierMixin): If None, the random number generator is the RandomState instance used by `np.random`. + beta : float, optional (default=0.) + Threshold for early stopping in tree growth. If the impurity + of a node is below the threshold, the node is a leaf. + presort : bool, optional (default=False) Whether to presort the data to speed up the finding of best splits in fitting. For the default settings of a decision tree on large @@ -685,6 +693,7 @@ def __init__(self, max_features=None, random_state=None, max_leaf_nodes=None, + beta=0., class_weight=None, presort=False): super(DecisionTreeClassifier, self).__init__( @@ -698,6 +707,7 @@ def __init__(self, max_leaf_nodes=max_leaf_nodes, class_weight=class_weight, random_state=random_state, + beta=beta, presort=presort) def predict_proba(self, X, check_input=True): @@ -848,6 +858,10 @@ class DecisionTreeRegressor(BaseDecisionTree, RegressorMixin): If None, the random number generator is the RandomState instance used by `np.random`. + beta : float, optional (default=0.) + Threshold for early stopping in tree growth. If the impurity + of a node is below the threshold, the node is a leaf. + presort : bool, optional (default=False) Whether to presort the data to speed up the finding of best splits in fitting. For the default settings of a decision tree on large @@ -917,6 +931,7 @@ def __init__(self, max_features=None, random_state=None, max_leaf_nodes=None, + beta=0., presort=False): super(DecisionTreeRegressor, self).__init__( criterion=criterion, @@ -928,6 +943,7 @@ def __init__(self, max_features=max_features, max_leaf_nodes=max_leaf_nodes, random_state=random_state, + beta=beta, presort=presort) @@ -965,6 +981,7 @@ def __init__(self, max_features="auto", random_state=None, max_leaf_nodes=None, + beta=0., class_weight=None): super(ExtraTreeClassifier, self).__init__( criterion=criterion, @@ -976,6 +993,7 @@ def __init__(self, max_features=max_features, max_leaf_nodes=max_leaf_nodes, class_weight=class_weight, + beta=beta, random_state=random_state) @@ -1012,6 +1030,7 @@ def __init__(self, min_weight_fraction_leaf=0., max_features="auto", random_state=None, + beta=0., max_leaf_nodes=None): super(ExtraTreeRegressor, self).__init__( criterion=criterion, @@ -1022,4 +1041,5 @@ def __init__(self, min_weight_fraction_leaf=min_weight_fraction_leaf, max_features=max_features, max_leaf_nodes=max_leaf_nodes, + beta=beta, random_state=random_state) From 40164b84c3be5ab788537717b26c881eaafeb177 Mon Sep 17 00:00:00 2001 From: Nelson Liu Date: Sun, 3 Jul 2016 14:53:37 -0700 Subject: [PATCH 02/16] check if value of beta is greater than or equal to 0 --- sklearn/tree/tree.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sklearn/tree/tree.py b/sklearn/tree/tree.py index edb387ad25778..c30f9d2e0d7f9 100644 --- a/sklearn/tree/tree.py +++ b/sklearn/tree/tree.py @@ -308,6 +308,9 @@ def fit(self, X, y, sample_weight=None, check_input=True, else: min_weight_leaf = 0. + if not 0. <= beta: + raise ValueError("beta must be greater than 0") + presort = self.presort # Allow presort to be 'auto', which means True if the dataset is dense, # otherwise it will be False. 
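To make the new stopping rule concrete, a minimal usage sketch follows. It assumes a build of this branch (released scikit-learn does not yet expose `beta`); everything else is the standard public API, using the stock iris loader:

    from sklearn.datasets import load_iris
    from sklearn.tree import DecisionTreeClassifier

    iris = load_iris()

    # with beta=0.5, any node whose impurity is already below 0.5 is
    # finalized as a leaf instead of being split further
    est = DecisionTreeClassifier(beta=0.5, random_state=0)
    est.fit(iris.data, iris.target)

    t = est.tree_
    leaves = t.children_left == -1        # TREE_LEAF is -1
    print(t.impurity[leaves].max())       # strictly below 0.5

With the default beta=0. the behaviour is unchanged, because the pre-existing MIN_IMPURITY_SPLIT = 1e-7 constant still short-circuits near-pure nodes.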
From d306dc3cb96cd6d6cabd93204bc5f70abe9ceca5 Mon Sep 17 00:00:00 2001 From: Nelson Liu Date: Sun, 3 Jul 2016 14:56:53 -0700 Subject: [PATCH 03/16] test if default value of beta is 0 and edit input validation error message --- sklearn/tree/tests/test_tree.py | 3 +++ sklearn/tree/tree.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/sklearn/tree/tests/test_tree.py b/sklearn/tree/tests/test_tree.py index 3cf5808edad14..80d624c13ddb6 100644 --- a/sklearn/tree/tests/test_tree.py +++ b/sklearn/tree/tests/test_tree.py @@ -699,6 +699,9 @@ def test_beta(): # verify leaf nodes without beta have impurity 0 est = TreeEstimator(max_leaf_nodes=max_leaf_nodes, random_state=0) + assert_equal(est.beta, 0., + "Failed, beta = {0} != 0".format( + est.beta)) est.fit(X, y) for node in range(est.tree_.node_count): if (est.tree_.children_left[node] == TREE_LEAF or diff --git a/sklearn/tree/tree.py b/sklearn/tree/tree.py index c30f9d2e0d7f9..ef7b51b13ce3e 100644 --- a/sklearn/tree/tree.py +++ b/sklearn/tree/tree.py @@ -309,7 +309,7 @@ def fit(self, X, y, sample_weight=None, check_input=True, min_weight_leaf = 0. if not 0. <= beta: - raise ValueError("beta must be greater than 0") + raise ValueError("beta must be greater or equal to 0") presort = self.presort # Allow presort to be 'auto', which means True if the dataset is dense, From e0867b403131d29ee1bbbf4917857cad8ba82738 Mon Sep 17 00:00:00 2001 From: Nelson Liu Date: Sun, 3 Jul 2016 15:55:45 -0700 Subject: [PATCH 04/16] feature: separately validate beta for reg. and clf., and add tests for it --- sklearn/tree/tests/test_tree.py | 11 +++++++++++ sklearn/tree/tree.py | 10 ++++++++-- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/sklearn/tree/tests/test_tree.py b/sklearn/tree/tests/test_tree.py index 80d624c13ddb6..07a833b13168e 100644 --- a/sklearn/tree/tests/test_tree.py +++ b/sklearn/tree/tests/test_tree.py @@ -441,6 +441,10 @@ def test_max_features(): clf.fit(iris.data, iris.target) assert_equal(clf.max_features_, 2) + # use values of beta that are invalid for classification + clf = TreeClassifier(beta=2.0) + assert_raises(ValueError, clf.fit, X, y) + for name, TreeEstimator in ALL_TREES.items(): est = TreeEstimator(max_features="sqrt") est.fit(iris.data, iris.target) @@ -493,6 +497,13 @@ def test_max_features(): est = TreeEstimator(max_features="foobar") assert_raises(ValueError, est.fit, X, y) + # use values of beta that are invalid + clf = TreeClassifier(beta=-1.0) + assert_raises(ValueError, clf.fit, X, y) + + clf = TreeClassifier(beta="foobar") + assert_raises(ValueError, clf.fit, X, y) + def test_error(): # Test that it gives proper exception on deficient input. diff --git a/sklearn/tree/tree.py b/sklearn/tree/tree.py index ef7b51b13ce3e..e92786a0b2c95 100644 --- a/sklearn/tree/tree.py +++ b/sklearn/tree/tree.py @@ -308,8 +308,14 @@ def fit(self, X, y, sample_weight=None, check_input=True, else: min_weight_leaf = 0. - if not 0. <= beta: - raise ValueError("beta must be greater or equal to 0") + if is_classification: + if not 0. <= beta <= 1: + raise ValueError("beta must be in range [0,1] " + "in classification") + else: + if not 0. 
<= beta: + raise ValueError("beta must be greater than or equal " + "to 0 in regression") presort = self.presort # Allow presort to be 'auto', which means True if the dataset is dense, From 8205f83b2b2bd57614c63ddac090e46cff04638f Mon Sep 17 00:00:00 2001 From: Nelson Liu Date: Sun, 3 Jul 2016 18:26:37 -0700 Subject: [PATCH 05/16] feature: add beta to forest-based ensemble methods --- sklearn/ensemble/forest.py | 41 ++++++++++++++++++++++++++++++++------ 1 file changed, 35 insertions(+), 6 deletions(-) diff --git a/sklearn/ensemble/forest.py b/sklearn/ensemble/forest.py index f76414066a92c..a096d39999967 100644 --- a/sklearn/ensemble/forest.py +++ b/sklearn/ensemble/forest.py @@ -805,6 +805,10 @@ class RandomForestClassifier(ForestClassifier): If None then unlimited number of leaf nodes. If not None then ``max_depth`` will be ignored. + beta : float, optional (default=0.) + Threshold for early stopping in tree growth. If the impurity + of a node is below the threshold, the node is a leaf. + bootstrap : boolean, optional (default=True) Whether bootstrap samples are used when building trees. @@ -899,6 +903,7 @@ def __init__(self, min_weight_fraction_leaf=0., max_features="auto", max_leaf_nodes=None, + beta=0., bootstrap=True, oob_score=False, n_jobs=1, @@ -911,7 +916,7 @@ def __init__(self, n_estimators=n_estimators, estimator_params=("criterion", "max_depth", "min_samples_split", "min_samples_leaf", "min_weight_fraction_leaf", - "max_features", "max_leaf_nodes", + "max_features", "max_leaf_nodes", "beta", "random_state"), bootstrap=bootstrap, oob_score=oob_score, @@ -928,6 +933,7 @@ def __init__(self, self.min_weight_fraction_leaf = min_weight_fraction_leaf self.max_features = max_features self.max_leaf_nodes = max_leaf_nodes + self.beta = beta class RandomForestRegressor(ForestRegressor): @@ -1001,6 +1007,10 @@ class RandomForestRegressor(ForestRegressor): If None then unlimited number of leaf nodes. If not None then ``max_depth`` will be ignored. + beta : float, optional (default=0.) + Threshold for early stopping in tree growth. If the impurity + of a node is below the threshold, the node is a leaf. + bootstrap : boolean, optional (default=True) Whether bootstrap samples are used when building trees. @@ -1064,6 +1074,7 @@ def __init__(self, min_weight_fraction_leaf=0., max_features="auto", max_leaf_nodes=None, + beta=0., bootstrap=True, oob_score=False, n_jobs=1, @@ -1075,7 +1086,7 @@ def __init__(self, n_estimators=n_estimators, estimator_params=("criterion", "max_depth", "min_samples_split", "min_samples_leaf", "min_weight_fraction_leaf", - "max_features", "max_leaf_nodes", + "max_features", "max_leaf_nodes", "beta", "random_state"), bootstrap=bootstrap, oob_score=oob_score, @@ -1091,6 +1102,7 @@ def __init__(self, self.min_weight_fraction_leaf = min_weight_fraction_leaf self.max_features = max_features self.max_leaf_nodes = max_leaf_nodes + self.beta = beta class ExtraTreesClassifier(ForestClassifier): @@ -1160,6 +1172,10 @@ class ExtraTreesClassifier(ForestClassifier): If None then unlimited number of leaf nodes. If not None then ``max_depth`` will be ignored. + beta : float, optional (default=0.) + Threshold for early stopping in tree growth. If the impurity + of a node is below the threshold, the node is a leaf. + bootstrap : boolean, optional (default=False) Whether bootstrap samples are used when building trees. 
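Listing "beta" in `estimator_params` above is what actually propagates the value to the individual trees: `BaseEnsemble._make_estimator` clones the template estimator and copies every name in that tuple across. Roughly, as a simplified sketch of the existing mechanism (not the verbatim implementation):

    from sklearn.base import clone

    def _make_estimator_sketch(ensemble):
        # clone the template tree, then mirror each forwarded
        # hyperparameter from the ensemble onto the clone
        est = clone(ensemble.base_estimator_)
        est.set_params(**{name: getattr(ensemble, name)
                          for name in ensemble.estimator_params})
        return est

Forgetting either the constructor attribute or the `estimator_params` entry would leave the forest silently ignoring the new parameter, which is why both appear in every hunk of this patch.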
@@ -1255,6 +1271,7 @@ def __init__(self, min_weight_fraction_leaf=0., max_features="auto", max_leaf_nodes=None, + beta=0., bootstrap=False, oob_score=False, n_jobs=1, @@ -1267,7 +1284,7 @@ def __init__(self, n_estimators=n_estimators, estimator_params=("criterion", "max_depth", "min_samples_split", "min_samples_leaf", "min_weight_fraction_leaf", - "max_features", "max_leaf_nodes", + "max_features", "max_leaf_nodes", "beta", "random_state"), bootstrap=bootstrap, oob_score=oob_score, @@ -1284,6 +1301,7 @@ def __init__(self, self.min_weight_fraction_leaf = min_weight_fraction_leaf self.max_features = max_features self.max_leaf_nodes = max_leaf_nodes + self.beta = beta class ExtraTreesRegressor(ForestRegressor): @@ -1355,6 +1373,10 @@ class ExtraTreesRegressor(ForestRegressor): If None then unlimited number of leaf nodes. If not None then ``max_depth`` will be ignored. + beta : float, optional (default=0.) + Threshold for early stopping in tree growth. If the impurity + of a node is below the threshold, the node is a leaf. + bootstrap : boolean, optional (default=False) Whether bootstrap samples are used when building trees. @@ -1419,6 +1441,7 @@ def __init__(self, min_weight_fraction_leaf=0., max_features="auto", max_leaf_nodes=None, + beta=0., bootstrap=False, oob_score=False, n_jobs=1, @@ -1430,7 +1453,7 @@ def __init__(self, n_estimators=n_estimators, estimator_params=("criterion", "max_depth", "min_samples_split", "min_samples_leaf", "min_weight_fraction_leaf", - "max_features", "max_leaf_nodes", + "max_features", "max_leaf_nodes", "beta", "random_state"), bootstrap=bootstrap, oob_score=oob_score, @@ -1446,7 +1469,7 @@ def __init__(self, self.min_weight_fraction_leaf = min_weight_fraction_leaf self.max_features = max_features self.max_leaf_nodes = max_leaf_nodes - + self.beta = beta class RandomTreesEmbedding(BaseForest): """An ensemble of totally random trees. @@ -1500,6 +1523,10 @@ class RandomTreesEmbedding(BaseForest): If None then unlimited number of leaf nodes. If not None then ``max_depth`` will be ignored. + beta : float, optional (default=0.) + Threshold for early stopping in tree growth. If the impurity + of a node is below the threshold, the node is a leaf. + sparse_output : bool, optional (default=True) Whether or not to return a sparse CSR matrix, as default behavior, or to return a dense array compatible with dense pipeline operators. 
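At the ensemble level the parameter reads exactly as on a single tree. A quick usage sketch (branch build assumed; `make_classification` is the stock helper):

    from sklearn.datasets import make_classification
    from sklearn.ensemble import RandomForestClassifier

    X, y = make_classification(n_samples=500, n_features=20, random_state=0)

    # every tree stops splitting nodes whose impurity has already
    # dropped below 0.2, yielding a smaller forest overall
    forest = RandomForestClassifier(n_estimators=50, beta=0.2,
                                    random_state=0)
    forest.fit(X, y)
    print(sum(t.tree_.node_count for t in forest.estimators_))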
@@ -1544,6 +1571,7 @@ def __init__(self, min_samples_leaf=1, min_weight_fraction_leaf=0., max_leaf_nodes=None, + beta=0., sparse_output=True, n_jobs=1, random_state=None, @@ -1554,7 +1582,7 @@ def __init__(self, n_estimators=n_estimators, estimator_params=("criterion", "max_depth", "min_samples_split", "min_samples_leaf", "min_weight_fraction_leaf", - "max_features", "max_leaf_nodes", + "max_features", "max_leaf_nodes", "beta", "random_state"), bootstrap=False, oob_score=False, @@ -1570,6 +1598,7 @@ def __init__(self, self.min_weight_fraction_leaf = min_weight_fraction_leaf self.max_features = 1 self.max_leaf_nodes = max_leaf_nodes + self.beta = beta self.sparse_output = sparse_output def _set_oob_score(self, X, y): From 796fa8a06abab5be17b31fc9c215e43cdd833c16 Mon Sep 17 00:00:00 2001 From: Nelson Liu Date: Sun, 3 Jul 2016 18:44:36 -0700 Subject: [PATCH 06/16] feature: add separate condition to determine that beta is float --- sklearn/tree/tests/test_tree.py | 17 ++++++----------- sklearn/tree/tree.py | 8 +++++--- 2 files changed, 11 insertions(+), 14 deletions(-) diff --git a/sklearn/tree/tests/test_tree.py b/sklearn/tree/tests/test_tree.py index 07a833b13168e..bffc97d7069a8 100644 --- a/sklearn/tree/tests/test_tree.py +++ b/sklearn/tree/tests/test_tree.py @@ -441,10 +441,6 @@ def test_max_features(): clf.fit(iris.data, iris.target) assert_equal(clf.max_features_, 2) - # use values of beta that are invalid for classification - clf = TreeClassifier(beta=2.0) - assert_raises(ValueError, clf.fit, X, y) - for name, TreeEstimator in ALL_TREES.items(): est = TreeEstimator(max_features="sqrt") est.fit(iris.data, iris.target) @@ -497,13 +493,6 @@ def test_max_features(): est = TreeEstimator(max_features="foobar") assert_raises(ValueError, est.fit, X, y) - # use values of beta that are invalid - clf = TreeClassifier(beta=-1.0) - assert_raises(ValueError, clf.fit, X, y) - - clf = TreeClassifier(beta="foobar") - assert_raises(ValueError, clf.fit, X, y) - def test_error(): # Test that it gives proper exception on deficient input. @@ -516,6 +505,10 @@ def test_error(): X2 = [[-2, -1, 1]] # wrong feature shape for sample assert_raises(ValueError, est.predict_proba, X2) + # invalid type for beta parameter in classification + est = TreeEstimator(beta=2.0) + assert_raises(ValueError, est.fit, X, y) + for name, TreeEstimator in ALL_TREES.items(): # Invalid values for parameters assert_raises(ValueError, TreeEstimator(min_samples_leaf=-1).fit, X, y) @@ -535,6 +528,8 @@ def test_error(): X, y) assert_raises(ValueError, TreeEstimator(max_depth=-1).fit, X, y) assert_raises(ValueError, TreeEstimator(max_features=42).fit, X, y) + assert_raises(ValueError, TreeEstimator(beta=-1.0).fit, X, y) + assert_raises(ValueError, TreeEstimator(beta="foobar").fit, X, y) # Wrong dimensions est = TreeEstimator() diff --git a/sklearn/tree/tree.py b/sklearn/tree/tree.py index e92786a0b2c95..83d5dba95a9c1 100644 --- a/sklearn/tree/tree.py +++ b/sklearn/tree/tree.py @@ -308,14 +308,16 @@ def fit(self, X, y, sample_weight=None, check_input=True, else: min_weight_leaf = 0. + if not isinstance(beta, float): + raise ValueError("beta must be a float") if is_classification: - if not 0. <= beta <= 1: - raise ValueError("beta must be in range [0,1] " + if not 0. <= beta <= 1.: + raise ValueError("beta must be in range [0., 1.] " "in classification") else: if not 0. <= beta: raise ValueError("beta must be greater than or equal " - "to 0 in regression") + "to 0. 
in regression") presort = self.presort # Allow presort to be 'auto', which means True if the dataset is dense, From cdd8dfdbbcaeac25567d4edb548c9db88a8bd971 Mon Sep 17 00:00:00 2001 From: Nelson Liu Date: Sun, 3 Jul 2016 19:04:07 -0700 Subject: [PATCH 07/16] feature: add beta to gradient boosting estimators --- sklearn/ensemble/gradient_boosting.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/sklearn/ensemble/gradient_boosting.py b/sklearn/ensemble/gradient_boosting.py index 1b0767d419168..19b5145dbf10d 100644 --- a/sklearn/ensemble/gradient_boosting.py +++ b/sklearn/ensemble/gradient_boosting.py @@ -722,7 +722,7 @@ class BaseGradientBoosting(six.with_metaclass(ABCMeta, BaseEnsemble, @abstractmethod def __init__(self, loss, learning_rate, n_estimators, criterion, min_samples_split, min_samples_leaf, min_weight_fraction_leaf, - max_depth, init, subsample, max_features, + max_depth, beta, init, subsample, max_features, random_state, alpha=0.9, verbose=0, max_leaf_nodes=None, warm_start=False, presort='auto'): @@ -736,6 +736,7 @@ def __init__(self, loss, learning_rate, n_estimators, criterion, self.subsample = subsample self.max_features = max_features self.max_depth = max_depth + self.beta = beta self.init = init self.random_state = random_state self.alpha = alpha @@ -1358,6 +1359,10 @@ class GradientBoostingClassifier(BaseGradientBoosting, ClassifierMixin): If None then unlimited number of leaf nodes. If not None then ``max_depth`` will be ignored. + beta : float, optional (default=0.) + Threshold for early stopping in tree growth. If the impurity + of a node is below the threshold, the node is a leaf. + init : BaseEstimator, None, optional (default=None) An estimator object that is used to compute the initial predictions. ``init`` has to provide ``fit`` and ``predict``. @@ -1437,7 +1442,7 @@ class GradientBoostingClassifier(BaseGradientBoosting, ClassifierMixin): def __init__(self, loss='deviance', learning_rate=0.1, n_estimators=100, subsample=1.0, criterion='friedman_mse', min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0., - max_depth=3, init=None, random_state=None, + max_depth=3, beta=0.,init=None, random_state=None, max_features=None, verbose=0, max_leaf_nodes=None, warm_start=False, presort='auto'): @@ -1450,7 +1455,9 @@ def __init__(self, loss='deviance', learning_rate=0.1, n_estimators=100, max_depth=max_depth, init=init, subsample=subsample, max_features=max_features, random_state=random_state, verbose=verbose, - max_leaf_nodes=max_leaf_nodes, warm_start=warm_start, + max_leaf_nodes=max_leaf_nodes, + beta=beta, + warm_start=warm_start, presort=presort) def _validate_y(self, y): @@ -1711,6 +1718,10 @@ class GradientBoostingRegressor(BaseGradientBoosting, RegressorMixin): Best nodes are defined as relative reduction in impurity. If None then unlimited number of leaf nodes. + beta : float, optional (default=0.) + Threshold for early stopping in tree growth. If the impurity + of a node is below the threshold, the node is a leaf. + alpha : float (default=0.9) The alpha-quantile of the huber loss function and the quantile loss function. Only if ``loss='huber'`` or ``loss='quantile'``. 
@@ -1791,7 +1802,7 @@ class GradientBoostingRegressor(BaseGradientBoosting, RegressorMixin): def __init__(self, loss='ls', learning_rate=0.1, n_estimators=100, subsample=1.0, criterion='friedman_mse', min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0., - max_depth=3, init=None, random_state=None, + max_depth=3, beta=0., init=None, random_state=None, max_features=None, alpha=0.9, verbose=0, max_leaf_nodes=None, warm_start=False, presort='auto'): @@ -1801,7 +1812,7 @@ def __init__(self, loss='ls', learning_rate=0.1, n_estimators=100, min_samples_leaf=min_samples_leaf, min_weight_fraction_leaf=min_weight_fraction_leaf, max_depth=max_depth, init=init, subsample=subsample, - max_features=max_features, + max_features=max_features, beta=beta, random_state=random_state, alpha=alpha, verbose=verbose, max_leaf_nodes=max_leaf_nodes, warm_start=warm_start, presort=presort) From 68f2d6c450f99d4da47a1d36ff0cac417ac1da7b Mon Sep 17 00:00:00 2001 From: Nelson Liu Date: Mon, 4 Jul 2016 09:00:12 -0700 Subject: [PATCH 08/16] rename parameter to min_impurity_split, edit input validation and associated tests --- sklearn/ensemble/forest.py | 60 +++++++++++++-------------- sklearn/ensemble/gradient_boosting.py | 24 +++++------ sklearn/tree/_tree.pxd | 2 +- sklearn/tree/_tree.pyx | 16 +++---- sklearn/tree/tests/test_tree.py | 41 ++++++++---------- sklearn/tree/tree.py | 46 +++++++++----------- 6 files changed, 88 insertions(+), 101 deletions(-) diff --git a/sklearn/ensemble/forest.py b/sklearn/ensemble/forest.py index a096d39999967..3c030669356ed 100644 --- a/sklearn/ensemble/forest.py +++ b/sklearn/ensemble/forest.py @@ -805,9 +805,9 @@ class RandomForestClassifier(ForestClassifier): If None then unlimited number of leaf nodes. If not None then ``max_depth`` will be ignored. - beta : float, optional (default=0.) - Threshold for early stopping in tree growth. If the impurity - of a node is below the threshold, the node is a leaf. + min_impurity_split : float, optional (default=0.) + Threshold for early stopping in tree growth. A node will split + if its impurity is above the threshold, otherwise it is a leaf. bootstrap : boolean, optional (default=True) Whether bootstrap samples are used when building trees. @@ -903,7 +903,7 @@ def __init__(self, min_weight_fraction_leaf=0., max_features="auto", max_leaf_nodes=None, - beta=0., + min_impurity_split=0., bootstrap=True, oob_score=False, n_jobs=1, @@ -916,7 +916,7 @@ def __init__(self, n_estimators=n_estimators, estimator_params=("criterion", "max_depth", "min_samples_split", "min_samples_leaf", "min_weight_fraction_leaf", - "max_features", "max_leaf_nodes", "beta", + "max_features", "max_leaf_nodes", "min_impurity_split", "random_state"), bootstrap=bootstrap, oob_score=oob_score, @@ -933,7 +933,7 @@ def __init__(self, self.min_weight_fraction_leaf = min_weight_fraction_leaf self.max_features = max_features self.max_leaf_nodes = max_leaf_nodes - self.beta = beta + self.min_impurity_split = min_impurity_split class RandomForestRegressor(ForestRegressor): @@ -1007,9 +1007,9 @@ class RandomForestRegressor(ForestRegressor): If None then unlimited number of leaf nodes. If not None then ``max_depth`` will be ignored. - beta : float, optional (default=0.) - Threshold for early stopping in tree growth. If the impurity - of a node is below the threshold, the node is a leaf. + min_impurity_split : float, optional (default=0.) + Threshold for early stopping in tree growth. A node will split + if its impurity is above the threshold, otherwise it is a leaf. 
bootstrap : boolean, optional (default=True) Whether bootstrap samples are used when building trees. @@ -1074,7 +1074,7 @@ def __init__(self, min_weight_fraction_leaf=0., max_features="auto", max_leaf_nodes=None, - beta=0., + min_impurity_split=0., bootstrap=True, oob_score=False, n_jobs=1, @@ -1086,7 +1086,7 @@ def __init__(self, n_estimators=n_estimators, estimator_params=("criterion", "max_depth", "min_samples_split", "min_samples_leaf", "min_weight_fraction_leaf", - "max_features", "max_leaf_nodes", "beta", + "max_features", "max_leaf_nodes", "min_impurity_split", "random_state"), bootstrap=bootstrap, oob_score=oob_score, @@ -1102,7 +1102,7 @@ def __init__(self, self.min_weight_fraction_leaf = min_weight_fraction_leaf self.max_features = max_features self.max_leaf_nodes = max_leaf_nodes - self.beta = beta + self.min_impurity_split = min_impurity_split class ExtraTreesClassifier(ForestClassifier): @@ -1172,9 +1172,9 @@ class ExtraTreesClassifier(ForestClassifier): If None then unlimited number of leaf nodes. If not None then ``max_depth`` will be ignored. - beta : float, optional (default=0.) - Threshold for early stopping in tree growth. If the impurity - of a node is below the threshold, the node is a leaf. + min_impurity_split : float, optional (default=0.) + Threshold for early stopping in tree growth. A node will split + if its impurity is above the threshold, otherwise it is a leaf. bootstrap : boolean, optional (default=False) Whether bootstrap samples are used when building trees. @@ -1271,7 +1271,7 @@ def __init__(self, min_weight_fraction_leaf=0., max_features="auto", max_leaf_nodes=None, - beta=0., + min_impurity_split=0., bootstrap=False, oob_score=False, n_jobs=1, @@ -1284,7 +1284,7 @@ def __init__(self, n_estimators=n_estimators, estimator_params=("criterion", "max_depth", "min_samples_split", "min_samples_leaf", "min_weight_fraction_leaf", - "max_features", "max_leaf_nodes", "beta", + "max_features", "max_leaf_nodes", "min_impurity_split", "random_state"), bootstrap=bootstrap, oob_score=oob_score, @@ -1301,7 +1301,7 @@ def __init__(self, self.min_weight_fraction_leaf = min_weight_fraction_leaf self.max_features = max_features self.max_leaf_nodes = max_leaf_nodes - self.beta = beta + self.min_impurity_split = min_impurity_split class ExtraTreesRegressor(ForestRegressor): @@ -1373,9 +1373,9 @@ class ExtraTreesRegressor(ForestRegressor): If None then unlimited number of leaf nodes. If not None then ``max_depth`` will be ignored. - beta : float, optional (default=0.) - Threshold for early stopping in tree growth. If the impurity - of a node is below the threshold, the node is a leaf. + min_impurity_split : float, optional (default=0.) + Threshold for early stopping in tree growth. A node will split + if its impurity is above the threshold, otherwise it is a leaf. bootstrap : boolean, optional (default=False) Whether bootstrap samples are used when building trees. 
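With the rename in place the public spelling is `min_impurity_split` everywhere; a usage sketch against the renamed API (branch build assumed):

    from sklearn.datasets import load_iris
    from sklearn.ensemble import ExtraTreesClassifier

    iris = load_iris()

    # identical semantics to the old `beta`: nodes whose impurity is
    # below the threshold become leaves; only the name changed
    clf = ExtraTreesClassifier(n_estimators=30, min_impurity_split=0.1,
                               random_state=0)
    clf.fit(iris.data, iris.target)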
@@ -1441,7 +1441,7 @@ def __init__(self, min_weight_fraction_leaf=0., max_features="auto", max_leaf_nodes=None, - beta=0., + min_impurity_split=0., bootstrap=False, oob_score=False, n_jobs=1, @@ -1453,7 +1453,7 @@ def __init__(self, n_estimators=n_estimators, estimator_params=("criterion", "max_depth", "min_samples_split", "min_samples_leaf", "min_weight_fraction_leaf", - "max_features", "max_leaf_nodes", "beta", + "max_features", "max_leaf_nodes", "min_impurity_split", "random_state"), bootstrap=bootstrap, oob_score=oob_score, @@ -1469,7 +1469,7 @@ def __init__(self, self.min_weight_fraction_leaf = min_weight_fraction_leaf self.max_features = max_features self.max_leaf_nodes = max_leaf_nodes - self.beta = beta + self.min_impurity_split = min_impurity_split class RandomTreesEmbedding(BaseForest): """An ensemble of totally random trees. @@ -1523,9 +1523,9 @@ class RandomTreesEmbedding(BaseForest): If None then unlimited number of leaf nodes. If not None then ``max_depth`` will be ignored. - beta : float, optional (default=0.) - Threshold for early stopping in tree growth. If the impurity - of a node is below the threshold, the node is a leaf. + min_impurity_split : float, optional (default=0.) + Threshold for early stopping in tree growth. A node will split + if its impurity is above the threshold, otherwise it is a leaf. sparse_output : bool, optional (default=True) Whether or not to return a sparse CSR matrix, as default behavior, @@ -1571,7 +1571,7 @@ def __init__(self, min_samples_leaf=1, min_weight_fraction_leaf=0., max_leaf_nodes=None, - beta=0., + min_impurity_split=0., sparse_output=True, n_jobs=1, random_state=None, @@ -1582,7 +1582,7 @@ def __init__(self, n_estimators=n_estimators, estimator_params=("criterion", "max_depth", "min_samples_split", "min_samples_leaf", "min_weight_fraction_leaf", - "max_features", "max_leaf_nodes", "beta", + "max_features", "max_leaf_nodes", "min_impurity_split", "random_state"), bootstrap=False, oob_score=False, @@ -1598,7 +1598,7 @@ def __init__(self, self.min_weight_fraction_leaf = min_weight_fraction_leaf self.max_features = 1 self.max_leaf_nodes = max_leaf_nodes - self.beta = beta + self.min_impurity_split = min_impurity_split self.sparse_output = sparse_output def _set_oob_score(self, X, y): diff --git a/sklearn/ensemble/gradient_boosting.py b/sklearn/ensemble/gradient_boosting.py index 19b5145dbf10d..a5ed725e05e10 100644 --- a/sklearn/ensemble/gradient_boosting.py +++ b/sklearn/ensemble/gradient_boosting.py @@ -722,7 +722,7 @@ class BaseGradientBoosting(six.with_metaclass(ABCMeta, BaseEnsemble, @abstractmethod def __init__(self, loss, learning_rate, n_estimators, criterion, min_samples_split, min_samples_leaf, min_weight_fraction_leaf, - max_depth, beta, init, subsample, max_features, + max_depth, min_impurity_split, init, subsample, max_features, random_state, alpha=0.9, verbose=0, max_leaf_nodes=None, warm_start=False, presort='auto'): @@ -736,7 +736,7 @@ def __init__(self, loss, learning_rate, n_estimators, criterion, self.subsample = subsample self.max_features = max_features self.max_depth = max_depth - self.beta = beta + self.min_impurity_split = min_impurity_split self.init = init self.random_state = random_state self.alpha = alpha @@ -1359,9 +1359,9 @@ class GradientBoostingClassifier(BaseGradientBoosting, ClassifierMixin): If None then unlimited number of leaf nodes. If not None then ``max_depth`` will be ignored. - beta : float, optional (default=0.) - Threshold for early stopping in tree growth. 
If the impurity - of a node is below the threshold, the node is a leaf. + min_impurity_split : float, optional (default=0.) + Threshold for early stopping in tree growth. A node will split + if its impurity is above the threshold, otherwise it is a leaf. init : BaseEstimator, None, optional (default=None) An estimator object that is used to compute the initial @@ -1442,7 +1442,7 @@ class GradientBoostingClassifier(BaseGradientBoosting, ClassifierMixin): def __init__(self, loss='deviance', learning_rate=0.1, n_estimators=100, subsample=1.0, criterion='friedman_mse', min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0., - max_depth=3, beta=0.,init=None, random_state=None, + max_depth=3, min_impurity_split=0.,init=None, random_state=None, max_features=None, verbose=0, max_leaf_nodes=None, warm_start=False, presort='auto'): @@ -1456,7 +1456,7 @@ def __init__(self, loss='deviance', learning_rate=0.1, n_estimators=100, max_features=max_features, random_state=random_state, verbose=verbose, max_leaf_nodes=max_leaf_nodes, - beta=beta, + min_impurity_split=min_impurity_split, warm_start=warm_start, presort=presort) @@ -1718,9 +1718,9 @@ class GradientBoostingRegressor(BaseGradientBoosting, RegressorMixin): Best nodes are defined as relative reduction in impurity. If None then unlimited number of leaf nodes. - beta : float, optional (default=0.) - Threshold for early stopping in tree growth. If the impurity - of a node is below the threshold, the node is a leaf. + min_impurity_split : float, optional (default=0.) + Threshold for early stopping in tree growth. A node will split + if its impurity is above the threshold, otherwise it is a leaf. alpha : float (default=0.9) The alpha-quantile of the huber loss function and the quantile @@ -1802,7 +1802,7 @@ class GradientBoostingRegressor(BaseGradientBoosting, RegressorMixin): def __init__(self, loss='ls', learning_rate=0.1, n_estimators=100, subsample=1.0, criterion='friedman_mse', min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0., - max_depth=3, beta=0., init=None, random_state=None, + max_depth=3, min_impurity_split=0., init=None, random_state=None, max_features=None, alpha=0.9, verbose=0, max_leaf_nodes=None, warm_start=False, presort='auto'): @@ -1812,7 +1812,7 @@ def __init__(self, loss='ls', learning_rate=0.1, n_estimators=100, min_samples_leaf=min_samples_leaf, min_weight_fraction_leaf=min_weight_fraction_leaf, max_depth=max_depth, init=init, subsample=subsample, - max_features=max_features, beta=beta, + max_features=max_features, min_impurity_split=min_impurity_split, random_state=random_state, alpha=alpha, verbose=verbose, max_leaf_nodes=max_leaf_nodes, warm_start=warm_start, presort=presort) diff --git a/sklearn/tree/_tree.pxd b/sklearn/tree/_tree.pxd index 0db91ed2c87eb..dbf0545b1e1d5 100644 --- a/sklearn/tree/_tree.pxd +++ b/sklearn/tree/_tree.pxd @@ -96,7 +96,7 @@ cdef class TreeBuilder: cdef SIZE_t min_samples_leaf # Minimum number of samples in a leaf cdef double min_weight_leaf # Minimum weight in a leaf cdef SIZE_t max_depth # Maximal tree depth - cdef double beta # Impurity threshold for early stopping + cdef double min_impurity_split # Impurity threshold for early stopping cpdef build(self, Tree tree, object X, np.ndarray y, np.ndarray sample_weight=*, diff --git a/sklearn/tree/_tree.pyx b/sklearn/tree/_tree.pyx index 9f97938521fe1..bf39bb928d2cf 100644 --- a/sklearn/tree/_tree.pyx +++ b/sklearn/tree/_tree.pyx @@ -132,13 +132,13 @@ cdef class DepthFirstTreeBuilder(TreeBuilder): def __cinit__(self, 
Splitter splitter, SIZE_t min_samples_split, SIZE_t min_samples_leaf, double min_weight_leaf, - SIZE_t max_depth, double beta): + SIZE_t max_depth, double min_impurity_split): self.splitter = splitter self.min_samples_split = min_samples_split self.min_samples_leaf = min_samples_leaf self.min_weight_leaf = min_weight_leaf self.max_depth = max_depth - self.beta = beta + self.min_impurity_split = min_impurity_split cpdef build(self, Tree tree, object X, np.ndarray y, np.ndarray sample_weight=None, @@ -168,7 +168,7 @@ cdef class DepthFirstTreeBuilder(TreeBuilder): cdef SIZE_t min_samples_leaf = self.min_samples_leaf cdef double min_weight_leaf = self.min_weight_leaf cdef SIZE_t min_samples_split = self.min_samples_split - cdef double beta = self.beta + cdef double min_impurity_split = self.min_impurity_split # Recursive partition (without actual recursion) splitter.init(X, y, sample_weight_ptr, X_idx_sorted) @@ -228,7 +228,7 @@ cdef class DepthFirstTreeBuilder(TreeBuilder): is_leaf = (is_leaf or (impurity <= MIN_IMPURITY_SPLIT) or - (impurity < beta)) + (impurity < min_impurity_split)) if not is_leaf: splitter.node_split(impurity, &split, &n_constant_features) @@ -295,14 +295,14 @@ cdef class BestFirstTreeBuilder(TreeBuilder): def __cinit__(self, Splitter splitter, SIZE_t min_samples_split, SIZE_t min_samples_leaf, min_weight_leaf, SIZE_t max_depth, SIZE_t max_leaf_nodes, - double beta): + double min_impurity_split): self.splitter = splitter self.min_samples_split = min_samples_split self.min_samples_leaf = min_samples_leaf self.min_weight_leaf = min_weight_leaf self.max_depth = max_depth self.max_leaf_nodes = max_leaf_nodes - self.beta = beta + self.min_impurity_split = min_impurity_split cpdef build(self, Tree tree, object X, np.ndarray y, np.ndarray sample_weight=None, @@ -428,7 +428,7 @@ cdef class BestFirstTreeBuilder(TreeBuilder): cdef SIZE_t n_node_samples cdef SIZE_t n_constant_features = 0 cdef double weighted_n_samples = splitter.weighted_n_samples - cdef double beta = self.beta + cdef double min_impurity_split = self.min_impurity_split cdef double weighted_n_node_samples cdef bint is_leaf cdef SIZE_t n_left, n_right @@ -445,7 +445,7 @@ cdef class BestFirstTreeBuilder(TreeBuilder): (n_node_samples < 2 * self.min_samples_leaf) or (weighted_n_node_samples < self.min_weight_leaf) or (impurity <= MIN_IMPURITY_SPLIT) or - (impurity < beta)) + (impurity < min_impurity_split)) if not is_leaf: splitter.node_split(impurity, &split, &n_constant_features) diff --git a/sklearn/tree/tests/test_tree.py b/sklearn/tree/tests/test_tree.py index bffc97d7069a8..02132290e54fc 100644 --- a/sklearn/tree/tests/test_tree.py +++ b/sklearn/tree/tests/test_tree.py @@ -505,10 +505,6 @@ def test_error(): X2 = [[-2, -1, 1]] # wrong feature shape for sample assert_raises(ValueError, est.predict_proba, X2) - # invalid type for beta parameter in classification - est = TreeEstimator(beta=2.0) - assert_raises(ValueError, est.fit, X, y) - for name, TreeEstimator in ALL_TREES.items(): # Invalid values for parameters assert_raises(ValueError, TreeEstimator(min_samples_leaf=-1).fit, X, y) @@ -528,8 +524,7 @@ def test_error(): X, y) assert_raises(ValueError, TreeEstimator(max_depth=-1).fit, X, y) assert_raises(ValueError, TreeEstimator(max_features=42).fit, X, y) - assert_raises(ValueError, TreeEstimator(beta=-1.0).fit, X, y) - assert_raises(ValueError, TreeEstimator(beta="foobar").fit, X, y) + assert_raises(ValueError, TreeEstimator(min_impurity_split=-1.0).fit, X, y) # Wrong dimensions est = TreeEstimator() @@ -687,8 
+682,8 @@ def test_min_weight_fraction_leaf(): yield check_min_weight_fraction_leaf, name, "multilabel", True -def test_beta(): - # Test if beta creates leaves with impurity [0, beta) when +def test_min_impurity_split(): + # Test if min_impurity_split creates leaves with impurity [0, min_impurity_split) when # min_samples_leaf = 1 and min_samples_split = 2. X = np.asfortranarray(iris.data.astype(tree._tree.DTYPE)) y = iris.target @@ -697,30 +692,30 @@ def test_beta(): # by setting max_leaf_nodes # we set max leaf nodes to a number greater than the total nodes # possible, thus ensuring that the leaves generated have impurity - # of 0 when there is no beta stopping used. + # of 0 when there is no min_impurity_split stopping used. for max_leaf_nodes, name in product((None, 1000), ALL_TREES.keys()): TreeEstimator = ALL_TREES[name] - beta = .5 + min_impurity_split = .5 - # verify leaf nodes without beta have impurity 0 + # verify leaf nodes without min_impurity_split have impurity 0 est = TreeEstimator(max_leaf_nodes=max_leaf_nodes, random_state=0) - assert_equal(est.beta, 0., - "Failed, beta = {0} != 0".format( - est.beta)) + assert_equal(est.min_impurity_split, 0., + "Failed, min_impurity_split = {0} != 0".format( + est.min_impurity_split)) est.fit(X, y) for node in range(est.tree_.node_count): if (est.tree_.children_left[node] == TREE_LEAF or est.tree_.children_right[node] == TREE_LEAF): assert_equal(est.tree_.impurity[node], 0., "Failed with {0} " - "beta={1}".format( + "min_impurity_split={1}".format( est.tree_.impurity[node], - est.beta)) + est.min_impurity_split)) - # verify leaf nodes have impurity [0,beta) when using beta + # verify leaf nodes have impurity [0,min_impurity_split) when using min_impurity_split est = TreeEstimator(max_leaf_nodes=max_leaf_nodes, - beta=beta, + min_impurity_split=min_impurity_split, random_state=0) est.fit(X, y) for node in range(est.tree_.node_count): @@ -728,14 +723,14 @@ def test_beta(): est.tree_.children_right[node] == TREE_LEAF): assert_greater_equal(est.tree_.impurity[node], 0, "Failed with {0} " - "beta={1}".format( + "min_impurity_split={1}".format( est.tree_.impurity[node], - est.beta)) - assert_less(est.tree_.impurity[node], beta, + est.min_impurity_split)) + assert_less(est.tree_.impurity[node], min_impurity_split, "Failed with {0} " - "beta={1}".format( + "min_impurity_split={1}".format( est.tree_.impurity[node], - est.beta)) + est.min_impurity_split)) def test_pickle(): diff --git a/sklearn/tree/tree.py b/sklearn/tree/tree.py index 83d5dba95a9c1..00d99421a82c7 100644 --- a/sklearn/tree/tree.py +++ b/sklearn/tree/tree.py @@ -90,7 +90,7 @@ def __init__(self, max_features, max_leaf_nodes, random_state, - beta, + min_impurity_split, class_weight=None, presort=False): self.criterion = criterion @@ -102,7 +102,7 @@ def __init__(self, self.max_features = max_features self.random_state = random_state self.max_leaf_nodes = max_leaf_nodes - self.beta = beta + self.min_impurity_split = min_impurity_split self.class_weight = class_weight self.presort = presort @@ -154,7 +154,6 @@ def fit(self, X, y, sample_weight=None, check_input=True, """ random_state = check_random_state(self.random_state) - beta = self.beta if check_input: X = check_array(X, dtype=DTYPE, accept_sparse="csc") y = check_array(y, ensure_2d=False, dtype=None) @@ -308,16 +307,9 @@ def fit(self, X, y, sample_weight=None, check_input=True, else: min_weight_leaf = 0. - if not isinstance(beta, float): - raise ValueError("beta must be a float") - if is_classification: - if not 0. 
<= beta <= 1.: - raise ValueError("beta must be in range [0., 1.] " - "in classification") - else: - if not 0. <= beta: - raise ValueError("beta must be greater than or equal " - "to 0. in regression") + if not 0. <= self.min_impurity_split: + raise ValueError("min_impurity_split must be greater than or equal " + "to 0") presort = self.presort # Allow presort to be 'auto', which means True if the dataset is dense, @@ -374,13 +366,13 @@ def fit(self, X, y, sample_weight=None, check_input=True, builder = DepthFirstTreeBuilder(splitter, min_samples_split, min_samples_leaf, min_weight_leaf, - max_depth, beta) + max_depth, self.min_impurity_split) else: builder = BestFirstTreeBuilder(splitter, min_samples_split, min_samples_leaf, min_weight_leaf, max_depth, - max_leaf_nodes, beta) + max_leaf_nodes, self.min_impurity_split) builder.build(self.tree_, X, y, sample_weight, X_idx_sorted) @@ -623,9 +615,9 @@ class DecisionTreeClassifier(BaseDecisionTree, ClassifierMixin): If None, the random number generator is the RandomState instance used by `np.random`. - beta : float, optional (default=0.) - Threshold for early stopping in tree growth. If the impurity - of a node is below the threshold, the node is a leaf. + min_impurity_split : float, optional (default=0.) + Threshold for early stopping in tree growth. A node will split + if its impurity is above the threshold, otherwise it is a leaf. presort : bool, optional (default=False) Whether to presort the data to speed up the finding of best splits in @@ -704,7 +696,7 @@ def __init__(self, max_features=None, random_state=None, max_leaf_nodes=None, - beta=0., + min_impurity_split=0., class_weight=None, presort=False): super(DecisionTreeClassifier, self).__init__( @@ -718,7 +710,7 @@ def __init__(self, max_leaf_nodes=max_leaf_nodes, class_weight=class_weight, random_state=random_state, - beta=beta, + min_impurity_split=min_impurity_split, presort=presort) def predict_proba(self, X, check_input=True): @@ -869,7 +861,7 @@ class DecisionTreeRegressor(BaseDecisionTree, RegressorMixin): If None, the random number generator is the RandomState instance used by `np.random`. - beta : float, optional (default=0.) + min_impurity_split : float, optional (default=0.) Threshold for early stopping in tree growth. If the impurity of a node is below the threshold, the node is a leaf. 
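The accompanying test deliberately runs with and without `max_leaf_nodes`, because that flag switches between the depth-first and best-first builders; both must honour the threshold. The same check can be reproduced by hand (sketch, branch build assumed):

    from sklearn.datasets import load_iris
    from sklearn.tree import DecisionTreeClassifier

    iris = load_iris()

    for max_leaf_nodes in (None, 1000):   # depth-first vs. best-first
        est = DecisionTreeClassifier(max_leaf_nodes=max_leaf_nodes,
                                     min_impurity_split=0.5,
                                     random_state=0)
        est.fit(iris.data, iris.target)
        leaves = est.tree_.children_left == -1
        assert est.tree_.impurity[leaves].max() < 0.5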
@@ -942,7 +934,7 @@ def __init__(self, max_features=None, random_state=None, max_leaf_nodes=None, - beta=0., + min_impurity_split=0., presort=False): super(DecisionTreeRegressor, self).__init__( criterion=criterion, @@ -954,7 +946,7 @@ def __init__(self, max_features=max_features, max_leaf_nodes=max_leaf_nodes, random_state=random_state, - beta=beta, + min_impurity_split=min_impurity_split, presort=presort) @@ -992,7 +984,7 @@ def __init__(self, max_features="auto", random_state=None, max_leaf_nodes=None, - beta=0., + min_impurity_split=0., class_weight=None): super(ExtraTreeClassifier, self).__init__( criterion=criterion, @@ -1004,7 +996,7 @@ def __init__(self, max_features=max_features, max_leaf_nodes=max_leaf_nodes, class_weight=class_weight, - beta=beta, + min_impurity_split=min_impurity_split, random_state=random_state) @@ -1041,7 +1033,7 @@ def __init__(self, min_weight_fraction_leaf=0., max_features="auto", random_state=None, - beta=0., + min_impurity_split=0., max_leaf_nodes=None): super(ExtraTreeRegressor, self).__init__( criterion=criterion, @@ -1052,5 +1044,5 @@ def __init__(self, min_weight_fraction_leaf=min_weight_fraction_leaf, max_features=max_features, max_leaf_nodes=max_leaf_nodes, - beta=beta, + min_impurity_split=min_impurity_split, random_state=random_state) From 346eac932337bcc4751717fb94e17abf941798b8 Mon Sep 17 00:00:00 2001 From: Nelson Liu Date: Mon, 4 Jul 2016 09:30:03 -0700 Subject: [PATCH 09/16] chore: fix spacing in forest and force recompilation of grad boosting extension --- sklearn/ensemble/_gradient_boosting.pyx | 2 +- sklearn/ensemble/forest.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/sklearn/ensemble/_gradient_boosting.pyx b/sklearn/ensemble/_gradient_boosting.pyx index 9e6e9f6d29c0e..d268856e32049 100644 --- a/sklearn/ensemble/_gradient_boosting.pyx +++ b/sklearn/ensemble/_gradient_boosting.pyx @@ -35,7 +35,7 @@ ctypedef np.npy_intp SIZE_t # constant to mark tree leafs cdef int LEAF = -1 - +# trivial comment to force recompilation cdef void _predict_regression_tree_inplace_fast(DTYPE_t *X, Node* root_node, double *value, diff --git a/sklearn/ensemble/forest.py b/sklearn/ensemble/forest.py index 3c030669356ed..4208a4201838a 100644 --- a/sklearn/ensemble/forest.py +++ b/sklearn/ensemble/forest.py @@ -1471,6 +1471,7 @@ def __init__(self, self.max_leaf_nodes = max_leaf_nodes self.min_impurity_split = min_impurity_split + class RandomTreesEmbedding(BaseForest): """An ensemble of totally random trees. From 1a5fae5fd08465f7ba3d0e3b26ee89ad22026dd5 Mon Sep 17 00:00:00 2001 From: Nelson Liu Date: Mon, 4 Jul 2016 09:57:02 -0700 Subject: [PATCH 10/16] remove trivial comment in grad boost and add whats new --- doc/whats_new.rst | 3 +++ sklearn/ensemble/_gradient_boosting.pyx | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/doc/whats_new.rst b/doc/whats_new.rst index c47f4274f4dd8..e5ef76d388bc0 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -125,6 +125,9 @@ New features `_) by `Nelson Liu`_. + - Added weighted impurity-based early stopping criterion for decision tree growth. + (`#6954 `_) by `Nelson Liu`_ + Enhancements ............ 
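The range check is also part of the public contract now; a negative threshold fails fast at fit time (sketch, with a tiny hypothetical dataset):

    import numpy as np
    from sklearn.tree import DecisionTreeRegressor

    X = np.array([[0.0], [1.0], [2.0], [3.0]])
    y = np.array([0.0, 1.0, 2.0, 3.0])

    try:
        DecisionTreeRegressor(min_impurity_split=-1.0).fit(X, y)
    except ValueError as exc:
        print(exc)  # min_impurity_split must be greater than or equal to 0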
diff --git a/sklearn/ensemble/_gradient_boosting.pyx b/sklearn/ensemble/_gradient_boosting.pyx index d268856e32049..9e6e9f6d29c0e 100644 --- a/sklearn/ensemble/_gradient_boosting.pyx +++ b/sklearn/ensemble/_gradient_boosting.pyx @@ -35,7 +35,7 @@ ctypedef np.npy_intp SIZE_t # constant to mark tree leafs cdef int LEAF = -1 -# trivial comment to force recompilation + cdef void _predict_regression_tree_inplace_fast(DTYPE_t *X, Node* root_node, double *value, From f85e74a653cd3c1f86d48490581ff1529cc76eda Mon Sep 17 00:00:00 2001 From: Nelson Liu Date: Mon, 4 Jul 2016 12:42:36 -0700 Subject: [PATCH 11/16] edit wording in test comment / rebuild --- sklearn/tree/tests/test_tree.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/sklearn/tree/tests/test_tree.py b/sklearn/tree/tests/test_tree.py index 02132290e54fc..dc3aae1b87f74 100644 --- a/sklearn/tree/tests/test_tree.py +++ b/sklearn/tree/tests/test_tree.py @@ -683,16 +683,14 @@ def test_min_weight_fraction_leaf(): def test_min_impurity_split(): - # Test if min_impurity_split creates leaves with impurity [0, min_impurity_split) when - # min_samples_leaf = 1 and min_samples_split = 2. + # test if min_impurity_split creates leaves with impurity + # [0, min_impurity_split) when min_samples_leaf = 1 and + # min_samples_split = 2. X = np.asfortranarray(iris.data.astype(tree._tree.DTYPE)) y = iris.target # test both DepthFirstTreeBuilder and BestFirstTreeBuilder # by setting max_leaf_nodes - # we set max leaf nodes to a number greater than the total nodes - # possible, thus ensuring that the leaves generated have impurity - # of 0 when there is no min_impurity_split stopping used. for max_leaf_nodes, name in product((None, 1000), ALL_TREES.keys()): TreeEstimator = ALL_TREES[name] min_impurity_split = .5 From 15a2951fdc5a94ec434aa286029b360128ab5f57 Mon Sep 17 00:00:00 2001 From: Nelson Liu Date: Sun, 17 Jul 2016 18:52:14 -0500 Subject: [PATCH 12/16] rename constant with the same name as our parameter --- sklearn/tree/_tree.pyx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sklearn/tree/_tree.pyx b/sklearn/tree/_tree.pyx index bf39bb928d2cf..7beebe88d3a6a 100644 --- a/sklearn/tree/_tree.pyx +++ b/sklearn/tree/_tree.pyx @@ -64,7 +64,7 @@ TREE_UNDEFINED = -2 cdef SIZE_t _TREE_LEAF = TREE_LEAF cdef SIZE_t _TREE_UNDEFINED = TREE_UNDEFINED cdef SIZE_t INITIAL_STACK_SIZE = 10 -cdef DTYPE_t MIN_IMPURITY_SPLIT = 1e-7 +cdef DTYPE_t LEAF_MIN_IMPURITY = 1e-7 # Repeat struct definition for numpy NODE_DTYPE = np.dtype({ @@ -227,7 +227,7 @@ cdef class DepthFirstTreeBuilder(TreeBuilder): first = 0 is_leaf = (is_leaf or - (impurity <= MIN_IMPURITY_SPLIT) or + (impurity <= LEAF_MIN_IMPURITY) or (impurity < min_impurity_split)) if not is_leaf: @@ -444,7 +444,7 @@ cdef class BestFirstTreeBuilder(TreeBuilder): (n_node_samples < self.min_samples_split) or (n_node_samples < 2 * self.min_samples_leaf) or (weighted_n_node_samples < self.min_weight_leaf) or - (impurity <= MIN_IMPURITY_SPLIT) or + (impurity <= LEAF_MIN_IMPURITY) or (impurity < min_impurity_split)) if not is_leaf: From 7ff2aaac1b7a5bbb6d050cf3e592ce95cf75af7f Mon Sep 17 00:00:00 2001 From: Nelson Liu Date: Mon, 25 Jul 2016 11:32:16 -0700 Subject: [PATCH 13/16] edit line length for what's new --- doc/whats_new.rst | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/doc/whats_new.rst b/doc/whats_new.rst index e5ef76d388bc0..7b9c90f8afa1f 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -125,8 +125,10 @@ New features `_) by `Nelson 
Liu`_. - - Added weighted impurity-based early stopping criterion for decision tree growth. - (`#6954 `_) by `Nelson Liu`_ + - Added weighted impurity-based early stopping criterion for decision tree + growth. (`#6954 + `_) by `Nelson + Liu`_ Enhancements ............ From 838bad6809f0974e43d7bfec68eb6df2a58e5ed0 Mon Sep 17 00:00:00 2001 From: Nelson Liu Date: Tue, 26 Jul 2016 09:15:27 -0700 Subject: [PATCH 14/16] remove constant and set min_impurity_split to 1e-7 by default --- sklearn/tree/_tree.pyx | 7 ++----- sklearn/tree/tests/test_tree.py | 21 +++++++++++---------- sklearn/tree/tree.py | 12 ++++++------ 3 files changed, 19 insertions(+), 21 deletions(-) diff --git a/sklearn/tree/_tree.pyx b/sklearn/tree/_tree.pyx index 7beebe88d3a6a..f3db4a197580a 100644 --- a/sklearn/tree/_tree.pyx +++ b/sklearn/tree/_tree.pyx @@ -64,7 +64,6 @@ TREE_UNDEFINED = -2 cdef SIZE_t _TREE_LEAF = TREE_LEAF cdef SIZE_t _TREE_UNDEFINED = TREE_UNDEFINED cdef SIZE_t INITIAL_STACK_SIZE = 10 -cdef DTYPE_t LEAF_MIN_IMPURITY = 1e-7 # Repeat struct definition for numpy NODE_DTYPE = np.dtype({ @@ -227,8 +226,7 @@ cdef class DepthFirstTreeBuilder(TreeBuilder): first = 0 is_leaf = (is_leaf or - (impurity <= LEAF_MIN_IMPURITY) or - (impurity < min_impurity_split)) + (impurity <= min_impurity_split)) if not is_leaf: splitter.node_split(impurity, &split, &n_constant_features) @@ -444,8 +442,7 @@ cdef class BestFirstTreeBuilder(TreeBuilder): (n_node_samples < self.min_samples_split) or (n_node_samples < 2 * self.min_samples_leaf) or (weighted_n_node_samples < self.min_weight_leaf) or - (impurity <= LEAF_MIN_IMPURITY) or - (impurity < min_impurity_split)) + (impurity <= min_impurity_split)) if not is_leaf: splitter.node_split(impurity, &split, &n_constant_features) diff --git a/sklearn/tree/tests/test_tree.py b/sklearn/tree/tests/test_tree.py index dc3aae1b87f74..231d12d539e5a 100644 --- a/sklearn/tree/tests/test_tree.py +++ b/sklearn/tree/tests/test_tree.py @@ -695,11 +695,12 @@ def test_min_impurity_split(): TreeEstimator = ALL_TREES[name] min_impurity_split = .5 - # verify leaf nodes without min_impurity_split have impurity 0 + # verify leaf nodes without min_impurity_split less than + # impurity 1e-7 est = TreeEstimator(max_leaf_nodes=max_leaf_nodes, random_state=0) - assert_equal(est.min_impurity_split, 0., - "Failed, min_impurity_split = {0} != 0".format( + assert_less_equal(est.min_impurity_split, 1e-7, + "Failed, min_impurity_split = {0} > 1e-7".format( est.min_impurity_split)) est.fit(X, y) for node in range(est.tree_.node_count): @@ -711,7 +712,7 @@ def test_min_impurity_split(): est.tree_.impurity[node], est.min_impurity_split)) - # verify leaf nodes have impurity [0,min_impurity_split) when using min_impurity_split + # verify leaf nodes have impurity [0,min_impurity_split] when using min_impurity_split est = TreeEstimator(max_leaf_nodes=max_leaf_nodes, min_impurity_split=min_impurity_split, random_state=0) @@ -720,15 +721,15 @@ def test_min_impurity_split(): if (est.tree_.children_left[node] == TREE_LEAF or est.tree_.children_right[node] == TREE_LEAF): assert_greater_equal(est.tree_.impurity[node], 0, - "Failed with {0} " + "Failed with {0}, " "min_impurity_split={1}".format( est.tree_.impurity[node], est.min_impurity_split)) - assert_less(est.tree_.impurity[node], min_impurity_split, - "Failed with {0} " - "min_impurity_split={1}".format( - est.tree_.impurity[node], - est.min_impurity_split)) + assert_less_equal(est.tree_.impurity[node], min_impurity_split, + "Failed with {0}, " + 
"min_impurity_split={1}".format( + est.tree_.impurity[node], + est.min_impurity_split)) def test_pickle(): diff --git a/sklearn/tree/tree.py b/sklearn/tree/tree.py index 00d99421a82c7..907d46dce3ce3 100644 --- a/sklearn/tree/tree.py +++ b/sklearn/tree/tree.py @@ -307,7 +307,7 @@ def fit(self, X, y, sample_weight=None, check_input=True, else: min_weight_leaf = 0. - if not 0. <= self.min_impurity_split: + if self.min_impurity_split < 0.: raise ValueError("min_impurity_split must be greater than or equal " "to 0") @@ -696,7 +696,7 @@ def __init__(self, max_features=None, random_state=None, max_leaf_nodes=None, - min_impurity_split=0., + min_impurity_split=1e-7, class_weight=None, presort=False): super(DecisionTreeClassifier, self).__init__( @@ -861,7 +861,7 @@ class DecisionTreeRegressor(BaseDecisionTree, RegressorMixin): If None, the random number generator is the RandomState instance used by `np.random`. - min_impurity_split : float, optional (default=0.) + min_impurity_split : float, optional (default=1e-7) Threshold for early stopping in tree growth. If the impurity of a node is below the threshold, the node is a leaf. @@ -934,7 +934,7 @@ def __init__(self, max_features=None, random_state=None, max_leaf_nodes=None, - min_impurity_split=0., + min_impurity_split=1e-7, presort=False): super(DecisionTreeRegressor, self).__init__( criterion=criterion, @@ -984,7 +984,7 @@ def __init__(self, max_features="auto", random_state=None, max_leaf_nodes=None, - min_impurity_split=0., + min_impurity_split=1e-7, class_weight=None): super(ExtraTreeClassifier, self).__init__( criterion=criterion, @@ -1033,7 +1033,7 @@ def __init__(self, min_weight_fraction_leaf=0., max_features="auto", random_state=None, - min_impurity_split=0., + min_impurity_split=1e-7, max_leaf_nodes=None): super(ExtraTreeRegressor, self).__init__( criterion=criterion, From b15f10256ae341ce042ab0d00e259bcc3fa508ce Mon Sep 17 00:00:00 2001 From: Nelson Liu Date: Tue, 26 Jul 2016 09:23:37 -0700 Subject: [PATCH 15/16] fix docstrings for new default --- sklearn/ensemble/forest.py | 10 +++++----- sklearn/ensemble/gradient_boosting.py | 4 ++-- sklearn/tree/tree.py | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/sklearn/ensemble/forest.py b/sklearn/ensemble/forest.py index 4208a4201838a..71464e1ca1864 100644 --- a/sklearn/ensemble/forest.py +++ b/sklearn/ensemble/forest.py @@ -805,7 +805,7 @@ class RandomForestClassifier(ForestClassifier): If None then unlimited number of leaf nodes. If not None then ``max_depth`` will be ignored. - min_impurity_split : float, optional (default=0.) + min_impurity_split : float, optional (default=1e-7) Threshold for early stopping in tree growth. A node will split if its impurity is above the threshold, otherwise it is a leaf. @@ -1007,7 +1007,7 @@ class RandomForestRegressor(ForestRegressor): If None then unlimited number of leaf nodes. If not None then ``max_depth`` will be ignored. - min_impurity_split : float, optional (default=0.) + min_impurity_split : float, optional (default=1e-7) Threshold for early stopping in tree growth. A node will split if its impurity is above the threshold, otherwise it is a leaf. @@ -1172,7 +1172,7 @@ class ExtraTreesClassifier(ForestClassifier): If None then unlimited number of leaf nodes. If not None then ``max_depth`` will be ignored. - min_impurity_split : float, optional (default=0.) + min_impurity_split : float, optional (default=1e-7) Threshold for early stopping in tree growth. 
         A node will split
         if its impurity is above the threshold, otherwise it is a leaf.
 
@@ -1373,7 +1373,7 @@ class ExtraTreesRegressor(ForestRegressor):
         If None then unlimited number of leaf nodes.
         If not None then ``max_depth`` will be ignored.
 
-    min_impurity_split : float, optional (default=0.)
+    min_impurity_split : float, optional (default=1e-7)
         Threshold for early stopping in tree growth. A node will split
         if its impurity is above the threshold, otherwise it is a leaf.
 
@@ -1524,7 +1524,7 @@ class RandomTreesEmbedding(BaseForest):
         If None then unlimited number of leaf nodes.
         If not None then ``max_depth`` will be ignored.
 
-    min_impurity_split : float, optional (default=0.)
+    min_impurity_split : float, optional (default=1e-7)
         Threshold for early stopping in tree growth. A node will split
         if its impurity is above the threshold, otherwise it is a leaf.
 
diff --git a/sklearn/ensemble/gradient_boosting.py b/sklearn/ensemble/gradient_boosting.py
index a5ed725e05e10..a37f047088687 100644
--- a/sklearn/ensemble/gradient_boosting.py
+++ b/sklearn/ensemble/gradient_boosting.py
@@ -1359,7 +1359,7 @@ class GradientBoostingClassifier(BaseGradientBoosting, ClassifierMixin):
         If None then unlimited number of leaf nodes.
         If not None then ``max_depth`` will be ignored.
 
-    min_impurity_split : float, optional (default=0.)
+    min_impurity_split : float, optional (default=1e-7)
         Threshold for early stopping in tree growth. A node will split
         if its impurity is above the threshold, otherwise it is a leaf.
 
@@ -1718,7 +1718,7 @@ class GradientBoostingRegressor(BaseGradientBoosting, RegressorMixin):
         Best nodes are defined as relative reduction in impurity.
         If None then unlimited number of leaf nodes.
 
-    min_impurity_split : float, optional (default=0.)
+    min_impurity_split : float, optional (default=1e-7)
         Threshold for early stopping in tree growth. A node will split
         if its impurity is above the threshold, otherwise it is a leaf.
 
diff --git a/sklearn/tree/tree.py b/sklearn/tree/tree.py
index 907d46dce3ce3..c1aa0b3ab2578 100644
--- a/sklearn/tree/tree.py
+++ b/sklearn/tree/tree.py
@@ -615,7 +615,7 @@ class DecisionTreeClassifier(BaseDecisionTree, ClassifierMixin):
         If None, the random number generator is the RandomState instance used
         by `np.random`.
 
-    min_impurity_split : float, optional (default=0.)
+    min_impurity_split : float, optional (default=1e-7)
         Threshold for early stopping in tree growth. A node will split
         if its impurity is above the threshold, otherwise it is a leaf.
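The net effect of patches 14 and 15 is that the old hard-coded 1e-7 impurity cutoff survives only as the default of a now user-tunable parameter. A minimal sketch of the behaviour the updated test asserts — not part of the patch series, and assuming a scikit-learn build that includes the patches above:

import numpy as np
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier

iris = load_iris()

# With an explicit threshold, a node becomes a leaf once its impurity
# drops to min_impurity_split or below, so every leaf's impurity ends
# up in the closed interval [0, 0.5].
est = DecisionTreeClassifier(min_impurity_split=0.5, random_state=0)
est.fit(iris.data, iris.target)

leaves = est.tree_.children_left == -1  # TREE_LEAF is -1
assert np.all(est.tree_.impurity[leaves] >= 0.0)
assert np.all(est.tree_.impurity[leaves] <= 0.5)

Left at the 1e-7 default, trees grow until leaves are pure up to numerical noise, which reproduces the pre-patch behaviour.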
From 9fce4fc11b6dc91ddad16f159f26d321f9c3e6fb Mon Sep 17 00:00:00 2001
From: Nelson Liu
Date: Wed, 27 Jul 2016 08:33:28 -0700
Subject: [PATCH 16/16] fix defaults in gradientboosting and forest classes

---
 sklearn/ensemble/forest.py            | 10 +++++-----
 sklearn/ensemble/gradient_boosting.py |  6 +++---
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/sklearn/ensemble/forest.py b/sklearn/ensemble/forest.py
index 71464e1ca1864..1002c5967834e 100644
--- a/sklearn/ensemble/forest.py
+++ b/sklearn/ensemble/forest.py
@@ -903,7 +903,7 @@ def __init__(self,
                  min_weight_fraction_leaf=0.,
                  max_features="auto",
                  max_leaf_nodes=None,
-                 min_impurity_split=0.,
+                 min_impurity_split=1e-7,
                  bootstrap=True,
                  oob_score=False,
                  n_jobs=1,
@@ -1074,7 +1074,7 @@ def __init__(self,
                  min_weight_fraction_leaf=0.,
                  max_features="auto",
                  max_leaf_nodes=None,
-                 min_impurity_split=0.,
+                 min_impurity_split=1e-7,
                  bootstrap=True,
                  oob_score=False,
                  n_jobs=1,
@@ -1271,7 +1271,7 @@ def __init__(self,
                  min_weight_fraction_leaf=0.,
                  max_features="auto",
                  max_leaf_nodes=None,
-                 min_impurity_split=0.,
+                 min_impurity_split=1e-7,
                  bootstrap=False,
                  oob_score=False,
                  n_jobs=1,
@@ -1441,7 +1441,7 @@ def __init__(self,
                  min_weight_fraction_leaf=0.,
                  max_features="auto",
                  max_leaf_nodes=None,
-                 min_impurity_split=0.,
+                 min_impurity_split=1e-7,
                  bootstrap=False,
                  oob_score=False,
                  n_jobs=1,
@@ -1572,7 +1572,7 @@ def __init__(self,
                  min_samples_leaf=1,
                  min_weight_fraction_leaf=0.,
                  max_leaf_nodes=None,
-                 min_impurity_split=0.,
+                 min_impurity_split=1e-7,
                  sparse_output=True,
                  n_jobs=1,
                  random_state=None,
diff --git a/sklearn/ensemble/gradient_boosting.py b/sklearn/ensemble/gradient_boosting.py
index a37f047088687..eca5d3697fbe0 100644
--- a/sklearn/ensemble/gradient_boosting.py
+++ b/sklearn/ensemble/gradient_boosting.py
@@ -1442,8 +1442,8 @@ class GradientBoostingClassifier(BaseGradientBoosting, ClassifierMixin):
     def __init__(self, loss='deviance', learning_rate=0.1, n_estimators=100,
                  subsample=1.0, criterion='friedman_mse', min_samples_split=2,
                  min_samples_leaf=1, min_weight_fraction_leaf=0.,
-                 max_depth=3, min_impurity_split=0.,init=None, random_state=None,
-                 max_features=None, verbose=0,
+                 max_depth=3, min_impurity_split=1e-7, init=None,
+                 random_state=None, max_features=None, verbose=0,
                  max_leaf_nodes=None, warm_start=False,
                  presort='auto'):
 
@@ -1802,7 +1802,7 @@ class GradientBoostingRegressor(BaseGradientBoosting, RegressorMixin):
     def __init__(self, loss='ls', learning_rate=0.1, n_estimators=100,
                  subsample=1.0, criterion='friedman_mse', min_samples_split=2,
                  min_samples_leaf=1, min_weight_fraction_leaf=0.,
-                 max_depth=3, min_impurity_split=0., init=None, random_state=None,
+                 max_depth=3, min_impurity_split=1e-7, init=None, random_state=None,
                  max_features=None, alpha=0.9, verbose=0, max_leaf_nodes=None,
                  warm_start=False, presort='auto'):
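Patches 15 and 16 only touch docstrings and constructor defaults: the forest and gradient boosting classes already forward min_impurity_split to the trees they build. A short sketch of that end-to-end effect — again an illustration rather than part of the series, with an arbitrary 0.3 threshold:

import numpy as np
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier

iris = load_iris()
forest = RandomForestClassifier(n_estimators=10, min_impurity_split=0.3,
                                random_state=0)
forest.fit(iris.data, iris.target)

# The threshold is passed through to every base estimator, so each
# tree in the ensemble stops splitting nodes whose impurity is <= 0.3.
for sub in forest.estimators_:
    leaves = sub.tree_.children_left == -1
    assert np.all(sub.tree_.impurity[leaves] <= 0.3)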