FIX numerical rounding issue causing test_poisson_zero_nodes to fail at random by ogrisel · Pull Request #18737 · scikit-learn/scikit-learn · GitHub

Merged
18 changes: 14 additions & 4 deletions sklearn/tree/_criterion.pyx
@@ -34,6 +34,8 @@ from ._utils cimport safe_realloc
 from ._utils cimport sizet_ptr_to_ndarray
 from ._utils cimport WeightedMedianCalculator

+# EPSILON is used in the Poisson criterion
+cdef double EPSILON = 10 * np.finfo('double').eps

 cdef class Criterion:
     """Interface for impurity criteria.
@@ -1384,10 +1386,13 @@ cdef class Poisson(RegressionCriterion):
         cdef double y_mean_right = 0.

         for k in range(self.n_outputs):
-            if (self.sum_left[k] <= 0) or (self.sum_right[k] <= 0):
+            if (self.sum_left[k] <= EPSILON) or (self.sum_right[k] <= EPSILON):
                 # Poisson loss does not allow non-positive predictions. We
                 # therefore forbid splits that have child nodes with
                 # sum(y_i) <= 0.
+                # Since sum_right = sum_total - sum_left, rounding error can
+                # leave these sums slightly nonzero instead of exactly zero.
+                # Thus, we relax the above comparison to sum(y_i) <= EPSILON.
                 return -INFINITY
             else:
                 y_mean_left = self.sum_left[k] / self.weighted_n_left
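
Condensed into pure Python, the split guard amounts to the following sketch; split_allowed is a hypothetical helper for illustration, not part of the Cython class.

import numpy as np

EPSILON = 10 * np.finfo('double').eps  # mirrors the module-level constant above

def split_allowed(sum_total, sum_left):
    """Reject splits where a child has a (numerically) non-positive y sum."""
    sum_right = sum_total - sum_left  # derived by subtraction, hence noisy
    return sum_left > EPSILON and sum_right > EPSILON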
@@ -1436,11 +1441,16 @@
         cdef SIZE_t n_outputs = self.n_outputs

         for k in range(n_outputs):
-            y_mean = y_sum[k] / weight_sum
-
-            if y_mean <= 0:
+            if y_sum[k] <= EPSILON:
+                # y_sum could be computed from the subtraction
+                # sum_right = sum_total - sum_left leading to a potential
+                # floating point rounding error.
+                # Thus, we relax the comparison y_sum <= 0 to
+                # y_sum <= EPSILON.
                 return INFINITY

+            y_mean = y_sum[k] / weight_sum
+
             for p in range(start, end):
                 i = self.samples[p]

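For context on why non-positive means are forbidden here: the Poisson impurity is based on the Poisson deviance, which involves log(y / y_pred) and diverges as the predicted mean approaches zero. A rough NumPy sketch of that loss follows; the actual Cython implementation differs in detail.

import numpy as np

def poisson_half_deviance(y, mu):
    """Mean Poisson half deviance; the y == 0 terms are defined as 0."""
    y = np.asarray(y, dtype=float)
    ylogy = np.where(y > 0, y * np.log(np.where(y > 0, y, 1.0) / mu), 0.0)
    return np.mean(ylogy - y + mu)

print(poisson_half_deviance([0.0, 1.0, 2.0], mu=1.0))     # ~0.46, finite
print(poisson_half_deviance([0.0, 1.0, 2.0], mu=1e-300))  # ~690, diverges as mu -> 0+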
10 changes: 6 additions & 4 deletions sklearn/tree/tests/test_tree.py
@@ -1987,18 +1987,19 @@ def test_balance_property(criterion, Tree):
     assert np.sum(reg.predict(X)) == pytest.approx(np.sum(y))


-def test_poisson_zero_nodes():
+@pytest.mark.parametrize("seed", range(3))
+def test_poisson_zero_nodes(seed):
     # Test that sum(y)=0 and therefore y_pred=0 is forbidden on nodes.
     X = [[0, 0], [0, 1], [0, 2], [0, 3],
          [1, 0], [1, 2], [1, 2], [1, 3]]
     y = [0, 0, 0, 0, 1, 2, 3, 4]
     # Note that X[:, 0] == 0 is a 100% indicator for y == 0. The tree can
     # easily learn that:
-    reg = DecisionTreeRegressor(criterion="mse", random_state=1)
+    reg = DecisionTreeRegressor(criterion="mse", random_state=seed)
     reg.fit(X, y)
     assert np.amin(reg.predict(X)) == 0
     # whereas Poisson must predict strictly positive numbers
-    reg = DecisionTreeRegressor(criterion="poisson", random_state=1)
+    reg = DecisionTreeRegressor(criterion="poisson", random_state=seed)
     reg.fit(X, y)
     assert np.all(reg.predict(X) > 0)

@@ -2009,12 +2010,13 @@ def test_poisson_zero_nodes():
         n_samples=1_000,
         n_features=n_features,
         n_informative=n_features * 2 // 3,
+        random_state=seed,
     )
     # some excess zeros
     y[(-1 < y) & (y < 0)] = 0
     # make sure the target is positive
     y = np.abs(y)
-    reg = DecisionTreeRegressor(criterion='poisson', random_state=42)
+    reg = DecisionTreeRegressor(criterion='poisson', random_state=seed)
     reg.fit(X, y)
     assert np.all(reg.predict(X) > 0)

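A self-contained repro in the spirit of the strengthened test (assumes a build where criterion="poisson" is available, i.e. scikit-learn >= 0.24): across several seeds, the Poisson tree must never predict a non-positive value, even with many exact zeros in the target.

import numpy as np
from sklearn import datasets
from sklearn.tree import DecisionTreeRegressor

for seed in range(3):
    X, y = datasets.make_regression(n_samples=1_000, n_features=10,
                                    n_informative=6, random_state=seed)
    y[(-1 < y) & (y < 0)] = 0   # inject excess zeros
    y = np.abs(y)               # Poisson requires non-negative targets
    reg = DecisionTreeRegressor(criterion="poisson", random_state=seed)
    reg.fit(X, y)
    assert np.all(reg.predict(X) > 0)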