scikit-learn · matwey · Apr 15, 2020 · Apr 17, 2020 · Mar 19, 2020 · NicolasHug
diff --git a/sklearn/ensemble/_iforest.py b/sklearn/ensemble/_iforest.py
@@ -501,9 +501,7 @@ def _average_path_length(n_samples_leaf):
 
     average_path_length[mask_1] = 0.
     average_path_length[mask_2] = 1.
-    average_path_length[not_mask] = (
-        2.0 * (np.log(n_samples_leaf[not_mask] - 1.0) + np.euler_gamma)
-        - 2.0 * (n_samples_leaf[not_mask] - 1.0) / n_samples_leaf[not_mask]
-    )
+    average_path_length[not_mask] = 2.0 * (np.log(n_samples_leaf[not_mask])
+                                           + np.euler_gamma - 1.0)
-    average_path_length[not_mask] = 2.0 * (np.log(n_samples_leaf[not_mask])
-                                           + np.euler_gamma - 1.0)
+    average_path_length[not_mask] = 2 * (np.log(n_samples_leaf[not_mask])
+                                         + np.euler_gamma - 1)
-    average_path_length[not_mask] = 2.0 * (np.log(n_samples_leaf[not_mask])
-                                           + np.euler_gamma - 1.0)
+    average_path_length[not_mask] = 2 * (np.log(n_samples_leaf[not_mask])
+                                         + np.euler_gamma - 1)
 
     return average_path_length.reshape(n_samples_leaf_shape)
diff --git a/sklearn/ensemble/tests/test_iforest.py b/sklearn/ensemble/tests/test_iforest.py
@@ -235,19 +235,22 @@ def test_iforest_subsampled_features():
 
 
 def test_iforest_average_path_length():
+    def harmonic_number(n):
+        return np.sum(1.0/np.arange(1, n+1))
     # It tests non-regression for #8549 which used the wrong formula
     # for average path length, strictly for the integer case
     # Updated to check average path length when input is <= 2 (issue #11839)
-    result_one = 2.0 * (np.log(4.0) + np.euler_gamma) - 2.0 * 4.0 / 5.0
-    result_two = 2.0 * (np.log(998.0) + np.euler_gamma) - 2.0 * 998.0 / 999.0
+    result_one = 2.0 * harmonic_number(4.0) - 2.0 * 4.0 / 5.0
+    result_two = 2.0 * harmonic_number(998.0) - 2.0 * 998.0 / 999.0
     assert_allclose(_average_path_length([0]), [0.0])
     assert_allclose(_average_path_length([1]), [0.0])
     assert_allclose(_average_path_length([2]), [1.0])
-    assert_allclose(_average_path_length([5]), [result_one])
-    assert_allclose(_average_path_length([999]), [result_two])
+    assert_allclose(_average_path_length([5]), [result_one], rtol=0.1)
+    assert_allclose(_average_path_length([999]), [result_two], rtol=1e-4)
     assert_allclose(
         _average_path_length(np.array([1, 2, 5, 999])),
         [0.0, 1.0, result_one, result_two],
+        rtol=0.1
     )
     # _average_path_length is increasing
     avg_path_length = _average_path_length(np.arange(5))
@@ -322,37 +325,3 @@ def test_iforest_deprecation():
     warn_msg = "'behaviour' is deprecated in 0.22 and will be removed in 0.24"
     with pytest.warns(FutureWarning, match=warn_msg):
         iforest.fit(iris.data)
-
-
-def test_iforest_with_uniform_data():
-    """Test whether iforest predicts inliers when using uniform data"""
-
-    # 2-d array of all 1s
-    X = np.ones((100, 10))
-    iforest = IsolationForest()
-    iforest.fit(X)
-
-    rng = np.random.RandomState(0)
-
-    assert all(iforest.predict(X) == 1)
-    assert all(iforest.predict(rng.randn(100, 10)) == 1)
-    assert all(iforest.predict(X + 1) == 1)
-    assert all(iforest.predict(X - 1) == 1)
-
-    # 2-d array where columns contain the same value across rows
-    X = np.repeat(rng.randn(1, 10), 100, 0)
-    iforest = IsolationForest()
-    iforest.fit(X)
-
-    assert all(iforest.predict(X) == 1)
-    assert all(iforest.predict(rng.randn(100, 10)) == 1)
-    assert all(iforest.predict(np.ones((100, 10))) == 1)
-
-    # Single row
-    X = rng.randn(1, 10)
-    iforest = IsolationForest()
-    iforest.fit(X)
-
-    assert all(iforest.predict(X) == 1)
-    assert all(iforest.predict(rng.randn(100, 10)) == 1)
-    assert all(iforest.predict(np.ones((100, 10))) == 1)