scikit-learn
diff --git a/sklearn/ensemble/iforest.py b/sklearn/ensemble/iforest.py
Lines changed: 27 additions & 29 deletions
diff --git a/sklearn/ensemble/tests/test_iforest.py b/sklearn/ensemble/tests/test_iforest.py
Lines changed: 16 additions & 14 deletions
@@ -383,30 +383,33 @@ def score_samples(self, X):
                              "".format(self.n_features_, X.shape[1]))
         n_samples = X.shape[0]
 
-        n_samples_leaf = np.zeros((n_samples, self.n_estimators), order="f")
-        depths = np.zeros((n_samples, self.n_estimators), order="f")
+        n_samples_leaf = np.zeros(n_samples, order="f")
+        depths = np.zeros(n_samples, order="f")
 
         if self._max_features == X.shape[1]:
             subsample_features = False
         else:
             subsample_features = True
 
-        for i, (tree, features) in enumerate(zip(self.estimators_,
-                                                 self.estimators_features_)):
+        for tree, features in zip(self.estimators_, self.estimators_features_):
             if subsample_features:
                 X_subset = X[:, features]
             else:
                 X_subset = X
             leaves_index = tree.apply(X_subset)
             node_indicator = tree.decision_path(X_subset)
-            n_samples_leaf[:, i] = tree.tree_.n_node_samples[leaves_index]
-            depths[:, i] = np.ravel(node_indicator.sum(axis=1))
-            depths[:, i] -= 1
+            n_samples_leaf = tree.tree_.n_node_samples[leaves_index]
 
-        depths += _average_path_length(n_samples_leaf)
+            depths += (
+                np.ravel(node_indicator.sum(axis=1))
+                + _average_path_length(n_samples_leaf)
+                - 1.0
+            )
 
-        scores = 2 ** (-depths.mean(axis=1) / _average_path_length(
-            self.max_samples_))
+        scores = 2 ** (
+            -depths
+            / (len(self.estimators_) * _average_path_length([self.max_samples_]))
+        )
 
         # Take the opposite of the scores as bigger is better (here less
         # abnormal)
@@ -423,12 +426,12 @@ def threshold_(self):
 
 
 def _average_path_length(n_samples_leaf):
-    """ The average path length in a n_samples iTree, which is equal to
+    """The average path length in a n_samples iTree, which is equal to
     the average path length of an unsuccessful BST search since the
     latter has the same structure as an isolation tree.
     Parameters
     ----------
-    n_samples_leaf : array-like, shape (n_samples, n_estimators), or int.
+    n_samples_leaf : array-like, shape (n_samples,).
         The number of training samples in each test sample leaf, for
         each estimators.
 
@@ -437,25 +440,20 @@ def _average_path_length(n_samples_leaf):
     average_path_length : array, same shape as n_samples_leaf
 
     """
-    if isinstance(n_samples_leaf, INTEGER_TYPES):
-        if n_samples_leaf <= 1:
-            return 1.
-        else:
-            return 2. * (np.log(n_samples_leaf - 1.) + np.euler_gamma) - 2. * (
-                n_samples_leaf - 1.) / n_samples_leaf
 
-    else:
+    n_samples_leaf = check_array(n_samples_leaf, ensure_2d=False)
 
-        n_samples_leaf_shape = n_samples_leaf.shape
-        n_samples_leaf = n_samples_leaf.reshape((1, -1))
-        average_path_length = np.zeros(n_samples_leaf.shape)
+    n_samples_leaf_shape = n_samples_leaf.shape
+    n_samples_leaf = n_samples_leaf.reshape((1, -1))
+    average_path_length = np.zeros(n_samples_leaf.shape)
 
-        mask = (n_samples_leaf <= 1)
-        not_mask = np.logical_not(mask)
+    mask = (n_samples_leaf <= 1)
+    not_mask = np.logical_not(mask)
 
-        average_path_length[mask] = 1.
-        average_path_length[not_mask] = 2. * (
-            np.log(n_samples_leaf[not_mask] - 1.) + np.euler_gamma) - 2. * (
-                n_samples_leaf[not_mask] - 1.) / n_samples_leaf[not_mask]
+    average_path_length[mask] = 1.
+    average_path_length[not_mask] = (
+        2.0 * (np.log(n_samples_leaf[not_mask] - 1.0) + np.euler_gamma)
+        - 2.0 * (n_samples_leaf[not_mask] - 1.0) / n_samples_leaf[not_mask]
+    )
 
-        return average_path_length.reshape(n_samples_leaf_shape)
+    return average_path_length.reshape(n_samples_leaf_shape)
@@ -219,21 +219,22 @@ def test_iforest_performance():
     assert_greater(roc_auc_score(y_test, y_pred), 0.98)
 
 
-@pytest.mark.filterwarnings('ignore:threshold_ attribute')
-def test_iforest_works():
+@pytest.mark.parametrize("contamination", [0.25, "auto"])
+@pytest.mark.filterwarnings("ignore:threshold_ attribute")
+def test_iforest_works(contamination):
     # toy sample (the last two samples are outliers)
     X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1], [6, 3], [-4, 7]]
 
     # Test IsolationForest
-    for contamination in [0.25, "auto"]:
-        clf = IsolationForest(behaviour='new', random_state=rng,
-                              contamination=contamination)
-        clf.fit(X)
-        decision_func = - clf.decision_function(X)
-        pred = clf.predict(X)
-        # assert detect outliers:
-        assert_greater(np.min(decision_func[-2:]), np.max(decision_func[:-2]))
-        assert_array_equal(pred, 6 * [1] + 2 * [-1])
+    clf = IsolationForest(
+        behaviour="new", random_state=rng, contamination=contamination
+    )
+    clf.fit(X)
+    decision_func = -clf.decision_function(X)
+    pred = clf.predict(X)
+    # assert detect outliers:
+    assert_greater(np.min(decision_func[-2:]), np.max(decision_func[:-2]))
+    assert_array_equal(pred, 6 * [1] + 2 * [-1])
 
 
 @pytest.mark.filterwarnings('ignore:default contamination')
@@ -265,9 +266,10 @@ def test_iforest_average_path_length():
 
     result_one = 2. * (np.log(4.) + np.euler_gamma) - 2. * 4. / 5.
     result_two = 2. * (np.log(998.) + np.euler_gamma) - 2. * 998. / 999.
-    assert_almost_equal(_average_path_length(1), 1., decimal=10)
-    assert_almost_equal(_average_path_length(5), result_one, decimal=10)
-    assert_almost_equal(_average_path_length(999), result_two, decimal=10)
+
+    assert_array_almost_equal(_average_path_length([1]), [1.], decimal=10)
+    assert_array_almost_equal(_average_path_length([5]), [result_one], decimal=10)
+    assert_array_almost_equal(_average_path_length([999]), [result_two], decimal=10)
     assert_array_almost_equal(_average_path_length(np.array([1, 5, 999])),
                               [1., result_one, result_two], decimal=10)