Suggestions by @albertcthomas and @jjerphan implemented. · scikit-learn/scikit-learn@782225c · GitHub
[go: up one dir, main page]

Skip to content

Commit 782225c

Browse files
committed
Suggestions by @albertcthomas and @jjerphan implemented.
1 parent ed967c7 commit 782225c

File tree

2 files changed

+26
-9
lines changed

2 files changed

+26
-9
lines changed

sklearn/ensemble/_iforest.py

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -335,7 +335,7 @@ def predict(self, X):
335335
X = self._validate_data(X, accept_sparse="csr", reset=False)
336336
is_inlier = np.ones(X.shape[0], dtype=int)
337337
# is_inlier[self.decision_function(X) < 0] = -1
338-
is_inlier[self.decision_function(X) < -1.0e-15] = -1
338+
is_inlier[self.decision_function(X) < -2 * np.finfo(float).eps] = -1
339339
return is_inlier
340340

341341
def decision_function(self, X):
@@ -486,6 +486,7 @@ def _more_tags(self):
486486
}
487487

488488

489+
# Lookup table used below in _average_path_length() for small samples
489490
_average_path_length_small = np.array(
490491
(
491492
0.0,
@@ -558,6 +559,21 @@ def _average_path_length(n_samples_leaf):
558559
Returns
559560
-------
560561
average_path_length : ndarray of shape (n_samples,)
562+
563+
Notes
564+
-----
565+
Average path length equals :math:`2*(H(n)-1)`, with :math:`H(n)`
566+
the :math:`n`-th harmonic number. Calculation adapted from the
567+
harmonic number asymptotic expansion, see Wikipedia
568+
(https://en.wikipedia.org/wiki/Harmonic_number#Calculation) or
569+
M.B. Villarino in [MBV]_.
570+
571+
References
572+
----------
573+
.. [MBV] Villarino, M.B. Ramanujan’s Harmonic Number Expansion Into Negative
574+
Powers Of A Triangular Number. JIPAM. J. Inequal. Pure Appl. Math. 9(3),
575+
89 (2008). https://www.emis.de/journals/JIPAM/article1026.html.
576+
Preprint at https://arxiv.org/abs/0707.3950.
561577
"""
562578

563579
n_samples_leaf = check_array(n_samples_leaf, ensure_2d=False)
@@ -566,7 +582,7 @@ def _average_path_length(n_samples_leaf):
566582
n_samples_leaf = n_samples_leaf.reshape((1, -1))
567583
average_path_length = np.zeros(n_samples_leaf.shape)
568584

569-
mask_small = n_samples_leaf < 52
585+
mask_small = n_samples_leaf < len(_average_path_length_small)
570586
not_mask = ~mask_small
571587

572588
average_path_length[mask_small] = _average_path_length_small[
@@ -575,20 +591,21 @@ def _average_path_length(n_samples_leaf):
575591

576592
# Average path length equals 2*(H(n)-1), with H(n) the nth harmonic number.
577593
# For the harmonic number calculation,
578-
# see the following publications and references therein
594+
# see Wikipedia (https://en.wikipedia.org/wiki/Harmonic_number#Calculation)
595+
# or the following publications and references therein
579596
# Villarino, M.B. Ramanujan’s Harmonic Number Expansion Into Negative
580597
# Powers Of A Triangular Number. JIPAM. J. Inequal. Pure Appl. Math. 9(3),
581-
# 89 (2008). https://www.emis.de/journals/JIPAM/article1026.html?sid=1026.
598+
# 89 (2008). https://www.emis.de/journals/JIPAM/article1026.html.
582599
# Preprint at https://arxiv.org/abs/0707.3950.
583600
# or
584601
# Wang, W. Harmonic Number Expansions of the Ramanujan Type.
585602
# Results Math 73, 161 (2018). https://doi.org/10.1007/s00025-018-0920-8
586603

587-
tmp = 1.0 / np.square(n_samples_leaf[not_mask])
604+
n2_inv = 1.0 / np.square(n_samples_leaf[not_mask])
588605
average_path_length[not_mask] = (
589606
2.0 * (np.log(n_samples_leaf[not_mask]) - 1.0 + np.euler_gamma)
590607
+ 1.0 / n_samples_leaf[not_mask]
591-
- tmp * (1.0 / 6.0 - tmp * (1.0 / 60.0 - tmp / 126.0))
608+
- n2_inv * (1.0 / 6.0 - n2_inv * (1.0 / 60.0 - n2_inv / 126.0))
592609
)
593610

594611
return average_path_length.reshape(n_samples_leaf_shape)

sklearn/ensemble/tests/test_iforest.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -230,8 +230,8 @@ def test_iforest_average_path_length():
230230
# It tests non-regression for #8549 which used the wrong formula
231231
# for average path length, strictly for the integer case
232232
# Updated to check average path length when input is <= 2 (issue #11839)
233-
result_5 = 77.0 / 30.0
234-
result_999 = 12.9689417211006898253130364
233+
result_5 = 77.0 / 30.0 # exact value
234+
result_999 = 12.9689417211006898253130364 # exact value
235235
assert_allclose(_average_path_length([0]), [0.0])
236236
assert_allclose(_average_path_length([1]), [0.0])
237237
assert_allclose(_average_path_length([2]), [1.0])
@@ -317,7 +317,7 @@ def test_iforest_with_uniform_data():
317317

318318
rng = np.random.RandomState(0)
319319

320-
assert all(np.abs(iforest.decision_function(X)) < 1.0e-15)
320+
assert all(np.abs(iforest.decision_function(X)) < 2 * np.finfo(float).eps)
321321
assert all(iforest.predict(X) == 1)
322322
assert all(iforest.predict(rng.randn(100, 10)) == 1)
323323
assert all(iforest.predict(X + 1) == 1)

0 commit comments

Comments
 (0)
0