@@ -383,30 +383,33 @@ def score_samples(self, X):
383
383
"" .format (self .n_features_ , X .shape [1 ]))
384
384
n_samples = X .shape [0 ]
385
385
386
- n_samples_leaf = np .zeros (( n_samples , self . n_estimators ) , order = "f" )
387
- depths = np .zeros (( n_samples , self . n_estimators ) , order = "f" )
386
+ n_samples_leaf = np .zeros (n_samples , order = "f" )
387
+ depths = np .zeros (n_samples , order = "f" )
388
388
389
389
if self ._max_features == X .shape [1 ]:
390
390
subsample_features = False
391
391
else :
392
392
subsample_features = True
393
393
394
- for i , (tree , features ) in enumerate (zip (self .estimators_ ,
395
- self .estimators_features_ )):
394
+ for tree , features in zip (self .estimators_ , self .estimators_features_ ):
396
395
if subsample_features :
397
396
X_subset = X [:, features ]
398
397
else :
399
398
X_subset = X
400
399
leaves_index = tree .apply (X_subset )
401
400
node_indicator = tree .decision_path (X_subset )
402
- n_samples_leaf [:, i ] = tree .tree_ .n_node_samples [leaves_index ]
403
- depths [:, i ] = np .ravel (node_indicator .sum (axis = 1 ))
404
- depths [:, i ] -= 1
401
+ n_samples_leaf = tree .tree_ .n_node_samples [leaves_index ]
405
402
406
- depths += _average_path_length (n_samples_leaf )
403
+ depths += (
404
+ np .ravel (node_indicator .sum (axis = 1 ))
405
+ + _average_path_length (n_samples_leaf )
406
+ - 1.0
407
+ )
407
408
408
- scores = 2 ** (- depths .mean (axis = 1 ) / _average_path_length (
409
- self .max_samples_ ))
409
+ scores = 2 ** (
410
+ - depths
411
+ / (len (self .estimators_ ) * _average_path_length ([self .max_samples_ ]))
412
+ )
410
413
411
414
# Take the opposite of the scores as bigger is better (here less
412
415
# abnormal)
@@ -423,12 +426,12 @@ def threshold_(self):
423
426
424
427
425
428
def _average_path_length(n_samples_leaf):
    """The average path length in a n_samples iTree, which is equal to
    the average path length of an unsuccessful BST search since the
    latter has the same structure as an isolation tree.

    Parameters
    ----------
    n_samples_leaf : array-like, shape (n_samples,).
        The number of training samples in each test sample leaf, for
        each estimators.

    Returns
    -------
    average_path_length : array, same shape as n_samples_leaf
    """
    n_samples_leaf = check_array(n_samples_leaf, ensure_2d=False)

    n_samples_leaf_shape = n_samples_leaf.shape
    n_samples_leaf = n_samples_leaf.reshape((1, -1))
    average_path_length = np.zeros(n_samples_leaf.shape)

    # The harmonic-number approximation 2*(ln(n-1) + gamma) - 2*(n-1)/n is
    # only valid for n > 2; handle the two boundary cases explicitly:
    #   n <= 1 : a single external node adds no further path  -> 0
    #   n == 2 : c(2) = 1 exactly (the formula would give ~0.154)
    mask_1 = n_samples_leaf <= 1
    mask_2 = n_samples_leaf == 2
    not_mask = ~np.logical_or(mask_1, mask_2)

    average_path_length[mask_1] = 0.
    average_path_length[mask_2] = 1.
    average_path_length[not_mask] = (
        2.0 * (np.log(n_samples_leaf[not_mask] - 1.0) + np.euler_gamma)
        - 2.0 * (n_samples_leaf[not_mask] - 1.0) / n_samples_leaf[not_mask]
    )

    return average_path_length.reshape(n_samples_leaf_shape)
0 commit comments