@@ -152,22 +152,21 @@ class IsolationForest(BaseBagging, OutlierMixin):
     """

-    def __init__(
-        self,
-        n_estimators=100,
-        max_samples="auto",
-        contamination="legacy",
-        max_features=1.0,
-        bootstrap=False,
-        n_jobs=None,
-        behaviour="old",
-        random_state=None,
-        verbose=0,
-    ):
+    def __init__(self,
+                 n_estimators=100,
+                 max_samples="auto",
+                 contamination="legacy",
+                 max_features=1.,
+                 bootstrap=False,
+                 n_jobs=None,
+                 behaviour='old',
+                 random_state=None,
+                 verbose=0):
         super().__init__(
             base_estimator=ExtraTreeRegressor(
-                max_features=1, splitter="random", random_state=random_state
-            ),
+                max_features=1,
+                splitter='random',
+                random_state=random_state),
             # here above max_features has no links with self.max_features
             bootstrap=bootstrap,
             bootstrap_features=False,
@@ -176,8 +175,7 @@ def __init__(
             max_features=max_features,
             n_jobs=n_jobs,
             random_state=random_state,
-            verbose=verbose,
-        )
+            verbose=verbose)

         self.behaviour = behaviour
         self.contamination = contamination
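For reference, a minimal usage sketch of the constructor reformatted above, assuming only the 0.20-era public sklearn.ensemble.IsolationForest API shown in this diff; passing contamination="auto" and behaviour="new" explicitly avoids the FutureWarnings that fit() raises further down for the "legacy"/"old" defaults:

import numpy as np
from sklearn.ensemble import IsolationForest

rng = np.random.RandomState(42)
X = rng.randn(100, 2)

# Non-deprecated settings per the warnings added in fit() below
# (behaviour existed in 0.20-0.22 and was removed in later releases).
clf = IsolationForest(n_estimators=100, max_samples="auto",
                      contamination="auto", behaviour="new",
                      random_state=rng)
clf.fit(X)
labels = clf.predict(X)            # +1 for inliers, -1 for outliers
scores = clf.decision_function(X)  # negative values are more anomalous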
@@ -190,7 +188,7 @@ def _parallel_args(self):
         # a thread-based backend rather than a process-based backend so as
         # to avoid suffering from communication overhead and extra memory
         # copies.
-        return _joblib_parallel_args(prefer="threads")
+        return _joblib_parallel_args(prefer='threads')

     def fit(self, X, y=None, sample_weight=None):
         """Fit estimator.
@@ -213,26 +211,22 @@ def fit(self, X, y=None, sample_weight=None):
         self : object
         """
         if self.contamination == "legacy":
-            warn(
-                "default contamination parameter 0.1 will change "
-                'in version 0.22 to "auto". This will change the '
-                "predict method behavior.",
-                FutureWarning,
-            )
+            warn('default contamination parameter 0.1 will change '
+                 'in version 0.22 to "auto". This will change the '
+                 'predict method behavior.',
+                 FutureWarning)
             self._contamination = 0.1
         else:
             self._contamination = self.contamination

-        if self.behaviour == "old":
-            warn(
-                'behaviour="old" is deprecated and will be removed '
-                'in version 0.22. Please use behaviour="new", which '
-                "makes the decision_function change to match "
-                "other anomaly detection algorithm API.",
-                FutureWarning,
-            )
+        if self.behaviour == 'old':
+            warn('behaviour="old" is deprecated and will be removed '
+                 'in version 0.22. Please use behaviour="new", which '
+                 'makes the decision_function change to match '
+                 'other anomaly detection algorithm API.',
+                 FutureWarning)

-        X = check_array(X, accept_sparse=["csc"])
+        X = check_array(X, accept_sparse=['csc'])
         if issparse(X):
             # Pre-sort indices to avoid that each individual tree of the
             # ensemble sorts the indices.
@@ -245,51 +239,43 @@ def fit(self, X, y=None, sample_weight=None):
         n_samples = X.shape[0]

         if isinstance(self.max_samples, str):
-            if self.max_samples == "auto":
+            if self.max_samples == 'auto':
                 max_samples = min(256, n_samples)
             else:
-                raise ValueError(
-                    "max_samples (%s) is not supported."
-                    'Valid choices are: "auto", int or'
-                    "float" % self.max_samples
-                )
+                raise ValueError('max_samples (%s) is not supported.'
+                                 'Valid choices are: "auto", int or'
+                                 'float' % self.max_samples)

         elif isinstance(self.max_samples, INTEGER_TYPES):
             if self.max_samples > n_samples:
-                warn(
-                    "max_samples (%s) is greater than the "
-                    "total number of samples (%s). max_samples "
-                    "will be set to n_samples for estimation."
-                    % (self.max_samples, n_samples)
-                )
+                warn("max_samples (%s) is greater than the "
+                     "total number of samples (%s). max_samples "
+                     "will be set to n_samples for estimation."
+                     % (self.max_samples, n_samples))
                 max_samples = n_samples
             else:
                 max_samples = self.max_samples
         else:  # float
-            if not (0.0 < self.max_samples <= 1.0):
-                raise ValueError(
-                    "max_samples must be in (0, 1], got %r" % self.max_samples
-                )
+            if not (0. < self.max_samples <= 1.):
+                raise ValueError("max_samples must be in (0, 1], got %r"
+                                 % self.max_samples)
             max_samples = int(self.max_samples * X.shape[0])

         self.max_samples_ = max_samples
         max_depth = int(np.ceil(np.log2(max(max_samples, 2))))
-        super()._fit(
-            X, y, max_samples, max_depth=max_depth, sample_weight=sample_weight
-        )
+        super()._fit(X, y, max_samples,
+                     max_depth=max_depth,
+                     sample_weight=sample_weight)

-        if self.behaviour == "old":
+        if self.behaviour == 'old':
             # in this case, decision_function = 0.5 + self.score_samples(X):
             if self._contamination == "auto":
-                raise ValueError(
-                    "contamination parameter cannot be set to "
-                    "'auto' when behaviour == 'old'."
-                )
+                raise ValueError("contamination parameter cannot be set to "
+                                 "'auto' when behaviour == 'old'.")

             self.offset_ = -0.5
-            self._threshold_ = np.percentile(
-                self.decision_function(X), 100.0 * self._contamination
-            )
+            self._threshold_ = np.percentile(self.decision_function(X),
+                                             100. * self._contamination)

             return self
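The hunk above only reflows the subsampling logic in fit(); for clarity, a standalone sketch of how max_samples is resolved to an integer before fitting, mirroring those branches (resolve_max_samples is a hypothetical helper, and INTEGER_TYPES is approximated by numbers.Integral):

import numbers
import warnings

import numpy as np

def resolve_max_samples(max_samples, n_samples):
    # Mirrors the branches in IsolationForest.fit() above (sketch only).
    if isinstance(max_samples, str):
        if max_samples == 'auto':
            return min(256, n_samples)  # default subsample size
        raise ValueError('max_samples (%s) is not supported.' % max_samples)
    if isinstance(max_samples, numbers.Integral):
        if max_samples > n_samples:
            warnings.warn("max_samples will be set to n_samples for estimation.")
            return n_samples
        return max_samples
    if not (0. < max_samples <= 1.):  # float: fraction of n_samples
        raise ValueError("max_samples must be in (0, 1], got %r" % max_samples)
    return int(max_samples * n_samples)

# e.g. resolve_max_samples('auto', 1000) == 256, and the per-tree depth limit
# used above is int(np.ceil(np.log2(max(256, 2)))) == 8.
print(resolve_max_samples('auto', 1000), int(np.ceil(np.log2(256))))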
@@ -302,7 +288,8 @@ def fit(self, X, y=None, sample_weight=None):

         # else, define offset_ wrt contamination parameter, so that the
         # threshold_ attribute is implicitly 0 and is not needed anymore:
-        self.offset_ = np.percentile(self.score_samples(X), 100.0 * self._contamination)
+        self.offset_ = np.percentile(self.score_samples(X),
+                                     100. * self._contamination)

         return self
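As the comment in this hunk says, with behaviour="new" the offset is chosen so that threshold_ is implicitly 0: offset_ is the contamination-quantile of the training scores, and decision_function(X) = score_samples(X) - offset_ then falls below 0 (prediction -1 in predict()) for roughly a contamination fraction of the training data. A NumPy-only toy sketch of that thresholding on made-up scores, not the estimator itself:

import numpy as np

rng = np.random.RandomState(0)
# Stand-in for score_samples(X): closer to 0 means more normal.
score_samples = -rng.uniform(0.3, 0.7, size=1000)

contamination = 0.1
# offset_ is the `contamination`-quantile of the training scores ...
offset_ = np.percentile(score_samples, 100. * contamination)
# ... so decision_function = score_samples - offset_ is negative for
# roughly a `contamination` fraction of the training samples.
decision_function = score_samples - offset_
print((decision_function < 0).mean())  # ~0.1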
@@ -323,9 +310,9 @@ def predict(self, X):
             be considered as an inlier according to the fitted model.
         """
         check_is_fitted(self, ["offset_"])
-        X = check_array(X, accept_sparse="csr")
+        X = check_array(X, accept_sparse='csr')
         is_inlier = np.ones(X.shape[0], dtype=int)
-        threshold = self.threshold_ if self.behaviour == "old" else 0
+        threshold = self.threshold_ if self.behaviour == 'old' else 0
         is_inlier[self.decision_function(X) < threshold] = -1
         return is_inlier
@@ -343,9 +330,10 @@ def decision_function(self, X):

         Parameters
         ----------
-        X : {array-like, sparse matrix}, shape (n_samples, n_features)
-            The training input samples. Sparse matrices are accepted only if
-            they are supported by the base estimator.
+        X : array-like or sparse matrix, shape (n_samples, n_features)
+            The input samples. Internally, it will be converted to
+            ``dtype=np.float32`` and if a sparse matrix is provided
+            to a sparse ``csr_matrix``.

         Returns
         -------
@@ -374,9 +362,8 @@ def score_samples(self, X):

         Parameters
         ----------
-        X : {array-like, sparse matrix}, shape (n_samples, n_features)
-            The training input samples. Sparse matrices are accepted only if
-            they are supported by the base estimator.
+        X : array-like or sparse matrix, shape (n_samples, n_features)
+            The input samples.

         Returns
         -------
@@ -388,14 +375,12 @@ def score_samples(self, X):
         check_is_fitted(self, ["estimators_"])

         # Check data
-        X = check_array(X, accept_sparse="csr")
+        X = check_array(X, accept_sparse='csr')
         if self.n_features_ != X.shape[1]:
-            raise ValueError(
-                "Number of features of the model must "
-                "match the input. Model n_features is {0} and "
-                "input n_features is {1}."
-                "".format(self.n_features_, X.shape[1])
-            )
+            raise ValueError("Number of features of the model must "
+                             "match the input. Model n_features is {0} and "
+                             "input n_features is {1}."
+                             "".format(self.n_features_, X.shape[1]))
         n_samples = X.shape[0]

         n_samples_leaf = np.zeros(n_samples, order="f")
@@ -423,10 +408,7 @@ def score_samples(self, X):

         scores = 2 ** (
             -depths
-            / (
-                len(self.estimators_)
-                * _average_path_length([self.max_samples_])
-            )
+            / (len(self.estimators_) * _average_path_length([self.max_samples_]))
         )

         # Take the opposite of the scores as bigger is better (here less
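The reflowed expression above is the standard Isolation Forest anomaly score, 2 ** (-depths / (n_estimators * c(max_samples_))), where depths accumulates the adjusted path lengths over all trees and c() is the average path length normalizer defined further down. A small numeric sketch with made-up depth sums (assumed standalone code, not the module's internals):

import numpy as np

def average_path_length(n):
    # c(n): average path length of an unsuccessful BST search, as in
    # _average_path_length below (sketch for n > 2 only).
    return 2.0 * (np.log(n - 1.0) + np.euler_gamma) - 2.0 * (n - 1.0) / n

n_estimators, max_samples_ = 100, 256
# `depths` plays the role of the per-sample path lengths summed over trees.
depths = np.array([300.0, 800.0, 1500.0])

scores = 2 ** (-depths / (n_estimators * average_path_length(max_samples_)))
print(scores)  # near 1 -> short paths (anomalous), smaller -> deep, normal paths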
@@ -435,15 +417,11 @@ def score_samples(self, X):

     @property
     def threshold_(self):
-        if self.behaviour != "old":
-            raise AttributeError(
-                "threshold_ attribute does not exist when " "behaviour != 'old'"
-            )
-        warn(
-            "threshold_ attribute is deprecated in 0.20 and will"
-            " be removed in 0.22.",
-            DeprecationWarning,
-        )
+        if self.behaviour != 'old':
+            raise AttributeError("threshold_ attribute does not exist when "
+                                 "behaviour != 'old'")
+        warn("threshold_ attribute is deprecated in 0.20 and will"
+             " be removed in 0.22.", DeprecationWarning)
         return self._threshold_

@@ -473,8 +451,8 @@ def _average_path_length(n_samples_leaf):
     mask_2 = n_samples_leaf == 2
     not_mask = ~np.logical_or(mask_1, mask_2)

-    average_path_length[mask_1] = 0.0
-    average_path_length[mask_2] = 1.0
+    average_path_length[mask_1] = 0.
+    average_path_length[mask_2] = 1.
     average_path_length[not_mask] = (
         2.0 * (np.log(n_samples_leaf[not_mask] - 1.0) + np.euler_gamma)
         - 2.0 * (n_samples_leaf[not_mask] - 1.0) / n_samples_leaf[not_mask]
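For completeness, the n > 2 branch above is the usual average path length of an unsuccessful binary-search-tree lookup, c(n) = 2 * (ln(n - 1) + euler_gamma) - 2 * (n - 1) / n; a quick standalone check of the three branches (a re-implementation for illustration, not the module's function):

import numpy as np

def average_path_length(n_samples_leaf):
    # Vectorised c(n) with the same three cases as _average_path_length above.
    n = np.asarray(n_samples_leaf, dtype=np.float64)
    out = np.zeros_like(n)
    mask_1 = n <= 1
    mask_2 = n == 2
    not_mask = ~np.logical_or(mask_1, mask_2)
    out[mask_1] = 0.
    out[mask_2] = 1.
    out[not_mask] = (2.0 * (np.log(n[not_mask] - 1.0) + np.euler_gamma)
                     - 2.0 * (n[not_mask] - 1.0) / n[not_mask])
    return out

print(average_path_length([1, 2, 256]))  # [0., 1., ~10.24]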