fix doc · scikit-learn/scikit-learn@25d1e56 · GitHub
[go: up one dir, main page]

Skip to content

Commit 25d1e56

Browse files
committed
fix doc
1 parent 36ebb0e commit 25d1e56

File tree

1 file changed

+68
-90
lines changed

1 file changed

+68
-90
lines changed

sklearn/ensemble/iforest.py

Lines changed: 68 additions & 90 deletions
Original file line numberDiff line numberDiff line change
152152
153153
"""
154154

155-
def __init__(
156-
self,
157-
n_estimators=100,
158-
max_samples="auto",
159-
contamination="legacy",
160-
max_features=1.0,
161-
bootstrap=False,
162-
n_jobs=None,
163-
behaviour="old",
164-
random_state=None,
165-
verbose=0,
166-
):
155+
def __init__(self,
156+
n_estimators=100,
157+
max_samples="auto",
158+
contamination="legacy",
159+
max_features=1.,
160+
bootstrap=False,
161+
n_jobs=None,
162+
behaviour='old',
163+
random_state=None,
164+
verbose=0):
167165
super().__init__(
168166
base_estimator=ExtraTreeRegressor(
169-
max_features=1, splitter="random", random_state=random_state
170-
),
167+
max_features=1,
168+
splitter='random',
169+
random_state=random_state),
171170
# here above max_features has no links with self.max_features
172171
bootstrap=bootstrap,
173172
bootstrap_features=False,
@@ -176,8 +175,7 @@ def __init__(
176175
max_features=max_features,
177176
n_jobs=n_jobs,
178177
random_state=random_state,
179-
verbose=verbose,
180-
)
178+
verbose=verbose)
181179

182180
self.behaviour = behaviour
183181
self.contamination = contamination
@@ -190,7 +188,7 @@ def _parallel_args(self):
190188
# a thread-based backend rather than a process-based backend so as
191189
# to avoid suffering from communication overhead and extra memory
192190
# copies.
193-
return _joblib_parallel_args(prefer="threads")
191+
return _joblib_parallel_args(prefer='threads')
194192

195193
def fit(self, X, y=None, sample_weight=None):
196194
"""Fit estimator.
@@ -213,26 +211,22 @@ def fit(self, X, y=None, sample_weight=None):
213211
self : object
214212
"""
215213
if self.contamination == "legacy":
216-
warn(
217-
"default contamination parameter 0.1 will change "
218-
'in version 0.22 to "auto". This will change the '
219-
"predict method behavior.",
220-
FutureWarning,
221-
)
214+
warn('default contamination parameter 0.1 will change '
215+
'in version 0.22 to "auto". This will change the '
216+
'predict method behavior.',
217+
FutureWarning)
222218
self._contamination = 0.1
223219
else:
224220
self._contamination = self.contamination
225221

226-
if self.behaviour == "old":
227-
warn(
228-
'behaviour="old" is deprecated and will be removed '
229-
'in version 0.22. Please use behaviour="new", which '
230-
"makes the decision_function change to match "
231-
"other anomaly detection algorithm API.",
232-
FutureWarning,
233-
)
222+
if self.behaviour == 'old':
223+
warn('behaviour="old" is deprecated and will be removed '
224+
'in version 0.22. Please use behaviour="new", which '
225+
'makes the decision_function change to match '
226+
'other anomaly detection algorithm API.',
227+
FutureWarning)
234228

235-
X = check_array(X, accept_sparse=["csc"])
229+
X = check_array(X, accept_sparse=['csc'])
236230
if issparse(X):
237231
# Pre-sort indices to avoid that each individual tree of the
238232
# ensemble sorts the indices.
@@ -245,51 +239,43 @@ def fit(self, X, y=None, sample_weight=None):
245239
n_samples = X.shape[0]
246240

247241
if isinstance(self.max_samples, str):
248-
if self.max_samples == "auto":
242+
if self.max_samples == 'auto':
249243
max_samples = min(256, n_samples)
250244
else:
251-
raise ValueError(
252-
"max_samples (%s) is not supported."
253-
'Valid choices are: "auto", int or'
254-
"float" % self.max_samples
255-
)
245+
raise ValueError('max_samples (%s) is not supported.'
246+
'Valid choices are: "auto", int or'
247+
'float' % self.max_samples)
256248

257249
elif isinstance(self.max_samples, INTEGER_TYPES):
258250
if self.max_samples > n_samples:
259-
warn(
260-
"max_samples (%s) is greater than the "
261-
"total number of samples (%s). max_samples "
262-
"will be set to n_samples for estimation."
263-
% (self.max_samples, n_samples)
264-
)
251+
warn("max_samples (%s) is greater than the "
252+
"total number of samples (%s). max_samples "
253+
"will be set to n_samples for estimation."
254+
% (self.max_samples, n_samples))
265255
max_samples = n_samples
266256
else:
267257
max_samples = self.max_samples
268258
else: # float
269-
if not (0.0 < self.max_samples <= 1.0):
270-
raise ValueError(
271-
"max_samples must be in (0, 1], got %r" % self.max_samples
272-
)
259+
if not (0. < self.max_samples <= 1.):
260+
raise ValueError("max_samples must be in (0, 1], got %r"
261+
% self.max_samples)
273262
max_samples = int(self.max_samples * X.shape[0])
274263

275264
self.max_samples_ = max_samples
276265
max_depth = int(np.ceil(np.log2(max(max_samples, 2))))
277-
super()._fit(
278-
X, y, max_samples, max_depth=max_depth, sample_weight=sample_weight
279-
)
266+
super()._fit(X, y, max_samples,
267+
max_depth=max_depth,
268+
sample_weight=sample_weight)
280269

281-
if self.behaviour == "old":
270+
if self.behaviour == 'old':
282271
# in this case, decision_function = 0.5 + self.score_samples(X):
283272
if self._contamination == "auto":
284-
raise ValueError(
285-
"contamination parameter cannot be set to "
286-
"'auto' when behaviour == 'old'."
287-
)
273+
raise ValueError("contamination parameter cannot be set to "
274+
"'auto' when behaviour == 'old'.")
288275

289276
self.offset_ = -0.5
290-
self._threshold_ = np.percentile(
291-
self.decision_function(X), 100.0 * self._contamination
292-
)
277+
self._threshold_ = np.percentile(self.decision_function(X),
278+
100. * self._contamination)
293279

294280
return self
295281

@@ -302,7 +288,8 @@ def fit(self, X, y=None, sample_weight=None):
302288

303289
# else, define offset_ wrt contamination parameter, so that the
304290
# threshold_ attribute is implicitly 0 and is not needed anymore:
305-
self.offset_ = np.percentile(self.score_samples(X), 100.0 * self._contamination)
291+
self.offset_ = np.percentile(self.score_samples(X),
292+
100. * self._contamination)
306293

307294
return self
308295

@@ -323,9 +310,9 @@ def predict(self, X):
323310
be considered as an inlier according to the fitted model.
324311
"""
325312
check_is_fitted(self, ["offset_"])
326-
X = check_array(X, accept_sparse="csr")
313+
X = check_array(X, accept_sparse='csr')
327314
is_inlier = np.ones(X.shape[0], dtype=int)
328-
threshold = self.threshold_ if self.behaviour == "old" else 0
315+
threshold = self.threshold_ if self.behaviour == 'old' else 0
329316
is_inlier[self.decision_function(X) < threshold] = -1
330317
return is_inlier
331318

@@ -343,9 +330,10 @@ def decision_function(self, X):
343330
344331
Parameters
345332
----------
346-
X : {array-like, sparse matrix}, shape (n_samples, n_features)
347-
The training input samples. Sparse matrices are accepted only if
348-
they are supported by the base estimator.
333+
X : array-like or sparse matrix, shape (n_samples, n_features)
334+
The input samples. Internally, it will be converted to
335+
``dtype=np.float32`` and if a sparse matrix is provided
336+
to a sparse ``csr_matrix``.
349337
350338
Returns
351339
-------
@@ -374,9 +362,8 @@ def score_samples(self, X):
374362
375363
Parameters
376364
----------
377-
X : {array-like, sparse matrix}, shape (n_samples, n_features)
378-
The training input samples. Sparse matrices are accepted only if
379-
they are supported by the base estimator.
365+
X : array-like or sparse matrix, shape (n_samples, n_features)
366+
The input samples.
380367
381368
Returns
382369
-------
@@ -388,14 +375,12 @@ def score_samples(self, X):
388375
check_is_fitted(self, ["estimators_"])
389376

390377
# Check data
391-
X = check_array(X, accept_sparse="csr")
378+
X = check_array(X, accept_sparse='csr')
392379
if self.n_features_ != X.shape[1]:
393-
raise ValueError(
394-
"Number of features of the model must "
395-
"match the input. Model n_features is {0} and "
396-
"input n_features is {1}."
397-
"".format(self.n_features_, X.shape[1])
398-
)
380+
raise ValueError("Number of features of the model must "
381+
"match the input. Model n_features is {0} and "
382+
"input n_features is {1}."
383+
"".format(self.n_features_, X.shape[1]))
399384
n_samples = X.shape[0]
400385

401386
n_samples_leaf = np.zeros(n_samples, order="f")
@@ -423,10 +408,7 @@ def score_samples(self, X):
423408

424409
scores = 2 ** (
425410
-depths
426-
/ (
427-
len(self.estimators_)
428-
* _average_path_length([self.max_samples_])
429-
)
411+
/ (len(self.estimators_) * _average_path_length([self.max_samples_]))
430412
)
431413

432414
# Take the opposite of the scores as bigger is better (here less
@@ -435,15 +417,11 @@ def score_samples(self, X):
435417

436418
@property
437419
def threshold_(self):
438-
if self.behaviour != "old":
439-
raise AttributeError(
440-
"threshold_ attribute does not exist when " "behaviour != 'old'"
441-
)
442-
warn(
443-
"threshold_ attribute is deprecated in 0.20 and will"
444-
" be removed in 0.22.",
445-
DeprecationWarning,
446-
)
420+
if self.behaviour != 'old':
421+
raise AttributeError("threshold_ attribute does not exist when "
422+
"behaviour != 'old'")
423+
warn("threshold_ attribute is deprecated in 0.20 and will"
424+
" be removed in 0.22.", DeprecationWarning)
447425
return self._threshold_
448426

449427

@@ -473,8 +451,8 @@ def _average_path_length(n_samples_leaf):
473451
mask_2 = n_samples_leaf == 2
474452
not_mask = ~np.logical_or(mask_1, mask_2)
475453

476-
average_path_length[mask_1] = 0.0
477-
average_path_length[mask_2] = 1.0
454+
average_path_length[mask_1] = 0.
455+
average_path_length[mask_2] = 1.
478456
average_path_length[not_mask] = (
479457
2.0 * (np.log(n_samples_leaf[not_mask] - 1.0) + np.euler_gamma)
480458
- 2.0 * (n_samples_leaf[not_mask] - 1.0) / n_samples_leaf[not_mask]

0 commit comments

Comments
 (0)
0