scikit-learn
diff --git a/sklearn/ensemble/iforest.py b/sklearn/ensemble/iforest.py
Lines changed: 68 additions & 90 deletions
@@ -152,22 +152,21 @@ class IsolationForest(BaseBagging, OutlierMixin):
 
     """
 
-    def __init__(
-        self,
-        n_estimators=100,
-        max_samples="auto",
-        contamination="legacy",
-        max_features=1.0,
-        bootstrap=False,
-        n_jobs=None,
-        behaviour="old",
-        random_state=None,
-        verbose=0,
-    ):
+    def __init__(self,
+                 n_estimators=100,
+                 max_samples="auto",
+                 contamination="legacy",
+                 max_features=1.,
+                 bootstrap=False,
+                 n_jobs=None,
+                 behaviour='old',
+                 random_state=None,
+                 verbose=0):
         super().__init__(
             base_estimator=ExtraTreeRegressor(
-                max_features=1, splitter="random", random_state=random_state
-            ),
+                max_features=1,
+                splitter='random',
+                random_state=random_state),
             # here above max_features has no links with self.max_features
             bootstrap=bootstrap,
             bootstrap_features=False,
@@ -176,8 +175,7 @@ def __init__(
             max_features=max_features,
             n_jobs=n_jobs,
             random_state=random_state,
-            verbose=verbose,
-        )
+            verbose=verbose)
 
         self.behaviour = behaviour
         self.contamination = contamination
@@ -190,7 +188,7 @@ def _parallel_args(self):
         # a thread-based backend rather than a process-based backend so as
         # to avoid suffering from communication overhead and extra memory
         # copies.
-        return _joblib_parallel_args(prefer="threads")
+        return _joblib_parallel_args(prefer='threads')
 
     def fit(self, X, y=None, sample_weight=None):
         """Fit estimator.
@@ -213,26 +211,22 @@ def fit(self, X, y=None, sample_weight=None):
         self : object
         """
         if self.contamination == "legacy":
-            warn(
-                "default contamination parameter 0.1 will change "
-                'in version 0.22 to "auto". This will change the '
-                "predict method behavior.",
-                FutureWarning,
-            )
+            warn('default contamination parameter 0.1 will change '
+                 'in version 0.22 to "auto". This will change the '
+                 'predict method behavior.',
+                 FutureWarning)
             self._contamination = 0.1
         else:
             self._contamination = self.contamination
 
-        if self.behaviour == "old":
-            warn(
-                'behaviour="old" is deprecated and will be removed '
-                'in version 0.22. Please use behaviour="new", which '
-                "makes the decision_function change to match "
-                "other anomaly detection algorithm API.",
-                FutureWarning,
-            )
+        if self.behaviour == 'old':
+            warn('behaviour="old" is deprecated and will be removed '
+                 'in version 0.22. Please use behaviour="new", which '
+                 'makes the decision_function change to match '
+                 'other anomaly detection algorithm API.',
+                 FutureWarning)
 
-        X = check_array(X, accept_sparse=["csc"])
+        X = check_array(X, accept_sparse=['csc'])
         if issparse(X):
             # Pre-sort indices to avoid that each individual tree of the
             # ensemble sorts the indices.
@@ -245,51 +239,43 @@ def fit(self, X, y=None, sample_weight=None):
         n_samples = X.shape[0]
 
         if isinstance(self.max_samples, str):
-            if self.max_samples == "auto":
+            if self.max_samples == 'auto':
                 max_samples = min(256, n_samples)
             else:
-                raise ValueError(
-                    "max_samples (%s) is not supported."
-                    'Valid choices are: "auto", int or'
-                    "float" % self.max_samples
-                )
+                raise ValueError('max_samples (%s) is not supported.'
+                                 'Valid choices are: "auto", int or'
+                                 'float' % self.max_samples)
 
         elif isinstance(self.max_samples, INTEGER_TYPES):
             if self.max_samples > n_samples:
-                warn(
-                    "max_samples (%s) is greater than the "
-                    "total number of samples (%s). max_samples "
-                    "will be set to n_samples for estimation."
-                    % (self.max_samples, n_samples)
-                )
+                warn("max_samples (%s) is greater than the "
+                     "total number of samples (%s). max_samples "
+                     "will be set to n_samples for estimation."
+                     % (self.max_samples, n_samples))
                 max_samples = n_samples
             else:
                 max_samples = self.max_samples
         else:  # float
-            if not (0.0 < self.max_samples <= 1.0):
-                raise ValueError(
-                    "max_samples must be in (0, 1], got %r" % self.max_samples
-                )
+            if not (0. < self.max_samples <= 1.):
+                raise ValueError("max_samples must be in (0, 1], got %r"
+                                 % self.max_samples)
             max_samples = int(self.max_samples * X.shape[0])
 
         self.max_samples_ = max_samples
         max_depth = int(np.ceil(np.log2(max(max_samples, 2))))
-        super()._fit(
-            X, y, max_samples, max_depth=max_depth, sample_weight=sample_weight
-        )
+        super()._fit(X, y, max_samples,
+                     max_depth=max_depth,
+                     sample_weight=sample_weight)
 
-        if self.behaviour == "old":
+        if self.behaviour == 'old':
             # in this case, decision_function = 0.5 + self.score_samples(X):
             if self._contamination == "auto":
-                raise ValueError(
-                    "contamination parameter cannot be set to "
-                    "'auto' when behaviour == 'old'."
-                )
+                raise ValueError("contamination parameter cannot be set to "
+                                 "'auto' when behaviour == 'old'.")
 
             self.offset_ = -0.5
-            self._threshold_ = np.percentile(
-                self.decision_function(X), 100.0 * self._contamination
-            )
+            self._threshold_ = np.percentile(self.decision_function(X),
+                                             100. * self._contamination)
 
             return self
 
@@ -302,7 +288,8 @@ def fit(self, X, y=None, sample_weight=None):
 
         # else, define offset_ wrt contamination parameter, so that the
         # threshold_ attribute is implicitly 0 and is not needed anymore:
-        self.offset_ = np.percentile(self.score_samples(X), 100.0 * self._contamination)
+        self.offset_ = np.percentile(self.score_samples(X),
+                                     100. * self._contamination)
 
         return self
 
@@ -323,9 +310,9 @@ def predict(self, X):
             be considered as an inlier according to the fitted model.
         """
         check_is_fitted(self, ["offset_"])
-        X = check_array(X, accept_sparse="csr")
+        X = check_array(X, accept_sparse='csr')
         is_inlier = np.ones(X.shape[0], dtype=int)
-        threshold = self.threshold_ if self.behaviour == "old" else 0
+        threshold = self.threshold_ if self.behaviour == 'old' else 0
         is_inlier[self.decision_function(X) < threshold] = -1
         return is_inlier
 
@@ -343,9 +330,10 @@ def decision_function(self, X):
 
         Parameters
         ----------
-        X : {array-like, sparse matrix}, shape (n_samples, n_features)
-            The training input samples. Sparse matrices are accepted only if
-            they are supported by the base estimator.
+        X : array-like or sparse matrix, shape (n_samples, n_features)
+            The input samples. Internally, it will be converted to
+            ``dtype=np.float32`` and if a sparse matrix is provided
+            to a sparse ``csr_matrix``.
 
         Returns
         -------
@@ -374,9 +362,8 @@ def score_samples(self, X):
 
         Parameters
         ----------
-        X : {array-like, sparse matrix}, shape (n_samples, n_features)
-            The training input samples. Sparse matrices are accepted only if
-            they are supported by the base estimator.
+        X : array-like or sparse matrix, shape (n_samples, n_features)
+            The input samples.
 
         Returns
         -------
@@ -388,14 +375,12 @@ def score_samples(self, X):
         check_is_fitted(self, ["estimators_"])
 
         # Check data
-        X = check_array(X, accept_sparse="csr")
+        X = check_array(X, accept_sparse='csr')
         if self.n_features_ != X.shape[1]:
-            raise ValueError(
-                "Number of features of the model must "
-                "match the input. Model n_features is {0} and "
-                "input n_features is {1}."
-                "".format(self.n_features_, X.shape[1])
-            )
+            raise ValueError("Number of features of the model must "
+                             "match the input. Model n_features is {0} and "
+                             "input n_features is {1}."
+                             "".format(self.n_features_, X.shape[1]))
         n_samples = X.shape[0]
 
         n_samples_leaf = np.zeros(n_samples, order="f")
@@ -423,10 +408,7 @@ def score_samples(self, X):
 
         scores = 2 ** (
             -depths
-            / (
-                len(self.estimators_)
-                * _average_path_length([self.max_samples_])
-            )
+            / (len(self.estimators_) * _average_path_length([self.max_samples_]))
         )
 
         # Take the opposite of the scores as bigger is better (here less
@@ -435,15 +417,11 @@ def score_samples(self, X):
 
     @property
     def threshold_(self):
-        if self.behaviour != "old":
-            raise AttributeError(
-                "threshold_ attribute does not exist when " "behaviour != 'old'"
-            )
-        warn(
-            "threshold_ attribute is deprecated in 0.20 and will"
-            " be removed in 0.22.",
-            DeprecationWarning,
-        )
+        if self.behaviour != 'old':
+            raise AttributeError("threshold_ attribute does not exist when "
+                                 "behaviour != 'old'")
+        warn("threshold_ attribute is deprecated in 0.20 and will"
+             " be removed in 0.22.", DeprecationWarning)
         return self._threshold_
 
 
@@ -473,8 +451,8 @@ def _average_path_length(n_samples_leaf):
     mask_2 = n_samples_leaf == 2
     not_mask = ~np.logical_or(mask_1, mask_2)
 
-    average_path_length[mask_1] = 0.0
-    average_path_length[mask_2] = 1.0
+    average_path_length[mask_1] = 0.
+    average_path_length[mask_2] = 1.
     average_path_length[not_mask] = (
         2.0 * (np.log(n_samples_leaf[not_mask] - 1.0) + np.euler_gamma)
         - 2.0 * (n_samples_leaf[not_mask] - 1.0) / n_samples_leaf[not_mask]