8000 ENH Prefer threads for IsolationForest (#12543) · thoo/scikit-learn@a8911b5 · GitHub
[go: up one dir, main page]

Skip to content

Commit a8911b5

Browse files
thoo authored and ogrisel committed
ENH Prefer threads for IsolationForest (scikit-learn#12543)
1 parent a68e7ca commit a8911b5

File tree

3 files changed

+24
-2
lines changed

3 files changed

+24
-2
lines changed

doc/whats_new/v0.21.rst

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,15 @@ Support for Python 3.4 and below has been officially dropped.
5555
:class:`linear_model.MultiTaskLasso` which were breaking when
5656
``warm_start = True``. :issue:`12360` by :user:`Aakanksha Joshi <joaak>`.
5757

58+
:mod:`sklearn.ensemble`
59+
.......................
60+
61+
- |Efficiency| Make :class:`ensemble.IsolationForest` prefer threads over
62+
processes when running with ``n_jobs > 1`` as the underlying decision tree
63+
fit calls do release the GIL. This change reduces memory usage and
64+
communication overhead. :issue:`12543` by :user:`Isaac Storch <istorch>`
65+
and `Olivier Grisel`_.
66+
5867
:mod:`sklearn.metrics`
5968
......................
6069

sklearn/ensemble/bagging.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,9 @@ def fit(self, X, y, sample_weight=None):
243243
"""
244244
return self._fit(X, y, self.max_samples, sample_weight=sample_weight)
245245

246+
def _parallel_args(self):
247+
return {}
248+
246249
def _fit(self, X, y, max_samples=None, max_depth=None, sample_weight=None):
247250
"""Build a Bagging ensemble of estimators from the training
248251
set (X, y).
@@ -365,7 +368,8 @@ def _fit(self, X, y, max_samples=None, max_depth=None, sample_weight=None):
365368
seeds = random_state.randint(MAX_INT, size=n_more_estimators)
366369
self._seeds = seeds
367370

368-
all_results = Parallel(n_jobs=n_jobs, verbose=self.verbose)(
371+
all_results = Parallel(n_jobs=n_jobs, verbose=self.verbose,
372+
**self._parallel_args())(
369373
delayed(_parallel_build_estimators)(
370374
n_estimators[i],
371375
self,
@@ -686,7 +690,8 @@ def predict_proba(self, X):
686690
n_jobs, n_estimators, starts = _partition_estimators(self.n_estimators,
687691
self.n_jobs)
688692

689-
all_proba = Parallel(n_jobs=n_jobs, verbose=self.verbose)(
693+
all_proba = Parallel(n_jobs=n_jobs, verbose=self.verbose,
694+
**self._parallel_args())(
690695
delayed(_parallel_predict_proba)(
691696
self.estimators_[starts[i]:starts[i + 1]],
692697
self.estimators_features_[starts[i]:starts[i + 1]],

sklearn/ensemble/iforest.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from ..externals import six
1515
from ..tree import ExtraTreeRegressor
1616
from ..utils import check_random_state, check_array
17+
from ..utils.fixes import _joblib_parallel_args
1718
from ..utils.validation import check_is_fitted
1819
from ..base import OutlierMixin
1920

@@ -186,6 +187,13 @@ def __init__(self,
186187
def _set_oob_score(self, X, y):
187188
raise NotImplementedError("OOB score not supported by iforest")
188189

190+
def _parallel_args(self):
191+
# ExtraTreeRegressor releases the GIL, so it's more efficient to use
192+
# a thread-based backend rather than a process-based backend so as
193+
# to avoid suffering from communication overhead and extra memory
194+
# copies.
195+
return _joblib_parallel_args(prefer='threads')
196+
189197
def fit(self, X, y=None, sample_weight=None):
190198
"""Fit estimator.
191199

0 commit comments

Comments
 (0)
0