7
7
"""
8
8
9
9
# Author: Nicolas Hug
10
- import concurrent .futures
11
10
12
11
import numpy as np
13
12
14
13
from ...base import BaseEstimator , TransformerMixin
15
14
from ...utils import check_array , check_random_state
16
15
from ...utils ._openmp_helpers import _openmp_effective_n_threads
17
16
from ...utils .fixes import percentile
17
+ from ...utils .parallel import Parallel , delayed
18
18
from ...utils .validation import check_is_fitted
19
19
from ._binning import _map_to_bins
20
20
from ._bitset import set_bitset_memoryview
@@ -230,19 +230,13 @@ def fit(self, X, y=None):
230
230
self .bin_thresholds_ = [None ] * n_features
231
231
n_bins_non_missing = [None ] * n_features
232
232
233
- with concurrent .futures .ThreadPoolExecutor (
234
- max_workers = self .n_threads
235
- ) as executor :
236
- future_to_f_idx = {
237
- executor .submit (_find_binning_thresholds , X [:, f_idx ], max_bins ): f_idx
238
- for f_idx in range (n_features )
239
- if not self .is_categorical_ [f_idx ]
240
- }
241
- for future in concurrent .futures .as_completed (future_to_f_idx ):
242
- f_idx = future_to_f_idx [future ]
243
- self .bin_thresholds_ [f_idx ] = future .result ()
244
- n_bins_non_missing [f_idx ] = self .bin_thresholds_ [f_idx ].shape [0 ] + 1
233
+ non_cat_thresholds = Parallel (n_jobs = self .n_threads , backend = "threading" )(
234
+ delayed (_find_binning_thresholds )(X [:, f_idx ], max_bins )
235
+ for f_idx in range (n_features )
236
+ if not self .is_categorical_ [f_idx ]
237
+ )
245
238
239
+ non_cat_idx = 0
246
240
for f_idx in range (n_features ):
247
241
if self .is_categorical_ [f_idx ]:
248
242
# Since categories are assumed to be encoded in
@@ -252,6 +246,10 @@ def fit(self, X, y=None):
252
246
thresholds = known_categories [f_idx ]
253
247
n_bins_non_missing [f_idx ] = thresholds .shape [0 ]
254
248
self .bin_thresholds_ [f_idx ] = thresholds
249
+ else :
250
+ self .bin_thresholds_ [f_idx ] = non_cat_thresholds [non_cat_idx ]
251
+ n_bins_non_missing [f_idx ] = self .bin_thresholds_ [f_idx ].shape [0 ] + 1
252
+ non_cat_idx += 1
255
253
256
254
self .n_bins_non_missing_ = np .array (n_bins_non_missing , dtype = np .uint32 )
257
255
return self
0 commit comments