SkuaD01
diff --git a/sklearn/impute/_knn.py b/sklearn/impute/_knn.py
Lines changed: 53 additions & 45 deletions
@@ -3,6 +3,8 @@
 # License: BSD 3 clause
 
 import numpy as np
+from multiprocessing import cpu_count
+from joblib import Parallel, delayed
 
 from ._base import _BaseImputer
 from ..utils.validation import FLOAT_DTYPES
@@ -236,55 +238,61 @@ def transform(self, X):
         dist_idx_map = np.zeros(X.shape[0], dtype=int)
         dist_idx_map[row_missing_idx] = np.arange(row_missing_idx.shape[0])
 
+
+        def process_chunk_col(dist_chunk, start, row_missing_chunk, col):
+            """Impute column `col` in-place in X for the rows of one chunk."""
+            if not valid_mask[col]:
+                # column was all missing during training
+                return
+
+            col_mask = mask[row_missing_chunk, col]
+            if not np.any(col_mask):
+                # column has no missing values
+                return
+
+            potential_donors_idx, = np.nonzero(non_missing_fix_X[:, col])
+
+            # receivers_idx are indices in X
+            receivers_idx = row_missing_chunk[np.flatnonzero(col_mask)]
+
+            # distances for samples that needed imputation for column
+            dist_subset = (dist_chunk[dist_idx_map[receivers_idx] - start]
+                           [:, potential_donors_idx])
+
+            # receivers with all nan distances impute with mean
+            all_nan_dist_mask = np.isnan(dist_subset).all(axis=1)
+            all_nan_receivers_idx = receivers_idx[all_nan_dist_mask]
+
+            if all_nan_receivers_idx.size:
+                col_mean = np.ma.array(self._fit_X[:, col],
+                                       mask=mask_fit_X[:, col]).mean()
+                X[all_nan_receivers_idx, col] = col_mean
+
+                if len(all_nan_receivers_idx) == len(receivers_idx):
+                    # all receivers imputed with mean
+                    return
+
+                # receivers with at least one defined distance
+                receivers_idx = receivers_idx[~all_nan_dist_mask]
+                dist_subset = (dist_chunk[dist_idx_map[receivers_idx] - start]
+                               [:, potential_donors_idx])
+
+            n_neighbors = min(self.n_neighbors, len(potential_donors_idx))
+            value = self._calc_impute(
+                dist_subset,
+                n_neighbors,
+                self._fit_X[potential_donors_idx, col],
+                mask_fit_X[potential_donors_idx, col])
+            X[receivers_idx, col] = value
+
         def process_chunk(dist_chunk, start):
             row_missing_chunk = row_missing_idx[start:start + len(dist_chunk)]
 
             # Find and impute missing by column
-            for col in range(X.shape[1]):
-                if not valid_mask[col]:
-                    # column was all missing during training
-                    continue
-
-                col_mask = mask[row_missing_chunk, col]
-                if not np.any(col_mask):
-                    # column has no missing values
-                    continue
-
-                potential_donors_idx, = np.nonzero(non_missing_fix_X[:, col])
-
-                # receivers_idx are indices in X
-                receivers_idx = row_missing_chunk[np.flatnonzero(col_mask)]
-
-                # distances for samples that needed imputation for column
-                dist_subset = (dist_chunk[dist_idx_map[receivers_idx] - start]
-                               [:, potential_donors_idx])
-
-                # receivers with all nan distances impute with mean
-                all_nan_dist_mask = np.isnan(dist_subset).all(axis=1)
-                all_nan_receivers_idx = receivers_idx[all_nan_dist_mask]
-
-                if all_nan_receivers_idx.size:
-                    col_mean = np.ma.array(self._fit_X[:, col],
-                                           mask=mask_fit_X[:, col]).mean()
-                    X[all_nan_receivers_idx, col] = col_mean
-
-                    if len(all_nan_receivers_idx) == len(receivers_idx):
-                        # all receivers imputed with mean
-                        continue
-
-                    # receivers with at least one defined distance
-                    receivers_idx = receivers_idx[~all_nan_dist_mask]
-                    dist_subset = (dist_chunk[dist_idx_map[receivers_idx]
-                                              - start]
-                                   [:, potential_donors_idx])
-
-                n_neighbors = min(self.n_neighbors, len(potential_donors_idx))
-                value = self._calc_impute(
-                    dist_subset,
-                    n_neighbors,
-                    self._fit_X[potential_donors_idx, col],
-                    mask_fit_X[potential_donors_idx, col])
-                X[receivers_idx, col] = value
+            Parallel(n_jobs=cpu_count(), backend='threading')(
+                delayed(process_chunk_col)(
+                    dist_chunk, start, row_missing_chunk, col)
+                for col in range(X.shape[1]))
 
         # process in fixed-memory chunks
         gen = pairwise_distances_chunked(