sstalley
diff --git a/‎.binder/requirements.txt
Lines changed: 1 addition & 1 deletion b/‎.binder/requirements.txt
Lines changed: 1 addition & 1 deletion
diff --git a/‎doc/developers/advanced_installation.rst
Lines changed: 1 addition & 1 deletion b/‎doc/developers/advanced_installation.rst
Lines changed: 1 addition & 1 deletion
diff --git a/‎doc/templates/index.html
Lines changed: 1 addition & 0 deletions b/‎doc/templates/index.html
Lines changed: 1 addition & 0 deletions
diff --git a/‎doc/whats_new/v0.22.rst
Lines changed: 42 additions & 0 deletions b/‎doc/whats_new/v0.22.rst
Lines changed: 42 additions & 0 deletions
diff --git a/‎setup.cfg
Lines changed: 2 additions & 2 deletions b/‎setup.cfg
Lines changed: 2 additions & 2 deletions
diff --git a/‎sklearn/__init__.py
Lines changed: 1 addition & 1 deletion b/‎sklearn/__init__.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎sklearn/cluster/_bicluster.py
Lines changed: 2 additions & 2 deletions b/‎sklearn/cluster/_bicluster.py
Lines changed: 2 additions & 2 deletions
diff --git a/‎sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py
Lines changed: 1 addition & 1 deletion b/‎sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎sklearn/impute/_knn.py
Lines changed: 61 additions & 51 deletions b/‎sklearn/impute/_knn.py
Lines changed: 61 additions & 51 deletions
diff --git a/‎sklearn/impute/tests/test_knn.py
Lines changed: 19 additions & 8 deletions b/‎sklearn/impute/tests/test_knn.py
Lines changed: 19 additions & 8 deletions
@@ -5,5 +5,5 @@ scikit-image==0.16.2
 pandas==0.25.3
 sphinx-gallery==0.5.0
 # Need to update the scikit-learn version on each 0.22 minor release
-scikit-learn==0.22
+scikit-learn==0.22.2
 
@@ -26,7 +26,7 @@ Installing a nightly build is the quickest way to:
 
 ::
 
-  pip install --pre -f https://sklearn-nightly.scdn8.secure.raxcdn.com scikit-learn
+  pip install --pre --extra-index-url https://pypi.anaconda.org/scipy-wheels-nightly/simple scikit-learn
 
 
 .. _install_bleeding_edge:
 
@@ -156,6 +156,7 @@ <h4 class="sk-landing-call-header">News</h4>
         <li><strong>On-going development:</strong>
         <a href="https://scikit-learn.org/dev/whats_new.html"><strong>What's new</strong> (Changelog)</a>
         </li>
+        <li><strong>February 2020.</strong> scikit-learn 0.22.2 is available for download (<a href="whats_new/v0.22.html#version-0-22-2">Changelog</a>).
         <li><strong>January 2020.</strong> scikit-learn 0.22.1 is available for download (<a href="whats_new/v0.22.html#version-0-22-1">Changelog</a>).
         <li><strong>December 2019.</strong> scikit-learn 0.22 is available for download (<a href="whats_new/v0.22.html#version-0-22-0">Changelog</a>).
         </li>
 
@@ -2,6 +2,48 @@
 
 .. currentmodule:: sklearn
 
+.. _changes_0_22_2:
+
+Version 0.22.2
+==============
+
+**February 28 2020**
+
+Changelog
+---------
+
+:mod:`sklearn.impute`
+.....................
+
+- |Efficiency| Reduce :class:`impute.KNNImputer` asymptotic memory usage by
+  chunking pairwise distance computation.
+  :pr:`16397` by `Joel Nothman`_.
+
+:mod:`sklearn.metrics`
+......................
+
+- |Fix| Fixed a bug in :func:`metrics.plot_roc_curve` where
+  the name of the estimator was passed to the :class:`metrics.RocCurveDisplay`
+  instead of the parameter `name`. This resulted in a different plot when
+  calling :meth:`metrics.RocCurveDisplay.plot` again afterwards.
+  :pr:`16500` by :user:`Guillaume Lemaitre <glemaitre>`.
+
+- |Fix| Fixed a bug in :func:`metrics.plot_precision_recall_curve` where the
+  name of the estimator was passed to the
+  :class:`metrics.PrecisionRecallDisplay` instead of the parameter `name`.
+  This resulted in a different plot when calling
+  :meth:`metrics.PrecisionRecallDisplay.plot` again afterwards.
+  :pr:`16505` by :user:`Guillaume Lemaitre <glemaitre>`.
+
+:mod:`sklearn.neighbors`
+........................
+
+- |Fix| Fixed a bug which converted a list of arrays into a 2-D object
+  array instead of a 1-D array containing NumPy arrays. This bug
+  was affecting :meth:`neighbors.NearestNeighbors.radius_neighbors`.
+  :pr:`16076` by :user:`Guillaume Lemaitre <glemaitre>` and
+  :user:`Alex Shacked <alexshacked>`.
+
 .. _changes_0_22_1:
 
 Version 0.22.1
 
@@ -20,9 +20,9 @@ filterwarnings =
 
 [wheelhouse_uploader]
 artifact_indexes=
-    # Wheels built by travis (only for specific tags):
+    # Wheels built by Azure Pipelines (only for specific tags):
     # https://github.com/MacPython/scikit-learn-wheels
-    http://wheels.scipy.org
+    https://pypi.anaconda.org/scikit-learn-wheels-staging/simple/scikit-learn/
 
 [flake8]
 # Default flake8 3.5 ignored flags
 
@@ -40,7 +40,7 @@
 # Dev branch marker is: 'X.Y.dev' or 'X.Y.devN' where N is an integer.
 # 'X.Y.dev0' is the canonical version of 'X.Y.dev'
 #
-__version__ = '0.22.1'
+__version__ = '0.22.2'
 
 
 # On OSX, we can get a runtime error due to multiple OpenMP libraries loaded
 
@@ -260,9 +260,9 @@ class SpectralCoclustering(BaseSpectral):
     >>> X = np.array([[1, 1], [2, 1], [1, 0],
     ...               [4, 7], [3, 5], [3, 6]])
     >>> clustering = SpectralCoclustering(n_clusters=2, random_state=0).fit(X)
-    >>> clustering.row_labels_
+    >>> clustering.row_labels_ #doctest: +SKIP
     array([0, 1, 1, 0, 0, 0], dtype=int32)
-    >>> clustering.column_labels_
+    >>> clustering.column_labels_ #doctest: +SKIP
     array([0, 0], dtype=int32)
     >>> clustering
     SpectralCoclustering(n_clusters=2, random_state=0)
 
@@ -952,7 +952,7 @@ class HistGradientBoostingClassifier(BaseHistGradientBoosting,
     --------
     >>> # To use this experimental feature, we need to explicitly ask for it:
     >>> from sklearn.experimental import enable_hist_gradient_boosting  # noqa
-    >>> from sklearn.ensemble import HistGradientBoostingRegressor
+    >>> from sklearn.ensemble import HistGradientBoostingClassifier
     >>> from sklearn.datasets import load_iris
     >>> X, y = load_iris(return_X_y=True)
     >>> clf = HistGradientBoostingClassifier().fit(X, y)
 
@@ -6,7 +6,7 @@
 
 from ._base import _BaseImputer
 from ..utils.validation import FLOAT_DTYPES
-from ..metrics import pairwise_distances
+from ..metrics import pairwise_distances_chunked
 from ..metrics.pairwise import _NAN_METRICS
 from ..neighbors._base import _get_weights
 from ..neighbors._base import _check_weights
@@ -217,71 +217,81 @@ def transform(self, X):
 
         mask = _get_mask(X, self.missing_values)
         mask_fit_X = self._mask_fit_X
+        valid_mask = ~np.all(mask_fit_X, axis=0)
 
-        # Removes columns where the training data is all nan
         if not np.any(mask):
-            valid_mask = ~np.all(mask_fit_X, axis=0)
+            # No missing values in X
+            # Remove columns where the training data is all nan
             return X[:, valid_mask]
 
         row_missing_idx = np.flatnonzero(mask.any(axis=1))
 
-        # Pairwise distances between receivers and fitted samples
-        dist = pairwise_distances(X[row_missing_idx, :], self._fit_X,
-                                  metric=self.metric,
-                                  missing_values=self.missing_values,
-                                  force_all_finite=force_all_finite)
+        non_missing_fix_X = np.logical_not(mask_fit_X)
 
         # Maps from indices from X to indices in dist matrix
         dist_idx_map = np.zeros(X.shape[0], dtype=np.int)
         dist_idx_map[row_missing_idx] = np.arange(row_missing_idx.shape[0])
 
-        non_missing_fix_X = np.logical_not(mask_fit_X)
-
-        # Find and impute missing
-        valid_idx = []
-        for col in range(X.shape[1]):
-
-            potential_donors_idx = np.flatnonzero(non_missing_fix_X[:, col])
-
-            # column was all missing during training
-            if len(potential_donors_idx) == 0:
-                continue
-
-            # column has no missing values
-            if not np.any(mask[:, col]):
-                valid_idx.append(col)
-                continue
+        def process_chunk(dist_chunk, start):
+            row_missing_chunk = row_missing_idx[start:start + len(dist_chunk)]
 
-            valid_idx.append(col)
-
-            receivers_idx = np.flatnonzero(mask[:, col])
-
-            # distances for samples that needed imputation for column
-            dist_subset = (dist[dist_idx_map[receivers_idx]]
-                           [:, potential_donors_idx])
+            # Find and impute missing by column
+            for col in range(X.shape[1]):
+                if not valid_mask[col]:
+                    # column was all missing during training
+                    continue
 
-            # receivers with all nan distances impute with mean
-            all_nan_dist_mask = np.isnan(dist_subset).all(axis=1)
-            all_nan_receivers_idx = receivers_idx[all_nan_dist_mask]
+                col_mask = mask[row_missing_chunk, col]
+                if not np.any(col_mask):
+                    # column has no missing values
+                    continue
 
-            if all_nan_receivers_idx.size:
-                col_mean = np.ma.array(self._fit_X[:, col],
-                                       mask=mask_fit_X[:, col]).mean()
-                X[all_nan_receivers_idx, col] = col_mean
+                potential_donors_idx, = np.nonzero(non_missing_fix_X[:, col])
 
-                if len(all_nan_receivers_idx) == len(receivers_idx):
-                    # all receivers imputed with mean
-                    continue
+                # receivers_idx are indices in X
+                receivers_idx = row_missing_chunk[np.flatnonzero(col_mask)]
 
-                # receivers with at least one defined distance
-                receivers_idx = receivers_idx[~all_nan_dist_mask]
-                dist_subset = (dist[dist_idx_map[receivers_idx]]
+                # distances for samples that needed imputation for column
+                dist_subset = (dist_chunk[dist_idx_map[receivers_idx] - start]
                                [:, potential_donors_idx])
 
-            n_neighbors = min(self.n_neighbors, len(potential_donors_idx))
-            value = self._calc_impute(dist_subset, n_neighbors,
-                                      self._fit_X[potential_donors_idx, col],
-                                      mask_fit_X[potential_donors_idx, col])
-            X[receivers_idx, col] = value
-
-        return super()._concatenate_indicator(X[:, valid_idx], X_indicator)
+                # receivers with all nan distances impute with mean
+                all_nan_dist_mask = np.isnan(dist_subset).all(axis=1)
+                all_nan_receivers_idx = receivers_idx[all_nan_dist_mask]
+
+                if all_nan_receivers_idx.size:
+                    col_mean = np.ma.array(self._fit_X[:, col],
+                                           mask=mask_fit_X[:, col]).mean()
+                    X[all_nan_receivers_idx, col] = col_mean
+
+                    if len(all_nan_receivers_idx) == len(receivers_idx):
+                        # all receivers imputed with mean
+                        continue
+
+                    # receivers with at least one defined distance
+                    receivers_idx = receivers_idx[~all_nan_dist_mask]
+                    dist_subset = (dist_chunk[dist_idx_map[receivers_idx]
+                                              - start]
+                                   [:, potential_donors_idx])
+
+                n_neighbors = min(self.n_neighbors, len(potential_donors_idx))
+                value = self._calc_impute(
+                    dist_subset,
+                    n_neighbors,
+                    self._fit_X[potential_donors_idx, col],
+                    mask_fit_X[potential_donors_idx, col])
+                X[receivers_idx, col] = value
+
+        # process in fixed-memory chunks
+        gen = pairwise_distances_chunked(
+            X[row_missing_idx, :],
+            self._fit_X,
+            metric=self.metric,
+            missing_values=self.missing_values,
+            force_all_finite=force_all_finite,
+            reduce_func=process_chunk)
+        for chunk in gen:
+            # process_chunk modifies X in place. No return value.
+            pass
+
+        return super()._concatenate_indicator(X[:, valid_mask], X_indicator)
@@ -1,6 +1,7 @@
 import numpy as np
 import pytest
 
+from sklearn import config_context
 from sklearn.impute import KNNImputer
 from sklearn.metrics.pairwise import nan_euclidean_distances
 from sklearn.metrics.pairwise import pairwise_distances
@@ -522,8 +523,12 @@ def custom_callable(x, y, missing_values=np.nan, squared=False):
     assert_allclose(imputer.fit_transform(X), X_imputed)
 
 
+@pytest.mark.parametrize("working_memory", [None, 0])
 @pytest.mark.parametrize("na", [-1, np.nan])
-def test_knn_imputer_with_simple_example(na):
+# Note that we use working_memory=0 to ensure that chunking is tested, even
+# for a small dataset. However, it should raise a UserWarning that we ignore.
+@pytest.mark.filterwarnings("ignore:adhere to working_memory")
+def test_knn_imputer_with_simple_example(na, working_memory):
 
     X = np.array([
         [0, na, 0, na],
@@ -553,8 +558,9 @@ def test_knn_imputer_with_simple_example(na):
         [r7c0, 7, 7, 7]
     ])
 
-    imputer_comp = KNNImputer(missing_values=na)
-    assert_allclose(imputer_comp.fit_transform(X), X_imputed)
+    with config_context(working_memory=working_memory):
+        imputer_comp = KNNImputer(missing_values=na)
+        assert_allclose(imputer_comp.fit_transform(X), X_imputed)
 
 
 @pytest.mark.parametrize("na", [-1, np.nan])
@@ -598,8 +604,10 @@ def test_knn_imputer_drops_all_nan_features(na):
     assert_allclose(knn.transform(X2), X2_expected)
 
 
+@pytest.mark.parametrize("working_memory", [None, 0])
 @pytest.mark.parametrize("na", [-1, np.nan])
-def test_knn_imputer_distance_weighted_not_enough_neighbors(na):
+def test_knn_imputer_distance_weighted_not_enough_neighbors(na,
+                                                            working_memory):
     X = np.array([
         [3, na],
         [2, na],
@@ -626,11 +634,14 @@ def test_knn_imputer_distance_weighted_not_enough_neighbors(na):
         [X_50, 5]
     ])
 
-    knn_3 = KNNImputer(missing_values=na, n_neighbors=3, weights='distance')
-    assert_allclose(knn_3.fit_transform(X), X_expected)
+    with config_context(working_memory=working_memory):
+        knn_3 = KNNImputer(missing_values=na, n_neighbors=3,
+                           weights='distance')
+        assert_allclose(knn_3.fit_transform(X), X_expected)
 
-    knn_4 = KNNImputer(missing_values=na, n_neighbors=4, weights='distance')
-    assert_allclose(knn_4.fit_transform(X), X_expected)
+        knn_4 = KNNImputer(missing_values=na, n_neighbors=4,
+                           weights='distance')
+        assert_allclose(knn_4.fit_transform(X), X_expected)
 
 
 @pytest.mark.parametrize("na, allow_nan", [(-1, False), (np.nan, True)])
Original file line number	Diff line number	Diff line change
`@@ -40,7 +40,7 @@`
`40`	`40`	`# Dev branch marker is: 'X.Y.dev' or 'X.Y.devN' where N is an integer.`
`41`	`41`	`# 'X.Y.dev0' is the canonical version of 'X.Y.dev'`
`42`	`42`	`#`
`43`		`-__version__ = '0.22.1'`
	`43`	`+__version__ = '0.22.2'`
`44`	`44`
`45`	`45`
`46`	`46`	`# On OSX, we can get a runtime error due to multiple OpenMP libraries loaded`