From 57b0f1872ca45f0b8e542be0f1c01c0bb0669a48 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Tue, 22 Dec 2020 15:24:50 +0100 Subject: [PATCH 01/38] DOC 0.24.0 release highlights formatting (#19059) From 319bc3911039f899a1d52acfd89a431593fc9597 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Tue, 22 Dec 2020 16:02:21 +0100 Subject: [PATCH 02/38] MNT update the number of wheels generated to upload to PyPI --- build_tools/github/check_wheels.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/build_tools/github/check_wheels.py b/build_tools/github/check_wheels.py index c1c183f279b54..05c70085b4081 100644 --- a/build_tools/github/check_wheels.py +++ b/build_tools/github/check_wheels.py @@ -11,11 +11,11 @@ build_matrix = wheel_config['jobs']['build_wheels']['strategy']['matrix'] n_python_versions = len(build_matrix['python']) -# For each python version we have: 5 wheels +# For each python version we have: 6 wheels # 1 osx wheel (x86_64) -# 2 linux wheel (i686 + x86_64) +# 3 linux wheel (i686 + x86_64 + arm64) # 2 windows wheel (win32 + wind_amd64) -n_wheels = 5 * n_python_versions +n_wheels = 6 * n_python_versions # plus one more for the sdist n_wheels += 1 From 203fb7c9cf70c6213c9f1f96e167b5b8f4e894f4 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Tue, 22 Dec 2020 16:22:02 +0100 Subject: [PATCH 03/38] MNT fix publish to pypi conditions --- .github/workflows/publish_pypi.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/publish_pypi.yml b/.github/workflows/publish_pypi.yml index 0e2b9ffd0f258..677188a3567b3 100644 --- a/.github/workflows/publish_pypi.yml +++ b/.github/workflows/publish_pypi.yml @@ -40,10 +40,10 @@ jobs: user: __token__ password: ${{ secrets.TEST_PYPI_TOKEN }} repository_url: https://test.pypi.org/legacy/ - if: ${{ github.event.inputs.pypi_repo }} == 'testpypi' + if: ${{ github.event.inputs.pypi_repo == 'testpypi' }} - name: Publish package to PyPI uses: pypa/gh-action-pypi-publish@v1.4.1 with: user: __token__ password: ${{ secrets.PYPI_TOKEN }} - if: ${{ github.event.inputs.pypi_repo }} == 'pypi' + if: ${{ github.event.inputs.pypi_repo == 'pypi' }} From e467ba8c879f384535875d68673001759862fea6 Mon Sep 17 00:00:00 2001 From: "Thomas J. 
Fan" Date: Tue, 22 Dec 2020 11:48:46 -0500 Subject: [PATCH 04/38] CI Publish to Pypi workflow for aarch64 wheels (#19060) --- build_tools/github/check_wheels.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/build_tools/github/check_wheels.py b/build_tools/github/check_wheels.py index 05c70085b4081..64cebe3b6b0c4 100644 --- a/build_tools/github/check_wheels.py +++ b/build_tools/github/check_wheels.py @@ -11,15 +11,25 @@ build_matrix = wheel_config['jobs']['build_wheels']['strategy']['matrix'] n_python_versions = len(build_matrix['python']) -# For each python version we have: 6 wheels +# For each python version we have: 5 wheels # 1 osx wheel (x86_64) -# 3 linux wheel (i686 + x86_64 + arm64) +# 2 linux wheel (i686 + x86_64) # 2 windows wheel (win32 + wind_amd64) -n_wheels = 6 * n_python_versions +n_wheels = 5 * n_python_versions # plus one more for the sdist n_wheels += 1 +# aarch64 builds from travis +travis_config_path = Path.cwd() / ".travis.yml" +with travis_config_path.open('r') as f: + travis_config = yaml.safe_load(f) + +jobs = travis_config['jobs']['include'] +travis_builds = [j for j in jobs + if any("CIBW_BUILD" in env for env in j["env"])] +n_wheels += len(travis_builds) + dist_files = list(Path("dist").glob('**/*')) n_dist_files = len(dist_files) From d9bdc06b0e7c5a15f475d54f8d6b9eb3d50801a6 Mon Sep 17 00:00:00 2001 From: Harry Wei Date: Mon, 4 Jan 2021 18:00:06 +0800 Subject: [PATCH 05/38] DOC typo correction in neighbors.rst (#19099) --- doc/modules/neighbors.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/modules/neighbors.rst b/doc/modules/neighbors.rst index 36a9c86d98e24..bb84b79e8570a 100644 --- a/doc/modules/neighbors.rst +++ b/doc/modules/neighbors.rst @@ -433,7 +433,7 @@ based on the following assumptions: training points * ``leaf_size`` is close to its default value of ``30`` * when :math:`D > 15`, the intrinsic dimensionality of the data is generally - to high for tree-based methods + too high for tree-based methods Effect of ``leaf_size`` ----------------------- From 665a389c3d877181b0ef474b57d721e3eba7afac Mon Sep 17 00:00:00 2001 From: yzhenman <65328572+yzhenman@users.noreply.github.com> Date: Mon, 4 Jan 2021 02:04:55 -0800 Subject: [PATCH 06/38] DOC fix dataset used for visualization in digits classification example (#19095) --- examples/classification/plot_digits_classification.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/classification/plot_digits_classification.py b/examples/classification/plot_digits_classification.py index 35843883df1b2..0d1a79f609f7d 100644 --- a/examples/classification/plot_digits_classification.py +++ b/examples/classification/plot_digits_classification.py @@ -78,8 +78,9 @@ # digit value in the title. _, axes = plt.subplots(nrows=1, ncols=4, figsize=(10, 3)) -for ax, image, prediction in zip(axes, digits.images, predicted): +for ax, image, prediction in zip(axes, X_test, predicted): ax.set_axis_off() + image = image.reshape(8, 8) ax.imshow(image, cmap=plt.cm.gray_r, interpolation='nearest') ax.set_title(f'Prediction: {prediction}') From 5be5808b27defdbe28d697aeed5d795eace8e812 Mon Sep 17 00:00:00 2001 From: "Paulo S. 
Costa" Date: Tue, 5 Jan 2021 19:19:15 -0800 Subject: [PATCH 07/38] DOC Fix cross-validation wording in RidgeCV (#19121) --- sklearn/linear_model/_ridge.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sklearn/linear_model/_ridge.py b/sklearn/linear_model/_ridge.py index 199a1cd760660..7e8d5d273d1df 100644 --- a/sklearn/linear_model/_ridge.py +++ b/sklearn/linear_model/_ridge.py @@ -1670,8 +1670,7 @@ class RidgeCV(MultiOutputMixin, RegressorMixin, _BaseRidgeCV): See glossary entry for :term:`cross-validation estimator`. - By default, it performs Leave-One-Out Cross-Validation, which is a form of - efficient Leave-One-Out cross-validation. + By default, it performs efficient Leave-One-Out Cross-Validation. Read more in the :ref:`User Guide `. From 98bcc24a77140b9ed9bb464e6863943f5e619398 Mon Sep 17 00:00:00 2001 From: "Thomas J. Fan" Date: Wed, 6 Jan 2021 04:11:53 -0500 Subject: [PATCH 08/38] TST Skips test_compare_to_ELKI for arm (#19115) * TST Skips test for arm [cd build] * CI Skip for 32bit linux [cd build] --- sklearn/cluster/tests/test_optics.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/sklearn/cluster/tests/test_optics.py b/sklearn/cluster/tests/test_optics.py index 03ca4995c0446..4428b6c00d7eb 100644 --- a/sklearn/cluster/tests/test_optics.py +++ b/sklearn/cluster/tests/test_optics.py @@ -1,6 +1,8 @@ # Authors: Shane Grigsby # Adrin Jalali # License: BSD 3 clause +import platform +import sys import numpy as np import pytest @@ -15,8 +17,10 @@ from sklearn.utils._testing import assert_array_equal from sklearn.utils._testing import assert_raise_message from sklearn.utils._testing import assert_allclose +from sklearn.utils.fixes import sp_version, parse_version from sklearn.cluster.tests.common import generate_clustered_data +from sklearn.utils import _IS_32BIT rng = np.random.RandomState(0) @@ -314,6 +318,11 @@ def test_processing_order(): assert_array_equal(clust.ordering_, [0, 1, 2, 3]) +@pytest.mark.skipif(sp_version >= parse_version("1.6.0") + and (platform.machine() == "aarch64" or + (sys.platform == "linux" and _IS_32BIT)), + reason=("Test fails for SciPy 1.6.0 on ARM and on 32-bit " + "linux. See #19111")) def test_compare_to_ELKI(): # Expected values, computed with (future) ELKI 0.7.5 using: # java -jar elki.jar cli -dbc.in csv -dbc.filter FixedDBIDsFilter From 110333fdf98e41ca3f2f669ea0f9e8f07d17566b Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Wed, 6 Jan 2021 15:48:57 +0100 Subject: [PATCH 09/38] CI Reduce travis nightly load (#19113) Co-authored-by: Thomas J. Fan --- .travis.yml | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index 3f631d9f8bc90..3c995f35253ae 100644 --- a/.travis.yml +++ b/.travis.yml @@ -36,15 +36,21 @@ jobs: - BUILD_WITH_ICC=true if: type = cron OR commit_message =~ /\[icc-build\]/ - - python: 3.7 + # Manual trigger of linux/arm64 tests in PR without triggering the full + # wheel building process for all the Python versions. + - python: 3.9 os: linux arch: arm64 - if: type = cron OR commit_message =~ /\[arm64\]/ + if: commit_message =~ /\[arm64\]/ env: - CPU_COUNT=8 - # Linux environments to build the scikit-learn wheels - # for the ARM64 arquitecture and Python 3.6 and newer + # Linux environments to build the scikit-learn wheels for the ARM64 + # architecture and Python 3.6 and newer. 
This is used both at release time + # with the manual trigger in the commit message in the release branch and as + # a scheduled task to build the weekly dev build on the master branch. The + # weekly frequency is meant to avoid depleting the Travis CI credits too + # fast. - python: 3.6 os: linux arch: arm64 From 867cf5f6c7f47156dda79de114f80717e75eee5f Mon Sep 17 00:00:00 2001 From: shinnar Date: Fri, 8 Jan 2021 03:12:02 -0500 Subject: [PATCH 10/38] DOC Fix docstring of HalvingSearch estimators (#19133) --- sklearn/model_selection/_search_successive_halving.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/model_selection/_search_successive_halving.py b/sklearn/model_selection/_search_successive_halving.py index 512595b1943ce..b522ce7fbda41 100644 --- a/sklearn/model_selection/_search_successive_halving.py +++ b/sklearn/model_selection/_search_successive_halving.py @@ -448,7 +448,7 @@ class HalvingGridSearchCV(BaseSuccessiveHalving): The refitted estimator is made available at the ``best_estimator_`` attribute and permits using ``predict`` directly on this - ``GridSearchCV`` instance. + ``HalvingGridSearchCV`` instance. error_score : 'raise' or numeric Value to assign to the score if an error occurs in estimator fitting. @@ -735,7 +735,7 @@ class HalvingRandomSearchCV(BaseSuccessiveHalving): The refitted estimator is made available at the ``best_estimator_`` attribute and permits using ``predict`` directly on this - ``GridSearchCV`` instance. + ``HalvingRandomSearchCV`` instance. error_score : 'raise' or numeric Value to assign to the score if an error occurs in estimator fitting. From 59f6ec959d4574503a450567b7f58fa0a386c6c6 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Fri, 8 Jan 2021 12:39:00 +0100 Subject: [PATCH 11/38] FIX accept meta-estimator in SelfTrainingClassifier (#19126) Co-authored-by: Thomas J. Fan Co-authored-by: Olivier Grisel --- doc/whats_new/v0.24.rst | 17 ++++++++++++ sklearn/semi_supervised/_self_training.py | 8 +++--- .../tests/test_self_training.py | 27 ++++++++++++++++++- 3 files changed, 47 insertions(+), 5 deletions(-) diff --git a/doc/whats_new/v0.24.rst b/doc/whats_new/v0.24.rst index b2c6db64969f0..ca96e875d342a 100644 --- a/doc/whats_new/v0.24.rst +++ b/doc/whats_new/v0.24.rst @@ -2,6 +2,23 @@ .. currentmodule:: sklearn +.. _changes_0_24_1: + +Version 0.24.1 +============== + +Changelog +--------- + +:mod:`sklearn.semi_supervised` +.............................. + +- |Fix| :class:`semi_supervised.SelfTrainingClassifier` is now accepting + meta-estimator (e.g. :class:`ensemble.StackingClassifier`). The validation + of this estimator is done on the fitted estimator, once we know the existence + of the method `predict_proba`. + :pr:`19126` by :user:`Guillaume Lemaitre `. + .. _changes_0_24: Version 0.24.0 diff --git a/sklearn/semi_supervised/_self_training.py b/sklearn/semi_supervised/_self_training.py index c3ed0baeaae68..8c79065c830d1 100644 --- a/sklearn/semi_supervised/_self_training.py +++ b/sklearn/semi_supervised/_self_training.py @@ -205,10 +205,10 @@ def fit(self, X, y): X[safe_mask(X, has_label)], self.transduction_[has_label]) - if self.n_iter_ == 1: - # Only validate in the first iteration so that n_iter=0 is - # equivalent to the base_estimator itself. - _validate_estimator(self.base_estimator) + # Validate the fitted estimator since `predict_proba` can be + # delegated to an underlying "final" fitted estimator as + # generally done in meta-estimator or pipeline. 
+ _validate_estimator(self.base_estimator_) # Predict on the unlabeled samples prob = self.base_estimator_.predict_proba( diff --git a/sklearn/semi_supervised/tests/test_self_training.py b/sklearn/semi_supervised/tests/test_self_training.py index b5c44996d5e52..7c5287be9974c 100644 --- a/sklearn/semi_supervised/tests/test_self_training.py +++ b/sklearn/semi_supervised/tests/test_self_training.py @@ -4,14 +4,16 @@ from numpy.testing import assert_array_equal import pytest +from sklearn.ensemble import StackingClassifier from sklearn.exceptions import NotFittedError -from sklearn.semi_supervised import SelfTrainingClassifier from sklearn.neighbors import KNeighborsClassifier from sklearn.svm import SVC from sklearn.model_selection import train_test_split from sklearn.datasets import load_iris, make_blobs from sklearn.metrics import accuracy_score +from sklearn.semi_supervised import SelfTrainingClassifier + # Author: Oliver Rausch # License: BSD 3 clause @@ -318,3 +320,26 @@ def test_k_best_selects_best(): for row in most_confident_svc.tolist(): assert row in added_by_st + + +def test_base_estimator_meta_estimator(): + # Check that a meta-estimator relying on an estimator implementing + # `predict_proba` will work even if it does expose this method before being + # fitted. + # Non-regression test for: + # https://github.com/scikit-learn/scikit-learn/issues/19119 + + base_estimator = StackingClassifier( + estimators=[ + ("svc_1", SVC(probability=True)), ("svc_2", SVC(probability=True)), + ], + final_estimator=SVC(probability=True), cv=2 + ) + + # make sure that the `base_estimator` does not expose `predict_proba` + # without being fitted + assert not hasattr(base_estimator, "predict_proba") + + clf = SelfTrainingClassifier(base_estimator=base_estimator) + clf.fit(X_train, y_train_missing_labels) + clf.predict_proba(X_test) From f317eadb6bdefd6c0e456ab49290afdefd2cb09f Mon Sep 17 00:00:00 2001 From: Connor Tann Date: Fri, 8 Jan 2021 13:34:35 +0000 Subject: [PATCH 12/38] DOC Fix typo in datasets.rst (#19136) Fix typo in dataset loading docs --- doc/datasets.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/datasets.rst b/doc/datasets.rst index 30efdae06b1e3..b9484a02ce84c 100644 --- a/doc/datasets.rst +++ b/doc/datasets.rst @@ -39,8 +39,8 @@ an array of shape ``n_samples`` * ``n_features`` with key ``data`` (except for 20newsgroups) and a numpy array of length ``n_samples``, containing the target values, with key ``target``. -The Bunch object is a dictionary that exposes its keys are attributes. -For more information about Bunch object, see :class:`~sklearn.utils.Bunch`: +The Bunch object is a dictionary that exposes its keys as attributes. +For more information about Bunch object, see :class:`~sklearn.utils.Bunch`. 
It's also possible for almost all of these function to constrain the output to be a tuple containing only the data and the target, by setting the From e4a63dba4c2f77954b1228cd7a1d3a472210c776 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Fri, 8 Jan 2021 18:55:53 +0100 Subject: [PATCH 13/38] DOC Update docs guideline regarding docstring formatting (#18243) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Olivier Grisel Co-authored-by: Jérémie du Boisberranger <34657725+jeremiedbb@users.noreply.github.com> --- doc/developers/contributing.rst | 14 ++++++- doc/glossary.rst | 7 ++++ .../model_selection/plot_learning_curve.py | 2 +- sklearn/dummy.py | 8 ++-- sklearn/linear_model/_least_angle.py | 38 ++++++++++--------- sklearn/preprocessing/_discretization.py | 6 +-- 6 files changed, 48 insertions(+), 27 deletions(-) diff --git a/doc/developers/contributing.rst b/doc/developers/contributing.rst index 78c1175620c4f..8a3c460c615a8 100644 --- a/doc/developers/contributing.rst +++ b/doc/developers/contributing.rst @@ -779,6 +779,8 @@ Finally, follow the formatting rules below to make it consistently good: sample_weight : array-like of shape (n_samples,), default=None + multioutput_array : ndarray of shape (n_samples, n_classes) or list of such arrays + In general have the following in mind: 1. Use Python basic types. (``bool`` instead of ``boolean``) @@ -792,10 +794,18 @@ Finally, follow the formatting rules below to make it consistently good: 5. Specify ``dataframe`` when "frame-like" features are being used, such as the column names. 6. When specifying the data type of a list, use ``of`` as a delimiter: - ``list of int``. + ``list of int``. When the parameter supports arrays giving details + about the shape and/or data type and a list of such arrays, you can + use one of ``array-like of shape (n_samples,) or list of such arrays``. 7. When specifying the dtype of an ndarray, use e.g. ``dtype=np.int32`` after defining the shape: - ``ndarray of shape (n_samples,), dtype=np.int32``. + ``ndarray of shape (n_samples,), dtype=np.int32``. You can specify + multiple dtype as a set: + ``array-like of shape (n_samples,), dtype={np.float64, np.float32}``. + If one wants to mention arbitrary precision, use `integral` and + `floating` rather than the Python dtype `int` and `float`. When both + `int` and `floating` are supported, there is no need to specify the + dtype. 8. When the default is ``None``, ``None`` only needs to be specified at the end with ``default=None``. Be sure to include in the docstring, what it means for the parameter or attribute to be ``None``. diff --git a/doc/glossary.rst b/doc/glossary.rst index 30e647be1c0f4..a43eda4a79b67 100644 --- a/doc/glossary.rst +++ b/doc/glossary.rst @@ -255,6 +255,13 @@ General Concepts or vectorizing. Our estimators do not work with struct arrays, for instance. + Our documentation can sometimes give information about the dtype + precision, e.g. `np.int32`, `np.int64`, etc. When the precision is + provided, it refers to the NumPy dtype. If an arbitrary precision is + used, the documentation will refer to dtype `integer` or `floating`. + Note that in this case, the precision can be platform dependent. + The `numeric` dtype refers to accepting both `integer` and `floating`. + TODO: Mention efficiency and precision issues; casting policy. 
duck typing diff --git a/examples/model_selection/plot_learning_curve.py b/examples/model_selection/plot_learning_curve.py index ee9809f27e44f..71cc565c3528c 100644 --- a/examples/model_selection/plot_learning_curve.py +++ b/examples/model_selection/plot_learning_curve.py @@ -77,7 +77,7 @@ def plot_learning_curve(estimator, title, X, y, axes=None, ylim=None, cv=None, ``-1`` means using all processors. See :term:`Glossary ` for more details. - train_sizes : array-like of shape (n_ticks,), dtype={int, float} + train_sizes : array-like of shape (n_ticks,) Relative or absolute numbers of training examples that will be used to generate the learning curve. If the ``dtype`` is float, it is regarded as a fraction of the maximum size of the training set (that is diff --git a/sklearn/dummy.py b/sklearn/dummy.py index 66992d83f83f4..ad5ab3f24731d 100644 --- a/sklearn/dummy.py +++ b/sklearn/dummy.py @@ -64,13 +64,13 @@ class DummyClassifier(MultiOutputMixin, ClassifierMixin, BaseEstimator): Attributes ---------- - classes_ : ndarray of shape (n_classes,) or list thereof + classes_ : ndarray of shape (n_classes,) or list of such arrays Class labels for each output. n_classes_ : int or list of int Number of label for each output. - class_prior_ : ndarray of shape (n_classes,) or list thereof + class_prior_ : ndarray of shape (n_classes,) or list of such arrays Probability of each class for each output. n_outputs_ : int @@ -272,7 +272,7 @@ def predict_proba(self, X): Returns ------- - P : ndarray of shape (n_samples, n_classes) or list thereof + P : ndarray of shape (n_samples, n_classes) or list of such arrays Returns the probability of the sample for each class in the model, where classes are ordered arithmetically, for each output. @@ -335,7 +335,7 @@ def predict_log_proba(self, X): Returns ------- - P : ndarray of shape (n_samples, n_classes) or list thereof + P : ndarray of shape (n_samples, n_classes) or list of such arrays Returns the log probability of the sample for each class in the model, where classes are ordered arithmetically for each output. diff --git a/sklearn/linear_model/_least_angle.py b/sklearn/linear_model/_least_angle.py index e1d9146b5f2ea..55e37ff51fc6a 100644 --- a/sklearn/linear_model/_least_angle.py +++ b/sklearn/linear_model/_least_angle.py @@ -864,21 +864,22 @@ class Lars(MultiOutputMixin, RegressorMixin, LinearModel): Attributes ---------- - alphas_ : array-like of shape (n_alphas + 1,) or list of thereof of \ - shape (n_targets,) + alphas_ : array-like of shape (n_alphas + 1,) or list of such arrays Maximum of covariances (in absolute value) at each iteration. ``n_alphas`` is either ``max_iter``, ``n_features`` or the number of nodes in the path with ``alpha >= alpha_min``, whichever - is smaller. + is smaller. If this is a list of array-like, the length of the outer + list is `n_targets`. - active_ : list of shape (n_alphas,) or list of thereof of shape \ - (n_targets,) + active_ : list of shape (n_alphas,) or list of such lists Indices of active variables at the end of the path. + If this is a list of list, the length of the outer list is `n_targets`. - coef_path_ : array-like of shape (n_features, n_alphas + 1) or list of \ - thereof of shape (n_targets,) + coef_path_ : array-like of shape (n_features, n_alphas + 1) or list \ + of such arrays The varying values of the coefficients along the path. It is not - present if the ``fit_path`` parameter is ``False``. + present if the ``fit_path`` parameter is ``False``. 
If this is a list + of array-like, the length of the outer list is `n_targets`. coef_ : array-like of shape (n_features,) or (n_targets, n_features) Parameter vector (w in the formulation formula). @@ -1121,21 +1122,23 @@ class LassoLars(Lars): Attributes ---------- - alphas_ : array-like of shape (n_alphas + 1,) or list of thereof of shape \ - (n_targets,) + alphas_ : array-like of shape (n_alphas + 1,) or list of such arrays Maximum of covariances (in absolute value) at each iteration. ``n_alphas`` is either ``max_iter``, ``n_features`` or the number of nodes in the path with ``alpha >= alpha_min``, whichever - is smaller. + is smaller. If this is a list of array-like, the length of the outer + list is `n_targets`. - active_ : list of length n_alphas or list of thereof of shape (n_targets,) + active_ : list of length n_alphas or list of such lists Indices of active variables at the end of the path. + If this is a list of list, the length of the outer list is `n_targets`. - coef_path_ : array-like of shape (n_features, n_alphas + 1) or list of \ - thereof of shape (n_targets,) + coef_path_ : array-like of shape (n_features, n_alphas + 1) or list \ + of such arrays If a list is passed it's expected to be one of n_targets such arrays. The varying values of the coefficients along the path. It is not - present if the ``fit_path`` parameter is ``False``. + present if the ``fit_path`` parameter is ``False``. If this is a list + of array-like, the length of the outer list is `n_targets`. coef_ : array-like of shape (n_features,) or (n_targets, n_features) Parameter vector (w in the formulation formula). @@ -1382,8 +1385,9 @@ class LarsCV(Lars): Attributes ---------- - active_ : list of length n_alphas or list of thereof of shape (n_targets,) + active_ : list of length n_alphas or list of such lists Indices of active variables at the end of the path. + If this is a list of lists, the outer list length is `n_targets`. coef_ : array-like of shape (n_features,) parameter vector (w in the formulation formula) @@ -1775,7 +1779,7 @@ class LassoLarsIC(LassoLars): alpha_ : float the alpha parameter chosen by the information criterion - alphas_ : array-like of shape (n_alphas + 1,) or list thereof + alphas_ : array-like of shape (n_alphas + 1,) or list of such arrays Maximum of covariances (in absolute value) at each iteration. ``n_alphas`` is either ``max_iter``, ``n_features`` or the number of nodes in the path with ``alpha >= alpha_min``, whichever diff --git a/sklearn/preprocessing/_discretization.py b/sklearn/preprocessing/_discretization.py index a628533ac13d0..22fa236f3314e 100644 --- a/sklearn/preprocessing/_discretization.py +++ b/sklearn/preprocessing/_discretization.py @@ -139,7 +139,7 @@ def fit(self, X, y=None): Parameters ---------- - X : array-like of shape (n_samples, n_features), dtype={int, float} + X : array-like of shape (n_samples, n_features) Data to be discretized. y : None @@ -276,7 +276,7 @@ def transform(self, X): Parameters ---------- - X : array-like of shape (n_samples, n_features), dtype={int, float} + X : array-like of shape (n_samples, n_features) Data to be discretized. Returns @@ -326,7 +326,7 @@ def inverse_transform(self, Xt): Parameters ---------- - Xt : array-like of shape (n_samples, n_features), dtype={int, float} + Xt : array-like of shape (n_samples, n_features) Transformed data in the binned space. 
Returns From 3ebe1a5f9f974d06560e3a2e02a7980e27b24b92 Mon Sep 17 00:00:00 2001 From: Kunj Date: Sat, 9 Jan 2021 09:47:13 -0800 Subject: [PATCH 14/38] DOC Update docs for StandardScaler.scale_ to include 0 variance (#19124) Co-authored-by: Thomas J. Fan --- sklearn/preprocessing/_data.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sklearn/preprocessing/_data.py b/sklearn/preprocessing/_data.py index 478d41ecc768a..3921b898c072d 100644 --- a/sklearn/preprocessing/_data.py +++ b/sklearn/preprocessing/_data.py @@ -617,8 +617,11 @@ class StandardScaler(TransformerMixin, BaseEstimator): Attributes ---------- scale_ : ndarray of shape (n_features,) or None - Per feature relative scaling of the data. This is calculated using - `np.sqrt(var_)`. Equal to ``None`` when ``with_std=False``. + Per feature relative scaling of the data to achieve zero mean and unit + variance. Generally this is calculated using `np.sqrt(var_)`. If a + variance is zero, we can't achieve unit variance, and the data is left + as-is, giving a scaling factor of 1. `scale_` is equal to `None` + when `with_std=False`. .. versionadded:: 0.17 *scale_* From 890caa4d6345d1db2893f784f259050cdca2ada3 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Mon, 11 Jan 2021 14:12:16 +0100 Subject: [PATCH 15/38] CI Use macos-10.13 compatible libomp when building the wheels (#19064) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Jérémie du Boisberranger <34657725+jeremiedbb@users.noreply.github.com> --- .github/workflows/wheels.yml | 1 + build_tools/github/build_wheels.sh | 10 +++++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index ac1d495642049..17726ec9a112b 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -86,6 +86,7 @@ jobs: OPENBLAS_NUM_THREADS=2 SKLEARN_SKIP_NETWORK_TESTS=1 SKLEARN_BUILD_PARALLEL=3 + MACOSX_DEPLOYMENT_TARGET=10.13 CIBW_BUILD: cp${{ matrix.python }}-${{ matrix.platform_id }} CIBW_REPAIR_WHEEL_COMMAND_WINDOWS: bash build_tools/github/repair_windows_wheels.sh {wheel} {dest_dir} ${{ matrix.bitness }} CIBW_BEFORE_TEST_WINDOWS: bash build_tools/github/build_minimal_windows_image.sh ${{ matrix.python }} ${{ matrix.bitness }} diff --git a/build_tools/github/build_wheels.sh b/build_tools/github/build_wheels.sh index 917fc14fdb651..9b45481cbb978 100644 --- a/build_tools/github/build_wheels.sh +++ b/build_tools/github/build_wheels.sh @@ -5,7 +5,15 @@ set -x # OpenMP is not present on macOS by default if [[ "$RUNNER_OS" == "macOS" ]]; then - brew install libomp + # Make sure to use a libomp version binary compatible with the oldest + # supported version of the macos SDK as libomp will be vendored into the + # scikit-learn wheels for macos. The list of bottles can be found at: + # https://formulae.brew.sh/api/formula/libomp.json. Currently, the oldest + # supported macos version is: High Sierra / 10.13. When upgrading this, be + # sure to update the MACOSX_DEPLOYMENT_TARGET environment variable in + # wheels.yml accordingly. 
+ wget https://homebrew.bintray.com/bottles/libomp-11.0.0.high_sierra.bottle.tar.gz + brew install libomp-11.0.0.high_sierra.bottle.tar.gz export CC=/usr/bin/clang export CXX=/usr/bin/clang++ export CPPFLAGS="$CPPFLAGS -Xpreprocessor -fopenmp" From 7168e14cbb3e40627fa71111e70292b951dd16f2 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Mon, 11 Jan 2021 17:37:31 +0000 Subject: [PATCH 16/38] DOC minor broken links fix in parallelism docs (#19151) Co-authored-by: Thomas J. Fan --- doc/computing/parallelism.rst | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/doc/computing/parallelism.rst b/doc/computing/parallelism.rst index c30d0790c1f01..3dce5ef66bb1d 100644 --- a/doc/computing/parallelism.rst +++ b/doc/computing/parallelism.rst @@ -114,9 +114,11 @@ threads than the number of CPUs on a machine. Over-subscription happens when a program is running too many threads at the same time. Suppose you have a machine with 8 CPUs. Consider a case where you're running -a :class:`~GridSearchCV` (parallelized with joblib) with ``n_jobs=8`` over -a :class:`~HistGradientBoostingClassifier` (parallelized with OpenMP). Each -instance of :class:`~HistGradientBoostingClassifier` will spawn 8 threads +a :class:`~sklearn.model_selection.GridSearchCV` (parallelized with joblib) +with ``n_jobs=8`` over a +:class:`~sklearn.ensemble.HistGradientBoostingClassifier` (parallelized with +OpenMP). Each instance of +:class:`~sklearn.ensemble.HistGradientBoostingClassifier` will spawn 8 threads (since you have 8 CPUs). That's a total of ``8 * 8 = 64`` threads, which leads to oversubscription of physical CPU resources and to scheduling overhead. @@ -129,9 +131,10 @@ is the default), joblib will tell its child **processes** to limit the number of threads they can use, so as to avoid oversubscription. In practice the heuristic that joblib uses is to tell the processes to use ``max_threads = n_cpus // n_jobs``, via their corresponding environment variable. Back to -our example from above, since the joblib backend of :class:`~GridSearchCV` -is ``loky``, each process will only be able to use 1 thread instead of 8, -thus mitigating the oversubscription issue. +our example from above, since the joblib backend of +:class:`~sklearn.model_selection.GridSearchCV` is ``loky``, each process will +only be able to use 1 thread instead of 8, thus mitigating the +oversubscription issue. Note that: From bbc46f3cd12a5302c95a41533127b30c6d265c2d Mon Sep 17 00:00:00 2001 From: "Thomas J. Fan" Date: Mon, 11 Jan 2021 12:44:04 -0500 Subject: [PATCH 17/38] DOC Adds default to SpectralClustering (#19149) --- sklearn/cluster/_spectral.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/cluster/_spectral.py b/sklearn/cluster/_spectral.py index 79a0b77954028..b86d5870025c3 100644 --- a/sklearn/cluster/_spectral.py +++ b/sklearn/cluster/_spectral.py @@ -356,7 +356,7 @@ class SpectralClustering(ClusterMixin, BaseEstimator): increase with similarity) should be used. This property is not checked by the clustering algorithm. - n_neighbors : int + n_neighbors : int, default=10 Number of neighbors to use when constructing the affinity matrix using the nearest neighbors method. Ignored for ``affinity='rbf'``. 
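Context for the change above: the ``n_neighbors`` parameter of :class:`~sklearn.cluster.SpectralClustering` is only consulted when the affinity matrix is built from nearest neighbors; for the default ``affinity='rbf'`` it is ignored. A minimal sketch of the documented default, assuming only the public API touched by this patch (the toy data and variable names below are illustrative, not part of the patch series):

    # Illustrative sketch, not part of the patch series. Passing
    # n_neighbors=10 is equivalent to omitting it, since 10 is the
    # (now documented) default; the parameter only takes effect with
    # affinity='nearest_neighbors'.
    from sklearn.cluster import SpectralClustering
    from sklearn.datasets import make_blobs

    X, _ = make_blobs(n_samples=100, centers=3, random_state=0)
    model = SpectralClustering(n_clusters=3, affinity="nearest_neighbors",
                               n_neighbors=10, random_state=0)
    labels = model.fit_predict(X)  # may warn if the kNN graph is disconnected
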
From 463894431c1b14cbf7350276c766e7ed57529f8f Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Mon, 11 Jan 2021 18:46:18 +0100 Subject: [PATCH 18/38] DOC Update installation instructions for macos/arm64 (#19146) --- doc/developers/advanced_installation.rst | 7 +++- doc/install.rst | 45 +++++++++++++++++------- 2 files changed, 39 insertions(+), 13 deletions(-) diff --git a/doc/developers/advanced_installation.rst b/doc/developers/advanced_installation.rst index fdda0076428af..7b305c13200a7 100644 --- a/doc/developers/advanced_installation.rst +++ b/doc/developers/advanced_installation.rst @@ -238,6 +238,11 @@ to enable OpenMP support: - or install `libomp` with Homebrew to extend the default Apple clang compiler. +For Apple Silicon M1 hardware, only the conda-forge method below is known to +work at the time of writing (January 2021). You can install the `macos/arm64` +distribution of conda using the `miniforge installer +`_ + macOS compilers from conda-forge ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -257,7 +262,7 @@ scikit-learn from source: .. prompt:: bash $ conda create -n sklearn-dev -c conda-forge python numpy scipy cython \ - joblib threadpoolctl pytest "compilers>=1.0.4,!=1.1.0" llvm-openmp + joblib threadpoolctl pytest compilers llvm-openmp conda activate sklearn-dev make clean pip install --verbose --no-build-isolation --editable . diff --git a/doc/install.rst b/doc/install.rst index 57cb489a11262..7912cc4dc4df6 100644 --- a/doc/install.rst +++ b/doc/install.rst @@ -59,7 +59,10 @@ Installing the latest release Install the 64bit version of Python 3, for instance from https://www.python.org.Install Python 3 using homebrew (brew install python) or by manually installing the package from https://www.python.org.Install python3 and python3-pip using the package manager of the Linux Distribution.Install conda (no administrator permission required). + >Install conda using the Anaconda or miniconda + installers or the miniforge installers + (no administrator permission required for any of those). Then run: @@ -106,17 +109,15 @@ In order to check your installation you can use Note that in order to avoid potential conflicts with other packages it is -strongly recommended to use a virtual environment, e.g. python3 ``virtualenv`` -(see `python3 virtualenv documentation -`_) or `conda environments +strongly recommended to use a `virtual environment (venv) +`_ or a `conda environment `_. -Using an isolated environment makes possible to install a specific version of -scikit-learn and its dependencies independently of any previously installed -Python packages. -In particular under Linux is it discouraged to install pip packages alongside -the packages managed by the package manager of the distribution -(apt, dnf, pacman...). +Using such an isolated environment makes it possible to install a specific +version of scikit-learn with pip or conda and its dependencies independently of +any previously installed Python packages. In particular under Linux is it +discouraged to install pip packages alongside the packages managed by the +package manager of the distribution (apt, dnf, pacman...). Note that you should always remember to activate the environment of your choice prior to running any Python command whenever you start a new terminal session. @@ -127,8 +128,6 @@ and NumPy and SciPy are not recompiled from source, which can happen when using particular configurations of operating system and hardware (such as Linux on a Raspberry Pi). 
-If you must install scikit-learn and its dependencies with pip, you can install -it as ``scikit-learn[alldeps]``. Scikit-learn plotting capabilities (i.e., functions start with "plot\_" and classes end with "Display") require Matplotlib. The examples require @@ -151,6 +150,28 @@ purpose. For installing on PyPy, PyPy3-v5.10+, Numpy 1.14.0+, and scipy 1.1.0+ are required. +.. _install_on_apple_silicon_m1: + +Installing on Apple Silicon M1 hardware +======================================= + +The recently introduced `macos/arm64` platform (sometimes also known as +`macos/aarch64`) requires the open source community to upgrade the build +configuation and automation to properly support it. + +At the time of writing (January 2021), the only way to get a working +installation of scikit-learn on this hardware is to install scikit-learn and its +dependencies from the conda-forge distribution, for instance using the miniforge +installers: + +https://github.com/conda-forge/miniforge + +The following issue tracks progress on making it possible to install +scikit-learn from PyPI with pip: + +https://github.com/scikit-learn/scikit-learn/issues/19137 + + .. _install_by_distribution: Third party distributions of scikit-learn From e3caae66014d8f74aa52bc61120ca39ccca08df0 Mon Sep 17 00:00:00 2001 From: Miao Cai Date: Tue, 12 Jan 2021 01:50:13 +0800 Subject: [PATCH 19/38] DOC Mention to use a command prompt in Windows install (#19125) Co-authored-by: Guillaume Lemaitre --- doc/developers/advanced_installation.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/developers/advanced_installation.rst b/doc/developers/advanced_installation.rst index 7b305c13200a7..7fbceeeab4c47 100644 --- a/doc/developers/advanced_installation.rst +++ b/doc/developers/advanced_installation.rst @@ -206,7 +206,8 @@ console: python -c "import struct; print(struct.calcsize('P') * 8)" -For 64-bit Python, configure the build environment with: +For 64-bit Python, configure the build environment by running the following +commands in ``cmd`` or an Anaconda Prompt (if you use Anaconda): :: From 3ae054f5d62456dc342413de4b7aee80b30412b3 Mon Sep 17 00:00:00 2001 From: "Thomas J. Fan" Date: Tue, 12 Jan 2021 06:20:12 -0500 Subject: [PATCH 20/38] DOC Uses float instead of real in cross_decomposition (#19156) --- sklearn/cross_decomposition/_pls.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sklearn/cross_decomposition/_pls.py b/sklearn/cross_decomposition/_pls.py index 66adacb64b1f3..817d4edbd9e88 100644 --- a/sklearn/cross_decomposition/_pls.py +++ b/sklearn/cross_decomposition/_pls.py @@ -499,7 +499,7 @@ class PLSRegression(_PLS): The maximum number of iterations of the power method when `algorithm='nipals'`. Ignored otherwise. - tol : real, default 1e-06 + tol : float, default=1e-06 The tolerance used as convergence criteria in the power method: the algorithm stops whenever the squared norm of `u_i - u_{i-1}` is less than `tol`, where `u` corresponds to the left singular vector. @@ -597,7 +597,7 @@ class PLSCanonical(_PLS): the maximum number of iterations of the power method when `algorithm='nipals'`. Ignored otherwise. - tol : real, default 1e-06 + tol : float, default=1e-06 The tolerance used as convergence criteria in the power method: the algorithm stops whenever the squared norm of `u_i - u_{i-1}` is less than `tol`, where `u` corresponds to the left singular vector. @@ -703,7 +703,7 @@ class CCA(_PLS): max_iter : int, default=500 the maximum number of iterations of the power method. 
- tol : real, default 1e-06 + tol : float, default=1e-06 The tolerance used as convergence criteria in the power method: the algorithm stops whenever the squared norm of `u_i - u_{i-1}` is less than `tol`, where `u` corresponds to the left singular vector. From fcc49e0129f5a1ef451967c09dad2e75551d3450 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Carlos=20Alfaro=20Jim=C3=A9nez?= Date: Tue, 12 Jan 2021 15:30:36 +0100 Subject: [PATCH 21/38] MNT Replace PDF build by ZIP of the HTML (#17564) --- build_tools/circle/build_doc.sh | 11 ++++------ build_tools/circle/list_versions.py | 31 +++++++++++++++++++++-------- doc/Makefile | 19 +++++++++++++++--- 3 files changed, 43 insertions(+), 18 deletions(-) diff --git a/build_tools/circle/build_doc.sh b/build_tools/circle/build_doc.sh index 96ae64df1c44d..691006bd2dab0 100755 --- a/build_tools/circle/build_doc.sh +++ b/build_tools/circle/build_doc.sh @@ -116,8 +116,8 @@ fi if [[ "$CIRCLE_BRANCH" =~ ^master$|^[0-9]+\.[0-9]+\.X$ && -z "$CI_PULL_REQUEST" ]] then - # PDF linked into HTML - make_args="dist LATEXMKOPTS=-halt-on-error" + # ZIP linked into HTML + make_args=dist elif [[ "$build_type" =~ ^QUICK ]] then make_args=html-noplot @@ -133,13 +133,10 @@ fi make_args="SPHINXOPTS=-T $make_args" # show full traceback on exception # Installing required system packages to support the rendering of math -# notation in the HTML documentation +# notation in the HTML documentation and to optimize the image files sudo -E apt-get -yq update -sudo -E apt-get -yq remove texlive-binaries --purge sudo -E apt-get -yq --no-install-suggests --no-install-recommends \ - install dvipng texlive-latex-base texlive-latex-extra \ - texlive-latex-recommended texlive-fonts-recommended \ - latexmk gsfonts ccache + install dvipng gsfonts ccache zip optipng # deactivate circleci virtualenv and setup a miniconda env instead if [[ `type -t deactivate` ]]; then diff --git a/build_tools/circle/list_versions.py b/build_tools/circle/list_versions.py index 19fa8aa2dc991..9d64497012445 100755 --- a/build_tools/circle/list_versions.py +++ b/build_tools/circle/list_versions.py @@ -8,6 +8,7 @@ from distutils.version import LooseVersion from urllib.request import urlopen + def json_urlread(url): try: return json.loads(urlopen(url).read().decode('utf8')) @@ -32,10 +33,23 @@ def human_readable_data_quantity(quantity, multiple=1024): quantity /= multiple -def get_pdf_size(version): +def get_file_extension(version): + if version == 'dev': + # The 'dev' branch should be explictly handled + return 'zip' + + current_version = LooseVersion(version) + min_zip_version = LooseVersion('1.0.0') + + return 'zip' if current_version >= min_zip_version else 'pdf' + + +def get_file_size(version): api_url = ROOT_URL + '%s/_downloads' % version for path_details in json_urlread(api_url): - if path_details['name'] == 'scikit-learn-docs.pdf': + file_extension = get_file_extension(version) + file_path = f'scikit-learn-docs.{file_extension}' + if path_details['name'] == file_path: return human_readable_data_quantity(path_details['size'], 1000) @@ -64,8 +78,8 @@ def get_pdf_size(version): if path_details['type'] == 'dir': html = urlopen(RAW_FMT % name).read().decode('utf8') version_num = VERSION_RE.search(html).group(1) - pdf_size = get_pdf_size(name) - dirs[name] = (version_num, pdf_size) + file_size = get_file_size(name) + dirs[name] = (version_num, file_size) if path_details['type'] == 'symlink': symlinks[name] = json_urlread(path_details['_links']['self'])['target'] @@ -81,7 +95,7 @@ def get_pdf_size(version): for 
name in (NAMED_DIRS + sorted((k for k in dirs if k[:1].isdigit()), key=LooseVersion, reverse=True)): - version_num, pdf_size = dirs[name] + version_num, file_size = dirs[name] if version_num in seen: # symlink came first continue @@ -91,7 +105,8 @@ def get_pdf_size(version): path = 'https://scikit-learn.org/%s/' % name out = ('* `Scikit-learn %s%s documentation <%s>`_' % (version_num, name_display, path)) - if pdf_size is not None: - out += (' (`PDF %s <%s/_downloads/scikit-learn-docs.pdf>`_)' - % (pdf_size, path)) + if file_size is not None: + file_extension = get_file_extension(version_num) + out += (f' (`{file_extension.upper()} {file_size} <{path}/' + f'_downloads/scikit-learn-docs.{file_extension}>`_)') print(out) diff --git a/doc/Makefile b/doc/Makefile index 1cbce7dba9662..6146d11123017 100644 --- a/doc/Makefile +++ b/doc/Makefile @@ -17,7 +17,7 @@ ALLSPHINXOPTS = -T -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS)\ $(EXAMPLES_PATTERN_OPTS) . -.PHONY: help clean html dirhtml pickle json latex latexpdf changes linkcheck doctest optipng +.PHONY: help clean html dirhtml ziphtml pickle json latex latexpdf changes linkcheck doctest optipng all: html-noplot @@ -25,6 +25,7 @@ help: @echo "Please use \`make ' where is one of" @echo " html to make standalone HTML files" @echo " dirhtml to make HTML files named index.html in directories" + @echo " ziphtml to make a ZIP of the HTML" @echo " pickle to make pickle files" @echo " json to make JSON files" @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" @@ -58,6 +59,19 @@ dirhtml: @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." +ziphtml: + @if [ ! -d "$(BUILDDIR)/html/stable/" ]; then \ + make html; \ + fi + # Optimize the images to reduce the size of the ZIP + optipng $(BUILDDIR)/html/stable/_images/*.png + # Exclude the output directory to avoid infinity recursion + cd $(BUILDDIR)/html/stable; \ + zip -q -x _downloads \ + -r _downloads/scikit-learn-docs.zip . + @echo + @echo "Build finished. The ZIP of the HTML is in $(BUILDDIR)/html/stable/_downloads." + pickle: $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle @echo @@ -106,5 +120,4 @@ optipng: find _build auto_examples */generated -name '*.png' -print0 \ | xargs -0 -n 1 -P 4 optipng -o10 -dist: html latexpdf - cp _build/latex/user_guide.pdf _build/html/stable/_downloads/scikit-learn-docs.pdf +dist: html ziphtml From 7baf7581646d9891cbb933bcce688c3d17c58829 Mon Sep 17 00:00:00 2001 From: Sina Tootoonian Date: Wed, 13 Jan 2021 07:18:06 +0000 Subject: [PATCH 22/38] DOC Normalization of linear_model decision_function (#19142) --- sklearn/linear_model/_base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py index 2399e1216238f..0cd9263cb5618 100644 --- a/sklearn/linear_model/_base.py +++ b/sklearn/linear_model/_base.py @@ -264,8 +264,8 @@ def decision_function(self, X): """ Predict confidence scores for samples. - The confidence score for a sample is the signed distance of that - sample to the hyperplane. + The confidence score for a sample is proportional to the signed + distance of that sample to the hyperplane. Parameters ---------- From 1e4d7567e5cf455849a1b72512937d183e93886f Mon Sep 17 00:00:00 2001 From: "Thomas J. 
Fan" Date: Wed, 13 Jan 2021 03:23:27 -0500 Subject: [PATCH 23/38] CI Adds skipping to azure pipelines with commit message (#19134) --- azure-pipelines.yml | 117 +++++++++++++++++++++++++++----------------- 1 file changed, 71 insertions(+), 46 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 1a42c533fb2ee..870c5f0e1d313 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -8,17 +8,11 @@ schedules: always: true jobs: -- job: linting - displayName: Linting +- job: git_commit + displayName: Get Git Commit pool: vmImage: ubuntu-18.04 steps: - - task: UsePythonVersion@0 - inputs: - versionSpec: '3.9' - - bash: | - pip install flake8 mypy==0.782 - displayName: Install linters - bash: | set -ex if [[ $BUILD_REASON == "PullRequest" ]]; then @@ -26,48 +20,53 @@ jobs: # which has a "Merge ID into ID" as a commit message. The latest commit # message is the second to last commit COMMIT_ID=$(echo $BUILD_SOURCEVERSIONMESSAGE | awk '{print $2}') - COMMIT_MESSAGE=$(git log $COMMIT_ID -1 --pretty=%B) + message=$(git log $COMMIT_ID -1 --pretty=%B) else - COMMIT_MESSAGE=$BUILD_SOURCEVERSIONMESSAGE + message=$BUILD_SOURCEVERSIONMESSAGE fi - echo "##vso[task.setvariable variable=COMMIT_MESSAGE]$COMMIT_MESSAGE" + echo "##vso[task.setvariable variable=message;isOutput=true]$message" + name: commit displayName: Get source version message + +- job: linting + dependsOn: [git_commit] + condition: | + and( + succeeded(), + not(contains(dependencies['git_commit']['outputs']['commit.message'], '[lint skip]')), + not(contains(dependencies['git_commit']['outputs']['commit.message'], '[ci skip]')) + ) + displayName: Linting + pool: + vmImage: ubuntu-18.04 + steps: + - task: UsePythonVersion@0 + inputs: + versionSpec: '3.9' - bash: | - set -ex - if [[ "$COMMIT_MESSAGE" =~ "[lint skip]" ]]; then - # skip linting - echo "Skipping flake8 linting" - exit 0 - else - ./build_tools/circle/linting.sh - fi + pip install flake8 mypy==0.782 + displayName: Install linters + - bash: | + ./build_tools/circle/linting.sh displayName: Run linting - bash: | - set -ex - if [[ "$COMMIT_MESSAGE" =~ "[lint skip]" ]]; then - # skip linting - echo "Skipping mypy linting" - exit 0 - else - mypy sklearn/ - fi + mypy sklearn/ displayName: Run mypy - - bash: | - if [[ "$COMMIT_MESSAGE" =~ "[scipy-dev]" ]] || [[ $BUILD_REASON == "Schedule" ]]; then - echo "Running scipy-dev" - echo "##vso[task.setvariable variable=runScipyDev;isOutput=true]true" - else - echo "##vso[task.setvariable variable=runScipyDev;isOutput=true]false" - fi - name: gitCommitMessage - displayName: Determine to run scipy-dev - template: build_tools/azure/posix.yml parameters: name: Linux_Nightly vmImage: ubuntu-18.04 - dependsOn: [linting] - condition: eq(dependencies['linting']['outputs']['gitCommitMessage.runScipyDev'], 'true') + dependsOn: [git_commit, linting] + condition: | + and( + succeeded(), + not(contains(dependencies['git_commit']['outputs']['commit.message'], '[ci skip]')), + or(eq(variables['Build.Reason'], 'Schedule'), + contains(dependencies['git_commit']['outputs']['commit.message'], '[scipy-dev]' + ) + ) + ) matrix: pylatest_pip_scipy_dev: DISTRIB: 'conda-pip-scipy-dev' @@ -84,6 +83,12 @@ jobs: parameters: name: Linux_Runs vmImage: ubuntu-18.04 + dependsOn: [git_commit] + condition: | + and( + succeeded(), + not(contains(dependencies['git_commit']['outputs']['commit.message'], '[ci skip]')) + ) matrix: pylatest_conda_mkl: DISTRIB: 'conda' @@ -95,8 +100,13 @@ jobs: parameters: name: Linux vmImage: ubuntu-18.04 - dependsOn: [linting] 
- condition: and(ne(variables['Build.Reason'], 'Schedule'), succeeded('linting')) + dependsOn: [linting, git_commit] + condition: | + and( + succeeded(), + not(contains(dependencies['git_commit']['outputs']['commit.message'], '[ci skip]')), + ne(variables['Build.Reason'], 'Schedule') + ) matrix: # Linux environment to test that scikit-learn can be built against # versions of numpy, scipy with ATLAS that comes with Ubuntu Bionic 18.04 @@ -139,8 +149,13 @@ jobs: parameters: name: Linux32 vmImage: ubuntu-18.04 - dependsOn: [linting] - condition: and(ne(variables['Build.Reason'], 'Schedule'), succeeded('linting')) + dependsOn: [linting, git_commit] + condition: | + and( + succeeded(), + not(contains(dependencies['git_commit']['outputs']['commit.message'], '[ci skip]')), + ne(variables['Build.Reason'], 'Schedule') + ) matrix: py36_ubuntu_atlas_32bit: DISTRIB: 'ubuntu-32' @@ -157,8 +172,13 @@ jobs: parameters: name: macOS vmImage: macOS-10.14 - dependsOn: [linting] - condition: and(ne(variables['Build.Reason'], 'Schedule'), succeeded('linting')) + dependsOn: [linting, git_commit] + condition: | + and( + succeeded(), + not(contains(dependencies['git_commit']['outputs']['commit.message'], '[ci skip]')), + ne(variables['Build.Reason'], 'Schedule') + ) matrix: pylatest_conda_forge_mkl: DISTRIB: 'conda' @@ -174,8 +194,13 @@ jobs: parameters: name: Windows vmImage: vs2017-win2016 - dependsOn: [linting] - condition: and(ne(variables['Build.Reason'], 'Schedule'), succeeded('linting')) + dependsOn: [linting, git_commit] + condition: | + and( + succeeded(), + not(contains(dependencies['git_commit']['outputs']['commit.message'], '[ci skip]')), + ne(variables['Build.Reason'], 'Schedule') + ) matrix: py37_conda_mkl: PYTHON_VERSION: '3.7' From cd62f8beb1fe4aa1e09ec6929b27bad2ce1b4d03 Mon Sep 17 00:00:00 2001 From: "Thomas J. Fan" Date: Wed, 13 Jan 2021 04:02:12 -0500 Subject: [PATCH 24/38] DOC Clarifies docstrings in decomposition (#19161) --- sklearn/decomposition/_dict_learning.py | 14 ++++++------- sklearn/decomposition/_lda.py | 2 +- sklearn/decomposition/_nmf.py | 26 ++++++++++++------------- sklearn/decomposition/_pca.py | 2 +- 4 files changed, 22 insertions(+), 22 deletions(-) diff --git a/sklearn/decomposition/_dict_learning.py b/sklearn/decomposition/_dict_learning.py index 781f288b70351..046738aa9700d 100644 --- a/sklearn/decomposition/_dict_learning.py +++ b/sklearn/decomposition/_dict_learning.py @@ -1156,10 +1156,10 @@ class DictionaryLearning(_BaseSparseCoding, BaseEstimator): fit_algorithm : {'lars', 'cd'}, default='lars' * `'lars'`: uses the least angle regression method to solve the lasso - problem (`linear_model.lars_path`); + problem (:func:`~sklearn.linear_model.lars_path`); * `'cd'`: uses the coordinate descent method to compute the - Lasso solution (`linear_model.Lasso`). Lars will be faster if - the estimated components are sparse. + Lasso solution (:class:`~sklearn.linear_model.Lasso`). Lars will be + faster if the estimated components are sparse. .. versionadded:: 0.17 *cd* coordinate descent method to improve speed. @@ -1169,11 +1169,11 @@ class DictionaryLearning(_BaseSparseCoding, BaseEstimator): Algorithm used to transform the data: - `'lars'`: uses the least angle regression method - (`linear_model.lars_path`); + (:func:`~sklearn.linear_model.lars_path`); - `'lasso_lars'`: uses Lars to compute the Lasso solution. - `'lasso_cd'`: uses the coordinate descent method to compute the - Lasso solution (`linear_model.Lasso`). 
`'lasso_lars'` will be faster - if the estimated components are sparse. + Lasso solution (:class:`~sklearn.linear_model.Lasso`). `'lasso_lars'` + will be faster if the estimated components are sparse. - `'omp'`: uses orthogonal matching pursuit to estimate the sparse solution. - `'threshold'`: squashes to zero all coefficients less than alpha from @@ -1404,7 +1404,7 @@ class MiniBatchDictionaryLearning(_BaseSparseCoding, BaseEstimator): shuffle : bool, default=True Whether to shuffle the samples before forming batches. - dict_init : nbarray of shape (n_components, n_features), default=None + dict_init : ndarray of shape (n_components, n_features), default=None initial value of the dictionary for warm restart scenarios transform_algorithm : {'lasso_lars', 'lasso_cd', 'lars', 'omp', \ diff --git a/sklearn/decomposition/_lda.py b/sklearn/decomposition/_lda.py index 6e6a5627ff7c5..e554d299fe478 100644 --- a/sklearn/decomposition/_lda.py +++ b/sklearn/decomposition/_lda.py @@ -194,7 +194,7 @@ class LatentDirichletAllocation(TransformerMixin, BaseEstimator): Number of documents to use in each EM iteration. Only used in online learning. - evaluate_every : int, default=0 + evaluate_every : int, default=-1 How often to evaluate perplexity. Only used in `fit` method. set it to 0 or negative number to not evaluate perplexity in training at all. Evaluating perplexity can help you check convergence diff --git a/sklearn/decomposition/_nmf.py b/sklearn/decomposition/_nmf.py index 5d01060951ae1..7bedc60998388 100644 --- a/sklearn/decomposition/_nmf.py +++ b/sklearn/decomposition/_nmf.py @@ -1138,23 +1138,23 @@ class NMF(TransformerMixin, BaseEstimator): Default: None. Valid options: - - None: 'nndsvd' if n_components <= min(n_samples, n_features), - otherwise random. + - `None`: 'nndsvd' if n_components <= min(n_samples, n_features), + otherwise random. - - 'random': non-negative random matrices, scaled with: - sqrt(X.mean() / n_components) + - `'random'`: non-negative random matrices, scaled with: + sqrt(X.mean() / n_components) - - 'nndsvd': Nonnegative Double Singular Value Decomposition (NNDSVD) - initialization (better for sparseness) + - `'nndsvd'`: Nonnegative Double Singular Value Decomposition (NNDSVD) + initialization (better for sparseness) - - 'nndsvda': NNDSVD with zeros filled with the average of X - (better when sparsity is not desired) + - `'nndsvda'`: NNDSVD with zeros filled with the average of X + (better when sparsity is not desired) - - 'nndsvdar': NNDSVD with zeros filled with small random values - (generally faster, less accurate alternative to NNDSVDa - for when sparsity is not desired) + - `'nndsvdar'` NNDSVD with zeros filled with small random values + (generally faster, less accurate alternative to NNDSVDa + for when sparsity is not desired) - - 'custom': use custom matrices W and H + - `'custom'`: use custom matrices W and H solver : {'cd', 'mu'}, default='cd' Numerical solver to use: @@ -1207,7 +1207,7 @@ class NMF(TransformerMixin, BaseEstimator): Regularization parameter *l1_ratio* used in the Coordinate Descent solver. - verbose : bool, default=False + verbose : int, default=0 Whether to be verbose. 
shuffle : bool, default=False diff --git a/sklearn/decomposition/_pca.py b/sklearn/decomposition/_pca.py index c69dc959b851a..80ac7e856dfd0 100644 --- a/sklearn/decomposition/_pca.py +++ b/sklearn/decomposition/_pca.py @@ -130,7 +130,7 @@ class PCA(_BasePCA): Parameters ---------- - n_components : int, float or str, default=None + n_components : int, float or 'mle', default=None Number of components to keep. if n_components is not set all components are kept:: From 957781f04c6dc7265ab31414907a28df0a897b94 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Carlos=20Alfaro=20Jim=C3=A9nez?= Date: Wed, 13 Jan 2021 10:21:02 +0100 Subject: [PATCH 25/38] MNT fix strict comparison in version listing (#19163) --- build_tools/circle/list_versions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build_tools/circle/list_versions.py b/build_tools/circle/list_versions.py index 9d64497012445..399c77b723d3c 100755 --- a/build_tools/circle/list_versions.py +++ b/build_tools/circle/list_versions.py @@ -34,7 +34,7 @@ def human_readable_data_quantity(quantity, multiple=1024): def get_file_extension(version): - if version == 'dev': + if 'dev' in version: # The 'dev' branch should be explictly handled return 'zip' From 300782fdbb7722bcfc595c031f3f2b703f5df11d Mon Sep 17 00:00:00 2001 From: "Thomas J. Fan" Date: Wed, 13 Jan 2021 11:02:53 -0500 Subject: [PATCH 26/38] TST Download datasets before running pytest-xdist (#19118) Co-authored-by: Olivier Grisel --- conftest.py | 16 ---- doc/computing/parallelism.rst | 3 +- sklearn/conftest.py | 85 +++++++++++++++++++ sklearn/datasets/tests/conftest.py | 60 ------------- .../ensemble/tests/test_gradient_boosting.py | 7 +- 5 files changed, 90 insertions(+), 81 deletions(-) diff --git a/conftest.py b/conftest.py index 5c48de4ac36a3..aec49c03ae13d 100644 --- a/conftest.py +++ b/conftest.py @@ -5,7 +5,6 @@ # doc/modules/clustering.rst and use sklearn from the local folder rather than # the one from site-packages. -import os import platform import sys @@ -17,18 +16,12 @@ from sklearn._min_dependencies import PYTEST_MIN_VERSION from sklearn.utils.fixes import np_version, parse_version - if parse_version(pytest.__version__) < parse_version(PYTEST_MIN_VERSION): raise ImportError('Your version of pytest is too old, you should have ' 'at least pytest >= {} installed.' .format(PYTEST_MIN_VERSION)) -def pytest_addoption(parser): - parser.addoption("--skip-network", action="store_true", default=False, - help="skip network tests") - - def pytest_collection_modifyitems(config, items): for item in items: # FeatureHasher is not compatible with PyPy @@ -50,15 +43,6 @@ def pytest_collection_modifyitems(config, items): ) item.add_marker(marker) - # Skip tests which require internet if the flag is provided - if (config.getoption("--skip-network") - or int(os.environ.get("SKLEARN_SKIP_NETWORK_TESTS", "0"))): - skip_network = pytest.mark.skip( - reason="test requires internet connectivity") - for item in items: - if "network" in item.keywords: - item.add_marker(skip_network) - # numpy changed the str/repr formatting of numpy arrays in 1.14. We want to # run doctests only for numpy >= 1.14. skip_doctests = False diff --git a/doc/computing/parallelism.rst b/doc/computing/parallelism.rst index 3dce5ef66bb1d..8605650e8eec5 100644 --- a/doc/computing/parallelism.rst +++ b/doc/computing/parallelism.rst @@ -212,4 +212,5 @@ These environment variables should be set before importing scikit-learn. 
 :SKLEARN_SKIP_NETWORK_TESTS: When this environment variable is set to a non
     zero value, the tests
-    that need network access are skipped.
+    that need network access are skipped. When this environment variable is
+    not set, the network tests are skipped as well.
diff --git a/sklearn/conftest.py b/sklearn/conftest.py
index 8a98921342efa..2978115e3091c 100644
--- a/sklearn/conftest.py
+++ b/sklearn/conftest.py
@@ -1,9 +1,94 @@
 import os
+from os import environ
+from functools import wraps

 import pytest
 from threadpoolctl import threadpool_limits

 from sklearn.utils._openmp_helpers import _openmp_effective_n_threads
+from sklearn.datasets import fetch_20newsgroups
+from sklearn.datasets import fetch_20newsgroups_vectorized
+from sklearn.datasets import fetch_california_housing
+from sklearn.datasets import fetch_covtype
+from sklearn.datasets import fetch_kddcup99
+from sklearn.datasets import fetch_olivetti_faces
+from sklearn.datasets import fetch_rcv1
+
+
+dataset_fetchers = {
+    'fetch_20newsgroups_fxt': fetch_20newsgroups,
+    'fetch_20newsgroups_vectorized_fxt': fetch_20newsgroups_vectorized,
+    'fetch_california_housing_fxt': fetch_california_housing,
+    'fetch_covtype_fxt': fetch_covtype,
+    'fetch_kddcup99_fxt': fetch_kddcup99,
+    'fetch_olivetti_faces_fxt': fetch_olivetti_faces,
+    'fetch_rcv1_fxt': fetch_rcv1,
+}
+
+
+def _fetch_fixture(f):
+    """Fetch dataset (download if missing and requested by environment)."""
+    download_if_missing = environ.get('SKLEARN_SKIP_NETWORK_TESTS', '1') == '0'
+
+    @wraps(f)
+    def wrapped(*args, **kwargs):
+        kwargs['download_if_missing'] = download_if_missing
+        try:
+            return f(*args, **kwargs)
+        except IOError:
+            pytest.skip("test is enabled when SKLEARN_SKIP_NETWORK_TESTS=0")
+    return pytest.fixture(lambda: wrapped)
+
+
+# Adds fixtures for fetching data
+fetch_20newsgroups_fxt = _fetch_fixture(fetch_20newsgroups)
+fetch_20newsgroups_vectorized_fxt = \
+    _fetch_fixture(fetch_20newsgroups_vectorized)
+fetch_california_housing_fxt = _fetch_fixture(fetch_california_housing)
+fetch_covtype_fxt = _fetch_fixture(fetch_covtype)
+fetch_kddcup99_fxt = _fetch_fixture(fetch_kddcup99)
+fetch_olivetti_faces_fxt = _fetch_fixture(fetch_olivetti_faces)
+fetch_rcv1_fxt = _fetch_fixture(fetch_rcv1)
+
+
+def pytest_collection_modifyitems(config, items):
+    """Called after collect is completed.
+
+    Parameters
+    ----------
+    config : pytest config
+    items : list of collected items
+    """
+    run_network_tests = environ.get('SKLEARN_SKIP_NETWORK_TESTS', '1') == '0'
+    skip_network = pytest.mark.skip(
+        reason="test is enabled when SKLEARN_SKIP_NETWORK_TESTS=0")
+
+    # download datasets during collection to avoid thread-unsafe behavior
+    # when running pytest in parallel with pytest-xdist
+    dataset_features_set = set(dataset_fetchers)
+    datasets_to_download = set()
+
+    for item in items:
+        if not hasattr(item, "fixturenames"):
+            continue
+        item_fixtures = set(item.fixturenames)
+        dataset_to_fetch = item_fixtures & dataset_features_set
+        if not dataset_to_fetch:
+            continue
+
+        if run_network_tests:
+            datasets_to_download |= dataset_to_fetch
+        else:
+            # network tests are skipped
+            item.add_marker(skip_network)
+
+    # Only download datasets on the first worker spawned by pytest-xdist
+    # to avoid thread-unsafe behavior. If pytest-xdist is not used, we still
+    # download before tests run.
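+    # pytest-xdist exposes each worker's id through the PYTEST_XDIST_WORKER
+    # environment variable ("gw0", "gw1", ...); defaulting to "gw0" both
+    # selects a single downloading worker and covers non-xdist runs, where
+    # the variable is not set.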
+ worker_id = environ.get("PYTEST_XDIST_WORKER", "gw0") + if worker_id == "gw0" and run_network_tests: + for name in datasets_to_download: + dataset_fetchers[name]() @pytest.fixture(scope='function') diff --git a/sklearn/datasets/tests/conftest.py b/sklearn/datasets/tests/conftest.py index 4612cd5deb4bc..cf356d6ca3b10 100644 --- a/sklearn/datasets/tests/conftest.py +++ b/sklearn/datasets/tests/conftest.py @@ -1,67 +1,7 @@ """ Network tests are only run, if data is already locally available, or if download is specifically requested by environment variable.""" import builtins -from functools import wraps -from os import environ import pytest -from sklearn.datasets import fetch_20newsgroups -from sklearn.datasets import fetch_20newsgroups_vectorized -from sklearn.datasets import fetch_california_housing -from sklearn.datasets import fetch_covtype -from sklearn.datasets import fetch_kddcup99 -from sklearn.datasets import fetch_olivetti_faces -from sklearn.datasets import fetch_rcv1 - - -def _wrapped_fetch(f, dataset_name): - """ Fetch dataset (download if missing and requested by environment) """ - download_if_missing = environ.get('SKLEARN_SKIP_NETWORK_TESTS', '1') == '0' - - @wraps(f) - def wrapped(*args, **kwargs): - kwargs['download_if_missing'] = download_if_missing - try: - return f(*args, **kwargs) - except IOError: - pytest.skip("Download {} to run this test".format(dataset_name)) - return wrapped - - -@pytest.fixture -def fetch_20newsgroups_fxt(): - return _wrapped_fetch(fetch_20newsgroups, dataset_name='20newsgroups') - - -@pytest.fixture -def fetch_20newsgroups_vectorized_fxt(): - return _wrapped_fetch(fetch_20newsgroups_vectorized, - dataset_name='20newsgroups_vectorized') - - -@pytest.fixture -def fetch_california_housing_fxt(): - return _wrapped_fetch(fetch_california_housing, - dataset_name='california_housing') - - -@pytest.fixture -def fetch_covtype_fxt(): - return _wrapped_fetch(fetch_covtype, dataset_name='covtype') - - -@pytest.fixture -def fetch_kddcup99_fxt(): - return _wrapped_fetch(fetch_kddcup99, dataset_name='kddcup99') - - -@pytest.fixture -def fetch_olivetti_faces_fxt(): - return _wrapped_fetch(fetch_olivetti_faces, dataset_name='olivetti_faces') - - -@pytest.fixture -def fetch_rcv1_fxt(): - return _wrapped_fetch(fetch_rcv1, dataset_name='rcv1') @pytest.fixture diff --git a/sklearn/ensemble/tests/test_gradient_boosting.py b/sklearn/ensemble/tests/test_gradient_boosting.py index 256b79db4865c..498e5bf38a675 100644 --- a/sklearn/ensemble/tests/test_gradient_boosting.py +++ b/sklearn/ensemble/tests/test_gradient_boosting.py @@ -13,7 +13,7 @@ from sklearn import datasets from sklearn.base import clone -from sklearn.datasets import (make_classification, fetch_california_housing, +from sklearn.datasets import (make_classification, make_regression) from sklearn.ensemble import GradientBoostingClassifier from sklearn.ensemble import GradientBoostingRegressor @@ -345,8 +345,7 @@ def test_max_feature_regression(): assert deviance < 0.5, "GB failed with deviance %.4f" % deviance -@pytest.mark.network -def test_feature_importance_regression(): +def test_feature_importance_regression(fetch_california_housing_fxt): """Test that Gini importance is calculated correctly. This test follows the example from [1]_ (pg. 373). @@ -354,7 +353,7 @@ def test_feature_importance_regression(): .. [1] Friedman, J., Hastie, T., & Tibshirani, R. (2001). The elements of statistical learning. New York: Springer series in statistics. 
""" - california = fetch_california_housing() + california = fetch_california_housing_fxt() X, y = california.data, california.target X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) From ff778443331e30025dd25a02c10a69c29a17a6c3 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Thu, 14 Jan 2021 17:28:12 +0100 Subject: [PATCH 27/38] CI Use stable numpy scipy release for [icc-build] and [arm64] on travis (#19176) The scipy-dev builds have moved to Azure Pipelines and there is no reason to not use stable versions of numpy and scipy to run the ICC and ARM64 tests on travis. This should fix the invalid wheel metadata failure observed on travis which was already resolved on Azure Pipelines by using the legacy pip dependency resolver for the scipy-dev build. --- build_tools/travis/install_master.sh | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/build_tools/travis/install_master.sh b/build_tools/travis/install_master.sh index 042ce53b41d2c..e2e0534216c7c 100755 --- a/build_tools/travis/install_master.sh +++ b/build_tools/travis/install_master.sh @@ -50,23 +50,8 @@ conda update --yes conda conda create -n testenv --yes python=3.7 source activate testenv - -if [[ $TRAVIS_CPU_ARCH == amd64 ]]; then - echo "Upgrading pip and setuptools." - pip install --upgrade pip setuptools - echo "Installing numpy, scipy and pandas master wheels." - dev_anaconda_url=https://pypi.anaconda.org/scipy-wheels-nightly/simple - pip install --pre --upgrade --timeout=60 --extra-index $dev_anaconda_url numpy scipy pandas - echo "Installing cython pre-release wheels." - pip install --pre cython - echo "Installing joblib master." - pip install https://github.com/joblib/joblib/archive/master.zip - echo "Installing pillow master." - pip install https://github.com/python-pillow/Pillow/archive/master.zip -else - conda install -y scipy numpy pandas cython - pip install joblib threadpoolctl -fi +conda install -y scipy numpy pandas cython +pip install joblib threadpoolctl pip install $(get_dep pytest $PYTEST_VERSION) pytest-xdist From 0f93f76be746f91f859075f51e3cbcf11301c525 Mon Sep 17 00:00:00 2001 From: "Thomas J. 
Fan" Date: Sat, 16 Jan 2021 13:19:07 -0500 Subject: [PATCH 28/38] FIX Fixes issue with exatly_zero_info_score (#19179) * ENH Fixes issue with exatly_zero_info_score [scipy-dev] * ENH Remove unneeded line [scipy-dev] * WIP Keep types [scipy-dev] * REV Smaller diff [scipy-dev] * WIP Expand mutual_info_score [scipy-dev] * WIP Removes float casting [scipy-dev] * WIP Adds casting back in * CI [scipy-dev] * WIP Casting is not needed [scipy-dev] * WIP Only clip [scipy-dev] * REV Smaller diff [scipy-dev] * WIP Place expected_mutual_information diff back [scipy-dev] * ENH Uses around * WIP Use where again [scipy-dev] * ENH Adjust comments to match code --- .../metrics/cluster/_expected_mutual_info_fast.pyx | 14 +++++++------- sklearn/metrics/cluster/_supervised.py | 1 + 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/sklearn/metrics/cluster/_expected_mutual_info_fast.pyx b/sklearn/metrics/cluster/_expected_mutual_info_fast.pyx index b9b94508da046..d2f9cd8578b12 100644 --- a/sklearn/metrics/cluster/_expected_mutual_info_fast.pyx +++ b/sklearn/metrics/cluster/_expected_mutual_info_fast.pyx @@ -22,7 +22,7 @@ def expected_mutual_information(contingency, int n_samples): cdef DOUBLE N, gln_N, emi, term2, term3, gln cdef np.ndarray[DOUBLE] gln_a, gln_b, gln_Na, gln_Nb, gln_nij, log_Nnij cdef np.ndarray[DOUBLE] nijs, term1 - cdef np.ndarray[DOUBLE, ndim=2] log_ab_outer + cdef np.ndarray[DOUBLE] log_a, log_b cdef np.ndarray[np.int32_t] a, b #cdef np.ndarray[int, ndim=2] start, end R, C = contingency.shape @@ -37,10 +37,10 @@ def expected_mutual_information(contingency, int n_samples): # term1 is nij / N term1 = nijs / N # term2 is log((N*nij) / (a * b)) == log(N * nij) - log(a * b) - # term2 uses the outer product - log_ab_outer = np.log(a)[:, np.newaxis] + np.log(b) - # term2 uses N * nij - log_Nnij = np.log(N * nijs) + log_a = np.log(a) + log_b = np.log(b) + # term2 uses log(N * nij) = log(N) + log(nij) + log_Nnij = np.log(N) + np.log(nijs) # term3 is large, and involved many factorials. Calculate these in log # space to stop overflows. gln_a = gammaln(a + 1) @@ -54,12 +54,12 @@ def expected_mutual_information(contingency, int n_samples): start = np.maximum(start, 1) end = np.minimum(np.resize(a, (C, R)).T, np.resize(b, (R, C))) + 1 # emi itself is a summation over the various values. - emi = 0 + emi = 0.0 cdef Py_ssize_t i, j, nij for i in range(R): for j in range(C): for nij in range(start[i,j], end[i,j]): - term2 = log_Nnij[nij] - log_ab_outer[i,j] + term2 = log_Nnij[nij] - log_a[i] - log_b[j] # Numerators are positive, denominators are negative. 
gln = (gln_a[i] + gln_b[j] + gln_Na[i] + gln_Nb[j] - gln_N - gln_nij[nij] - lgamma(a[i] - nij + 1) diff --git a/sklearn/metrics/cluster/_supervised.py b/sklearn/metrics/cluster/_supervised.py index 6e4e13f26017a..19d1552518db4 100644 --- a/sklearn/metrics/cluster/_supervised.py +++ b/sklearn/metrics/cluster/_supervised.py @@ -795,6 +795,7 @@ def mutual_info_score(labels_true, labels_pred, *, contingency=None): log_outer = -np.log(outer) + log(pi.sum()) + log(pj.sum()) mi = (contingency_nm * (log_contingency_nm - log(contingency_sum)) + contingency_nm * log_outer) + mi = np.where(np.abs(mi) < np.finfo(mi.dtype).eps, 0.0, mi) return np.clip(mi.sum(), 0.0, None) From a837abe3a36e4f3528d4b15a409c4d833407f477 Mon Sep 17 00:00:00 2001 From: Zito Date: Sat, 16 Jan 2021 10:28:47 -0800 Subject: [PATCH 29/38] DOC description for Calinski-Harabasz Index (#19167) --- doc/modules/clustering.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/modules/clustering.rst b/doc/modules/clustering.rst index 010d721fdd073..61c8393a734c8 100644 --- a/doc/modules/clustering.rst +++ b/doc/modules/clustering.rst @@ -1737,7 +1737,7 @@ Ratio Criterion - can be used to evaluate the model, where a higher Calinski-Harabasz score relates to a model with better defined clusters. The index is the ratio of the sum of between-clusters dispersion and of -inter-cluster dispersion for all clusters (where dispersion is defined as the +within-cluster dispersion for all clusters (where dispersion is defined as the sum of distances squared): >>> from sklearn import metrics From dc59bc108716ec3e5cf8f534ec8d44eba33d0843 Mon Sep 17 00:00:00 2001 From: ranjanikrishnan Date: Sun, 17 Jan 2021 21:28:33 +0100 Subject: [PATCH 30/38] DOC Add link to video for contributing: Andreas video Volume 2 (#19180) Co-authored-by: Nicolas Hug --- doc/developers/contributing.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/doc/developers/contributing.rst b/doc/developers/contributing.rst index 8a3c460c615a8..fb2c0aa997fe5 100644 --- a/doc/developers/contributing.rst +++ b/doc/developers/contributing.rst @@ -213,6 +213,11 @@ latest up-to-date workflow. 
 `Transcript `__

+- Sprint-specific instructions and practical tips:
+  `Video `__,
+  `Transcript
+  `__
+
 How to contribute
 -----------------

From a4df528b12ecf29faaac7d6aa82f3fcfa1708581 Mon Sep 17 00:00:00 2001
From: Abhinav Gupta <62496969+abhinavtps@users.noreply.github.com>
Date: Mon, 18 Jan 2021 20:43:19 +0530
Subject: [PATCH 31/38] DOC Replacing swarmplot with stripplot to avoid
 seaborn warning (#19195)
---
 .../plot_linear_model_coefficient_interpretation.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/examples/inspection/plot_linear_model_coefficient_interpretation.py b/examples/inspection/plot_linear_model_coefficient_interpretation.py
index 93a5b430a3542..459b180f00e36 100644
--- a/examples/inspection/plot_linear_model_coefficient_interpretation.py
+++ b/examples/inspection/plot_linear_model_coefficient_interpretation.py
@@ -325,7 +325,7 @@
     columns=feature_names
 )
 plt.figure(figsize=(9, 7))
-sns.swarmplot(data=coefs, orient='h', color='k', alpha=0.5)
+sns.stripplot(data=coefs, orient='h', color='k', alpha=0.5)
 sns.boxplot(data=coefs, orient='h', color='cyan', saturation=0.5)
 plt.axvline(x=0, color='.5')
 plt.xlabel('Coefficient importance')
@@ -376,7 +376,7 @@
     columns=feature_names[:-1]
 )
 plt.figure(figsize=(9, 7))
-sns.swarmplot(data=coefs, orient='h', color='k', alpha=0.5)
+sns.stripplot(data=coefs, orient='h', color='k', alpha=0.5)
 sns.boxplot(data=coefs, orient='h', color='cyan', saturation=0.5)
 plt.axvline(x=0, color='.5')
 plt.title('Coefficient importance and its variability')
@@ -469,7 +469,7 @@
     columns=feature_names
 )
 plt.figure(figsize=(9, 7))
-sns.swarmplot(data=coefs, orient='h', color='k', alpha=0.5)
+sns.stripplot(data=coefs, orient='h', color='k', alpha=0.5)
 sns.boxplot(data=coefs, orient='h', color='cyan', saturation=0.5)
 plt.axvline(x=0, color='.5')
 plt.title('Coefficient variability')

From e79b51689df0b217b9e44b695e10560b82f1cf9b Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Mon, 18 Jan 2021 17:25:41 +0100
Subject: [PATCH 32/38] DOC add entry in whats new for 0.24.1 (#19196)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Olivier Grisel
Co-authored-by: Jérémie du Boisberranger <34657725+jeremiedbb@users.noreply.github.com>
---
 doc/whats_new/v0.24.rst | 23 +++++++++++------------
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/doc/whats_new/v0.24.rst b/doc/whats_new/v0.24.rst
index ca96e875d342a..cbe26892d772e 100644
--- a/doc/whats_new/v0.24.rst
+++ b/doc/whats_new/v0.24.rst
@@ -7,6 +7,17 @@
 Version 0.24.1
 ==============

+**January 2020**
+
+Packaging
+---------
+
+The 0.24.0 scikit-learn wheels were not working with macOS <10.15 due to
+`libomp`. The version of `libomp` used to build the wheels was too recent for
+older macOS versions. This issue has been fixed for 0.24.1 scikit-learn wheels.
+Scikit-learn wheels published on PyPI.org now officially support macOS 10.13
+and later.
+
 Changelog
 ---------

@@ -56,21 +67,9 @@
 Details are listed in the changelog below.

 (While we are trying to better inform users by providing this information, we
 cannot assure that this list is complete.)

-
 Changelog
 ---------

-..
-    Entries should be grouped by module (in alphabetic order) and prefixed with
-    one of the labels: |MajorFeature|, |Feature|, |Efficiency|, |Enhancement|,
-    |Fix| or |API| (see whats_new.rst for descriptions).
-    Entries should be ordered by those labels (e.g. |Fix| after |Efficiency|).
- Changes not specific to a module should be listed under *Multiple Modules* - or *Miscellaneous*. - Entries should end with: - :pr:`123456` by :user:`Joe Bloggs `. - where 123456 is the *pull request* number, not the issue number. - :mod:`sklearn.base` ................... From a9fd32c25b8c643a04449db9cbd4806d84ed765c Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Mon, 18 Jan 2021 19:50:19 +0100 Subject: [PATCH 33/38] DOC update the version and year release --- doc/templates/index.html | 2 ++ doc/whats_new/v0.24.rst | 2 +- sklearn/__init__.py | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/doc/templates/index.html b/doc/templates/index.html index d333530ef8376..88affa25830b4 100644 --- a/doc/templates/index.html +++ b/doc/templates/index.html @@ -155,6 +155,8 @@

 News

   • On-going development: What's new (Changelog)
+  • January 2021. scikit-learn 0.24.1 is available for download (Changelog).
+  • December 2020. scikit-learn 0.24.0 is available for download (Changelog).
  • August 2020. scikit-learn 0.23.2 is available for download (Changelog).
diff --git a/doc/whats_new/v0.24.rst b/doc/whats_new/v0.24.rst
index cbe26892d772e..2bef032a6cb0c 100644
--- a/doc/whats_new/v0.24.rst
+++ b/doc/whats_new/v0.24.rst
@@ -7,7 +7,7 @@
 Version 0.24.1
 ==============

-**January 2020**
+**January 2021**

 Packaging
 ---------
diff --git a/sklearn/__init__.py b/sklearn/__init__.py
index 6aace65cbdfb1..04c20fc5c08fd 100644
--- a/sklearn/__init__.py
+++ b/sklearn/__init__.py
@@ -39,7 +39,7 @@
 # Dev branch marker is: 'X.Y.dev' or 'X.Y.devN' where N is an integer.
 # 'X.Y.dev0' is the canonical version of 'X.Y.dev'
 #
-__version__ = '0.24.0'
+__version__ = '0.24.1'


 # On OSX, we can get a runtime error due to multiple OpenMP libraries loaded

From a7b9f317f1a9e3db2e893a296e6255141b4ecd3e Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Mon, 18 Jan 2021 20:06:25 +0100
Subject: [PATCH 34/38] DOC add entry in whats new for numerical instability
 in mutual information (#19200)
---
 doc/whats_new/v0.24.rst | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/doc/whats_new/v0.24.rst b/doc/whats_new/v0.24.rst
index 2bef032a6cb0c..749d67e83d4af 100644
--- a/doc/whats_new/v0.24.rst
+++ b/doc/whats_new/v0.24.rst
@@ -21,6 +21,14 @@
 Changelog
 ---------

+:mod:`sklearn.metrics`
+......................
+
+- |Fix| Fix numerical stability bug that could happen in
+  :func:`metrics.adjusted_mutual_info_score` and
+  :func:`metrics.mutual_info_score` with NumPy 1.20+.
+  :pr:`19179` by `Thomas Fan`_.
+
 :mod:`sklearn.semi_supervised`
 ..............................

From eee3918288b8d9b8d6818d2a0e3a8c697146080f Mon Sep 17 00:00:00 2001
From: "Thomas J. Fan"
Date: Tue, 5 Jan 2021 04:27:02 -0500
Subject: [PATCH 35/38] TST Adapts wminkowski for scipy 1.6.0 (#19096)
---
 sklearn/neighbors/tests/test_dist_metrics.py | 28 ++++++++++++++++++--
 sklearn/neighbors/tests/test_neighbors.py    | 16 +++++++++--
 2 files changed, 40 insertions(+), 4 deletions(-)

diff --git a/sklearn/neighbors/tests/test_dist_metrics.py b/sklearn/neighbors/tests/test_dist_metrics.py
index 60a4e97880e15..441bcc134fe6b 100644
--- a/sklearn/neighbors/tests/test_dist_metrics.py
+++ b/sklearn/neighbors/tests/test_dist_metrics.py
@@ -55,7 +55,19 @@ def test_cdist(metric):
     keys = argdict.keys()
     for vals in itertools.product(*argdict.values()):
         kwargs = dict(zip(keys, vals))
-        D_true = cdist(X1, X2, metric, **kwargs)
+        if metric == "wminkowski":
+            if sp_version >= parse_version("1.8.0"):
+                pytest.skip("wminkowski will be removed in SciPy 1.8.0")
+
+            # wminkowski is deprecated in SciPy 1.6.0 and removed in 1.8.0
+            ExceptionToAssert = None
+            if sp_version >= parse_version("1.6.0"):
+                ExceptionToAssert = DeprecationWarning
+            with pytest.warns(ExceptionToAssert):
+                D_true = cdist(X1, X2, metric, **kwargs)
+        else:
+            D_true = cdist(X1, X2, metric, **kwargs)
+
         check_cdist(metric, kwargs, D_true)

@@ -83,7 +95,19 @@ def test_pdist(metric):
     keys = argdict.keys()
     for vals in itertools.product(*argdict.values()):
         kwargs = dict(zip(keys, vals))
-        D_true = cdist(X1, X1, metric, **kwargs)
+        if metric == "wminkowski":
+            if sp_version >= parse_version("1.8.0"):
+                pytest.skip("wminkowski will be removed in SciPy 1.8.0")
+
+            # wminkowski is deprecated in SciPy 1.6.0 and removed in 1.8.0
+            ExceptionToAssert = None
+            if sp_version >= parse_version("1.6.0"):
+                ExceptionToAssert = DeprecationWarning
+            with pytest.warns(ExceptionToAssert):
+                D_true = cdist(X1, X1, metric, **kwargs)
+        else:
+            D_true = cdist(X1, X1, metric, **kwargs)
+
         check_pdist(metric,
kwargs, D_true)
diff --git a/sklearn/neighbors/tests/test_neighbors.py b/sklearn/neighbors/tests/test_neighbors.py
index 513df1edb1bec..2b6c9a48d545d 100644
--- a/sklearn/neighbors/tests/test_neighbors.py
+++ b/sklearn/neighbors/tests/test_neighbors.py
@@ -26,6 +26,7 @@
 from sklearn.utils._testing import assert_raise_message
 from sklearn.utils._testing import ignore_warnings
 from sklearn.utils.validation import check_random_state
+from sklearn.utils.fixes import sp_version, parse_version

 import joblib

@@ -1244,6 +1245,9 @@ def test_neighbors_metrics(n_samples=20, n_features=3,
     test = rng.rand(n_query_pts, n_features)

     for metric, metric_params in metrics:
+        if metric == "wminkowski" and sp_version >= parse_version("1.8.0"):
+            # wminkowski will be removed in SciPy 1.8.0
+            continue
         results = {}
         p = metric_params.pop('p', 2)
         for algorithm in algorithms:
@@ -1265,8 +1269,16 @@ def test_neighbors_metrics(n_samples=20, n_features=3,
                           if metric == 'haversine' else slice(None))

             neigh.fit(X[:, feature_sl])
-            results[algorithm] = neigh.kneighbors(test[:, feature_sl],
-                                                  return_distance=True)
+
+            # wminkowski is deprecated in SciPy 1.6.0 and removed in 1.8.0
+            ExceptionToAssert = None
+            if (metric == "wminkowski" and algorithm == 'brute'
+                    and sp_version >= parse_version("1.6.0")):
+                ExceptionToAssert = DeprecationWarning
+
+            with pytest.warns(ExceptionToAssert):
+                results[algorithm] = neigh.kneighbors(test[:, feature_sl],
+                                                      return_distance=True)

     assert_array_almost_equal(results['brute'][0], results['ball_tree'][0])
     assert_array_almost_equal(results['brute'][1], results['ball_tree'][1])

From 4c3f0dc3e16f7ccc7758c6b3e9b368bed086fc10 Mon Sep 17 00:00:00 2001
From: Chiara Marmo
Date: Sat, 2 Jan 2021 15:19:32 +0100
Subject: [PATCH 36/38] TST Fix scipy DeprecationWarning from wminkowski in
 nightly (#18930)
---
 sklearn/metrics/tests/test_pairwise.py | 45 ++++++++++++++++++++++++--
 1 file changed, 42 insertions(+), 3 deletions(-)

diff --git a/sklearn/metrics/tests/test_pairwise.py b/sklearn/metrics/tests/test_pairwise.py
index 88c285421fca6..7640a6a2b8e70 100644
--- a/sklearn/metrics/tests/test_pairwise.py
+++ b/sklearn/metrics/tests/test_pairwise.py
@@ -4,8 +4,16 @@
 from numpy import linalg

 from scipy.sparse import dok_matrix, csr_matrix, issparse
-from scipy.spatial.distance import cosine, cityblock, minkowski, wminkowski
+from scipy.spatial.distance import cosine, cityblock, minkowski
 from scipy.spatial.distance import cdist, pdist, squareform
+try:
+    from scipy.spatial.distance import wminkowski
+except ImportError:
+    # In scipy 1.6.0, wminkowski is deprecated and minkowski
+    # should be used instead.
+    from scipy.spatial.distance import minkowski as wminkowski
+
+from sklearn.utils.fixes import sp_version, parse_version

 import pytest

@@ -233,6 +241,7 @@ def test_pairwise_precomputed_non_negative():
         pairwise_distances(np.full((5, 5), -1), metric='precomputed')


+_minkowski_kwds = {'w': np.arange(1, 5).astype('double', copy=False), 'p': 1}
 _wminkowski_kwds = {'w': np.arange(1, 5).astype('double', copy=False), 'p': 1}


@@ -245,8 +254,38 @@ def callable_rbf_kernel(x, y, **kwds):
 @pytest.mark.parametrize(
         'func, metric, kwds',
         [(pairwise_distances, 'euclidean', {}),
-         (pairwise_distances, wminkowski, _wminkowski_kwds),
-         (pairwise_distances, 'wminkowski', _wminkowski_kwds),
+         pytest.param(
+             pairwise_distances, minkowski, _minkowski_kwds,
+             marks=pytest.mark.skipif(
+                 sp_version < parse_version("1.0"),
+                 reason="minkowski does not accept the w "
+                        "parameter prior to scipy 1.0."
+             )
+         ),
+         pytest.param(
+             pairwise_distances, 'minkowski', _minkowski_kwds,
+             marks=pytest.mark.skipif(
+                 sp_version < parse_version("1.0"),
+                 reason="minkowski does not accept the w "
+                        "parameter prior to scipy 1.0."
+             )
+         ),
+         pytest.param(
+             pairwise_distances, wminkowski, _wminkowski_kwds,
+             marks=pytest.mark.skipif(
+                 sp_version >= parse_version("1.6.0"),
+                 reason="wminkowski is now minkowski "
+                        "and it has already been tested."
+             )
+         ),
+         pytest.param(
+             pairwise_distances, 'wminkowski', _wminkowski_kwds,
+             marks=pytest.mark.skipif(
+                 sp_version >= parse_version("1.6.0"),
+                 reason="wminkowski is now minkowski "
+                        "and it has already been tested."
+             )
+         ),
          (pairwise_kernels, 'polynomial', {'degree': 1}),
          (pairwise_kernels, callable_rbf_kernel, {'gamma': .1})])
 @pytest.mark.parametrize('array_constr', [np.array, csr_matrix])

From cbb9320771590655211d0e0faea7e6e643c5a895 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Tue, 19 Jan 2021 10:00:38 +0100
Subject: [PATCH 37/38] Trigger wheel builder workflow: [cd build]

From cf21428a5770ca77b741fe8c7215d9dfd7caa417 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Tue, 19 Jan 2021 11:09:01 +0100
Subject: [PATCH 38/38] DOC fix year release 0.24.1
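As a closing illustration of the numerical-stability change from
[PATCH 28/38] (and its changelog entry in [PATCH 34/38]), here is a minimal
standalone sketch of the clip-to-zero strategy; the array values are
hypothetical stand-ins for per-cell mutual-information contributions:

    import numpy as np

    # Cancellation in log-space arithmetic can leave contributions that
    # should be exactly zero as tiny positive or negative residues.
    mi = np.array([0.25, -2.2e-17, 1.1e-16])

    # Snap anything smaller in magnitude than machine epsilon to exactly
    # zero, then clip the total so the score is never reported as negative.
    mi = np.where(np.abs(mi) < np.finfo(mi.dtype).eps, 0.0, mi)
    print(np.clip(mi.sum(), 0.0, None))  # 0.25

This np.where/np.clip pair is what [PATCH 28/38] applies inside
mutual_info_score, which is why adjusted_mutual_info_score stops returning
spurious non-zero scores with NumPy 1.20+.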