From e86b14388ce8a85bcbd1e8a5253a6400f587bc39 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Tue, 7 Sep 2021 12:07:44 +0200 Subject: [PATCH 01/49] MAINT missing what's new entry for PR-19401 (#20955) * MAINT missing what's new entry for PR-19401 * Consolidate sklearn.feature_extraction entries + fix refs --- doc/whats_new/v1.0.rst | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/doc/whats_new/v1.0.rst b/doc/whats_new/v1.0.rst index c35d2a1a481c1..917eb3edec5ec 100644 --- a/doc/whats_new/v1.0.rst +++ b/doc/whats_new/v1.0.rst @@ -403,14 +403,24 @@ Changelog :mod:`sklearn.feature_extraction` ................................. -- |Fix| Fixed a bug in :class:`feature_extraction.HashingVectorizer` where some - input strings would result in negative indices in the transformed data. - :pr:`19035` by :user:`Liu Yu `. +- |Fix| Fixed a bug in :class:`feature_extraction.text.HashingVectorizer` + where some input strings would result in negative indices in the transformed + data. :pr:`19035` by :user:`Liu Yu `. - |Fix| Fixed a bug in :class:`feature_extraction.DictVectorizer` by raising an error with unsupported value type. :pr:`19520` by :user:`Jeff Zhao `. +- |Fix| Fixed a bug in :func:`feature_extraction.image.img_to_graph` + and :func:`feature_extraction.image.grid_to_graph` where singleton connected + components were not handled properly, resulting in a wrong vertex indexing. + :pr:`18964` by `Bertrand Thirion`_. + +- |Fix| Raise a warning in :class:`feature_extraction.text.CountVectorizer` + with `lowercase=True` when there are vocabulary entries with uppercase + characters to avoid silent misses in the resulting feature vectors. + :pr:`19401` by :user:`Zito Relova ` + :mod:`sklearn.feature_selection` ................................ @@ -849,13 +859,6 @@ Changelog - |Fix| Improves compatibility of :func:`tree.plot_tree` with high DPI screens. :pr:`20023` by `Thomas Fan`_. -:mod:`sklearn.feature_extraction` -.................................. - -- |Fix| Fixed a bug in :func:`image._to_graph` where singleton - connected components were not handled properly, resulting in a wrong - vertex indexing. :pr:`18964` by `Bertrand Thirion`_. - - |Fix| Fixed a bug in :class:`tree.DecisionTreeClassifier`, :class:`tree.DecisionTreeRegressor` where a node could be split whereas it should not have been due to incorrect handling of rounding errors. From d707ef4ad581cebd7ec40e0b3cec716a9ce4c3f1 Mon Sep 17 00:00:00 2001 From: baam <83430343+baam25simo@users.noreply.github.com> Date: Wed, 8 Sep 2021 13:20:36 +0200 Subject: [PATCH 02/49] DOC Ensures that Birch passes numpydoc validation (#20972) --- maint_tools/test_docstrings.py | 1 - sklearn/cluster/_birch.py | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/maint_tools/test_docstrings.py b/maint_tools/test_docstrings.py index 69478c846a4a3..295f2598cc78c 100644 --- a/maint_tools/test_docstrings.py +++ b/maint_tools/test_docstrings.py @@ -9,7 +9,6 @@ # List of modules ignored when checking for numpydoc validation. 
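The `CountVectorizer` change recorded in the changelog above (PR 19401) warns when a fixed vocabulary contains uppercase characters while `lowercase=True`, since such entries can never match a lowercased document. A minimal sketch of how that situation arises (the exact trigger point and warning text may differ from the released implementation):

    >>> import warnings
    >>> from sklearn.feature_extraction.text import CountVectorizer
    >>> # "Apple" can never match once documents are lowercased to "apple"
    >>> vectorizer = CountVectorizer(vocabulary=["Apple", "banana"], lowercase=True)
    >>> with warnings.catch_warnings(record=True) as caught:
    ...     warnings.simplefilter("always")
    ...     _ = vectorizer.fit_transform(["apple and banana"])
    >>> # `caught` is expected to hold a UserWarning flagging the "Apple" entry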
DOCSTRING_IGNORE_LIST = [ - "Birch", "GammaRegressor", "GaussianProcessRegressor", "GaussianRandomProjection", diff --git a/sklearn/cluster/_birch.py b/sklearn/cluster/_birch.py index 38a57e4130d38..4f0e7200c2734 100644 --- a/sklearn/cluster/_birch.py +++ b/sklearn/cluster/_birch.py @@ -480,7 +480,7 @@ def __init__( # TODO: Remove in 1.2 # mypy error: Decorated property not supported @deprecated( # type: ignore - "`fit_` is deprecated in 1.0 and will be removed in 1.2" + "`fit_` is deprecated in 1.0 and will be removed in 1.2." ) @property def fit_(self): @@ -489,7 +489,7 @@ def fit_(self): # TODO: Remove in 1.2 # mypy error: Decorated property not supported @deprecated( # type: ignore - "`partial_fit_` is deprecated in 1.0 and will be removed in 1.2" + "`partial_fit_` is deprecated in 1.0 and will be removed in 1.2." ) @property def partial_fit_(self): From 3c0f0e8515dcbd269ec3065a611b4b0eeaec963a Mon Sep 17 00:00:00 2001 From: baam <83430343+baam25simo@users.noreply.github.com> Date: Wed, 8 Sep 2021 17:33:56 +0200 Subject: [PATCH 03/49] DOC Ensures that GammaRegressor passes numpydoc validation (#20973) Co-authored-by: Olivier Grisel --- maint_tools/test_docstrings.py | 1 - sklearn/linear_model/_glm/glm.py | 6 ++++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/maint_tools/test_docstrings.py b/maint_tools/test_docstrings.py index 295f2598cc78c..612817e23f6b9 100644 --- a/maint_tools/test_docstrings.py +++ b/maint_tools/test_docstrings.py @@ -9,7 +9,6 @@ # List of modules ignored when checking for numpydoc validation. DOCSTRING_IGNORE_LIST = [ - "GammaRegressor", "GaussianProcessRegressor", "GaussianRandomProjection", "GridSearchCV", diff --git a/sklearn/linear_model/_glm/glm.py b/sklearn/linear_model/_glm/glm.py index 49cd1d4222ccf..9ca5dc2ff6237 100644 --- a/sklearn/linear_model/_glm/glm.py +++ b/sklearn/linear_model/_glm/glm.py @@ -606,6 +606,11 @@ class GammaRegressor(GeneralizedLinearRegressor): .. versionadded:: 1.0 + See Also + -------- + PoissonRegressor : Generalized Linear Model with a Poisson distribution. + TweedieRegressor : Generalized Linear Model with a Tweedie distribution. + Examples -------- >>> from sklearn import linear_model @@ -648,6 +653,7 @@ def __init__( @property def family(self): + """Return the family of the regressor.""" # Make this attribute read-only to avoid mis-uses e.g. in GridSearch. return "gamma" From 555078973a2703c0c6ad96f8842334027506a455 Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Wed, 8 Sep 2021 11:53:03 -0400 Subject: [PATCH 04/49] DOC Reword score_sample's and similar interfaces' docstrings (#20979) Co-authored-by: Guillaume Lemaitre Co-authored-by: Alexandre Gramfort --- sklearn/covariance/_empirical_covariance.py | 9 ++++++--- sklearn/mixture/_base.py | 11 +++++------ sklearn/neighbors/_kde.py | 6 +++--- 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/sklearn/covariance/_empirical_covariance.py b/sklearn/covariance/_empirical_covariance.py index f54ff1e52d10c..b9fd47e160a2d 100644 --- a/sklearn/covariance/_empirical_covariance.py +++ b/sklearn/covariance/_empirical_covariance.py @@ -235,7 +235,10 @@ def fit(self, X, y=None): return self def score(self, X_test, y=None): - """Compute the log-likelihood of a Gaussian data set with `self.covariance_`. + """Compute the log-likelihood of `X_test` under the estimated Gaussian model. + + The Gaussian model is defined by its mean and covariance matrix which are + represented respectively by `self.location_` and `self.covariance_`. 
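As a quick illustration of the reworded `score` semantics (data are illustrative): the held-out samples are scored under the Gaussian model whose mean and covariance were estimated at fit time.

    >>> import numpy as np
    >>> from sklearn.covariance import EmpiricalCovariance
    >>> rng = np.random.RandomState(0)
    >>> cov = EmpiricalCovariance().fit(rng.randn(500, 2))
    >>> # scored with cov.location_ and cov.covariance_ as the Gaussian parameters
    >>> log_lik = cov.score(rng.randn(100, 2))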
Parameters ---------- @@ -251,8 +254,8 @@ def score(self, X_test, y=None): Returns ------- res : float - The likelihood of the data set with `self.covariance_` as an - estimator of its covariance matrix. + The log-likelihood of `X_test` with `self.location_` and `self.covariance_` + as estimators of the Gaussian model mean and covariance matrix respectively. """ X_test = self._validate_data(X_test, reset=False) # compute empirical covariance of the test set diff --git a/sklearn/mixture/_base.py b/sklearn/mixture/_base.py index 4c7ded66e20a1..d40903899c187 100644 --- a/sklearn/mixture/_base.py +++ b/sklearn/mixture/_base.py @@ -335,7 +335,7 @@ def _set_parameters(self, params): pass def score_samples(self, X): - """Compute the weighted log probabilities for each sample. + """Compute the log-likelihood of each sample. Parameters ---------- @@ -346,7 +346,7 @@ def score_samples(self, X): Returns ------- log_prob : array, shape (n_samples,) - Log probabilities of each data point in X. + Log-likelihood of each sample in `X` under the current model. """ check_is_fitted(self) X = self._validate_data(X, reset=False) @@ -368,7 +368,7 @@ def score(self, X, y=None): Returns ------- log_likelihood : float - Log likelihood of the Gaussian mixture given X. + Log-likelihood of `X` under the Gaussian mixture model. """ return self.score_samples(X).mean() @@ -391,7 +391,7 @@ def predict(self, X): return self._estimate_weighted_log_prob(X).argmax(axis=1) def predict_proba(self, X): - """Predict posterior probability of each component given the data. + """Evaluate the components' density for each sample. Parameters ---------- @@ -402,8 +402,7 @@ def predict_proba(self, X): Returns ------- resp : array, shape (n_samples, n_components) - Returns the probability each Gaussian (state) in - the model given each sample. + Density of each Gaussian component for each sample in X. """ check_is_fitted(self) X = self._validate_data(X, reset=False) diff --git a/sklearn/neighbors/_kde.py b/sklearn/neighbors/_kde.py index 328a13371bafd..9c55e2e00d193 100644 --- a/sklearn/neighbors/_kde.py +++ b/sklearn/neighbors/_kde.py @@ -208,7 +208,7 @@ def fit(self, X, y=None, sample_weight=None): return self def score_samples(self, X): - """Evaluate the log density model on the data. + """Compute the log-likelihood of each sample under the model. Parameters ---------- @@ -219,7 +219,7 @@ def score_samples(self, X): Returns ------- density : ndarray of shape (n_samples,) - The array of log(density) evaluations. These are normalized to be + Log-likelihood of each sample in `X`. These are normalized to be probability densities, so values will be low for high-dimensional data. """ @@ -246,7 +246,7 @@ def score_samples(self, X): return log_density def score(self, X, y=None): - """Compute the total log probability density under the model. + """Compute the total log-likelihood under the model. 
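Note the convention split visible in the diffs above: the mixture `score` averages `score_samples` over samples, while `KernelDensity.score` returns the total. A small sketch of the kernel density case:

    >>> import numpy as np
    >>> from sklearn.neighbors import KernelDensity
    >>> X = np.array([[0.0], [0.5], [1.0], [1.5]])
    >>> kde = KernelDensity(kernel="gaussian", bandwidth=0.5).fit(X)
    >>> log_dens = kde.score_samples(X)  # one log-likelihood per sample
    >>> bool(np.allclose(kde.score(X), log_dens.sum()))
    True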
Parameters ---------- From 9b0ecf2db18805dd5c9c4af9faa1d8fd5296f65a Mon Sep 17 00:00:00 2001 From: baam <83430343+baam25simo@users.noreply.github.com> Date: Thu, 9 Sep 2021 11:02:52 +0200 Subject: [PATCH 05/49] DOC Ensures that GaussianProcessRegressor passes numpydoc validation (#20981) --- maint_tools/test_docstrings.py | 1 - sklearn/gaussian_process/_gpr.py | 16 +++++++++++----- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/maint_tools/test_docstrings.py b/maint_tools/test_docstrings.py index 612817e23f6b9..ff8e1d29636a2 100644 --- a/maint_tools/test_docstrings.py +++ b/maint_tools/test_docstrings.py @@ -9,7 +9,6 @@ # List of modules ignored when checking for numpydoc validation. DOCSTRING_IGNORE_LIST = [ - "GaussianProcessRegressor", "GaussianRandomProjection", "GridSearchCV", "HalvingGridSearchCV", diff --git a/sklearn/gaussian_process/_gpr.py b/sklearn/gaussian_process/_gpr.py index 8b503358423f4..cf9a349f1b074 100644 --- a/sklearn/gaussian_process/_gpr.py +++ b/sklearn/gaussian_process/_gpr.py @@ -82,7 +82,7 @@ def optimizer(obj_func, initial_theta, bounds): Per default, the L-BFGS-B algorithm from `scipy.optimize.minimize` is used. If None is passed, the kernel's parameters are kept fixed. - Available internal optimizers are: `{'fmin_l_bfgs_b'}` + Available internal optimizers are: `{'fmin_l_bfgs_b'}`. n_restarts_optimizer : int, default=0 The number of restarts of the optimizer for finding the kernel's @@ -146,6 +146,11 @@ def optimizer(obj_func, initial_theta, bounds): .. versionadded:: 1.0 + See Also + -------- + GaussianProcessClassifier : Gaussian process classification (GPC) + based on Laplace approximation. + References ---------- .. [1] `Rasmussen, Carl Edward. @@ -200,7 +205,8 @@ def fit(self, X, y): Returns ------- - self : returns an instance of self. + self : object + GaussianProcessRegressor class instance. """ if self.kernel is None: # Use an RBF kernel as default self.kernel_ = C(1.0, constant_value_bounds="fixed") * RBF( @@ -318,7 +324,7 @@ def obj_func(theta, eval_gradient=True): return self def predict(self, X, return_std=False, return_cov=False): - """Predict using the Gaussian process regression model + """Predict using the Gaussian process regression model. We can also predict based on an unfitted model by using the GP prior. In addition to the mean of the predictive distribution, optionally also @@ -433,7 +439,7 @@ def sample_y(self, X, n_samples=1, random_state=0): Query points where the GP is evaluated. n_samples : int, default=1 - Number of samples drawn from the Gaussian process per query point + Number of samples drawn from the Gaussian process per query point. random_state : int, RandomState instance or None, default=0 Determines random number generation to randomly draw samples. @@ -464,7 +470,7 @@ def sample_y(self, X, n_samples=1, random_state=0): def log_marginal_likelihood( self, theta=None, eval_gradient=False, clone_kernel=True ): - """Returns log-marginal likelihood of theta for training data. + """Return log-marginal likelihood of theta for training data. 
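A usage sketch for the reworded `log_marginal_likelihood` above (kernel hyperparameters and data are illustrative only):

    >>> from sklearn.datasets import make_friedman2
    >>> from sklearn.gaussian_process import GaussianProcessRegressor
    >>> from sklearn.gaussian_process.kernels import RBF
    >>> X, y = make_friedman2(n_samples=100, noise=0.1, random_state=0)
    >>> gpr = GaussianProcessRegressor(kernel=RBF(10.0), random_state=0).fit(X, y)
    >>> lml = gpr.log_marginal_likelihood(gpr.kernel_.theta)  # at the fitted theta
    >>> y_mean, y_std = gpr.predict(X[:5], return_std=True)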
Parameters ---------- From 9d5526fcec80173e11e05dbf5548d66e8d0eecca Mon Sep 17 00:00:00 2001 From: baam <83430343+baam25simo@users.noreply.github.com> Date: Thu, 9 Sep 2021 11:04:14 +0200 Subject: [PATCH 06/49] DOC Ensures that GaussianRandomProjection passes numpydoc validation (#20982) Co-authored-by: Guillaume Lemaitre --- maint_tools/test_docstrings.py | 1 - sklearn/random_projection.py | 20 ++++++++++---------- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/maint_tools/test_docstrings.py b/maint_tools/test_docstrings.py index ff8e1d29636a2..8281a0bf0039a 100644 --- a/maint_tools/test_docstrings.py +++ b/maint_tools/test_docstrings.py @@ -9,7 +9,6 @@ # List of modules ignored when checking for numpydoc validation. DOCSTRING_IGNORE_LIST = [ - "GaussianRandomProjection", "GridSearchCV", "HalvingGridSearchCV", "HalvingRandomSearchCV", diff --git a/sklearn/random_projection.py b/sklearn/random_projection.py index b0cdbaf5fc9ff..338975b8774d4 100644 --- a/sklearn/random_projection.py +++ b/sklearn/random_projection.py @@ -336,13 +336,13 @@ def fit(self, X, y=None): matrix dimensions based on the theory referenced in the afore mentioned papers. - y - Ignored + y : Ignored + Not used, present here for API consistency by convention. Returns ------- - self - + self : object + BaseRandomProjection class instance. """ X = self._validate_data(X, accept_sparse=["csr", "csc"]) @@ -395,7 +395,7 @@ def fit(self, X, y=None): return self def transform(self, X): - """Project the data by using matrix product with the random matrix + """Project the data by using matrix product with the random matrix. Parameters ---------- @@ -477,6 +477,11 @@ class GaussianRandomProjection(BaseRandomProjection): .. versionadded:: 1.0 + See Also + -------- + SparseRandomProjection : Reduce dimensionality through sparse + random projection. + Examples -------- >>> import numpy as np @@ -487,11 +492,6 @@ class GaussianRandomProjection(BaseRandomProjection): >>> X_new = transformer.fit_transform(X) >>> X_new.shape (100, 3947) - - See Also - -------- - SparseRandomProjection - """ def __init__(self, n_components="auto", *, eps=0.1, random_state=None): From d2efdc19736028b95eeb74e7ce219dde3a0438e7 Mon Sep 17 00:00:00 2001 From: Juan Martin Loyola Date: Thu, 9 Sep 2021 06:05:04 -0300 Subject: [PATCH 07/49] DOC Ensures that SelectKBest passes numpydoc validation (#20983) --- maint_tools/test_docstrings.py | 1 - .../_univariate_selection.py | 32 +++++++++---------- 2 files changed, 16 insertions(+), 17 deletions(-) diff --git a/maint_tools/test_docstrings.py b/maint_tools/test_docstrings.py index 8281a0bf0039a..eef0605471ddf 100644 --- a/maint_tools/test_docstrings.py +++ b/maint_tools/test_docstrings.py @@ -64,7 +64,6 @@ "SelectFpr", "SelectFromModel", "SelectFwe", - "SelectKBest", "SelfTrainingClassifier", "SequentialFeatureSelector", "SimpleImputer", diff --git a/sklearn/feature_selection/_univariate_selection.py b/sklearn/feature_selection/_univariate_selection.py index 95b244bd2cca6..eb7e506d741b7 100644 --- a/sklearn/feature_selection/_univariate_selection.py +++ b/sklearn/feature_selection/_univariate_selection.py @@ -563,22 +563,6 @@ class SelectKBest(_BaseFilter): .. 
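The `n_components='auto'` behaviour in the `GaussianRandomProjection` patch above comes from the Johnson-Lindenstrauss bound; the 3947 components appearing in its doctest can be reproduced directly:

    >>> from sklearn.random_projection import johnson_lindenstrauss_min_dim
    >>> johnson_lindenstrauss_min_dim(n_samples=100, eps=0.1)
    3947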
versionadded:: 1.0 - Examples - -------- - >>> from sklearn.datasets import load_digits - >>> from sklearn.feature_selection import SelectKBest, chi2 - >>> X, y = load_digits(return_X_y=True) - >>> X.shape - (1797, 64) - >>> X_new = SelectKBest(chi2, k=20).fit_transform(X, y) - >>> X_new.shape - (1797, 20) - - Notes - ----- - Ties between features with equal scores will be broken in an unspecified - way. - See Also -------- f_classif: ANOVA F-value between label/feature for classification tasks. @@ -593,6 +577,22 @@ class SelectKBest(_BaseFilter): SelectFwe : Select features based on family-wise error rate. GenericUnivariateSelect : Univariate feature selector with configurable mode. + + Notes + ----- + Ties between features with equal scores will be broken in an unspecified + way. + + Examples + -------- + >>> from sklearn.datasets import load_digits + >>> from sklearn.feature_selection import SelectKBest, chi2 + >>> X, y = load_digits(return_X_y=True) + >>> X.shape + (1797, 64) + >>> X_new = SelectKBest(chi2, k=20).fit_transform(X, y) + >>> X_new.shape + (1797, 20) """ def __init__(self, score_func=f_classif, *, k=10): From 2c9dab38055a9c869d3b4fbdcfc61b126870afd1 Mon Sep 17 00:00:00 2001 From: Juan Martin Loyola Date: Thu, 9 Sep 2021 06:05:51 -0300 Subject: [PATCH 08/49] DOC Ensures that SelectFdr passes numpydoc validation (#20984) --- maint_tools/test_docstrings.py | 1 - .../_univariate_selection.py | 32 +++++++++---------- 2 files changed, 16 insertions(+), 17 deletions(-) diff --git a/maint_tools/test_docstrings.py b/maint_tools/test_docstrings.py index eef0605471ddf..929b8b27e912c 100644 --- a/maint_tools/test_docstrings.py +++ b/maint_tools/test_docstrings.py @@ -60,7 +60,6 @@ "RobustScaler", "SGDOneClassSVM", "SGDRegressor", - "SelectFdr", "SelectFpr", "SelectFromModel", "SelectFwe", diff --git a/sklearn/feature_selection/_univariate_selection.py b/sklearn/feature_selection/_univariate_selection.py index eb7e506d741b7..1acd8a5703cb0 100644 --- a/sklearn/feature_selection/_univariate_selection.py +++ b/sklearn/feature_selection/_univariate_selection.py @@ -699,7 +699,7 @@ def _get_support_mask(self): class SelectFdr(_BaseFilter): - """Filter: Select the p-values for an estimated false discovery rate + """Filter: Select the p-values for an estimated false discovery rate. This uses the Benjamini-Hochberg procedure. ``alpha`` is an upper bound on the expected false discovery rate. @@ -717,17 +717,6 @@ class SelectFdr(_BaseFilter): alpha : float, default=5e-2 The highest uncorrected p-value for features to keep. - Examples - -------- - >>> from sklearn.datasets import load_breast_cancer - >>> from sklearn.feature_selection import SelectFdr, chi2 - >>> X, y = load_breast_cancer(return_X_y=True) - >>> X.shape - (569, 30) - >>> X_new = SelectFdr(chi2, alpha=0.01).fit_transform(X, y) - >>> X_new.shape - (569, 16) - Attributes ---------- scores_ : array-like of shape (n_features,) @@ -747,10 +736,6 @@ class SelectFdr(_BaseFilter): .. versionadded:: 1.0 - References - ---------- - https://en.wikipedia.org/wiki/False_discovery_rate - See Also -------- f_classif : ANOVA F-value between label/feature for classification tasks. @@ -765,6 +750,21 @@ class SelectFdr(_BaseFilter): SelectFwe : Select features based on family-wise error rate. GenericUnivariateSelect : Univariate feature selector with configurable mode. 
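Each of these univariate filters also exposes the selected mask via `get_support` and the per-feature statistics via the `scores_`/`pvalues_` attributes documented above; for instance, on the iris data the two features retained by `SelectKBest` are the petal measurements:

    >>> from sklearn.datasets import load_iris
    >>> from sklearn.feature_selection import SelectKBest, f_classif
    >>> X, y = load_iris(return_X_y=True)
    >>> selector = SelectKBest(f_classif, k=2).fit(X, y)
    >>> selector.get_support()
    array([False, False,  True,  True])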
+ + References + ---------- + https://en.wikipedia.org/wiki/False_discovery_rate + + Examples + -------- + >>> from sklearn.datasets import load_breast_cancer + >>> from sklearn.feature_selection import SelectFdr, chi2 + >>> X, y = load_breast_cancer(return_X_y=True) + >>> X.shape + (569, 30) + >>> X_new = SelectFdr(chi2, alpha=0.01).fit_transform(X, y) + >>> X_new.shape + (569, 16) """ def __init__(self, score_func=f_classif, *, alpha=5e-2): From 870410e983d63b7bf6402129d4afccbcbf49c81a Mon Sep 17 00:00:00 2001 From: Juan Martin Loyola Date: Thu, 9 Sep 2021 06:06:52 -0300 Subject: [PATCH 09/49] DOC Ensures that SelectFpr passes numpydoc validation (#20985) Co-authored-by: Guillaume Lemaitre --- maint_tools/test_docstrings.py | 1 - .../_univariate_selection.py | 22 +++++++++---------- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/maint_tools/test_docstrings.py b/maint_tools/test_docstrings.py index 929b8b27e912c..9829abbb9a0e3 100644 --- a/maint_tools/test_docstrings.py +++ b/maint_tools/test_docstrings.py @@ -60,7 +60,6 @@ "RobustScaler", "SGDOneClassSVM", "SGDRegressor", - "SelectFpr", "SelectFromModel", "SelectFwe", "SelfTrainingClassifier", diff --git a/sklearn/feature_selection/_univariate_selection.py b/sklearn/feature_selection/_univariate_selection.py index 1acd8a5703cb0..c79b5b176e8d6 100644 --- a/sklearn/feature_selection/_univariate_selection.py +++ b/sklearn/feature_selection/_univariate_selection.py @@ -661,17 +661,6 @@ class SelectFpr(_BaseFilter): .. versionadded:: 1.0 - Examples - -------- - >>> from sklearn.datasets import load_breast_cancer - >>> from sklearn.feature_selection import SelectFpr, chi2 - >>> X, y = load_breast_cancer(return_X_y=True) - >>> X.shape - (569, 30) - >>> X_new = SelectFpr(chi2, alpha=0.01).fit_transform(X, y) - >>> X_new.shape - (569, 16) - See Also -------- f_classif : ANOVA F-value between label/feature for classification tasks. @@ -686,6 +675,17 @@ class SelectFpr(_BaseFilter): SelectFwe : Select features based on family-wise error rate. GenericUnivariateSelect : Univariate feature selector with configurable mode. 
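For readers unfamiliar with the Benjamini-Hochberg reference above, the step-up procedure behind `SelectFdr` can be sketched in a few lines of NumPy (a standalone illustration with made-up p-values, not the scikit-learn internals):

    >>> import numpy as np
    >>> p_values = np.sort(np.array([0.001, 0.008, 0.039, 0.041, 0.042, 0.06, 0.074, 0.205]))
    >>> m, alpha = len(p_values), 0.05
    >>> # reject the k smallest p-values, where k is the largest i with p_(i) <= alpha * i / m
    >>> passed = p_values <= alpha * np.arange(1, m + 1) / m
    >>> int(np.max(np.nonzero(passed)[0]) + 1)  # number of hypotheses rejected
    2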
+ + Examples + -------- + >>> from sklearn.datasets import load_breast_cancer + >>> from sklearn.feature_selection import SelectFpr, chi2 + >>> X, y = load_breast_cancer(return_X_y=True) + >>> X.shape + (569, 30) + >>> X_new = SelectFpr(chi2, alpha=0.01).fit_transform(X, y) + >>> X_new.shape + (569, 16) """ def __init__(self, score_func=f_classif, *, alpha=5e-2): From b0865295f9153b933076fcaed3388883d8ef37b6 Mon Sep 17 00:00:00 2001 From: Juan Martin Loyola Date: Thu, 9 Sep 2021 06:07:36 -0300 Subject: [PATCH 10/49] DOC Ensures that SelectFwe passes numpydoc validation (#20986) Co-authored-by: Guillaume Lemaitre --- maint_tools/test_docstrings.py | 1 - .../_univariate_selection.py | 24 +++++++++---------- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/maint_tools/test_docstrings.py b/maint_tools/test_docstrings.py index 9829abbb9a0e3..2c26869a65c1a 100644 --- a/maint_tools/test_docstrings.py +++ b/maint_tools/test_docstrings.py @@ -61,7 +61,6 @@ "SGDOneClassSVM", "SGDRegressor", "SelectFromModel", - "SelectFwe", "SelfTrainingClassifier", "SequentialFeatureSelector", "SimpleImputer", diff --git a/sklearn/feature_selection/_univariate_selection.py b/sklearn/feature_selection/_univariate_selection.py index c79b5b176e8d6..31236abd75680 100644 --- a/sklearn/feature_selection/_univariate_selection.py +++ b/sklearn/feature_selection/_univariate_selection.py @@ -785,7 +785,7 @@ def _get_support_mask(self): class SelectFwe(_BaseFilter): - """Filter: Select the p-values corresponding to Family-wise error rate + """Filter: Select the p-values corresponding to Family-wise error rate. Read more in the :ref:`User Guide `. @@ -800,17 +800,6 @@ class SelectFwe(_BaseFilter): alpha : float, default=5e-2 The highest uncorrected p-value for features to keep. - Examples - -------- - >>> from sklearn.datasets import load_breast_cancer - >>> from sklearn.feature_selection import SelectFwe, chi2 - >>> X, y = load_breast_cancer(return_X_y=True) - >>> X.shape - (569, 30) - >>> X_new = SelectFwe(chi2, alpha=0.01).fit_transform(X, y) - >>> X_new.shape - (569, 15) - Attributes ---------- scores_ : array-like of shape (n_features,) @@ -842,6 +831,17 @@ class SelectFwe(_BaseFilter): SelectFdr : Select features based on an estimated false discovery rate. GenericUnivariateSelect : Univariate feature selector with configurable mode. 
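Since `SelectFwe` bounds the family-wise error rather than the per-test rate, it is at least as strict as `SelectFpr` at the same `alpha`; on the breast-cancer data used in these doctests the counts come out as:

    >>> from sklearn.datasets import load_breast_cancer
    >>> from sklearn.feature_selection import SelectFpr, SelectFwe, chi2
    >>> X, y = load_breast_cancer(return_X_y=True)
    >>> SelectFpr(chi2, alpha=0.01).fit_transform(X, y).shape[1]
    16
    >>> SelectFwe(chi2, alpha=0.01).fit_transform(X, y).shape[1]
    15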
+ + Examples + -------- + >>> from sklearn.datasets import load_breast_cancer + >>> from sklearn.feature_selection import SelectFwe, chi2 + >>> X, y = load_breast_cancer(return_X_y=True) + >>> X.shape + (569, 30) + >>> X_new = SelectFwe(chi2, alpha=0.01).fit_transform(X, y) + >>> X_new.shape + (569, 15) """ def __init__(self, score_func=f_classif, *, alpha=5e-2): From 065bca855d63976a2103376b753695e429bd8293 Mon Sep 17 00:00:00 2001 From: Juan Martin Loyola Date: Thu, 9 Sep 2021 06:08:58 -0300 Subject: [PATCH 11/49] DOC Ensures that SelectFromModel passes numpydoc validation (#20988) Co-authored-by: Guillaume Lemaitre --- maint_tools/test_docstrings.py | 1 - sklearn/feature_selection/_from_model.py | 28 ++++++++++++++---------- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/maint_tools/test_docstrings.py b/maint_tools/test_docstrings.py index 2c26869a65c1a..cc2e738561d8a 100644 --- a/maint_tools/test_docstrings.py +++ b/maint_tools/test_docstrings.py @@ -60,7 +60,6 @@ "RobustScaler", "SGDOneClassSVM", "SGDRegressor", - "SelectFromModel", "SelfTrainingClassifier", "SequentialFeatureSelector", "SimpleImputer", diff --git a/sklearn/feature_selection/_from_model.py b/sklearn/feature_selection/_from_model.py index ca51f1f56b3b4..55b7f6f070c9c 100644 --- a/sklearn/feature_selection/_from_model.py +++ b/sklearn/feature_selection/_from_model.py @@ -76,7 +76,7 @@ class SelectFromModel(MetaEstimatorMixin, SelectorMixin, BaseEstimator): ``feature_importances_`` or ``coef_`` attribute after fitting. Otherwise, the ``importance_getter`` parameter should be used. - threshold : string or float, default=None + threshold : str or float, default=None The threshold value to use for feature selection. Features whose importance is greater or equal are kept while the others are discarded. If "median" (resp. "mean"), then the ``threshold`` value is @@ -144,6 +144,14 @@ class SelectFromModel(MetaEstimatorMixin, SelectorMixin, BaseEstimator): threshold_ : float The threshold value used for feature selection. + See Also + -------- + RFE : Recursive feature elimination based on importance weights. + RFECV : Recursive feature elimination with built-in cross-validated + selection of the best number of features. + SequentialFeatureSelector : Sequential cross-validation based feature + selection. Does not rely on importance weights. + Notes ----- Allows NaN/Inf in the input if the underlying estimator does as well. @@ -169,14 +177,6 @@ class SelectFromModel(MetaEstimatorMixin, SelectorMixin, BaseEstimator): [-0.02], [-0.48], [ 1.48]]) - - See Also - -------- - RFE : Recursive feature elimination based on importance weights. - RFECV : Recursive feature elimination with built-in cross-validated - selection of the best number of features. - SequentialFeatureSelector : Sequential cross-validation based feature - selection. Does not rely on importance weights. """ def __init__( @@ -238,11 +238,13 @@ def fit(self, X, y=None, **fit_params): The target values (integers that correspond to classes in classification, real numbers in regression). - **fit_params : Other estimator specific parameters + **fit_params : dict + Other estimator specific parameters. Returns ------- self : object + Fitted estimator. 
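A short sketch of the string `threshold` options documented above (the estimator and data are illustrative):

    >>> from sklearn.datasets import load_iris
    >>> from sklearn.ensemble import RandomForestClassifier
    >>> from sklearn.feature_selection import SelectFromModel
    >>> X, y = load_iris(return_X_y=True)
    >>> sfm = SelectFromModel(
    ...     RandomForestClassifier(n_estimators=50, random_state=0),
    ...     threshold="median",
    ... ).fit(X, y)
    >>> X_reduced = sfm.transform(X)  # keeps features with importance >= median
    >>> t = sfm.threshold_  # the resolved numeric cut-off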
""" if self.max_features is not None: if not isinstance(self.max_features, numbers.Integral): @@ -269,6 +271,7 @@ def fit(self, X, y=None, **fit_params): @property def threshold_(self): + """Threshold value used for feature selection.""" scores = _get_feature_importances( estimator=self.estimator_, getter=self.importance_getter, @@ -290,11 +293,13 @@ def partial_fit(self, X, y=None, **fit_params): The target values (integers that correspond to classes in classification, real numbers in regression). - **fit_params : Other estimator specific parameters + **fit_params : dict + Other estimator specific parameters. Returns ------- self : object + Fitted estimator. """ if self.prefit: raise NotFittedError("Since 'prefit=True', call transform directly") @@ -305,6 +310,7 @@ def partial_fit(self, X, y=None, **fit_params): @property def n_features_in_(self): + """Number of features seen during `fit`.""" # For consistency with other estimators we raise a AttributeError so # that hasattr() fails if the estimator isn't fitted. try: From f755d7121be77f094e1cb3dd207666ea5254e43e Mon Sep 17 00:00:00 2001 From: genvalen Date: Thu, 9 Sep 2021 05:09:37 -0400 Subject: [PATCH 12/49] DOC Ensures that LabelBinarizer passes numpydoc validation (#20990) --- maint_tools/test_docstrings.py | 1 - sklearn/preprocessing/_label.py | 22 ++++++++++------------ 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/maint_tools/test_docstrings.py b/maint_tools/test_docstrings.py index cc2e738561d8a..c4f3aafc41bbe 100644 --- a/maint_tools/test_docstrings.py +++ b/maint_tools/test_docstrings.py @@ -18,7 +18,6 @@ "IterativeImputer", "KBinsDiscretizer", "KNNImputer", - "LabelBinarizer", "LabelPropagation", "LabelSpreading", "LocalOutlierFactor", diff --git a/sklearn/preprocessing/_label.py b/sklearn/preprocessing/_label.py index b5c7d7c117e21..2886f1b7f9ecf 100644 --- a/sklearn/preprocessing/_label.py +++ b/sklearn/preprocessing/_label.py @@ -188,7 +188,6 @@ class LabelBinarizer(TransformerMixin, BaseEstimator): Parameters ---------- - neg_label : int, default=0 Value with which negative labels must be encoded. @@ -201,7 +200,6 @@ class LabelBinarizer(TransformerMixin, BaseEstimator): Attributes ---------- - classes_ : ndarray of shape (n_classes,) Holds the label for each class. @@ -215,6 +213,13 @@ class LabelBinarizer(TransformerMixin, BaseEstimator): True if the input data to transform is given as a sparse matrix, False otherwise. + See Also + -------- + label_binarize : Function to perform the transform operation of + LabelBinarizer with fixed classes. + OneHotEncoder : Encode categorical features using a one-hot aka one-of-K + scheme. + Examples -------- >>> from sklearn import preprocessing @@ -248,13 +253,6 @@ class LabelBinarizer(TransformerMixin, BaseEstimator): [0, 1, 0], [0, 0, 1], [0, 1, 0]]) - - See Also - -------- - label_binarize : Function to perform the transform operation of - LabelBinarizer with fixed classes. - OneHotEncoder : Encode categorical features using a one-hot aka one-of-K - scheme. """ def __init__(self, *, neg_label=0, pos_label=1, sparse_output=False): @@ -288,7 +286,8 @@ def fit(self, y): Returns ------- - self : returns an instance of self. + self : object + Returns the instance itself. """ self.y_type_ = type_of_target(y) if "multioutput" in self.y_type_: @@ -303,8 +302,7 @@ def fit(self, y): return self def fit_transform(self, y): - """Fit label binarizer and transform multi-class labels to binary - labels. + """Fit label binarizer/transform multi-class labels to binary labels. 
The output of transform is sometimes referred to as the 1-of-K coding scheme. From 3d8583ade8e4ad0abb84b4b73357e936d635fd2d Mon Sep 17 00:00:00 2001 From: Juan Martin Loyola Date: Thu, 9 Sep 2021 06:43:07 -0300 Subject: [PATCH 13/49] DOC Ensures that Pipeline passes numpydoc validation (#20969) Co-authored-by: Guillaume Lemaitre --- maint_tools/test_docstrings.py | 1 - sklearn/pipeline.py | 171 ++++++++++++++++++++++----------- 2 files changed, 116 insertions(+), 56 deletions(-) diff --git a/maint_tools/test_docstrings.py b/maint_tools/test_docstrings.py index c4f3aafc41bbe..7cb0b5e923071 100644 --- a/maint_tools/test_docstrings.py +++ b/maint_tools/test_docstrings.py @@ -47,7 +47,6 @@ "PassiveAggressiveClassifier", "PassiveAggressiveRegressor", "PatchExtractor", - "Pipeline", "PolynomialFeatures", "PowerTransformer", "QuadraticDiscriminantAnalysis", diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index 9d4997686612b..c508e55cbb636 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -54,17 +54,17 @@ class Pipeline(_BaseComposition): Sequentially apply a list of transforms and a final estimator. Intermediate steps of the pipeline must be 'transforms', that is, they - must implement fit and transform methods. - The final estimator only needs to implement fit. + must implement `fit` and `transform` methods. + The final estimator only needs to implement `fit`. The transformers in the pipeline can be cached using ``memory`` argument. The purpose of the pipeline is to assemble several steps that can be - cross-validated together while setting different parameters. - For this, it enables setting parameters of the various steps using their - names and the parameter name separated by a '__', as in the example below. - A step's estimator may be replaced entirely by setting the parameter - with its name to another estimator, or a transformer removed by setting - it to 'passthrough' or ``None``. + cross-validated together while setting different parameters. For this, it + enables setting parameters of the various steps using their names and the + parameter name separated by a `'__'`, as in the example below. A step's + estimator may be replaced entirely by setting the parameter with its name + to another estimator, or a transformer removed by setting it to + `'passthrough'` or `None`. Read more in the :ref:`User Guide `. @@ -72,10 +72,10 @@ class Pipeline(_BaseComposition): Parameters ---------- - steps : list - List of (name, transform) tuples (implementing fit/transform) that are - chained, in the order in which they are chained, with the last object - an estimator. + steps : list of tuple + List of (name, transform) tuples (implementing `fit`/`transform`) that + are chained, in the order in which they are chained, with the last + object an estimator. memory : str or object with the joblib.Memory interface, default=None Used to cache the fitted transformers of the pipeline. By default, @@ -173,9 +173,17 @@ def set_params(self, **kwargs): you can directly set the parameters of the estimators contained in `steps`. + Parameters + ---------- + **kwargs : dict + Parameters of this estimator or parameters of estimators contained + in `steps`. Parameters of the steps may be set using its name and + the parameter name separated by a '__'. + Returns ------- - self + self : object + Pipeline class instance. """ self._set_params("steps", **kwargs) return self @@ -266,6 +274,10 @@ def _estimator_type(self): @property def named_steps(self): + """Access the steps by name. 
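The `'__'` convention and the `'passthrough'` replacement described in the class docstring above look like this in practice (a minimal sketch):

    >>> from sklearn.pipeline import Pipeline
    >>> from sklearn.preprocessing import StandardScaler
    >>> from sklearn.svm import SVC
    >>> pipe = Pipeline([("scaler", StandardScaler()), ("svc", SVC())])
    >>> pipe.set_params(svc__C=10)  # step name + '__' + parameter name
    Pipeline(steps=[('scaler', StandardScaler()), ('svc', SVC(C=10))])
    >>> pipe.set_params(scaler="passthrough")  # replace a whole step
    Pipeline(steps=[('scaler', 'passthrough'), ('svc', SVC(C=10))])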
+ + Read-only attribute to access any step by given name. + Keys are steps names and values are the steps objects.""" # Use Bunch object to improve autocomplete return Bunch(**dict(self.steps)) @@ -349,10 +361,10 @@ def _fit(self, X, y=None, **fit_params_steps): return X def fit(self, X, y=None, **fit_params): - """Fit the model + """Fit the model. - Fit all the transforms one after the other and transform the - data, then fit the transformed data using the final estimator. + Fit all the transformers one after the other and transform the + data. Finally, fit the transformed data using the final estimator. Parameters ---------- @@ -371,8 +383,8 @@ def fit(self, X, y=None, **fit_params): Returns ------- - self : Pipeline - This estimator + self : object + Pipeline with fitted steps. """ fit_params_steps = self._check_fit_params(**fit_params) Xt = self._fit(X, y, **fit_params_steps) @@ -384,10 +396,10 @@ def fit(self, X, y=None, **fit_params): return self def fit_transform(self, X, y=None, **fit_params): - """Fit the model and transform with the final estimator + """Fit the model and transform with the final estimator. - Fits all the transforms one after the other and transforms the - data, then uses fit_transform on transformed data with the final + Fits all the transformers one after the other and transform the + data. Then uses `fit_transform` on transformed data with the final estimator. Parameters @@ -407,8 +419,8 @@ def fit_transform(self, X, y=None, **fit_params): Returns ------- - Xt : array-like of shape (n_samples, n_transformed_features) - Transformed samples + Xt : ndarray of shape (n_samples, n_transformed_features) + Transformed samples. """ fit_params_steps = self._check_fit_params(**fit_params) Xt = self._fit(X, y, **fit_params_steps) @@ -425,7 +437,11 @@ def fit_transform(self, X, y=None, **fit_params): @available_if(_final_estimator_has("predict")) def predict(self, X, **predict_params): - """Apply transforms to the data, and predict with the final estimator + """Transform the data, and apply `predict` with the final estimator. + + Call `transform` of each transformer in the pipeline. The transformed + data are finally passed to the final estimator that calls `predict` + method. Only valid if the final estimator implements `predict`. Parameters ---------- @@ -445,7 +461,8 @@ def predict(self, X, **predict_params): Returns ------- - y_pred : array-like + y_pred : ndarray + Result of calling `predict` on the final estimator. """ Xt = X for _, name, transform in self._iter(with_final=False): @@ -454,11 +471,12 @@ def predict(self, X, **predict_params): @available_if(_final_estimator_has("fit_predict")) def fit_predict(self, X, y=None, **fit_params): - """Applies fit_predict of last step in pipeline after transforms. + """Transform the data, and apply `fit_predict` with the final estimator. - Applies fit_transforms of a pipeline to the data, followed by the - fit_predict method of the final estimator in the pipeline. Valid - only if the final estimator implements fit_predict. + Call `fit_transform` of each transformer in the pipeline. The + transformed data are finally passed to the final estimator that calls + `fit_predict` method. Only valid if the final estimator implements + `fit_predict`. Parameters ---------- @@ -477,7 +495,8 @@ def fit_predict(self, X, y=None, **fit_params): Returns ------- - y_pred : array-like + y_pred : ndarray + Result of calling `fit_predict` on the final estimator. 
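`named_steps` complements positional indexing when inspecting a pipeline; for example:

    >>> from sklearn.pipeline import make_pipeline
    >>> from sklearn.preprocessing import StandardScaler
    >>> from sklearn.linear_model import LogisticRegression
    >>> pipe = make_pipeline(StandardScaler(), LogisticRegression())
    >>> pipe.named_steps["standardscaler"]
    StandardScaler()
    >>> pipe[-1]  # steps can also be retrieved by position
    LogisticRegression()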
""" fit_params_steps = self._check_fit_params(**fit_params) Xt = self._fit(X, y, **fit_params_steps) @@ -489,7 +508,12 @@ def fit_predict(self, X, y=None, **fit_params): @available_if(_final_estimator_has("predict_proba")) def predict_proba(self, X, **predict_proba_params): - """Apply transforms, and predict_proba of the final estimator + """Transform the data, and apply `predict_proba` with the final estimator. + + Call `transform` of each transformer in the pipeline. The transformed + data are finally passed to the final estimator that calls + `predict_proba` method. Only valid if the final estimator implements + `predict_proba`. Parameters ---------- @@ -498,12 +522,13 @@ def predict_proba(self, X, **predict_proba_params): of the pipeline. **predict_proba_params : dict of string -> object - Parameters to the ``predict_proba`` called at the end of all + Parameters to the `predict_proba` called at the end of all transformations in the pipeline. Returns ------- - y_proba : array-like of shape (n_samples, n_classes) + y_proba : ndarray of shape (n_samples, n_classes) + Result of calling `predict_proba` on the final estimator. """ Xt = X for _, name, transform in self._iter(with_final=False): @@ -512,7 +537,12 @@ def predict_proba(self, X, **predict_proba_params): @available_if(_final_estimator_has("decision_function")) def decision_function(self, X): - """Apply transforms, and decision_function of the final estimator + """Transform the data, and apply `decision_function` with the final estimator. + + Call `transform` of each transformer in the pipeline. The transformed + data are finally passed to the final estimator that calls + `decision_function` method. Only valid if the final estimator + implements `decision_function`. Parameters ---------- @@ -522,7 +552,8 @@ def decision_function(self, X): Returns ------- - y_score : array-like of shape (n_samples, n_classes) + y_score : ndarray of shape (n_samples, n_classes) + Result of calling `decision_function` on the final estimator. """ Xt = X for _, name, transform in self._iter(with_final=False): @@ -531,7 +562,12 @@ def decision_function(self, X): @available_if(_final_estimator_has("score_samples")) def score_samples(self, X): - """Apply transforms, and score_samples of the final estimator. + """Transform the data, and apply `score_samples` with the final estimator. + + Call `transform` of each transformer in the pipeline. The transformed + data are finally passed to the final estimator that calls + `score_samples` method. Only valid if the final estimator implements + `score_samples`. Parameters ---------- @@ -542,6 +578,7 @@ def score_samples(self, X): Returns ------- y_score : ndarray of shape (n_samples,) + Result of calling `score_samples` on the final estimator. """ Xt = X for _, _, transformer in self._iter(with_final=False): @@ -550,7 +587,12 @@ def score_samples(self, X): @available_if(_final_estimator_has("predict_log_proba")) def predict_log_proba(self, X, **predict_log_proba_params): - """Apply transforms, and predict_log_proba of the final estimator + """Transform the data, and apply `predict_log_proba` with the final estimator. + + Call `transform` of each transformer in the pipeline. The transformed + data are finally passed to the final estimator that calls + `predict_log_proba` method. Only valid if the final estimator + implements `predict_log_proba`. 
Parameters ---------- @@ -564,7 +606,8 @@ def predict_log_proba(self, X, **predict_log_proba_params): Returns ------- - y_score : array-like of shape (n_samples, n_classes) + y_log_proba : ndarray of shape (n_samples, n_classes) + Result of calling `predict_log_proba` on the final estimator. """ Xt = X for _, name, transform in self._iter(with_final=False): @@ -578,9 +621,14 @@ def _can_transform(self): @available_if(_can_transform) def transform(self, X): - """Apply transforms, and transform with the final estimator + """Transform the data, and apply `transform` with the final estimator. + + Call `transform` of each transformer in the pipeline. The transformed + data are finally passed to the final estimator that calls + `transform` method. Only valid if the final estimator + implements `transform`. - This also works where final estimator is ``None``: all prior + This also works where final estimator is `None` in which case all prior transformations are applied. Parameters @@ -591,7 +639,8 @@ def transform(self, X): Returns ------- - Xt : array-like of shape (n_samples, n_transformed_features) + Xt : ndarray of shape (n_samples, n_transformed_features) + Transformed data. """ Xt = X for _, _, transform in self._iter(): @@ -603,13 +652,13 @@ def _can_inverse_transform(self): @available_if(_can_inverse_transform) def inverse_transform(self, Xt): - """Apply inverse transformations in reverse order + """Apply `inverse_transform` for each step in a reverse order. - All estimators in the pipeline must support ``inverse_transform``. + All estimators in the pipeline must support `inverse_transform`. Parameters ---------- - Xt : array-like of shape (n_samples, n_transformed_features) + Xt : array-like of shape (n_samples, n_transformed_features) Data samples, where ``n_samples`` is the number of samples and ``n_features`` is the number of features. Must fulfill input requirements of last step of pipeline's @@ -617,7 +666,9 @@ def inverse_transform(self, Xt): Returns ------- - Xt : array-like of shape (n_samples, n_features) + Xt : ndarray of shape (n_samples, n_features) + Inverse transformed data, that is, data in the original feature + space. """ reverse_iter = reversed(list(self._iter())) for _, _, transform in reverse_iter: @@ -626,7 +677,11 @@ def inverse_transform(self, Xt): @available_if(_final_estimator_has("score")) def score(self, X, y=None, sample_weight=None): - """Apply transforms, and score with the final estimator + """Transform the data, and apply `score` with the final estimator. + + Call `transform` of each transformer in the pipeline. The transformed + data are finally passed to the final estimator that calls + `score` method. Only valid if the final estimator implements `score`. Parameters ---------- @@ -645,6 +700,7 @@ def score(self, X, y=None, sample_weight=None): Returns ------- score : float + Result of calling `score` on the final estimator. """ Xt = X for _, name, transform in self._iter(with_final=False): @@ -656,6 +712,7 @@ def score(self, X, y=None, sample_weight=None): @property def classes_(self): + """The classes labels. 
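A round-trip sketch of the `transform`/`inverse_transform` pair documented above:

    >>> import numpy as np
    >>> from sklearn.pipeline import make_pipeline
    >>> from sklearn.preprocessing import StandardScaler
    >>> X = np.array([[0.0, 1.0], [2.0, 3.0], [4.0, 5.0]])
    >>> pipe = make_pipeline(StandardScaler()).fit(X)
    >>> Xt = pipe.transform(X)
    >>> bool(np.allclose(pipe.inverse_transform(Xt), X))
    True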
Only exist if the last step is a classifier.""" return self.steps[-1][1].classes_ def _more_tags(self): @@ -700,11 +757,13 @@ def get_feature_names_out(self, input_features=None): @property def n_features_in_(self): + """Number of features seen during first step `fit` method.""" # delegate to first step (which will call _check_is_fitted) return self.steps[0][1].n_features_in_ @property def feature_names_in_(self): + """Names of features seen during first step `fit` method.""" # delegate to first step (which will call _check_is_fitted) return self.steps[0][1].feature_names_in_ @@ -765,15 +824,16 @@ def _name_estimators(estimators): def make_pipeline(*steps, memory=None, verbose=False): - """Construct a Pipeline from the given estimators. + """Construct a :class:`Pipeline` from the given estimators. - This is a shorthand for the Pipeline constructor; it does not require, and - does not permit, naming the estimators. Instead, their names will be set - to the lowercase of their types automatically. + This is a shorthand for the :class:`Pipeline` constructor; it does not + require, and does not permit, naming the estimators. Instead, their names + will be set to the lowercase of their types automatically. Parameters ---------- - *steps : list of estimators. + *steps : list of Estimator objects + List of the scikit-learn estimators that are chained together. memory : str or object with the joblib.Memory interface, default=None Used to cache the fitted transformers of the pipeline. By default, @@ -789,6 +849,11 @@ def make_pipeline(*steps, memory=None, verbose=False): If True, the time elapsed while fitting each step will be printed as it is completed. + Returns + ------- + p : Pipeline + Returns a scikit-learn :class:`Pipeline` object. + See Also -------- Pipeline : Class for creating a pipeline of transforms with a final @@ -801,10 +866,6 @@ def make_pipeline(*steps, memory=None, verbose=False): >>> make_pipeline(StandardScaler(), GaussianNB(priors=None)) Pipeline(steps=[('standardscaler', StandardScaler()), ('gaussiannb', GaussianNB())]) - - Returns - ------- - p : Pipeline """ return Pipeline(_name_estimators(steps), memory=memory, verbose=verbose) From 497bcb62b873b463f41f4e6409be93e5007569f6 Mon Sep 17 00:00:00 2001 From: "Thomas J. Fan" Date: Fri, 10 Sep 2021 03:56:27 -0400 Subject: [PATCH 14/49] DOC Fixes build from source instructions (#21004) --- doc/developers/advanced_installation.rst | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/doc/developers/advanced_installation.rst b/doc/developers/advanced_installation.rst index c1dec51723861..34f0979f8ddb8 100644 --- a/doc/developers/advanced_installation.rst +++ b/doc/developers/advanced_installation.rst @@ -60,11 +60,12 @@ feature, code or documentation improvement). #. Optional (but recommended): create and activate a dedicated virtualenv_ or `conda environment`_. -#. Install Cython_ and build the project with pip in :ref:`editable_mode`: +#. Install NumPy_, SciPy_, and Cython_ and build the project with pip in + :ref:`editable_mode`: .. prompt:: bash $ - pip install cython + pip install numpy scipy cython pip install --verbose --no-build-isolation --editable . #. Check that the installed scikit-learn has a version number ending with @@ -436,6 +437,8 @@ the base system and these steps will not be necessary. .. _OpenMP: https://en.wikipedia.org/wiki/OpenMP .. _Cython: https://cython.org +.. _NumPy: https://numpy.org +.. _SciPy: https://www.scipy.org .. _Homebrew: https://brew.sh .. 
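One way to sanity-check the editable build described in the updated instructions — a development build reports a version ending in `.dev0`, per the step above (output is machine-specific, hence skipped):

    >>> import sklearn
    >>> sklearn.__version__  # doctest: +SKIP
    '1.0.dev0'
    >>> sklearn.show_versions()  # doctest: +SKIP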
_virtualenv: https://docs.python.org/3/tutorial/venv.html .. _conda environment: https://docs.conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html From 768b7a2fdc5cdfdd27c33afa8bf9ac1b3b376fda Mon Sep 17 00:00:00 2001 From: "Thomas J. Fan" Date: Fri, 10 Sep 2021 06:00:37 -0400 Subject: [PATCH 15/49] CI Migrates pypy3 test to Azure (#21005) --- .circleci/config.yml | 31 ------------------- azure-pipelines.yml | 10 ++++-- build_tools/azure/install.sh | 22 +++++++++++-- .../azure/{posix-32.yml => posix-docker.yml} | 9 ++++-- 4 files changed, 34 insertions(+), 38 deletions(-) rename build_tools/azure/{posix-32.yml => posix-docker.yml} (94%) diff --git a/.circleci/config.yml b/.circleci/config.yml index c28fefa4e96e2..2e8958a2ab879 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -109,27 +109,6 @@ jobs: name: linting command: ./build_tools/circle/linting.sh - pypy3: - docker: - - image: condaforge/miniforge3 - environment: - # Avoid the interactive dialog when installing tzdata - - DEBIAN_FRONTEND: noninteractive - steps: - - restore_cache: - keys: - - pypy3-ccache-{{ .Branch }} - - pypy3-ccache - - run: apt-get -yq update && apt-get -yq install git ssh - - checkout - - run: conda init bash && source ~/.bashrc - - run: ./build_tools/circle/build_test_pypy.sh - - save_cache: - key: pypy3-ccache-{{ .Branch }}-{{ .BuildNum }} - paths: - - ~/.ccache - - ~/.cache/pip - linux-arm64: machine: image: ubuntu-2004:202101-01 @@ -190,16 +169,6 @@ workflows: - deploy: requires: - doc - pypy: - triggers: - - schedule: - cron: "0 0 * * *" - filters: - branches: - only: - - main - jobs: - - pypy3 linux-arm64: jobs: - linux-arm64 diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 2c19db7c9aaae..def0a8f98ae9b 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -182,9 +182,9 @@ jobs: TEST_DOCSTRINGS: 'true' CHECK_WARNINGS: 'true' -- template: build_tools/azure/posix-32.yml +- template: build_tools/azure/posix-docker.yml parameters: - name: Linux32 + name: Linux_Docker vmImage: ubuntu-20.04 dependsOn: [linting, git_commit] condition: | @@ -194,8 +194,14 @@ jobs: ne(variables['Build.Reason'], 'Schedule') ) matrix: + pypy3: + DISTRIB: 'conda-mamba-pypy3' + DOCKER_CONTAINER: 'condaforge/mambaforge-pypy3:4.10.3-5' + PILLOW_VERSION: 'none' + PANDAS_VERSION: 'none' debian_atlas_32bit: DISTRIB: 'debian-32' + DOCKER_CONTAINER: 'i386/debian:10.9' JOBLIB_VERSION: 'min' # disable pytest xdist due to unknown bug with 32-bit container PYTEST_XDIST_VERSION: 'none' diff --git a/build_tools/azure/install.sh b/build_tools/azure/install.sh index b69b01fb5a8dd..f67f72836e72d 100755 --- a/build_tools/azure/install.sh +++ b/build_tools/azure/install.sh @@ -5,9 +5,19 @@ set -x UNAMESTR=`uname` +if [[ "$DISTRIB" == "conda-mamba-pypy3" ]]; then + # condaforge/mambaforge-pypy3 needs compilers + apt-get -yq update + apt-get -yq install build-essential +fi + make_conda() { TO_INSTALL="$@" - conda create -n $VIRTUALENV --yes $TO_INSTALL + if [[ "$DISTRIB" == *"mamba"* ]]; then + mamba create -n $VIRTUALENV --yes $TO_INSTALL + else + conda create -n $VIRTUALENV --yes $TO_INSTALL + fi source activate $VIRTUALENV } @@ -25,7 +35,7 @@ setup_ccache() { # imports get_dep source build_tools/shared.sh -if [[ "$DISTRIB" == "conda" ]]; then +if [[ "$DISTRIB" == "conda" || "$DISTRIB" == *"mamba"* ]]; then if [[ "$CONDA_CHANNEL" != "" ]]; then TO_INSTALL="-c $CONDA_CHANNEL" @@ -33,7 +43,13 @@ if [[ "$DISTRIB" == "conda" ]]; then TO_INSTALL="" fi - TO_INSTALL="$TO_INSTALL python=$PYTHON_VERSION 
ccache pip blas[build=$BLAS]" + if [[ "$DISTRIB" == *"pypy"* ]]; then + TO_INSTALL="$TO_INSTALL pypy" + else + TO_INSTALL="$TO_INSTALL python=$PYTHON_VERSION" + fi + + TO_INSTALL="$TO_INSTALL ccache pip blas[build=$BLAS]" TO_INSTALL="$TO_INSTALL $(get_dep numpy $NUMPY_VERSION)" TO_INSTALL="$TO_INSTALL $(get_dep scipy $SCIPY_VERSION)" diff --git a/build_tools/azure/posix-32.yml b/build_tools/azure/posix-docker.yml similarity index 94% rename from build_tools/azure/posix-32.yml rename to build_tools/azure/posix-docker.yml index 039236a70fbe5..443ba1d1434e0 100644 --- a/build_tools/azure/posix-32.yml +++ b/build_tools/azure/posix-docker.yml @@ -30,6 +30,10 @@ jobs: THREADPOOLCTL_VERSION: 'latest' COVERAGE: 'false' TEST_DOCSTRINGS: 'false' + BLAS: 'openblas' + # Set in azure-pipelines.yml + DISTRIB: '' + DOCKER_CONTAINER: '' strategy: matrix: ${{ insert }}: ${{ parameters.matrix }} @@ -45,7 +49,7 @@ jobs: -w /io --detach --name skcontainer - -e DISTRIB=debian-32 + -e DISTRIB=$DISTRIB -e TEST_DIR=/temp_dir -e JUNITXML=$JUNITXML -e VIRTUALENV=testvenv @@ -63,7 +67,8 @@ jobs: -e OMP_NUM_THREADS=$OMP_NUM_THREADS -e OPENBLAS_NUM_THREADS=$OPENBLAS_NUM_THREADS -e SKLEARN_SKIP_NETWORK_TESTS=$SKLEARN_SKIP_NETWORK_TESTS - i386/debian:10.9 + -e BLAS=$BLAS + $DOCKER_CONTAINER sleep 1000000 displayName: 'Start container' - script: > From 942b996c1e0acfccdff5153b61480c235847801e Mon Sep 17 00:00:00 2001 From: jalexand3r Date: Mon, 13 Sep 2021 03:25:46 -0500 Subject: [PATCH 16/49] DOC fix typo parameter name in example Isomap (#21011) Co-authored-by: Johnathan Alexander --- doc/modules/neighbors.rst | 2 +- sklearn/neighbors/_graph.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/modules/neighbors.rst b/doc/modules/neighbors.rst index f394f011af11a..03842dcc704cb 100644 --- a/doc/modules/neighbors.rst +++ b/doc/modules/neighbors.rst @@ -567,7 +567,7 @@ using the caching properties of the scikit-learn pipeline: >>> from sklearn.pipeline import make_pipeline >>> estimator = make_pipeline( ... KNeighborsTransformer(n_neighbors=5, mode='distance'), - ... Isomap(neighbors_algorithm='precomputed'), + ... Isomap(metric='precomputed'), ... memory='/path/to/cache') Second, precomputing the graph can give finer control on the nearest neighbors diff --git a/sklearn/neighbors/_graph.py b/sklearn/neighbors/_graph.py index ee079e2e167a5..b16897a216ff0 100644 --- a/sklearn/neighbors/_graph.py +++ b/sklearn/neighbors/_graph.py @@ -337,7 +337,7 @@ class KNeighborsTransformer(KNeighborsMixin, TransformerMixin, NeighborsBase): >>> from sklearn.pipeline import make_pipeline >>> estimator = make_pipeline( ... KNeighborsTransformer(n_neighbors=5, mode='distance'), - ... Isomap(neighbors_algorithm='precomputed')) + ... 
Isomap(metric='precomputed')) """ def __init__( From 73dec986e99977194b74ec76f0979ebb2ee6f309 Mon Sep 17 00:00:00 2001 From: genvalen <13563897+genvalen@users.noreply.github.com> Date: Mon, 13 Sep 2021 04:55:21 -0400 Subject: [PATCH 17/49] DOC Ensures that KBinsDiscretizer passes numpydoc validation (#21016) --- maint_tools/test_docstrings.py | 1 - sklearn/preprocessing/_discretization.py | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/maint_tools/test_docstrings.py b/maint_tools/test_docstrings.py index 7cb0b5e923071..a4d19c16dd9be 100644 --- a/maint_tools/test_docstrings.py +++ b/maint_tools/test_docstrings.py @@ -16,7 +16,6 @@ "HuberRegressor", "Isomap", "IterativeImputer", - "KBinsDiscretizer", "KNNImputer", "LabelPropagation", "LabelSpreading", diff --git a/sklearn/preprocessing/_discretization.py b/sklearn/preprocessing/_discretization.py index 24e9df1050d6f..3eafa09519f08 100644 --- a/sklearn/preprocessing/_discretization.py +++ b/sklearn/preprocessing/_discretization.py @@ -133,7 +133,6 @@ class KBinsDiscretizer(TransformerMixin, BaseEstimator): [-0.5, 2.5, -2.5, -0.5], [ 0.5, 3.5, -1.5, 0.5], [ 0.5, 3.5, -1.5, 1.5]]) - """ def __init__(self, n_bins=5, *, encode="onehot", strategy="quantile", dtype=None): @@ -157,7 +156,8 @@ def fit(self, X, y=None): Returns ------- - self + self : object + Returns the instance itself. """ X = self._validate_data(X, dtype="numeric") From 8b9a7f9492617ba2e12504f9dfdad3331d134cd1 Mon Sep 17 00:00:00 2001 From: Juan Martin Loyola Date: Mon, 13 Sep 2021 05:56:16 -0300 Subject: [PATCH 18/49] DOC Ensures that OPTICS passes numpydoc validation (#21017) --- maint_tools/test_docstrings.py | 1 - sklearn/cluster/_optics.py | 10 +++++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/maint_tools/test_docstrings.py b/maint_tools/test_docstrings.py index a4d19c16dd9be..4cd7d20982bef 100644 --- a/maint_tools/test_docstrings.py +++ b/maint_tools/test_docstrings.py @@ -33,7 +33,6 @@ "NearestCentroid", "NeighborhoodComponentsAnalysis", "Normalizer", - "OPTICS", "OneVsOneClassifier", "OneVsRestClassifier", "OrdinalEncoder", diff --git a/sklearn/cluster/_optics.py b/sklearn/cluster/_optics.py index 25190b2c08a25..45a080839c6ae 100755 --- a/sklearn/cluster/_optics.py +++ b/sklearn/cluster/_optics.py @@ -268,13 +268,13 @@ def fit(self, X, y=None): A feature array, or array of distances between samples if metric='precomputed'. - y : ignored - Ignored. + y : Ignored + Not used, present for API consistency by convention. Returns ------- - self : instance of OPTICS - The instance. + self : object + Returns a fitted instance of self. """ dtype = bool if self.metric in PAIRWISE_BOOLEAN_FUNCTIONS else float if dtype == bool and X.dtype != bool: @@ -360,7 +360,7 @@ def _validate_size(size, n_samples, param_name): # OPTICS helper functions def _compute_core_distances_(X, neighbors, min_samples, working_memory): - """Compute the k-th nearest neighbor of each sample + """Compute the k-th nearest neighbor of each sample. Equivalent to neighbors.kneighbors(X, self.min_samples)[0][:, -1] but with more memory efficiency. 
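The corrected example above chains a precomputed neighbors graph into `Isomap`; an uncached sketch of the same pattern (parameter values are illustrative — the transformer should compute at least as many neighbors as the consumer needs):

    >>> from sklearn.datasets import load_digits
    >>> from sklearn.manifold import Isomap
    >>> from sklearn.neighbors import KNeighborsTransformer
    >>> from sklearn.pipeline import make_pipeline
    >>> X, _ = load_digits(return_X_y=True)
    >>> est = make_pipeline(
    ...     KNeighborsTransformer(n_neighbors=10, mode="distance"),
    ...     Isomap(n_neighbors=5, metric="precomputed"),
    ... )
    >>> X_embedded = est.fit_transform(X[:100])  # the sparse graph feeds Isomap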
From 83ce10d2bbcc649a56428f641a8766549f0648a5 Mon Sep 17 00:00:00 2001 From: Juan Martin Loyola Date: Mon, 13 Sep 2021 06:08:31 -0300 Subject: [PATCH 19/49] DOC Ensures that Isomap passes numpydoc validation (#21018) --- maint_tools/test_docstrings.py | 1 - sklearn/manifold/_isomap.py | 41 ++++++++++++++++++++++++---------- 2 files changed, 29 insertions(+), 13 deletions(-) diff --git a/maint_tools/test_docstrings.py b/maint_tools/test_docstrings.py index 4cd7d20982bef..fea74c9572259 100644 --- a/maint_tools/test_docstrings.py +++ b/maint_tools/test_docstrings.py @@ -14,7 +14,6 @@ "HalvingRandomSearchCV", "HashingVectorizer", "HuberRegressor", - "Isomap", "IterativeImputer", "KNNImputer", "LabelPropagation", diff --git a/sklearn/manifold/_isomap.py b/sklearn/manifold/_isomap.py index 4450330794da0..db0639455d18f 100644 --- a/sklearn/manifold/_isomap.py +++ b/sklearn/manifold/_isomap.py @@ -19,7 +19,7 @@ class Isomap(TransformerMixin, BaseEstimator): - """Isomap Embedding + """Isomap Embedding. Non-linear dimensionality reduction through Isometric Mapping @@ -28,10 +28,10 @@ class Isomap(TransformerMixin, BaseEstimator): Parameters ---------- n_neighbors : int, default=5 - number of neighbors to consider for each point. + Number of neighbors to consider for each point. n_components : int, default=2 - number of coordinates for the manifold + Number of coordinates for the manifold. eigen_solver : {'auto', 'arpack', 'dense'}, default='auto' 'auto' : Attempt to choose the most efficient solver @@ -71,7 +71,7 @@ class Isomap(TransformerMixin, BaseEstimator): ``-1`` means using all processors. See :term:`Glossary ` for more details. - metric : string, or callable, default="minkowski" + metric : str, or callable, default="minkowski" The metric to use when calculating distance between instances in a feature array. If metric is a string or callable, it must be one of the options allowed by :func:`sklearn.metrics.pairwise_distances` for @@ -121,6 +121,23 @@ class Isomap(TransformerMixin, BaseEstimator): .. versionadded:: 1.0 + See Also + -------- + sklearn.decomposition.PCA : Principal component analysis that is a linear + dimensionality reduction method. + sklearn.decomposition.KernelPCA : Non-linear dimensionality reduction using + kernels and PCA. + MDS : Manifold learning using multidimensional scaling. + TSNE : T-distributed Stochastic Neighbor Embedding. + LocallyLinearEmbedding : Manifold learning using Locally Linear Embedding. + SpectralEmbedding : Spectral embedding for non-linear dimensionality. + + References + ---------- + + .. [1] Tenenbaum, J.B.; De Silva, V.; & Langford, J.C. A global geometric + framework for nonlinear dimensionality reduction. Science 290 (5500) + Examples -------- >>> from sklearn.datasets import load_digits @@ -132,12 +149,6 @@ class Isomap(TransformerMixin, BaseEstimator): >>> X_transformed = embedding.fit_transform(X[:100]) >>> X_transformed.shape (100, 2) - - References - ---------- - - .. [1] Tenenbaum, J.B.; De Silva, V.; & Langford, J.C. A global geometric - framework for nonlinear dimensionality reduction. Science 290 (5500) """ def __init__( @@ -251,6 +262,7 @@ def reconstruction_error(self): Returns ------- reconstruction_error : float + Reconstruction error. Notes ----- @@ -270,7 +282,7 @@ def reconstruction_error(self): return np.sqrt(np.sum(G_center ** 2) - np.sum(evals ** 2)) / G.shape[0] def fit(self, X, y=None): - """Compute the embedding vectors for data X + """Compute the embedding vectors for data X. 
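A quick usage sketch of the `reconstruction_error` method whose docstring is polished above:

    >>> from sklearn.datasets import load_digits
    >>> from sklearn.manifold import Isomap
    >>> X, _ = load_digits(return_X_y=True)
    >>> embedding = Isomap(n_components=2).fit(X[:100])
    >>> err = embedding.reconstruction_error()  # lower means a better embedding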
Parameters ---------- @@ -280,10 +292,12 @@ def fit(self, X, y=None): object. y : Ignored + Not used, present for API consistency by convention. Returns ------- - self : returns an instance of self. + self : object + Returns a fitted instance of self. """ self._fit_transform(X) return self @@ -298,10 +312,12 @@ def fit_transform(self, X, y=None): and `n_features` is the number of features. y : Ignored + Not used, present for API consistency by convention. Returns ------- X_new : array-like, shape (n_samples, n_components) + X transformed in the new space. """ self._fit_transform(X) return self.embedding_ @@ -327,6 +343,7 @@ def transform(self, X): Returns ------- X_new : array-like, shape (n_queries, n_components) + X transformed in the new space. """ check_is_fitted(self) distances, indices = self.nbrs_.kneighbors(X, return_distance=True) From f3cd2a5745dd9bfcae532af80d365a08c437e2e9 Mon Sep 17 00:00:00 2001 From: Juan Martin Loyola Date: Mon, 13 Sep 2021 06:12:12 -0300 Subject: [PATCH 20/49] DOC Ensures that RadiusNeighborsTransformer passes numpydoc validation (#21021) --- maint_tools/test_docstrings.py | 1 - sklearn/neighbors/_graph.py | 25 ++++++++++++++++++------- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/maint_tools/test_docstrings.py b/maint_tools/test_docstrings.py index fea74c9572259..6765e2e1a6832 100644 --- a/maint_tools/test_docstrings.py +++ b/maint_tools/test_docstrings.py @@ -50,7 +50,6 @@ "QuantileRegressor", "QuantileTransformer", "RANSACRegressor", - "RadiusNeighborsTransformer", "RandomizedSearchCV", "RobustScaler", "SGDOneClassSVM", diff --git a/sklearn/neighbors/_graph.py b/sklearn/neighbors/_graph.py index b16897a216ff0..77751d65ba9b5 100644 --- a/sklearn/neighbors/_graph.py +++ b/sklearn/neighbors/_graph.py @@ -437,10 +437,10 @@ def _more_tags(self): class RadiusNeighborsTransformer(RadiusNeighborsMixin, TransformerMixin, NeighborsBase): - """Transform X into a (weighted) graph of neighbors nearer than a radius + """Transform X into a (weighted) graph of neighbors nearer than a radius. The transformed data is a sparse graph as returned by - radius_neighbors_graph. + `radius_neighbors_graph`. Read more in the :ref:`User Guide `. @@ -453,7 +453,7 @@ class RadiusNeighborsTransformer(RadiusNeighborsMixin, TransformerMixin, Neighbo matrix with ones and zeros, and 'distance' will return the distances between neighbors according to the given metric. - radius : float, default=1. + radius : float, default=1.0 Radius of neighborhood in the transformed sparse graph. algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto' @@ -475,7 +475,7 @@ class RadiusNeighborsTransformer(RadiusNeighborsMixin, TransformerMixin, Neighbo nature of the problem. metric : str or callable, default='minkowski' - metric to use for distance computation. Any metric from scikit-learn + Metric to use for distance computation. Any metric from scikit-learn or scipy.spatial.distance can be used. If metric is a callable function, it is called on each @@ -540,6 +540,13 @@ class RadiusNeighborsTransformer(RadiusNeighborsMixin, TransformerMixin, Neighbo n_samples_fit_ : int Number of samples in the fitted data. + See Also + -------- + kneighbors_graph : Compute the weighted graph of k-neighbors for + points in X. + KNeighborsTransformer : Transform X into a weighted graph of k + nearest neighbors. + Examples -------- >>> from sklearn.cluster import DBSCAN @@ -583,6 +590,9 @@ def fit(self, X, y=None): (n_samples, n_samples) if metric='precomputed' Training data. 
+ y : Ignored + Not used, present for API consistency by convention. + Returns ------- self : RadiusNeighborsTransformer @@ -591,12 +601,12 @@ def fit(self, X, y=None): return self._fit(X) def transform(self, X): - """Computes the (weighted) graph of Neighbors for points in X + """Compute the (weighted) graph of Neighbors for points in X. Parameters ---------- X : array-like of shape (n_samples_transform, n_features) - Sample data + Sample data. Returns ------- @@ -620,7 +630,8 @@ def fit_transform(self, X, y=None): X : array-like of shape (n_samples, n_features) Training set. - y : ignored + y : Ignored + Not used, present for API consistency by convention. Returns ------- From ef0cd4bf992a1e2d6727c63371009ae3001332fb Mon Sep 17 00:00:00 2001 From: EricEllwanger <52634304+EricEllwanger@users.noreply.github.com> Date: Mon, 13 Sep 2021 05:12:51 -0400 Subject: [PATCH 21/49] DOC - Ensures that GridSearchCV passes numpydoc validation (#21003) --- maint_tools/test_docstrings.py | 1 - sklearn/model_selection/_search.py | 91 ++++++++++++++++++++++-------- 2 files changed, 69 insertions(+), 23 deletions(-) diff --git a/maint_tools/test_docstrings.py b/maint_tools/test_docstrings.py index 6765e2e1a6832..727646f60fa3a 100644 --- a/maint_tools/test_docstrings.py +++ b/maint_tools/test_docstrings.py @@ -9,7 +9,6 @@ # List of modules ignored when checking for numpydoc validation. DOCSTRING_IGNORE_LIST = [ - "GridSearchCV", "HalvingGridSearchCV", "HalvingRandomSearchCV", "HashingVectorizer", diff --git a/sklearn/model_selection/_search.py b/sklearn/model_selection/_search.py index 0ff04e94298a6..746ec5e9a0813 100644 --- a/sklearn/model_selection/_search.py +++ b/sklearn/model_selection/_search.py @@ -432,7 +432,7 @@ def _pairwise(self): return getattr(self.estimator, "_pairwise", False) def score(self, X, y=None): - """Returns the score on the given data, if the estimator has been refit. + """Return the score on the given data, if the estimator has been refit. This uses the score defined by ``scoring`` where provided, and the ``best_estimator_.score`` method otherwise. @@ -451,6 +451,8 @@ def score(self, X, y=None): Returns ------- score : float + The score defined by ``scoring`` if provided, and the + ``best_estimator_.score`` method otherwise. """ _check_refit(self, "score") check_is_fitted(self) @@ -491,6 +493,7 @@ def score_samples(self, X): Returns ------- y_score : ndarray of shape (n_samples,) + The ``best_estimator_.score_samples`` method. """ check_is_fitted(self) return self.best_estimator_.score_samples(X) @@ -508,6 +511,11 @@ def predict(self, X): Must fulfill the input assumptions of the underlying estimator. + Returns + ------- + y_pred : ndarray of shape (n_samples,) + The predicted labels or values for `X` based on the estimator with + the best found parameters. """ check_is_fitted(self) return self.best_estimator_.predict(X) @@ -525,6 +533,12 @@ def predict_proba(self, X): Must fulfill the input assumptions of the underlying estimator. + Returns + ------- + y_pred : ndarray of shape (n_samples,) or (n_samples, n_classes) + Predicted class probabilities for `X` based on the estimator with + the best found parameters. The order of the classes corresponds + to that in the fitted attribute :term:`classes_`. """ check_is_fitted(self) return self.best_estimator_.predict_proba(X) @@ -542,6 +556,12 @@ def predict_log_proba(self, X): Must fulfill the input assumptions of the underlying estimator. 
+ Returns + ------- + y_pred : ndarray of shape (n_samples,) or (n_samples, n_classes) + Predicted class log-probabilities for `X` based on the estimator + with the best found parameters. The order of the classes + corresponds to that in the fitted attribute :term:`classes_`. """ check_is_fitted(self) return self.best_estimator_.predict_log_proba(X) @@ -559,6 +579,12 @@ def decision_function(self, X): Must fulfill the input assumptions of the underlying estimator. + Returns + ------- + y_score : ndarray of shape (n_samples,) or (n_samples, n_classes) \ + or (n_samples, n_classes * (n_classes-1) / 2) + Result of the decision function for `X` based on the estimator with + the best found parameters. """ check_is_fitted(self) return self.best_estimator_.decision_function(X) @@ -576,6 +602,11 @@ def transform(self, X): Must fulfill the input assumptions of the underlying estimator. + Returns + ------- + Xt : {ndarray, sparse matrix} of shape (n_samples, n_features) + `X` transformed in the new space based on the estimator with + the best found parameters. """ check_is_fitted(self) return self.best_estimator_.transform(X) @@ -593,12 +624,21 @@ def inverse_transform(self, Xt): Must fulfill the input assumptions of the underlying estimator. + Returns + ------- + X : {ndarray, sparse matrix} of shape (n_samples, n_features) + Result of the `inverse_transform` function for `Xt` based on the + estimator with the best found parameters. """ check_is_fitted(self) return self.best_estimator_.inverse_transform(Xt) @property def n_features_in_(self): + """Number of features seen during :term:`fit`. + + Only available when `refit=True`. + """ # For consistency with other estimators we raise a AttributeError so # that hasattr() fails if the search estimator isn't fitted. try: @@ -614,6 +654,10 @@ def n_features_in_(self): @property def classes_(self): + """Class labels. + + Only available when `refit=True` and the estimator is a classifier. + """ _estimator_has("classes_")(self) return self.best_estimator_.classes_ @@ -733,7 +777,12 @@ def fit(self, X, y=None, *, groups=None, **fit_params): instance (e.g., :class:`~sklearn.model_selection.GroupKFold`). **fit_params : dict of str -> object - Parameters passed to the ``fit`` method of the estimator + Parameters passed to the ``fit`` method of the estimator. + + Returns + ------- + self : object + Instance of fitted estimator. """ estimator = self.estimator refit_metric = "score" @@ -1002,7 +1051,7 @@ class GridSearchCV(BaseSearchCV): Parameters ---------- - estimator : estimator object. + estimator : estimator object This is assumed to implement the scikit-learn estimator interface. Either estimator needs to provide a ``score`` function, or ``scoring`` must be passed. @@ -1137,25 +1186,6 @@ class GridSearchCV(BaseSearchCV): .. versionchanged:: 0.21 Default value was changed from ``True`` to ``False`` - - Examples - -------- - >>> from sklearn import svm, datasets - >>> from sklearn.model_selection import GridSearchCV - >>> iris = datasets.load_iris() - >>> parameters = {'kernel':('linear', 'rbf'), 'C':[1, 10]} - >>> svc = svm.SVC() - >>> clf = GridSearchCV(svc, parameters) - >>> clf.fit(iris.data, iris.target) - GridSearchCV(estimator=SVC(), - param_grid={'C': [1, 10], 'kernel': ('linear', 'rbf')}) - >>> sorted(clf.cv_results_.keys()) - ['mean_fit_time', 'mean_score_time', 'mean_test_score',... - 'param_C', 'param_kernel', 'params',... - 'rank_test_score', 'split0_test_score',... - 'split2_test_score', ... 
- 'std_fit_time', 'std_score_time', 'std_test_score'] - Attributes ---------- cv_results_ : dict of numpy (masked) ndarrays @@ -1308,6 +1338,23 @@ class GridSearchCV(BaseSearchCV): sklearn.metrics.make_scorer : Make a scorer from a performance metric or loss function. + Examples + -------- + >>> from sklearn import svm, datasets + >>> from sklearn.model_selection import GridSearchCV + >>> iris = datasets.load_iris() + >>> parameters = {'kernel':('linear', 'rbf'), 'C':[1, 10]} + >>> svc = svm.SVC() + >>> clf = GridSearchCV(svc, parameters) + >>> clf.fit(iris.data, iris.target) + GridSearchCV(estimator=SVC(), + param_grid={'C': [1, 10], 'kernel': ('linear', 'rbf')}) + >>> sorted(clf.cv_results_.keys()) + ['mean_fit_time', 'mean_score_time', 'mean_test_score',... + 'param_C', 'param_kernel', 'params',... + 'rank_test_score', 'split0_test_score',... + 'split2_test_score', ... + 'std_fit_time', 'std_score_time', 'std_test_score'] """ _required_parameters = ["estimator", "param_grid"] From 89e81e258b820ef7274bd34f194bfb3f7478b85a Mon Sep 17 00:00:00 2001 From: Bharat Raghunathan Date: Mon, 13 Sep 2021 14:44:16 +0530 Subject: [PATCH 22/49] DOC Ensures that OneVsOneClassifier passes numpydoc validation (#21013) Co-authored-by: Guillaume Lemaitre --- maint_tools/test_docstrings.py | 1 - sklearn/multiclass.py | 23 +++++++++++++++-------- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/maint_tools/test_docstrings.py b/maint_tools/test_docstrings.py index 727646f60fa3a..2a22ba8ad16ce 100644 --- a/maint_tools/test_docstrings.py +++ b/maint_tools/test_docstrings.py @@ -31,7 +31,6 @@ "NearestCentroid", "NeighborhoodComponentsAnalysis", "Normalizer", - "OneVsOneClassifier", "OneVsRestClassifier", "OrdinalEncoder", "OrthogonalMatchingPursuit", diff --git a/sklearn/multiclass.py b/sklearn/multiclass.py index b3a64716c0a7f..d0cc62eeea504 100644 --- a/sklearn/multiclass.py +++ b/sklearn/multiclass.py @@ -617,7 +617,7 @@ def _partial_fit_ovo_binary(estimator, X, y, i, j): class OneVsOneClassifier(MetaEstimatorMixin, ClassifierMixin, BaseEstimator): - """One-vs-one multiclass strategy + """One-vs-one multiclass strategy. This strategy consists in fitting one classifier per class pair. At prediction time, the class which received the most votes is selected. @@ -654,7 +654,7 @@ class OneVsOneClassifier(MetaEstimatorMixin, ClassifierMixin, BaseEstimator): Array containing labels. n_classes_ : int - Number of classes + Number of classes. pairwise_indices_ : list, length = ``len(estimators_)``, or ``None`` Indices of samples used when training the estimators. @@ -677,6 +677,10 @@ class OneVsOneClassifier(MetaEstimatorMixin, ClassifierMixin, BaseEstimator): .. versionadded:: 1.0 + See Also + -------- + OneVsRestClassifier : One-vs-all multiclass strategy. + Examples -------- >>> from sklearn.datasets import load_iris @@ -709,7 +713,8 @@ def fit(self, X, y): Returns ------- - self + self : object + The fitted underlying estimator. """ # We need to validate the data because we do a safe_indexing later. X, y = self._validate_data( @@ -746,13 +751,12 @@ def fit(self, X, y): @available_if(_estimators_has("partial_fit")) def partial_fit(self, X, y, classes=None): - """Partially fit underlying estimators + """Partially fit underlying estimators. Should be used when memory is inefficient to train all data. Chunks of data can be passed in several iteration, where the first call should have an array of all target variables. 
- Parameters ---------- X : (sparse) array-like of shape (n_samples, n_features) @@ -770,7 +774,8 @@ def partial_fit(self, X, y, classes=None): Returns ------- - self + self : object + The partially fitted underlying estimator. """ first_call = _check_partial_fit_first_call(self, classes) if first_call: @@ -842,11 +847,12 @@ def decision_function(self, X): Parameters ---------- X : array-like of shape (n_samples, n_features) + Input data. Returns ------- - Y : array-like of shape (n_samples, n_classes) or (n_samples,) for \ - binary classification. + Y : array-like of shape (n_samples, n_classes) or (n_samples,) + Result of calling `decision_function` on the final estimator. .. versionchanged:: 0.19 output shape changed to ``(n_samples,)`` to conform to @@ -879,6 +885,7 @@ def decision_function(self, X): @property def n_classes_(self): + """Number of classes.""" return len(self.classes_) # TODO: Remove in 1.1 From c11f9c0047b66e8fa804ba9b637af0a9dc94b655 Mon Sep 17 00:00:00 2001 From: Bharat Raghunathan Date: Mon, 13 Sep 2021 14:44:51 +0530 Subject: [PATCH 23/49] DOC Ensures that OneVsRestClassifier passes numpydoc validation (#21014) Co-authored-by: Guillaume Lemaitre --- maint_tools/test_docstrings.py | 1 - sklearn/multiclass.py | 36 +++++++++++++++++++++------------- 2 files changed, 22 insertions(+), 15 deletions(-) diff --git a/maint_tools/test_docstrings.py b/maint_tools/test_docstrings.py index 2a22ba8ad16ce..1e85bc9da26ef 100644 --- a/maint_tools/test_docstrings.py +++ b/maint_tools/test_docstrings.py @@ -31,7 +31,6 @@ "NearestCentroid", "NeighborhoodComponentsAnalysis", "Normalizer", - "OneVsRestClassifier", "OrdinalEncoder", "OrthogonalMatchingPursuit", "OrthogonalMatchingPursuitCV", diff --git a/sklearn/multiclass.py b/sklearn/multiclass.py index d0cc62eeea504..c5d70b284771d 100644 --- a/sklearn/multiclass.py +++ b/sklearn/multiclass.py @@ -275,6 +275,13 @@ class OneVsRestClassifier( .. versionadded:: 1.0 + See Also + -------- + MultiOutputClassifier : Alternate way of extending an estimator for + multilabel classification. + sklearn.preprocessing.MultiLabelBinarizer : Transform iterable of iterables + to binary indicator matrix. + Examples -------- >>> import numpy as np @@ -292,13 +299,6 @@ class OneVsRestClassifier( >>> clf = OneVsRestClassifier(SVC()).fit(X, y) >>> clf.predict([[-19, -20], [9, 9], [-5, 5]]) array([2, 0, 1]) - - See Also - -------- - sklearn.multioutput.MultiOutputClassifier : Alternate way of extending an - estimator for multilabel classification. - sklearn.preprocessing.MultiLabelBinarizer : Transform iterable of iterables - to binary indicator matrix. """ def __init__(self, estimator, *, n_jobs=None): @@ -319,7 +319,8 @@ def fit(self, X, y): Returns ------- - self + self : object + Instance of fitted estimator. """ # A sparse LabelBinarizer, with sparse_output=True, has been shown to # outperform or match a dense label binarizer in all cases and has also @@ -355,7 +356,7 @@ def fit(self, X, y): @available_if(_estimators_has("partial_fit")) def partial_fit(self, X, y, classes=None): - """Partially fit underlying estimators + """Partially fit underlying estimators. Should be used when memory is inefficient to train all data. Chunks of data can be passed in several iteration. @@ -378,7 +379,8 @@ def partial_fit(self, X, y, classes=None): Returns ------- - self + self : object + Instance of partially fitted estimator. 
""" if _check_partial_fit_first_call(self, classes): if not hasattr(self.estimator, "partial_fit"): @@ -477,6 +479,7 @@ def predict_proba(self, X): Parameters ---------- X : array-like of shape (n_samples, n_features) + Input data. Returns ------- @@ -501,18 +504,22 @@ def predict_proba(self, X): @available_if(_estimators_has("decision_function")) def decision_function(self, X): - """Returns the distance of each sample from the decision boundary for - each class. This can only be used with estimators which implement the - decision_function method. + """Decision function for the OneVsRestClassifier. + + Return the distance of each sample from the decision boundary for each + class. This can only be used with estimators which implement the + `decision_function` method. Parameters ---------- X : array-like of shape (n_samples, n_features) + Input data. Returns ------- T : array-like of shape (n_samples, n_classes) or (n_samples,) for \ binary classification. + Result of calling `decision_function` on the final estimator. .. versionchanged:: 0.19 output shape changed to ``(n_samples,)`` to conform to @@ -527,11 +534,12 @@ def decision_function(self, X): @property def multilabel_(self): - """Whether this is a multilabel classifier""" + """Whether this is a multilabel classifier.""" return self.label_binarizer_.y_type_.startswith("multilabel") @property def n_classes_(self): + """Number of classes.""" return len(self.classes_) # TODO: Remove coef_ attribute in 1.1 From 269f3f10ac111a949b7cdaec93230f36ba6f9f7e Mon Sep 17 00:00:00 2001 From: Adrin Jalali Date: Mon, 13 Sep 2021 11:52:28 +0200 Subject: [PATCH 24/49] DOC add contributors to whats_new 1.0 and more fixes (#21009) --- doc/whats_new/v1.0.rst | 182 +++++++++++++++++++++++++++-------------- 1 file changed, 119 insertions(+), 63 deletions(-) diff --git a/doc/whats_new/v1.0.rst b/doc/whats_new/v1.0.rst index 917eb3edec5ec..efe424c8b9171 100644 --- a/doc/whats_new/v1.0.rst +++ b/doc/whats_new/v1.0.rst @@ -30,8 +30,6 @@ parameter is used as positional, a `TypeError` is now raised. `_ for more details. -Put the changes in their relevant module. - Changed models -------------- @@ -135,13 +133,14 @@ Changelog 1.2. :pr:`20165` by `Thomas Fan`_. - |API| :term:`get_feature_names_out` has been added to the transformer API - to get the names of the output features. :pr:`18444` by `Thomas Fan`_. + to get the names of the output features. :term:`get_feature_names` has in + turn been deprecated. :pr:`18444` by `Thomas Fan`_. - |API| All estimators store `feature_names_in_` when fitted on pandas Dataframes. - These feature names are compared to names seen in `non-fit` methods, - `i.e.` `transform` and will raise a `FutureWarning` if they are not consistent. - These `FutureWarning`s will become `ValueError`s in 1.2. - :pr:`18010` by `Thomas Fan`_. + These feature names are compared to names seen in non-`fit` methods, e.g. + `transform` and will raise a `FutureWarning` if they are not consistent. + These ``FutureWarning`` s will become ``ValueError`` s in 1.2. :pr:`18010` by + `Thomas Fan`_. :mod:`sklearn.base` ................... @@ -154,7 +153,7 @@ Changelog - |Feature| :func:`calibration.CalibrationDisplay` added to plot calibration curves. :pr:`17443` by :user:`Lucy Liu `. -- |Fix| The predict and predict_proba methods of +- |Fix| The ``predict`` and ``predict_proba`` methods of :class:`calibration.CalibratedClassifierCV` can now properly be used on prefitted pipelines. :pr:`19641` by :user:`Alek Lefebvre `. 
@@ -166,10 +165,10 @@ Changelog :mod:`sklearn.cluster` ...................... -- |Efficiency| The "k-means++" initialization of :class:`cluster.KMeans` and - :class:`cluster.MiniBatchKMeans` is now faster, especially in multicore - settings. :pr:`19002` by :user:`Jon Crall ` and - :user:`Jérémie du Boisberranger `. +- |Efficiency| The ``"k-means++"`` initialization of :class:`cluster.KMeans` + and :class:`cluster.MiniBatchKMeans` is now faster, especially in multicore + settings. :pr:`19002` by :user:`Jon Crall ` and :user:`Jérémie du + Boisberranger `. - |Efficiency| :class:`cluster.KMeans` with `algorithm='elkan'` is now faster in multicore settings. :pr:`19052` by @@ -216,11 +215,11 @@ Changelog deprecated and will be removed in 1.2. :pr:`19297` by `Thomas Fan`_. - |API| the default value for the `batch_size` parameter of - :class:`MiniBatchKMeans` was changed from 100 to 1024 due to efficiency - reasons. The `n_iter_` attribute of :class:`MiniBatchKMeans` now reports the - number of started epochs and the `n_steps_` attribute reports the number of - mini batches processed. :pr:`17622` - by :user:`Jérémie du Boisberranger `. + :class:`cluster.MiniBatchKMeans` was changed from 100 to 1024 due to + efficiency reasons. The `n_iter_` attribute of + :class:`cluster.MiniBatchKMeans` now reports the number of started epochs and + the `n_steps_` attribute reports the number of mini batches processed. + :pr:`17622` by :user:`Jérémie du Boisberranger `. - |API| :func:`cluster.spectral_clustering` raises an improved error when passed a `np.matrix`. :pr:`20560` by `Thomas Fan`_. @@ -228,10 +227,6 @@ Changelog :mod:`sklearn.compose` ...................... -- |API| Adds `prefix_feature_names_out` to :class:`compose.ColumnTransformer`. - This flag controls the prefixing of feature names out in - :term:`get_feature_names_out`. :pr:`18444` by `Thomas Fan`_. - - |Enhancement| :class:`compose.ColumnTransformer` now records the output of each transformer in `output_indices_`. :pr:`18393` by :user:`Luca Bittarello `. @@ -247,20 +242,25 @@ Changelog :pr:`19244` by :user:`Ricardo `. - |FIX| :meth:`compose.ColumnTransformer.get_feature_names` supports - non-string feature names returned by any of its transformers. - :pr:`18459` by :user:`Albert Villanova del Moral ` and - :user:`Alonso Silva Allende `. + non-string feature names returned by any of its transformers. However, note + that ``get_feature_names`` is deprecated, use ``get_feature_names_out`` + instead. :pr:`18459` by :user:`Albert Villanova del Moral ` + and :user:`Alonso Silva Allende `. - |Fix| :class:`compose.TransformedTargetRegressor` now takes nD targets with an adequate transformer. :pr:`18898` by :user:`Oras Phongpanagnam `. +- |API| Adds `prefix_feature_names_out` to :class:`compose.ColumnTransformer`. + This flag controls the prefixing of feature names out in + :term:`get_feature_names_out`. :pr:`18444` by `Thomas Fan`_. + :mod:`sklearn.covariance` ......................... - |Fix| Adds arrays check to :func:`covariance.ledoit_wolf` and - :func:`covariance.ledoit_wolf_shrinkage`. - :pr:`20416` by :user:`Hugo Defois `. + :func:`covariance.ledoit_wolf_shrinkage`. :pr:`20416` by :user:`Hugo Defois + `. - |API| Deprecates the following keys in `cv_results_`: `'mean_score'`, `'std_score'`, and `'split(k)_score'` in favor of `'mean_test_score'` @@ -308,28 +308,31 @@ Changelog - |Fix| Fixes incorrect multiple data-conversion warnings when clustering boolean data. :pr:`19046` by :user:`Surya Prakash `. 
-- |Fix| Fixed :func:`dict_learning`, used by :class:`DictionaryLearning`, to - ensure determinism of the output. Achieved by flipping signs of the SVD - output which is used to initialize the code. - :pr:`18433` by :user:`Bruno Charron `. +- |Fix| Fixed :func:`dict_learning`, used by + :class:`decomposition.DictionaryLearning`, to ensure determinism of the + output. Achieved by flipping signs of the SVD output which is used to + initialize the code. :pr:`18433` by :user:`Bruno Charron `. -- |Fix| Fixed a bug in :class:`MiniBatchDictionaryLearning`, - :class:`MiniBatchSparsePCA` and :func:`dict_learning_online` where the - update of the dictionary was incorrect. :pr:`19198` by - :user:`Jérémie du Boisberranger `. +- |Fix| Fixed a bug in :class:`decomposition.MiniBatchDictionaryLearning`, + :class:`decomposition.MiniBatchSparsePCA` and + :func:`decomposition.dict_learning_online` where the update of the dictionary + was incorrect. :pr:`19198` by :user:`Jérémie du Boisberranger `. -- |Fix| Fixed a bug in :class:`DictionaryLearning`, :class:`SparsePCA`, - :class:`MiniBatchDictionaryLearning`, :class:`MiniBatchSparsePCA`, - :func:`dict_learning` and :func:`dict_learning_online` where the restart of - unused atoms during the dictionary update was not working as expected. - :pr:`19198` by :user:`Jérémie du Boisberranger `. +- |Fix| Fixed a bug in :class:`decomposition.DictionaryLearning`, + :class:`decomposition.SparsePCA`, + :class:`decomposition.MiniBatchDictionaryLearning`, + :class:`decomposition.MiniBatchSparsePCA`, + :func:`decomposition.dict_learning` and + :func:`decomposition.dict_learning_online` where the restart of unused atoms + during the dictionary update was not working as expected. :pr:`19198` by + :user:`Jérémie du Boisberranger `. - |API| In :class:`decomposition.DictionaryLearning`, :class:`decomposition.MiniBatchDictionaryLearning`, - :func:`dict_learning` and :func:`dict_learning_online`, - `transform_alpha` will be equal to `alpha` instead of 1.0 by default - starting from version 1.2 - :pr:`19159` by :user:`Benoît Malézieux `. + :func:`decomposition.dict_learning` and + :func:`decomposition.dict_learning_online`, `transform_alpha` will be equal + to `alpha` instead of 1.0 by default starting from version 1.2 :pr:`19159` by + :user:`Benoît Malézieux `. - |API| Rename variable names in :class:`KernelPCA` to improve readability. `lambdas_` and `alphas_` are renamed to `eigenvalues_` @@ -368,9 +371,9 @@ Changelog :class:`ensemble.StackingClassifier` and :class:`ensemble.StackingRegressor`. :pr:`19564` by `Thomas Fan`_. -- |Enhancement| Documented and tested support of the Poisson criterion for - :class:`ensemble.RandomForestRegressor`. :pr:`19836` by - :user:`Brian Sun `. +- |Enhancement| Added Poisson criterion to + :class:`ensemble.RandomForestRegressor`. :pr:`19836` by :user:`Brian Sun + `. - |Fix| Do not allow to compute out-of-bag (OOB) score in :class:`ensemble.RandomForestClassifier` and @@ -384,7 +387,7 @@ Changelog to avoid underflows. :pr:`10096` by :user:`Fenil Suchak `. -- |Fix| Fixed the range of the argument max_samples to be (0.0, 1.0] +- |Fix| Fixed the range of the argument ``max_samples`` to be ``(0.0, 1.0]`` in :class:`ensemble.RandomForestClassifier`, :class:`ensemble.RandomForestRegressor`, where `max_samples=1.0` is interpreted as using all `n_samples` for bootstrapping. :pr:`20159` by @@ -435,10 +438,10 @@ Changelog :user:`Gil Rutter `, and :user:`Adrin Jalali `. 
- |FIX| Fix a bug in :func:`isotonic.isotonic_regression` where the - `sample_weight` passed by a user were overwritten during the fit. + `sample_weight` passed by a user were overwritten during ``fit``. :pr:`20515` by :user:`Carsten Allefeld `. -- |Fix| Change :func:`feature_selection.SequentialFeatureSelection` to +- |Fix| Change :func:`feature_selection.SequentialFeatureSelector` to allow for unsupervised modelling so that the `fit` signature need not do any `y` validation and allow for `y=None`. :pr:`19568` by :user:`Shyam Desai `. @@ -456,20 +459,19 @@ Changelog ......................... - |Enhancement| Add `max_samples` parameter in - :func:`inspection._permutation_importance`. It enables to draw a subset of - the samples to compute the permutation importance. This is useful to - keep the method tractable when evaluating feature importance on - large datasets. + :func:`inspection.permutation_importance`. It enables to draw a subset of the + samples to compute the permutation importance. This is useful to keep the + method tractable when evaluating feature importance on large datasets. :pr:`20431` by :user:`Oliver Pfaffel `. - |Enhancement| Add kwargs to format ICE and PD lines separately in partial - dependence plots :func:`~sklearn.inspection.plot_partial_dependence` and - :meth:`~sklearn.inspection.PartialDependenceDisplay.plot`. - :pr:`19428` by :user:`Mehdi Hamoumi `. + dependence plots :func:`inspection.plot_partial_dependence` and + :meth:`inspection.PartialDependenceDisplay.plot`. :pr:`19428` by :user:`Mehdi + Hamoumi `. - |Fix| Allow multiple scorers input to - :func:`~sklearn.inspection.permutation_importance`. - :pr:`19411` by :user:`Simona Maggio `. + :func:`inspection.permutation_importance`. :pr:`19411` by :user:`Simona + Maggio `. - |API| :class:`inspection.PartialDependenceDisplay` exposes a class method: :func:`~inspection.PartialDependenceDisplay.from_estimator`. @@ -479,10 +481,10 @@ Changelog :mod:`sklearn.kernel_approximation` ................................... -- |Fix| Fix a bug in :class:`sklearn.kernel_approximation.Nystroem` - where the attribute `component_indices_` did not correspond - to the subset of samples indices used to generate the approximated kernel. - :pr:`20554` by :user:`Xiangyin Kong `. +- |Fix| Fix a bug in :class:`kernel_approximation.Nystroem` + where the attribute `component_indices_` did not correspond to the subset of + sample indices used to generate the approximated kernel. :pr:`20554` by + :user:`Xiangyin Kong `. :mod:`sklearn.linear_model` ........................... @@ -941,4 +943,58 @@ Code and Documentation Contributors Thanks to everyone who has contributed to the maintenance and improvement of the project since version 0.24, including: -TODO: update at the time of the release. +Abdulelah S. Al Mesfer, Abhinav Gupta, Adam J. 
Stewart, Adam Li, Adam Midvidy, +Adrian Garcia Badaracco, Adrian Sadłocha, Adrin Jalali, Agamemnon Krasoulis, +Alberto Rubiales, Albert Thomas, Albert Villanova del Moral, Alek Lefebvre, +Alessia Marcolini, Alexandr Fonari, Alihan Zihna, Aline Ribeiro de Almeida, +Amanda, Amanda Dsouza, Amol Deshmukh, Ana Pessoa, Anavelyz, Andreas Mueller, +Andrew Delong, Ashish, Ashvith Shetty, Atsushi Nukariya, Avi Gupta, Ayush +Singh, baam, BaptBillard, Benjamin Pedigo, Bertrand Thirion, Bharat +Raghunathan, bmalezieux, Brian Rice, Brian Sun, Bruno Charron, Bryan Chen, +bumblebee, caherrera-meli, Carsten Allefeld, CeeThinwa, Chiara Marmo, +chrissobel, Christian Lorentzen, Christopher Yeh, Chuliang Xiao, Clément +Fauchereau, cliffordEmmanuel, Conner Shen, Connor Tann, David Dale, David Katz, +David Poznik, Divyanshu Deoli, dmallia17, Dmitry Kobak, DS_anas, Eduardo +Jardim, EdwinWenink, EL-ATEIF Sara, Eleni Markou, Eric Fiegel, Erich Schubert, +Ezri-Mudde, Fatos Morina, Felipe Rodrigues, Felix Hafner, Fenil Suchak, +flyingdutchman23, Flynn, Fortune Uwha, Francois Berenger, Frankie Robertson, +Frans Larsson, Frederick Robinson, Gabriel S Vicente, Gael Varoquaux, genvalen, +Geoffrey Thomas, geroldcsendes, Gleb Levitskiy, Glen, Glòria Macià Muñoz, +gregorystrubel, groceryheist, Guillaume Lemaitre, guiweber, Haidar Almubarak, +Hans Moritz Günther, Haoyin Xu, Harris Mirza, Harry Wei, Harutaka Kawamura, +Hassan Alsawadi, Helder Geovane Gomes de Lima, Hugo DEFOIS, Igor Ilic, Ikko +Ashimine, Isaack Mungui, Ishaan Bhat, Ishan Mishra, Iván Pulido, iwhalvic, +Jack Liu, James Alan Preiss, James Budarz, James Lamb, Jannik, Jeff Zhao, +Jennifer Maldonado, Jérémie du Boisberranger, Jesse Lima, Jianzhu Guo, +jnboehm, Joel Nothman, JohanWork, John Paton, Jonathan Schneider, Jon Crall, +Jon Haitz Legarreta Gorroño, Joris Van den Bossche, José Manuel Nápoles +Duarte, Juan Carlos Alfaro Jiménez, Juan Martin Loyola, Julien Jerphanion, +Julio Batista Silva, julyrashchenko, JVM, Kadatatlu Kishore, Karen Palacio, Kei +Ishikawa, kmatt10, kobaski, Kot271828, Kunj, KurumeYuta, kxytim, lacrosse91, +LalliAcqua, Laveen Bagai, Leonardo Rocco, Leonardo Uieda, Leopoldo Corona, Loic +Esteve, LSturtew, Luca Bittarello, Luccas Quadros, Lucy Jiménez, Lucy Liu, +ly648499246, Mabu Manaileng, makoeppel, Marco Gorelli, Maren Westermann, +Mariangela, Maria Telenczuk, marielaraj, Martin Hirzel, Mateo Noreña, Mathieu +Blondel, Mathis Batoul, mathurinm, Matthew Calcote, Maxime Prieur, Maxwell, +Mehdi Hamoumi, Mehmet Ali Özer, Miao Cai, Michal Karbownik, michalkrawczyk, +Mitzi, mlondschien, Mohamed Haseeb, Mohamed Khoualed, Muhammad Jarir Kanji, +murata-yu, Nadim Kawwa, Nanshan Li, naozin555, Nate Parsons, Neal Fultz, Nic +Annau, Nicolas Hug, Nicolas Miller, Nico Stefani, Nigel Bosch, Nodar +Okroshiashvili, Norbert Preining, novaya, Ogbonna Chibuike Stephen, OGordon100, +Oliver Pfaffel, Olivier Grisel, Oras Phongpanangam, Pablo Duque, Pablo +Ibieta-Jimenez, Patric Lacouth, Paulo S. 
Costa, Paweł Olszewski, Peter Dye, +PierreAttard, Pierre-Yves Le Borgne, PranayAnchuri, Prince Canuma, putschblos, +qdeffense, RamyaNP, ranjanikrishnan, Ray Bell, Rene Jean Corneille, Reshama +Shaikh, ricardojnf, RichardScottOZ, Rodion Martynov, Rohan Paul, Roman Lutz, +Roman Yurchak, Samuel Brice, Sandy Khosasi, Sean Benhur J, Sebastian Flores, +Sebastian Pölsterl, Shao Yang Hong, shinehide, shinnar, shivamgargsya, +Shooter23, Shuhei Kayawari, Shyam Desai, simonamaggio, Sina Tootoonian, +solosilence, Steven Kolawole, Steve Stagg, Surya Prakash, swpease, Sylvain +Marié, Takeshi Oura, Terence Honles, TFiFiE, Thomas A Caswell, Thomas J. Fan, +Tim Gates, TimotheeMathieu, Timothy Wolodzko, Tim Vink, t-jakubek, t-kusanagi, +tliu68, Tobias Uhmann, tom1092, Tomás Moreyra, Tomás Ronald Hughes, Tom +Dupré la Tour, Tommaso Di Noto, Tomohiro Endo, Toshihiro NAKAE, tsuga, Uttam +kumar, vadim-ushtanit, Vangelis Gkiastas, Venkatachalam N, Vilém Zouhar, +Vinicius Rios Fuck, Vlasovets, waijean, Whidou, xavier dupré, xiaoyuchai, +Yasmeen Alsaedy, yoch, Yosuke KOBAYASHI, Yu Feng, YusukeNagasaka, yzhenman, +Zero, ZeyuSun, ZhaoweiWang, Zito, Zito Relova From 5d8804d7c2d057d253776f4e68a9103097c8ea10 Mon Sep 17 00:00:00 2001 From: Juan Martin Loyola Date: Mon, 13 Sep 2021 12:46:52 -0300 Subject: [PATCH 25/49] DOC Ensures that PowerTransformer passes numpydoc validation (#21015) Co-authored-by: Guillaume Lemaitre --- maint_tools/test_docstrings.py | 1 - sklearn/preprocessing/_data.py | 46 +++++++++++++++++++++++----------- 2 files changed, 31 insertions(+), 16 deletions(-) diff --git a/maint_tools/test_docstrings.py b/maint_tools/test_docstrings.py index 1e85bc9da26ef..0e84bfd7638ac 100644 --- a/maint_tools/test_docstrings.py +++ b/maint_tools/test_docstrings.py @@ -42,7 +42,6 @@ "PassiveAggressiveRegressor", "PatchExtractor", "PolynomialFeatures", - "PowerTransformer", "QuadraticDiscriminantAnalysis", "QuantileRegressor", "QuantileTransformer", diff --git a/sklearn/preprocessing/_data.py b/sklearn/preprocessing/_data.py index 4c1bcaa0c921d..ebfa2304f75a7 100644 --- a/sklearn/preprocessing/_data.py +++ b/sklearn/preprocessing/_data.py @@ -2974,21 +2974,6 @@ class PowerTransformer(_OneToOneFeatureMixin, TransformerMixin, BaseEstimator): .. versionadded:: 1.0 - Examples - -------- - >>> import numpy as np - >>> from sklearn.preprocessing import PowerTransformer - >>> pt = PowerTransformer() - >>> data = [[1, 2], [3, 2], [4, 5]] - >>> print(pt.fit(data)) - PowerTransformer() - >>> print(pt.lambdas_) - [ 1.386... -3.100...] - >>> print(pt.transform(data)) - [[-1.316... -0.707...] - [ 0.209... -0.707...] - [ 1.106... 1.414...]] - See Also -------- power_transform : Equivalent function without the estimator API. @@ -3014,6 +2999,21 @@ class PowerTransformer(_OneToOneFeatureMixin, TransformerMixin, BaseEstimator): .. [2] G.E.P. Box and D.R. Cox, "An Analysis of Transformations", Journal of the Royal Statistical Society B, 26, 211-252 (1964). + + Examples + -------- + >>> import numpy as np + >>> from sklearn.preprocessing import PowerTransformer + >>> pt = PowerTransformer() + >>> data = [[1, 2], [3, 2], [4, 5]] + >>> print(pt.fit(data)) + PowerTransformer() + >>> print(pt.lambdas_) + [ 1.386... -3.100...] + >>> print(pt.transform(data)) + [[-1.316... -0.707...] + [ 0.209... -0.707...] + [ 1.106... 
1.414...]] """ def __init__(self, method="yeo-johnson", *, standardize=True, copy=True): @@ -3044,6 +3044,22 @@ def fit(self, X, y=None): return self def fit_transform(self, X, y=None): + """Fit `PowerTransformer` to `X`, then transform `X`. + + Parameters + ---------- + X : array-like of shape (n_samples, n_features) + The data used to estimate the optimal transformation parameters + and to be transformed using a power transformation. + + y : Ignored + Not used, present for API consistency by convention. + + Returns + ------- + X_new : ndarray of shape (n_samples, n_features) + Transformed data. + """ return self._fit(X, y, force_transform=True) def _fit(self, X, y=None, force_transform=False): From 7bb3613e0480eca52c2dd9cb183ac0df36f19cca Mon Sep 17 00:00:00 2001 From: genvalen Date: Tue, 14 Sep 2021 04:10:25 -0400 Subject: [PATCH 26/49] DOC Ensures that VarianceThreshold passes numpydoc validation (#21034) --- maint_tools/test_docstrings.py | 1 - sklearn/feature_selection/_variance_threshold.py | 15 +++++++++++++-- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/maint_tools/test_docstrings.py b/maint_tools/test_docstrings.py index 0e84bfd7638ac..294f2582e5087 100644 --- a/maint_tools/test_docstrings.py +++ b/maint_tools/test_docstrings.py @@ -64,7 +64,6 @@ "TheilSenRegressor", "TransformedTargetRegressor", "TweedieRegressor", - "VarianceThreshold", ] diff --git a/sklearn/feature_selection/_variance_threshold.py b/sklearn/feature_selection/_variance_threshold.py index 9d4be461b8f67..41ccfba2605cc 100644 --- a/sklearn/feature_selection/_variance_threshold.py +++ b/sklearn/feature_selection/_variance_threshold.py @@ -39,6 +39,15 @@ class VarianceThreshold(SelectorMixin, BaseEstimator): .. versionadded:: 1.0 + See Also + -------- + SelectFromModel: Meta-transformer for selecting features based on + importance weights. + SelectPercentile : Select features according to a percentile of the highest + scores. + SequentialFeatureSelector : Transformer that performs Sequential Feature + Selection. + Notes ----- Allows NaN in the input. @@ -66,7 +75,8 @@ def fit(self, X, y=None): Parameters ---------- X : {array-like, sparse matrix}, shape (n_samples, n_features) - Sample vectors from which to compute variances. + Data from which to compute variances, where `n_samples` is + the number of samples and `n_features` is the number of features. y : any, default=None Ignored. This parameter exists only for compatibility with @@ -74,7 +84,8 @@ def fit(self, X, y=None): Returns ------- - self + self : object + Returns the instance itself. 
""" X = self._validate_data( X, From 37948b51c6ce3350593c0cdb155560e01b870970 Mon Sep 17 00:00:00 2001 From: genvalen Date: Tue, 14 Sep 2021 04:11:25 -0400 Subject: [PATCH 27/49] DOC Ensures that SequentialFeatureSelector passes numpydoc validation (#21035) --- maint_tools/test_docstrings.py | 1 - sklearn/feature_selection/_sequential.py | 9 +++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/maint_tools/test_docstrings.py b/maint_tools/test_docstrings.py index 294f2582e5087..536b55d61678a 100644 --- a/maint_tools/test_docstrings.py +++ b/maint_tools/test_docstrings.py @@ -51,7 +51,6 @@ "SGDOneClassSVM", "SGDRegressor", "SelfTrainingClassifier", - "SequentialFeatureSelector", "SimpleImputer", "SparseRandomProjection", "SpectralBiclustering", diff --git a/sklearn/feature_selection/_sequential.py b/sklearn/feature_selection/_sequential.py index 33d2dc1c9c2fa..6292f191eec2f 100644 --- a/sklearn/feature_selection/_sequential.py +++ b/sklearn/feature_selection/_sequential.py @@ -97,6 +97,8 @@ class SequentialFeatureSelector(SelectorMixin, MetaEstimatorMixin, BaseEstimator See Also -------- + GenericUnivariateSelect : Univariate feature selector with configurable + strategy. RFE : Recursive feature elimination based on importance weights. RFECV : Recursive feature elimination based on importance weights, with automatic selection of the number of features. @@ -139,12 +141,14 @@ def __init__( self.n_jobs = n_jobs def fit(self, X, y=None): - """Learn the features to select. + """Learn the features to select from X. Parameters ---------- X : array-like of shape (n_samples, n_features) - Training vectors. + Training vectors, where `n_samples` is the number of samples and + `n_features` is the number of predictors. + y : array-like of shape (n_samples,), default=None Target values. This parameter may be ignored for unsupervised learning. @@ -152,6 +156,7 @@ def fit(self, X, y=None): Returns ------- self : object + Returns the instance itself. """ tags = self._get_tags() X = self._validate_data( From e36a99e8e9635ec44400c76c7bdb312eca4dc4ef Mon Sep 17 00:00:00 2001 From: Bharat Raghunathan Date: Tue, 14 Sep 2021 15:38:58 +0530 Subject: [PATCH 28/49] DOC Ensures that OrdinalEncoder passes numpydoc validation (#21030) Co-authored-by: Guillaume Lemaitre --- maint_tools/test_docstrings.py | 1 - sklearn/preprocessing/_encoders.py | 3 ++- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/maint_tools/test_docstrings.py b/maint_tools/test_docstrings.py index 536b55d61678a..36d5e4ae7b780 100644 --- a/maint_tools/test_docstrings.py +++ b/maint_tools/test_docstrings.py @@ -31,7 +31,6 @@ "NearestCentroid", "NeighborhoodComponentsAnalysis", "Normalizer", - "OrdinalEncoder", "OrthogonalMatchingPursuit", "OrthogonalMatchingPursuitCV", "OutputCodeClassifier", diff --git a/sklearn/preprocessing/_encoders.py b/sklearn/preprocessing/_encoders.py index 8338a94438785..df35c7b811443 100644 --- a/sklearn/preprocessing/_encoders.py +++ b/sklearn/preprocessing/_encoders.py @@ -853,7 +853,8 @@ def fit(self, X, y=None): Returns ------- - self + self : object + Fitted encoder. 
""" handle_unknown_strategies = ("error", "use_encoded_value") if self.handle_unknown not in handle_unknown_strategies: From dfc80bb551301cb6f45704ca7b6df19843d1aed6 Mon Sep 17 00:00:00 2001 From: Adrin Jalali Date: Tue, 14 Sep 2021 13:30:13 +0200 Subject: [PATCH 29/49] DOC more whats new 1.0 fixes (#21036) --- doc/whats_new/v1.0.rst | 179 ++++++++++++++++++++--------------------- 1 file changed, 86 insertions(+), 93 deletions(-) diff --git a/doc/whats_new/v1.0.rst b/doc/whats_new/v1.0.rst index efe424c8b9171..b3d69843dfa3e 100644 --- a/doc/whats_new/v1.0.rst +++ b/doc/whats_new/v1.0.rst @@ -506,33 +506,30 @@ Changelog :pr:`16449` by :user:`Christian Lorentzen `. - |Feature| Added new solver `lbfgs` (available with `solver="lbfgs"`) - and `positive` argument to class:`linear_model.Ridge`. - When `positive` is set to True, forces the coefficients to be positive - (only supported by `lbfgs`). - :pr:`20231` by :user:`Toshihiro Nakae `. + and `positive` argument to :class:`linear_model.Ridge`. When `positive` is + set to `True`, forces the coefficients to be positive (only supported by + `lbfgs`). :pr:`20231` by :user:`Toshihiro Nakae `. - |Efficiency| The implementation of :class:`linear_model.LogisticRegression` has been optimised for dense matrices when using `solver='newton-cg'` and `multi_class!='multinomial'`. :pr:`19571` by :user:`Julien Jerphanion `. -- |Efficiency| The implementation of `fit` for `PolynomialFeatures` transformer - is now faster. This is especially noticeable on large sparse input. - :pr:`19734` by :user:`Fred Robinson `. - - |Enhancement| `fit` method preserves dtype for numpy.float32 in - :class:`Lars`, :class:`LassoLars`, :class:`LassoLars`, :class:`LarsCV` and - :class:`LassoLarsCV`. :pr:`20155` by :user:`Takeshi Oura `. + :class:`linear_model.Lars`, :class:`linear_model.LassoLars`, + :class:`linear_model.LassoLars`, :class:`linear_model.LarsCV` and + :class:`linear_model.LassoLarsCV`. :pr:`20155` by :user:`Takeshi Oura + `. - |Enhancement| Validate user-supplied gram matrix passed to linear models via the `precompute` argument. :pr:`19004` by :user:`Adam Midvidy `. -- |Fix| :meth:`ElasticNet.fit` no longer modifies `sample_weight` in place. - :pr:`19055` by `Thomas Fan`_. +- |Fix| :meth:`linear_model.ElasticNet.fit` no longer modifies `sample_weight` + in place. :pr:`19055` by `Thomas Fan`_. -- |Fix| :class:`Lasso`, :class:`ElasticNet` no longer have a `dual_gap_` - not corresponding to their objective. :pr:`19172` by - :user:`Mathurin Massias ` +- |Fix| :class:`linear_model.Lasso` and :class:`linear_model.ElasticNet` no + longer have a `dual_gap_` not corresponding to their objective. :pr:`19172` + by :user:`Mathurin Massias ` - |Fix| `sample_weight` are now fully taken into account in linear models when `normalize=True` for both feature centering and feature @@ -545,13 +542,9 @@ Changelog a model perfectly on some datasets when `residual_threshold=0`. :pr:`19499` by :user:`Gregory Strubel `. -- |Fix| Sample weight invariance for class:`Ridge` was fixed in :pr:`19616` by - :user:`Oliver Grisel ` and - :user:`Christian Lorentzen `. - -- |Fix| The :func:`preprocessing.StandardScaler.inverse_transform` method - now raises error when the input data is 1D. - :pr:`19752` by :user:`Zhehao Liu `. +- |Fix| Sample weight invariance for :class:`linear_model.Ridge` was fixed in + :pr:`19616` by :user:`Oliver Grisel ` and :user:`Christian Lorentzen + `. 
- |Fix| The dictionary `params` in :func:`linear_model.enet_path` and :func:`linear_model.lasso_path` should only contain parameter of the @@ -559,60 +552,54 @@ Changelog :pr:`19391` by :user:`Shao Yang Hong `. - |API| Raise a warning in :class:`linear_model.RANSACRegressor` that from - version 1.2, `min_samples` need to be set explicitly for model other than - :class:`linear_model.LinearRegression`. - :pr:`19390` by :user:`Shao Yang Hong `. + version 1.2, `min_samples` need to be set explicitly for models other than + :class:`linear_model.LinearRegression`. :pr:`19390` by :user:`Shao Yang Hong + `. - |API|: The parameter ``normalize`` of :class:`linear_model.LinearRegression` - is deprecated and will be removed in 1.2. - Motivation for this deprecation: ``normalize`` parameter did not take any - effect if ``fit_intercept`` was set to False and therefore was deemed - confusing. - The behavior of the deprecated LinearModel(normalize=True) can be - reproduced with :class:`~sklearn.pipeline.Pipeline` with - :class:`~sklearn.preprocessing.LinearModel` (where LinearModel is - LinearRegression, Ridge, RidgeClassifier, RidgeCV or RidgeClassifierCV) as - follows: - make_pipeline(StandardScaler(with_mean=False), LinearModel()). - The ``normalize`` parameter in :class:`linear_model.LinearRegression` was - deprecated in :pr:`17743` by + is deprecated and will be removed in 1.2. Motivation for this deprecation: + ``normalize`` parameter did not take any effect if ``fit_intercept`` was set + to False and therefore was deemed confusing. The behavior of the deprecated + ``LinearModel(normalize=True)`` can be reproduced with a + :class:`~sklearn.pipeline.Pipeline` with ``LinearModel`` (where + ``LinearModel`` is :class:`~linear_model.LinearRegression`, + :class:`~linear_model.Ridge`, :class:`~linear_model.RidgeClassifier`, + :class:`~linear_model.RidgeCV` or :class:`~linear_model.RidgeClassifierCV`) + as follows: ``make_pipeline(StandardScaler(with_mean=False), + LinearModel())``. The ``normalize`` parameter in + :class:`~linear_model.LinearRegression` was deprecated in :pr:`17743` by :user:`Maria Telenczuk ` and :user:`Alexandre Gramfort `. - Same for :class:`linear_model.Ridge`, :class:`linear_model.RidgeClassifier`, - :class:`linear_model.RidgeCV`, and :class:`linear_model.RidgeClassifierCV`, - in: :pr:`17772` by :user:`Maria Telenczuk ` and - :user:`Alexandre Gramfort `. - Same for :class:`linear_model.BayesianRidge`, - :class:`linear_model.ARDRegression` in: - :pr:`17746` by :user:`Maria Telenczuk `. - Same for :class:`linear_model.Lasso`, :class:`linear_model.LassoCV`, - :class:`linear_model.ElasticNet`, :class:`linear_model.ElasticNetCV`, - :class:`linear_model.MultiTaskLasso`, :class:`linear_model.MultiTaskLassoCV`, - :class:`linear_model.MultiTaskElasticNet`, - :class:`linear_model.MultiTaskElasticNetCV`, in: - :pr:`17785` by :user:`Maria Telenczuk ` and - :user:`Alexandre Gramfort `. - -- The ``normalize`` parameter of :class:`linear_model.OrthogonalMatchingPursuit` - :class:`linear_model.OrthogonalMatchingPursuitCV` will default to - False in 1.2 and will be removed in 1.4. - :pr:`17750` by :user:`Maria Telenczuk ` and - :user:`Alexandre Gramfort `. - Same for :class:`linear_model.Lars` - :class:`linear_model.LarsCV` :class:`linear_model.LassoLars` - :class:`linear_model.LassoLarsCV` :class:`linear_model.LassoLarsIC`, - in :pr:`17769` by :user:`Maria Telenczuk ` and - :user:`Alexandre Gramfort `. 
+ Same for :class:`~linear_model.Ridge`, + :class:`~linear_model.RidgeClassifier`, :class:`~linear_model.RidgeCV`, and + :class:`~linear_model.RidgeClassifierCV`, in: :pr:`17772` by :user:`Maria + Telenczuk ` and :user:`Alexandre Gramfort `. Same for + :class:`~linear_model.BayesianRidge`, :class:`~linear_model.ARDRegression` + in: :pr:`17746` by :user:`Maria Telenczuk `. Same for + :class:`~linear_model.Lasso`, :class:`~linear_model.LassoCV`, + :class:`~linear_model.ElasticNet`, :class:`~linear_model.ElasticNetCV`, + :class:`~linear_model.MultiTaskLasso`, + :class:`~linear_model.MultiTaskLassoCV`, + :class:`~linear_model.MultiTaskElasticNet`, + :class:`~linear_model.MultiTaskElasticNetCV`, in: :pr:`17785` by :user:`Maria + Telenczuk ` and :user:`Alexandre Gramfort `. + +- |API| The ``normalize`` parameter of + :class:`~linear_model.OrthogonalMatchingPursuit` and + :class:`~linear_model.OrthogonalMatchingPursuitCV` will default to False in + 1.2 and will be removed in 1.4. :pr:`17750` by :user:`Maria Telenczuk + ` and :user:`Alexandre Gramfort `. Same for + :class:`~linear_model.Lars` :class:`~linear_model.LarsCV` + :class:`~linear_model.LassoLars` :class:`~linear_model.LassoLarsCV` + :class:`~linear_model.LassoLarsIC`, in :pr:`17769` by :user:`Maria Telenczuk + ` and :user:`Alexandre Gramfort `. - |API| Keyword validation has moved from `__init__` and `set_params` to `fit` for the following estimators conforming to scikit-learn's conventions: - :class:`linear_model.SGDClassifier`, - :class:`linear_model.SparseSGDClassifier`, - :class:`linear_model.SGDRegressor`, - :class:`linear_model.SparseSGDRegressor`, - :class:`linear_model.SGDOneClassSVM`, - :class:`linear_model.SparseSGDOneClassSVM`, - :class:`linear_model.PassiveAggressiveClassifier`, - :class:`linear_model.PassiveAggressiveRegressor`. + :class:`~linear_model.SGDClassifier`, + :class:`~linear_model.SGDRegressor`, + :class:`~linear_model.SGDOneClassSVM`, + :class:`~linear_model.PassiveAggressiveClassifier`, and + :class:`~linear_model.PassiveAggressiveRegressor`. :pr:`20683` by `Guillaume Lemaitre`_. :mod:`sklearn.manifold` @@ -669,9 +656,9 @@ Changelog :pr:`18328` by :user:`Albert Villanova del Moral ` and :user:`Alonso Silva Allende `. -- |Fix| avoid overflow in :func:`metrics.cluster.adjust_rand_score` with large - amount of data. - :pr:`20312` by :user:`Divyanshu Deoli `. +- |Fix| avoid overflow in :func:`metrics.cluster.adjusted_rand_score` with + large amount of data. :pr:`20312` by :user:`Divyanshu Deoli + `. - |API| :class:`metrics.ConfusionMatrixDisplay` exposes two class methods :func:`~metrics.ConfusionMatrixDisplay.from_estimator` and @@ -755,7 +742,7 @@ Changelog - |FIX| :class:`neighbors.NearestNeighbors`, :class:`neighbors.KNeighborsClassifier`, :class:`neighbors.RadiusNeighborsClassifier`, :class:`neighbors.KNeighborsRegressor` - and :class:`neighbors.RadiusNeighborsRegressor` does not validate `weights` in + and :class:`neighbors.RadiusNeighborsRegressor` do not validate `weights` in `__init__` and validates `weights` in `fit` instead. :pr:`20072` by :user:`Juan Carlos Alfaro Jiménez `. @@ -774,9 +761,8 @@ Changelog ....................... - |API| The `predict_proba` and `predict_log_proba` methods of the - :class:`Pipeline` class now support passing prediction kwargs to - the final estimator. - :pr:`19790` by :user:`Christopher Flynn `. + :class:`pipeline.Pipeline` now support passing prediction kwargs to the final + estimator. :pr:`19790` by :user:`Christopher Flynn `. 
:mod:`sklearn.preprocessing` ............................ @@ -804,13 +790,22 @@ Changelog a tuple to `degree`, i.e. `degree=(min_degree, max_degree)`. :pr:`20250` by :user:`Christian Lorentzen `. -- |Efficiency| `preprocessing.StandardScaler` is faster and more memory +- |Efficiency| :class:`preprocessing.StandardScaler` is faster and more memory efficient. :pr:`20652` by `Thomas Fan`_. - |Efficiency| Changed ``algorithm`` argument for :class:`cluster.KMeans` in :class:`preprocessing.KBinsDiscretizer` from ``auto`` to ``full``. :pr:`19934` by :user:`Gleb Levitskiy `. +- |Efficiency| The implementation of `fit` for + :class:`preprocessing.PolynomialFeatures` transformer is now faster. This is + especially noticeable on large sparse input. :pr:`19734` by :user:`Fred + Robinson `. + +- |Fix| The :func:`preprocessing.StandardScaler.inverse_transform` method + now raises error when the input data is 1D. :pr:`19752` by :user:`Zhehao Liu + `. + - |Fix| :func:`preprocessing.scale`, :class:`preprocessing.StandardScaler` and similar scalers detect near-constant features to avoid scaling them to very large values. This problem happens in particular when using a scaler on @@ -823,7 +818,7 @@ Changelog correctly handles integer dtypes. :pr:`19356` by :user:`makoeppel`. - |Fix| :meth:`preprocessing.OrdinalEncoder.inverse_transform` is not - supporting sparse matrix and raise the appropriate error message. + supporting sparse matrix and raises the appropriate error message. :pr:`19879` by :user:`Guillaume Lemaitre `. - |Fix| The `fit` method of :class:`preprocessing.OrdinalEncoder` will not @@ -836,14 +831,14 @@ Changelog (`np.float64` or `np.int64`). :pr:`20727` by `Guillaume Lemaitre`_. +- |Fix| :class:`preprocessing.FunctionTransformer` does not set `n_features_in_` + based on the input to `inverse_transform`. :pr:`20961` by `Thomas Fan`_. + - |API| The `n_input_features_` attribute of :class:`preprocessing.PolynomialFeatures` is deprecated in favor of `n_features_in_` and will be removed in 1.2. :pr:`20240` by :user:`Jérémie du Boisberranger `. -- |Fix| :class:`preprocessing.FunctionTransformer` does not set `n_features_in_` - based on the input to `inverse_transform`. :pr:`20961` by `Thomas Fan`_. - :mod:`sklearn.svm` ................... @@ -887,8 +882,8 @@ Changelog :pr:`19948` by `Joel Nothman`_. - |Enhancement| :func:`utils.validation.check_is_fitted` now uses - ``__sklearn_is_fitted__`` if available, instead of checking for attributes ending with - an underscore. This also makes :class:`Pipeline` and + ``__sklearn_is_fitted__`` if available, instead of checking for attributes + ending with an underscore. This also makes :class:`pipeline.Pipeline` and :class:`preprocessing.FunctionTransformer` pass ``check_is_fitted(estimator)``. :pr:`20657` by `Adrin Jalali`_. @@ -896,8 +891,9 @@ Changelog precision of the computed variance was very poor when the real variance is exactly zero. :pr:`19766` by :user:`Jérémie du Boisberranger `. -- |Fix| Propreties that are decorated with :func:`utils.deprecated` correctly - wraps the property's docstring. :pr:`20385` by `Thomas Fan`_. +- |Fix| The docstrings of propreties that are decorated with + :func:`utils.deprecated` are now properly wrapped. :pr:`20385` by `Thomas + Fan`_. - |Fix| :func:`utils.stats._weighted_percentile` now correctly ignores zero-weighted observations smaller than the smallest observation with @@ -920,6 +916,10 @@ Changelog `np.int64`). :pr:`20727` by `Guillaume Lemaitre`_. 
+- |Fix| Support for `np.matrix` is deprecated in + :func:`~sklearn.utils.check_array` in 1.0 and will raise a `TypeError` in + 1.2. :pr:`20165` by `Thomas Fan`_. + - |API| :func:`utils._testing.assert_warns` and :func:`utils._testing.assert_warns_message` are deprecated in 1.0 and will be removed in 1.2. Used `pytest.warns` context manager instead. Note that @@ -930,13 +930,6 @@ Changelog now deprecated. Use `scipy.sparse.csgraph.shortest_path` instead. :pr:`20531` by `Tom Dupre la Tour`_. -:mod:`sklearn.validation` -......................... - -- |Fix| Support for `np.matrix` is deprecated in - :func:`~sklearn.utils.check_array` in 1.0 and will raise a `TypeError` in - 1.2. :pr:`20165` by `Thomas Fan`_. - Code and Documentation Contributors ----------------------------------- From a0524483bfda6d60d742da96bccd85bbb6892e84 Mon Sep 17 00:00:00 2001 From: EricEllwanger <52634304+EricEllwanger@users.noreply.github.com> Date: Tue, 14 Sep 2021 11:10:16 -0400 Subject: [PATCH 30/49] DOC - Ensures HalvingGridSearchCV and HalvingRandomSearchCV pass numpydoc validation (#21037) Co-authored-by: frellwan --- maint_tools/test_docstrings.py | 2 -- .../_search_successive_halving.py | 19 ++++++++++++------- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/maint_tools/test_docstrings.py b/maint_tools/test_docstrings.py index 36d5e4ae7b780..a8728fd4d4c42 100644 --- a/maint_tools/test_docstrings.py +++ b/maint_tools/test_docstrings.py @@ -9,8 +9,6 @@ # List of modules ignored when checking for numpydoc validation. DOCSTRING_IGNORE_LIST = [ - "HalvingGridSearchCV", - "HalvingRandomSearchCV", "HashingVectorizer", "HuberRegressor", "IterativeImputer", diff --git a/sklearn/model_selection/_search_successive_halving.py b/sklearn/model_selection/_search_successive_halving.py index 2a4ce03ae39c3..2c3bff9fd19e6 100644 --- a/sklearn/model_selection/_search_successive_halving.py +++ b/sklearn/model_selection/_search_successive_halving.py @@ -240,7 +240,12 @@ def fit(self, X, y=None, groups=None, **fit_params): instance (e.g., :class:`~sklearn.model_selection.GroupKFold`). **fit_params : dict of string -> object - Parameters passed to the ``fit`` method of the estimator + Parameters passed to the ``fit`` method of the estimator. + + Returns + ------- + self : object + Instance of fitted estimator. """ self._checked_cv_orig = check_cv( self.cv, y, classifier=is_classifier(self.estimator) @@ -410,7 +415,7 @@ class HalvingGridSearchCV(BaseSuccessiveHalving): Parameters ---------- - estimator : estimator object. + estimator : estimator object This is assumed to implement the scikit-learn estimator interface. Either estimator needs to provide a ``score`` function, or ``scoring`` must be passed. @@ -496,7 +501,7 @@ class HalvingGridSearchCV(BaseSuccessiveHalving): deactivating shuffling (`shuffle=False`), or by setting the `cv`'s `random_state` parameter to an integer. - scoring : string, callable, or None, default=None + scoring : str, callable, or None, default=None A single string (see :ref:`scoring_parameter`) or a callable (see :ref:`scoring`) to evaluate the predictions on the test set. If None, the estimator's score method is used. @@ -513,7 +518,7 @@ class HalvingGridSearchCV(BaseSuccessiveHalving): Value to assign to the score if an error occurs in estimator fitting. If set to 'raise', the error is raised. If a numeric value is given, FitFailedWarning is raised. This parameter does not affect the refit - step, which will always raise the error. 
Default is ``np.nan`` + step, which will always raise the error. Default is ``np.nan``. return_train_score : bool, default=False If ``False``, the ``cv_results_`` attribute will not include training @@ -740,7 +745,7 @@ class HalvingRandomSearchCV(BaseSuccessiveHalving): Parameters ---------- - estimator : estimator object. + estimator : estimator object This is assumed to implement the scikit-learn estimator interface. Either estimator needs to provide a ``score`` function, or ``scoring`` must be passed. @@ -832,7 +837,7 @@ class HalvingRandomSearchCV(BaseSuccessiveHalving): deactivating shuffling (`shuffle=False`), or by setting the `cv`'s `random_state` parameter to an integer. - scoring : string, callable, or None, default=None + scoring : str, callable, or None, default=None A single string (see :ref:`scoring_parameter`) or a callable (see :ref:`scoring`) to evaluate the predictions on the test set. If None, the estimator's score method is used. @@ -849,7 +854,7 @@ class HalvingRandomSearchCV(BaseSuccessiveHalving): Value to assign to the score if an error occurs in estimator fitting. If set to 'raise', the error is raised. If a numeric value is given, FitFailedWarning is raised. This parameter does not affect the refit - step, which will always raise the error. Default is ``np.nan`` + step, which will always raise the error. Default is ``np.nan``. return_train_score : bool, default=False If ``False``, the ``cv_results_`` attribute will not include training From bc285cd9e044203f38dd367cf515d6ba07beaf3b Mon Sep 17 00:00:00 2001 From: Juan Martin Loyola Date: Wed, 15 Sep 2021 05:48:56 -0300 Subject: [PATCH 31/49] DOC Ensures that MDS passes numpydoc validation (#21048) --- maint_tools/test_docstrings.py | 1 - sklearn/manifold/_mds.py | 51 ++++++++++++++++++++++++---------- 2 files changed, 37 insertions(+), 15 deletions(-) diff --git a/maint_tools/test_docstrings.py b/maint_tools/test_docstrings.py index a8728fd4d4c42..10b5fbfe94b08 100644 --- a/maint_tools/test_docstrings.py +++ b/maint_tools/test_docstrings.py @@ -17,7 +17,6 @@ "LabelSpreading", "LocalOutlierFactor", "LocallyLinearEmbedding", - "MDS", "MeanShift", "MiniBatchKMeans", "MissingIndicator", diff --git a/sklearn/manifold/_mds.py b/sklearn/manifold/_mds.py index 10d24bf82361b..fb2fe3d3da9b8 100644 --- a/sklearn/manifold/_mds.py +++ b/sklearn/manifold/_mds.py @@ -154,7 +154,7 @@ def smacof( random_state=None, return_n_iter=False, ): - """Computes multidimensional scaling using the SMACOF algorithm. + """Compute multidimensional scaling using the SMACOF algorithm. The SMACOF (Scaling by MAjorizing a COmplicated Function) algorithm is a multidimensional scaling algorithm which minimizes an objective function @@ -163,7 +163,8 @@ def smacof( stress, and is more powerful than traditional techniques such as gradient descent. - The SMACOF algorithm for metric MDS can summarized by the following steps: + The SMACOF algorithm for metric MDS can be summarized by the following + steps: 1. Set an initial start configuration, randomly or not. 2. Compute the stress @@ -389,17 +390,16 @@ class MDS(BaseEstimator): n_iter_ : int The number of iterations corresponding to the best stress. 
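
As a usage aside for the ``smacof`` function documented above, here is a
minimal sketch; the 4x4 dissimilarity matrix below is made up for
illustration::

    import numpy as np
    from sklearn.manifold import smacof

    # Symmetric dissimilarities with a zero diagonal, as smacof expects.
    D = np.array([[0., 1., 2., 3.],
                  [1., 0., 1., 2.],
                  [2., 1., 0., 1.],
                  [3., 2., 1., 0.]])

    # Returns the embedded coordinates and the final stress value.
    X, stress = smacof(D, n_components=2, random_state=0)
    print(X.shape, stress)  # (4, 2) and a non-negative float
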

-    Examples
+    See Also
     --------
-    >>> from sklearn.datasets import load_digits
-    >>> from sklearn.manifold import MDS
-    >>> X, _ = load_digits(return_X_y=True)
-    >>> X.shape
-    (1797, 64)
-    >>> embedding = MDS(n_components=2)
-    >>> X_transformed = embedding.fit_transform(X[:100])
-    >>> X_transformed.shape
-    (100, 2)
+    sklearn.decomposition.PCA : Principal component analysis that is a linear
+        dimensionality reduction method.
+    sklearn.decomposition.KernelPCA : Non-linear dimensionality reduction using
+        kernels and PCA.
+    TSNE : T-distributed Stochastic Neighbor Embedding.
+    Isomap : Manifold learning based on Isometric Mapping.
+    LocallyLinearEmbedding : Manifold learning using Locally Linear Embedding.
+    SpectralEmbedding : Spectral embedding for non-linear dimensionality.
 
     References
     ----------
@@ -412,6 +412,17 @@ class MDS(BaseEstimator):
     "Multidimensional scaling by optimizing goodness of fit to a nonmetric
     hypothesis" Kruskal, J. Psychometrika, 29, (1964)
 
+    Examples
+    --------
+    >>> from sklearn.datasets import load_digits
+    >>> from sklearn.manifold import MDS
+    >>> X, _ = load_digits(return_X_y=True)
+    >>> X.shape
+    (1797, 64)
+    >>> embedding = MDS(n_components=2)
+    >>> X_transformed = embedding.fit_transform(X[:100])
+    >>> X_transformed.shape
+    (100, 2)
     """
 
     def __init__(
@@ -452,7 +463,7 @@ def _pairwise(self):
 
     def fit(self, X, y=None, init=None):
         """
-        Computes the position of the points in the embedding space.
+        Compute the position of the points in the embedding space.
 
         Parameters
         ----------
@@ -462,18 +473,24 @@ def fit(self, X, y=None, init=None):
             be the dissimilarity matrix.
 
         y : Ignored
+            Not used, present for API consistency by convention.
 
         init : ndarray of shape (n_samples,), default=None
             Starting configuration of the embedding to initialize the SMACOF
             algorithm. By default, the algorithm is initialized with a randomly
             chosen array.
+
+        Returns
+        -------
+        self : object
+            Fitted estimator.
         """
         self.fit_transform(X, init=init)
         return self
 
     def fit_transform(self, X, y=None, init=None):
         """
-        Fit the data from X, and returns the embedded coordinates.
+        Fit the data from `X` and return the embedded coordinates.
 
         Parameters
         ----------
@@ -483,11 +500,17 @@ def fit_transform(self, X, y=None, init=None):
             be the dissimilarity matrix.
 
         y : Ignored
+            Not used, present for API consistency by convention.
 
         init : ndarray of shape (n_samples,), default=None
             Starting configuration of the embedding to initialize the SMACOF
             algorithm. By default, the algorithm is initialized with a randomly
             chosen array.
+
+        Returns
+        -------
+        X_new : ndarray of shape (n_samples, n_components)
+            X transformed in the new space.
""" X = self._validate_data(X) if X.shape[0] == X.shape[1] and self.dissimilarity != "precomputed": From 44cbbd2f70267827a75bec9dac6905ce6dc88cc5 Mon Sep 17 00:00:00 2001 From: Manimaran Date: Wed, 15 Sep 2021 16:58:09 +0530 Subject: [PATCH 32/49] DOC Ensure MeanShift docstrings passes numpydoc validation (#21049) Co-authored-by: Guillaume Lemaitre --- maint_tools/test_docstrings.py | 1 - sklearn/cluster/_mean_shift.py | 32 ++++++++++++++++++++------------ 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/maint_tools/test_docstrings.py b/maint_tools/test_docstrings.py index 10b5fbfe94b08..85e98113e0151 100644 --- a/maint_tools/test_docstrings.py +++ b/maint_tools/test_docstrings.py @@ -17,7 +17,6 @@ "LabelSpreading", "LocalOutlierFactor", "LocallyLinearEmbedding", - "MeanShift", "MiniBatchKMeans", "MissingIndicator", "MultiLabelBinarizer", diff --git a/sklearn/cluster/_mean_shift.py b/sklearn/cluster/_mean_shift.py index 9fc260485600b..9d19d0d9a4015 100644 --- a/sklearn/cluster/_mean_shift.py +++ b/sklearn/cluster/_mean_shift.py @@ -337,19 +337,9 @@ class MeanShift(ClusterMixin, BaseEstimator): .. versionadded:: 1.0 - Examples + See Also -------- - >>> from sklearn.cluster import MeanShift - >>> import numpy as np - >>> X = np.array([[1, 1], [2, 1], [1, 0], - ... [4, 7], [3, 5], [3, 6]]) - >>> clustering = MeanShift(bandwidth=2).fit(X) - >>> clustering.labels_ - array([1, 1, 1, 0, 0, 0]) - >>> clustering.predict([[0, 0], [5, 5]]) - array([1, 0]) - >>> clustering - MeanShift(bandwidth=2) + KMeans : K-Means clustering. Notes ----- @@ -375,6 +365,19 @@ class MeanShift(ClusterMixin, BaseEstimator): feature space analysis". IEEE Transactions on Pattern Analysis and Machine Intelligence. 2002. pp. 603-619. + Examples + -------- + >>> from sklearn.cluster import MeanShift + >>> import numpy as np + >>> X = np.array([[1, 1], [2, 1], [1, 0], + ... [4, 7], [3, 5], [3, 6]]) + >>> clustering = MeanShift(bandwidth=2).fit(X) + >>> clustering.labels_ + array([1, 1, 1, 0, 0, 0]) + >>> clustering.predict([[0, 0], [5, 5]]) + array([1, 0]) + >>> clustering + MeanShift(bandwidth=2) """ def __init__( @@ -405,7 +408,12 @@ def fit(self, X, y=None): Samples to cluster. y : Ignored + Not used, present for API consistency by convention. + Returns + ------- + self : object + Fitted instance. """ X = self._validate_data(X) bandwidth = self.bandwidth From c20ab57863502549d08a7924a74b82cce8744887 Mon Sep 17 00:00:00 2001 From: EricEllwanger <52634304+EricEllwanger@users.noreply.github.com> Date: Wed, 15 Sep 2021 07:34:40 -0400 Subject: [PATCH 33/49] DOC - Ensure HashingVectorizer passes numpydoc validation (#21047) Co-authored-by: frellwan --- maint_tools/test_docstrings.py | 1 - sklearn/feature_extraction/text.py | 43 +++++++++++++++++++++--------- 2 files changed, 30 insertions(+), 14 deletions(-) diff --git a/maint_tools/test_docstrings.py b/maint_tools/test_docstrings.py index 85e98113e0151..a94f91b7f2b20 100644 --- a/maint_tools/test_docstrings.py +++ b/maint_tools/test_docstrings.py @@ -9,7 +9,6 @@ # List of modules ignored when checking for numpydoc validation. 
DOCSTRING_IGNORE_LIST = [ - "HashingVectorizer", "HuberRegressor", "IterativeImputer", "KNNImputer", diff --git a/sklearn/feature_extraction/text.py b/sklearn/feature_extraction/text.py index d47be52748577..236326bb01f1b 100644 --- a/sklearn/feature_extraction/text.py +++ b/sklearn/feature_extraction/text.py @@ -555,7 +555,7 @@ def _warn_for_unused_params(self): class HashingVectorizer(TransformerMixin, _VectorizerMixin, BaseEstimator): - r"""Convert a collection of text documents to a matrix of token occurrences + r"""Convert a collection of text documents to a matrix of token occurrences. It turns a collection of text documents into a scipy.sparse matrix holding token occurrence counts (or binary occurrence information), possibly @@ -568,10 +568,10 @@ class HashingVectorizer(TransformerMixin, _VectorizerMixin, BaseEstimator): This strategy has several advantages: - it is very low memory scalable to large datasets as there is no need to - store a vocabulary dictionary in memory + store a vocabulary dictionary in memory. - it is fast to pickle and un-pickle as it holds no state besides the - constructor parameters + constructor parameters. - it can be used in a streaming (partial fit) or parallel pipeline as there is no state computed during fit. @@ -595,7 +595,6 @@ class HashingVectorizer(TransformerMixin, _VectorizerMixin, BaseEstimator): Parameters ---------- - input : {'filename', 'file', 'content'}, default='content' - If `'filename'`, the sequence passed as an argument to fit is expected to be a list of filenames that need reading to fetch @@ -607,7 +606,7 @@ class HashingVectorizer(TransformerMixin, _VectorizerMixin, BaseEstimator): - If `'content'`, the input is expected to be a sequence of items that can be of type string or byte. - encoding : string, default='utf-8' + encoding : str, default='utf-8' If bytes or files are given to analyze, this encoding is used to decode. @@ -686,7 +685,7 @@ class HashingVectorizer(TransformerMixin, _VectorizerMixin, BaseEstimator): of features are likely to cause hash collisions, but large numbers will cause larger coefficient dimensions in linear learners. - binary : bool, default=False. + binary : bool, default=False If True, all non zero counts are set to 1. This is useful for discrete probabilistic models that model binary events rather than integer counts. @@ -704,6 +703,13 @@ class HashingVectorizer(TransformerMixin, _VectorizerMixin, BaseEstimator): dtype : type, default=np.float64 Type of the matrix returned by fit_transform() or transform(). + See Also + -------- + CountVectorizer : Convert a collection of text documents to a matrix of + token counts. + TfidfVectorizer : Convert a collection of raw documents to a matrix of + TF-IDF features. + Examples -------- >>> from sklearn.feature_extraction.text import HashingVectorizer @@ -717,11 +723,6 @@ class HashingVectorizer(TransformerMixin, _VectorizerMixin, BaseEstimator): >>> X = vectorizer.fit_transform(corpus) >>> print(X.shape) (4, 16) - - See Also - -------- - CountVectorizer, TfidfVectorizer - """ def __init__( @@ -762,7 +763,7 @@ def __init__( self.dtype = dtype def partial_fit(self, X, y=None): - """Does nothing: this transformer is stateless. + """No-op: this transformer is stateless. This method is just there to mark the fact that this transformer can work in a streaming setup. @@ -771,16 +772,32 @@ def partial_fit(self, X, y=None): ---------- X : ndarray of shape [n_samples, n_features] Training data. + + y : Ignored + Not used, present for API consistency by convention. 
+ + Returns + ------- + self : object + HashingVectorizer instance. """ return self def fit(self, X, y=None): - """Does nothing: this transformer is stateless. + """No-op: this transformer is stateless. Parameters ---------- X : ndarray of shape [n_samples, n_features] Training data. + + y : Ignored + Not used, present for API consistency by convention. + + Returns + ------- + self : object + HashingVectorizer instance. """ # triggers a parameter validation if isinstance(X, str): From 9f57e4bd31a30d538cc38b81f58b1a0636b58a1b Mon Sep 17 00:00:00 2001 From: Juan Martin Loyola Date: Thu, 16 Sep 2021 05:01:03 -0300 Subject: [PATCH 34/49] DOC Ensures that Normalizer passes numpydoc validation (#21061) --- maint_tools/test_docstrings.py | 1 - sklearn/preprocessing/_data.py | 14 +++++++------- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/maint_tools/test_docstrings.py b/maint_tools/test_docstrings.py index a94f91b7f2b20..0b5071a808eaa 100644 --- a/maint_tools/test_docstrings.py +++ b/maint_tools/test_docstrings.py @@ -25,7 +25,6 @@ "MultiTaskLassoCV", "NearestCentroid", "NeighborhoodComponentsAnalysis", - "Normalizer", "OrthogonalMatchingPursuit", "OrthogonalMatchingPursuitCV", "OutputCodeClassifier", diff --git a/sklearn/preprocessing/_data.py b/sklearn/preprocessing/_data.py index ebfa2304f75a7..c0b42c5aa1946 100644 --- a/sklearn/preprocessing/_data.py +++ b/sklearn/preprocessing/_data.py @@ -1860,7 +1860,7 @@ class Normalizer(TransformerMixin, BaseEstimator): values. copy : bool, default=True - set to False to perform inplace row normalization and avoid a + Set to False to perform inplace row normalization and avoid a copy (if the input is already a numpy array or a scipy.sparse CSR matrix). @@ -1877,6 +1877,10 @@ class Normalizer(TransformerMixin, BaseEstimator): .. versionadded:: 1.0 + See Also + -------- + normalize : Equivalent function without the estimator API. + Notes ----- This estimator is stateless (besides constructor parameters), the @@ -1886,10 +1890,6 @@ class Normalizer(TransformerMixin, BaseEstimator): see :ref:`examples/preprocessing/plot_all_scaling.py `. - See Also - -------- - normalize : Equivalent function without the estimator API. - Examples -------- >>> from sklearn.preprocessing import Normalizer @@ -1920,8 +1920,8 @@ def fit(self, X, y=None): X : {array-like, sparse matrix} of shape (n_samples, n_features) The data to estimate the normalization parameters. - y : None - Ignored. + y : Ignored + Not used, present here for API consistency by convention. Returns ------- From c87f5cfcef29a0cf14663a86d123c3b4509dea2e Mon Sep 17 00:00:00 2001 From: TONY GEORGE Date: Thu, 16 Sep 2021 05:48:25 -0400 Subject: [PATCH 35/49] DOC Add a note for some data considerations with 20newsgroups dataset (#20728) Co-authored-by: Reshama Shaikh Co-authored-by: Guillaume Lemaitre --- sklearn/datasets/descr/twenty_newsgroups.rst | 28 +++++++++++++++++--- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/sklearn/datasets/descr/twenty_newsgroups.rst b/sklearn/datasets/descr/twenty_newsgroups.rst index 0acb2c8191905..a7542ea57d529 100644 --- a/sklearn/datasets/descr/twenty_newsgroups.rst +++ b/sklearn/datasets/descr/twenty_newsgroups.rst @@ -219,12 +219,32 @@ Some other classifiers cope better with this harder version of the task. Try running :ref:`sphx_glr_auto_examples_model_selection_grid_search_text_feature_extraction.py` with and without the ``--filter`` option to compare the results. +.. 
topic:: Data Considerations
+
+  The Cleveland Indians is a major league baseball team based in Cleveland,
+  Ohio, USA. In December 2020, it was reported that "After several months of
+  discussion sparked by the death of George Floyd and a national reckoning over
+  race and colonialism, the Cleveland Indians have decided to change their
+  name." Team owner Paul Dolan "did make it clear that the team will not make
+  its informal nickname -- the Tribe -- its new team name." "It’s not going to
+  be a half-step away from the Indians," Dolan said. "We will not have a Native
+  American-themed name."
+
+  https://www.mlb.com/news/cleveland-indians-team-name-change
+
 .. topic:: Recommendation
 
-  When evaluating text classifiers on the 20 Newsgroups data, you
-  should strip newsgroup-related metadata. In scikit-learn, you can do this by
-  setting ``remove=('headers', 'footers', 'quotes')``. The F-score will be
-  lower because it is more realistic.
+  - When evaluating text classifiers on the 20 Newsgroups data, you
+    should strip newsgroup-related metadata. In scikit-learn, you can do this
+    by setting ``remove=('headers', 'footers', 'quotes')``. The F-score will be
+    lower because it is more realistic.
+  - This text dataset contains data which may be inappropriate for certain NLP
+    applications. An example is listed in the "Data Considerations" section
+    above. The challenge with using current text datasets in NLP for tasks such
+    as sentence completion, clustering, and other applications is that text
+    that is culturally biased and inflammatory will propagate biases. This
+    should be taken into consideration when using the dataset, reviewing the
+    output, and the bias should be documented.
 
 .. topic:: Examples

From f0c0e33ce83c96fb90d18befbfdb887af1844804 Mon Sep 17 00:00:00 2001
From: Manimaran 
Date: Thu, 16 Sep 2021 17:30:34 +0530
Subject: [PATCH 36/49] DOC Ensures that QuantileTransformer passes numpydoc
 validation (#21065)

---
 maint_tools/test_docstrings.py |  2 --
 sklearn/preprocessing/_data.py | 22 +++++++++++-----------
 2 files changed, 11 insertions(+), 13 deletions(-)

diff --git a/maint_tools/test_docstrings.py b/maint_tools/test_docstrings.py
index 0b5071a808eaa..4a256c9cb0f59 100644
--- a/maint_tools/test_docstrings.py
+++ b/maint_tools/test_docstrings.py
@@ -36,8 +36,6 @@
     "PatchExtractor",
     "PolynomialFeatures",
     "QuadraticDiscriminantAnalysis",
-    "QuantileRegressor",
-    "QuantileTransformer",
     "RANSACRegressor",
     "RandomizedSearchCV",
     "RobustScaler",
diff --git a/sklearn/preprocessing/_data.py b/sklearn/preprocessing/_data.py
index c0b42c5aa1946..33fe7943c0681 100644
--- a/sklearn/preprocessing/_data.py
+++ b/sklearn/preprocessing/_data.py
@@ -2388,7 +2388,7 @@ class QuantileTransformer(_OneToOneFeatureMixin, TransformerMixin, BaseEstimator
         noise. Please see ``subsample`` for more details.
         Pass an int for reproducible results across multiple function calls.
-        See :term:`Glossary `
+        See :term:`Glossary `.
 
     copy : bool, default=True
         Set to False to perform inplace transformation and avoid a copy (if the
@@ -2417,16 +2417,6 @@ class QuantileTransformer(_OneToOneFeatureMixin, TransformerMixin, BaseEstimator
 
     ..
versionadded:: 1.0 - Examples - -------- - >>> import numpy as np - >>> from sklearn.preprocessing import QuantileTransformer - >>> rng = np.random.RandomState(0) - >>> X = np.sort(rng.normal(loc=0.5, scale=0.25, size=(25, 1)), axis=0) - >>> qt = QuantileTransformer(n_quantiles=10, random_state=0) - >>> qt.fit_transform(X) - array([...]) - See Also -------- quantile_transform : Equivalent function without the estimator API. @@ -2445,6 +2435,16 @@ class QuantileTransformer(_OneToOneFeatureMixin, TransformerMixin, BaseEstimator For a comparison of the different scalers, transformers, and normalizers, see :ref:`examples/preprocessing/plot_all_scaling.py `. + + Examples + -------- + >>> import numpy as np + >>> from sklearn.preprocessing import QuantileTransformer + >>> rng = np.random.RandomState(0) + >>> X = np.sort(rng.normal(loc=0.5, scale=0.25, size=(25, 1)), axis=0) + >>> qt = QuantileTransformer(n_quantiles=10, random_state=0) + >>> qt.fit_transform(X) + array([...]) """ def __init__( From 745bde1ac81f83f2c72016396fe628247bc0ff63 Mon Sep 17 00:00:00 2001 From: "Thomas J. Fan" Date: Thu, 16 Sep 2021 08:50:56 -0400 Subject: [PATCH 37/49] DOC remove incorrect sentence about dependencies being automatically installed (#21012) --- doc/developers/advanced_installation.rst | 3 --- 1 file changed, 3 deletions(-) diff --git a/doc/developers/advanced_installation.rst b/doc/developers/advanced_installation.rst index 34f0979f8ddb8..f46b899c22b62 100644 --- a/doc/developers/advanced_installation.rst +++ b/doc/developers/advanced_installation.rst @@ -101,9 +101,6 @@ runtime: - Joblib (>= |JoblibMinVersion|), - threadpoolctl (>= |ThreadpoolctlMinVersion|). -Those dependencies are **automatically installed by pip** if they were missing -when building scikit-learn from source. - .. note:: For running on PyPy, PyPy3-v5.10+, Numpy 1.14.0+, and scipy 1.1.0+ From f3f93ff44345c4fa3289fdb2a11243f27875156f Mon Sep 17 00:00:00 2001 From: EricEllwanger <52634304+EricEllwanger@users.noreply.github.com> Date: Fri, 17 Sep 2021 05:48:33 -0400 Subject: [PATCH 38/49] DOC Ensure HuberRegressor passes numpydoc validation (#21062) Co-authored-by: frellwan Co-authored-by: Guillaume Lemaitre --- maint_tools/test_docstrings.py | 1 - sklearn/linear_model/_huber.py | 21 ++++++++++++++------- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/maint_tools/test_docstrings.py b/maint_tools/test_docstrings.py index 4a256c9cb0f59..7d1a785227459 100644 --- a/maint_tools/test_docstrings.py +++ b/maint_tools/test_docstrings.py @@ -9,7 +9,6 @@ # List of modules ignored when checking for numpydoc validation. DOCSTRING_IGNORE_LIST = [ - "HuberRegressor", "IterativeImputer", "KNNImputer", "LabelPropagation", diff --git a/sklearn/linear_model/_huber.py b/sklearn/linear_model/_huber.py index caba01242fb5d..055fcca2becf6 100644 --- a/sklearn/linear_model/_huber.py +++ b/sklearn/linear_model/_huber.py @@ -205,6 +205,19 @@ class HuberRegressor(LinearModel, RegressorMixin, BaseEstimator): A boolean mask which is set to True where the samples are identified as outliers. + See Also + -------- + RANSACRegressor : RANSAC (RANdom SAmple Consensus) algorithm. + TheilSenRegressor : Theil-Sen Estimator robust multivariate regression model. + SGDRegressor : Fitted by minimizing a regularized empirical loss with SGD. + + References + ---------- + .. [1] Peter J. Huber, Elvezio M. Ronchetti, Robust Statistics + Concomitant scale estimates, pg 172 + .. [2] Art B. Owen (2006), A robust hybrid of lasso and ridge regression. 
+ https://statweb.stanford.edu/~owen/reports/hhu.pdf + Examples -------- >>> import numpy as np @@ -227,13 +240,6 @@ class HuberRegressor(LinearModel, RegressorMixin, BaseEstimator): Huber coefficients: [17.7906... 31.0106...] >>> print("Linear Regression coefficients:", linear.coef_) Linear Regression coefficients: [-1.9221... 7.0226...] - - References - ---------- - .. [1] Peter J. Huber, Elvezio M. Ronchetti, Robust Statistics - Concomitant scale estimates, pg 172 - .. [2] Art B. Owen (2006), A robust hybrid of lasso and ridge regression. - https://statweb.stanford.edu/~owen/reports/hhu.pdf """ def __init__( @@ -271,6 +277,7 @@ def fit(self, X, y, sample_weight=None): Returns ------- self : object + Fitted `HuberRegressor` estimator. """ X, y = self._validate_data( X, From 9b91d4280f0dceb5fed67802d86149a8c7bf56d6 Mon Sep 17 00:00:00 2001 From: EricEllwanger <52634304+EricEllwanger@users.noreply.github.com> Date: Fri, 17 Sep 2021 08:14:45 -0400 Subject: [PATCH 39/49] DOC Ensure that RANSACRegressor passes numpydoc validation (#21072) Co-authored-by: frellwan Co-authored-by: Guillaume Lemaitre --- maint_tools/test_docstrings.py | 1 - sklearn/linear_model/_ransac.py | 39 +++++++++++++++++++++------------ 2 files changed, 25 insertions(+), 15 deletions(-) diff --git a/maint_tools/test_docstrings.py b/maint_tools/test_docstrings.py index 7d1a785227459..e72fe229287a6 100644 --- a/maint_tools/test_docstrings.py +++ b/maint_tools/test_docstrings.py @@ -35,7 +35,6 @@ "PatchExtractor", "PolynomialFeatures", "QuadraticDiscriminantAnalysis", - "RANSACRegressor", "RandomizedSearchCV", "RobustScaler", "SGDOneClassSVM", diff --git a/sklearn/linear_model/_ransac.py b/sklearn/linear_model/_ransac.py index b53574358adb2..d93f107ebe98e 100644 --- a/sklearn/linear_model/_ransac.py +++ b/sklearn/linear_model/_ransac.py @@ -95,7 +95,7 @@ class RANSACRegressor( :class:`linear_model.LinearRegression` is used, the user is encouraged to provide a value. - .. deprecated :: 1.0 + .. deprecated:: 1.0 Not setting `min_samples` explicitly will raise an error in version 1.2 for models other than :class:`~sklearn.linear_model.LinearRegression`. To keep the old @@ -147,7 +147,7 @@ class RANSACRegressor( as 0.99 (the default) and e is the current fraction of inliers w.r.t. the total number of samples. - loss : string, callable, default='absolute_error' + loss : str, callable, default='absolute_error' String inputs, 'absolute_error' and 'squared_error' are supported which find the absolute error and squared error per sample respectively. @@ -214,6 +214,18 @@ class RANSACRegressor( .. versionadded:: 1.0 + See Also + -------- + HuberRegressor : Linear regression model that is robust to outliers. + TheilSenRegressor : Theil-Sen Estimator robust multivariate regression model. + SGDRegressor : Fitted by minimizing a regularized empirical loss with SGD. + + References + ---------- + .. [1] https://en.wikipedia.org/wiki/RANSAC + .. [2] https://www.sri.com/sites/default/files/publications/ransac-publication.pdf + .. [3] http://www.bmva.org/bmvc/2009/Papers/Paper355/Paper355.pdf + Examples -------- >>> from sklearn.linear_model import RANSACRegressor @@ -225,12 +237,6 @@ class RANSACRegressor( 0.9885... >>> reg.predict(X[:1,]) array([-31.9417...]) - - References - ---------- - .. [1] https://en.wikipedia.org/wiki/RANSAC - .. [2] https://www.sri.com/sites/default/files/publications/ransac-publication.pdf - .. 
[3] http://www.bmva.org/bmvc/2009/Papers/Paper355/Paper355.pdf
     """  # noqa: E501
 
     def __init__(
@@ -268,7 +274,7 @@ def fit(self, X, y, sample_weight=None):
 
         Parameters
         ----------
-        X : array-like or sparse matrix, shape [n_samples, n_features]
+        X : {array-like, sparse matrix} of shape (n_samples, n_features)
             Training data.
 
         y : array-like of shape (n_samples,) or (n_samples, n_targets)
             Target values.
@@ -281,13 +287,17 @@ def fit(self, X, y, sample_weight=None):
 
             .. versionadded:: 0.18
 
+        Returns
+        -------
+        self : object
+            Fitted `RANSACRegressor` estimator.
+
         Raises
         ------
         ValueError
             If no valid consensus set could be found. This occurs if
             `is_data_valid` and `is_model_valid` return False for all
             `max_trials` randomly chosen sub-samples.
-
         """
         # Need to validate separately here. We can't pass multi_output=True
         # because that would allow y to be csr. Delay expensive finiteness
@@ -555,7 +565,8 @@ def predict(self, X):
 
         Parameters
         ----------
-        X : numpy array of shape [n_samples, n_features]
+        X : {array-like, sparse matrix} of shape (n_samples, n_features)
+            Input data.
 
         Returns
         -------
@@ -572,16 +583,16 @@ def predict(self, X):
         return self.estimator_.predict(X)
 
     def score(self, X, y):
-        """Returns the score of the prediction.
+        """Return the score of the prediction.
 
         This is a wrapper for `estimator_.score(X, y)`.
 
         Parameters
         ----------
-        X : numpy array or sparse matrix of shape [n_samples, n_features]
+        X : {array-like, sparse matrix} of shape (n_samples, n_features)
             Training data.
 
-        y : array, shape = [n_samples] or [n_samples, n_targets]
+        y : array-like of shape (n_samples,) or (n_samples, n_targets)
             Target values.
 
         Returns
From 56b61cf477260ec5c0fda03044130e1395256c64 Mon Sep 17 00:00:00 2001
From: Juan Martin Loyola 
Date: Fri, 17 Sep 2021 09:18:08 -0300
Subject: [PATCH 40/49] DOC Ensures that OutputCodeClassifier passes numpydoc
 validation (#21063)

Co-authored-by: Guillaume Lemaitre 
---
 maint_tools/test_docstrings.py |  1 -
 sklearn/multiclass.py          | 42 +++++++++++++++++++---------------
 2 files changed, 24 insertions(+), 19 deletions(-)

diff --git a/maint_tools/test_docstrings.py b/maint_tools/test_docstrings.py
index e72fe229287a6..ba5f664e60245 100644
--- a/maint_tools/test_docstrings.py
+++ b/maint_tools/test_docstrings.py
@@ -26,7 +26,6 @@
     "NeighborhoodComponentsAnalysis",
     "OrthogonalMatchingPursuit",
     "OrthogonalMatchingPursuitCV",
-    "OutputCodeClassifier",
     "PLSCanonical",
     "PLSRegression",
     "PLSSVD",
diff --git a/sklearn/multiclass.py b/sklearn/multiclass.py
index c5d70b284771d..7caa9634db85e 100644
--- a/sklearn/multiclass.py
+++ b/sklearn/multiclass.py
@@ -913,7 +913,7 @@ def _more_tags(self):
 
 
 class OutputCodeClassifier(MetaEstimatorMixin, ClassifierMixin, BaseEstimator):
-    """(Error-Correcting) Output-Code multiclass strategy
+    """(Error-Correcting) Output-Code multiclass strategy.
 
     Output-code based strategies consist in representing each class with a
     binary code (an array of 0s and 1s). At fitting time, one binary
@@ -956,10 +956,10 @@ class OutputCodeClassifier(MetaEstimatorMixin, ClassifierMixin, BaseEstimator):
     estimators_ : list of `int(n_classes * code_size)` estimators
         Estimators used for predictions.
 
-    classes_ : numpy array of shape [n_classes]
+    classes_ : ndarray of shape (n_classes,)
         Array containing labels.
 
-    code_book_ : numpy array of shape [n_classes, code_size]
+    code_book_ : ndarray of shape (n_classes, code_size)
         Binary array containing the code of each class.
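
To make the decoding step described in this class docstring concrete, here is
a minimal numpy sketch; the code book and the per-bit scores are made-up
values, not taken from a fitted estimator::

    import numpy as np

    # Hypothetical code book for 3 classes with 4-bit codes.
    code_book = np.array([[1, 0, 1, 0],
                          [0, 1, 1, 1],
                          [1, 1, 0, 1]])

    # Suppose the 4 binary estimators scored one sample like this.
    scores = np.array([0.9, 0.1, 0.8, 0.2])

    # Prediction picks the class whose code is closest to the scores.
    distances = np.linalg.norm(code_book - scores, axis=1)
    print(distances.argmin())  # -> 0, i.e. the first class
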
n_features_in_ : int @@ -974,19 +974,10 @@ class OutputCodeClassifier(MetaEstimatorMixin, ClassifierMixin, BaseEstimator): .. versionadded:: 1.0 - Examples + See Also -------- - >>> from sklearn.multiclass import OutputCodeClassifier - >>> from sklearn.ensemble import RandomForestClassifier - >>> from sklearn.datasets import make_classification - >>> X, y = make_classification(n_samples=100, n_features=4, - ... n_informative=2, n_redundant=0, - ... random_state=0, shuffle=False) - >>> clf = OutputCodeClassifier( - ... estimator=RandomForestClassifier(random_state=0), - ... random_state=0).fit(X, y) - >>> clf.predict([[0, 0, 0, 0]]) - array([1]) + OneVsRestClassifier : One-vs-all multiclass strategy. + OneVsOneClassifier : One-vs-one multiclass strategy. References ---------- @@ -1005,6 +996,20 @@ class OutputCodeClassifier(MetaEstimatorMixin, ClassifierMixin, BaseEstimator): .. [3] "The Elements of Statistical Learning", Hastie T., Tibshirani R., Friedman J., page 606 (second-edition) 2008. + + Examples + -------- + >>> from sklearn.multiclass import OutputCodeClassifier + >>> from sklearn.ensemble import RandomForestClassifier + >>> from sklearn.datasets import make_classification + >>> X, y = make_classification(n_samples=100, n_features=4, + ... n_informative=2, n_redundant=0, + ... random_state=0, shuffle=False) + >>> clf = OutputCodeClassifier( + ... estimator=RandomForestClassifier(random_state=0), + ... random_state=0).fit(X, y) + >>> clf.predict([[0, 0, 0, 0]]) + array([1]) """ def __init__(self, estimator, *, code_size=1.5, random_state=None, n_jobs=None): @@ -1021,12 +1026,13 @@ def fit(self, X, y): X : (sparse) array-like of shape (n_samples, n_features) Data. - y : numpy array of shape [n_samples] + y : array-like of shape (n_samples,) Multi-class targets. Returns ------- - self + self : object + Returns a fitted instance of self. """ y = self._validate_data(X="no_validation", y=y) @@ -1085,7 +1091,7 @@ def predict(self, X): Returns ------- - y : numpy array of shape [n_samples] + y : ndarray of shape (n_samples,) Predicted multi-class targets. 
""" check_is_fitted(self) From 1b800b00e9ad3496da05641f365d010495586aa7 Mon Sep 17 00:00:00 2001 From: Dimitri Papadopoulos Orfanos <3234522+DimitriPapadopoulos@users.noreply.github.com> Date: Fri, 17 Sep 2021 19:04:54 +0200 Subject: [PATCH 41/49] DOC Typos found by codespell (#21069) --- benchmarks/bench_mnist.py | 2 +- benchmarks/bench_random_projections.py | 6 +++--- build_tools/azure/posix-docker.yml | 2 +- build_tools/circle/list_versions.py | 2 +- build_tools/shared.sh | 2 +- doc/common_pitfalls.rst | 2 +- doc/conf.py | 2 +- doc/developers/advanced_installation.rst | 2 +- doc/developers/contributing.rst | 2 +- doc/developers/maintainer.rst | 4 ++-- doc/install.rst | 4 ++-- doc/modules/compose.rst | 2 +- doc/modules/cross_decomposition.rst | 2 +- doc/modules/cross_validation.rst | 2 +- doc/modules/decomposition.rst | 2 +- doc/modules/lda_qda.rst | 4 ++-- doc/modules/model_evaluation.rst | 2 +- doc/modules/outlier_detection.rst | 2 +- doc/modules/sgd.rst | 4 ++-- doc/modules/svm.rst | 2 +- doc/roadmap.rst | 2 +- doc/themes/scikit-learn-modern/static/css/theme.css | 2 +- doc/tutorial/machine_learning_map/ML_MAPS_README.txt | 2 +- doc/tutorial/machine_learning_map/pyparsing.py | 2 +- doc/whats_new/v0.16.rst | 2 +- doc/whats_new/v0.20.rst | 2 +- doc/whats_new/v0.21.rst | 2 +- doc/whats_new/v0.23.rst | 2 +- doc/whats_new/v0.24.rst | 2 +- .../applications/plot_cyclical_feature_engineering.py | 2 +- examples/calibration/plot_calibration_multiclass.py | 2 +- examples/covariance/plot_mahalanobis_distances.py | 2 +- examples/cross_decomposition/plot_pcr_vs_pls.py | 2 +- .../ensemble/plot_gradient_boosting_early_stopping.py | 4 ++-- examples/ensemble/plot_gradient_boosting_quantile.py | 2 +- .../plot_linear_model_coefficient_interpretation.py | 4 ++-- .../kernel_approximation/plot_scalable_poly_kernels.py | 6 +++--- .../plot_poisson_regression_non_normal_loss.py | 2 +- examples/linear_model/plot_quantile_regression.py | 2 +- .../plot_tweedie_regression_insurance_claims.py | 2 +- .../miscellaneous/plot_display_object_visualization.py | 2 +- .../plot_partial_dependence_visualization_api.py | 2 +- examples/model_selection/plot_grid_search_stats.py | 6 +++--- examples/neighbors/plot_lof_novelty_detection.py | 2 +- examples/neighbors/plot_lof_outlier_detection.py | 2 +- .../release_highlights/plot_release_highlights_0_23_0.py | 2 +- setup.py | 2 +- sklearn/_loss/glm_distribution.py | 6 +++--- sklearn/_min_dependencies.py | 2 +- sklearn/calibration.py | 2 +- sklearn/cluster/_bicluster.py | 2 +- sklearn/cluster/_birch.py | 4 ++-- sklearn/cluster/_k_means_elkan.pyx | 4 ++-- sklearn/cluster/_k_means_lloyd.pyx | 2 +- sklearn/cluster/_kmeans.py | 8 ++++---- sklearn/compose/tests/test_column_transformer.py | 2 +- sklearn/covariance/_shrunk_covariance.py | 2 +- sklearn/cross_decomposition/_pls.py | 2 +- sklearn/cross_decomposition/tests/test_pls.py | 2 +- sklearn/datasets/_base.py | 4 ++-- sklearn/datasets/descr/linnerud.rst | 2 +- sklearn/datasets/tests/test_openml.py | 2 +- sklearn/decomposition/_dict_learning.py | 2 +- sklearn/decomposition/_factor_analysis.py | 2 +- sklearn/decomposition/_nmf.py | 4 ++-- sklearn/discriminant_analysis.py | 4 ++-- sklearn/ensemble/_gb.py | 6 +++--- .../ensemble/_hist_gradient_boosting/gradient_boosting.py | 2 +- sklearn/ensemble/_hist_gradient_boosting/histogram.pyx | 2 +- sklearn/ensemble/_hist_gradient_boosting/splitting.pyx | 4 ++-- .../ensemble/_hist_gradient_boosting/tests/test_grower.py | 6 +++--- sklearn/ensemble/tests/test_forest.py | 2 +- 
.../tests/test_gradient_boosting_loss_functions.py | 2 +- sklearn/externals/_arff.py | 2 +- sklearn/feature_extraction/tests/test_text.py | 2 +- sklearn/feature_extraction/text.py | 2 +- sklearn/gaussian_process/kernels.py | 2 +- sklearn/impute/_iterative.py | 2 +- sklearn/inspection/_partial_dependence.py | 2 +- sklearn/inspection/_plot/partial_dependence.py | 4 ++-- sklearn/inspection/tests/test_permutation_importance.py | 2 +- sklearn/linear_model/_glm/tests/test_glm.py | 4 ++-- sklearn/linear_model/_ransac.py | 2 +- sklearn/linear_model/_ridge.py | 2 +- sklearn/linear_model/_sag_fast.pyx.tp | 4 ++-- sklearn/linear_model/_stochastic_gradient.py | 4 ++-- sklearn/linear_model/tests/test_coordinate_descent.py | 4 ++-- sklearn/linear_model/tests/test_ransac.py | 2 +- sklearn/manifold/_barnes_hut_tsne.pyx | 2 +- sklearn/manifold/_t_sne.py | 2 +- sklearn/metrics/_classification.py | 4 ++-- sklearn/metrics/_plot/roc_curve.py | 2 +- sklearn/metrics/_plot/tests/test_plot_precision_recall.py | 2 +- sklearn/metrics/_regression.py | 2 +- sklearn/metrics/tests/test_classification.py | 2 +- sklearn/metrics/tests/test_common.py | 2 +- sklearn/metrics/tests/test_ranking.py | 4 ++-- sklearn/model_selection/_search_successive_halving.py | 8 ++++---- sklearn/model_selection/tests/test_search.py | 2 +- sklearn/model_selection/tests/test_successive_halving.py | 2 +- sklearn/model_selection/tests/test_validation.py | 4 ++-- sklearn/neighbors/_classification.py | 2 +- sklearn/neighbors/_dist_metrics.pyx | 2 +- sklearn/neighbors/_quad_tree.pxd | 2 +- sklearn/neighbors/_quad_tree.pyx | 6 +++--- sklearn/neural_network/tests/test_mlp.py | 2 +- sklearn/preprocessing/_data.py | 8 ++++---- sklearn/preprocessing/_polynomial.py | 2 +- sklearn/preprocessing/tests/test_data.py | 4 ++-- sklearn/preprocessing/tests/test_polynomial.py | 4 ++-- sklearn/semi_supervised/_self_training.py | 2 +- sklearn/svm/src/libsvm/libsvm_sparse_helper.c | 2 +- sklearn/tree/_classes.py | 2 +- sklearn/tree/tests/test_tree.py | 2 +- sklearn/utils/__init__.py | 4 ++-- sklearn/utils/_pprint.py | 2 +- sklearn/utils/_readonly_array_wrapper.pyx | 2 +- sklearn/utils/estimator_checks.py | 2 +- sklearn/utils/extmath.py | 2 +- sklearn/utils/fixes.py | 4 ++-- sklearn/utils/tests/test_arpack.py | 2 +- sklearn/utils/tests/test_estimator_checks.py | 2 +- sklearn/utils/tests/test_estimator_html_repr.py | 6 +++--- sklearn/utils/validation.py | 2 +- 124 files changed, 173 insertions(+), 173 deletions(-) diff --git a/benchmarks/bench_mnist.py b/benchmarks/bench_mnist.py index e0844e70475e4..84111d0a37969 100644 --- a/benchmarks/bench_mnist.py +++ b/benchmarks/bench_mnist.py @@ -6,7 +6,7 @@ Benchmark on the MNIST dataset. The dataset comprises 70,000 samples and 784 features. Here, we consider the task of predicting 10 classes - digits from 0 to 9 from their raw images. By contrast to the -covertype dataset, the feature space is homogenous. +covertype dataset, the feature space is homogeneous. Example of output : [..] 
diff --git a/benchmarks/bench_random_projections.py b/benchmarks/bench_random_projections.py index 7c03b3de365ca..6da894c0ead1e 100644 --- a/benchmarks/bench_random_projections.py +++ b/benchmarks/bench_random_projections.py @@ -43,10 +43,10 @@ def compute_time(t_start, delta): return delta.seconds + delta.microseconds / mu_second -def bench_scikit_transformer(X, transfomer): +def bench_scikit_transformer(X, transformer): gc.collect() - clf = clone(transfomer) + clf = clone(transformer) # start time t_start = datetime.now() @@ -195,7 +195,7 @@ def print_row(clf_type, time_fit, time_transform): ########################################################################### n_nonzeros = int(opts.ratio_nonzeros * opts.n_features) - print("Dataset statics") + print("Dataset statistics") print("===========================") print("n_samples \t= %s" % opts.n_samples) print("n_features \t= %s" % opts.n_features) diff --git a/build_tools/azure/posix-docker.yml b/build_tools/azure/posix-docker.yml index 443ba1d1434e0..18642f1f39b7b 100644 --- a/build_tools/azure/posix-docker.yml +++ b/build_tools/azure/posix-docker.yml @@ -39,7 +39,7 @@ jobs: ${{ insert }}: ${{ parameters.matrix }} steps: - # Container is detached and sleeping, allowing steps to run commmands + # Container is detached and sleeping, allowing steps to run commands # in the container. The TEST_DIR is mapped allowing the host to access # the JUNITXML file - script: > diff --git a/build_tools/circle/list_versions.py b/build_tools/circle/list_versions.py index 1f7b39cdca32e..68e198f8bdb38 100755 --- a/build_tools/circle/list_versions.py +++ b/build_tools/circle/list_versions.py @@ -34,7 +34,7 @@ def human_readable_data_quantity(quantity, multiple=1024): def get_file_extension(version): if "dev" in version: - # The 'dev' branch should be explictly handled + # The 'dev' branch should be explicitly handled return "zip" current_version = LooseVersion(version) diff --git a/build_tools/shared.sh b/build_tools/shared.sh index 4aa260675b9be..59592acbbb93b 100644 --- a/build_tools/shared.sh +++ b/build_tools/shared.sh @@ -5,7 +5,7 @@ get_dep() { # do not install with none echo elif [[ "${version%%[^0-9.]*}" ]]; then - # version number is explicity passed + # version number is explicitly passed echo "$package==$version" elif [[ "$version" == "latest" ]]; then # use latest diff --git a/doc/common_pitfalls.rst b/doc/common_pitfalls.rst index ac5dccb3b5609..308edb4c67c79 100644 --- a/doc/common_pitfalls.rst +++ b/doc/common_pitfalls.rst @@ -560,7 +560,7 @@ bad performance. Similarly, we want a random forest to be robust w.r.t the set of randomly selected features that each tree will be using. For these reasons, it is preferable to evaluate the cross-validation -preformance by letting the estimator use a different RNG on each fold. This +performance by letting the estimator use a different RNG on each fold. This is done by passing a `RandomState` instance (or `None`) to the estimator initialization. 
diff --git a/doc/conf.py b/doc/conf.py index 07ef95e2d470f..659a5d70fa32b 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -240,7 +240,7 @@ "release_highlights" ] = f"auto_examples/release_highlights/{latest_highlights}" -# get version from higlight name assuming highlights have the form +# get version from highlight name assuming highlights have the form # plot_release_highlights_0_22_0 highlight_version = ".".join(latest_highlights.split("_")[-3:-1]) html_context["release_highlights_version"] = highlight_version diff --git a/doc/developers/advanced_installation.rst b/doc/developers/advanced_installation.rst index f46b899c22b62..f5944d371a550 100644 --- a/doc/developers/advanced_installation.rst +++ b/doc/developers/advanced_installation.rst @@ -374,7 +374,7 @@ isolation from the Python packages installed via the system packager. When using an isolated environment, ``pip3`` should be replaced by ``pip`` in the above commands. -When precompiled wheels of the runtime dependencies are not avalaible for your +When precompiled wheels of the runtime dependencies are not available for your architecture (e.g. ARM), you can install the system versions: .. prompt:: bash $ diff --git a/doc/developers/contributing.rst b/doc/developers/contributing.rst index e063d7f9846f4..985a246eb73b4 100644 --- a/doc/developers/contributing.rst +++ b/doc/developers/contributing.rst @@ -1004,7 +1004,7 @@ installed in your current Python environment: asv run --python=same -It's particulary useful when you installed scikit-learn in editable mode to +It's particularly useful when you installed scikit-learn in editable mode to avoid creating a new environment each time you run the benchmarks. By default the results are not saved when using an existing installation. To save the results you must specify a commit hash: diff --git a/doc/developers/maintainer.rst b/doc/developers/maintainer.rst index 8fd439c984660..fdf191ef8843c 100644 --- a/doc/developers/maintainer.rst +++ b/doc/developers/maintainer.rst @@ -33,7 +33,7 @@ Before a release - ``maint_tools/sort_whats_new.py`` can put what's new entries into sections. It's not perfect, and requires manual checking of the changes. - If the whats new list is well curated, it may not be necessary. + If the what's new list is well curated, it may not be necessary. - The ``maint_tools/whats_missing.sh`` script may be used to identify pull requests that were merged but likely missing from What's New. @@ -198,7 +198,7 @@ Making a release `Continuous Integration `_. The CD workflow on GitHub Actions is also used to automatically create nightly builds and - publish packages for the developement branch of scikit-learn. See + publish packages for the development branch of scikit-learn. See :ref:`install_nightly_builds`. 4. Once all the CD jobs have completed successfully in the PR, merge it, diff --git a/doc/install.rst b/doc/install.rst index d0b0f50e78f90..808609b96586f 100644 --- a/doc/install.rst +++ b/doc/install.rst @@ -158,7 +158,7 @@ Installing on Apple Silicon M1 hardware The recently introduced `macos/arm64` platform (sometimes also known as `macos/aarch64`) requires the open source community to upgrade the build -configuation and automation to properly support it. +configuration and automation to properly support it. 
At the time of writing (January 2021), the only way to get a working installation of scikit-learn on this hardware is to install scikit-learn and its @@ -204,7 +204,7 @@ It can be installed by typing the following command: Debian/Ubuntu ------------- -The Debian/Ubuntu package is splitted in three different packages called +The Debian/Ubuntu package is split in three different packages called ``python3-sklearn`` (python modules), ``python3-sklearn-lib`` (low-level implementations and bindings), ``python3-sklearn-doc`` (documentation). Only the Python 3 version is available in the Debian Buster (the more recent diff --git a/doc/modules/compose.rst b/doc/modules/compose.rst index d4dacb6d723ea..6f986b2c6a4ec 100644 --- a/doc/modules/compose.rst +++ b/doc/modules/compose.rst @@ -573,7 +573,7 @@ many estimators. This visualization is activated by setting the >>> from sklearn import set_config >>> set_config(display='diagram') # doctest: +SKIP - >>> # diplays HTML representation in a jupyter context + >>> # displays HTML representation in a jupyter context >>> column_trans # doctest: +SKIP An example of the HTML output can be seen in the diff --git a/doc/modules/cross_decomposition.rst b/doc/modules/cross_decomposition.rst index 981f7d98fbbde..5c9aed46e66ea 100644 --- a/doc/modules/cross_decomposition.rst +++ b/doc/modules/cross_decomposition.rst @@ -64,7 +64,7 @@ Set :math:`X_1` to :math:`X` and :math:`Y_1` to :math:`Y`. Then, for each :math:`C = X_k^T Y_k`. :math:`u_k` and :math:`v_k` are called the *weights*. By definition, :math:`u_k` and :math:`v_k` are - choosen so that they maximize the covariance between the projected + chosen so that they maximize the covariance between the projected :math:`X_k` and the projected target, that is :math:`\text{Cov}(X_k u_k, Y_k v_k)`. - b) Project :math:`X_k` and :math:`Y_k` on the singular vectors to obtain diff --git a/doc/modules/cross_validation.rst b/doc/modules/cross_validation.rst index 36d3d842540c0..0e4b3580c5735 100644 --- a/doc/modules/cross_validation.rst +++ b/doc/modules/cross_validation.rst @@ -974,7 +974,7 @@ test is therefore only able to show when the model reliably outperforms random guessing. Finally, :func:`~sklearn.model_selection.permutation_test_score` is computed -using brute force and interally fits ``(n_permutations + 1) * n_cv`` models. +using brute force and internally fits ``(n_permutations + 1) * n_cv`` models. It is therefore only tractable with small datasets for which fitting an individual model is very fast. diff --git a/doc/modules/decomposition.rst b/doc/modules/decomposition.rst index f90948fdc66b2..a76810bbead6f 100644 --- a/doc/modules/decomposition.rst +++ b/doc/modules/decomposition.rst @@ -829,7 +829,7 @@ and the intensity of the regularization with the :attr:`alpha_W` and :attr:`alph (:math:`\alpha_W` and :math:`\alpha_H`) parameters. The priors are scaled by the number of samples (:math:`n\_samples`) for `H` and the number of features (:math:`n\_features`) for `W` to keep their impact balanced with respect to one another and to the data fit -term as independant as possible of the size of the training set. Then the priors terms +term as independent as possible of the size of the training set. Then the priors terms are: .. math:: diff --git a/doc/modules/lda_qda.rst b/doc/modules/lda_qda.rst index 076c8fbb38498..02b6c88cb7001 100644 --- a/doc/modules/lda_qda.rst +++ b/doc/modules/lda_qda.rst @@ -187,7 +187,7 @@ an estimate for the covariance matrix). 
Setting this parameter to a value between these two extrema will estimate a shrunk version of the covariance matrix. -The shrinked Ledoit and Wolf estimator of covariance may not always be the +The shrunk Ledoit and Wolf estimator of covariance may not always be the best choice. For example if the distribution of the data is normally distributed, the Oracle Shrinkage Approximating estimator :class:`sklearn.covariance.OAS` @@ -234,7 +234,7 @@ For QDA, the use of the SVD solver relies on the fact that the covariance matrix :math:`\Sigma_k` is, by definition, equal to :math:`\frac{1}{n - 1} X_k^tX_k = \frac{1}{n - 1} V S^2 V^t` where :math:`V` comes from the SVD of the (centered) matrix: :math:`X_k = U S V^t`. It turns out that we can compute the -log-posterior above without having to explictly compute :math:`\Sigma`: +log-posterior above without having to explicitly compute :math:`\Sigma`: computing :math:`S` and :math:`V` via the SVD of :math:`X` is enough. For LDA, two SVDs are computed: the SVD of the centered input matrix :math:`X` and the SVD of the class-wise mean vectors. diff --git a/doc/modules/model_evaluation.rst b/doc/modules/model_evaluation.rst index f39b29eb8ea86..1f80a30312588 100644 --- a/doc/modules/model_evaluation.rst +++ b/doc/modules/model_evaluation.rst @@ -2381,7 +2381,7 @@ of 0.0. A scorer object with a specific choice of ``power`` can be built by:: >>> from sklearn.metrics import d2_tweedie_score, make_scorer - >>> d2_tweedie_score_15 = make_scorer(d2_tweedie_score, pwoer=1.5) + >>> d2_tweedie_score_15 = make_scorer(d2_tweedie_score, power=1.5) .. _pinball_loss: diff --git a/doc/modules/outlier_detection.rst b/doc/modules/outlier_detection.rst index 496b840e0c6da..cce39e4080071 100644 --- a/doc/modules/outlier_detection.rst +++ b/doc/modules/outlier_detection.rst @@ -332,7 +332,7 @@ chosen 1) greater than the minimum number of objects a cluster has to contain, so that other objects can be local outliers relative to this cluster, and 2) smaller than the maximum number of close by objects that can potentially be local outliers. -In practice, such informations are generally not available, and taking +In practice, such information is generally not available, and taking n_neighbors=20 appears to work well in general. When the proportion of outliers is high (i.e. greater than 10 \%, as in the example below), n_neighbors should be greater (n_neighbors=35 in the example diff --git a/doc/modules/sgd.rst b/doc/modules/sgd.rst index 0b618289b84ec..49a1008eae23f 100644 --- a/doc/modules/sgd.rst +++ b/doc/modules/sgd.rst @@ -123,7 +123,7 @@ Please refer to the :ref:`mathematical section below The first two loss functions are lazy, they only update the model parameters if an example violates the margin constraint, which makes training very efficient and may result in sparser models (i.e. with more zero -coefficents), even when L2 penalty is used. +coefficients), even when L2 penalty is used. Using ``loss="log"`` or ``loss="modified_huber"`` enables the ``predict_proba`` method, which gives a vector of probability estimates @@ -408,7 +408,7 @@ parameters, we minimize the regularized training error given by where :math:`L` is a loss function that measures model (mis)fit and :math:`R` is a regularization term (aka penalty) that penalizes model complexity; :math:`\alpha > 0` is a non-negative hyperparameter that controls -the regularization stength. +the regularization strength. 
Different choices for :math:`L` entail different classifiers or regressors: diff --git a/doc/modules/svm.rst b/doc/modules/svm.rst index fcf1d3e23976b..085d52b89ea8b 100644 --- a/doc/modules/svm.rst +++ b/doc/modules/svm.rst @@ -623,7 +623,7 @@ misclassified or within the margin boundary. Ideally, the value :math:`y_i (w^T \phi (x_i) + b)` would be :math:`\geq 1` for all samples, which indicates a perfect prediction. But problems are usually not always perfectly separable with a hyperplane, so we allow some samples to be at a distance :math:`\zeta_i` from -their correct margin boundary. The penalty term `C` controls the strengh of +their correct margin boundary. The penalty term `C` controls the strength of this penalty, and as a result, acts as an inverse regularization parameter (see note below). diff --git a/doc/roadmap.rst b/doc/roadmap.rst index 2bead90522739..df8811b968d7e 100644 --- a/doc/roadmap.rst +++ b/doc/roadmap.rst @@ -51,7 +51,7 @@ external to the core library. (i.e. rectangular data largely invariant to column and row order; predicting targets with simple structure) * improve the ease for users to develop and publish external components -* improve inter-operability with modern data science tools (e.g. Pandas, Dask) +* improve interoperability with modern data science tools (e.g. Pandas, Dask) and infrastructures (e.g. distributed processing) Many of the more fine-grained goals can be found under the `API tag diff --git a/doc/themes/scikit-learn-modern/static/css/theme.css b/doc/themes/scikit-learn-modern/static/css/theme.css index 4d2b78c6a7322..b3765285a4b07 100644 --- a/doc/themes/scikit-learn-modern/static/css/theme.css +++ b/doc/themes/scikit-learn-modern/static/css/theme.css @@ -1237,7 +1237,7 @@ table.sk-sponsor-table td { text-align: center } -/* pygments - highlightning */ +/* pygments - highlighting */ .highlight .hll { background-color: #ffffcc } .highlight { background: #f8f8f8; } diff --git a/doc/tutorial/machine_learning_map/ML_MAPS_README.txt b/doc/tutorial/machine_learning_map/ML_MAPS_README.txt index 47fe633767995..114ecb2d13f59 100644 --- a/doc/tutorial/machine_learning_map/ML_MAPS_README.txt +++ b/doc/tutorial/machine_learning_map/ML_MAPS_README.txt @@ -7,7 +7,7 @@ by Andreas Mueller: (https://peekaboo-vision.blogspot.de/2013/01/machine-learning-cheat-sheet-for-scikit.html) -The image is made interactive using an imagemap, and uses the jQuery Map Hilight plugin module +The image is made interactive using an imagemap, and uses the jQuery Map Highlight plugin module by David Lynch (https://davidlynch.org/projects/maphilight/docs/) to highlight the different items on the image upon mouseover. 
diff --git a/doc/tutorial/machine_learning_map/pyparsing.py b/doc/tutorial/machine_learning_map/pyparsing.py index 0c5fca5cf891d..a0f4a66c7291e 100644 --- a/doc/tutorial/machine_learning_map/pyparsing.py +++ b/doc/tutorial/machine_learning_map/pyparsing.py @@ -2836,7 +2836,7 @@ class QuotedString(Token): def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None, convertWhitespaceEscapes=True): super(QuotedString,self).__init__() - # remove white space from quote chars - wont work anyway + # remove white space from quote chars - won't work anyway quoteChar = quoteChar.strip() if not quoteChar: warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2) diff --git a/doc/whats_new/v0.16.rst b/doc/whats_new/v0.16.rst index 931c7e0fbb923..a9c9f0b2614fd 100644 --- a/doc/whats_new/v0.16.rst +++ b/doc/whats_new/v0.16.rst @@ -54,7 +54,7 @@ Highlights - Out-of core learning of PCA via :class:`decomposition.IncrementalPCA`. -- Probability callibration of classifiers using +- Probability calibration of classifiers using :class:`calibration.CalibratedClassifierCV`. - :class:`cluster.Birch` clustering method for large-scale datasets. diff --git a/doc/whats_new/v0.20.rst b/doc/whats_new/v0.20.rst index 2eaf3199fbc3c..add4d97f6de09 100644 --- a/doc/whats_new/v0.20.rst +++ b/doc/whats_new/v0.20.rst @@ -1286,7 +1286,7 @@ Support for Python 3.3 has been officially dropped. be used for novelty detection, i.e. predict on new unseen data. Available prediction methods are ``predict``, ``decision_function`` and ``score_samples``. By default, ``novelty`` is set to ``False``, and only - the ``fit_predict`` method is avaiable. + the ``fit_predict`` method is available. By :user:`Albert Thomas `. - |Fix| Fixed a bug in :class:`neighbors.NearestNeighbors` where fitting a diff --git a/doc/whats_new/v0.21.rst b/doc/whats_new/v0.21.rst index 8012fd02b4733..daedf3d3808f6 100644 --- a/doc/whats_new/v0.21.rst +++ b/doc/whats_new/v0.21.rst @@ -1060,7 +1060,7 @@ These changes mostly affect library developers. - Add ``check_fit_idempotent`` to :func:`~utils.estimator_checks.check_estimator`, which checks that - when `fit` is called twice with the same data, the ouput of + when `fit` is called twice with the same data, the output of `predict`, `predict_proba`, `transform`, and `decision_function` does not change. :pr:`12328` by :user:`Nicolas Hug ` diff --git a/doc/whats_new/v0.23.rst b/doc/whats_new/v0.23.rst index 598d9adc5cef4..ebf63eac5b8a3 100644 --- a/doc/whats_new/v0.23.rst +++ b/doc/whats_new/v0.23.rst @@ -341,7 +341,7 @@ Changelog :pr:`16006` by :user:`Rushabh Vasani `. - |API| The `StreamHandler` was removed from `sklearn.logger` to avoid - double logging of messages in common cases where a hander is attached + double logging of messages in common cases where a handler is attached to the root logger, and to follow the Python logging documentation recommendation for libraries to leave the log message handling to users and application code. :pr:`16451` by :user:`Christoph Deil `. diff --git a/doc/whats_new/v0.24.rst b/doc/whats_new/v0.24.rst index 9303a98ea4c97..f602303d1fcb0 100644 --- a/doc/whats_new/v0.24.rst +++ b/doc/whats_new/v0.24.rst @@ -713,7 +713,7 @@ Changelog :user:`Joseph Willard ` - |Fix| bug in :func:`metrics.hinge_loss` where error occurs when - ``y_true`` is missing some labels that are provided explictly in the + ``y_true`` is missing some labels that are provided explicitly in the ``labels`` parameter. 
:pr:`17935` by :user:`Cary Goltermann `. diff --git a/examples/applications/plot_cyclical_feature_engineering.py b/examples/applications/plot_cyclical_feature_engineering.py index c27fe5f1e0494..2d57c1a7b41b5 100644 --- a/examples/applications/plot_cyclical_feature_engineering.py +++ b/examples/applications/plot_cyclical_feature_engineering.py @@ -215,7 +215,7 @@ # %% # # Lets evaluate our gradient boosting model with the mean absolute error of the -# relative demand averaged accross our 5 time-based cross-validation splits: +# relative demand averaged across our 5 time-based cross-validation splits: def evaluate(model, X, y, cv): diff --git a/examples/calibration/plot_calibration_multiclass.py b/examples/calibration/plot_calibration_multiclass.py index 5aaeedbb079fe..ef1a53056009d 100644 --- a/examples/calibration/plot_calibration_multiclass.py +++ b/examples/calibration/plot_calibration_multiclass.py @@ -178,7 +178,7 @@ class of an instance (red: class 1, green: class 2, blue: class 3). print(f" * calibrated classifier: {cal_score:.3f}") # %% -# Finally we generate a grid of possibile uncalibrated probabilities over +# Finally we generate a grid of possible uncalibrated probabilities over # the 2-simplex, compute the corresponding calibrated probabilities and # plot arrows for each. The arrows are colored according the highest # uncalibrated probability. This illustrates the learned calibration map: diff --git a/examples/covariance/plot_mahalanobis_distances.py b/examples/covariance/plot_mahalanobis_distances.py index b517db5fa9613..ab2e9fe8471d5 100644 --- a/examples/covariance/plot_mahalanobis_distances.py +++ b/examples/covariance/plot_mahalanobis_distances.py @@ -70,7 +70,7 @@ # are Gaussian distributed with mean of 0 but feature 1 has a standard # deviation equal to 2 and feature 2 has a standard deviation equal to 1. Next, # 25 samples are replaced with Gaussian outlier samples where feature 1 has -# a standard devation equal to 1 and feature 2 has a standard deviation equal +# a standard deviation equal to 1 and feature 2 has a standard deviation equal # to 7. import numpy as np diff --git a/examples/cross_decomposition/plot_pcr_vs_pls.py b/examples/cross_decomposition/plot_pcr_vs_pls.py index 17a9cce651a73..cc22f3bd0ebc6 100644 --- a/examples/cross_decomposition/plot_pcr_vs_pls.py +++ b/examples/cross_decomposition/plot_pcr_vs_pls.py @@ -134,7 +134,7 @@ # # On the other hand, the PLS regressor manages to capture the effect of the # direction with the lowest variance, thanks to its use of target information -# during the transformation: it can recogize that this direction is actually +# during the transformation: it can recognize that this direction is actually # the most predictive. We note that the first PLS component is negatively # correlated with the target, which comes from the fact that the signs of # eigenvectors are arbitrary. diff --git a/examples/ensemble/plot_gradient_boosting_early_stopping.py b/examples/ensemble/plot_gradient_boosting_early_stopping.py index ab3acdf6ce08f..6f38e57a15ca1 100644 --- a/examples/ensemble/plot_gradient_boosting_early_stopping.py +++ b/examples/ensemble/plot_gradient_boosting_early_stopping.py @@ -17,7 +17,7 @@ model is trained using the training set and evaluated using the validation set. When each additional stage of regression tree is added, the validation set is used to score the model. This is continued until the scores of the model in -the last ``n_iter_no_change`` stages do not improve by atleast `tol`. 
After +the last ``n_iter_no_change`` stages do not improve by at least `tol`. After that the model is considered to have converged and further addition of stages is "stopped early". @@ -64,7 +64,7 @@ X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0) - # We specify that if the scores don't improve by atleast 0.01 for the last + # We specify that if the scores don't improve by at least 0.01 for the last # 10 stages, stop fitting additional stages gbes = ensemble.GradientBoostingClassifier(n_estimators=n_estimators, validation_fraction=0.2, diff --git a/examples/ensemble/plot_gradient_boosting_quantile.py b/examples/ensemble/plot_gradient_boosting_quantile.py index 00be70721c1da..67e208ece0b06 100644 --- a/examples/ensemble/plot_gradient_boosting_quantile.py +++ b/examples/ensemble/plot_gradient_boosting_quantile.py @@ -184,7 +184,7 @@ def highlight_min(x): # the fact the squared error estimator is very sensitive to large outliers # which can cause significant overfitting. This can be seen on the right hand # side of the previous plot. The conditional median estimator is biased -# (underestimation for this asymetric noise) but is also naturally robust to +# (underestimation for this asymmetric noise) but is also naturally robust to # outliers and overfits less. # # Calibration of the confidence interval diff --git a/examples/inspection/plot_linear_model_coefficient_interpretation.py b/examples/inspection/plot_linear_model_coefficient_interpretation.py index 89c27110e5d16..69b52c6b5ce3f 100644 --- a/examples/inspection/plot_linear_model_coefficient_interpretation.py +++ b/examples/inspection/plot_linear_model_coefficient_interpretation.py @@ -354,7 +354,7 @@ # %% # Two regions are populated: when the EXPERIENCE coefficient is -# positive the AGE one is negative and viceversa. +# positive the AGE one is negative and vice-versa. # # To go further we remove one of the 2 features and check what is the impact # on the model stability. @@ -664,7 +664,7 @@ # It is important to keep in mind that the coefficients that have been # dropped may still be related to the outcome by themselves: the model # chose to suppress them because they bring little or no additional -# information on top of the other features. Additionnaly, this selection +# information on top of the other features. Additionally, this selection # is unstable for correlated features, and should be interpreted with # caution. # diff --git a/examples/kernel_approximation/plot_scalable_poly_kernels.py b/examples/kernel_approximation/plot_scalable_poly_kernels.py index 845ba1fdf3050..7d026dbcf16d6 100644 --- a/examples/kernel_approximation/plot_scalable_poly_kernels.py +++ b/examples/kernel_approximation/plot_scalable_poly_kernels.py @@ -1,6 +1,6 @@ """ ======================================================= -Scalable learning with polynomial kernel aproximation +Scalable learning with polynomial kernel approximation ======================================================= This example illustrates the use of :class:`PolynomialCountSketch` to @@ -143,7 +143,7 @@ print(f"Kernel-SVM score on raw featrues: {ksvm_score:.2f}%") # %% -# Finally, plot the resuts of the different methods against their training +# Finally, plot the results of the different methods against their training # times. As we can see, the kernelized SVM achieves a higher accuracy, # but its training time is much larger and, most importantly, will grow # much faster if the number of training samples increases. 
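To make the trade-off discussed in the plot_scalable_poly_kernels.py hunks above concrete, here is a minimal sketch of the approximated pipeline that the example benchmarks; the `degree` and `n_components` values are illustrative assumptions, not the example's exact settings:

# Approximate a degree-2 polynomial kernel with PolynomialCountSketch, then
# fit a linear SVM on the expanded features; this trades a little accuracy
# for much better scaling than an exact kernelized SVM.
from sklearn.datasets import make_classification
from sklearn.kernel_approximation import PolynomialCountSketch
from sklearn.pipeline import make_pipeline
from sklearn.svm import LinearSVC

X, y = make_classification(n_samples=1000, random_state=0)
model = make_pipeline(
    PolynomialCountSketch(degree=2, n_components=250, random_state=0),
    LinearSVC(dual=False, random_state=0),
)
print(model.fit(X, y).score(X, y))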
@@ -170,7 +170,7 @@ label="Kernel SVM", c="red", marker="x") ax.set_xlabel("Training time (s)") -ax.set_ylabel("Accurary (%)") +ax.set_ylabel("Accuracy (%)") ax.legend() plt.show() diff --git a/examples/linear_model/plot_poisson_regression_non_normal_loss.py b/examples/linear_model/plot_poisson_regression_non_normal_loss.py index 7ebda543b4059..570baee9e1f67 100644 --- a/examples/linear_model/plot_poisson_regression_non_normal_loss.py +++ b/examples/linear_model/plot_poisson_regression_non_normal_loss.py @@ -440,7 +440,7 @@ def _mean_frequency_by_risk_group(y_true, y_pred, sample_weight=None, # well calibrated (to estimate the mean frequency of the entire population). # # The ``Ridge`` regression model can predict very low expected frequencies that -# do not match the data. It can therefore severly under-estimate the risk for +# do not match the data. It can therefore severely under-estimate the risk for # some policyholders. # # ``PoissonRegressor`` and ``HistGradientBoostingRegressor`` show better diff --git a/examples/linear_model/plot_quantile_regression.py b/examples/linear_model/plot_quantile_regression.py index 404d7a314d553..e1f1d484bf6b5 100644 --- a/examples/linear_model/plot_quantile_regression.py +++ b/examples/linear_model/plot_quantile_regression.py @@ -25,7 +25,7 @@ # ------------------ # # To illustrate the behaviour of quantile regression, we will generate two -# synthetic datasets. The true generative random processess for both datasets +# synthetic datasets. The true generative random processes for both datasets # will be composed by the same expected value with a linear relationship with a # single feature `x`. import numpy as np diff --git a/examples/linear_model/plot_tweedie_regression_insurance_claims.py b/examples/linear_model/plot_tweedie_regression_insurance_claims.py index 35ced1a58983c..8edf97d0738a9 100644 --- a/examples/linear_model/plot_tweedie_regression_insurance_claims.py +++ b/examples/linear_model/plot_tweedie_regression_insurance_claims.py @@ -252,7 +252,7 @@ def score_estimator( # # The number of claims (``ClaimNb``) is a positive integer (0 included). # Thus, this target can be modelled by a Poisson distribution. -# It is then assumed to be the number of discrete events occuring with a +# It is then assumed to be the number of discrete events occurring with a # constant rate in a given time interval (``Exposure``, in units of years). # Here we model the frequency ``y = ClaimNb / Exposure``, which is still a # (scaled) Poisson distribution, and use ``Exposure`` as `sample_weight`. diff --git a/examples/miscellaneous/plot_display_object_visualization.py b/examples/miscellaneous/plot_display_object_visualization.py index 3a16dfa7d1a68..a05f17fc9aba5 100644 --- a/examples/miscellaneous/plot_display_object_visualization.py +++ b/examples/miscellaneous/plot_display_object_visualization.py @@ -22,7 +22,7 @@ # `OpenML `. This is a binary classification # problem where the target is whether an individual donated blood. Then the # data is split into a train and test dataset and a logistic regression is -# fitted wtih the train dataset. +# fitted with the train dataset. 
+# fitted with the train dataset.
from sklearn.datasets import fetch_openml from sklearn.preprocessing import StandardScaler from sklearn.pipeline import make_pipeline diff --git a/examples/miscellaneous/plot_partial_dependence_visualization_api.py b/examples/miscellaneous/plot_partial_dependence_visualization_api.py index a2219c4cb1c13..342ba14a338b1 100644 --- a/examples/miscellaneous/plot_partial_dependence_visualization_api.py +++ b/examples/miscellaneous/plot_partial_dependence_visualization_api.py @@ -57,7 +57,7 @@ tree_disp = PartialDependenceDisplay.from_estimator(tree, X, ["age", "bmi"], ax=ax) # %% -# The partial depdendence curves can be plotted for the multi-layer perceptron. +# The partial dependence curves can be plotted for the multi-layer perceptron. # In this case, `line_kw` is passed to # :func:`~sklearn.inspection.PartialDependenceDisplay.from_estimator` to change the # color of the curve. diff --git a/examples/model_selection/plot_grid_search_stats.py b/examples/model_selection/plot_grid_search_stats.py index e0620deb73e2b..1b434ec0e8b6e 100644 --- a/examples/model_selection/plot_grid_search_stats.py +++ b/examples/model_selection/plot_grid_search_stats.py @@ -80,7 +80,7 @@ # We can see that the estimator using the `'rbf'` kernel performed best, # closely followed by `'linear'`. Both estimators with a `'poly'` kernel # performed worse, with the one using a two-degree polynomial achieving a much -# lower perfomance than all other models. +# lower performance than all other models. # # Usually, the analysis just ends here, but half the story is missing. The # output of :class:`~sklearn.model_selection.GridSearchCV` does not provide @@ -101,7 +101,7 @@ # Let's inspect this partition effect by plotting the performance of all models # in each fold, and calculating the correlation between models across folds: -# create df of model scores ordered by perfomance +# create df of model scores ordered by performance model_scores = results_df.filter(regex=r'split\d*_test_score') # plot 30 examples of dependency between cv fold and AUC scores @@ -146,7 +146,7 @@ # described in the previous section. We will use the one proven to obtain the # highest replicability scores (which rate how similar the performance of a # model is when evaluating it on different random partitions of the same -# dataset) while mantaining a low rate of false postitives and false negatives: +# dataset) while maintaining a low rate of false positives and false negatives: # the Nadeau and Bengio's corrected t-test [2]_ that uses a 10 times repeated # 10-fold cross validation [3]_. # diff --git a/examples/neighbors/plot_lof_novelty_detection.py b/examples/neighbors/plot_lof_novelty_detection.py index 71c0736a256a5..f1129d0bd64e6 100644 --- a/examples/neighbors/plot_lof_novelty_detection.py +++ b/examples/neighbors/plot_lof_novelty_detection.py @@ -20,7 +20,7 @@ so that other samples can be local outliers relative to this cluster, and 2) smaller than the maximum number of close by samples that can potentially be local outliers. -In practice, such informations are generally not available, and taking +In practice, such information is generally not available, and taking n_neighbors=20 appears to work well in general. 
""" diff --git a/examples/neighbors/plot_lof_outlier_detection.py b/examples/neighbors/plot_lof_outlier_detection.py index 6f0e5bb490b9b..4bb2949bcdcd7 100644 --- a/examples/neighbors/plot_lof_outlier_detection.py +++ b/examples/neighbors/plot_lof_outlier_detection.py @@ -19,7 +19,7 @@ so that other samples can be local outliers relative to this cluster, and 2) smaller than the maximum number of close by samples that can potentially be local outliers. -In practice, such informations are generally not available, and taking +In practice, such information is generally not available, and taking n_neighbors=20 appears to work well in general. """ diff --git a/examples/release_highlights/plot_release_highlights_0_23_0.py b/examples/release_highlights/plot_release_highlights_0_23_0.py index 364cd7958003e..409c41a035540 100644 --- a/examples/release_highlights/plot_release_highlights_0_23_0.py +++ b/examples/release_highlights/plot_release_highlights_0_23_0.py @@ -110,7 +110,7 @@ # Various improvements were made to # :class:`~sklearn.ensemble.HistGradientBoostingClassifier` and # :class:`~sklearn.ensemble.HistGradientBoostingRegressor`. On top of the -# Poisson loss mentionned above, these estimators now support :ref:`sample +# Poisson loss mentioned above, these estimators now support :ref:`sample # weights `. Also, an automatic early-stopping criterion was added: # early-stopping is enabled by default when the number of samples exceeds 10k. # Finally, users can now define :ref:`monotonic constraints diff --git a/setup.py b/setup.py index 85e7a52be34f6..9bced528b6e11 100755 --- a/setup.py +++ b/setup.py @@ -183,7 +183,7 @@ def configuration(parent_package="", top_path=None): config = Configuration(None, parent_package, top_path) - # Avoid non-useful msg: + # Avoid useless msg: # "Ignoring attempt to set 'name' (from ... " config.set_options( ignore_setup_xxx_py=True, diff --git a/sklearn/_loss/glm_distribution.py b/sklearn/_loss/glm_distribution.py index cfdd42dede46f..dfc512c8b10b7 100644 --- a/sklearn/_loss/glm_distribution.py +++ b/sklearn/_loss/glm_distribution.py @@ -279,7 +279,7 @@ def unit_deviance(self, y, y_pred, check_input=False): ) ) if p < 0: - # 'Extreme stable', y any realy number, y_pred > 0 + # 'Extreme stable', y any real number, y_pred > 0 if (y_pred <= 0).any(): raise ValueError(message + "strictly positive y_pred.") elif p == 0: @@ -290,7 +290,7 @@ def unit_deviance(self, y, y_pred, check_input=False): "Tweedie deviance is only defined for power<=0 and power>=1." ) elif 1 <= p < 2: - # Poisson and Compount poisson distribution, y >= 0, y_pred > 0 + # Poisson and compound Poisson distribution, y >= 0, y_pred > 0 if (y < 0).any() or (y_pred <= 0).any(): raise ValueError( message + "non-negative y and strictly positive y_pred." @@ -304,7 +304,7 @@ def unit_deviance(self, y, y_pred, check_input=False): raise ValueError if p < 0: - # 'Extreme stable', y any realy number, y_pred > 0 + # 'Extreme stable', y any real number, y_pred > 0 dev = 2 * ( np.power(np.maximum(y, 0), 2 - p) / ((1 - p) * (2 - p)) - y * np.power(y_pred, 1 - p) / (1 - p) diff --git a/sklearn/_min_dependencies.py b/sklearn/_min_dependencies.py index dac599d9f0ebe..85a5d8555ae1e 100644 --- a/sklearn/_min_dependencies.py +++ b/sklearn/_min_dependencies.py @@ -18,7 +18,7 @@ # 'build' and 'install' is included to have structured metadata for CI. 
# It will NOT be included in setup's extras_require -# The values are (version_spec, comma seperated tags) +# The values are (version_spec, comma separated tags) dependent_packages = { "numpy": (NUMPY_MIN_VERSION, "build, install"), "scipy": (SCIPY_MIN_VERSION, "build, install"), diff --git a/sklearn/calibration.py b/sklearn/calibration.py index fe5e21577a434..66fa5d05687ab 100644 --- a/sklearn/calibration.py +++ b/sklearn/calibration.py @@ -127,7 +127,7 @@ class CalibratedClassifierCV(ClassifierMixin, MetaEstimatorMixin, BaseEstimator) If `True`, the `base_estimator` is fitted using training data and calibrated using testing data, for each `cv` fold. The final estimator - is an ensemble of `n_cv` fitted classifer and calibrator pairs, where + is an ensemble of `n_cv` fitted classifier and calibrator pairs, where `n_cv` is the number of cross-validation folds. The output is the average predicted probabilities of all pairs. diff --git a/sklearn/cluster/_bicluster.py b/sklearn/cluster/_bicluster.py index ee77d135a3107..da7d0733267eb 100644 --- a/sklearn/cluster/_bicluster.py +++ b/sklearn/cluster/_bicluster.py @@ -195,7 +195,7 @@ def _more_tags(self): "check_estimator_sparse_data": "does not fail gracefully", "check_methods_subset_invariance": "empty array passed inside", "check_dont_overwrite_parameters": "empty array passed inside", - "check_fit2d_predict1d": "emptry array passed inside", + "check_fit2d_predict1d": "empty array passed inside", } } diff --git a/sklearn/cluster/_birch.py b/sklearn/cluster/_birch.py index 4f0e7200c2734..78c15bb8e1a15 100644 --- a/sklearn/cluster/_birch.py +++ b/sklearn/cluster/_birch.py @@ -512,7 +512,7 @@ def fit(self, X, y=None): self Fitted estimator. """ - # TODO: Remove deprected flags in 1.2 + # TODO: Remove deprecated flags in 1.2 self._deprecated_fit, self._deprecated_partial_fit = True, False return self._fit(X, partial=False) @@ -616,7 +616,7 @@ def partial_fit(self, X=None, y=None): self Fitted estimator. """ - # TODO: Remove deprected flags in 1.2 + # TODO: Remove deprecated flags in 1.2 self._deprecated_partial_fit, self._deprecated_fit = True, False if X is None: # Perform just the final global clustering step. diff --git a/sklearn/cluster/_k_means_elkan.pyx b/sklearn/cluster/_k_means_elkan.pyx index fea22d9184c7e..9459d5e9fc316 100644 --- a/sklearn/cluster/_k_means_elkan.pyx +++ b/sklearn/cluster/_k_means_elkan.pyx @@ -257,7 +257,7 @@ def elkan_iter_chunked_dense( int n_clusters = centers_new.shape[0] # hard-coded number of samples per chunk. Splitting in chunks is - # necessary to get parallelism. Chunk size chosed to be same as lloyd's + # necessary to get parallelism. Chunk size chosen to be same as lloyd's int n_samples_chunk = CHUNK_SIZE if n_samples > CHUNK_SIZE else n_samples int n_chunks = n_samples // n_samples_chunk int n_samples_rem = n_samples % n_samples_chunk @@ -486,7 +486,7 @@ def elkan_iter_chunked_sparse( int[::1] X_indptr = X.indptr # hard-coded number of samples per chunk. Splitting in chunks is - # necessary to get parallelism. Chunk size chosed to be same as lloyd's + # necessary to get parallelism. 
Chunk size chosen to be same as lloyd's
     int n_samples_chunk = CHUNK_SIZE if n_samples > CHUNK_SIZE else n_samples
     int n_chunks = n_samples // n_samples_chunk
     int n_samples_rem = n_samples % n_samples_chunk
diff --git a/sklearn/cluster/_k_means_lloyd.pyx b/sklearn/cluster/_k_means_lloyd.pyx
index a9366c0028381..e3526888c82ab 100644
--- a/sklearn/cluster/_k_means_lloyd.pyx
+++ b/sklearn/cluster/_k_means_lloyd.pyx
@@ -279,7 +279,7 @@ def lloyd_iter_chunked_sparse(
     int n_features = X.shape[1]
     int n_clusters = centers_new.shape[0]

-    # Chosed same as for dense. Does not have the same impact since with
+    # Chosen same as for dense. Does not have the same impact since with
     # sparse data the pairwise distances matrix is not precomputed.
     # However, splitting in chunks is necessary to get parallelism.
     int n_samples_chunk = CHUNK_SIZE if n_samples > CHUNK_SIZE else n_samples
diff --git a/sklearn/cluster/_kmeans.py b/sklearn/cluster/_kmeans.py
index 0e8e668b9d61c..5155f49713df5 100644
--- a/sklearn/cluster/_kmeans.py
+++ b/sklearn/cluster/_kmeans.py
@@ -80,7 +80,7 @@ def kmeans_plusplus(
     Returns
     -------
     centers : ndarray of shape (n_clusters, n_features)
-        The inital centers for k-means.
+        The initial centers for k-means.

     indices : ndarray of shape (n_clusters,)
         The index location of the chosen centers in the data array X. For a
@@ -172,7 +172,7 @@ def _kmeans_plusplus(X, n_clusters, x_squared_norms, random_state, n_local_trial
     Returns
     -------
     centers : ndarray of shape (n_clusters, n_features)
-        The inital centers for k-means.
+        The initial centers for k-means.

     indices : ndarray of shape (n_clusters,)
         The index location of the chosen centers in the data array X. For a
@@ -830,7 +830,7 @@ class KMeans(TransformerMixin, ClusterMixin, BaseEstimator):
         intensive due to the allocation of an extra array of shape
         (n_samples, n_clusters).

-        For now "auto" (kept for backward compatibiliy) chooses "elkan" but it
+        For now "auto" (kept for backward compatibility) chooses "elkan" but it
         might change in the future for a better heuristic.

         .. versionchanged:: 0.18
@@ -1616,7 +1616,7 @@ class MiniBatchKMeans(KMeans):
         .. versionadded:: 1.0

     counts_ : ndarray of shape (n_clusters,)
-        Weigth sum of each cluster.
+        Weight sum of each cluster.

         .. deprecated:: 0.24
            This attribute is deprecated in 0.24 and will be removed in
diff --git a/sklearn/compose/tests/test_column_transformer.py b/sklearn/compose/tests/test_column_transformer.py
index 1dfed6f3e7436..ff860a164e6c9 100644
--- a/sklearn/compose/tests/test_column_transformer.py
+++ b/sklearn/compose/tests/test_column_transformer.py
@@ -1666,7 +1666,7 @@ def test_get_feature_names_empty_selection(selector):
 def test_feature_names_in_():
     """Feature names are stored in column transformer.

-    Column transfomer deliberately does not check for column name consistency.
+    Column transformer deliberately does not check for column name consistency.
     It only checks that the non-dropped names seen in `fit` are seen in
     `transform`.
This behavior is already tested in
    `test_feature_name_validation_missing_columns_drop_passthough`"""
diff --git a/sklearn/covariance/_shrunk_covariance.py b/sklearn/covariance/_shrunk_covariance.py
index 1eb27b993412d..b0511a0e03ea3 100644
--- a/sklearn/covariance/_shrunk_covariance.py
+++ b/sklearn/covariance/_shrunk_covariance.py
@@ -272,7 +272,7 @@ def ledoit_wolf_shrinkage(X, assume_centered=False, block_size=1000):
     delta = delta_ - 2.0 * mu * emp_cov_trace.sum() + n_features * mu ** 2
     delta /= n_features
     # get final beta as the min between beta and delta
-    # We do this to prevent shrinking more than "1", which whould invert
+    # We do this to prevent shrinking more than "1", which would invert
     # the value of covariances
     beta = min(beta, delta)
     # finally get shrinkage
diff --git a/sklearn/cross_decomposition/_pls.py b/sklearn/cross_decomposition/_pls.py
index 052d09366d88d..e36b79648d6c7 100644
--- a/sklearn/cross_decomposition/_pls.py
+++ b/sklearn/cross_decomposition/_pls.py
@@ -345,7 +345,7 @@ def fit(self, X, Y):
         # Xi . Gamma.T is a sum of n_components rank-1 matrices. X_(R+1) is
         # whatever is left to fully reconstruct X, and can be 0 if X is of rank
         # n_components.
-        # Similiarly, Y was approximated as Omega . Delta.T + Y_(R+1)
+        # Similarly, Y was approximated as Omega . Delta.T + Y_(R+1)

         # Compute transformation matrices (rotations_). See User Guide.
         self.x_rotations_ = np.dot(
diff --git a/sklearn/cross_decomposition/tests/test_pls.py b/sklearn/cross_decomposition/tests/test_pls.py
index 4aca6dd6031f1..1ddc0d0da443f 100644
--- a/sklearn/cross_decomposition/tests/test_pls.py
+++ b/sklearn/cross_decomposition/tests/test_pls.py
@@ -432,7 +432,7 @@ def _generate_test_scale_and_stability_datasets():
         X *= 1000
         yield X, Y

-    # Data set where one of the features is constaint
+    # Data set where one of the features is constant
     X, Y = load_linnerud(return_X_y=True)
     # causes X[:, -1].std() to be zero
     X[:, -1] = 1.0
diff --git a/sklearn/datasets/_base.py b/sklearn/datasets/_base.py
index 756c799c094be..1ecb35fb0aada 100644
--- a/sklearn/datasets/_base.py
+++ b/sklearn/datasets/_base.py
@@ -1006,7 +1006,7 @@ def load_diabetes(*, return_X_y=False, as_frame=False):


 def load_linnerud(*, return_X_y=False, as_frame=False):
-    """Load and return the physical excercise linnerud dataset.
+    """Load and return the physical exercise Linnerud dataset.

     This dataset is suitable for multi-ouput regression tasks.

@@ -1161,7 +1161,7 @@ def load_boston(*, return_X_y=False):

     .. deprecated:: 1.0
        This function is deprecated in 1.0 and will be removed in 1.2. See the
-       warning message below for futher details regarding the alternative
+       warning message below for further details regarding the alternative
        datasets.

     .. warning::
diff --git a/sklearn/datasets/descr/linnerud.rst b/sklearn/datasets/descr/linnerud.rst
index 55eda902448d9..f7c10a95423d0 100644
--- a/sklearn/datasets/descr/linnerud.rst
+++ b/sklearn/datasets/descr/linnerud.rst
@@ -10,7 +10,7 @@ Linnerrud dataset
 :Missing Attribute Values: None

 The Linnerud dataset is a multi-output regression dataset.
It consists of three -excercise (data) and three physiological (target) variables collected from +exercise (data) and three physiological (target) variables collected from twenty middle-aged men in a fitness club: - *physiological* - CSV containing 20 observations on 3 physiological variables: diff --git a/sklearn/datasets/tests/test_openml.py b/sklearn/datasets/tests/test_openml.py index 221e9362f4819..2a76f3f1cfc9a 100644 --- a/sklearn/datasets/tests/test_openml.py +++ b/sklearn/datasets/tests/test_openml.py @@ -227,7 +227,7 @@ def _file_name(url, suffix): output = ( re.sub(r"\W", "-", url[len("https://openml.org/") :]) + suffix + path_suffix ) - # Shorten the filenames to have better compability with windows 10 + # Shorten the filenames to have better compatibility with windows 10 # and filenames > 260 characters return ( output.replace("-json-data-list", "-jdl") diff --git a/sklearn/decomposition/_dict_learning.py b/sklearn/decomposition/_dict_learning.py index e41c850689948..a18adb6f1e3bc 100644 --- a/sklearn/decomposition/_dict_learning.py +++ b/sklearn/decomposition/_dict_learning.py @@ -1035,7 +1035,7 @@ def __init__( self.positive_code = positive_code def _transform(self, X, dictionary): - """Private method allowing to accomodate both DictionaryLearning and + """Private method allowing to accommodate both DictionaryLearning and SparseCoder.""" X = self._validate_data(X, reset=False) diff --git a/sklearn/decomposition/_factor_analysis.py b/sklearn/decomposition/_factor_analysis.py index 05188a602b247..fcf96cb0eb532 100644 --- a/sklearn/decomposition/_factor_analysis.py +++ b/sklearn/decomposition/_factor_analysis.py @@ -62,7 +62,7 @@ class FactorAnalysis(TransformerMixin, BaseEstimator): of ``X`` that are obtained after ``transform``. If None, n_components is set to the number of features. - tol : float, defaul=1e-2 + tol : float, default=1e-2 Stopping tolerance for log-likelihood increase. copy : bool, default=True diff --git a/sklearn/decomposition/_nmf.py b/sklearn/decomposition/_nmf.py index 0cc664379cf6e..d914bd5b6126d 100644 --- a/sklearn/decomposition/_nmf.py +++ b/sklearn/decomposition/_nmf.py @@ -924,7 +924,7 @@ def non_negative_factorization( The regularization terms are scaled by `n_features` for `W` and by `n_samples` for `H` to keep their impact balanced with respect to one another and to the data fit - term as independant as possible of the size `n_samples` of the training set. + term as independent as possible of the size `n_samples` of the training set. The objective function is minimized with an alternating minimization of W and H. If H is given and update_H=False, it solves for W only. @@ -1142,7 +1142,7 @@ class NMF(TransformerMixin, BaseEstimator): The regularization terms are scaled by `n_features` for `W` and by `n_samples` for `H` to keep their impact balanced with respect to one another and to the data fit - term as independant as possible of the size `n_samples` of the training set. + term as independent as possible of the size `n_samples` of the training set. The objective function is minimized with an alternating minimization of W and H. diff --git a/sklearn/discriminant_analysis.py b/sklearn/discriminant_analysis.py index 80898f4132400..50d0ad92b4115 100644 --- a/sklearn/discriminant_analysis.py +++ b/sklearn/discriminant_analysis.py @@ -216,7 +216,7 @@ class LinearDiscriminantAnalysis( `transform` method. 
store_covariance : bool, default=False - If True, explicitely compute the weighted within-class covariance + If True, explicitly compute the weighted within-class covariance matrix when solver is 'svd'. The matrix is always computed and stored for the other solvers. @@ -732,7 +732,7 @@ class QuadraticDiscriminantAnalysis(ClassifierMixin, BaseEstimator): where S2 corresponds to the `scaling_` attribute of a given class. store_covariance : bool, default=False - If True, the class covariance matrices are explicitely computed and + If True, the class covariance matrices are explicitly computed and stored in the `self.covariance_` attribute. .. versionadded:: 0.17 diff --git a/sklearn/ensemble/_gb.py b/sklearn/ensemble/_gb.py index 63ce07d9c137c..7b66324d1f08b 100644 --- a/sklearn/ensemble/_gb.py +++ b/sklearn/ensemble/_gb.py @@ -1016,7 +1016,7 @@ class GradientBoostingClassifier(ClassifierMixin, BaseGradientBoosting): boosting iteration. In addition, it controls the random permutation of the features at each split (see Notes for more details). - It also controls the random spliting of the training data to obtain a + It also controls the random splitting of the training data to obtain a validation set if `n_iter_no_change` is not None. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `. @@ -1593,7 +1593,7 @@ class GradientBoostingRegressor(RegressorMixin, BaseGradientBoosting): boosting iteration. In addition, it controls the random permutation of the features at each split (see Notes for more details). - It also controls the random spliting of the training data to obtain a + It also controls the random splitting of the training data to obtain a validation set if `n_iter_no_change` is not None. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `. @@ -1780,7 +1780,7 @@ class GradientBoostingRegressor(RegressorMixin, BaseGradientBoosting): 0.4... """ - # TODO: remove "ls" in verion 1.2 + # TODO: remove "ls" in version 1.2 _SUPPORTED_LOSS = ( "squared_error", "ls", diff --git a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py index 35dcb1d7acd8b..d2a2b1ddde9e4 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py @@ -237,7 +237,7 @@ def fit(self, X, y, sample_weight=None): # computation if sample_weight is not None: sample_weight = _check_sample_weight(sample_weight, X, dtype=np.float64) - # TODO: remove when PDP suports sample weights + # TODO: remove when PDP supports sample weights self._fitted_with_sw = True rng = check_random_state(self.random_state) diff --git a/sklearn/ensemble/_hist_gradient_boosting/histogram.pyx b/sklearn/ensemble/_hist_gradient_boosting/histogram.pyx index 068935230e900..54cfdcc077dc7 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/histogram.pyx +++ b/sklearn/ensemble/_hist_gradient_boosting/histogram.pyx @@ -53,7 +53,7 @@ cdef class HistogramBuilder: There are different ways to build a histogram: - by subtraction: hist(child) = hist(parent) - hist(sibling) - - from scratch. In this case we have rountines that update the hessians + - from scratch. In this case we have routines that update the hessians or not (not useful when hessians are constant for some losses e.g. least squares). Also, there's a special case for the root which contains all the samples, leading to some possible optimizations. 
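The histogram.pyx docstring touched above mentions building a child's histogram "by subtraction". A schematic NumPy sketch of that identity follows (purely illustrative; the actual routines are Cython and accumulate gradients and hessians per bin, and the data and split below are made-up assumptions):

# For any split, per-bin sums satisfy hist(parent) = hist(left) + hist(right),
# so a sibling's histogram comes from one cheap subtraction instead of a
# second pass over the samples.
import numpy as np

rng = np.random.default_rng(0)
binned = rng.integers(0, 8, size=100)      # one pre-binned feature (8 bins)
grad = rng.normal(size=100)                # per-sample gradients

parent = np.bincount(binned, weights=grad, minlength=8)
left_mask = rng.random(100) < 0.5          # a hypothetical split
left = np.bincount(binned[left_mask], weights=grad[left_mask], minlength=8)
right = parent - left                      # histogram by subtraction
assert np.allclose(
    right, np.bincount(binned[~left_mask], weights=grad[~left_mask], minlength=8)
)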
diff --git a/sklearn/ensemble/_hist_gradient_boosting/splitting.pyx b/sklearn/ensemble/_hist_gradient_boosting/splitting.pyx index 5ddba5cd02678..08ae7aaf0862c 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/splitting.pyx +++ b/sklearn/ensemble/_hist_gradient_boosting/splitting.pyx @@ -8,7 +8,7 @@ - Find the best possible split of a node. For a given node, a split is characterized by a feature and a bin. - Apply a split to a node, i.e. split the indices of the samples at the node - into the newly created left and right childs. + into the newly created left and right children. """ # Author: Nicolas Hug @@ -206,7 +206,7 @@ cdef class Splitter: self.n_threads = n_threads # The partition array maps each sample index into the leaves of the - # tree (a leaf in this context is a node that isn't splitted yet, not + # tree (a leaf in this context is a node that isn't split yet, not # necessarily a 'finalized' leaf). Initially, the root contains all # the indices, e.g.: # partition = [abcdefghijkl] diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_grower.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_grower.py index 6ff30a5888fe3..ea538a0db246a 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_grower.py +++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_grower.py @@ -102,7 +102,7 @@ def test_grow_tree(n_bins, constant_hessian, stopping_param, shrinkage): **stopping_param, ) - # The root node is not yet splitted, but the best possible split has + # The root node is not yet split, but the best possible split has # already been evaluated: assert grower.root.left_child is None assert grower.root.right_child is None @@ -116,7 +116,7 @@ def test_grow_tree(n_bins, constant_hessian, stopping_param, shrinkage): # for each of the two newly introduced children nodes. 
left_node, right_node = grower.split_next()

-    # All training samples have ben splitted in the two nodes, approximately
+    # All training samples have been split in the two nodes, approximately
     # 50%/50%
     _check_children_consistency(grower.root, left_node, right_node)
     assert len(left_node.sample_indices) > 0.4 * n_samples
@@ -127,7 +127,7 @@ def test_grow_tree(n_bins, constant_hessian, stopping_param, shrinkage):
         assert left_node.split_info.gain < grower.min_gain_to_split
         assert left_node in grower.finalized_leaves

-    # The right node can still be splitted further, this time on feature #1
+    # The right node can still be split further, this time on feature #1
     split_info = right_node.split_info
     assert split_info.gain > 1.0
     assert split_info.feature_idx == 1
diff --git a/sklearn/ensemble/tests/test_forest.py b/sklearn/ensemble/tests/test_forest.py
index 77c217d5a5c5e..02321310b5324 100644
--- a/sklearn/ensemble/tests/test_forest.py
+++ b/sklearn/ensemble/tests/test_forest.py
@@ -597,7 +597,7 @@ def test_forest_regressor_oob(ForestRegressor, X, y, X_type, lower_bound_r2):
 @pytest.mark.parametrize("ForestEstimator", FOREST_CLASSIFIERS_REGRESSORS.values())
 def test_forest_oob_warning(ForestEstimator):
     """Check that a warning is raised when not enough estimator and the OOB
-    estimates will be inacurrate."""
+    estimates will be inaccurate."""
     estimator = ForestEstimator(
         n_estimators=1,
         oob_score=True,
diff --git a/sklearn/ensemble/tests/test_gradient_boosting_loss_functions.py b/sklearn/ensemble/tests/test_gradient_boosting_loss_functions.py
index 64f8a9735fa45..7d0d58bb0e3da 100644
--- a/sklearn/ensemble/tests/test_gradient_boosting_loss_functions.py
+++ b/sklearn/ensemble/tests/test_gradient_boosting_loss_functions.py
@@ -49,7 +49,7 @@ def alt_dev(y, raw_pred):
     for datum in test_data:
         assert bd(*datum) == approx(alt_dev(*datum))

-    # check the negative gradient against altenative formula from ESLII
+    # check the negative gradient against alternative formula from ESLII
     # Note: negative_gradient is half the negative gradient.
     def alt_ng(y, raw_pred):
         z = 2 * y - 1
diff --git a/sklearn/externals/_arff.py b/sklearn/externals/_arff.py
index 9be602c6bf972..7c9d51d0702ff 100644
--- a/sklearn/externals/_arff.py
+++ b/sklearn/externals/_arff.py
@@ -32,7 +32,7 @@

 ARFF (Attribute-Relation File Format) is an file format specially created for
 describe datasets which are commonly used for machine learning experiments and
-softwares. This file format was created to be used in Weka, the best
+software. This file format was created to be used in Weka, the best
 representative software for machine learning automated experiments.

 An ARFF file can be divided into two sections: header and data.
The Header diff --git a/sklearn/feature_extraction/tests/test_text.py b/sklearn/feature_extraction/tests/test_text.py index cc4ff2ec49492..1de2dfa5b8121 100644 --- a/sklearn/feature_extraction/tests/test_text.py +++ b/sklearn/feature_extraction/tests/test_text.py @@ -96,7 +96,7 @@ def test_strip_accents(): assert strip_accents_unicode(a) == expected # strings that are already decomposed - a = "o\u0308" # o with diaresis + a = "o\u0308" # o with diaeresis expected = "o" assert strip_accents_unicode(a) == expected diff --git a/sklearn/feature_extraction/text.py b/sklearn/feature_extraction/text.py index 236326bb01f1b..a0b74a60dab4d 100644 --- a/sklearn/feature_extraction/text.py +++ b/sklearn/feature_extraction/text.py @@ -60,7 +60,7 @@ def _preprocess(doc, accent_function=None, lower=False): Function for handling accented characters. Common strategies include normalizing and removing. lower: bool, default=False - Whether to use str.lower to lowercase all fo the text + Whether to use str.lower to lowercase all of the text Returns ------- diff --git a/sklearn/gaussian_process/kernels.py b/sklearn/gaussian_process/kernels.py index 52d229d9b0c17..53ab5c4af15bd 100644 --- a/sklearn/gaussian_process/kernels.py +++ b/sklearn/gaussian_process/kernels.py @@ -2100,7 +2100,7 @@ class DotProduct(Kernel): ---------- sigma_0 : float >= 0, default=1.0 Parameter controlling the inhomogenity of the kernel. If sigma_0=0, - the kernel is homogenous. + the kernel is homogeneous. sigma_0_bounds : pair of floats >= 0 or "fixed", default=(1e-5, 1e5) The lower and upper bound on 'sigma_0'. diff --git a/sklearn/impute/_iterative.py b/sklearn/impute/_iterative.py index 11a7bd99c6735..d95e78ccd36d3 100644 --- a/sklearn/impute/_iterative.py +++ b/sklearn/impute/_iterative.py @@ -466,7 +466,7 @@ def _get_abs_corr_mat(self, X_filled, tolerance=1e-6): if self.n_nearest_features is None or self.n_nearest_features >= n_features: return None with np.errstate(invalid="ignore"): - # if a feature in the neighboorhood has only a single value + # if a feature in the neighborhood has only a single value # (e.g., categorical feature), the std. dev. will be null and # np.corrcoef will raise a warning due to a division by zero abs_corr_mat = np.abs(np.corrcoef(X_filled.T)) diff --git a/sklearn/inspection/_partial_dependence.py b/sklearn/inspection/_partial_dependence.py index 6bf6677825c98..215cb4b21c179 100644 --- a/sklearn/inspection/_partial_dependence.py +++ b/sklearn/inspection/_partial_dependence.py @@ -292,7 +292,7 @@ def partial_dependence( This is more efficient in terms of speed. With this method, the target response of a classifier is always the decision function, not the predicted - probabilities. Since the `'recursion'` method implicitely computes + probabilities. Since the `'recursion'` method implicitly computes the average of the Individual Conditional Expectation (ICE) by design, it is not compatible with ICE and thus `kind` must be `'average'`. diff --git a/sklearn/inspection/_plot/partial_dependence.py b/sklearn/inspection/_plot/partial_dependence.py index 67d5c4e6273ca..e20205e86e51e 100644 --- a/sklearn/inspection/_plot/partial_dependence.py +++ b/sklearn/inspection/_plot/partial_dependence.py @@ -170,7 +170,7 @@ def plot_partial_dependence( but is more efficient in terms of speed. With this method, the target response of a classifier is always the decision function, not the predicted - probabilities. Since the `'recursion'` method implicitely computes + probabilities. 
Since the `'recursion'` method implicitly computes
            the average of the ICEs by design, it is not compatible with ICE and
            thus `kind` must be `'average'`.

@@ -808,7 +808,7 @@ def from_estimator(
             but is more efficient in terms of speed.
             With this method, the target response of a
             classifier is always the decision function, not the predicted
-            probabilities. Since the `'recursion'` method implicitely computes
+            probabilities. Since the `'recursion'` method implicitly computes
             the average of the ICEs by design, it is not compatible with ICE and
             thus `kind` must be `'average'`.

diff --git a/sklearn/inspection/tests/test_permutation_importance.py b/sklearn/inspection/tests/test_permutation_importance.py
index 46065cac4f560..d68fc718da8b5 100644
--- a/sklearn/inspection/tests/test_permutation_importance.py
+++ b/sklearn/inspection/tests/test_permutation_importance.py
@@ -328,7 +328,7 @@ def test_permutation_importance_equivalence_array_dataframe(n_jobs, max_samples)
         X_df[new_col_idx] = cat_column
         assert X_df[new_col_idx].dtype == cat_column.dtype

-    # Stich an aribtrary index to the dataframe:
+    # Stitch an arbitrary index to the dataframe:
     X_df.index = np.arange(len(X_df)).astype(str)

     rf = RandomForestRegressor(n_estimators=5, max_depth=3, random_state=0)
diff --git a/sklearn/linear_model/_glm/tests/test_glm.py b/sklearn/linear_model/_glm/tests/test_glm.py
index 04d3e03811456..2180f3c88f87b 100644
--- a/sklearn/linear_model/_glm/tests/test_glm.py
+++ b/sklearn/linear_model/_glm/tests/test_glm.py
@@ -220,7 +220,7 @@ def test_glm_sample_weight_consistentcy(fit_intercept, alpha, family):
     assert_allclose(glm.coef_, coef, rtol=1e-12)

     # setting one element of sample_weight to 0 is equivalent to removing
-    # the correspoding sample
+    # the corresponding sample
     sample_weight = np.ones(y.shape)
     sample_weight[-1] = 0
     glm.fit(X, y, sample_weight=sample_weight)
@@ -229,7 +229,7 @@ def test_glm_sample_weight_consistentcy(fit_intercept, alpha, family):
     assert_allclose(glm.coef_, coef1, rtol=1e-12)

     # check that multiplying sample_weight by 2 is equivalent
-    # to repeating correspoding samples twice
+    # to repeating corresponding samples twice
     X2 = np.concatenate([X, X[: n_samples // 2]], axis=0)
     y2 = np.concatenate([y, y[: n_samples // 2]])
     sample_weight_1 = np.ones(len(y))
diff --git a/sklearn/linear_model/_ransac.py b/sklearn/linear_model/_ransac.py
index d93f107ebe98e..c565c8c6ce403 100644
--- a/sklearn/linear_model/_ransac.py
+++ b/sklearn/linear_model/_ransac.py
@@ -318,7 +318,7 @@ def fit(self, X, y, sample_weight=None):
         if not isinstance(base_estimator, LinearRegression):
             # FIXME: in 1.2, turn this warning into an error
             warnings.warn(
-                "From version 1.2, `min_samples` needs to be explicitely "
+                "From version 1.2, `min_samples` needs to be explicitly "
                 "set otherwise an error will be raised. To keep the "
                 "current behavior, you need to set `min_samples` to "
                 f"`X.shape[1] + 1 that is {X.shape[1] + 1}",
diff --git a/sklearn/linear_model/_ridge.py b/sklearn/linear_model/_ridge.py
index 6541206b90f26..1dcc81e3b988f 100644
--- a/sklearn/linear_model/_ridge.py
+++ b/sklearn/linear_model/_ridge.py
@@ -1546,7 +1546,7 @@ def _compute_covariance(self, X, sqrt_sw):

     def _sparse_multidot_diag(self, X, A, X_mean, sqrt_sw):
         """Compute the diagonal of (X - X_mean).dot(A).dot((X - X_mean).T)
-        without explicitely centering X nor computing X.dot(A)
+        without explicitly centering X nor computing X.dot(A)
         when X is sparse.
Parameters diff --git a/sklearn/linear_model/_sag_fast.pyx.tp b/sklearn/linear_model/_sag_fast.pyx.tp index 6ca141fe99305..b6493f5f32f96 100644 --- a/sklearn/linear_model/_sag_fast.pyx.tp +++ b/sklearn/linear_model/_sag_fast.pyx.tp @@ -260,7 +260,7 @@ def sag{{name_suffix}}(SequentialDataset{{name_suffix}} dataset, # the number of non-zero features for current sample cdef int xnnz = -1 # the label value for current sample - # the label value for curent sample + # the label value for current sample cdef {{c_type}} y # the sample weight cdef {{c_type}} sample_weight @@ -353,7 +353,7 @@ def sag{{name_suffix}}(SequentialDataset{{name_suffix}} dataset, # Loss function to optimize cdef LossFunction loss - # Wether the loss function is multinomial + # Whether the loss function is multinomial cdef bint multinomial = False # Multinomial loss function cdef MultinomialLogLoss{{name_suffix}} multiloss diff --git a/sklearn/linear_model/_stochastic_gradient.py b/sklearn/linear_model/_stochastic_gradient.py index 4858f4a71a5fd..b429d8227553a 100644 --- a/sklearn/linear_model/_stochastic_gradient.py +++ b/sklearn/linear_model/_stochastic_gradient.py @@ -1088,7 +1088,7 @@ class SGDClassifier(BaseSGDClassifier): existing counter. average : bool or int, default=False - When set to True, computes the averaged SGD weights accross all + When set to True, computes the averaged SGD weights across all updates and stores the result in the ``coef_`` attribute. If set to an int greater than 1, averaging will begin once the total number of samples seen reaches `average`. So ``average=10`` will begin @@ -1829,7 +1829,7 @@ class SGDRegressor(BaseSGDRegressor): existing counter. average : bool or int, default=False - When set to True, computes the averaged SGD weights accross all + When set to True, computes the averaged SGD weights across all updates and stores the result in the ``coef_`` attribute. If set to an int greater than 1, averaging will begin once the total number of samples seen reaches `average`. 
So ``average=10`` will begin
diff --git a/sklearn/linear_model/tests/test_coordinate_descent.py b/sklearn/linear_model/tests/test_coordinate_descent.py
index 9da62db1dddcc..dd67c49585bad 100644
--- a/sklearn/linear_model/tests/test_coordinate_descent.py
+++ b/sklearn/linear_model/tests/test_coordinate_descent.py
@@ -406,7 +406,7 @@ def _scale_alpha_inplace(estimator, n_samples):
 )
 def test_model_pipeline_same_as_normalize_true(LinearModel, params):
     # Test that linear models (LinearModel) set with normalize set to True are
-    # doing the same as the same linear model preceeded by StandardScaler
+    # doing the same as the same linear model preceded by StandardScaler
     # in the pipeline and with normalize set to False

     # normalize is True
@@ -567,7 +567,7 @@ def test_linear_model_sample_weights_normalize_in_pipeline(
     ],
 )
 def test_model_pipeline_same_dense_and_sparse(LinearModel, params):
-    # Test that linear model preceeded by StandardScaler in the pipeline and
+    # Test that linear model preceded by StandardScaler in the pipeline and
     # with normalize set to False gives the same y_pred and the same .coef_
     # given X sparse or dense

diff --git a/sklearn/linear_model/tests/test_ransac.py b/sklearn/linear_model/tests/test_ransac.py
index 016f76e247a87..f26d2088263b8 100644
--- a/sklearn/linear_model/tests/test_ransac.py
+++ b/sklearn/linear_model/tests/test_ransac.py
@@ -387,7 +387,7 @@ def test_ransac_min_n_samples():
     with pytest.raises(ValueError):
         ransac_estimator7.fit(X, y)

-    err_msg = "From version 1.2, `min_samples` needs to be explicitely set"
+    err_msg = "From version 1.2, `min_samples` needs to be explicitly set"
     with pytest.warns(FutureWarning, match=err_msg):
         ransac_estimator8.fit(X, y)

diff --git a/sklearn/manifold/_barnes_hut_tsne.pyx b/sklearn/manifold/_barnes_hut_tsne.pyx
index b15462e597684..936a74373e735 100644
--- a/sklearn/manifold/_barnes_hut_tsne.pyx
+++ b/sklearn/manifold/_barnes_hut_tsne.pyx
@@ -273,7 +273,7 @@ def gradient(float[:] val_P,
                  bint compute_error=1,
                  int num_threads=1):
     # This function is designed to be called from external Python
-    # it passes the 'forces' array by reference and fills thats array
+    # it passes the 'forces' array by reference and fills that array
     # up in-place
     cdef float C
     cdef int n
diff --git a/sklearn/manifold/_t_sne.py b/sklearn/manifold/_t_sne.py
index 436d1cf73efc3..2cb1369b6cf0f 100644
--- a/sklearn/manifold/_t_sne.py
+++ b/sklearn/manifold/_t_sne.py
@@ -798,7 +798,7 @@ def _fit(self, X, skip_num_points=0):

         if isinstance(self._init, str) and self._init == "pca" and issparse(X):
             raise TypeError(
-                "PCA initialization is currently not suported "
+                "PCA initialization is currently not supported "
                 "with the sparse input matrix. Use "
                 'init="random" instead.'
             )
diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py
index 0919cfc200091..7237fa53fda25 100644
--- a/sklearn/metrics/_classification.py
+++ b/sklearn/metrics/_classification.py
@@ -2584,12 +2584,12 @@ def brier_score_loss(y_true, y_prob, *, sample_weight=None, pos_label=None):
         Sample weights.

     pos_label : int or str, default=None
-        Label of the positive class. `pos_label` will be infered in the
+        Label of the positive class. `pos_label` will be inferred in the
         following manner:

         * if `y_true` in {-1, 1} or {0, 1}, `pos_label` defaults to 1;
         * else if `y_true` contains string, an error will be raised and
-          `pos_label` should be explicitely specified;
+          `pos_label` should be explicitly specified;
        * otherwise, `pos_label` defaults to the greater label, i.e.
`np.unique(y_true)[-1]`. diff --git a/sklearn/metrics/_plot/roc_curve.py b/sklearn/metrics/_plot/roc_curve.py index 7d222b82e4638..6c39e6bc152cd 100644 --- a/sklearn/metrics/_plot/roc_curve.py +++ b/sklearn/metrics/_plot/roc_curve.py @@ -429,7 +429,7 @@ def plot_roc_curve( See Also -------- roc_curve : Compute Receiver operating characteristic (ROC) curve. - RocCurveDisplay.from_estimator : ROC Curve visualzation given an estimator + RocCurveDisplay.from_estimator : ROC Curve visualization given an estimator and some data. RocCurveDisplay.from_predictions : ROC Curve visualisation given the true and predicted values. diff --git a/sklearn/metrics/_plot/tests/test_plot_precision_recall.py b/sklearn/metrics/_plot/tests/test_plot_precision_recall.py index 2c5a83094ae7b..1d687b0c31abc 100644 --- a/sklearn/metrics/_plot/tests/test_plot_precision_recall.py +++ b/sklearn/metrics/_plot/tests/test_plot_precision_recall.py @@ -32,7 +32,7 @@ def test_errors(pyplot): ) y_binary = y_multiclass == 0 - # Unfitted classifer + # Unfitted classifier binary_clf = DecisionTreeClassifier() with pytest.raises(NotFittedError): plot_precision_recall_curve(binary_clf, X, y_binary) diff --git a/sklearn/metrics/_regression.py b/sklearn/metrics/_regression.py index ed9da69b1261c..dd4c4c52f55fd 100644 --- a/sklearn/metrics/_regression.py +++ b/sklearn/metrics/_regression.py @@ -325,7 +325,7 @@ def mean_absolute_percentage_error( weighted average of all output errors is returned. MAPE output is non-negative floating point. The best value is 0.0. - But note the fact that bad predictions can lead to arbitarily large + But note the fact that bad predictions can lead to arbitrarily large MAPE values, especially if some y_true values are very close to zero. Note that we return a large value instead of `inf` when y_true is zero. diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 4f29c127defb5..992fb99e8c0e0 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -297,7 +297,7 @@ def test_precision_recall_f_extra_labels(): actual = recall_score(y_true, y_pred, labels=[0, 1, 2, 3, 4], average="macro") assert_array_almost_equal(np.mean([0.0, 1.0, 1.0, 0.5, 0.0]), actual) - # No effect otheriwse + # No effect otherwise for average in ["micro", "weighted", "samples"]: if average == "samples" and i == 0: continue diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py index 47e6bec38388f..d5a4fa7adfa17 100644 --- a/sklearn/metrics/tests/test_common.py +++ b/sklearn/metrics/tests/test_common.py @@ -79,7 +79,7 @@ # all metrics that have the same behavior. # # Two types of datastructures are used in order to implement this system: -# dictionaries of metrics and lists of metrics wit common properties. +# dictionaries of metrics and lists of metrics with common properties. 
# # Dictionaries of metrics # ------------------------ diff --git a/sklearn/metrics/tests/test_ranking.py b/sklearn/metrics/tests/test_ranking.py index 62f07bf1364a2..01de37b189733 100644 --- a/sklearn/metrics/tests/test_ranking.py +++ b/sklearn/metrics/tests/test_ranking.py @@ -1479,7 +1479,7 @@ def test_coverage_error(): assert_almost_equal(coverage_error([[1, 1, 0]], [[0.5, 0.75, 0.25]]), 2) assert_almost_equal(coverage_error([[1, 1, 1]], [[0.5, 0.75, 0.25]]), 3) - # Non trival case + # Non trivial case assert_almost_equal( coverage_error([[0, 1, 0], [1, 1, 0]], [[0.1, 10.0, -3], [0, 1, 3]]), (1 + 3) / 2.0, @@ -1538,7 +1538,7 @@ def test_label_ranking_loss(): assert_almost_equal(label_ranking_loss([[0, 0, 0]], [[0.25, 0.5, 0.5]]), 0) assert_almost_equal(label_ranking_loss([[1, 1, 1]], [[0.25, 0.5, 0.5]]), 0) - # Non trival case + # Non trivial case assert_almost_equal( label_ranking_loss([[0, 1, 0], [1, 1, 0]], [[0.1, 10.0, -3], [0, 1, 3]]), (0 + 2 / 2) / 2.0, diff --git a/sklearn/model_selection/_search_successive_halving.py b/sklearn/model_selection/_search_successive_halving.py index 2c3bff9fd19e6..0184cc4477e0a 100644 --- a/sklearn/model_selection/_search_successive_halving.py +++ b/sklearn/model_selection/_search_successive_halving.py @@ -587,8 +587,8 @@ class HalvingGridSearchCV(BaseSuccessiveHalving): cv_results_ : dict of numpy (masked) ndarrays A dict with keys as column headers and values as columns, that can be - imported into a pandas ``DataFrame``. It contains many informations for - analysing the results of a search. + imported into a pandas ``DataFrame``. It contains lots of information + for analysing the results of a search. Please refer to the :ref:`User guide` for details. @@ -925,8 +925,8 @@ class HalvingRandomSearchCV(BaseSuccessiveHalving): cv_results_ : dict of numpy (masked) ndarrays A dict with keys as column headers and values as columns, that can be - imported into a pandas ``DataFrame``. It contains many informations for - analysing the results of a search. + imported into a pandas ``DataFrame``. It contains lots of information + for analysing the results of a search. Please refer to the :ref:`User guide` for details. diff --git a/sklearn/model_selection/tests/test_search.py b/sklearn/model_selection/tests/test_search.py index 85361ecfc789b..6960a17fb629b 100644 --- a/sklearn/model_selection/tests/test_search.py +++ b/sklearn/model_selection/tests/test_search.py @@ -2356,7 +2356,7 @@ def test_search_cv_using_minimal_compatible_estimator(SearchCV, Predictor): @pytest.mark.parametrize("return_train_score", [True, False]) def test_search_cv_verbose_3(capsys, return_train_score): """Check that search cv with verbose>2 shows the score for single - metrics. non-regression test fo #19658.""" + metrics. non-regression test for #19658.""" X, y = make_classification(n_samples=100, n_classes=2, flip_y=0.2, random_state=0) clf = LinearSVC(random_state=0) grid = {"C": [0.1]} diff --git a/sklearn/model_selection/tests/test_successive_halving.py b/sklearn/model_selection/tests/test_successive_halving.py index 7918a29053c07..a994e080bbb2a 100644 --- a/sklearn/model_selection/tests/test_successive_halving.py +++ b/sklearn/model_selection/tests/test_successive_halving.py @@ -310,7 +310,7 @@ def test_random_search_discrete_distributions( # depends whether the distributions are 'all lists' or not (see # ParameterSampler for details). 
This is somewhat redundant with the checks # in ParameterSampler but interaction bugs were discovered during - # developement of SH + # development of SH n_samples = 1024 X, y = make_classification(n_samples=n_samples, random_state=0) diff --git a/sklearn/model_selection/tests/test_validation.py b/sklearn/model_selection/tests/test_validation.py index ebfbfec1092f7..215ceb5877669 100644 --- a/sklearn/model_selection/tests/test_validation.py +++ b/sklearn/model_selection/tests/test_validation.py @@ -607,7 +607,7 @@ def test_cross_val_score_pandas(): pass for TargetType, InputFeatureType in types: # X dataframe, y series - # 3 fold cross val is used so we need atleast 3 samples per class + # 3 fold cross val is used so we need at least 3 samples per class X_df, y_ser = InputFeatureType(X), TargetType(y2) check_df = lambda x: isinstance(x, InputFeatureType) check_series = lambda x: isinstance(x, TargetType) @@ -1049,7 +1049,7 @@ def test_cross_val_predict_input_types(): multioutput_y = np.column_stack([y, y[::-1]]) clf = Ridge(fit_intercept=False, random_state=0) - # 3 fold cv is used --> atleast 3 samples per class + # 3 fold cv is used --> at least 3 samples per class # Smoke test predictions = cross_val_predict(clf, X, y) assert predictions.shape == (150,) diff --git a/sklearn/neighbors/_classification.py b/sklearn/neighbors/_classification.py index ec5ef9b2ddcbc..ced21c7885962 100644 --- a/sklearn/neighbors/_classification.py +++ b/sklearn/neighbors/_classification.py @@ -537,7 +537,7 @@ def fit(self, X, y): for classes, label in zip(classes_, outlier_label_): if _is_arraylike(label) and not isinstance(label, str): - # ensure the outlier lable for each output is a scalar. + # ensure the outlier label for each output is a scalar. raise TypeError( "The outlier_label of classes {} is " "supposed to be a scalar, got " diff --git a/sklearn/neighbors/_dist_metrics.pyx b/sklearn/neighbors/_dist_metrics.pyx index d484f57afe7c9..240a7a3f7d14d 100755 --- a/sklearn/neighbors/_dist_metrics.pyx +++ b/sklearn/neighbors/_dist_metrics.pyx @@ -13,7 +13,7 @@ cimport numpy as np np.import_array() # required in order to use C-API -# First, define a function to get an ndarray from a memory bufffer +# First, define a function to get an ndarray from a memory buffer cdef extern from "arrayobject.h": object PyArray_SimpleNewFromData(int nd, np.npy_intp* dims, int typenum, void* data) diff --git a/sklearn/neighbors/_quad_tree.pxd b/sklearn/neighbors/_quad_tree.pxd index 7fb65ec2e4fb6..6f61b60cc0ab3 100644 --- a/sklearn/neighbors/_quad_tree.pxd +++ b/sklearn/neighbors/_quad_tree.pxd @@ -31,7 +31,7 @@ cdef struct Cell: # Tree structure SIZE_t parent # Parent cell of this cell - SIZE_t[8] children # Array pointing to childrens of this cell + SIZE_t[8] children # Array pointing to children of this cell # Cell description SIZE_t cell_id # Id of the cell in the cells array in the Tree diff --git a/sklearn/neighbors/_quad_tree.pyx b/sklearn/neighbors/_quad_tree.pyx index 0bcc7aeb6af11..619467e69dd0c 100644 --- a/sklearn/neighbors/_quad_tree.pyx +++ b/sklearn/neighbors/_quad_tree.pyx @@ -356,7 +356,7 @@ cdef class _QuadTree: child = self.cells[child_id] n_points += child.cumulative_size assert child.cell_id == child_id, ( - "Cell id not correctly initiliazed.") + "Cell id not correctly initialized.") if n_points != cell.cumulative_size: raise ValueError( "Cell {} is incoherent. 
Size={} but found {} points " @@ -365,7 +365,7 @@ cdef class _QuadTree: n_points, cell.children)) # Make sure that the number of point in the tree correspond to the - # cummulative size in root cell. + # cumulative size in root cell. if self.n_points != self.cells[0].cumulative_size: raise ValueError( "QuadTree is incoherent. Size={} but found {} points " @@ -557,7 +557,7 @@ cdef class _QuadTree: np.NPY_DEFAULT, None) Py_INCREF(self) if PyArray_SetBaseObject(arr, self) < 0: - raise ValueError("Can't intialize array!") + raise ValueError("Can't initialize array!") return arr cdef int _resize(self, SIZE_t capacity) nogil except -1: diff --git a/sklearn/neural_network/tests/test_mlp.py b/sklearn/neural_network/tests/test_mlp.py index 4ddfe3004d4cc..3948a4eccc760 100644 --- a/sklearn/neural_network/tests/test_mlp.py +++ b/sklearn/neural_network/tests/test_mlp.py @@ -393,7 +393,7 @@ def test_multilabel_classification(): mlp.partial_fit(X, y, classes=[0, 1, 2, 3, 4]) assert mlp.score(X, y) > 0.9 - # Make sure early stopping still work now that spliting is stratified by + # Make sure early stopping still work now that splitting is stratified by # default (it is disabled for multilabel classification) mlp = MLPClassifier(early_stopping=True) mlp.fit(X, y).predict(X) diff --git a/sklearn/preprocessing/_data.py b/sklearn/preprocessing/_data.py index 33fe7943c0681..ecce5e6b6d096 100644 --- a/sklearn/preprocessing/_data.py +++ b/sklearn/preprocessing/_data.py @@ -103,7 +103,7 @@ def _handle_zeros_in_scale(scale, copy=True, constant_mask=None): elif isinstance(scale, np.ndarray): if constant_mask is None: # Detect near constant values to avoid dividing by a very small - # value that could lead to suprising results and numerical + # value that could lead to surprising results and numerical # stability issues. constant_mask = scale < 10 * np.finfo(scale.dtype).eps @@ -384,7 +384,7 @@ def _reset(self): __init__ parameters are not touched. """ - # Checking one attribute is enough, becase they are all set together + # Checking one attribute is enough, because they are all set together # in partial_fit if hasattr(self, "scale_"): del self.scale_ @@ -770,7 +770,7 @@ def _reset(self): __init__ parameters are not touched. """ - # Checking one attribute is enough, becase they are all set together + # Checking one attribute is enough, because they are all set together # in partial_fit if hasattr(self, "scale_"): del self.scale_ @@ -1121,7 +1121,7 @@ def _reset(self): __init__ parameters are not touched. """ - # Checking one attribute is enough, becase they are all set together + # Checking one attribute is enough, because they are all set together # in partial_fit if hasattr(self, "scale_"): del self.scale_ diff --git a/sklearn/preprocessing/_polynomial.py b/sklearn/preprocessing/_polynomial.py index cf47de062b94d..72125198c0b9c 100644 --- a/sklearn/preprocessing/_polynomial.py +++ b/sklearn/preprocessing/_polynomial.py @@ -54,7 +54,7 @@ class PolynomialFeatures(TransformerMixin, BaseEstimator): with power of 2 or higher of the same input feature are excluded: - included: ``x[0]``, `x[1]`, ``x[0] * x[1]``, etc. - - exluded: ``x[0] ** 2``, ``x[0] ** 2 * x[1]``, etc. + - excluded: ``x[0] ** 2``, ``x[0] ** 2 * x[1]``, etc. 
include_bias : bool, default=True If True (default), then include a bias column, the feature in which diff --git a/sklearn/preprocessing/tests/test_data.py b/sklearn/preprocessing/tests/test_data.py index 4014465ab7eab..9e7a8a174c182 100644 --- a/sklearn/preprocessing/tests/test_data.py +++ b/sklearn/preprocessing/tests/test_data.py @@ -1562,7 +1562,7 @@ def test_quantile_transformer_sorted_quantiles(array_type): n_quantiles = 100 qt = QuantileTransformer(n_quantiles=n_quantiles).fit(X) - # Check that the estimated quantile threasholds are monotically + # Check that the estimated quantile thresholds are monotically # increasing: quantiles = qt.quantiles_[:, 0] assert len(quantiles) == 100 @@ -2621,7 +2621,7 @@ def test_standard_scaler_sparse_partial_fit_finite_variance(X_2): @pytest.mark.parametrize("feature_range", [(0, 1), (-10, 10)]) def test_minmax_scaler_clip(feature_range): - # test behaviour of the paramter 'clip' in MinMaxScaler + # test behaviour of the parameter 'clip' in MinMaxScaler X = iris.data scaler = MinMaxScaler(feature_range=feature_range, clip=True).fit(X) X_min, X_max = np.min(X, axis=0), np.max(X, axis=0) diff --git a/sklearn/preprocessing/tests/test_polynomial.py b/sklearn/preprocessing/tests/test_polynomial.py index 14faf8f5365c4..909e1fcdd06bc 100644 --- a/sklearn/preprocessing/tests/test_polynomial.py +++ b/sklearn/preprocessing/tests/test_polynomial.py @@ -359,7 +359,7 @@ def test_spline_transformer_periodic_splines_smoothness(degree): dXt = Xt # We expect splines of degree `degree` to be (`degree`-1) times # continuously differentiable. I.e. for d = 0, ..., `degree` - 1 the d-th - # derivative should be continous. This is the case if the (d+1)-th + # derivative should be continuous. This is the case if the (d+1)-th # numerical derivative is reasonably small (smaller than `tol` in absolute # value). We thus compute d-th numeric derivatives for d = 1, ..., `degree` # and compare them to `tol`. @@ -373,7 +373,7 @@ def test_spline_transformer_periodic_splines_smoothness(degree): # Compute d-th numeric derivative dXt = diff / delta - # As degree `degree` splines are not `degree` times continously + # As degree `degree` splines are not `degree` times continuously # differentiable at the knots, the `degree + 1`-th numeric derivative # should have spikes at the knots. diff = np.diff(dXt, axis=0) diff --git a/sklearn/semi_supervised/_self_training.py b/sklearn/semi_supervised/_self_training.py index 611d54b7c377f..ad627c6f98574 100644 --- a/sklearn/semi_supervised/_self_training.py +++ b/sklearn/semi_supervised/_self_training.py @@ -246,7 +246,7 @@ def fit(self, X, y): if n_to_select == max_proba.shape[0]: selected = np.ones_like(max_proba, dtype=bool) else: - # NB these are indicies, not a mask + # NB these are indices, not a mask selected = np.argpartition(-max_proba, n_to_select)[:n_to_select] # Map selected indices into original array diff --git a/sklearn/svm/src/libsvm/libsvm_sparse_helper.c b/sklearn/svm/src/libsvm/libsvm_sparse_helper.c index 9018000a1d47b..a85a532319d88 100644 --- a/sklearn/svm/src/libsvm/libsvm_sparse_helper.c +++ b/sklearn/svm/src/libsvm/libsvm_sparse_helper.c @@ -242,7 +242,7 @@ npy_intp get_nonzero_SV (struct svm_csr_model *model) { /* - * Predict using a model, where data is expected to be enconded into a csr matrix. + * Predict using a model, where data is expected to be encoded into a csr matrix. 
*/ int csr_copy_predict (npy_intp *data_size, char *data, npy_intp *index_size, char *index, npy_intp *intptr_size, char *intptr, struct svm_csr_model *model, diff --git a/sklearn/tree/_classes.py b/sklearn/tree/_classes.py index 99be12f0cbd3b..818563f0acf59 100644 --- a/sklearn/tree/_classes.py +++ b/sklearn/tree/_classes.py @@ -391,7 +391,7 @@ def fit( else: self.tree_ = Tree( self.n_features_in_, - # TODO: tree should't need this in this case + # TODO: tree shouldn't need this in this case np.array([1] * self.n_outputs_, dtype=np.intp), self.n_outputs_, ) diff --git a/sklearn/tree/tests/test_tree.py b/sklearn/tree/tests/test_tree.py index dea278103100a..cee55d2c40d8d 100644 --- a/sklearn/tree/tests/test_tree.py +++ b/sklearn/tree/tests/test_tree.py @@ -857,7 +857,7 @@ def test_min_weight_fraction_leaf_with_min_samples_leaf_on_sparse_input(name): def test_min_impurity_decrease(): # test if min_impurity_decrease ensure that a split is made only if - # if the impurity decrease is atleast that value + # if the impurity decrease is at least that value X, y = datasets.make_classification(n_samples=10000, random_state=42) # test both DepthFirstTreeBuilder and BestFirstTreeBuilder diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py index b77ee0cb7a7e2..60ed17d111e22 100644 --- a/sklearn/utils/__init__.py +++ b/sklearn/utils/__init__.py @@ -1085,7 +1085,7 @@ def _approximate_mode(class_counts, n_draws, rng): def check_matplotlib_support(caller_name): """Raise ImportError with detailed error message if mpl is not installed. - Plot utilities like any of the Display's ploting functions should lazily import + Plot utilities like any of the Display's plotting functions should lazily import matplotlib and call this helper before any computation. Parameters @@ -1143,7 +1143,7 @@ def all_estimators(type_filter=None): ------- estimators : list of tuples List of (name, class), where ``name`` is the class name as string - and ``class`` is the actuall type of the class. + and ``class`` is the actual type of the class. """ # lazy import to avoid circular imports from sklearn.base from ._testing import ignore_warnings diff --git a/sklearn/utils/_pprint.py b/sklearn/utils/_pprint.py index 9c10ae443313c..c96b1ce764c4a 100644 --- a/sklearn/utils/_pprint.py +++ b/sklearn/utils/_pprint.py @@ -129,7 +129,7 @@ class _EstimatorPrettyPrinter(pprint.PrettyPrinter): here) - format() directly calls _safe_repr() for a first try at rendering the object - - _safe_repr formats the whole object reccursively, only calling itself, + - _safe_repr formats the whole object recursively, only calling itself, not caring about line length or anything - back to _format(), if the output string is too long, _format() then calls the appropriate _pprint_TYPE() method (e.g. _pprint_list()) depending on diff --git a/sklearn/utils/_readonly_array_wrapper.pyx b/sklearn/utils/_readonly_array_wrapper.pyx index 73ba3e968a7e6..55ac82f9d80fd 100644 --- a/sklearn/utils/_readonly_array_wrapper.pyx +++ b/sklearn/utils/_readonly_array_wrapper.pyx @@ -1,5 +1,5 @@ """ -ReadonlyArrayWrapper implements the buffer protocol to make the wraped buffer behave as if +ReadonlyArrayWrapper implements the buffer protocol to make the wrapped buffer behave as if writeable, even for readonly buffers. This way, even readonly arrays can be passed as argument of type (non const) memoryview. 
This is a workaround for the missing support for const fused-typed memoryviews in diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index 817fcf6d14d80..914b4e6168247 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -2625,7 +2625,7 @@ def check_classifiers_predictions(X, y, name, classifier_orig): def _choose_check_classifiers_labels(name, y, y_names): - # Semisupervised classifers use -1 as the indicator for an unlabeled + # Semisupervised classifiers use -1 as the indicator for an unlabeled # sample. return ( y diff --git a/sklearn/utils/extmath.py b/sklearn/utils/extmath.py index 565ce20a4ba2b..1af4efe9d8a89 100644 --- a/sklearn/utils/extmath.py +++ b/sklearn/utils/extmath.py @@ -723,7 +723,7 @@ def svd_flip(u, v, u_based_decision=True): :func:`~sklearn.utils.extmath.randomized_svd`, with matching inner dimensions so one can compute `np.dot(u * s, v)`. The input v should really be called vt to be consistent with scipy's - ouput. + output. u_based_decision : bool, default=True If True, use the columns of u as the basis for sign flipping. diff --git a/sklearn/utils/fixes.py b/sklearn/utils/fixes.py index e45058584cbd8..ee84f632abaae 100644 --- a/sklearn/utils/fixes.py +++ b/sklearn/utils/fixes.py @@ -1,7 +1,7 @@ """Compatibility fixes for older version of python, numpy and scipy If you add content to this file, please give the version of the package -at which the fixe is no longer needed. +at which the fix is no longer needed. """ # Authors: Emmanuelle Gouillart # Gael Varoquaux @@ -210,7 +210,7 @@ def __call__(self, *args, **kwargs): def linspace(start, stop, num=50, endpoint=True, retstep=False, dtype=None, axis=0): - """Implements a simplified linspace function as of numpy verion >= 1.16. + """Implements a simplified linspace function as of numpy version >= 1.16. As of numpy 1.16, the arguments start and stop can be array-like and there is an optional argument `axis`. diff --git a/sklearn/utils/tests/test_arpack.py b/sklearn/utils/tests/test_arpack.py index 0014d162ad140..ab1d622d51a08 100644 --- a/sklearn/utils/tests/test_arpack.py +++ b/sklearn/utils/tests/test_arpack.py @@ -7,7 +7,7 @@ @pytest.mark.parametrize("seed", range(100)) def test_init_arpack_v0(seed): - # check that the initalization a sampling from an uniform distribution + # check that the initialization a sampling from an uniform distribution # where we can fix the random state size = 1000 v0 = _init_arpack_v0(size, seed) diff --git a/sklearn/utils/tests/test_estimator_checks.py b/sklearn/utils/tests/test_estimator_checks.py index 47a30e2ccb961..c4f954790cd26 100644 --- a/sklearn/utils/tests/test_estimator_checks.py +++ b/sklearn/utils/tests/test_estimator_checks.py @@ -504,7 +504,7 @@ def test_check_estimator(): msg = "Estimator changes __dict__ during predict" with raises(AssertionError, match=msg): check_estimator(ChangesDict()) - # check that `fit` only changes attribures that + # check that `fit` only changes attributes that # are private (start with an _ or end with a _). 
msg = ( "Estimator ChangesWrongAttribute should not change or mutate " diff --git a/sklearn/utils/tests/test_estimator_html_repr.py b/sklearn/utils/tests/test_estimator_html_repr.py index 0edf85b121c05..f22c03f20bdd7 100644 --- a/sklearn/utils/tests/test_estimator_html_repr.py +++ b/sklearn/utils/tests/test_estimator_html_repr.py @@ -48,7 +48,7 @@ def test_write_label_html(checked): @pytest.mark.parametrize("est", ["passthrough", "drop", None]) def test_get_visual_block_single_str_none(est): - # Test estimators that are represnted by strings + # Test estimators that are represented by strings est_html_info = _get_visual_block(est) assert est_html_info.kind == "single" assert est_html_info.estimators == est @@ -192,7 +192,7 @@ def test_estimator_html_repr_pipeline(): assert f"
<pre>{str(first)}</pre>" in html_output
     assert f"<pre>{str(select)}</pre>" in html_output
 
-    # voting classifer
+    # voting classifier
     for name, est in clf.estimators:
         assert f"<label>{name}</label>" in html_output
         assert f"<pre>{str(est)}</pre>" in html_output
@@ -256,7 +256,7 @@ def test_ovo_classifier_duck_typing_meta():
         assert f"<pre>{str(ovo.estimator)}" in html_output
         assert "LinearSVC" in html_output
 
-    # outter estimator
+    # outer estimator
     assert f"<pre>{str(ovo)}" in html_output
 
 
diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py
index 87f957b931073..f850d11a7f8dd 100644
--- a/sklearn/utils/validation.py
+++ b/sklearn/utils/validation.py
@@ -608,7 +608,7 @@ def check_array(
     has_pd_integer_array = False
     if hasattr(array, "dtypes") and hasattr(array.dtypes, "__array__"):
         # throw warning if columns are sparse. If all columns are sparse, then
-        # array.sparse exists and sparsity will be perserved (later).
+        # array.sparse exists and sparsity will be preserved (later).
         with suppress(ImportError):
             from pandas.api.types import is_sparse
 

From 49899b2aad3c924a148b4c44a29cae2d96121b2e Mon Sep 17 00:00:00 2001
From: Juan Martin Loyola 
Date: Sat, 18 Sep 2021 05:35:32 -0300
Subject: [PATCH 42/49] DOC Ensures that SimpleImputer passes numpydoc
 validation (#21077)

Co-authored-by: Guillaume Lemaitre 
---
 maint_tools/test_docstrings.py |  1 -
 sklearn/impute/_base.py        | 53 ++++++++++++++++++----------------
 2 files changed, 28 insertions(+), 26 deletions(-)

diff --git a/maint_tools/test_docstrings.py b/maint_tools/test_docstrings.py
index ba5f664e60245..d094a661ea654 100644
--- a/maint_tools/test_docstrings.py
+++ b/maint_tools/test_docstrings.py
@@ -39,7 +39,6 @@
     "SGDOneClassSVM",
     "SGDRegressor",
     "SelfTrainingClassifier",
-    "SimpleImputer",
     "SparseRandomProjection",
     "SpectralBiclustering",
     "SpectralClustering",
diff --git a/sklearn/impute/_base.py b/sklearn/impute/_base.py
index 13aa7a1f19c35..3fba0fa55f6c6 100644
--- a/sklearn/impute/_base.py
+++ b/sklearn/impute/_base.py
@@ -134,7 +134,7 @@ class SimpleImputer(_BaseImputer):
         nullable integer dtypes with missing values, `missing_values`
         should be set to `np.nan`, since `pd.NA` will be converted to `np.nan`.
 
-    strategy : string, default='mean'
+    strategy : str, default='mean'
         The imputation strategy.
 
         - If "mean", then replace missing values using the mean along
@@ -150,25 +150,25 @@ class SimpleImputer(_BaseImputer):
         .. versionadded:: 0.20
            strategy="constant" for fixed value imputation.
 
-    fill_value : string or numerical value, default=None
+    fill_value : str or numerical value, default=None
         When strategy == "constant", fill_value is used to replace all
         occurrences of missing_values.
         If left to the default, fill_value will be 0 when imputing numerical
         data and "missing_value" for strings or object data types.
 
-    verbose : integer, default=0
+    verbose : int, default=0
         Controls the verbosity of the imputer.
 
-    copy : boolean, default=True
-        If True, a copy of X will be created. If False, imputation will
+    copy : bool, default=True
+        If True, a copy of `X` will be created. If False, imputation will
         be done in-place whenever possible. Note that, in the following cases,
         a new copy will always be made, even if `copy=False`:
 
-        - If X is not an array of floating values;
-        - If X is encoded as a CSR matrix;
-        - If add_indicator=True.
+        - If `X` is not an array of floating values;
+        - If `X` is encoded as a CSR matrix;
+        - If `add_indicator=True`.
 
-    add_indicator : boolean, default=False
+    add_indicator : bool, default=False
+        If True, a :class:`MissingIndicator` transform will stack onto the
+        output of the imputer's transform. This allows a predictive estimator
         to account for missingness despite imputation. If a feature has no
@@ -186,7 +186,7 @@ class SimpleImputer(_BaseImputer):
 
     indicator_ : :class:`~sklearn.impute.MissingIndicator`
         Indicator used to add binary indicators for missing values.
-        ``None`` if add_indicator is False.
+        `None` if `add_indicator=False`.
 
     n_features_in_ : int
         Number of features seen during :term:`fit`.
@@ -203,6 +203,11 @@ class SimpleImputer(_BaseImputer):
     --------
     IterativeImputer : Multivariate imputation of missing values.
 
+    Notes
+    -----
+    Columns which only contained missing values at :meth:`fit` are discarded
+    upon :meth:`transform` if strategy is not `"constant"`.
+
     Examples
     --------
     >>> import numpy as np
@@ -215,12 +220,6 @@ class SimpleImputer(_BaseImputer):
     [[ 7.   2.   3. ]
      [ 4.   3.5  6. ]
      [10.   3.5  9. ]]
-
-    Notes
-    -----
-    Columns which only contained missing values at :meth:`fit` are discarded
-    upon :meth:`transform` if strategy is not "constant".
-
     """
 
     def __init__(
@@ -301,17 +300,21 @@ def _validate_input(self, X, in_fit):
         return X
 
     def fit(self, X, y=None):
-        """Fit the imputer on X.
+        """Fit the imputer on `X`.
 
         Parameters
         ----------
         X : {array-like, sparse matrix}, shape (n_samples, n_features)
-            Input data, where ``n_samples`` is the number of samples and
-            ``n_features`` is the number of features.
+            Input data, where `n_samples` is the number of samples and
+            `n_features` is the number of features.
+
+        y : Ignored
+            Not used, present here for API consistency by convention.
 
         Returns
         -------
-        self : SimpleImputer
+        self : object
+            Fitted estimator.
         """
         X = self._validate_input(X, in_fit=True)
 
@@ -449,7 +452,7 @@ def _dense_fit(self, X, strategy, missing_values, fill_value):
             return np.full(X.shape[1], fill_value, dtype=X.dtype)
 
     def transform(self, X):
-        """Impute all missing values in X.
+        """Impute all missing values in `X`.
 
         Parameters
         ----------
@@ -538,10 +541,10 @@ def inverse_transform(self, X):
         This operation can only be performed after :class:`SimpleImputer` is
         instantiated with `add_indicator=True`.
 
-        Note that ``inverse_transform`` can only invert the transform in
+        Note that `inverse_transform` can only invert the transform in
         features that have binary indicators for missing values. If a feature
-        has no missing values at ``fit`` time, the feature won't have a binary
-        indicator, and the imputation done at ``transform`` time won't be
+        has no missing values at `fit` time, the feature won't have a binary
+        indicator, and the imputation done at `transform` time won't be
         inverted.
 
         .. versionadded:: 0.24
@@ -556,7 +559,7 @@ def inverse_transform(self, X):
         Returns
         -------
         X_original : ndarray of shape (n_samples, n_features)
-            The original X with missing values as it was prior
+            The original `X` with missing values as it was prior
             to imputation.
         """
         check_is_fitted(self)

From 479d891dd23577be17bc3a7c3a85cc8facd7c845 Mon Sep 17 00:00:00 2001
From: J Alexander 
Date: Sat, 18 Sep 2021 10:42:58 -0500
Subject: [PATCH 43/49] DOC minor fixes to examples for neighbors transformers
 (#21057)

Co-authored-by: Johnathan Alexander 
Co-authored-by: Guillaume Lemaitre 
---
 doc/modules/neighbors.rst   | 13 ++++++++++---
 sklearn/neighbors/_graph.py | 22 ++++++++++++++++------
 2 files changed, 26 insertions(+), 9 deletions(-)

diff --git a/doc/modules/neighbors.rst b/doc/modules/neighbors.rst
index 03842dcc704cb..2d1209ed8b42e 100644
--- a/doc/modules/neighbors.rst
+++ b/doc/modules/neighbors.rst
@@ -562,13 +562,20 @@ First, the precomputed graph can be re-used multiple times, for instance while
 varying a parameter of the estimator. This can be done manually by the user, or
 using the caching properties of the scikit-learn pipeline:
 
+    >>> import tempfile
     >>> from sklearn.manifold import Isomap
     >>> from sklearn.neighbors import KNeighborsTransformer
     >>> from sklearn.pipeline import make_pipeline
+    >>> from sklearn.datasets import make_regression
+    >>> cache_path = tempfile.gettempdir()  # we use a temporary folder here
+    >>> X, _ = make_regression(n_samples=50, n_features=25, random_state=0)
     >>> estimator = make_pipeline(
-    ...     KNeighborsTransformer(n_neighbors=5, mode='distance'),
-    ...     Isomap(metric='precomputed'),
-    ...     memory='/path/to/cache')
+    ...     KNeighborsTransformer(mode='distance'),
+    ...     Isomap(n_components=3, metric='precomputed'),
+    ...     memory=cache_path)
+    >>> X_embedded = estimator.fit_transform(X)
+    >>> X_embedded.shape
+    (50, 3)
 
 Second, precomputing the graph can give finer control on the nearest neighbors
 estimation, for instance enabling multiprocessing through the parameter
diff --git a/sklearn/neighbors/_graph.py b/sklearn/neighbors/_graph.py
index 77751d65ba9b5..e6fdeffe3b291 100644
--- a/sklearn/neighbors/_graph.py
+++ b/sklearn/neighbors/_graph.py
@@ -332,12 +332,15 @@ class KNeighborsTransformer(KNeighborsMixin, TransformerMixin, NeighborsBase):
 
     Examples
     --------
-    >>> from sklearn.manifold import Isomap
+    >>> from sklearn.datasets import load_wine
     >>> from sklearn.neighbors import KNeighborsTransformer
-    >>> from sklearn.pipeline import make_pipeline
-    >>> estimator = make_pipeline(
-    ...     KNeighborsTransformer(n_neighbors=5, mode='distance'),
-    ...     Isomap(metric='precomputed'))
+    >>> X, _ = load_wine(return_X_y=True)
+    >>> X.shape
+    (178, 13)
+    >>> transformer = KNeighborsTransformer(n_neighbors=5, mode='distance')
+    >>> X_dist_graph = transformer.fit_transform(X)
+    >>> X_dist_graph.shape
+    (178, 178)
     """
 
     def __init__(
@@ -549,12 +552,19 @@ class RadiusNeighborsTransformer(RadiusNeighborsMixin, TransformerMixin, Neighbo
 
     Examples
     --------
+    >>> import numpy as np
+    >>> from sklearn.datasets import load_wine
     >>> from sklearn.cluster import DBSCAN
     >>> from sklearn.neighbors import RadiusNeighborsTransformer
     >>> from sklearn.pipeline import make_pipeline
+    >>> X, _ = load_wine(return_X_y=True)
     >>> estimator = make_pipeline(
     ...     RadiusNeighborsTransformer(radius=42.0, mode='distance'),
-    ...     DBSCAN(min_samples=30, metric='precomputed'))
+    ...     DBSCAN(eps=25.0, metric='precomputed'))
+    >>> X_clustered = estimator.fit_predict(X)
+    >>> clusters, counts = np.unique(X_clustered, return_counts=True)
+    >>> print(counts)
+    [ 29  15 111  11  12]
     """
 
     def __init__(

From f54a46ce384215d8aa7fa92c91efa6e8f8cdf32f Mon Sep 17 00:00:00 2001
From: Nikita Titov 
Date: Sat, 18 Sep 2021 18:43:28 +0300
Subject: [PATCH 44/49] DOC Add m2cgen to related projects (#20646)

---
 doc/related_projects.rst | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/doc/related_projects.rst b/doc/related_projects.rst
index e23d70b3963fd..1488195381dcc 100644
--- a/doc/related_projects.rst
+++ b/doc/related_projects.rst
@@ -123,6 +123,11 @@ enhance the functionality of scikit-learn's estimators.
 - `sklearn-porter <https://github.com/nok/sklearn-porter>`_
   Transpile trained scikit-learn models to C, Java, Javascript and others.
 
+- `m2cgen <https://github.com/BayesWitnesses/m2cgen>`_
+  A lightweight library that transpiles trained machine learning models,
+  including many scikit-learn estimators, into native code for C, Java, Go, R,
+  PHP, Dart, Haskell, Rust and many other programming languages.
+
 - `treelite <https://github.com/dmlc/treelite>`_
   Compiles tree-based ensemble models into C code for minimizing prediction
   latency.

From 9f0a671907e656088d59811ef5da9f2df279aa02 Mon Sep 17 00:00:00 2001
From: Adrin Jalali 
Date: Sun, 19 Sep 2021 14:06:24 +0200
Subject: [PATCH 45/49] DOC add release highlights for 1.0 (#20980)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Tom Dupré la Tour 
Co-authored-by: Guillaume Lemaitre 
Co-authored-by: Nicolas Hug 
Co-authored-by: Christian Lorentzen 
Co-authored-by: Thomas J. Fan 
Co-authored-by: Roman Yurchak 
---
 .../plot_release_highlights_1_0_0.py          | 240 ++++++++++++++++++
 1 file changed, 240 insertions(+)
 create mode 100644 examples/release_highlights/plot_release_highlights_1_0_0.py

diff --git a/examples/release_highlights/plot_release_highlights_1_0_0.py b/examples/release_highlights/plot_release_highlights_1_0_0.py
new file mode 100644
index 0000000000000..a0b44dbff15d5
--- /dev/null
+++ b/examples/release_highlights/plot_release_highlights_1_0_0.py
@@ -0,0 +1,240 @@
+# flake8: noqa
+"""
+=======================================
+Release Highlights for scikit-learn 1.0
+=======================================
+
+.. currentmodule:: sklearn
+
+We are very pleased to announce the release of scikit-learn 1.0! The library
+has been stable for quite some time; releasing version 1.0 recognizes that
+and signals it to our users. This release does not include any breaking
+changes apart from the usual two-release deprecation cycle. Going forward, we
+will do our best to keep to this pattern.
+
+This release includes some new key features as well as many improvements and
+bug fixes. We detail below a few of the major features of this release. **For
+an exhaustive list of all the changes**, please refer to the :ref:`release
+notes <changes_1_0>`.
+
+To install the latest version (with pip)::
+
+    pip install --upgrade scikit-learn
+
+or with conda::
+
+    conda install -c conda-forge scikit-learn
+"""
+
+##############################################################################
+# Keyword and positional arguments
+# ---------------------------------------------------------
+# The scikit-learn API exposes many functions and methods that take a large
+# number of input parameters. For example, before this release, one could
+# instantiate a
+# :class:`~ensemble.HistGradientBoostingRegressor` as::
+#
+#         HistGradientBoostingRegressor("squared_error", 0.1, 100, 31, None,
+#             20, 0.0, 255, None, None, False, "auto", "loss", 0.1, 10, 1e-7,
+#             0, None)
+#
+# Understanding the above code requires the reader to go to the API
+# documentation and to check each and every parameter for its position and
+# its meaning. To improve the readability of code written with scikit-learn,
+# users now have to provide most parameters by name, as keyword arguments,
+# instead of as positional arguments. For example, the above code would
+# be::
+#
+#     HistGradientBoostingRegressor(
+#         loss="squared_error",
+#         learning_rate=0.1,
+#         max_iter=100,
+#         max_leaf_nodes=31,
+#         max_depth=None,
+#         min_samples_leaf=20,
+#         l2_regularization=0.0,
+#         max_bins=255,
+#         categorical_features=None,
+#         monotonic_cst=None,
+#         warm_start=False,
+#         early_stopping="auto",
+#         scoring="loss",
+#         validation_fraction=0.1,
+#         n_iter_no_change=10,
+#         tol=1e-7,
+#         verbose=0,
+#         random_state=None,
+#     )
+#
+# which is much more readable. Positional arguments have been deprecated since
+# version 0.23 and will now raise a ``TypeError``. A limited number of
+# positional arguments are still allowed in some cases, for example in
+# :class:`~decomposition.PCA`, where ``PCA(10)`` is still allowed, but ``PCA(10,
+# False)`` is not allowed.
+
+##############################################################################
+# Spline Transformers
+# ---------------------------------------------------------
+# One way to add nonlinear terms to a dataset's feature set is to generate
+# spline basis functions for continuous/numerical features with the new
+# :class:`~preprocessing.SplineTransformer`. Splines are piecewise polynomials,
+# parametrized by their polynomial degree and the positions of the knots. The
+# :class:`~preprocessing.SplineTransformer` implements a B-spline basis.
+#
+# .. figure:: ../linear_model/images/sphx_glr_plot_polynomial_interpolation_001.png
+#   :target: ../linear_model/plot_polynomial_interpolation.html
+#   :align: center
+#
+# The following code shows splines in action; for more information, please
+# refer to the :ref:`User Guide <spline_transformer>`.
+
+import numpy as np
+from sklearn.preprocessing import SplineTransformer
+
+X = np.arange(5).reshape(5, 1)
+spline = SplineTransformer(degree=2, n_knots=3)
+spline.fit_transform(X)
+
+
+##############################################################################
+# Quantile Regressor
+# --------------------------------------------------------------------------
+# Quantile regression estimates the median or other quantiles of :math:`y`
+# conditional on :math:`X`, while ordinary least squares (OLS) estimates the
+# conditional mean.
+#
+# As a linear model, the new :class:`~linear_model.QuantileRegressor` gives
+# linear predictions :math:`\hat{y}(w, X) = Xw` for the :math:`q`-th quantile,
+# :math:`q \in (0, 1)`. The weights or coefficients :math:`w` are then found by
+# the following minimization problem:
+#
+# .. math::
+#     \min_{w} {\frac{1}{n_{\text{samples}}}
+#     \sum_i PB_q(y_i - X_i w) + \alpha ||w||_1}.
+#
+# This consists of the pinball loss (also known as linear loss),
+# see also :class:`~sklearn.metrics.mean_pinball_loss`,
+#
+# .. math::
+#     PB_q(t) = q \max(t, 0) + (1 - q) \max(-t, 0) =
+#     \begin{cases}
+#         q t, & t > 0, \\
+#         0,    & t = 0, \\
+#         (q - 1) t, & t < 0
+#     \end{cases}
+#
+# and the L1 penalty controlled by parameter ``alpha``, similar to
+# :class:`linear_model.Lasso`.
+#
+# Please check the following example to see how it works, and the :ref:`User
+# Guide <quantile_regression>` for more details.
+#
+# .. figure:: ../linear_model/images/sphx_glr_plot_quantile_regression_002.png
+#    :target: ../linear_model/plot_quantile_regression.html
+#    :align: center
+#    :scale: 50%
+
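+# As a minimal, hypothetical sketch (the synthetic data and settings below are
+# ours, not part of the linked example), one can fit one regressor per
+# quantile of interest::
+#
+#     import numpy as np
+#     from sklearn.linear_model import QuantileRegressor
+#
+#     rng = np.random.RandomState(0)
+#     X = rng.uniform(0, 10, size=(100, 1))
+#     y = 2 * X.ravel() + rng.normal(scale=1 + X.ravel())
+#     predictions = {
+#         q: QuantileRegressor(quantile=q, alpha=0).fit(X, y).predict(X)
+#         for q in (0.1, 0.5, 0.9)
+#     }
+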
+##############################################################################
+# Feature Names Support
+# --------------------------------------------------------------------------
+# When an estimator is passed a `pandas' dataframe
+# <https://pandas.pydata.org>`_ during
+# :term:`fit`, the estimator will set a `feature_names_in_` attribute
+# containing the feature names. Note that feature names support is only enabled
+# when the column names in the dataframe are all strings. `feature_names_in_`
+# is used to check that the column names of the dataframe passed to
+# non-:term:`fit` methods, such as :term:`predict`, are consistent with the
+# features seen in :term:`fit`:
+from sklearn.preprocessing import StandardScaler
+import pandas as pd
+
+X = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=["a", "b", "c"])
+scaler = StandardScaler().fit(X)
+scaler.feature_names_in_
+
+# %%
+# The support of :term:`get_feature_names_out` is avaliable for transformers
+# that already had :term:`get_feature_names` and transformers with a one-to-one
+# correspondence between input and output such as
+# :class:`~preprocessing.StandardScaler`. :term:`get_feature_names_out` support
+# will be added to all other transformers in future releases. Additionally,
+# :meth:`compose.ColumnTransformer.get_feature_names_out` is avaliable to
+# combine feature names of its transformers:
+from sklearn.compose import ColumnTransformer
+from sklearn.preprocessing import OneHotEncoder
+import pandas as pd
+
+X = pd.DataFrame({"pet": ["dog", "cat", "fish"], "age": [3, 7, 1]})
+preprocessor = ColumnTransformer(
+    [
+        ("numerical", StandardScaler(), ["age"]),
+        ("categorical", OneHotEncoder(), ["pet"]),
+    ],
+    prefix_feature_names_out=False,
+).fit(X)
+
+preprocessor.get_feature_names_out()
+
+# %%
+# When this ``preprocessor`` is used with a pipeline, the feature names used
+# by the classifier are obtained by slicing and calling
+# :term:`get_feature_names_out`:
+from sklearn.linear_model import LogisticRegression
+from sklearn.pipeline import make_pipeline
+
+y = [1, 0, 1]
+pipe = make_pipeline(preprocessor, LogisticRegression())
+pipe.fit(X, y)
+pipe[:-1].get_feature_names_out()
+
+
+##############################################################################
+# A more flexible plotting API
+# --------------------------------------------------------------------------
+# :class:`metrics.ConfusionMatrixDisplay`,
+# :class:`metrics.PrecisionRecallDisplay`, :class:`metrics.DetCurveDisplay`,
+# and :class:`inspection.PartialDependenceDisplay` now expose two class
+# methods: `from_estimator` and `from_predictions`, which allow users to create
+# a plot given the predictions or an estimator. This means the corresponding
+# `plot_*` functions are deprecated. Please check :ref:`example one
+# ` and
+# :ref:`example two
+# ` for
+# how to use the new plotting functionalities.
+
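+# As a minimal, hypothetical sketch (`clf`, `X_test` and `y_test` are
+# placeholder names, not objects defined in this example), the two entry
+# points look like::
+#
+#     from sklearn.metrics import ConfusionMatrixDisplay
+#
+#     ConfusionMatrixDisplay.from_estimator(clf, X_test, y_test)
+#     ConfusionMatrixDisplay.from_predictions(y_test, clf.predict(X_test))
+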
+##############################################################################
+# Online One-Class SVM
+# --------------------------------------------------------------------------
+# The new class :class:`~linear_model.SGDOneClassSVM` implements an online
+# linear version of the One-Class SVM using stochastic gradient descent.
+# Combined with kernel approximation techniques,
+# :class:`~linear_model.SGDOneClassSVM` can be used to approximate the solution
+# of a kernelized One-Class SVM, implemented in :class:`~svm.OneClassSVM`, with
+# a fit time complexity linear in the number of samples. Note that the
+# complexity of a kernelized One-Class SVM is at best quadratic in the number
+# of samples. :class:`~linear_model.SGDOneClassSVM` is thus well suited for
+# datasets with a large number of training samples (> 10,000) for which the SGD
+# variant can be several orders of magnitude faster. Please check this
+# :ref:`example
+# ` to see how
+# it's used, and the :ref:`User Guide <sgd_online_one_class_svm>` for more
+# details.
+#
+# .. figure:: ../miscellaneous/images/sphx_glr_plot_anomaly_comparison_001.png
+#    :target: ../miscellaneous/plot_anomaly_comparison.html
+#    :align: center
+
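+# A minimal sketch of the combination described above (the hyperparameter
+# values are illustrative assumptions, not recommendations)::
+#
+#     from sklearn.kernel_approximation import Nystroem
+#     from sklearn.linear_model import SGDOneClassSVM
+#     from sklearn.pipeline import make_pipeline
+#
+#     detector = make_pipeline(
+#         Nystroem(gamma=0.1, n_components=100, random_state=0),
+#         SGDOneClassSVM(nu=0.05, random_state=0),
+#     )
+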
+##############################################################################
+# Histogram-based Gradient Boosting Models are now stable
+# --------------------------------------------------------------------------
+# :class:`~sklearn.ensemble.HistGradientBoostingRegressor` and
+# :class:`~ensemble.HistGradientBoostingClassifier` are no longer experimental
+# and can simply be imported and used as::
+#
+#     from sklearn.ensemble import HistGradientBoostingClassifier
+
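+# For instance, a minimal usage sketch (the synthetic dataset is our own
+# illustration)::
+#
+#     from sklearn.datasets import make_classification
+#     from sklearn.ensemble import HistGradientBoostingClassifier
+#
+#     X, y = make_classification(random_state=0)
+#     clf = HistGradientBoostingClassifier(random_state=0).fit(X, y)
+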
+##############################################################################
+# New documentation improvements
+# ------------------------------
+# This release includes many documentation improvements. Of the more than 2100
+# merged pull requests, about 800 improve our documentation.

From 5918ebb76c3f313bc569e5e1c35800add077aa8b Mon Sep 17 00:00:00 2001
From: "Thomas J. Fan" 
Date: Tue, 21 Sep 2021 11:16:51 -0400
Subject: [PATCH 46/49] API Change ColumnTransformer parameter name to
 verbose_feature_names_out (#21080)

---
 doc/modules/compose.rst                       |  8 +++---
 doc/whats_new/v1.0.rst                        |  4 +--
 ...linear_model_coefficient_interpretation.py |  2 +-
 sklearn/compose/_column_transformer.py        | 18 ++++++-------
 .../compose/tests/test_column_transformer.py  | 26 +++++++++----------
 5 files changed, 29 insertions(+), 29 deletions(-)

diff --git a/doc/modules/compose.rst b/doc/modules/compose.rst
index 6f986b2c6a4ec..1f18dac1624d1 100644
--- a/doc/modules/compose.rst
+++ b/doc/modules/compose.rst
@@ -449,13 +449,13 @@ By default, the remaining rating columns are ignored (``remainder='drop'``)::
   >>> column_trans = ColumnTransformer(
   ...     [('categories', OneHotEncoder(dtype='int'), ['city']),
   ...      ('title_bow', CountVectorizer(), 'title')],
-  ...     remainder='drop', prefix_feature_names_out=False)
+  ...     remainder='drop', verbose_feature_names_out=False)
 
   >>> column_trans.fit(X)
-  ColumnTransformer(prefix_feature_names_out=False,
-                    transformers=[('categories', OneHotEncoder(dtype='int'),
+  ColumnTransformer(transformers=[('categories', OneHotEncoder(dtype='int'),
                                    ['city']),
-                                  ('title_bow', CountVectorizer(), 'title')])
+                                  ('title_bow', CountVectorizer(), 'title')],
+                    verbose_feature_names_out=False)
 
   >>> column_trans.get_feature_names_out()
   array(['city_London', 'city_Paris', 'city_Sallisaw', 'bow', 'feast',
diff --git a/doc/whats_new/v1.0.rst b/doc/whats_new/v1.0.rst
index b3d69843dfa3e..a60f6b9232058 100644
--- a/doc/whats_new/v1.0.rst
+++ b/doc/whats_new/v1.0.rst
@@ -251,9 +251,9 @@ Changelog
   an adequate transformer.
   :pr:`18898` by :user:`Oras Phongpanagnam `.
 
-- |API| Adds `prefix_feature_names_out` to :class:`compose.ColumnTransformer`.
+- |API| Adds `verbose_feature_names_out` to :class:`compose.ColumnTransformer`.
   This flag controls the prefixing of feature names out in
-  :term:`get_feature_names_out`. :pr:`18444` by `Thomas Fan`_.
+  :term:`get_feature_names_out`. :pr:`18444` and :pr:`21080` by `Thomas Fan`_.
 
 :mod:`sklearn.covariance`
 .........................
diff --git a/examples/inspection/plot_linear_model_coefficient_interpretation.py b/examples/inspection/plot_linear_model_coefficient_interpretation.py
index 69b52c6b5ce3f..b8fe44009e4f6 100644
--- a/examples/inspection/plot_linear_model_coefficient_interpretation.py
+++ b/examples/inspection/plot_linear_model_coefficient_interpretation.py
@@ -135,7 +135,7 @@
 preprocessor = make_column_transformer(
     (OneHotEncoder(drop="if_binary"), categorical_columns),
     remainder="passthrough",
-    prefix_feature_names_out=False,
+    verbose_feature_names_out=False,
 )
 
 # %%
diff --git a/sklearn/compose/_column_transformer.py b/sklearn/compose/_column_transformer.py
index f4e602cc44f3d..d4881a95a3604 100644
--- a/sklearn/compose/_column_transformer.py
+++ b/sklearn/compose/_column_transformer.py
@@ -112,7 +112,7 @@ class ColumnTransformer(TransformerMixin, _BaseComposition):
         If True, the time elapsed while fitting each transformer will be
         printed as it is completed.
 
-    prefix_feature_names_out : bool, default=True
+    verbose_feature_names_out : bool, default=True
         If True, :meth:`get_feature_names_out` will prefix all feature names
         with the name of the transformer that generated that feature.
         If False, :meth:`get_feature_names_out` will not prefix any feature
@@ -204,7 +204,7 @@ def __init__(
         n_jobs=None,
         transformer_weights=None,
         verbose=False,
-        prefix_feature_names_out=True,
+        verbose_feature_names_out=True,
     ):
         self.transformers = transformers
         self.remainder = remainder
@@ -212,7 +212,7 @@ def __init__(
         self.n_jobs = n_jobs
         self.transformer_weights = transformer_weights
         self.verbose = verbose
-        self.prefix_feature_names_out = prefix_feature_names_out
+        self.verbose_feature_names_out = verbose_feature_names_out
 
     @property
     def _transformers(self):
@@ -489,7 +489,7 @@ def get_feature_names_out(self, input_features=None):
             # No feature names
             return np.array([], dtype=object)
 
-        if self.prefix_feature_names_out:
+        if self.verbose_feature_names_out:
             # Prefix the feature names out with the transformers name
             names = list(
                 chain.from_iterable(
@@ -499,7 +499,7 @@ def get_feature_names_out(self, input_features=None):
             )
             return np.asarray(names, dtype=object)
 
-        # prefix_feature_names_out is False
+        # verbose_feature_names_out is False
         # Check that names are all unique without a prefix
         feature_names_count = Counter(
             chain.from_iterable(s for _, s in transformer_with_feature_names_out)
@@ -517,7 +517,7 @@ def get_feature_names_out(self, input_features=None):
                 names_repr = str(top_6_overlap)
             raise ValueError(
                 f"Output feature names: {names_repr} are not unique. Please set "
-                "prefix_feature_names_out=True to add prefixes to feature names"
+                "verbose_feature_names_out=True to add prefixes to feature names"
             )
 
         return np.concatenate(
@@ -856,7 +856,7 @@ def make_column_transformer(
     sparse_threshold=0.3,
     n_jobs=None,
     verbose=False,
-    prefix_feature_names_out=True,
+    verbose_feature_names_out=True,
 ):
     """Construct a ColumnTransformer from the given transformers.
 
@@ -919,7 +919,7 @@ def make_column_transformer(
         If True, the time elapsed while fitting each transformer will be
         printed as it is completed.
 
-    prefix_feature_names_out : bool, default=True
+    verbose_feature_names_out : bool, default=True
         If True, :meth:`get_feature_names_out` will prefix all feature names
         with the name of the transformer that generated that feature.
         If False, :meth:`get_feature_names_out` will not prefix any feature
@@ -959,7 +959,7 @@ def make_column_transformer(
         remainder=remainder,
         sparse_threshold=sparse_threshold,
         verbose=verbose,
-        prefix_feature_names_out=prefix_feature_names_out,
+        verbose_feature_names_out=verbose_feature_names_out,
     )
 
 
diff --git a/sklearn/compose/tests/test_column_transformer.py b/sklearn/compose/tests/test_column_transformer.py
index ff860a164e6c9..7a0d9eb0f5a01 100644
--- a/sklearn/compose/tests/test_column_transformer.py
+++ b/sklearn/compose/tests/test_column_transformer.py
@@ -709,7 +709,7 @@ def test_column_transformer_get_set_params():
         "trans2__with_std": True,
         "transformers": ct.transformers,
         "transformer_weights": None,
-        "prefix_feature_names_out": True,
+        "verbose_feature_names_out": True,
         "verbose": False,
     }
 
@@ -730,7 +730,7 @@ def test_column_transformer_get_set_params():
         "trans2__with_std": True,
         "transformers": ct.transformers,
         "transformer_weights": None,
-        "prefix_feature_names_out": True,
+        "verbose_feature_names_out": True,
         "verbose": False,
     }
 
@@ -1149,7 +1149,7 @@ def test_column_transformer_get_set_params_with_remainder():
         "trans1__with_std": True,
         "transformers": ct.transformers,
         "transformer_weights": None,
-        "prefix_feature_names_out": True,
+        "verbose_feature_names_out": True,
         "verbose": False,
     }
 
@@ -1169,7 +1169,7 @@ def test_column_transformer_get_set_params_with_remainder():
         "trans1": "passthrough",
         "transformers": ct.transformers,
         "transformer_weights": None,
-        "prefix_feature_names_out": True,
+        "verbose_feature_names_out": True,
         "verbose": False,
     }
     assert ct.get_params() == exp
@@ -1760,8 +1760,8 @@ def get_feature_names_out(self, input_features=None):
         ),
     ],
 )
-def test_feature_names_out_prefix_true(transformers, remainder, expected_names):
-    """Check feature_names_out for prefix_feature_names_out==True (default)"""
+def test_verbose_feature_names_out_true(transformers, remainder, expected_names):
+    """Check feature_names_out for verbose_feature_names_out=True (default)"""
     pd = pytest.importorskip("pandas")
     df = pd.DataFrame([[1, 2, 3, 4]], columns=["a", "b", "c", "d"])
     ct = ColumnTransformer(
@@ -1835,14 +1835,14 @@ def test_feature_names_out_prefix_true(transformers, remainder, expected_names):
         ),
     ],
 )
-def test_feature_names_out_prefix_false(transformers, remainder, expected_names):
-    """Check feature_names_out for prefix_feature_names_out==True (default)"""
+def test_verbose_feature_names_out_false(transformers, remainder, expected_names):
+    """Check feature_names_out for verbose_feature_names_out=False"""
     pd = pytest.importorskip("pandas")
     df = pd.DataFrame([[1, 2, 3, 4]], columns=["a", "b", "c", "d"])
     ct = ColumnTransformer(
         transformers,
         remainder=remainder,
-        prefix_feature_names_out=False,
+        verbose_feature_names_out=False,
     )
     ct.fit(df)
 
@@ -1923,23 +1923,23 @@ def test_feature_names_out_prefix_false(transformers, remainder, expected_names)
         ),
     ],
 )
-def test_feature_names_out_prefix_false_errors(
+def test_verbose_feature_names_out_false_errors(
     transformers, remainder, colliding_columns
 ):
-    """Check feature_names_out for prefix_feature_names_out==False"""
+    """Check feature_names_out for verbose_feature_names_out=False"""
 
     pd = pytest.importorskip("pandas")
     df = pd.DataFrame([[1, 2, 3, 4]], columns=["a", "b", "c", "d"])
     ct = ColumnTransformer(
         transformers,
         remainder=remainder,
-        prefix_feature_names_out=False,
+        verbose_feature_names_out=False,
     )
     ct.fit(df)
 
     msg = re.escape(
         f"Output feature names: {colliding_columns} are not unique. Please set "
-        "prefix_feature_names_out=True to add prefixes to feature names"
+        "verbose_feature_names_out=True to add prefixes to feature names"
     )
     with pytest.raises(ValueError, match=msg):
         ct.get_feature_names_out()

From 8d5938eb92b5f663cd062c55ba9bda41c109a17d Mon Sep 17 00:00:00 2001
From: Adrin Jalali 
Date: Tue, 21 Sep 2021 18:53:53 +0200
Subject: [PATCH 47/49] DOC fix verbose_feature_names_out usage in release
 highlights (#21100)

---
 examples/release_highlights/plot_release_highlights_1_0_0.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/release_highlights/plot_release_highlights_1_0_0.py b/examples/release_highlights/plot_release_highlights_1_0_0.py
index a0b44dbff15d5..5b5ab4fb2e016 100644
--- a/examples/release_highlights/plot_release_highlights_1_0_0.py
+++ b/examples/release_highlights/plot_release_highlights_1_0_0.py
@@ -169,7 +169,7 @@
         ("numerical", StandardScaler(), ["age"]),
         ("categorical", OneHotEncoder(), ["pet"]),
     ],
-    prefix_feature_names_out=False,
+    verbose_feature_names_out=False,
 ).fit(X)
 
 preprocessor.get_feature_names_out()

From bebb23f369540409708e0e78161759086b5d35e1 Mon Sep 17 00:00:00 2001
From: Dimitri Papadopoulos Orfanos
 <3234522+DimitriPapadopoulos@users.noreply.github.com>
Date: Wed, 22 Sep 2021 23:38:30 +0200
Subject: [PATCH 48/49] DOC Fix a few typos in release highlights (#21096)

---
 examples/release_highlights/plot_release_highlights_1_0_0.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/release_highlights/plot_release_highlights_1_0_0.py b/examples/release_highlights/plot_release_highlights_1_0_0.py
index 5b5ab4fb2e016..ef4e500b85e7c 100644
--- a/examples/release_highlights/plot_release_highlights_1_0_0.py
+++ b/examples/release_highlights/plot_release_highlights_1_0_0.py
@@ -152,12 +152,12 @@
 scaler.feature_names_in_
 
 # %%
-# The support of :term:`get_feature_names_out` is avaliable for transformers
+# The support of :term:`get_feature_names_out` is available for transformers
 # that already had :term:`get_feature_names` and transformers with a one-to-one
 # correspondence between input and output such as
 # :class:`~preprocessing.StandardScaler`. :term:`get_feature_names_out` support
 # will be added to all other transformers in future releases. Additionally,
-# :meth:`compose.ColumnTransformer.get_feature_names_out` is avaliable to
+# :meth:`compose.ColumnTransformer.get_feature_names_out` is available to
 # combine feature names of its transformers:
 from sklearn.compose import ColumnTransformer
 from sklearn.preprocessing import OneHotEncoder

From 887009ab5c7a47c7cb6d07f1969fd9193788d89a Mon Sep 17 00:00:00 2001
From: Christian Lorentzen 
Date: Wed, 22 Sep 2021 23:34:33 +0200
Subject: [PATCH 49/49] DOC update contributors for 1.0 (#21111)

---
 doc/whats_new/v1.0.rst | 67 +++++++++++++++++++++---------------------
 1 file changed, 34 insertions(+), 33 deletions(-)

diff --git a/doc/whats_new/v1.0.rst b/doc/whats_new/v1.0.rst
index a60f6b9232058..fc1f53e6b283b 100644
--- a/doc/whats_new/v1.0.rst
+++ b/doc/whats_new/v1.0.rst
@@ -947,47 +947,48 @@ Raghunathan, bmalezieux, Brian Rice, Brian Sun, Bruno Charron, Bryan Chen,
 bumblebee, caherrera-meli, Carsten Allefeld, CeeThinwa, Chiara Marmo,
 chrissobel, Christian Lorentzen, Christopher Yeh, Chuliang Xiao, Clément
 Fauchereau, cliffordEmmanuel, Conner Shen, Connor Tann, David Dale, David Katz,
-David Poznik, Divyanshu Deoli, dmallia17, Dmitry Kobak, DS_anas, Eduardo
-Jardim, EdwinWenink, EL-ATEIF Sara, Eleni Markou, Eric Fiegel, Erich Schubert,
-Ezri-Mudde, Fatos Morina, Felipe Rodrigues, Felix Hafner, Fenil Suchak,
-flyingdutchman23, Flynn, Fortune Uwha, Francois Berenger, Frankie Robertson,
-Frans Larsson, Frederick Robinson, Gabriel S Vicente, Gael Varoquaux, genvalen,
-Geoffrey Thomas, geroldcsendes, Gleb Levitskiy, Glen, Glòria Macià Muñoz,
-gregorystrubel, groceryheist, Guillaume Lemaitre, guiweber, Haidar Almubarak,
-Hans Moritz Günther, Haoyin Xu, Harris Mirza, Harry Wei, Harutaka Kawamura,
-Hassan Alsawadi, Helder Geovane Gomes de Lima, Hugo DEFOIS, Igor Ilic, Ikko
-Ashimine, Isaack Mungui, Ishaan Bhat, Ishan Mishra, Iván Pulido, iwhalvic,
+David Poznik, Dimitri Papadopoulos Orfanos, Divyanshu Deoli, dmallia17,
+Dmitry Kobak, DS_anas, Eduardo Jardim, EdwinWenink, EL-ATEIF Sara, Eleni
+Markou, EricEllwanger, Eric Fiegel, Erich Schubert, Ezri-Mudde, Fatos Morina,
+Felipe Rodrigues, Felix Hafner, Fenil Suchak, flyingdutchman23, Flynn, Fortune
+Uwha, Francois Berenger, Frankie Robertson, Frans Larsson, Frederick Robinson,
+frellwan, Gabriel S Vicente, Gael Varoquaux, genvalen, Geoffrey Thomas,
+geroldcsendes, Gleb Levitskiy, Glen, Glòria Macià Muñoz, gregorystrubel,
+groceryheist, Guillaume Lemaitre, guiweber, Haidar Almubarak, Hans Moritz
+Günther, Haoyin Xu, Harris Mirza, Harry Wei, Harutaka Kawamura, Hassan
+Alsawadi, Helder Geovane Gomes de Lima, Hugo DEFOIS, Igor Ilic, Ikko Ashimine,
+Isaack Mungui, Ishaan Bhat, Ishan Mishra, Iván Pulido, iwhalvic, J Alexander,
 Jack Liu, James Alan Preiss, James Budarz, James Lamb, Jannik, Jeff Zhao,
-Jennifer Maldonado, Jérémie du Boisberranger, Jesse Lima, Jianzhu Guo,
-jnboehm, Joel Nothman, JohanWork, John Paton, Jonathan Schneider, Jon Crall,
-Jon Haitz Legarreta Gorroño, Joris Van den Bossche, José Manuel Nápoles
-Duarte, Juan Carlos Alfaro Jiménez, Juan Martin Loyola, Julien Jerphanion,
-Julio Batista Silva, julyrashchenko, JVM, Kadatatlu Kishore, Karen Palacio, Kei
-Ishikawa, kmatt10, kobaski, Kot271828, Kunj, KurumeYuta, kxytim, lacrosse91,
-LalliAcqua, Laveen Bagai, Leonardo Rocco, Leonardo Uieda, Leopoldo Corona, Loic
-Esteve, LSturtew, Luca Bittarello, Luccas Quadros, Lucy Jiménez, Lucy Liu,
-ly648499246, Mabu Manaileng, makoeppel, Marco Gorelli, Maren Westermann,
+Jennifer Maldonado, Jérémie du Boisberranger, Jesse Lima, Jianzhu Guo, jnboehm,
+Joel Nothman, JohanWork, John Paton, Jonathan Schneider, Jon Crall, Jon Haitz
+Legarreta Gorroño, Joris Van den Bossche, José Manuel Nápoles Duarte, Juan
+Carlos Alfaro Jiménez, Juan Martin Loyola, Julien Jerphanion, Julio Batista
+Silva, julyrashchenko, JVM, Kadatatlu Kishore, Karen Palacio, Kei Ishikawa,
+kmatt10, kobaski, Kot271828, Kunj, KurumeYuta, kxytim, lacrosse91, LalliAcqua,
+Laveen Bagai, Leonardo Rocco, Leonardo Uieda, Leopoldo Corona, Loic Esteve,
+LSturtew, Luca Bittarello, Luccas Quadros, Lucy Jiménez, Lucy Liu, ly648499246,
+Mabu Manaileng, Manimaran, makoeppel, Marco Gorelli, Maren Westermann,
 Mariangela, Maria Telenczuk, marielaraj, Martin Hirzel, Mateo Noreña, Mathieu
 Blondel, Mathis Batoul, mathurinm, Matthew Calcote, Maxime Prieur, Maxwell,
 Mehdi Hamoumi, Mehmet Ali Özer, Miao Cai, Michal Karbownik, michalkrawczyk,
 Mitzi, mlondschien, Mohamed Haseeb, Mohamed Khoualed, Muhammad Jarir Kanji,
 murata-yu, Nadim Kawwa, Nanshan Li, naozin555, Nate Parsons, Neal Fultz, Nic
-Annau, Nicolas Hug, Nicolas Miller, Nico Stefani, Nigel Bosch, Nodar
-Okroshiashvili, Norbert Preining, novaya, Ogbonna Chibuike Stephen, OGordon100,
-Oliver Pfaffel, Olivier Grisel, Oras Phongpanangam, Pablo Duque, Pablo
-Ibieta-Jimenez, Patric Lacouth, Paulo S. Costa, Paweł Olszewski, Peter Dye,
-PierreAttard, Pierre-Yves Le Borgne, PranayAnchuri, Prince Canuma, putschblos,
-qdeffense, RamyaNP, ranjanikrishnan, Ray Bell, Rene Jean Corneille, Reshama
-Shaikh, ricardojnf, RichardScottOZ, Rodion Martynov, Rohan Paul, Roman Lutz,
-Roman Yurchak, Samuel Brice, Sandy Khosasi, Sean Benhur J, Sebastian Flores,
-Sebastian Pölsterl, Shao Yang Hong, shinehide, shinnar, shivamgargsya,
+Annau, Nicolas Hug, Nicolas Miller, Nico Stefani, Nigel Bosch, Nikita Titov,
+Nodar Okroshiashvili, Norbert Preining, novaya, Ogbonna Chibuike Stephen,
+OGordon100, Oliver Pfaffel, Olivier Grisel, Oras Phongpanangam, Pablo Duque,
+Pablo Ibieta-Jimenez, Patric Lacouth, Paulo S. Costa, Paweł Olszewski, Peter
+Dye, PierreAttard, Pierre-Yves Le Borgne, PranayAnchuri, Prince Canuma,
+putschblos, qdeffense, RamyaNP, ranjanikrishnan, Ray Bell, Rene Jean Corneille,
+Reshama Shaikh, ricardojnf, RichardScottOZ, Rodion Martynov, Rohan Paul, Roman
+Lutz, Roman Yurchak, Samuel Brice, Sandy Khosasi, Sean Benhur J, Sebastian
+Flores, Sebastian Pölsterl, Shao Yang Hong, shinehide, shinnar, shivamgargsya,
 Shooter23, Shuhei Kayawari, Shyam Desai, simonamaggio, Sina Tootoonian,
 solosilence, Steven Kolawole, Steve Stagg, Surya Prakash, swpease, Sylvain
 Marié, Takeshi Oura, Terence Honles, TFiFiE, Thomas A Caswell, Thomas J. Fan,
 Tim Gates, TimotheeMathieu, Timothy Wolodzko, Tim Vink, t-jakubek, t-kusanagi,
 tliu68, Tobias Uhmann, tom1092, Tomás Moreyra, Tomás Ronald Hughes, Tom
-Dupré la Tour, Tommaso Di Noto, Tomohiro Endo, Toshihiro NAKAE, tsuga, Uttam
-kumar, vadim-ushtanit, Vangelis Gkiastas, Venkatachalam N, Vilém Zouhar,
-Vinicius Rios Fuck, Vlasovets, waijean, Whidou, xavier dupré, xiaoyuchai,
-Yasmeen Alsaedy, yoch, Yosuke KOBAYASHI, Yu Feng, YusukeNagasaka, yzhenman,
-Zero, ZeyuSun, ZhaoweiWang, Zito, Zito Relova
+Dupré la Tour, Tommaso Di Noto, Tomohiro Endo, TONY GEORGE, Toshihiro NAKAE,
+tsuga, Uttam kumar, vadim-ushtanit, Vangelis Gkiastas, Venkatachalam N, Vilém
+Zouhar, Vinicius Rios Fuck, Vlasovets, waijean, Whidou, xavier dupré,
+xiaoyuchai, Yasmeen Alsaedy, yoch, Yosuke KOBAYASHI, Yu Feng, YusukeNagasaka,
+yzhenman, Zero, ZeyuSun, ZhaoweiWang, Zito, Zito Relova