From d93b74ccf342819a17e2bfdec05f2d9a460aa5cc Mon Sep 17 00:00:00 2001
From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com>
Date: Tue, 26 Jan 2021 15:33:01 +0300
Subject: [PATCH 01/72] KBinsDiscretizer changes

Added an 'auto' option for n_bins (Sturges' rule) and made it the default;
changed the KMeans algorithm from 'auto' to 'full';
converted str.format() and %-style messages to f-strings.
---
 sklearn/preprocessing/_discretization.py | 73 ++++++++++++++----------
 1 file changed, 42 insertions(+), 31 deletions(-)

diff --git a/sklearn/preprocessing/_discretization.py b/sklearn/preprocessing/_discretization.py
index 22fa236f3314e..caecdf5917920 100644
--- a/sklearn/preprocessing/_discretization.py
+++ b/sklearn/preprocessing/_discretization.py
@@ -28,8 +28,9 @@ class KBinsDiscretizer(TransformerMixin, BaseEstimator):
 
     Parameters
     ----------
-    n_bins : int or array-like of shape (n_features,), default=5
+    n_bins : int or array-like of shape (n_features,), default='auto'
         The number of bins to produce. Raises ValueError if ``n_bins < 2``.
+        For 'auto' option Sturges formula is used.
 
     encode : {'onehot', 'onehot-dense', 'ordinal'}, default='onehot'
         Method used to encode the transformed result.
@@ -126,7 +127,7 @@ class KBinsDiscretizer(TransformerMixin, BaseEstimator):
     """
 
     @_deprecate_positional_args
-    def __init__(self, n_bins=5, *, encode='onehot', strategy='quantile',
+    def __init__(self, n_bins='auto', *, encode='onehot', strategy='quantile',
                  dtype=None):
         self.n_bins = n_bins
         self.encode = encode
@@ -163,20 +164,22 @@ def fit(self, X, y=None):
                 f"{supported_dtype + (None,)}. Got dtype={self.dtype} "
                 f" instead."
             )
-
         valid_encode = ('onehot', 'onehot-dense', 'ordinal')
         if self.encode not in valid_encode:
-            raise ValueError("Valid options for 'encode' are {}. "
-                             "Got encode={!r} instead."
-                             .format(valid_encode, self.encode))
+            raise ValueError(
+                f"Valid options for 'encode' are {valid_encode}. "
+                f"Got encode={self.encode!r} instead."
+            )
         valid_strategy = ('uniform', 'quantile', 'kmeans')
         if self.strategy not in valid_strategy:
-            raise ValueError("Valid options for 'strategy' are {}. "
-                             "Got strategy={!r} instead."
-                             .format(valid_strategy, self.strategy))
+            raise ValueError(
+                f"Valid options for 'strategy' are {valid_strategy}. "
+                f"Got strategy={self.strategy!r} instead."
+            )
 
         n_features = X.shape[1]
-        n_bins = self._validate_n_bins(n_features)
+        n_samples = X.shape[0]
+        n_bins = self._validate_n_bins(n_features, n_samples)
 
         bin_edges = np.zeros(n_features, dtype=object)
         for jj in range(n_features):
@@ -184,8 +187,8 @@ def fit(self, X, y=None):
             col_min, col_max = column.min(), column.max()
 
             if col_min == col_max:
-                warnings.warn("Feature %d is constant and will be "
-                              "replaced with 0." % jj)
+                warnings.warn(f"Feature {jj} is constant and will be "
+                              f"replaced with 0.")
                 n_bins[jj] = 1
                 bin_edges[jj] = np.array([-np.inf, np.inf])
                 continue
@@ -205,7 +208,7 @@ def fit(self, X, y=None):
                 init = (uniform_edges[1:] + uniform_edges[:-1])[:, None] * 0.5
 
                 # 1D k-means procedure
-                km = KMeans(n_clusters=n_bins[jj], init=init, n_init=1)
+                km = KMeans(n_clusters=n_bins[jj], init=init, n_init=1, algorithm='full')
                 centers = km.fit(column[:, None]).cluster_centers_[:, 0]
                 # Must sort, centers may be unsorted even with sorted init
                 centers.sort()
@@ -217,9 +220,9 @@ def fit(self, X, y=None):
                 mask = np.ediff1d(bin_edges[jj], to_begin=np.inf) > 1e-8
                 bin_edges[jj] = bin_edges[jj][mask]
                 if len(bin_edges[jj]) - 1 != n_bins[jj]:
-                    warnings.warn('Bins whose width are too small (i.e., <= '
-                                  '1e-8) in feature %d are removed. Consider '
-                                  'decreasing the number of bins.' % jj)
+                    warnings.warn(f"Bins whose width are too small (i.e., <= "
+                                  f"1e-8) in feature {jj} are removed. Consider "
+                                  f"decreasing the number of bins.")
                     n_bins[jj] = len(bin_edges[jj]) - 1
 
         self.bin_edges_ = bin_edges
@@ -236,20 +239,25 @@ def fit(self, X, y=None):
 
         return self
 
-    def _validate_n_bins(self, n_features):
+    def _validate_n_bins(self, n_features, n_samples):
         """Returns n_bins_, the number of bins per feature.
         """
-        orig_bins = self.n_bins
+        if self.n_bins == 'auto':
+            # calculcate number of bins depending on number of samples in X
+            orig_bins = np.ceil(np.log2(n_samples) + 1.)
+        else:
+            orig_bins = self.n_bins
         if isinstance(orig_bins, numbers.Number):
             if not isinstance(orig_bins, numbers.Integral):
-                raise ValueError("{} received an invalid n_bins type. "
-                                 "Received {}, expected int."
-                                 .format(KBinsDiscretizer.__name__,
-                                         type(orig_bins).__name__))
+                raise ValueError(
+                    f"{KBinsDiscretizer.__name__} received an invalid n_bins type. "
+                    f"Received {type(orig_bins).__name__}, expected int."
+                )
             if orig_bins < 2:
-                raise ValueError("{} received an invalid number "
-                                 "of bins. Received {}, expected at least 2."
-                                 .format(KBinsDiscretizer.__name__, orig_bins))
+                raise ValueError(
+                    f"{KBinsDiscretizer.__name__} received an invalid number "
+                    f"of bins. Received {orig_bins}, expected at least 2."
+                )
             return np.full(n_features, orig_bins, dtype=int)
 
         n_bins = check_array(orig_bins, dtype=int, copy=True,
@@ -264,10 +272,11 @@ def _validate_n_bins(self, n_features):
         violating_indices = np.where(bad_nbins_value)[0]
         if violating_indices.shape[0] > 0:
             indices = ", ".join(str(i) for i in violating_indices)
-            raise ValueError("{} received an invalid number "
-                             "of bins at indices {}. Number of bins "
-                             "must be at least 2, and must be an int."
-                             .format(KBinsDiscretizer.__name__, indices))
+            raise ValueError(
+                f"{KBinsDiscretizer.__name__} received an invalid number "
+                f"of bins at indices {indices}. Number of bins "
+                f"must be at least 2, and must be an int."
+            )
         return n_bins
 
     def transform(self, X):
@@ -342,8 +351,10 @@ def inverse_transform(self, Xt):
         Xinv = check_array(Xt, copy=True, dtype=(np.float64, np.float32))
         n_features = self.n_bins_.shape[0]
         if Xinv.shape[1] != n_features:
-            raise ValueError("Incorrect number of features. Expecting {}, "
-                             "received {}.".format(n_features, Xinv.shape[1]))
+            raise ValueError(
+                f"Incorrect number of features. Expecting {n_features}, "
+                f"received {Xinv.shape[1]}."
+            )
 
         for jj in range(n_features):
             bin_edges = self.bin_edges_[jj]

From 59a84d6408339bf7ffd7e7b80cc524f9018c84c9 Mon Sep 17 00:00:00 2001
From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com>
Date: Tue, 26 Jan 2021 15:45:58 +0300
Subject: [PATCH 02/72] Small fix: cast 'auto' n_bins to int

---
 sklearn/preprocessing/_discretization.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/preprocessing/_discretization.py b/sklearn/preprocessing/_discretization.py
index caecdf5917920..7a561c7ccd3fa 100644
--- a/sklearn/preprocessing/_discretization.py
+++ b/sklearn/preprocessing/_discretization.py
@@ -244,7 +244,7 @@ def _validate_n_bins(self, n_features, n_samples):
         """
         if self.n_bins == 'auto':
             # calculcate number of bins depending on number of samples in X
-            orig_bins = np.ceil(np.log2(n_samples) + 1.)
+            orig_bins = int(np.ceil(np.log2(n_samples) + 1.))
         else:
             orig_bins = self.n_bins
         if isinstance(orig_bins, numbers.Number):

From 566ac59799d4cb01dcf48763b03b69e22cb78459 Mon Sep 17 00:00:00 2001
From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com>
Date: Tue, 26 Jan 2021 20:16:55 +0300
Subject: [PATCH 03/72] Added checks for n_bins=str

---
 sklearn/preprocessing/_discretization.py | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/sklearn/preprocessing/_discretization.py b/sklearn/preprocessing/_discretization.py
index 7a561c7ccd3fa..a0d4e1a76a09f 100644
--- a/sklearn/preprocessing/_discretization.py
+++ b/sklearn/preprocessing/_discretization.py
@@ -242,11 +242,17 @@ def fit(self, X, y=None):
     def _validate_n_bins(self, n_features, n_samples):
         """Returns n_bins_, the number of bins per feature.
         """
-        if self.n_bins == 'auto':
-            # calculcate number of bins depending on number of samples in X
-            orig_bins = int(np.ceil(np.log2(n_samples) + 1.))
-        else:
-            orig_bins = self.n_bins
+        orig_bins = self.n_bins
+        if isinstance(orig_bins, str):
+            if self.n_bins == 'auto':
+                # calculcate number of bins
+                # depending on number of samples with Sturges rule
+                orig_bins = int(np.ceil(np.log2(n_samples) + 1.))
+            else:
+                raise ValueError(
+                    f"{KBinsDiscretizer.__name__} received an invalid n_bins value "
+                    f"{orig_bins!r}, while only 'auto' is supported."
+                )
         if isinstance(orig_bins, numbers.Number):
             if not isinstance(orig_bins, numbers.Integral):
                 raise ValueError(

From 675e1e6db86f94d1e44ef8a51e32b8dad580524f Mon Sep 17 00:00:00 2001
From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com>
Date: Thu, 28 Jan 2021 08:29:34 +0000
Subject: [PATCH 04/72] Lint changes

---
 sklearn/preprocessing/_discretization.py | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/sklearn/preprocessing/_discretization.py b/sklearn/preprocessing/_discretization.py
index a0d4e1a76a09f..d6fe4c2e9808c 100644
--- a/sklearn/preprocessing/_discretization.py
+++ b/sklearn/preprocessing/_discretization.py
@@ -208,7 +208,8 @@ def fit(self, X, y=None):
                 init = (uniform_edges[1:] + uniform_edges[:-1])[:, None] * 0.5
 
                 # 1D k-means procedure
-                km = KMeans(n_clusters=n_bins[jj], init=init, n_init=1, algorithm='full')
+                km = KMeans(n_clusters=n_bins[jj], init=init,
+                            n_init=1, algorithm='full')
                 centers = km.fit(column[:, None]).cluster_centers_[:, 0]
                 # Must sort, centers may be unsorted even with sorted init
                 centers.sort()
@@ -220,9 +221,10 @@ def fit(self, X, y=None):
                 mask = np.ediff1d(bin_edges[jj], to_begin=np.inf) > 1e-8
                 bin_edges[jj] = bin_edges[jj][mask]
                 if len(bin_edges[jj]) - 1 != n_bins[jj]:
-                    warnings.warn(f"Bins whose width are too small (i.e., <= "
-                                  f"1e-8) in feature {jj} are removed. Consider "
-                                  f"decreasing the number of bins.")
+                    warnings.warn(f"Bins whose width are too small "
+                                  f"(i.e., <= 1e-8) in feature {jj} "
+                                  f"are removed. Consider decreasing "
+                                  f"the number of bins.")
                     n_bins[jj] = len(bin_edges[jj]) - 1
 
         self.bin_edges_ = bin_edges
@@ -250,13 +252,15 @@ def _validate_n_bins(self, n_features, n_samples):
                 orig_bins = int(np.ceil(np.log2(n_samples) + 1.))
             else:
                 raise ValueError(
-                    f"{KBinsDiscretizer.__name__} received an invalid n_bins value "
+                    f"{KBinsDiscretizer.__name__} received "
+                    f"an invalid n_bins value "
                     f"{orig_bins!r}, while only 'auto' is supported."
                 )
         if isinstance(orig_bins, numbers.Number):
             if not isinstance(orig_bins, numbers.Integral):
                 raise ValueError(
-                    f"{KBinsDiscretizer.__name__} received an invalid n_bins type. "
+                    f"{KBinsDiscretizer.__name__} received "
+                    f"an invalid n_bins type. "
                     f"Received {type(orig_bins).__name__}, expected int."
                 )
             if orig_bins < 2:

From ab4f868aa0845443e71019078a297f3672282768 Mon Sep 17 00:00:00 2001
From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com>
Date: Thu, 28 Jan 2021 09:01:56 +0000
Subject: [PATCH 05/72] Changed behaviour to catch n_bins<2 in 'auto'

---
 sklearn/preprocessing/_discretization.py | 22 ++++++++--------------
 1 file changed, 8 insertions(+), 14 deletions(-)

diff --git a/sklearn/preprocessing/_discretization.py b/sklearn/preprocessing/_discretization.py
index d6fe4c2e9808c..3adc96316d707 100644
--- a/sklearn/preprocessing/_discretization.py
+++ b/sklearn/preprocessing/_discretization.py
@@ -244,24 +244,18 @@ def fit(self, X, y=None):
     def _validate_n_bins(self, n_features, n_samples):
         """Returns n_bins_, the number of bins per feature.
         """
-        orig_bins = self.n_bins
-        if isinstance(orig_bins, str):
-            if self.n_bins == 'auto':
-                # calculcate number of bins
-                # depending on number of samples with Sturges rule
-                orig_bins = int(np.ceil(np.log2(n_samples) + 1.))
-            else:
-                raise ValueError(
-                    f"{KBinsDiscretizer.__name__} received "
-                    f"an invalid n_bins value "
-                    f"{orig_bins!r}, while only 'auto' is supported."
-                )
+        if self.n_bins == 'auto':
+            # calculcate number of bins
+            # depending on number of samples with Sturges rule
+            orig_bins = int(np.ceil(np.log2(n_samples) + 1.))
+        else:
+            orig_bins = self.n_bins
         if isinstance(orig_bins, numbers.Number):
             if not isinstance(orig_bins, numbers.Integral):
                 raise ValueError(
                     f"{KBinsDiscretizer.__name__} received "
-                    f"an invalid n_bins type. "
-                    f"Received {type(orig_bins).__name__}, expected int."
+                    f"an invalid n_bins type. Received "
+                    f"{type(orig_bins).__name__}, expected int or 'auto'."
                 )
             if orig_bins < 2:
                 raise ValueError(

From 87579eddef96a3de24514669fd563f7a9acbdd2d Mon Sep 17 00:00:00 2001
From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com>
Date: Thu, 28 Jan 2021 10:01:59 +0000
Subject: [PATCH 06/72] Update _discretization.py

---
 sklearn/preprocessing/_discretization.py | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/sklearn/preprocessing/_discretization.py b/sklearn/preprocessing/_discretization.py
index 3adc96316d707..75cfeec4c4ddf 100644
--- a/sklearn/preprocessing/_discretization.py
+++ b/sklearn/preprocessing/_discretization.py
@@ -244,13 +244,12 @@ def fit(self, X, y=None):
     def _validate_n_bins(self, n_features, n_samples):
         """Returns n_bins_, the number of bins per feature.
         """
-        if self.n_bins == 'auto':
-            # calculcate number of bins
-            # depending on number of samples with Sturges rule
-            orig_bins = int(np.ceil(np.log2(n_samples) + 1.))
-        else:
-            orig_bins = self.n_bins
-        if isinstance(orig_bins, numbers.Number):
+        orig_bins = self.n_bins
+        if isinstance(orig_bins, numbers.Number) | isinstance(orig_bins, str):
+            if self.n_bins == 'auto':
+                # calculcate number of bins
+                # depending on number of samples with Sturges rule
+                orig_bins = int(np.ceil(np.log2(n_samples) + 1.))
             if not isinstance(orig_bins, numbers.Integral):
                 raise ValueError(
                     f"{KBinsDiscretizer.__name__} received "

From 44ea08db3019861160e9546f96ae2a3f2b45e5e1 Mon Sep 17 00:00:00 2001
From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com>
Date: Thu, 28 Jan 2021 10:05:44 +0000
Subject: [PATCH 07/72] Update _discretization.py

---
 sklearn/preprocessing/_discretization.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/preprocessing/_discretization.py b/sklearn/preprocessing/_discretization.py
index 75cfeec4c4ddf..344f48647908b 100644
--- a/sklearn/preprocessing/_discretization.py
+++ b/sklearn/preprocessing/_discretization.py
@@ -246,7 +246,7 @@ def _validate_n_bins(self, n_features, n_samples):
         """
         orig_bins = self.n_bins
         if isinstance(orig_bins, numbers.Number) | isinstance(orig_bins, str):
-            if self.n_bins == 'auto':
+            if orig_bins == 'auto':
                 # calculcate number of bins
                 # depending on number of samples with Sturges rule
                 orig_bins = int(np.ceil(np.log2(n_samples) + 1.))

From 731de9dd0c16e7dc7c013c1db7d65dc61f98634b Mon Sep 17 00:00:00 2001
From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com>
Date: Thu, 28 Jan 2021 10:48:04 +0000
Subject: [PATCH 08/72] Update sklearn/preprocessing/_discretization.py

Co-authored-by: Joel Nothman <joel.nothman@gmail.com>
---
 sklearn/preprocessing/_discretization.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/preprocessing/_discretization.py b/sklearn/preprocessing/_discretization.py
index 344f48647908b..27e7f97fcebac 100644
--- a/sklearn/preprocessing/_discretization.py
+++ b/sklearn/preprocessing/_discretization.py
@@ -245,7 +245,7 @@ def _validate_n_bins(self, n_features, n_samples):
         """Returns n_bins_, the number of bins per feature.
         """
         orig_bins = self.n_bins
-        if isinstance(orig_bins, numbers.Number) | isinstance(orig_bins, str):
+        if isinstance(orig_bins, numbers.Number) or isinstance(orig_bins, str):
             if orig_bins == 'auto':
                 # calculcate number of bins
                 # depending on number of samples with Sturges rule

From 55b84f683541dbd3391972bd21b2b3e848789b29 Mon Sep 17 00:00:00 2001
From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com>
Date: Thu, 28 Jan 2021 10:48:21 +0000
Subject: [PATCH 09/72] Update sklearn/preprocessing/_discretization.py

Co-authored-by: Joel Nothman <joel.nothman@gmail.com>
---
 sklearn/preprocessing/_discretization.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/preprocessing/_discretization.py b/sklearn/preprocessing/_discretization.py
index 27e7f97fcebac..03cb171fbbecd 100644
--- a/sklearn/preprocessing/_discretization.py
+++ b/sklearn/preprocessing/_discretization.py
@@ -30,7 +30,7 @@ class KBinsDiscretizer(TransformerMixin, BaseEstimator):
     ----------
     n_bins : int or array-like of shape (n_features,), default='auto'
         The number of bins to produce. Raises ValueError if ``n_bins < 2``.
-        For 'auto' option Sturges formula is used.
+        For 'auto' option Sturges formula is used: bins are log(n_samples) + 1.
 
     encode : {'onehot', 'onehot-dense', 'ordinal'}, default='onehot'
         Method used to encode the transformed result.

From fc9f93582c8f884af8793b465a4aedf511904e29 Mon Sep 17 00:00:00 2001
From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com>
Date: Thu, 28 Jan 2021 11:35:36 +0000
Subject: [PATCH 10/72] Update test_discretization.py

---
 sklearn/preprocessing/tests/test_discretization.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/sklearn/preprocessing/tests/test_discretization.py b/sklearn/preprocessing/tests/test_discretization.py
index 9d607c82d5831..fe1c9c114b19f 100644
--- a/sklearn/preprocessing/tests/test_discretization.py
+++ b/sklearn/preprocessing/tests/test_discretization.py
@@ -45,7 +45,14 @@ def test_invalid_n_bins():
 
     est = KBinsDiscretizer(n_bins=1.1)
     err_msg = ("KBinsDiscretizer received an invalid "
-               "n_bins type. Received float, expected int.")
+               "n_bins type. Received float, expected int or 'auto'.")
+    with pytest.raises(ValueError, match=err_msg):
+        est.fit_transform(X)
+    
+    # Bad string value
+    est = KBinsDiscretizer(n_bins='rice')
+    err_msg = ("KBinsDiscretizer received an invalid "
+               "n_bins type. Received float, expected int or 'auto'.")
     with pytest.raises(ValueError, match=err_msg):
         est.fit_transform(X)
 

From f5bff722cccfea293ed6016b8af0b50f27d95b0c Mon Sep 17 00:00:00 2001
From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com>
Date: Thu, 28 Jan 2021 11:41:57 +0000
Subject: [PATCH 11/72] Update test_discretization.py

---
 sklearn/preprocessing/tests/test_discretization.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/preprocessing/tests/test_discretization.py b/sklearn/preprocessing/tests/test_discretization.py
index fe1c9c114b19f..95abb40e6503d 100644
--- a/sklearn/preprocessing/tests/test_discretization.py
+++ b/sklearn/preprocessing/tests/test_discretization.py
@@ -48,7 +48,7 @@ def test_invalid_n_bins():
                "n_bins type. Received float, expected int or 'auto'.")
     with pytest.raises(ValueError, match=err_msg):
         est.fit_transform(X)
-    
+
     # Bad string value
     est = KBinsDiscretizer(n_bins='rice')
     err_msg = ("KBinsDiscretizer received an invalid "

From a8e24e543b7569b56e99fc081cad1c1db883adef Mon Sep 17 00:00:00 2001
From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com>
Date: Thu, 28 Jan 2021 14:21:53 +0000
Subject: [PATCH 12/72] Update _discretization.py

---
 sklearn/preprocessing/_discretization.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/sklearn/preprocessing/_discretization.py b/sklearn/preprocessing/_discretization.py
index 03cb171fbbecd..eaee8e7b794ae 100644
--- a/sklearn/preprocessing/_discretization.py
+++ b/sklearn/preprocessing/_discretization.py
@@ -28,7 +28,8 @@ class KBinsDiscretizer(TransformerMixin, BaseEstimator):
 
     Parameters
     ----------
-    n_bins : int or array-like of shape (n_features,), default='auto'
+    n_bins : int, array-like of integers of shape (n_features,) or 'auto', \
+             default='auto'
         The number of bins to produce. Raises ValueError if ``n_bins < 2``.
         For 'auto' option Sturges formula is used: bins are log(n_samples) + 1.
 

From c3638272edaf089435b1a4854f5594cad8878659 Mon Sep 17 00:00:00 2001
From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com>
Date: Thu, 28 Jan 2021 14:36:51 +0000
Subject: [PATCH 13/72] Update _discretization.py

---
 sklearn/preprocessing/_discretization.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/sklearn/preprocessing/_discretization.py b/sklearn/preprocessing/_discretization.py
index eaee8e7b794ae..d9cb7ba0fc955 100644
--- a/sklearn/preprocessing/_discretization.py
+++ b/sklearn/preprocessing/_discretization.py
@@ -29,9 +29,12 @@ class KBinsDiscretizer(TransformerMixin, BaseEstimator):
     Parameters
     ----------
     n_bins : int, array-like of integers of shape (n_features,) or 'auto', \
-             default='auto'
+             default=5
         The number of bins to produce. Raises ValueError if ``n_bins < 2``.
         For 'auto' option Sturges formula is used: bins are log(n_samples) + 1.
+        
+        .. versionadded:: 0.24
+            Added 'auto' option
 
     encode : {'onehot', 'onehot-dense', 'ordinal'}, default='onehot'
         Method used to encode the transformed result.
@@ -128,7 +131,7 @@ class KBinsDiscretizer(TransformerMixin, BaseEstimator):
     """
 
     @_deprecate_positional_args
-    def __init__(self, n_bins='auto', *, encode='onehot', strategy='quantile',
+    def __init__(self, n_bins='warn', *, encode='onehot', strategy='quantile',
                  dtype=None):
         self.n_bins = n_bins
         self.encode = encode
@@ -152,6 +155,10 @@ def fit(self, X, y=None):
         -------
         self
         """
+        if self.n_bins == 'warn':
+            warnings.warn("The default value of n_bins will change from "
+                          "5 to 'auto' in 0.25.", FutureWarning)
+        self.n_bins = 5
         X = self._validate_data(X, dtype='numeric')
 
         supported_dtype = (np.float64, np.float32)

From 6d767c5a38464a04609f004f559036b3ecfbec64 Mon Sep 17 00:00:00 2001
From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com>
Date: Thu, 28 Jan 2021 15:21:55 +0000
Subject: [PATCH 14/72] Update _discretization.py

---
 sklearn/preprocessing/_discretization.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/preprocessing/_discretization.py b/sklearn/preprocessing/_discretization.py
index d9cb7ba0fc955..39046a9689c38 100644
--- a/sklearn/preprocessing/_discretization.py
+++ b/sklearn/preprocessing/_discretization.py
@@ -32,7 +32,7 @@ class KBinsDiscretizer(TransformerMixin, BaseEstimator):
              default=5
         The number of bins to produce. Raises ValueError if ``n_bins < 2``.
         For 'auto' option Sturges formula is used: bins are log(n_samples) + 1.
-        
+
         .. versionadded:: 0.24
             Added 'auto' option
 

From dc6b095ce3f9fe28c0bcd3aba6b018cc1112b24a Mon Sep 17 00:00:00 2001
From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com>
Date: Thu, 28 Jan 2021 16:47:43 +0000
Subject: [PATCH 15/72] Update _discretization.py

---
 sklearn/preprocessing/_discretization.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/sklearn/preprocessing/_discretization.py b/sklearn/preprocessing/_discretization.py
index 39046a9689c38..6a4cfda168d5d 100644
--- a/sklearn/preprocessing/_discretization.py
+++ b/sklearn/preprocessing/_discretization.py
@@ -158,7 +158,8 @@ def fit(self, X, y=None):
         if self.n_bins == 'warn':
             warnings.warn("The default value of n_bins will change from "
                           "5 to 'auto' in 0.25.", FutureWarning)
-        self.n_bins = 5
+            self.n_bins = 5
+        
         X = self._validate_data(X, dtype='numeric')
 
         supported_dtype = (np.float64, np.float32)

From 1dac5f902cac182fce96bb3cc6a6d29f3ba2ad4b Mon Sep 17 00:00:00 2001
From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com>
Date: Thu, 28 Jan 2021 16:50:39 +0000
Subject: [PATCH 16/72] Update _discretization.py

---
 sklearn/preprocessing/_discretization.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/preprocessing/_discretization.py b/sklearn/preprocessing/_discretization.py
index 6a4cfda168d5d..d919aeedc160f 100644
--- a/sklearn/preprocessing/_discretization.py
+++ b/sklearn/preprocessing/_discretization.py
@@ -159,7 +159,7 @@ def fit(self, X, y=None):
             warnings.warn("The default value of n_bins will change from "
                           "5 to 'auto' in 0.25.", FutureWarning)
             self.n_bins = 5
-        
+
         X = self._validate_data(X, dtype='numeric')
 
         supported_dtype = (np.float64, np.float32)

From abff57615c49204c62e6ba8647473a028bbcd45e Mon Sep 17 00:00:00 2001
From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com>
Date: Fri, 29 Jan 2021 08:10:48 +0000
Subject: [PATCH 17/72] Update _discretization.py

---
 sklearn/preprocessing/_discretization.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/sklearn/preprocessing/_discretization.py b/sklearn/preprocessing/_discretization.py
index d919aeedc160f..75a15db35e49a 100644
--- a/sklearn/preprocessing/_discretization.py
+++ b/sklearn/preprocessing/_discretization.py
@@ -134,6 +134,10 @@ class KBinsDiscretizer(TransformerMixin, BaseEstimator):
     def __init__(self, n_bins='warn', *, encode='onehot', strategy='quantile',
                  dtype=None):
         self.n_bins = n_bins
+        if self.n_bins == 'warn':
+            warnings.warn("The default value of n_bins will change from "
+                          "5 to 'auto' in 0.25.", FutureWarning)
+            self.n_bins = 5
         self.encode = encode
         self.strategy = strategy
         self.dtype = dtype
@@ -155,11 +159,6 @@ def fit(self, X, y=None):
         -------
         self
         """
-        if self.n_bins == 'warn':
-            warnings.warn("The default value of n_bins will change from "
-                          "5 to 'auto' in 0.25.", FutureWarning)
-            self.n_bins = 5
-
         X = self._validate_data(X, dtype='numeric')
 
         supported_dtype = (np.float64, np.float32)

From bcb118d774974aad1547a2bbfa21069ae7de83a1 Mon Sep 17 00:00:00 2001
From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com>
Date: Fri, 29 Jan 2021 13:38:15 +0300
Subject: [PATCH 18/72] Update _discretization.py

---
 sklearn/preprocessing/_discretization.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/sklearn/preprocessing/_discretization.py b/sklearn/preprocessing/_discretization.py
index 75a15db35e49a..8dbadf3d3c659 100644
--- a/sklearn/preprocessing/_discretization.py
+++ b/sklearn/preprocessing/_discretization.py
@@ -255,8 +255,7 @@ def _validate_n_bins(self, n_features, n_samples):
         orig_bins = self.n_bins
         if isinstance(orig_bins, numbers.Number) or isinstance(orig_bins, str):
             if orig_bins == 'auto':
-                # calculcate number of bins
-                # depending on number of samples with Sturges rule
+                # calculate number of bins with Sturges rule
                 orig_bins = int(np.ceil(np.log2(n_samples) + 1.))
             if not isinstance(orig_bins, numbers.Integral):
                 raise ValueError(

From cca972cccb0131da44a6f868a3e12b418eb8610f Mon Sep 17 00:00:00 2001
From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com>
Date: Fri, 29 Jan 2021 11:30:13 +0000
Subject: [PATCH 19/72] Update test_discretization.py

---
 sklearn/preprocessing/tests/test_discretization.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/preprocessing/tests/test_discretization.py b/sklearn/preprocessing/tests/test_discretization.py
index 95abb40e6503d..549dad86dd98d 100644
--- a/sklearn/preprocessing/tests/test_discretization.py
+++ b/sklearn/preprocessing/tests/test_discretization.py
@@ -52,7 +52,7 @@ def test_invalid_n_bins():
     # Bad string value
     est = KBinsDiscretizer(n_bins='rice')
     err_msg = ("KBinsDiscretizer received an invalid "
-               "n_bins type. Received float, expected int or 'auto'.")
+               "n_bins type. Received str, expected int or 'auto'.")
     with pytest.raises(ValueError, match=err_msg):
         est.fit_transform(X)
 

From 7b552e6c4a650ddad5864ba086a565780469576f Mon Sep 17 00:00:00 2001
From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com>
Date: Fri, 29 Jan 2021 11:33:59 +0000
Subject: [PATCH 20/72] Update _discretization.py

---
 sklearn/preprocessing/_discretization.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/sklearn/preprocessing/_discretization.py b/sklearn/preprocessing/_discretization.py
index 8dbadf3d3c659..0f98c3429d90f 100644
--- a/sklearn/preprocessing/_discretization.py
+++ b/sklearn/preprocessing/_discretization.py
@@ -134,10 +134,6 @@ class KBinsDiscretizer(TransformerMixin, BaseEstimator):
     def __init__(self, n_bins='warn', *, encode='onehot', strategy='quantile',
                  dtype=None):
         self.n_bins = n_bins
-        if self.n_bins == 'warn':
-            warnings.warn("The default value of n_bins will change from "
-                          "5 to 'auto' in 0.25.", FutureWarning)
-            self.n_bins = 5
         self.encode = encode
         self.strategy = strategy
         self.dtype = dtype
@@ -159,6 +155,9 @@ def fit(self, X, y=None):
         -------
         self
         """
+        if self.n_bins == 'warn':
+            warnings.warn("The default value of n_bins will change from "
+                          "5 to 'auto' in 0.25.", FutureWarning)
         X = self._validate_data(X, dtype='numeric')
 
         supported_dtype = (np.float64, np.float32)
@@ -257,6 +256,9 @@ def _validate_n_bins(self, n_features, n_samples):
             if orig_bins == 'auto':
                 # calculate number of bins with Sturges rule
                 orig_bins = int(np.ceil(np.log2(n_samples) + 1.))
+            if orig_bins == 'warn:
+                # deprecation cycle case, should be deleted afterwards
+                orig_bins = 5
             if not isinstance(orig_bins, numbers.Integral):
                 raise ValueError(
                     f"{KBinsDiscretizer.__name__} received "

From 617bf90c8213b45aad7d61b0654f38be1f1dd3b1 Mon Sep 17 00:00:00 2001
From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com>
Date: Fri, 29 Jan 2021 11:39:48 +0000
Subject: [PATCH 21/72] Update _discretization.py

---
 sklearn/preprocessing/_discretization.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/sklearn/preprocessing/_discretization.py b/sklearn/preprocessing/_discretization.py
index 0f98c3429d90f..d910b7712c334 100644
--- a/sklearn/preprocessing/_discretization.py
+++ b/sklearn/preprocessing/_discretization.py
@@ -28,8 +28,7 @@ class KBinsDiscretizer(TransformerMixin, BaseEstimator):
 
     Parameters
     ----------
-    n_bins : int, array-like of integers of shape (n_features,) or 'auto', \
-             default=5
+    n_bins : {int, array-like (n_features,), dtype=integral, 'auto'}, default=5
         The number of bins to produce. Raises ValueError if ``n_bins < 2``.
         For 'auto' option Sturges formula is used: bins are log(n_samples) + 1.
 

From 9563a1cdd728970ee092aa8c4283c46de2bf9576 Mon Sep 17 00:00:00 2001
From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com>
Date: Fri, 29 Jan 2021 11:40:53 +0000
Subject: [PATCH 22/72] Update _discretization.py

---
 sklearn/preprocessing/_discretization.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/preprocessing/_discretization.py b/sklearn/preprocessing/_discretization.py
index d910b7712c334..22ed9e30b0a82 100644
--- a/sklearn/preprocessing/_discretization.py
+++ b/sklearn/preprocessing/_discretization.py
@@ -255,7 +255,7 @@ def _validate_n_bins(self, n_features, n_samples):
             if orig_bins == 'auto':
                 # calculate number of bins with Sturges rule
                 orig_bins = int(np.ceil(np.log2(n_samples) + 1.))
-            if orig_bins == 'warn:
+            if orig_bins == 'warn':
                 # deprecation cycle case, should be deleted afterwards
                 orig_bins = 5
             if not isinstance(orig_bins, numbers.Integral):

From 28dbbc5fd72889ea0fe59cef96250587cb575347 Mon Sep 17 00:00:00 2001
From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com>
Date: Fri, 29 Jan 2021 12:38:19 +0000
Subject: [PATCH 23/72] Update _discretization.py

---
 sklearn/preprocessing/_discretization.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/sklearn/preprocessing/_discretization.py b/sklearn/preprocessing/_discretization.py
index 22ed9e30b0a82..5711dbe56b812 100644
--- a/sklearn/preprocessing/_discretization.py
+++ b/sklearn/preprocessing/_discretization.py
@@ -129,7 +129,6 @@ class KBinsDiscretizer(TransformerMixin, BaseEstimator):
 
     """
 
-    @_deprecate_positional_args
     def __init__(self, n_bins='warn', *, encode='onehot', strategy='quantile',
                  dtype=None):
         self.n_bins = n_bins
@@ -137,6 +136,7 @@ def __init__(self, n_bins='warn', *, encode='onehot', strategy='quantile',
         self.strategy = strategy
         self.dtype = dtype
 
+    @_deprecate_positional_args
     def fit(self, X, y=None):
         """
         Fit the estimator.
@@ -154,9 +154,10 @@ def fit(self, X, y=None):
         -------
         self
         """
-        if self.n_bins == 'warn':
-            warnings.warn("The default value of n_bins will change from "
-                          "5 to 'auto' in 0.25.", FutureWarning)
+        if  isinstance(self.n_bins, str):
+            if self.n_bins == 'warn':
+                warnings.warn("The default value of n_bins will change from "
+                              "5 to 'auto' in 0.25.", FutureWarning)
         X = self._validate_data(X, dtype='numeric')
 
         supported_dtype = (np.float64, np.float32)

From 3e5a86dd0fa9414684844d153618ba4074fb60f8 Mon Sep 17 00:00:00 2001
From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com>
Date: Fri, 29 Jan 2021 12:43:50 +0000
Subject: [PATCH 24/72] Update _discretization.py

---
 sklearn/preprocessing/_discretization.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/preprocessing/_discretization.py b/sklearn/preprocessing/_discretization.py
index 5711dbe56b812..b60ba7bbb1d9b 100644
--- a/sklearn/preprocessing/_discretization.py
+++ b/sklearn/preprocessing/_discretization.py
@@ -154,7 +154,7 @@ def fit(self, X, y=None):
         -------
         self
         """
-        if  isinstance(self.n_bins, str):
+        if isinstance(self.n_bins, str):
             if self.n_bins == 'warn':
                 warnings.warn("The default value of n_bins will change from "
                               "5 to 'auto' in 0.25.", FutureWarning)

From 0d7cc149ddf24c8f529eed6d8dd37385e96246b2 Mon Sep 17 00:00:00 2001
From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com>
Date: Fri, 29 Jan 2021 13:40:23 +0000
Subject: [PATCH 25/72] Update sklearn/preprocessing/_discretization.py
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Jérémie du Boisberranger <34657725+jeremiedbb@users.noreply.github.com>
---
 sklearn/preprocessing/_discretization.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/preprocessing/_discretization.py b/sklearn/preprocessing/_discretization.py
index b60ba7bbb1d9b..6dc4f960f7f40 100644
--- a/sklearn/preprocessing/_discretization.py
+++ b/sklearn/preprocessing/_discretization.py
@@ -32,7 +32,7 @@ class KBinsDiscretizer(TransformerMixin, BaseEstimator):
         The number of bins to produce. Raises ValueError if ``n_bins < 2``.
         For 'auto' option Sturges formula is used: bins are log(n_samples) + 1.
 
-        .. versionadded:: 0.24
+        .. versionadded:: 1.0
             Added 'auto' option
 
     encode : {'onehot', 'onehot-dense', 'ordinal'}, default='onehot'

From 0c9eb1815b96d65107b7cf1c8853ddb987648cd4 Mon Sep 17 00:00:00 2001
From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com>
Date: Fri, 29 Jan 2021 13:43:03 +0000
Subject: [PATCH 26/72] Update sklearn/preprocessing/_discretization.py
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Jérémie du Boisberranger <34657725+jeremiedbb@users.noreply.github.com>
---
 sklearn/preprocessing/_discretization.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/preprocessing/_discretization.py b/sklearn/preprocessing/_discretization.py
index 6dc4f960f7f40..5910d9e9e469e 100644
--- a/sklearn/preprocessing/_discretization.py
+++ b/sklearn/preprocessing/_discretization.py
@@ -28,7 +28,7 @@ class KBinsDiscretizer(TransformerMixin, BaseEstimator):
 
     Parameters
     ----------
-    n_bins : {int, array-like (n_features,), dtype=integral, 'auto'}, default=5
+    n_bins : int, 'auto' or array-like of shape (n_features,), dtype=integral, default=5
         The number of bins to produce. Raises ValueError if ``n_bins < 2``.
         For 'auto' option Sturges formula is used: bins are log(n_samples) + 1.
 

From 119976b176cfdebbac5ad64b320feccd27c9541c Mon Sep 17 00:00:00 2001
From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com>
Date: Fri, 29 Jan 2021 13:56:38 +0000
Subject: [PATCH 27/72] Update test_discretization.py

---
 sklearn/preprocessing/tests/test_discretization.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/sklearn/preprocessing/tests/test_discretization.py b/sklearn/preprocessing/tests/test_discretization.py
index 549dad86dd98d..13226190925ea 100644
--- a/sklearn/preprocessing/tests/test_discretization.py
+++ b/sklearn/preprocessing/tests/test_discretization.py
@@ -36,6 +36,15 @@ def test_valid_n_bins():
     assert KBinsDiscretizer(n_bins=2).fit(X).n_bins_.dtype == np.dtype(int)
 
 
+def test_n_bins_5_deprecated():
+    # FIXME: remove in 1.2
+    est = KBinsDiscretizer()
+    depr_msg = ("The default value of n_bins will change from "
+                "5 to 'auto' in 1.2")
+    with pytest.warns(FutureWarning, match=depr_msg):
+        est.fit(X)
+
+
 def test_invalid_n_bins():
     est = KBinsDiscretizer(n_bins=1)
     err_msg = ("KBinsDiscretizer received an invalid "

From fdcbd299993e29e3cafe8d49d32119454724c313 Mon Sep 17 00:00:00 2001
From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com>
Date: Fri, 29 Jan 2021 13:56:41 +0000
Subject: [PATCH 28/72] Update _discretization.py

---
 sklearn/preprocessing/_discretization.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/sklearn/preprocessing/_discretization.py b/sklearn/preprocessing/_discretization.py
index 5910d9e9e469e..83d4824b8dad3 100644
--- a/sklearn/preprocessing/_discretization.py
+++ b/sklearn/preprocessing/_discretization.py
@@ -28,7 +28,8 @@ class KBinsDiscretizer(TransformerMixin, BaseEstimator):
 
     Parameters
     ----------
-    n_bins : int, 'auto' or array-like of shape (n_features,), dtype=integral, default=5
+    n_bins : int, 'auto' or array-like of shape (n_features,), dtype=integral,\
+             default=5
         The number of bins to produce. Raises ValueError if ``n_bins < 2``.
         For 'auto' option Sturges formula is used: bins are log(n_samples) + 1.
 
@@ -154,10 +155,12 @@ def fit(self, X, y=None):
         -------
         self
         """
+        self._n_bins = self.n_bins
         if isinstance(self.n_bins, str):
             if self.n_bins == 'warn':
                 warnings.warn("The default value of n_bins will change from "
-                              "5 to 'auto' in 0.25.", FutureWarning)
+                              "5 to 'auto' in 1.2", FutureWarning)
+                self._n_bins = 5
         X = self._validate_data(X, dtype='numeric')
 
         supported_dtype = (np.float64, np.float32)
@@ -251,14 +254,11 @@ def fit(self, X, y=None):
     def _validate_n_bins(self, n_features, n_samples):
         """Returns n_bins_, the number of bins per feature.
         """
-        orig_bins = self.n_bins
+        orig_bins = self._n_bins
         if isinstance(orig_bins, numbers.Number) or isinstance(orig_bins, str):
             if orig_bins == 'auto':
                 # calculate number of bins with Sturges rule
                 orig_bins = int(np.ceil(np.log2(n_samples) + 1.))
-            if orig_bins == 'warn':
-                # deprecation cycle case, should be deleted afterwards
-                orig_bins = 5
             if not isinstance(orig_bins, numbers.Integral):
                 raise ValueError(
                     f"{KBinsDiscretizer.__name__} received "

From fe5bc1d0ca538c93464c51536d5fa759e91794c3 Mon Sep 17 00:00:00 2001
From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com>
Date: Fri, 29 Jan 2021 14:53:01 +0000
Subject: [PATCH 29/72] added test for auto

---
 sklearn/preprocessing/tests/test_discretization.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/sklearn/preprocessing/tests/test_discretization.py b/sklearn/preprocessing/tests/test_discretization.py
index 13226190925ea..39c82742f6605 100644
--- a/sklearn/preprocessing/tests/test_discretization.py
+++ b/sklearn/preprocessing/tests/test_discretization.py
@@ -45,6 +45,14 @@ def test_n_bins_5_deprecated():
         est.fit(X)
 
 
+def test_auto_bins():
+    est = KBinsDiscretizer(n_bins='auto')
+    # for sturges rule: ceil(log2(4) + 1)
+    expected_bins = [3, 3, 3, 3]
+    est.fit(X)
+    assert est.n_bins_ = expected_bins
+
+
 def test_invalid_n_bins():
     est = KBinsDiscretizer(n_bins=1)
     err_msg = ("KBinsDiscretizer received an invalid "

From aede54ad2b1bc690593256a64a089bb5143a4e1b Mon Sep 17 00:00:00 2001
From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com>
Date: Fri, 29 Jan 2021 14:55:09 +0000
Subject: [PATCH 30/72] Update test_discretization.py

---
 sklearn/preprocessing/tests/test_discretization.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/sklearn/preprocessing/tests/test_discretization.py b/sklearn/preprocessing/tests/test_discretization.py
index 39c82742f6605..61ca8d6f445aa 100644
--- a/sklearn/preprocessing/tests/test_discretization.py
+++ b/sklearn/preprocessing/tests/test_discretization.py
@@ -48,9 +48,10 @@ def test_n_bins_5_deprecated():
 def test_auto_bins():
     est = KBinsDiscretizer(n_bins='auto')
     # for sturges rule: ceil(log2(4) + 1)
+    # where 4 is n_samples in X
     expected_bins = [3, 3, 3, 3]
     est.fit(X)
-    assert est.n_bins_ = expected_bins
+    assert est.n_bins_ == expected_bins
 
 
 def test_invalid_n_bins():

From a0b04da9a7b45ea0f0efc2307ee8a2eeaa148544 Mon Sep 17 00:00:00 2001
From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com>
Date: Fri, 29 Jan 2021 15:28:36 +0000
Subject: [PATCH 31/72] Update test_discretization.py

---
 sklearn/preprocessing/tests/test_discretization.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/preprocessing/tests/test_discretization.py b/sklearn/preprocessing/tests/test_discretization.py
index 61ca8d6f445aa..ff406d2c30410 100644
--- a/sklearn/preprocessing/tests/test_discretization.py
+++ b/sklearn/preprocessing/tests/test_discretization.py
@@ -51,7 +51,7 @@ def test_auto_bins():
     # where 4 is n_samples in X
     expected_bins = [3, 3, 3, 3]
     est.fit(X)
-    assert est.n_bins_ == expected_bins
+    assert np.all(est.n_bins_ == expected_bins)
 
 
 def test_invalid_n_bins():

From ca735de561dd2a39e9d12c4fb994b3269cb9259f Mon Sep 17 00:00:00 2001
From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com>
Date: Sun, 31 Jan 2021 15:36:44 +0000
Subject: [PATCH 32/72] Update sklearn/preprocessing/_discretization.py
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Jérémie du Boisberranger <34657725+jeremiedbb@users.noreply.github.com>
---
 sklearn/preprocessing/_discretization.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/sklearn/preprocessing/_discretization.py b/sklearn/preprocessing/_discretization.py
index 83d4824b8dad3..40e9af8c2a893 100644
--- a/sklearn/preprocessing/_discretization.py
+++ b/sklearn/preprocessing/_discretization.py
@@ -156,8 +156,7 @@ def fit(self, X, y=None):
         self
         """
         self._n_bins = self.n_bins
-        if isinstance(self.n_bins, str):
-            if self.n_bins == 'warn':
+        if isinstance(self.n_bins, str) and self.n_bins == 'warn':
                 warnings.warn("The default value of n_bins will change from "
                               "5 to 'auto' in 1.2", FutureWarning)
                 self._n_bins = 5

From 08f5467343d887b42e233626e407ebcf000502c3 Mon Sep 17 00:00:00 2001
From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com>
Date: Sun, 31 Jan 2021 15:37:34 +0000
Subject: [PATCH 33/72] Update
 sklearn/preprocessing/tests/test_discretization.py
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Jérémie du Boisberranger <34657725+jeremiedbb@users.noreply.github.com>
---
 sklearn/preprocessing/tests/test_discretization.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/preprocessing/tests/test_discretization.py b/sklearn/preprocessing/tests/test_discretization.py
index ff406d2c30410..ad67f5943e18f 100644
--- a/sklearn/preprocessing/tests/test_discretization.py
+++ b/sklearn/preprocessing/tests/test_discretization.py
@@ -47,7 +47,7 @@ def test_n_bins_5_deprecated():
 
 def test_auto_bins():
     est = KBinsDiscretizer(n_bins='auto')
-    # for sturges rule: ceil(log2(4) + 1)
+    # for sturges rule: ceil(log2(4) + 1) = 3
     # where 4 is n_samples in X
     expected_bins = [3, 3, 3, 3]
     est.fit(X)

From eba8dbb07a72f8baf7d7a1bad26536dcaadf5555 Mon Sep 17 00:00:00 2001
From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com>
Date: Sun, 31 Jan 2021 15:56:10 +0000
Subject: [PATCH 34/72] Update test_discretization.py

---
 sklearn/preprocessing/tests/test_discretization.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/preprocessing/tests/test_discretization.py b/sklearn/preprocessing/tests/test_discretization.py
index ad67f5943e18f..1ab40c544743f 100644
--- a/sklearn/preprocessing/tests/test_discretization.py
+++ b/sklearn/preprocessing/tests/test_discretization.py
@@ -47,7 +47,7 @@ def test_n_bins_5_deprecated():
 
 def test_auto_bins():
     est = KBinsDiscretizer(n_bins='auto')
-    # for sturges rule: ceil(log2(4) + 1) = 3
+    # for Sturges rule: ceil(log2(4) + 1) = 3
     # where 4 is n_samples in X
     expected_bins = [3, 3, 3, 3]
     est.fit(X)

From 1eb2322a32d1c2024842767d7c5e4f30b4585300 Mon Sep 17 00:00:00 2001
From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com>
Date: Sun, 31 Jan 2021 15:56:17 +0000
Subject: [PATCH 35/72] Update _discretization.py

---
 sklearn/preprocessing/_discretization.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/sklearn/preprocessing/_discretization.py b/sklearn/preprocessing/_discretization.py
index 40e9af8c2a893..a8c4df3ada7a8 100644
--- a/sklearn/preprocessing/_discretization.py
+++ b/sklearn/preprocessing/_discretization.py
@@ -130,6 +130,7 @@ class KBinsDiscretizer(TransformerMixin, BaseEstimator):
 
     """
 
+    @_deprecate_positional_args
     def __init__(self, n_bins='warn', *, encode='onehot', strategy='quantile',
                  dtype=None):
         self.n_bins = n_bins
@@ -137,7 +138,7 @@ def __init__(self, n_bins='warn', *, encode='onehot', strategy='quantile',
         self.strategy = strategy
         self.dtype = dtype
 
-    @_deprecate_positional_args
+
     def fit(self, X, y=None):
         """
         Fit the estimator.
@@ -186,8 +187,7 @@ def fit(self, X, y=None):
                 f"Got strategy={self.strategy!r} instead."
             )
 
-        n_features = X.shape[1]
-        n_samples = X.shape[0]
+        n_samples, n_features = X.shape
         n_bins = self._validate_n_bins(n_features, n_samples)
 
         bin_edges = np.zeros(n_features, dtype=object)
@@ -254,10 +254,10 @@ def _validate_n_bins(self, n_features, n_samples):
         """Returns n_bins_, the number of bins per feature.
         """
         orig_bins = self._n_bins
-        if isinstance(orig_bins, numbers.Number) or isinstance(orig_bins, str):
-            if orig_bins == 'auto':
-                # calculate number of bins with Sturges rule
-                orig_bins = int(np.ceil(np.log2(n_samples) + 1.))
+        if isinstance(orig_bins, str) and (orig_bins == 'auto'):
+            # calculate number of bins with Sturges rule
+            orig_bins = int(np.ceil(np.log2(n_samples) + 1.))
+        if isinstance(orig_bins, numbers.Number)
             if not isinstance(orig_bins, numbers.Integral):
                 raise ValueError(
                     f"{KBinsDiscretizer.__name__} received "

From d623a3c3b99f18339fe23756a94d669af1b384a3 Mon Sep 17 00:00:00 2001
From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com>
Date: Sun, 31 Jan 2021 15:56:49 +0000
Subject: [PATCH 36/72] Update test_docstring_parameters.py

---
 sklearn/tests/test_docstring_parameters.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/sklearn/tests/test_docstring_parameters.py b/sklearn/tests/test_docstring_parameters.py
index 1756e0e4a65a6..2c4a18d2fa0e4 100644
--- a/sklearn/tests/test_docstring_parameters.py
+++ b/sklearn/tests/test_docstring_parameters.py
@@ -205,6 +205,9 @@ def test_fit_docstring_attributes(name, Estimator):
 
     if Estimator.__name__ == 'SelectKBest':
         est.k = 2
+    
+    if Estimator.__name__ == 'KBinsDiscretizer':
+        est.n_bins = 'auto'
 
     if Estimator.__name__ == 'DummyClassifier':
         est.strategy = "stratified"

From 78cdde1190099a671c4178f3b90b51a4cd60b701 Mon Sep 17 00:00:00 2001
From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com>
Date: Sun, 31 Jan 2021 15:56:51 +0000
Subject: [PATCH 37/72] Update test_common.py

---
 sklearn/tests/test_common.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py
index b900f94231419..5e4659e5240ca 100644
--- a/sklearn/tests/test_common.py
+++ b/sklearn/tests/test_common.py
@@ -294,5 +294,7 @@ def test_search_cv(estimator, check, request):
 @pytest.mark.parametrize("estimator", N_FEATURES_IN_AFTER_FIT_ESTIMATORS,
                          ids=_get_check_estimator_ids)
 def test_check_n_features_in_after_fitting(estimator):
+    if estimator.__name__ == 'KBinsDiscretizer':
+        estimator.n_bins = 'auto'
     _set_checking_parameters(estimator)
     check_n_features_in_after_fitting(estimator.__class__.__name__, estimator)

From 396d4a862e7778258c002a3fe3c8b58a945b69bf Mon Sep 17 00:00:00 2001
From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com>
Date: Sun, 31 Jan 2021 16:07:37 +0000
Subject: [PATCH 38/72] Update v1.0.rst

---
 doc/whats_new/v1.0.rst | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/doc/whats_new/v1.0.rst b/doc/whats_new/v1.0.rst
index a00523ec2223b..24c7db0779575 100644
--- a/doc/whats_new/v1.0.rst
+++ b/doc/whats_new/v1.0.rst
@@ -135,6 +135,15 @@ Changelog
   polynomial ``degree`` of the splines, number of knots ``n_knots`` and knot
   positioning strategy ``knots``.
   :pr:`18368` by :user:`Christian Lorentzen <lorentzenchr>`.
+- |Enhancement| Added new argument `auto` for `n_bins` parameter in
+  :class:`preprocessing.KBinsDiscretizer` for calculating number of bins
+  via Sturges rule.
+  :pr:`9337` by :user:`Gleb Levitskiy <GLevV>`, :user:`Joel Nothman <jnothman>`
+  and :user:`Jérémie du Boisberranger`<jeremiedbb>.
+- |Efficiency| Changed `algorithm` argument for :class:`cluster.KMeans` in 
+  :class:`preprocessing.KBinsDiscretizer` from `auto` to `full`. 
+  :pr:`19256` by :user:`Gleb Levitskiy <GLevV>`, :user:`Joel Nothman <jnothman>`
+  and :user:`Jérémie du Boisberranger`<jeremiedbb>.
 
 :mod:`sklearn.tree`
 ...................

From f577a03146ff49c27618c0303fa7849a12fddeba Mon Sep 17 00:00:00 2001
From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com>
Date: Sun, 31 Jan 2021 16:08:19 +0000
Subject: [PATCH 39/72] Update v1.0.rst

---
 doc/whats_new/v1.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/whats_new/v1.0.rst b/doc/whats_new/v1.0.rst
index 24c7db0779575..373a9c4b6a1ba 100644
--- a/doc/whats_new/v1.0.rst
+++ b/doc/whats_new/v1.0.rst
@@ -143,7 +143,7 @@ Changelog
 - |Efficiency| Changed `algorithm` argument for :class:`cluster.KMeans` in 
   :class:`preprocessing.KBinsDiscretizer` from `auto` to `full`. 
   :pr:`19256` by :user:`Gleb Levitskiy <GLevV>`, :user:`Joel Nothman <jnothman>`
-  and :user:`Jérémie du Boisberranger`<jeremiedbb>.
+  and :user:`Jérémie du Boisberranger <jeremiedbb>`.
 
 :mod:`sklearn.tree`
 ...................

From 16abf9e3189058229068c40bf42ba51b79f1058a Mon Sep 17 00:00:00 2001
From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com>
Date: Sun, 31 Jan 2021 16:09:45 +0000
Subject: [PATCH 40/72] Update _discretization.py

---
 sklearn/preprocessing/_discretization.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/preprocessing/_discretization.py b/sklearn/preprocessing/_discretization.py
index a8c4df3ada7a8..a299312e03ea8 100644
--- a/sklearn/preprocessing/_discretization.py
+++ b/sklearn/preprocessing/_discretization.py
@@ -257,7 +257,7 @@ def _validate_n_bins(self, n_features, n_samples):
         if isinstance(orig_bins, str) and (orig_bins == 'auto'):
             # calculate number of bins with Sturges rule
             orig_bins = int(np.ceil(np.log2(n_samples) + 1.))
-        if isinstance(orig_bins, numbers.Number)
+        if isinstance(orig_bins, numbers.Number):
             if not isinstance(orig_bins, numbers.Integral):
                 raise ValueError(
                     f"{KBinsDiscretizer.__name__} received "

From 94f9c87c92e3378629c650acf9cabd1a5f92be16 Mon Sep 17 00:00:00 2001
From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com>
Date: Sun, 31 Jan 2021 16:11:34 +0000
Subject: [PATCH 41/72] Update _discretization.py

---
 sklearn/preprocessing/_discretization.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/sklearn/preprocessing/_discretization.py b/sklearn/preprocessing/_discretization.py
index a299312e03ea8..2cdbbc8119401 100644
--- a/sklearn/preprocessing/_discretization.py
+++ b/sklearn/preprocessing/_discretization.py
@@ -158,9 +158,9 @@ def fit(self, X, y=None):
         """
         self._n_bins = self.n_bins
         if isinstance(self.n_bins, str) and self.n_bins == 'warn':
-                warnings.warn("The default value of n_bins will change from "
-                              "5 to 'auto' in 1.2", FutureWarning)
-                self._n_bins = 5
+            warnings.warn("The default value of n_bins will change from "
+                          "5 to 'auto' in 1.2", FutureWarning)
+            self._n_bins = 5
         X = self._validate_data(X, dtype='numeric')
 
         supported_dtype = (np.float64, np.float32)

From f499549812533dabd25f702f026dc2e147530ff4 Mon Sep 17 00:00:00 2001
From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com>
Date: Sun, 31 Jan 2021 16:13:29 +0000
Subject: [PATCH 42/72] Update test_docstring_parameters.py

---
 sklearn/tests/test_docstring_parameters.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/tests/test_docstring_parameters.py b/sklearn/tests/test_docstring_parameters.py
index 2c4a18d2fa0e4..5cb585fe9e950 100644
--- a/sklearn/tests/test_docstring_parameters.py
+++ b/sklearn/tests/test_docstring_parameters.py
@@ -205,7 +205,7 @@ def test_fit_docstring_attributes(name, Estimator):
 
     if Estimator.__name__ == 'SelectKBest':
         est.k = 2
-    
+
     if Estimator.__name__ == 'KBinsDiscretizer':
         est.n_bins = 'auto'
 

From e266fadc7b6b3544107ddf77cd4a013553ebd1cb Mon Sep 17 00:00:00 2001
From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com>
Date: Sun, 31 Jan 2021 16:18:23 +0000
Subject: [PATCH 43/72] Update v1.0.rst

---
 doc/whats_new/v1.0.rst | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/doc/whats_new/v1.0.rst b/doc/whats_new/v1.0.rst
index 373a9c4b6a1ba..c7b6092a6ed76 100644
--- a/doc/whats_new/v1.0.rst
+++ b/doc/whats_new/v1.0.rst
@@ -135,13 +135,13 @@ Changelog
   polynomial ``degree`` of the splines, number of knots ``n_knots`` and knot
   positioning strategy ``knots``.
   :pr:`18368` by :user:`Christian Lorentzen <lorentzenchr>`.
-- |Enhancement| Added new argument `auto` for `n_bins` parameter in
+- |Enhancement| Added new argument ``auto`` for ``n_bins`` parameter in
   :class:`preprocessing.KBinsDiscretizer` for calculating number of bins
   via Sturges rule.
   :pr:`9337` by :user:`Gleb Levitskiy <GLevV>`, :user:`Joel Nothman <jnothman>`
   and :user:`Jérémie du Boisberranger`<jeremiedbb>.
-- |Efficiency| Changed `algorithm` argument for :class:`cluster.KMeans` in 
-  :class:`preprocessing.KBinsDiscretizer` from `auto` to `full`. 
+- |Efficiency| Changed ``algorithm`` argument for :class:`cluster.KMeans` in 
+  :class:`preprocessing.KBinsDiscretizer` from ``auto`` to ``full``. 
   :pr:`19256` by :user:`Gleb Levitskiy <GLevV>`, :user:`Joel Nothman <jnothman>`
   and :user:`Jérémie du Boisberranger <jeremiedbb>`.
 

From cb72479e078c5fc08723a3f7aae87e4459e6e156 Mon Sep 17 00:00:00 2001
From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com>
Date: Sun, 31 Jan 2021 16:18:49 +0000
Subject: [PATCH 44/72] Update v1.0.rst

---
 doc/whats_new/v1.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/whats_new/v1.0.rst b/doc/whats_new/v1.0.rst
index c7b6092a6ed76..bf0a2d629d8db 100644
--- a/doc/whats_new/v1.0.rst
+++ b/doc/whats_new/v1.0.rst
@@ -139,7 +139,7 @@ Changelog
   :class:`preprocessing.KBinsDiscretizer` for calculating number of bins
   via Sturges rule.
   :pr:`9337` by :user:`Gleb Levitskiy <GLevV>`, :user:`Joel Nothman <jnothman>`
-  and :user:`Jérémie du Boisberranger`<jeremiedbb>.
+  and :user:`Jérémie du Boisberranger <jeremiedbb>`.
 - |Efficiency| Changed ``algorithm`` argument for :class:`cluster.KMeans` in 
   :class:`preprocessing.KBinsDiscretizer` from ``auto`` to ``full``. 
   :pr:`19256` by :user:`Gleb Levitskiy <GLevV>`, :user:`Joel Nothman <jnothman>`

From 66a46828cb71eccee95e5787fa1a608ca349b6e4 Mon Sep 17 00:00:00 2001
From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com>
Date: Sun, 31 Jan 2021 16:21:01 +0000
Subject: [PATCH 45/72] Update v1.0.rst

---
 doc/whats_new/v1.0.rst | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/doc/whats_new/v1.0.rst b/doc/whats_new/v1.0.rst
index bf0a2d629d8db..1f674f0bb8741 100644
--- a/doc/whats_new/v1.0.rst
+++ b/doc/whats_new/v1.0.rst
@@ -138,12 +138,10 @@ Changelog
 - |Enhancement| Added new argument ``auto`` for ``n_bins`` parameter in
   :class:`preprocessing.KBinsDiscretizer` for calculating number of bins
   via Sturges rule.
-  :pr:`9337` by :user:`Gleb Levitskiy <GLevV>`, :user:`Joel Nothman <jnothman>`
-  and :user:`Jérémie du Boisberranger <jeremiedbb>`.
+  :pr:`9337` by :user:`Joel Nothman <jnothman>`.
 - |Efficiency| Changed ``algorithm`` argument for :class:`cluster.KMeans` in 
   :class:`preprocessing.KBinsDiscretizer` from ``auto`` to ``full``. 
-  :pr:`19256` by :user:`Gleb Levitskiy <GLevV>`, :user:`Joel Nothman <jnothman>`
-  and :user:`Jérémie du Boisberranger <jeremiedbb>`.
+  :pr:`19256` by :user:`Gleb Levitskiy <GLevV>`.
 
 :mod:`sklearn.tree`
 ...................

From 840c77a67a8e7c60b835b4ca6a457d9f9c55a07b Mon Sep 17 00:00:00 2001
From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com>
Date: Sun, 31 Jan 2021 17:08:20 +0000
Subject: [PATCH 46/72] Update _discretization.py

---
 sklearn/preprocessing/_discretization.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/sklearn/preprocessing/_discretization.py b/sklearn/preprocessing/_discretization.py
index 2cdbbc8119401..7f9e055b62479 100644
--- a/sklearn/preprocessing/_discretization.py
+++ b/sklearn/preprocessing/_discretization.py
@@ -253,11 +253,11 @@ def fit(self, X, y=None):
     def _validate_n_bins(self, n_features, n_samples):
         """Returns n_bins_, the number of bins per feature.
         """
-        orig_bins = self._n_bins
-        if isinstance(orig_bins, str) and (orig_bins == 'auto'):
-            # calculate number of bins with Sturges rule
-            orig_bins = int(np.ceil(np.log2(n_samples) + 1.))
-        if isinstance(orig_bins, numbers.Number):
+        orig_bins = self._n_bins  
+        if isinstance(orig_bins, numbers.Number) or isinstance(orig_bins, str):
+            if orig_bins == 'auto':
+                # calculate number of bins with Sturges rule
+                orig_bins = int(np.ceil(np.log2(n_samples) + 1.))
             if not isinstance(orig_bins, numbers.Integral):
                 raise ValueError(
                     f"{KBinsDiscretizer.__name__} received "

From 9cdb920486406034149e9a551c057dac5b96ba24 Mon Sep 17 00:00:00 2001
From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com>
Date: Sun, 31 Jan 2021 17:09:28 +0000
Subject: [PATCH 47/72] Update test_common.py

---
 sklearn/tests/test_common.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py
index 5e4659e5240ca..47705ff0bf5e5 100644
--- a/sklearn/tests/test_common.py
+++ b/sklearn/tests/test_common.py
@@ -294,7 +294,7 @@ def test_search_cv(estimator, check, request):
 @pytest.mark.parametrize("estimator", N_FEATURES_IN_AFTER_FIT_ESTIMATORS,
                          ids=_get_check_estimator_ids)
 def test_check_n_features_in_after_fitting(estimator):
-    if estimator.__name__ == 'KBinsDiscretizer':
+    if estimator.__class__.__name__ == 'KBinsDiscretizer':
         estimator.n_bins = 'auto'
     _set_checking_parameters(estimator)
     check_n_features_in_after_fitting(estimator.__class__.__name__, estimator)

From ff127f5b59fdf5ceb2fc08aa71c3e596c859e8d3 Mon Sep 17 00:00:00 2001
From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com>
Date: Sun, 31 Jan 2021 17:12:43 +0000
Subject: [PATCH 48/72] Update _discretization.py

---
 sklearn/preprocessing/_discretization.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/preprocessing/_discretization.py b/sklearn/preprocessing/_discretization.py
index 7f9e055b62479..7ee2d45b6968a 100644
--- a/sklearn/preprocessing/_discretization.py
+++ b/sklearn/preprocessing/_discretization.py
@@ -253,7 +253,7 @@ def fit(self, X, y=None):
     def _validate_n_bins(self, n_features, n_samples):
         """Returns n_bins_, the number of bins per feature.
         """
-        orig_bins = self._n_bins  
+        orig_bins = self._n_bins
         if isinstance(orig_bins, numbers.Number) or isinstance(orig_bins, str):
             if orig_bins == 'auto':
                 # calculate number of bins with Sturges rule

From 641d58dfe1b73002a33ae42cf7d8461abaf459ae Mon Sep 17 00:00:00 2001
From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com>
Date: Sun, 31 Jan 2021 18:14:45 +0000
Subject: [PATCH 49/72] Update test_docstring_parameters.py

---
 sklearn/tests/test_docstring_parameters.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sklearn/tests/test_docstring_parameters.py b/sklearn/tests/test_docstring_parameters.py
index 5cb585fe9e950..3d0cfc5151193 100644
--- a/sklearn/tests/test_docstring_parameters.py
+++ b/sklearn/tests/test_docstring_parameters.py
@@ -207,6 +207,7 @@ def test_fit_docstring_attributes(name, Estimator):
         est.k = 2
 
     if Estimator.__name__ == 'KBinsDiscretizer':
+        # FIX ME: remove in 1.2
         est.n_bins = 'auto'
 
     if Estimator.__name__ == 'DummyClassifier':

From 3e82bee9a3f2c39ff25b014141a8ae34a5412722 Mon Sep 17 00:00:00 2001
From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com>
Date: Sun, 31 Jan 2021 18:14:55 +0000
Subject: [PATCH 50/72] Update v1.0.rst

---
 doc/whats_new/v1.0.rst | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/doc/whats_new/v1.0.rst b/doc/whats_new/v1.0.rst
index 1f674f0bb8741..2cec2c6d8d9df 100644
--- a/doc/whats_new/v1.0.rst
+++ b/doc/whats_new/v1.0.rst
@@ -135,13 +135,15 @@ Changelog
   polynomial ``degree`` of the splines, number of knots ``n_knots`` and knot
   positioning strategy ``knots``.
   :pr:`18368` by :user:`Christian Lorentzen <lorentzenchr>`.
+
 - |Enhancement| Added new argument ``auto`` for ``n_bins`` parameter in
   :class:`preprocessing.KBinsDiscretizer` for calculating number of bins
   via Sturges rule.
-  :pr:`9337` by :user:`Joel Nothman <jnothman>`.
+  :pr:`19290` by :user:`Gleb Levitskiy <GLevV>`.
+
 - |Efficiency| Changed ``algorithm`` argument for :class:`cluster.KMeans` in 
   :class:`preprocessing.KBinsDiscretizer` from ``auto`` to ``full``. 
-  :pr:`19256` by :user:`Gleb Levitskiy <GLevV>`.
+  :pr:`19290` by :user:`Gleb Levitskiy <GLevV>`.
 
 :mod:`sklearn.tree`
 ...................

From 71520f391cdda19fbd4d4aab880e0e0fe3220eda Mon Sep 17 00:00:00 2001
From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com>
Date: Sun, 31 Jan 2021 18:16:01 +0000
Subject: [PATCH 51/72] Update test_common.py

---
 sklearn/tests/test_common.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py
index 47705ff0bf5e5..b900f94231419 100644
--- a/sklearn/tests/test_common.py
+++ b/sklearn/tests/test_common.py
@@ -294,7 +294,5 @@ def test_search_cv(estimator, check, request):
 @pytest.mark.parametrize("estimator", N_FEATURES_IN_AFTER_FIT_ESTIMATORS,
                          ids=_get_check_estimator_ids)
 def test_check_n_features_in_after_fitting(estimator):
-    if estimator.__class__.__name__ == 'KBinsDiscretizer':
-        estimator.n_bins = 'auto'
     _set_checking_parameters(estimator)
     check_n_features_in_after_fitting(estimator.__class__.__name__, estimator)

From 76c68a7114712addf00c340f06811b31f227b30c Mon Sep 17 00:00:00 2001
From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com>
Date: Sun, 31 Jan 2021 18:20:44 +0000
Subject: [PATCH 52/72] Update estimator_checks.py

---
 sklearn/utils/estimator_checks.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index 849d8a1f3921b..5377851f75892 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -613,6 +613,10 @@ def _set_checking_parameters(estimator):
     if name == "TheilSenRegressor":
         estimator.max_subpopulation = 100
 
+    if name == 'KBinsDiscretizer':
+        # FIX ME: remove in 1.2
+        estimator.n_bins = 'auto'    
+
     if isinstance(estimator, BaseRandomProjection):
         # Due to the jl lemma and often very few samples, the number
         # of components of the random matrix projection will be probably

From 4e01fd5670dbe2236997c5639a7b5ef5211565d0 Mon Sep 17 00:00:00 2001
From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com>
Date: Sun, 31 Jan 2021 18:26:09 +0000
Subject: [PATCH 53/72] Update estimator_checks.py

---
 sklearn/utils/estimator_checks.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index 5377851f75892..0895f9a97cc85 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -615,7 +615,7 @@ def _set_checking_parameters(estimator):
 
     if name == 'KBinsDiscretizer':
         # FIX ME: remove in 1.2
-        estimator.n_bins = 'auto'    
+        estimator.n_bins = 'auto' 
 
     if isinstance(estimator, BaseRandomProjection):
         # Due to the jl lemma and often very few samples, the number

From 71bb2e5dabac6eac0b72ea3ff910e9577953bcb4 Mon Sep 17 00:00:00 2001
From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com>
Date: Sun, 31 Jan 2021 18:26:19 +0000
Subject: [PATCH 54/72] Update _discretization.py

---
 sklearn/preprocessing/_discretization.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/sklearn/preprocessing/_discretization.py b/sklearn/preprocessing/_discretization.py
index 7ee2d45b6968a..1045f845ff603 100644
--- a/sklearn/preprocessing/_discretization.py
+++ b/sklearn/preprocessing/_discretization.py
@@ -254,15 +254,22 @@ def _validate_n_bins(self, n_features, n_samples):
         """Returns n_bins_, the number of bins per feature.
         """
         orig_bins = self._n_bins
-        if isinstance(orig_bins, numbers.Number) or isinstance(orig_bins, str):
+        if isinstance(orig_bins, str):
             if orig_bins == 'auto':
                 # calculate number of bins with Sturges rule
                 orig_bins = int(np.ceil(np.log2(n_samples) + 1.))
+            else:
+                raise ValueError(
+                    f"{KBinsDiscretizer.__name__} received "
+                    f"an invalid n_bins value. Received "
+                    f"{orig_bins}, while only 'auto' is supported."
+                )
+        if isinstance(orig_bins, numbers.Number):
             if not isinstance(orig_bins, numbers.Integral):
                 raise ValueError(
                     f"{KBinsDiscretizer.__name__} received "
                     f"an invalid n_bins type. Received "
-                    f"{type(orig_bins).__name__}, expected int or 'auto'."
+                    f"{type(orig_bins).__name__}, expected int."
                 )
             if orig_bins < 2:
                 raise ValueError(

From 28ae05e985f7070a71458b3aeec0e9a647cb3768 Mon Sep 17 00:00:00 2001
From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com>
Date: Sun, 31 Jan 2021 18:26:21 +0000
Subject: [PATCH 55/72] Update test_discretization.py

---
 sklearn/preprocessing/tests/test_discretization.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sklearn/preprocessing/tests/test_discretization.py b/sklearn/preprocessing/tests/test_discretization.py
index 1ab40c544743f..cfb74e95afbb6 100644
--- a/sklearn/preprocessing/tests/test_discretization.py
+++ b/sklearn/preprocessing/tests/test_discretization.py
@@ -63,14 +63,14 @@ def test_invalid_n_bins():
 
     est = KBinsDiscretizer(n_bins=1.1)
     err_msg = ("KBinsDiscretizer received an invalid "
-               "n_bins type. Received float, expected int or 'auto'.")
+               "n_bins type. Received float, expected int.")
     with pytest.raises(ValueError, match=err_msg):
         est.fit_transform(X)
 
     # Bad string value
     est = KBinsDiscretizer(n_bins='rice')
     err_msg = ("KBinsDiscretizer received an invalid "
-               "n_bins type. Received str, expected int or 'auto'.")
+               "n_bins value. Received rice, while only 'auto' is supported.")
     with pytest.raises(ValueError, match=err_msg):
         est.fit_transform(X)
 

From 835a5146081eeb368ae0c9ff69a8ca2d95a9ad5e Mon Sep 17 00:00:00 2001
From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com>
Date: Sun, 31 Jan 2021 18:33:01 +0000
Subject: [PATCH 56/72] Update estimator_checks.py

---
 sklearn/utils/estimator_checks.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index 0895f9a97cc85..55c047faecbcc 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -615,7 +615,7 @@ def _set_checking_parameters(estimator):
 
     if name == 'KBinsDiscretizer':
         # FIX ME: remove in 1.2
-        estimator.n_bins = 'auto' 
+        estimator.n_bins = 'auto'
 
     if isinstance(estimator, BaseRandomProjection):
         # Due to the jl lemma and often very few samples, the number

From 6ff5ee64b50f17944d7c4ff391c4664194e65b39 Mon Sep 17 00:00:00 2001
From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com>
Date: Sun, 31 Jan 2021 19:31:25 +0000
Subject: [PATCH 57/72] Update _discretization.py

---
 sklearn/preprocessing/_discretization.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sklearn/preprocessing/_discretization.py b/sklearn/preprocessing/_discretization.py
index 1045f845ff603..26505291f7da6 100644
--- a/sklearn/preprocessing/_discretization.py
+++ b/sklearn/preprocessing/_discretization.py
@@ -161,6 +161,7 @@ def fit(self, X, y=None):
             warnings.warn("The default value of n_bins will change from "
                           "5 to 'auto' in 1.2", FutureWarning)
             self._n_bins = 5
+
         X = self._validate_data(X, dtype='numeric')
 
         supported_dtype = (np.float64, np.float32)

From 01ad4debe496bbba9291cd886ddc4cdafbac5304 Mon Sep 17 00:00:00 2001
From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com>
Date: Sun, 31 Jan 2021 20:02:54 +0000
Subject: [PATCH 58/72] Update _discretization.py

---
 sklearn/preprocessing/_discretization.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/sklearn/preprocessing/_discretization.py b/sklearn/preprocessing/_discretization.py
index 26505291f7da6..ace82dd2fedea 100644
--- a/sklearn/preprocessing/_discretization.py
+++ b/sklearn/preprocessing/_discretization.py
@@ -258,7 +258,8 @@ def _validate_n_bins(self, n_features, n_samples):
         if isinstance(orig_bins, str):
             if orig_bins == 'auto':
                 # calculate number of bins with Sturges rule
-                orig_bins = int(np.ceil(np.log2(n_samples) + 1.))
+                orig_bins = np.maximum(int(np.ceil(np.log2(n_samples) + 1.)),
+                                       2)
             else:
                 raise ValueError(
                     f"{KBinsDiscretizer.__name__} received "

From fce49a542eff007ede622ac537b42440f3085931 Mon Sep 17 00:00:00 2001
From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com>
Date: Mon, 1 Feb 2021 11:33:32 +0000
Subject: [PATCH 59/72] Update _discretization.py

---
 sklearn/preprocessing/_discretization.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/sklearn/preprocessing/_discretization.py b/sklearn/preprocessing/_discretization.py
index ace82dd2fedea..2e1cd8512c91e 100644
--- a/sklearn/preprocessing/_discretization.py
+++ b/sklearn/preprocessing/_discretization.py
@@ -138,7 +138,6 @@ def __init__(self, n_bins='warn', *, encode='onehot', strategy='quantile',
         self.strategy = strategy
         self.dtype = dtype
 
-
     def fit(self, X, y=None):
         """
         Fit the estimator.
@@ -258,8 +257,7 @@ def _validate_n_bins(self, n_features, n_samples):
         if isinstance(orig_bins, str):
             if orig_bins == 'auto':
                 # calculate number of bins with Sturges rule
-                orig_bins = np.maximum(int(np.ceil(np.log2(n_samples) + 1.)),
-                                       2)
+                orig_bins = max(int(np.ceil(np.log2(n_samples) + 1.)), 2)
             else:
                 raise ValueError(
                     f"{KBinsDiscretizer.__name__} received "

From 230307578ef6a39d283ccbaf50c67cb2454de66c Mon Sep 17 00:00:00 2001
From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com>
Date: Tue, 2 Feb 2021 16:02:56 +0300
Subject: [PATCH 60/72] Revert "DOC Add URL to reference of Minka paper used in
 PCA (#19207)"

This reverts commit ca7fc5dd4f31a3b2eee5352e9ce615d2aa104b4d.
---
 sklearn/decomposition/_pca.py | 30 ++++++++++++------------------
 1 file changed, 12 insertions(+), 18 deletions(-)

diff --git a/sklearn/decomposition/_pca.py b/sklearn/decomposition/_pca.py
index ac4a1d1d9816b..80ac7e856dfd0 100644
--- a/sklearn/decomposition/_pca.py
+++ b/sklearn/decomposition/_pca.py
@@ -32,8 +32,7 @@ def _assess_dimension(spectrum, rank, n_samples):
     """Compute the log-likelihood of a rank ``rank`` dataset.
 
     The dataset is assumed to be embedded in gaussian noise of shape(n,
-    dimf) having spectrum ``spectrum``. This implements the method of
-    T. P. Minka.
+    dimf) having spectrum ``spectrum``.
 
     Parameters
     ----------
@@ -51,11 +50,10 @@ def _assess_dimension(spectrum, rank, n_samples):
     ll : float
         The log-likelihood.
 
-    References
-    ----------
+    Notes
+    -----
     This implements the method of `Thomas P. Minka:
-    Automatic Choice of Dimensionality for PCA. NIPS 2000: 598-604
-    <https://proceedings.neurips.cc/paper/2000/file/7503cfacd12053d309b6bed5c89de212-Paper.pdf>`_
+    Automatic Choice of Dimensionality for PCA. NIPS 2000: 598-604`
     """
 
     n_features = spectrum.shape[0]
@@ -273,30 +271,26 @@ class PCA(_BasePCA):
 
     References
     ----------
-    For n_components == 'mle', this class uses the method from:
-    `Minka, T. P.. "Automatic choice of dimensionality for PCA".
-    In NIPS, pp. 598-604 <https://tminka.github.io/papers/pca/minka-pca.pdf>`_
+    For n_components == 'mle', this class uses the method of *Minka, T. P.
+    "Automatic choice of dimensionality for PCA". In NIPS, pp. 598-604*
 
     Implements the probabilistic PCA model from:
-    `Tipping, M. E., and Bishop, C. M. (1999). "Probabilistic principal
+    Tipping, M. E., and Bishop, C. M. (1999). "Probabilistic principal
     component analysis". Journal of the Royal Statistical Society:
     Series B (Statistical Methodology), 61(3), 611-622.
-    <http://www.miketipping.com/papers/met-mppca.pdf>`_
     via the score and score_samples methods.
+    See http://www.miketipping.com/papers/met-mppca.pdf
 
     For svd_solver == 'arpack', refer to `scipy.sparse.linalg.svds`.
 
     For svd_solver == 'randomized', see:
-    `Halko, N., Martinsson, P. G., and Tropp, J. A. (2011).
+    *Halko, N., Martinsson, P. G., and Tropp, J. A. (2011).
     "Finding structure with randomness: Probabilistic algorithms for
     constructing approximate matrix decompositions".
-    SIAM review, 53(2), 217-288.
-    <https://doi.org/10.1137/090771806>`_
-    and also
-    `Martinsson, P. G., Rokhlin, V., and Tygert, M. (2011).
+    SIAM review, 53(2), 217-288.* and also
+    *Martinsson, P. G., Rokhlin, V., and Tygert, M. (2011).
     "A randomized algorithm for the decomposition of matrices".
-    Applied and Computational Harmonic Analysis, 30(1), 47-68
-    <https://doi.org/10.1016/j.acha.2010.02.003>`_.
+    Applied and Computational Harmonic Analysis, 30(1), 47-68.*
 
     Examples
     --------

From 4f99c48a2378a140b0ceb20ccbd7739dd3717f13 Mon Sep 17 00:00:00 2001
From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com>
Date: Tue, 2 Feb 2021 16:03:01 +0300
Subject: [PATCH 61/72] Revert "DOC update Keras description in related
 projects (#19265)"

This reverts commit 315463f3d20494581b440cd9c26d48c580419edc.
---
 doc/related_projects.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc/related_projects.rst b/doc/related_projects.rst
index 38d8bc555638e..8496b2b9b1df0 100644
--- a/doc/related_projects.rst
+++ b/doc/related_projects.rst
@@ -148,8 +148,8 @@ and tasks.
 - `nolearn <https://github.com/dnouri/nolearn>`_ A number of wrappers and
   abstractions around existing neural network libraries
 
-- `Keras <https://www.tensorflow.org/api_docs/python/tf/keras>`_ High-level API for
-  TensorFlow with a scikit-learn inspired API.
+- `keras <https://github.com/fchollet/keras>`_ Deep Learning library capable of
+  running on top of either TensorFlow or Theano.
 
 - `lasagne <https://github.com/Lasagne/Lasagne>`_ A lightweight library to
   build and train neural networks in Theano.

From cdee3573072c680eaa661c27232336cde163a7f5 Mon Sep 17 00:00:00 2001
From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com>
Date: Tue, 2 Feb 2021 16:03:08 +0300
Subject: [PATCH 62/72] Revert "CLN Removes duplicated or unneeded code in
 ColumnTransformer (#19261)"

This reverts commit 8965abb264aaf70d11d9f56d2947bcc0b5ddaf75.
---
 sklearn/compose/_column_transformer.py | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/sklearn/compose/_column_transformer.py b/sklearn/compose/_column_transformer.py
index 6693c9896c87a..553e8c3afa263 100644
--- a/sklearn/compose/_column_transformer.py
+++ b/sklearn/compose/_column_transformer.py
@@ -19,6 +19,7 @@
 from ..utils import Bunch
 from ..utils import _safe_indexing
 from ..utils import _get_column_indices
+from ..utils import _determine_key_type
 from ..utils.metaestimators import _BaseComposition
 from ..utils.validation import check_array, check_is_fitted
 from ..utils.validation import _deprecate_positional_args
@@ -319,6 +320,12 @@ def _validate_remainder(self, X):
                 "'passthrough', or estimator. '%s' was passed instead" %
                 self.remainder)
 
+        # Make it possible to check for reordered named columns on transform
+        self._has_str_cols = any(_determine_key_type(cols) == 'str'
+                                 for cols in self._columns)
+        if hasattr(X, 'columns'):
+            self._df_columns = X.columns
+
         self._n_features = X.shape[1]
         cols = []
         for columns in self._columns:
@@ -355,12 +362,12 @@ def get_feature_names(self):
                     hasattr(column, '__len__') and not len(column)):
                 continue
             if trans == 'passthrough':
-                if self._feature_names_in is not None:
+                if hasattr(self, '_df_columns'):
                     if ((not isinstance(column, slice))
                             and all(isinstance(col, str) for col in column)):
                         feature_names.extend(column)
                     else:
-                        feature_names.extend(self._feature_names_in[column])
+                        feature_names.extend(self._df_columns[column])
                 else:
                     indices = np.arange(self._n_features)
                     feature_names.extend(['x%d' % i for i in indices[column]])
@@ -434,7 +441,7 @@ def _fit_transform(self, X, y, func, fitted=False):
                     message_clsname='ColumnTransformer',
                     message=self._log_message(name, idx, len(transformers)))
                 for idx, (name, trans, column, weight) in enumerate(
-                    transformers, 1))
+                        self._iter(fitted=fitted, replace_strings=True), 1))
         except ValueError as e:
             if "Expected 2D array, got 1D array instead" in str(e):
                 raise ValueError(_ERR_MSG_1DCOLUMN) from e
@@ -599,9 +606,9 @@ def _sk_visual_block_(self):
             transformers = self.transformers
         elif hasattr(self, "_remainder"):
             remainder_columns = self._remainder[2]
-            if self._feature_names_in is not None:
+            if hasattr(self, '_df_columns'):
                 remainder_columns = (
-                    self._feature_names_in[remainder_columns].tolist()
+                    self._df_columns[remainder_columns].tolist()
                 )
             transformers = chain(self.transformers,
                                  [('remainder', self.remainder,

From 15ba4125b87a8c211eb6fc5ff7f84e86e8b39102 Mon Sep 17 00:00:00 2001
From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com>
Date: Mon, 8 Feb 2021 11:28:08 +0000
Subject: [PATCH 63/72] Revert "Kbd changes"

---
 doc/whats_new/v1.0.rst                        |  9 --
 sklearn/preprocessing/_discretization.py      | 93 +++++++------------
 .../tests/test_discretization.py              | 25 -----
 sklearn/tests/test_docstring_parameters.py    |  4 -
 sklearn/utils/estimator_checks.py             |  4 -
 5 files changed, 32 insertions(+), 103 deletions(-)

diff --git a/doc/whats_new/v1.0.rst b/doc/whats_new/v1.0.rst
index 2cec2c6d8d9df..a00523ec2223b 100644
--- a/doc/whats_new/v1.0.rst
+++ b/doc/whats_new/v1.0.rst
@@ -136,15 +136,6 @@ Changelog
   positioning strategy ``knots``.
   :pr:`18368` by :user:`Christian Lorentzen <lorentzenchr>`.
 
-- |Enhancement| Added new argument ``auto`` for ``n_bins`` parameter in
-  :class:`preprocessing.KBinsDiscretizer` for calculating number of bins
-  via Sturges rule.
-  :pr:`19290` by :user:`Gleb Levitskiy <GLevV>`.
-
-- |Efficiency| Changed ``algorithm`` argument for :class:`cluster.KMeans` in 
-  :class:`preprocessing.KBinsDiscretizer` from ``auto`` to ``full``. 
-  :pr:`19290` by :user:`Gleb Levitskiy <GLevV>`.
-
 :mod:`sklearn.tree`
 ...................
 
diff --git a/sklearn/preprocessing/_discretization.py b/sklearn/preprocessing/_discretization.py
index 2e1cd8512c91e..22fa236f3314e 100644
--- a/sklearn/preprocessing/_discretization.py
+++ b/sklearn/preprocessing/_discretization.py
@@ -28,13 +28,8 @@ class KBinsDiscretizer(TransformerMixin, BaseEstimator):
 
     Parameters
     ----------
-    n_bins : int, 'auto' or array-like of shape (n_features,), dtype=integral,\
-             default=5
+    n_bins : int or array-like of shape (n_features,), default=5
         The number of bins to produce. Raises ValueError if ``n_bins < 2``.
-        For 'auto' option Sturges formula is used: bins are log(n_samples) + 1.
-
-        .. versionadded:: 1.0
-            Added 'auto' option
 
     encode : {'onehot', 'onehot-dense', 'ordinal'}, default='onehot'
         Method used to encode the transformed result.
@@ -131,7 +126,7 @@ class KBinsDiscretizer(TransformerMixin, BaseEstimator):
     """
 
     @_deprecate_positional_args
-    def __init__(self, n_bins='warn', *, encode='onehot', strategy='quantile',
+    def __init__(self, n_bins=5, *, encode='onehot', strategy='quantile',
                  dtype=None):
         self.n_bins = n_bins
         self.encode = encode
@@ -155,12 +150,6 @@ def fit(self, X, y=None):
         -------
         self
         """
-        self._n_bins = self.n_bins
-        if isinstance(self.n_bins, str) and self.n_bins == 'warn':
-            warnings.warn("The default value of n_bins will change from "
-                          "5 to 'auto' in 1.2", FutureWarning)
-            self._n_bins = 5
-
         X = self._validate_data(X, dtype='numeric')
 
         supported_dtype = (np.float64, np.float32)
@@ -174,21 +163,20 @@ def fit(self, X, y=None):
                 f"{supported_dtype + (None,)}. Got dtype={self.dtype} "
                 f" instead."
             )
+
         valid_encode = ('onehot', 'onehot-dense', 'ordinal')
         if self.encode not in valid_encode:
-            raise ValueError(
-                f"Valid options for 'encode' are {valid_encode}. "
-                f"Got encode={self.encode!r} instead."
-            )
+            raise ValueError("Valid options for 'encode' are {}. "
+                             "Got encode={!r} instead."
+                             .format(valid_encode, self.encode))
         valid_strategy = ('uniform', 'quantile', 'kmeans')
         if self.strategy not in valid_strategy:
-            raise ValueError(
-                f"Valid options for 'strategy' are {valid_strategy}. "
-                f"Got strategy={self.strategy!r} instead."
-            )
+            raise ValueError("Valid options for 'strategy' are {}. "
+                             "Got strategy={!r} instead."
+                             .format(valid_strategy, self.strategy))
 
-        n_samples, n_features = X.shape
-        n_bins = self._validate_n_bins(n_features, n_samples)
+        n_features = X.shape[1]
+        n_bins = self._validate_n_bins(n_features)
 
         bin_edges = np.zeros(n_features, dtype=object)
         for jj in range(n_features):
@@ -196,8 +184,8 @@ def fit(self, X, y=None):
             col_min, col_max = column.min(), column.max()
 
             if col_min == col_max:
-                warnings.warn(f"Feature {jj} is constant and will be "
-                              f"replaced with 0.")
+                warnings.warn("Feature %d is constant and will be "
+                              "replaced with 0." % jj)
                 n_bins[jj] = 1
                 bin_edges[jj] = np.array([-np.inf, np.inf])
                 continue
@@ -217,8 +205,7 @@ def fit(self, X, y=None):
                 init = (uniform_edges[1:] + uniform_edges[:-1])[:, None] * 0.5
 
                 # 1D k-means procedure
-                km = KMeans(n_clusters=n_bins[jj], init=init,
-                            n_init=1, algorithm='full')
+                km = KMeans(n_clusters=n_bins[jj], init=init, n_init=1)
                 centers = km.fit(column[:, None]).cluster_centers_[:, 0]
                 # Must sort, centers may be unsorted even with sorted init
                 centers.sort()
@@ -230,10 +217,9 @@ def fit(self, X, y=None):
                 mask = np.ediff1d(bin_edges[jj], to_begin=np.inf) > 1e-8
                 bin_edges[jj] = bin_edges[jj][mask]
                 if len(bin_edges[jj]) - 1 != n_bins[jj]:
-                    warnings.warn(f"Bins whose width are too small "
-                                  f"(i.e., <= 1e-8) in feature {jj} "
-                                  f"are removed. Consider decreasing "
-                                  f"the number of bins.")
+                    warnings.warn('Bins whose width are too small (i.e., <= '
+                                  '1e-8) in feature %d are removed. Consider '
+                                  'decreasing the number of bins.' % jj)
                     n_bins[jj] = len(bin_edges[jj]) - 1
 
         self.bin_edges_ = bin_edges
@@ -250,32 +236,20 @@ def fit(self, X, y=None):
 
         return self
 
-    def _validate_n_bins(self, n_features, n_samples):
+    def _validate_n_bins(self, n_features):
         """Returns n_bins_, the number of bins per feature.
         """
-        orig_bins = self._n_bins
-        if isinstance(orig_bins, str):
-            if orig_bins == 'auto':
-                # calculate number of bins with Sturges rule
-                orig_bins = max(int(np.ceil(np.log2(n_samples) + 1.)), 2)
-            else:
-                raise ValueError(
-                    f"{KBinsDiscretizer.__name__} received "
-                    f"an invalid n_bins value. Received "
-                    f"{orig_bins}, while only 'auto' is supported."
-                )
+        orig_bins = self.n_bins
         if isinstance(orig_bins, numbers.Number):
             if not isinstance(orig_bins, numbers.Integral):
-                raise ValueError(
-                    f"{KBinsDiscretizer.__name__} received "
-                    f"an invalid n_bins type. Received "
-                    f"{type(orig_bins).__name__}, expected int."
-                )
+                raise ValueError("{} received an invalid n_bins type. "
+                                 "Received {}, expected int."
+                                 .format(KBinsDiscretizer.__name__,
+                                         type(orig_bins).__name__))
             if orig_bins < 2:
-                raise ValueError(
-                    f"{KBinsDiscretizer.__name__} received an invalid number "
-                    f"of bins. Received {orig_bins}, expected at least 2."
-                )
+                raise ValueError("{} received an invalid number "
+                                 "of bins. Received {}, expected at least 2."
+                                 .format(KBinsDiscretizer.__name__, orig_bins))
             return np.full(n_features, orig_bins, dtype=int)
 
         n_bins = check_array(orig_bins, dtype=int, copy=True,
@@ -290,11 +264,10 @@ def _validate_n_bins(self, n_features, n_samples):
         violating_indices = np.where(bad_nbins_value)[0]
         if violating_indices.shape[0] > 0:
             indices = ", ".join(str(i) for i in violating_indices)
-            raise ValueError(
-                f"{KBinsDiscretizer.__name__} received an invalid number "
-                f"of bins at indices {indices}. Number of bins "
-                f"must be at least 2, and must be an int."
-            )
+            raise ValueError("{} received an invalid number "
+                             "of bins at indices {}. Number of bins "
+                             "must be at least 2, and must be an int."
+                             .format(KBinsDiscretizer.__name__, indices))
         return n_bins
 
     def transform(self, X):
@@ -369,10 +342,8 @@ def inverse_transform(self, Xt):
         Xinv = check_array(Xt, copy=True, dtype=(np.float64, np.float32))
         n_features = self.n_bins_.shape[0]
         if Xinv.shape[1] != n_features:
-            raise ValueError(
-                f"Incorrect number of features. Expecting {n_features}, "
-                f"received {Xinv.shape[1]}."
-            )
+            raise ValueError("Incorrect number of features. Expecting {}, "
+                             "received {}.".format(n_features, Xinv.shape[1]))
 
         for jj in range(n_features):
             bin_edges = self.bin_edges_[jj]
diff --git a/sklearn/preprocessing/tests/test_discretization.py b/sklearn/preprocessing/tests/test_discretization.py
index cfb74e95afbb6..9d607c82d5831 100644
--- a/sklearn/preprocessing/tests/test_discretization.py
+++ b/sklearn/preprocessing/tests/test_discretization.py
@@ -36,24 +36,6 @@ def test_valid_n_bins():
     assert KBinsDiscretizer(n_bins=2).fit(X).n_bins_.dtype == np.dtype(int)
 
 
-def test_n_bins_5_deprecated():
-    # FIXME: remove in 1.2
-    est = KBinsDiscretizer()
-    depr_msg = ("The default value of n_bins will change from "
-                "5 to 'auto' in 1.2")
-    with pytest.warns(FutureWarning, match=depr_msg):
-        est.fit(X)
-
-
-def test_auto_bins():
-    est = KBinsDiscretizer(n_bins='auto')
-    # for Sturges rule: ceil(log2(4) + 1) = 3
-    # where 4 is n_samples in X
-    expected_bins = [3, 3, 3, 3]
-    est.fit(X)
-    assert np.all(est.n_bins_ == expected_bins)
-
-
 def test_invalid_n_bins():
     est = KBinsDiscretizer(n_bins=1)
     err_msg = ("KBinsDiscretizer received an invalid "
@@ -67,13 +49,6 @@ def test_invalid_n_bins():
     with pytest.raises(ValueError, match=err_msg):
         est.fit_transform(X)
 
-    # Bad string value
-    est = KBinsDiscretizer(n_bins='rice')
-    err_msg = ("KBinsDiscretizer received an invalid "
-               "n_bins value. Received rice, while only 'auto' is supported.")
-    with pytest.raises(ValueError, match=err_msg):
-        est.fit_transform(X)
-
 
 def test_invalid_n_bins_array():
     # Bad shape
diff --git a/sklearn/tests/test_docstring_parameters.py b/sklearn/tests/test_docstring_parameters.py
index 3d0cfc5151193..1756e0e4a65a6 100644
--- a/sklearn/tests/test_docstring_parameters.py
+++ b/sklearn/tests/test_docstring_parameters.py
@@ -206,10 +206,6 @@ def test_fit_docstring_attributes(name, Estimator):
     if Estimator.__name__ == 'SelectKBest':
         est.k = 2
 
-    if Estimator.__name__ == 'KBinsDiscretizer':
-        # FIX ME: remove in 1.2
-        est.n_bins = 'auto'
-
     if Estimator.__name__ == 'DummyClassifier':
         est.strategy = "stratified"
 
diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index 55c047faecbcc..849d8a1f3921b 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -613,10 +613,6 @@ def _set_checking_parameters(estimator):
     if name == "TheilSenRegressor":
         estimator.max_subpopulation = 100
 
-    if name == 'KBinsDiscretizer':
-        # FIX ME: remove in 1.2
-        estimator.n_bins = 'auto'
-
     if isinstance(estimator, BaseRandomProjection):
         # Due to the jl lemma and often very few samples, the number
         # of components of the random matrix projection will be probably

From f7f394a711318bd7deb6abfb451bfa07681e051c Mon Sep 17 00:00:00 2001
From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com>
Date: Mon, 8 Feb 2021 11:34:19 +0000
Subject: [PATCH 64/72] reverse

---
 sklearn/preprocessing/_discretization.py | 29 ++++--------------------
 1 file changed, 5 insertions(+), 24 deletions(-)

diff --git a/sklearn/preprocessing/_discretization.py b/sklearn/preprocessing/_discretization.py
index 2e1cd8512c91e..520020dce1b05 100644
--- a/sklearn/preprocessing/_discretization.py
+++ b/sklearn/preprocessing/_discretization.py
@@ -31,10 +31,6 @@ class KBinsDiscretizer(TransformerMixin, BaseEstimator):
     n_bins : int, 'auto' or array-like of shape (n_features,), dtype=integral,\
              default=5
         The number of bins to produce. Raises ValueError if ``n_bins < 2``.
-        For 'auto' option Sturges formula is used: bins are log(n_samples) + 1.
-
-        .. versionadded:: 1.0
-            Added 'auto' option
 
     encode : {'onehot', 'onehot-dense', 'ordinal'}, default='onehot'
         Method used to encode the transformed result.
@@ -131,7 +127,7 @@ class KBinsDiscretizer(TransformerMixin, BaseEstimator):
     """
 
     @_deprecate_positional_args
-    def __init__(self, n_bins='warn', *, encode='onehot', strategy='quantile',
+    def __init__(self, n_bins=5, *, encode='onehot', strategy='quantile',
                  dtype=None):
         self.n_bins = n_bins
         self.encode = encode
@@ -155,11 +151,6 @@ def fit(self, X, y=None):
         -------
         self
         """
-        self._n_bins = self.n_bins
-        if isinstance(self.n_bins, str) and self.n_bins == 'warn':
-            warnings.warn("The default value of n_bins will change from "
-                          "5 to 'auto' in 1.2", FutureWarning)
-            self._n_bins = 5
 
         X = self._validate_data(X, dtype='numeric')
 
@@ -187,8 +178,8 @@ def fit(self, X, y=None):
                 f"Got strategy={self.strategy!r} instead."
             )
 
-        n_samples, n_features = X.shape
-        n_bins = self._validate_n_bins(n_features, n_samples)
+        n_features = X.shape[1]
+        n_bins = self._validate_n_bins(n_features)
 
         bin_edges = np.zeros(n_features, dtype=object)
         for jj in range(n_features):
@@ -250,20 +241,10 @@ def fit(self, X, y=None):
 
         return self
 
-    def _validate_n_bins(self, n_features, n_samples):
+    def _validate_n_bins(self, n_features):
         """Returns n_bins_, the number of bins per feature.
         """
-        orig_bins = self._n_bins
-        if isinstance(orig_bins, str):
-            if orig_bins == 'auto':
-                # calculate number of bins with Sturges rule
-                orig_bins = max(int(np.ceil(np.log2(n_samples) + 1.)), 2)
-            else:
-                raise ValueError(
-                    f"{KBinsDiscretizer.__name__} received "
-                    f"an invalid n_bins value. Received "
-                    f"{orig_bins}, while only 'auto' is supported."
-                )
+        orig_bins = self.n_bins
         if isinstance(orig_bins, numbers.Number):
             if not isinstance(orig_bins, numbers.Integral):
                 raise ValueError(

From 173f18fe1b492fe9b7d94352c09ac853dc963662 Mon Sep 17 00:00:00 2001
From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com>
Date: Mon, 8 Feb 2021 11:35:26 +0000
Subject: [PATCH 65/72] reverse

---
 .../tests/test_discretization.py              | 25 -------------------
 1 file changed, 25 deletions(-)

diff --git a/sklearn/preprocessing/tests/test_discretization.py b/sklearn/preprocessing/tests/test_discretization.py
index cfb74e95afbb6..9d607c82d5831 100644
--- a/sklearn/preprocessing/tests/test_discretization.py
+++ b/sklearn/preprocessing/tests/test_discretization.py
@@ -36,24 +36,6 @@ def test_valid_n_bins():
     assert KBinsDiscretizer(n_bins=2).fit(X).n_bins_.dtype == np.dtype(int)
 
 
-def test_n_bins_5_deprecated():
-    # FIXME: remove in 1.2
-    est = KBinsDiscretizer()
-    depr_msg = ("The default value of n_bins will change from "
-                "5 to 'auto' in 1.2")
-    with pytest.warns(FutureWarning, match=depr_msg):
-        est.fit(X)
-
-
-def test_auto_bins():
-    est = KBinsDiscretizer(n_bins='auto')
-    # for Sturges rule: ceil(log2(4) + 1) = 3
-    # where 4 is n_samples in X
-    expected_bins = [3, 3, 3, 3]
-    est.fit(X)
-    assert np.all(est.n_bins_ == expected_bins)
-
-
 def test_invalid_n_bins():
     est = KBinsDiscretizer(n_bins=1)
     err_msg = ("KBinsDiscretizer received an invalid "
@@ -67,13 +49,6 @@ def test_invalid_n_bins():
     with pytest.raises(ValueError, match=err_msg):
         est.fit_transform(X)
 
-    # Bad string value
-    est = KBinsDiscretizer(n_bins='rice')
-    err_msg = ("KBinsDiscretizer received an invalid "
-               "n_bins value. Received rice, while only 'auto' is supported.")
-    with pytest.raises(ValueError, match=err_msg):
-        est.fit_transform(X)
-
 
 def test_invalid_n_bins_array():
     # Bad shape

From 192a37c4791d544b8b67394471f140858e99b97d Mon Sep 17 00:00:00 2001
From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com>
Date: Mon, 8 Feb 2021 11:36:03 +0000
Subject: [PATCH 66/72] reverse

---
 sklearn/tests/test_docstring_parameters.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/sklearn/tests/test_docstring_parameters.py b/sklearn/tests/test_docstring_parameters.py
index 3d0cfc5151193..1756e0e4a65a6 100644
--- a/sklearn/tests/test_docstring_parameters.py
+++ b/sklearn/tests/test_docstring_parameters.py
@@ -206,10 +206,6 @@ def test_fit_docstring_attributes(name, Estimator):
     if Estimator.__name__ == 'SelectKBest':
         est.k = 2
 
-    if Estimator.__name__ == 'KBinsDiscretizer':
-        # FIX ME: remove in 1.2
-        est.n_bins = 'auto'
-
     if Estimator.__name__ == 'DummyClassifier':
         est.strategy = "stratified"
 

From f003f99ef0c04ace0c16336a0ddac2827730da35 Mon Sep 17 00:00:00 2001
From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com>
Date: Mon, 8 Feb 2021 11:37:16 +0000
Subject: [PATCH 67/72] reverse

---
 sklearn/utils/estimator_checks.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index 55c047faecbcc..849d8a1f3921b 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -613,10 +613,6 @@ def _set_checking_parameters(estimator):
     if name == "TheilSenRegressor":
         estimator.max_subpopulation = 100
 
-    if name == 'KBinsDiscretizer':
-        # FIX ME: remove in 1.2
-        estimator.n_bins = 'auto'
-
     if isinstance(estimator, BaseRandomProjection):
         # Due to the jl lemma and often very few samples, the number
         # of components of the random matrix projection will be probably

From 181e0c4544a9608644c9cc1af1eddfacee417cf0 Mon Sep 17 00:00:00 2001
From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com>
Date: Mon, 8 Feb 2021 11:39:53 +0000
Subject: [PATCH 68/72] reverse

---
 doc/whats_new/v1.0.rst | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/doc/whats_new/v1.0.rst b/doc/whats_new/v1.0.rst
index 2cec2c6d8d9df..1f91634735a9c 100644
--- a/doc/whats_new/v1.0.rst
+++ b/doc/whats_new/v1.0.rst
@@ -136,11 +136,6 @@ Changelog
   positioning strategy ``knots``.
   :pr:`18368` by :user:`Christian Lorentzen <lorentzenchr>`.
 
-- |Enhancement| Added new argument ``auto`` for ``n_bins`` parameter in
-  :class:`preprocessing.KBinsDiscretizer` for calculating number of bins
-  via Sturges rule.
-  :pr:`19290` by :user:`Gleb Levitskiy <GLevV>`.
-
 - |Efficiency| Changed ``algorithm`` argument for :class:`cluster.KMeans` in 
   :class:`preprocessing.KBinsDiscretizer` from ``auto`` to ``full``. 
   :pr:`19290` by :user:`Gleb Levitskiy <GLevV>`.

From 4a3380af0118054785c52735eaab702bbeba86b0 Mon Sep 17 00:00:00 2001
From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com>
Date: Mon, 8 Feb 2021 12:01:44 +0000
Subject: [PATCH 69/72] Update _discretization.py

---
 sklearn/preprocessing/_discretization.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/preprocessing/_discretization.py b/sklearn/preprocessing/_discretization.py
index 520020dce1b05..2810834d529ea 100644
--- a/sklearn/preprocessing/_discretization.py
+++ b/sklearn/preprocessing/_discretization.py
@@ -28,7 +28,7 @@ class KBinsDiscretizer(TransformerMixin, BaseEstimator):
 
     Parameters
     ----------
-    n_bins : int, 'auto' or array-like of shape (n_features,), dtype=integral,\
+    n_bins : int or array-like of shape (n_features,), dtype=integral,\
              default=5
         The number of bins to produce. Raises ValueError if ``n_bins < 2``.
 

From cf290758d7fd3e33a0cb72128625e874f9123d89 Mon Sep 17 00:00:00 2001
From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com>
Date: Tue, 20 Apr 2021 11:10:37 +0000
Subject: [PATCH 70/72] reverse

---
 sklearn/preprocessing/_discretization.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/sklearn/preprocessing/_discretization.py b/sklearn/preprocessing/_discretization.py
index 22fa236f3314e..526aa74d31007 100644
--- a/sklearn/preprocessing/_discretization.py
+++ b/sklearn/preprocessing/_discretization.py
@@ -205,7 +205,8 @@ def fit(self, X, y=None):
                 init = (uniform_edges[1:] + uniform_edges[:-1])[:, None] * 0.5
 
                 # 1D k-means procedure
-                km = KMeans(n_clusters=n_bins[jj], init=init, n_init=1)
+                km = KMeans(n_clusters=n_bins[jj], init=init, n_init=1, 
+                            algorithm='full')
                 centers = km.fit(column[:, None]).cluster_centers_[:, 0]
                 # Must sort, centers may be unsorted even with sorted init
                 centers.sort()

From f4d30ab7ca6e39b7803e7ca57f24326ba2a18422 Mon Sep 17 00:00:00 2001
From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com>
Date: Tue, 20 Apr 2021 11:13:22 +0000
Subject: [PATCH 71/72] reverse

---
 sklearn/preprocessing/_discretization.py | 65 ++++++++++--------------
 1 file changed, 28 insertions(+), 37 deletions(-)

diff --git a/sklearn/preprocessing/_discretization.py b/sklearn/preprocessing/_discretization.py
index 2810834d529ea..9ce95a97544a5 100644
--- a/sklearn/preprocessing/_discretization.py
+++ b/sklearn/preprocessing/_discretization.py
@@ -28,8 +28,7 @@ class KBinsDiscretizer(TransformerMixin, BaseEstimator):
 
     Parameters
     ----------
-    n_bins : int or array-like of shape (n_features,), dtype=integral,\
-             default=5
+    n_bins : int or array-like of shape (n_features,), default=5
         The number of bins to produce. Raises ValueError if ``n_bins < 2``.
 
     encode : {'onehot', 'onehot-dense', 'ordinal'}, default='onehot'
@@ -151,7 +150,6 @@ def fit(self, X, y=None):
         -------
         self
         """
-
         X = self._validate_data(X, dtype='numeric')
 
         supported_dtype = (np.float64, np.float32)
@@ -165,18 +163,17 @@ def fit(self, X, y=None):
                 f"{supported_dtype + (None,)}. Got dtype={self.dtype} "
                 f" instead."
             )
+
         valid_encode = ('onehot', 'onehot-dense', 'ordinal')
         if self.encode not in valid_encode:
-            raise ValueError(
-                f"Valid options for 'encode' are {valid_encode}. "
-                f"Got encode={self.encode!r} instead."
-            )
+            raise ValueError("Valid options for 'encode' are {}. "
+                             "Got encode={!r} instead."
+                             .format(valid_encode, self.encode))
         valid_strategy = ('uniform', 'quantile', 'kmeans')
         if self.strategy not in valid_strategy:
-            raise ValueError(
-                f"Valid options for 'strategy' are {valid_strategy}. "
-                f"Got strategy={self.strategy!r} instead."
-            )
+            raise ValueError("Valid options for 'strategy' are {}. "
+                             "Got strategy={!r} instead."
+                             .format(valid_strategy, self.strategy))
 
         n_features = X.shape[1]
         n_bins = self._validate_n_bins(n_features)
@@ -187,8 +184,8 @@ def fit(self, X, y=None):
             col_min, col_max = column.min(), column.max()
 
             if col_min == col_max:
-                warnings.warn(f"Feature {jj} is constant and will be "
-                              f"replaced with 0.")
+                warnings.warn("Feature %d is constant and will be "
+                              "replaced with 0." % jj)
                 n_bins[jj] = 1
                 bin_edges[jj] = np.array([-np.inf, np.inf])
                 continue
@@ -208,8 +205,8 @@ def fit(self, X, y=None):
                 init = (uniform_edges[1:] + uniform_edges[:-1])[:, None] * 0.5
 
                 # 1D k-means procedure
-                km = KMeans(n_clusters=n_bins[jj], init=init,
-                            n_init=1, algorithm='full')
+                km = KMeans(n_clusters=n_bins[jj], init=init, n_init=1,
+                            algorithm='full')
                 centers = km.fit(column[:, None]).cluster_centers_[:, 0]
                 # Must sort, centers may be unsorted even with sorted init
                 centers.sort()
@@ -221,10 +218,9 @@ def fit(self, X, y=None):
                 mask = np.ediff1d(bin_edges[jj], to_begin=np.inf) > 1e-8
                 bin_edges[jj] = bin_edges[jj][mask]
                 if len(bin_edges[jj]) - 1 != n_bins[jj]:
-                    warnings.warn(f"Bins whose width are too small "
-                                  f"(i.e., <= 1e-8) in feature {jj} "
-                                  f"are removed. Consider decreasing "
-                                  f"the number of bins.")
+                    warnings.warn('Bins whose width are too small (i.e., <= '
+                                  '1e-8) in feature %d are removed. Consider '
+                                  'decreasing the number of bins.' % jj)
                     n_bins[jj] = len(bin_edges[jj]) - 1
 
         self.bin_edges_ = bin_edges
@@ -247,16 +243,14 @@ def _validate_n_bins(self, n_features):
         orig_bins = self.n_bins
         if isinstance(orig_bins, numbers.Number):
             if not isinstance(orig_bins, numbers.Integral):
-                raise ValueError(
-                    f"{KBinsDiscretizer.__name__} received "
-                    f"an invalid n_bins type. Received "
-                    f"{type(orig_bins).__name__}, expected int."
-                )
+                raise ValueError("{} received an invalid n_bins type. "
+                                 "Received {}, expected int."
+                                 .format(KBinsDiscretizer.__name__,
+                                         type(orig_bins).__name__))
             if orig_bins < 2:
-                raise ValueError(
-                    f"{KBinsDiscretizer.__name__} received an invalid number "
-                    f"of bins. Received {orig_bins}, expected at least 2."
-                )
+                raise ValueError("{} received an invalid number "
+                                 "of bins. Received {}, expected at least 2."
+                                 .format(KBinsDiscretizer.__name__, orig_bins))
             return np.full(n_features, orig_bins, dtype=int)
 
         n_bins = check_array(orig_bins, dtype=int, copy=True,
@@ -271,11 +265,10 @@ def _validate_n_bins(self, n_features):
         violating_indices = np.where(bad_nbins_value)[0]
         if violating_indices.shape[0] > 0:
             indices = ", ".join(str(i) for i in violating_indices)
-            raise ValueError(
-                f"{KBinsDiscretizer.__name__} received an invalid number "
-                f"of bins at indices {indices}. Number of bins "
-                f"must be at least 2, and must be an int."
-            )
+            raise ValueError("{} received an invalid number "
+                             "of bins at indices {}. Number of bins "
+                             "must be at least 2, and must be an int."
+                             .format(KBinsDiscretizer.__name__, indices))
         return n_bins
 
     def transform(self, X):
@@ -350,10 +343,8 @@ def inverse_transform(self, Xt):
         Xinv = check_array(Xt, copy=True, dtype=(np.float64, np.float32))
         n_features = self.n_bins_.shape[0]
         if Xinv.shape[1] != n_features:
-            raise ValueError(
-                f"Incorrect number of features. Expecting {n_features}, "
-                f"received {Xinv.shape[1]}."
-            )
+            raise ValueError("Incorrect number of features. Expecting {}, "
+                             "received {}.".format(n_features, Xinv.shape[1]))
 
         for jj in range(n_features):
             bin_edges = self.bin_edges_[jj]

From 6e3f50f39424bdd8cc2ef955594e4da1556bff46 Mon Sep 17 00:00:00 2001
From: Gleb Levitskiy <36483986+GLevV@users.noreply.github.com>
Date: Tue, 20 Apr 2021 12:11:13 +0000
Subject: [PATCH 72/72] Update _discretization.py

---
 sklearn/preprocessing/_discretization.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sklearn/preprocessing/_discretization.py b/sklearn/preprocessing/_discretization.py
index 526aa74d31007..4747c4adc4945 100644
--- a/sklearn/preprocessing/_discretization.py
+++ b/sklearn/preprocessing/_discretization.py
@@ -205,8 +205,8 @@ def fit(self, X, y=None):
                 init = (uniform_edges[1:] + uniform_edges[:-1])[:, None] * 0.5
 
                 # 1D k-means procedure
-                km = KMeans(n_clusters=n_bins[jj], init=init, n_init=1, 
-                            algorithm='full')
+                km = KMeans(n_clusters=n_bins[jj], init=init, 
+                            n_init=1, algorithm='full')
                 centers = km.fit(column[:, None]).cluster_centers_[:, 0]
                 # Must sort, centers may be unsorted even with sorted init
                 centers.sort()