[MRG] Efficiency updates to KBinsDiscretizer by glevv · Pull Request #19290 · scikit-learn/scikit-learn · GitHub
[MRG] Efficiency updates to KBinsDiscretizer #19290


Closed · wants to merge 79 commits

Commits (79)
d93b74c
KBD changes
glevv Jan 26, 2021
59a84d6
Small fix
glevv Jan 26, 2021
566ac59
Added checks for n_bins=str
glevv Jan 26, 2021
675e1e6
Lint changes
glevv Jan 28, 2021
ab4f868
Changed behaviour to catch n_bins<2 in 'auto'
glevv Jan 28, 2021
87579ed
Update _discretization.py
glevv Jan 28, 2021
44ea08d
Update _discretization.py
glevv Jan 28, 2021
731de9d
Update sklearn/preprocessing/_discretization.py
glevv Jan 28, 2021
55b84f6
Update sklearn/preprocessing/_discretization.py
glevv Jan 28, 2021
fc9f935
Update test_discretization.py
glevv Jan 28, 2021
f5bff72
Update test_discretization.py
glevv Jan 28, 2021
a8e24e5
Update _discretization.py
glevv Jan 28, 2021
c363827
Update _discretization.py
glevv Jan 28, 2021
6d767c5
Update _discretization.py
glevv Jan 28, 2021
dc6b095
Update _discretization.py
glevv Jan 28, 2021
1dac5f9
Update _discretization.py
glevv Jan 28, 2021
abff576
Update _discretization.py
glevv Jan 29, 2021
bcb118d
Update _discretization.py
glevv Jan 29, 2021
cca972c
Update test_discretization.py
glevv Jan 29, 2021
7b552e6
Update _discretization.py
glevv Jan 29, 2021
617bf90
Update _discretization.py
glevv Jan 29, 2021
9563a1c
Update _discretization.py
glevv Jan 29, 2021
28dbbc5
Update _discretization.py
glevv Jan 29, 2021
3e5a86d
Update _discretization.py
glevv Jan 29, 2021
0d7cc14
Update sklearn/preprocessing/_discretization.py
glevv Jan 29, 2021
0c9eb18
Update sklearn/preprocessing/_discretization.py
glevv Jan 29, 2021
119976b
Update test_discretization.py
glevv Jan 29, 2021
fdcbd29
Update _discretization.py
glevv Jan 29, 2021
fe5bc1d
added test for auto
glevv Jan 29, 2021
aede54a
Update test_discretization.py
glevv Jan 29, 2021
a0b04da
Update test_discretization.py
glevv Jan 29, 2021
ca735de
Update sklearn/preprocessing/_discretization.py
glevv Jan 31, 2021
08f5467
Update sklearn/preprocessing/tests/test_discretization.py
glevv Jan 31, 2021
eba8dbb
Update test_discretization.py
glevv Jan 31, 2021
1eb2322
Update _discretization.py
glevv Jan 31, 2021
d623a3c
Update test_docstring_parameters.py
glevv Jan 31, 2021
78cdde1
Update test_common.py
glevv Jan 31, 2021
396d4a8
Update v1.0.rst
glevv Jan 31, 2021
f577a03
Update v1.0.rst
glevv Jan 31, 2021
16abf9e
Update _discretization.py
glevv Jan 31, 2021
94f9c87
Update _discretization.py
glevv Jan 31, 2021
f499549
Update test_docstring_parameters.py
glevv Jan 31, 2021
e266fad
Update v1.0.rst
glevv Jan 31, 2021
cb72479
Update v1.0.rst
glevv Jan 31, 2021
66a4682
Update v1.0.rst
glevv Jan 31, 2021
840c77a
Update _discretization.py
glevv Jan 31, 2021
9cdb920
Update test_common.py
glevv Jan 31, 2021
ff127f5
Update _discretization.py
glevv Jan 31, 2021
641d58d
Update test_docstring_parameters.py
glevv Jan 31, 2021
3e82bee
Update v1.0.rst
glevv Jan 31, 2021
71520f3
Update test_common.py
glevv Jan 31, 2021
76c68a7
Update estimator_checks.py
glevv Jan 31, 2021
4e01fd5
Update estimator_checks.py
glevv Jan 31, 2021
71bb2e5
Update _discretization.py
glevv Jan 31, 2021
28ae05e
Update test_discretization.py
glevv Jan 31, 2021
835a514
Update estimator_checks.py
glevv Jan 31, 2021
6ff5ee6
Update _discretization.py
glevv Jan 31, 2021
01ad4de
Update _discretization.py
glevv Jan 31, 2021
fce49a5
Update _discretization.py
glevv Feb 1, 2021
0d9ffca
Merge branch 'kbd_changes' into main
glevv Feb 2, 2021
2303075
Revert "DOC Add URL to reference of Minka paper used in PCA (#19207)"
glevv Feb 2, 2021
4f99c48
Revert "DOC update Keras description in related projects (#19265)"
glevv Feb 2, 2021
cdee357
Revert "CLN Removes duplicated or unneeded code in ColumnTransformer …
glevv Feb 2, 2021
667c7b8
Merge pull request #1 from GLevV/kbd_changes
glevv Feb 2, 2021
6e69d3a
Merge branch 'main' of https://github.com/GLevV/scikit-learn into main
glevv Feb 2, 2021
15ba412
Revert "Kbd changes"
glevv Feb 8, 2021
cb320b6
Merge pull request #2 from GLevV/revert-1-kbd_changes
glevv Feb 8, 2021
f7f394a
reverse
glevv Feb 8, 2021
173f18f
reverse
glevv Feb 8, 2021
192a37c
reverse
glevv Feb 8, 2021
f003f99
reverse
glevv Feb 8, 2021
181e0c4
reverse
glevv Feb 8, 2021
4a3380a
Update _discretization.py
glevv Feb 8, 2021
cf29075
reverse
glevv Apr 20, 2021
f4d30ab
reverse
glevv Apr 20, 2021
334011a
Merge pull request #3 from scikit-learn/main
glevv Apr 20, 2021
6e3f50f
Update _discretization.py
glevv Apr 20, 2021
0b32083
Merge branch 'kbd_changes' into main
glevv Apr 20, 2021
e8ecd14
Merge pull request #5 from GLevV/main
glevv Apr 20, 2021
4 changes: 2 additions & 2 deletions doc/related_projects.rst
@@ -151,8 +151,8 @@ and tasks.
- `nolearn <https://github.com/dnouri/nolearn>`_ A number of wrappers and
abstractions around existing neural network libraries

- `Keras <https://www.tensorflow.org/api_docs/python/tf/keras>`_ High-level API for
TensorFlow with a scikit-learn inspired API.
- `keras <https://github.com/fchollet/keras>`_ Deep Learning library capable of
running on top of either TensorFlow or Theano.

- `lasagne <https://github.com/Lasagne/Lasagne>`_ A lightweight library to
build and train neural networks in Theano.
4 changes: 4 additions & 0 deletions doc/whats_new/v1.0.rst
@@ -386,6 +386,10 @@ Changelog
supporting sparse matrix and raise the appropriate error message.
:pr:`19879` by :user:`Guillaume Lemaitre <glemaitre>`.

- |Efficiency| Changed ``algorithm`` argument for :class:`cluster.KMeans` in
:class:`preprocessing.KBinsDiscretizer` from ``auto`` to ``full``.
:pr:`19290` by :user:`Gleb Levitskiy <GLevV>`.

:mod:`sklearn.tree`
...................

17 changes: 12 additions & 5 deletions sklearn/compose/_column_transformer.py
@@ -19,6 +19,7 @@
from ..utils import Bunch
from ..utils import _safe_indexing
from ..utils import _get_column_indices
from ..utils import _determine_key_type
from ..utils.metaestimators import _BaseComposition
from ..utils.validation import check_array, check_is_fitted
from ..utils.validation import _deprecate_positional_args
@@ -327,6 +328,12 @@ def _validate_remainder(self, X):
"'passthrough', or estimator. '%s' was passed instead" %
self.remainder)

# Make it possible to check for reordered named columns on transform
self._has_str_cols = any(_determine_key_type(cols) == 'str'
for cols in self._columns)
if hasattr(X, 'columns'):
self._df_columns = X.columns

self._n_features = X.shape[1]
cols = []
for columns in self._columns:
@@ -362,12 +369,12 @@ def get_feature_names(self):
if trans == 'drop' or _is_empty_column_selection(column):
continue
if trans == 'passthrough':
if self._feature_names_in is not None:
if hasattr(self, '_df_columns'):
if ((not isinstance(column, slice))
and all(isinstance(col, str) for col in column)):
feature_names.extend(column)
else:
feature_names.extend(self._feature_names_in[column])
feature_names.extend(self._df_columns[column])
else:
indices = np.arange(self._n_features)
feature_names.extend(['x%d' % i for i in indices[column]])
@@ -463,7 +470,7 @@ def _fit_transform(self, X, y, func, fitted=False):
message_clsname='ColumnTransformer',
message=self._log_message(name, idx, len(transformers)))
for idx, (name, trans, column, weight) in enumerate(
transformers, 1))
self._iter(fitted=fitted, replace_strings=True), 1))
except ValueError as e:
if "Expected 2D array, got 1D array instead" in str(e):
raise ValueError(_ERR_MSG_1DCOLUMN) from e
@@ -629,9 +636,9 @@ def _sk_visual_block_(self):
transformers = self.transformers
elif hasattr(self, "_remainder"):
remainder_columns = self._remainder[2]
if self._feature_names_in is not None:
if hasattr(self, '_df_columns'):
remainder_columns = (
self._feature_names_in[remainder_columns].tolist()
self._df_columns[remainder_columns].tolist()
)
transformers = chain(self.transformers,
[('remainder', self.remainder,
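To make the attribute change in this diff concrete, here is a standalone sketch of the passthrough branch of `get_feature_names`: a list of string selectors is already a list of names, other selectors index into the DataFrame columns remembered at fit time (`_df_columns`), and generated `x%d` names are the last resort. The helper name `passthrough_feature_names` is mine for illustration; it is not scikit-learn API.

```python
import numpy as np

def passthrough_feature_names(column, df_columns, n_features):
    """Illustrative reconstruction of the passthrough naming fallback."""
    if df_columns is not None:
        # A list of string selectors already is the list of names.
        if not isinstance(column, slice) and all(
                isinstance(col, str) for col in column):
            return list(column)
        # Otherwise index into the remembered DataFrame columns.
        return [str(c) for c in np.asarray(df_columns)[column]]
    # No DataFrame was seen at fit time: fall back to positional names.
    indices = np.arange(n_features)
    return ['x%d' % i for i in indices[column]]

print(passthrough_feature_names(['a', 'b'], ['a', 'b', 'c'], 3))  # ['a', 'b']
print(passthrough_feature_names([0, 2], ['a', 'b', 'c'], 3))      # ['a', 'c']
print(passthrough_feature_names([0, 2], None, 3))                 # ['x0', 'x2']
```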
30 changes: 12 additions & 18 deletions sklearn/decomposition/_pca.py
@@ -32,8 +32,7 @@ def _assess_dimension(spectrum, rank, n_samples):
"""Compute the log-likelihood of a rank ``rank`` dataset.

The dataset is assumed to be embedded in gaussian noise of shape(n,
dimf) having spectrum ``spectrum``. This implements the method of
T. P. Minka.
dimf) having spectrum ``spectrum``.

Parameters
----------
@@ -51,11 +50,10 @@ def _assess_dimension(spectrum, rank, n_samples):
ll : float
The log-likelihood.

References
----------
Notes
-----
This implements the method of `Thomas P. Minka:
Automatic Choice of Dimensionality for PCA. NIPS 2000: 598-604
<https://proceedings.neurips.cc/paper/2000/file/7503cfacd12053d309b6bed5c89de212-Paper.pdf>`_
Automatic Choice of Dimensionality for PCA. NIPS 2000: 598-604`
"""

n_features = spectrum.shape[0]
@@ -274,30 +272,26 @@ class PCA(_BasePCA):

References
----------
For n_components == 'mle', this class uses the method from:
`Minka, T. P.. "Automatic choice of dimensionality for PCA".
In NIPS, pp. 598-604 <https://tminka.github.io/papers/pca/minka-pca.pdf>`_
For n_components == 'mle', this class uses the method of *Minka, T. P.
"Automatic choice of dimensionality for PCA". In NIPS, pp. 598-604*

Implements the probabilistic PCA model from:
`Tipping, M. E., and Bishop, C. M. (1999). "Probabilistic principal
Tipping, M. E., and Bishop, C. M. (1999). "Probabilistic principal
component analysis". Journal of the Royal Statistical Society:
Series B (Statistical Methodology), 61(3), 611-622.
<http://www.miketipping.com/papers/met-mppca.pdf>`_
via the score and score_samples methods.
See http://www.miketipping.com/papers/met-mppca.pdf

For svd_solver == 'arpack', refer to `scipy.sparse.linalg.svds`.

For svd_solver == 'randomized', see:
`Halko, N., Martinsson, P. G., and Tropp, J. A. (2011).
*Halko, N., Martinsson, P. G., and Tropp, J. A. (2011).
"Finding structure with randomness: Probabilistic algorithms for
constructing approximate matrix decompositions".
SIAM review, 53(2), 217-288.
<https://doi.org/10.1137/090771806>`_
and also
`Martinsson, P. G., Rokhlin, V., and Tygert, M. (2011).
SIAM review, 53(2), 217-288.* and also
*Martinsson, P. G., Rokhlin, V., and Tygert, M. (2011).
"A randomized algorithm for the decomposition of matrices".
Applied and Computational Harmonic Analysis, 30(1), 47-68
<https://doi.org/10.1016/j.acha.2010.02.003>`_.
Applied and Computational Harmonic Analysis, 30(1), 47-68.*

Examples
--------
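The Minka reference edited in this diff is what backs `n_components='mle'`. A minimal usage sketch, on synthetic data of my own (not from the PR): a rank-2 signal embedded in 5 dimensions with small isotropic noise, where Minka's criterion should pick out a low dimensionality.

```python
import numpy as np
from sklearn.decomposition import PCA

# Rank-2 signal in 5-D plus small isotropic Gaussian noise.
rng = np.random.RandomState(0)
latent = rng.normal(size=(500, 2))
mixing = rng.normal(size=(2, 5))
X = latent @ mixing + 0.01 * rng.normal(size=(500, 5))

# n_components='mle' applies Minka's automatic dimensionality choice;
# it requires the exact ('full') SVD solver.
pca = PCA(n_components='mle', svd_solver='full').fit(X)
print(pca.n_components_)  # small: the MLE detects the low-rank structure
```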
3 changes: 2 additions & 1 deletion sklearn/preprocessing/_discretization.py
@@ -205,7 +205,8 @@ def fit(self, X, y=None):
init = (uniform_edges[1:] + uniform_edges[:-1])[:, None] * 0.5

# 1D k-means procedure
km = KMeans(n_clusters=n_bins[jj], init=init, n_init=1)
km = KMeans(n_clusters=n_bins[jj], init=init,
n_init=1, algorithm='full')
centers = km.fit(column[:, None]).cluster_centers_[:, 0]
# Must sort, centers may be unsorted even with sorted init
centers.sort()
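For context on this one-line change: `algorithm='full'` pins `KMeans` to plain Lloyd iterations instead of the default dispatch, which may select the Elkan variant whose triangle-inequality bookkeeping buys nothing for 1-D data (newer scikit-learn renamed `'full'` to `'lloyd'`). The NumPy-only sketch below is my reconstruction of the binning step, mirroring the diff's variable names; it is not the library source.

```python
import numpy as np

def lloyd_1d(column, init_centers, n_iter=20):
    """Plain Lloyd's k-means in one dimension -- what algorithm='full' runs."""
    centers = init_centers.astype(float).copy()
    for _ in range(n_iter):
        # Assign each point to its nearest center.
        labels = np.argmin(np.abs(column[:, None] - centers[None, :]), axis=1)
        # Move each center to the mean of its assigned points.
        for k in range(len(centers)):
            pts = column[labels == k]
            if len(pts):
                centers[k] = pts.mean()
    return np.sort(centers)  # centers may be unsorted even with sorted init

rng = np.random.RandomState(0)
column = rng.normal(size=200)
n_bins = 5

# Deterministic init, as in the diff: midpoints of uniform bin edges.
uniform_edges = np.linspace(column.min(), column.max(), n_bins + 1)
init = (uniform_edges[1:] + uniform_edges[:-1]) * 0.5

centers = lloyd_1d(column, init)
# Bin edges are midpoints between consecutive sorted centers.
bin_edges = np.r_[column.min(),
                  (centers[1:] + centers[:-1]) * 0.5,
                  column.max()]
print(bin_edges.shape[0])  # prints 6 (n_bins + 1 edges)
```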