ENH Adds feature_names_out to preprocessing module by thomasjpfan · Pull Request #21079 · scikit-learn/scikit-learn · GitHub
ENH Adds feature_names_out to preprocessing module #21079

Merged
Changes from all commits
28 commits
6bd9df5
ENH Adds feature_names_out to preprocessing module
thomasjpfan Sep 17, 2021
fcd7e70
DOC Adds whats new
thomasjpfan Sep 17, 2021
88ea6ad
DOC Adds whats new
thomasjpfan Sep 17, 2021
e87920b
Merge branch 'main' into feature_names_out_preprocessing
ogrisel Sep 24, 2021
a704c82
DOC remove redundant code in GPR example (#21133)
zhaofeng-shu33 Sep 24, 2021
7eba091
DOC Ensures that RandomizedSearchCV passes numpydoc validation (#21131)
nikJ13 Sep 24, 2021
321e682
Merge remote-tracking branch 'upstream/main' into feature_names_out_p…
thomasjpfan Sep 24, 2021
19e4ff1
CLN Address comments
thomasjpfan Sep 24, 2021
02193bc
Merge remote-tracking branch 'upstream/main' into feature_names_out_p…
thomasjpfan Oct 1, 2021
bcadc08
Merge remote-tracking branch 'upstream/main' into feature_names_out_p…
thomasjpfan Oct 15, 2021
ed04543
Merge remote-tracking branch 'upstream/main' into feature_names_out_p…
thomasjpfan Oct 19, 2021
82d19c7
DOC Fix grammar
thomasjpfan Oct 19, 2021
0e7c6db
Merge remote-tracking branch 'upstream/main' into feature_names_out_p…
thomasjpfan Oct 20, 2021
ff31d1b
CLN Address comments
thomasjpfan Oct 20, 2021
48a8aac
Merge remote-tracking branch 'upstream/main' into feature_names_out_p…
thomasjpfan Oct 20, 2021
01a32d3
REV Revert unneeded change
thomasjpfan Oct 20, 2021
ef93b22
Merge remote-tracking branch 'upstream/main' into feature_names_out_p…
thomasjpfan Oct 26, 2021
61cde09
CLN Merge conflict
thomasjpfan Oct 26, 2021
a5c7ef4
Merge branch 'main' into feature_names_out_preprocessing
ogrisel Nov 5, 2021
b9831d3
Merge branch 'main' into feature_names_out_preprocessing
ogrisel Nov 5, 2021
a3147d8
Merge branch 'main' into feature_names_out_preprocessing
ogrisel Dec 2, 2021
2cd55e9
Make KernelCenterer inherit from _ClassNamePrefixFeaturesOutMixin
ogrisel Dec 6, 2021
dbbe2d8
Merge remote-tracking branch 'upstream/main' into feature_names_out_p…
thomasjpfan Dec 6, 2021
3587c45
Merge remote-tracking branch 'upstream/main' into feature_names_out_p…
thomasjpfan Dec 6, 2021
35f32aa
Merge remote-tracking branch 'upstream/main' into feature_names_out_p…
thomasjpfan Jan 4, 2022
8778438
DOC Adjust merge
thomasjpfan Jan 4, 2022
87bde75
Merge branch 'main' of https://github.com/scikit-learn/scikit-learn i…
lesteve Feb 4, 2022
683f849
Merge branch 'main' of https://github.com/scikit-learn/scikit-learn i…
lesteve Feb 7, 2022
6 changes: 6 additions & 0 deletions doc/whats_new/v1.1.rst
@@ -550,6 +550,12 @@ Changelog
`fit` instead of `__init__`.
:pr:`21434` by :user:`Krum Arnaudov <krumeto>`.

- |API| Adds :meth:`get_feature_names_out` to
:class:`preprocessing.Normalizer`,
:class:`preprocessing.KernelCenterer`,
:class:`preprocessing.OrdinalEncoder`, and
:class:`preprocessing.Binarizer`. :pr:`21079` by `Thomas Fan`_.

:mod:`sklearn.random_projection`
................................

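For context (not part of the diff), a minimal usage sketch of the new one-to-one behavior described in this changelog entry, assuming scikit-learn >= 1.1 and pandas are installed; the column names and data are made up for illustration:

# Sketch: the one-to-one preprocessing transformers now echo the fitted
# input column names from get_feature_names_out.
import pandas as pd
from sklearn.preprocessing import Binarizer, Normalizer

X = pd.DataFrame({"age": [20.0, 30.0], "income": [1.0, 2.0]})

print(Normalizer().fit(X).get_feature_names_out())              # ['age' 'income']
print(Binarizer(threshold=1.5).fit(X).get_feature_names_out())  # ['age' 'income']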
22 changes: 18 additions & 4 deletions sklearn/preprocessing/_data.py
@@ -16,7 +16,12 @@
from scipy import optimize
from scipy.special import boxcox

from ..base import BaseEstimator, TransformerMixin, _OneToOneFeatureMixin
from ..base import (
BaseEstimator,
TransformerMixin,
_OneToOneFeatureMixin,
_ClassNamePrefixFeaturesOutMixin,
)
from ..utils import check_array
from ..utils.deprecation import deprecated
from ..utils.extmath import _incremental_mean_and_var, row_norms
@@ -1825,7 +1830,7 @@ def normalize(X, norm="l2", *, axis=1, copy=True, return_norm=False):
return X


class Normalizer(TransformerMixin, BaseEstimator):
class Normalizer(_OneToOneFeatureMixin, TransformerMixin, BaseEstimator):
"""Normalize samples individually to unit norm.

Each sample (i.e. each row of the data matrix) with at least one
@@ -1996,7 +2001,7 @@ def binarize(X, *, threshold=0.0, copy=True):
return X


class Binarizer(TransformerMixin, BaseEstimator):
class Binarizer(_OneToOneFeatureMixin, TransformerMixin, BaseEstimator):
"""Binarize data (set feature values to 0 or 1) according to a threshold.

Values greater than the threshold map to 1, while values less than
@@ -2119,7 +2124,7 @@ def _more_tags(self):
return {"stateless": True}


class KernelCenterer(TransformerMixin, BaseEstimator):
class KernelCenterer(_ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator):
r"""Center an arbitrary kernel matrix :math:`K`.

Let us define a kernel :math:`K` such that:
@@ -2258,6 +2263,15 @@ def transform(self, K, copy=True):

return K

@property
def _n_features_out(self):
"""Number of transformed output features."""
# Used by _ClassNamePrefixFeaturesOutMixin. This model preserves the
# number of input features but this is not a one-to-one mapping in the
# usual sense. Hence the choice not to use _OneToOneFeatureMixin to
# implement get_feature_names_out for this class.
return self.n_features_in_

def _more_tags(self):
return {"pairwise": True}

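To illustrate the `_n_features_out` property above (a sketch, not part of the diff): `KernelCenterer` consumes an n_samples x n_samples kernel matrix, so `get_feature_names_out` generates class-name-prefixed names via `_ClassNamePrefixFeaturesOutMixin` instead of echoing input feature names. Assumes scikit-learn >= 1.1:

# Sketch: output names are "<lowercased class name><index>", one per kernel column.
import numpy as np
from sklearn.metrics.pairwise import linear_kernel
from sklearn.preprocessing import KernelCenterer

rng = np.random.RandomState(0)
K = linear_kernel(rng.random_sample((5, 3)))  # 5 x 5 kernel matrix

centerer = KernelCenterer().fit(K)
print(centerer.get_feature_names_out())
# ['kernelcenterer0' 'kernelcenterer1' 'kernelcenterer2' 'kernelcenterer3' 'kernelcenterer4']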
4 changes: 2 additions & 2 deletions sklearn/preprocessing/_encoders.py
@@ -7,7 +7,7 @@
from scipy import sparse
import numbers

from ..base import BaseEstimator, TransformerMixin
from ..base import BaseEstimator, TransformerMixin, _OneToOneFeatureMixin
from ..utils import check_array, is_scalar_nan
from ..utils.deprecation import deprecated
from ..utils.validation import check_is_fitted
@@ -731,7 +731,7 @@ def get_feature_names_out(self, input_features=None):
return np.asarray(feature_names, dtype=object)


class OrdinalEncoder(_BaseEncoder):
class OrdinalEncoder(_OneToOneFeatureMixin, _BaseEncoder):
"""
Encode categorical features as an integer array.

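A short sketch (not part of the diff) of what inheriting from `_OneToOneFeatureMixin` buys `OrdinalEncoder`, assuming a pandas DataFrame input and scikit-learn >= 1.1; the column names are illustrative:

# Sketch: OrdinalEncoder now reports output names identical to its input columns.
import pandas as pd
from sklearn.preprocessing import OrdinalEncoder

X = pd.DataFrame({"city": ["Paris", "Tokyo"], "size": ["S", "L"]})
enc = OrdinalEncoder().fit(X)
print(enc.get_feature_names_out())  # ['city' 'size']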
16 changes: 16 additions & 0 deletions sklearn/preprocessing/tests/test_data.py
@@ -45,6 +45,7 @@
from sklearn.preprocessing import power_transform
from sklearn.preprocessing._data import _handle_zeros_in_scale
from sklearn.preprocessing._data import BOUNDS_THRESHOLD
from sklearn.metrics.pairwise import linear_kernel

from sklearn.exceptions import NotFittedError

@@ -2672,6 +2673,8 @@ def test_one_to_one_features(Transformer):
StandardScaler,
QuantileTransformer,
PowerTransformer,
Normalizer,
Binarizer,
],
)
def test_one_to_one_features_pandas(Transformer):
@@ -2691,3 +2694,16 @@ def test_one_to_one_features_pandas(Transformer):
with pytest.raises(ValueError, match=msg):
invalid_names = list("abcd")
tr.get_feature_names_out(invalid_names)


def test_kernel_centerer_feature_names_out():
"""Test that kernel centerer `feature_names_out`."""

rng = np.random.RandomState(0)
X = rng.random_sample((6, 4))
X_pairwise = linear_kernel(X)
centerer = KernelCenterer().fit(X_pairwise)

names_out = centerer.get_feature_names_out()
samples_out2 = X_pairwise.shape[1]
assert_array_equal(names_out, [f"kernelcenterer{i}" for i in range(samples_out2)])
12 changes: 12 additions & 0 deletions sklearn/preprocessing/tests/test_encoders.py
@@ -1387,3 +1387,15 @@ def test_ordinal_encoder_python_integer():
assert_array_equal(encoder.categories_, np.sort(X, axis=0).T)
X_trans = encoder.transform(X)
assert_array_equal(X_trans, [[0], [3], [2], [1]])


def test_ordinal_encoder_features_names_out_pandas():
"""Check feature names out is same as the input."""
pd = pytest.importorskip("pandas")

names = ["b", "c", "a"]
X = pd.DataFrame([[1, 2, 3]], columns=names)
enc = OrdinalEncoder().fit(X)

feature_names_out = enc.get_feature_names_out()
assert_array_equal(names, feature_names_out)
1 change: 0 additions & 1 deletion sklearn/tests/test_common.py
@@ -382,7 +382,6 @@ def test_pandas_column_name_consistency(estimator):
GET_FEATURES_OUT_MODULES_TO_IGNORE = [
"ensemble",
"kernel_approximation",
"preprocessing",
]


10 changes: 7 additions & 3 deletions sklearn/utils/validation.py
@@ -1828,7 +1828,9 @@ def _get_feature_names(X):


def _check_feature_names_in(estimator, input_features=None, *, generate_names=True):
"""Get output feature names for transformation.
"""Check `input_features` and generate names if needed.

Commonly used in :term:`get_feature_names_out`.

Parameters
----------
Expand All @@ -1842,8 +1844,10 @@ def _check_feature_names_in(estimator, input_features=None, *, generate_names=Tr
match `feature_names_in_` if `feature_names_in_` is defined.

generate_names : bool, default=True
Wether to generate names when `input_features` is `None` and
`estimator.feature_names_in_` is not defined.
Whether to generate names when `input_features` is `None` and
`estimator.feature_names_in_` is not defined. This is useful for transformers
that validate `input_features` but do not require them in
:term:`get_feature_names_out`, e.g. `PCA`.

Returns
-------
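To show how the updated helper is meant to be used (a hedged sketch: `MyTransformer` is a hypothetical identity transformer, and `_check_feature_names_in` is private API whose signature is taken from the diff above):

# Hypothetical transformer showing the typical call into _check_feature_names_in.
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.utils.validation import _check_feature_names_in


class MyTransformer(TransformerMixin, BaseEstimator):
    def fit(self, X, y=None):
        # _validate_data records n_features_in_ and, for DataFrame inputs,
        # feature_names_in_.
        self._validate_data(X)
        return self

    def transform(self, X):
        # Identity transform, kept trivial for the sake of the sketch.
        return self._validate_data(X, reset=False)

    def get_feature_names_out(self, input_features=None):
        # Validates input_features against feature_names_in_ when available,
        # and falls back to generated names ("x0", "x1", ...) otherwise.
        return _check_feature_names_in(self, input_features)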