8000 ENH Adds feature_names_out to stacking estimators by thomasjpfan · Pull Request #22695 · scikit-learn/scikit-learn · GitHub
[go: up one dir, main page]

Skip to content

ENH Adds feature_names_out to stacking estimators #22695

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions doc/whats_new/v1.1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,10 @@ Changelog
:class:`ensemble.ExtraTreesClassifier`.
:pr:`20803` by :user:`Brian Sun <bsun94>`.

- |API| Adds :meth:`get_feature_names_out` to
:class:`ensemble.StackingClassifier`, and
:class:`ensemble.StackingRegressor`. :pr:`22695` by `Thomas Fan`_.

- |Fix| Removed a potential source of CPU oversubscription in
:class:`ensemble.HistGradientBoostingClassifier` and
:class:`ensemble.HistGradientBoostingRegressor` when CPU resource usage is limited,
Expand Down
48 changes: 48 additions & 0 deletions sklearn/ensemble/_stacking.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
from ..utils.validation import check_is_fitted
from ..utils.validation import column_or_1d
from ..utils.fixes import delayed
from ..utils.validation import _check_feature_names_in


class _BaseStacking(TransformerMixin, _BaseHeterogeneousEnsemble, metaclass=ABCMeta):
Expand Down Expand Up @@ -93,6 +94,8 @@ def _concatenate_predictions(self, X, predictions):
X_meta.append(preds[:, 1:])
else:
X_meta.append(preds)

self._n_feature_outs = [pred.shape[1] for pred in X_meta]
if self.passthrough:
X_meta.append(X)
if sparse.issparse(X):
Expand Down Expand Up @@ -256,6 +259,51 @@ def _transform(self, X):
]
return self._concatenate_predictions(X, predictions)

def get_feature_names_out(self, input_features=None):
    """Get output feature names for transformation.

    Parameters
    ----------
    input_features : array-like of str or None, default=None
        Input features. The input feature names are only used when `passthrough` is
        `True`.

        - If `input_features` is `None`, then `feature_names_in_` is
          used as feature names in. If `feature_names_in_` is not defined,
          then names are generated: `[x0, x1, ..., x(n_features_in_ - 1)]`.
        - If `input_features` is an array-like, then `input_features` must
          match `feature_names_in_` if `feature_names_in_` is defined.

        If `passthrough` is `False`, then only the names of `estimators` are used
        to generate the output feature names.

    Returns
    -------
    feature_names_out : ndarray of str objects
        Transformed feature names.
    """
    # Validate (or generate) the incoming names; generated names are only
    # needed when the raw input is passed through alongside predictions.
    input_features = _check_feature_names_in(
        self, input_features, generate_names=self.passthrough
    )

    prefix = self.__class__.__name__.lower()
    # Estimators set to the string "drop" contribute no output columns.
    kept_names = [name for name, est in self.estimators if est != "drop"]

    meta_names = []
    for est_name, n_out in zip(kept_names, self._n_feature_outs):
        if n_out == 1:
            # A single output column needs no trailing column index.
            meta_names.append(f"{prefix}_{est_name}")
        else:
            meta_names.extend(f"{prefix}_{est_name}{i}" for i in range(n_out))

    if not self.passthrough:
        return np.asarray(meta_names, dtype=object)
    # Passthrough: original input names follow the meta-feature names.
    return np.concatenate((meta_names, input_features))

@if_delegate_has_method(delegate="final_estimator_")
def predict(self, X, **predict_params):
"""Predict target for X.
Expand Down
83 changes: 81 additions & 2 deletions sklearn/ensemble/tests/test_stacking.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import pytest
import numpy as np
from numpy.testing import assert_array_equal
import scipy.sparse as sparse

from sklearn.base import BaseEstimator
Expand Down Expand Up @@ -47,8 +48,10 @@

from unittest.mock import Mock

X_diabetes, y_diabetes = load_diabetes(return_X_y=True)
X_iris, y_iris = load_iris(return_X_y=True)
diabetes = load_diabetes()
X_diabetes, y_diabetes = diabetes.data, diabetes.target
iris = load_iris()
X_iris, y_iris = iris.data, iris.target


@pytest.mark.parametrize(
Expand Down Expand Up @@ -648,3 +651,79 @@ def fit(self, X, y):
msg = "'MyEstimator' object has no attribute 'n_features_in_'"
with pytest.raises(AttributeError, match=msg):
stacker.n_features_in_


@pytest.mark.parametrize(
    "stacker, feature_names, X, y, expected_names",
    [
        # Multiclass classification: each estimator emits one column per
        # class, so every generated name carries a trailing column index.
        (
            StackingClassifier(
                estimators=[
                    ("lr", LogisticRegression(random_state=0)),
                    ("svm", LinearSVC(random_state=0)),
                ]
            ),
            iris.feature_names,
            X_iris,
            y_iris,
            [
                "stackingclassifier_lr0",
                "stackingclassifier_lr1",
                "stackingclassifier_lr2",
                "stackingclassifier_svm0",
                "stackingclassifier_svm1",
                "stackingclassifier_svm2",
            ],
        ),
        # Binary classification with a dropped estimator: "other" is skipped
        # entirely, and single-column outputs get no index suffix.
        (
            StackingClassifier(
                estimators=[
                    ("lr", LogisticRegression(random_state=0)),
                    ("other", "drop"),
                    ("svm", LinearSVC(random_state=0)),
                ]
            ),
            iris.feature_names,
            X_iris[:100],
            y_iris[:100],  # keep only classes 0 and 1
            [
                "stackingclassifier_lr",
                "stackingclassifier_svm",
            ],
        ),
        # Regression: one prediction column per estimator, so one name each.
        (
            StackingRegressor(
                estimators=[
                    ("lr", LinearRegression()),
                    ("svm", LinearSVR(random_state=0)),
                ]
            ),
            diabetes.feature_names,
            X_diabetes,
            y_diabetes,
            [
                "stackingregressor_lr",
                "stackingregressor_svm",
            ],
        ),
    ],
    ids=[
        "StackingClassifier_multiclass",
        "StackingClassifier_binary",
        "StackingRegressor",
    ],
)
@pytest.mark.parametrize("passthrough", [True, False])
def test_get_feature_names_out(
    stacker, feature_names, X, y, expected_names, passthrough
):
    """Check get_feature_names_out works for stacking."""

    stacker.set_params(passthrough=passthrough)
    stacker.fit(scale(X), y)

    # With passthrough, the original input feature names are appended after
    # the meta-feature names.
    if passthrough:
        expected_names = np.concatenate((expected_names, feature_names))

    names_out = stacker.get_feature_names_out(feature_names)
    assert_array_equal(names_out, expected_names)
0