8000 ENH Adds feature_names_out to stacking estimators by thomasjpfan · Pull Request #22695 · scikit-learn/scikit-learn · GitHub
[go: up one dir, main page]

Skip to content

ENH Adds feature_names_out to stacking estimators #22695

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions doc/whats_new/v1.1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,10 @@ Changelog
:class:`ensemble.ExtraTreesClassifier`.
:pr:`20803` by :user:`Brian Sun <bsun94>`.

- |API| Adds :meth:`get_feature_names_out` to
:class:`ensemble.StackingClassifier`, and
:class:`ensemble.StackingRegressor`. :pr:`22695` by `Thomas Fan`_.

- |Fix| Removed a potential source of CPU oversubscription in
:class:`ensemble.HistGradientBoostingClassifier` and
:class:`ensemble.HistGradientBoostingRegressor` when CPU resource usage is limited,
Expand Down
48 changes: 48 additions & 0 deletions sklearn/ensemble/_stacking.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
from ..utils.validation import check_is_fitted
from ..utils.validation import column_or_1d
from ..utils.fixes import delayed
from ..utils.validation import _check_feature_names_in


class _BaseStacking(TransformerMixin, _BaseHeterogeneousEnsemble, metaclass=ABCMeta):
Expand Down Expand Up @@ -93,6 +94,8 @@ def _concatenate_predictions(self, X, predictions):
X_meta.append(preds[:, 1:])
else:
X_meta.append(preds)

self._n_feature_outs = [pred.shape[1] for pred in X_meta]
if self.passthrough:
X_meta.append(X)
if sparse.issparse(X):
Expand Down Expand Up @@ -256,6 +259,51 @@ def _transform(self, X):
]
return self._concatenate_predictions(X, predictions)

def get_feature_names_out(self, input_features=None):
    """Get output feature names for transformation.

    Parameters
    ----------
    input_features : array-like of str or None, default=None
        Input features. The input feature names are only used when `passthrough` is
        `True`.

        - If `input_features` is `None`, then `feature_names_in_` is
          used as feature names in. If `feature_names_in_` is not defined,
          then names are generated: `[x0, x1, ..., x(n_features_in_ - 1)]`.
        - If `input_features` is an array-like, then `input_features` must
          match `feature_names_in_` if `feature_names_in_` is defined.

        If `passthrough` is `False`, then only the names of `estimators` are used
        to generate the output feature names.

    Returns
    -------
    feature_names_out : ndarray of str objects
        Transformed feature names.
    """
    # Validate (or generate) the incoming names; generated names are only
    # needed when the raw input is passed through alongside predictions.
    input_features = _check_feature_names_in(
        self, input_features, generate_names=self.passthrough
    )

    prefix = self.__class__.__name__.lower()
    # Estimators set to the string "drop" contribute no output columns.
    kept_names = [name for name, est in self.estimators if est != "drop"]

    meta_names = []
    for est_name, n_out in zip(kept_names, self._n_feature_outs):
        if n_out == 1:
            # A single output column needs no trailing column index.
            meta_names.append(f"{prefix}_{est_name}")
        else:
            meta_names.extend(f"{prefix}_{est_name}{i}" for i in range(n_out))

    if not self.passthrough:
        return np.asarray(meta_names, dtype=object)
    # Passthrough: original input names follow the meta-feature names.
    return np.concatenate((meta_names, input_features))

@if_delegate_has_method(delegate="final_estimator_")
def predict(self, X, **predict_params):
"""Predict target for X.
Expand Down
83 changes: 81 additions & 2 deletions sklearn/ensemble/tests/test_stacking.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import pytest
import numpy as np
from numpy.testing import assert_array_equal
import scipy.sparse as sparse

from sklearn.base import BaseEstimator
Expand Down Expand Up @@ -47,8 +48,10 @@

from unittest.mock import Mock

X_diabetes, y_diabetes = load_diabetes(return_X_y=True)
X_iris, y_iris = load_iris(return_X_y=True)
diabetes = load_diabetes()
X_diabetes, y_diabetes = diabetes.data, diabetes.target
iris = load_iris()
X_iris, y_iris = iris.data, iris.target


@pytest.mark.parametrize(
Expand Down Expand Up @@ -648,3 +651,79 @@ def fit(self, X, y):
msg = "'MyEstimator' object has no attribute 'n_features_in_'"
with pytest.raises(AttributeError, match=msg):
stacker.n_features_in_


@pytest.mark.parametrize(
    "stacker, feature_names, X, y, expected_names",
    [
        # Multiclass classification: each estimator emits one column per
        # class, so every generated name carries a trailing column index.
        (
            StackingClassifier(
                estimators=[
                    ("lr", LogisticRegression(random_state=0)),
                    ("svm", LinearSVC(random_state=0)),
                ]
            ),
            iris.feature_names,
            X_iris,
            y_iris,
            [
                "stackingclassifier_lr0",
                "stackingclassifier_lr1",
                "stackingclassifier_lr2",
                "stackingclassifier_svm0",
                "stackingclassifier_svm1",
                "stackingclassifier_svm2",
            ],
        ),
        # Binary classification with a dropped estimator: "other" is skipped
        # entirely, and single-column outputs get no index suffix.
        (
            StackingClassifier(
                estimators=[
                    ("lr", LogisticRegression(random_state=0)),
                    ("other", "drop"),
                    ("svm", LinearSVC(random_state=0)),
                ]
            ),
            iris.feature_names,
            X_iris[:100],
            y_iris[:100],  # keep only classes 0 and 1
            [
                "stackingclassifier_lr",
                "stackingclassifier_svm",
            ],
        ),
        # Regression: one prediction column per estimator, so one name each.
        (
            StackingRegressor(
                estimators=[
                    ("lr", LinearRegression()),
                    ("svm", LinearSVR(random_state=0)),
                ]
            ),
            diabetes.feature_names,
            X_diabetes,
            y_diabetes,
            [
                "stackingregressor_lr",
                "stackingregressor_svm",
            ],
        ),
    ],
    ids=[
        "StackingClassifier_multiclass",
        "StackingClassifier_binary",
        "StackingRegressor",
    ],
)
@pytest.mark.parametrize("passthrough", [True, False])
def test_get_feature_names_out(
    stacker, feature_names, X, y, expected_names, passthrough
):
    """Check get_feature_names_out works for stacking."""

    stacker.set_params(passthrough=passthrough)
    stacker.fit(scale(X), y)

    # With passthrough, the original input feature names are appended after
    # the meta-feature names.
    if passthrough:
        expected_names = np.concatenate((expected_names, feature_names))

    names_out = stacker.get_feature_names_out(feature_names)
    assert_array_equal(names_out, expected_names)
0