ENH Add feature_names_out to voting estimators (#22697) · scikit-learn/scikit-learn@692225d
Commit 692225d

ENH Add feature_names_out to voting estimators (#22697)
1 parent 26f5b26 commit 692225d

File tree: 3 files changed, +140 −2 lines

doc/whats_new/v1.1.rst

Lines changed: 3 additions & 2 deletions
@@ -343,8 +343,9 @@ Changelog
   :pr:`20803` by :user:`Brian Sun <bsun94>`.
 
 - |API| Adds :meth:`get_feature_names_out` to
+  :class:`ensemble.VotingClassifier`, :class:`ensemble.VotingRegressor`,
   :class:`ensemble.StackingClassifier`, and
-  :class:`ensemble.StackingRegressor`. :pr:`22695` by `Thomas Fan`_.
+  :class:`ensemble.StackingRegressor`. :pr:`22695` and :pr:`22697` by `Thomas Fan`_.
 
 - |Fix| Removed a potential source of CPU oversubscription in
   :class:`ensemble.HistGradientBoostingClassifier` and

@@ -504,7 +505,7 @@ Changelog
   :class:`kernel_approximation.RBFSampler`, and
   :class:`kernel_approximation.SkewedChi2Sampler`. :pr:`22694` by `Thomas Fan`_.
 
-- |API| Adds :term:`get_feature_names_out` to the following transformers
+- |API| Adds :term:`get_feature_names_out` to the following transformers
   of the :mod:`~sklearn.kernel_approximation` module:
   :class:`~sklearn.kernel_approximation.AdditiveChi2Sampler`.
   :pr:`22137` by `Thomas Fan`_.
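For context, a minimal usage sketch of the API the ensemble entry in the first hunk above announces. This is not part of the diff; the toy data and the estimator names ("lr", "tree") are arbitrary choices for illustration.

# Illustrative only: each non-dropped estimator contributes one output name,
# prefixed with the lowercased class name of the voting estimator.
from sklearn.ensemble import VotingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor

X = [[1, 2], [3, 4], [5, 6]]
y = [0.0, 1.0, 2.0]

reg = VotingRegressor(
    estimators=[("lr", LinearRegression()), ("tree", DecisionTreeRegressor())]
).fit(X, y)

print(reg.get_feature_names_out())
# expected: ['votingregressor_lr' 'votingregressor_tree']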

sklearn/ensemble/_voting.py

Lines changed: 57 additions & 0 deletions
@@ -31,6 +31,7 @@
 from ..utils import check_scalar
 from ..utils.metaestimators import available_if
 from ..utils.validation import check_is_fitted
+from ..utils.validation import _check_feature_names_in
 from ..utils.multiclass import check_classification_targets
 from ..utils.validation import column_or_1d
 from ..exceptions import NotFittedError

@@ -441,6 +442,42 @@ class labels predicted by each classifier.
         else:
             return self._predict(X)
 
+    def get_feature_names_out(self, input_features=None):
+        """Get output feature names for transformation.
+
+        Parameters
+        ----------
+        input_features : array-like of str or None, default=None
+            Not used, present here for API consistency by convention.
+
+        Returns
+        -------
+        feature_names_out : ndarray of str objects
+            Transformed feature names.
+        """
+        if self.voting == "soft" and not self.flatten_transform:
+            raise ValueError(
+                "get_feature_names_out is not supported when `voting='soft'` and "
+                "`flatten_transform=False`"
+            )
+
+        _check_feature_names_in(self, input_features, generate_names=False)
+        class_name = self.__class__.__name__.lower()
+
+        active_names = [name for name, est in self.estimators if est != "drop"]
+
+        if self.voting == "hard":
+            return np.asarray(
+                [f"{class_name}_{name}" for name in active_names], dtype=object
+            )
+
+        # voting == "soft"
+        n_classes = len(self.classes_)
+        names_out = [
+            f"{class_name}_{name}{i}" for name in active_names for i in range(n_classes)
+        ]
+        return np.asarray(names_out, dtype=object)
+
 
 class VotingRegressor(RegressorMixin, _BaseVoting):
     """Prediction voting regressor for unfitted estimators.

@@ -597,3 +634,23 @@ def transform(self, X):
         """
         check_is_fitted(self)
         return self._predict(X)
+
+    def get_feature_names_out(self, input_features=None):
+        """Get output feature names for transformation.
+
+        Parameters
+        ----------
+        input_features : array-like of str or None, default=None
+            Not used, present here for API consistency by convention.
+
+        Returns
+        -------
+        feature_names_out : ndarray of str objects
+            Transformed feature names.
+        """
+        _check_feature_names_in(self, input_features, generate_names=False)
+        class_name = self.__class__.__name__.lower()
+        return np.asarray(
+            [f"{class_name}_{name}" for name, est in self.estimators if est != "drop"],
+            dtype=object,
+        )
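A short, hedged usage sketch of the method added above, not part of the commit: with voting="hard" the names map one-to-one to the non-dropped estimators, while with voting="soft" and flatten_transform=True each estimator contributes one name per class, matching the flattened probability columns returned by transform. The data and estimator names below are illustrative.

# Illustrative only: soft voting produces one name per (estimator, class) pair.
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

X = [[1, 2], [3, 4], [5, 6], [1, 1.2]]
y = [0, 1, 2, 0]

clf = VotingClassifier(
    estimators=[("lr", LogisticRegression()), ("tree", DecisionTreeClassifier())],
    voting="soft",  # flatten_transform defaults to True
).fit(X, y)

print(clf.transform(X).shape)  # (4, 6): 2 estimators x 3 classes
print(clf.get_feature_names_out())
# ['votingclassifier_lr0' 'votingclassifier_lr1' 'votingclassifier_lr2'
#  'votingclassifier_tree0' 'votingclassifier_tree1' 'votingclassifier_tree2']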

sklearn/ensemble/tests/test_voting.py

Lines changed: 80 additions & 0 deletions
@@ -626,3 +626,83 @@ def test_voting_verbose(estimator, capsys):
 
     estimator.fit(X, y)
     assert re.match(pattern, capsys.readouterr()[0])
+
+
+def test_get_features_names_out_regressor():
+    """Check get_feature_names_out output for regressor."""
+
+    X = [[1, 2], [3, 4], [5, 6]]
+    y = [0, 1, 2]
+
+    voting = VotingRegressor(
+        estimators=[
+            ("lr", LinearRegression()),
+            ("tree", DecisionTreeRegressor(random_state=0)),
+            ("ignore", "drop"),
+        ]
+    )
+    voting.fit(X, y)
+
+    names_out = voting.get_feature_names_out()
+    expected_names = ["votingregressor_lr", "votingregressor_tree"]
+    assert_array_equal(names_out, expected_names)
+
+
+@pytest.mark.parametrize(
+    "kwargs, expected_names",
+    [
+        (
+            {"voting": "soft", "flatten_transform": True},
+            [
+                "votingclassifier_lr0",
+                "votingclassifier_lr1",
+                "votingclassifier_lr2",
+                "votingclassifier_tree0",
+                "votingclassifier_tree1",
+                "votingclassifier_tree2",
+            ],
+        ),
+        ({"voting": "hard"}, ["votingclassifier_lr", "votingclassifier_tree"]),
+    ],
+)
+def test_get_features_names_out_classifier(kwargs, expected_names):
+    """Check get_feature_names_out for classifier for different settings."""
+    X = [[1, 2], [3, 4], [5, 6], [1, 1.2]]
+    y = [0, 1, 2, 0]
+
+    voting = VotingClassifier(
+        estimators=[
+            ("lr", LogisticRegression(random_state=0)),
+            ("tree", DecisionTreeClassifier(random_state=0)),
+        ],
+        **kwargs,
+    )
+    voting.fit(X, y)
+    X_trans = voting.transform(X)
+    names_out = voting.get_feature_names_out()
+
+    assert X_trans.shape[1] == len(expected_names)
+    assert_array_equal(names_out, expected_names)
+
+
+def test_get_features_names_out_classifier_error():
+    """Check that error is raised when voting="soft" and flatten_transform=False."""
+    X = [[1, 2], [3, 4], [5, 6]]
+    y = [0, 1, 2]
+
+    voting = VotingClassifier(
+        estimators=[
+            ("lr", LogisticRegression(random_state=0)),
+            ("tree", DecisionTreeClassifier(random_state=0)),
+        ],
+        voting="soft",
+        flatten_transform=False,
+    )
+    voting.fit(X, y)
+
+    msg = (
+        "get_feature_names_out is not supported when `voting='soft'` and "
+        "`flatten_transform=False`"
+    )
+    with pytest.raises(ValueError, match=msg):
+        voting.get_feature_names_out()
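Not part of the test suite, but a natural follow-on usage sketch: the new names can label the columns returned by transform, for example via pandas (used here purely for display). Data and estimator names are arbitrary.

# Illustrative only: label per-estimator predictions with the new output names.
import pandas as pd
from sklearn.ensemble import VotingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor

X = [[1, 2], [3, 4], [5, 6]]
y = [0.0, 1.0, 2.0]

reg = VotingRegressor(
    estimators=[("lr", LinearRegression()), ("tree", DecisionTreeRegressor())]
).fit(X, y)

# Columns come out as 'votingregressor_lr' and 'votingregressor_tree'.
preds = pd.DataFrame(reg.transform(X), columns=reg.get_feature_names_out())
print(preds)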

0 commit comments