FIX ColumnTransformer supports empty selection for pandas output (#25570) · scikit-learn/scikit-learn@9b2cce5 · GitHub
[go: up one dir, main page]

Skip to content

Commit 9b2cce5

Browse files
FIX ColumnTransformer supports empty selection for pandas output (#25570)
Co-authored-by: Julien Jerphanion <git@jjerphan.xyz>
1 parent 57bb1ac commit 9b2cce5

File tree

3 files changed

+39
-1
lines changed

3 files changed

+39
-1
lines changed

doc/whats_new/v1.2.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,13 @@ Changelog
3434
fail due to a permutation of the labels when running multiple inits.
3535
:pr:`25563` by :user:`Jérémie du Boisberranger <jeremiedbb>`.
3636

37+
:mod:`sklearn.compose`
38+
......................
39+
40+
- |Fix| Fixes a bug in :class:`compose.ColumnTransformer` which now supports
41+
empty selection of columns when `set_output(transform="pandas")` is used.
42+
:pr:`25570` by `Thomas Fan`_.
43+
3744
:mod:`sklearn.isotonic`
3845
.......................
3946

sklearn/compose/_column_transformer.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -865,7 +865,9 @@ def _hstack(self, Xs):
865865
transformer_names = [
866866
t[0] for t in self._iter(fitted=True, replace_strings=True)
867867
]
868-
feature_names_outs = [X.columns for X in Xs]
868+
# Selection of columns might be empty.
869+
# Hence feature names are filtered for non-emptiness.
870+
feature_names_outs = [X.columns for X in Xs if X.shape[1] != 0]
869871
names_out = self._add_prefix_for_feature_names_out(
870872
list(zip(transformer_names, feature_names_outs))
871873
)

sklearn/compose/tests/test_column_transformer.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2129,3 +2129,32 @@ def test_transformers_with_pandas_out_but_not_feature_names_out(
21292129
ct.set_params(verbose_feature_names_out=False)
21302130
X_trans_df1 = ct.fit_transform(X_df)
21312131
assert_array_equal(X_trans_df1.columns, expected_non_verbose_names)
2132+
2133+
2134+
@pytest.mark.parametrize(
    "empty_selection",
    [[], np.array([False, False]), [False, False]],
    ids=["list", "bool", "bool_int"],
)
def test_empty_selection_pandas_output(empty_selection):
    """Pandas output must tolerate a transformer that selects no columns.

    Non-regression test for gh-25487
    """
    pd = pytest.importorskip("pandas")

    frame = pd.DataFrame([[1.0, 2.2], [3.0, 1.0]], columns=["a", "b"])
    transformers = [
        ("categorical", "passthrough", empty_selection),
        ("numerical", StandardScaler(), ["a", "b"]),
    ]
    ct = ColumnTransformer(transformers, verbose_feature_names_out=True)
    ct.set_output(transform="pandas")

    # Verbose naming: only the non-empty transformer contributes columns,
    # each prefixed with the transformer name.
    verbose_columns = ct.fit_transform(frame).columns
    assert_array_equal(verbose_columns, ["numerical__a", "numerical__b"])

    # Non-verbose naming: the original column names come back unprefixed.
    ct.set_params(verbose_feature_names_out=False)
    plain_columns = ct.fit_transform(frame).columns
    assert_array_equal(plain_columns, ["a", "b"])

0 commit comments

Comments
 (0)
0