ENH Adds drop in FeatureUnion (#11640) · scikit-learn/scikit-learn@5be05c6 · GitHub
[go: up one dir, main page]

Skip to content

Commit 5be05c6

Browse files
thomasjpfan authored and qinhanmin2014 committed
ENH Adds drop in FeatureUnion (#11640)
1 parent c2f9206 commit 5be05c6

File tree

4 files changed

+25
-14
lines changed

4 files changed

+25
-14
lines changed

doc/modules/compose.rst

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -353,13 +353,13 @@ Like pipelines, feature unions have a shorthand constructor called
353353

354354

355355
Like ``Pipeline``, individual steps may be replaced using ``set_params``,
356-
and ignored by setting to ``None``::
356+
and ignored by setting to ``'drop'``::
357357

358-
>>> combined.set_params(kernel_pca=None)
358+
>>> combined.set_params(kernel_pca='drop')
359359
... # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS
360360
FeatureUnion(n_jobs=None,
361361
transformer_list=[('linear_pca', PCA(copy=True,...)),
362-
('kernel_pca', None)],
362+
('kernel_pca', 'drop')],
363363
transformer_weights=None)
364364

365365
.. topic:: Examples:

doc/whats_new/v0.20.rst

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -868,6 +868,9 @@ Support for Python 3.3 has been officially dropped.
868868
keyword arguments on to the pipeline's last estimator, enabling the use of
869869
parameters such as ``return_std`` in a pipeline with caution.
870870
:issue:`9304` by :user:`Breno Freitas <brenolf>`.
871+
872+
- |API| :class:`pipeline.FeatureUnion` now supports ``'drop'`` as a transformer
873+
to drop features. :issue:`11144` by :user:`thomasjpfan`.
871874

872875

873876
:mod:`sklearn.preprocessing`

sklearn/pipeline.py

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -630,7 +630,7 @@ class FeatureUnion(_BaseComposition, TransformerMixin):
630630
Parameters of the transformers may be set using its name and the parameter
631631
name separated by a '__'. A transformer may be replaced entirely by
632632
setting the parameter with its name to another transformer,
633-
or removed by setting to ``None``.
633+
or removed by setting to 'drop' or ``None``.
634634
635635
Read more in the :ref:`User Guide <feature_union>`.
636636
@@ -709,7 +709,7 @@ def _validate_transformers(self):
709709

710710
# validate estimators
711711
for t in transformers:
712-
if t is None:
712+
if t is None or t == 'drop':
713713
continue
714714
if (not (hasattr(t, "fit") or hasattr(t, "fit_transform")) or not
715715
hasattr(t, "transform")):
@@ -719,12 +719,13 @@ def _validate_transformers(self):
719719

720720
def _iter(self):
721721
"""
722-
Generate (name, trans, weight) tuples excluding None transformers
722+
Generate (name, trans, weight) tuples excluding None and
723+
'drop' transformers.
723724
"""
724725
get_weight = (self.transformer_weights or {}).get
725726
return ((name, trans, get_weight(name))
726727
for name, trans in self.transformer_list
727-
if trans is not None)
728+
if trans is not None and trans != 'drop')
728729

729730
def get_feature_names(self):
730731
"""Get feature names from all transformers.
@@ -830,10 +831,9 @@ def transform(self, X):
830831

831832
def _update_transformer_list(self, transformers):
832833
transformers = iter(transformers)
833-
self.transformer_list[:] = [
834-
(name, None if old is None else next(transformers))
835-
for name, old in self.transformer_list
836-
]
834+
self.transformer_list[:] = [(name, old if old is None or old == 'drop'
835+
else next(transformers))
836+
for name, old in self.transformer_list]
837837

838838

839839
def make_union(*transformers, **kwargs):

sklearn/tests/test_pipeline.py

Lines changed: 11 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@
99
import pytest
1010
import numpy as np
1111
from scipy import sparse
12+
import pytest
1213

1314
from sklearn.externals.six.moves import zip
1415
from sklearn.utils.testing import assert_raises
@@ -832,7 +833,8 @@ def test_set_feature_union_steps():
832833
assert_equal(['mock__x5'], ft.get_feature_names())
833834

834835

835-
def test_set_feature_union_step_none():
836+
@pytest.mark.parametrize('drop', ['drop', None])
837+
def test_set_feature_union_step_drop(drop):
836838
mult2 = Mult(2)
837839
mult2.get_feature_names = lambda: ['x2']
838840
mult3 = Mult(3)
@@ -844,12 +846,12 @@ def test_set_feature_union_step_none():
844846
assert_array_equal([[2, 3]], ft.fit_transform(X))
845847
assert_equal(['m2__x2', 'm3__x3'], ft.get_feature_names())
846848

847-
ft.set_params(m2=None)
849+
ft.set_params(m2=drop)
848850
assert_array_equal([[3]], ft.fit(X).transform(X))
849851
assert_array_equal([[3]], ft.fit_transform(X))
850852
assert_equal(['m3__x3'], ft.get_feature_names())
851853

852-
ft.set_params(m3=None)
854+
ft.set_params(m3=drop)
853855
assert_array_equal([[]], ft.fit(X).transform(X))
854856
assert_array_equal([[]], ft.fit_transform(X))
855857
assert_equal([], ft.get_feature_names())
@@ -858,6 +860,12 @@ def test_set_feature_union_step_none():
858860
ft.set_params(m3=mult3)
859861
assert_array_equal([[3]], ft.fit(X).transform(X))
860862

863+
# Check 'drop' step at construction time
864+
ft = FeatureUnion([('m2', drop), ('m3', mult3)])
865+
assert_array_equal([[3]], ft.fit(X).transform(X))
866+
assert_array_equal([[3]], ft.fit_transform(X))
867+
assert_equal(['m3__x3'], ft.get_feature_names())
868+
861869

862870
def test_step_name_validation():
863871
bad_steps1 = [('a__q', Mult(2)), ('b', Mult(3))]

0 commit comments

Comments
 (0)
0