[MRG+1] Adds drop in FeatureUnion by thomasjpfan · Pull Request #11640 · scikit-learn/scikit-learn · GitHub

[MRG+1] Adds drop in FeatureUnion #11640

Merged · 20 commits · Sep 13, 2018
6 changes: 3 additions & 3 deletions doc/modules/compose.rst
@@ -353,13 +353,13 @@ Like pipelines, feature unions have a shorthand constructor called


 Like ``Pipeline``, individual steps may be replaced using ``set_params``,
-and ignored by setting to ``None``::
+and ignored by setting to ``'drop'``::
 
-    >>> combined.set_params(kernel_pca=None)
+    >>> combined.set_params(kernel_pca='drop')
     ... # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS
     FeatureUnion(n_jobs=None,
            transformer_list=[('linear_pca', PCA(copy=True,...)),
-                             ('kernel_pca', None)],
+                             ('kernel_pca', 'drop')],
            transformer_weights=None)
 
 .. topic:: Examples:
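For readers skimming the diff, a minimal runnable sketch of the documented behavior (requires scikit-learn 0.20+; the toy data and n_components values are ours for illustration, not taken from compose.rst):

    from sklearn.pipeline import FeatureUnion
    from sklearn.decomposition import PCA, KernelPCA

    X = [[0., 1., 3.], [2., 2., 5.]]  # toy data, just for illustration

    combined = FeatureUnion([('linear_pca', PCA(n_components=1)),
                             ('kernel_pca', KernelPCA(n_components=1))])
    print(combined.fit_transform(X).shape)  # (2, 2): one column per sub-transformer

    combined.set_params(kernel_pca='drop')  # the new alternative to None
    print(combined.fit_transform(X).shape)  # (2, 1): kernel_pca's output is gone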
3 changes: 3 additions & 0 deletions doc/whats_new/v0.20.rst
@@ -840,6 +840,9 @@ Support for Python 3.3 has been officially dropped.
   keyword arguments on to the pipeline's last estimator, enabling the use of
   parameters such as ``return_std`` in a pipeline with caution.
   :issue:`9304` by :user:`Breno Freitas <brenolf>`.
+
+- |API| :class:`pipeline.FeatureUnion` now supports ``'drop'`` as a transformer
+  to drop features. :issue:`11144` by :user:`thomasjpfan`.
 
 
 :mod:`sklearn.preprocessing`
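A short illustration of the changelog entry: with this PR, 'drop' is also accepted directly in the transformer_list at construction time (estimator choice and data are arbitrary):

    from sklearn.decomposition import PCA
    from sklearn.pipeline import FeatureUnion

    # 'drop' stands in for a transformer, not only via set_params
    union = FeatureUnion([('pca', PCA(n_components=1)), ('unused', 'drop')])
    print(union.fit_transform([[0., 1.], [2., 3.], [4., 5.]]).shape)  # (3, 1)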
16 changes: 8 additions & 8 deletions sklearn/pipeline.py
@@ -630,7 +630,7 @@ class FeatureUnion(_BaseComposition, TransformerMixin):
     Parameters of the transformers may be set using its name and the parameter
     name separated by a '__'. A transformer may be replaced entirely by
     setting the parameter with its name to another transformer,
-    or removed by setting to ``None``.
+    or removed by setting to ``'drop'`` or ``None``.
 
     Read more in the :ref:`User Guide <feature_union>`.
 
@@ -709,7 +709,7 @@ def _validate_transformers(self):

         # validate estimators
         for t in transformers:
-            if t is None:
+            if t is None or t == 'drop':
                 continue
             if (not (hasattr(t, "fit") or hasattr(t, "fit_transform")) or not
                     hasattr(t, "transform")):
@@ -719,12 +719,13 @@

     def _iter(self):
         """
-        Generate (name, trans, weight) tuples excluding None transformers
+        Generate (name, trans, weight) tuples excluding None and
+        'drop' transformers.
         """
         get_weight = (self.transformer_weights or {}).get
         return ((name, trans, get_weight(name))
                 for name, trans in self.transformer_list
-                if trans is not None)
+                if trans is not None and trans != 'drop')
 
     def get_feature_names(self):
         """Get feature names from all transformers.
@@ -830,10 +831,9 @@ def transform(self, X):

     def _update_transformer_list(self, transformers):
         transformers = iter(transformers)
-        self.transformer_list[:] = [
-            (name, None if old is None else next(transformers))
-            for name, old in self.transformer_list
-        ]
+        self.transformer_list[:] = [(name, old if old is None or old == 'drop'
+                                     else next(transformers))
+                                    for name, old in self.transformer_list]
 
 
 def make_union(*transformers, **kwargs):
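To make the `_iter` and `_update_transformer_list` changes concrete, here is a standalone sketch of the same skip logic; `steps` and `iter_active` are hypothetical names for illustration, not scikit-learn API:

    # Hypothetical standalone illustration of the skip logic, not scikit-learn code.
    steps = [('m2', 'drop'), ('m3', None), ('m5', object())]

    def iter_active(transformer_list):
        """Yield (name, trans) pairs, skipping None and 'drop' placeholders."""
        return ((name, trans) for name, trans in transformer_list
                if trans is not None and trans != 'drop')

    print([name for name, _ in iter_active(steps)])  # ['m5']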
14 changes: 11 additions & 3 deletions sklearn/tests/test_pipeline.py
@@ -8,6 +8,7 @@

 import numpy as np
 from scipy import sparse
+import pytest
 
 from sklearn.externals.six.moves import zip
 from sklearn.utils.testing import assert_raises
@@ -827,7 +828,8 @@ def test_set_feature_union_steps():
     assert_equal(['mock__x5'], ft.get_feature_names())
 
 
-def test_set_feature_union_step_none():
+@pytest.mark.parametrize('drop', ['drop', None])
+def test_set_feature_union_step_drop(drop):
     mult2 = Mult(2)
     mult2.get_feature_names = lambda: ['x2']
     mult3 = Mult(3)
@@ -839,12 +841,12 @@ def test_set_feature_union_step_none():
     assert_array_equal([[2, 3]], ft.fit_transform(X))
     assert_equal(['m2__x2', 'm3__x3'], ft.get_feature_names())
 
-    ft.set_params(m2=None)
+    ft.set_params(m2=drop)
     assert_array_equal([[3]], ft.fit(X).transform(X))
     assert_array_equal([[3]], ft.fit_transform(X))
     assert_equal(['m3__x3'], ft.get_feature_names())
 
-    ft.set_params(m3=None)
+    ft.set_params(m3=drop)
     assert_array_equal([[]], ft.fit(X).transform(X))
     assert_array_equal([[]], ft.fit_transform(X))
     assert_equal([], ft.get_feature_names())
@@ -853,6 +855,12 @@
     ft.set_params(m3=mult3)
     assert_array_equal([[3]], ft.fit(X).transform(X))
+
+    # Check 'drop' step at construction time
+    ft = FeatureUnion([('m2', drop), ('m3', mult3)])
+    assert_array_equal([[3]], ft.fit(X).transform(X))
+    assert_array_equal([[3]], ft.fit_transform(X))
+    assert_equal(['m3__x3'], ft.get_feature_names())
 
 
 def test_step_name_validation():
     bad_steps1 = [('a__q', Mult(2)), ('b', Mult(3))]
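These tests lean on a `Mult` helper defined earlier in test_pipeline.py and not shown in this diff; below is a minimal sketch consistent with its usage above — an assumption about its real shape, not the actual definition:

    import numpy as np
    from sklearn.base import BaseEstimator, TransformerMixin

    class Mult(BaseEstimator, TransformerMixin):
        """Guessed shape of the test helper: multiply the input by a constant."""

        def __init__(self, mult=1):
            self.mult = mult

        def fit(self, X, y=None):
            return self

        def transform(self, X):
            return np.asarray(X) * self.mult

    # With X = [[1]], a FeatureUnion of Mult(2) and Mult(3) yields [[2, 3]];
    # dropping 'm2' leaves [[3]], matching the assertions in the test above.
    X = np.asarray([[1]])
    print(Mult(2).fit_transform(X), Mult(3).fit_transform(X))  # [[2]] [[3]]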