From b132f8c41a3085837e1e9ff670051cb3e571d0c1 Mon Sep 17 00:00:00 2001 From: Thomas Fan Date: Thu, 19 Jul 2018 23:10:41 -0400 Subject: [PATCH 01/16] RFC: Converts None to drop in FeatureUnion --- sklearn/pipeline.py | 8 ++++---- sklearn/tests/test_pipeline.py | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index 1e99dd54615ae..d2be2542a7672 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -618,7 +618,7 @@ class FeatureUnion(_BaseComposition, TransformerMixin): Parameters of the transformers may be set using its name and the parameter name separated by a '__'. A transformer may be replaced entirely by setting the parameter with its name to another transformer, - or removed by setting to ``None``. + or removed by setting to 'drop'. Read more in the :ref:`User Guide `. @@ -693,7 +693,7 @@ def _validate_transformers(self): # validate estimators for t in transformers: - if t is None: + if t == 'drop': continue if (not (hasattr(t, "fit") or hasattr(t, "fit_transform")) or not hasattr(t, "transform")): @@ -708,7 +708,7 @@ def _iter(self): get_weight = (self.transformer_weights or {}).get return ((name, trans, get_weight(name)) for name, trans in self.transformer_list - if trans is not None) + if trans != 'drop') def get_feature_names(self): """Get feature names from all transformers. @@ -815,7 +815,7 @@ def transform(self, X): def _update_transformer_list(self, transformers): transformers = iter(transformers) self.transformer_list[:] = [ - (name, None if old is None else next(transformers)) + (name, 'drop' if old == 'drop' else next(transformers)) for name, old in self.transformer_list ] diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py index beb4be61d0b07..2711895c05f9e 100644 --- a/sklearn/tests/test_pipeline.py +++ b/sklearn/tests/test_pipeline.py @@ -826,7 +826,7 @@ def test_set_feature_union_steps(): assert_equal(['mock__x5'], ft.get_feature_names()) -def test_set_feature_union_step_none(): +def test_set_feature_union_step_drop(): mult2 = Mult(2) mult2.get_feature_names = lambda: ['x2'] mult3 = Mult(3) @@ -838,12 +838,12 @@ def test_set_feature_union_step_none(): assert_array_equal([[2, 3]], ft.fit_transform(X)) assert_equal(['m2__x2', 'm3__x3'], ft.get_feature_names()) - ft.set_params(m2=None) + ft.set_params(m2='drop') assert_array_equal([[3]], ft.fit(X).transform(X)) assert_array_equal([[3]], ft.fit_transform(X)) assert_equal(['m3__x3'], ft.get_feature_names()) - ft.set_params(m3=None) + ft.set_params(m3='drop') assert_array_equal([[]], ft.fit(X).transform(X)) assert_array_equal([[]], ft.fit_transform(X)) assert_equal([], ft.get_feature_names()) From e300433a67efabb8339ab30db8ab2f238100b7d6 Mon Sep 17 00:00:00 2001 From: Thomas Fan Date: Sun, 22 Jul 2018 15:14:03 -0400 Subject: [PATCH 02/16] RFC: Deprecates using None --- sklearn/pipeline.py | 17 +++++++++++++++++ sklearn/tests/test_pipeline.py | 26 +++++++++++++++++++++++++- 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index 4125af2e5272b..483c2e2ed90ea 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -10,6 +10,7 @@ # License: BSD from collections import defaultdict +import warnings import numpy as np from scipy import sparse @@ -697,6 +698,19 @@ def set_params(self, **kwargs): self._set_params('transformer_list', **kwargs) return self + def _check_params(self): + show_warning = False + for idx, (name, trans) in enumerate(self.transformer_list): + if trans is None: + self.transformer_list[idx] = (name, 'drop') + show_warning = True + + if show_warning: + warnings.warn( + "Transformers set to None is now set with 'drop' " + "in version 0.20 and will be removed in 0.22.", + DeprecationWarning) + def _validate_transformers(self): names, transformers = zip(*self.transformer_list) @@ -756,6 +770,7 @@ def fit(self, X, y=None): self : FeatureUnion This estimator """ + self._check_params() self.transformer_list = list(self.transformer_list) self._validate_transformers() transformers = Parallel(n_jobs=self.n_jobs)( @@ -781,6 +796,7 @@ def fit_transform(self, X, y=None, **fit_params): hstack of results of transformers. sum_n_components is the sum of n_components (output dimension) over transformers. """ + self._check_params() self._validate_transformers() result = Parallel(n_jobs=self.n_jobs)( delayed(_fit_transform_one)(trans, X, y, weight, @@ -812,6 +828,7 @@ def transform(self, X): hstack of results of transformers. sum_n_components is the sum of n_components (output dimension) over transformers. """ + self._check_params() Xs = Parallel(n_jobs=self.n_jobs)( delayed(_transform_one)(trans, X, None, weight) for name, trans, weight in self._iter()) diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py index 349d0c36d9eb1..007586d01a02a 100644 --- a/sklearn/tests/test_pipeline.py +++ b/sklearn/tests/test_pipeline.py @@ -8,6 +8,7 @@ import numpy as np from scipy import sparse +import pytest from sklearn.externals.six.moves import zip from sklearn.utils.testing import assert_raises @@ -488,7 +489,7 @@ def test_make_union_kwargs(): assert_equal(3, fu.n_jobs) # invalid keyword parameters should raise an error message assert_raise_message( - TypeError, + TypeError, 'Unknown keyword arguments: "transformer_weights"', make_union, pca, mock, transformer_weights={'pca': 10, 'Transf': 1} ) @@ -827,6 +828,29 @@ def test_set_feature_union_steps(): assert_equal(['mock__x5'], ft.get_feature_names()) +def test_set_feature_union_test_none(): + mult2 = Mult(2) + mult2.get_feature_names = lambda: ['x2'] + mult3 = Mult(3) + mult3.get_feature_names = lambda: ['x3'] + X = np.asarray([[1]]) + + ft = FeatureUnion([('m2', mult2), ('m3', mult3)]) + ft.set_params(m2=None) + + depr_message = ("Transformers set to None is now set with 'drop' " + "in version 0.20 and will be removed in 0.22.") + + with pytest.warns(DeprecationWarning, match=depr_message): + assert_array_equal([[3]], ft.fit_transform(X)) + + # The previous `fit_transform` calls replaces None with 'drop' + # `m2` to set back to None to test `fit` and `transform`. + ft.set_params(m2=None) + with pytest.warns(DeprecationWarning, match=depr_message): + assert_array_equal([[3]], ft.fit(X).transform(X)) + + def test_set_feature_union_step_drop(): mult2 = Mult(2) mult2.get_feature_names = lambda: ['x2'] From 7d4222ee5e92fe9b418e89ccc367d7620b35486f Mon Sep 17 00:00:00 2001 From: Thomas Fan Date: Mon, 23 Jul 2018 11:15:16 -0400 Subject: [PATCH 03/16] RFC: Does not change transformer_list --- sklearn/pipeline.py | 25 ++++++++++--------------- sklearn/tests/test_pipeline.py | 8 +++----- 2 files changed, 13 insertions(+), 20 deletions(-) diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index 483c2e2ed90ea..d19eb0fc7e6d6 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -699,17 +699,13 @@ def set_params(self, **kwargs): return self def _check_params(self): - show_warning = False for idx, (name, trans) in enumerate(self.transformer_list): if trans is None: - self.transformer_list[idx] = (name, 'drop') - show_warning = True - - if show_warning: - warnings.warn( - "Transformers set to None is now set with 'drop' " - "in version 0.20 and will be removed in 0.22.", - DeprecationWarning) + warnings.warn( + "Transformer '%s' is set to None. Please use 'drop' " + "for the same behavior. None will be removed " + "in version 0.20 and will be removed in 0.22." % name, + DeprecationWarning) def _validate_transformers(self): names, transformers = zip(*self.transformer_list) @@ -719,7 +715,7 @@ def _validate_transformers(self): # validate estimators for t in transformers: - if t == 'drop': + if t is None or t == 'drop': continue if (not (hasattr(t, "fit") or hasattr(t, "fit_transform")) or not hasattr(t, "transform")): @@ -734,7 +730,7 @@ def _iter(self): get_weight = (self.transformer_weights or {}).get return ((name, trans, get_weight(name)) for name, trans in self.transformer_list - if trans != 'drop') + if trans is not None and trans != 'drop') def get_feature_names(self): """Get feature names from all transformers. @@ -843,10 +839,9 @@ def transform(self, X): def _update_transformer_list(self, transformers): transformers = iter(transformers) - self.transformer_list[:] = [ - (name, 'drop' if old == 'drop' else next(transformers)) - for name, old in self.transformer_list - ] + self.transformer_list[:] = [(name, old if old is None or old == 'drop' + else next(transformers)) + for name, old in self.transformer_list] def make_union(*transformers, **kwargs): diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py index 007586d01a02a..e01aae2a9e21f 100644 --- a/sklearn/tests/test_pipeline.py +++ b/sklearn/tests/test_pipeline.py @@ -489,7 +489,7 @@ def test_make_union_kwargs(): assert_equal(3, fu.n_jobs) # invalid keyword parameters should raise an error message assert_raise_message( - TypeError, + TypeError, 'Unknown keyword arguments: "transformer_weights"', make_union, pca, mock, transformer_weights={'pca': 10, 'Transf': 1} ) @@ -838,15 +838,13 @@ def test_set_feature_union_test_none(): ft = FeatureUnion([('m2', mult2), ('m3', mult3)]) ft.set_params(m2=None) - depr_message = ("Transformers set to None is now set with 'drop' " + depr_message = ("Transformer 'm2' is set to None. Please use 'drop' " + "for the same behavior. None will be removed " "in version 0.20 and will be removed in 0.22.") with pytest.warns(DeprecationWarning, match=depr_message): assert_array_equal([[3]], ft.fit_transform(X)) - # The previous `fit_transform` calls replaces None with 'drop' - # `m2` to set back to None to test `fit` and `transform`. - ft.set_params(m2=None) with pytest.warns(DeprecationWarning, match=depr_message): assert_array_equal([[3]], ft.fit(X).transform(X)) From 554a35ddbf8e9eb8b69e3e5a68281018e42a91c8 Mon Sep 17 00:00:00 2001 From: Thomas Fan Date: Mon, 23 Jul 2018 11:15:35 -0400 Subject: [PATCH 04/16] DOC: Update to 'drop' --- doc/modules/compose.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/modules/compose.rst b/doc/modules/compose.rst index 8817b6d83a385..3e62ee3118ec0 100644 --- a/doc/modules/compose.rst +++ b/doc/modules/compose.rst @@ -353,13 +353,13 @@ Like pipelines, feature unions have a shorthand constructor called Like ``Pipeline``, individual steps may be replaced using ``set_params``, -and ignored by setting to ``None``:: +and ignored by setting to ``'drop'``:: - >>> combined.set_params(kernel_pca=None) + >>> combined.set_params(kernel_pca='drop') ... # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS FeatureUnion(n_jobs=1, transformer_list=[('linear_pca', PCA(copy=True,...)), - ('kernel_pca', None)], + ('kernel_pca', 'drop')], transformer_weights=None) .. topic:: Examples: From 37d2deb1ce8fe5a4747cc926ace2f401ece2bead Mon Sep 17 00:00:00 2001 From: Thomas Fan Date: Mon, 23 Jul 2018 11:17:19 -0400 Subject: [PATCH 05/16] DOC: Rewords --- sklearn/pipeline.py | 2 +- sklearn/tests/test_pipeline.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index d19eb0fc7e6d6..2dfaf09e2cb54 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -703,7 +703,7 @@ def _check_params(self): if trans is None: warnings.warn( "Transformer '%s' is set to None. Please use 'drop' " - "for the same behavior. None will be removed " + "for the same behavior. None has been deprecated " "in version 0.20 and will be removed in 0.22." % name, DeprecationWarning) diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py index e01aae2a9e21f..434286bf36f26 100644 --- a/sklearn/tests/test_pipeline.py +++ b/sklearn/tests/test_pipeline.py @@ -839,7 +839,7 @@ def test_set_feature_union_test_none(): ft.set_params(m2=None) depr_message = ("Transformer 'm2' is set to None. Please use 'drop' " - "for the same behavior. None will be removed " + "for the same behavior. None has been deprecated " "in version 0.20 and will be removed in 0.22.") with pytest.warns(DeprecationWarning, match=depr_message): From 7b8ab7cd867750729999f1d9acb0affe60b89612 Mon Sep 17 00:00:00 2001 From: Thomas Fan Date: Tue, 24 Jul 2018 09:47:15 -0400 Subject: [PATCH 06/16] DOC: Adds whats_new --- doc/whats_new/v0.20.rst | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/doc/whats_new/v0.20.rst b/doc/whats_new/v0.20.rst index 1efe39f8b5e55..171b00161b7ae 100644 --- a/doc/whats_new/v0.20.rst +++ b/doc/whats_new/v0.20.rst @@ -175,6 +175,10 @@ Model evaluation ``'balanced_accuracy'`` scorer for binary classification. :issue:`8066` by :user:`xyguo` and :user:`Aman Dalmia `. +- Using ``None`` as a transformer in :class:`pipeline.FeatureUnion` is + deprecated and will be removed in v0.22. Use ``'drop'`` instead. + :issue:`11144` by :user:`thomasjpfan`. + Decomposition, manifold learning and clustering - A new clustering algorithm: :class:`cluster.OPTICS`: an algoritm @@ -194,7 +198,7 @@ Decomposition, manifold learning and clustering :issue:`6374` by :user:`John Kirkham `. - :class:`decomposition.SparsePCA` now exposes ``normalize_components``. When - set to True, the train and test data are centered with the train mean + set to True, the train and test data are centered with the train mean repsectively during the fit phase and the transform phase. This fixes the behavior of SparsePCA. When set to False, which is the default, the previous abnormal behaviour still holds. The False value is for backward @@ -906,7 +910,7 @@ Outlier Detection models - Novelty detection with :class:`neighbors.LocalOutlierFactor`: Add a ``novelty`` parameter to :class:`neighbors.LocalOutlierFactor`. When - ``novelty`` is set to True, :class:`neighbors.LocalOutlierFactor` can then + ``novelty`` is set to True, :class:`neighbors.LocalOutlierFactor` can then be used for novelty detection, i.e. predict on new unseen data. Available prediction methods are ``predict``, ``decision_function`` and ``score_samples``. By default, ``novelty`` is set to ``False``, and only From 016670d6fea27ee2e323c1a4af12c33b0abbbab0 Mon Sep 17 00:00:00 2001 From: Thomas Fan Date: Tue, 24 Jul 2018 15:47:47 -0400 Subject: [PATCH 07/16] TST: Adds construction time test --- sklearn/tests/test_pipeline.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py index 434286bf36f26..5ade5beb82186 100644 --- a/sklearn/tests/test_pipeline.py +++ b/sklearn/tests/test_pipeline.py @@ -875,6 +875,12 @@ def test_set_feature_union_step_drop(): ft.set_params(m3=mult3) assert_array_equal([[3]], ft.fit(X).transform(X)) + # Check 'drop' step at construction time + ft = FeatureUnion([('m2', 'drop'), ('m3', mult3)]) + assert_array_equal([[3]], ft.fit(X).transform(X)) + assert_array_equal([[3]], ft.fit_transform(X)) + assert_equal(['m3__x3'], ft.get_feature_names()) + def test_step_name_validation(): bad_steps1 = [('a__q', Mult(2)), ('b', Mult(3))] From d1ef9548c01cc2816b0f971dd5762ffec24030c0 Mon Sep 17 00:00:00 2001 From: Thomas Fan Date: Tue, 24 Jul 2018 17:22:51 -0400 Subject: [PATCH 08/16] DOC: Include 'drop' in docstring --- sklearn/pipeline.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index 2dfaf09e2cb54..bce2bed5234a6 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -725,7 +725,8 @@ def _validate_transformers(self): def _iter(self): """ - Generate (name, trans, weight) tuples excluding None transformers + Generate (name, trans, weight) tuples excluding None and + 'drop' transformers. """ get_weight = (self.transformer_weights or {}).get return ((name, trans, get_weight(name)) From f4a652940ca74a89365117a4e93010d8053fb458 Mon Sep 17 00:00:00 2001 From: Thomas Fan Date: Tue, 24 Jul 2018 19:52:50 -0400 Subject: [PATCH 09/16] DOC: Move to meta-estimators section --- doc/whats_new/v0.20.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/whats_new/v0.20.rst b/doc/whats_new/v0.20.rst index 171b00161b7ae..50b32d8238dbb 100644 --- a/doc/whats_new/v0.20.rst +++ b/doc/whats_new/v0.20.rst @@ -175,10 +175,6 @@ Model evaluation ``'balanced_accuracy'`` scorer for binary classification. :issue:`8066` by :user:`xyguo` and :user:`Aman Dalmia `. -- Using ``None`` as a transformer in :class:`pipeline.FeatureUnion` is - deprecated and will be removed in v0.22. Use ``'drop'`` instead. - :issue:`11144` by :user:`thomasjpfan`. - Decomposition, manifold learning and clustering - A new clustering algorithm: :class:`cluster.OPTICS`: an algoritm @@ -381,6 +377,10 @@ Model evaluation and meta-estimators hyperparameter optimization and refitting the best model on the whole dataset. :issue:`11310` by :user:`Matthias Feurer `. +- Using ``None`` as a transformer in :class:`pipeline.FeatureUnion` is + deprecated and will be removed in v0.22. Use ``'drop'`` instead. + :issue:`11144` by :user:`thomasjpfan`. + Decomposition and manifold learning - Speed improvements for both 'exact' and 'barnes_hut' methods in From 522cafd1d9f9bc2de36d871fc272bce025cdbb49 Mon Sep 17 00:00:00 2001 From: Thomas Fan Date: Sat, 18 Aug 2018 01:16:11 -0400 Subject: [PATCH 10/16] MRG: Fix --- doc/whats_new/v0.20.rst | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/doc/whats_new/v0.20.rst b/doc/whats_new/v0.20.rst index 8b2d6d8640744..ef73a440e1499 100644 --- a/doc/whats_new/v0.20.rst +++ b/doc/whats_new/v0.20.rst @@ -994,20 +994,7 @@ Multiple modules ``raw_values`` parameter is deprecated as the shifted Mahalanobis distance will be always returned in 0.22. :issue:`9015` by `Nicolas Goix`_. -<<<<<<< HEAD -- Novelty detection with :class:`neighbors.LocalOutlierFactor`: - Add a ``novelty`` parameter to :class:`neighbors.LocalOutlierFactor`. When - ``novelty`` is set to True, :class:`neighbors.LocalOutlierFactor` can then - be used for novelty detection, i.e. predict on new unseen data. Available - prediction methods are ``predict``, ``decision_function`` and - ``score_samples``. By default, ``novelty`` is set to ``False``, and only - the ``fit_predict`` method is avaiable. - By :user:`Albert Thomas `. - - - A ``behaviour`` parameter has been introduced in :class:`ensemble.IsolationForest` -======= - |Feature| |API| A ``behaviour`` parameter has been introduced in :class:`ensemble.IsolationForest` ->>>>>>> upstream/master to ensure backward compatibility. In the old behaviour, the ``decision_function`` is independent of the ``contamination`` parameter. A threshold attribute depending on the ``contamination`` parameter is thus From 28eca87ff05d35d8e64e93e2fc894459234d9b7d Mon Sep 17 00:00:00 2001 From: Thomas Fan Date: Wed, 22 Aug 2018 14:29:11 -0400 Subject: [PATCH 11/16] REV: Does not deprecate None --- sklearn/pipeline.py | 14 +------------- sklearn/tests/test_pipeline.py | 11 ++--------- 2 files changed, 3 insertions(+), 22 deletions(-) diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index 6f22635d62012..b7244fddad898 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -631,7 +631,7 @@ class FeatureUnion(_BaseComposition, TransformerMixin): Parameters of the transformers may be set using its name and the parameter name separated by a '__'. A transformer may be replaced entirely by setting the parameter with its name to another transformer, - or removed by setting to 'drop'. + or removed by setting to 'drop' or ``None``. Read more in the :ref:`User Guide `. @@ -702,15 +702,6 @@ def set_params(self, **kwargs): self._set_params('transformer_list', **kwargs) return self - def _check_params(self): - for idx, (name, trans) in enumerate(self.transformer_list): - if trans is None: - warnings.warn( - "Transformer '%s' is set to None. Please use 'drop' " - "for the same behavior. None has been deprecated " - "in version 0.20 and will be removed in 0.22." % name, - DeprecationWarning) - def _validate_transformers(self): names, transformers = zip(*self.transformer_list) @@ -771,7 +762,6 @@ def fit(self, X, y=None): self : FeatureUnion This estimator """ - self._check_params() self.transformer_list = list(self.transformer_list) self._validate_transformers() transformers = Parallel(n_jobs=self.n_jobs)( @@ -797,7 +787,6 @@ def fit_transform(self, X, y=None, **fit_params): hstack of results of transformers. sum_n_components is the sum of n_components (output dimension) over transformers. """ - self._check_params() self._validate_transformers() result = Parallel(n_jobs=self.n_jobs)( delayed(_fit_transform_one)(trans, X, y, weight, @@ -829,7 +818,6 @@ def transform(self, X): hstack of results of transformers. sum_n_components is the sum of n_components (output dimension) over transformers. """ - self._check_params() Xs = Parallel(n_jobs=self.n_jobs)( delayed(_transform_one)(trans, X, None, weight) for name, trans, weight in self._iter()) diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py index 5ade5beb82186..2565eed7eaef8 100644 --- a/sklearn/tests/test_pipeline.py +++ b/sklearn/tests/test_pipeline.py @@ -838,15 +838,8 @@ def test_set_feature_union_test_none(): ft = FeatureUnion([('m2', mult2), ('m3', mult3)]) ft.set_params(m2=None) - depr_message = ("Transformer 'm2' is set to None. Please use 'drop' " - "for the same behavior. None has been deprecated " - "in version 0.20 and will be removed in 0.22.") - - with pytest.warns(DeprecationWarning, match=depr_message): - assert_array_equal([[3]], ft.fit_transform(X)) - - with pytest.warns(DeprecationWarning, match=depr_message): - assert_array_equal([[3]], ft.fit(X).transform(X)) + assert_array_equal([[3]], ft.fit_transform(X)) + assert_array_equal([[3]], ft.fit(X).transform(X)) def test_set_feature_union_step_drop(): From d4fbc6c50bd68dac6d9624ed405c4dc860dc23c5 Mon Sep 17 00:00:00 2001 From: Thomas Fan Date: Wed, 22 Aug 2018 14:33:37 -0400 Subject: [PATCH 12/16] DOC: Updates whats_new --- doc/whats_new/v0.20.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/doc/whats_new/v0.20.rst b/doc/whats_new/v0.20.rst index 3a9e68801afa3..c5571daf24323 100644 --- a/doc/whats_new/v0.20.rst +++ b/doc/whats_new/v0.20.rst @@ -840,6 +840,10 @@ Support for Python 3.3 has been officially dropped. keyword arguments on to the pipeline's last estimator, enabling the use of parameters such as ``return_std`` in a pipeline with caution. :issue:`9304` by :user:`Breno Freitas `. + +- |Feature| ``drop`` can be used as a transformer in + :class:`pipeline.FeatureUnion` to drop features. + :issue:`11144` by :user:`thomasjpfan`. :mod:`sklearn.preprocessing` From 130a1ea087f45d8ff6893c99bd2300fa9895bd47 Mon Sep 17 00:00:00 2001 From: Thomas Fan Date: Wed, 22 Aug 2018 14:34:24 -0400 Subject: [PATCH 13/16] DOC: Updates whats_new with API tag --- doc/whats_new/v0.20.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats_new/v0.20.rst b/doc/whats_new/v0.20.rst index c5571daf24323..d5b2ffcea777b 100644 --- a/doc/whats_new/v0.20.rst +++ b/doc/whats_new/v0.20.rst @@ -841,7 +841,7 @@ Support for Python 3.3 has been officially dropped. parameters such as ``return_std`` in a pipeline with caution. :issue:`9304` by :user:`Breno Freitas `. -- |Feature| ``drop`` can be used as a transformer in +- |API| ``drop`` can be used as a transformer in :class:`pipeline.FeatureUnion` to drop features. :issue:`11144` by :user:`thomasjpfan`. From 35d8cbe6a0cf7c801ee1115f3279a633fb287a55 Mon Sep 17 00:00:00 2001 From: Thomas Fan Date: Wed, 22 Aug 2018 14:39:48 -0400 Subject: [PATCH 14/16] DOC: Update whats_new --- doc/whats_new/v0.20.rst | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/doc/whats_new/v0.20.rst b/doc/whats_new/v0.20.rst index d5b2ffcea777b..557fe7e920191 100644 --- a/doc/whats_new/v0.20.rst +++ b/doc/whats_new/v0.20.rst @@ -841,9 +841,8 @@ Support for Python 3.3 has been officially dropped. parameters such as ``return_std`` in a pipeline with caution. :issue:`9304` by :user:`Breno Freitas `. -- |API| ``drop`` can be used as a transformer in - :class:`pipeline.FeatureUnion` to drop features. - :issue:`11144` by :user:`thomasjpfan`. +- |API| :class:`pipeline.FeatureUnion` now supports ``'drop'`` as a transformer + to drop features. :issue:`11144` by :user:`thomasjpfan`. :mod:`sklearn.preprocessing` From 57e4e468df268744fa5b0163cd958433661ffb71 Mon Sep 17 00:00:00 2001 From: Thomas Fan Date: Wed, 22 Aug 2018 23:25:45 -0400 Subject: [PATCH 15/16] STY: flake8 --- sklearn/pipeline.py | 1 - sklearn/tests/test_pipeline.py | 1 - 2 files changed, 2 deletions(-) diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index b7244fddad898..294f69a113992 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -10,7 +10,6 @@ # License: BSD from collections import defaultdict -import warnings import numpy as np from scipy import sparse diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py index 2565eed7eaef8..9227dca17b2f9 100644 --- a/sklearn/tests/test_pipeline.py +++ b/sklearn/tests/test_pipeline.py @@ -8,7 +8,6 @@ import numpy as np from scipy import sparse -import pytest from sklearn.externals.six.moves import zip from sklearn.utils.testing import assert_raises From 5f92129fe3303365d52baf3496aa9dbaebcd4681 Mon Sep 17 00:00:00 2001 From: Thomas Fan Date: Sat, 25 Aug 2018 09:23:39 -0400 Subject: [PATCH 16/16] RFC: Combines none and drop tests --- sklearn/tests/test_pipeline.py | 24 ++++++------------------ 1 file changed, 6 insertions(+), 18 deletions(-) diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py index 9227dca17b2f9..500af22591d6c 100644 --- a/sklearn/tests/test_pipeline.py +++ b/sklearn/tests/test_pipeline.py @@ -8,6 +8,7 @@ import numpy as np from scipy import sparse +import pytest from sklearn.externals.six.moves import zip from sklearn.utils.testing import assert_raises @@ -827,21 +828,8 @@ def test_set_feature_union_steps(): assert_equal(['mock__x5'], ft.get_feature_names()) -def test_set_feature_union_test_none(): - mult2 = Mult(2) - mult2.get_feature_names = lambda: ['x2'] - mult3 = Mult(3) - mult3.get_feature_names = lambda: ['x3'] - X = np.asarray([[1]]) - - ft = FeatureUnion([('m2', mult2), ('m3', mult3)]) - ft.set_params(m2=None) - - assert_array_equal([[3]], ft.fit_transform(X)) - assert_array_equal([[3]], ft.fit(X).transform(X)) - - -def test_set_feature_union_step_drop(): +@pytest.mark.parametrize('drop', ['drop', None]) +def test_set_feature_union_step_drop(drop): mult2 = Mult(2) mult2.get_feature_names = lambda: ['x2'] mult3 = Mult(3) @@ -853,12 +841,12 @@ def test_set_feature_union_step_drop(): assert_array_equal([[2, 3]], ft.fit_transform(X)) assert_equal(['m2__x2', 'm3__x3'], ft.get_feature_names()) - ft.set_params(m2='drop') + ft.set_params(m2=drop) assert_array_equal([[3]], ft.fit(X).transform(X)) assert_array_equal([[3]], ft.fit_transform(X)) assert_equal(['m3__x3'], ft.get_feature_names()) - ft.set_params(m3='drop') + ft.set_params(m3=drop) assert_array_equal([[]], ft.fit(X).transform(X)) assert_array_equal([[]], ft.fit_transform(X)) assert_equal([], ft.get_feature_names()) @@ -868,7 +856,7 @@ def test_set_feature_union_step_drop(): assert_array_equal([[3]], ft.fit(X).transform(X)) # Check 'drop' step at construction time - ft = FeatureUnion([('m2', 'drop'), ('m3', mult3)]) + ft = FeatureUnion([('m2', drop), ('m3', mult3)]) assert_array_equal([[3]], ft.fit(X).transform(X)) assert_array_equal([[3]], ft.fit_transform(X)) assert_equal(['m3__x3'], ft.get_feature_names())