From 280d0824f566e8e21263d314d92f6c5856c377e9 Mon Sep 17 00:00:00 2001 From: shubhraneel Date: Thu, 26 Aug 2021 18:47:20 +0530 Subject: [PATCH 01/20] Add support for passthrough in FeatureUnion --- sklearn/pipeline.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index 34788d64de9e9..ebdbffa2b6a8b 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -17,6 +17,7 @@ from joblib import Parallel from .base import clone, TransformerMixin +from .preprocessing import FunctionTransformer from .utils._estimator_html_repr import _VisualBlock from .utils.metaestimators import available_if from .utils import ( @@ -942,7 +943,7 @@ def _validate_transformers(self): # validate estimators for t in transformers: - if t == "drop": + if t == "drop" or t == "passthrough": continue if not (hasattr(t, "fit") or hasattr(t, "fit_transform")) or not hasattr( t, "transform" @@ -971,7 +972,13 @@ def _iter(self): """ get_weight = (self.transformer_weights or {}).get return ( - (name, trans, get_weight(name)) + ( + name, + FunctionTransformer(accept_sparse=True, check_inverse=False), + get_weight(name), + ) + if trans == "passthrough" + else (name, trans, get_weight(name)) for name, trans in self.transformer_list if trans != "drop" ) From 893cd1e44eef359210487e63a6753811aa07cec5 Mon Sep 17 00:00:00 2001 From: shubhraneel Date: Fri, 27 Aug 2021 14:48:32 +0530 Subject: [PATCH 02/20] Add tests for passthrough --- sklearn/pipeline.py | 8 ++++++- sklearn/tests/test_pipeline.py | 44 ++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 1 deletion(-) diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index ebdbffa2b6a8b..078883277bd78 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -965,16 +965,22 @@ def _validate_transformer_weights(self): "but it is not present in transformer_list." 
) + def _passthrough_function(self): + ft = FunctionTransformer(accept_sparse=True, check_inverse=False) + ft.get_feature_names = lambda: [] + return ft + def _iter(self): """ Generate (name, trans, weight) tuples excluding None and 'drop' transformers. """ get_weight = (self.transformer_weights or {}).get + return ( ( name, - FunctionTransformer(accept_sparse=True, check_inverse=False), + self._passthrough_function(), get_weight(name), ) if trans == "passthrough" diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py index 4ec5c7f081a15..49abe9e4ed92f 100644 --- a/sklearn/tests/test_pipeline.py +++ b/sklearn/tests/test_pipeline.py @@ -983,6 +983,50 @@ def test_set_feature_union_step_drop(): assert not record +def test_set_feature_union_passthrough(): + mult2 = Mult(2) + mult2.get_feature_names = lambda: ["x2"] + mult3 = Mult(3) + mult3.get_feature_names = lambda: ["x3"] + X = np.asarray([[1]]) + + ft = FeatureUnion([("m2", mult2), ("m3", mult3)]) + assert_array_equal([[2, 3]], ft.fit(X).transform(X)) + assert_array_equal([[2, 3]], ft.fit_transform(X)) + assert ["m2__x2", "m3__x3"] == ft.get_feature_names() + + with pytest.warns(None) as record: + ft.set_params(m2="passthrough") + assert_array_equal([[1, 3]], ft.fit(X).transform(X)) + assert_array_equal([[1, 3]], ft.fit_transform(X)) + assert ["m3__x3"] == ft.get_feature_names() + assert not record + + with pytest.warns(None) as record: + ft.set_params(m3="passthrough") + assert_array_equal([[1, 1]], ft.fit(X).transform(X)) + assert_array_equal([[1, 1]], ft.fit_transform(X)) + assert [] == ft.get_feature_names() + assert not record + + with pytest.warns(None) as record: + # check we can change back + ft.set_params(m3=mult3) + assert_array_equal([[1, 3]], ft.fit(X).transform(X)) + assert_array_equal([[1, 3]], ft.fit_transform(X)) + assert not record + + with pytest.warns(None) as record: + # Check 'drop' step at construction time + ft = FeatureUnion([("m2", "passthrough"), ("m3", mult3)]) + 
assert_array_equal([[1, 3]], ft.fit(X).transform(X)) + assert_array_equal([[1, 3]], ft.fit_transform(X)) + assert ["m3__x3"] == ft.get_feature_names() + assert not record + + pass + + def test_step_name_validation(): error_message_1 = r"Estimator names must not contain __: got \['a__q'\]" error_message_2 = r"Names provided are not unique: \['a', 'a'\]" From 8862c6a4362cceffb775e9d58812f4ed2bb21827 Mon Sep 17 00:00:00 2001 From: shubhraneel Date: Fri, 27 Aug 2021 15:18:39 +0530 Subject: [PATCH 03/20] Add documentation for passthrough --- sklearn/pipeline.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index 078883277bd78..d73529d90a42b 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -829,8 +829,9 @@ class FeatureUnion(TransformerMixin, _BaseComposition): Parameters of the transformers may be set using its name and the parameter name separated by a '__'. A transformer may be replaced entirely by - setting the parameter with its name to another transformer, - or removed by setting to 'drop'. + setting the parameter with its name to another transformer, removed by + setting to 'drop' or disabled by setting to 'passthrough' (features are passed + without transformation). Read more in the :ref:`User Guide `. @@ -840,8 +841,9 @@ class FeatureUnion(TransformerMixin, _BaseComposition): ---------- transformer_list : list of (string, transformer) tuples List of transformer objects to be applied to the data. The first - half of each tuple is the name of the transformer. The tranformer can - be 'drop' for it to be ignored. + half of each tuple is the name of the transformer. The transformer can + be 'drop' for it to be ignored or can be 'passthrough' for features to + be passed as it is. .. versionchanged:: 0.22 Deprecated `None` as a transformer in favor of 'drop'. 
From a34eeb2735ab2a055cd465f7eb98c92295605a0c Mon Sep 17 00:00:00 2001 From: shubhraneel Date: Sat, 28 Aug 2021 08:36:28 +0530 Subject: [PATCH 04/20] Remove get_feature_names from passthrough as not necessary --- sklearn/pipeline.py | 7 +------ sklearn/tests/test_pipeline.py | 4 ---- 2 files changed, 1 insertion(+), 10 deletions(-) diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index d73529d90a42b..455f8cd62d9c0 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -967,11 +967,6 @@ def _validate_transformer_weights(self): "but it is not present in transformer_list." ) - def _passthrough_function(self): - ft = FunctionTransformer(accept_sparse=True, check_inverse=False) - ft.get_feature_names = lambda: [] - return ft - def _iter(self): """ Generate (name, trans, weight) tuples excluding None and @@ -982,7 +977,7 @@ def _iter(self): return ( ( name, - self._passthrough_function(), + FunctionTransformer(accept_sparse=True, check_inverse=False), get_weight(name), ) if trans == "passthrough" diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py index 49abe9e4ed92f..3b5f3d2e52e39 100644 --- a/sklearn/tests/test_pipeline.py +++ b/sklearn/tests/test_pipeline.py @@ -993,20 +993,17 @@ def test_set_feature_union_passthrough(): ft = FeatureUnion([("m2", mult2), ("m3", mult3)]) assert_array_equal([[2, 3]], ft.fit(X).transform(X)) assert_array_equal([[2, 3]], ft.fit_transform(X)) - assert ["m2__x2", "m3__x3"] == ft.get_feature_names() with pytest.warns(None) as record: ft.set_params(m2="passthrough") assert_array_equal([[1, 3]], ft.fit(X).transform(X)) assert_array_equal([[1, 3]], ft.fit_transform(X)) - assert ["m3__x3"] == ft.get_feature_names() assert not record with pytest.warns(None) as record: ft.set_params(m3="passthrough") assert_array_equal([[1, 1]], ft.fit(X).transform(X)) assert_array_equal([[1, 1]], ft.fit_transform(X)) - assert [] == ft.get_feature_names() assert not record with pytest.warns(None) as record: @@ -1021,7 +1018,6 
@@ def test_set_feature_union_passthrough(): ft = FeatureUnion([("m2", "passthrough"), ("m3", mult3)]) assert_array_equal([[1, 3]], ft.fit(X).transform(X)) assert_array_equal([[1, 3]], ft.fit_transform(X)) - assert ["m3__x3"] == ft.get_feature_names() assert not record pass From fee6b225507a3ff9b62ec3b7935bc5a8f6de3b14 Mon Sep 17 00:00:00 2001 From: shubhraneel Date: Sat, 28 Aug 2021 08:49:28 +0530 Subject: [PATCH 05/20] Add more tests --- sklearn/tests/test_pipeline.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py index 3b5f3d2e52e39..2ead589de2c8b 100644 --- a/sklearn/tests/test_pipeline.py +++ b/sklearn/tests/test_pipeline.py @@ -1020,6 +1020,32 @@ def test_set_feature_union_passthrough(): assert_array_equal([[1, 3]], ft.fit_transform(X)) assert not record + X = iris.data + columns = X.shape[1] + pca = PCA(n_components=2, svd_solver="randomized", random_state=0) + + with pytest.warns(None) as record: + ft = FeatureUnion([("passthrough", "passthrough"), ("pca", pca)]) + assert_array_equal(X, ft.fit(X).transform(X)[:, :columns]) + assert_array_equal(X, ft.fit_transform(X)[:, :columns]) + assert not record + + with pytest.warns(None) as record: + ft.set_params(pca="passthrough") + X_ft = ft.fit(X).transform(X) + assert_array_equal(X, X_ft[:, :columns]) + assert_array_equal(X, X_ft[:, columns:]) + X_ft = ft.fit_transform(X) + assert_array_equal(X, X_ft[:, :columns]) + assert_array_equal(X, X_ft[:, columns:]) + assert not record + + with pytest.warns(None) as record: + ft.set_params(passthrough=pca) + assert_array_equal(X, ft.fit(X).transform(X)[:, -columns:]) + assert_array_equal(X, ft.fit_transform(X)[:, -columns:]) + assert not record + pass From b36efba5edd7c18933e05aecccd4a0143ce3a3b4 Mon Sep 17 00:00:00 2001 From: shubhraneel Date: Sat, 28 Aug 2021 08:53:32 +0530 Subject: [PATCH 06/20] Minor Code Cleanup: t in ["drop", "passthrough"] --- sklearn/pipeline.py | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index 455f8cd62d9c0..2763f84c6e508 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -945,7 +945,7 @@ def _validate_transformers(self): # validate estimators for t in transformers: - if t == "drop" or t == "passthrough": + if t in ["drop", "passthrough"]: continue if not (hasattr(t, "fit") or hasattr(t, "fit_transform")) or not hasattr( t, "transform" From e5d03bfa4b69c2a47de9dd203bacb21f3756aa28 Mon Sep 17 00:00:00 2001 From: shubhraneel Date: Mon, 30 Aug 2021 17:04:47 +0530 Subject: [PATCH 07/20] Add changelog entry --- doc/whats_new/v1.0.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/doc/whats_new/v1.0.rst b/doc/whats_new/v1.0.rst index 7d8175a3b5046..a6d5b7956c468 100644 --- a/doc/whats_new/v1.0.rst +++ b/doc/whats_new/v1.0.rst @@ -694,6 +694,10 @@ Changelog the final estimator. :pr:`19790` by :user:`Christopher Flynn `. +- |Enhancement| Added support for "passthrough" in :class:`FeatureUnion`. + Setting a transformer to "passthrough" will pass the features as it is. + :pr:`20860` by :user:`Shubhraneel Pal `. + :mod:`sklearn.preprocessing` ............................ From aa8dc93c7aa6bf76011dc2fbb8c2b6ece6901996 Mon Sep 17 00:00:00 2001 From: Shubhraneel Pal Date: Fri, 3 Sep 2021 08:57:21 +0530 Subject: [PATCH 08/20] Apply suggestions for minor changes from code review Co-authored-by: Guillaume Lemaitre --- sklearn/pipeline.py | 9 ++++++--- sklearn/tests/test_pipeline.py | 3 ++- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index 2763f84c6e508..728f8b6b32bb1 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -830,8 +830,8 @@ class FeatureUnion(TransformerMixin, _BaseComposition): Parameters of the transformers may be set using its name and the parameter name separated by a '__'. 
A transformer may be replaced entirely by setting the parameter with its name to another transformer, removed by - setting to 'drop' or disabled by setting to 'passthrough' (features are passed - without transformation). + setting to 'drop' or disabled by setting to 'passthrough' (features are + passed without transformation). Read more in the :ref:`User Guide `. @@ -844,6 +844,9 @@ class FeatureUnion(TransformerMixin, _BaseComposition): half of each tuple is the name of the transformer. The transformer can be 'drop' for it to be ignored or can be 'passthrough' for features to be passed as it is. + + .. versionadded:: 1.0 + Added the option `"passthrough"`. .. versionchanged:: 0.22 Deprecated `None` as a transformer in favor of 'drop'. @@ -945,7 +948,7 @@ def _validate_transformers(self): # validate estimators for t in transformers: - if t in ["drop", "passthrough"]: + if t in ("drop", "passthrough"): continue if not (hasattr(t, "fit") or hasattr(t, "fit_transform")) or not hasattr( t, "transform" diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py index 2ead589de2c8b..db5c7374f54a0 100644 --- a/sklearn/tests/test_pipeline.py +++ b/sklearn/tests/test_pipeline.py @@ -984,6 +984,7 @@ def test_set_feature_union_step_drop(): def test_set_feature_union_passthrough(): + """Check the behaviour of setting a transformer to `"passthrough"`.""" mult2 = Mult(2) mult2.get_feature_names = lambda: ["x2"] mult3 = Mult(3) @@ -1014,7 +1015,7 @@ def test_set_feature_union_passthrough(): assert not record with pytest.warns(None) as record: - # Check 'drop' step at construction time + # Check 'passthrough' step at construction time ft = FeatureUnion([("m2", "passthrough"), ("m3", mult3)]) assert_array_equal([[1, 3]], ft.fit(X).transform(X)) assert_array_equal([[1, 3]], ft.fit_transform(X)) From ac7ea168d85ec8812e6d206fc52098b591375529 Mon Sep 17 00:00:00 2001 From: shubhraneel Date: Fri, 3 Sep 2021 09:23:06 +0530 Subject: [PATCH 09/20] Remove unnecessary line 
and fix linting errors --- sklearn/tests/test_pipeline.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py index 2ead589de2c8b..1c30479a4f518 100644 --- a/sklearn/tests/test_pipeline.py +++ b/sklearn/tests/test_pipeline.py @@ -1046,8 +1046,6 @@ def test_set_feature_union_passthrough(): assert_array_equal(X, ft.fit_transform(X)[:, -columns:]) assert not record - pass - def test_step_name_validation(): error_message_1 = r"Estimator names must not contain __: got \['a__q'\]" From 67f7753709d01da7e07ab02e3a16f5709df5ca2f Mon Sep 17 00:00:00 2001 From: shubhraneel Date: Fri, 3 Sep 2021 09:29:23 +0530 Subject: [PATCH 10/20] Linter Errors fixed --- sklearn/pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index 728f8b6b32bb1..f2e21ea85bc18 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -844,7 +844,7 @@ class FeatureUnion(TransformerMixin, _BaseComposition): half of each tuple is the name of the transformer. The transformer can be 'drop' for it to be ignored or can be 'passthrough' for features to be passed as it is. - + .. versionadded:: 1.0 Added the option `"passthrough"`. From d1a6dc6f5321e22736f610552abeea9e9a454344 Mon Sep 17 00:00:00 2001 From: Shubhraneel Pal Date: Wed, 8 Sep 2021 21:24:34 +0530 Subject: [PATCH 11/20] Apply suggestions from code review Co-authored-by: Olivier Grisel --- doc/whats_new/v1.0.rst | 2 +- sklearn/pipeline.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/whats_new/v1.0.rst b/doc/whats_new/v1.0.rst index a6d5b7956c468..4cea1b6027c20 100644 --- a/doc/whats_new/v1.0.rst +++ b/doc/whats_new/v1.0.rst @@ -695,7 +695,7 @@ Changelog :pr:`19790` by :user:`Christopher Flynn `. - |Enhancement| Added support for "passthrough" in :class:`FeatureUnion`. - Setting a transformer to "passthrough" will pass the features as it is.
+ Setting a transformer to "passthrough" will pass the features unchanged. :pr:`20860` by :user:`Shubhraneel Pal `. :mod:`sklearn.preprocessing` diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index f2e21ea85bc18..ce228aaaf5b5a 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -843,7 +843,7 @@ class FeatureUnion(TransformerMixin, _BaseComposition): List of transformer objects to be applied to the data. The first half of each tuple is the name of the transformer. The transformer can be 'drop' for it to be ignored or can be 'passthrough' for features to - be passed as it is. + be passed unchanged. .. versionadded:: 1.0 Added the option `"passthrough"`. From 1ee325065aeddf84408136bb80bd3a9e97c3afde Mon Sep 17 00:00:00 2001 From: Shubhraneel Pal Date: Wed, 8 Sep 2021 21:24:59 +0530 Subject: [PATCH 12/20] Shorter code for test_pipeline.py Co-authored-by: Olivier Grisel --- sklearn/tests/test_pipeline.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py index fad1f3a15270c..9b0b092baa76a 100644 --- a/sklearn/tests/test_pipeline.py +++ b/sklearn/tests/test_pipeline.py @@ -1034,11 +1034,9 @@ def test_set_feature_union_passthrough(): with pytest.warns(None) as record: ft.set_params(pca="passthrough") X_ft = ft.fit(X).transform(X) - assert_array_equal(X, X_ft[:, :columns]) - assert_array_equal(X, X_ft[:, columns:]) + assert_array_equal(X_ft, np.hstack([X, X])) X_ft = ft.fit_transform(X) - assert_array_equal(X, X_ft[:, :columns]) - assert_array_equal(X, X_ft[:, columns:]) + assert_array_equal(X_ft, np.hstack([X, X])) assert not record with pytest.warns(None) as record: From 661ecb63b381a3d65ae663fbc8c46fd72ce6f00b Mon Sep 17 00:00:00 2001 From: shubhraneel Date: Wed, 8 Sep 2021 21:35:51 +0530 Subject: [PATCH 13/20] Move changelog entry to 1.1 --- doc/whats_new/v1.0.rst | 4 ---- doc/whats_new/v1.1.rst | 6 ++++++ 2 files changed, 6 insertions(+), 4 deletions(-) diff --git 
a/doc/whats_new/v1.0.rst b/doc/whats_new/v1.0.rst index 997832c073729..698a3503adc4f 100644 --- a/doc/whats_new/v1.0.rst +++ b/doc/whats_new/v1.0.rst @@ -771,10 +771,6 @@ Changelog the final estimator. :pr:`19790` by :user:`Christopher Flynn `. -- |Enhancement| Added support for "passthrough" in :class:`FeatureUnion`. - Setting a transformer to "passthrough" will pass the features unchanged. - :pr:`20860` by :user:`Shubhraneel Pal `. - :mod:`sklearn.preprocessing` ............................ diff --git a/doc/whats_new/v1.1.rst b/doc/whats_new/v1.1.rst index fba40e25a9e7e..9c1084e393e8d 100644 --- a/doc/whats_new/v1.1.rst +++ b/doc/whats_new/v1.1.rst @@ -53,6 +53,12 @@ Changelog :pr:`20880` by :user:`Guillaume Lemaitre ` and :user:`András Simon `. +:mod:`sklearn.pipeline` +....................... + +- |Enhancement| Added support for "passthrough" in :class:`FeatureUnion`. + Setting a transformer to "passthrough" will pass the features unchanged. + :pr:`20860` by :user:`Shubhraneel Pal `.
Code and Documentation Contributors ----------------------------------- From 8d797f24f3638048cd7d31a1b4d1215e86eab3f8 Mon Sep 17 00:00:00 2001 From: shubhraneel Date: Wed, 8 Sep 2021 21:39:04 +0530 Subject: [PATCH 14/20] Remove get_feature_names from test --- sklearn/tests/test_pipeline.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py index 50df485417899..29eaaecbe5847 100644 --- a/sklearn/tests/test_pipeline.py +++ b/sklearn/tests/test_pipeline.py @@ -1007,9 +1007,7 @@ def test_set_feature_union_step_drop(get_names): def test_set_feature_union_passthrough(): """Check the behaviour of setting a transformer to `"passthrough"`.""" mult2 = Mult(2) - mult2.get_feature_names = lambda: ["x2"] mult3 = Mult(3) - mult3.get_feature_names = lambda: ["x3"] X = np.asarray([[1]]) ft = FeatureUnion([("m2", mult2), ("m3", mult3)]) From 51401032b8daae5894a968c313674646d62163c0 Mon Sep 17 00:00:00 2001 From: shubhraneel Date: Wed, 8 Sep 2021 21:44:28 +0530 Subject: [PATCH 15/20] Remove context managers --- sklearn/tests/test_pipeline.py | 64 +++++++++++++--------------------- 1 file changed, 25 insertions(+), 39 deletions(-) diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py index 29eaaecbe5847..debed7c1d796d 100644 --- a/sklearn/tests/test_pipeline.py +++ b/sklearn/tests/test_pipeline.py @@ -1014,55 +1014,41 @@ def test_set_feature_union_passthrough(): assert_array_equal([[2, 3]], ft.fit(X).transform(X)) assert_array_equal([[2, 3]], ft.fit_transform(X)) - with pytest.warns(None) as record: - ft.set_params(m2="passthrough") - assert_array_equal([[1, 3]], ft.fit(X).transform(X)) - assert_array_equal([[1, 3]], ft.fit_transform(X)) - assert not record + ft.set_params(m2="passthrough") + assert_array_equal([[1, 3]], ft.fit(X).transform(X)) + assert_array_equal([[1, 3]], ft.fit_transform(X)) - with pytest.warns(None) as record: - ft.set_params(m3="passthrough") - assert_array_equal([[1, 1]], 
ft.fit(X).transform(X)) - assert_array_equal([[1, 1]], ft.fit_transform(X)) - assert not record + ft.set_params(m3="passthrough") + assert_array_equal([[1, 1]], ft.fit(X).transform(X)) + assert_array_equal([[1, 1]], ft.fit_transform(X)) - with pytest.warns(None) as record: - # check we can change back - ft.set_params(m3=mult3) - assert_array_equal([[1, 3]], ft.fit(X).transform(X)) - assert_array_equal([[1, 3]], ft.fit_transform(X)) - assert not record + # check we can change back + ft.set_params(m3=mult3) + assert_array_equal([[1, 3]], ft.fit(X).transform(X)) + assert_array_equal([[1, 3]], ft.fit_transform(X)) - with pytest.warns(None) as record: - # Check 'passthrough' step at construction time - ft = FeatureUnion([("m2", "passthrough"), ("m3", mult3)]) - assert_array_equal([[1, 3]], ft.fit(X).transform(X)) - assert_array_equal([[1, 3]], ft.fit_transform(X)) - assert not record + # Check 'passthrough' step at construction time + ft = FeatureUnion([("m2", "passthrough"), ("m3", mult3)]) + assert_array_equal([[1, 3]], ft.fit(X).transform(X)) + assert_array_equal([[1, 3]], ft.fit_transform(X)) X = iris.data columns = X.shape[1] pca = PCA(n_components=2, svd_solver="randomized", random_state=0) - with pytest.warns(None) as record: - ft = FeatureUnion([("passthrough", "passthrough"), ("pca", pca)]) - assert_array_equal(X, ft.fit(X).transform(X)[:, :columns]) - assert_array_equal(X, ft.fit_transform(X)[:, :columns]) - assert not record + ft = FeatureUnion([("passthrough", "passthrough"), ("pca", pca)]) + assert_array_equal(X, ft.fit(X).transform(X)[:, :columns]) + assert_array_equal(X, ft.fit_transform(X)[:, :columns]) - with pytest.warns(None) as record: - ft.set_params(pca="passthrough") - X_ft = ft.fit(X).transform(X) - assert_array_equal(X_ft, np.hstack([X, X])) - X_ft = ft.fit_transform(X) - assert_array_equal(X_ft, np.hstack([X, X])) - assert not record + ft.set_params(pca="passthrough") + X_ft = ft.fit(X).transform(X) + assert_array_equal(X_ft, np.hstack([X, X])) 
+ X_ft = ft.fit_transform(X) + assert_array_equal(X_ft, np.hstack([X, X])) - with pytest.warns(None) as record: - ft.set_params(passthrough=pca) - assert_array_equal(X, ft.fit(X).transform(X)[:, -columns:]) - assert_array_equal(X, ft.fit_transform(X)[:, -columns:]) - assert not record + ft.set_params(passthrough=pca) + assert_array_equal(X, ft.fit(X).transform(X)[:, -columns:]) + assert_array_equal(X, ft.fit_transform(X)[:, -columns:]) def test_step_name_validation(): From 84a0dcc8c99cd8b2b5f70d02281f76b4d8a0570a Mon Sep 17 00:00:00 2001 From: shubhraneel Date: Wed, 8 Sep 2021 22:21:37 +0530 Subject: [PATCH 16/20] Add test for passthrough with transformation --- sklearn/tests/test_pipeline.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py index debed7c1d796d..fa01b6e834b11 100644 --- a/sklearn/tests/test_pipeline.py +++ b/sklearn/tests/test_pipeline.py @@ -1050,6 +1050,13 @@ def test_set_feature_union_passthrough(): assert_array_equal(X, ft.fit(X).transform(X)[:, -columns:]) assert_array_equal(X, ft.fit_transform(X)[:, -columns:]) + ft = FeatureUnion( + [("passthrough", "passthrough"), ("pca", pca)], + transformer_weights={"passthrough": 2}, + ) + assert_array_equal(X * 2, ft.fit(X).transform(X)[:, :columns]) + assert_array_equal(X * 2, ft.fit_transform(X)[:, :columns]) + def test_step_name_validation(): error_message_1 = r"Estimator names must not contain __: got \['a__q'\]" From 398c4bcaa6286f69968294f58e06f9b86bf913d7 Mon Sep 17 00:00:00 2001 From: shubhraneel Date: Wed, 8 Sep 2021 22:55:07 +0530 Subject: [PATCH 17/20] Fix documentation --- sklearn/pipeline.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index 1086a2d1f1f53..99298f1d1bfa6 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -864,7 +864,6 @@ class FeatureUnion(TransformerMixin, _BaseComposition): Parameters ---------- - transformer_list : list of (string, transformer) 
tuples List of transformer objects to be applied to the data. The first half of each tuple is the name of the transformer. The transformer can From f06cb1671f0f2d7821825ec1f31836e8f69d2093 Mon Sep 17 00:00:00 2001 From: shubhraneel Date: Wed, 8 Sep 2021 23:26:57 +0530 Subject: [PATCH 18/20] Fix documentation numpydoc validation: string -> str --- sklearn/pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index 99298f1d1bfa6..3d976472d942a 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -864,7 +864,7 @@ class FeatureUnion(TransformerMixin, _BaseComposition): Parameters ---------- - transformer_list : list of (string, transformer) tuples + transformer_list : list of (str, transformer) tuples List of transformer objects to be applied to the data. The first half of each tuple is the name of the transformer. The transformer can be 'drop' for it to be ignored or can be 'passthrough' for features to From 39c24fd0f723b4f705976bb6fd4de2e967a0f6c8 Mon Sep 17 00:00:00 2001 From: Shubhraneel Pal Date: Mon, 13 Sep 2021 09:04:52 +0530 Subject: [PATCH 19/20] Remove `accept_sparse` and `check_inverse` Co-authored-by: Thomas J. 
Fan --- sklearn/pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index 3d976472d942a..a10cc6d386f6a 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -1013,7 +1013,7 @@ def _iter(self): return ( ( name, - FunctionTransformer(accept_sparse=True, check_inverse=False), + FunctionTransformer(), get_weight(name), ) if trans == "passthrough" From 36de49e1be05bdcf7835779bf983a5442c13b693 Mon Sep 17 00:00:00 2001 From: Shubhraneel Pal Date: Mon, 13 Sep 2021 09:11:54 +0530 Subject: [PATCH 20/20] Make iter a generator --- sklearn/pipeline.py | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index a10cc6d386f6a..e2f9b0f0950ec 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -1008,19 +1008,15 @@ def _iter(self): Generate (name, trans, weight) tuples excluding None and 'drop' transformers. """ + get_weight = (self.transformer_weights or {}).get - return ( - ( - name, - FunctionTransformer(), - get_weight(name), - ) - if trans == "passthrough" - else (name, trans, get_weight(name)) - for name, trans in self.transformer_list - if trans != "drop" - ) + for name, trans in self.transformer_list: + if trans == "drop": + continue + if trans == "passthrough": + trans = FunctionTransformer() + yield (name, trans, get_weight(name)) @deprecated( "get_feature_names is deprecated in 1.0 and will be removed "