Revert "API Adds passthrough option to Pipeline (#11674)" · xhluca/scikit-learn@02b51d9 · GitHub
[go: up one dir, main page]

Skip to content

Commit 02b51d9

Browse files
author
Xing
committed
Revert "API Adds passthrough option to Pipeline (scikit-learn#11674)"
This reverts commit 5098474.
1 parent 85f59b5 commit 02b51d9

File tree

5 files changed

+81
-101
lines changed

5 files changed

+81
-101
lines changed

doc/modules/compose.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -107,10 +107,10 @@ This is particularly important for doing grid searches::
107107
>>> grid_search = GridSearchCV(pipe, param_grid=param_grid)
108108

109109
Individual steps may also be replaced as parameters, and non-final steps may be
110-
ignored by setting them to ``'passthrough'``::
110+
ignored by setting them to ``None``::
111111

112112
>>> from sklearn.linear_model import LogisticRegression
113-
>>> param_grid = dict(reduce_dim=['passthrough', PCA(5), PCA(10)],
113+
>>> param_grid = dict(reduce_dim=[None, PCA(5), PCA(10)],
114114
... clf=[SVC(), LogisticRegression()],
115115
... clf__C=[0.1, 10, 100])
116116
>>> grid_search = GridSearchCV(pipe, param_grid=param_grid)

doc/whats_new/v0.21.rst

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -71,11 +71,6 @@ Support for Python 3.4 and below has been officially dropped.
7171
in the dense case. Also added a new parameter ``order`` which controls output
7272
order for further speed performances. :issue:`12251` by `Tom Dupre la Tour`_.
7373

74-
:mod:`sklearn.pipeline`
75-
.......................
76-
77-
- |API| :class:`pipeline.Pipeline` now supports using ``'passthrough'`` as a
78-
transformer. :issue:`11144` by :user:`thomasjpfan`.
7974

8075
:mod:`sklearn.tree`
8176
...................

examples/compose/plot_compare_reduction.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@
4444

4545
pipe = Pipeline([
4646
# the reduce_dim stage is populated by the param_grid
47-
('reduce_dim', 'passthrough'),
47+
('reduce_dim', None),
4848
('classify', LinearSVC())
4949
])
5050

sklearn/pipeline.py

Lines changed: 66 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
# License: BSD
1111

1212
from collections import defaultdict
13-
from itertools import islice
1413

1514
import numpy as np
1615
from scipy import sparse
@@ -42,7 +41,7 @@ class Pipeline(_BaseComposition):
4241
names and the parameter name separated by a '__', as in the example below.
4342
A step's estimator may be replaced entirely by setting the parameter
4443
with its name to another estimator, or a transformer removed by setting
45-
it to 'passthrough' or ``None``.
44+
to None.
4645
4746
Read more in the :ref:`User Guide <pipeline>`.
4847
@@ -159,34 +158,19 @@ def _validate_steps(self):
159158
estimator = estimators[-1]
160159

161160
for t in transformers:
162-
if t is None or t == 'passthrough':
161+
if t is None:
163162
continue
164163
if (not (hasattr(t, "fit") or hasattr(t, "fit_transform")) or not
165164
hasattr(t, "transform")):
166165
raise TypeError("All intermediate steps should be "
167-
"transformers and implement fit and transform "
168-
"or be the string 'passthrough' "
169-
"'%s' (type %s) doesn't" % (t, type(t)))
166+
"transformers and implement fit and transform."
167+
" '%s' (type %s) doesn't" % (t, type(t)))
170168

171169
# We allow last estimator to be None as an identity transformation
172-
if (estimator is not None and estimator != 'passthrough'
173-
and not hasattr(estimator, "fit")):
174-
raise TypeError(
175-
"Last step of Pipeline should implement fit "
176-
"or be the string 'passthrough'. "
177-
"'%s' (type %s) doesn't" % (estimator, type(estimator)))
178-
179-
def _iter(self, with_final=True):
180-
"""
181-
Generate (name, trans) tuples excluding 'passthrough' transformers
182-
"""
183-
stop = len(self.steps)
184-
if not with_final:
185-
stop -= 1
186-
187-
for name, trans in islice(self.steps, 0, stop):
188-
if trans is not None and trans != 'passthrough':
189-
yield name, trans
170+
if estimator is not None and not hasattr(estimator, "fit"):
171+
raise TypeError("Last step of Pipeline should implement fit. "
172+
"'%s' (type %s) doesn't"
173+
% (estimator, type(estimator)))
190174

191175
@property
192176
def _estimator_type(self):
@@ -199,8 +183,7 @@ def named_steps(self):
199183

200184
@property
201185
def _final_estimator(self):
202-
estimator = self.steps[-1][1]
203-
return 'passthrough' if estimator is None else estimator
186+
return self.steps[-1][1]
204187

205188
# Estimator interface
206189

@@ -219,35 +202,37 @@ def _fit(self, X, y=None, **fit_params):
219202
step, param = pname.split('__', 1)
220203
fit_params_steps[step][param] = pval
221204
Xt = X
222-
for step_idx, (name, transformer) in enumerate(
223-
self._iter(with_final=False)):
224-
if hasattr(memory, 'location'):
225-
# joblib >= 0.12
226-
if memory.location is None:
227-
# we do not clone when caching is disabled to
228-
# preserve backward compatibility
229-
cloned_transformer = transformer
230-
else:
231-
cloned_transformer = clone(transformer)
232-
elif hasattr(memory, 'cachedir'):
233-
# joblib < 0.11
234-
if memory.cachedir is None:
235-
# we do not clone when caching is disabled to
236-
# preserve backward compatibility
237-
cloned_transformer = transformer
205+
for step_idx, (name, transformer) in enumerate(self.steps[:-1]):
206+
if transformer is None:
207+
pass
208+
else:
209+
if hasattr(memory, 'location'):
210+
# joblib >= 0.12
211+
if memory.location is None:
212+
# we do not clone when caching is disabled to
213+
# preserve backward compatibility
214+
cloned_transformer = transformer
215+
else:
216+
cloned_transformer = clone(transformer)
217+
elif hasattr(memory, 'cachedir'):
218+
# joblib < 0.11
219+
if memory.cachedir is None:
220+
# we do not clone when caching is disabled to
221+
# preserve backward compatibility
222+
cloned_transformer = transformer
223+
else:
224+
cloned_transformer = clone(transformer)
238225
else:
239226
cloned_transformer = clone(transformer)
240-
else:
241-
cloned_transformer = clone(transformer)
242-
# Fit or load from cache the current transfomer
243-
Xt, fitted_transformer = fit_transform_one_cached(
244-
cloned_transformer, Xt, y, None,
245-
**fit_params_steps[name])
246-
# Replace the transformer of the step with the fitted
247-
# transformer. This is necessary when loading the transformer
248-
# from the cache.
249-
self.steps[step_idx] = (name, fitted_transformer)
250-
if self._final_estimator == 'passthrough':
227+
# Fit or load from cache the current transfomer
228+
Xt, fitted_transformer = fit_transform_one_cached(
229+
cloned_transformer, Xt, y, None,
230+
**fit_params_steps[name])
231+
# Replace the transformer of the step with the fitted
232+
# transformer. This is necessary when loading the transformer
233+
# from the cache.
234+
self.steps[step_idx] = (name, fitted_transformer)
235+
if self._final_estimator is None:
251236
return Xt, {}
252237
return Xt, fit_params_steps[self.steps[-1][0]]
253238

@@ -278,7 +263,7 @@ def fit(self, X, y=None, **fit_params):
278263
This estimator
279264
"""
280265
Xt, fit_params = self._fit(X, y, **fit_params)
281-
if self._final_estimator != 'passthrough':
266+
if self._final_estimator is not None:
282267
self._final_estimator.fit(Xt, y, **fit_params)
283268
return self
284269

@@ -313,7 +298,7 @@ def fit_transform(self, X, y=None, **fit_params):
313298
Xt, fit_params = self._fit(X, y, **fit_params)
314299
if hasattr(last_step, 'fit_transform'):
315300
return last_step.fit_transform(Xt, y, **fit_params)
316-
elif last_step == 'passthrough':
301+
elif last_step is None:
317302
return Xt
318303
else:
319304
return last_step.fit(Xt, y, **fit_params).transform(Xt)
@@ -341,8 +326,9 @@ def predict(self, X, **predict_params):
341326
y_pred : array-like
342327
"""
343328
Xt = X
344-
for name, transform in self._iter(with_final=False):
345-
Xt = transform.transform(Xt)
329+
for name, transform in self.steps[:-1]:
330+
if transform is not None:
331+
Xt = transform.transform(Xt)
346332
return self.steps[-1][-1].predict(Xt, **predict_params)
347333

348334
@if_delegate_has_method(delegate='_final_estimator')
@@ -390,8 +376,9 @@ def predict_proba(self, X):
390376
y_proba : array-like, shape = [n_samples, n_classes]
391377
"""
392378
Xt = X
393-
for name, transform in self._iter(with_final=False):
394-
Xt = transform.transform(Xt)
379+
for name, transform in self.steps[:-1]:
380+
if transform is not None:
381+
Xt = transform.transform(Xt)
395382
return self.steps[-1][-1].predict_proba(Xt)
396383

397384
@if_delegate_has_method(delegate='_final_estimator')
@@ -409,8 +396,9 @@ def decision_function(self, X):
409396
y_score : array-like, shape = [n_samples, n_classes]
410397
"""
411398
Xt = X
412-
for name, transform in self._iter(with_final=False):
413-
Xt = transform.transform(Xt)
399+
for name, transform in self.steps[:-1]:
400+
if transform is not None:
401+
Xt = transform.transform(Xt)
414402
return self.steps[-1][-1].decision_function(Xt)
415403

416404
@if_delegate_has_method(delegate='_final_estimator')
@@ -428,8 +416,9 @@ def predict_log_proba(self, X):
428416
y_score : array-like, shape = [n_samples, n_classes]
429417
"""
430418
Xt = X
431-
for name, transform in self._iter(with_final=False):
432-
Xt = transform.transform(Xt)
419+
for name, transform in self.steps[:-1]:
420+
if transform is not None:
421+
Xt = transform.transform(Xt)
433422
return self.steps[-1][-1].predict_log_proba(Xt)
434423

435424
@property
@@ -451,14 +440,15 @@ def transform(self):
451440
"""
452441
# _final_estimator is None or has transform, otherwise attribute error
453442
# XXX: Handling the None case means we can't use if_delegate_has_method
454-
if self._final_estimator != 'passthrough':
443+
if self._final_estimator is not None:
455444
self._final_estimator.transform
456445
return self._transform
457446

458447
def _transform(self, X):
459448
Xt = X
460-
for _, transform in self._iter():
461-
Xt = transform.transform(Xt)
449+
for name, transform in self.steps:
450+
if transform is not None:
451+
Xt = transform.transform(Xt)
462452
return Xt
463453

464454
@property
@@ -481,15 +471,16 @@ def inverse_transform(self):
481471
"""
482472
# raise AttributeError if necessary for hasattr behaviour
483473
# XXX: Handling the None case means we can't use if_delegate_has_method
484-
for _, transform in self._iter():
485-
transform.inverse_transform
474+
for name, transform in self.steps:
475+
if transform is not None:
476+
transform.inverse_transform
486477
return self._inverse_transform
487478

488479
def _inverse_transform(self, X):
489480
Xt = X
490-
reverse_iter = reversed(list(self._iter()))
491-
for _, transform in reverse_iter:
492-
Xt = transform.inverse_transform(Xt)
481+
for name, transform in self.steps[::-1]:
482+
if transform is not None:
483+
Xt = transform.inverse_transform(Xt)
493484
return Xt
494485

495486
@if_delegate_has_method(delegate='_final_estimator')
@@ -515,8 +506,9 @@ def score(self, X, y=None, sample_weight=None):
515506
score : float
516507
"""
517508
Xt = X
518-
for name, transform in self._iter(with_final=False):
519-
Xt = transform.transform(Xt)
509+
for name, transform in self.steps[:-1]:
510+
if transform is not None:
511+
Xt = transform.transform(Xt)
520512
score_params = {}
521513
if sample_weight is not None:
522514
score_params['sample_weight'] = sample_weight
@@ -535,11 +527,7 @@ def _pairwise(self):
535527
def _name_estimators(estimators):
536528
"""Generate names for estimators."""
537529

538-
names = [
539-
estimator
540-
if isinstance(estimator, str) else type(estimator).__name__.lower()
541-
for estimator in estimators
542-
]
530+
names = [type(estimator).__name__.lower() for estimator in estimators]
543531
namecount = defaultdict(int)
544532
for est, name in zip(estimators, names):
545533
namecount[name] += 1

sklearn/tests/test_pipeline.py

Lines changed: 12 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -163,8 +163,7 @@ def test_pipeline_init():
163163
# Check that we can't instantiate pipelines with objects without fit
164164
# method
165165
assert_raises_regex(TypeError,
166-
'Last step of Pipeline should implement fit '
167-
'or be the string \'passthrough\''
166+
'Last step of Pipeline should implement fit. '
168167
'.*NoFit.*',
169168
Pipeline, [('clf', NoFit())])
170169
# Smoke test with only an estimator
@@ -231,7 +230,7 @@ def test_pipeline_init_tuple():
231230
pipe.fit(X, y=None)
232231
pipe.score(X)
233232

234-
pipe.set_params(transf='passthrough')
233+
pipe.set_params(transf=None)
235234
pipe.fit(X, y=None)
236235
pipe.score(X)
237236

@@ -575,8 +574,8 @@ def test_pipeline_named_steps():
575574
assert_true(pipeline.named_steps.mult is mult2)
576575

577576

578-
@pytest.mark.parametrize('passthrough', [None, 'passthrough'])
579-
def test_set_pipeline_step_passthrough(passthrough):
577+
def test_set_pipeline_step_none():
578+
# Test setting Pipeline steps to None
580579
X = np.array([[1]])
581580
y = np.array([1])
582581
mult2 = Mult(mult=2)
@@ -593,22 +592,22 @@ def make():
593592
assert_array_equal([exp], pipeline.fit(X).predict(X))
594593
assert_array_equal(X, pipeline.inverse_transform([[exp]]))
595594

596-
pipeline.set_params(m3=passthrough)
595+
pipeline.set_params(m3=None)
597596
exp = 2 * 5
598597
assert_array_equal([[exp]], pipeline.fit_transform(X, y))
599598
assert_array_equal([exp], pipeline.fit(X).predict(X))
600599
assert_array_equal(X, pipeline.inverse_transform([[exp]]))
601600
assert_dict_equal(pipeline.get_params(deep=True),
602601
{'steps': pipeline.steps,
603602
'm2': mult2,
604-
'm3': passthrough,
603+
'm3': None,
605604
'last': mult5,
606605
'memory': None,
607606
'm2__mult': 2,
608607
'last__mult': 5,
609608
})
610609

611-
pipeline.set_params(m2=passthrough)
610+
pipeline.set_params(m2=None)
612611
exp = 5
613612
assert_array_equal([[exp]], pipeline.fit_transform(X, y))
614613
assert_array_equal([exp], pipeline.fit(X).predict(X))
@@ -627,20 +626,19 @@ def make():
627626
assert_array_equal(X, pipeline.inverse_transform([[exp]]))
628627

629628
pipeline = make()
630-
pipeline.set_params(last=passthrough)
629+
pipeline.set_params(last=None)
631630
# mult2 and mult3 are active
632631
exp = 6
633632
assert_array_equal([[exp]], pipeline.fit(X, y).transform(X))
634633
assert_array_equal([[exp]], pipeline.fit_transform(X, y))
635634
assert_array_equal(X, pipeline.inverse_transform([[exp]]))
636635
assert_raise_message(AttributeError,
637-
"'str' object has no attribute 'predict'",
636+
"'NoneType' object has no attribute 'predict'",
638637
getattr, pipeline, 'predict')
639638

640-
# Check 'passthrough' step at construction time
639+
# Check None step at construction time
641640
exp = 2 * 5
642-
pipeline = Pipeline(
643-
[('m2', mult2), ('m3', passthrough), ('last', mult5)])
641+
pipeline = Pipeline([('m2', mult2), ('m3', None), ('last', mult5)])
644642
assert_array_equal([[exp]], pipeline.fit_transform(X, y))
645643
assert_array_equal([exp], pipeline.fit(X).predict(X))
646644
assert_array_equal(X, pipeline.inverse_transform([[exp]]))
@@ -657,8 +655,7 @@ def test_pipeline_ducktyping():
657655
pipeline.transform
658656
pipeline.inverse_transform
659657

660-
pipeline = make_pipeline('passthrough')
661-
assert pipeline.steps[0] == ('passthrough', 'passthrough')
658+
pipeline = make_pipeline(None)
662659
assert_false(hasattr(pipeline, 'predict'))
663660
pipeline.transform
664661
pipeline.inverse_transform

0 commit comments

Comments
 (0)
0