10
10
# License: BSD
11
11
12
12
from collections import defaultdict
13
- from itertools import islice
14
13
15
14
import numpy as np
16
15
from scipy import sparse
@@ -42,7 +41,7 @@ class Pipeline(_BaseComposition):
42
41
names and the parameter name separated by a '__', as in the example below.
43
42
A step's estimator may be replaced entirely by setting the parameter
44
43
with its name to another estimator, or a transformer removed by setting
45
- it to 'passthrough' or `` None`` .
44
+ it to None.
46
45
47
46
Read more in the :ref:`User Guide <pipeline>`.
48
47
@@ -159,34 +158,19 @@ def _validate_steps(self):
159
158
estimator = estimators [- 1 ]
160
159
161
160
for t in transformers :
162
- if t is None or t == 'passthrough' :
161
+ if t is None :
163
162
continue
164
163
if (not (hasattr (t , "fit" ) or hasattr (t , "fit_transform" )) or not
165
164
hasattr (t , "transform" )):
166
165
raise TypeError ("All intermediate steps should be "
167
- "transformers and implement fit and transform "
168
- "or be the string 'passthrough' "
169
- "'%s' (type %s) doesn't" % (t , type (t )))
166
+ "transformers and implement fit and transform."
167
+ " '%s' (type %s) doesn't" % (t , type (t )))
170
168
171
169
# We allow last estimator to be None as an identity transformation
172
- if (estimator is not None and estimator != 'passthrough'
173
- and not hasattr (estimator , "fit" )):
174
- raise TypeError (
175
- "Last step of Pipeline should implement fit "
176
- "or be the string 'passthrough'. "
177
- "'%s' (type %s) doesn't" % (estimator , type (estimator )))
178
-
179
- def _iter (self , with_final = True ):
180
- """
181
- Generate (name, trans) tuples excluding 'passthrough' transformers
182
- """
183
- stop = len (self .steps )
184
- if not with_final :
185
- stop -= 1
186
-
187
- for name , trans in islice (self .steps , 0 , stop ):
188
- if trans is not None and trans != 'passthrough' :
189
- yield name , trans
170
+ if estimator is not None and not hasattr (estimator , "fit" ):
171
+ raise TypeError ("Last step of Pipeline should implement fit. "
172
+ "'%s' (type %s) doesn't"
173
+ % (estimator , type (estimator )))
190
174
191
175
@property
192
176
def _estimator_type (self ):
@@ -199,8 +183,7 @@ def named_steps(self):
199
183
200
184
@property
201
185
def _final_estimator (self ):
202
- estimator = self .steps [- 1 ][1 ]
203
- return 'passthrough' if estimator is None else estimator
186
+ return self .steps [- 1 ][1 ]
204
187
205
188
# Estimator interface
206
189
@@ -219,35 +202,37 @@ def _fit(self, X, y=None, **fit_params):
219
202
step , param = pname .split ('__' , 1 )
220
203
fit_params_steps [step ][param ] = pval
221
204
Xt = X
222
- for step_idx , (name , transformer ) in enumerate (
223
- self ._iter (with_final = False )):
224
- if hasattr (memory , 'location' ):
225
- # joblib >= 0.12
226
- if memory .location is None :
227
- # we do not clone when caching is disabled to
228
- # preserve backward compatibility
229
- cloned_transformer = transformer
230
- else :
231
- cloned_transformer = clone (transformer )
232
- elif hasattr (memory , 'cachedir' ):
233
- # joblib < 0.11
234
- if memory .cachedir is None :
235
- # we do not clone when caching is disabled to
236
- # preserve backward compatibility
237
- cloned_transformer = transformer
205
+ for step_idx , (name , transformer ) in enumerate (self .steps [:- 1 ]):
206
+ if transformer is None :
207
+ pass
208
+ else :
209
+ if hasattr (memory , 'location' ):
210
+ # joblib >= 0.12
211
+ if memory .location is None :
212
+ # we do not clone when caching is disabled to
213
+ # preserve backward compatibility
214
+ cloned_transformer = transformer
215
+ else :
216
+ cloned_transformer = clone (transformer )
217
+ elif hasattr (memory , 'cachedir' ):
218
+ # joblib < 0.11
219
+ if memory .cachedir is None :
220
+ # we do not clone when caching is disabled to
221
+ # preserve backward compatibility
222
+ cloned_transformer = transformer
223
+ else :
224
+ cloned_transformer = clone (transformer )
238
225
else :
239
226
cloned_transformer = clone (transformer )
240
- else :
241
- cloned_transformer = clone (transformer )
242
- # Fit or load from cache the current transformer
243
- Xt , fitted_transformer = fit_transform_one_cached (
244
- cloned_transformer , Xt , y , None ,
245
- ** fit_params_steps [name ])
246
- # Replace the transformer of the step with the fitted
247
- # transformer. This is necessary when loading the transformer
248
- # from the cache.
249
- self .steps [step_idx ] = (name , fitted_transformer )
250
- if self ._final_estimator == 'passthrough' :
227
+ # Fit or load from cache the current transformer
228
+ Xt , fitted_transformer = fit_transform_one_cached (
229
+ cloned_transformer , Xt , y , None ,
230
+ ** fit_params_steps [name ])
231
+ # Replace the transformer of the step with the fitted
232
+ # transformer. This is necessary when loading the transformer
233
+ # from the cache.
234
+ self .steps [step_idx ] = (name , fitted_transformer )
235
+ if self ._final_estimator is None :
251
236
return Xt , {}
252
237
return Xt , fit_params_steps [self .steps [- 1 ][0 ]]
253
238
@@ -278,7 +263,7 @@ def fit(self, X, y=None, **fit_params):
278
263
This estimator
279
264
"""
280
265
Xt , fit_params = self ._fit (X , y , ** fit_params )
281
- if self ._final_estimator != 'passthrough' :
266
+ if self ._final_estimator is not None :
282
267
self ._final_estimator .fit (Xt , y , ** fit_params )
283
268
return self
284
269
@@ -313,7 +298,7 @@ def fit_transform(self, X, y=None, **fit_params):
313
298
Xt , fit_params = self ._fit (X , y , ** fit_params )
314
299
if hasattr (last_step , 'fit_transform' ):
315
300
return last_step .fit_transform (Xt , y , ** fit_params )
316
- elif last_step == 'passthrough' :
301
+ elif last_step is None :
317
302
return Xt
318
303
else :
319
304
return last_step .fit (Xt , y , ** fit_params ).transform (Xt )
@@ -341,8 +326,9 @@ def predict(self, X, **predict_params):
341
326
y_pred : array-like
342
327
"""
343
328
Xt = X
344
- for name , transform in self ._iter (with_final = False ):
345
- Xt = transform .transform (Xt )
329
+ for name , transform in self .steps [:- 1 ]:
330
+ if transform is not None :
331
+ Xt = transform .transform (Xt )
346
332
return self .steps [- 1 ][- 1 ].predict (Xt , ** predict_params )
347
333
348
334
@if_delegate_has_method (delegate = '_final_estimator' )
@@ -390,8 +376,9 @@ def predict_proba(self, X):
390
376
y_proba : array-like, shape = [n_samples, n_classes]
391
377
"""
392
378
Xt = X
393
- for name , transform in self ._iter (with_final = False ):
394
- Xt = transform .transform (Xt )
379
+ for name , transform in self .steps [:- 1 ]:
380
+ if transform is not None :
381
+ Xt = transform .transform (Xt )
395
382
return self .steps [- 1 ][- 1 ].predict_proba (Xt )
396
383
397
384
@if_delegate_has_method (delegate = '_final_estimator' )
@@ -409,8 +396,9 @@ def decision_function(self, X):
409
396
y_score : array-like, shape = [n_samples, n_classes]
410
397
"""
411
398
Xt = X
412
- for name , transform in self ._iter (with_final = False ):
413
- Xt = transform .transform (Xt )
399
+ for name , transform in self .steps [:- 1 ]:
400
+ if transform is not None :
401
+ Xt = transform .transform (Xt )
414
402
return self .steps [- 1 ][- 1 ].decision_function (Xt )
415
403
416
404
@if_delegate_has_method (delegate = '_final_estimator' )
@@ -428,8 +416,9 @@ def predict_log_proba(self, X):
428
416
y_score : array-like, shape = [n_samples, n_classes]
429
417
"""
430
418
Xt = X
431
- for name , transform in self ._iter (with_final = False ):
432
- Xt = transform .transform (Xt )
419
+ for name , transform in self .steps [:- 1 ]:
420
+ if transform is not None :
421
+ Xt = transform .transform (Xt )
433
422
return self .steps [- 1 ][- 1 ].predict_log_proba (Xt )
434
423
435
424
@property
@@ -451,14 +440,15 @@ def transform(self):
451
440
"""
452
441
# _final_estimator is None or has transform, otherwise attribute error
453
442
# XXX: Handling the None case means we can't use if_delegate_has_method
454
- if self ._final_estimator != 'passthrough' :
443
+ if self ._final_estimator is not None :
455
444
self ._final_estimator .transform
456
445
return self ._transform
457
446
458
447
def _transform (self , X ):
459
448
Xt = X
460
- for _ , transform in self ._iter ():
461
- Xt = transform .transform (Xt )
449
+ for name , transform in self .steps :
450
+ if transform is not None :
451
+ Xt = transform .transform (Xt )
462
452
return Xt
463
453
464
454
@property
@@ -481,15 +471,16 @@ def inverse_transform(self):
481
471
"""
482
472
# raise AttributeError if necessary for hasattr behaviour
483
473
# XXX: Handling the None case means we can't use if_delegate_has_method
484
- for _ , transform in self ._iter ():
485
- transform .inverse_transform
474
+ for name , transform in self .steps :
475
+ if transform is not None :
476
+ transform .inverse_transform
486
477
return self ._inverse_transform
487
478
488
479
def _inverse_transform (self , X ):
489
480
Xt = X
490
- reverse_iter = reversed ( list ( self ._iter ()))
491
- for _ , transform in reverse_iter :
492
- Xt = transform .inverse_transform (Xt )
481
+ for name , transform in self .steps [:: - 1 ]:
482
+ if transform is not None :
483
+ Xt = transform .inverse_transform (Xt )
493
484
return Xt
494
485
495
486
@if_delegate_has_method (delegate = '_final_estimator' )
@@ -515,8 +506,9 @@ def score(self, X, y=None, sample_weight=None):
515
506
score : float
516
507
"""
517
508
Xt = X
518
- for name , transform in self ._iter (with_final = False ):
519
- Xt = transform .transform (Xt )
509
+ for name , transform in self .steps [:- 1 ]:
510
+ if transform is not None :
511
+ Xt = transform .transform (Xt )
520
512
score_params = {}
521
513
if sample_weight is not None :
522
514
score_params ['sample_weight' ] = sample_weight
@@ -535,11 +527,7 @@ def _pairwise(self):
535
527
def _name_estimators (estimators ):
536
528
"""Generate names for estimators."""
537
529
538
- names = [
539
- estimator
540
- if isinstance (estimator , str ) else type (estimator ).__name__ .lower ()
541
- for estimator in estimators
542
- ]
530
+ names = [type (estimator ).__name__ .lower () for estimator in estimators ]
543
531
namecount = defaultdict (int )
544
532
for est , name in zip (estimators , names ):
545
533
namecount [name ] += 1
0 commit comments