FIX make Pipeline methods properties as per #1805 · jnothman/scikit-learn@a308cdb · GitHub
[go: up one dir, main page]

Skip to content

Commit a308cdb

Browse files
committed
FIX make Pipeline methods properties as per scikit-learn#1805
1 parent 5319994 commit a308cdb

File tree

2 files changed

+207
-57
lines changed

2 files changed

+207
-57
lines changed

sklearn/pipeline.py

Lines changed: 185 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,19 @@ def get_params(self, deep=True):
110110
out['%s__%s' % (name, key)] = value
111111
return out
112112

113+
@property
114+
def named_steps(self):
115+
return dict(self.steps)
116+
117+
@property
118+
def _transforms(self):
119+
"""Non-final estimators in (name, est) tuples."""
120+
return self.steps[:-1]
121+
122+
@property
123+
def _final_estimator(self):
124+
return self.steps[-1][1]
125+
113126
# Estimator interface
114127

115128
def _pre_transform(self, X, y=None, **fit_params):
@@ -118,7 +131,9 @@ def _pre_transform(self, X, y=None, **fit_params):
118131
step, param = pname.split('__', 1)
119132
fit_params_steps[step][param] = pval
120133
Xt = X
121-
for name, transform in self.steps[:-1]:
134+
for name, transform in self._transforms:
135+
if transform is None:
136+
continue
122137
if hasattr(transform, "fit_transform"):
123138
Xt = transform.fit_transform(Xt, y, **fit_params_steps[name])
124139
else:
@@ -129,79 +144,192 @@ def _pre_transform(self, X, y=None, **fit_params):
129144
def fit(self, X, y=None, **fit_params):
130145
"""Fit all the transforms one after the other and transform the
131146
data, then fit the transformed data using the final estimator.
147+
148+
Parameters
149+
----------
150+
X : array-like, shape = [n_samples, n_features]
151+
Training data, where n_samples is the number of samples and
152+
n_features is the number of features.
153+
y : array-like, shape = [n_samples], optional
154+
Target vector relative to X for classification;
155+
None for unsupervised learning.
156+
fit_params : dict of string -> object
157+
Parameters passed to the `fit` method of each step, where
158+
each parameter name is prefixed such that parameter ``p`` for step
159+
``s`` has key ``s__p``.
132160
"""
133161
Xt, fit_params = self._pre_transform(X, y, **fit_params)
134-
self.steps[-1][-1].fit(Xt, y, **fit_params)
162+
self._final_estimator.fit(Xt, y, **fit_params)
135163
return self
136164

137-
def fit_transform(self, X, y=None, **fit_params):
138-
"""Fit all the transforms one after the other and transform the
165+
@property
166+
def fit_transform(self):
167+
"""Pipeline.fit_transform(X, y=None, **fit_params)
168+
169+
Fit all the transforms one after the other and transform the
139170
data, then use fit_transform on transformed data using the final
140-
estimator."""
141-
Xt, fit_params = self._pre_transform(X, y, **fit_params)
142-
if hasattr(self.steps[-1][-1], 'fit_transform'):
143-
return self.steps[-1][-1].fit_transform(Xt, y, **fit_params)
144-
else:
145-
return self.steps[-1][-1].fit(Xt, y, **fit_params).transform(Xt)
171+
estimator.
146172
147-
def predict(self, X):
148-
"""Applies transforms to the data, and the predict method of the
149-
final estimator. Valid only if the final estimator implements
150-
predict."""
173+
Parameters
174+
----------
175+
X : array-like, shape = [n_samples, n_features]
176+
Training data, where n_samples is the number of samples and
177+
n_features is the number of features.
178+
y : array-like, shape = [n_samples], optional
179+
Target vector relative to X for classification;
180+
None for unsupervised learning.
181+
fit_params : dict of string -> object
182+
Parameters passed to the `fit` method of each step, where
183+
each parameter name is prefixed such that parameter ``p`` for step
184+
``s`` has key ``s__p``.
185+
"""
186+
last_step = self._final_estimator
187+
if (
188+
not hasattr(last_step, 'fit_transform')
189+
and not hasattr(last_step, 'transform')):
190+
raise AttributeError(
191+
'last step has neither `transform` nor `fit_transform`')
192+
193+
def fn(X, y=None, **fit_params):
194+
Xt, fit_params = self._pre_transform(X, y, **fit_params)
195+
if hasattr(last_step, 'fit_transform'):
196+
return last_step.fit_transform(Xt, y, **fit_params)
197+
else:
198+
return last_step.fit(Xt, y, **fit_params).transform(Xt)
199+
return fn
200+
201+
def _run_pipeline(self, est_fn, X, *args, **kwargs):
151202
Xt = X
152-
for name, transform in self.steps[:-1]:
153-
Xt = transform.transform(Xt)
154-
return self.steps[-1][-1].predict(Xt)
203+
for name, transform in self._transforms:
204+
if transform is not None:
205+
Xt = transform.transform(Xt)
206+
return est_fn(Xt, *args, **kwargs)
207+
208+
@property
209+
def predict(self):
210+
"""Pipeline.predict(X)
155211
156-
def predict_proba(self, X):
157-
"""Applies transforms to the data, and the predict_proba method of the
212+
Applies transforms to the data, and the `predict` method of the
158213
final estimator. Valid only if the final estimator implements
159-
predict_proba."""
160-
Xt = X
161-
for name, transform in self.steps[:-1]:
162-
Xt = transform.transform(Xt)
163-
return self.steps[-1][-1].predict_proba(Xt)
214+
predict.
215+
216+
Parameters
217+
----------
218+
X : array-like, shape = [n_samples, n_features]
219+
Data samples, where n_samples is the number of samples and
220+
n_features is the number of features.
221+
"""
222+
return partial(self._run_pipeline, self._final_estimator.predict)
223+
224+
@property
225+
def predict_proba(self):
226+
"""Pipeline.predict_proba(X)
164227
165-
def decision_function(self, X):
166-
"""Applies transforms to the data, and the decision_function method of
228+
Applies transforms to the data, and the `predict_proba` method of
167229
the final estimator. Valid only if the final estimator implements
168-
decision_function."""
169-
Xt = X
170-
for name, transform in self.steps[:-1]:
171-
Xt = transform.transform(Xt)
172-
return self.steps[-1][-1].decision_function(Xt)
230+
predict_proba.
173231
174-
def predict_log_proba(self, X):
175-
Xt = X
176-
for name, transform in self.steps[:-1]:
177-
Xt = transform.transform(Xt)
178-
return self.steps[-1][-1].predict_log_proba(Xt)
232+
Parameters
233+
----------
234+
X : array-like, shape = [n_samples, n_features]
235+
Data samples, where n_samples is the number of samples and
236+
n_features is the number of features.
237+
"""
238+
return partial(self._run_pipeline, self._final_estimator.predict_proba)
179239

180-
def transform(self, X):
181-
"""Applies transforms to the data, and the transform method of the
240+
@property
241+
def predict_log_proba(self):
242+
"""Pipeline.predict_log_proba(X)
243+
244+
Applies transforms to the data, and the `predict_log_proba` method
245+
of the final estimator. Valid only if the final estimator implements
246+
predict_log_proba.
247+
248+
Parameters
249+
----------
250+
X : array-like, shape = [n_samples, n_features]
251+
Data samples, where n_samples is the number of samples and
252+
n_features is the number of features.
253+
"""
254+
return partial(self._run_pipeline,
255+
self._final_estimator.predict_log_proba)
256+
257+
@property
258+
def decision_function(self):
259+
"""Pipeline.decision_function(X)
260+
261+
Applies transforms to the data, and the `decision_function` method
262+
of the final estimator. Valid only if the final estimator implements
263+
decision_function.
264+
265+
Parameters
266+
----------
267+
X : array-like, shape = [n_samples, n_features]
268+
Data samples, where n_samples is the number of samples and
269+
n_features is the number of features.
270+
"""
271+
return partial(self._run_pipeline,
272+
self._final_estimator.decision_function)
273+
274+
@property
275+
def transform(self):
276+
"""Pipeline.transform(X)
277+
278+
Applies transforms to the data, and the `transform` method of the
182279
final estimator. Valid only if the final estimator implements
183-
transform."""
184-
Xt = X
185-
for name, transform in self.steps:
186-
Xt = transform.transform(Xt)
187-
return Xt
280+
transform.
188281
189-
def inverse_transform(self, X):
190-
if X.ndim == 1:
191-
X = X[None, :]
192-
Xt = X
193-
for name, step in self.steps[::-1]:
194-
Xt = step.inverse_transform(Xt)
195-
return Xt
282+
Parameters
283+
----------
284+
X : array-like, shape = [n_samples, n_features]
285+
Data samples, where n_samples is the number of samples and
286+
n_features is the number of features.
287+
"""
288+
return partial(self._run_pipeline, self._final_estimator.transform)
289+
290+
@property
291+
def inverse_transform(self):
292+
"""Pipeline.inverse_transform(X)
196293
197-
def score(self, X, y=None):
198-
"""Applies transforms to the data, and the score method of the
294+
Applies inverse transforms to the data from the last step to the
295+
first.
296+
297+
Parameters
298+
----------
299+
X : array-like, shape = [n_samples, n_features]
300+
Data samples, where n_samples is the number of samples and
301+
n_features is the number of features.
302+
"""
303+
inverse_transforms = [step.inverse_transform
304+
for name, step in self.steps[::-1] if step is not None]
305+
306+
def fn(X):
307+
if X.ndim == 1:
308+
X = X[None, :]
309+
Xt = X
310+
for inv_transform in inverse_transforms:
311+
Xt = inv_transform(Xt)
312+
return Xt
313+
return fn
314+
315+
@property
316+
def score(self):
317+
"""Pipeline.score(X, y=None)
318+
319+
Applies transforms to the data, and the `score` method of the
199320
final estimator. Valid only if the final estimator implements
200-
score."""
201-
Xt = X
202-
for name, transform in self.steps[:-1]:
203-
Xt = transform.transform(Xt)
204-
return self.steps[-1][-1].score(Xt, y)
321+
score.
322+
323+
Parameters
324+
----------
325+
X : array-like, shape = [n_samples, n_features]
326+
Data samples, where n_samples is the number of samples and
327+
n_features is the number of features.
328+
y : array-like, shape = [n_samples], optional
329+
Target vector relative to X;
330+
None for unsupervised learning.
331+
"""
332+
return partial(self._run_pipeline, self._final_estimator.score)
205333

206334
@property
207335
def _pairwise(self):

sklearn/tests/test_pipeline.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -289,6 +289,28 @@ def test_make_pipeline():
289289
assert_equal(pipe.steps[2][0], "fitparamt")
290290

291291

292+
def test_pipeline_attributes():
293+
"""Ensure that the Pipeline only provides post-fit methods that are present
294+
on the last step"""
295+
296+
def make(method):
297+
"""Make a pipeline whose estimator has specified method"""
298+
transf = TransfT()
299+
setattr(transf, method, lambda *args, **kwargs: True)
300+
return Pipeline([('est', transf)]).fit([[1]], [1])
301+
302+
attribs = ['predict_proba', 'predict_log_proba', 'predict',
303+
'decision_function', 'score', 'inverse_transform']
304+
305+
for attrib in attribs:
306+
pipeline = make(attrib)
307+
getattr(pipeline, attrib)(np.asarray([[1]]))
308+
for attrib2 in attribs:
309+
if attrib2 != attrib:
310+
assert_false(hasattr(pipeline, attrib2))
311+
>>>>>>> FIX make Pipeline methods properties as per #1805
312+
313+
292314
def test_feature_union_weights():
293315
# test feature union with transformer weights
294316
iris = load_iris()

0 commit comments

Comments
 (0)
0