diff --git a/doc/whats_new/v0.20.rst b/doc/whats_new/v0.20.rst index 8817e9a789cb9..dbefe12b7b369 100644 --- a/doc/whats_new/v0.20.rst +++ b/doc/whats_new/v0.20.rst @@ -15,7 +15,12 @@ enhancements to features released in 0.20.0. Changelog --------- +:mod:`sklearn.pipeline` +....................... +- |Fix| Fixed a regression in :class:`pipeline.Pipeline` where the ``steps`` + parameter may not have been updated correctly when a step is set to ``None`` + or ``'passthrough'``. :user:`Thomas Fan <thomasjpfan>`. .. _changes_0_20_1: diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index ef4470d91b2ae..a7e03c31294bb 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -184,9 +184,9 @@ def _iter(self, with_final=True): if not with_final: stop -= 1 - for name, trans in islice(self.steps, 0, stop): + for idx, (name, trans) in enumerate(islice(self.steps, 0, stop)): if trans is not None and trans != 'passthrough': - yield name, trans + yield idx, name, trans @property def _estimator_type(self): @@ -219,8 +219,7 @@ def _fit(self, X, y=None, **fit_params): step, param = pname.split('__', 1) fit_params_steps[step][param] = pval Xt = X - for step_idx, (name, transformer) in enumerate( - self._iter(with_final=False)): + for step_idx, name, transformer in self._iter(with_final=False): if hasattr(memory, 'location'): # joblib >= 0.12 if memory.location is None: @@ -341,7 +340,7 @@ def predict(self, X, **predict_params): y_pred : array-like """ Xt = X - for name, transform in self._iter(with_final=False): + for _, name, transform in self._iter(with_final=False): Xt = transform.transform(Xt) return self.steps[-1][-1].predict(Xt, **predict_params) @@ -390,7 +389,7 @@ def predict_proba(self, X): y_proba : array-like, shape = [n_samples, n_classes] """ Xt = X - for name, transform in self._iter(with_final=False): + for _, name, transform in self._iter(with_final=False): Xt = transform.transform(Xt) return self.steps[-1][-1].predict_proba(Xt) @@ -409,7 +408,7 @@ def 
decision_function(self, X): y_score : array-like, shape = [n_samples, n_classes] """ Xt = X - for name, transform in self._iter(with_final=False): + for _, name, transform in self._iter(with_final=False): Xt = transform.transform(Xt) return self.steps[-1][-1].decision_function(Xt) @@ -428,7 +427,7 @@ def predict_log_proba(self, X): y_score : array-like, shape = [n_samples, n_classes] """ Xt = X - for name, transform in self._iter(with_final=False): + for _, name, transform in self._iter(with_final=False): Xt = transform.transform(Xt) return self.steps[-1][-1].predict_log_proba(Xt) @@ -457,7 +456,7 @@ def transform(self): def _transform(self, X): Xt = X - for _, transform in self._iter(): + for _, _, transform in self._iter(): Xt = transform.transform(Xt) return Xt @@ -481,14 +480,14 @@ def inverse_transform(self): """ # raise AttributeError if necessary for hasattr behaviour # XXX: Handling the None case means we can't use if_delegate_has_method - for _, transform in self._iter(): + for _, _, transform in self._iter(): transform.inverse_transform return self._inverse_transform def _inverse_transform(self, X): Xt = X reverse_iter = reversed(list(self._iter())) - for _, transform in reverse_iter: + for _, _, transform in reverse_iter: Xt = transform.inverse_transform(Xt) return Xt @@ -515,7 +514,7 @@ def score(self, X, y=None, sample_weight=None): score : float """ Xt = X - for name, transform in self._iter(with_final=False): + for _, name, transform in self._iter(with_final=False): Xt = transform.transform(Xt) score_params = {} if sample_weight is not None: diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py index 17793e35998a4..ceec724eec10a 100644 --- a/sklearn/tests/test_pipeline.py +++ b/sklearn/tests/test_pipeline.py @@ -574,6 +574,27 @@ def test_pipeline_named_steps(): assert pipeline.named_steps.mult is mult2 +@pytest.mark.parametrize('passthrough', [None, 'passthrough']) +def test_pipeline_correctly_adjusts_steps(passthrough): + X = 
np.array([[1]]) + y = np.array([1]) + mult2 = Mult(mult=2) + mult3 = Mult(mult=3) + mult5 = Mult(mult=5) + + pipeline = Pipeline([ + ('m2', mult2), + ('bad', passthrough), + ('m3', mult3), + ('m5', mult5) + ]) + + pipeline.fit(X, y) + expected_names = ['m2', 'bad', 'm3', 'm5'] + actual_names = [name for name, _ in pipeline.steps] + assert expected_names == actual_names + + @pytest.mark.parametrize('passthrough', [None, 'passthrough']) def test_set_pipeline_step_passthrough(passthrough): X = np.array([[1]])