From ab2acbd29bcb88ef00b039dd75e5a45d1e59c17b Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@columbia.edu>
Date: Tue, 20 Nov 2018 10:52:05 -0500
Subject: [PATCH 01/54] work on get_feature_names for pipeline

---
 sklearn/base.py                        | 15 +++++++++++++++
 sklearn/compose/_column_transformer.py |  8 ++++++--
 sklearn/impute.py                      |  4 ++--
 sklearn/pipeline.py                    | 15 +++++++++++++++
 sklearn/preprocessing/data.py          | 18 +++++++++---------
 5 files changed, 47 insertions(+), 13 deletions(-)
diff --git a/sklearn/base.py b/sklearn/base.py
index 34998270cea88..b474f774bcf1c 100644
--- a/sklearn/base.py
+++ b/sklearn/base.py
@@ -509,6 +509,21 @@ def fit_predict(self, X, y=None):
         return self.fit(X).predict(X)
 
 
+class OneToOneMixin(object):
+    """Provides get_feature_names for simple transformers
+
+    Assumes there's a 1-to-1 correspondence between input features
+    and output features.
+    """
+
+    def get_feature_names(self, input_features=None):
+        if input_features is not None:
+            return input_features
+        else:
+            raise ValueError("Don't know how to get"
+                             " input feature names for {}".format(self))
+
+
 ###############################################################################
 class MetaEstimatorMixin(object):
     """Mixin class for all meta estimators in scikit-learn."""
diff --git a/sklearn/compose/_column_transformer.py b/sklearn/compose/_column_transformer.py
index 990374c27affe..540b49fc0852b 100644
--- a/sklearn/compose/_column_transformer.py
+++ b/sklearn/compose/_column_transformer.py
@@ -325,7 +325,7 @@ def get_feature_names(self):
         """
         check_is_fitted(self, 'transformers_')
         feature_names = []
-        for name, trans, _, _ in self._iter(fitted=True):
+        for name, trans, columns, _ in self._iter(fitted=True):
             if trans == 'drop':
                 continue
             elif trans == 'passthrough':
@@ -336,8 +336,12 @@ def get_feature_names(self):
                 raise AttributeError("Transformer %s (type %s) does not "
                                      "provide get_feature_names."
                                      % (str(name), type(trans).__name__))
+            try:
+                more_names = trans.get_feature_names(input_features=columns)
+            except TypeError:
+                more_names = trans.get_feature_names()
             feature_names.extend([name + "__" + f for f in
-                                  trans.get_feature_names()])
+                                 more_names])
         return feature_names
 
     def _update_fitted_transformers(self, transformers):
diff --git a/sklearn/impute.py b/sklearn/impute.py
index a10f6c9eb947f..e55a7a7e19e57 100644
--- a/sklearn/impute.py
+++ b/sklearn/impute.py
@@ -11,7 +11,7 @@
 from scipy import sparse
 from scipy import stats
 
-from .base import BaseEstimator, TransformerMixin
+from .base import BaseEstimator, TransformerMixin, OneToOneMixin
 from .utils import check_array
 from .utils.sparsefuncs import _get_median
 from .utils.validation import check_is_fitted
@@ -90,7 +90,7 @@ def _most_frequent(array, extra_value, n_repeat):
             return extra_value
 
 
-class SimpleImputer(BaseEstimator, TransformerMixin):
+class SimpleImputer(BaseEstimator, TransformerMixin, OneToOneMixin):
     """Imputation transformer for completing missing values.
 
     Read more in the :ref:`User Guide <impute>`.
diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py
index 3f69f5c18558f..0d370b2283a64 100644
--- a/sklearn/pipeline.py
+++ b/sklearn/pipeline.py
@@ -531,6 +531,21 @@ def _pairwise(self):
         # check if first estimator expects pairwise input
         return getattr(self.steps[0][1], '_pairwise', False)
 
+    def get_feature_names(self, input_features=None):
+        feature_names = input_features
+        with_final = hasattr(self._final_estimator, "get_feature_names")
+        
+        for name, transform in self._iter(with_final=with_final):
+            if not hasattr(transform, "get_feature_names"):
+                raise TypeError("Transformer {} does provide"
+                                " get_feature_names".format(name))
+            try:
+                feature_names = transform.get_feature_names(
+                    input_features=feature_names)
+            except TypeError:
+                    feature_names = transform.get_feature_names()
+        return feature_names
+
 
 def _name_estimators(estimators):
     """Generate names for estimators."""
diff --git a/sklearn/preprocessing/data.py b/sklearn/preprocessing/data.py
index 77c2d2cc970fc..d2167b683cdd9 100644
--- a/sklearn/preprocessing/data.py
+++ b/sklearn/preprocessing/data.py
@@ -19,7 +19,7 @@
 from scipy import stats
 from scipy import optimize
 
-from ..base import BaseEstimator, TransformerMixin
+from ..base import BaseEstimator, TransformerMixin, OneToOneMixin
 from ..externals import six
 from ..utils import check_array
 from ..utils.extmath import row_norms
@@ -199,7 +199,7 @@ def scale(X, axis=0, with_mean=True, with_std=True, copy=True):
     return X
 
 
-class MinMaxScaler(BaseEstimator, TransformerMixin):
+class MinMaxScaler(BaseEstimator, TransformerMixin, OneToOneMixin):
     """Transforms features by scaling each feature to a given range.
 
     This estimator scales and translates each feature individually such
@@ -477,7 +477,7 @@ def minmax_scale(X, feature_range=(0, 1), axis=0, copy=True):
     return X
 
 
-class StandardScaler(BaseEstimator, TransformerMixin):
+class StandardScaler(BaseEstimator, TransformerMixin, OneToOneMixin):
     """Standardize features by removing the mean and scaling to unit variance
 
     The standard score of a sample `x` is calculated as:
@@ -798,7 +798,7 @@ def inverse_transform(self, X, copy=None):
         return X
 
 
-class MaxAbsScaler(BaseEstimator, TransformerMixin):
+class MaxAbsScaler(BaseEstimator, TransformerMixin, OneToOneMixin):
     """Scale each feature by its maximum absolute value.
 
     This estimator scales and translates each feature individually such
@@ -1024,7 +1024,7 @@ def maxabs_scale(X, axis=0, copy=True):
     return X
 
 
-class RobustScaler(BaseEstimator, TransformerMixin):
+class RobustScaler(BaseEstimator, TransformerMixin, OneToOneMixin):
     """Scale features using statistics that are robust to outliers.
 
     This Scaler removes the median and scales the data according to
@@ -1619,7 +1619,7 @@ def normalize(X, norm='l2', axis=1, copy=True, return_norm=False):
         return X
 
 
-class Normalizer(BaseEstimator, TransformerMixin):
+class Normalizer(BaseEstimator, TransformerMixin, OneToOneMixin):
     """Normalize samples individually to unit norm.
 
     Each sample (i.e. each row of the data matrix) with at least one
@@ -1754,7 +1754,7 @@ def binarize(X, threshold=0.0, copy=True):
     return X
 
 
-class Binarizer(BaseEstimator, TransformerMixin):
+class Binarizer(BaseEstimator, TransformerMixin, OneToOneMixin):
     """Binarize data (set feature values to 0 or 1) according to a threshold
 
     Values greater than the threshold map to 1, while values less than
@@ -1988,7 +1988,7 @@ def add_dummy_feature(X, value=1.0):
         return np.hstack((np.full((n_samples, 1), value), X))
 
 
-class QuantileTransformer(BaseEstimator, TransformerMixin):
+class QuantileTransformer(BaseEstimator, TransformerMixin, OneToOneMixin):
     """Transform features using quantiles information.
 
     This method transforms the features to follow a uniform or a normal
@@ -2488,7 +2488,7 @@ def quantile_transform(X, axis=0, n_quantiles=1000,
                          " axis={}".format(axis))
 
 
-class PowerTransformer(BaseEstimator, TransformerMixin):
+class PowerTransformer(BaseEstimator, TransformerMixin, OneToOneMixin):
     """Apply a power transform featurewise to make data more Gaussian-like.
 
     Power transforms are a family of parametric, monotonic transformations

From 3bc674b5cd24b203d04a35ca3ae552fcd8e094b9 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@columbia.edu>
Date: Tue, 20 Nov 2018 13:27:20 -0500
Subject: [PATCH 02/54] fix SimpleImputer get_feature_names

---
 sklearn/impute.py | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/sklearn/impute.py b/sklearn/impute.py
index e55a7a7e19e57..d16e7479dd3a4 100644
--- a/sklearn/impute.py
+++ b/sklearn/impute.py
@@ -11,7 +11,7 @@
 from scipy import sparse
 from scipy import stats
 
-from .base import BaseEstimator, TransformerMixin, OneToOneMixin
+from .base import BaseEstimator, TransformerMixin
 from .utils import check_array
 from .utils.sparsefuncs import _get_median
 from .utils.validation import check_is_fitted
@@ -90,7 +90,7 @@ def _most_frequent(array, extra_value, n_repeat):
             return extra_value
 
 
-class SimpleImputer(BaseEstimator, TransformerMixin, OneToOneMixin):
+class SimpleImputer(BaseEstimator, TransformerMixin):
     """Imputation transformer for completing missing values.
 
     Read more in the :ref:`User Guide <impute>`.
@@ -257,7 +257,8 @@ def fit(self, X, y=None):
                                                self.strategy,
                                                self.missing_values,
                                                fill_value)
-
+        invalid_mask = _get_mask(self.statistics_, np.nan)
+        self._valid_mask = np.logical_not(invalid_mask)
         return self
 
     def _sparse_fit(self, X, strategy, missing_values, fill_value):
@@ -373,8 +374,8 @@ def transform(self, X):
             valid_statistics = statistics
         else:
             # same as np.isnan but also works for object dtypes
-            invalid_mask = _get_mask(statistics, np.nan)
-            valid_mask = np.logical_not(invalid_mask)
+            valid_mask = self._valid_mask
+            invalid_mask = np.logical_not(valid_mask)
             valid_statistics = statistics[valid_mask]
             valid_statistics_indexes = np.flatnonzero(valid_mask)
 
@@ -408,6 +409,11 @@ def transform(self, X):
 
         return X
 
+    def get_feature_names(self, input_features=None):
+        if input_features is None:
+            raise TypeError("Don't have input_features")
+        return np.array(input_features)[self._valid_mask]
+
 
 class MissingIndicator(BaseEstimator, TransformerMixin):
     """Binary indicators for missing values.

From 1c4a78f976f4498920c0c5de3530d9212f728796 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@columbia.edu>
Date: Tue, 20 Nov 2018 15:01:49 -0500
Subject: [PATCH 03/54] use hasattr(transform) to check whether to use final
 estimator in get_feature_names

---
 sklearn/pipeline.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py
index 0d370b2283a64..6388d4eea1844 100644
--- a/sklearn/pipeline.py
+++ b/sklearn/pipeline.py
@@ -533,8 +533,7 @@ def _pairwise(self):
 
     def get_feature_names(self, input_features=None):
         feature_names = input_features
-        with_final = hasattr(self._final_estimator, "get_feature_names")
-        
+        with_final = hasattr(self._final_estimator, "transform")
         for name, transform in self._iter(with_final=with_final):
             if not hasattr(transform, "get_feature_names"):
                 raise TypeError("Transformer {} does provide"

From 788193061f5c233cdf707c5c281cca40be5f47a7 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@columbia.edu>
Date: Tue, 20 Nov 2018 15:39:56 -0500
Subject: [PATCH 04/54] add some docstrings

---
 sklearn/base.py     | 15 +++++++++++++++
 sklearn/impute.py   | 12 ++++++++++++
 sklearn/pipeline.py | 16 ++++++++++++++++
 3 files changed, 43 insertions(+)

diff --git a/sklearn/base.py b/sklearn/base.py
index b474f774bcf1c..1629fac63503e 100644
--- a/sklearn/base.py
+++ b/sklearn/base.py
@@ -517,6 +517,21 @@ class OneToOneMixin(object):
     """
 
     def get_feature_names(self, input_features=None):
+        """Get feature names for transformation.
+
+        Returns input_features as this transformation
+        doesn't add or drop features.
+
+        Parameters
+        ----------
+        input_feature : array-like of string
+            Input feature names.
+
+        Returns
+        -------
+        feature_names : array-like of string
+            Transformed feature names
+        """
         if input_features is not None:
             return input_features
         else:
diff --git a/sklearn/impute.py b/sklearn/impute.py
index d16e7479dd3a4..5b23ab8f866a8 100644
--- a/sklearn/impute.py
+++ b/sklearn/impute.py
@@ -410,6 +410,18 @@ def transform(self, X):
         return X
 
     def get_feature_names(self, input_features=None):
+        """Get feature names for transformation.
+
+        Parameters
+        ----------
+        input_feature : array-like of string
+            Input feature names.
+
+        Returns
+        -------
+        feature_names : array-like of string
+            Transformed feature names
+        """
         if input_features is None:
             raise TypeError("Don't have input_features")
         return np.array(input_features)[self._valid_mask]
diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py
index 6388d4eea1844..64e37e29b0365 100644
--- a/sklearn/pipeline.py
+++ b/sklearn/pipeline.py
@@ -532,6 +532,22 @@ def _pairwise(self):
         return getattr(self.steps[0][1], '_pairwise', False)
 
     def get_feature_names(self, input_features=None):
+        """Get feature names for transformation.
+
+        Transform input features using the pipeline.
+        If the last step is a transformer, it's included
+        in the transformation, otherwise it's not.
+
+        Parameters
+        ----------
+        input_feature : array-like of string
+            Input feature names.
+
+        Returns
+        -------
+        feature_names : array-like of string
+            Transformed feature names
+        """
         feature_names = input_features
         with_final = hasattr(self._final_estimator, "transform")
         for name, transform in self._iter(with_final=with_final):

From de63353cd8e48d2dd14194c28ad33bf23053147f Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@columbia.edu>
Date: Tue, 27 Nov 2018 15:33:51 -0500
Subject: [PATCH 05/54] fix docstring

---
 sklearn/base.py     | 2 +-
 sklearn/impute.py   | 2 +-
 sklearn/pipeline.py | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/sklearn/base.py b/sklearn/base.py
index 1629fac63503e..392eb6ed60573 100644
--- a/sklearn/base.py
+++ b/sklearn/base.py
@@ -524,7 +524,7 @@ def get_feature_names(self, input_features=None):
 
         Parameters
         ----------
-        input_feature : array-like of string
+        input_features : array-like of string
             Input feature names.
 
         Returns
diff --git a/sklearn/impute.py b/sklearn/impute.py
index 5b23ab8f866a8..c2368731e148e 100644
--- a/sklearn/impute.py
+++ b/sklearn/impute.py
@@ -414,7 +414,7 @@ def get_feature_names(self, input_features=None):
 
         Parameters
         ----------
-        input_feature : array-like of string
+        input_features : array-like of string
             Input feature names.
 
         Returns
diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py
index 64e37e29b0365..3fb3119941491 100644
--- a/sklearn/pipeline.py
+++ b/sklearn/pipeline.py
@@ -540,7 +540,7 @@ def get_feature_names(self, input_features=None):
 
         Parameters
         ----------
-        input_feature : array-like of string
+        input_features : array-like of string
             Input feature names.
 
         Returns

From 6ca8b0360c02d71f899cec12380ffec0b1fcb983 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@columbia.edu>
Date: Wed, 27 Feb 2019 13:49:59 +0100
Subject: [PATCH 06/54] add set_feature_names to pipeline, remove hack in
 pipeline.get_feature_names

---
 sklearn/pipeline.py | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py
index eba2d42abe7fd..b2bac34ae08ae 100644
--- a/sklearn/pipeline.py
+++ b/sklearn/pipeline.py
@@ -246,6 +246,10 @@ def _fit(self, X, y=None, **fit_params):
             # transformer. This is necessary when loading the transformer
             # from the cache.
             self.steps[step_idx] = (name, fitted_transformer)
+
+        if hasattr(X, 'columns'):
+            self.set_feature_names(X.columns)
+
         if self._final_estimator == 'passthrough':
             return Xt, {}
         return Xt, fit_params_steps[self.steps[-1][0]]
@@ -529,6 +533,20 @@ def classes_(self):
     def _pairwise(self):
         # check if first estimator expects pairwise input
         return getattr(self.steps[0][1], '_pairwise', False)
+    
+    def set_feature_names(self, input_features):
+        self.input_features_ = input_features
+        feature_names = input_features
+        for name, transform in self._iter(with_final=True):
+            transform.input_features_ = feature_names
+            if not hasattr(transform, "get_feature_names"):
+                raise TypeError("Transformer {} does provide"
+                                " get_feature_names".format(name))
+            try:
+                feature_names = transform.get_feature_names(
+                    input_features=feature_names)
+            except TypeError:
+                feature_names = transform.get_feature_names()
 
     def get_feature_names(self, input_features=None):
         """Get feature names for transformation.
@@ -548,8 +566,7 @@ def get_feature_names(self, input_features=None):
             Transformed feature names
         """
         feature_names = input_features
-        with_final = hasattr(self._final_estimator, "transform")
-        for name, transform in self._iter(with_final=with_final):
+        for name, transform in self._iter(with_final=True):
             if not hasattr(transform, "get_feature_names"):
                 raise TypeError("Transformer {} does provide"
                                 " get_feature_names".format(name))

From ddd03413c010415b9806d519cf9ac3f730e41827 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@columbia.edu>
Date: Wed, 27 Feb 2019 14:08:15 +0100
Subject: [PATCH 07/54] fix to use new _iter, deal with last transformer

---
 sklearn/pipeline.py | 27 ++++++++++++++++-----------
 1 file changed, 16 insertions(+), 11 deletions(-)

diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py
index b2bac34ae08ae..1b2dd98d2180c 100644
--- a/sklearn/pipeline.py
+++ b/sklearn/pipeline.py
@@ -247,9 +247,6 @@ def _fit(self, X, y=None, **fit_params):
             # from the cache.
             self.steps[step_idx] = (name, fitted_transformer)
 
-        if hasattr(X, 'columns'):
-            self.set_feature_names(X.columns)
-
         if self._final_estimator == 'passthrough':
             return Xt, {}
         return Xt, fit_params_steps[self.steps[-1][0]]
@@ -283,6 +280,10 @@ def fit(self, X, y=None, **fit_params):
         Xt, fit_params = self._fit(X, y, **fit_params)
         if self._final_estimator != 'passthrough':
             self._final_estimator.fit(Xt, y, **fit_params)
+
+        if hasattr(X, 'columns'):
+            self.set_feature_names(X.columns)
+
         return self
 
     def fit_transform(self, X, y=None, **fit_params):
@@ -315,11 +316,14 @@ def fit_transform(self, X, y=None, **fit_params):
         last_step = self._final_estimator
         Xt, fit_params = self._fit(X, y, **fit_params)
         if hasattr(last_step, 'fit_transform'):
-            return last_step.fit_transform(Xt, y, **fit_params)
-        elif last_step == 'passthrough':
-            return Xt
-        else:
-            return last_step.fit(Xt, y, **fit_params).transform(Xt)
+            Xt = last_step.fit_transform(Xt, y, **fit_params)
+        elif last_step != 'passthrough':
+            Xt = last_step.fit(Xt, y, **fit_params).transform(Xt)
+
+        if hasattr(X, 'columns'):
+            self.set_feature_names(X.columns)
+
+        return Xt
 
     @if_delegate_has_method(delegate='_final_estimator')
     def predict(self, X, **predict_params):
@@ -533,11 +537,11 @@ def classes_(self):
     def _pairwise(self):
         # check if first estimator expects pairwise input
         return getattr(self.steps[0][1], '_pairwise', False)
-    
+
     def set_feature_names(self, input_features):
         self.input_features_ = input_features
         feature_names = input_features
-        for name, transform in self._iter(with_final=True):
+        for _, name, transform in self._iter(with_final=False):
             transform.input_features_ = feature_names
             if not hasattr(transform, "get_feature_names"):
                 raise TypeError("Transformer {} does provide"
@@ -547,6 +551,7 @@ def set_feature_names(self, input_features):
                     input_features=feature_names)
             except TypeError:
                 feature_names = transform.get_feature_names()
+        self._final_estimator.input_features_ = feature_names
 
     def get_feature_names(self, input_features=None):
         """Get feature names for transformation.
@@ -566,7 +571,7 @@ def get_feature_names(self, input_features=None):
             Transformed feature names
         """
         feature_names = input_features
-        for name, transform in self._iter(with_final=True):
+        for _, name, transform in self._iter(with_final=True):
             if not hasattr(transform, "get_feature_names"):
                 raise TypeError("Transformer {} does provide"
                                 " get_feature_names".format(name))

From ba053acdb619ea11c1c4831eeb25746ac715fa74 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@columbia.edu>
Date: Wed, 27 Feb 2019 15:12:36 +0100
Subject: [PATCH 08/54] always call generation of feature names, generate if X
 has none.

---
 sklearn/impute.py   |  3 ++-
 sklearn/pipeline.py | 32 +++++++++++++++++++++++++++-----
 2 files changed, 29 insertions(+), 6 deletions(-)

diff --git a/sklearn/impute.py b/sklearn/impute.py
index 95dfe046537a3..a201db198c384 100644
--- a/sklearn/impute.py
+++ b/sklearn/impute.py
@@ -437,7 +437,8 @@ def get_feature_names(self, input_features=None):
             Transformed feature names
         """
         if input_features is None:
-            raise TypeError("Don't have input_features")
+            input_features = ['x%d' % i
+                              for i in range(self.statistics_.shape[0])]
         return np.array(input_features)[self._valid_mask]
 
     def _more_tags(self):
diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py
index 1b2dd98d2180c..847760c64dc61 100644
--- a/sklearn/pipeline.py
+++ b/sklearn/pipeline.py
@@ -26,6 +26,17 @@
 __all__ = ['Pipeline', 'FeatureUnion', 'make_pipeline', 'make_union']
 
 
+def _get_feature_names(X):
+    if hasattr(X, 'columns'):
+        feature_names = X.columns
+    elif getattr(X, 'ndim', 0) > 1:
+        # no 'columns' on X in this branch: generate default names x0, x1, ...
+        feature_names = ['x%d' % i for i in range(X.shape[1])]
+    else:
+        feature_names = None
+    return feature_names
+
+
 class Pipeline(_BaseComposition):
     """Pipeline of transforms with a final estimator.
 
@@ -280,9 +291,7 @@ def fit(self, X, y=None, **fit_params):
         Xt, fit_params = self._fit(X, y, **fit_params)
         if self._final_estimator != 'passthrough':
             self._final_estimator.fit(Xt, y, **fit_params)
-
-        if hasattr(X, 'columns'):
-            self.set_feature_names(X.columns)
+        self.set_feature_names(_get_feature_names(X))
 
         return self
 
@@ -320,8 +329,7 @@ def fit_transform(self, X, y=None, **fit_params):
         elif last_step != 'passthrough':
             Xt = last_step.fit(Xt, y, **fit_params).transform(Xt)
 
-        if hasattr(X, 'columns'):
-            self.set_feature_names(X.columns)
+        self.set_feature_names(_get_feature_names(X))
 
         return Xt
 
@@ -539,6 +547,20 @@ def _pairwise(self):
         return getattr(self.steps[0][1], '_pairwise', False)
 
     def set_feature_names(self, input_features):
+        """Set the input feature names for all steps.
+        
+        Sets the input_features_ attribute on the pipeline and
+        on all pipeline steps using the provided input feature names
+        as input for the first step.
+        
+        Some estimators like `ColumnTransformer` and `CountVectorizer`
+        might ignore the provided input feature names.
+        
+        Parameters
+        ----------
+        
+        
+        """
         self.input_features_ = input_features
         feature_names = input_features
         for _, name, transform in self._iter(with_final=False):

From 5da22070f98fe614edee20fec2c5bafacb0c0ac4 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@columbia.edu>
Date: Wed, 27 Feb 2019 15:28:29 +0100
Subject: [PATCH 09/54] add get_feature_names to feature selection estimators

---
 sklearn/feature_selection/base.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/sklearn/feature_selection/base.py b/sklearn/feature_selection/base.py
index 5add330188f78..3a92a0145a718 100644
--- a/sklearn/feature_selection/base.py
+++ b/sklearn/feature_selection/base.py
@@ -119,3 +119,10 @@ def inverse_transform(self, X):
         Xt = np.zeros((X.shape[0], support.size), dtype=X.dtype)
         Xt[:, support] = X
         return Xt
+    
+    def get_feature_names(self, input_features=None):
+        mask = self.get_support()
+        if input_features is None:
+            input_features = ['x%d' % i
+                              for i in range(mask.shape[0])]
+        return np.array(input_features)[mask]

From 58d65b1ff8145207c4118e0d2cfe171aec1adf32 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@columbia.edu>
Date: Wed, 27 Feb 2019 15:28:40 +0100
Subject: [PATCH 10/54] add basic test for input features in pipeline

---
 sklearn/tests/test_pipeline.py | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py
index 259876acd1a42..db7a77a5feaf9 100644
--- a/sklearn/tests/test_pipeline.py
+++ b/sklearn/tests/test_pipeline.py
@@ -30,6 +30,7 @@
 from sklearn.decomposition import PCA, TruncatedSVD
 from sklearn.datasets import load_iris
 from sklearn.preprocessing import StandardScaler
+from sklearn.impute import SimpleImputer
 from sklearn.feature_extraction.text import CountVectorizer
 from sklearn.utils._joblib import Memory
 from sklearn.utils._joblib import __version__ as joblib_version
@@ -1048,3 +1049,29 @@ def test_make_pipeline_memory():
     assert pipeline.memory is None
 
     shutil.rmtree(cachedir)
+    
+
+def test_input_feature_names_pandas():
+    pass
+
+    
+def test_set_input_features():
+    pipe = Pipeline(steps=[
+        ('imputer', SimpleImputer(strategy='median')),
+        ('scaler', StandardScaler()),
+        ('select', SelectKBest(k=2)),
+        ('clf', LogisticRegression())])
+    iris = load_iris()
+    pipe.fit(iris.data, iris.target)
+    xs = np.array(['x0', 'x1', 'x2', 'x3'])
+    assert_array_equal(pipe.input_features_, xs)
+    mask = pipe.named_steps.select.get_support()
+    assert_array_equal(pipe.named_steps.clf.input_features_, xs[mask])
+    pipe.set_feature_names(iris.feature_names)
+    assert_array_equal(pipe.input_features_, iris.feature_names)
+    assert_array_equal(pipe.named_steps.clf.input_features_,
+                       np.array(iris.feature_names)[mask])
+
+    
+def test_input_features_count_vectorizer():
+    pass

From 8026d8d9650d9529a102e16e928932046602a695 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@columbia.edu>
Date: Wed, 27 Feb 2019 15:30:11 +0100
Subject: [PATCH 11/54] pep8, fixup docstring

---
 sklearn/feature_selection/base.py |  2 +-
 sklearn/pipeline.py               | 12 +++++++-----
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/sklearn/feature_selection/base.py b/sklearn/feature_selection/base.py
index 3a92a0145a718..e03102989b6a0 100644
--- a/sklearn/feature_selection/base.py
+++ b/sklearn/feature_selection/base.py
@@ -119,7 +119,7 @@ def inverse_transform(self, X):
         Xt = np.zeros((X.shape[0], support.size), dtype=X.dtype)
         Xt[:, support] = X
         return Xt
-    
+
     def get_feature_names(self, input_features=None):
         mask = self.get_support()
         if input_features is None:
diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py
index 847760c64dc61..c77074711bcbd 100644
--- a/sklearn/pipeline.py
+++ b/sklearn/pipeline.py
@@ -548,18 +548,20 @@ def _pairwise(self):
 
     def set_feature_names(self, input_features):
         """Set the input feature names for all steps.
-        
+
         Sets the input_features_ attribute on the pipeline and
         on all pipeline steps using the provided input feature names
         as input for the first step.
-        
+
         Some estimators like `ColumnTransformer` and `CountVectorizer`
         might ignore the provided input feature names.
-        
+
         Parameters
         ----------
-        
-        
+        input_features : array-like of string or None
+            Feature names to use as input feature names for the first step
+            of the pipeline.
+
         """
         self.input_features_ = input_features
         feature_names = input_features

From 6a61ed9dcd5ff004ce72394d1fa22477c8b57512 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@columbia.edu>
Date: Wed, 27 Feb 2019 15:37:23 +0100
Subject: [PATCH 12/54] add test for count vectorizer

---
 sklearn/tests/test_pipeline.py | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py
index db7a77a5feaf9..f54416e939e33 100644
--- a/sklearn/tests/test_pipeline.py
+++ b/sklearn/tests/test_pipeline.py
@@ -1072,6 +1072,19 @@ def test_set_input_features():
     assert_array_equal(pipe.named_steps.clf.input_features_,
                        np.array(iris.feature_names)[mask])
 
-    
-def test_input_features_count_vectorizer():
+
+def test_input_features_passthrough():
     pass
+
+
+def test_input_features_count_vectorizer():
+    pipe = Pipeline(steps=[
+        ('vect', CountVectorizer()),
+        ('clf', LogisticRegression())])
+    y = ["pizza" in x for x in JUNK_FOOD_DOCS]
+    pipe.fit(JUNK_FOOD_DOCS, y)
+    assert_array_equal(pipe.named_steps.clf.input_features_,
+                       ['beer', 'burger', 'coke', 'copyright', 'pizza', 'the'])
+    pipe.set_feature_names(["nonsense_is_ignored"])
+    assert_array_equal(pipe.named_steps.clf.input_features_,
+                       ['beer', 'burger', 'coke', 'copyright', 'pizza', 'the'])
\ No newline at end of file

From e0c0a5400208f62b1e9007064d4a5cfc6344099d Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@columbia.edu>
Date: Wed, 27 Feb 2019 15:39:59 +0100
Subject: [PATCH 13/54] add test for passthrough

---
 sklearn/tests/test_pipeline.py | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py
index f54416e939e33..b7861743b7126 100644
--- a/sklearn/tests/test_pipeline.py
+++ b/sklearn/tests/test_pipeline.py
@@ -1074,7 +1074,18 @@ def test_set_input_features():
 
 
 def test_input_features_passthrough():
-    pass
+    pipe = Pipeline(steps=[
+        ('imputer', 'passthrough'),
+        ('scaler', StandardScaler()),
+        ('select', 'passthrough'),
+        ('clf', LogisticRegression())])
+    iris = load_iris()
+    pipe.fit(iris.data, iris.target)
+    xs = ['x0', 'x1', 'x2', 'x3']
+    assert_array_equal(pipe.named_steps.clf.input_features_, xs)
+    pipe.set_feature_names(iris.feature_names)
+    assert_array_equal(pipe.named_steps.clf.input_features_,
+                       iris.feature_names)
 
 
 def test_input_features_count_vectorizer():

From 968163b3657a900571e6a46794efd7c09d86f3a1 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@columbia.edu>
Date: Wed, 27 Feb 2019 15:43:52 +0100
Subject: [PATCH 14/54] add tests for pandas feature names

---
 sklearn/tests/test_pipeline.py | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py
index b7861743b7126..86df228f85abd 100644
--- a/sklearn/tests/test_pipeline.py
+++ b/sklearn/tests/test_pipeline.py
@@ -1049,12 +1049,8 @@ def test_make_pipeline_memory():
     assert pipeline.memory is None
 
     shutil.rmtree(cachedir)
-    
 
-def test_input_feature_names_pandas():
-    pass
 
-    
 def test_set_input_features():
     pipe = Pipeline(steps=[
         ('imputer', SimpleImputer(strategy='median')),
@@ -1073,6 +1069,20 @@ def test_set_input_features():
                        np.array(iris.feature_names)[mask])
 
 
+def test_input_feature_names_pandas():
+    pd = pytest.importorskip("pandas")
+    pipe = Pipeline(steps=[
+        ('imputer', SimpleImputer(strategy='median')),
+        ('scaler', StandardScaler()),
+        ('select', SelectKBest(k=2)),
+        ('clf', LogisticRegression())])
+    iris = load_iris()
+    df = pd.DataFrame(iris.data, names=iris.feature_names)
+    pipe.fit(df, iris.target)
+    assert_array_equal(pipe.named_steps.clf.input_features_,
+                       iris.feature_names)
+
+
 def test_input_features_passthrough():
     pipe = Pipeline(steps=[
         ('imputer', 'passthrough'),
@@ -1098,4 +1108,4 @@ def test_input_features_count_vectorizer():
                        ['beer', 'burger', 'coke', 'copyright', 'pizza', 'the'])
     pipe.set_feature_names(["nonsense_is_ignored"])
     assert_array_equal(pipe.named_steps.clf.input_features_,
-                       ['beer', 'burger', 'coke', 'copyright', 'pizza', 'the'])
\ No newline at end of file
+                       ['beer', 'burger', 'coke', 'copyright', 'pizza', 'the'])

From 3fd5f6dd7ab0a7da60baf42f32aa6d650bfdfbfe Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@columbia.edu>
Date: Wed, 27 Feb 2019 16:02:27 +0100
Subject: [PATCH 15/54] add feature plot with feature names to pipeline anova
 example

---
 .../feature_selection/plot_feature_selection_pipeline.py  | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/examples/feature_selection/plot_feature_selection_pipeline.py b/examples/feature_selection/plot_feature_selection_pipeline.py
index c4b61990ef6e5..5b470af376535 100644
--- a/examples/feature_selection/plot_feature_selection_pipeline.py
+++ b/examples/feature_selection/plot_feature_selection_pipeline.py
@@ -6,6 +6,7 @@
 Simple usage of Pipeline that runs successively a univariate
 feature selection with anova and then a C-SVM of the selected features.
 """
+import matplotlib.pyplot as plt
 from sklearn import svm
 from sklearn.datasets import samples_generator
 from sklearn.feature_selection import SelectKBest, f_regression
@@ -17,7 +18,7 @@
 
 # import some data to play with
 X, y = samples_generator.make_classification(
-    n_features=20, n_informative=3, n_redundant=0, n_classes=4,
+    n_features=20, n_informative=3, n_redundant=0, n_classes=2,
     n_clusters_per_class=2)
 
 X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
@@ -32,3 +33,8 @@
 anova_svm.fit(X_train, y_train)
 y_pred = anova_svm.predict(X_test)
 print(classification_report(y_test, y_pred))
+
+# access and plot the coefficients of the fitted model
+plt.bar((0, 1, 2), anova_svm.named_steps.svc.coef_.ravel())
+plt.xticks((0, 1, 2), anova_svm.named_steps.svc.input_features_)
+plt.show()

From d7c66e1dad99619a96001008a13f7d37305ddaaf Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Wed, 27 Feb 2019 16:03:49 +0100
Subject: [PATCH 16/54] Improve the titanic column transformer example

---
 .../plot_column_transformer_mixed_types.py    | 46 +++++++++++++++++--
 1 file changed, 41 insertions(+), 5 deletions(-)

diff --git a/examples/compose/plot_column_transformer_mixed_types.py b/examples/compose/plot_column_transformer_mixed_types.py
index 45898fe27e911..ef000c4a0077e 100644
--- a/examples/compose/plot_column_transformer_mixed_types.py
+++ b/examples/compose/plot_column_transformer_mixed_types.py
@@ -68,16 +68,52 @@
 
 # Append classifier to preprocessing pipeline.
 # Now we have a full prediction pipeline.
-clf = Pipeline(steps=[('preprocessor', preprocessor),
-                      ('classifier', LogisticRegression(solver='lbfgs'))])
+pipeline = Pipeline(steps=[('preprocessor', preprocessor),
+                           ('classifier', LogisticRegression(solver='lbfgs'))])
 
 X = data.drop('survived', axis=1)
 y = data['survived']
 
 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
 
-clf.fit(X_train, y_train)
-print("model score: %.3f" % clf.score(X_test, y_test))
+pipeline.fit(X_train, y_train)
+print("model score: %.3f" % pipeline.score(X_test, y_test))
+
+
+###############################################################################
+# Introspecting the coefficients values of the classifier
+###############################################################################
+# The coefficients of the final classification step of the pipeline give an
+# idea how each feature impacts the likelihood of survival assuming that the
+# usual linear model assumptions hold (uncorrelated features, linear
+# separability, homoscedastic and normally distributed errors...) which we do
+# not verify in this example.
+#
+# To get error bars we perform cross-validation and compute the mean and
+# standard deviation for each coefficient across CV splits. Because we use a
+# standard scaler on the numerical features, the coefficient weights give us
+# an idea on how much the log odds of surviving are impacted by a change in
+# this dimension contrasted to the mean. Note that the categorical features
+# here are overspecified which makes it slightly harder to interpret because of
+# the information redundancy.
+#
+# We can see that the linear model coefficients are in agreement with the
+# historical reports: people in higher classes and therefore in the upper decks
+# were first to access the lifeboats, and often, priority was given to women
+# and children.
+
+import matplotlib.pyplot as plt
+from sklearn.model_selection import cross_validate
+
+cv_results = cross_validate(pipeline, X_train, y_train, cv=10,
+                            return_estimator=True)
+cv_coefs = np.concatenate([cv_pipeline.named_steps["classifier"].coef_
+                           for cv_pipeline in cv_results["estimator"]])
+fig, ax = plt.subplots()
+ax.barh(pipeline.named_steps["classifier"].input_features_,
+        cv_coefs.mean(axis=0), xerr=cv_coefs.std(axis=0))
+plt.tight_layout()
+plt.show()
 
 
 ###############################################################################
@@ -96,7 +132,7 @@
     'classifier__C': [0.1, 1.0, 10, 100],
 }
 
-grid_search = GridSearchCV(clf, param_grid, cv=10, iid=False)
+grid_search = GridSearchCV(pipeline, param_grid, cv=10, iid=False)
 grid_search.fit(X_train, y_train)
 
 print(("best logistic regression from grid search: %.3f"

From b3308417ce6867811a483876d373316206ad0638 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@columbia.edu>
Date: Wed, 27 Feb 2019 17:27:58 +0100
Subject: [PATCH 17/54] don't error when get_feature_names is not available in
 pipeline

---
 sklearn/pipeline.py            | 8 ++++----
 sklearn/tests/test_pipeline.py | 5 +++--
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py
index c77074711bcbd..e9562926b349a 100644
--- a/sklearn/pipeline.py
+++ b/sklearn/pipeline.py
@@ -567,15 +567,15 @@ def set_feature_names(self, input_features):
         feature_names = input_features
         for _, name, transform in self._iter(with_final=False):
             transform.input_features_ = feature_names
-            if not hasattr(transform, "get_feature_names"):
-                raise TypeError("Transformer {} does provide"
-                                " get_feature_names".format(name))
             try:
                 feature_names = transform.get_feature_names(
                     input_features=feature_names)
             except TypeError:
                 feature_names = transform.get_feature_names()
-        self._final_estimator.input_features_ = feature_names
+            except AttributeError:
+                feature_names = None
+        if self._final_estimator != "passthrough":
+            self._final_estimator.input_features_ = feature_names
 
     def get_feature_names(self, input_features=None):
         """Get feature names for transformation.
diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py
index 86df228f85abd..c2ffcb2b0e703 100644
--- a/sklearn/tests/test_pipeline.py
+++ b/sklearn/tests/test_pipeline.py
@@ -1077,10 +1077,11 @@ def test_input_feature_names_pandas():
         ('select', SelectKBest(k=2)),
         ('clf', LogisticRegression())])
     iris = load_iris()
-    df = pd.DataFrame(iris.data, names=iris.feature_names)
+    df = pd.DataFrame(iris.data, columns=iris.feature_names)
     pipe.fit(df, iris.target)
+    mask = pipe.named_steps.select.get_support()
     assert_array_equal(pipe.named_steps.clf.input_features_,
-                       iris.feature_names)
+                       np.array(iris.feature_names)[mask])
 
 
 def test_input_features_passthrough():

From 8da4ebde7aa2c46a48b953c37936524eb38c3787 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@columbia.edu>
Date: Wed, 27 Feb 2019 17:28:25 +0100
Subject: [PATCH 18/54] start on user guide for input_features_

---
 doc/modules/compose.rst | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/doc/modules/compose.rst b/doc/modules/compose.rst
index 0145842b88e16..74ea996fd164c 100644
--- a/doc/modules/compose.rst
+++ b/doc/modules/compose.rst
@@ -115,6 +115,10 @@ ignored by setting them to ``'passthrough'``::
     ...                   clf__C=[0.1, 10, 100])
     >>> grid_search = GridSearchCV(pipe, param_grid=param_grid)
 
+To enable model inspection, `Pipeline` sets a ``input_features_`` attribute on
+all pipeline steps during fitting. This allows the user to understand how
+features are transformed during a pipeline:
+
 .. topic:: Examples:
 
  * :ref:`sphx_glr_auto_examples_feature_selection_plot_feature_selection_pipeline.py`

From 372eb7180dc346d5ce829a869c25c2bc6fe6d47c Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@columbia.edu>
Date: Wed, 27 Feb 2019 18:00:26 +0100
Subject: [PATCH 19/54] Add example for input_features_ in pipeline userguide

---
 doc/modules/compose.rst | 35 +++++++++++++++++++++++++++++------
 1 file changed, 29 insertions(+), 6 deletions(-)

diff --git a/doc/modules/compose.rst b/doc/modules/compose.rst
index 74ea996fd164c..af84a1e95b4e6 100644
--- a/doc/modules/compose.rst
+++ b/doc/modules/compose.rst
@@ -115,9 +115,32 @@ ignored by setting them to ``'passthrough'``::
     ...                   clf__C=[0.1, 10, 100])
     >>> grid_search = GridSearchCV(pipe, param_grid=param_grid)
 
+
 To enable model inspection, `Pipeline` sets a ``input_features_`` attribute on
 all pipeline steps during fitting. This allows the user to understand how
-features are transformed during a pipeline:
+features are transformed during a pipeline::
+
+    >>> from sklearn.datasets import load_iris
+    >>> from sklearn.feature_selection import SelectKBest
+    >>> iris = load_iris()
+    >>> pipe = Pipeline(steps=[
+    ...    ('select', SelectKBest(k=2)),
+    ...    ('clf', LogisticRegression())])
+    >>> pipe.fit(iris.data, iris.target)
+    ... # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS
+    Pipeline(memory=None,
+              steps=[('select', SelectKBest(...)), ('clf', LogisticRegression(...))])
+    >>> pipe.named_steps.clf.input_features_
+    array(['x2', 'x3'], dtype='<U2')
+
+You can also provide custom feature names for a more human readable format using
+set_feature_names::
+
+    >>> pipe.set_feature_names(iris.feature_names)
+    >>> pipe.named_steps.select.input_features_
+    ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']
+    >>> pipe.named_steps.clf.input_features_
+    array(['petal length (cm)', 'petal width (cm)'], dtype='<U17')
 
 .. topic:: Examples:
 
@@ -435,11 +458,11 @@ By default, the remaining rating columns are ignored (``remainder='drop'``)::
 
   >>> column_trans.get_feature_names()
   ... # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
-  ['city_category__x0_London', 'city_category__x0_Paris', 'city_category__x0_Sallisaw',
-  'title_bow__bow', 'title_bow__feast', 'title_bow__grapes', 'title_bow__his',
-  'title_bow__how', 'title_bow__last', 'title_bow__learned', 'title_bow__moveable',
-  'title_bow__of', 'title_bow__the', 'title_bow__trick', 'title_bow__watson',
-  'title_bow__wrath']
+  ['city_category__city_London', 'city_category__city_Paris', 'city_category__city_Sallisaw',
+   'title_bow__bow', 'title_bow__feast', 'title_bow__grapes', 'title_bow__his',
+   'title_bow__how', 'title_bow__last', 'title_bow__learned', 'title_bow__moveable',
+   'title_bow__of', 'title_bow__the', 'title_bow__trick', 'title_bow__watson',
+   'title_bow__wrath']
 
   >>> column_trans.transform(X).toarray()
   ... # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS

From 66eb4e6667e54cc261a366409980574160ead3d6 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@columbia.edu>
Date: Wed, 27 Feb 2019 18:10:00 +0100
Subject: [PATCH 20/54] use self.input_features_ in get_feature_names if
 available.

---
 sklearn/pipeline.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py
index e9562926b349a..d6644e67e7f79 100644
--- a/sklearn/pipeline.py
+++ b/sklearn/pipeline.py
@@ -594,6 +594,9 @@ def get_feature_names(self, input_features=None):
         feature_names : array-like of string
             Transformed feature names
         """
+        if input_features is None and hasattr(self, 'input_features_'):
+            input_features = self.input_features_
+
         feature_names = input_features
         for _, name, transform in self._iter(with_final=True):
             if not hasattr(transform, "get_feature_names"):

From 0d8dc704801b1f6c85fab902cf12351e6b72243e Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@columbia.edu>
Date: Wed, 27 Feb 2019 18:20:25 +0100
Subject: [PATCH 21/54] ignore logreg deprecations

---
 sklearn/tests/test_pipeline.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py
index c2ffcb2b0e703..302e87380c80a 100644
--- a/sklearn/tests/test_pipeline.py
+++ b/sklearn/tests/test_pipeline.py
@@ -1051,6 +1051,7 @@ def test_make_pipeline_memory():
     shutil.rmtree(cachedir)
 
 
+@pytest.mark.filterwarnings('ignore: Default solver will be changed')  # 0.22
 def test_set_input_features():
     pipe = Pipeline(steps=[
         ('imputer', SimpleImputer(strategy='median')),
@@ -1069,6 +1070,7 @@ def test_set_input_features():
                        np.array(iris.feature_names)[mask])
 
 
+@pytest.mark.filterwarnings('ignore: Default solver will be changed')  # 0.22
 def test_input_feature_names_pandas():
     pd = pytest.importorskip("pandas")
     pipe = Pipeline(steps=[
@@ -1084,6 +1086,7 @@ def test_input_feature_names_pandas():
                        np.array(iris.feature_names)[mask])
 
 
+@pytest.mark.filterwarnings('ignore: Default solver will be changed')  # 0.22
 def test_input_features_passthrough():
     pipe = Pipeline(steps=[
         ('imputer', 'passthrough'),
@@ -1099,6 +1102,7 @@ def test_input_features_passthrough():
                        iris.feature_names)
 
 
+@pytest.mark.filterwarnings('ignore: Default solver will be changed')  # 0.22
 def test_input_features_count_vectorizer():
     pipe = Pipeline(steps=[
         ('vect', CountVectorizer()),

From 7550aacbf2ccb4450a400d0809fc163fbb922b22 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@columbia.edu>
Date: Thu, 28 Feb 2019 13:49:19 +0100
Subject: [PATCH 22/54] remove set_feature_names, reuse get_feature_names

Add more tests. General meta-estimators are not working yet.
---
 doc/modules/compose.rst        |  4 ++--
 sklearn/pipeline.py            | 27 +++++----------------
 sklearn/tests/test_pipeline.py | 44 +++++++++++++++++++++++++++++++---
 3 files changed, 49 insertions(+), 26 deletions(-)

diff --git a/doc/modules/compose.rst b/doc/modules/compose.rst
index af84a1e95b4e6..a7cde459aea1e 100644
--- a/doc/modules/compose.rst
+++ b/doc/modules/compose.rst
@@ -134,9 +134,9 @@ features are transformed during a pipeline::
     array(['x2', 'x3'], dtype='<U2')
 
 You can also provide custom feature names for a more human readable format using
-set_feature_names::
+get_feature_names::
 
-    >>> pipe.set_feature_names(iris.feature_names)
+    >>> pipe.get_feature_names(iris.feature_names)
     >>> pipe.named_steps.select.input_features_
     ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']
     >>> pipe.named_steps.clf.input_features_
diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py
index d6644e67e7f79..30688facb6f36 100644
--- a/sklearn/pipeline.py
+++ b/sklearn/pipeline.py
@@ -291,7 +291,7 @@ def fit(self, X, y=None, **fit_params):
         Xt, fit_params = self._fit(X, y, **fit_params)
         if self._final_estimator != 'passthrough':
             self._final_estimator.fit(Xt, y, **fit_params)
-        self.set_feature_names(_get_feature_names(X))
+        self.get_feature_names(_get_feature_names(X))
 
         return self
 
@@ -329,7 +329,7 @@ def fit_transform(self, X, y=None, **fit_params):
         elif last_step != 'passthrough':
             Xt = last_step.fit(Xt, y, **fit_params).transform(Xt)
 
-        self.set_feature_names(_get_feature_names(X))
+        self.get_feature_names(_get_feature_names(X))
 
         return Xt
 
@@ -546,7 +546,7 @@ def _pairwise(self):
         # check if first estimator expects pairwise input
         return getattr(self.steps[0][1], '_pairwise', False)
 
-    def set_feature_names(self, input_features):
+    def get_feature_names(self, input_features):
         """Set the input feature names for all steps.
 
         Sets the input_features_ attribute on the pipeline and
@@ -565,7 +565,7 @@ def set_feature_names(self, input_features):
         """
         self.input_features_ = input_features
         feature_names = input_features
-        for _, name, transform in self._iter(with_final=False):
+        for _, name, transform in self._iter(with_final=True):
             transform.input_features_ = feature_names
             try:
                 feature_names = transform.get_feature_names(
@@ -574,10 +574,9 @@ def set_feature_names(self, input_features):
                 feature_names = transform.get_feature_names()
             except AttributeError:
                 feature_names = None
-        if self._final_estimator != "passthrough":
-            self._final_estimator.input_features_ = feature_names
+        return feature_names
 
-    def get_feature_names(self, input_features=None):
+    # def get_feature_names(self, input_features=None):
         """Get feature names for transformation.
 
         Transform input features using the pipeline.
@@ -594,20 +593,6 @@ def get_feature_names(self, input_features=None):
         feature_names : array-like of string
             Transformed feature names
         """
-        if input_features is None and hasattr(self, 'input_features_'):
-            input_features = self.input_features_
-
-        feature_names = input_features
-        for _, name, transform in self._iter(with_final=True):
-            if not hasattr(transform, "get_feature_names"):
-                raise TypeError("Transformer {} does provide"
-                                " get_feature_names".format(name))
-            try:
-                feature_names = transform.get_feature_names(
-                    input_features=feature_names)
-            except TypeError:
-                    feature_names = transform.get_feature_names()
-        return feature_names
 
 
 def _name_estimators(estimators):
diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py
index 302e87380c80a..86cc07a9caf89 100644
--- a/sklearn/tests/test_pipeline.py
+++ b/sklearn/tests/test_pipeline.py
@@ -24,6 +24,7 @@
 from sklearn.svm import SVC
 from sklearn.linear_model import LogisticRegression, Lasso
 from sklearn.linear_model import LinearRegression
+from sklearn.multiclass import OneVsRestClassifier
 from sklearn.cluster import KMeans
 from sklearn.feature_selection import SelectKBest, f_classif
 from sklearn.dummy import DummyRegressor
@@ -1064,7 +1065,9 @@ def test_set_input_features():
     assert_array_equal(pipe.input_features_, xs)
     mask = pipe.named_steps.select.get_support()
     assert_array_equal(pipe.named_steps.clf.input_features_, xs[mask])
-    pipe.set_feature_names(iris.feature_names)
+    res = pipe.get_feature_names(iris.feature_names)
+    # LogisticRegression doesn't have get_feature_names
+    assert res is None
     assert_array_equal(pipe.input_features_, iris.feature_names)
     assert_array_equal(pipe.named_steps.clf.input_features_,
                        np.array(iris.feature_names)[mask])
@@ -1097,7 +1100,7 @@ def test_input_features_passthrough():
     pipe.fit(iris.data, iris.target)
     xs = ['x0', 'x1', 'x2', 'x3']
     assert_array_equal(pipe.named_steps.clf.input_features_, xs)
-    pipe.set_feature_names(iris.feature_names)
+    pipe.get_feature_names(iris.feature_names)
     assert_array_equal(pipe.named_steps.clf.input_features_,
                        iris.feature_names)
 
@@ -1111,6 +1114,41 @@ def test_input_features_count_vectorizer():
     pipe.fit(JUNK_FOOD_DOCS, y)
     assert_array_equal(pipe.named_steps.clf.input_features_,
                        ['beer', 'burger', 'coke', 'copyright', 'pizza', 'the'])
-    pipe.set_feature_names(["nonsense_is_ignored"])
+    pipe.get_feature_names(["nonsense_is_ignored"])
     assert_array_equal(pipe.named_steps.clf.input_features_,
                        ['beer', 'burger', 'coke', 'copyright', 'pizza', 'the'])
+
+@pytest.mark.filterwarnings('ignore: Default solver will be changed')  # 0.22
+def test_input_features_nested():
+    pipe = Pipeline(steps=[
+        ('inner_pipe', Pipeline(steps=[('select', SelectKBest(k=2)),
+                                       ('clf', LogisticRegression())]))])
+    iris = load_iris()
+    pipe.fit(iris.data, iris.target)
+    xs = np.array(['x0', 'x1', 'x2', 'x3'])
+    assert_array_equal(pipe.input_features_, xs)
+    mask = pipe.named_steps.inner_pipe.named_steps.select.get_support()
+    assert_array_equal(pipe.named_steps.inner_pipe.named_steps.clf.input_features_, xs[mask])
+    pipe.get_feature_names(iris.feature_names)
+    assert_array_equal(pipe.input_features_, iris.feature_names)
+    assert_array_equal(pipe.named_steps.inner_pipe.named_steps.clf.input_features_,
+                       np.array(iris.feature_names)[mask])
+
+                 
+@pytest.mark.filterwarnings('ignore: Default solver will be changed')  # 0.22
+def test_input_features_meta():
+    ovr = OneVsRestClassifier(Pipeline(steps=[('select', SelectKBest(k=2)),
+                                              ('clf', LogisticRegression())]))
+    pipe = Pipeline(steps=[('ovr', ovr)])
+    iris = load_iris()
+    pipe.fit(iris.data, iris.target)
+    xs = np.array(['x0', 'x1', 'x2', 'x3'])
+    assert_array_equal(pipe.input_features_, xs)
+    # check 0ths estimator in OVR only
+    inner_pipe = pipe.named_steps.ovr.estimators_[0]
+    mask = inner_pipe.named_steps.select.get_support()
+    assert_array_equal(inner_pipe.named_steps.clf.input_features_, xs[mask])
+    pipe.get_feature_names(iris.feature_names)
+    assert_array_equal(pipe.input_features_, iris.feature_names)
+    assert_array_equal(inner_pipe.named_steps.clf.input_features_,
+                       np.array(iris.feature_names)[mask])
\ No newline at end of file

From 4287cb843baa7a93284a54ab6985f42438a205ca Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@columbia.edu>
Date: Thu, 28 Feb 2019 14:19:07 +0100
Subject: [PATCH 23/54] slightly easier to debug get_feature_names recursion,
 better test

---
 sklearn/pipeline.py            | 16 ++++++++++------
 sklearn/tests/test_pipeline.py |  1 +
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py
index 30688facb6f36..48a4baa33c307 100644
--- a/sklearn/pipeline.py
+++ b/sklearn/pipeline.py
@@ -567,12 +567,16 @@ def get_feature_names(self, input_features):
         feature_names = input_features
         for _, name, transform in self._iter(with_final=True):
             transform.input_features_ = feature_names
-            try:
-                feature_names = transform.get_feature_names(
-                    input_features=feature_names)
-            except TypeError:
-                feature_names = transform.get_feature_names()
-            except AttributeError:
+            if hasattr(transform, "get_feature_names"):
+                # doing hassattr instead of a try-except on everything
+                # b/c catching AttributeError makes recursive code
+                # impossible to debug
+                try:
+                    feature_names = transform.get_feature_names(
+                        input_features=feature_names)
+                except TypeError:
+                    feature_names = transform.get_feature_names()
+            else:
                 feature_names = None
         return feature_names
 
diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py
index 86cc07a9caf89..b39242fcc77e3 100644
--- a/sklearn/tests/test_pipeline.py
+++ b/sklearn/tests/test_pipeline.py
@@ -1150,5 +1150,6 @@ def test_input_features_meta():
     assert_array_equal(inner_pipe.named_steps.clf.input_features_, xs[mask])
     pipe.get_feature_names(iris.feature_names)
     assert_array_equal(pipe.input_features_, iris.feature_names)
+    assert_array_equal(inner_pipe.input_features_, iris.feature_names)
     assert_array_equal(inner_pipe.named_steps.clf.input_features_,
                        np.array(iris.feature_names)[mask])
\ No newline at end of file

From eb78eac0d5aceaa59acf8c2b97ecb627e2fe7f04 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@columbia.edu>
Date: Thu, 28 Feb 2019 14:21:02 +0100
Subject: [PATCH 24/54] really ugly stuff to make the last 1% use case work

---
 sklearn/base.py | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/sklearn/base.py b/sklearn/base.py
index 682bc75c9d5cd..e33c933e1d3f1 100644
--- a/sklearn/base.py
+++ b/sklearn/base.py
@@ -6,11 +6,15 @@
 import copy
 import warnings
 from collections import defaultdict
+from collections.abc import Iterable
+
 import inspect
 
 import numpy as np
 
 from . import __version__
+from .exceptions import NotFittedError
+
 from sklearn.utils import _IS_32BIT
 
 _DEFAULT_TAGS = {
@@ -573,10 +577,40 @@ def get_feature_names(self, input_features=None):
                              " input feature names for {}".format(self))
 
 
+def _get_sub_estimators(est, fitted_only=True):
+    attrs = [getattr(est, x, None) for x in dir(est) if not x.startswith("_")]
+
+    def _recurse_sub_ests(candidates):
+        sub_ests = []
+        for a in candidates:
+            if hasattr(a, "set_params") and hasattr(a, "fit"):
+                sub_ests.append(a)
+            elif isinstance(a, Iterable) and not isinstance(a, str):
+                sub_ests.extend(_recurse_sub_ests(a))
+        return sub_ests
+    return list(set(_recurse_sub_ests(attrs)))
+
+
 class MetaEstimatorMixin:
     _required_parameters = ["estimator"]
     """Mixin class for all meta estimators in scikit-learn."""
 
+    def get_feature_names(self, input_features=None):
+        sub_ests = _get_sub_estimators(self)
+        for est in sub_ests:
+            if hasattr(est, "get_feature_names"):
+                # doing hassattr instead of a try-except on everything
+                # b/c catching AttributeError makes recursive code
+                # impossible to debug
+                try:
+                    est.get_feature_names(input_features=input_features)
+                except TypeError:
+                    # do we need this?
+                    est.get_feature_names()
+                except NotFittedError:
+                    pass
+        print("done recursing")
+
 
 class MultiOutputMixin(object):
     """Mixin to mark estimators that support multioutput."""

From d373b87e6df01248edc028c013c2372f908a18fa Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@columbia.edu>
Date: Thu, 28 Feb 2019 14:24:49 +0100
Subject: [PATCH 25/54] barh instead of bar in example

---
 examples/feature_selection/plot_feature_selection_pipeline.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/feature_selection/plot_feature_selection_pipeline.py b/examples/feature_selection/plot_feature_selection_pipeline.py
index 5b470af376535..466501b005b46 100644
--- a/examples/feature_selection/plot_feature_selection_pipeline.py
+++ b/examples/feature_selection/plot_feature_selection_pipeline.py
@@ -35,6 +35,6 @@
 print(classification_report(y_test, y_pred))
 
 # access and plot the coefficients of the fitted model
-plt.bar((0, 1, 2), anova_svm.named_steps.svc.coef_.ravel())
-plt.xticks((0, 1, 2), anova_svm.named_steps.svc.input_features_)
+plt.barh((0, 1, 2), anova_svm.named_steps.svc.coef_.ravel())
+plt.yticks((0, 1, 2), anova_svm.named_steps.svc.input_features_)
 plt.show()

From 4d4e6c6cf3b1b82cdb7dc9542768efebf12bcb9c Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@columbia.edu>
Date: Thu, 28 Feb 2019 15:20:25 +0100
Subject: [PATCH 26/54] test "simple" nested meta-estimator

---
 sklearn/base.py                |  2 +-
 sklearn/tests/test_pipeline.py | 20 +++++++++++++++++++-
 2 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/sklearn/base.py b/sklearn/base.py
index 951622a822470..f07a49efd1fa3 100644
--- a/sklearn/base.py
+++ b/sklearn/base.py
@@ -599,6 +599,7 @@ class MetaEstimatorMixin:
     def get_feature_names(self, input_features=None):
         sub_ests = _get_sub_estimators(self)
         for est in sub_ests:
+            est.input_features_ = input_features
             if hasattr(est, "get_feature_names"):
                 # doing hassattr instead of a try-except on everything
                 # b/c catching AttributeError makes recursive code
@@ -610,7 +611,6 @@ def get_feature_names(self, input_features=None):
                     est.get_feature_names()
                 except NotFittedError:
                     pass
-        print("done recursing")
 
 
 class MultiOutputMixin(object):
diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py
index b39242fcc77e3..9d3bd701c438e 100644
--- a/sklearn/tests/test_pipeline.py
+++ b/sklearn/tests/test_pipeline.py
@@ -1136,7 +1136,7 @@ def test_input_features_nested():
 
                  
 @pytest.mark.filterwarnings('ignore: Default solver will be changed')  # 0.22
-def test_input_features_meta():
+def test_input_features_meta_pipe():
     ovr = OneVsRestClassifier(Pipeline(steps=[('select', SelectKBest(k=2)),
                                               ('clf', LogisticRegression())]))
     pipe = Pipeline(steps=[('ovr', ovr)])
@@ -1152,4 +1152,22 @@ def test_input_features_meta():
     assert_array_equal(pipe.input_features_, iris.feature_names)
     assert_array_equal(inner_pipe.input_features_, iris.feature_names)
     assert_array_equal(inner_pipe.named_steps.clf.input_features_,
+                       np.array(iris.feature_names)[mask])
+                       
+
+@pytest.mark.filterwarnings('ignore: Default solver will be changed')  # 0.22
+def test_input_features_meta():
+    ovr = OneVsRestClassifier(LogisticRegression())
+    pipe = Pipeline(steps=[('select', SelectKBest(k=2)), ('ovr', ovr)])
+    iris = load_iris()
+    pipe.fit(iris.data, iris.target)
+    xs = np.array(['x0', 'x1', 'x2', 'x3'])
+    assert_array_equal(pipe.input_features_, xs)
+    # check 0ths estimator in OVR only
+    one_logreg = pipe.named_steps.ovr.estimators_[0]
+    mask = pipe.named_steps.select.get_support()
+    assert_array_equal(one_logreg.input_features_, xs[mask])
+    pipe.get_feature_names(iris.feature_names)
+    assert_array_equal(pipe.input_features_, iris.feature_names)
+    assert_array_equal(one_logreg.input_features_,
                        np.array(iris.feature_names)[mask])
\ No newline at end of file

From 003fcf3b6d89f4bc94d9517459a944c81de4ea2f Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@columbia.edu>
Date: Thu, 28 Feb 2019 15:25:02 +0100
Subject: [PATCH 27/54] allow None in pipelines get_feature_names, don't
 overwrite

---
 sklearn/pipeline.py            | 9 ++++++---
 sklearn/tests/test_pipeline.py | 7 ++++++-
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py
index 48a4baa33c307..0a9d239604d1f 100644
--- a/sklearn/pipeline.py
+++ b/sklearn/pipeline.py
@@ -546,7 +546,7 @@ def _pairwise(self):
         # check if first estimator expects pairwise input
         return getattr(self.steps[0][1], '_pairwise', False)
 
-    def get_feature_names(self, input_features):
+    def get_feature_names(self, input_features=None):
         """Set the input feature names for all steps.
 
         Sets the input_features_ attribute on the pipeline and
@@ -563,8 +563,11 @@ def get_feature_names(self, input_features):
             of the pipeline.
 
         """
-        self.input_features_ = input_features
-        feature_names = input_features
+        if input_features is not None:
+            self.input_features_ = input_features
+        if self.input_features_ is None:
+            raise ValueError("No feature names provided and none stored.")
+        feature_names = self.input_features_
         for _, name, transform in self._iter(with_final=True):
             transform.input_features_ = feature_names
             if hasattr(transform, "get_feature_names"):
diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py
index 9d3bd701c438e..4691fda7a7c4a 100644
--- a/sklearn/tests/test_pipeline.py
+++ b/sklearn/tests/test_pipeline.py
@@ -1071,7 +1071,12 @@ def test_set_input_features():
     assert_array_equal(pipe.input_features_, iris.feature_names)
     assert_array_equal(pipe.named_steps.clf.input_features_,
                        np.array(iris.feature_names)[mask])
-
+    # check that empty get_feature_names() doesn't overwrite
+    res = pipe.get_feature_names()
+    assert res is None
+    assert_array_equal(pipe.input_features_, iris.feature_names)
+    assert_array_equal(pipe.named_steps.clf.input_features_,
+                       np.array(iris.feature_names)[mask])
 
 @pytest.mark.filterwarnings('ignore: Default solver will be changed')  # 0.22
 def test_input_feature_names_pandas():

From f185af3c49c2c90bc8a3a0f5efcd37283fe7e792 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@columbia.edu>
Date: Thu, 28 Feb 2019 15:29:05 +0100
Subject: [PATCH 28/54] nicer error on not fitted pipeline

---
 sklearn/pipeline.py            | 5 +++--
 sklearn/tests/test_pipeline.py | 2 ++
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py
index 0a9d239604d1f..dbff87f5bd5e2 100644
--- a/sklearn/pipeline.py
+++ b/sklearn/pipeline.py
@@ -16,6 +16,7 @@
 from scipy import sparse
 
 from .base import clone, TransformerMixin
+from .exceptions import NotFittedError
 from .utils._joblib import Parallel, delayed
 from .utils.metaestimators import if_delegate_has_method
 from .utils import Bunch
@@ -565,8 +566,8 @@ def get_feature_names(self, input_features=None):
         """
         if input_features is not None:
             self.input_features_ = input_features
-        if self.input_features_ is None:
-            raise ValueError("No feature names provided and none stored.")
+        if getattr(self, 'input_features_', None) is None:
+            raise NotFittedError("Estimator Pipeline not fitted.")
         feature_names = self.input_features_
         for _, name, transform in self._iter(with_final=True):
             transform.input_features_ = feature_names
diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py
index 4691fda7a7c4a..eecf1ea49beae 100644
--- a/sklearn/tests/test_pipeline.py
+++ b/sklearn/tests/test_pipeline.py
@@ -20,6 +20,7 @@
 from sklearn.utils.testing import assert_no_warnings
 
 from sklearn.base import clone, BaseEstimator
+from sklearn.exceptions import NotFittedError
 from sklearn.pipeline import Pipeline, FeatureUnion, make_pipeline, make_union
 from sklearn.svm import SVC
 from sklearn.linear_model import LogisticRegression, Lasso
@@ -1059,6 +1060,7 @@ def test_set_input_features():
         ('scaler', StandardScaler()),
         ('select', SelectKBest(k=2)),
         ('clf', LogisticRegression())])
+    assert_raises(NotFittedError, pipe.get_feature_names)
     iris = load_iris()
     pipe.fit(iris.data, iris.target)
     xs = np.array(['x0', 'x1', 'x2', 'x3'])

From acc4c76490fad08c672760b1718d5d3d37c768cc Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@columbia.edu>
Date: Thu, 28 Feb 2019 15:31:37 +0100
Subject: [PATCH 29/54] flake8

---
 sklearn/tests/test_pipeline.py | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py
index eecf1ea49beae..7f007b4640a5a 100644
--- a/sklearn/tests/test_pipeline.py
+++ b/sklearn/tests/test_pipeline.py
@@ -1080,6 +1080,7 @@ def test_set_input_features():
     assert_array_equal(pipe.named_steps.clf.input_features_,
                        np.array(iris.feature_names)[mask])
 
+
 @pytest.mark.filterwarnings('ignore: Default solver will be changed')  # 0.22
 def test_input_feature_names_pandas():
     pd = pytest.importorskip("pandas")
@@ -1125,6 +1126,7 @@ def test_input_features_count_vectorizer():
     assert_array_equal(pipe.named_steps.clf.input_features_,
                        ['beer', 'burger', 'coke', 'copyright', 'pizza', 'the'])
 
+
 @pytest.mark.filterwarnings('ignore: Default solver will be changed')  # 0.22
 def test_input_features_nested():
     pipe = Pipeline(steps=[
@@ -1135,13 +1137,15 @@ def test_input_features_nested():
     xs = np.array(['x0', 'x1', 'x2', 'x3'])
     assert_array_equal(pipe.input_features_, xs)
     mask = pipe.named_steps.inner_pipe.named_steps.select.get_support()
-    assert_array_equal(pipe.named_steps.inner_pipe.named_steps.clf.input_features_, xs[mask])
+    assert_array_equal(
+        pipe.named_steps.inner_pipe.named_steps.clf.input_features_, xs[mask])
     pipe.get_feature_names(iris.feature_names)
     assert_array_equal(pipe.input_features_, iris.feature_names)
-    assert_array_equal(pipe.named_steps.inner_pipe.named_steps.clf.input_features_,
-                       np.array(iris.feature_names)[mask])
+    assert_array_equal(
+        pipe.named_steps.inner_pipe.named_steps.clf.input_features_,
+        np.array(iris.feature_names)[mask])
+
 
-                 
 @pytest.mark.filterwarnings('ignore: Default solver will be changed')  # 0.22
 def test_input_features_meta_pipe():
     ovr = OneVsRestClassifier(Pipeline(steps=[('select', SelectKBest(k=2)),
@@ -1160,7 +1164,7 @@ def test_input_features_meta_pipe():
     assert_array_equal(inner_pipe.input_features_, iris.feature_names)
     assert_array_equal(inner_pipe.named_steps.clf.input_features_,
                        np.array(iris.feature_names)[mask])
-                       
+
 
 @pytest.mark.filterwarnings('ignore: Default solver will be changed')  # 0.22
 def test_input_features_meta():
@@ -1177,4 +1181,4 @@ def test_input_features_meta():
     pipe.get_feature_names(iris.feature_names)
     assert_array_equal(pipe.input_features_, iris.feature_names)
     assert_array_equal(one_logreg.input_features_,
-                       np.array(iris.feature_names)[mask])
\ No newline at end of file
+                       np.array(iris.feature_names)[mask])

From eef87b638b74b7eac83aa01dcabb7824d046c75a Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@columbia.edu>
Date: Thu, 28 Feb 2019 15:52:44 +0100
Subject: [PATCH 30/54] better error message, allow call to get_feature_names
 with None again whoops

---
 sklearn/impute.py   | 1 +
 sklearn/pipeline.py | 6 ++----
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/sklearn/impute.py b/sklearn/impute.py
index a201db198c384..210ec67741b2a 100644
--- a/sklearn/impute.py
+++ b/sklearn/impute.py
@@ -436,6 +436,7 @@ def get_feature_names(self, input_features=None):
         feature_names : array-like of string
             Transformed feature names
         """
+        check_is_fitted(self, 'statistics_')
         if input_features is None:
             input_features = ['x%d' % i
                               for i in range(self.statistics_.shape[0])]
diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py
index dbff87f5bd5e2..02fc41e2b5c09 100644
--- a/sklearn/pipeline.py
+++ b/sklearn/pipeline.py
@@ -16,7 +16,6 @@
 from scipy import sparse
 
 from .base import clone, TransformerMixin
-from .exceptions import NotFittedError
 from .utils._joblib import Parallel, delayed
 from .utils.metaestimators import if_delegate_has_method
 from .utils import Bunch
@@ -564,10 +563,9 @@ def get_feature_names(self, input_features=None):
             of the pipeline.
 
         """
-        if input_features is not None:
+        if input_features is not None or not hasattr(self, 'input_features_'):
             self.input_features_ = input_features
-        if getattr(self, 'input_features_', None) is None:
-            raise NotFittedError("Estimator Pipeline not fitted.")
+
         feature_names = self.input_features_
         for _, name, transform in self._iter(with_final=True):
             transform.input_features_ = feature_names

From 4ed56c81a20c4c182fe39ec934b0ff0d2ec93ff2 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@columbia.edu>
Date: Thu, 28 Feb 2019 17:10:31 +0100
Subject: [PATCH 31/54] replace too-smart solution with explicit simple
 solution for meta-estimators

---
 sklearn/base.py | 21 ++++++++-------------
 1 file changed, 8 insertions(+), 13 deletions(-)

diff --git a/sklearn/base.py b/sklearn/base.py
index f07a49efd1fa3..9754f39b67ab4 100644
--- a/sklearn/base.py
+++ b/sklearn/base.py
@@ -6,7 +6,6 @@
 import copy
 import warnings
 from collections import defaultdict
-from collections.abc import Iterable
 
 import platform
 import inspect
@@ -578,18 +577,14 @@ def get_feature_names(self, input_features=None):
                              " input feature names for {}".format(self))
 
 
-def _get_sub_estimators(est, fitted_only=True):
-    attrs = [getattr(est, x, None) for x in dir(est) if not x.startswith("_")]
-
-    def _recurse_sub_ests(candidates):
-        sub_ests = []
-        for a in candidates:
-            if hasattr(a, "set_params") and hasattr(a, "fit"):
-                sub_ests.append(a)
-            elif isinstance(a, Iterable) and not isinstance(a, str):
-                sub_ests.extend(_recurse_sub_ests(a))
-        return sub_ests
-    return list(set(_recurse_sub_ests(attrs)))
+def _get_sub_estimators(est):
+    # Explicitly declare all fitted subestimators of existing meta-estimators
+    if hasattr(est, "estimator_"):
+        return [est.estimator_]
+    if hasattr(est, "base_estimator_"):
+        return [est.base_estimator_]
+    if hasattr(est, "estimators_"):
+        return est.estimators_
 
 
 class MetaEstimatorMixin:

From 8787e0454b19f2922130b82813e975c50b9febb1 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@columbia.edu>
Date: Thu, 28 Feb 2019 17:10:48 +0100
Subject: [PATCH 32/54] convert feature names from pandas to numpy array

---
 sklearn/pipeline.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py
index 02fc41e2b5c09..06c5edcfd58a0 100644
--- a/sklearn/pipeline.py
+++ b/sklearn/pipeline.py
@@ -28,7 +28,7 @@
 
 def _get_feature_names(X):
     if hasattr(X, 'columns'):
-        feature_names = X.columns
+        feature_names = np.array(X.columns)
     elif getattr(X, 'ndim', 0) > 1:
         feature_names = getattr(X, 'columns',
                                 ['x%d' % i for i in range(X.shape[1])])

From c0575996b180b85f6400a98c9631a36516808bb8 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@columbia.edu>
Date: Thu, 28 Feb 2019 17:16:23 +0100
Subject: [PATCH 33/54] Fix get_feature_names docstrings

---
 sklearn/base.py                   | 7 +++++++
 sklearn/feature_selection/base.py | 8 ++++++++
 2 files changed, 15 insertions(+)

diff --git a/sklearn/base.py b/sklearn/base.py
index 9754f39b67ab4..32d88d3de80c4 100644
--- a/sklearn/base.py
+++ b/sklearn/base.py
@@ -592,6 +592,13 @@ class MetaEstimatorMixin:
     """Mixin class for all meta estimators in scikit-learn."""
 
     def get_feature_names(self, input_features=None):
+        """Ensure feature names are set on sub-estimators
+
+        Parameters
+        ----------
+        input_features : list of string or None
+            Input features to the meta-estimator.
+        """
         sub_ests = _get_sub_estimators(self)
         for est in sub_ests:
             est.input_features_ = input_features
diff --git a/sklearn/feature_selection/base.py b/sklearn/feature_selection/base.py
index e03102989b6a0..b644c2f09673a 100644
--- a/sklearn/feature_selection/base.py
+++ b/sklearn/feature_selection/base.py
@@ -121,6 +121,14 @@ def inverse_transform(self, X):
         return Xt
 
     def get_feature_names(self, input_features=None):
+        """Mask feature names according to selected features.
+
+        Parameters
+        ----------
+        input_features : list of string or None
+            Input features to select from. If none, they are generated as
+            x0, x1, ..., xn.
+        """
         mask = self.get_support()
         if input_features is None:
             input_features = ['x%d' % i

From fca9ac292946b73f2463e304630e068de243adf8 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@columbia.edu>
Date: Thu, 28 Feb 2019 18:45:14 +0100
Subject: [PATCH 34/54] fix pipeline get_feature_names docstring

---
 sklearn/pipeline.py | 29 +++++++++--------------------
 1 file changed, 9 insertions(+), 20 deletions(-)

diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py
index 06c5edcfd58a0..7f5efba3aa2c9 100644
--- a/sklearn/pipeline.py
+++ b/sklearn/pipeline.py
@@ -547,11 +547,12 @@ def _pairwise(self):
         return getattr(self.steps[0][1], '_pairwise', False)
 
     def get_feature_names(self, input_features=None):
-        """Set the input feature names for all steps.
+        """Get the feature names for all steps.
 
         Sets the input_features_ attribute on the pipeline and
         on all pipeline steps using the provided input feature names
-        as input for the first step.
+        as input for the first step, and returns the output features
+        if the last step is a transformer.
 
         Some estimators like `ColumnTransformer` and `CountVectorizer`
         might ignore the provided input feature names.
@@ -562,6 +563,12 @@ def get_feature_names(self, input_features=None):
             Feature names to use as input feature names for the first step
             of the pipeline.
 
+        Returns
+        -------
+        feature_names : array-like of string or None
+            Output feature names of the last step if it is a transformer,
+            and None otherwise.
+
         """
         if input_features is not None or not hasattr(self, 'input_features_'):
             self.input_features_ = input_features
@@ -582,24 +589,6 @@ def get_feature_names(self, input_features=None):
                 feature_names = None
         return feature_names
 
-    # def get_feature_names(self, input_features=None):
-        """Get feature names for transformation.
-
-        Transform input features using the pipeline.
-        If the last step is a transformer, it's included
-        in the transformation, otherwise it's not.
-
-        Parameters
-        ----------
-        input_features : array-like of string
-            Input feature names.
-
-        Returns
-        -------
-        feature_names : array-like of string
-            Transformed feature names
-        """
-
 
 def _name_estimators(estimators):
     """Generate names for estimators."""

From d660f9284aafa2e54fa7cc6fd1b15b0cb30f3b47 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@columbia.edu>
Date: Thu, 28 Feb 2019 19:13:22 +0100
Subject: [PATCH 35/54] minor fix for meta-estimators with array estimators

---
 sklearn/base.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/sklearn/base.py b/sklearn/base.py
index 32d88d3de80c4..b31ab792eac16 100644
--- a/sklearn/base.py
+++ b/sklearn/base.py
@@ -600,6 +600,9 @@ def get_feature_names(self, input_features=None):
             Input features to the meta-estimator.
         """
         sub_ests = _get_sub_estimators(self)
+        if hasattr(sub_ests, 'shape'):
+            # Gradient boosting has a 2d array of estimators
+            sub_ests = sub_ests.ravel()
         for est in sub_ests:
             est.input_features_ = input_features
             if hasattr(est, "get_feature_names"):

From a74f4c4b1df93bc00949d994079ca6fad37a5e44 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@columbia.edu>
Date: Thu, 28 Feb 2019 19:55:58 +0100
Subject: [PATCH 36/54] ignore more deprecation warnings from logistic

---
 sklearn/tests/test_pipeline.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py
index 7f007b4640a5a..59cf32b430588 100644
--- a/sklearn/tests/test_pipeline.py
+++ b/sklearn/tests/test_pipeline.py
@@ -1054,6 +1054,7 @@ def test_make_pipeline_memory():
 
 
 @pytest.mark.filterwarnings('ignore: Default solver will be changed')  # 0.22
+@pytest.mark.filterwarnings('ignore: Default multi_class will')  # 0.22
 def test_set_input_features():
     pipe = Pipeline(steps=[
         ('imputer', SimpleImputer(strategy='median')),
@@ -1082,6 +1083,7 @@ def test_set_input_features():
 
 
 @pytest.mark.filterwarnings('ignore: Default solver will be changed')  # 0.22
+@pytest.mark.filterwarnings('ignore: Default multi_class will')  # 0.22
 def test_input_feature_names_pandas():
     pd = pytest.importorskip("pandas")
     pipe = Pipeline(steps=[
@@ -1098,6 +1100,7 @@ def test_input_feature_names_pandas():
 
 
 @pytest.mark.filterwarnings('ignore: Default solver will be changed')  # 0.22
+@pytest.mark.filterwarnings('ignore: Default multi_class will')  # 0.22
 def test_input_features_passthrough():
     pipe = Pipeline(steps=[
         ('imputer', 'passthrough'),
@@ -1114,6 +1117,7 @@ def test_input_features_passthrough():
 
 
 @pytest.mark.filterwarnings('ignore: Default solver will be changed')  # 0.22
+@pytest.mark.filterwarnings('ignore: Default multi_class will')  # 0.22
 def test_input_features_count_vectorizer():
     pipe = Pipeline(steps=[
         ('vect', CountVectorizer()),
@@ -1128,6 +1132,7 @@ def test_input_features_count_vectorizer():
 
 
 @pytest.mark.filterwarnings('ignore: Default solver will be changed')  # 0.22
+@pytest.mark.filterwarnings('ignore: Default multi_class will')  # 0.22
 def test_input_features_nested():
     pipe = Pipeline(steps=[
         ('inner_pipe', Pipeline(steps=[('select', SelectKBest(k=2)),
@@ -1147,6 +1152,7 @@ def test_input_features_nested():
 
 
 @pytest.mark.filterwarnings('ignore: Default solver will be changed')  # 0.22
+@pytest.mark.filterwarnings('ignore: Default multi_class will')  # 0.22
 def test_input_features_meta_pipe():
     ovr = OneVsRestClassifier(Pipeline(steps=[('select', SelectKBest(k=2)),
                                               ('clf', LogisticRegression())]))
@@ -1167,6 +1173,7 @@ def test_input_features_meta_pipe():
 
 
 @pytest.mark.filterwarnings('ignore: Default solver will be changed')  # 0.22
+@pytest.mark.filterwarnings('ignore: Default multi_class will')  # 0.22
 def test_input_features_meta():
     ovr = OneVsRestClassifier(LogisticRegression())
     pipe = Pipeline(steps=[('select', SelectKBest(k=2)), ('ovr', ovr)])

From eefe54c9aa8515de56ed2827c53c21498bc5a42b Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@columbia.edu>
Date: Fri, 1 Mar 2019 13:40:20 +0100
Subject: [PATCH 37/54] refinement of _get_sub_estimators, add crazy test

---
 sklearn/base.py            | 20 ++++++-----
 sklearn/tests/test_base.py | 71 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 83 insertions(+), 8 deletions(-)

diff --git a/sklearn/base.py b/sklearn/base.py
index b31ab792eac16..e316e364d23a9 100644
--- a/sklearn/base.py
+++ b/sklearn/base.py
@@ -579,12 +579,19 @@ def get_feature_names(self, input_features=None):
 
 def _get_sub_estimators(est):
     # Explicitly declare all fitted subestimators of existing meta-estimators
-    if hasattr(est, "estimator_"):
-        return [est.estimator_]
-    if hasattr(est, "base_estimator_"):
-        return [est.base_estimator_]
+    sub_ests = []
+    # OHE is not really needed
+    sub_names = ['estimator_', 'base_estimator_', 'one_hot_encoder_']
+    for name in sub_names:
+        sub_est = getattr(est, name, None)
+        if sub_est is not None:
+            sub_ests.append(est.estimator_)
     if hasattr(est, "estimators_"):
-        return est.estimators_
+        if hasattr(est.estimators_, 'shape'):
+            sub_ests.extend(est.estimators_.ravel())
+        else:    
+            sub_ests.extend(est.estimators_)
+    return sub_ests
 
 
 class MetaEstimatorMixin:
@@ -600,9 +607,6 @@ def get_feature_names(self, input_features=None):
             Input features to the meta-estimator.
         """
         sub_ests = _get_sub_estimators(self)
-        if hasattr(sub_ests, 'shape'):
-            # Gradient boosting has a 2d array of estimators
-            sub_ests = sub_ests.ravel()
         for est in sub_ests:
             est.input_features_ = input_features
             if hasattr(est, "get_feature_names"):
diff --git a/sklearn/tests/test_base.py b/sklearn/tests/test_base.py
index cf1f9739d6384..1df0e0dd74621 100644
--- a/sklearn/tests/test_base.py
+++ b/sklearn/tests/test_base.py
@@ -486,3 +486,74 @@ def test_tag_inheritance():
     diamond_tag_est = DiamondOverwriteTag()
     with pytest.raises(TypeError, match="Inconsistent values for tag"):
         diamond_tag_est._get_tags()
+
+
+def test_sub_estimator_consistency():
+    # check that _get_sub_estimators finds all fitted sub estimators
+    # if this breaks, you probably introduced a sub-estimator that's
+    # non-standard (not estimator_, base_estimator_ or estimators_)
+    from sklearn.utils.testing import all_estimators
+    from sklearn.base import (MetaEstimatorMixin, _get_sub_estimators,
+                              ClassifierMixin, RegressorMixin)
+    
+    from sklearn.model_selection._search import BaseSearchCV
+    from sklearn.feature_selection.base import SelectorMixin
+    from sklearn.datasets import make_blobs
+    from sklearn.linear_model import Ridge, LogisticRegression
+    from sklearn.utils.estimator_checks import \
+        multioutput_estimator_convert_y_2d
+    from collections.abc import Iterable
+    
+    def has_fitted_attr(est):
+        attrs = [(x, getattr(est, x, None))
+                 for x in dir(est) if x.endswith("_") and not x.startswith("__")]
+        return len(attrs)
+
+    def get_sub_estimators_brute(est):
+        # recurse through all attributes to get sub-estimators
+        attrs = [(x, getattr(est, x, None))
+                 for x in dir(est) if not x.startswith("_")]
+
+        def _recurse_sub_ests(candidates):
+            sub_ests = []
+            for a in candidates:
+                if hasattr(a, "set_params") and hasattr(a, "fit"):
+                    sub_ests.append(a)
+                elif isinstance(a, Iterable) and not isinstance(a, str):
+                    sub_ests.extend(_recurse_sub_ests(a))
+            return sub_ests
+        ests = _recurse_sub_ests(attrs)
+        # we don't consider label processors child estimators
+        return set([e for e in ests if has_fitted_attr(e)
+                    and e.__module__ != "sklearn.preprocessing.label"])
+
+    al = all_estimators()
+    mets = [x for x in al if issubclass(x[1], MetaEstimatorMixin)]
+    
+    X, y = make_blobs()
+    others = []
+
+    for name, Est in mets:
+        # instantiate and fit
+        try:
+            est = Est()
+        except TypeError:
+            if issubclass(Est, (ClassifierMixin, SelectorMixin)):
+                est = Est(LogisticRegression(solver='lbfgs', multi_class='auto'))
+            elif issubclass(Est, RegressorMixin):
+                est = Est(Ridge())
+            else:
+                others.append((name, Est))
+        if est._get_tags()['_skip_test']:
+            continue
+
+        y = multioutput_estimator_convert_y_2d(est, y)
+        est.fit(X, y)
+        # test recursive sub estimators are the same as result of
+        #_get_sub_estimators which uses a hard-coded list
+        assert (set(_get_sub_estimators(est)) ==
+                get_sub_estimators_brute(est))
+
+    for name, Est in others:
+        # only things we couldn't instantiate are the search CV
+        assert issubclass(Est, BaseSearchCV)
\ No newline at end of file

From ad48edf9a848cff184a5d6d3aa8ab0153f87a68d Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@columbia.edu>
Date: Fri, 1 Mar 2019 13:41:04 +0100
Subject: [PATCH 38/54] typo / make crazy test pass

---
 sklearn/base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/base.py b/sklearn/base.py
index e316e364d23a9..1545bca3d692c 100644
--- a/sklearn/base.py
+++ b/sklearn/base.py
@@ -585,7 +585,7 @@ def _get_sub_estimators(est):
     for name in sub_names:
         sub_est = getattr(est, name, None)
         if sub_est is not None:
-            sub_ests.append(est.estimator_)
+            sub_ests.append(sub_est)
     if hasattr(est, "estimators_"):
         if hasattr(est.estimators_, 'shape'):
             sub_ests.extend(est.estimators_.ravel())

From 4bbd8cd9ad865563cf1ff9138db697dc4b01fea6 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@columbia.edu>
Date: Fri, 1 Mar 2019 14:36:57 +0100
Subject: [PATCH 39/54] add get_feature_names to TransformerMixin, overwrite in
 random tree embedding

---
 sklearn/base.py            | 19 +++++++++++++++++++
 sklearn/ensemble/forest.py |  3 +++
 2 files changed, 22 insertions(+)

diff --git a/sklearn/base.py b/sklearn/base.py
index 1545bca3d692c..0bfe219bf2c43 100644
--- a/sklearn/base.py
+++ b/sklearn/base.py
@@ -502,6 +502,25 @@ def fit_transform(self, X, y=None, **fit_params):
             # fit method of arity 2 (supervised transformation)
             return self.fit(X, y, **fit_params).transform(X)
 
+    def get_feature_names(self, input_features=None):
+        # OneToOneMixin is higher in the class hierarchy
+        # because we put mixins on the wrong side
+        if hasattr(super(), 'get_feature_names'):
+            return super().get_feature_names(input_features)
+        # generate feature names from class name by default
+        if hasattr(self, 'n_components_'):
+            # n_components could be auto or None
+            # this is more likely to be an int
+            n_features = self.n_components_
+        elif hasattr(self, 'n_components') and self.n_components is not None:
+            n_features = self.n_components
+        elif hasattr(self, 'components_'):
+            n_features = self.components_.shape[0]
+        else:
+            return None
+        return ["{}{}".format(str(type(self)).lower(), i)
+                for i in range(n_features)]
+
 
 class DensityMixin:
     """Mixin class for all density estimators in scikit-learn."""
diff --git a/sklearn/ensemble/forest.py b/sklearn/ensemble/forest.py
index aae9dd8c72349..2345366feb89d 100644
--- a/sklearn/ensemble/forest.py
+++ b/sklearn/ensemble/forest.py
@@ -2025,3 +2025,6 @@ def transform(self, X):
         """
         check_is_fitted(self, 'one_hot_encoder_')
         return self.one_hot_encoder_.transform(self.apply(X))
+
+    def get_feature_names(self, input_features=None):
+        return None
\ No newline at end of file

From eb9aa528c82dc3cfd173bbdde8f24b0deb3eb5a9 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@columbia.edu>
Date: Fri, 1 Mar 2019 14:53:51 +0100
Subject: [PATCH 40/54] fix docstrings

---
 sklearn/base.py            | 12 ++++++++++++
 sklearn/ensemble/forest.py |  6 ++++++
 2 files changed, 18 insertions(+)

diff --git a/sklearn/base.py b/sklearn/base.py
index 0bfe219bf2c43..8bdfdcca5d3bc 100644
--- a/sklearn/base.py
+++ b/sklearn/base.py
@@ -503,6 +503,18 @@ def fit_transform(self, X, y=None, **fit_params):
             return self.fit(X, y, **fit_params).transform(X)
 
     def get_feature_names(self, input_features=None):
+        """Get output feature names.
+        
+        Parameters
+        ----------
+        input_features : list of string or None
+            String names of the input features.        
+
+        Returns
+        -------
+        output_feature_names : list of string
+            Feature names for transformer output.
+        """
         # OneToOneMixin is higher in the class hierarchy
         # because we put mixins on the wrong side
         if hasattr(super(), 'get_feature_names'):
diff --git a/sklearn/ensemble/forest.py b/sklearn/ensemble/forest.py
index 2345366feb89d..97fccce4913ae 100644
--- a/sklearn/ensemble/forest.py
+++ b/sklearn/ensemble/forest.py
@@ -2027,4 +2027,10 @@ def transform(self, X):
         return self.one_hot_encoder_.transform(self.apply(X))
 
     def get_feature_names(self, input_features=None):
+        """Feature names - not implemented yet.
+        
+        Parameters
+        ----------
+        input_features : list of strings or None
+        """
         return None
\ No newline at end of file

From fe4a02070dc8e7d526bd0c3a8b0cc787e2d8df5a Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@columbia.edu>
Date: Fri, 1 Mar 2019 14:59:52 +0100
Subject: [PATCH 41/54] add "init_" and "best_estimator_" to list of sub
 estimators

---
 sklearn/base.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/sklearn/base.py b/sklearn/base.py
index 8bdfdcca5d3bc..e76a4db10bb1c 100644
--- a/sklearn/base.py
+++ b/sklearn/base.py
@@ -612,7 +612,8 @@ def _get_sub_estimators(est):
     # Explicitly declare all fitted subestimators of existing meta-estimators
     sub_ests = []
     # OHE is not really needed
-    sub_names = ['estimator_', 'base_estimator_', 'one_hot_encoder_']
+    sub_names = ['estimator_', 'base_estimator_', 'one_hot_encoder_',
+                 'best_estimator_', 'init_']
     for name in sub_names:
         sub_est = getattr(est, name, None)
         if sub_est is not None:

From 750906bdc5282610d78e93621a4fe4f7eb131d16 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@columbia.edu>
Date: Fri, 1 Mar 2019 15:03:36 +0100
Subject: [PATCH 42/54] pep8

---
 sklearn/base.py            |  6 +++---
 sklearn/tests/test_base.py | 16 +++++++++-------
 2 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/sklearn/base.py b/sklearn/base.py
index e76a4db10bb1c..eec1a1d9c4218 100644
--- a/sklearn/base.py
+++ b/sklearn/base.py
@@ -504,11 +504,11 @@ def fit_transform(self, X, y=None, **fit_params):
 
     def get_feature_names(self, input_features=None):
         """Get output feature names.
-        
+
         Parameters
         ----------
         input_features : list of string or None
-            String names of the input features.        
+            String names of the input features.
 
         Returns
         -------
@@ -621,7 +621,7 @@ def _get_sub_estimators(est):
     if hasattr(est, "estimators_"):
         if hasattr(est.estimators_, 'shape'):
             sub_ests.extend(est.estimators_.ravel())
-        else:    
+        else:
             sub_ests.extend(est.estimators_)
     return sub_ests
 
diff --git a/sklearn/tests/test_base.py b/sklearn/tests/test_base.py
index 1df0e0dd74621..1ef88436113ef 100644
--- a/sklearn/tests/test_base.py
+++ b/sklearn/tests/test_base.py
@@ -495,7 +495,7 @@ def test_sub_estimator_consistency():
     from sklearn.utils.testing import all_estimators
     from sklearn.base import (MetaEstimatorMixin, _get_sub_estimators,
                               ClassifierMixin, RegressorMixin)
-    
+
     from sklearn.model_selection._search import BaseSearchCV
     from sklearn.feature_selection.base import SelectorMixin
     from sklearn.datasets import make_blobs
@@ -503,10 +503,11 @@ def test_sub_estimator_consistency():
     from sklearn.utils.estimator_checks import \
         multioutput_estimator_convert_y_2d
     from collections.abc import Iterable
-    
+
     def has_fitted_attr(est):
         attrs = [(x, getattr(est, x, None))
-                 for x in dir(est) if x.endswith("_") and not x.startswith("__")]
+                 for x in dir(est) if x.endswith("_")
+                 and not x.startswith("__")]
         return len(attrs)
 
     def get_sub_estimators_brute(est):
@@ -529,7 +530,7 @@ def _recurse_sub_ests(candidates):
 
     al = all_estimators()
     mets = [x for x in al if issubclass(x[1], MetaEstimatorMixin)]
-    
+
     X, y = make_blobs()
     others = []
 
@@ -539,7 +540,8 @@ def _recurse_sub_ests(candidates):
             est = Est()
         except TypeError:
             if issubclass(Est, (ClassifierMixin, SelectorMixin)):
-                est = Est(LogisticRegression(solver='lbfgs', multi_class='auto'))
+                est = Est(LogisticRegression(solver='lbfgs',
+                                             multi_class='auto'))
             elif issubclass(Est, RegressorMixin):
                 est = Est(Ridge())
             else:
@@ -550,10 +552,10 @@ def _recurse_sub_ests(candidates):
         y = multioutput_estimator_convert_y_2d(est, y)
         est.fit(X, y)
         # test recursive sub estimators are the same as result of
-        #_get_sub_estimators which uses a hard-coded list
+        # _get_sub_estimators which uses a hard-coded list
         assert (set(_get_sub_estimators(est)) ==
                 get_sub_estimators_brute(est))
 
     for name, Est in others:
         # only things we couldn't instantiate are the search CV
-        assert issubclass(Est, BaseSearchCV)
\ No newline at end of file
+        assert issubclass(Est, BaseSearchCV)

From 0ca6e9d1dc90949e2c04aab0f4d445306aba5f65 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@columbia.edu>
Date: Fri, 1 Mar 2019 15:13:40 +0100
Subject: [PATCH 43/54] fix class name formatting, add test for pca feature
 names in pipeline

---
 sklearn/base.py                |  2 +-
 sklearn/tests/test_pipeline.py | 12 ++++++++++++
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/sklearn/base.py b/sklearn/base.py
index eec1a1d9c4218..865af6a7cc1a9 100644
--- a/sklearn/base.py
+++ b/sklearn/base.py
@@ -530,7 +530,7 @@ def get_feature_names(self, input_features=None):
             n_features = self.components_.shape[0]
         else:
             return None
-        return ["{}{}".format(str(type(self)).lower(), i)
+        return ["{}{}".format(type(self).__name__.lower(), i)
                 for i in range(n_features)]
 
 
diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py
index 59cf32b430588..65660332a3976 100644
--- a/sklearn/tests/test_pipeline.py
+++ b/sklearn/tests/test_pipeline.py
@@ -1080,6 +1080,18 @@ def test_set_input_features():
     assert_array_equal(pipe.input_features_, iris.feature_names)
     assert_array_equal(pipe.named_steps.clf.input_features_,
                        np.array(iris.feature_names)[mask])
+    pipe = Pipeline(steps=[
+        ('scaler', StandardScaler()),
+        ('pca', PCA(n_components=3)),
+        ('select', SelectKBest(k=2)),
+        ('clf', LogisticRegression())])
+    pipe.fit(iris.data, iris.target)
+    assert_array_equal(pipe.named_steps.clf.input_features_, ['pca0', 'pca1'])
+    # setting names doesn't change names after PCA
+    pipe.get_feature_names(iris.feature_names)
+    assert_array_equal(pipe.named_steps.select.input_features_,
+                       ['pca0', 'pca1', 'pca2'])
+
 
 
 @pytest.mark.filterwarnings('ignore: Default solver will be changed')  # 0.22

From 7cd3dd0b4153bb4f5e1b07cdfa8cfff635bd90b8 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@columbia.edu>
Date: Fri, 1 Mar 2019 15:40:53 +0100
Subject: [PATCH 44/54] ignore warnings from changing init parameters

---
 sklearn/tests/test_base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/tests/test_base.py b/sklearn/tests/test_base.py
index 1ef88436113ef..80953df843bbb 100644
--- a/sklearn/tests/test_base.py
+++ b/sklearn/tests/test_base.py
@@ -487,7 +487,7 @@ def test_tag_inheritance():
     with pytest.raises(TypeError, match="Inconsistent values for tag"):
         diamond_tag_est._get_tags()
 
-
+@ignore_warnings(category=(FutureWarning, DeprecationWarning))
 def test_sub_estimator_consistency():
     # check that _get_sub_estimators finds all fitted sub estimators
     # if this breaks, you probably introduced a sub-estimator that's

From a85ab5e0af8f41a2387c67f4cac22ca37629e50c Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@columbia.edu>
Date: Fri, 1 Mar 2019 15:43:22 +0100
Subject: [PATCH 45/54] common test for feature name length

---
 sklearn/base.py                   | 13 ++++++++++++-
 sklearn/utils/estimator_checks.py |  8 ++++++++
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/sklearn/base.py b/sklearn/base.py
index 865af6a7cc1a9..47855558af565 100644
--- a/sklearn/base.py
+++ b/sklearn/base.py
@@ -520,7 +520,18 @@ def get_feature_names(self, input_features=None):
         if hasattr(super(), 'get_feature_names'):
             return super().get_feature_names(input_features)
         # generate feature names from class name by default
-        if hasattr(self, 'n_components_'):
+        # would be much less guessing if we stored the number
+        # of output features.
+        # Ideally this would be done in each class.
+        if hasattr(self, 'n_clusters'):
+            # this is before n_components_
+            # because n_components_ means something else
+            # in agglomerative clustering
+            n_features = self.n_clusters
+        elif hasattr(self, '_max_components'):
+            # special case for LinearDiscriminantAnalysis
+            n_features = min(self._max_components, self.n_components)
+        elif hasattr(self, 'n_components_'):
             # n_components could be auto or None
             # this is more likely to be an int
             n_features = self.n_components_
diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index 433fe8fabd6f9..f97ba0e9c97f0 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -991,6 +991,14 @@ def _check_transformer(name, transformer_orig, X, y):
     transformer_clone = clone(transformer)
     X_pred = transformer_clone.fit_transform(X, y=y_)
 
+    input_features = ['feature%d' % i for i in range(n_features)]
+    feature_names = transformer_clone.get_feature_names(input_features)
+    if feature_names is not None:
+        if isinstance(X_pred, tuple):
+            assert len(feature_names) == X_pred[0].shape[1]
+        else:
+            assert len(feature_names) == X_pred.shape[1]
+
     if isinstance(X_pred, tuple):
         for x_pred in X_pred:
             assert_equal(x_pred.shape[0], n_samples)

From 8001cdbabbedc569de0a607e33f8cb93c0889066 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@columbia.edu>
Date: Fri, 1 Mar 2019 15:51:30 +0100
Subject: [PATCH 46/54] renamed one hot encoder for more intuitive feature
 names

---
 doc/modules/compose.rst | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/doc/modules/compose.rst b/doc/modules/compose.rst
index a7cde459aea1e..152ae182e37a1 100644
--- a/doc/modules/compose.rst
+++ b/doc/modules/compose.rst
@@ -447,7 +447,7 @@ By default, the remaining rating columns are ignored (``remainder='drop'``)::
   >>> from sklearn.feature_extraction.text import CountVectorizer
   >>> from sklearn.preprocessing import OneHotEncoder
   >>> column_trans = ColumnTransformer(
-  ...     [('city_category', OneHotEncoder(dtype='int'),['city']),
+  ...     [('categories', OneHotEncoder(dtype='int'),['city']),
   ...      ('title_bow', CountVectorizer(), 'title')],
   ...     remainder='drop')
 
@@ -458,11 +458,11 @@ By default, the remaining rating columns are ignored (``remainder='drop'``)::
 
   >>> column_trans.get_feature_names()
   ... # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
-  ['city_category__city_London', 'city_category__city_Paris', 'city_category__city_Sallisaw',
-   'title_bow__bow', 'title_bow__feast', 'title_bow__grapes', 'title_bow__his',
-   'title_bow__how', 'title_bow__last', 'title_bow__learned', 'title_bow__moveable',
-   'title_bow__of', 'title_bow__the', 'title_bow__trick', 'title_bow__watson',
-   'title_bow__wrath']
+  ['categories__city_London', 'categories__city_Paris',
+   'categories__city_Sallisaw', 'title_bow__bow', 'title_bow__feast',
+   'title_bow__grapes', 'title_bow__his', 'title_bow__how', 'title_bow__last',
+   'title_bow__learned', 'title_bow__moveable', 'title_bow__of', 'title_bow__the',
+   'title_bow__trick', 'title_bow__watson', 'title_bow__wrath']
 
   >>> column_trans.transform(X).toarray()
   ... # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS

From fa00af0d6f02dda6cab3f4006d8c7e37334e9f01 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@columbia.edu>
Date: Fri, 1 Mar 2019 16:59:59 +0100
Subject: [PATCH 47/54] LDA Special case fixes

---
 sklearn/base.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/sklearn/base.py b/sklearn/base.py
index 47855558af565..f5bf59bb8d58e 100644
--- a/sklearn/base.py
+++ b/sklearn/base.py
@@ -530,7 +530,8 @@ def get_feature_names(self, input_features=None):
             n_features = self.n_clusters
         elif hasattr(self, '_max_components'):
             # special case for LinearDiscriminantAnalysis
-            n_features = min(self._max_components, self.n_components)
+            n_components = self.n_components or np.inf
+            n_features = min(self._max_components, n_components)
         elif hasattr(self, 'n_components_'):
             # n_components could be auto or None
             # this is more likely to be an int

From ccfc971cca047e3fb9a9543ba1997482d3a3883e Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@columbia.edu>
Date: Fri, 1 Mar 2019 17:51:41 +0100
Subject: [PATCH 48/54] only check feature names if they exist to be nice to
 contrib estimators

---
 sklearn/utils/estimator_checks.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index f97ba0e9c97f0..fee446adc7420 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -992,12 +992,13 @@ def _check_transformer(name, transformer_orig, X, y):
     X_pred = transformer_clone.fit_transform(X, y=y_)
 
     input_features = ['feature%d' % i for i in range(n_features)]
-    feature_names = transformer_clone.get_feature_names(input_features)
-    if feature_names is not None:
-        if isinstance(X_pred, tuple):
-            assert len(feature_names) == X_pred[0].shape[1]
-        else:
-            assert len(feature_names) == X_pred.shape[1]
+    if hasattr(transformer_clone, 'get_feature_names'):
+        feature_names = transformer_clone.get_feature_names(input_features)
+        if feature_names is not None:
+            if isinstance(X_pred, tuple):
+                assert len(feature_names) == X_pred[0].shape[1]
+            else:
+                assert len(feature_names) == X_pred.shape[1]
 
     if isinstance(X_pred, tuple):
         for x_pred in X_pred:

From 2dae33925bad9328ba5a621fe0dfff7d357b4e7e Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@columbia.edu>
Date: Sat, 2 Mar 2019 00:16:30 +0100
Subject: [PATCH 49/54] hackety hack

---
 sklearn/compose/tests/test_column_transformer.py | 13 +++++++++++++
 sklearn/pipeline.py                              |  8 +++-----
 2 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/sklearn/compose/tests/test_column_transformer.py b/sklearn/compose/tests/test_column_transformer.py
index a8a1cbea8e524..05ebf4a216a3a 100644
--- a/sklearn/compose/tests/test_column_transformer.py
+++ b/sklearn/compose/tests/test_column_transformer.py
@@ -19,6 +19,7 @@
 from sklearn.exceptions import NotFittedError
 from sklearn.preprocessing import StandardScaler, Normalizer, OneHotEncoder
 from sklearn.feature_extraction import DictVectorizer
+from sklearn.pipeline import make_pipeline
 
 
 class Trans(BaseEstimator):
@@ -658,6 +659,18 @@ def test_column_transformer_get_feature_names():
     assert_raise_message(AttributeError,
                          "Transformer trans (type Trans) does not provide "
                          "get_feature_names", ct.get_feature_names)
+    
+    # if some transformers support and some don't
+    ct = ColumnTransformer([('trans', Trans(), [0, 1]),
+                            ('scale', StandardScaler(), [0])])
+    ct.fit(X_array)
+    assert_raise_message(AttributeError,
+                         "Transformer trans (type Trans) does not provide "
+                         "get_feature_names", ct.get_feature_names)
+                         
+    # inside a pipeline
+    make_pipeline(ct).fit(X_array)
+ 
 
     # working example
     X = np.array([[{'a': 1, 'b': 2}, {'a': 3, 'b': 4}],
diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py
index 7f5efba3aa2c9..04e35ebd299a7 100644
--- a/sklearn/pipeline.py
+++ b/sklearn/pipeline.py
@@ -576,16 +576,14 @@ def get_feature_names(self, input_features=None):
         feature_names = self.input_features_
         for _, name, transform in self._iter(with_final=True):
             transform.input_features_ = feature_names
-            if hasattr(transform, "get_feature_names"):
-                # doing hassattr instead of a try-except on everything
-                # b/c catching AttributeError makes recursive code
-                # impossible to debug
+            try:
                 try:
                     feature_names = transform.get_feature_names(
                         input_features=feature_names)
                 except TypeError:
                     feature_names = transform.get_feature_names()
-            else:
+            except AttributeError:
+                # this can come from inside a meta-estimator
                 feature_names = None
         return feature_names
 

From 534c4eddd65abae91cb5624276d0044100e447d8 Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Wed, 6 Mar 2019 17:36:24 +0100
Subject: [PATCH 50/54] Better titanic interpretation

---
 .../plot_column_transformer_mixed_types.py       | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/examples/compose/plot_column_transformer_mixed_types.py b/examples/compose/plot_column_transformer_mixed_types.py
index ef000c4a0077e..405661ff73f22 100644
--- a/examples/compose/plot_column_transformer_mixed_types.py
+++ b/examples/compose/plot_column_transformer_mixed_types.py
@@ -86,8 +86,8 @@
 # The coefficients of the final classification step of the pipeline gives an
 # idea how each feature impacts the likelihood of survival assuming that the
 # usual linear model assumptions hold (uncorrelated features, linear
-# separability, homoschedastic and normally distributed errors...) which we do
-# not verify in this example.
+# separability, homoscedastic errors...) which we do not verify in this
+# example.
 #
 # To get error bars we perform cross-validation and compute the mean and
 # standard deviation for each coefficient accross CV splits. Because we use a
@@ -99,13 +99,21 @@
 #
 # We can see that the linear model coefficients are in agreement with the
 # historical reports: people in higher classes and therefore in the upper decks
-# were first to access the lifeboats, and often, priority was given to women
+# were the first to reach the lifeboats, and often, priority was given to women
 # and children.
+#
+# Note that conditioned on the "pclass_x" one-hot features, the "fare"
+# numerical feature does not seem to be significantly predictive. If we drop
+# the "pclass" feature, then higher "fare" values would appear significantly
+# correlated with a higher likelihood of survival as the "fare" and "pclass"
+# features have a strong statistical dependency.
 
 import matplotlib.pyplot as plt
 from sklearn.model_selection import cross_validate
+from sklearn.model_selection import StratifiedShuffleSplit
 
-cv_results = cross_validate(pipeline, X_train, y_train, cv=10,
+cv = StratifiedShuffleSplit(n_splits=20, test_size=0.25, random_state=42)
+cv_results = cross_validate(pipeline, X_train, y_train, cv=cv,
                             return_estimator=True)
 cv_coefs = np.concatenate([cv_pipeline.named_steps["classifier"].coef_
                            for cv_pipeline in cv_results["estimator"]])

From dc1c349cc9e21051914aa46848ad2c58e8de6101 Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Thu, 7 Mar 2019 17:46:35 +0100
Subject: [PATCH 51/54] Phrasing in example

---
 examples/compose/plot_column_transformer_mixed_types.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/compose/plot_column_transformer_mixed_types.py b/examples/compose/plot_column_transformer_mixed_types.py
index 405661ff73f22..71551856ac35b 100644
--- a/examples/compose/plot_column_transformer_mixed_types.py
+++ b/examples/compose/plot_column_transformer_mixed_types.py
@@ -81,7 +81,7 @@
 
 
 ###############################################################################
-# Introspecting the coefficients values of the classifier
+# Inspecting the coefficients values of the classifier
 ###############################################################################
 # The coefficients of the final classification step of the pipeline gives an
 # idea how each feature impacts the likelihood of survival assuming that the

From 089c65dc4d384b4cf4425c334e485d7184a47bac Mon Sep 17 00:00:00 2001
From: Adrin Jalali <adrin.jalali@gmail.com>
Date: Thu, 7 Mar 2019 13:47:19 -0500
Subject: [PATCH 52/54] Apply suggestions from code review

minor doc fixes by adrin

Co-Authored-By: amueller <t3kcit@gmail.com>
---
 doc/modules/compose.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc/modules/compose.rst b/doc/modules/compose.rst
index 152ae182e37a1..4d47737889330 100644
--- a/doc/modules/compose.rst
+++ b/doc/modules/compose.rst
@@ -116,7 +116,7 @@ ignored by setting them to ``'passthrough'``::
     >>> grid_search = GridSearchCV(pipe, param_grid=param_grid)
 
 
-To enable model inspection, `Pipeline` sets a ``input_features_`` attribute on
+To enable model inspection, `Pipeline` sets an ``input_features_`` attribute on
 all pipeline steps during fitting. This allows the user to understand how
 features are transformed during a pipeline::
 
@@ -134,7 +134,7 @@ features are transformed during a pipeline::
     array(['x2', 'x3'], dtype='<U2')
 
 You can also provide custom feature names for a more human readable format using
-get_feature_names::
+``get_feature_names``::
 
     >>> pipe.get_feature_names(iris.feature_names)
     >>> pipe.named_steps.select.input_features_

From 4c17e96e33e043be248258300d5819a418e07377 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@columbia.edu>
Date: Fri, 31 May 2019 11:04:09 -0400
Subject: [PATCH 53/54] fix merge issue

---
 sklearn/pipeline.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py
index 072e452dea9d6..97ddde7333641 100644
--- a/sklearn/pipeline.py
+++ b/sklearn/pipeline.py
@@ -628,7 +628,7 @@ def get_feature_names(self, input_features=None):
         """
         feature_names = input_features
         with_final = hasattr(self._final_estimator, "transform")
-        for name, transform in self._iter(with_final=with_final):
+        for i, name, transform in self._iter(with_final=with_final):
             if not hasattr(transform, "get_feature_names"):
                 raise TypeError("Transformer {} does provide"
                                 " get_feature_names".format(name))

From 2733d20036dd993b07b50bad66f175278427b44d Mon Sep 17 00:00:00 2001
From: Andreas Mueller <andreas.mueller@columbia.edu>
Date: Fri, 31 May 2019 11:19:39 -0400
Subject: [PATCH 54/54] fix impute feature names after file was moved. merging
 fun

---
 sklearn/impute/_base.py | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/sklearn/impute/_base.py b/sklearn/impute/_base.py
index 7be9da691ce11..6b63c4529c06a 100644
--- a/sklearn/impute/_base.py
+++ b/sklearn/impute/_base.py
@@ -273,7 +273,8 @@ def fit(self, X, y=None):
             self.indicator_.fit(X)
         else:
             self.indicator_ = None
-
+        invalid_mask = _get_mask(self.statistics_, np.nan)
+        self._valid_mask = np.logical_not(invalid_mask)
         return self
 
     def _sparse_fit(self, X, strategy, missing_values, fill_value):
@@ -433,6 +434,25 @@ def transform(self, X):
     def _more_tags(self):
         return {'allow_nan': True}
 
+    def get_feature_names(self, input_features=None):
+        """Get feature names for transformation.
+
+        Parameters
+        ----------
+        input_features : array-like of string or None, optional
+            Input feature names; if None, 'x0', 'x1', ... are generated.
+
+        Returns
+        -------
+        feature_names : array-like of string
+            Names of the input features whose fitted statistic is not NaN.
+        """
+        check_is_fitted(self, 'statistics_')
+        if input_features is None:
+            input_features = ['x%d' % i
+                              for i in range(self.statistics_.shape[0])]
+        return np.array(input_features)[self._valid_mask]
+
 
 class MissingIndicator(BaseEstimator, TransformerMixin):
     """Binary indicators for missing values.