RFC Implement Pipeline get feature names #12627


Closed · wants to merge 30 commits

Commits (30)
ab2acbd
work on get_feature_names for pipeline
amueller Nov 20, 2018
3bc674b
fix SimpleImputer get_feature_names
amueller Nov 20, 2018
1c4a78f
use hasattr(transform) to check whether to use final estimator in get…
amueller Nov 20, 2018
7881930
add some docstrings
amueller Nov 20, 2018
de63353
fix docstring
amueller Nov 27, 2018
8835f3b
Merge branch 'master' into pipeline_get_feature_names
amueller Feb 27, 2019
2eba5de
fix merge issues with master
amueller May 30, 2019
449ed23
fix merge issue
amueller May 31, 2019
a1fcf67
Merge branch 'master' into pipeline_get_feature_names
amueller May 21, 2020
b929341
don't do magic slicing in pipeline.get_feature_names
amueller May 21, 2020
2b613e5
fix merge issue
amueller May 21, 2020
ad66b86
Merge branch 'master' of https://github.com/scikit-learn/scikit-learn…
amueller May 24, 2020
5eb7603
trying to merge with input feature pr
amueller Jun 2, 2020
f4f832a
Merge branch 'master' into pipeline_get_feature_names
amueller Jun 2, 2020
3a9054c
remove tests that don't apply
amueller Jun 2, 2020
9c4420d
Merge branch 'pipeline_get_feature_names' of github.com:amueller/scik…
amueller Jun 2, 2020
76f5b54
fix onetoone mixin feature names
amueller Jun 2, 2020
52f38e1
remove more tests
amueller Jun 2, 2020
cdda1fb
fix test for better expected outputs
amueller Jun 2, 2020
5f4abbc
fix priorities in catch-all get_feature_names
amueller Jun 2, 2020
4305a28
flake8
amueller Jun 2, 2020
c387b5b
remove redundant code
amueller Jun 2, 2020
2fefb67
fix error message
amueller Jun 2, 2020
a6832c3
fix mixin order
amueller Jun 2, 2020
0f45b22
small refactor with helper function
amueller Jun 2, 2020
4717a73
linting for new options
amueller Jun 3, 2020
a658ba7
add feature names to lineardiscriminantanalysis and birch
amueller Jun 3, 2020
e9e45af
add get_feature_names in a couple more places
amueller Jun 3, 2020
5acaced
fix up docs
amueller Jun 3, 2020
0353f69
make example actually work
amueller Jun 3, 2020
41 changes: 31 additions & 10 deletions doc/modules/compose.rst
@@ -139,6 +139,27 @@ or by name::
>>> pipe['reduce_dim']
PCA()

To enable model inspection, `Pipeline` has a ``get_feature_names()`` method,
just like all transformers. You can use pipeline slicing to get the feature names
going into each step::

>>> from sklearn.datasets import load_iris
>>> from sklearn.feature_selection import SelectKBest
>>> iris = load_iris()
>>> pipe = Pipeline(steps=[
... ('select', SelectKBest(k=2)),
... ('clf', LogisticRegression())])
>>> pipe.fit(iris.data, iris.target)
Pipeline(steps=[('select', SelectKBest(...)), ('clf', LogisticRegression(...))])
>>> pipe[:-1].get_feature_names()
array(['x2', 'x3'], dtype='<U2')

You can also provide custom feature names for a more human-readable format using
``get_feature_names``::

>>> pipe[:-1].get_feature_names(iris.feature_names)
array(['petal length (cm)', 'petal width (cm)'], dtype='<U17')

.. topic:: Examples:

* :ref:`sphx_glr_auto_examples_feature_selection_plot_feature_selection_pipeline.py`
@@ -428,21 +449,21 @@ By default, the remaining rating columns are ignored (``remainder='drop'``)::
>>> from sklearn.feature_extraction.text import CountVectorizer
>>> from sklearn.preprocessing import OneHotEncoder
>>> column_trans = ColumnTransformer(
... [('city_category', OneHotEncoder(dtype='int'),['city']),
... [('categories', OneHotEncoder(dtype='int'),['city']),
... ('title_bow', CountVectorizer(), 'title')],
... remainder='drop')

>>> column_trans.fit(X)
ColumnTransformer(transformers=[('city_category', OneHotEncoder(dtype='int'),
ColumnTransformer(transformers=[('categories', OneHotEncoder(dtype='int'),
['city']),
('title_bow', CountVectorizer(), 'title')])

>>> column_trans.get_feature_names()
['city_category__x0_London', 'city_category__x0_Paris', 'city_category__x0_Sallisaw',
'title_bow__bow', 'title_bow__feast', 'title_bow__grapes', 'title_bow__his',
'title_bow__how', 'title_bow__last', 'title_bow__learned', 'title_bow__moveable',
'title_bow__of', 'title_bow__the', 'title_bow__trick', 'title_bow__watson',
'title_bow__wrath']
['categories__city_London', 'categories__city_Paris',
'categories__city_Sallisaw', 'title_bow__bow', 'title_bow__feast',
'title_bow__grapes', 'title_bow__his', 'title_bow__how', 'title_bow__last',
'title_bow__learned', 'title_bow__moveable', 'title_bow__of', 'title_bow__the',
'title_bow__trick', 'title_bow__watson', 'title_bow__wrath']

>>> column_trans.transform(X).toarray()
array([[1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0],
@@ -459,7 +480,7 @@ to specify the column as a list of strings (``['city']``).

Apart from a scalar or a single item list, the column selection can be specified
as a list of multiple items, an integer array, a slice, a boolean mask, or
with a :func:`~sklearn.compose.make_column_selector`. The
:func:`~sklearn.compose.make_column_selector` is used to select columns based
on data type or column name::

@@ -544,8 +565,8 @@ many estimators. This visualization is activated by setting the
>>> # displays HTML representation in a Jupyter context
>>> column_trans # doctest: +SKIP

An example of the HTML output can be seen in the
**HTML representation of Pipeline** section of
:ref:`sphx_glr_auto_examples_compose_plot_column_transformer_mixed_types.py`.
As an alternative, the HTML can be written to a file using
:func:`~sklearn.utils.estimator_html_repr`::
44 changes: 44 additions & 0 deletions examples/compose/plot_column_transformer_mixed_types.py
@@ -145,6 +145,50 @@
clf.fit(X_train, y_train)
print("model score: %.3f" % clf.score(X_test, y_test))


###############################################################################
# Inspecting the coefficient values of the classifier
###############################################################################
# The coefficients of the final classification step of the pipeline give an
# idea of how each feature impacts the likelihood of survival, assuming that
# the usual linear model assumptions hold (uncorrelated features, linear
# separability, homoscedastic errors...), which we do not verify in this
# example.
#
# To get error bars we perform cross-validation and compute the mean and
# standard deviation for each coefficient across CV splits. Because we use a
# standard scaler on the numerical features, the coefficient weights give us
# an idea of how much the log odds of surviving are impacted by a change in
# a given dimension, contrasted to the mean. Note that the categorical
# features here are overspecified, which makes them slightly harder to
# interpret because of the information redundancy.
#
# We can see that the linear model coefficients are in agreement with the
# historical reports: people in higher classes and therefore in the upper decks
# were the first to reach the lifeboats, and often, priority was given to women
# and children.
#
# Note that conditioned on the "pclass_x" one-hot features, the "fare"
# numerical feature does not seem to be significantly predictive. If we drop
# the "pclass" feature, then higher "fare" values would appear significantly
# correlated with a higher likelihood of survival as the "fare" and "pclass"
# features have a strong statistical dependency.

import matplotlib.pyplot as plt
from sklearn.model_selection import cross_validate
from sklearn.model_selection import StratifiedShuffleSplit

cv = StratifiedShuffleSplit(n_splits=20, test_size=0.25, random_state=42)
cv_results = cross_validate(clf, X_train, y_train, cv=cv,
return_estimator=True)
cv_coefs = np.concatenate([cv_pipeline[-1].coef_
for cv_pipeline in cv_results["estimator"]])
fig, ax = plt.subplots()
ax.barh(clf[:-1].get_feature_names(),
cv_coefs.mean(axis=0), xerr=cv_coefs.std(axis=0))
plt.tight_layout()
plt.show()

###############################################################################
# The resulting score is not exactly the same as the one from the previous
# pipeline because the dtype-based selector treats the ``pclass`` columns as
9 changes: 6 additions & 3 deletions examples/feature_selection/plot_feature_selection_pipeline.py
@@ -9,6 +9,7 @@
Using a sub-pipeline, the fitted coefficients can be mapped back into
the original feature space.
"""
import matplotlib.pyplot as plt
from sklearn import svm
from sklearn.datasets import make_classification
from sklearn.feature_selection import SelectKBest, f_regression
@@ -20,7 +21,7 @@

# import some data to play with
X, y = make_classification(
n_features=20, n_informative=3, n_redundant=0, n_classes=4,
n_features=20, n_informative=3, n_redundant=0, n_classes=2,
n_clusters_per_class=2)

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
@@ -36,5 +37,7 @@
y_pred = anova_svm.predict(X_test)
print(classification_report(y_test, y_pred))

coef = anova_svm[:-1].inverse_transform(anova_svm['linearsvc'].coef_)
print(coef)
# access and plot the coefficients of the fitted model
plt.barh((0, 1, 2), anova_svm[-1].coef_.ravel())
plt.yticks((0, 1, 2), anova_svm[:-1].get_feature_names())
plt.show()
68 changes: 68 additions & 0 deletions sklearn/base.py
@@ -17,6 +17,7 @@
from .utils import _IS_32BIT
from .utils.validation import check_X_y
from .utils.validation import check_array
from .utils._feature_names import _make_feature_names
from .utils._estimator_html_repr import estimator_html_repr
from .utils.validation import _deprecate_positional_args

@@ -689,6 +690,45 @@ def fit_transform(self, X, y=None, **fit_params):
# fit method of arity 2 (supervised transformation)
return self.fit(X, y, **fit_params).transform(X)

def get_feature_names(self, input_features=None):
amueller (Member Author): We can push this down if people think having it here is ugly.

"""Get output feature names.

Parameters
----------
input_features : list of string or None
String names of the input features.

Returns
-------
output_feature_names : list of string
Feature names for transformer output.
"""
# generate feature names from the class name by default;
# there would be much less guessing if we stored the number
# of output features. Ideally this would be done in each class.
if hasattr(self, 'n_clusters'):
# this is before n_components_
# because n_components_ means something else
# in agglomerative clustering
n_features = self.n_clusters
elif hasattr(self, '_max_components'):
amueller (Member Author): whoops this can be removed, it's in the class now

# special case for LinearDiscriminantAnalysis
n_components = self.n_components or np.inf
n_features = min(self._max_components, n_components)
elif hasattr(self, 'n_components_'):
# n_components could be auto or None
# this is more likely to be an int
n_features = self.n_components_
elif hasattr(self, 'components_'):
n_features = self.components_.shape[0]
elif hasattr(self, 'n_components') and self.n_components is not None:
n_features = self.n_components
else:
return None
return _make_feature_names(n_features=n_features,
prefix=type(self).__name__.lower())
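
Note: the `_make_feature_names` helper imported at the top of this file is not
shown in this diff. Reconstructed from its two call sites
(`_make_feature_names(n_features=..., prefix=...)` here and
`_make_feature_names(self.n_features_in_, input_features=...)` in
`OneToOneMixin` below), a minimal sketch of its likely behavior, not the PR's
actual code, could look like this:

import numpy as np

def _make_feature_names(n_features, prefix='x', input_features=None):
    # Hypothetical reconstruction of sklearn/utils/_feature_names.
    # If the caller supplied names, pass them through unchanged
    # (this is what OneToOneMixin relies on).
    if input_features is not None:
        if len(input_features) != n_features:
            raise ValueError("input_features has %d entries, expected %d"
                             % (len(input_features), n_features))
        return np.asarray(input_features)
    # Otherwise generate defaults: 'x0', 'x1', ... or 'pca0', 'pca1', ...
    return np.asarray(['%s%d' % (prefix, i) for i in range(n_features)])

This would be consistent with the doctests above, where default names come
back as a numpy string array (``array(['x2', 'x3'], dtype='<U2')``).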


class DensityMixin:
"""Mixin class for all density estimators in scikit-learn."""
@@ -737,6 +777,34 @@ def fit_predict(self, X, y=None):
return self.fit(X).predict(X)


class OneToOneMixin(object):
"""Provides get_feature_names for simple transformers

Assumes there's a 1-to-1 correspondence between input features
and output features.
"""

def get_feature_names(self, input_features=None):
"""Get feature names for transformation.

Returns input_features as this transformation
doesn't add or drop features.

Parameters
----------
input_features : array-like of string
Input feature names.

Returns
-------
feature_names : array-like of string
Transformed feature names
"""

return _make_feature_names(self.n_features_in_,
input_features=input_features)
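
As a usage sketch (assuming this branch is installed, and assuming a
one-to-one transformer such as `StandardScaler` inherits this mixin, which
the diff does not show):

import numpy as np
from sklearn.preprocessing import StandardScaler

X = np.array([[0., 1.], [2., 3.]])
scaler = StandardScaler().fit(X)
# Default names are generated from n_features_in_, e.g. ['x0', 'x1'].
print(scaler.get_feature_names())
# User-provided names are passed through unchanged.
print(scaler.get_feature_names(['age', 'income']))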


class MetaEstimatorMixin:
_required_parameters = ["estimator"]
"""Mixin class for all meta estimators in scikit-learn."""
18 changes: 18 additions & 0 deletions sklearn/cluster/_birch.py
@@ -15,6 +15,7 @@
from ..utils import check_array
from ..utils.extmath import row_norms
from ..utils.validation import check_is_fitted, _deprecate_positional_args
from ..utils._feature_names import _make_feature_names
from ..exceptions import ConvergenceWarning
from . import AgglomerativeClustering

@@ -656,3 +657,20 @@ def _global_clustering(self, X=None):

if compute_labels:
self.labels_ = self.predict(X)

def get_feature_names(self, input_features=None):
"""Get output feature names.

Parameters
----------
input_features : list of string or None
String names of the input features.

Returns
-------
output_feature_names : list of string
Feature names for transformer output.
"""
return _make_feature_names(
n_features=self.subcluster_centers_.shape[0],
prefix=type(self).__name__.lower())
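
Unlike `KMeans` below, where the number of output features is the
``n_clusters`` constructor parameter, `Birch` derives it from the fitted
``subcluster_centers_``, since its ``transform`` computes one distance per
discovered subcluster. A sketch against this branch:

from sklearn.cluster import Birch
from sklearn.datasets import load_iris

X, _ = load_iris(return_X_y=True)
brc = Birch(n_clusters=None).fit(X)
# One name per discovered subcluster, e.g. ['birch0', 'birch1', ...];
# the count is data-dependent rather than a constructor parameter.
print(brc.get_feature_names())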
35 changes: 35 additions & 0 deletions sklearn/cluster/_kmeans.py
@@ -28,6 +28,7 @@
from ..utils import check_random_state
from ..utils.validation import check_is_fitted, _check_sample_weight
from ..utils._openmp_helpers import _openmp_effective_n_threads
from ..utils._feature_names import _make_feature_names
from ..exceptions import ConvergenceWarning
from ._k_means_fast import _inertia_dense
from ._k_means_fast import _inertia_sparse
@@ -1215,6 +1216,23 @@ def score(self, X, y=None, sample_weight=None):
return -_labels_inertia(X, sample_weight, x_squared_norms,
self.cluster_centers_)[1]

def get_feature_names(self, input_features=None):
"""Get output feature names.

Parameters
----------
input_features : list of string or None
String names of the input features.

Returns
-------
output_feature_names : list of string
Feature names for transformer output.
"""
return _make_feature_names(
n_features=self.n_clusters,
prefix=type(self).__name__.lower())


def _mini_batch_step(X, sample_weight, x_squared_norms, centers, weight_sums,
old_center_buffer, compute_squared_diff,
@@ -1871,3 +1889,20 @@ def predict(self, X, sample_weight=None):

X = self._check_test_data(X)
return self._labels_inertia_minibatch(X, sample_weight)[0]

def get_feature_names(self, input_features=None):
"""Get output feature names.

Parameters
----------
input_features : list of string or None
String names of the input features.

Returns
-------
output_feature_names : list of string
Feature names for transformer output.
"""
return _make_feature_names(
n_features=self.n_clusters,
prefix=type(self).__name__.lower())
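
For both `KMeans` and `MiniBatchKMeans`, ``transform`` maps each sample to
its distances from the ``n_clusters`` centers, so the output names simply
enumerate the clusters. A sketch (requires this PR's branch):

from sklearn.cluster import KMeans
from sklearn.datasets import load_iris

X, _ = load_iris(return_X_y=True)
km = KMeans(n_clusters=3, random_state=0).fit(X)
# Expected with the default naming: ['kmeans0', 'kmeans1', 'kmeans2']
print(km.get_feature_names())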
6 changes: 5 additions & 1 deletion sklearn/compose/_column_transformer.py
@@ -371,8 +371,12 @@ def get_feature_names(self):
raise AttributeError("Transformer %s (type %s) does not "
"provide get_feature_names."
% (str(name), type(trans).__name__))
try:
amueller (Member Author): this is ducktyping to support both transformative and non-transformative get_feature_names.

more_names = trans.get_feature_names(input_features=column)
except TypeError:
more_names = trans.get_feature_names()
feature_names.extend([name + "__" + f for f in
trans.get_feature_names()])
more_names])
return feature_names

def _update_fitted_transformers(self, transformers):
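The try/except in ``get_feature_names`` above duck-types between the two
signatures that coexist during this transition: transformers updated by this
PR accept ``input_features``, while older ones (e.g. vectorizers) take no
arguments. Isolated as a hypothetical helper (not PR code):

def _prefixed_feature_names(trans, name, column):
    # Duck-typing between old- and new-style get_feature_names signatures.
    try:
        # New-style signature: derive output names from the input columns.
        more_names = trans.get_feature_names(input_features=column)
    except TypeError:
        # Old-style signature: takes no arguments.
        more_names = trans.get_feature_names()
    return [name + "__" + f for f in more_names]

One caveat of this pattern is that a ``TypeError`` raised inside a new-style
``get_feature_names`` would be silently retried as an old-style call.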
15 changes: 14 additions & 1 deletion sklearn/compose/tests/test_column_transformer.py
@@ -23,6 +23,7 @@
from sklearn.preprocessing import FunctionTransformer
from sklearn.preprocessing import StandardScaler, Normalizer, OneHotEncoder
from sklearn.feature_extraction import DictVectorizer
from sklearn.pipeline import make_pipeline


class Trans(BaseEstimator):
@@ -660,6 +661,17 @@ def test_column_transformer_get_feature_names():
"Transformer trans (type Trans) does not provide "
"get_feature_names", ct.get_feature_names)

# if some transformers support and some don't
ct = ColumnTransformer([('trans', Trans(), [0, 1]),
('scale', StandardScaler(), [0])])
ct.fit(X_array)
assert_raise_message(AttributeError,
"Transformer trans (type Trans) does not provide "
"get_feature_names", ct.get_feature_names)

# inside a pipeline
make_pipeline(ct).fit(X_array)

# working example
X = np.array([[{'a': 1, 'b': 2}, {'a': 3, 'b': 4}],
[{'c': 5}, {'c': 6}]], dtype=object).T
@@ -1367,4 +1379,5 @@ def test_feature_names_empty_columns(empty_col):
)

ct.fit(df)
assert ct.get_feature_names() == ['ohe__x0_a', 'ohe__x0_b', 'ohe__x1_z']
assert ct.get_feature_names() == ['ohe__col1_a', 'ohe__col1_b',
'ohe__col2_z']