ENH Adds feature names out to decomposition module (#21334) · samronsin/scikit-learn@5b6893a · GitHub
[go: up one dir, main page]

Skip to content

Commit 5b6893a

Browse files
thomasjpfan authored and samronsin committed
ENH Adds feature names out to decomposition module (scikit-learn#21334)
* ENH Adds feature names out to decomposition module * DOC Adds what's new * REV Removes module not in decomposition * ENH Uses a mixin * REV Better merge
1 parent ce904a6 commit 5b6893a

20 files changed

+248
-23
lines changed

doc/whats_new/v1.1.rst

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,22 @@ Changelog
5858
- |Fix| :class:`decomposition.FastICA` now validates input parameters in `fit` instead of `__init__`.
5959
:pr:`21432` by :user:`Hannah Bohle <hhnnhh>` and :user:`Maren Westermann <marenwestermann>`.
6060

61+
- |API| Adds :term:`get_feature_names_out` to all transformers in the
62+
:mod:`~sklearn.decomposition` module:
63+
:class:`~sklearn.decomposition.DictionaryLearning`,
64+
:class:`~sklearn.decomposition.FactorAnalysis`,
65+
:class:`~sklearn.decomposition.FastICA`,
66+
:class:`~sklearn.decomposition.IncrementalPCA`,
67+
:class:`~sklearn.decomposition.KernelPCA`,
68+
:class:`~sklearn.decomposition.LatentDirichletAllocation`,
69+
:class:`~sklearn.decomposition.MiniBatchDictionaryLearning`,
70+
:class:`~sklearn.decomposition.MiniBatchSparsePCA`,
71+
:class:`~sklearn.decomposition.NMF`,
72+
:class:`~sklearn.decomposition.PCA`,
73+
:class:`~sklearn.decomposition.SparsePCA`,
74+
and :class:`~sklearn.decomposition.TruncatedSVD`. :pr:`21334` by
75+
`Thomas Fan`_.
76+
6177
:mod:`sklearn.impute`
6278
.....................
6379

sklearn/base.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@
2424
from .utils.validation import _check_y
2525
from .utils.validation import _num_features
2626
from .utils.validation import _check_feature_names_in
27+
from .utils.validation import _generate_get_feature_names_out
28+
from .utils.validation import check_is_fitted
2729
from .utils._estimator_html_repr import estimator_html_repr
2830
from .utils.validation import _get_feature_names
2931

@@ -879,6 +881,31 @@ def get_feature_names_out(self, input_features=None):
879881
return _check_feature_names_in(self, input_features)
880882

881883

884+
class _ClassNamePrefixFeaturesOutMixin:
885+
"""Mixin class for transformers that generate their own names by prefixing.
886+
887+
Assumes that `_n_features_out` is defined for the estimator.
888+
"""
889+
890+
def get_feature_names_out(self, input_features=None):
891+
"""Get output feature names for transformation.
892+
893+
Parameters
894+
----------
895+
input_features : array-like of str or None, default=None
896+
Only used to validate feature names with the names seen in :meth:`fit`.
897+
898+
Returns
899+
-------
900+
feature_names_out : ndarray of str objects
901+
Transformed feature names.
902+
"""
903+
check_is_fitted(self, "_n_features_out")
904+
return _generate_get_feature_names_out(
905+
self, self._n_features_out, input_features=input_features
906+
)
907+
908+
882909
class DensityMixin:
883910
"""Mixin class for all density estimators in scikit-learn."""
884911

sklearn/decomposition/_base.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,14 @@
1111
import numpy as np
1212
from scipy import linalg
1313

14-
from ..base import BaseEstimator, TransformerMixin
14+
from ..base import BaseEstimator, TransformerMixin, _ClassNamePrefixFeaturesOutMixin
1515
from ..utils.validation import check_is_fitted
1616
from abc import ABCMeta, abstractmethod
1717

1818

19-
class _BasePCA(TransformerMixin, BaseEstimator, metaclass=ABCMeta):
19+
class _BasePCA(
20+
_ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator, metaclass=ABCMeta
21+
):
2022
"""Base class for PCA methods.
2123
2224
Warning: This class should not be used directly.
@@ -154,3 +156,8 @@ def inverse_transform(self, X):
154156
)
155157
else:
156158
return np.dot(X, self.components_) + self.mean_
159+
160+
@property
161+
def _n_features_out(self):
162+
"""Number of transformed output features."""
163+
return self.components_.shape[0]

sklearn/decomposition/_dict_learning.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
from scipy import linalg
1515
from joblib import Parallel, effective_n_jobs
1616

17-
from ..base import BaseEstimator, TransformerMixin
17+
from ..base import BaseEstimator, TransformerMixin, _ClassNamePrefixFeaturesOutMixin
1818
from ..utils import deprecated
1919
from ..utils import check_array, check_random_state, gen_even_slices, gen_batches
2020
from ..utils.extmath import randomized_svd, row_norms, svd_flip
@@ -1014,7 +1014,7 @@ def dict_learning_online(
10141014
return dictionary
10151015

10161016

1017-
class _BaseSparseCoding(TransformerMixin):
1017+
class _BaseSparseCoding(_ClassNamePrefixFeaturesOutMixin, TransformerMixin):
10181018
"""Base class from SparseCoder and DictionaryLearning algorithms."""
10191019

10201020
def __init__(
@@ -1315,6 +1315,11 @@ def n_features_in_(self):
13151315
"""Number of features seen during `fit`."""
13161316
return self.dictionary.shape[1]
13171317

1318+
@property
1319+
def _n_features_out(self):
1320+
"""Number of transformed output features."""
1321+
return self.n_components_
1322+
13181323

13191324
class DictionaryLearning(_BaseSparseCoding, BaseEstimator):
13201325
"""Dictionary learning.
@@ -1587,6 +1592,11 @@ def fit(self, X, y=None):
15871592
self.error_ = E
15881593
return self
15891594

1595+
@property
1596+
def _n_features_out(self):
1597+
"""Number of transformed output features."""
1598+
return self.components_.shape[0]
1599+
15901600

15911601
class MiniBatchDictionaryLearning(_BaseSparseCoding, BaseEstimator):
15921602
"""Mini-batch dictionary learning.
@@ -1926,3 +1936,8 @@ def partial_fit(self, X, y=None, iter_offset=None):
19261936
self.inner_stats_ = (A, B)
19271937
self.iter_offset_ = iter_offset + 1
19281938
return self
1939+
1940+
@property
1941+
def _n_features_out(self):
1942+
"""Number of transformed output features."""
1943+
return self.components_.shape[0]

sklearn/decomposition/_factor_analysis.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,14 +25,14 @@
2525
from scipy import linalg
2626

2727

28-
from ..base import BaseEstimator, TransformerMixin
28+
from ..base import BaseEstimator, TransformerMixin, _ClassNamePrefixFeaturesOutMixin
2929
from ..utils import check_random_state
3030
from ..utils.extmath import fast_logdet, randomized_svd, squared_norm
3131
from ..utils.validation import check_is_fitted
3232
from ..exceptions import ConvergenceWarning
3333

3434

35-
class FactorAnalysis(TransformerMixin, BaseEstimator):
35+
class FactorAnalysis(_ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator):
3636
"""Factor Analysis (FA).
3737
3838
A simple linear generative model with Gaussian latent variables.
@@ -426,6 +426,11 @@ def _rotate(self, components, n_components=None, tol=1e-6):
426426
else:
427427
raise ValueError("'method' must be in %s, not %s" % (implemented, method))
428428

429+
@property
430+
def _n_features_out(self):
431+
"""Number of transformed output features."""
432+
return self.components_.shape[0]
433+
429434

430435
def _ortho_rotation(components, method="varimax", tol=1e-6, max_iter=100):
431436
"""Return rotated components."""

sklearn/decomposition/_fastica.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
import numpy as np
1515
from scipy import linalg
1616

17-
from ..base import BaseEstimator, TransformerMixin
17+
from ..base import BaseEstimator, TransformerMixin, _ClassNamePrefixFeaturesOutMixin
1818
from ..exceptions import ConvergenceWarning
1919

2020
from ..utils import check_array, as_float_array, check_random_state
@@ -319,7 +319,7 @@ def my_g(x):
319319
return None, est._unmixing, sources
320320

321321

322-
class FastICA(TransformerMixin, BaseEstimator):
322+
class FastICA(_ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator):
323323
"""FastICA: a fast algorithm for Independent Component Analysis.
324324
325325
The implementation is based on [1]_.
@@ -689,3 +689,8 @@ def inverse_transform(self, X, copy=True):
689689
X += self.mean_
690690

691691
return X
692+
693+
@property
694+
def _n_features_out(self):
695+
"""Number of transformed output features."""
696+
return self.components_.shape[0]

sklearn/decomposition/_kernel_pca.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,15 +10,18 @@
1010

1111
from ..utils._arpack import _init_arpack_v0
1212
from ..utils.extmath import svd_flip, _randomized_eigsh
13-
from ..utils.validation import check_is_fitted, _check_psd_eigenvalues
13+
from ..utils.validation import (
14+
check_is_fitted,
15+
_check_psd_eigenvalues,
16+
)
1417
from ..utils.deprecation import deprecated
1518
from ..exceptions import NotFittedError
16-
from ..base import BaseEstimator, TransformerMixin
19+
from ..base import BaseEstimator, TransformerMixin, _ClassNamePrefixFeaturesOutMixin
1720
from ..preprocessing import KernelCenterer
1821
from ..metrics.pairwise import pairwise_kernels
1922

2023

21-
class KernelPCA(TransformerMixin, BaseEstimator):
24+
class KernelPCA(_ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator):
2225
"""Kernel Principal component analysis (KPCA).
2326
2427
Non-linear dimensionality reduction through the use of kernels (see
@@ -546,3 +549,8 @@ def _more_tags(self):
546549
"preserves_dtype": [np.float64, np.float32],
547550
"pairwise": self.kernel == "precomputed",
548551
}
552+
553+
@property
554+
def _n_features_out(self):
555+
"""Number of transformed output features."""
556+
return self.eigenvalues_.shape[0]

sklearn/decomposition/_lda.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
from scipy.special import gammaln, logsumexp
1717
from joblib import Parallel, effective_n_jobs
1818

19-
from ..base import BaseEstimator, TransformerMixin
19+
from ..base import BaseEstimator, TransformerMixin, _ClassNamePrefixFeaturesOutMixin
2020
from ..utils import check_random_state, gen_batches, gen_even_slices
2121
from ..utils.validation import check_non_negative
2222
from ..utils.validation import check_is_fitted
@@ -138,7 +138,9 @@ def _update_doc_distribution(
138138
return (doc_topic_distr, suff_stats)
139139

140140

141-
class LatentDirichletAllocation(TransformerMixin, BaseEstimator):
141+
class LatentDirichletAllocation(
142+
_ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator
143+
):
142144
"""Latent Dirichlet Allocation with online variational Bayes algorithm.
143145
144146
The implementation is based on [1]_ and [2]_.
@@ -887,3 +889,8 @@ def perplexity(self, X, sub_sampling=False):
887889
X, reset_n_features=True, whom="LatentDirichletAllocation.perplexity"
888890
)
889891
return self._perplexity_precomp_distr(X, sub_sampling=sub_sampling)
892+
893+
@property
894+
def _n_features_out(self):
895+
"""Number of transformed output features."""
896+
return self.components_.shape[0]

sklearn/decomposition/_nmf.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,14 @@
1515

1616
from ._cdnmf_fast import _update_cdnmf_fast
1717
from .._config import config_context
18-
from ..base import BaseEstimator, TransformerMixin
18+
from ..base import BaseEstimator, TransformerMixin, _ClassNamePrefixFeaturesOutMixin
1919
from ..exceptions import ConvergenceWarning
2020
from ..utils import check_random_state, check_array
2121
from ..utils.extmath import randomized_svd, safe_sparse_dot, squared_norm
22-
from ..utils.validation import check_is_fitted, check_non_negative
22+
from ..utils.validation import (
23+
check_is_fitted,
24+
check_non_negative,
25+
)
2326

2427
EPSILON = np.finfo(np.float32).eps
2528

@@ -1109,7 +1112,7 @@ def non_negative_factorization(
11091112
return W, H, n_iter
11101113

11111114

1112-
class NMF(TransformerMixin, BaseEstimator):
1115+
class NMF(_ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator):
11131116
"""Non-Negative Matrix Factorization (NMF).
11141117
11151118
Find two non-negative matrices (W, H) whose product approximates the non-
@@ -1708,3 +1711,8 @@ def inverse_transform(self, W):
17081711
"""
17091712
check_is_fitted(self)
17101713
return np.dot(W, self.components_)
1714+
1715+
@property
1716+
def _n_features_out(self):
1717+
"""Number of transformed output features."""
1718+
return self.components_.shape[0]

sklearn/decomposition/_sparse_pca.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,11 @@
77
from ..utils import check_random_state
88
from ..utils.validation import check_is_fitted
99
from ..linear_model import ridge_regression
10-
from ..base import BaseEstimator, TransformerMixin
10+
from ..base import BaseEstimator, TransformerMixin, _ClassNamePrefixFeaturesOutMixin
1111
from ._dict_learning import dict_learning, dict_learning_online
1212

1313

14-
class SparsePCA(TransformerMixin, BaseEstimator):
14+
class SparsePCA(_ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator):
1515
"""Sparse Principal Components Analysis (SparsePCA).
1616
1717
Finds the set of sparse components that can optimally reconstruct
@@ -236,6 +236,11 @@ def transform(self, X):
236236

237237
return U
238238

239+
@property
240+
def _n_features_out(self):
241+
"""Number of transformed output features."""
242+
return self.components_.shape[0]
243+
239244

240245
class MiniBatchSparsePCA(SparsePCA):
241246
"""Mini-batch Sparse Principal Components Analysis.

0 commit comments

Comments
 (0)
0