ENH Add get_feature_names_out for random_projection module (#21330) · glemaitre/scikit-learn@ffe5003 · GitHub
[go: up one dir, main page]

Skip to content

Commit ffe5003

Browse files
lesteve authored and glemaitre committed
ENH Add get_feature_names_out for random_projection module (scikit-learn#21330)
1 parent 0096787 commit ffe5003

File tree

4 files changed

+36
-3
lines changed

4 files changed

+36
-3
lines changed

doc/whats_new/v1.1.rst

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,15 @@ Changelog
162162
ndarray with `np.nan` when passed a `Float32` or `Float64` pandas extension
163163
array with `pd.NA`. :pr:`21278` by `Thomas Fan`_.
164164

165+
:mod:`sklearn.random_projection`
166+
................................
167+
168+
- |API| Adds :term:`get_feature_names_out` to all transformers in the
169+
:mod:`~sklearn.random_projection` module:
170+
:class:`~sklearn.random_projection.GaussianRandomProjection` and
171+
:class:`~sklearn.random_projection.SparseRandomProjection`. :pr:`21330` by
172+
:user:`Loïc Estève <lesteve>`.
173+
165174
Code and Documentation Contributors
166175
-----------------------------------
167176

sklearn/random_projection.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
import scipy.sparse as sp
3535

3636
from .base import BaseEstimator, TransformerMixin
37+
from .base import _ClassNamePrefixFeaturesOutMixin
3738

3839
from .utils import check_random_state
3940
from .utils.extmath import safe_sparse_dot
@@ -290,7 +291,9 @@ def _sparse_random_matrix(n_components, n_features, density="auto", random_state
290291
return np.sqrt(1 / density) / np.sqrt(n_components) * components
291292

292293

293-
class BaseRandomProjection(TransformerMixin, BaseEstimator, metaclass=ABCMeta):
294+
class BaseRandomProjection(
295+
TransformerMixin, BaseEstimator, _ClassNamePrefixFeaturesOutMixin, metaclass=ABCMeta
296+
):
294297
"""Base class for random projections.
295298
296299
Warning: This class should not be used directly.
@@ -420,6 +423,14 @@ def transform(self, X):
420423
X_new = safe_sparse_dot(X, self.components_.T, dense_output=self.dense_output)
421424
return X_new
422425

426+
@property
427+
def _n_features_out(self):
428+
"""Number of transformed output features.
429+
430+
Used by _ClassNamePrefixFeaturesOutMixin.get_feature_names_out.
431+
"""
432+
return self.n_components
433+
423434

424435
class GaussianRandomProjection(BaseRandomProjection):
425436
"""Reduce dimensionality through Gaussian random projection.

sklearn/tests/test_common.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -371,7 +371,6 @@ def test_pandas_column_name_consistency(estimator):
371371
"manifold",
372372
"neighbors",
373373
"neural_network",
374-
"random_projection",
375374
]
376375

377376

sklearn/tests/test_random_projection.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424

2525
all_SparseRandomProjection: List[Any] = [SparseRandomProjection]
2626
all_DenseRandomProjection: List[Any] = [GaussianRandomProjection]
27-
all_RandomProjection = set(all_SparseRandomProjection + all_DenseRandomProjection)
27+
all_RandomProjection = all_SparseRandomProjection + all_DenseRandomProjection
2828

2929

3030
# Make some random data with uniformly located non zero entries with
@@ -359,3 +359,17 @@ def test_johnson_lindenstrauss_min_dim():
359359
Regression test for #17111: before #19374, 32-bit systems would fail.
360360
"""
361361
assert johnson_lindenstrauss_min_dim(100, eps=1e-5) == 368416070986
362+
363+
364+
@pytest.mark.parametrize("random_projection_cls", all_RandomProjection)
365+
def test_random_projection_feature_names_out(random_projection_cls):
366+
random_projection = random_projection_cls(n_components=2)
367+
random_projection.fit(data)
368+
names_out = random_projection.get_feature_names_out()
369+
class_name_lower = random_projection_cls.__name__.lower()
370+
expected_names_out = np.array(
371+
[f"{class_name_lower}{i}" for i in range(random_projection.n_components_)],
372+
dtype=object,
373+
)
374+
375+
assert_array_equal(names_out, expected_names_out)

0 commit comments

Comments
 (0)
0