ENH Preserving dtype for np.float32 in SparsePCA and MiniBatchSparsePCA by takoika · Pull Request #22111 · scikit-learn/scikit-learn

Merged · 14 commits · Jan 4, 2022
4 changes: 4 additions & 0 deletions doc/whats_new/v1.1.rst
@@ -154,6 +154,10 @@ Changelog
and :class:`decomposition.SparseCoder` preserve dtype for `numpy.float32`.
:pr:`22002` by :user:`Takeshi Oura <takoika>`.

- |Enhancement| :class:`decomposition.SparsePCA` and :class:`decomposition.MiniBatchSparsePCA`
preserve dtype for `numpy.float32`.
:pr:`22111` by :user:`Takeshi Oura <takoika>`.

- |API| Adds :term:`get_feature_names_out` to all transformers in the
:mod:`~sklearn.decomposition` module:
:class:`~sklearn.decomposition.DictionaryLearning`,
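A minimal sketch of the behavior this changelog entry describes, assuming a scikit-learn build that includes this PR; before the change, float32 input was silently upcast to float64:

import numpy as np
from sklearn.decomposition import SparsePCA

# Fit on float32 data; both the transformed output and the fitted
# components now keep the input dtype.
X = np.random.RandomState(0).randn(12, 10).astype(np.float32)
model = SparsePCA(n_components=3, random_state=0)
X_t = model.fit_transform(X)
print(X_t.dtype)                # float32
print(model.components_.dtype)  # float32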
5 changes: 5 additions & 0 deletions sklearn/decomposition/_sparse_pca.py
@@ -241,6 +241,11 @@ def _n_features_out(self):
"""Number of transformed output features."""
return self.components_.shape[0]

def _more_tags(self):
return {
"preserves_dtype": [np.float64, np.float32],
}
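For context, the `preserves_dtype` estimator tag declared above is what scikit-learn's common checks (such as `check_transformer_preserve_dtypes`) consult to decide which input dtypes must survive `fit_transform` unchanged. A short illustration using the private `_get_tags` helper; this is internal API and shown only to clarify what the diff changes:

from sklearn.decomposition import SparsePCA

# _get_tags is private API; with this PR the tag lists both float dtypes.
print(SparsePCA()._get_tags()["preserves_dtype"])
# -> [<class 'numpy.float64'>, <class 'numpy.float32'>]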


class MiniBatchSparsePCA(SparsePCA):
"""Mini-batch Sparse Principal Components Analysis.
47 changes: 47 additions & 0 deletions sklearn/decomposition/tests/test_sparse_pca.py
@@ -206,6 +206,53 @@ def test_spca_n_components_(SPCA, n_components):
assert model.n_components_ == n_features


@pytest.mark.parametrize("SPCA", (SparsePCA, MiniBatchSparsePCA))
@pytest.mark.parametrize("method", ("lars", "cd"))
@pytest.mark.parametrize(
"data_type, expected_type",
(
(np.float32, np.float32),
(np.float64, np.float64),
(np.int32, np.float64),
(np.int64, np.float64),
),
)
def test_sparse_pca_dtype_match(SPCA, method, data_type, expected_type):
# Verify the dtype of the transformed output and the fitted components
n_samples, n_features, n_components = 12, 10, 3
rng = np.random.RandomState(0)
input_array = rng.randn(n_samples, n_features).astype(data_type)
model = SPCA(n_components=n_components, method=method)
transformed = model.fit_transform(input_array)

assert transformed.dtype == expected_type
assert model.components_.dtype == expected_type
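The parametrization above also exercises integer inputs, which are not valid compute dtypes and are therefore upcast to the float64 default. A quick user-level illustration, assuming the PR is applied:

import numpy as np
from sklearn.decomposition import MiniBatchSparsePCA

# Integer arrays are converted to float64; float32 stays float32.
X_int = np.random.RandomState(0).randint(0, 10, (12, 10)).astype(np.int32)
model = MiniBatchSparsePCA(n_components=3, random_state=0)
print(model.fit_transform(X_int).dtype)  # float64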


@pytest.mark.parametrize("SPCA", (SparsePCA, MiniBatchSparsePCA))
@pytest.mark.parametrize("method", ("lars", "cd"))
def test_sparse_pca_numerical_consistency(SPCA, method):
# Verify numerical consistency between np.float32 and np.float64
rtol = 1e-3
alpha = 2
n_samples, n_features, n_components = 12, 10, 3
rng = np.random.RandomState(0)
input_array = rng.randn(n_samples, n_features)

model_32 = SPCA(
n_components=n_components, alpha=alpha, method=method, random_state=0
)
transformed_32 = model_32.fit_transform(input_array.astype(np.float32))

model_64 = SPCA(
n_components=n_components, alpha=alpha, method=method, random_state=0
)
transformed_64 = model_64.fit_transform(input_array.astype(np.float64))

assert_allclose(transformed_64, transformed_32, rtol=rtol)
assert_allclose(model_64.components_, model_32.components_, rtol=rtol)
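A note on the loose rtol=1e-3 above: float32 carries only about seven significant decimal digits, and the iterative `lars`/`cd` solvers accumulate rounding error, so only a few digits of agreement with the float64 run can be expected. The dtype epsilons bound the best case, as a quick numpy check shows:

import numpy as np

# Machine epsilon per dtype; iterative solvers amplify this by orders
# of magnitude, hence the relatively loose tolerance in the test.
print(np.finfo(np.float32).eps)  # ~1.19e-07
print(np.finfo(np.float64).eps)  # ~2.22e-16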


@pytest.mark.parametrize("SPCA", [SparsePCA, MiniBatchSparsePCA])
def test_spca_feature_names_out(SPCA):
"""Check feature names out for *SparsePCA."""