diff --git a/doc/whats_new/v1.1.rst b/doc/whats_new/v1.1.rst index 935be46bba5af..542d3ef0946db 100644 --- a/doc/whats_new/v1.1.rst +++ b/doc/whats_new/v1.1.rst @@ -154,6 +154,10 @@ Changelog and :class:`decomposition.SparseCoder` preserve dtype for `numpy.float32`. :pr:`22002` by :user:`Takeshi Oura <takoika>`. +- |Enhancement| :class:`decomposition.SparsePCA` and :class:`decomposition.MiniBatchSparsePCA` + preserve dtype for `numpy.float32`. + :pr:`22111` by :user:`Takeshi Oura <takoika>`. + - |API| Adds :term:`get_feature_names_out` to all transformers in the :mod:`~sklearn.decomposition` module: :class:`~sklearn.decomposition.DictionaryLearning`, diff --git a/sklearn/decomposition/_sparse_pca.py b/sklearn/decomposition/_sparse_pca.py index 31c8d2168a3e6..6f2f4c8b10582 100644 --- a/sklearn/decomposition/_sparse_pca.py +++ b/sklearn/decomposition/_sparse_pca.py @@ -241,6 +241,11 @@ def _n_features_out(self): """Number of transformed output features.""" return self.components_.shape[0] + def _more_tags(self): + return { + "preserves_dtype": [np.float64, np.float32], + } + class MiniBatchSparsePCA(SparsePCA): """Mini-batch Sparse Principal Components Analysis. 
diff --git a/sklearn/decomposition/tests/test_sparse_pca.py b/sklearn/decomposition/tests/test_sparse_pca.py index c77aabf9c182c..db92ec582abdd 100644 --- a/sklearn/decomposition/tests/test_sparse_pca.py +++ b/sklearn/decomposition/tests/test_sparse_pca.py @@ -206,6 +206,53 @@ def test_spca_n_components_(SPCA, n_components): assert model.n_components_ == n_features +@pytest.mark.parametrize("SPCA", (SparsePCA, MiniBatchSparsePCA)) +@pytest.mark.parametrize("method", ("lars", "cd")) +@pytest.mark.parametrize( + "data_type, expected_type", + ( + (np.float32, np.float32), + (np.float64, np.float64), + (np.int32, np.float64), + (np.int64, np.float64), + ), +) +def test_sparse_pca_dtype_match(SPCA, method, data_type, expected_type): + # Verify output matrix dtype + n_samples, n_features, n_components = 12, 10, 3 + rng = np.random.RandomState(0) + input_array = rng.randn(n_samples, n_features).astype(data_type) + model = SPCA(n_components=n_components, method=method) + transformed = model.fit_transform(input_array) + + assert transformed.dtype == expected_type + assert model.components_.dtype == expected_type + + +@pytest.mark.parametrize("SPCA", (SparsePCA, MiniBatchSparsePCA)) +@pytest.mark.parametrize("method", ("lars", "cd")) +def test_sparse_pca_numerical_consistency(SPCA, method): + # Verify numerical consistency between np.float32 and np.float64 + rtol = 1e-3 + alpha = 2 + n_samples, n_features, n_components = 12, 10, 3 + rng = np.random.RandomState(0) + input_array = rng.randn(n_samples, n_features) + + model_32 = SPCA( + n_components=n_components, alpha=alpha, method=method, random_state=0 + ) + transformed_32 = model_32.fit_transform(input_array.astype(np.float32)) + + model_64 = SPCA( + n_components=n_components, alpha=alpha, method=method, random_state=0 + ) + transformed_64 = model_64.fit_transform(input_array.astype(np.float64)) + + assert_allclose(transformed_64, transformed_32, rtol=rtol) + assert_allclose(model_64.components_, model_32.components_, 
rtol=rtol) + + @pytest.mark.parametrize("SPCA", [SparsePCA, MiniBatchSparsePCA]) def test_spca_feature_names_out(SPCA): """Check feature names out for *SparsePCA."""