TST Extend tests for `scipy.sparse.*array` in `sklearn/cluster/tests/test_bicluster.py` by Kislovskiy · Pull Request #27093 · scikit-learn/scikit-learn · GitHub
[go: up one dir, main page]

Skip to content

TST Extend tests for scipy.sparse.*array in sklearn/cluster/tests/test_bicluster.py #27093

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Aug 18, 2023
33 changes: 20 additions & 13 deletions sklearn/cluster/tests/test_bicluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import numpy as np
import pytest
from scipy.sparse import csr_matrix, issparse
from scipy.sparse import issparse

from sklearn.base import BaseEstimator, BiclusterMixin
from sklearn.cluster import SpectralBiclustering, SpectralCoclustering
Expand All @@ -19,6 +19,7 @@
assert_array_almost_equal,
assert_array_equal,
)
from sklearn.utils.fixes import CSR_CONTAINERS


class MockBiclustering(BiclusterMixin, BaseEstimator):
Expand All @@ -34,11 +35,12 @@ def get_indices(self, i):
)


def test_get_submatrix():
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_get_submatrix(csr_container):
data = np.arange(20).reshape(5, 4)
model = MockBiclustering()

for X in (data, csr_matrix(data), data.tolist()):
for X in (data, csr_container(data), data.tolist()):
submatrix = model.get_submatrix(0, X)
if issparse(submatrix):
submatrix = submatrix.toarray()
Expand All @@ -58,7 +60,8 @@ def _test_shape_indices(model):
assert len(j_ind) == n


def test_spectral_coclustering(global_random_seed):
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_spectral_coclustering(global_random_seed, csr_container):
# Test Dhillon's Spectral CoClustering on a simple problem.
param_grid = {
"svd_method": ["randomized", "arpack"],
Expand All @@ -72,7 +75,7 @@ def test_spectral_coclustering(global_random_seed):
)
S -= S.min() # needs to be nonnegative before making it sparse
S = np.where(S < 1, 0, S) # threshold some values
for mat in (S, csr_matrix(S)):
for mat in (S, csr_container(S)):
for kwargs in ParameterGrid(param_grid):
model = SpectralCoclustering(
n_clusters=3, random_state=global_random_seed, **kwargs
Expand All @@ -87,7 +90,8 @@ def test_spectral_coclustering(global_random_seed):
_test_shape_indices(model)


def test_spectral_biclustering(global_random_seed):
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_spectral_biclustering(global_random_seed, csr_container):
# Test Kluger methods on a checkerboard dataset.
S, rows, cols = make_checkerboard(
(30, 30), 3, noise=0.5, random_state=global_random_seed
Expand All @@ -100,7 +104,7 @@ def test_spectral_biclustering(global_random_seed):
"mini_batch": [True],
}

for mat in (S, csr_matrix(S)):
for mat in (S, csr_container(S)):
for param_name, param_values in non_default_params.items():
for param_value in param_values:
model = SpectralBiclustering(
Expand Down Expand Up @@ -145,20 +149,22 @@ def _do_bistochastic_test(scaled):
assert_almost_equal(scaled.sum(axis=0).mean(), scaled.sum(axis=1).mean(), decimal=1)


def test_scale_normalize(global_random_seed):
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_scale_normalize(global_random_seed, csr_container):
generator = np.random.RandomState(global_random_seed)
X = generator.rand(100, 100)
for mat in (X, csr_matrix(X)):
for mat in (X, csr_container(X)):
scaled, _, _ = _scale_normalize(mat)
_do_scale_test(scaled)
if issparse(mat):
assert issparse(scaled)


def test_bistochastic_normalize(global_random_seed):
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_bistochastic_normalize(global_random_seed, csr_container):
generator = np.random.RandomState(global_random_seed)
X = generator.rand(100, 100)
for mat in (X, csr_matrix(X)):
for mat in (X, csr_container(X)):
scaled = _bistochastic_normalize(mat)
_do_bistochastic_test(scaled)
if issparse(mat):
Expand All @@ -181,11 +187,12 @@ def test_fit_best_piecewise(global_random_seed):
assert_array_equal(best, vectors[:2])


def test_project_and_cluster(global_random_seed):
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_project_and_cluster(global_random_seed, csr_container):
model = SpectralBiclustering(random_state=global_random_seed)
data = np.array([[1, 1, 1], [1, 1, 1], [3, 6, 3], [3, 6, 3]])
vectors = np.array([[1, 0], [0, 1], [0, 0]])
for mat in (data, csr_matrix(data)):
for mat in (data, csr_container(data)):
labels = model._project_and_cluster(mat, vectors, n_clusters=2)
assert_almost_equal(v_measure_score(labels, [0, 0, 1, 1]), 1.0)

Expand Down