TST Extend tests for `scipy.sparse.*array` in `sklearn/manifold/tests/test_t_sne.py` · scikit-learn/scikit-learn@0169bde · GitHub
[go: up one dir, main page]

Skip to content

Commit 0169bde

Browse files
Tialo and glemaitre authored
TST Extend tests for scipy.sparse.*array in sklearn/manifold/tests/test_t_sne.py (#27221)
Co-authored-by: Guillaume Lemaitre <g.lemaitre58@gmail.com>
1 parent 1fe5350 commit 0169bde

File tree

1 file changed

+31
-22
lines changed

1 file changed

+31
-22
lines changed

sklearn/manifold/tests/test_t_sne.py

Lines changed: 31 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
ignore_warnings,
4141
skip_if_32bit,
4242
)
43+
from sklearn.utils.fixes import CSR_CONTAINERS, LIL_CONTAINERS
4344

4445
x = np.linspace(0, 1, 10)
4546
xx, yy = np.meshgrid(x, x)
@@ -336,14 +337,15 @@ def test_optimization_minimizes_kl_divergence():
336337

337338

338339
@pytest.mark.parametrize("method", ["exact", "barnes_hut"])
339-
def test_fit_transform_csr_matrix(method):
340+
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
341+
def test_fit_transform_csr_matrix(method, csr_container):
340342
# TODO: compare results on dense and sparse data as proposed in:
341343
# https://github.com/scikit-learn/scikit-learn/pull/23585#discussion_r968388186
342344
# X can be a sparse matrix.
343345
rng = check_random_state(0)
344346
X = rng.randn(50, 2)
345347
X[(rng.randint(0, 50, 25), rng.randint(0, 2, 25))] = 0.0
346-
X_csr = sp.csr_matrix(X)
348+
X_csr = csr_container(X)
347349
tsne = TSNE(
348350
n_components=2,
349351
init="random",
@@ -394,7 +396,7 @@ def test_trustworthiness_not_euclidean_metric():
394396
[
395397
("exact", np.asarray),
396398
("barnes_hut", np.asarray),
397-
("barnes_hut", sp.csr_matrix),
399+
*[("barnes_hut", csr_container) for csr_container in CSR_CONTAINERS],
398400
],
399401
)
400402
@pytest.mark.parametrize(
@@ -416,7 +418,8 @@ def test_bad_precomputed_distances(method, D, retype, message_regex):
416418
tsne.fit_transform(retype(D))
417419

418420

419-
def test_exact_no_precomputed_sparse():
421+
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
422+
def test_exact_no_precomputed_sparse(csr_container):
420423
tsne = TSNE(
421424
metric="precomputed",
422425
method="exact",
@@ -425,21 +428,23 @@ def test_exact_no_precomputed_sparse():
425428
perplexity=1,
426429
)
427430
with pytest.raises(TypeError, match="sparse"):
428-
tsne.fit_transform(sp.csr_matrix([[0, 5], [5, 0]]))
431+
tsne.fit_transform(csr_container([[0, 5], [5, 0]]))
429432

430433

431-
def test_high_perplexity_precomputed_sparse_distances():
434+
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
435+
def test_high_perplexity_precomputed_sparse_distances(csr_container):
432436
# Perplexity should be less than 50
433437
dist = np.array([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [1.0, 0.0, 0.0]])
434-
bad_dist = sp.csr_matrix(dist)
438+
bad_dist = csr_container(dist)
435439
tsne = TSNE(metric="precomputed", init="random", random_state=42, perplexity=1)
436440
msg = "3 neighbors per samples are required, but some samples have only 1"
437441
with pytest.raises(ValueError, match=msg):
438442
tsne.fit_transform(bad_dist)
439443

440444

441445
@ignore_warnings(category=EfficiencyWarning)
442-
def test_sparse_precomputed_distance():
446+
@pytest.mark.parametrize("sparse_container", CSR_CONTAINERS + LIL_CONTAINERS)
447+
def test_sparse_precomputed_distance(sparse_container):
443448
"""Make sure that TSNE works identically for sparse and dense matrix"""
444449
random_state = check_random_state(0)
445450
X = random_state.randn(100, 2)
@@ -454,9 +459,8 @@ def test_sparse_precomputed_distance():
454459
)
455460
Xt_dense = tsne.fit_transform(D)
456461

457-
for fmt in ["csr", "lil"]:
458-
Xt_sparse = tsne.fit_transform(D_sparse.asformat(fmt))
459-
assert_almost_equal(Xt_dense, Xt_sparse)
462+
Xt_sparse = tsne.fit_transform(sparse_container(D_sparse))
463+
assert_almost_equal(Xt_dense, Xt_sparse)
460464

461465

462466
def test_non_positive_computed_distances():
@@ -499,11 +503,12 @@ def test_pca_initialization_not_compatible_with_precomputed_kernel():
499503
tsne.fit_transform(np.array([[0.0], [1.0]]))
500504

501505

502-
def test_pca_initialization_not_compatible_with_sparse_input():
506+
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
507+
def test_pca_initialization_not_compatible_with_sparse_input(csr_container):
503508
# Sparse input matrices cannot use PCA initialization.
504509
tsne = TSNE(init="pca", learning_rate=100.0, perplexity=1)
505510
with pytest.raises(TypeError, match="PCA initialization.*"):
506-
tsne.fit_transform(sp.csr_matrix([[0, 5], [5, 0]]))
511+
tsne.fit_transform(csr_container([[0, 5], [5, 0]]))
507512

508513

509514
def test_n_components_range():
@@ -569,7 +574,8 @@ def test_n_iter_used():
569574
assert tsne.n_iter_ == n_iter - 1
570575

571576

572-
def test_answer_gradient_two_points():
577+
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
578+
def test_answer_gradient_two_points(csr_container):
573579
# Test the tree with only a single set of children.
574580
#
575581
# These tests & answers have been checked against the reference
@@ -582,10 +588,11 @@ def test_answer_gradient_two_points():
582588
grad_output = np.array(
583589
[[-2.37012478e-05, -6.29044398e-05], [2.37012478e-05, 6.29044398e-05]]
584590
)
585-
_run_answer_test(pos_input, pos_output, neighbors, grad_output)
591+
_run_answer_test(pos_input, pos_output, neighbors, grad_output, csr_container)
586592

587593

588-
def test_answer_gradient_four_points():
594+
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
595+
def test_answer_gradient_four_points(csr_container):
589596
# Four points tests the tree with multiple levels of children.
590597
#
591598
# These tests & answers have been checked against the reference
@@ -608,10 +615,11 @@ def test_answer_gradient_four_points():
608615
[-2.58720939e-09, 7.52706374e-09],
609616
]
610617
)
611-
_run_answer_test(pos_input, pos_output, neighbors, grad_output)
618+
_run_answer_test(pos_input, pos_output, neighbors, grad_output, csr_container)
612619

613620

614-
def test_skip_num_points_gradient():
621+
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
622+
def test_skip_num_points_gradient(csr_container):
615623
# Test the kwargs option skip_num_points.
616624
#
617625
# Skip num points should make it such that the Barnes_hut gradient
@@ -637,14 +645,17 @@ def test_skip_num_points_gradient():
637645
[-2.58720939e-09, 7.52706374e-09],
638646
]
639647
)
640-
_run_answer_test(pos_input, pos_output, neighbors, grad_output, False, 0.1, 2)
648+
_run_answer_test(
649+
pos_input, pos_output, neighbors, grad_output, csr_container, False, 0.1, 2
650+
)
641651

642652

643653
def _run_answer_test(
644654
pos_input,
645655
pos_output,
646656
neighbors,
647657
grad_output,
658+
csr_container,
648659
verbose=False,
649660
perplexity=0.1,
650661
skip_num_points=0,
@@ -657,9 +668,7 @@ def _run_answer_test(
657668
pij_input = squareform(pij_input).astype(np.float32)
658669
grad_bh = np.zeros(pos_output.shape, dtype=np.float32)
659670

660-
from scipy.sparse import csr_matrix
661-
662-
P = csr_matrix(pij_input)
671+
P = csr_container(pij_input)
663672

664673
neighbors = P.indices.astype(np.int64)
665674
indptr = P.indptr.astype(np.int64)

0 commit comments

Comments
 (0)
0