40
40
ignore_warnings ,
41
41
skip_if_32bit ,
42
42
)
43
+ from sklearn .utils .fixes import CSR_CONTAINERS , LIL_CONTAINERS
43
44
44
45
x = np .linspace (0 , 1 , 10 )
45
46
xx , yy = np .meshgrid (x , x )
@@ -336,14 +337,15 @@ def test_optimization_minimizes_kl_divergence():
336
337
337
338
338
339
@pytest .mark .parametrize ("method" , ["exact" , "barnes_hut" ])
339
- def test_fit_transform_csr_matrix (method ):
340
+ @pytest .mark .parametrize ("csr_container" , CSR_CONTAINERS )
341
+ def test_fit_transform_csr_matrix (method , csr_container ):
340
342
# TODO: compare results on dense and sparse data as proposed in:
341
343
# https://github.com/scikit-learn/scikit-learn/pull/23585#discussion_r968388186
342
344
# X can be a sparse matrix.
343
345
rng = check_random_state (0 )
344
346
X = rng .randn (50 , 2 )
345
347
X [(rng .randint (0 , 50 , 25 ), rng .randint (0 , 2 , 25 ))] = 0.0
346
- X_csr = sp . csr_matrix (X )
348
+ X_csr = csr_container (X )
347
349
tsne = TSNE (
348
350
n_components = 2 ,
349
351
init = "random" ,
@@ -394,7 +396,7 @@ def test_trustworthiness_not_euclidean_metric():
394
396
[
395
397
("exact" , np .asarray ),
396
398
("barnes_hut" , np .asarray ),
397
- ("barnes_hut" , sp . csr_matrix ) ,
399
+ * [ ("barnes_hut" , csr_container ) for csr_container in CSR_CONTAINERS ] ,
398
400
],
399
401
)
400
402
@pytest .mark .parametrize (
@@ -416,7 +418,8 @@ def test_bad_precomputed_distances(method, D, retype, message_regex):
416
418
tsne .fit_transform (retype (D ))
417
419
418
420
419
- def test_exact_no_precomputed_sparse ():
421
+ @pytest .mark .parametrize ("csr_container" , CSR_CONTAINERS )
422
+ def test_exact_no_precomputed_sparse (csr_container ):
420
423
tsne = TSNE (
421
424
metric = "precomputed" ,
422
425
method = "exact" ,
@@ -425,21 +428,23 @@ def test_exact_no_precomputed_sparse():
425
428
perplexity = 1 ,
426
429
)
427
430
with pytest .raises (TypeError , match = "sparse" ):
428
- tsne .fit_transform (sp . csr_matrix ([[0 , 5 ], [5 , 0 ]]))
431
+ tsne .fit_transform (csr_container ([[0 , 5 ], [5 , 0 ]]))
429
432
430
433
431
- def test_high_perplexity_precomputed_sparse_distances ():
434
+ @pytest .mark .parametrize ("csr_container" , CSR_CONTAINERS )
435
+ def test_high_perplexity_precomputed_sparse_distances (csr_container ):
432
436
# Perplexity should be less than 50
433
437
dist = np .array ([[1.0 , 0.0 , 0.0 ], [0.0 , 1.0 , 0.0 ], [1.0 , 0.0 , 0.0 ]])
434
- bad_dist = sp . csr_matrix (dist )
438
+ bad_dist = csr_container (dist )
435
439
tsne = TSNE (metric = "precomputed" , init = "random" , random_state = 42 , perplexity = 1 )
436
440
msg = "3 neighbors per samples are required, but some samples have only 1"
437
441
with pytest .raises (ValueError , match = msg ):
438
442
tsne .fit_transform (bad_dist )
439
443
440
444
441
445
@ignore_warnings (category = EfficiencyWarning )
442
- def test_sparse_precomputed_distance ():
446
+ @pytest .mark .parametrize ("sparse_container" , CSR_CONTAINERS + LIL_CONTAINERS )
447
+ def test_sparse_precomputed_distance (sparse_container ):
443
448
"""Make sure that TSNE works identically for sparse and dense matrix"""
444
449
random_state = check_random_state (0 )
445
450
X = random_state .randn (100 , 2 )
@@ -454,9 +459,8 @@ def test_sparse_precomputed_distance():
454
459
)
455
460
Xt_dense = tsne .fit_transform (D )
456
461
457
- for fmt in ["csr" , "lil" ]:
458
- Xt_sparse = tsne .fit_transform (D_sparse .asformat (fmt ))
459
- assert_almost_equal (Xt_dense , Xt_sparse )
462
+ Xt_sparse = tsne .fit_transform (sparse_container (D_sparse ))
463
+ assert_almost_equal (Xt_dense , Xt_sparse )
460
464
461
465
462
466
def test_non_positive_computed_distances ():
@@ -499,11 +503,12 @@ def test_pca_initialization_not_compatible_with_precomputed_kernel():
499
503
tsne .fit_transform (np .array ([[0.0 ], [1.0 ]]))
500
504
501
505
502
- def test_pca_initialization_not_compatible_with_sparse_input ():
506
+ @pytest .mark .parametrize ("csr_container" , CSR_CONTAINERS )
507
+ def test_pca_initialization_not_compatible_with_sparse_input (csr_container ):
503
508
# Sparse input matrices cannot use PCA initialization.
504
509
tsne = TSNE (init = "pca" , learning_rate = 100.0 , perplexity = 1 )
505
510
with pytest .raises (TypeError , match = "PCA initialization.*" ):
506
- tsne .fit_transform (sp . csr_matrix ([[0 , 5 ], [5 , 0 ]]))
511
+ tsne .fit_transform (csr_container ([[0 , 5 ], [5 , 0 ]]))
507
512
508
513
509
514
def test_n_components_range ():
@@ -569,7 +574,8 @@ def test_n_iter_used():
569
574
assert tsne .n_iter_ == n_iter - 1
570
575
571
576
572
- def test_answer_gradient_two_points ():
577
+ @pytest .mark .parametrize ("csr_container" , CSR_CONTAINERS )
578
+ def test_answer_gradient_two_points (csr_container ):
573
579
# Test the tree with only a single set of children.
574
580
#
575
581
# These tests & answers have been checked against the reference
@@ -582,10 +588,11 @@ def test_answer_gradient_two_points():
582
588
grad_output = np .array (
583
589
[[- 2.37012478e-05 , - 6.29044398e-05 ], [2.37012478e-05 , 6.29044398e-05 ]]
584
590
)
585
- _run_answer_test (pos_input , pos_output , neighbors , grad_output )
591
+ _run_answer_test (pos_input , pos_output , neighbors , grad_output , csr_container )
586
592
587
593
588
- def test_answer_gradient_four_points ():
594
+ @pytest .mark .parametrize ("csr_container" , CSR_CONTAINERS )
595
+ def test_answer_gradient_four_points (csr_container ):
589
596
# Four points tests the tree with multiple levels of children.
590
597
#
591
598
# These tests & answers have been checked against the reference
@@ -608,10 +615,11 @@ def test_answer_gradient_four_points():
608
615
[- 2.58720939e-09 , 7.52706374e-09 ],
609
616
]
610
617
)
611
- _run_answer_test (pos_input , pos_output , neighbors , grad_output )
618
+ _run_answer_test (pos_input , pos_output , neighbors , grad_output , csr_container )
612
619
613
620
614
- def test_skip_num_points_gradient ():
621
+ @pytest .mark .parametrize ("csr_container" , CSR_CONTAINERS )
622
+ def test_skip_num_points_gradient (csr_container ):
615
623
# Test the kwargs option skip_num_points.
616
624
#
617
625
# Skip num points should make it such that the Barnes_hut gradient
@@ -637,14 +645,17 @@ def test_skip_num_points_gradient():
637
645
[- 2.58720939e-09 , 7.52706374e-09 ],
638
646
]
639
647
)
640
- _run_answer_test (pos_input , pos_output , neighbors , grad_output , False , 0.1 , 2 )
648
+ _run_answer_test (
649
+ pos_input , pos_output , neighbors , grad_output , csr_container , False , 0.1 , 2
650
+ )
641
651
642
652
643
653
def _run_answer_test (
644
654
pos_input ,
645
655
pos_output ,
646
656
neighbors ,
647
657
grad_output ,
658
+ csr_container ,
648
659
verbose = False ,
649
660
perplexity = 0.1 ,
650
661
skip_num_points = 0 ,
@@ -657,9 +668,7 @@ def _run_answer_test(
657
668
pij_input = squareform (pij_input ).astype (np .float32 )
658
669
grad_bh = np .zeros (pos_output .shape , dtype = np .float32 )
659
670
660
- from scipy .sparse import csr_matrix
661
-
662
- P = csr_matrix (pij_input )
671
+ P = csr_container (pij_input )
663
672
664
673
neighbors = P .indices .astype (np .int64 )
665
674
indptr = P .indptr .astype (np .int64 )
0 commit comments