1
1
import numpy as np
2
2
import pytest
3
- from scipy .sparse import bsr_matrix , csc_matrix , csr_matrix
4
3
5
4
from sklearn .feature_selection import VarianceThreshold
6
5
from sklearn .utils ._testing import assert_array_equal
6
+ from sklearn .utils .fixes import BSR_CONTAINERS , CSC_CONTAINERS , CSR_CONTAINERS
7
7
8
8
data = [[0 , 1 , 2 , 3 , 4 ], [0 , 2 , 2 , 3 , 5 ], [1 , 1 , 2 , 4 , 0 ]]
9
9
10
10
data2 = [[- 0.13725701 ]] * 10
11
11
12
12
13
- def test_zero_variance ():
13
+ @pytest .mark .parametrize (
14
+ "sparse_container" , [None ] + BSR_CONTAINERS + CSC_CONTAINERS + CSR_CONTAINERS
15
+ )
16
+ def test_zero_variance (sparse_container ):
14
17
# Test VarianceThreshold with default setting, zero variance.
18
+ X = data if sparse_container is None else sparse_container (data )
19
+ sel = VarianceThreshold ().fit (X )
20
+ assert_array_equal ([0 , 1 , 3 , 4 ], sel .get_support (indices = True ))
15
21
16
- for X in [data , csr_matrix (data ), csc_matrix (data ), bsr_matrix (data )]:
17
- sel = VarianceThreshold ().fit (X )
18
- assert_array_equal ([0 , 1 , 3 , 4 ], sel .get_support (indices = True ))
19
22
23
+ def test_zero_variance_value_error ():
24
+ # Test VarianceThreshold with default setting, zero variance, error cases.
20
25
with pytest .raises (ValueError ):
21
26
VarianceThreshold ().fit ([[0 , 1 , 2 , 3 ]])
22
27
with pytest .raises (ValueError ):
23
28
VarianceThreshold ().fit ([[0 , 1 ], [0 , 1 ]])
24
29
25
30
26
- def test_variance_threshold ():
31
+ @pytest .mark .parametrize ("sparse_container" , [None ] + CSR_CONTAINERS )
32
+ def test_variance_threshold (sparse_container ):
27
33
# Test VarianceThreshold with custom variance.
28
- for X in [ data , csr_matrix (data )]:
29
- X = VarianceThreshold (threshold = 0.4 ).fit_transform (X )
30
- assert (len (data ), 1 ) == X .shape
34
+ X = data if sparse_container is None else sparse_container (data )
35
+ X = VarianceThreshold (threshold = 0.4 ).fit_transform (X )
36
+ assert (len (data ), 1 ) == X .shape
31
37
32
38
33
39
@pytest .mark .skipif (
@@ -37,25 +43,30 @@ def test_variance_threshold():
37
43
"as it relies on numerical instabilities."
38
44
),
39
45
)
40
- def test_zero_variance_floating_point_error ():
46
+ @pytest .mark .parametrize (
47
+ "sparse_container" , [None ] + BSR_CONTAINERS + CSC_CONTAINERS + CSR_CONTAINERS
48
+ )
49
+ def test_zero_variance_floating_point_error (sparse_container ):
41
50
# Test that VarianceThreshold(0.0).fit eliminates features that have
42
51
# the same value in every sample, even when floating point errors
43
52
# cause np.var not to be 0 for the feature.
44
53
# See #13691
54
+ X = data2 if sparse_container is None else sparse_container (data2 )
55
+ msg = "No feature in X meets the variance threshold 0.00000"
56
+ with pytest .raises (ValueError , match = msg ):
57
+ VarianceThreshold ().fit (X )
45
58
46
- for X in [data2 , csr_matrix (data2 ), csc_matrix (data2 ), bsr_matrix (data2 )]:
47
- msg = "No feature in X meets the variance threshold 0.00000"
48
- with pytest .raises (ValueError , match = msg ):
49
- VarianceThreshold ().fit (X )
50
59
51
-
52
- def test_variance_nan ():
60
+ @pytest .mark .parametrize (
61
+ "sparse_container" , [None ] + BSR_CONTAINERS + CSC_CONTAINERS + CSR_CONTAINERS
62
+ )
63
+ def test_variance_nan (sparse_container ):
53
64
arr = np .array (data , dtype = np .float64 )
54
65
# add single NaN and feature should still be included
55
66
arr [0 , 0 ] = np .nan
56
67
# make all values in feature NaN and feature should be rejected
57
68
arr [:, 1 ] = np .nan
58
69
59
- for X in [ arr , csr_matrix ( arr ), csc_matrix ( arr ), bsr_matrix (arr )]:
60
- sel = VarianceThreshold ().fit (X )
61
- assert_array_equal ([0 , 3 , 4 ], sel .get_support (indices = True ))
70
+ X = arr if sparse_container is None else sparse_container (arr )
71
+ sel = VarianceThreshold ().fit (X )
72
+ assert_array_equal ([0 , 3 , 4 ], sel .get_support (indices = True ))
0 commit comments