#5778: Harder test cases for sparse min and max, wrote csc impl · scikit-learn/scikit-learn@97385f8 · GitHub
[go: up one dir, main page]

Skip to content

Commit 97385f8

Browse files
committed
#5778: Harder test cases for sparse min and max, wrote csc impl
1 parent 0643a49 commit 97385f8

File tree

3 files changed

+89
-15
lines changed

3 files changed

+89
-15
lines changed

sklearn/preprocessing/tests/test_discretizer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
def test_discretizer_bad_n_bins():
1212
try:
13-
dis = Discretizer(n_bins=1)
13+
dis = Discretizer(n_bins=1).fit(X)
1414
except:
1515
return
1616
else:

sklearn/utils/sparsefuncs_fast.pyx

Lines changed: 58 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,62 @@ np.import_array()
1717

1818
ctypedef np.float64_t DOUBLE
1919

20+
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
def csc_col_min(X):
    """Minimum of each column of a CSC matrix.

    Equivalent to calling X.min(axis=0): entries that are not stored in the
    sparse structure count as zeros.

    Parameters
    ----------
    X : scipy.sparse.csc_matrix
        Input matrix. Its data is read as float64.

    Returns
    -------
    minimum : double[::1] memoryview of length X.shape[1]
        Per-column minimum.

    Raises
    ------
    ValueError
        If X is not a csc_matrix.
    """
    if not isinstance(X, sp.csc_matrix):
        raise ValueError("Invalid matrix format being used for csc_col_min.")

    cdef:
        Py_ssize_t n_samples = X.shape[0]
        Py_ssize_t n_features = X.shape[1]
        # scipy stores indptr as a signed int32/int64 array; normalise it to
        # the pointer-sized signed type so the typed memoryview accepts it.
        Py_ssize_t[::1] col_ptr = np.ascontiguousarray(X.indptr,
                                                       dtype=np.intp)
        double[::1] data = np.ascontiguousarray(X.data, dtype=np.float64)
        double[::1] minimum = np.zeros(n_features)

        Py_ssize_t i, j, start, end

    # indptr has n_features + 1 entries: iterate over columns, not over nnz.
    for i in range(n_features):
        start = col_ptr[i]
        end = col_ptr[i + 1]

        # A column in which every row is stored has no implicit zero, so the
        # running minimum must start from a real value instead of 0.
        # NOTE(review): assumes no duplicate entries (canonical format).
        if end > start and end - start == n_samples:
            minimum[i] = data[start]

        # Iterate downwards through the stored entries of column i
        for j in range(start, end):
            minimum[i] = double_min(minimum[i], data[j])

    return minimum
47+
48+
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
def csc_col_max(X):
    """Maximum of each column of a CSC matrix.

    Equivalent to calling X.max(axis=0): entries that are not stored in the
    sparse structure count as zeros.

    Parameters
    ----------
    X : scipy.sparse.csc_matrix
        Input matrix. Its data is read as float64.

    Returns
    -------
    maximum : double[::1] memoryview of length X.shape[1]
        Per-column maximum.

    Raises
    ------
    ValueError
        If X is not a csc_matrix.
    """
    if not isinstance(X, sp.csc_matrix):
        raise ValueError("Invalid matrix format being used for csc_col_max.")

    cdef:
        Py_ssize_t n_samples = X.shape[0]
        Py_ssize_t n_features = X.shape[1]
        # scipy stores indptr as a signed int32/int64 array; normalise it to
        # the pointer-sized signed type so the typed memoryview accepts it.
        Py_ssize_t[::1] col_ptr = np.ascontiguousarray(X.indptr,
                                                       dtype=np.intp)
        double[::1] data = np.ascontiguousarray(X.data, dtype=np.float64)
        double[::1] maximum = np.zeros(n_features)

        Py_ssize_t i, j, start, end

    # indptr has n_features + 1 entries: iterate over columns, not over nnz.
    for i in range(n_features):
        start = col_ptr[i]
        end = col_ptr[i + 1]

        # A column in which every row is stored has no implicit zero, so the
        # running maximum must start from a real value instead of 0
        # (otherwise an all-negative column would report 0).
        # NOTE(review): assumes no duplicate entries (canonical format).
        if end > start and end - start == n_samples:
            maximum[i] = data[start]

        # Iterate downwards through the stored entries of column i
        for j in range(start, end):
            maximum[i] = double_max(maximum[i], data[j])

    return maximum
75+
2076
@cython.boundscheck(False)
2177
@cython.wraparound(False)
2278
@cython.cdivision(True)
@@ -35,7 +91,7 @@ def csr_col_min(X):
3591

3692
int col_ind, index
3793
double num
38-
for index in range(X.data.shape[0]):
94+
for index in range(X.nnz):
3995
col_ind = col_indices[index]
4096
num = data[index]
4197
minimum[col_ind] = double_min(num, minimum[col_ind])
@@ -60,7 +116,7 @@ def csr_col_max(X):
60116

61117
int col_ind, index
62118
double num
63-
for index in range(X.data.shape[0]):
119+
for index in range(X.nnz):
64120
col_ind = col_indices[index]
65121
num = data[index]
66122
maximum[col_ind] = double_max(num, maximum[col_ind])

sklearn/utils/tests/test_sparsefuncs.py

Lines changed: 30 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -14,38 +14,56 @@
1414
inplace_swap_row, inplace_swap_column,
1515
min_max_axis,
1616
count_nonzero, csc_median_axis_0,
17-
csr_col_min, csr_col_max)
17+
csr_col_min, csr_col_max,
18+
csc_col_min, csc_col_max)
1819
from sklearn.utils.sparsefuncs_fast import assign_rows_csr
1920
from sklearn.utils.testing import assert_raises
2021

21-
X = [[0, 71, 0, 0, 0, 0],
22-
[0, 0, -2, 0, 4, 0],
23-
[99, 0, 0, 0, 0, 0],
24-
[72, -1, 0, 0, 0, 73]]
22+
# Dense fixture for the sparse column min/max tests: mixed signs,
# non-integer values, one all-zero row and one all-zero column.
X = np.array([[-67.8, 71. , 0. , 0. , 0. , 74.5],
23+
[ 0. , 0. , -2. , 0. , 4. , 0. ],
24+
[ 99.8, 0. , 0. , 0. , 4.5, 0. ],
25+
[ 0. , 0. , 0. , 0. , 0. , 0. ],
26+
[ 72. , -1. , 0. , 0. , 0. , 73. ]])
27+
def test_csr_csc_col_min():
    """Column minimum must agree for the CSR and CSC implementations."""
    # Zeros count towards the minimum: column 4 holds 0, 4, 4.5, 0, 0 and
    # column 5 holds 74.5, 0, 0, 0, 73, so both have a minimum of 0.
    expected = [-67.8, -1, -2, 0, 0, 0]

    sparse = sp.csr_matrix(X)
    minimum = csr_col_min(sparse)
    assert_array_equal(expected, minimum)

    sparse = sp.csc_matrix(X)
    minimum = csc_col_min(sparse)
    assert_array_equal(expected, minimum)
37+
3338
def test_csr_col_min_error():
    """csr_col_min must reject a matrix that is not in CSR format."""
    # Building the matrix alone can never raise; the function under test
    # has to be called with the wrong format for the check to mean anything.
    # NOTE(review): assumes csr_col_min raises ValueError for non-CSR input,
    # as the original failure message implies — confirm against its body.
    sparse = sp.csc_matrix(X)
    assert_raises(ValueError, csr_col_min, sparse)
4146

42-
def test_csr_col_max():
43-
sparse = sp.csr_matrix(X)
47+
def test_csc_col_min_error():
    """csc_col_min must reject a matrix that is not in CSC format."""
    # A CSR matrix is the wrong format for the CSC implementation, which
    # raises ValueError on any input that is not a csc_matrix.
    sparse = sp.csr_matrix(X)
    assert_raises(ValueError, csc_col_min, sparse)
4455

45-
expected = [99, 71, 0, 0, 4, 73]
56+
def test_csr_csc_col_max():
    """Column maximum must agree for the CSR and CSC implementations."""
    col_max_expected = [99.8, 71, 0, 0, 4.5, 74.5]

    # Same dense fixture, once per sparse format and matching function.
    cases = ((sp.csr_matrix, csr_col_max),
             (sp.csc_matrix, csc_col_max))
    for to_sparse, col_max in cases:
        assert_array_equal(col_max_expected, col_max(to_sparse(X)))
66+
4967
def test_csr_col_max_error():
5068
try:
5169
sparse = sp.csc_matrix(X)

0 commit comments

Comments
 (0)
0