@@ -9,17 +9,18 @@ from numpy cimport ndarray
9
9
cimport numpy as np
10
10
11
11
np.import_array()
12
- ctypedef np.int32_t INDEX_T
12
+ ctypedef np.int32_t INDEX_A
13
+ ctypedef fused INDEX_B:
14
+ np.int32_t
15
+ np.int64_t
16
+ ctypedef np.int8_t FLAG_T
13
17
14
18
ctypedef fused DATA_T:
15
19
np.float32_t
16
20
np.float64_t
17
- np.int32_t
18
- np.int64_t
19
21
20
-
21
- cdef inline INDEX_T _deg2_column(INDEX_T d, INDEX_T i, INDEX_T j,
22
- INDEX_T interaction_only) nogil:
22
+ cdef inline INDEX_B _deg2_column(INDEX_B d, INDEX_B i, INDEX_B j,
23
+ FLAG_T interaction_only) nogil:
23
24
""" Compute the index of the column for a degree 2 expansion
24
25
25
26
d is the dimensionality of the input data, i and j are the indices
@@ -31,8 +32,8 @@ cdef inline INDEX_T _deg2_column(INDEX_T d, INDEX_T i, INDEX_T j,
31
32
return d * i - (i** 2 + i) / 2 + j
32
33
33
34
34
- cdef inline INDEX_T _deg3_column(INDEX_T d, INDEX_T i, INDEX_T j, INDEX_T k,
35
- INDEX_T interaction_only) nogil:
35
+ cdef inline INDEX_B _deg3_column(INDEX_B d, INDEX_B i, INDEX_B j, INDEX_B k,
36
+ FLAG_T interaction_only) nogil:
36
37
""" Compute the index of the column for a degree 3 expansion
37
38
38
39
d is the dimensionality of the input data, i, j and k are the indices
@@ -49,10 +50,14 @@ cdef inline INDEX_T _deg3_column(INDEX_T d, INDEX_T i, INDEX_T j, INDEX_T k,
49
50
50
51
51
52
def _csr_polynomial_expansion (ndarray[DATA_T , ndim = 1 ] data,
52
- ndarray[INDEX_T , ndim = 1 ] indices,
53
- ndarray[INDEX_T , ndim = 1 ] indptr,
54
- INDEX_T d , INDEX_T interaction_only ,
55
- INDEX_T degree ):
53
+ ndarray[INDEX_A , ndim = 1 ] indices,
54
+ ndarray[INDEX_A , ndim = 1 ] indptr,
55
+ INDEX_A d ,
56
+ ndarray[DATA_T , ndim = 1 ] result_data,
57
+ ndarray[INDEX_B , ndim = 1 ] result_indices,
58
+ ndarray[INDEX_B , ndim = 1 ] result_indptr,
59
+ FLAG_T interaction_only ,
60
+ FLAG_T degree ):
56
61
"""
57
62
Perform a second-degree polynomial or interaction expansion on a scipy
58
63
compressed sparse row (CSR) matrix. The method used only takes products of
@@ -74,6 +79,15 @@ def _csr_polynomial_expansion(ndarray[DATA_T, ndim=1] data,
74
79
d : int
75
80
The dimensionality of the input CSR matrix.
76
81
82
+ result_data : nd-array
83
+ The output CSR matrix's "data" attribute
84
+
85
+ result_indices : nd-array
86
+ The output CSR matrix's "indices" attribute
87
+
88
+ result_indptr : nd-array
89
+ The output CSR matrix's "indptr" attribute
90
+
77
91
interaction_only : int
78
92
0 for a polynomial expansion, 1 for an interaction expansion.
79
93
@@ -86,44 +100,13 @@ def _csr_polynomial_expansion(ndarray[DATA_T, ndim=1] data,
86
100
Matrices Using K-Simplex Numbers" by Andrew Nystrom and John Hughes.
87
101
"""
88
102
89
- assert degree in (2 , 3 )
90
-
91
- if degree == 2 :
92
- expanded_dimensionality = int ((d** 2 + d) / 2 - interaction_only* d)
93
- else :
94
- expanded_dimensionality = int ((d** 3 + 3 * d** 2 + 2 * d) / 6
95
- - interaction_only* d** 2 )
96
- if expanded_dimensionality == 0 :
97
- return None
98
- assert expanded_dimensionality > 0
99
-
100
- cdef INDEX_T total_nnz = 0 , row_i, nnz
101
-
102
- # Count how many nonzero elements the expanded matrix will contain.
103
- for row_i in range (indptr.shape[0 ]- 1 ):
104
- # nnz is the number of nonzero elements in this row.
105
- nnz = indptr[row_i + 1 ] - indptr[row_i]
106
- if degree == 2 :
107
- total_nnz += (nnz ** 2 + nnz) / 2 - interaction_only * nnz
108
- else :
109
- total_nnz += ((nnz ** 3 + 3 * nnz ** 2 + 2 * nnz) / 6
110
- - interaction_only * nnz ** 2 )
111
-
112
103
# Make the arrays that will form the CSR matrix of the expansion.
113
- cdef ndarray[DATA_T, ndim= 1 ] expanded_data = ndarray(
114
- shape = total_nnz, dtype = data.dtype)
115
- cdef ndarray[INDEX_T, ndim= 1 ] expanded_indices = ndarray(
116
- shape = total_nnz, dtype = indices.dtype)
117
- cdef INDEX_T num_rows = indptr.shape[0 ] - 1
118
- cdef ndarray[INDEX_T, ndim= 1 ] expanded_indptr = ndarray(
119
- shape = num_rows + 1 , dtype = indptr.dtype)
120
-
121
- cdef INDEX_T expanded_index = 0 , row_starts, row_ends, i, j, k, \
122
- i_ptr, j_ptr, k_ptr, num_cols_in_row, \
123
- expanded_column
104
+ cdef INDEX_A row_i, row_starts, row_ends, i, j, k, i_ptr, j_ptr, k_ptr
105
+
106
+ cdef INDEX_B expanded_index= 0 , num_cols_in_row, col
124
107
125
108
with nogil:
126
- expanded_indptr [0 ] = indptr[0 ]
109
+ result_indptr [0 ] = indptr[0 ]
127
110
for row_i in range (indptr.shape[0 ]- 1 ):
128
111
row_starts = indptr[row_i]
129
112
row_ends = indptr[row_i + 1 ]
@@ -133,9 +116,9 @@ def _csr_polynomial_expansion(ndarray[DATA_T, ndim=1] data,
133
116
for j_ptr in range (i_ptr + interaction_only, row_ends):
134
117
j = indices[j_ptr]
135
118
if degree == 2 :
136
- col = _deg2_column(d, i, j, interaction_only)
137
- expanded_indices [expanded_index] = col
138
- expanded_data [expanded_index] = (
119
+ col = _deg2_column[INDEX_B] (d, i, j, interaction_only)
120
+ result_indices [expanded_index] = col
121
+ result_data [expanded_index] = (
139
122
data[i_ptr] * data[j_ptr])
140
123
expanded_index += 1
141
124
num_cols_in_row += 1
@@ -144,14 +127,11 @@ def _csr_polynomial_expansion(ndarray[DATA_T, ndim=1] data,
144
127
for k_ptr in range (j_ptr + interaction_only,
145
128
row_ends):
146
129
k = indices[k_ptr]
147
- col = _deg3_column(d, i, j, k, interaction_only)
148
- expanded_indices [expanded_index] = col
149
- expanded_data [expanded_index] = (
130
+ col = _deg3_column[INDEX_B] (d, i, j, k, interaction_only)
131
+ result_indices [expanded_index] = col
132
+ result_data [expanded_index] = (
150
133
data[i_ptr] * data[j_ptr] * data[k_ptr])
151
134
expanded_index += 1
152
135
num_cols_in_row += 1
153
136
154
- expanded_indptr[row_i+ 1 ] = expanded_indptr[row_i] + num_cols_in_row
155
-
156
- return csr_matrix((expanded_data, expanded_indices, expanded_indptr),
157
- shape = (num_rows, expanded_dimensionality))
137
+ result_indptr[row_i+ 1 ] = result_indptr[row_i] + num_cols_in_row
0 commit comments