7
7
# License: BSD 3 clause
8
8
9
9
from libc.math cimport fabs, sqrt, isnan
10
+ from libc.stdint cimport intptr_t
10
11
11
12
cimport numpy as cnp
12
13
import numpy as np
13
14
from cython cimport floating
15
+ from ..utils._typedefs cimport float64_t, int32_t, intp_t, uint64_t
14
16
15
17
cnp.import_array()
16
18
17
- ctypedef fused integral:
18
- int
19
- long long
20
19
21
- ctypedef cnp.float64_t DOUBLE
20
+ # This `integral` fused type is defined to be used over `cython.integral`
21
+ # to only generate implementations for `int{32,64}_t`.
22
+ # TODO: use `{int,float}{32,64}_t` when cython#5230 is resolved:
23
+ # https://github.com/cython/cython/issues/5230
24
+ ctypedef fused integral:
25
+ int # int32_t
26
+ long long # int64_t
22
27
23
28
24
29
def csr_row_norms (X ):
@@ -95,40 +100,40 @@ def csr_mean_variance_axis0(X, weights=None, return_sum_weights=False):
95
100
96
101
def _csr_mean_variance_axis0 (
97
102
const floating[::1] X_data ,
98
- unsigned long long n_samples ,
99
- unsigned long long n_features ,
103
+ uint64_t n_samples ,
104
+ uint64_t n_features ,
100
105
const integral[:] X_indices ,
101
106
const integral[:] X_indptr ,
102
107
const floating[:] weights ,
103
108
):
104
109
# Implement the function here since variables using fused types
105
110
# cannot be declared directly and can only be passed as function arguments
106
111
cdef:
107
- cnp. intp_t row_ind
108
- unsigned long long feature_idx
112
+ intp_t row_ind
113
+ uint64_t feature_idx
109
114
integral i, col_ind
110
- cnp. float64_t diff
115
+ float64_t diff
111
116
# means[j] contains the mean of feature j
112
- cnp. float64_t[::1 ] means = np.zeros(n_features)
117
+ float64_t[::1 ] means = np.zeros(n_features)
113
118
# variances[j] contains the variance of feature j
114
- cnp. float64_t[::1 ] variances = np.zeros(n_features)
119
+ float64_t[::1 ] variances = np.zeros(n_features)
115
120
116
- cnp. float64_t[::1 ] sum_weights = np.full(
121
+ float64_t[::1 ] sum_weights = np.full(
117
122
fill_value = np.sum(weights, dtype = np.float64), shape = n_features
118
123
)
119
- cnp. float64_t[::1 ] sum_weights_nz = np.zeros(shape = n_features)
120
- cnp. float64_t[::1 ] correction = np.zeros(shape = n_features)
124
+ float64_t[::1 ] sum_weights_nz = np.zeros(shape = n_features)
125
+ float64_t[::1 ] correction = np.zeros(shape = n_features)
121
126
122
- cnp. uint64_t[::1 ] counts = np.full(
127
+ uint64_t[::1 ] counts = np.full(
123
128
fill_value = weights.shape[0 ], shape = n_features, dtype = np.uint64
124
129
)
125
- cnp. uint64_t[::1 ] counts_nz = np.zeros(shape = n_features, dtype = np.uint64)
130
+ uint64_t[::1 ] counts_nz = np.zeros(shape = n_features, dtype = np.uint64)
126
131
127
132
for row_ind in range (len (X_indptr) - 1 ):
128
133
for i in range (X_indptr[row_ind], X_indptr[row_ind + 1 ]):
129
134
col_ind = X_indices[i]
130
135
if not isnan(X_data[i]):
131
- means[col_ind] += < cnp. float64_t> (X_data[i]) * weights[row_ind]
136
+ means[col_ind] += < float64_t> (X_data[i]) * weights[row_ind]
132
137
# sum of weights where X[:, col_ind] is non-zero
133
138
sum_weights_nz[col_ind] += weights[row_ind]
134
139
# number of non-zero elements of X[:, col_ind]
@@ -229,8 +234,8 @@ def csc_mean_variance_axis0(X, weights=None, return_sum_weights=False):
229
234
230
235
def _csc_mean_variance_axis0 (
231
236
const floating[::1] X_data ,
232
- unsigned long long n_samples ,
233
- unsigned long long n_features ,
237
+ uint64_t n_samples ,
238
+ uint64_t n_features ,
234
239
const integral[:] X_indices ,
235
240
const integral[:] X_indptr ,
236
241
const floating[:] weights ,
@@ -239,29 +244,29 @@ def _csc_mean_variance_axis0(
239
244
# cannot be declared directly and can only be passed as function arguments
240
245
cdef:
241
246
integral i, row_ind
242
- unsigned long long feature_idx, col_ind
243
- cnp. float64_t diff
247
+ uint64_t feature_idx, col_ind
248
+ float64_t diff
244
249
# means[j] contains the mean of feature j
245
- cnp. float64_t[::1 ] means = np.zeros(n_features)
250
+ float64_t[::1 ] means = np.zeros(n_features)
246
251
# variances[j] contains the variance of feature j
247
- cnp. float64_t[::1 ] variances = np.zeros(n_features)
252
+ float64_t[::1 ] variances = np.zeros(n_features)
248
253
249
- cnp. float64_t[::1 ] sum_weights = np.full(
254
+ float64_t[::1 ] sum_weights = np.full(
250
255
fill_value = np.sum(weights, dtype = np.float64), shape = n_features
251
256
)
252
- cnp. float64_t[::1 ] sum_weights_nz = np.zeros(shape = n_features)
253
- cnp. float64_t[::1 ] correction = np.zeros(shape = n_features)
257
+ float64_t[::1 ] sum_weights_nz = np.zeros(shape = n_features)
258
+ float64_t[::1 ] correction = np.zeros(shape = n_features)
254
259
255
- cnp. uint64_t[::1 ] counts = np.full(
260
+ uint64_t[::1 ] counts = np.full(
256
261
fill_value = weights.shape[0 ], shape = n_features, dtype = np.uint64
257
262
)
258
- cnp. uint64_t[::1 ] counts_nz = np.zeros(shape = n_features, dtype = np.uint64)
263
+ uint64_t[::1 ] counts_nz = np.zeros(shape = n_features, dtype = np.uint64)
259
264
260
265
for col_ind in range (n_features):
261
266
for i in range (X_indptr[col_ind], X_indptr[col_ind + 1 ]):
262
267
row_ind = X_indices[i]
263
268
if not isnan(X_data[i]):
264
- means[col_ind] += < cnp. float64_t> (X_data[i]) * weights[row_ind]
269
+ means[col_ind] += < float64_t> (X_data[i]) * weights[row_ind]
265
270
# sum of weights where X[:, col_ind] is non-zero
266
271
sum_weights_nz[col_ind] += weights[row_ind]
267
272
# number of non-zero elements of X[:, col_ind]
@@ -387,9 +392,9 @@ def incr_mean_variance_axis0(X, last_mean, last_var, last_n, weights=None):
387
392
def _incr_mean_variance_axis0 (
388
393
const floating[:] X_data ,
389
394
floating n_samples ,
390
- unsigned long long n_features ,
395
+ uint64_t n_features ,
391
396
const int[:] X_indices ,
392
- # X_indptr might be either in32 or int64
397
+ # X_indptr might be either int32 or int64
393
398
const integral[:] X_indptr ,
394
399
str X_format ,
395
400
floating[:] last_mean ,
@@ -401,7 +406,7 @@ def _incr_mean_variance_axis0(
401
406
# Implement the function here since variables using fused types
402
407
# cannot be declared directly and can only be passed as function arguments
403
408
cdef:
404
- unsigned long long i
409
+ uint64_t i
405
410
406
411
# last = stats until now
407
412
# new = the current increment
@@ -493,13 +498,13 @@ def _inplace_csr_row_normalize_l1(
493
498
const integral[:] X_indptr ,
494
499
):
495
500
cdef:
496
- unsigned long long n_samples = shape[0 ]
501
+ uint64_t n_samples = shape[0 ]
497
502
498
503
# the column indices for row i are stored in:
499
504
# indices[indptr[i]:indptr[i+1]]
500
505
# and their corresponding values are stored in:
501
506
# data[indptr[i]:indptr[i+1]]
502
- unsigned long long i
507
+ uint64_t i
503
508
integral j
504
509
double sum_
505
510
@@ -530,8 +535,8 @@ def _inplace_csr_row_normalize_l2(
530
535
const integral[:] X_indptr ,
531
536
):
532
537
cdef:
533
- unsigned long long n_samples = shape[0 ]
534
- unsigned long long i
538
+ uint64_t n_samples = shape[0 ]
539
+ uint64_t i
535
540
integral j
536
541
double sum_
537
542
@@ -554,8 +559,8 @@ def _inplace_csr_row_normalize_l2(
554
559
555
560
def assign_rows_csr (
556
561
X ,
557
- const cnp.npy_intp [:] X_rows ,
558
- const cnp.npy_intp [:] out_rows ,
562
+ const intptr_t [:] X_rows ,
563
+ const intptr_t [:] out_rows ,
559
564
floating[:, ::1] out ,
560
565
):
561
566
""" Densify selected rows of a CSR matrix into a preallocated array.
@@ -571,12 +576,13 @@ def assign_rows_csr(
571
576
out : array, shape=(arbitrary, n_features)
572
577
"""
573
578
cdef:
574
- # npy_intp ( np.intp in Python) is what np.where returns,
579
+ # intptr_t (npy_intp, np.intp in Python) is what np.where returns,
575
580
# but int is what scipy.sparse uses.
576
- int i, ind, j, k
577
- cnp.npy_intp rX
581
+ intp_t i, ind, j, k
582
+ intptr_t rX
578
583
const floating[:] data = X.data
579
- const int [:] indices = X.indices, indptr = X.indptr
584
+ const int32_t[:] indices = X.indices
585
+ const int32_t[:] indptr = X.indptr
580
586
581
587
if X_rows.shape[0 ] != out_rows.shape[0 ]:
582
588
raise ValueError (" cannot assign %d rows to %d "
0 commit comments