MAINT cython typedefs in sparsefuncs_fast (#27333) · REDVM/scikit-learn@925ce0f · GitHub
[go: up one dir, main page]

Skip to content

Commit 925ce0f

Browse files
lorentzenchr and glemaitre
authored and committed
MAINT cython typedefs in sparsefuncs_fast (scikit-learn#27333)
Co-authored-by: Guillaume Lemaitre <g.lemaitre58@gmail.com>
1 parent 10f8723 commit 925ce0f

File tree

3 files changed

+50
-42
lines changed

3 files changed

+50
-42
lines changed

sklearn/utils/_typedefs.pxd

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
# NOTE: Extend this list as needed when converting more cython extensions.
1717
ctypedef unsigned char uint8_t
1818
ctypedef unsigned int uint32_t
19+
ctypedef unsigned long long uint64_t
1920
ctypedef Py_ssize_t intp_t
2021
ctypedef float float32_t
2122
ctypedef double float64_t

sklearn/utils/_typedefs.pyx

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import numpy as np
99
ctypedef fused testing_type_t:
1010
uint8_t
1111
uint32_t
12+
uint64_t
1213
intp_t
1314
float32_t
1415
float64_t

sklearn/utils/sparsefuncs_fast.pyx

Lines changed: 48 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -7,18 +7,23 @@
77
# License: BSD 3 clause
88

99
from libc.math cimport fabs, sqrt, isnan
10+
from libc.stdint cimport intptr_t
1011

1112
cimport numpy as cnp
1213
import numpy as np
1314
from cython cimport floating
15+
from ..utils._typedefs cimport float64_t, int32_t, intp_t, uint64_t
1416

1517
cnp.import_array()
1618

17-
ctypedef fused integral:
18-
int
19-
long long
2019

21-
ctypedef cnp.float64_t DOUBLE
20+
# This `integral` fused type is defined to be used over `cython.integral`
21+
# to only generate implementations for `int{32,64}_t`.
22+
# TODO: use `{int,float}{32,64}_t` when cython#5230 is resolved:
23+
# https://github.com/cython/cython/issues/5230
24+
ctypedef fused integral:
25+
int # int32_t
26+
long long # int64_t
2227

2328

2429
def csr_row_norms(X):
@@ -95,40 +100,40 @@ def csr_mean_variance_axis0(X, weights=None, return_sum_weights=False):
95100

96101
def _csr_mean_variance_axis0(
97102
const floating[::1] X_data,
98-
unsigned long long n_samples,
99-
unsigned long long n_features,
103+
uint64_t n_samples,
104+
uint64_t n_features,
100105
const integral[:] X_indices,
101106
const integral[:] X_indptr,
102107
const floating[:] weights,
103108
):
104109
# Implement the function here since variables using fused types
105110
# cannot be declared directly and can only be passed as function arguments
106111
cdef:
107-
cnp.intp_t row_ind
108-
unsigned long long feature_idx
112+
intp_t row_ind
113+
uint64_t feature_idx
109114
integral i, col_ind
110-
cnp.float64_t diff
115+
float64_t diff
111116
# means[j] contains the mean of feature j
112-
cnp.float64_t[::1] means = np.zeros(n_features)
117+
float64_t[::1] means = np.zeros(n_features)
113118
# variances[j] contains the variance of feature j
114-
cnp.float64_t[::1] variances = np.zeros(n_features)
119+
float64_t[::1] variances = np.zeros(n_features)
115120

116-
cnp.float64_t[::1] sum_weights = np.full(
121+
float64_t[::1] sum_weights = np.full(
117122
fill_value=np.sum(weights, dtype=np.float64), shape=n_features
118123
)
119-
cnp.float64_t[::1] sum_weights_nz = np.zeros(shape=n_features)
120-
cnp.float64_t[::1] correction = np.zeros(shape=n_features)
124+
float64_t[::1] sum_weights_nz = np.zeros(shape=n_features)
125+
float64_t[::1] correction = np.zeros(shape=n_features)
121126

122-
cnp.uint64_t[::1] counts = np.full(
127+
uint64_t[::1] counts = np.full(
123128
fill_value=weights.shape[0], shape=n_features, dtype=np.uint64
124129
)
125-
cnp.uint64_t[::1] counts_nz = np.zeros(shape=n_features, dtype=np.uint64)
130+
uint64_t[::1] counts_nz = np.zeros(shape=n_features, dtype=np.uint64)
126131

127132
for row_ind in range(len(X_indptr) - 1):
128133
for i in range(X_indptr[row_ind], X_indptr[row_ind + 1]):
129134
col_ind = X_indices[i]
130135
if not isnan(X_data[i]):
131-
means[col_ind] += <cnp.float64_t>(X_data[i]) * weights[row_ind]
136+
means[col_ind] += <float64_t>(X_data[i]) * weights[row_ind]
132137
# sum of weights where X[:, col_ind] is non-zero
133138
sum_weights_nz[col_ind] += weights[row_ind]
134139
# number of non-zero elements of X[:, col_ind]
@@ -229,8 +234,8 @@ def csc_mean_variance_axis0(X, weights=None, return_sum_weights=False):
229234

230235
def _csc_mean_variance_axis0(
231236
const floating[::1] X_data,
232-
unsigned long long n_samples,
233-
unsigned long long n_features,
237+
uint64_t n_samples,
238+
uint64_t n_features,
234239
const integral[:] X_indices,
235240
const integral[:] X_indptr,
236241
const floating[:] weights,
@@ -239,29 +244,29 @@ def _csc_mean_variance_axis0(
239244
# cannot be declared directly and can only be passed as function arguments
240245
cdef:
241246
integral i, row_ind
242-
unsigned long long feature_idx, col_ind
243-
cnp.float64_t diff
247+
uint64_t feature_idx, col_ind
248+
float64_t diff
244249
# means[j] contains the mean of feature j
245-
cnp.float64_t[::1] means = np.zeros(n_features)
250+
float64_t[::1] means = np.zeros(n_features)
246251
# variances[j] contains the variance of feature j
247-
cnp.float64_t[::1] variances = np.zeros(n_features)
252+
float64_t[::1] variances = np.zeros(n_features)
248253

249-
cnp.float64_t[::1] sum_weights = np.full(
254+
float64_t[::1] sum_weights = np.full(
250255
fill_value=np.sum(weights, dtype=np.float64), shape=n_features
251256
)
252-
cnp.float64_t[::1] sum_weights_nz = np.zeros(shape=n_features)
253-
cnp.float64_t[::1] correction = np.zeros(shape=n_features)
257+
float64_t[::1] sum_weights_nz = np.zeros(shape=n_features)
258+
float64_t[::1] correction = np.zeros(shape=n_features)
254259

255-
cnp.uint64_t[::1] counts = np.full(
260+
uint64_t[::1] counts = np.full(
256261
fill_value=weights.shape[0], shape=n_features, dtype=np.uint64
257262
)
258-
cnp.uint64_t[::1] counts_nz = np.zeros(shape=n_features, dtype=np.uint64)
263+
uint64_t[::1] counts_nz = np.zeros(shape=n_features, dtype=np.uint64)
259264

260265
for col_ind in range(n_features):
261266
for i in range(X_indptr[col_ind], X_indptr[col_ind + 1]):
262267
row_ind = X_indices[i]
263268
if not isnan(X_data[i]):
264-
means[col_ind] += <cnp.float64_t>(X_data[i]) * weights[row_ind]
269+
means[col_ind] += <float64_t>(X_data[i]) * weights[row_ind]
265270
# sum of weights where X[:, col_ind] is non-zero
266271
sum_weights_nz[col_ind] += weights[row_ind]
267272
# number of non-zero elements of X[:, col_ind]
@@ -387,9 +392,9 @@ def incr_mean_variance_axis0(X, last_mean, last_var, last_n, weights=None):
387392
def _incr_mean_variance_axis0(
388393
const floating[:] X_data,
389394
floating n_samples,
390-
unsigned long long n_features,
395+
uint64_t n_features,
391396
const int[:] X_indices,
392-
# X_indptr might be either in32 or int64
397+
# X_indptr might be either int32 or int64
393398
const integral[:] X_indptr,
394399
str X_format,
395400
floating[:] last_mean,
@@ -401,7 +406,7 @@ def _incr_mean_variance_axis0(
401406
# Implement the function here since variables using fused types
402407
# cannot be declared directly and can only be passed as function arguments
403408
cdef:
404-
unsigned long long i
409+
uint64_t i
405410

406411
# last = stats until now
407412
# new = the current increment
@@ -493,13 +498,13 @@ def _inplace_csr_row_normalize_l1(
493498
const integral[:] X_indptr,
494499
):
495500
cdef:
496-
unsigned long long n_samples = shape[0]
501+
uint64_t n_samples = shape[0]
497502

498503
# the column indices for row i are stored in:
499504
# indices[indptr[i]:indptr[i+1]]
500505
# and their corresponding values are stored in:
501506
# data[indptr[i]:indptr[i+1]]
502-
unsigned long long i
507+
uint64_t i
503508
integral j
504509
double sum_
505510

@@ -530,8 +535,8 @@ def _inplace_csr_row_normalize_l2(
530535
const integral[:] X_indptr,
531536
):
532537
cdef:
533-
unsigned long long n_samples = shape[0]
534-
unsigned long long i
538+
uint64_t n_samples = shape[0]
539+
uint64_t i
535540
integral j
536541
double sum_
537542

@@ -554,8 +559,8 @@ def _inplace_csr_row_normalize_l2(
554559

555560
def assign_rows_csr(
556561
X,
557-
const cnp.npy_intp[:] X_rows,
558-
const cnp.npy_intp[:] out_rows,
562+
const intptr_t[:] X_rows,
563+
const intptr_t[:] out_rows,
559564
floating[:, ::1] out,
560565
):
561566
"""Densify selected rows of a CSR matrix into a preallocated array.
@@ -571,12 +576,13 @@ def assign_rows_csr(
571576
out : array, shape=(arbitrary, n_features)
572577
"""
573578
cdef:
574-
# npy_intp (np.intp in Python) is what np.where returns,
579+
# intptr_t (npy_intp, np.intp in Python) is what np.where returns,
575580
# but int is what scipy.sparse uses.
576-
int i, ind, j, k
577-
cnp.npy_intp rX
581+
intp_t i, ind, j, k
582+
intptr_t rX
578583
const floating[:] data = X.data
579-
const int[:] indices = X.indices, indptr = X.indptr
584+
const int32_t[:] indices = X.indices
585+
const int32_t[:] indptr = X.indptr
580586

581587
if X_rows.shape[0] != out_rows.shape[0]:
582588
raise ValueError("cannot assign %d rows to %d"

0 commit comments

Comments
 (0)
0