@@ -106,9 +106,9 @@ def _csr_mean_variance_axis0(cnp.ndarray[floating, ndim=1, mode="c"] X_data,
     # Implement the function here since variables using fused types
     # cannot be declared directly and can only be passed as function arguments
     cdef:
-        cnp.npy_intp i
-        unsigned long long row_ind
-        integral col_ind
+        cnp.intp_t row_ind
+        unsigned long long feature_idx
+        integral i, col_ind
         cnp.float64_t diff
         # means[j] contains the mean of feature j
         cnp.ndarray[cnp.float64_t, ndim=1] means = np.zeros(n_features)
@@ -142,8 +142,8 @@ def _csr_mean_variance_axis0(cnp.ndarray[floating, ndim=1, mode="c"] X_data,
                 # number of non nan elements of X[:, col_ind]
                 counts[col_ind] -= 1

-    for i in range(n_features):
-        means[i] /= sum_weights[i]
+    for feature_idx in range(n_features):
+        means[feature_idx] /= sum_weights[feature_idx]

     for row_ind in range(len(X_indptr) - 1):
         for i in range(X_indptr[row_ind], X_indptr[row_ind + 1]):
@@ -156,15 +156,22 @@ def _csr_mean_variance_axis0(cnp.ndarray[floating, ndim=1, mode="c"] X_data,
                 correction[col_ind] += diff * weights[row_ind]
                 variances[col_ind] += diff * diff * weights[row_ind]

-    for i in range(n_features):
-        if counts[i] != counts_nz[i]:
-            correction[i] -= (sum_weights[i] - sum_weights_nz[i]) * means[i]
-        correction[i] = correction[i]**2 / sum_weights[i]
-        if counts[i] != counts_nz[i]:
+    for feature_idx in range(n_features):
+        if counts[feature_idx] != counts_nz[feature_idx]:
+            correction[feature_idx] -= (
+                sum_weights[feature_idx] - sum_weights_nz[feature_idx]
+            ) * means[feature_idx]
+        correction[feature_idx] = correction[feature_idx]**2 / sum_weights[feature_idx]
+        if counts[feature_idx] != counts_nz[feature_idx]:
             # only compute it when it's guaranteed to be non-zero to avoid
             # catastrophic cancellation.
-            variances[i] += (sum_weights[i] - sum_weights_nz[i]) * means[i]**2
-        variances[i] = (variances[i] - correction[i]) / sum_weights[i]
+            variances[feature_idx] += (
+                sum_weights[feature_idx] - sum_weights_nz[feature_idx]
+            ) * means[feature_idx]**2
+        variances[feature_idx] = (
+            (variances[feature_idx] - correction[feature_idx]) /
+            sum_weights[feature_idx]
+        )

     if floating is float:
         return (np.array(means, dtype=np.float32),
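For reference, the quantity these loops accumulate is a weighted, NaN-aware corrected two-pass variance. Below is a minimal dense NumPy sketch of that arithmetic; the helper name and argument handling are made up for illustration, and it is not the sparse kernel itself, which folds the implicit zeros in through the `sum_weights - sum_weights_nz` bulk terms instead of materializing them.

    import numpy as np

    def weighted_mean_var(X, weights):
        # Dense, per-feature sketch: X is (n_samples, n_features), weights is (n_samples,).
        # NaN entries are excluded from all sums by zeroing their weight.
        W = np.where(np.isnan(X), 0.0, weights[:, None])
        Xz = np.nan_to_num(X, nan=0.0)
        sum_w = W.sum(axis=0)
        means = (W * Xz).sum(axis=0) / sum_w

        diff = Xz - means                                 # explicit zeros included here
        correction = (W * diff).sum(axis=0) ** 2 / sum_w  # zero in exact arithmetic
        variances = ((W * diff ** 2).sum(axis=0) - correction) / sum_w
        return means, variances, sum_w

Subtracting the correction term before the final division is what the "catastrophic cancellation" comment above refers to: it removes the rounding error left over from the first-pass mean.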
@@ -228,9 +235,8 @@ def _csc_mean_variance_axis0(cnp.ndarray[floating, ndim=1, mode="c"] X_data,
     # Implement the function here since variables using fused types
     # cannot be declared directly and can only be passed as function arguments
     cdef:
-        cnp.npy_intp i
-        unsigned long long col_ind
-        integral row_ind
+        integral i, row_ind
+        unsigned long long feature_idx, col_ind
         cnp.float64_t diff
         # means[j] contains the mean of feature j
         cnp.ndarray[cnp.float64_t, ndim=1] means = np.zeros(n_features)
@@ -264,8 +270,8 @@ def _csc_mean_variance_axis0(cnp.ndarray[floating, ndim=1, mode="c"] X_data,
                 # number of non nan elements of X[:, col_ind]
                 counts[col_ind] -= 1

-    for i in range(n_features):
-        means[i] /= sum_weights[i]
+    for feature_idx in range(n_features):
+        means[feature_idx] /= sum_weights[feature_idx]

     for col_ind in range(n_features):
         for i in range(X_indptr[col_ind], X_indptr[col_ind + 1]):
@@ -278,15 +284,21 @@ def _csc_mean_variance_axis0(cnp.ndarray[floating, ndim=1, mode="c"] X_data,
                 correction[col_ind] += diff * weights[row_ind]
                 variances[col_ind] += diff * diff * weights[row_ind]

-    for i in range(n_features):
-        if counts[i] != counts_nz[i]:
-            correction[i] -= (sum_weights[i] - sum_weights_nz[i]) * means[i]
-        correction[i] = correction[i]**2 / sum_weights[i]
-        if counts[i] != counts_nz[i]:
+    for feature_idx in range(n_features):
+        if counts[feature_idx] != counts_nz[feature_idx]:
+            correction[feature_idx] -= (
+                sum_weights[feature_idx] - sum_weights_nz[feature_idx]
+            ) * means[feature_idx]
+        correction[feature_idx] = correction[feature_idx]**2 / sum_weights[feature_idx]
+        if counts[feature_idx] != counts_nz[feature_idx]:
             # only compute it when it's guaranteed to be non-zero to avoid
             # catastrophic cancellation.
-            variances[i] += (sum_weights[i] - sum_weights_nz[i]) * means[i]**2
-        variances[i] = (variances[i] - correction[i]) / sum_weights[i]
+            variances[feature_idx] += (
+                sum_weights[feature_idx] - sum_weights_nz[feature_idx]
+            ) * means[feature_idx]**2
+        variances[feature_idx] = (
+            (variances[feature_idx] - correction[feature_idx])
+        ) / sum_weights[feature_idx]

     if floating is float:
         return (np.array(means, dtype=np.float32),
@@ -383,13 +395,12 @@ def _incr_mean_variance_axis0(cnp.ndarray[floating, ndim=1] X_data,
     # Implement the function here since variables using fused types
     # cannot be declared directly and can only be passed as function arguments
     cdef:
-        cnp.npy_intp i
+        unsigned long long i

-    # last = stats until now
-    # new = the current increment
-    # updated = the aggregated stats
-    # when arrays, they are indexed by i per-feature
-    cdef:
+        # last = stats until now
+        # new = the current increment
+        # updated = the aggregated stats
+        # when arrays, they are indexed by i per-feature
         cnp.ndarray[floating, ndim=1] new_mean
         cnp.ndarray[floating, ndim=1] new_var
         cnp.ndarray[floating, ndim=1] updated_mean
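The "last / new / updated" names in these comments refer to the standard incremental (parallel) mean-variance update: statistics of the data seen so far are merged with those of the new batch. A rough Python sketch of that merge is below, with a hypothetical helper name; it is not the scikit-learn function, which additionally handles sample weights and NaN values.

    import numpy as np

    def combine_mean_var(last_mean, last_var, last_n, new_mean, new_var, new_n):
        # Merge per-feature stats of two chunks; *_n may be scalars or per-feature arrays.
        updated_n = last_n + new_n
        delta = new_mean - last_mean
        updated_mean = last_mean + delta * new_n / updated_n
        # Work with M2 = var * n, then add the between-chunk contribution.
        updated_m2 = (last_var * last_n + new_var * new_n
                      + delta ** 2 * last_n * new_n / updated_n)
        return updated_mean, updated_m2 / updated_n, updated_n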
@@ -469,15 +480,17 @@ def _inplace_csr_row_normalize_l1(cnp.ndarray[floating, ndim=1] X_data,
                                   shape,
                                   cnp.ndarray[integral, ndim=1] X_indices,
                                   cnp.ndarray[integral, ndim=1] X_indptr):
-    cdef unsigned long long n_samples = shape[0]
-    cdef unsigned long long n_features = shape[1]
+    cdef:
+        unsigned long long n_samples = shape[0]
+        unsigned long long n_features = shape[1]

-    # the column indices for row i are stored in:
-    # indices[indptr[i]:indices[i+1]]
-    # and their corresponding values are stored in:
-    # data[indptr[i]:indptr[i+1]]
-    cdef cnp.npy_intp i, j
-    cdef double sum_
+        # the column indices for row i are stored in:
+        # indices[indptr[i]:indices[i+1]]
+        # and their corresponding values are stored in:
+        # data[indptr[i]:indptr[i+1]]
+        unsigned long long i
+        integral j
+        double sum_

     for i in range(n_samples):
         sum_ = 0.0
@@ -503,11 +516,12 @@ def _inplace_csr_row_normalize_l2(cnp.ndarray[floating, ndim=1] X_data,
                                   shape,
                                   cnp.ndarray[integral, ndim=1] X_indices,
                                   cnp.ndarray[integral, ndim=1] X_indptr):
-    cdef integral n_samples = shape[0]
-    cdef integral n_features = shape[1]
-
-    cdef cnp.npy_intp i, j
-    cdef double sum_
+    cdef:
+        unsigned long long n_samples = shape[0]
+        unsigned long long n_features = shape[1]
+        unsigned long long i
+        integral j
+        double sum_

     for i in range(n_samples):
         sum_ = 0.0
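Both normalization kernels apply the same per-row scaling and differ only in the norm: L1 divides each row by the sum of absolute values, L2 by the square root of the sum of squares, and all-zero rows are left untouched. A plain SciPy sketch of that behaviour follows; the helper name is hypothetical and, unlike the in-place Cython code, it works on a copy.

    import numpy as np
    import scipy.sparse as sp

    def csr_row_normalize(X, norm="l1"):
        # Copy-based sketch; the Cython kernels rescale X.data in place instead.
        X = sp.csr_matrix(X, copy=True)
        for i in range(X.shape[0]):
            start, end = X.indptr[i], X.indptr[i + 1]
            row = X.data[start:end]
            s = np.abs(row).sum() if norm == "l1" else np.sqrt((row ** 2).sum())
            if s != 0.0:
                X.data[start:end] = row / s
        return X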