MAINT remove -Wsign-compare when compiling `sklearn.utils.sparsefuncs_fast` · jjerphan/scikit-learn@029c25d · GitHub
Commit 029c25d

Vincent-Maladiere authored and jjerphan committed

MAINT remove -Wsign-compare when compiling sklearn.utils.sparsefuncs_fast (scikit-learn#25244)

1 parent c51bcfd commit 029c25d
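
Background: the -Wsign-compare warnings came from loop indices whose signedness does not match the bound they are compared against, e.g. a signed cnp.npy_intp counter iterating up to an unsigned long long n_features. The sketch below is a hypothetical, stand-alone illustration of that pattern and is not part of the commit; the function and variable names are made up.

    # cython: language_level=3
    cimport numpy as cnp

    def count_features(unsigned long long n_features):
        cdef:
            cnp.npy_intp i               # signed index
            unsigned long long j         # unsigned index
            unsigned long long total = 0
        # The generated C compares the signed `i` against the unsigned bound,
        # which gcc/clang report under -Wsign-compare.
        for i in range(n_features):
            total += 1
        # Giving the index the same signedness as its bound keeps the
        # comparison clean; this is the approach taken throughout the diff.
        for j in range(n_features):
            total += 1
        return total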

File tree

1 file changed: +57 −43 lines changed

sklearn/utils/sparsefuncs_fast.pyx

Lines changed: 57 additions & 43 deletions
@@ -106,9 +106,9 @@ def _csr_mean_variance_axis0(cnp.ndarray[floating, ndim=1, mode="c"] X_data,
     # Implement the function here since variables using fused types
     # cannot be declared directly and can only be passed as function arguments
     cdef:
-        cnp.npy_intp i
-        unsigned long long row_ind
-        integral col_ind
+        cnp.intp_t row_ind
+        unsigned long long feature_idx
+        integral i, col_ind
         cnp.float64_t diff
         # means[j] contains the mean of feature j
         cnp.ndarray[cnp.float64_t, ndim=1] means = np.zeros(n_features)
@@ -142,8 +142,8 @@ def _csr_mean_variance_axis0(cnp.ndarray[floating, ndim=1, mode="c"] X_data,
                 # number of non nan elements of X[:, col_ind]
                 counts[col_ind] -= 1

-    for i in range(n_features):
-        means[i] /= sum_weights[i]
+    for feature_idx in range(n_features):
+        means[feature_idx] /= sum_weights[feature_idx]

     for row_ind in range(len(X_indptr) - 1):
         for i in range(X_indptr[row_ind], X_indptr[row_ind + 1]):
@@ -156,15 +156,22 @@ def _csr_mean_variance_axis0(cnp.ndarray[floating, ndim=1, mode="c"] X_data,
                 correction[col_ind] += diff * weights[row_ind]
                 variances[col_ind] += diff * diff * weights[row_ind]

-    for i in range(n_features):
-        if counts[i] != counts_nz[i]:
-            correction[i] -= (sum_weights[i] - sum_weights_nz[i]) * means[i]
-        correction[i] = correction[i]**2 / sum_weights[i]
-        if counts[i] != counts_nz[i]:
+    for feature_idx in range(n_features):
+        if counts[feature_idx] != counts_nz[feature_idx]:
+            correction[feature_idx] -= (
+                sum_weights[feature_idx] - sum_weights_nz[feature_idx]
+            ) * means[feature_idx]
+        correction[feature_idx] = correction[feature_idx]**2 / sum_weights[feature_idx]
+        if counts[feature_idx] != counts_nz[feature_idx]:
             # only compute it when it's guaranteed to be non-zero to avoid
             # catastrophic cancellation.
-            variances[i] += (sum_weights[i] - sum_weights_nz[i]) * means[i]**2
-        variances[i] = (variances[i] - correction[i]) / sum_weights[i]
+            variances[feature_idx] += (
+                sum_weights[feature_idx] - sum_weights_nz[feature_idx]
+            ) * means[feature_idx]**2
+        variances[feature_idx] = (
+            (variances[feature_idx] - correction[feature_idx]) /
+            sum_weights[feature_idx]
+        )

     if floating is float:
         return (np.array(means, dtype=np.float32),
@@ -228,9 +235,8 @@ def _csc_mean_variance_axis0(cnp.ndarray[floating, ndim=1, mode="c"] X_data,
     # Implement the function here since variables using fused types
     # cannot be declared directly and can only be passed as function arguments
     cdef:
-        cnp.npy_intp i
-        unsigned long long col_ind
-        integral row_ind
+        integral i, row_ind
+        unsigned long long feature_idx, col_ind
         cnp.float64_t diff
         # means[j] contains the mean of feature j
         cnp.ndarray[cnp.float64_t, ndim=1] means = np.zeros(n_features)
@@ -264,8 +270,8 @@ def _csc_mean_variance_axis0(cnp.ndarray[floating, ndim=1, mode="c"] X_data,
                 # number of non nan elements of X[:, col_ind]
                 counts[col_ind] -= 1

-    for i in range(n_features):
-        means[i] /= sum_weights[i]
+    for feature_idx in range(n_features):
+        means[feature_idx] /= sum_weights[feature_idx]

     for col_ind in range(n_features):
         for i in range(X_indptr[col_ind], X_indptr[col_ind + 1]):
@@ -278,15 +284,21 @@ def _csc_mean_variance_axis0(cnp.ndarray[floating, ndim=1, mode="c"] X_data,
                 correction[col_ind] += diff * weights[row_ind]
                 variances[col_ind] += diff * diff * weights[row_ind]

-    for i in range(n_features):
-        if counts[i] != counts_nz[i]:
-            correction[i] -= (sum_weights[i] - sum_weights_nz[i]) * means[i]
-        correction[i] = correction[i]**2 / sum_weights[i]
-        if counts[i] != counts_nz[i]:
+    for feature_idx in range(n_features):
+        if counts[feature_idx] != counts_nz[feature_idx]:
+            correction[feature_idx] -= (
+                sum_weights[feature_idx] - sum_weights_nz[feature_idx]
+            ) * means[feature_idx]
+        correction[feature_idx] = correction[feature_idx]**2 / sum_weights[feature_idx]
+        if counts[feature_idx] != counts_nz[feature_idx]:
             # only compute it when it's guaranteed to be non-zero to avoid
             # catastrophic cancellation.
-            variances[i] += (sum_weights[i] - sum_weights_nz[i]) * means[i]**2
-        variances[i] = (variances[i] - correction[i]) / sum_weights[i]
+            variances[feature_idx] += (
+                sum_weights[feature_idx] - sum_weights_nz[feature_idx]
+            ) * means[feature_idx]**2
+        variances[feature_idx] = (
+            (variances[feature_idx] - correction[feature_idx])
+        ) / sum_weights[feature_idx]

     if floating is float:
         return (np.array(means, dtype=np.float32),
@@ -383,13 +395,12 @@ def _incr_mean_variance_axis0(cnp.ndarray[floating, ndim=1] X_data,
     # Implement the function here since variables using fused types
     # cannot be declared directly and can only be passed as function arguments
     cdef:
-        cnp.npy_intp i
+        unsigned long long i

-    # last = stats until now
-    # new = the current increment
-    # updated = the aggregated stats
-    # when arrays, they are indexed by i per-feature
-    cdef:
+        # last = stats until now
+        # new = the current increment
+        # updated = the aggregated stats
+        # when arrays, they are indexed by i per-feature
         cnp.ndarray[floating, ndim=1] new_mean
         cnp.ndarray[floating, ndim=1] new_var
         cnp.ndarray[floating, ndim=1] updated_mean
@@ -469,15 +480,17 @@ def _inplace_csr_row_normalize_l1(cnp.ndarray[floating, ndim=1] X_data,
                                   shape,
                                   cnp.ndarray[integral, ndim=1] X_indices,
                                   cnp.ndarray[integral, ndim=1] X_indptr):
-    cdef unsigned long long n_samples = shape[0]
-    cdef unsigned long long n_features = shape[1]
+    cdef:
+        unsigned long long n_samples = shape[0]
+        unsigned long long n_features = shape[1]

-    # the column indices for row i are stored in:
-    # indices[indptr[i]:indices[i+1]]
-    # and their corresponding values are stored in:
-    # data[indptr[i]:indptr[i+1]]
-    cdef cnp.npy_intp i, j
-    cdef double sum_
+        # the column indices for row i are stored in:
+        # indices[indptr[i]:indices[i+1]]
+        # and their corresponding values are stored in:
+        # data[indptr[i]:indptr[i+1]]
+        unsigned long long i
+        integral j
+        double sum_

     for i in range(n_samples):
         sum_ = 0.0
@@ -503,11 +516,12 @@ def _inplace_csr_row_normalize_l2(cnp.ndarray[floating, ndim=1] X_data,
                                   shape,
                                   cnp.ndarray[integral, ndim=1] X_indices,
                                   cnp.ndarray[integral, ndim=1] X_indptr):
-    cdef integral n_samples = shape[0]
-    cdef integral n_features = shape[1]
-
-    cdef cnp.npy_intp i, j
-    cdef double sum_
+    cdef:
+        unsigned long long n_samples = shape[0]
+        unsigned long long n_features = shape[1]
+        unsigned long long i
+        integral j
+        double sum_

     for i in range(n_samples):
         sum_ = 0.0
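
For context, the private kernels touched in this diff sit behind public-facing helpers. A small usage sketch follows, assuming the current sklearn.utils.sparsefuncs / sparsefuncs_fast API; the example itself is not part of the commit.

    import numpy as np
    import scipy.sparse as sp

    from sklearn.utils.sparsefuncs import mean_variance_axis
    from sklearn.utils.sparsefuncs_fast import inplace_csr_row_normalize_l2

    # Small CSR matrix with explicit zeros in its dense form.
    X = sp.csr_matrix(np.array([[1.0, 0.0, 2.0],
                                [0.0, 3.0, 0.0]]))

    # Per-feature mean and variance along axis 0; for CSR input this
    # ultimately runs the _csr_mean_variance_axis0 kernel modified above.
    means, variances = mean_variance_axis(X, axis=0)

    # Row-wise L2 normalization in place, backed by _inplace_csr_row_normalize_l2.
    inplace_csr_row_normalize_l2(X)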
