COSMIT tree: unused variable warnings and use for/range · jwchennlp/scikit-learn@b83e74d · GitHub


Commit b83e74d

COSMIT tree: unused variable warnings and use for/range
Not recompiled with Cython, as line numbers still match up.
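
The commit makes two kinds of change. First, every legacy `for i from 0 <= i < n` loop is rewritten as `for i in range(n)`; when the index is a typed C integer, Cython should compile both forms to the same C for-loop, so the `range()` form costs nothing and reads as ordinary Python. A minimal standalone sketch (illustration only, not taken from the diff below):

    # sketch.pyx -- minimal illustration, not from this commit
    cdef Py_ssize_t k, n_outputs = 3

    # Legacy Cython loop syntax, as removed by this commit:
    for k from 0 <= k < n_outputs:
        pass

    # Idiomatic replacement; with a typed C integer index, Cython
    # emits the same C for-loop, so there is no performance change:
    for k in range(n_outputs):
        pass

Second, `best_feature` and `best_threshold` in the three splitter classes are now initialized to `0` and `0.`. These locals are only meaningfully assigned once a split is found, so the generated C could otherwise read them uninitialized on the no-split path; initializing them is presumably what silences the compiler warnings mentioned in the title.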
1 parent f309023 commit b83e74d

File tree

1 file changed: +52 -52 lines changed

sklearn/tree/_tree.pyx

Lines changed: 52 additions & 52 deletions
--- a/sklearn/tree/_tree.pyx
+++ b/sklearn/tree/_tree.pyx
@@ -138,7 +138,7 @@ cdef class ClassificationCriterion(Criterion):
         cdef SIZE_t k = 0
         cdef SIZE_t label_count_stride = 0

-        for k from 0 <= k < n_outputs:
+        for k in range(n_outputs):
             self.n_classes[k] = n_classes[k]

             if n_classes[k] > label_count_stride:
@@ -211,17 +211,17 @@ cdef class ClassificationCriterion(Criterion):
         cdef DOUBLE_t w = 1.0
         cdef SIZE_t offset = 0

-        for k from 0 <= k < n_outputs:
+        for k in range(n_outputs):
             memset(label_count_total + offset, 0, n_classes[k] * sizeof(double))
             offset += label_count_stride

-        for p from start <= p < end:
+        for p in range(start, end):
             i = samples[p]

             if sample_weight != NULL:
                 w = sample_weight[i]

-            for k from 0 <= k < n_outputs:
+            for k in range(n_outputs):
                 c = <SIZE_t> y[i * y_stride + k]
                 label_count_total[k * label_count_stride + c] += w

@@ -248,7 +248,7 @@ cdef class ClassificationCriterion(Criterion):

         cdef SIZE_t k = 0

-        for k from 0 <= k < n_outputs:
+        for k in range(n_outputs):
             memset(label_count_left, 0, n_classes[k] * sizeof(double))
             memcpy(label_count_right, label_count_total, n_classes[k] * sizeof(double))

@@ -284,13 +284,13 @@ cdef class ClassificationCriterion(Criterion):

         # Note: We assume start <= pos < new_pos <= end

-        for p from pos <= p < new_pos:
+        for p in range(pos, new_pos):
             i = samples[p]

             if sample_weight != NULL:
                 w = sample_weight[i]

-            for k from 0 <= k < n_outputs:
+            for k in range(n_outputs):
                 label_index = (k * label_count_stride +
                                <SIZE_t> y[i * y_stride + k])
                 label_count_left[label_index] += w
@@ -318,7 +318,7 @@ cdef class ClassificationCriterion(Criterion):
         cdef double* label_count_total = self.label_count_total
         cdef SIZE_t k

-        for k from 0 <= k < n_outputs:
+        for k in range(n_outputs):
             memcpy(dest, label_count_total, n_classes[k] * sizeof(double))
             dest += label_count_stride
             label_count_total += label_count_stride
@@ -354,10 +354,10 @@ cdef class Entropy(ClassificationCriterion):
         cdef SIZE_t k
         cdef SIZE_t c

-        for k from 0 <= k < n_outputs:
+        for k in range(n_outputs):
             entropy = 0.0

-            for c from 0 <= c < n_classes[k]:
+            for c in range(n_classes[k]):
                 tmp = label_count_total[c]
                 if tmp > 0.0:
                     tmp /= weighted_n_node_samples
@@ -390,11 +390,11 @@ cdef class Entropy(ClassificationCriterion):
         cdef SIZE_t k
         cdef SIZE_t c

-        for k from 0 <= k < n_outputs:
+        for k in range(n_outputs):
             entropy_left = 0.0
             entropy_right = 0.0

-            for c from 0 <= c < n_classes[k]:
+            for c in range(n_classes[k]):
                 tmp = label_count_left[c]
                 if tmp > 0.0:
                     tmp /= weighted_n_left
@@ -445,11 +445,11 @@ cdef class Gini(ClassificationCriterion):
         cdef SIZE_t k
         cdef SIZE_t c

-        for k from 0 <= k < n_outputs:
+        for k in range(n_outputs):
             gini = 0.0

-            for c from 0 <= c < n_classes[k]:
-                tmp = label_count_total[c]  # TODO: use weighted count instead
+            for c in range(n_classes[k]):
+                tmp = label_count_total[c]  # TODO: use weighted count instead
                 gini += tmp * tmp

             gini = 1.0 - gini / (weighted_n_node_samples *
@@ -483,12 +483,12 @@ cdef class Gini(ClassificationCriterion):
         cdef SIZE_t k
         cdef SIZE_t c

-        for k from 0 <= k < n_outputs:
+        for k in range(n_outputs):
             gini_left = 0.0
             gini_right = 0.0

-            for c from 0 <= c < n_classes[k]:
-                tmp = label_count_left[c]  # TODO: use weighted count instead
+            for c in range(n_classes[k]):
+                tmp = label_count_left[c]  # TODO: use weighted count instead
                 gini_left += tmp * tmp
                 tmp = label_count_right[c]
                 gini_right += tmp * tmp
@@ -638,7 +638,7 @@ cdef class RegressionCriterion(Criterion):
         cdef DOUBLE_t y_ik = 0.0
         cdef DOUBLE_t w = 1.0

-        for k from 0 <= k < n_outputs:
+        for k in range(n_outputs):
             mean_left[k] = 0.0
             mean_right[k] = 0.0
             mean_total[k] = 0.0
@@ -651,13 +651,13 @@ cdef class RegressionCriterion(Criterion):
             self.sum_right[k] = 0.0
             self.sum_total[k] = 0.0

-        for p from start <= p < end:
+        for p in range(start, end):
             i = samples[p]

             if sample_weight != NULL:
                 w = sample_weight[i]

-            for k from 0 <= k < n_outputs:
+            for k in range(n_outputs):
                 y_ik = y[i * y_stride + k]
                 sq_sum_total[k] += w * y_ik * y_ik
                 mean_total[k] += w * y_ik
@@ -667,7 +667,7 @@ cdef class RegressionCriterion(Criterion):

         self.weighted_n_node_samples = weighted_n_node_samples

-        for k from 0 <= k < n_outputs:
+        for k in range(n_outputs):
             mean_total[k] /= weighted_n_node_samples

         # Reset to pos=start
@@ -696,7 +696,7 @@ cdef class RegressionCriterion(Criterion):

         cdef SIZE_t k = 0

-        for k from 0 <= k < n_outputs:
+        for k in range(n_outputs):
             mean_right[k] = mean_total[k]
             mean_left[k] = 0.0
             sq_sum_right[k] = sq_sum_total[k]
@@ -738,13 +738,13 @@ cdef class RegressionCriterion(Criterion):

         # Note: We assume start <= pos < new_pos <= end

-        for p from pos <= p < new_pos:
+        for p in range(pos, new_pos):
             i = samples[p]

             if sample_weight != NULL:
                 w = sample_weight[i]

-            for k from 0 <= k < n_outputs:
+            for k in range(n_outputs):
                 y_ik = y[i * y_stride + k]
                 w_y_ik = w * y_ik

@@ -762,7 +762,7 @@ cdef class RegressionCriterion(Criterion):
             weighted_n_left += w
             weighted_n_right -= w

-        for k from 0 <= k < n_outputs:
+        for k in range(n_outputs):
             var_left[k] = (sq_sum_left[k] / weighted_n_left -
                            mean_left[k] * mean_left[k])
             var_right[k] = (sq_sum_right[k] / weighted_n_right -
@@ -799,7 +799,7 @@ cdef class MSE(RegressionCriterion):
         cdef double total = 0.0
         cdef SIZE_t k

-        for k from 0 <= k < n_outputs:
+        for k in range(n_outputs):
             total += (sq_sum_total[k] / weighted_n_node_samples -
                       mean_total[k] * mean_total[k])

@@ -816,7 +816,7 @@ cdef class MSE(RegressionCriterion):
         cdef double total_right = 0.0
         cdef SIZE_t k

-        for k from 0 <= k < n_outputs:
+        for k in range(n_outputs):
             total_left += var_left[k]
             total_right += var_right[k]

@@ -913,7 +913,7 @@ cdef class Splitter:
         cdef SIZE_t i, j
         j = 0

-        for i from 0 <= i < n_samples:
+        for i in range(n_samples):
             # Only work with positively weighted samples
             if sample_weight == NULL or sample_weight[i] != 0.0:
                 samples[j] = i
@@ -925,7 +925,7 @@ cdef class Splitter:
         cdef SIZE_t n_features = X.shape[1]
         cdef SIZE_t* features = <SIZE_t*> malloc(n_features * sizeof(SIZE_t))

-        for i from 0 <= i < n_features:
+        for i in range(n_features):
             features[i] = i

         self.features = features
@@ -994,8 +994,8 @@ cdef class BestSplitter(Splitter):
         cdef double best_impurity_left = INFINITY
         cdef double best_impurity_right = INFINITY
         cdef SIZE_t best_pos = end
-        cdef SIZE_t best_feature
-        cdef double best_threshold
+        cdef SIZE_t best_feature = 0
+        cdef double best_threshold = 0.
         cdef double best_improvement = -INFINITY

         cdef double current_improvement
@@ -1012,7 +1012,7 @@ cdef class BestSplitter(Splitter):
         cdef SIZE_t partition_start
         cdef SIZE_t partition_end

-        for f_idx from 0 <= f_idx < n_features:
+        for f_idx in range(n_features):
             # Draw a feature at random
             f_i = n_features - f_idx - 1
             f_j = rand_int(n_features - f_idx, random_state)
@@ -1176,8 +1176,8 @@ cdef class RandomSplitter(Splitter):
         cdef double best_impurity_left = INFINITY
         cdef double best_impurity_right = INFINITY
         cdef SIZE_t best_pos = end
-        cdef SIZE_t best_feature
-        cdef double best_threshold
+        cdef SIZE_t best_feature = 0
+        cdef double best_threshold = 0.
         cdef double best_improvement = -INFINITY

         cdef double current_improvement
@@ -1197,7 +1197,7 @@ cdef class RandomSplitter(Splitter):
         cdef SIZE_t partition_start
         cdef SIZE_t partition_end

-        for f_idx from 0 <= f_idx < n_features:
+        for f_idx in range(n_features):
             # Draw a feature at random
             f_i = n_features - f_idx - 1
             f_j = rand_int(n_features - f_idx, random_state)
@@ -1211,7 +1211,7 @@ cdef class RandomSplitter(Splitter):
             # Find min, max
             min_feature_value = max_feature_value = X[X_sample_stride * samples[start] + X_fx_stride * current_feature]

-            for p from start < p < end:
+            for p in range(start + 1, end):
                 current_feature_value = X[X_sample_stride * samples[p] + X_fx_stride * current_feature]

                 if current_feature_value < min_feature_value:
@@ -1379,8 +1379,8 @@ cdef class PresortBestSplitter(Splitter):
         cdef double best_impurity_left = INFINITY
         cdef double best_impurity_right = INFINITY
         cdef SIZE_t best_pos = end
-        cdef SIZE_t best_feature
-        cdef double best_threshold
+        cdef SIZE_t best_feature = 0
+        cdef double best_threshold = 0.
         cdef double best_improvement = -INFINITY

         cdef double current_improvement
@@ -1400,11 +1400,11 @@ cdef class PresortBestSplitter(Splitter):
         cdef SIZE_t i, j

         # Set sample mask
-        for p from start <= p < end:
+        for p in range(start, end):
             sample_mask[samples[p]] = 1

         # Look for splits
-        for f_idx from 0 <= f_idx < n_features:
+        for f_idx in range(n_features):
             # Draw a feature at random
             f_i = n_features - f_idx - 1
             f_j = rand_int(n_features - f_idx, random_state)
@@ -1420,7 +1420,7 @@ cdef class PresortBestSplitter(Splitter):
             # Extract ordering from X_argsorted
             p = start

-            for i from 0 <= i < n_total_samples:
+            for i in range(n_total_samples):
                 j = X_argsorted[X_argsorted_stride * current_feature + i]
                 if sample_mask[j] == 1:
                     samples[p] = j
@@ -1496,7 +1496,7 @@ cdef class PresortBestSplitter(Splitter):
                 samples[p] = tmp

         # Reset sample mask
-        for p from start <= p < end:
+        for p in range(start, end):
             sample_mask[samples[p]] = 0

         # Return values
@@ -1933,7 +1933,7 @@ cdef class Tree:

         cdef SIZE_t k

-        for k from 0 <= k < n_outputs:
+        for k in range(n_outputs):
             self.n_classes[k] = n_classes[k]

         # Parameters
@@ -2152,7 +2152,7 @@ cdef class Tree:
         out = np.zeros((n_samples, max_n_classes), dtype=np.float64)

         with nogil:
-            for i from 0 <= i < n_samples:
+            for i in range(n_samples):
                 node_id = 0

                 # While node_id not a leaf
@@ -2165,7 +2165,7 @@ cdef class Tree:

                 offset = node_id * value_stride

-                for c from 0 <= c < n_classes[0]:
+                for c in range(n_classes[0]):
                     out[i, c] = value[offset + c]

         return out
@@ -2176,7 +2176,7 @@ cdef class Tree:
                               max_n_classes), dtype=np.float64)

         with nogil:
-            for i from 0 <= i < n_samples:
+            for i in range(n_samples):
                 node_id = 0

                 # While node_id not a leaf
@@ -2189,8 +2189,8 @@ cdef class Tree:

                 offset = node_id * value_stride

-                for k from 0 <= k < n_outputs:
-                    for c from 0 <= c < n_classes[k]:
+                for k in range(n_outputs):
+                    for c in range(n_classes[k]):
                         out_multi[i, k, c] = value[offset + c]
                     offset += max_n_classes

@@ -2211,7 +2211,7 @@ cdef class Tree:
         out = np.zeros((n_samples,), dtype=np.int32)

         with nogil:
-            for i from 0 <= i < n_samples:
+            for i in range(n_samples):
                 node_id = 0

                 # While node_id not a leaf
@@ -2244,7 +2244,7 @@ cdef class Tree:
         cdef np.ndarray[np.float64_t, ndim=1] importances
         importances = np.zeros((self.n_features,))

-        for node from 0 <= node < node_count:
+        for node in range(node_count):
             if children_left[node] != _TREE_LEAF:
                 # ... and children_right[node] != _TREE_LEAF:
                 n_left = n_node_samples[children_left[node]]
@@ -2279,7 +2279,7 @@ cdef inline UINT32_t our_rand_r(UINT32_t* seed) nogil:
     seed[0] ^= <UINT32_t>(seed[0] >> 17)
     seed[0] ^= <UINT32_t>(seed[0] << 5)

-    return seed[0] % <UINT32_t>(RAND_R_MAX + 1)
+    return seed[0] % (<UINT32_t>RAND_R_MAX + 1)

 cdef inline np.ndarray int_ptr_to_ndarray(int* data, SIZE_t size):
     """Encapsulate data into a 1D numpy array of int's."""
