MAINT Cython linting (#25861) · scikit-learn/scikit-learn@9cb1111 · GitHub
[go: up one dir, main page]

Skip to content

Commit 9cb1111

Browse files
authored
MAINT Cython linting (#25861)
1 parent 42c2731 commit 9cb1111

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

53 files changed

+477
-423
lines changed

.circleci/config.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ jobs:
1111
command: |
1212
source build_tools/shared.sh
1313
# Include pytest compatibility with mypy
14-
pip install pytest flake8 $(get_dep mypy min) $(get_dep black min)
14+
pip install pytest flake8 $(get_dep mypy min) $(get_dep black min) cython-lint
1515
- run:
1616
name: linting
1717
command: ./build_tools/linting.sh

.pre-commit-config.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,3 +20,9 @@ repos:
2020
- id: mypy
2121
files: sklearn/
2222
additional_dependencies: [pytest==6.2.4]
23+
- repo: https://github.com/MarcoGorelli/cython-lint
24+
rev: v0.15.0
25+
hooks:
26+
# TODO: add the double-quote-cython-strings hook when its usability has improved:
27+
# the possibility to pass a directory and to use it as a check instead of as an auto-formatter.
28+
- id: cython-lint

azure-pipelines.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ jobs:
3535
- bash: |
3636
source build_tools/shared.sh
3737
# Include pytest compatibility with mypy
38-
pip install pytest flake8 $(get_dep mypy min) $(get_dep black min)
38+
pip install pytest flake8 $(get_dep mypy min) $(get_dep black min) cython-lint
3939
displayName: Install linters
4040
- bash: |
4141
./build_tools/linting.sh

build_tools/linting.sh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,9 @@ echo -e "No problem detected by flake8\n"
1313
mypy sklearn/
1414
echo -e "No problem detected by mypy\n"
1515

16+
cython-lint sklearn/
17+
echo -e "No problem detected by cython-lint\n"
18+
1619
# For docstrings and warnings of deprecated attributes to be rendered
1720
# properly, the property decorator must come before the deprecated decorator
1821
# (else they are treated as functions)

pyproject.toml

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,3 +38,49 @@ exclude = '''
3838
| asv_benchmarks/env
3939
)/
4040
'''
41+
42+
[tool.cython-lint]
43+
# Ignore the same error codes as flake8
44+
# + E501 (line too long) because keeping it < 88 in cython
45+
# often makes code less readable.
46+
ignore = [
47+
# check ignored by default in flake8. Meaning unclear.
48+
'E24',
49+
# space before : (needed for how black formats slicing)
50+
'E203',
51+
# line too long
52+
'E501',
53+
# do not assign a lambda expression, use a def
54+
'E731',
55+
# do not use variables named 'l', 'O', or 'I'
56+
'E741',
57+
# line break before binary operator
58+
'W503',
59+
# line break after binary operator
60+
'W504',
61+
]
62+
# Exclude files that are generated from tempita templates
63+
exclude= '''
64+
(
65+
sklearn/_loss/_loss.pyx
66+
| sklearn/linear_model/_sag_fast.pyx
67+
| sklearn/linear_model/_sgd_fast.pyx
68+
| sklearn/utils/_seq_dataset.pyx
69+
| sklearn/utils/_seq_dataset.pxd
70+
| sklearn/utils/_weight_vector.pyx
71+
| sklearn/utils/_weight_vector.pxd
72+
| sklearn/metrics/_dist_metrics.pyx
73+
| sklearn/metrics/_dist_metrics.pxd
74+
| sklearn/metrics/_pairwise_distances_reduction/_argkmin.pxd
75+
| sklearn/metrics/_pairwise_distances_reduction/_argkmin.pyx
76+
| sklearn/metrics/_pairwise_distances_reduction/_argkmin_classmode.pyx
77+
| sklearn/metrics/_pairwise_distances_reduction/_base.pxd
78+
| sklearn/metrics/_pairwise_distances_reduction/_base.pyx
79+
| sklearn/metrics/_pairwise_distances_reduction/_datasets_pair.pxd
80+
| sklearn/metrics/_pairwise_distances_reduction/_datasets_pair.pyx
81+
| sklearn/metrics/_pairwise_distances_reduction/_middle_term_computer.pxd
82+
| sklearn/metrics/_pairwise_distances_reduction/_middle_term_computer.pyx
83+
| sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pxd
84+
| sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx
85+
)
86+
'''

setup.cfg

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -33,18 +33,8 @@ target-version = ['py37']
3333
ignore=
3434
# check ignored by default in flake8. Meaning unclear.
3535
E24,
36-
# continuation line under-indented
37-
E121,
38-
# closing bracket does not match indentation
39-
E123,
40-
# continuation line over-indented for hanging indent
41-
E126,
4236
# space before : (needed for how black formats slicing)
4337
E203,
44-
# missing whitespace around arithmetic operator
45-
E226,
46-
# multiple statements on one line (def)
47-
E704,
4838
# do not assign a lambda expression, use a def
4939
E731,
5040
# do not use variables named 'l', 'O', or 'I'
@@ -82,6 +72,7 @@ allow_redefinition = True
8272
ignore =
8373
sklearn/_loss/_loss.pyx
8474
sklearn/linear_model/_sag_fast.pyx
75+
sklearn/linear_model/_sgd_fast.pyx
8576
sklearn/utils/_seq_dataset.pyx
8677
sklearn/utils/_seq_dataset.pxd
8778
sklearn/utils/_weight_vector.pyx

sklearn/_isotonic.pyx

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@ import numpy as np
88
from cython cimport floating
99

1010

11-
1211
def _inplace_contiguous_isotonic_regression(floating[::1] y, floating[::1] w):
1312
cdef:
1413
Py_ssize_t n = y.shape[0], i, k
@@ -85,7 +84,6 @@ def _make_unique(const floating[::1] X,
8584
cdef floating current_x = X[0]
8685
cdef floating current_y = 0
8786
cdef floating current_weight = 0
88-
cdef floating y_old = 0
8987
cdef int i = 0
9088
cdef int j
9189
cdef floating x
@@ -114,4 +112,4 @@ def _make_unique(const floating[::1] X,
114112
np.asarray(x_out[:i+1]),
115113
np.asarray(y_out[:i+1]),
116114
np.asarray(weights_out[:i+1]),
117-
)
115+
)

sklearn/_loss/_loss.pxd

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@ ctypedef fused G_DTYPE_C:
1414

1515
# Struct to return 2 doubles
1616
ctypedef struct double_pair:
17-
double val1
18-
double val2
17+
double val1
18+
double val2
1919

2020

2121
# C base class for loss functions

sklearn/cluster/_dbscan_inner.pyx

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ cimport numpy as cnp
77

88
cnp.import_array()
99

10+
1011
def dbscan_inner(const cnp.uint8_t[::1] is_core,
1112
object[:] neighborhoods,
1213
cnp.npy_intp[::1] labels):

sklearn/cluster/_k_means_common.pxd

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ cdef floating _euclidean_sparse_dense(
1212
const floating[::1],
1313
const int[::1],
1414
const floating[::1],
15-
floating,
15+
floating,
1616
bint
1717
) noexcept nogil
1818

sklearn/cluster/_k_means_common.pyx

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -35,11 +35,14 @@ cdef floating _euclidean_dense_dense(
3535

3636
# We manually unroll the loop for better cache optimization.
3737
for i in range(n):
38-
result += ((a[0] - b[0]) * (a[0] - b[0])
39-
+(a[1] - b[1]) * (a[1] - b[1])
40-
+(a[2] - b[2]) * (a[2] - b[2])
41-
+(a[3] - b[3]) * (a[3] - b[3]))
42-
a += 4; b += 4
38+
result += (
39+
(a[0] - b[0]) * (a[0] - b[0]) +
40+
(a[1] - b[1]) * (a[1] - b[1]) +
41+
(a[2] - b[2]) * (a[2] - b[2]) +
42+
(a[3] - b[3]) * (a[3] - b[3])
43+
)
44+
a += 4
45+
b += 4
4346

4447
for i in range(rem):
4548
result += (a[i] - b[i]) * (a[i] - b[i])
@@ -77,7 +80,8 @@ cdef floating _euclidean_sparse_dense(
7780

7881
result += b_squared_norm
7982

80-
if result < 0: result = 0.0
83+
if result < 0:
84+
result = 0.0
8185

8286
return result if squared else sqrt(result)
8387

sklearn/cluster/_k_means_elkan.pyx

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,6 @@ def init_bounds_sparse(
154154
cdef:
155155
int n_samples = X.shape[0]
156156
int n_clusters = centers.shape[0]
157-
int n_features = X.shape[1]
158157

159158
floating[::1] X_data = X.data
160159
int[::1] X_indices = X.indices
@@ -269,7 +268,7 @@ def elkan_iter_chunked_dense(
269268
int n_samples_chunk = CHUNK_SIZE if n_samples > CHUNK_SIZE else n_samples
270269
int n_chunks = n_samples // n_samples_chunk
271270
int n_samples_rem = n_samples % n_samples_chunk
272-
int chunk_idx, n_samples_chunk_eff
271+
int chunk_idx
273272
int start, end
274273

275274
int i, j, k
@@ -386,9 +385,11 @@ cdef void _update_chunk_dense(
386385
# If this holds, then center_index is a good candidate for the
387386
# sample to be relabelled, and we need to confirm this by
388387
# recomputing the upper and lower bounds.
389-
if (j != label
388+
if (
389+
j != label
390390
and (upper_bound > lower_bounds[i, j])
391-
and (upper_bound > center_half_distances[label, j])):
391+
and (upper_bound > center_half_distances[label, j])
392+
):
392393

393394
# Recompute upper bound by calculating the actual distance
394395
# between the sample and its current assigned center.
@@ -401,8 +402,10 @@ cdef void _update_chunk_dense(
401402
# If the condition still holds, then compute the actual
402403
# distance between the sample and center. If this is less
403404
# than the previous distance, reassign label.
404-
if (upper_bound > lower_bounds[i, j]
405-
or (upper_bound > center_half_distances[label, j])):
405+
if (
406+
upper_bound > lower_bounds[i, j]
407+
or (upper_bound > center_half_distances[label, j])
408+
):
406409

407410
distance = _euclidean_dense_dense(
408411
&X[i, 0], &centers_old[j, 0], n_features, False)
@@ -504,7 +507,7 @@ def elkan_iter_chunked_sparse(
504507
int n_samples_chunk = CHUNK_SIZE if n_samples > CHUNK_SIZE else n_samples
505508
int n_chunks = n_samples // n_samples_chunk
506509
int n_samples_rem = n_samples % n_samples_chunk
507-
int chunk_idx, n_samples_chunk_eff
510+
int chunk_idx
508511
int start, end
509512

510513
int i, j, k
@@ -631,9 +634,11 @@ cdef void _update_chunk_sparse(
631634
# If this holds, then center_index is a good candidate for the
632635
# sample to be relabelled, and we need to confirm this by
633636
# recomputing the upper and lower bounds.
634-
if (j != label
637+
if (
638+
j != label
635639
and (upper_bound > lower_bounds[i, j])
636-
and (upper_bound > center_half_distances[label, j])):
640+
and (upper_bound > center_half_distances[label, j])
641+
):
637642

638643
# Recompute upper bound by calculating the actual distance
639644
# between the sample and its current assigned center.
@@ -648,8 +653,10 @@ cdef void _update_chunk_sparse(
648653
# If the condition still holds, then compute the actual
649654
# distance between the sample and center. If this is less
650655
# than the previous distance, reassign label.
651-
if (upper_bound > lower_bounds[i, j]
652-
or (upper_bound > center_half_distances[label, j])):
656+
if (
657+
upper_bound > lower_bounds[i, j]
658+
or (upper_bound > center_half_distances[label, j])
659+
):
653660
distance = _euclidean_sparse_dense(
654661
X_data[X_indptr[i] - s: X_indptr[i + 1] - s],
655662
X_indices[X_indptr[i] - s: X_indptr[i + 1] - s],

sklearn/cluster/_k_means_lloyd.pyx

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ def lloyd_iter_chunked_dense(
8787
int n_samples_chunk = CHUNK_SIZE if n_samples > CHUNK_SIZE else n_samples
8888
int n_chunks = n_samples // n_samples_chunk
8989
int n_samples_rem = n_samples % n_samples_chunk
90-
int chunk_idx, n_samples_chunk_eff
90+
int chunk_idx
9191
int start, end
9292

9393
int j, k
@@ -153,8 +153,9 @@ def lloyd_iter_chunked_dense(
153153

154154
if update_centers:
155155
omp_destroy_lock(&lock)
156-
_relocate_empty_clusters_dense(X, sample_weight, centers_old,
157-
centers_new, weight_in_clusters, labels)
156+
_relocate_empty_clusters_dense(
157+
X, sample_weight, centers_old, centers_new, weight_in_clusters, labels
158+
)
158159

159160
_average_centers(centers_new, weight_in_clusters)
160161
_center_shift(centers_old, centers_new, center_shift)
@@ -278,7 +279,7 @@ def lloyd_iter_chunked_sparse(
278279
int n_samples_chunk = CHUNK_SIZE if n_samples > CHUNK_SIZE else n_samples
279280
int n_chunks = n_samples // n_samples_chunk
280281
int n_samples_rem = n_samples % n_samples_chunk
281-
int chunk_idx, n_samples_chunk_eff = 0
282+
int chunk_idx
282283
int start = 0, end = 0
283284

284285
int j, k

sklearn/datasets/_svmlight_format_fast.pyx

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,7 @@ def _load_svmlight_file(f, dtype, bint multilabel, bint zero_based,
113113

114114
return (dtype, data, indices, indptr, labels, query)
115115

116+
116117
# Two fused types are defined to be able to
117118
# use all possible combinations of parameters.
118119
ctypedef fused int_or_float:
@@ -128,8 +129,9 @@ ctypedef fused int_or_longlong:
128129
cython.integral
129130
signed long long
130131

132+
131133
def get_dense_row_string(
132-
int_or_float[:,:] X,
134+
int_or_float[:, :] X,
133135
Py_ssize_t[:] x_inds,
134136
double_or_longlong[:] x_vals,
135137
Py_ssize_t row,
@@ -143,7 +145,7 @@ def get_dense_row_string(
143145
int_or_float val
144146

145147
for k in range(row_length):
146-
val = X[row,k]
148+
val = X[row, k]
147149
if val == 0:
148150
continue
149151
x_inds[x_nz_used] = k
@@ -157,6 +159,7 @@ def get_dense_row_string(
157159

158160
return " ".join(reprs)
159161

162+
160163
def get_sparse_row_string(
161164
int_or_float[:] X_data,
162165
int[:] X_indptr,
@@ -176,6 +179,7 @@ def get_sparse_row_string(
176179

177180
return " ".join(reprs)
178181

182+
179183
def _dump_svmlight_file(
180184
X,
181185
y,
@@ -211,8 +215,6 @@ def _dump_svmlight_file(
211215
Py_ssize_t j
212216
Py_ssize_t col_start
213217
Py_ssize_t col_end
214-
bint first
215-
Py_ssize_t x_nz_used
216218
Py_ssize_t[:] x_inds = np.empty(row_length, dtype=np.intp)
217219
signed long long[:] x_vals_int
218220
double[:] x_vals_float
@@ -224,8 +226,6 @@ def _dump_svmlight_file(
224226
x_vals_float = np.zeros(row_length, dtype=np.float64)
225227

226228
for i in range(x_len):
227-
x_nz_used = 0
228-
229229
if not X_is_sp:
230230
if X_is_integral:
231231
s = get_dense_row_string(X, x_inds, x_vals_int, i, value_pattern, one_based)
@@ -234,18 +234,17 @@ def _dump_svmlight_file(
234234
else:
235235
s = get_sparse_row_string(X.data, X.indptr, X.indices, i, value_pattern, one_based)
236236
if multilabel:
237-
first = True
238237
if y_is_sp:
239238
col_start = y.indptr[i]
240239
col_end = y.indptr[i+1]
241240
labels_str = ','.join(tuple(label_pattern % y.indices[j] for j in range(col_start, col_end) if y.data[j] != 0))
242241
else:
243-
labels_str = ','.join(label_pattern % j for j in range(num_labels) if y[i,j] != 0)
242+
labels_str = ','.join(label_pattern % j for j in range(num_labels) if y[i, j] != 0)
244243
else:
245244
if y_is_sp:
246245
labels_str = label_pattern % y.data[i]
247246
else:
248-
labels_str = label_pattern % y[i,0]
247+
labels_str = label_pattern % y[i, 0]
249248

250249
if query_id_is_not_empty:
251250
feat = (labels_str, query_id[i], s)

sklearn/decomposition/_cdnmf_fast.pyx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,5 +34,5 @@ def _update_cdnmf_fast(floating[:, ::1] W, floating[:, :] HHt,
3434

3535
if hess != 0:
3636
W[i, t] = max(W[i, t] - grad / hess, 0.)
37-
37+
3838
return violation

0 commit comments

Comments
 (0)
0