Remove add_row_csr (#6676) · scikit-learn/scikit-learn@20f89ef · GitHub
[go: up one dir, main page]

Skip to content

Commit 20f89ef

Browse files
committed
Remove add_row_csr (#6676)
1 parent e17d207 commit 20f89ef

File tree

3 files changed: 14 additions and 39 deletions

3 files changed

+14
-39
lines changed

sklearn/cluster/_k_means.pyx

Lines changed: 14 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@ cimport numpy as np
1515
cimport cython
1616

1717
from ..utils.extmath import norm
18-
from sklearn.utils.sparsefuncs_fast cimport add_row_csr
18+
from sklearn.utils.sparsefuncs_fast import assign_rows_csr
1919
from sklearn.utils.fixes import bincount
2020

2121
ctypedef np.float64_t DOUBLE
@@ -326,9 +326,8 @@ def _centers_sparse(X, np.ndarray[INT, ndim=1] labels, n_clusters,
326326
centers: array, shape (n_clusters, n_features)
327327
The resulting centers
328328
"""
329-
n_features = X.shape[1]
330-
331-
cdef np.npy_intp cluster_id
329+
cdef int n_features = X.shape[1]
330+
cdef int curr_label
332331

333332
cdef np.ndarray[DOUBLE, ndim=1] data = X.data
334333
cdef np.ndarray[int, ndim=1] indices = X.indices
@@ -341,24 +340,25 @@ def _centers_sparse(X, np.ndarray[INT, ndim=1] labels, n_clusters,
341340
bincount(labels, minlength=n_clusters)
342341
cdef np.ndarray[np.npy_intp, ndim=1, mode="c"] empty_clusters = \
343342
np.where(n_samples_in_cluster == 0)[0]
343+
cdef int n_empty_clusters = empty_clusters.shape[0]
344344

345345
# maybe also relocate small clusters?
346346

347-
if empty_clusters.shape[0] > 0:
347+
if n_empty_clusters > 0:
348348
# find points to reassign empty clusters to
349-
far_from_centers = distances.argsort()[::-1]
349+
far_from_centers = distances.argsort()[::-1][:n_empty_clusters]
350350

351-
for i in range(empty_clusters.shape[0]):
352-
cluster_id = empty_clusters[i]
351+
# XXX two relocated clusters could be close to each other
352+
assign_rows_csr(X, far_from_centers, empty_clusters, centers)
353353

354-
# XXX two relocated clusters could be close to each other
355-
centers[cluster_id] = 0.
356-
add_row_csr(data, indices, indptr, far_from_centers[i],
357-
centers[cluster_id])
358-
n_samples_in_cluster[cluster_id] = 1
354+
for i in range(n_empty_clusters):
355+
n_samples_in_cluster[empty_clusters[i]] = 1
359356

360357
for i in range(labels.shape[0]):
361-
add_row_csr(data, indices, indptr, i, centers[labels[i]])
358+
curr_label = labels[i]
359+
for ind in range(indptr[i], indptr[i + 1]):
360+
j = indices[ind]
361+
centers[curr_label, j] += data[ind]
362362

363363
centers /= n_samples_in_cluster[:, np.newaxis]
364364

sklearn/utils/sparsefuncs_fast.pxd

Lines changed: 0 additions & 8 deletions
This file was deleted.

sklearn/utils/sparsefuncs_fast.pyx

Lines changed: 0 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -382,21 +382,6 @@ def _inplace_csr_row_normalize_l2(np.ndarray[floating, ndim=1] X_data,
382382
X_data[j] /= sum_
383383

384384

385-
cdef void add_row_csr(np.ndarray[np.float64_t, ndim=1] data,
386-
np.ndarray[int, ndim=1] indices,
387-
np.ndarray[int, ndim=1] indptr,
388-
int i, np.ndarray[np.float64_t, ndim=1, mode="c"] out):
389-
"""Add row i of CSR matrix (data, indices, indptr) to array out.
390-
391-
Equivalent to out += X[i].toarray(). Returns None.
392-
"""
393-
cdef int ind, j
394-
395-
for ind in range(indptr[i], indptr[i + 1]):
396-
j = indices[ind]
397-
out[j] += data[ind]
398-
399-
400385
def assign_rows_csr(X,
401386
np.ndarray[np.npy_intp, ndim=1] X_rows,
402387
np.ndarray[np.npy_intp, ndim=1] out_rows,
@@ -427,8 +412,6 @@ def assign_rows_csr(X,
427412

428413
out[out_rows] = 0.
429414
for i in range(X_rows.shape[0]):
430-
# XXX we could reuse add_row_csr here, but the array slice
431-
# is not optimized away.
432415
rX = X_rows[i]
433416
for ind in range(indptr[rX], indptr[rX + 1]):
434417
j = indices[ind]

0 commit comments

Comments
 (0)
0