Remove add_row_csr · scikit-learn/scikit-learn@de60297 · GitHub
[go: up one dir, main page]

Skip to content

Commit de60297

Browse files
committed
Remove add_row_csr
1 parent 28758cc commit de60297

File tree

3 files changed

+14
-39
lines changed

3 files changed

+14
-39
lines changed

sklearn/cluster/_k_means.pyx

Lines changed: 14 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@ cimport numpy as np
1515
cimport cython
1616

1717
from ..utils.extmath import norm
18-
from sklearn.utils.sparsefuncs_fast cimport add_row_csr
18+
from sklearn.utils.sparsefuncs_fast import assign_rows_csr
1919
from sklearn.utils.fixes import bincount
2020

2121
ctypedef np.float64_t DOUBLE
@@ -323,9 +323,8 @@ def _centers_sparse(X, np.ndarray[INT, ndim=1] labels, n_clusters,
323323
centers: array, shape (n_clusters, n_features)
324324
The resulting centers
325325
"""
326-
n_features = X.shape[1]
327-
328-
cdef np.npy_intp cluster_id
326+
cdef int n_features = X.shape[1]
327+
cdef int curr_label
329328

330329
cdef np.ndarray[DOUBLE, ndim=1] data = X.data
331330
cdef np.ndarray[int, ndim=1] indices = X.indices
@@ -338,24 +337,25 @@ def _centers_sparse(X, np.ndarray[INT, ndim=1] labels, n_clusters,
338337
bincount(labels, minlength=n_clusters)
339338
cdef np.ndarray[np.npy_intp, ndim=1, mode="c"] empty_clusters = \
340339
np.where(n_samples_in_cluster == 0)[0]
340+
cdef int n_empty_clusters = empty_clusters.shape[0]
341341

342342
# maybe also relocate small clusters?
343343

344-
if empty_clusters.shape[0] > 0:
344+
if n_empty_clusters > 0:
345345
# find points to reassign empty clusters to
346-
far_from_centers = distances.argsort()[::-1]
346+
far_from_centers = distances.argsort()[::-1][:n_empty_clusters]
347347

348-
for i in range(empty_clusters.shape[0]):
349-
cluster_id = empty_clusters[i]
348+
# XXX two relocated clusters could be close to each other
349+
assign_rows_csr(X, far_from_centers, empty_clusters, centers)
350350

351-
# XXX two relocated clusters could be close to each other
352-
centers[cluster_id] = 0.
353-
add_row_csr(data, indices, indptr, far_from_centers[i],
354-
centers[cluster_id])
355-
n_samples_in_cluster[cluster_id] = 1
351+
for i in range(n_empty_clusters):
352+
n_samples_in_cluster[empty_clusters[i]] = 1
356353

357354
for i in range(labels.shape[0]):
358-
add_row_csr(data, indices, indptr, i, centers[labels[i]])
355+
curr_label = labels[i]
356+
for ind in range(indptr[i], indptr[i + 1]):
357+
j = indices[ind]
358+
centers[curr_label, j] += data[ind]
359359

360360
centers /= n_samples_in_cluster[:, np.newaxis]
361361

sklearn/utils/sparsefuncs_fast.pxd

Lines changed: 0 additions & 8 deletions
This file was deleted.

sklearn/utils/sparsefuncs_fast.pyx

Lines changed: 0 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -382,21 +382,6 @@ def _inplace_csr_row_normalize_l2(np.ndarray[floating, ndim=1] X_data,
382382
X_data[j] /= sum_
383383

384384

385-
cdef void add_row_csr(np.ndarray[np.float64_t, ndim=1] data,
386-
np.ndarray[int, ndim=1] indices,
387-
np.ndarray[int, ndim=1] indptr,
388-
int i, np.ndarray[np.float64_t, ndim=1, mode="c"] out):
389-
"""Add row i of CSR matrix (data, indices, indptr) to array out.
390-
391-
Equivalent to out += X[i].toarray(). Returns None.
392-
"""
393-
cdef int ind, j
394-
395-
for ind in range(indptr[i], indptr[i + 1]):
396-
j = indices[ind]
397-
out[j] += data[ind]
398-
399-
400385
def assign_rows_csr(X,
401386
np.ndarray[np.npy_intp, ndim=1] X_rows,
402387
np.ndarray[np.npy_intp, ndim=1] out_rows,
@@ -427,8 +412,6 @@ def assign_rows_csr(X,
427412

428413
out[out_rows] = 0.
429414
for i in range(X_rows.shape[0]):
430-
# XXX we could reuse add_row_csr here, but the array slice
431-
# is not optimized away.
432415
rX = X_rows[i]
433416
for ind in range(indptr[rX], indptr[rX + 1]):
434417
j = indices[ind]

0 commit comments

Comments
 (0)
0