@@ -15,7 +15,7 @@ cimport numpy as np
15
15
cimport cython
16
16
17
17
from ..utils.extmath import norm
18
- from sklearn.utils.sparsefuncs_fast cimport add_row_csr
18
+ from sklearn.utils.sparsefuncs_fast import assign_rows_csr
19
19
from sklearn.utils.fixes import bincount
20
20
21
21
ctypedef np.float64_t DOUBLE
@@ -323,9 +323,8 @@ def _centers_sparse(X, np.ndarray[INT, ndim=1] labels, n_clusters,
323
323
centers: array, shape (n_clusters, n_features)
324
324
The resulting centers
325
325
"""
326
- n_features = X.shape[1 ]
327
-
328
- cdef np.npy_intp cluster_id
326
+ cdef int n_features = X.shape[1 ]
327
+ cdef int curr_label
329
328
330
329
cdef np.ndarray[DOUBLE, ndim= 1 ] data = X.data
331
330
cdef np.ndarray[int , ndim= 1 ] indices = X.indices
@@ -338,24 +337,25 @@ def _centers_sparse(X, np.ndarray[INT, ndim=1] labels, n_clusters,
338
337
bincount(labels, minlength = n_clusters)
339
338
cdef np.ndarray[np.npy_intp, ndim= 1 , mode= " c" ] empty_clusters = \
340
339
np.where(n_samples_in_cluster == 0 )[0 ]
340
+ cdef int n_empty_clusters = empty_clusters.shape[0 ]
341
341
342
342
# maybe also relocate small clusters?
343
343
344
- if empty_clusters.shape[ 0 ] > 0 :
344
+ if n_empty_clusters > 0 :
345
345
# find points to reassign empty clusters to
346
- far_from_centers = distances.argsort()[::- 1 ]
346
+ far_from_centers = distances.argsort()[::- 1 ][:n_empty_clusters]
347
347
348
- for i in range (empty_clusters.shape[ 0 ]):
349
- cluster_id = empty_clusters[i]
348
+ # XXX two relocated clusters could be close to each other
349
+ assign_rows_csr(X, far_from_centers, empty_clusters, centers)
350
350
351
- # XXX two relocated clusters could be close to each other
352
- centers[cluster_id] = 0.
353
- add_row_csr(data, indices, indptr, far_from_centers[i],
354
- centers[cluster_id])
355
- n_samples_in_cluster[cluster_id] = 1
351
+ for i in range (n_empty_clusters):
352
+ n_samples_in_cluster[empty_clusters[i]] = 1
356
353
357
354
for i in range (labels.shape[0 ]):
358
- add_row_csr(data, indices, indptr, i, centers[labels[i]])
355
+ curr_label = labels[i]
356
+ for ind in range (indptr[i], indptr[i + 1 ]):
357
+ j = indices[ind]
358
+ centers[curr_label, j] += data[ind]
359
359
360
360
centers /= n_samples_in_cluster[:, np.newaxis]
361
361
0 commit comments