[MRG+2] MAINT: Remove add_row_csr by MechCoder · Pull Request #6676 · scikit-learn/scikit-learn · GitHub
[go: up one dir, main page]

Skip to content

[MRG+2] MAINT: Remove add_row_csr #6676

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 24, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 14 additions & 14 deletions sklearn/cluster/_k_means.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ cimport numpy as np
cimport cython

from ..utils.extmath import norm
from sklearn.utils.sparsefuncs_fast cimport add_row_csr
from sklearn.utils.sparsefuncs_fast import assign_rows_csr
from sklearn.utils.fixes import bincount

ctypedef np.float64_t DOUBLE
Expand Down Expand Up @@ -323,9 +323,8 @@ def _centers_sparse(X, np.ndarray[INT, ndim=1] labels, n_clusters,
centers: array, shape (n_clusters, n_features)
The resulting centers
"""
n_features = X.shape[1]

cdef np.npy_intp cluster_id
cdef int n_features = X.shape[1]
cdef int curr_label

cdef np.ndarray[DOUBLE, ndim=1] data = X.data
cdef np.ndarray[int, ndim=1] indices = X.indices
Expand All @@ -338,24 +337,25 @@ def _centers_sparse(X, np.ndarray[INT, ndim=1] labels, n_clusters,
bincount(labels, minlength=n_clusters)
cdef np.ndarray[np.npy_intp, ndim=1, mode="c"] empty_clusters = \
np.where(n_samples_in_cluster == 0)[0]
cdef int n_empty_clusters = empty_clusters.shape[0]

# maybe also relocate small clusters?

if empty_clusters.shape[0] > 0:
if n_empty_clusters > 0:
# find points to reassign empty clusters to
far_from_centers = distances.argsort()[::-1]
far_from_centers = distances.argsort()[::-1][:n_empty_clusters]

for i in range(empty_clusters.shape[0]):
cluster_id = empty_clusters[i]
# XXX two relocated clusters could be close to each other
assign_rows_csr(X, far_from_centers, empty_clusters, centers)

# XXX two relocated clusters could be close to each other
centers[cluster_id] = 0.
add_row_csr(data, indices, indptr, far_from_centers[i],
centers[cluster_id])
n_samples_in_cluster[cluster_id] = 1
for i in range(n_empty_clusters):
n_samples_in_cluster[empty_clusters[i]] = 1

for i in range(labels.shape[0]):
add_row_csr(data, indices, indptr, i, centers[labels[i]])
curr_label = labels[i]
for ind in range(indptr[i], indptr[i + 1]):
j = indices[ind]
centers[curr_label, j] += data[ind]

centers /= n_samples_in_cluster[:, np.newaxis]

Expand Down
8 changes: 0 additions & 8 deletions sklearn/utils/sparsefuncs_fast.pxd

This file was deleted.

17 changes: 0 additions & 17 deletions sklearn/utils/sparsefuncs_fast.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -382,21 +382,6 @@ def _inplace_csr_row_normalize_l2(np.ndarray[floating, ndim=1] X_data,
X_data[j] /= sum_


cdef void add_row_csr(np.ndarray[np.float64_t, ndim=1] data,
np.ndarray[int, ndim=1] indices,
np.ndarray[int, ndim=1] indptr,
int i, np.ndarray[np.float64_t, ndim=1, mode="c"] out):
"""Add row i of CSR matrix (data, indices, indptr) to array out.

Equivalent to out += X[i].toarray(). Returns None.
"""
cdef int ind, j

for ind in range(indptr[i], indptr[i + 1]):
j = indices[ind]
out[j] += data[ind]


def assign_rows_csr(X,
np.ndarray[np.npy_intp, ndim=1] X_rows,
np.ndarray[np.npy_intp, ndim=1] out_rows,
Expand Down Expand Up @@ -427,8 +412,6 @@ def assign_rows_csr(X,

out[out_rows] = 0.
for i in range(X_rows.shape[0]):
# XXX we could reuse add_row_csr here, but the array slice
# is not optimized away.
rX = X_rows[i]
for ind in range(indptr[rX], indptr[rX + 1]):
j = indices[ind]
Expand Down