Use fused type in inplace normalize · scikit-learn/scikit-learn@84decaf · GitHub
[go: up one dir, main page]

Skip to content
<div style="--spacing:var(--spacing-none)" class="Box-sc-g0xbh4-0 prc-PageLayout-PageLayoutRoot-1zlEO">

Commit 84decaf

Browse files
committed
Use fused type in inplace normalize
1 parent aadaf0f commit 84decaf

File tree

2 files changed

+48
-14
lines changed

2 files changed

+48
-14
lines changed

sklearn/utils/sparsefuncs_fast.pyx

Lines changed: 19 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ cimport numpy as np
1111
import numpy as np
1212
import scipy.sparse as sp
1313
cimport cython
14+
from cython cimport floating
1415

1516
np.import_array()
1617

@@ -274,13 +275,15 @@ def incr_mean_variance_axis0(X, last_mean, last_var, unsigned long last_n):
274275
@cython.wraparound(False)
275276
@cython.cdivision(True)
276277
def inplace_csr_row_normalize_l1(X):
277-
"""Inplace row normalize using the l1 norm"""
278-
cdef unsigned int n_samples = X.shape[0]
279-
cdef unsigned int n_features = X.shape[1]
278+
_inplace_csr_row_normalize_l1(X.data, X.shape, X.indices, X.indptr)
280279

281-
cdef np.ndarray[DOUBLE, ndim=1] X_data = X.data
282-
cdef np.ndarray[int, ndim=1] X_indices = X.indices
283-
cdef np.ndarray[int, ndim=1] X_indptr = X.indptr
280+
281+
def _inplace_csr_row_normalize_l1(np.ndarray[floating, ndim=1] X_data, shape,
282+
np.ndarray[int, ndim=1] X_indices,
283+
np.ndarray[int, ndim=1] X_indptr):
284+
"""Inplace row normalize using the l1 norm"""
285+
cdef unsigned int n_samples = shape[0]
286+
cdef unsigned int n_features = shape[1]
284287

285288
# the column indices for row i are stored in:
286289
# indices[indptr[i]:indptr[i+1]]
@@ -309,13 +312,16 @@ def inplace_csr_row_normalize_l1(X):
309312
@cython.wraparound(False)
310313
@cython.cdivision(True)
311314
def inplace_csr_row_normalize_l2(X):
312-
"""Inplace row normalize using the l2 norm"""
313-
cdef unsigned int n_samples = X.shape[0]
314-
cdef unsigned int n_features = X.shape[1]
315+
_inplace_csr_row_normalize_l2(X.data, X.shape, X.indices, X.indptr)
315316

316-
cdef np.ndarray[DOUBLE, ndim=1] X_data = X.data
317-
cdef np.ndarray[int, ndim=1] X_indices = X.indices
318-
cdef np.ndarray[int, ndim=1] X_indptr = X.indptr
317+
318+
def _inplace_csr_row_normalize_l2(np.ndarray[floating, ndim=1] X_data,
319+
shape,
320+
np.ndarray[int, ndim=1] X_indices,
321+
np.ndarray[int, ndim=1] X_indptr):
322+
"""Inplace row normalize using the l2 norm"""
323+
cdef unsigned int n_samples = shape[0]
324+
cdef unsigned int n_features = shape[1]
319325

320326
cdef unsigned int i
321327
cdef unsigned int j
@@ -364,7 +370,7 @@ def assign_rows_csr(X,
364370
"""Densify selected rows of a CSR matrix into a preallocated array.
365371
366372
Like out[out_rows] = X[X_rows].toarray() but without copying.
367-
Only supported for dtype=np.float64.
373+
No-copy supported for both dtype=np.float32 and dtype=np.float64.
368374
369375
Parameters
370376
----------

sklearn/utils/tests/test_sparsefuncs.py

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,9 @@
1414
inplace_swap_row, inplace_swap_column,
1515
min_max_axis,
1616
count_nonzero, csc_median_axis_0)
17-
from sklearn.utils.sparsefuncs_fast import assign_rows_csr
17+
from sklearn.utils.sparsefuncs_fast import (assign_rows_csr,
18+
inplace_csr_row_normalize_l1,
19+
inplace_csr_row_normalize_l2)
1820
from sklearn.utils.testing import assert_raises
1921

2022

@@ -478,3 +480,29 @@ def test_csc_row_median():
478480

479481
# Test that it raises an Error for non-csc matrices.
480482
assert_raises(TypeError, csc_median_axis_0, sp.csr_matrix(X))
483+
484+
485+
def test_inplace_normalize_l1():
486+
ones = np.ones((10, 1))
487+
488+
X = sp.rand(10, 5, dtype=np.float64, density=.7).tocsr()
489+
inplace_csr_row_normalize_l1(X)
490+
assert_array_almost_equal(X.sum(axis=1), ones)
491+
492+
X = sp.rand(10, 5, dtype=np.float32, density=.7).tocsr()
493+
inplace_csr_row_normalize_l1(X)
494+
assert_array_almost_equal(X.sum(axis=1), ones)
495+
496+
497+
def test_inplace_normalize_l2():
498+
ones = np.ones((10, 1))
499+
500+
X = sp.rand(10, 5, dtype=np.float64, density=.7).tocsr()
501+
inplace_csr_row_normalize_l2(X)
502+
X.data **= 2
503+
assert_array_almost_equal(X.sum(axis=1), ones)
504+
505+
X = sp.rand(10, 5, dtype=np.float32, density=.7).tocsr()
506+
inplace_csr_row_normalize_l2(X)
507+
X.data **= 2
508+
assert_array_almost_equal(X.sum(axis=1), ones)

0 commit comments

Comments
 (0)
0