Use fused type in inplace normalize · scikit-learn/scikit-learn@27ac56b · GitHub
[go: up one dir, main page]

Skip to content

Commit 27ac56b

Browse files
committed
Use fused type in inplace normalize
1 parent 06bf797 commit 27ac56b

File tree

2 files changed

+48
-14
lines changed

2 files changed

+48
-14
lines changed

sklearn/utils/sparsefuncs_fast.pyx

Lines changed: 19 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@ cimport numpy as np
1111
import numpy as np
1212
import scipy.sparse as sp
1313
cimport cython
14+
from cython cimport floating
1415

1516
np.import_array()
1617

@@ -274,13 +275,15 @@ def incr_mean_variance_axis0(X, last_mean, last_var, unsigned long last_n):
274275
@cython.wraparound(False)
275276
@cython.cdivision(True)
276277
def inplace_csr_row_normalize_l1(X):
277-
"""Inplace row normalize using the l1 norm"""
278-
cdef unsigned int n_samples = X.shape[0]
279-
cdef unsigned int n_features = X.shape[1]
278+
_inplace_csr_row_normalize_l1(X.data, X.shape, X.indices, X.indptr)
280279

281-
cdef np.ndarray[DOUBLE, ndim=1] X_data = X.data
282-
cdef np.ndarray[int, ndim=1] X_indices = X.indices
283-
cdef np.ndarray[int, ndim=1] X_indptr = X.indptr
280+
281+
def _inplace_csr_row_normalize_l1(np.ndarray[floating, ndim=1] X_data, shape,
282+
np.ndarray[int, ndim=1] X_indices,
283+
np.ndarray[int, ndim=1] X_indptr):
284+
"""Inplace row normalize using the l1 norm"""
285+
cdef unsigned int n_samples = shape[0]
286+
cdef unsigned int n_features = shape[1]
284287

285288
# the column indices for row i are stored in:
286289
# indices[indptr[i]:indptr[i+1]]
@@ -309,13 +312,16 @@ def inplace_csr_row_normalize_l1(X):
309312
@cython.wraparound(False)
310313
@cython.cdivision(True)
311314
def inplace_csr_row_normalize_l2(X):
312-
"""Inplace row normalize using the l2 norm"""
313-
cdef unsigned int n_samples = X.shape[0]
314-
cdef unsigned int n_features = X.shape[1]
315+
_inplace_csr_row_normalize_l2(X.data, X.shape, X.indices, X.indptr)
315316

316-
cdef np.ndarray[DOUBLE, ndim=1] X_data = X.data
317-
cdef np.ndarray[int, ndim=1] X_indices = X.indices
318-
cdef np.ndarray[int, ndim=1] X_indptr = X.indptr
317+
318+
def _inplace_csr_row_normalize_l2(np.ndarray[floating, ndim=1] X_data,
319+
shape,
320+
np.ndarray[int, ndim=1] X_indices,
321+
np.ndarray[int, ndim=1] X_indptr):
322+
"""Inplace row normalize using the l2 norm"""
323+
cdef unsigned int n_samples = shape[0]
324+
cdef unsigned int n_features = shape[1]
319325

320326
cdef unsigned int i
321327
cdef unsigned int j
@@ -364,7 +370,7 @@ def assign_rows_csr(X,
364370
"""Densify selected rows of a CSR matrix into a preallocated array.
365371
366372
Like out[out_rows] = X[X_rows].toarray() but without copying.
367-
Only supported for dtype=np.float64.
373+
No-copy supported for both dtype=np.float32 and dtype=np.float64.
368374
369375
Parameters
370376
----------

sklearn/utils/tests/test_sparsefuncs.py

Lines changed: 29 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@
55
from numpy.testing import (assert_array_almost_equal,
66
assert_array_equal,
77
assert_equal)
8+
from numpy.random import RandomState
89

910
from sklearn.datasets import make_classification
1011
from sklearn.utils.sparsefuncs import (mean_variance_axis,
@@ -14,7 +15,9 @@
1415
inplace_swap_row, inplace_swap_column,
1516
min_max_axis,
1617
count_nonzero, csc_median_axis_0)
17-
from sklearn.utils.sparsefuncs_fast import assign_rows_csr
18+
from sklearn.utils.sparsefuncs_fast import (assign_rows_csr,
19+
inplace_csr_row_normalize_l1,
20+
inplace_csr_row_normalize_l2)
1821
from sklearn.utils.testing import assert_raises
1922

2023

@@ -478,3 +481,28 @@ def test_csc_row_median():
478481

479482
# Test that it raises an Error for non-csc matrices.
480483
assert_raises(TypeError, csc_median_axis_0, sp.csr_matrix(X))
484+
485+
486+
def test_inplace_normalize_l1():
487+
ones = np.ones((10, 1))
488+
rs = RandomState(10)
489+
490+
for dtype in (np.float64, np.float32):
491+
X = rs.rand(10, 5).astype(dtype)
492+
X_csr = sp.csr_matrix(X)
493+
inplace_csr_row_normalize_l1(X_csr)
494+
assert_equal(X_csr.dtype, dtype)
495+
assert_array_almost_equal(X_csr.sum(axis=1), ones)
496+
497+
498+
def test_inplace_normalize_l2():
499+
ones = np.ones((10, 1))
500+
rs = RandomState(10)
501+
502+
for dtype in (np.float64, np.float32):
503+
X = rs.rand(10, 5).astype(dtype)
504+
X_csr = sp.csr_matrix(X)
505+
inplace_csr_row_normalize_l2(X_csr)
506+
assert_equal(X_csr.dtype, dtype)
507+
X_csr.data **= 2
508+
assert_array_almost_equal(X_csr.sum(axis=1), ones)

0 commit comments

Comments
 (0)
0