Merge pull request #5791 from MechCoder/scale_csc · scikit-learn/scikit-learn@d644e8e · GitHub
[go: up one dir, main page]

Skip to content

Commit d644e8e

Browse files
committed
Merge pull request #5791 from MechCoder/scale_csc
[MRG+1] Scaling a sparse matrix along axis 0 should accept a csc by default
2 parents 5bba3d0 + 6e87813 commit d644e8e

File tree

2 files changed

+5
-12
lines changed

2 files changed

+5
-12
lines changed

sklearn/preprocessing/data.py

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ def scale(X, axis=0, with_mean=True, with_std=True, copy=True):
103103
copy : boolean, optional, default True
104104
set to False to perform inplace row normalization and avoid a
105105
copy (if the input is already a numpy array or a scipy.sparse
106-
CSR matrix and if axis is 1).
106+
CSC matrix and if axis is 1).
107107
108108
Notes
109109
-----
@@ -113,18 +113,18 @@ def scale(X, axis=0, with_mean=True, with_std=True, copy=True):
113113
114114
Instead the caller is expected to either set explicitly
115115
`with_mean=False` (in that case, only variance scaling will be
116-
performed on the features of the CSR matrix) or to call `X.toarray()`
116+
performed on the features of the CSC matrix) or to call `X.toarray()`
117117
if he/she expects the materialized dense array to fit in memory.
118118
119-
To avoid memory copy the caller should pass a CSR matrix.
119+
To avoid memory copy the caller should pass a CSC matrix.
120120
121121
See also
122122
--------
123123
:class:`sklearn.preprocessing.StandardScaler` to perform centering and
124124
scaling using the ``Transformer`` API (e.g. as part of a preprocessing
125125
:class:`sklearn.pipeline.Pipeline`)
126126
"""
127-
X = check_array(X, accept_sparse='csr', copy=copy, ensure_2d=False,
127+
X = check_array(X, accept_sparse='csc', copy=copy, ensure_2d=False,
128128
warn_on_dtype=True, estimator='the scale function',
129129
dtype=FLOAT_DTYPES)
130130
if sparse.issparse(X):
@@ -135,11 +135,6 @@ def scale(X, axis=0, with_mean=True, with_std=True, copy=True):
135135
if axis != 0:
136136
raise ValueError("Can only scale sparse matrix on axis=0, "
137137
" got axis=%d" % axis)
138-
if not sparse.isspmatrix_csr(X):
139-
X = X.tocsr()
140-
copy = False
141-
if copy:
142-
X = X.copy()
143138
if with_std:
144139
_, var = mean_variance_axis(X, axis=0)
145140
var = _handle_zeros_in_scale(var, copy=False)
@@ -150,8 +145,6 @@ def scale(X, axis=0, with_mean=True, with_std=True, copy=True):
150145
mean_ = np.mean(X, axis)
151146
if with_std:
152147
scale_ = np.std(X, axis)
153-
if copy:
154-
X = X.copy()
155148
# Xr is a view on the original array that enables easy use of
156149
# broadcasting on the axis in which we are interested in
157150
Xr = np.rollaxis(X, axis)

sklearn/preprocessing/tests/test_data.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -843,7 +843,7 @@ def test_scale_function_without_centering():
843843

844844
# null scale
845845
X_csr_scaled = scale(X_csr, with_mean=False, with_std=False, copy=True)
846-
assert_array_almost_equal(X_csr.data, X_csr_scaled.data)
846+
assert_array_almost_equal(X_csr.toarray(), X_csr_scaled.toarray())
847847

848848

849849
def test_robust_scale_axis1():

0 commit comments

Comments
 (0)
0