[MRG+2] Ridge linear model dtype consistency (all solvers but sag) (#… · dmohns/scikit-learn@51808f4 · GitHub

Commit 51808f4

massich authored and dmohns committed
[MRG+2] Ridge linear model dtype consistency (all solvers but sag) (scikit-learn#9033)
1 parent acd47fd commit 51808f4

File tree

5 files changed (+86, -15 lines)

doc/whats_new.rst (4 additions, 0 deletions)

@@ -200,6 +200,10 @@ Enhancements
    :class:`linear_model.LogisticRegression` when using newton-cg
    solver. :issue:`8835` by :user:`Joan Massich <massich>`.
 
+- Prevent cast from float32 to float64 in :class:`sklearn.linear_model.Ridge`
+  when using svd, sparse_cg, cholesky or lsqr solvers, by
+  :user:`Joan Massich <massich>` and :user:`Nicolas Cordier <ncordier>`.
+
 - Add ``max_train_size`` parameter to :class:`model_selection.TimeSeriesSplit`
   :issue:`8282` by :user:`Aman Dalmia <dalmia>`.
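To illustrate the enhancement this entry describes, here is a minimal sketch (not part of the commit; solver, shapes and seed chosen arbitrarily): with the four listed solvers, float32 input should now yield float32 coefficients instead of being silently upcast to float64.

    import numpy as np
    from sklearn.linear_model import Ridge

    rng = np.random.RandomState(0)
    X = rng.randn(6, 5).astype(np.float32)
    y = rng.randn(6).astype(np.float32)

    # Before this commit the coefficients were float64 regardless of input;
    # after it, the four direct solvers preserve the float32 dtype.
    model = Ridge(alpha=1.0, solver='cholesky').fit(X, y)
    print(model.coef_.dtype)  # expected: float32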
sklearn/linear_model/base.py (5 additions, 5 deletions)

@@ -171,7 +171,7 @@ def _preprocess_data(X, y, fit_intercept, normalize=False, copy=True,
         if sp.issparse(X):
             X_offset, X_var = mean_variance_axis(X, axis=0)
             if not return_mean:
-                X_offset = np.zeros(X.shape[1])
+                X_offset[:] = 0
 
             if normalize:
@@ -186,7 +186,7 @@ def _preprocess_data(X, y, fit_intercept, normalize=False, copy=True,
                 X_scale[X_scale == 0] = 1
                 inplace_column_scale(X, 1. / X_scale)
             else:
-                X_scale = np.ones(X.shape[1])
+                X_scale = np.ones(X.shape[1], dtype=X.dtype)
 
         else:
             X_offset = np.average(X, axis=0, weights=sample_weight)
@@ -195,12 +195,12 @@ def _preprocess_data(X, y, fit_intercept, normalize=False, copy=True,
                 X, X_scale = f_normalize(X, axis=0, copy=False,
                                          return_norm=True)
             else:
-                X_scale = np.ones(X.shape[1])
+                X_scale = np.ones(X.shape[1], dtype=X.dtype)
             y_offset = np.average(y, axis=0, weights=sample_weight)
             y = y - y_offset
     else:
-        X_offset = np.zeros(X.shape[1])
-        X_scale = np.ones(X.shape[1])
+        X_offset = np.zeros(X.shape[1], dtype=X.dtype)
+        X_scale = np.ones(X.shape[1], dtype=X.dtype)
         y_offset = 0. if y.ndim == 1 else np.zeros(y.shape[1], dtype=X.dtype)
 
     return X, y, X_offset, y_offset, X_scale
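The explicit dtype=X.dtype arguments matter because NumPy's allocation helpers default to float64, so a freshly created offset or scale array would drag a float32 pipeline up to float64 at the first arithmetic operation. A quick NumPy-only illustration (not from the commit):

    import numpy as np

    X = np.ones((4, 3), dtype=np.float32)

    print(np.zeros(X.shape[1]).dtype)                 # float64: NumPy's default
    print(np.zeros(X.shape[1], dtype=X.dtype).dtype)  # float32: matches X

    # Mixing the two upcasts the result, which is what the fix prevents:
    print((X - np.zeros(X.shape[1])).dtype)           # float64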

sklearn/linear_model/ridge.py (17 additions, 9 deletions)

@@ -36,7 +36,7 @@
 def _solve_sparse_cg(X, y, alpha, max_iter=None, tol=1e-3, verbose=0):
     n_samples, n_features = X.shape
     X1 = sp_linalg.aslinearoperator(X)
-    coefs = np.empty((y.shape[1], n_features))
+    coefs = np.empty((y.shape[1], n_features), dtype=X.dtype)
 
     if n_features > n_samples:
         def create_mv(curr_alpha):
@@ -80,7 +80,7 @@ def _mv(x):
 
 def _solve_lsqr(X, y, alpha, max_iter=None, tol=1e-3):
     n_samples, n_features = X.shape
-    coefs = np.empty((y.shape[1], n_features))
+    coefs = np.empty((y.shape[1], n_features), dtype=X.dtype)
     n_iter = np.empty(y.shape[1], dtype=np.int32)
 
     # According to the lsqr documentation, alpha = damp^2.
@@ -111,7 +111,7 @@ def _solve_cholesky(X, y, alpha):
         return linalg.solve(A, Xy, sym_pos=True,
                             overwrite_a=True).T
     else:
-        coefs = np.empty([n_targets, n_features])
+        coefs = np.empty([n_targets, n_features], dtype=X.dtype)
         for coef, target, current_alpha in zip(coefs, Xy.T, alpha):
             A.flat[::n_features + 1] += current_alpha
             coef[:] = linalg.solve(A, target, sym_pos=True,
@@ -165,7 +165,7 @@ def _solve_cholesky_kernel(K, y, alpha, sample_weight=None, copy=False):
         return dual_coef
     else:
         # One penalty per target. We need to solve each target separately.
-        dual_coefs = np.empty([n_targets, n_samples])
+        dual_coefs = np.empty([n_targets, n_samples], K.dtype)
 
         for dual_coef, target, current_alpha in zip(dual_coefs, y.T, alpha):
             K.flat[::n_samples + 1] += current_alpha
@@ -186,7 +186,7 @@ def _solve_svd(X, y, alpha):
     idx = s > 1e-15  # same default value as scipy.linalg.pinv
     s_nnz = s[idx][:, np.newaxis]
     UTy = np.dot(U.T, y)
-    d = np.zeros((s.size, alpha.size))
+    d = np.zeros((s.size, alpha.size), dtype=X.dtype)
     d[idx] = s_nnz / (s_nnz ** 2 + alpha)
     d_UT_y = d * UTy
     return np.dot(Vt.T, d_UT_y).T
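In _solve_svd the upcast risk comes from d: scipy's SVD already returns U, s and Vt in X's dtype, so a float64 d would be the only float64 operand and would promote d_UT_y and the returned coefficients. A sketch of that propagation (illustrative, not from the commit):

    import numpy as np
    from scipy import linalg

    rng = np.random.RandomState(0)
    X = rng.randn(6, 5).astype(np.float32)
    y = rng.randn(6).astype(np.float32)

    U, s, Vt = linalg.svd(X, full_matrices=False)  # all float32, like X
    UTy = np.dot(U.T, y)                           # float32

    d_64 = np.zeros((s.size, 1))                   # float64 by default
    print((d_64 * UTy).dtype)                      # float64: the upcast

    d_32 = np.zeros((s.size, 1), dtype=X.dtype)    # as in the fix
    print((d_32 * UTy).dtype)                      # float32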
@@ -320,15 +320,17 @@ def ridge_regression(X, y, alpha, sample_weight=None, solver='auto',
                       "automatically changed into 'sag'.")
         solver = 'sag'
 
+    _dtype = [np.float64, np.float32]
+
     # SAG needs X and y columns to be C-contiguous and np.float64
     if solver in ['sag', 'saga']:
         X = check_array(X, accept_sparse=['csr'],
                         dtype=np.float64, order='C')
         y = check_array(y, dtype=np.float64, ensure_2d=False, order='F')
     else:
         X = check_array(X, accept_sparse=['csr', 'csc', 'coo'],
-                        dtype=np.float64)
-        y = check_array(y, dtype='numeric', ensure_2d=False)
+                        dtype=_dtype)
+        y = check_array(y, dtype=X.dtype, ensure_2d=False)
     check_consistent_length(X, y)
 
     n_samples, n_features = X.shape
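Passing a list as check_array's dtype means: leave the input alone if its dtype is already in the list, otherwise convert to the first entry. So float32 survives, while other dtypes are still normalized to float64. For example (illustrative):

    import numpy as np
    from sklearn.utils import check_array

    X32 = np.ones((3, 2), dtype=np.float32)
    Xint = np.ones((3, 2), dtype=np.int64)

    print(check_array(X32, dtype=[np.float64, np.float32]).dtype)   # float32, preserved
    print(check_array(Xint, dtype=[np.float64, np.float32]).dtype)  # float64, first entry
    print(check_array(X32, dtype=np.float64).dtype)                 # float64, the old behaviour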
@@ -371,7 +373,7 @@ def ridge_regression(X, y, alpha, sample_weight=None, solver='auto',
         X, y = _rescale_data(X, y, sample_weight)
 
     # There should be either 1 or n_targets penalties
-    alpha = np.asarray(alpha).ravel()
+    alpha = np.asarray(alpha, dtype=X.dtype).ravel()
     if alpha.size not in [1, n_targets]:
         raise ValueError("Number of targets and number of penalties "
                          "do not correspond: %d != %d"
@@ -469,7 +471,13 @@ def __init__(self, alpha=1.0, fit_intercept=True, normalize=False,
         self.random_state = random_state
 
     def fit(self, X, y, sample_weight=None):
-        X, y = check_X_y(X, y, ['csr', 'csc', 'coo'], dtype=np.float64,
+
+        if self.solver in ['svd', 'sparse_cg', 'cholesky', 'lsqr']:
+            _dtype = [np.float64, np.float32]
+        else:
+            _dtype = np.float64
+
+        X, y = check_X_y(X, y, ['csr', 'csc', 'coo'], dtype=_dtype,
                          multi_output=True, y_numeric=True)
 
         if ((sample_weight is not None) and
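The solver split in fit mirrors the branch in ridge_regression above: sag (and saga) still require C-contiguous float64 data, so only the four direct solvers may accept either precision. Expected behaviour after the commit (illustrative sketch):

    import numpy as np
    from sklearn.linear_model import Ridge

    rng = np.random.RandomState(0)
    X = rng.randn(10, 3).astype(np.float32)
    y = rng.randn(10).astype(np.float32)

    print(Ridge(solver='lsqr').fit(X, y).coef_.dtype)  # float32: preserved
    print(Ridge(solver='sag').fit(X, y).coef_.dtype)   # float64: sag casts up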

sklearn/linear_model/tests/test_ridge.py (59 additions, 0 deletions)

@@ -788,3 +788,62 @@ def test_errors_and_values_svd_helper():
 def test_ridge_classifier_no_support_multilabel():
     X, y = make_multilabel_classification(n_samples=10, random_state=0)
     assert_raises(ValueError, RidgeClassifier().fit, X, y)
+
+
+def test_dtype_match():
+    rng = np.random.RandomState(0)
+    alpha = 1.0
+
+    n_samples, n_features = 6, 5
+    X_64 = rng.randn(n_samples, n_features)
+    y_64 = rng.randn(n_samples)
+    X_32 = X_64.astype(np.float32)
+    y_32 = y_64.astype(np.float32)
+
+    solvers = ["svd", "sparse_cg", "cholesky", "lsqr"]
+    for solver in solvers:
+
+        # Check type consistency, 32 bits
+        ridge_32 = Ridge(alpha=alpha, solver=solver)
+        ridge_32.fit(X_32, y_32)
+        coef_32 = ridge_32.coef_
+
+        # Check type consistency, 64 bits
+        ridge_64 = Ridge(alpha=alpha, solver=solver)
+        ridge_64.fit(X_64, y_64)
+        coef_64 = ridge_64.coef_
+
+        # Do all the checks at once; this way it is easier to debug
+        assert_almost_equal(ridge_32.coef_, ridge_64.coef_, decimal=5)
+
+        # Do the actual checks at once for easier debugging
+        assert_equal(coef_32.dtype, X_32.dtype)
+        assert_equal(coef_64.dtype, X_64.dtype)
+
+
+def test_dtype_match_cholesky():
+    # Test different alphas in cholesky solver to ensure full coverage.
+    # This test is separated from test_dtype_match for clarity.
+    rng = np.random.RandomState(0)
+    alpha = (1.0, 0.5)
+
+    n_samples, n_features, n_target = 6, 7, 2
+    X_64 = rng.randn(n_samples, n_features)
+    y_64 = rng.randn(n_samples, n_target)
+    X_32 = X_64.astype(np.float32)
+    y_32 = y_64.astype(np.float32)
+
+    # Check type consistency, 32 bits
+    ridge_32 = Ridge(alpha=alpha, solver='cholesky')
+    ridge_32.fit(X_32, y_32)
+    coef_32 = ridge_32.coef_
+
+    # Check type consistency, 64 bits
+    ridge_64 = Ridge(alpha=alpha, solver='cholesky')
+    ridge_64.fit(X_64, y_64)
+    coef_64 = ridge_64.coef_
+
+    # Do all the checks at once; this way it is easier to debug
+    assert_equal(coef_32.dtype, X_32.dtype)
+    assert_equal(coef_64.dtype, X_64.dtype)
+    assert_almost_equal(ridge_32.coef_, ridge_64.coef_, decimal=5)

sklearn/utils/validation.py (1 addition, 1 deletion)

@@ -363,7 +363,7 @@ def check_array(array, accept_sparse=False, dtype="numeric", order=None,
         accept_sparse = False
 
     # store whether originally we wanted numeric dtype
-    dtype_numeric = dtype == "numeric"
+    dtype_numeric = isinstance(dtype, six.string_types) and dtype == "numeric"
 
     dtype_orig = getattr(array, "dtype", None)
    if not hasattr(dtype_orig, 'kind'):
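This guard is needed because dtype can now be a list of NumPy dtypes rather than only a string or None; restricting the == "numeric" comparison to actual strings makes the intent explicit and avoids relying on whatever __eq__ a non-string argument defines. A small demonstration of the guarded check (six was scikit-learn's Python 2/3 compatibility layer at the time):

    import numpy as np
    import six

    for dtype in ["numeric", np.float64, [np.float64, np.float32], None]:
        dtype_numeric = (isinstance(dtype, six.string_types)
                         and dtype == "numeric")
        print(repr(dtype), '->', dtype_numeric)  # True only for the "numeric" string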
