ENH Ridge with solver SAG/SAGA does not cast to float64 (#13302) · scikit-learn/scikit-learn@0dac63f · GitHub

Commit 0dac63f

massich authored and jnothman committed
ENH Ridge with solver SAG/SAGA does not cast to float64 (#13302)
1 parent 28728f5 commit 0dac63f

File tree: 3 files changed (+97, -54 lines)

doc/whats_new/v0.21.rst

Lines changed: 4 additions & 0 deletions
@@ -384,6 +384,10 @@ Support for Python 3.4 and below has been officially dropped.
 :mod:`sklearn.linear_model`
 ...........................
 
+- |Enhancement| :mod:`linear_model.ridge` now preserves ``float32`` and
+  ``float64`` dtypes. :issues:`8769` and :issues:`11000` by
+  :user:`Guillaume Lemaitre <glemaitre>`, and :user:`Joan Massich <massich>`
+
 - |Feature| :class:`linear_model.LogisticRegression` and
   :class:`linear_model.LogisticRegressionCV` now support Elastic-Net penalty,
   with the 'saga' solver. :pr:`11646` by :user:`Nicolas Hug <NicolasHug>`.
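
As a quick illustration of this entry (an editorial sketch, not part of the commit; the data is made up), the iterative solvers now keep the input precision end to end:

import numpy as np
from sklearn.linear_model import Ridge

X = np.random.RandomState(0).randn(20, 4).astype(np.float32)
y = X.sum(axis=1)  # float32 target derived from X

ridge = Ridge(alpha=1.0, solver='saga').fit(X, y)
# Before this change, 'sag'/'saga' cast everything to float64.
assert ridge.coef_.dtype == np.float32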

sklearn/linear_model/ridge.py

Lines changed: 44 additions & 35 deletions
@@ -226,9 +226,17 @@ def _solve_svd(X, y, alpha):
     return np.dot(Vt.T, d_UT_y).T
 
 
+def _get_valid_accept_sparse(is_X_sparse, solver):
+    if is_X_sparse and solver in ['auto', 'sag', 'saga']:
+        return 'csr'
+    else:
+        return ['csr', 'csc', 'coo']
+
+
 def ridge_regression(X, y, alpha, sample_weight=None, solver='auto',
                      max_iter=None, tol=1e-3, verbose=0, random_state=None,
-                     return_n_iter=False, return_intercept=False):
+                     return_n_iter=False, return_intercept=False,
+                     check_input=True):
     """Solve the ridge equation by the method of normal equations.
 
     Read more in the :ref:`User Guide <ridge_regression>`.
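
For reference, the new helper only encodes which sparse formats each solver can consume: SAG/SAGA (and 'auto', which may dispatch to them) handle CSR only, while the other solvers also accept CSC and COO. A small sketch of its contract, with the helper copied from the diff above so the snippet is self-contained:

import numpy as np
from scipy import sparse

def _get_valid_accept_sparse(is_X_sparse, solver):  # copied from the diff above
    if is_X_sparse and solver in ['auto', 'sag', 'saga']:
        return 'csr'
    else:
        return ['csr', 'csc', 'coo']

X_sparse = sparse.csc_matrix(np.ones((3, 2)))
assert _get_valid_accept_sparse(sparse.issparse(X_sparse), 'saga') == 'csr'
assert _get_valid_accept_sparse(False, 'cholesky') == ['csr', 'csc', 'coo']
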
@@ -332,6 +340,11 @@ def ridge_regression(X, y, alpha, sample_weight=None, solver='auto',
 
         .. versionadded:: 0.17
 
+    check_input : boolean, default True
+        If False, the input arrays X and y will not be checked.
+
+        .. versionadded:: 0.21
+
     Returns
     -------
     coef : array, shape = [n_features] or [n_targets, n_features]
@@ -360,13 +373,14 @@ def ridge_regression(X, y, alpha, sample_weight=None, solver='auto',
                              return_n_iter=return_n_iter,
                              return_intercept=return_intercept,
                              X_scale=None,
-                             X_offset=None)
+                             X_offset=None,
+                             check_input=check_input)
 
 
 def _ridge_regression(X, y, alpha, sample_weight=None, solver='auto',
                       max_iter=None, tol=1e-3, verbose=0, random_state=None,
                       return_n_iter=False, return_intercept=False,
-                      X_scale=None, X_offset=None):
+                      X_scale=None, X_offset=None, check_input=True):
 
     has_sw = sample_weight is not None
 
@@ -388,17 +402,12 @@ def _ridge_regression(X, y, alpha, sample_weight=None, solver='auto',
                          "intercept. Please change solver to 'sag' or set "
                          "return_intercept=False.")
 
-    _dtype = [np.float64, np.float32]
-
-    # SAG needs X and y columns to be C-contiguous and np.float64
-    if solver in ['sag', 'saga']:
-        X = check_array(X, accept_sparse=['csr'],
-                        dtype=np.float64, order='C')
-        y = check_array(y, dtype=np.float64, ensure_2d=False, order='F')
-    else:
-        X = check_array(X, accept_sparse=['csr', 'csc', 'coo'],
-                        dtype=_dtype)
-        y = check_array(y, dtype=X.dtype, ensure_2d=False)
+    if check_input:
+        _dtype = [np.float64, np.float32]
+        _accept_sparse = _get_valid_accept_sparse(sparse.issparse(X), solver)
+        X = check_array(X, accept_sparse=_accept_sparse, dtype=_dtype,
+                        order="C")
+        y = check_array(y, dtype=X.dtype, ensure_2d=False, order="C")
     check_consistent_length(X, y)
 
     n_samples, n_features = X.shape
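
The dtype list is what makes this validation precision-preserving: `check_array` keeps an input whose dtype is already in the list and casts anything else to the first entry. A sketch of that behavior:

import numpy as np
from sklearn.utils import check_array

X32 = np.ones((3, 2), dtype=np.float32)
Xint = np.ones((3, 2), dtype=np.int64)

# float32 is in the allowed list, so it is kept as-is.
assert check_array(X32, dtype=[np.float64, np.float32]).dtype == np.float32
# int64 is not, so it is cast to the first entry, float64.
assert check_array(Xint, dtype=[np.float64, np.float32]).dtype == np.float64
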
@@ -417,8 +426,6 @@ def _ridge_regression(X, y, alpha, sample_weight=None, solver='auto',
         raise ValueError("Number of samples in X and y does not correspond:"
                          " %d != %d" % (n_samples, n_samples_))
 
-
-
     if has_sw:
         if np.atleast_1d(sample_weight).ndim > 1:
             raise ValueError("Sample weights must be 1D array or scalar")
@@ -438,7 +445,6 @@ def _ridge_regression(X, y, alpha, sample_weight=None, solver='auto',
     if alpha.size == 1 and n_targets > 1:
         alpha = np.repeat(alpha, n_targets)
 
-
     n_iter = None
     if solver == 'sparse_cg':
         coef = _solve_sparse_cg(X, y, alpha,
@@ -461,7 +467,6 @@ def _ridge_regression(X, y, alpha, sample_weight=None, solver='auto',
             except linalg.LinAlgError:
                 # use SVD solver if matrix is singular
                 solver = 'svd'
-
         else:
             try:
                 coef = _solve_cholesky(X, y, alpha)
@@ -473,11 +478,12 @@ def _ridge_regression(X, y, alpha, sample_weight=None, solver='auto',
         # precompute max_squared_sum for all targets
         max_squared_sum = row_norms(X, squared=True).max()
 
-        coef = np.empty((y.shape[1], n_features))
+        coef = np.empty((y.shape[1], n_features), dtype=X.dtype)
         n_iter = np.empty(y.shape[1], dtype=np.int32)
-        intercept = np.zeros((y.shape[1], ))
+        intercept = np.zeros((y.shape[1], ), dtype=X.dtype)
         for i, (alpha_i, target) in enumerate(zip(alpha, y.T)):
-            init = {'coef': np.zeros((n_features + int(return_intercept), 1))}
+            init = {'coef': np.zeros((n_features + int(return_intercept), 1),
+                                     dtype=X.dtype)}
             coef_, n_iter_, _ = sag_solver(
                 X, target.ravel(), sample_weight, 'squared', alpha_i, 0,
                 max_iter, tol, verbose, random_state, False, max_squared_sum,
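
Threading `dtype=X.dtype` through these allocations is the core of the fix for the SAG/SAGA path: `np.empty` and `np.zeros` default to float64, so without it the float32 work arrays would be silently promoted back to double precision. For example:

import numpy as np

X = np.ones((10, 5), dtype=np.float32)

coef_old = np.empty((2, 5))                 # defaults to float64 (old behavior)
coef_new = np.empty((2, 5), dtype=X.dtype)  # stays float32

assert coef_old.dtype == np.float64
assert coef_new.dtype == np.float32
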
@@ -530,13 +536,13 @@ def __init__(self, alpha=1.0, fit_intercept=True, normalize=False,
 
     def fit(self, X, y, sample_weight=None):
 
-        if self.solver in ('sag', 'saga'):
-            _dtype = np.float64
-        else:
-            # all other solvers work at both float precision levels
-            _dtype = [np.float64, np.float32]
-
-        X, y = check_X_y(X, y, ['csr', 'csc', 'coo'], dtype=_dtype,
+        # all other solvers work at both float precision levels
+        _dtype = [np.float64, np.float32]
+        _accept_sparse = _get_valid_accept_sparse(sparse.issparse(X),
+                                                  self.solver)
+        X, y = check_X_y(X, y,
+                         accept_sparse=_accept_sparse,
+                         dtype=_dtype,
                          multi_output=True, y_numeric=True)
 
         if ((sample_weight is not None) and
@@ -555,7 +561,7 @@ def fit(self, X, y, sample_weight=None):
                 X, y, alpha=self.alpha, sample_weight=sample_weight,
                 max_iter=self.max_iter, tol=self.tol, solver=self.solver,
                 random_state=self.random_state, return_n_iter=True,
-                return_intercept=True)
+                return_intercept=True, check_input=False)
             # add the offset which was subtracted by _preprocess_data
             self.intercept_ += y_offset
         else:
@@ -570,8 +576,7 @@ def fit(self, X, y, sample_weight=None):
                 X, y, alpha=self.alpha, sample_weight=sample_weight,
                 max_iter=self.max_iter, tol=self.tol, solver=self.solver,
                 random_state=self.random_state, return_n_iter=True,
-                return_intercept=False, **params)
-
+                return_intercept=False, check_input=False, **params)
             self._set_intercept(X_offset, y_offset, X_scale)
 
         return self
@@ -893,8 +898,9 @@ def fit(self, X, y, sample_weight=None):
         -------
         self : returns an instance of self.
         """
-        check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'],
-                  multi_output=True)
+        _accept_sparse = _get_valid_accept_sparse(sparse.issparse(X),
+                                                  self.solver)
+        check_X_y(X, y, accept_sparse=_accept_sparse, multi_output=True)
 
         self._label_binarizer = LabelBinarizer(pos_label=1, neg_label=-1)
         Y = self._label_binarizer.fit_transform(y)
@@ -1077,10 +1083,13 @@ def fit(self, X, y, sample_weight=None):
         -------
         self : object
         """
-        X, y = check_X_y(X, y, ['csr', 'csc', 'coo'], dtype=np.float64,
+        X, y = check_X_y(X, y,
+                         accept_sparse=['csr', 'csc', 'coo'],
+                         dtype=[np.float64, np.float32],
                          multi_output=True, y_numeric=True)
         if sample_weight is not None and not isinstance(sample_weight, float):
-            sample_weight = check_array(sample_weight, ensure_2d=False)
+            sample_weight = check_array(sample_weight, ensure_2d=False,
+                                        dtype=X.dtype)
         n_samples, n_features = X.shape
 
         X, y, X_offset, y_offset, X_scale = LinearModel._preprocess_data(
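
`RidgeCV.fit` likewise casts `sample_weight` to the dtype of the validated `X`, so float64 weights no longer force mixed-precision arithmetic against float32 data. A sketch of the cast in isolation (illustrative values):

import numpy as np
from sklearn.utils import check_array

X = np.ones((4, 2), dtype=np.float32)            # as returned by check_X_y above
sample_weight = np.array([1.0, 2.0, 1.0, 0.5])   # float64 by default

sample_weight = check_array(sample_weight, ensure_2d=False, dtype=X.dtype)
assert sample_weight.dtype == np.float32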

sklearn/linear_model/tests/test_ridge.py

Lines changed: 49 additions & 19 deletions
@@ -1,3 +1,4 @@
+import os
 import numpy as np
 import scipy.sparse as sp
 from scipy import linalg
@@ -6,6 +7,7 @@
 import pytest
 
 from sklearn.utils.testing import assert_almost_equal
+from sklearn.utils.testing import assert_allclose
 from sklearn.utils.testing import assert_array_almost_equal
 from sklearn.utils.testing import assert_allclose
 from sklearn.utils.testing import assert_equal
@@ -38,7 +40,7 @@
 from sklearn.model_selection import GridSearchCV
 from sklearn.model_selection import KFold
 
-from sklearn.utils import check_random_state
+from sklearn.utils import check_random_state, _IS_32BIT
 from sklearn.datasets import make_multilabel_classification
 
 diabetes = datasets.load_diabetes()
@@ -934,7 +936,9 @@ def test_ridge_classifier_no_support_multilabel():
     assert_raises(ValueError, RidgeClassifier().fit, X, y)
 
 
-def test_dtype_match():
+@pytest.mark.parametrize(
+    "solver", ["svd", "sparse_cg", "cholesky", "lsqr", "sag", "saga"])
+def test_dtype_match(solver):
     rng = np.random.RandomState(0)
     alpha = 1.0
 
@@ -944,25 +948,22 @@ def test_dtype_match(solver):
     X_32 = X_64.astype(np.float32)
     y_32 = y_64.astype(np.float32)
 
-    solvers = ["svd", "sparse_cg", "cholesky", "lsqr"]
-    for solver in solvers:
-
-        # Check type consistency 32bits
-        ridge_32 = Ridge(alpha=alpha, solver=solver)
-        ridge_32.fit(X_32, y_32)
-        coef_32 = ridge_32.coef_
+    # Check type consistency 32bits
+    ridge_32 = Ridge(alpha=alpha, solver=solver, max_iter=500, tol=1e-10,)
+    ridge_32.fit(X_32, y_32)
+    coef_32 = ridge_32.coef_
 
-        # Check type consistency 64 bits
-        ridge_64 = Ridge(alpha=alpha, solver=solver)
-        ridge_64.fit(X_64, y_64)
-        coef_64 = ridge_64.coef_
+    # Check type consistency 64 bits
+    ridge_64 = Ridge(alpha=alpha, solver=solver, max_iter=500, tol=1e-10,)
+    ridge_64.fit(X_64, y_64)
+    coef_64 = ridge_64.coef_
 
-        # Do the actual checks at once for easier debug
-        assert coef_32.dtype == X_32.dtype
-        assert coef_64.dtype == X_64.dtype
-        assert ridge_32.predict(X_32).dtype == X_32.dtype
-        assert ridge_64.predict(X_64).dtype == X_64.dtype
-        assert_almost_equal(ridge_32.coef_, ridge_64.coef_, decimal=5)
+    # Do the actual checks at once for easier debug
+    assert coef_32.dtype == X_32.dtype
+    assert coef_64.dtype == X_64.dtype
+    assert ridge_32.predict(X_32).dtype == X_32.dtype
+    assert ridge_64.predict(X_64).dtype == X_64.dtype
+    assert_allclose(ridge_32.coef_, ridge_64.coef_, rtol=1e-4)
 
 
 def test_dtype_match_cholesky():
@@ -993,3 +994,32 @@ def test_dtype_match_cholesky():
     assert ridge_32.predict(X_32).dtype == X_32.dtype
     assert ridge_64.predict(X_64).dtype == X_64.dtype
     assert_almost_equal(ridge_32.coef_, ridge_64.coef_, decimal=5)
+
+
+@pytest.mark.parametrize(
+    'solver', ['svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga'])
+def test_ridge_regression_dtype_stability(solver):
+    random_state = np.random.RandomState(0)
+    n_samples, n_features = 6, 5
+    X = random_state.randn(n_samples, n_features)
+    coef = random_state.randn(n_features)
+    y = np.dot(X, coef) + 0.01 * random_state.randn(n_samples)
+    alpha = 1.0
+    rtol = 1e-2 if os.name == 'nt' and _IS_32BIT else 1e-5
+
+    results = dict()
+    for current_dtype in (np.float32, np.float64):
+        results[current_dtype] = ridge_regression(X.astype(current_dtype),
+                                                  y.astype(current_dtype),
+                                                  alpha=alpha,
+                                                  solver=solver,
+                                                  random_state=random_state,
+                                                  sample_weight=None,
+                                                  max_iter=500,
+                                                  tol=1e-10,
+                                                  return_n_iter=False,
+                                                  return_intercept=False)
+
+    assert results[np.float32].dtype == np.float32
+    assert results[np.float64].dtype == np.float64
+    assert_allclose(results[np.float32], results[np.float64], rtol=rtol)
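
The same stability property can be checked through the public `ridge_regression` function that the new test exercises; a condensed, standalone variant (solver and tolerance chosen loosely here for illustration):

import numpy as np
from sklearn.linear_model import ridge_regression

rng = np.random.RandomState(0)
X = rng.randn(6, 5)
y = np.dot(X, rng.randn(5))

coefs = {}
for dtype in (np.float32, np.float64):
    coefs[dtype] = ridge_regression(X.astype(dtype), y.astype(dtype),
                                    alpha=1.0, solver='saga',
                                    max_iter=500, tol=1e-10, random_state=0)

assert coefs[np.float32].dtype == np.float32
assert coefs[np.float64].dtype == np.float64
np.testing.assert_allclose(coefs[np.float32], coefs[np.float64], rtol=1e-4)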
