Merge pull request #4 from ncordier/ridge-float64 · massich/scikit-learn@0b4f78a · GitHub
[go: up one dir, main page]

Skip to content

Commit 0b4f78a

Browse files
authored
Merge pull request #4 from ncordier/ridge-float64
[M] Pass the dtype consistency & accuracy tests for ridge
2 parents cd6e696 + 3b10043 commit 0b4f78a

File tree

3 files changed

+20
-14
lines changed

3 files changed

+20
-14
lines changed

sklearn/linear_model/base.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,7 @@ def _preprocess_data(X, y, fit_intercept, normalize=False, copy=True,
171171
if sp.issparse(X):
172172
X_offset, X_var = mean_variance_axis(X, axis=0)
173173
if not return_mean:
174-
X_offset = np.zeros(X.shape[1])
174+
X_offset = np.zeros(X.shape[1], dtype=X.dtype)
175175

176176
if normalize:
177177

@@ -186,7 +186,7 @@ def _preprocess_data(X, y, fit_intercept, normalize=False, copy=True,
186186
X_scale[X_scale == 0] = 1
187187
inplace_column_scale(X, 1. / X_scale)
188188
else:
189-
X_scale = np.ones(X.shape[1])
189+
X_scale = np.ones(X.shape[1], dtype=X.dtype)
190190

191191
else:
192192
X_offset = np.average(X, axis=0, weights=sample_weight)
@@ -195,12 +195,12 @@ def _preprocess_data(X, y, fit_intercept, normalize=False, copy=True,
195195
X, X_scale = f_normalize(X, axis=0, copy=False,
196196
return_norm=True)
197197
else:
198-
X_scale = np.ones(X.shape[1])
198+
X_scale = np.ones(X.shape[1], dtype=X.dtype)
199199
y_offset = np.average(y, axis=0, weights=sample_weight)
200200
y = y - y_offset
201201
else:
202-
X_offset = np.zeros(X.shape[1])
203-
X_scale = np.ones(X.shape[1])
202+
X_offset = np.zeros(X.shape[1], dtype=X.dtype)
203+
X_scale = np.ones(X.shape[1], dtype=X.dtype)
204204
y_offset = 0. if y.ndim == 1 else np.zeros(y.shape[1], dtype=X.dtype)
205205

206206
return X, y, X_offset, y_offset, X_scale

sklearn/linear_model/ridge.py

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@
3636
def _solve_sparse_cg(X, y, alpha, max_iter=None, tol=1e-3, verbose=0):
3737
n_samples, n_features = X.shape
3838
X1 = sp_linalg.aslinearoperator(X)
39-
coefs = np.empty((y.shape[1], n_features))
39+
coefs = np.empty((y.shape[1], n_features), dtype=X.dtype)
4040

4141
if n_features > n_samples:
4242
def create_mv(curr_alpha):
@@ -80,7 +80,7 @@ def _mv(x):
8080

8181
def _solve_lsqr(X, y, alpha, max_iter=None, tol=1e-3):
8282
n_samples, n_features = X.shape
83-
coefs = np.empty((y.shape[1], n_features))
83+
coefs = np.empty((y.shape[1], n_features), dtype=X.dtype)
8484
n_iter = np.empty(y.shape[1], dtype=np.int32)
8585

8686
# According to the lsqr documentation, alpha = damp^2.
@@ -111,7 +111,7 @@ def _solve_cholesky(X, y, alpha):
111111
return linalg.solve(A, Xy, sym_pos=True,
112112
overwrite_a=True).T
113113
else:
114-
coefs = np.empty([n_targets, n_features])
114+
coefs = np.empty([n_targets, n_features], dtype=X.dtype)
115115
for coef, target, current_alpha in zip(coefs, Xy.T, alpha):
116116
A.flat[::n_features + 1] += current_alpha
117117
coef[:] = linalg.solve(A, target, sym_pos=True,
@@ -186,7 +186,7 @@ def _solve_svd(X, y, alpha):
186186
idx = s > 1e-15 # same default value as scipy.linalg.pinv
187187
s_nnz = s[idx][:, np.newaxis]
188188
UTy = np.dot(U.T, y)
189-
d = np.zeros((s.size, alpha.size))
189+
d = np.zeros((s.size, alpha.size), dtype=X.dtype)
190190
d[idx] = s_nnz / (s_nnz ** 2 + alpha)
191191
d_UT_y = d * UTy
192192
return np.dot(Vt.T, d_UT_y).T
@@ -327,7 +327,7 @@ def ridge_regression(X, y, alpha, sample_weight=None, solver='auto',
327327
y = check_array(y, dtype=np.float64, ensure_2d=False, order='F')
328328
else:
329329
X = check_array(X, accept_sparse=['csr', 'csc', 'coo'],
330-
dtype=np.float64)
330+
dtype='numeric')
331331
y = check_array(y, dtype='numeric', ensure_2d=False)
332332
check_consistent_length(X, y)
333333

@@ -371,7 +371,7 @@ def ridge_regression(X, y, alpha, sample_weight=None, solver='auto',
371371
X, y = _rescale_data(X, y, sample_weight)
372372

373373
# There should be either 1 or n_targets penalties
374-
alpha = np.asarray(alpha).ravel()
374+
alpha = np.asarray(alpha, dtype=X.dtype).ravel()
375375
if alpha.size not in [1, n_targets]:
376376
raise ValueError("Number of targets and number of penalties "
377377
"do not correspond: %d != %d"
@@ -469,7 +469,13 @@ def __init__(self, alpha=1.0, fit_intercept=True, normalize=False,
469469
self.random_state = random_state
470470

471471
def fit(self, X, y, sample_weight=None):
472-
X, y = check_X_y(X, y, ['csr', 'csc', 'coo'], dtype=np.float64,
472+
473+
if self.solver in ['svd', 'sparse_cg', 'cholesky', 'lsqr']:
474+
_dtype = [np.float64, np.float32]
475+
else:
476+
_dtype = np.float64
477+
478+
X, y = check_X_y(X, y, ['csr', 'csc', 'coo'], dtype=_dtype,
473479
multi_output=True, y_numeric=True)
474480

475481
if ((sample_weight is not None) and

sklearn/linear_model/tests/test_ridge.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -801,7 +801,7 @@ def test_dtype_match():
801801
y_32 = y_64.astype(np.float32)
802802

803803
# solvers = ("svd", "sparse_cg", "cholesky", "lsqr", "sag")
804-
solvers = ["svd"]
804+
solvers = ["svd", "sparse_cg", "cholesky", "lsqr"]
805805
for solver in solvers:
806806

807807
# Check type consistency 32bits
@@ -815,4 +815,4 @@ def test_dtype_match():
815815
assert_equal(ridge_64.coef_.dtype, X_64.dtype)
816816

817817
# Check accuracy consistency
818-
assert_almost_equal(ridge_32.coefs_, ridge_64.coefs_.astype(np.float32))
818+
assert_almost_equal(ridge_32.coef_, ridge_64.coef_.astype(np.float32))

0 commit comments

Comments
 (0)
0