squashed Ridge PR · scikit-learn/scikit-learn@61ea4ab · GitHub
Commit 61ea4ab

ncordier authored and Joan Massich committed
squashed Ridge PR
1 parent b15818e commit 61ea4ab

File tree: 3 files changed (+44, -11 lines)

sklearn/linear_model/base.py

Lines changed: 5 additions & 5 deletions
@@ -171,7 +171,7 @@ def _preprocess_data(X, y, fit_intercept, normalize=False, copy=True,
     if sp.issparse(X):
         X_offset, X_var = mean_variance_axis(X, axis=0)
         if not return_mean:
-            X_offset = np.zeros(X.shape[1])
+            X_offset = np.zeros(X.shape[1], dtype=X.dtype)

         if normalize:

@@ -186,7 +186,7 @@ def _preprocess_data(X, y, fit_intercept, normalize=False, copy=True,
             X_scale[X_scale == 0] = 1
             inplace_column_scale(X, 1. / X_scale)
         else:
-            X_scale = np.ones(X.shape[1])
+            X_scale = np.ones(X.shape[1], dtype=X.dtype)

     else:
         X_offset = np.average(X, axis=0, weights=sample_weight)
@@ -195,12 +195,12 @@ def _preprocess_data(X, y, fit_intercept, normalize=False, copy=True,
             X, X_scale = f_normalize(X, axis=0, copy=False,
                                      return_norm=True)
         else:
-            X_scale = np.ones(X.shape[1])
+            X_scale = np.ones(X.shape[1], dtype=X.dtype)
         y_offset = np.average(y, axis=0, weights=sample_weight)
         y = y - y_offset
     else:
-        X_offset = np.zeros(X.shape[1])
-        X_scale = np.ones(X.shape[1])
+        X_offset = np.zeros(X.shape[1], dtype=X.dtype)
+        X_scale = np.ones(X.shape[1], dtype=X.dtype)
         y_offset = 0. if y.ndim == 1 else np.zeros(y.shape[1], dtype=X.dtype)

     return X, y, X_offset, y_offset, X_scale
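Note (not part of the diff): NumPy allocation routines default to float64, so without the explicit dtype=X.dtype the offset and scale arrays above would silently upcast float32 input during preprocessing. A minimal sketch of the difference, using plain NumPy:

import numpy as np

X = np.ones((6, 5), dtype=np.float32)

# Default allocation ignores the input dtype and yields float64.
print(np.zeros(X.shape[1]).dtype)                 # float64

# Matching the input dtype, as the commit does throughout
# _preprocess_data, keeps everything in float32.
print(np.zeros(X.shape[1], dtype=X.dtype).dtype)  # float32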

sklearn/linear_model/ridge.py

Lines changed: 12 additions & 6 deletions
@@ -36,7 +36,7 @@
 def _solve_sparse_cg(X, y, alpha, max_iter=None, tol=1e-3, verbose=0):
     n_samples, n_features = X.shape
     X1 = sp_linalg.aslinearoperator(X)
-    coefs = np.empty((y.shape[1], n_features))
+    coefs = np.empty((y.shape[1], n_features), dtype=X.dtype)

     if n_features > n_samples:
         def create_mv(curr_alpha):
@@ -80,7 +80,7 @@ def _mv(x):

 def _solve_lsqr(X, y, alpha, max_iter=None, tol=1e-3):
     n_samples, n_features = X.shape
-    coefs = np.empty((y.shape[1], n_features))
+    coefs = np.empty((y.shape[1], n_features), dtype=X.dtype)
     n_iter = np.empty(y.shape[1], dtype=np.int32)

     # According to the lsqr documentation, alpha = damp^2.
@@ -111,7 +111,7 @@ def _solve_cholesky(X, y, alpha):
         return linalg.solve(A, Xy, sym_pos=True,
                             overwrite_a=True).T
     else:
-        coefs = np.empty([n_targets, n_features])
+        coefs = np.empty([n_targets, n_features], dtype=X.dtype)
         for coef, target, current_alpha in zip(coefs, Xy.T, alpha):
             A.flat[::n_features + 1] += current_alpha
             coef[:] = linalg.solve(A, target, sym_pos=True,
@@ -186,7 +186,7 @@ def _solve_svd(X, y, alpha):
     idx = s > 1e-15  # same default value as scipy.linalg.pinv
     s_nnz = s[idx][:, np.newaxis]
     UTy = np.dot(U.T, y)
-    d = np.zeros((s.size, alpha.size))
+    d = np.zeros((s.size, alpha.size), dtype=X.dtype)
     d[idx] = s_nnz / (s_nnz ** 2 + alpha)
     d_UT_y = d * UTy
     return np.dot(Vt.T, d_UT_y).T
@@ -371,7 +371,7 @@ def ridge_regression(X, y, alpha, sample_weight=None, solver='auto',
         X, y = _rescale_data(X, y, sample_weight)

     # There should be either 1 or n_targets penalties
-    alpha = np.asarray(alpha).ravel()
+    alpha = np.asarray(alpha, dtype=X.dtype).ravel()
     if alpha.size not in [1, n_targets]:
         raise ValueError("Number of targets and number of penalties "
                          "do not correspond: %d != %d"
@@ -469,7 +469,13 @@ def __init__(self, alpha=1.0, fit_intercept=True, normalize=False,
         self.random_state = random_state

     def fit(self, X, y, sample_weight=None):
-        X, y = check_X_y(X, y, ['csr', 'csc', 'coo'], dtype=np.float64,
+
+        if self.solver in ['svd', 'sparse_cg', 'cholesky', 'lsqr']:
+            _dtype = [np.float64, np.float32]
+        else:
+            _dtype = np.float64
+
+        X, y = check_X_y(X, y, ['csr', 'csc', 'coo'], dtype=_dtype,
                          multi_output=True, y_numeric=True)

         if ((sample_weight is not None) and
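Usage sketch (assuming a scikit-learn build that includes this commit): the four whitelisted solvers now validate input against [np.float64, np.float32], so float32 data is fitted without upcasting, while the remaining solvers still request float64.

import numpy as np
from sklearn.linear_model import Ridge

rng = np.random.RandomState(0)
X = rng.randn(20, 4).astype(np.float32)
y = rng.randn(20).astype(np.float32)

# 'cholesky' is in the whitelist above, so coef_ stays float32.
print(Ridge(alpha=1.0, solver='cholesky').fit(X, y).coef_.dtype)  # float32

# 'sag' is not, so check_X_y upcasts the data to float64 first.
print(Ridge(alpha=1.0, solver='sag').fit(X, y).coef_.dtype)       # float64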

sklearn/linear_model/tests/test_ridge.py

Lines changed: 27 additions & 0 deletions
@@ -788,3 +788,30 @@ def test_errors_and_values_svd_helper():
 def test_ridge_classifier_no_support_multilabel():
     X, y = make_multilabel_classification(n_samples=10, random_state=0)
     assert_raises(ValueError, RidgeClassifier().fit, X, y)
+
+
+def test_dtype_match():
+    rng = np.random.RandomState(0)
+    alpha = 1.0
+
+    n_samples, n_features = 6, 5
+    X_64 = rng.randn(n_samples, n_features)
+    y_64 = rng.randn(n_samples)
+    X_32 = X_64.astype(np.float32)
+    y_32 = y_64.astype(np.float32)
+
+    solvers = ["svd", "sparse_cg", "cholesky", "lsqr"]
+    for solver in solvers:
+
+        # Check type consistency 32bits
+        ridge_32 = Ridge(alpha=alpha, solver=solver)
+        ridge_32.fit(X_32, y_32)
+        assert_equal(ridge_32.coef_.dtype, X_32.dtype)
+
+        # Check type consistency 64 bits
+        ridge_64 = Ridge(alpha=alpha, solver=solver)
+        ridge_64.fit(X_64, y_64)
+        assert_equal(ridge_64.coef_.dtype, X_64.dtype)
+
+        # Check accuracy consistency
+        assert_almost_equal(ridge_32.coef_, ridge_64.coef_, decimal=5)
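The decimal=5 tolerance reflects single precision: float32 carries roughly seven significant decimal digits, so the float32 and float64 solutions are only expected to agree to about five decimals after solving the linear system. Assuming a development checkout with this commit applied, the test can be run in isolation with a runner such as pytest (a modern convenience; the project used a nose-style runner at the time):

pytest sklearn/linear_model/tests/test_ridge.py::test_dtype_match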
