[MRG] Pass the dtype consistency & accuracy tests for ridge by ncordier · Pull Request #4 · massich/scikit-learn

[MRG] Pass the dtype consistency & accuracy tests for ridge #4


Merged
merged 10 commits into from Jun 8, 2017
10 changes: 5 additions & 5 deletions sklearn/linear_model/base.py
```diff
@@ -171,7 +171,7 @@ def _preprocess_data(X, y, fit_intercept, normalize=False, copy=True,
         if sp.issparse(X):
             X_offset, X_var = mean_variance_axis(X, axis=0)
             if not return_mean:
-                X_offset = np.zeros(X.shape[1])
+                X_offset = np.zeros(X.shape[1], dtype=X.dtype)
 
             if normalize:
@@ -186,7 +186,7 @@ def _preprocess_data(X, y, fit_intercept, normalize=False, copy=True,
                 X_scale[X_scale == 0] = 1
                 inplace_column_scale(X, 1. / X_scale)
             else:
-                X_scale = np.ones(X.shape[1])
+                X_scale = np.ones(X.shape[1], dtype=X.dtype)
 
         else:
             X_offset = np.average(X, axis=0, weights=sample_weight)
@@ -195,12 +195,12 @@ def _preprocess_data(X, y, fit_intercept, normalize=False, copy=True,
                 X, X_scale = f_normalize(X, axis=0, copy=False,
                                          return_norm=True)
             else:
-                X_scale = np.ones(X.shape[1])
+                X_scale = np.ones(X.shape[1], dtype=X.dtype)
        y_offset = np.average(y, axis=0, weights=sample_weight)
        y = y - y_offset
    else:
-        X_offset = np.zeros(X.shape[1])
-        X_scale = np.ones(X.shape[1])
+        X_offset = np.zeros(X.shape[1], dtype=X.dtype)
+        X_scale = np.ones(X.shape[1], dtype=X.dtype)
         y_offset = 0. if y.ndim == 1 else np.zeros(y.shape[1], dtype=X.dtype)
 
     return X, y, X_offset, y_offset, X_scale
```
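The fix in `_preprocess_data` works because NumPy allocates float64 by default, and combining a float32 array with a float64 array upcasts the result. A minimal standalone sketch of the effect (illustrative names, not code from the patch):

```python
import numpy as np

X = np.ones((5, 3), dtype=np.float32)

# Default allocation is float64, so centering upcasts the data.
offset_default = np.zeros(X.shape[1])
print((X - offset_default).dtype)  # float64

# Matching the input dtype keeps the pipeline in float32.
offset_typed = np.zeros(X.shape[1], dtype=X.dtype)
print((X - offset_typed).dtype)  # float32
```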
20 changes: 13 additions & 7 deletions sklearn/linear_model/ridge.py
```diff
@@ -36,7 +36,7 @@
 def _solve_sparse_cg(X, y, alpha, max_iter=None, tol=1e-3, verbose=0):
     n_samples, n_features = X.shape
     X1 = sp_linalg.aslinearoperator(X)
-    coefs = np.empty((y.shape[1], n_features))
+    coefs = np.empty((y.shape[1], n_features), dtype=X.dtype)
 
     if n_features > n_samples:
         def create_mv(curr_alpha):
@@ -80,7 +80,7 @@ def _mv(x):
 
 def _solve_lsqr(X, y, alpha, max_iter=None, tol=1e-3):
     n_samples, n_features = X.shape
-    coefs = np.empty((y.shape[1], n_features))
+    coefs = np.empty((y.shape[1], n_features), dtype=X.dtype)
     n_iter = np.empty(y.shape[1], dtype=np.int32)
 
     # According to the lsqr documentation, alpha = damp^2.
@@ -111,7 +111,7 @@ def _solve_cholesky(X, y, alpha):
         return linalg.solve(A, Xy, sym_pos=True,
                             overwrite_a=True).T
     else:
-        coefs = np.empty([n_targets, n_features])
+        coefs = np.empty([n_targets, n_features], dtype=X.dtype)
         for coef, target, current_alpha in zip(coefs, Xy.T, alpha):
             A.flat[::n_features + 1] += current_alpha
             coef[:] = linalg.solve(A, target, sym_pos=True,
@@ -186,7 +186,7 @@ def _solve_svd(X, y, alpha):
     idx = s > 1e-15  # same default value as scipy.linalg.pinv
     s_nnz = s[idx][:, np.newaxis]
     UTy = np.dot(U.T, y)
-    d = np.zeros((s.size, alpha.size))
+    d = np.zeros((s.size, alpha.size), dtype=X.dtype)
     d[idx] = s_nnz / (s_nnz ** 2 + alpha)
     d_UT_y = d * UTy
     return np.dot(Vt.T, d_UT_y).T
```
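All four solvers share the same pattern: results are written into a preallocated `coefs` buffer, so the buffer's dtype, not the solve's, determines the dtype of the returned coefficients. A toy illustration of that fill-in-place pattern, using a plain regularized normal-equations solve rather than the actual solver internals:

```python
import numpy as np
from scipy import linalg

rng = np.random.RandomState(0)
X = rng.rand(10, 4).astype(np.float32)
y = rng.rand(10, 2).astype(np.float32)

# X^T X + alpha * I stays float32 because every operand is float32.
A = X.T @ X + np.eye(X.shape[1], dtype=X.dtype)
Xy = X.T @ y

# The buffer's dtype wins: float32 here, float64 if dtype were omitted.
coefs = np.empty((y.shape[1], X.shape[1]), dtype=X.dtype)
for coef, target in zip(coefs, Xy.T):
    coef[:] = linalg.solve(A, target)
print(coefs.dtype)  # float32
```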
```diff
@@ -327,7 +327,7 @@ def ridge_regression(X, y, alpha, sample_weight=None, solver='auto',
         y = check_array(y, dtype=np.float64, ensure_2d=False, order='F')
     else:
         X = check_array(X, accept_sparse=['csr', 'csc', 'coo'],
-                        dtype=np.float64)
+                        dtype='numeric')
         y = check_array(y, dtype='numeric', ensure_2d=False)
     check_consistent_length(X, y)
 
@@ -371,7 +371,7 @@ def ridge_regression(X, y, alpha, sample_weight=None, solver='auto',
         X, y = _rescale_data(X, y, sample_weight)
 
     # There should be either 1 or n_targets penalties
-    alpha = np.asarray(alpha).ravel()
+    alpha = np.asarray(alpha, dtype=X.dtype).ravel()
     if alpha.size not in [1, n_targets]:
         raise ValueError("Number of targets and number of penalties "
                          "do not correspond: %d != %d"
```
```diff
@@ -469,7 +469,13 @@ def __init__(self, alpha=1.0, fit_intercept=True, normalize=False,
         self.random_state = random_state
 
     def fit(self, X, y, sample_weight=None):
-        X, y = check_X_y(X, y, ['csr', 'csc', 'coo'], dtype=np.float64,
+
+        if self.solver in ['svd', 'sparse_cg', 'cholesky', 'lsqr']:
+            _dtype = [np.float64, np.float32]
+        else:
+            _dtype = np.float64
+
+        X, y = check_X_y(X, y, ['csr', 'csc', 'coo'], dtype=_dtype,
                          multi_output=True, y_numeric=True)
 
         if ((sample_weight is not None) and
```
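The gate in `fit` leans on `check_X_y`/`check_array` accepting a list of dtypes: an input whose dtype is in the list passes through unchanged, while anything else is converted to the first entry. Passing a single `np.float64` (kept for `sag`, which only supports double precision) therefore still forces the upcast:

```python
import numpy as np
from sklearn.utils import check_array

X_32 = np.ones((4, 2), dtype=np.float32)

# dtype list: float32 is in the list, so the array is preserved.
print(check_array(X_32, dtype=[np.float64, np.float32]).dtype)  # float32

# single dtype: everything is converted, as on the 'sag' path.
print(check_array(X_32, dtype=np.float64).dtype)  # float64
```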
4 changes: 2 additions & 2 deletions sklearn/linear_model/tests/test_ridge.py
```diff
@@ -801,7 +801,7 @@ def test_dtype_match():
     y_32 = y_64.astype(np.float32)
 
     # solvers = ("svd", "sparse_cg", "cholesky", "lsqr", "sag")
-    solvers = ["svd"]
+    solvers = ["svd", "sparse_cg", "cholesky", "lsqr"]
     for solver in solvers:
 
         # Check type consistency 32bits
@@ -815,4 +815,4 @@
         assert_equal(ridge_64.coef_.dtype, X_64.dtype)
 
         # Check accuracy consistency
-        assert_almost_equal(ridge_32.coefs_, ridge_64.coefs_.astype(np.float32))
+        assert_almost_equal(ridge_32.coef_, ridge_64.coef_.astype(np.float32))
```
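For reference, a condensed standalone version of what `test_dtype_match` now verifies for the four enabled solvers; the data here is arbitrary and the tolerance (`decimal=5`) is deliberately looser than the test's default to leave headroom for the iterative solvers:

```python
import numpy as np
from numpy.testing import assert_almost_equal
from sklearn.linear_model import Ridge

rng = np.random.RandomState(0)
X_64 = rng.randn(20, 5)
y_64 = rng.randn(20)
X_32, y_32 = X_64.astype(np.float32), y_64.astype(np.float32)

for solver in ["svd", "sparse_cg", "cholesky", "lsqr"]:
    ridge_32 = Ridge(alpha=1.0, solver=solver).fit(X_32, y_32)
    ridge_64 = Ridge(alpha=1.0, solver=solver).fit(X_64, y_64)

    # Each fit keeps the dtype it was given...
    assert ridge_32.coef_.dtype == np.float32
    assert ridge_64.coef_.dtype == np.float64
    # ...and the two precisions agree on the solution.
    assert_almost_equal(ridge_32.coef_, ridge_64.coef_.astype(np.float32),
                        decimal=5)
```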