Preserving dtype for numpy.float32 in Least Angle Regression by takoika · Pull Request #20155 · scikit-learn/scikit-learn


Merged: 20 commits, merged on May 31, 2021
4 changes: 4 additions & 0 deletions doc/whats_new/v1.0.rst
@@ -344,6 +344,10 @@ Changelog
   is now faster. This is especially noticeable on large sparse input.
   :pr:`19734` by :user:`Fred Robinson <frrad>`.
 
+- |Enhancement| `fit` method preserves dtype for numpy.float32 in
+  :class:`Lars`, :class:`LassoLars`, :class:`LassoLarsIC`, :class:`LarsCV` and
+  :class:`LassoLarsCV`. :pr:`20155` by :user:`Takeshi Oura <takoika>`.
+
 :mod:`sklearn.manifold`
 .......................
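To see the enhancement from the user's side, here is a minimal sketch of what the changelog entry promises (not part of the diff; random data, purely illustrative):

import numpy as np
from sklearn.linear_model import Lars

rng = np.random.RandomState(0)
X = rng.rand(6, 6).astype(np.float32)
y = rng.rand(6).astype(np.float32)

model = Lars().fit(X, y)
# Before this change the coefficients were silently promoted to
# float64; with the patch they keep the input precision.
print(model.coef_.dtype)  # float32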
21 changes: 16 additions & 5 deletions sklearn/linear_model/_least_angle.py
@@ -476,12 +476,23 @@ def _lars_path_solver(
 
     max_features = min(max_iter, n_features)
 
+    dtypes = set(a.dtype for a in (X, y, Xy, Gram) if a is not None)
+    if len(dtypes) == 1:
+        # use the precision level of input data if it is consistent
+        return_dtype = next(iter(dtypes))
+    else:
+        # fallback to double precision otherwise
+        return_dtype = np.float64
+
     if return_path:
-        coefs = np.zeros((max_features + 1, n_features))
-        alphas = np.zeros(max_features + 1)
+        coefs = np.zeros((max_features + 1, n_features), dtype=return_dtype)
+        alphas = np.zeros(max_features + 1, dtype=return_dtype)
     else:
-        coef, prev_coef = np.zeros(n_features), np.zeros(n_features)
-        alpha, prev_alpha = np.array([0.]), np.array([0.])  # better ideas?
+        coef, prev_coef = (np.zeros(n_features, dtype=return_dtype),
+                           np.zeros(n_features, dtype=return_dtype))
+        alpha, prev_alpha = (np.array([0.], dtype=return_dtype),
+                             np.array([0.], dtype=return_dtype))
+        # above better ideas?
 
     n_iter, n_active = 0, 0
     active, indices = list(), np.arange(n_features)
@@ -948,7 +959,7 @@ def _fit(self, X, y, max_iter, alpha, fit_path, Xy=None):
 
         self.alphas_ = []
         self.n_iter_ = []
-        self.coef_ = np.empty((n_targets, n_features))
+        self.coef_ = np.empty((n_targets, n_features), dtype=X.dtype)
 
         if fit_path:
             self.active_ = []
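The dtype-resolution rule added to `_lars_path_solver` is small enough to exercise in isolation. A minimal sketch, with `resolve_return_dtype` as a hypothetical stand-in that mirrors the patched logic rather than calling scikit-learn:

import numpy as np

def resolve_return_dtype(X, y, Xy=None, Gram=None):
    # Collect the dtypes of every array that was actually passed in.
    dtypes = set(a.dtype for a in (X, y, Xy, Gram) if a is not None)
    if len(dtypes) == 1:
        # All inputs agree: keep their precision (e.g. float32).
        return next(iter(dtypes))
    # Mixed precision: fall back to float64, the safe default.
    return np.float64

X32 = np.ones((3, 3), dtype=np.float32)
y32 = np.ones(3, dtype=np.float32)
y64 = np.ones(3, dtype=np.float64)

print(resolve_return_dtype(X32, y32))  # float32
print(resolve_return_dtype(X32, y64))  # float64 (mixed inputs)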
53 changes: 52 additions & 1 deletion sklearn/linear_model/tests/test_least_angle.py
@@ -14,7 +14,7 @@
 from sklearn import linear_model, datasets
 from sklearn.linear_model._least_angle import _lars_path_residues
 from sklearn.linear_model import LassoLarsIC, lars_path
-from sklearn.linear_model import Lars, LassoLars
+from sklearn.linear_model import Lars, LassoLars, LarsCV, LassoLarsCV
 
 # TODO: use another dataset that has multiple drops
 diabetes = datasets.load_diabetes()
@@ -777,3 +777,54 @@ def test_copy_X_with_auto_gram():
     linear_model.lars_path(X, y, Gram='auto', copy_X=True, method='lasso')
     # X did not change
     assert_allclose(X, X_before)
+
+
+@pytest.mark.parametrize("LARS, has_coef_path, args",
+                         ((Lars, True, {}),
+                          (LassoLars, True, {}),
+                          (LassoLarsIC, False, {}),
+                          (LarsCV, True, {}),
+                          # max_iter=5 is for avoiding ConvergenceWarning
+                          (LassoLarsCV, True, {"max_iter": 5})))
+@pytest.mark.parametrize("dtype", (np.float32, np.float64))
+def test_lars_dtype_match(LARS, has_coef_path, args, dtype):
+    # The test ensures that the fit method preserves input dtype
+    rng = np.random.RandomState(0)
+    X = rng.rand(6, 6).astype(dtype)
+    y = rng.rand(6).astype(dtype)
+
+    model = LARS(**args)
+    model.fit(X, y)
+    assert model.coef_.dtype == dtype
+    if has_coef_path:
+        assert model.coef_path_.dtype == dtype
+    assert model.intercept_.dtype == dtype
+
+
+@pytest.mark.parametrize("LARS, has_coef_path, args",
+                         ((Lars, True, {}),
+                          (LassoLars, True, {}),
+                          (LassoLarsIC, False, {}),
+                          (LarsCV, True, {}),
+                          # max_iter=5 is for avoiding ConvergenceWarning
+                          (LassoLarsCV, True, {"max_iter": 5})))
+def test_lars_numeric_consistency(LARS, has_coef_path, args):
+    # The test ensures numerical consistency between trained coefficients
+    # of float32 and float64.
+    rtol = 1e-5
+    atol = 1e-5
+
+    rng = np.random.RandomState(0)
+    X_64 = rng.rand(6, 6)
+    y_64 = rng.rand(6)
+
+    model_64 = LARS(**args).fit(X_64, y_64)
+    model_32 = LARS(**args).fit(X_64.astype(np.float32),
+                                y_64.astype(np.float32))
+
+    assert_allclose(model_64.coef_, model_32.coef_, rtol=rtol, atol=atol)
+    if has_coef_path:
+        assert_allclose(model_64.coef_path_, model_32.coef_path_,
+                        rtol=rtol, atol=atol)
+    assert_allclose(model_64.intercept_, model_32.intercept_,
+                    rtol=rtol, atol=atol)
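The same behaviour can also be confirmed through the public `lars_path` function; a minimal sketch on random data (illustrative only, not part of the test suite):

import numpy as np
from sklearn.linear_model import lars_path

rng = np.random.RandomState(0)
X = rng.rand(10, 5).astype(np.float32)
y = rng.rand(10).astype(np.float32)

# With consistent float32 inputs (and no precomputed Gram or Xy), the
# solver picks float32 as its return dtype, so the whole path stays in
# single precision.
alphas, active, coefs = lars_path(X, y, method='lasso')
print(alphas.dtype, coefs.dtype)  # float32 float32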