diff --git a/doc/whats_new/v1.0.rst b/doc/whats_new/v1.0.rst index 7255fe82ff628..525f3439860ef 100644 --- a/doc/whats_new/v1.0.rst +++ b/doc/whats_new/v1.0.rst @@ -344,6 +344,10 @@ Changelog is now faster. This is especially noticeable on large sparse input. :pr:`19734` by :user:`Fred Robinson `. +- |Enhancement| `fit` method preserves dtype for numpy.float32 in + :class:`Lars`, :class:`LassoLars`, :class:`LassoLarsIC`, :class:`LarsCV` and + :class:`LassoLarsCV`. :pr:`20155` by :user:`Takeshi Oura `. + :mod:`sklearn.manifold` ....................... diff --git a/sklearn/linear_model/_least_angle.py b/sklearn/linear_model/_least_angle.py index 0932d0bd1aee3..3485344b99e02 100644 --- a/sklearn/linear_model/_least_angle.py +++ b/sklearn/linear_model/_least_angle.py @@ -476,12 +476,23 @@ def _lars_path_solver( max_features = min(max_iter, n_features) + dtypes = set(a.dtype for a in (X, y, Xy, Gram) if a is not None) + if len(dtypes) == 1: + # use the precision level of input data if it is consistent + return_dtype = next(iter(dtypes)) + else: + # fallback to double precision otherwise + return_dtype = np.float64 + if return_path: - coefs = np.zeros((max_features + 1, n_features)) - alphas = np.zeros(max_features + 1) + coefs = np.zeros((max_features + 1, n_features), dtype=return_dtype) + alphas = np.zeros(max_features + 1, dtype=return_dtype) else: - coef, prev_coef = np.zeros(n_features), np.zeros(n_features) - alpha, prev_alpha = np.array([0.]), np.array([0.]) # better ideas? + coef, prev_coef = (np.zeros(n_features, dtype=return_dtype), + np.zeros(n_features, dtype=return_dtype)) + alpha, prev_alpha = (np.array([0.], dtype=return_dtype), + np.array([0.], dtype=return_dtype)) + # above better ideas? 
n_iter, n_active = 0, 0 active, indices = list(), np.arange(n_features) @@ -948,7 +959,7 @@ def _fit(self, X, y, max_iter, alpha, fit_path, Xy=None): self.alphas_ = [] self.n_iter_ = [] - self.coef_ = np.empty((n_targets, n_features)) + self.coef_ = np.empty((n_targets, n_features), dtype=X.dtype) if fit_path: self.active_ = [] diff --git a/sklearn/linear_model/tests/test_least_angle.py b/sklearn/linear_model/tests/test_least_angle.py index 4321c39b45e92..656b7e3fef718 100644 --- a/sklearn/linear_model/tests/test_least_angle.py +++ b/sklearn/linear_model/tests/test_least_angle.py @@ -14,7 +14,7 @@ from sklearn import linear_model, datasets from sklearn.linear_model._least_angle import _lars_path_residues from sklearn.linear_model import LassoLarsIC, lars_path -from sklearn.linear_model import Lars, LassoLars +from sklearn.linear_model import Lars, LassoLars, LarsCV, LassoLarsCV # TODO: use another dataset that has multiple drops diabetes = datasets.load_diabetes() @@ -777,3 +777,54 @@ def test_copy_X_with_auto_gram(): linear_model.lars_path(X, y, Gram='auto', copy_X=True, method='lasso') # X did not change assert_allclose(X, X_before) + + +@pytest.mark.parametrize("LARS, has_coef_path, args", + ((Lars, True, {}), + (LassoLars, True, {}), + (LassoLarsIC, False, {}), + (LarsCV, True, {}), + # max_iter=5 is for avoiding ConvergenceWarning + (LassoLarsCV, True, {"max_iter": 5}))) +@pytest.mark.parametrize("dtype", (np.float32, np.float64)) +def test_lars_dtype_match(LARS, has_coef_path, args, dtype): + # The test ensures that the fit method preserves input dtype + rng = np.random.RandomState(0) + X = rng.rand(6, 6).astype(dtype) + y = rng.rand(6).astype(dtype) + + model = LARS(**args) + model.fit(X, y) + assert model.coef_.dtype == dtype + if has_coef_path: + assert model.coef_path_.dtype == dtype + assert model.intercept_.dtype == dtype + + +@pytest.mark.parametrize("LARS, has_coef_path, args", + ((Lars, True, {}), + (LassoLars, True, {}), + (LassoLarsIC, False, {}), 
+ (LarsCV, True, {}), + # max_iter=5 is for avoiding ConvergenceWarning + (LassoLarsCV, True, {"max_iter": 5}))) +def test_lars_numeric_consistency(LARS, has_coef_path, args): + # The test ensures numerical consistency between trained coefficients + # of float32 and float64. + rtol = 1e-5 + atol = 1e-5 + + rng = np.random.RandomState(0) + X_64 = rng.rand(6, 6) + y_64 = rng.rand(6) + + model_64 = LARS(**args).fit(X_64, y_64) + model_32 = LARS(**args).fit(X_64.astype(np.float32), + y_64.astype(np.float32)) + + assert_allclose(model_64.coef_, model_32.coef_, rtol=rtol, atol=atol) + if has_coef_path: + assert_allclose(model_64.coef_path_, model_32.coef_path_, + rtol=rtol, atol=atol) + assert_allclose(model_64.intercept_, model_32.intercept_, + rtol=rtol, atol=atol)