FIX raise error when enet_path and lasso_path receive unknown paramet… · rth/scikit-learn@4f33e4d · GitHub
[go: up one dir, main page]

Skip to content

Commit 4f33e4d

Browse files
hongshaoyang, kkang2097, glemaitre
authored
FIX raise error when enet_path and lasso_path receive unknown parameters (scikit-learn#19391)
* Resolves scikit-learn#10598. Updated the code, sorry for putting in lackluster pull requests. Will put in more attention to detail, something silly like putting in commands after the function ended was easy to prevent.
* Update coordinate_descent.py
* Update coordinate_descent.py
* Move params checking to top and fix test
* Flake8
* make sure to not pass normalize and fit_intercept as path params
* TST check the error message
* typo
* iter
* iter

Co-authored-by: Elliot Kang <kkang2097@gmail.com>
Co-authored-by: Guillaume Lemaitre <g.lemaitre58@gmail.com>
1 parent cdc486a commit 4f33e4d

File tree

4 files changed

+42
-23
lines changed

4 files changed

+42
-23
lines changed

doc/whats_new/v1.0.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -496,6 +496,11 @@ Changelog
496496
(only supported by `lbfgs`).
497497
:pr:`20231` by :user:`Toshihiro Nakae <tnakae>`.
498498

499+
- |Fix| The dictionary `params` in :func:`linear_model.enet_path` and
500+
:func:`linear_model.lasso_path` should only contain parameter of the
501+
coordinate descent solver. Otherwise, an error will be raised.
502+
:pr:`19391` by :user:`Shao Yang Hong <hongshaoyang>`.
503+
499504
:mod:`sklearn.manifold`
500505
.......................
501506

sklearn/linear_model/_coordinate_descent.py

Lines changed: 22 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -496,6 +496,16 @@ def enet_path(
496496
:ref:`examples/linear_model/plot_lasso_coordinate_descent_path.py
497497
<sphx_glr_auto_examples_linear_model_plot_lasso_coordinate_descent_path.py>`.
498498
"""
499+
X_offset_param = params.pop("X_offset", None)
500+
X_scale_param = params.pop("X_scale", None)
501+
tol = params.pop("tol", 1e-4)
502+
max_iter = params.pop("max_iter", 1000)
503+
random_state = params.pop("random_state", None)
504+
selection = params.pop("selection", "cyclic")
505+
506+
if len(params) > 0:
507+
raise ValueError("Unexpected parameters in params", params.keys())
508+
499509
# We expect X and y to be already Fortran ordered when bypassing
500510
# checks
501511
if check_input:
@@ -532,10 +542,10 @@ def enet_path(
532542

533543
# MultiTaskElasticNet does not support sparse matrices
534544
if not multi_output and sparse.isspmatrix(X):
535-
if "X_offset" in params:
545+
if X_offset_param is not None:
536546
# As sparse matrices are not actually centered we need this
537547
# to be passed to the CD solver.
538-
X_sparse_scaling = params["X_offset"] / params["X_scale"]
548+
X_sparse_scaling = X_offset_param / X_scale_param
539549
X_sparse_scaling = np.asarray(X_sparse_scaling, dtype=X.dtype)
540550
else:
541551
X_sparse_scaling = np.zeros(n_features, dtype=X.dtype)
@@ -571,13 +581,10 @@ def enet_path(
571581
alphas = np.sort(alphas)[::-1] # make sure alphas are properly ordered
572582

573583
n_alphas = len(alphas)
574-
tol = params.get("tol", 1e-4)
575-
max_iter = params.get("max_iter", 1000)
576584
dual_gaps = np.empty(n_alphas)
577585
n_iters = []
578586

579-
rng = check_random_state(params.get("random_state", None))
580-
selection = params.get("selection", "cyclic")
587+
rng = check_random_state(random_state)
581588
if selection not in ["random", "cyclic"]:
582589
raise ValueError("selection should be either random or cyclic.")
583590
random = selection == "random"
@@ -1016,7 +1023,6 @@ def fit(self, X, y, sample_weight=None, check_input=True):
10161023
dual_gaps_ = np.zeros(n_targets, dtype=X.dtype)
10171024
self.n_iter_ = []
10181025

1019-
# FIXME: 'normalize' to be removed in 1.2
10201026
for k in range(n_targets):
10211027
if Xy is not None:
10221028
this_Xy = Xy[:, k]
@@ -1031,8 +1037,6 @@ def fit(self, X, y, sample_weight=None, check_input=True):
10311037
alphas=[alpha],
10321038
precompute=precompute,
10331039
Xy=this_Xy,
1034-
fit_intercept=False,
1035-
normalize=False,
10361040
copy_X=True,
10371041
verbose=False,
10381042
tol=self.tol,
@@ -1267,6 +1271,8 @@ def _path_residuals(
12671271
sample_weight,
12681272
train,
12691273
test,
1274+
normalize,
1275+
fit_intercept,
12701276
path,
12711277
path_params,
12721278
alphas=None,
@@ -1348,9 +1354,6 @@ def _path_residuals(
13481354
# for read-only memmaps (cf. numpy#14132).
13491355
array.setflags(write=True)
13501356

1351-
fit_intercept = path_params["fit_intercept"]
1352-
normalize = path_params["normalize"]
1353-
13541357
if y.ndim == 1:
13551358
precompute = path_params["precompute"]
13561359
else:
@@ -1577,7 +1580,11 @@ def fit(self, X, y, sample_weight=None):
15771580
path_params = self.get_params()
15781581

15791582
# FIXME: 'normalize' to be removed in 1.2
1580-
path_params["normalize"] = _normalize
1583+
# path_params["normalize"] = _normalize
1584+
# Pop `intercept` and `normalize` that are not parameter of the path
1585+
# function
1586+
path_params.pop("normalize", None)
1587+
path_params.pop("fit_intercept", None)
15811588

15821589
if "l1_ratio" in path_params:
15831590
l1_ratios = np.atleast_1d(path_params["l1_ratio"])
@@ -1635,6 +1642,8 @@ def fit(self, X, y, sample_weight=None):
16351642
sample_weight,
16361643
train,
16371644
test,
1645+
_normalize,
1646+
self.fit_intercept,
16381647
self.path,
16391648
path_params,
16401649
alphas=this_alphas,

sklearn/linear_model/tests/test_coordinate_descent.py

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -618,9 +618,7 @@ def test_lasso_path_return_models_vs_new_return_gives_same_coefficients():
618618
coef_path_cont_lars = interpolate.interp1d(
619619
alphas_lars[::-1], coef_path_lars[:, ::-1]
620620
)
621-
alphas_lasso2, coef_path_lasso2, _ = lasso_path(
622-
X, y, alphas=alphas, return_models=False
623-
)
621+
alphas_lasso2, coef_path_lasso2, _ = lasso_path(X, y, alphas=alphas)
624622
coef_path_cont_lasso = interpolate.interp1d(
625623
alphas_lasso2[::-1], coef_path_lasso2[:, ::-1]
626624
)
@@ -1113,11 +1111,21 @@ def test_sparse_dense_descent_paths():
11131111
X, y, _, _ = build_dataset(n_samples=50, n_features=20)
11141112
csr = sparse.csr_matrix(X)
11151113
for path in [enet_path, lasso_path]:
1116-
_, coefs, _ = path(X, y, fit_intercept=False)
1117-
_, sparse_coefs, _ = path(csr, y, fit_intercept=False)
1114+
_, coefs, _ = path(X, y)
1115+
_, sparse_coefs, _ = path(csr, y)
11181116
assert_array_almost_equal(coefs, sparse_coefs)
11191117

11201118

1119+
@pytest.mark.parametrize("path_func", [enet_path, lasso_path])
1120+
def test_path_unknown_parameter(path_func):
1121+
"""Check that passing parameter not used by the coordinate descent solver
1122+
will raise an error."""
1123+
X, y, _, _ = build_dataset(n_samples=50, n_features=20)
1124+
err_msg = "Unexpected parameters in params"
1125+
with pytest.raises(ValueError, match=err_msg):
1126+
path_func(X, y, normalize=True, fit_intercept=True)
1127+
1128+
11211129
def test_check_input_false():
11221130
X, y, _, _ = build_dataset(n_samples=20, n_features=10)
11231131
X = check_array(X, order="F", dtype="float64")
@@ -1522,8 +1530,7 @@ def test_enet_cv_sample_weight_correctness(fit_intercept):
15221530
assert alphas[0] < reg.alpha_ < alphas[-1]
15231531
assert reg_sw.alpha_ == reg.alpha_
15241532
assert_allclose(reg_sw.coef_, reg.coef_)
1525-
if fit_intercept is not None:
1526-
assert reg_sw.intercept_ == pytest.approx(reg.intercept_)
1533+
assert reg_sw.intercept_ == pytest.approx(reg.intercept_)
15271534

15281535

15291536
@pytest.mark.parametrize("sample_weight", [False, True])

sklearn/linear_model/tests/test_least_angle.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -383,9 +383,7 @@ def test_lasso_lars_vs_lasso_cd_ill_conditioned():
383383
y = y.squeeze()
384384
lars_alphas, _, lars_coef = linear_model.lars_path(X, y, method="lasso")
385385

386-
_, lasso_coef2, _ = linear_model.lasso_path(
387-
X, y, alphas=lars_alphas, tol=1e-6, fit_intercept=False
388-
)
386+
_, lasso_coef2, _ = linear_model.lasso_path(X, y, alphas=lars_alphas, tol=1e-6)
389387

390388
assert_array_almost_equal(lars_coef, lasso_coef2, decimal=1)
391389

0 commit comments

Comments (0)