10000 BUG: ElasticNetCV choosing improper l1_ratio · nullnotfound/scikit-learn@972e7cf · GitHub
[go: up one dir, main page]

Skip to content

Commit 972e7cf

Browse files
committed
BUG: ElasticNetCV choosing improper l1_ratio
The code was lacking good tests: it had only smoke tests. Shame on me (I am the author).
1 parent c87d45d commit 972e7cf

File tree

2 files changed

+33
-9
lines changed

2 files changed

+33
-9
lines changed

sklearn/linear_model/coordinate_descent.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -758,6 +758,8 @@ def fit(self, X, y):
758758
Target values
759759
760760
"""
761+
# We avoid copying X so as to save memory. X will be copied
762+
# after the cross-validation loop
761763
X = atleast2d_or_csc(X, dtype=np.float64, order='F',
762764
copy=self.copy_X and self.fit_intercept)
763765
# From now on X can be touched inplace
@@ -776,6 +778,8 @@ def fit(self, X, y):
776778
l1_ratios = [1, ]
777779
path_params.pop('cv', None)
778780
path_params.pop('n_jobs', None)
781+
# We can modify X inplace
782+
path_params['copy_X'] = False
779783

780784
# Start to compute path on full data
781785
# XXX: is this really useful: we are fitting models that we won't
@@ -787,6 +791,11 @@ def fit(self, X, y):
787791
n_alphas = len(alphas)
788792
path_params.update({'alphas': alphas, 'n_alphas': n_alphas})
789793

794+
# If we are not computing in parallel, we don't want to modify X
795+
# inplace in the folds
796+
if self.n_jobs == 1 or self.n_jobs is None:
797+
path_params['copy_X'] = True
798+
790799
# init cross-validation generator
791800
cv = check_cv(self.cv, X)
792801

@@ -814,6 +823,7 @@ def fit(self, X, y):
814823
if this_best_mse < best_mse:
815824
model = models[i_best_alpha]
816825
best_l1_ratio = l1_ratio
826+
best_mse = this_best_mse
817827

818828
if hasattr(model, 'l1_ratio'):
819829
if model.l1_ratio != best_l1_ratio:

sklearn/linear_model/tests/test_coordinate_descent.py

Lines changed: 23 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -177,26 +177,40 @@ def test_lasso_path():
177177

178178

179179
def test_enet_path():
180-
X, y, X_test, y_test = build_dataset()
180+
# We use a large number of samples and of informative features so that
181+
# the l1_ratio selected is more toward ridge than lasso
182+
X, y, X_test, y_test = build_dataset(n_samples=200,
183+
n_features=100,
184+
n_informative_features=100)
181185
max_iter = 150
182186

183187
with warnings.catch_warnings():
184188
# Here we have a small number of iterations, and thus the
185189
# ElasticNet might not converge. This is to speed up tests
186190
warnings.simplefilter("ignore", UserWarning)
187-
clf = ElasticNetCV(n_alphas=5, eps=2e-3, l1_ratio=[0.9, 0.95], cv=3,
191+
clf = ElasticNetCV(n_alphas=5, eps=2e-3, l1_ratio=[0.5, 0.7], cv=3,
188192
max_iter=max_iter)
189193
clf.fit(X, y)
190-
assert_almost_equal(clf.alpha_, 0.002, 2)
191-
assert_equal(clf.l1_ratio_, 0.95)
192-
193-
clf = ElasticNetCV(n_alphas=5, eps=2e-3, l1_ratio=[0.9, 0.95], cv=3,
194+
# Well-conditioned settings, we should have selected our
195+
# smallest penalty
196+
assert_almost_equal(clf.alpha_, min(clf.alphas_))
197+
# Non-sparse ground truth: we should have selected an elastic-net
198+
# that is closer to ridge than to lasso
199+
assert_equal(clf.l1_ratio_, min(clf.l1_ratio))
200+
201+
clf = ElasticNetCV(n_alphas=5, eps=2e-3, l1_ratio=[0.5, 0.7], cv=3,
194202
max_iter=max_iter, precompute=True)
195203
clf.fit(X, y)
196-
assert_almost_equal(clf.alpha_, 0.002, 2)
197-
assert_equal(clf.l1_ratio_, 0.95)
198204

199-
# test set
205+
# Well-conditioned settings, we should have selected our
206+
# smallest penalty
207+
assert_almost_equal(clf.alpha_, min(clf.alphas_))
208+
# Non-sparse ground truth: we should have selected an elastic-net
209+
# that is closer to ridge than to lasso
210+
assert_equal(clf.l1_ratio_, min(clf.l1_ratio))
211+
212+
# We are in well-conditioned settings with low noise: we should
213+
# have a good test-set performance
200214
assert_greater(clf.score(X_test, y_test), 0.99)
201215

202216

0 commit comments

Comments
 (0)
0