FIX improve convergence criterion for LogisticRegression(penalty="l1", solver='liblinear') by TomDLT · Pull Request #25214 · scikit-learn/scikit-learn

FIX improve convergence criterion for LogisticRegression(penalty="l1", solver='liblinear') #25214


Merged · 11 commits · Apr 17, 2023
22 changes: 14 additions & 8 deletions doc/whats_new/v1.3.rst
@@ -140,14 +140,6 @@ Changelog
:pr:`123456` by :user:`Joe Bloggs <joeongithub>`.
where 123456 is the *pull request* number, not the issue number.

:mod:`sklearn.feature_selection`
................................

- |Enhancement| All selectors in :mod:`sklearn.feature_selection` will preserve
a DataFrame's dtype when transformed. :pr:`25102` by `Thomas Fan`_.

- |Fix| :class:`feature_selection.SequentialFeatureSelector`'s `cv` parameter
now supports generators. :pr:`25973` by `Yao Xiao <Charlie-XIAO>`.

:mod:`sklearn.base`
...................
@@ -255,6 +247,15 @@ Changelog
meaning that it is not required to call `fit` before calling `transform`.
Parameter validation only happens at `fit` time.
:pr:`24230` by :user:`Guillaume Lemaitre <glemaitre>`.

:mod:`sklearn.feature_selection`
................................

- |Enhancement| All selectors in :mod:`sklearn.feature_selection` will preserve
a DataFrame's dtype when transformed. :pr:`25102` by `Thomas Fan`_.

- |Fix| :class:`feature_selection.SequentialFeatureSelector`'s `cv` parameter
now supports generators. :pr:`25973` by `Yao Xiao <Charlie-XIAO>`.

:mod:`sklearn.impute`
.....................
@@ -288,6 +289,11 @@ Changelog
:class:`linear_model.ARDRegression` to expose the actual number of iterations
required to reach the stopping criterion.
:pr:`25697` by :user:`John Pangas <jpangas>`.

- |Fix| Use a more robust criterion to detect convergence of
:class:`linear_model.LogisticRegression(penalty="l1", solver="liblinear")`
on linearly separable problems.
:pr:`25214` by `Tom Dupre la Tour`_.

:mod:`sklearn.metrics`
......................
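For context on the |Fix| entry above: the scenario it targets can be sketched as follows. This is an illustration modeled on the non-regression test added later in this PR, not part of the diff itself — an l1-penalized liblinear fit on a linearly separable two-class problem with a weak penalty (large C), which previously could keep iterating until `max_iter` and emit a `ConvergenceWarning`.

from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.svm import l1_min_c

# Two linearly separable iris classes.
X, y = load_iris(return_X_y=True)
X, y = X[y != 2], y[y != 2]

# A C value slightly above the smallest C for which any coefficient is non-zero.
C = l1_min_c(X, y, loss="log") * 10 ** (10 / 29)

clf = LogisticRegression(
    penalty="l1", solver="liblinear", tol=1e-6, intercept_scaling=10000.0, C=C
)
# With the improved criterion, this stops cleanly instead of hitting max_iter.
clf.fit(StandardScaler().fit_transform(X), y)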
2 changes: 1 addition & 1 deletion examples/linear_model/plot_logistic_path.py
@@ -52,7 +52,7 @@
from sklearn import linear_model
from sklearn.svm import l1_min_c

cs = l1_min_c(X, y, loss="log") * np.logspace(0, 7, 16)
cs = l1_min_c(X, y, loss="log") * np.logspace(0, 10, 16)

clf = linear_model.LogisticRegression(
penalty="l1",
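The example change above widens the grid of C values from seven to ten decades above `l1_min_c`, the smallest C for which at least one coefficient can become non-zero; the more robust stopping criterion presumably makes the very weakly regularized end of the path tractable. A rough sketch of how such a grid traces the l1 path (illustrative only, loosely following plot_logistic_path.py; the exact estimator settings in that example may differ):

import numpy as np
from sklearn import linear_model
from sklearn.datasets import load_iris
from sklearn.svm import l1_min_c

X, y = load_iris(return_X_y=True)
X, y = X[y != 2], y[y != 2]  # two-class subset, as in the example

# Grid from the sparsest useful C up to ten decades weaker regularization.
cs = l1_min_c(X, y, loss="log") * np.logspace(0, 10, 16)

clf = linear_model.LogisticRegression(
    penalty="l1", solver="liblinear", tol=1e-6, max_iter=int(1e6)
)
coefs = []
for c in cs:
    clf.set_params(C=c).fit(X, y)
    coefs.append(clf.coef_.ravel().copy())
# Each entry of coefs is one point on the l1 regularization path.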
28 changes: 26 additions & 2 deletions sklearn/linear_model/tests/test_logistic.py
@@ -24,6 +24,7 @@
from sklearn.linear_model import SGDClassifier
from sklearn.preprocessing import scale
from sklearn.utils._testing import skip_if_no_parallel
from sklearn.svm import l1_min_c

from sklearn.exceptions import ConvergenceWarning
from sklearn.linear_model._logistic import (
@@ -742,7 +743,6 @@ def test_logistic_regression_sample_weights():
sample_weight = y + 1

for LR in [LogisticRegression, LogisticRegressionCV]:

kw = {"random_state": 42, "fit_intercept": False, "multi_class": "ovr"}
if LR is LogisticRegressionCV:
kw.update({"Cs": 3, "cv": 3})
@@ -1918,7 +1918,6 @@ def test_scores_attribute_layout_elasticnet():

for i, C in enumerate(Cs):
for j, l1_ratio in enumerate(l1_ratios):

lr = LogisticRegression(
penalty="elasticnet",
solver="saga",
@@ -2033,3 +2032,28 @@ def test_warning_on_penalty_string_none():
)
with pytest.warns(FutureWarning, match=warning_message):
lr.fit(iris.data, target)


def test_liblinear_not_stuck():
    # Non-regression test for https://github.com/scikit-learn/scikit-learn/issues/18264
    X = iris.data.copy()
    y = iris.target.copy()
    X = X[y != 2]
    y = y[y != 2]
    X_prep = StandardScaler().fit_transform(X)

    # A C value slightly above l1_min_c, for which the l1-penalized liblinear
    # solver previously failed to converge within max_iter on this linearly
    # separable problem (see the linked issue).
    C = l1_min_c(X, y, loss="log") * 10 ** (10 / 29)
    clf = LogisticRegression(
        penalty="l1",
        solver="liblinear",
        tol=1e-6,
        max_iter=100,
        intercept_scaling=10000.0,
        random_state=0,
        C=C,
    )

    # Test that the fit does not raise a ConvergenceWarning.
    with warnings.catch_warnings():
        warnings.simplefilter("error", ConvergenceWarning)
        clf.fit(X_prep, y)
14 changes: 10 additions & 4 deletions sklearn/svm/src/liblinear/linear.cpp
@@ -1769,6 +1769,7 @@ static int solve_l1r_lr(
int max_num_linesearch = 20;
int active_size;
int QP_active_size;
int QP_no_change = 0;

double nu = 1e-12;
double inner_eps = 1;
@@ -1896,9 +1897,13 @@
if(newton_iter == 0)
Gnorm1_init = Gnorm1_new;

if(Gnorm1_new <= eps*Gnorm1_init)
// Break outer-loop if the accumulated violation is small.
// Also break if no update in QP inner-loop ten times in a row.
if(Gnorm1_new <= eps*Gnorm1_init || QP_no_change >= 10)
break;

QP_no_change++;

iter = 0;
QP_Gmax_old = INF;
QP_active_size = active_size;
@@ -1955,9 +1960,6 @@
else
violation = fabs(Gn);

QP_Gmax_new = max(QP_Gmax_new, violation);
QP_Gnorm1_new += violation;

// obtain solution of one-variable problem
if(Gp < H*wpd[j])
z = -Gp/H;
@@ -1970,6 +1972,10 @@
continue;
z = min(max(z,-10.0),10.0);

QP_no_change = 0;
QP_Gmax_new = max(QP_Gmax_new, violation);
QP_Gnorm1_new += violation;

wpd[j] += z;

x = prob_col->x[j];
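In plain terms, the linear.cpp change above does two things: the per-coordinate violation is only accumulated for coordinates that actually receive an update (the two QP_Gmax_new / QP_Gnorm1_new lines move below the `continue`), and a new counter, QP_no_change, aborts the outer Newton loop if the inner coordinate-descent (QP) loop accepts no update for ten outer iterations in a row. A simplified sketch of that control flow, written as Python pseudocode rather than the actual C++ implementation:

def outer_newton_loop(newton_step, converged, max_newton_iter=100):
    """Illustrative control flow only; not the liblinear code itself.

    ``newton_step()`` stands in for one inner CD/QP pass and returns the
    number of coordinate updates it accepted; ``converged()`` stands in for
    the test ``Gnorm1_new <= eps * Gnorm1_init`` on the accumulated violation.
    """
    qp_no_change = 0
    for _ in range(max_newton_iter):
        # Stop on the usual criterion, or if the inner loop has made no
        # coordinate update for ten outer iterations in a row.
        if converged() or qp_no_change >= 10:
            break
        qp_no_change += 1
        if newton_step() > 0:
            qp_no_change = 0  # progress was made; reset the stall counter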