FIX improve convergence criterion for LogisticRegression(penalty="l1", solver='liblinear') · scikit-learn/scikit-learn@463d166 · GitHub

Commit 463d166

TomDLT, thomasjpfan, and ogrisel authored

FIX improve convergence criterion for LogisticRegression(penalty="l1", solver='liblinear') (#25214)

Co-authored-by: Thomas J. Fan <thomasjpfan@gmail.com>
Co-authored-by: Olivier Grisel <olivier.grisel@ensta.org>
1 parent e886ce4 commit 463d166

File tree

4 files changed: +51 -13 lines


doc/whats_new/v1.3.rst

Lines changed: 14 additions & 8 deletions
@@ -151,14 +151,6 @@ Changelog
    :pr:`123456` by :user:`Joe Bloggs <joeongithub>`.
    where 123456 is the *pull request* number, not the issue number.
 
-:mod:`sklearn.feature_selection`
-................................
-
-- |Enhancement| All selectors in :mod:`sklearn.feature_selection` will preserve
-  a DataFrame's dtype when transformed. :pr:`25102` by `Thomas Fan`_.
-
-- |Fix| :class:`feature_selection.SequentialFeatureSelector`'s `cv` parameter
-  now supports generators. :pr:`25973` by `Yao Xiao <Charlie-XIAO>`.
 
 :mod:`sklearn.base`
 ...................
@@ -275,6 +267,15 @@ Changelog
   meaning that it is not required to call `fit` before calling `transform`.
   Parameter validation only happens at `fit` time.
   :pr:`24230` by :user:`Guillaume Lemaitre <glemaitre>`.
+
+:mod:`sklearn.feature_selection`
+................................
+
+- |Enhancement| All selectors in :mod:`sklearn.feature_selection` will preserve
+  a DataFrame's dtype when transformed. :pr:`25102` by `Thomas Fan`_.
+
+- |Fix| :class:`feature_selection.SequentialFeatureSelector`'s `cv` parameter
+  now supports generators. :pr:`25973` by `Yao Xiao <Charlie-XIAO>`.
 
 :mod:`sklearn.impute`
 .....................
@@ -308,6 +309,11 @@ Changelog
   :class:`linear_model.ARDRegression` to expose the actual number of iterations
   required to reach the stopping criterion.
   :pr:`25697` by :user:`John Pangas <jpangas>`.
+
+- |Fix| Use a more robust criterion to detect convergence of
+  :class:`linear_model.LogisticRegression(penalty="l1", solver="liblinear")`
+  on linearly separable problems.
+  :pr:`25214` by `Tom Dupre la Tour`_.
 
 :mod:`sklearn.metrics`
 ......................
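
The changelog entry above concerns liblinear stalling on linearly separable data. A hypothetical, minimal sketch of that setting (not part of this commit; the dataset and parameters are illustrative):

import numpy as np
from sklearn.linear_model import LogisticRegression

rng = np.random.RandomState(0)
X = rng.randn(100, 2)
y = (X[:, 0] > 0).astype(int)  # linearly separable by construction

# With an L1 penalty and a very large C (weak regularization), the
# liblinear solver could previously hit max_iter and emit a
# ConvergenceWarning even though the coefficients had stopped changing.
clf = LogisticRegression(penalty="l1", solver="liblinear", C=1e6, tol=1e-6)
clf.fit(X, y)
print(clf.coef_, clf.n_iter_)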

examples/linear_model/plot_logistic_path.py

Lines changed: 1 addition & 1 deletion
@@ -52,7 +52,7 @@
 from sklearn import linear_model
 from sklearn.svm import l1_min_c
 
-cs = l1_min_c(X, y, loss="log") * np.logspace(0, 7, 16)
+cs = l1_min_c(X, y, loss="log") * np.logspace(0, 10, 16)
 
 clf = linear_model.LogisticRegression(
     penalty="l1",

sklearn/linear_model/tests/test_logistic.py

Lines changed: 26 additions & 0 deletions
@@ -24,6 +24,7 @@
 from sklearn.linear_model import SGDClassifier
 from sklearn.preprocessing import scale
 from sklearn.utils._testing import skip_if_no_parallel
+from sklearn.svm import l1_min_c
 
 from sklearn.exceptions import ConvergenceWarning
 from sklearn.linear_model._logistic import (
@@ -2031,3 +2032,28 @@ def test_warning_on_penalty_string_none():
     )
     with pytest.warns(FutureWarning, match=warning_message):
         lr.fit(iris.data, target)
+
+
+def test_liblinear_not_stuck():
+    # Non-regression https://github.com/scikit-learn/scikit-learn/issues/18264
+    X = iris.data.copy()
+    y = iris.target.copy()
+    X = X[y != 2]
+    y = y[y != 2]
+    X_prep = StandardScaler().fit_transform(X)
+
+    C = l1_min_c(X, y, loss="log") * 10 ** (10 / 29)
+    clf = LogisticRegression(
+        penalty="l1",
+        solver="liblinear",
+        tol=1e-6,
+        max_iter=100,
+        intercept_scaling=10000.0,
+        random_state=0,
+        C=C,
+    )
+
+    # test that the fit does not raise a ConvergenceWarning
+    with warnings.catch_warnings():
+        warnings.simplefilter("error", ConvergenceWarning)
+        clf.fit(X_prep, y)
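
The `catch_warnings` block at the end of the test is a standard idiom for asserting that no `ConvergenceWarning` is raised: `simplefilter("error", ...)` turns the warning into an exception, so a non-converging fit fails the test. A self-contained sketch of the same idiom on synthetic data (dataset and parameters are illustrative):

import warnings

from sklearn.datasets import make_classification
from sklearn.exceptions import ConvergenceWarning
from sklearn.linear_model import LogisticRegression

X, y = make_classification(n_samples=200, random_state=0)
clf = LogisticRegression(penalty="l1", solver="liblinear", tol=1e-6)

with warnings.catch_warnings():
    # Promote ConvergenceWarning to an error inside this block: fit()
    # now raises instead of silently emitting the warning.
    warnings.simplefilter("error", ConvergenceWarning)
    clf.fit(X, y)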

sklearn/svm/src/liblinear/linear.cpp

Lines changed: 10 additions & 4 deletions
@@ -1769,6 +1769,7 @@ static int solve_l1r_lr(
     int max_num_linesearch = 20;
     int active_size;
     int QP_active_size;
+    int QP_no_change = 0;
 
     double nu = 1e-12;
     double inner_eps = 1;
@@ -1896,9 +1897,13 @@
         if(newton_iter == 0)
             Gnorm1_init = Gnorm1_new;
 
-        if(Gnorm1_new <= eps*Gnorm1_init)
+        // Break outer-loop if the accumulated violation is small.
+        // Also break if no update in QP inner-loop ten times in a row.
+        if(Gnorm1_new <= eps*Gnorm1_init || QP_no_change >= 10)
             break;
 
+        QP_no_change++;
+
         iter = 0;
         QP_Gmax_old = INF;
         QP_active_size = active_size;
@@ -1955,9 +1960,6 @@
             else
                 violation = fabs(Gn);
 
-            QP_Gmax_new = max(QP_Gmax_new, violation);
-            QP_Gnorm1_new += violation;
-
             // obtain solution of one-variable problem
             if(Gp < H*wpd[j])
                 z = -Gp/H;
@@ -1970,6 +1972,10 @@
                 continue;
             z = min(max(z,-10.0),10.0);
 
+            QP_no_change = 0;
+            QP_Gmax_new = max(QP_Gmax_new, violation);
+            QP_Gnorm1_new += violation;
+
             wpd[j] += z;
 
             x = prob_col->x[j];
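
In the patch above, `QP_no_change` counts consecutive outer Newton iterations in which the inner QP coordinate loop accepted no update: the counter is reset whenever some coordinate actually moves, and the violation accumulators are now only updated on an accepted step. When the iterate is pinned this way for ten iterations, the solver stops instead of spinning until `max_iter`. A rough Python sketch of this control flow (illustrative only; the real logic is the C++ above):

def run_newton(inner_pass, eps=1e-2, max_newton_iter=100, patience=10):
    """inner_pass() -> (gnorm1, updated): gradient-violation norm for this
    iteration and whether any coordinate was actually updated."""
    gnorm1_init = None
    no_change = 0
    for it in range(max_newton_iter):
        gnorm1, updated = inner_pass()
        if gnorm1_init is None:
            gnorm1_init = gnorm1  # mirrors Gnorm1_init in solve_l1r_lr
        # Original criterion, plus the new stall counter: stop when the
        # accumulated violation is small OR nothing has moved for a while.
        if gnorm1 <= eps * gnorm1_init or no_change >= patience:
            return it
        no_change = 0 if updated else no_change + 1
    return max_newton_iter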
