FIX improve convergence criterion for LogisticRegression(penalty="l1", solver='liblinear') by TomDLT · Pull Request #25214 · scikit-learn/scikit-learn

FIX improve convergence criterion for LogisticRegression(penalty="l1", solver='liblinear') #25214


Merged · 11 commits · Apr 17, 2023
22 changes: 14 additions & 8 deletions doc/whats_new/v1.3.rst
@@ -140,14 +140,6 @@ Changelog
:pr:`123456` by :user:`Joe Bloggs <joeongithub>`.
where 123456 is the *pull request* number, not the issue number.

:mod:`sklearn.feature_selection`
................................

- |Enhancement| All selectors in :mod:`sklearn.feature_selection` will preserve
a DataFrame's dtype when transformed. :pr:`25102` by `Thomas Fan`_.

- |Fix| :class:`feature_selection.SequentialFeatureSelector`'s `cv` parameter
now supports generators. :pr:`25973` by `Yao Xiao <Charlie-XIAO>`.

:mod:`sklearn.base`
...................
@@ -255,6 +247,15 @@ Changelog
meaning that it is not required to call `fit` before calling `transform`.
Parameter validation only happens at `fit` time.
:pr:`24230` by :user:`Guillaume Lemaitre <glemaitre>`.

:mod:`sklearn.feature_selection`
................................

- |Enhancement| All selectors in :mod:`sklearn.feature_selection` will preserve
a DataFrame's dtype when transformed. :pr:`25102` by `Thomas Fan`_.

- |Fix| :class:`feature_selection.SequentialFeatureSelector`'s `cv` parameter
now supports generators. :pr:`25973` by `Yao Xiao <Charlie-XIAO>`.

:mod:`sklearn.impute`
.....................
@@ -288,6 +289,11 @@ Changelog
:class:`linear_model.ARDRegression` to expose the actual number of iterations
required to reach the stopping criterion.
:pr:`25697` by :user:`John Pangas <jpangas>`.

- |Fix| Use a more robust criterion to detect convergence of
:class:`linear_model.LogisticRegression(penalty="l1", solver="liblinear")`
on linearly separable problems.
:pr:`25214` by `Tom Dupre la Tour`_.

:mod:`sklearn.metrics`
......................
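For context on the |Fix| entry above: the scenario it targets can be sketched as follows. This is an illustration modeled on the non-regression test added later in this PR, not part of the diff itself — an l1-penalized liblinear fit on a linearly separable two-class problem with a weak penalty (large C), which previously could keep iterating until `max_iter` and emit a `ConvergenceWarning`.

from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.svm import l1_min_c

# Two linearly separable iris classes.
X, y = load_iris(return_X_y=True)
X, y = X[y != 2], y[y != 2]

# A C value slightly above the smallest C for which any coefficient is non-zero.
C = l1_min_c(X, y, loss="log") * 10 ** (10 / 29)

clf = LogisticRegression(
    penalty="l1", solver="liblinear", tol=1e-6, intercept_scaling=10000.0, C=C
)
# With the improved criterion, this stops cleanly instead of hitting max_iter.
clf.fit(StandardScaler().fit_transform(X), y)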
2 changes: 1 addition & 1 deletion examples/linear_model/plot_logistic_path.py
@@ -52,7 +52,7 @@
from sklearn import linear_model
from sklearn.svm import l1_min_c

cs = l1_min_c(X, y, loss="log") * np.logspace(0, 7, 16)
cs = l1_min_c(X, y, loss="log") * np.logspace(0, 10, 16)

clf = linear_model.LogisticRegression(
penalty="l1",
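The example change above widens the grid of C values from seven to ten decades above `l1_min_c`, the smallest C for which at least one coefficient can become non-zero; the more robust stopping criterion presumably makes the very weakly regularized end of the path tractable. A rough sketch of how such a grid traces the l1 path (illustrative only, loosely following plot_logistic_path.py; the exact estimator settings in that example may differ):

import numpy as np
from sklearn import linear_model
from sklearn.datasets import load_iris
from sklearn.svm import l1_min_c

X, y = load_iris(return_X_y=True)
X, y = X[y != 2], y[y != 2]  # two-class subset, as in the example

# Grid from the sparsest useful C up to ten decades weaker regularization.
cs = l1_min_c(X, y, loss="log") * np.logspace(0, 10, 16)

clf = linear_model.LogisticRegression(
    penalty="l1", solver="liblinear", tol=1e-6, max_iter=int(1e6)
)
coefs = []
for c in cs:
    clf.set_params(C=c).fit(X, y)
    coefs.append(clf.coef_.ravel().copy())
# Each entry of coefs is one point on the l1 regularization path.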
28 changes: 26 additions & 2 deletions sklearn/linear_model/tests/test_logistic.py
@@ -24,6 +24,7 @@
from sklearn.linear_model import SGDClassifier
from sklearn.preprocessing import scale
from sklearn.utils._testing import skip_if_no_parallel
from sklearn.svm import l1_min_c

from sklearn.exceptions import ConvergenceWarning
from sklearn.linear_model._logistic import (
@@ -742,7 +743,6 @@ def test_logistic_regression_sample_weights():
sample_weight = y + 1

for LR in [LogisticRegression, LogisticRegressionCV]:

kw = {"random_state": 42, "fit_intercept": False, "multi_class": "ovr"}
if LR is LogisticRegressionCV:
kw.update({"Cs": 3, "cv": 3})
@@ -1918,7 +1918,6 @@ def test_scores_attribute_layout_elasticnet():

for i, C in enumerate(Cs):
for j, l1_ratio in enumerate(l1_ratios):

lr = LogisticRegression(
penalty="elasticnet",
solver="saga",
@@ -2033,3 +2032,28 @@ def test_warning_on_penalty_string_none():
)
with pytest.warns(FutureWarning, match=warning_message):
lr.fit(iris.data, target)


def test_liblinear_not_stuck():
    # Non-regression test for https://github.com/scikit-learn/scikit-learn/issues/18264
    X = iris.data.copy()
    y = iris.target.copy()
    X = X[y != 2]
    y = y[y != 2]
    X_prep = StandardScaler().fit_transform(X)

    # A C value slightly above l1_min_c, for which the l1-penalized liblinear
    # solver previously failed to converge within max_iter on this linearly
    # separable problem (see the linked issue).
    C = l1_min_c(X, y, loss="log") * 10 ** (10 / 29)
    clf = LogisticRegression(
        penalty="l1",
        solver="liblinear",
        tol=1e-6,
        max_iter=100,
        intercept_scaling=10000.0,
        random_state=0,
        C=C,
    )

    # Test that the fit does not raise a ConvergenceWarning.
    with warnings.catch_warnings():
        warnings.simplefilter("error", ConvergenceWarning)
        clf.fit(X_prep, y)
14 changes: 10 additions & 4 deletions sklearn/svm/src/liblinear/linear.cpp
@@ -1769,6 +1769,7 @@ static int solve_l1r_lr(
int max_num_linesearch = 20;
int active_size;
int QP_active_size;
int QP_no_change = 0;

double nu = 1e-12;
double inner_eps = 1;
@@ -1896,9 +1897,13 @@
if(newton_iter == 0)
Gnorm1_init = Gnorm1_new;

if(Gnorm1_new <= eps*Gnorm1_init)
// Break outer-loop if the accumulated violation is small.
// Also break if no update in QP inner-loop ten times in a row.
if(Gnorm1_new <= eps*Gnorm1_init || QP_no_change >= 10)
break;

QP_no_change++;

iter = 0;
QP_Gmax_old = INF;
QP_active_size = active_size;
@@ -1955,9 +1960,6 @@
else
violation = fabs(Gn);

QP_Gmax_new = max(QP_Gmax_new, violation);
QP_Gnorm1_new += violation;

// obtain solution of one-variable problem
if(Gp < H*wpd[j])
z = -Gp/H;
@@ -1970,6 +1972,10 @@
continue;
z = min(max(z,-10.0),10.0);

QP_no_change = 0;
QP_Gmax_new = max(QP_Gmax_new, violation);
QP_Gnorm1_new += violation;

wpd[j] += z;

x = prob_col->x[j];
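In plain terms, the linear.cpp change above does two things: the per-coordinate violation is only accumulated for coordinates that actually receive an update (the two QP_Gmax_new / QP_Gnorm1_new lines move below the `continue`), and a new counter, QP_no_change, aborts the outer Newton loop if the inner coordinate-descent (QP) loop accepts no update for ten outer iterations in a row. A simplified sketch of that control flow, written as Python pseudocode rather than the actual C++ implementation:

def outer_newton_loop(newton_step, converged, max_newton_iter=100):
    """Illustrative control flow only; not the liblinear code itself.

    ``newton_step()`` stands in for one inner CD/QP pass and returns the
    number of coordinate updates it accepted; ``converged()`` stands in for
    the test ``Gnorm1_new <= eps * Gnorm1_init`` on the accumulated violation.
    """
    qp_no_change = 0
    for _ in range(max_newton_iter):
        # Stop on the usual criterion, or if the inner loop has made no
        # coordinate update for ten outer iterations in a row.
        if converged() or qp_no_change >= 10:
            break
        qp_no_change += 1
        if newton_step() > 0:
            qp_no_change = 0  # progress was made; reset the stall counter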