8000 FIX improve convergence criterion for LogisticRegression(penalty="l1", solver='liblinear') by TomDLT · Pull Request #25214 · scikit-learn/scikit-learn · GitHub
[go: up one dir, main page]

Skip to content
22 changes: 14 additions & 8 deletions doc/whats_new/v1.3.rst
Original file line number Diff line number Diff line change
Expand Up @@ -140,14 +140,6 @@ Changelog
:pr:`123456` by :user:`Joe Bloggs <joeongithub>`.
where 123456 is the *pull request* number, not the issue number.

:mod:`sklearn.feature_selection`
................................

- |Enhancement| All selectors in :mod:`sklearn.feature_selection` will preserve
a DataFrame's dtype when transformed. :pr:`25102` by `Thomas Fan`_.

- |Fix| :class:`feature_selection.SequentialFeatureSelector`'s `cv` parameter
now supports generators. :pr:`25973` by `Yao Xiao <Charlie-XIAO>`.

:mod:`sklearn.base`
...................
Expand Down Expand Up @@ -255,6 +247,15 @@ Changelog
meaning that it is not required to call `fit` before calling `transform`.
Parameter validation only happens at `fit` time.
:pr:`24230` by :user:`Guillaume Lemaitre <glemaitre>`.

:mod:`sklearn.feature_selection`
................................

- |Enhancement| All selectors in :mod:`sklearn.feature_selection` will preserve
a DataFrame's dtype when transformed. :pr:`25102` by `Thomas Fan`_.

- |Fix| :class:`feature_selection.SequentialFeatureSelector`'s `cv` parameter
now supports generators. :pr:`25973` by `Yao Xiao <Charlie-XIAO>`.

:mod:`sklearn.impute`
.....................
Expand Down Expand Up @@ -288,6 +289,11 @@ Changelog
:class:`linear_model.ARDRegression` to expose the actual number of iterations
required to reach the stopping criterion.
:pr:`25697` by :user:`John Pangas <jpangas>`.

- |Fix| Use a more robust criterion to detect convergence of
:class:`linear_model.LogisticRegression(penalty="l1", solver="liblinear")`
on linearly separable problems.
:pr:`25214` by `Tom Dupre la Tour`_.

:mod:`sklearn.metrics`
......................
Expand Down
2 changes: 1 addition & 1 deletion examples/linear_model/plot_logistic_path.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@
from sklearn import linear_model
from sklearn.svm import l1_min_c

cs = l1_min_c(X, y, loss="log") * np.logspace(0, 7, 16)
cs = l1_min_c(X, y, loss="log") * np.logspace(0, 10, 16)

clf = linear_model.LogisticRegression(
penalty="l1",
Expand Down
28 changes: 26 additions & 2 deletions sklearn/linear_model/tests/test_logistic.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from sklearn.linear_model import SGDClassifier
from sklearn.preprocessing import scale
from sklearn.utils._testing import skip_if_no_parallel
from sklearn.svm import l1_min_c

from sklearn.exceptions import ConvergenceWarning
from sklearn.linear_model._logistic import (
Expand Down Expand Up @@ -742,7 +743,6 @@ def test_logistic_regression_sample_weights():
sample_weight = y + 1

for LR in [LogisticRegression, LogisticRegressionCV]:

kw = {"random_state": 42, "fit_intercept": False, "multi_class": "ovr"}
if LR is LogisticRegressionCV:
kw.update({"Cs": 3, "cv": 3})
Expand Down Expand Up @@ -1918,7 +1918,6 @@ def test_scores_attribute_layout_elasticnet():

for i, C in enumerate(Cs):
for j, l1_ratio in enumerate(l1_ratios):

lr = LogisticRegression(
penalty="elasticnet",
solver="saga",
Expand Down Expand Up @@ -2033,3 +2032,28 @@ def test_warning_on_penalty_string_none():
)
with pytest.warns(FutureWarning, match=warning_message):
lr.fit(iris.data, target)


def test_liblinear_not_stuck():
    """liblinear L1 logistic fit must converge on a separable problem.

    Non-regression test for
    https://github.com/scikit-learn/scikit-learn/issues/18264: the solver
    used to stall and emit a ConvergenceWarning for this configuration.
    """
    # Reduce iris to a binary (and linearly separable) problem.
    mask = iris.target != 2
    X_binary = iris.data[mask].copy()
    y_binary = iris.target[mask].copy()
    X_scaled = StandardScaler().fit_transform(X_binary)

    # C chosen just above the smallest value yielding a non-empty model,
    # computed on the unscaled data as in the original report.
    clf = LogisticRegression(
        C=l1_min_c(X_binary, y_binary, loss="log") * 10 ** (10 / 29),
        penalty="l1",
        solver="liblinear",
        tol=1e-6,
        max_iter=100,
        intercept_scaling=10000.0,
        random_state=0,
    )

    # Turn ConvergenceWarning into an error: the fit must finish cleanly.
    with warnings.catch_warnings():
        warnings.simplefilter("error", ConvergenceWarning)
        clf.fit(X_scaled, y_binary)
14 changes: 10 additions & 4 deletions sklearn/svm/src/liblinear/linear.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1769,6 +1769,7 @@ static int solve_l1r_lr(
int max_num_linesearch = 20;
int active_size;
int QP_active_size;
int QP_no_change = 0;

double nu = 1e-12;
double inner_eps = 1;
Expand Down Expand Up @@ -1896,9 +1897,13 @@ static int solve_l1r_lr(
if(newton_iter == 0)
Gnorm1_init = Gnorm1_new;

if(Gnorm1_new <= eps*Gnorm1_init)
// Break outer-loop if the accumulated violation is small.
// Also break if no update in QP inner-loop ten times in a row.
if(Gnorm1_new <= eps*Gnorm1_init || QP_no_change >= 10)
break;

QP_no_change++;

iter = 0;
QP_Gmax_old = INF;
QP_active_size = active_size;
Expand Down Expand Up @@ -1955,9 +1960,6 @@ static int solve_l1r_lr(
else
violation = fabs(Gn);

QP_Gmax_new = max(QP_Gmax_new, violation);
QP_Gnorm1_new += violation;

// obtain solution of one-variable problem
if(Gp < H*wpd[j])
z = -Gp/H;
Expand All @@ -1970,6 +1972,10 @@ static int solve_l1r_lr(
continue;
z = min(max(z,-10.0),10.0);

QP_no_change = 0;
QP_Gmax_new = max(QP_Gmax_new, violation);
QP_Gnorm1_new += violation;

wpd[j] += z;

x = prob_col->x[j];
Expand Down
0