diff --git a/doc/whats_new/v1.3.rst b/doc/whats_new/v1.3.rst
index 4fede62e61b34..9c0f4f71edffc 100644
--- a/doc/whats_new/v1.3.rst
+++ b/doc/whats_new/v1.3.rst
@@ -140,14 +140,6 @@ Changelog
     :pr:`123456` by :user:`Joe Bloggs <joeongithub>`.
     where 123456 is the *pull request* number, not the issue number.
 
-:mod:`sklearn.feature_selection`
-................................
-
-- |Enhancement| All selectors in :mod:`sklearn.feature_selection` will preserve
-  a DataFrame's dtype when transformed. :pr:`25102` by `Thomas Fan`_.
-
-- |Fix| :class:`feature_selection.SequentialFeatureSelector`'s `cv` parameter
-  now supports generators. :pr:`25973` by `Yao Xiao <Charlie-XIAO>`.
 
 :mod:`sklearn.base`
 ...................
@@ -255,6 +247,15 @@ Changelog
   meaning that it is note required to call `fit` before calling `transform`.
   Parameter validation only happens at `fit` time.
   :pr:`24230` by :user:`Guillaume Lemaitre <glemaitre>`.
+
+:mod:`sklearn.feature_selection`
+................................
+
+- |Enhancement| All selectors in :mod:`sklearn.feature_selection` will preserve
+  a DataFrame's dtype when transformed. :pr:`25102` by `Thomas Fan`_.
+
+- |Fix| :class:`feature_selection.SequentialFeatureSelector`'s `cv` parameter
+  now supports generators. :pr:`25973` by `Yao Xiao <Charlie-XIAO>`.
 
 :mod:`sklearn.impute`
 .....................
@@ -288,6 +289,11 @@ Changelog
   :class:`linear_model.ARDRegression` to expose the actual number of iterations
   required to reach the stopping criterion.
   :pr:`25697` by :user:`John Pangas <jpangas>`.
+
+- |Fix| Use a more robust criterion to detect convergence of
+  :class:`linear_model.LogisticRegression(penalty="l1", solver="liblinear")`
+  on linearly separable problems.
+  :pr:`25214` by `Tom Dupre la Tour`_.
 
 :mod:`sklearn.metrics`
 ......................
diff --git a/examples/linear_model/plot_logistic_path.py b/examples/linear_model/plot_logistic_path.py
index a52b082eaed32..52cf2c6587237 100644
--- a/examples/linear_model/plot_logistic_path.py
+++ b/examples/linear_model/plot_logistic_path.py
@@ -52,7 +52,7 @@
 from sklearn import linear_model
 from sklearn.svm import l1_min_c
 
-cs = l1_min_c(X, y, loss="log") * np.logspace(0, 7, 16)
+cs = l1_min_c(X, y, loss="log") * np.logspace(0, 10, 16)
 
 clf = linear_model.LogisticRegression(
     penalty="l1",
diff --git a/sklearn/linear_model/tests/test_logistic.py b/sklearn/linear_model/tests/test_logistic.py
index c3f439bd4f150..a470fe412ab36 100644
--- a/sklearn/linear_model/tests/test_logistic.py
+++ b/sklearn/linear_model/tests/test_logistic.py
@@ -24,6 +24,7 @@
 from sklearn.linear_model import SGDClassifier
 from sklearn.preprocessing import scale
 from sklearn.utils._testing import skip_if_no_parallel
+from sklearn.svm import l1_min_c
 from sklearn.exceptions import ConvergenceWarning
 
 from sklearn.linear_model._logistic import (
@@ -742,7 +743,6 @@ def test_logistic_regression_sample_weights():
     sample_weight = y + 1
 
     for LR in [LogisticRegression, LogisticRegressionCV]:
-
         kw = {"random_state": 42, "fit_intercept": False, "multi_class": "ovr"}
         if LR is LogisticRegressionCV:
             kw.update({"Cs": 3, "cv": 3})
@@ -1918,7 +1918,6 @@ def test_scores_attribute_layout_elasticnet():
 
     for i, C in enumerate(Cs):
         for j, l1_ratio in enumerate(l1_ratios):
-
             lr = LogisticRegression(
                 penalty="elasticnet",
                 solver="saga",
@@ -2033,3 +2032,28 @@ def test_warning_on_penalty_string_none():
     )
     with pytest.warns(FutureWarning, match=warning_message):
         lr.fit(iris.data, target)
+
+
+def test_liblinear_not_stuck():
+    # Non-regression https://github.com/scikit-learn/scikit-learn/issues/18264
+    X = iris.data.copy()
+    y = iris.target.copy()
+    X = X[y != 2]
+    y = y[y != 2]
+    X_prep = StandardScaler().fit_transform(X)
+
+    C = l1_min_c(X, y, loss="log") * 10 ** (10 / 29)
+    clf = LogisticRegression(
+        penalty="l1",
+        solver="liblinear",
+        tol=1e-6,
+        max_iter=100,
+        intercept_scaling=10000.0,
+        random_state=0,
+        C=C,
+    )
+
+    # test that the fit does not raise a ConvergenceWarning
+    with warnings.catch_warnings():
+        warnings.simplefilter("error", ConvergenceWarning)
+        clf.fit(X_prep, y)
diff --git a/sklearn/svm/src/liblinear/linear.cpp b/sklearn/svm/src/liblinear/linear.cpp
index 89a2f1616b996..63648adbe2947 100644
--- a/sklearn/svm/src/liblinear/linear.cpp
+++ b/sklearn/svm/src/liblinear/linear.cpp
@@ -1769,6 +1769,7 @@ static int solve_l1r_lr(
 	int max_num_linesearch = 20;
 	int active_size;
 	int QP_active_size;
+	int QP_no_change = 0;
 
 	double nu = 1e-12;
 	double inner_eps = 1;
@@ -1896,9 +1897,13 @@ static int solve_l1r_lr(
 		if(newton_iter == 0)
 			Gnorm1_init = Gnorm1_new;
 
-		if(Gnorm1_new <= eps*Gnorm1_init)
+		// Break outer-loop if the accumulated violation is small.
+		// Also break if no update in QP inner-loop ten times in a row.
+		if(Gnorm1_new <= eps*Gnorm1_init || QP_no_change >= 10)
 			break;
 
+		QP_no_change++;
+
 		iter = 0;
 		QP_Gmax_old = INF;
 		QP_active_size = active_size;
@@ -1955,9 +1960,6 @@ static int solve_l1r_lr(
 				else
 					violation = fabs(Gn);
 
-				QP_Gmax_new = max(QP_Gmax_new, violation);
-				QP_Gnorm1_new += violation;
-
 				// obtain solution of one-variable problem
 				if(Gp < H*wpd[j])
 					z = -Gp/H;
@@ -1970,6 +1972,10 @@ static int solve_l1r_lr(
 					continue;
 				z = min(max(z,-10.0),10.0);
 
+				QP_no_change = 0;
+				QP_Gmax_new = max(QP_Gmax_new, violation);
+				QP_Gnorm1_new += violation;
+
 				wpd[j] += z;
 
 				x = prob_col->x[j];