MNT clean-up deprecations for 1.7: multi_class in LogisticRegression by jeremiedbb · Pull Request #31241 · scikit-learn/scikit-learn · GitHub
[go: up one dir, main page]

Skip to content

MNT clean-up deprecations for 1.7: multi_class in LogisticRegression #31241

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions doc/modules/model_evaluation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1632,7 +1632,7 @@ Therefore, the `y_score` parameter is of size (n_samples,).
>>> from sklearn.linear_model import LogisticRegression
>>> from sklearn.metrics import roc_auc_score
>>> X, y = load_breast_cancer(return_X_y=True)
>>> clf = LogisticRegression(solver="liblinear").fit(X, y)
>>> clf = LogisticRegression().fit(X, y)
>>> clf.classes_
array([0, 1])

Expand Down Expand Up @@ -1728,11 +1728,11 @@ class with the greater label for each output.
>>> from sklearn.datasets import make_multilabel_classification
>>> from sklearn.multioutput import MultiOutputClassifier
>>> X, y = make_multilabel_classification(random_state=0)
>>> inner_clf = LogisticRegression(solver="liblinear", random_state=0)
>>> inner_clf = LogisticRegression(random_state=0)
>>> clf = MultiOutputClassifier(inner_clf).fit(X, y)
>>> y_score = np.transpose([y_pred[:, 1] for y_pred in clf.predict_proba(X)])
>>> roc_auc_score(y, y_score, average=None)
array([0.82..., 0.86..., 0.94..., 0.85... , 0.94...])
array([0.82..., 0.85..., 0.93..., 0.86..., 0.94...])

And the decision values do not require such processing.

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
- Using the `"liblinear"` solver for multiclass classification with a one-versus-rest
scheme in :class:`linear_model.LogisticRegression` and
:class:`linear_model.LogisticRegressionCV` is deprecated and will raise an error in
version 1.8. Either use a solver which supports the multinomial loss or wrap the
estimator in a :class:`sklearn.multiclass.OneVsRestClassifier` to keep applying a
one-versus-rest scheme.
By :user:`Jérémie du Boisberranger <jeremiedbb>`.
10 changes: 5 additions & 5 deletions sklearn/ensemble/tests/test_voting.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ def test_notfitted():

def test_majority_label_iris(global_random_seed):
"""Check classification by majority label on dataset iris."""
clf1 = LogisticRegression(solver="liblinear", random_state=global_random_seed)
clf1 = LogisticRegression(random_state=global_random_seed)
clf2 = RandomForestClassifier(n_estimators=10, random_state=global_random_seed)
clf3 = GaussianNB()
eclf = VotingClassifier(
Expand All @@ -127,12 +127,12 @@ def test_majority_label_iris(global_random_seed):

def test_tie_situation():
"""Check voting classifier selects smaller class label in tie situation."""
clf1 = LogisticRegression(random_state=123, solver="liblinear")
clf1 = LogisticRegression(random_state=123)
clf2 = RandomForestClassifier(random_state=123)
eclf = VotingClassifier(estimators=[("lr", clf1), ("rf", clf2)], voting="hard")
assert clf1.fit(X, y).predict(X)[73] == 2
assert clf2.fit(X, y).predict(X)[73] == 1
assert eclf.fit(X, y).predict(X)[73] == 1
assert clf1.fit(X, y).predict(X)[52] == 2
assert clf2.fit(X, y).predict(X)[52] == 1
assert eclf.fit(X, y).predict(X)[52] == 1


def test_weights_iris(global_random_seed):
Expand Down
26 changes: 22 additions & 4 deletions sklearn/linear_model/_logistic.py
Original file line number Diff line number Diff line change
Expand Up @@ -501,6 +501,15 @@ def _logistic_regression_path(
w0 = sol.solve(X=X, y=target, sample_weight=sample_weight)
n_iter_i = sol.iteration
elif solver == "liblinear":
if len(classes) > 2:
warnings.warn(
"Using the 'liblinear' solver for multiclass classification is "
"deprecated. An error will be raised in 1.8. Either use another "
"solver which supports the multinomial loss or wrap the estimator "
"in a OneVsRestClassifier to keep applying a one-versus-rest "
"scheme.",
FutureWarning,
)
(
coef_,
intercept_,
Expand Down Expand Up @@ -931,7 +940,7 @@ class LogisticRegression(LinearClassifierMixin, SparseCoefMixin, BaseEstimator):
'lbfgs' 'l2', None yes
'liblinear' 'l1', 'l2' no
'newton-cg' 'l2', None yes
'newton-cholesky' 'l2', None no
'newton-cholesky' 'l2', None yes
'sag' 'l2', None yes
'saga' 'elasticnet', 'l1', 'l2', None yes
================= ============================== ======================
Expand Down Expand Up @@ -1238,7 +1247,7 @@ def fit(self, X, y, sample_weight=None):
check_classification_targets(y)
self.classes_ = np.unique(y)

# TODO(1.7) remove multi_class
# TODO(1.8) remove multi_class
multi_class = self.multi_class
if self.multi_class == "multinomial" and len(self.classes_) == 2:
warnings.warn(
Expand Down Expand Up @@ -1274,6 +1283,15 @@ def fit(self, X, y, sample_weight=None):
multi_class = _check_multi_class(multi_class, solver, len(self.classes_))

if solver == "liblinear":
if len(self.classes_) > 2:
warnings.warn(
"Using the 'liblinear' solver for multiclass classification is "
"deprecated. An error will be raised in 1.8. Either use another "
"solver which supports the multinomial loss or wrap the estimator "
"in a OneVsRestClassifier to keep applying a one-versus-rest "
"scheme.",
FutureWarning,
)
if effective_n_jobs(self.n_jobs) != 1:
warnings.warn(
"'n_jobs' > 1 does not have any effect when"
Expand Down Expand Up @@ -1568,7 +1586,7 @@ class LogisticRegressionCV(LogisticRegression, LinearClassifierMixin, BaseEstima
'lbfgs' 'l2' yes
'liblinear' 'l1', 'l2' no
'newton-cg' 'l2' yes
'newton-cholesky' 'l2', no
'newton-cholesky' 'l2' yes
'sag' 'l2' yes
'saga' 'elasticnet', 'l1', 'l2' yes
================= ============================== ======================
Expand Down Expand Up @@ -1900,7 +1918,7 @@ def fit(self, X, y, sample_weight=None, **params):
classes = self.classes_ = label_encoder.classes_
encoded_labels = label_encoder.transform(label_encoder.classes_)

# TODO(1.7) remove multi_class
# TODO(1.8) remove multi_class
multi_class = self.multi_class
if self.multi_class == "multinomial" and len(self.classes_) == 2:
warnings.warn(
Expand Down
61 changes: 42 additions & 19 deletions sklearn/linear_model/tests/test_logistic.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,8 +129,7 @@ def __call__(self, model, X, y, sample_weight=None):

@skip_if_no_parallel
def test_lr_liblinear_warning():
n_samples, n_features = iris.data.shape
target = iris.target_names[iris.target]
X, y = make_classification(random_state=0)

lr = LogisticRegression(solver="liblinear", n_jobs=2)
warning_message = (
Expand All @@ -139,7 +138,7 @@ def test_lr_liblinear_warning():
" = 2."
)
with pytest.warns(UserWarning, match=warning_message):
lr.fit(iris.data, target)
lr.fit(X, y)


@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
Expand All @@ -148,8 +147,11 @@ def test_predict_3_classes(csr_container):
check_predictions(LogisticRegression(C=10), csr_container(X), Y2)


# TODO(1.7): remove filterwarnings after the deprecation of multi_class
# TODO(1.8): remove filterwarnings after the deprecation of multi_class
@pytest.mark.filterwarnings("ignore:.*'multi_class' was deprecated.*:FutureWarning")
@pytest.mark.filterwarnings(
"ignore:.*'liblinear' solver for multiclass classification is deprecated.*"
)
@pytest.mark.parametrize(
"clf",
[
Expand Down Expand Up @@ -197,7 +199,7 @@ def test_predict_iris(clf):
assert np.mean(pred == target) > 0.95


# TODO(1.7): remove filterwarnings after the deprecation of multi_class
# TODO(1.8): remove filterwarnings after the deprecation of multi_class
@pytest.mark.filterwarnings("ignore:.*'multi_class' was deprecated.*:FutureWarning")
@pytest.mark.parametrize("LR", [LogisticRegression, LogisticRegressionCV])
def test_check_solver_option(LR):
Expand Down Expand Up @@ -249,7 +251,7 @@ def test_elasticnet_l1_ratio_err_helpful(LR):
model.fit(np.array([[1, 2], [3, 4]]), np.array([0, 1]))


# TODO(1.7): remove whole test with deprecation of multi_class
# TODO(1.8): remove whole test with deprecation of multi_class
@pytest.mark.filterwarnings("ignore:.*'multi_class' was deprecated.*:FutureWarning")
@pytest.mark.parametrize("solver", ["lbfgs", "newton-cg", "sag", "saga"])
def test_multinomial_binary(solver):
Expand All @@ -274,7 +276,7 @@ def test_multinomial_binary(solver):
assert np.mean(pred == target) > 0.9


# TODO(1.7): remove filterwarnings after the deprecation of multi_class
# TODO(1.8): remove filterwarnings after the deprecation of multi_class
# Maybe even remove this whole test as correctness of multinomial loss is tested
# elsewhere.
@pytest.mark.filterwarnings("ignore:.*'multi_class' was deprecated.*:FutureWarning")
Expand Down Expand Up @@ -614,7 +616,7 @@ def test_logistic_cv_sparse(csr_container):
assert clfs.C_ == clf.C_


# TODO(1.7): remove filterwarnings after the deprecation of multi_class
# TODO(1.8): remove filterwarnings after the deprecation of multi_class
# Best remove this whole test.
@pytest.mark.filterwarnings("ignore:.*'multi_class' was deprecated.*:FutureWarning")
def test_ovr_multinomial_iris():
Expand Down Expand Up @@ -700,7 +702,7 @@ def test_logistic_regression_solvers():
)


# TODO(1.7): remove filterwarnings after the deprecation of multi_class
# TODO(1.8): remove filterwarnings after the deprecation of multi_class
@pytest.mark.filterwarnings("ignore:.*'multi_class' was deprecated.*:FutureWarning")
@pytest.mark.parametrize("fit_intercept", [False, True])
def test_logistic_regression_solvers_multiclass(fit_intercept):
Expand Down Expand Up @@ -1301,7 +1303,7 @@ def test_logreg_predict_proba_multinomial():
assert clf_wrong_loss > clf_multi_loss


# TODO(1.7): remove filterwarnings after the deprecation of multi_class
# TODO(1.8): remove filterwarnings after the deprecation of multi_class
@pytest.mark.filterwarnings("ignore:.*'multi_class' was deprecated.*:FutureWarning")
@pytest.mark.parametrize("max_iter", np.arange(1, 5))
@pytest.mark.parametrize("multi_class", ["ovr", "multinomial"])
Expand Down Expand Up @@ -1345,8 +1347,11 @@ def test_max_iter(max_iter, multi_class, solver, message):
assert lr.n_iter_[0] == max_iter


# TODO(1.7): remove filterwarnings after the deprecation of multi_class
# TODO(1.8): remove filterwarnings after the deprecation of multi_class
@pytest.mark.filterwarnings("ignore:.*'multi_class' was deprecated.*:FutureWarning")
@pytest.mark.filterwarnings(
"ignore:.*'liblinear' solver for multiclass classification is deprecated.*"
)
@pytest.mark.parametrize("solver", SOLVERS)
def test_n_iter(solver):
# Test that self.n_iter_ has the correct format.
Expand Down Expand Up @@ -1478,7 +1483,7 @@ def test_saga_vs_liblinear(csr_container):
assert_array_almost_equal(saga.coef_, liblinear.coef_, 3)


# TODO(1.7): remove filterwarnings after the deprecation of multi_class
# TODO(1.8): remove filterwarnings after the deprecation of multi_class
@pytest.mark.filterwarnings("ignore:.*'multi_class' was deprecated.*:FutureWarning")
@pytest.mark.parametrize("multi_class", ["ovr", "multinomial"])
@pytest.mark.parametrize(
Expand Down Expand Up @@ -1738,7 +1743,7 @@ def test_LogisticRegressionCV_GridSearchCV_elastic_net(n_classes):
assert gs.best_params_["C"] == lrcv.C_[0]


# TODO(1.7): remove filterwarnings after the deprecation of multi_class
# TODO(1.8): remove filterwarnings after the deprecation of multi_class
# Maybe remove whole test after removal of the deprecated multi_class.
@pytest.mark.filterwarnings("ignore:.*'multi_class' was deprecated.*:FutureWarning")
def test_LogisticRegressionCV_GridSearchCV_elastic_net_ovr():
Expand Down Expand Up @@ -1786,7 +1791,7 @@ def test_LogisticRegressionCV_GridSearchCV_elastic_net_ovr():
assert (lrcv.predict(X_test) == gs.predict(X_test)).mean() >= 0.8


# TODO(1.7): remove filterwarnings after the deprecation of multi_class
# TODO(1.8): remove filterwarnings after the deprecation of multi_class
@pytest.mark.filterwarnings("ignore:.*'multi_class' was deprecated.*:FutureWarning")
@pytest.mark.parametrize("penalty", ("l2", "elasticnet"))
@pytest.mark.parametrize("multi_class", ("ovr", "multinomial", "auto"))
Expand Down Expand Up @@ -1825,7 +1830,7 @@ def test_LogisticRegressionCV_no_refit(penalty, multi_class):
assert lrcv.coef_.shape == (n_classes, n_features)


# TODO(1.7): remove filterwarnings after the deprecation of multi_class
# TODO(1.8): remove filterwarnings after the deprecation of multi_class
# Remove multi_class and change first element of the expected n_iter_.shape from
# n_classes to 1 (according to the docstring).
@pytest.mark.filterwarnings("ignore:.*'multi_class' was deprecated.*:FutureWarning")
Expand Down Expand Up @@ -1955,8 +1960,11 @@ def test_logistic_regression_path_coefs_multinomial():
assert_array_almost_equal(coefs[1], coefs[2], decimal=1)


# TODO(1.7): remove filterwarnings after the deprecation of multi_class
# TODO(1.8): remove filterwarnings after the deprecation of multi_class
@pytest.mark.filterwarnings("ignore:.*'multi_class' was deprecated.*:FutureWarning")
@pytest.mark.filterwarnings(
"ignore:.*'liblinear' solver for multiclass classification is deprecated.*"
)
@pytest.mark.parametrize(
"est",
[
Expand Down Expand Up @@ -2126,7 +2134,7 @@ def test_scores_attribute_layout_elasticnet():
assert avg_scores_lrcv[i, j] == pytest.approx(avg_score_lr)


# TODO(1.7): remove filterwarnings after the deprecation of multi_class
# TODO(1.8): remove filterwarnings after the deprecation of multi_class
@pytest.mark.filterwarnings("ignore:.*'multi_class' was deprecated.*:FutureWarning")
@pytest.mark.parametrize("solver", ["lbfgs", "newton-cg", "newton-cholesky"])
@pytest.mark.parametrize("fit_intercept", [False, True])
Expand Down Expand Up @@ -2171,7 +2179,7 @@ def test_multinomial_identifiability_on_iris(solver, fit_intercept):
assert clf.intercept_.sum(axis=0) == pytest.approx(0, abs=1e-11)


# TODO(1.7): remove filterwarnings after the deprecation of multi_class
# TODO(1.8): remove filterwarnings after the deprecation of multi_class
@pytest.mark.filterwarnings("ignore:.*'multi_class' was deprecated.*:FutureWarning")
@pytest.mark.parametrize("multi_class", ["ovr", "multinomial", "auto"])
@pytest.mark.parametrize("class_weight", [{0: 1.0, 1: 10.0, 2: 1.0}, "balanced"])
Expand Down Expand Up @@ -2349,7 +2357,7 @@ def test_passing_params_without_enabling_metadata_routing():
lr_cv.score(X, y, **params)


# TODO(1.7): remove
# TODO(1.8): remove
def test_multi_class_deprecated():
"""Check `multi_class` parameter deprecated."""
X, y = make_classification(n_classes=3, n_samples=50, n_informative=6)
Expand Down Expand Up @@ -2414,3 +2422,18 @@ def test_newton_cholesky_fallback_to_lbfgs(global_random_seed):
n_iter_nc_limited = lr_nc_limited.n_iter_[0]

assert n_iter_nc_limited == lr_nc_limited.max_iter - 1


# TODO(1.8): check for an error instead
@pytest.mark.parametrize("Estimator", [LogisticRegression, LogisticRegressionCV])
def test_liblinear_multiclass_warning(Estimator):
"""Check that liblinear warns on multiclass problems."""
msg = (
"Using the 'liblinear' solver for multiclass classification is "
"deprecated. An error will be raised in 1.8. Either use another "
"solver which supports the multinomial loss or wrap the estimator "
"in a OneVsRestClassifier to keep applying a one-versus-rest "
"scheme."
)
with pytest.warns(FutureWarning, match=msg):
Estimator(solver="liblinear").fit(iris.data, iris.target)
6 changes: 3 additions & 3 deletions sklearn/metrics/_ranking.py
Original file line number Diff line number Diff line change
Expand Up @@ -622,7 +622,7 @@ class scores must correspond to the order of ``labels``,
>>> from sklearn.linear_model import LogisticRegression
>>> from sklearn.metrics import roc_auc_score
>>> X, y = load_breast_cancer(return_X_y=True)
>>> clf = LogisticRegression(solver="liblinear", random_state=0).fit(X, y)
>>> clf = LogisticRegression(solver="newton-cholesky", random_state=0).fit(X, y)
>>> roc_auc_score(y, clf.predict_proba(X)[:, 1])
0.99...
>>> roc_auc_score(y, clf.decision_function(X))
Expand All @@ -632,7 +632,7 @@ class scores must correspond to the order of ``labels``,

>>> from sklearn.datasets import load_iris
>>> X, y = load_iris(return_X_y=True)
>>> clf = LogisticRegression(solver="liblinear").fit(X, y)
>>> clf = LogisticRegression(solver="newton-cholesky").fit(X, y)
>>> roc_auc_score(y, clf.predict_proba(X), multi_class='ovr')
0.99...

Expand All @@ -649,7 +649,7 @@ class scores must correspond to the order of ``labels``,
>>> # extract the positive columns for each output
>>> y_score = np.transpose([score[:, 1] for score in y_score])
>>> roc_auc_score(y, y_score, average=None)
array([0.82..., 0.86..., 0.94..., 0.85... , 0.94...])
array([0.82..., 0.85..., 0.93..., 0.86..., 0.94...])
>>> from sklearn.linear_model import RidgeClassifierCV
>>> clf = RidgeClassifierCV().fit(X, y)
>>> roc_auc_score(y, clf.decision_function(X), average=None)
Expand Down
Loading
0