diff --git a/doc/tutorial/statistical_inference/supervised_learning.rst b/doc/tutorial/statistical_inference/supervised_learning.rst index 6f53e9af791c5..b4e06b8f37bdc 100644 --- a/doc/tutorial/statistical_inference/supervised_learning.rst +++ b/doc/tutorial/statistical_inference/supervised_learning.rst @@ -372,8 +372,8 @@ function or **logistic** function: >>> logistic.fit(iris_X_train, iris_y_train) LogisticRegression(C=100000.0, class_weight=None, dual=False, fit_intercept=True, intercept_scaling=1, max_iter=100, - multi_class='ovr', n_jobs=1, penalty='l2', random_state=None, - solver='liblinear', tol=0.0001, verbose=0, warm_start=False) + multi_class='default', n_jobs=1, penalty='l2', random_state=None, + solver='default', tol=0.0001, verbose=0, warm_start=False) This is known as :class:`LogisticRegression`. diff --git a/sklearn/linear_model/logistic.py b/sklearn/linear_model/logistic.py index 6a8dcc3e961a4..80f9c265967bb 100644 --- a/sklearn/linear_model/logistic.py +++ b/sklearn/linear_model/logistic.py @@ -449,9 +449,9 @@ def _check_solver_option(solver, multi_class, penalty, dual): def logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True, max_iter=100, tol=1e-4, verbose=0, - solver='lbfgs', coef=None, + solver='default', coef=None, class_weight=None, dual=False, penalty='l2', - intercept_scaling=1., multi_class='ovr', + intercept_scaling=1., multi_class='default', random_state=None, check_input=True, max_squared_sum=None, sample_weight=None): """Compute a Logistic Regression model for a list of regularization @@ -500,6 +500,7 @@ def logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True, number for verbosity. solver : {'lbfgs', 'newton-cg', 'liblinear', 'sag', 'saga'} + default: 'default'. Will be changed to 'auto' solver in 0.22. Numerical solver to use. coef : array-like, shape (n_features,), default None @@ -540,6 +541,7 @@ def logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True, (and therefore on the intercept) intercept_scaling has to be increased. multi_class : str, {'ovr', 'multinomial'} + default: 'default'. Will be changed to 'multinomial' in 0.22. Multiclass option can be either 'ovr' or 'multinomial'. If the option chosen is 'ovr', then a binary problem is fit for each label. Else the loss minimised is the multinomial loss fit across @@ -587,6 +589,19 @@ def logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True, .. versionchanged:: 0.19 The "copy" parameter was removed. """ + if solver == 'default': + solver = 'lbfgs' + warnings.warn("Default solver will be changed from 'lbfgs' " + "to 'auto' solver in 0.22", FutureWarning) + elif solver == 'auto': + if penalty == 'l1': + solver = 'saga' + if penalty == 'l2': + solver = 'lbfgs' + if multi_class == 'default': + multi_class = 'ovr' + warnings.warn("Default multi_class will be changed from 'ovr' to" + " 'multinomial' in 0.22", FutureWarning) if isinstance(Cs, numbers.Integral): Cs = np.logspace(-4, 4, Cs) @@ -1043,7 +1058,7 @@ class LogisticRegression(BaseEstimator, LinearClassifierMixin, 'liblinear'. solver : {'newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'}, - default: 'liblinear' + default: 'default'. Will be changed to 'auto' solver in 0.22. Algorithm to use in the optimization problem. - For small datasets, 'liblinear' is a good choice, whereas 'sag' and @@ -1058,16 +1073,25 @@ class LogisticRegression(BaseEstimator, LinearClassifierMixin, features with approximately the same scale. 
You can preprocess the data with a scaler from sklearn.preprocessing. + The solver 'auto' selects 'lbfgs' if penalty is 'l2' and 'saga' if + penalty is 'l1'. Note that 'saga' may suffer from slow convergence + issues on small datasets. The only other solver supporting 'l1' is + 'liblinear', which requires multiclass='ovr' and which unfortunately + regularizes the intercept (see 'intercept_scaling'). + .. versionadded:: 0.17 Stochastic Average Gradient descent solver. .. versionadded:: 0.19 SAGA solver. + .. versionadded:: 0.20 + auto solver max_iter : int, default: 100 Useful only for the newton-cg, sag and lbfgs solvers. Maximum number of iterations taken for the solvers to converge. - multi_class : str, {'ovr', 'multinomial'}, default: 'ovr' + multi_class : str, {'ovr', 'multinomial'}, + default: 'default'. Will be changed to 'multinomial' in 0.22. Multiclass option can be either 'ovr' or 'multinomial'. If the option chosen is 'ovr', then a binary problem is fit for each label. Else the loss minimised is the multinomial loss fit across @@ -1160,8 +1184,8 @@ class LogisticRegression(BaseEstimator, LinearClassifierMixin, def __init__(self, penalty='l2', dual=False, tol=1e-4, C=1.0, fit_intercept=True, intercept_scaling=1, class_weight=None, - random_state=None, solver='liblinear', max_iter=100, - multi_class='ovr', verbose=0, warm_start=False, n_jobs=1): + random_state=None, solver='default', max_iter=100, + multi_class='default', verbose=0, warm_start=False, n_jobs=1): self.penalty = penalty self.dual = dual @@ -1201,6 +1225,23 @@ def fit(self, X, y, sample_weight=None): ------- self : object """ + if self.solver == 'default': + _solver = 'liblinear' + warnings.warn("Default solver will be changed from 'liblinear' to " + "'auto' solver in 0.22", FutureWarning) + elif self.solver == 'auto': + if self.penalty == 'l1': + _solver = 'saga' + if self.penalty == 'l2': + _solver = 'lbfgs' + else: + _solver = self.solver + if self.multi_class == 'default': + _multi_class = 'ovr' + warnings.warn("Default multi_class will be changed from 'ovr' to " + "'multinomial' in 0.22", FutureWarning) + else: + _multi_class = self.multi_class if not isinstance(self.C, numbers.Number) or self.C < 0: raise ValueError("Penalty term must be positive; got (C=%r)" % self.C) @@ -1211,7 +1252,7 @@ def fit(self, X, y, sample_weight=None): raise ValueError("Tolerance for stopping criteria must be " "positive; got (tol=%r)" % self.tol) - if self.solver in ['newton-cg']: + if _solver in ['newton-cg']: _dtype = [np.float64, np.float32] else: _dtype = np.float64 @@ -1222,10 +1263,10 @@ def fit(self, X, y, sample_weight=None): self.classes_ = np.unique(y) n_samples, n_features = X.shape - _check_solver_option(self.solver, self.multi_class, self.penalty, + _check_solver_option(_solver, _multi_class, self.penalty, self.dual) - if self.solver == 'liblinear': + if _solver == 'liblinear': if self.n_jobs != 1: warnings.warn("'n_jobs' > 1 does not have any effect when" " 'solver' is set to 'liblinear'. Got 'n_jobs'" @@ -1238,7 +1279,7 @@ def fit(self, X, y, sample_weight=None): self.n_iter_ = np.array([n_iter_]) return self - if self.solver in ['sag', 'saga']: + if _solver in ['sag', 'saga']: max_squared_sum = row_norms(X, squared=True).max() else: max_squared_sum = None @@ -1267,7 +1308,7 @@ def fit(self, X, y, sample_weight=None): self.intercept_ = np.zeros(n_classes) # Hack so that we iterate only once for the multinomial case. 
- if self.multi_class == 'multinomial': + if _multi_class == 'multinomial': classes_ = [None] warm_start_coef = [warm_start_coef] if warm_start_coef is None: @@ -1277,7 +1318,7 @@ def fit(self, X, y, sample_weight=None): # The SAG solver releases the GIL so it's more efficient to use # threads for this solver. - if self.solver in ['sag', 'saga']: + if _solver in ['sag', 'saga']: backend = 'threading' else: backend = 'multiprocessing' @@ -1285,8 +1326,8 @@ def fit(self, X, y, sample_weight=None): backend=backend)( path_func(X, y, pos_class=class_, Cs=[self.C], fit_intercept=self.fit_intercept, tol=self.tol, - verbose=self.verbose, solver=self.solver, - multi_class=self.multi_class, max_iter=self.max_iter, + verbose=self.verbose, solver=_solver, + multi_class=_multi_class, max_iter=self.max_iter, class_weight=self.class_weight, check_input=False, random_state=self.random_state, coef=warm_start_coef_, penalty=self.penalty, @@ -1297,7 +1338,7 @@ def fit(self, X, y, sample_weight=None): fold_coefs_, _, n_iter_ = zip(*fold_coefs_) self.n_iter_ = np.asarray(n_iter_, dtype=np.int32)[:, 0] - if self.multi_class == 'multinomial': + if _multi_class == 'multinomial': self.coef_ = fold_coefs_[0][0] else: self.coef_ = np.asarray(fold_coefs_) @@ -1335,7 +1376,11 @@ def predict_proba(self, X): """ if not hasattr(self, "coef_"): raise NotFittedError("Call fit before prediction") - if self.multi_class == "ovr": + if self.multi_class == 'default': + _multi_class = 'ovr' + else: + _multi_class = self.multi_class + if _multi_class == "ovr": return super(LogisticRegression, self)._predict_proba_lr(X) else: decision = self.decision_function(X) @@ -1425,7 +1470,7 @@ class LogisticRegressionCV(LogisticRegression, BaseEstimator, default scoring option used is 'accuracy'. solver : {'newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'}, - default: 'lbfgs' + default: 'default'. Will be changed to 'auto' solver in 0.22. Algorithm to use in the optimization problem. - For small datasets, 'liblinear' is a good choice, whereas 'sag' and @@ -1442,6 +1487,14 @@ class LogisticRegressionCV(LogisticRegression, BaseEstimator, features with approximately the same scale. You can preprocess the data with a scaler from sklearn.preprocessing. + The solver 'auto' selects 'lbfgs' if penalty is 'l2' and 'saga' if + penalty is 'l1'. Note that 'saga' may suffer from slow convergence + issues on small datasets. The only other solver supporting 'l1' is + 'liblinear', which requires multiclass='ovr' and which unfortunately + regularizes the intercept (see 'intercept_scaling'). + + .. versionadded:: 0.20 + auto solver .. versionadded:: 0.17 Stochastic Average Gradient descent solver. .. versionadded:: 0.19 @@ -1496,6 +1549,7 @@ class LogisticRegressionCV(LogisticRegression, BaseEstimator, (and therefore on the intercept) intercept_scaling has to be increased. multi_class : str, {'ovr', 'multinomial'} + default: 'default'. Will be changed to 'multinomial' in 0.22 Multiclass option can be either 'ovr' or 'multinomial'. If the option chosen is 'ovr', then a binary problem is fit for each label. 
Else the loss minimised is the multinomial loss fit across @@ -1565,9 +1619,9 @@ class LogisticRegressionCV(LogisticRegression, BaseEstimator, """ def __init__(self, Cs=10, fit_intercept=True, cv=None, dual=False, - penalty='l2', scoring=None, solver='lbfgs', tol=1e-4, + penalty='l2', scoring=None, solver='default', tol=1e-4, max_iter=100, class_weight=None, n_jobs=1, verbose=0, - refit=True, intercept_scaling=1., multi_class='ovr', + refit=True, intercept_scaling=1., multi_class='default', random_state=None): self.Cs = Cs self.fit_intercept = fit_intercept @@ -1606,7 +1660,24 @@ def fit(self, X, y, sample_weight=None): ------- self : object """ - _check_solver_option(self.solver, self.multi_class, self.penalty, + if self.solver == 'default': + _solver = 'lbfgs' + warnings.warn("Default solver will be changed from 'lbfgs' " + "to 'auto' solver in 0.22", FutureWarning) + elif self.solver == 'auto': + if self.penalty == 'l1': + _solver = 'saga' + if self.penalty == 'l2': + _solver = 'lbfgs' + else: + _solver = self.solver + if self.multi_class == 'default': + _multi_class = 'ovr' + warnings.warn("Default multi_class will be changed from 'ovr' to" + " 'multinomial' in 0.22", FutureWarning) + else: + _multi_class = self.multi_class + _check_solver_option(_solver, _multi_class, self.penalty, self.dual) if not isinstance(self.max_iter, numbers.Number) or self.max_iter < 0: @@ -1633,7 +1704,7 @@ def fit(self, X, y, sample_weight=None): classes = self.classes_ = label_encoder.classes_ encoded_labels = label_encoder.transform(label_encoder.classes_) - if self.solver in ['sag', 'saga']: + if _solver in ['sag', 'saga']: max_squared_sum = row_norms(X, squared=True).max() else: max_squared_sum = None @@ -1659,7 +1730,7 @@ def fit(self, X, y, sample_weight=None): # We need this hack to iterate only once over labels, in the case of # multi_class = multinomial, without changing the value of the labels. - if self.multi_class == 'multinomial': + if _multi_class == 'multinomial': iter_encoded_labels = iter_classes = [None] else: iter_encoded_labels = encoded_labels @@ -1676,7 +1747,7 @@ def fit(self, X, y, sample_weight=None): # The SAG solver releases the GIL so it's more efficient to use # threads for this solver. - if self.solver in ['sag', 'saga']: + if _solver in ['sag', 'saga']: backend = 'threading' else: backend = 'multiprocessing' @@ -1684,10 +1755,10 @@ def fit(self, X, y, sample_weight=None): backend=backend)( path_func(X, y, train, test, pos_class=label, Cs=self.Cs, fit_intercept=self.fit_intercept, penalty=self.penalty, - dual=self.dual, solver=self.solver, tol=self.tol, + dual=self.dual, solver=_solver, tol=self.tol, max_iter=self.max_iter, verbose=self.verbose, class_weight=class_weight, scoring=self.scoring, - multi_class=self.multi_class, + multi_class=_multi_class, intercept_scaling=self.intercept_scaling, random_state=self.random_state, max_squared_sum=max_squared_sum, @@ -1696,7 +1767,7 @@ def fit(self, X, y, sample_weight=None): for label in iter_encoded_labels for train, test in folds) - if self.multi_class == 'multinomial': + if _multi_class == 'multinomial': multi_coefs_paths, Cs, multi_scores, n_iter_ = zip(*fold_coefs_) multi_coefs_paths = np.asarray(multi_coefs_paths) multi_scores = np.asarray(multi_scores) @@ -1733,14 +1804,14 @@ def fit(self, X, y, sample_weight=None): self.intercept_ = np.zeros(n_classes) # hack to iterate only once for multinomial case. 
- if self.multi_class == 'multinomial': + if _multi_class == 'multinomial': scores = multi_scores coefs_paths = multi_coefs_paths for index, (cls, encoded_label) in enumerate( zip(iter_classes, iter_encoded_labels)): - if self.multi_class == 'ovr': + if _multi_class == 'ovr': # The scores_ / coefs_paths_ dict have unencoded class # labels as their keys scores = self.scores_[cls] @@ -1751,7 +1822,7 @@ def fit(self, X, y, sample_weight=None): C_ = self.Cs_[best_index] self.C_.append(C_) - if self.multi_class == 'multinomial': + if _multi_class == 'multinomial': coef_init = np.mean(coefs_paths[:, best_index, :, :], axis=0) else: @@ -1760,12 +1831,12 @@ def fit(self, X, y, sample_weight=None): # Note that y is label encoded and hence pos_class must be # the encoded label / None (for 'multinomial') w, _, _ = logistic_regression_path( - X, y, pos_class=encoded_label, Cs=[C_], solver=self.solver, + X, y, pos_class=encoded_label, Cs=[C_], solver=_solver, fit_intercept=self.fit_intercept, coef=coef_init, max_iter=self.max_iter, tol=self.tol, penalty=self.penalty, class_weight=class_weight, - multi_class=self.multi_class, + multi_class=_multi_class, verbose=max(0, self.verbose - 1), random_state=self.random_state, check_input=False, max_squared_sum=max_squared_sum, @@ -1780,7 +1851,7 @@ def fit(self, X, y, sample_weight=None): for i in range(len(folds))], axis=0) self.C_.append(np.mean(self.Cs_[best_indices])) - if self.multi_class == 'multinomial': + if _multi_class == 'multinomial': self.C_ = np.tile(self.C_, n_classes) self.coef_ = w[:, :X.shape[1]] if self.fit_intercept: diff --git a/sklearn/linear_model/sag.py b/sklearn/linear_model/sag.py index 39b817da1b0e2..2113907b810c7 100644 --- a/sklearn/linear_model/sag.py +++ b/sklearn/linear_model/sag.py @@ -217,7 +217,7 @@ def sag_solver(X, y, sample_weight=None, loss='log', alpha=1., beta=0., ... 
#doctest: +NORMALIZE_WHITESPACE LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True, intercept_scaling=1, max_iter=100, - multi_class='ovr', n_jobs=1, penalty='l2', random_state=None, + multi_class='default', n_jobs=1, penalty='l2', random_state=None, solver='sag', tol=0.0001, verbose=0, warm_start=False) References diff --git a/sklearn/linear_model/tests/test_logistic.py b/sklearn/linear_model/tests/test_logistic.py index 46ce635daf830..8c32780b8e498 100644 --- a/sklearn/linear_model/tests/test_logistic.py +++ b/sklearn/linear_model/tests/test_logistic.py @@ -1,5 +1,6 @@ import numpy as np import scipy.sparse as sp +import pytest from scipy import linalg, optimize, sparse from sklearn.datasets import load_iris, make_classification from sklearn.metrics import log_loss @@ -17,6 +18,7 @@ from sklearn.utils.testing import assert_warns from sklearn.utils.testing import ignore_warnings from sklearn.utils.testing import assert_warns_message +from sklearn.utils.testing import assert_no_warnings from sklearn.exceptions import ConvergenceWarning from sklearn.linear_model.logistic import ( @@ -101,6 +103,43 @@ def test_lr_liblinear_warning(): lr.fit, iris.data, target) +@pytest.mark.parametrize('model, default_solver', + [(LogisticRegression, 'liblinear'), + (LogisticRegressionCV, 'lbfgs')]) +def test_logistic_regression_warnings(model, default_solver): + # Test logistic regression with the iris dataset + n_samples, n_features = iris.data.shape + target = iris.target_names[iris.target] + + clf_solver_warning = model(multi_class='ovr') + clf_multi_class_warning = model(solver='lbfgs') + clf_no_warnings = model(solver='lbfgs', + multi_class='multinomial') + + solver_warning_msg = "Default solver will be changed from '{}' " \ + "to 'auto' solver in 0.22".format(default_solver) + multi_class_warning_msg = "Default multi_class will be changed from" \ + " 'ovr' to 'multinomial' in 0.22" + + assert_warns_message(FutureWarning, solver_warning_msg, + clf_solver_warning.fit, iris.data, target) + assert_warns_message(FutureWarning, multi_class_warning_msg, + clf_multi_class_warning.fit, iris.data, target) + assert_no_warnings(clf_no_warnings.fit, iris.data, target) + + +@pytest.mark.parametrize('model', [LogisticRegression, LogisticRegressionCV]) +def test_logistic_regression_auto(model): + # Test logistic regression with auto mode + n_samples, n_features = iris.data.shape + target = iris.target_names[iris.target] + clf_solver_l1 = model(penalty='l1', solver='auto') + clf_solver_l2 = model(penalty='l2', solver='auto') + + clf_solver_l1.fit(iris.data, target) + clf_solver_l2.fit(iris.data, target) + + def test_predict_3_classes(): check_predictions(LogisticRegression(C=10), X, Y2) check_predictions(LogisticRegression(C=10), X_sp, Y2)
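
The patch above changes user-visible defaults, so a caller-side view of the new behaviour may help review. The sketch below is illustrative only and assumes this patch is applied to scikit-learn: the `'auto'` solver and the `'default'` sentinel values do not exist in released versions, and the exact warning text is taken from the diff. It exercises the FutureWarnings emitted when `solver` / `multi_class` are left at their defaults, and the penalty-based solver selection described in the new docstrings (`'saga'` for L1, `'lbfgs'` for L2).

```python
# Illustrative sketch only: requires the patch in this diff; released
# scikit-learn versions will not accept solver='auto' or warn on defaults.
import warnings

from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression

X, y = load_iris(return_X_y=True)

# 1) Leaving solver and multi_class at their defaults triggers the two
#    FutureWarnings added by this patch.
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    LogisticRegression().fit(X, y)
print([str(w.message) for w in caught
       if issubclass(w.category, FutureWarning)])
# Expected (per the patch):
#   "Default solver will be changed from 'liblinear' to 'auto' solver in 0.22"
#   "Default multi_class will be changed from 'ovr' to 'multinomial' in 0.22"

# 2) solver='auto' picks the solver from the penalty: 'saga' for L1,
#    'lbfgs' for L2, as documented in the new docstring text.
clf_l1 = LogisticRegression(penalty='l1', solver='auto',
                            multi_class='ovr').fit(X, y)
clf_l2 = LogisticRegression(penalty='l2', solver='auto',
                            multi_class='ovr').fit(X, y)

# 3) Passing both options explicitly avoids the deprecation warnings,
#    mirroring clf_no_warnings in the new test.
clf = LogisticRegression(solver='lbfgs', multi_class='multinomial').fit(X, y)
```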