From 9b7224389a94205e4590029cebd21fbba508e504 Mon Sep 17 00:00:00 2001 From: "kumarashutosh.ee@gmail.com" Date: Wed, 25 Oct 2017 16:43:36 +0530 Subject: [PATCH 01/27] added FutureWarning --- sklearn/linear_model/logistic.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/sklearn/linear_model/logistic.py b/sklearn/linear_model/logistic.py index 7c8a8d9ae4614..0a09143c08710 100644 --- a/sklearn/linear_model/logistic.py +++ b/sklearn/linear_model/logistic.py @@ -1155,8 +1155,8 @@ class LogisticRegression(BaseEstimator, LinearClassifierMixin, def __init__(self, penalty='l2', dual=False, tol=1e-4, C=1.0, fit_intercept=True, intercept_scaling=1, class_weight=None, - random_state=None, solver='liblinear', max_iter=100, - multi_class='ovr', verbose=0, warm_start=False, n_jobs=1): + random_state=None, solver='default', max_iter=100, + multi_class='default', verbose=0, warm_start=False, n_jobs=1): self.penalty = penalty self.dual = dual @@ -1197,6 +1197,14 @@ def fit(self, X, y, sample_weight=None): self : object Returns self. 
""" + if self.solver == 'default': + self.solver = 'liblinear' + warnings.warn("Default solver will be changed to 'lbfgs' in 0.22", + FutureWarning) + if self.multi_class == 'default': + self.multi_class = 'multinomial' + warnings.warn("Default multi_class will be changed to " + "'multinomial' in 0.22", FutureWarning) if not isinstance(self.C, numbers.Number) or self.C < 0: raise ValueError("Penalty term must be positive; got (C=%r)" % self.C) From 5bb61e8e892f8636aeaf1930e9fb16344d328509 Mon Sep 17 00:00:00 2001 From: "kumarashutosh.ee@gmail.com" Date: Mon, 30 Oct 2017 17:57:25 +0530 Subject: [PATCH 02/27] default changed to auto --- sklearn/linear_model/logistic.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sklearn/linear_model/logistic.py b/sklearn/linear_model/logistic.py index 0a09143c08710..8cd3e6b8922d8 100644 --- a/sklearn/linear_model/logistic.py +++ b/sklearn/linear_model/logistic.py @@ -1155,8 +1155,8 @@ class LogisticRegression(BaseEstimator, LinearClassifierMixin, def __init__(self, penalty='l2', dual=False, tol=1e-4, C=1.0, fit_intercept=True, intercept_scaling=1, class_weight=None, - random_state=None, solver='default', max_iter=100, - multi_class='default', verbose=0, warm_start=False, n_jobs=1): + random_state=None, solver='auto', max_iter=100, + multi_class='auto', verbose=0, warm_start=False, n_jobs=1): self.penalty = penalty self.dual = dual @@ -1197,12 +1197,12 @@ def fit(self, X, y, sample_weight=None): self : object Returns self. 
""" - if self.solver == 'default': + if self.solver == 'auto': self.solver = 'liblinear' warnings.warn("Default solver will be changed to 'lbfgs' in 0.22", FutureWarning) - if self.multi_class == 'default': - self.multi_class = 'multinomial' + if self.multi_class == 'auto': + self.multi_class = 'ovr' warnings.warn("Default multi_class will be changed to " "'multinomial' in 0.22", FutureWarning) if not isinstance(self.C, numbers.Number) or self.C < 0: From b8776201576e920951c5afae496ebf3378e7a5b8 Mon Sep 17 00:00:00 2001 From: "kumarashutosh.ee@gmail.com" Date: Wed, 6 Dec 2017 02:36:22 +0530 Subject: [PATCH 03/27] changes added --- sklearn/linear_model/logistic.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sklearn/linear_model/logistic.py b/sklearn/linear_model/logistic.py index 527a28b3d04ab..f2c8ac53de7ff 100644 --- a/sklearn/linear_model/logistic.py +++ b/sklearn/linear_model/logistic.py @@ -1198,11 +1198,11 @@ def fit(self, X, y, sample_weight=None): self : object Returns self. 
""" - if self.solver == 'auto': + if self.solver == 'auto' and self.penalty == 'l2': self.solver = 'liblinear' - warnings.warn("Default solver will be changed to 'lbfgs' in 0.22", + warnings.warn("Auto solver will be changed to 'lbfgs' in 0.22", FutureWarning) - if self.multi_class == 'auto': + if self.multi_class == 'auto' and self.solver != 'liblinear': self.multi_class = 'ovr' warnings.warn("Default multi_class will be changed to " "'multinomial' in 0.22", FutureWarning) From 803b6f72bc3c0b5262f6ee66f8ca68abcc1492b3 Mon Sep 17 00:00:00 2001 From: "kumarashutosh.ee@gmail.com" Date: Wed, 6 Dec 2017 04:34:08 +0530 Subject: [PATCH 04/27] auto added in allowed multi_class --- sklearn/linear_model/logistic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/linear_model/logistic.py b/sklearn/linear_model/logistic.py index f2c8ac53de7ff..178b5b7254615 100644 --- a/sklearn/linear_model/logistic.py +++ b/sklearn/linear_model/logistic.py @@ -429,7 +429,7 @@ def _check_solver_option(solver, multi_class, penalty, dual): "newton-cg, lbfgs, sag and saga solvers, got %s" % solver) - if multi_class not in ['multinomial', 'ovr']: + if multi_class not in ['multinomial', 'ovr', 'auto']: raise ValueError("multi_class should be either multinomial or " "ovr, got %s" % multi_class) From 1c7b5027dc878eb213c822c382d76740f350656b Mon Sep 17 00:00:00 2001 From: "kumarashutosh.ee@gmail.com" Date: Wed, 6 Dec 2017 05:01:30 +0530 Subject: [PATCH 05/27] changes added --- sklearn/linear_model/logistic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/linear_model/logistic.py b/sklearn/linear_model/logistic.py index 178b5b7254615..95fc7de7676f2 100644 --- a/sklearn/linear_model/logistic.py +++ b/sklearn/linear_model/logistic.py @@ -925,7 +925,7 @@ def _log_reg_scoring_path(X, y, train, test, pos_class=None, Cs=10, log_reg = LogisticRegression(fit_intercept=fit_intercept) # The score method of Logistic Regression has a classes_ attribute. 
- if multi_class == 'ovr': + if multi_class == 'ovr' or multi_class == 'auto': log_reg.classes_ = np.array([-1, 1]) elif multi_class == 'multinomial': log_reg.classes_ = np.unique(y_train) From 16fa162a511805e6ec4a119bd22ec14783b21a80 Mon Sep 17 00:00:00 2001 From: "kumarashutosh.ee@gmail.com" Date: Wed, 6 Dec 2017 05:44:56 +0530 Subject: [PATCH 06/27] auto as solver added --- sklearn/linear_model/logistic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/linear_model/logistic.py b/sklearn/linear_model/logistic.py index 95fc7de7676f2..b9719f022b1b7 100644 --- a/sklearn/linear_model/logistic.py +++ b/sklearn/linear_model/logistic.py @@ -424,7 +424,7 @@ def hessp(v): def _check_solver_option(solver, multi_class, penalty, dual): - if solver not in ['liblinear', 'newton-cg', 'lbfgs', 'sag', 'saga']: + if solver not in ['liblinear', 'newton-cg', 'lbfgs', 'sag', 'saga', 'auto']: raise ValueError("Logistic Regression supports only liblinear, " "newton-cg, lbfgs, sag and saga solvers, got %s" % solver) From d672f1e0aff8d404e4538c84fc6c68f8e829c858 Mon Sep 17 00:00:00 2001 From: "kumarashutosh.ee@gmail.com" Date: Wed, 6 Dec 2017 06:21:36 +0530 Subject: [PATCH 07/27] changes added --- sklearn/linear_model/logistic.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/sklearn/linear_model/logistic.py b/sklearn/linear_model/logistic.py index b9719f022b1b7..7bc5c14823bf6 100644 --- a/sklearn/linear_model/logistic.py +++ b/sklearn/linear_model/logistic.py @@ -424,12 +424,16 @@ def hessp(v): def _check_solver_option(solver, multi_class, penalty, dual): - if solver not in ['liblinear', 'newton-cg', 'lbfgs', 'sag', 'saga', 'auto']: + if self.solver == 'auto' and self.penalty == 'l2': + self.solver = 'liblinear' + if self.multi_class == 'auto' and self.solver != 'liblinear': + self.multi_class = 'ovr' + if solver not in ['liblinear', 'newton-cg', 'lbfgs', 'sag', 'saga']: raise ValueError("Logistic Regression supports only 
liblinear, " "newton-cg, lbfgs, sag and saga solvers, got %s" % solver) - if multi_class not in ['multinomial', 'ovr', 'auto']: + if multi_class not in ['multinomial', 'ovr']: raise ValueError("multi_class should be either multinomial or " "ovr, got %s" % multi_class) @@ -925,7 +929,7 @@ def _log_reg_scoring_path(X, y, train, test, pos_class=None, Cs=10, log_reg = LogisticRegression(fit_intercept=fit_intercept) # The score method of Logistic Regression has a classes_ attribute. - if multi_class == 'ovr' or multi_class == 'auto': + if multi_class == 'ovr': log_reg.classes_ = np.array([-1, 1]) elif multi_class == 'multinomial': log_reg.classes_ = np.unique(y_train) From e215d9f08b8e5471ea649972990b3996e9c8232c Mon Sep 17 00:00:00 2001 From: "kumarashutosh.ee@gmail.com" Date: Wed, 6 Dec 2017 06:30:39 +0530 Subject: [PATCH 08/27] indentation corrected --- sklearn/linear_model/logistic.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sklearn/linear_model/logistic.py b/sklearn/linear_model/logistic.py index 7bc5c14823bf6..11a2ff9baa53f 100644 --- a/sklearn/linear_model/logistic.py +++ b/sklearn/linear_model/logistic.py @@ -425,9 +425,9 @@ def hessp(v): def _check_solver_option(solver, multi_class, penalty, dual): if self.solver == 'auto' and self.penalty == 'l2': - self.solver = 'liblinear' - if self.multi_class == 'auto' and self.solver != 'liblinear': - self.multi_class = 'ovr' + self.solver = 'liblinear' + if self.multi_class == 'auto' and self.solver != 'liblinear': + self.multi_class = 'ovr' if solver not in ['liblinear', 'newton-cg', 'lbfgs', 'sag', 'saga']: raise ValueError("Logistic Regression supports only liblinear, " "newton-cg, lbfgs, sag and saga solvers, got %s" From edb071e1f9313de995f28c497407ace8b6eaddc7 Mon Sep 17 00:00:00 2001 From: "kumarashutosh.ee@gmail.com" Date: Wed, 6 Dec 2017 06:37:19 +0530 Subject: [PATCH 09/27] self removed --- sklearn/linear_model/logistic.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 
deletions(-) diff --git a/sklearn/linear_model/logistic.py b/sklearn/linear_model/logistic.py index 11a2ff9baa53f..fc8477238452a 100644 --- a/sklearn/linear_model/logistic.py +++ b/sklearn/linear_model/logistic.py @@ -424,10 +424,10 @@ def hessp(v): def _check_solver_option(solver, multi_class, penalty, dual): - if self.solver == 'auto' and self.penalty == 'l2': - self.solver = 'liblinear' - if self.multi_class == 'auto' and self.solver != 'liblinear': - self.multi_class = 'ovr' + if solver == 'auto' and penalty == 'l2': + solver = 'liblinear' + if multi_class == 'auto' and solver != 'liblinear': + multi_class = 'ovr' if solver not in ['liblinear', 'newton-cg', 'lbfgs', 'sag', 'saga']: raise ValueError("Logistic Regression supports only liblinear, " "newton-cg, lbfgs, sag and saga solvers, got %s" From 06d08d3db4f47b411623b26a1956375fdfab2842 Mon Sep 17 00:00:00 2001 From: "kumarashutosh.ee@gmail.com" Date: Sat, 23 Dec 2017 02:52:02 +0530 Subject: [PATCH 10/27] reformed --- sklearn/linear_model/logistic.py | 40 ++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/sklearn/linear_model/logistic.py b/sklearn/linear_model/logistic.py index fc8477238452a..ffd02621490ca 100644 --- a/sklearn/linear_model/logistic.py +++ b/sklearn/linear_model/logistic.py @@ -424,10 +424,6 @@ def hessp(v): def _check_solver_option(solver, multi_class, penalty, dual): - if solver == 'auto' and penalty == 'l2': - solver = 'liblinear' - if multi_class == 'auto' and solver != 'liblinear': - multi_class = 'ovr' if solver not in ['liblinear', 'newton-cg', 'lbfgs', 'sag', 'saga']: raise ValueError("Logistic Regression supports only liblinear, " "newton-cg, lbfgs, sag and saga solvers, got %s" @@ -1202,14 +1198,18 @@ def fit(self, X, y, sample_weight=None): self : object Returns self. 
""" - if self.solver == 'auto' and self.penalty == 'l2': - self.solver = 'liblinear' + if self.solver == 'auto': + _solver = 'liblinear' warnings.warn("Auto solver will be changed to 'lbfgs' in 0.22", FutureWarning) - if self.multi_class == 'auto' and self.solver != 'liblinear': - self.multi_class = 'ovr' + else: + _solver = self.solver + if self.multi_class == 'auto': + _multi_class = 'ovr' warnings.warn("Default multi_class will be changed to " "'multinomial' in 0.22", FutureWarning) + else: + _multi_class = self.multi_class if not isinstance(self.C, numbers.Number) or self.C < 0: raise ValueError("Penalty term must be positive; got (C=%r)" % self.C) @@ -1220,7 +1220,7 @@ def fit(self, X, y, sample_weight=None): raise ValueError("Tolerance for stopping criteria must be " "positive; got (tol=%r)" % self.tol) - if self.solver in ['newton-cg']: + if _solver in ['newton-cg']: _dtype = [np.float64, np.float32] else: _dtype = np.float64 @@ -1231,10 +1231,10 @@ def fit(self, X, y, sample_weight=None): self.classes_ = np.unique(y) n_samples, n_features = X.shape - _check_solver_option(self.solver, self.multi_class, self.penalty, + _check_solver_option(_solver, _multi_class, self.penalty, self.dual) - if self.solver == 'liblinear': + if _solver == 'liblinear': if self.n_jobs != 1: warnings.warn("'n_jobs' > 1 does not have any effect when" " 'solver' is set to 'liblinear'. Got 'n_jobs'" @@ -1247,7 +1247,7 @@ def fit(self, X, y, sample_weight=None): self.n_iter_ = np.array([n_iter_]) return self - if self.solver in ['sag', 'saga']: + if _solver in ['sag', 'saga']: max_squared_sum = row_norms(X, squared=True).max() else: max_squared_sum = None @@ -1276,7 +1276,7 @@ def fit(self, X, y, sample_weight=None): self.intercept_ = np.zeros(n_classes) # Hack so that we iterate only once for the multinomial case. 
- if self.multi_class == 'multinomial': + if _multi_class == 'multinomial': classes_ = [None] warm_start_coef = [warm_start_coef] if warm_start_coef is None: @@ -1286,7 +1286,7 @@ def fit(self, X, y, sample_weight=None): # The SAG solver releases the GIL so it's more efficient to use # threads for this solver. - if self.solver in ['sag', 'saga']: + if _solver in ['sag', 'saga']: backend = 'threading' else: backend = 'multiprocessing' @@ -1294,8 +1294,8 @@ def fit(self, X, y, sample_weight=None): backend=backend)( path_func(X, y, pos_class=class_, Cs=[self.C], fit_intercept=self.fit_intercept, tol=self.tol, - verbose=self.verbose, solver=self.solver, - multi_class=self.multi_class, max_iter=self.max_iter, + verbose=self.verbose, solver=_solver, + multi_class=_multi_class, max_iter=self.max_iter, class_weight=self.class_weight, check_input=False, random_state=self.random_state, coef=warm_start_coef_, penalty=self.penalty, @@ -1306,7 +1306,7 @@ def fit(self, X, y, sample_weight=None): fold_coefs_, _, n_iter_ = zip(*fold_coefs_) self.n_iter_ = np.asarray(n_iter_, dtype=np.int32)[:, 0] - if self.multi_class == 'multinomial': + if _multi_class == 'multinomial': self.coef_ = fold_coefs_[0][0] else: self.coef_ = np.asarray(fold_coefs_) @@ -1344,7 +1344,11 @@ def predict_proba(self, X): """ if not hasattr(self, "coef_"): raise NotFittedError("Call fit before prediction") - calculate_ovr = self.coef_.shape[0] == 1 or self.multi_class == "ovr" + if self.multi_class == 'auto': + _multi_class = 'ovr' + else: + _multi_class = self.multi_class + calculate_ovr = self.coef_.shape[0] == 1 or _multi_class == "ovr" if calculate_ovr: return super(LogisticRegression, self)._predict_proba_lr(X) else: From 7079567d8bf59e8c2e6dbb3c24c4415d8f3f815d Mon Sep 17 00:00:00 2001 From: "kumarashutosh.ee@gmail.com" Date: Sat, 23 Dec 2017 03:10:14 +0530 Subject: [PATCH 11/27] doctest corrected --- sklearn/linear_model/sag.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/sklearn/linear_model/sag.py b/sklearn/linear_model/sag.py index 39b817da1b0e2..4d4d74fc736ff 100644 --- a/sklearn/linear_model/sag.py +++ b/sklearn/linear_model/sag.py @@ -217,7 +217,7 @@ def sag_solver(X, y, sample_weight=None, loss='log', alpha=1., beta=0., ... #doctest: +NORMALIZE_WHITESPACE LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True, intercept_scaling=1, max_iter=100, - multi_class='ovr', n_jobs=1, penalty='l2', random_state=None, + multi_class='auto', n_jobs=1, penalty='l2', random_state=None, solver='sag', tol=0.0001, verbose=0, warm_start=False) References From c92c6ddfd6fbeff92648359d0d4f4859a17f5020 Mon Sep 17 00:00:00 2001 From: "kumarashutosh.ee@gmail.com" Date: Sat, 23 Dec 2017 09:44:25 +0530 Subject: [PATCH 12/27] Empty commit From 23777e912df4569d4e1330f052ef1e9b7a7407ea Mon Sep 17 00:00:00 2001 From: "kumarashutosh.ee@gmail.com" Date: Sat, 23 Dec 2017 10:14:16 +0530 Subject: [PATCH 13/27] doctest updated --- doc/tutorial/statistical_inference/supervised_learning.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/tutorial/statistical_inference/supervised_learning.rst b/doc/tutorial/statistical_inference/supervised_learning.rst index 6f53e9af791c5..9a93a43de52f3 100644 --- a/doc/tutorial/statistical_inference/supervised_learning.rst +++ b/doc/tutorial/statistical_inference/supervised_learning.rst @@ -372,8 +372,8 @@ function or **logistic** function: >>> logistic.fit(iris_X_train, iris_y_train) LogisticRegression(C=100000.0, class_weight=None, dual=False, fit_intercept=True, intercept_scaling=1, max_iter=100, - multi_class='ovr', n_jobs=1, penalty='l2', random_state=None, - solver='liblinear', tol=0.0001, verbose=0, warm_start=False) + multi_class='auto', n_jobs=1, penalty='l2', random_state=None, + solver='auto', tol=0.0001, verbose=0, warm_start=False) This is known as :class:`LogisticRegression`. 
From e9708ee9481a3d45561ddb8b3eebe447b3125859 Mon Sep 17 00:00:00 2001 From: "kumarashutosh.ee@gmail.com" Date: Tue, 9 Jan 2018 15:46:44 +0530 Subject: [PATCH 14/27] test added --- sklearn/linear_model/tests/test_logistic.py | 23 +++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/sklearn/linear_model/tests/test_logistic.py b/sklearn/linear_model/tests/test_logistic.py index 46ce635daf830..2a5abd127c943 100644 --- a/sklearn/linear_model/tests/test_logistic.py +++ b/sklearn/linear_model/tests/test_logistic.py @@ -101,6 +101,29 @@ def test_lr_liblinear_warning(): lr.fit, iris.data, target) +def test_logistic_regression_warnings(): + # Test logistic regression with the iris dataset + n_samples, n_features = iris.data.shape + target = iris.target_names[iris.target] + + clf_solver_warning = LogisticRegression(C=len(iris.data), + multi_class='ovr') + clf_multi_class_warning = LogisticRegression(C=len(iris.data), + solver='lbfgs') + clf_no_warnings = LogisticRegression(C=len(iris.data), solver='lbfgs', + multi_class='multinomial') + + solver_warning_msg = "Auto solver will be changed to 'lbfgs' in 0.22" + multi_class_warning_msg = "Default multi_class will be changed " \ + "to 'multinomial' in 0.22" + + assert_warns_message(FutureWarning, solver_warning_msg, + clf_solver_warning.fit, iris.data, target) + assert_warns_message(FutureWarning, multi_class_warning_msg, + clf_multi_class_warning.fit, iris.data, target) + assert_no_warnings(clf_no_warnings.fit, iris.data, target) + + def test_predict_3_classes(): check_predictions(LogisticRegression(C=10), X, Y2) check_predictions(LogisticRegression(C=10), X_sp, Y2) From 7af054af5a14085ca8b0b67ff723326977c67166 Mon Sep 17 00:00:00 2001 From: "kumarashutosh.ee@gmail.com" Date: Tue, 9 Jan 2018 16:18:40 +0530 Subject: [PATCH 15/27] import statement added --- sklearn/linear_model/tests/test_logistic.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sklearn/linear_model/tests/test_logistic.py 
b/sklearn/linear_model/tests/test_logistic.py index 2a5abd127c943..7289c944482d8 100644 --- a/sklearn/linear_model/tests/test_logistic.py +++ b/sklearn/linear_model/tests/test_logistic.py @@ -17,6 +17,7 @@ from sklearn.utils.testing import assert_warns from sklearn.utils.testing import ignore_warnings from sklearn.utils.testing import assert_warns_message +from sklearn.utils.testing import assert_no_warnings from sklearn.exceptions import ConvergenceWarning from sklearn.linear_model.logistic import ( From 73cbb8fae9a7f1534d86167d76bf9e19c3946133 Mon Sep 17 00:00:00 2001 From: Kumar Ashutosh Date: Sat, 13 Jan 2018 11:14:29 +0530 Subject: [PATCH 16/27] changes added --- sklearn/linear_model/logistic.py | 15 +++++++++------ sklearn/linear_model/tests/test_logistic.py | 7 ++++--- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/sklearn/linear_model/logistic.py b/sklearn/linear_model/logistic.py index f0c7796c1ad5e..a301310a31e5b 100644 --- a/sklearn/linear_model/logistic.py +++ b/sklearn/linear_model/logistic.py @@ -1161,7 +1161,7 @@ class LogisticRegression(BaseEstimator, LinearClassifierMixin, def __init__(self, penalty='l2', dual=False, tol=1e-4, C=1.0, fit_intercept=True, intercept_scaling=1, class_weight=None, random_state=None, solver='auto', max_iter=100, - multi_class='auto', verbose=0, warm_start=False, n_jobs=1): + multi_class='default', verbose=0, warm_start=False, n_jobs=1): self.penalty = penalty self.dual = dual @@ -1203,14 +1203,17 @@ def fit(self, X, y, sample_weight=None): Returns self. 
""" if self.solver == 'auto': - _solver = 'liblinear' - warnings.warn("Auto solver will be changed to 'lbfgs' in 0.22", - FutureWarning) + if self.penalty == 'l1': + _solver = 'liblinear' + if self.penalty == 'l2': + _solver = 'lbfgs' + warnings.warn("Default solver will be changed from 'liblinear' to " + " auto solver in 0.22", FutureWarning) else: _solver = self.solver - if self.multi_class == 'auto': + if self.multi_class == 'default': _multi_class = 'ovr' - warnings.warn("Default multi_class will be changed to " + warnings.warn("Default multi_class will be changed from 'ovr' to " "'multinomial' in 0.22", FutureWarning) else: _multi_class = self.multi_class diff --git a/sklearn/linear_model/tests/test_logistic.py b/sklearn/linear_model/tests/test_logistic.py index 7289c944482d8..25487588c3e31 100644 --- a/sklearn/linear_model/tests/test_logistic.py +++ b/sklearn/linear_model/tests/test_logistic.py @@ -114,9 +114,10 @@ def test_logistic_regression_warnings(): clf_no_warnings = LogisticRegression(C=len(iris.data), solver='lbfgs', multi_class='multinomial') - solver_warning_msg = "Auto solver will be changed to 'lbfgs' in 0.22" - multi_class_warning_msg = "Default multi_class will be changed " \ - "to 'multinomial' in 0.22" + solver_warning_msg = "Default solver will be changed from 'liblinear' " \ + "to auto solver in 0.22" + multi_class_warning_msg = "Default multi_class will be changed from" \ + " 'ovr' to 'multinomial' in 0.22" assert_warns_message(FutureWarning, solver_warning_msg, clf_solver_warning.fit, iris.data, target) From 3e0c6d0063e0f37e9e8fa153871eb1743d7693c7 Mon Sep 17 00:00:00 2001 From: Kumar Ashutosh Date: Mon, 15 Jan 2018 19:12:00 +0530 Subject: [PATCH 17/27] default restored --- .../statistical_inference/supervised_learning.rst | 4 ++-- sklearn/linear_model/logistic.py | 13 +++++-------- sklearn/linear_model/sag.py | 2 +- 3 files changed, 8 insertions(+), 11 deletions(-) diff --git a/doc/tutorial/statistical_inference/supervised_learning.rst 
b/doc/tutorial/statistical_inference/supervised_learning.rst index 9a93a43de52f3..b4e06b8f37bdc 100644 --- a/doc/tutorial/statistical_inference/supervised_learning.rst +++ b/doc/tutorial/statistical_inference/supervised_learning.rst @@ -372,8 +372,8 @@ function or **logistic** function: >>> logistic.fit(iris_X_train, iris_y_train) LogisticRegression(C=100000.0, class_weight=None, dual=False, fit_intercept=True, intercept_scaling=1, max_iter=100, - multi_class='auto', n_jobs=1, penalty='l2', random_state=None, - solver='auto', tol=0.0001, verbose=0, warm_start=False) + multi_class='default', n_jobs=1, penalty='l2', random_state=None, + solver='default', tol=0.0001, verbose=0, warm_start=False) This is known as :class:`LogisticRegression`. diff --git a/sklearn/linear_model/logistic.py b/sklearn/linear_model/logistic.py index a301310a31e5b..73ad1ffa370ec 100644 --- a/sklearn/linear_model/logistic.py +++ b/sklearn/linear_model/logistic.py @@ -1160,7 +1160,7 @@ class LogisticRegression(BaseEstimator, LinearClassifierMixin, def __init__(self, penalty='l2', dual=False, tol=1e-4, C=1.0, fit_intercept=True, intercept_scaling=1, class_weight=None, - random_state=None, solver='auto', max_iter=100, + random_state=None, solver='default', max_iter=100, multi_class='default', verbose=0, warm_start=False, n_jobs=1): self.penalty = penalty @@ -1202,13 +1202,10 @@ def fit(self, X, y, sample_weight=None): self : object Returns self. 
""" - if self.solver == 'auto': - if self.penalty == 'l1': - _solver = 'liblinear' - if self.penalty == 'l2': - _solver = 'lbfgs' + if self.solver == 'default': + _solver = 'liblinear' warnings.warn("Default solver will be changed from 'liblinear' to " - " auto solver in 0.22", FutureWarning) + "auto solver in 0.22", FutureWarning) else: _solver = self.solver if self.multi_class == 'default': @@ -1351,7 +1348,7 @@ def predict_proba(self, X): """ if not hasattr(self, "coef_"): raise NotFittedError("Call fit before prediction") - if self.multi_class == 'auto': + if self.multi_class == 'default': _multi_class = 'ovr' else: _multi_class = self.multi_class diff --git a/sklearn/linear_model/sag.py b/sklearn/linear_model/sag.py index 4d4d74fc736ff..2113907b810c7 100644 --- a/sklearn/linear_model/sag.py +++ b/sklearn/linear_model/sag.py @@ -217,7 +217,7 @@ def sag_solver(X, y, sample_weight=None, loss='log', alpha=1., beta=0., ... #doctest: +NORMALIZE_WHITESPACE LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True, intercept_scaling=1, max_iter=100, - multi_class='auto', n_jobs=1, penalty='l2', random_state=None, + multi_class='default', n_jobs=1, penalty='l2', random_state=None, solver='sag', tol=0.0001, verbose=0, warm_start=False) References From d6628f4f95052836294f8e9154821f699cdbf203 Mon Sep 17 00:00:00 2001 From: Kumar Ashutosh Date: Tue, 16 Jan 2018 23:47:15 +0530 Subject: [PATCH 18/27] auto added and parameter documentation changed --- sklearn/linear_model/logistic.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/sklearn/linear_model/logistic.py b/sklearn/linear_model/logistic.py index 488997e63a058..2e0371ba9d581 100644 --- a/sklearn/linear_model/logistic.py +++ b/sklearn/linear_model/logistic.py @@ -1043,7 +1043,7 @@ class LogisticRegression(BaseEstimator, LinearClassifierMixin, 'liblinear'. solver : {'newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'}, - default: 'liblinear' + default: 'default'. 
Will be changed to 'auto' solver in 0.22. Algorithm to use in the optimization problem. - For small datasets, 'liblinear' is a good choice, whereas 'sag' and @@ -1058,16 +1058,22 @@ class LogisticRegression(BaseEstimator, LinearClassifierMixin, features with approximately the same scale. You can preprocess the data with a scaler from sklearn.preprocessing. + The solver 'auto' selects 'lbfgs' if penalty is 'l2' and 'saga' is + penalty is 'l1'. + .. versionadded:: 0.17 Stochastic Average Gradient descent solver. .. versionadded:: 0.19 SAGA solver. + .. version:: 0.20 + auto solver max_iter : int, default: 100 Useful only for the newton-cg, sag and lbfgs solvers. Maximum number of iterations taken for the solvers to converge. - multi_class : str, {'ovr', 'multinomial'}, default: 'ovr' + multi_class : str, {'ovr', 'multinomial'}, + default: 'default'. Will be changed to 'multinomial' in 0.22. Multiclass option can be either 'ovr' or 'multinomial'. If the option chosen is 'ovr', then a binary problem is fit for each label. 
Else the loss minimised is the multinomial loss fit across @@ -1205,6 +1211,11 @@ def fit(self, X, y, sample_weight=None): _solver = 'liblinear' warnings.warn("Default solver will be changed from 'liblinear' to " "auto solver in 0.22", FutureWarning) + elif self.solver == 'auto': + if self.penalty == 'l1': + _solver = 'saga' + if self.penalty == 'l2': + _solver = 'lbfgs' else: _solver = self.solver if self.multi_class == 'default': From 100decdd9cdcf25ad373cca7724a17bd2f8f8dc4 Mon Sep 17 00:00:00 2001 From: Kumar Ashutosh Date: Wed, 17 Jan 2018 00:07:15 +0530 Subject: [PATCH 19/27] C removed --- sklearn/linear_model/tests/test_logistic.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/sklearn/linear_model/tests/test_logistic.py b/sklearn/linear_model/tests/test_logistic.py index 25487588c3e31..47ea4d3f224ca 100644 --- a/sklearn/linear_model/tests/test_logistic.py +++ b/sklearn/linear_model/tests/test_logistic.py @@ -107,11 +107,9 @@ def test_logistic_regression_warnings(): n_samples, n_features = iris.data.shape target = iris.target_names[iris.target] - clf_solver_warning = LogisticRegression(C=len(iris.data), - multi_class='ovr') - clf_multi_class_warning = LogisticRegression(C=len(iris.data), - solver='lbfgs') - clf_no_warnings = LogisticRegression(C=len(iris.data), solver='lbfgs', + clf_solver_warning = LogisticRegression(multi_class='ovr') + clf_multi_class_warning = LogisticRegression(solver='lbfgs') + clf_no_warnings = LogisticRegression(solver='lbfgs', multi_class='multinomial') solver_warning_msg = "Default solver will be changed from 'liblinear' " \ From f4d402e6990bb90c65edb304d5098b0fc3009633 Mon Sep 17 00:00:00 2001 From: Kumar Ashutosh Date: Fri, 19 Jan 2018 16:58:19 +0530 Subject: [PATCH 20/27] test and doc added --- sklearn/linear_model/logistic.py | 5 ++++- sklearn/linear_model/tests/test_logistic.py | 11 +++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/sklearn/linear_model/logistic.py 
b/sklearn/linear_model/logistic.py index 2e0371ba9d581..3ec7d38b00d60 100644 --- a/sklearn/linear_model/logistic.py +++ b/sklearn/linear_model/logistic.py @@ -1059,7 +1059,10 @@ class LogisticRegression(BaseEstimator, LinearClassifierMixin, preprocess the data with a scaler from sklearn.preprocessing. The solver 'auto' selects 'lbfgs' if penalty is 'l2' and 'saga' is - penalty is 'l1'. + penalty is 'l1'. Selecting 'l1' on `saga` may suffer performance + issues. Use solver='liblinear', multiclass='ovr' for better + performance at the cost of `intercept_scaling`. + performance .. versionadded:: 0.17 Stochastic Average Gradient descent solver. diff --git a/sklearn/linear_model/tests/test_logistic.py b/sklearn/linear_model/tests/test_logistic.py index 47ea4d3f224ca..a529ce3345a5e 100644 --- a/sklearn/linear_model/tests/test_logistic.py +++ b/sklearn/linear_model/tests/test_logistic.py @@ -124,6 +124,17 @@ def test_logistic_regression_warnings(): assert_no_warnings(clf_no_warnings.fit, iris.data, target) +def test_logistic_regression_auto(): + # Test logistic regression with auto mode + n_samples, n_features = iris.data.shape + target = iris.target_names[iris.target] + clf_solver_l1 = LogisticRegression(penalty='l1', solver='auto') + clf_solver_l2 = LogisticRegression(penalty='l2', solver='auto') + + clf_solver_l1.fit(iris.data, target) + clf_solver_l2.fir(iris.data, target) + + def test_predict_3_classes(): check_predictions(LogisticRegression(C=10), X, Y2) check_predictions(LogisticRegression(C=10), X_sp, Y2) From fee3ed66af42929222a35517bf377735f377045c Mon Sep 17 00:00:00 2001 From: Kumar Ashutosh Date: Fri, 19 Jan 2018 18:43:03 +0530 Subject: [PATCH 21/27] typo corrected --- sklearn/linear_model/tests/test_logistic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/linear_model/tests/test_logistic.py b/sklearn/linear_model/tests/test_logistic.py index a529ce3345a5e..934eb1826d9ac 100644 --- a/sklearn/linear_model/tests/test_logistic.py +++ 
b/sklearn/linear_model/tests/test_logistic.py @@ -132,7 +132,7 @@ def test_logistic_regression_auto(): clf_solver_l2 = LogisticRegression(penalty='l2', solver='auto') clf_solver_l1.fit(iris.data, target) - clf_solver_l2.fir(iris.data, target) + clf_solver_l2.fit(iris.data, target) def test_predict_3_classes(): From 4cc2bada6f8ba2d4ce260cdfbe38680895f8aee9 Mon Sep 17 00:00:00 2001 From: Kumar Ashutosh Date: Tue, 23 Jan 2018 02:22:07 +0530 Subject: [PATCH 22/27] doc updated --- sklearn/linear_model/logistic.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sklearn/linear_model/logistic.py b/sklearn/linear_model/logistic.py index 3ec7d38b00d60..ecb9e983a07d4 100644 --- a/sklearn/linear_model/logistic.py +++ b/sklearn/linear_model/logistic.py @@ -1058,11 +1058,11 @@ class LogisticRegression(BaseEstimator, LinearClassifierMixin, features with approximately the same scale. You can preprocess the data with a scaler from sklearn.preprocessing. - The solver 'auto' selects 'lbfgs' if penalty is 'l2' and 'saga' is - penalty is 'l1'. Selecting 'l1' on `saga` may suffer performance - issues. Use solver='liblinear', multiclass='ovr' for better - performance at the cost of `intercept_scaling`. - performance + The solver 'auto' selects 'lbfgs' if penalty is 'l2' and 'saga' if + penalty is 'l1'. Note that 'saga' may suffer from slow convergence + issues on small datasets. The only other solver supporting 'l1' is + 'liblinear', which requires multiclass='ovr' and which unfortunately + regularizes the intercept (see 'intercept_scaling'). .. versionadded:: 0.17 Stochastic Average Gradient descent solver. 
From f99bda7b99e458a4edf72196d76888d3fe7b57a0 Mon Sep 17 00:00:00 2001 From: Kumar Ashutosh Date: Tue, 23 Jan 2018 21:00:57 +0530 Subject: [PATCH 23/27] default changed in LogisticRegressionCV --- sklearn/linear_model/logistic.py | 47 ++++++++++++++++++++++---------- 1 file changed, 32 insertions(+), 15 deletions(-) diff --git a/sklearn/linear_model/logistic.py b/sklearn/linear_model/logistic.py index ecb9e983a07d4..38fb440ca51b2 100644 --- a/sklearn/linear_model/logistic.py +++ b/sklearn/linear_model/logistic.py @@ -1595,9 +1595,9 @@ class LogisticRegressionCV(LogisticRegression, BaseEstimator, """ def __init__(self, Cs=10, fit_intercept=True, cv=None, dual=False, - penalty='l2', scoring=None, solver='lbfgs', tol=1e-4, + penalty='l2', scoring=None, solver='default', tol=1e-4, max_iter=100, class_weight=None, n_jobs=1, verbose=0, - refit=True, intercept_scaling=1., multi_class='ovr', + refit=True, intercept_scaling=1., multi_class='default', random_state=None): self.Cs = Cs self.fit_intercept = fit_intercept @@ -1636,7 +1636,24 @@ def fit(self, X, y, sample_weight=None): ------- self : object """ - _check_solver_option(self.solver, self.multi_class, self.penalty, + if self.solver == 'default': + _solver = 'liblinear' + warnings.warn("Default solver will be changed from 'liblinear' " + "to 'auto' solver in 0.22", FutureWarning) + elif self.solver == 'auto': + if self.penalty == 'l1': + _solver = 'saga' + if self.penalty == 'l2': + _solver = 'lbfgs' + else: + _solver = self.solver + if self.multi_class == 'default': + _multi_class = 'ovr' + warnings.warn("Default multi_class will be changed from 'ovr' to" + " 'multinomial' in 0.22", FutureWarning) + else: + _multi_class = self.multi_class + _check_solver_option(_solver, _multi_class, self.penalty, self.dual) if not isinstance(self.max_iter, numbers.Number) or self.max_iter < 0: @@ -1663,7 +1680,7 @@ def fit(self, X, y, sample_weight=None): classes = self.classes_ = label_encoder.classes_ encoded_labels = 
label_encoder.transform(label_encoder.classes_) - if self.solver in ['sag', 'saga']: + if _solver in ['sag', 'saga']: max_squared_sum = row_norms(X, squared=True).max() else: max_squared_sum = None @@ -1689,7 +1706,7 @@ def fit(self, X, y, sample_weight=None): # We need this hack to iterate only once over labels, in the case of # multi_class = multinomial, without changing the value of the labels. - if self.multi_class == 'multinomial': + if _multi_class == 'multinomial': iter_encoded_labels = iter_classes = [None] else: iter_encoded_labels = encoded_labels @@ -1706,7 +1723,7 @@ def fit(self, X, y, sample_weight=None): # The SAG solver releases the GIL so it's more efficient to use # threads for this solver. - if self.solver in ['sag', 'saga']: + if _solver in ['sag', 'saga']: backend = 'threading' else: backend = 'multiprocessing' @@ -1714,10 +1731,10 @@ def fit(self, X, y, sample_weight=None): backend=backend)( path_func(X, y, train, test, pos_class=label, Cs=self.Cs, fit_intercept=self.fit_intercept, penalty=self.penalty, - dual=self.dual, solver=self.solver, tol=self.tol, + dual=self.dual, solver=_solver, tol=self.tol, max_iter=self.max_iter, verbose=self.verbose, class_weight=class_weight, scoring=self.scoring, - multi_class=self.multi_class, + multi_class=_multi_class, intercept_scaling=self.intercept_scaling, random_state=self.random_state, max_squared_sum=max_squared_sum, @@ -1726,7 +1743,7 @@ def fit(self, X, y, sample_weight=None): for label in iter_encoded_labels for train, test in folds) - if self.multi_class == 'multinomial': + if _multi_class == 'multinomial': multi_coefs_paths, Cs, multi_scores, n_iter_ = zip(*fold_coefs_) multi_coefs_paths = np.asarray(multi_coefs_paths) multi_scores = np.asarray(multi_scores) @@ -1763,14 +1780,14 @@ def fit(self, X, y, sample_weight=None): self.intercept_ = np.zeros(n_classes) # hack to iterate only once for multinomial case. 
- if self.multi_class == 'multinomial': + if _multi_class == 'multinomial': scores = multi_scores coefs_paths = multi_coefs_paths for index, (cls, encoded_label) in enumerate( zip(iter_classes, iter_encoded_labels)): - if self.multi_class == 'ovr': + if _multi_class == 'ovr': # The scores_ / coefs_paths_ dict have unencoded class # labels as their keys scores = self.scores_[cls] @@ -1781,7 +1798,7 @@ def fit(self, X, y, sample_weight=None): C_ = self.Cs_[best_index] self.C_.append(C_) - if self.multi_class == 'multinomial': + if _multi_class == 'multinomial': coef_init = np.mean(coefs_paths[:, best_index, :, :], axis=0) else: @@ -1790,12 +1807,12 @@ def fit(self, X, y, sample_weight=None): # Note that y is label encoded and hence pos_class must be # the encoded label / None (for 'multinomial') w, _, _ = logistic_regression_path( - X, y, pos_class=encoded_label, Cs=[C_], solver=self.solver, + X, y, pos_class=encoded_label, Cs=[C_], solver=_solver, fit_intercept=self.fit_intercept, coef=coef_init, max_iter=self.max_iter, tol=self.tol, penalty=self.penalty, class_weight=class_weight, - multi_class=self.multi_class, + multi_class=_multi_class, verbose=max(0, self.verbose - 1), random_state=self.random_state, check_input=False, max_squared_sum=max_squared_sum, @@ -1810,7 +1827,7 @@ def fit(self, X, y, sample_weight=None): for i in range(len(folds))], axis=0) self.C_.append(np.mean(self.Cs_[best_indices])) - if self.multi_class == 'multinomial': + if _multi_class == 'multinomial': self.C_ = np.tile(self.C_, n_classes) self.coef_ = w[:, :X.shape[1]] if self.fit_intercept: From 6f411361783a16dabab56f00ca195abaa3d654fe Mon Sep 17 00:00:00 2001 From: Kumar Ashutosh Date: Tue, 23 Jan 2018 21:21:42 +0530 Subject: [PATCH 24/27] default behaviour corrected --- sklearn/linear_model/logistic.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/linear_model/logistic.py b/sklearn/linear_model/logistic.py index 38fb440ca51b2..fb03812bac988 100644 --- 
a/sklearn/linear_model/logistic.py +++ b/sklearn/linear_model/logistic.py @@ -1637,8 +1637,8 @@ def fit(self, X, y, sample_weight=None): self : object """ if self.solver == 'default': - _solver = 'liblinear' - warnings.warn("Default solver will be changed from 'liblinear' " + _solver = 'lbfgs' + warnings.warn("Default solver will be changed from 'lbfgs' " "to 'auto' solver in 0.22", FutureWarning) elif self.solver == 'auto': if self.penalty == 'l1': From d942b0642af33a662a7a1427d8d1676fe4d93aaf Mon Sep 17 00:00:00 2001 From: Kumar Ashutosh Date: Tue, 23 Jan 2018 22:29:35 +0530 Subject: [PATCH 25/27] changes added --- sklearn/linear_model/logistic.py | 34 +++++++++++++++++--- sklearn/linear_model/tests/test_logistic.py | 35 ++++++++++++++++++++- 2 files changed, 63 insertions(+), 6 deletions(-) diff --git a/sklearn/linear_model/logistic.py b/sklearn/linear_model/logistic.py index fb03812bac988..80f9c265967bb 100644 --- a/sklearn/linear_model/logistic.py +++ b/sklearn/linear_model/logistic.py @@ -449,9 +449,9 @@ def _check_solver_option(solver, multi_class, penalty, dual): def logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True, max_iter=100, tol=1e-4, verbose=0, - solver='lbfgs', coef=None, + solver='default', coef=None, class_weight=None, dual=False, penalty='l2', - intercept_scaling=1., multi_class='ovr', + intercept_scaling=1., multi_class='default', random_state=None, check_input=True, max_squared_sum=None, sample_weight=None): """Compute a Logistic Regression model for a list of regularization @@ -500,6 +500,7 @@ def logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True, number for verbosity. solver : {'lbfgs', 'newton-cg', 'liblinear', 'sag', 'saga'} + default: 'default'. Will be changed to 'auto' solver in 0.22. Numerical solver to use. 
coef : array-like, shape (n_features,), default None @@ -540,6 +541,7 @@ def logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True, (and therefore on the intercept) intercept_scaling has to be increased. multi_class : str, {'ovr', 'multinomial'} + default: 'default'. Will be changed to 'multinomial' in 0.22. Multiclass option can be either 'ovr' or 'multinomial'. If the option chosen is 'ovr', then a binary problem is fit for each label. Else the loss minimised is the multinomial loss fit across @@ -587,6 +589,19 @@ def logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True, .. versionchanged:: 0.19 The "copy" parameter was removed. """ + if solver == 'default': + solver = 'lbfgs' + warnings.warn("Default solver will be changed from 'lbfgs' " + "to 'auto' solver in 0.22", FutureWarning) + elif solver == 'auto': + if penalty == 'l1': + solver = 'saga' + if penalty == 'l2': + solver = 'lbfgs' + if multi_class == 'default': + multi_class = 'ovr' + warnings.warn("Default multi_class will be changed from 'ovr' to" + " 'multinomial' in 0.22", FutureWarning) if isinstance(Cs, numbers.Integral): Cs = np.logspace(-4, 4, Cs) @@ -1068,7 +1083,7 @@ class LogisticRegression(BaseEstimator, LinearClassifierMixin, Stochastic Average Gradient descent solver. .. versionadded:: 0.19 SAGA solver. - .. version:: 0.20 + .. versionadded:: 0.20 auto solver max_iter : int, default: 100 @@ -1213,7 +1228,7 @@ def fit(self, X, y, sample_weight=None): if self.solver == 'default': _solver = 'liblinear' warnings.warn("Default solver will be changed from 'liblinear' to " - "auto solver in 0.22", FutureWarning) + "'auto' solver in 0.22", FutureWarning) elif self.solver == 'auto': if self.penalty == 'l1': _solver = 'saga' @@ -1455,7 +1470,7 @@ class LogisticRegressionCV(LogisticRegression, BaseEstimator, default scoring option used is 'accuracy'. solver : {'newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'}, - default: 'lbfgs' + default: 'default'. 
Will be changed to 'auto' solver in 0.22. Algorithm to use in the optimization problem. - For small datasets, 'liblinear' is a good choice, whereas 'sag' and @@ -1472,6 +1487,14 @@ class LogisticRegressionCV(LogisticRegression, BaseEstimator, features with approximately the same scale. You can preprocess the data with a scaler from sklearn.preprocessing. + The solver 'auto' selects 'lbfgs' if penalty is 'l2' and 'saga' if + penalty is 'l1'. Note that 'saga' may suffer from slow convergence + issues on small datasets. The only other solver supporting 'l1' is + 'liblinear', which requires multi_class='ovr' and which unfortunately + regularizes the intercept (see 'intercept_scaling'). + + .. versionadded:: 0.20 + auto solver .. versionadded:: 0.17 Stochastic Average Gradient descent solver. .. versionadded:: 0.19 @@ -1526,6 +1549,7 @@ class LogisticRegressionCV(LogisticRegression, BaseEstimator, (and therefore on the intercept) intercept_scaling has to be increased. multi_class : str, {'ovr', 'multinomial'} + default: 'default'. Will be changed to 'multinomial' in 0.22. Multiclass option can be either 'ovr' or 'multinomial'. If the option chosen is 'ovr', then a binary problem is fit for each label. 
Else the loss minimised is the multinomial loss fit across diff --git a/sklearn/linear_model/tests/test_logistic.py b/sklearn/linear_model/tests/test_logistic.py index 934eb1826d9ac..b4d5b2d8e806f 100644 --- a/sklearn/linear_model/tests/test_logistic.py +++ b/sklearn/linear_model/tests/test_logistic.py @@ -113,7 +113,7 @@ def test_logistic_regression_warnings(): multi_class='multinomial') solver_warning_msg = "Default solver will be changed from 'liblinear' " \ - "to auto solver in 0.22" + "to 'auto' solver in 0.22" multi_class_warning_msg = "Default multi_class will be changed from" \ " 'ovr' to 'multinomial' in 0.22" @@ -210,6 +210,39 @@ def test_check_solver_option(): assert_raise_message(ValueError, msg, lr.fit, X, y) +def test_logistic_regression_cv_warnings(): + # Test logistic regression with the iris dataset + n_samples, n_features = iris.data.shape + target = iris.target_names[iris.target] + + clf_solver_warning = LogisticRegressionCV(multi_class='ovr') + clf_multi_class_warning = LogisticRegressionCV(solver='lbfgs') + clf_no_warnings = LogisticRegressionCV(solver='lbfgs', + multi_class='multinomial') + + solver_warning_msg = "Default solver will be changed from 'lbfgs' " \ + "to 'auto' solver in 0.22" + multi_class_warning_msg = "Default multi_class will be changed from" \ + " 'ovr' to 'multinomial' in 0.22" + + assert_warns_message(FutureWarning, solver_warning_msg, + clf_solver_warning.fit, iris.data, target) + assert_warns_message(FutureWarning, multi_class_warning_msg, + clf_multi_class_warning.fit, iris.data, target) + assert_no_warnings(clf_no_warnings.fit, iris.data, target) + + +def test_logistic_regression_cv_auto(): + # Test logistic regression with auto mode + n_samples, n_features = iris.data.shape + target = iris.target_names[iris.target] + clf_solver_l1 = LogisticRegressionCV(penalty='l1', solver='auto') + clf_solver_l2 = LogisticRegressionCV(penalty='l2', solver='auto') + + clf_solver_l1.fit(iris.data, target) + clf_solver_l2.fit(iris.data, 
target) + + def test_multinomial_binary(): # Test multinomial LR on a binary problem. target = (iris.target > 0).astype(np.intp) From e865efe944dbd66b6b2a2174c208136675096768 Mon Sep 17 00:00:00 2001 From: Kumar Ashutosh Date: Tue, 23 Jan 2018 23:44:58 +0530 Subject: [PATCH 26/27] indentation corrected --- sklearn/linear_model/tests/test_logistic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/linear_model/tests/test_logistic.py b/sklearn/linear_model/tests/test_logistic.py index b4d5b2d8e806f..e245c2a0bdbb8 100644 --- a/sklearn/linear_model/tests/test_logistic.py +++ b/sklearn/linear_model/tests/test_logistic.py @@ -218,7 +218,7 @@ def test_logistic_regression_cv_warnings(): clf_solver_warning = LogisticRegressionCV(multi_class='ovr') clf_multi_class_warning = LogisticRegressionCV(solver='lbfgs') clf_no_warnings = LogisticRegressionCV(solver='lbfgs', - multi_class='multinomial') + multi_class='multinomial') solver_warning_msg = "Default solver will be changed from 'lbfgs' " \ "to 'auto' solver in 0.22" From ec8e6b43cf49b44feaacab924561c73bca1da295 Mon Sep 17 00:00:00 2001 From: Kumar Ashutosh Date: Fri, 26 Jan 2018 00:51:12 +0530 Subject: [PATCH 27/27] redundant test removed with parametrize --- sklearn/linear_model/tests/test_logistic.py | 58 ++++++--------------- 1 file changed, 15 insertions(+), 43 deletions(-) diff --git a/sklearn/linear_model/tests/test_logistic.py b/sklearn/linear_model/tests/test_logistic.py index e245c2a0bdbb8..8c32780b8e498 100644 --- a/sklearn/linear_model/tests/test_logistic.py +++ b/sklearn/linear_model/tests/test_logistic.py @@ -1,5 +1,6 @@ import numpy as np import scipy.sparse as sp +import pytest from scipy import linalg, optimize, sparse from sklearn.datasets import load_iris, make_classification from sklearn.metrics import log_loss @@ -102,18 +103,21 @@ def test_lr_liblinear_warning(): lr.fit, iris.data, target) -def test_logistic_regression_warnings(): +@pytest.mark.parametrize('model, 
default_solver', + [(LogisticRegression, 'liblinear'), + (LogisticRegressionCV, 'lbfgs')]) +def test_logistic_regression_warnings(model, default_solver): # Test logistic regression with the iris dataset n_samples, n_features = iris.data.shape target = iris.target_names[iris.target] - clf_solver_warning = LogisticRegression(multi_class='ovr') - clf_multi_class_warning = LogisticRegression(solver='lbfgs') - clf_no_warnings = LogisticRegression(solver='lbfgs', - multi_class='multinomial') + clf_solver_warning = model(multi_class='ovr') + clf_multi_class_warning = model(solver='lbfgs') + clf_no_warnings = model(solver='lbfgs', + multi_class='multinomial') - solver_warning_msg = "Default solver will be changed from 'liblinear' " \ - "to 'auto' solver in 0.22" + solver_warning_msg = "Default solver will be changed from '{}' " \ + "to 'auto' solver in 0.22".format(default_solver) multi_class_warning_msg = "Default multi_class will be changed from" \ " 'ovr' to 'multinomial' in 0.22" @@ -124,12 +128,13 @@ def test_logistic_regression_warnings(): assert_no_warnings(clf_no_warnings.fit, iris.data, target) -def test_logistic_regression_auto(): +@pytest.mark.parametrize('model', [LogisticRegression, LogisticRegressionCV]) +def test_logistic_regression_auto(model): # Test logistic regression with auto mode n_samples, n_features = iris.data.shape target = iris.target_names[iris.target] - clf_solver_l1 = LogisticRegression(penalty='l1', solver='auto') - clf_solver_l2 = LogisticRegression(penalty='l2', solver='auto') + clf_solver_l1 = model(penalty='l1', solver='auto') + clf_solver_l2 = model(penalty='l2', solver='auto') clf_solver_l1.fit(iris.data, target) clf_solver_l2.fit(iris.data, target) @@ -210,39 +215,6 @@ def test_check_solver_option(): assert_raise_message(ValueError, msg, lr.fit, X, y) -def test_logistic_regression_cv_warnings(): - # Test logistic regression with the iris dataset - n_samples, n_features = iris.data.shape - target = iris.target_names[iris.target] - - 
clf_solver_warning = LogisticRegressionCV(multi_class='ovr') - clf_multi_class_warning = LogisticRegressionCV(solver='lbfgs') - clf_no_warnings = LogisticRegressionCV(solver='lbfgs', - multi_class='multinomial') - - solver_warning_msg = "Default solver will be changed from 'lbfgs' " \ - "to 'auto' solver in 0.22" - multi_class_warning_msg = "Default multi_class will be changed from" \ - " 'ovr' to 'multinomial' in 0.22" - - assert_warns_message(FutureWarning, solver_warning_msg, - clf_solver_warning.fit, iris.data, target) - assert_warns_message(FutureWarning, multi_class_warning_msg, - clf_multi_class_warning.fit, iris.data, target) - assert_no_warnings(clf_no_warnings.fit, iris.data, target) - - -def test_logistic_regression_cv_auto(): - # Test logistic regression with auto mode - n_samples, n_features = iris.data.shape - target = iris.target_names[iris.target] - clf_solver_l1 = LogisticRegressionCV(penalty='l1', solver='auto') - clf_solver_l2 = LogisticRegressionCV(penalty='l2', solver='auto') - - clf_solver_l1.fit(iris.data, target) - clf_solver_l2.fit(iris.data, target) - - def test_multinomial_binary(): # Test multinomial LR on a binary problem. target = (iris.target > 0).astype(np.intp)