From 8f708dafc781c3731beb9e331a0843e342241556 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=20Dupr=C3=A9=20la=20Tour?= Date: Fri, 6 Jul 2018 15:35:53 +0200 Subject: [PATCH 01/17] Change default solver in LogisticRegression --- benchmarks/bench_logistic_solvers.py | 269 ++++++++++++++++++ doc/modules/linear_model.rst | 29 +- .../supervised_learning.rst | 4 +- sklearn/linear_model/logistic.py | 207 +++++++++----- sklearn/linear_model/sag.py | 2 +- sklearn/linear_model/tests/test_logistic.py | 36 ++- 6 files changed, 465 insertions(+), 82 deletions(-) create mode 100644 benchmarks/bench_logistic_solvers.py diff --git a/benchmarks/bench_logistic_solvers.py b/benchmarks/bench_logistic_solvers.py new file mode 100644 index 0000000000000..bd35032439e9e --- /dev/null +++ b/benchmarks/bench_logistic_solvers.py @@ -0,0 +1,269 @@ +""" +Benchmarks of sklearn solver in LogisticRegression. +""" + +# Author: Tom Dupre la Tour +import time +from os.path import expanduser + +import matplotlib.pyplot as plt +import scipy.sparse as sp # noqa +import numpy as np +import pandas as pd + +from sklearn.datasets import fetch_mldata +from sklearn.datasets import fetch_rcv1, load_iris, load_digits +from sklearn.datasets import fetch_20newsgroups_vectorized +from sklearn.exceptions import ConvergenceWarning +from sklearn.externals.joblib import delayed, Parallel, Memory +from sklearn.linear_model import LogisticRegression +from sklearn.linear_model.logistic import _multinomial_loss +from sklearn.model_selection import train_test_split +from sklearn.preprocessing import LabelBinarizer +from sklearn.preprocessing import MinMaxScaler # noqa +from sklearn.utils.testing import ignore_warnings +from sklearn.utils import shuffle + + +def get_loss(coefs, intercepts, X, y, C, multi_class, penalty): + if multi_class == 'ovr': + if np.array(intercepts).ndim == 0 and intercepts == 0: + intercepts = np.zeros(coefs.shape[0]) + loss = 0 + for ii, (coef, intercept) in enumerate(zip(coefs, intercepts)): + y_bin = y.copy() + y_bin[y == ii] = 1 + y_bin[y != ii] = -1 + loss += np.sum( + np.log(1. 
+ np.exp(-y_bin * (X.dot(coef) + intercept)))) + + if penalty == 'l2': + loss += 0.5 * coef.dot(coef) / C + else: + loss += np.sum(np.abs(coef)) / C + else: + coefs_and_intercept = np.vstack((coefs.T, intercepts.T)).T.ravel() + lbin = LabelBinarizer() + Y_multi = lbin.fit_transform(y) + if Y_multi.shape[1] == 1: + Y_multi = np.hstack([1 - Y_multi, Y_multi]) + loss, _, _ = _multinomial_loss(coefs_and_intercept, X, Y_multi, 0, + np.ones(X.shape[0])) + coefs = coefs.ravel() + if penalty == 'l2': + loss += 0.5 * coefs.dot(coefs) / C + else: + loss += np.sum(np.abs(coefs)) / C + + loss /= X.shape[0] + + return loss + + +def fit_single(solver, X, y, X_shape, dataset, penalty='l2', + multi_class='multinomial', C=1, max_iter=10): + assert X.shape == X_shape + + # if not sp.issparse(X): + # X = MinMaxScaler().fit_transform(X) + + X_train, X_test, y_train, y_test = train_test_split( + X, y, random_state=42, stratify=y) + train_scores, train_losses, test_scores, times = [], [], [], [] + + if solver == 'newton-cg': + max_iter /= 2 + + n_repeats = None + max_iter_range = np.unique(np.int_(np.logspace(0, np.log10(max_iter), 10))) + for this_max_iter in max_iter_range: + msg = ('[%s, %s, %s, %s] Max iter: %s' % (dataset, multi_class, solver, + penalty, this_max_iter)) + lr = LogisticRegression(solver=solver, multi_class=multi_class, C=C, + penalty=penalty, fit_intercept=False, + tol=1e-24, max_iter=this_max_iter, + random_state=42, intercept_scaling=10000) + t0 = time.clock() + try: + if penalty == 'l1' and multi_class == 'multinomial': + raise ValueError('skip as only saga is available.') + + with ignore_warnings(category=ConvergenceWarning): + # first time for timing + if n_repeats is None: + t0 = time.clock() + lr.fit(X_train, y_train) + max_iter_duration = max_iter * (time.clock() - t0) + n_repeats = max(1, int(1. 
/ max_iter_duration)) + + t0 = time.clock() + for _ in range(n_repeats): + lr.fit(X_train, y_train) + train_time = (time.clock() - t0) / n_repeats + print('%s (repeat=%d)' % (msg, n_repeats)) + + except ValueError: + train_score = np.nan + train_loss = np.nan + test_score = np.nan + train_time = np.nan + print('%s (skipped)' % (msg, )) + continue + + train_loss = get_loss(lr.coef_, lr.intercept_, X_train, y_train, C, + multi_class, penalty) + train_score = lr.score(X_train, y_train) + test_score = lr.score(X_test, y_test) + + train_scores.append(train_score) + train_losses.append(train_loss) + test_scores.append(test_score) + times.append(train_time) + + return (solver, penalty, dataset, multi_class, times, train_losses, + train_scores, test_scores) + + +def load_dataset(dataset, n_samples_max): + if dataset == 'rcv1': + rcv1 = fetch_rcv1() + X = rcv1.data + y = rcv1.target + + # take only 3 categories (CCAT, ECAT, MCAT) + y = y[:, [1, 4, 10]].astype(np.float64) + # remove samples that have more than one category + mask = np.asarray(y.sum(axis=1) == 1).ravel() + y = y[mask, :].indices + X = X[mask, :] + + elif dataset == 'mnist': + mnist = fetch_mldata('MNIST original') + X, y = shuffle(mnist.data, mnist.target, random_state=42) + X = X.astype(np.float64) + + elif dataset == 'digits': + digits = load_digits() + X, y = digits.data, digits.target + + elif dataset == 'iris': + iris = load_iris() + X, y = iris.data, iris.target + + elif dataset == '20news': + ng = fetch_20newsgroups_vectorized() + X = ng.data + y = ng.target + + X = X[:n_samples_max] + y = y[:n_samples_max] + + return X, y + + +def run(solvers, penalties, multi_classes, n_samples_max, max_iter, datasets, + n_jobs): + mem = Memory(cachedir=expanduser('~/cache'), verbose=0) + + results = [] + for dataset in datasets: + for multi_class in multi_classes: + X, y = load_dataset(dataset, n_samples_max) + + cached_fit = mem.cache(fit_single, ignore=['X']) + cached_fit = fit_single + + out = Parallel(n_jobs=n_jobs, mmap_mode=None)(delayed(cached_fit)( + solver, X, y, X.shape, dataset=dataset, penalty=penalty, + multi_class=multi_class, C=1, max_iter=max_iter) + for solver in solvers + for penalty in penalties) # yapf: disable + + results.extend(out) + + columns = ("solver penalty dataset multi_class times " + "train_losses train_scores test_scores").split() + results_df = pd.DataFrame(out, columns=columns) + plot(results_df) + + +def plot(res): + res.set_index(['dataset', 'multi_class', 'penalty'], inplace=True) + + grouped = res.groupby(level=['dataset', 'multi_class', 'penalty']) + + colors = { + 'sag': 'red', + 'saga': 'orange', + 'liblinear': 'blue', + 'lbfgs': 'green', + 'newton-cg': 'darkviolet', + 'auto': 'black', + } + + for idx, group in grouped: + dataset, multi_class, penalty = idx + fig = plt.figure(figsize=(12, 4)) + + # ----------------------- + ax = fig.add_subplot(131) + train_losses = group['train_losses'] + tmp = np.sort(np.concatenate(train_losses.values)) + if tmp.size == 0: + plt.close(fig) + continue + ref = 2 * tmp[0] - tmp[1] + + for losses, times, solver in zip(group['train_losses'], group['times'], + group['solver']): + losses = losses - ref + linestyle = ':' if solver == 'auto' else '-' + ax.plot(times, losses, label=solver, color=colors[solver], + linestyle=linestyle, marker='.') + ax.set_xlabel('Time (s)') + ax.set_ylabel('Training objective (relative to min)') + ax.set_yscale('log') + + # ----------------------- + ax = fig.add_subplot(132) + + for train_score, times, solver in zip(group['train_scores'], + 
group['times'], group['solver']): + linestyle = ':' if solver == 'auto' else '-' + ax.plot(times, train_score, label=solver, color=colors[solver], + linestyle=linestyle, marker='.') + ax.set_xlabel('Time (s)') + ax.set_ylabel('Train score') + + # ----------------------- + ax = fig.add_subplot(133) + + for test_score, times, solver in zip(group['test_scores'], + group['times'], group['solver']): + linestyle = ':' if solver == 'auto' else '-' + ax.plot(times, test_score, label=solver, color=colors[solver], + linestyle=linestyle, marker='.') + ax.set_xlabel('Time (s)') + ax.set_ylabel('Test score') + ax.legend() + + # ----------------------- + name = '%s_%s_%s' % (multi_class, penalty, dataset) + plt.suptitle(name) + fig.tight_layout() + fig.subplots_adjust(top=0.9) + plt.savefig('figures/' + name + '.png') + plt.close(fig) + print('SAVED: ' + name) + + +if __name__ == '__main__': + n_jobs = 3 + max_iter = 50 + solvers = ['liblinear', 'saga', 'sag', 'lbfgs', 'newton-cg', 'auto'] + penalties = ['l2', 'l1'] + multi_classes = ['multinomial', 'ovr'] + datasets = ['iris', 'digits', '20news', 'rcv1', 'mnist'] + + run(solvers, penalties, multi_classes, n_samples_max=None, n_jobs=n_jobs, + datasets=datasets, max_iter=max_iter) diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst index bb4a9e4e57f36..e84d72837bf05 100644 --- a/doc/modules/linear_model.rst +++ b/doc/modules/linear_model.rst @@ -773,19 +773,30 @@ The "saga" solver [7]_ is a variant of "sag" that also supports the non-smooth `penalty="l1"` option. This is therefore the solver of choice for sparse multinomial logistic regression. -In a nutshell, one may choose the solver with the following rules: - -================================= ===================================== -Case Solver -================================= ===================================== -L1 penalty "liblinear" or "saga" -Multinomial loss "lbfgs", "sag", "saga" or "newton-cg" -Very Large dataset (`n_samples`) "sag" or "saga" -================================= ===================================== +In a nutshell, the following table summarizes the solvers characteristics: + +============================ =========== ======= =========== ===== ====== +solver 'liblinear' 'lbfgs' 'newton-cg' 'sag' 'saga' +============================ =========== ======= =========== ===== ====== +Multinomial + L2 penalty no yes yes yes yes +OVR + L2 penalty yes yes yes yes yes +Multinomial + L1 penalty no no no no yes +OVR + L1 penalty yes no no no yes +============================ =========== ======= =========== ===== ====== +Penalize the intercept (bad) yes no no no no +Faster for large datasets no no no yes yes +Robust to unscaled datasets yes yes yes no no +============================ =========== ======= =========== ===== ====== The "saga" solver is often the best choice. The "liblinear" solver is used by default for historical reasons. +The default solver will change to "auto" in version 0.22. This option +automatically selects a good solver based on both `penalty` and `multi_class` +parameters, and on the size of the training set. Note that the "auto" behavior +may change without notice in the future, leading to similar but not necessarily +exact same solutions. + For large dataset, you may also consider using :class:`SGDClassifier` with 'log' loss. 
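The table and the deprecation note above describe how, starting in 0.22, the implicit defaults for ``solver`` and ``multi_class`` are scheduled to change. As a minimal sketch of the user-facing consequence (not part of the patch itself, and assuming only the public ``LogisticRegression`` API shown in this diff), passing both parameters explicitly keeps the fitted model stable across versions and silences the ``FutureWarning`` introduced here::

    from sklearn.datasets import load_iris
    from sklearn.linear_model import LogisticRegression

    X, y = load_iris(return_X_y=True)

    # Explicit solver and multi_class: no FutureWarning, and the same model
    # before and after the default change described above.
    clf = LogisticRegression(solver='lbfgs', multi_class='multinomial',
                             max_iter=1000)
    clf.fit(X, y)
    print(clf.score(X, y))
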
diff --git a/doc/tutorial/statistical_inference/supervised_learning.rst b/doc/tutorial/statistical_inference/supervised_learning.rst index 49e69d9ec80d4..4a9003685d28b 100644 --- a/doc/tutorial/statistical_inference/supervised_learning.rst +++ b/doc/tutorial/statistical_inference/supervised_learning.rst @@ -372,8 +372,8 @@ function or **logistic** function: >>> logistic.fit(iris_X_train, iris_y_train) LogisticRegression(C=100000.0, class_weight=None, dual=False, fit_intercept=True, intercept_scaling=1, max_iter=100, - multi_class='ovr', n_jobs=1, penalty='l2', random_state=None, - solver='liblinear', tol=0.0001, verbose=0, warm_start=False) + multi_class='default', n_jobs=1, penalty='l2', random_state=None, + solver='default', tol=0.0001, verbose=0, warm_start=False) This is known as :class:`LogisticRegression`. diff --git a/sklearn/linear_model/logistic.py b/sklearn/linear_model/logistic.py index e4ea696ce7146..b82dab9a46598 100644 --- a/sklearn/linear_model/logistic.py +++ b/sklearn/linear_model/logistic.py @@ -28,7 +28,7 @@ from ..utils.extmath import row_norms from ..utils.fixes import logsumexp from ..utils.optimize import newton_cg -from ..utils.validation import check_X_y +from ..utils.validation import check_X_y, _num_samples from ..exceptions import (NotFittedError, ConvergenceWarning, ChangedBehaviorWarning) from ..utils.multiclass import check_classification_targets @@ -424,35 +424,82 @@ def hessp(v): return grad, hessp -def _check_solver_option(solver, multi_class, penalty, dual): - if solver not in ['liblinear', 'newton-cg', 'lbfgs', 'sag', 'saga']: - raise ValueError("Logistic Regression supports only liblinear, " - "newton-cg, lbfgs, sag and saga solvers, got %s" - % solver) +def _check_solver_option(solver, multi_class, penalty, dual, fit_intercept, + n_samples, previous_default_solver='liblinear'): + # default values raises a future warning + if solver == 'default': + # solver will eventually change back to previous_default_solver, but we + # warn only if the 'auto' solver would have selected a different solver + solver = 'auto' + warn_solver = True + else: + warn_solver = False + + if multi_class == 'default': + multi_class = 'ovr' + warnings.warn("Default multi_class will be changed to 'multinomial' in" + " 0.22. Use a specific option to silence this warning.", + FutureWarning) + + # multi_class checks if multi_class not in ['multinomial', 'ovr']: raise ValueError("multi_class should be either multinomial or " - "ovr, got %s" % multi_class) + "ovr, got %s." % multi_class) + + # solver checks + all_solvers = ['liblinear', 'newton-cg', 'lbfgs', 'sag', 'saga', 'auto'] + if solver not in all_solvers: + raise ValueError("Logistic Regression supports only solvers in %s, got" + " %s." % (all_solvers, solver)) + + all_penalties = ['l1', 'l2'] + if penalty not in all_penalties: + raise ValueError("Logistic Regression supports only penalties in %s," + " got %s." % (all_penalties, penalty)) + + if solver == 'auto': + if penalty == 'l1': + solver = 'saga' + else: + solver = 'lbfgs' + if not fit_intercept and multi_class == 'ovr': + solver = 'liblinear' + if n_samples > 1e3: + solver = 'saga' + + if warn_solver and solver != previous_default_solver: + # Do not warn if the 'auto' solver selects the previous default solver + + # previous_default_solver is used since LogisticRegression and + # LogisticRegressionCV don't have the same default in 0.19. + solver = previous_default_solver + warnings.warn("Default solver will be changed to 'auto' in 0.22. 
" + "Use a specific solver to silence this warning.", + FutureWarning) + + # Compatibility checks if multi_class == 'multinomial' and solver == 'liblinear': raise ValueError("Solver %s does not support " "a multinomial backend." % solver) - if solver not in ['liblinear', 'saga']: - if penalty != 'l2': - raise ValueError("Solver %s supports only l2 penalties, " - "got %s penalty." % (solver, penalty)) - if solver != 'liblinear': - if dual: - raise ValueError("Solver %s supports only " - "dual=False, got dual=%s" % (solver, dual)) + if solver not in ['liblinear', 'saga'] and penalty != 'l2': + raise ValueError("Solver %s supports only l2 penalties, " + "got %s penalty." % (solver, penalty)) + + if solver != 'liblinear' and dual: + raise ValueError("Solver %s supports only " + "dual=False, got dual=%s" % (solver, dual)) + + return solver, multi_class def logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True, max_iter=100, tol=1e-4, verbose=0, - solver='lbfgs', coef=None, + solver='default', coef=None, class_weight=None, dual=False, penalty='l2', - intercept_scaling=1., multi_class='ovr', + intercept_scaling=1., multi_class='default', random_state=None, check_input=True, max_squared_sum=None, sample_weight=None): """Compute a Logistic Regression model for a list of regularization @@ -471,7 +518,7 @@ def logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True, X : array-like or sparse matrix, shape (n_samples, n_features) Input data. - y : array-like, shape (n_samples,) + y : array-like, shape (n_samples,) or (n_samples, n_targets) Input data, target values. pos_class : int, None @@ -500,7 +547,7 @@ def logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True, For the liblinear and lbfgs solvers set verbose to any positive number for verbosity. - solver : {'lbfgs', 'newton-cg', 'liblinear', 'sag', 'saga'} + solver : {'lbfgs', 'newton-cg', 'liblinear', 'sag', 'saga', 'auto'} Numerical solver to use. coef : array-like, shape (n_features,), default None @@ -591,15 +638,17 @@ def logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True, if isinstance(Cs, numbers.Integral): Cs = np.logspace(-4, 4, Cs) - _check_solver_option(solver, multi_class, penalty, dual) - # Preprocessing. if check_input: X = check_array(X, accept_sparse='csr', dtype=np.float64, accept_large_sparse=solver != 'liblinear') y = check_array(y, ensure_2d=False, dtype=None) check_consistent_length(X, y) - _, n_features = X.shape + + n_samples, n_features = X.shape + solver, multi_class = _check_solver_option( + solver, multi_class, penalty, dual, fit_intercept, n_samples, 'lbfgs') + classes = np.unique(y) random_state = check_random_state(random_state) @@ -684,7 +733,6 @@ def logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True, else: w0[:, :coef.shape[1]] = coef - if multi_class == 'multinomial': # fmin_l_bfgs_b and newton-cg accepts only ravelled parameters. 
if solver in ['lbfgs', 'newton-cg']: @@ -774,9 +822,9 @@ def logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True, def _log_reg_scoring_path(X, y, train, test, pos_class=None, Cs=10, scoring=None, fit_intercept=False, max_iter=100, tol=1e-4, class_weight=None, - verbose=0, solver='lbfgs', penalty='l2', + verbose=0, solver='default', penalty='l2', dual=False, intercept_scaling=1., - multi_class='ovr', random_state=None, + multi_class='default', random_state=None, max_squared_sum=None, sample_weight=None): """Computes scores across logistic_regression_path @@ -836,7 +884,7 @@ def _log_reg_scoring_path(X, y, train, test, pos_class=None, Cs=10, For the liblinear and lbfgs solvers set verbose to any positive number for verbosity. - solver : {'lbfgs', 'newton-cg', 'liblinear', 'sag', 'saga'} + solver : {'lbfgs', 'newton-cg', 'liblinear', 'sag', 'saga', 'auto'} Decides which solver to use. penalty : str, 'l1' or 'l2' @@ -900,7 +948,9 @@ def _log_reg_scoring_path(X, y, train, test, pos_class=None, Cs=10, n_iter : array, shape(n_cs,) Actual number of iteration for each Cs. """ - _check_solver_option(solver, multi_class, penalty, dual) + n_samples, n_features = X.shape + solver, multi_class = _check_solver_option( + solver, multi_class, penalty, dual, fit_intercept, n_samples, 'lbfgs') X_train = X[train] X_test = X[test] @@ -1042,8 +1092,8 @@ class LogisticRegression(BaseEstimator, LinearClassifierMixin, instance used by `np.random`. Used when ``solver`` == 'sag' or 'liblinear'. - solver : str, {'newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'}, - default: 'liblinear' + solver : str, {'newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga', 'auto'}, + default: 'liblinear'. Will be changed to 'auto' solver in 0.22. Algorithm to use in the optimization problem. - For small datasets, 'liblinear' is a good choice, whereas 'sag' and @@ -1053,6 +1103,10 @@ class LogisticRegression(BaseEstimator, LinearClassifierMixin, schemes. - 'newton-cg', 'lbfgs' and 'sag' only handle L2 penalty, whereas 'liblinear' and 'saga' handle L1 penalty. + - 'auto' automatically chooses a solver based on the penalty and + multi_class parameters, and on the size of the input data. Note that + the 'auto' behavior may change without notice in the future, leading + to similar but not necessarily exact same solutions. Note that 'sag' and 'saga' fast convergence is only guaranteed on features with approximately the same scale. You can @@ -1062,12 +1116,15 @@ class LogisticRegression(BaseEstimator, LinearClassifierMixin, Stochastic Average Gradient descent solver. .. versionadded:: 0.19 SAGA solver. + .. versionadded:: 0.20 + 'auto' solver. max_iter : int, default: 100 Useful only for the newton-cg, sag and lbfgs solvers. Maximum number of iterations taken for the solvers to converge. - multi_class : str, {'ovr', 'multinomial'}, default: 'ovr' + multi_class : str, {'ovr', 'multinomial'} + default: 'ovr'. Will be changed to 'multinomial' in 0.22. Multiclass option can be either 'ovr' or 'multinomial'. If the option chosen is 'ovr', then a binary problem is fit for each label. Else the loss minimised is the multinomial loss fit across @@ -1128,7 +1185,6 @@ class LogisticRegression(BaseEstimator, LinearClassifierMixin, -------- SGDClassifier : incrementally trained logistic regression (when given the parameter ``loss="log"``). - sklearn.svm.LinearSVC : learns SVM models using the same algorithm. 
LogisticRegressionCV : Logistic regression with built-in cross validation Notes @@ -1165,8 +1221,8 @@ class LogisticRegression(BaseEstimator, LinearClassifierMixin, def __init__(self, penalty='l2', dual=False, tol=1e-4, C=1.0, fit_intercept=True, intercept_scaling=1, class_weight=None, - random_state=None, solver='liblinear', max_iter=100, - multi_class='ovr', verbose=0, warm_start=False, n_jobs=1): + random_state=None, solver='default', max_iter=100, + multi_class='default', verbose=0, warm_start=False, n_jobs=1): self.penalty = penalty self.dual = dual @@ -1192,7 +1248,7 @@ def fit(self, X, y, sample_weight=None): Training vector, where n_samples is the number of samples and n_features is the number of features. - y : array-like, shape (n_samples,) + y : array-like, shape (n_samples,) or (n_samples, n_targets) Target vector relative to X. sample_weight : array-like, shape (n_samples,) optional @@ -1216,21 +1272,22 @@ def fit(self, X, y, sample_weight=None): raise ValueError("Tolerance for stopping criteria must be " "positive; got (tol=%r)" % self.tol) - if self.solver in ['newton-cg']: + solver, multi_class = _check_solver_option( + self.solver, self.multi_class, self.penalty, self.dual, + self.fit_intercept, _num_samples(X), 'liblinear') + + if solver in ['newton-cg']: _dtype = [np.float64, np.float32] else: _dtype = np.float64 X, y = check_X_y(X, y, accept_sparse='csr', dtype=_dtype, order="C", - accept_large_sparse=self.solver != 'liblinear') + accept_large_sparse=solver != 'liblinear') check_classification_targets(y) self.classes_ = np.unique(y) n_samples, n_features = X.shape - _check_solver_option(self.solver, self.multi_class, self.penalty, - self.dual) - - if self.solver == 'liblinear': + if solver == 'liblinear': if self.n_jobs != 1: warnings.warn("'n_jobs' > 1 does not have any effect when" " 'solver' is set to 'liblinear'. Got 'n_jobs'" @@ -1243,8 +1300,8 @@ def fit(self, X, y, sample_weight=None): self.n_iter_ = np.array([n_iter_]) return self - if self.solver in ['sag', 'saga']: - max_squared_sum = row_norms(X, squared=True).max() + if solver in ['sag', 'saga']: + max_squared_sum = np.percentile(row_norms(X, squared=True), 90) else: max_squared_sum = None @@ -1272,7 +1329,7 @@ def fit(self, X, y, sample_weight=None): self.intercept_ = np.zeros(n_classes) # Hack so that we iterate only once for the multinomial case. - if self.multi_class == 'multinomial': + if multi_class == 'multinomial': classes_ = [None] warm_start_coef = [warm_start_coef] if warm_start_coef is None: @@ -1282,7 +1339,7 @@ def fit(self, X, y, sample_weight=None): # The SAG solver releases the GIL so it's more efficient to use # threads for this solver. 
- if self.solver in ['sag', 'saga']: + if solver in ['sag', 'saga']: backend = 'threading' else: backend = 'multiprocessing' @@ -1290,8 +1347,8 @@ def fit(self, X, y, sample_weight=None): backend=backend)( path_func(X, y, pos_class=class_, Cs=[self.C], fit_intercept=self.fit_intercept, tol=self.tol, - verbose=self.verbose, solver=self.solver, - multi_class=self.multi_class, max_iter=self.max_iter, + verbose=self.verbose, solver=solver, + multi_class=multi_class, max_iter=self.max_iter, class_weight=self.class_weight, check_input=False, random_state=self.random_state, coef=warm_start_coef_, penalty=self.penalty, @@ -1302,7 +1359,7 @@ def fit(self, X, y, sample_weight=None): fold_coefs_, _, n_iter_ = zip(*fold_coefs_) self.n_iter_ = np.asarray(n_iter_, dtype=np.int32)[:, 0] - if self.multi_class == 'multinomial': + if multi_class == 'multinomial': self.coef_ = fold_coefs_[0][0] else: self.coef_ = np.asarray(fold_coefs_) @@ -1340,7 +1397,13 @@ def predict_proba(self, X): """ if not hasattr(self, "coef_"): raise NotFittedError("Call fit before prediction") - if self.multi_class == "ovr": + + # This check can be removed in 0.22, changing back to self.multi_class + _, multi_class = _check_solver_option( + self.solver, self.multi_class, self.penalty, self.dual, + self.fit_intercept, _num_samples(X)) + + if multi_class == "ovr": return super(LogisticRegression, self)._predict_proba_lr(X) else: decision = self.decision_function(X) @@ -1429,8 +1492,8 @@ class LogisticRegressionCV(LogisticRegression, BaseEstimator, that can be used, look at :mod:`sklearn.metrics`. The default scoring option used is 'accuracy'. - solver : str, {'newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'}, - default: 'lbfgs' + solver : str, {'newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga', 'auto'}, + default: 'lbfgs'. Will be changed to 'auto' solver in 0.22. Algorithm to use in the optimization problem. - For small datasets, 'liblinear' is a good choice, whereas 'sag' and @@ -1442,6 +1505,10 @@ class LogisticRegressionCV(LogisticRegression, BaseEstimator, 'liblinear' and 'saga' handle L1 penalty. - 'liblinear' might be slower in LogisticRegressionCV because it does not handle warm-starting. + - 'auto' automatically chooses a solver based on the penalty and + multi_class parameters, and on the size of the input data. Note that + the 'auto' behavior may change without notice in the future, leading + to similar but not necessarily exact same solutions. Note that 'sag' and 'saga' fast convergence is only guaranteed on features with approximately the same scale. You can preprocess the data @@ -1451,6 +1518,8 @@ class LogisticRegressionCV(LogisticRegression, BaseEstimator, Stochastic Average Gradient descent solver. .. versionadded:: 0.19 SAGA solver. + .. versionadded:: 0.20 + 'auto' solver. tol : float, optional Tolerance for stopping criteria. @@ -1501,6 +1570,7 @@ class LogisticRegressionCV(LogisticRegression, BaseEstimator, (and therefore on the intercept) intercept_scaling has to be increased. multi_class : str, {'ovr', 'multinomial'} + default: 'ovr'. Will be changed to 'multinomial' in 0.22. Multiclass option can be either 'ovr' or 'multinomial'. If the option chosen is 'ovr', then a binary problem is fit for each label. 
Else the loss minimised is the multinomial loss fit across @@ -1570,9 +1640,9 @@ class LogisticRegressionCV(LogisticRegression, BaseEstimator, """ def __init__(self, Cs=10, fit_intercept=True, cv=None, dual=False, - penalty='l2', scoring=None, solver='lbfgs', tol=1e-4, + penalty='l2', scoring=None, solver='default', tol=1e-4, max_iter=100, class_weight=None, n_jobs=1, verbose=0, - refit=True, intercept_scaling=1., multi_class='ovr', + refit=True, intercept_scaling=1., multi_class='default', random_state=None): self.Cs = Cs self.fit_intercept = fit_intercept @@ -1600,7 +1670,7 @@ def fit(self, X, y, sample_weight=None): Training vector, where n_samples is the number of samples and n_features is the number of features. - y : array-like, shape (n_samples,) + y : array-like, shape (n_samples,) or (n_samples, n_targets) Target vector relative to X. sample_weight : array-like, shape (n_samples,) optional @@ -1611,8 +1681,9 @@ def fit(self, X, y, sample_weight=None): ------- self : object """ - _check_solver_option(self.solver, self.multi_class, self.penalty, - self.dual) + solver, multi_class = _check_solver_option( + self.solver, self.multi_class, self.penalty, self.dual, + self.fit_intercept, _num_samples(X), 'lbfgs') if not isinstance(self.max_iter, numbers.Number) or self.max_iter < 0: raise ValueError("Maximum number of iteration must be positive;" @@ -1623,7 +1694,7 @@ def fit(self, X, y, sample_weight=None): X, y = check_X_y(X, y, accept_sparse='csr', dtype=np.float64, order="C", - accept_large_sparse=self.solver != 'liblinear') + accept_large_sparse=solver != 'liblinear') check_classification_targets(y) class_weight = self.class_weight @@ -1639,8 +1710,8 @@ def fit(self, X, y, sample_weight=None): classes = self.classes_ = label_encoder.classes_ encoded_labels = label_encoder.transform(label_encoder.classes_) - if self.solver in ['sag', 'saga']: - max_squared_sum = row_norms(X, squared=True).max() + if solver in ['sag', 'saga']: + max_squared_sum = np.percentile(row_norms(X, squared=True), 90) else: max_squared_sum = None @@ -1665,7 +1736,7 @@ def fit(self, X, y, sample_weight=None): # We need this hack to iterate only once over labels, in the case of # multi_class = multinomial, without changing the value of the labels. - if self.multi_class == 'multinomial': + if multi_class == 'multinomial': iter_encoded_labels = iter_classes = [None] else: iter_encoded_labels = encoded_labels @@ -1690,10 +1761,10 @@ def fit(self, X, y, sample_weight=None): backend=backend)( path_func(X, y, train, test, pos_class=label, Cs=self.Cs, fit_intercept=self.fit_intercept, penalty=self.penalty, - dual=self.dual, solver=self.solver, tol=self.tol, + dual=self.dual, solver=solver, tol=self.tol, max_iter=self.max_iter, verbose=self.verbose, class_weight=class_weight, scoring=self.scoring, - multi_class=self.multi_class, + multi_class=multi_class, intercept_scaling=self.intercept_scaling, random_state=self.random_state, max_squared_sum=max_squared_sum, @@ -1702,7 +1773,7 @@ def fit(self, X, y, sample_weight=None): for label in iter_encoded_labels for train, test in folds) - if self.multi_class == 'multinomial': + if multi_class == 'multinomial': multi_coefs_paths, Cs, multi_scores, n_iter_ = zip(*fold_coefs_) multi_coefs_paths = np.asarray(multi_coefs_paths) multi_scores = np.asarray(multi_scores) @@ -1739,14 +1810,14 @@ def fit(self, X, y, sample_weight=None): self.intercept_ = np.zeros(n_classes) # hack to iterate only once for multinomial case. 
- if self.multi_class == 'multinomial': + if multi_class == 'multinomial': scores = multi_scores coefs_paths = multi_coefs_paths for index, (cls, encoded_label) in enumerate( zip(iter_classes, iter_encoded_labels)): - if self.multi_class == 'ovr': + if multi_class == 'ovr': # The scores_ / coefs_paths_ dict have unencoded class # labels as their keys scores = self.scores_[cls] @@ -1757,7 +1828,7 @@ def fit(self, X, y, sample_weight=None): C_ = self.Cs_[best_index] self.C_.append(C_) - if self.multi_class == 'multinomial': + if multi_class == 'multinomial': coef_init = np.mean(coefs_paths[:, best_index, :, :], axis=0) else: @@ -1766,12 +1837,12 @@ def fit(self, X, y, sample_weight=None): # Note that y is label encoded and hence pos_class must be # the encoded label / None (for 'multinomial') w, _, _ = logistic_regression_path( - X, y, pos_class=encoded_label, Cs=[C_], solver=self.solver, + X, y, pos_class=encoded_label, Cs=[C_], solver=solver, fit_intercept=self.fit_intercept, coef=coef_init, max_iter=self.max_iter, tol=self.tol, penalty=self.penalty, class_weight=class_weight, - multi_class=self.multi_class, + multi_class=multi_class, verbose=max(0, self.verbose - 1), random_state=self.random_state, check_input=False, max_squared_sum=max_squared_sum, @@ -1786,7 +1857,7 @@ def fit(self, X, y, sample_weight=None): for i in range(len(folds))], axis=0) self.C_.append(np.mean(self.Cs_[best_indices])) - if self.multi_class == 'multinomial': + if multi_class == 'multinomial': self.C_ = np.tile(self.C_, n_classes) self.coef_ = w[:, :X.shape[1]] if self.fit_intercept: diff --git a/sklearn/linear_model/sag.py b/sklearn/linear_model/sag.py index 39b817da1b0e2..2113907b810c7 100644 --- a/sklearn/linear_model/sag.py +++ b/sklearn/linear_model/sag.py @@ -217,7 +217,7 @@ def sag_solver(X, y, sample_weight=None, loss='log', alpha=1., beta=0., ... 
#doctest: +NORMALIZE_WHITESPACE LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True, intercept_scaling=1, max_iter=100, - multi_class='ovr', n_jobs=1, penalty='l2', random_state=None, + multi_class='default', n_jobs=1, penalty='l2', random_state=None, solver='sag', tol=0.0001, verbose=0, warm_start=False) References diff --git a/sklearn/linear_model/tests/test_logistic.py b/sklearn/linear_model/tests/test_logistic.py index 56be87f71015a..046cf0e0db6da 100644 --- a/sklearn/linear_model/tests/test_logistic.py +++ b/sklearn/linear_model/tests/test_logistic.py @@ -21,6 +21,7 @@ from sklearn.utils.testing import assert_warns from sklearn.utils.testing import ignore_warnings from sklearn.utils.testing import assert_warns_message +from sklearn.utils.testing import assert_no_warnings from sklearn.exceptions import ConvergenceWarning from sklearn.exceptions import ChangedBehaviorWarning @@ -197,8 +198,8 @@ def test_multinomial_validation(solver): def test_check_solver_option(LR): X, y = iris.data, iris.target - msg = ('Logistic Regression supports only liblinear, newton-cg, ' - 'lbfgs, sag and saga solvers, got wrong_name') + msg = ("Logistic Regression supports only solvers in ['liblinear', " + "'newton-cg', 'lbfgs', 'sag', 'saga', 'auto'], got wrong_name.") lr = LR(solver="wrong_name") assert_raise_message(ValueError, msg, lr.fit, X, y) @@ -224,6 +225,37 @@ def test_check_solver_option(LR): assert_raise_message(ValueError, msg, lr.fit, X, y) +@pytest.mark.parametrize('model, warn_solver', + [(LogisticRegression, True), + (LogisticRegressionCV, False)]) +def test_logistic_regression_warnings(model, warn_solver): + clf_solver_warning = model(multi_class='ovr') + clf_multi_class_warning = model(solver='lbfgs') + clf_no_warnings = model(solver='lbfgs', multi_class='ovr') + + solver_warning_msg = "Default solver will be changed to 'auto'" + multi_class_warning_msg = "Default multi_class will be changed to 'multi" + + if warn_solver: + assert_warns_message(FutureWarning, solver_warning_msg, + clf_solver_warning.fit, iris.data, iris.target) + else: + assert_no_warnings(clf_no_warnings.fit, iris.data, iris.target) + + assert_warns_message(FutureWarning, multi_class_warning_msg, + clf_multi_class_warning.fit, iris.data, iris.target) + assert_no_warnings(clf_no_warnings.fit, iris.data, iris.target) + + +@pytest.mark.parametrize('penalty', ['l1', 'l2']) +@pytest.mark.parametrize('multi_class', ['ovr', 'multinomial']) +@pytest.mark.parametrize('model', [LogisticRegression, LogisticRegressionCV]) +def test_logistic_regression_auto(penalty, multi_class, model): + # Test logistic regression with auto mode + clf = model(penalty=penalty, multi_class=multi_class, solver='auto') + clf.fit(iris.data, iris.target) + + @pytest.mark.parametrize('solver', ['lbfgs', 'newton-cg', 'sag', 'saga']) def test_multinomial_binary(solver): # Test multinomial LR on a binary problem. 
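The ``_check_solver_option`` helper added in this patch resolves ``solver='auto'`` from the penalty, the intercept/multi_class combination and the number of samples. The following is a rough standalone sketch of that selection rule only (simplified: the real helper also validates its arguments and emits the deprecation warnings, and the follow-up commit below reduces the rule to the penalty alone)::

    def pick_solver(penalty, multi_class, fit_intercept, n_samples):
        # Mirrors the 'auto' branch of _check_solver_option in this patch.
        solver = 'saga' if penalty == 'l1' else 'lbfgs'
        if not fit_intercept and multi_class == 'ovr':
            solver = 'liblinear'
        if n_samples > 1e3:
            solver = 'saga'
        return solver

    assert pick_solver('l2', 'multinomial', True, 500) == 'lbfgs'
    assert pick_solver('l1', 'ovr', True, 500) == 'saga'
    assert pick_solver('l2', 'ovr', True, 50000) == 'saga'
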
From 83a5dec56896b8d50237176898f00f9146eeb34a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=20Dupr=C3=A9=20la=20Tour?= Date: Fri, 27 Jul 2018 16:10:09 +0200 Subject: [PATCH 02/17] simpler auto solver --- benchmarks/bench_logistic_solvers.py | 269 --------------------------- doc/modules/linear_model.rst | 5 +- sklearn/linear_model/logistic.py | 17 +- 3 files changed, 5 insertions(+), 286 deletions(-) delete mode 100644 benchmarks/bench_logistic_solvers.py diff --git a/benchmarks/bench_logistic_solvers.py b/benchmarks/bench_logistic_solvers.py deleted file mode 100644 index bd35032439e9e..0000000000000 --- a/benchmarks/bench_logistic_solvers.py +++ /dev/null @@ -1,269 +0,0 @@ -""" -Benchmarks of sklearn solver in LogisticRegression. -""" - -# Author: Tom Dupre la Tour -import time -from os.path import expanduser - -import matplotlib.pyplot as plt -import scipy.sparse as sp # noqa -import numpy as np -import pandas as pd - -from sklearn.datasets import fetch_mldata -from sklearn.datasets import fetch_rcv1, load_iris, load_digits -from sklearn.datasets import fetch_20newsgroups_vectorized -from sklearn.exceptions import ConvergenceWarning -from sklearn.externals.joblib import delayed, Parallel, Memory -from sklearn.linear_model import LogisticRegression -from sklearn.linear_model.logistic import _multinomial_loss -from sklearn.model_selection import train_test_split -from sklearn.preprocessing import LabelBinarizer -from sklearn.preprocessing import MinMaxScaler # noqa -from sklearn.utils.testing import ignore_warnings -from sklearn.utils import shuffle - - -def get_loss(coefs, intercepts, X, y, C, multi_class, penalty): - if multi_class == 'ovr': - if np.array(intercepts).ndim == 0 and intercepts == 0: - intercepts = np.zeros(coefs.shape[0]) - loss = 0 - for ii, (coef, intercept) in enumerate(zip(coefs, intercepts)): - y_bin = y.copy() - y_bin[y == ii] = 1 - y_bin[y != ii] = -1 - loss += np.sum( - np.log(1. 
+ np.exp(-y_bin * (X.dot(coef) + intercept)))) - - if penalty == 'l2': - loss += 0.5 * coef.dot(coef) / C - else: - loss += np.sum(np.abs(coef)) / C - else: - coefs_and_intercept = np.vstack((coefs.T, intercepts.T)).T.ravel() - lbin = LabelBinarizer() - Y_multi = lbin.fit_transform(y) - if Y_multi.shape[1] == 1: - Y_multi = np.hstack([1 - Y_multi, Y_multi]) - loss, _, _ = _multinomial_loss(coefs_and_intercept, X, Y_multi, 0, - np.ones(X.shape[0])) - coefs = coefs.ravel() - if penalty == 'l2': - loss += 0.5 * coefs.dot(coefs) / C - else: - loss += np.sum(np.abs(coefs)) / C - - loss /= X.shape[0] - - return loss - - -def fit_single(solver, X, y, X_shape, dataset, penalty='l2', - multi_class='multinomial', C=1, max_iter=10): - assert X.shape == X_shape - - # if not sp.issparse(X): - # X = MinMaxScaler().fit_transform(X) - - X_train, X_test, y_train, y_test = train_test_split( - X, y, random_state=42, stratify=y) - train_scores, train_losses, test_scores, times = [], [], [], [] - - if solver == 'newton-cg': - max_iter /= 2 - - n_repeats = None - max_iter_range = np.unique(np.int_(np.logspace(0, np.log10(max_iter), 10))) - for this_max_iter in max_iter_range: - msg = ('[%s, %s, %s, %s] Max iter: %s' % (dataset, multi_class, solver, - penalty, this_max_iter)) - lr = LogisticRegression(solver=solver, multi_class=multi_class, C=C, - penalty=penalty, fit_intercept=False, - tol=1e-24, max_iter=this_max_iter, - random_state=42, intercept_scaling=10000) - t0 = time.clock() - try: - if penalty == 'l1' and multi_class == 'multinomial': - raise ValueError('skip as only saga is available.') - - with ignore_warnings(category=ConvergenceWarning): - # first time for timing - if n_repeats is None: - t0 = time.clock() - lr.fit(X_train, y_train) - max_iter_duration = max_iter * (time.clock() - t0) - n_repeats = max(1, int(1. 
/ max_iter_duration)) - - t0 = time.clock() - for _ in range(n_repeats): - lr.fit(X_train, y_train) - train_time = (time.clock() - t0) / n_repeats - print('%s (repeat=%d)' % (msg, n_repeats)) - - except ValueError: - train_score = np.nan - train_loss = np.nan - test_score = np.nan - train_time = np.nan - print('%s (skipped)' % (msg, )) - continue - - train_loss = get_loss(lr.coef_, lr.intercept_, X_train, y_train, C, - multi_class, penalty) - train_score = lr.score(X_train, y_train) - test_score = lr.score(X_test, y_test) - - train_scores.append(train_score) - train_losses.append(train_loss) - test_scores.append(test_score) - times.append(train_time) - - return (solver, penalty, dataset, multi_class, times, train_losses, - train_scores, test_scores) - - -def load_dataset(dataset, n_samples_max): - if dataset == 'rcv1': - rcv1 = fetch_rcv1() - X = rcv1.data - y = rcv1.target - - # take only 3 categories (CCAT, ECAT, MCAT) - y = y[:, [1, 4, 10]].astype(np.float64) - # remove samples that have more than one category - mask = np.asarray(y.sum(axis=1) == 1).ravel() - y = y[mask, :].indices - X = X[mask, :] - - elif dataset == 'mnist': - mnist = fetch_mldata('MNIST original') - X, y = shuffle(mnist.data, mnist.target, random_state=42) - X = X.astype(np.float64) - - elif dataset == 'digits': - digits = load_digits() - X, y = digits.data, digits.target - - elif dataset == 'iris': - iris = load_iris() - X, y = iris.data, iris.target - - elif dataset == '20news': - ng = fetch_20newsgroups_vectorized() - X = ng.data - y = ng.target - - X = X[:n_samples_max] - y = y[:n_samples_max] - - return X, y - - -def run(solvers, penalties, multi_classes, n_samples_max, max_iter, datasets, - n_jobs): - mem = Memory(cachedir=expanduser('~/cache'), verbose=0) - - results = [] - for dataset in datasets: - for multi_class in multi_classes: - X, y = load_dataset(dataset, n_samples_max) - - cached_fit = mem.cache(fit_single, ignore=['X']) - cached_fit = fit_single - - out = Parallel(n_jobs=n_jobs, mmap_mode=None)(delayed(cached_fit)( - solver, X, y, X.shape, dataset=dataset, penalty=penalty, - multi_class=multi_class, C=1, max_iter=max_iter) - for solver in solvers - for penalty in penalties) # yapf: disable - - results.extend(out) - - columns = ("solver penalty dataset multi_class times " - "train_losses train_scores test_scores").split() - results_df = pd.DataFrame(out, columns=columns) - plot(results_df) - - -def plot(res): - res.set_index(['dataset', 'multi_class', 'penalty'], inplace=True) - - grouped = res.groupby(level=['dataset', 'multi_class', 'penalty']) - - colors = { - 'sag': 'red', - 'saga': 'orange', - 'liblinear': 'blue', - 'lbfgs': 'green', - 'newton-cg': 'darkviolet', - 'auto': 'black', - } - - for idx, group in grouped: - dataset, multi_class, penalty = idx - fig = plt.figure(figsize=(12, 4)) - - # ----------------------- - ax = fig.add_subplot(131) - train_losses = group['train_losses'] - tmp = np.sort(np.concatenate(train_losses.values)) - if tmp.size == 0: - plt.close(fig) - continue - ref = 2 * tmp[0] - tmp[1] - - for losses, times, solver in zip(group['train_losses'], group['times'], - group['solver']): - losses = losses - ref - linestyle = ':' if solver == 'auto' else '-' - ax.plot(times, losses, label=solver, color=colors[solver], - linestyle=linestyle, marker='.') - ax.set_xlabel('Time (s)') - ax.set_ylabel('Training objective (relative to min)') - ax.set_yscale('log') - - # ----------------------- - ax = fig.add_subplot(132) - - for train_score, times, solver in zip(group['train_scores'], - 
group['times'], group['solver']): - linestyle = ':' if solver == 'auto' else '-' - ax.plot(times, train_score, label=solver, color=colors[solver], - linestyle=linestyle, marker='.') - ax.set_xlabel('Time (s)') - ax.set_ylabel('Train score') - - # ----------------------- - ax = fig.add_subplot(133) - - for test_score, times, solver in zip(group['test_scores'], - group['times'], group['solver']): - linestyle = ':' if solver == 'auto' else '-' - ax.plot(times, test_score, label=solver, color=colors[solver], - linestyle=linestyle, marker='.') - ax.set_xlabel('Time (s)') - ax.set_ylabel('Test score') - ax.legend() - - # ----------------------- - name = '%s_%s_%s' % (multi_class, penalty, dataset) - plt.suptitle(name) - fig.tight_layout() - fig.subplots_adjust(top=0.9) - plt.savefig('figures/' + name + '.png') - plt.close(fig) - print('SAVED: ' + name) - - -if __name__ == '__main__': - n_jobs = 3 - max_iter = 50 - solvers = ['liblinear', 'saga', 'sag', 'lbfgs', 'newton-cg', 'auto'] - penalties = ['l2', 'l1'] - multi_classes = ['multinomial', 'ovr'] - datasets = ['iris', 'digits', '20news', 'rcv1', 'mnist'] - - run(solvers, penalties, multi_classes, n_samples_max=None, n_jobs=n_jobs, - datasets=datasets, max_iter=max_iter) diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst index e84d72837bf05..b9e169fb25432 100644 --- a/doc/modules/linear_model.rst +++ b/doc/modules/linear_model.rst @@ -792,10 +792,7 @@ The "saga" solver is often the best choice. The "liblinear" solver is used by default for historical reasons. The default solver will change to "auto" in version 0.22. This option -automatically selects a good solver based on both `penalty` and `multi_class` -parameters, and on the size of the training set. Note that the "auto" behavior -may change without notice in the future, leading to similar but not necessarily -exact same solutions. +automatically selects a solver based on the `penalty` parameter. For large dataset, you may also consider using :class:`SGDClassifier` with 'log' loss. diff --git a/sklearn/linear_model/logistic.py b/sklearn/linear_model/logistic.py index b82dab9a46598..cda56efec6243 100644 --- a/sklearn/linear_model/logistic.py +++ b/sklearn/linear_model/logistic.py @@ -464,11 +464,6 @@ def _check_solver_option(solver, multi_class, penalty, dual, fit_intercept, else: solver = 'lbfgs' - if not fit_intercept and multi_class == 'ovr': - solver = 'liblinear' - if n_samples > 1e3: - solver = 'saga' - if warn_solver and solver != previous_default_solver: # Do not warn if the 'auto' solver selects the previous default solver @@ -1103,10 +1098,8 @@ class LogisticRegression(BaseEstimator, LinearClassifierMixin, schemes. - 'newton-cg', 'lbfgs' and 'sag' only handle L2 penalty, whereas 'liblinear' and 'saga' handle L1 penalty. - - 'auto' automatically chooses a solver based on the penalty and - multi_class parameters, and on the size of the input data. Note that - the 'auto' behavior may change without notice in the future, leading - to similar but not necessarily exact same solutions. + - 'auto' automatically chooses a solver based on the penalty + parameter. Note that 'sag' and 'saga' fast convergence is only guaranteed on features with approximately the same scale. You can @@ -1505,10 +1498,8 @@ class LogisticRegressionCV(LogisticRegression, BaseEstimator, 'liblinear' and 'saga' handle L1 penalty. - 'liblinear' might be slower in LogisticRegressionCV because it does not handle warm-starting. 
- - 'auto' automatically chooses a solver based on the penalty and - multi_class parameters, and on the size of the input data. Note that - the 'auto' behavior may change without notice in the future, leading - to similar but not necessarily exact same solutions. + - 'auto' automatically chooses a solver based on the penalty + parameter. Note that 'sag' and 'saga' fast convergence is only guaranteed on features with approximately the same scale. You can preprocess the data From 679224b7e039b58e9e647551041c683608514180 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=20Dupr=C3=A9=20la=20Tour?= Date: Sat, 28 Jul 2018 00:00:18 +0200 Subject: [PATCH 03/17] fix tests --- sklearn/linear_model/logistic.py | 9 +- sklearn/linear_model/tests/test_logistic.py | 161 ++++++++++++-------- 2 files changed, 103 insertions(+), 67 deletions(-) diff --git a/sklearn/linear_model/logistic.py b/sklearn/linear_model/logistic.py index ddff3dc140bb5..0d5cec93a3282 100644 --- a/sklearn/linear_model/logistic.py +++ b/sklearn/linear_model/logistic.py @@ -696,7 +696,7 @@ def logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True, le = LabelEncoder() Y_multi = le.fit_transform(y).astype(X.dtype, copy=False) - w0 = np.zeros((classes.size, n_features + int(fit_intercept)), + w0 = np.zeros((max(2, classes.size), n_features + int(fit_intercept)), order='F', dtype=X.dtype) if coef is not None: @@ -803,8 +803,9 @@ def logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True, "'newton-cg', 'sag'}, got '%s' instead" % solver) if multi_class == 'multinomial': - multi_w0 = np.reshape(w0, (classes.size, -1)) - if classes.size == 2: + n_classes = max(2, classes.size) + multi_w0 = np.reshape(w0, (n_classes, -1)) + if n_classes == 2: multi_w0 = multi_w0[1][np.newaxis, :] coefs.append(multi_w0) else: @@ -969,7 +970,7 @@ def _log_reg_scoring_path(X, y, train, test, pos_class=None, Cs=10, check_input=False, max_squared_sum=max_squared_sum, sample_weight=sample_weight) - log_reg = LogisticRegression(multi_class=multi_class) + log_reg = LogisticRegression(solver=solver, multi_class=multi_class) # The score method of Logistic Regression has a classes_ attribute. if multi_class == 'ovr': diff --git a/sklearn/linear_model/tests/test_logistic.py b/sklearn/linear_model/tests/test_logistic.py index 03eb808a9ed5e..12db09cc0acb4 100644 --- a/sklearn/linear_model/tests/test_logistic.py +++ b/sklearn/linear_model/tests/test_logistic.py @@ -58,6 +58,8 @@ def check_predictions(clf, X, y): assert_array_equal(probabilities.argmax(axis=1), y) +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 def test_predict_2_classes(): # Simple sanity check on a 2 classes dataset # Make sure it predicts the correct result on simple datasets. 
@@ -73,6 +75,7 @@ def test_predict_2_classes(): random_state=0), X_sp, Y1) +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 def test_error(): # Test for appropriate exception on errors msg = "Penalty term must be positive" @@ -96,6 +99,7 @@ def test_error(): assert_raise_message(ValueError, msg, LR(max_iter="test").fit, X, Y1) +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 def test_logistic_cv_mock_scorer(): class MockScorer(object): @@ -131,7 +135,7 @@ def __call__(self, model, X, y, sample_weight=None): def test_logistic_cv_score_does_not_warn_by_default(): - lr = LogisticRegressionCV(cv=2) + lr = LogisticRegressionCV(cv=2, multi_class='ovr') lr.fit(X, Y1) with pytest.warns(None) as record: @@ -143,7 +147,7 @@ def test_lr_liblinear_warning(): n_samples, n_features = iris.data.shape target = iris.target_names[iris.target] - lr = LogisticRegression(solver='liblinear', n_jobs=2) + lr = LogisticRegression(solver='liblinear', multi_class='ovr', n_jobs=2) assert_warns_message(UserWarning, "'n_jobs' > 1 does not have any effect when" " 'solver' is set to 'liblinear'. Got 'n_jobs'" @@ -151,6 +155,8 @@ def test_lr_liblinear_warning(): lr.fit, iris.data, target) +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 def test_predict_3_classes(): check_predictions(LogisticRegression(C=10), X, Y2) check_predictions(LogisticRegression(C=10), X_sp, Y2) @@ -165,7 +171,8 @@ def test_predict_iris(): # Test that both multinomial and OvR solvers handle # multiclass data correctly and give good accuracy # score (>0.95) for the training data. - for clf in [LogisticRegression(C=len(iris.data)), + for clf in [LogisticRegression(C=len(iris.data), solver='liblinear', + multi_class='ovr'), LogisticRegression(C=len(iris.data), solver='lbfgs', multi_class='multinomial'), LogisticRegression(C=len(iris.data), solver='newton-cg', @@ -201,7 +208,7 @@ def test_check_solver_option(LR): msg = ("Logistic Regression supports only solvers in ['liblinear', " "'newton-cg', 'lbfgs', 'sag', 'saga', 'auto'], got wrong_name.") - lr = LR(solver="wrong_name") + lr = LR(solver="wrong_name", multi_class="ovr") assert_raise_message(ValueError, msg, lr.fit, X, y) msg = "multi_class should be either multinomial or ovr, got wrong_name" @@ -217,22 +224,22 @@ def test_check_solver_option(LR): for solver in ['newton-cg', 'lbfgs', 'sag']: msg = ("Solver %s supports only l2 penalties, got l1 penalty." 
% solver) - lr = LR(solver=solver, penalty='l1') + lr = LR(solver=solver, penalty='l1', multi_class='ovr') assert_raise_message(ValueError, msg, lr.fit, X, y) for solver in ['newton-cg', 'lbfgs', 'sag', 'saga']: msg = ("Solver %s supports only dual=False, got dual=True" % solver) - lr = LR(solver=solver, dual=True) + lr = LR(solver=solver, dual=True, multi_class='ovr') assert_raise_message(ValueError, msg, lr.fit, X, y) -@pytest.mark.parametrize('model, warn_solver', - [(LogisticRegression, True), - (LogisticRegressionCV, False)]) -def test_logistic_regression_warnings(model, warn_solver): - clf_solver_warning = model(multi_class='ovr') - clf_multi_class_warning = model(solver='lbfgs') - clf_no_warnings = model(solver='lbfgs', multi_class='ovr') +@pytest.mark.parametrize('model, params, warn_solver', + [(LogisticRegression, {}, True), + (LogisticRegressionCV, {'cv': 5}, False)]) +def test_logistic_regression_warnings(model, params, warn_solver): + clf_solver_warning = model(multi_class='ovr', **params) + clf_multi_class_warning = model(solver='lbfgs', **params) + clf_no_warnings = model(solver='lbfgs', multi_class='ovr', **params) solver_warning_msg = "Default solver will be changed to 'auto'" multi_class_warning_msg = "Default multi_class will be changed to 'multi" @@ -248,6 +255,7 @@ def test_logistic_regression_warnings(model, warn_solver): assert_no_warnings(clf_no_warnings.fit, iris.data, iris.target) +@pytest.mark.filterwarnings('ignore: You should specify a value') # 0.22 @pytest.mark.parametrize('penalty', ['l1', 'l2']) @pytest.mark.parametrize('multi_class', ['ovr', 'multinomial']) @pytest.mark.parametrize('model', [LogisticRegression, LogisticRegressionCV]) @@ -291,11 +299,13 @@ def test_multinomial_binary_probabilities(): expected_proba_class_1 = (np.exp(decision) / (np.exp(decision) + np.exp(-decision))) - expected_proba = np.c_[1-expected_proba_class_1, expected_proba_class_1] + expected_proba = np.c_[1 - expected_proba_class_1, expected_proba_class_1] assert_almost_equal(proba, expected_proba) +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 def test_sparsify(): # Test sparsify and densify members. n_samples, n_features = iris.data.shape @@ -319,6 +329,8 @@ def test_sparsify(): assert_array_almost_equal(pred_d_d, pred_d_s) +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 def test_inconsistent_input(): # Test that an exception is raised on inconsistent input rng = np.random.RandomState(0) @@ -337,6 +349,8 @@ def test_inconsistent_input(): rng.random_sample((3, 12))) +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 def test_write_parameters(): # Test that we can write to coef_ and intercept_ clf = LogisticRegression(random_state=0) @@ -346,6 +360,8 @@ def test_write_parameters(): assert_array_almost_equal(clf.decision_function(X), 0) +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 def test_nan(): # Test proper NaN handling. # Regression test for Issue #252: fit used to go into an infinite loop. 
@@ -368,12 +384,11 @@ def test_consistency_path(): for solver in ['sag', 'saga']: coefs, Cs, _ = f(logistic_regression_path)( X, y, Cs=Cs, fit_intercept=False, tol=1e-5, solver=solver, - max_iter=1000, - random_state=0) + max_iter=1000, multi_class='ovr', random_state=0) for i, C in enumerate(Cs): lr = LogisticRegression(C=C, fit_intercept=False, tol=1e-5, - solver=solver, - random_state=0) + solver=solver, multi_class='ovr', + random_state=0, max_iter=1000) lr.fit(X, y) lr_coef = lr.coef_.ravel() assert_array_almost_equal(lr_coef, coefs[i], decimal=4, @@ -384,9 +399,10 @@ def test_consistency_path(): Cs = [1e3] coefs, Cs, _ = f(logistic_regression_path)( X, y, Cs=Cs, fit_intercept=True, tol=1e-6, solver=solver, - intercept_scaling=10000., random_state=0) + intercept_scaling=10000., random_state=0, multi_class='ovr') lr = LogisticRegression(C=Cs[0], fit_intercept=True, tol=1e-4, - intercept_scaling=10000., random_state=0) + intercept_scaling=10000., random_state=0, + multi_class='ovr', solver=solver) lr.fit(X, y) lr_coef = np.concatenate([lr.coef_.ravel(), lr.intercept_]) assert_array_almost_equal(lr_coef, coefs[0], decimal=4, @@ -405,11 +421,14 @@ def test_logistic_regression_path_convergence_fail(): def test_liblinear_dual_random_state(): # random_state is relevant for liblinear solver only if dual=True X, y = make_classification(n_samples=20, random_state=0) - lr1 = LogisticRegression(random_state=0, dual=True, max_iter=1, tol=1e-15) + lr1 = LogisticRegression(random_state=0, dual=True, max_iter=1, tol=1e-15, + solver='liblinear', multi_class='ovr') lr1.fit(X, y) - lr2 = LogisticRegression(random_state=0, dual=True, max_iter=1, tol=1e-15) + lr2 = LogisticRegression(random_state=0, dual=True, max_iter=1, tol=1e-15, + solver='liblinear', multi_class='ovr') lr2.fit(X, y) - lr3 = LogisticRegression(random_state=8, dual=True, max_iter=1, tol=1e-15) + lr3 = LogisticRegression(random_state=8, dual=True, max_iter=1, tol=1e-15, + solver='liblinear', multi_class='ovr') lr3.fit(X, y) # same result for same random state @@ -509,9 +528,10 @@ def test_logistic_cv(): X_ref -= X_ref.mean() X_ref /= X_ref.std() lr_cv = LogisticRegressionCV(Cs=[1.], fit_intercept=False, - solver='liblinear') + solver='liblinear', multi_class='ovr') lr_cv.fit(X_ref, y) - lr = LogisticRegression(C=1., fit_intercept=False) + lr = LogisticRegression(C=1., fit_intercept=False, + solver='liblinear', multi_class='ovr') lr.fit(X_ref, y) assert_array_almost_equal(lr.coef_, lr_cv.coef_) @@ -600,6 +620,7 @@ def test_multinomial_logistic_regression_string_inputs(): assert_equal(sorted(np.unique(lr_cv_str.predict(X_ref))), ['bar', 'baz']) +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 @pytest.mark.filterwarnings('ignore: You should specify a value') # 0.22 def test_logistic_cv_sparse(): X, y = make_classification(n_samples=50, n_features=5, @@ -662,11 +683,11 @@ def test_ovr_multinomial_iris(): precomputed_folds = list(cv.split(train, target)) # Train clf on the original dataset where classes 0 and 1 are separated - clf = LogisticRegressionCV(cv=precomputed_folds) + clf = LogisticRegressionCV(cv=precomputed_folds, multi_class='ovr') clf.fit(train, target) # Conflate classes 0 and 1 and train clf1 on this modified dataset - clf1 = LogisticRegressionCV(cv=precomputed_folds) + clf1 = LogisticRegressionCV(cv=precomputed_folds, multi_class='ovr') target_copy = target.copy() target_copy[target_copy == 0] = 1 clf1.fit(train, target_copy) @@ -712,13 +733,12 @@ def test_ovr_multinomial_iris(): def 
test_logistic_regression_solvers(): X, y = make_classification(n_features=10, n_informative=5, random_state=0) - ncg = LogisticRegression(solver='newton-cg', fit_intercept=False) - lbf = LogisticRegression(solver='lbfgs', fit_intercept=False) - lib = LogisticRegression(fit_intercept=False) - sag = LogisticRegression(solver='sag', fit_intercept=False, - random_state=42) - saga = LogisticRegression(solver='saga', fit_intercept=False, - random_state=42) + params = dict(fit_intercept=False, random_state=42, multi_class='ovr') + ncg = LogisticRegression(solver='newton-cg', **params) + lbf = LogisticRegression(solver='lbfgs', **params) + lib = LogisticRegression(solver='liblinear', **params) + sag = LogisticRegression(solver='sag', **params) + saga = LogisticRegression(solver='saga', **params) ncg.fit(X, y) lbf.fit(X, y) sag.fit(X, y) @@ -740,13 +760,13 @@ def test_logistic_regression_solvers_multiclass(): X, y = make_classification(n_samples=20, n_features=20, n_informative=10, n_classes=3, random_state=0) tol = 1e-7 - ncg = LogisticRegression(solver='newton-cg', fit_intercept=False, tol=tol) - lbf = LogisticRegression(solver='lbfgs', fit_intercept=False, tol=tol) - lib = LogisticRegression(fit_intercept=False, tol=tol) - sag = LogisticRegression(solver='sag', fit_intercept=False, tol=tol, - max_iter=1000, random_state=42) - saga = LogisticRegression(solver='saga', fit_intercept=False, tol=tol, - max_iter=10000, random_state=42) + params = dict(fit_intercept=False, tol=tol, random_state=42, + multi_class='ovr') + ncg = LogisticRegression(solver='newton-cg', **params) + lbf = LogisticRegression(solver='lbfgs', **params) + lib = LogisticRegression(solver='liblinear', **params) + sag = LogisticRegression(solver='sag', max_iter=1000, **params) + saga = LogisticRegression(solver='saga', max_iter=10000, **params) ncg.fit(X, y) lbf.fit(X, y) sag.fit(X, y) @@ -776,20 +796,25 @@ def test_logistic_regressioncv_class_weights(): clf_lbf = LogisticRegressionCV(solver='lbfgs', Cs=1, fit_intercept=False, + multi_class='ovr', class_weight=class_weight) clf_ncg = LogisticRegressionCV(solver='newton-cg', Cs=1, fit_intercept=False, + multi_class='ovr', class_weight=class_weight) clf_lib = LogisticRegressionCV(solver='liblinear', Cs=1, fit_intercept=False, + multi_class='ovr', class_weight=class_weight) clf_sag = LogisticRegressionCV(solver='sag', Cs=1, fit_intercept=False, + multi_class='ovr', class_weight=class_weight, tol=1e-5, max_iter=10000, random_state=0) clf_saga = LogisticRegressionCV(solver='saga', Cs=1, fit_intercept=False, + multi_class='ovr', class_weight=class_weight, tol=1e-5, max_iter=10000, random_state=0) @@ -816,27 +841,29 @@ def test_logistic_regression_sample_weights(): # not passing them at all (default None) for solver in ['lbfgs', 'liblinear']: clf_sw_none = LR(solver=solver, fit_intercept=False, - random_state=42) + random_state=42, multi_class='ovr') clf_sw_none.fit(X, y) clf_sw_ones = LR(solver=solver, fit_intercept=False, - random_state=42) + random_state=42, multi_class='ovr') clf_sw_ones.fit(X, y, sample_weight=np.ones(y.shape[0])) assert_array_almost_equal( clf_sw_none.coef_, clf_sw_ones.coef_, decimal=4) # Test that sample weights work the same with the lbfgs, # newton-cg, and 'sag' solvers - clf_sw_lbfgs = LR(solver='lbfgs', fit_intercept=False, random_state=42) + clf_sw_lbfgs = LR(solver='lbfgs', fit_intercept=False, random_state=42, + multi_class='ovr') clf_sw_lbfgs.fit(X, y, sample_weight=sample_weight) - clf_sw_n = LR(solver='newton-cg', fit_intercept=False, random_state=42) + 
clf_sw_n = LR(solver='newton-cg', fit_intercept=False, random_state=42, + multi_class='ovr') clf_sw_n.fit(X, y, sample_weight=sample_weight) clf_sw_sag = LR(solver='sag', fit_intercept=False, tol=1e-10, - random_state=42) + random_state=42, multi_class='ovr') # ignore convergence warning due to small dataset with ignore_warnings(): clf_sw_sag.fit(X, y, sample_weight=sample_weight) clf_sw_liblinear = LR(solver='liblinear', fit_intercept=False, - random_state=42) + random_state=42, multi_class='ovr') clf_sw_liblinear.fit(X, y, sample_weight=sample_weight) assert_array_almost_equal( clf_sw_lbfgs.coef_, clf_sw_n.coef_, decimal=4) @@ -850,9 +877,11 @@ def test_logistic_regression_sample_weights(): # to be 2 for all instances of class 2 for solver in ['lbfgs', 'liblinear']: clf_cw_12 = LR(solver=solver, fit_intercept=False, - class_weight={0: 1, 1: 2}, random_state=42) + class_weight={0: 1, 1: 2}, random_state=42, + multi_class='ovr') clf_cw_12.fit(X, y) - clf_sw_12 = LR(solver=solver, fit_intercept=False, random_state=42) + clf_sw_12 = LR(solver=solver, fit_intercept=False, random_state=42, + multi_class='ovr') clf_sw_12.fit(X, y, sample_weight=sample_weight) assert_array_almost_equal( clf_cw_12.coef_, clf_sw_12.coef_, decimal=4) @@ -861,21 +890,21 @@ def test_logistic_regression_sample_weights(): # since the patched liblinear code is different. clf_cw = LogisticRegression( solver="liblinear", fit_intercept=False, class_weight={0: 1, 1: 2}, - penalty="l1", tol=1e-5, random_state=42) + penalty="l1", tol=1e-5, random_state=42, multi_class='ovr') clf_cw.fit(X, y) clf_sw = LogisticRegression( solver="liblinear", fit_intercept=False, penalty="l1", tol=1e-5, - random_state=42) + random_state=42, multi_class='ovr') clf_sw.fit(X, y, sample_weight) assert_array_almost_equal(clf_cw.coef_, clf_sw.coef_, decimal=4) clf_cw = LogisticRegression( solver="liblinear", fit_intercept=False, class_weight={0: 1, 1: 2}, - penalty="l2", dual=True, random_state=42) + penalty="l2", dual=True, random_state=42, multi_class='ovr') clf_cw.fit(X, y) clf_sw = LogisticRegression( solver="liblinear", fit_intercept=False, penalty="l2", dual=True, - random_state=42) + random_state=42, multi_class='ovr') clf_sw.fit(X, y, sample_weight) assert_array_almost_equal(clf_cw.coef_, clf_sw.coef_, decimal=4) @@ -1006,7 +1035,8 @@ def test_liblinear_decision_function_zero(): # See Issue: https://github.com/scikit-learn/scikit-learn/issues/3600 # and the PR https://github.com/scikit-learn/scikit-learn/pull/3623 X, y = make_classification(n_samples=5, n_features=5, random_state=0) - clf = LogisticRegression(fit_intercept=False) + clf = LogisticRegression(fit_intercept=False, solver='liblinear', + multi_class='ovr') clf.fit(X, y) # Dummy data such that the decision function becomes zero. 
@@ -1019,10 +1049,11 @@ def test_liblinear_logregcv_sparse(): # Test LogRegCV with solver='liblinear' works for sparse matrices X, y = make_classification(n_samples=10, n_features=5, random_state=0) - clf = LogisticRegressionCV(solver='liblinear') + clf = LogisticRegressionCV(solver='liblinear', multi_class='ovr') clf.fit(sparse.csr_matrix(X), y) +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 @pytest.mark.filterwarnings('ignore: You should specify a value') # 0.22 def test_saga_sparse(): # Test LogRegCV with solver='liblinear' works for sparse matrices @@ -1036,13 +1067,16 @@ def test_logreg_intercept_scaling(): # Test that the right error message is thrown when intercept_scaling <= 0 for i in [-1, 0]: - clf = LogisticRegression(intercept_scaling=i) + clf = LogisticRegression(intercept_scaling=i, solver='liblinear', + multi_class='ovr') msg = ('Intercept scaling is %r but needs to be greater than 0.' ' To disable fitting an intercept,' ' set fit_intercept=False.' % clf.intercept_scaling) assert_raise_message(ValueError, msg, clf.fit, X, Y1) +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 def test_logreg_intercept_scaling_zero(): # Test that intercept_scaling is ignored when fit_intercept is False @@ -1063,12 +1097,12 @@ def test_logreg_l1(): X_constant = np.ones(shape=(n_samples, 2)) X = np.concatenate((X, X_noise, X_constant), axis=1) lr_liblinear = LogisticRegression(penalty="l1", C=1.0, solver='liblinear', - fit_intercept=False, + fit_intercept=False, multi_class='ovr', tol=1e-10) lr_liblinear.fit(X, y) lr_saga = LogisticRegression(penalty="l1", C=1.0, solver='saga', - fit_intercept=False, + fit_intercept=False, multi_class='ovr', max_iter=1000, tol=1e-10) lr_saga.fit(X, y) assert_array_almost_equal(lr_saga.coef_, lr_liblinear.coef_) @@ -1094,12 +1128,12 @@ def test_logreg_l1_sparse_data(): X = sparse.csr_matrix(X) lr_liblinear = LogisticRegression(penalty="l1", C=1.0, solver='liblinear', - fit_intercept=False, + fit_intercept=False, multi_class='ovr', tol=1e-10) lr_liblinear.fit(X, y) lr_saga = LogisticRegression(penalty="l1", C=1.0, solver='saga', - fit_intercept=False, + fit_intercept=False, multi_class='ovr', max_iter=1000, tol=1e-10) lr_saga.fit(X, y) assert_array_almost_equal(lr_saga.coef_, lr_liblinear.coef_) @@ -1110,19 +1144,20 @@ def test_logreg_l1_sparse_data(): # Check that solving on the sparse and dense data yield the same results lr_saga_dense = LogisticRegression(penalty="l1", C=1.0, solver='saga', - fit_intercept=False, + fit_intercept=False, multi_class='ovr', max_iter=1000, tol=1e-10) lr_saga_dense.fit(X.toarray(), y) assert_array_almost_equal(lr_saga.coef_, lr_saga_dense.coef_) +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 @pytest.mark.filterwarnings('ignore: You should specify a value') # 0.22 def test_logreg_cv_penalty(): # Test that the correct penalty is passed to the final fit. 
X, y = make_classification(n_samples=50, n_features=20, random_state=0) - lr_cv = LogisticRegressionCV(penalty="l1", Cs=[1.0], solver='liblinear') + lr_cv = LogisticRegressionCV(penalty="l1", Cs=[1.0], solver='saga') lr_cv.fit(X, y) - lr = LogisticRegression(penalty="l1", C=1.0, solver='liblinear') + lr = LogisticRegression(penalty="l1", C=1.0, solver='saga') lr.fit(X, y) assert_equal(np.count_nonzero(lr_cv.coef_), np.count_nonzero(lr.coef_)) From a9219e821585537b6f1fe180feb7a20dcce91b73 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=20Dupr=C3=A9=20la=20Tour?= Date: Sat, 28 Jul 2018 13:50:48 +0200 Subject: [PATCH 04/17] fix more tests --- sklearn/ensemble/tests/test_bagging.py | 6 ++++ .../ensemble/tests/test_voting_classifier.py | 31 ++++++++++++++++++- .../tests/test_from_model.py | 5 ++- .../linear_model/tests/test_randomized_l1.py | 5 +++ sklearn/linear_model/tests/test_sag.py | 19 +++++++----- sklearn/metrics/tests/test_score_objects.py | 4 +++ .../model_selection/tests/test_validation.py | 18 ++++++++++- sklearn/svm/tests/test_bounds.py | 3 +- sklearn/svm/tests/test_sparse.py | 3 ++ sklearn/svm/tests/test_svm.py | 6 ++++ sklearn/tests/test_multiclass.py | 4 +++ sklearn/tests/test_multioutput.py | 16 +++++++++- sklearn/tests/test_pipeline.py | 7 +++++ sklearn/utils/tests/test_class_weight.py | 3 ++ 14 files changed, 118 insertions(+), 12 deletions(-) diff --git a/sklearn/ensemble/tests/test_bagging.py b/sklearn/ensemble/tests/test_bagging.py index 505ec2f17b248..608df3dc43bce 100644 --- a/sklearn/ensemble/tests/test_bagging.py +++ b/sklearn/ensemble/tests/test_bagging.py @@ -293,6 +293,8 @@ def test_bootstrap_features(): assert_greater(boston.data.shape[1], np.unique(features).shape[0]) +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 def test_probability(): # Predict probabilities. rng = check_random_state(0) @@ -712,6 +714,8 @@ def test_oob_score_consistency(): assert_equal(bagging.fit(X, y).oob_score_, bagging.fit(X, y).oob_score_) +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 def test_estimators_samples(): # Check that format of estimators_samples_ is correct and that results # generated at fit time can be identically reproduced at a later time @@ -748,6 +752,8 @@ def test_estimators_samples(): assert_array_almost_equal(orig_coefs, new_coefs) +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 def test_estimators_samples_deterministic(): # This test is a regression test to check that with a random step # (e.g. 
SparseRandomProjection) and a given random state, the results diff --git a/sklearn/ensemble/tests/test_voting_classifier.py b/sklearn/ensemble/tests/test_voting_classifier.py index f5bfdbd101beb..c480d8381f651 100644 --- a/sklearn/ensemble/tests/test_voting_classifier.py +++ b/sklearn/ensemble/tests/test_voting_classifier.py @@ -28,6 +28,8 @@ X, y = iris.data[:, 1:3], iris.target +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 def test_estimator_init(): eclf = VotingClassifier(estimators=[]) msg = ('Invalid `estimators` attribute, `estimators` should be' @@ -59,6 +61,8 @@ def test_estimator_init(): assert_raise_message(ValueError, msg, eclf.fit, X, y) +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 def test_predictproba_hardvoting(): eclf = VotingClassifier(estimators=[('lr1', LogisticRegression()), ('lr2', LogisticRegression())], @@ -67,6 +71,8 @@ def test_predictproba_hardvoting(): assert_raise_message(AttributeError, msg, eclf.predict_proba, X) +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 def test_notfitted(): eclf = VotingClassifier(estimators=[('lr1', LogisticRegression()), ('lr2', LogisticRegression())], @@ -76,6 +82,8 @@ def test_notfitted(): assert_raise_message(NotFittedError, msg, eclf.predict_proba, X) +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 @pytest.mark.filterwarnings('ignore:The default value of n_estimators') def test_majority_label_iris(): """Check classification by majority label on dataset iris.""" @@ -92,7 +100,8 @@ def test_majority_label_iris(): @pytest.mark.filterwarnings('ignore:The default value of n_estimators') def test_tie_situation(): """Check voting classifier selects smaller class label in tie situation.""" - clf1 = LogisticRegression(random_state=123) + clf1 = LogisticRegression(random_state=123, multi_class='ovr', + solver='liblinear') clf2 = RandomForestClassifier(random_state=123) eclf = VotingClassifier(estimators=[('lr', clf1), ('rf', clf2)], voting='hard') @@ -101,6 +110,8 @@ def test_tie_situation(): assert_equal(eclf.fit(X, y).predict(X)[73], 1) +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 @pytest.mark.filterwarnings('ignore:The default value of n_estimators') def test_weights_iris(): """Check classification by average probabilities on dataset iris.""" @@ -115,6 +126,8 @@ def test_weights_iris(): assert_almost_equal(scores.mean(), 0.93, decimal=2) +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 @pytest.mark.filterwarnings('ignore:The default value of n_estimators') def test_predict_on_toy_problem(): """Manually check predicted class labels for toy dataset.""" @@ -148,6 +161,8 @@ def test_predict_on_toy_problem(): assert_equal(all(eclf.fit(X, y).predict(X)), all([1, 1, 1, 2, 2, 2])) +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 @pytest.mark.filterwarnings('ignore:The default value of n_estimators') def 
test_predict_proba_on_toy_problem(): """Calculate predicted probabilities on toy dataset.""" @@ -216,6 +231,8 @@ def test_multilabel(): return +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 @pytest.mark.filterwarnings('ignore:The default value of n_estimators') def test_gridsearch(): """Check GridSearch support.""" @@ -234,6 +251,8 @@ def test_gridsearch(): grid.fit(iris.data, iris.target) +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 @pytest.mark.filterwarnings('ignore:The default value of n_estimators') def test_parallel_fit(): """Check parallel backend of VotingClassifier on toy dataset.""" @@ -256,6 +275,8 @@ def test_parallel_fit(): assert_array_almost_equal(eclf1.predict_proba(X), eclf2.predict_proba(X)) +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 @pytest.mark.filterwarnings('ignore:The default value of n_estimators') def test_sample_weight(): """Tests sample_weight parameter of VotingClassifier""" @@ -300,6 +321,8 @@ def fit(self, X, y, *args, **sample_weight): eclf.fit(X, y, sample_weight=np.ones((len(y),))) +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 @pytest.mark.filterwarnings('ignore:The default value of n_estimators') def test_set_params(): """set_params should be able to set estimators""" @@ -335,6 +358,8 @@ def test_set_params(): eclf1.get_params()["lr"].get_params()['C']) +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 @pytest.mark.filterwarnings('ignore:The default value of n_estimators') def test_set_estimator_none(): """VotingClassifier set_params should be able to set estimators as None""" @@ -390,6 +415,8 @@ def test_set_estimator_none(): assert_array_equal(eclf2.transform(X1), np.array([[0], [1]])) +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 @pytest.mark.filterwarnings('ignore:The default value of n_estimators') def test_estimator_weights_format(): # Test estimator weights inputs as list and array @@ -408,6 +435,8 @@ def test_estimator_weights_format(): assert_array_almost_equal(eclf1.predict_proba(X), eclf2.predict_proba(X)) +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 @pytest.mark.filterwarnings('ignore:The default value of n_estimators') def test_transform(): """Check transform method of VotingClassifier on toy dataset.""" diff --git a/sklearn/feature_selection/tests/test_from_model.py b/sklearn/feature_selection/tests/test_from_model.py index e6bb76c5e19a9..47e62eb8e7168 100644 --- a/sklearn/feature_selection/tests/test_from_model.py +++ b/sklearn/feature_selection/tests/test_from_model.py @@ -8,7 +8,6 @@ from sklearn.utils.testing import assert_greater from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_array_equal -from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_allclose from sklearn.utils.testing import assert_raises from sklearn.utils.testing import 
skip_if_32bit @@ -178,6 +177,8 @@ def test_feature_importances(): assert_array_almost_equal(X_new, X[:, feature_mask]) +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 def test_sample_weight(): # Ensure sample weights are passed to underlying estimator X, y = datasets.make_classification( @@ -214,6 +215,8 @@ def test_coef_default_threshold(): assert_array_almost_equal(X_new, X[:, mask]) +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 @skip_if_32bit def test_2d_coef(): X, y = datasets.make_classification( diff --git a/sklearn/linear_model/tests/test_randomized_l1.py b/sklearn/linear_model/tests/test_randomized_l1.py index 564fbd4e7827d..f9d1ff10e3a37 100644 --- a/sklearn/linear_model/tests/test_randomized_l1.py +++ b/sklearn/linear_model/tests/test_randomized_l1.py @@ -4,6 +4,7 @@ from tempfile import mkdtemp import shutil +import pytest import numpy as np from scipy import sparse @@ -132,6 +133,8 @@ def test_randomized_lasso_precompute(): assert_array_equal(feature_scores_1, feature_scores_2) +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 @ignore_warnings(category=DeprecationWarning) def test_randomized_logistic(): # Check randomized sparse logistic regression @@ -162,6 +165,8 @@ def test_randomized_logistic(): assert_raises(ValueError, clf.fit, X, y) +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 @ignore_warnings(category=DeprecationWarning) def test_randomized_logistic_sparse(): # Check randomized sparse logistic regression on sparse data diff --git a/sklearn/linear_model/tests/test_sag.py b/sklearn/linear_model/tests/test_sag.py index ca99a81a73963..8f4dbc8794fc8 100644 --- a/sklearn/linear_model/tests/test_sag.py +++ b/sklearn/linear_model/tests/test_sag.py @@ -247,7 +247,8 @@ def test_classifier_matching(): n_iter = 300 clf = LogisticRegression(solver=solver, fit_intercept=fit_intercept, tol=1e-11, C=1. / alpha / n_samples, - max_iter=n_iter, random_state=10) + max_iter=n_iter, random_state=10, + multi_class='ovr') clf.fit(X, y) weights, intercept = sag_sparse(X, y, step_size, alpha, n_iter=n_iter, @@ -311,11 +312,12 @@ def test_sag_pobj_matches_logistic_regression(): clf1 = LogisticRegression(solver='sag', fit_intercept=False, tol=.0000001, C=1. / alpha / n_samples, max_iter=max_iter, - random_state=10) + random_state=10, multi_class='ovr') clf2 = clone(clf1) clf3 = LogisticRegression(fit_intercept=False, tol=.0000001, C=1. / alpha / n_samples, max_iter=max_iter, - random_state=10) + random_state=10, multi_class='ovr', + solver='lbfgs') clf1.fit(X, y) clf2.fit(sp.csr_matrix(X), y) @@ -507,7 +509,7 @@ def test_sag_classifier_computed_correctly(): clf1 = LogisticRegression(solver='sag', C=1. / alpha / n_samples, max_iter=n_iter, tol=tol, random_state=77, - fit_intercept=fit_intercept) + fit_intercept=fit_intercept, multi_class='ovr') clf2 = clone(clf1) clf1.fit(X, y) @@ -547,7 +549,7 @@ def test_sag_multiclass_computed_correctly(): clf1 = LogisticRegression(solver='sag', C=1. 
/ alpha / n_samples, max_iter=max_iter, tol=tol, random_state=77, - fit_intercept=fit_intercept) + fit_intercept=fit_intercept, multi_class='ovr') clf2 = clone(clf1) clf1.fit(X, y) @@ -591,6 +593,7 @@ def test_sag_multiclass_computed_correctly(): assert_almost_equal(clf2.intercept_[i], intercept2[i], decimal=1) +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 def test_classifier_results(): """tests if classifier results match target""" alpha = .1 @@ -634,7 +637,7 @@ def test_binary_classifier_class_weight(): class_weight = {1: .45, -1: .55} clf1 = LogisticRegression(solver='sag', C=1. / alpha / n_samples, max_iter=n_iter, tol=tol, random_state=77, - fit_intercept=fit_intercept, + fit_intercept=fit_intercept, multi_class='ovr', class_weight=class_weight) clf2 = clone(clf1) @@ -681,7 +684,7 @@ def test_multiclass_classifier_class_weight(): clf1 = LogisticRegression(solver='sag', C=1. / alpha / n_samples, max_iter=max_iter, tol=tol, random_state=77, - fit_intercept=fit_intercept, + fit_intercept=fit_intercept, multi_class='ovr', class_weight=class_weight) clf2 = clone(clf1) clf1.fit(X, y) @@ -728,6 +731,7 @@ def test_multiclass_classifier_class_weight(): assert_almost_equal(clf2.intercept_[i], intercept2[i], decimal=1) +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 def test_classifier_single_class(): """tests if ValueError is thrown with only one class""" X = [[1, 2], [3, 4]] @@ -740,6 +744,7 @@ def test_classifier_single_class(): X, y) +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 def test_step_size_alpha_error(): X = [[0, 0], [0, 0]] y = [1, -1] diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index f418a9375d993..da04b4215dce0 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -334,6 +334,8 @@ def test_regression_scorers(): assert_almost_equal(score1, score2) +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 def test_thresholded_scorers(): # Test scorers that take thresholds. 
X, y = make_blobs(random_state=0, centers=2) @@ -504,6 +506,8 @@ def test_scorer_memmap_input(name): check_scorer_memmap(name) +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 def test_scoring_is_not_metric(): assert_raises_regexp(ValueError, 'make_scorer', check_scoring, LogisticRegression(), f1_score) diff --git a/sklearn/model_selection/tests/test_validation.py b/sklearn/model_selection/tests/test_validation.py index bb7e736eb3c3b..0d7a05f39d714 100644 --- a/sklearn/model_selection/tests/test_validation.py +++ b/sklearn/model_selection/tests/test_validation.py @@ -790,6 +790,8 @@ def test_cross_val_score_multilabel(): assert_almost_equal(score_samples, [1, 1 / 2, 3 / 4, 1 / 2, 1 / 4]) +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 @pytest.mark.filterwarnings('ignore: You should specify a value') # 0.22 def test_cross_val_predict(): boston = load_boston() @@ -840,6 +842,8 @@ def split(self, X, y=None, groups=None): X, y, method='predict_proba', cv=KFold(2)) +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 @pytest.mark.filterwarnings('ignore: You should specify a value') # 0.22 def test_cross_val_predict_decision_function_shape(): X, y = make_classification(n_classes=2, n_samples=50, random_state=0) @@ -887,6 +891,8 @@ def test_cross_val_predict_decision_function_shape(): cv=KFold(n_splits=3), method='decision_function') +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 @pytest.mark.filterwarnings('ignore: You should specify a value') # 0.22 def test_cross_val_predict_predict_proba_shape(): X, y = make_classification(n_classes=2, n_samples=50, random_state=0) @@ -902,6 +908,8 @@ def test_cross_val_predict_predict_proba_shape(): assert_equal(preds.shape, (150, 3)) +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 @pytest.mark.filterwarnings('ignore: You should specify a value') # 0.22 def test_cross_val_predict_predict_log_proba_shape(): X, y = make_classification(n_classes=2, n_samples=50, random_state=0) @@ -917,6 +925,8 @@ def test_cross_val_predict_predict_log_proba_shape(): assert_equal(preds.shape, (150, 3)) +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 @pytest.mark.filterwarnings('ignore: You should specify a value') # 0.22 def test_cross_val_predict_input_types(): iris = load_iris() @@ -1336,6 +1346,8 @@ def check_cross_val_predict_with_method(est): assert_array_equal(predictions, predictions_ystr) +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 @pytest.mark.filterwarnings('ignore: You should specify a value') # 0.22 def test_cross_val_predict_with_method(): check_cross_val_predict_with_method(LogisticRegression()) @@ -1350,6 +1362,8 @@ def test_cross_val_predict_method_checking(): check_cross_val_predict_with_method(est) +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 
@pytest.mark.filterwarnings('ignore: The default of the `iid`') @pytest.mark.filterwarnings('ignore: You should specify a value') # 0.22 def test_gridsearchcv_cross_val_predict_with_method(): @@ -1379,11 +1393,13 @@ def get_expected_predictions(X, y, cv, classes, est, method): return expected_predictions +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 @pytest.mark.filterwarnings('ignore: You should specify a value') # 0.22 def test_cross_val_predict_class_subset(): X = np.arange(200).reshape(100, 2) - y = np.array([x//10 for x in range(100)]) + y = np.array([x // 10 for x in range(100)]) classes = 10 kfold3 = KFold(n_splits=3) diff --git a/sklearn/svm/tests/test_bounds.py b/sklearn/svm/tests/test_bounds.py index d02c53b05d8b7..fffd7fc787938 100644 --- a/sklearn/svm/tests/test_bounds.py +++ b/sklearn/svm/tests/test_bounds.py @@ -45,7 +45,8 @@ def check_l1_min_c(X, y, loss, fit_intercept=True, intercept_scaling=None): min_c = l1_min_c(X, y, loss, fit_intercept, intercept_scaling) clf = { - 'log': LogisticRegression(penalty='l1'), + 'log': LogisticRegression(penalty='l1', solver='liblinear', + multi_class='ovr'), 'squared_hinge': LinearSVC(loss='squared_hinge', penalty='l1', dual=False), }[loss] diff --git a/sklearn/svm/tests/test_sparse.py b/sklearn/svm/tests/test_sparse.py index 5fa83050a98f1..8a3ac8423f7d6 100644 --- a/sklearn/svm/tests/test_sparse.py +++ b/sklearn/svm/tests/test_sparse.py @@ -1,3 +1,4 @@ +import pytest import numpy as np from scipy import sparse from numpy.testing import (assert_array_almost_equal, assert_array_equal, @@ -234,6 +235,8 @@ def test_linearsvc_iris(): assert_array_equal(pred, sp_clf.predict(iris.data)) +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 def test_weight(): # Test class weights X_, y_ = make_classification(n_samples=200, n_features=100, diff --git a/sklearn/svm/tests/test_svm.py b/sklearn/svm/tests/test_svm.py index 6187a08f7b757..4a8e4ef735888 100644 --- a/sklearn/svm/tests/test_svm.py +++ b/sklearn/svm/tests/test_svm.py @@ -5,6 +5,8 @@ """ import numpy as np import itertools +import pytest + from numpy.testing import assert_array_equal, assert_array_almost_equal from numpy.testing import assert_almost_equal from numpy.testing import assert_allclose @@ -403,6 +405,8 @@ def test_svr_predict(): assert_array_almost_equal(dec.ravel(), reg.predict(X).ravel()) +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 def test_weight(): # Test class weights clf = svm.SVC(gamma='scale', class_weight={1: 0.1}) @@ -442,6 +446,8 @@ def test_sample_weights(): assert_array_almost_equal(dual_coef_no_weight, clf.dual_coef_) +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 @ignore_warnings(category=UndefinedMetricWarning) def test_auto_weight(): # Test class weights for imbalanced data diff --git a/sklearn/tests/test_multiclass.py b/sklearn/tests/test_multiclass.py index 08c3b9f01e163..130c43b3ebeb2 100644 --- a/sklearn/tests/test_multiclass.py +++ b/sklearn/tests/test_multiclass.py @@ -187,6 +187,8 @@ def test_ovr_fit_predict_sparse(): assert_array_equal(dec_pred, clf_sprs.predict(X_test).toarray()) +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 
0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 def test_ovr_always_present(): # Test that ovr works with classes that are always present or absent. # Note: tests is the case where _ConstantPredictor is utilised @@ -244,6 +246,8 @@ def test_ovr_multiclass(): assert_array_equal(y_pred, [0, 0, 1]) +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 def test_ovr_binary(): # Toy dataset where features correspond directly to labels. X = np.array([[0, 0, 5], [0, 5, 0], [3, 0, 0], [0, 0, 6], [6, 0, 0]]) diff --git a/sklearn/tests/test_multioutput.py b/sklearn/tests/test_multioutput.py index 83e3794d78870..1eb5a7e48f823 100644 --- a/sklearn/tests/test_multioutput.py +++ b/sklearn/tests/test_multioutput.py @@ -1,5 +1,6 @@ from __future__ import division +import pytest import numpy as np import scipy.sparse as sp @@ -277,7 +278,8 @@ def test_multiclass_multioutput_estimator_predict_proba(): Y = np.concatenate([y1, y2], axis=1) - clf = MultiOutputClassifier(LogisticRegression(random_state=seed)) + clf = MultiOutputClassifier(LogisticRegression( + multi_class='ovr', solver='liblinear', random_state=seed)) clf.fit(X, Y) @@ -383,6 +385,8 @@ def test_classifier_chain_fit_and_predict_with_linear_svc(): assert not hasattr(classifier_chain, 'predict_proba') +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 def test_classifier_chain_fit_and_predict_with_sparse_data(): # Fit classifier chain with sparse data X, Y = generate_multilabel_dataset_with_correlations() @@ -399,6 +403,8 @@ def test_classifier_chain_fit_and_predict_with_sparse_data(): assert_array_equal(Y_pred_sparse, Y_pred_dense) +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 def test_classifier_chain_vs_independent_models(): # Verify that an ensemble of classifier chains (each of length # N) can achieve a higher Jaccard similarity score than N independent @@ -421,6 +427,8 @@ def test_classifier_chain_vs_independent_models(): jaccard_similarity_score(Y_test, Y_pred_ovr)) +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 def test_base_chain_fit_and_predict(): # Fit base chain and verify predict performance X, Y = generate_multilabel_dataset_with_correlations() @@ -440,6 +448,8 @@ def test_base_chain_fit_and_predict(): assert isinstance(chains[1], ClassifierMixin) +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 def test_base_chain_fit_and_predict_with_sparse_data_and_cv(): # Fit base chain with sparse data cross_val_predict X, Y = generate_multilabel_dataset_with_correlations() @@ -452,6 +462,8 @@ def test_base_chain_fit_and_predict_with_sparse_data_and_cv(): assert_equal(Y_pred.shape, Y.shape) +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 def test_base_chain_random_order(): # Fit base chain with random order X, Y = generate_multilabel_dataset_with_correlations() @@ -472,6 +484,8 @@ def test_base_chain_random_order(): assert_array_almost_equal(est1.coef_, est2.coef_) +@pytest.mark.filterwarnings('ignore: Default solver will be 
changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 def test_base_chain_crossval_fit_and_predict(): # Fit chain with cross_val_predict and verify predict # performance diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py index a36d3e17e31e9..8a15238ede1d3 100644 --- a/sklearn/tests/test_pipeline.py +++ b/sklearn/tests/test_pipeline.py @@ -6,6 +6,7 @@ import shutil import time +import pytest import numpy as np from scipy import sparse @@ -234,6 +235,8 @@ def test_pipeline_init_tuple(): pipe.score(X) +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 def test_pipeline_methods_anova(): # Test the various methods of the pipeline (anova). iris = load_iris() @@ -784,6 +787,8 @@ def test_feature_union_feature_names(): 'get_feature_names', ft.get_feature_names) +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 def test_classes_property(): iris = load_iris() X = iris.data @@ -887,6 +892,8 @@ def test_step_name_validation(): [[1]], [1]) +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 def test_set_params_nested_pipeline(): estimator = Pipeline([ ('a', Pipeline([ diff --git a/sklearn/utils/tests/test_class_weight.py b/sklearn/utils/tests/test_class_weight.py index 1dfedad9bcd7c..c2d03595fb860 100644 --- a/sklearn/utils/tests/test_class_weight.py +++ b/sklearn/utils/tests/test_class_weight.py @@ -1,4 +1,5 @@ import numpy as np +import pytest from sklearn.linear_model import LogisticRegression from sklearn.datasets import make_blobs @@ -65,6 +66,8 @@ def test_compute_class_weight_dict(): classes, y) +@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 +@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 def test_compute_class_weight_invariance(): # Test that results with class_weight="balanced" is invariant wrt # class imbalance if the number of samples is identical. From 284ecaa67e646464599fa61ca9dc1e391736cd6d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=20Dupr=C3=A9=20la=20Tour?= Date: Sun, 29 Jul 2018 09:57:27 +0200 Subject: [PATCH 05/17] fix doctests --- sklearn/ensemble/voting_classifier.py | 3 ++- sklearn/linear_model/sag.py | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/sklearn/ensemble/voting_classifier.py b/sklearn/ensemble/voting_classifier.py index 7ce8bcd80aa46..56ba292396d15 100644 --- a/sklearn/ensemble/voting_classifier.py +++ b/sklearn/ensemble/voting_classifier.py @@ -90,7 +90,8 @@ class VotingClassifier(_BaseComposition, ClassifierMixin, TransformerMixin): >>> from sklearn.linear_model import LogisticRegression >>> from sklearn.naive_bayes import GaussianNB >>> from sklearn.ensemble import RandomForestClassifier, VotingClassifier - >>> clf1 = LogisticRegression(random_state=1) + >>> clf1 = LogisticRegression(solver='lbfgs', multi_class='multinomial', + ... 
random_state=1) >>> clf2 = RandomForestClassifier(n_estimators=50, random_state=1) >>> clf3 = GaussianNB() >>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]]) diff --git a/sklearn/linear_model/sag.py b/sklearn/linear_model/sag.py index 122afa5422896..eb130e1a091bd 100644 --- a/sklearn/linear_model/sag.py +++ b/sklearn/linear_model/sag.py @@ -212,12 +212,13 @@ def sag_solver(X, y, sample_weight=None, loss='log', alpha=1., beta=0., >>> X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]]) >>> y = np.array([1, 1, 2, 2]) - >>> clf = linear_model.LogisticRegression(solver='sag') + >>> clf = linear_model.LogisticRegression( + ... solver='sag', multi_class='multinomial') >>> clf.fit(X, y) ... #doctest: +NORMALIZE_WHITESPACE LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True, intercept_scaling=1, max_iter=100, - multi_class='warn', n_jobs=1, penalty='l2', random_state=None, + multi_class='multinomial', n_jobs=1, penalty='l2', random_state=None, solver='sag', tol=0.0001, verbose=0, warm_start=False) References From cc5321219d33ad891ac9e09ab54fe36cc3043aeb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=20Dupr=C3=A9=20la=20Tour?= Date: Sun, 29 Jul 2018 10:31:47 +0200 Subject: [PATCH 06/17] remove 'auto' solver --- sklearn/linear_model/logistic.py | 37 ++++++------------- sklearn/linear_model/randomized_l1.py | 3 +- sklearn/linear_model/tests/test_logistic.py | 14 +------ .../linear_model/tests/test_randomized_l1.py | 5 --- 4 files changed, 15 insertions(+), 44 deletions(-) diff --git a/sklearn/linear_model/logistic.py b/sklearn/linear_model/logistic.py index 0d5cec93a3282..7a464b0c5e55b 100644 --- a/sklearn/linear_model/logistic.py +++ b/sklearn/linear_model/logistic.py @@ -429,12 +429,15 @@ def _check_solver_option(solver, multi_class, penalty, dual, # default values raises a future warning if solver == 'warn': - # solver will eventually change back to previous_default_solver, but we - # warn only if the 'auto' solver would have selected a different solver - solver = 'auto' - warn_solver = True - else: - warn_solver = False + # previous_default_solver is used since LogisticRegression and + # LogisticRegressionCV don't have the same default in 0.19. + solver = previous_default_solver + + # Do not warn if the 'auto' solver selects the previous default solver + if previous_default_solver != 'lbfgs': + warnings.warn("Default solver will be changed to 'lbfgs' in 0.22. " + "Use a specific solver to silence this warning.", + FutureWarning) if multi_class == 'warn': multi_class = 'ovr' @@ -442,13 +445,11 @@ def _check_solver_option(solver, multi_class, penalty, dual, " 0.22. Use a specific option to silence this warning.", FutureWarning) - # multi_class checks if multi_class not in ['multinomial', 'ovr']: raise ValueError("multi_class should be either multinomial or " "ovr, got %s." % multi_class) - # solver checks - all_solvers = ['liblinear', 'newton-cg', 'lbfgs', 'sag', 'saga', 'auto'] + all_solvers = ['liblinear', 'newton-cg', 'lbfgs', 'sag', 'saga'] if solver not in all_solvers: raise ValueError("Logistic Regression supports only solvers in %s, got" " %s." % (all_solvers, solver)) @@ -458,22 +459,6 @@ def _check_solver_option(solver, multi_class, penalty, dual, raise ValueError("Logistic Regression supports only penalties in %s," " got %s." 
% (all_penalties, penalty)) - if solver == 'auto': - if penalty == 'l1': - solver = 'saga' - else: - solver = 'lbfgs' - - if warn_solver and solver != previous_default_solver: - # Do not warn if the 'auto' solver selects the previous default solver - - # previous_default_solver is used since LogisticRegression and - # LogisticRegressionCV don't have the same default in 0.19. - solver = previous_default_solver - warnings.warn("Default solver will be changed to 'auto' in 0.22. " - "Use a specific solver to silence this warning.", - FutureWarning) - # Compatibility checks if multi_class == 'multinomial' and solver == 'liblinear': raise ValueError("Solver %s does not support " @@ -1396,7 +1381,7 @@ def predict_proba(self, X): # This check can be removed in 0.22, changing back to self.multi_class _, multi_class = _check_solver_option( - self.solver, self.multi_class, self.penalty, self.dual) + self.solver, self.multi_class, self.penalty, self.dual, 'lbfgs') if multi_class == "ovr": return super(LogisticRegression, self)._predict_proba_lr(X) diff --git a/sklearn/linear_model/randomized_l1.py b/sklearn/linear_model/randomized_l1.py index f75a59db5e760..c1e06ba64a3b2 100644 --- a/sklearn/linear_model/randomized_l1.py +++ b/sklearn/linear_model/randomized_l1.py @@ -380,7 +380,8 @@ def _randomized_logistic(X, y, weights, mask, C=1., verbose=False, for this_C, this_scores in zip(C, scores.T): # XXX : would be great to do it with a warm_start ... clf = LogisticRegression(C=this_C, tol=tol, penalty='l1', dual=False, - fit_intercept=fit_intercept) + fit_intercept=fit_intercept, + solver='liblinear', multi_class='ovr') clf.fit(X, y) this_scores[:] = np.any( np.abs(clf.coef_) > 10 * np.finfo(np.float).eps, axis=0) diff --git a/sklearn/linear_model/tests/test_logistic.py b/sklearn/linear_model/tests/test_logistic.py index 12db09cc0acb4..ae0f59ea24603 100644 --- a/sklearn/linear_model/tests/test_logistic.py +++ b/sklearn/linear_model/tests/test_logistic.py @@ -207,7 +207,7 @@ def test_check_solver_option(LR): X, y = iris.data, iris.target msg = ("Logistic Regression supports only solvers in ['liblinear', " - "'newton-cg', 'lbfgs', 'sag', 'saga', 'auto'], got wrong_name.") + "'newton-cg', 'lbfgs', 'sag', 'saga'], got wrong_name.") lr = LR(solver="wrong_name", multi_class="ovr") assert_raise_message(ValueError, msg, lr.fit, X, y) @@ -241,7 +241,7 @@ def test_logistic_regression_warnings(model, params, warn_solver): clf_multi_class_warning = model(solver='lbfgs', **params) clf_no_warnings = model(solver='lbfgs', multi_class='ovr', **params) - solver_warning_msg = "Default solver will be changed to 'auto'" + solver_warning_msg = "Default solver will be changed to 'lbfgs'" multi_class_warning_msg = "Default multi_class will be changed to 'multi" if warn_solver: @@ -255,16 +255,6 @@ def test_logistic_regression_warnings(model, params, warn_solver): assert_no_warnings(clf_no_warnings.fit, iris.data, iris.target) -@pytest.mark.filterwarnings('ignore: You should specify a value') # 0.22 -@pytest.mark.parametrize('penalty', ['l1', 'l2']) -@pytest.mark.parametrize('multi_class', ['ovr', 'multinomial']) -@pytest.mark.parametrize('model', [LogisticRegression, LogisticRegressionCV]) -def test_logistic_regression_auto(penalty, multi_class, model): - # Test logistic regression with auto mode - clf = model(penalty=penalty, multi_class=multi_class, solver='auto') - clf.fit(iris.data, iris.target) - - @pytest.mark.parametrize('solver', ['lbfgs', 'newton-cg', 'sag', 'saga']) def test_multinomial_binary(solver): # Test 
multinomial LR on a binary problem. diff --git a/sklearn/linear_model/tests/test_randomized_l1.py b/sklearn/linear_model/tests/test_randomized_l1.py index f9d1ff10e3a37..564fbd4e7827d 100644 --- a/sklearn/linear_model/tests/test_randomized_l1.py +++ b/sklearn/linear_model/tests/test_randomized_l1.py @@ -4,7 +4,6 @@ from tempfile import mkdtemp import shutil -import pytest import numpy as np from scipy import sparse @@ -133,8 +132,6 @@ def test_randomized_lasso_precompute(): assert_array_equal(feature_scores_1, feature_scores_2) -@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 -@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 @ignore_warnings(category=DeprecationWarning) def test_randomized_logistic(): # Check randomized sparse logistic regression @@ -165,8 +162,6 @@ def test_randomized_logistic(): assert_raises(ValueError, clf.fit, X, y) -@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22 -@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22 @ignore_warnings(category=DeprecationWarning) def test_randomized_logistic_sparse(): # Check randomized sparse logistic regression on sparse data From 57b83649a700840f610e2b6e98a8a21f045e7dbf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=20Dupr=C3=A9=20la=20Tour?= Date: Sun, 29 Jul 2018 10:38:32 +0200 Subject: [PATCH 07/17] add whats_new entry --- doc/whats_new/v0.20.rst | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/doc/whats_new/v0.20.rst b/doc/whats_new/v0.20.rst index 7636b5fe8926b..cb934384c1583 100644 --- a/doc/whats_new/v0.20.rst +++ b/doc/whats_new/v0.20.rst @@ -195,7 +195,7 @@ Decomposition, manifold learning and clustering :issue:`6374` by :user:`John Kirkham `. - :class:`decomposition.SparsePCA` now exposes ``normalize_components``. When - set to True, the train and test data are centered with the train mean + set to True, the train and test data are centered with the train mean repsectively during the fit phase and the transform phase. This fixes the behavior of SparsePCA. When set to False, which is the default, the previous abnormal behaviour still holds. The False value is for backward @@ -749,7 +749,7 @@ Preprocessing Feature selection - Fixed computation of ``n_features_to_compute`` for edge case with tied CV - scores in :class:`feature_selection.RFECV`. :issue:`9222` by `Nick Hoh + scores in :class:`feature_selection.RFECV`. :issue:`9222` by :user:`Nick Hoh `. Model evaluation and meta-estimators @@ -806,6 +806,12 @@ Linear, kernelized and related models :class:`linear_model.LogisticRegression` when ``verbose`` is set to 0. :issue:`10881` by :user:`Alexandre Sevin `. +- The default values of the ``solver`` and ``multi_class`` parameters of + :class:`linear_model.LogisticRegression` will change respectively from + ``'liblinear'`` and ``'ovr'`` in version 0.20 to ``'lbfgs'`` and + ``'multinomial'`` in version 0.22. A FutureWarning is raised when the default + value is used. :issue:`11476` by `Tom Dupre la Tour`_. + Preprocessing - Deprecate ``n_values`` and ``categorical_features`` parameters and @@ -914,7 +920,7 @@ Outlier Detection models - Novelty detection with :class:`neighbors.LocalOutlierFactor`: Add a ``novelty`` parameter to :class:`neighbors.LocalOutlierFactor`. When - ``novelty`` is set to True, :class:`neighbors.LocalOutlierFactor` can then + ``novelty`` is set to True, :class:`neighbors.LocalOutlierFactor` can then be used for novelty detection, i.e. predict on new unseen data. 
Available prediction methods are ``predict``, ``decision_function`` and ``score_samples``. By default, ``novelty`` is set to ``False``, and only From 1e219835275dc238d8b6b5768dd41388b1379fa0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=20Dupr=C3=A9=20la=20Tour?= Date: Sun, 29 Jul 2018 12:54:02 +0200 Subject: [PATCH 08/17] fix doctests --- doc/modules/ensemble.rst | 10 ++++++---- .../statistical_inference/supervised_learning.rst | 11 ++++++----- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/doc/modules/ensemble.rst b/doc/modules/ensemble.rst index 85fd8a30ba03f..1b5c58083e00e 100644 --- a/doc/modules/ensemble.rst +++ b/doc/modules/ensemble.rst @@ -964,7 +964,8 @@ The following example shows how to fit the majority rule classifier:: >>> iris = datasets.load_iris() >>> X, y = iris.data[:, 1:3], iris.target - >>> clf1 = LogisticRegression(random_state=1) + >>> clf1 = LogisticRegression(solver='lbfgs', multi_class='multinomial', + ... random_state=1) >>> clf2 = RandomForestClassifier(n_estimators=50, random_state=1) >>> clf3 = GaussianNB() @@ -973,10 +974,10 @@ The following example shows how to fit the majority rule classifier:: >>> for clf, label in zip([clf1, clf2, clf3, eclf], ['Logistic Regression', 'Random Forest', 'naive Bayes', 'Ensemble']): ... scores = cross_val_score(clf, X, y, cv=5, scoring='accuracy') ... print("Accuracy: %0.2f (+/- %0.2f) [%s]" % (scores.mean(), scores.std(), label)) - Accuracy: 0.90 (+/- 0.05) [Logistic Regression] + Accuracy: 0.95 (+/- 0.04) [Logistic Regression] Accuracy: 0.94 (+/- 0.04) [Random Forest] Accuracy: 0.91 (+/- 0.04) [naive Bayes] - Accuracy: 0.95 (+/- 0.05) [Ensemble] + Accuracy: 0.95 (+/- 0.04) [Ensemble] Weighted Average Probabilities (Soft Voting) @@ -1049,7 +1050,8 @@ The `VotingClassifier` can also be used together with `GridSearch` in order to tune the hyperparameters of the individual estimators:: >>> from sklearn.model_selection import GridSearchCV - >>> clf1 = LogisticRegression(random_state=1) + >>> clf1 = LogisticRegression(solver='lbfgs', multi_class='multinomial', + ... random_state=1) >>> clf2 = RandomForestClassifier(random_state=1) >>> clf3 = GaussianNB() >>> eclf = VotingClassifier(estimators=[('lr', clf1), ('rf', clf2), ('gnb', clf3)], voting='soft') diff --git a/doc/tutorial/statistical_inference/supervised_learning.rst b/doc/tutorial/statistical_inference/supervised_learning.rst index 1c691d9cc9db4..955aed4603de5 100644 --- a/doc/tutorial/statistical_inference/supervised_learning.rst +++ b/doc/tutorial/statistical_inference/supervised_learning.rst @@ -368,12 +368,13 @@ function or **logistic** function: :: - >>> logistic = linear_model.LogisticRegression(C=1e5) - >>> logistic.fit(iris_X_train, iris_y_train) + >>> log = linear_model.LogisticRegression(solver='lbfgs', C=1e5, + ... multi_class='multinomial') + >>> log.fit(iris_X_train, iris_y_train) # doctest: +NORMALIZE_WHITESPACE LogisticRegression(C=100000.0, class_weight=None, dual=False, - fit_intercept=True, intercept_scaling=1, max_iter=100, - multi_class='warn', n_jobs=1, penalty='l2', random_state=None, - solver='warn', tol=0.0001, verbose=0, warm_start=False) + fit_intercept=True, intercept_scaling=1, max_iter=100, + multi_class='multinomial', n_jobs=1, penalty='l2', random_state=None, + solver='lbfgs', tol=0.0001, verbose=0, warm_start=False) This is known as :class:`LogisticRegression`. 
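As an illustration (assuming scikit-learn 0.20 with this series applied, not part of any patch above), downstream code can silence the new FutureWarnings by passing ``solver`` and ``multi_class`` explicitly; the values below are the planned 0.22 defaults, but any supported pair works:

    from sklearn.datasets import load_iris
    from sklearn.linear_model import LogisticRegression

    X, y = load_iris(return_X_y=True)

    # Explicit solver/multi_class avoids the "Default solver will be changed"
    # and "Default multi_class will be changed" FutureWarnings added in this
    # series; max_iter is raised so lbfgs converges on the unscaled iris data.
    clf = LogisticRegression(solver='lbfgs', multi_class='multinomial',
                             max_iter=1000)
    clf.fit(X, y)
    print(clf.score(X, y))
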
From 85f0ecc6bcc248ba4deff0ce0037a7768ae3ccf0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=20Dupr=C3=A9=20la=20Tour?= Date: Mon, 30 Jul 2018 11:56:05 +0200 Subject: [PATCH 09/17] cleaning --- doc/modules/linear_model.rst | 3 -- sklearn/linear_model/logistic.py | 77 +++++++++++++------------------- 2 files changed, 32 insertions(+), 48 deletions(-) diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst index c65b1bbb12433..b00ade0a34f8a 100644 --- a/doc/modules/linear_model.rst +++ b/doc/modules/linear_model.rst @@ -791,9 +791,6 @@ Robust to unscaled datasets yes yes yes no no The "saga" solver is often the best choice. The "liblinear" solver is used by default for historical reasons. -The default solver will change to "auto" in version 0.22. This option -automatically selects a solver based on the `penalty` parameter. - For large dataset, you may also consider using :class:`SGDClassifier` with 'log' loss. diff --git a/sklearn/linear_model/logistic.py b/sklearn/linear_model/logistic.py index 7a464b0c5e55b..3c239fa56943c 100644 --- a/sklearn/linear_model/logistic.py +++ b/sklearn/linear_model/logistic.py @@ -424,20 +424,14 @@ def hessp(v): return grad, hessp -def _check_solver_option(solver, multi_class, penalty, dual, - previous_default_solver='liblinear'): +def _check_solver_option(solver, multi_class, penalty, dual): - # default values raises a future warning + # Default values raises a future warning if solver == 'warn': - # previous_default_solver is used since LogisticRegression and - # LogisticRegressionCV don't have the same default in 0.19. - solver = previous_default_solver - - # Do not warn if the 'auto' solver selects the previous default solver - if previous_default_solver != 'lbfgs': - warnings.warn("Default solver will be changed to 'lbfgs' in 0.22. " - "Use a specific solver to silence this warning.", - FutureWarning) + solver = 'liblinear' + warnings.warn("Default solver will be changed to 'lbfgs' in 0.22. " + "Use a specific solver to silence this warning.", + FutureWarning) if multi_class == 'warn': multi_class = 'ovr' @@ -445,6 +439,7 @@ def _check_solver_option(solver, multi_class, penalty, dual, " 0.22. Use a specific option to silence this warning.", FutureWarning) + # Check the string parameters if multi_class not in ['multinomial', 'ovr']: raise ValueError("multi_class should be either multinomial or " "ovr, got %s." % multi_class) @@ -477,7 +472,7 @@ def _check_solver_option(solver, multi_class, penalty, dual, def logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True, max_iter=100, tol=1e-4, verbose=0, - solver='warn', coef=None, + solver='lbfgs', coef=None, class_weight=None, dual=False, penalty='l2', intercept_scaling=1., multi_class='warn', random_state=None, check_input=True, @@ -527,7 +522,7 @@ def logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True, For the liblinear and lbfgs solvers set verbose to any positive number for verbosity. - solver : {'lbfgs', 'newton-cg', 'liblinear', 'sag', 'saga', 'auto'} + solver : {'lbfgs', 'newton-cg', 'liblinear', 'sag', 'saga'} Numerical solver to use. coef : array-like, shape (n_features,), default None @@ -618,16 +613,16 @@ def logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True, if isinstance(Cs, numbers.Integral): Cs = np.logspace(-4, 4, Cs) + solver, multi_class = _check_solver_option( + solver, multi_class, penalty, dual) + # Preprocessing. 
if check_input: X = check_array(X, accept_sparse='csr', dtype=np.float64, accept_large_sparse=solver != 'liblinear') y = check_array(y, ensure_2d=False, dtype=None) check_consistent_length(X, y) - - n_samples, n_features = X.shape - solver, multi_class = _check_solver_option( - solver, multi_class, penalty, dual, 'lbfgs') + _, n_features = X.shape classes = np.unique(y) random_state = check_random_state(random_state) @@ -805,7 +800,7 @@ def logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True, def _log_reg_scoring_path(X, y, train, test, pos_class=None, Cs=10, scoring=None, fit_intercept=False, max_iter=100, tol=1e-4, class_weight=None, - verbose=0, solver='warn', penalty='l2', + verbose=0, solver='lbfgs', penalty='l2', dual=False, intercept_scaling=1., multi_class='warn', random_state=None, max_squared_sum=None, sample_weight=None): @@ -867,7 +862,7 @@ def _log_reg_scoring_path(X, y, train, test, pos_class=None, Cs=10, For the liblinear and lbfgs solvers set verbose to any positive number for verbosity. - solver : {'lbfgs', 'newton-cg', 'liblinear', 'sag', 'saga', 'auto'} + solver : {'lbfgs', 'newton-cg', 'liblinear', 'sag', 'saga'} Decides which solver to use. penalty : str, 'l1' or 'l2' @@ -931,9 +926,8 @@ def _log_reg_scoring_path(X, y, train, test, pos_class=None, Cs=10, n_iter : array, shape(n_cs,) Actual number of iteration for each Cs. """ - n_samples, n_features = X.shape solver, multi_class = _check_solver_option( - solver, multi_class, penalty, dual, 'lbfgs') + solver, multi_class, penalty, dual) X_train = X[train] X_test = X[test] @@ -1075,8 +1069,8 @@ class LogisticRegression(BaseEstimator, LinearClassifierMixin, instance used by `np.random`. Used when ``solver`` == 'sag' or 'liblinear'. - solver : str, {'newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga', 'auto'}, - default: 'liblinear'. Will be changed to 'auto' solver in 0.22. + solver : str, {'newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'}, + default: 'liblinear'. Will be changed to 'lbfgs' solver in 0.22. Algorithm to use in the optimization problem. - For small datasets, 'liblinear' is a good choice, whereas 'sag' and @@ -1086,8 +1080,6 @@ class LogisticRegression(BaseEstimator, LinearClassifierMixin, schemes. - 'newton-cg', 'lbfgs' and 'sag' only handle L2 penalty, whereas 'liblinear' and 'saga' handle L1 penalty. - - 'auto' automatically chooses a solver based on the penalty - parameter. Note that 'sag' and 'saga' fast convergence is only guaranteed on features with approximately the same scale. You can @@ -1097,8 +1089,8 @@ class LogisticRegression(BaseEstimator, LinearClassifierMixin, Stochastic Average Gradient descent solver. .. versionadded:: 0.19 SAGA solver. - .. versionadded:: 0.20 - 'auto' solver. + .. versionchanged:: 0.20 + Default will change from 'liblinear' to 'lbfgs' in 0.22. max_iter : int, default: 100 Useful only for the newton-cg, sag and lbfgs solvers. @@ -1114,6 +1106,8 @@ class LogisticRegression(BaseEstimator, LinearClassifierMixin, .. versionadded:: 0.18 Stochastic Average Gradient descent solver for 'multinomial' case. + .. versionchanged:: 0.20 + Default will change from 'ovr' to 'multinomial' in 0.22. 
verbose : int, default: 0 For the liblinear and lbfgs solvers set verbose to any positive @@ -1254,8 +1248,7 @@ def fit(self, X, y, sample_weight=None): "positive; got (tol=%r)" % self.tol) solver, multi_class = _check_solver_option( - self.solver, self.multi_class, self.penalty, self.dual, - 'liblinear') + self.solver, self.multi_class, self.penalty, self.dual) if solver in ['newton-cg']: _dtype = [np.float64, np.float32] @@ -1282,7 +1275,7 @@ def fit(self, X, y, sample_weight=None): return self if solver in ['sag', 'saga']: - max_squared_sum = np.percentile(row_norms(X, squared=True), 90) + max_squared_sum = row_norms(X, squared=True).max() else: max_squared_sum = None @@ -1379,11 +1372,7 @@ def predict_proba(self, X): if not hasattr(self, "coef_"): raise NotFittedError("Call fit before prediction") - # This check can be removed in 0.22, changing back to self.multi_class - _, multi_class = _check_solver_option( - self.solver, self.multi_class, self.penalty, self.dual, 'lbfgs') - - if multi_class == "ovr": + if self.multi_class == "ovr" or self.multi_class == "warn": return super(LogisticRegression, self)._predict_proba_lr(X) else: decision = self.decision_function(X) @@ -1476,8 +1465,8 @@ class LogisticRegressionCV(LogisticRegression, BaseEstimator, that can be used, look at :mod:`sklearn.metrics`. The default scoring option used is 'accuracy'. - solver : str, {'newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga', 'auto'}, - default: 'lbfgs'. Will be changed to 'auto' solver in 0.22. + solver : str, {'newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'}, + default: 'lbfgs'. Algorithm to use in the optimization problem. - For small datasets, 'liblinear' is a good choice, whereas 'sag' and @@ -1489,8 +1478,6 @@ class LogisticRegressionCV(LogisticRegression, BaseEstimator, 'liblinear' and 'saga' handle L1 penalty. - 'liblinear' might be slower in LogisticRegressionCV because it does not handle warm-starting. - - 'auto' automatically chooses a solver based on the penalty - parameter. Note that 'sag' and 'saga' fast convergence is only guaranteed on features with approximately the same scale. You can preprocess the data @@ -1500,8 +1487,6 @@ class LogisticRegressionCV(LogisticRegression, BaseEstimator, Stochastic Average Gradient descent solver. .. versionadded:: 0.19 SAGA solver. - .. versionadded:: 0.20 - 'auto' solver. tol : float, optional Tolerance for stopping criteria. @@ -1561,6 +1546,8 @@ class LogisticRegressionCV(LogisticRegression, BaseEstimator, .. versionadded:: 0.18 Stochastic Average Gradient descent solver for 'multinomial' case. + .. versionchanged:: 0.20 + Default will change from 'ovr' to 'multinomial' in 0.22. 
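The fit() hunk above replaces the 90th percentile of the squared row norms with their maximum
when computing max_squared_sum for the 'sag'/'saga' step size. A short sketch (illustrative only)
of the two quantities being swapped:

    import numpy as np
    from sklearn.datasets import load_iris
    from sklearn.utils.extmath import row_norms

    X, _ = load_iris(return_X_y=True)
    squared_norms = row_norms(X, squared=True)
    print(np.percentile(squared_norms, 90))  # value used before this patch
    print(squared_norms.max())               # value used after this patch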
random_state : int, RandomState instance or None, optional, default None If int, random_state is the seed used by the random number generator; @@ -1621,7 +1608,7 @@ class LogisticRegressionCV(LogisticRegression, BaseEstimator, """ def __init__(self, Cs=10, fit_intercept=True, cv='warn', dual=False, - penalty='l2', scoring=None, solver='warn', tol=1e-4, + penalty='l2', scoring=None, solver='lbfgs', tol=1e-4, max_iter=100, class_weight=None, n_jobs=1, verbose=0, refit=True, intercept_scaling=1., multi_class='warn', random_state=None): @@ -1663,7 +1650,7 @@ def fit(self, X, y, sample_weight=None): self : object """ solver, multi_class = _check_solver_option( - self.solver, self.multi_class, self.penalty, self.dual, 'lbfgs') + self.solver, self.multi_class, self.penalty, self.dual) if not isinstance(self.max_iter, numbers.Number) or self.max_iter < 0: raise ValueError("Maximum number of iteration must be positive;" @@ -1691,7 +1678,7 @@ def fit(self, X, y, sample_weight=None): encoded_labels = label_encoder.transform(label_encoder.classes_) if solver in ['sag', 'saga']: - max_squared_sum = np.percentile(row_norms(X, squared=True), 90) + max_squared_sum = row_norms(X, squared=True).max() else: max_squared_sum = None From d06bad1132f2df8e6ee3edcf7bf59d0a07e3ce89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=20Dupr=C3=A9=20la=20Tour?= Date: Tue, 31 Jul 2018 11:34:00 +0200 Subject: [PATCH 10/17] only warn for multi_class default if the problem is not binary --- sklearn/linear_model/logistic.py | 44 ++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/sklearn/linear_model/logistic.py b/sklearn/linear_model/logistic.py index 3c239fa56943c..6bb3e8245d0c7 100644 --- a/sklearn/linear_model/logistic.py +++ b/sklearn/linear_model/logistic.py @@ -424,7 +424,7 @@ def hessp(v): return grad, hessp -def _check_solver_option(solver, multi_class, penalty, dual): +def _check_solver_option(solver, multi_class, penalty, dual, classes): # Default values raises a future warning if solver == 'warn': @@ -435,9 +435,11 @@ def _check_solver_option(solver, multi_class, penalty, dual): if multi_class == 'warn': multi_class = 'ovr' - warnings.warn("Default multi_class will be changed to 'multinomial' in" - " 0.22. Use a specific option to silence this warning.", - FutureWarning) + if len(classes) > 2: # only warn if the problem is not binary + warnings.warn( + "Default multi_class will be changed to 'multinomial' in 0.22." + " Use a specific option to silence this warning.", + FutureWarning) # Check the string parameters if multi_class not in ['multinomial', 'ovr']: @@ -613,13 +615,11 @@ def logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True, if isinstance(Cs, numbers.Integral): Cs = np.logspace(-4, 4, Cs) - solver, multi_class = _check_solver_option( - solver, multi_class, penalty, dual) - # Preprocessing. 
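With the change above (reverted again in patch 14 below), the multi_class FutureWarning is only
emitted when the target has more than two classes, where the upcoming default actually changes the
fitted model. A sketch (illustrative, not from the patch; the toy arrays are made up):

    import numpy as np
    from sklearn.linear_model import LogisticRegression

    X = np.array([[0.], [1.], [2.], [3.]])
    y_binary = np.array([0, 0, 1, 1])
    y_multi = np.array([0, 1, 2, 0])

    LogisticRegression(solver='liblinear').fit(X, y_binary)  # no multi_class warning
    LogisticRegression(solver='liblinear').fit(X, y_multi)   # warns: default changes in 0.22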
if check_input: + accept_large_sparse = solver not in ['liblinear', 'warn'] X = check_array(X, accept_sparse='csr', dtype=np.float64, - accept_large_sparse=solver != 'liblinear') + accept_large_sparse=accept_large_sparse) y = check_array(y, ensure_2d=False, dtype=None) check_consistent_length(X, y) _, n_features = X.shape @@ -627,6 +627,9 @@ def logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True, classes = np.unique(y) random_state = check_random_state(random_state) + solver, multi_class = _check_solver_option( + solver, multi_class, penalty, dual, classes=classes) + if pos_class is None and multi_class != 'multinomial': if (classes.size > 2): raise ValueError('To fit OvR, use the pos_class argument') @@ -927,7 +930,7 @@ def _log_reg_scoring_path(X, y, train, test, pos_class=None, Cs=10, Actual number of iteration for each Cs. """ solver, multi_class = _check_solver_option( - solver, multi_class, penalty, dual) + solver, multi_class, penalty, dual, classes=np.unique(y)) X_train = X[train] X_test = X[test] @@ -1247,20 +1250,22 @@ def fit(self, X, y, sample_weight=None): raise ValueError("Tolerance for stopping criteria must be " "positive; got (tol=%r)" % self.tol) - solver, multi_class = _check_solver_option( - self.solver, self.multi_class, self.penalty, self.dual) - - if solver in ['newton-cg']: + if self.solver in ['newton-cg']: _dtype = [np.float64, np.float32] else: _dtype = np.float64 + accept_large_sparse = self.solver not in ['liblinear', 'warn'] X, y = check_X_y(X, y, accept_sparse='csr', dtype=_dtype, order="C", - accept_large_sparse=solver != 'liblinear') + accept_large_sparse=accept_large_sparse) check_classification_targets(y) self.classes_ = np.unique(y) n_samples, n_features = X.shape + solver, multi_class = _check_solver_option( + self.solver, self.multi_class, self.penalty, self.dual, + classes=self.classes_) + if solver == 'liblinear': if self.n_jobs != 1: warnings.warn("'n_jobs' > 1 does not have any effect when" @@ -1649,9 +1654,6 @@ def fit(self, X, y, sample_weight=None): ------- self : object """ - solver, multi_class = _check_solver_option( - self.solver, self.multi_class, self.penalty, self.dual) - if not isinstance(self.max_iter, numbers.Number) or self.max_iter < 0: raise ValueError("Maximum number of iteration must be positive;" " got (max_iter=%r)" % self.max_iter) @@ -1659,11 +1661,15 @@ def fit(self, X, y, sample_weight=None): raise ValueError("Tolerance for stopping criteria must be " "positive; got (tol=%r)" % self.tol) + accept_large_sparse = self.solver not in ['liblinear', 'warn'] X, y = check_X_y(X, y, accept_sparse='csr', dtype=np.float64, - order="C", - accept_large_sparse=solver != 'liblinear') + order="C", accept_large_sparse=accept_large_sparse) check_classification_targets(y) + solver, multi_class = _check_solver_option( + self.solver, self.multi_class, self.penalty, self.dual, + classes=np.unique(y)) + class_weight = self.class_weight # Encode for string labels From 94e7927c97524598c246fd5e9f740e9394400c68 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=20Dupr=C3=A9=20la=20Tour?= Date: Thu, 9 Aug 2018 09:36:24 -0700 Subject: [PATCH 11/17] minor change --- doc/whats_new/v0.20.rst | 2 -- sklearn/linear_model/logistic.py | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/doc/whats_new/v0.20.rst b/doc/whats_new/v0.20.rst index 650680d5eed2a..b694b5e31ecad 100644 --- a/doc/whats_new/v0.20.rst +++ b/doc/whats_new/v0.20.rst @@ -756,8 +756,6 @@ Support for Python 3.3 has been officially dropped. 
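The accept_large_sparse handling in this patch encodes the fact that 'liblinear' cannot consume
sparse matrices with 64-bit indices. A small sketch (illustrative, not part of the patch; the
matrix is built artificially just to get int64 indices) of the check the other solvers now bypass:

    import numpy as np
    import scipy.sparse as sp
    from sklearn.utils import check_array

    X = sp.random(10, 5, density=0.5, format='csr', random_state=0)
    X.indices = X.indices.astype(np.int64)
    X.indptr = X.indptr.astype(np.int64)

    check_array(X, accept_sparse='csr', accept_large_sparse=True)   # accepted
    try:
        check_array(X, accept_sparse='csr', accept_large_sparse=False)
    except ValueError as exc:
        print(exc)  # 64-bit indices rejected, as they would be for solver='liblinear'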
the ``fit_predict`` method is avaiable. By :user:`Albert Thomas `. -Preprocessing -======= - |Fix| Fixed a bug in :class:`neighbors.NearestNeighbors` where fitting a NearestNeighbors model fails when a) the distance metric used is a callable and b) the input to the NearestNeighbors model is sparse. diff --git a/sklearn/linear_model/logistic.py b/sklearn/linear_model/logistic.py index f7649ccfe50f2..87165cb1fe137 100644 --- a/sklearn/linear_model/logistic.py +++ b/sklearn/linear_model/logistic.py @@ -1394,7 +1394,7 @@ def predict_proba(self, X): if not hasattr(self, "coef_"): raise NotFittedError("Call fit before prediction") - if self.multi_class == "ovr" or self.multi_class == "warn": + if self.multi_class in ["ovr", "warn"]: return super(LogisticRegression, self)._predict_proba_lr(X) else: decision = self.decision_function(X) From 73420300f2978b78d1e9f8c3a13a190c575818dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=20Dupr=C3=A9=20la=20Tour?= Date: Thu, 9 Aug 2018 10:32:59 -0700 Subject: [PATCH 12/17] FIX doctests --- sklearn/linear_model/logistic.py | 18 ++++++++++-------- sklearn/linear_model/sag.py | 6 +++--- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/sklearn/linear_model/logistic.py b/sklearn/linear_model/logistic.py index 87165cb1fe137..36707af13d611 100644 --- a/sklearn/linear_model/logistic.py +++ b/sklearn/linear_model/logistic.py @@ -1167,14 +1167,15 @@ class LogisticRegression(BaseEstimator, LinearClassifierMixin, >>> from sklearn.datasets import load_iris >>> from sklearn.linear_model import LogisticRegression >>> X, y = load_iris(return_X_y=True) - >>> clf = LogisticRegression(random_state=0).fit(X, y) + >>> clf = LogisticRegression(random_state=0, solver='lbfgs', + ... multi_class='multinomial').fit(X, y) >>> clf.predict(X[:2, :]) array([0, 0]) >>> clf.predict_proba(X[:2, :]) # doctest: +ELLIPSIS - array([[8.78...e-01, 1.21...e-01, 1.079...e-05], - [7.97...e-01, 2.02...e-01, 3.029...e-05]]) + array([[9.8...e-01, 1.8...e-02, 1.4...e-08], + [9.7...e-01, 2.8...e-02, 3.0...e-08]]) >>> clf.score(X, y) - 0.96 + 0.97... See also -------- @@ -1630,14 +1631,15 @@ class LogisticRegressionCV(LogisticRegression, BaseEstimator, >>> from sklearn.datasets import load_iris >>> from sklearn.linear_model import LogisticRegressionCV >>> X, y = load_iris(return_X_y=True) - >>> clf = LogisticRegressionCV(cv=5, random_state=0).fit(X, y) + >>> clf = LogisticRegressionCV(cv=5, random_state=0, + ... multi_class='multinomial').fit(X, y) >>> clf.predict(X[:2, :]) array([0, 0]) >>> clf.predict_proba(X[:2, :]) # doctest: +ELLIPSIS - array([[8.72...e-01, 1.27...e-01, 5.50...e-14], - [6.76...e-01, 3.23...e-01, 2.11...e-13]]) + array([[9.9...e-01, 1.4...e-03, 9.5...e-18], + [9.9...e-01, 3.3...e-03, 1.2...e-16]]) >>> clf.score(X, y) # doctest: +ELLIPSIS - 0.9266... + 0.98... See also -------- diff --git a/sklearn/linear_model/sag.py b/sklearn/linear_model/sag.py index 8d3282df08614..3e8861f26de83 100644 --- a/sklearn/linear_model/sag.py +++ b/sklearn/linear_model/sag.py @@ -218,9 +218,9 @@ def sag_solver(X, y, sample_weight=None, loss='log', alpha=1., beta=0., ... 
#doctest: +NORMALIZE_WHITESPACE LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True, intercept_scaling=1, max_iter=100, - multi_class='multinomial', n_jobs=None, penalty='l2', random_state=None, - multi_class='ovr', n_jobs=None, penalty='l2', random_state=None, - solver='sag', tol=0.0001, verbose=0, warm_start=False) + multi_class='multinomial', n_jobs=None, penalty='l2', + random_state=None, solver='sag', tol=0.0001, verbose=0, + warm_start=False) References ---------- From 0ccf0b9207dff5493d34ae123ff8db147081ecd6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=20Dupr=C3=A9=20la=20Tour?= Date: Thu, 9 Aug 2018 18:10:04 -0700 Subject: [PATCH 13/17] FIX doctest... --- sklearn/linear_model/logistic.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sklearn/linear_model/logistic.py b/sklearn/linear_model/logistic.py index 36707af13d611..d82dbe6c9257c 100644 --- a/sklearn/linear_model/logistic.py +++ b/sklearn/linear_model/logistic.py @@ -1635,9 +1635,8 @@ class LogisticRegressionCV(LogisticRegression, BaseEstimator, ... multi_class='multinomial').fit(X, y) >>> clf.predict(X[:2, :]) array([0, 0]) - >>> clf.predict_proba(X[:2, :]) # doctest: +ELLIPSIS - array([[9.9...e-01, 1.4...e-03, 9.5...e-18], - [9.9...e-01, 3.3...e-03, 1.2...e-16]]) + >>> clf.predict_proba(X[:2, :]).shape + (2, 3) >>> clf.score(X, y) # doctest: +ELLIPSIS 0.98... From 115d34d79b36a956f43169fb7a593030a596a811 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=20Dupr=C3=A9=20la=20Tour?= Date: Fri, 10 Aug 2018 09:27:27 -0700 Subject: [PATCH 14/17] Revert "only warn for multi_class default if the problem is not binary" This reverts commit d06bad1132f2df8e6ee3edcf7bf59d0a07e3ce89. --- sklearn/linear_model/logistic.py | 44 ++++++++++++++------------------ 1 file changed, 19 insertions(+), 25 deletions(-) diff --git a/sklearn/linear_model/logistic.py b/sklearn/linear_model/logistic.py index d82dbe6c9257c..c3384e718d520 100644 --- a/sklearn/linear_model/logistic.py +++ b/sklearn/linear_model/logistic.py @@ -424,7 +424,7 @@ def hessp(v): return grad, hessp -def _check_solver_option(solver, multi_class, penalty, dual, classes): +def _check_solver_option(solver, multi_class, penalty, dual): # Default values raises a future warning if solver == 'warn': @@ -435,11 +435,9 @@ def _check_solver_option(solver, multi_class, penalty, dual, classes): if multi_class == 'warn': multi_class = 'ovr' - if len(classes) > 2: # only warn if the problem is not binary - warnings.warn( - "Default multi_class will be changed to 'multinomial' in 0.22." - " Use a specific option to silence this warning.", - FutureWarning) + warnings.warn("Default multi_class will be changed to 'multinomial' in" + " 0.22. Use a specific option to silence this warning.", + FutureWarning) # Check the string parameters if multi_class not in ['multinomial', 'ovr']: @@ -617,11 +615,13 @@ def logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True, if isinstance(Cs, numbers.Integral): Cs = np.logspace(-4, 4, Cs) + solver, multi_class = _check_solver_option( + solver, multi_class, penalty, dual) + # Preprocessing. 
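Patch 13 above replaces the exact predict_proba values in the LogisticRegressionCV doctest with a
shape check, which does not depend on platform-specific rounding of very small probabilities. The
stable form, as a stand-alone snippet:

    from sklearn.datasets import load_iris
    from sklearn.linear_model import LogisticRegressionCV

    X, y = load_iris(return_X_y=True)
    clf = LogisticRegressionCV(cv=5, random_state=0,
                               multi_class='multinomial').fit(X, y)
    print(clf.predict_proba(X[:2, :]).shape)  # (2, 3): two samples, three classes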
if check_input: - accept_large_sparse = solver not in ['liblinear', 'warn'] X = check_array(X, accept_sparse='csr', dtype=np.float64, - accept_large_sparse=accept_large_sparse) + accept_large_sparse=solver != 'liblinear') y = check_array(y, ensure_2d=False, dtype=None) check_consistent_length(X, y) _, n_features = X.shape @@ -629,9 +629,6 @@ def logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True, classes = np.unique(y) random_state = check_random_state(random_state) - solver, multi_class = _check_solver_option( - solver, multi_class, penalty, dual, classes=classes) - if pos_class is None and multi_class != 'multinomial': if (classes.size > 2): raise ValueError('To fit OvR, use the pos_class argument') @@ -932,7 +929,7 @@ def _log_reg_scoring_path(X, y, train, test, pos_class=None, Cs=10, Actual number of iteration for each Cs. """ solver, multi_class = _check_solver_option( - solver, multi_class, penalty, dual, classes=np.unique(y)) + solver, multi_class, penalty, dual) X_train = X[train] X_test = X[test] @@ -1268,22 +1265,20 @@ def fit(self, X, y, sample_weight=None): raise ValueError("Tolerance for stopping criteria must be " "positive; got (tol=%r)" % self.tol) - if self.solver in ['newton-cg']: + solver, multi_class = _check_solver_option( + self.solver, self.multi_class, self.penalty, self.dual) + + if solver in ['newton-cg']: _dtype = [np.float64, np.float32] else: _dtype = np.float64 - accept_large_sparse = self.solver not in ['liblinear', 'warn'] X, y = check_X_y(X, y, accept_sparse='csr', dtype=_dtype, order="C", - accept_large_sparse=accept_large_sparse) + accept_large_sparse=solver != 'liblinear') check_classification_targets(y) self.classes_ = np.unique(y) n_samples, n_features = X.shape - solver, multi_class = _check_solver_option( - self.solver, self.multi_class, self.penalty, self.dual, - classes=self.classes_) - if solver == 'liblinear': if self.n_jobs != 1: warnings.warn("'n_jobs' > 1 does not have any effect when" @@ -1687,6 +1682,9 @@ def fit(self, X, y, sample_weight=None): ------- self : object """ + solver, multi_class = _check_solver_option( + self.solver, self.multi_class, self.penalty, self.dual) + if not isinstance(self.max_iter, numbers.Number) or self.max_iter < 0: raise ValueError("Maximum number of iteration must be positive;" " got (max_iter=%r)" % self.max_iter) @@ -1694,15 +1692,11 @@ def fit(self, X, y, sample_weight=None): raise ValueError("Tolerance for stopping criteria must be " "positive; got (tol=%r)" % self.tol) - accept_large_sparse = self.solver not in ['liblinear', 'warn'] X, y = check_X_y(X, y, accept_sparse='csr', dtype=np.float64, - order="C", accept_large_sparse=accept_large_sparse) + order="C", + accept_large_sparse=solver != 'liblinear') check_classification_targets(y) - solver, multi_class = _check_solver_option( - self.solver, self.multi_class, self.penalty, self.dual, - classes=np.unique(y)) - class_weight = self.class_weight # Encode for string labels From 91be8bf01e137c63988cda7b5277b26704c70669 Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Mon, 13 Aug 2018 12:57:27 +1000 Subject: [PATCH 15/17] This is an API change, not a feature --- doc/whats_new/v0.20.rst | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/doc/whats_new/v0.20.rst b/doc/whats_new/v0.20.rst index b694b5e31ecad..0d21371e8c8af 100644 --- a/doc/whats_new/v0.20.rst +++ b/doc/whats_new/v0.20.rst @@ -408,12 +408,6 @@ Support for Python 3.3 has been officially dropped. 
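Both FutureWarnings introduced in this series can also be silenced temporarily without changing
behaviour; a sketch (not from the patch) for users who deliberately keep the 0.20 defaults during
the transition, using the standard warnings machinery rather than the recommended explicit
parameters:

    import warnings
    from sklearn.datasets import load_iris
    from sklearn.linear_model import LogisticRegression

    X, y = load_iris(return_X_y=True)
    with warnings.catch_warnings():
        # Silences the deprecation notice; specifying solver and multi_class
        # explicitly remains the recommended fix.
        warnings.simplefilter('ignore', FutureWarning)
        clf = LogisticRegression().fit(X, y)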
:class:`linear_model.BayesianRidge` for weighted linear regression. :issue:`10111` by :user:`Peter St. John `. -- |Feature| The default values of the ``solver`` and ``multi_class`` parameters of - :class:`linear_model.LogisticRegression` will change respectively from - ``'liblinear'`` and ``'ovr'`` in version 0.20 to ``'lbfgs'`` and - ``'multinomial'`` in version 0.22. A FutureWarning is raised when the default - values are used. :issue:`11476` by `Tom Dupre la Tour`_. - - |Fix| Fixed a bug in :func:`logistic.logistic_regression_path` to ensure that the returned coefficients are correct when ``multiclass='multinomial'``. Previously, some of the coefficients would override each other, leading to @@ -483,6 +477,12 @@ Support for Python 3.3 has been officially dropped. ValueError. :issue:`11327` by :user:`Karan Dhingra ` and `Joel Nothman`_. +- |API| The default values of the ``solver`` and ``multi_class`` parameters of + :class:`linear_model.LogisticRegression` will change respectively from + ``'liblinear'`` and ``'ovr'`` in version 0.20 to ``'lbfgs'`` and + ``'multinomial'`` in version 0.22. A FutureWarning is raised when the default + values are used. :issue:`11476` by `Tom Dupre la Tour`_. + - |API| Deprecate ``positive=True`` option in :class:`linear_model.Lars` as the underlying implementation is broken. Use :class:`linear_model.Lasso` instead. :issue:`9837` by `Alexandre Gramfort`_. From bf3a7fdc4952a79af1062b75fb5ee6ad594d1261 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Fri, 17 Aug 2018 17:48:32 +0300 Subject: [PATCH 16/17] Decrease numerical precision in LogisticRegression doctest --- sklearn/linear_model/logistic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/linear_model/logistic.py b/sklearn/linear_model/logistic.py index ec057503e14b1..66e81d37cf1f5 100644 --- a/sklearn/linear_model/logistic.py +++ b/sklearn/linear_model/logistic.py @@ -1170,7 +1170,7 @@ class LogisticRegression(BaseEstimator, LinearClassifierMixin, array([0, 0]) >>> clf.predict_proba(X[:2, :]) # doctest: +ELLIPSIS array([[9.8...e-01, 1.8...e-02, 1.4...e-08], - [9.7...e-01, 2.8...e-02, 3.0...e-08]]) + [9.7...e-01, 2.8...e-02, ...e-08]]) >>> clf.score(X, y) 0.97... From ec86d3ba2c9e621e809e92f07041b76ce56e1d63 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Mon, 20 Aug 2018 23:57:38 +0300 Subject: [PATCH 17/17] More review comments --- sklearn/linear_model/logistic.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/sklearn/linear_model/logistic.py b/sklearn/linear_model/logistic.py index 66e81d37cf1f5..dab3f4e59c368 100644 --- a/sklearn/linear_model/logistic.py +++ b/sklearn/linear_model/logistic.py @@ -430,14 +430,14 @@ def _check_solver_option(solver, multi_class, penalty, dual): if solver == 'warn': solver = 'liblinear' warnings.warn("Default solver will be changed to 'lbfgs' in 0.22. " - "Use a specific solver to silence this warning.", + "Specify a solver to silence this warning.", FutureWarning) if multi_class == 'warn': multi_class = 'ovr' warnings.warn("Default multi_class will be changed to 'multinomial' in" - " 0.22. Use a specific option to silence this warning.", - FutureWarning) + " 0.22. Specify the multi_class option to silence " + "this warning.", FutureWarning) # Check the string parameters if multi_class not in ['multinomial', 'ovr']: @@ -1072,7 +1072,7 @@ class LogisticRegression(BaseEstimator, LinearClassifierMixin, 'liblinear'. solver : str, {'newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'}, \ - default: 'liblinear'. 
Will be changed to 'lbfgs' solver in 0.22 + default: 'liblinear'. Algorithm to use in the optimization problem. @@ -1099,8 +1099,7 @@ class LogisticRegression(BaseEstimator, LinearClassifierMixin, Useful only for the newton-cg, sag and lbfgs solvers. Maximum number of iterations taken for the solvers to converge. - multi_class : str, {'ovr', 'multinomial'} - default: 'ovr'. Will be changed to 'multinomial' in 0.22. + multi_class : str, {'ovr', 'multinomial'}, default: 'ovr' Multiclass option can be either 'ovr' or 'multinomial'. If the option chosen is 'ovr', then a binary problem is fit for each label. Else the loss minimised is the multinomial loss fit across