FIX logistic regression class weights · scikit-learn/scikit-learn@e543ed2 · GitHub
Commit e543ed2

FIX logistic regression class weights
1 parent 6db9ee0 commit e543ed2

3 files changed: +75 -25 lines changed

sklearn/linear_model/logistic.py

Lines changed: 20 additions & 14 deletions
@@ -593,11 +593,11 @@ def logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True,
     sample_weight = np.ones(X.shape[0])
 
     # If class_weights is a dict (provided by the user), the weights
-    # are assigned to the original labels. If it is "auto", then
+    # are assigned to the original labels. If it is "balanced", then
     # the class_weights are assigned after masking the labels with a OvR.
     le = LabelEncoder()
 
-    if isinstance(class_weight, dict):
+    if isinstance(class_weight, dict) or multi_class == 'multinomial':
         if solver == "liblinear":
             if classes.size == 2:
                 # Reconstruct the weights with keys 1 and -1
@@ -609,7 +609,7 @@ def logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True,
                                  "solver cannot handle multiclass with "
                                  "class_weight of type dict. Use the lbfgs, "
                                  "newton-cg or sag solvers or set "
-                                 "class_weight='auto'")
+                                 "class_weight='balanced'")
         else:
             class_weight_ = compute_class_weight(class_weight, classes, y)
             sample_weight *= class_weight_[le.fit_transform(y)]
@@ -622,20 +622,20 @@ def logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True,
         mask = (y == pos_class)
         y_bin = np.ones(y.shape, dtype=np.float64)
         y_bin[~mask] = -1.
+        # for compute_class_weight
+
+        if class_weight in ("auto", "balanced"):
+            class_weight_ = compute_class_weight(class_weight, mask_classes,
+                                                 y_bin)
+            sample_weight *= class_weight_[le.fit_transform(y_bin)]
 
     else:
         lbin = LabelBinarizer()
-        Y_bin = lbin.fit_transform(y)
-        if Y_bin.shape[1] == 1:
-            Y_bin = np.hstack([1 - Y_bin, Y_bin])
-        w0 = np.zeros((Y_bin.shape[1], n_features + int(fit_intercept)),
+        Y_binarized = lbin.fit_transform(y)
+        if Y_binarized.shape[1] == 1:
+            Y_binarized = np.hstack([1 - Y_binarized, Y_binarized])
+        w0 = np.zeros((Y_binarized.shape[1], n_features + int(fit_intercept)),
                       order='F')
-        mask_classes = classes
-
-    if class_weight == "auto":
-        class_weight_ = compute_class_weight(class_weight, mask_classes,
-                                             y_bin)
-        sample_weight *= class_weight_[le.fit_transform(y_bin)]
 
     if coef is not None:
         # it must work both giving the bias term and not
@@ -664,7 +664,7 @@ def logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True,
     if multi_class == 'multinomial':
         # fmin_l_bfgs_b and newton-cg accepts only ravelled parameters.
         w0 = w0.ravel()
-        target = Y_bin
+        target = Y_binarized
         if solver == 'lbfgs':
             func = lambda x, *args: _multinomial_loss_grad(x, *args)[0:2]
         elif solver == 'newton-cg':
@@ -864,6 +864,12 @@ def _log_reg_scoring_path(X, y, train, test, pos_class=None, Cs=10,
     """
    _check_solver_option(solver, multi_class, penalty, dual, sample_weight)
 
+    # compute the class weights for the entire dataset y, and not for the fold
+    if class_weight in ("auto", "balanced"):
+        classes = np.unique(y)
+        class_weight = compute_class_weight(class_weight, classes, y)
+        class_weight = {cl: cw for (cl, cw) in zip(classes, class_weight)}
+
     X_train = X[train]
     X_test = X[test]
     y_train = y[train]
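
The changes above do two things: the multinomial path now resolves class_weight against the original labels (rather than the OvR-masked ±1 labels), and _log_reg_scoring_path converts the "auto"/"balanced" heuristic into an explicit per-class dict computed on the full target vector, so every CV fold reuses the same weights. As a rough standalone sketch of what that heuristic yields (not part of the commit; the toy labels and keyword-argument call style are illustrative), the "balanced" mode weights each class by n_samples / (n_classes * bincount(y)):

import numpy as np
from sklearn.utils import compute_class_weight

y = np.array([0, 0, 0, 0, 0, 0, 0, 0, 1, 2])   # toy, heavily imbalanced labels

classes = np.unique(y)
weights = compute_class_weight(class_weight="balanced", classes=classes, y=y)
class_weight = {cl: cw for (cl, cw) in zip(classes, weights)}
# class_weight is now roughly {0: 0.417, 1: 3.333, 2: 3.333}

# the same heuristic written out by hand
manual = len(y) / (len(classes) * np.bincount(y))
assert np.allclose(weights, manual)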

sklearn/linear_model/tests/test_logistic.py

Lines changed: 55 additions & 5 deletions
@@ -11,10 +11,12 @@
 from sklearn.utils.testing import assert_raises
 from sklearn.utils.testing import assert_true
 from sklearn.utils.testing import assert_warns
+from sklearn.utils.testing import assert_warns_message
 from sklearn.utils.testing import raises
 from sklearn.utils.testing import ignore_warnings
 from sklearn.utils.testing import assert_raise_message
 from sklearn.utils import ConvergenceWarning
+from sklearn.utils import compute_class_weight
 
 from sklearn.linear_model.logistic import (
     LogisticRegression,
@@ -26,7 +28,6 @@
 from sklearn.datasets import load_iris, make_classification
 from sklearn.metrics import log_loss
 
-
 X = [[-1, 0], [0, 1], [1, 1]]
 X_sp = sp.csr_matrix(X)
 Y1 = [0, 1, 1]
@@ -542,12 +543,12 @@ def test_logistic_regressioncv_class_weights():
     X, y = make_classification(n_samples=20, n_features=20, n_informative=10,
                                n_classes=3, random_state=0)
 
-    # Test the liblinear fails when class_weight of type dict is
-    # provided, when it is multiclass. However it can handle
-    # binary problems.
+    msg = ("In LogisticRegressionCV the liblinear solver cannot handle "
+           "multiclass with class_weight of type dict. Use the lbfgs, "
+           "newton-cg or sag solvers or set class_weight='balanced'")
     clf_lib = LogisticRegressionCV(class_weight={0: 0.1, 1: 0.2},
                                    solver='liblinear')
-    assert_raises(ValueError, clf_lib.fit, X, y)
+    assert_raise_message(ValueError, msg, clf_lib.fit, X, y)
     y_ = y.copy()
     y_[y == 2] = 1
     clf_lib.fit(X, y_)
@@ -613,6 +614,55 @@ def test_logistic_regression_sample_weights():
     assert_array_almost_equal(clf_cw_12.coef_, clf_sw_12.coef_, decimal=4)
 
 
+def _compute_class_weight_dictionary(y):
+    # helper for returning a dictionary instead of an array
+    classes = np.unique(y)
+    class_weight = compute_class_weight("balanced", classes, y)
+    class_weight_dict = {cl: cw for (cl, cw) in zip(classes, class_weight)}
+    return class_weight_dict
+
+
+def test_logistic_regression_class_weights():
+    # Multinomial case: remove 90% of class 0
+    X = iris.data[45:, :]
+    y = iris.target[45:]
+    solvers = ("lbfgs", "newton-cg")
+    class_weight_dict = _compute_class_weight_dictionary(y)
+
+    for solver in solvers:
+        clf1 = LogisticRegression(solver=solver, multi_class="multinomial",
+                                  class_weight="balanced")
+        clf2 = LogisticRegression(solver=solver, multi_class="multinomial",
+                                  class_weight=class_weight_dict)
+        clf1.fit(X, y)
+        clf2.fit(X, y)
+        assert_array_almost_equal(clf1.coef_, clf2.coef_, decimal=6)
+
+    # Binary case: remove 90% of class 0 and 100% of class 2
+    X = iris.data[45:100, :]
+    y = iris.target[45:100]
+    solvers = ("lbfgs", "newton-cg", "liblinear")
+    class_weight_dict = _compute_class_weight_dictionary(y)
+
+    for solver in solvers:
+        clf1 = LogisticRegression(solver=solver, multi_class="ovr",
+                                  class_weight="balanced")
+        clf2 = LogisticRegression(solver=solver, multi_class="ovr",
+                                  class_weight=class_weight_dict)
+        clf1.fit(X, y)
+        clf2.fit(X, y)
+        assert_array_almost_equal(clf1.coef_, clf2.coef_, decimal=6)
+
+
+def test_multinomial_logistic_regression_with_classweight_auto():
+    X, y = iris.data, iris.target
+    model = LogisticRegression(multi_class='multinomial',
+                               class_weight='auto', solver='lbfgs')
+    assert_warns_message(DeprecationWarning,
+                         "class_weight='auto' heuristic is deprecated",
+                         model.fit, X, y)
+
+
 def test_logistic_regression_convergence_warnings():
     # Test that warnings are raised if model does not converge
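
The new test exercises the "balanced"-vs-dict equivalence on subsampled iris data for several solvers. Outside the test suite, the same check can be written as a short standalone snippet; the one below is a hedged illustration (the dataset, default solver and tolerance are assumptions, not taken from the commit):

import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.utils import compute_class_weight

# an imbalanced binary problem
X, y = make_classification(n_samples=200, n_features=10, weights=[0.9, 0.1],
                           random_state=0)

classes = np.unique(y)
weights = compute_class_weight(class_weight="balanced", classes=classes, y=y)
cw_dict = dict(zip(classes, weights))

# class_weight="balanced" and the explicit dict define the same weighted
# optimisation problem, so the fitted coefficients should match
clf_balanced = LogisticRegression(class_weight="balanced").fit(X, y)
clf_dict = LogisticRegression(class_weight=cw_dict).fit(X, y)
np.testing.assert_allclose(clf_balanced.coef_, clf_dict.coef_, rtol=1e-4)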

sklearn/tests/test_common.py

Lines changed: 0 additions & 6 deletions
@@ -113,12 +113,6 @@ def test_class_weight_balanced_linear_classifiers():
                           and issubclass(clazz, LinearClassifierMixin)]
 
     for name, Classifier in linear_classifiers:
-        if name == "LogisticRegressionCV":
-            # Contrary to RidgeClassifierCV, LogisticRegressionCV use actual
-            # CV folds and fit a model for each CV iteration before averaging
-            # the coef. Therefore it is expected to not behave exactly as the
-            # other linear model.
-            continue
         yield check_class_weight_balanced_linear_classifier, name, Classifier
