MRG LibLinear class weights fix by amueller · Pull Request #1491 · scikit-learn/scikit-learn · GitHub

MRG LibLinear class weights fix #1491


Merged · 13 commits · Jan 3, 2013

Changes from all commits
5 changes: 5 additions & 0 deletions doc/whats_new.rst
@@ -112,6 +112,11 @@ Changelog
:func:`metrics.mean_absolute_error` and
:func:`metrics.r2_score` metrics support multioutput by `Arnaud Joly`_.

- Fixed ``class_weight`` support in :class:`svm.LinearSVC` and
:class:`linear_model.LogisticRegression` by `Andreas Müller`_. In earlier
releases the meaning of ``class_weight`` was reversed: a higher weight
erroneously meant fewer positives for the given class.

API changes summary
-------------------
- Renamed all occurrences of ``n_atoms`` to ``n_components`` for consistency.
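A side note on the changelog entry above (not part of the PR): with the corrected semantics, a larger ``class_weight`` value for a class makes the classifier favor that class instead of suppressing it. A minimal sketch, assuming a scikit-learn release that already includes this fix:

from sklearn.datasets import make_classification
from sklearn.svm import LinearSVC

# Imbalanced toy problem: roughly 90% of the samples belong to class 0.
X, y = make_classification(n_samples=200, n_features=10,
                           weights=[0.9, 0.1], random_state=0)

plain = LinearSVC(random_state=0).fit(X, y)
weighted = LinearSVC(class_weight={1: 10}, random_state=0).fit(X, y)

# With the fix, up-weighting class 1 should yield at least as many
# class-1 predictions as the unweighted model (previously the effect
# was reversed).
print((plain.predict(X) == 1).sum(), (weighted.predict(X) == 1).sum())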
7 changes: 0 additions & 7 deletions sklearn/svm/classes.py
@@ -311,13 +311,6 @@ class NuSVC(BaseSVC):
cache_size : float, optional
Specify the size of the kernel cache (in MB)

class_weight : {dict, 'auto'}, optional
Set the parameter C of class i to class_weight[i]*C for
SVC. If not given, all classes are supposed to have
weight one. The 'auto' mode uses the values of y to
automatically adjust weights inversely proportional to
class frequencies.

verbose : bool, default: False
Enable verbose output. Note that this setting takes advantage of a
per-process runtime setting in libsvm that, if enabled, may not work
8 changes: 4 additions & 4 deletions sklearn/svm/liblinear.pyx
@@ -11,7 +11,7 @@ cimport liblinear

def train_wrap(np.ndarray[np.float64_t, ndim=2, mode='c'] X,
np.ndarray[np.float64_t, ndim=1, mode='c'] Y,
int solver_type, double eps, double bias, double C,
int solver_type, double eps, double bias, double C,
np.ndarray[np.float64_t, ndim=1] class_weight,
unsigned random_seed):
"""
@@ -35,12 +35,12 @@ def train_wrap(np.ndarray[np.float64_t, ndim=2, mode='c'] X,
free_problem(problem)
free_parameter(param)
raise ValueError(error_msg)

# early return
model = train(problem, param)

# coef matrix holder created as fortran since that's what's used in liblinear
cdef np.ndarray[np.float64_t, ndim=2, mode='fortran'] w
cdef np.ndarray[np.float64_t, ndim=2, mode='fortran'] w
cdef int nr_class = get_nr_class(model)
cdef int nr_feature = get_nr_feature(model)
if bias > 0: nr_feature = nr_feature + 1
Expand All @@ -49,7 +49,7 @@ def train_wrap(np.ndarray[np.float64_t, ndim=2, mode='c'] X,
copy_w(w.data, model, nr_feature)
else:
len_w = (nr_class) * nr_feature
w = np.empty((nr_class, nr_feature),order='F')
w = np.empty((nr_class, nr_feature),order='F')
copy_w(w.data, model, len_w)

### FREE
15 changes: 9 additions & 6 deletions sklearn/svm/sparse/classes.py
@@ -1,3 +1,5 @@
import warnings

from .. import LinearSVC as GeneralLinearSVC
from ..base import BaseSVC
from ...base import RegressorMixin
@@ -71,14 +73,15 @@ class NuSVC(SparseBaseLibSVM, BaseSVC):
[1]
"""

def __init__(self, nu=0.5, kernel='rbf', degree=3, gamma=0.0,
coef0=0.0, shrinking=True, probability=False,
tol=1e-3, cache_size=200, class_weight=None,
verbose=False, max_iter=-1):

def __init__(self, nu=0.5, kernel='rbf', degree=3, gamma=0.0, coef0=0.0,
shrinking=True, probability=False, tol=1e-3, cache_size=200,
class_weight=None, verbose=False, max_iter=-1):
if class_weight is not None:
warnings.warn("Parameter class_weight is not supported in NuSVC "
"and will be ignored.", stacklevel=2)
super(NuSVC, self).__init__(
'nu_svc', kernel, degree, gamma, coef0, tol, 0., nu, 0., shrinking,
probability, cache_size, class_weight, verbose, max_iter)
probability, cache_size, None, verbose, max_iter)


@deprecated("""to be removed in v0.14;
4 changes: 3 additions & 1 deletion sklearn/svm/src/liblinear/linear.cpp
@@ -7,6 +7,8 @@

- Changes roles of +1 and -1 to match scikit API, Andreas Mueller
See issue 546: https://github.com/scikit-learn/scikit-learn/pull/546
- Also changed roles for pairwise class weights, Andreas Mueller
See issue 1491: https://github.com/scikit-learn/scikit-learn/pull/1491

*/

@@ -2410,7 +2412,7 @@ model* train(const problem *prob, const parameter *param)
for(; k<sub_prob.l; k++)
sub_prob.y[k] = +1;

train_one(&sub_prob, param, &model_->w[0], weighted_C[0], weighted_C[1]);
train_one(&sub_prob, param, &model_->w[0], weighted_C[1], weighted_C[0]);
}
else
{
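The one-line change above is the heart of the fix: the two per-class penalties handed to liblinear's binary code path were applied to the wrong classes once the roles of +1 and -1 had been switched to match the scikit-learn API. As a conceptual illustration only (this is not the PR's C++ code; effective_C is a hypothetical helper), per-class weights simply rescale the penalty C applied to each sample, and the swap makes weighted_C[i] correspond to scikit-learn's class i:

import numpy as np

def effective_C(y, C=1.0, class_weight=None):
    # Illustrative only: per-sample penalty C * class_weight[y_i].
    class_weight = class_weight or {}
    return np.array([C * class_weight.get(label, 1.0) for label in y])

y = np.array([0, 0, 0, 1])
print(effective_C(y, C=1.0, class_weight={0: 0.1, 1: 10}))
# [ 0.1  0.1  0.1  10. ] -- before the fix, the binary liblinear branch
# applied each of these weights to the opposite class.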
13 changes: 7 additions & 6 deletions sklearn/svm/tests/test_svm.py
@@ -13,6 +13,7 @@

from sklearn import svm, linear_model, datasets, metrics, base
from sklearn.datasets.samples_generator import make_classification
from sklearn.metrics import f1_score
from sklearn.utils import check_random_state
from sklearn.utils import ConvergenceWarning
from sklearn.utils.testing import assert_greater, assert_less
@@ -305,15 +306,15 @@ def test_weight():
# so all predicted values belong to class 2
assert_array_almost_equal(clf.predict(X), [2] * 6)

X_, y_ = make_classification(n_samples=200, n_features=100,
weights=[0.833, 0.167], random_state=0)
X_, y_ = make_classification(n_samples=200, n_features=10,
weights=[0.833, 0.167], random_state=2)

for clf in (linear_model.LogisticRegression(),
svm.LinearSVC(random_state=0), svm.SVC()):
clf.set_params(class_weight={0: 5})
clf.fit(X_[: 180], y_[: 180])
y_pred = clf.predict(X_[180:])
assert_true(np.sum(y_pred == y_[180:]) >= 11)
clf.set_params(class_weight={0: .1, 1: 10})
clf.fit(X_[:100], y_[:100])
y_pred = clf.predict(X_[100:])
assert_true(f1_score(y_[100:], y_pred) > .3)


def test_sample_weights():
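A brief note on the test change above (not from the PR): the assertion moves from counting correct predictions to f1_score, presumably because on an imbalanced split a classifier that always predicts the majority class already scores well on plain accuracy. A small illustration:

import numpy as np
from sklearn.metrics import f1_score

y_true = np.array([0] * 83 + [1] * 17)
y_majority = np.zeros_like(y_true)       # always predict the majority class

print((y_majority == y_true).mean())     # about 0.83 plain accuracy
print(f1_score(y_true, y_majority))      # 0.0 for the positive class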
94 changes: 93 additions & 1 deletion sklearn/tests/test_common.py
@@ -29,7 +29,8 @@
TransformerMixin, ClusterMixin)
from sklearn.utils import shuffle
from sklearn.preprocessing import StandardScaler, Scaler
from sklearn.datasets import load_iris, load_boston, make_blobs
from sklearn.datasets import (load_iris, load_boston, make_blobs,
make_classification)
from sklearn.metrics import zero_one_score, adjusted_rand_score
from sklearn.lda import LDA
from sklearn.svm.base import BaseLibSVM
@@ -52,6 +53,9 @@
from sklearn.isotonic import IsotonicRegression
from sklearn.random_projection import (GaussianRandomProjection,
SparseRandomProjection)
from sklearn.metrics import f1_score

from sklearn.cross_validation import train_test_split

dont_test = [SparseCoder, EllipticEnvelope, EllipticEnvelop, DictVectorizer,
LabelBinarizer, LabelEncoder, TfidfTransformer,
@@ -645,3 +649,91 @@ def test_configure():
finally:
sys.argv = old_argv
os.chdir(cwd)


def test_class_weight_classifiers():
# test that class_weight works and that the semantics are consistent
classifiers = all_estimators(type_filter='classifier')

with warnings.catch_warnings(record=True):
classifiers = [c for c in classifiers
if 'class_weight' in c[1]().get_params().keys()]

for n_centers in [2, 3]:
# create a very noisy dataset
X, y = make_blobs(centers=n_centers, random_state=0, cluster_std=20)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.5,
random_state=0)
for name, Clf in classifiers:
if name == "NuSVC":
# the sparse version has a parameter that doesn't do anything
continue
if name.startswith("RidgeClassifier"):
# RidgeClassifier shows unexpected behavior
# FIXME!
continue
if name.endswith("NB"):
# NaiveBayes classifiers have a somewhat different interface.
# FIXME SOON!
continue
if n_centers == 2:
class_weight = {0: 1000, 1: 0.0001}
else:
class_weight = {0: 1000, 1: 0.0001, 2: 0.0001}

with warnings.catch_warnings(record=True):
clf = Clf(class_weight=class_weight)
if hasattr(clf, "n_iter"):
clf.set_params(n_iter=100)

set_random_state(clf)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
assert_greater(np.mean(y_pred == 0), 0.9)


def test_class_weight_auto_classifies():
# test that class_weight="auto" improves f1-score
classifiers = all_estimators(type_filter='classifier')

with warnings.catch_warnings(record=True):
classifiers = [c for c in classifiers
if 'class_weight' in c[1]().get_params().keys()]

for n_classes, weights in zip([2, 3], [[.8, .2], [.8, .1, .1]]):
# create unbalanced dataset
X, y = make_classification(n_classes=n_classes, n_samples=200,
n_features=10, weights=weights,
random_state=0, n_informative=n_classes)
X = StandardScaler().fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.5,
random_state=0)
for name, Clf in classifiers:
if name == "NuSVC":
# the sparse version has a parameter that doesn't do anything
continue

if name.startswith("RidgeClassifier"):
# RidgeClassifier behaves unexpectedly
# FIXME!
continue

if name.endswith("NB"):
# NaiveBayes classifiers have a somewhat different interface.
# FIXME SOON!
continue

with warnings.catch_warnings(record=True):
clf = Clf()
if hasattr(clf, "n_iter"):
clf.set_params(n_iter=100)

set_random_state(clf)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

clf.set_params(class_weight='auto')
clf.fit(X_train, y_train)
y_pred_auto = clf.predict(X_test)
assert_greater(f1_score(y_test, y_pred_auto),
f1_score(y_test, y_pred))
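The second test exercises class_weight='auto'. As a rough sketch of the underlying idea only (not the library's exact implementation; inverse_frequency_weights is a hypothetical helper, and scikit-learn's normalization may differ), 'auto' weights classes inversely proportional to their frequency, so rare classes receive a larger penalty:

import numpy as np

def inverse_frequency_weights(y):
    # Hypothetical helper: weights inversely proportional to class frequency.
    classes, counts = np.unique(y, return_counts=True)
    weights = len(y) / (len(classes) * counts.astype(float))
    return dict(zip(classes, weights))

y = np.array([0] * 160 + [1] * 40)
print(inverse_frequency_weights(y))  # the rare class 1 gets the larger weight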
Member:
Those are good tests, thanks for adding them!
