scikit-learn · amueller · Jan 3, 2013 · Dec 11, 2012 · Dec 11, 2012 · Dec 11, 2012
diff --git a/doc/modules/metrics.rst b/doc/modules/metrics.rst
@@ -55,13 +55,13 @@ It can be computed using :func:`chi2_kernel` and then passed to an
 
     >>> svm = SVC(kernel='precomputed').fit(K, y)
     >>> svm.predict(K)
-    array([ 0.,  1.,  0.,  1.])
+    array([0, 1, 0, 1])
 
 It can also be directly used as the ``kernel`` argument::
 
     >>> svm = SVC(kernel=chi2_kernel).fit(X, y)
     >>> svm.predict(X)
-    array([ 0.,  1.,  0.,  1.])
+    array([0, 1, 0, 1])
 
 
 The chi squared kernel is given by

diff --git a/doc/modules/svm.rst b/doc/modules/svm.rst
@@ -90,7 +90,7 @@ training samples::
 After being fitted, the model can then be used to predict new values::
 
     >>> clf.predict([[2., 2.]])
-    array([ 1.])
+    array([1])
 
 SVMs decision function depends on some subset of the training data,
 called the support vectors. Some properties of these support vectors
@@ -471,7 +471,7 @@ vectors and the test vectors must be provided.
     shrinking=True, tol=0.001, verbose=False)
     >>> # predict on training examples
     >>> clf.predict(gram)
-    array([ 0.,  1.])
+    array([0, 1])
 
 Parameters of the RBF Kernel
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~

diff --git a/doc/tutorial/basic/tutorial.rst b/doc/tutorial/basic/tutorial.rst
@@ -177,7 +177,7 @@ classifier what is the digit of our last image in the `digits` dataset,
 which we have not used to train the classifier::
 
   >>> clf.predict(digits.data[-1])
-  array([ 8.])
+  array([8])
 
 The corresponding image is the following:
 
@@ -214,7 +214,7 @@ persistence model, namely `pickle <http://docs.python.org/library/pickle.html>`_
   >>> s = pickle.dumps(clf)
   >>> clf2 = pickle.loads(s)
   >>> clf2.predict(X[0])
-  array([ 0.])
+  array([0])
   >>> y[0]
   0
 

diff --git a/doc/whats_new.rst b/doc/whats_new.rst
@@ -195,6 +195,11 @@ API changes summary
      :class:`decomposition.MiniBatchDictionaryLearning` and
      :class:`decomposition.MiniBatchSparsePCA` for consistency.
 
+   - :class:`svm.SVC` and :class:`svm.NuSVC` now provide a ``classes_``
+     attribute and support arbitrary dtypes for labels ``y``.
+     Also, the dtype of the array returned by ``predict`` now matches the
+     dtype of the ``y`` passed to ``fit`` (it used to always be ``np.float``).
+
 .. _changes_0_12.1:
 
 0.12.1

diff --git a/sklearn/linear_model/stochastic_gradient.py b/sklearn/linear_model/stochastic_gradient.py
@@ -20,6 +20,7 @@
 
 from .sgd_fast import plain_sgd as plain_sgd
 from ..utils.seq_dataset import ArrayDataset, CSRDataset
+from ..utils import compute_class_weight
 from .sgd_fast import Hinge
 from .sgd_fast import SquaredHinge
 from .sgd_fast import Log
@@ -317,31 +318,6 @@ def __init__(self, loss="hinge", penalty='l2', alpha=0.0001,
         self.classes_ = None
         self.n_jobs = int(n_jobs)
 
-    def _set_class_weight(self, class_weight, classes, y):
-        """Estimate class weights for unbalanced datasets."""
-        if class_weight is None or len(class_weight) == 0:
-            # uniform class weights
-            weight = np.ones(classes.shape[0], dtype=np.float64, order='C')
-        elif class_weight == 'auto':
-            # proportional to the number of samples in the class
-            weight = np.array([1.0 / np.sum(y == i) for i in classes],
-                              dtype=np.float64, order='C')
-            weight *= classes.shape[0] / np.sum(weight)
-        else:
-            # user-defined dictionary
-            weight = np.ones(classes.shape[0], dtype=np.float64, order='C')
-            if not isinstance(class_weight, dict):
-                raise ValueError("class_weight must be dict, 'auto', or None,"
-                                 " got: %r" % class_weight)
-            for c in class_weight:
-                i = np.searchsorted(classes, c)
-                if classes[i] != c:
-                    raise ValueError("Class label %d not present." % c)
-                else:
-                    weight[i] = class_weight[c]
-
-        self._expanded_class_weight = weight
-
     def _partial_fit(self, X, y, alpha, C,
                      loss, learning_rate, n_iter,
                      classes, sample_weight,
@@ -367,7 +343,8 @@ def _partial_fit(self, X, y, alpha, C,
         n_classes = self.classes_.shape[0]
 
         # Allocate datastructures from input arguments
-        self._set_class_weight(self.class_weight, self.classes_, y)
+        self._expanded_class_weight = compute_class_weight(self.class_weight,
+                                                           self.classes_, y)
         sample_weight = self._validate_sample_weight(sample_weight, n_samples)
 
         if self.coef_ is None:

diff --git a/sklearn/svm/base.py b/sklearn/svm/base.py
@@ -8,33 +8,14 @@
 from ..base import BaseEstimator, ClassifierMixin
 from ..preprocessing import LabelEncoder
 from ..utils import atleast2d_or_csr, array2d, check_random_state
+from ..utils import ConvergenceWarning, compute_class_weight
+from ..utils.fixes import unique
 from ..utils.extmath import safe_sparse_dot
-from ..utils import ConvergenceWarning
 
 
 LIBSVM_IMPL = ['c_svc', 'nu_svc', 'one_class', 'epsilon_svr', 'nu_svr']
 
 
-def _get_class_weight(class_weight, y):
-    """Estimate class weights for unbalanced datasets."""
-    if class_weight == 'auto':
-        uy = np.unique(y)
-        weight_label = np.asarray(uy, dtype=np.int32, order='C')
-        weight = np.array([1.0 / np.sum(y == i) for i in uy],
-                          dtype=np.float64, order='C')
-        weight *= uy.shape[0] / np.sum(weight)
-    else:
-        if class_weight is None:
-            keys = values = []
-        else:
-            keys = class_weight.keys()
-            values = class_weight.values()
-        weight = np.asarray(values, dtype=np.float64, order='C')
-        weight_label = np.asarray(keys, dtype=np.int32, order='C')
-
-    return weight, weight_label
-
-
 def _one_vs_one_coef(dual_coef, n_support, support_vectors):
     """Generate primal coefficients from dual coefficients
     for the one-vs-one multi class LibSVM in the case
@@ -159,18 +140,24 @@ def fit(self, X, y, sample_weight=None):
                              "by not using the ``sparse`` parameter")
 
         X = atleast2d_or_csr(X, dtype=np.float64, order='C')
-        y = np.asarray(y, dtype=np.float64, order='C')
 
+        if self.impl in ['c_svc', 'nu_svc']:
+            # classification: encode labels as indices into classes_
+            self.classes_, y = unique(y, return_inverse=True)
+            self.class_weight_ = compute_class_weight(self.class_weight,
+                                                      self.classes_, y)
+        else:
+            self.class_weight_ = np.empty(0)
         if self.impl != "one_class" and len(np.unique(y)) < 2:
             raise ValueError("The number of classes has to be greater than"
                              " one.")
 
+        y = np.asarray(y, dtype=np.float64, order='C')
+
         sample_weight = np.asarray([]
                                    if sample_weight is None
                                    else sample_weight, dtype=np.float64)
         solver_type = LIBSVM_IMPL.index(self.impl)
-        self.class_weight_, self.class_weight_label_ = \
-            _get_class_weight(self.class_weight, y)
 
         # input validation
         if solver_type != 2 and X.shape[0] != y.shape[0]:
@@ -238,11 +225,11 @@ def _dense_fit(self, X, y, sample_weight, solver_type, kernel):
         self.support_, self.support_vectors_, self.n_support_, \
             self.dual_coef_, self.intercept_, self.label_, self.probA_, \
             self.probB_, self.fit_status_ = libsvm.fit(
-                X, y, svm_type=solver_type, sample_weight=sample_weight,
-                class_weight=self.class_weight_,
-                class_weight_label=self.class_weight_label_, kernel=kernel,
-                C=self.C, nu=self.nu, probability=self.probability,
-                degree=self.degree, shrinking=self.shrinking, tol=self.tol,
+                X, y,
+                svm_type=solver_type, sample_weight=sample_weight,
+                class_weight=self.class_weight_, kernel=kernel, C=self.C,
+                nu=self.nu, probability=self.probability, degree=self.degree,
+                shrinking=self.shrinking, tol=self.tol,
                 cache_size=self.cache_size, coef0=self.coef0,
                 gamma=self._gamma, epsilon=self.epsilon,
                 max_iter=self.max_iter)
@@ -261,7 +248,7 @@ def _sparse_fit(self, X, y, sample_weight, solver_type, kernel):
             libsvm_sparse.libsvm_sparse_train(
                 X.shape[1], X.data, X.indices, X.indptr, y, solver_type,
                 kernel_type, self.degree, self._gamma, self.coef0, self.tol,
-                self.C, self.class_weight_label_, self.class_weight_,
+                self.C, self.class_weight_,
                 sample_weight, self.nu, self.cache_size, self.epsilon,
                 int(self.shrinking), int(self.probability), self.max_iter)
 
@@ -296,7 +283,11 @@ def predict(self, X):
         """
         X = self._validate_for_predict(X)
         predict = self._sparse_predict if self._sparse else self._dense_predict
-        return predict(X)
+        y = predict(X)
+        if self.impl in ['c_svc', 'nu_svc']:
+            # classification: map class indices back to labels
+            y = self.classes_.take(y.astype(np.int))
+        return y
 
     def _dense_predict(self, X):
         n_samples, n_features = X.shape
@@ -338,14 +329,17 @@ def _sparse_predict(self, X):
         C = 0.0  # C is not useful here
 
         return libsvm_sparse.libsvm_sparse_predict(
-            X.data, X.indices, X.indptr, self.support_vectors_.data,
-            self.support_vectors_.indices, self.support_vectors_.indptr,
+            X.data, X.indices, X.indptr,
+            self.support_vectors_.data,
+            self.support_vectors_.indices,
+            self.support_vectors_.indptr,
             self.dual_coef_.data, self._intercept_,
-            LIBSVM_IMPL.index(self.impl), kernel_type, self.degree,
-            self._gamma, self.coef0, self.tol, C, self.class_weight_label_,
-            self.class_weight_, self.nu, self.epsilon, self.shrinking,
-            self.probability, self.n_support_, self.label_, self.probA_,
-            self.probB_)
+            LIBSVM_IMPL.index(self.impl), kernel_type,
+            self.degree, self._gamma, self.coef0, self.tol,
+            C, self.class_weight_,
+            self.nu, self.epsilon, self.shrinking,
+            self.probability, self.n_support_, self.label_,
+            self.probA_, self.probB_)
 
     def _compute_kernel(self, X):
         """Return the data transformed by a callable kernel"""
@@ -555,7 +549,7 @@ def _sparse_predict_proba(self, X):
             self.dual_coef_.data, self._intercept_,
             LIBSVM_IMPL.index(self.impl), kernel_type,
             self.degree, self._gamma, self.coef0, self.tol,
-            self.C, self.class_weight_label_, self.class_weight_,
+            self.C, self.class_weight_,
             self.nu, self.epsilon, self.shrinking,
             self.probability, self.n_support_, self.label_,
             self.probA_, self.probB_)
@@ -664,8 +658,8 @@ def fit(self, X, y):
         X = atleast2d_or_csr(X, dtype=np.float64, order="C")
         y = np.asarray(y, dtype=np.float64).ravel()
 
-        self.class_weight_, self.class_weight_label_ = \
-            _get_class_weight(self.class_weight, y)
+        self.class_weight_ = compute_class_weight(self.class_weight,
+                                                  self.classes_, y)
 
         if X.shape[0] != y.shape[0]:
             raise ValueError("X and y have incompatible shapes.\n"
@@ -684,7 +678,7 @@ def fit(self, X, y):
             print '[LibLinear]',
         self.raw_coef_ = train(X, y, self._get_solver_type(), self.tol,
                                self._get_bias(), self.C,
-                               self.class_weight_label_, self.class_weight_,
+                               self.class_weight_,
                                # seed for srand in range [0..INT_MAX);
                                # due to limitations in Numpy on 32-bit
                                # platforms, we can't get to the UINT_MAX

diff --git a/sklearn/svm/classes.py b/sklearn/svm/classes.py
@@ -240,7 +240,7 @@ class frequencies.
             gamma=0.0, kernel='rbf', max_iter=-1, probability=False,
             shrinking=True, tol=0.001, verbose=False)
     >>> print(clf.predict([[-0.8, -1]]))
-    [ 1.]
+    [1]
 
     See also
     --------
@@ -366,7 +366,7 @@ class frequencies.
             max_iter=-1, nu=0.5, probability=False, shrinking=True, tol=0.001,
             verbose=False)
     >>> print(clf.predict([[-0.8, -1]]))
-    [ 1.]
+    [1]
 
     See also
     --------