scikit-learn
diff --git a/doc/whats_new.rst b/doc/whats_new.rst
Lines changed: 3 additions & 0 deletions
diff --git a/sklearn/svm/base.py b/sklearn/svm/base.py
Lines changed: 4 additions & 0 deletions
diff --git a/sklearn/svm/tests/test_sparse.py b/sklearn/svm/tests/test_sparse.py
Lines changed: 33 additions & 2 deletions
@@ -131,6 +131,9 @@ Changelog
      :mod:`sklearn.metrics` for regression and classification metrics
      by `Arnaud Joly`_.
 
+   - Fixed a bug in :class:`sklearn.svm.SVC` when using csr-matrices with
+     unsorted indices by Xinfan Meng and `Andreas Müller`_.
+
 API changes summary
 -------------------
    - Renamed all occurences of ``n_atoms`` to ``n_components`` for consistency.
 
@@ -238,6 +238,7 @@ def _dense_fit(self, X, y, sample_weight, solver_type, kernel):
 
     def _sparse_fit(self, X, y, sample_weight, solver_type, kernel):
         X.data = np.asarray(X.data, dtype=np.float64, order='C')
+        X.sort_indices()
 
         kernel_type = self._sparse_kernels.index(kernel)
 
@@ -398,6 +399,9 @@ def _validate_for_predict(self, X):
         X = atleast2d_or_csr(X, dtype=np.float64, order="C")
         if self._sparse and not sp.isspmatrix(X):
             X = sp.csr_matrix(X)
+        if self._sparse:
+            X.sort_indices()
+
         if (sp.issparse(X) and not self._sparse and
                 not hasattr(self.kernel, '__call__')):
             raise ValueError(
 
@@ -5,9 +5,9 @@
 from numpy.testing import (assert_array_almost_equal, assert_array_equal,
                            assert_equal)
 
-from nose.tools import assert_raises, assert_true
+from nose.tools import assert_raises, assert_true, assert_false
 from nose.tools import assert_equal as nose_assert_equal
-from sklearn.datasets.samples_generator import make_classification
+from sklearn.datasets import make_classification, load_digits
 from sklearn.svm.tests import test_svm
 from sklearn.utils import ConvergenceWarning
 from sklearn.utils.extmath import safe_sparse_dot
@@ -69,6 +69,37 @@ def test_svc():
                               sp_clf.predict_proba(T2), 4)
 
 
+def test_unsorted_indices():
+    # Check that SVC gives the same results whether the CSR input has sorted
+    # or unsorted indices, both when fitting and when calling predict_proba.
+    # We use a subset of digits because iris, blobs and make_classification
+    # didn't show the problem.
+    digits = load_digits()
+    X, y = digits.data[:50], digits.target[:50]
+    X_test = sparse.csr_matrix(digits.data[50:100])
+
+    X_sparse = sparse.csr_matrix(X)
+    coef_dense = svm.SVC(kernel='linear', probability=True).fit(X, y).coef_
+    sparse_svc = svm.SVC(kernel='linear', probability=True).fit(X_sparse, y)
+    coef_sorted = sparse_svc.coef_
+    # sanity check: dense and sparse SVC give the same coefficients
+    assert_array_almost_equal(coef_dense, coef_sorted.toarray())
+
+    # Index every row, in order, with an integer array.
+    # NOTE(review): this presumably relies on scipy's fancy indexing leaving
+    # the CSR indices unsorted, which may depend on the scipy version --
+    # confirm; the assertions below guard that precondition.
+    X_sparse_unsorted = X_sparse[np.arange(X.shape[0])]
+    X_test_unsorted = X_test[np.arange(X_test.shape[0])]
+
+    # make sure the indices really are flagged as unsorted, otherwise the
+    # comparisons below would not exercise the unsorted-input case
+    assert_false(X_sparse_unsorted.has_sorted_indices)
+    assert_false(X_test_unsorted.has_sorted_indices)
+
+    unsorted_svc = svm.SVC(kernel='linear',
+                           probability=True).fit(X_sparse_unsorted, y)
+    coef_unsorted = unsorted_svc.coef_
+    # fitting on unsorted indices must give the same coefficients as fitting
+    # on sorted indices
+    assert_array_almost_equal(coef_unsorted.toarray(), coef_sorted.toarray())
+    # predicting on unsorted test data must give the same probabilities as
+    # predicting on the sorted test data with the same fitted model
+    assert_array_almost_equal(sparse_svc.predict_proba(X_test_unsorted),
+                              sparse_svc.predict_proba(X_test))
+
+
 def test_svc_with_custom_kernel():
     kfunc = lambda x, y: safe_sparse_dot(x, y.T)
     clf_lin = svm.SVC(kernel='linear').fit(X_sp, Y)