From 6b9da826308321e4df33ce70a11d5e124c8890b8 Mon Sep 17 00:00:00 2001
From: Olivier Grisel
Date: Mon, 10 Dec 2018 16:26:07 +0100
Subject: [PATCH] Make SVC tests independent of SV ordering

This also relaxes some strict assertions on the fitted coefficients.
---
 sklearn/svm/tests/test_sparse.py | 100 ++++++++++++++++++++++++-------
 sklearn/svm/tests/test_svm.py    |  21 +++----
 2 files changed, 90 insertions(+), 31 deletions(-)

diff --git a/sklearn/svm/tests/test_sparse.py b/sklearn/svm/tests/test_sparse.py
index 8bfa4e964cfce..aace133337bf7 100644
--- a/sklearn/svm/tests/test_sparse.py
+++ b/sklearn/svm/tests/test_sparse.py
@@ -2,7 +2,7 @@
 
 import numpy as np
 from numpy.testing import (assert_array_almost_equal, assert_array_equal,
-                           assert_equal)
+                           assert_equal, assert_allclose)
 from scipy import sparse
 
 from sklearn import datasets, svm, linear_model, base
@@ -13,6 +13,7 @@
 from sklearn.utils.testing import (assert_raises, assert_warns,
                                    assert_raise_message, ignore_warnings,
                                    skip_if_32bit)
+from sklearn.utils import shuffle
 
 
 # test sample 1
@@ -135,20 +136,77 @@ def kfunc(x, y):
     assert_array_equal(clf_lin.predict(X_sp), clf_mylin.predict(X_sp))
 
 
-def test_svc_iris():
-    # Test the sparse SVC with the iris dataset
-    for k in ('linear', 'poly', 'rbf'):
-        sp_clf = svm.SVC(gamma='scale', kernel=k).fit(iris.data, iris.target)
-        clf = svm.SVC(gamma='scale', kernel=k).fit(iris.data.toarray(),
-                                                   iris.target)
-
-        assert_array_almost_equal(clf.support_vectors_,
-                                  sp_clf.support_vectors_.toarray())
-        assert_array_almost_equal(clf.dual_coef_, sp_clf.dual_coef_.toarray())
-        assert_array_almost_equal(
-            clf.predict(iris.data.toarray()), sp_clf.predict(iris.data))
-        if k == 'linear':
-            assert_array_almost_equal(clf.coef_, sp_clf.coef_.toarray())
+def _toarray(a):
+    if sparse.issparse(a):
+        return a.toarray()
+    return a
+
+
+def _assert_svc_equal(svc1, svc2, atol=1e-7):
+    # Check that 2 support vector machines parametrize equivalent decision
+    # functions by comparing dual coefficients and support vectors.
+    assert svc1.get_params() == svc2.get_params()
+
+    sv1 = _toarray(svc1.support_vectors_)
+    dc1 = _toarray(svc1.dual_coef_)
+    sv2 = _toarray(svc2.support_vectors_)
+    dc2 = _toarray(svc2.dual_coef_)
+    assert dc1.shape == dc2.shape
+    assert sv1.shape == sv2.shape
+
+    # Consider each OvO binary classification problem in turn.
+    ovo_dim, n_support_vectors = dc1.shape
+    for ovo_idx in range(ovo_dim):
+        # The ordering of the support vectors is arbitrary. Furthermore, if
+        # samples are duplicated with different class labels, they can be
+        # selected as duplicated support vectors with distinct dual
+        # coefficients.
+
+        # Therefore, to check that the 2 SVMs parametrize the same decision
+        # function, we concatenate the dual coefficients with the matching
+        # support vector coordinates for all the support vectors of each
+        # classifier so that we can check that there are matching pairs
+        # (dual_coef, support_vector) in the two models by computing pairwise
+        # distances:
+        dc_sv1 = np.hstack([dc1[ovo_idx].reshape(-1, 1), sv1])
+        dc_sv2 = np.hstack([dc2[ovo_idx].reshape(-1, 1), sv2])
+
+        for sv1_idx in range(n_support_vectors):
+            sqdists = np.sum((dc_sv2 - dc_sv1[sv1_idx]) ** 2, axis=1)
+            sv2_idx = sqdists.argmin()
+            assert np.sqrt(sqdists[sv2_idx]) < atol
+
+    # For the linear kernel, also check that the aggregated coefficients of
+    # the linear decision function in the original feature space match.
+    if svc1.kernel == "linear":
+        max_absdiff = np.abs(_toarray(svc1.coef_) - _toarray(svc2.coef_)).max()
+        assert max_absdiff < atol
+
+
+@pytest.mark.parametrize("kernel", ["linear", "poly", "rbf"])
+def test_svc_iris(kernel, svc_tol=1e-12, atol=1e-7):
+    # The optimization result is not deterministic when the order of the iris
+    # samples is permuted, as iris has duplicated samples that can be selected
+    # as support vectors or not depending on the training set order. However,
+    # the resulting decision function should be independent of the training
+    # set ordering.
+    # Order invariance is only guaranteed if the model has properly converged,
+    # hence the small tol value.
+    iris2_data, iris2_target = shuffle(iris.data, iris.target,
+                                       random_state=0)
+    params = {
+        "gamma": 1.,
+        "kernel": kernel,
+        "tol": svc_tol,
+        "C": 0.01,
+    }
+    sp_clf = svm.SVC(**params).fit(iris.data, iris.target)
+    clf = svm.SVC(**params).fit(iris.data.toarray(), iris.target)
+    sp_clf2 = svm.SVC(**params).fit(iris2_data, iris2_target)
+    clf2 = svm.SVC(**params).fit(iris2_data.toarray(), iris2_target)
+
+    _assert_svc_equal(clf, sp_clf, atol=atol)
+    _assert_svc_equal(clf, clf2, atol=atol)
+    _assert_svc_equal(sp_clf, sp_clf2, atol=atol)
 
 
 def test_sparse_decision_function():
@@ -288,7 +346,8 @@ def test_sparse_oneclasssvm(datasets_index, kernel):
     check_svm_model_equal(clf, sp_clf, *dataset)
 
 
-def test_sparse_realdata():
+@pytest.mark.parametrize("C", [0.01, 1, 100])
+def test_sparse_20newsgroups_subset(C, atol=1e-7):
     # Test on a subset from the 20newsgroups dataset.
     # This catches some bugs if input is not correctly converted into
     # sparse format or weights are not correctly initialized.
@@ -310,11 +369,10 @@ def test_sparse_realdata():
                   3., 0., 0., 2., 2., 1., 3., 1., 1., 0., 1., 2., 1.,
                   1., 3.])
 
-    clf = svm.SVC(kernel='linear').fit(X.toarray(), y)
-    sp_clf = svm.SVC(kernel='linear').fit(sparse.coo_matrix(X), y)
-
-    assert_array_equal(clf.support_vectors_, sp_clf.support_vectors_.toarray())
-    assert_array_equal(clf.dual_coef_, sp_clf.dual_coef_.toarray())
+    params = dict(kernel='linear', C=C, tol=1e-12)
+    clf = svm.SVC(**params).fit(X.toarray(), y)
+    sp_clf = svm.SVC(**params).fit(sparse.coo_matrix(X), y)
+    _assert_svc_equal(clf, sp_clf, atol=atol)
 
 
 def test_sparse_svc_clone_with_callable_kernel():
diff --git a/sklearn/svm/tests/test_svm.py b/sklearn/svm/tests/test_svm.py
index 626705186b59f..8767d5e7e99bd 100644
--- a/sklearn/svm/tests/test_svm.py
+++ b/sklearn/svm/tests/test_svm.py
@@ -92,7 +92,7 @@ def test_libsvm_iris():
 def test_precomputed():
     # SVC with a precomputed kernel.
     # We test it with a toy dataset and with iris.
-    clf = svm.SVC(kernel='precomputed')
+    clf = svm.SVC(kernel='precomputed', tol=1e-10)
     # Gram matrix for train data (square matrix)
     # (we use just a linear kernel)
     K = np.dot(X, np.array(X).T)
@@ -120,9 +120,10 @@ def test_precomputed():
 
     # same as before, but using a callable function instead of the kernel
     # matrix. kernel is just a linear kernel
+    def kfunc(x, y):
+        return np.dot(x, y.T)
 
-    kfunc = lambda x, y: np.dot(x, y.T)
-    clf = svm.SVC(gamma='scale', kernel=kfunc)
+    clf = svm.SVC(gamma='scale', kernel=kfunc, tol=1e-10)
     clf.fit(X, Y)
     pred = clf.predict(T)
@@ -133,15 +134,15 @@ def test_precomputed():
 
     # test a precomputed kernel with the iris dataset
     # and check parameters against a linear SVC
-    clf = svm.SVC(kernel='precomputed')
-    clf2 = svm.SVC(kernel='linear')
+    clf = svm.SVC(kernel='precomputed', tol=1e-10)
+    clf2 = svm.SVC(kernel='linear', tol=1e-10)
     K = np.dot(iris.data, iris.data.T)
     clf.fit(K, iris.target)
     clf2.fit(iris.data, iris.target)
     pred = clf.predict(K)
     assert_array_almost_equal(clf.support_, clf2.support_)
-    assert_array_almost_equal(clf.dual_coef_, clf2.dual_coef_)
-    assert_array_almost_equal(clf.intercept_, clf2.intercept_)
+    assert_array_almost_equal(clf.dual_coef_, clf2.dual_coef_, decimal=4)
+    assert_array_almost_equal(clf.intercept_, clf2.intercept_, decimal=4)
     assert_almost_equal(np.mean(pred == iris.target), .99, decimal=2)
 
     # Gram matrix for test data but compute KT[i,j]
@@ -154,7 +155,7 @@ def test_precomputed():
     pred = clf.predict(K)
     assert_almost_equal(np.mean(pred == iris.target), .99, decimal=2)
 
-    clf = svm.SVC(gamma='scale', kernel=kfunc)
+    clf = svm.SVC(gamma='scale', kernel=kfunc, tol=1e-10)
     clf.fit(iris.data, iris.target)
     assert_almost_equal(np.mean(pred == iris.target), .99, decimal=2)
@@ -429,7 +430,7 @@ def test_weight():
 def test_sample_weights():
     # Test weights on individual samples
     # TODO: check on NuSVR, OneClass, etc.
-    clf = svm.SVC(gamma="scale")
+    clf = svm.SVC(gamma="scale", tol=1e-10)
     clf.fit(X, Y)
     assert_array_equal(clf.predict([X[2]]), [1.])
 
@@ -438,7 +439,7 @@ def test_sample_weights():
     assert_array_equal(clf.predict([X[2]]), [2.])
 
     # test that rescaling all samples is the same as changing C
-    clf = svm.SVC(gamma="scale")
+    clf = svm.SVC(gamma="scale", tol=1e-10)
     clf.fit(X, Y)
     dual_coef_no_weight = clf.dual_coef_
     clf.set_params(C=100)
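
Note (not part of the patch): below is a minimal standalone sketch of the ordering-invariance check that the new _assert_svc_equal helper performs, for trying the idea outside the test suite. It reuses the test_svc_iris parameters from the patch (gamma=1., C=0.01, tol=1e-12) and matches (dual_coef, support_vector) rows between two models fitted on row-permuted copies of iris by nearest-pair distances. Variable names, the chosen kernel and the final message are illustrative assumptions only.

import numpy as np
from sklearn import datasets, svm
from sklearn.utils import shuffle

iris = datasets.load_iris()
X, y = iris.data, iris.target
X2, y2 = shuffle(X, y, random_state=0)

# Mirror the test_svc_iris parameters: a tight tol so that both fits converge
# to the same decision function despite the permuted training set order.
params = dict(kernel="rbf", gamma=1., C=0.01, tol=1e-12)
clf1 = svm.SVC(**params).fit(X, y)
clf2 = svm.SVC(**params).fit(X2, y2)

dc1, sv1 = clf1.dual_coef_, clf1.support_vectors_
dc2, sv2 = clf2.dual_coef_, clf2.support_vectors_
assert dc1.shape == dc2.shape and sv1.shape == sv2.shape

# For each OvO problem, every (dual_coef, support_vector) row of one model
# should have a near-identical row in the other model, whatever the ordering.
atol = 1e-7
for ovo_idx in range(dc1.shape[0]):
    rows1 = np.hstack([dc1[ovo_idx].reshape(-1, 1), sv1])
    rows2 = np.hstack([dc2[ovo_idx].reshape(-1, 1), sv2])
    for row in rows1:
        assert np.sqrt(((rows2 - row) ** 2).sum(axis=1)).min() < atol

print("decision functions match up to support vector ordering")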