From 6b9da826308321e4df33ce70a11d5e124c8890b8 Mon Sep 17 00:00:00 2001
From: Olivier Grisel
Date: Mon, 10 Dec 2018 16:26:07 +0100
Subject: [PATCH] Make SVC tests independent of SV ordering

This also relaxes some strict assertions on the fitted coefficients.
---
 sklearn/svm/tests/test_sparse.py | 100 ++++++++++++++++++++++++-------
 sklearn/svm/tests/test_svm.py    |  21 +++----
 2 files changed, 90 insertions(+), 31 deletions(-)

diff --git a/sklearn/svm/tests/test_sparse.py b/sklearn/svm/tests/test_sparse.py
index 8bfa4e964cfce..aace133337bf7 100644
--- a/sklearn/svm/tests/test_sparse.py
+++ b/sklearn/svm/tests/test_sparse.py
@@ -2,7 +2,7 @@
 
 import numpy as np
 from numpy.testing import (assert_array_almost_equal, assert_array_equal,
-                           assert_equal)
+                           assert_equal, assert_allclose)
 from scipy import sparse
 
 from sklearn import datasets, svm, linear_model, base
@@ -13,6 +13,7 @@
 from sklearn.utils.testing import (assert_raises, assert_warns,
                                    assert_raise_message, ignore_warnings,
                                    skip_if_32bit)
+from sklearn.utils import shuffle
 
 
 # test sample 1
@@ -135,20 +136,77 @@ def kfunc(x, y):
     assert_array_equal(clf_lin.predict(X_sp), clf_mylin.predict(X_sp))
 
 
-def test_svc_iris():
-    # Test the sparse SVC with the iris dataset
-    for k in ('linear', 'poly', 'rbf'):
-        sp_clf = svm.SVC(gamma='scale', kernel=k).fit(iris.data, iris.target)
-        clf = svm.SVC(gamma='scale', kernel=k).fit(iris.data.toarray(),
-                                                   iris.target)
-
-        assert_array_almost_equal(clf.support_vectors_,
-                                  sp_clf.support_vectors_.toarray())
-        assert_array_almost_equal(clf.dual_coef_, sp_clf.dual_coef_.toarray())
-        assert_array_almost_equal(
-            clf.predict(iris.data.toarray()), sp_clf.predict(iris.data))
-        if k == 'linear':
-            assert_array_almost_equal(clf.coef_, sp_clf.coef_.toarray())
+def _toarray(a):
+    if sparse.issparse(a):
+        return a.toarray()
+    return a
+
+
+def _assert_svc_equal(svc1, svc2, atol=1e-7):
+    # Check that 2 support vector machines parametrize equivalent decision
+    # functions by comparing dual coefficients and support vectors.
+    assert svc1.get_params() == svc2.get_params()
+
+    sv1 = _toarray(svc1.support_vectors_)
+    dc1 = _toarray(svc1.dual_coef_)
+    sv2 = _toarray(svc2.support_vectors_)
+    dc2 = _toarray(svc2.dual_coef_)
+    assert dc1.shape == dc2.shape
+    assert sv1.shape == sv2.shape
+
+    # Consider each OvO binary classification problem in turn.
+    ovo_dim, n_support_vectors = dc1.shape
+    for ovo_idx in range(ovo_dim):
+        # The ordering of the support vectors is arbitrary. Furthermore, if
+        # samples are duplicated with different class labels, they can be
+        # selected as duplicated support vectors with distinct dual
+        # coefficients.
+
+        # Therefore, to check that the 2 SVMs parametrize the same decision
+        # function, we concatenate the dual coefficients with the matching
+        # support vector coordinates for all the support vectors of each
+        # classifier so that we can check that there are matching pairs
+        # (dual_coef, support_vector) in the two models by computing pairwise
+        # distances:
+        dc_sv1 = np.hstack([dc1[ovo_idx].reshape(-1, 1), sv1])
+        dc_sv2 = np.hstack([dc2[ovo_idx].reshape(-1, 1), sv2])
+
+        for sv1_idx in range(n_support_vectors):
+            sqdists = np.sum((dc_sv2 - dc_sv1[sv1_idx]) ** 2, axis=1)
+            sv2_idx = sqdists.argmin()
+            assert np.sqrt(sqdists[sv2_idx]) < atol
+
+    # For the linear kernel, also check that the aggregated coefficients of
+    # the linear decision function in the original feature space match.
+    if svc1.kernel == "linear":
+        max_absdiff = np.abs(_toarray(svc1.coef_) - _toarray(svc2.coef_)).max()
+        assert max_absdiff < atol
+
+
+@pytest.mark.parametrize("kernel", ["linear", "poly", "rbf"])
+def test_svc_iris(kernel, svc_tol=1e-12, atol=1e-7):
+    # The optimization result is not deterministic when the order of the iris
+    # samples is permuted, as iris has duplicated samples that can be selected
+    # as support vectors or not depending on the training set order. However,
+    # the resulting decision function should be independent of the training
+    # set ordering.
+    # Order invariance is only guaranteed if the model has properly converged,
+    # hence the small tol value.
+    iris2_data, iris2_target = shuffle(iris.data, iris.target,
+                                       random_state=0)
+    params = {
+        "gamma": 1.,
+        "kernel": kernel,
+        "tol": svc_tol,
+        "C": 0.01,
+    }
+    sp_clf = svm.SVC(**params).fit(iris.data, iris.target)
+    clf = svm.SVC(**params).fit(iris.data.toarray(), iris.target)
+    sp_clf2 = svm.SVC(**params).fit(iris2_data, iris2_target)
+    clf2 = svm.SVC(**params).fit(iris2_data.toarray(), iris2_target)
+
+    _assert_svc_equal(clf, sp_clf, atol=atol)
+    _assert_svc_equal(clf, clf2, atol=atol)
+    _assert_svc_equal(sp_clf, sp_clf2, atol=atol)
 
 
 def test_sparse_decision_function():
@@ -288,7 +346,8 @@ def test_sparse_oneclasssvm(datasets_index, kernel):
     check_svm_model_equal(clf, sp_clf, *dataset)
 
 
-def test_sparse_realdata():
+@pytest.mark.parametrize("C", [0.01, 1, 100])
+def test_sparse_20newsgroups_subset(C, atol=1e-7):
     # Test on a subset from the 20newsgroups dataset.
     # This catches some bugs if input is not correctly converted into
     # sparse format or weights are not correctly initialized.
@@ -310,11 +369,10 @@ def test_sparse_realdata():
                   3., 0., 0., 2., 2., 1., 3., 1., 1., 0., 1., 2., 1.,
                   1., 3.])
 
-    clf = svm.SVC(kernel='linear').fit(X.toarray(), y)
-    sp_clf = svm.SVC(kernel='linear').fit(sparse.coo_matrix(X), y)
-
-    assert_array_equal(clf.support_vectors_, sp_clf.support_vectors_.toarray())
-    assert_array_equal(clf.dual_coef_, sp_clf.dual_coef_.toarray())
+    params = dict(kernel='linear', C=C, tol=1e-12)
+    clf = svm.SVC(**params).fit(X.toarray(), y)
+    sp_clf = svm.SVC(**params).fit(sparse.coo_matrix(X), y)
+    _assert_svc_equal(clf, sp_clf, atol=atol)
 
 
 def test_sparse_svc_clone_with_callable_kernel():
diff --git a/sklearn/svm/tests/test_svm.py b/sklearn/svm/tests/test_svm.py
index 626705186b59f..8767d5e7e99bd 100644
--- a/sklearn/svm/tests/test_svm.py
+++ b/sklearn/svm/tests/test_svm.py
@@ -92,7 +92,7 @@ def test_libsvm_iris():
 def test_precomputed():
     # SVC with a precomputed kernel.
     # We test it with a toy dataset and with iris.
-    clf = svm.SVC(kernel='precomputed')
+    clf = svm.SVC(kernel='precomputed', tol=1e-10)
     # Gram matrix for train data (square matrix)
     # (we use just a linear kernel)
     K = np.dot(X, np.array(X).T)
@@ -120,9 +120,10 @@ def test_precomputed():
 
     # same as before, but using a callable function instead of the kernel
     # matrix. kernel is just a linear kernel
+    def kfunc(x, y):
+        return np.dot(x, y.T)
 
-    kfunc = lambda x, y: np.dot(x, y.T)
-    clf = svm.SVC(gamma='scale', kernel=kfunc)
+    clf = svm.SVC(gamma='scale', kernel=kfunc, tol=1e-10)
     clf.fit(X, Y)
     pred = clf.predict(T)
@@ -133,15 +134,15 @@ def test_precomputed():
 
     # test a precomputed kernel with the iris dataset
     # and check parameters against a linear SVC
-    clf = svm.SVC(kernel='precomputed')
-    clf2 = svm.SVC(kernel='linear')
+    clf = svm.SVC(kernel='precomputed', tol=1e-10)
+    clf2 = svm.SVC(kernel='linear', tol=1e-10)
     K = np.dot(iris.data, iris.data.T)
     clf.fit(K, iris.target)
     clf2.fit(iris.data, iris.target)
     pred = clf.predict(K)
     assert_array_almost_equal(clf.support_, clf2.support_)
-    assert_array_almost_equal(clf.dual_coef_, clf2.dual_coef_)
-    assert_array_almost_equal(clf.intercept_, clf2.intercept_)
+    assert_array_almost_equal(clf.dual_coef_, clf2.dual_coef_, decimal=4)
+    assert_array_almost_equal(clf.intercept_, clf2.intercept_, decimal=4)
     assert_almost_equal(np.mean(pred == iris.target), .99, decimal=2)
 
     # Gram matrix for test data but compute KT[i,j]
@@ -154,7 +155,7 @@ def test_precomputed():
     pred = clf.predict(K)
     assert_almost_equal(np.mean(pred == iris.target), .99, decimal=2)
 
-    clf = svm.SVC(gamma='scale', kernel=kfunc)
+    clf = svm.SVC(gamma='scale', kernel=kfunc, tol=1e-10)
     clf.fit(iris.data, iris.target)
     assert_almost_equal(np.mean(pred == iris.target), .99, decimal=2)
@@ -429,7 +430,7 @@ def test_weight():
 def test_sample_weights():
     # Test weights on individual samples
     # TODO: check on NuSVR, OneClass, etc.
-    clf = svm.SVC(gamma="scale")
+    clf = svm.SVC(gamma="scale", tol=1e-10)
     clf.fit(X, Y)
     assert_array_equal(clf.predict([X[2]]), [1.])
 
@@ -438,7 +439,7 @@ def test_sample_weights():
     assert_array_equal(clf.predict([X[2]]), [2.])
 
     # test that rescaling all samples is the same as changing C
-    clf = svm.SVC(gamma="scale")
+    clf = svm.SVC(gamma="scale", tol=1e-10)
     clf.fit(X, Y)
     dual_coef_no_weight = clf.dual_coef_
     clf.set_params(C=100)
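
Note (not part of the patch): below is a minimal standalone sketch of the ordering-invariance check that the new _assert_svc_equal helper performs, for trying the idea outside the test suite. It reuses the test_svc_iris parameters from the patch (gamma=1., C=0.01, tol=1e-12) and matches (dual_coef, support_vector) rows between two models fitted on row-permuted copies of iris by nearest-pair distances. Variable names, the chosen kernel and the final message are illustrative assumptions only.

import numpy as np
from sklearn import datasets, svm
from sklearn.utils import shuffle

iris = datasets.load_iris()
X, y = iris.data, iris.target
X2, y2 = shuffle(X, y, random_state=0)

# Mirror the test_svc_iris parameters: a tight tol so that both fits converge
# to the same decision function despite the permuted training set order.
params = dict(kernel="rbf", gamma=1., C=0.01, tol=1e-12)
clf1 = svm.SVC(**params).fit(X, y)
clf2 = svm.SVC(**params).fit(X2, y2)

dc1, sv1 = clf1.dual_coef_, clf1.support_vectors_
dc2, sv2 = clf2.dual_coef_, clf2.support_vectors_
assert dc1.shape == dc2.shape and sv1.shape == sv2.shape

# For each OvO problem, every (dual_coef, support_vector) row of one model
# should have a near-identical row in the other model, whatever the ordering.
atol = 1e-7
for ovo_idx in range(dc1.shape[0]):
    rows1 = np.hstack([dc1[ovo_idx].reshape(-1, 1), sv1])
    rows2 = np.hstack([dc2[ovo_idx].reshape(-1, 1), sv2])
    for row in rows1:
        assert np.sqrt(((rows2 - row) ** 2).sum(axis=1)).min() < atol

print("decision functions match up to support vector ordering")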