@@ -6,12 +6,11 @@
 
 import numpy as np
 import itertools
-
 from numpy.testing import assert_array_equal, assert_array_almost_equal
 from numpy.testing import assert_almost_equal
+from numpy.testing import assert_allclose
 from scipy import sparse
 from nose.tools import assert_raises, assert_true, assert_equal, assert_false
-
 from sklearn import svm, linear_model, datasets, metrics, base
 from sklearn.model_selection import train_test_split
 from sklearn.datasets import make_classification, make_blobs
@@ -25,7 +24,6 @@
 from sklearn.exceptions import ChangedBehaviorWarning
 from sklearn.exceptions import ConvergenceWarning
 from sklearn.exceptions import NotFittedError
-
 from sklearn.multiclass import OneVsRestClassifier
 
 # toy sample
@@ -198,8 +196,44 @@ def test_linearsvr():
     svr = svm.SVR(kernel='linear', C=1e3).fit(diabetes.data, diabetes.target)
     score2 = svr.score(diabetes.data, diabetes.target)
 
-    assert np.linalg.norm(lsvr.coef_ - svr.coef_) / np.linalg.norm(svr.coef_) < .1
-    assert np.abs(score1 - score2) < 0.1
+    assert_allclose(np.linalg.norm(lsvr.coef_),
+                    np.linalg.norm(svr.coef_), 1, 0.0001)
+    assert_almost_equal(score1, score2, 2)
+
+
+def test_linearsvr_fit_sampleweight():
+    # check correct result when sample_weight is 1
+    # check that fitting with unit weights and fitting without
+    # weights give comparable results
+    diabetes = datasets.load_diabetes()
+    n_samples = len(diabetes.target)
+    unit_weight = np.ones(n_samples)
+    lsvr = svm.LinearSVR(C=1e3).fit(diabetes.data, diabetes.target,
+                                    sample_weight=unit_weight)
+    score1 = lsvr.score(diabetes.data, diabetes.target)
+
+    lsvr_no_weight = svm.LinearSVR(C=1e3).fit(diabetes.data, diabetes.target)
+    score2 = lsvr_no_weight.score(diabetes.data, diabetes.target)
+
+    assert_allclose(np.linalg.norm(lsvr.coef_),
+                    np.linalg.norm(lsvr_no_weight.coef_), 1, 0.0001)
+    assert_almost_equal(score1, score2, 2)
+
+    # check that fit(X) = fit([X1, X2, X3], sample_weight=[n1, n2, n3]) where
+    # X = X1 repeated n1 times, X2 repeated n2 times and so forth
+    random_state = check_random_state(0)
+    random_weight = random_state.randint(0, 10, n_samples)
+    lsvr_unflat = svm.LinearSVR(C=1e3).fit(diabetes.data, diabetes.target,
+                                           sample_weight=random_weight)
+    score3 = lsvr_unflat.score(diabetes.data, diabetes.target,
+                               sample_weight=random_weight)
+
+    X_flat = np.repeat(diabetes.data, random_weight, axis=0)
+    y_flat = np.repeat(diabetes.target, random_weight, axis=0)
+    lsvr_flat = svm.LinearSVR(C=1e3).fit(X_flat, y_flat)
+    score4 = lsvr_flat.score(X_flat, y_flat)
+
+    assert_almost_equal(score3, score4, 2)
 
 
 def test_svr_errors():
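A note on the new assertions: numpy's assert_allclose signature is assert_allclose(actual, desired, rtol=1e-07, atol=0), so the positional 1, 0.0001 above sets a 100% relative tolerance with a tiny absolute floor; it only guards against wildly different coefficient norms. A minimal standalone illustration of that semantics (my sketch, not part of the patch):

    import numpy as np
    from numpy.testing import assert_allclose

    # assert_allclose passes when |actual - desired| <= atol + rtol * |desired|
    assert_allclose(1.9, 1.0, 1, 0.0001)      # 0.9 <= 0.0001 + 1.0, so it passes
    try:
        assert_allclose(2.1, 1.0, 1, 0.0001)  # 1.1 > 1.0001, so it raises
    except AssertionError:
        print("outside tolerance")
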
@@ -277,14 +311,13 @@ def test_probability():
 
     for clf in (svm.SVC(probability=True, random_state=0, C=1.0),
                 svm.NuSVC(probability=True, random_state=0)):
-
         clf.fit(iris.data, iris.target)
 
         prob_predict = clf.predict_proba(iris.data)
         assert_array_almost_equal(
             np.sum(prob_predict, 1), np.ones(iris.data.shape[0]))
         assert_true(np.mean(np.argmax(prob_predict, 1)
-                    == clf.predict(iris.data)) > 0.9)
+                            == clf.predict(iris.data)) > 0.9)
 
         assert_almost_equal(clf.predict_proba(iris.data),
                             np.exp(clf.predict_log_proba(iris.data)), 8)
@@ -509,9 +542,9 @@ def test_linearsvc_parameters():
     for loss, penalty, dual in itertools.product(losses, penalties, duals):
         clf = svm.LinearSVC(penalty=penalty, loss=loss, dual=dual)
         if ((loss, penalty) == ('hinge', 'l1') or
-            (loss, penalty, dual) == ('hinge', 'l2', False) or
-            (penalty, dual) == ('l1', True) or
-            loss == 'foo' or penalty == 'bar'):
+                (loss, penalty, dual) == ('hinge', 'l2', False) or
+                (penalty, dual) == ('l1', True) or
+                loss == 'foo' or penalty == 'bar'):
 
             assert_raises_regexp(ValueError,
                                  "Unsupported set of arguments.*penalty='%s.*"
@@ -569,7 +602,7 @@ def test_linear_svx_uppercase_loss_penality_raises_error():
                          svm.LinearSVC(loss="SQuared_hinge").fit, X, y)
 
     assert_raise_message(ValueError, ("The combination of penalty='L2'"
-                         " and loss='squared_hinge' is not supported"),
+                                      " and loss='squared_hinge' is not supported"),
                          svm.LinearSVC(penalty="L2").fit, X, y)
 
 
@@ -634,7 +667,6 @@ def test_crammer_singer_binary():
 
 
 def test_linearsvc_iris():
-
     # Test that LinearSVC gives plausible predictions on the iris dataset
     # Also, test symbolic class names (classes_).
     target = iris.target_names[iris.target]
@@ -773,7 +805,7 @@ def test_timeout():
 
 
 def test_unfitted():
-    X = "foo!"      # input validation not required when SVM not fitted
+    X = "foo!"  # input validation not required when SVM not fitted
 
     clf = svm.SVC()
     assert_raises_regexp(Exception, r".*\bSVC\b.*\bnot\b.*\bfitted\b",
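The heart of the patch is the invariant that an integer sample_weight should behave like repeating each row that many times. A minimal standalone sketch of that invariant, assuming a scikit-learn build that already includes this change (LinearSVR.fit gains sample_weight here); the variable names are illustrative, not part of the patch:

    import numpy as np
    from sklearn import datasets, svm
    from sklearn.utils import check_random_state

    diabetes = datasets.load_diabetes()
    rng = check_random_state(0)
    # strictly positive weights, so no row is dropped entirely
    w = rng.randint(1, 10, len(diabetes.target))

    # fit once with integer weights ...
    weighted = svm.LinearSVR(C=1e3).fit(diabetes.data, diabetes.target,
                                        sample_weight=w)
    # ... and once on a dataset with row i physically repeated w[i] times
    repeated = svm.LinearSVR(C=1e3).fit(np.repeat(diabetes.data, w, axis=0),
                                        np.repeat(diabetes.target, w))

    # the two solutions should agree up to solver tolerance
    print(np.linalg.norm(weighted.coef_ - repeated.coef_))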