@@ -2,7 +2,7 @@
 import warnings
 import numpy as np
 
-from nose.tools import raises
+from nose.tools import raises, assert_not_equal
 from nose.tools import assert_true, assert_raises
 from numpy.testing import assert_array_almost_equal
 from numpy.testing import assert_array_equal
@@ -206,11 +206,24 @@ def test_average_precision_score_duplicate_values():
     # precision-recall curve is a decreasing curve
     # The following situation corresponds to a perfect
     # test statistic, the average_precision_score should be 1
-    y_true = [0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1]
-    y_score = [0, .1, .1, .5, .5, .6, .6, .9, .9, 1, 1]
+    y_true = [0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1]
+    y_score = [0, .1, .1, .4, .5, .6, .6, .9, .9, 1, 1]
     assert_equal(average_precision_score(y_true, y_score), 1)
 
 
+def test_average_precision_score_tied_values():
+    # Here if we go from left to right in y_true, the 0 values are
+    # separated from the 1 values, so it appears that we have
+    # correctly sorted our classifications. But in fact the first
+    # two values have the same score (0.5) and so the first two
+    # values could be swapped around, creating an imperfect sorting.
+    # This imperfection should come through in the end score, making
+    # it less than one.
+    y_true = [0, 1, 1]
+    y_score = [.5, .5, .6]
+    assert_not_equal(average_precision_score(y_true, y_score), 1.)
+
+
 def test_precision_recall_fscore_support_errors():
     y_true, y_pred, _ = make_prediction(binary=True)
 
@@ -328,7 +341,7 @@ def test_zero_precision_recall():
     y_pred = np.array([2, 0, 1, 1, 2, 0])
 
     assert_almost_equal(precision_score(y_true, y_pred,
-                        average='weighted'), 0.0, 2)
+                        average='weighted'), 0.0, 2)
     assert_almost_equal(recall_score(y_true, y_pred, average='weighted'),
                         0.0, 2)
     assert_almost_equal(f1_score(y_true, y_pred, average='weighted'),
@@ -415,14 +428,21 @@ def test_precision_recall_curve():
     _test_precision_recall_curve(y_true, probas_pred)
     assert_array_equal(y_true_copy, y_true)
 
+    labels = [1, 0, 0, 1]
+    predict_probas = [1, 2, 3, 4]
+    p, r, t = precision_recall_curve(labels, predict_probas)
+    assert_array_almost_equal(p, np.array([0.5, 0.33333333, 0.5, 1., 1.]))
+    assert_array_almost_equal(r, np.array([1., 0.5, 0.5, 0.5, 0.]))
+    assert_array_almost_equal(t, np.array([1, 2, 3, 4]))
+
 
 def _test_precision_recall_curve(y_true, probas_pred):
     """Test Precision-Recall and area under PR curve"""
     p, r, thresholds = precision_recall_curve(y_true, probas_pred)
     precision_recall_auc = auc(r, p)
     assert_array_almost_equal(precision_recall_auc, 0.82, 2)
     assert_array_almost_equal(precision_recall_auc,
-                              average_precision_score(y_true, probas_pred))
+                              average_precision_score(y_true, probas_pred))
     # Smoke test in the case of proba having only one value
     p, r, thresholds = precision_recall_curve(y_true,
                                               np.zeros_like(probas_pred))
@@ -494,9 +514,9 @@ def test_symmetry():
                         mean_squared_error(y_pred, y_true))
     # not symmetric
     assert_true(explained_variance_score(y_true, y_pred) !=
-                explained_variance_score(y_pred, y_true))
+                explained_variance_score(y_pred, y_true))
     assert_true(r2_score(y_true, y_pred) !=
-                r2_score(y_pred, y_true))
+                r2_score(y_pred, y_true))
     # FIXME: precision and recall aren't symmetric either
 
 
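
For context, a minimal sketch (assuming scikit-learn is installed, with average_precision_score and precision_recall_curve imported from sklearn.metrics) of the behaviour the new assertions pin down: tied scores make the ranking ambiguous, so the average precision stays below 1, and precision_recall_curve returns one precision/recall pair per distinct threshold plus a final (recall=0, precision=1) point. Exact values may vary slightly across scikit-learn versions.

# Sketch of the behaviour exercised by the new tests; printed values
# are indicative and may differ slightly between scikit-learn versions.
from sklearn.metrics import average_precision_score, precision_recall_curve

# Tied scores: the two 0.5 entries can be ordered either way, so the
# ranking is not perfect and the average precision must be below 1.
y_true = [0, 1, 1]
y_score = [.5, .5, .6]
print(average_precision_score(y_true, y_score))  # < 1.0

# One precision/recall pair per distinct threshold, plus the final
# point (recall=0, precision=1) appended at the end of the curve.
p, r, t = precision_recall_curve([1, 0, 0, 1], [1, 2, 3, 4])
print(p)  # approx [0.5, 0.333, 0.5, 1.0, 1.0]
print(r)  # [1.0, 0.5, 0.5, 0.5, 0.0]
print(t)  # [1, 2, 3, 4]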