@@ -1,6 +1,5 @@
 from __future__ import division
 
-import random
 import warnings
 import numpy as np
 
@@ -9,8 +8,7 @@
 
 from sklearn.preprocessing import LabelBinarizer
 from sklearn.datasets import make_multilabel_classification
-from sklearn.utils import (check_random_state,
-                           shuffle)
+from sklearn.utils import check_random_state, shuffle
 from sklearn.utils.multiclass import unique_labels
 from sklearn.utils.testing import (assert_true,
                                    assert_raises,
@@ -85,8 +83,8 @@ def make_prediction(dataset=None, binary=False):
     n_samples, n_features = X.shape
     p = np.arange(n_samples)
 
-    random.seed(0)
-    random.shuffle(p)
+    rng = check_random_state(37)
+    rng.shuffle(p)
     X, y = X[p], y[p]
     half = int(n_samples / 2)
 
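Note on the pattern this commit standardizes on: check_random_state normalizes a seed argument (None, an int, or an existing RandomState) into a local numpy.random.RandomState, so each test draws from its own reproducible stream instead of mutating the global random module. A minimal sketch of the idea under a hypothetical name, not the exact scikit-learn source:

    import numbers
    import numpy as np

    def as_random_state(seed):
        # Hypothetical re-implementation of check_random_state's contract.
        if seed is None:
            return np.random.mtrand._rand  # NumPy's global RandomState
        if isinstance(seed, (numbers.Integral, np.integer)):
            return np.random.RandomState(seed)
        if isinstance(seed, np.random.RandomState):
            return seed
        raise ValueError("%r cannot be used to seed a RandomState" % seed)

    rng = as_random_state(37)
    p = np.arange(10)
    rng.shuffle(p)  # same permutation on every run, no global state touched

Because random.seed(0) is replaced by a differently seeded RandomState(37), the shuffled train/test split changes, which is why every hard-coded expected value in the hunks below is updated as well.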
@@ -114,7 +112,7 @@ def test_roc_curve():
 
     fpr, tpr, thresholds = roc_curve(y_true, probas_pred)
     roc_auc = auc(fpr, tpr)
-    assert_array_almost_equal(roc_auc, 0.80, decimal=2)
+    assert_array_almost_equal(roc_auc, 0.90, decimal=2)
     assert_almost_equal(roc_auc, auc_score(y_true, probas_pred))
 
 
@@ -159,7 +157,7 @@ def test_roc_curve_confidence():
 
     fpr, tpr, thresholds = roc_curve(y_true, probas_pred - 0.5)
     roc_auc = auc(fpr, tpr)
-    assert_array_almost_equal(roc_auc, 0.80, decimal=2)
+    assert_array_almost_equal(roc_auc, 0.90, decimal=2)
 
 
 def test_roc_curve_hard():
@@ -181,7 +179,7 @@ def test_roc_curve_hard():
     # hard decisions
     fpr, tpr, thresholds = roc_curve(y_true, pred)
     roc_auc = auc(fpr, tpr)
-    assert_array_almost_equal(roc_auc, 0.74, decimal=2)
+    assert_array_almost_equal(roc_auc, 0.78, decimal=2)
 
 
 def test_roc_curve_one_label():
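The new AUC expectations (0.90, 0.90, 0.78) simply reflect re-running the metrics on the reshuffled split; auc(fpr, tpr) itself is trapezoidal integration of the ROC points, which makes such values easy to sanity-check by hand. A tiny self-contained check, with points chosen purely for illustration:

    import numpy as np

    # ROC passing through (0, 0), (0.5, 1.0) and (1.0, 1.0):
    fpr = np.array([0.0, 0.5, 1.0])
    tpr = np.array([0.0, 1.0, 1.0])
    # Trapezoid areas: 0.5 * (0 + 1) / 2 + 0.5 * (1 + 1) / 2
    print(np.trapz(tpr, fpr))  # 0.75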
@@ -245,7 +243,8 @@ def test_auc_score_non_binary_class():
     """Test that auc_score function returns an error when trying to compute AUC
    for non-binary class values.
     """
-    y_pred = np.random.rand(10)
+    rng = check_random_state(404)
+    y_pred = rng.rand(10)
     # y_true contains only one class value
     y_true = np.zeros(10, dtype="int")
     assert_raise_message(ValueError, "AUC is defined for binary "
@@ -257,7 +256,7 @@ def test_auc_score_non_binary_class():
     assert_raise_message(ValueError, "AUC is defined for binary "
                          "classification only", auc_score, y_true, y_pred)
     # y_true contains three different class values
-    y_true = np.random.randint(0, 3, size=10)
+    y_true = rng.randint(0, 3, size=10)
     assert_raise_message(ValueError, "AUC is defined for binary "
                          "classification only", auc_score, y_true, y_pred)
 
@@ -268,22 +267,22 @@ def test_precision_recall_f1_score_binary():
 
     # detailed measures for each class
     p, r, f, s = precision_recall_fscore_support(y_true, y_pred, average=None)
-    assert_array_almost_equal(p, [0.73, 0.75], 2)
-    assert_array_almost_equal(r, [0.76, 0.72], 2)
-    assert_array_almost_equal(f, [0.75, 0.74], 2)
+    assert_array_almost_equal(p, [0.73, 0.85], 2)
+    assert_array_almost_equal(r, [0.88, 0.68], 2)
+    assert_array_almost_equal(f, [0.80, 0.76], 2)
     assert_array_equal(s, [25, 25])
 
     # individual scoring function that can be used for grid search: in the
     # binary class case the score is the value of the measure for the positive
     # class (e.g. label == 1)
     ps = precision_score(y_true, y_pred)
-    assert_array_almost_equal(ps, 0.75, 2)
+    assert_array_almost_equal(ps, 0.85, 2)
 
     rs = recall_score(y_true, y_pred)
-    assert_array_almost_equal(rs, 0.72, 2)
+    assert_array_almost_equal(rs, 0.68, 2)
 
     fs = f1_score(y_true, y_pred)
-    assert_array_almost_equal(fs, 0.74, 2)
+    assert_array_almost_equal(fs, 0.76, 2)
 
 
 def test_average_precision_score_duplicate_values():
@@ -331,7 +330,7 @@ def test_confusion_matrix_binary():
     y_true, y_pred, _ = make_prediction(binary=True)
 
     cm = confusion_matrix(y_true, y_pred)
-    assert_array_equal(cm, [[19, 6], [7, 18]])
+    assert_array_equal(cm, [[22, 3], [8, 17]])
 
     tp = cm[0, 0]
     tn = cm[1, 1]
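The binary expectations above and this confusion matrix describe the same predictions, so one can be recomputed from the other. With rows as true classes and columns as predictions, label 1 (the positive class used by precision_score and friends) gives:

    # From cm = [[22, 3], [8, 17]]: counts for the positive class 1.
    tp, fp, fn = 17.0, 3.0, 8.0
    precision = tp / (tp + fp)  # 17/20 = 0.85
    recall = tp / (tp + fn)     # 17/25 = 0.68
    f1 = 2 * precision * recall / (precision + recall)  # ~0.76
    print(precision, recall, f1)

which matches the 0.85 / 0.68 / 0.76 asserted in test_precision_recall_f1_score_binary.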
@@ -345,7 +344,7 @@ def test_confusion_matrix_binary():
     true_mcc = num / den
     mcc = matthews_corrcoef(y_true, y_pred)
     assert_array_almost_equal(mcc, true_mcc, decimal=2)
-    assert_array_almost_equal(mcc, 0.48, decimal=2)
+    assert_array_almost_equal(mcc, 0.57, decimal=2)
 
 
 def test_matthews_corrcoef_nan():
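The 0.57 follows from the same matrix through the MCC formula the test spells out, reading tp = cm[0, 0], tn = cm[1, 1], fp = cm[0, 1], fn = cm[1, 0] exactly as the test does:

    from math import sqrt

    tp, tn, fp, fn = 22, 17, 3, 8
    num = tp * tn - fp * fn  # 374 - 24 = 350
    den = sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn))  # sqrt(375000)
    print(num / den)  # 0.5715... -> 0.57 at two decimals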
@@ -360,46 +359,46 @@ def test_precision_recall_f1_score_multiclass():
 
     # compute scores with default labels introspection
     p, r, f, s = precision_recall_fscore_support(y_true, y_pred, average=None)
-    assert_array_almost_equal(p, [0.82, 0.55, 0.47], 2)
-    assert_array_almost_equal(r, [0.92, 0.17, 0.90], 2)
-    assert_array_almost_equal(f, [0.87, 0.26, 0.62], 2)
-    assert_array_equal(s, [25, 30, 20])
+    assert_array_almost_equal(p, [0.83, 0.33, 0.42], 2)
+    assert_array_almost_equal(r, [0.79, 0.09, 0.90], 2)
+    assert_array_almost_equal(f, [0.81, 0.15, 0.57], 2)
+    assert_array_equal(s, [24, 31, 20])
 
     # averaging tests
     ps = precision_score(y_true, y_pred, pos_label=1, average='micro')
-    assert_array_almost_equal(ps, 0.61, 2)
+    assert_array_almost_equal(ps, 0.53, 2)
 
     rs = recall_score(y_true, y_pred, average='micro')
-    assert_array_almost_equal(rs, 0.61, 2)
+    assert_array_almost_equal(rs, 0.53, 2)
 
     fs = f1_score(y_true, y_pred, average='micro')
-    assert_array_almost_equal(fs, 0.61, 2)
+    assert_array_almost_equal(fs, 0.53, 2)
 
     ps = precision_score(y_true, y_pred, average='macro')
-    assert_array_almost_equal(ps, 0.62, 2)
+    assert_array_almost_equal(ps, 0.53, 2)
 
     rs = recall_score(y_true, y_pred, average='macro')
-    assert_array_almost_equal(rs, 0.66, 2)
+    assert_array_almost_equal(rs, 0.60, 2)
 
     fs = f1_score(y_true, y_pred, average='macro')
-    assert_array_almost_equal(fs, 0.58, 2)
+    assert_array_almost_equal(fs, 0.51, 2)
 
     ps = precision_score(y_true, y_pred, average='weighted')
-    assert_array_almost_equal(ps, 0.62, 2)
+    assert_array_almost_equal(ps, 0.51, 2)
 
     rs = recall_score(y_true, y_pred, average='weighted')
-    assert_array_almost_equal(rs, 0.61, 2)
+    assert_array_almost_equal(rs, 0.53, 2)
 
     fs = f1_score(y_true, y_pred, average='weighted')
-    assert_array_almost_equal(fs, 0.55, 2)
+    assert_array_almost_equal(fs, 0.47, 2)
 
     # same prediction but with and explicit label ordering
     p, r, f, s = precision_recall_fscore_support(
         y_true, y_pred, labels=[0, 2, 1], average=None)
-    assert_array_almost_equal(p, [0.82, 0.47, 0.55], 2)
-    assert_array_almost_equal(r, [0.92, 0.90, 0.17], 2)
-    assert_array_almost_equal(f, [0.87, 0.62, 0.26], 2)
-    assert_array_equal(s, [25, 20, 30])
+    assert_array_almost_equal(p, [0.83, 0.41, 0.33], 2)
+    assert_array_almost_equal(r, [0.79, 0.90, 0.10], 2)
+    assert_array_almost_equal(f, [0.81, 0.57, 0.15], 2)
+    assert_array_equal(s, [24, 20, 31])
 
 
 def test_precision_recall_f1_score_multiclass_pos_label_none():
@@ -443,15 +442,15 @@ def test_confusion_matrix_multiclass():
 
     # compute confusion matrix with default labels introspection
     cm = confusion_matrix(y_true, y_pred)
-    assert_array_equal(cm, [[23, 2, 0],
-                            [5, 5, 20],
+    assert_array_equal(cm, [[19, 4, 1],
+                            [4, 3, 24],
                             [0, 2, 18]])
 
     # compute confusion matrix with explicit label ordering
     cm = confusion_matrix(y_true, y_pred, labels=[0, 2, 1])
-    assert_array_equal(cm, [[23, 0, 2],
+    assert_array_equal(cm, [[19, 1, 4],
                             [0, 18, 2],
-                            [5, 20, 5]])
+                            [4, 24, 3]])
 
 
 def test_confusion_matrix_multiclass_subset_labels():
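The multiclass figures are likewise mutually consistent: every per-class and averaged score asserted in test_precision_recall_f1_score_multiclass can be derived from this one matrix. A quick cross-check:

    import numpy as np

    cm = np.array([[19, 4, 1],
                   [4, 3, 24],
                   [0, 2, 18]], dtype=float)  # rows: truth, cols: prediction
    print(cm.sum(axis=1))                # support   [24, 31, 20]
    print(np.diag(cm) / cm.sum(axis=0))  # precision [0.83, 0.33, 0.42]
    print(np.diag(cm) / cm.sum(axis=1))  # recall    [0.79, 0.10, 0.90]
    print(np.trace(cm) / cm.sum())       # 40/75 ~= 0.53, the micro P = R = F1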
@@ -460,14 +459,14 @@ def test_confusion_matrix_multiclass_subset_labels():
 
     # compute confusion matrix with only first two labels considered
     cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
-    assert_array_equal(cm, [[23, 2],
-                            [5, 5]])
+    assert_array_equal(cm, [[19, 4],
+                            [4, 3]])
 
     # compute confusion matrix with explicit label ordering for only subset
     # of labels
     cm = confusion_matrix(y_true, y_pred, labels=[2, 1])
     assert_array_equal(cm, [[18, 2],
-                            [20, 5]])
+                            [24, 3]])
 
 
 def test_classification_report():
@@ -479,11 +478,11 @@ def test_classification_report():
     expected_report = """\
              precision    recall  f1-score   support
 
-     setosa       0.82      0.92      0.87        25
- versicolor       0.56      0.17      0.26        30
-  virginica       0.47      0.90      0.62        20
+     setosa       0.83      0.79      0.81        24
+ versicolor       0.33      0.10      0.15        31
+  virginica       0.42      0.90      0.57        20
 
-avg / total       0.62      0.61      0.56        75
+avg / total       0.51      0.53      0.47        75
 """
     report = classification_report(
         y_true, y_pred, labels=np.arange(len(iris.target_names)),
@@ -499,6 +498,15 @@ def test_classification_report():
           2       0.47      0.90      0.62        20
 
 avg / total       0.62      0.61      0.56        75
+"""
+    expected_report = """\
+             precision    recall  f1-score   support
+
+          0       0.83      0.79      0.81        24
+          1       0.33      0.10      0.15        31
+          2       0.42      0.90      0.57        20
+
+avg / total       0.51      0.53      0.47        75
 """
     report = classification_report(y_true, y_pred)
     assert_equal(report, expected_report)
@@ -526,7 +534,7 @@ def _test_precision_recall_curve(y_true, probas_pred):
     """Test Precision-Recall and aread under PR curve"""
     p, r, thresholds = precision_recall_curve(y_true, probas_pred)
     precision_recall_auc = auc(r, p)
-    assert_array_almost_equal(precision_recall_auc, 0.82, 2)
+    assert_array_almost_equal(precision_recall_auc, 0.85, 2)
     assert_array_almost_equal(precision_recall_auc,
                               average_precision_score(y_true, probas_pred))
     # Smoke test in the case of proba having only one value
@@ -570,18 +578,18 @@ def test_losses():
     # --------------
     with warnings.catch_warnings(record=True):
         # Throw deprecated warning
-        assert_equal(zero_one(y_true, y_pred), 13)
+        assert_equal(zero_one(y_true, y_pred), 11)
         assert_almost_equal(zero_one(y_true, y_pred, normalize=True),
-                            13 / float(n_samples), 2)
+                            11 / float(n_samples), 2)
 
     assert_almost_equal(zero_one_loss(y_true, y_pred),
-                        13 / float(n_samples), 2)
-    assert_equal(zero_one_loss(y_true, y_pred, normalize=False), 13)
+                        11 / float(n_samples), 2)
+    assert_equal(zero_one_loss(y_true, y_pred, normalize=False), 11)
     assert_almost_equal(zero_one_loss(y_true, y_true), 0.0, 2)
     assert_almost_equal(zero_one_loss(y_true, y_true, normalize=False), 0, 2)
 
     assert_almost_equal(hamming_loss(y_true, y_pred),
-                        2 * 13. / (n_samples * n_classes), 2)
+                        2 * 11. / (n_samples * n_classes), 2)
 
     assert_equal(accuracy_score(y_true, y_pred),
                  1 - zero_one_loss(y_true, y_pred))
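All the classification-loss updates trace back to one number: the new split leaves 11 misclassified samples (the off-diagonal 3 + 8 of the binary confusion matrix) out of the n_samples = 50 that the matrix totals imply. The expectations then follow by arithmetic:

    n_samples, n_errors = 50, 11
    print(n_errors / float(n_samples))      # 0.22, normalized zero-one loss
    print(1 - n_errors / float(n_samples))  # 0.78, accuracy_score
    print(2 * 11. / (n_samples * 2))        # 0.22, hamming loss in the binary case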
@@ -597,21 +605,21 @@ def test_losses():
     # Regression
     # ----------
     assert_almost_equal(mean_squared_error(y_true, y_pred),
-                        12.999 / n_samples, 2)
+                        10.999 / n_samples, 2)
     assert_almost_equal(mean_squared_error(y_true, y_true),
                         0.00, 2)
 
     # mean_absolute_error and mean_squared_error are equal because
     # it is a binary problem.
     assert_almost_equal(mean_absolute_error(y_true, y_pred),
-                        12.999 / n_samples, 2)
+                        10.999 / n_samples, 2)
     assert_almost_equal(mean_absolute_error(y_true, y_true), 0.00, 2)
 
-    assert_almost_equal(explained_variance_score(y_true, y_pred), -0.04, 2)
+    assert_almost_equal(explained_variance_score(y_true, y_pred), 0.16, 2)
     assert_almost_equal(explained_variance_score(y_true, y_true), 1.00, 2)
     assert_equal(explained_variance_score([0, 0, 0], [0, 1, 1]), 0.0)
 
-    assert_almost_equal(r2_score(y_true, y_pred), -0.04, 2)
+    assert_almost_equal(r2_score(y_true, y_pred), 0.12, 2)
     assert_almost_equal(r2_score(y_true, y_true), 1.00, 2)
     assert_equal(r2_score([0, 0, 0], [0, 0, 0]), 1.0)
     assert_equal(r2_score([0, 0, 0], [0, 1, 1]), 0.0)
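The regression numbers come from the same 11 errors, assuming the balanced 25/25 binary labels the confusion matrix shows. Each error contributes (0 - 1)**2 = 1, so MSE = MAE = 11/50 = 0.22 (the 10.999/n_samples in the assertions is just the same quantity to within the two-decimal tolerance), and with Var(y_true) = 0.25 for balanced 0/1 labels:

    mse = 11. / 50                 # 0.22, also the MAE on a binary problem
    var_y = 0.25                   # p * (1 - p) with p = 25/50
    print(1 - mse / var_y)         # 0.12 -> r2_score
    res_mean = (8 - 3) / 50.       # 8 residuals of +1, 3 of -1
    res_var = mse - res_mean ** 2  # 0.21
    print(1 - res_var / var_y)     # 0.16 -> explained_variance_score

Explained variance exceeds R-squared here precisely because the residuals have a non-zero mean: it penalizes only the residual variance, not the bias.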
@@ -826,11 +834,12 @@ def test_multioutput_regression_invariance_to_dimension_shuffling():
     y_true = np.reshape(y_true, (-1, n_dims))
     y_pred = np.reshape(y_pred, (-1, n_dims))
 
+    rng = check_random_state(314159)
     for metric in [r2_score, mean_squared_error, mean_absolute_error]:
         error = metric(y_true, y_pred)
 
         for _ in xrange(3):
-            perm = np.random.permutation(n_dims)
+            perm = rng.permutation(n_dims)
             assert_almost_equal(error,
                                 metric(y_true[:, perm], y_pred[:, perm]))
 
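The invariance exercised here is elementary: applying the same column permutation to y_true and y_pred only reorders the terms the error metrics sum over. A standalone illustration:

    import numpy as np

    rng = np.random.RandomState(0)
    y_true, y_pred = rng.rand(10, 3), rng.rand(10, 3)
    perm = rng.permutation(3)
    before = np.mean((y_true - y_pred) ** 2)
    after = np.mean((y_true[:, perm] - y_pred[:, perm]) ** 2)
    print(np.allclose(before, after))  # True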
@@ -855,14 +864,14 @@ def test_multilabel_representation_invariance():
 
     # NOTE: The "sorted" trick is necessary to shuffle labels, because it
     # allows to return the shuffled tuple.
-    py_random_state = random.Random(0)
-    shuffled = lambda x: sorted(x, key=lambda *args: py_random_state.random())
+    rng = check_random_state(42)
+    shuffled = lambda x: sorted(x, key=lambda *args: rng.rand())
     y1_shuffle = [shuffled(x) for x in y1]
     y2_shuffle = [shuffled(x) for x in y2]
 
-    # Let's have redundant label
-    y1_redundant = [x * py_random_state.randint(1, 3) for x in y1]
-    y2_redundant = [x * py_random_state.randint(1, 3) for x in y2]
+    # Let's have redundant labels
+    y1_redundant = [x * rng.randint(1, 4) for x in y1]
+    y2_redundant = [x * rng.randint(1, 4) for x in y2]
 
     # Binary indicator matrix format
     lb = LabelBinarizer().fit([range(n_classes)])
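One subtlety in this last hunk: the stdlib and NumPy generators disagree on their randint bounds, so swapping random.Random for a RandomState also requires changing randint(1, 3) to randint(1, 4) to keep drawing from {1, 2, 3} (and random() becomes rand()). A quick demonstration:

    import random
    import numpy as np

    py_rng = random.Random(0)
    np_rng = np.random.RandomState(0)
    # stdlib randint includes both endpoints:
    print(sorted({py_rng.randint(1, 3) for _ in range(200)}))  # [1, 2, 3]
    # NumPy's randint excludes the upper bound:
    print(sorted({np_rng.randint(1, 4) for _ in range(200)}))  # [1, 2, 3]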