1
1
import pickle
2
+ import tempfile
3
+ import shutil
4
+ import os
5
+ import numbers
2
6
3
7
import numpy as np
4
8
30
34
from sklearn .cross_validation import train_test_split , cross_val_score
31
35
from sklearn .grid_search import GridSearchCV
32
36
from sklearn .multiclass import OneVsRestClassifier
37
+ from sklearn .externals import joblib
33
38
34
39
35
40
REGRESSION_SCORERS = ['r2' , 'mean_absolute_error' , 'mean_squared_error' ,
46
51
MULTILABEL_ONLY_SCORERS = ['precision_samples' , 'recall_samples' , 'f1_samples' ]
47
52
48
53
54
+ def _make_estimators (X_train , y_train , y_ml_train ):
55
+ # Make estimators that make sense to test various scoring methods
56
+ sensible_regr = DummyRegressor (strategy = 'median' )
57
+ sensible_regr .fit (X_train , y_train )
58
+ sensible_clf = DecisionTreeClassifier (random_state = 0 )
59
+ sensible_clf .fit (X_train , y_train )
60
+ sensible_ml_clf = DecisionTreeClassifier (random_state = 0 )
61
+ sensible_ml_clf .fit (X_train , y_ml_train )
62
+ return dict (
63
+ [(name , sensible_regr ) for name in REGRESSION_SCORERS ] +
64
+ [(name , sensible_clf ) for name in CLF_SCORERS ] +
65
+ [(name , sensible_ml_clf ) for name in MULTILABEL_ONLY_SCORERS ]
66
+ )
67
+
68
+
69
+ X_mm , y_mm , y_ml_mm = None , None , None
70
+ ESTIMATORS = None
71
+ TEMP_FOLDER = None
72
+
73
+
74
+ def setup_module ():
75
+ # Create some memory mapped data
76
+ global X_mm , y_mm , y_ml_mm , TEMP_FOLDER , ESTIMATORS
77
+ TEMP_FOLDER = tempfile .mkdtemp (prefix = 'sklearn_test_score_objects_' )
78
+ X , y = make_classification (n_samples = 30 , n_features = 5 , random_state = 0 )
79
+ _ , y_ml = make_multilabel_classification (n_samples = X .shape [0 ],
80
+ random_state = 0 )
81
+ filename = os .path .join (TEMP_FOLDER , 'test_data.pkl' )
82
+ joblib .dump ((X , y , y_ml ), filename )
83
+ X_mm , y_mm , y_ml_mm = joblib .load (filename , mmap_mode = 'r' )
84
+ ESTIMATORS = _make_estimators (X_mm , y_mm , y_ml_mm )
85
+
86
+
87
+ def teardown_module ():
88
+ global X_mm , y_mm , y_ml_mm , TEMP_FOLDER , ESTIMATORS
89
+ # GC closes the mmap file descriptors
90
+ X_mm , y_mm , y_ml_mm , ESTIMATORS = None , None , None , None
91
+ shutil .rmtree (TEMP_FOLDER )
92
+
93
+
49
94
class EstimatorWithoutFit (object ):
50
95
"""Dummy estimator to test check_scoring"""
51
96
pass
@@ -318,18 +363,7 @@ def test_scorer_sample_weight():
318
363
sample_weight [:10 ] = 0
319
364
320
365
# get sensible estimators for each metric
321
- sensible_regr = DummyRegressor (strategy = 'median' )
322
- sensible_regr .fit (X_train , y_train )
323
- sensible_clf = DecisionTreeClassifier (random_state = 0 )
324
- sensible_clf .fit (X_train , y_train )
325
- sensible_ml_clf = DecisionTreeClassifier (random_state = 0 )
326
- sensible_ml_clf .fit (X_train , y_ml_train )
327
- estimator = dict ([(name , sensible_regr )
328
- for name in REGRESSION_SCORERS ] +
329
- [(name , sensible_clf )
330
- for name in CLF_SCORERS ] +
331
- [(name , sensible_ml_clf )
332
- for name in MULTILABEL_ONLY_SCORERS ])
366
+ estimator = _make_estimators (X_train , y_train , y_ml_train )
333
367
334
368
for name , scorer in SCORERS .items ():
335
369
if name in MULTILABEL_ONLY_SCORERS :
@@ -355,3 +389,21 @@ def test_scorer_sample_weight():
355
389
assert_true ("sample_weight" in str (e ),
356
390
"scorer {0} raises unhelpful exception when called "
357
391
"with sample weights: {1}" .format (name , str (e )))
392
+
393
+
394
+ @ignore_warnings # UndefinedMetricWarning for P / R scores
395
+ def check_scorer_memmap (scorer_name ):
396
+ scorer , estimator = SCORERS [scorer_name ], ESTIMATORS [scorer_name ]
397
+ if scorer_name in MULTILABEL_ONLY_SCORERS :
398
+ score = scorer (estimator , X_mm , y_ml_mm )
399
+ else :
400
+ score = scorer (estimator , X_mm , y_mm )
401
+ assert isinstance (score , numbers .Number ), scorer_name
402
+
403
+
404
+ def test_scorer_memmap_input ():
405
+ # Non-regression test for #6147: some score functions would
406
+ # return singleton memmap when computed on memmap data instead of scalar
407
+ # float values.
408
+ for name in SCORERS .keys ():
409
+ yield check_scorer_memmap , name
0 commit comments