1
1
import pickle
2
+ import tempfile
3
+ import shutil
4
+ import os
5
+ import numbers
2
6
3
7
import numpy as np
4
8
30
34
from sklearn .model_selection import train_test_split , cross_val_score
31
35
from sklearn .model_selection import GridSearchCV
32
36
from sklearn .multiclass import OneVsRestClassifier
37
+ from sklearn .externals import joblib
33
38
34
39
35
40
REGRESSION_SCORERS = ['r2' , 'mean_absolute_error' , 'mean_squared_error' ,
46
51
MULTILABEL_ONLY_SCORERS = ['precision_samples' , 'recall_samples' , 'f1_samples' ]
47
52
48
53
54
+ def _make_estimators (X_train , y_train , y_ml_train ):
55
+ # Make estimators that make sense to test various scoring methods
56
+ sensible_regr = DummyRegressor (strategy = 'median' )
57
+ sensible_regr .fit (X_train , y_train )
58
+ sensible_clf = DecisionTreeClassifier (random_state = 0 )
59
+ sensible_clf .fit (X_train , y_train )
60
+ sensible_ml_clf = DecisionTreeClassifier (random_state = 0 )
61
+ sensible_ml_clf .fit (X_train , y_ml_train )
62
+ return dict (
63
+ [(name , sensible_regr ) for name in REGRESSION_SCORERS ] +
64
+ [(name , sensible_clf ) for name in CLF_SCORERS ] +
65
+ [(name , sensible_ml_clf ) for name in MULTILABEL_ONLY_SCORERS ]
66
+ )
67
+
68
+
69
+ X_mm , y_mm , y_ml_mm = None , None , None
70
+ ESTIMATORS = None
71
+ TEMP_FOLDER = None
72
+
73
+
74
+ def setup_module ():
75
+ # Create some memory mapped data
76
+ global X_mm , y_mm , y_ml_mm , TEMP_FOLDER , ESTIMATORS
77
+ TEMP_FOLDER = tempfile .mkdtemp (prefix = 'sklearn_test_score_objects_' )
78
+ X , y = make_classification (n_samples = 30 , n_features = 5 , random_state = 0 )
79
+ _ , y_ml = make_multilabel_classification (n_samples = X .shape [0 ],
80
+ random_state = 0 )
81
+ filename = os .path .join (TEMP_FOLDER , 'test_data.pkl' )
82
+ joblib .dump ((X , y , y_ml ), filename )
83
+ X_mm , y_mm , y_ml_mm = joblib .load (filename , mmap_mode = 'r' )
84
+ ESTIMATORS = _make_estimators (X_mm , y_mm , y_ml_mm )
85
+
86
+
87
+ def teardown_module ():
88
+ global X_mm , y_mm , y_ml_mm , TEMP_FOLDER , ESTIMATORS
89
+ # GC closes the mmap file descriptors
90
+ X_mm , y_mm , y_ml_mm , ESTIMATORS = None , None , None , None
91
+ shutil .rmtree (TEMP_FOLDER )
92
+
93
+
49
94
class EstimatorWithoutFit (object ):
50
95
"""Dummy estimator to test check_scoring"""
51
96
pass
@@ -324,18 +369,7 @@ def test_scorer_sample_weight():
324
369
sample_weight [:10 ] = 0
325
370
326
371
# get sensible estimators for each metric
327
- sensible_regr = DummyRegressor (strategy = 'median' )
328
- sensible_regr .fit (X_train , y_train )
329
- sensible_clf = DecisionTreeClassifier (random_state = 0 )
330
- sensible_clf .fit (X_train , y_train )
331
- sensible_ml_clf = DecisionTreeClassifier (random_state = 0 )
332
- sensible_ml_clf .fit (X_train , y_ml_train )
333
- estimator = dict ([(name , sensible_regr )
334
- for name in REGRESSION_SCORERS ] +
335
- [(name , sensible_clf )
336
- for name in CLF_SCORERS ] +
337
- [(name , sensible_ml_clf )
338
- for name in MULTILABEL_ONLY_SCORERS ])
372
+ estimator = _make_estimators (X_train , y_train , y_ml_train )
339
373
340
374
for name , scorer in SCORERS .items ():
341
375
if name in MULTILABEL_ONLY_SCORERS :
@@ -361,3 +395,21 @@ def test_scorer_sample_weight():
361
395
assert_true ("sample_weight" in str (e ),
362
396
"scorer {0} raises unhelpful exception when called "
363
397
"with sample weights: {1}" .format (name , str (e )))
398
+
399
+
400
+ @ignore_warnings # UndefinedMetricWarning for P / R scores
401
+ def check_scorer_memmap (scorer_name ):
402
+ scorer , estimator = SCORERS [scorer_name ], ESTIMATORS [scorer_name ]
403
+ if scorer_name in MULTILABEL_ONLY_SCORERS :
404
+ score = scorer (estimator , X_mm , y_ml_mm )
405
+ else :
406
+ score = scorer (estimator , X_mm , y_mm )
407
+ assert isinstance (score , numbers .Number ), scorer_name
408
+
409
+
410
+ def test_scorer_memmap_input ():
411
+ # Non-regression test for #6147: some score functions would
412
+ # return singleton memmap when computed on memmap data instead of scalar
413
+ # float values.
414
+ for name in SCORERS .keys ():
415
+ yield check_scorer_memmap , name
0 commit comments