from ..utils.random import sample_without_replacement
from ..utils.validation import indexable, check_is_fitted
from ..utils.metaestimators import if_delegate_has_method
-from ..metrics.scorer import check_scoring
+from ..metrics.scorer import check_multimetric_scoring


__all__ = ['GridSearchCV', 'ParameterGrid', 'fit_grid_point',
@@ -291,9 +291,11 @@ def fit_grid_point(X, y, estimator, parameters, train, test, scorer,
    test : ndarray, dtype int or bool
        Boolean mask or indices for test set.

-    scorer : callable or None.
-        If provided must be a scorer callable object / function with signature
-        ``scorer(estimator, X, y)``.
+    scorers : dict
+        A dict mapping the scorer name to the scorer callable.
+
+        If provided, each scorer callable object / function must have the
+        signature ``scorer(estimator, X, y)``.

    verbose : int
        Verbosity level.
@@ -309,21 +311,22 @@ def fit_grid_point(X, y, estimator, parameters, train, test, scorer,

    Returns
    -------
-    score : float
-        Score of this parameter setting on given training / test split.
+    scores : dict
+        A dict mapping the scorer name to its score value for the given
+        parameter setting on the given training / test split.

    parameters : dict
        The parameters that have been evaluated.

    n_samples_test : int
        Number of test samples in this split.
    """
-    score, n_samples_test, _ = _fit_and_score(estimator, X, y, scorer, train,
-                                              test, verbose, parameters,
-                                              fit_params=fit_params,
-                                              return_n_test_samples=True,
-                                              error_score=error_score)
-    return score, parameters, n_samples_test
+    scores, n_samples_test, _ = _fit_and_score(estimator, X, y, scorers, train,
+                                               test, verbose, parameters,
+                                               fit_params=fit_params,
+                                               return_n_test_samples=True,
+                                               error_score=error_score)
+    return scores, parameters, n_samples_test


def _check_param_grid(param_grid):
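The hunk above changes the contract of ``fit_grid_point`` from a single scorer to a dict of scorers. A minimal sketch of that contract, assuming the multimetric signature shown in this diff (the released ``fit_grid_point`` takes a single scorer callable; the data, scorer names and expected output below are illustrative only):

import numpy as np
from sklearn.datasets import load_iris
from sklearn.metrics import make_scorer, accuracy_score, precision_score
from sklearn.model_selection import fit_grid_point, train_test_split
from sklearn.svm import SVC

X, y = load_iris(return_X_y=True)
# Shuffled index arrays standing in for one CV split.
train, test = train_test_split(np.arange(len(y)), random_state=0)

# The names chosen here ('acc', 'prec') become the keys of the returned dict.
scorers = {'acc': make_scorer(accuracy_score),
           'prec': make_scorer(precision_score, average='macro')}

scores, parameters, n_test = fit_grid_point(
    X, y, SVC(), {'C': 1.0}, train, test, scorers, verbose=0)
print(scores)  # e.g. {'acc': 0.97, 'prec': 0.97}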
@@ -537,7 +540,10 @@ def _fit(self, X, y, groups, parameter_iterable):

        estimator = self.estimator
        cv = check_cv(self.cv, y, classifier=is_classifier(estimator))
-        self.scorer_ = check_scoring(self.estimator, scoring=self.scoring)
+
+        self.scorers_ = check_multimetric_scoring(self.estimator,
+                                                  scoring=self.scoring)
+        multimetric = len(self.scorers_) > 1

        X, y, groups = indexable(X, y, groups)
        n_splits = cv.get_n_splits(X, y, groups)
@@ -565,15 +571,32 @@ def _fit(self, X, y, groups, parameter_iterable):

        # if one choose to see train score, "out" will contain train score info
        if self.return_train_score:
-            (train_scores, test_scores, test_sample_counts,
+            (train_score_dicts, test_score_dicts, test_sample_counts,
             fit_time, score_time, parameters) = zip(*out)
        else:
-            (test_scores, test_sample_counts,
+            (test_score_dicts, test_sample_counts,
             fit_time, score_time, parameters) = zip(*out)

        candidate_params = parameters[::n_splits]
        n_candidates = len(candidate_params)

+        # The train_scores and test_scores are each a list of dicts of the
+        # form [{'prec': 0.1, 'acc': 1.0}, {'prec': 0.1, 'acc': 1.0}, ...]
+        # Convert that to a dict of arrays {'prec': np.array([0.1, ...]), ...}
+        def _to_dict_of_scores_array(score_dicts):
+            # Each flat array is reshaped into (n_candidates, n_splits)
+            # later, in _store()
+            np_empty = partial(np.empty, shape=(n_candidates * n_splits,))
+            scores_arr = defaultdict(np_empty)
+            for i, score_dict_i in enumerate(score_dicts):
+                for key in self.scorers_.keys():
+                    scores_arr[key][i] = score_dict_i[key]
+            return dict(scores_arr)
+
+        test_scores = _to_dict_of_scores_array(test_score_dicts)
+        if self.return_train_score:
+            train_scores = _to_dict_of_scores_array(train_score_dicts)
+
        results = dict()

        def _store(key_name, array, weights=None, splits=False, rank=False):
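The ``_to_dict_of_scores_array`` helper added above converts the per-(candidate, split) list of score dicts into one flat array per scorer name. A standalone sketch of that conversion, with made-up numbers:

from collections import defaultdict
from functools import partial
import numpy as np

n_candidates, n_splits = 2, 3
score_dicts = [{'prec': 0.1, 'acc': 1.0}] * (n_candidates * n_splits)

np_empty = partial(np.empty, shape=(n_candidates * n_splits,))
scores_arr = defaultdict(np_empty)
for i, score_dict_i in enumerate(score_dicts):
    for key, value in score_dict_i.items():
        scores_arr[key][i] = value

# Each flat array can later be reshaped into (n_candidates, n_splits).
print({k: v.reshape(n_candidates, n_splits) for k, v in scores_arr.items()})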
@@ -582,6 +605,7 @@ def _store(key_name, array, weights=None, splits=False, rank=False):
                                                              n_splits)
            if splits:
                for split_i in range(n_splits):
+                    # Uses a closure to reference the results dict
                    results["split%d_%s"
                            % (split_i, key_name)] = array[:, split_i]

@@ -597,19 +621,20 @@ def _store(key_name, array, weights=None, splits=False, rank=False):
            results["rank_%s" % key_name] = np.asarray(
                rankdata(-array_means, method='min'), dtype=np.int32)

-        # Computed the (weighted) mean and std for test scores alone
-        # NOTE test_sample counts (weights) remain the same for all candidates
-        test_sample_counts = np.array(test_sample_counts[:n_splits],
-                                      dtype=np.int)
-
-        _store('test_score', test_scores, splits=True, rank=True,
-               weights=test_sample_counts if self.iid else None)
-        _store('train_score', train_scores, splits=True)
-        _store('fit_time', fit_time)
-        _store('score_time', score_time)
-
-        best_index = np.flatnonzero(results["rank_test_score"] == 1)[0]
-        best_parameters = candidate_params[best_index]
+        for scorer_name in self.scorers_.keys():
+            # Compute the (weighted) mean and std for test scores alone
+            # NOTE test_sample_counts (weights) remain the same for all
+            # candidates
+            test_sample_counts = np.array(test_sample_counts[:n_splits],
+                                          dtype=np.int)
+            _store('test_%s' % scorer_name, test_scores[scorer_name],
+                   splits=True, rank=True,
+                   weights=test_sample_counts if self.iid else None)
+            if self.return_train_score:
+                _store('train_%s' % scorer_name, train_scores[scorer_name],
+                       splits=True)
+        _store('fit_time', fit_time)
+        _store('score_time', score_time)

        # Use one np.MaskedArray and mask all the places where the param is not
        # applicable for that candidate. Use defaultdict as each candidate may
@@ -625,6 +650,9 @@ def _store(key_name, array, weights=None, splits=False, rank=False):

        results.update(param_results)

+        best_index = np.flatnonzero(results["rank_test_score"] == 1)[0]
+        best_parameters = candidate_params[best_index]
+
        # Store a list of param dicts at the key 'params'
        results['params'] = candidate_params

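With per-scorer keys stored above, a single ``rank_test_score`` lookup no longer applies directly when several scorers are used. A hedged sketch of how a per-scorer best index could be derived from such ranks (the names and values below are illustrative, not the PR's final API):

import numpy as np

results = {'rank_test_acc': np.array([2, 1, 3]),
           'rank_test_prec': np.array([1, 3, 2])}
candidate_params = [{'C': 0.1}, {'C': 1.0}, {'C': 10.0}]

best_index = {name.replace('rank_test_', ''): int(np.flatnonzero(rank == 1)[0])
              for name, rank in results.items()}
best_params = {name: candidate_params[i] for name, i in best_index.items()}
print(best_index)   # e.g. {'acc': 1, 'prec': 0}
print(best_params)  # e.g. {'acc': {'C': 1.0}, 'prec': {'C': 0.1}}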
@@ -707,11 +735,18 @@ class GridSearchCV(BaseSearchCV):
        in the list are explored. This enables searching over any sequence
        of parameter settings.

-    scoring : string, callable or None, default=None
-        A string (see model evaluation documentation) or
-        a scorer callable object / function with signature
-        ``scorer(estimator, X, y)``.
-        If ``None``, the ``score`` method of the estimator is used.
+    scoring : string, callable, list, dict or None, optional, default: None
+        A single string (see :ref:`scoring_parameter`) or a callable
+        (see :ref:`scoring`) to evaluate the predictions on the test set.
+
+        For evaluating multiple metrics, either give a list of (unique) strings
+        or a dict with names as keys and callables as values.
+
+        NOTE that when using custom scorers, each scorer should return a single
+        value. Metric functions returning a list/array of values can be wrapped
+        into multiple scorers that return one value each.
+
+        If None, the estimator's default scorer (if available) is used.

    fit_params : dict, optional
        Parameters to pass to the fit method.
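The rewritten ``scoring`` description above accepts either a list of scorer-name strings or a dict mapping names to callables. A sketch of both forms, built from metrics that ship with scikit-learn (the variable names are only for illustration):

from sklearn.metrics import make_scorer, accuracy_score, precision_score

# 1. A list of unique scorer-name strings:
scoring_as_list = ['accuracy', 'precision_macro']

# 2. A dict mapping your own names to callables; each callable must return
#    a single value and follow the scorer(estimator, X, y) signature.
scoring_as_dict = {'acc': make_scorer(accuracy_score),
                   'prec': make_scorer(precision_score, average='macro')}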
@@ -848,35 +883,63 @@ class GridSearchCV(BaseSearchCV):
        'params' : [{'kernel': 'poly', 'degree': 2}, ...],
        }

-        NOTE that the key ``'params'`` is used to store a list of parameter
+        NOTE
+
+        The key ``'params'`` is used to store a list of parameter
        settings dict for all the parameter candidates.

        The ``mean_fit_time``, ``std_fit_time``, ``mean_score_time`` and
        ``std_score_time`` are all in seconds.

-    best_estimator_ : estimator
+        For multiple metric evaluation, the scores for all the scorers are
+        available in the ``cv_results_`` dict at the keys ending with that
+        scorer's name (``'_<scorer_name>'``) instead of ``'_score'`` as shown
+        above (e.g. ``'split0_test_precision'``, ``'mean_train_precision'``).
+
+    best_estimator_ : estimator or dict
        Estimator that was chosen by the search, i.e. estimator
        which gave highest score (or smallest loss if specified)
        on the left out data. Not available if refit=False.

-    best_score_ : float
+        For multimetric evaluation (when the ``scoring`` parameter is a
+        dict/list), this attribute is a dict mapping scorer names to the
+        estimator that gave the best score for that scorer.
+
+    best_score_ : float or dict
        Score of best_estimator on the left out data.

-    best_params_ : dict
+        For multimetric evaluation (when the ``scoring`` parameter is a
+        dict/list), this attribute is a dict mapping scorer names to the best
+        score for that scorer.
+
+    best_params_ : dict or dict of dicts
        Parameter setting that gave the best results on the hold out data.

-    best_index_ : int
+        For multimetric evaluation (when the ``scoring`` parameter is a
+        dict/list), this attribute is a dict of dicts mapping scorer names to
+        the parameter setting that gave the best score for that scorer.
+
+    best_index_ : int or dict
        The index (of the ``cv_results_`` arrays) which corresponds to the best
        candidate parameter setting.

        The dict at ``search.cv_results_['params'][search.best_index_]`` gives
        the parameter setting for the best model, that gives the highest
        mean score (``search.best_score_``).

-    scorer_ : function
+        For multimetric evaluation (when the ``scoring`` parameter is a
+        dict/list), this attribute is a dict mapping scorer names to the index
+        which corresponds to the parameter setting that gave the best score
+        for that scorer.
+
+    scorer_ : function or a dict
        Scorer function used on the held out data to choose the best
        parameters for the model.

+        For multimetric evaluation, this attribute is a dict mapping scorer
+        names to the corresponding scorer functions.
+
    n_splits_ : int
        The number of cross-validation splits (folds/iterations).

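A usage sketch of the multimetric interface documented above, assuming the behaviour described in this PR (per-scorer key suffixes in ``cv_results_``; ``refit=False`` is used to sidestep the question of which scorer to refit on):

from sklearn.datasets import load_iris
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

X, y = load_iris(return_X_y=True)
search = GridSearchCV(SVC(), param_grid={'C': [0.1, 1, 10]},
                      scoring=['accuracy', 'precision_macro'],
                      refit=False)
search.fit(X, y)

# Per-scorer suffixes replace the single '_score' suffix, e.g.
# 'mean_test_accuracy', 'rank_test_precision_macro', 'split0_test_accuracy'.
print(sorted(k for k in search.cv_results_ if k.startswith('mean_test_')))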
@@ -1094,35 +1157,63 @@ class RandomizedSearchCV(BaseSearchCV):
        'params' : [{'kernel' : 'rbf', 'gamma' : 0.1}, ...],
        }

-        NOTE that the key ``'params'`` is used to store a list of parameter
+        NOTE
+
+        The key ``'params'`` is used to store a list of parameter
        settings dict for all the parameter candidates.

        The ``mean_fit_time``, ``std_fit_time``, ``mean_score_time`` and
        ``std_score_time`` are all in seconds.

-    best_estimator_ : estimator
+        For multiple metric evaluation, the scores for all the scorers are
+        available in the ``cv_results_`` dict at the keys ending with that
+        scorer's name (``'_<scorer_name>'``) instead of ``'_score'`` as shown
+        above.
+
+    best_estimator_ : estimator or dict
        Estimator that was chosen by the search, i.e. estimator
        which gave highest score (or smallest loss if specified)
        on the left out data. Not available if refit=False.

-    best_score_ : float
+        For multimetric evaluation (when the ``scoring`` parameter is a
+        dict/list), this attribute is a dict mapping scorer names to the
+        estimator that gave the best score for that scorer.
+
+    best_score_ : float or dict
        Score of best_estimator on the left out data.

-    best_params_ : dict
+        For multimetric evaluation (when the ``scoring`` parameter is a
+        dict/list), this attribute is a dict mapping scorer names to the best
+        score for that scorer.
+
+    best_params_ : dict or dict of dicts
        Parameter setting that gave the best results on the hold out data.

-    best_index_ : int
+        For multimetric evaluation (when the ``scoring`` parameter is a
+        dict/list), this attribute is a dict of dicts mapping scorer names to
+        the parameter setting that gave the best score for that scorer.
+
+    best_index_ : int or dict
        The index (of the ``cv_results_`` arrays) which corresponds to the best
        candidate parameter setting.

        The dict at ``search.cv_results_['params'][search.best_index_]`` gives
        the parameter setting for the best model, that gives the highest
        mean score (``search.best_score_``).

-    scorer_ : function
+        For multimetric evaluation (when the ``scoring`` parameter is a
+        dict/list), this attribute is a dict mapping scorer names to the index
+        which corresponds to the parameter setting that gave the best score
+        for that scorer.
+
+    scorer_ : function or a dict
        Scorer function used on the held out data to choose the best
        parameters for the model.

+        For multimetric evaluation, this attribute is a dict mapping scorer
+        names to the corresponding scorer functions.
+
    n_splits_ : int
        The number of cross-validation splits (folds/iterations).
