@@ -319,7 +319,9 @@ def fit_grid_point(X, y, estimator, parameters, train, test, scorer,
     """
     score, n_samples_test, _ = _fit_and_score(estimator, X, y, scorer, train,
                                               test, verbose, parameters,
-                                              fit_params, error_score)
+                                              fit_params=fit_params,
+                                              return_n_test_samples=True,
+                                              error_score=error_score)
     return score, parameters, n_samples_test


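In the hunk above, `_fit_and_score` is now called with explicit keyword arguments and `return_n_test_samples=True`, so `fit_grid_point` unpacks the test-set size together with the score and returns it to the caller. A minimal sketch of calling `fit_grid_point` directly; the iris data, `SVC` estimator and shuffled 3-fold split are illustrative assumptions, and the trailing `scorer`/`verbose` arguments are assumed from the function's signature, which is only partially shown in this excerpt:

    # Minimal sketch: score one parameter setting on one CV split via
    # fit_grid_point.  Dataset, estimator and split are illustrative
    # assumptions, not part of this change.
    from sklearn.datasets import load_iris
    from sklearn.metrics import accuracy_score, make_scorer
    from sklearn.model_selection import KFold, fit_grid_point
    from sklearn.svm import SVC

    iris = load_iris()
    X, y = iris.data, iris.target
    train, test = next(KFold(n_splits=3, shuffle=True, random_state=0).split(X))

    score, parameters, n_test = fit_grid_point(
        X, y, SVC(), {'C': 1.0}, train, test,
        scorer=make_scorer(accuracy_score), verbose=0)
    print(parameters, score, n_test)   # n_test == 50 for the 150 iris samples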
@@ -374,7 +376,7 @@ class BaseSearchCV(six.with_metaclass(ABCMeta, BaseEstimator,
     def __init__(self, estimator, scoring=None,
                  fit_params=None, n_jobs=1, iid=True,
                  refit=True, cv=None, verbose=0, pre_dispatch='2*n_jobs',
-                 error_score='raise'):
+                 error_score='raise', return_train_score=True):

         self.scoring = scoring
         self.estimator = estimator
@@ -386,6 +388,7 @@ def __init__(self, estimator, scoring=None,
         self.verbose = verbose
         self.pre_dispatch = pre_dispatch
         self.error_score = error_score
+        self.return_train_score = return_train_score

     @property
     def _estimator_type(self):
@@ -551,41 +554,61 @@ def _fit(self, X, y, groups, parameter_iterable):
             pre_dispatch=pre_dispatch
         )(delayed(_fit_and_score)(clone(base_estimator), X, y, self.scorer_,
                                   train, test, self.verbose, parameters,
-                                  self.fit_params, return_parameters=True,
+                                  fit_params=self.fit_params,
+                                  return_train_score=self.return_train_score,
+                                  return_n_test_samples=True,
+                                  return_times=True, return_parameters=True,
                                   error_score=self.error_score)
           for parameters in parameter_iterable
           for train, test in cv.split(X, y, groups))

-        test_scores, test_sample_counts, _, parameters = zip(*out)
+        # if one choose to see train score, "out" will contain train score info
+        if self.return_train_score:
+            (train_scores, test_scores, test_sample_counts,
+             fit_time, score_time, parameters) = zip(*out)
+        else:
+            (test_scores, test_sample_counts,
+             fit_time, score_time, parameters) = zip(*out)

         candidate_params = parameters[::n_splits]
         n_candidates = len(candidate_params)

-        test_scores = np.array(test_scores,
-                               dtype=np.float64).reshape(n_candidates,
-                                                         n_splits)
+        results = dict()
+
+        def _store(key_name, array, weights=None, splits=False, rank=False):
+            """A small helper to store the scores/times to the cv_results_"""
+            array = np.array(array, dtype=np.float64).reshape(n_candidates,
+                                                              n_splits)
+            if splits:
+                for split_i in range(n_splits):
+                    results["split%d_%s"
+                            % (split_i, key_name)] = array[:, split_i]
+
+            array_means = np.average(array, axis=1, weights=weights)
+            results['mean_%s' % key_name] = array_means
+            # Weighted std is not directly available in numpy
+            array_stds = np.sqrt(np.average((array -
+                                             array_means[:, np.newaxis]) ** 2,
+                                            axis=1, weights=weights))
+            results['std_%s' % key_name] = array_stds
+
+            if rank:
+                results["rank_%s" % key_name] = np.asarray(
+                    rankdata(-array_means, method='min'), dtype=np.int32)
+
+        # Computed the (weighted) mean and std for test scores alone
         # NOTE test_sample counts (weights) remain the same for all candidates
         test_sample_counts = np.array(test_sample_counts[:n_splits],
                                       dtype=np.int)

-        # Computed the (weighted) mean and std for all the candidates
-        weights = test_sample_counts if self.iid else None
-        means = np.average(test_scores, axis=1, weights=weights)
-        stds = np.sqrt(np.average((test_scores - means[:, np.newaxis]) ** 2,
-                                  axis=1, weights=weights))
-
-        cv_results = dict()
-        for split_i in range(n_splits):
-            cv_results["split%d_test_score" % split_i] = test_scores[:,
-                                                                     split_i]
-        cv_results["mean_test_score"] = means
-        cv_results["std_test_score"] = stds
-
-        ranks = np.asarray(rankdata(-means, method='min'), dtype=np.int32)
+        _store('test_score', test_scores, splits=True, rank=True,
+               weights=test_sample_counts if self.iid else None)
+        _store('train_score', train_scores, splits=True)
+        _store('fit_time', fit_time)
+        _store('score_time', score_time)

-        best_index = np.flatnonzero(ranks == 1)[0]
+        best_index = np.flatnonzero(results["rank_test_score"] == 1)[0]
         best_parameters = candidate_params[best_index]
-        cv_results["rank_test_score"] = ranks

         # Use one np.MaskedArray and mask all the places where the param is not
         # applicable for that candidate. Use defaultdict as each candidate may
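The `_store` helper introduced above reduces each (n_candidates, n_splits) array to per-split columns, a mean and a standard deviation (weighted by test-set sizes when `iid=True`), and optionally a rank. A standalone sketch of that aggregation with a made-up score matrix, using only numpy and scipy:

    # Standalone sketch of the aggregation performed by _store; the toy
    # scores and weights are made up, shape is (n_candidates, n_splits).
    import numpy as np
    from scipy.stats import rankdata

    scores = np.array([[0.80, 0.82, 0.78],    # candidate 0, one column per split
                       [0.70, 0.50, 0.60],    # candidate 1
                       [0.90, 0.88, 0.92]])   # candidate 2
    weights = np.array([34, 33, 33])          # e.g. test-set sizes when iid=True

    means = np.average(scores, axis=1, weights=weights)
    # numpy has no weighted-std routine, so it is built from np.average,
    # exactly as in the _store helper above
    stds = np.sqrt(np.average((scores - means[:, np.newaxis]) ** 2,
                              axis=1, weights=weights))
    ranks = np.asarray(rankdata(-means, method='min'), dtype=np.int32)

    print(means)   # -> mean_test_score
    print(stds)    # -> std_test_score
    print(ranks)   # -> rank_test_score; the best candidate gets rank 1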
@@ -599,12 +622,12 @@ def _fit(self, X, y, groups, parameter_iterable):
                 # Setting the value at an index also unmasks that index
                 param_results["param_%s" % name][cand_i] = value

-        cv_results.update(param_results)
+        results.update(param_results)

         # Store a list of param dicts at the key 'params'
-        cv_results['params'] = candidate_params
+        results['params'] = candidate_params

-        self.cv_results_ = cv_results
+        self.cv_results_ = results
         self.best_index_ = best_index
         self.n_splits_ = n_splits

@@ -746,6 +769,10 @@ class GridSearchCV(BaseSearchCV):
         FitFailedWarning is raised. This parameter does not affect the refit
         step, which will always raise the error.

+    return_train_score : boolean, default=True
+        If ``'False'``, the ``cv_results_`` attribute will not include training
+        scores.
+

     Examples
     --------
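A short usage sketch of the parameter documented above; the estimator, grid and dataset are illustrative assumptions. With the default `return_train_score=True`, per-split and aggregated training scores appear in `cv_results_` next to the new timing keys:

    # Sketch: grid search reporting train scores and timings (illustrative
    # estimator and parameter grid).
    from sklearn.datasets import load_iris
    from sklearn.model_selection import GridSearchCV
    from sklearn.svm import SVC

    iris = load_iris()
    search = GridSearchCV(SVC(), {'C': [0.1, 1, 10]}, cv=3,
                          return_train_score=True)
    search.fit(iris.data, iris.target)

    print(search.cv_results_['mean_train_score'])  # one entry per value of C
    print(search.cv_results_['mean_test_score'])
    print(search.cv_results_['mean_fit_time'])     # seconds, averaged over splits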
@@ -764,13 +791,16 @@ class GridSearchCV(BaseSearchCV):
                          random_state=None, shrinking=True, tol=...,
                          verbose=False),
            fit_params={}, iid=..., n_jobs=1,
-           param_grid=..., pre_dispatch=..., refit=...,
+           param_grid=..., pre_dispatch=..., refit=..., return_train_score=...,
            scoring=..., verbose=...)
     >>> sorted(clf.cv_results_.keys())
     ...                             # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
-    ['mean_test_score', 'param_C', 'param_kernel', 'params',...
-     'rank_test_score', 'split0_test_score', 'split1_test_score',...
-     'split2_test_score', 'std_test_score']
+    ['mean_fit_time', 'mean_score_time', 'mean_test_score',...
+     'mean_train_score', 'param_C', 'param_kernel', 'params',...
+     'rank_test_score', 'split0_test_score',...
+     'split0_train_score', 'split1_test_score', 'split1_train_score',...
+     'split2_test_score', 'split2_train_score',...
+     'std_fit_time', 'std_score_time', 'std_test_score', 'std_train_score'...]

     Attributes
     ----------
@@ -801,17 +831,28 @@ class GridSearchCV(BaseSearchCV):
                                      mask = [ True  True False False]...),
         'param_degree': masked_array(data = [2.0 3.0 -- --],
                                      mask = [False False  True  True]...),
-        'split0_test_score'  : [0.8, 0.7, 0.8, 0.9],
-        'split1_test_score'  : [0.82, 0.5, 0.7, 0.78],
-        'mean_test_score'    : [0.81, 0.60, 0.75, 0.82],
-        'std_test_score'     : [0.02, 0.01, 0.03, 0.03],
-        'rank_test_score'    : [2, 4, 3, 1],
-        'params'             : [{'kernel': 'poly', 'degree': 2}, ...],
+        'split0_test_score'  : [0.8, 0.7, 0.8, 0.9],
+        'split1_test_score'  : [0.82, 0.5, 0.7, 0.78],
+        'mean_test_score'    : [0.81, 0.60, 0.75, 0.82],
+        'std_test_score'     : [0.02, 0.01, 0.03, 0.03],
+        'rank_test_score'    : [2, 4, 3, 1],
+        'split0_train_score' : [0.8, 0.9, 0.7],
+        'split1_train_score' : [0.82, 0.5, 0.7],
+        'mean_train_score'   : [0.81, 0.7, 0.7],
+        'std_train_score'    : [0.03, 0.03, 0.04],
+        'mean_fit_time'      : [0.73, 0.63, 0.43, 0.49],
+        'std_fit_time'       : [0.01, 0.02, 0.01, 0.01],
+        'mean_score_time'    : [0.007, 0.06, 0.04, 0.04],
+        'std_score_time'     : [0.001, 0.002, 0.003, 0.005],
+        'params'             : [{'kernel': 'poly', 'degree': 2}, ...],
         }

     NOTE that the key ``'params'`` is used to store a list of parameter
     settings dict for all the parameter candidates.

+    The ``mean_fit_time``, ``std_fit_time``, ``mean_score_time`` and
+    ``std_score_time`` are all in seconds.
+
     best_estimator_ : estimator
         Estimator that was chosen by the search, i.e. estimator
         which gave highest score (or smallest loss if specified)
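Since every value in the `cv_results_` dict shown above is an array of length `n_candidates`, the dict maps directly onto a table. A sketch of inspecting it with pandas (pandas and the chosen estimator/grid are assumptions for illustration, not requirements of this module):

    # Sketch: tabulate cv_results_ with pandas, sorted by test-score rank.
    import pandas as pd
    from sklearn.datasets import load_iris
    from sklearn.model_selection import GridSearchCV
    from sklearn.svm import SVC

    iris = load_iris()
    search = GridSearchCV(SVC(), {'kernel': ['linear', 'rbf'], 'C': [1, 10]},
                          cv=3).fit(iris.data, iris.target)

    df = pd.DataFrame(search.cv_results_)   # one row per parameter candidate
    cols = ['params', 'mean_test_score', 'mean_train_score',
            'mean_fit_time', 'rank_test_score']
    print(df[cols].sort_values('rank_test_score'))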
@@ -868,11 +909,13 @@ class GridSearchCV(BaseSearchCV):

     def __init__(self, estimator, param_grid, scoring=None, fit_params=None,
                  n_jobs=1, iid=True, refit=True, cv=None, verbose=0,
-                 pre_dispatch='2*n_jobs', error_score='raise'):
+                 pre_dispatch='2*n_jobs', error_score='raise',
+                 return_train_score=True):
         super(GridSearchCV, self).__init__(
             estimator=estimator, scoring=scoring, fit_params=fit_params,
             n_jobs=n_jobs, iid=iid, refit=refit, cv=cv, verbose=verbose,
-            pre_dispatch=pre_dispatch, error_score=error_score)
+            pre_dispatch=pre_dispatch, error_score=error_score,
+            return_train_score=return_train_score)
         self.param_grid = param_grid
         _check_param_grid(param_grid)

@@ -1006,6 +1049,10 @@ class RandomizedSearchCV(BaseSearchCV):
         FitFailedWarning is raised. This parameter does not affect the refit
         step, which will always raise the error.

+    return_train_score : boolean, default=True
+        If ``'False'``, the ``cv_results_`` attribute will not include training
+        scores.
+
     Attributes
     ----------
     cv_results_ : dict of numpy (masked) ndarrays
@@ -1030,17 +1077,28 @@ class RandomizedSearchCV(BaseSearchCV):
         'param_kernel' : masked_array(data = ['rbf', 'rbf', 'rbf'],
                                       mask = False),
         'param_gamma' : masked_array(data = [0.1 0.2 0.3], mask = False),
-        'split0_test_score' : [0.8, 0.9, 0.7],
-        'split1_test_score' : [0.82, 0.5, 0.7],
-        'mean_test_score'   : [0.81, 0.7, 0.7],
-        'std_test_score'    : [0.02, 0.2, 0.],
-        'rank_test_score'   : [3, 1, 1],
+        'split0_test_score'  : [0.8, 0.9, 0.7],
+        'split1_test_score'  : [0.82, 0.5, 0.7],
+        'mean_test_score'    : [0.81, 0.7, 0.7],
+        'std_test_score'     : [0.02, 0.2, 0.],
+        'rank_test_score'    : [3, 1, 1],
+        'split0_train_score' : [0.8, 0.9, 0.7],
+        'split1_train_score' : [0.82, 0.5, 0.7],
+        'mean_train_score'   : [0.81, 0.7, 0.7],
+        'std_train_score'    : [0.03, 0.03, 0.04],
+        'mean_fit_time'      : [0.73, 0.63, 0.43, 0.49],
+        'std_fit_time'       : [0.01, 0.02, 0.01, 0.01],
+        'mean_score_time'    : [0.007, 0.06, 0.04, 0.04],
+        'std_score_time'     : [0.001, 0.002, 0.003, 0.005],
         'params' : [{'kernel' : 'rbf', 'gamma' : 0.1}, ...],
         }

     NOTE that the key ``'params'`` is used to store a list of parameter
     settings dict for all the parameter candidates.

+    The ``mean_fit_time``, ``std_fit_time``, ``mean_score_time`` and
+    ``std_score_time`` are all in seconds.
+
     best_estimator_ : estimator
         Estimator that was chosen by the search, i.e. estimator
         which gave highest score (or smallest loss if specified)
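The same keys are produced by `RandomizedSearchCV`; a brief sketch follows, where the `SVC` estimator and the exponential distribution for `C` are illustrative assumptions:

    # Sketch: randomized search with train scores and timings reported the
    # same way as for GridSearchCV (illustrative estimator/distribution).
    from scipy.stats import expon
    from sklearn.datasets import load_iris
    from sklearn.model_selection import RandomizedSearchCV
    from sklearn.svm import SVC

    iris = load_iris()
    search = RandomizedSearchCV(SVC(), {'C': expon(scale=10)}, n_iter=5,
                                cv=3, random_state=0, return_train_score=True)
    search.fit(iris.data, iris.target)

    print(sorted(search.cv_results_.keys()))      # includes the *_train_score keys
    print(search.cv_results_['mean_score_time'])  # seconds per candidate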
@@ -1094,15 +1152,15 @@ class RandomizedSearchCV(BaseSearchCV):
     def __init__(self, estimator, param_distributions, n_iter=10, scoring=None,
                  fit_params=None, n_jobs=1, iid=True, refit=True, cv=None,
                  verbose=0, pre_dispatch='2*n_jobs', random_state=None,
-                 error_score='raise'):
-
+                 error_score='raise', return_train_score=True):
         self.param_distributions = param_distributions
         self.n_iter = n_iter
         self.random_state = random_state
         super(RandomizedSearchCV, self).__init__(
-            estimator=estimator, scoring=scoring, fit_params=fit_params,
-            n_jobs=n_jobs, iid=iid, refit=refit, cv=cv, verbose=verbose,
-            pre_dispatch=pre_dispatch, error_score=error_score)
+             estimator=estimator, scoring=scoring, fit_params=fit_params,
+             n_jobs=n_jobs, iid=iid, refit=refit, cv=cv, verbose=verbose,
+             pre_dispatch=pre_dispatch, error_score=error_score,
+             return_train_score=return_train_score)

     def fit(self, X, y=None, groups=None):
         """Run fit on the estimator with randomly drawn parameters.