@@ -374,7 +374,7 @@ class BaseSearchCV(six.with_metaclass(ABCMeta, BaseEstimator,
374
374
def __init__ (self , estimator , scoring = None ,
375
375
fit_params = None , n_jobs = 1 , iid = True ,
376
376
refit = True , cv = None , verbose = 0 , pre_dispatch = '2*n_jobs' ,
8000
code>
377
- error_score = 'raise' ):
377
+ error_score = 'raise' , return_train_score = True ):
378
378
379
379
self .scoring = scoring
380
380
self .estimator = estimator
@@ -386,6 +386,7 @@ def __init__(self, estimator, scoring=None,
386
386
self .verbose = verbose
387
387
self .pre_dispatch = pre_dispatch
388
388
self .error_score = error_score
389
+ self .return_train_score = return_train_score
389
390
390
391
@property
391
392
def _estimator_type (self ):
@@ -551,28 +552,52 @@ def _fit(self, X, y, groups, parameter_iterable):
551
552
pre_dispatch = pre_dispatch
552
553
)(delayed (_fit_and_score )(clone (base_estimator ), X , y , self .scorer_ ,
553
554
train , test , self .verbose , parameters ,
554
- self .fit_params , return_parameters = True ,
555
+ self .fit_params ,
556
+ return_train_score = self .return_train_score ,
557
+ return_parameters = True ,
555
558
D7AF
error_score = self .error_score )
556
559
for parameters in parameter_iterable
557
560
for train , test in cv .split (X , y , groups ))
558
561
559
- test_scores , test_sample_counts , _ , parameters = zip (* out )
562
+ # if train scores are requested, "out" also contains the train scores
563
+ if self .return_train_score :
564
+ train_scores , test_scores , test_sample_counts , time , parameters = \
565
+ zip (* out )
566
+ else :
567
+ test_scores , test_sample_counts , time , parameters = zip (* out )
560
568
561
569
candidate_params = parameters [::n_splits ]
562
570
n_candidates = len (candidate_params )
563
571
572
+ # if train scores are requested, reshape the train_scores array
573
+ if self .return_train_score :
574
+ train_scores = np .array (train_scores ,
575
+ dtype = np .float64 ).reshape (n_candidates ,
576
+ n_splits )
564
577
test_scores = np .array (test_scores ,
565
578
dtype = np .float64 ).reshape (n_candidates ,
566
579
n_splits )
567
580
# NOTE test_sample counts (weights) remain the same for all candidates
568
581
test_sample_counts = np .array (test_sample_counts [:n_splits ],
569
582
dtype = np .int )
570
583
571
- # Computed the (weighted) mean and std for all the candidates
584
+ # Compute the (weighted) mean and std for test scores
572
585
weights = test_sample_counts if self .iid else None
573
- means = np .average (test_scores , axis = 1 , weights = weights )
574
- stds = np .sqrt (np .average ((test_scores - means [:, np .newaxis ]) ** 2 ,
575
- axis = 1 , weights = weights ))
586
+ test_means = np .average (test_scores , axis = 1 , weights = weights )
587
+ test_stds = np .sqrt (
588
+ np .average ((test_scores - test_means [:, np .newaxis ]) ** 2 , axis = 1 ,
589
+ weights = weights ))
590
+
591
+ time = np .array (time , dtype = np .float64 ).reshape (n_candidates , n_splits )
592
+ time_means = np .average (time , axis = 1 )
593
+ time_stds = np .sqrt (
594
+ np .average ((time - time_means [:, np .newaxis ]) ** 2 ,
595
+ axis = 1 ))
596
+ if self .return_train_score :
597
+ train_means = np .average (train_scores , axis = 1 )
598
+ train_stds = np .sqrt (
599
+ np .average ((train_scores - train_means [:, np .newaxis ]) ** 2 ,
600
+ axis = 1 ))
576
601
577
602
cv_results = dict ()
578
603
for split_i in range (n_splits ):
@@ -581,7 +606,19 @@ def _fit(self, X, y, groups, parameter_iterable):
581
606
cv_results ["mean_test_score" ] = means
582
607
cv_results ["std_test_score" ] = stds
583
608
584
- ranks = np .asarray (rankdata (- means , method = 'min' ), dtype = np .int32 )
609
+ if self .return_train_score :
610
+ for split_i in range (n_splits ):
611
+ results ["train_split%d_score" % split_i ] = (
612
+ train_scores [:, split_i ])
613
+ results ["mean_train_score" ] = train_means
614
+ results ["std_train_scores" ] = train_stds
615
+ results ["rank_train_scores" ] = np .asarray (rankdata (- train_means ,
616
+ method = 'min' ),
617
+ dtype = np .int32 )
618
+
619
+ results ["mean_test_time" ] = time_means
620
+ results ["std_test_time" ] = time_stds
621
+ ranks = np .asarray (rankdata (- test_means , method = 'min' ), dtype = np .int32 )
585
622
586
623
best_index = np .flatnonzero (ranks == 1 )[0 ]
587
624
best_parameters = candidate_params [best_index ]
@@ -746,6 +783,10 @@ class GridSearchCV(BaseSearchCV):
746
783
FitFailedWarning is raised. This parameter does not affect the refit
747
784
step, which will always raise the error.
748
785
786
+ return_train_score : boolean, default=False
787
+ If ``False``, the ``cv_results_`` attribute will not include training
788
+ scores.
789
+
749
790
750
791
Examples
751
792
--------
@@ -764,13 +805,14 @@ class GridSearchCV(BaseSearchCV):
764
805
random_state=None, shrinking=True, tol=...,
765
806
verbose=False),
766
807
fit_params={}, iid=..., n_jobs=1,
767
- param_grid=..., pre_dispatch=..., refit=...,
808
+ param_grid=..., pre_dispatch=..., refit=..., return_train_score=...,
768
809
scoring=..., verbose=...)
769
810
>>> sorted(clf.cv_results_.keys())
770
811
... # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
771
- ['mean_test_score', 'param_C', 'param_kernel', 'params',...
772
- 'rank_test_score', 'split0_test_score', 'split1_test_score',...
773
- 'split2_test_score', 'std_test_score']
812
+ ['mean_test_score', 'mean_test_time', 'mean_train_score',...
813
+ 'param_C', 'param_kernel', 'params', 'rank_test_score',...
814
+ 'split0_test_score', 'split1_test_score',...
815
+ 'split2_test_score', 'std_test_score', 'std_test_time'...]
774
816
775
817
Attributes
776
818
----------
@@ -806,11 +848,20 @@ class GridSearchCV(BaseSearchCV):
806
848
'mean_test_score' : [0.81, 0.60, 0.75, 0.82],
807
849
'std_test_score' : [0.02, 0.01, 0.03, 0.03],
808
850
'rank_test_score' : [2, 4, 3, 1],
851
+ 'split0_train_score': [0.9, 0.8, 0.85, 1.],
852
+ 'split1_train_score': [0.95, 0.7, 0.8, 0.8],
853
+ 'mean_train_score' : [0.93, 0.75, 0.83, 0.9],
854
+ 'std_train_score' : [0.02, 0.01, 0.03, 0.03],
855
+ 'rank_train_score' : [1, 4, 3, 2],
856
+ 'mean_test_time' : [0.00073, 0.00063, 0.00043, 0.00049],
857
+ 'std_test_time' : [1.62e-4, 3.37e-5, 1.42e-5, 1.1e-5],
809
858
'params' : [{'kernel': 'poly', 'degree': 2}, ...],
810
859
}
811
860
812
861
NOTE that the key ``'params'`` is used to store a list of parameter
813
- settings dict for all the parameter candidates.
862
+ settings dict for all the parameter candidates. Besides,
863
+ ``'mean_train_score'``, ``'train_split*_score'``, ... will be present
864
+ when ``return_train_score=True``.
814
865
815
866
best_estimator_ : estimator
816
867
Estimator that was chosen by the search, i.e. estimator
@@ -868,11 +919,13 @@ class GridSearchCV(BaseSearchCV):
868
919
869
920
def __init__ (self , estimator , param_grid , scoring = None , fit_params = None ,
870
921
n_jobs = 1 , iid = True , refit = True , cv = None , verbose = 0 ,
871
- pre_dispatch = '2*n_jobs' , error_score = 'raise' ):
922
+ pre_dispatch = '2*n_jobs' , error_score = 'raise' ,
923
+ return_train_score = False ):
872
924
super (GridSearchCV , self ).__init__ (
873
925
estimator = estimator , scoring = scoring , fit_params = fit_params ,
874
926
n_jobs = n_jobs , iid = iid , refit = refit , cv = cv , verbose = verbose ,
875
- pre_dispatch = pre_dispatch , error_score = error_score )
927
+ pre_dispatch = pre_dispatch , error_score = error_score ,
928
+ return_train_score = return_train_score )
876
929
self .param_grid = param_grid
877
930
_check_param_grid (param_grid )
878
931
@@ -1006,6 +1059,10 @@ class RandomizedSearchCV(BaseSearchCV):
1006
1059
FitFailedWarning is raised. This parameter does not affect the refit
1007
1060
step, which will always raise the error.
1008
1061
1062
+ return_train_score : boolean, default=False
1063
+ If ``False``, the ``cv_results_`` attribute will not include training
1064
+ scores.
1065
+
1009
1066
Attributes
1010
1067
----------
1011
1068
cv_results_ : dict of numpy (masked) ndarrays
@@ -1030,16 +1087,27 @@ class RandomizedSearchCV(BaseSearchCV):
1030
1087
'param_kernel' : masked_array(data = ['rbf', rbf', 'rbf'],
1031
1088
mask = False),
1032
1089
'param_gamma' : masked_array(data = [0.1 0.2 0.3], mask = False),
1033
- 'split0_test_score' : [0.8, 0.9, 0.7],
1034
- 'split1_test_score' : [0.82, 0.5, 0.7],
1035
- 'mean_test_score' : [0.81, 0.7, 0.7],
1036
- 'std_test_score' : [0.02, 0.2, 0.],
1037
- 'rank_test_score' : [3, 1, 1],
1090
+ 'split0_test_score' : [0.8, 0.9, 0.7],
1091
+ 'split1_test_score' : [0.82, 0.5, 0.7],
1092
+ 'mean_test_score' : [0.81, 0.7, 0.7],
1093
+ 'std_test_score' : [0.02, 0.2, 0.],
1094
+ 'rank_test_score' : [3, 1, 1],
1095
+ 'split0_train_score' : [0.8, 0.9, 0.7],
1096
+ 'split1_train_score' : [0.82, 0.5, 0.7],
1097
+ 'mean_train_score' : [0.81, 0.7, 0.7],
1098
+ 'std_train_score' : [0.01, 0.2, 0.],
1099
+ 'rank_train_score' : [1, 2, 2],
1100
+ 'mean_test_time' : [0.00073, 0.00063, 0.00043],
1101
+ 'std_test_time' : [1.62e-4, 3.37e-5, 1.1e-5],
1102
+ 'test_std_score' : [0.02, 0.2, 0.],
1103
+ 'test_rank_score' : [3, 1, 1],
1038
1104
'params' : [{'kernel' : 'rbf', 'gamma' : 0.1}, ...],
1039
1105
}
1040
1106
1041
1107
NOTE that the key ``'params'`` is used to store a list of parameter
1042
- settings dict for all the parameter candidates.
1108
+ settings dict for all the parameter candidates. Besides,
1109
+ 'mean_train_score', 'train_split*_score', ... will be present when
1110
+ return_train_score is set to True.
1043
1111
1044
1112
best_estimator_ : estimator
1045
1113
Estimator that was chosen by the search, i.e. estimator
@@ -1094,15 +1162,15 @@ class RandomizedSearchCV(BaseSearchCV):
1094
1162
def __init__ (self , estimator , param_distributions , n_iter = 10 , scoring = None ,
1095
1163
fit_params = None , n_jobs = 1 , iid = True , refit = True , cv = None ,
1096
1164
verbose = 0 , pre_dispatch = '2*n_jobs' , random_state = None ,
1097
- error_score = 'raise' ):
1098
-
1165
+ error_score = 'raise' , return_train_score = False ):
1099
1166
self .param_distributions = param_distributions
1100
1167
self .n_iter = n_iter
1101
1168
self .random_state = random_state
1102
1169
super (RandomizedSearchCV , self ).__init__ (
1103
- estimator = estimator , scoring = scoring , fit_params = fit_params ,
1104
- n_jobs = n_jobs , iid = iid , refit = refit , cv = cv , verbose = verbose ,
1105
- pre_dispatch = pre_dispatch , error_score = error_score )
1170
+ estimator = estimator , scoring = scoring , fit_params = fit_params ,
1171
+ n_jobs = n_jobs , iid = iid , refit = refit , cv = cv , verbose = verbose ,
1172
+ pre_dispatch = pre_dispatch , error_score = error_score ,
1173
+ return_train_score = return_train_score )
1106
1174
1107
1175
def fit (self , X , y = None , groups = None ):
1108
1176
"""Run fit on the estimator with randomly drawn parameters.
0 commit comments