10000 Fixing up the nomenclature and other things; Thou shalt pass now; · scikit-learn/scikit-learn@28290a9 · GitHub
[go: up one dir, main page]

Skip to content

Commit 28290a9

Browse files
committed
Fixing up the nomenclature and other things; Thou shalt pass now;
1 parent 5d998f2 commit 28290a9

File tree

2 files changed

+78
-72
lines changed

2 files changed

+78
-72
lines changed

sklearn/model_selection/_search.py

Lines changed: 16 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -590,34 +590,33 @@ def _fit(self, X, y, labels, parameter_iterable):
590590

591591
time = np.array(time, dtype=np.float64).reshape(n_candidates, n_splits)
592592
time_means = np.average(time, axis=1)
593-
time_stds = np.sqrt(
593+
time_stds = np.sqrt(
594594
np.average((time - time_means[:, np.newaxis]) ** 2,
595595
axis=1))
596-
if self.return_train_score:
597-
train_means = np.average(train_scores, axis=1)
598-
train_stds = np.sqrt(
599-
np.average((train_scores - train_means[:, np.newaxis]) ** 2,
600-
axis=1))
601596

602597
cv_results = dict()
603598
for split_i in range(n_splits):
604599
cv_results["split%d_test_score" % split_i] = test_scores[:,
605600
split_i]
606-
cv_results["mean_test_score"] = means
607-
cv_results["std_test_score"] = stds
601+
cv_results["mean_test_score"] = test_means
602+
cv_results["std_test_score"] = test_stds
608603

609604
if self.return_train_score:
605+
train_means = np.average(train_scores, axis=1)
606+
train_stds = np.sqrt(
607+
np.average((train_scores - train_means[:, np.newaxis]) ** 2,
608+
axis=1))
610609
for split_i in range(n_splits):
611-
results["train_split%d_score" % split_i] = (
610+
cv_results["split%d_train_score" % split_i] = (
612611
train_scores[:, split_i])
613-
results["mean_train_score"] = train_means
614-
results["std_train_scores"] = train_stds
615-
results["rank_train_scores"] = np.asarray(rankdata(-train_means,
616-
method='min'),
617-
dtype=np.int32)
618-
619-
results["mean_test_time"] = time_means
620-
results["std_test_time"] = time_stds
612+
cv_results["mean_train_score"] = train_means
613+
cv_results["std_train_score"] = train_stds
614+
cv_results["rank_train_score"] = np.asarray(rankdata(-train_means,
615+
method='min'),
616+
dtype=np.int32)
617+
618+
cv_results["mean_test_time"] = time_means
619+
cv_results["std_test_time"] = time_stds
621620
ranks = np.asarray(rankdata(-test_means, method='min'), dtype=np.int32)
622621

623622
best_index = np.flatnonzero(ranks == 1)[0]

sklearn/model_selection/tests/test_search.py

Lines changed: 62 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -594,33 +594,34 @@ def test_param_sampler():
594594
assert_equal([x for x in sampler], [x for x in sampler])
595595

596596

597-
def check_cv_results_array_types(results, param_keys, score_keys):
598-
# Check if the search results' array are of correct types
599-
assert_true(all(isinstance(results[param], np.ma.MaskedArray)
597+
def check_cv_results_array_types(cv_results, param_keys, score_keys):
598+
# Check if the search `cv_results`'s array are of correct types
599+
assert_true(all(isinstance(cv_results[param], np.ma.MaskedArray)
600600
for param in param_keys))
601-
assert_true(all(results[key].dtype == object for key in param_keys))
602-
assert_false(any(isinstance(results[key], np.ma.MaskedArray)
601+
assert_true(all(cv_results[key].dtype == object for key in param_keys))
602+
assert_false(any(isinstance(cv_results[key], np.ma.MaskedArray)
603603
for key in score_keys))
604-
assert_true(all(results[key].dtype == np.float64
605-
for key in score_keys if key != 'rank_test_score'))
606-
assert_true(results['rank_test_score'].dtype == np.int32)
604+
assert_true(all(cv_results[key].dtype == np.float64
605+
for key in score_keys if key not in ('rank_test_score',
606+
'rank_train_score')))
607+
assert_true(cv_results['rank_test_score'].dtype == np.int32)
607608

608609

609-
def check_cv_results_keys(results, param_keys, score_keys, n_cand):
610+
def check_cv_results_keys(cv_results, param_keys, score_keys, n_cand):
610611
# Test the search.cv_results_ contains all the required results
611-
assert_array_equal(sorted(results.keys()),
612+
assert_array_equal(sorted(cv_results.keys()),
612613
sorted(param_keys + score_keys + ('params',)))
613-
assert_true(all(results[key].shape == (n_cand,)
614+
assert_true(all(cv_results[key].shape == (n_cand,)
614615
for key in param_keys + score_keys))
615616

616617

617618
def check_cv_results_grid_scores_consistency(search):
618619
# TODO Remove in 0.20
619-
results = search.cv_results_
620-
res_scores = np.vstack(list([results["split%d_test_score" % i]
620+
cv_results = search.cv_results_
621+
res_scores = np.vstack(list([cv_results["split%d_test_score" % i]
621622
for i in range(search.n_splits_)])).T
622-
res_means = results["mean_test_score"]
623-
res_params = results["params"]
623+
res_means = cv_results["mean_test_score"]
624+
res_params = cv_results["params"]
624625
n_cand = len(res_params)
625626
grid_scores = assert_warns(DeprecationWarning, getattr,
626627
search, 'grid_scores_')
@@ -633,7 +634,7 @@ def check_cv_results_grid_scores_consistency(search):
633634
assert_array_equal(grid_scores[i].mean_validation_score, res_means[i])
634635

635636

636-
def test_grid_search_results():
637+
def test_grid_search_cv_results():
637638
X, y = make_classification(n_samples=50, n_features=4,
638639
random_state=42)
639640

@@ -649,43 +650,46 @@ def test_grid_search_results():
649650
grid_search_iid.fit(X, y)
650651

651652
param_keys = ('param_C', 'param_degree', 'param_gamma', 'param_kernel')
652-
score_keys = ('mean_test_score', 'mean_train_score', 'mean_test_time',
653-
'rank_test_score', 'split0_test_score', 'split1_test_score',
654-
'split2_test_score', 'split0_train_score',
655-
'split1_train_score', 'split2_train_score',
656-
'std_test_score', 'std_train_score', 'std_test_time')
653+
score_keys = ('mean_test_score', 'mean_train_score',
654+
'rank_test_score', 'rank_train_score',
655+
'split0_test_score', 'split1_test_score',
656+
'split2_test_score',
657+
'split0_train_score', 'split1_train_score',
658+
'split2_train_score',
659+
'std_test_score', 'std_train_score',
660+
'mean_test_time', 'std_test_time')
657661
n_candidates = n_grid_points
658662

659663
for search, iid in zip((grid_search, grid_search_iid), (False, True)):
660664
assert_equal(iid, search.iid)
661-
results = search.cv_results_
665+
cv_results = search.cv_results_
662666
# Check if score and timing are reasonable
663-
assert_true(all(results['test_rank_test_score'] >= 1))
664-
assert_true(all(results[k] >= 0) for k in score_keys
667+
assert_true(all(cv_results['rank_test_score'] >= 1))
668+
assert_true(all(cv_results[k] >= 0) for k in score_keys
665669
if k is not 'rank_test_score')
666-
assert_true(all(results[k] <= 1) for k in score_keys
670+
assert_true(all(cv_results[k] <= 1) for k in score_keys
667671
if not k.endswith('time') and
668672
k is not 'rank_test_score')
669-
# Check results structure
670-
check_cv_results_array_types(results, param_keys, score_keys)
671-
check_cv_results_keys(results, param_keys, score_keys, n_candidates)
673+
# Check cv_results structure
674+
check_cv_results_array_types(cv_results, param_keys, score_keys)
675+
check_cv_results_keys(cv_results, param_keys, score_keys, n_candidates)
672676
# Check masking
673-
results = grid_search.cv_results_
677+
cv_results = grid_search.cv_results_
674678
n_candidates = len(grid_search.cv_results_['params'])
675-
assert_true(all((results['param_C'].mask[i] and
676-
results['param_gamma'].mask[i] and
677-
not results['param_degree'].mask[i])
679+
assert_true(all((cv_results['param_C'].mask[i] and
680+
cv_results['param_gamma'].mask[i] and
681+
not cv_results['param_degree'].mask[i])
678682
for i in range(n_candidates)
679-
if results['param_kernel'][i] == 'linear'))
680-
assert_true(all((not results['param_C'].mask[i] and
681-
not results['param_gamma'].mask[i] and
682-
results['param_degree'].mask[i])
683+
if cv_results['param_kernel'][i] == 'linear'))
684+
assert_true(all((not cv_results['param_C'].mask[i] and
685+
not cv_results['param_gamma'].mask[i] and
686+
cv_results['param_degree'].mask[i])
683687
for i in range(n_candidates)
684-
if results['param_kernel'][i] == 'rbf'))
688+
if cv_results['param_kernel'][i] == 'rbf'))
685689
check_cv_results_grid_scores_consistency(search)
686690

687691

688-
def test_random_search_results():
692+
def test_random_search_cv_results():
689693
# Make a dataset with a lot of noise to get various kind of prediction
690694
# errors across CV folds and parameter settings
691695
X, y = make_classification(n_samples=200, n_features=100, n_informative=3,
@@ -709,22 +713,25 @@ def test_random_search_results():
709713
random_search_iid.fit(X, y)
710714

711715
param_keys = ('param_C', 'param_gamma')
712-
score_keys = ('test_mean_score', 'train_mean_score', 'test_mean_time',
713-
'test_rank_score', 'test_split0_score', 'test_split1_score',
714-
'test_split2_score', 'train_split0_score',
715-
'train_split1_score', 'train_split2_score',
716-
'test_std_score', 'train_std_score', 'test_std_time')
716+
score_keys = ('mean_test_score', 'mean_train_score',
717+
'rank_test_score', 'rank_train_score',
718+
'split0_test_score', 'split1_test_score',
719+
'split2_test_score',
720+
'split0_train_score', 'split1_train_score',
721+
'split2_train_score',
722+
'std_test_score', 'std_train_score',
723+
'mean_test_time', 'std_test_time')
717724
n_cand = n_search_iter
718725

719726
for search, iid in zip((random_search, random_search_iid), (False, True)):
720727
assert_equal(iid, search.iid)
721-
results = search.cv_results_
728+
cv_results = search.cv_results_
722729
# Check results structure
723-
check_cv_results_array_types(results, param_keys, score_keys)
724-
check_cv_results_keys(results, param_keys, score_keys, n_cand)
730+
check_cv_results_array_types(cv_results, param_keys, score_keys)
731+
check_cv_results_keys(cv_results, param_keys, score_keys, n_cand)
725732
# For random_search, all the param array vals should be unmasked
726-
assert_false(any(results['param_C'].mask) or
727-
any(results['param_gamma'].mask))
733+
assert_false(any(cv_results['param_C'].mask) or
734+
any(cv_results['param_gamma'].mask))
728735
check_cv_results_grid_scores_consistency(search)
729736

730737

@@ -806,15 +813,15 @@ def test_search_cv_results_rank_tie_breaking():
806813

807814
for search in (grid_search, random_search):
808815
search.fit(X, y)
809-
results = search.cv_results_
816+
cv_results = search.cv_results_
810817
# Check tie breaking strategy -
811818
# Check that there is a tie in the mean scores between
812819
# candidates 1 and 2 alone
813-
assert_almost_equal(results['mean_test_score'][0],
814-
results['mean_test_score'][1])
820+
assert_almost_equal(cv_results['mean_test_score'][0],
821+
cv_results['mean_test_score'][1])
815822
try:
816-
assert_almost_equal(results['mean_test_score'][1],
817-
results['mean_test_score'][2])
823+
assert_almost_equal(cv_results['mean_test_score'][1],
824+
cv_results['mean_test_score'][2])
818825
except AssertionError:
819826
pass
820827
# 'min' rank should be assigned to the tied candidates
@@ -841,10 +848,10 @@ def test_grid_search_correct_score_results():
841848
Cs = [.1, 1, 10]
842849
for score in ['f1', 'roc_auc']:
843850
grid_search = GridSearchCV(clf, {'C': Cs}, scoring=score, cv=n_splits)
844-
results = grid_search.fit(X, y).cv_results_
851+
cv_results = grid_search.fit(X, y).cv_results_
845852

846853
# Test scorer names
847-
result_keys = list(results.keys())
854+
result_keys = list(cv_results.keys())
848855
expected_keys = (("mean_test_score", "rank_test_score") +
849856
tuple("split%d_test_score" % cv_i
850857
for cv_i in range(n_splits)))

0 commit comments

Comments (0)