diff --git a/doc/datasets/mldata_fixture.py b/doc/datasets/mldata_fixture.py
index 17dae0e571928..37d9f9af05dc3 100644
--- a/doc/datasets/mldata_fixture.py
+++ b/doc/datasets/mldata_fixture.py
@@ -42,4 +42,4 @@ def setup_module():
 
 def teardown_module():
     uninstall_mldata_mock()
-    shutil.rmtree(custom_data_home)
\ No newline at end of file
+    shutil.rmtree(custom_data_home)
diff --git a/examples/covariance/plot_sparse_cov.py b/examples/covariance/plot_sparse_cov.py
index bd2013956b5e2..aac87db41a803 100644
--- a/examples/covariance/plot_sparse_cov.py
+++ b/examples/covariance/plot_sparse_cov.py
@@ -126,7 +126,7 @@
 # plot the model selection metric
 pl.figure(figsize=(4, 3))
 pl.axes([.2, .15, .75, .7])
-pl.plot(model.cv_alphas_, np.mean(model.cv_scores, axis=1), 'o-')
+pl.plot(model.cv_alphas_, np.mean(model.grid_scores, axis=1), 'o-')
 pl.axvline(model.alpha_, color='.5')
 pl.title('Model selection')
 pl.ylabel('Cross-validation score')
diff --git a/examples/grid_search_digits.py b/examples/grid_search_digits.py
index fc7d441a5020b..f917d2124376a 100644
--- a/examples/grid_search_digits.py
+++ b/examples/grid_search_digits.py
@@ -60,7 +60,7 @@
     print()
     print("Grid scores on development set:")
     print()
-    for params, mean_score, scores in clf.cv_scores_:
+    for params, mean_score, scores in clf.grid_scores_:
         print("%0.3f (+/-%0.03f) for %r"
               % (mean_score, scores.std() / 2, params))
     print()
diff --git a/examples/plot_rfe_with_cross_validation.py b/examples/plot_rfe_with_cross_validation.py
index af64c9c29695a..24bc1ee82e531 100644
--- a/examples/plot_rfe_with_cross_validation.py
+++ b/examples/plot_rfe_with_cross_validation.py
@@ -32,5 +32,5 @@
 pl.figure()
 pl.xlabel("Number of features selected")
 pl.ylabel("Cross validation score (nb of misclassifications)")
-pl.plot(range(1, len(rfecv.cv_scores_) + 1), rfecv.cv_scores_)
+pl.plot(range(1, len(rfecv.grid_scores_) + 1), rfecv.grid_scores_)
 pl.show()
diff --git a/examples/randomized_search.py b/examples/randomized_search.py
index 540062698931b..3c3c54cd41a41 100644
--- a/examples/randomized_search.py
+++ b/examples/randomized_search.py
@@ -39,8 +39,8 @@
 
 
 # Utility function to report best scores
-def report(cv_scores, n_top=3):
-    top_scores = sorted(cv_scores, key=itemgetter(1), reverse=True)[:n_top]
+def report(grid_scores, n_top=3):
+    top_scores = sorted(grid_scores, key=itemgetter(1), reverse=True)[:n_top]
     for i, score in enumerate(top_scores):
         print("Model with rank: {0}".format(i + 1))
         print("Mean validation score: {0:.3f} (std: {1:.3f})".format(
@@ -67,7 +67,7 @@ def report(cv_scores, n_top=3):
 random_search.fit(X, y)
 print("RandomizedSearchCV took %.2f seconds for %d candidates"
       " parameter settings." % ((time() - start), n_iter_search))
-report(random_search.cv_scores_)
+report(random_search.grid_scores_)
 
 # use a full grid over all parameters
 param_grid = {"max_depth": [3, None],
@@ -82,5 +82,5 @@ def report(cv_scores, n_top=3):
 grid_search.fit(X, y)
 
 print("GridSearchCV took %.2f seconds for %d candidate parameter settings."
-      % (time() - start, len(grid_search.cv_scores_)))
-report(grid_search.cv_scores_)
+      % (time() - start, len(grid_search.grid_scores_)))
+report(grid_search.grid_scores_)
diff --git a/examples/svm/plot_rbf_parameters.py b/examples/svm/plot_rbf_parameters.py
index f298ebf01205c..722abc4beae4c 100644
--- a/examples/svm/plot_rbf_parameters.py
+++ b/examples/svm/plot_rbf_parameters.py
@@ -105,8 +105,8 @@
 pl.axis('tight')
 
 # plot the scores of the grid
-# cv_scores_ contains parameter settings and scores
-score_dict = grid.cv_scores_
+# grid_scores_ contains parameter settings and scores
+score_dict = grid.grid_scores_
 
 # We extract just the scores
 scores = [x[1] for x in score_dict]
diff --git a/examples/svm/plot_svm_scale_c.py b/examples/svm/plot_svm_scale_c.py
index b21a2a52f341e..133fd8be31898 100644
--- a/examples/svm/plot_svm_scale_c.py
+++ b/examples/svm/plot_svm_scale_c.py
@@ -131,7 +131,7 @@
                         cv=ShuffleSplit(n=n_samples, train_size=train_size,
                                         n_iter=250, random_state=1))
     grid.fit(X, y)
-    scores = [x[1] for x in grid.cv_scores_]
+    scores = [x[1] for x in grid.grid_scores_]
 
     scales = [(1, 'No scaling'),
               ((n_samples * train_size), '1/n_samples'),
diff --git a/sklearn/covariance/graph_lasso_.py b/sklearn/covariance/graph_lasso_.py
index b72a67a527ee9..73f13d71c363a 100644
--- a/sklearn/covariance/graph_lasso_.py
+++ b/sklearn/covariance/graph_lasso_.py
@@ -422,7 +422,7 @@ class GraphLassoCV(GraphLasso):
     `cv_alphas_`: list of float
         All penalization parameters explored.
 
-    `cv_scores`: 2D numpy.ndarray (n_alphas, n_folds)
+    `grid_scores`: 2D numpy.ndarray (n_alphas, n_folds)
         Log-likelihood score on left-out data across folds.
 
     See Also
@@ -551,14 +551,14 @@ def fit(self, X, y=None):
                       % (i + 1, n_refinements, time.time() - t0))
 
         path = list(zip(*path))
-        cv_scores = list(path[1])
+        grid_scores = list(path[1])
         alphas = list(path[0])
         # Finally, compute the score with alpha = 0
         alphas.append(0)
-        cv_scores.append(cross_val_score(EmpiricalCovariance(), X,
+        grid_scores.append(cross_val_score(EmpiricalCovariance(), X,
                                          cv=cv, n_jobs=self.n_jobs,
                                          verbose=inner_verbose))
-        self.cv_scores = np.array(cv_scores)
+        self.grid_scores = np.array(grid_scores)
         best_alpha = alphas[best_index]
         self.alpha_ = best_alpha
         self.cv_alphas_ = alphas
diff --git a/sklearn/feature_selection/rfe.py b/sklearn/feature_selection/rfe.py
index 19aa27b7c14d6..799e0aaee9869 100644
--- a/sklearn/feature_selection/rfe.py
+++ b/sklearn/feature_selection/rfe.py
@@ -261,9 +261,9 @@ class RFECV(RFE, MetaEstimatorMixin):
         Selected (i.e., estimated best)
         features are assigned rank 1.
 
-    `cv_scores_` : array of shape [n_subsets_of_features]
+    `grid_scores_` : array of shape [n_subsets_of_features]
         The cross-validation scores such that
-        `cv_scores_[i]` corresponds to
+        `grid_scores_[i]` corresponds to
         the CV score of the i-th subset of features.
 
     `estimator_` : object
@@ -373,5 +373,5 @@ def fit(self, X, y):
             self.estimator_.set_params(**self.estimator_params)
         self.estimator_.fit(self.transform(X), y)
 
-        self.cv_scores_ = scores / n
+        self.grid_scores_ = scores / n
         return self
diff --git a/sklearn/feature_selection/tests/test_rfe.py b/sklearn/feature_selection/tests/test_rfe.py
index 01a3a3f6fa403..853634ca3c700 100644
--- a/sklearn/feature_selection/tests/test_rfe.py
+++ b/sklearn/feature_selection/tests/test_rfe.py
@@ -72,7 +72,7 @@ def test_rfecv():
     rfecv = RFECV(estimator=SVC(kernel="linear"), step=1, cv=3)
     rfecv.fit(X, y)
     # non-regression test for missing worst feature:
-    assert_equal(len(rfecv.cv_scores_), X.shape[1])
+    assert_equal(len(rfecv.grid_scores_), X.shape[1])
     assert_equal(len(rfecv.ranking_), X.shape[1])
     X_r = rfecv.transform(X)
 
diff --git a/sklearn/grid_search.py b/sklearn/grid_search.py
index 0ca929d8c494c..babf7a6b2e77b 100644
--- a/sklearn/grid_search.py
+++ b/sklearn/grid_search.py
@@ -488,8 +488,8 @@ def _fit(self, X, y, parameter_iterable):
             n_jobs=self.n_jobs, verbose=self.verbose,
             pre_dispatch=pre_dispatch)(
                 delayed(fit_grid_point)(
-                    X, y, base_estimator, parameters, train, test, self.scorer_,
-                    self.verbose, **self.fit_params)
+                    X, y, base_estimator, parameters, train, test,
+                    self.scorer_, self.verbose, **self.fit_params)
                 for parameters in parameter_iterable
                 for train, test in cv)
 
@@ -498,7 +498,7 @@ def _fit(self, X, y, parameter_iterable):
         n_folds = len(cv)
 
         scores = list()
-        cv_scores = list()
+        grid_scores = list()
         for grid_start in range(0, n_fits, n_folds):
             n_test_samples = 0
             score = 0
@@ -516,16 +516,16 @@ def _fit(self, X, y, parameter_iterable):
                 score /= float(n_folds)
             scores.append((score, parameters))
             # TODO: shall we also store the test_fold_sizes?
-            cv_scores.append(_CVScoreTuple(
+            grid_scores.append(_CVScoreTuple(
                 parameters,
                 score,
                 np.array(all_scores)))
         # Store the computed scores
-        self.cv_scores_ = cv_scores
+        self.grid_scores_ = grid_scores
 
         # Find the best parameters by comparing on the mean validation score:
         # note that `sorted` is deterministic in the way it breaks ties
-        best = sorted(cv_scores, key=lambda x: x.mean_validation_score,
+        best = sorted(grid_scores, key=lambda x: x.mean_validation_score,
                       reverse=True)[0]
         self.best_params_ = best.parameters
         self.best_score_ = best.mean_validation_score
@@ -630,7 +630,7 @@ class GridSearchCV(BaseSearchCV):
 
     Attributes
     ----------
-    `cv_scores_` : list of named tuples
+    `grid_scores_` : list of named tuples
         Contains scores for all parameter combinations in param_grid.
         Each entry corresponds to one parameter setting.
         Each named tuple has the attributes:
@@ -685,12 +685,6 @@ def __init__(self, estimator, param_grid, scoring=None, loss_func=None,
         self.param_grid = param_grid
         _check_param_grid(param_grid)
 
-    @property
-    def grid_scores_(self):
-        warnings.warn("grid_scores_ is deprecated and will be removed in 0.15."
-                      " Use cv_scores_ instead.", DeprecationWarning)
-        return self.cv_scores_
-
     def fit(self, X, y=None, **params):
         """Run fit with all sets of parameters.
 
@@ -789,7 +783,7 @@ class RandomizedSearchCV(BaseSearchCV):
 
     Attributes
     ----------
-    `cv_scores_` : list of named tuples
+    `grid_scores_` : list of named tuples
         Contains scores for all parameter combinations in param_grid.
         Each entry corresponds to one parameter setting.
         Each named tuple has the attributes:
diff --git a/sklearn/tests/test_grid_search.py b/sklearn/tests/test_grid_search.py
index b44eff0dad2a8..c7c3a70c7d035 100644
--- a/sklearn/tests/test_grid_search.py
+++ b/sklearn/tests/test_grid_search.py
@@ -157,7 +157,7 @@ def test_grid_search():
     assert_equal(grid_search.best_estimator_.foo_param, 2)
 
     for i, foo_i in enumerate([1, 2, 3]):
-        assert_true(grid_search.cv_scores_[i][0]
+        assert_true(grid_search.grid_scores_[i][0]
                     == {'foo_param': foo_i})
     # Smoke test the score etc:
     grid_search.score(X, y)
@@ -194,19 +194,19 @@ def test_grid_search_no_score():
                   GridSearchCV, clf_no_score, {'C': Cs})
 
 
-def test_trivial_cv_scores():
+def test_trivial_grid_scores():
     """Test search over a "grid" with only one point.
 
-    Non-regression test: cv_scores_ wouldn't be set by GridSearchCV.
+    Non-regression test: grid_scores_ wouldn't be set by GridSearchCV.
     """
     clf = MockClassifier()
     grid_search = GridSearchCV(clf, {'foo_param': [1]})
     grid_search.fit(X, y)
-    assert_true(hasattr(grid_search, "cv_scores_"))
+    assert_true(hasattr(grid_search, "grid_scores_"))
 
     random_search = RandomizedSearchCV(clf, {'foo_param': [0]})
     random_search.fit(X, y)
-    assert_true(hasattr(random_search, "cv_scores_"))
+    assert_true(hasattr(random_search, "grid_scores_"))
 
 
 def test_no_refit():
@@ -245,7 +245,7 @@ def test_grid_search_iid():
     # once with iid=True (default)
     grid_search = GridSearchCV(svm, param_grid={'C': [1, 10]}, cv=cv)
     grid_search.fit(X, y)
-    first = grid_search.cv_scores_[0]
+    first = grid_search.grid_scores_[0]
     assert_equal(first.parameters['C'], 1)
     assert_array_almost_equal(first.cv_validation_scores, [1, 1. / 3.])
     # for first split, 1/4 of dataset is in test, for second 3/4.
@@ -257,7 +257,7 @@ def test_grid_search_iid():
     grid_search = GridSearchCV(svm, param_grid={'C': [1, 10]}, cv=cv,
                                iid=False)
     grid_search.fit(X, y)
-    first = grid_search.cv_scores_[0]
+    first = grid_search.grid_scores_[0]
     assert_equal(first.parameters['C'], 1)
     # scores are the same as above
     assert_array_almost_equal(first.cv_validation_scores, [1, 1. / 3.])
@@ -471,7 +471,7 @@ def test_X_as_list():
     cv = KFold(n=len(X), n_folds=3)
     grid_search = GridSearchCV(clf, {'foo_param': [1, 2, 3]}, cv=cv)
     grid_search.fit(X.tolist(), y).score(X, y)
-    assert_true(hasattr(grid_search, "cv_scores_"))
+    assert_true(hasattr(grid_search, "grid_scores_"))
 
 
 def test_unsupervised_grid_search():
@@ -511,7 +511,7 @@ def test_param_sampler():
         assert_true(0 <= sample["C"] <= 1)
 
 
-def test_randomized_search_cv_scores():
+def test_randomized_search_grid_scores():
     # Make a dataset with a lot of noise to get various kind of prediction
     # errors across CV folds and parameter settings
     X, y = make_classification(n_samples=200, n_features=100, n_informative=3,
@@ -527,10 +527,10 @@ def test_randomized_search_cv_scores():
     search = RandomizedSearchCV(SVC(), n_iter=n_search_iter, cv=n_cv_iter,
                                 param_distributions=params, iid=False)
     search.fit(X, y)
-    assert_equal(len(search.cv_scores_), n_search_iter)
+    assert_equal(len(search.grid_scores_), n_search_iter)
 
     # Check consistency of the structure of each cv_score item
-    for cv_score in search.cv_scores_:
+    for cv_score in search.grid_scores_:
         assert_equal(len(cv_score.cv_validation_scores), n_cv_iter)
         # Because we set iid to False, the mean_validation score is the
         # mean of the fold mean scores instead of the aggregate sample-wise
@@ -541,12 +541,12 @@ def test_randomized_search_cv_scores():
                      list(sorted(params.keys())))
 
     # Check the consistency with the best_score_ and best_params_ attributes
-    sorted_cv_scores = list(sorted(search.cv_scores_,
+    sorted_grid_scores = list(sorted(search.grid_scores_,
                             key=lambda x: x.mean_validation_score))
-    best_score = sorted_cv_scores[-1].mean_validation_score
+    best_score = sorted_grid_scores[-1].mean_validation_score
     assert_equal(search.best_score_, best_score)
 
-    tied_best_params = [s.parameters for s in sorted_cv_scores
+    tied_best_params = [s.parameters for s in sorted_grid_scores
                         if s.mean_validation_score == best_score]
     assert_true(search.best_params_ in tied_best_params,
                 "best_params_={0} is not part of the"
@@ -563,7 +563,7 @@ def test_grid_search_score_consistency():
         grid_search = GridSearchCV(clf, {'C': Cs}, scoring=score)
         grid_search.fit(X, y)
         cv = StratifiedKFold(n_folds=3, y=y)
-        for C, scores in zip(Cs, grid_search.cv_scores_):
+        for C, scores in zip(Cs, grid_search.grid_scores_):
            clf.set_params(C=C)
            scores = scores[2]  # get the separate runs from grid scores
            i = 0
@@ -607,7 +607,7 @@ def test_grid_search_with_multioutput_data():
     for est in estimators:
         grid_search = GridSearchCV(est, est_parameters, cv=cv)
         grid_search.fit(X, y)
-        for parameters, _, cv_validation_scores in grid_search.cv_scores_:
+        for parameters, _, cv_validation_scores in grid_search.grid_scores_:
             est.set_params(**parameters)
 
             for i, (train, test) in enumerate(cv):
@@ -620,7 +620,7 @@ def test_grid_search_with_multioutput_data():
     for est in estimators:
         random_search = RandomizedSearchCV(est, est_parameters, cv=cv)
         random_search.fit(X, y)
-        for parameters, _, cv_validation_scores in random_search.cv_scores_:
+        for parameters, _, cv_validation_scores in random_search.grid_scores_:
             est.set_params(**parameters)
 
             for i, (train, test) in enumerate(cv):
@@ -633,7 +633,7 @@ def test_grid_search_with_multioutput_data():
     for est in estimators:
         random_search = RandomizedSearchCV(est, est_parameters, cv=cv)
         random_search.fit(X, y)
-        for parameters, _, cv_validation_scores in random_search.cv_scores_:
+        for parameters, _, cv_validation_scores in random_search.grid_scores_:
             est.set_params(**parameters)
 
             for i, (train, test) in enumerate(cv):
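
Usage note (not part of the patch): a minimal sketch of iterating over the restored `grid_scores_` attribute, assuming the scikit-learn 0.14-era API this diff targets (`sklearn.grid_search.GridSearchCV`, whose `grid_scores_` is a list of named tuples with `parameters`, `mean_validation_score` and `cv_validation_scores`). The dataset and parameter grid below are illustrative only; the loop mirrors examples/grid_search_digits.py above.

    from sklearn.datasets import load_iris
    from sklearn.grid_search import GridSearchCV  # module path used in this diff
    from sklearn.svm import SVC

    iris = load_iris()
    X, y = iris.data, iris.target

    # Small illustrative grid; any estimator / param_grid would do.
    grid = GridSearchCV(SVC(), param_grid={'C': [0.1, 1, 10]}, cv=3)
    grid.fit(X, y)

    # Each entry of grid_scores_ unpacks as
    # (parameters, mean_validation_score, cv_validation_scores).
    for params, mean_score, fold_scores in grid.grid_scores_:
        print("%0.3f (+/-%0.03f) for %r"
              % (mean_score, fold_scores.std() / 2, params))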