Merge pull request #3 from rvraghav93/model_selection_fix_examples · raghavrv/scikit-learn@beec231

Commit beec231

Merge pull request #3 from rvraghav93/model_selection_fix_examples
[MRG+1] FIX all the examples to use the new cv classes
2 parents: fe8f5a2 + 0d7d738

37 files changed: 96 additions, 79 deletions
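All 37 files follow the same mechanical pattern: imports move from the deprecated sklearn.cross_validation and sklearn.grid_search modules to the consolidated sklearn.model_selection module introduced in scikit-learn 0.18. A minimal sketch of the recurring import migration (every name below appears in the hunks that follow):

# Before: deprecated modules, removed throughout this diff
# from sklearn.cross_validation import train_test_split, KFold, cross_val_score
# from sklearn.grid_search import GridSearchCV

# After: everything lives in one module
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV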

examples/applications/face_recognition.py

Lines changed: 2 additions & 2 deletions

@@ -31,9 +31,9 @@
 import logging
 import matplotlib.pyplot as plt

-from sklearn.cross_validation import train_test_split
+from sklearn.model_selection import train_test_split
+from sklearn.model_selection import GridSearchCV
 from sklearn.datasets import fetch_lfw_people
-from sklearn.grid_search import GridSearchCV
 from sklearn.metrics import classification_report
 from sklearn.metrics import confusion_matrix
 from sklearn.decomposition import RandomizedPCA

examples/calibration/plot_calibration.py

Lines changed: 1 addition & 1 deletion

@@ -36,7 +36,7 @@
 from sklearn.naive_bayes import GaussianNB
 from sklearn.metrics import brier_score_loss
 from sklearn.calibration import CalibratedClassifierCV
-from sklearn.cross_validation import train_test_split
+from sklearn.model_selection import train_test_split


 n_samples = 50000

examples/calibration/plot_calibration_curve.py

Lines changed: 1 addition & 1 deletion

@@ -56,7 +56,7 @@
 from sklearn.metrics import (brier_score_loss, precision_score, recall_score,
                              f1_score)
 from sklearn.calibration import CalibratedClassifierCV, calibration_curve
-from sklearn.cross_validation import train_test_split
+from sklearn.model_selection import train_test_split


 # Create dataset of classification task with many redundant and few

examples/classification/plot_classifier_comparison.py

Lines changed: 1 addition & 1 deletion

@@ -31,7 +31,7 @@
 import numpy as np
 import matplotlib.pyplot as plt
 from matplotlib.colors import ListedColormap
-from sklearn.cross_validation import train_test_split
+from sklearn.model_selection import train_test_split
 from sklearn.preprocessing import StandardScaler
 from sklearn.datasets import make_moons, make_circles, make_classification
 from sklearn.neighbors import KNeighborsClassifier

examples/cluster/plot_feature_agglomeration_vs_univariate_selection.py

Lines changed: 3 additions & 3 deletions

@@ -30,9 +30,9 @@
 from sklearn.cluster import FeatureAgglomeration
 from sklearn.linear_model import BayesianRidge
 from sklearn.pipeline import Pipeline
-from sklearn.grid_search import GridSearchCV
 from sklearn.externals.joblib import Memory
-from sklearn.cross_validation import KFold
+from sklearn.model_selection import GridSearchCV
+from sklearn.model_selection import KFold

 ###############################################################################
 # Generate data

@@ -60,7 +60,7 @@

 ###############################################################################
 # Compute the coefs of a Bayesian Ridge with GridSearch
-cv = KFold(len(y), 2)  # cross-validation generator for model selection
+cv = KFold(2)  # cross-validation generator for model selection
 ridge = BayesianRidge()
 cachedir = tempfile.mkdtemp()
 mem = Memory(cachedir=cachedir, verbose=1)
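The second hunk above also shows the behavioural change that comes with the move: the new KFold is constructed from the number of folds alone, without the sample count, so the splitter is data-independent and the data is only supplied where it is used. A minimal sketch of the new-style cv object plugged into GridSearchCV (the toy data and parameter grid are illustrative, not from this diff):

import numpy as np
from sklearn.linear_model import BayesianRidge
from sklearn.model_selection import GridSearchCV, KFold

X = np.random.randn(20, 3)  # toy data, illustrative only
y = np.random.randn(20)

cv = KFold(2)  # number of folds only; no len(y) needed any more
search = GridSearchCV(BayesianRidge(), {'n_iter': [100, 300]}, cv=cv)
search.fit(X, y)  # the data arrives here, not when the splitter is built
print(search.best_params_)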

examples/covariance/plot_covariance_estimation.py

Lines changed: 1 addition & 1 deletion

@@ -49,7 +49,7 @@

 from sklearn.covariance import LedoitWolf, OAS, ShrunkCovariance, \
     log_likelihood, empirical_covariance
-from sklearn.grid_search import GridSearchCV
+from sklearn.model_selection import GridSearchCV


 ###############################################################################

examples/decomposition/plot_pca_vs_fa_model_selection.py

Lines changed: 2 additions & 2 deletions

@@ -35,8 +35,8 @@

 from sklearn.decomposition import PCA, FactorAnalysis
 from sklearn.covariance import ShrunkCovariance, LedoitWolf
-from sklearn.cross_validation import cross_val_score
-from sklearn.grid_search import GridSearchCV
+from sklearn.model_selection import cross_val_score
+from sklearn.model_selection import GridSearchCV

 ###############################################################################
 # Create the data

examples/ensemble/plot_gradient_boosting_oob.py

Lines changed: 4 additions & 4 deletions

@@ -33,8 +33,8 @@
 import matplotlib.pyplot as plt

 from sklearn import ensemble
-from sklearn.cross_validation import KFold
-from sklearn.cross_validation import train_test_split
+from sklearn.model_selection import KFold
+from sklearn.model_selection import train_test_split


 # Generate data (adapted from G. Ridgeway's gbm example)

@@ -75,10 +75,10 @@ def heldout_score(clf, X_test, y_test):


 def cv_estimate(n_folds=3):
-    cv = KFold(n=X_train.shape[0], n_folds=n_folds)
+    cv = KFold(n_folds=n_folds)
     cv_clf = ensemble.GradientBoostingClassifier(**params)
     val_scores = np.zeros((n_estimators,), dtype=np.float64)
-    for train, test in cv:
+    for train, test in cv.split(X_train, y_train):
         cv_clf.fit(X_train[train], y_train[train])
         val_scores += heldout_score(cv_clf, X_train[test], y_train[test])
     val_scores /= n_folds
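The cv_estimate change above shows the other recurring adjustment: the new splitters hold no data, so they are no longer directly iterable; the (train, test) index pairs come from their split method instead. A minimal, self-contained sketch of the new iteration pattern (toy arrays, illustrative only):

import numpy as np
from sklearn.model_selection import KFold

X = np.arange(12).reshape(6, 2)  # toy data, illustrative only
y = np.array([0, 1, 0, 1, 0, 1])

cv = KFold(3)
# split(X, y) yields one (train_indices, test_indices) pair per fold
for train, test in cv.split(X, y):
    print("train:", train, "test:", test)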

examples/ensemble/plot_partial_dependence.py

Lines changed: 1 addition & 1 deletion

@@ -51,7 +51,7 @@

 from mpl_toolkits.mplot3d import Axes3D

-from sklearn.cross_validation import train_test_split
+from sklearn.model_selection import train_test_split
 from sklearn.ensemble import GradientBoostingRegressor
 from sklearn.ensemble.partial_dependence import plot_partial_dependence
 from sklearn.ensemble.partial_dependence import partial_dependence

examples/exercises/plot_cv_diabetes.py

Lines changed: 10 additions & 6 deletions

@@ -14,21 +14,25 @@
 import numpy as np
 import matplotlib.pyplot as plt

-from sklearn import cross_validation, datasets, linear_model
+from sklearn import datasets
+from sklearn.linear_model import LassoCV
+from sklearn.linear_model import Lasso
+from sklearn.model_selection import KFold
+from sklearn.model_selection import cross_val_score

 diabetes = datasets.load_diabetes()
 X = diabetes.data[:150]
 y = diabetes.target[:150]

-lasso = linear_model.Lasso()
+lasso = Lasso()
 alphas = np.logspace(-4, -.5, 30)

 scores = list()
 scores_std = list()

 for alpha in alphas:
     lasso.alpha = alpha
-    this_scores = cross_validation.cross_val_score(lasso, X, y, n_jobs=1)
+    this_scores = cross_val_score(lasso, X, y, n_jobs=1)
     scores.append(np.mean(this_scores))
     scores_std.append(np.std(this_scores))

@@ -51,15 +55,15 @@
 # performs cross-validation on the training data it receives).
 # We use external cross-validation to see how much the automatically obtained
 # alphas differ across different cross-validation folds.
-lasso_cv = linear_model.LassoCV(alphas=alphas)
-k_fold = cross_validation.KFold(len(X), 3)
+lasso_cv = LassoCV(alphas=alphas)
+k_fold = KFold(3)

 print("Answer to the bonus question:",
       "how much can you trust the selection of alpha?")
 print()
 print("Alpha parameters maximising the generalization score on different")
 print("subsets of the data:")
-for k, (train, test) in enumerate(k_fold):
+for k, (train, test) in enumerate(k_fold.split(X, y)):
     lasso_cv.fit(X[train], y[train])
     print("[fold {0}] alpha: {1:.5f}, score: {2:.5f}".
           format(k, lasso_cv.alpha_, lasso_cv.score(X[test], y[test])))
