|
10 | 10 |
|
11 | 11 | """
|
12 | 12 |
|
| 13 | +# %% |
| 14 | +# Load some data to play with |
| 15 | +# --------------------------- |
13 | 16 | import numpy as np
|
14 |
| -import matplotlib.pyplot as plt |
15 | 17 | from sklearn.datasets import load_iris
|
16 |
| -from sklearn.feature_selection import SelectPercentile, chi2 |
17 |
| -from sklearn.model_selection import cross_val_score |
18 |
| -from sklearn.pipeline import Pipeline |
19 |
| -from sklearn.preprocessing import StandardScaler |
20 |
| -from sklearn.svm import SVC |
21 |
| - |
22 | 18 |
|
23 |
| -# ############################################################################# |
24 |
| -# Import some data to play with |
25 | 19 | X, y = load_iris(return_X_y=True)
|
| 20 | + |
26 | 21 | # Add non-informative features
|
27 |
| -np.random.seed(0) |
28 |
| -X = np.hstack((X, 2 * np.random.random((X.shape[0], 36)))) |
| 22 | +rng = np.random.RandomState(0) |
| 23 | +X = np.hstack((X, 2 * rng.random((X.shape[0], 36)))) |
| 24 | + |
| 25 | +# %% |
| 26 | +# Create the pipeline |
| 27 | +# ------------------- |
| 28 | +from sklearn.pipeline import Pipeline |
| 29 | +from sklearn.feature_selection import SelectPercentile, chi2 |
| 30 | +from sklearn.preprocessing import StandardScaler |
| 31 | +from sklearn.svm import SVC |
29 | 32 |
|
30 |
| -# ############################################################################# |
31 | 33 | # Create a feature-selection transform, a scaler and an instance of SVM that we
|
32 | 34 | # combine together to have a full-blown estimator
|
| 35 | + |
33 | 36 | clf = Pipeline(
|
34 | 37 | [
|
35 | 38 | ("anova", SelectPercentile(chi2)),
|
|
38 | 41 | ]
|
39 | 42 | )
|
40 | 43 |
|
41 |
| -# ############################################################################# |
| 44 | +# %% |
42 | 45 | # Plot the cross-validation score as a function of percentile of features
|
| 46 | +# ----------------------------------------------------------------------- |
| 47 | +import matplotlib.pyplot as plt |
| 48 | +from sklearn.model_selection import cross_val_score |
| 49 | + |
43 | 50 | score_means = list()
|
44 | 51 | score_stds = list()
|
45 | 52 | percentiles = (1, 3, 6, 10, 15, 20, 30, 40, 60, 80, 100)
|
|
0 commit comments