From 13a47510f0ab85b99957cd9646d8c84d948e5387 Mon Sep 17 00:00:00 2001 From: JihaneBennis Date: Sat, 12 Mar 2022 14:35:15 +0100 Subject: [PATCH 1/5] Fix notebook style example --- examples/svm/plot_svm_anova.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/examples/svm/plot_svm_anova.py b/examples/svm/plot_svm_anova.py index ce34e5b7ab3e2..1fd54053987ef 100644 --- a/examples/svm/plot_svm_anova.py +++ b/examples/svm/plot_svm_anova.py @@ -9,7 +9,7 @@ that our model achieves best performance when we select around 10% of features. """ - +#%% import numpy as np import matplotlib.pyplot as plt from sklearn.datasets import load_iris @@ -20,14 +20,14 @@ from sklearn.svm import SVC -# ############################################################################# +#%% # Import some data to play with X, y = load_iris(return_X_y=True) # Add non-informative features np.random.seed(0) X = np.hstack((X, 2 * np.random.random((X.shape[0], 36)))) -# ############################################################################# +#%% # Create a feature-selection transform, a scaler and an instance of SVM that we # combine together to have a full-blown estimator clf = Pipeline( @@ -38,7 +38,7 @@ ] ) -# ############################################################################# +#%% # Plot the cross-validation score as a function of percentile of features score_means = list() score_stds = list() @@ -56,4 +56,4 @@ plt.xlabel("Percentile") plt.ylabel("Accuracy Score") plt.axis("tight") -plt.show() +plt.show() \ No newline at end of file From 5a55a3eba7817e5e676dd5a9255da390fdd22aa4 Mon Sep 17 00:00:00 2001 From: JihaneBennis Date: Sat, 12 Mar 2022 14:40:57 +0100 Subject: [PATCH 2/5] lines --- examples/svm/plot_svm_anova.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/svm/plot_svm_anova.py b/examples/svm/plot_svm_anova.py index 1fd54053987ef..e80ad28a6835d 100644 --- a/examples/svm/plot_svm_anova.py +++ b/examples/svm/plot_svm_anova.py @@ -9,7 +9,7 @@ that our model achieves best performance when we select around 10% of features. """ -#%% +# %% import numpy as np import matplotlib.pyplot as plt from sklearn.datasets import load_iris @@ -20,14 +20,14 @@ from sklearn.svm import SVC -#%% +# %% # Import some data to play with X, y = load_iris(return_X_y=True) # Add non-informative features np.random.seed(0) X = np.hstack((X, 2 * np.random.random((X.shape[0], 36)))) -#%% +# %% # Create a feature-selection transform, a scaler and an instance of SVM that we # combine together to have a full-blown estimator clf = Pipeline( @@ -38,7 +38,7 @@ ] ) -#%% +# %% # Plot the cross-validation score as a function of percentile of features score_means = list() score_stds = list() From 5c78da056de28f1655ba7108ef2fe25282241852 Mon Sep 17 00:00:00 2001 From: JihaneBennis Date: Sat, 12 Mar 2022 15:02:55 +0100 Subject: [PATCH 3/5] flake8 correction --- examples/svm/plot_svm_anova.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/svm/plot_svm_anova.py b/examples/svm/plot_svm_anova.py index e80ad28a6835d..86321a4000746 100644 --- a/examples/svm/plot_svm_anova.py +++ b/examples/svm/plot_svm_anova.py @@ -56,4 +56,4 @@ plt.xlabel("Percentile") plt.ylabel("Accuracy Score") plt.axis("tight") -plt.show() \ No newline at end of file +plt.show() From f4757b2525de487b54855297609585a94785e5dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Fri, 18 Mar 2022 15:06:53 +0100 Subject: [PATCH 4/5] Update examples/svm/plot_svm_anova.py Co-authored-by: Olivier Grisel --- examples/svm/plot_svm_anova.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/svm/plot_svm_anova.py b/examples/svm/plot_svm_anova.py index 86321a4000746..d142ece5cbdb9 100644 --- a/examples/svm/plot_svm_anova.py +++ b/examples/svm/plot_svm_anova.py @@ -24,8 +24,8 @@ # Import some data to play with X, y = load_iris(return_X_y=True) # Add non-informative features -np.random.seed(0) -X = np.hstack((X, 2 * np.random.random((X.shape[0], 36)))) +rng = np.random.RandomState(0) +X = np.hstack((X, 2 * rng.random((X.shape[0], 36)))) # %% # Create a feature-selection transform, a scaler and an instance of SVM that we From 05488f125be34ab4c5bd6138dbb0b644a3e5bbf1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Fri, 18 Mar 2022 15:11:31 +0100 Subject: [PATCH 5/5] Move imports closer to the point they are used and move sections to titles. --- examples/svm/plot_svm_anova.py | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/examples/svm/plot_svm_anova.py b/examples/svm/plot_svm_anova.py index d142ece5cbdb9..730d6a35f35a8 100644 --- a/examples/svm/plot_svm_anova.py +++ b/examples/svm/plot_svm_anova.py @@ -9,27 +9,30 @@ that our model achieves best performance when we select around 10% of features. """ + # %% +# Load some data to play with +# --------------------------- import numpy as np -import matplotlib.pyplot as plt from sklearn.datasets import load_iris -from sklearn.feature_selection import SelectPercentile, chi2 -from sklearn.model_selection import cross_val_score -from sklearn.pipeline import Pipeline -from sklearn.preprocessing import StandardScaler -from sklearn.svm import SVC - -# %% -# Import some data to play with X, y = load_iris(return_X_y=True) + # Add non-informative features rng = np.random.RandomState(0) X = np.hstack((X, 2 * rng.random((X.shape[0], 36)))) # %% +# Create the pipeline +# ------------------- +from sklearn.pipeline import Pipeline +from sklearn.feature_selection import SelectPercentile, chi2 +from sklearn.preprocessing import StandardScaler +from sklearn.svm import SVC + # Create a feature-selection transform, a scaler and an instance of SVM that we # combine together to have a full-blown estimator + clf = Pipeline( [ ("anova", SelectPercentile(chi2)), @@ -40,6 +43,10 @@ # %% # Plot the cross-validation score as a function of percentile of features +# ----------------------------------------------------------------------- +import matplotlib.pyplot as plt +from sklearn.model_selection import cross_val_score + score_means = list() score_stds = list() percentiles = (1, 3, 6, 10, 15, 20, 30, 40, 60, 80, 100)