From 20e4355cd7da5ca90ac152be0ff5716ef6f50872 Mon Sep 17 00:00:00 2001
From: Brenden
Date: Thu, 10 Feb 2022 13:10:26 -0500
Subject: [PATCH 1/3] updated notebook style for plot_train_error_vs_test_error

---
 .../plot_train_error_vs_test_error.py         | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/examples/model_selection/plot_train_error_vs_test_error.py b/examples/model_selection/plot_train_error_vs_test_error.py
index 2bde2486b48d6..f5d3f64b0af78 100644
--- a/examples/model_selection/plot_train_error_vs_test_error.py
+++ b/examples/model_selection/plot_train_error_vs_test_error.py
@@ -12,14 +12,15 @@
 
 """
 
+# %%
+# Generate sample data
+# --------------------
 # Author: Alexandre Gramfort
 # License: BSD 3 clause
 
 import numpy as np
 from sklearn import linear_model
 
-# #############################################################################
-# Generate sample data
 n_samples_train, n_samples_test, n_features = 75, 150, 500
 np.random.seed(0)
 coef = np.random.randn(n_features)
@@ -27,12 +28,14 @@
 X = np.random.randn(n_samples_train + n_samples_test, n_features)
 y = np.dot(X, coef)
 
+# %%
 # Split train and test data
 X_train, X_test = X[:n_samples_train], X[n_samples_train:]
 y_train, y_test = y[:n_samples_train], y[n_samples_train:]
 
-# #############################################################################
+# %%
 # Compute train and test errors
+# -----------------------------
 alphas = np.logspace(-5, 1, 60)
 enet = linear_model.ElasticNet(l1_ratio=0.7, max_iter=10000)
 train_errors = list()
@@ -51,8 +54,9 @@
 enet.set_params(alpha=alpha_optim)
 coef_ = enet.fit(X, y).coef_
 
-# #############################################################################
+# %%
 # Plot results functions
+# ----------------------
 
 import matplotlib.pyplot as plt
 

From a0644bf91c7d3698df1c4907eccdd23255682cf7 Mon Sep 17 00:00:00 2001
From: Brenden
Date: Fri, 11 Feb 2022 08:37:30 -0500
Subject: [PATCH 2/3] moved generate sample data header

---
 examples/model_selection/plot_train_error_vs_test_error.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/examples/model_selection/plot_train_error_vs_test_error.py b/examples/model_selection/plot_train_error_vs_test_error.py
index f5d3f64b0af78..63e8815c2e2ea 100644
--- a/examples/model_selection/plot_train_error_vs_test_error.py
+++ b/examples/model_selection/plot_train_error_vs_test_error.py
@@ -12,12 +12,12 @@
 
 """
 
-# %%
-# Generate sample data
-# --------------------
 # Author: Alexandre Gramfort
 # License: BSD 3 clause
 
+# %%
+# Generate sample data
+# --------------------
 import numpy as np
 from sklearn import linear_model
 

From 3c0780b4d7fffac1028de80d07d4f3e1b1697a4d Mon Sep 17 00:00:00 2001
From: Brenden
Date: Fri, 11 Feb 2022 11:47:44 -0500
Subject: [PATCH 3/3] Generate sample data using scikit-learn helper functions

---
Reviewer note: this hunk was edited by hand. `random_state=0` is passed to
make_regression because this patch removes np.random.seed(0); without it the
generated data (and therefore the selected alpha and the plots) would change
on every run. The blank line before the next `# %%` cell marker is kept.
The post-image blob id on the index line below predates this edit.

 .../plot_train_error_vs_test_error.py         | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/examples/model_selection/plot_train_error_vs_test_error.py b/examples/model_selection/plot_train_error_vs_test_error.py
index 63e8815c2e2ea..528d3482be15b 100644
--- a/examples/model_selection/plot_train_error_vs_test_error.py
+++ b/examples/model_selection/plot_train_error_vs_test_error.py
@@ -20,19 +20,23 @@
 # --------------------
 import numpy as np
 from sklearn import linear_model
+from sklearn.datasets import make_regression
+from sklearn.model_selection import train_test_split
 
 n_samples_train, n_samples_test, n_features = 75, 150, 500
-np.random.seed(0)
-coef = np.random.randn(n_features)
-coef[50:] = 0.0  # only the top 10 features are impacting the model
-X = np.random.randn(n_samples_train + n_samples_test, n_features)
-y = np.dot(X, coef)
-
-# %%
-# Split train and test data
-X_train, X_test = X[:n_samples_train], X[n_samples_train:]
-y_train, y_test = y[:n_samples_train], y[n_samples_train:]
+X, y, coef = make_regression(
+    n_samples=n_samples_train + n_samples_test,
+    n_features=n_features,
+    n_informative=50,
+    shuffle=False,
+    noise=1.0,
+    coef=True,
+    random_state=0,
+)
+X_train, X_test, y_train, y_test = train_test_split(
+    X, y, train_size=n_samples_train, test_size=n_samples_test, shuffle=False
+)
 
 # %%
 # Compute train and test errors
 # -----------------------------