Convert the example's "# ####..." section separators to "# %%" cell markers,
split parameter setup, data generation and noise injection into separate
cells, and remove the `mask` array.

diff --git a/examples/cluster/plot_feature_agglomeration_vs_univariate_selection.py b/examples/cluster/plot_feature_agglomeration_vs_univariate_selection.py
index 72ca2c09b7f39..e2273326b9a12 100644
--- a/examples/cluster/plot_feature_agglomeration_vs_univariate_selection.py
+++ b/examples/cluster/plot_feature_agglomeration_vs_univariate_selection.py
@@ -17,6 +17,7 @@
 # Author: Alexandre Gramfort
 # License: BSD 3 clause
 
+# %%
 import shutil
 import tempfile
 
@@ -33,15 +34,16 @@
 from sklearn.model_selection import GridSearchCV
 from sklearn.model_selection import KFold
 
-# #############################################################################
-# Generate data
+# %%
+# Set parameters
 n_samples = 200
 size = 40  # image size
 roi_size = 15
 snr = 5.0
 np.random.seed(0)
-mask = np.ones([size, size], dtype=bool)
 
+# %%
+# Generate data
 coef = np.zeros((size, size))
 coef[0:roi_size, 0:roi_size] = -1.0
 coef[-roi_size:, -roi_size:] = 1.0
@@ -53,17 +55,21 @@
 X /= X.std(axis=0)
 
 y = np.dot(X, coef.ravel())
+
+# %%
+# add noise
 noise = np.random.randn(y.shape[0])
 noise_coef = (linalg.norm(y, 2) / np.exp(snr / 20.0)) / linalg.norm(noise, 2)
-y += noise_coef * noise  # add noise
+y += noise_coef * noise
 
-# #############################################################################
+# %%
 # Compute the coefs of a Bayesian Ridge with GridSearch
 cv = KFold(2)  # cross-validation generator for model selection
 ridge = BayesianRidge()
 cachedir = tempfile.mkdtemp()
 mem = Memory(location=cachedir, verbose=1)
 
+# %%
 # Ward agglomeration followed by BayesianRidge
 connectivity = grid_to_graph(n_x=size, n_y=size)
 ward = FeatureAgglomeration(n_clusters=10, connectivity=connectivity, memory=mem)
@@ -75,6 +81,7 @@
 coef_ = clf.best_estimator_.steps[0][1].inverse_transform(coef_)
 coef_agglomeration_ = coef_.reshape(size, size)
 
+# %%
 # Anova univariate feature selection followed by BayesianRidge
 f_regression = mem.cache(feature_selection.f_regression)  # caching function
 anova = feature_selection.SelectPercentile(f_regression)
@@ -86,7 +93,7 @@
 coef_ = clf.best_estimator_.steps[0][1].inverse_transform(coef_.reshape(1, -1))
 coef_selection_ = coef_.reshape(size, size)
 
-# #############################################################################
+# %%
 # Inverse the transformation to plot the results on an image
 plt.close("all")
 plt.figure(figsize=(7.3, 2.7))
@@ -102,5 +109,6 @@
 plt.subplots_adjust(0.04, 0.0, 0.98, 0.94, 0.16, 0.26)
 plt.show()
 
+# %%
 # Attempt to remove the temporary cachedir, but don't worry if it fails
 shutil.rmtree(cachedir, ignore_errors=True)