8000 DOC: use notebook-style for plot_feature_agglomeration_vs_univariate_selection.py by MarieSacksick · Pull Request #22796 · scikit-learn/scikit-learn · GitHub
[go: up one dir, main page]

Skip to content

DOC: use notebook-style for plot_feature_agglomeration_vs_univariate_selection.py #22796

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
# Author: Alexandre Gramfort <alexandre.gramfort@inria.fr>
# License: BSD 3 clause

# %%
import shutil
import tempfile

Expand All @@ -33,15 +34,16 @@
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import KFold

# #############################################################################
# Generate data
# %%
# Set parameters
n_samples = 200
size = 40 # image size
roi_size = 15
snr = 5.0
np.random.seed(0)
mask = np.ones([size, size], dtype=bool)

# %%
# Generate data
coef = np.zeros((size, size))
coef[0:roi_size, 0:roi_size] = -1.0
coef[-roi_size:, -roi_size:] = 1.0
Expand All @@ -53,17 +55,21 @@
X /= X.std(axis=0)

y = np.dot(X, coef.ravel())

# %%
# add noise
noise = np.random.randn(y.shape[0])
noise_coef = (linalg.norm(y, 2) / np.exp(snr / 20.0)) / linalg.norm(noise, 2)
y += noise_coef * noise # add noise
y += noise_coef * noise

# #############################################################################
# %%
# Compute the coefs of a Bayesian Ridge with GridSearch
cv = KFold(2) # cross-validation generator for model selection
ridge = BayesianRidge()
cachedir = tempfile.mkdtemp()
mem = Memory(location=cachedir, verbose=1)

# %%
# Ward agglomeration followed by BayesianRidge
connectivity = grid_to_graph(n_x=size, n_y=size)
ward = FeatureAgglomeration(n_clusters=10, connectivity=connectivity, memory=mem)
Expand All @@ -75,6 +81,7 @@
coef_ = clf.best_estimator_.steps[0][1].inverse_transform(coef_)
coef_agglomeration_ = coef_.reshape(size, size)

# %%
# Anova univariate feature selection followed by BayesianRidge
f_regression = mem.cache(feature_selection.f_regression) # caching function
anova = feature_selection.SelectPercentile(f_regression)
Expand All @@ -86,7 +93,7 @@
coef_ = clf.best_estimator_.steps[0][1].inverse_transform(coef_.reshape(1, -1))
coef_selection_ = coef_.reshape(size, size)

# #############################################################################
# %%
# Inverse the transformation to plot the results on an image
plt.close("all")
plt.figure(figsize=(7.3, 2.7))
Expand All @@ -102,5 +109,6 @@
plt.subplots_adjust(0.04, 0.0, 0.98, 0.94, 0.16, 0.26)
plt.show()

# %%
# Attempt to remove the temporary cachedir, but don't worry if it fails
shutil.rmtree(cachedir, ignore_errors=True)
0