MAINT Enable and run black on examples by thomasjpfan · Pull Request #20502 · scikit-learn/scikit-learn
MAINT Enable and run black on examples #20502

Merged · 2 commits · Oct 7, 2021
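Per the title, the commits enable and run black over examples/, so the hunks below are mechanical reformatting: single-quoted strings become double-quoted, bare decimals like .01 gain a leading zero, backslash continuations disappear, and calls that no longer fit on one line are exploded. As a rough illustration of how any hunk below is produced — a minimal sketch using black's public Python API, not necessarily the invocation this PR used, and assuming black's default Mode() with its 88-character line length:

# A minimal sketch, assuming `pip install black` and default settings; the
# exact configuration this PR enables is not shown on this page.
import black

# Pre-black source taken from the plot_digits_denoising.py hunk below. Note
# that format_str only parses the snippet, so the undefined KernelPCA name
# is harmless: the code is never imported or executed.
old_code = (
    'kernel_pca = KernelPCA(n_components=400, kernel="rbf", gamma=1e-3,\n'
    "                       fit_inverse_transform=True, alpha=5e-3)\n"
)

# Re-emit the snippet in black's canonical style; since the call exceeds the
# line-length limit, black moves the arguments onto their own indented line,
# which should match the "+" side of the corresponding hunk below.
print(black.format_str(old_code, mode=black.Mode()))

From the command line, black examples/ rewrites files in place, while black --check examples/ only reports which files would change.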
examples/applications/plot_digits_denoising.py (29 changes: 17 additions & 12 deletions)
@@ -87,9 +87,9 @@ def plot_digits(X, title):
 # Let's first have a look to see the difference between noise-free and noisy
 # images. We will check the test set in this regard.
 plot_digits(X_test, "Uncorrupted test images")
-plot_digits(X_test_noisy,
-            f"Noisy test images\n"
-            f"MSE: {np.mean((X_test - X_test_noisy) ** 2):.2f}")
+plot_digits(
+    X_test_noisy, f"Noisy test images\nMSE: {np.mean((X_test - X_test_noisy) ** 2):.2f}"
+)
 
 # %%
 # Learn the `PCA` basis
@@ -100,8 +100,9 @@ def plot_digits(X, title):
 from sklearn.decomposition import PCA, KernelPCA
 
 pca = PCA(n_components=32)
-kernel_pca = KernelPCA(n_components=400, kernel="rbf", gamma=1e-3,
-                       fit_inverse_transform=True, alpha=5e-3)
+kernel_pca = KernelPCA(
+    n_components=400, kernel="rbf", gamma=1e-3, fit_inverse_transform=True, alpha=5e-3
+)
 
 pca.fit(X_train_noisy)
 _ = kernel_pca.fit(X_train_noisy)
@@ -118,17 +119,21 @@ def plot_digits(X, title):
 # kernel to learn the PCA basis and a kernel ridge to learn the mapping
 # function.
 X_reconstructed_kernel_pca = kernel_pca.inverse_transform(
-    kernel_pca.transform(X_test_noisy))
+    kernel_pca.transform(X_test_noisy)
+)
 X_reconstructed_pca = pca.inverse_transform(pca.transform(X_test_noisy))
 
 # %%
 plot_digits(X_test, "Uncorrupted test images")
-plot_digits(X_reconstructed_pca,
-            f"PCA reconstruction\n"
-            f"MSE: {np.mean((X_test - X_reconstructed_pca) ** 2):.2f}")
-plot_digits(X_reconstructed_kernel_pca,
-            f"Kernel PCA reconstruction\n"
-            f"MSE: {np.mean((X_test - X_reconstructed_kernel_pca) ** 2):.2f}")
+plot_digits(
+    X_reconstructed_pca,
+    f"PCA reconstruction\nMSE: {np.mean((X_test - X_reconstructed_pca) ** 2):.2f}",
+)
+plot_digits(
+    X_reconstructed_kernel_pca,
+    "Kernel PCA reconstruction\n"
+    f"MSE: {np.mean((X_test - X_reconstructed_kernel_pca) ** 2):.2f}",
+)
 
 # %%
 # PCA has a lower MSE than kernel PCA. However, the qualitative analysis might
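One detail worth noticing above: when black puts each argument of a plot_digits call on its own line, it also appends a comma after the last one. That is black's "magic trailing comma" (the default behavior since black 20.8b0): a trailing comma already present in the source forces the call to stay exploded on later runs, even when it would fit on one line. A small sketch of the effect, again assuming black's default Mode():

import black

# With the magic trailing comma, the call stays one-argument-per-line even
# though it would fit within 88 characters...
exploded = 'plot_digits(\n    X,\n    "title",\n)\n'
# ...while the same call without the comma collapses back to a single line.
collapsed = 'plot_digits(\n    X,\n    "title"\n)\n'

print(black.format_str(exploded, mode=black.Mode()))
print(black.format_str(collapsed, mode=black.Mode()))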
examples/applications/plot_face_recognition.py (38 changes: 21 additions & 17 deletions)
@@ -43,7 +43,7 @@
 print(__doc__)
 
 # Display progress logs on stdout
-logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
+logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
 
 
 # #############################################################################
@@ -75,19 +75,20 @@
 
 # split into a training and testing set
 X_train, X_test, y_train, y_test = train_test_split(
-    X, y, test_size=0.25, random_state=42)
+    X, y, test_size=0.25, random_state=42
+)
 
 
 # #############################################################################
 # Compute a PCA (eigenfaces) on the face dataset (treated as unlabeled
 # dataset): unsupervised feature extraction / dimensionality reduction
 n_components = 150
 
-print("Extracting the top %d eigenfaces from %d faces"
-      % (n_components, X_train.shape[0]))
+print(
+    "Extracting the top %d eigenfaces from %d faces" % (n_components, X_train.shape[0])
+)
 t0 = time()
-pca = PCA(n_components=n_components, svd_solver='randomized',
-          whiten=True).fit(X_train)
+pca = PCA(n_components=n_components, svd_solver="randomized", whiten=True).fit(X_train)
 print("done in %0.3fs" % (time() - t0))
 
 eigenfaces = pca.components_.reshape((n_components, h, w))
@@ -104,11 +105,11 @@
 
 print("Fitting the classifier to the training set")
 t0 = time()
-param_grid = {'C': [1e3, 5e3, 1e4, 5e4, 1e5],
-              'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1], }
-clf = GridSearchCV(
-    SVC(kernel='rbf', class_weight='balanced'), param_grid
-)
+param_grid = {
+    "C": [1e3, 5e3, 1e4, 5e4, 1e5],
+    "gamma": [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1],
+}
+clf = GridSearchCV(SVC(kernel="rbf", class_weight="balanced"), param_grid)
 clf = clf.fit(X_train_pca, y_train)
 print("done in %0.3fs" % (time() - t0))
 print("Best estimator found by grid search:")
@@ -130,10 +131,11 @@
 # #############################################################################
 # Qualitative evaluation of the predictions using matplotlib
 
+
 def plot_gallery(images, titles, h, w, n_row=3, n_col=4):
     """Helper function to plot a gallery of portraits"""
     plt.figure(figsize=(1.8 * n_col, 2.4 * n_row))
-    plt.subplots_adjust(bottom=0, left=.01, right=.99, top=.90, hspace=.35)
+    plt.subplots_adjust(bottom=0, left=0.01, right=0.99, top=0.90, hspace=0.35)
     for i in range(n_row * n_col):
         plt.subplot(n_row, n_col, i + 1)
         plt.imshow(images[i].reshape((h, w)), cmap=plt.cm.gray)
@@ -144,14 +146,16 @@ def plot_gallery(images, titles, h, w, n_row=3, n_col=4):
 
 # plot the result of the prediction on a portion of the test set
 
+
 def title(y_pred, y_test, target_names, i):
-    pred_name = target_names[y_pred[i]].rsplit(' ', 1)[-1]
-    true_name = target_names[y_test[i]].rsplit(' ', 1)[-1]
-    return 'predicted: %s\ntrue: %s' % (pred_name, true_name)
+    pred_name = target_names[y_pred[i]].rsplit(" ", 1)[-1]
+    true_name = target_names[y_test[i]].rsplit(" ", 1)[-1]
+    return "predicted: %s\ntrue: %s" % (pred_name, true_name)
 
 
-prediction_titles = [title(y_pred, y_test, target_names, i)
-                     for i in range(y_pred.shape[0])]
+prediction_titles = [
+    title(y_pred, y_test, target_names, i) for i in range(y_pred.shape[0])
+]
 
 plot_gallery(X_test, prediction_titles, h, w)
 
examples/applications/plot_model_complexity_influence.py (161 changes: 89 additions & 72 deletions)
@@ -72,23 +72,21 @@
 
 def generate_data(case):
     """Generate regression/classification data."""
-    if case == 'regression':
+    if case == "regression":
         X, y = datasets.load_diabetes(return_X_y=True)
-    elif case == 'classification':
-        X, y = datasets.fetch_20newsgroups_vectorized(subset='all',
-                                                      return_X_y=True)
+    elif case == "classification":
+        X, y = datasets.fetch_20newsgroups_vectorized(subset="all", return_X_y=True)
     X, y = shuffle(X, y)
     offset = int(X.shape[0] * 0.8)
     X_train, y_train = X[:offset], y[:offset]
     X_test, y_test = X[offset:], y[offset:]
 
-    data = {'X_train': X_train, 'X_test': X_test, 'y_train': y_train,
-            'y_test': y_test}
+    data = {"X_train": X_train, "X_test": X_test, "y_train": y_train, "y_test": y_test}
     return data
 
 
-regression_data = generate_data('regression')
-classification_data = generate_data('classification')
+regression_data = generate_data("regression")
+classification_data = generate_data("classification")
 
 
 ##############################################################################
@@ -110,26 +108,33 @@ def benchmark_influence(conf):
     prediction_times = []
     prediction_powers = []
     complexities = []
-    for param_value in conf['changing_param_values']:
-        conf['tuned_params'][conf['changing_param']] = param_value
-        estimator = conf['estimator'](**conf['tuned_params'])
+    for param_value in conf["changing_param_values"]:
+        conf["tuned_params"][conf["changing_param"]] = param_value
+        estimator = conf["estimator"](**conf["tuned_params"])
 
         print("Benchmarking %s" % estimator)
-        estimator.fit(conf['data']['X_train'], conf['data']['y_train'])
-        conf['postfit_hook'](estimator)
-        complexity = conf['complexity_computer'](estimator)
+        estimator.fit(conf["data"]["X_train"], conf["data"]["y_train"])
+        conf["postfit_hook"](estimator)
+        complexity = conf["complexity_computer"](estimator)
         complexities.append(complexity)
         start_time = time.time()
-        for _ in range(conf['n_samples']):
-            y_pred = estimator.predict(conf['data']['X_test'])
-        elapsed_time = (time.time() - start_time) / float(conf['n_samples'])
+        for _ in range(conf["n_samples"]):
+            y_pred = estimator.predict(conf["data"]["X_test"])
+        elapsed_time = (time.time() - start_time) / float(conf["n_samples"])
         prediction_times.append(elapsed_time)
-        pred_score = conf['prediction_performance_computer'](
-            conf['data']['y_test'], y_pred)
+        pred_score = conf["prediction_performance_computer"](
+            conf["data"]["y_test"], y_pred
+        )
         prediction_powers.append(pred_score)
-        print("Complexity: %d | %s: %.4f | Pred. Time: %fs\n" % (
-            complexity, conf['prediction_performance_label'], pred_score,
-            elapsed_time))
+        print(
+            "Complexity: %d | %s: %.4f | Pred. Time: %fs\n"
+            % (
+                complexity,
+                conf["prediction_performance_label"],
+                pred_score,
+                elapsed_time,
+            )
+        )
     return prediction_powers, prediction_times, complexities
 
 
@@ -147,46 +152,58 @@ def benchmark_influence(conf):
 # different data.
 #
 
+
 def _count_nonzero_coefficients(estimator):
     a = estimator.coef_.toarray()
     return np.count_nonzero(a)
 
 
 configurations = [
-    {'estimator': SGDClassifier,
-     'tuned_params': {'penalty': 'elasticnet', 'alpha': 0.001, 'loss':
-                      'modified_huber', 'fit_intercept': True, 'tol': 1e-3},
-     'changing_param': 'l1_ratio',
-     'changing_param_values': [0.25, 0.5, 0.75, 0.9],
-     'complexity_label': 'non_zero coefficients',
-     'complexity_computer': _count_nonzero_coefficients,
-     'prediction_performance_computer': hamming_loss,
-     'prediction_performance_label': 'Hamming Loss (Misclassification Ratio)',
-     'postfit_hook': lambda x: x.sparsify(),
-     'data': classification_data,
-     'n_samples': 30},
-    {'estimator': NuSVR,
-     'tuned_params': {'C': 1e3, 'gamma': 2 ** -15},
-     'changing_param': 'nu',
-     'changing_param_values': [0.1, 0.25, 0.5, 0.75, 0.9],
-     'complexity_label': 'n_support_vectors',
-     'complexity_computer': lambda x: len(x.support_vectors_),
-     'data': regression_data,
-     'postfit_hook': lambda x: x,
-     'prediction_performance_computer': mean_squared_error,
-     'prediction_performance_label': 'MSE',
-     'n_samples': 30},
-    {'estimator': GradientBoostingRegressor,
-     'tuned_params': {'loss': 'squared_error'},
-     'changing_param': 'n_estimators',
-     'changing_param_values': [10, 50, 100, 200, 500],
-     'complexity_label': 'n_trees',
-     'complexity_computer': lambda x: x.n_estimators,
-     'data': regression_data,
-     'postfit_hook': lambda x: x,
-     'prediction_performance_computer': mean_squared_error,
-     'prediction_performance_label': 'MSE',
-     'n_samples': 30},
+    {
+        "estimator": SGDClassifier,
+        "tuned_params": {
+            "penalty": "elasticnet",
+            "alpha": 0.001,
+            "loss": "modified_huber",
+            "fit_intercept": True,
+            "tol": 1e-3,
+        },
+        "changing_param": "l1_ratio",
+        "changing_param_values": [0.25, 0.5, 0.75, 0.9],
+        "complexity_label": "non_zero coefficients",
+        "complexity_computer": _count_nonzero_coefficients,
+        "prediction_performance_computer": hamming_loss,
+        "prediction_performance_label": "Hamming Loss (Misclassification Ratio)",
+        "postfit_hook": lambda x: x.sparsify(),
+        "data": classification_data,
+        "n_samples": 30,
+    },
+    {
+        "estimator": NuSVR,
+        "tuned_params": {"C": 1e3, "gamma": 2 ** -15},
+        "changing_param": "nu",
+        "changing_param_values": [0.1, 0.25, 0.5, 0.75, 0.9],
+        "complexity_label": "n_support_vectors",
+        "complexity_computer": lambda x: len(x.support_vectors_),
+        "data": regression_data,
+        "postfit_hook": lambda x: x,
+        "prediction_performance_computer": mean_squared_error,
+        "prediction_performance_label": "MSE",
+        "n_samples": 30,
+    },
+    {
+        "estimator": GradientBoostingRegressor,
+        "tuned_params": {"loss": "squared_error"},
+        "changing_param": "n_estimators",
+        "changing_param_values": [10, 50, 100, 200, 500],
+        "complexity_label": "n_trees",
+        "complexity_computer": lambda x: x.n_estimators,
+        "data": regression_data,
+        "postfit_hook": lambda x: x,
+        "prediction_performance_computer": mean_squared_error,
+        "prediction_performance_label": "MSE",
+        "n_samples": 30,
+    },
 ]
 
 
@@ -209,6 +226,7 @@ def _count_nonzero_coefficients(estimator):
 # ensemble is not as detrimental.
 #
 
+
 def plot_influence(conf, mse_values, prediction_times, complexities):
     """
     Plot influence of model complexity on both accuracy and latency.
@@ -219,38 +237,37 @@ def plot_influence(conf, mse_values, prediction_times, complexities):
 
     # first axes (prediction error)
     ax1 = fig.add_subplot(111)
-    line1 = ax1.plot(complexities, mse_values, c='tab:blue', ls='-')[0]
-    ax1.set_xlabel('Model Complexity (%s)' % conf['complexity_label'])
-    y1_label = conf['prediction_performance_label']
+    line1 = ax1.plot(complexities, mse_values, c="tab:blue", ls="-")[0]
+    ax1.set_xlabel("Model Complexity (%s)" % conf["complexity_label"])
+    y1_label = conf["prediction_performance_label"]
     ax1.set_ylabel(y1_label)
 
-    ax1.spines['left'].set_color(line1.get_color())
+    ax1.spines["left"].set_color(line1.get_color())
     ax1.yaxis.label.set_color(line1.get_color())
-    ax1.tick_params(axis='y', colors=line1.get_color())
+    ax1.tick_params(axis="y", colors=line1.get_color())
 
     # second axes (latency)
     ax2 = fig.add_subplot(111, sharex=ax1, frameon=False)
-    line2 = ax2.plot(complexities, prediction_times, c='tab:orange', ls='-')[0]
+    line2 = ax2.plot(complexities, prediction_times, c="tab:orange", ls="-")[0]
     ax2.yaxis.tick_right()
     ax2.yaxis.set_label_position("right")
     y2_label = "Time (s)"
     ax2.set_ylabel(y2_label)
-    ax1.spines['right'].set_color(line2.get_color())
+    ax1.spines["right"].set_color(line2.get_color())
     ax2.yaxis.label.set_color(line2.get_color())
-    ax2.tick_params(axis='y', colors=line2.get_color())
+    ax2.tick_params(axis="y", colors=line2.get_color())
 
-    plt.legend((line1, line2), ("prediction error", "latency"),
-               loc='upper right')
+    plt.legend((line1, line2), ("prediction error", "latency"), loc="upper right")
 
-    plt.title("Influence of varying '%s' on %s" % (conf['changing_param'],
-                                                   conf['estimator'].__name__))
+    plt.title(
+        "Influence of varying '%s' on %s"
+        % (conf["changing_param"], conf["estimator"].__name__)
+    )
 
 
 for conf in configurations:
-    prediction_performances, prediction_times, complexities = \
-        benchmark_influence(conf)
-    plot_influence(conf, prediction_performances, prediction_times,
-                   complexities)
+    prediction_performances, prediction_times, complexities = benchmark_influence(conf)
+    plot_influence(conf, prediction_performances, prediction_times, complexities)
 plt.show()
 
 
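After these changes, every file under examples/ should be a fixed point of black: running black --check examples/ should find nothing to reformat. The equivalent check written against black's Python API — a hypothetical verification script, not part of this PR or of scikit-learn's actual CI wiring, assuming default settings:

from pathlib import Path

import black

mode = black.Mode()  # the default 88-character line length is an assumption
for path in sorted(Path("examples").rglob("*.py")):
    src = path.read_text()
    try:
        # format_str parses the source without importing or running it; on
        # already-black-formatted input it returns the input unchanged.
        if black.format_str(src, mode=mode) != src:
            print(f"{path}: would be reformatted by black")
    except black.InvalidInput:
        print(f"{path}: not valid Python")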