From b10a007b9e258736a2bf1795e12b33f588bb6878 Mon Sep 17 00:00:00 2001 From: plagree Date: Thu, 8 Jun 2017 15:26:48 +0200 Subject: [PATCH 1/4] DOC examples with correct notebook style --- examples/cluster/plot_dict_face_patches.py | 2 ++ examples/feature_selection/plot_feature_selection.py | 4 ---- .../plot_permutation_test_for_classification.py | 2 -- examples/plot_kernel_ridge_regression.py | 5 +---- 4 files changed, 3 insertions(+), 10 deletions(-) diff --git a/examples/cluster/plot_dict_face_patches.py b/examples/cluster/plot_dict_face_patches.py index 7fb125ed735e9..654fd3162e87b 100644 --- a/examples/cluster/plot_dict_face_patches.py +++ b/examples/cluster/plot_dict_face_patches.py @@ -34,6 +34,7 @@ ############################################################################### # Learn the dictionary of images +# ------------------------------ print('Learning the dictionary... ') rng = np.random.RandomState(0) @@ -68,6 +69,7 @@ ############################################################################### # Plot the results +# ---------------- plt.figure(figsize=(4.2, 4)) for i, patch in enumerate(kmeans.cluster_centers_): plt.subplot(9, 9, i + 1) diff --git a/examples/feature_selection/plot_feature_selection.py b/examples/feature_selection/plot_feature_selection.py index 5d123985a01bb..61c17dc87c4e1 100644 --- a/examples/feature_selection/plot_feature_selection.py +++ b/examples/feature_selection/plot_feature_selection.py @@ -27,7 +27,6 @@ from sklearn import datasets, svm from sklearn.feature_selection import SelectPercentile, f_classif -############################################################################### # import some data to play with # The iris dataset @@ -40,13 +39,11 @@ X = np.hstack((iris.data, E)) y = iris.target -############################################################################### plt.figure(1) plt.clf() X_indices = np.arange(X.shape[-1]) -############################################################################### # Univariate feature selection with F-test for feature scoring # We use the default selection function: the 10% most significant features selector = SelectPercentile(f_classif, percentile=10) @@ -57,7 +54,6 @@ label=r'Univariate score ($-Log(p_{value})$)', color='darkorange', edgecolor='black') -############################################################################### # Compare to the weights of an SVM clf = svm.SVC(kernel='linear') clf.fit(X, y) diff --git a/examples/feature_selection/plot_permutation_test_for_classification.py b/examples/feature_selection/plot_permutation_test_for_classification.py index 8cadbfa91ad09..84b1c5a3fca4e 100644 --- a/examples/feature_selection/plot_permutation_test_for_classification.py +++ b/examples/feature_selection/plot_permutation_test_for_classification.py @@ -25,7 +25,6 @@ from sklearn import datasets -############################################################################## # Loading a dataset iris = datasets.load_iris() X = iris.data @@ -47,7 +46,6 @@ print("Classification score %s (pvalue : %s)" % (score, pvalue)) -############################################################################### # View histogram of permutation scores plt.hist(permutation_scores, 20, label='Permutation scores', edgecolor='black') diff --git a/examples/plot_kernel_ridge_regression.py b/examples/plot_kernel_ridge_regression.py index 85cd9990c1f68..6ad422227bb21 100644 --- a/examples/plot_kernel_ridge_regression.py +++ b/examples/plot_kernel_ridge_regression.py @@ -48,7 +48,6 @@ rng = np.random.RandomState(0) -############################################################################# # Generate sample data X = 5 * rng.rand(10000, 1) y = np.sin(X).ravel() @@ -58,7 +57,6 @@ X_plot = np.linspace(0, 5, 100000)[:, None] -############################################################################# # Fit regression model train_size = 100 svr = GridSearchCV(SVR(kernel='rbf', gamma=0.1), cv=5, @@ -97,8 +95,7 @@ % (X_plot.shape[0], kr_predict)) -############################################################################# -# look at the results +# Look at the results sv_ind = svr.best_estimator_.support_ plt.scatter(X[sv_ind], y[sv_ind], c='r', s=50, label='SVR support vectors', zorder=2, edgecolors=(0, 0, 0)) From 187b51c7235660d318094361526ac7d2212ddcff Mon Sep 17 00:00:00 2001 From: plagree Date: Thu, 8 Jun 2017 17:35:36 +0200 Subject: [PATCH 2/4] Modifications in examples/ to avoid unwanted notebook style --- examples/applications/plot_face_recognition.py | 6 ------ .../applications/plot_model_complexity_influence.py | 6 ++---- examples/applications/plot_prediction_latency.py | 5 ++--- examples/applications/plot_stock_market.py | 5 ----- .../applications/wikipedia_principal_eigenvector.py | 2 -- examples/calibration/plot_calibration.py | 1 - examples/calibration/plot_compare_calibration.py | 1 - examples/classification/plot_lda_qda.py | 10 +++------- examples/cluster/plot_affinity_propagation.py | 3 --- examples/cluster/plot_dbscan.py | 3 --- examples/cluster/plot_dict_face_patches.py | 4 ---- examples/cluster/plot_face_ward_segmentation.py | 4 ---- ...ot_feature_agglomeration_vs_univariate_selection.py | 3 --- examples/cluster/plot_kmeans_digits.py | 1 - examples/cluster/plot_mean_shift.py | 3 --- examples/cluster/plot_mini_batch_kmeans.py | 4 ---- examples/cluster/plot_segmentation_toy.py | 3 --- .../cluster/plot_ward_structured_vs_unstructured.py | 6 ------ examples/covariance/plot_covariance_estimation.py | 4 ---- examples/covariance/plot_mahalanobis_distances.py | 1 - examples/covariance/plot_sparse_cov.py | 3 --- .../plot_compare_cross_decomposition.py | 5 ----- examples/decomposition/plot_faces_decomposition.py | 5 ----- .../decomposition/plot_ica_blind_source_separation.py | 2 -- examples/decomposition/plot_ica_vs_pca.py | 2 -- examples/decomposition/plot_image_denoising.py | 4 ---- examples/decomposition/plot_pca_3d.py | 2 -- .../decomposition/plot_pca_vs_fa_model_selection.py | 2 -- examples/ensemble/plot_gradient_boosting_regression.py | 4 ---- examples/exercises/plot_cv_diabetes.py | 1 - examples/feature_selection/plot_feature_selection.py | 2 +- examples/linear_model/plot_ard.py | 3 --- examples/linear_model/plot_bayesian_ridge.py | 3 --- examples/linear_model/plot_lasso_and_elasticnet.py | 5 +---- .../linear_model/plot_lasso_dense_vs_sparse_data.py | 2 -- examples/linear_model/plot_lasso_model_selection.py | 3 +++ examples/linear_model/plot_logistic_path.py | 1 - examples/linear_model/plot_multi_task_lasso_support.py | 1 - examples/linear_model/plot_ols_3d.py | 1 - examples/linear_model/plot_ridge_path.py | 2 -- examples/linear_model/plot_theilsen.py | 2 -- .../grid_search_text_feature_extraction.py | 4 +--- examples/model_selection/plot_roc_crossval.py | 4 +--- .../model_selection/plot_train_error_vs_test_error.py | 3 --- examples/neighbors/plot_regression.py | 2 -- .../plot_rbm_logistic_classification.py | 4 ---- examples/plot_isotonic_regression.py | 4 +--- .../semi_supervised/plot_label_propagation_digits.py | 10 ++++------ .../plot_label_propagation_structure.py | 2 -- examples/svm/plot_rbf_parameters.py | 5 +---- examples/svm/plot_svm_anova.py | 3 --- examples/svm/plot_svm_regression.py | 6 +----- examples/text/document_classification_20newsgroups.py | 2 -- examples/text/document_clustering.py | 2 -- 54 files changed, 21 insertions(+), 160 deletions(-) diff --git a/examples/applications/plot_face_recognition.py b/examples/applications/plot_face_recognition.py index 039af7ea2feb6..123c4b4bdd9b7 100644 --- a/examples/applications/plot_face_recognition.py +++ b/examples/applications/plot_face_recognition.py @@ -48,7 +48,6 @@ logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s') -############################################################################### # Download the data, if not already on disk and load it as numpy arrays lfw_people = fetch_lfw_people(min_faces_per_person=70, resize=0.4) @@ -72,7 +71,6 @@ print("n_classes: %d" % n_classes) -############################################################################### # Split into a training set and a test set using a stratified k fold # split into a training and testing set @@ -80,7 +78,6 @@ X, y, test_size=0.25, random_state=42) -############################################################################### # Compute a PCA (eigenfaces) on the face dataset (treated as unlabeled # dataset): unsupervised feature extraction / dimensionality reduction n_components = 150 @@ -101,7 +98,6 @@ print("done in %0.3fs" % (time() - t0)) -############################################################################### # Train a SVM classification model print("Fitting the classifier to the training set") @@ -115,7 +111,6 @@ print(clf.best_estimator_) -############################################################################### # Quantitative evaluation of the model quality on the test set print("Predicting people's names on the test set") @@ -127,7 +122,6 @@ print(confusion_matrix(y_test, y_pred, labels=range(n_classes))) -############################################################################### # Qualitative evaluation of the predictions using matplotlib def plot_gallery(images, titles, h, w, n_row=3, n_col=4): diff --git a/examples/applications/plot_model_complexity_influence.py b/examples/applications/plot_model_complexity_influence.py index 90fd5c718e78f..f7df14f63b4b7 100644 --- a/examples/applications/plot_model_complexity_influence.py +++ b/examples/applications/plot_model_complexity_influence.py @@ -34,11 +34,10 @@ from sklearn.linear_model.stochastic_gradient import SGDClassifier from sklearn.metrics import hamming_loss -############################################################################### # Routines -# initialize random generator +# Initialize random generator np.random.seed(0) @@ -122,8 +121,7 @@ def _count_nonzero_coefficients(estimator): a = estimator.coef_.toarray() return np.count_nonzero(a) -############################################################################### -# main code +# Main code regression_data = generate_data('regression') classification_data = generate_data('classification', sparse=True) configurations = [ diff --git a/examples/applications/plot_prediction_latency.py b/examples/applications/plot_prediction_latency.py index a375c1cc8f3c3..156a5d33ee2af 100644 --- a/examples/applications/plot_prediction_latency.py +++ b/examples/applications/plot_prediction_latency.py @@ -266,12 +266,11 @@ def plot_benchmark_throughput(throughputs, configuration): plt.show() -############################################################################### -# main code +# Main code start_time = time.time() -# benchmark bulk/atomic prediction speed for various regressors +# Benchmark bulk/atomic prediction speed for various regressors configuration = { 'n_train': int(1e3), 'n_test': int(1e2), diff --git a/examples/applications/plot_stock_market.py b/examples/applications/plot_stock_market.py index cd1745bb1825f..bd4fd1f5fc9c7 100644 --- a/examples/applications/plot_stock_market.py +++ b/examples/applications/plot_stock_market.py @@ -74,7 +74,6 @@ from sklearn import cluster, covariance, manifold -############################################################################### # Retrieve the data from Internet def quotes_historical_google(symbol, date1, date2): @@ -189,7 +188,6 @@ def quotes_historical_google(symbol, date1, date2): variation = close_prices - open_prices -############################################################################### # Learn a graphical structure from the correlations edge_model = covariance.GraphLassoCV() @@ -199,7 +197,6 @@ def quotes_historical_google(symbol, date1, date2): X /= X.std(axis=0) edge_model.fit(X) -############################################################################### # Cluster using affinity propagation _, labels = cluster.affinity_propagation(edge_model.covariance_) @@ -208,7 +205,6 @@ def quotes_historical_google(symbol, date1, date2): for i in range(n_labels + 1): print('Cluster %i: %s' % ((i + 1), ', '.join(names[labels == i]))) -############################################################################### # Find a low-dimension embedding for visualization: find the best position of # the nodes (the stocks) on a 2D plane @@ -220,7 +216,6 @@ def quotes_historical_google(symbol, date1, date2): embedding = node_position_model.fit_transform(X.T).T -############################################################################### # Visualization plt.figure(1, facecolor='w', figsize=(10, 8)) plt.clf() diff --git a/examples/applications/wikipedia_principal_eigenvector.py b/examples/applications/wikipedia_principal_eigenvector.py index d60121e8ece31..c0b5529d2e3f6 100644 --- a/examples/applications/wikipedia_principal_eigenvector.py +++ b/examples/applications/wikipedia_principal_eigenvector.py @@ -52,7 +52,6 @@ print(__doc__) -############################################################################### # Where to download the data, if not already on disk redirects_url = "http://downloads.dbpedia.org/3.5.1/en/redirects_en.nt.bz2" redirects_filename = redirects_url.rsplit("/", 1)[1] @@ -73,7 +72,6 @@ print() -############################################################################### # Loading the redirect files memory = Memory(cachedir=".") diff --git a/examples/calibration/plot_calibration.py b/examples/calibration/plot_calibration.py index b38b25812bb7f..174812be4d1e6 100644 --- a/examples/calibration/plot_calibration.py +++ b/examples/calibration/plot_calibration.py @@ -83,7 +83,6 @@ clf_sigmoid_score = brier_score_loss(y_test, prob_pos_sigmoid, sw_test) print("With sigmoid calibration: %1.3f" % clf_sigmoid_score) -############################################################################### # Plot the data and the predicted probabilities plt.figure() y_unique = np.unique(y) diff --git a/examples/calibration/plot_compare_calibration.py b/examples/calibration/plot_compare_calibration.py index 2e914696fc177..28624090f5da6 100644 --- a/examples/calibration/plot_compare_calibration.py +++ b/examples/calibration/plot_compare_calibration.py @@ -81,7 +81,6 @@ rfc = RandomForestClassifier(n_estimators=100) -############################################################################### # Plot calibration plots plt.figure(figsize=(10, 10)) diff --git a/examples/classification/plot_lda_qda.py b/examples/classification/plot_lda_qda.py index a668e7cc0db0c..74744ab8348f9 100644 --- a/examples/classification/plot_lda_qda.py +++ b/examples/classification/plot_lda_qda.py @@ -20,8 +20,7 @@ class has its own standard deviation with QDA. from sklearn.discriminant_analysis import LinearDiscriminantAnalysis from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis -############################################################################### -# colormap +# Colormap cmap = colors.LinearSegmentedColormap( 'red_blue_classes', {'red': [(0, 1, 1), (1, 0.7, 0.7)], @@ -30,8 +29,7 @@ class has its own standard deviation with QDA. plt.cm.register_cmap(cmap=cmap) -############################################################################### -# generate datasets +# Generate datasets def dataset_fixed_cov(): '''Generate 2 Gaussians samples with the same covariance matrix''' n, dim = 300, 2 @@ -54,8 +52,7 @@ def dataset_cov(): return X, y -############################################################################### -# plot functions +# Plot functions def plot_data(lda, X, y, y_pred, fig_index): splot = plt.subplot(2, 2, fig_index) if fig_index == 1: @@ -132,7 +129,6 @@ def plot_qda_cov(qda, splot): plot_ellipse(splot, qda.means_[0], qda.covariances_[0], 'red') plot_ellipse(splot, qda.means_[1], qda.covariances_[1], 'blue') -############################################################################### for i, (X, y) in enumerate([dataset_fixed_cov(), dataset_cov()]): # Linear Discriminant Analysis lda = LinearDiscriminantAnalysis(solver="svd", store_covariance=True) diff --git a/examples/cluster/plot_affinity_propagation.py b/examples/cluster/plot_affinity_propagation.py index 0d6c395a4e4bf..edce07c7206e9 100644 --- a/examples/cluster/plot_affinity_propagation.py +++ b/examples/cluster/plot_affinity_propagation.py @@ -14,13 +14,11 @@ from sklearn import metrics from sklearn.datasets.samples_generator import make_blobs -############################################################################## # Generate sample data centers = [[1, 1], [-1, -1], [1, -1]] X, labels_true = make_blobs(n_samples=300, centers=centers, cluster_std=0.5, random_state=0) -############################################################################## # Compute Affinity Propagation af = AffinityPropagation(preference=-50).fit(X) cluster_centers_indices = af.cluster_centers_indices_ @@ -39,7 +37,6 @@ print("Silhouette Coefficient: %0.3f" % metrics.silhouette_score(X, labels, metric='sqeuclidean')) -############################################################################## # Plot result import matplotlib.pyplot as plt from itertools import cycle diff --git a/examples/cluster/plot_dbscan.py b/examples/cluster/plot_dbscan.py index a12b3d39128b6..83c07bcbb892a 100644 --- a/examples/cluster/plot_dbscan.py +++ b/examples/cluster/plot_dbscan.py @@ -17,7 +17,6 @@ from sklearn.preprocessing import StandardScaler -############################################################################## # Generate sample data centers = [[1, 1], [-1, -1], [1, -1]] X, labels_true = make_blobs(n_samples=750, centers=centers, cluster_std=0.4, @@ -25,7 +24,6 @@ X = StandardScaler().fit_transform(X) -############################################################################## # Compute DBSCAN db = DBSCAN(eps=0.3, min_samples=10).fit(X) core_samples_mask = np.zeros_like(db.labels_, dtype=bool) @@ -46,7 +44,6 @@ print("Silhouette Coefficient: %0.3f" % metrics.silhouette_score(X, labels)) -############################################################################## # Plot result import matplotlib.pyplot as plt diff --git a/examples/cluster/plot_dict_face_patches.py b/examples/cluster/plot_dict_face_patches.py index 654fd3162e87b..ed837dbaa0cfd 100644 --- a/examples/cluster/plot_dict_face_patches.py +++ b/examples/cluster/plot_dict_face_patches.py @@ -32,9 +32,7 @@ faces = datasets.fetch_olivetti_faces() -############################################################################### # Learn the dictionary of images -# ------------------------------ print('Learning the dictionary... ') rng = np.random.RandomState(0) @@ -67,9 +65,7 @@ dt = time.time() - t0 print('done in %.2fs.' % dt) -############################################################################### # Plot the results -# ---------------- plt.figure(figsize=(4.2, 4)) for i, patch in enumerate(kmeans.cluster_centers_): plt.subplot(9, 9, i + 1) diff --git a/examples/cluster/plot_face_ward_segmentation.py b/examples/cluster/plot_face_ward_segmentation.py index 687d87ce7f429..27b464a244641 100644 --- a/examples/cluster/plot_face_ward_segmentation.py +++ b/examples/cluster/plot_face_ward_segmentation.py @@ -25,7 +25,6 @@ from sklearn.cluster import AgglomerativeClustering -############################################################################### # Generate data try: # SciPy >= 0.16 have face in misc from scipy.misc import face @@ -38,11 +37,9 @@ X = np.reshape(face, (-1, 1)) -############################################################################### # Define the structure A of the data. Pixels connected to their neighbors. connectivity = grid_to_graph(*face.shape) -############################################################################### # Compute clustering print("Compute structured hierarchical clustering...") st = time.time() @@ -55,7 +52,6 @@ print("Number of pixels: ", label.size) print("Number of clusters: ", np.unique(label).size) -############################################################################### # Plot the results on an image plt.figure(figsize=(5, 5)) plt.imshow(face, cmap=plt.cm.gray) diff --git a/examples/cluster/plot_feature_agglomeration_vs_univariate_selection.py b/examples/cluster/plot_feature_agglomeration_vs_univariate_selection.py index ca3eb2a0035be..d6f65d83f959d 100644 --- a/examples/cluster/plot_feature_agglomeration_vs_univariate_selection.py +++ b/examples/cluster/plot_feature_agglomeration_vs_univariate_selection.py @@ -34,7 +34,6 @@ from sklearn.model_selection import GridSearchCV from sklearn.model_selection import KFold -############################################################################### # Generate data n_samples = 200 size = 40 # image size @@ -58,7 +57,6 @@ noise_coef = (linalg.norm(y, 2) / np.exp(snr / 20.)) / linalg.norm(noise, 2) y += noise_coef * noise # add noise -############################################################################### # Compute the coefs of a Bayesian Ridge with GridSearch cv = KFold(2) # cross-validation generator for model selection ridge = BayesianRidge() @@ -88,7 +86,6 @@ coef_ = clf.best_estimator_.steps[0][1].inverse_transform(coef_.reshape(1, -1)) coef_selection_ = coef_.reshape(size, size) -############################################################################### # Inverse the transformation to plot the results on an image plt.close('all') plt.figure(figsize=(7.3, 2.7)) diff --git a/examples/cluster/plot_kmeans_digits.py b/examples/cluster/plot_kmeans_digits.py index 1e6fbbc019923..b08fd6b854fc4 100644 --- a/examples/cluster/plot_kmeans_digits.py +++ b/examples/cluster/plot_kmeans_digits.py @@ -84,7 +84,6 @@ def bench_k_means(estimator, name, data): data=data) print(82 * '_') -############################################################################### # Visualize the results on PCA-reduced data reduced_data = PCA(n_components=2).fit_transform(data) diff --git a/examples/cluster/plot_mean_shift.py b/examples/cluster/plot_mean_shift.py index 775cd98e59527..2e2c75b962688 100644 --- a/examples/cluster/plot_mean_shift.py +++ b/examples/cluster/plot_mean_shift.py @@ -16,12 +16,10 @@ from sklearn.cluster import MeanShift, estimate_bandwidth from sklearn.datasets.samples_generator import make_blobs -############################################################################### # Generate sample data centers = [[1, 1], [-1, -1], [1, -1]] X, _ = make_blobs(n_samples=10000, centers=centers, cluster_std=0.6) -############################################################################### # Compute clustering with MeanShift # The following bandwidth can be automatically detected using @@ -37,7 +35,6 @@ print("number of estimated clusters : %d" % n_clusters_) -############################################################################### # Plot result import matplotlib.pyplot as plt from itertools import cycle diff --git a/examples/cluster/plot_mini_batch_kmeans.py b/examples/cluster/plot_mini_batch_kmeans.py index 56d999c6c846d..9c650be0a44e2 100644 --- a/examples/cluster/plot_mini_batch_kmeans.py +++ b/examples/cluster/plot_mini_batch_kmeans.py @@ -23,7 +23,6 @@ from sklearn.metrics.pairwise import pairwise_distances_argmin from sklearn.datasets.samples_generator import make_blobs -############################################################################## # Generate sample data np.random.seed(0) @@ -32,7 +31,6 @@ n_clusters = len(centers) X, labels_true = make_blobs(n_samples=3000, centers=centers, cluster_std=0.7) -############################################################################## # Compute clustering with Means k_means = KMeans(init='k-means++', n_clusters=3, n_init=10) @@ -40,7 +38,6 @@ k_means.fit(X) t_batch = time.time() - t0 -############################################################################## # Compute clustering with MiniBatchKMeans mbk = MiniBatchKMeans(init='k-means++', n_clusters=3, batch_size=batch_size, @@ -49,7 +46,6 @@ mbk.fit(X) t_mini_batch = time.time() - t0 -############################################################################## # Plot result fig = plt.figure(figsize=(8, 3)) diff --git a/examples/cluster/plot_segmentation_toy.py b/examples/cluster/plot_segmentation_toy.py index 96f007400e492..fe24304e5b8cc 100644 --- a/examples/cluster/plot_segmentation_toy.py +++ b/examples/cluster/plot_segmentation_toy.py @@ -36,7 +36,6 @@ from sklearn.feature_extraction import image from sklearn.cluster import spectral_clustering -############################################################################### l = 100 x, y = np.indices((l, l)) @@ -52,7 +51,6 @@ circle3 = (x - center3[0]) ** 2 + (y - center3[1]) ** 2 < radius3 ** 2 circle4 = (x - center4[0]) ** 2 + (y - center4[1]) ** 2 < radius4 ** 2 -############################################################################### # 4 circles img = circle1 + circle2 + circle3 + circle4 @@ -81,7 +79,6 @@ plt.matshow(img) plt.matshow(label_im) -############################################################################### # 2 circles img = circle1 + circle2 mask = img.astype(bool) diff --git a/examples/cluster/plot_ward_structured_vs_unstructured.py b/examples/cluster/plot_ward_structured_vs_unstructured.py index 2471f68a6f8ed..102a78ba82eea 100644 --- a/examples/cluster/plot_ward_structured_vs_unstructured.py +++ b/examples/cluster/plot_ward_structured_vs_unstructured.py @@ -33,7 +33,6 @@ from sklearn.cluster import AgglomerativeClustering from sklearn.datasets.samples_generator import make_swiss_roll -############################################################################### # Generate data (swiss roll dataset) n_samples = 1500 noise = 0.05 @@ -41,7 +40,6 @@ # Make it thinner X[:, 1] *= .5 -############################################################################### # Compute clustering print("Compute unstructured hierarchical clustering...") st = time.time() @@ -51,7 +49,6 @@ print("Elapsed time: %.2fs" % elapsed_time) print("Number of points: %i" % label.size) -############################################################################### # Plot result fig = plt.figure() ax = p3.Axes3D(fig) @@ -62,12 +59,10 @@ plt.title('Without connectivity constraints (time %.2fs)' % elapsed_time) -############################################################################### # Define the structure A of the data. Here a 10 nearest neighbors from sklearn.neighbors import kneighbors_graph connectivity = kneighbors_graph(X, n_neighbors=10, include_self=False) -############################################################################### # Compute clustering print("Compute structured hierarchical clustering...") st = time.time() @@ -78,7 +73,6 @@ print("Elapsed time: %.2fs" % elapsed_time) print("Number of points: %i" % label.size) -############################################################################### # Plot result fig = plt.figure() ax = p3.Axes3D(fig) diff --git a/examples/covariance/plot_covariance_estimation.py b/examples/covariance/plot_covariance_estimation.py index 96f637974ee29..8a663fec43fc8 100644 --- a/examples/covariance/plot_covariance_estimation.py +++ b/examples/covariance/plot_covariance_estimation.py @@ -52,7 +52,6 @@ from sklearn.model_selection import GridSearchCV -############################################################################### # Generate sample data n_features, n_samples = 40, 20 np.random.seed(42) @@ -64,7 +63,6 @@ X_train = np.dot(base_X_train, coloring_matrix) X_test = np.dot(base_X_test, coloring_matrix) -############################################################################### # Compute the likelihood on test data # spanning a range of possible shrinkage coefficient values @@ -78,7 +76,6 @@ emp_cov = empirical_covariance(X_train) loglik_real = -log_likelihood(emp_cov, linalg.inv(real_cov)) -############################################################################### # Compare different approaches to setting the parameter # GridSearch for an optimal shrinkage coefficient @@ -94,7 +91,6 @@ oa = OAS() loglik_oa = oa.fit(X_train).score(X_test) -############################################################################### # Plot results fig = plt.figure() plt.title("Regularized covariance: likelihood and shrinkage coefficient") diff --git a/examples/covariance/plot_mahalanobis_distances.py b/examples/covariance/plot_mahalanobis_distances.py index 53329aa71b80f..166252fc7f61f 100644 --- a/examples/covariance/plot_mahalanobis_distances.py +++ b/examples/covariance/plot_mahalanobis_distances.py @@ -78,7 +78,6 @@ # compare estimators learnt from the full data set with true parameters emp_cov = EmpiricalCovariance().fit(X) -############################################################################### # Display results fig = plt.figure() plt.subplots_adjust(hspace=-.1, wspace=.4, top=.95, bottom=.05) diff --git a/examples/covariance/plot_sparse_cov.py b/examples/covariance/plot_sparse_cov.py index d9b7f0808fd75..8d42e7aaef929 100644 --- a/examples/covariance/plot_sparse_cov.py +++ b/examples/covariance/plot_sparse_cov.py @@ -59,7 +59,6 @@ from sklearn.covariance import GraphLassoCV, ledoit_wolf import matplotlib.pyplot as plt -############################################################################## # Generate the data n_samples = 60 n_features = 20 @@ -79,7 +78,6 @@ X -= X.mean(axis=0) X /= X.std(axis=0) -############################################################################## # Estimate the covariance emp_cov = np.dot(X.T, X) / n_samples @@ -91,7 +89,6 @@ lw_cov_, _ = ledoit_wolf(X) lw_prec_ = linalg.inv(lw_cov_) -############################################################################## # Plot the results plt.figure(figsize=(10, 6)) plt.subplots_adjust(left=0.02, right=0.98) diff --git a/examples/cross_decomposition/plot_compare_cross_decomposition.py b/examples/cross_decomposition/plot_compare_cross_decomposition.py index 437c08b056479..65a2980d746e6 100644 --- a/examples/cross_decomposition/plot_compare_cross_decomposition.py +++ b/examples/cross_decomposition/plot_compare_cross_decomposition.py @@ -24,7 +24,6 @@ import matplotlib.pyplot as plt from sklearn.cross_decomposition import PLSCanonical, PLSRegression, CCA -############################################################################### # Dataset based latent variables model n = 500 @@ -46,7 +45,6 @@ print("Corr(Y)") print(np.round(np.corrcoef(Y.T), 2)) -############################################################################### # Canonical (symmetric) PLS # Transform data @@ -106,7 +104,6 @@ plt.yticks(()) plt.show() -############################################################################### # PLS regression, with multivariate response, a.k.a. PLS2 n = 1000 @@ -126,7 +123,6 @@ print(np.round(pls2.coef_, 1)) pls2.predict(X) -############################################################################### # PLS regression, with univariate response, a.k.a. PLS1 n = 1000 @@ -139,7 +135,6 @@ print("Estimated betas") print(np.round(pls1.coef_, 1)) -############################################################################### # CCA (PLS mode B with symmetric deflation) cca = CCA(n_components=2) diff --git a/examples/decomposition/plot_faces_decomposition.py b/examples/decomposition/plot_faces_decomposition.py index fce02751a1b0c..2b84bbf4374b0 100644 --- a/examples/decomposition/plot_faces_decomposition.py +++ b/examples/decomposition/plot_faces_decomposition.py @@ -32,7 +32,6 @@ image_shape = (64, 64) rng = RandomState(0) -############################################################################### # Load faces data dataset = fetch_olivetti_faces(shuffle=True, random_state=rng) faces = dataset.data @@ -48,7 +47,6 @@ print("Dataset consists of %d faces" % n_samples) -############################################################################### def plot_gallery(title, images, n_col=n_col, n_row=n_row): plt.figure(figsize=(2. * n_col, 2.26 * n_row)) plt.suptitle(title, size=16) @@ -62,7 +60,6 @@ def plot_gallery(title, images, n_col=n_col, n_row=n_row): plt.yticks(()) plt.subplots_adjust(0.01, 0.05, 0.99, 0.93, 0.04, 0.) -############################################################################### # List of the different estimators, whether to center and transpose the # problem, and whether the transformer uses the clustering API. estimators = [ @@ -102,12 +99,10 @@ def plot_gallery(title, images, n_col=n_col, n_row=n_row): ] -############################################################################### # Plot a sample of the input data plot_gallery("First centered Olivetti faces", faces_centered[:n_components]) -############################################################################### # Do the estimation and plot it for name, estimator, center in estimators: diff --git a/examples/decomposition/plot_ica_blind_source_separation.py b/examples/decomposition/plot_ica_blind_source_separation.py index 9ba5a1523a3c6..31b3e8a560605 100644 --- a/examples/decomposition/plot_ica_blind_source_separation.py +++ b/examples/decomposition/plot_ica_blind_source_separation.py @@ -21,7 +21,6 @@ from sklearn.decomposition import FastICA, PCA -############################################################################### # Generate sample data np.random.seed(0) n_samples = 2000 @@ -51,7 +50,6 @@ pca = PCA(n_components=3) H = pca.fit_transform(X) # Reconstruct signals based on orthogonal components -############################################################################### # Plot results plt.figure() diff --git a/examples/decomposition/plot_ica_vs_pca.py b/examples/decomposition/plot_ica_vs_pca.py index 54655e519257a..cf9ce0925204a 100644 --- a/examples/decomposition/plot_ica_vs_pca.py +++ b/examples/decomposition/plot_ica_vs_pca.py @@ -37,7 +37,6 @@ from sklearn.decomposition import PCA, FastICA -############################################################################### # Generate sample data rng = np.random.RandomState(42) S = rng.standard_t(1.5, size=(20000, 2)) @@ -57,7 +56,6 @@ S_ica_ /= S_ica_.std(axis=0) -############################################################################### # Plot results def plot_samples(S, axis_list=None): diff --git a/examples/decomposition/plot_image_denoising.py b/examples/decomposition/plot_image_denoising.py index 29bdf6ba65217..68e62a9b6e305 100644 --- a/examples/decomposition/plot_image_denoising.py +++ b/examples/decomposition/plot_image_denoising.py @@ -44,7 +44,6 @@ from sklearn.feature_extraction.image import reconstruct_from_patches_2d -############################################################################### try: # SciPy >= 0.16 have face in misc from scipy.misc import face face = face(gray=True) @@ -75,7 +74,6 @@ data /= np.std(data, axis=0) print('done in %.2fs.' % (time() - t0)) -############################################################################### # Learn the dictionary from reference patches print('Learning the dictionary...') @@ -98,7 +96,6 @@ plt.subplots_adjust(0.08, 0.02, 0.92, 0.85, 0.08, 0.23) -############################################################################### # Display the distorted image def show_with_diff(image, reference, title): @@ -123,7 +120,6 @@ def show_with_diff(image, reference, title): show_with_diff(distorted, face, 'Distorted image') -############################################################################### # Extract noisy patches and reconstruct them using the dictionary print('Extracting noisy patches... ') diff --git a/examples/decomposition/plot_pca_3d.py b/examples/decomposition/plot_pca_3d.py index f26d5d9d1c9bb..06592c0e6b221 100644 --- a/examples/decomposition/plot_pca_3d.py +++ b/examples/decomposition/plot_pca_3d.py @@ -26,7 +26,6 @@ from scipy import stats -############################################################################### # Create the data e = np.exp(1) @@ -55,7 +54,6 @@ def pdf(x): b /= norm -############################################################################### # Plot the figures def plot_figs(fig_num, elev, azim): fig = plt.figure(fig_num, figsize=(4, 3)) diff --git a/examples/decomposition/plot_pca_vs_fa_model_selection.py b/examples/decomposition/plot_pca_vs_fa_model_selection.py index 7944f327e3645..5601f9cdd7798 100644 --- a/examples/decomposition/plot_pca_vs_fa_model_selection.py +++ b/examples/decomposition/plot_pca_vs_fa_model_selection.py @@ -39,7 +39,6 @@ print(__doc__) -############################################################################### # Create the data n_samples, n_features, rank = 1000, 50, 10 @@ -55,7 +54,6 @@ sigmas = sigma * rng.rand(n_features) + sigma / 2. X_hetero = X + rng.randn(n_samples, n_features) * sigmas -############################################################################### # Fit the models n_components = np.arange(0, n_features, 5) # options for n_components diff --git a/examples/ensemble/plot_gradient_boosting_regression.py b/examples/ensemble/plot_gradient_boosting_regression.py index 0437fd924ef1d..468c338f42f2f 100644 --- a/examples/ensemble/plot_gradient_boosting_regression.py +++ b/examples/ensemble/plot_gradient_boosting_regression.py @@ -22,7 +22,6 @@ from sklearn.utils import shuffle from sklearn.metrics import mean_squared_error -############################################################################### # Load data boston = datasets.load_boston() X, y = shuffle(boston.data, boston.target, random_state=13) @@ -31,7 +30,6 @@ X_train, y_train = X[:offset], y[:offset] X_test, y_test = X[offset:], y[offset:] -############################################################################### # Fit regression model params = {'n_estimators': 500, 'max_depth': 4, 'min_samples_split': 2, 'learning_rate': 0.01, 'loss': 'ls'} @@ -41,7 +39,6 @@ mse = mean_squared_error(y_test, clf.predict(X_test)) print("MSE: %.4f" % mse) -############################################################################### # Plot training deviance # compute test set deviance @@ -61,7 +58,6 @@ plt.xlabel('Boosting Iterations') plt.ylabel('Deviance') -############################################################################### # Plot feature importance feature_importance = clf.feature_importances_ # make importances relative to max importance diff --git a/examples/exercises/plot_cv_diabetes.py b/examples/exercises/plot_cv_diabetes.py index 6f3736d3c255b..0b41315069b2b 100644 --- a/examples/exercises/plot_cv_diabetes.py +++ b/examples/exercises/plot_cv_diabetes.py @@ -52,7 +52,6 @@ plt.axhline(np.max(scores), linestyle='--', color='.5') plt.xlim([alphas[0], alphas[-1]]) -############################################################################## # Bonus: how much can you trust the selection of alpha? # To answer this question we use the LassoCV object that sets its alpha diff --git a/examples/feature_selection/plot_feature_selection.py b/examples/feature_selection/plot_feature_selection.py index 61c17dc87c4e1..73badff72be2e 100644 --- a/examples/feature_selection/plot_feature_selection.py +++ b/examples/feature_selection/plot_feature_selection.py @@ -27,7 +27,7 @@ from sklearn import datasets, svm from sklearn.feature_selection import SelectPercentile, f_classif -# import some data to play with +# Import some data to play with # The iris dataset iris = datasets.load_iris() diff --git a/examples/linear_model/plot_ard.py b/examples/linear_model/plot_ard.py index 76d34d3150a5e..9947b110365cb 100644 --- a/examples/linear_model/plot_ard.py +++ b/examples/linear_model/plot_ard.py @@ -30,7 +30,6 @@ from sklearn.linear_model import ARDRegression, LinearRegression -############################################################################### # Generating simulated data with Gaussian weights # Parameters of the example @@ -51,7 +50,6 @@ # Create the target y = np.dot(X, w) + noise -############################################################################### # Fit the ARD Regression clf = ARDRegression(compute_score=True) clf.fit(X, y) @@ -59,7 +57,6 @@ ols = LinearRegression() ols.fit(X, y) -############################################################################### # Plot the true weights, the estimated weights, the histogram of the # weights, and predictions with standard deviations plt.figure(figsize=(6, 5)) diff --git a/examples/linear_model/plot_bayesian_ridge.py b/examples/linear_model/plot_bayesian_ridge.py index 0dbc854cf2ee2..c0a4bd8cdcfe7 100644 --- a/examples/linear_model/plot_bayesian_ridge.py +++ b/examples/linear_model/plot_bayesian_ridge.py @@ -30,7 +30,6 @@ from sklearn.linear_model import BayesianRidge, LinearRegression -############################################################################### # Generating simulated data with Gaussian weights np.random.seed(0) n_samples, n_features = 100, 100 @@ -48,7 +47,6 @@ # Create the target y = np.dot(X, w) + noise -############################################################################### # Fit the Bayesian Ridge Regression and an OLS for comparison clf = BayesianRidge(compute_score=True) clf.fit(X, y) @@ -56,7 +54,6 @@ ols = LinearRegression() ols.fit(X, y) -############################################################################### # Plot true weights, estimated weights, histogram of the weights, and # predictions with standard deviations lw = 2 diff --git a/examples/linear_model/plot_lasso_and_elasticnet.py b/examples/linear_model/plot_lasso_and_elasticnet.py index ca2d2425f9f5d..1c385a162467b 100644 --- a/examples/linear_model/plot_lasso_and_elasticnet.py +++ b/examples/linear_model/plot_lasso_and_elasticnet.py @@ -15,8 +15,7 @@ from sklearn.metrics import r2_score -############################################################################### -# generate some sparse data to play with +# Generate some sparse data to play with np.random.seed(42) n_samples, n_features = 50, 200 @@ -35,7 +34,6 @@ X_train, y_train = X[:n_samples // 2], y[:n_samples // 2] X_test, y_test = X[n_samples // 2:], y[n_samples // 2:] -############################################################################### # Lasso from sklearn.linear_model import Lasso @@ -47,7 +45,6 @@ print(lasso) print("r^2 on test data : %f" % r2_score_lasso) -############################################################################### # ElasticNet from sklearn.linear_model import ElasticNet diff --git a/examples/linear_model/plot_lasso_dense_vs_sparse_data.py b/examples/linear_model/plot_lasso_dense_vs_sparse_data.py index bc8df42a8490e..bc1d9d2b561c2 100644 --- a/examples/linear_model/plot_lasso_dense_vs_sparse_data.py +++ b/examples/linear_model/plot_lasso_dense_vs_sparse_data.py @@ -17,7 +17,6 @@ from sklearn.linear_model import Lasso -############################################################################### # The two Lasso implementations on Dense data print("--- Dense matrices") @@ -39,7 +38,6 @@ print("Distance between coefficients : %s" % linalg.norm(sparse_lasso.coef_ - dense_lasso.coef_)) -############################################################################### # The two Lasso implementations on Sparse data print("--- Sparse matrices") diff --git a/examples/linear_model/plot_lasso_model_selection.py b/examples/linear_model/plot_lasso_model_selection.py index 245c6bd0492c7..7a36ba0672336 100644 --- a/examples/linear_model/plot_lasso_model_selection.py +++ b/examples/linear_model/plot_lasso_model_selection.py @@ -66,6 +66,7 @@ ############################################################################## # LassoLarsIC: least angle regression with BIC/AIC criterion +# ---------------------------------------------------------- model_bic = LassoLarsIC(criterion='bic') t1 = time.time() @@ -98,6 +99,7 @@ def plot_ic_criterion(model, name, color): ############################################################################## # LassoCV: coordinate descent +# --------------------------- # Compute paths print("Computing regularization path using the coordinate descent lasso...") @@ -127,6 +129,7 @@ def plot_ic_criterion(model, name, color): ############################################################################## # LassoLarsCV: least angle regression +# ----------------------------------- # Compute paths print("Computing regularization path using the Lars lasso...") diff --git a/examples/linear_model/plot_logistic_path.py b/examples/linear_model/plot_logistic_path.py index d1b17948c78e0..220811259f4ac 100644 --- a/examples/linear_model/plot_logistic_path.py +++ b/examples/linear_model/plot_logistic_path.py @@ -29,7 +29,6 @@ X -= np.mean(X, 0) -############################################################################### # Demo path functions cs = l1_min_c(X, y, loss='log') * np.logspace(0, 3) diff --git a/examples/linear_model/plot_multi_task_lasso_support.py b/examples/linear_model/plot_multi_task_lasso_support.py index ea17d752f94a0..58f315b401770 100644 --- a/examples/linear_model/plot_multi_task_lasso_support.py +++ b/examples/linear_model/plot_multi_task_lasso_support.py @@ -39,7 +39,6 @@ coef_lasso_ = np.array([Lasso(alpha=0.5).fit(X, y).coef_ for y in Y.T]) coef_multi_task_lasso_ = MultiTaskLasso(alpha=1.).fit(X, Y).coef_ -############################################################################### # Plot support and time series fig = plt.figure(figsize=(8, 5)) plt.subplot(1, 2, 1) diff --git a/examples/linear_model/plot_ols_3d.py b/examples/linear_model/plot_ols_3d.py index 23dfa01d60ecc..e9af33303ad00 100644 --- a/examples/linear_model/plot_ols_3d.py +++ b/examples/linear_model/plot_ols_3d.py @@ -37,7 +37,6 @@ ols.fit(X_train, y_train) -############################################################################### # Plot the figure def plot_figs(fig_num, elev, azim, X_train, clf): fig = plt.figure(fig_num, figsize=(4, 3)) diff --git a/examples/linear_model/plot_ridge_path.py b/examples/linear_model/plot_ridge_path.py index 1f2c475f78b7d..6164932996b08 100644 --- a/examples/linear_model/plot_ridge_path.py +++ b/examples/linear_model/plot_ridge_path.py @@ -39,7 +39,6 @@ X = 1. / (np.arange(1, 11) + np.arange(0, 10)[:, np.newaxis]) y = np.ones(10) -############################################################################### # Compute paths n_alphas = 200 @@ -51,7 +50,6 @@ ridge.fit(X, y) coefs.append(ridge.coef_) -############################################################################### # Display results ax = plt.gca() diff --git a/examples/linear_model/plot_theilsen.py b/examples/linear_model/plot_theilsen.py index 747ac63e6a205..e87b64d3c12c0 100644 --- a/examples/linear_model/plot_theilsen.py +++ b/examples/linear_model/plot_theilsen.py @@ -51,7 +51,6 @@ colors = {'OLS': 'turquoise', 'Theil-Sen': 'gold', 'RANSAC': 'lightgreen'} lw = 2 -############################################################################## # Outliers only in the y direction np.random.seed(0) @@ -80,7 +79,6 @@ plt.legend(loc='upper left') plt.title("Corrupt y") -############################################################################## # Outliers in the X direction np.random.seed(0) diff --git a/examples/model_selection/grid_search_text_feature_extraction.py b/examples/model_selection/grid_search_text_feature_extraction.py index daf82718d42e1..8622b1b021e52 100644 --- a/examples/model_selection/grid_search_text_feature_extraction.py +++ b/examples/model_selection/grid_search_text_feature_extraction.py @@ -67,7 +67,6 @@ format='%(asctime)s %(levelname)s %(message)s') -############################################################################### # Load some categories from the training set categories = [ 'alt.atheism', @@ -84,8 +83,7 @@ print("%d categories" % len(data.target_names)) print() -############################################################################### -# define a pipeline combining a text feature extractor with a simple +# Define a pipeline combining a text feature extractor with a simple # classifier pipeline = Pipeline([ ('vect', CountVectorizer()), diff --git a/examples/model_selection/plot_roc_crossval.py b/examples/model_selection/plot_roc_crossval.py index 366aa0acbee06..6ecae65877fce 100644 --- a/examples/model_selection/plot_roc_crossval.py +++ b/examples/model_selection/plot_roc_crossval.py @@ -40,10 +40,9 @@ from sklearn.metrics import roc_curve, auc from sklearn.model_selection import StratifiedKFold -############################################################################### # Data IO and generation -# import some data to play with +# Import some data to play with iris = datasets.load_iris() X = iris.data y = iris.target @@ -54,7 +53,6 @@ random_state = np.random.RandomState(0) X = np.c_[X, random_state.randn(n_samples, 200 * n_features)] -############################################################################### # Classification and ROC analysis # Run classifier with cross-validation and plot ROC curves diff --git a/examples/model_selection/plot_train_error_vs_test_error.py b/examples/model_selection/plot_train_error_vs_test_error.py index 9002a0a3a5f30..26e64f79660d9 100644 --- a/examples/model_selection/plot_train_error_vs_test_error.py +++ b/examples/model_selection/plot_train_error_vs_test_error.py @@ -19,7 +19,6 @@ import numpy as np from sklearn import linear_model -############################################################################### # Generate sample data n_samples_train, n_samples_test, n_features = 75, 150, 500 np.random.seed(0) @@ -32,7 +31,6 @@ X_train, X_test = X[:n_samples_train], X[n_samples_train:] y_train, y_test = y[:n_samples_train], y[n_samples_train:] -############################################################################### # Compute train and test errors alphas = np.logspace(-5, 1, 60) enet = linear_model.ElasticNet(l1_ratio=0.7) @@ -52,7 +50,6 @@ enet.set_params(alpha=alpha_optim) coef_ = enet.fit(X, y).coef_ -############################################################################### # Plot results functions import matplotlib.pyplot as plt diff --git a/examples/neighbors/plot_regression.py b/examples/neighbors/plot_regression.py index c664d7f173b0e..89c730ec877a8 100644 --- a/examples/neighbors/plot_regression.py +++ b/examples/neighbors/plot_regression.py @@ -16,7 +16,6 @@ # License: BSD 3 clause (C) INRIA -############################################################################### # Generate sample data import numpy as np import matplotlib.pyplot as plt @@ -30,7 +29,6 @@ # Add noise to targets y[::5] += 1 * (0.5 - np.random.rand(8)) -############################################################################### # Fit regression model n_neighbors = 5 diff --git a/examples/neural_networks/plot_rbm_logistic_classification.py b/examples/neural_networks/plot_rbm_logistic_classification.py index 2b9b15fe3d966..0fd6a075353cd 100644 --- a/examples/neural_networks/plot_rbm_logistic_classification.py +++ b/examples/neural_networks/plot_rbm_logistic_classification.py @@ -42,7 +42,6 @@ from sklearn.pipeline import Pipeline -############################################################################### # Setting up def nudge_dataset(X, Y): @@ -91,7 +90,6 @@ def nudge_dataset(X, Y): classifier = Pipeline(steps=[('rbm', rbm), ('logistic', logistic)]) -############################################################################### # Training # Hyper-parameters. These were set by cross-validation, @@ -111,7 +109,6 @@ def nudge_dataset(X, Y): logistic_classifier = linear_model.LogisticRegression(C=100.0) logistic_classifier.fit(X_train, Y_train) -############################################################################### # Evaluation print() @@ -125,7 +122,6 @@ def nudge_dataset(X, Y): Y_test, logistic_classifier.predict(X_test)))) -############################################################################### # Plotting plt.figure(figsize=(4.2, 4)) diff --git a/examples/plot_isotonic_regression.py b/examples/plot_isotonic_regression.py index 4ae207ccedcfd..bac13ac414903 100644 --- a/examples/plot_isotonic_regression.py +++ b/examples/plot_isotonic_regression.py @@ -30,7 +30,6 @@ rs = check_random_state(0) y = rs.randint(-50, 50, size=(n,)) + 50. * np.log(1 + np.arange(n)) -############################################################################### # Fit IsotonicRegression and LinearRegression models ir = IsotonicRegression() @@ -40,8 +39,7 @@ lr = LinearRegression() lr.fit(x[:, np.newaxis], y) # x needs to be 2d for LinearRegression -############################################################################### -# plot result +# Plot result segments = [[[i, y[i]], [i, y_[i]]] for i in range(n)] lc = LineCollection(segments, zorder=0) diff --git a/examples/semi_supervised/plot_label_propagation_digits.py b/examples/semi_supervised/plot_label_propagation_digits.py index 72da021374ad9..1590bf0bf9120 100644 --- a/examples/semi_supervised/plot_label_propagation_digits.py +++ b/examples/semi_supervised/plot_label_propagation_digits.py @@ -45,11 +45,10 @@ class will be very good. unlabeled_set = indices[n_labeled_points:] -# shuffle everything around +# Shuffle everything around y_train = np.copy(y) y_train[unlabeled_set] = -1 -############################################################################### # Learn with LabelSpreading lp_model = label_propagation.LabelSpreading(gamma=0.25, max_iter=5) lp_model.fit(X, y_train) @@ -66,14 +65,13 @@ class will be very good. print("Confusion matrix") print(cm) -# calculate uncertainty values for each transduced distribution +# Calculate uncertainty values for each transduced distribution pred_entropies = stats.distributions.entropy(lp_model.label_distributions_.T) -# pick the top 10 most uncertain labels +# Pick the top 10 most uncertain labels uncertainty_index = np.argsort(pred_entropies)[-10:] -############################################################################### -# plot +# Plot f = plt.figure(figsize=(7, 5)) for index, image_index in enumerate(uncertainty_index): image = images[image_index] diff --git a/examples/semi_supervised/plot_label_propagation_structure.py b/examples/semi_supervised/plot_label_propagation_structure.py index 2632247984b24..8a1fa6e24c172 100644 --- a/examples/semi_supervised/plot_label_propagation_structure.py +++ b/examples/semi_supervised/plot_label_propagation_structure.py @@ -28,12 +28,10 @@ labels[0] = outer labels[-1] = inner -############################################################################### # Learn with LabelSpreading label_spread = label_propagation.LabelSpreading(kernel='knn', alpha=1.0) label_spread.fit(X, labels) -############################################################################### # Plot output labels output_labels = label_spread.transduction_ plt.figure(figsize=(8.5, 4)) diff --git a/examples/svm/plot_rbf_parameters.py b/examples/svm/plot_rbf_parameters.py index 9bbca6683ce95..045f7cf245b99 100644 --- a/examples/svm/plot_rbf_parameters.py +++ b/examples/svm/plot_rbf_parameters.py @@ -91,7 +91,6 @@ def __call__(self, value, clip=None): x, y = [self.vmin, self.midpoint, self.vmax], [0, 0.5, 1] return np.ma.masked_array(np.interp(value, x, y)) -############################################################################## # Load and prepare data set # # dataset for grid search @@ -118,7 +117,6 @@ def __call__(self, value, clip=None): X = scaler.fit_transform(X) X_2d = scaler.fit_transform(X_2d) -############################################################################## # Train classifiers # # For an initial search, a logarithmic grid with basis @@ -147,8 +145,7 @@ def __call__(self, value, clip=None): clf.fit(X_2d, y_2d) classifiers.append((C, gamma, clf)) -############################################################################## -# visualization +# Visualization # # draw visualization of parameter effects diff --git a/examples/svm/plot_svm_anova.py b/examples/svm/plot_svm_anova.py index 01938efd593ac..d8b54cd306c1d 100644 --- a/examples/svm/plot_svm_anova.py +++ b/examples/svm/plot_svm_anova.py @@ -14,7 +14,6 @@ from sklearn.model_selection import cross_val_score from sklearn.pipeline import Pipeline -############################################################################### # Import some data to play with digits = datasets.load_digits() y = digits.target @@ -26,7 +25,6 @@ # add 200 non-informative features X = np.hstack((X, 2 * np.random.random((n_samples, 200)))) -############################################################################### # Create a feature-selection transform and an instance of SVM that we # combine together to have an full-blown estimator @@ -34,7 +32,6 @@ clf = Pipeline([('anova', transform), ('svc', svm.SVC(C=1.0))]) -############################################################################### # Plot the cross-validation score as a function of percentile of features score_means = list() score_stds = list() diff --git a/examples/svm/plot_svm_regression.py b/examples/svm/plot_svm_regression.py index 15a744e2aa8ca..0093bbcea2a05 100644 --- a/examples/svm/plot_svm_regression.py +++ b/examples/svm/plot_svm_regression.py @@ -12,16 +12,13 @@ from sklearn.svm import SVR import matplotlib.pyplot as plt -############################################################################### # Generate sample data X = np.sort(5 * np.random.rand(40, 1), axis=0) y = np.sin(X).ravel() -############################################################################### # Add noise to targets y[::5] += 3 * (0.5 - np.random.rand(8)) -############################################################################### # Fit regression model svr_rbf = SVR(kernel='rbf', C=1e3, gamma=0.1) svr_lin = SVR(kernel='linear', C=1e3) @@ -30,8 +27,7 @@ y_lin = svr_lin.fit(X, y).predict(X) y_poly = svr_poly.fit(X, y).predict(X) -############################################################################### -# look at the results +# Look at the results lw = 2 plt.scatter(X, y, color='darkorange', label='data') plt.hold('on') diff --git a/examples/text/document_classification_20newsgroups.py b/examples/text/document_classification_20newsgroups.py index f34bbd10cbe55..250aa8429ec82 100644 --- a/examples/text/document_classification_20newsgroups.py +++ b/examples/text/document_classification_20newsgroups.py @@ -100,7 +100,6 @@ def is_interactive(): print() -############################################################################### # Load some categories from the training set if opts.all_categories: categories = None @@ -201,7 +200,6 @@ def trim(s): return s if len(s) <= 80 else s[:77] + "..." -############################################################################### # Benchmark classifiers def benchmark(clf): print('_' * 80) diff --git a/examples/text/document_clustering.py b/examples/text/document_clustering.py index 29725cc7ccfb4..ba7a9a8a1daf1 100644 --- a/examples/text/document_clustering.py +++ b/examples/text/document_clustering.py @@ -114,7 +114,6 @@ def is_interactive(): sys.exit(1) -############################################################################### # Load some categories from the training set categories = [ 'alt.atheism', @@ -183,7 +182,6 @@ def is_interactive(): print() -############################################################################### # Do the actual clustering if opts.minibatch: From 7bbf4ae47c8610fa5e88d70f345c52eefd5cc42d Mon Sep 17 00:00:00 2001 From: plagree Date: Tue, 20 Jun 2017 10:07:45 +0200 Subject: [PATCH 3/4] Remove last notebook style example --- examples/linear_model/plot_lasso_model_selection.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/examples/linear_model/plot_lasso_model_selection.py b/examples/linear_model/plot_lasso_model_selection.py index 7a36ba0672336..6b58b55956162 100644 --- a/examples/linear_model/plot_lasso_model_selection.py +++ b/examples/linear_model/plot_lasso_model_selection.py @@ -64,9 +64,8 @@ # normalize data as done by Lars to allow for comparison X /= np.sqrt(np.sum(X ** 2, axis=0)) -############################################################################## +# ############################################################################# # LassoLarsIC: least angle regression with BIC/AIC criterion -# ---------------------------------------------------------- model_bic = LassoLarsIC(criterion='bic') t1 = time.time() @@ -97,9 +96,8 @@ def plot_ic_criterion(model, name, color): plt.title('Information-criterion for model selection (training time %.3fs)' % t_bic) -############################################################################## +# ############################################################################# # LassoCV: coordinate descent -# --------------------------- # Compute paths print("Computing regularization path using the coordinate descent lasso...") @@ -127,9 +125,8 @@ def plot_ic_criterion(model, name, color): plt.axis('tight') plt.ylim(ymin, ymax) -############################################################################## +# ############################################################################# # LassoLarsCV: least angle regression -# ----------------------------------- # Compute paths print("Computing regularization path using the Lars lasso...") From 895bc522a0c6a751055cb4011f65ee08641c84f0 Mon Sep 17 00:00:00 2001 From: plagree Date: Tue, 20 Jun 2017 11:36:38 +0200 Subject: [PATCH 4/4] Space formatting to avoid notebook style --- examples/applications/plot_face_recognition.py | 6 ++++++ examples/applications/plot_model_complexity_influence.py | 2 ++ examples/applications/plot_prediction_latency.py | 2 ++ examples/applications/plot_stock_market.py | 5 +++++ examples/applications/wikipedia_principal_eigenvector.py | 2 ++ examples/calibration/plot_calibration.py | 1 + examples/calibration/plot_compare_calibration.py | 1 + examples/classification/plot_lda_qda.py | 3 +++ examples/cluster/plot_affinity_propagation.py | 3 +++ examples/cluster/plot_dbscan.py | 3 +++ examples/cluster/plot_dict_face_patches.py | 2 ++ examples/cluster/plot_face_ward_segmentation.py | 4 ++++ .../plot_feature_agglomeration_vs_univariate_selection.py | 3 +++ examples/cluster/plot_kmeans_digits.py | 1 + examples/cluster/plot_mean_shift.py | 3 +++ examples/cluster/plot_mini_batch_kmeans.py | 4 ++++ examples/cluster/plot_segmentation_toy.py | 2 ++ examples/cluster/plot_ward_structured_vs_unstructured.py | 6 ++++++ examples/covariance/plot_covariance_estimation.py | 4 ++++ examples/covariance/plot_mahalanobis_distances.py | 1 + examples/covariance/plot_sparse_cov.py | 3 +++ .../cross_decomposition/plot_compare_cross_decomposition.py | 4 ++++ examples/decomposition/plot_faces_decomposition.py | 4 ++++ examples/decomposition/plot_ica_blind_source_separation.py | 2 ++ examples/decomposition/plot_ica_vs_pca.py | 2 ++ examples/decomposition/plot_image_denoising.py | 3 +++ examples/decomposition/plot_pca_3d.py | 2 ++ examples/decomposition/plot_pca_vs_fa_model_selection.py | 2 ++ examples/ensemble/plot_gradient_boosting_regression.py | 4 ++++ examples/exercises/plot_cv_diabetes.py | 1 + examples/feature_selection/plot_feature_selection.py | 3 +++ .../plot_permutation_test_for_classification.py | 2 ++ examples/linear_model/plot_ard.py | 3 +++ examples/linear_model/plot_bayesian_ridge.py | 3 +++ examples/linear_model/plot_lasso_and_elasticnet.py | 3 +++ examples/linear_model/plot_lasso_dense_vs_sparse_data.py | 2 ++ examples/linear_model/plot_logistic_path.py | 1 + examples/linear_model/plot_multi_task_lasso_support.py | 1 + examples/linear_model/plot_ols_3d.py | 1 + examples/linear_model/plot_ridge_path.py | 2 ++ examples/linear_model/plot_theilsen.py | 2 ++ .../model_selection/grid_search_text_feature_extraction.py | 2 ++ examples/model_selection/plot_roc_crossval.py | 2 ++ examples/model_selection/plot_train_error_vs_test_error.py | 3 +++ examples/neighbors/plot_regression.py | 2 ++ .../neural_networks/plot_rbm_logistic_classification.py | 4 ++++ examples/plot_isotonic_regression.py | 2 ++ examples/plot_kernel_ridge_regression.py | 3 +++ examples/semi_supervised/plot_label_propagation_digits.py | 5 +++++ .../semi_supervised/plot_label_propagation_structure.py | 2 ++ examples/svm/plot_rbf_parameters.py | 3 +++ examples/svm/plot_svm_anova.py | 3 +++ examples/svm/plot_svm_regression.py | 4 ++++ examples/text/document_classification_20newsgroups.py | 2 ++ examples/text/document_clustering.py | 2 ++ 55 files changed, 147 insertions(+) diff --git a/examples/applications/plot_face_recognition.py b/examples/applications/plot_face_recognition.py index 123c4b4bdd9b7..13a38d13bc00c 100644 --- a/examples/applications/plot_face_recognition.py +++ b/examples/applications/plot_face_recognition.py @@ -48,6 +48,7 @@ logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s') +# ############################################################################# # Download the data, if not already on disk and load it as numpy arrays lfw_people = fetch_lfw_people(min_faces_per_person=70, resize=0.4) @@ -71,6 +72,7 @@ print("n_classes: %d" % n_classes) +# ############################################################################# # Split into a training set and a test set using a stratified k fold # split into a training and testing set @@ -78,6 +80,7 @@ X, y, test_size=0.25, random_state=42) +# ############################################################################# # Compute a PCA (eigenfaces) on the face dataset (treated as unlabeled # dataset): unsupervised feature extraction / dimensionality reduction n_components = 150 @@ -98,6 +101,7 @@ print("done in %0.3fs" % (time() - t0)) +# ############################################################################# # Train a SVM classification model print("Fitting the classifier to the training set") @@ -111,6 +115,7 @@ print(clf.best_estimator_) +# ############################################################################# # Quantitative evaluation of the model quality on the test set print("Predicting people's names on the test set") @@ -122,6 +127,7 @@ print(confusion_matrix(y_test, y_pred, labels=range(n_classes))) +# ############################################################################# # Qualitative evaluation of the predictions using matplotlib def plot_gallery(images, titles, h, w, n_row=3, n_col=4): diff --git a/examples/applications/plot_model_complexity_influence.py b/examples/applications/plot_model_complexity_influence.py index f7df14f63b4b7..359711b995b14 100644 --- a/examples/applications/plot_model_complexity_influence.py +++ b/examples/applications/plot_model_complexity_influence.py @@ -34,6 +34,7 @@ from sklearn.linear_model.stochastic_gradient import SGDClassifier from sklearn.metrics import hamming_loss +# ############################################################################# # Routines @@ -121,6 +122,7 @@ def _count_nonzero_coefficients(estimator): a = estimator.coef_.toarray() return np.count_nonzero(a) +# ############################################################################# # Main code regression_data = generate_data('regression') classification_data = generate_data('classification', sparse=True) diff --git a/examples/applications/plot_prediction_latency.py b/examples/applications/plot_prediction_latency.py index 156a5d33ee2af..71321b4d39d6e 100644 --- a/examples/applications/plot_prediction_latency.py +++ b/examples/applications/plot_prediction_latency.py @@ -266,10 +266,12 @@ def plot_benchmark_throughput(throughputs, configuration): plt.show() +# ############################################################################# # Main code start_time = time.time() +# ############################################################################# # Benchmark bulk/atomic prediction speed for various regressors configuration = { 'n_train': int(1e3), diff --git a/examples/applications/plot_stock_market.py b/examples/applications/plot_stock_market.py index bd4fd1f5fc9c7..c7d627e8148ef 100644 --- a/examples/applications/plot_stock_market.py +++ b/examples/applications/plot_stock_market.py @@ -74,6 +74,7 @@ from sklearn import cluster, covariance, manifold +# ############################################################################# # Retrieve the data from Internet def quotes_historical_google(symbol, date1, date2): @@ -188,6 +189,7 @@ def quotes_historical_google(symbol, date1, date2): variation = close_prices - open_prices +# ############################################################################# # Learn a graphical structure from the correlations edge_model = covariance.GraphLassoCV() @@ -197,6 +199,7 @@ def quotes_historical_google(symbol, date1, date2): X /= X.std(axis=0) edge_model.fit(X) +# ############################################################################# # Cluster using affinity propagation _, labels = cluster.affinity_propagation(edge_model.covariance_) @@ -205,6 +208,7 @@ def quotes_historical_google(symbol, date1, date2): for i in range(n_labels + 1): print('Cluster %i: %s' % ((i + 1), ', '.join(names[labels == i]))) +# ############################################################################# # Find a low-dimension embedding for visualization: find the best position of # the nodes (the stocks) on a 2D plane @@ -216,6 +220,7 @@ def quotes_historical_google(symbol, date1, date2): embedding = node_position_model.fit_transform(X.T).T +# ############################################################################# # Visualization plt.figure(1, facecolor='w', figsize=(10, 8)) plt.clf() diff --git a/examples/applications/wikipedia_principal_eigenvector.py b/examples/applications/wikipedia_principal_eigenvector.py index c0b5529d2e3f6..175c10594440e 100644 --- a/examples/applications/wikipedia_principal_eigenvector.py +++ b/examples/applications/wikipedia_principal_eigenvector.py @@ -52,6 +52,7 @@ print(__doc__) +# ############################################################################# # Where to download the data, if not already on disk redirects_url = "http://downloads.dbpedia.org/3.5.1/en/redirects_en.nt.bz2" redirects_filename = redirects_url.rsplit("/", 1)[1] @@ -72,6 +73,7 @@ print() +# ############################################################################# # Loading the redirect files memory = Memory(cachedir=".") diff --git a/examples/calibration/plot_calibration.py b/examples/calibration/plot_calibration.py index 174812be4d1e6..c6e3c0111b708 100644 --- a/examples/calibration/plot_calibration.py +++ b/examples/calibration/plot_calibration.py @@ -83,6 +83,7 @@ clf_sigmoid_score = brier_score_loss(y_test, prob_pos_sigmoid, sw_test) print("With sigmoid calibration: %1.3f" % clf_sigmoid_score) +# ############################################################################# # Plot the data and the predicted probabilities plt.figure() y_unique = np.unique(y) diff --git a/examples/calibration/plot_compare_calibration.py b/examples/calibration/plot_compare_calibration.py index 28624090f5da6..d935bce4f5bc2 100644 --- a/examples/calibration/plot_compare_calibration.py +++ b/examples/calibration/plot_compare_calibration.py @@ -81,6 +81,7 @@ rfc = RandomForestClassifier(n_estimators=100) +# ############################################################################# # Plot calibration plots plt.figure(figsize=(10, 10)) diff --git a/examples/classification/plot_lda_qda.py b/examples/classification/plot_lda_qda.py index 74744ab8348f9..c76ffc1f2c11e 100644 --- a/examples/classification/plot_lda_qda.py +++ b/examples/classification/plot_lda_qda.py @@ -20,6 +20,7 @@ class has its own standard deviation with QDA. from sklearn.discriminant_analysis import LinearDiscriminantAnalysis from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis +# ############################################################################# # Colormap cmap = colors.LinearSegmentedColormap( 'red_blue_classes', @@ -29,6 +30,7 @@ class has its own standard deviation with QDA. plt.cm.register_cmap(cmap=cmap) +# ############################################################################# # Generate datasets def dataset_fixed_cov(): '''Generate 2 Gaussians samples with the same covariance matrix''' @@ -52,6 +54,7 @@ def dataset_cov(): return X, y +# ############################################################################# # Plot functions def plot_data(lda, X, y, y_pred, fig_index): splot = plt.subplot(2, 2, fig_index) diff --git a/examples/cluster/plot_affinity_propagation.py b/examples/cluster/plot_affinity_propagation.py index edce07c7206e9..2c8fc3acc3936 100644 --- a/examples/cluster/plot_affinity_propagation.py +++ b/examples/cluster/plot_affinity_propagation.py @@ -14,11 +14,13 @@ from sklearn import metrics from sklearn.datasets.samples_generator import make_blobs +# ############################################################################# # Generate sample data centers = [[1, 1], [-1, -1], [1, -1]] X, labels_true = make_blobs(n_samples=300, centers=centers, cluster_std=0.5, random_state=0) +# ############################################################################# # Compute Affinity Propagation af = AffinityPropagation(preference=-50).fit(X) cluster_centers_indices = af.cluster_centers_indices_ @@ -37,6 +39,7 @@ print("Silhouette Coefficient: %0.3f" % metrics.silhouette_score(X, labels, metric='sqeuclidean')) +# ############################################################################# # Plot result import matplotlib.pyplot as plt from itertools import cycle diff --git a/examples/cluster/plot_dbscan.py b/examples/cluster/plot_dbscan.py index 83c07bcbb892a..8b116ed2cfbb0 100644 --- a/examples/cluster/plot_dbscan.py +++ b/examples/cluster/plot_dbscan.py @@ -17,6 +17,7 @@ from sklearn.preprocessing import StandardScaler +# ############################################################################# # Generate sample data centers = [[1, 1], [-1, -1], [1, -1]] X, labels_true = make_blobs(n_samples=750, centers=centers, cluster_std=0.4, @@ -24,6 +25,7 @@ X = StandardScaler().fit_transform(X) +# ############################################################################# # Compute DBSCAN db = DBSCAN(eps=0.3, min_samples=10).fit(X) core_samples_mask = np.zeros_like(db.labels_, dtype=bool) @@ -44,6 +46,7 @@ print("Silhouette Coefficient: %0.3f" % metrics.silhouette_score(X, labels)) +# ############################################################################# # Plot result import matplotlib.pyplot as plt diff --git a/examples/cluster/plot_dict_face_patches.py b/examples/cluster/plot_dict_face_patches.py index ed837dbaa0cfd..ac2fde3e2cc6a 100644 --- a/examples/cluster/plot_dict_face_patches.py +++ b/examples/cluster/plot_dict_face_patches.py @@ -32,6 +32,7 @@ faces = datasets.fetch_olivetti_faces() +# ############################################################################# # Learn the dictionary of images print('Learning the dictionary... ') @@ -65,6 +66,7 @@ dt = time.time() - t0 print('done in %.2fs.' % dt) +# ############################################################################# # Plot the results plt.figure(figsize=(4.2, 4)) for i, patch in enumerate(kmeans.cluster_centers_): diff --git a/examples/cluster/plot_face_ward_segmentation.py b/examples/cluster/plot_face_ward_segmentation.py index 27b464a244641..1490b6a110388 100644 --- a/examples/cluster/plot_face_ward_segmentation.py +++ b/examples/cluster/plot_face_ward_segmentation.py @@ -25,6 +25,7 @@ from sklearn.cluster import AgglomerativeClustering +# ############################################################################# # Generate data try: # SciPy >= 0.16 have face in misc from scipy.misc import face @@ -37,9 +38,11 @@ X = np.reshape(face, (-1, 1)) +# ############################################################################# # Define the structure A of the data. Pixels connected to their neighbors. connectivity = grid_to_graph(*face.shape) +# ############################################################################# # Compute clustering print("Compute structured hierarchical clustering...") st = time.time() @@ -52,6 +55,7 @@ print("Number of pixels: ", label.size) print("Number of clusters: ", np.unique(label).size) +# ############################################################################# # Plot the results on an image plt.figure(figsize=(5, 5)) plt.imshow(face, cmap=plt.cm.gray) diff --git a/examples/cluster/plot_feature_agglomeration_vs_univariate_selection.py b/examples/cluster/plot_feature_agglomeration_vs_univariate_selection.py index d6f65d83f959d..0801899f70349 100644 --- a/examples/cluster/plot_feature_agglomeration_vs_univariate_selection.py +++ b/examples/cluster/plot_feature_agglomeration_vs_univariate_selection.py @@ -34,6 +34,7 @@ from sklearn.model_selection import GridSearchCV from sklearn.model_selection import KFold +# ############################################################################# # Generate data n_samples = 200 size = 40 # image size @@ -57,6 +58,7 @@ noise_coef = (linalg.norm(y, 2) / np.exp(snr / 20.)) / linalg.norm(noise, 2) y += noise_coef * noise # add noise +# ############################################################################# # Compute the coefs of a Bayesian Ridge with GridSearch cv = KFold(2) # cross-validation generator for model selection ridge = BayesianRidge() @@ -86,6 +88,7 @@ coef_ = clf.best_estimator_.steps[0][1].inverse_transform(coef_.reshape(1, -1)) coef_selection_ = coef_.reshape(size, size) +# ############################################################################# # Inverse the transformation to plot the results on an image plt.close('all') plt.figure(figsize=(7.3, 2.7)) diff --git a/examples/cluster/plot_kmeans_digits.py b/examples/cluster/plot_kmeans_digits.py index b08fd6b854fc4..f38eb8b4be416 100644 --- a/examples/cluster/plot_kmeans_digits.py +++ b/examples/cluster/plot_kmeans_digits.py @@ -84,6 +84,7 @@ def bench_k_means(estimator, name, data): data=data) print(82 * '_') +# ############################################################################# # Visualize the results on PCA-reduced data reduced_data = PCA(n_components=2).fit_transform(data) diff --git a/examples/cluster/plot_mean_shift.py b/examples/cluster/plot_mean_shift.py index 2e2c75b962688..730c820c48345 100644 --- a/examples/cluster/plot_mean_shift.py +++ b/examples/cluster/plot_mean_shift.py @@ -16,10 +16,12 @@ from sklearn.cluster import MeanShift, estimate_bandwidth from sklearn.datasets.samples_generator import make_blobs +# ############################################################################# # Generate sample data centers = [[1, 1], [-1, -1], [1, -1]] X, _ = make_blobs(n_samples=10000, centers=centers, cluster_std=0.6) +# ############################################################################# # Compute clustering with MeanShift # The following bandwidth can be automatically detected using @@ -35,6 +37,7 @@ print("number of estimated clusters : %d" % n_clusters_) +# ############################################################################# # Plot result import matplotlib.pyplot as plt from itertools import cycle diff --git a/examples/cluster/plot_mini_batch_kmeans.py b/examples/cluster/plot_mini_batch_kmeans.py index 9c650be0a44e2..9f84566a3c3a7 100644 --- a/examples/cluster/plot_mini_batch_kmeans.py +++ b/examples/cluster/plot_mini_batch_kmeans.py @@ -23,6 +23,7 @@ from sklearn.metrics.pairwise import pairwise_distances_argmin from sklearn.datasets.samples_generator import make_blobs +# ############################################################################# # Generate sample data np.random.seed(0) @@ -31,6 +32,7 @@ n_clusters = len(centers) X, labels_true = make_blobs(n_samples=3000, centers=centers, cluster_std=0.7) +# ############################################################################# # Compute clustering with Means k_means = KMeans(init='k-means++', n_clusters=3, n_init=10) @@ -38,6 +40,7 @@ k_means.fit(X) t_batch = time.time() - t0 +# ############################################################################# # Compute clustering with MiniBatchKMeans mbk = MiniBatchKMeans(init='k-means++', n_clusters=3, batch_size=batch_size, @@ -46,6 +49,7 @@ mbk.fit(X) t_mini_batch = time.time() - t0 +# ############################################################################# # Plot result fig = plt.figure(figsize=(8, 3)) diff --git a/examples/cluster/plot_segmentation_toy.py b/examples/cluster/plot_segmentation_toy.py index fe24304e5b8cc..aa66c811eda8d 100644 --- a/examples/cluster/plot_segmentation_toy.py +++ b/examples/cluster/plot_segmentation_toy.py @@ -51,6 +51,7 @@ circle3 = (x - center3[0]) ** 2 + (y - center3[1]) ** 2 < radius3 ** 2 circle4 = (x - center4[0]) ** 2 + (y - center4[1]) ** 2 < radius4 ** 2 +# ############################################################################# # 4 circles img = circle1 + circle2 + circle3 + circle4 @@ -79,6 +80,7 @@ plt.matshow(img) plt.matshow(label_im) +# ############################################################################# # 2 circles img = circle1 + circle2 mask = img.astype(bool) diff --git a/examples/cluster/plot_ward_structured_vs_unstructured.py b/examples/cluster/plot_ward_structured_vs_unstructured.py index 102a78ba82eea..fa804d1e50335 100644 --- a/examples/cluster/plot_ward_structured_vs_unstructured.py +++ b/examples/cluster/plot_ward_structured_vs_unstructured.py @@ -33,6 +33,7 @@ from sklearn.cluster import AgglomerativeClustering from sklearn.datasets.samples_generator import make_swiss_roll +# ############################################################################# # Generate data (swiss roll dataset) n_samples = 1500 noise = 0.05 @@ -40,6 +41,7 @@ # Make it thinner X[:, 1] *= .5 +# ############################################################################# # Compute clustering print("Compute unstructured hierarchical clustering...") st = time.time() @@ -49,6 +51,7 @@ print("Elapsed time: %.2fs" % elapsed_time) print("Number of points: %i" % label.size) +# ############################################################################# # Plot result fig = plt.figure() ax = p3.Axes3D(fig) @@ -59,10 +62,12 @@ plt.title('Without connectivity constraints (time %.2fs)' % elapsed_time) +# ############################################################################# # Define the structure A of the data. Here a 10 nearest neighbors from sklearn.neighbors import kneighbors_graph connectivity = kneighbors_graph(X, n_neighbors=10, include_self=False) +# ############################################################################# # Compute clustering print("Compute structured hierarchical clustering...") st = time.time() @@ -73,6 +78,7 @@ print("Elapsed time: %.2fs" % elapsed_time) print("Number of points: %i" % label.size) +# ############################################################################# # Plot result fig = plt.figure() ax = p3.Axes3D(fig) diff --git a/examples/covariance/plot_covariance_estimation.py b/examples/covariance/plot_covariance_estimation.py index 8a663fec43fc8..adb57f003cfbb 100644 --- a/examples/covariance/plot_covariance_estimation.py +++ b/examples/covariance/plot_covariance_estimation.py @@ -52,6 +52,7 @@ from sklearn.model_selection import GridSearchCV +# ############################################################################# # Generate sample data n_features, n_samples = 40, 20 np.random.seed(42) @@ -63,6 +64,7 @@ X_train = np.dot(base_X_train, coloring_matrix) X_test = np.dot(base_X_test, coloring_matrix) +# ############################################################################# # Compute the likelihood on test data # spanning a range of possible shrinkage coefficient values @@ -76,6 +78,7 @@ emp_cov = empirical_covariance(X_train) loglik_real = -log_likelihood(emp_cov, linalg.inv(real_cov)) +# ############################################################################# # Compare different approaches to setting the parameter # GridSearch for an optimal shrinkage coefficient @@ -91,6 +94,7 @@ oa = OAS() loglik_oa = oa.fit(X_train).score(X_test) +# ############################################################################# # Plot results fig = plt.figure() plt.title("Regularized covariance: likelihood and shrinkage coefficient") diff --git a/examples/covariance/plot_mahalanobis_distances.py b/examples/covariance/plot_mahalanobis_distances.py index 166252fc7f61f..21f295ce58305 100644 --- a/examples/covariance/plot_mahalanobis_distances.py +++ b/examples/covariance/plot_mahalanobis_distances.py @@ -78,6 +78,7 @@ # compare estimators learnt from the full data set with true parameters emp_cov = EmpiricalCovariance().fit(X) +# ############################################################################# # Display results fig = plt.figure() plt.subplots_adjust(hspace=-.1, wspace=.4, top=.95, bottom=.05) diff --git a/examples/covariance/plot_sparse_cov.py b/examples/covariance/plot_sparse_cov.py index 8d42e7aaef929..1d6782cb43ef8 100644 --- a/examples/covariance/plot_sparse_cov.py +++ b/examples/covariance/plot_sparse_cov.py @@ -59,6 +59,7 @@ from sklearn.covariance import GraphLassoCV, ledoit_wolf import matplotlib.pyplot as plt +# ############################################################################# # Generate the data n_samples = 60 n_features = 20 @@ -78,6 +79,7 @@ X -= X.mean(axis=0) X /= X.std(axis=0) +# ############################################################################# # Estimate the covariance emp_cov = np.dot(X.T, X) / n_samples @@ -89,6 +91,7 @@ lw_cov_, _ = ledoit_wolf(X) lw_prec_ = linalg.inv(lw_cov_) +# ############################################################################# # Plot the results plt.figure(figsize=(10, 6)) plt.subplots_adjust(left=0.02, right=0.98) diff --git a/examples/cross_decomposition/plot_compare_cross_decomposition.py b/examples/cross_decomposition/plot_compare_cross_decomposition.py index 65a2980d746e6..4a123c04b03a4 100644 --- a/examples/cross_decomposition/plot_compare_cross_decomposition.py +++ b/examples/cross_decomposition/plot_compare_cross_decomposition.py @@ -24,6 +24,7 @@ import matplotlib.pyplot as plt from sklearn.cross_decomposition import PLSCanonical, PLSRegression, CCA +# ############################################################################# # Dataset based latent variables model n = 500 @@ -45,6 +46,7 @@ print("Corr(Y)") print(np.round(np.corrcoef(Y.T), 2)) +# ############################################################################# # Canonical (symmetric) PLS # Transform data @@ -104,6 +106,7 @@ plt.yticks(()) plt.show() +# ############################################################################# # PLS regression, with multivariate response, a.k.a. PLS2 n = 1000 @@ -135,6 +138,7 @@ print("Estimated betas") print(np.round(pls1.coef_, 1)) +# ############################################################################# # CCA (PLS mode B with symmetric deflation) cca = CCA(n_components=2) diff --git a/examples/decomposition/plot_faces_decomposition.py b/examples/decomposition/plot_faces_decomposition.py index 2b84bbf4374b0..d29af6ad408fb 100644 --- a/examples/decomposition/plot_faces_decomposition.py +++ b/examples/decomposition/plot_faces_decomposition.py @@ -32,6 +32,7 @@ image_shape = (64, 64) rng = RandomState(0) +# ############################################################################# # Load faces data dataset = fetch_olivetti_faces(shuffle=True, random_state=rng) faces = dataset.data @@ -60,6 +61,7 @@ def plot_gallery(title, images, n_col=n_col, n_row=n_row): plt.yticks(()) plt.subplots_adjust(0.01, 0.05, 0.99, 0.93, 0.04, 0.) +# ############################################################################# # List of the different estimators, whether to center and transpose the # problem, and whether the transformer uses the clustering API. estimators = [ @@ -99,10 +101,12 @@ def plot_gallery(title, images, n_col=n_col, n_row=n_row): ] +# ############################################################################# # Plot a sample of the input data plot_gallery("First centered Olivetti faces", faces_centered[:n_components]) +# ############################################################################# # Do the estimation and plot it for name, estimator, center in estimators: diff --git a/examples/decomposition/plot_ica_blind_source_separation.py b/examples/decomposition/plot_ica_blind_source_separation.py index 31b3e8a560605..fb7689064dd06 100644 --- a/examples/decomposition/plot_ica_blind_source_separation.py +++ b/examples/decomposition/plot_ica_blind_source_separation.py @@ -21,6 +21,7 @@ from sklearn.decomposition import FastICA, PCA +# ############################################################################# # Generate sample data np.random.seed(0) n_samples = 2000 @@ -50,6 +51,7 @@ pca = PCA(n_components=3) H = pca.fit_transform(X) # Reconstruct signals based on orthogonal components +# ############################################################################# # Plot results plt.figure() diff --git a/examples/decomposition/plot_ica_vs_pca.py b/examples/decomposition/plot_ica_vs_pca.py index cf9ce0925204a..f9ef968babeb1 100644 --- a/examples/decomposition/plot_ica_vs_pca.py +++ b/examples/decomposition/plot_ica_vs_pca.py @@ -37,6 +37,7 @@ from sklearn.decomposition import PCA, FastICA +# ############################################################################# # Generate sample data rng = np.random.RandomState(42) S = rng.standard_t(1.5, size=(20000, 2)) @@ -56,6 +57,7 @@ S_ica_ /= S_ica_.std(axis=0) +# ############################################################################# # Plot results def plot_samples(S, axis_list=None): diff --git a/examples/decomposition/plot_image_denoising.py b/examples/decomposition/plot_image_denoising.py index 68e62a9b6e305..33a394a856c91 100644 --- a/examples/decomposition/plot_image_denoising.py +++ b/examples/decomposition/plot_image_denoising.py @@ -74,6 +74,7 @@ data /= np.std(data, axis=0) print('done in %.2fs.' % (time() - t0)) +# ############################################################################# # Learn the dictionary from reference patches print('Learning the dictionary...') @@ -96,6 +97,7 @@ plt.subplots_adjust(0.08, 0.02, 0.92, 0.85, 0.08, 0.23) +# ############################################################################# # Display the distorted image def show_with_diff(image, reference, title): @@ -120,6 +122,7 @@ def show_with_diff(image, reference, title): show_with_diff(distorted, face, 'Distorted image') +# ############################################################################# # Extract noisy patches and reconstruct them using the dictionary print('Extracting noisy patches... ') diff --git a/examples/decomposition/plot_pca_3d.py b/examples/decomposition/plot_pca_3d.py index 06592c0e6b221..d9db17ffaec39 100644 --- a/examples/decomposition/plot_pca_3d.py +++ b/examples/decomposition/plot_pca_3d.py @@ -26,6 +26,7 @@ from scipy import stats +# ############################################################################# # Create the data e = np.exp(1) @@ -54,6 +55,7 @@ def pdf(x): b /= norm +# ############################################################################# # Plot the figures def plot_figs(fig_num, elev, azim): fig = plt.figure(fig_num, figsize=(4, 3)) diff --git a/examples/decomposition/plot_pca_vs_fa_model_selection.py b/examples/decomposition/plot_pca_vs_fa_model_selection.py index 5601f9cdd7798..b858434d910e3 100644 --- a/examples/decomposition/plot_pca_vs_fa_model_selection.py +++ b/examples/decomposition/plot_pca_vs_fa_model_selection.py @@ -39,6 +39,7 @@ print(__doc__) +# ############################################################################# # Create the data n_samples, n_features, rank = 1000, 50, 10 @@ -54,6 +55,7 @@ sigmas = sigma * rng.rand(n_features) + sigma / 2. X_hetero = X + rng.randn(n_samples, n_features) * sigmas +# ############################################################################# # Fit the models n_components = np.arange(0, n_features, 5) # options for n_components diff --git a/examples/ensemble/plot_gradient_boosting_regression.py b/examples/ensemble/plot_gradient_boosting_regression.py index 468c338f42f2f..9285f8dae0eea 100644 --- a/examples/ensemble/plot_gradient_boosting_regression.py +++ b/examples/ensemble/plot_gradient_boosting_regression.py @@ -22,6 +22,7 @@ from sklearn.utils import shuffle from sklearn.metrics import mean_squared_error +# ############################################################################# # Load data boston = datasets.load_boston() X, y = shuffle(boston.data, boston.target, random_state=13) @@ -30,6 +31,7 @@ X_train, y_train = X[:offset], y[:offset] X_test, y_test = X[offset:], y[offset:] +# ############################################################################# # Fit regression model params = {'n_estimators': 500, 'max_depth': 4, 'min_samples_split': 2, 'learning_rate': 0.01, 'loss': 'ls'} @@ -39,6 +41,7 @@ mse = mean_squared_error(y_test, clf.predict(X_test)) print("MSE: %.4f" % mse) +# ############################################################################# # Plot training deviance # compute test set deviance @@ -58,6 +61,7 @@ plt.xlabel('Boosting Iterations') plt.ylabel('Deviance') +# ############################################################################# # Plot feature importance feature_importance = clf.feature_importances_ # make importances relative to max importance diff --git a/examples/exercises/plot_cv_diabetes.py b/examples/exercises/plot_cv_diabetes.py index 0b41315069b2b..76b0d81b8998c 100644 --- a/examples/exercises/plot_cv_diabetes.py +++ b/examples/exercises/plot_cv_diabetes.py @@ -52,6 +52,7 @@ plt.axhline(np.max(scores), linestyle='--', color='.5') plt.xlim([alphas[0], alphas[-1]]) +# ############################################################################# # Bonus: how much can you trust the selection of alpha? # To answer this question we use the LassoCV object that sets its alpha diff --git a/examples/feature_selection/plot_feature_selection.py b/examples/feature_selection/plot_feature_selection.py index 73badff72be2e..59ed716660341 100644 --- a/examples/feature_selection/plot_feature_selection.py +++ b/examples/feature_selection/plot_feature_selection.py @@ -27,6 +27,7 @@ from sklearn import datasets, svm from sklearn.feature_selection import SelectPercentile, f_classif +# ############################################################################# # Import some data to play with # The iris dataset @@ -44,6 +45,7 @@ X_indices = np.arange(X.shape[-1]) +# ############################################################################# # Univariate feature selection with F-test for feature scoring # We use the default selection function: the 10% most significant features selector = SelectPercentile(f_classif, percentile=10) @@ -54,6 +56,7 @@ label=r'Univariate score ($-Log(p_{value})$)', color='darkorange', edgecolor='black') +# ############################################################################# # Compare to the weights of an SVM clf = svm.SVC(kernel='linear') clf.fit(X, y) diff --git a/examples/feature_selection/plot_permutation_test_for_classification.py b/examples/feature_selection/plot_permutation_test_for_classification.py index 84b1c5a3fca4e..095f743d40803 100644 --- a/examples/feature_selection/plot_permutation_test_for_classification.py +++ b/examples/feature_selection/plot_permutation_test_for_classification.py @@ -25,6 +25,7 @@ from sklearn import datasets +# ############################################################################# # Loading a dataset iris = datasets.load_iris() X = iris.data @@ -46,6 +47,7 @@ print("Classification score %s (pvalue : %s)" % (score, pvalue)) +# ############################################################################# # View histogram of permutation scores plt.hist(permutation_scores, 20, label='Permutation scores', edgecolor='black') diff --git a/examples/linear_model/plot_ard.py b/examples/linear_model/plot_ard.py index 9947b110365cb..38c334a217df5 100644 --- a/examples/linear_model/plot_ard.py +++ b/examples/linear_model/plot_ard.py @@ -30,6 +30,7 @@ from sklearn.linear_model import ARDRegression, LinearRegression +# ############################################################################# # Generating simulated data with Gaussian weights # Parameters of the example @@ -50,6 +51,7 @@ # Create the target y = np.dot(X, w) + noise +# ############################################################################# # Fit the ARD Regression clf = ARDRegression(compute_score=True) clf.fit(X, y) @@ -57,6 +59,7 @@ ols = LinearRegression() ols.fit(X, y) +# ############################################################################# # Plot the true weights, the estimated weights, the histogram of the # weights, and predictions with standard deviations plt.figure(figsize=(6, 5)) diff --git a/examples/linear_model/plot_bayesian_ridge.py b/examples/linear_model/plot_bayesian_ridge.py index c0a4bd8cdcfe7..4359c421ea866 100644 --- a/examples/linear_model/plot_bayesian_ridge.py +++ b/examples/linear_model/plot_bayesian_ridge.py @@ -30,6 +30,7 @@ from sklearn.linear_model import BayesianRidge, LinearRegression +# ############################################################################# # Generating simulated data with Gaussian weights np.random.seed(0) n_samples, n_features = 100, 100 @@ -47,6 +48,7 @@ # Create the target y = np.dot(X, w) + noise +# ############################################################################# # Fit the Bayesian Ridge Regression and an OLS for comparison clf = BayesianRidge(compute_score=True) clf.fit(X, y) @@ -54,6 +56,7 @@ ols = LinearRegression() ols.fit(X, y) +# ############################################################################# # Plot true weights, estimated weights, histogram of the weights, and # predictions with standard deviations lw = 2 diff --git a/examples/linear_model/plot_lasso_and_elasticnet.py b/examples/linear_model/plot_lasso_and_elasticnet.py index 1c385a162467b..350cac0a0ad95 100644 --- a/examples/linear_model/plot_lasso_and_elasticnet.py +++ b/examples/linear_model/plot_lasso_and_elasticnet.py @@ -15,6 +15,7 @@ from sklearn.metrics import r2_score +# ############################################################################# # Generate some sparse data to play with np.random.seed(42) @@ -34,6 +35,7 @@ X_train, y_train = X[:n_samples // 2], y[:n_samples // 2] X_test, y_test = X[n_samples // 2:], y[n_samples // 2:] +# ############################################################################# # Lasso from sklearn.linear_model import Lasso @@ -45,6 +47,7 @@ print(lasso) print("r^2 on test data : %f" % r2_score_lasso) +# ############################################################################# # ElasticNet from sklearn.linear_model import ElasticNet diff --git a/examples/linear_model/plot_lasso_dense_vs_sparse_data.py b/examples/linear_model/plot_lasso_dense_vs_sparse_data.py index bc1d9d2b561c2..c54f81d1b8bcd 100644 --- a/examples/linear_model/plot_lasso_dense_vs_sparse_data.py +++ b/examples/linear_model/plot_lasso_dense_vs_sparse_data.py @@ -17,6 +17,7 @@ from sklearn.linear_model import Lasso +# ############################################################################# # The two Lasso implementations on Dense data print("--- Dense matrices") @@ -38,6 +39,7 @@ print("Distance between coefficients : %s" % linalg.norm(sparse_lasso.coef_ - dense_lasso.coef_)) +# ############################################################################# # The two Lasso implementations on Sparse data print("--- Sparse matrices") diff --git a/examples/linear_model/plot_logistic_path.py b/examples/linear_model/plot_logistic_path.py index 220811259f4ac..66a1ab9bd0254 100644 --- a/examples/linear_model/plot_logistic_path.py +++ b/examples/linear_model/plot_logistic_path.py @@ -29,6 +29,7 @@ X -= np.mean(X, 0) +# ############################################################################# # Demo path functions cs = l1_min_c(X, y, loss='log') * np.logspace(0, 3) diff --git a/examples/linear_model/plot_multi_task_lasso_support.py b/examples/linear_model/plot_multi_task_lasso_support.py index 58f315b401770..c7a9536383bc2 100644 --- a/examples/linear_model/plot_multi_task_lasso_support.py +++ b/examples/linear_model/plot_multi_task_lasso_support.py @@ -39,6 +39,7 @@ coef_lasso_ = np.array([Lasso(alpha=0.5).fit(X, y).coef_ for y in Y.T]) coef_multi_task_lasso_ = MultiTaskLasso(alpha=1.).fit(X, Y).coef_ +# ############################################################################# # Plot support and time series fig = plt.figure(figsize=(8, 5)) plt.subplot(1, 2, 1) diff --git a/examples/linear_model/plot_ols_3d.py b/examples/linear_model/plot_ols_3d.py index e9af33303ad00..d8b0f2b52aa22 100644 --- a/examples/linear_model/plot_ols_3d.py +++ b/examples/linear_model/plot_ols_3d.py @@ -37,6 +37,7 @@ ols.fit(X_train, y_train) +# ############################################################################# # Plot the figure def plot_figs(fig_num, elev, azim, X_train, clf): fig = plt.figure(fig_num, figsize=(4, 3)) diff --git a/examples/linear_model/plot_ridge_path.py b/examples/linear_model/plot_ridge_path.py index 6164932996b08..b16212cbd3718 100644 --- a/examples/linear_model/plot_ridge_path.py +++ b/examples/linear_model/plot_ridge_path.py @@ -39,6 +39,7 @@ X = 1. / (np.arange(1, 11) + np.arange(0, 10)[:, np.newaxis]) y = np.ones(10) +# ############################################################################# # Compute paths n_alphas = 200 @@ -50,6 +51,7 @@ ridge.fit(X, y) coefs.append(ridge.coef_) +# ############################################################################# # Display results ax = plt.gca() diff --git a/examples/linear_model/plot_theilsen.py b/examples/linear_model/plot_theilsen.py index e87b64d3c12c0..c80b4a409937b 100644 --- a/examples/linear_model/plot_theilsen.py +++ b/examples/linear_model/plot_theilsen.py @@ -51,6 +51,7 @@ colors = {'OLS': 'turquoise', 'Theil-Sen': 'gold', 'RANSAC': 'lightgreen'} lw = 2 +# ############################################################################# # Outliers only in the y direction np.random.seed(0) @@ -79,6 +80,7 @@ plt.legend(loc='upper left') plt.title("Corrupt y") +# ############################################################################# # Outliers in the X direction np.random.seed(0) diff --git a/examples/model_selection/grid_search_text_feature_extraction.py b/examples/model_selection/grid_search_text_feature_extraction.py index 8622b1b021e52..bc26ca0719265 100644 --- a/examples/model_selection/grid_search_text_feature_extraction.py +++ b/examples/model_selection/grid_search_text_feature_extraction.py @@ -67,6 +67,7 @@ format='%(asctime)s %(levelname)s %(message)s') +# ############################################################################# # Load some categories from the training set categories = [ 'alt.atheism', @@ -83,6 +84,7 @@ print("%d categories" % len(data.target_names)) print() +# ############################################################################# # Define a pipeline combining a text feature extractor with a simple # classifier pipeline = Pipeline([ diff --git a/examples/model_selection/plot_roc_crossval.py b/examples/model_selection/plot_roc_crossval.py index 6ecae65877fce..eb4664049ee77 100644 --- a/examples/model_selection/plot_roc_crossval.py +++ b/examples/model_selection/plot_roc_crossval.py @@ -40,6 +40,7 @@ from sklearn.metrics import roc_curve, auc from sklearn.model_selection import StratifiedKFold +# ############################################################################# # Data IO and generation # Import some data to play with @@ -53,6 +54,7 @@ random_state = np.random.RandomState(0) X = np.c_[X, random_state.randn(n_samples, 200 * n_features)] +# ############################################################################# # Classification and ROC analysis # Run classifier with cross-validation and plot ROC curves diff --git a/examples/model_selection/plot_train_error_vs_test_error.py b/examples/model_selection/plot_train_error_vs_test_error.py index 26e64f79660d9..4a1654d228f0f 100644 --- a/examples/model_selection/plot_train_error_vs_test_error.py +++ b/examples/model_selection/plot_train_error_vs_test_error.py @@ -19,6 +19,7 @@ import numpy as np from sklearn import linear_model +# ############################################################################# # Generate sample data n_samples_train, n_samples_test, n_features = 75, 150, 500 np.random.seed(0) @@ -31,6 +32,7 @@ X_train, X_test = X[:n_samples_train], X[n_samples_train:] y_train, y_test = y[:n_samples_train], y[n_samples_train:] +# ############################################################################# # Compute train and test errors alphas = np.logspace(-5, 1, 60) enet = linear_model.ElasticNet(l1_ratio=0.7) @@ -50,6 +52,7 @@ enet.set_params(alpha=alpha_optim) coef_ = enet.fit(X, y).coef_ +# ############################################################################# # Plot results functions import matplotlib.pyplot as plt diff --git a/examples/neighbors/plot_regression.py b/examples/neighbors/plot_regression.py index 89c730ec877a8..28c593ceeaf34 100644 --- a/examples/neighbors/plot_regression.py +++ b/examples/neighbors/plot_regression.py @@ -16,6 +16,7 @@ # License: BSD 3 clause (C) INRIA +# ############################################################################# # Generate sample data import numpy as np import matplotlib.pyplot as plt @@ -29,6 +30,7 @@ # Add noise to targets y[::5] += 1 * (0.5 - np.random.rand(8)) +# ############################################################################# # Fit regression model n_neighbors = 5 diff --git a/examples/neural_networks/plot_rbm_logistic_classification.py b/examples/neural_networks/plot_rbm_logistic_classification.py index 0fd6a075353cd..aa75ccc06d1f1 100644 --- a/examples/neural_networks/plot_rbm_logistic_classification.py +++ b/examples/neural_networks/plot_rbm_logistic_classification.py @@ -42,6 +42,7 @@ from sklearn.pipeline import Pipeline +# ############################################################################# # Setting up def nudge_dataset(X, Y): @@ -90,6 +91,7 @@ def nudge_dataset(X, Y): classifier = Pipeline(steps=[('rbm', rbm), ('logistic', logistic)]) +# ############################################################################# # Training # Hyper-parameters. These were set by cross-validation, @@ -109,6 +111,7 @@ def nudge_dataset(X, Y): logistic_classifier = linear_model.LogisticRegression(C=100.0) logistic_classifier.fit(X_train, Y_train) +# ############################################################################# # Evaluation print() @@ -122,6 +125,7 @@ def nudge_dataset(X, Y): Y_test, logistic_classifier.predict(X_test)))) +# ############################################################################# # Plotting plt.figure(figsize=(4.2, 4)) diff --git a/examples/plot_isotonic_regression.py b/examples/plot_isotonic_regression.py index bac13ac414903..fd076b5afad62 100644 --- a/examples/plot_isotonic_regression.py +++ b/examples/plot_isotonic_regression.py @@ -30,6 +30,7 @@ rs = check_random_state(0) y = rs.randint(-50, 50, size=(n,)) + 50. * np.log(1 + np.arange(n)) +# ############################################################################# # Fit IsotonicRegression and LinearRegression models ir = IsotonicRegression() @@ -39,6 +40,7 @@ lr = LinearRegression() lr.fit(x[:, np.newaxis], y) # x needs to be 2d for LinearRegression +# ############################################################################# # Plot result segments = [[[i, y[i]], [i, y_[i]]] for i in range(n)] diff --git a/examples/plot_kernel_ridge_regression.py b/examples/plot_kernel_ridge_regression.py index 6ad422227bb21..cb91908ed5f89 100644 --- a/examples/plot_kernel_ridge_regression.py +++ b/examples/plot_kernel_ridge_regression.py @@ -48,6 +48,7 @@ rng = np.random.RandomState(0) +# ############################################################################# # Generate sample data X = 5 * rng.rand(10000, 1) y = np.sin(X).ravel() @@ -57,6 +58,7 @@ X_plot = np.linspace(0, 5, 100000)[:, None] +# ############################################################################# # Fit regression model train_size = 100 svr = GridSearchCV(SVR(kernel='rbf', gamma=0.1), cv=5, @@ -95,6 +97,7 @@ % (X_plot.shape[0], kr_predict)) +# ############################################################################# # Look at the results sv_ind = svr.best_estimator_.support_ plt.scatter(X[sv_ind], y[sv_ind], c='r', s=50, label='SVR support vectors', diff --git a/examples/semi_supervised/plot_label_propagation_digits.py b/examples/semi_supervised/plot_label_propagation_digits.py index 1590bf0bf9120..6b15fc21629bd 100644 --- a/examples/semi_supervised/plot_label_propagation_digits.py +++ b/examples/semi_supervised/plot_label_propagation_digits.py @@ -45,10 +45,12 @@ class will be very good. unlabeled_set = indices[n_labeled_points:] +# ############################################################################# # Shuffle everything around y_train = np.copy(y) y_train[unlabeled_set] = -1 +# ############################################################################# # Learn with LabelSpreading lp_model = label_propagation.LabelSpreading(gamma=0.25, max_iter=5) lp_model.fit(X, y_train) @@ -65,12 +67,15 @@ class will be very good. print("Confusion matrix") print(cm) +# ############################################################################# # Calculate uncertainty values for each transduced distribution pred_entropies = stats.distributions.entropy(lp_model.label_distributions_.T) +# ############################################################################# # Pick the top 10 most uncertain labels uncertainty_index = np.argsort(pred_entropies)[-10:] +# ############################################################################# # Plot f = plt.figure(figsize=(7, 5)) for index, image_index in enumerate(uncertainty_index): diff --git a/examples/semi_supervised/plot_label_propagation_structure.py b/examples/semi_supervised/plot_label_propagation_structure.py index 8a1fa6e24c172..7cc15d73f1b89 100644 --- a/examples/semi_supervised/plot_label_propagation_structure.py +++ b/examples/semi_supervised/plot_label_propagation_structure.py @@ -28,10 +28,12 @@ labels[0] = outer labels[-1] = inner +# ############################################################################# # Learn with LabelSpreading label_spread = label_propagation.LabelSpreading(kernel='knn', alpha=1.0) label_spread.fit(X, labels) +# ############################################################################# # Plot output labels output_labels = label_spread.transduction_ plt.figure(figsize=(8.5, 4)) diff --git a/examples/svm/plot_rbf_parameters.py b/examples/svm/plot_rbf_parameters.py index 045f7cf245b99..3a909b2b422bf 100644 --- a/examples/svm/plot_rbf_parameters.py +++ b/examples/svm/plot_rbf_parameters.py @@ -91,6 +91,7 @@ def __call__(self, value, clip=None): x, y = [self.vmin, self.midpoint, self.vmax], [0, 0.5, 1] return np.ma.masked_array(np.interp(value, x, y)) +# ############################################################################# # Load and prepare data set # # dataset for grid search @@ -117,6 +118,7 @@ def __call__(self, value, clip=None): X = scaler.fit_transform(X) X_2d = scaler.fit_transform(X_2d) +# ############################################################################# # Train classifiers # # For an initial search, a logarithmic grid with basis @@ -145,6 +147,7 @@ def __call__(self, value, clip=None): clf.fit(X_2d, y_2d) classifiers.append((C, gamma, clf)) +# ############################################################################# # Visualization # # draw visualization of parameter effects diff --git a/examples/svm/plot_svm_anova.py b/examples/svm/plot_svm_anova.py index d8b54cd306c1d..e223730eb82bf 100644 --- a/examples/svm/plot_svm_anova.py +++ b/examples/svm/plot_svm_anova.py @@ -14,6 +14,7 @@ from sklearn.model_selection import cross_val_score from sklearn.pipeline import Pipeline +# ############################################################################# # Import some data to play with digits = datasets.load_digits() y = digits.target @@ -25,6 +26,7 @@ # add 200 non-informative features X = np.hstack((X, 2 * np.random.random((n_samples, 200)))) +# ############################################################################# # Create a feature-selection transform and an instance of SVM that we # combine together to have an full-blown estimator @@ -32,6 +34,7 @@ clf = Pipeline([('anova', transform), ('svc', svm.SVC(C=1.0))]) +# ############################################################################# # Plot the cross-validation score as a function of percentile of features score_means = list() score_stds = list() diff --git a/examples/svm/plot_svm_regression.py b/examples/svm/plot_svm_regression.py index 0093bbcea2a05..e46675eb0e069 100644 --- a/examples/svm/plot_svm_regression.py +++ b/examples/svm/plot_svm_regression.py @@ -12,13 +12,16 @@ from sklearn.svm import SVR import matplotlib.pyplot as plt +# ############################################################################# # Generate sample data X = np.sort(5 * np.random.rand(40, 1), axis=0) y = np.sin(X).ravel() +# ############################################################################# # Add noise to targets y[::5] += 3 * (0.5 - np.random.rand(8)) +# ############################################################################# # Fit regression model svr_rbf = SVR(kernel='rbf', C=1e3, gamma=0.1) svr_lin = SVR(kernel='linear', C=1e3) @@ -27,6 +30,7 @@ y_lin = svr_lin.fit(X, y).predict(X) y_poly = svr_poly.fit(X, y).predict(X) +# ############################################################################# # Look at the results lw = 2 plt.scatter(X, y, color='darkorange', label='data') diff --git a/examples/text/document_classification_20newsgroups.py b/examples/text/document_classification_20newsgroups.py index 250aa8429ec82..4781d28043e21 100644 --- a/examples/text/document_classification_20newsgroups.py +++ b/examples/text/document_classification_20newsgroups.py @@ -100,6 +100,7 @@ def is_interactive(): print() +# ############################################################################# # Load some categories from the training set if opts.all_categories: categories = None @@ -200,6 +201,7 @@ def trim(s): return s if len(s) <= 80 else s[:77] + "..." +# ############################################################################# # Benchmark classifiers def benchmark(clf): print('_' * 80) diff --git a/examples/text/document_clustering.py b/examples/text/document_clustering.py index ba7a9a8a1daf1..58e0e25a89cff 100644 --- a/examples/text/document_clustering.py +++ b/examples/text/document_clustering.py @@ -114,6 +114,7 @@ def is_interactive(): sys.exit(1) +# ############################################################################# # Load some categories from the training set categories = [ 'alt.atheism', @@ -182,6 +183,7 @@ def is_interactive(): print() +# ############################################################################# # Do the actual clustering if opts.minibatch: