DOC simpler block delimitation in sphinx-gallery examples (#17068) · viclafargue/scikit-learn@4ad9c06

Commit 4ad9c06

rth authored and thomasjpfan committed
DOC simpler block delimitation in sphinx-gallery examples (scikit-learn#17068)
Co-authored-by: Thomas J. Fan <thomasjpfan@gmail.com>
1 parent 20afa6b commit 4ad9c06

34 files changed: +204 −208 lines changed
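
For context: sphinx-gallery treats a comment line consisting only of "# %%" the same way it treats the older 79-character "#" banner, namely as the start of a new text block in an example script; the "#" lines that follow are rendered as reStructuredText and the code underneath runs as one notebook-style cell. Below is a minimal sketch of the layout the examples use after this change; the title, data and printed numbers are illustrative only and are not taken from the diff.

"""
=====================
Minimal example title
=====================

A sphinx-gallery example starts with a module docstring that becomes the
header of the rendered page.
"""

# %%
# First text block
# ----------------
#
# Comment lines after a "# %%" delimiter are rendered as reStructuredText;
# the code below runs as one cell and its output is embedded in the page.

import numpy as np

rng = np.random.RandomState(0)  # illustrative data, not from the diff
x = rng.normal(size=100)
print("mean: {:.3f}".format(x.mean()))

# %%
# Each further "# %%" line starts the next cell, replacing the old
# 79-character "#" banner used before this commit.

print("std: {:.3f}".format(x.std()))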

examples/applications/plot_out_of_core_classification.py
Lines changed: 3 additions & 3 deletions

@@ -43,7 +43,7 @@ def _not_in_sphinx():
 # Hack to detect whether we are running by the sphinx builder
 return '__file__' in globals()
 
-###############################################################################
+# %%
 # Reuters Dataset related routines
 # --------------------------------
 #
@@ -178,7 +178,7 @@ def progress(blocknum, bs, size):
 yield doc
 
 
-###############################################################################
+# %%
 # Main
 # ----
 #
@@ -311,7 +311,7 @@ def progress(cls_name, stats):
 print('\n')
 
 
-###############################################################################
+# %%
 # Plot results
 # ------------
 #

examples/applications/plot_outlier_detection_wine.py
Lines changed: 1 addition & 1 deletion

@@ -94,7 +94,7 @@
 
 plt.show()
 
-##############################################################################
+# %%
 # Second example
 # --------------
 # The second example shows the ability of the Minimum Covariance Determinant

examples/cluster/plot_coin_segmentation.py
Lines changed: 1 addition & 1 deletion

@@ -66,7 +66,7 @@
 # installed)
 N_REGIONS = 25
 
-#############################################################################
+# %%
 # Visualize the resulting regions
 
 for assign_labels in ('kmeans', 'discretize'):

examples/cluster/plot_linkage_comparison.py
Lines changed: 2 additions & 2 deletions

@@ -35,7 +35,7 @@
 
 np.random.seed(0)
 
-######################################################################
+# %%
 # Generate datasets. We choose the size big enough to see the scalability
 # of the algorithms, but not too big to avoid too long running times
 
@@ -58,7 +58,7 @@
 cluster_std=[1.0, 2.5, 0.5],
 random_state=random_state)
 
-######################################################################
+# %%
 # Run the clustering and plot
 
 # Set up cluster parameters

examples/compose/plot_column_transformer_mixed_types.py
Lines changed: 14 additions & 14 deletions

@@ -44,7 +44,7 @@
 # X = titanic.frame.drop('survived', axis=1)
 # y = titanic.frame['survived']
 
-###############################################################################
+# %%
 # Use ``ColumnTransformer`` by selecting column by names
 ###############################################################################
 # We will train our classifier with the following features:
@@ -90,7 +90,7 @@
 clf.fit(X_train, y_train)
 print("model score: %.3f" % clf.score(X_test, y_test))
 
-##############################################################################
+# %%
 # HTML representation of ``Pipeline``
 ###############################################################################
 # When the ``Pipeline`` is printed out in a jupyter notebook an HTML
@@ -100,7 +100,7 @@
 set_config(display='diagram')
 clf
 
-###############################################################################
+# %%
 # Use ``ColumnTransformer`` by selecting column by data types
 ###############################################################################
 # When dealing with a cleaned dataset, the preprocessing can be automatic by
@@ -113,19 +113,19 @@
 subset_feature = ['embarked', 'sex', 'pclass', 'age', 'fare']
 X_train, X_test = X_train[subset_feature], X_test[subset_feature]
 
-###############################################################################
+# %%
 # Then, we introspect the information regarding each column data type.
 
 X_train.info()
 
-###############################################################################
+# %%
 # We can observe that the `embarked` and `sex` columns were tagged as
 # `category` columns when loading the data with ``fetch_openml``. Therefore, we
 # can use this information to dispatch the categorical columns to the
 # ``categorical_transformer`` and the remaining columns to the
 # ``numerical_transformer``.
 
-###############################################################################
+# %%
 # .. note:: In practice, you will have to handle yourself the column data type.
 # If you want some columns to be considered as `category`, you will have to
 # convert them into categorical columns. If you are using pandas, you can
@@ -145,20 +145,20 @@
 clf.fit(X_train, y_train)
 print("model score: %.3f" % clf.score(X_test, y_test))
 
-###############################################################################
+# %%
 # The resulting score is not exactly the same as the one from the previous
 # pipeline becase the dtype-based selector treats the ``pclass`` columns as
 # a numeric features instead of a categorical feature as previously:
 
 selector(dtype_exclude="category")(X_train)
 
-###############################################################################
+# %%
 
 selector(dtype_include="category")(X_train)
 
-###############################################################################
+# %%
 # Using the prediction pipeline in a grid search
-###############################################################################
+##############################################################################
 # Grid search can also be performed on the different preprocessing steps
 # defined in the ``ColumnTransformer`` object, together with the classifier's
 # hyperparameters as part of the ``Pipeline``.
@@ -174,7 +174,7 @@
 grid_search = GridSearchCV(clf, param_grid, cv=10)
 grid_search
 
-###############################################################################
+# %%
 # Calling 'fit' triggers the cross-validated search for the best
 # hyper-parameters combination:
 #
@@ -183,11 +183,11 @@
 print(f"Best params:")
 print(grid_search.best_params_)
 
-###############################################################################
+# %%
 # The internal cross-validation scores obtained by those parameters is:
 print(f"Internal CV score: {grid_search.best_score_:.3f}")
 
-###############################################################################
+# %%
 # We can also introspect the top grid search results as a pandas dataframe:
 import pandas as pd
 
@@ -198,7 +198,7 @@
 "param_classifier__C"
 ]].head(5)
 
-###############################################################################
+# %%
 # The best hyper-parameters have be used to re-fit a final model on the full
 # training set. We can evaluate that final model on held out test data that was
 # not used for hyparameter tuning.

examples/compose/plot_compare_reduction.py
Lines changed: 3 additions & 3 deletions

@@ -20,7 +20,7 @@
 Note that the use of ``memory`` to enable caching becomes interesting when the
 fitting of a transformer is costly.
 
-###############################################################################
+# %%
 Illustration of ``Pipeline`` and ``GridSearchCV``
 ###############################################################################
 
@@ -89,7 +89,7 @@
 
 plt.show()
 
-###############################################################################
+# %%
 # Caching transformers within a ``Pipeline``
 ###############################################################################
 # It is sometimes worthwhile storing the state of a specific transformer
@@ -119,7 +119,7 @@
 memory.clear(warn=False)
 rmtree(location)
 
-###############################################################################
+# %%
 # The ``PCA`` fitting is only computed at the evaluation of the first
 # configuration of the ``C`` parameter of the ``LinearSVC`` classifier. The
 # other configurations of ``C`` will trigger the loading of the cached ``PCA``

examples/compose/plot_transformed_target.py
Lines changed: 8 additions & 10 deletions

@@ -27,7 +27,7 @@
 from sklearn.compose import TransformedTargetRegressor
 from sklearn.metrics import median_absolute_error, r2_score
 
-###############################################################################
+# %%
 # Synthetic example
 ##############################################################################
 
@@ -37,7 +37,7 @@
 else:
 density_param = {'normed': True}
 
-###############################################################################
+# %%
 # A synthetic random regression dataset is generated. The targets ``y`` are
 # modified by:
 #
@@ -54,7 +54,7 @@
 y = np.expm1((y + abs(y.min())) / 200)
 y_trans = np.log1p(y)
 
-###############################################################################
+# %%
 # Below we plot the probability density functions of the target
 # before and after applying the logarithmic functions.
 
@@ -76,7 +76,7 @@
 
 X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
 
-###############################################################################
+# %%
 # At first, a linear model will be applied on the original targets. Due to the
 # non-linearity, the model trained will not be precise during
 # prediction. Subsequently, a logarithmic function is used to linearize the
@@ -118,11 +118,10 @@
 f.suptitle("Synthetic data", y=0.035)
 f.tight_layout(rect=[0.05, 0.05, 0.95, 0.95])
 
-###############################################################################
+# %%
 # Real-world data set
 ###############################################################################
-
-###############################################################################
+#
 # In a similar manner, the Ames housing data set is used to show the impact
 # of transforming the targets before learning a model. In this example, the
 # target to be predicted is the selling price of each house.
@@ -140,8 +139,7 @@
 n_quantiles=900,
 output_distribution='normal',
 copy=True).squeeze()
-
-###############################################################################
+# %%
 # A :class:`~sklearn.preprocessing.QuantileTransformer` is used to normalize
 # the target distribution before applying a
 # :class:`~sklearn.linear_model.RidgeCV` model.
@@ -164,7 +162,7 @@
 
 X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)
 
-###############################################################################
+# %%
 # The effect of the transformer is weaker than on the synthetic data. However,
 # the transformation results in an increase in :math:`R^2` and large decrease
 # of the MAE. The residual plot (predicted target - true target vs predicted

examples/covariance/plot_lw_vs_oas.py
Lines changed: 1 addition & 1 deletion

@@ -29,7 +29,7 @@
 from sklearn.covariance import LedoitWolf, OAS
 
 np.random.seed(0)
-###############################################################################
+# %%
 n_features = 100
 # simulation covariance matrix (AR(1) process)
 r = 0.1

examples/ensemble/plot_gradient_boosting_early_stopping.py
Lines changed: 2 additions & 2 deletions

@@ -91,7 +91,7 @@
 index = np.arange(0, n * bar_width, bar_width) * 2.5
 index = index[0:n]
 
-#######################################################################
+# %%
 # Compare scores with and without early stopping
 # ----------------------------------------------
 
@@ -129,7 +129,7 @@ def autolabel(rects, n_estimators):
 plt.show()
 
 
-#######################################################################
+# %%
 # Compare fit times with and without early stopping
 # -------------------------------------------------
 

examples/ensemble/plot_gradient_boosting_regression.py
Lines changed: 5 additions & 5 deletions

@@ -28,7 +28,7 @@
 from sklearn.metrics import mean_squared_error
 from sklearn.model_selection import train_test_split
 
-##############################################################################
+# %%
 # Load the data
 # -------------------------------------
 #
@@ -37,7 +37,7 @@
 diabetes = datasets.load_diabetes()
 X, y = diabetes.data, diabetes.target
 
-##############################################################################
+# %%
 # Data preprocessing
 # -------------------------------------
 #
@@ -69,7 +69,7 @@
 'learning_rate': 0.01,
 'loss': 'ls'}
 
-##############################################################################
+# %%
 # Fit regression model
 # -------------------------------------
 #
@@ -82,7 +82,7 @@
 mse = mean_squared_error(y_test, reg.predict(X_test))
 print("The mean squared error (MSE) on test set: {:.4f}".format(mse))
 
-##############################################################################
+# %%
 # Plot training deviance
 # -------------------------------------
 #
@@ -106,7 +106,7 @@
 fig.tight_layout()
 plt.show()
 
-##############################################################################
+# %%
 # Plot feature importance
 # -------------------------------------
 #
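
A usage note, as an assumption about surrounding tooling rather than anything stated in this commit: "# %%" is the common "percent" cell format, so editors such as VS Code and Spyder can run the converted examples cell by cell, and jupytext can round-trip them to notebooks. A minimal sketch, assuming jupytext is installed and the script is run from the examples directory:

import jupytext

# Read a percent-format example script as a notebook object; each "# %%"
# delimiter becomes a notebook cell boundary. Then write it out as .ipynb.
nb = jupytext.read("ensemble/plot_gradient_boosting_regression.py")
jupytext.write(nb, "plot_gradient_boosting_regression.ipynb")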

0 commit comments
