DOC Fix notebook-style for plot_document_clustering.py (#22443) · scikit-learn/scikit-learn@f13015e · GitHub
[go: up one dir, main page]

Skip to content

Commit f13015e

Browse files
authored
DOC Fix notebook-style for plot_document_clustering.py (#22443)
1 parent 835904a commit f13015e

File tree

1 file changed

+22
-5
lines changed

1 file changed

+22
-5
lines changed

examples/text/plot_document_clustering.py

Lines changed: 22 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -118,14 +118,16 @@
118118
)
119119

120120
print(__doc__)
121-
op.print_help()
122-
print()
123121

124122

125123
def is_interactive():
126124
return not hasattr(sys.modules["__main__"], "__file__")
127125

128126

127+
if not is_interactive():
128+
op.print_help()
129+
print()
130+
129131
# work-around for Jupyter notebook and IPython console
130132
argv = [] if is_interactive() else sys.argv[1:]
131133
(opts, args) = op.parse_args(argv)
@@ -134,8 +136,10 @@ def is_interactive():
134136
sys.exit(1)
135137

136138

137-
# #############################################################################
139+
# %%
138140
# Load some categories from the training set
141+
# ------------------------------------------
142+
139143
categories = [
140144
"alt.atheism",
141145
"talk.religion.misc",
@@ -156,6 +160,11 @@ def is_interactive():
156160
print("%d categories" % len(dataset.target_names))
157161
print()
158162

163+
164+
# %%
165+
# Feature Extraction
166+
# ------------------
167+
159168
labels = dataset.target
160169
true_k = np.unique(labels).shape[0]
161170

@@ -214,8 +223,9 @@ def is_interactive():
214223
print()
215224

216225

217-
# #############################################################################
218-
# Do the actual clustering
226+
# %%
227+
# Clustering
228+
# ----------
219229

220230
if opts.minibatch:
221231
km = MiniBatchKMeans(
@@ -241,6 +251,11 @@ def is_interactive():
241251
print("done in %0.3fs" % (time() - t0))
242252
print()
243253

254+
255+
# %%
256+
# Performance metrics
257+
# -------------------
258+
244259
print("Homogeneity: %0.3f" % metrics.homogeneity_score(labels, km.labels_))
245260
print("Completeness: %0.3f" % metrics.completeness_score(labels, km.labels_))
246261
print("V-measure: %0.3f" % metrics.v_measure_score(labels, km.labels_))
@@ -253,6 +268,8 @@ def is_interactive():
253268
print()
254269

255270

271+
# %%
272+
256273
if not opts.use_hashing:
257274
print("Top terms per cluster:")
258275

0 commit comments

Comments
 (0)
0