scikit-learn
Lines changed: 22 additions & 5 deletions
diff --git a/examples/text/plot_document_clustering.py b/examples/text/plot_document_clustering.py
--- a/examples/text/plot_document_clustering.py
+++ b/examples/text/plot_document_clustering.py
@@ -118,14 +118,16 @@
 )
 
 print(__doc__)
-op.print_help()
-print()
 
 
 def is_interactive():
     return not hasattr(sys.modules["__main__"], "__file__")
 
 
+if not is_interactive():
+    op.print_help()
+    print()
+
 # work-around for Jupyter notebook and IPython console
 argv = [] if is_interactive() else sys.argv[1:]
 (opts, args) = op.parse_args(argv)
@@ -134,8 +136,10 @@ def is_interactive():
     sys.exit(1)
 
 
-# #############################################################################
+# %%
 # Load some categories from the training set
+# ------------------------------------------
+
 categories = [
     "alt.atheism",
     "talk.religion.misc",
@@ -156,6 +160,11 @@ def is_interactive():
 print("%d categories" % len(dataset.target_names))
 print()
 
+
+# %%
+# Feature Extraction
+# ------------------
+
 labels = dataset.target
 true_k = np.unique(labels).shape[0]
 
@@ -214,8 +223,9 @@ def is_interactive():
     print()
 
 
-# #############################################################################
-# Do the actual clustering
+# %%
+# Clustering
+# ----------
 
 if opts.minibatch:
     km = MiniBatchKMeans(
@@ -241,6 +251,11 @@ def is_interactive():
 print("done in %0.3fs" % (time() - t0))
 print()
 
+
+# %%
+# Performance metrics
+# -------------------
+
 print("Homogeneity: %0.3f" % metrics.homogeneity_score(labels, km.labels_))
 print("Completeness: %0.3f" % metrics.completeness_score(labels, km.labels_))
 print("V-measure: %0.3f" % metrics.v_measure_score(labels, km.labels_))
@@ -253,6 +268,8 @@ def is_interactive():
 print()
 
 
+# %%
+
 if not opts.use_hashing:
     print("Top terms per cluster:")