118
118
)
119
119
120
120
print (__doc__ )
121
- op .print_help ()
122
- print ()
123
121
124
122
125
123
def is_interactive ():
126
124
return not hasattr (sys .modules ["__main__" ], "__file__" )
127
125
128
126
127
+ if not is_interactive ():
128
+ op .print_help ()
129
+ print ()
130
+
129
131
# work-around for Jupyter notebook and IPython console
130
132
argv = [] if is_interactive () else sys .argv [1 :]
131
133
(opts , args ) = op .parse_args (argv )
@@ -134,8 +136,10 @@ def is_interactive():
134
136
sys .exit (1 )
135
137
136
138
137
- # #############################################################################
139
+ # %%
138
140
# Load some categories from the training set
141
+ # ------------------------------------------
142
+
139
143
categories = [
140
144
"alt.atheism" ,
141
145
"talk.religion.misc" ,
@@ -156,6 +160,11 @@ def is_interactive():
156
160
print ("%d categories" % len (dataset .target_names ))
157
161
print ()
158
162
163
+
164
+ # %%
165
+ # Feature Extraction
166
+ # ------------------
167
+
159
168
labels = dataset .target
160
169
true_k = np .unique (labels ).shape [0 ]
161
170
@@ -214,8 +223,9 @@ def is_interactive():
214
223
print ()
215
224
216
225
217
- # #############################################################################
218
- # Do the actual clustering
226
+ # %%
227
+ # Clustering
228
+ # ----------
219
229
220
230
if opts .minibatch :
221
231
km = MiniBatchKMeans (
@@ -241,6 +251,11 @@ def is_interactive():
241
251
print ("done in %0.3fs" % (time () - t0 ))
242
252
print ()
243
253
254
+
255
+ # %%
256
+ # Performance metrics
257
+ # -------------------
258
+
244
259
print ("Homogeneity: %0.3f" % metrics .homogeneity_score (labels , km .labels_ ))
245
260
print ("Completeness: %0.3f" % metrics .completeness_score (labels , km .labels_ ))
246
261
print ("V-measure: %0.3f" % metrics .v_measure_score (labels , km .labels_ ))
@@ -253,6 +268,8 @@ def is_interactive():
253
268
print ()
254
269
255
270
271
+ # %%
272
+
256
273
if not opts .use_hashing :
257
274
print ("Top terms per cluster:" )
258
275
0 commit comments