@@ -184,9 +184,13 @@ def k_means(X, k, init='k-means++', n_init=10, max_iter=300, verbose=0,
184
184
if verbose :
185
185
print 'Initialization complete'
186
186
# iterations
187
+ x_squared_norms = X .copy ()
188
+ x_squared_norms **= 2
189
+ x_squared_norms = x_squared_norms .sum (axis = 1 )
187
190
for i in range (max_iter ):
188
191
centers_old = centers .copy ()
189
- labels , inertia = _e_step (X , centers )
192
+ labels , inertia = _e_step (X , centers ,
193
+ x_squared_norms = x_squared_norms )
190
194
centers = _m_step (X , labels , k )
191
195
if verbose :
192
196
print 'Iteration %i, inertia %s' % (i , inertia )
@@ -228,12 +232,18 @@ def _m_step(x, z, k):
228
232
The resulting centers
229
233
"""
230
234
dim = x .shape [1 ]
231
- centers = np .repeat (np .reshape (x .mean (0 ), (1 , dim )), k , 0 )
235
+ centers = np .empty ((k , dim ))
236
+ X_center = None
232
237
for q in range (k ):
233
- if np .sum (z == q ) == 0 :
234
- pass
238
+ this_center_mask = (z == q )
239
+ if not np .any (this_center_mask ):
240
+ # The centroid of empty clusters is set to the center of
241
+ # everything
242
+ if X_center is None :
243
+ X_center = x .mean (axis = 0 )
244
+ centers [q ] = X_center
235
245
else :
236
- centers [q ] = np .mean (x [z == q ], axis = 0 )
246
+ centers [q ] = np .mean (x [this_center_mask ], axis = 0 )
237
247
return centers
238
248
239
249
@@ -265,8 +275,10 @@ def _e_step(x, centers, precompute_distances=True, x_squared_norms=None):
265
275
if precompute_distances :
266
276
distances = euclidean_distances (centers , x , x_squared_norms ,
267
277
squared = True )
268
- z = - np .ones (n_samples ).astype (np .int )
269
- mindist = np .infty * np .ones (n_samples )
278
+ z = np .empty (n_samples , dtype = np .int )
279
+ z .fill (- 1 )
280
+ mindist = np .empty (n_samples )
281
+ mindist .fill (np .infty )
270
282
for q in range (k ):
271
283
if precompute_distances :
272
284
dist = distances [q ]
0 commit comments