@@ -184,9 +184,13 @@ def k_means(X, k, init='k-means++', n_init=10, max_iter=300, verbose=0,
184184 if verbose :
185185 print 'Initialization complete'
186186 # iterations
187+ x_squared_norms = X .copy ()
188+ x_squared_norms **= 2
189+ x_squared_norms = x_squared_norms .sum (axis = 1 )
187190 for i in range (max_iter ):
188191 centers_old = centers .copy ()
189- labels , inertia = _e_step (X , centers )
192+ labels , inertia = _e_step (X , centers ,
193+ x_squared_norms = x_squared_norms )
190194 centers = _m_step (X , labels , k )
191195 if verbose :
192196 print 'Iteration %i, inertia %s' % (i , inertia )
@@ -228,12 +232,18 @@ def _m_step(x, z, k):
228232 The resulting centers
229233 """
230234 dim = x .shape [1 ]
231- centers = np .repeat (np .reshape (x .mean (0 ), (1 , dim )), k , 0 )
235+ centers = np .empty ((k , dim ))
236+ X_center = None
232237 for q in range (k ):
233- if np .sum (z == q ) == 0 :
234- pass
238+ this_center_mask = (z == q )
239+ if not np .any (this_center_mask ):
240+ # The centroid of empty clusters is set to the center of
241+ # everything
242+ if X_center is None :
243+ X_center = x .mean (axis = 0 )
244+ centers [q ] = X_center
235245 else :
236- centers [q ] = np .mean (x [z == q ], axis = 0 )
246+ centers [q ] = np .mean (x [this_center_mask ], axis = 0 )
237247 return centers
238248
239249
@@ -265,8 +275,10 @@ def _e_step(x, centers, precompute_distances=True, x_squared_norms=None):
265275 if precompute_distances :
266276 distances = euclidean_distances (centers , x , x_squared_norms ,
267277 squared = True )
268- z = - np .ones (n_samples ).astype (np .int )
269- mindist = np .infty * np .ones (n_samples )
8000
278+ z = np .empty (n_samples , dtype = np .int )
279+ z .fill (- 1 )
280+ mindist = np .empty (n_samples )
281+ mindist .fill (np .infty )
270282 for q in range (k ):
271283 if precompute_distances :
272284 dist = distances [q ]
0 commit comments