k-means - added optional parameters "precompute_distances" and "x_squ… · seckcoder/scikit-learn@5144bb6 · GitHub
[go: up one dir, main page]

Skip to content

Commit 5144bb6

Browse files
jaberg authored and GaelVaroquaux committed
k-means - added optional parameters "precompute_distances" and "x_squared_norms"
1 parent 0552ea0 commit 5144bb6

File tree

1 file changed

+5
-12
lines changed

1 file changed

+5
-12
lines changed

scikits/learn/cluster/k_means_.py

Lines changed: 5 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -251,7 +251,7 @@ def _m_step(x, z ,k):
251251
return centers
252252

253253

254-
def _e_step(x, centers):
254+
def _e_step(x, centers, precompute_distances=True, x_squared_norms=None):
255255
"""E step of the K-means EM algorithm
256256
257257
Computation of the input-to-cluster assignment
@@ -276,22 +276,15 @@ def _e_step(x, centers):
276276
n_samples = x.shape[0]
277277
k = centers.shape[0]
278278

279-
there_is_memory_to_compute_distances_matrix = True
280-
281-
if there_is_memory_to_compute_distances_matrix:
282-
distances = (
283-
(x**2).sum(axis=1)
284-
+ (centers**2).sum(axis=1).reshape((k,1))
285-
- 2*np.dot(centers, x.T))
286-
# distances is a matrix of shape (k, n_samples)
287-
279+
if precompute_distances:
280+
distances = all_pairs_l2_distance_squared(centers, x, x_squared_norms)
288281
z = -np.ones(n_samples).astype(np.int)
289282
mindist = np.infty * np.ones(n_samples)
290283
for q in range(k):
291-
if there_is_memory_to_compute_distances_matrix:
284+
if precompute_distances:
292285
dist = distances[q]
293286
else:
294-
dist = np.sum((x - centers[q]) ** 2, 1)
287+
dist = np.sum((x - centers[q]) ** 2, axis=1)
295288
z[dist<mindist] = q
296289
mindist = np.minimum(dist, mindist)
297290
inertia = mindist.sum()

0 commit comments

Comments
 (0)
0