more PEP8 · seckcoder/scikit-learn@a5e1638 · GitHub


Commit a5e1638

ogrisel authored and GaelVaroquaux committed
more PEP8
1 parent 869d7e8 commit a5e1638

File tree

1 file changed: +29 −29 lines changed


scikits/learn/cluster/k_means_.py

Lines changed: 29 additions & 29 deletions
@@ -13,11 +13,10 @@
 from ..base import BaseEstimator
 from ..metrics.pairwise import euclidian_distances
 
-################################################################################
+
+###############################################################################
 # Initialisation heuristic
 
-# kinit originaly from pybrain:
-# http://github.com/pybrain/pybrain/raw/master/pybrain/auxiliary/kmeans.py
 def k_init(X, k, n_samples_max=500, rng=None):
     """Init k seeds according to kmeans++
 
@@ -44,6 +43,9 @@ def k_init(X, k, n_samples_max=500, rng=None):
 
     Implementation from Yong Sun's website
     http://blogs.sun.com/yongsun/entry/k_means_and_k_means
+
+    kinit originaly from pybrain:
+    http://github.com/pybrain/pybrain/raw/master/pybrain/auxiliary/kmeans.py
     """
     n_samples = X.shape[0]
     if rng is None:
@@ -55,8 +57,8 @@ def k_init(X, k, n_samples_max=500, rng=None):
 
     distances = euclidian_distances(X, X, squared=True)
 
-    'choose the 1st seed randomly, and store D(x)^2 in D[]'
-    first_idx =rng.randint(n_samples)
+    # choose the 1st seed randomly, and store D(x)^2 in D[]
+    first_idx = rng.randint(n_samples)
     centers = [X[first_idx]]
     D = distances[first_idx]
 
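For context on the seeding logic touched in these hunks: k-means++ picks the first seed uniformly at random, keeps D(x)^2, the squared distance of every sample to its closest seed so far, and draws each further seed with probability proportional to D(x)^2. The sketch below is an illustrative NumPy version of that idea only; kmeanspp_seeds is a hypothetical name, it is not the repository's k_init, and it omits the n_samples_max subsampling shown in the signature above.

import numpy as np

def kmeanspp_seeds(X, k, seed=None):
    # X: (n_samples, n_features) array; returns (k, n_features) seed centers.
    rng = np.random.default_rng(seed)
    n_samples = X.shape[0]
    # First seed chosen uniformly at random.
    centers = [X[rng.integers(n_samples)]]
    # D[i]: squared distance of sample i to its closest seed so far.
    D = ((X - centers[0]) ** 2).sum(axis=1)
    for _ in range(1, k):
        # Draw the next seed with probability proportional to D.
        idx = rng.choice(n_samples, p=D / D.sum())
        centers.append(X[idx])
        # Update the distance-to-closest-seed bookkeeping.
        D = np.minimum(D, ((X - X[idx]) ** 2).sum(axis=1))
    return np.array(centers)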

@@ -76,10 +78,10 @@ def k_init(X, k, n_samples_max=500, rng=None):
     return np.array(centers)
 
 
-################################################################################
+###############################################################################
 # K-means estimation by EM (expectation maximisation)
 
-def k_means(X, k, init='k-means++', n_init=10, max_iter=300, verbose=0,
+def k_means(X, k, init='k-means++', n_init=10, max_iter=300, verbose=0,
             delta=1e-4, rng=None, copy_x=True):
     """ K-means clustering algorithm.
 
@@ -99,9 +101,9 @@ def k_means(X, k, init='k-means++', n_init=10, max_iter=300, verbose=0,
         Maximum number of iterations of the k-means algorithm to run.
 
     n_init: int, optional, default: 10
-        Number of time the k-means algorithm will be run with different centroid
-        seeds. The final results will be the best output of n_init consecutive
-        runs in terms of inertia.
+        Number of time the k-means algorithm will be run with different
+        centroid seeds. The final results will be the best output of
+        n_init consecutive runs in terms of inertia.
 
     init: {'k-means++', 'random', or ndarray, or a callable}, optional
         Method for initialization, default to 'k-means++':
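The n_init parameter documented here means the whole algorithm is run n_init times from different seeds and only the lowest-inertia run is returned (the bookkeeping for that appears in a later hunk). A hypothetical call against this revision, assuming k_means is re-exported by the cluster package (otherwise import it from scikits.learn.cluster.k_means_):

import numpy as np
from scikits.learn.cluster import k_means

X = np.random.randn(100, 2)
# 10 restarts with k-means++ seeding; the best run by inertia is returned.
centers, labels, inertia = k_means(X, k=3, init='k-means++', n_init=10)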
@@ -180,7 +182,7 @@ def k_means(X, k, init='k-means++', n_init=10, max_iter=300, verbose=0,
                 "'%s' (type '%s') was passed.")
 
         if verbose:
-            print 'Initialization complete'
+            print 'Initialization complete'
         # iterations
         for i in range(max_iter):
             centers_old = centers.copy()
@@ -194,19 +196,19 @@ def k_means(X, k, init='k-means++', n_init=10, max_iter=300, verbose=0,
                 break
 
         if inertia < best_inertia:
+            best_labels = labels.copy()
             best_centers = centers.copy()
-            best_labels = labels.copy()
             best_inertia = inertia
     else:
+        best_labels = labels
         best_centers = centers
-        best_labels = labels
         best_inertia = inertia
-    if not copy_x:
+    if not copy_x:
         X += Xmean
-    return best_centers+Xmean, best_labels, best_inertia
+    return best_centers + Xmean, best_labels, best_inertia
 
 
-def _m_step(x, z ,k):
+def _m_step(x, z, k):
     """ M step of the K-means EM algorithm
 
     Computation of cluster centers/means
@@ -228,10 +230,10 @@ def _m_step(x, z ,k):
     dim = x.shape[1]
     centers = np.repeat(np.reshape(x.mean(0), (1, dim)), k, 0)
     for q in range(k):
-        if np.sum(z==q)==0:
+        if np.sum(z == q) == 0:
             pass
         else:
-            centers[q] = np.mean(x[z==q], axis=0)
+            centers[q] = np.mean(x[z == q], axis=0)
     return centers
 
 
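The M step being reformatted here recomputes each center as the mean of its currently assigned samples, and leaves the default center (the global mean) in place when a cluster receives no samples. A compact sketch of that computation under those assumptions; m_step is an illustrative name, not the file's exact code:

import numpy as np

def m_step(x, z, k):
    # x: (n_samples, n_features); z: (n_samples,) integer labels in [0, k).
    # Start every center at the global mean so empty clusters stay defined.
    centers = np.tile(x.mean(axis=0), (k, 1))
    for q in range(k):
        mask = (z == q)
        if mask.any():
            centers[q] = x[mask].mean(axis=0)
    return centers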

@@ -261,23 +263,21 @@ def _e_step(x, centers, precompute_distances=True, x_squared_norms=None):
     k = centers.shape[0]
 
     if precompute_distances:
-        distances = euclidian_distances(centers, x, x_squared_norms, squared=True)
+        distances = euclidian_distances(centers, x, x_squared_norms,
+                                        squared=True)
     z = -np.ones(n_samples).astype(np.int)
     mindist = np.infty * np.ones(n_samples)
     for q in range(k):
         if precompute_distances:
             dist = distances[q]
         else:
             dist = np.sum((x - centers[q]) ** 2, axis=1)
-        z[dist<mindist] = q
+        z[dist < mindist] = q
         mindist = np.minimum(dist, mindist)
     inertia = mindist.sum()
     return z, inertia
 
 
-
-################################################################################
-
 class KMeans(BaseEstimator):
     """ K-Means clustering
 
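The E step assigns each sample to its nearest center and sums the minimal squared distances into the inertia; the file's own version loops over centers and can reuse precomputed distances, while the sketch below shows the same assignment fully vectorised (illustrative only, e_step is a hypothetical name):

import numpy as np

def e_step(x, centers):
    # Pairwise squared distances, shape (n_samples, n_centers).
    dists = ((x[:, np.newaxis, :] - centers[np.newaxis, :, :]) ** 2).sum(axis=2)
    z = dists.argmin(axis=1)            # label of the nearest center per sample
    inertia = dists.min(axis=1).sum()   # within-cluster sum of squared distances
    return z, inertia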
@@ -295,12 +295,13 @@ class KMeans(BaseEstimator):
         interpreted as initial cluster to use instead.
 
     max_iter : int
-        Maximum number of iterations of the k-means algorithm for a single run.
+        Maximum number of iterations of the k-means algorithm for a
+        single run.
 
     n_init: int, optional, default: 10
-        Number of time the k-means algorithm will be run with different centroid
-        seeds. The final results will be the best output of n_init consecutive
-        runs in terms of inertia.
+        Number of time the k-means algorithm will be run with different
+        centroid seeds. The final results will be the best output of
+        n_init consecutive runs in terms of inertia.
 
     init : {'k-means++', 'random', 'points', 'matrix'}
         Method for initialization, defaults to 'k-means++':
@@ -354,7 +355,6 @@ class KMeans(BaseEstimator):
     it can be useful to restart it several times.
     """
 
-
    def __init__(self, k=8, init='random', n_init=10, max_iter=300,
                 verbose=0, rng=None, copy_x=True):
        self.k = k
@@ -371,7 +371,7 @@ def fit(self, X, **params):
         self._set_params(**params)
         self.cluster_centers_, self.labels_, self.inertia_ = k_means(X,
             k=self.k, init=self.init, n_init=self.n_init,
-            max_iter=self.max_iter, verbose=self.verbose,
+            max_iter=self.max_iter, verbose=self.verbose,
             rng=self.rng, copy_x=self.copy_x)
         return self
 
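A hypothetical end-to-end use of the estimator whose fit method is touched above, written against the scikits.learn API of this revision (note the constructor takes k here, unlike the n_clusters parameter of later scikit-learn releases; the import path is an assumption):

import numpy as np
from scikits.learn.cluster import KMeans

X = np.random.randn(200, 2)
km = KMeans(k=3, init='k-means++', n_init=10, max_iter=300).fit(X)
# Fitted attributes set by fit(), as in the code above:
print(km.cluster_centers_)
print(km.labels_)
print(km.inertia_)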

0 commit comments