FIX parallelisation of kmeans clustering (#12955)

nixphix · jnothman · commit 8a604f7f87ce · 2019-01-17T10:42:17.000+11:00
diff --git a/doc/whats_new/v0.20.rst b/doc/whats_new/v0.20.rst
@@ -15,6 +15,13 @@ enhancements to features released in 0.20.0.
 Changelog
 ---------
 
+:mod:`sklearn.cluster`
+......................
+
+- |Fix| Fixed a bug in :class:`cluster.KMeans` where computation was
+  single-threaded even when `n_jobs > 1` or `n_jobs = -1`.
+  :issue:`12949` by :user:`Prabakaran Kumaresshan <nixphix>`.
+
 :mod:`sklearn.linear_model`
 ...........................
 
diff --git a/sklearn/cluster/k_means_.py b/sklearn/cluster/k_means_.py
@@ -367,7 +367,7 @@ def k_means(X, n_clusters, sample_weight=None, init='k-means++',
     else:
         raise ValueError("Algorithm must be 'auto', 'full' or 'elkan', got"
                          " %s" % str(algorithm))
-    if effective_n_jobs(n_jobs):
+    if effective_n_jobs(n_jobs) == 1:
         # For a single thread, less memory is needed if we just store one set
         # of the best results (as opposed to one set per run per thread).
         for it in range(n_init):
@@ -868,15 +868,15 @@ class KMeans(BaseEstimator, ClusterMixin, TransformerMixin):
     >>> from sklearn.cluster import KMeans
     >>> import numpy as np
     >>> X = np.array([[1, 2], [1, 4], [1, 0],
-    ...               [4, 2], [4, 4], [4, 0]])
+    ...               [10, 2], [10, 4], [10, 0]])
     >>> kmeans = KMeans(n_clusters=2, random_state=0).fit(X)
     >>> kmeans.labels_
-    array([0, 0, 0, 1, 1, 1], dtype=int32)
-    >>> kmeans.predict([[0, 0], [4, 4]])
-    array([0, 1], dtype=int32)
+    array([1, 1, 1, 0, 0, 0], dtype=int32)
+    >>> kmeans.predict([[0, 0], [12, 3]])
+    array([1, 0], dtype=int32)
     >>> kmeans.cluster_centers_
-    array([[1., 2.],
-           [4., 2.]])
+    array([[10.,  2.],
+           [ 1.,  2.]])
 
     See also
     --------