FIX Number of threads in KMeans should not be bigger than number of chunks · Sastopher/scikit-learn@15716da · GitHub
[go: up one dir, main page]

Skip to content

Commit 15716da

Browse files
jeremiedbb authored and adrinjalali committed
FIX Number of threads in KMeans should not be bigger than number of chunks (scikit-learn#17210)
* num threads not bigger than num chunks
* what's new
1 parent 7f1473d commit 15716da

File tree

3 files changed

+16
-0
lines changed

3 files changed

+16
-0
lines changed

doc/whats_new/v0.23.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,10 @@ Changelog
1919
provided by the user were modified in place. :pr:`17204` by
2020
:user:`Jeremie du Boisberranger <jeremiedbb>`.
2121

22+
- |Efficiency| :class:`cluster.KMeans` cannot spawn idle threads any more for
23+
very small datasets. :pr:`17210` by
24+
:user:`Jeremie du Boisberranger <jeremiedbb>`.
25+
2226
Miscellaneous
2327
.............
2428

sklearn/cluster/_k_means_elkan.pyx

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -284,6 +284,9 @@ def _elkan_iter_chunked_dense(
284284
# count remainder chunk in total number of chunks
285285
n_chunks += n_samples != n_chunks * n_samples_chunk
286286

287+
# number of threads should not be bigger than number of chunks
288+
n_threads = min(n_threads, n_chunks)
289+
287290
if update_centers:
288291
memset(&centers_new[0, 0], 0, n_clusters * n_features * sizeof(floating))
289292
memset(&weight_in_clusters[0], 0, n_clusters * sizeof(floating))
@@ -515,6 +518,9 @@ def _elkan_iter_chunked_sparse(
515518
# count remainder chunk in total number of chunks
516519
n_chunks += n_samples != n_chunks * n_samples_chunk
517520

521+
# number of threads should not be bigger than number of chunks
522+
n_threads = min(n_threads, n_chunks)
523+
518524
if update_centers:
519525
memset(&centers_new[0, 0], 0, n_clusters * n_features * sizeof(floating))
520526
memset(&weight_in_clusters[0], 0, n_clusters * sizeof(floating))

sklearn/cluster/_k_means_lloyd.pyx

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,9 @@ def _lloyd_iter_chunked_dense(
120120
# count remainder chunk in total number of chunks
121121
n_chunks += n_samples != n_chunks * n_samples_chunk
122122

123+
# number of threads should not be bigger than number of chunks
124+
n_threads = min(n_threads, n_chunks)
125+
123126
if update_centers:
124127
memset(&centers_new[0, 0], 0, n_clusters * n_features * sizeof(floating))
125128
memset(&weight_in_clusters[0], 0, n_clusters * sizeof(floating))
@@ -314,6 +317,9 @@ def _lloyd_iter_chunked_sparse(
314317
# count remainder chunk in total number of chunks
315318
n_chunks += n_samples != n_chunks * n_samples_chunk
316319

320+
# number of threads should not be bigger than number of chunks
321+
n_threads = min(n_threads, n_chunks)
322+
317323
if update_centers:
318324
memset(&centers_new[0, 0], 0, n_clusters * n_features * sizeof(floating))
319325
memset(&weight_in_clusters[0], 0, n_clusters * sizeof(floating))

0 commit comments

Comments
 (0)
0