PERF set openmp to use only physical cores by default (#26082) · scikit-learn/scikit-learn@5b46d01 · GitHub
[go: up one dir, main page]

Skip to content

Commit 5b46d01

Browse files
authored
PERF set openmp to use only physical cores by default (#26082)
1 parent a7a416f commit 5b46d01

File tree

2 files changed

+33
-5
lines changed

2 files changed

+33
-5
lines changed

doc/whats_new/v1.3.rst

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,17 @@ Changes impacting all modules
126126

127127
:pr:`25044` by :user:`Julien Jerphanion <jjerphan>`.
128128

129+
- |Enhancement| All estimators that internally rely on OpenMP multi-threading
130+
(via Cython) now use a number of threads equal to the number of physical
131+
(instead of logical) cores by default. In the past, we observed that using as
132+
many threads as logical cores on SMT hosts could sometimes cause severe
133+
performance problems depending on the algorithms and the shape of the data.
134+
Note that it is still possible to manually adjust the number of threads used
135+
by OpenMP as documented in :ref:`parallelism`.
136+
137+
:pr:`26082` by :user:`Jérémie du Boisberranger <jeremiedbb>` and
138+
:user:`Olivier Grisel <ogrisel>`.
139+
129140
Changelog
130141
---------
131142

sklearn/utils/_openmp_helpers.pyx

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,12 @@ import os
22
from joblib import cpu_count
33

44

5+
# Module level cache for cpu_count as we do not expect this to change during
6+
# the lifecycle of a Python program. This dictionary is keyed by
7+
# only_physical_cores.
8+
_CPU_COUNTS = {}
9+
10+
511
def _openmp_parallelism_enabled():
612
"""Determines whether scikit-learn has been built with OpenMP
713
@@ -12,7 +18,7 @@ def _openmp_parallelism_enabled():
1218
return SKLEARN_OPENMP_PARALLELISM_ENABLED
1319

1420

15-
cpdef _openmp_effective_n_threads(n_threads=None, only_physical_cores=False):
21+
cpdef _openmp_effective_n_threads(n_threads=None, only_physical_cores=True):
1622
"""Determine the effective number of threads to be used for OpenMP calls
1723
1824
- For ``n_threads = None``,
@@ -33,6 +39,15 @@ cpdef _openmp_effective_n_threads(n_threads=None, only_physical_cores=False):
3339
3440
- Raise a ValueError for ``n_threads = 0``.
3541
42+
Passing the `only_physical_cores=False` flag makes it possible to use extra
43+
threads for SMT/HyperThreading logical cores. It has been empirically
44+
observed that using as many threads as available SMT cores can slightly
45+
improve the performance in some cases, but can severely degrade
46+
performance at other times. Therefore, it is recommended to use
47+
`only_physical_cores=True` unless an empirical study has been conducted to
48+
assess the impact of SMT on a case-by-case basis (using various input data
49+
shapes, in particular small data shapes).
50+
3651
If scikit-learn is built without OpenMP support, always return 1.
3752
"""
3853
if n_threads == 0:
@@ -47,10 +62,12 @@ cpdef _openmp_effective_n_threads(n_threads=None, only_physical_cores=False):
4762
# to exceed the number of cpus.
4863
max_n_threads = omp_get_max_threads()
4964
else:
50-
max_n_threads = min(
51-
omp_get_max_threads(),
52-
cpu_count(only_physical_cores=only_physical_cores)
53-
)
65+
try:
66+
n_cpus = _CPU_COUNTS[only_physical_cores]
67+
except KeyError:
68+
n_cpus = cpu_count(only_physical_cores=only_physical_cores)
69+
_CPU_COUNTS[only_physical_cores] = n_cpus
70+
max_n_threads = min(omp_get_max_threads(), n_cpus)
5471

5572
if n_threads is None:
5673
return max_n_threads

0 commit comments

Comments
 (0)
0