@@ -2,6 +2,12 @@ import os
2
2
from joblib import cpu_count
3
3
4
4
5
+ # Module-level cache for cpu_count as we do not expect this to change during
6
+ # the lifecycle of a Python program. This dictionary is keyed by
7
+ # only_physical_cores.
8
+ _CPU_COUNTS = {}
9
+
10
+
5
11
def _openmp_parallelism_enabled ():
6
12
""" Determines whether scikit-learn has been built with OpenMP
7
13
@@ -12,7 +18,7 @@ def _openmp_parallelism_enabled():
12
18
return SKLEARN_OPENMP_PARALLELISM_ENABLED
13
19
14
20
15
- cpdef _openmp_effective_n_threads(n_threads = None , only_physical_cores = False ):
21
+ cpdef _openmp_effective_n_threads(n_threads = None , only_physical_cores = True ):
16
22
""" Determine the effective number of threads to be used for OpenMP calls
17
23
18
24
- For ``n_threads = None``,
@@ -33,6 +39,15 @@ cpdef _openmp_effective_n_threads(n_threads=None, only_physical_cores=False):
33
39
34
40
- Raise a ValueError for ``n_threads = 0``.
35
41
42
+ Passing the `only_physical_cores=False` flag makes it possible to use extra
43
+ threads for SMT/HyperThreading logical cores. It has been empirically
44
+ observed that using as many threads as available SMT cores can slightly
45
+ improve the performance in some cases, but can severely degrade
46
+ performance at other times. Therefore it is recommended to use
47
+ `only_physical_cores=True` unless an empirical study has been conducted to
48
+ assess the impact of SMT on a case-by-case basis (using various input data
49
+ shapes, in particular small data shapes).
50
+
36
51
If scikit-learn is built without OpenMP support, always return 1.
37
52
"""
38
53
if n_threads == 0 :
@@ -47,10 +62,12 @@ cpdef _openmp_effective_n_threads(n_threads=None, only_physical_cores=False):
47
62
# to exceed the number of cpus.
48
63
max_n_threads = omp_get_max_threads()
49
64
else :
50
- max_n_threads = min (
51
- omp_get_max_threads(),
52
- cpu_count(only_physical_cores = only_physical_cores)
53
- )
65
+ try :
66
+ n_cpus = _CPU_COUNTS[only_physical_cores]
67
+ except KeyError :
68
+ n_cpus = cpu_count(only_physical_cores = only_physical_cores)
69
+ _CPU_COUNTS[only_physical_cores] = n_cpus
70
+ max_n_threads = min (omp_get_max_threads(), n_cpus)
54
71
55
72
if n_threads is None :
56
73
return max_n_threads
0 commit comments