Parallelize sort for GCC build · pytorch/pytorch@13eb689 · GitHub
[go: up one dir, main page]

Skip to content

Commit 13eb689

Browse files
committed
Parallelize sort for GCC build
Resolve #149977, #149979, #150094. Previously, #149505 used libstdc++ parallel mode by enabling -D_GLIBCXX_PARALLEL. However, mixing source files compiled with and without parallel mode can lead to undefined behavior (see https://gcc.gnu.org/onlinedocs/libstdc++/manual/parallel_mode_using.html). We switch to using the specific parallel sort from <parallel/algorithm> when compiled with the GCC compiler. Note that the use of std::execution policy has a dependency on libtbb, and we thus decide to avoid that.
1 parent 46c8f2e commit 13eb689

File tree

2 files changed

+23
-5
lines changed

2 files changed

+23
-5
lines changed

aten/src/ATen/native/cpu/SortingKernel.cpp

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@
1919
#ifdef USE_FBGEMM
2020
#include <fbgemm/Utils.h>
2121
#endif
22+
#if __has_include(<parallel/algorithm>) && defined(_OPENMP)
23+
#include <parallel/algorithm>
24+
#endif
2225

2326
namespace at::native {
2427

@@ -146,6 +149,25 @@ static inline void sort_kernel_impl(const value_accessor_t& value_accessor,
146149
auto composite_accessor = CompositeRandomAccessorCPU<
147150
value_accessor_t, indices_accessor_t
148151
>(value_accessor, indices_accessor);
152+
#if __has_include(<parallel/algorithm>) && defined(_OPENMP)
153+
if (descending) {
154+
if (stable) {
155+
__gnu_parallel::stable_sort(composite_accessor, composite_accessor + dim_size,
156+
KeyValueCompDesc<scalar_t>());
157+
} else {
158+
__gnu_parallel::sort(composite_accessor, composite_accessor + dim_size,
159+
KeyValueCompDesc<scalar_t>());
160+
}
161+
} else {
162+
if (stable) {
163+
__gnu_parallel::stable_sort(composite_accessor, composite_accessor + dim_size,
164+
KeyValueCompAsc<scalar_t>());
165+
} else {
166+
__gnu_parallel::sort(composite_accessor, composite_accessor + dim_size,
167+
KeyValueCompAsc<scalar_t>());
168+
}
169+
}
170+
#else
149171
if (descending) {
150172
if (stable) {
151173
std::stable_sort(composite_accessor, composite_accessor + dim_size,
@@ -163,6 +185,7 @@ static inline void sort_kernel_impl(const value_accessor_t& value_accessor,
163185
KeyValueCompAsc<scalar_t>());
164186
}
165187
}
188+
#endif
166189
}
167190

168191
static void sort_kernel(

cmake/Codegen.cmake

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -416,11 +416,6 @@ if(INTERN_BUILD_ATEN_OPS)
416416
set(EXTRA_FLAGS "-DCPU_CAPABILITY=${CPU_CAPABILITY} -DCPU_CAPABILITY_${CPU_CAPABILITY}")
417417
endif(MSVC)
418418

419-
# Only parallelize the SortingKernel for now to avoid side effects
420-
if(${NAME} STREQUAL "native/cpu/SortingKernel.cpp" AND NOT MSVC AND USE_OMP)
421-
string(APPEND EXTRA_FLAGS " -D_GLIBCXX_PARALLEL")
422-
endif()
423-
424419
# Disable certain warnings for GCC-9.X
425420
if(CMAKE_COMPILER_IS_GNUCXX)
426421
if(("${NAME}" STREQUAL "native/cpu/GridSamplerKernel.cpp") AND ("${CPU_CAPABILITY}" STREQUAL "DEFAULT"))

0 commit comments

Comments
 (0)
0