Switch off simd_select for AArch64 · numpy/numpy@f4e1583 · GitHub
[go: up one dir, main page]

Skip to content

Commit f4e1583

Browse files
committed
Switch off simd_select for AArch64
1 parent 15fc922 commit f4e1583

File tree

3 files changed

+33
-27
lines changed

3 files changed

+33
-27
lines changed

numpy/core/src/npysort/selection.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
#include "simd_qsort.hpp"
2929

3030
#define NOT_USED NPY_UNUSED(unused)
31+
// DISABLE_HIGHWAY_OPTIMIZATION is 1 when building for 32-bit or 64-bit Arm
// (__arm__ / __aarch64__) and 0 otherwise. It is tested further down with
// `#if !DISABLE_HIGHWAY_OPTIMIZATION` to compile out the 32/64-bit QSelect
// dispatch branch on those targets.
//
// The value is computed here instead of letting the macro expand to
// `defined(...)`: a `defined` operator that appears as the result of macro
// expansion inside an #if has undefined behaviour and is not evaluated
// consistently across preprocessors.
#if defined(__arm__) || defined(__aarch64__)
#define DISABLE_HIGHWAY_OPTIMIZATION 1
#else
#define DISABLE_HIGHWAY_OPTIMIZATION 0
#endif
3132

3233
template<typename T>
3334
inline bool quickselect_dispatch(T* v, npy_intp num, npy_intp kth)
@@ -55,12 +56,14 @@ inline bool quickselect_dispatch(T* v, npy_intp num, npy_intp kth)
5556
#endif
5657
NPY_CPU_DISPATCH_CALL_XB(dispfunc = np::qsort_simd::template QSelect, <TF>);
5758
}
59+
#if !DISABLE_HIGHWAY_OPTIMIZATION
5860
else if constexpr (sizeof(T) == sizeof(uint32_t) || sizeof(T) == sizeof(uint64_t)) {
5961
#ifndef NPY_DISABLE_OPTIMIZATION
6062
#include "simd_qsort.dispatch.h"
6163
#endif
6264
NPY_CPU_DISPATCH_CALL_XB(dispfunc = np::qsort_simd::template QSelect, <TF>);
6365
}
66+
#endif
6467
if (dispfunc) {
6568
(*dispfunc)(reinterpret_cast<TF*>(v), num, kth);
6669
return true;
@@ -85,7 +88,7 @@ inline bool argquickselect_dispatch(T* v, npy_intp* arg, npy_intp num, npy_intp
8588
sizeof(npy_intp) == sizeof(int64_t)) {
8689
using TF = typename np::meta::FixedWidth<T>::Type;
8790
#ifndef NPY_DISABLE_OPTIMIZATION
88-
#include "simd_qsort.dispatch.h"
91+
#include "simd_argsort.dispatch.h"
8992
#endif
9093
void (*dispfunc)(TF*, npy_intp*, npy_intp, npy_intp) = nullptr;
9194
NPY_CPU_DISPATCH_CALL_XB(dispfunc = np::qsort_simd::template ArgQSelect, <TF>);

numpy/core/src/npysort/simd_argsort.dispatch.cpp

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,39 @@
77
// 'baseline' option isn't specified within targets.
88

99
#include "simd_qsort.hpp"
10+
#ifndef __CYGWIN__
1011

11-
#if defined(NPY_HAVE_AVX512_SKX) && !defined(_MSC_VER)
12+
#if defined(NPY_HAVE_AVX512_SKX)
1213
#include "x86-simd-sort/src/avx512-64bit-argsort.hpp"
1314
#endif
1415

1516
namespace np { namespace qsort_simd {
1617

17-
#if defined(NPY_HAVE_AVX512_SKX) && !defined(_MSC_VER)
18+
#if defined(NPY_HAVE_AVX512_SKX)
19+
template<> void NPY_CPU_DISPATCH_CURFX(ArgQSelect)(int32_t *arr, npy_intp* arg, npy_intp num, npy_intp kth)
20+
{
21+
avx512_argselect(arr, reinterpret_cast<int64_t*>(arg), kth, num);
22+
}
23+
template<> void NPY_CPU_DISPATCH_CURFX(ArgQSelect)(uint32_t *arr, npy_intp* arg, npy_intp num, npy_intp kth)
24+
{
25+
avx512_argselect(arr, reinterpret_cast<int64_t*>(arg), kth, num);
26+
}
27+
template<> void NPY_CPU_DISPATCH_CURFX(ArgQSelect)(int64_t*arr, npy_intp* arg, npy_intp num, npy_intp kth)
28+
{
29+
avx512_argselect(arr, reinterpret_cast<int64_t*>(arg), kth, num);
30+
}
31+
template<> void NPY_CPU_DISPATCH_CURFX(ArgQSelect)(uint64_t*arr, npy_intp* arg, npy_intp num, npy_intp kth)
32+
{
33+
avx512_argselect(arr, reinterpret_cast<int64_t*>(arg), kth, num);
34+
}
35+
template<> void NPY_CPU_DISPATCH_CURFX(ArgQSelect)(float *arr, npy_intp* arg, npy_intp num, npy_intp kth)
36+
{
37+
avx512_argselect(arr, reinterpret_cast<int64_t*>(arg), kth, num);
38+
}
39+
template<> void NPY_CPU_DISPATCH_CURFX(ArgQSelect)(double *arr, npy_intp* arg, npy_intp num, npy_intp kth)
40+
{
41+
avx512_argselect(arr, reinterpret_cast<int64_t*>(arg), kth, num);
42+
}
1843
template<> void NPY_CPU_DISPATCH_CURFX(ArgQSort)(int32_t *arr, npy_intp *arg, npy_intp size)
1944
{
2045
avx512_argsort(arr, reinterpret_cast<int64_t*>(arg), size);
@@ -42,3 +67,5 @@ template<> void NPY_CPU_DISPATCH_CURFX(ArgQSort)(double *arr, npy_intp *arg, npy
4267
#endif
4368

4469
}} // namespace np::qsort_simd
70+
71+
#endif // __CYGWIN__

numpy/core/src/npysort/simd_qsort.dispatch.cpp

Lines changed: 0 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -23,30 +23,6 @@
2323
namespace np { namespace qsort_simd {
2424

2525
#if defined(NPY_HAVE_AVX512_SKX)
26-
template<> void NPY_CPU_DISPATCH_CURFX(ArgQSelect)(int32_t *arr, npy_intp* arg, npy_intp num, npy_intp kth)
27-
{
28-
avx512_argselect(arr, reinterpret_cast<int64_t*>(arg), kth, num);
29-
}
30-
template<> void NPY_CPU_DISPATCH_CURFX(ArgQSelect)(uint32_t *arr, npy_intp* arg, npy_intp num, npy_intp kth)
31-
{
32-
avx512_argselect(arr, reinterpret_cast<int64_t*>(arg), kth, num);
33-
}
34-
template<> void NPY_CPU_DISPATCH_CURFX(ArgQSelect)(int64_t*arr, npy_intp* arg, npy_intp num, npy_intp kth)
35-
{
36-
avx512_argselect(arr, reinterpret_cast<int64_t*>(arg), kth, num);
37-
}
38-
template<> void NPY_CPU_DISPATCH_CURFX(ArgQSelect)(uint64_t*arr, npy_intp* arg, npy_intp num, npy_intp kth)
39-
{
40-
avx512_argselect(arr, reinterpret_cast<int64_t*>(arg), kth, num);
41-
}
42-
template<> void NPY_CPU_DISPATCH_CURFX(ArgQSelect)(float *arr, npy_intp* arg, npy_intp num, npy_intp kth)
43-
{
44-
avx512_argselect(arr, reinterpret_cast<int64_t*>(arg), kth, num);
45-
}
46-
template<> void NPY_CPU_DISPATCH_CURFX(ArgQSelect)(double *arr, npy_intp* arg, npy_intp num, npy_intp kth)
47-
{
48-
avx512_argselect(arr, reinterpret_cast<int64_t*>(arg), kth, num);
49-
}
5026
template<> void NPY_CPU_DISPATCH_CURFX(QSelect)(int32_t *arr, npy_intp num, npy_intp kth)
5127
{
5228
avx512_qselect(arr, kth, num, true);

0 commit comments

Comments
 (0)
0