Merge pull request #24498 from r-devulap/qsort-16bit-dispatch

seiko2plus · web-flow · commit 54238c7be101 · 2023-09-07T08:33:21.000+04:00
MAINT: Re-write 16-bit qsort dispatch
diff --git a/numpy/core/src/npysort/simd_qsort_16bit.dispatch.cpp b/numpy/core/src/npysort/simd_qsort_16bit.dispatch.cpp
@@ -9,8 +9,23 @@
 
 #if defined(NPY_HAVE_AVX512_SPR) && !defined(_MSC_VER)
     #include "x86-simd-sort/src/avx512fp16-16bit-qsort.hpp"
+/*
+ * Wrapper function declarations to avoid multiple definitions of
+ * avx512_qsort<uint16_t> and avx512_qsort<int16_t>
+ */
+void avx512_qsort_uint16(uint16_t*, intptr_t);
+void avx512_qsort_int16(int16_t*, intptr_t);
 #elif defined(NPY_HAVE_AVX512_ICL) && !defined(_MSC_VER)
     #include "x86-simd-sort/src/avx512-16bit-qsort.hpp"
+/* Wrapper function definitions here: */
+void avx512_qsort_uint16(uint16_t* arr, intptr_t size)
+{
+    avx512_qsort(arr, size);
+}
+void avx512_qsort_int16(int16_t* arr, intptr_t size)
+{
+    avx512_qsort(arr, size);
+}
 #endif
 
 namespace np { namespace qsort_simd {
@@ -27,11 +42,19 @@ template<> void NPY_CPU_DISPATCH_CURFX(QSort)(Half *arr, intptr_t size)
 }
 template<> void NPY_CPU_DISPATCH_CURFX(QSort)(uint16_t *arr, intptr_t size)
 {
+#if defined(NPY_HAVE_AVX512_SPR)
+    avx512_qsort_uint16(arr, size);
+#else
     avx512_qsort(arr, size);
+#endif
 }
 template<> void NPY_CPU_DISPATCH_CURFX(QSort)(int16_t *arr, intptr_t size)
 {
+#if defined(NPY_HAVE_AVX512_SPR)
+    avx512_qsort_int16(arr, size);
+#else
     avx512_qsort(arr, size);
+#endif
 }
 #endif // NPY_HAVE_AVX512_ICL || SPR
 #endif // _MSC_VER