avx512 qsort for fp16, fp32 and fp64 require an explicit flag to sort NAN
numpy · seiko2plus · Dec 4, 2023 · Oct 13, 2023 · Oct 17, 2023 · Oct 17, 2023
commit 9a7e109302ebdeb3b939966d64653535cb46391c
diff --git a/numpy/_core/src/npysort/simd_qsort.dispatch.cpp b/numpy/_core/src/npysort/simd_qsort.dispatch.cpp
@@ -42,11 +42,11 @@ template<> void NPY_CPU_DISPATCH_CURFX(ArgQSelect)(uint64_t*arr, npy_intp* arg,
 }
 template<> void NPY_CPU_DISPATCH_CURFX(ArgQSelect)(float *arr, npy_intp* arg, npy_intp num, npy_intp kth)
 {
-    avx512_argselect(arr, reinterpret_cast<size_t*>(arg), kth, num);
+    avx512_argselect(arr, reinterpret_cast<size_t*>(arg), kth, num, true);
 }
 template<> void NPY_CPU_DISPATCH_CURFX(ArgQSelect)(double *arr, npy_intp* arg, npy_intp num, npy_intp kth)
 {
-    avx512_argselect(arr, reinterpret_cast<size_t*>(arg), kth, num);
+    avx512_argselect(arr, reinterpret_cast<size_t*>(arg), kth, num, true);
 }
 template<> void NPY_CPU_DISPATCH_CURFX(QSelect)(int32_t *arr, npy_intp num, npy_intp kth)
 {
@@ -90,11 +90,11 @@ template<> void NPY_CPU_DISPATCH_CURFX(QSort)(uint64_t *arr, npy_intp size)
 }
 template<> void NPY_CPU_DISPATCH_CURFX(QSort)(float *arr, npy_intp size)
 {
-    avx512_qsort(arr, size);
+    avx512_qsort(arr, size, true);
 }
 template<> void NPY_CPU_DISPATCH_CURFX(QSort)(double *arr, npy_intp size)
 {
-    avx512_qsort(arr, size);
+    avx512_qsort(arr, size, true);
 }
 #elif USE_HIGHWAY
 template<> void NPY_CPU_DISPATCH_CURFX(QSort)(int32_t *arr, intptr_t size)

diff --git a/numpy/_core/src/npysort/simd_qsort_16bit.dispatch.cpp b/numpy/_core/src/npysort/simd_qsort_16bit.dispatch.cpp
@@ -79,9 +79,9 @@ template<> void NPY_CPU_DISPATCH_CURFX(QSelect)(int16_t *arr, npy_intp num, npy_
 template<> void NPY_CPU_DISPATCH_CURFX(QSort)(Half *arr, npy_intp size)
 {
 #if defined(NPY_HAVE_AVX512_SPR)
-    avx512_qsort(reinterpret_cast<_Float16*>(arr), size);
+    avx512_qsort(reinterpret_cast<_Float16*>(arr), size, true);
 #else
-    avx512_qsort_fp16(reinterpret_cast<uint16_t*>(arr), size);
+    avx512_qsort_fp16(reinterpret_cast<uint16_t*>(arr), size, true);
 #endif
 }
 template<> void NPY_CPU_DISPATCH_CURFX(QSort)(uint16_t *arr, npy_intp size)