BUG: Mirror VQSORT_ENABLED logic in Quicksort by Mousius · Pull Request #27050 · numpy/numpy · GitHub
[go: up one dir, main page]

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions numpy/_core/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -1222,6 +1222,7 @@ py.extension_module('_multiarray_umath',
'src/multiarray',
'src/npymath',
'src/umath',
'src/highway'
],
dependencies: [blas_dep],
link_with: [npymath_lib, multiarray_umath_mtargets.static_lib('_multiarray_umath_mtargets')] + highway_lib,
Expand Down
4 changes: 4 additions & 0 deletions numpy/_core/src/npysort/highway_qsort.dispatch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
#define VQSORT_ONLY_STATIC 1
#include "hwy/contrib/sort/vqsort-inl.h"

#if VQSORT_ENABLED

#define DISPATCH_VQSORT(TYPE) \
template<> void NPY_CPU_DISPATCH_CURFX(QSort)(TYPE *arr, intptr_t size) \
{ \
Expand All @@ -18,3 +20,5 @@ namespace np { namespace highway { namespace qsort_simd {
DISPATCH_VQSORT(float)

} } } // np::highway::qsort_simd

#endif // VQSORT_ENABLED
15 changes: 15 additions & 0 deletions numpy/_core/src/npysort/highway_qsort.hpp
Original file line number Diff line number Diff line change
@@ -1,8 +1,22 @@
#ifndef NUMPY_SRC_COMMON_NPYSORT_HWY_SIMD_QSORT_HPP
#define NUMPY_SRC_COMMON_NPYSORT_HWY_SIMD_QSORT_HPP

#include "hwy/highway.h"

#include "common.hpp"

// This replicates the VQSORT_ENABLED check from hwy/contrib/sort/shared-inl.h,
// without the scalar-target test, because this header is not compiled as part
// of the dynamically dispatched sources.
#if (HWY_COMPILER_MSVC && !HWY_IS_DEBUG_BUILD) || \
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not just use VQSORT_ENABLED instead of duplicating the macro?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is included by quicksort.cpp, which isn't dynamically dispatched, meaning the VQSORT_ENABLED flag would be set incorrectly due to having HWY_TARGET == HWY_SCALAR.

(HWY_ARCH_ARM_V7 && HWY_IS_DEBUG_BUILD) || \
(HWY_ARCH_ARM_A64 && HWY_COMPILER_GCC_ACTUAL && HWY_IS_ASAN) || \
(HWY_ARCH_ARM_A64 && HWY_COMPILER_CLANG && \
(HWY_IS_HWASAN || HWY_IS_MSAN || HWY_IS_TSAN || HWY_IS_ASAN))
#define NPY_DISABLE_HIGHWAY_SORT
#endif

#ifndef NPY_DISABLE_HIGHWAY_SORT
namespace np { namespace highway { namespace qsort_simd {

#ifndef NPY_DISABLE_OPTIMIZATION
Expand All @@ -21,3 +35,4 @@ NPY_CPU_DISPATCH_DECLARE(template <typename T> void QSelect, (T* arr, npy_intp n
} } } // np::highway::qsort_simd

#endif // NPY_DISABLE_HIGHWAY_SORT
#endif // NUMPY_SRC_COMMON_NPYSORT_HWY_SIMD_QSORT_HPP
4 changes: 4 additions & 0 deletions numpy/_core/src/npysort/highway_qsort_16bit.dispatch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

#include "quicksort.hpp"

#if VQSORT_ENABLED

namespace np { namespace highway { namespace qsort_simd {

template<> void NPY_CPU_DISPATCH_CURFX(QSort)(Half *arr, intptr_t size)
Expand All @@ -24,3 +26,5 @@ template<> void NPY_CPU_DISPATCH_CURFX(QSort)(int16_t *arr, intptr_t size)
}

} } } // np::highway::qsort_simd

#endif // VQSORT_ENABLED
4 changes: 2 additions & 2 deletions numpy/_core/src/npysort/quicksort.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ inline bool quicksort_dispatch(T *start, npy_intp num)
#if defined(NPY_CPU_AMD64) || defined(NPY_CPU_X86) // x86 32-bit and 64-bit
#include "x86_simd_qsort_16bit.dispatch.h"
NPY_CPU_DISPATCH_CALL_XB(dispfunc = np::qsort_simd::template QSort, <TF>);
#else
#elif !defined(NPY_DISABLE_HIGHWAY_SORT)
#include "highway_qsort_16bit.dispatch.h"
NPY_CPU_DISPATCH_CALL_XB(dispfunc = np::highway::qsort_simd::template QSort, <TF>);
#endif
Expand All @@ -95,7 +95,7 @@ inline bool quicksort_dispatch(T *start, npy_intp num)
#if defined(NPY_CPU_AMD64) || defined(NPY_CPU_X86) // x86 32-bit and 64-bit
#include "x86_simd_qsort.dispatch.h"
NPY_CPU_DISPATCH_CALL_XB(dispfunc = np::qsort_simd::template QSort, <TF>);
#else
#elif !defined(NPY_DISABLE_HIGHWAY_SORT)
#include "highway_qsort.dispatch.h"
NPY_CPU_DISPATCH_CALL_XB(dispfunc = np::highway::qsort_simd::template QSort, <TF>);
#endif
Expand Down
0