BUG: Fix NEON_FP16 check for MSVC by Mousius · Pull Request #29199 · numpy/numpy · GitHub
[go: up one dir, main page]

Skip to content

BUG: Fix NEON_FP16 check for MSVC #29199

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions numpy/_core/src/common/simd/neon/math.h
Original file line number Diff line number Diff line change
Expand Up @@ -261,8 +261,8 @@ NPY_FINLINE npyv_s64 npyv_min_s64(npyv_s64 a, npyv_s64 b)
#define NPY_IMPL_NEON_REDUCE_MINMAX(INTRIN, STYPE, SFX, OP) \
NPY_FINLINE STYPE npyv_reduce_##INTRIN##_##SFX(npyv_##SFX a) \
{ \
- STYPE al = (STYPE)vget_low_##SFX(a); \
- STYPE ah = (STYPE)vget_high_##SFX(a); \
+ STYPE al = (STYPE)vgetq_lane_##SFX(a, 0); \
+ STYPE ah = (STYPE)vgetq_lane_##SFX(a, 1); \
return al OP ah ? al : ah; \
}
NPY_IMPL_NEON_REDUCE_MINMAX(max, npy_uint64, u64, >)
Expand Down
2 changes: 1 addition & 1 deletion numpy/_core/src/multiarray/lowlevel_strided_loops.c.src
Original file line number Diff line number Diff line change
Expand Up @@ -704,7 +704,7 @@ NPY_NO_EXPORT PyArrayMethod_StridedLoop *

/************* STRIDED CASTING SPECIALIZED FUNCTIONS *************/

- #if defined(NPY_HAVE_NEON_FP16)
+ #if defined(NPY_HAVE_NEON_FP16) && !defined(_MSC_VER)
#define EMULATED_FP16 0
#define NATIVE_FP16 1
typedef _Float16 _npy_half;
Expand Down
2 changes: 1 addition & 1 deletion numpy/distutils/checks/cpu_neon_fp16.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,6 @@
int main(int argc, char **argv)
{
short *src = (short*)argv[argc-1];
- float32x4_t v_z4 = vcvt_f32_f16((float16x4_t)vld1_s16(src));
+ float32x4_t v_z4 = vcvt_f32_f16(vreinterpret_f16_s16(vld1_s16(src)));
return (int)vgetq_lane_f32(v_z4, 0);
}
Loading
0