ENH: Improve Floating Point Cast Performance on ARM by f2013519 · Pull Request #28769 · numpy/numpy · GitHub
[go: up one dir, main page]

Skip to content

ENH: Improve Floating Point Cast Performance on ARM #28769

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 15 commits into from
Apr 29, 2025
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Ignore fp exceptions only for float casts
  • Loading branch information
f2013519 committed Apr 22, 2025
commit 0fbe5ece95b67a3630ad83ab262cf39358b1a9aa
31 changes: 21 additions & 10 deletions numpy/_core/src/multiarray/lowlevel_strided_loops.c.src
Original file line number Diff line number Diff line change
Expand Up @@ -718,9 +718,11 @@ NPY_NO_EXPORT PyArrayMethod_StridedLoop *

/*
 * Select the element type used for half-precision values in the cast loops
 * and publish two complementary flags describing that choice.
 *
 * If NPY_HAVE_NEON_FP16 or NPY_HAVE_F16C is defined, _npy_half is the
 * compiler's _Float16 type and NATIVE_FP16 is 1. Otherwise _npy_half is
 * npy_half and EMULATED_FP16 is 1. Exactly one of the two flags is 1.
 *
 * The flags feed the is_emu_half1/2 and is_native_half1/2 template lists:
 * the emulated flag selects the npy_halfbits_* / npy_half_* conversion
 * helpers, and the native flag is one of the conditions under which the
 * cast function is wrapped in NPY_IGNORE_FP_EXCEPTIONS_ON/OFF.
 *
 * NOTE(review): presumably the two NPY_HAVE_* macros are set by the build
 * when the target has hardware half-precision support -- confirm against
 * the build configuration.
 */
#if defined(NPY_HAVE_NEON_FP16) || defined(NPY_HAVE_F16C)
#define EMULATED_FP16 0
#define NATIVE_FP16 1
typedef _Float16 _npy_half;
#else
#define EMULATED_FP16 1
#define NATIVE_FP16 0
typedef npy_half _npy_half;
#endif

Expand All @@ -747,7 +749,8 @@ NPY_NO_EXPORT PyArrayMethod_StridedLoop *
* _npy_half, npy_float, npy_double, npy_longdouble,
* npy_float, npy_double, npy_longdouble#
* #is_bool1 = 1, 0*17#
* #is_half1 = 0*11, EMULATED_FP16, 0*6#
* #is_emu_half1 = 0*11, EMULATED_FP16, 0*6#
* #is_native_half1 = 0*11, NATIVE_FP16, 0*6#
* #is_float1 = 0*12, 1, 0, 0, 1, 0, 0#
* #is_double1 = 0*13, 1, 0, 0, 1, 0#
* #is_complex1 = 0*15, 1*3#
Expand Down Expand Up @@ -776,7 +779,8 @@ NPY_NO_EXPORT PyArrayMethod_StridedLoop *
* _npy_half, npy_float, npy_double, npy_longdouble,
* npy_float, npy_double, npy_longdouble#
* #is_bool2 = 1, 0*17#
* #is_half2 = 0*11, EMULATED_FP16, 0*6#
* #is_emu_half2 = 0*11, EMULATED_FP16, 0*6#
* #is_native_half2 = 0*11, NATIVE_FP16, 0*6#
* #is_float2 = 0*12, 1, 0, 0, 1, 0, 0#
* #is_double2 = 0*13, 1, 0, 0, 1, 0#
* #is_complex2 = 0*15, 1*3#
Expand All @@ -790,8 +794,8 @@ NPY_NO_EXPORT PyArrayMethod_StridedLoop *

#if !(NPY_USE_UNALIGNED_ACCESS && !@aligned@)

/* For half types, don't use actual double/float types in conversion */
#if @is_half1@ || @is_half2@
/* For emulated half types, don't use actual double/float types in conversion */
#if @is_emu_half1@ || @is_emu_half2@

# if @is_float1@
# define _TYPE1 npy_uint32
Expand All @@ -817,27 +821,27 @@ NPY_NO_EXPORT PyArrayMethod_StridedLoop *
#endif

/* Determine an appropriate casting conversion function */
#if @is_half1@
#if @is_emu_half1@

# if @is_float2@
# define _CONVERT_FN(x) npy_halfbits_to_floatbits(x)
# elif @is_double2@
# define _CONVERT_FN(x) npy_halfbits_to_doublebits(x)
# elif @is_half2@
# elif @is_emu_half2@
# define _CONVERT_FN(x) (x)
# elif @is_bool2@
# define _CONVERT_FN(x) ((npy_bool)!npy_half_iszero(x))
# else
# define _CONVERT_FN(x) ((_TYPE2)npy_half_to_float(x))
# endif

#elif @is_half2@
#elif @is_emu_half2@

# if @is_float1@
# define _CONVERT_FN(x) npy_floatbits_to_halfbits(x)
# elif @is_double1@
# define _CONVERT_FN(x) npy_doublebits_to_halfbits(x)
# elif @is_half1@
# elif @is_emu_half1@
# define _CONVERT_FN(x) (x)
# elif @is_bool1@
# define _CONVERT_FN(x) npy_float_to_half((float)(x!=0))
Expand All @@ -855,7 +859,11 @@ NPY_NO_EXPORT PyArrayMethod_StridedLoop *

#endif

NPY_IGNORE_FP_EXCEPTIONS_ON
#if (@is_native_half1@ || @is_float1@ || @is_double1@) && \
(@is_native_half2@ || @is_float2@ || @is_double2@)
// Enable Vectorization on Clang for floating point casts
NPY_IGNORE_FP_EXCEPTIONS_ON
#endif
static NPY_GCC_OPT_3 int
@prefix@_cast_@name1@_to_@name2@(
PyArrayMethod_Context *context, char *const *args,
Expand Down Expand Up @@ -949,7 +957,10 @@ static NPY_GCC_OPT_3 int
}
return 0;
}
NPY_IGNORE_FP_EXCEPTIONS_OFF
#if (@is_native_half1@ || @is_float1@ || @is_double1@) && \
(@is_native_half2@ || @is_float2@ || @is_double2@)
NPY_IGNORE_FP_EXCEPTIONS_OFF
#endif

#undef _CONVERT_FN
#undef _TYPE2
Expand Down
Loading
0