ENH: Improve Floating Point Cast Performance on ARM by f2013519 · Pull Request #28769 · numpy/numpy · GitHub
[go: up one dir, main page]

Skip to content

ENH: Improve Floating Point Cast Performance on ARM #28769

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 15 commits into from
Apr 29, 2025
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Work around gcc bug for double->half casts
  • Loading branch information
f2013519 committed Apr 24, 2025
commit de229c76a90cac3d92f49e23f63c2a967238900f
18 changes: 15 additions & 3 deletions numpy/_core/src/multiarray/lowlevel_strided_loops.c.src
Original file line number Diff line number Diff line change
Expand Up @@ -708,10 +708,10 @@ NPY_NO_EXPORT PyArrayMethod_StridedLoop *

/************* STRIDED CASTING SPECIALIZED FUNCTIONS *************/

#if defined(__ARM_FP16_FORMAT_IEEE)
#if defined(NPY_HAVE_NEON_FP16)
#define EMULATED_FP16 0
#define NATIVE_FP16 1
typedef __fp16 _npy_half;
typedef _Float16 _npy_half;
#else
#define EMULATED_FP16 1
#define NATIVE_FP16 0
Expand Down Expand Up @@ -862,7 +862,18 @@ NPY_NO_EXPORT PyArrayMethod_StridedLoop *
#endif
#endif

static NPY_GCC_OPT_3 int
// Work around a GCC bug in double->half casts. When targeting SVE at an
// optimization level above O1, GCC implements the conversion as
// double->single->half, which is incorrect: narrowing in two steps
// rounds twice (double rounding), so the result can differ from a
// direct double->half conversion.
//
// GCC_CAST_OPT_LEVEL is the function attribute placed on the cast loop
// defined below: optimize("O1") for the affected double -> native half
// case, and NPY_GCC_OPT_3 for every other combination.
// @is_double1@ and @is_native_half2@ are filled in by the template
// expansion (presumably 0/1 flags describing the source and destination
// types -- confirm against the repeat header).
// NOTE(review): __GNUC__ is also defined by Clang; confirm that the
// optimize attribute is accepted (or harmlessly ignored) there.
#if (@is_double1@ && @is_native_half2@) && \
defined(NPY_HAVE_SVE) && defined(__GNUC__)
#define GCC_CAST_OPT_LEVEL __attribute__((optimize("O1")))
#else
#define GCC_CAST_OPT_LEVEL NPY_GCC_OPT_3
#endif

static GCC_CAST_OPT_LEVEL int
@prefix@_cast_@name1@_to_@name2@(
PyArrayMethod_Context *context, char *const *args,
const npy_intp *dimensions, const npy_intp *strides,
Expand Down Expand Up @@ -966,6 +977,7 @@ static NPY_GCC_OPT_3 int
#endif
#endif

#undef GCC_CAST_OPT_LEVEL
#undef _CONVERT_FN
#undef _TYPE2
#undef _TYPE1
Expand Down
Loading
0