Work around gcc bug for double->half casts · numpy/numpy@de229c7 · GitHub
[go: up one dir, main page]

Skip to content

Commit de229c7

Browse files
committed
Work around gcc bug for double->half casts
1 parent 559dc78 commit de229c7

File tree

1 file changed

+15
-3
lines changed

1 file changed

+15
-3
lines changed

numpy/_core/src/multiarray/lowlevel_strided_loops.c.src

Lines changed: 15 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -708,10 +708,10 @@ NPY_NO_EXPORT PyArrayMethod_StridedLoop *
708708

709709
/************* STRIDED CASTING SPECIALIZED FUNCTIONS *************/
710710

711-
#if defined(__ARM_FP16_FORMAT_IEEE)
711+
#if defined(NPY_HAVE_NEON_FP16)
712712
#define EMULATED_FP16 0
713713
#define NATIVE_FP16 1
714-
typedef __fp16 _npy_half;
714+
typedef _Float16 _npy_half;
715715
#else
716716
#define EMULATED_FP16 1
717717
#define NATIVE_FP16 0
@@ -862,7 +862,18 @@ NPY_NO_EXPORT PyArrayMethod_StridedLoop *
862862
#endif
863863
#endif
864864

865-
static NPY_GCC_OPT_3 int
865+
// Work around GCC bug for double->half casts. For SVE and
866+
// OPT_LEVEL > 1, it implements this as double->single->half
867+
// which is incorrect as it introduces double rounding with
868+
// narrowing casts.
869+
#if (@is_double1@ && @is_native_half2@) && \
870+
defined(NPY_HAVE_SVE) && defined(__GNUC__)
871+
#define GCC_CAST_OPT_LEVEL __attribute__((optimize("O1")))
872+
#else
873+
#define GCC_CAST_OPT_LEVEL NPY_GCC_OPT_3
874+
#endif
875+
876+
static GCC_CAST_OPT_LEVEL int
866877
@prefix@_cast_@name1@_to_@name2@(
867878
PyArrayMethod_Context *context, char *const *args,
868879
const npy_intp *dimensions, const npy_intp *strides,
@@ -966,6 +977,7 @@ static NPY_GCC_OPT_3 int
966977
#endif
967978
#endif
968979

980+
#undef GCC_CAST_OPT_LEVEL
969981
#undef _CONVERT_FN
970982
#undef _TYPE2
971983
#undef _TYPE1

0 commit comments

Comments
 (0)
0