@@ -708,10 +708,10 @@ NPY_NO_EXPORT PyArrayMethod_StridedLoop *
708
708
709
709
/************* STRIDED CASTING SPECIALIZED FUNCTIONS *************/
710
710
711
- #if defined(__ARM_FP16_FORMAT_IEEE )
711
+ #if defined(NPY_HAVE_NEON_FP16 )
712
712
#define EMULATED_FP16 0
713
713
#define NATIVE_FP16 1
714
- typedef __fp16 _npy_half;
714
+ typedef _Float16 _npy_half;
715
715
#else
716
716
#define EMULATED_FP16 1
717
717
#define NATIVE_FP16 0
@@ -862,7 +862,18 @@ NPY_NO_EXPORT PyArrayMethod_StridedLoop *
862
862
#endif
863
863
#endif
864
864
865
- static NPY_GCC_OPT_3 int
865
+ // Work around a GCC bug in double->half casts. When targeting SVE at
866
+ // optimization levels above -O1, GCC implements the cast as
867
+ // double->single->half, which is incorrect: the intermediate step
868
+ // introduces double rounding in the narrowing cast.
869
+ #if (@is_double1@ && @is_native_half2@) && \
870
+ defined(NPY_HAVE_SVE) && defined(__GNUC__)
871
+ #define GCC_CAST_OPT_LEVEL __attribute__((optimize("O1")))
872
+ #else
873
+ #define GCC_CAST_OPT_LEVEL NPY_GCC_OPT_3
874
+ #endif
875
+
876
+ static GCC_CAST_OPT_LEVEL int
866
877
@prefix@_cast_@name1@_to_@name2@(
867
878
PyArrayMethod_Context *context, char *const *args,
868
879
const npy_intp *dimensions, const npy_intp *strides,
@@ -966,6 +977,7 @@ static NPY_GCC_OPT_3 int
966
977
#endif
967
978
#endif
968
979
980
+ #undef GCC_CAST_OPT_LEVEL
969
981
#undef _CONVERT_FN
970
982
#undef _TYPE2
971
983
#undef _TYPE1
0 commit comments