diff --git a/numpy/core/meson.build b/numpy/core/meson.build
index 9aaa5ed8716b..0e34a8242c8b 100644
--- a/numpy/core/meson.build
+++ b/numpy/core/meson.build
@@ -430,6 +430,7 @@ _numpyconfig_h = configure_file(
 # ----------------------------
 
 staticlib_cflags = []
+staticlib_cppflags = []
 if cc.get_id() == 'msvc'
   # Disable voltbl section for vc142 to allow link using mingw-w64; see:
   # https://github.com/matthew-brett/dll_investigation/issues/1#issuecomment-1100468171
@@ -443,6 +444,7 @@ endif
 # https://mesonbuild.com/Build-options.html#build-options
 if get_option('disable-simd-optimizations')
   staticlib_cflags += '-DNPY_DISABLE_OPTIMIZATION'
+  staticlib_cppflags += '-DNPY_DISABLE_OPTIMIZATION'
 endif
 
 npy_math_internal_h = custom_target(
@@ -455,12 +457,13 @@ npymath_sources = [
   src_file.process('src/npymath/ieee754.c.src'),
   src_file.process('src/npymath/npy_math_complex.c.src'),
   npy_math_internal_h,
-  'src/npymath/halffloat.c',
+  'src/npymath/halffloat.cpp',
   'src/npymath/npy_math.c',
 ]
 npymath_lib = static_library('npymath',
   npymath_sources,
   c_args: staticlib_cflags,
+  cpp_args: staticlib_cppflags,
   include_directories: ['include', 'src/npymath', 'src/common'],
   dependencies: py_dep,
   install: true,
diff --git a/numpy/core/setup.py b/numpy/core/setup.py
index 680c2a5f6033..7e620075b2a0 100644
--- a/numpy/core/setup.py
+++ b/numpy/core/setup.py
@@ -669,7 +669,7 @@ def get_mathlib_info(*args):
             # join('src', 'npymath', 'ieee754.cpp'),
             join('src', 'npymath', 'ieee754.c.src'),
             join('src', 'npymath', 'npy_math_complex.c.src'),
-            join('src', 'npymath', 'halffloat.c'),
+            join('src', 'npymath', 'halffloat.cpp'),
             ]
 
     config.add_installed_library('npymath',
@@ -727,7 +727,8 @@ def get_mathlib_info(*args):
             join('src', 'common', 'numpyos.h'),
             join('src', 'common', 'npy_cpu_dispatch.h'),
             join('src', 'common', 'simd', 'simd.h'),
-            ]
+            join('src', 'common', 'common.hpp'),
+            ]
 
     common_src = [
         join('src', 'common', 'array_assign.c'),
diff --git a/numpy/core/src/common/common.hpp b/numpy/core/src/common/common.hpp
index 47d790bcfeb1..44ba449d8e0e 100644
--- a/numpy/core/src/common/common.hpp
+++ b/numpy/core/src/common/common.hpp
@@ -4,8 +4,11 @@
  * The following C++ headers are safe to be used standalone, however,
  * they are gathered to make it easy for us and for the future need to support PCH.
  */
+#include "npdef.hpp"
+#include "utils.hpp"
 #include "npstd.hpp"
 #include "half.hpp"
 #include "meta.hpp"
+#include "float_status.hpp"
 
 #endif // NUMPY_CORE_SRC_COMMON_COMMON_HPP
diff --git a/numpy/core/src/common/float_status.hpp b/numpy/core/src/common/float_status.hpp
new file mode 100644
index 000000000000..8e4d5e06a59c
--- /dev/null
+++ b/numpy/core/src/common/float_status.hpp
@@ -0,0 +1,134 @@
+#ifndef NUMPY_CORE_SRC_COMMON_FLOAT_STATUS_HPP
+#define NUMPY_CORE_SRC_COMMON_FLOAT_STATUS_HPP
+
+#include "npstd.hpp"
+
+#include <fenv.h>
+
+namespace np {
+
+/// @addtogroup cpp_core_utility
+/// @{
+/**
+ * Wraps floating-point environment operations and
+ * provides lazy access to its functionality.
+ */
+class FloatStatus {
+  public:
+/*
+ * According to the C99 standard FE_DIVBYZERO, etc. may not be provided when
+ * unsupported. In such cases NumPy will not report these correctly, but we
+ * should still allow compiling (whether tests pass or not).
+ * By defining them as 0 locally, we make them no-ops.
+ * Unlike these defines, `musl`, for example, still defines all of the
+ * functions (as no-ops):
+ *     https://git.musl-libc.org/cgit/musl/tree/src/fenv/fenv.c
+ * and does a similar replacement in its tests:
+ * http://nsz.repo.hu/git/?p=libc-test;a=blob;f=src/common/mtest.h;h=706c1ba23ea8989b17a2f72ed1a919e187c06b6a;hb=HEAD#l30
+ */
+#ifdef FE_DIVBYZERO
+    static constexpr int kDivideByZero = FE_DIVBYZERO;
+#else
+    static constexpr int kDivideByZero = 0;
+#endif
+#ifdef FE_INVALID
+    static constexpr int kInvalid = FE_INVALID;
+#else
+    static constexpr int kInvalid = 0;
+#endif
+#ifdef FE_INEXACT
+    static constexpr int kInexact = FE_INEXACT;
+#else
+    static constexpr int kInexact = 0;
+#endif
+#ifdef FE_OVERFLOW
+    static constexpr int kOverflow = FE_OVERFLOW;
+#else
+    static constexpr int kOverflow = 0;
+#endif
+#ifdef FE_UNDERFLOW
+    static constexpr int kUnderflow = FE_UNDERFLOW;
+#else
+    static constexpr int kUnderflow = 0;
+#endif
+    static constexpr int kAllExcept = (kDivideByZero | kInvalid | kInexact |
+                                       kOverflow | kUnderflow);
+
+    FloatStatus(bool clear_on_dst=true)
+        : clear_on_dst_(clear_on_dst)
+    {
+        if constexpr (kAllExcept != 0) {
+            fpstatus_ = fetestexcept(kAllExcept);
+        }
+        else {
+            fpstatus_ = 0;
+        }
+    }
+    ~FloatStatus()
+    {
+        if constexpr (kAllExcept != 0) {
+            if (fpstatus_ != 0 && clear_on_dst_) {
+                feclearexcept(kAllExcept);
+            }
+        }
+    }
+    constexpr bool IsDivideByZero() const
+    {
+        return (fpstatus_ & kDivideByZero) != 0;
+    }
+    constexpr bool IsInexact() const
+    {
+        return (fpstatus_ & kInexact) != 0;
+    }
+    constexpr bool IsInvalid() const
+    {
+        return (fpstatus_ & kInvalid) != 0;
+    }
+    constexpr bool IsOverFlow() const
+    {
+        return (fpstatus_ & kOverflow) != 0;
+    }
+    constexpr bool IsUnderFlow() const
+    {
+        return (fpstatus_ & kUnderflow) != 0;
+    }
+    static void RaiseDivideByZero()
+    {
+        if constexpr (kDivideByZero != 0) {
+            feraiseexcept(kDivideByZero);
+        }
+    }
+    static void RaiseInexact()
+    {
+        if constexpr (kInexact != 0) {
+            feraiseexcept(kInexact);
+        }
+    }
+    static void RaiseInvalid()
+    {
+        if constexpr (kInvalid != 0) {
+            feraiseexcept(kInvalid);
+        }
+    }
+    static void RaiseOverflow()
+    {
+        if constexpr (kOverflow != 0) {
+            feraiseexcept(kOverflow);
+        }
+    }
+    static void RaiseUnderflow()
+    {
+        if constexpr (kUnderflow != 0) {
+            feraiseexcept(kUnderflow);
+        }
+    }
+
+  private:
+    bool clear_on_dst_;
+    int fpstatus_;
+};
+
+/// @} cpp_core_utility
+} // namespace np
+
+#endif // NUMPY_CORE_SRC_COMMON_FLOAT_STATUS_HPP
+
diff --git a/numpy/core/src/common/half.hpp b/numpy/core/src/common/half.hpp
index 399f2fa7967e..e5f3f7a4014b 100644
--- a/numpy/core/src/common/half.hpp
+++ b/numpy/core/src/common/half.hpp
@@ -3,11 +3,14 @@
 
 #include "npstd.hpp"
 
+#include "npy_cpu_dispatch.h" // NPY_HAVE_CPU_FEATURES
+#include "half_private.hpp"
+
 // TODO(@seiko2plus):
 // - covers half-precision operations that being supported by numpy/halffloat.h
-// - support __fp16
-// - optimize x86 half<->single via cpu_fp16
-// - optimize ppc64 half<->single via cpu_vsx3
+// - add support for arithmetic operations
+// - support __fp16: enabling it causes massive FP exceptions on aarch64,
+//   which needs a deep investigation
 
 namespace np {
 
@@ -16,48 +19,246 @@ namespace np {
 
 /// Provides a type that implements 16-bit floating point (half-precision).
 /// This type is ensured to be 16-bit size.
+#if 1 // ndef __ARM_FP16_FORMAT_IEEE
 class Half final {
-  public:
-    /// @name Public Constructors
-    /// @{
+  public:
+    /// Whether `Half` has full native HW support.
+    static constexpr bool kNative = false;
+    /// Whether `Half` has native HW support for single/double conversion.
+    template<typename T>
+    static constexpr bool kNativeConversion = (
+        (
+            std::is_same_v<T, float> &&
+        #if defined(NPY_HAVE_FP16) || defined(NPY_HAVE_VSX3)
+            true
+        #else
+            false
+        #endif
+        ) || (
+            std::is_same_v<T, double> &&
+        #if defined(NPY_HAVE_AVX512FP16) || defined(NPY_HAVE_VSX3)
+            true
+        #else
+            false
+        #endif
+        )
+    );
+
     /// Default constructor. initialize nothing.
     Half() = default;
-    /// Copy.
-    Half(const Half &r)
+
+    /// Construct from float.
+    /// If no hardware optimization is available, rounding will always
+    /// be set to ties to even.
+    explicit Half(float f)
     {
-        data_.u = r.data_.u;
+    #if defined(NPY_HAVE_FP16)
+        __m128 mf = _mm_load_ss(&f);
+        bits_ = static_cast<uint16_t>(_mm_cvtsi128_si32(_mm_cvtps_ph(mf, _MM_FROUND_TO_NEAREST_INT)));
+    #elif defined(NPY_HAVE_VSX3) && defined(NPY_HAVE_VSX_ASM)
+        __vector float vf32 = vec_splats(f);
+        __vector unsigned short vf16;
+        __asm__ __volatile__ ("xvcvsphp %x0,%x1" : "=wa" (vf16) : "wa" (vf32));
+        bits_ = vec_extract(vf16, 0);
+    #else
+        bits_ = half_private::FromFloatBits(BitCast<uint32_t>(f));
+    #endif
     }
-    /// @}
 
+    /// Construct from double.
+    /// If no hardware optimization is available, rounding will always
+    /// be set to ties to even.
+    explicit Half(double f)
+    {
+    #if defined(NPY_HAVE_AVX512FP16)
+        __m128d md = _mm_load_sd(&f);
+        bits_ = static_cast<uint16_t>(_mm_cvtsi128_si32(_mm_castph_si128(_mm_cvtpd_ph(md))));
+    #elif defined(NPY_HAVE_VSX3) && defined(NPY_HAVE_VSX_ASM)
+        __vector double vf64 = vec_splats(f);
+        __vector unsigned short vf16;
+        __asm__ __volatile__ ("xvcvdphp %x0,%x1" : "=wa" (vf16) : "wa" (vf64));
+        bits_ = vec_extract(vf16, 0);
+    #else
+        bits_ = half_private::FromDoubleBits(BitCast<uint64_t>(f));
+    #endif
+    }
+
+    /// Cast to float
+    explicit operator float() const
+    {
+    #if defined(NPY_HAVE_FP16)
+        float ret;
+        _mm_store_ss(&ret, _mm_cvtph_ps(_mm_cvtsi32_si128(bits_)));
+        return ret;
+    #elif defined(NPY_HAVE_VSX3) && defined(vec_extract_fp_from_shorth)
+        return vec_extract(vec_extract_fp_from_shorth(vec_splats(bits_)), 0);
+    #elif defined(NPY_HAVE_VSX3) && defined(NPY_HAVE_VSX_ASM)
+        __vector float vf32;
+        __asm__ __volatile__("xvcvhpsp %x0,%x1"
+                             : "=wa"(vf32)
+                             : "wa"(vec_splats(bits_)));
+        return vec_extract(vf32, 0);
+    #else
+        return BitCast<float>(half_private::ToFloatBits(bits_));
+    #endif
+    }
+
+    /// Cast to double
+    explicit operator double() const
+    {
+    #if defined(NPY_HAVE_AVX512FP16)
+        double ret;
+        _mm_store_sd(&ret, _mm_cvtph_pd(_mm_castsi128_ph(_mm_cvtsi32_si128(bits_))));
+        return ret;
+    #elif defined(NPY_HAVE_VSX3) && defined(NPY_HAVE_VSX_ASM)
+        __vector double vf64;
+        __asm__ __volatile__("xvcvhpdp %x0,%x1"
+                             : "=wa"(vf64)
+                             : "wa"(vec_splats(bits_)));
+        return vec_extract(vf64, 0);
+    #else
+        return BitCast<double>(half_private::ToDoubleBits(bits_));
+    #endif
+    }
 
     /// Returns a new Half constracted from the IEEE 754 binary16.
-    /// @param b the value of binary16.
-    static Half FromBits(uint16_t b)
+    static constexpr Half FromBits(uint16_t bits)
     {
-        Half f;
-        f.data_.u = b;
-        return f;
+        Half h{};
+        h.bits_ = bits;
+        return h;
     }
     /// Returns the IEEE 754 binary16 representation.
-    uint16_t Bits() const
+    constexpr uint16_t Bits() const
     {
-        return data_.u;
+        return bits_;
     }
 
-  private:
-    union {
-        uint16_t u;
-/*
-TODO(@seiko2plus): support __fp16
-#ifdef NPY_HAVE_HW_FP16
-        __fp16 f;
-#endif
-*/
-    } data_;
+    /// @name Comparison operators (ordered)
+    /// @{
+    constexpr bool operator==(Half r) const
+    {
+        return !(IsNaN() || r.IsNaN()) && Equal(r);
+    }
+    constexpr bool operator<(Half r) const
+    {
+        return !(IsNaN() || r.IsNaN()) && Less(r);
+    }
+    constexpr bool operator<=(Half r) const
+    {
+        return !(IsNaN() || r.IsNaN()) && LessEqual(r);
+    }
+    constexpr bool operator>(Half r) const
+    {
+        return r < *this;
+    }
+    constexpr bool operator>=(Half r) const
+    {
+        return r <= *this;
+    }
+    /// @}
+
+    /// @name Comparison operators (unordered)
+    /// @{
+    constexpr bool operator!=(Half r) const
+    {
+        return !(*this == r);
+    }
+    /// @} Comparison operators
+
+    /// @name Comparison with no guarantee of NaN behavior
+    /// @{
+    constexpr bool Less(Half r) const
+    {
+        uint_fast16_t a = static_cast<uint_fast16_t>(bits_),
+                      b = static_cast<uint_fast16_t>(r.bits_);
+        bool sign_a = (a & 0x8000u) == 0x8000u;
+        bool sign_b = (b & 0x8000u) == 0x8000u;
+        // if both `a` and `b` have the same sign:
+        //     test if `a` > `b` when `a` is negative,
+        //     or `a` < `b` when it is not,
+        //     and make sure they are not equal to each other
+        //     in case both are equal to +-0
+        // else:
+        //     test if `a` is negative,
+        //     and that `a` and `b` are not -0.0 and +0.0
+        return (sign_a == sign_b) ? (sign_a ^ (a < b)) && (a != b)
+                                  : sign_a && ((a | b) != 0x8000u);
+    }
+    constexpr bool LessEqual(Half r) const
+    {
+        uint_fast16_t a = static_cast<uint_fast16_t>(bits_),
+                      b = static_cast<uint_fast16_t>(r.bits_);
+        bool sign_a = (a & 0x8000u) == 0x8000u;
+        bool sign_b = (b & 0x8000u) == 0x8000u;
+        // if both `a` and `b` have the same sign:
+        //     test if `a` > `b` when `a` is negative,
+        //     or `a` < `b` when it is not,
+        //     or `a` == `b` (still needed even if we used <= above,
+        //     since +-0 must compare equal)
+        // else:
+        //     test if `a` is negative,
+        //     or `a` and `b` are both equal to +-0.0
+        return (sign_a == sign_b) ? (sign_a ^ (a < b)) || (a == b)
+                                  : sign_a || ((a | b) == 0x8000u);
+    }
+    constexpr bool Equal(Half r) const
+    {
+        // a fast16 cast is not worth it here, since an unpack op would be involved
+        uint16_t a = bits_, b = r.bits_;
+        return a == b || ((a | b) == 0x8000u);
+    }
+    /// @} Comparison
+
+    /// @name Properties
+    /// @{
+    constexpr bool IsNaN() const
+    {
+        return ((bits_ & 0x7c00u) == 0x7c00u) &&
+               ((bits_ & 0x03ffu) != 0);
+    }
+    /// @} Properties
+
+  private:
+    uint16_t bits_;
+};
+#else // __ARM_FP16_FORMAT_IEEE
+class Half final {
+  public:
+    static constexpr bool kNative = true;
+    template<typename T>
+    static constexpr bool kNativeConversion = (
+        std::is_same_v<T, float> || std::is_same_v<T, double>
+    );
+    Half() = default;
+    constexpr Half(__fp16 h) : half_(h)
+    {}
+    constexpr operator __fp16() const
+    { return half_; }
+    static Half FromBits(uint16_t bits)
+    {
+        Half h;
+        h.half_ = BitCast<__fp16>(bits);
+        return h;
+    }
+    uint16_t Bits() const
+    { return BitCast<uint16_t>(half_); }
+    constexpr bool Less(Half r) const
+    { return half_ < r.half_; }
+    constexpr bool LessEqual(Half r) const
+    { return half_ <= r.half_; }
+    constexpr bool Equal(Half r) const
+    { return half_ == r.half_; }
+    constexpr bool IsNaN() const
+    { return half_ != half_; }
+
+  private:
+    __fp16 half_;
+};
+#endif // __ARM_FP16_FORMAT_IEEE
 
 /// @} cpp_core_types
 } // namespace np
+
 #endif // NUMPY_CORE_SRC_COMMON_HALF_HPP
diff --git a/numpy/core/src/common/half_private.hpp b/numpy/core/src/common/half_private.hpp
new file mode 100644
index 000000000000..7a64eb397bed
--- /dev/null
+++ b/numpy/core/src/common/half_private.hpp
@@ -0,0 +1,330 @@
+#ifndef NUMPY_CORE_SRC_COMMON_HALF_PRIVATE_HPP
+#define NUMPY_CORE_SRC_COMMON_HALF_PRIVATE_HPP
+
+#include "npstd.hpp"
+#include "float_status.hpp"
+
+/*
+ * The following functions, which emulate float/double/half conversions,
+ * are copied from npymath without any changes to their functionality.
+ */
+namespace np { namespace half_private {
+
+template<bool gen_overflow=true, bool gen_underflow=true, bool round_even=true>
+inline uint16_t FromFloatBits(uint32_t f)
+{
+    uint32_t f_exp, f_sig;
+    uint16_t h_sgn, h_exp, h_sig;
+
+    h_sgn = (uint16_t) ((f&0x80000000u) >> 16);
+    f_exp = (f&0x7f800000u);
+
+    /* Exponent overflow/NaN converts to signed inf/NaN */
+    if (f_exp >= 0x47800000u) {
+        if (f_exp == 0x7f800000u) {
+            /* Inf or NaN */
+            f_sig = (f&0x007fffffu);
+            if (f_sig != 0) {
+                /* NaN - propagate the flag in the significand... */
+                uint16_t ret = (uint16_t) (0x7c00u + (f_sig >> 13));
+                /* ...but make sure it stays a NaN */
+                if (ret == 0x7c00u) {
+                    ret++;
+                }
+                return h_sgn + ret;
+            } else {
+                /* signed inf */
+                return (uint16_t) (h_sgn + 0x7c00u);
+            }
+        } else {
+            if constexpr (gen_overflow) {
+                /* overflow to signed inf */
+                FloatStatus::RaiseOverflow();
+            }
+            return (uint16_t) (h_sgn + 0x7c00u);
+        }
+    }
+
+    /* Exponent underflow converts to a subnormal half or signed zero */
+    if (f_exp <= 0x38000000u) {
+        /*
+         * Signed zeros, subnormal floats, and floats with small
+         * exponents all convert to signed zero half-floats.
+         */
+        if (f_exp < 0x33000000u) {
+            if constexpr (gen_underflow) {
+                /* If f != 0, it underflowed to 0 */
+                if ((f&0x7fffffff) != 0) {
+                    FloatStatus::RaiseUnderflow();
+                }
+            }
+            return h_sgn;
+        }
+        /* Make the subnormal significand */
+        f_exp >>= 23;
+        f_sig = (0x00800000u + (f&0x007fffffu));
+        if constexpr (gen_underflow) {
+            /* If it's not exactly represented, it underflowed */
+            if ((f_sig&(((uint32_t)1 << (126 - f_exp)) - 1)) != 0) {
+                FloatStatus::RaiseUnderflow();
+            }
+        }
+        /*
+         * Usually the significand is shifted by 13. For subnormals an
+         * additional shift needs to occur. This shift is one for the largest
+         * exponent giving a subnormal `f_exp = 0x38000000 >> 23 = 112`, which
+         * offsets the new first bit. At most the shift can be 1+10 bits.
+         */
+        f_sig >>= (113 - f_exp);
+        /* Handle rounding by adding 1 to the bit beyond half precision */
+        if constexpr (round_even) {
+            /*
+             * If the last bit in the half significand is 0 (already even), and
+             * the remaining bit pattern is 1000...0, then we do not add one
+             * to the bit after the half significand. However, the (113 - f_exp)
+             * shift can lose up to 11 bits, so the || checks them in the original.
+             * In all other cases, we can just add one.
+             */
+            if (((f_sig&0x00003fffu) != 0x00001000u) || (f&0x000007ffu)) {
+                f_sig += 0x00001000u;
+            }
+        }
+        else {
+            f_sig += 0x00001000u;
+        }
+        h_sig = (uint16_t) (f_sig >> 13);
+        /*
+         * If the rounding causes a bit to spill into h_exp, it will
+         * increment h_exp from zero to one and h_sig will be zero.
+         * This is the correct result.
+         */
+        return (uint16_t) (h_sgn + h_sig);
+    }
+
+    /* Regular case with no overflow or underflow */
+    h_exp = (uint16_t) ((f_exp - 0x38000000u) >> 13);
+    /* Handle rounding by adding 1 to the bit beyond half precision */
+    f_sig = (f&0x007fffffu);
+    if constexpr (round_even) {
+        /*
+         * If the last bit in the half significand is 0 (already even), and
+         * the remaining bit pattern is 1000...0, then we do not add one
+         * to the bit after the half significand. In all other cases, we do.
+         */
+        if ((f_sig&0x00003fffu) != 0x00001000u) {
+            f_sig += 0x00001000u;
+        }
+    }
+    else {
+        f_sig += 0x00001000u;
+    }
+    h_sig = (uint16_t) (f_sig >> 13);
+    /*
+     * If the rounding causes a bit to spill into h_exp, it will
+     * increment h_exp by one and h_sig will be zero. This is the
+     * correct result. h_exp may increment to 15, at greatest, in
+     * which case the result overflows to a signed inf.
+     */
+    if constexpr (gen_overflow) {
+        h_sig += h_exp;
+        if (h_sig == 0x7c00u) {
+            FloatStatus::RaiseOverflow();
+        }
+        return h_sgn + h_sig;
+    }
+    else {
+        return h_sgn + h_exp + h_sig;
+    }
+}
+
+template<bool gen_overflow=true, bool gen_underflow=true, bool round_even=true>
+inline uint16_t FromDoubleBits(uint64_t d)
+{
+    uint64_t d_exp, d_sig;
+    uint16_t h_sgn, h_exp, h_sig;
+
+    h_sgn = (d&0x8000000000000000ULL) >> 48;
+    d_exp = (d&0x7ff0000000000000ULL);
+
+    /* Exponent overflow/NaN converts to signed inf/NaN */
+    if (d_exp >= 0x40f0000000000000ULL) {
+        if (d_exp == 0x7ff0000000000000ULL) {
+            /* Inf or NaN */
+            d_sig = (d&0x000fffffffffffffULL);
+            if (d_sig != 0) {
+                /* NaN - propagate the flag in the significand... */
+                uint16_t ret = (uint16_t) (0x7c00u + (d_sig >> 42));
+                /* ...but make sure it stays a NaN */
+                if (ret == 0x7c00u) {
+                    ret++;
+                }
+                return h_sgn + ret;
+            } else {
+                /* signed inf */
+                return h_sgn + 0x7c00u;
+            }
+        } else {
+            /* overflow to signed inf */
+            if constexpr (gen_overflow) {
+                FloatStatus::RaiseOverflow();
+            }
+            return h_sgn + 0x7c00u;
+        }
+    }
+
+    /* Exponent underflow converts to subnormal half or signed zero */
+    if (d_exp <= 0x3f00000000000000ULL) {
+        /*
+         * Signed zeros, subnormal floats, and floats with small
+         * exponents all convert to signed zero half-floats.
+         */
+        if (d_exp < 0x3e60000000000000ULL) {
+            if constexpr (gen_underflow) {
+                /* If d != 0, it underflowed to 0 */
+                if ((d&0x7fffffffffffffffULL) != 0) {
+                    FloatStatus::RaiseUnderflow();
+                }
+            }
+            return h_sgn;
+        }
+        /* Make the subnormal significand */
+        d_exp >>= 52;
+        d_sig = (0x0010000000000000ULL + (d&0x000fffffffffffffULL));
+        if constexpr (gen_underflow) {
+            /* If it's not exactly represented, it underflowed */
+            if ((d_sig&(((uint64_t)1 << (1051 - d_exp)) - 1)) != 0) {
+                FloatStatus::RaiseUnderflow();
+            }
+        }
+        /*
+         * Unlike floats, doubles have enough room to shift left to align
+         * the subnormal significand leading to no loss of the last bits.
+         * The smallest possible exponent giving a subnormal is:
+         * `d_exp = 0x3e60000000000000 >> 52 = 998`. All larger subnormals are
+         * shifted with respect to it. This adds a shift of 10+1 bits to the
+         * final right shift when comparing it to the one in the normal branch.
+         */
+        assert(d_exp - 998 >= 0);
+        d_sig <<= (d_exp - 998);
+        /* Handle rounding by adding 1 to the bit beyond half precision */
+        if constexpr (round_even) {
+            /*
+             * If the last bit in the half significand is 0 (already even), and
+             * the remaining bit pattern is 1000...0, then we do not add one
+             * to the bit after the half significand. In all other cases, we do.
+             */
+            if ((d_sig&0x003fffffffffffffULL) != 0x0010000000000000ULL) {
+                d_sig += 0x0010000000000000ULL;
+            }
+        }
+        else {
+            d_sig += 0x0010000000000000ULL;
+        }
+        h_sig = (uint16_t) (d_sig >> 53);
+        /*
+         * If the rounding causes a bit to spill into h_exp, it will
+         * increment h_exp from zero to one and h_sig will be zero.
+         * This is the correct result.
+         */
+        return h_sgn + h_sig;
+    }
+
+    /* Regular case with no overflow or underflow */
+    h_exp = (uint16_t) ((d_exp - 0x3f00000000000000ULL) >> 42);
+    /* Handle rounding by adding 1 to the bit beyond half precision */
+    d_sig = (d&0x000fffffffffffffULL);
+    if constexpr (round_even) {
+        /*
+         * If the last bit in the half significand is 0 (already even), and
+         * the remaining bit pattern is 1000...0, then we do not add one
+         * to the bit after the half significand. In all other cases, we do.
+         */
+        if ((d_sig&0x000007ffffffffffULL) != 0x0000020000000000ULL) {
+            d_sig += 0x0000020000000000ULL;
+        }
+    }
+    else {
+        d_sig += 0x0000020000000000ULL;
+    }
+    h_sig = (uint16_t) (d_sig >> 42);
+
+    /*
+     * If the rounding causes a bit to spill into h_exp, it will
+     * increment h_exp by one and h_sig will be zero. This is the
+     * correct result. h_exp may increment to 15, at greatest, in
+     * which case the result overflows to a signed inf.
+     */
+    if constexpr (gen_overflow) {
+        h_sig += h_exp;
+        if (h_sig == 0x7c00u) {
+            FloatStatus::RaiseOverflow();
+        }
+        return h_sgn + h_sig;
+    }
+    else {
+        return h_sgn + h_exp + h_sig;
+    }
+}
+
+constexpr uint32_t ToFloatBits(uint16_t h)
+{
+    uint16_t h_exp = (h&0x7c00u);
+    uint32_t f_sgn = ((uint32_t)h&0x8000u) << 16;
+    switch (h_exp) {
+        case 0x0000u: { // 0 or subnormal
+            uint16_t h_sig = (h&0x03ffu);
+            // Signed zero
+            if (h_sig == 0) {
+                return f_sgn;
+            }
+            // Subnormal
+            h_sig <<= 1;
+            while ((h_sig&0x0400u) == 0) {
+                h_sig <<= 1;
+                h_exp++;
+            }
+            uint32_t f_exp = ((uint32_t)(127 - 15 - h_exp)) << 23;
+            uint32_t f_sig = ((uint32_t)(h_sig&0x03ffu)) << 13;
+            return f_sgn + f_exp + f_sig;
+        }
+        case 0x7c00u: // inf or NaN
+            // All-ones exponent and a copy of the significand
+            return f_sgn + 0x7f800000u + (((uint32_t)(h&0x03ffu)) << 13);
+        default: // normalized
+            // Just need to adjust the exponent and shift
+            return f_sgn + (((uint32_t)(h&0x7fffu) + 0x1c000u) << 13);
+    }
+}
+
+constexpr uint64_t ToDoubleBits(uint16_t h)
+{
+    uint16_t h_exp = (h&0x7c00u);
+    uint64_t d_sgn = ((uint64_t)h&0x8000u) << 48;
+    switch (h_exp) {
+        case 0x0000u: { // 0 or subnormal
+            uint16_t h_sig = (h&0x03ffu);
+            // Signed zero
+            if (h_sig == 0) {
+                return d_sgn;
+            }
+            // Subnormal
+            h_sig <<= 1;
+            while ((h_sig&0x0400u) == 0) {
+                h_sig <<= 1;
+                h_exp++;
+            }
+            uint64_t d_exp = ((uint64_t)(1023 - 15 - h_exp)) << 52;
+            uint64_t d_sig = ((uint64_t)(h_sig&0x03ffu)) << 42;
+            return d_sgn + d_exp + d_sig;
+        }
+        case 0x7c00u: // inf or NaN
+            // All-ones exponent and a copy of the significand
+            return d_sgn + 0x7ff0000000000000ULL + (((uint64_t)(h&0x03ffu)) << 42);
+        default: // normalized
+            // Just need to adjust the exponent and shift
+            return d_sgn + (((uint64_t)(h&0x7fffu) + 0xfc000u) << 42);
+    }
+}
+
+}} // namespace np::half_private
+#endif // NUMPY_CORE_SRC_COMMON_HALF_PRIVATE_HPP
diff --git a/numpy/core/src/common/npdef.hpp b/numpy/core/src/common/npdef.hpp
new file mode 100644
index 000000000000..56a0df52e5da
--- /dev/null
+++ b/numpy/core/src/common/npdef.hpp
@@ -0,0 +1,28 @@
+#ifndef NUMPY_CORE_SRC_COMMON_NPDEF_HPP
+#define NUMPY_CORE_SRC_COMMON_NPDEF_HPP
+
+#if !defined(__cplusplus) || __cplusplus < 201703L
+    #error "NumPy requires a compiler with at least C++17 enabled"
+#endif
+
+/// @addtogroup cpp_core_defs
+/// @{
+
+/// Whether compiler supports C++20
+#if __cplusplus > 202002L
+    #define NP_HAS_CPP20 1
+#else
+    #define NP_HAS_CPP20 0
+#endif
+
+/// Wraps `__has_builtin`
+#if defined(__has_builtin)
+    #define NP_HAS_BUILTIN(INTRIN) __has_builtin(INTRIN)
+#else
+    #define NP_HAS_BUILTIN(INTRIN) 0
+#endif
+
+/// @} cpp_core_defs
+
+#endif // NUMPY_CORE_SRC_COMMON_NPDEF_HPP
+
diff --git a/numpy/core/src/common/npstd.hpp b/numpy/core/src/common/npstd.hpp
index 71993bd7c8ef..ca664229a87c 100644
--- a/numpy/core/src/common/npstd.hpp
+++ b/numpy/core/src/common/npstd.hpp
@@ -31,6 +31,8 @@ using std::int64_t;
 using std::uintptr_t;
 using std::intptr_t;
 using std::complex;
+using std::uint_fast16_t;
+using std::uint_fast32_t;
 
 /** Guard for long double.
  *
diff --git a/numpy/core/src/common/utils.hpp b/numpy/core/src/common/utils.hpp
new file mode 100644
index 000000000000..f847cab44832
--- /dev/null
+++ b/numpy/core/src/common/utils.hpp
@@ -0,0 +1,51 @@
+#ifndef NUMPY_CORE_SRC_COMMON_UTILS_HPP
+#define NUMPY_CORE_SRC_COMMON_UTILS_HPP
+
+#include "npdef.hpp"
+
+#if NP_HAS_CPP20
+    #include <bit>
+#endif
+
+#include <type_traits>
+#include <cstring>
+
+namespace np {
+
+/** Create a value of type `To` from the bits of `from`.
+ *
+ * Similar to `std::bit_cast`, but compatible with C++17;
+ * it should behave like `*reinterpret_cast<To*>(&from)`
+ * (i.e. type punning) without invoking any undefined behavior.
+ */
+template<typename To, typename From>
+#if NP_HAS_BUILTIN(__builtin_bit_cast) || NP_HAS_CPP20
+[[nodiscard]] constexpr
+#else
+inline
+#endif
+To BitCast(const From &from) noexcept
+{
+    static_assert(
+        sizeof(To) == sizeof(From),
+        "both data types must have the same size");
+
+    static_assert(
+        std::is_trivially_copyable_v<To> &&
+        std::is_trivially_copyable_v<From>,
+        "both data types must be trivially copyable");
+
+#if NP_HAS_CPP20
+    return std::bit_cast<To>(from);
+#elif NP_HAS_BUILTIN(__builtin_bit_cast)
+    return __builtin_bit_cast(To, from);
+#else
+    To to;
+    memcpy(&to, &from, sizeof(from));
+    return to;
+#endif
+}
+
+} // namespace np
+#endif // NUMPY_CORE_SRC_COMMON_UTILS_HPP
+
diff --git a/numpy/core/src/npymath/halffloat.c b/numpy/core/src/npymath/halffloat.c
deleted file mode 100644
index 51948c736276..000000000000
--- a/numpy/core/src/npymath/halffloat.c
+++ /dev/null
@@ -1,555 +0,0 @@
-#define NPY_NO_DEPRECATED_API NPY_API_VERSION
-
-#include "numpy/halffloat.h"
-
-/*
- * This chooses between 'ties to even' and 'ties away from zero'.
- */
-#define NPY_HALF_ROUND_TIES_TO_EVEN 1
-/*
- * If these are 1, the conversions try to trigger underflow,
- * overflow, and invalid exceptions in the FP system when needed.
- */
-#define NPY_HALF_GENERATE_OVERFLOW 1
-#define NPY_HALF_GENERATE_UNDERFLOW 1
-#define NPY_HALF_GENERATE_INVALID 1
-
-/*
- ********************************************************************
- *                   HALF-PRECISION ROUTINES                        *
- ********************************************************************
- */
-
-float npy_half_to_float(npy_half h)
-{
-    union { float ret; npy_uint32 retbits; } conv;
-    conv.retbits = npy_halfbits_to_floatbits(h);
-    return conv.ret;
-}
-
-double npy_half_to_double(npy_half h)
-{
-    union { double ret; npy_uint64 retbits; } conv;
-    conv.retbits = npy_halfbits_to_doublebits(h);
-    return conv.ret;
-}
-
-npy_half npy_float_to_half(float f)
-{
-    union { float f; npy_uint32 fbits; } conv;
-    conv.f = f;
-    return npy_floatbits_to_halfbits(conv.fbits);
-}
-
-npy_half npy_double_to_half(double d)
-{
-    union { double d; npy_uint64 dbits; } conv;
-    conv.d = d;
-    return npy_doublebits_to_halfbits(conv.dbits);
-}
-
-int npy_half_iszero(npy_half h)
-{
-    return (h&0x7fff) == 0;
-}
-
-int npy_half_isnan(npy_half h)
-{
-    return ((h&0x7c00u) == 0x7c00u) && ((h&0x03ffu) != 0x0000u);
-}
-
-int npy_half_isinf(npy_half h)
-{
-    return ((h&0x7fffu) == 0x7c00u);
-}
-
-int npy_half_isfinite(npy_half h)
-{
-    return ((h&0x7c00u) != 0x7c00u);
-}
-
-int npy_half_signbit(npy_half h)
-{
-    return (h&0x8000u) != 0;
-}
-
-npy_half npy_half_spacing(npy_half h)
-{
-    npy_half ret;
-    npy_uint16 h_exp = h&0x7c00u;
-    npy_uint16 h_sig = h&0x03ffu;
-    if (h_exp == 0x7c00u) {
-#if NPY_HALF_GENERATE_INVALID
-        npy_set_floatstatus_invalid();
-#endif
-        ret = NPY_HALF_NAN;
-    } else if (h == 0x7bffu) {
-#if NPY_HALF_GENERATE_OVERFLOW
-        npy_set_floatstatus_overflow();
-#endif
-        ret = NPY_HALF_PINF;
-    } else if ((h&0x8000u) && h_sig == 0) { /* Negative boundary case */
-        if (h_exp > 0x2c00u) { /* If result is normalized */
-            ret = h_exp - 0x2c00u;
-        } else if(h_exp > 0x0400u) { /* The result is a subnormal, but not the smallest */
-            ret = 1 << ((h_exp >> 10) - 2);
-        } else {
-            ret = 0x0001u; /* Smallest subnormal half */
-        }
-    } else if (h_exp > 0x2800u) { /* If result is still normalized */
-        ret = h_exp - 0x2800u;
-    } else if (h_exp > 0x0400u) { /* The result is a subnormal, but not the smallest */
-        ret = 1 << ((h_exp >> 10) - 1);
-    } else {
-        ret = 0x0001u;
-    }
-
-    return ret;
-}
-
-npy_half npy_half_copysign(npy_half x, npy_half y)
-{
-    return (x&0x7fffu) | (y&0x8000u);
-}
-
-npy_half npy_half_nextafter(npy_half x, npy_half y)
-{
-    npy_half ret;
-
-    if (npy_half_isnan(x) || npy_half_isnan(y)) {
-        ret = NPY_HALF_NAN;
-    } else if (npy_half_eq_nonan(x, y)) {
-        ret = x;
-    } else if (npy_half_iszero(x)) {
-        ret = (y&0x8000u) + 1; /* Smallest subnormal half */
-    } else if (!(x&0x8000u)) { /* x > 0 */
-        if ((npy_int16)x > (npy_int16)y) { /* x > y */
-            ret = x-1;
-        } else {
-            ret = x+1;
-        }
-    } else {
-        if (!(y&0x8000u) || (x&0x7fffu) > (y&0x7fffu)) { /* x < y */
-            ret = x-1;
-        } else {
-            ret = x+1;
-        }
-    }
-#if NPY_HALF_GENERATE_OVERFLOW
-    if (npy_half_isinf(ret) && npy_half_isfinite(x)) {
-        npy_set_floatstatus_overflow();
-    }
-#endif
-
-    return ret;
-}
-
-int npy_half_eq_nonan(npy_half h1, npy_half h2)
-{
-    return (h1 == h2 || ((h1 | h2) & 0x7fff) == 0);
-}
-
-int npy_half_eq(npy_half h1, npy_half h2)
-{
-    /*
-     * The equality cases are as follows:
-     *   - If either value is NaN, never equal.
-     *   - If the values are equal, equal.
-     *   - If the values are both signed zeros, equal.
-     */
-    return (!npy_half_isnan(h1) && !npy_half_isnan(h2)) &&
-           (h1 == h2 || ((h1 | h2) & 0x7fff) == 0);
-}
-
-int npy_half_ne(npy_half h1, npy_half h2)
-{
-    return !npy_half_eq(h1, h2);
-}
-
-int npy_half_lt_nonan(npy_half h1, npy_half h2)
-{
-    if (h1&0x8000u) {
-        if (h2&0x8000u) {
-            return (h1&0x7fffu) > (h2&0x7fffu);
-        } else {
-            /* Signed zeros are equal, have to check for it */
-            return (h1 != 0x8000u) || (h2 != 0x0000u);
-        }
-    } else {
-        if (h2&0x8000u) {
-            return 0;
-        } else {
-            return (h1&0x7fffu) < (h2&0x7fffu);
-        }
-    }
-}
-
-int npy_half_lt(npy_half h1, npy_half h2)
-{
-    return (!npy_half_isnan(h1) && !npy_half_isnan(h2)) && npy_half_lt_nonan(h1, h2);
-}
-
-int npy_half_gt(npy_half h1, npy_half h2)
-{
-    return npy_half_lt(h2, h1);
-}
-
-int npy_half_le_nonan(npy_half h1, npy_half h2)
-{
-    if (h1&0x8000u) {
-        if (h2&0x8000u) {
-            return (h1&0x7fffu) >= (h2&0x7fffu);
-        } else {
-            return 1;
-        }
-    } else {
-        if (h2&0x8000u) {
-            /* Signed zeros are equal, have to check for it */
-            return (h1 == 0x0000u) && (h2 == 0x8000u);
-        } else {
-            return (h1&0x7fffu) <= (h2&0x7fffu);
-        }
-    }
-}
-
-int npy_half_le(npy_half h1, npy_half h2)
-{
-    return (!npy_half_isnan(h1) && !npy_half_isnan(h2)) && npy_half_le_nonan(h1, h2);
-}
-
-int npy_half_ge(npy_half h1, npy_half h2)
-{
-    return npy_half_le(h2, h1);
-}
-
-npy_half npy_half_divmod(npy_half h1, npy_half h2, npy_half *modulus)
-{
-    float fh1 = npy_half_to_float(h1);
-    float fh2 = npy_half_to_float(h2);
-    float div, mod;
-
-    div = npy_divmodf(fh1, fh2, &mod);
-    *modulus = npy_float_to_half(mod);
-    return npy_float_to_half(div);
-}
-
-
-
-/*
- ********************************************************************
- *                     BIT-LEVEL CONVERSIONS                        *
- ********************************************************************
- */
-
-npy_uint16 npy_floatbits_to_halfbits(npy_uint32 f)
-{
-    npy_uint32 f_exp, f_sig;
-    npy_uint16 h_sgn, h_exp, h_sig;
-
-    h_sgn = (npy_uint16) ((f&0x80000000u) >> 16);
-    f_exp = (f&0x7f800000u);
-
-    /* Exponent overflow/NaN converts to signed inf/NaN */
-    if (f_exp >= 0x47800000u) {
-        if (f_exp == 0x7f800000u) {
-            /* Inf or NaN */
-            f_sig = (f&0x007fffffu);
-            if (f_sig != 0) {
-                /* NaN - propagate the flag in the significand... */
-                npy_uint16 ret = (npy_uint16) (0x7c00u + (f_sig >> 13));
-                /* ...but make sure it stays a NaN */
-                if (ret == 0x7c00u) {
-                    ret++;
-                }
-                return h_sgn + ret;
-            } else {
-                /* signed inf */
-                return (npy_uint16) (h_sgn + 0x7c00u);
-            }
-        } else {
-            /* overflow to signed inf */
-#if NPY_HALF_GENERATE_OVERFLOW
-            npy_set_floatstatus_overflow();
-#endif
-            return (npy_uint16) (h_sgn + 0x7c00u);
-        }
-    }
-
-    /* Exponent underflow converts to a subnormal half or signed zero */
-    if (f_exp <= 0x38000000u) {
-        /*
-         * Signed zeros, subnormal floats, and floats with small
-         * exponents all convert to signed zero half-floats.
-         */
-        if (f_exp < 0x33000000u) {
-#if NPY_HALF_GENERATE_UNDERFLOW
-            /* If f != 0, it underflowed to 0 */
-            if ((f&0x7fffffff) != 0) {
-                npy_set_floatstatus_underflow();
-            }
-#endif
-            return h_sgn;
-        }
-        /* Make the subnormal significand */
-        f_exp >>= 23;
-        f_sig = (0x00800000u + (f&0x007fffffu));
-#if NPY_HALF_GENERATE_UNDERFLOW
-        /* If it's not exactly represented, it underflowed */
-        if ((f_sig&(((npy_uint32)1 << (126 - f_exp)) - 1)) != 0) {
-            npy_set_floatstatus_underflow();
-        }
-#endif
-        /*
-         * Usually the significand is shifted by 13. For subnormals an
-         * additional shift needs to occur. This shift is one for the largest
-         * exponent giving a subnormal `f_exp = 0x38000000 >> 23 = 112`, which
-         * offsets the new first bit. At most the shift can be 1+10 bits.
-         */
-        f_sig >>= (113 - f_exp);
-        /* Handle rounding by adding 1 to the bit beyond half precision */
-#if NPY_HALF_ROUND_TIES_TO_EVEN
-        /*
-         * If the last bit in the half significand is 0 (already even), and
-         * the remaining bit pattern is 1000...0, then we do not add one
-         * to the bit after the half significand. However, the (113 - f_exp)
-         * shift can lose up to 11 bits, so the || checks them in the original.
-         * In all other cases, we can just add one.
-         */
-        if (((f_sig&0x00003fffu) != 0x00001000u) || (f&0x000007ffu)) {
-            f_sig += 0x00001000u;
-        }
-#else
-        f_sig += 0x00001000u;
-#endif
-        h_sig = (npy_uint16) (f_sig >> 13);
-        /*
-         * If the rounding causes a bit to spill into h_exp, it will
-         * increment h_exp from zero to one and h_sig will be zero.
-         * This is the correct result.
-         */
-        return (npy_uint16) (h_sgn + h_sig);
-    }
-
-    /* Regular case with no overflow or underflow */
-    h_exp = (npy_uint16) ((f_exp - 0x38000000u) >> 13);
-    /* Handle rounding by adding 1 to the bit beyond half precision */
-    f_sig = (f&0x007fffffu);
-#if NPY_HALF_ROUND_TIES_TO_EVEN
-    /*
-     * If the last bit in the half significand is 0 (already even), and
-     * the remaining bit pattern is 1000...0, then we do not add one
-     * to the bit after the half significand. In all other cases, we do.
-     */
-    if ((f_sig&0x00003fffu) != 0x00001000u) {
-        f_sig += 0x00001000u;
-    }
-#else
-    f_sig += 0x00001000u;
-#endif
-    h_sig = (npy_uint16) (f_sig >> 13);
-    /*
-     * If the rounding causes a bit to spill into h_exp, it will
-     * increment h_exp by one and h_sig will be zero. This is the
-     * correct result. h_exp may increment to 15, at greatest, in
-     * which case the result overflows to a signed inf.
-     */
-#if NPY_HALF_GENERATE_OVERFLOW
-    h_sig += h_exp;
-    if (h_sig == 0x7c00u) {
-        npy_set_floatstatus_overflow();
-    }
-    return h_sgn + h_sig;
-#else
-    return h_sgn + h_exp + h_sig;
-#endif
-}
-
-npy_uint16 npy_doublebits_to_halfbits(npy_uint64 d)
-{
-    npy_uint64 d_exp, d_sig;
-    npy_uint16 h_sgn, h_exp, h_sig;
-
-    h_sgn = (d&0x8000000000000000ULL) >> 48;
-    d_exp = (d&0x7ff0000000000000ULL);
-
-    /* Exponent overflow/NaN converts to signed inf/NaN */
-    if (d_exp >= 0x40f0000000000000ULL) {
-        if (d_exp == 0x7ff0000000000000ULL) {
-            /* Inf or NaN */
-            d_sig = (d&0x000fffffffffffffULL);
-            if (d_sig != 0) {
-                /* NaN - propagate the flag in the significand... */
-                npy_uint16 ret = (npy_uint16) (0x7c00u + (d_sig >> 42));
-                /* ...but make sure it stays a NaN */
-                if (ret == 0x7c00u) {
-                    ret++;
-                }
-                return h_sgn + ret;
-            } else {
-                /* signed inf */
-                return h_sgn + 0x7c00u;
-            }
-        } else {
-            /* overflow to signed inf */
-#if NPY_HALF_GENERATE_OVERFLOW
-            npy_set_floatstatus_overflow();
-#endif
-            return h_sgn + 0x7c00u;
-        }
-    }
-
-    /* Exponent underflow converts to subnormal half or signed zero */
-    if (d_exp <= 0x3f00000000000000ULL) {
-        /*
-         * Signed zeros, subnormal floats, and floats with small
-         * exponents all convert to signed zero half-floats.
-         */
-        if (d_exp < 0x3e60000000000000ULL) {
-#if NPY_HALF_GENERATE_UNDERFLOW
-            /* If d != 0, it underflowed to 0 */
-            if ((d&0x7fffffffffffffffULL) != 0) {
-                npy_set_floatstatus_underflow();
-            }
-#endif
-            return h_sgn;
-        }
-        /* Make the subnormal significand */
-        d_exp >>= 52;
-        d_sig = (0x0010000000000000ULL + (d&0x000fffffffffffffULL));
-#if NPY_HALF_GENERATE_UNDERFLOW
-        /* If it's not exactly represented, it underflowed */
-        if ((d_sig&(((npy_uint64)1 << (1051 - d_exp)) - 1)) != 0) {
-            npy_set_floatstatus_underflow();
-        }
-#endif
-        /*
-         * Unlike floats, doubles have enough room to shift left to align
-         * the subnormal significand leading to no loss of the last bits.
-         * The smallest possible exponent giving a subnormal is:
-         * `d_exp = 0x3e60000000000000 >> 52 = 998`. All larger subnormals are
-         * shifted with respect to it. This adds a shift of 10+1 bits the final
-         * right shift when comparing it to the one in the normal branch.
-         */
-        assert(d_exp - 998 >= 0);
-        d_sig <<= (d_exp - 998);
-        /* Handle rounding by adding 1 to the bit beyond half precision */
-#if NPY_HALF_ROUND_TIES_TO_EVEN
-        /*
-         * If the last bit in the half significand is 0 (already even), and
-         * the remaining bit pattern is 1000...0, then we do not add one
-         * to the bit after the half significand. In all other cases, we do.
-         */
-        if ((d_sig&0x003fffffffffffffULL) != 0x0010000000000000ULL) {
-            d_sig += 0x0010000000000000ULL;
-        }
-#else
-        d_sig += 0x0010000000000000ULL;
-#endif
-        h_sig = (npy_uint16) (d_sig >> 53);
-        /*
-         * If the rounding causes a bit to spill into h_exp, it will
-         * increment h_exp from zero to one and h_sig will be zero.
-         * This is the correct result.
-         */
-        return h_sgn + h_sig;
-    }
-
-    /* Regular case with no overflow or underflow */
-    h_exp = (npy_uint16) ((d_exp - 0x3f00000000000000ULL) >> 42);
-    /* Handle rounding by adding 1 to the bit beyond half precision */
-    d_sig = (d&0x000fffffffffffffULL);
-#if NPY_HALF_ROUND_TIES_TO_EVEN
-    /*
-     * If the last bit in the half significand is 0 (already even), and
-     * the remaining bit pattern is 1000...0, then we do not add one
-     * to the bit after the half significand. In all other cases, we do.
-     */
-    if ((d_sig&0x000007ffffffffffULL) != 0x0000020000000000ULL) {
-        d_sig += 0x0000020000000000ULL;
-    }
-#else
-    d_sig += 0x0000020000000000ULL;
-#endif
-    h_sig = (npy_uint16) (d_sig >> 42);
-
-    /*
-     * If the rounding causes a bit to spill into h_exp, it will
-     * increment h_exp by one and h_sig will be zero. This is the
-     * correct result. h_exp may increment to 15, at greatest, in
-     * which case the result overflows to a signed inf.
-     */
-#if NPY_HALF_GENERATE_OVERFLOW
-    h_sig += h_exp;
-    if (h_sig == 0x7c00u) {
-        npy_set_floatstatus_overflow();
-    }
-    return h_sgn + h_sig;
-#else
-    return h_sgn + h_exp + h_sig;
-#endif
-}
-
-npy_uint32 npy_halfbits_to_floatbits(npy_uint16 h)
-{
-    npy_uint16 h_exp, h_sig;
-    npy_uint32 f_sgn, f_exp, f_sig;
-
-    h_exp = (h&0x7c00u);
-    f_sgn = ((npy_uint32)h&0x8000u) << 16;
-    switch (h_exp) {
-        case 0x0000u: /* 0 or subnormal */
-            h_sig = (h&0x03ffu);
-            /* Signed zero */
-            if (h_sig == 0) {
-                return f_sgn;
-            }
-            /* Subnormal */
-            h_sig <<= 1;
-            while ((h_sig&0x0400u) == 0) {
-                h_sig <<= 1;
-                h_exp++;
-            }
-            f_exp = ((npy_uint32)(127 - 15 - h_exp)) << 23;
-            f_sig = ((npy_uint32)(h_sig&0x03ffu)) << 13;
-            return f_sgn + f_exp + f_sig;
-        case 0x7c00u: /* inf or NaN */
-            /* All-ones exponent and a copy of the significand */
-            return f_sgn + 0x7f800000u + (((npy_uint32)(h&0x03ffu)) << 13);
-        default: /* normalized */
-            /* Just need to adjust the exponent and shift */
-            return f_sgn + (((npy_uint32)(h&0x7fffu) + 0x1c000u) << 13);
-    }
-}
-
-npy_uint64 npy_halfbits_to_doublebits(npy_uint16 h)
-{
-    npy_uint16 h_exp, h_sig;
-    npy_uint64 d_sgn, d_exp, d_sig;
-
-    h_exp = (h&0x7c00u);
-    d_sgn = ((npy_uint64)h&0x8000u) << 48;
-    switch (h_exp) {
-        case 0x0000u: /* 0 or subnormal */
-            h_sig = (h&0x03ffu);
-            /* Signed zero */
-            if (h_sig == 0) {
-                return d_sgn;
-            }
-            /* Subnormal */
-            h_sig <<= 1;
-            while ((h_sig&0x0400u) == 0) {
-                h_sig <<= 1;
-                h_exp++;
-            }
-            d_exp = ((npy_uint64)(1023 - 15 - h_exp)) << 52;
-            d_sig = ((npy_uint64)(h_sig&0x03ffu)) << 42;
-            return d_sgn + d_exp + d_sig;
-        case 0x7c00u: /* inf or NaN */
-            /* All-ones exponent and a copy of the significand */
-            return d_sgn + 0x7ff0000000000000ULL +
-                                    (((npy_uint64)(h&0x03ffu)) << 42);
-        default: /* normalized */
-            /* Just need to adjust the exponent and shift */
-            return d_sgn + (((npy_uint64)(h&0x7fffu) + 0xfc000u) << 42);
    }
-}
diff --git a/numpy/core/src/npymath/halffloat.cpp b/numpy/core/src/npymath/halffloat.cpp
new file mode 100644
index 000000000000..aa582c1b9517
--- /dev/null
+++ b/numpy/core/src/npymath/halffloat.cpp
@@ -0,0 +1,238 @@
+#define NPY_NO_DEPRECATED_API NPY_API_VERSION
+
+/*
+ * If these are 1, the conversions try to trigger underflow,
+ * overflow, and invalid exceptions in the FP system when needed.
+ */
+#define NPY_HALF_GENERATE_OVERFLOW 1
+#define NPY_HALF_GENERATE_INVALID 1
+
+#include "numpy/halffloat.h"
+
+#include "common.hpp"
+/*
+ ********************************************************************
+ *                   HALF-PRECISION ROUTINES                        *
+ ********************************************************************
+ */
+using namespace np;
+
+float npy_half_to_float(npy_half h)
+{
+    return static_cast<float>(Half::FromBits(h));
+}
+
+double npy_half_to_double(npy_half h)
+{
+    return static_cast<double>(Half::FromBits(h));
+}
+
+npy_half npy_float_to_half(float f)
+{
+    return Half(f).Bits();
+}
+
+npy_half npy_double_to_half(double d)
+{
+    return Half(d).Bits();
+}
+
+int npy_half_iszero(npy_half h)
+{
+    return (h&0x7fff) == 0;
+}
+
+int npy_half_isnan(npy_half h)
+{
+    return Half::FromBits(h).IsNaN();
+}
+
+int npy_half_isinf(npy_half h)
+{
+    return ((h&0x7fffu) == 0x7c00u);
+}
+
+int npy_half_isfinite(npy_half h)
+{
+    return ((h&0x7c00u) != 0x7c00u);
+}
+
+int npy_half_signbit(npy_half h)
+{
+    return (h&0x8000u) != 0;
+}
+
+npy_half npy_half_spacing(npy_half h)
+{
+    npy_half ret;
+    npy_uint16 h_exp = h&0x7c00u;
+    npy_uint16 h_sig = h&0x03ffu;
+    if (h_exp == 0x7c00u) {
+#if NPY_HALF_GENERATE_INVALID
+        npy_set_floatstatus_invalid();
+#endif
+        ret = NPY_HALF_NAN;
+    } else if (h == 0x7bffu) {
+#if NPY_HALF_GENERATE_OVERFLOW
+        npy_set_floatstatus_overflow();
+#endif
+        ret = NPY_HALF_PINF;
+    } else if ((h&0x8000u) && h_sig == 0) { /* Negative boundary case */
+        if (h_exp > 0x2c00u) { /* If result is normalized */
+            ret = h_exp - 0x2c00u;
+        } else if(h_exp > 0x0400u) { /* The result is a subnormal, but not the smallest */
+            ret = 1 << ((h_exp >> 10) - 2);
+        } else {
+            ret = 0x0001u; /* Smallest subnormal half */
+        }
+    } else if (h_exp > 0x2800u) { /* If result is still normalized */
+        ret = h_exp - 0x2800u;
+    } else if (h_exp > 0x0400u) { /* The result is a subnormal, but not the smallest */
+        ret = 1 << ((h_exp >> 10) - 1);
+    } else {
+        ret = 0x0001u;
+    }
+
+    return ret;
+}
+
+npy_half npy_half_copysign(npy_half x, npy_half y)
+{
+    return (x&0x7fffu) | (y&0x8000u);
+}
+
+npy_half npy_half_nextafter(npy_half x, npy_half y)
+{
+    npy_half ret;
+
+    if (npy_half_isnan(x) || npy_half_isnan(y)) {
+        ret = NPY_HALF_NAN;
+    } else if (npy_half_eq_nonan(x, y)) {
+        ret = x;
+    } else if (npy_half_iszero(x)) {
+        ret = (y&0x8000u) + 1; /* Smallest subnormal half */
+    } else if (!(x&0x8000u)) { /* x > 0 */
+        if ((npy_int16)x > (npy_int16)y) { /* x > y */
+            ret = x-1;
+        } else {
+            ret = x+1;
+        }
+    } else {
+        if (!(y&0x8000u) || (x&0x7fffu) > (y&0x7fffu)) { /* x < y */
+            ret = x-1;
+        } else {
+            ret = x+1;
+        }
+    }
+#if NPY_HALF_GENERATE_OVERFLOW
+    if (npy_half_isinf(ret) && npy_half_isfinite(x)) {
+        npy_set_floatstatus_overflow();
+    }
+#endif
+
+    return ret;
+}
+
+int npy_half_eq_nonan(npy_half h1, npy_half h2)
+{
+    return Half::FromBits(h1).Equal(Half::FromBits(h2));
+}
+
+int npy_half_eq(npy_half h1, npy_half h2)
+{
+    return Half::FromBits(h1) == Half::FromBits(h2);
+}
+
+int npy_half_ne(npy_half h1, npy_half h2)
+{
+    return Half::FromBits(h1) != Half::FromBits(h2);
+}
+
+int npy_half_lt_nonan(npy_half h1, npy_half h2)
+{
+    return Half::FromBits(h1).Less(Half::FromBits(h2));
+}
+
+int npy_half_lt(npy_half h1, npy_half h2)
+{
+    return Half::FromBits(h1) < Half::FromBits(h2);
+}
+
+int npy_half_gt(npy_half h1, npy_half h2)
+{
+    return npy_half_lt(h2, h1);
+}
+
+int npy_half_le_nonan(npy_half h1, npy_half h2)
+{
+    return Half::FromBits(h1).LessEqual(Half::FromBits(h2));
+}
+
+int npy_half_le(npy_half h1, npy_half h2)
+{
+    return Half::FromBits(h1) <= Half::FromBits(h2);
+}
+
+int npy_half_ge(npy_half h1, npy_half h2)
+{
+    return npy_half_le(h2, h1);
+}
+
+npy_half npy_half_divmod(npy_half h1, npy_half h2, npy_half *modulus)
+{
+    float fh1 = npy_half_to_float(h1);
+    float fh2 = npy_half_to_float(h2);
+    float div, mod;
+
+    div = npy_divmodf(fh1, fh2, &mod);
+    *modulus = npy_float_to_half(mod);
+    return npy_float_to_half(div);
+}
+
+
+/*
+ ********************************************************************
+ *                     BIT-LEVEL CONVERSIONS                        *
+ ********************************************************************
+ */
+
+npy_uint16 npy_floatbits_to_halfbits(npy_uint32 f)
+{
+    if constexpr (Half::kNativeConversion<float>) {
+        return BitCast<uint16_t>(Half(BitCast<float>(f)));
+    }
+    else {
+        return half_private::FromFloatBits(f);
+    }
+}
+
+npy_uint16 npy_doublebits_to_halfbits(npy_uint64 d)
+{
+    if constexpr (Half::kNativeConversion<double>) {
+        return BitCast<uint16_t>(Half(BitCast<double>(d)));
+    }
+    else {
+        return half_private::FromDoubleBits(d);
+    }
+}
+
+npy_uint32 npy_halfbits_to_floatbits(npy_uint16 h)
+{
+    if constexpr (Half::kNativeConversion<float>) {
+        return BitCast<uint32_t>(static_cast<float>(Half::FromBits(h)));
+    }
+    else {
+        return half_private::ToFloatBits(h);
+    }
+}
+
+npy_uint64 npy_halfbits_to_doublebits(npy_uint16 h)
+{
+    if constexpr (Half::kNativeConversion<double>) {
+        return BitCast<uint64_t>(static_cast<double>(Half::FromBits(h)));
+    }
+    else {
+        return half_private::ToDoubleBits(h);
+    }
+}
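
A quick reviewer sketch of the C++ API this patch introduces (not part of the diff itself). It assumes compilation inside numpy/core with src/common on the include path, as the npymath build targets above arrange; the bit patterns used are ordinary IEEE 754 binary16 constants:

// Reviewer sketch (not part of this patch): exercises the np::Half API
// added above. Assumes src/common is on the include path.
#include <cassert>
#include <cstdio>
#include "common.hpp"

int main()
{
    using namespace np;
    // Construction from float rounds ties to even on the emulated path.
    Half h{1.0f};
    assert(static_cast<float>(h) == 1.0f);
    // FromBits/Bits round-trips the raw binary16 representation,
    // matching the C API's npy_half layout.
    assert(Half::FromBits(h.Bits()).Bits() == h.Bits());
    // Ordered comparisons are false when either operand is NaN,
    // while operator!= is the unordered complement.
    Half nan = Half::FromBits(0x7e00); // a quiet-NaN bit pattern
    assert(!(nan == nan) && nan != nan);
    // Signed zeros compare equal, as in npy_half_eq.
    assert(Half::FromBits(0x0000) == Half::FromBits(0x8000));
    std::puts("ok");
    return 0;
}

The same behavior backs the npy_half_* wrappers in halffloat.cpp above, so the existing C callers should observe no change.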