BUG: Resolve build issue on ppc64 with Power9 or higher as baseline

seiko2plus · seiko2plus · commit 0e8113764713 · 2023-10-25T23:44:21.000+04:00
This fix addresses two issues:
   * Replaces instructions that the assembler does not support
     (`xvcvdphp`, `xvcvhpdp`) with their scalar forms (`xscvdphp`,
     `xscvhpdp`) in the conversions between half-precision and
     double-precision.
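   * Fixes incorrect variable references in the conversion code:
     `bits_.u` is replaced by `bits_`, and the mismatched `vf32`/`vf64`
     names in the half-to-double path are removed.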
diff --git a/numpy/_core/src/common/half.hpp b/numpy/_core/src/common/half.hpp
@@ -73,11 +73,8 @@ class Half final {
     #if defined(NPY_HAVE_AVX512FP16)
         __m128d md = _mm_load_sd(&f);
         bits_ = static_cast<uint16_t>(_mm_cvtsi128_si32(_mm_castph_si128(_mm_cvtpd_ph(md))));
-    #elif defined(NPY_HAVE_VSX3) && defined(NPY_HAVE_VSX_ASM)
-        __vector double vf64 = vec_splats(f);
-        __vector unsigned short vf16;
-        __asm__ __volatile__ ("xvcvdphp %x0,%x1" : "=wa" (vf16) : "wa" (vf64));
-        bits_ = vec_extract(vf16, 0);
+    #elif defined(NPY_HAVE_VSX3) && defined(NPY_HAVE_VSX3_HALF_DOUBLE)
+        __asm__ __volatile__ ("xscvdphp %x0,%x1" : "=wa" (bits_) : "wa" (f));
     #else
         bits_ = half_private::FromDoubleBits(BitCast<uint64_t>(f));
     #endif
@@ -96,7 +93,7 @@ class Half final {
         __vector float vf32;
         __asm__ __volatile__("xvcvhpsp %x0,%x1"
                              : "=wa"(vf32)
-                             : "wa"(vec_splats(bits_.u)));
+                             : "wa"(vec_splats(bits_)));
         return vec_extract(vf32, 0);
     #else
         return BitCast<float>(half_private::ToFloatBits(bits_));
@@ -110,12 +107,12 @@ class Half final {
         double ret;
         _mm_store_sd(&ret, _mm_cvtph_pd(_mm_castsi128_ph(_mm_cvtsi32_si128(bits_))));
         return ret;
-    #elif defined(NPY_HAVE_VSX3) && defined(NPY_HAVE_VSX_ASM)
-        __vector float vf64;
-        __asm__ __volatile__("xvcvhpdp %x0,%x1"
-                             : "=wa"(vf32)
-                             : "wa"(vec_splats(bits_)));
-        return vec_extract(vf64, 0);
+    #elif defined(NPY_HAVE_VSX3) && defined(NPY_HAVE_VSX3_HALF_DOUBLE)
+        double f64;
+        __asm__ __volatile__("xscvhpdp %x0,%x1"
+                             : "=wa"(f64)
+                             : "wa"(bits_));
+        return f64;
     #else
         return BitCast<double>(half_private::ToDoubleBits(bits_));
     #endif