SIMD, TST: add test cases for NPYV fast integer division

numpy · mattip · Mar 8, 2021 · Jan 16, 2021 · Jan 16, 2021 · Jan 16, 2021
commit 6c94b4c2c7d48acf08a0f4d2d5844f7d7f3669de
diff --git a/numpy/core/src/_simd/_simd.dispatch.c.src b/numpy/core/src/_simd/_simd.dispatch.c.src
@@ -15,8 +15,8 @@
 /**begin repeat
  * #sfx       = u8, s8, u16, s16, u32, s32, u64, s64, f32, f64#
  * #bsfx      = b8, b8, b16, b16, b32, b32, b64, b64, b32, b64#
- * #esfx      = u16, s8, u32, s16, u32, s32, u64, s64, f32, f64#
- * #expand_sup =1,  0,  1,   0,   0,   0,   0,   0,   0,   0#
+ * #esfx      = u16, s8, u32,s16, u32, s32, u64, s64, f32, f64#
+ * #expand_sup= 1,  0,  1,   0,   0,   0,   0,   0,   0,   0#
  * #simd_sup  = 1,  1,  1,   1,   1,   1,   1,   1,   1,   NPY_SIMD_F64#
  * #fp_only   = 0,  0,  0,   0,   0,   0,   0,   0,   1,   1#
  * #sat_sup   = 1,  1,  1,   1,   0,   0,   0,   0,   0,   0#
@@ -27,6 +27,7 @@
  * #sum_sup   = 0,  0,  0,   0,   1,   0,   1,   0,   1,   1#
  * #rev64_sup = 1,  1,  1,   1,   1,   1,   0,   0,   1,   0#
  * #ncont_sup = 0,  0,  0,   0,   1,   1,   1,   1,   1,   1#
+ * #intdiv_sup= 1,  1,  1,   1,   1,   1,   1,   1,   0,   0#
  * #shl_imm   = 0,  0,  15,  15,  31,  31,  63,  63,  0,   0#
  * #shr_imm   = 0,  0,  16,  16,  32,  32,  64,  64,  0,   0#
  */
@@ -354,6 +355,11 @@ SIMD_IMPL_INTRIN_2(mul_@sfx@, v@sfx@, v@sfx@, v@sfx@)
 SIMD_IMPL_INTRIN_2(div_@sfx@, v@sfx@, v@sfx@, v@sfx@)
 #endif // div_sup
 
+#if @intdiv_sup@
+SIMD_IMPL_INTRIN_1(divisor_@sfx@, v@sfx@x3, @sfx@)
+SIMD_IMPL_INTRIN_2(divc_@sfx@, v@sfx@, v@sfx@, v@sfx@x3)
+#endif // intdiv_sup
+
 #if @fused_sup@
 /**begin repeat1
  * #intrin  = muladd, mulsub, nmuladd, nmulsub#
@@ -442,14 +448,15 @@ SIMD_IMPL_INTRIN_1(not_@bsfx@, v@bsfx@, v@bsfx@)
 SIMD_IMPL_INTRIN_1(tobits_@bsfx@, u64, v@bsfx@)
 /**end repeat**/
 
+
 //#########################################################################
 //## Attach module functions
 //#########################################################################
 static PyMethodDef simd__intrinsics_methods[] = {
 /**begin repeat
  * #sfx       = u8, s8, u16, s16, u32, s32, u64, s64, f32, f64#
  * #bsfx      = b8, b8, b16, b16, b32, b32, b64, b64, b32, b64#
- * #esfx      = u16, s8, u32, s16, u32, s32, u64, s64, f32, f64#
+ * #esfx      = u16, s8, u32,s16, u32, s32, u64, s64, f32, f64#
  * #expand_sup =1,  0,  1,   0,   0,   0,   0,   0,   0,   0#
  * #simd_sup  = 1,  1,  1,   1,   1,   1,   1,   1,   1,   NPY_SIMD_F64#
  * #fp_only   = 0,  0,  0,   0,   0,   0,   0,   0,   1,   1#
@@ -461,6 +468,7 @@ static PyMethodDef simd__intrinsics_methods[] = {
  * #sum_sup   = 0,  0,  0,   0,   1,   0,   1,   0,   1,   1#
  * #rev64_sup = 1,  1,  1,   1,   1,   1,   0,   0,   1,   0#
  * #ncont_sup = 0,  0,  0,   0,   1,   1,   1,   1,   1,   1#
+ * #intdiv_sup= 1,  1,  1,   1,   1,   1,   1,   1,   0,   0#
  * #shl_imm   = 0,  0,  15,  15,  31,  31,  63,  63,  0,   0#
  * #shr_imm   = 0,  0,  16,  16,  32,  32,  64,  64,  0,   0#
  */
@@ -568,6 +576,11 @@ SIMD_INTRIN_DEF(mul_@sfx@)
 SIMD_INTRIN_DEF(div_@sfx@)
 #endif // div_sup
 
+#if @intdiv_sup@
+SIMD_INTRIN_DEF(divisor_@sfx@)
+SIMD_INTRIN_DEF(divc_@sfx@)
+#endif // intdiv_sup
+
 #if @fused_sup@
 /**begin repeat1
  * #intrin  = muladd, mulsub, nmuladd, nmulsub#

diff --git a/numpy/core/tests/test_simd.py b/numpy/core/tests/test_simd.py
@@ -733,6 +733,76 @@ def test_arithmetic_div(self):
         div = self.div(vdata_a, vdata_b)
         assert div == data_div
 
+    def test_arithmetic_intdiv(self):
+        """
+        Test integer division intrinsics:
+            npyv_divisor_##sfx
+            npyv_divc_##sfx
+        """
+        if self._is_fp():
+            return
+
+        def trunc_div(a, d):
+            """
+            Divide rounding towards zero, like int(a/d), but stays exact
+            for large integers > 2^53.
+            """
+            sign_a, sign_d = a < 0, d < 0
+            if a == 0 or sign_a == sign_d:
+                return a // d
+            return (a + sign_d - sign_a) // d + 1
+
+        int_min = self._int_min() if self._is_signed() else 1
+        int_max = self._int_max()
+        rdata = (
+            0, 1, self.nlanes, int_max-self.nlanes,
+            int_min, int_min//2 + 1
+        )
+        divisors = (1, 2, self.nlanes, int_min, int_max, int_max//2)
+
+        for x, d in zip(rdata, divisors):
+            data = self._data(x)
+            vdata = self.load(data)
+            data_divc = [trunc_div(a, d) for a in data]
+            divisor = self.divisor(d)
+            divc = self.divc(vdata, divisor)
+            assert divc == data_divc
+
+        if not self._is_signed():
+            return
+
+        safe_neg = lambda x: -x-1 if -x > int_max else -x
+        # test round division for signed integers
+        for x, d in zip(rdata, divisors):
+            d_neg = safe_neg(d)
+            data = self._data(x)
+            data_neg = [safe_neg(a) for a in data]
+            vdata = self.load(data)
+            vdata_neg = self.load(data_neg)
+            divisor = self.divisor(d)
+            divisor_neg = self.divisor(d_neg)
+
+            # round towards zero
+            data_divc = [trunc_div(a, d_neg) for a in data]
+            divc = self.divc(vdata, divisor_neg)
+            assert divc == data_divc
+            data_divc = [trunc_div(a, d) for a in data_neg]
+            divc = self.divc(vdata_neg, divisor)
+            assert divc == data_divc
+
+        # test that the sign is truncated if the dividend is zero
+        vzero = self.zero()
+        for d in (-1, -10, -100, int_min//2, int_min):
+            divisor = self.divisor(d)
+            divc = self.divc(vzero, divisor)
+            assert divc == vzero
+
+        # test overflow
+        vmin = self.setall(int_min)
+        divisor = self.divisor(-1)
+        divc = self.divc(vmin, divisor)
+        assert divc == vmin
+
     def test_arithmetic_reduce_sum(self):
         """
         Test reduce sum intrinics: