SIMD, TST: add test cases for NPYV fast integer division · numpy/numpy@6c94b4c · GitHub
[go: up one dir, main page]

Skip to content

Commit 6c94b4c

Browse files
committed
SIMD, TST: add test cases for NPYV fast integer division
1 parent 2da9858 commit 6c94b4c

File tree

2 files changed

+86
-3
lines changed

2 files changed

+86
-3
lines changed

numpy/core/src/_simd/_simd.dispatch.c.src

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@
1515
/**begin repeat
1616
* #sfx = u8, s8, u16, s16, u32, s32, u64, s64, f32, f64#
1717
* #bsfx = b8, b8, b16, b16, b32, b32, b64, b64, b32, b64#
18-
* #esfx = u16, s8, u32, s16, u32, s32, u64, s64, f32, f64#
19-
* #expand_sup =1, 0, 1, 0, 0, 0, 0, 0, 0, 0#
18+
* #esfx = u16, s8, u32,s16, u32, s32, u64, s64, f32, f64#
19+
* #expand_sup= 1, 0, 1, 0, 0, 0, 0, 0, 0, 0#
2020
* #simd_sup = 1, 1, 1, 1, 1, 1, 1, 1, 1, NPY_SIMD_F64#
2121
* #fp_only = 0, 0, 0, 0, 0, 0, 0, 0, 1, 1#
2222
* #sat_sup = 1, 1, 1, 1, 0, 0, 0, 0, 0, 0#
@@ -27,6 +27,7 @@
2727
* #sum_sup = 0, 0, 0, 0, 1, 0, 1, 0, 1, 1#
2828
* #rev64_sup = 1, 1, 1, 1, 1, 1, 0, 0, 1, 0#
2929
* #ncont_sup = 0, 0, 0, 0, 1, 1, 1, 1, 1, 1#
30+
* #intdiv_sup= 1, 1, 1, 1, 1, 1, 1, 1, 0, 0#
3031
* #shl_imm = 0, 0, 15, 15, 31, 31, 63, 63, 0, 0#
3132
* #shr_imm = 0, 0, 16, 16, 32, 32, 64, 64, 0, 0#
3233
*/
@@ -354,6 +355,11 @@ SIMD_IMPL_INTRIN_2(mul_@sfx@, v@sfx@, v@sfx@, v@sfx@)
354355
SIMD_IMPL_INTRIN_2(div_@sfx@, v@sfx@, v@sfx@, v@sfx@)
355356
#endif // div_sup
356357

358+
#if @intdiv_sup@
359+
SIMD_IMPL_INTRIN_1(divisor_@sfx@, v@sfx@x3, @sfx@)
360+
SIMD_IMPL_INTRIN_2(divc_@sfx@, v@sfx@, v@sfx@, v@sfx@x3)
361+
#endif // intdiv_sup
362+
357363
#if @fused_sup@
358364
/**begin repeat1
359365
* #intrin = muladd, mulsub, nmuladd, nmulsub#
@@ -442,14 +448,15 @@ SIMD_IMPL_INTRIN_1(not_@bsfx@, v@bsfx@, v@bsfx@)
442448
SIMD_IMPL_INTRIN_1(tobits_@bsfx@, u64, v@bsfx@)
443449
/**end repeat**/
444450

451+
445452
//#########################################################################
446453
//## Attach module functions
447454
//#########################################################################
448455
static PyMethodDef simd__intrinsics_methods[] = {
449456
/**begin repeat
450457
* #sfx = u8, s8, u16, s16, u32, s32, u64, s64, f32, f64#
451458
* #bsfx = b8, b8, b16, b16, b32, b32, b64, b64, b32, b64#
452-
* #esfx = u16, s8, u32, s16, u32, s32, u64, s64, f32, f64#
459+
* #esfx = u16, s8, u32,s16, u32, s32, u64, s64, f32, f64#
453460
* #expand_sup =1, 0, 1, 0, 0, 0, 0, 0, 0, 0#
454461
* #simd_sup = 1, 1, 1, 1, 1, 1, 1, 1, 1, NPY_SIMD_F64#
455462
* #fp_only = 0, 0, 0, 0, 0, 0, 0, 0, 1, 1#
@@ -461,6 +468,7 @@ static PyMethodDef simd__intrinsics_methods[] = {
461468
* #sum_sup = 0, 0, 0, 0, 1, 0, 1, 0, 1, 1#
462469
* #rev64_sup = 1, 1, 1, 1, 1, 1, 0, 0, 1, 0#
463470
* #ncont_sup = 0, 0, 0, 0, 1, 1, 1, 1, 1, 1#
471+
* #intdiv_sup= 1, 1, 1, 1, 1, 1, 1, 1, 0, 0#
464472
* #shl_imm = 0, 0, 15, 15, 31, 31, 63, 63, 0, 0#
465473
* #shr_imm = 0, 0, 16, 16, 32, 32, 64, 64, 0, 0#
466474
*/
@@ -568,6 +576,11 @@ SIMD_INTRIN_DEF(mul_@sfx@)
568576
SIMD_INTRIN_DEF(div_@sfx@)
569577
#endif // div_sup
570578

579+
#if @intdiv_sup@
580+
SIMD_INTRIN_DEF(divisor_@sfx@)
581+
SIMD_INTRIN_DEF(divc_@sfx@)
582+
#endif // intdiv_sup
583+
571584
#if @fused_sup@
572585
/**begin repeat1
573586
* #intrin = muladd, mulsub, nmuladd, nmulsub#

numpy/core/tests/test_simd.py

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -733,6 +733,76 @@ def test_arithmetic_div(self):
733733
div = self.div(vdata_a, vdata_b)
734734
assert div == data_div
735735

736+
def test_arithmetic_intdiv(self):
737+
"""
738+
Test integer division intrinsics:
739+
npyv_divisor_##sfx
740+
npyv_divc_##sfx
741+
"""
742+
if self._is_fp():
743+
return
744+
745+
def trunc_div(a, d):
746+
"""
747+
Divide rounding towards zero. Equivalent to int(a/d), but stays exact
748+
for large integers > 2^53, where float division loses precision.
749+
"""
750+
sign_a, sign_d = a < 0, d < 0
751+
if a == 0 or sign_a == sign_d:
752+
return a // d
753+
return (a + sign_d - sign_a) // d + 1
754+
755+
int_min = self._int_min() if self._is_signed() else 1
756+
int_max = self._int_max()
757+
rdata = (
758+
0, 1, self.nlanes, int_max-self.nlanes,
759+
int_min, int_min//2 + 1
760+
)
761+
divisors = (1, 2, self.nlanes, int_min, int_max, int_max//2)
762+
763+
for x, d in zip(rdata, divisors):
764+
data = self._data(x)
765+
vdata = self.load(data)
766+
data_divc = [trunc_div(a, d) for a in data]
767+
divisor = self.divisor(d)
768+
divc = self.divc(vdata, divisor)
769+
assert divc == data_divc
770+
771+
if not self._is_signed():
772+
return
773+
774+
safe_neg = lambda x: -x-1 if -x > int_max else -x
775+
# test round division for signed integers
776+
for x, d in zip(rdata, divisors):
777+
d_neg = safe_neg(d)
778+
data = self._data(x)
779+
data_neg = [safe_neg(a) for a in data]
780+
vdata = self.load(data)
781+
vdata_neg = self.load(data_neg)
782+
divisor = self.divisor(d)
783+
divisor_neg = self.divisor(d_neg)
784+
785+
# round towards zero
786+
data_divc = [trunc_div(a, d_neg) for a in data]
787+
divc = self.divc(vdata, divisor_neg)
788+
assert divc == data_divc
789+
data_divc = [trunc_div(a, d) for a in data_neg]
790+
divc = self.divc(vdata_neg, divisor)
791+
assert divc == data_divc
792+
793+
# test that the sign is truncated when the dividend is zero
794+
vzero = self.zero()
795+
for d in (-1, -10, -100, int_min//2, int_min):
796+
divisor = self.divisor(d)
797+
divc = self.divc(vzero, divisor)
798+
assert divc == vzero
799+
800+
# test overflow
801+
vmin = self.setall(int_min)
802+
divisor = self.divisor(-1)
803+
divc = self.divc(vmin, divisor)
804+
assert divc == vmin
805+
736806
def test_arithmetic_reduce_sum(self):
737807
"""
738808
Test reduce sum intrinsics:

0 commit comments

Comments
 (0)
0