ENH: use gcc intrinsic for overflow checked multiplication · pitrou/numpy@6686686 · GitHub
[go: up one dir, main page]

Skip to content

Commit 6686686

Browse files
committed
ENH: use gcc intrinsic for overflow checked multiplication
More efficient as it uses processor overflow flags if available.
1 parent 31b94e8 commit 6686686

File tree

2 files changed

+5
-2
lines changed

2 files changed

+5
-2
lines changed

numpy/core/setup_common.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,7 @@ def check_api_version(apiversion, codegen_dir):
118118
("__builtin_bswap32", '5u'),
119119
("__builtin_bswap64", '5u'),
120120
("__builtin_expect", '5, 0'),
121+
("__builtin_mul_overflow", '5, 5, (int*)5'),
121122
("_mm_load_ps", '(float*)0', "xmmintrin.h"), # SSE
122123
("_mm_load_pd", '(double*)0', "emmintrin.h"), # SSE2
123124
]

numpy/core/src/private/templ_common.h.src

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,20 +21,22 @@
2121
static NPY_INLINE int
2222
npy_mul_with_overflow_@name@(@type@ * r, @type@ a, @type@ b)
2323
{
24+
#ifdef HAVE___BUILTIN_MUL_OVERFLOW
25+
return __builtin_mul_overflow(a, b, r);
26+
#else
2427
const @type@ half_sz = (((@type@)1 << (sizeof(a) * 8 / 2)) - 1);
2528

2629
*r = a * b;
2730
/*
2831
* avoid expensive division on common no overflow case
29-
* could be improved via compiler intrinsics e.g. via clang
30-
* __builtin_mul_with_overflow, gcc __int128 or cpu overflow flags
3132
*/
3233
if (NPY_UNLIKELY((a | b) >= half_sz) &&
3334
a != 0 && b > @MAX@ / a) {
3435
return 1;
3536
}
3637

3738
return 0;
39+
#endif
3840
}
3941
/**end repeat**/
4042

0 commit comments

Comments
 (0)
0