r-devulap
diff --git a/‎doc/source/reference/c-api.coremath.rst
Lines changed: 33 additions & 1 deletion b/‎doc/source/reference/c-api.coremath.rst
Lines changed: 33 additions & 1 deletion
diff --git a/‎numpy/core/include/numpy/npy_math.h
Lines changed: 7 additions & 1 deletion b/‎numpy/core/include/numpy/npy_math.h
Lines changed: 7 additions & 1 deletion
diff --git a/‎numpy/core/src/npymath/ieee754.c.src
Lines changed: 58 additions & 17 deletions b/‎numpy/core/src/npymath/ieee754.c.src
Lines changed: 58 additions & 17 deletions
diff --git a/‎numpy/core/src/umath/extobj.c
Lines changed: 1 addition & 1 deletion b/‎numpy/core/src/umath/extobj.c
Lines changed: 1 addition & 1 deletion
diff --git a/‎numpy/core/src/umath/loops.c.src
Lines changed: 7 additions & 7 deletions b/‎numpy/core/src/umath/loops.c.src
Lines changed: 7 additions & 7 deletions
diff --git a/‎numpy/core/src/umath/reduction.c
Lines changed: 1 addition & 1 deletion b/‎numpy/core/src/umath/reduction.c
Lines changed: 1 addition & 1 deletion
@@ -183,14 +183,46 @@ Those can be useful for precise floating point comparison.
     * NPY_FPE_UNDERFLOW
     * NPY_FPE_INVALID
 
+    Note that :c:func:`npy_get_floatstatus_barrier` is preferable as it prevents
+    aggressive compiler optimizations from reordering the call relative to
+    the code setting the status, which could lead to incorrect results.
+
     .. versionadded:: 1.9.0
 
+.. c:function:: int npy_get_floatstatus_barrier(char*)
+
+    Get floating point status. A pointer to a local variable is passed in to
+    prevent aggressive compiler optimizations from reordering this function call
+    relative to the code setting the status, which could lead to incorrect
+    results.
+
+    Returns a bitmask with the following possible flags:
+
+    * NPY_FPE_DIVIDEBYZERO
+    * NPY_FPE_OVERFLOW
+    * NPY_FPE_UNDERFLOW
+    * NPY_FPE_INVALID
+
+    .. versionadded:: 1.15.0
+
 .. c:function:: int npy_clear_floatstatus()
 
     Clears the floating point status. Returns the previous status mask.
 
+    Note that :c:func:`npy_clear_floatstatus_barrier` is preferable as it
+    prevents aggressive compiler optimizations from reordering the call relative
+    to the code setting the status, which could lead to incorrect results.
+
     .. versionadded:: 1.9.0
 
+.. c:function:: int npy_clear_floatstatus_barrier(char*)
+
+    Clears the floating point status. A pointer to a local variable is passed in to
+    prevent aggressive compiler optimizations from reordering this function call.
+    Returns the previous status mask.
+
+    .. versionadded:: 1.15.0
+
 Complex functions
 ~~~~~~~~~~~~~~~~~
 
@@ -237,7 +269,7 @@ of floating point round-off error.
 
 Like for other types, NumPy includes a typedef npy_half for the 16 bit
 float.  Unlike for most of the other types, you cannot use this as a
-normal type in C, since is is a typedef for npy_uint16.  For example,
+normal type in C, since it is a typedef for npy_uint16.  For example,
 1.0 looks like 0x3c00 to C, and if you do an equality comparison
 between the different signed zeros, you will get -0.0 != 0.0
 (0x8000 != 0x0000), which is incorrect.
 
@@ -524,8 +524,14 @@ npy_clongdouble npy_catanhl(npy_clongdouble z);
 #define NPY_FPE_UNDERFLOW     4
 #define NPY_FPE_INVALID       8
 
-int npy_get_floatstatus(void);
+int npy_clear_floatstatus_barrier(char*);
+int npy_get_floatstatus_barrier(char*);
+/*
+ * use caution with these - clang and gcc8.1 are known to reorder calls
+ * to this form of the function which can defeat the check
+ */
 int npy_clear_floatstatus(void);
+int npy_get_floatstatus(void);
 void npy_set_floatstatus_divbyzero(void);
 void npy_set_floatstatus_overflow(void);
 void npy_set_floatstatus_underflow(void);
 
@@ -6,6 +6,7 @@
  */
 #include "npy_math_common.h"
 #include "npy_math_private.h"
+#include "numpy/utils.h"
 
 #ifndef HAVE_COPYSIGN
 double npy_copysign(double x, double y)
@@ -557,6 +558,15 @@ npy_longdouble npy_nextafterl(npy_longdouble x, npy_longdouble y)
 }
 #endif
 
+int npy_clear_floatstatus(void) {
+    char x=0;  /* dummy local; its address is the barrier argument */
+    return npy_clear_floatstatus_barrier(&x);
+}
+int npy_get_floatstatus(void) {
+    char x=0;  /* dummy local; its address is the barrier argument */
+    return npy_get_floatstatus_barrier(&x);
+}
+
 /*
  * Functions to set the floating point status word.
  * keep in sync with NO_FLOATING_POINT_SUPPORT in ufuncobject.h
@@ -574,18 +584,24 @@ npy_longdouble npy_nextafterl(npy_longdouble x, npy_longdouble y)
     defined(__NetBSD__)
 #include <ieeefp.h>
 
-int npy_get_floatstatus(void)
+int npy_get_floatstatus_barrier(char *param)
 {
     int fpstatus = fpgetsticky();
+    /*
+     * By using a volatile, the compiler cannot reorder this call
+     */
+    if (param != NULL) {
+        volatile char NPY_UNUSED(c) = *(char*)param;
+    }
     return ((FP_X_DZ  & fpstatus) ? NPY_FPE_DIVIDEBYZERO : 0) |
            ((FP_X_OFL & fpstatus) ? NPY_FPE_OVERFLOW : 0) |
            ((FP_X_UFL & fpstatus) ? NPY_FPE_UNDERFLOW : 0) |
            ((FP_X_INV & fpstatus) ? NPY_FPE_INVALID : 0);
 }
 
-int npy_clear_floatstatus(void)
+int npy_clear_floatstatus_barrier(char * param)
 {
-    int fpstatus = npy_get_floatstatus();
+    int fpstatus = npy_get_floatstatus_barrier(param);
     fpsetsticky(0);
 
     return fpstatus;
@@ -617,21 +633,27 @@ void npy_set_floatstatus_invalid(void)
       (defined(__FreeBSD__) && (__FreeBSD_version >= 502114))
 #  include <fenv.h>
 
-int npy_get_floatstatus(void)
+int npy_get_floatstatus_barrier(char* param)
 {
     int fpstatus = fetestexcept(FE_DIVBYZERO | FE_OVERFLOW |
                                 FE_UNDERFLOW | FE_INVALID);
+    /*
+     * By using a volatile, the compiler cannot reorder this call
+     */
+    if (param != NULL) {
+        volatile char NPY_UNUSED(c) = *(char*)param;
+    }
 
     return ((FE_DIVBYZERO  & fpstatus) ? NPY_FPE_DIVIDEBYZERO : 0) |
            ((FE_OVERFLOW   & fpstatus) ? NPY_FPE_OVERFLOW : 0) |
            ((FE_UNDERFLOW  & fpstatus) ? NPY_FPE_UNDERFLOW : 0) |
            ((FE_INVALID    & fpstatus) ? NPY_FPE_INVALID : 0);
 }
 
-int npy_clear_floatstatus(void)
+int npy_clear_floatstatus_barrier(char * param)
 {
     /* testing float status is 50-100 times faster than clearing on x86 */
-    int fpstatus = npy_get_floatstatus();
+    int fpstatus = npy_get_floatstatus_barrier(param);
     if (fpstatus != 0) {
         feclearexcept(FE_DIVBYZERO | FE_OVERFLOW |
                       FE_UNDERFLOW | FE_INVALID);
@@ -665,18 +687,24 @@ void npy_set_floatstatus_invalid(void)
 #include <float.h>
 #include <fpxcp.h>
 
-int npy_get_floatstatus(void)
+int npy_get_floatstatus_barrier(char *param)
 {
     int fpstatus = fp_read_flag();
+    /*
+     * By using a volatile, the compiler cannot reorder this call
+     */
+    if (param != NULL) {
+        volatile char NPY_UNUSED(c) = *(char*)param;
+    }
     return ((FP_DIV_BY_ZERO & fpstatus) ? NPY_FPE_DIVIDEBYZERO : 0) |
            ((FP_OVERFLOW & fpstatus) ? NPY_FPE_OVERFLOW : 0) |
            ((FP_UNDERFLOW & fpstatus) ? NPY_FPE_UNDERFLOW : 0) |
            ((FP_INVALID & fpstatus) ? NPY_FPE_INVALID : 0);
 }
 
-int npy_clear_floatstatus(void)
+int npy_clear_floatstatus_barrier(char * param)
 {
-    int fpstatus = npy_get_floatstatus();
+    int fpstatus = npy_get_floatstatus_barrier(param);
     fp_swap_flag(0);
 
     return fpstatus;
@@ -710,8 +738,11 @@ void npy_set_floatstatus_invalid(void)
 #include <float.h>
 
 
-int npy_get_floatstatus(void)
+int npy_get_floatstatus_barrier(char *param)
 {
+    /*
+     * By using a volatile, the compiler cannot reorder this call
+     */
 #if defined(_WIN64)
     int fpstatus = _statusfp();
 #else
@@ -720,15 +751,18 @@ int npy_get_floatstatus(void)
     _statusfp2(&fpstatus, &fpstatus2);
     fpstatus |= fpstatus2;
 #endif
+    if (param != NULL) {
+        volatile char NPY_UNUSED(c) = *(char*)param;
+    }
     return ((SW_ZERODIVIDE & fpstatus) ? NPY_FPE_DIVIDEBYZERO : 0) |
            ((SW_OVERFLOW & fpstatus) ? NPY_FPE_OVERFLOW : 0) |
            ((SW_UNDERFLOW & fpstatus) ? NPY_FPE_UNDERFLOW : 0) |
            ((SW_INVALID & fpstatus) ? NPY_FPE_INVALID : 0);
 }
 
-int npy_clear_floatstatus(void)
+int npy_clear_floatstatus_barrier(char *param)
 {
-    int fpstatus = npy_get_floatstatus();
+    int fpstatus = npy_get_floatstatus_barrier(param);
     _clearfp();
 
     return fpstatus;
@@ -739,18 +773,24 @@ int npy_clear_floatstatus(void)
 
 #include <machine/fpu.h>
 
-int npy_get_floatstatus(void)
+int npy_get_floatstatus_barrier(char *param)
 {
     unsigned long fpstatus = ieee_get_fp_control();
+    /*
+     * By using a volatile, the compiler cannot reorder this call
+     */
+    if (param != NULL) {
+        volatile char NPY_UNUSED(c) = *(char*)param;
+    }
     return  ((IEEE_STATUS_DZE & fpstatus) ? NPY_FPE_DIVIDEBYZERO : 0) |
             ((IEEE_STATUS_OVF & fpstatus) ? NPY_FPE_OVERFLOW : 0) |
             ((IEEE_STATUS_UNF & fpstatus) ? NPY_FPE_UNDERFLOW : 0) |
             ((IEEE_STATUS_INV & fpstatus) ? NPY_FPE_INVALID : 0);
 }
 
-int npy_clear_floatstatus(void)
+int npy_clear_floatstatus_barrier(char *param)
 {
-    long fpstatus = npy_get_floatstatus();
+    int fpstatus = npy_get_floatstatus_barrier(param);
     /* clear status bits as well as disable exception mode if on */
     ieee_set_fp_control(0);
 
@@ -759,13 +799,14 @@ int npy_clear_floatstatus(void)
 
 #else
 
-int npy_get_floatstatus(void)
+int npy_get_floatstatus_barrier(char NPY_UNUSED(*param))
 {
     return 0;
 }
 
-int npy_clear_floatstatus(void)
+int npy_clear_floatstatus_barrier(char *param)
 {
+    int fpstatus = npy_get_floatstatus_barrier(param);
     return 0;
 }
 
 
@@ -284,7 +284,7 @@ _check_ufunc_fperr(int errmask, PyObject *extobj, const char *ufunc_name) {
     if (!errmask) {
         return 0;
     }
-    fperr = PyUFunc_getfperr();
+    fperr = npy_get_floatstatus_barrier((char*)extobj);
     if (!fperr) {
         return 0;
     }
 
@@ -1775,7 +1775,7 @@ NPY_NO_EXPORT void
             *((npy_bool *)op1) = @func@(in1) != 0;
         }
     }
-    npy_clear_floatstatus();
+    npy_clear_floatstatus_barrier((char*)dimensions);
 }
 /**end repeat1**/
 
@@ -1857,7 +1857,7 @@ NPY_NO_EXPORT void
             *((@type@ *)op1) = (in1 @OP@ in2 || npy_isnan(in2)) ? in1 : in2;
         }
     }
-    npy_clear_floatstatus();
+    npy_clear_floatstatus_barrier((char*)dimensions);
 }
 /**end repeat1**/
 
@@ -1947,7 +1947,7 @@ NPY_NO_EXPORT void
             *((@type@ *)op1) = tmp + 0;
         }
     }
-    npy_clear_floatstatus();
+    npy_clear_floatstatus_barrier((char*)dimensions);
 }
 
 NPY_NO_EXPORT void
@@ -2133,7 +2133,7 @@ HALF_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED
         const npy_half in1 = *(npy_half *)ip1;
         *((npy_bool *)op1) = @func@(in1) != 0;
     }
-    npy_clear_floatstatus();
+    npy_clear_floatstatus_barrier((char*)dimensions);
 }
 /**end repeat**/
 
@@ -2195,7 +2195,7 @@ HALF_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED
         const npy_half in2 = *(npy_half *)ip2;
         *((npy_half *)op1) = (@OP@(in1, in2) || npy_half_isnan(in2)) ? in1 : in2;
     }
-    npy_clear_floatstatus();
+    npy_clear_floatstatus_barrier((char*)dimensions);
 }
 /**end repeat**/
 
@@ -2635,7 +2635,7 @@ NPY_NO_EXPORT void
         const @ftype@ in1i = ((@ftype@ *)ip1)[1];
         *((npy_bool *)op1) = @func@(in1r) @OP@ @func@(in1i);
     }
-    npy_clear_floatstatus();
+    npy_clear_floatstatus_barrier((char*)dimensions);
 }
 /**end repeat1**/
 
@@ -2744,7 +2744,7 @@ NPY_NO_EXPORT void
             ((@ftype@ *)op1)[1] = in2i;
         }
     }
-    npy_clear_floatstatus();
+    npy_clear_floatstatus_barrier((char*)dimensions);
 }
 /**end repeat1**/
 
 
@@ -560,7 +560,7 @@ PyUFunc_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out,
     }
 
     /* Start with the floating-point exception flags cleared */
-    PyUFunc_clearfperr();
+    npy_clear_floatstatus_barrier((char*)&iter);
 
     if (NpyIter_GetIterSize(iter) != 0) {
         NpyIter_IterNextFunc *iternext;
Original file line number	Diff line number	Diff line change
`@@ -284,7 +284,7 @@ _check_ufunc_fperr(int errmask, PyObject extobj, const char ufunc_name) {`
`284`	`284`	`if (!errmask) {`
`285`	`285`	`return 0;`
`286`	`286`	`}`
`287`		`- fperr = PyUFunc_getfperr();`
	`287`	`+ fperr = npy_get_floatstatus_barrier((char*)extobj);`
`288`	`288`	`if (!fperr) {`
`289`	`289`	`return 0;`
`290`	`290`	`}`
Original file line number	Diff line number	Diff line change
`@@ -1775,7 +1775,7 @@ NPY_NO_EXPORT void`
`1775`	`1775`	`((npy_bool )op1) = @func@(in1) != 0;`
`1776`	`1776`	`}`
`1777`	`1777`	`}`
`1778`		`- npy_clear_floatstatus();`
	`1778`	`+ npy_clear_floatstatus_barrier((char*)dimensions);`
`1779`	`1779`	`}`
`1780`	`1780`	`/end repeat1/`
`1781`	`1781`
`@@ -1857,7 +1857,7 @@ NPY_NO_EXPORT void`
`1857`	`1857`	`((@type@ )op1) = (in1 @OP@ in2 \|\| npy_isnan(in2)) ? in1 : in2;`
`1858`	`1858`	`}`
`1859`	`1859`	`}`
`1860`		`- npy_clear_floatstatus();`
	`1860`	`+ npy_clear_floatstatus_barrier((char*)dimensions);`
`1861`	`1861`	`}`
`1862`	`1862`	`/end repeat1/`
`1863`	`1863`
`@@ -1947,7 +1947,7 @@ NPY_NO_EXPORT void`
`1947`	`1947`	`((@type@ )op1) = tmp + 0;`
`1948`	`1948`	`}`
`1949`	`1949`	`}`
`1950`		`- npy_clear_floatstatus();`
	`1950`	`+ npy_clear_floatstatus_barrier((char*)dimensions);`
`1951`	`1951`	`}`
`1952`	`1952`
`1953`	`1953`	`NPY_NO_EXPORT void`
`@@ -2133,7 +2133,7 @@ HALF_@kind@(char *args, npy_intp dimensions, npy_intp steps, void NPY_UNUSED`
`2133`	`2133`	`const npy_half in1 = (npy_half )ip1;`
`2134`	`2134`	`((npy_bool )op1) = @func@(in1) != 0;`
`2135`	`2135`	`}`
`2136`		`- npy_clear_floatstatus();`
	`2136`	`+ npy_clear_floatstatus_barrier((char*)dimensions);`
`2137`	`2137`	`}`
`2138`	`2138`	`/end repeat/`
`2139`	`2139`
`@@ -2195,7 +2195,7 @@ HALF_@kind@(char *args, npy_intp dimensions, npy_intp steps, void NPY_UNUSED`
`2195`	`2195`	`const npy_half in2 = (npy_half )ip2;`
`2196`	`2196`	`((npy_half )op1) = (@OP@(in1, in2) \|\| npy_half_isnan(in2)) ? in1 : in2;`
`2197`	`2197`	`}`
`2198`		`- npy_clear_floatstatus();`
	`2198`	`+ npy_clear_floatstatus_barrier((char*)dimensions);`
`2199`	`2199`	`}`
`2200`	`2200`	`/end repeat/`
`2201`	`2201`
`@@ -2635,7 +2635,7 @@ NPY_NO_EXPORT void`
`2635`	`2635`	`const @ftype@ in1i = ((@ftype@ *)ip1)[1];`
`2636`	`2636`	`((npy_bool )op1) = @func@(in1r) @OP@ @func@(in1i);`
`2637`	`2637`	`}`
`2638`		`- npy_clear_floatstatus();`
	`2638`	`+ npy_clear_floatstatus_barrier((char*)dimensions);`
`2639`	`2639`	`}`
`2640`	`2640`	`/end repeat1/`
`2641`	`2641`
`@@ -2744,7 +2744,7 @@ NPY_NO_EXPORT void`
`2744`	`2744`	`((@ftype@ *)op1)[1] = in2i;`
`2745`	`2745`	`}`
`2746`	`2746`	`}`
`2747`		`- npy_clear_floatstatus();`
	`2747`	`+ npy_clear_floatstatus_barrier((char*)dimensions);`
`2748`	`2748`	`}`
`2749`	`2749`	`/end repeat1/`
`2750`	`2750`
Original file line number	Diff line number	Diff line change
`@@ -560,7 +560,7 @@ PyUFunc_ReduceWrapper(PyArrayObject operand, PyArrayObject out,`
`560`	`560`	`}`
`561`	`561`
`562`	`562`	`/* Start with the floating-point exception flags cleared */`
`563`		`- PyUFunc_clearfperr();`
	`563`	`+ npy_clear_floatstatus_barrier((char*)&iter);`
`564`	`564`
`565`	`565`	`if (NpyIter_GetIterSize(iter) != 0) {`
`566`	`566`	`NpyIter_IterNextFunc *iternext;`