@@ -308,6 +308,49 @@ _find_array_prepare(ufunc_full_args args,
308
308
return ;
309
309
}
310
310
311
/*
 * Default per-operand iterator flags shared by the ufunc execution paths.
 *
 * Each expansion is wrapped in parentheses so that the macro behaves as a
 * single value wherever it is used; without them an expression such as
 * `flags & NPY_UFUNC_DEFAULT_INPUT_FLAGS` would bind `&` to the last
 * term only, because `&` has higher precedence than `|`.
 *
 * The output defaults deliberately contain no read/write access flag;
 * callers OR in the access mode they need.
 */
#define NPY_UFUNC_DEFAULT_INPUT_FLAGS \
    (NPY_ITER_READONLY | \
     NPY_ITER_ALIGNED | \
     NPY_ITER_OVERLAP_ASSUME_ELEMENTWISE)

#define NPY_UFUNC_DEFAULT_OUTPUT_FLAGS \
    (NPY_ITER_ALIGNED | \
     NPY_ITER_ALLOCATE | \
     NPY_ITER_NO_BROADCAST | \
     NPY_ITER_NO_SUBTYPE | \
     NPY_ITER_OVERLAP_ASSUME_ELEMENTWISE)
322
+ /*
323
+ * Set per-operand flags according to desired input or output flags.
324
+ * op_flags[i] for i in input (as determined by ufunc->nin) will be
325
+ * merged with op_in_flags, perhaps overriding per-operand flags set
326
+ * in previous stages.
327
+ * op_flags[i] for i in output will be set to op_out_flags only if previously
328
+ * unset.
329
+ * The input flag behavior preserves backward compatibility, while the
330
+ * output flag behaviour is the "correct" one for maximum flexibility.
331
+ */
332
+ NPY_NO_EXPORT void
333
+ _ufunc_setup_flags (PyUFuncObject * ufunc , npy_uint32 op_in_flags ,
334
+ npy_uint32 op_out_flags , npy_uint32 * op_flags )
335
+ {
336
+ int nin = ufunc -> nin ;
337
+ int nout = ufunc -> nout ;
338
+ int nop = nin + nout , i ;
339
+ /* Set up the flags */
340
+ for (i = 0 ; i < nin ; ++ i ) {
341
+ op_flags [i ] = ufunc -> op_flags [i ] | op_in_flags ;
342
+ /*
343
+ * If READWRITE flag has been set for this operand,
344
+ * then clear default READONLY flag
345
+ */
346
+ if (op_flags [i ] & (NPY_ITER_READWRITE | NPY_ITER_WRITEONLY )) {
347
+ op_flags [i ] &= ~NPY_ITER_READONLY ;
348
+ }
349
+ }
350
+ for (i = nin ; i < nop ; ++ i ) {
351
+ op_flags [i ] = ufunc -> op_flags [i ] ? ufunc -> op_flags [i ] : op_out_flags ;
352
+ }
353
+ }
311
354
312
355
/*
313
356
* This function analyzes the input arguments
@@ -1394,11 +1437,11 @@ iterator_loop(PyUFuncObject *ufunc,
1394
1437
PyObject * * arr_prep ,
1395
1438
ufunc_full_args full_args ,
1396
1439
PyUFuncGenericFunction innerloop ,
1397
- void * innerloopdata )
1440
+ void * innerloopdata ,
1441
+ npy_uint32 * op_flags )
1398
1442
{
1399
1443
npy_intp i , nin = ufunc -> nin , nout = ufunc -> nout ;
1400
1444
npy_intp nop = nin + nout ;
1401
- npy_uint32 op_flags [NPY_MAXARGS ];
1402
1445
NpyIter * iter ;
1403
1446
char * baseptrs [NPY_MAXARGS ];
1404
1447
@@ -1412,29 +1455,6 @@ iterator_loop(PyUFuncObject *ufunc,
1412
1455
1413
1456
NPY_BEGIN_THREADS_DEF ;
1414
1457
1415
- /* Set up the flags */
1416
- for (i = 0 ; i < nin ; ++ i ) {
1417
- op_flags [i ] = NPY_ITER_READONLY |
1418
- NPY_ITER_ALIGNED |
1419
- NPY_ITER_OVERLAP_ASSUME_ELEMENTWISE ;
1420
- /*
1421
- * If READWRITE flag has been set for this operand,
1422
- * then clear default READONLY flag
1423
- */
1424
- op_flags [i ] |= ufunc -> op_flags [i ];
1425
- if (op_flags [i ] & (NPY_ITER_READWRITE | NPY_ITER_WRITEONLY )) {
1426
- op_flags [i ] &= ~NPY_ITER_READONLY ;
1427
- }
1428
- }
1429
- for (i = nin ; i < nop ; ++ i ) {
1430
- op_flags [i ] = NPY_ITER_WRITEONLY |
1431
- NPY_ITER_ALIGNED |
1432
- NPY_ITER_ALLOCATE |
1433
- NPY_ITER_NO_BROADCAST |
1434
- NPY_ITER_NO_SUBTYPE |
1435
- NPY_ITER_OVERLAP_ASSUME_ELEMENTWISE ;
1436
- }
1437
-
1438
1458
iter_flags = ufunc -> iter_flags |
1439
1459
NPY_ITER_EXTERNAL_LOOP |
1440
1460
NPY_ITER_REFS_OK |
@@ -1538,15 +1558,15 @@ iterator_loop(PyUFuncObject *ufunc,
1538
1558
}
1539
1559
1540
1560
/*
1561
+ * ufunc - the ufunc to call
1541
1562
* trivial_loop_ok - 1 if no alignment, data conversion, etc required
1542
- * nin - number of inputs
1543
- * nout - number of outputs
1544
- * op - the operands (nin + nout of them)
1563
+ * op - the operands (ufunc->nin + ufunc->nout of them)
1564
+ * dtypes - the dtype of each operand
1545
1565
* order - the loop execution order/output memory order
1546
1566
* buffersize - how big of a buffer to use
1547
1567
* arr_prep - the __array_prepare__ functions for the outputs
1548
- * innerloop - the inner loop function
1549
- * innerloopdata - data to pass to the inner loop
1568
+ * full_args - the original input, output PyObject *
1569
+ * op_flags - per-operand flags, a combination of NPY_ITER_* constants
1550
1570
*/
1551
1571
static int
1552
1572
execute_legacy_ufunc_loop (PyUFuncObject * ufunc ,
@@ -1556,7 +1576,8 @@ execute_legacy_ufunc_loop(PyUFuncObject *ufunc,
1556
1576
NPY_ORDER order ,
1557
1577
npy_intp buffersize ,
1558
1578
PyObject * * arr_prep ,
1559
- ufunc_full_args full_args )
1579
+ ufunc_full_args full_args ,
1580
+ npy_uint32 * op_flags )
1560
1581
{
1561
1582
npy_intp nin = ufunc -> nin , nout = ufunc -> nout ;
1562
1583
PyUFuncGenericFunction innerloop ;
@@ -1691,7 +1712,7 @@ execute_legacy_ufunc_loop(PyUFuncObject *ufunc,
1691
1712
NPY_UF_DBG_PRINT ("iterator loop\n" );
1692
1713
if (iterator_loop (ufunc , op , dtypes , order ,
1693
1714
buffersize , arr_prep , full_args ,
1694
- innerloop , innerloopdata ) < 0 ) {
1715
+ innerloop , innerloopdata , op_flags ) < 0 ) {
1695
1716
return -1 ;
1696
1717
}
1697
1718
@@ -1717,14 +1738,13 @@ execute_fancy_ufunc_loop(PyUFuncObject *ufunc,
1717
1738
NPY_ORDER order ,
1718
1739
npy_intp buffersize ,
1719
1740
PyObject * * arr_prep ,
1720
- ufunc_full_args full_args )
1741
+ ufunc_full_args full_args ,
1742
+ npy_uint32 * op_flags )
1721
1743
{
1722
1744
int i , nin = ufunc -> nin , nout = ufunc -> nout ;
1723
1745
int nop = nin + nout ;
1724
- npy_uint32 op_flags [NPY_MAXARGS ];
1725
1746
NpyIter * iter ;
1726
1747
int needs_api ;
1727
- npy_intp default_op_in_flags = 0 , default_op_out_flags = 0 ;
1728
1748
1729
1749
NpyIter_IterNextFunc * iternext ;
1730
1750
char * * dataptr ;
@@ -1734,48 +1754,10 @@ execute_fancy_ufunc_loop(PyUFuncObject *ufunc,
1734
1754
PyArrayObject * * op_it ;
1735
1755
npy_uint32 iter_flags ;
1736
1756
1737
- if (wheremask != NULL ) {
1738
- if (nop + 1 > NPY_MAXARGS ) {
1739
- PyErr_SetString (PyExc_ValueError ,
1740
- "Too many operands when including where= parameter" );
1741
- return -1 ;
1742
- }
1743
- op [nop ] = wheremask ;
1744
- dtypes [nop ] = NULL ;
1745
- default_op_out_flags |= NPY_ITER_WRITEMASKED ;
1746
- }
1747
-
1748
- /* Set up the flags */
1749
- for (i = 0 ; i < nin ; ++ i ) {
1750
- op_flags [i ] = default_op_in_flags |
1751
- NPY_ITER_READONLY |
1752
- NPY_ITER_ALIGNED |
1753
- NPY_ITER_OVERLAP_ASSUME_ELEMENTWISE ;
1754
- /*
1755
- * If READWRITE flag has been set for this operand,
1756
- * then clear default READONLY flag
1757
- */
1758
- op_flags [i ] |= ufunc -> op_flags [i ];
1759
- if (op_flags [i ] & (NPY_ITER_READWRITE | NPY_ITER_WRITEONLY )) {
1760
- op_flags [i ] &= ~NPY_ITER_READONLY ;
1761
- }
1762
- }
1763
1757
for (i = nin ; i < nop ; ++ i ) {
1764
- /*
1765
- * We don't write to all elements, and the iterator may make
1766
- * UPDATEIFCOPY temporary copies. The output arrays (unless they are
1767
- * allocated by the iterator itself) must be considered READWRITE by the
1768
- * iterator, so that the elements we don't write to are copied to the
1769
- * possible temporary array.
1770
- */
1771
- op_flags [i ] = default_op_out_flags |
1772
- (op [i ] != NULL ? NPY_ITER_READWRITE : NPY_ITER_WRITEONLY ) |
1773
- NPY_ITER_ALIGNED |
1774
- NPY_ITER_ALLOCATE |
1775
- NPY_ITER_NO_BROADCAST |
1776
- NPY_ITER_NO_SUBTYPE |
1777
- NPY_ITER_OVERLAP_ASSUME_ELEMENTWISE ;
1758
+ op_flags [i ] |= (op [i ] != NULL ? NPY_ITER_READWRITE : NPY_ITER_WRITEONLY );
1778
1759
}
1760
+
1779
1761
if (wheremask != NULL ) {
1780
1762
op_flags [nop ] = NPY_ITER_READONLY | NPY_ITER_ARRAYMASK ;
1781
1763
}
@@ -2785,6 +2767,18 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
2785
2767
if (retval < 0 ) {
2786
2768
goto fail ;
2787
2769
}
2770
+ /*
2771
+ * We don't write to all elements, and the iterator may make
2772
+ * UPDATEIFCOPY temporary copies. The output arrays (unless they are
2773
+ * allocated by the iterator itself) must be considered READWRITE by the
2774
+ * iterator, so that the elements we don't write to are copied to the
2775
+ * possible temporary array.
2776
+ */
2777
+ _ufunc_setup_flags (ufunc , NPY_ITER_COPY | NPY_UFUNC_DEFAULT_INPUT_FLAGS ,
2778
+ NPY_ITER_UPDATEIFCOPY |
2779
+ NPY_ITER_READWRITE |
2780
+ NPY_UFUNC_DEFAULT_OUTPUT_FLAGS ,
2781
+ op_flags );
2788
2782
/* For the generalized ufunc, we get the loop right away too */
2789
2783
retval = ufunc -> legacy_inner_loop_selector (ufunc , dtypes ,
2790
2784
& innerloop , & innerloopdata , & needs_api );
@@ -2827,28 +2821,6 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
2827
2821
* Set up the iterator per-op flags. For generalized ufuncs, we
2828
2822
* can't do buffering, so must COPY or UPDATEIFCOPY.
2829
2823
*/
2830
- for (i = 0 ; i < nin ; ++ i ) {
2831
- op_flags [i ] = NPY_ITER_READONLY |
2832
- NPY_ITER_COPY |
2833
- NPY_ITER_ALIGNED |
2834
- NPY_ITER_OVERLAP_ASSUME_ELEMENTWISE ;
2835
- /*
2836
- * If READWRITE flag has been set for this operand,
2837
- * then clear default READONLY flag
2838
- */
2839
- op_flags [i ] |= ufunc -> op_flags [i ];
2840
- if (op_flags [i ] & (NPY_ITER_READWRITE | NPY_ITER_WRITEONLY )) {
2841
- op_flags [i ] &= ~NPY_ITER_READONLY ;
2842
- }
2843
- }
2844
- for (i = nin ; i < nop ; ++ i ) {
2845
- op_flags [i ] = NPY_ITER_READWRITE |
2846
- NPY_ITER_UPDATEIFCOPY |
2847
- NPY_ITER_ALIGNED |
2848
- NPY_ITER_ALLOCATE |
2849
- NPY_ITER_NO_BROADCAST |
2850
- NPY_ITER_OVERLAP_ASSUME_ELEMENTWISE ;
2851
- }
2852
2824
2853
2825
iter_flags = ufunc -> iter_flags |
2854
2826
NPY_ITER_MULTI_INDEX |
@@ -3097,7 +3069,8 @@ PyUFunc_GenericFunction(PyUFuncObject *ufunc,
3097
3069
int i , nop ;
3098
3070
const char * ufunc_name ;
3099
3071
int retval = -1 , subok = 1 ;
3100
- int need_fancy = 0 ;
3072
+ npy_uint32 op_flags [NPY_MAXARGS ];
3073
+ npy_intp default_op_out_flags ;
3101
3074
3102
3075
PyArray_Descr * dtypes [NPY_MAXARGS ];
3103
3076
@@ -3156,13 +3129,6 @@ PyUFunc_GenericFunction(PyUFuncObject *ufunc,
3156
3129
return retval ;
3157
3130
}
3158
3131
3159
- /*
3160
- * Use the masked loop if a wheremask was specified.
3161
- */
3162
- if (wheremask != NULL ) {
3163
- need_fancy = 1 ;
3164
- }
3165
-
3166
3132
/* Get the buffersize and errormask */
3167
3133
if (_get_bufsize_errmask (extobj , ufunc_name , & buffersize , & errormask ) < 0 ) {
3168
3134
retval = -1 ;
@@ -3177,16 +3143,20 @@ PyUFunc_GenericFunction(PyUFuncObject *ufunc,
3177
3143
goto fail ;
3178
3144
}
3179
3145
3180
- /* Only do the trivial loop check for the unmasked version. */
3181
- if (!need_fancy ) {
3182
- /*
3183
- * This checks whether a trivial loop is ok, making copies of
3184
- * scalar and one dimensional operands if that will help.
3185
- */
3186
- trivial_loop_ok = check_for_trivial_loop (ufunc , op , dtypes , buffersize );
3187
- if (trivial_loop_ok < 0 ) {
3188
- goto fail ;
3189
- }
3146
+ if (wheremask != NULL ) {
3147
+ /* Set up the flags. */
3148
+ default_op_out_flags = NPY_ITER_NO_SUBTYPE |
3149
+ NPY_ITER_WRITEMASKED |
3150
+ NPY_UFUNC_DEFAULT_OUTPUT_FLAGS ;
3151
+ _ufunc_setup_flags (ufunc , NPY_UFUNC_DEFAULT_INPUT_FLAGS ,
3152
+ default_op_out_flags , op_flags );
3153
+ }
3154
+ else {
3155
+ /* Set up the flags. */
3156
+ default_op_out_flags = NPY_ITER_WRITEONLY |
3157
+ NPY_UFUNC_DEFAULT_OUTPUT_FLAGS ;
3158
+ _ufunc_setup_flags (ufunc , NPY_UFUNC_DEFAULT_INPUT_FLAGS ,
3159
+ default_op_out_flags , op_flags );
3190
3160
}
3191
3161
3192
3162
#if NPY_UF_DBG_TRACING
@@ -3214,23 +3184,46 @@ PyUFunc_GenericFunction(PyUFuncObject *ufunc,
3214
3184
_find_array_prepare (full_args , arr_prep , nin , nout );
3215
3185
}
3216
3186
3217
- /* Start with the floating-point exception flags cleared */
3218
- npy_clear_floatstatus_barrier ((char * )& ufunc );
3219
3187
3220
3188
/* Do the ufunc loop */
3221
- if (need_fancy ) {
3189
+ if (wheremask != NULL ) {
3222
3190
NPY_UF_DBG_PRINT ("Executing fancy inner loop\n" );
3223
3191
3192
+ if (nop + 1 > NPY_MAXARGS ) {
3193
+ PyErr_SetString (PyExc_ValueError ,
3194
+ "Too many operands when including where= parameter" );
3195
+ return -1 ;
3196
+ }
3197
+ op [nop ] = wheremask ;
3198
+ dtypes [nop ] = NULL ;
3199
+
3200
+ /* Set up the flags */
3201
+
3202
+ npy_clear_floatstatus_barrier ((char * )& ufunc );
3224
3203
retval = execute_fancy_ufunc_loop (ufunc , wheremask ,
3225
3204
op , dtypes , order ,
3226
- buffersize , arr_prep , full_args );
3205
+ buffersize , arr_prep , full_args , op_flags );
3227
3206
}
3228
3207
else {
3229
3208
NPY_UF_DBG_PRINT ("Executing legacy inner loop\n" );
3230
3209
3210
+ /*
3211
+ * This checks whether a trivial loop is ok, making copies of
3212
+ * scalar and one dimensional operands if that will help.
3213
+ * Since it requires dtypes, it can only be called after
3214
+ * ufunc->type_resolver
3215
+ */
3216
+ trivial_loop_ok = check_for_trivial_loop (ufunc , op , dtypes , buffersize );
3217
+ if (trivial_loop_ok < 0 ) {
3218
+ goto fail ;
3219
+ }
3220
+
3221
+ /* check_for_trivial_loop on half-floats can overflow */
3222
+ npy_clear_floatstatus_barrier ((char * )& ufunc );
3223
+
3231
3224
retval = execute_legacy_ufunc_loop (ufunc , trivial_loop_ok ,
3232
3225
op , dtypes , order ,
3233
- buffersize , arr_prep , full_args );
3226
+ buffersize , arr_prep , full_args , op_flags );
3234
3227
}
3235
3228
if (retval < 0 ) {
3236
3229
goto fail ;
0 commit comments