@@ -1263,23 +1263,10 @@ iterator_loop(PyUFuncObject *ufunc,
1263
1263
void * innerloopdata ,
1264
1264
npy_uint32 * op_flags )
1265
1265
{
1266
- npy_intp i , nin = ufunc -> nin , nout = ufunc -> nout ;
1267
- npy_intp nop = nin + nout ;
1268
- NpyIter * iter ;
1269
- char * baseptrs [NPY_MAXARGS ];
1270
-
1271
- NpyIter_IterNextFunc * iternext ;
1272
- char * * dataptr ;
1273
- npy_intp * stride ;
1274
- npy_intp * count_ptr ;
1275
- int needs_api ;
1276
-
1277
- PyArrayObject * * op_it ;
1278
- npy_uint32 iter_flags ;
1279
-
1280
- NPY_BEGIN_THREADS_DEF ;
1266
+ int nin = ufunc -> nin , nout = ufunc -> nout ;
1267
+ int nop = nin + nout ;
1281
1268
1282
- iter_flags = ufunc -> iter_flags |
1269
+ npy_uint32 iter_flags = ufunc -> iter_flags |
1283
1270
NPY_ITER_EXTERNAL_LOOP |
1284
1271
NPY_ITER_REFS_OK |
1285
1272
NPY_ITER_ZEROSIZE_OK |
@@ -1288,16 +1275,17 @@ iterator_loop(PyUFuncObject *ufunc,
1288
1275
NPY_ITER_DELAY_BUFALLOC |
1289
1276
NPY_ITER_COPY_IF_OVERLAP ;
1290
1277
1291
- /* Call the __array_prepare__ functions for already existing output arrays.
1278
+ /*
1279
+ * Call the __array_prepare__ functions for already existing output arrays.
1292
1280
* Do this before creating the iterator, as the iterator may UPDATEIFCOPY
1293
1281
* some of them.
1294
1282
*/
1295
- for (i = 0 ; i < nout ; ++ i ) {
1283
+ for (int i = 0 ; i < nout ; i ++ ) {
1296
1284
if (op [nin + i ] == NULL ) {
1297
1285
continue ;
1298
1286
}
1299
1287
if (prepare_ufunc_output (ufunc , & op [nin + i ],
1300
- arr_prep [i ], full_args , i ) < 0 ) {
1288
+ arr_prep [i ], full_args , i ) < 0 ) {
1301
1289
return -1 ;
1302
1290
}
1303
1291
}
@@ -1307,7 +1295,7 @@ iterator_loop(PyUFuncObject *ufunc,
1307
1295
* were already checked, we use the casting rule 'unsafe' which
1308
1296
* is faster to calculate.
1309
1297
*/
1310
- iter = NpyIter_AdvancedNew (nop , op ,
1298
+ NpyIter * iter = NpyIter_AdvancedNew (nop , op ,
1311
1299
iter_flags ,
1312
1300
order , NPY_UNSAFE_CASTING ,
1313
1301
op_flags , dtype ,
@@ -1316,16 +1304,20 @@ iterator_loop(PyUFuncObject *ufunc,
1316
1304
return -1 ;
1317
1305
}
1318
1306
1319
- /* Copy any allocated outputs */
1320
- op_it = NpyIter_GetOperandArray (iter );
1321
- for (i = 0 ; i < nout ; ++ i ) {
1322
- if (op [nin + i ] == NULL ) {
1323
- op [nin + i ] = op_it [nin + i ];
1324
- Py_INCREF (op [nin + i ]);
1307
+ NPY_UF_DBG_PRINT ("Made iterator\n" );
1308
+
1309
+ /* Call the __array_prepare__ functions for newly allocated arrays */
1310
+ PyArrayObject * * op_it = NpyIter_GetOperandArray (iter );
1311
+ char * baseptrs [NPY_MAXARGS ];
1312
+
1313
+ for (int i = 0 ; i < nout ; ++ i ) {
1314
+ if (op [nin + i ] == NULL ) {
1315
+ op [nin + i ] = op_it [nin + i ];
1316
+ Py_INCREF (op [nin + i ]);
1325
1317
1326
1318
/* Call the __array_prepare__ functions for the new array */
1327
- if (prepare_ufunc_output (ufunc , & op [ nin + i ],
1328
- arr_prep [i ], full_args , i ) < 0 ) {
1319
+ if (prepare_ufunc_output (ufunc ,
1320
+ & op [ nin + i ], arr_prep [i ], full_args , i ) < 0 ) {
1329
1321
NpyIter_Deallocate (iter );
1330
1322
return -1 ;
1331
1323
}
@@ -1340,45 +1332,59 @@ iterator_loop(PyUFuncObject *ufunc,
1340
1332
* with other operands --- the op[nin+i] array passed to it is newly
1341
1333
* allocated and doesn't have any overlap.
1342
1334
*/
1343
- baseptrs [nin + i ] = PyArray_BYTES (op [nin + i ]);
1335
+ baseptrs [nin + i ] = PyArray_BYTES (op [nin + i ]);
1344
1336
}
1345
1337
else {
1346
- baseptrs [nin + i ] = PyArray_BYTES (op_it [nin + i ]);
1338
+ baseptrs [nin + i ] = PyArray_BYTES (op_it [nin + i ]);
1347
1339
}
1348
1340
}
1349
-
1350
1341
/* Only do the loop if the iteration size is non-zero */
1351
- if (NpyIter_GetIterSize (iter ) != 0 ) {
1352
- /* Reset the iterator with the base pointers from possible __array_prepare__ */
1353
- for (i = 0 ; i < nin ; ++ i ) {
1354
- baseptrs [i ] = PyArray_BYTES (op_it [i ]);
1355
- }
1356
- if (NpyIter_ResetBasePointers (iter , baseptrs , NULL ) != NPY_SUCCEED ) {
1357
- NpyIter_Deallocate (iter );
1342
+ npy_intp full_size = NpyIter_GetIterSize (iter );
1343
+ if (full_size == 0 ) {
1344
+ if (!NpyIter_Deallocate (iter )) {
1358
1345
return -1 ;
1359
1346
}
1347
+ return 0 ;
1348
+ }
1360
1349
1361
- /* Get the variables needed for the loop */
1362
- iternext = NpyIter_GetIterNext (iter , NULL );
1363
- if (iternext == NULL ) {
1364
- NpyIter_Deallocate (iter );
1365
- return -1 ;
1366
- }
1367
- dataptr = NpyIter_GetDataPtrArray (iter );
1368
- stride = NpyIter_GetInnerStrideArray (iter );
1369
- count_ptr = NpyIter_GetInnerLoopSizePtr (iter );
1370
- needs_api = NpyIter_IterationNeedsAPI (iter );
1350
+ /*
1351
+ * Reset the iterator with the base pointers possibly modified by
1352
+ * `__array_prepare__`.
1353
+ */
1354
+ for (int i = 0 ; i < nin ; i ++ ) {
1355
+ baseptrs [i ] = PyArray_BYTES (op_it [i ]);
1356
+ }
1357
+ if (NpyIter_ResetBasePointers (iter , baseptrs , NULL ) != NPY_SUCCEED ) {
1358
+ NpyIter_Deallocate (iter );
1359
+ return -1 ;
1360
+ }
1371
1361
1372
- NPY_BEGIN_THREADS_NDITER (iter );
1362
+ /* Get the variables needed for the loop */
1363
+ NpyIter_IterNextFunc * iternext = NpyIter_GetIterNext (iter , NULL );
1364
+ if (iternext == NULL ) {
1365
+ NpyIter_Deallocate (iter );
1366
+ return -1 ;
1367
+ }
1368
+ char * * dataptr = NpyIter_GetDataPtrArray (iter );
1369
+ npy_intp * strides = NpyIter_GetInnerStrideArray (iter );
1370
+ npy_intp * countptr = NpyIter_GetInnerLoopSizePtr (iter );
1371
+ int needs_api = NpyIter_IterationNeedsAPI (iter );
1373
1372
1374
- /* Execute the loop */
1375
- do {
1376
- NPY_UF_DBG_PRINT1 ("iterator loop count %d\n" , (int )* count_ptr );
1377
- innerloop (dataptr , count_ptr , stride , innerloopdata );
1378
- } while (!(needs_api && PyErr_Occurred ()) && iternext (iter ));
1373
+ NPY_BEGIN_THREADS_DEF ;
1379
1374
1380
- NPY_END_THREADS ;
1375
+ if (!needs_api ) {
1376
+ NPY_BEGIN_THREADS_THRESHOLDED (full_size );
1381
1377
}
1378
+
1379
+ NPY_UF_DBG_PRINT ("Actual inner loop:\n" );
1380
+ /* Execute the loop */
1381
+ do {
1382
+ NPY_UF_DBG_PRINT1 ("iterator loop count %d\n" , (int )* count_ptr );
1383
+ innerloop (dataptr , countptr , strides , innerloopdata );
1384
+ } while (!(needs_api && PyErr_Occurred ()) && iternext (iter ));
1385
+
1386
+ NPY_END_THREADS ;
1387
+
1382
1388
/*
1383
1389
* Currently `innerloop` may leave an error set, in this case
1384
1390
* NpyIter_Deallocate will always return an error as well.
0 commit comments