8000 ENH: missingdata: Add support for identity-less skipna reductions wit… · numpy/numpy@3ba3937 · GitHub
[go: up one dir, main page]

Skip to content

Commit 3ba3937

Browse files
committed
ENH: missingdata: Add support for identity-less skipna reductions with ndim > 1
1 parent fe720c2 commit 3ba3937

File tree

3 files changed

+310
-10
lines changed

3 files changed

+310
-10
lines changed

numpy/core/src/multiarray/reduction.c

Lines changed: 238 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include "numpy/npy_3kcompat.h"
1919

2020
#include "lowlevel_strided_loops.h"
21+
#include "na_mask.h"
2122
#include "reduction.h"
2223

2324
/*
@@ -269,6 +270,241 @@ check_nonreorderable_axes(int ndim, npy_bool *axis_flags, const char *funcname)
269270
return 0;
270271
}
271272

273+
/*
274+
* Initializes the reduce result for skipna reductions where operand
275+
* has more than one dimension.
276+
*
277+
* 'operand must have an NA mask, 'result' may or may not have an
278+
* NA mask, and 'skipna' must be True to call this function.
279+
*/
280+
static PyArrayObject *
281+
initialize_reduce_result_noidentity_skipna(
282+
PyArrayObject *operand, PyArrayObject *result,
283+
npy_bool *axis_flags, const char *funcname)
284+
{
285+
PyArrayObject *initialized = NULL;
286+
npy_intp initialized_countdown;
287+
npy_intp op_itemsize;
288+
PyArray_Descr *bool_dtype;
289+
290+
/* Iterator parameters */
291+
NpyIter *iter = NULL;
292+
PyArrayObject *op[3];
293+
npy_uint32 flags, op_flags[3];
294+
npy_intp fixed_strides[4];
295+
296+
/* Data transfer function */
297+
PyArray_StridedUnaryOp *stransfer = NULL;
298+
NpyAuxData *transferdata = NULL;
299+
int needs_api;
300+
301+
op_itemsize = PyArray_DTYPE(operand)->elsize;
302+
303+
/*
304+
* Create a view of operand which owns its its own mask, so that
305+
* we can change it.
306+
*/
307+
operand = (PyArrayObject *)PyArray_View(operand, NULL, &PyArray_Type);
308+
if (operand == NULL) {
309+
goto fail;
310+
}
311+
if (PyArray_AllocateMaskNA(operand, 1, 0, 1) < 0) {
312+
goto fail;
313+
}
314+
315+
/*
316+
* Allocate a flag array to keep track of which elements in the result
317+
* have already been initialized.
318+
*
319+
* This reference to bool_dtype gets stolen by NewLikeArray.
320+
*/
321+
bool_dtype = PyArray_DescrFromType(NPY_BOOL);
322+
if (bool_dtype == NULL) {
323+
goto fail;
324+
}
325+
initialized = (PyArrayObject *)PyArray_NewLikeArray(result,
326+
NPY_KEEPORDER, bool_dtype, 0);
327+
if (initialized == NULL) {
328+
goto fail;
329+
}
330+
if (PyArray_AssignZero(initialized, NULL, 0, NULL) < 0) {
331+
Py_DECREF(initialized);
332+
goto fail;
333+
}
334+
335+
/* Set up the iterator for copying the elements */
336+
op[0] = operand;
337+
op[1] = result;
338+
op[2] = initialized;
339+
op_flags[0] = NPY_ITER_READWRITE | NPY_ITER_USE_MASKNA;
340+
op_flags[1] = NPY_ITER_READWRITE | NPY_ITER_IGNORE_MASKNA;
341+
op_flags[2] = NPY_ITER_READWRITE;
342+
flags = NPY_ITER_EXTERNAL_LOOP |
343+
NPY_ITER_REFS_OK |
344+
NPY_ITER_REDUCE_OK |
345+
NPY_ITER_ZEROSIZE_OK |
346+
NPY_ITER_DONT_NEGATE_STRIDES;
347+
348+
iter = NpyIter_MultiNew(3, op, flags,
349+
NPY_KEEPORDER, NPY_UNSAFE_CASTING,
350+
op_flags,
351+
NULL);
352+
if (iter == NULL) {
353+
goto fail;
354+
}
355+
NpyIter_GetInnerFixedStrideArray(iter, fixed_strides);
356+
needs_api = NpyIter_IterationNeedsAPI(iter);
357+
358+
/* Get a function for copying the elements */
359+
if (PyArray_GetDTypeTransferFunction(
360+
PyArray_ISALIGNED(operand) && PyArray_ISALIGNED(result),
361+
fixed_strides[0], fixed_strides[1],
362+
PyArray_DTYPE(operand), PyArray_DTYPE(result),
363+
0,
364+
&stransfer, &transferdata,
365+
&needs_api) != NPY_SUCCEED) {
366+
goto fail;
367+
}
368+
369+
if (NpyIter_GetIterSize(iter) != 0) {
370+
NpyIter_IterNextFunc *iternext;
371+
char **dataptr;
372+
npy_intp *strideptr;
373+
npy_intp *countptr, count, subcount;
374+
NPY_BEGIN_THREADS_DEF;
375+
376+
iternext = NpyIter_GetIterNext(iter, NULL);
377+
if (iternext == NULL) {
378+
goto fail;
379+
}
380+
dataptr = NpyIter_GetDataPtrArray(iter);
381+
strideptr = NpyIter_GetInnerStrideArray(iter);
382+
countptr = NpyIter_GetInnerLoopSizePtr(iter);
383+
384+
/*
385+
* Track how many initializations we've done, both to
386+
* short circuit completion and to raise an error if
387+
* any remained uninitialized.
388+
*/
389+
initialized_countdown = PyArray_SIZE(result);
390+
391+
if (!needs_api) {
392+
NPY_BEGIN_THREADS;
393+
}
394+
395+
do {
396+
char *op_d = dataptr[0], *res_d = dataptr[1];
397+
char *init_d = dataptr[2], *op_namask_d = dataptr[3];
398+
npy_intp op_s = strideptr[0], res_s = strideptr[1];
399+
npy_intp init_s = strideptr[2], op_namask_s = strideptr[3];
400+
401+
count = *countptr;
402+
403+
/* If the result stride is 0, copy at most one value */
404+
if (res_s == 0) {
405+
npy_intp i;
406+
for (i = 0; i < count; ++i) {
407+
if (*init_d == 0 && NpyMaskValue_IsExposed(
408+
*(npy_mask *)op_namask_d)) {
409+
410+
/* Mark it as initialized */
411+
*init_d = 1;
412+
stransfer(res_d, 0, op_d + i * op_s, op_s,
413+
1, op_itemsize, transferdata);
414+
415+
--initialized_countdown;
416+
if (initialized_countdown == 0) {
417+
goto finish_loop;
418+
}
419+
break;
420+
}
421+
422+
init_d += init_s;
423+
op_namask_d += op_namask_s;
424+
}
425+
}
426+
/* Otherwise process the data in runs as large as possible */
427+
else {
428+
do {
429+
/* Skip values that are initialized or masked */
430+
subcount = 0;
431+
while (subcount < count && (*init_d == 1 ||
432+
!NpyMaskValue_IsExposed(
433+
*(npy_mask *)op_namask_d))) {
434+
++subcount;
435+
init_d += init_s;
436+
op_namask_d += op_namask_s;
437+
}
438+
op_d += subcount * op_s;
439+
res_d += subcount * res_s;
440+
count -= subcount;
441+
442+
/* Transfer values that are uninitialized and exposed */
443+
subcount = 0;
444+
while (subcount < count && (*init_d == 0 &&
445+
NpyMaskValue_IsExposed(
446+
*(npy_mask *)op_namask_d))) {
447+
++subcount;
448+
/* Mark it as initialized */
449+
*init_d = 1;
450+
init_d += init_s;
451+
op_namask_d += op_namask_s;
452+
}
453+
stransfer(res_d, res_s, op_d, op_s,
454+
subcount, op_itemsize, transferdata);
455+
op_d += subcount * op_s;
456+
res_d += subcount * res_s;
457+
count -= subcount;
458+
459+
initialized_countdown -= subcount;
460+
if (initialized_countdown == 0) {
461+
goto finish_loop;
462+
}
463+
} while (count > 0);
464+
}
465+
} while (iternext(iter));
466+
467+
finish_loop:
468+
if (!needs_api) {
469+
NPY_END_THREADS;
470+
}
471+
}
472+
473+
if (needs_api && PyErr_Occurred()) {
474+
goto fail;
475+
}
476+
477+
/* Since this ufunc has no identity, all elements must be initialized */
478+
if (initialized_countdown != 0) {
479+
PyErr_Format(PyExc_ValueError,
480+
"reduction operation %s with skipna=True "
481+
"had an output element with all its inputs NA", funcname);
482+
goto fail;
483+
}
484+
485+
/* If 'result' has an NA mask, set it to all exposed */
486+
if (PyArray_HASMASKNA(result)) {
487+
if (PyArray_AssignMaskNA(result, 1, NULL, 0, NULL) < 0) {
488+
goto fail;
489+
}
490+
}
491+
492+
Py_DECREF(initialized);
493+
NpyIter_Deallocate(iter);
494+
NPY_AUXDATA_FREE(transferdata);
495+
return operand;
496+
497+
fail:
498+
Py_XDECREF(operand);
499+
Py_XDECREF(initialized);
500+
if (iter != NULL) {
501+
NpyIter_Deallocate(iter);
502+
}
503+
NPY_AUXDATA_FREE(transferdata);
504+
505+
return NULL;
506+
}
507+
272508
/*
273509
* This function initializes a result array for a reduction operation
274510
* which has no identity. This means it needs to copy the first element
@@ -423,10 +659,8 @@ PyArray_InitializeReduceResult(
423659
* case
424660
*/
425661
else {
426-
PyErr_SetString(PyExc_ValueError,
427-
"skipna=True with a non-identity reduction "
428-
"and an array with ndim > 1 isn't implemented yet");
429-
return NULL;
662+
return initialize_reduce_result_noidentity_skipna(
663+
operand, result, axis_flags, funcname);
430664
}
431665

432666
/*

numpy/core/src/umath/ufunc_type_resolution.c

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -173,10 +173,6 @@ PyUFunc_DefaultTypeResolver(PyUFuncObject *ufunc,
173173
* PyArray_ResultType instead of a linear search to get the best
174174
* loop.
175175
*
176-
* Note that a simpler linear search through the functions loop
177-
* is still done, but switching to a simple array lookup for
178-
* built-in types would be better at some point.
179-
*
180176
* Returns 0 on success, -1 on error.
181177
*/
182178
NPY_NO_EXPORT int

numpy/core/tests/test_maskna.py

Lines changed: 72 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -340,7 +340,6 @@ def asn():
340340
b = np.arange(6).reshape(2,3)
341341
assert_raises(ValueError, np.copyto, b, a, where=badmask)
342342

343-
344343
def test_array_maskna_view_function():
345344
a = np.arange(10)
346345

@@ -807,7 +806,6 @@ def test_maskna_ufunc_1D():
807806
assert_equal(np.isna(a), [[1], [0]])
808807
assert_equal(a[~np.isna(a)], [4.])
809808

810-
811809
def test_maskna_ufunc_sum_1D():
812810
check_maskna_ufunc_sum_1D(np.sum)
813811

@@ -906,6 +904,78 @@ def check_ufunc_max_1D(max_func):
906904
a[...] = np.NA
907905
assert_raises(ValueError, max_func, a, skipna=True)
908906

907+
def test_ufunc_skipna_max_3D():
908+
check_ufunc_skipna_max_3D(np.max)
909+
910+
def test_ufunc_skipna_maximum_reduce_3D():
911+
check_ufunc_skipna_max_3D(np.maximum.reduce)
912+
913+
def check_ufunc_skipna_max_3D(max_func):
914+
a_orig = np.array([[[29, 6, 24, 11, 24],
915+
[17, 26, 10, 29, 21],
916+
[ 4, 4, 7, 9, 30],
917+
[ 9, 20, 5, 12, 23]],
918+
[[ 8, 9, 10, 31, 22],
919+
[ 5, 20, 2, 29, 27],
920+
[21, 22, 13, 30, 20],
921+
[24, 27, 9, 20, 31]],
922+
[[14, 0, 13, 11, 22],
923+
[ 0, 16, 16, 14, 2],
924+
[ 0, 2, 1, 29, 12],
925+
[24, 25, 12, 11, 9]]])
926+
a = a_orig.view(maskna=True)
927+
b = a_orig.copy()
928+
929+
def check_all_axis_combos(x, y, badaxes=()):
930+
if 0 not in badaxes:
931+
res = max_func(x, axis=0, skipna=True)
932+
assert_array_equal(res, max_func(y, axis=0, skipna=True))
933+
if 1 not in badaxes:
934+
res = max_func(x, axis=1, skipna=True)
935+
assert_array_equal(res, max_func(y, axis=1, skipna=True))
936+
if 2 not in badaxes:
937+
res = max_func(x, axis=2, skipna=True)
938+
assert_array_equal(res, max_func(y, axis=2, skipna=True))
939+
res = max_func(x, axis=(0,1), skipna=True)
940+
assert_array_equal(res, max_func(y, axis=(0,1), skipna=True))
941+
res = max_func(x, axis=(0,2), skipna=True)
942+
assert_array_equal(res, max_func(y, axis=(0,2), skipna=True))
943+
res = max_func(x, axis=(1,2), skipna=True)
944+
assert_array_equal(res, max_func(y, axis=(1,2), skipna=True))
945+
res = max_func(x, axis=(0,1,2), skipna=True)
946+
assert_array_equal(res, max_func(y, axis=(0,1,2), skipna=True))
947+
948+
# Straightforward reduce with no NAs
949+
check_all_axis_combos(a, b)
950+
951+
# Set a few values in 'a' to NA, and set the corresponding
952+
# values in 'b' to -1 to definitely eliminate them from the maximum
953+
for coord in [(0,1,2), (1,2,2), (0,2,4), (2,1,0)]:
954+
a[coord] = np.NA
955+
b[coord] = -1
956+
check_all_axis_combos(a, b)
957+
958+
# Set a few more values in 'a' to NA
959+
for coord in [(2,1,1), (2,1,2), (2,1,3), (0,0,4), (0,3,4)]:
960+
a[coord] = np.NA
961+
b[coord] = -1
962+
check_all_axis_combos(a, b)
963+
964+
# Set it so that there's a full set of NAs along the third dimension
965+
for coord in [(2,1,4)]:
966+
a[coord] = np.NA
967+
b[coord] = -1
968+
check_all_axis_combos(a, b, badaxes=(2,))
969+
assert_raises(ValueError, max_func, a, axis=2, skipna=True)
970+
971+
# Set it so that there's a full set of NAs along the second dimension
972+
for coord in [(0,1,4)]:
973+
a[coord] = np.NA
974+
b[coord] = -1
975+
check_all_axis_combos(a, b, badaxes=(1,2))
976+
assert_raises(ValueError, max_func, a, axis=1, skipna=True)
977+
assert_raises(ValueError, max_func, a, axis=2, skipna=True)
978+
909979
def test_count_reduce_items():
910980
# np.count_reduce_items
911981

0 commit comments

Comments
 (0)
0