ENH: missingdata: Add wheremask to PyArray_ContainsNA · numpy/numpy@616a0af · GitHub
[go: up one dir, main page]

Skip to content

Commit 616a0af

Browse files
committed
ENH: missingdata: Add wheremask to PyArray_ContainsNA
Use this to make masked assignment just check the elements it's copying for NA, so that the source array can have NAs, just not where the mask says.
1 parent 023573c commit 616a0af

File tree

11 files changed

+246
-54
lines changed

11 files changed

+246
-54
lines changed

doc/release/2.0.0-notes.rst

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,9 @@ New features
1414
Mask-based NA missing values
1515
----------------------------
1616

17-
Support for NA missing values similar to those in R has been implemented.
18-
This was done by adding optional NA masks to the core array object.
17+
Preliminary support for NA missing values similar to those in R has
18+
been implemented. This was done by adding optional NA masks to the core
19+
array object.
1920

2021
While a significant amount of the NumPy functionality has been extended to
2122
support NA masks, not everything is yet supported. Here is an (incomplete)

numpy/core/src/multiarray/array_assign_array.c

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -480,8 +480,11 @@ PyArray_AssignArray(PyArrayObject *dst, PyArrayObject *src,
480480
}
481481

482482
if (src_has_maskna && !dst_has_maskna) {
483-
/* TODO: add 'wheremask' as a parameter to ContainsNA */
484-
if (PyArray_ContainsNA(src)) {
483+
int containsna = PyArray_ContainsNA(src, wheremask, NULL);
484+
if (containsna == -1) {
485+
goto fail;
486+
}
487+
else if (containsna) {
485488
PyErr_SetString(PyExc_ValueError,
486489
"Cannot assign NA to an array which "
487490
"does not support NAs");
@@ -655,8 +658,12 @@ PyArray_AssignArray(PyArrayObject *dst, PyArrayObject *src,
655658
}
656659
else {
657660
npy_intp wheremask_strides[NPY_MAXDIMS];
661+
int containsna = PyArray_ContainsNA(wheremask, NULL, NULL);
658662

659-
if (PyArray_ContainsNA(wheremask)) {
663+
if (containsna == -1) {
664+
goto fail;
665+
}
666+
else if (containsna) {
660667
if (!dst_has_maskna) {
661668
PyErr_SetString(PyExc_ValueError,
662669
"Cannot assign NA to an array which "

numpy/core/src/multiarray/array_assign_scalar.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -431,8 +431,12 @@ PyArray_AssignRawScalar(PyArrayObject *dst,
431431
}
432432
else {
433433
npy_intp wheremask_strides[NPY_MAXDIMS];
434+
int containsna = PyArray_ContainsNA(wheremask, NULL, NULL);
434435

435-
if (PyArray_ContainsNA(wheremask)) {
436+
if (containsna == -1) {
437+
goto fail;
438+
}
439+
else if (containsna) {
436440
if (!dst_has_maskna) {
437441
PyErr_SetString(PyExc_ValueError,
438442
"Cannot assign NA to an array which "

numpy/core/src/multiarray/common.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,11 @@ PyArray_DTypeFromObject(PyObject *obj, int maxdims, int *out_contains_na,
101101
/* Check if it's an ndarray */
102102
if (PyArray_Check(obj)) {
103103
/* Check for any NAs in the array */
104-
if (PyArray_ContainsNA((PyArrayObject *)obj)) {
104+
int containsna = PyArray_ContainsNA((PyArrayObject *)obj, NULL, NULL);
105+
if (containsna == -1) {
106+
goto fail;
107+
}
108+
else if (containsna) {
105109
*out_contains_na = 1;
106110
}
107111
dtype = PyArray_DESCR((PyArrayObject *)obj);

numpy/core/src/multiarray/ctors.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2620,7 +2620,11 @@ PyArray_CopyAsFlat(PyArrayObject *dst, PyArrayObject *src, NPY_ORDER order)
26202620
baseflags |= NPY_ITER_USE_MASKNA;
26212621
}
26222622
else {
2623-
if (PyArray_ContainsNA(src)) {
2623+
int containsna = PyArray_ContainsNA(src, NULL, NULL);
2624+
if (containsna == -1) {
2625+
return -1;
2626+
}
2627+
else if (containsna) {
26242628
PyErr_SetString(PyExc_ValueError,
26252629
"Cannot assign NA to an array which "
26262630
"does not support NAs");

numpy/core/src/multiarray/item_selection.c

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -123,11 +123,17 @@ PyArray_TakeFrom(PyArrayObject *self0, PyObject *indices0, int axis,
123123
if (PyArray_HASMASKNA(obj)) {
124124
use_maskna = 1;
125125
}
126-
else if (PyArray_ContainsNA(self)) {
127-
PyErr_SetString(PyExc_ValueError,
128-
"Cannot assign NA to an array which "
129-
"does not support NAs");
130-
goto fail;
126+
else {
127+
int containsna = PyArray_ContainsNA(self, NULL, NULL);
128+
if (containsna == -1) {
129+
goto fail;
130+
}
131+
else if (containsna) {
132+
PyErr_SetString(PyExc_ValueError,
133+
"Cannot assign NA to an array which "
134+
"does not support NAs");
135+
goto fail;
136+
}
131137
}
132138
}
133139
}
@@ -2009,10 +2015,10 @@ PyArray_ReduceCountNonzero(PyArrayObject *arr, PyArrayObject *out,
20092015
return NULL;
20102016
}
20112017

2012-
result = PyArray_ReduceWrapper(arr, out,
2018+
result = PyArray_ReduceWrapper(arr, out, NULL,
20132019
PyArray_DESCR(arr), dtype,
20142020
NPY_SAME_KIND_CASTING,
2015-
axis_flags, 1, skipna, keepdims,
2021+
axis_flags, 1, skipna, NULL, keepdims,
20162022
&assign_reduce_identity_zero,
20172023
&reduce_count_nonzero_loop,
20182024
&reduce_count_nonzero_masked_loop,
@@ -2047,7 +2053,11 @@ PyArray_CountNonzero(PyArrayObject *self)
20472053

20482054
/* If 'self' has an NA mask, make sure it has no NA values */
20492055
if (PyArray_HASMASKNA(self)) {
2050-
if (PyArray_ContainsNA(self)) {
2056+
int containsna = PyArray_ContainsNA(self, NULL, NULL);
2057+
if (containsna == -1) {
2058+
return -1;
2059+
}
2060+
else if (containsna) {
20512061
PyErr_SetString(PyExc_ValueError,
20522062
"Cannot count the number of nonzeros in an array "
20532063
"which contains an NA");

numpy/core/src/multiarray/mapping.c

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -717,7 +717,7 @@ array_boolean_subscript(PyArrayObject *self,
717717
char *ret_data, *ret_maskna_data = NULL;
718718
PyArray_Descr *dtype;
719719
PyArrayObject *ret;
720-
int self_has_maskna = PyArray_HASMASKNA(self), needs_api = 0;
720+
int self_has_maskna = PyArray_HASMASKNA(self), needs_api = 0, containsna;
721721
npy_intp bmask_size;
722722

723723
if (PyArray_DESCR(bmask)->type_num != NPY_BOOL) {
@@ -728,10 +728,12 @@ array_boolean_subscript(PyArrayObject *self,
728728

729729
/*
730730
* See the Boolean Indexing section of the missing data NEP.
731-
*
732-
* TODO: Add 'wheremask' as a parameter to ContainsNA.
733731
*/
734-
if (PyArray_ContainsNA(bmask)) {
732+
containsna = PyArray_ContainsNA(bmask, NULL, NULL);
733+
if (containsna == -1) {
734+
return NULL;
735+
}
736+
else if (containsna) {
735737
PyErr_SetString(PyExc_ValueError,
736738
"The boolean mask indexing array "
737739
"may not contain any NA values");
@@ -957,7 +959,7 @@ array_ass_boolean_subscript(PyArrayObject *self,
957959
char *v_data, *v_maskna_data = NULL;
958960
int self_has_maskna = PyArray_HASMASKNA(self);
959961
int v_has_maskna = PyArray_HASMASKNA(v);
960-
int needs_api = 0;
962+
int needs_api = 0, containsna;
961963
npy_intp bmask_size;
962964
char constant_valid_mask = 1;
963965

@@ -985,7 +987,11 @@ array_ass_boolean_subscript(PyArrayObject *self,
985987
}
986988

987989
/* See the Boolean Indexing section of the missing data NEP */
988-
if (PyArray_ContainsNA(bmask)) {
990+
containsna = PyArray_ContainsNA(bmask, NULL, NULL);
991+
if (containsna == -1) {
992+
return -1;
993+
}
994+
else if (containsna) {
989995
PyErr_SetString(PyExc_ValueError,
990996
"The boolean mask assignment indexing array "
991997
"may not contain any NA values");
@@ -994,7 +1000,11 @@ array_ass_boolean_subscript(PyArrayObject *self,
9941000

9951001
/* Can't assign an NA to an array which doesn't support it */
9961002
if (v_has_maskna && !self_has_maskna) {
997-
if (PyArray_ContainsNA(v)) {
1003+
containsna = PyArray_ContainsNA(v, NULL, NULL);
1004+
if (containsna == -1) {
1005+
return -1;
1006+
}
1007+
else if (containsna) {
9981008
PyErr_SetString(PyExc_ValueError,
9991009
"Cannot assign NA to an array which "
10001010
"does not support NAs");

numpy/core/src/multiarray/na_mask.c

Lines changed: 120 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -39,42 +39,136 @@ PyArray_HasNASupport(PyArrayObject *arr)
3939
* Returns false if the array has no NA support. Returns
4040
* true if the array has NA support AND there is an
4141
* NA anywhere in the array.
42+
*
43+
* If 'wheremask' is non-NULL, only positions with True
44+
* in 'wheremask' are checked for NA.
45+
*
46+
* The parameter 'whichna' is not yet supported, but is
47+
* provided for future multi-NA support. It should be set
48+
* to NULL.
49+
*
50+
* Returns -1 on failure, otherwise 0 for False and 1 for True.
4251
*/
43-
NPY_NO_EXPORT npy_bool
44-
PyArray_ContainsNA(PyArrayObject *arr)
52+
NPY_NO_EXPORT int
53+
PyArray_ContainsNA(PyArrayObject *arr, PyArrayObject *wheremask,
54+
npy_bool *whichna)
4555
{
46-
/* Need NA support to contain NA */
47-
if (PyArray_HASMASKNA(arr)) {
48-
int idim, ndim;
49-
char *data;
50-
npy_intp shape[NPY_MAXDIMS], strides[NPY_MAXDIMS];
51-
npy_intp i, coord[NPY_MAXDIMS];
52-
53-
if (PyArray_HASFIELDS(arr)) {
54-
/* TODO: need to add field-NA support */
55-
return 1;
56-
}
56+
/* Validate that the parameter for future expansion is NULL */
57+
if (whichna != NULL) {
58+
PyErr_SetString(PyExc_RuntimeError,
59+
"multi-NA is not yet supported in PyArray_ContainsNA");
60+
return -1;
61+
}
5762

58-
/* Use raw iteration with no heap memory allocation */
59-
if (PyArray_PrepareOneRawArrayIter(
63+
if (wheremask == NULL) {
64+
/* Need NA support to contain NA */
65+
if (PyArray_HASMASKNA(arr)) {
66+
int idim, ndim;
67+
char *data;
68+
npy_intp shape[NPY_MAXDIMS], strides[NPY_MAXDIMS];
69+
npy_intp i, coord[NPY_MAXDIMS];
70+
71+
if (PyArray_HASFIELDS(arr)) {
72+
PyErr_SetString(PyExc_RuntimeError,
73+
"field-NA is not yet supported");
74+
return -1;
75+
}
76+
77+
/* Use raw iteration with no heap memory allocation */
78+
if (PyArray_PrepareOneRawArrayIter(
6079
PyArray_NDIM(arr), PyArray_DIMS(arr),
6180
PyArray_MASKNA_DATA(arr), PyArray_MASKNA_STRIDES(arr),
6281
&ndim, shape,
6382
&data, strides) < 0) {
64-
PyErr_Clear();
65-
return 1;
66-
}
83+
return -1;
84+
}
6785

68-
/* Do the iteration */
69-
NPY_RAW_ITER_START(idim, ndim, coord, shape) {
70-
char *d = data;
71-
/* Process the innermost dimension */
72-
for (i = 0; i < shape[0]; ++i, d += strides[0]) {
73-
if (!NpyMaskValue_IsExposed((npy_mask)(*d))) {
74-
return 1;
86+
/* Do the iteration */
87+
NPY_RAW_ITER_START(idim, ndim, coord, shape) {
88+
char *d = data;
89+
/* Process the innermost dimension */
90+
for (i = 0; i < shape[0]; ++i, d += strides[0]) {
91+
if (!NpyMaskValue_IsExposed((npy_mask)(*d))) {
92+
return 1;
93+
}
7594
}
95+
} NPY_RAW_ITER_ONE_NEXT(idim, ndim, coord, shape, data, strides);
96+
}
97+
}
98+
else {
99+
npy_intp wheremask_strides_bcast[NPY_MAXDIMS];
100+
int containsna;
101+
102+
containsna = PyArray_ContainsNA(wheremask, NULL, NULL);
103+
if (containsna != 0) {
104+
if (containsna == -1) {
105+
return -1;
106+
}
107+
else {
108+
PyErr_SetString(PyExc_ValueError,
109+
"the where mask may not contain any NA values");
110+
return -1;
76111
}
77-
} NPY_RAW_ITER_ONE_NEXT(idim, ndim, coord, shape, data, strides);
112+
}
113+
114+
/*
115+
* Broadcast the where-mask onto arr. Note that this
116+
* is before checking if 'arr' has an NA mask, to
117+
* catch any broadcasting errors.
118+
*/
119+
if (broadcast_strides(PyArray_NDIM(arr), PyArray_DIMS(arr),
120+
PyArray_NDIM(wheremask), PyArray_DIMS(wheremask),
121+
PyArray_STRIDES(wheremask), "where mask",
122+
wheremask_strides_bcast) < 0) {
123+
return -1;
124+
}
125+
126+
if (PyArray_DTYPE(wheremask)->type_num != NPY_BOOL) {
127+
PyErr_SetString(PyExc_ValueError,
128+
"the where mask must have a 'bool' dtype");
129+
return -1;
130+
}
131+
132+
if (PyArray_HASMASKNA(arr)) {
133+
int idim, ndim;
134+
char *data, *wheremask_data;
135+
npy_intp shape[NPY_MAXDIMS], strides[NPY_MAXDIMS];
136+
npy_intp wheremask_strides[NPY_MAXDIMS];
137+
npy_intp i, coord[NPY_MAXDIMS];
138+
139+
if (PyArray_HASFIELDS(arr)) {
140+
PyErr_SetString(PyExc_RuntimeError,
141+
"field-NA is not yet supported");
142+
return -1;
143+
}
144+
145+
/* Use raw iteration with no heap memory allocation */
146+
if (PyArray_PrepareTwoRawArrayIter(
147+
PyArray_NDIM(arr), PyArray_DIMS(arr),
148+
PyArray_MASKNA_DATA(arr), PyArray_MASKNA_STRIDES(arr),
149+
PyArray_DATA(wheremask), wheremask_strides_bcast,
150+
&ndim, shape,
151+
&data, strides,
152+
&wheremask_data, wheremask_strides) < 0) {
153+
return -1;
154+
}
155+
156+
/* Do the iteration */
157+
NPY_RAW_ITER_START(idim, ndim, coord, shape) {
158+
char *d = data, *where_d = wheremask_data;
159+
/* Process the innermost dimension */
160+
for (i = 0; i < shape[0]; ++i) {
161+
if (*where_d && !NpyMaskValue_IsExposed((npy_mask)(*d))) {
162+
return 1;
163+
}
164+
165+
d += strides[0];
166+
where_d += wheremask_strides[0];
167+
}
168+
} NPY_RAW_ITER_TWO_NEXT(idim, ndim, coord, shape,
169+
data, strides,
170+
wheremask_data, wheremask_strides);
171+
}
78172
}
79173

80174
return 0;

0 commit comments

Comments
 (0)
0