ENH: missingdata: Add maskna= parameter to np.copy and ndarray.copy · numpy/numpy@bede98e · GitHub
[go: up one dir, main page]

Skip to content

Commit bede98e

Browse files
committed
ENH: missingdata: Add maskna= parameter to np.copy and ndarray.copy
1 parent ce2f51d commit bede98e

File tree

8 files changed

+319
-156
lines changed

8 files changed

+319
-156
lines changed

numpy/add_newdocs.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3247,17 +3247,21 @@ def luf(lamdaexpr, *args, **kwargs):
32473247

32483248
add_newdoc('numpy.core.multiarray', 'ndarray', ('copy',
32493249
"""
3250-
a.copy(order='C')
3250+
a.copy(order='C', maskna=None)
32513251
32523252
Return a copy of the array.
32533253
32543254
Parameters
32553255
----------
3256-
order : {'C', 'F', 'A'}, optional
3257-
By default, the result is stored in C-contiguous (row-major) order in
3258-
memory. If `order` is `F`, the result has 'Fortran' (column-major)
3259-
order. If order is 'A' ('Any'), then the result has the same order
3260-
as the input.
3256+
order : {'C', 'F', 'A', 'K'}, optional
3257+
Controls the memory layout of the copy. 'C' means C-order,
3258+
'F' means F-order, 'A' means 'F' if `a` is Fortran contiguous,
3259+
'C' otherwise. 'K' means match the layout of `a` as closely
3260+
as possible.
3261+
maskna : bool, optional
3262+
If specified, forces the copy to have or to not have an
3263+
NA mask. This is a way to remove an NA mask from an array
3264+
while making a copy.
32613265
32623266
See also
32633267
--------

numpy/core/src/multiarray/convert.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -515,8 +515,9 @@ PyArray_AssignOne(PyArrayObject *dst,
515515
NPY_NO_EXPORT PyObject *
516516
PyArray_NewCopy(PyArrayObject *obj, NPY_ORDER order)
517517
{
518-
PyArrayObject *ret = (PyArrayObject *)PyArray_NewLikeArray(
519-
obj, order, NULL, 1);
518+
PyArrayObject *ret;
519+
520+
ret = (PyArrayObject *)PyArray_NewLikeArray(obj, order, NULL, 1);
520521
if (ret == NULL) {
521522
return NULL;
522523
}
@@ -528,7 +529,7 @@ PyArray_NewCopy(PyArrayObject *obj, NPY_ORDER order)
528529
}
529530
}
530531

531-
if (PyArray_CopyInto(ret, obj) == -1) {
532+
if (PyArray_AssignArray(ret, obj, NULL, NPY_UNSAFE_CASTING, 0, NULL) < 0) {
532533
Py_DECREF(ret);
533534
return NULL;
534535
}

numpy/core/src/multiarray/ctors.c

Lines changed: 79 additions & 129 deletions
Original file line numberDiff line numberDiff line change
@@ -1856,16 +1856,7 @@ PyArray_FromAny(PyObject *op, PyArray_Descr *newtype, int min_depth,
18561856
ret = NULL;
18571857
}
18581858
else {
1859-
if (PyArray_HASMASKNA((PyArrayObject *)arr) &&
1860-
(flags & NPY_ARRAY_ALLOWNA) == 0) {
1861-
PyErr_SetString(PyExc_ValueError,
1862-
"this operation does not support "
1863-
"arrays with NA masks");
1864-
ret = NULL;
1865-
}
1866-
else {
1867-
ret = (PyArrayObject *)PyArray_FromArray(arr, newtype, flags);
1868-
}
1859+
ret = (PyArrayObject *)PyArray_FromArray(arr, newtype, flags);
18691860
Py_DECREF(arr);
18701861
}
18711862
}
@@ -1962,13 +1953,12 @@ PyArray_FromArray(PyArrayObject *arr, PyArray_Descr *newtype, int flags)
19621953
int copy = 0;
19631954
int arrflags;
19641955
PyArray_Descr *oldtype;
1965-
PyTypeObject *subtype;
19661956
NPY_CASTING casting = NPY_SAFE_CASTING;
19671957

19681958
oldtype = PyArray_DESCR(arr);
1969-
subtype = Py_TYPE(arr);
19701959
if (newtype == NULL) {
1971-
newtype = oldtype; Py_INCREF(oldtype);
1960+
newtype = oldtype;
1961+
Py_INCREF(oldtype);
19721962
}
19731963
itemsize = newtype->elsize;
19741964
if (itemsize == 0) {
@@ -2005,141 +1995,79 @@ PyArray_FromArray(PyArrayObject *arr, PyArray_Descr *newtype, int flags)
20051995
return NULL;
20061996
}
20071997

2008-
/* Don't copy if sizes are compatible */
2009-
if ((flags & NPY_ARRAY_ENSURECOPY) ||
2010-
PyArray_EquivTypes(oldtype, newtype)) {
2011-
arrflags = PyArray_FLAGS(arr);
2012-
if (PyArray_NDIM(arr) <= 1 && (flags & NPY_ARRAY_F_CONTIGUOUS)) {
2013-
flags |= NPY_ARRAY_C_CONTIGUOUS;
2014-
}
2015-
copy = (flags & NPY_ARRAY_ENSURECOPY) ||
2016-
((flags & NPY_ARRAY_C_CONTIGUOUS) &&
2017-
(!(arrflags & NPY_ARRAY_C_CONTIGUOUS)))
2018-
|| ((flags & NPY_ARRAY_ALIGNED) &&
2019-
(!(arrflags & NPY_ARRAY_ALIGNED)))
2020-
|| (PyArray_NDIM(arr) > 1 &&
2021-
((flags & NPY_ARRAY_F_CONTIGUOUS) &&
2022-
(!(arrflags & NPY_ARRAY_F_CONTIGUOUS))))
2023-
|| ((flags & NPY_ARRAY_WRITEABLE) &&
2024-
(!(arrflags & NPY_ARRAY_WRITEABLE)));
2025-
2026-
if (copy) {
2027-
if ((flags & NPY_ARRAY_UPDATEIFCOPY) &&
2028-
(!PyArray_ISWRITEABLE(arr))) {
2029-
Py_DECREF(newtype);
2030-
PyErr_SetString(PyExc_ValueError,
2031-
"cannot copy back to a read-only array");
2032-
return NULL;
2033-
}
2034-
if ((flags & NPY_ARRAY_ENSUREARRAY)) {
2035-
subtype = &PyArray_Type;
2036-
}
2037-
ret = (PyArrayObject *)
2038-
PyArray_NewFromDescr(subtype, newtype,
2039-
PyArray_NDIM(arr),
2040-
PyArray_DIMS(arr),
2041-
NULL, NULL,
2042-
flags & NPY_ARRAY_F_CONTIGUOUS,
2043-
(PyObject *)arr);
2044-
if (ret == NULL) {
2045-
return NULL;
2046-
}
2047-
2048-
/* Allocate an NA mask if necessary from the input */
2049-
if (PyArray_HASMASKNA(arr)) {
2050-
if (PyArray_AllocateMaskNA(ret, 1, 0, 1) < 0) {
2051-
Py_DECREF(ret);
2052-
return NULL;
2053-
}
2054-
}
2055-
2056-
if (PyArray_CopyInto(ret, arr) < 0) {
2057-
Py_DECREF(ret);
2058-
return NULL;
2059-
}
1998+
arrflags = PyArray_FLAGS(arr);
1999+
if (PyArray_NDIM(arr) <= 1 && (flags & NPY_ARRAY_F_CONTIGUOUS)) {
2000+
flags |= NPY_ARRAY_C_CONTIGUOUS;
2001+
}
2002+
copy = (flags & NPY_ARRAY_ENSURECOPY) ||
2003+
((flags & NPY_ARRAY_C_CONTIGUOUS) &&
2004+
(!(arrflags & NPY_ARRAY_C_CONTIGUOUS)))
2005+
|| ((flags & NPY_ARRAY_ALIGNED) &&
2006+
(!(arrflags & NPY_ARRAY_ALIGNED)))
2007+
|| (PyArray_NDIM(arr) > 1 &&
2008+
((flags & NPY_ARRAY_F_CONTIGUOUS) &&
2009+
(!(arrflags & NPY_ARRAY_F_CONTIGUOUS))))
2010+
|| ((flags & NPY_ARRAY_WRITEABLE) &&
2011+
(!(arrflags & NPY_ARRAY_WRITEABLE))) ||
2012+
!PyArray_EquivTypes(oldtype, newtype);
20602013

2061-
/* Allocate an NA mask if requested but wasn't from the input */
2062-
if ((flags & (NPY_ARRAY_MASKNA | NPY_ARRAY_OWNMASKNA)) != 0 &&
2063-
!PyArray_HASMASKNA(ret)) {
2064-
if (PyArray_AllocateMaskNA(ret, 1, 0, 1) < 0) {
2065-
Py_DECREF(ret);
2066-
return NULL;
2067-
}
2068-
}
2014+
if (copy) {
2015+
NPY_ORDER order = NPY_KEEPORDER;
2016+
int subok = 1;
20692017

2070-
if (flags & NPY_ARRAY_UPDATEIFCOPY) {
2071-
/*
2072-
* Don't use PyArray_SetBaseObject, because that compresses
2073-
* the chain of bases.
2074-
*/
2075-
Py_INCREF(arr);
2076-
((PyArrayObject_fieldaccess *)ret)->base = (PyObject *)arr;
2077-
PyArray_ENABLEFLAGS(ret, NPY_ARRAY_UPDATEIFCOPY);
2078-
PyArray_CLEARFLAGS(arr, NPY_ARRAY_WRITEABLE);
2079-
}
2018+
/* Set the order for the copy being made based on the flags */
2019+
if (flags & NPY_ARRAY_F_CONTIGUOUS) {
2020+
order = NPY_FORTRANORDER;
20802021
}
2081-
/*
2082-
* If no copy then just increase the reference
2083-
* count and return the input
2084-
*/
2085-
else {
2086-
Py_DECREF(newtype);
2087-
if ((flags & NPY_ARRAY_ENSUREARRAY) &&
2088-
!PyArray_CheckExact(arr)) {
2089-
PyArray_Descr *dtype = PyArray_DESCR(arr);
2090-
Py_INCREF(dtype);
2091-
ret = (PyArrayObject *)
2092-
PyArray_NewFromDescr(&PyArray_Type,
2093-
dtype,
2094-
PyArray_NDIM(arr),
2095-
PyArray_DIMS(arr),
2096-
PyArray_STRIDES(arr),
2097-
PyArray_DATA(arr),
2098-
PyArray_FLAGS(arr),
2099-
NULL);
2100-
if (ret == NULL) {
2101-
return NULL;
2102-
}
2103-
if (PyArray_SetBaseObject(ret, (PyObject *)arr)) {
2104-
Py_DECREF(ret);
2105-
return NULL;
2106-
}
2107-
}
2108-
else {
2109-
ret = arr;
2110-
}
2111-
Py_INCREF(arr);
2022+
else if (flags & NPY_ARRAY_C_CONTIGUOUS) {
2023+
order = NPY_CORDER;
21122024
}
2113-
}
21142025

2115-
/*
2116-
* The desired output type is different than the input
2117-
* array type and copy was not specified
2118-
*/
2119-
else {
21202026
if ((flags & NPY_ARRAY_UPDATEIFCOPY) &&
21212027
(!PyArray_ISWRITEABLE(arr))) {
21222028
Py_DECREF(newtype);
21232029
PyErr_SetString(PyExc_ValueError,
2124-
"cannot copy back to a read-only array B");
2030+
"cannot copy back to a read-only array");
21252031
return NULL;
21262032
}
21272033
if ((flags & NPY_ARRAY_ENSUREARRAY)) {
2128-
subtype = &PyArray_Type;
2034+
subok = 0;
21292035
}
2130-
ret = (PyArrayObject *)
2131-
PyArray_NewFromDescr(subtype, newtype,
2132-
PyArray_NDIM(arr), PyArray_DIMS(arr),
2133-
NULL, NULL,
2134-
flags & NPY_ARRAY_F_CONTIGUOUS,
2135-
(PyObject *)arr);
2036+
ret = (PyArrayObject *)PyArray_NewLikeArray(arr, order,
2037+
newtype, subok);
21362038
if (ret == NULL) {
21372039
return NULL;
21382040
}
2139-
if (PyArray_CastTo(ret, arr) < 0) {
2041+
2042+
/*
2043+
* Allocate an NA mask if necessary from the input,
2044+
* if NAs are being allowed.
2045+
*/
2046+
if (PyArray_HASMASKNA(arr) && (flags & NPY_ARRAY_ALLOWNA)) {
2047+
if (PyArray_AllocateMaskNA(ret, 1, 0, 1) < 0) {
2048+
Py_DECREF(ret);
2049+
return NULL;
2050+
}
2051+
}
2052+
2053+
/*
2054+
* If a ALLOWNA was not enabled, and 'arr' has an NA mask,
2055+
* this will raise an error if 'arr' contains any NA values.
2056+
*/
2057+
if (PyArray_CopyInto(ret, arr) < 0) {
21402058
Py_DECREF(ret);
21412059
return NULL;
21422060
}
2061+
2062+
/* Allocate an NA mask if requested but wasn't from the input */
2063+
if ((flags & (NPY_ARRAY_MASKNA | NPY_ARRAY_OWNMASKNA)) != 0 &&
2064+
!PyArray_HASMASKNA(ret)) {
2065+
if (PyArray_AllocateMaskNA(ret, 1, 0, 1) < 0) {
2066+
Py_DECREF(ret);
2067+
return NULL;
2068+
}
2069+
}
2070+
21432071
if (flags & NPY_ARRAY_UPDATEIFCOPY) {
21442072
/*
21452073
* Don't use PyArray_SetBaseObject, because that compresses
@@ -2151,6 +2079,28 @@ PyArray_FromArray(PyArrayObject *arr, PyArray_Descr *newtype, int flags)
21512079
PyArray_CLEARFLAGS(arr, NPY_ARRAY_WRITEABLE);
21522080
}
21532081
}
2082+
/*
2083+
* If no copy then just increase the reference
2084+
* count and return the input
2085+
*/
2086+
else {
2087+
Py_DECREF(newtype);
2088+
if ((flags & NPY_ARRAY_ENSUREARRAY) &&
2089+
!PyArray_CheckExact(arr)) {
2090+
PyArray_Descr *dtype = PyArray_DESCR(arr);
2091+
Py_INCREF(dtype);
2092+
2093+
ret = (PyArrayObject *)PyArray_View(arr, NULL, &PyArray_Type);
2094+
if (ret == NULL) {
2095+
return NULL;
2096+
}
2097+
}
2098+
else {
2099+
ret = arr;
2100+
}
2101+
Py_INCREF(arr);
2102+
}
2103+
21542104
return (PyObject *)ret;
21552105
}
21562106

numpy/core/src/multiarray/methods.c

Lines changed: 43 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1009,14 +1009,53 @@ static PyObject *
10091009
array_copy(PyArrayObject *self, PyObject *args, PyObject *kwds)
10101010
{
10111011
PyArray_ORDER order = NPY_CORDER;
1012-
static char *kwlist[] = {"order", NULL};
1012+
PyObject *maskna_in = Py_None;
1013+
int maskna = -1;
1014+
static char *kwlist[] = {"order", "maskna", NULL};
1015+
PyArrayObject *ret;
10131016

1014-
if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O&", kwlist,
1015-
PyArray_OrderConverter, &order)) {
1017+
if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O&O", kwlist,
1018+
PyArray_OrderConverter, &order,
1019+
&maskna_in)) {
10161020
return NULL;
10171021
}
10181022

1019-
return PyArray_NewCopy(self, order);
1023+
/* Treat None the same as not providing the parameter */
1024+
if (maskna_in != Py_None) {
1025+
maskna = PyObject_IsTrue(maskna_in);
1026+
if (maskna == -1) {
1027+
return NULL;
1028+
}
1029+
}
1030+
1031+
/* If maskna=False was passed and self has an NA mask, strip it away */
1032+
if (maskna == 0 && PyArray_HASMASKNA(self)) {
1033+
/* An array with no NA mask */
1034+
ret = (PyArrayObject *)PyArray_NewLikeArray(self, order, NULL, 1);
1035+
if (ret == NULL) {
1036+
return NULL;
1037+
}
1038+
1039+
/* AssignArray validates that 'self' contains no NA values */
1040+
if (PyArray_AssignArray(ret, self, NULL, NPY_UNSAFE_CASTING,
1041+
0, NULL) < 0) {
1042+
Py_DECREF(ret);
1043+
return NULL;
1044+
}
1045+
}
1046+
else {
1047+
ret = (PyArrayObject *)PyArray_NewCopy(self, order);
1048+
1049+
/* Add the NA mask if requested */
1050+
if (ret != NULL && maskna == 1) {
1051+
if (PyArray_AllocateMaskNA(ret, 1, 0, 1) < 0) {
1052+
Py_DECREF(ret);
1053+
return NULL;
1054+
}
1055+
}
1056+
}
1057+
1058+
return (PyObject *)ret;
10201059
}
10211060

10221061
#include <stdio.h>

0 commit comments

Comments
 (0)
0