ENH: missingdata: Rewrite PyArray_Concatenate to work with NA masks · ContinuumIO/numpy@9194b3a · GitHub
[go: up one dir, main page]

Skip to content

Commit 9194b3a

Browse files
mwiebe authored and charris committed
ENH: missingdata: Rewrite PyArray_Concatenate to work with NA masks
It should also use less memory for heterogeneous inputs, because it no longer makes extra copies in that case.
1 parent 99a21ef commit 9194b3a

File tree

15 files changed

+528
-188
lines changed

15 files changed

+528
-188
lines changed

doc/release/2.0.0-notes.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,12 @@ What works with NA:
2929
* Array methods:
3030
+ ndarray.clip, ndarray.min, ndarray.max, ndarray.sum, ndarray.prod,
3131
ndarray.conjugate, ndarray.diagonal
32+
+ numpy.concatenate
3233

3334
What doesn't work with NA:
3435
* Fancy indexing, such as with lists and partial boolean masks.
36+
* ndarray.flat and any other methods that use the old iterator
37+
mechanism instead of the newer nditer.
3538
* UFunc.reduce of multi-dimensional arrays, with skipna=True and a ufunc
3639
that doesn't have an identity.
3740
* UFunc.accumulate, UFunc.reduceat.

doc/source/reference/c-api.array.rst

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1667,18 +1667,20 @@ Conversion
16671667
copied into every location. A -1 is returned if an error occurs,
16681668
otherwise 0 is returned.
16691669

1670-
.. cfunction:: PyObject* PyArray_View(PyArrayObject* self, PyArray_Descr* dtype)
1671-
1672-
Equivalent to :meth:`ndarray.view` (*self*, *dtype*). Return a new view of
1673-
the array *self* as possibly a different data-type, *dtype*. If
1674-
*dtype* is ``NULL``, then the returned array will have the same
1675-
data type as *self*. The new data-type must be consistent with
1676-
the size of *self*. Either the itemsizes must be identical, or
1677-
*self* must be single-segment and the total number of bytes must
1678-
be the same. In the latter case the dimensions of the returned
1679-
array will be altered in the last (or first for Fortran-style
1680-
contiguous arrays) dimension. The data area of the returned array
1681-
and self is exactly the same.
1670+
.. cfunction:: PyObject* PyArray_View(PyArrayObject* self, PyArray_Descr* dtype, PyTypeObject *ptype)
1671+
1672+
Equivalent to :meth:`ndarray.view` (*self*, *dtype*). Return a new
1673+
view of the array *self* as possibly a different data-type, *dtype*,
1674+
and different array subclass *ptype*.
1675+
1676+
If *dtype* is ``NULL``, then the returned array will have the same
1677+
data type as *self*. The new data-type must be consistent with the
1678+
size of *self*. Either the itemsizes must be identical, or *self* must
1679+
be single-segment and the total number of bytes must be the same.
1680+
In the latter case the dimensions of the returned array will be
1681+
altered in the last (or first for Fortran-style contiguous arrays)
1682+
dimension. The data area of the returned array and self is exactly
1683+
the same.
16821684

16831685

16841686
Shape Manipulation

numpy/add_newdocs.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3702,7 +3702,7 @@ def luf(lamdaexpr, *args, **kwargs):
37023702

37033703
add_newdoc('numpy.core.multiarray', 'copyto',
37043704
"""
3705-
copyto(dst, src, casting='same_kind', where=None)
3705+
copyto(dst, src, casting='same_kind', where=None, preservena=False)
37063706
37073707
Copies values from `src` into `dst`, broadcasting as necessary.
37083708
Raises a TypeError if the casting rule is violated, and if
@@ -3725,10 +3725,13 @@ def luf(lamdaexpr, *args, **kwargs):
37253725
* 'same_kind' means only safe casts or casts within a kind,
37263726
like float64 to float32, are allowed.
37273727
* 'unsafe' means any data conversions may be done.
3728-
where : array_like of bool
3728+
where : array_like of bool, optional
37293729
A boolean array which is broadcasted to match the dimensions
37303730
of `dst`, and selects elements to copy from `src` to `dst`
37313731
wherever it contains the value True.
3732+
preservena : bool, optional
3733+
If set to True, leaves any NA values in `dst` untouched. This
3734+
is similar to the "hard mask" feature in numpy.ma.
37323735
37333736
""")
37343737

numpy/core/shape_base.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -267,5 +267,10 @@ def hstack(tup):
267267
[3, 4]])
268268
269269
"""
270-
return _nx.concatenate(map(atleast_1d,tup),1)
270+
arrs = map(atleast_1d,tup)
271+
# As a special case, dimension 0 of 1-dimensional arrays is "horizontal"
272+
if arrs[0].ndim == 1:
273+
return _nx.concatenate(arrs, 0)
274+
else:
275+
return _nx.concatenate(arrs, 1)
271276

numpy/core/src/multiarray/convert_datatype.c

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1331,7 +1331,13 @@ NPY_NO_EXPORT PyArray_Descr *
13311331
PyArray_MinScalarType(PyArrayObject *arr)
13321332
{
13331333
PyArray_Descr *dtype = PyArray_DESCR(arr);
1334-
if (PyArray_NDIM(arr) > 0 || !PyTypeNum_ISNUMBER(dtype->type_num)) {
1334+
/*
1335+
* If the array isn't a numeric scalar or is a scalar but with
1336+
* its value masked out, just return the array's dtype.
1337+
*/
1338+
if (PyArray_NDIM(arr) > 0 || !PyTypeNum_ISNUMBER(dtype->type_num) ||
1339+
(PyArray_HASMASKNA(arr) && !NpyMaskValue_IsExposed(
1340+
(npy_mask)*PyArray_MASKNA_DATA(arr)))) {
13351341
Py_INCREF(dtype);
13361342
return dtype;
13371343
}

numpy/core/src/multiarray/ctors.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1175,6 +1175,7 @@ PyArray_NewLikeArray(PyArrayObject *prototype, NPY_ORDER order,
11751175
int idim;
11761176

11771177
PyArray_CreateSortedStridePerm(PyArray_NDIM(prototype),
1178+
PyArray_SHAPE(prototype),
11781179
PyArray_STRIDES(prototype),
11791180
strideperm);
11801181

numpy/core/src/multiarray/dtype_transfer.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3922,7 +3922,7 @@ PyArray_PrepareOneRawArrayIter(int ndim, npy_intp *shape,
39223922
}
39233923

39243924
/* Sort the axes based on the destination strides */
3925-
PyArray_CreateSortedStridePerm(ndim, strides, strideperm);
3925+
PyArray_CreateSortedStridePerm(ndim, shape, strides, strideperm);
39263926
for (i = 0; i < ndim; ++i) {
39273927
int iperm = strideperm[ndim - i - 1].perm;
39283928
out_shape[i] = shape[iperm];
@@ -4052,7 +4052,7 @@ PyArray_PrepareTwoRawArrayIter(int ndim, npy_intp *shape,
40524052
}
40534053

40544054
/* Sort the axes based on the destination strides */
4055-
PyArray_CreateSortedStridePerm(ndim, stridesA, strideperm);
4055+
PyArray_CreateSortedStridePerm(ndim, shape, stridesA, strideperm);
40564056
for (i = 0; i < ndim; ++i) {
40574057
int iperm = strideperm[ndim - i - 1].perm;
40584058
out_shape[i] = shape[iperm];
@@ -4186,7 +4186,7 @@ PyArray_PrepareThreeRawArrayIter(int ndim, npy_intp *shape,
41864186
}
41874187

41884188
/* Sort the axes based on the destination strides */
4189-
PyArray_CreateSortedStridePerm(ndim, stridesA, strideperm);
4189+
PyArray_CreateSortedStridePerm(ndim, shape, stridesA, strideperm);
41904190
for (i = 0; i < ndim; ++i) {
41914191
int iperm = strideperm[ndim - i - 1].perm;
41924192
out_shape[i] = shape[iperm];
@@ -4324,7 +4324,7 @@ PyArray_PrepareFourRawArrayIter(int ndim, npy_intp *shape,
43244324
}
43254325

43264326
/* Sort the axes based on the destination strides */
4327-
PyArray_CreateSortedStridePerm(ndim, stridesA, strideperm);
4327+
PyArray_CreateSortedStridePerm(ndim, shape, stridesA, strideperm);
43284328
for (i = 0; i < ndim; ++i) {
43294329
int iperm = strideperm[ndim - i - 1].perm;
43304330
out_shape[i] = shape[iperm];

0 commit comments

Comments
 (0)
0