8000 DEP: Deprecate coercion to subarray dtypes by seberg · Pull Request #17419 · numpy/numpy · GitHub
[go: up one dir, main page]

Skip to content

DEP: Deprecate coercion to subarray dtypes #17419

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Oct 7, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions doc/release/upcoming_changes/17419.deprecation.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
Arrays cannot be created using subarray dtypes
----------------------------------------------
Array creation and casting using ``np.array(obj, dtype)``
and ``arr.astype(dtype)`` will no longer support ``dtype``
being a subarray dtype such as ``np.dtype("(2)i,")``.

For such a ``dtype`` the following behaviour occurs currently::

    res = np.array(obj, dtype)

    res.dtype is not dtype
    res.dtype is dtype.base
    res.shape[-dtype.ndim:] == dtype.shape

The shape of the dtype is absorbed into the shape of the array.
This leads to inconsistencies when ``obj`` is:

* a scalar, such as ``np.array(1, dtype="(2)i")``
* an array, such as ``np.array(np.array([1]), dtype="(2)i")``

In most cases the work-around is to pass the base dtype
(``dtype.base``) directly and, if needed, check that
``res.shape[-dtype.ndim:] == dtype.shape``.
If this is insufficient, please open an issue on the NumPy issue
tracker.
202 changes: 202 additions & 0 deletions numpy/core/src/multiarray/ctors.c
Original file line number Diff line number Diff line change
Expand Up @@ -1367,6 +1367,160 @@ PyArray_GetArrayParamsFromObject(PyObject *NPY_UNUSED(op),
}


/*
* This function is a legacy implementation to retain subarray dtype
* behaviour in array coercion. The behaviour here makes sense if tuples
* of matching dimensionality are being coerced. Due to the difficulty
* that the result is ill-defined for lists of array-likes, this is deprecated.
*
* WARNING: Do not use this function, it exists purely to support a deprecated
* code path.
*/
static int
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In case it isn't obvious from the comment. Do not review this function, it is a straight copy from the 1.19.x branch.

setArrayFromSequence(PyArrayObject *a, PyObject *s,
        int dim, PyArrayObject * dst)
{
    /*
     * Recursively fill `dst` from `s`.
     *
     *   a   - the array being filled as a whole; its dimensions are used
     *         for all length checks.
     *   s   - the object copied from (array, array-like or sequence).
     *   dim - the axis of `a` that `s` is matched against.
     *   dst - the part of `a` that receives the data of `s`; pass NULL on
     *         the outermost call to use `a` itself.
     *
     * Returns 0 on success and -1 on failure.  On failure either this
     * function or the failing callee is expected to have set the Python
     * exception.
     */
    Py_ssize_t i, slen;
    npy_intp alen;
    int res;
    PyObject *seq = NULL;
    PyObject *tmp;

    /* first recursion, view equal destination */
    if (dst == NULL) {
        dst = a;
    }

    /*
     * This code is to ensure that the sequence access below will
     * return a lower-dimensional sequence.
     */

    /* INCREF on entry DECREF on exit */
    Py_INCREF(s);

    if (PyArray_Check(s)) {
        if (!(PyArray_CheckExact(s))) {
            /*
             * make sure a base-class array is used so that the dimensionality
             * reduction assumption is correct.
             */
            /*
             * This will DECREF(s) if replaced.  On failure `s` is NULL
             * afterwards, which is why the fail path uses Py_XDECREF.
             */
            s = PyArray_EnsureArray(s);
            if (s == NULL) {
                goto fail;
            }
        }

        /* dst points to correct array subsection */
        if (PyArray_CopyInto(dst, (PyArrayObject *)s) < 0) {
            goto fail;
        }

        Py_DECREF(s);
        return 0;
    }

    if (dim > PyArray_NDIM(a)) {
        PyErr_Format(PyExc_ValueError,
                "setArrayFromSequence: sequence/array dimensions mismatch.");
        goto fail;
    }

    /* Try __array__ before using s as a sequence */
    tmp = _array_from_array_like(s, NULL, 0, NULL);
    if (tmp == NULL) {
        goto fail;
    }
    else if (tmp == Py_NotImplemented) {
        /* Not an array-like: fall through and treat `s` as a sequence. */
        Py_DECREF(tmp);
    }
    else {
        int r = PyArray_CopyInto(dst, (PyArrayObject *)tmp);
        Py_DECREF(tmp);
        if (r < 0) {
            goto fail;
        }
        Py_DECREF(s);
        return 0;
    }

    seq = PySequence_Fast(s, "Could not convert object to sequence");
    if (seq == NULL) {
        goto fail;
    }
    slen = PySequence_Fast_GET_SIZE(seq);

    /*
     * Either the dimensions match, or the sequence has length 1 and can
     * be broadcast to the destination.
     */
    if (slen != PyArray_DIMS(a)[dim] && slen != 1) {
        PyErr_Format(PyExc_ValueError,
                "cannot copy sequence with size %zd to array axis "
                "with dimension %" NPY_INTP_FMT, slen, PyArray_DIMS(a)[dim]);
        goto fail;
    }

    /*
     * After the check above `slen` is either 1 or equal to the length of
     * the axis, so a single loop over the axis covers both cases: a
     * length-1 sequence broadcasts its only item to every output position,
     * otherwise items are copied element by element.
     */
    alen = PyArray_DIM(a, dim);
    for (i = 0; i < alen; i++) {
        PyObject *o = PySequence_Fast_GET_ITEM(seq, (slen == 1) ? 0 : i);

        if ((PyArray_NDIM(a) - dim) > 1) {
            /* More axes remain: recurse into the i-th sub-array of dst. */
            PyArrayObject *sub =
                    (PyArrayObject *)array_item_asarray(dst, i);
            if (sub == NULL) {
                goto fail;
            }

            res = setArrayFromSequence(a, o, dim+1, sub);
            Py_DECREF(sub);
        }
        else {
            /* Last axis: store the item directly into the i-th element. */
            char * b = (PyArray_BYTES(dst) + i * PyArray_STRIDES(dst)[0]);
            res = PyArray_SETITEM(dst, b, o);
        }
        if (res < 0) {
            goto fail;
        }
    }

    Py_DECREF(seq);
    Py_DECREF(s);
    return 0;

 fail:
    Py_XDECREF(seq);
    /* `s` may be NULL here when PyArray_EnsureArray() failed. */
    Py_XDECREF(s);
    /*
     * Always report failure: a status left over from an earlier,
     * successful iteration must not leak out as a success.
     */
    return -1;
}



/*NUMPY_API
* Does not check for NPY_ARRAY_ENSURECOPY and NPY_ARRAY_NOTSWAPPED in flags
* Steals a reference to newtype --- which can be NULL
Expand Down Expand Up @@ -1407,6 +1561,54 @@ PyArray_FromAny(PyObject *op, PyArray_Descr *newtype, int min_depth,
if (ndim < 0) {
return NULL;
}

/*
 * Legacy path: taken only when a descriptor was fixed by the caller and
 * that descriptor has a subarray.
 */
if (NPY_UNLIKELY(fixed_descriptor != NULL && PyDataType_HASSUBARRAY(dtype))) {
/*
* When a subarray dtype was passed in, its dimensions are absorbed
* into the array dimension (causing a dimension mismatch).
* We can't reasonably handle this because of inconsistencies in
* how it was handled (depending on nested list vs. embed array-likes).
* So we give a deprecation warning and fall back to legacy code.
*/
ret = (PyArrayObject *)PyArray_NewFromDescr(
&PyArray_Type, dtype, ndim, dims, NULL, NULL,
flags&NPY_ARRAY_F_CONTIGUOUS, NULL);
if (ret == NULL) {
npy_free_coercion_cache(cache);
return NULL;
}
/*
 * The subarray dimensions were absorbed into the result, so its
 * dimensionality must differ from the discovered `ndim`.
 */
assert(PyArray_NDIM(ret) != ndim);

if (cache == NULL) {
/* This is a single item. Sets only first subarray element. */
assert(ndim == 0);
if (PyArray_Pack(PyArray_DESCR(ret), PyArray_DATA(ret), op) < 0) {
Py_DECREF(ret);
return NULL;
}
}
else {
/*
 * The cache is released before filling: the legacy helper is handed
 * `op` itself and walks it again without using the cache.
 */
npy_free_coercion_cache(cache);
if (setArrayFromSequence(ret, op, 0, NULL) < 0) {
Py_DECREF(ret);
return NULL;
}
}
/* NumPy 1.20, 2020-10-01 */
/*
 * The warning is issued only after the array was filled successfully;
 * a non-zero DEPRECATE() result discards the array and propagates the
 * error (presumably the warning having been turned into an exception).
 */
if (DEPRECATE(
"using a dtype with a subarray field is deprecated. "
"This can lead to inconsistent behaviour due to the resulting "
"dtype being different from the input dtype. "
"You may try to use `dtype=dtype.base`, which should give the "
"same result for most inputs, but does not guarantee the "
"output dimensions to match the subarray ones. "
"(Deprecated NumPy 1.20)")) {
Py_DECREF(ret);
return NULL;
}
return (PyObject *)ret;
}

if (dtype == NULL) {
dtype = PyArray_DescrFromType(NPY_DEFAULT_TYPE);
}
Expand Down
14 changes: 14 additions & 0 deletions numpy/core/src/multiarray/methods.c
Original file line number Diff line number Diff line change
Expand Up @@ -844,6 +844,20 @@ array_astype(PyArrayObject *self, PyObject *args, PyObject *kwds)
if (ret == NULL) {
return NULL;
}
/* NumPy 1.20, 2020-10-01 */
/*
 * Warn only when the result has a different dimensionality than the
 * input; this is the condition used here to detect that a subarray dtype
 * was requested.  A non-zero DEPRECATE() result discards `ret` and
 * propagates the error.
 */
if ((PyArray_NDIM(self) != PyArray_NDIM(ret)) && DEPRECATE(
        "using a dtype with a subarray field is deprecated. "
        "This can lead to inconsistent behaviour due to the resulting "
        "dtype being different from the input dtype. "
        "You may try to use `dtype=dtype.base`, which should give the "
        "same result for most inputs, but does not guarantee the "
        "output dimensions to match the subarray ones. "
        "For `arr.astype()` the old, surprising, behaviour can be "
        /* The trailing space keeps "followed" and "by" separate words. */
        "retained using `res = np.empty(arr.shape, dtype)` followed "
        "by `res[...] = arr`. (Deprecated NumPy 1.20)")) {
    Py_DECREF(ret);
    return NULL;
}

if (PyArray_CopyInto(ret, self) < 0) {
Py_DECREF(ret);
Expand Down
41 changes: 41 additions & 0 deletions numpy/core/tests/test_deprecations.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,8 @@ def assert_deprecated(self, function, num=1, ignore_others=False,
kwargs : dict
Keyword arguments for `function`
"""
__tracebackhide__ = True # Hide traceback for py.test

# reset the log
self.log[:] = []

Expand Down Expand Up @@ -728,3 +730,42 @@ def test_not_deprecated(self):
np.concatenate(([0.], [1.]), out=np.empty(2, dtype=np.int64),
casting="same_kind")


class TestDeprecateSubarrayDTypeDuringArrayCoercion(_DeprecationTestCase):
    """Check the deprecation of coercing to a subarray dtype.

    Covers ``np.array(obj, dtype=...)`` and ``arr.astype(...)`` with a
    subarray dtype, plus the error paths that are not deprecated.
    """

    # Warning text for this deprecation (presumably matched by the
    # ``_DeprecationTestCase`` helpers -- confirm against the base class).
    message = "using a dtype with a subarray field is deprecated"

    @pytest.mark.parametrize(
        ["obj", "dtype"],
        [
            ([((0, 1), (1, 2)), ((2,),)], '(2,2)f4'),
            (["1", "2"], "(2)i,"),
        ])
    def test_deprecated_sequence(self, obj, dtype):
        """Sequences warn, and are filled one subarray per sequence item."""
        dtype = np.dtype(dtype)

        def coerce():
            return np.array(obj, dtype=dtype)

        self.assert_deprecated(coerce)
        with pytest.warns(DeprecationWarning):
            res = coerce()

        # Using `arr.astype(subarray_dtype)` is also deprecated, because
        # it uses broadcasting instead of casting each element.
        self.assert_deprecated(lambda: res.astype(dtype))

        # Build the reference result by assigning every item separately.
        expected = np.empty(len(obj), dtype=dtype)
        for index, item in enumerate(obj):
            expected[index] = item

        assert_array_equal(res, expected)

    def test_deprecated_array(self):
        """Array input warns and is broadcast against the subarray shape."""
        # Arrays are more complex, since they "broadcast" on success:
        subarray_dtype = "(2)i,"
        arr = np.array([1, 2])

        self.assert_deprecated(lambda: np.array(arr, dtype=subarray_dtype))
        with pytest.warns(DeprecationWarning):
            res = np.array(arr, dtype=subarray_dtype)

        assert_array_equal(res, [[1, 2], [1, 2]])

    def test_not_deprecated(self):
        """Shape mismatches raise ``ValueError`` instead of warning."""
        # These error paths are not deprecated, the tests should be retained
        # when the deprecation is finalized.
        subarray_dtype = "(2,2)f"
        arr = np.arange(5 * 2).reshape(5, 2)

        with pytest.raises(ValueError):
            arr.astype(subarray_dtype)
        with pytest.raises(ValueError):
            np.array(arr, dtype=subarray_dtype)
0