10000 DEP,BUG: Coercion/cast of array to a subarray dtype will be fixed by seberg · Pull Request #17596 · numpy/numpy · GitHub
[go: up one dir, main page]

Skip to content

DEP,BUG: Coercion/cast of array to a subarray dtype will be fixed #17596

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Oct 29, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
DEP,BUG: Coercion/cast of array to a subarray dtype will be fixed
This currently appends the subarray dtype dimensions first and then
tries to assign to the result array which uses incorrect broadcasting
(broadcasting against the subarray dimensions instead of repeating
each element according to the subarray dimensions).

This also fixes the python scalar pathway `np.array(2, dtype="(2)f4,")`
which previously only filled the first value.  I consider that a clear
bug fix.

Closes gh-17511
  • Loading branch information
seberg committed Oct 21, 2020
commit fcc394033c5902bbd8104694c5f7d33a8b5eb99f
24 changes: 0 additions & 24 deletions doc/release/upcoming_changes/17419.deprecation.rst

This file was deleted.

30 changes: 30 additions & 0 deletions doc/release/upcoming_changes/17596.future.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
Arrays cannot be using subarray dtypes
--------------------------------------
Array creation and casting using ``np.array(arr, dtype)``
and ``arr.astype(dtype)`` will use different logic when ``dtype``
is a subarray dtype such as ``np.dtype("(2)i,")``.

For such a ``dtype`` the following behaviour is true::

res = np.array(arr, dtype)

res.dtype is not dtype
res.dtype is dtype.base
res.shape == arr.shape + dtype.shape

But ``res`` is filled using the logic:

res = np.empty(arr.shape + dtype.shape, dtype=dtype.base)
res[...] = arr

which uses incorrect broadcasting (and often leads to an error).
In the future, this will instead cast each element individually,
leading to the same result as::

res = np.array(arr, dtype=np.dtype(["f", dtype]))["f"]

Which can normally be used to opt-in to the new behaviour.

This change does not affect ``np.array(list, dtype="(2)i,")`` unless the
``list`` itself includes at least one array. In particular, the behaviour
is unchanged for a list of tuples.
111 changes: 77 additions & 34 deletions numpy/core/src/multiarray/ctors.c
Original file line number Diff line number Diff line change
Expand Up @@ -1565,49 +1565,66 @@ PyArray_FromAny(PyObject *op, PyArray_Descr *newtype, int min_depth,

if (NPY_UNLIKELY(fixed_descriptor != NULL && PyDataType_HASSUBARRAY(dtype))) {
/*
* When a subarray dtype was passed in, its dimensions are absorbed
* into the array dimension (causing a dimension mismatch).
* We can't reasonably handle this because of inconsistencies in
* how it was handled (depending on nested list vs. embed array-likes).
* So we give a deprecation warning and fall back to legacy code.
* When a subarray dtype was passed in, its dimensions are appended
* to the array dimension (causing a dimension mismatch).
* There is a problem with that, because if we coerce from non-arrays
* we do this correctly by element (as defined by tuples), but for
* arrays we first append the dimensions and then assign to the base
* dtype and then assign which causes the problem.
*
* Thus, we check if there is an array included, in that case we
* give a FutureWarning.
* When the warning is removed, PyArray_Pack will have to ensure
* that that it does not append the dimensions when creating the
* subarrays to assign `arr[0] = obj[0]`.
*/
ret = (PyArrayObject *)PyArray_NewFromDescr(
&PyArray_Type, dtype, ndim, dims, NULL, NULL,
flags&NPY_ARRAY_F_CONTIGUOUS, NULL);
if (ret == NULL) {
npy_free_coercion_cache(cache);
return NULL;
int includes_array = 0;
if (cache != NULL) {
/* This is not ideal, but it is a pretty special case */
coercion_cache_obj *next = cache;
while (next != NULL) {
if (!next->sequence) {
includes_array = 1;
break;
}
next = next->next;
}
}
assert(PyArray_NDIM(ret) != ndim);
if (includes_array) {
npy_free_coercion_cache(cache);

if (cache == NULL) {
/* This is a single item. Sets only first subarray element. */
assert(ndim == 0);
if (PyArray_Pack(PyArray_DESCR(ret), PyArray_DATA(ret), op) < 0) {
ret = (PyArrayObject *) PyArray_NewFromDescr(
&PyArray_Type, dtype, ndim, dims, NULL, NULL,
flags & NPY_ARRAY_F_CONTIGUOUS, NULL);
if (ret == NULL) {
return NULL;
}
assert(PyArray_NDIM(ret) != ndim);

/* NumPy 1.20, 2020-10-01 */
if (DEPRECATE_FUTUREWARNING(
"creating an array with a subarray dtype will behave "
"differently when the `np.array()` (or `asarray`, etc.) "
"call includes an array or array object.\n"
"If you are converting a single array or a list of arrays,"
"you can opt-in to the future behaviour using:\n"
" np.array(arr, dtype=np.dtype(['f', dtype]))['f']\n"
" np.array([arr1, arr2], dtype=np.dtype(['f', dtype]))['f']\n"
"\n"
"By including a new field and indexing it after the "
"conversion.\n"
"This may lead to a different result or to current failures "
"succeeding. (FutureWarning since NumPy 1.20)") < 0) {
Py_DECREF(ret);
return NULL;
}
}
else {
npy_free_coercion_cache(cache);

if (setArrayFromSequence(ret, op, 0, NULL) < 0) {
Py_DECREF(ret);
return NULL;
}
return (PyObject *)ret;
}
/* NumPy 1.20, 2020-10-01 */
if (DEPRECATE(
"using a dtype with a subarray field is deprecated. "
"This can lead to inconsistent behaviour due to the resulting "
"dtype being different from the input dtype. "
"You may try to use `dtype=dtype.base`, which should give the "
"same result for most inputs, but does not guarantee the "
"output dimensions to match the subarray ones. "
"(Deprecated NumPy 1.20)")) {
Py_DECREF(ret);
return NULL;
}
return (PyObject *)ret;
}

if (dtype == NULL) {
Expand Down Expand Up @@ -1700,26 +1717,52 @@ PyArray_FromAny(PyObject *op, PyArray_Descr *newtype, int min_depth,
}

/* Create a new array and copy the data */
Py_INCREF(dtype); /* hold on in case of a subarray that is replaced */
ret = (PyArrayObject *)PyArray_NewFromDescr(
&PyArray_Type, dtype, ndim, dims, NULL, NULL,
flags&NPY_ARRAY_F_CONTIGUOUS, NULL);
if (ret == NULL) {
npy_free_coercion_cache(cache);
Py_DECREF(dtype);
return NULL;
}
if (ndim == PyArray_NDIM(ret)) {
/*
* Appending of dimensions did not occur, so use the actual dtype
* below. This is relevant for S0 or U0 which can be replaced with
* S1 or U1, although that should likely change.
*/
Py_SETREF(dtype, PyArray_DESCR(ret));
Py_INCREF(dtype);
}

if (cache == NULL) {
/* This is a single item. Set it directly. */
assert(ndim == 0);

if (PyArray_Pack(PyArray_DESCR(ret), PyArray_BYTES(ret), op) < 0) {
if (PyArray_Pack(dtype, PyArray_BYTES(ret), op) < 0) {
Py_DECREF(dtype);
Py_DECREF(ret);
return NULL;
}
Py_DECREF(dtype);
return (PyObject *)ret;
}
assert(ndim != 0);
assert(op == cache->converted_obj);
if (PyArray_AssignFromCache(ret, cache) < 0) {

/* Decrease the number of dimensions to the detected ones */
int out_ndim = PyArray_NDIM(ret);
PyArray_Descr *out_descr = PyArray_DESCR(ret);
((PyArrayObject_fields *)ret)->nd = ndim;
((PyArrayObject_fields *)ret)->descr = dtype;

int success = PyArray_AssignFromCache(ret, cache);

((PyArrayObject_fields *)ret)->nd = out_ndim;
((PyArrayObject_fields *)ret)->descr = out_descr;
Py_DECREF(dtype);
if (success < 0) {
Py_DECREF(ret);
return NULL;
}
Expand Down
21 changes: 11 additions & 10 deletions numpy/core/src/multiarray/methods.c
Original file line number Diff line number Diff line change
Expand Up @@ -845,16 +845,17 @@ array_astype(PyArrayObject *self, PyObject *args, PyObject *kwds)
return NULL;
}
/* NumPy 1.20, 2020-10-01 */
if ((PyArray_NDIM(self) != PyArray_NDIM(ret)) && DEPRECATE(
"using a dtype with a subarray field is deprecated. "
"This can lead to inconsistent behaviour due to the resulting "
"dtype being different from the input dtype. "
"You may try to use `dtype=dtype.base`, which should give the "
"same result for most inputs, but does not guarantee the "
"output dimensions to match the subarray ones. "
"For `arr.astype()` the old, surprising, behaviour can be "
"retained using `res = np.empty(arr.shape, dtype)` followed"
"by `res[...] = arr`. (Deprecated NumPy 1.20)")) {
if ((PyArray_NDIM(self) != PyArray_NDIM(ret)) &&
DEPRECATE_FUTUREWARNING(
"casting an array to a subarray dtype "
"will not using broadcasting in the future, but cast each "
"element to the new dtype and then append the dtype's shape "
"to the new array. You can opt-in to the new behaviour, by "
"additional field to the cast: "
"`arr.astype(np.dtype([('f', dtype)]))['f']`.\n"
"This may lead to a different result or to current failures "
"succeeding. "
"(FutureWarning since NumPy 1.20)") < 0) {
Py_DECREF(ret);
return NULL;
}
Expand Down
56 changes: 29 additions & 27 deletions numpy/core/tests/test_deprecations.py
Original file line number Diff line number Diff line change
Expand Up @@ -732,40 +732,42 @@ def test_not_deprecated(self):


class TestDeprecateSubarrayDTypeDuringArrayCoercion(_DeprecationTestCase):
message = "using a dtype with a subarray field is deprecated"

@pytest.mark.parametrize(["obj", "dtype"],
[([((0, 1), (1, 2)), ((2,),)], '(2,2)f4'),
(["1", "2"], "(2)i,")])
def test_deprecated_sequence(self, obj, dtype):
dtype = np.dtype(dtype)
self.assert_deprecated(lambda: np.array(obj, dtype=dtype))
with pytest.warns(DeprecationWarning):
res = np.array(obj, dtype=dtype)

# Using `arr.astype(subarray_dtype)` is also deprecated, because
# it uses broadcasting instead of casting each element.
self.assert_deprecated(lambda: res.astype(dtype))
expected = np.empty(len(obj), dtype=dtype)
for i in range(len(expected)):
expected[i] = obj[i]

assert_array_equal(res, expected)
warning_cls = FutureWarning
message = "(creating|casting) an array (with|to) a subarray dtype"

def test_deprecated_array(self):
# Arrays are more complex, since they "broadcast" on success:
arr = np.array([1, 2])

self.assert_deprecated(lambda: arr.astype("(2)i,"))
with pytest.warns(FutureWarning):
res = arr.astype("(2)i,")

assert_array_equal(res, [[1, 2], [1, 2]])

self.assert_deprecated(lambda: np.array(arr, dtype="(2)i,"))
with pytest.warns(DeprecationWarning):
with pytest.warns(FutureWarning):
res = np.array(arr, dtype="(2)i,")

assert_array_equal(res, [[1, 2], [1, 2]])

def test_not_deprecated(self):
# These error paths are not deprecated, the tests should be retained
# when the deprecation is finalized.
with pytest.warns(FutureWarning):
res = np.array([[(1,), (2,)], arr], dtype="(2)i,")

assert_array_equal(res, [[[1, 1], [2, 2]], [[1, 2], [1, 2]]])

def test_deprecated_and_error(self):
# These error paths do not give a warning, but will succeed in the
# future.
arr = np.arange(5 * 2).reshape(5, 2)
with pytest.raises(ValueError):
arr.astype("(2,2)f")
with pytest.raises(ValueError):
np.array(arr, dtype="(2,2)f")
def check():
with pytest.raises(ValueError):
arr.astype("(2,2)f")

self.assert_deprecated(check)

def check():
with pytest.raises(ValueError):
np.array(arr, dtype="(2,2)f")

self.assert_deprecated(check)
18 changes: 18 additions & 0 deletions numpy/core/tests/test_dtype.py
Original file line number Diff line number Diff line change
Expand Up @@ -314,6 +314,24 @@ def test_union_struct(self):
'formats':['i1', 'O'],
'offsets':[np.dtype('intp').itemsize, 0]})

@pytest.mark.parametrize(["obj", "dtype", "expected"],
[([], ("(2)f4,"), np.empty((0, 2), dtype="f4")),
(3, "(3)f4,", [3, 3, 3]),
(np.float64(2), "(2)f4,", [2, 2]),
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To be clear, the first case here previously returned [3, uninitialized, uninitialized], while the second case actually did the right thing. So I considered that a simple bugfix here.

([((0, 1), (1, 2)), ((2,),)], '(2,2)f4', None),
(["1", "2"], "(2)i,", None)])
def test_subarray_list(self, obj, dtype, expected):
dtype = np.dtype(dtype)
res = np.array(obj, dtype=dtype)

if expected is None:
# iterate the 1-d list to fill the array
expected = np.empty(len(obj), dtype=dtype)
for i in range(len(expected)):
expected[i] = obj[i]

assert_array_equal(res, expected)

def test_comma_datetime(self):
dt = np.dtype('M8[D],datetime64[Y],i8')
assert_equal(dt, np.dtype([('f0', 'M8[D]'),
Expand Down
0