8000 Merge pull request #17596 from seberg/subarray-dtype-futurewarning · numpy/numpy@08f9eeb · GitHub
[go: up one dir, main page]

Skip to content

Commit 08f9eeb

Browse files
authored
Merge pull request #17596 from seberg/subarray-dtype-futurewarning
DEP,BUG: Coercion/cast of array to a subarray dtype will be fixed
2 parents b10ee24 + fcc3940 commit 08f9eeb

File tree

6 files changed

+165
-95
lines changed

6 files changed

+165
-95
lines changed

doc/release/upcoming_changes/17419.deprecation.rst

Lines changed: 0 additions & 24 deletions
This file was deleted.
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
Arrays cannot be using subarray dtypes
2+
--------------------------------------
3+
Array creation and casting using ``np.array(arr, dtype)``
4+
and ``arr.astype(dtype)`` will use different logic when ``dtype``
5+
is a subarray dtype such as ``np.dtype("(2)i,")``.
6+
7+
For such a ``dtype`` the following behaviour is true::
8+
9+
res = np.array(arr, dtype)
10+
11+
res.dtype is not dtype
12+
res.dtype is dtype.base
13+
res.shape == arr.shape + dtype.shape
14+
15+
But ``res`` is filled using the logic:
16+
17+
res = np.empty(arr.shape + dtype.shape, dtype=dtype.base)
18+
res[...] = arr
19+
20+
which uses incorrect broadcasting (and often leads to an error).
21+
In the future, this will instead cast each element individually,
22+
leading to the same result as::
23+
24+
res = np.array(arr, dtype=np.dtype(["f", dtype]))["f"]
25+
26+
Which can normally be used to opt-in to the new behaviour.
27+
28+
This change does not affect ``np.array(list, dtype="(2)i,")`` unless the
29+
``list`` itself includes at least one array. In particular, the behaviour
30+
is unchanged for a list of tuples.

numpy/core/src/multiarray/ctors.c

Lines changed: 77 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1565,49 +1565,66 @@ PyArray_FromAny(PyObject *op, PyArray_Descr *newtype, int min_depth,
15651565

15661566
if (NPY_UNLIKELY(fixed_descriptor != NULL && PyDataType_HASSUBARRAY(dtype))) {
15671567
/*
1568-
* When a subarray dtype was passed in, its dimensions are absorbed
1569-
* into the array dimension (causing a dimension mismatch).
1570-
* We can't reasonably handle this because of inconsistencies in
1571-
* how it was handled (depending on nested list vs. embed array-likes).
1572-
* So we give a deprecation warning and fall back to legacy code.
1568+
* When a subarray dtype was passed in, its dimensions are appended
1569+
* to the array dimension (causing a dimension mismatch).
1570+
* There is a problem with that, because if we coerce from non-arrays
1571+
* we do this correctly by element (as defined by tuples), but for
1572+
* arrays we first append the dimensions and then assign to the base
1573+
* dtype and then assign which causes the problem.
1574+
*
1575+
* Thus, we check if there is an array included, in that case we
1576+
* give a FutureWarning.
1577+
* When the warning is removed, PyArray_Pack will have to ensure
1578+
* that that it does not append the dimensions when creating the
1579+
* subarrays to assign `arr[0] = obj[0]`.
15731580
*/
1574-
ret = (PyArrayObject *)PyArray_NewFromDescr(
1575-
&PyArray_Type, dtype, ndim, dims, NULL, NULL,
1576-
flags&NPY_ARRAY_F_CONTIGUOUS, NULL);
1577-
if (ret == NULL) {
1578-
npy_free_coercion_cache(cache);
1579-
return NULL;
1581+
int includes_array = 0;
1582+
if (cache != NULL) {
1583+
/* This is not ideal, but it is a pretty special case */
1584+
coercion_cache_obj *next = cache;
1585+
while (next != NULL) {
1586+
if (!next->sequence) {
1587+
includes_array = 1;
1588+
break;
1589+
}
1590+
next = next->next;
1591+
}
15801592
}
1581-
assert(PyArray_NDIM(ret) != ndim);
1593+
if (includes_array) {
1594+
npy_free_coercion_cache(cache);
15821595

1583-
if (cache == NULL) {
1584-
/* This is a single item. Sets only first subarray element. */
1585-
assert(ndim == 0);
1586-
if (PyArray_Pack(PyArray_DESCR(ret), PyArray_DATA(ret), op) < 0) {
1596+
ret = (PyArrayObject *) PyArray_NewFromDescr(
1597+
&PyArray_Type, dtype, ndim, dims, NULL, NULL,
1598+
flags & NPY_ARRAY_F_CONTIGUOUS, NULL);
1599+
if (ret == NULL) {
1600+
return NULL;
1601+
}
1602+
assert(PyArray_NDIM(ret) != ndim);
1603+
1604+
/* NumPy 1.20, 2020-10-01 */
1605+
if (DEPRECATE_FUTUREWARNING(
1606+
"creating an array with a subarray dtype will behave "
1607+
"differently when the `np.array()` (or `asarray`, etc.) "
1608+
"call includes an array or array object.\n"
1609+
"If you are converting a single array or a list of arrays,"
1610+
"you can opt-in to the future behaviour using:\n"
1611+
" np.array(arr, dtype=np.dtype(['f', dtype]))['f']\n"
1612+
" np.array([arr1, arr2], dtype=np.dtype(['f', dtype]))['f']\n"
1613+
"\n"
1614+
"By including a new field and indexing it after the "
1615+
"conversion.\n"
1616+
"This may lead to a different result or to current failures "
1617+
"succeeding. (FutureWarning since NumPy 1.20)") < 0) {
15871618
Py_DECREF(ret);
15881619
return NULL;
15891620
}
1590-
}
1591-
else {
1592-
npy_free_coercion_cache(cache);
1621+
15931622
if (setArrayFromSequence(ret, op, 0, NULL) < 0) {
15941623
Py_DECREF(ret);
15951624
return NULL;
15961625
}
1626+
return (PyObject *)ret;
15971627
}
1598-
/* NumPy 1.20, 2020-10-01 */
1599-
if (DEPRECATE(
1600-
"using a dtype with a subarray field is deprecated. "
1601-
"This can lead to inconsistent behaviour due to the resulting "
1602-
"dtype being different from the input dtype. "
1603-
"You may try to use `dtype=dtype.base`, which should give the "
1604-
"same result for most inputs, but does not guarantee the "
1605-
"output dimensions to match the subarray ones. "
1606-
"(Deprecated NumPy 1.20)")) {
1607-
Py_DECREF(ret);
1608-
return NULL;
1609-
}
1610-
return (PyObject *)ret;
16111628
}
16121629

16131630
if (dtype == NULL) {
@@ -1700,26 +1717,52 @@ PyArray_FromAny(PyObject *op, PyArray_Descr *newtype, int min_depth,
17001717
}
17011718

17021719
/* Create a new array and copy the data */
1720+
Py_INCREF(dtype); /* hold on in case of a subarray that is replaced */
17031721
ret = (PyArrayObject *)PyArray_NewFromDescr(
17041722
&PyArray_Type, dtype, ndim, dims, NULL, NULL,
17051723
flags&NPY_ARRAY_F_CONTIGUOUS, NULL);
17061724
if (ret == NULL) {
17071725
npy_free_coercion_cache(cache);
1726+
Py_DECREF(dtype);
17081727
return NULL;
17091728
}
1729+
if (ndim == PyArray_NDIM(ret)) {
1730+
/*
1731+
* Appending of dimensions did not occur, so use the actual dtype
1732+
* below. This is relevant for S0 or U0 which can be replaced with
1733+
* S1 or U1, although that should likely change.
1734+
*/
1735+
Py_SETREF(dtype, PyArray_DESCR(ret));
1736+
Py_INCREF(dtype);
1737+
}
1738+
17101739
if (cache == NULL) {
17111740
/* This is a single item. Set it directly. */
17121741
assert(ndim == 0);
17131742

1714-
if (PyArray_Pack(PyArray_DESCR(ret), PyArray_BYTES(ret), op) < 0) {
1743+
if (PyArray_Pack(dtype, PyArray_BYTES(ret), op) < 0) {
1744+
Py_DECREF(dtype);
17151745
Py_DECREF(ret);
17161746
return NULL;
17171747
}
1748+
Py_DECREF(dtype);
17181749
return (PyObject *)ret;
17191750
}
17201751
assert(ndim != 0);
17211752
assert(op == cache->converted_obj);
1722-
if (PyArray_AssignFromCache(ret, cache) < 0) {
1753+
1754+
/* Decrease the number of dimensions to the detected ones */
1755+
int out_ndim = PyArray_NDIM(ret);
1756+
PyArray_Descr *out_descr = PyArray_DESCR(ret);
1757+
((PyArrayObject_fields *)ret)->nd = ndim;
1758+
((PyArrayObject_fields *)ret)->descr = dtype;
1759+
1760+
int success = PyArray_AssignFromCache(ret, cache);
1761+
1762+
((PyArrayObject_fields *)ret)->nd = out_ndim;
1763+
((PyArrayObject_fields *)ret)->descr = out_descr;
1764+
Py_DECREF(dtype);
1765+
if (success < 0) {
17231766
Py_DECREF(ret);
17241767
return NULL;
17251768
}

numpy/core/src/multiarray/methods.c

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -845,16 +845,17 @@ array_astype(PyArrayObject *self, PyObject *args, PyObject *kwds)
845845
return NULL;
846846
}
847847
/* NumPy 1.20, 2020-10-01 */
848-
if ((PyArray_NDIM(self) != PyArray_NDIM(ret)) && DEPRECATE(
849-
"using a dtype with a subarray field is deprecated. "
850-
"This can lead to inconsistent behaviour due to the resulting "
851-
"dtype being different from the input dtype. "
852-
"You may try to use `dtype=dtype.base`, which should give the "
853-
"same result for most inputs, but does not guarantee the "
854-
"output dimensions to match the subarray ones. "
855-
"For `arr.astype()` the old, surprising, behaviour can be "
856-
"retained using `res = np.empty(arr.shape, dtype)` followed"
857-
"by `res[...] = arr`. (Deprecated NumPy 1.20)")) {
848+
if ((PyArray_NDIM(self) != PyArray_NDIM(ret)) &&
849+
DEPRECATE_FUTUREWARNING(
850+
"casting an array to a subarray dtype "
851+
"will not using broadcasting in the future, but cast each "
852+
"element to the new dtype and then append the dtype's shape "
853+
"to the new array. You can opt-in to the new behaviour, by "
854+
"additional field to the cast: "
855+
"`arr.astype(np.dtype([('f', dtype)]))['f']`.\n"
856+
"This may lead to a different result or to current failures "
857+
"succeeding. "
858+
"(FutureWarning since NumPy 1.20)") < 0) {
858859
Py_DECREF(ret);
859860
return NULL;
860861
}

numpy/core/tests/test_deprecations.py

Lines changed: 29 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -732,40 +732,42 @@ def test_not_deprecated(self):
732732

733733

734734
class TestDeprecateSubarrayDTypeDuringArrayCoercion(_DeprecationTestCase):
735-
message = "using a dtype with a subarray field is deprecated"
736-
737-
@pytest.mark.parametrize(["obj", "dtype"],
738-
[([((0, 1), (1, 2)), ((2,),)], '(2,2)f4'),
739-
(["1", "2"], "(2)i,")])
740-
def test_deprecated_sequence(self, obj, dtype):
741-
dtype = np.dtype(dtype)
742-
self.assert_deprecated(lambda: np.array(obj, dtype=dtype))
743-
with pytest.warns(DeprecationWarning):
744-
res = np.array(obj, dtype=dtype)
745-
746-
# Using `arr.astype(subarray_dtype)` is also deprecated, because
747-
# it uses broadcasting instead of casting each element.
748-
self.assert_deprecated(lambda: res.astype(dtype))
749-
expected = np.empty(len(obj), dtype=dtype)
750-
for i in range(len(expected)):
751-
expected[i] = obj[i]
752-
753-
assert_array_equal(res, expected)
735+
warning_cls = FutureWarning
736+
message = "(creating|casting) an array (with|to) a subarray dtype"
754737

755738
def test_deprecated_array(self):
756739
# Arrays are more complex, since they "broadcast" on success:
757740
arr = np.array([1, 2])
741+
742+
self.assert_deprecated(lambda: arr.astype("(2)i,"))
743+
with pytest.warns(FutureWarning):
744+
res = arr.astype("(2)i,")
745+
746+
assert_array_equal(res, [[1, 2], [1, 2]])
747+
758748
self.assert_deprecated(lambda: np.array(arr, dtype="(2)i,"))
759-
with pytest.warns(DeprecationWarning):
749+
with pytest.warns(FutureWarning):
760750
res = np.array(arr, dtype="(2)i,")
761751

762752
assert_array_equal(res, [[1, 2], [1, 2]])
763753

764-
def test_not_deprecated(self):
765-
# These error paths are not deprecated, the tests should be retained
766-
# when the deprecation is finalized.
754+
with pytest.warns(FutureWarning):
755+
res = np.array([[(1,), (2,)], arr], dtype="(2)i,")
756+
757+
assert_array_equal(res, [[[1, 1], [2, 2]], [[1, 2], [1, 2]]])
758+
759+
def test_deprecated_and_error(self):
760+
# These error paths do not give a warning, but will succeed in the
761+
# future.
767762
arr = np.arange(5 * 2).reshape(5, 2)
768-
with pytest.raises(ValueError):
769-
arr.astype("(2,2)f")
770-
with pytest.raises(ValueError):
771-
np.array(arr, dtype="(2,2)f")
763+
def check():
764+
with pytest.raises(ValueError):
765+
arr.astype("(2,2)f")
766+
767+
self.assert_deprecated(check)
768+
769+
def check():
770+
with pytest.raises(ValueError):
771+
np.array(arr, dtype="(2,2)f")
772+
773+
self.assert_deprecated(check)

numpy/core/tests/test_dtype.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -314,6 +314,24 @@ def test_union_struct(self):
314314
'formats':['i1', 'O'],
315315
'offsets':[np.dtype('intp').itemsize, 0]})
316316

317+
@pytest.mark.parametrize(["obj", "dtype", "expected"],
318+
[([], ("(2)f4,"), np.empty((0, 2), dtype="f4")),
319+
(3, "(3)f4,", [3, 3, 3]),
320+
(np.float64(2), "(2)f4,", [2, 2]),
321+
([((0, 1), (1, 2)), ((2,),)], '(2,2)f4', None),
322+
(["1", "2"], "(2)i,", None)])
323+
def test_subarray_list(self, obj, dtype, expected):
324+
dtype = np.dtype(dtype)
325+
res = np.array(obj, dtype=dtype)
326+
327+
if expected is None:
328+
# iterate the 1-d list to fill the array
329+
expected = np.empty(len(obj), dtype=dtype)
330+
for i in range(len(expected)):
331+
expected[i] = obj[i]
332+
333+
assert_array_equal(res, expected)
334+
317335
def test_comma_datetime(self):
318336
dt = np.dtype('M8[D],datetime64[Y],i8')
319337
assert_equal(dt, np.dtype([('f0', 'M8[D]'),

0 commit comments

Comments
 (0)
0