10000 DEP: Deprecate coercion to subarray dtypes · numpy/numpy@e31ae7f · GitHub
[go: up one dir, main page]

Skip to content

Commit e31ae7f

Browse files
committed
DEP: Deprecate coercion to subarray dtypes
When coercing to subarray dtypes, e.g. using `np.array(obj, dtype)`, but also `arr.astype(dtype)`, the behaviour was only well defined with tuple inputs, but not with array-like inputs. In particular, `arr.astype(dtype)` had arguably surprising behaviour of not converting by element, but rather attempting (and often failing) to broadcast `arr` to the result array with added dimensions. This deprecates all of these cases, the main issue would be for users relying on stranger inputs with broadcasted tuples contained in sequences: ``` np.array([((0, 1), (1, 2)), ((2,),)], dtype='(2,2)f4') ``` In most cases, where the tuples have the correct output shape, the new base dtype can be directly used since the discovered shape should match. However, there is no work-around for the above case. Closes gh-17173
1 parent 57adb4b commit e31ae7f

File tree

4 files changed

+281
-0
lines changed

4 files changed

+281
-0
lines changed
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
Arrays cannot be created using subarray dtypes
2+
----------------------------------------------
3+
Array creation and casting using ``np.array(obj, dtype)``
4+
and ``arr.astype(dtype)`` will no longer support a ``dtype``
5+
that is a subarray dtype such as ``np.dtype("(2)i,")``.
6+
7+
For such a ``dtype`` the following behaviour occurs currently::
8+
9+
res = np.array(obj, dtype)
10+
11+
res.dtype is not dtype
12+
res.dtype is dtype.base
13+
res.shape[-dtype.ndim:] == dtype.shape
14+
15+
The shape of the dtype is absorbed into the shape of the resulting array.
16+
This leads to inconsistencies when ``obj`` is:
17+
18+
* a scalar, such as ``np.array(1, dtype="(2)i")``
19+
* an array, such as ``np.array(np.array([1]), dtype="(2)i")``
20+
21+
In most cases the work-around is to pass the base dtype (``dtype.base``) directly
22+
and possibly check ``res.shape[-dtype.ndim:] == dtype.shape``.
23+
If this is insufficient, please open an issue on the NumPy issue
24+
tracker.

numpy/core/src/multiarray/ctors.c

Lines changed: 202 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1367,6 +1367,160 @@ PyArray_GetArrayParamsFromObject(PyObject *NPY_UNUSED(op),
13671367
}
13681368

13691369

1370+
/*
1371+
* This function is a legacy implementation to retain subarray dtype
1372+
* behaviour in array coercion. The behaviour here makes sense if tuples
1373+
* of matching dimensionality are being coerced. Due to the difficulty
1374+
* that the result is ill-defined for lists of array-likes, this is deprecated.
1375+
*
1376+
* WARNING: Do not use this function, it exists purely to support a deprecated
1377+
* code path.
1378+
*/
1379+
static int
1380+
setArrayFromSequence(PyArrayObject *a, PyObject *s,
1381+
int dim, PyArrayObject * dst)
1382+
{
1383+
Py_ssize_t i, slen;
1384+
int res = -1;
1385+
1386+
/* first recursion, view equal destination */
1387+
if (dst == NULL)
1388+
dst = a;
1389+
1390+
/*
1391+
* This code is to ensure that the sequence access below will
1392+
* return a lower-dimensional sequence.
1393+
*/
1394+
1395+
/* INCREF on entry DECREF on exit */
1396+
Py_INCREF(s);
1397+
1398+
PyObject *seq = NULL;
1399+
1400+
if (PyArray_Check(s)) {
1401+
if (!(PyArray_CheckExact(s))) {
1402+
/*
1403+
* make sure a base-class array is used so that the dimensionality
1404+
* reduction assumption is correct.
1405+
*/
1406+
/* This will DECREF(s) if replaced */
1407+
s = PyArray_EnsureArray(s);
1408+
if (s == NULL) {
1409+
goto fail;
1410+
}
1411+
}
1412 3419 +
1413+
/* dst points to correct array subsection */
1414+
if (PyArray_CopyInto(dst, (PyArrayObject *)s) < 0) {
1415+
goto fail;
1416+
}
1417+
1418+
Py_DECREF(s);
1419+
return 0;
1420+
}
1421+
1422+
if (dim > PyArray_NDIM(a)) {
1423+
PyErr_Format(PyExc_ValueError,
1424+
"setArrayFromSequence: sequence/array dimensions mismatch.");
1425+
goto fail;
1426+
}
1427+
1428+
/* Try __array__ before using s as a sequence */
1429+
PyObject *tmp = _array_from_array_like(s, NULL, 0, NULL);
1430+
if (tmp == NULL) {
1431+
goto fail;
1432+
}
1433+
else if (tmp == Py_NotImplemented) {
1434+
Py_DECREF(tmp);
1435+
}
1436+
else {
1437+
int r = PyArray_CopyInto(dst, (PyArrayObject *)tmp);
1438+
Py_DECREF(tmp);
1439+
if (r < 0) {
1440+
goto fail;
1441+
}
1442+
Py_DECREF(s);
1443+
return 0;
1444+
}
1445+
1446+
seq = PySequence_Fast(s, "Could not convert object to sequence");
1447+
if (seq == NULL) {
1448+
goto fail;
1449+
}
1450+
slen = PySequence_Fast_GET_SIZE(seq);
1451+
1452+
/*
1453+
* Either the dimensions match, or the sequence has length 1 and can
1454+
* be broadcast to the destination.
1455+
*/
1456+
if (slen != PyArray_DIMS(a)[dim] && slen != 1) {
1457+
PyErr_Format(PyExc_ValueError,
1458+
"cannot copy sequence with size %zd to array axis "
1459+
"with dimension %" NPY_INTP_FMT, slen, PyArray_DIMS(a)[dim]);
1460+
goto fail;
1461+
}
1462+
1463+
/* Broadcast the one element from the sequence to all the outputs */
1464+
if (slen == 1) {
1465+
PyObject *o = PySequence_Fast_GET_ITEM(seq, 0);
1466+
npy_intp alen = PyArray_DIM(a, dim);
1467+
1468+
for (i = 0; i < alen; i++) {
1469+
if ((PyArray_NDIM(a) - dim) > 1) {
1470+
PyArrayObject * tmp =
1471+
(PyArrayObject *)array_item_asarray(dst, i);
1472+
if (tmp == NULL) {
1473+
goto fail;
1474+
}
1475+
1476+
res = setArrayFromSequence(a, o, dim+1, tmp);
1477+
Py_DECREF(tmp);
1478+
}
1479+
else {
1480+
char * b = (PyArray_BYTES(dst) + i * PyArray_STRIDES(dst)[0]);
1481+
res = PyArray_SETITEM(dst, b, o);
1482+
}
1483+
if (res < 0) {
1484+
goto fail;
1485+
}
1486+
}
1487+
}
1488+
/* Copy element by element */
1489+
else {
1490+
for (i = 0; i < slen; i++) {
1491+
PyObject * o = PySequence_Fast_GET_ITEM(seq, i);
1492+
if ((PyArray_NDIM(a) - dim) > 1) {
1493+
PyArrayObject * tmp =
1494+
(PyArrayObject *)array_item_asarray(dst, i);
1495+
if (tmp == NULL) {
1496+
goto fail;
1497+
}
1498+
1499+
res = setArrayFromSequence(a, o, dim+1, tmp);
1500+
Py_DECREF(tmp);
1501+
}
1502+
else {
1503+
char * b = (PyArray_BYTES(dst) + i * PyArray_STRIDES(dst)[0]);
1504+
res = PyArray_SETITEM(dst, b, o);
1505+
}
1506+
if (res < 0) {
150 6377 7+
goto fail;
1508+
}
1509+
}
1510+
}
1511+
1512+
Py_DECREF(seq);
1513+
Py_DECREF(s);
1514+
return 0;
1515+
1516+
fail:
1517+
Py_XDECREF(seq);
1518+
Py_DECREF(s);
1519+
return res;
1520+
}
1521+
1522+
1523+
13701524
/*NUMPY_API
13711525
* Does not check for NPY_ARRAY_ENSURECOPY and NPY_ARRAY_NOTSWAPPED in flags
13721526
* Steals a reference to newtype --- which can be NULL
@@ -1407,6 +1561,54 @@ PyArray_FromAny(PyObject *op, PyArray_Descr *newtype, int min_depth,
14071561
if (ndim < 0) {
14081562
return NULL;
14091563
}
1564+
1565+
if (NPY_UNLIKELY(fixed_descriptor != NULL && PyDataType_HASSUBARRAY(dtype))) {
1566+
/*
1567+
* When a subarray dtype was passed in, its dimensions are absorbed
1568+
* into the array dimension (causing a dimension mismatch).
1569+
* We can't reasonably handle this because of inconsistencies in
1570+
* how it was handled (depending on nested list vs. embed array-likes).
1571+
* So we give a deprecation warning and fall back to legacy code.
1572+
*/
1573+
ret = (PyArrayObject *)PyArray_NewFromDescr(
1574+
&PyArray_Type, dtype, ndim, dims, NULL, NULL,
1575+
flags&NPY_ARRAY_F_CONTIGUOUS, NULL);
1576+
if (ret == NULL) {
1577+
npy_free_coercion_cache(cache);
1578+
return NULL;
1579+
}
1580+
assert(PyArray_NDIM(ret) != ndim);
1581+
1582+
if (cache == NULL) {
1583+
/* This is a single item. Sets only first subarray element. */
1584+
assert(ndim == 0);
1585+
if (PyArray_Pack(PyArray_DESCR(ret), PyArray_DATA(ret), op) < 0) {
1586+
Py_DECREF(ret);
1587+
return NULL;
1588+
}
1589+
}
1590+
else {
1591+
npy_free_coercion_cache(cache);
1592+
if (setArrayFromSequence(ret, op, 0, NULL) < 0) {
1593+
Py_DECREF(ret);
1594+
return NULL;
1595+
}
1596+
}
1597+
/* NumPy 1.20, 2020-10-01 */
1598+
if (DEPRECATE(
1599+
"using a dtype with a subarray field is deprecated. "
1600+
"This can lead to inconsistent behaviour due to the resulting "
1601+
"dtype being different from the input dtype. "
1602+
"You may try to use `dtype=dtype.base`, which should give the "
1603+
"same result for most inputs, but does not guarantee the "
1604+
"output dimensions to match the subarray ones. "
1605+
"(Deprecated NumPy 1.20)")) {
1606+
Py_DECREF(ret);
1607+
return NULL;
1608+
}
1609+
return (PyObject *)ret;
1610+
}
1611+
14101612
if (dtype == NULL) {
14111613
dtype = PyArray_DescrFromType(NPY_DEFAULT_TYPE);
14121614
}

numpy/core/src/multiarray/methods.c

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -844,6 +844,20 @@ array_astype(PyArrayObject *self, PyObject *args, PyObject *kwds)
844844
if (ret == NULL) {
845845
return NULL;
846846
}
847+
/* NumPy 1.20, 2020-10-01 */
848+
if ((PyArray_NDIM(self) != PyArray_NDIM(ret)) && DEPRECATE(
849+
"using a dtype with a subarray field is deprecated. "
850+
"This can lead to inconsistent behaviour due to the resulting "
851+
"dtype being different from the input dtype. "
852+
"You may try to use `dtype=dtype.base`, which should give the "
853+
"same result for most inputs, but does not guarantee the "
854+
"output dimensions to match the subarray ones. "
855+
"For `arr.astype()` the old, surprising, behaviour can be "
856+
"retained using `res = np.empty(arr.shape, dtype)` followed"
857+
"by `res[...] = arr`. (Deprecated NumPy 1.20)")) {
858+
Py_DECREF(ret);
859+
return NULL;
860+
}
847861

848862
if (PyArray_CopyInto(ret, self) < 0) {
849863
Py_DECREF(ret);

numpy/core/tests/test_deprecations.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,8 @@ def assert_deprecated(self, function, num=1, ignore_others=False,
8181
kwargs : dict
8282
Keyword arguments for `function`
8383
"""
84+
__tracebackhide__ = True # Hide traceback for py.test
85+
8486
# reset the log
8587
self.log[:] = []
8688

@@ -728,3 +730,42 @@ def test_not_deprecated(self):
728730
np.concatenate(([0.], [1.]), out=np.empty(2, dtype=np.int64),
729731
casting="same_kind")
730732

733+
734+
class TestDeprecateSubarrayDTypeDuringArrayCoercion(_DeprecationTestCase):
    """Coercing to, or casting with, a subarray dtype must warn."""
    message = "using a dtype with a subarray field is deprecated"

    @pytest.mark.parametrize(
        ["obj", "dtype"],
        [
            ([((0, 1), (1, 2)), ((2,),)], '(2,2)f4'),
            (["1", "2"], "(2)i,"),
        ])
    def test_deprecated_sequence(self, obj, dtype):
        subarray_dtype = np.dtype(dtype)

        def coerce():
            return np.array(obj, dtype=subarray_dtype)

        self.assert_deprecated(coerce)
        with pytest.warns(DeprecationWarning):
            coerced = coerce()

        # `arr.astype(subarray_dtype)` is deprecated as well, since it
        # broadcasts rather than casting element by element.
        self.assert_deprecated(lambda: coerced.astype(subarray_dtype))

        # Build the reference result by assigning one entry at a time.
        expected = np.empty(len(obj), dtype=subarray_dtype)
        for index, item in enumerate(obj):
            expected[index] = item

        assert_array_equal(coerced, expected)

    def test_deprecated_array(self):
        # Array inputs are the more complex case: on success they are
        # "broadcast" into the result.
        source = np.array([1, 2])

        def coerce():
            return np.array(source, dtype="(2)i,")

        self.assert_deprecated(coerce)
        with pytest.warns(DeprecationWarning):
            coerced = coerce()

        assert_array_equal(coerced, [[1, 2], [1, 2]])

    def test_not_deprecated(self):
        # These are plain error paths, not deprecations; keep these checks
        # once the deprecation has been finalized.
        source = np.arange(5 * 2).reshape(5, 2)

        with pytest.raises(ValueError):
            source.astype("(2,2)f")

        with pytest.raises(ValueError):
            np.array(source, dtype="(2,2)f")

0 commit comments

Comments
 (0)
0