8000 DEP,BUG: Coercion/cast of array to a subarray dtype will be fixed · seberg/numpy@fcc3940 · GitHub
[go: up one dir, main page]

Skip to content

Commit fcc3940

Browse files
committed
DEP,BUG: Coercion/cast of array to a subarray dtype will be fixed
This currently appends the subarray dtype dimensions first and then tries to assign to the result array which uses incorrect broadcasting (broadcasting against the subarray dimensions instead of repeating each element according to the subarray dimensions). This also fixes the python scalar pathway `np.array(2, dtype="(2)f4,")` which previously only filled the first value. I consider that a clear bug fix. Closes numpygh-17511
1 parent 32f1359 commit fcc3940

File tree

6 files changed

+165
-95
lines changed

6 files changed

+165
-95
lines changed

doc/release/upcoming_changes/17419.deprecation.rst

Lines changed: 0 additions & 24 deletions
This file was deleted.
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
Arrays cannot be using subarray dtypes
2+
--------------------------------------
3+
Array creation and casting using ``np.array(arr, dtype)``
4+
and ``arr.astype(dtype)`` will use different logic when ``dtype``
5+
is a subarray dtype such as ``np.dtype("(2)i,")``.
6+
7+
For such a ``dtype`` the following behaviour is true::
8+
9+
res = np.array(arr, dtype)
10+
11+
res.dtype is not dtype
12+
res.dtype is dtype.base
13+
res.shape == arr.shape + dtype.shape
14+
15+
But ``res`` is filled using the logic:
16+
17+
res = np.empty(arr.shape + dtype.shape, dtype=dtype.base)
18+
res[...] = arr
19+
20+
which uses incorrect broadcasting (and often leads to an error).
21+
In the future, this will instead cast each element individually,
22+
leading to the same result as::
23+
24+
res = np.array(arr, dtype=np.dtype(["f", dtype]))["f"]
25+
26+
Which can normally be used to opt-in to the new behaviour.
27+
28+
This change does not affect ``np.array(list, dtype="(2)i,")`` unless the
29+
``list`` itself includes at least one array. In particular, the behaviour
30+
is unchanged for a list of tuples.

numpy/core/src/multiarray/ctors.c

Lines changed: 77 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1565,49 +1565,66 @@ PyArray_FromAny(PyObject *op, PyArray_Descr *newtype, int min_depth,
15651565

15661566
if (NPY_UNLIKELY(fixed_descriptor != NULL && PyDataType_HASSUBARRAY(dtype))) {
15671567
/*
1568-
* When a subarray dtype was passed in, its dimensions are absorbed
1569-
* into the array dimension (causing a dimension mismatch).
1570-
* We can't reasonably handle this because of inconsistencies in
1571-
* how it was handled (depending on nested list vs. embed array-likes).
1572-
* So we give a deprecation warning and fall back to legacy code.
1568+
* When a subarray dtype was passed in, its dimensions are appended
1569+
* to the array dimension (causing a dimension mismatch).
1570+
* There is a problem with that, because if we coerce from non-arrays
1571+
* we do this correctly by element (as defined by tuples), but for
1572+
* arrays we first append the dimensions and then assign to the base
1573+
* dtype and then assign which causes the problem.
1574+
*
1575+
* Thus, we check if there is an array included, in that case we
1576+
* give a FutureWarning.
1577+
* When the warning is removed, PyArray_Pack will have to ensure
1578+
* that that it does not append the dimensions when creating the
1579+
* subarrays to assign `arr[0] = obj[0]`.
15731580
*/
1574-
ret = (PyArrayObject *)PyArray_NewFromDescr(
1575-
&PyArray_Type, dtype, ndim, dims, NULL, NULL,
1576-
flags&NPY_ARRAY_F_CONTIGUOUS, NULL);
1577-
if (ret == NULL) {
1578-
npy_free_coercion_cache(cache);
1579-
return NULL;
1581+
int includes_array = 0;
1582+
if (cache != NULL) {
1583+
/* This is not ideal, but it is a pretty special case */
1584+
coercion_cache_obj *next = cache;
1585+
while (next != NULL) {
1586+
if (!next->sequence) {
1587+
includes_array = 1;
1588+
break;
1589+
}
1590+
next = next->next;
1591+
}
15801592
}
1581-
assert(PyArray_NDIM(ret) != ndim);
1593+
if (includes_array) {
1594+
npy_free_coercion_cache(cache);
15821595

1583-
if (cache == NULL) {
1584-
/* This is a single item. Sets only first subarray element. */
1585-
assert(ndim == 0);
1586-
if (PyArray_Pack(PyArray_DESCR(ret), PyArray_DATA(ret), op) < 0) {
1596+
ret = (PyArrayObject *) PyArray_NewFromDescr(
1597+
&PyArray_Type, dtype, ndim, dims, NULL, NULL,
1598+
flags & NPY_ARRAY_F_CONTIGUOUS, NULL);
1599+
if (ret == NULL) {
1600+
return NULL;
1601+
}
1602+
assert(PyArray_NDIM(ret) != ndim);
1603+
1604+
/* NumPy 1.20, 2020-10-01 */
1605+
if (DEPRECATE_FUTUREWARNING(
1606+
"creating an array with a subarray dtype will behave "
1607+
"differently when the `np.array()` (or `asarray`, etc.) "
1608+
"call includes an array or array object.\n"
1609+
"If you are converting a single array or a list of arrays,"
1610+
"you can opt-in to the future behaviour using:\n"
1611+
" np.array(arr, dtype=np.dtype(['f', dtype]))['f']\n"
1612+
" np.array([arr1, arr2], dtype=np.dtype(['f', dtype]))['f']\n"
1613+
"\n"
1614+
"By including a new field and indexing it after the "
1615+
"conversion.\n"
1616+
"This may lead to a different result or to current failures "
1617+
"succeeding. (FutureWarning since NumPy 1.20)") < 0) {
15871618
Py_DECREF(ret);
15881619
return NULL;
15891620
}
1590-
}
1591-
else {
1592-
npy_free_coercion_cache(cache);
1621+
15931622
if (setArrayFromSequence(ret, op, 0, NULL) < 0) {
15941623
Py_DECREF(ret);
15951624
return NULL;
15961625
}
1626+
return (PyObject *)ret;
15971627
}
1598-
/* NumPy 1.20, 2020-10-01 */
1599-
if (DEPRECATE(
1600-
"using a dtype with a subarray field is deprecated. "
1601-
"This can lead to inconsistent behaviour due to the resulting "
1602-
"dtype being different from the input dtype. "
1603-
"You may try to use `dtype=dtype.base`, which should give the "
1604-
"same result for most inputs, but does not guarantee the "
1605-
"output dimensions to match the subarray ones. "
1606-
"(Deprecated NumPy 1.20)")) {
1607-
Py_DECREF(ret);
1608-
return NULL;
1609-
}
1610-
return (PyObject *)ret;
16111628
}
16121629

16131630
if (dtype == NULL) {
@@ -1700,26 +1717,52 @@ PyArray_FromAny(PyObject *op, PyArray_Descr *newtype, int min_depth,
17001717
}
17011718

17021719
/* Create a new array and copy the data */
1720+
Py_INCREF(dtype); /* hold on in case of a subarray that is replaced */
17031721
ret = (PyArrayObject *)PyArray_NewFromDescr(
17041722
&PyArray_Type, dtype, ndim, dims, NULL, NULL,
17051723
flags&NPY_ARRAY_F_CONTIGUOUS, NULL);
17061724
if (ret == NULL) {
17071725
npy_free_coercion_cache(cache);
1726+
Py_DECREF(dtype);
17081727
return NULL;
17091728
}
1729+
if (ndim == PyArray_NDIM(ret)) {
1730+
/*
1731+
* Appending of dimensions did not occur, so use the actual dtype
1732+
* below. This is relevant for S0 or U0 which can be replaced with
1733+
* S1 or U1, although that should likely change.
1734+
*/
1735+
Py_SETREF(dtype, PyArray_DESCR(ret));
1736+
Py_INCREF(dtype);
1737+
}
1738+
17101739
if (cache == NULL) {
17111740
/* This is a single item. Set it directly. */
17121741
assert(ndim == 0);
17131742

1714-
if (PyArray_Pack(PyArray_DESCR(ret), PyArray_BYTES(ret), op) < 0) {
1743+
if (PyArray_Pack(dtype, PyArray_BYTES(ret), op) < 0) {
1744+
Py_DECREF(dtype);
17151745
Py_DECREF(ret);
17161746
return NULL;
17171747
}
1748+
Py_DECREF(dtype);
17181749
return (PyObject *)ret;
17191750
}
17201751
assert(ndim != 0);
17211752
assert(op == cache->converted_obj);
1722-
if (PyArray_AssignFromCache(ret, cache) < 0) {
1753+
1754+
/* Decrease the number of dimensions to the detected ones */
1755+
int out_ndim = PyArray_NDIM(ret);
1756+
PyArray_Descr *out_descr = PyArray_DESCR(ret);
1757+
((PyArrayObject_fields *)ret)->nd = ndim;
1758+
((PyArrayObject_fields *)ret)->descr = dtype;
1759+
1760+
int success = PyArray_AssignFromCache(ret, cache);
1761+
1762+
((PyArrayObject_fields *)ret)->nd = out_ndim;
1763+
((PyArrayObject_fields *)ret)->descr = out_descr;
1764+
Py_DECREF(dtype);
1765+
if (success < 0) {
17231766
Py_DECREF(ret);
17241767
return NULL;
17251768
}

numpy/core/src/multiarray/methods.c

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -845,16 +845,17 @@ array_astype(PyArrayObject *self, PyObject *args, PyObject *kwds)
845845
return NULL;
846846
}
847847
/* NumPy 1.20, 2020-10-01 */
848-
if ((PyArray_NDIM(self) != PyArray_NDIM(ret)) && DEPRECATE(
849-
"using a dtype with a subarray field is deprecated. "
850-
"This can lead to inconsistent behaviour due to the resulting "
851-
"dtype being different from the input dtype. "
852-
"You may try to use `dtype=dtype.base`, which should give the "
853-
"same result for most inputs, but does not guarantee the "
854-
"output dimensions to match the subarray ones. "
855-
"For `arr.astype()` the old, surprising, behaviour can be "
856-
"retained using `res = np.empty(arr.shape, dtype)` followed"
857-
"by `res[...] = arr`. (Deprecated NumPy 1.20)")) {
848+
if ((PyArray_NDIM(self) != PyArray_NDIM(ret)) &&
849+
DEPRECATE_FUTUREWARNING(
850+
"casting an array to a subarray dtype "
851+
"will not using broadcasting in the future, but cast each "
852+
"element to the new dtype and then append the dtype's shape "
853+
"to the new array. You can opt-in to the new behaviour, by "
854+
"additional field to the cast: "
855+
"`arr.astype(np.dtype([('f', dtype)]))['f']`.\n"
856+
"This may lead to a different result or to current failures "
857+
"succeeding. "
858+
"(FutureWarning since NumPy 1.20)") < 0) {
858859
Py_DECREF(ret);
859860
return NULL;
860861
}

numpy/core/tests/test_deprecations.py

Lines changed: 29 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -732,40 +732,42 @@ def test_not_deprecated(self):
732732

733733

734734
class TestDeprecateSubarrayDTypeDuringArrayCoercion(_DeprecationTestCase):
735-
message = "using a dtype with a subarray field is deprecated"
736-
737-
@pytest.mark.parametrize(["obj", "dtype"],
738-
[([((0, 1), (1, 2)), ((2,),)], '(2,2)f4'),
739-
(["1", "2"], "(2)i,")])
740-
def test_deprecated_sequence(self, obj, dtype):
741-
dtype = np.dtype(dtype)
742-
self.assert_deprecated(lambda: np.array(obj, dtype=dtype))
743-
with pytest.warns(DeprecationWarning):
744-
res = np.array(obj, dtype=dtype)
745-
746-
# Using `arr.astype(subarray_dtype)` is also deprecated, because
747-
# it uses broadcasting instead of casting each element.
748-
self.assert_deprecated(lambda: res.astype(dtype))
749-
expected = np.empty(len(obj), dtype=dtype)
750-
for i in range(len(expected)):
751-
expected[i] = obj[i]
752-
753-
assert_array_equal(res, expected)
735+
warning_cls = FutureWarning
736+
message = "(creating|casting) an array (with|to) a subarray dtype"
754737

755738
def test_deprecated_array(self):
756739
# Arrays are more complex, since they "broadcast" on success:
757740
arr = np.array([1, 2])
741+
742+
self.assert_deprecated(lambda: arr.astype("(2)i,"))
743+
with pytest.warns(FutureWarning):
744+
res = arr.astype("(2)i,")
745+
746+
assert_array_equal(res, [[1, 2], [1, 2]])
747+
758748
self.assert_deprecated(lambda: np.array(arr, dtype="(2)i,"))
759-
with pytest.warns(DeprecationWarning):
749+
with pytest.warns(FutureWarning):
760750
res = np.array(arr, dtype="(2)i,")
761751

762752
assert_array_equal(res, [[1, 2], [1, 2]])
763753

764-
def test_not_deprecated(self):
765-
# These error paths are not deprecated, the tests should be retained
766-
# when the deprecation is finalized.
754+
with pytest.warns(FutureWarning):
755+
res = np.array([[(1,), (2,)], arr], dtype="(2)i,")
756+
757+
assert_array_equal(res, [[[1, 1], [2, 2]], [[1, 2], [1, 2]]])
758+
759+
def test_deprecated_and_error(self):
760+
# These error paths do not give a warning, but will succeed in the
761+
# future.
767762
arr = np.arange(5 * 2).reshape(5, 2)
768-
with pytest.raises(ValueError):
769-
arr.astype("(2,2)f")
770-
with pytest.raises(ValueError):
771-
np.array(arr, dtype="(2,2)f")
763+
def check():
764+
with pytest.raises(ValueError):
765+
arr.astype("(2,2)f")
766+
767+
self.assert_deprecated(check)
768+
769+
def check():
770+
with pytest.raises(ValueError):
771+
np.array(arr, dtype="(2,2)f")
772+
773+
self.assert_deprecated(check)

numpy/core/tests/test_dtype.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -314,6 +314,24 @@ def test_union_struct(self):
314314
'formats':['i1', 'O'],
315315
'offsets':[np.dtype('intp').itemsize, 0]})
316316

317+
@pytest.mark.parametrize(["obj", "dtype", "expected"],
318+
[([], ("(2)f4,"), np.empty((0, 2), dtype="f4")),
319+
(3, "(3)f4,", [3, 3, 3]),
320+
(np.float64(2), "(2)f4,", [2, 2]),
321+
([((0, 1), (1, 2)), ((2,),)], '(2,2)f4', None),
322+
(["1", "2"], "(2)i,", None)])
323+
def test_subarray_list(self, obj, dtype, expected):
324+
dtype = np.dtype(dtype)
325+
res = np.array(obj, dtype=dtype)
326+
327+
if expected is None:
328+
# iterate the 1-d list to fill the array
329+
expected = np.empty(len(obj), dtype=dtype)
330+
for i in range(len(expected)):
331+
expected[i] = obj[i]
332+
333+
assert_array_equal(res, expected)
334+
317335
def test_comma_datetime(self):
318336
dt = np.dtype('M8[D],datetime64[Y],i8')
319337
assert_equal(dt, np.dtype([('f0', 'M8[D]'),

0 commit comments

Comments
 (0)
0