Merge pull request #20681 from charris/backport-20954 · numpy/numpy@5399c03 · GitHub
[go: up one dir, main page]

Skip to content

Commit 5399c03

Browse files
authored
Merge pull request #20681 from charris/backport-20954
BUG: Fix setstate logic for empty arrays
2 parents f9c45f8 + 34618d5 commit 5399c03

File tree

5 files changed

+85
-47
lines changed

5 files changed

+85
-47
lines changed

numpy/core/src/multiarray/arrayobject.c

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -493,14 +493,6 @@ array_dealloc(PyArrayObject *self)
493493
if (PyDataType_FLAGCHK(fa->descr, NPY_ITEM_REFCOUNT)) {
494494
PyArray_XDECREF(self);
495495
}
496-
/*
497-
* Allocation will never be 0, see comment in ctors.c
498-
* line 820
499-
*/
500-
size_t nbytes = PyArray_NBYTES(self);
501-
if (nbytes == 0) {
502-
nbytes = fa->descr->elsize ? fa->descr->elsize : 1;
503-
}
504496
if (fa->mem_handler == NULL) {
505497
char *env = getenv("NUMPY_WARN_IF_NO_MEM_POLICY");
506498
if ((env != NULL) && (strncmp(env, "1", 1) == 0)) {
@@ -511,7 +503,16 @@ array_dealloc(PyArrayObject *self)
511503
}
512504
// Guess at malloc/free ???
513505
free(fa->data);
514-
} else {
506+
}
507+
else {
508+
/*
509+
* In theory `PyArray_NBYTES_ALLOCATED`, but differs somewhere?
510+
* So instead just use the knowledge that 0 is impossible.
511+
*/
512+
size_t nbytes = PyArray_NBYTES(self);
513+
if (nbytes == 0) {
514+
nbytes = 1;
515+
}
515516
PyDataMem_UserFREE(fa->data, nbytes, fa->mem_handler);
516517
Py_DECREF(fa->mem_handler);
517518
}

numpy/core/src/multiarray/common.h

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -292,6 +292,35 @@ npy_memchr(char * haystack, char needle,
292292
return p;
293293
}
294294

295+
/*
296+
* Helper to work around issues with the allocation strategy currently
297+
* allocating not 1 byte for empty arrays, but enough for an array where
298+
* all 0 dimensions are replaced with size 1 (if the itemsize is not 0).
299+
*
300+
* This means that we can fill in nice (nonzero) strides and still handle
301+
* slicing direct math without being in danger of leaving the allocated byte
302+
* bounds.
303+
* In practice, that probably does not matter, but in principle this would be
304+
* undefined behaviour in C. Another solution may be to force the strides
305+
* to 0 in these cases. See also gh-15788.
306+
*
307+
* Unlike the code in `PyArray_NewFromDescr`, this helper does no overflow checks.
308+
*/
309+
static NPY_INLINE npy_intp
310+
PyArray_NBYTES_ALLOCATED(PyArrayObject *arr)
311+
{
312+
if (PyArray_ITEMSIZE(arr) == 0) {
313+
return 1;
314+
}
315+
npy_intp nbytes = PyArray_ITEMSIZE(arr);
316+
for (int i = 0; i < PyArray_NDIM(arr); i++) {
317+
if (PyArray_DIMS(arr)[i] != 0) {
318+
nbytes *= PyArray_DIMS(arr)[i];
319+
}
320+
}
321+
return nbytes;
322+
}
323+
295324

296325
/*
297326
* Simple helper to create a tuple from an array of items. The `make_null_none`

numpy/core/src/multiarray/ctors.c

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -754,14 +754,20 @@ PyArray_NewFromDescr_int(
754754
}
755755
fa->strides = fa->dimensions + nd;
756756

757-
/* Copy dimensions, check them, and find total array size `nbytes` */
757+
/*
758+
* Copy dimensions, check them, and find total array size `nbytes`
759+
*
760+
* Note that we ignore 0-length dimensions, to match this in the `free`
761+
* calls, `PyArray_NBYTES_ALLOCATED` is a private helper matching this
762+
* behaviour, but without overflow checking.
763+
*/
758764
for (int i = 0; i < nd; i++) {
759765
fa->dimensions[i] = dims[i];
760766

761767
if (fa->dimensions[i] == 0) {
762768
/*
763769
* Compare to PyArray_OverflowMultiplyList that
764-
* returns 0 in this case.
770+
* returns 0 in this case. See also `PyArray_NBYTES_ALLOCATED`.
765771
*/
766772
continue;
767773
}

numpy/core/src/multiarray/getset.c

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -384,15 +384,7 @@ array_data_set(PyArrayObject *self, PyObject *op, void *NPY_UNUSED(ignored))
384384
}
385385
if (PyArray_FLAGS(self) & NPY_ARRAY_OWNDATA) {
386386
PyArray_XDECREF(self);
387-
size_t nbytes = PyArray_NBYTES(self);
388-
/*
389-
* Allocation will never be 0, see comment in ctors.c
390-
* line 820
391-
*/
392-
if (nbytes == 0) {
393-
PyArray_Descr *dtype = PyArray_DESCR(self);
394-
nbytes = dtype->elsize ? dtype->elsize : 1;
395-
}
387+
size_t nbytes = PyArray_NBYTES_ALLOCATED(self);
396388
PyObject *handler = PyArray_HANDLER(self);
397389
if (handler == NULL) {
398390
/* This can happen if someone arbitrarily sets NPY_ARRAY_OWNDATA */

numpy/core/src/multiarray/methods.c

Lines changed: 37 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1934,7 +1934,7 @@ array_setstate(PyArrayObject *self, PyObject *args)
19341934
PyObject *rawdata = NULL;
19351935
char *datastr;
19361936
Py_ssize_t len;
1937-
npy_intp size, dimensions[NPY_MAXDIMS];
1937+
npy_intp dimensions[NPY_MAXDIMS];
19381938
int nd;
19391939
npy_intp nbytes;
19401940
int overflowed;
@@ -1976,29 +1976,47 @@ array_setstate(PyArrayObject *self, PyObject *args)
19761976
* since fa could be a 0-d or scalar, and then
19771977
* PyDataMem_UserFREE will be confused
19781978
*/
1979-
size_t n_tofree = PyArray_NBYTES(self);
1980-
if (n_tofree == 0) {
1981-
PyArray_Descr *dtype = PyArray_DESCR(self);
1982-
n_tofree = dtype->elsize ? dtype->elsize : 1;
1983-
}
1979+
size_t n_tofree = PyArray_NBYTES_ALLOCATED(self);
19841980
Py_XDECREF(PyArray_DESCR(self));
19851981
fa->descr = typecode;
19861982
Py_INCREF(typecode);
19871983
nd = PyArray_IntpFromSequence(shape, dimensions, NPY_MAXDIMS);
19881984
if (nd < 0) {
19891985
return NULL;
19901986
}
1991-
size = PyArray_MultiplyList(dimensions, nd);
1992-
if (size < 0) {
1993-
/* More items than are addressable */
1994-
return PyErr_NoMemory();
1987+
/*
1988+
* We should do two things here:
1989+
* 1. Validate the input, that it is neither invalid, nor "too big"
1990+
("too big" ignores dimensions of size 0).
1991+
* 2. Find `PyArray_NBYTES` of the result, as this is what we may need to
1992+
* copy from the pickled data (may not match allocation currently if 0).
1993+
* Compare with `PyArray_NewFromDescr`, raise MemoryError for simplicity.
1994+
*/
1995+
npy_bool empty = NPY_FALSE;
1996+
nbytes = 1;
1997+
for (int i = 0; i < nd; i++) {
1998+
if (dimensions[i] < 0) {
1999+
PyErr_SetString(PyExc_TypeError,
2000+
"impossible dimension while unpickling array");
2001+
return NULL;
2002+
}
2003+
if (dimensions[i] == 0) {
2004+
empty = NPY_TRUE;
2005+
}
2006+
overflowed = npy_mul_with_overflow_intp(
2007+
&nbytes, nbytes, dimensions[i]);
2008+
if (overflowed) {
2009+
return PyErr_NoMemory();
2010+
}
19952011
}
19962012
overflowed = npy_mul_with_overflow_intp(
1997-
&nbytes, size, PyArray_DESCR(self)->elsize);
2013+
&nbytes, nbytes, PyArray_DESCR(self)->elsize);
19982014
if (overflowed) {
1999-
/* More bytes than are addressable */
20002015
return PyErr_NoMemory();
20012016
}
2017+
if (empty) {
2018+
nbytes = 0;
2019+
}
20022020

20032021
if (PyDataType_FLAGCHK(typecode, NPY_LIST_PICKLE)) {
20042022
if (!PyList_Check(rawdata)) {
@@ -2039,8 +2057,7 @@ array_setstate(PyArrayObject *self, PyObject *args)
20392057

20402058
if (len != nbytes) {
20412059
PyErr_SetString(PyExc_ValueError,
2042-
"buffer size does not" \
2043-
" match array size");
2060+
"buffer size does not match array size");
20442061
Py_DECREF(rawdata);
20452062
return NULL;
20462063
}
@@ -2097,21 +2114,18 @@ array_setstate(PyArrayObject *self, PyObject *args)
20972114
/* Bytes should always be considered immutable, but we just grab the
20982115
* pointer if they are large, to save memory. */
20992116
if (!IsAligned(self) || swap || (len <= 1000)) {
2100-
npy_intp num = PyArray_NBYTES(self);
2101-
if (num == 0) {
2102-
Py_DECREF(rawdata);
2103-
Py_RETURN_NONE;
2104-
}
2117+
npy_intp num = PyArray_NBYTES_ALLOCATED(self);
21052118
/* Store the handler in case the default is modified */
21062119
Py_XDECREF(fa->mem_handler);
21072120
fa->mem_handler = PyDataMem_GetHandler();
21082121
if (fa->mem_handler == NULL) {
2122+
Py_CLEAR(fa->mem_handler);
21092123
Py_DECREF(rawdata);
21102124
return NULL;
21112125
}
21122126
fa->data = PyDataMem_UserNEW(num, PyArray_HANDLER(self));
21132127
if (PyArray_DATA(self) == NULL) {
2114-
Py_DECREF(fa->mem_handler);
2128+
Py_CLEAR(fa->mem_handler);
21152129
Py_DECREF(rawdata);
21162130
return PyErr_NoMemory();
21172131
}
@@ -2158,11 +2172,8 @@ array_setstate(PyArrayObject *self, PyObject *args)
21582172
}
21592173
}
21602174
else {
2161-
npy_intp num = PyArray_NBYTES(self);
2162-
int elsize = PyArray_DESCR(self)->elsize;
2163-
if (num == 0 || elsize == 0) {
2164-
Py_RETURN_NONE;
2165-
}
2175+
npy_intp num = PyArray_NBYTES_ALLOCATED(self);
2176+
21662177
/* Store the functions in case the default handler is modified */
21672178
Py_XDECREF(fa->mem_handler);
21682179
fa->mem_handler = PyDataMem_GetHandler();
@@ -2171,7 +2182,7 @@ array_setstate(PyArrayObject *self, PyObject *args)
21712182
}
21722183
fa->data = PyDataMem_UserNEW(num, PyArray_HANDLER(self));
21732184
if (PyArray_DATA(self) == NULL) {
2174-
Py_DECREF(fa->mem_handler);
2185+
Py_CLEAR(fa->mem_handler);
21752186
return PyErr_NoMemory();
21762187
}
21772188
if (PyDataType_FLAGCHK(PyArray_DESCR(self), NPY_NEEDS_INIT)) {
@@ -2180,7 +2191,6 @@ array_setstate(PyArrayObject *self, PyObject *args)
21802191
PyArray_ENABLEFLAGS(self, NPY_ARRAY_OWNDATA);
21812192
fa->base = NULL;
21822193
if (_setlist_pkl(self, rawdata) < 0) {
2183-
Py_DECREF(fa->mem_handler);
21842194
return NULL;
21852195
}
21862196
}

0 commit comments

Comments
 (0)
0