Merge pull request #8977 from eric-wieser/fix-elsize-0 · numpy/numpy@04c43f1 · GitHub
[go: up one dir, main page]

Skip to content

Commit 04c43f1

Browse files
authored
Merge pull request #8977 from eric-wieser/fix-elsize-0
BUG: Fix all kinds of problems when itemsize == 0
2 parents 01471dd + b6d2cd3 commit 04c43f1

File tree

9 files changed

+216
-88
lines changed

9 files changed

+216
-88
lines changed

doc/release/1.14.0-notes.rst

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -247,6 +247,23 @@ common cache line size. This makes ``npy`` files easier to use in
247247
programs which open them with ``mmap``, especially on Linux where an
248248
``mmap`` offset must be a multiple of the page size.
249249

250+
Better support for empty structured and string types
251+
----------------------------------------------------
252+
Structured types can contain zero fields, and string dtypes can contain zero
253+
characters. Zero-length strings still cannot be created directly, and must be
254+
constructed through structured dtypes:
255+
256+
str0 = np.empty(10, np.dtype([('v', str, N)]))['v']
257+
void0 = np.empty(10, np.void)
258+
259+
It was always possible to work with these, but the following operations are
260+
now supported for these arrays:
261+
262+
* `arr.sort()`
263+
* `arr.view(bytes)`
264+
* `arr.resize(...)`
265+
* `pickle.dumps(arr)`
266+
250267

251268
Changes
252269
=======

numpy/core/src/multiarray/ctors.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3708,14 +3708,15 @@ PyArray_FromIter(PyObject *obj, PyArray_Descr *dtype, npy_intp count)
37083708
for (i = 0; (i < count || count == -1) &&
37093709
(value = PyIter_Next(iter)); i++) {
37103710
if (i >= elcount) {
3711+
npy_intp nbytes;
37113712
/*
37123713
Grow PyArray_DATA(ret):
37133714
this is similar for the strategy for PyListObject, but we use
37143715
50% overallocation => 0, 4, 8, 14, 23, 36, 56, 86 ...
37153716
*/
37163717
elcount = (i >> 1) + (i < 4 ? 4 : 2) + i;
3717-
if (elcount <= NPY_MAX_INTP/elsize) {
3718-
new_data = PyDataMem_RENEW(PyArray_DATA(ret), elcount * elsize);
3718+
if (!npy_mul_with_overflow_intp(&nbytes, elcount, elsize)) {
3719+
new_data = PyDataMem_RENEW(PyArray_DATA(ret), nbytes);
37193720
}
37203721
else {
37213722
new_data = NULL;

numpy/core/src/multiarray/descriptor.c

Lines changed: 27 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
#include "_datetime.h"
1717
#include "common.h"
18+
#include "templ_common.h" /* for npy_mul_with_overflow_intp */
1819
#include "descriptor.h"
1920
#include "alloc.h"
2021

@@ -259,12 +260,7 @@ _convert_from_tuple(PyObject *obj)
259260
res = _use_inherit(type, val, &errflag);
260261
if (res || errflag) {
261262
Py_DECREF(type);
262-
if (res) {
263-
return res;
264-
}
265-
else {
266-
return NULL;
267-
}
263+
return res;
268264
}
269265
PyErr_Clear();
270266
/*
@@ -278,7 +274,8 @@ _convert_from_tuple(PyObject *obj)
278274
if (error_converting(itemsize)) {
279275
PyErr_SetString(PyExc_ValueError,
280276
"invalid itemsize in generic type tuple");
281-
goto fail;
277+
Py_DECREF(type);
278+
return NULL;
282279
}
283280
PyArray_DESCR_REPLACE(type);
284281
if (type->type_num == NPY_UNICODE) {
@@ -287,13 +284,15 @@ _convert_from_tuple(PyObject *obj)
287284
else {
288285
type->elsize = itemsize;
289286
}
287+
return type;
290288
}
291289
else if (type->metadata && (PyDict_Check(val) || PyDictProxy_Check(val))) {
292290
/* Assume it's a metadata dictionary */
293291
if (PyDict_Merge(type->metadata, val, 0) == -1) {
294292
Py_DECREF(type);
295293
return NULL;
296294
}
295+
return type;
297296
}
298297
else {
299298
/*
@@ -302,12 +301,12 @@ _convert_from_tuple(PyObject *obj)
302301
* a new fields attribute.
303302
*/
304303
PyArray_Dims shape = {NULL, -1};
305-
PyArray_Descr *newdescr;
304+
PyArray_Descr *newdescr = NULL;
306305
npy_intp items;
307-
int i;
306+
int i, overflowed;
307+
int nbytes;
308308

309309
if (!(PyArray_IntpConverter(val, &shape)) || (shape.len > NPY_MAXDIMS)) {
310-
npy_free_cache_dim_obj(shape);
311310
PyErr_SetString(PyExc_ValueError,
312311
"invalid shape in fixed-type tuple.");
313312
goto fail;
@@ -324,46 +323,43 @@ _convert_from_tuple(PyObject *obj)
324323
npy_free_cache_dim_obj(shape);
325324
return type;
326325
}
327-
newdescr = PyArray_DescrNewFromType(NPY_VOID);
328-
if (newdescr == NULL) {
329-
npy_free_cache_dim_obj(shape);
330-
goto fail;
331-
}
332326

333327
/* validate and set shape */
334328
for (i=0; i < shape.len; i++) {
335329
if (shape.ptr[i] < 0) {
336330
PyErr_SetString(PyExc_ValueError,
337331
"invalid shape in fixed-type tuple: "
338332
"dimension smaller then zero.");
339-
npy_free_cache_dim_obj(shape);
340333
goto fail;
341334
}
342335
if (shape.ptr[i] > NPY_MAX_INT) {
343336
PyErr_SetString(PyExc_ValueError,
344337
"invalid shape in fixed-type tuple: "
345338
"dimension does not fit into a C int.");
346-
npy_free_cache_dim_obj(shape);
347339
goto fail;
348340
}
349341
}
350342
items = PyArray_OverflowMultiplyList(shape.ptr, shape.len);
351-
if ((items < 0) || (items > (NPY_MAX_INT / type->elsize))) {
343+
if (items < 0 || items > NPY_MAX_INT) {
344+
overflowed = 1;
345+
}
346+
else {
347+
overflowed = npy_mul_with_overflow_int(
348+
&nbytes, type->elsize, (int) items);
349+
}
350+
if (overflowed) {
352351
PyErr_SetString(PyExc_ValueError,
353352
"invalid shape in fixed-type tuple: dtype size in "
354353
"bytes must fit into a C int.");
355-
npy_free_cache_dim_obj(shape);
356354
goto fail;
357355
}
358-
newdescr->elsize = type->elsize * items;
359-
if (newdescr->elsize == -1) {
360-
npy_free_cache_dim_obj(shape);
356+
newdescr = PyArray_DescrNewFromType(NPY_VOID);
357+
if (newdescr == NULL) {
361358
goto fail;
362359
}
363-
360+
newdescr->elsize = nbytes;
364361
newdescr->subarray = PyArray_malloc(sizeof(PyArray_ArrayDescr));
365362
if (newdescr->subarray == NULL) {
366-
Py_DECREF(newdescr);
367363
PyErr_NoMemory();
368364
goto fail;
369365
}
@@ -382,29 +378,26 @@ _convert_from_tuple(PyObject *obj)
382378
*/
383379
newdescr->subarray->shape = PyTuple_New(shape.len);
384380
if (newdescr->subarray->shape == NULL) {
385-
npy_free_cache_dim_obj(shape);
386381
goto fail;
387382
}
388383
for (i=0; i < shape.len; i++) {
389384
PyTuple_SET_ITEM(newdescr->subarray->shape, i,
390385
PyInt_FromLong((long)shape.ptr[i]));
391386

392387
if (PyTuple_GET_ITEM(newdescr->subarray->shape, i) == NULL) {
393-
Py_DECREF(newdescr->subarray->shape);
394-
newdescr->subarray->shape = NULL;
395-
npy_free_cache_dim_obj(shape);
396388
goto fail;
397389
}
398390
}
399391

400392
npy_free_cache_dim_obj(shape);
401-
type = newdescr;
402-
}
403-
return type;
393+
return newdescr;
404394

405-
fail:
406-
Py_XDECREF(type);
407-
return NULL;
395+
fail:
396+
Py_XDECREF(type);
397+
Py_XDECREF(newdescr);
398+
npy_free_cache_dim_obj(shape);
399+
return NULL;
400+
}
408401
}
409402

410403
/*

numpy/core/src/multiarray/getset.c

Lines changed: 12 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -469,22 +469,18 @@ array_descr_set(PyArrayObject *self, PyObject *arg)
469469
Py_DECREF(safe);
470470
}
471471

472-
if (newtype->elsize == 0) {
473-
/* Allow a void view */
474-
if (newtype->type_num == NPY_VOID) {
475-
PyArray_DESCR_REPLACE(newtype);
476-
if (newtype == NULL) {
477-
return -1;
478-
}
479-
newtype->elsize = PyArray_DESCR(self)->elsize;
480-
}
481-
/* But no other flexible types */
482-
else {
483-
PyErr_SetString(PyExc_TypeError,
484-
"data-type must not be 0-sized");
485-
Py_DECREF(newtype);
472+
/*
473+
* Treat V0 as resizable void - unless the destination is already V0, then
474+
* don't allow np.void to be duplicated
475+
*/
476+
if (newtype->type_num == NPY_VOID &&
477+
newtype->elsize == 0 &&
478+
PyArray_DESCR(self)->elsize != 0) {
479+
PyArray_DESCR_REPLACE(newtype);
480+
if (newtype == NULL) {
486481
return -1;
487482
}
483+
newtype->elsize = PyArray_DESCR(self)->elsize;
488484
}
489485

490486

@@ -532,7 +528,8 @@ array_descr_set(PyArrayObject *self, PyObject *arg)
532528

533529
if (newtype->elsize < PyArray_DESCR(self)->elsize) {
534530
/* if it is compatible, increase the size of the relevant axis */
535-
if (PyArray_DESCR(self)->elsize % newtype->elsize != 0) {
531+
if (newtype->elsize == 0 ||
532+
PyArray_DESCR(self)->elsize % newtype->elsize != 0) {
536533
PyErr_SetString(PyExc_ValueError,
537534
"When changing to a smaller dtype, its size must be a "
538535
"divisor of the size of original dtype");

numpy/core/src/multiarray/methods.c

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include "npy_import.h"
1414
#include "ufunc_override.h"
1515
#include "common.h"
16+
#include "templ_common.h" /* for npy_mul_with_overflow_intp */
1617
#include "ctors.h"
1718
#include "calculation.h"
1819
#include "convert_datatype.h"
@@ -1671,6 +1672,8 @@ array_setstate(PyArrayObject *self, PyObject *args)
16711672
Py_ssize_t len;
16721673
npy_intp size, dimensions[NPY_MAXDIMS];
16731674
int nd;
1675+
npy_intp nbytes;
1676+
int overflowed;
16741677

16751678
PyArrayObject_fields *fa = (PyArrayObject_fields *)self;
16761679

@@ -1712,13 +1715,15 @@ array_setstate(PyArrayObject *self, PyObject *args)
17121715
return NULL;
17131716
}
17141717
size = PyArray_MultiplyList(dimensions, nd);
1715-
if (PyArray_DESCR(self)->elsize == 0) {
1716-
PyErr_SetString(PyExc_ValueError, "Invalid data-type size.");
1717-
return NULL;
1718+
if (size < 0) {
1719+
/* More items than are addressable */
1720+
return PyErr_NoMemory();
17181721
}
1719-
if (size < 0 || size > NPY_MAX_INTP / PyArray_DESCR(self)->elsize) {
1720-
PyErr_NoMemory();
1721-
return NULL;
1722+
overflowed = npy_mul_with_overflow_intp(
1723+
&nbytes, size, PyArray_DESCR(self)->elsize);
1724+
if (overflowed) {
1725+
/* More bytes than are addressable */
1726+
return PyErr_NoMemory();
17221727
}
17231728

17241729
if (PyDataType_FLAGCHK(typecode, NPY_LIST_PICKLE)) {
@@ -1760,7 +1765,7 @@ array_setstate(PyArrayObject *self, PyObject *args)
17601765
return NULL;
17611766
}
17621767

1763-
if ((len != (PyArray_DESCR(self)->elsize * size))) {
1768+
if (len != nbytes) {
17641769
PyErr_SetString(PyExc_ValueError,
17651770
"buffer size does not" \
17661771
" match array size");
@@ -1822,7 +1827,7 @@ array_setstate(PyArrayObject *self, PyObject *args)
18221827
}
18231828
if (swap) {
18241829
/* byte-swap on pickle-read */
1825-
npy_intp numels = num / PyArray_DESCR(self)->elsize;
1830+
npy_intp numels = PyArray_SIZE(self);
18261831
PyArray_DESCR(self)->f->copyswapn(PyArray_DATA(self),
18271832
PyArray_DESCR(self)->elsize,
18281833
datastr, PyArray_DESCR(self)->elsize,

0 commit comments

Comments
 (0)
0