ENH: allow 0-sized elements in PEP3118 format strings to align · numpy/numpy@dfc8ec7 · GitHub
[go: up one dir, main page]

Skip to content

Commit dfc8ec7

Browse files
committed
ENH: allow 0-sized elements in PEP3118 format strings to align
1 parent 08734b1 commit dfc8ec7

File tree

4 files changed

+77
-19
lines changed

4 files changed

+77
-19
lines changed
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
Trailing Padding now supported in PEP3118 buffer interface
2+
----------------------------------------------------------
3+
Previously, structured types with trailing padding such as
4+
`np.dtype({'formats': ['i1'], 'names': ['a'], 'itemsize': 4})` could not
5+
roundtrip through the PEP3118 interface using a memoryview, as in
6+
`a == np.array(memoryview(a))`. Now, such trailing padding is preserved.
7+
8+
More technically, the PEP3118 interface now supports PEP3118 format strings as
9+
follows: Within "T{}", in aligned @ mode, trailing padding is automatically
10+
assumed in the same way as C structs and numpy aligned dtypes. Outside of T{}
11+
trailing padding is not automatically added or assumed in inputs, following
12+
python's struct module, but is explicitly added by padding with "x" or unnamed
13+
zero-sized trailing elements. 0-sized unnamed elements, like "0i", can now be
14+
added anywhere in the format string, and in @ mode this will add padding bytes
15+
up to that type's alignment offset, and otherwise is ignored, as described in
16+
the python struct docs.

numpy/core/_internal.py

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -563,6 +563,9 @@ def _dtype_from_pep3118(spec):
563563
return dtype
564564

565565
def __dtype_from_pep3118(stream, is_subdtype):
566+
# numpy interprets pep3118 formats which include named fields as
567+
# structured dtypes, even if not enclosed by "T{}"
568+
566569
field_spec = dict(
567570
names=[],
568571
formats=[],
@@ -613,8 +616,7 @@ def __dtype_from_pep3118(stream, is_subdtype):
613616
is_padding = False
614617

615618
if stream.consume('T{'):
616-
value, align = __dtype_from_pep3118(
617-
stream, is_subdtype=True)
619+
value, align = __dtype_from_pep3118(stream, is_subdtype=True)
618620
elif stream.next in type_map_chars:
619621
if stream.next == 'Z':
620622
typechar = stream.advance(2)
@@ -638,12 +640,10 @@ def __dtype_from_pep3118(stream, is_subdtype):
638640
else:
639641
raise ValueError("Unknown PEP 3118 data type specifier %r" % stream.s)
640642

641-
#
642643
# Native alignment may require padding
643644
#
644645
# Here we assume that the presence of a '@' character implicitly implies
645646
# that the start of the array is *already* aligned.
646-
#
647647
extra_offset = 0
648648
if stream.byteorder == '@':
649649
start_padding = (-offset) % align
@@ -663,6 +663,19 @@ def __dtype_from_pep3118(stream, is_subdtype):
663663
# Update common alignment
664664
common_alignment = _lcm(align, common_alignment)
665665

666+
# Field name
667+
if stream.consume(':'):
668+
name = stream.consume_until(':')
669+
else:
670+
name = None
671+
672+
# struct docs explicitly say that repeat-0 elements are for padding or
673+
# alignment. We further interpret this applies only to unnamed fields
674+
if name is None and itemsize == 0:
675+
offset += extra_offset
676+
field_spec['itemsize'] = offset
677+
continue
678+
666679
# Convert itemsize to sub-array
667680
if itemsize != 1:
668681
value = dtype((value, (itemsize,)))
@@ -671,12 +684,6 @@ def __dtype_from_pep3118(stream, is_subdtype):
671684
if shape is not None:
672685
value = dtype((value, shape))
673686

674-
# Field name
675-
if stream.consume(':'):
676-
name = stream.consume_until(':')
677-
else:
678-
name = None
679-
680687
if not (is_padding and name is None):
681688
if name is not None and name in field_spec['names']:
682689
raise RuntimeError(f"Duplicate field name '{name}' in PEP3118 format")

numpy/core/src/multiarray/buffer.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -289,9 +289,10 @@ _buffer_format_string(PyArray_Descr *descr, _tmp_string_t *str,
289289
/* Insert padding manually */
290290
if (*offset > new_offset) {
291291
PyErr_SetString(
292-
PyExc_ValueError, "The buffer interface does not support "
293-
"overlapping fields or out-of-order "
294-
"fields");
292+
PyExc_ValueError,
293+
"dtypes with overlapping or out-of-order fields are not "
294+
"representable as buffers. Consider reordering the fields."
295+
);
295296
return -1;
296297
}
297298
/* add padding bytes: repeat-count plus 'x' */

numpy/core/tests/test_multiarray.py

Lines changed: 40 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7030,24 +7030,36 @@ def aligned(n):
70307030
return align*(1 + (n-1)//align)
70317031

70327032
base = dict(formats=['i'], names=['f0'])
7033+
bbase = dict(formats=['b'], names=['f0'])
70337034

70347035
self._check('ix', dict(itemsize=size + 1, **base))
70357036
self._check('ixx', dict(itemsize=size + 2, **base))
70367037
self._check('ixxx', dict(itemsize=size + 3, **base))
70377038
self._check('ixxxx', dict(itemsize=size + 4, **base))
70387039
self._check('i7x', dict(itemsize=size + 7, **base))
7039-
7040-
self._check('T{i:f0:x}', dict(itemsize=aligned(size + 1), **base))
7041-
self._check('T{i:f0:xx}', dict(itemsize=aligned(size + 2), **base))
7042-
self._check('T{i:f0:xxx}', dict(itemsize=aligned(size + 3), **base))
7043-
self._check('T{i:f0:xxxx}', dict(itemsize=aligned(size + 4), **base))
7044-
self._check('T{i:f0:7x}', dict(itemsize=aligned(size + 7), **base))
7040+
self._check('ix0i', dict(itemsize=2*size, **base))
7041+
self._check('b0i', dict(itemsize=size, **bbase))
7042+
7043+
# Our interpretation of the PEP3118/struct spec is that trailing
7044+
# padding for alignment is assumed only inside of T{}.
7045+
self._check('T{ix}', dict(itemsize=aligned(size + 1), **base))
7046+
self._check('T{ixx}', dict(itemsize=aligned(size + 2), **base))
7047+
self._check('T{ixxx}', dict(itemsize=aligned(size + 3), **base))
7048+
self._check('T{ixxxx}', dict(itemsize=aligned(size + 4), **base))
7049+
self._check('T{i7x}', dict(itemsize=aligned(size + 7), **base))
7050+
self._check('T{ix0i}', dict(itemsize=2*size, **base))
7051+
self._check('T{b0i}', dict(itemsize=size, **bbase))
7052+
7053+
# check that alignment mode affects assumed trailing padding in T{}
7054+
self._check('T{=ix}', dict(itemsize=size + 1, **base))
70457055

70467056
self._check('^ix', dict(itemsize=size + 1, **base))
70477057
self._check('^ixx', dict(itemsize=size + 2, **base))
70487058
self._check('^ixxx', dict(itemsize=size + 3, **base))
70497059
self._check('^ixxxx', dict(itemsize=size + 4, **base))
70507060
self._check('^i7x', dict(itemsize=size + 7, **base))
7061+
self._check('^ixx0i', dict(itemsize=size + 2, **base))
7062+
self._check('^b0i', np.dtype('b'))
70517063

70527064
# check we can convert to memoryview and back, aligned and unaligned
70537065
arr = np.zeros(3, dtype=np.dtype('u1,i4,u1', align=True))
@@ -7056,6 +7068,28 @@ def aligned(n):
70567068
arr = np.zeros(3, dtype=np.dtype('u1,i4,u1', align=False))
70577069
assert_equal(arr.dtype, np.array(memoryview(arr)).dtype)
70587070

7071+
a = np.empty(0, np.dtype({'formats': ['u1'], 'offsets': [0],
7072+
'names': ['x'], 'itemsize': 4}))
7073+
assert_equal(a, np.array(memoryview(a)))
7074+
7075+
# check that 0-sized elements act as padding in @ alignment and not =
7076+
# outside of T{} (see python struct docs, example at very end)
7077+
self._check('B:f0:B:f1:', [('f0', 'u1'), ('f1', 'u1')])
7078+
self._check('B:f0:0iB:f1:0i', {'names': ['f0','f1'],
7079+
'formats': ['u1','u1'],
7080+
'offsets': [0,4],
7081+
'itemsize': 8})
7082+
self._check('=B:f0:0iB:f1:0i', [('f0', 'u1'), ('f1', 'u1')])
7083+
7084+
# PEP3118 cannot support overlapping/out-of-order fields
7085+
# (update these tests if it is improved to allow this)
7086+
a = np.empty(3, dtype={'names': ['a', 'b'],
7087+
'formats': ['i4', 'i2'],
7088+
'offsets': [0, 2]})
7089+
assert_raises(ValueError, memoryview, a)
7090+
a = np.empty(3, dtype='i4,i4')[['f1', 'f0']]
7091+
assert_raises(ValueError, memoryview, a)
7092+
70597093
def test_native_padding_3(self):
70607094
dt = np.dtype(
70617095
[('a', 'b'), ('b', 'i'),

0 commit comments

Comments
 (0)
0