8000 ENH: properly account for trailing padding in PEP3118 by ahaldane · Pull Request #7798 · numpy/numpy · GitHub
[go: up one dir, main page]

Skip to content

ENH: properly account for trailing padding in PEP3118 #7798

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
ENH: output trailing padding in PEP3118 format strings
Fixes #7797
  • Loading branch information
ahaldane committed Feb 21, 2021
commit 08734b1ab72f67c4e5ee5df8f3c8e41edd10e2be
13 changes: 11 additions & 2 deletions numpy/core/_internal.py
Original file line number Diff line number Diff line change
Expand Up @@ -689,8 +689,17 @@ def __dtype_from_pep3118(stream, is_subdtype):

field_spec['itemsize'] = offset

# extra final padding for aligned types
if stream.byteorder == '@':
# extra final padding for aligned types:
# Inside of T{}, if in aligned mode, we add trailing padding like in a
# C struct so the end of the struct is aligned.
# Note that this behavior is *not* described by the PEP3118 spec, which
# does not say anything about T{} t 8000 railing padding. Note also that the Py
# struct docs say that trailing padding should *not* be implicitly added
# when outside of T{}, and the user should explicitly add a 0-sized
# trailing field to add padding, however struct does not implement T{}. So,
# here numpy is taking the initiative to specify how trailing padding works
# inside T{}, while we mimic struct outside of T{}.
if is_subdtype and stream.byteorder == '@':
field_spec['itemsize'] += (-offset) % common_alignment

# Check if this was a simple 1-item type, and unwrap it
Expand Down
40 changes: 34 additions & 6 deletions numpy/core/src/multiarray/buffer.c
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,19 @@ _append_str(_tmp_string_t *s, char const *p)
return 0;
}

static int
_append_int(_tmp_string_t *s, unsigned int n)
{
/* even on 64bit, int strings are at most 20 bytes; 256 is overkill */
static char buf[256];
int nw = snprintf(buf, 256, "%u", n);
if (nw < 0 || nw >= 256) {
return -1;
}
return _append_str(s, buf);
}


/*
* Append a PEP3118-formatted field name, ":name:", to str
*/
Expand Down Expand Up @@ -276,15 +289,20 @@ _buffer_format_string(PyArray_Descr *descr, _tmp_string_t *str,
/* Insert padding manually */
if (*offset > new_offset) {
PyErr_SetString(
PyExc_ValueError,
"dtypes with overlapping or out-of-order fields are not "
"representable as buffers. Consider reordering the fields."
);
PyExc_ValueError, "The buffer interface does not support "
"overlapping fields or out-of-order "
"fields");
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this change of message deliberate, or just a consequence of the merge?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

deliberate, based on your review above.

I just did a little bit more than a simple rebase. I also added support for 0-sized unnamed padding, see the release note.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

oh I see what you mean, the previous message seems good enough.

return -1;
}
while (*offset < new_offset) {
/* add padding bytes: repeat-count plus 'x' */
if (*offset < new_offset) {
if (new_offset - (*offset) > 1) {
if (_append_int(str, new_offset - (*offset)) < 0) {
return -1;
}
}
if (_append_char(str, 'x') < 0) return -1;
++*offset;
*offset = new_offset;
}

/* Insert child item */
Expand All @@ -297,6 +315,16 @@ _buffer_format_string(PyArray_Descr *descr, _tmp_string_t *str,
/* Insert field name */
if (_append_field_name(str, name) < 0) return -1;
}

/* Add any trailing padding */
if (*offset < descr->elsize) {
if (descr->elsize - (*offset) > 1) {
if (_append_int(str, descr->elsize - (*offset)) < 0) return -1;
}
if (_append_char(str, 'x') < 0) return -1;
*offset = descr->elsize;
}

if (_append_char(str, '}') < 0) return -1;
}
else {
Expand Down
37 changes: 21 additions & 16 deletions numpy/core/tests/test_multiarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -7023,8 +7023,6 @@ def test_native_padding_2(self):
self._check('^x3T{xi}', {'f0': (({'f0': ('i', 1)}, (3,)), 1)})

def test_trailing_padding(self):
# Trailing padding should be included, *and*, the item size
# should match the alignment if in aligned mode
align = np.dtype('i').alignment
size = np.dtype('i').itemsize

Expand All @@ -7033,18 +7031,31 @@ def aligned(n):

base = dict(formats=['i'], names=['f0'])

self._check('ix', dict(itemsize=aligned(size + 1), **base))
self._check('ixx', dict(itemsize=aligned(size + 2), **base))
self._check('ixxx', dict(itemsize=aligned(size + 3), **base))
self._check('ixxxx', dict(itemsize=aligned(size + 4), **base))
self._check('i7x', dict(itemsize=aligned(size + 7), **base))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think these tests might remain valid with T{...}? In particular, I would expect trailing padding to be kept in a struct context, to match the behaviour of sizeof(T) in C, and what happens when structs are repeated

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The PEP3118 spec is unclear about this.

One could argue the struct module has set no precedent in judging how alignment and padding work here since it doesn't implement the T{} format. We might then feel free to set the precedent here in this PR, deciding that aligned formats add trailing padding only inside T{}.

Copy link
Member
@eric-wieser eric-wieser Feb 6, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Whereas the ctypes module has set the precedent of ignoring all the remarks that the struct docs make about alignment...

self._check('ix', dict(itemsize=size + 1, **base))
self._check('ixx', dict(itemsize=size + 2, **base))
self._check('ixxx', dict(itemsize=size + 3, **base))
self._check('ixxxx', dict(itemsize=size + 4, **base))
self._check('i7x', dict(itemsize=size + 7, **base))

self._check('T{i:f0:x}', dict(itemsize=aligned(size + 1), **base))
self._check('T{i:f0:xx}', dict(itemsize=aligned(size + 2), **base))
self._check('T{i:f0:xxx}', dict(itemsize=aligned(size + 3), **base))
self._check('T{i:f0:xxxx}', dict(itemsize=aligned(size + 4), **base))
self._check('T{i:f0:7x}', dict(itemsize=aligned(size + 7), **base))

self._check('^ix', dict(itemsize=size + 1, **base))
self._check('^ixx', dict(itemsize=size + 2, **base))
self._check('^ixxx', dict(itemsize=size + 3, **base))
self._check('^ixxxx', dict(itemsize=size + 4, **base))
self._check('^i7x', dict(itemsize=size + 7, **base))

# check we can convert to memoryview and back, aligned and unaligned
arr = np.zeros(3, dtype=np.dtype('u1,i4,u1', align=True))
assert_equal(arr.dtype, np.array(memoryview(arr)).dtype)

arr = np.zeros(3, dtype=np.dtype('u1,i4,u1', align=False))
assert_equal(arr.dtype, np.array(memoryview(arr)).dtype)

def test_native_padding_3(self):
dt = np.dtype(
[('a', 'b'), ('b', 'i'),
Expand Down Expand Up @@ -7075,15 +7086,9 @@ def test_intra_padding(self):
align = np.dtype('i').alignment
size = np.dtype('i').itemsize

def aligned(n):
return (align*(1 + (n-1)//align))

self._check('(3)T{ix}', (dict(
names=['f0'],
formats=['i'],
offsets=[0],
itemsize=aligned(size + 1)
), (3,)))
expected_dtype = {'names': ['f0'], 'formats': ['i'],
'itemsize': np.dtype('i,V1', align=True).itemsize}
self._check('(3)T{ix}', (expected_dtype, (3,)))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't this an exact duplicate of the above test? What am I missing?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

oh sorry, I fudged the rebase here

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Although perhaps using np.dtype(..., align=True) is an improvement over aligned(size + 1)


def test_char_vs_string(self):
dt = np.dtype('c')
Expand Down
0