8000 MAINT: Move ctype -> dtype conversion to python by eric-wieser · Pull Request #12254 · numpy/numpy · GitHub
[go: up one dir, main page]

Skip to content

MAINT: Move ctype -> dtype conversion to python #12254

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Oct 26, 2018
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
MAINT/BUG: Move ctype -> dtype conversion to python, fix issues
This comes with a number of changes:

* Propagating the error if parsing a ctypes object goes wrong, rather than silencing it and trying more things.
* Rejecting ctypes bitfields, which were incorrectly treated as subarrays.
* Rejecting ctypes Unions, which were previously treated as if they were structs
* Rejecting ctypes pointers, which were silently converted into their dereferenced types.
  This was particularly dangerous behavior when parsing structs with nested pointers, where swapping out the type changes the struct layout!
* No longer support ctypes duck-types, which includes...
* No longer supporting a `_fields_` attribute on `np.void` subclasses. This was a bug due to a poorly-shared code-path between void and ctypes types.

The upshot is it should be easier to fix future problems in parsing ctypes types.
  • Loading branch information
eric-wieser committed Oct 26, 2018
commit 44810f07ec6a281d96d6a69c105ce6ae59aec718
68 changes: 68 additions & 0 deletions numpy/core/_dtype_ctypes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
"""
Conversion from ctypes to dtype.

In an ideal world, we could acheive this through the PEP3118 buffer protocol,
something like::

def dtype_from_ctypes_type(t):
# needed to ensure that the shape of `t` is within memoryview.format
class DummyStruct(ctypes.Structure):
_fields_ = [('a', t)]

# empty to avoid memory allocation
ctype_0 = (DummyStruct * 0)()
mv = memoryview(ctype_0)

# convert the struct, and slice back out the field
return _dtype_from_pep3118(mv.format)['a']

Unfortunately, this fails because:

* ctypes cannot handle length-0 arrays with PEP3118 (bpo-32782)
* PEP3118 cannot represent unions, but both numpy and ctypes can
* ctypes cannot handle big-endian structs with PEP3118 (bpo-32780)
"""
import _ctypes
import ctypes

import numpy as np


def _from_ctypes_array(t):
return np.dtype((dtype_from_ctypes_type(t._type_), (t._length_,)))


def _from_ctypes_structure(t):
# TODO: gh-10533, gh-10532
fields = []
for item in t._fields_:
if len(item) > 2:
raise TypeError(
"ctypes bitfields have no dtype equivalent")
fname, ftyp = item
fields.append((fname, dtype_from_ctypes_type(ftyp)))

# by default, ctypes structs are aligned
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are they ever otherwise?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's a _pack_ attribute that makes them otherwise: #10532

return np.dtype(fields, align=True)


def dtype_from_ctypes_type(t):
"""
Construct a dtype object from a ctypes type
"""
if issubclass(t, _ctypes.Array):
return _from_ctypes_array(t)
elif issubclass(t, _ctypes._Pointer):
raise TypeError("ctypes pointers have no dtype equivalent")
elif issubclass(t, _ctypes.Structure):
return _from_ctypes_structure(t)
elif issubclass(t, _ctypes.Union):
# TODO
raise NotImplementedError(
"conversion from ctypes.Union types like {} to dtype"
.format(t.__name__))
elif isinstance(t._type_, str):
return np.dtype(t._type_)
else:
raise NotImplementedError(
"Unknown ctypes type {}".format(t.__name__))
124 changes: 57 additions & 67 deletions numpy/core/src/multiarray/descriptor.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
#include "numpy/arrayscalars.h"

#include "npy_config.h"

#include "npy_ctypes.h"
#include "npy_pycompat.h"

#include "_datetime.h"
Expand Down Expand Up @@ -54,79 +54,46 @@ Borrowed_PyMapping_GetItemString(PyObject *o, char *key)
return ret;
}

/*
* Creates a dtype object from ctypes inputs.
*
* Returns a new reference to a dtype object, or NULL
* if this is not possible. When it returns NULL, it does
* not set a Python exception.
*/
static PyArray_Descr *
_arraydescr_fromctypes(PyObject *obj)
_arraydescr_from_ctypes_type(PyTypeObject *type)
{
PyObject *dtypedescr;
PyArray_Descr *newdescr;
int ret;
PyObject *_numpy_dtype_ctypes;
PyObject *res;

/* Understand basic ctypes */
dtypedescr = PyObject_GetAttrString(obj, "_type_");
PyErr_Clear();
if (dtypedescr) {
ret = PyArray_DescrConverter(dtypedescr, &newdescr);
Py_DECREF(dtypedescr);
if (ret == NPY_SUCCEED) {
PyObject *length;
/* Check for ctypes arrays */
length = PyObject_GetAttrString(obj, "_length_");
PyErr_Clear();
if (length) {
/* derived type */
PyObject *newtup;
PyArray_Descr *derived;
newtup = Py_BuildValue("N(N)", newdescr, length);
ret = PyArray_DescrConverter(newtup, &derived);
Py_DECREF(newtup);
if (ret == NPY_SUCCEED) {
return derived;
}
PyErr_Clear();
return NULL;
}
return newdescr;
}
PyErr_Clear();
/* Call the python function of the same name. */
_numpy_dtype_ctypes = PyImport_ImportModule("numpy.core._dtype_ctypes");
if (_numpy_dtype_ctypes == NULL) {
return NULL;
}
/* Understand ctypes structures --
bit-fields are not supported
automatically aligns */
dtypedescr = PyObject_GetAttrString(obj, "_fields_");
PyErr_Clear();
if (dtypedescr) {
ret = PyArray_DescrAlignConverter(dtypedescr, &newdescr);
Py_DECREF(dtypedescr);
if (ret == NPY_SUCCEED) {
return newdescr;
}
PyErr_Clear();
res = PyObject_CallMethod(_numpy_dtype_ctypes, "dtype_from_ctypes_type", "O", (PyObject *)type);
Py_DECREF(_numpy_dtype_ctypes);
if (res == NULL) {
return NULL;
}

return NULL;
/*
* sanity check that dtype_from_ctypes_type returned the right type,
* since getting it wrong would give segfaults.
*/
if (!PyObject_TypeCheck(res, &PyArrayDescr_Type)) {
Py_DECREF(res);
PyErr_BadInternalCall();
return NULL;
}

return (PyArray_Descr *)res;
}

/*
* This function creates a dtype object when:
* - The object has a "dtype" attribute, and it can be converted
* to a dtype object.
* - The object is a ctypes type object, including array
* and structure types.
* This function creates a dtype object when the object has a "dtype" attribute,
* and it can be converted to a dtype object.
*
* Returns a new reference to a dtype object, or NULL
* if this is not possible. When it returns NULL, it does
* not set a Python exception.
*/
NPY_NO_EXPORT PyArray_Descr *
_arraydescr_fromobj(PyObject *obj)
_arraydescr_from_dtype_attr(PyObject *obj)
{
PyObject *dtypedescr;
PyArray_Descr *newdescr = NULL;
Expand All @@ -135,15 +102,18 @@ _arraydescr_fromobj(PyObject *obj)
/* For arbitrary objects that have a "dtype" attribute */
dtypedescr = PyObject_GetAttrString(obj, "dtype");
PyErr_Clear();
if (dtypedescr != NULL) {
ret = PyArray_DescrConverter(dtypedescr, &newdescr);
Py_DECREF(dtypedescr);
if (ret == NPY_SUCCEED) {
return newdescr;
}
if (dtypedescr == NULL) {
return NULL;
}

ret = PyArray_DescrConverter(dtypedescr, &newdescr);
Py_DECREF(dtypedescr);
if (ret != NPY_SUCCEED) {
PyErr_Clear();
return NULL;
}
return _arraydescr_fromctypes(obj);

return newdescr;
}

/*
Expand Down Expand Up @@ -1423,10 +1393,20 @@ PyArray_DescrConverter(PyObject *obj, PyArray_Descr **at)
check_num = NPY_VOID;
}
else {
*at = _arraydescr_fromobj(obj);
*at = _arraydescr_from_dtype_attr(obj);
if (*at) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Error return here is NULL, be nice to use that in the error check.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This whole function seems to use the style of not comparing against NULL, so I'd be inclined to leave it consistent

return NPY_SUCCEED;
}

/*
* Note: this comes after _arraydescr_from_dtype_attr because the ctypes
* type might override the dtype if numpy does not otherwise
* support it.
*/
if (npy_ctypes_check((PyTypeObject *)obj)) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Generally prefer to check against value, especially as the error return is '0'.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or maybe NPY_SUCCEED as that seems to be the interface for this bunch of functions.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd rather leave this as is, since it matches things like PyString_Check

*at = _arraydescr_from_ctypes_type((PyTypeObject *)obj);
return *at ? NPY_SUCCEED : NPY_FAIL;
}
}
goto finish;
}
Expand Down Expand Up @@ -1596,13 +1576,23 @@ PyArray_DescrConverter(PyObject *obj, PyArray_Descr **at)
goto fail;
}
else {
*at = _arraydescr_fromobj(obj);
*at = _arraydescr_from_dtype_attr(obj);
if (*at) {
return NPY_SUCCEED;
}
if (PyErr_Occurred()) {
return NPY_FAIL;
}

/*
* Note: this comes after _arraydescr_from_dtype_attr because the ctypes
* type might override the dtype if numpy does not otherwise
* support it.
*/
if (npy_ctypes_check(Py_TYPE(obj))) {
*at = _arraydescr_from_ctypes_type(Py_TYPE(obj));
return *at ? NPY_SUCCEED : NPY_FAIL;
}
goto fail;
}
if (PyErr_Occurred()) {
Expand Down
2 changes: 1 addition & 1 deletion numpy/core/src/multiarray/descriptor.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ NPY_NO_EXPORT PyObject *
array_set_typeDict(PyObject *NPY_UNUSED(ignored), PyObject *args);

NPY_NO_EXPORT PyArray_Descr *
_arraydescr_fromobj(PyObject *obj);
_arraydescr_from_dtype_attr(PyObject *obj);


NPY_NO_EXPORT int
Expand Down
2 changes: 1 addition & 1 deletion numpy/core/src/multiarray/scalarapi.c
Original file line number Diff line number Diff line change
Expand Up @@ -471,7 +471,7 @@ PyArray_DescrFromTypeObject(PyObject *type)
/* Do special thing for VOID sub-types */
if (PyType_IsSubtype((PyTypeObject *)type, &PyVoidArrType_Type)) {
new = PyArray_DescrNewFromType(NPY_VOID);
conv = _arraydescr_fromobj(type);
conv = _arraydescr_from_dtype_attr(type);
if (conv) {
new->fields = conv->fields;
Py_INCREF(new->fields);
Expand Down
30 changes: 30 additions & 0 deletions numpy/core/tests/test_dtype.py
Original file line number Diff line number Diff line change
Expand Up @@ -794,6 +794,36 @@ class PaddedStruct(ctypes.Structure):
], align=True)
self.check(PaddedStruct, expected)

def test_bit_fields(self):
class BitfieldStruct(ctypes.Structure):
_fields_ = [
('a', ctypes.c_uint8, 7),
('b', ctypes.c_uint8, 1)
]
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Previously this produced dtype([('a', 'u1', (7,)), ('b', 'u1')], align=True), which was garbage.

assert_raises(TypeError, np.dtype, BitfieldStruct)
assert_raises(TypeError, np.dtype, BitfieldStruct())

def test_pointer(self):
p_uint8 = ctypes.POINTER(ctypes.c_uint8)
assert_raises(TypeError, np.dtype, p_uint8)

@pytest.mark.xfail(
reason="Unions are not implemented",
raises=NotImplementedError)
def test_union(self):
class Union(ctypes.Union):
_fields_ = [
('a', ctypes.c_uint8),
('b', ctypes.c_uint16),
]
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And this one produced dtype({'names':['a','b'], 'formats':['u1','<u2'], 'offsets':[0,2], 'itemsize':4}, align=True), which is not a union

expected = np.dtype(dict(
names=['a', 'b'],
formats=[np.uint8, np.uint16],
offsets=[0, 0],
itemsize=2
))
self.check(Union, expected)

@pytest.mark.xfail(reason="_pack_ is ignored - see gh-11651")
def test_packed_structure(self):
class PackedStructure(ctypes.Structure):
Expand Down
0