8000 BUG: Fix SystemError when pickling datetime64 array with pickle5 by pitrou · Pull Request #12748 · numpy/numpy · GitHub
[go: up one dir, main page]

Skip to content

BUG: Fix SystemError when pickling datetime64 array with pickle5 #12748

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jan 23, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
221 changes: 121 additions & 100 deletions numpy/core/src/multiarray/methods.c
Original file line number Diff line number Diff line change
Expand Up @@ -1723,129 +1723,150 @@ array_reduce(PyArrayObject *self, PyObject *NPY_UNUSED(args))
}

static PyObject *
array_reduce_ex(PyArrayObject *self, PyObject *args)
array_reduce_ex_regular(PyArrayObject *self, int protocol)
{
int protocol;
PyObject *ret = NULL, *numeric_mod = NULL, *from_buffer_func = NULL;
PyObject *buffer_tuple = NULL, *pickle_module = NULL, *pickle_class = NULL;
PyObject *class_args = NULL, *class_args_tuple = NULL, *unused = NULL;
PyObject *subclass_array_reduce = NULL;
PyObject *ret;

/* We do not call array_reduce directly but instead lookup and call
* the __reduce__ method to make sure that it's possible to customize
* pickling in sub-classes. */
subclass_array_reduce = PyObject_GetAttrString((PyObject *)self,
"__reduce__");
if (subclass_array_reduce == NULL) {
return NULL;
}
ret = PyObject_CallObject(subclass_array_reduce, NULL);
Py_DECREF(subclass_array_reduce);
return ret;
}

static PyObject *
array_reduce_ex_picklebuffer(PyArrayObject *self, int protocol)
{
PyObject *numeric_mod = NULL, *from_buffer_func = NULL;
PyObject *pickle_module = NULL, *picklebuf_class = NULL;
PyObject *picklebuf_args = NULL;
PyObject *buffer = NULL, *transposed_array = NULL;
PyArray_Descr *descr = NULL;
char order;

if (PyArg_ParseTuple(args, "i", &protocol)){
descr = PyArray_DESCR(self);
if ((protocol < 5) ||
(!PyArray_IS_C_CONTIGUOUS((PyArrayObject*)self) &&
!PyArray_IS_F_CONTIGUOUS((PyArrayObject*)self)) ||
PyDataType_FLAGCHK(descr, NPY_ITEM_HASOBJECT) ||
(PyType_IsSubtype(((PyObject*)self)->ob_type, &PyArray_Type) &&
((PyObject*)self)->ob_type != &PyArray_Type) ||
PyDataType_ISUNSIZED(descr)) {
/* The PickleBuffer class from version 5 of the pickle protocol
* can only be used for arrays backed by a contiguous data buffer.
* For all other cases we fallback to the generic array_reduce
* method that involves using a temporary bytes allocation. However
* we do not call array_reduce directly but instead lookup and call
* the __reduce__ method to make sure that it's possible customize
* pickling in sub-classes. */
subclass_array_reduce = PyObject_GetAttrString((PyObject *)self,
"__reduce__");
return PyObject_CallObject(subclass_array_reduce, unused);
}
else if (protocol == 5){
ret = PyTuple_New(2);

if (ret == NULL) {
return NULL;
}
descr = PyArray_DESCR(self);

/* if the python version is below 3.8, the pickle module does not provide
* built-in support for protocol 5. We try importing the pickle5
* backport instead */
/* if the python version is below 3.8, the pickle module does not provide
* built-in support for protocol 5. We try importing the pickle5
* backport instead */
#if PY_VERSION_HEX >= 0x03080000
pickle_module = PyImport_ImportModule("pickle");
#elif PY_VERSION_HEX < 0x03080000 && PY_VERSION_HEX >= 0x03060000
pickle_module = PyImport_ImportModule("pickle5");
if (pickle_module == NULL){
/* for protocol 5, raise a clear ImportError if pickle5 is not found
*/
PyErr_SetString(PyExc_ImportError, "Using pickle protocol 5 "
"requires the pickle5 module for python versions >=3.6 "
"and <3.8");
return NULL;
}
/* we expect protocol 5 to be available in Python 3.8 */
pickle_module = PyImport_ImportModule("pickle");
#elif PY_VERSION_HEX >= 0x03060000
pickle_module = PyImport_ImportModule("pickle5");
if (pickle_module == NULL) {
/* for protocol 5, raise a clear ImportError if pickle5 is not found
*/
PyErr_SetString(PyExc_ImportError, "Using pickle protocol 5 "
"requires the pickle5 module for Python >=3.6 and <3.8");
return NULL;
}
#else
PyErr_SetString(PyExc_ValueError, "pickle protocol 5 is not available "
"for python versions < 3.6");
return NULL;
PyErr_SetString(PyExc_ValueError, "pickle protocol 5 is not available "
"for Python < 3.6");
return NULL;
#endif
if (pickle_module == NULL){
return NULL;
}

pickle_class = PyObject_GetAttrString(pickle_module,
"PickleBuffer");
if (pickle_module == NULL){
return NULL;
}
picklebuf_class = PyObject_GetAttrString(pickle_module, "PickleBuffer");
Py_DECREF(pickle_module);
if (picklebuf_class == NULL) {
return NULL;
}

class_args_tuple = PyTuple_New(1);
if (!PyArray_IS_C_CONTIGUOUS((PyArrayObject*)self) &&
PyArray_IS_F_CONTIGUOUS((PyArrayObject*)self)){
/* Construct a PickleBuffer of the array */

/* if the array if Fortran-contiguous and not C-contiguous,
* the PickleBuffer instance will hold a view on the transpose
* of the initial array, that is C-contiguous. */
order = 'F';
transposed_array = PyArray_Transpose((PyArrayObject*)self, NULL);
PyTuple_SET_ITEM(class_args_tuple, 0, transposed_array);
}
else {
order = 'C';
PyTuple_SET_ITEM(class_args_tuple, 0, (PyObject *)self);
Py_INCREF(self);
}
if (!PyArray_IS_C_CONTIGUOUS((PyArrayObject*) self) &&
PyArray_IS_F_CONTIGUOUS((PyArrayObject*) self)) {
/* if the array if Fortran-contiguous and not C-contiguous,
* the PickleBuffer instance will hold a view on the transpose
* of the initial array, that is C-contiguous. */
order = 'F';
transposed_array = PyArray_Transpose((PyArrayObject*)self, NULL);
picklebuf_args = Py_BuildValue("(N)", transposed_array);
}
else {
order = 'C';
picklebuf_args = Py_BuildValue("(O)", self);
}
if (picklebuf_args == NULL) {
Py_DECREF(picklebuf_class);
return NULL;
}

class_args = Py_BuildValue("O", class_args_tuple);
buffer = PyObject_CallObject(picklebuf_class, picklebuf_args);
Py_DECREF(picklebuf_class);
Py_DECREF(picklebuf_args);
if (buffer == NULL) {
/* Some arrays may refuse to export a buffer, in which case
* just fall back on regular __reduce_ex__ implementation
* (gh-12745).
*/
PyErr_Clear();
return array_reduce_ex_regular(self, protocol);
}

buffer = PyObject_CallObject(pickle_class, class_args);
/* Get the _frombuffer() function for reconstruction */

numeric_mod = PyImport_ImportModule("numpy.core.numeric");
if (numeric_mod == NULL) {
Py_DECREF(ret);
return NULL;
}
from_buffer_func = PyObject_GetAttrString(numeric_mod,
"_frombuffer");
Py_DECREF(numeric_mod);
numeric_mod = PyImport_ImportModule("numpy.core.numeric");
if (numeric_mod == NULL) {
Py_DECREF(buffer);
return NULL;
}
from_buffer_func = PyObject_GetAttrString(numeric_mod,
"_frombuffer");
Py_DECREF(numeric_mod);
if (from_buffer_func == NULL) {
Py_DECREF(buffer);
return NULL;
}

Py_INCREF(descr);
return Py_BuildValue("N(NONN)",
from_buffer_func, buffer, (PyObject *)descr,
PyObject_GetAttrString((PyObject *)self, "shape"),
PyUnicode_FromStringAndSize(&order, 1));
}

buffer_tuple = PyTuple_New(4);
PyTuple_SET_ITEM(buffer_tuple, 0, buffer);
PyTuple_SET_ITEM(buffer_tuple, 1, (PyObject *)descr);
PyTuple_SET_ITEM(buffer_tuple, 2,
PyObject_GetAttrString((PyObject *)self,
"shape"));
PyTuple_SET_ITEM(buffer_tuple, 3,
PyUnicode_FromStringAndSize(&order,
(Py_ssize_t)1));
static PyObject *
array_reduce_ex(PyArrayObject *self, PyObject *args)
{
int protocol;
PyArray_Descr *descr = NULL;

PyTuple_SET_ITEM(ret, 0, from_buffer_func);
PyTuple_SET_ITEM(ret, 1, buffer_tuple);
if (!PyArg_ParseTuple(args, "i", &protocol)) {
return NULL;
}

return ret;
}
else {
PyErr_Format(PyExc_ValueError,
"cannot call __reduce_ex__ with protocol >= %d",
5);
return NULL;
}
descr = PyArray_DESCR(self);
if ((protocol < 5) ||
(!PyArray_IS_C_CONTIGUOUS((PyArrayObject*)self) &&
!PyArray_IS_F_CONTIGUOUS((PyArrayObject*)self)) ||
PyDataType_FLAGCHK(descr, NPY_ITEM_HASOBJECT) ||
(PyType_IsSubtype(((PyObject*)self)->ob_type, &PyArray_Type) &&
((PyObject*)self)->ob_type != &PyArray_Type) ||
PyDataType_ISUNSIZED(descr)) {
/* The PickleBuffer class from version 5 of the pickle protocol
* can only be used for arrays backed by a contiguous data buffer.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The PEP says "PickleBuffer can wrap any kind of buffer, including non-contiguous buffers.". Am I missing something?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Only contiguous buffers are correctly handled by the pickle module, though.
(this is something that should probably be improved in the future, but is non-trivial)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is that mentioned in the PEP, pickle docs, or cpython bug tracker?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, I have to update the PEP.

* For all other cases we fallback to the generic array_reduce
* method that involves using a temporary bytes allocation. */
return array_reduce_ex_regular(self, protocol);
}
else if (protocol == 5) {
return array_reduce_ex_picklebuffer(self, protocol);
}
else {
PyErr_Format(PyExc_ValueError,
"__reduce_ex__ called with protocol > 5");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a reason we expect this not to work with a later Python pickle protocol?

return NULL;
}

}

static PyObject *
Expand Down
6 changes: 6 additions & 0 deletions numpy/core/tests/test_multiarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -3769,10 +3769,16 @@ def test_roundtrip(self):
('c', float)])
]

refs = [weakref.ref(a) for a in DATA]
for a in DATA:
assert_equal(
a, pickle.loads(pickle.dumps(a, protocol=proto)),
err_msg="%r" % a)
del a, DATA, carray
gc.collect()
# check for reference leaks (gh-12793)
for ref in refs:
assert ref() is None
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@mattip, this type of test will fail on pypy, right?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, yes, I should call gc.collect.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could skip for pypy. These sorts of tests do tend to be fragile.


def _loads(self, obj):
if sys.version_info[0] >= 3:
Expand Down
8 changes: 8 additions & 0 deletions numpy/core/tests/test_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -2423,3 +2423,11 @@ def test_eff1d_casting(self):
assert_equal(res, [-99, 1, 2, 3, -7, 88, 99])
assert_raises(ValueError, np.ediff1d, x, to_begin=(1<<20))
assert_raises(ValueError, np.ediff1d, x, to_end=(1<<20))

def test_pickle_datetime64_array(self):
# gh-12745 (would fail with pickle5 installed)
d = np.datetime64('2015-07-04 12:59:59.50', 'ns')
arr = np.array([d])
for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
dumped = pickle.dumps(arr, protocol=proto)
assert_equal(pickle.loads(dumped), arr)
0