BUG: Fix SystemError when pickling datetime64 array with pickle5 by pitrou · Pull Request #12748 · numpy/numpy · GitHub
[go: up one dir, main page]

Skip to content

BUG: Fix SystemError when pickling datetime64 array with pickle5 #12748

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jan 23, 2019
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
BUG: Fix SystemError when pickling datetime64 array with pickle5
Fixes gh-12745
  • Loading branch information
pitrou committed Jan 22, 2019
commit 48ac84abe08a5458e7b4d94d405758c1c267bae2
208 changes: 109 additions & 99 deletions numpy/core/src/multiarray/methods.c
10000
Original file line number Diff line number Diff line change
Expand Up @@ -1723,129 +1723,139 @@ array_reduce(PyArrayObject *self, PyObject *NPY_UNUSED(args))
}

static PyObject *
array_reduce_ex(PyArrayObject *self, PyObject *args)
array_reduce_ex_regular(PyArrayObject *self, int protocol)
{
int protocol;
PyObject *ret = NULL, *numeric_mod = NULL, *from_buffer_func = NULL;
PyObject *buffer_tuple = NULL, *pickle_module = NULL, *pickle_class = NULL;
PyObject *class_args = NULL, *class_args_tuple = NULL, *unused = NULL;
PyObject *subclass_array_reduce = NULL;

/* We do not call array_reduce directly but instead lookup and call
* the __reduce__ method to make sure that it's possible to customize
* pickling in sub-classes. */
subclass_array_reduce = PyObject_GetAttrString((PyObject *)self,
"__reduce__");
return PyObject_CallObject(subclass_array_reduce, NULL);
}

static PyObject *
array_reduce_ex_picklebuffer(PyArrayObject *self, int protocol)
{
PyObject *numeric_mod = NULL, *from_buffer_func = NULL;
PyObject *pickle_module = NULL, *picklebuf_class = NULL;
PyObject *picklebuf_args = NULL;
PyObject *buffer = NULL, *transposed_array = NULL;
PyArray_Descr *descr = NULL;
char order;

if (PyArg_ParseTuple(args, "i", &protocol)){
descr = PyArray_DESCR(self);
if ((protocol < 5) ||
(!PyArray_IS_C_CONTIGUOUS((PyArrayObject*)self) &&
!PyArray_IS_F_CONTIGUOUS((PyArrayObject*)self)) ||
PyDataType_FLAGCHK(descr, NPY_ITEM_HASOBJECT) ||
(PyType_IsSubtype(((PyObject*)self)->ob_type, &PyArray_Type) &&
((PyObject*)self)->ob_type != &PyArray_Type) ||
PyDataType_ISUNSIZED(descr)) {
/* The PickleBuffer class from version 5 of the pickle protocol
* can only be used for arrays backed by a contiguous data buffer.
* For all other cases we fallback to the generic array_reduce
* method that involves using a temporary bytes allocation. However
* we do not call array_reduce directly but instead lookup and call
* the __reduce__ method to make sure that it's possible to customize
* pickling in sub-classes. */
subclass_array_reduce = PyObject_GetAttrString((PyObject *)self,
"__reduce__");
return PyObject_CallObject(subclass_array_reduce, unused);
}
else if (protocol == 5){
ret = PyTuple_New(2);

if (ret == NULL) {
return NULL;
}
descr = PyArray_DESCR(self);

/* if the python version is below 3.8, the pickle module does not provide
* built-in support for protocol 5. We try importing the pickle5
* backport instead */
/* if the python version is below 3.8, the pickle module does not provide
* built-in support for protocol 5. We try importing the pickle5
* backport instead */
#if PY_VERSION_HEX >= 0x03080000
pickle_module = PyImport_ImportModule("pickle");
pickle_module = PyImport_ImportModule("pickle");
#elif PY_VERSION_HEX < 0x03080000 && PY_VERSION_HEX >= 0x03060000
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A nit: you could drop the PY_VERSION_HEX < 0x03080000 here, it's implied by the condition above.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You're right.

pickle_module = PyImport_ImportModule("pickle5");
if (pickle_module == NULL){
/* for protocol 5, raise a clear ImportError if pickle5 is not found
*/
PyErr_SetString(PyExc_ImportError, "Using pickle protocol 5 "
"requires the pickle5 module for python versions >=3.6 "
"and <3.8");
return NULL;
}
pickle_module = PyImport_ImportModule("pickle5");
if (pickle_module == NULL) {
/* for protocol 5, raise a clear ImportError if pickle5 is not found
*/
PyErr_SetString(PyExc_ImportError, "Using pickle protocol 5 "
"requires the pickle5 module for python versions >=3.6 "
"and <3.8");
return NULL;
}
#else
PyErr_SetString(PyExc_ValueError, "pickle protocol 5 is not available "
"for python versions < 3.6");
return NULL;
PyErr_SetString(PyExc_ValueError, "pickle protocol 5 is not available "
"for python versions < 3.6");
return NULL;
#endif
if (pickle_module == NULL){
return NULL;
}

pickle_class = PyObject_GetAttrString(pickle_module,
"PickleBuffer");
if (pickle_module == NULL){
return NULL;
}
picklebuf_class = PyObject_GetAttrString(pickle_module, "PickleBuffer");
Py_DECREF(pickle_module);
if (picklebuf_class == NULL) {
return NULL;
}

class_args_tuple = PyTuple_New(1);
if (!PyArray_IS_C_CONTIGUOUS((PyArrayObject*)self) &&
PyArray_IS_F_CONTIGUOUS((PyArrayObject*)self)){
/* Construct a PickleBuffer of the array */

/* if the array is Fortran-contiguous and not C-contiguous,
* the PickleBuffer instance will hold a view on the transpose
* of the initial array, that is C-contiguous. */
order = 'F';
transposed_array = PyArray_Transpose((PyArrayObject*)self, NULL);
PyTuple_SET_ITEM(class_args_tuple, 0, transposed_array);
}
else {
order = 'C';
PyTuple_SET_ITEM(class_args_tuple, 0, (PyObject *)self);
Py_INCREF(self);
}
if (!PyArray_IS_C_CONTIGUOUS((PyArrayObject*) self) &&
PyArray_IS_F_CONTIGUOUS((PyArrayObject*) self)) {
/* if the array is Fortran-contiguous and not C-contiguous,
* the PickleBuffer instance will hold a view on the transpose
* of the initial array, that is C-contiguous. */
order = 'F';
transposed_array = PyArray_Transpose((PyArrayObject*)self, NULL);
picklebuf_args = Py_BuildValue("(N)", transposed_array);
}
else {
order = 'C';
picklebuf_args = Py_BuildValue("(O)", self);
}

class_args = Py_BuildValue("O", class_args_tuple);
buffer = PyObject_CallObject(picklebuf_class, picklebuf_args);
Py_DECREF(picklebuf_args);
if (buffer == NULL) {
/* Some arrays may refuse to export a buffer, in which case
* just fall back on regular __reduce_ex__ implementation
* (gh-12745).
*/
PyErr_Clear();
return array_reduce_ex_regular(self, protocol);
}

buffer = PyObject_CallObject(pickle_class, class_args);
/* Get the _frombuffer() function for reconstruction */

numeric_mod = PyImport_ImportModule("numpy.core.numeric");
if (numeric_mod == NULL) {
Py_DECREF(ret);
return NULL;
}
from_buffer_func = PyObject_GetAttrString(numeric_mod,
"_frombuffer");
Py_DECREF(numeric_mod);
numeric_mod = PyImport_ImportModule("numpy.core.numeric");
if (numeric_mod == NULL) {
Py_DECREF(buffer);
return NULL;
}
from_buffer_func = PyObject_GetAttrString(numeric_mod,
"_frombuffer");
Py_DECREF(numeric_mod);
if (from_buffer_func == NULL) {
Py_DECREF(buffer);
return NULL;
}

Py_INCREF(descr);
return Py_BuildValue("N(NONN)",
from_buffer_func, buffer, (PyObject *)descr,
PyObject_GetAttrString((PyObject *)self, "shape"),
PyUnicode_FromStringAndSize(&order, 1));
}

buffer_tuple = PyTuple_New(4);
PyTuple_SET_ITEM(buffer_tuple, 0, buffer);
PyTuple_SET_ITEM(buffer_tuple, 1, (PyObject *)descr);
PyTuple_SET_ITEM(buffer_tuple, 2,
PyObject_GetAttrString((PyObject *)self,
"shape"));
PyTuple_SET_ITEM(buffer_tuple, 3,
PyUnicode_FromStringAndSize(&order,
(Py_ssize_t)1));
static PyObject *
array_reduce_ex(PyArrayObject *self, PyObject *args)
{
int protocol;
PyArray_Descr *descr = NULL;

PyTuple_SET_ITEM(ret, 0, from_buffer_func);
PyTuple_SET_ITEM(ret, 1, buffer_tuple);
if (!PyArg_ParseTuple(args, "i", &protocol)) {
return NULL;
}

return ret;
}
else {
PyErr_Format(PyExc_ValueError,
"cannot call __reduce_ex__ with protocol >= %d",
5);
return NULL;
}
descr = PyArray_DESCR(self);
if ((protocol < 5) ||
(!PyArray_IS_C_CONTIGUOUS((PyArrayObject*)self) &&
!PyArray_IS_F_CONTIGUOUS((PyArrayObject*)self)) ||
PyDataType_FLAGCHK(descr, NPY_ITEM_HASOBJECT) ||
(PyType_IsSubtype(((PyObject*)self)->ob_type, &PyArray_Type) &&
((PyObject*)self)->ob_type != &PyArray_Type) ||
PyDataType_ISUNSIZED(descr)) {
/* The PickleBuffer class from version 5 of the pickle protocol
* can only be used for arrays backed by a contiguous data buffer.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The PEP says "PickleBuffer can wrap any kind of buffer, including non-contiguous buffers." Am I missing something?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Only contiguous buffers are correctly handled by the pickle module, though.
(This is something that should probably be improved in the future, but it is non-trivial.)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is that mentioned in the PEP, pickle docs, or cpython bug tracker?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, I have to update the PEP.

* For all other cases we fallback to the generic array_reduce
* method that involves using a temporary bytes allocation. */
return array_reduce_ex_regular(self, protocol);
}
else if (protocol == 5) {
return array_reduce_ex_picklebuffer(self, protocol);
}
else {
PyErr_Format(PyExc_ValueError,
"__reduce_ex__ called with protocol > 5");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a reason we expect this not to work with a later Python pickle protocol?

return NULL;
}

}

static PyObject *
Expand Down
8 changes: 8 additions & 0 deletions numpy/core/tests/test_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -2423,3 +2423,11 @@ def test_eff1d_casting(self):
assert_equal(res, [-99, 1, 2, 3, -7, 88, 99])
assert_raises(ValueError, np.ediff1d, x, to_begin=(1<<20))
assert_raises(ValueError, np.ediff1d, x, to_end=(1<<20))

def test_pickle_datetime64_array(self):
# gh-12745 (would fail with pickle5 installed)
d = np.datetime64('2015-07-04 12:59:59.50', 'ns')
arr = np.array([d])
for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
dumped = pickle.dumps(arr, protocol=proto)
assert_equal(pickle.loads(dumped), arr)
0