Use new buffer interface to unpack by jfolz · Pull Request #195 · msgpack/msgpack-python · GitHub

Use new buffer interface to unpack #195


Merged · 1 commit · Jun 13, 2016
enable unpacking from memoryview
jfolz committed Jun 13, 2016
commit 2b63e9fbbb3440d73d6638ec8af6315aeb8ecd97
92 changes: 74 additions & 18 deletions msgpack/_unpacker.pyx
@@ -8,16 +8,23 @@ from cpython.bytes cimport (
)
from cpython.buffer cimport (
Py_buffer,
PyBuffer_Release,
PyObject_CheckBuffer,
PyObject_GetBuffer,
PyBuffer_Release,
PyBuffer_IsContiguous,
PyBUF_READ,
PyBUF_SIMPLE,
PyBUF_FULL_RO,
)
from cpython.mem cimport PyMem_Malloc, PyMem_Free
from cpython.object cimport PyCallable_Check
from cpython.ref cimport Py_DECREF
from cpython.exc cimport PyErr_WarnEx

cdef extern from "Python.h":
ctypedef struct PyObject
cdef int PyObject_AsReadBuffer(object o, const void** buff, Py_ssize_t* buf_len) except -1
object PyMemoryView_GetContiguous(object obj, int buffertype, char order)

from libc.stdlib cimport *
from libc.string cimport *
@@ -110,6 +117,42 @@ cdef inline init_ctx(unpack_context *ctx,
def default_read_extended_type(typecode, data):
raise NotImplementedError("Cannot decode extended type with typecode=%d" % typecode)

cdef inline int get_data_from_buffer(object obj,
Py_buffer *view,
char **buf,
Py_ssize_t *buffer_len,
int *new_protocol) except 0:
cdef object contiguous
cdef Py_buffer tmp
if PyObject_CheckBuffer(obj):
new_protocol[0] = 1
if PyObject_GetBuffer(obj, view, PyBUF_FULL_RO) == -1:
raise
if view.itemsize != 1:
PyBuffer_Release(view)
raise BufferError("cannot unpack from multi-byte object")
if PyBuffer_IsContiguous(view, 'A') == 0:
PyBuffer_Release(view)
# create a contiguous copy and get buffer
contiguous = PyMemoryView_GetContiguous(obj, PyBUF_READ, 'C')
PyObject_GetBuffer(contiguous, view, PyBUF_SIMPLE)
# view must hold the only reference to contiguous,
# so memory is freed when view is released
Py_DECREF(contiguous)
Inline comment by the author (jfolz):

What this does (a pure-Python sketch of this logic follows the function below):

  • PyMemoryView_GetContiguous creates a memoryview over a contiguous copy of the non-contiguous memory
  • PyObject_GetBuffer then fills the target Py_buffer from that contiguous copy
  • Py_DECREF drops the local reference, so the copy is freed once the Py_buffer is released

buffer_len[0] = view.len
buf[0] = <char*> view.buf
return 1
else:
new_protocol[0] = 0
if PyObject_AsReadBuffer(obj, <const void**> buf, buffer_len) == -1:
raise BufferError("could not get memoryview")
PyErr_WarnEx(RuntimeWarning,
"using old buffer interface to unpack %s; "
"this leads to unpacking errors if slicing is used and "
"will be removed in a future version" % type(obj),
1)
return 1
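
To make the control flow of this helper easier to follow, here is a rough pure-Python sketch of its new-protocol branch. The function name, memoryview(), and tobytes() are stand-ins for the C-API calls (PyObject_GetBuffer, PyBuffer_IsContiguous, PyMemoryView_GetContiguous) rather than part of the patch, and the legacy-protocol branch with its RuntimeWarning is omitted:

    def get_data_from_buffer_sketch(obj):
        # Illustrative stand-in for the Cython helper above (new-protocol branch only).
        try:
            view = memoryview(obj)      # PyObject_GetBuffer(obj, view, PyBUF_FULL_RO)
        except TypeError:
            raise BufferError("object does not expose the buffer interface")
        if view.itemsize != 1:
            view.release()
            raise BufferError("cannot unpack from multi-byte object")
        if not view.contiguous:         # PyBuffer_IsContiguous(view, 'A') == 0
            # PyMemoryView_GetContiguous: take a C-contiguous copy and use its buffer
            view = memoryview(view.tobytes())
        return view

    # A strided view is the typical input that takes the contiguous-copy path:
    strided = memoryview(bytearray(8))[::2]
    assert not strided.contiguous
    assert get_data_from_buffer_sketch(strided).contiguous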

def unpackb(object packed, object object_hook=None, object list_hook=None,
bint use_list=1, encoding=None, unicode_errors="strict",
object_pairs_hook=None, ext_hook=ExtType,
@@ -129,27 +172,34 @@ def unpackb(object packed, object object_hook=None, object list_hook=None,
cdef Py_ssize_t off = 0
cdef int ret

cdef char* buf
cdef Py_buffer view
cdef char* buf = NULL
cdef Py_ssize_t buf_len
cdef char* cenc = NULL
cdef char* cerr = NULL
cdef int new_protocol = 0

get_data_from_buffer(packed, &view, &buf, &buf_len, &new_protocol)

PyObject_AsReadBuffer(packed, <const void**>&buf, &buf_len)
try:
if encoding is not None:
if isinstance(encoding, unicode):
encoding = encoding.encode('ascii')
cenc = PyBytes_AsString(encoding)

if encoding is not None:
if isinstance(encoding, unicode):
encoding = encoding.encode('ascii')
cenc = PyBytes_AsString(encoding)
if unicode_errors is not None:
if isinstance(unicode_errors, unicode):
unicode_errors = unicode_errors.encode('ascii')
cerr = PyBytes_AsString(unicode_errors)

if unicode_errors is not None:
if isinstance(unicode_errors, unicode):
unicode_errors = unicode_errors.encode('ascii')
cerr = PyBytes_AsString(unicode_errors)
init_ctx(&ctx, object_hook, object_pairs_hook, list_hook, ext_hook,
use_list, cenc, cerr,
max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len)
ret = unpack_construct(&ctx, buf, buf_len, &off)
finally:
if new_protocol:
PyBuffer_Release(&view);

init_ctx(&ctx, object_hook, object_pairs_hook, list_hook, ext_hook,
use_list, cenc, cerr,
max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len)
ret = unpack_construct(&ctx, buf, buf_len, &off)
if ret == 1:
obj = unpack_data(&ctx)
if off < buf_len:
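
At the API level, the effect of this hunk is that unpackb acquires its input through get_data_from_buffer (making a contiguous copy when needed) and releases the Py_buffer in the finally block. A short usage sketch of the kinds of input this supports, assuming the package-level msgpack functions dispatch to this extension:

    import msgpack

    packed = msgpack.packb([b"foo", b"bar"])

    msgpack.unpackb(packed)                # bytes, as before
    msgpack.unpackb(bytearray(packed))     # bytearray
    msgpack.unpackb(memoryview(packed))    # memoryview, read in place by the C extension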
@@ -335,14 +385,20 @@ cdef class Unpacker(object):
def feed(self, object next_bytes):
"""Append `next_bytes` to internal buffer."""
cdef Py_buffer pybuff
cdef int new_protocol = 0
cdef char* buf
cdef Py_ssize_t buf_len

if self.file_like is not None:
raise AssertionError(
"unpacker.feed() is not be able to use with `file_like`.")
PyObject_GetBuffer(next_bytes, &pybuff, PyBUF_SIMPLE)

get_data_from_buffer(next_bytes, &pybuff, &buf, &buf_len, &new_protocol)
try:
self.append_buffer(<char*>pybuff.buf, pybuff.len)
self.append_buffer(buf, buf_len)
finally:
PyBuffer_Release(&pybuff)
if new_protocol:
PyBuffer_Release(&pybuff)

cdef append_buffer(self, void* _buf, Py_ssize_t _buf_len):
cdef:
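Unpacker.feed above goes through the same helper, so it now copes with memoryviews, non-contiguous views, and legacy-buffer objects as well. A hedged usage sketch with memoryview slices:

    import msgpack

    packed = msgpack.packb([1, 2, 3]) + msgpack.packb([4, 5, 6])
    view = memoryview(packed)

    unpacker = msgpack.Unpacker()
    # Feed the stream in two arbitrary memoryview slices; partial messages
    # are buffered internally until they can be decoded.
    unpacker.feed(view[:3])
    unpacker.feed(view[3:])
    assert list(unpacker) == [[1, 2, 3], [4, 5, 6]]
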
37 changes: 23 additions & 14 deletions msgpack/fallback.py
@@ -1,8 +1,8 @@
"""Fallback pure Python implementation of msgpack"""

import sys
import array
import struct
import warnings

if sys.version_info[0] == 3:
PY3 = True
@@ -46,6 +46,7 @@ def getvalue(self):
from io import BytesIO as StringIO
newlist_hint = lambda size: []


from msgpack.exceptions import (
BufferFull,
OutOfData,
@@ -79,6 +80,24 @@ def _check_type_strict(obj, t, type=type, tuple=tuple):
return type(obj) is t


def _get_data_from_buffer(obj):
try:
view = memoryview(obj)
except TypeError:
# try to use legacy buffer protocol if 2.7, otherwise re-raise
if not PY3:
view = memoryview(buffer(obj))
warnings.warn("using old buffer interface to unpack %s; "
"this leads to unpacking errors if slicing is used and "
"will be removed in a future version" % type(obj),
RuntimeWarning)
else:
raise
if view.itemsize != 1:
raise ValueError("cannot unpack from multi-byte object")
return view
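
A quick illustration of what the pure-Python helper accepts; the expected results are spelled out as assertions, and on Python 2 the buffer() fallback path additionally emits the RuntimeWarning quoted above:

    import array
    from msgpack.fallback import _get_data_from_buffer

    assert _get_data_from_buffer(b"abc").tobytes() == b"abc"
    assert _get_data_from_buffer(bytearray(b"abc")).tobytes() == b"abc"
    assert _get_data_from_buffer(array.array("B", [1, 2, 3])).tobytes() == b"\x01\x02\x03"
    try:
        _get_data_from_buffer(array.array("I", [1]))   # itemsize 4 -> rejected
    except ValueError:
        pass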


def unpack(stream, **kwargs):
"""
Unpack an object from `stream`.
@@ -239,17 +258,11 @@ def __init__(self, file_like=None, read_size=0, use_list=True,
raise TypeError("`ext_hook` is not callable")

def feed(self, next_bytes):
if isinstance(next_bytes, array.array):
next_bytes = next_bytes.tostring()
if not isinstance(next_bytes, (bytes, bytearray)):
raise TypeError("next_bytes should be bytes, bytearray or array.array")
assert self._feeding

if (len(self._buffer) - self._buff_i + len(next_bytes) > self._max_buffer_size):
view = _get_data_from_buffer(next_bytes)
if (len(self._buffer) - self._buff_i + len(view) > self._max_buffer_size):
raise BufferFull
# bytes + bytearray -> bytearray
# So cast before append
self._buffer += next_bytes
self._buffer += view
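
With this change the pure-Python feed no longer special-cases array.array (previously converted internally via tostring()) or rejects other buffer objects; anything memoryview() accepts is appended straight to the internal bytearray. A small sketch that works with either implementation:

    import array
    import msgpack

    unpacker = msgpack.Unpacker()
    payload = array.array("B", msgpack.packb([1, 2, 3]))
    unpacker.feed(payload)     # accepted directly; no tostring() round-trip needed
    assert list(unpacker) == [[1, 2, 3]]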

def _consume(self):
""" Gets rid of the used parts of the buffer. """
@@ -308,7 +321,6 @@ def _read_header(self, execute=EX_CONSTRUCT):
n = 0
obj = None
self._reserve(1)
#b = struct.unpack_from("B", self._buffer, self._buff_i)[0]
b = self._buffer[self._buff_i]
self._buff_i += 1
if b & 0b10000000 == 0:
@@ -340,7 +352,6 @@ def _read_header(self, execute=EX_CONSTRUCT):
elif b == 0xc4:
typ = TYPE_BIN
self._reserve(1)
#n = struct.unpack_from("B", self._buffer, self._buff_i)[0]
n = self._buffer[self._buff_i]
self._buff_i += 1
if n > self._max_bin_len:
@@ -396,7 +407,6 @@ def _read_header(self, execute=EX_CONSTRUCT):
self._buff_i += 8
elif b == 0xcc:
self._reserve(1)
#obj = struct.unpack_from("B", self._buffer, self._buff_i)[0]
obj = self._buffer[self._buff_i]
self._buff_i += 1
elif b == 0xcd:
@@ -465,7 +475,6 @@ def _read_header(self, execute=EX_CONSTRUCT):
elif b == 0xd9:
typ = TYPE_RAW
self._reserve(1)
#n, = struct.unpack_from("B", self._buffer, self._buff_i)
n = self._buffer[self._buff_i]
self._buff_i += 1
if n > self._max_str_len:
9 changes: 9 additions & 0 deletions test/test_buffer.py
@@ -18,3 +18,12 @@ def test_unpack_bytearray():
assert [b'foo', b'bar'] == obj
expected_type = bytes
assert all(type(s) == expected_type for s in obj)


def test_unpack_memoryview():
buf = bytearray(packb(('foo', 'bar')))
view = memoryview(buf)
obj = unpackb(view, use_list=1)
assert [b'foo', b'bar'] == obj
expected_type = bytes
assert all(type(s) == expected_type for s in obj)
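
A natural follow-on test, hypothetical and not part of this commit, would unpack from a sliced view, the situation the old-interface warning in _unpacker.pyx is about:

    def test_unpack_memoryview_slice():
        # hypothetical extra test: slices of a view keep their offset under
        # the new buffer protocol
        prefix = b"\x00" * 4
        buf = bytearray(prefix + packb(("foo", "bar")))
        view = memoryview(buf)[len(prefix):]
        obj = unpackb(view, use_list=1)
        assert [b"foo", b"bar"] == obj
        assert all(type(s) == bytes for s in obj)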