Merge pull request #195 from jfolz/master · lbolla/msgpack-python@0ef5f4d · GitHub
[go: up one dir, main page]

Skip to content
8000

Commit 0ef5f4d

Browse files
authored
Merge pull request msgpack#195 from jfolz/master
Use new buffer interface to unpack
2 parents b887c1a + 2b63e9f commit 0ef5f4d

File tree

3 files changed

+106
-32
lines changed

3 files changed

+106
-32
lines changed

msgpack/_unpacker.pyx

Lines changed: 74 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -8,16 +8,23 @@ from cpython.bytes cimport (
88
)
99
from cpython.buffer cimport (
1010
Py_buffer,
11-
PyBuffer_Release,
11+
PyObject_CheckBuffer,
1212
PyObject_GetBuffer,
13+
PyBuffer_Release,
14+
PyBuffer_IsContiguous,
15+
PyBUF_READ,
1316
PyBUF_SIMPLE,
17+
PyBUF_FULL_RO,
1418
)
1519
from cpython.mem cimport PyMem_Malloc, PyMem_Free
1620
from cpython.object cimport PyCallable_Check
21+
from cpython.ref cimport Py_DECREF
22+
from cpython.exc cimport PyErr_WarnEx
1723

1824
cdef extern from "Python.h":
1925
ctypedef struct PyObject
2026
cdef int PyObject_AsReadBuffer(object o, const void** buff, Py_ssize_t* buf_len) except -1
27+
object PyMemoryView_GetContiguous(object obj, int buffertype, char order)
2128

2229
from libc.stdlib cimport *
2330
from libc.string cimport *
@@ -110,6 +117,42 @@ cdef inline init_ctx(unpack_context *ctx,
110117
def default_read_extended_type(typecode, data):
111118
raise NotImplementedError("Cannot decode extended type with typecode=%d" % typecode)
112119

120+
cdef inline int get_data_from_buffer(object obj,
121+
Py_buffer *view,
122+
char **buf,
123+
Py_ssize_t *buffer_len,
124+
int *new_protocol) except 0:
125+
cdef object contiguous
126+
cdef Py_buffer tmp
127+
if PyObject_CheckBuffer(obj):
128+
new_protocol[0] = 1
129+
if PyObject_GetBuffer(obj, view, PyBUF_FULL_RO) == -1:
130+
raise
131+
if view.itemsize != 1:
132+
PyBuffer_Release(view)
133+
raise BufferError("cannot unpack from multi-byte object")
134+
if PyBuffer_IsContiguous(view, 'A') == 0:
135+
PyBuffer_Release(view)
136+
# create a contiguous copy and get buffer
137+
contiguous = PyMemoryView_GetContiguous(obj, PyBUF_READ, 'C')
138+
PyObject_GetBuffer(contiguous, view, PyBUF_SIMPLE)
139+
# view must hold the only reference to contiguous,
140+
# so memory is freed when view is released
141+
Py_DECREF(contiguous)
142+
buffer_len[0] = view.len
143+
buf[0] = <char*> view.buf
144+
return 1
145+
else:
146+
new_protocol[0] = 0
147+
if PyObject_AsReadBuffer(obj, <const void**> buf, buffer_len) == -1:
148+
raise BufferError("could not get memoryview")
149+
PyErr_WarnEx(RuntimeWarning,
150+
"using old buffer interface to unpack %s; "
151+
"this leads to unpacking errors if slicing is used and "
152+
"will be removed in a future version" % type(obj),
153+
1)
154+
return 1
155+
113156
def unpackb(object packed, object object_hook=None, object list_hook=None,
114157
bint use_list=1, encoding=None, unicode_errors="strict",
115158
object_pairs_hook=None, ext_hook=ExtType,
@@ -129,27 +172,34 @@ def unpackb(object packed, object object_hook=None, object list_hook=None,
129172
cdef Py_ssize_t off = 0
130173
cdef int ret
131174

132-
cdef char* buf
175+
cdef Py_buffer view
176+
cdef char* buf = NULL
133177
cdef Py_ssize_t buf_len
134178
cdef char* cenc = NULL
135179
cdef char* cerr = NULL
180+
cdef int new_protocol = 0
181+
182+
get_data_from_buffer(packed, &view, &buf, &buf_len, &new_protocol)
136183

137-
PyObject_AsReadBuffer(packed, <const void**>&buf, &buf_len)
184+
try:
185+
if encoding is not None:
186+
if isinstance(encoding, unicode):
187+
encoding = encoding.encode('ascii')
188+
cenc = PyBytes_AsString(encoding)
138189

139-
if encoding is not None:
140-
if isinstance(encoding, unicode):
141-
encoding = encoding.encode('ascii')
142-
cenc = PyBytes_AsString(encoding)
190+
if unicode_errors is not None:
191+
if isinstance(unicode_errors, unicode):
192+
unicode_errors = unicode_errors.encode('ascii')
193+
cerr = PyBytes_AsString(unicode_errors)
143194

144-
if unicode_errors is not None:
145-
if isinstance(unicode_errors, unicode):
146-
unicode_errors = unicode_errors.encode('ascii')
147-
cerr = PyBytes_AsString(unicode_errors)
195+
init_ctx(&ctx, object_hook, object_pairs_hook, list_hook, ext_hook,
196+
use_list, cenc, cerr,
197+
max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len)
198+
ret = unpack_construct(&ctx, buf, buf_len, &off)
199+
finally:
200+
if new_protocol:
201+
PyBuffer_Release(&view);
148202

149-
init_ctx(&ctx, object_hook, object_pairs_hook, list_hook, ext_hook,
150-
use_list, cenc, cerr,
151-
max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len)
152-
ret = unpack_construct(&ctx, buf, buf_len, &off)
153203
if ret == 1:
154204
obj = unpack_data(&ctx)
155205
if off < buf_len:
@@ -335,14 +385,20 @@ cdef class Unpacker(object):
335385
def feed(self, object next_bytes):
336386
"""Append `next_bytes` to internal buffer."""
337387
cdef Py_buffer pybuff
388+
cdef int new_protocol = 0
389+
cdef char* buf
390+
cdef Py_ssize_t buf_len
391+
338392
if self.file_like is not None:
339393
raise AssertionError(
340394
"unpacker.feed() is not be able to use with `file_like`.")
341-
PyObject_GetBuffer(next_bytes, &pybuff, PyBUF_SIMPLE)
395+
396+
get_data_from_buffer(next_bytes, &pybuff, &buf, &buf_len, &new_protocol)
342397
try:
343-
self.append_buffer(<char*>pybuff.buf, pybuff.len)
398+
self.append_buffer(buf, buf_len)
344399
finally:
345-
PyBuffer_Release(&pybuff)
400+
if new_protocol:
401+
PyBuffer_Release(&pybuff)
346402

347403
cdef append_buffer(self, void* _buf, Py_ssize_t _buf_len):
348404
cdef:

msgpack/fallback.py

Lines changed: 23 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
"""Fallback pure Python implementation of msgpack"""
22

33
import sys
4-
import array
54
import struct
5+
import warnings
66

77
if sys.version_info[0] == 3:
88
PY3 = True
@@ -46,6 +46,7 @@ def getvalue(self):
4646
from io import BytesIO as StringIO
4747
newlist_hint = lambda size: []
4848

49+
4950
from msgpack.exceptions import (
5051
BufferFull,
5152
OutOfData,
@@ -79,6 +80,24 @@ def _check_type_strict(obj, t, type=type, tuple=tuple):
7980
return type(obj) is t
8081

8182

83+
def _get_data_from_buffer(obj):
84+
try:
85+
view = memoryview(obj)
86+
except TypeError:
87+
# try to use legacy buffer protocol if 2.7, otherwise re-raise
88+
if not PY3:
89+
view = memoryview(buffer(obj))
90+
warnings.warn("using old buffer interface to unpack %s; "
91+
"this leads to unpacking errors if slicing is used and "
92+
"will be removed in a future version" % type(obj),
93+
RuntimeWarning)
94+
else:
95+
raise
96+
if view.itemsize != 1:
97+
raise ValueError("cannot unpack from multi-byte object")
98+
return view
99+
100+
82101
def unpack(stream, **kwargs):
83102
"""
84103
Unpack an object from `stream`.
@@ -239,17 +258,11 @@ def __init__(self, file_like=None, read_size=0, use_list=True,
239258
raise TypeError("`ext_hook` is not callable")
240259

241260
def feed(self, next_bytes):
242-
if isinstance(next_bytes, array.array):
243-
next_bytes = next_bytes.tostring()
244-
if not isinstance(next_bytes, (bytes, bytearray)):
245-
raise TypeError("next_bytes should be bytes, bytearray or array.array")
246261
assert self._feeding
247-
248-
if (len(self._buffer) - self._buff_i + len(next_bytes) > self._max_buffer_size):
262+
view = _get_data_from_buffer(next_bytes)
263+
if (len(self._buffer) - self._buff_i + len(view) > self._max_buffer_size):
249264
raise BufferFull
250-
# bytes + bytearray -> bytearray
251-
# So cast before append
252-
self._buffer += next_bytes
265+
self._buffer += view
253266

254267
def _consume(self):
255268
""" Gets rid of the used parts of the buffer. """
@@ -308,7 +321,6 @@ def _read_header(self, execute=EX_CONSTRUCT):
308321
n = 0
309322
obj = None
310323
self._reserve(1)
311-
#b = struct.unpack_from("B", self._buffer, self._buff_i)[0]
312324
b = self._buffer[self._buff_i]
313325
self._buff_i += 1
314326
if b & 0b10000000 == 0:
@@ -340,7 +352,6 @@ def _read_header(self, execute=EX_CONSTRUCT):
340352
elif b == 0xc4:
341353
typ = TYPE_BIN
342354
self._reserve(1)
343-
#n = struct.unpack_from("B", self._buffer, self._buff_i)[0]
344355
n = self._buffer[self._buff_i]
345356
self._buff_i += 1
346357
if n > self._max_bin_len:
@@ -396,7 +407,6 @@ def _read_header(self, execute=EX_CONSTRUCT):
396407
self._buff_i += 8
397408
elif b == 0xcc:
398409
self._reserve(1)
399-
#obj = struct.unpack_from("B", self._buffer, self._buff_i)[0]
400410
obj = self._buffer[self._buff_i]
401411
self._buff_i += 1
402412
elif b == 0xcd:
@@ -465,7 +475,6 @@ def _read_header(self, execute=EX_CONSTRUCT):
465475
elif b == 0xd9:
466476
typ = TYPE_RAW
467477
self._reserve(1)
468-
#n, = struct.unpack_from("B", self._buffer, self._buff_i)
469478
n = self._buffer[self._buff_i]
470479
self._buff_i += 1
471480
if n > self._max_str_len:

test/test_buffer.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,3 +18,12 @@ def test_unpack_bytearray():
1818
assert [b'foo', b'bar'] == obj
1919
expected_type = bytes
2020
assert all(type(s) == expected_type for s in obj)
21+
22+
23+
def test_unpack_memoryview():
24+
buf = bytearray(packb(('foo', 'bar')))
25+
view = memoryview(buf)
26+
obj = unpackb(view, use_list=1)
27+
assert [b'foo', b'bar'] == obj
28+
expected_type = bytes
29+
assert all(type(s) == expected_type for s in obj)

0 commit comments

Comments
 (0)
0