From e15adf1934e753842757d201104760cffb527812 Mon Sep 17 00:00:00 2001 From: enkore Date: Sat, 26 Nov 2016 14:53:17 +0100 Subject: [PATCH] Add Unpacker.offset := starting offset of current object --- msgpack/_unpacker.pyx | 26 ++++++++++++++++---- msgpack/fallback.py | 13 ++++++++++ test/test_sequnpack.py | 54 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 89 insertions(+), 4 deletions(-) diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx index 23f6478f..448792e1 100644 --- a/msgpack/_unpacker.pyx +++ b/msgpack/_unpacker.pyx @@ -256,7 +256,7 @@ cdef class Unpacker(object): """ cdef unpack_context ctx cdef char* buf - cdef Py_ssize_t buf_size, buf_head, buf_tail + cdef Py_ssize_t buf_size, buf_head, buf_tail, buf_offset, unpack_size cdef object file_like cdef object file_like_read cdef Py_ssize_t read_size @@ -308,6 +308,8 @@ cdef class Unpacker(object): self.buf_size = read_size self.buf_head = 0 self.buf_tail = 0 + self.buf_offset = 0 + self.unpack_size = 0 if encoding is not None: if isinstance(encoding, unicode): @@ -332,6 +334,11 @@ cdef class Unpacker(object): max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len) + @property + def offset(self): + print('-- %d\t%d\t%d' % (self.buf_head, self.buf_offset, self.unpack_size)) + return self.buf_head + self.buf_offset - self.unpack_size + def feed(self, object next_bytes): """Append `next_bytes` to internal buffer.""" cdef Py_buffer pybuff @@ -354,6 +361,8 @@ cdef class Unpacker(object): Py_ssize_t new_size if tail + _buf_len > buf_size: + print("a_b, t+bl>bs, head=%d" % head) + self.buf_offset += head if ((tail - head) + _buf_len) <= buf_size: # move to front. memmove(buf, buf + head, tail - head) @@ -450,7 +459,10 @@ cdef class Unpacker(object): Raises `OutOfData` when there are no more bytes to unpack. """ - return self._unpack(unpack_construct, write_bytes) + offset_before = self.offset + ret = self._unpack(unpack_construct, write_bytes) + self.unpack_size = self.offset - offset_before + return ret def skip(self, object write_bytes=None): """Read and ignore one object, returning None @@ -460,7 +472,10 @@ cdef class Unpacker(object): Raises `OutOfData` when there are no more bytes to unpack. """ - return self._unpack(unpack_skip, write_bytes) + offset_before = self.offset + ret = self._unpack(unpack_skip, write_bytes) + self.unpack_size = self.offset - offset_before + return ret def read_array_header(self, object write_bytes=None): """assuming the next object is an array, return its size n, such that @@ -482,7 +497,10 @@ cdef class Unpacker(object): return self def __next__(self): - return self._unpack(unpack_construct, None, 1) + offset_before = self.offset + ret = self._unpack(unpack_construct, None, 1) + self.unpack_size = self.offset - offset_before + return ret # for debug. #def _buf(self): diff --git a/msgpack/fallback.py b/msgpack/fallback.py index a23ad8ca..d1572269 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -199,6 +199,10 @@ def __init__(self, file_like=None, read_size=0, use_list=True, self._buffer = bytearray() #: Which position we currently reads self._buff_i = 0 + #: Virtual offset of the buffer in the complete stream + self._buff_offset = 0 + #: Length of the last top-level object that was unpacked + self._unpack_size = 0 # When Unpacker is used as an iterable, between the calls to next(), # the buffer is not "consumed" completely, for efficiency sake. @@ -238,6 +242,10 @@ def __init__(self, file_like=None, read_size=0, use_list=True, if not callable(ext_hook): raise TypeError("`ext_hook` is not callable") + @property + def offset(self): + return self._buff_i + self._buff_offset - self._unpack_size + def feed(self, next_bytes): if isinstance(next_bytes, array.array): next_bytes = next_bytes.tostring() @@ -286,6 +294,7 @@ def _reserve(self, n): if self._buf_checkpoint > 0: del self._buffer[:self._buf_checkpoint] self._buff_i -= self._buf_checkpoint + self._buff_offset += self._buf_checkpoint self._buf_checkpoint = 0 # Read from file @@ -584,7 +593,9 @@ def __iter__(self): def __next__(self): try: + offset_before = self.offset ret = self._unpack(EX_CONSTRUCT) + self._unpack_size = self.offset - offset_before self._consume() return ret except OutOfData: @@ -600,7 +611,9 @@ def skip(self, write_bytes=None): self._consume() def unpack(self, write_bytes=None): + offset_before = self.offset ret = self._unpack(EX_CONSTRUCT) + self._unpack_size = self.offset - offset_before if write_bytes is not None: write_bytes(self._buffer[self._buf_checkpoint:self._buff_i]) self._consume() diff --git a/test/test_sequnpack.py b/test/test_sequnpack.py index 45f4cc78..769abc64 100644 --- a/test/test_sequnpack.py +++ b/test/test_sequnpack.py @@ -96,3 +96,57 @@ def test_issue124(): unpacker.feed(b"!") assert tuple(unpacker) == (b'!',) assert tuple(unpacker) == () + + +def test_offset(): + unpacker= Unpacker() + unpacker.feed(b'\x81\x01\x02') + assert unpacker.offset == 0 + assert unpacker.unpack() == {1: 2} + assert unpacker.offset == 0 + unpacker.feed(b'\x81\x03\x04') + assert unpacker.offset == 0 + assert unpacker.unpack() == {3: 4} + assert unpacker.offset == 3 + + +def test_offset2(): + unpacker = Unpacker() + unpacker.feed(b'\x81\x01\x02\x81') + assert unpacker.offset == 0 + assert unpacker.unpack() == {1: 2} + assert unpacker.offset == 0 + with raises(OutOfData): + unpacker.unpack() + unpacker.feed(b'\x03\x04\x81') + assert unpacker.offset == 0 + assert unpacker.unpack() == {3: 4} + assert unpacker.offset == 3 + with raises(OutOfData): + unpacker.unpack() + assert unpacker.offset == 3 + unpacker.feed(b'\x05\x06') + assert unpacker.offset == 3 + assert unpacker.unpack() == {5: 6} + assert unpacker.offset == 6 + + +def test_offset3(): + unpacker = Unpacker() + unpacker.feed(b'\x81\x01\x02\x81') + for obj in unpacker: + assert obj == {1: 2} + assert unpacker.offset == 0 + assert unpacker.offset == 0 + for obj in unpacker: + assert False + unpacker.feed(b'\x03\x04\x81') + assert unpacker.offset == 0 + for obj in unpacker: + assert obj == {3: 4} + assert unpacker.offset == 3 + unpacker.feed(b'\x05\x06') + assert unpacker.offset == 3 + for obj in unpacker: + assert obj == {5: 6} + assert unpacker.offset == 6