10000 Unpacker: add tell() by jfolz · Pull Request #227 · msgpack/msgpack-python · GitHub
[go: up one dir, main page]

Skip to content

Unpacker: add tell() #227

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 29, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions msgpack/_unpacker.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ cdef extern from "Python.h":
from libc.stdlib cimport *
from libc.string cimport *
from libc.limits cimport *
ctypedef unsigned long long uint64_t

from msgpack.exceptions import (
BufferFull,
Expand Down Expand Up @@ -314,6 +315,7 @@ cdef class Unpacker(object):
cdef object object_hook, object_pairs_hook, list_hook, ext_hook
cdef object encoding, unicode_errors
cdef Py_ssize_t max_buffer_size
cdef uint64_t stream_offset
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Py_ssize_t may be 32bit, which is a bit small even today. 16 Exabyte should be plenty for some time.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should be good for 30+ years (if you can unpack continuously at 16GB/s ;))


def __cinit__(self):
self.buf = NULL
Expand Down Expand Up @@ -358,6 +360,7 @@ cdef class Unpacker(object):
self.buf_size = read_size
self.buf_head = 0
self.buf_tail = 0
self.stream_offset = 0

if encoding is not None:
if isinstance(encoding, unicode):
Expand Down Expand Up @@ -468,6 +471,7 @@ cdef class Unpacker(object):

try:
ret = execute(&self.ctx, self.buf, self.buf_tail, &self.buf_head)
self.stream_offset += self.buf_head - prev_head
if write_bytes is not None:
write_bytes(PyBytes_FromStringAndSize(self.buf + prev_head, self.buf_head - prev_head))

Expand Down Expand Up @@ -534,6 +538,9 @@ cdef class Unpacker(object):
"""
return self._unpack(read_map_header, write_bytes)

def tell(self):
return self.stream_offset

def __iter__(self):
return self

Expand Down
5 changes: 5 additions & 0 deletions msgpack/fallback.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,7 @@ def __init__(self, file_like=None, read_size=0, use_list=True,
self._max_array_len = max_array_len
self._max_map_len = max_map_len
self._max_ext_len = max_ext_len
self._stream_offset = 0

if list_hook is not None and not callable(list_hook):
raise TypeError('`list_hook` is not callable')
Expand All @@ -266,6 +267,7 @@ def feed(self, next_bytes):

def _consume(self):
""" Gets rid of the used parts of the buffer. """
self._stream_offset += self._buff_i - self._buf_checkpoint
self._buf_checkpoint = self._buff_i

def _got_extradata(self):
Expand Down Expand Up @@ -629,6 +631,9 @@ def read_map_header(self, write_bytes=None):
self._consume()
return ret

def tell(self):
return self._stream_offset


class Packer(object):
"""
Expand Down
20 changes: 20 additions & 0 deletions test/test_sequnpack.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import io
from msgpack import Unpacker, BufferFull
from msgpack import pack
from msgpack.exceptions import OutOfData
from pytest import raises

Expand Down Expand Up @@ -96,3 +97,22 @@ def test_issue124():
unpacker.feed(b"!")
assert tuple(unpacker) == (b'!',)
assert tuple(unpacker) == ()


def test_unpack_tell():
stream = io.BytesIO()
messages = [2**i-1 for i in range(65)]
messages += [-(2**i) for i in range(1, 64)]
messages += [b'hello', b'hello'*1000, list(range(20)),
{i: bytes(i)*i for i in range(10)},
{i: bytes(i)*i for i in range(32)}]
offsets = []
for m in messages:
pack(m, stream)
offsets.append(stream.tell())
stream.seek(0)
unpacker = Unpacker(stream)
for m, o in zip(messages, offsets):
m2 = next(unpacker)
assert m == m2
assert o == unpacker.tell()
0