fallback: Rewrite buffer from array of bytes to bytearray · lbolla/msgpack-python@f421f59 · GitHub

Commit f421f59
fallback: Rewrite buffer from array of bytes to bytearray
1 parent 318ddfc commit f421f59
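
In short, the fallback Unpacker drops its list of buffer chunks (_fb_buffers / _fb_buf_i / _fb_buf_o / _fb_buf_n) in favour of one contiguous buffer with a read offset and a rollback checkpoint. The standalone sketch below is illustrative only (the class and method names are hypothetical, not the library's); it shows the buffer model the commit adopts:

    # Minimal sketch of the single-buffer model: one bytes buffer, a read
    # position, and a checkpoint marking the last fully consumed object.
    class SingleBuffer:
        def __init__(self):
            self._buffer = b""          # all fed/read bytes, contiguous
            self._buff_i = 0            # current read position
            self._buf_checkpoint = 0    # start of the unconsumed tail

        def feed(self, data):
            self._buffer += bytes(data)  # bytes or bytearray both end up as bytes

        def read(self, n):
            if len(self._buffer) - self._buff_i < n:
                self._buff_i = self._buf_checkpoint  # roll back to last good state
                raise EOFError("out of data")
            ret = self._buffer[self._buff_i:self._buff_i + n]
            self._buff_i += n
            return ret

        def consume(self):
            self._buf_checkpoint = self._buff_i  # everything before is disposable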

File tree

1 file changed: +69 -100 lines changed

msgpack/fallback.py

Lines changed: 69 additions & 100 deletions
@@ -86,11 +86,8 @@ def unpack(stream, **kwargs):
     Raises `ExtraData` when `packed` contains extra bytes.
     See :class:`Unpacker` for options.
     """
-    unpacker = Unpacker(stream, **kwargs)
-    ret = unpacker._fb_unpack()
-    if unpacker._fb_got_extradata():
-        raise ExtraData(ret, unpacker._fb_get_extradata())
-    return ret
+    data = stream.read()
+    return unpackb(data, **kwargs)


 def unpackb(packed, **kwargs):
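
With this change, `unpack(stream)` simply reads the whole stream and defers to `unpackb`. A quick usage sketch, assuming the package-level helpers (`msgpack.packb` / `msgpack.unpack`) dispatch to this fallback:

    import io
    import msgpack

    buf = io.BytesIO(msgpack.packb([1, 2, 3]))
    # unpack() now reads the entire stream and delegates to unpackb()
    print(msgpack.unpack(buf))  # -> [1, 2, 3]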
@@ -121,7 +118,7 @@ class Unpacker(object):
         If specified, unpacker reads serialized data from it and :meth:`feed()` is not usable.

     :param int read_size:
-        Used as `file_like.read(read_size)`. (default: `min(1024**2, max_buffer_size)`)
+        Used as `file_like.read(read_size)`. (default: `min(16*1024, max_buffer_size)`)

     :param bool use_list:
         If true, unpack msgpack array to Python list.
@@ -199,27 +196,23 @@ def __init__(self, file_like=None, read_size=0, use_list=True,
         self._fb_feeding = False

         #: array of bytes feeded.
-        self._fb_buffers = []
-        #: Which buffer we currently reads
-        self._fb_buf_i = 0
+        self._buffer = b""
         #: Which position we currently reads
-        self._fb_buf_o = 0
-        #: Total size of _fb_bufferes
-        self._fb_buf_n = 0
+        self._buff_i = 0

         # When Unpacker is used as an iterable, between the calls to next(),
         # the buffer is not "consumed" completely, for efficiency sake.
         # Instead, it is done sloppily. To make sure we raise BufferFull at
         # the correct moments, we have to keep track of how sloppy we were.
         # Furthermore, when the buffer is incomplete (that is: in the case
         # we raise an OutOfData) we need to rollback the buffer to the correct
-        # state, which _fb_slopiness records.
-        self._fb_sloppiness = 0
+        # state, which _buf_checkpoint records.
+        self._buf_checkpoint = 0

         self._max_buffer_size = max_buffer_size or 2**31-1
         if read_size > self._max_buffer_size:
             raise ValueError("read_size must be smaller than max_buffer_size")
-        self._read_size = read_size or min(self._max_buffer_size, 4096)
+        self._read_size = read_size or min(self._max_buffer_size, 16*1024)
         self._encoding = encoding
         self._unicode_errors = unicode_errors
         self._use_list = use_list
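
Note that the old docstring advertised a default of `min(1024**2, max_buffer_size)` while the old code used 4096; after this commit both agree on `min(16*1024, max_buffer_size)`. A small sketch of passing an explicit read_size (values here are illustrative, not required):

    import io
    import msgpack

    stream = io.BytesIO(msgpack.packb([1, 2]) * 3)
    # read_size controls how many bytes each file_like.read() call requests
    unpacker = msgpack.Unpacker(stream, read_size=16 * 1024)
    for obj in unpacker:
        print(obj)  # prints [1, 2] three times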
@@ -248,103 +241,75 @@ def __init__(self, file_like=None, read_size=0, use_list=True,
     def feed(self, next_bytes):
         if isinstance(next_bytes, array.array):
             next_bytes = next_bytes.tostring()
-        elif isinstance(next_bytes, bytearray):
-            next_bytes = bytes(next_bytes)
+        if not isinstance(next_bytes, (bytes, bytearray)):
+            raise TypeError("next_bytes should be bytes, bytearray or array.array")
         assert self._fb_feeding
-        if (self._fb_buf_n + len(next_bytes) - self._fb_sloppiness
-                > self._max_buffer_size):
+
+        if (len(self._buffer) - self._buff_i + len(next_bytes) > self._max_buffer_size):
             raise BufferFull
-        self._fb_buf_n += len(next_bytes)
-        self._fb_buffers.append(next_bytes)
-
-    def _fb_sloppy_consume(self):
-        """ Gets rid of some of the used parts of the buffer. """
-        if self._fb_buf_i:
-            for i in xrange(self._fb_buf_i):
-                self._fb_buf_n -= len(self._fb_buffers[i])
-            self._fb_buffers = self._fb_buffers[self._fb_buf_i:]
-            self._fb_buf_i = 0
-        if self._fb_buffers:
-            self._fb_sloppiness = self._fb_buf_o
-        else:
-            self._fb_sloppiness = 0
+        # bytes + bytearray -> bytearray
+        # So cast before append
+        self._buffer += bytes(next_bytes)

     def _fb_consume(self):
         """ Gets rid of the used parts of the buffer. """
-        if self._fb_buf_i:
-            for i in xrange(self._fb_buf_i):
-                self._fb_buf_n -= len(self._fb_buffers[i])
-            self._fb_buffers = self._fb_buffers[self._fb_buf_i:]
-            self._fb_buf_i = 0
-        if self._fb_buffers:
-            self._fb_buffers[0] = self._fb_buffers[0][self._fb_buf_o:]
-            self._fb_buf_n -= self._fb_buf_o
-        else:
-            self._fb_buf_n = 0
-        self._fb_buf_o = 0
-        self._fb_sloppiness = 0
+        self._buf_checkpoint = self._buff_i

     def _fb_got_extradata(self):
-        if self._fb_buf_i != len(self._fb_buffers):
-            return True
-        if self._fb_feeding:
-            return False
-        if not self.file_like:
-            return False
-        if self.file_like.read(1):
-            return True
-        return False
+        return self._buff_i < len(self._buffer)

-    def __iter__(self):
-        return self
+    def _fb_get_extradata(self):
+        return self._buffer[self._buff_i:]

     def read_bytes(self, n):
         return self._fb_read(n)

-    def _fb_rollback(self):
-        self._fb_buf_i = 0
-        self._fb_buf_o = self._fb_sloppiness
+    def _fb_read(self, n, write_bytes=None):
+        # (int, Optional[Callable]) -> bytearray
+        remain_bytes = len(self._buffer) - self._buff_i - n
+
+        # Fast path: buffer has n bytes already
+        if remain_bytes >= 0:
+            ret = self._buffer[self._buff_i:self._buff_i+n]
+            self._buff_i += n
+            if write_bytes is not None:
+                write_bytes(ret)
+            return ret

-    def _fb_get_extradata(self):
-        bufs = self._fb_buffers[self._fb_buf_i:]
-        if bufs:
-            bufs[0] = bufs[0][self._fb_buf_o:]
-        return b''.join(bufs)
+        if self._fb_feeding:
+            self._buff_i = self._buf_checkpoint
+            raise OutOfData

-    def _fb_read(self, n, write_bytes=None):
-        buffs = self._fb_buffers
-        # We have a redundant codepath for the most common case, such that
-        # pypy optimizes it properly. This is the case that the read fits
-        # in the current buffer.
-        if (write_bytes is None and self._fb_buf_i < len(buffs) and
-                self._fb_buf_o + n < len(buffs[self._fb_buf_i])):
-            self._fb_buf_o += n
-            return buffs[self._fb_buf_i][self._fb_buf_o - n:self._fb_buf_o]
-
-        # The remaining cases.
-        ret = b''
-        while len(ret) != n:
-            sliced = n - len(ret)
-            if self._fb_buf_i == len(buffs):
-                if self._fb_feeding:
-                    break
-                to_read = sliced
-                if self._read_size > to_read:
-                    to_read = self._read_size
-                tmp = self.file_like.read(to_read)
-                if not tmp:
-                    break
-                buffs.append(tmp)
-                self._fb_buf_n += len(tmp)
-                continue
-            ret += buffs[self._fb_buf_i][self._fb_buf_o:self._fb_buf_o + sliced]
-            self._fb_buf_o += sliced
-            if self._fb_buf_o >= len(buffs[self._fb_buf_i]):
-                self._fb_buf_o = 0
-                self._fb_buf_i += 1
-        if len(ret) != n:
-            self._fb_rollback()
+        # Strip buffer before checkpoint before reading file.
+        if self._buf_checkpoint > 0:
+            self._buffer = self._buffer[self._buf_checkpoint:]
+            self._buff_i -= self._buf_checkpoint
+            self._buf_checkpoint = 0
+
+        # Read from file
+        remain_bytes = -remain_bytes
+        while remain_bytes > 0:
+            to_read_bytes = max(self._read_size, remain_bytes)
+            read_data = self.file_like.read(to_read_bytes)
+            if not read_data:
+                break
+            assert isinstance(read_data, bytes)
+            self._buffer += read_data
+            remain_bytes -= len(read_data)
+
+        if len(self._buffer) < n + self._buff_i:
+            self._buff_i = 0  # rollback
             raise OutOfData
+
+        if len(self._buffer) == n:
+            # checkpoint == 0
+            ret = self._buffer
+            self._buffer = b""
+            self._buff_i = 0
+        else:
+            ret = self._buffer[self._buff_i:self._buff_i+n]
+            self._buff_i += n
+
         if write_bytes is not None:
             write_bytes(ret)
         return ret
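
The feeding path now appends everything to one bytes buffer, and an incomplete object rolls the read position back to `_buf_checkpoint` instead of replaying a chunk list. A usage sketch, assuming the package-level `msgpack.Unpacker` behaves like the fallback shown above:

    import msgpack

    unpacker = msgpack.Unpacker()
    data = msgpack.packb([1, 2, 3])

    unpacker.feed(bytearray(data[:2]))  # bytearray is accepted and copied into the buffer
    print(list(unpacker))               # [] -- object incomplete, position rolled back
    unpacker.feed(data[2:])             # remaining bytes arrive
    print(list(unpacker))               # [[1, 2, 3]]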
@@ -562,15 +527,19 @@ def _fb_unpack(self, execute=EX_CONSTRUCT, write_bytes=None):
         assert typ == TYPE_IMMEDIATE
         return obj

-    def next(self):
+    def __iter__(self):
+        return self
+
+    def __next__(self):
         try:
             ret = self._fb_unpack(EX_CONSTRUCT, None)
-            self._fb_sloppy_consume()
+            self._fb_consume()
             return ret
         except OutOfData:
             self._fb_consume()
             raise StopIteration
-    __next__ = next
+
+    next = __next__

     def skip(self, write_bytes=None):
         self._fb_unpack(EX_SKIP, write_bytes)
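
`__next__` is now the primary implementation, with `next` kept as an alias, so iteration works under both Python 2 and Python 3. For example (a sketch using the package-level Unpacker):

    import io
    import msgpack

    stream = io.BytesIO(msgpack.packb(1) + msgpack.packb(2))
    unpacker = msgpack.Unpacker(stream)
    print(next(unpacker))             # 1 -- built-in next() uses __next__
    print([obj for obj in unpacker])  # [2]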

0 commit comments