fallback: Use mmap objects instead of strings to unpack · overcastcloud/msgpack-python@770fed6 · GitHub
[go: up one dir, main page]

Skip to content

Commit 770fed6

Browse files
committed
fallback: Use mmap objects instead of strings to unpack
Signed-off-by: Bas Westerbaan <bas@westerbaan.name>
1 parent b9e9199 commit 770fed6

File tree

1 file changed

+51
-29
lines changed

1 file changed

+51
-29
lines changed

msgpack/fallback.py

Lines changed: 51 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,27 @@
11
# Fallback pure Python implementation of msgpack
22

3+
#
4+
# Easy imports
5+
#
36
import sys
47
import array
58
import struct
69

10+
#
11+
# Tricky imports
12+
#
13+
try:
14+
from cStringIO import StringIO
15+
except ImportError:
16+
from StringIO import StringIO
17+
18+
# We will use wStringIO for buffering the writes for packing.
19+
# Normally, we will use cStringIO.StringIO.
20+
# On PyPy we will use PyPy's own StringBuilder.
721
if hasattr(sys, 'pypy_version_info'):
8-
# cStringIO is slow on PyPy, StringIO is faster. However: PyPy's own
9-
# StringBuilder is fastest.
1022
from __pypy__.builders import StringBuilder
1123
USING_STRINGBUILDER = True
12-
class StringIO(object):
24+
class wStringIO(object):
1325
def __init__(self, s=''):
1426
if s:
1527
self.builder = StringBuilder(len(s))
@@ -22,10 +34,18 @@ def getvalue(self):
2234
return self.builder.build()
2335
else:
2436
USING_STRINGBUILDER = False
25-
try:
26-
from cStringIO import StringIO
27-
except ImportError:
28-
from StringIO import StringIO
37+
wStringIO = StringIO
38+
39+
# We will use rStringIO for unpacking.
40+
# Normally, this is a mmap. A normal StringIO is not a drop-in replacement ---
41+
# it misses the __len__ operation.
42+
# TODO add fallback for when mmap is unavailable
43+
import mmap
44+
def rStringIO(s):
45+
m = mmap.mmap(-1, len(s))
46+
m.write(s)
47+
m.seek(0)
48+
return m
2949

3050
from msgpack.exceptions import (
3151
BufferFull,
@@ -184,13 +204,13 @@ def feed(self, next_bytes):
184204
if self._fb_buf_n + len(next_bytes) > self.max_buffer_size:
185205
raise BufferFull
186206
self._fb_buf_n += len(next_bytes)
187-
self._fb_buffers.append(next_bytes)
207+
self._fb_buffers.append(rStringIO(next_bytes))
188208

189209
def _fb_consume(self):
190210
self._fb_buffers = self._fb_buffers[self._fb_buf_i:]
191211
if self._fb_buffers:
192-
self._fb_buffers[0] = self._fb_buffers[0][self._fb_buf_o:]
193-
self._fb_buf_o = 0
212+
self._fb_buffers[0] = rStringIO(self._fb_buffers[0][
213+
self._fb_buffers[0].tell():])
194214
self._fb_buf_i = 0
195215
self._fb_buf_n = sum(map(len, self._fb_buffers))
196216

@@ -212,16 +232,20 @@ def read_bytes(self, n):
212232
return self._fb_read(n)
213233

214234
def _fb_rollback(self):
235+
for buf in self._fb_buffers:
236+
buf.seek(0)
215237
self._fb_buf_i = 0
216-
self._fb_buf_o = 0
217238

218239
def _fb_get_extradata(self):
219240
bufs = self._fb_buffers[self._fb_buf_i:]
220241
if bufs:
221-
bufs[0] = bufs[0][self._fb_buf_o:]
222-
return ''.join(bufs)
242+
bufs[0] = rStringIO(bufs[0][bufs[0].tell():])
243+
return ''.join([buf[:] for buf in bufs])
223244

224245
def _fb_read(self, n, write_bytes=None):
246+
if (write_bytes is None and self._fb_buf_i < len(self._fb_buffers)
247+
and self._fb_buffers[0].tell() + n < len(self._fb_buffers[0])):
248+
return self._fb_buffers[0].read(n)
225249
ret = ''
226250
while len(ret) != n:
227251
if self._fb_buf_i == len(self._fb_buffers):
@@ -230,14 +254,12 @@ def _fb_read(self, n, write_bytes=None):
230254
tmp = self.file_like.read(self.read_size)
231255
if not tmp:
232256
break
233-
self._fb_buffers.append(tmp)
257+
self._fb_buffers.append(rStringIO(tmp))
234258
continue
235259
sliced = n - len(ret)
236-
ret += self._fb_buffers[self._fb_buf_i][
237-
self._fb_buf_o:self._fb_buf_o + sliced]
238-
self._fb_buf_o += sliced
239-
if self._fb_buf_o >= len(self._fb_buffers[self._fb_buf_i]):
240-
self._fb_buf_o = 0
260+
ret += self._fb_buffers[self._fb_buf_i].read(sliced)
261+
if (self._fb_buffers[self._fb_buf_i].tell()
262+
== len(self._fb_buffers[self._fb_buf_i])):
241263
self._fb_buf_i += 1
242264
if len(ret) != n:
243265
self._fb_rollback()
@@ -394,7 +416,7 @@ def __init__(self, default=None, encoding='utf-8', unicode_errors='strict',
394416
self.autoreset = autoreset
395417
self.encoding = encoding
396418
self.unicode_errors = unicode_errors
397-
self.buffer = StringIO()
419+
self.buffer = wStringIO()
398420
if default is not None:
399421
if not callable(default):
400422
raise TypeError("default must be callable")
@@ -464,33 +486,33 @@ def pack(self, obj):
464486
self._pack(obj)
465487
ret = self.buffer.getvalue()
466488
if self.autoreset:
467-
self.buffer = StringIO()
489+
self.buffer = wStringIO()
468490
elif USING_STRINGBUILDER:
469-
self.buffer = StringIO(ret)
491+
self.buffer = wStringIO(ret)
470492
return ret
471493
def pack_map_pairs(self, pairs):
472494
self._fb_pack_map_pairs(len(pairs), pairs)
473495
ret = self.buffer.getvalue()
474496
if self.autoreset:
475-
self.buffer = StringIO()
497+
self.buffer = wStringIO()
476498
elif USING_STRINGBUILDER:
477-
self.buffer = StringIO(ret)
499+
self.buffer = wStringIO(ret)
478500
return ret
479501
def pack_array_header(self, n):
480502
self._fb_pack_array_header(n)
481503
ret = self.buffer.getvalue()
482504
if self.autoreset:
483-
self.buffer = StringIO()
505+
self.buffer = wStringIO()
484506
elif USING_STRINGBUILDER:
485-
self.buffer = StringIO(ret)
507+
self.buffer = wStringIO(ret)
486508
return ret
487509
def pack_map_header(self, n):
488510
self._fb_pack_map_header(n)
489511
ret = self.buffer.getvalue()
490512
if self.autoreset:
491-
self.buffer = StringIO()
513+
self.buffer = wStringIO()
492514
elif USING_STRINGBUILDER:
493-
self.buffer = StringIO(ret)
515+
self.buffer = wStringIO(ret)
494516
return ret
495517
def _fb_pack_array_header(self, n):
496518
if n <= 0x0f:
@@ -516,4 +538,4 @@ def _fb_pack_map_pairs(self, n, pairs, nest_limit=DEFAULT_RECURSE_LIMIT):
516538
def bytes(self):
517539
return self.buffer.getvalue()
518540
def reset(self):
519-
self.buffer = StringIO()
541+
self.buffer = wStringIO()

0 commit comments

Comments
 (0)
0