@@ -86,11 +86,8 @@ def unpack(stream, **kwargs):
86
86
Raises `ExtraData` when the data read from `stream` contains extra bytes.
87
87
See :class:`Unpacker` for options.
88
88
"""
89
- unpacker = Unpacker (stream , ** kwargs )
90
- ret = unpacker ._fb_unpack ()
91
- if unpacker ._fb_got_extradata ():
92
- raise ExtraData (ret , unpacker ._fb_get_extradata ())
93
- return ret
89
+ data = stream .read ()
90
+ return unpackb (data , ** kwargs )
94
91
95
92
96
93
def unpackb (packed , ** kwargs ):
@@ -121,7 +118,7 @@ class Unpacker(object):
121
118
If specified, unpacker reads serialized data from it and :meth:`feed()` is not usable.
122
119
123
120
:param int read_size:
124
- Used as `file_like.read(read_size)`. (default: `min(1024**2 , max_buffer_size)`)
121
+ Used as `file_like.read(read_size)`. (default: `min(16*1024 , max_buffer_size)`)
125
122
126
123
:param bool use_list:
127
124
If true, unpack msgpack array to Python list.
@@ -199,27 +196,23 @@ def __init__(self, file_like=None, read_size=0, use_list=True,
199
196
self ._fb_feeding = False
200
197
201
198
#: array of bytes fed.
202
- self ._fb_buffers = []
203
- #: Which buffer we currently reads
204
- self ._fb_buf_i = 0
199
+ self ._buffer = b""
205
200
#: Which position we are currently reading
206
- self ._fb_buf_o = 0
207
- #: Total size of _fb_bufferes
208
- self ._fb_buf_n = 0
201
+ self ._buff_i = 0
209
202
210
203
# When Unpacker is used as an iterable, between the calls to next(),
211
204
# the buffer is not "consumed" completely, for efficiency's sake.
212
205
# Instead, it is done sloppily. To make sure we raise BufferFull at
213
206
# the correct moments, we have to keep track of how sloppy we were.
214
207
# Furthermore, when the buffer is incomplete (that is: in the case
215
208
# we raise an OutOfData) we need to rollback the buffer to the correct
216
- # state, which _fb_slopiness records.
217
- self ._fb_sloppiness = 0
209
+ # state, which _buf_checkpoint records.
210
+ self ._buf_checkpoint = 0
218
211
219
212
self ._max_buffer_size = max_buffer_size or 2 ** 31 - 1
220
213
if read_size > self ._max_buffer_size :
221
214
raise ValueError ("read_size must be smaller than max_buffer_size" )
222
- self ._read_size = read_size or min (self ._max_buffer_size , 4096 )
215
+ self ._read_size = read_size or min (self ._max_buffer_size , 16 * 1024 )
223
216
self ._encoding = encoding
224
217
self ._unicode_errors = unicode_errors
225
218
self ._use_list = use_list
@@ -248,103 +241,75 @@ def __init__(self, file_like=None, read_size=0, use_list=True,
248
241
def feed (self , next_bytes ):
249
242
if isinstance (next_bytes , array .array ):
250
243
next_bytes = next_bytes .tostring ()
251
- elif isinstance (next_bytes , bytearray ):
252
- next_bytes = bytes ( next_bytes )
244
+ if not isinstance (next_bytes , ( bytes , bytearray ) ):
245
+ raise TypeError ( " next_bytes should be bytes, bytearray or array.array" )
253
246
assert self ._fb_feeding
254
- if ( self . _fb_buf_n + len ( next_bytes ) - self . _fb_sloppiness
255
- > self ._max_buffer_size ):
247
+
248
+ if ( len ( self . _buffer ) - self . _buff_i + len ( next_bytes ) > self ._max_buffer_size ):
256
249
raise BufferFull
257
- self ._fb_buf_n += len (next_bytes )
258
- self ._fb_buffers .append (next_bytes )
259
-
260
- def _fb_sloppy_consume (self ):
261
- """ Gets rid of some of the used parts of the buffer. """
262
- if self ._fb_buf_i :
263
- for i in xrange (self ._fb_buf_i ):
264
- self ._fb_buf_n -= len (self ._fb_buffers [i ])
265
- self ._fb_buffers = self ._fb_buffers [self ._fb_buf_i :]
266
- self ._fb_buf_i = 0
267
- if self ._fb_buffers :
268
- self ._fb_sloppiness = self ._fb_buf_o
269
- else :
270
- self ._fb_sloppiness = 0
250
+ # bytes + bytearray -> bytearray
251
+ # So cast before append
252
+ self ._buffer += bytes (next_bytes )
271
253
272
254
def _fb_consume (self ):
273
255
""" Gets rid of the used parts of the buffer. """
274
- if self ._fb_buf_i :
275
- for i in xrange (self ._fb_buf_i ):
276
- self ._fb_buf_n -= len (self ._fb_buffers [i ])
277
- self ._fb_buffers = self ._fb_buffers [self ._fb_buf_i :]
278
- self ._fb_buf_i = 0
279
- if self ._fb_buffers :
280
- self ._fb_buffers [0 ] = self ._fb_buffers [0 ][self ._fb_buf_o :]
281
- self ._fb_buf_n -= self ._fb_buf_o
282
- else :
283
- self ._fb_buf_n = 0
284
- self ._fb_buf_o = 0
285
- self ._fb_sloppiness = 0
256
+ self ._buf_checkpoint = self ._buff_i
286
257
287
258
def _fb_got_extradata (self ):
288
- if self ._fb_buf_i != len (self ._fb_buffers ):
289
- return True
290
- if self ._fb_feeding :
291
- return False
292
- if not self .file_like :
293
- return False
294
- if self .file_like .read (1 ):
295
- return True
296
- return False
259
+ return self ._buff_i < len (self ._buffer )
297
260
298
- def __iter__ (self ):
299
- return self
261
+ def _fb_get_extradata (self ):
262
+ return self . _buffer [ self . _buff_i :]
300
263
301
264
def read_bytes (self , n ):
302
265
return self ._fb_read (n )
303
266
304
- def _fb_rollback (self ):
305
- self ._fb_buf_i = 0
306
- self ._fb_buf_o = self ._fb_sloppiness
267
+ def _fb_read (self , n , write_bytes = None ):
268
+ # (int, Optional[Callable]) -> bytes
269
+ remain_bytes = len (self ._buffer ) - self ._buff_i - n
270
+
271
+ # Fast path: buffer has n bytes already
272
+ if remain_bytes >= 0 :
273
+ ret = self ._buffer [self ._buff_i :self ._buff_i + n ]
274
+ self ._buff_i += n
275
+ if write_bytes is not None :
276
+ write_bytes (ret )
277
+ return ret
307
278
308
- def _fb_get_extradata (self ):
309
- bufs = self ._fb_buffers [self ._fb_buf_i :]
310
47D8
- if bufs :
311
- bufs [0 ] = bufs [0 ][self ._fb_buf_o :]
312
- return b'' .join (bufs )
279
+ if self ._fb_feeding :
280
+ self ._buff_i = self ._buf_checkpoint
281
+ raise OutOfData
313
282
314
- def _fb_read (self , n , write_bytes = None ):
315
- buffs = self ._fb_buffers
316
- # We have a redundant codepath for the most common case, such that
317
- # pypy optimizes it properly. This is the case that the read fits
318
- # in the current buffer.
319
- if (write_bytes is None and self ._fb_buf_i < len (buffs ) and
320
- self ._fb_buf_o + n < len (buffs [self ._fb_buf_i ])):
321
- self ._fb_buf_o += n
322
- return buffs [self ._fb_buf_i ][self ._fb_buf_o - n :self ._fb_buf_o ]
323
-
324
- # The remaining cases.
325
- ret = b''
326
- while len (ret ) != n :
327
- sliced = n - len (ret )
328
- if self ._fb_buf_i == len (buffs ):
329
- if self ._fb_feeding :
330
- break
331
- to_read = sliced
332
- if self ._read_size > to_read :
333
- to_read = self ._read_size
334
- tmp = self .file_like .read (to_read )
335
- if not tmp :
336
- break
337
- buffs .append (tmp )
338
- self ._fb_buf_n += len (tmp )
339
- continue
340
- ret += buffs [self ._fb_buf_i ][self ._fb_buf_o :self ._fb_buf_o + sliced ]
341
- self ._fb_buf_o += sliced
342
- if self ._fb_buf_o >= len (buffs [self ._fb_buf_i ]):
343
- self ._fb_buf_o = 0
344
- self ._fb_buf_i += 1
345
- if len (ret ) != n :
346
- self ._fb_rollback ()
283
+ # Strip buffer before checkpoint before reading file.
284
+ if self ._buf_checkpoint > 0 :
285
+ self ._buffer = self ._buffer [self ._buf_checkpoint :]
286
+ self ._buff_i -= self ._buf_checkpoint
287
+ self ._buf_checkpoint = 0
288
+
289
+ # Read from file
290
+ remain_bytes = - remain_bytes
291
+ while remain_bytes > 0 :
292
+ to_read_bytes = max (self ._read_size , remain_bytes )
293
+ read_data = self .file_like .read (to_read_bytes )
294
+ if not read_data :
295
+ break
296
+ assert isinstance (read_data , bytes )
297
+ self ._buffer += read_data
298
+ remain_bytes -= len (read_data )
299
+
300
+ if len (self ._buffer ) < n + self ._buff_i :
301
+ self ._buff_i = 0 # rollback
347
302
raise OutOfData
303
+
304
+ if len (self ._buffer ) == n :
305
+ # checkpoint == 0
306
+ ret = self ._buffer
307
+ self ._buffer = b""
308
+ self ._buff_i = 0
309
+ else :
310
+ ret = self ._buffer [self ._buff_i :self ._buff_i + n ]
311
+ self ._buff_i += n
312
+
348
313
if write_bytes is not None :
349
314
write_bytes (ret )
350
315
return ret
@@ -562,15 +527,19 @@ def _fb_unpack(self, execute=EX_CONSTRUCT, write_bytes=None):
562
527
assert typ == TYPE_IMMEDIATE
563
528
return obj
564
529
565
- def next (self ):
530
+ def __iter__ (self ):
531
+ return self
532
+
533
+ def __next__ (self ):
566
534
try :
567
535
ret = self ._fb_unpack (EX_CONSTRUCT , None )
568
- self ._fb_sloppy_consume ()
536
+ self ._fb_consume ()
569
537
return ret
570
538
except OutOfData :
571
539
self ._fb_consume ()
572
540
raise StopIteration
573
- __next__ = next
541
+
542
+ next = __next__
574
543
575
544
def skip (self , write_bytes = None ):
576
545
self ._fb_unpack (EX_SKIP , write_bytes )
0 commit comments