1313import codecs
1414import pickle
1515import re
16+ import sys
1617
1718__all__ = ['dis' , 'genops' , 'optimize' ]
1819
164165
165166# Represents the number of bytes consumed by a two-argument opcode where
166167# the first argument gives the number of bytes in the second argument.
167- TAKEN_FROM_ARGUMENT1 = - 2 # num bytes is 1-byte unsigned int
168- TAKEN_FROM_ARGUMENT4 = - 3 # num bytes is 4-byte signed little-endian int
168+ TAKEN_FROM_ARGUMENT1 = - 2 # num bytes is 1-byte unsigned int
169+ TAKEN_FROM_ARGUMENT4 = - 3 # num bytes is 4-byte signed little-endian int
170+ TAKEN_FROM_ARGUMENT4U = - 4 # num bytes is 4-byte unsigned little-endian int
169171
170172class ArgumentDescriptor (object ):
171173 __slots__ = (
@@ -193,7 +195,8 @@ def __init__(self, name, n, reader, doc):
193195 assert isinstance (n , int ) and (n >= 0 or
194196 n in (UP_TO_NEWLINE ,
195197 TAKEN_FROM_ARGUMENT1 ,
196- TAKEN_FROM_ARGUMENT4 ))
198+ TAKEN_FROM_ARGUMENT4 ,
199+ TAKEN_FROM_ARGUMENT4U ))
197200 self .n = n
198201
199202 self .reader = reader
@@ -264,6 +267,27 @@ def read_int4(f):
264267 doc = "Four-byte signed integer, little-endian, 2's complement." )
265268
266269
def read_uint4(f):
    r"""Read a four-byte unsigned little-endian integer from stream *f*.

    Raises ValueError if the stream yields fewer than four bytes.

    >>> import io
    >>> read_uint4(io.BytesIO(b'\xff\x00\x00\x00'))
    255
    >>> read_uint4(io.BytesIO(b'\x00\x00\x00\x80')) == 2**31
    True
    """
    raw = f.read(4)
    if len(raw) != 4:
        # A short read means the stream ended in the middle of the integer.
        raise ValueError("not enough data in stream to read uint4")
    (value,) = _unpack("<I", raw)
    return value
283+
# Fixed-width argument: always exactly four bytes, decoded by read_uint4.
uint4 = ArgumentDescriptor(
    name='uint4',
    n=4,
    reader=read_uint4,
    doc="Four-byte unsigned integer, little-endian.",
)
289+
290+
267291def read_stringnl (f , decode = True , stripquotes = True ):
268292 r"""
269293 >>> import io
@@ -420,6 +444,67 @@ def read_string1(f):
420444 """ )
421445
422446
def read_bytes1(f):
    r"""Read a bytes string whose length is given by a leading 1-byte count.

    The count is read with read_uint1; that many bytes are then read from
    *f* and returned unchanged.  Raises ValueError if the stream runs out
    before the announced number of bytes has been read.

    >>> import io
    >>> read_bytes1(io.BytesIO(b"\x00"))
    b''
    >>> read_bytes1(io.BytesIO(b"\x03abcdef"))
    b'abc'
    """
    count = read_uint1(f)
    assert count >= 0
    payload = f.read(count)
    if len(payload) != count:
        raise ValueError("expected %d bytes in a bytes1, but only %d remain" %
                         (count, len(payload)))
    return payload
463+
# Argument descriptor for a length-prefixed bytes argument: its size marker is
# TAKEN_FROM_ARGUMENT1 (the byte count comes from a leading 1-byte unsigned
# int) and the value is decoded by read_bytes1.
464+ bytes1 = ArgumentDescriptor (
465+ name = "bytes1" ,
466+ n = TAKEN_FROM_ARGUMENT1 ,
467+ reader = read_bytes1 ,
468+ doc = """A counted bytes string.
469+
470+ The first argument is a 1-byte unsigned int giving the number
471+ of bytes, and the second argument is that many bytes.
472+ """ )
473+
474+
def read_bytes4(f):
    r"""Read a bytes string whose length is given by a leading 4-byte count.

    The count is an unsigned little-endian integer read with read_uint4.
    Raises ValueError if the count exceeds sys.maxsize, or if the stream
    runs out before the announced number of bytes has been read.

    >>> import io
    >>> read_bytes4(io.BytesIO(b"\x00\x00\x00\x00abc"))
    b''
    >>> read_bytes4(io.BytesIO(b"\x03\x00\x00\x00abcdef"))
    b'abc'
    >>> read_bytes4(io.BytesIO(b"\x00\x00\x00\x03abcdef"))
    Traceback (most recent call last):
    ...
    ValueError: expected 50331648 bytes in a bytes4, but only 6 remain
    """
    count = read_uint4(f)
    # Reject counts above sys.maxsize before asking the stream for them.
    if count > sys.maxsize:
        raise ValueError("bytes4 byte count > sys.maxsize: %d" % count)
    payload = f.read(count)
    if len(payload) != count:
        raise ValueError("expected %d bytes in a bytes4, but only %d remain" %
                         (count, len(payload)))
    return payload
496+
# Argument descriptor for a length-prefixed bytes argument: its size marker is
# TAKEN_FROM_ARGUMENT4U (the byte count comes from a leading 4-byte unsigned
# little-endian int) and the value is decoded by read_bytes4.
497+ bytes4 = ArgumentDescriptor (
498+ name = "bytes4" ,
499+ n = TAKEN_FROM_ARGUMENT4U ,
500+ reader = read_bytes4 ,
501+ doc = """A counted bytes string.
502+
503+ The first argument is a 4-byte little-endian unsigned int giving
504+ the number of bytes, and the second argument is that many bytes.
505+ """ )
506+
507+
423508def read_unicodestringnl (f ):
424509 r"""
425510 >>> import io
@@ -463,9 +548,9 @@ def read_unicodestring4(f):
463548 ValueError: expected 7 bytes in a unicodestring4, but only 6 remain
464549 """
465550
466- n = read_int4 (f )
467- if n < 0 :
468- raise ValueError ("unicodestring4 byte count < 0 : %d" % n )
551+ n = read_uint4 (f )
552+ if n > sys . maxsize :
553+ raise ValueError ("unicodestring4 byte count > sys.maxsize : %d" % n )
469554 data = f .read (n )
470555 if len (data ) == n :
471556 return str (data , 'utf-8' , 'surrogatepass' )
@@ -474,7 +559,7 @@ def read_unicodestring4(f):
474559
475560unicodestring4 = ArgumentDescriptor (
476561 name = "unicodestring4" ,
477- n = TAKEN_FROM_ARGUMENT4 ,
562+ n = TAKEN_FROM_ARGUMENT4U ,
478563 reader = read_unicodestring4 ,
479564 doc = """A counted Unicode string.
480565
@@ -871,7 +956,7 @@ def __init__(self, name, code, arg,
871956 assert isinstance (x , StackObject )
872957 self .stack_after = stack_after
873958
874- assert isinstance (proto , int ) and 0 <= proto <= 3
959+ assert isinstance (proto , int ) and 0 <= proto <= pickle . HIGHEST_PROTOCOL
875960 self .proto = proto
876961
877962 assert isinstance (doc , str )
@@ -1037,28 +1122,28 @@ def __init__(self, name, code, arg,
10371122
10381123 I (name = 'BINBYTES' ,
10391124 code = 'B' ,
1040- arg = string4 ,
1125+ arg = bytes4 ,
10411126 stack_before = [],
10421127 stack_after = [pybytes ],
10431128 proto = 3 ,
10441129 doc = """Push a Python bytes object.
10451130
1046- There are two arguments: the first is a 4-byte little-endian signed int
1047- giving the number of bytes in the string , and the second is that many
1048- bytes, which are taken literally as the bytes content.
1131+ There are two arguments: the first is a 4-byte little-endian unsigned int
1132+ giving the number of bytes, and the second is that many bytes, which are
1133+ taken literally as the bytes content.
10491134 """ ),
10501135
10511136 I (name = 'SHORT_BINBYTES' ,
10521137 code = 'C' ,
1053- arg = string1 ,
1138+ arg = bytes1 ,
10541139 stack_before = [],
10551140 stack_after = [pybytes ],
10561141 proto = 3 ,
1057- doc = """Push a Python string object.
1142+ doc = """Push a Python bytes object.
10581143
10591144 There are two arguments: the first is a 1-byte unsigned int giving
1060- the number of bytes in the string , and the second is that many bytes,
1061- which are taken literally as the string content.
1145+ the number of bytes, and the second is that many bytes, which are taken
1146+ literally as the bytes content.
10621147 """ ),
10631148
10641149 # Ways to spell None.
@@ -1117,7 +1202,7 @@ def __init__(self, name, code, arg,
11171202 proto = 1 ,
11181203 doc = """Push a Python Unicode string object.
11191204
1120- There are two arguments: the first is a 4-byte little-endian signed int
1205+ There are two arguments: the first is a 4-byte little-endian unsigned int
11211206 giving the number of bytes in the string. The second is that many
11221207 bytes, and is the UTF-8 encoding of the Unicode string.
11231208 """ ),
@@ -1421,13 +1506,13 @@ def __init__(self, name, code, arg,
14211506
14221507 I (name = 'LONG_BINGET' ,
14231508 code = 'j' ,
1424- arg = int4 ,
1509+ arg = uint4 ,
14251510 stack_before = [],
14261511 stack_after = [anyobject ],
14271512 proto = 1 ,
14281513 doc = """Read an object from the memo and push it on the stack.
14291514
1430- The index of the memo object to push is given by the 4-byte signed
1515+ The index of the memo object to push is given by the 4-byte unsigned
14311516 little-endian integer following.
14321517 """ ),
14331518
@@ -1458,14 +1543,14 @@ def __init__(self, name, code, arg,
14581543
14591544 I (name = 'LONG_BINPUT' ,
14601545 code = 'r' ,
1461- arg = int4 ,
1546+ arg = uint4 ,
14621547 stack_before = [],
14631548 stack_after = [],
14641549 proto = 1 ,
14651550 doc = """Store the stack top into the memo. The stack is not popped.
14661551
14671552 The index of the memo location to write into is given by the 4-byte
1468- signed little-endian integer following.
1553+ unsigned little-endian integer following.
14691554 """ ),
14701555
14711556 # Access the extension registry (predefined objects). Akin to the GET
0 commit comments