Merge 3.3 · python/cpython@96d6a78 · GitHub
[go: up one dir, main page]

Skip to content

Commit 96d6a78

Browse files
committed
Merge 3.3
2 parents a3e32c9 + 8157459 commit 96d6a78

File tree

1 file changed

+106
-21
lines changed

1 file changed

+106
-21
lines changed

Lib/pickletools.py

Lines changed: 106 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import codecs
1414
import pickle
1515
import re
16+
import sys
1617

1718
__all__ = ['dis', 'genops', 'optimize']
1819

@@ -164,8 +165,9 @@
164165

165166
# Represents the number of bytes consumed by a two-argument opcode where
166167
# the first argument gives the number of bytes in the second argument.
167-
TAKEN_FROM_ARGUMENT1 = -2 # num bytes is 1-byte unsigned int
168-
TAKEN_FROM_ARGUMENT4 = -3 # num bytes is 4-byte signed little-endian int
168+
TAKEN_FROM_ARGUMENT1 = -2 # num bytes is 1-byte unsigned int
169+
TAKEN_FROM_ARGUMENT4 = -3 # num bytes is 4-byte signed little-endian int
170+
TAKEN_FROM_ARGUMENT4U = -4 # num bytes is 4-byte unsigned little-endian int
169171

170172
class ArgumentDescriptor(object):
171173
__slots__ = (
@@ -193,7 +195,8 @@ def __init__(self, name, n, reader, doc):
193195
assert isinstance(n, int) and (n >= 0 or
194196
n in (UP_TO_NEWLINE,
195197
TAKEN_FROM_ARGUMENT1,
196-
TAKEN_FROM_ARGUMENT4))
198+
TAKEN_FROM_ARGUMENT4,
199+
TAKEN_FROM_ARGUMENT4U))
197200
self.n = n
198201

199202
self.reader = reader
@@ -264,6 +267,27 @@ def read_int4(f):
264267
doc="Four-byte signed integer, little-endian, 2's complement.")
265268

266269

270+
def read_uint4(f):
271+
r"""
272+
>>> import io
273+
>>> read_uint4(io.BytesIO(b'\xff\x00\x00\x00'))
274+
255
275+
>>> read_uint4(io.BytesIO(b'\x00\x00\x00\x80')) == 2**31
276+
True
277+
"""
278+
279+
data = f.read(4)
280+
if len(data) == 4:
281+
return _unpack("<I", data)[0]
282+
raise ValueError("not enough data in stream to read uint4")
283+
284+
uint4 = ArgumentDescriptor(
285+
name='uint4',
286+
n=4,
287+
reader=read_uint4,
288+
doc="Four-byte unsigned integer, little-endian.")
289+
290+
267291
def read_stringnl(f, decode=True, stripquotes=True):
268292
r"""
269293
>>> import io
@@ -420,6 +444,67 @@ def read_string1(f):
420444
""")
421445

422446

447+
def read_bytes1(f):
448+
r"""
449+
>>> import io
450+
>>> read_bytes1(io.BytesIO(b"\x00"))
451+
b''
452+
>>> read_bytes1(io.BytesIO(b"\x03abcdef"))
453+
b'abc'
454+
"""
455+
456+
n = read_uint1(f)
457+
assert n >= 0
458+
data = f.read(n)
459+
if len(data) == n:
460+
return data
461+
raise ValueError("expected %d bytes in a bytes1, but only %d remain" %
462+
(n, len(data)))
463+
464+
bytes1 = ArgumentDescriptor(
465+
name="bytes1",
466+
n=TAKEN_FROM_ARGUMENT1,
467+
reader=read_bytes1,
468+
doc="""A counted bytes string.
469+
470+
The first argument is a 1-byte unsigned int giving the number
471+
of bytes, and the second argument is that many bytes.
472+
""")
473+
474+
475+
def read_bytes4(f):
476+
r"""
477+
>>> import io
478+
>>> read_bytes4(io.BytesIO(b"\x00\x00\x00\x00abc"))
479+
b''
480+
>>> read_bytes4(io.BytesIO(b"\x03\x00\x00\x00abcdef"))
481+
b'abc'
482+
>>> read_bytes4(io.BytesIO(b"\x00\x00\x00\x03abcdef"))
483+
Traceback (most recent call last):
484+
...
485+
ValueError: expected 50331648 bytes in a bytes4, but only 6 remain
486+
"""
487+
488+
n = read_uint4(f)
489+
if n > sys.maxsize:
490+
raise ValueError("bytes4 byte count > sys.maxsize: %d" % n)
491+
data = f.read(n)
492+
if len(data) == n:
493+
return data
494+
raise ValueError("expected %d bytes in a bytes4, but only %d remain" %
495+
(n, len(data)))
496+
497+
bytes4 = ArgumentDescriptor(
498+
name="bytes4",
499+
n=TAKEN_FROM_ARGUMENT4U,
500+
reader=read_bytes4,
501+
doc="""A counted bytes string.
502+
503+
The first argument is a 4-byte little-endian unsigned int giving
504+
the number of bytes, and the second argument is that many bytes.
505+
""")
506+
507+
423508
def read_unicodestringnl(f):
424509
r"""
425510
>>> import io
@@ -463,9 +548,9 @@ def read_unicodestring4(f):
463548
ValueError: expected 7 bytes in a unicodestring4, but only 6 remain
464549
"""
465550

466-
n = read_int4(f)
467-
if n < 0:
468-
raise ValueError("unicodestring4 byte count < 0: %d" % n)
551+
n = read_uint4(f)
552+
if n > sys.maxsize:
553+
raise ValueError("unicodestring4 byte count > sys.maxsize: %d" % n)
469554
data = f.read(n)
470555
if len(data) == n:
471556
return str(data, 'utf-8', 'surrogatepass')
@@ -474,7 +559,7 @@ def read_unicodestring4(f):
474559

475560
unicodestring4 = ArgumentDescriptor(
476561
name="unicodestring4",
477-
n=TAKEN_FROM_ARGUMENT4,
562+
n=TAKEN_FROM_ARGUMENT4U,
478563
reader=read_unicodestring4,
479564
doc="""A counted Unicode string.
480565
@@ -871,7 +956,7 @@ def __init__(self, name, code, arg,
871956
assert isinstance(x, StackObject)
872957
self.stack_after = stack_after
873958

874-
assert isinstance(proto, int) and 0 <= proto <= 3
959+
assert isinstance(proto, int) and 0 <= proto <= pickle.HIGHEST_PROTOCOL
875960
self.proto = proto
876961

877962
assert isinstance(doc, str)
@@ -1037,28 +1122,28 @@ def __init__(self, name, code, arg,
10371122

10381123
I(name='BINBYTES',
10391124
code='B',
1040-
arg=string4,
1125+
arg=bytes4,
10411126
stack_before=[],
10421127
stack_after=[pybytes],
10431128
proto=3,
10441129
doc="""Push a Python bytes object.
10451130
1046-
There are two arguments: the first is a 4-byte little-endian signed int
1047-
giving the number of bytes in the string, and the second is that many
1048-
bytes, which are taken literally as the bytes content.
1131+
There are two arguments: the first is a 4-byte little-endian unsigned int
1132+
giving the number of bytes, and the second is that many bytes, which are
1133+
taken literally as the bytes content.
10491134
"""),
10501135

10511136
I(name='SHORT_BINBYTES',
10521137
code='C',
1053-
arg=string1,
1138+
arg=bytes1,
10541139
stack_before=[],
10551140
stack_after=[pybytes],
10561141
proto=3,
1057-
doc="""Push a Python string object.
1142+
doc="""Push a Python bytes object.
10581143
10591144
There are two arguments: the first is a 1-byte unsigned int giving
1060-
the number of bytes in the string, and the second is that many bytes,
1061-
which are taken literally as the string content.
1145+
the number of bytes, and the second is that many bytes, which are taken
1146+
literally as the bytes content.
10621147
"""),
10631148

10641149
# Ways to spell None.
@@ -1117,7 +1202,7 @@ def __init__(self, name, code, arg,
11171202
proto=1,
11181203
doc="""Push a Python Unicode string object.
11191204
1120-
There are two arguments: the first is a 4-byte little-endian signed int
1205+
There are two arguments: the first is a 4-byte little-endian unsigned int
11211206
giving the number of bytes in the string. The second is that many
11221207
bytes, and is the UTF-8 encoding of the Unicode string.
11231208
"""),
@@ -1421,13 +1506,13 @@ def __init__(self, name, code, arg,
14211506

14221507
I(name='LONG_BINGET',
14231508
code='j',
1424-
arg=int4,
1509+
arg=uint4,
14251510
stack_before=[],
14261511
stack_after=[anyobject],
14271512
proto=1,
14281513
doc="""Read an object from the memo and push it on the stack.
14291514
1430-
The index of the memo object to push is given by the 4-byte signed
1515+
The index of the memo object to push is given by the 4-byte unsigned
14311516
little-endian integer following.
14321517
"""),
14331518

@@ -1458,14 +1543,14 @@ def __init__(self, name, code, arg,
14581543

14591544
I(name='LONG_BINPUT',
14601545
code='r',
1461-
arg=int4,
1546+
arg=uint4,
14621547
stack_before=[],
14631548
stack_after=[],
14641549
proto=1,
14651550
doc="""Store the stack top into the memo. The stack is not popped.
14661551
14671552
The index of the memo location to write into is given by the 4-byte
1468-
signed little-endian integer following.
1553+
unsigned little-endian integer following.
14691554
"""),
14701555

14711556
# Access the extension registry (predefined objects). Akin to the GET

0 commit comments

Comments
 (0)
0