Merge pull request #5260 from youknowone/update-pickle · RustPython/RustPython@ada1006 · GitHub
[go: up one dir, main page]

Skip to content

Commit ada1006

Browse files
authored
Merge pull request #5260 from youknowone/update-pickle
Update pickle from CPython 3.12.3
2 parents e5ca631 + d0f680b commit ada1006

7 files changed

+265
-36
lines changed

Lib/pickle.py

Lines changed: 15 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -98,12 +98,6 @@ class _Stop(Exception):
9898
def __init__(self, value):
9999
self.value = value
100100

101-
# Jython has PyStringMap; it's a dict subclass with string keys
102-
try:
103-
from org.python.core import PyStringMap
104-
except ImportError:
105-
PyStringMap = None
106-
107101
# Pickle opcodes. See pickletools.py for extensive docs. The listing
108102
# here is in kind-of alphabetical order of 1-character pickle code.
109103
# pickletools groups them by purpose.
@@ -861,13 +855,13 @@ def save_str(self, obj):
861855
else:
862856
self.write(BINUNICODE + pack("<I", n) + encoded)
863857
else:
864-
obj = obj.replace("\\", "\\u005c")
865-
obj = obj.replace("\0", "\\u0000")
866-
obj = obj.replace("\n", "\\u000a")
867-
obj = obj.replace("\r", "\\u000d")
868-
obj = obj.replace("\x1a", "\\u001a") # EOF on DOS
869-
self.write(UNICODE + obj.encode('raw-unicode-escape') +
870-
b'\n')
858+
# Escape what raw-unicode-escape doesn't, but memoize the original.
859+
tmp = obj.replace("\\", "\\u005c")
860+
tmp = tmp.replace("\0", "\\u0000")
861+
tmp = tmp.replace("\n", "\\u000a")
862+
tmp = tmp.replace("\r", "\\u000d")
863+
tmp = tmp.replace("\x1a", "\\u001a") # EOF on DOS
864+
self.write(UNICODE + tmp.encode('raw-unicode-escape') + b'\n')
871865
self.memoize(obj)
872866
dispatch[str] = save_str
873867

@@ -972,8 +966,6 @@ def save_dict(self, obj):
972966
self._batch_setitems(obj.items())
973967

974968
dispatch[dict] = save_dict
975-
if PyStringMap is not None:
976-
dispatch[PyStringMap] = save_dict
977969

978970
def _batch_setitems(self, items):
979971
# Helper to batch up SETITEMS sequences; proto >= 1 only
@@ -1489,7 +1481,7 @@ def _instantiate(self, klass, args):
14891481
value = klass(*args)
14901482
except TypeError as err:
14911483
raise TypeError("in constructor for %s: %s" %
1492-
(klass.__name__, str(err)), sys.exc_info()[2])
1484+
(klass.__name__, str(err)), err.__traceback__)
14931485
else:
14941486
value = klass.__new__(klass)
14951487
self.append(value)
@@ -1799,7 +1791,7 @@ def _test():
17991791
parser = argparse.ArgumentParser(
18001792
description='display contents of the pickle files')
18011793
parser.add_argument(
1802-
'pickle_file', type=argparse.FileType('br'),
1794+
'pickle_file',
18031795
nargs='*', help='the pickle file')
18041796
parser.add_argument(
18051797
'-t', '--test', action='store_true',
@@ -1815,6 +1807,10 @@ def _test():
18151807
parser.print_help()
18161808
else:
18171809
import pprint
1818-
for f in args.pickle_file:
1819-
obj = load(f)
1810+
for fn in args.pickle_file:
1811+
if fn == '-':
1812+
obj = load(sys.stdin.buffer)
1813+
else:
1814+
with open(fn, 'rb') as f:
1815+
obj = load(f)
18201816
pprint.pprint(obj)

Lib/pickletools.py

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1253,7 +1253,7 @@ def __init__(self, name, code, arg,
12531253
stack_before=[],
12541254
stack_after=[pyint],
12551255
proto=2,
1256-
doc="""Long integer using found-byte length.
1256+
doc="""Long integer using four-byte length.
12571257
12581258
A more efficient encoding of a Python long; the long4 encoding
12591259
says it all."""),
@@ -2848,10 +2848,10 @@ def _test():
28482848
parser = argparse.ArgumentParser(
28492849
description='disassemble one or more pickle files')
28502850
parser.add_argument(
2851-
'pickle_file', type=argparse.FileType('br'),
2851+
'pickle_file',
28522852
nargs='*', help='the pickle file')
28532853
parser.add_argument(
2854-
'-o', '--output', default=sys.stdout, type=argparse.FileType('w'),
2854+
'-o', '--output',
28552855
help='the file where the output should be written')
28562856
parser.add_argument(
28572857
'-m', '--memo', action='store_true',
@@ -2876,15 +2876,26 @@ def _test():
28762876
if args.test:
28772877
_test()
28782878
else:
2879-
annotate = 30 if args.annotate else 0
28802879
if not args.pickle_file:
28812880
parser.print_help()
2882-
elif len(args.pickle_file) == 1:
2883-
dis(args.pickle_file[0], args.output, None,
2884-
args.indentlevel, annotate)
28852881
else:
2882+
annotate = 30 if args.annotate else 0
28862883
memo = {} if args.memo else None
2887-
for f in args.pickle_file:
2888-
preamble = args.preamble.format(name=f.name)
2889-
args.output.write(preamble + '\n')
2890-
dis(f, args.output, memo, args.indentlevel, annotate)
2884+
if args.output is None:
2885+
output = sys.stdout
2886+
else:
2887+
output = open(args.output, 'w')
2888+
try:
2889+
for arg in args.pickle_file:
2890+
if len(args.pickle_file) > 1:
2891+
name = '<stdin>' if arg == '-' else arg
2892+
preamble = args.preamble.format(name=name)
2893+
output.write(preamble + '\n')
2894+
if arg == '-':
2895+
dis(sys.stdin.buffer, output, memo, args.indentlevel, annotate)
2896+
else:
2897+
with open(arg, 'rb') as f:
2898+
dis(f, output, memo, args.indentlevel, annotate)
2899+
finally:
2900+
if output is not sys.stdout:
2901+
output.close()

Lib/test/pickletester.py

Lines changed: 193 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import builtins
12
import collections
23
import copyreg
34
import dbm
@@ -11,6 +12,7 @@
1112
import struct
1213
import sys
1314
import threading
15+
import types
1416
import unittest
1517
import weakref
1618
from textwrap import dedent
@@ -1380,6 +1382,7 @@ def test_truncated_data(self):
13801382
self.check_unpickling_error(self.truncated_errors, p)
13811383

13821384
@threading_helper.reap_threads
1385+
@threading_helper.requires_working_threading()
13831386
def test_unpickle_module_race(self):
13841387
# https://bugs.python.org/issue34572
13851388
locker_module = dedent("""
@@ -1822,6 +1825,14 @@ def test_unicode_high_plane(self):
18221825
t2 = self.loads(p)
18231826
self.assert_is_copy(t, t2)
18241827

1828+
def test_unicode_memoization(self):
1829+
# Repeated str is re-used (even when escapes added).
1830+
for proto in protocols:
1831+
for s in '', 'xyz', 'xyz\n', 'x\\yz', 'x\xa1yz\r':
1832+
p = self.dumps((s, s), proto)
1833+
s1, s2 = self.loads(p)
1834+
self.assertIs(s1, s2)
1835+
18251836
def test_bytes(self):
18261837
for proto in protocols:
18271838
for s in b'', b'xyz', b'xyz'*100:
@@ -1853,6 +1864,14 @@ def test_bytearray(self):
18531864
self.assertNotIn(b'bytearray', p)
18541865
self.assertTrue(opcode_in_pickle(pickle.BYTEARRAY8, p))
18551866

1867+
def test_bytearray_memoization_bug(self):
1868+
for proto in protocols:
1869+
for s in b'', b'xyz', b'xyz'*100:
1870+
b = bytearray(s)
1871+
p = self.dumps((b, b), proto)
1872+
b1, b2 = self.loads(p)
1873+
self.assertIs(b1, b2)
1874+
18561875
def test_ints(self):
18571876
for proto in protocols:
18581877
n = sys.maxsize
@@ -1971,6 +1990,35 @@ def test_singleton_types(self):
19711990
u = self.loads(s)
19721991
self.assertIs(type(singleton), u)
19731992

1993+
def test_builtin_types(self):
1994+
for t in builtins.__dict__.values():
1995+
if isinstance(t, type) and not issubclass(t, BaseException):
1996+
for proto in protocols:
1997+
s = self.dumps(t, proto)
1998+
self.assertIs(self.loads(s), t)
1999+
2000+
def test_builtin_exceptions(self):
2001+
for t in builtins.__dict__.values():
2002+
if isinstance(t, type) and issubclass(t, BaseException):
2003+
for proto in protocols:
2004+
s = self.dumps(t, proto)
2005+
u = self.loads(s)
2006+
if proto <= 2 and issubclass(t, OSError) and t is not BlockingIOError:
2007+
self.assertIs(u, OSError)
2008+
elif proto <= 2 and issubclass(t, ImportError):
2009+
self.assertIs(u, ImportError)
2010+
else:
2011+
self.assertIs(u, t)
2012+
2013+
# TODO: RUSTPYTHON
2014+
@unittest.expectedFailure
2015+
def test_builtin_functions(self):
2016+
for t in builtins.__dict__.values():
2017+
if isinstance(t, types.BuiltinFunctionType):
2018+
for proto in protocols:
2019+
s = self.dumps(t, proto)
2020+
self.assertIs(self.loads(s), t)
2021+
19742022
# Tests for protocol 2
19752023

19762024
def test_proto(self):
@@ -2370,13 +2418,17 @@ def test_reduce_calls_base(self):
23702418
y = self.loads(s)
23712419
self.assertEqual(y._reduce_called, 1)
23722420

2421+
# TODO: RUSTPYTHON
2422+
@unittest.expectedFailure
23732423
@no_tracing
23742424
def test_bad_getattr(self):
23752425
# Issue #3514: crash when there is an infinite loop in __getattr__
23762426
x = BadGetattr()
2377-
for proto in protocols:
2427+
for proto in range(2):
23782428
with support.infinite_recursion():
23792429
self.assertRaises(RuntimeError, self.dumps, x, proto)
2430+
for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
2431+
s = self.dumps(x, proto)
23802432

23812433
def test_reduce_bad_iterator(self):
23822434
# Issue4176: crash when 4th and 5th items of __reduce__()
@@ -2536,6 +2588,7 @@ def check_frame_opcodes(self, pickled):
25362588
self.assertLess(pos - frameless_start, self.FRAME_SIZE_MIN)
25372589

25382590
@support.skip_if_pgo_task
2591+
@support.requires_resource('cpu')
25392592
def test_framing_many_objects(self):
25402593
obj = list(range(10**5))
25412594
for proto in range(4, pickle.HIGHEST_PROTOCOL + 1):
@@ -3024,6 +3077,67 @@ def check_array(arr):
30243077
# 2-D, non-contiguous
30253078
check_array(arr[::2])
30263079

3080+
def test_evil_class_mutating_dict(self):
3081+
# https://github.com/python/cpython/issues/92930
3082+
from random import getrandbits
3083+
3084+
global Bad
3085+
class Bad:
3086+
def __eq__(self, other):
3087+
return ENABLED
3088+
def __hash__(self):
3089+
return 42
3090+
def __reduce__(self):
3091+
if getrandbits(6) == 0:
3092+
collection.clear()
3093+
return (Bad, ())
3094+
3095+
for proto in protocols:
3096+
for _ in range(20):
3097+
ENABLED = False
3098+
collection = {Bad(): Bad() for _ in range(20)}
3099+
for bad in collection:
3100+
bad.bad = bad
3101+
bad.collection = collection
3102+
ENABLED = True
3103+
try:
3104+
data = self.dumps(collection, proto)
3105+
self.loads(data)
3106+
except RuntimeError as e:
3107+
expected = "changed size during iteration"
3108+
self.assertIn(expected, str(e))
3109+
3110+
def test_evil_pickler_mutating_collection(self):
3111+
# https://github.com/python/cpython/issues/92930
3112+
if not hasattr(self, "pickler"):
3113+
raise self.skipTest(f"{type(self)} has no associated pickler type")
3114+
3115+
global Clearer
3116+
class Clearer:
3117+
pass
3118+
3119+
def check(collection):
3120+
class EvilPickler(self.pickler):
3121+
def persistent_id(self, obj):
3122+
if isinstance(obj, Clearer):
3123+
collection.clear()
3124+
return None
3125+
pickler = EvilPickler(io.BytesIO(), proto)
3126+
try:
3127+
pickler.dump(collection)
3128+
except RuntimeError as e:
3129+
expected = "changed size during iteration"
3130+
self.assertIn(expected, str(e))
3131+
3132+
for proto in protocols:
3133+
check([Clearer()])
3134+
check([Clearer(), Clearer()])
3135+
check({Clearer()})
3136+
check({Clearer(), Clearer()})
3137+
check({Clearer(): 1})
3138+
check({Clearer(): 1, Clearer(): 2})
3139+
check({1: Clearer(), 2: Clearer()})
3140+
30273141

30283142
class BigmemPickleTests:
30293143

@@ -3363,6 +3477,84 @@ def __init__(self): pass
33633477
self.assertRaises(pickle.PicklingError, BadPickler().dump, 0)
33643478
self.assertRaises(pickle.UnpicklingError, BadUnpickler().load)
33653479

3480+
def test_unpickler_bad_file(self):
3481+
# bpo-38384: Crash in _pickle if the read attribute raises an error.
3482+
def raises_oserror(self, *args, **kwargs):
3483+
raise OSError
3484+
@property
3485+
def bad_property(self):
3486+
1/0
3487+
3488+
# File without read and readline
3489+
class F:
3490+
pass
3491+
self.assertRaises((AttributeError, TypeError), self.Unpickler, F())
3492+
3493+
# File without read
3494+
class F:
3495+
readline = raises_oserror
3496+
self.assertRaises((AttributeError, TypeError), self.Unpickler, F())
3497+
3498+
# File without readline
3499+
class F:
3500+
read = raises_oserror
3501+
self.assertRaises((AttributeError, TypeError), self.Unpickler, F())
3502+
3503+
# File with bad read
3504+
class F:
3505+
read = bad_property
3506+
readline = raises_oserror
3507+
self.assertRaises(ZeroDivisionError, self.Unpickler, F())
3508+
3509+
# File with bad readline
3510+
class F:
3511+
readline = bad_property
3512+
read = raises_oserror
3513+
self.assertRaises(ZeroDivisionError, self.Unpickler, F())
3514+
3515+
# File with bad readline, no read
3516+
class F:
3517+
readline = bad_property
3518+
self.assertRaises(ZeroDivisionError, self.Unpickler, F())
3519+
3520+
# File with bad read, no readline
3521+
class F:
3522+
read = bad_property
3523+
self.assertRaises((AttributeError, ZeroDivisionError), self.Unpickler, F())
3524+
3525+
# File with bad peek
3526+
class F:
3527+
peek = bad_property
3528+
read = raises_oserror
3529+
readline = raises_oserror
3530+
try:
3531+
self.Unpickler(F())
3532+
except ZeroDivisionError:
3533+
pass
3534+
3535+
# File with bad readinto
3536+
class F:
3537+
readinto = bad_property
3538+
read = raises_oserror
3539+
readline = raises_oserror
3540+
try:
3541+
self.Unpickler(F())
3542+
except ZeroDivisionError:
3543+
pass
3544+
3545+
def test_pickler_bad_file(self):
3546+
# File without write
3547+
class F:
3548+
pass
3549+
self.assertRaises(TypeError, self.Pickler, F())
3550+
3551+
# File with bad write
3552+
class F:
3553+
@property
3554+
def write(self):
3555+
1/0
3556+
self.assertRaises(ZeroDivisionError, self.Pickler, F())
3557+
33663558
def check_dumps_loads_oob_buffers(self, dumps, loads):
33673559
# No need to do the full gamut of tests here, just enough to
33683560
# check that dumps() and loads() redirect their arguments

0 commit comments

Comments
 (0)
0