[3.4] bpo-32072: Fix issues with binary plists. (GH-4455) (#4658) · python/cpython@c59731d · GitHub
[go: up one dir, main page]

Skip to content

Commit c59731d

Browse files
serhiy-storchaka authored and larryhastings committed
[3.4] bpo-32072: Fix issues with binary plists. (GH-4455) (#4658)
* [3.4] bpo-32072: Fix issues with binary plists. (GH-4455) * Fixed saving bytearrays. * Identical objects will be saved only once. * Equal references will be loaded as identical objects. * Added support for saving and loading recursive data structures. (cherry picked from commit a897aee) * Fix implementation dependent assertion in test_plistlib. (#4813) It failed with an advanced optimizer.
1 parent 092db6c commit c59731d

File tree

3 files changed

+113
-37
lines changed

3 files changed

+113
-37
lines changed

Lib/plistlib.py

Lines changed: 50 additions & 36 deletions
elif token == 0x0f:
Original file line numberDiff line numberDiff line change
@@ -590,6 +590,8 @@ def __init__(self, message="Invalid file"):
590590

591591
_BINARY_FORMAT = {1: 'B', 2: 'H', 4: 'L', 8: 'Q'}
592592

593+
_undefined = object()
594+
593595
class _BinaryPlistParser:
594596
"""
595597
Read or write a binary plist file, following the description of the binary
@@ -620,7 +622,8 @@ def parse(self, fp):
620622
) = struct.unpack('>6xBBQQQ', trailer)
621623
self._fp.seek(offset_table_offset)
622624
self._object_offsets = self._read_ints(num_objects, offset_size)
623-
return self._read_object(self._object_offsets[top_object])
625+
self._objects = [_undefined] * num_objects
626+
return self._read_object(top_object)
624627

625628
except (OSError, IndexError, struct.error):
626629
raise InvalidFileException()
@@ -646,71 +649,77 @@ def _read_ints(self, n, size):
646649
def _read_refs(self, n):
647650
return self._read_ints(n, self._ref_size)
648651

649-
def _read_object(self, offset):
652+
def _read_object(self, ref):
650653
"""
651-
read the object at offset.
654+
read the object by reference.
652655
653656
May recursively read sub-objects (content of an array/dict/set)
654657
"""
658+
result = self._objects[ref]
659+
if result is not _undefined:
660+
return result
661+
662+
offset = self._object_offsets[ref]
655663
self._fp.seek(offset)
656664
token = self._fp.read(1)[0]
657665
tokenH, tokenL = token & 0xF0, token & 0x0F
658666

659667
if token == 0x00:
660-
return None
668+
result = None
661669

662670
elif token == 0x08:
663-
return False
671+
result = False
664672

665673
elif token == 0x09:
666-
return True
674+
result = True
667675

668676
# The referenced source code also mentions URL (0x0c, 0x0d) and
669677
# UUID (0x0e), but neither can be generated using the Cocoa libraries.
670678

671679
672-
return b''
680+
result = b''
673681

674682
elif tokenH == 0x10: # int
675-
return int.from_bytes(self._fp.read(1 << tokenL),
676-
'big', signed=tokenL >= 3)
683+
result = int.from_bytes(self._fp.read(1 << tokenL),
684+
'big', signed=tokenL >= 3)
677685

678686
elif token == 0x22: # real
679-
return struct.unpack('>f', self._fp.read(4))[0]
687+
result = struct.unpack('>f', self._fp.read(4))[0]
680688

681689
elif token == 0x23: # real
682-
return struct.unpack('>d', self._fp.read(8))[0]
690+
result = struct.unpack('>d', self._fp.read(8))[0]
683691

684692
elif token == 0x33: # date
685693
f = struct.unpack('>d', self._fp.read(8))[0]
686694
# timestamp 0 of binary plists corresponds to 1/1/2001
687695
# (year of Mac OS X 10.0), instead of 1/1/1970.
688-
return datetime.datetime.utcfromtimestamp(f + (31 * 365 + 8) * 86400)
696+
result = datetime.datetime.utcfromtimestamp(f + (31 * 365 + 8) * 86400)
689697

690698
elif tokenH == 0x40: # data
691699
s = self._get_size(tokenL)
692700
if self._use_builtin_types:
693-
return self._fp.read(s)
701+
result = self._fp.read(s)
694702
else:
695-
return Data(self._fp.read(s))
703+
result = Data(self._fp.read(s))
696704

697705
elif tokenH == 0x50: # ascii string
698706
s = self._get_size(tokenL)
699707
result = self._fp.read(s).decode('ascii')
700-
return result
708+
result = result
701709

702710
elif tokenH == 0x60: # unicode string
703711
s = self._get_size(tokenL)
704-
return self._fp.read(s * 2).decode('utf-16be')
712+
result = self._fp.read(s * 2).decode('utf-16be')
705713

706714
# tokenH == 0x80 is documented as 'UID' and appears to be used for
707715
# keyed-archiving, not in plists.
708716

709717
elif tokenH == 0xA0: # array
710718
s = self._get_size(tokenL)
711719
obj_refs = self._read_refs(s)
712-
return [self._read_object(self._object_offsets[x])
713-
for x in obj_refs]
720+
result = []
721+
self._objects[ref] = result
722+
result.extend(self._read_object(x) for x in obj_refs)
714723

715724
# tokenH == 0xB0 is documented as 'ordset', but is not actually
716725
# implemented in the Apple reference code.
@@ -723,12 +732,15 @@ def _read_object(self, offset):
723732
key_refs = self._read_refs(s)
724733
obj_refs = self._read_refs(s)
725734
result = self._dict_type()
735+
self._objects[ref] = result
726736
for k, o in zip(key_refs, obj_refs):
727-
result[self._read_object(self._object_offsets[k])
728-
] = self._read_object(self._object_offsets[o])
729-
return result
737+
result[self._read_object(k)] = self._read_object(o)
730738

731-
raise InvalidFileException()
739+
else:
740+
raise InvalidFileException()
741+
742+
self._objects[ref] = result
743+
return result
732744

733745
def _count_to_size(count):
734746
if count < 1 << 8:
@@ -743,6 +755,8 @@ def _count_to_size(count):
743755
else:
744756
return 8
745757

758+
_scalars = (str, int, float, datetime.datetime, bytes)
759+
746760
class _BinaryPlistWriter (object):
747761
def __init__(self, fp, sort_keys, skipkeys):
748762
self._fp = fp
@@ -798,24 +812,25 @@ def _flatten(self, value):
798812
# First check if the object is in the object table, not used for
799813
# containers to ensure that two subcontainers with the same contents
800814
# will be serialized as distinct values.
801-
if isinstance(value, (
802-
str, int, float, datetime.datetime, bytes, bytearray)):
815+
if isinstance(value, _scalars):
803816
if (type(value), value) in self._objtable:
804817
return
805818

806819
elif isinstance(value, Data):
807820
if (type(value.data), value.data) in self._objtable:
808821
return
809822

823+
elif id(value) in self._objidtable:
824+
return
825+
810826
# Add to objectreference map
811827
refnum = len(self._objlist)
812828
self._objlist.append(value)
813-
try:
814-
if isinstance(value, Data):
815-
self._objtable[(type(value.data), value.data)] = refnum
816-
else:
817-
self._objtable[(type(value), value)] = refnum
818-
except TypeError:
829+
if isinstance(value, _scalars):
830+
self._objtable[(type(value), value)] = refnum
831+
elif isinstance(value, Data):
832+
self._objtable[(type(value.data), value.data)] = refnum
833+
else:
819834
self._objidtable[id(value)] = refnum
820835

821836
# And finally recurse into containers
@@ -842,12 +857,11 @@ def _flatten(self, value):
842857
self._flatten(o)
843858

844859
def _getrefnum(self, value):
845-
try:
846-
if isinstance(value, Data):
847-
return self._objtable[(type(value.data), value.data)]
848-
else:
849-
return self._objtable[(type(value), value)]
850-
except TypeError:
860+
if isinstance(value, _scalars):
861+
return self._objtable[(type(value), value)]
862+
elif isinstance(value, Data):
863+
return self._objtable[(type(value.data), value.data)]
864+
else:
851865
return self._objidtable[id(value)]
852866

853867
def _write_size(self, token, size):

Lib/test/test_plistlib.py

Lines changed: 57 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,17 @@ def test_int(self):
170170
self.assertRaises(OverflowError, plistlib.dumps,
171171
pl, fmt=fmt)
172172

173+
def test_bytearray(self):
174+
for pl in (b'<binary gunk>', b"<lots of binary gunk>\0\1\2\3" * 10):
175+
for fmt in ALL_FORMATS:
176+
with self.subTest(pl=pl, fmt=fmt):
177+
data = plistlib.dumps(bytearray(pl), fmt=fmt)
178+
pl2 = plistlib.loads(data)
179+
self.assertIsInstance(pl2, bytes)
180+
self.assertEqual(pl2, pl)
181+
data2 = plistlib.dumps(pl2, fmt=fmt)
182+
self.assertEqual(data, data2)
183+
173184
def test_bytes(self):
174185
pl = self._create()
175186
data = plistlib.dumps(pl)
@@ -311,7 +322,8 @@ def test_tuple_members(self):
311322
'second': [1, 2],
312323
'third': [3, 4],
313324
})
314-
self.assertIsNot(pl2['first'], pl2['second'])
325+
if fmt != plistlib.FMT_BINARY:
326+
self.assertIsNot(pl2['first'], pl2['second'])
315327

316328
def test_list_members(self):
317329
pl = {
@@ -416,6 +428,9 @@ def test_xml_encodings(self):
416428
pl2 = plistlib.loads(data)
417429
self.assertEqual(dict(pl), dict(pl2))
418430

431+
432+
class TestBinaryPlistlib(unittest.TestCase):
433+
419434
def test_nonstandard_refs_size(self):
420435
# Issue #21538: Refs and offsets are 24-bit integers
421436
data = (b'bplist00'
@@ -428,6 +443,47 @@ def test_nonstandard_refs_size(self):
428443
b'\x00\x00\x00\x00\x00\x00\x00\x13')
429444
self.assertEqual(plistlib.loads(data), {'a': 'b'})
430445

446+
def test_dump_duplicates(self):
447+
# Test effectiveness of saving duplicated objects
448+
for x in (None, False, True, 12345, 123.45, 'abcde', b'abcde',
449+
datetime.datetime(2004, 10, 26, 10, 33, 33),
450+
plistlib.Data(b'abcde'), bytearray(b'abcde'),
451+
[12, 345], (12, 345), {'12': 345}):
452+
with self.subTest(x=x):
453+
data = plistlib.dumps([x]*1000, fmt=plistlib.FMT_BINARY)
454+
self.assertLess(len(data), 1100, repr(data))
455+
456+
def test_identity(self):
457+
for x in (None, False, True, 12345, 123.45, 'abcde', b'abcde',
458+
datetime.datetime(2004, 10, 26, 10, 33, 33),
459+
plistlib.Data(b'abcde'), bytearray(b'abcde'),
460+
[12, 345], (12, 345), {'12': 345}):
461+
with self.subTest(x=x):
462+
data = plistlib.dumps([x]*2, fmt=plistlib.FMT_BINARY)
463+
a, b = plistlib.loads(data)
464+
if isinstance(x, tuple):
465+
x = list(x)
466+
self.assertEqual(a, x)
467+
self.assertEqual(b, x)
468+
self.assertIs(a, b)
469+
470+
def test_cycles(self):
471+
# recursive list
472+
a = []
473+
a.append(a)
474+
b = plistlib.loads(plistlib.dumps(a, fmt=plistlib.FMT_BINARY))
475+
self.assertIs(b[0], b)
476+
# recursive tuple
477+
a = ([],)
478+
a[0].append(a)
479+
b = plistlib.loads(plistlib.dumps(a, fmt=plistlib.FMT_BINARY))
480+
self.assertIs(b[0][0], b)
481+
# recursive dict
482+
a = {}
483+
a['x'] = a
484+
b = plistlib.loads(plistlib.dumps(a, fmt=plistlib.FMT_BINARY))
485+
self.assertIs(b['x'], b)
486+
431487

432488
class TestPlistlibDeprecated(unittest.TestCase):
433489
def test_io_deprecated(self):
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
Fixed issues with binary plists:
2+
3+
* Fixed saving bytearrays.
4+
* Identical objects will be saved only once.
5+
* Equal references will be loaded as identical objects.
6+
* Added support for saving and loading recursive data structures.

0 commit comments

Comments
 (0)
0