8000 [3.5] bpo-32072: Fix issues with binary plists. (GH-4455) by serhiy-storchaka · Pull Request #4656 · python/cpython · GitHub
[go: up one dir, main page]

Skip to content

[3.5] bpo-32072: Fix issues with binary plists. (GH-4455) #4656

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jan 23, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 51 additions & 36 deletions Lib/plistlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -590,6 +590,8 @@ def __init__(self, message="Invalid file"):

_BINARY_FORMAT = {1: 'B', 2: 'H', 4: 'L', 8: 'Q'}

_undefined = object()

class _BinaryPlistParser:
"""
Read or write a binary plist file, following the description of the binary
Expand Down Expand Up @@ -620,7 +622,8 @@ def parse(self, fp):
) = struct.unpack('>6xBBQQQ', trailer)
self._fp.seek(offset_table_offset)
self._object_offsets = self._read_ints(num_objects, offset_size)
return self._read_object(self._object_offsets[top_object])
self._objects = [_undefined] * num_objects
return self._read_object(top_object)

except (OSError, IndexError, struct.error):
raise InvalidFileException()
Expand All @@ -646,71 +649,78 @@ def _read_ints(self, n, size):
def _read_refs(self, n):
return self._read_ints(n, self._ref_size)

def _read_object(self, offset):
def _read_object(self, ref):
"""
read the object at offset.
read the object by reference.

May recursively read sub-objects (content of an array/dict/set)
"""
result = self._objects[ref]
if result is not _undefined:
return result

offset = self._object_offsets[ref]
self._fp.seek(offset)
token = self._fp.read(1)[0]
tokenH, tokenL = token & 0xF0, token & 0x0F

if token == 0x00:
return None
result = None

elif token == 0x08:
return False
result = False

elif token == 0x09:
return True
result = True

# The referenced source code also mentions URL (0x0c, 0x0d) and
# UUID (0x0e), but neither can be generated using the Cocoa libraries.

elif token == 0x0f:
return b''
result = b''

elif tokenH == 0x10: # int
return int.from_bytes(self._fp.read(1 << tokenL),
'big', signed=tokenL >= 3)
result = int.from_bytes(self._fp.read(1 << tokenL),
'big', signed=tokenL >= 3)

elif token == 0x22: # real
return struct.unpack('>f', self._fp.read(4))[0]
result = struct.unpack('>f', self._fp.read(4))[0]

elif token == 0x23: # real
return struct.unpack('>d', self._fp.read(8))[0]
result = struct.unpack('>d', self._fp.read(8))[0]

elif token == 0x33: # date
f = struct.unpack('>d', self._fp.read(8))[0]
# timestamp 0 of binary plists corresponds to 1/1/2001
# (year of Mac OS X 10.0), instead of 1/1/1970.
return datetime.datetime(2001, 1, 1) + datetime.timedelta(seconds=f)
result = (datetime.datetime(2001, 1, 1) +
datetime.timedelta(seconds=f))

elif tokenH == 0x40: # data
s = self._get_size(tokenL)
if self._use_builtin_types:
return self._fp.read(s)
result = self._fp.read(s)
else:
return Data(self._fp.read(s))
result = Data(self._fp.read(s))

elif tokenH == 0x50: # ascii string
s = self._get_size(tokenL)
result = self._fp.read(s).decode('ascii')
return result
result = result

elif tokenH == 0x60: # unicode string
s = self._get_size(tokenL)
return self._fp.read(s * 2).decode('utf-16be')
result = self._fp.read(s * 2).decode('utf-16be')

# tokenH == 0x80 is documented as 'UID' and appears to be used for
# keyed-archiving, not in plists.

elif tokenH == 0xA0: # array
s = self._get_size(tokenL)
obj_refs = self._read_refs(s)
return [self._read_object(self._object_offsets[x])
for x in obj_refs]
result = []
self._objects[ref] = result
result.extend(self._read_object(x) for x in obj_refs)

# tokenH == 0xB0 is documented as 'ordset', but is not actually
# implemented in the Apple reference code.
Expand All @@ -723,12 +733,15 @@ def _read_object(self, offset):
key_refs = self._read_refs(s)
obj_refs = self._read_refs(s)
result = self._dict_type()
self._objects[ref] = result
for k, o in zip(key_refs, obj_refs):
result[self._read_object(self._object_offsets[k])
] = self._read_object(self._object_offsets[o])
return result
result[self._read_object(k)] = self._read_object(o)

raise InvalidFileException()
else:
raise InvalidFileException()

self._objects[ref] = result
return result

def _count_to_size(count):
if count < 1 << 8:
Expand All @@ -743,6 +756,8 @@ def _count_to_size(count):
else:
return 8

_scalars = (str, int, float, datetime.datetime, bytes)

class _BinaryPlistWriter (object):
def __init__(self, fp, sort_keys, skipkeys):
self._fp = fp
Expand Down Expand Up @@ -798,24 +813,25 @@ def _flatten(self, value):
# First check if the object is in the object table, not used for
# containers to ensure that two subcontainers with the same contents
# will be serialized as distinct values.
if isinstance(value, (
str, int, float, datetime.datetime, bytes, bytearray)):
if isinstance(value, _scalars):
if (type(value), value) in self._objtable:
return

elif isinstance(value, Data):
if (type(value.data), value.data) in self._objtable:
return

elif id(value) in self._objidtable:
return

# Add to objectreference map
refnum = len(self._objlist)
self._objlist.append(value)
try:
if isinstance(value, Data):
self._objtable[(type(value.data), value.data)] = refnum
else:
self._objtable[(type(value), value)] = refnum
except TypeError:
if isinstance(value, _scalars):
self._objtable[(type(value), value)] = refnum
elif isinstance(value, Data):
self._objtable[(type(value.data), value.data)] = refnum
else:
self._objidtable[id(value)] = refnum

# And finally recurse into containers
Expand All @@ -842,12 +858,11 @@ def _flatten(self, value):
self._flatten(o)

def _getrefnum(self, value):
try:
if isinstance(value, Data):
return self._objtable[(type(value.data), value.data)]
else:
return self._objtable[(type(value), value)]
except TypeError:
if isinstance(value, _scalars):
return self._objtable[(type(value), value)]
elif isinstance(value, Data):
return self._objtable[(type(value.data), value.data)]
else:
return self._objidtable[id(value)]

def _write_size(self, token, size):
Expand Down
58 changes: 57 additions & 1 deletion Lib/test/test_plistlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,17 @@ def test_int(self):
self.assertRaises(OverflowError, plistlib.dumps,
pl, fmt=fmt)

def test_bytearray(self):
for pl in (b'<binary gunk>', b"<lots of binary gunk>\0\1\2\3" * 10):
for fmt in ALL_FORMATS:
with self.subTest(pl=pl, fmt=fmt):
data = plistlib.dumps(bytearray(pl), fmt=fmt)
pl2 = plistlib.loads(data)
self.assertIsInstance(pl2, bytes)
self.assertEqual(pl2, pl)
data2 = plistlib.dumps(pl2, fmt=fmt)
self.assertEqual(data, data2)

def test_bytes(self):
pl = self._create()
data = plistlib.dumps(pl)
Expand Down Expand Up @@ -311,7 +322,8 @@ def test_tuple_members(self):
'second': [1, 2],
'third': [3, 4],
})
self.assertIsNot(pl2['first'], pl2['second'])
if fmt != plistlib.FMT_BINARY:
self.assertIsNot(pl2['first'], pl2['second'])

def test_list_members(self):
pl = {
Expand Down Expand Up @@ -423,6 +435,9 @@ def test_xml_encodings(self):
pl2 = plistlib.loads(data)
self.assertEqual(dict(pl), dict(pl2))


class TestBinaryPlistlib(unittest.TestCase):

def test_nonstandard_refs_size(self):
# Issue #21538: Refs and offsets are 24-bit integers
data = (b'bplist00'
Expand All @@ -435,6 +450,47 @@ def test_nonstandard_refs_size(self):
b'\x00\x00\x00\x00\x00\x00\x00\x13')
self.assertEqual(plistlib.loads(data), {'a': 'b'})

def test_dump_duplicates(self):
# Test effectiveness of saving duplicated objects
for x in (None, False, True, 12345, 123.45, 'abcde', b'abcde',
datetime.datetime(2004, 10, 26, 10, 33, 33),
plistlib.Data(b'abcde'), bytearray(b'abcde'),
[12, 345], (12, 345), {'12': 345}):
with self.subTest(x=x):
data = plistlib.dumps([x]*1000, fmt=plistlib.FMT_BINARY)
self.assertLess(len(data), 1100, repr(data))

def test_identity(self):
for x in (None, False, True, 12345, 123.45, 'abcde', b'abcde',
datetime.datetime(2004, 10, 26, 10, 33, 33),
plistlib.Data(b'abcde'), bytearray(b'abcde'),
[12, 345], (12, 345), {'12': 345}):
with self.subTest(x=x):
data = plistlib.dumps([x]*2, fmt=plistlib.FMT_BINARY)
a, b = plistlib.loads(data)
if isinstance(x, tuple):
x = list(x)
self.assertEqual(a, x)
self.assertEqual(b, x)
self.assertIs(a, b)

def test_cycles(self):
# recursive list
a = []
a.append(a)
b = plistlib.loads(plistlib.dumps(a, fmt=plistlib.FMT_BINARY))
self.assertIs(b[0], b)
# recursive tuple
a = ([],)
a[0].append(a)
b = plistlib.loads(plistlib.dumps(a, fmt=plistlib.FMT_BINARY))
self.assertIs(b[0][0], b)
# recursive dict
a = {}
a['x'] = a
b = plistlib.loads(plistlib.dumps(a, fmt=plistlib.FMT_BINARY))
self.assertIs(b['x'], b)

def test_large_timestamp(self):
# Issue #26709: 32-bit timestamp out of range
for ts in -2**31-1, 2**31:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
Fixed issues with binary plists:

* Fixed saving bytearrays.
* Identical objects will be saved only once.
* Equal references will be load as identical objects.
* Added support for saving and loading recursive data structures.
0