@@ -590,6 +590,8 @@ def __init__(self, message="Invalid file"):
590
590
591
591
_BINARY_FORMAT = {1 : 'B' , 2 : 'H' , 4 : 'L' , 8 : 'Q' }
592
592
593
# Sentinel for object-table slots that have not been parsed yet. A dedicated
# object is used because None is itself a value a plist object can decode to.
+ _undefined = object ()
594
+
593
595
class _BinaryPlistParser :
594
596
"""
595
597
Read or write a binary plist file, following the description of the binary
@@ -620,7 +622,8 @@ def parse(self, fp):
620
622
) = struct .unpack ('>6xBBQQQ' , trailer )
621
623
self ._fp .seek (offset_table_offset )
622
624
self ._object_offsets = self ._read_ints (num_objects , offset_size )
623
- return self ._read_object (self ._object_offsets [top_object ])
625
+ self ._objects = [_undefined ] * num_objects
626
+ return self ._read_object (top_object )
624
627
625
628
except (OSError , IndexError , struct .error ):
626
629
raise InvalidFileException ()
@@ -646,71 +649,77 @@ def _read_ints(self, n, size):
646
649
def _read_refs(self, n):
    """Return *n* object references.

    Delegates to ``_read_ints``, passing ``self._ref_size`` as the
    per-item size.
    """
    ref_width = self._ref_size
    return self._read_ints(n, ref_width)
648
651
649
- def _read_object (self , offset ):
652
+ def _read_object (self , ref ):
650
653
"""
651
- read the object at offset .
654
+ read the object by reference .
652
655
653
656
May recursively read sub-objects (content of an array/dict/set)
654
657
"""
658
+ result = self ._objects [ref ]
659
+ if result is not _undefined :
660
+ return result
661
+
662
+ offset = self ._object_offsets [ref ]
655
663
self ._fp .seek (offset )
656
664
token = self ._fp .read (1 )[0 ]
657
665
tokenH , tokenL = token & 0xF0 , token & 0x0F
658
666
659
667
if token == 0x00 :
660
- return None
668
+ result = None
661
669
662
670
elif token == 0x08 :
663
- return False
671
+ result = False
664
672
665
673
elif token == 0x09 :
666
- return True
674
+ result = True
667
675
668
676
# The referenced source code also mentions URL (0x0c, 0x0d) and
669
677
# UUID (0x0e), but neither can be generated using the Cocoa libraries.
670
678
671
679
elif token == 0x0f :
672
- return b''
680
+ result = b''
673
681
674
682
elif tokenH == 0x10 : # int
675
- return int .from_bytes (self ._fp .read (1 << tokenL ),
676
- 'big' , signed = tokenL >= 3 )
683
+ result = int .from_bytes (self ._fp .read (1 << tokenL ),
684
+ 'big' , signed = tokenL >= 3 )
677
685
678
686
elif token == 0x22 : # real
679
- return struct .unpack ('>f' , self ._fp .read (4 ))[0 ]
687
+ result = struct .unpack ('>f' , self ._fp .read (4 ))[0 ]
680
688
681
689
elif token == 0x23 : # real
682
- return struct .unpack ('>d' , self ._fp .read (8 ))[0 ]
690
+ result = struct .unpack ('>d' , self ._fp .read (8 ))[0 ]
683
691
684
692
elif token == 0x33 : # date
685
693
f = struct .unpack ('>d' , self ._fp .read (8 ))[0 ]
686
694
# timestamp 0 of binary plists corresponds to 1/1/2001
687
695
# (year of Mac OS X 10.0), instead of 1/1/1970.
688
- return datetime .datetime .utcfromtimestamp (f + (31 * 365 + 8 ) * 86400 )
696
+ result = datetime .datetime .utcfromtimestamp (f + (31 * 365 + 8 ) * 86400 )
689
697
690
698
elif tokenH == 0x40 : # data
691
699
s = self ._get_size (tokenL )
692
700
if self ._use_builtin_types :
693
- return self ._fp .read (s )
701
+ result = self ._fp .read (s )
694
702
else :
695
- return Data (self ._fp .read (s ))
703
+ result = Data (self ._fp .read (s ))
696
704
697
705
elif tokenH == 0x50 : # ascii string
698
706
s = self ._get_size (tokenL )
699
707
result = self ._fp .read (s ).decode ('ascii' )
700
- return result
708
+ result = result
701
709
702
710
elif tokenH == 0x60 : # unicode string
703
711
s = self ._get_size (tokenL )
704
- return self ._fp .read (s * 2 ).decode ('utf-16be' )
712
+ result = self ._fp .read (s * 2 ).decode ('utf-16be' )
705
713
706
714
# tokenH == 0x80 is documented as 'UID' and appears to be used for
707
715
# keyed-archiving, not in plists.
708
716
709
717
elif tokenH == 0xA0 : # array
710
718
s = self ._get_size (tokenL )
711
719
obj_refs = self ._read_refs (s )
712
- return [self ._read_object (self ._object_offsets [x ])
713
- for x in obj_refs ]
720
+ result = []
721
+ self ._objects [ref ] = result
722
+ result .extend (self ._read_object (x ) for x in obj_refs )
714
723
715
724
# tokenH == 0xB0 is documented as 'ordset', but is not actually
716
725
# implemented in the Apple reference code.
@@ -723,12 +732,15 @@ def _read_object(self, offset):
723
732
key_refs = self ._read_refs (s )
724
733
obj_refs = self ._read_refs (s )
725
734
result = self ._dict_type ()
735
+ self ._objects [ref ] = result
726
736
for k , o in zip (key_refs , obj_refs ):
727
- result [self ._read_object (self ._object_offsets [k ])
728
- ] = self ._read_object (self ._object_offsets [o ])
729
- return result
737
+ result [self ._read_object (k )] = self ._read_object (o )
730
738
731
- raise InvalidFileException ()
739
+ else :
740
+ raise InvalidFileException ()
741
+
742
+ self ._objects [ref ] = result
743
+ return result
732
744
733
745
def _count_to_size (count ):
734
746
if count < 1 << 8 :
@@ -743,6 +755,8 @@ def _count_to_size(count):
743
755
else :
744
756
return 8
745
757
758
# Types whose instances the writer registers in its object table under a
# (type, value) key, so a value that is already present is not added again.
+ _scalars = (str , int , float , datetime .datetime , bytes )
759
+
746
760
class _BinaryPlistWriter (object ):
747
761
def __init__ (self , fp , sort_keys , skipkeys ):
748
762
self ._fp = fp
@@ -798,24 +812,25 @@ def _flatten(self, value):
798
812
# First check if the object is in the object table, not used for
799
813
# containers to ensure that two subcontainers with the same contents
800
814
# will be serialized as distinct values.
801
- if isinstance (value , (
802
- str , int , float , datetime .datetime , bytes , bytearray )):
815
+ if isinstance (value , _scalars ):
803
816
if (type (value ), value ) in self ._objtable :
804
817
return
805
818
806
819
elif isinstance (value , Data ):
807
820
if (type (value .data ), value .data ) in self ._objtable :
808
821
return
809
822
823
+ elif id (value ) in self ._objidtable :
824
+ return
825
+
810
826
# Add to objectreference map
811
827
refnum = len (self ._objlist )
812
828
self ._objlist .append (value )
813
- try :
814
- if isinstance (value , Data ):
815
- self ._objtable [(type (value .data ), value .data )] = refnum
816
- else :
817
- self ._objtable [(type (value ), value )] = refnum
818
- except TypeError :
829
+ if isinstance (value , _scalars ):
830
+ self ._objtable [(type (value ), value )] = refnum
831
+ elif isinstance (value , Data ):
832
+ self ._objtable [(type (value .data ), value .data )] = refnum
833
+ else :
819
834
self ._objidtable [id (value )] = refnum
820
835
821
836
# And finally recurse into containers
@@ -842,12 +857,11 @@ def _flatten(self, value):
842
857
self ._flatten (o )
843
858
844
859
def _getrefnum(self, value):
    """Return the reference number previously recorded for *value*.

    Scalars are looked up in ``self._objtable`` by ``(type, value)``;
    ``Data`` wrappers are looked up the same way using their ``data``
    payload.  Every other value is looked up in ``self._objidtable`` by
    ``id(value)``.  A failed lookup propagates from the table unchanged.
    """
    if isinstance(value, _scalars):
        key = (type(value), value)
    elif isinstance(value, Data):
        payload = value.data
        key = (type(payload), payload)
    else:
        # Everything that is not a scalar or Data wrapper is tracked by
        # identity.
        return self._objidtable[id(value)]
    return self._objtable[key]
852
866
853
867
def _write_size (self , token , size ):
0 commit comments