@@ -590,6 +590,8 @@ def __init__(self, message="Invalid file"):
590
590
591
591
# Maps an integer width (1, 2, 4, 8) to a struct format character.
# NOTE(review): presumably the keys are byte widths used when packing and
# unpacking unsigned integers; the consumers are outside this excerpt — confirm.
_BINARY_FORMAT = {1 : 'B' , 2 : 'H' , 4 : 'L' , 8 : 'Q' }
592
592
593
# Unique sentinel marking object-table slots that have not been parsed yet.
# None cannot serve as the marker because None is itself a valid parsed value.
+ _undefined = object ()
594
+
593
595
class _BinaryPlistParser :
594
596
"""
595
597
Read or write a binary plist file, following the description of the binary
@@ -620,7 +622,8 @@ def parse(self, fp):
620
622
) = struct .unpack ('>6xBBQQQ' , trailer )
621
623
self ._fp .seek (offset_table_offset )
622
624
self ._object_offsets = self ._read_ints (num_objects , offset_size )
623
- return self ._read_object (self ._object_offsets [top_object ])
625
+ self ._objects = [_undefined ] * num_objects
626
+ return self ._read_object (top_object )
624
627
625
628
except (OSError , IndexError , struct .error ):
626
629
raise InvalidFileException ()
@@ -646,71 +649,78 @@ def _read_ints(self, n, size):
646
649
def _read_refs (self , n ):
647
650
return self ._read_ints (n , self ._ref_size )
648
651
649
- def _read_object (self , offset ):
652
+ def _read_object (self , ref ):
650
653
"""
651
- read the object at offset .
654
+ read the object by reference .
652
655
653
656
May recursively read sub-objects (content of an array/dict/set)
654
657
"""
658
# Memoization: if this reference was already parsed, hand back the same
# object instead of reading it from the file again.
+ result = self ._objects [ref ]
659
+ if result is not _undefined :
660
+ return result
661
+
662
# Cache miss: translate the reference into a file offset via the offset table.
+ offset = self ._object_offsets [ref ]
655
663
self ._fp .seek (offset )
656
664
# The first byte is the type marker; it is split into a high nibble (tokenH)
# and a low nibble (tokenL) below.
token = self ._fp .read (1 )[0 ]
657
665
tokenH , tokenL = token & 0xF0 , token & 0x0F
658
666
659
667
if token == 0x00 :
660
- return None
668
+ result = None
661
669
662
670
elif token == 0x08 :
663
- return False
671
+ result = False
664
672
665
673
elif token == 0x09 :
666
- return True
674
+ result = True
667
675
668
676
# The referenced source code also mentions URL (0x0c, 0x0d) and
669
677
# UUID (0x0e), but neither can be generated using the Cocoa libraries.
670
678
671
679
elif token == 0x0f :
672
- return b''
680
+ result = b''
673
681
674
682
elif tokenH == 0x10 : # int
675
- return int .from_bytes (self ._fp .read (1 << tokenL ),
676
- 'big' , signed = tokenL >= 3 )
683
# The payload is 2**tokenL bytes long; it is decoded as signed only when
# tokenL >= 3 (i.e. 8 bytes or more).
+ result = int .from_bytes (self ._fp .read (1 << tokenL ),
684
+ 'big' , signed = tokenL >= 3 )
677
685
678
686
elif token == 0x22 : # real
679
- return struct .unpack ('>f' , self ._fp .read (4 ))[0 ]
687
+ result = struct .unpack ('>f' , self ._fp .read (4 ))[0 ]
680
688
681
689
elif token == 0x23 : # real
682
- return struct .unpack ('>d' , self ._fp .read (8 ))[0 ]
690
+ result = struct .unpack ('>d' , self ._fp .read (8 ))[0 ]
683
691
684
692
elif token == 0x33 : # date
685
693
f = struct .unpack ('>d' , self ._fp .read (8 ))[0 ]
686
694
# timestamp 0 of binary plists corresponds to 1/1/2001
687
695
# (year of Mac OS X 10.0), instead of 1/1/1970.
688
- return datetime .datetime (2001 , 1 , 1 ) + datetime .timedelta (seconds = f )
696
+ result = (datetime .datetime (2001 , 1 , 1 ) +
697
+ datetime .timedelta (seconds = f ))
689
698
690
699
elif tokenH == 0x40 : # data
691
700
s = self ._get_size (tokenL )
692
701
# Raw bytes are returned as-is when builtin types are requested, otherwise
# they are wrapped in Data.
if self ._use_builtin_types :
693
- return self ._fp .read (s )
702
+ result = self ._fp .read (s )
694
703
else :
695
- return Data (self ._fp .read (s ))
704
+ result = Data (self ._fp .read (s ))
696
705
697
706
elif tokenH == 0x50 : # ascii string
698
707
s = self ._get_size (tokenL )
699
708
result = self ._fp .read (s ).decode ('ascii' )
700
- return result
709
# NOTE(review): the next added line is a no-op self-assignment left over from
# mechanically replacing `return result`; it can simply be deleted.
+ result = result
701
710
702
711
elif tokenH == 0x60 : # unicode string
703
712
s = self ._get_size (tokenL )
704
# s counts UTF-16 code units, so twice as many bytes are read.
- return self ._fp .read (s * 2 ).decode ('utf-16be' )
713
+ result = self ._fp .read (s * 2 ).decode ('utf-16be' )
705
714
706
715
# tokenH == 0x80 is documented as 'UID' and appears to be used for
707
716
# keyed-archiving, not in plists.
708
717
709
718
elif tokenH == 0xA0 : # array
710
719
s = self ._get_size (tokenL )
711
720
obj_refs = self ._read_refs (s )
712
- return [self ._read_object (self ._object_offsets [x ])
713
- for x in obj_refs ]
721
# The empty list is stored in the object table before its elements are read,
# so a nested reference back to this array gets this same list from the
# memoization check at the top rather than triggering another parse.
+ result = []
722
+ self ._objects [ref ] = result
723
+ result .extend (self ._read_object (x ) for x in obj_refs )
714
724
715
725
# tokenH == 0xB0 is documented as 'ordset', but is not actually
716
726
# implemented in the Apple reference code.
# NOTE(review): the hunk boundary below elides part of this method; the branch
# header and the assignment of `s` for the dictionary code that follows are
# not visible in this excerpt.
@@ -723,12 +733,15 @@ def _read_object(self, offset):
723
733
key_refs = self ._read_refs (s )
724
734
obj_refs = self ._read_refs (s )
725
735
result = self ._dict_type ()
736
# As with arrays, the container is registered before its contents are read.
+ self ._objects [ref ] = result
726
737
for k , o in zip (key_refs , obj_refs ):
727
- result [self ._read_object (self ._object_offsets [k ])
728
- ] = self ._read_object (self ._object_offsets [o ])
729
- return result
738
+ result [self ._read_object (k )] = self ._read_object (o )
730
739
731
- raise InvalidFileException ()
740
# Any marker not matched by the branches above is rejected.
+ else :
741
+ raise InvalidFileException ()
742
+
743
# Record the finished value so later lookups of `ref` reuse it (for the
# container branches this re-stores the object registered earlier).
+ self ._objects [ref ] = result
744
+ return result
732
745
733
746
def _count_to_size (count ):
734
747
if count < 1 << 8 :
@@ -743,6 +756,8 @@ def _count_to_size(count):
743
756
else :
744
757
return 8
745
758
759
# Types that _flatten and _getrefnum key by (type, value) in _objtable.
# Data instances are keyed by their .data payload instead, and any other
# value is tracked by id() in _objidtable.
+ _scalars = (str , int , float , datetime .datetime , bytes )
760
+
746
761
class _BinaryPlistWriter (object ):
747
762
def __init__ (self , fp , sort_keys , skipkeys ):
748
763
self ._fp = fp
@@ -798,24 +813,25 @@ def _flatten(self, value):
798
813
# First check if the object is in the object table, not used for
799
814
# containers to ensure that two subcontainers with the same contents
800
815
# will be serialized as distinct values.
801
- if isinstance (value , (
802
- str , int , float , datetime .datetime , bytes , bytearray )):
816
+ if isinstance (value , _scalars ):
803
817
if (type (value ), value ) in self ._objtable :
804
818
return
805
819
806
820
elif isinstance (value , Data ):
807
821
if (type (value .data ), value .data ) in self ._objtable :
808
822
return
809
823
824
+ elif id (value ) in self ._objidtable :
825
+ return
826
+
810
827
# Add to objectreference map
811
828
refnum = len (self ._objlist )
812
829
self ._objlist .append (value )
813
- try :
814
- if isinstance (value , Data ):
815
- self ._objtable [(type (value .data ), value .data )] = refnum
816
- else :
817
- self ._objtable [(type (value ), value )] = refnum
818
- except TypeError :
830
+ if isinstance (value , _scalars ):
831
+ self ._objtable [(type (value ), value )] = refnum
832
+ elif isinstance (value , Data ):
833
+ self ._objtable [(type (value .data ), value .data )] = refnum
834
+ else :
819
835
self ._objidtable [id (value )] = refnum
820
836
821
837
# And finally recurse into containers
@@ -842,12 +858,11 @@ def _flatten(self, value):
842
858
self ._flatten (o )
843
859
844
860
def _getrefnum(self, value):
    """Return the reference number recorded for *value*.

    Scalars are looked up in ``_objtable`` by ``(type, value)`` and ``Data``
    wrappers by the type and content of their ``.data`` payload; every
    other value is looked up in ``_objidtable`` by ``id()``.
    """
    if isinstance(value, _scalars):
        key = (type(value), value)
    elif isinstance(value, Data):
        payload = value.data
        key = (type(payload), payload)
    else:
        # Neither a scalar nor Data: tracked by identity.
        return self._objidtable[id(value)]
    return self._objtable[key]
852
867
853
868
def _write_size (self , token , size ):
0 commit comments