21
21
v1.1, 1993. ISBN 0-201-57044-0.
22
22
"""
23
23
24
+ import base64
24
25
import binascii
25
26
import logging
26
27
import re
35
36
_log = logging .getLogger (__name__ )
36
37
37
38
39
+ def _make_tag (set ):
40
+ """
41
+ Hash set into a six-character tag make of uppercase letters
42
+
43
+ Useful for adding a tag into subsetted fonts while keeping the code
44
+ reproducible. The function always returns the same value for the
45
+ same set on the same exact Python version but is not guaranteed to
46
+ not have collisions.
47
+
48
+ Parameters
49
+ ----------
50
+ set : iterable
51
+ The set of glyphs present in a font subset
52
+
53
+ Returns
54
+ -------
55
+ bytes
56
+ Six uppercase ASCII letters
57
+ """
58
+
59
+ # freeze the set to make it hashable, interpret the hash as bytes
60
+ array = struct .pack ("@q" , hash (frozenset (set )))
61
+ # turn the bytes into characters with b32encode, which uses uppercase
62
+ # letters and numbers from 2 to 7 - remap those arbitrarily
63
+ trans = bytes .maketrans (b'234567' , b'MTPLIB' )
64
+ return base64 .b32encode (array ).translate (trans , delete = b'=' )[:6 ]
65
+
66
+
38
67
class _Token :
68
+
39
69
"""
40
70
A token in a PostScript stream
41
71
@@ -485,6 +515,15 @@ def convert(x): return x.decode('ascii', 'replace')
485
515
except StopIteration :
486
516
break
487
517
518
+ # there are some standard abbreviations whose names vary
519
+ # so detect them
520
+ if value == b'{noaccess def}' :
521
+ self ._abbr ['ND' ] = key .encode ('ascii' )
522
+ elif value == b'{noaccess put}' :
523
+ self ._abbr ['NP' ] = key .encode ('ascii' )
524
+ elif value == b'{string currentfile exch readstring pop}' :
525
+ self ._abbr ['RD' ] = key .encode ('ascii' )
526
+
488
527
# sometimes noaccess def and readonly def are abbreviated
489
528
if kw .is_name (b'def' , self ._abbr ['ND' ], self ._abbr ['NP' ]):
490
529
prop [key ] = value
@@ -556,13 +595,16 @@ def _parse_subrs(self, tokens, _data):
556
595
"Token preceding subr must be RD or equivalent, "
557
596
f"was { token } "
558
597
)
598
+ if not token .is_name (self ._abbr ['RD' ]):
599
+ raise RuntimeError (
600
+ f"Token preceding subr must be RD or equivalent, was { token } "
601
+ )
559
602
binary_token = tokens .send (1 + nbytes_token .numeric_value ())
560
603
array [index_token .numeric_value ()] = binary_token .value [1 :]
561
604
562
605
return array , next (tokens ).endpos ()
563
606
564
- @staticmethod
565
- def _parse_charstrings (tokens , _data ):
607
+ def _parse_charstrings (self , tokens , _data ):
566
608
count_token = next (tokens )
567
609
if not count_token .is_number ():
568
610
raise RuntimeError (
@@ -587,7 +629,11 @@ def _parse_charstrings(tokens, _data):
587
629
f"Token following /{ glyphname } in CharStrings definition "
588
630
f"must be a number, was { nbytes_token } "
589
631
)
590
- token = next (tokens ) # usually RD or |-
632
+ token = next (tokens )
633
+ if not token .is_name (self ._abbr ['RD' ]):
634
+ raise RuntimeError (
635
+ f"Token preceding charstring must be RD or equivalent, was { token } "
636
+ )
591
637
binary_token = tokens .send (1 + nbytes_token .numeric_value ())
592
638
charstrings [glyphname ] = binary_token .value [1 :]
593
639
@@ -620,16 +666,15 @@ def _parse_encoding(tokens, _data):
620
666
encoding [index_token .numeric_value ()] = \
621
667
name_token .value [1 :].decode ('ascii' , 'replace' )
622
668
623
- @staticmethod
624
- def _parse_othersubrs (tokens , data ):
669
+ def _parse_othersubrs (self , tokens , data ):
625
670
init_pos = None
626
671
while True :
627
672
token = next (tokens )
628
673
if init_pos is None :
629
674
init_pos = token .pos
630
675
if token .is_delim ():
631
676
_expression (token , tokens , data )
632
- elif token .value in (b'def' , b 'ND', b'|-' ):
677
+ elif token .value in (b'def' , self . _abbr [ 'ND' ] ):
633
678
return data [init_pos :token .endpos ()], token .endpos ()
634
679
635
680
def transform (self , effects ):
@@ -684,7 +729,7 @@ def transform(self, effects):
684
729
fontmatrix = (
685
730
'[%s]' % ' ' .join (_format_approx (x , 6 ) for x in array )
686
731
).encode ('ascii' )
687
- replacements = (
732
+ newparts = self . _replace (
688
733
[(x , b'/FontName/%s def' % fontname )
689
734
for x in self ._pos ['FontName' ]]
690
735
+ [(x , b'/ItalicAngle %a def' % italicangle )
@@ -694,6 +739,9 @@ def transform(self, effects):
694
739
+ [(x , b'' ) for x in self ._pos .get ('UniqueID' , [])]
695
740
)
696
741
742
+ return Type1Font ((newparts [0 ], self ._encrypt (newparts [1 ], 'eexec' ), self .parts [2 ]))
743
+
744
+ def _replace (self , replacements ):
697
745
data = bytearray (self .parts [0 ])
698
746
data .extend (self .decrypted )
699
747
len0 = len (self .parts [0 ])
@@ -708,11 +756,192 @@ def transform(self, effects):
708
756
len0 += len (value ) - pos1 + pos0
709
757
710
758
data = bytes (data )
711
- return Type1Font ((
712
- data [:len0 ],
713
- self ._encrypt (data [len0 :], 'eexec' ),
714
- self .parts [2 ]
715
- ))
759
+ return data [:len0 ], data [len0 :]
760
+
761
def subset(self, characters):
    """
    Build a new font that only defines the requested characters.

    Parameters
    ----------
    characters : iterable
        The characters to keep. Each element is compared against the keys
        of ``self.prop['Encoding']``, and code 0 is always mapped to
        ``.notdef`` in the result.

    Returns
    -------
    `Type1Font`
    """
    wanted = set(characters)
    encoding = {}
    for code, glyph in self.prop['Encoding'].items():
        if code in wanted:
            encoding[code] = glyph
    encoding[0] = '.notdef'

    # todo and done hold glyph names; keep simulating glyph programs until
    # every glyph reachable from the requested ones has been visited
    todo = set(encoding.values())
    done = set()
    # NOTE(review): subrs 0-3 are always kept - presumably the standard
    # flex / hint-replacement subroutines; confirm against the Type-1 spec
    seen_subrs = {0, 1, 2, 3}
    pending = todo - done
    while pending:
        glyph = next(iter(pending))
        called_glyphs, called_subrs, _, _ = self._simulate(glyph, [], [])
        todo.update(called_glyphs)
        seen_subrs.update(called_subrs)
        done.add(glyph)
        pending = todo - done

    fontname = _make_tag(todo) + b'+' + self.prop['FontName'].encode('ascii')
    charstrings = self._subset_charstrings(todo)
    subrs = self._subset_subrs(seen_subrs)

    replacements = [
        (x, b'/FontName/%s def' % fontname) for x in self._pos['FontName']
    ]
    replacements += [
        (self._pos['CharStrings'][0], charstrings),
        (self._pos['Subrs'][0], subrs),
        (self._pos['Encoding'][0], self._subset_encoding(encoding)),
    ]
    # drop the UniqueID, if any, by replacing it with nothing
    replacements += [(x, b'') for x in self._pos.get('UniqueID', [])]

    cleartext, decrypted = self._replace(replacements)
    return Type1Font((cleartext, self._encrypt(decrypted, 'eexec'), self.parts[2]))
802
+
803
+ @staticmethod
804
+ def _charstring_tokens (data ):
805
+ data = iter (data )
806
+ for byte in data :
807
+ if 32 <= byte <= 246 :
808
+ yield byte - 139
809
+ elif 247 <= byte <= 250 :
810
+ byte2 = next (data )
811
+ yield (byte - 247 ) * 256 + byte2 + 108
812
+ elif 251 <= byte <= 254 :
813
+ byte2 = next (data )
814
+ yield - (byte - 251 )* 256 - byte2 - 108
815
+ elif byte == 255 :
816
+ bs = itertools .islice (data , 4 )
817
+ yield struct .unpack ('>i' , bs )[0 ]
818
+ elif byte == 12 :
819
+ byte1 = next (data )
820
+ yield {
821
+ 0 : 'dotsection' ,
822
+ 1 : 'vstem3' ,
823
+ 2 : 'hstem3' ,
824
+ 6 : 'seac' ,
825
+ 7 : 'sbw' ,
826
+ 12 : 'div' ,
827
+ 16 : 'callothersubr' ,
828
+ 17 : 'pop' ,
829
+ 33 : 'setcurrentpoint'
830
+ }[byte1 ]
831
+ else :
832
+ yield {
833
+ 1 : 'hstem' ,
834
+ 3 : 'vstem' ,
835
+ 4 : 'vmoveto' ,
836
+ 5 : 'rlineto' ,
837
+ 6 : 'hlineto' ,
838
+ 7 : 'vlineto' ,
839
+ 8 : 'rrcurveto' ,
840
+ 9 : 'closepath' ,
841
+ 10 : 'callsubr' ,
842
+ 11 : 'return' ,
843
+ 13 : 'hsbw' ,
844
+ 14 : 'endchar' ,
845
+ 21 : 'rmoveto' ,
846
+ 22 : 'hmoveto' ,
847
+ 30 : 'vhcurveto' ,
848
+ 31 : 'hvcurveto'
849
+ }[byte ]
850
+
851
+ def _step (self , buildchar_stack , postscript_stack , opcode ):
852
+ if isinstance (opcode , int ):
853
+ return set (), set (), buildchar_stack + [opcode ], postscript_stack
854
+ elif opcode in {'hsbw' , 'sbw' , 'closepath' , 'hlineto' , 'hmoveto' , 'hcurveto' , 'hvcurveto' ,
855
+ 'rlineto' , 'rmoveto' , 'rrcurveto' , 'vhcurveto' , 'vlineto' , 'vmoveto' ,
856
+ 'dotsection', 'hstem' , 'hstem3' , 'vstem' , 'vstem3' , 'setcurrentpoint' }:
857
+ return set (), set (), [], postscript_stack
858
+ elif opcode == 'seac' :
859
+ codes = buildchar_stack [3 :5 ]
860
+ glyphs = [self .prop ['Encoding' ][x ] for x in codes ]
861
+ return set (glyphs ), set (), [], postscript_stack
862
+ elif opcode == 'div' :
863
+ num1 , num2 = buildchar_stack [- 2 :]
864
+ return set (), set (), buildchar_stack [- 2 :] + [num1 / num2 ], postscript_stack
865
+ elif opcode == 'callothersubr' :
866
+ othersubr = buildchar_stack [- 1 ]
867
+ n = buildchar_stack [- 2 ]
868
+ args = buildchar_stack [- 2 - n :- 2 ]
869
+ if othersubr == 3 : # Section 8.1 in Type-1 spec
870
+ postscript_stack .append (args [0 ])
871
+ else :
872
+ postscript_stack .extend (args [::- 1 ])
873
+ return set (), set (), buildchar_stack [:- n - 2 ], postscript_stack
874
+ elif opcode == 'callsubr' :
875
+ subr = buildchar_stack [- 1 ]
876
+ glyphs , subrs , new_bc_stack , new_ps_stack = \
877
+ self ._simulate (subr , buildchar_stack [:- 1 ], postscript_stack )
878
+ return set (), subrs | {subr }, new_bc_stack , new_ps_stack
879
+ elif opcode == 'pop' :
880
+ return set (), set (), buildchar_stack + [postscript_stack [- 1 ]], postscript_stack [:- 1 ]
881
+ else :
882
+ raise RuntimeError (f'opcode { opcode } ' )
883
+
884
+ def _simulate (self , glyph_or_subr , buildchar_stack , postscript_stack ):
885
+ if isinstance (glyph_or_subr , str ):
886
+ program = self .prop ['CharStrings' ][glyph_or_subr ]
887
+ glyphs = {glyph_or_subr }
888
+ subrs = set ()
889
+ else :
890
+ program = self .prop ['Subrs' ][glyph_or_subr ]
891
+ glyphs = set ()
892
+ subrs = {glyph_or_subr }
893
+ for opcode in self ._charstring_tokens (program ):
894
+ if opcode in ('return' , 'endchar' ):
895
+ return glyphs , subrs , buildchar_stack , postscript_stack
896
+ newglyphs , newsubrs , buildchar_stack , postscript_stack = \
897
+ self ._step (buildchar_stack , postscript_stack , opcode )
898
+ glyphs .update (newglyphs )
899
+ subrs .update (newsubrs )
900
+
901
+ def _subset_encoding (self , encoding ):
902
+ data = bytearray (b'/Encoding 256 array\n 0 1 255 { 1 index exch /.notdef put } for\n ' )
903
+ for i , glyph in sorted (encoding .items ()):
904
+ if glyph == '.notdef' :
905
+ continue
906
+ data .extend (f'dup { i } /{ glyph } put\n ' .encode ('ascii' ))
907
+ data .extend (b'readonly def\n ' )
908
+ return bytes (data )
909
+
910
+ def _subset_charstrings (self , glyphs ):
911
+ data = bytearray (f'/CharStrings { len (glyphs )} dict dup begin\n ' .encode ('ascii' ))
912
+ for glyph in glyphs :
913
+ enc = self ._encrypt (self .prop ['CharStrings' ][glyph ], 'charstring' , self .prop .get ('lenIV' , 4 ))
914
+ data .extend (f'/{ glyph } { len (enc )} ' .encode ('ascii' ))
915
+ data .extend (self ._abbr ["RD" ])
916
+ data .extend (b' ' )
917
+ data .extend (enc )
918
+ data .extend (b' ' )
919
+ data .extend (self ._abbr ["ND" ])
920
+ data .extend (b'\n ' )
921
+ data .extend (b'end\n ' )
922
+ return bytes (data )
923
+
924
+ def _subset_subrs (self , indices ):
925
+ # we can't remove subroutines, we just replace unused ones with a stub
926
+ n_subrs = len (self .prop ['Subrs' ])
927
+ data = bytearray (f'/Subrs { n_subrs } array\n ' .encode ('ascii' ))
928
+ for i in range (n_subrs ):
929
+ if i in indices :
930
+ sub = self .prop ['Subrs' ][i ]
931
+ else :
932
+ sub = bytes ([11 ])
933
+ enc = self ._encrypt (sub , 'charstring' , self .prop .get ('lenIV' , 4 ))
934
+ data .extend (f'dup { i } { len (enc )} ' .encode ('ascii' ))
935
+ data .extend (self ._abbr ['RD' ])
936
+ data .extend (b' ' )
937
+ data .extend (enc )
938
+ data .extend (b' ' )
939
+ data .extend (self ._abbr ['NP' ])
940
+ data .extend (b'\n ' )
941
+
942
+ data .extend (self ._abbr ['ND' ])
943
+ data .extend (b'\n ' )
944
+ return bytes (data )
716
945
717
946
718
947
StandardEncoding = {
0 commit comments