Merge pull request #20715 from jkseppan/type1-improved-parsing · matplotlib/matplotlib@0bb36e8 · GitHub
[go: up one dir, main page]

Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 0bb36e8

Browse files
authored
Merge pull request #20715 from jkseppan/type1-improved-parsing
Improve Type-1 font parsing
2 parents 1a3766c + e98bb83 commit 0bb36e8

File tree

5 files changed

+763
-181
lines changed

5 files changed

+763
-181
lines changed

LICENSE/LICENSE_COURIERTEN

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
The Courier10PitchBT-Bold.pfb file is a Type-1 version of
2+
Courier 10 Pitch BT Bold by Bitstream, obtained from
3+
<https://ctan.org/tex-archive/fonts/courierten>. It is included
4+
here as test data only, but the following license applies.
5+
6+
7+
(c) Copyright 1989-1992, Bitstream Inc., Cambridge, MA.
8+
9+
You are hereby granted permission under all Bitstream propriety rights
10+
to use, copy, modify, sublicense, sell, and redistribute the 4 Bitstream
11+
Charter (r) Type 1 outline fonts and the 4 Courier Type 1 outline fonts
12+
for any purpose and without restriction; provided, that this notice is
13+
left intact on all copies of such fonts and that Bitstream's trademark
14+
is acknowledged as shown below on all unmodified copies of the 4 Charter
15+
Type 1 fonts.
16+
17+
BITSTREAM CHARTER is a registered trademark of Bitstream Inc.
18+
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
``Type1Font`` objects include more properties
2+
---------------------------------------------
3+
4+
The `.type1font.Type1Font.prop` dictionary now includes more keys, such
5+
as ``CharStrings`` and ``Subrs``. The value of the ``Encoding`` key is
6+
now a dictionary mapping codes to glyph names. The
7+
`.type1font.Type1Font.transform` method now correctly removes
8+
``UniqueID`` properties from the font.
37.2 KB
Binary file not shown.

lib/matplotlib/tests/test_type1font.py

Lines changed: 91 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import matplotlib.type1font as t1f
22
import os.path
33
import difflib
4+
import pytest
45

56

67
def test_Type1Font():
@@ -13,25 +14,49 @@ def test_Type1Font():
1314
assert font.parts[0] == rawdata[0x0006:0x10c5]
1415
assert font.parts[1] == rawdata[0x10cb:0x897f]
1516
assert font.parts[2] == rawdata[0x8985:0x8ba6]
16-
assert font.parts[1:] == slanted.parts[1:]
17-
assert font.parts[1:] == condensed.parts[1:]
1817
assert font.decrypted.startswith(b'dup\n/Private 18 dict dup begin')
1918
assert font.decrypted.endswith(b'mark currentfile closefile\n')
19+
assert slanted.decrypted.startswith(b'dup\n/Private 18 dict dup begin')
20+
assert slanted.decrypted.endswith(b'mark currentfile closefile\n')
21+
assert b'UniqueID 5000793' in font.parts[0]
22+
assert b'UniqueID 5000793' in font.decrypted
23+
assert font._pos['UniqueID'] == [(797, 818), (4483, 4504)]
24+
25+
len0 = len(font.parts[0])
26+
for key in font._pos.keys():
27+
for pos0, pos1 in font._pos[key]:
28+
if pos0 < len0:
29+
data = font.parts[0][pos0:pos1]
30+
else:
31+
data = font.decrypted[pos0-len0:pos1-len0]
32+
assert data.startswith(f'/{key}'.encode('ascii'))
33+
assert {'FontType', 'FontMatrix', 'PaintType', 'ItalicAngle', 'RD'
34+
} < set(font._pos.keys())
35+
36+
assert b'UniqueID 5000793' not in slanted.parts[0]
37+
assert b'UniqueID 5000793' not in slanted.decrypted
38+
assert 'UniqueID' not in slanted._pos
39+
assert font.prop['Weight'] == 'Medium'
40+
assert not font.prop['isFixedPitch']
41+
assert font.prop['ItalicAngle'] == 0
42+
assert slanted.prop['ItalicAngle'] == -45
43+
assert font.prop['Encoding'][5] == 'Pi'
44+
assert isinstance(font.prop['CharStrings']['Pi'], bytes)
45+
assert font._abbr['ND'] == 'ND'
2046

2147
differ = difflib.Differ()
2248
diff = list(differ.compare(
2349
font.parts[0].decode('latin-1').splitlines(),
2450
slanted.parts[0].decode('latin-1').splitlines()))
2551
for line in (
2652
# Removes UniqueID
27-
'- FontDirectory/CMR10 known{/CMR10 findfont dup/UniqueID known{dup',
28-
'+ FontDirectory/CMR10 known{/CMR10 findfont dup',
53+
'- /UniqueID 5000793 def',
2954
# Changes the font name
3055
'- /FontName /CMR10 def',
31-
'+ /FontName /CMR10_Slant_1000 def',
56+
'+ /FontName/CMR10_Slant_1000 def',
3257
# Alters FontMatrix
3358
'- /FontMatrix [0.001 0 0 0.001 0 0 ]readonly def',
34-
'+ /FontMatrix [0.001 0 0.001 0.001 0 0]readonly def',
59+
'+ /FontMatrix [0.001 0 0.001 0.001 0 0] readonly def',
3560
# Alters ItalicAngle
3661
'- /ItalicAngle 0 def',
3762
'+ /ItalicAngle -45.0 def'):
@@ -42,17 +67,73 @@ def test_Type1Font():
4267
condensed.parts[0].decode('latin-1').splitlines()))
4368
for line in (
4469
# Removes UniqueID
45-
'- FontDirectory/CMR10 known{/CMR10 findfont dup/UniqueID known{dup',
46-
'+ FontDirectory/CMR10 known{/CMR10 findfont dup',
70+
'- /UniqueID 5000793 def',
4771
# Changes the font name
4872
'- /FontName /CMR10 def',
49-
'+ /FontName /CMR10_Extend_500 def',
73+
'+ /FontName/CMR10_Extend_500 def',
5074
# Alters FontMatrix
5175
'- /FontMatrix [0.001 0 0 0.001 0 0 ]readonly def',
52-
'+ /FontMatrix [0.0005 0 0 0.001 0 0]readonly def'):
76+
'+ /FontMatrix [0.0005 0 0 0.001 0 0] readonly def'):
5377
assert line in diff, 'diff to condensed font must contain %s' % line
5478

5579

80+
def test_Type1Font_2():
    """Check parsed properties of the Courier10PitchBT-Bold.pfb test font."""
    here = os.path.dirname(__file__)
    font = t1f.Type1Font(os.path.join(here, 'Courier10PitchBT-Bold.pfb'))

    props = font.prop
    assert props['Weight'] == 'Bold'
    assert props['isFixedPitch']
    # The font uses StandardEncoding, so code 65 maps to the glyph name 'A'.
    assert props['Encoding'][65] == 'A'

    # Exactly one position range is expected for the Encoding entry, and it
    # must index into the first part of the font.
    [(start, end)] = font._pos['Encoding']
    assert font.parts[0][start:end] == b'/Encoding StandardEncoding'

    assert font._abbr['ND'] == '|-'
90+
91+
92+
def test_tokenize():
    """Tokenize a sample of mixed token kinds and compare to known output.

    The sample is tokenized with and without whitespace tokens.  It is then
    re-tokenized repeatedly to check that a binary token can be requested
    after any number of ordinary tokens.
    """
    data = (b'1234/abc false -9.81 Foo <<[0 1 2]<0 1ef a\t>>>\n'
            b'(string with(nested\t\\) par)ens\\\\)')
    #         1           2          x    2     xx1
    # 1 and 2 are matching parens, x means escaped character
    name, space, number, keyword, delim = (
        'name', 'whitespace', 'number', 'keyword', 'delimiter')
    boolean, string = 'boolean', 'string'
    expected = [
        (number, 1234), (name, 'abc'), (space, ' '), (boolean, False),
        (space, ' '), (number, -9.81), (space, ' '), (keyword, 'Foo'),
        (space, ' '), (delim, '<<'), (delim, '['), (number, 0),
        (space, ' '), (number, 1), (space, ' '), (number, 2), (delim, ']'),
        (string, b'\x01\xef\xa0'), (delim, '>>'), (space, '\n'),
        (string, 'string with(nested\t) par)ens\\'),
    ]
    expected_no_ws = [pair for pair in expected if pair[0] != space]

    def as_pairs(tokens):
        """Reduce tokens to comparable (kind, value) tuples."""
        return [(tok.kind, tok.value()) for tok in tokens]

    assert as_pairs(t1f._tokenize(data, False)) == expected
    assert as_pairs(t1f._tokenize(data, True)) == expected_no_ws

    def binary_after(count):
        """Read *count* ordinary tokens, then request one more via send()."""
        stream = t1f._tokenize(data, True)
        taken = [next(stream) for _ in range(count)]
        # Sending a number is expected to yield a 'binary' token (presumably
        # of that many bytes -- confirm against _tokenize).
        taken.append(stream.send(10))
        return as_pairs(taken)

    for count in range(1, len(expected_no_ws)):
        *leading, last = binary_after(count)
        assert leading == expected_no_ws[:count]
        kind, value = last
        assert kind == 'binary'
        assert isinstance(value, bytes)
126+
127+
128+
def test_tokenize_errors():
    """Malformed string tokens must make the tokenizer raise ValueError."""
    malformed = (
        # The final closing paren is escaped, so the string never ends.
        b'1234 (this (string) is unterminated\\)',
        # The hex string has no closing '>'.
        b'/Foo<01234',
        # 'g' is not a hexadecimal digit.
        b'/Foo<01234abcg>/Bar',
    )
    for data in malformed:
        with pytest.raises(ValueError):
            # list() drains the generator so the whole input gets tokenized.
            list(t1f._tokenize(data, True))
135+
136+
56137
def test_overprecision():
57138
# We used to output too many digits in FontMatrix entries and
58139
# ItalicAngle, which could make Type-1 parsers unhappy.

0 commit comments

Comments
 (0)
0