Make type1font.py work better on Python 3.x · matplotlib/matplotlib@1bb09ff · GitHub
[go: up one dir, main page]

Skip to content

Commit 1bb09ff

Browse files
committed
Make type1font.py work better on Python 3.x
Communicate token types as objects instead of strings from the tokenizer to the parser. Use proper Unicode strings for string-like data in the font properties. Fix the handling of delimiters. Resolves #3049.
1 parent 894f3df commit 1bb09ff

File tree

1 file changed: +36 −25 lines changed

lib/matplotlib/type1font.py

Lines changed: 36 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -141,30 +141,37 @@ def _split(self, data):
141141

142142
return data[:len1], binary, data[idx:]
143143

144-
_whitespace = re.compile(br'[\0\t\r\014\n ]+')
145-
_token = re.compile(br'/{0,2}[^]\0\t\r\v\n ()<>{}/%[]+')
146-
_comment = re.compile(br'%[^\r\n\v]*')
147-
_instring = re.compile(br'[()\\]')
144+
_whitespace_re = re.compile(br'[\0\t\r\014\n ]+')
145+
_token_re = re.compile(br'/{0,2}[^]\0\t\r\v\n ()<>{}/%[]+')
146+
_comment_re = re.compile(br'%[^\r\n\v]*')
147+
_instring_re = re.compile(br'[()\\]')
148+
149+
# token types
150+
_whitespace = object()
151+
_name = object()
152+
_string = object()
153+
_delimiter = object()
154+
_number = object()
148155

149156
@classmethod
150157
def _tokens(cls, text):
151158
"""
152159
A PostScript tokenizer. Yield (token, value) pairs such as
153-
('whitespace', ' ') or ('name', '/Foobar').
160+
(cls._whitespace, ' ') or (cls._name, '/Foobar').
154161
"""
155162
pos = 0
156163
while pos < len(text):
157-
match = (cls._comment.match(text[pos:]) or
158-
cls._whitespace.match(text[pos:]))
164+
match = (cls._comment_re.match(text[pos:]) or
165+
cls._whitespace_re.match(text[pos:]))
159166
if match:
160-
yield ('whitespace', match.group())
167+
yield (cls._whitespace, match.group())
161168
pos += match.end()
162169
elif text[pos] == '(':
163170
start = pos
164171
pos += 1
165172
depth = 1
166173
while depth:
167-
match = cls._instring.search(text[pos:])
174+
match = cls._instring_re.search(text[pos:])
168175
if match is None:
169176
return
170177
pos += match.end()
@@ -174,25 +181,25 @@ def _tokens(cls, text):
174181
depth -= 1
175182
else: # a backslash - skip the next character
176183
pos += 1
177-
yield ('string', text[start:pos])
184+
yield (cls._string, text[start:pos])
178185
elif text[pos:pos + 2] in ('<<', '>>'):
179-
yield ('delimiter', text[pos:pos + 2])
186+
yield (cls._delimiter, text[pos:pos + 2])
180187
pos += 2
181188
elif text[pos] == '<':
182189
start = pos
183190
pos += text[pos:].index('>')
184-
yield ('string', text[start:pos])
191+
yield (cls._string, text[start:pos])
185192
else:
186-
match = cls._token.match(text[pos:])
193+
match = cls._token_re.match(text[pos:])
187194
if match:
188195
try:
189196
float(match.group())
190-
yield ('number', match.group())
197+
yield (cls._number, match.group())
191198
except ValueError:
192-
yield ('name', match.group())
199+
yield (cls._name, match.group())
193200
pos += match.end()
194201
else:
195-
yield ('delimiter', text[pos])
202+
yield (cls._delimiter, text[pos:pos + 1])
196203
pos += 1
197204

198205
def _parse(self):
@@ -205,26 +212,30 @@ def _parse(self):
205212
prop = {'weight': 'Regular', 'ItalicAngle': 0.0, 'isFixedPitch': False,
206213
'UnderlinePosition': -100, 'UnderlineThickness': 50}
207214
tokenizer = self._tokens(self.parts[0])
208-
filtered = filter(lambda x: x[0] != 'whitespace', tokenizer)
215+
filtered = filter(lambda x: x[0] != self._whitespace, tokenizer)
216+
# The spec calls this an ASCII format; in Python 2.x we could
217+
# just treat the strings and names as opaque bytes but let's
218+
# turn them into proper Unicode, and be lenient in case of high bytes.
219+
convert = lambda x: x.decode('ascii', errors='replace')
209220
for token, value in filtered:
210-
if token == b'name' and value.startswith(b'/'):
211-
key = value[1:]
221+
if token is self._name and value.startswith(b'/'):
222+
key = convert(value[1:])
212223
token, value = next(filtered)
213-
if token == b'name':
224+
if token is self._name:
214225
if value in (b'true', b'false'):
215226
value = value == b'true'
216227
else:
217-
value = value.lstrip(b'/')
218-
elif token == b'string':
219-
value = value.lstrip(b'(').rstrip(b')')
220-
elif token == b'number':
228+
value = convert(value.lstrip(b'/'))
229+
elif token is self._string:
230+
value = convert(value.lstrip(b'(').rstrip(b')'))
231+
elif token is self._number:
221232
if b'.' in value:
222233
value = float(value)
223234
else:
224235
value = int(value)
225236
else: # more complicated value such as an array
226237
value = None
227-
if key != b'FontInfo' and value is not None:
238+
if key != 'FontInfo' and value is not None:
228239
prop[key] = value
229240

230241
# Fill in the various *Name properties

0 commit comments

Comments (0)