Implement reading dvi files into the cache · matplotlib/matplotlib@2fc05d5 · GitHub
[go: up one dir, main page]

Skip to content

Commit 2fc05d5

Browse files
committed
Implement reading dvi files into the cache
Rename the Dvi class to _DviReader and use it only for storing the files into the cache. The new Dvi class reads from the cache, after calling _DviReader to insert the file into it.
1 parent 4342306 commit 2fc05d5

File tree

2 files changed

+144
-121
lines changed

2 files changed

+144
-121
lines changed

lib/matplotlib/dviread.py

Lines changed: 139 additions & 116 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
from collections import namedtuple
2626
from contextlib import contextmanager
2727
from functools import partial, wraps
28+
from itertools import chain
2829
import logging
2930
import numpy as np
3031
import os
@@ -183,28 +184,23 @@ def wrapper(self, byte):
183184
return decorate
184185

185186

186-
class Dvi(object):
187-
"""
188-
A reader for a dvi ("device-independent") file, as produced by TeX.
189-
The current implementation can only iterate through pages in order.
187+
def _keep(func, keys):
188+
"""Return mapping from each k in keys to func(k)
189+
such that func(k) is not None"""
190+
return dict((k, v) for k, v in zip(keys, map(func, keys)) if v is not None)
190191

191-
This class can be used as a context manager to close the underlying
192-
file upon exit. Pages can be read via iteration. Here is an overly
193-
simple way to extract text without trying to detect whitespace::
194192

195-
>>> with matplotlib.dviread.Dvi('input.dvi', 72) as dvi:
196-
>>> for page in dvi:
197-
>>> print ''.join(unichr(t.glyph) for t in page.text)
193+
class _DviReader(object):
194+
"""
195+
A reader for a dvi ("device-independent") file, as produced by TeX.
196+
This implementation is only used to store the file in a cache, from
197+
which it is read by Dvi.
198198
199199
Parameters
200200
----------
201201
202202
filename : str
203203
dvi file to read
204-
dpi : number or None
205-
Dots per inch, can be floating-point; this affects the
206-
coordinates returned. Use None to get TeX's internal units
207-
which are likely only useful for debugging.
208204
cache : _tex_support_cache instance, optional
209205
Support file cache instance, defaults to the _tex_support_cache
210206
singleton.
@@ -213,28 +209,28 @@ class Dvi(object):
213209
_dtable = [None for _ in xrange(256)]
214210
dispatch = partial(_dispatch, _dtable)
215211

216-
def __init__(self, filename, dpi, cache=None):
217-
"""
218-
Read the data from the file named *filename* and convert
219-
TeX's internal units to units of *dpi* per inch.
220-
*dpi* only sets the units and does not limit the resolution.
221-
Use None to return TeX's internal units.
222-
"""
212+
def __init__(self, filename, cache=None):
223213
_log.debug('Dvi: %s', filename)
224214
if cache is None:
225215
cache = _tex_support_cache.get_cache()
226216
self.cache = cache
227217
self.file = open(filename, 'rb')
228-
self.dpi = dpi
229218
self.fonts = {}
219+
self.recursive_fonts = set()
230220
self.state = _dvistate.pre
231221
self.baseline = self._get_baseline(filename)
232-
self.fontnames = sorted(set(self._read_fonts()))
222+
self.fontnames = set(self._read_fonts())
233223
# populate kpsewhich cache with font pathnames
234224
find_tex_files([x + suffix for x in self.fontnames
235225
for suffix in ('.tfm', '.vf', '.pfb')],
236226
cache)
237-
cache.optimize()
227+
self._tfm = _keep(_tfmfile, self.fontnames)
228+
self._vf = _keep(_vffile, self.fontnames)
229+
for vf in self._vf.values():
230+
self.fontnames.update(vf.fontnames)
231+
232+
def close(self):
233+
self.file.close()
238234

239235
def _get_baseline(self, filename):
240236
if rcParams['text.latex.preview']:
@@ -247,88 +243,32 @@ def _get_baseline(self, filename):
247243
return float(depth)
248244
return None
249245

250-
def __enter__(self):
251-
"""
252-
Context manager enter method, does nothing.
253-
"""
254-
return self
255-
256-
def __exit__(self, etype, evalue, etrace):
257-
"""
258-
Context manager exit method, closes the underlying file if it is open.
259-
"""
260-
self.close()
261-
262-
def __iter__(self):
263-
"""
264-
Iterate through the pages of the file.
265-
266-
Yields
267-
------
268-
Page
269-
Details of all the text and box objects on the page.
270-
The Page tuple contains lists of Text and Box tuples and
271-
the page dimensions, and the Text and Box tuples contain
272-
coordinates transformed into a standard Cartesian
273-
coordinate system at the dpi value given when initializing.
274-
The coordinates are floating point numbers, but otherwise
275-
precision is not lost and coordinate values are not clipped to
276-
integers.
277-
"""
278-
while True:
279-
have_page = self._read()
280-
if have_page:
281-
yield self._output()
282-
else:
283-
break
284-
285-
def close(self):
286-
"""
287-
Close the underlying file if it is open.
288-
"""
289-
if not self.file.closed:
290-
self.file.close()
291-
292-
def _output(self):
293-
"""
294-
Output the text and boxes belonging to the most recent page.
295-
page = dvi._output()
296-
"""
297-
minx, miny, maxx, maxy = np.inf, np.inf, -np.inf, -np.inf
298-
maxy_pure = -np.inf
299-
for elt in self.text + self.boxes:
300-
if isinstance(elt, Box):
301-
x, y, h, w = elt
302-
e = 0 # zero depth
303-
else: # glyph
304-
x, y, font, g, w = elt
305-
h, e = font._height_depth_of(g)
306-
minx = min(minx, x)
307-
miny = min(miny, y - h)
308-
maxx = max(maxx, x + w)
309-
maxy = max(maxy, y + e)
310-
maxy_pure = max(maxy_pure, y)
311-
312-
if self.dpi is None:
313-
# special case for ease of debugging: output raw dvi coordinates
314-
return Page(text=self.text, boxes=self.boxes,
315-
width=maxx-minx, height=maxy_pure-miny,
316-
descent=maxy-maxy_pure)
317-
318-
# convert from TeX's "scaled points" to dpi units
319-
d = self.dpi / (72.27 * 2**16)
320-
if self.baseline is None:
321-
descent = (maxy - maxy_pure) * d
322-
else:
323-
descent = self.baseline
324-
325-
text = [Text((x-minx)*d, (maxy-y)*d - descent, f, g, w*d)
326-
for (x, y, f, g, w) in self.text]
327-
boxes = [Box((x-minx)*d, (maxy-y)*d - descent, h*d, w*d)
328-
for (x, y, h, w) in self.boxes]
329-
330-
return Page(text=text, boxes=boxes, width=(maxx-minx)*d,
331-
height=(maxy_pure-miny)*d, descent=descent)
246+
def store(self):
247+
c = self.cache
248+
with c.transaction() as t:
249+
fileid = c.dvi_new_file(self.file.name, t)
250+
_log.debug('fontnames is %s', self.fontnames)
251+
fontid = c.dvi_font_sync_ids(self.fontnames, t)
252+
253+
pageno = 0
254+
while True:
255+
if not self._read():
256+
break
257+
for seq, elt in enumerate(self.text + self.boxes):
258+
if isinstance(elt, Box):
259+
c.dvi_add_box(elt, fileid, pageno, seq, t)
260+
else:
261+
texname = elt.font.texname.decode('ascii')
262+
c.dvi_add_text(elt, fileid, pageno, seq,
263+
fontid[texname], t)
264+
pageno += 1
265+
266+
for dvifont in chain(self.recursive_fonts, self.fonts.values()):
267+
c.dvi_font_sync_metrics(dvifont, t)
268+
if self.baseline is not None:
269+
c.dvi_add_baseline(fileid, 0, self.baseline, t)
270+
c.optimize()
271+
return fileid
332272

333273
def _read_fonts(self):
334274
"""Read the postamble of the file and return a list of fonts used."""
@@ -376,7 +316,8 @@ def _read_fonts(self):
376316
_arg(1, False, self, None),
377317
_arg(1, False, self, None))
378318
fontname = file.read(a + length)[-length:].decode('ascii')
379-
_log.debug('dvi._read_fonts(%s): encountered %s', self.file.name, fontname)
319+
_log.debug('dvi._read_fonts(%s): encountered %s',
320+
self.file.name, fontname)
380321
fonts.append(fontname)
381322
elif byte == 249:
382323
break
@@ -443,6 +384,7 @@ def _put_char_real(self, char):
443384
for x, y, f, g, w in font._vf[char].text:
444385
newf = DviFont(scale=_mul2012(scale, f.scale),
445386
tfm=f._tfm, texname=f.texname, vf=f._vf)
387+
self.recursive_fonts.add(newf)
446388
self.text.append(Text(self.h + _mul2012(x, scale),
447389
self.v + _mul2012(y, scale),
448390
newf, g, newf._width_of(g)))
@@ -544,7 +486,7 @@ def _fnt_def(self, k, c, s, d, a, l):
544486
def _fnt_def_real(self, k, c, s, d, a, l):
545487
n = self.file.read(a + l)
546488
fontname = n[-l:].decode('ascii')
547-
tfm = _tfmfile(fontname)
489+
tfm = self._tfm.get(fontname)
548490
if tfm is None:
549491
if six.PY2:
550492
error_class = OSError
@@ -553,9 +495,7 @@ def _fnt_def_real(self, k, c, s, d, a, l):
553495
raise error_class("missing font metrics file: %s" % fontname)
554496
if c != 0 and tfm.checksum != 0 and c != tfm.checksum:
555497
raise ValueError('tfm checksum mismatch: %s' % n)
556-
557-
vf = _vffile(fontname)
558-
498+
vf = self._vf.get(fontname)
559499
self.fonts[k] = DviFont(scale=s, tfm=tfm, texname=n, vf=vf)
560500

561501
@dispatch(247, state=_dvistate.pre, args=('u1', 'u4', 'u4', 'u4', 'u1'))
@@ -695,7 +635,89 @@ def _height_depth_of(self, char):
695635
return result
696636

697637

698-
class Vf(Dvi):
638+
class Dvi(object):
639+
"""
640+
A representation of a dvi ("device-independent") file, as produced by TeX.
641+
642+
Parameters
643+
----------
644+
645+
filename : str
646+
dpi : float or None
647+
cache : _tex_support_cache, optional
648+
649+
Attributes
650+
----------
651+
652+
filename : str
653+
dpi : float or None
654+
cache : _tex_support_cache
655+
656+
657+
"""
658+
def __init__(self, filename, dpi, cache=None):
659+
if cache is None:
660+
cache = _tex_support_cache.get_cache()
661+
self.cache = cache
662+
self.filename = filename
663+
self.dpi = dpi
664+
self._filename_id = cache.dvi_id(filename)
665+
if self._filename_id is None:
666+
self._filename_id = _DviReader(filename, cache).store()
667+
self._fonts = cache.dvi_fonts(self._filename_id)
668+
669+
def __enter__(self):
670+
return self
671+
672+
def __exit__(self, etype, evalue, etrace):
673+
pass
674+
675+
def __getitem__(self, pageno):
676+
if self.cache.dvi_page_exists(self._filename_id, pageno):
677+
return self._output(pageno)
678+
raise IndexError
679+
680+
def _output(self, page):
681+
extrema = self.cache.dvi_page_boundingbox(self._filename_id, page)
682+
min_x, min_y, max_x, max_y, max_y_pure = (
683+
extrema[n] for n in ('min_x', 'min_y', 'max_x',
684+
'max_y', 'max_y_pure'))
685+
boxes = self.cache.dvi_page_boxes(self._filename_id, page)
686+
text = self.cache.dvi_page_text(self._filename_id, page)
687+
baseline = self.cache.dvi_get_baseline(self._filename_id, page)
688+
if self.dpi is None:
689+
return Page(text=[Text(x=row['x'], y=row['y'],
690+
font=self._fonts[(row['texname'],
691+
row['fontscale'])],
692+
glyph=row['glyph'], width=row['width'])
693+
for row in text],
694+
boxes=[Box(x=row['x'], y=row['y'],
695+
height=row['height'], width=row['width'])
696+
for row in boxes],
697+
width=max_x-min_x,
698+
height=max_y_pure-min_y,
699+
descent=max_y-max_y_pure)
700+
d = self.dpi / (72.27 * 2**16)
701+
descent = \
702+
baseline if baseline is not None else (max_y - max_y_pure) * d
703+
704+
return Page(text=[Text((row['x'] - min_x) * d,
705+
(max_y - row['y']) * d - descent,
706+
self._fonts[(row['texname'], row['fontscale'])],
707+
row['glyph'],
708+
row['width'] * d)
709+
for row in text],
710+
boxes=[Box((row['x'] - min_x) * d,
711+
(max_y - row['y']) * d - descent,
712+
row['height'] * d,
713+
row['width'] * d)
714+
for row in boxes],
715+
width=(max_x - min_x) * d,
716+
height=(max_y_pure - min_y) * d,
717+
descent=descent)
718+
719+
720+
class Vf(_DviReader):
699721
"""
700722
A virtual font (\\*.vf file) containing subroutines for dvi files.
701723
@@ -719,12 +741,12 @@ class Vf(Dvi):
719741
720742
The virtual font format is a derivative of dvi:
721743
http://mirrors.ctan.org/info/knuth/virtual-fonts
722-
This class reuses some of the machinery of `Dvi`
744+
This class reuses some of the machinery of `_DviReader`
723745
but replaces the `_read` loop and dispatch mechanism.
724746
"""
725747

726748
def __init__(self, filename, cache=None):
727-
Dvi.__init__(self, filename, dpi=0, cache=cache)
749+
_DviReader.__init__(self, filename, cache=cache)
728750
try:
729751
self._first_font = None
730752
self._chars = {}
@@ -749,7 +771,8 @@ def _read_fonts(self):
749771
_, _, _, a, length = [self._arg(x) for x in (4, 4, 4, 1, 1)]
750772
fontname = self.file.read(a + length)[-length:].decode('ascii')
751773
fonts.append(fontname)
752-
_log.debug('Vf._read_fonts(%s): encountered %s', self.file.name, fontname)
774+
_log.debug('Vf._read_fonts(%s): encountered %s',
775+
self.file.name, fontname)
753776
elif byte == 247:
754777
_, k = self._arg(1), self._arg(1)
755778
_ = self.file.read(k)
@@ -778,7 +801,7 @@ def _read(self):
778801
if byte in (139, 140) or byte >= 243:
779802
raise ValueError(
780803
"Inappropriate opcode %d in vf file" % byte)
781-
Dvi._dtable[byte](self, byte)
804+
_DviReader._dtable[byte](self, byte)
782805
continue
783806

784807
# We are outside a packet

lib/matplotlib/tests/test_dviread.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -83,11 +83,11 @@ def test_dviread():
8383
@skip_if_command_unavailable(["kpsewhich", "-version"])
8484
def test_dviread_get_fonts():
8585
dir = os.path.join(os.path.dirname(__file__), 'baseline_images', 'dviread')
86-
with dr.Dvi(os.path.join(dir, 'test.dvi'), None) as dvi:
87-
assert dvi.fontnames == \
88-
['cmex10', 'cmmi10', 'cmmi5', 'cmr10', 'cmr5', 'cmr7']
89-
with dr.Vf(os.path.join(dir, 'virtual.vf')) as vf:
90-
assert vf.fontnames == ['cmex10', 'cmr10']
86+
dvi = dr._DviReader(os.path.join(dir, 'test.dvi'), None)
87+
assert dvi.fontnames == \
88+
{'cmex10', 'cmmi10', 'cmmi5', 'cmr10', 'cmr5', 'cmr7'}
89+
vf = dr.Vf(os.path.join(dir, 'virtual.vf'))
90+
assert vf.fontnames == {'cmex10', 'cmr10'}
9191

9292

9393
def test_tex_support_cache(tmpdir):

0 commit comments

Comments
 (0)
0