25
25
from collections import namedtuple
26
26
from contextlib import contextmanager
27
27
from functools import partial , wraps
28
+ from itertools import chain
28
29
import logging
29
30
import numpy as np
30
31
import os
@@ -183,28 +184,23 @@ def wrapper(self, byte):
183
184
return decorate
184
185
185
186
186
- class Dvi ( object ):
187
- """
188
- A reader for a dvi ("device-independent") file, as produced by TeX.
189
- The current implementation can only iterate through pages in order.
187
+ def _keep ( func , keys ):
188
+ """Return mapping from each k in keys to func(k)
189
+ such that func(k) is not None"""
190
+ return dict (( k , v ) for k , v in zip ( keys , map ( func , keys )) if v is not None )
190
191
191
- This class can be used as a context manager to close the underlying
192
- file upon exit. Pages can be read via iteration. Here is an overly
193
- simple way to extract text without trying to detect whitespace::
194
192
195
- >>> with matplotlib.dviread.Dvi('input.dvi', 72) as dvi:
196
- >>> for page in dvi:
197
- >>> print ''.join(unichr(t.glyph) for t in page.text)
193
+ class _DviReader (object ):
194
+ """
195
+ A reader for a dvi ("device-independent") file, as produced by TeX.
196
+ This implementation is only used to store the file in a cache, from
197
+ which it is read by Dvi.
198
198
199
199
Parameters
200
200
----------
201
201
202
202
filename : str
203
203
dvi file to read
204
- dpi : number or None
205
- Dots per inch, can be floating-point; this affects the
206
- coordinates returned. Use None to get TeX's internal units
207
- which are likely only useful for debugging.
208
204
cache : _tex_support_cache instance, optional
209
205
Support file cache instance, defaults to the _tex_support_cache
210
206
singleton.
@@ -213,28 +209,28 @@ class Dvi(object):
213
209
_dtable = [None for _ in xrange (256 )]
214
210
dispatch = partial (_dispatch , _dtable )
215
211
9E81
216
- def __init__ (self , filename , dpi , cache = None ):
217
- """
218
- Read the data from the file named *filename* and convert
219
- TeX's internal units to units of *dpi* per inch.
220
- *dpi* only sets the units and does not limit the resolution.
221
- Use None to return TeX's internal units.
222
- """
212
def __init__(self, filename, cache=None):
    """
    Open *filename* and load the font information needed to parse it.

    Parameters
    ----------
    filename : str
        dvi file to read; opened in binary mode and kept open as
        ``self.file`` until `close` is called.
    cache : _tex_support_cache instance, optional
        Support file cache instance, defaults to the _tex_support_cache
        singleton.

    Notes
    -----
    If any step after opening the file raises, the file is closed
    before the exception propagates, so a failed construction does not
    leak the file handle.
    """
    _log.debug('Dvi: %s', filename)
    if cache is None:
        cache = _tex_support_cache.get_cache()
    self.cache = cache
    self.file = open(filename, 'rb')
    try:
        self.fonts = {}
        # DviFont objects created while expanding virtual-font characters
        self.recursive_fonts = set()
        self.state = _dvistate.pre
        self.baseline = self._get_baseline(filename)
        self.fontnames = set(self._read_fonts())
        # populate kpsewhich cache with font pathnames
        find_tex_files([x + suffix for x in self.fontnames
                        for suffix in ('.tfm', '.vf', '.pfb')],
                       cache)
        # Font name -> metrics / virtual font, for the names that resolve
        self._tfm = _keep(_tfmfile, self.fontnames)
        self._vf = _keep(_vffile, self.fontnames)
        # Also record the fonts referenced from within the virtual fonts
        for vf in self._vf.values():
            self.fontnames.update(vf.fontnames)
    except Exception:
        # Do not leave the file open when initialization fails part-way.
        self.file.close()
        raise
231
+
232
def close(self):
    """Close the underlying file that was opened by the constructor."""
    self.file.close()
238
234
239
235
def _get_baseline (self , filename ):
240
236
if rcParams ['text.latex.preview' ]:
@@ -247,88 +243,32 @@ def _get_baseline(self, filename):
247
243
return float (depth )
248
244
return None
249
245
250
- def __enter__ (self ):
251
- """
252
- Context manager enter method, does nothing.
253
- """
254
- return self
255
-
256
- def __exit__ (self , etype , evalue , etrace ):
257
- """
258
- Context manager exit method, closes the underlying file if it is open.
259
- """
260
- self .close ()
261
-
262
- def __iter__ (self ):
263
- """
264
- Iterate through the pages of the file.
265
-
266
- Yields
267
- ------
268
- Page
269
- Details of all the text and box objects on the page.
270
- The Page tuple contains lists of Text and Box tuples and
271
- the page dimensions, and the Text and Box tuples contain
272
- coordinates transformed into a standard Cartesian
273
- coordinate system at the dpi value given when initializing.
274
- The coordinates are floating point numbers, but otherwise
275
- precision is not lost and coordinate values are not clipped to
276
- integers.
277
- """
278
- while True :
279
- have_page = self ._read ()
280
- if have_page :
281
- yield self ._output ()
282
- else :
283
- break
284
-
285
- def close (self ):
286
- """
287
- Close the underlying file if it is open.
288
- """
289
- if not self .file .closed :
290
- self .file .close ()
291
-
292
- def _output (self ):
293
- """
294
- Output the text and boxes belonging to the most recent page.
295
- page = dvi._output()
296
- """
297
- minx , miny , maxx , maxy = np .inf , np .inf , - np .inf , - np .inf
298
- maxy_pure = - np .inf
299
- for elt in self .text + self .boxes :
300
- if isinstance (elt , Box ):
301
- x , y , h , w = elt
302
- e = 0 # zero depth
303
- else : # glyph
304
- x , y , font , g , w = elt
305
- h , e = font ._height_depth_of (g )
306
- minx = min (minx , x )
307
- miny = min (miny , y - h )
308
- maxx = max (maxx , x + w )
309
- maxy = max (maxy , y + e )
310
- maxy_pure = max (maxy_pure , y )
311
-
312
- if self .dpi is None :
313
- # special case for ease of debugging: output raw dvi coordinates
314
- return Page (text = self .text , boxes = self .boxes ,
315
- width = maxx - minx , height = maxy_pure - miny ,
316
- descent = maxy - maxy_pure )
317
-
318
- # convert from TeX's "scaled points" to dpi units
319
- d = self .dpi / (72.27 * 2 ** 16 )
320
- if self .baseline is None :
321
- descent = (maxy - maxy_pure ) * d
322
- else :
323
- descent = self .baseline
324
-
325
- text = [Text ((x - minx )* d , (maxy - y )* d - descent , f , g , w * d )
326
- for (x , y , f , g , w ) in self .text ]
327
- boxes = [Box ((x - minx )* d , (maxy - y )* d - descent , h * d , w * d )
328
- for (x , y , h , w ) in self .boxes ]
329
-
330
- return Page (text = text , boxes = boxes , width = (maxx - minx )* d ,
331
- height = (maxy_pure - miny )* d , descent = descent )
246
def store(self):
    """
    Read every page of the file and record its contents in the cache.

    Within a single cache transaction this registers the file, syncs
    the ids of its fonts, adds each page's text and box elements
    (numbered in the order text-then-boxes), syncs the metrics of all
    fonts encountered, and records the baseline for page 0 when one is
    known. The cache is then optimized.

    Returns
    -------
    The file id returned by the cache's ``dvi_new_file``.
    """
    cache = self.cache
    with cache.transaction() as txn:
        file_id = cache.dvi_new_file(self.file.name, txn)
        _log.debug('fontnames is %s', self.fontnames)
        font_ids = cache.dvi_font_sync_ids(self.fontnames, txn)

        page_number = 0
        # _read() returns a false value once there are no more pages
        while self._read():
            elements = self.text + self.boxes
            for position, element in enumerate(elements):
                if not isinstance(element, Box):
                    # a glyph: look up its font id by the decoded TeX name
                    name = element.font.texname.decode('ascii')
                    cache.dvi_add_text(element, file_id, page_number,
                                       position, font_ids[name], txn)
                    continue
                cache.dvi_add_box(element, file_id, page_number,
                                  position, txn)
            page_number += 1

        all_fonts = chain(self.recursive_fonts, self.fonts.values())
        for font in all_fonts:
            cache.dvi_font_sync_metrics(font, txn)
        if self.baseline is not None:
            cache.dvi_add_baseline(file_id, 0, self.baseline, txn)
    cache.optimize()
    return file_id
332
272
333
273
def _read_fonts (self ):
334
274
"""Read the postamble of the file and return a list of fonts used."""
@@ -376,7 +316,8 @@ def _read_fonts(self):
376
316
_arg (1 , False , self , None ),
377
317
_arg (1 , False , self , None ))
378
318
fontname = file .read (a + length )[- length :].decode ('ascii' )
379
- _log .debug ('dvi._read_fonts(%s): encountered %s' , self .file .name , fontname )
319
+ _log .debug ('dvi._read_fonts(%s): encountered %s' ,
320
+ self .file .name , fontname )
380
321
fonts .append (fontname )
381
322
elif byte == 249 :
382
323
break
@@ -443,6 +384,7 @@ def _put_char_real(self, char):
443
384
for x , y , f , g , w in font ._vf [char ].text :
444
385
newf = DviFont (scale = _mul2012 (scale , f .scale ),
445
386
tfm = f ._tfm , texname = f .texname , vf = f ._vf )
387
+ self .recursive_fonts .add (newf )
446
388
self .text .append (Text (self .h + _mul2012 (x , scale ),
447
389
self .v + _mul2012 (y , scale ),
448
390
newf , g , newf ._width_of (g )))
@@ -544,7 +486,7 @@ def _fnt_def(self, k, c, s, d, a, l):
544
486
def _fnt_def_real (self , k , c , s , d , a , l ):
545
487
n = self .file .read (a + l )
546
488
fontname = n [- l :].decode ('ascii' )
547
- tfm = _tfmfile (fontname )
489
+ tfm = self . _tfm . get (fontname )
548
490
if tfm is None :
549
491
if six .PY2 :
550
492
error_class = OSError
@@ -553,9 +495,7 @@ def _fnt_def_real(self, k, c, s, d, a, l):
553
495
raise error_class ("missing font metrics file: %s" % fontname )
554
496
if c != 0 and tfm .checksum != 0 and c != tfm .checksum :
555
497
raise ValueError ('tfm checksum mismatch: %s' % n )
556
-
557
- vf = _vffile (fontname )
558
-
498
+ vf = self ._vf .get (fontname )
559
499
self .fonts [k ] = DviFont (scale = s , tfm = tfm , texname = n , vf = vf )
560
500
561
501
@dispatch (247 , state = _dvistate .pre , args = ('u1' , 'u4' , 'u4' , 'u4' , 'u1' ))
@@ -695,7 +635,89 @@ def _height_depth_of(self, char):
695
635
return result
696
636
697
637
698
- class Vf (Dvi ):
638
class Dvi(object):
    """
    A representation of a dvi ("device-independent") file, as produced by TeX.

    The contents are served from the support file cache. If the cache
    does not yet know *filename*, the file is parsed once with
    `_DviReader` and stored in the cache; the reader's file handle is
    closed again as soon as that is done.

    Pages are retrieved by indexing (``dvi[pageno]``). Because a missing
    page raises IndexError, instances can also be iterated over with a
    plain ``for page in dvi`` loop. The class can be used as a context
    manager; exiting the context does nothing since no file is kept open.

    Parameters
    ----------

    filename : str
        dvi file to represent
    dpi : float or None
        Dots per inch, can be floating-point; this affects the
        coordinates returned. Use None to get TeX's internal units
        without any shifting or scaling.
    cache : _tex_support_cache, optional
        Support file cache instance, defaults to the _tex_support_cache
        singleton.

    Attributes
    ----------

    filename : str
    dpi : float or None
    cache : _tex_support_cache

    """
    def __init__(self, filename, dpi, cache=None):
        if cache is None:
            cache = _tex_support_cache.get_cache()
        self.cache = cache
        self.filename = filename
        self.dpi = dpi
        self._filename_id = cache.dvi_id(filename)
        if self._filename_id is None:
            # Not cached yet: parse the file now. The reader opens the
            # file in its constructor, so always close it afterwards,
            # whether or not storing succeeds.
            reader = _DviReader(filename, cache)
            try:
                self._filename_id = reader.store()
            finally:
                reader.close()
        self._fonts = cache.dvi_fonts(self._filename_id)

    def __enter__(self):
        """Context manager enter method, returns the instance itself."""
        return self

    def __exit__(self, etype, evalue, etrace):
        """Context manager exit method, does nothing."""
        pass

    def __getitem__(self, pageno):
        """
        Return the Page numbered *pageno*.

        Raises
        ------
        IndexError
            If the cache has no such page for this file.
        """
        if self.cache.dvi_page_exists(self._filename_id, pageno):
            return self._output(pageno)
        raise IndexError(
            'page %r not found in %s' % (pageno, self.filename))

    def _output(self, page):
        """
        Build the Page tuple for page number *page* from the cache.

        With ``dpi`` set to None the cached values are returned as they
        are. Otherwise coordinates are shifted so that x is measured
        from the left edge of the bounding box, y is flipped to grow
        upwards, and all lengths are scaled to *dpi* units.
        """
        cache = self.cache
        file_id = self._filename_id
        extrema = cache.dvi_page_boundingbox(file_id, page)
        min_x, min_y, max_x, max_y, max_y_pure = (
            extrema[n] for n in ('min_x', 'min_y', 'max_x',
                                 'max_y', 'max_y_pure'))
        boxes = cache.dvi_page_boxes(file_id, page)
        text = cache.dvi_page_text(file_id, page)
        baseline = cache.dvi_get_baseline(file_id, page)
        fonts = self._fonts

        if self.dpi is None:
            # special case for ease of debugging: output raw dvi coordinates
            return Page(
                text=[Text(x=row['x'], y=row['y'],
                           font=fonts[(row['texname'], row['fontscale'])],
                           glyph=row['glyph'], width=row['width'])
                      for row in text],
                boxes=[Box(x=row['x'], y=row['y'],
                           height=row['height'], width=row['width'])
                       for row in boxes],
                width=max_x - min_x,
                height=max_y_pure - min_y,
                descent=max_y - max_y_pure)

        # convert from TeX's "scaled points" to dpi units
        d = self.dpi / (72.27 * 2 ** 16)
        # A baseline recorded in the cache takes precedence over the
        # descent computed from the bounding box.
        if baseline is not None:
            descent = baseline
        else:
            descent = (max_y - max_y_pure) * d

        return Page(
            text=[Text(x=(row['x'] - min_x) * d,
                       y=(max_y - row['y']) * d - descent,
                       font=fonts[(row['texname'], row['fontscale'])],
                       glyph=row['glyph'],
                       width=row['width'] * d)
                  for row in text],
            boxes=[Box(x=(row['x'] - min_x) * d,
                       y=(max_y - row['y']) * d - descent,
                       height=row['height'] * d,
                       width=row['width'] * d)
                   for row in boxes],
            width=(max_x - min_x) * d,
            height=(max_y_pure - min_y) * d,
            descent=descent)
718
+
719
+
720
+ class Vf (_DviReader ):
699
721
"""
700
722
A virtual font (\\ *.vf file) containing subroutines for dvi files.
701
723
@@ -719,12 +741,12 @@ class Vf(Dvi):
719
741
720
742
The virtual font format is a derivative of dvi:
721
743
http://mirrors.ctan.org/info/knuth/virtual-fonts
722
- This class reuses some of the machinery of `Dvi `
744
+ This class reuses some of the machinery of `_DviReader `
723
745
but replaces the `_read` loop and dispatch mechanism.
724
746
"""
725
747
726
748
def __init__ (self , filename , cache = None ):
727
- Dvi .__init__ (self , filename , dpi = 0 , cache = cache )
749
+ _DviReader .__init__ (self , filename , cache = cache )
728
750
try :
729
751
self ._first_font = None
730
752
self ._chars = {}
@@ -749,7 +771,8 @@ def _read_fonts(self):
749
771
_ , _ , _ , a , length = [self ._arg (x ) for x in (4 , 4 , 4 , 1 , 1 )]
750
772
fontname = self .file .read (a + length )[- length :].decode ('ascii' )
751
773
fonts .append (fontname )
752
- _log .debug ('Vf._read_fonts(%s): encountered %s' , self .file .name , fontname )
774
+ _log .debug ('Vf._read_fonts(%s): encountered %s' ,
775
+ self .file .name , fontname )
753
776
elif byte == 247 :
754
777
_ , k = self ._arg (1 ), self ._arg (1 )
755
778
_ = self .file .read (k )
@@ -778,7 +801,7 @@ def _read(self):
778
801
if byte in (139 , 140 ) or byte >= 243 :
779
802
raise ValueError (
780
803
"Inappropriate opcode %d in vf file" % byte )
781
- Dvi ._dtable [byte ](self , byte )
804
+ _DviReader ._dtable [byte ](self , byte )
782
805
continue
783
806
784
807
# We are outside a packet
0 commit comments