From ef58a59331d58a85dbb786a23d8c8a4f62008c8b Mon Sep 17 00:00:00 2001 From: Antony Lee Date: Fri, 20 Sep 2019 00:29:49 +0200 Subject: [PATCH 1/3] Simplify and unify character tracking in pdf and ps backends. Instead of trying to resolve font paths to absolute files and key off by inode(!), just track fonts using whatever names they use, and simplify used_characters to be a straight mapping of filenames to character ids (making the attribute private -- with a backcompat shim -- at the same time). The previous approach would avoid embedding the same file twice if it is given under two different filenames (hardlinks to the same file...), but it would fail if the user passes a relative path, chdir()s to another directory, and passes another different font with the same filename, because of the lru_cache(). None of these seem likely to happen in practice, and in any case we can cover most of it by making the font paths absolute before passing them to FreeType (which is going to open the file anyways, so the cost of making them absolute doesn't matter). --- doc/api/next_api_changes/deprecations.rst | 6 ++ doc/missing-references.json | 4 + lib/matplotlib/backend_bases.py | 1 + lib/matplotlib/backends/_backend_pdf_ps.py | 40 +++++++++ lib/matplotlib/backends/backend_pdf.py | 46 +++++------ lib/matplotlib/backends/backend_ps.py | 94 ++++++++++------------ lib/matplotlib/cbook/__init__.py | 1 + lib/matplotlib/font_manager.py | 3 + lib/matplotlib/mathtext.py | 6 +- 9 files changed, 120 insertions(+), 81 deletions(-) diff --git a/doc/api/next_api_changes/deprecations.rst b/doc/api/next_api_changes/deprecations.rst index 61cea849478e..711bf450e39f 100644 --- a/doc/api/next_api_changes/deprecations.rst +++ b/doc/api/next_api_changes/deprecations.rst @@ -41,3 +41,9 @@ Flags containing "U" passed to `.cbook.to_filehandle` and `.cbook.open_file_cm` Please remove "U" from flags passed to `.cbook.to_filehandle` and `.cbook.open_file_cm`. 
This is consistent with their removal from `open` in Python 3.9. + +PDF and PS character tracking internals +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The ``used_characters`` attribute and ``track_characters`` and +``merge_used_characters`` methods of `.RendererPdf`, `.PdfFile`, and +`.RendererPS` are deprecated. diff --git a/doc/missing-references.json b/doc/missing-references.json index 61edfeb19f7b..eccddf5e48dd 100644 --- a/doc/missing-references.json +++ b/doc/missing-references.json @@ -357,6 +357,10 @@ "lib/matplotlib/backend_tools.py:docstring of matplotlib.backend_tools.ToolGrid:1", "lib/matplotlib/backend_tools.py:docstring of matplotlib.backend_tools.ToolMinorGrid:1" ], + "matplotlib.backends._backend_pdf_ps.CharacterTracker": [ + "lib/matplotlib/backends/backend_pdf.py:docstring of matplotlib.backends.backend_pdf.PdfFile:1", + "lib/matplotlib/backends/backend_ps.py:docstring of matplotlib.backends.backend_ps.RendererPS:1" + ], "matplotlib.backends._backend_pdf_ps.RendererPDFPSBase": [ "lib/matplotlib/backends/backend_pdf.py:docstring of matplotlib.backends.backend_pdf.RendererPdf:1", "lib/matplotlib/backends/backend_ps.py:docstring of matplotlib.backends.backend_ps.RendererPS:1" diff --git a/lib/matplotlib/backend_bases.py b/lib/matplotlib/backend_bases.py index 053774bbf517..5ad28181ff28 100644 --- a/lib/matplotlib/backend_bases.py +++ b/lib/matplotlib/backend_bases.py @@ -142,6 +142,7 @@ class RendererBase: """ def __init__(self): + super().__init__() self._texmanager = None self._text2path = textpath.TextToPath() diff --git a/lib/matplotlib/backends/_backend_pdf_ps.py b/lib/matplotlib/backends/_backend_pdf_ps.py index e806cddd6985..7c0fedab4514 100644 --- a/lib/matplotlib/backends/_backend_pdf_ps.py +++ b/lib/matplotlib/backends/_backend_pdf_ps.py @@ -16,11 +16,51 @@ def _cached_get_afm_from_fname(fname): return AFM(fh) +class CharacterTracker: + """ + Helper for font subsetting by the pdf and ps backends. 
+ + Maintains a mapping of font paths to the set of character codepoints that + are being used from that font. + """ + + def __init__(self): + self.used = {} + + @mpl.cbook.deprecated("3.3") + @property + def used_characters(self): + d = {} + for fname, chars in self.used.items(): + realpath, stat_key = mpl.cbook.get_realpath_and_stat(fname) + d[stat_key] = (realpath, chars) + return d + + def track(self, font, s): + """Record that string *s* is being typeset using font *font*.""" + if isinstance(font, str): + # Unused, can be removed after removal of track_characters. + fname = font + else: + fname = font.fname + self.used.setdefault(fname, set()).update(map(ord, s)) + + def merge(self, other): + """Update self with a font path to character codepoints.""" + for fname, charset in other.items(): + self.used.setdefault(fname, set()).update(charset) + + class RendererPDFPSBase(RendererBase): # The following attributes must be defined by the subclasses: # - _afm_font_dir # - _use_afm_rc_name + def __init__(self, width, height): + super().__init__() + self.width = width + self.height = height + def flipy(self): # docstring inherited return False # y increases from bottom to top. diff --git a/lib/matplotlib/backends/backend_pdf.py b/lib/matplotlib/backends/backend_pdf.py index 1c9a85449f83..2d08b36f8acf 100644 --- a/lib/matplotlib/backends/backend_pdf.py +++ b/lib/matplotlib/backends/backend_pdf.py @@ -451,6 +451,8 @@ def __init__(self, filename, metadata=None): for `'Creator'`, `'Producer'` and `'CreationDate'`. They can be removed by setting them to `None`. 
""" + super().__init__() + self._object_seq = itertools.count(1) # consumed by reserveObject self.xrefTable = [[0, 65535, 'the zero object']] self.passed_in_file_object = False @@ -513,7 +515,7 @@ def __init__(self, filename, metadata=None): self.dviFontInfo = {} # maps dvi font names to embedding information # differently encoded Type-1 fonts may share the same descriptor self.type1Descriptors = {} - self.used_characters = {} + self._character_tracker = _backend_pdf_ps.CharacterTracker() self.alphaStates = {} # maps alpha values to graphics state objects self._alpha_state_seq = (Name(f'A{i}') for i in itertools.count(1)) @@ -550,6 +552,11 @@ def __init__(self, filename, metadata=None): 'ProcSet': procsets} self.writeObject(self.resourceObject, resources) + @cbook.deprecated("3.3") + @property + def used_characters(self): + return self.file._character_tracker.used_characters + def newPage(self, width, height): self.endStream() @@ -724,10 +731,9 @@ def writeFonts(self): else: # a normal TrueType font _log.debug('Writing TrueType font.') - realpath, stat_key = cbook.get_realpath_and_stat(filename) - chars = self.used_characters.get(stat_key) - if chars is not None and len(chars[1]): - fonts[Fx] = self.embedTTF(realpath, chars[1]) + chars = self._character_tracker.used.get(filename) + if chars: + fonts[Fx] = self.embedTTF(filename, chars) self.writeObject(self.fontObject, fonts) def _write_afm_font(self, filename): @@ -1675,9 +1681,7 @@ def afm_font_cache(self, _cache=cbook.maxdict(50)): _use_afm_rc_name = "pdf.use14corefonts" def __init__(self, file, image_dpi, height, width): - RendererBase.__init__(self) - self.height = height - self.width = width + super().__init__(width, height) self.file = file self.gc = self.new_gc() self.mathtext_parser = MathTextParser("Pdf") @@ -1713,22 +1717,14 @@ def check_gc(self, gc, fillcolor=None): gc._fillcolor = orig_fill gc._effective_alphas = orig_alphas - def track_characters(self, font, s): + @cbook.deprecated("3.3") + def 
track_characters(self, *args, **kwargs): """Keeps track of which characters are required from each font.""" - if isinstance(font, str): - fname = font - else: - fname = font.fname - realpath, stat_key = cbook.get_realpath_and_stat(fname) - used_characters = self.file.used_characters.setdefault( - stat_key, (realpath, set())) - used_characters[1].update(map(ord, s)) - - def merge_used_characters(self, other): - for stat_key, (realpath, charset) in other.items(): - used_characters = self.file.used_characters.setdefault( - stat_key, (realpath, set())) - used_characters[1].update(charset) + self.file._character_tracker.track(*args, **kwargs) + + @cbook.deprecated("3.3") + def merge_used_characters(self, *args, **kwargs): + self.file._character_tracker.merge(*args, **kwargs) def get_image_magnification(self): return self.image_dpi/72.0 @@ -1938,7 +1934,7 @@ def draw_mathtext(self, gc, x, y, s, prop, angle): # TODO: fix positioning and encoding width, height, descent, glyphs, rects, used_characters = \ self.mathtext_parser.parse(s, 72, prop) - self.merge_used_characters(used_characters) + self.file._character_tracker.merge(used_characters) # When using Type 3 fonts, we can't use character codes higher # than 255, so we use the "Do" command to render those @@ -2101,7 +2097,7 @@ def draw_text(self, gc, x, y, s, prop, angle, ismath=False, mtext=None): fonttype = 1 else: font = self._get_font_ttf(prop) - self.track_characters(font, s) + self.file._character_tracker.track(font, s) fonttype = rcParams['pdf.fonttype'] # We can't subset all OpenType fonts, so switch to Type 42 # in that case. 
diff --git a/lib/matplotlib/backends/backend_ps.py b/lib/matplotlib/backends/backend_ps.py index 0eee04f07cf6..33221e7ccd55 100644 --- a/lib/matplotlib/backends/backend_ps.py +++ b/lib/matplotlib/backends/backend_ps.py @@ -25,8 +25,7 @@ from matplotlib.backend_bases import ( _Backend, FigureCanvasBase, FigureManagerBase, GraphicsContextBase, RendererBase) -from matplotlib.cbook import (get_realpath_and_stat, is_writable_file_like, - file_requires_unicode) +from matplotlib.cbook import is_writable_file_like, file_requires_unicode from matplotlib.font_manager import is_opentype_cff_font, get_font from matplotlib.ft2font import LOAD_NO_HINTING from matplotlib.ttconv import convert_ttf_to_ps @@ -202,9 +201,7 @@ def __init__(self, width, height, pswriter, imagedpi=72): # Although postscript itself is dpi independent, we need to inform the # image code about a requested dpi to generate high resolution images # and them scale them before embedding them. - RendererBase.__init__(self) - self.width = width - self.height = height + super().__init__(width, height) self._pswriter = pswriter if rcParams['text.usetex']: self.textcnt = 0 @@ -224,21 +221,22 @@ def __init__(self, width, height, pswriter, imagedpi=72): self._clip_paths = {} self._path_collection_id = 0 - self.used_characters = {} + self._character_tracker = _backend_pdf_ps.CharacterTracker() self.mathtext_parser = MathTextParser("PS") - def track_characters(self, font, s): + @cbook.deprecated("3.3") + @property + def used_characters(self): + return self._character_tracker.used_characters + + @cbook.deprecated("3.3") + def track_characters(self, *args, **kwargs): """Keeps track of which characters are required from each font.""" - realpath, stat_key = get_realpath_and_stat(font.fname) - used_characters = self.used_characters.setdefault( - stat_key, (realpath, set())) - used_characters[1].update(map(ord, s)) + self._character_tracker.track(*args, **kwargs) - def merge_used_characters(self, other): - for stat_key, 
(realpath, charset) in other.items(): - used_characters = self.used_characters.setdefault( - stat_key, (realpath, set())) - used_characters[1].update(charset) + @cbook.deprecated("3.3") + def merge_used_characters(self, *args, **kwargs): + self._character_tracker.merge(*args, **kwargs) def set_color(self, r, g, b, store=1): if (r, g, b) != self.color: @@ -621,7 +619,7 @@ def draw_text(self, gc, x, y, s, prop, angle, ismath=False, mtext=None): else: font = self._get_font_ttf(prop) font.set_text(s, 0, flags=LOAD_NO_HINTING) - self.track_characters(font, s) + self._character_tracker.track(font, s) self.set_color(*gc.get_rgb()) ps_name = (font.postscript_name @@ -650,7 +648,7 @@ def draw_mathtext(self, gc, x, y, s, prop, angle): width, height, descent, pswriter, used_characters = \ self.mathtext_parser.parse(s, 72, prop) - self.merge_used_characters(used_characters) + self._character_tracker.merge(used_characters) self.set_color(*gc.get_rgb()) thetext = pswriter.getvalue() self._pswriter.write(f"""\ @@ -980,7 +978,7 @@ def print_figure_impl(fh): Ndict = len(psDefs) print("%%BeginProlog", file=fh) if not rcParams['ps.useafm']: - Ndict += len(ps_renderer.used_characters) + Ndict += len(ps_renderer._character_tracker.used) print("/mpldict %d dict def" % Ndict, file=fh) print("mpldict begin", file=fh) for d in psDefs: @@ -988,38 +986,32 @@ def print_figure_impl(fh): for l in d.split('\n'): print(l.strip(), file=fh) if not rcParams['ps.useafm']: - for font_filename, chars in \ - ps_renderer.used_characters.values(): - if len(chars): - font = get_font(font_filename) - glyph_ids = [font.get_char_index(c) for c in chars] - - fonttype = rcParams['ps.fonttype'] - - # Can not use more than 255 characters from a - # single font for Type 3 - if len(glyph_ids) > 255: - fonttype = 42 - - # The ttf to ps (subsetting) support doesn't work for - # OpenType fonts that are Postscript inside (like the - # STIX fonts). This will simply turn that off to avoid - # errors. 
- if is_opentype_cff_font(font_filename): - raise RuntimeError( - "OpenType CFF fonts can not be saved using " - "the internal Postscript backend at this " - "time; consider using the Cairo backend") - else: - fh.flush() - try: - convert_ttf_to_ps(os.fsencode(font_filename), - fh, fonttype, glyph_ids) - except RuntimeError: - _log.warning("The PostScript backend does not " - "currently support the selected " - "font.") - raise + for font_path, chars \ + in ps_renderer._character_tracker.used.items(): + if not chars: + continue + font = get_font(font_path) + glyph_ids = [font.get_char_index(c) for c in chars] + fonttype = rcParams['ps.fonttype'] + # Can't use more than 255 chars from a single Type 3 font. + if len(glyph_ids) > 255: + fonttype = 42 + # The ttf to ps (subsetting) support doesn't work for + # OpenType fonts that are Postscript inside (like the STIX + # fonts). This will simply turn that off to avoid errors. + if is_opentype_cff_font(font_path): + raise RuntimeError( + "OpenType CFF fonts can not be saved using " + "the internal Postscript backend at this " + "time; consider using the Cairo backend") + fh.flush() + try: + convert_ttf_to_ps(os.fsencode(font_path), + fh, fonttype, glyph_ids) + except RuntimeError: + _log.warning("The PostScript backend does not " + "currently support the selected font.") + raise print("end", file=fh) print("%%EndProlog", file=fh) diff --git a/lib/matplotlib/cbook/__init__.py b/lib/matplotlib/cbook/__init__.py index 1ef2e9e30aea..8b09f2b9d51a 100644 --- a/lib/matplotlib/cbook/__init__.py +++ b/lib/matplotlib/cbook/__init__.py @@ -524,6 +524,7 @@ def flatten(seq, scalarp=is_scalar_or_string): yield from flatten(item, scalarp) +@deprecated("3.3", alternative="os.path.realpath and os.stat") @functools.lru_cache() def get_realpath_and_stat(path): realpath = os.path.realpath(path) diff --git a/lib/matplotlib/font_manager.py b/lib/matplotlib/font_manager.py index 0767573adc56..ff6ab240941e 100644 --- 
a/lib/matplotlib/font_manager.py +++ b/lib/matplotlib/font_manager.py @@ -1339,6 +1339,9 @@ def is_opentype_cff_font(filename): def get_font(filename, hinting_factor=None): + # Resolving the path avoids embedding the font twice in pdf/ps output if a + # single font is selected using two different relative paths. + filename = os.path.realpath(filename) if hinting_factor is None: hinting_factor = rcParams['text.hinting_factor'] return _get_font(os.fspath(filename), hinting_factor, diff --git a/lib/matplotlib/mathtext.py b/lib/matplotlib/mathtext.py index bec7e03843a8..719946a4318e 100644 --- a/lib/matplotlib/mathtext.py +++ b/lib/matplotlib/mathtext.py @@ -32,7 +32,6 @@ from matplotlib import cbook, colors as mcolors, rcParams from matplotlib.afm import AFM -from matplotlib.cbook import get_realpath_and_stat from matplotlib.ft2font import FT2Image, KERNING_DEFAULT, LOAD_NO_HINTING from matplotlib.font_manager import findfont, FontProperties, get_font from matplotlib._mathtext_data import (latex_to_bakoma, latex_to_standard, @@ -485,10 +484,7 @@ def render_glyph(self, ox, oy, facename, font_class, sym, fontsize, dpi): - *dpi*: The dpi to draw at. 
""" info = self._get_info(facename, font_class, sym, fontsize, dpi) - realpath, stat_key = get_realpath_and_stat(info.font.fname) - used_characters = self.used_characters.setdefault( - stat_key, (realpath, set())) - used_characters[1].add(info.num) + self.used_characters.setdefault(info.font.fname, set()).add(info.num) self.mathtext_backend.render_glyph(ox, oy, info) def render_rect_filled(self, x1, y1, x2, y2): From 4a78af537879f3c25f9a6093014e0a20fb877df2 Mon Sep 17 00:00:00 2001 From: Frank Sauerburger Date: Tue, 12 Nov 2019 00:23:32 +0100 Subject: [PATCH 2/3] Use internal font index for XObject names --- lib/matplotlib/backends/backend_pdf.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/matplotlib/backends/backend_pdf.py b/lib/matplotlib/backends/backend_pdf.py index 2d08b36f8acf..689853da86e9 100644 --- a/lib/matplotlib/backends/backend_pdf.py +++ b/lib/matplotlib/backends/backend_pdf.py @@ -874,9 +874,11 @@ def createType1Descriptor(self, t1font, fontfile): return fontdescObject def _get_xobject_symbol_name(self, filename, symbol_name): - return "%s-%s" % ( + Fx = self.fontName(filename) + return "-".join([ + Fx.name.decode(), os.path.splitext(os.path.basename(filename))[0], - symbol_name) + symbol_name]) _identityToUnicodeCMap = b"""/CIDInit /ProcSet findresource begin 12 dict begin From 6a126a4cb790bf691368ccb35cc7ff883d2fee52 Mon Sep 17 00:00:00 2001 From: Frank Sauerburger Date: Tue, 12 Nov 2019 23:41:26 +0100 Subject: [PATCH 3/3] Resolve linked fonts in fontManager.findfont() The commit ensures that the realpath of linked fonts is used across the library. This prevents embedding linked fonts twice and makes sure that linked fonts are referenced consistently in PDF files. The link resolution cannot happen in addfont() since this would break the *directory* argument of findfont(). 
--- lib/matplotlib/font_manager.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/matplotlib/font_manager.py b/lib/matplotlib/font_manager.py index ff6ab240941e..d45e5f1fb49e 100644 --- a/lib/matplotlib/font_manager.py +++ b/lib/matplotlib/font_manager.py @@ -1238,9 +1238,10 @@ def findfont(self, prop, fontext='ttf', directory=None, rc_params = tuple(tuple(rcParams[key]) for key in [ "font.serif", "font.sans-serif", "font.cursive", "font.fantasy", "font.monospace"]) - return self._findfont_cached( + filename = self._findfont_cached( prop, fontext, directory, fallback_to_default, rebuild_if_missing, rc_params) + return os.path.realpath(filename) @lru_cache() def _findfont_cached(self, prop, fontext, directory, fallback_to_default,