diff --git a/doc/api/api_changes_3.3/deprecations.rst b/doc/api/api_changes_3.3/deprecations.rst index 70595495a4a0..f80732851d15 100644 --- a/doc/api/api_changes_3.3/deprecations.rst +++ b/doc/api/api_changes_3.3/deprecations.rst @@ -593,3 +593,11 @@ APIs which support the values True, False, and "TeX" for ``ismath``. ``matplotlib.ttconv`` ~~~~~~~~~~~~~~~~~~~~~ This module is deprecated. + +Stricter PDF metadata keys in PGF +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Saving metadata in PDF with the PGF backend currently normalizes all keys to +lowercase, unlike the PDF backend, which only accepts the canonical case. This +is deprecated; in a future version, only the canonically cased keys listed in +the PDF specification (and the `~.backend_pgf.PdfPages` documentation) will be +accepted. diff --git a/doc/users/next_whats_new/2020-04-24-ES-pdf-pgf-metadata.rst b/doc/users/next_whats_new/2020-04-24-ES-pdf-pgf-metadata.rst new file mode 100644 index 000000000000..465139776c3b --- /dev/null +++ b/doc/users/next_whats_new/2020-04-24-ES-pdf-pgf-metadata.rst @@ -0,0 +1,8 @@ +Saving PDF metadata via PGF now consistent with PDF backend +----------------------------------------------------------- + +When saving PDF files using the PGF backend, passed metadata will be +interpreted in the same way as with the PDF backend. Previously, this metadata +was only accepted by the PGF backend when saving a multi-page PDF with +`.backend_pgf.PdfPages`, but is now allowed when saving a single figure, as +well. diff --git a/lib/matplotlib/backends/backend_pdf.py b/lib/matplotlib/backends/backend_pdf.py index b03bd794a8c7..5ebb5a969225 100644 --- a/lib/matplotlib/backends/backend_pdf.py +++ b/lib/matplotlib/backends/backend_pdf.py @@ -135,6 +135,110 @@ def _string_escape(match): assert False +def _create_pdf_info_dict(backend, metadata): + """ + Create a PDF infoDict based on user-supplied metadata. 
+ + A default ``Creator``, ``Producer``, and ``CreationDate`` are added, though + the user metadata may override it. The date may be the current time, or a + time set by the ``SOURCE_DATE_EPOCH`` environment variable. + + Metadata is verified to have the correct keys and their expected types. Any + unknown keys/types will raise a warning. + + Parameters + ---------- + backend : str + The name of the backend to use in the Producer value. + metadata : Dict[str, Union[str, datetime, Name]] + A dictionary of metadata supplied by the user with information + following the PDF specification, also defined in + `~.backend_pdf.PdfPages` below. + + If any value is *None*, then the key will be removed. This can be used + to remove any pre-defined values. + + Returns + ------- + Dict[str, Union[str, datetime, Name]] + A validated dictionary of metadata. + """ + + # get source date from SOURCE_DATE_EPOCH, if set + # See https://reproducible-builds.org/specs/source-date-epoch/ + source_date_epoch = os.getenv("SOURCE_DATE_EPOCH") + if source_date_epoch: + source_date = datetime.utcfromtimestamp(int(source_date_epoch)) + source_date = source_date.replace(tzinfo=UTC) + else: + source_date = datetime.today() + + info = { + 'Creator': f'Matplotlib v{mpl.__version__}, https://matplotlib.org', + 'Producer': f'Matplotlib {backend} backend v{mpl.__version__}', + 'CreationDate': source_date, + **metadata + } + info = {k: v for (k, v) in info.items() if v is not None} + + def is_string_like(x): + return isinstance(x, str) + + def is_date(x): + return isinstance(x, datetime) + + def check_trapped(x): + if isinstance(x, Name): + return x.name in (b'True', b'False', b'Unknown') + else: + return x in ('True', 'False', 'Unknown') + + keywords = { + 'Title': is_string_like, + 'Author': is_string_like, + 'Subject': is_string_like, + 'Keywords': is_string_like, + 'Creator': is_string_like, + 'Producer': is_string_like, + 'CreationDate': is_date, + 'ModDate': is_date, + 'Trapped': check_trapped, + } 
def _datetime_to_pdf(d):
    """
    Convert a datetime to a PDF string representing it.

    Used for PDF and PGF.

    Parameters
    ----------
    d : datetime.datetime
        The date to convert.  If it is timezone-aware, its own UTC offset is
        used; if it is naive, the offset is taken from the process-wide
        `time.timezone` / `time.altzone` settings.

    Returns
    -------
    str
        ``D:YYYYMMDDHHmmSS`` followed by ``Z`` for a zero offset, or by
        ``+HH'mm'`` / ``-HH'mm'`` where ``+`` means local time is ahead of
        (east of) UTC.
    """
    r = d.strftime('D:%Y%m%d%H%M%S')
    offset = d.utcoffset()
    if offset is not None:
        # total_seconds() keeps the sign of the offset; timedelta.seconds is
        # always non-negative (UTC-5 is stored as days=-1, seconds=68400).
        east = int(offset.total_seconds())
    else:
        # time.timezone and time.altzone are expressed in seconds *west* of
        # UTC, i.e. with the opposite sign of datetime.utcoffset().
        west = time.altzone if time.daylight else time.timezone
        east = -west
    if east == 0:
        return r + 'Z'
    sign = '+' if east > 0 else '-'
    hours, remainder = divmod(abs(east), 3600)
    # The second field is minutes, not the leftover seconds.
    return r + "%s%02d'%02d'" % (sign, hours, remainder // 60)
def _metadata_to_str(key, value):
    """
    Convert metadata key/value to a form that hyperref accepts.

    Parameters
    ----------
    key : str
        The PDF info-dictionary key (e.g. ``'Title'``, ``'Trapped'``).
    value
        The value for *key*.  `datetime.datetime` values are rendered with
        `_datetime_to_pdf`.  For ``'Trapped'``, either an object exposing the
        name through a ``name`` attribute (bytes or str) or a plain string is
        accepted.  Anything else is passed through `str`.

    Returns
    -------
    str
        ``key={value}``, suitable as one entry of hyperref's ``pdfinfo``
        option list.
    """
    if isinstance(value, datetime.datetime):
        value = _datetime_to_pdf(value)
    elif key == 'Trapped':
        # Accept both a name object carrying ``.name`` and a bare string, so
        # this helper does not depend on the caller having converted it.
        name = getattr(value, 'name', value)
        value = name.decode('ascii') if isinstance(name, bytes) else str(name)
    else:
        value = str(value)
    # The value is brace-wrapped so that commas inside it do not split the
    # hyperref option list.
    return f'{key}={{{value}}}'
self._outputfile = filename self._n_figures = 0 self.keep_empty = keep_empty - self.metadata = metadata or {} + self._metadata = (metadata or {}).copy() + if metadata: + for key in metadata: + canonical = { + 'creationdate': 'CreationDate', + 'moddate': 'ModDate', + }.get(key.lower(), key.lower().title()) + if canonical != key: + cbook.warn_deprecated( + '3.3', message='Support for setting PDF metadata keys ' + 'case-insensitively is deprecated since %(since)s and ' + 'will be removed %(removal)s; ' + f'set {canonical} instead of {key}.') + self._metadata[canonical] = self._metadata.pop(key) + self._info_dict = _create_pdf_info_dict('pgf', self._metadata) # create temporary directory for compiling the figure self._tmpdir = tempfile.mkdtemp(prefix="mpl_pgf_pdfpages_") @@ -1026,29 +1061,21 @@ def __init__(self, filename, *, keep_empty=True, metadata=None): self._fname_pdf = os.path.join(self._tmpdir, self._basename + ".pdf") self._file = open(self._fname_tex, 'wb') + @cbook.deprecated('3.3') + @property + def metadata(self): + return self._metadata + def _write_header(self, width_inches, height_inches): - supported_keys = { - 'title', 'author', 'subject', 'keywords', 'creator', - 'producer', 'trapped' - } - infoDict = { - 'creator': f'matplotlib {mpl.__version__}, https://matplotlib.org', - 'producer': f'matplotlib pgf backend {mpl.__version__}', - } - metadata = {k.lower(): v for k, v in self.metadata.items()} - infoDict.update(metadata) - hyperref_options = '' - for k, v in infoDict.items(): - if k not in supported_keys: - raise ValueError( - 'Not a supported pdf metadata field: "{}"'.format(k) - ) - hyperref_options += 'pdf' + k + '={' + str(v) + '},' + hyperref_options = ','.join( + _metadata_to_str(k, v) for k, v in self._info_dict.items()) latex_preamble = get_preamble() latex_fontspec = get_fontspec() latex_header = r"""\PassOptionsToPackage{{ - {metadata} + pdfinfo={{ + {metadata} + }} }}{{hyperref}} \RequirePackage{{hyperref}} \documentclass[12pt]{{minimal}} 
diff --git a/lib/matplotlib/tests/test_backend_pdf.py b/lib/matplotlib/tests/test_backend_pdf.py index 92fc22bdc7e3..4e125992138a 100644 --- a/lib/matplotlib/tests/test_backend_pdf.py +++ b/lib/matplotlib/tests/test_backend_pdf.py @@ -1,3 +1,4 @@ +import datetime import io import os from pathlib import Path @@ -7,6 +8,7 @@ import numpy as np import pytest +import matplotlib as mpl from matplotlib import dviread, pyplot as plt, checkdep_usetex, rcParams from matplotlib.backends.backend_pdf import PdfPages from matplotlib.testing.compare import compare_images @@ -125,6 +127,78 @@ def test_composite_image(): assert len(pdf._file._images) == 2 +def test_savefig_metadata(monkeypatch): + pikepdf = pytest.importorskip('pikepdf') + monkeypatch.setenv('SOURCE_DATE_EPOCH', '0') + + fig, ax = plt.subplots() + ax.plot(range(5)) + + md = { + 'Author': 'me', + 'Title': 'Multipage PDF', + 'Subject': 'Test page', + 'Keywords': 'test,pdf,multipage', + 'ModDate': datetime.datetime( + 1968, 8, 1, tzinfo=datetime.timezone(datetime.timedelta(0))), + 'Trapped': 'True' + } + buf = io.BytesIO() + fig.savefig(buf, metadata=md, format='pdf') + + with pikepdf.Pdf.open(buf) as pdf: + info = {k: str(v) for k, v in pdf.docinfo.items()} + + assert info == { + '/Author': 'me', + '/CreationDate': 'D:19700101000000Z', + '/Creator': f'Matplotlib v{mpl.__version__}, https://matplotlib.org', + '/Keywords': 'test,pdf,multipage', + '/ModDate': 'D:19680801000000Z', + '/Producer': f'Matplotlib pdf backend v{mpl.__version__}', + '/Subject': 'Test page', + '/Title': 'Multipage PDF', + '/Trapped': '/True', + } + + +def test_multipage_metadata(monkeypatch): + pikepdf = pytest.importorskip('pikepdf') + monkeypatch.setenv('SOURCE_DATE_EPOCH', '0') + + fig, ax = plt.subplots() + ax.plot(range(5)) + + md = { + 'Author': 'me', + 'Title': 'Multipage PDF', + 'Subject': 'Test page', + 'Keywords': 'test,pdf,multipage', + 'ModDate': datetime.datetime( + 1968, 8, 1, tzinfo=datetime.timezone(datetime.timedelta(0))), + 
'Trapped': 'True' + } + buf = io.BytesIO() + with PdfPages(buf, metadata=md) as pdf: + pdf.savefig(fig) + pdf.savefig(fig) + + with pikepdf.Pdf.open(buf) as pdf: + info = {k: str(v) for k, v in pdf.docinfo.items()} + + assert info == { + '/Author': 'me', + '/CreationDate': 'D:19700101000000Z', + '/Creator': f'Matplotlib v{mpl.__version__}, https://matplotlib.org', + '/Keywords': 'test,pdf,multipage', + '/ModDate': 'D:19680801000000Z', + '/Producer': f'Matplotlib pdf backend v{mpl.__version__}', + '/Subject': 'Test page', + '/Title': 'Multipage PDF', + '/Trapped': '/True', + } + + def test_pdfpages_fspath(): with PdfPages(Path(os.devnull)) as pdf: pdf.savefig(plt.figure()) diff --git a/lib/matplotlib/tests/test_backend_pgf.py b/lib/matplotlib/tests/test_backend_pgf.py index ba4d2877eec5..780435413a0b 100644 --- a/lib/matplotlib/tests/test_backend_pgf.py +++ b/lib/matplotlib/tests/test_backend_pgf.py @@ -1,3 +1,4 @@ +import datetime from io import BytesIO import os from pathlib import Path @@ -213,82 +214,99 @@ def test_bbox_inches(): tol=0) -@needs_pdflatex @pytest.mark.style('default') @pytest.mark.backend('pgf') -def test_pdf_pages(): +@pytest.mark.parametrize('system', [ + pytest.param('lualatex', marks=[needs_lualatex]), + pytest.param('pdflatex', marks=[needs_pdflatex]), + pytest.param('xelatex', marks=[needs_xelatex]), +]) +def test_pdf_pages(system): rc_pdflatex = { 'font.family': 'serif', 'pgf.rcfonts': False, - 'pgf.texsystem': 'pdflatex', + 'pgf.texsystem': system, } mpl.rcParams.update(rc_pdflatex) - fig1 = plt.figure() - ax1 = fig1.add_subplot(1, 1, 1) + fig1, ax1 = plt.subplots() ax1.plot(range(5)) fig1.tight_layout() - fig2 = plt.figure(figsize=(3, 2)) - ax2 = fig2.add_subplot(1, 1, 1) + fig2, ax2 = plt.subplots(figsize=(3, 2)) ax2.plot(range(5)) fig2.tight_layout() - with PdfPages(os.path.join(result_dir, 'pdfpages.pdf')) as pdf: - pdf.savefig(fig1) - pdf.savefig(fig2) - - -@needs_xelatex -@pytest.mark.style('default') -@pytest.mark.backend('pgf') 
-def test_pdf_pages_metadata(): - rc_pdflatex = { - 'font.family': 'serif', - 'pgf.rcfonts': False, - 'pgf.texsystem': 'xelatex', + path = os.path.join(result_dir, f'pdfpages_{system}.pdf') + md = { + 'Author': 'me', + 'Title': 'Multipage PDF with pgf', + 'Subject': 'Test page', + 'Keywords': 'test,pdf,multipage', + 'ModDate': datetime.datetime( + 1968, 8, 1, tzinfo=datetime.timezone(datetime.timedelta(0))), + 'Trapped': 'Unknown' } - mpl.rcParams.update(rc_pdflatex) - - fig = plt.figure() - ax = fig.add_subplot(1, 1, 1) - ax.plot(range(5)) - fig.tight_layout() - - md = {'author': 'me', 'title': 'Multipage PDF with pgf'} - path = os.path.join(result_dir, 'pdfpages_meta.pdf') with PdfPages(path, metadata=md) as pdf: - pdf.savefig(fig) - pdf.savefig(fig) - pdf.savefig(fig) + pdf.savefig(fig1) + pdf.savefig(fig2) + pdf.savefig(fig1) assert pdf.get_pagecount() == 3 -@needs_lualatex @pytest.mark.style('default') @pytest.mark.backend('pgf') -def test_pdf_pages_lualatex(): - rc_pdflatex = { - 'font.family': 'serif', - 'pgf.rcfonts': False, - 'pgf.texsystem': 'lualatex' - } - mpl.rcParams.update(rc_pdflatex) +@pytest.mark.parametrize('system', [ + pytest.param('lualatex', marks=[needs_lualatex]), + pytest.param('pdflatex', marks=[needs_pdflatex]), + pytest.param('xelatex', marks=[needs_xelatex]), +]) +def test_pdf_pages_metadata_check(monkeypatch, system): + # Basically the same as test_pdf_pages, but we keep it separate to leave + # pikepdf as an optional dependency. 
+ pikepdf = pytest.importorskip('pikepdf') + monkeypatch.setenv('SOURCE_DATE_EPOCH', '0') - fig = plt.figure() - ax = fig.add_subplot(1, 1, 1) + mpl.rcParams.update({'pgf.texsystem': system}) + + fig, ax = plt.subplots() ax.plot(range(5)) - fig.tight_layout() - md = {'author': 'me', 'title': 'Multipage PDF with pgf'} - path = os.path.join(result_dir, 'pdfpages_lua.pdf') + md = { + 'Author': 'me', + 'Title': 'Multipage PDF with pgf', + 'Subject': 'Test page', + 'Keywords': 'test,pdf,multipage', + 'ModDate': datetime.datetime( + 1968, 8, 1, tzinfo=datetime.timezone(datetime.timedelta(0))), + 'Trapped': 'True' + } + path = os.path.join(result_dir, f'pdfpages_meta_check_{system}.pdf') with PdfPages(path, metadata=md) as pdf: pdf.savefig(fig) - pdf.savefig(fig) - assert pdf.get_pagecount() == 2 + with pikepdf.Pdf.open(path) as pdf: + info = {k: str(v) for k, v in pdf.docinfo.items()} + + # Not set by us, so don't bother checking. + if '/PTEX.FullBanner' in info: + del info['/PTEX.FullBanner'] + if '/PTEX.Fullbanner' in info: + del info['/PTEX.Fullbanner'] + + assert info == { + '/Author': 'me', + '/CreationDate': 'D:19700101000000Z', + '/Creator': f'Matplotlib v{mpl.__version__}, https://matplotlib.org', + '/Keywords': 'test,pdf,multipage', + '/ModDate': 'D:19680801000000Z', + '/Producer': f'Matplotlib pgf backend v{mpl.__version__}', + '/Subject': 'Test page', + '/Title': 'Multipage PDF with pgf', + '/Trapped': '/True', + } @needs_xelatex diff --git a/requirements/testing/travis_extra.txt b/requirements/testing/travis_extra.txt index 19b6eb279272..19b774b382f7 100644 --- a/requirements/testing/travis_extra.txt +++ b/requirements/testing/travis_extra.txt @@ -4,4 +4,5 @@ ipykernel nbconvert[execute] nbformat!=5.0.0,!=5.0.1 pandas!=0.25.0 +pikepdf pytz