From c4613f8597373156b226e08b0c28f99599b6de4c Mon Sep 17 00:00:00 2001 From: Elliott Sales de Andrade Date: Thu, 23 Apr 2020 20:01:59 -0400 Subject: [PATCH 1/7] pgf: Pass PDF metadata via pdfinfo option. This takes keys that match the PDF specification instead of lower-case, 'pdf'-prefixed keys, so it is a bit simpler. Also, deprecate the ability to accept keys case-insensitively, as that is not done in the PDF backend. --- doc/api/api_changes_3.3/deprecations.rst | 8 ++++++ lib/matplotlib/backends/backend_pgf.py | 32 +++++++++++++++++------- lib/matplotlib/tests/test_backend_pgf.py | 4 +-- 3 files changed, 33 insertions(+), 11 deletions(-) diff --git a/doc/api/api_changes_3.3/deprecations.rst b/doc/api/api_changes_3.3/deprecations.rst index 70595495a4a0..f80732851d15 100644 --- a/doc/api/api_changes_3.3/deprecations.rst +++ b/doc/api/api_changes_3.3/deprecations.rst @@ -593,3 +593,11 @@ APIs which support the values True, False, and "TeX" for ``ismath``. ``matplotlib.ttconv`` ~~~~~~~~~~~~~~~~~~~~~ This module is deprecated. + +Stricter PDF metadata keys in PGF +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Saving metadata in PDF with the PGF backend currently normalizes all keys to +lowercase, unlike the PDF backend, which only accepts the canonical case. This +is deprecated; in a future version, only the canonically cased keys listed in +the PDF specification (and the `~.backend_pgf.PdfPages` documentation) will be +accepted. 
diff --git a/lib/matplotlib/backends/backend_pgf.py b/lib/matplotlib/backends/backend_pgf.py index d68c743b7053..b60950ce9551 100644 --- a/lib/matplotlib/backends/backend_pgf.py +++ b/lib/matplotlib/backends/backend_pgf.py @@ -1017,7 +1017,20 @@ def __init__(self, filename, *, keep_empty=True, metadata=None): self._outputfile = filename self._n_figures = 0 self.keep_empty = keep_empty - self.metadata = metadata or {} + self.metadata = (metadata or {}).copy() + if metadata: + for key in metadata: + canonical = { + 'creationdate': 'CreationDate', + 'moddate': 'ModDate', + }.get(key.lower(), key.lower().title()) + if canonical != key: + cbook.warn_deprecated( + '3.3', message='Support for setting PDF metadata keys ' + 'case-insensitively is deprecated since %(since)s and ' + 'will be removed %(removal)s; ' + f'set {canonical} instead of {key}.') + self.metadata[canonical] = self.metadata.pop(key) # create temporary directory for compiling the figure self._tmpdir = tempfile.mkdtemp(prefix="mpl_pgf_pdfpages_") @@ -1028,27 +1041,28 @@ def __init__(self, filename, *, keep_empty=True, metadata=None): def _write_header(self, width_inches, height_inches): supported_keys = { - 'title', 'author', 'subject', 'keywords', 'creator', - 'producer', 'trapped' + 'Title', 'Author', 'Subject', 'Keywords', 'Creator', + 'Producer', 'Trapped' } infoDict = { - 'creator': f'matplotlib {mpl.__version__}, https://matplotlib.org', - 'producer': f'matplotlib pgf backend {mpl.__version__}', + 'Creator': f'matplotlib {mpl.__version__}, https://matplotlib.org', + 'Producer': f'matplotlib pgf backend {mpl.__version__}', + **self.metadata } - metadata = {k.lower(): v for k, v in self.metadata.items()} - infoDict.update(metadata) hyperref_options = '' for k, v in infoDict.items(): if k not in supported_keys: raise ValueError( 'Not a supported pdf metadata field: "{}"'.format(k) ) - hyperref_options += 'pdf' + k + '={' + str(v) + '},' + hyperref_options += k + '={' + str(v) + '},' latex_preamble = 
get_preamble() latex_fontspec = get_fontspec() latex_header = r"""\PassOptionsToPackage{{ - {metadata} + pdfinfo={{ + {metadata} + }} }}{{hyperref}} \RequirePackage{{hyperref}} \documentclass[12pt]{{minimal}} diff --git a/lib/matplotlib/tests/test_backend_pgf.py b/lib/matplotlib/tests/test_backend_pgf.py index ba4d2877eec5..ab790299bf76 100644 --- a/lib/matplotlib/tests/test_backend_pgf.py +++ b/lib/matplotlib/tests/test_backend_pgf.py @@ -255,7 +255,7 @@ def test_pdf_pages_metadata(): ax.plot(range(5)) fig.tight_layout() - md = {'author': 'me', 'title': 'Multipage PDF with pgf'} + md = {'Author': 'me', 'Title': 'Multipage PDF with pgf'} path = os.path.join(result_dir, 'pdfpages_meta.pdf') with PdfPages(path, metadata=md) as pdf: @@ -282,7 +282,7 @@ def test_pdf_pages_lualatex(): ax.plot(range(5)) fig.tight_layout() - md = {'author': 'me', 'title': 'Multipage PDF with pgf'} + md = {'Author': 'me', 'Title': 'Multipage PDF with pgf'} path = os.path.join(result_dir, 'pdfpages_lua.pdf') with PdfPages(path, metadata=md) as pdf: pdf.savefig(fig) From 1127bbdc9c9da9b877a8106201c79fd704b7b8e3 Mon Sep 17 00:00:00 2001 From: Elliott Sales de Andrade Date: Thu, 23 Apr 2020 23:35:44 -0400 Subject: [PATCH 2/7] pdf/pgf: Move PDF metadata checks to a common function. This ensures that the same default values are produced, and more importantly ensures that the same keys are accepted. Previously, PGF only allowed a subset of keys. 
--- lib/matplotlib/backends/backend_pdf.py | 161 ++++++++++++++--------- lib/matplotlib/backends/backend_pgf.py | 45 ++++--- lib/matplotlib/tests/test_backend_pgf.py | 10 +- 3 files changed, 136 insertions(+), 80 deletions(-) diff --git a/lib/matplotlib/backends/backend_pdf.py b/lib/matplotlib/backends/backend_pdf.py index b03bd794a8c7..c63a51d18c1a 100644 --- a/lib/matplotlib/backends/backend_pdf.py +++ b/lib/matplotlib/backends/backend_pdf.py @@ -135,6 +135,105 @@ def _string_escape(match): assert False +def _create_pdf_info_dict(backend, metadata): + """ + Create a PDF infoDict based on user-supplied metadata. + + A default ``Creator``, ``Producer``, and ``CreationDate`` are added, though + the user metadata may override it. The date may be the current time, or a + time set by the ``SOURCE_DATE_EPOCH`` environment variable. + + Metadata is verified to have the correct keys and their expected types. Any + unknown keys/types will raise a warning. + + Parameters + ---------- + backend : str + The name of the backend to use in the Producer value. + metadata : Dict[str, Union[str, datetime, Name]] + A dictionary of metadata supplied by the user with information + following the PDF specification, also defined in + `~.backend_pdf.PdfPages` below. + + If any value is *None*, then the key will be removed. This can be used + to remove any pre-defined values. + + Returns + ------- + Dict[str, Union[str, datetime, Name]] + A validated dictionary of metadata. 
+ """ + + # get source date from SOURCE_DATE_EPOCH, if set + # See https://reproducible-builds.org/specs/source-date-epoch/ + source_date_epoch = os.getenv("SOURCE_DATE_EPOCH") + if source_date_epoch: + source_date = datetime.utcfromtimestamp(int(source_date_epoch)) + source_date = source_date.replace(tzinfo=UTC) + else: + source_date = datetime.today() + + info = { + 'Creator': f'Matplotlib v{mpl.__version__}, https://matplotlib.org', + 'Producer': f'Matplotlib {backend} backend v{mpl.__version__}', + 'CreationDate': source_date, + **metadata + } + info = {k: v for (k, v) in info.items() if v is not None} + + def is_string_like(x): + return isinstance(x, str) + + def is_date(x): + return isinstance(x, datetime) + + check_trapped = (lambda x: isinstance(x, Name) and + x.name in (b'True', b'False', b'Unknown')) + + keywords = { + 'Title': is_string_like, + 'Author': is_string_like, + 'Subject': is_string_like, + 'Keywords': is_string_like, + 'Creator': is_string_like, + 'Producer': is_string_like, + 'CreationDate': is_date, + 'ModDate': is_date, + 'Trapped': check_trapped, + } + for k in info: + if k not in keywords: + cbook._warn_external(f'Unknown infodict keyword: {k}') + elif not keywords[k](info[k]): + cbook._warn_external(f'Bad value for infodict keyword {k}') + + return info + + +def _datetime_to_pdf(d): + """ + Convert a datetime to a PDF string representing it. + + Used for PDF and PGF. + """ + r = d.strftime('D:%Y%m%d%H%M%S') + z = d.utcoffset() + if z is not None: + z = z.seconds + else: + if time.daylight: + z = time.altzone + else: + z = time.timezone + if z == 0: + r += 'Z' + elif z < 0: + r += "+%02d'%02d'" % ((-z) // 3600, (-z) % 3600) + else: + r += "-%02d'%02d'" % (z // 3600, z % 3600) + return r + + def pdfRepr(obj): """Map Python objects to PDF syntax.""" @@ -199,22 +298,7 @@ def pdfRepr(obj): # A date. 
elif isinstance(obj, datetime): - r = obj.strftime('D:%Y%m%d%H%M%S') - z = obj.utcoffset() - if z is not None: - z = z.seconds - else: - if time.daylight: - z = time.altzone - else: - z = time.timezone - if z == 0: - r += 'Z' - elif z < 0: - r += "+%02d'%02d'" % ((-z) // 3600, (-z) % 3600) - else: - r += "-%02d'%02d'" % (z // 3600, z % 3600) - return pdfRepr(r) + return pdfRepr(_datetime_to_pdf(obj)) # A bounding box elif isinstance(obj, BboxBase): @@ -503,24 +587,7 @@ def __init__(self, filename, metadata=None): 'Pages': self.pagesObject} self.writeObject(self.rootObject, root) - # get source date from SOURCE_DATE_EPOCH, if set - # See https://reproducible-builds.org/specs/source-date-epoch/ - source_date_epoch = os.getenv("SOURCE_DATE_EPOCH") - if source_date_epoch: - source_date = datetime.utcfromtimestamp(int(source_date_epoch)) - source_date = source_date.replace(tzinfo=UTC) - else: - source_date = datetime.today() - - self.infoDict = { - 'Creator': f'matplotlib {mpl.__version__}, http://matplotlib.org', - 'Producer': f'matplotlib pdf backend {mpl.__version__}', - 'CreationDate': source_date - } - if metadata is not None: - self.infoDict.update(metadata) - self.infoDict = {k: v for (k, v) in self.infoDict.items() - if v is not None} + self.infoDict = _create_pdf_info_dict('pdf', metadata or {}) self.fontNames = {} # maps filenames to internal font names self._internal_font_seq = (Name(f'F{i}') for i in itertools.count(1)) @@ -1640,32 +1707,6 @@ def writeXref(self): def writeInfoDict(self): """Write out the info dictionary, checking it for good form""" - def is_string_like(x): - return isinstance(x, str) - - def is_date(x): - return isinstance(x, datetime) - - check_trapped = (lambda x: isinstance(x, Name) and - x.name in ('True', 'False', 'Unknown')) - - keywords = {'Title': is_string_like, - 'Author': is_string_like, - 'Subject': is_string_like, - 'Keywords': is_string_like, - 'Creator': is_string_like, - 'Producer': is_string_like, - 'CreationDate': is_date, 
- 'ModDate': is_date, - 'Trapped': check_trapped} - for k in self.infoDict: - if k not in keywords: - cbook._warn_external('Unknown infodict keyword: %s' % k) - else: - if not keywords[k](self.infoDict[k]): - cbook._warn_external( - 'Bad value for infodict keyword %s' % k) - self.infoObject = self.reserveObject('info') self.writeObject(self.infoObject, self.infoDict) diff --git a/lib/matplotlib/backends/backend_pgf.py b/lib/matplotlib/backends/backend_pgf.py index b60950ce9551..aa5cee056250 100644 --- a/lib/matplotlib/backends/backend_pgf.py +++ b/lib/matplotlib/backends/backend_pgf.py @@ -1,5 +1,6 @@ import atexit import codecs +import datetime import functools import logging import math @@ -20,6 +21,8 @@ _Backend, FigureCanvasBase, FigureManagerBase, GraphicsContextBase, RendererBase) from matplotlib.backends.backend_mixed import MixedModeRenderer +from matplotlib.backends.backend_pdf import ( + _create_pdf_info_dict, _datetime_to_pdf) from matplotlib.path import Path from matplotlib.figure import Figure from matplotlib._pylab_helpers import Gcf @@ -157,6 +160,17 @@ def _font_properties_str(prop): return "".join(commands) +def _metadata_to_str(key, value): + """Convert metadata key/value to a form that hyperref accepts.""" + if isinstance(value, datetime.datetime): + value = _datetime_to_pdf(value) + elif key == 'Trapped': + value = value.name.decode('ascii') + else: + value = str(value) + return f'{key}={{{value}}}' + + def make_pdf_to_png_converter(): """Return a function that converts a pdf file to a png file.""" if shutil.which("pdftocairo"): @@ -989,7 +1003,8 @@ class PdfPages: '_fname_pdf', '_n_figures', '_file', - 'metadata', + '_info_dict', + '_metadata', ) def __init__(self, filename, *, keep_empty=True, metadata=None): @@ -1017,7 +1032,7 @@ def __init__(self, filename, *, keep_empty=True, metadata=None): self._outputfile = filename self._n_figures = 0 self.keep_empty = keep_empty - self.metadata = (metadata or {}).copy() + self._metadata = (metadata or 
{}).copy() if metadata: for key in metadata: canonical = { @@ -1030,7 +1045,8 @@ def __init__(self, filename, *, keep_empty=True, metadata=None): 'case-insensitively is deprecated since %(since)s and ' 'will be removed %(removal)s; ' f'set {canonical} instead of {key}.') - self.metadata[canonical] = self.metadata.pop(key) + self._metadata[canonical] = self._metadata.pop(key) + self._info_dict = _create_pdf_info_dict('pgf', self._metadata) # create temporary directory for compiling the figure self._tmpdir = tempfile.mkdtemp(prefix="mpl_pgf_pdfpages_") @@ -1039,23 +1055,14 @@ def __init__(self, filename, *, keep_empty=True, metadata=None): self._fname_pdf = os.path.join(self._tmpdir, self._basename + ".pdf") self._file = open(self._fname_tex, 'wb') + @cbook.deprecated('3.3') + @property + def metadata(self): + return self._metadata + def _write_header(self, width_inches, height_inches): - supported_keys = { - 'Title', 'Author', 'Subject', 'Keywords', 'Creator', - 'Producer', 'Trapped' - } - infoDict = { - 'Creator': f'matplotlib {mpl.__version__}, https://matplotlib.org', - 'Producer': f'matplotlib pgf backend {mpl.__version__}', - **self.metadata - } - hyperref_options = '' - for k, v in infoDict.items(): - if k not in supported_keys: - raise ValueError( - 'Not a supported pdf metadata field: "{}"'.format(k) - ) - hyperref_options += k + '={' + str(v) + '},' + hyperref_options = ','.join( + _metadata_to_str(k, v) for k, v in self._info_dict.items()) latex_preamble = get_preamble() latex_fontspec = get_fontspec() diff --git a/lib/matplotlib/tests/test_backend_pgf.py b/lib/matplotlib/tests/test_backend_pgf.py index ab790299bf76..15acc43b7747 100644 --- a/lib/matplotlib/tests/test_backend_pgf.py +++ b/lib/matplotlib/tests/test_backend_pgf.py @@ -1,3 +1,4 @@ +from datetime import datetime from io import BytesIO import os from pathlib import Path @@ -255,7 +256,14 @@ def test_pdf_pages_metadata(): ax.plot(range(5)) fig.tight_layout() - md = {'Author': 'me', 'Title': 
'Multipage PDF with pgf'} + md = { + 'Author': 'me', + 'Title': 'Multipage PDF with pgf', + 'Subject': 'Test page', + 'Keywords': 'test,pdf,multipage', + 'ModDate': datetime(1968, 8, 1), + } + path = os.path.join(result_dir, 'pdfpages_meta.pdf') with PdfPages(path, metadata=md) as pdf: From 31e462b2924f0077c4f9ea7ae33d2f77325c58e4 Mon Sep 17 00:00:00 2001 From: Elliott Sales de Andrade Date: Fri, 24 Apr 2020 00:03:30 -0400 Subject: [PATCH 3/7] pdf: Don't require Trapped to be a Name instance. It's something internal to the PDF backend, and there's no need to require the metadata to use that type instead of a plain string. --- lib/matplotlib/backends/backend_pdf.py | 9 +++++++-- lib/matplotlib/tests/test_backend_pgf.py | 1 + 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/lib/matplotlib/backends/backend_pdf.py b/lib/matplotlib/backends/backend_pdf.py index c63a51d18c1a..5ebb5a969225 100644 --- a/lib/matplotlib/backends/backend_pdf.py +++ b/lib/matplotlib/backends/backend_pdf.py @@ -187,8 +187,11 @@ def is_string_like(x): def is_date(x): return isinstance(x, datetime) - check_trapped = (lambda x: isinstance(x, Name) and - x.name in (b'True', b'False', b'Unknown')) + def check_trapped(x): + if isinstance(x, Name): + return x.name in (b'True', b'False', b'Unknown') + else: + return x in ('True', 'False', 'Unknown') keywords = { 'Title': is_string_like, @@ -206,6 +209,8 @@ def is_date(x): cbook._warn_external(f'Unknown infodict keyword: {k}') elif not keywords[k](info[k]): cbook._warn_external(f'Bad value for infodict keyword {k}') + if 'Trapped' in info: + info['Trapped'] = Name(info['Trapped']) return info diff --git a/lib/matplotlib/tests/test_backend_pgf.py b/lib/matplotlib/tests/test_backend_pgf.py index 15acc43b7747..3bb7318fd63e 100644 --- a/lib/matplotlib/tests/test_backend_pgf.py +++ b/lib/matplotlib/tests/test_backend_pgf.py @@ -262,6 +262,7 @@ def test_pdf_pages_metadata(): 'Subject': 'Test page', 'Keywords': 'test,pdf,multipage', 'ModDate': 
datetime(1968, 8, 1), + 'Trapped': 'Unknown' } path = os.path.join(result_dir, 'pdfpages_meta.pdf') From 65ac78fdf3d95ff427741c47f4587d3b743d97c6 Mon Sep 17 00:00:00 2001 From: Elliott Sales de Andrade Date: Fri, 24 Apr 2020 00:21:40 -0400 Subject: [PATCH 4/7] Implement PDF metadata on PGF backend. Previously, this was supported only through the multipage PdfPages, not for direct output of a single figure via PGF. --- lib/matplotlib/backends/backend_pgf.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/lib/matplotlib/backends/backend_pgf.py b/lib/matplotlib/backends/backend_pgf.py index aa5cee056250..a0f0eb921a24 100644 --- a/lib/matplotlib/backends/backend_pgf.py +++ b/lib/matplotlib/backends/backend_pgf.py @@ -881,9 +881,13 @@ def print_pgf(self, fname_or_fh, *args, **kwargs): file = codecs.getwriter("utf-8")(file) self._print_pgf_to_fh(file, *args, **kwargs) - def _print_pdf_to_fh(self, fh, *args, **kwargs): + def _print_pdf_to_fh(self, fh, *args, metadata=None, **kwargs): w, h = self.figure.get_figwidth(), self.figure.get_figheight() + info_dict = _create_pdf_info_dict('pgf', metadata or {}) + hyperref_options = ','.join( + _metadata_to_str(k, v) for k, v in info_dict.items()) + try: # create temporary directory for compiling the figure tmpdir = tempfile.mkdtemp(prefix="mpl_pgf_") @@ -897,6 +901,8 @@ def _print_pdf_to_fh(self, fh, *args, **kwargs): latex_preamble = get_preamble() latex_fontspec = get_fontspec() latexcode = """ +\\PassOptionsToPackage{pdfinfo={%s}}{hyperref} +\\RequirePackage{hyperref} \\documentclass[12pt]{minimal} \\usepackage[paperwidth=%fin, paperheight=%fin, margin=0in]{geometry} %s @@ -906,7 +912,7 @@ def _print_pdf_to_fh(self, fh, *args, **kwargs): \\begin{document} \\centering \\input{figure.pgf} -\\end{document}""" % (w, h, latex_preamble, latex_fontspec) +\\end{document}""" % (hyperref_options, w, h, latex_preamble, latex_fontspec) pathlib.Path(fname_tex).write_text(latexcode, encoding="utf-8") texcommand = 
mpl.rcParams["pgf.texsystem"] From ae098ea7d2a8bbc0fdfca8163d60b865a46393d6 Mon Sep 17 00:00:00 2001 From: Elliott Sales de Andrade Date: Fri, 24 Apr 2020 00:44:22 -0400 Subject: [PATCH 5/7] Add What's New entry about previous metadata changes. --- .../next_whats_new/2020-04-24-ES-pdf-pgf-metadata.rst | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 doc/users/next_whats_new/2020-04-24-ES-pdf-pgf-metadata.rst diff --git a/doc/users/next_whats_new/2020-04-24-ES-pdf-pgf-metadata.rst b/doc/users/next_whats_new/2020-04-24-ES-pdf-pgf-metadata.rst new file mode 100644 index 000000000000..465139776c3b --- /dev/null +++ b/doc/users/next_whats_new/2020-04-24-ES-pdf-pgf-metadata.rst @@ -0,0 +1,8 @@ +Saving PDF metadata via PGF now consistent with PDF backend +----------------------------------------------------------- + +When saving PDF files using the PGF backend, passed metadata will be +interpreted in the same way as with the PDF backend. Previously, this metadata +was only accepted by the PGF backend when saving a multi-page PDF with +`.backend_pgf.PdfPages`, but is now allowed when saving a single figure, as +well. From aa0fa1a7190251fb62bccf79620717e04bd395b3 Mon Sep 17 00:00:00 2001 From: Elliott Sales de Andrade Date: Tue, 2 Jun 2020 18:16:28 -0400 Subject: [PATCH 6/7] Combine PdfPages tests. They're basically the same, but with different `pgf.texsystem` settings. 
--- lib/matplotlib/tests/test_backend_pgf.py | 71 +++++------------------- 1 file changed, 13 insertions(+), 58 deletions(-) diff --git a/lib/matplotlib/tests/test_backend_pgf.py b/lib/matplotlib/tests/test_backend_pgf.py index 3bb7318fd63e..79adf0e529fc 100644 --- a/lib/matplotlib/tests/test_backend_pgf.py +++ b/lib/matplotlib/tests/test_backend_pgf.py @@ -214,48 +214,30 @@ def test_bbox_inches(): tol=0) -@needs_pdflatex @pytest.mark.style('default') @pytest.mark.backend('pgf') -def test_pdf_pages(): +@pytest.mark.parametrize('system', [ + pytest.param('lualatex', marks=[needs_lualatex]), + pytest.param('pdflatex', marks=[needs_pdflatex]), + pytest.param('xelatex', marks=[needs_xelatex]), +]) +def test_pdf_pages(system): rc_pdflatex = { 'font.family': 'serif', 'pgf.rcfonts': False, - 'pgf.texsystem': 'pdflatex', + 'pgf.texsystem': system, } mpl.rcParams.update(rc_pdflatex) - fig1 = plt.figure() - ax1 = fig1.add_subplot(1, 1, 1) + fig1, ax1 = plt.subplots() ax1.plot(range(5)) fig1.tight_layout() - fig2 = plt.figure(figsize=(3, 2)) - ax2 = fig2.add_subplot(1, 1, 1) + fig2, ax2 = plt.subplots(figsize=(3, 2)) ax2.plot(range(5)) fig2.tight_layout() - with PdfPages(os.path.join(result_dir, 'pdfpages.pdf')) as pdf: - pdf.savefig(fig1) - pdf.savefig(fig2) - - -@needs_xelatex -@pytest.mark.style('default') -@pytest.mark.backend('pgf') -def test_pdf_pages_metadata(): - rc_pdflatex = { - 'font.family': 'serif', - 'pgf.rcfonts': False, - 'pgf.texsystem': 'xelatex', - } - mpl.rcParams.update(rc_pdflatex) - - fig = plt.figure() - ax = fig.add_subplot(1, 1, 1) - ax.plot(range(5)) - fig.tight_layout() - + path = os.path.join(result_dir, f'pdfpages_{system}.pdf') md = { 'Author': 'me', 'Title': 'Multipage PDF with pgf', @@ -265,41 +247,14 @@ def test_pdf_pages_metadata(): 'Trapped': 'Unknown' } - path = os.path.join(result_dir, 'pdfpages_meta.pdf') - with PdfPages(path, metadata=md) as pdf: - pdf.savefig(fig) - pdf.savefig(fig) - pdf.savefig(fig) + pdf.savefig(fig1) + 
pdf.savefig(fig2) + pdf.savefig(fig1) assert pdf.get_pagecount() == 3 -@needs_lualatex -@pytest.mark.style('default') -@pytest.mark.backend('pgf') -def test_pdf_pages_lualatex(): - rc_pdflatex = { - 'font.family': 'serif', - 'pgf.rcfonts': False, - 'pgf.texsystem': 'lualatex' - } - mpl.rcParams.update(rc_pdflatex) - - fig = plt.figure() - ax = fig.add_subplot(1, 1, 1) - ax.plot(range(5)) - fig.tight_layout() - - md = {'Author': 'me', 'Title': 'Multipage PDF with pgf'} - path = os.path.join(result_dir, 'pdfpages_lua.pdf') - with PdfPages(path, metadata=md) as pdf: - pdf.savefig(fig) - pdf.savefig(fig) - - assert pdf.get_pagecount() == 2 - - @needs_xelatex def test_tex_restart_after_error(): fig = plt.figure() From cf4a117ac842d572bcc92ddd1e3bb1c6316b5748 Mon Sep 17 00:00:00 2001 From: Elliott Sales de Andrade Date: Tue, 2 Jun 2020 18:39:17 -0400 Subject: [PATCH 7/7] Add tests for PDF metadata saving using pikepdf. --- lib/matplotlib/tests/test_backend_pdf.py | 74 ++++++++++++++++++++++++ lib/matplotlib/tests/test_backend_pgf.py | 58 ++++++++++++++++++- requirements/testing/travis_extra.txt | 1 + 3 files changed, 131 insertions(+), 2 deletions(-) diff --git a/lib/matplotlib/tests/test_backend_pdf.py b/lib/matplotlib/tests/test_backend_pdf.py index 92fc22bdc7e3..4e125992138a 100644 --- a/lib/matplotlib/tests/test_backend_pdf.py +++ b/lib/matplotlib/tests/test_backend_pdf.py @@ -1,3 +1,4 @@ +import datetime import io import os from pathlib import Path @@ -7,6 +8,7 @@ import numpy as np import pytest +import matplotlib as mpl from matplotlib import dviread, pyplot as plt, checkdep_usetex, rcParams from matplotlib.backends.backend_pdf import PdfPages from matplotlib.testing.compare import compare_images @@ -125,6 +127,78 @@ def test_composite_image(): assert len(pdf._file._images) == 2 +def test_savefig_metadata(monkeypatch): + pikepdf = pytest.importorskip('pikepdf') + monkeypatch.setenv('SOURCE_DATE_EPOCH', '0') + + fig, ax = plt.subplots() + ax.plot(range(5)) + + md = 
{ + 'Author': 'me', + 'Title': 'Multipage PDF', + 'Subject': 'Test page', + 'Keywords': 'test,pdf,multipage', + 'ModDate': datetime.datetime( + 1968, 8, 1, tzinfo=datetime.timezone(datetime.timedelta(0))), + 'Trapped': 'True' + } + buf = io.BytesIO() + fig.savefig(buf, metadata=md, format='pdf') + + with pikepdf.Pdf.open(buf) as pdf: + info = {k: str(v) for k, v in pdf.docinfo.items()} + + assert info == { + '/Author': 'me', + '/CreationDate': 'D:19700101000000Z', + '/Creator': f'Matplotlib v{mpl.__version__}, https://matplotlib.org', + '/Keywords': 'test,pdf,multipage', + '/ModDate': 'D:19680801000000Z', + '/Producer': f'Matplotlib pdf backend v{mpl.__version__}', + '/Subject': 'Test page', + '/Title': 'Multipage PDF', + '/Trapped': '/True', + } + + +def test_multipage_metadata(monkeypatch): + pikepdf = pytest.importorskip('pikepdf') + monkeypatch.setenv('SOURCE_DATE_EPOCH', '0') + + fig, ax = plt.subplots() + ax.plot(range(5)) + + md = { + 'Author': 'me', + 'Title': 'Multipage PDF', + 'Subject': 'Test page', + 'Keywords': 'test,pdf,multipage', + 'ModDate': datetime.datetime( + 1968, 8, 1, tzinfo=datetime.timezone(datetime.timedelta(0))), + 'Trapped': 'True' + } + buf = io.BytesIO() + with PdfPages(buf, metadata=md) as pdf: + pdf.savefig(fig) + pdf.savefig(fig) + + with pikepdf.Pdf.open(buf) as pdf: + info = {k: str(v) for k, v in pdf.docinfo.items()} + + assert info == { + '/Author': 'me', + '/CreationDate': 'D:19700101000000Z', + '/Creator': f'Matplotlib v{mpl.__version__}, https://matplotlib.org', + '/Keywords': 'test,pdf,multipage', + '/ModDate': 'D:19680801000000Z', + '/Producer': f'Matplotlib pdf backend v{mpl.__version__}', + '/Subject': 'Test page', + '/Title': 'Multipage PDF', + '/Trapped': '/True', + } + + def test_pdfpages_fspath(): with PdfPages(Path(os.devnull)) as pdf: pdf.savefig(plt.figure()) diff --git a/lib/matplotlib/tests/test_backend_pgf.py b/lib/matplotlib/tests/test_backend_pgf.py index 79adf0e529fc..780435413a0b 100644 --- 
a/lib/matplotlib/tests/test_backend_pgf.py +++ b/lib/matplotlib/tests/test_backend_pgf.py @@ -1,4 +1,4 @@ -from datetime import datetime +import datetime from io import BytesIO import os from pathlib import Path @@ -243,7 +243,8 @@ def test_pdf_pages(system): 'Title': 'Multipage PDF with pgf', 'Subject': 'Test page', 'Keywords': 'test,pdf,multipage', - 'ModDate': datetime(1968, 8, 1), + 'ModDate': datetime.datetime( + 1968, 8, 1, tzinfo=datetime.timezone(datetime.timedelta(0))), 'Trapped': 'Unknown' } @@ -255,6 +256,59 @@ def test_pdf_pages(system): assert pdf.get_pagecount() == 3 +@pytest.mark.style('default') +@pytest.mark.backend('pgf') +@pytest.mark.parametrize('system', [ + pytest.param('lualatex', marks=[needs_lualatex]), + pytest.param('pdflatex', marks=[needs_pdflatex]), + pytest.param('xelatex', marks=[needs_xelatex]), +]) +def test_pdf_pages_metadata_check(monkeypatch, system): + # Basically the same as test_pdf_pages, but we keep it separate to leave + # pikepdf as an optional dependency. + pikepdf = pytest.importorskip('pikepdf') + monkeypatch.setenv('SOURCE_DATE_EPOCH', '0') + + mpl.rcParams.update({'pgf.texsystem': system}) + + fig, ax = plt.subplots() + ax.plot(range(5)) + + md = { + 'Author': 'me', + 'Title': 'Multipage PDF with pgf', + 'Subject': 'Test page', + 'Keywords': 'test,pdf,multipage', + 'ModDate': datetime.datetime( + 1968, 8, 1, tzinfo=datetime.timezone(datetime.timedelta(0))), + 'Trapped': 'True' + } + path = os.path.join(result_dir, f'pdfpages_meta_check_{system}.pdf') + with PdfPages(path, metadata=md) as pdf: + pdf.savefig(fig) + + with pikepdf.Pdf.open(path) as pdf: + info = {k: str(v) for k, v in pdf.docinfo.items()} + + # Not set by us, so don't bother checking. 
+ if '/PTEX.FullBanner' in info: + del info['/PTEX.FullBanner'] + if '/PTEX.Fullbanner' in info: + del info['/PTEX.Fullbanner'] + + assert info == { + '/Author': 'me', + '/CreationDate': 'D:19700101000000Z', + '/Creator': f'Matplotlib v{mpl.__version__}, https://matplotlib.org', + '/Keywords': 'test,pdf,multipage', + '/ModDate': 'D:19680801000000Z', + '/Producer': f'Matplotlib pgf backend v{mpl.__version__}', + '/Subject': 'Test page', + '/Title': 'Multipage PDF with pgf', + '/Trapped': '/True', + } + + @needs_xelatex def test_tex_restart_after_error(): fig = plt.figure() diff --git a/requirements/testing/travis_extra.txt b/requirements/testing/travis_extra.txt index 19b6eb279272..19b774b382f7 100644 --- a/requirements/testing/travis_extra.txt +++ b/requirements/testing/travis_extra.txt @@ -4,4 +4,5 @@ ipykernel nbconvert[execute] nbformat!=5.0.0,!=5.0.1 pandas!=0.25.0 +pikepdf pytz