pdf/pgf: Move PDF metadata checks to a common function. · matplotlib/matplotlib@f93f15f · GitHub
[go: up one dir, main page]

Skip to content

Commit f93f15f

Browse files
committed
pdf/pgf: Move PDF metadata checks to a common function.
This ensures that the same default values are produced, and more importantly ensures that the same keys are accepted. Previously, PGF only allowed a subset of keys.
1 parent 77a68c6 commit f93f15f

File tree

3 files changed

+98
-64
lines changed

3 files changed

+98
-64
lines changed

lib/matplotlib/backends/backend_pdf.py

Lines changed: 76 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,81 @@ def _string_escape(match):
135135
assert False
136136

137137

138+
def _create_pdf_info_dict(backend, metadata):
139+
"""
140+
Create a PDF infoDict based on user-supplied metadata.
141+
142+
A default ``Creator``, ``Producer``, and ``CreationDate`` are added, though
143+
the user metadata may override them. The date may be the current time, or a
144+
time set by the ``SOURCE_DATE_EPOCH`` environment variable.
145+
146+
Metadata is verified to have the correct keys and their expected types. Any
147+
unknown keys/types will raise a warning.
148+
149+
Parameters
150+
----------
151+
backend : str
152+
The name of the backend to use in the Producer value.
153+
metadata : Dict[str, str]
154+
A dictionary of metadata supplied by the user with information
155+
following the PDF specification, also defined in
156+
`~.backend_pdf.PdfPages` below.
157+
158+
If any value is *None*, then the key will be removed. This can be used
159+
to remove any pre-defined values.
160+
161+
Returns
162+
-------
163+
Dict[str, str]
164+
A validated dictionary of metadata.
165+
"""
166+
167+
# get source date from SOURCE_DATE_EPOCH, if set
168+
# See https://reproducible-builds.org/specs/source-date-epoch/
169+
source_date_epoch = os.getenv("SOURCE_DATE_EPOCH")
170+
if source_date_epoch:
171+
source_date = datetime.utcfromtimestamp(int(source_date_epoch))
172+
source_date = source_date.replace(tzinfo=UTC)
173+
else:
174+
source_date = datetime.today()
175+
176+
info = {
177+
'Creator': f'Matplotlib v{mpl.__version__}, https://matplotlib.org',
178+
'Producer': f'Matplotlib {backend} backend v{mpl.__version__}',
179+
'CreationDate': source_date,
180+
**metadata
181+
}
182+
info = {k: v for (k, v) in info.items() if v is not None}
183+
184+
def is_string_like(x):
185+
return isinstance(x, str)
186+
187+
def is_date(x):
188+
return isinstance(x, datetime)
189+
190+
check_trapped = (lambda x: isinstance(x, Name) and
191+
x.name in ('True', 'False', 'Unknown'))
192+
193+
keywords = {
194+
'Title': is_string_like,
195+
'Author': is_string_like,
196+
'Subject': is_string_like,
197+
'Keywords': is_string_like,
198+
'Creator': is_string_like,
199+
'Producer': is_string_like,
200+
'CreationDate': is_date,
201+
'ModDate': is_date,
202+
'Trapped': check_trapped,
203+
}
204+
for k in info:
205+
if k not in keywords:
206+
cbook._warn_external(f'Unknown infodict keyword: {k}')
207+
elif not keywords[k](info[k]):
208+
cbook._warn_external(f'Bad value for infodict keyword {k}')
209+
210+
return info
211+
212+
138213
def pdfRepr(obj):
139214
"""Map Python objects to PDF syntax."""
140215

@@ -503,24 +578,7 @@ def __init__(self, filename, metadata=None):
503578
'Pages': self.pagesObject}
504579
self.writeObject(self.rootObject, root)
505580

506-
# get source date from SOURCE_DATE_EPOCH, if set
507-
# See https://reproducible-builds.org/specs/source-date-epoch/
508-
source_date_epoch = os.getenv("SOURCE_DATE_EPOCH")
509-
if source_date_epoch:
510-
source_date = datetime.utcfromtimestamp(int(source_date_epoch))
511-
source_date = source_date.replace(tzinfo=UTC)
512-
else:
513-
source_date = datetime.today()
514-
515-
self.infoDict = {
516-
'Creator': f'matplotlib {mpl.__version__}, http://matplotlib.org',
517-
'Producer': f'matplotlib pdf backend {mpl.__version__}',
518-
'CreationDate': source_date
519-
}
520-
if metadata is not None:
521-
self.infoDict.update(metadata)
522-
self.infoDict = {k: v for (k, v) in self.infoDict.items()
523-
if v is not None}
581+
self.infoDict = _create_pdf_info_dict('pdf', metadata or {})
524582

525583
self.fontNames = {} # maps filenames to internal font names
526584
self._internal_font_seq = (Name(f'F{i}') for i in itertools.count(1))
@@ -1640,32 +1698,6 @@ def writeXref(self):
16401698
def writeInfoDict(self):
16411699
"""Write out the info dictionary (already validated when it was created)."""
16421700

1643-
def is_string_like(x):
1644-
return isinstance(x, str)
1645-
1646-
def is_date(x):
1647-
return isinstance(x, datetime)
1648-
1649-
check_trapped = (lambda x: isinstance(x, Name) and
1650-
x.name in ('True', 'False', 'Unknown'))
1651-
1652-
keywords = {'Title': is_string_like,
1653-
'Author': is_string_like,
1654-
'Subject': is_string_like,
1655-
'Keywords': is_string_like,
1656-
'Creator': is_string_like,
1657-
'Producer': is_string_like,
1658-
'CreationDate': is_date,
1659-
'ModDate': is_date,
1660-
'Trapped': check_trapped}
1661-
for k in self.infoDict:
1662-
if k not in keywords:
1663-
cbook._warn_external('Unknown infodict keyword: %s' % k)
1664-
else:
1665-
if not keywords[k](self.infoDict[k]):
1666-
cbook._warn_external(
1667-
'Bad value for infodict keyword %s' % k)
1668-
16691701
self.infoObject = self.reserveObject('info')
16701702
self.writeObject(self.infoObject, self.infoDict)
16711703

lib/matplotlib/backends/backend_pgf.py

Lines changed: 13 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
_Backend, FigureCanvasBase, FigureManagerBase, GraphicsContextBase,
2121
RendererBase)
2222
from matplotlib.backends.backend_mixed import MixedModeRenderer
23+
from matplotlib.backends.backend_pdf import _create_pdf_info_dict
2324
from matplotlib.path import Path
2425
from matplotlib.figure import Figure
2526
from matplotlib._pylab_helpers import Gcf
@@ -989,7 +990,8 @@ class PdfPages:
989990
'_fname_pdf',
990991
'_n_figures',
991992
'_file',
992-
'metadata',
993+
'_info_dict',
994+
'_metadata',
993995
)
994996

995997
def __init__(self, filename, *, keep_empty=True, metadata=None):
@@ -1017,7 +1019,7 @@ def __init__(self, filename, *, keep_empty=True, metadata=None):
10171019
self._outputfile = filename
10181020
self._n_figures = 0
10191021
self.keep_empty = keep_empty
1020-
self.metadata = (metadata or {}).copy()
1022+
self._metadata = (metadata or {}).copy()
10211023
if metadata:
10221024
for key in metadata:
10231025
canonical = {
@@ -1030,7 +1032,8 @@ def __init__(self, filename, *, keep_empty=True, metadata=None):
10301032
'case-insensitively is deprecated since %(since)s and '
10311033
'will be removed %(removal)s; '
10321034
f'set {canonical} instead of {key}.')
1033-
self.metadata[canonical] = self.metadata.pop(key)
1035+
self._metadata[canonical] = self._metadata.pop(key)
1036+
self._info_dict = _create_pdf_info_dict('pgf', self._metadata)
10341037

10351038
# create temporary directory for compiling the figure
10361039
self._tmpdir = tempfile.mkdtemp(prefix="mpl_pgf_pdfpages_")
@@ -1039,23 +1042,14 @@ def __init__(self, filename, *, keep_empty=True, metadata=None):
10391042
self._fname_pdf = os.path.join(self._tmpdir, self._basename + ".pdf")
10401043
self._file = open(self._fname_tex, 'wb')
10411044

1045+
@cbook.deprecated('3.3')
1046+
@property
1047+
def metadata(self):
1048+
return self._metadata
1049+
10421050
def _write_header(self, width_inches, height_inches):
1043-
supported_keys = {
1044-
'Title', 'Author', 'Subject', 'Keywords', 'Creator',
1045-
'Producer', 'Trapped'
1046-
}
1047-
infoDict = {
1048-
'Creator': f'matplotlib {mpl.__version__}, https://matplotlib.org',
1049-
'Producer': f'matplotlib pgf backend {mpl.__version__}',
1050-
**self.metadata
1051-
}
1052-
hyperref_options = ''
1053-
for k, v in infoDict.items():
1054-
if k not in supported_keys:
1055-
raise ValueError(
1056-
'Not a supported pdf metadata field: "{}"'.format(k)
1057-
)
1058-
hyperref_options += k + '={' + str(v) + '},'
1051+
hyperref_options = ','.join(
1052+
f'{k}={{{v}}}' for k, v in self._info_dict.items())
10591053

10601054
latex_preamble = get_preamble()
10611055
latex_fontspec = get_fontspec()

lib/matplotlib/tests/test_backend_pgf.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from datetime import datetime
12
from io import BytesIO
23
import os
34
from pathlib import Path
@@ -255,7 +256,14 @@ def test_pdf_pages_metadata():
255256
ax.plot(range(5))
256257
fig.tight_layout()
257258

258-
md = {'Author': 'me', 'Title': 'Multipage PDF with pgf'}
259+
md = {
260+
'Author': 'me',
261+
'Title': 'Multipage PDF with pgf',
262+
'Subject': 'Test page',
263+
'Keywords': 'test,pdf,multipage',
264+
'ModDate': datetime(1968, 8, 1),
265+
}
266+
259267
path = os.path.join(result_dir, 'pdfpages_meta.pdf')
260268

261269
with PdfPages(path, metadata=md) as pdf:

0 commit comments

Comments
 (0)
0