pdf/pgf: Move PDF metadata checks to a common function. · matplotlib/matplotlib@1127bbd · GitHub
[go: up one dir, main page]

Skip to content

Commit 1127bbd

Browse files
committed
pdf/pgf: Move PDF metadata checks to a common function.
This ensures that the same default values are produced, and more importantly ensures that the same keys are accepted. Previously, PGF only allowed a subset of keys.
1 parent c4613f8 commit 1127bbd

File tree

3 files changed

+136
-80
lines changed

3 files changed

+136
-80
lines changed

lib/matplotlib/backends/backend_pdf.py

Lines changed: 101 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,105 @@ def _string_escape(match):
135135
assert False
136136

137137

138+
def _create_pdf_info_dict(backend, metadata):
    """
    Build the PDF information dictionary from user-supplied metadata.

    Default ``Creator``, ``Producer`` and ``CreationDate`` entries are
    provided; entries in *metadata* take precedence over them. The creation
    date is taken from the ``SOURCE_DATE_EPOCH`` environment variable when it
    is set (and non-empty), otherwise from the current local time.

    Every resulting entry is checked against the set of known keys and the
    value type expected for that key. Unknown keys and badly typed values
    only trigger a warning; they are still returned.

    Parameters
    ----------
    backend : str
        Backend name inserted into the default ``Producer`` value.
    metadata : Dict[str, Union[str, datetime, Name]]
        User metadata, keyed as in the PDF specification. An entry whose
        value is *None* is dropped from the result, which allows removing
        one of the default entries.

    Returns
    -------
    Dict[str, Union[str, datetime, Name]]
        The merged metadata with all *None*-valued entries removed.
    """
    # Reproducible builds: honour SOURCE_DATE_EPOCH when present.
    # See https://reproducible-builds.org/specs/source-date-epoch/
    epoch = os.getenv("SOURCE_DATE_EPOCH")
    if not epoch:
        creation_date = datetime.today()
    else:
        creation_date = datetime.utcfromtimestamp(int(epoch))
        creation_date = creation_date.replace(tzinfo=UTC)

    defaults = {
        'Creator': f'Matplotlib v{mpl.__version__}, https://matplotlib.org',
        'Producer': f'Matplotlib {backend} backend v{mpl.__version__}',
        'CreationDate': creation_date,
    }
    # User entries come last so that they override the defaults; a value of
    # None then removes the entry altogether.
    merged = {**defaults, **metadata}
    info = {key: value for key, value in merged.items()
            if value is not None}

    def _is_str(value):
        return isinstance(value, str)

    def _is_datetime(value):
        return isinstance(value, datetime)

    def _is_trapped(value):
        return (isinstance(value, Name) and
                value.name in (b'True', b'False', b'Unknown'))

    validators = {
        'Title': _is_str,
        'Author': _is_str,
        'Subject': _is_str,
        'Keywords': _is_str,
        'Creator': _is_str,
        'Producer': _is_str,
        'CreationDate': _is_datetime,
        'ModDate': _is_datetime,
        'Trapped': _is_trapped,
    }
    for key, value in info.items():
        validator = validators.get(key)
        if validator is None:
            cbook._warn_external(f'Unknown infodict keyword: {key}')
        elif not validator(value):
            cbook._warn_external(f'Bad value for infodict keyword {key}')

    return info
211+
212+
213+
def _datetime_to_pdf(d):
214+
"""
215+
Convert a datetime to a PDF string representing it.
216+
217+
Used for PDF and PGF.
218+
"""
219+
r = d.strftime('D:%Y%m%d%H%M%S')
220+
z = d.utcoffset()
221+
if z is not None:
222+
z = z.seconds
223+
else:
224+
if time.daylight:
225+
z = time.altzone
226+
else:
227+
z = time.timezone
228+
if z == 0:
229+
r += 'Z'
230+
elif z < 0:
231+
r += "+%02d'%02d'" % ((-z) // 3600, (-z) % 3600)
232+
else:
233+
r += "-%02d'%02d'" % (z // 3600, z % 3600)
234+
return r
235+
236+
138237
def pdfRepr(obj):
139238
"""Map Python objects to PDF syntax."""
140239

@@ -199,22 +298,7 @@ def pdfRepr(obj):
199298

200299
# A date.
201300
elif isinstance(obj, datetime):
202-
r = obj.strftime('D:%Y%m%d%H%M%S')
203-
z = obj.utcoffset()
204-
if z is not None:
205-
z = z.seconds
206-
else:
207-
if time.daylight:
208-
z = time.altzone
209-
else:
210-
z = time.timezone
211-
if z == 0:
212-
r += 'Z'
213-
elif z < 0:
214-
r += "+%02d'%02d'" % ((-z) // 3600, (-z) % 3600)
215-
else:
216-
r += "-%02d'%02d'" % (z // 3600, z % 3600)
217-
return pdfRepr(r)
301+
return pdfRepr(_datetime_to_pdf(obj))
218302

219303
# A bounding box
220304
elif isinstance(obj, BboxBase):
@@ -503,24 +587,7 @@ def __init__(self, filename, metadata=None):
503587
'Pages': self.pagesObject}
504588
self.writeObject(self.rootObject, root)
505589

506-
# get source date from SOURCE_DATE_EPOCH, if set
507-
# See https://reproducible-builds.org/specs/source-date-epoch/
508-
source_date_epoch = os.getenv("SOURCE_DATE_EPOCH")
509-
if source_date_epoch:
510-
source_date = datetime.utcfromtimestamp(int(source_date_epoch))
511-
source_date = source_date.replace(tzinfo=UTC)
512-
else:
513-
source_date = datetime.today()
514-
515-
self.infoDict = {
516-
'Creator': f'matplotlib {mpl.__version__}, http://matplotlib.org',
517-
'Producer': f'matplotlib pdf backend {mpl.__version__}',
518-
'CreationDate': source_date
519-
}
520-
if metadata is not None:
521-
self.infoDict.update(metadata)
522-
self.infoDict = {k: v for (k, v) in self.infoDict.items()
523-
if v is not None}
590+
self.infoDict = _create_pdf_info_dict('pdf', metadata or {})
524591

525592
self.fontNames = {} # maps filenames to internal font names
526593
self._internal_font_seq = (Name(f'F{i}') for i in itertools.count(1))
@@ -1640,32 +1707,6 @@ def writeXref(self):
16401707
def writeInfoDict(self):
16411708
"""Write out the info dictionary, checking it for good form"""
16421709

1643-
def is_string_like(x):
1644-
return isinstance(x, str)
1645-
1646-
def is_date(x):
1647-
return isinstance(x, datetime)
1648-
1649-
check_trapped = (lambda x: isinstance(x, Name) and
1650-
x.name in ('True', 'False', 'Unknown'))
1651-
1652-
keywords = {'Title': is_string_like,
1653-
'Author': is_string_like,
1654-
'Subject': is_string_like,
1655-
'Keywords': is_string_like,
1656-
'Creator': is_string_like,
1657-
'Producer': is_string_like,
1658-
'CreationDate': is_date,
1659-
'ModDate': is_date,
1660-
'Trapped': check_trapped}
1661-
for k in self.infoDict:
1662-
if k not in keywords:
1663-
cbook._warn_external('Unknown infodict keyword: %s' % k)
1664-
else:
1665-
if not keywords[k](self.infoDict[k]):
1666-
cbook._warn_external(
1667-
'Bad value for infodict keyword %s' % k)
1668-
16691710
self.infoObject = self.reserveObject('info')
16701711
self.writeObject(self.infoObject, self.infoDict)
16711712

lib/matplotlib/backends/backend_pgf.py

Lines changed: 26 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import atexit
22
import codecs
3+
import datetime
34
import functools
45
import logging
56
import math
@@ -20,6 +21,8 @@
2021
_Backend, FigureCanvasBase, FigureManagerBase, GraphicsContextBase,
2122
RendererBase)
2223
from matplotlib.backends.backend_mixed import MixedModeRenderer
24+
from matplotlib.backends.backend_pdf import (
25+
_create_pdf_info_dict, _datetime_to_pdf)
2326
from matplotlib.path import Path
2427
from matplotlib.figure import Figure
2528
from matplotlib._pylab_helpers import Gcf
@@ -157,6 +160,17 @@ def _font_properties_str(prop):
157160
return "".join(commands)
158161

159162

163+
def _metadata_to_str(key, value):
164+
"""Convert metadata key/value to a form that hyperref accepts."""
165+
if isinstance(value, datetime.datetime):
166+
value = _datetime_to_pdf(value)
167+
elif key == 'Trapped':
168+
value = value.name.decode('ascii')
169+
else:
170+
value = str(value)
171+
return f'{key}={{{value}}}'
172+
173+
160174
def make_pdf_to_png_converter():
161175
"""Return a function that converts a pdf file to a png file."""
162176
if shutil.which("pdftocairo"):
@@ -989,7 +1003,8 @@ class PdfPages:
9891003
'_fname_pdf',
9901004
'_n_figures',
9911005
'_file',
992-
'metadata',
1006+
'_info_dict',
1007+
'_metadata',
9931008
)
9941009

9951010
def __init__(self, filename, *, keep_empty=True, metadata=None):
@@ -1017,7 +1032,7 @@ def __init__(self, filename, *, keep_empty=True, metadata=None):
10171032
self._outputfile = filename
10181033
self._n_figures = 0
10191034
self.keep_empty = keep_empty
1020-
self.metadata = (metadata or {}).copy()
1035+
self._metadata = (metadata or {}).copy()
10211036
if metadata:
10221037
for key in metadata:
10231038
canonical = {
@@ -1030,7 +1045,8 @@ def __init__(self, filename, *, keep_empty=True, metadata=None):
10301045
'case-insensitively is deprecated since %(since)s and '
10311046
'will be removed %(removal)s; '
10321047
f'set {canonical} instead of {key}.')
1033-
self.metadata[canonical] = self.metadata.pop(key)
1048+
self._metadata[canonical] = self._metadata.pop(key)
1049+
self._info_dict = _create_pdf_info_dict('pgf', self._metadata)
10341050

10351051
# create temporary directory for compiling the figure
10361052
self._tmpdir = tempfile.mkdtemp(prefix="mpl_pgf_pdfpages_")
@@ -1039,23 +1055,14 @@ def __init__(self, filename, *, keep_empty=True, metadata=None):
10391055
self._fname_pdf = os.path.join(self._tmpdir, self._basename + ".pdf")
10401056
self._file = open(self._fname_tex, 'wb')
10411057

1058+
@cbook.deprecated('3.3')
1059+
@property
1060+
def metadata(self):
1061+
return self._metadata
1062+
10421063
def _write_header(self, width_inches, height_inches):
1043-
supported_keys = {
1044-
'Title', 'Author', 'Subject', 'Keywords', 'Creator',
1045-
'Producer', 'Trapped'
1046-
}
1047-
infoDict = {
1048-
'Creator': f'matplotlib {mpl.__version__}, https://matplotlib.org',
1049-
'Producer': f'matplotlib pgf backend {mpl.__version__}',
1050-
**self.metadata
1051-
}
1052-
hyperref_options = ''
1053-
for k, v in infoDict.items():
1054-
if k not in supported_keys:
1055-
raise ValueError(
1056-
'Not a supported pdf metadata field: "{}"'.format(k)
1057-
)
1058-
hyperref_options += k + '={' + str(v) + '},'
1064+
hyperref_options = ','.join(
1065+
_metadata_to_str(k, v) for k, v in self._info_dict.items())
10591066

10601067
latex_preamble = get_preamble()
10611068
latex_fontspec = get_fontspec()

lib/matplotlib/tests/test_backend_pgf.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from datetime import datetime
12
from io import BytesIO
23
import os
34
from pathlib import Path
@@ -255,7 +256,14 @@ def test_pdf_pages_metadata():
255256
ax.plot(range(5))
256257
fig.tight_layout()
257258

258-
md = {'Author': 'me', 'Title': 'Multipage PDF with pgf'}
259+
md = {
260+
'Author': 'me',
261+
'Title': 'Multipage PDF with pgf',
262+
'Subject': 'Test page',
263+
'Keywords': 'test,pdf,multipage',
264+
'ModDate': datetime(1968, 8, 1),
265+
}
266+
259267
path = os.path.join(result_dir, 'pdfpages_meta.pdf')
260268

261269
with PdfPages(path, metadata=md) as pdf:

0 commit comments

Comments
 (0)
0