Improve PDF metadata support in PGF by QuLogic · Pull Request #17233 · matplotlib/matplotlib · GitHub
[go: up one dir, main page]

Skip to content

Improve PDF metadata support in PGF #17233

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Jun 8, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions doc/api/api_changes_3.3/deprecations.rst
Original file line number Diff line number Diff line change
Expand Up @@ -593,3 +593,11 @@ APIs which support the values True, False, and "TeX" for ``ismath``.
``matplotlib.ttconv``
~~~~~~~~~~~~~~~~~~~~~
This module is deprecated.

Stricter PDF metadata keys in PGF
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Saving metadata in PDF with the PGF backend currently normalizes all keys to
lowercase, unlike the PDF backend, which only accepts the canonical case. This
is deprecated; in a future version, only the canonically cased keys listed in
the PDF specification (and the `~.backend_pgf.PdfPages` documentation) will be
accepted.
8 changes: 8 additions & 0 deletions doc/users/next_whats_new/2020-04-24-ES-pdf-pgf-metadata.rst
<td class="blob-num blob-num-addition empty-cell">
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
Saving PDF metadata via PGF now consistent with PDF backend
-----------------------------------------------------------

When saving PDF files using the PGF backend, passed metadata will be
interpreted in the same way as with the PDF backend. Previously, this metadata
was only accepted by the PGF backend when saving a multi-page PDF with
`.backend_pgf.PdfPages`, but is now allowed when saving a single figure, as
well.
166 changes: 106 additions & 60 deletions lib/matplotlib/backends/backend_pdf.py
628C
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,110 @@ def _string_escape(match):
assert False


def _create_pdf_info_dict(backend, metadata):
    """
    Build and validate the PDF information dictionary.

    Default ``Creator``, ``Producer`` and ``CreationDate`` entries are
    generated first and then overlaid with the user-supplied *metadata*, so
    user values take precedence. The default creation date is taken from the
    ``SOURCE_DATE_EPOCH`` environment variable when it is set, and from the
    current time otherwise.

    Every remaining entry is checked against the set of known keys and the
    value type expected for each; an unknown key or an unexpected value only
    triggers a warning, the entry itself is kept.

    Parameters
    ----------
    backend : str
        Backend name inserted into the default ``Producer`` string.
    metadata : Dict[str, Union[str, datetime, Name]]
        User-supplied metadata, keyed by the names from the PDF
        specification (also listed in `~.backend_pdf.PdfPages`).

        An entry whose value is *None* is dropped from the result, which
        allows removing one of the pre-defined defaults.

    Returns
    -------
    Dict[str, Union[str, datetime, Name]]
        The merged metadata; a ``Trapped`` entry, if present, is wrapped in
        `Name`.
    """
    # SOURCE_DATE_EPOCH pins the timestamp for reproducible builds.
    # See https://reproducible-builds.org/specs/source-date-epoch/
    epoch = os.getenv("SOURCE_DATE_EPOCH")
    if epoch:
        created = datetime.utcfromtimestamp(int(epoch)).replace(tzinfo=UTC)
    else:
        created = datetime.today()

    defaults = {
        'Creator': f'Matplotlib v{mpl.__version__}, https://matplotlib.org',
        'Producer': f'Matplotlib {backend} backend v{mpl.__version__}',
        'CreationDate': created,
    }
    # User entries override the defaults; None means "remove this key".
    info = {
        key: value
        for key, value in {**defaults, **metadata}.items()
        if value is not None
    }

    def _is_text(value):
        return isinstance(value, str)

    def _is_datetime(value):
        return isinstance(value, datetime)

    def _is_trapped(value):
        # A Name stores its value as bytes; plain strings are accepted too.
        if isinstance(value, Name):
            return value.name in (b'True', b'False', b'Unknown')
        return value in ('True', 'False', 'Unknown')

    validators = dict.fromkeys(
        ('Title', 'Author', 'Subject', 'Keywords', 'Creator', 'Producer'),
        _is_text)
    validators.update(
        CreationDate=_is_datetime, ModDate=_is_datetime, Trapped=_is_trapped)

    for key, value in info.items():
        validate = validators.get(key)
        if validate is None:
            cbook._warn_external(f'Unknown infodict keyword: {key}')
        elif not validate(value):
            cbook._warn_external(f'Bad value for infodict keyword {key}')

    if 'Trapped' in info:
        info['Trapped'] = Name(info['Trapped'])

    return info


def _datetime_to_pdf(d):
    """
    Convert a datetime to a PDF string representing it.

    Used for PDF and PGF.

    The result has the form ``D:YYYYMMDDHHmmSS`` followed by ``Z`` when the
    offset from UTC is zero, or by ``+HH'mm'`` / ``-HH'mm'`` otherwise, where
    ``+`` means local time is ahead of (east of) UTC.

    Parameters
    ----------
    d : datetime.datetime
        The timestamp to format. If it is timezone-aware its own UTC offset
        is used; if it is naive, the process-wide local timezone from the
        `time` module is used instead.

    Returns
    -------
    str
        The PDF date string.
    """
    r = d.strftime('D:%Y%m%d%H%M%S')
    offset = d.utcoffset()
    if offset is not None:
        # utcoffset() is positive *east* of UTC. total_seconds() is required
        # here: timedelta.seconds is always non-negative, so a negative
        # offset such as -08:00 would otherwise wrap around to 16 hours.
        east = int(offset.total_seconds())
    else:
        # time.altzone / time.timezone are seconds *west* of UTC, hence the
        # sign flip to bring both branches to the same convention.
        # NOTE(review): time.daylight only says a DST zone is defined, not
        # that DST applies to ``d``; kept as in the original behaviour.
        west = time.altzone if time.daylight else time.timezone
        east = -west
    if east == 0:
        return r + 'Z'
    sign = '+' if east > 0 else '-'
    hours, remainder = divmod(abs(east), 3600)
    # The second field is minutes, not the leftover seconds.
    return r + "%s%02d'%02d'" % (sign, hours, remainder // 60)


def pdfRepr(obj):
"""Map Python objects to PDF syntax."""

Expand Down Expand Up @@ -199,22 +303,7 @@ def pdfRepr(obj):

# A date.
elif isinstance(obj, datetime):
r = obj.strftime('D:%Y%m%d%H%M%S')
z = obj.utcoffset()
if z is not None:
z = z.seconds
else:
if time.daylight:
z = time.altzone
else:
z = time.timezone
if z == 0:
r += 'Z'
elif z < 0:
r += "+%02d'%02d'" % ((-z) // 3600, (-z) % 3600)
else:
r += "-%02d'%02d'" % (z // 3600, z % 3600)
return pdfRepr(r)
return pdfRepr(_datetime_to_pdf(obj))

# A bounding box
elif isinstance(obj, BboxBase):
Expand Down Expand Up @@ -503,24 +592,7 @@ def __init__(self, filename, metadata=None):
'Pages': self.pagesObject}
self.writeObject(self.rootObject, root)

# get source date from SOURCE_DATE_EPOCH, if set
# See https://reproducible-builds.org/specs/source-date-epoch/
source_date_epoch = os.getenv("SOURCE_DATE_EPOCH")
if source_date_epoch:
source_date = datetime.utcfromtimestamp(int(source_date_epoch))
source_date = source_date.replace(tzinfo=UTC)
else:
source_date = datetime.today()

self.infoDict = {
'Creator': f'matplotlib {mpl.__version__}, http://matplotlib.org',
'Producer': f'matplotlib pdf backend {mpl.__version__}',
'CreationDate': source_date
}
if metadata is not None:
self.infoDict.update(metadata)
self.infoDict = {k: v for (k, v) in self.infoDict.items()
if v is not None}
self.infoDict = _create_pdf_info_dict('pdf', metadata or {})

self.fontNames = {} # maps filenames to internal font names
self._internal_font_seq = (Name(f'F{i}') for i in itertools.count(1))
Expand Down Expand Up @@ -1640,32 +1712,6 @@ def writeXref(self):
def writeInfoDict(self):
"""Write out the info dictionary, checking it for good form"""

def is_string_like(x):
return isinstance(x, str)

def is_date(x):
return isinstance(x, datetime)

check_trapped = (lambda x: isinstance(x, Name) and
x.name in ('True', 'False', 'Unknown'))

keywords = {'Title': is_string_like,
'Author': is_string_like,
'Subject': is_string_like,
'Keywords': is_string_like,
'Creator': is_string_like,
'Producer': is_string_like,
'CreationDate': is_date,
'ModDate': is_date,
'Trapped': check_trapped}
for k in self.infoDict:
if k not in keywords:
cbook._warn_external('Unknown infodict keyword: %s' % k)
else:
if not keywords[k](self.infoDict[k]):
cbook._warn_external(
'Bad value for infodict keyword %s' % k)

self.infoObject = self.reserveObject('info')
self.writeObject(self.infoObject, self.infoDict)

Expand Down
71 changes: 49 additions & 22 deletions lib/matplotlib/backends/backend_pgf.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import atexit
import codecs
import datetime
import functools
import logging
import math
Expand All @@ -20,6 +21,8 @@
_Backend, FigureCanvasBase, FigureManagerBase, GraphicsContextBase,
RendererBase)
from matplotlib.backends.backend_mixed import MixedModeRenderer
from matplotlib.backends.backend_pdf import (
_create_pdf_info_dict, _datetime_to_pdf)
from matplotlib.path import Path
from matplotlib.figure import Figure
from matplotlib._pylab_helpers import Gcf
Expand Down Expand Up @@ -157,6 +160,17 @@ def _font_properties_str(prop):
return "".join(commands)


def _metadata_to_str(key, value):
    """
    Render one metadata entry as a ``key={value}`` option for hyperref.

    Datetime values are formatted as PDF date strings, the value of the
    ``Trapped`` key is taken from its ASCII-decoded ``name`` attribute, and
    anything else is passed through `str`.
    """
    if isinstance(value, datetime.datetime):
        rendered = _datetime_to_pdf(value)
    elif key == 'Trapped':
        rendered = value.name.decode('ascii')
    else:
        rendered = str(value)
    # Doubled braces produce the literal braces hyperref expects.
    return '{}={{{}}}'.format(key, rendered)


def make_pdf_to_png_converter():
"""Return a function that converts a pdf file to a png file."""
if shutil.which("pdftocairo"):
Expand Down Expand Up @@ -867,9 +881,13 @@ def print_pgf(self, fname_or_fh, *args, **kwargs):
file = codecs.getwriter("utf-8")(file)
self._print_pgf_to_fh(file, *args, **kwargs)

def _print_pdf_to_fh(self, fh, *args, **kwargs):
def _print_pdf_to_fh(self, fh, *args, metadata=None, **kwargs):
w, h = self.figure.get_figwidth(), self.figure.get_figheight()

info_dict = _create_pdf_info_dict('pgf', metadata or {})
hyperref_options = ','.join(
_metadata_to_str(k, v) for k, v in info_dict.items())

try:
# create temporary directory for compiling the figure
tmpdir = tempfile.mkdtemp(prefix="mpl_pgf_")
Expand All @@ -883,6 +901,8 @@ def _print_pdf_to_fh(self, fh, *args, **kwargs):
latex_preamble = get_preamble()
latex_fontspec = get_fontspec()
latexcode = """
\\PassOptionsToPackage{pdfinfo={%s}}{hyperref}
\\RequirePackage{hyperref}
\\documentclass[12pt]{minimal}
\\usepackage[paperwidth=%fin, paperheight=%fin, margin=0in]{geometry}
%s
Expand All @@ -892,7 +912,7 @@ def _print_pdf_to_fh(self, fh, *args, **kwargs):
\\begin{document}
\\centering
\\input{figure.pgf}
\\end{document}""" % (w, h, latex_preamble, latex_fontspec)
\\end{document}""" % (hyperref_options, w, h, latex_preamble, latex_fontspec)
pathlib.Path(fname_tex).write_text(latexcode, encoding="utf-8")

texcommand = mpl.rcParams["pgf.texsystem"]
Expand Down Expand Up @@ -989,7 +1009,8 @@ class PdfPages:
'_fname_pdf',
'_n_figures',
'_file',
'metadata',
'_info_dict',
'_metadata',
)

def __init__(self, filename, *, keep_empty=True, metadata=None):
Expand Down Expand Up @@ -1017,7 +1038,21 @@ def __init__(self, filename, *, keep_empty=True, metadata=None):
self._outputfile = filename
self._n_figures = 0
self.keep_empty = keep_empty
self.metadata = metadata or {}
self._metadata = (metadata or {}).copy()
if metadata:
for key in metadata:
canonical = {
'creationdate': 'CreationDate',
'moddate': 'ModDate',
}.get(key.lower(), key.lower().title())
if canonical != key:
cbook.warn_deprecated(
'3.3', message='Support for setting PDF metadata keys '
'case-insensitively is deprecated since %(since)s and '
'will be removed %(removal)s; '
f'set {canonical} instead of {key}.')
self._metadata[canonical] = self._metadata.pop(key)
self._info_dict = _create_pdf_info_dict('pgf', self._metadata)

# create temporary directory for compiling the figure
self._tmpdir = tempfile.mkdtemp(prefix="mpl_pgf_pdfpages_")
Expand All @@ -1026,29 +1061,21 @@ def __init__(self, filename, *, keep_empty=True, metadata=None):
self._fname_pdf = os.path.join(self._tmpdir, self._basename + ".pdf")
self._file = open(self._fname_tex, 'wb')

# Read-only accessor, marked deprecated since 3.3, that returns the metadata
# dictionary held in ``self._metadata``.
@cbook.deprecated('3.3')
@property
def metadata(self):
return self._metadata

def _write_header(self, width_inches, height_inches):
supported_keys = {
'title', 'author', 'subject', 'keywords', 'creator',
'producer', 'trapped'
}
infoDict = {
'creator': f'matplotlib {mpl.__version__}, https://matplotlib.org',
'producer': f'matplotlib pgf backend {mpl.__version__}',
}
metadata = {k.lower(): v for k, v in self.metadata.items()}
infoDict.update(metadata)
hyperref_options = ''
for k, v in infoDict.items():
if k not in supported_keys:
raise ValueError(
'Not a supported pdf metadata field: "{}"'.format(k)
)
hyperref_options += 'pdf' + k + '={' + str(v) + '},'
hyperref_options = ','.join(
_metadata_to_str(k, v) for k, v in self._info_dict.items())

latex_preamble = get_preamble()
latex_fontspec = get_fontspec()
latex_header = r"""\PassOptionsToPackage{{
{metadata}
pdfinfo={{
{metadata}
}}
}}{{hyperref}}
\RequirePackage{{hyperref}}
\documentclass[12pt]{{minimal}}
Expand Down
Loading
0