Merge pull request #4605 from jkseppan/png-in-pdf · matplotlib/matplotlib@336c1bb · GitHub
[go: up one dir, main page]

Skip to content

Commit 336c1bb

Browse files
committed
Merge pull request #4605 from jkseppan/png-in-pdf
ENH: Use png predictors when compressing images in pdf files
2 parents bc185a2 + 18dcc54 commit 336c1bb

File tree

2 files changed

+141
-74
lines changed
  • src
  • 2 files changed

    +141
    -74
    lines changed

    lib/matplotlib/backends/backend_pdf.py

    Lines changed: 96 additions & 61 deletions
    Original file line number | Diff line number | Diff line change
    @@ -12,6 +12,7 @@
    1212
    import codecs
    1313
    import os
    1414
    import re
    15+
    import struct
    1516
    import sys
    1617
    import time
    1718
    import warnings
    @@ -43,6 +44,7 @@
    4344
    from matplotlib.transforms import Affine2D, BboxBase
    4445
    from matplotlib.path import Path
    4546
    from matplotlib import _path
    47+
    from matplotlib import _png
    4648
    from matplotlib import ttconv
    4749

    4850
    # Overview
    @@ -87,7 +89,6 @@
    8789

    8890
    # TODOs:
    8991
    #
    90-
    # * image compression could be improved (PDF supports png-like compression)
    9192
    # * encoding of fonts, including mathtext fonts and unicode support
    9293
    # * TTF support has lots of small TODOs, e.g., how do you know if a font
    9394
    # is serif/sans-serif, or symbolic/non-symbolic?
    @@ -334,11 +335,12 @@ class Stream(object):
    334335
    """
    335336
    __slots__ = ('id', 'len', 'pdfFile', 'file', 'compressobj', 'extra', 'pos')
    336337

    337-
    def __init__(self, id, len, file, extra=None):
    338+
    def __init__(self, id, len, file, extra=None, png=None):
    338339
    """id: object id of stream; len: an unused Reference object for the
    339340
    length of the stream, or None (to use a memory buffer); file:
    340341
    a PdfFile; extra: a dictionary of extra key-value pairs to
    341-
    include in the stream header """
    342+
    include in the stream header; png: if the data is already
    343+
    png compressed, the decode parameters"""
    342344
    self.id = id # object id
    343345
    self.len = len # id of length object
    344346
    self.pdfFile = file
    @@ -347,10 +349,13 @@ def __init__(self, id, len, file, extra=None):
    347349
    if extra is None:
    348350
    self.extra = dict()
    349351
    else:
    350-
    self.extra = extra
    352+
    self.extra = extra.copy()
    353+
    if png is not None:
    354+
    self.extra.update({'Filter': Name('FlateDecode'),
    355+
    'DecodeParms': png})
    351356

    352357
    self.pdfFile.recordXref(self.id)
    353-
    if rcParams['pdf.compression']:
    358+
    if rcParams['pdf.compression'] and not png:
    354359
    self.compressobj = zlib.compressobj(rcParams['pdf.compression'])
    355360
    if self.len is None:
    356361
    self.file = BytesIO()
    @@ -583,9 +588,9 @@ def output(self, *data):
    583588
    self.write(fill([pdfRepr(x) for x in data]))
    584589
    self.write(b'\n')
    585590

    586-
    def beginStream(self, id, len, extra=None):
    591+
    def beginStream(self, id, len, extra=None, png=None):
    587592
    assert self.currentstream is None
    588-
    self.currentstream = Stream(id, len, self, extra)
    593+
    self.currentstream = Stream(id, len, self, extra, png)
    589594

    590595
    def endStream(self):
    591596
    if self.currentstream is not None:
    @@ -1247,73 +1252,103 @@ def imageObject(self, image):
    12471252
    self.images[image] = (name, ob)
    12481253
    return name
    12491254

    1250-
    def _rgb(self, im):
    1251-
    h, w, s = im.as_rgba_str()
    1255+
    def _unpack(self, im):
    1256+
    """
    1257+
    Unpack the image object im into height, width, data, alpha,
    1258+
    where data and alpha are HxWx3 (RGB) or HxWx1 (grayscale or alpha)
    1259+
    arrays, except alpha is None if the image is fully opaque.
    1260+
    """
    12521261

    1262+
    h, w, s = im.as_rgba_str()
    12531263
    rgba = np.fromstring(s, np.uint8)
    12541264
    rgba.shape = (h, w, 4)
    12551265
    rgba = rgba[::-1]
    1256-
    rgb = rgba[:, :, :3].tostring()
    1257-
    a = rgba[:, :, 3]
    1258-
    if np.all(a == 255):
    1266+
    rgb = rgba[:, :, :3]
    1267+
    alpha = rgba[:, :, 3][..., None]
    1268+
    if np.all(alpha == 255):
    12591269
    alpha = None
    12601270
    else:
    1261-
    alpha = a.tostring()
    1262-
    return h, w, rgb, alpha
    1263-
    1264-
    def _gray(self, im, rc=0.3, gc=0.59, bc=0.11):
    1265-
    rgbat = im.as_rgba_str()
    1266-
    rgba = np.fromstring(rgbat[2], np.uint8)
    1267-
    rgba.shape = (rgbat[0], rgbat[1], 4)
    1268-
    rgba = rgba[::-1]
    1269-
    rgba_f = rgba.astype(np.float32)
    1270-
    r = rgba_f[:, :, 0]
    1271-
    g = rgba_f[:, :, 1]
    1272-
    b = rgba_f[:, :, 2]
    1273-
    a = rgba[:, :, 3]
    1274-
    if np.all(a == 255):
    1275-
    alpha = None
    1271+
    alpha = np.array(alpha, order='C')
    1272+
    if im.is_grayscale:
    1273+
    r, g, b = rgb.astype(np.float32).transpose(2, 0, 1)
    1274+
    gray = (0.3 * r + 0.59 * g + 0.11 * b).astype(np.uint8)[..., None]
    1275+
    return h, w, gray, alpha
    12761276
    else:
    1277-
    alpha = a.tostring()
    1278-
    gray = (r*rc + g*gc + b*bc).astype(np.uint8).tostring()
    1279-
    return rgbat[0], rgbat[1], gray, alpha
    1277+
    rgb = np.array(rgb, order='C')
    1278+
    return h, w, rgb, alpha
    12801279

    1281-
    def writeImages(self):
    1282-
    for img, pair in six.iteritems(self.images):
    1283-
    if img.is_grayscale:
    1284-
    height, width, data, adata = self._gray(img)
    1280+
    def _writePng(self, data):
    1281+
    """
    1282+
    Write the image *data* into the pdf file using png
    1283+
    predictors with Flate compression.
    1284+
    """
    1285+
    1286+
    buffer = BytesIO()
    1287+
    _png.write_png(data, buffer)
    1288+
    buffer.seek(8)
    1289+
    written = 0
    1290+
    header = bytearray(8)
    1291+
    while True:
    1292+
    n = buffer.readinto(header)
    1293+
    assert n == 8
    1294+
    length, type = struct.unpack(b'!L4s', bytes(header))
    1295+
    if type == b'IDAT':
    1296+
    data = bytearray(length)
    1297+
    n = buffer.readinto(data)
    1298+
    assert n == length
    1299+
    self.currentstream.write(bytes(data))
    1300+
    written += n
    1301+
    elif type == b'IEND':
    1302+
    break
    12851303
    else:
    1286-
    height, width, data, adata = self._rgb(img)
    1304+
    buffer.seek(length, 1)
    1305+
    buffer.seek(4, 1) # skip CRC
    1306+
    1307+
    def _writeImg(self, data, height, width, grayscale, id, smask=None):
    1308+
    """
    1309+
    Write the image *data* of size *height* x *width*, as grayscale
    1310+
    if *grayscale* is true and RGB otherwise, as pdf object *id*
    1311+
    and with the soft mask (alpha channel) *smask*, which should be
    1312+
    either None or a *height* x *width* x 1 array.
    1313+
    """
    12871314

    1288-
    colorspace = 'DeviceGray' if img.is_grayscale else 'DeviceRGB'
    1289-
    obj = {'Type': Name('XObject'),
    1290-
    'Subtype': Name('Image'),
    1291-
    'Width': width,
    1292-
    'Height': height,
    1293-
    'ColorSpace': Name(colorspace),
    1294-
    'BitsPerComponent': 8}
    1315+
    obj = {'Type': Name('XObject'),
    1316+
    'Subtype': Name('Image'),
    1317+
    'Width': width,
    1318+
    'Height': height,
    1319+
    'ColorSpace': Name('DeviceGray' if grayscale
    1320+
    else 'DeviceRGB'),
    1321+
    'BitsPerComponent': 8}
    1322+
    if smask:
    1323+
    obj['SMask'] = smask
    1324+
    if rcParams['pdf.compression']:
    1325+
    png = {'Predictor': 10,
    1326+
    'Colors': 1 if grayscale else 3,
    1327+
    'Columns': width}
    1328+
    else:
    1329+
    png = None
    1330+
    self.beginStream(
    1331+
    id,
    1332+
    self.reserveObject('length of image stream'),
    1333+
    obj,
    1334+
    png=png
    1335+
    )
    1336+
    if png:
    1337+
    self._writePng(data)
    1338+
    else:
    1339+
    self.currentstream.write(data.tostring())
    1340+
    self.endStream()
    12951341

    1342+
    def writeImages(self):
    1343+
    for img, pair in six.iteritems(self.images):
    1344+
    height, width, data, adata = self._unpack(img)
    12961345
    if adata is not None:
    12971346
    smaskObject = self.reserveObject("smask")
    1298-
    self.beginStream(
    1299-
    smaskObject.id,
    1300-
    self.reserveObject('length of smask stream'),
    1301-
    {'Type': Name('XObject'), 'Subtype': Name('Image'),
    1302-
    'Width': width, 'Height': height,
    1303-
    'ColorSpace': Name('DeviceGray'), 'BitsPerComponent': 8})
    1304-
    # TODO: predictors (i.e., output png)
    1305-
    self.currentstream.write(adata)
    1306-
    self.endStream()
    1307-
    obj['SMask'] = smaskObject
    1308-
    1309-
    self.beginStream(
    1310-
    pair[1].id,
    1311-
    self.reserveObject('length of image stream'),
    1312-
    obj
    1313-
    )
    1314-
    # TODO: predictors (i.e., output png)
    1315-
    self.currentstream.write(data)
    1316-
    self.endStream()
    1347+
    self._writeImg(adata, height, width, True, smaskObject.id)
    1348+
    else:
    1349+
    smaskObject = None
    1350+
    self._writeImg(data, height, width, img.is_grayscale,
    1351+
    pair[1].id, smaskObject)
    13171352

    13181353
    def markerObject(self, path, trans, fill, stroke, lw, joinstyle,
    13191354
    capstyle):

    src/_png.cpp

    Lines changed: 45 additions & 13 deletions
    Original file line number | Diff line number | Diff line change
    @@ -71,11 +71,15 @@ static PyObject *Py_write_png(PyObject *self, PyObject *args, PyObject *kwds)
    7171
    double dpi = 0;
    7272
    const char *names[] = { "buffer", "file", "dpi", NULL };
    7373

    74+
    // We don't need strict contiguity, just for each row to be
    75+
    // contiguous, and libpng has special handling for getting RGB out
    76+
    // of RGBA, ARGB or BGR. But the simplest thing to do is to
    77+
    // enforce contiguity using array_view::converter_contiguous.
    7478
    if (!PyArg_ParseTupleAndKeywords(args,
    7579
    kwds,
    7680
    "O&O|d:write_png",
    7781
    (char **)names,
    78-
    &buffer.converter,
    82+
    &buffer.converter_contiguous,
    7983
    &buffer,
    8084
    &filein,
    8185
    &dpi)) {
    @@ -84,6 +88,7 @@ static PyObject *Py_write_png(PyObject *self, PyObject *args, PyObject *kwds)
    8488

    8589
    png_uint_32 width = (png_uint_32)buffer.dim(1);
    8690
    png_uint_32 height = (png_uint_32)buffer.dim(0);
    91+
    int channels = buffer.dim(2);
    8792
    std::vector<png_bytep> row_pointers(height);
    8893
    for (png_uint_32 row = 0; row < (png_uint_32)height; ++row) {
    8994
    row_pointers[row] = (png_bytep)buffer[row].data();
    @@ -98,9 +103,22 @@ static PyObject *Py_write_png(PyObject *self, PyObject *args, PyObject *kwds)
    98103
    png_structp png_ptr = NULL;
    99104
    png_infop info_ptr = NULL;
    100105
    struct png_color_8_struct sig_bit;
    101-
    102-
    if (buffer.dim(2) != 4) {
    103-
    PyErr_SetString(PyExc_ValueError, "Buffer must be RGBA NxMx4 array");
    106+
    int png_color_type;
    107+
    108+
    switch (channels) {
    109+
    case 1:
    110+
    png_color_type = PNG_COLOR_TYPE_GRAY;
    111+
    break;
    112+
    case 3:
    113+
    png_color_type = PNG_COLOR_TYPE_RGB;
    114+
    break;
    115+
    case 4:
    116+
    png_color_type = PNG_COLOR_TYPE_RGB_ALPHA;
    117+
    break;
    118+
    default:
    119+
    PyErr_SetString(PyExc_ValueError,
    120+
    "Buffer must be an NxMxD array with D in 1, 3, 4 "
    121+
    "(grayscale, RGB, RGBA)");
    104122
    goto exit;
    105123
    }
    106124

    @@ -141,7 +159,7 @@ static PyObject *Py_write_png(PyObject *self, PyObject *args, PyObject *kwds)
    141159
    }
    142160

    143161
    if (setjmp(png_jmpbuf(png_ptr))) {
    144-
    PyErr_SetString(PyExc_RuntimeError, "Error setting jumps");
    162+
    PyErr_SetString(PyExc_RuntimeError, "libpng signaled error");
    145163
    goto exit;
    146164
    }
    147165

    @@ -155,7 +173,7 @@ static PyObject *Py_write_png(PyObject *self, PyObject *args, PyObject *kwds)
    155173
    width,
    156174
    height,
    157175
    8,
    158-
    PNG_COLOR_TYPE_RGB_ALPHA,
    176+
    png_color_type,
    159177
    PNG_INTERLACE_NONE,
    160178
    PNG_COMPRESSION_TYPE_BASE,
    161179
    PNG_FILTER_TYPE_BASE);
    @@ -166,13 +184,27 @@ static PyObject *Py_write_png(PyObject *self, PyObject *args, PyObject *kwds)
    166184
    png_set_pHYs(png_ptr, info_ptr, dots_per_meter, dots_per_meter, PNG_RESOLUTION_METER);
    167185
    }
    168186

    169-
    // this a a color image!
    170-
    sig_bit.gray = 0;
    171-
    sig_bit.red = 8;
    172-
    sig_bit.green = 8;
    173-
    sig_bit.blue = 8;
    174-
    /* if the image has an alpha channel then */
    175-
    sig_bit.alpha = 8;
    187+
    sig_bit.alpha = 0;
    188+
    switch (png_color_type) {
    189+
    case PNG_COLOR_TYPE_GRAY:
    190+
    sig_bit.gray = 8;
    191+
    sig_bit.red = 0;
    192+
    sig_bit.green = 0;
    193+
    sig_bit.blue = 0;
    194+
    break;
    195+
    case PNG_COLOR_TYPE_RGB_ALPHA:
    196+
    sig_bit.alpha = 8;
    197+
    // fall through
    198+
    case PNG_COLOR_TYPE_RGB:
    199+
    sig_bit.gray = 0;
    200+
    sig_bit.red = 8;
    201+
    sig_bit.green = 8;
    202+
    sig_bit.blue = 8;
    203+
    break;
    204+
    default:
    205+
    PyErr_SetString(PyExc_RuntimeError, "internal error, bad png_color_type");
    206+
    goto exit;
    207+
    }
    176208
    png_set_sBIT(png_ptr, info_ptr, &sig_bit);
    177209

    178210
    png_write_info(png_ptr, info_ptr);

    0 commit comments

    Comments
     (0)
    0