diff --git a/lib/matplotlib/backends/backend_pdf.py b/lib/matplotlib/backends/backend_pdf.py index 9317771c08b3..71dfdc91be64 100644 --- a/lib/matplotlib/backends/backend_pdf.py +++ b/lib/matplotlib/backends/backend_pdf.py @@ -1521,28 +1521,34 @@ def _unpack(self, im): alpha = None return rgb, alpha - def _writePng(self, data): + def _writePng(self, img): """ - Write the image *data* into the pdf file using png + Write the image *img* into the pdf file using png predictors with Flate compression. """ buffer = BytesIO() - if data.shape[-1] == 1: - data = data.squeeze(axis=-1) - Image.fromarray(data).save(buffer, format="png") + img.save(buffer, format="png") buffer.seek(8) + png_data = b'' + bit_depth = palette = None while True: length, type = struct.unpack(b'!L4s', buffer.read(8)) - if type == b'IDAT': + if type in [b'IHDR', b'PLTE', b'IDAT']: data = buffer.read(length) if len(data) != length: raise RuntimeError("truncated data") - self.currentstream.write(data) + if type == b'IHDR': + bit_depth = int(data[8]) + elif type == b'PLTE': + palette = data + elif type == b'IDAT': + png_data += data elif type == b'IEND': break else: buffer.seek(length, 1) buffer.seek(4, 1) # skip CRC + return png_data, bit_depth, palette def _writeImg(self, data, id, smask=None): """ @@ -1551,17 +1557,40 @@ def _writeImg(self, data, id, smask=None): (alpha channel) *smask*, which should be either None or a ``(height, width, 1)`` array. """ - height, width, colors = data.shape + height, width, color_channels = data.shape obj = {'Type': Name('XObject'), 'Subtype': Name('Image'), 'Width': width, 'Height': height, - 'ColorSpace': Name({1: 'DeviceGray', 3: 'DeviceRGB'}[colors]), + 'ColorSpace': Name({1: 'DeviceGray', + 3: 'DeviceRGB'}[color_channels]), 'BitsPerComponent': 8} if smask: obj['SMask'] = smask if mpl.rcParams['pdf.compression']: - png = {'Predictor': 10, 'Colors': colors, 'Columns': width} + if data.shape[-1] == 1: + data = data.squeeze(axis=-1) + img = Image.fromarray(data) + img_colors = img.getcolors(maxcolors=256) + if color_channels == 3 and img_colors is not None: + # Convert to indexed color if there are 256 colors or fewer + # This can significantly reduce the file size + num_colors = len(img_colors) + img = img.convert(mode='P', dither=Image.NONE, + palette=Image.ADAPTIVE, colors=num_colors) + png_data, bit_depth, palette = self._writePng(img) + if bit_depth is None or palette is None: + raise RuntimeError("invalid PNG header") + palette = palette[:num_colors * 3] # Trim padding + palette = pdfRepr(palette) + obj['ColorSpace'] = Verbatim(b'[/Indexed /DeviceRGB ' + + str(num_colors - 1).encode() + + b' ' + palette + b']') + obj['BitsPerComponent'] = bit_depth + color_channels = 1 + else: + png_data, _, _ = self._writePng(img) + png = {'Predictor': 10, 'Colors': color_channels, 'Columns': width} else: png = None self.beginStream( @@ -1571,7 +1600,7 @@ def _writeImg(self, data, id, smask=None): png=png ) if png: - self._writePng(data) + self.currentstream.write(png_data) else: self.currentstream.write(data.tobytes()) self.endStream() diff --git a/lib/matplotlib/tests/test_image.py b/lib/matplotlib/tests/test_image.py index 91ab8295956e..8f11d961584b 100644 --- a/lib/matplotlib/tests/test_image.py +++ b/lib/matplotlib/tests/test_image.py @@ -732,7 +732,11 @@ def test_log_scale_image(): ax.set(yscale='log') -@image_comparison(['rotate_image'], remove_text=True) +# Increased tolerance is needed for PDF test to avoid failure. After the PDF +# backend was modified to use indexed color, there are ten pixels that differ +# due to how the subpixel calculation is done when converting the PDF files to +# PNG images. +@image_comparison(['rotate_image'], remove_text=True, tol=0.35) def test_rotate_image(): delta = 0.25 x = y = np.arange(-3.0, 3.0, delta)