diff --git a/conftest.py b/conftest.py new file mode 100644 index 000000000000..8d399e677fed --- /dev/null +++ b/conftest.py @@ -0,0 +1,11 @@ +from __future__ import (absolute_import, division, print_function, + unicode_literals) + + +def pytest_addoption(parser): + group = parser.getgroup("matplotlib", "matplotlib custom options") + group.addoption("--conversion-cache-max-size", action="store", + help="conversion cache maximum size in bytes") + group.addoption("--conversion-cache-report-misses", + action="store_true", + help="report conversion cache misses") diff --git a/lib/matplotlib/sphinxext/tests/conftest.py b/lib/matplotlib/sphinxext/tests/conftest.py index dcdc3612eecb..0e02d385d2ee 100644 --- a/lib/matplotlib/sphinxext/tests/conftest.py +++ b/lib/matplotlib/sphinxext/tests/conftest.py @@ -1,5 +1,6 @@ from __future__ import (absolute_import, division, print_function, unicode_literals) -from matplotlib.testing.conftest import (mpl_test_settings, - pytest_configure, pytest_unconfigure) +from matplotlib.testing.conftest import ( + mpl_test_settings, pytest_configure, pytest_unconfigure, + pytest_sessionfinish, pytest_terminal_summary) diff --git a/lib/matplotlib/testing/_conversion_cache.py b/lib/matplotlib/testing/_conversion_cache.py new file mode 100644 index 000000000000..85d2a0923d59 --- /dev/null +++ b/lib/matplotlib/testing/_conversion_cache.py @@ -0,0 +1,209 @@ +""" +A cache of png files keyed by the MD5 hashes of corresponding svg and +pdf files, to reduce test suite running times for svg and pdf files +that stay exactly the same from one run to the next. + +The test setup in matplotlib.tests.conftest sets this up for image +comparison tests and allows specifying the maximum cache size. +""" + +from __future__ import (absolute_import, division, print_function, + unicode_literals) + +import hashlib +import shutil +import os +import warnings + +from matplotlib import _get_cachedir +from matplotlib import cbook +from matplotlib import checkdep_ghostscript +from matplotlib import checkdep_inkscape + + +class _ConversionCache(object): + """A cache that stores png files converted from svg or pdf formats. + + The image comparison test cases compare svg and pdf files by + converting them to png files. When a test case has produced a + file, e.g. result.pdf, it queries this cache by the pathname + '/path/to/result_images/result.pdf'. The cache computes a hash of + the file (and the version of the external software used to convert + the file) and if a result by that hash value is available, writes + the data to the output location supplied by the caller. Otherwise + the test case has to run the conversion and can then insert the + result into the cache. + + Parameters + ---------- + directory : str, optional + Files are stored in this directory. Defaults to `'test_cache'` in + the overall Matplotlib cache directory. + max_size : int, optional + The flush method will delete files until their combined size is + under this limit, in bytes. Defaults to 100 megabytes. + + """ + + def __init__(self, directory=None, max_size=int(1e8)): + # gets is the set of filenames queried from the cache + self.gets = set() + # hits is the set of filenames found in the cache when queried + self.hits = set() + if directory is not None: + self.cachedir = directory + else: + self.cachedir = self.get_cache_dir() + self.ensure_cache_dir() + if not isinstance(max_size, int): + raise ValueError("max_size is of type %s, expected int" % + type(max_size)) + self.max_size = max_size + self.cached_ext = '.png' + self.converter_version = {} + _, gs_version = checkdep_ghostscript() + if gs_version is not None: + self.converter_version['.pdf'] = gs_version.encode('utf-8') + is_version = checkdep_inkscape() + if is_version is not None: + self.converter_version['.svg'] = is_version.encode('utf-8') + self.hash_cache = {} + + def get(self, filename, newname): + """Query the cache. + + Parameters + ---------- + filename : str + Full path to the original file. + newname : str + Path to which the result should be written. + + Returns + ------- + bool + True if the file was found in the cache and is now written + to `newname`. + """ + self.gets.add(filename) + hash_value = self._get_file_hash(filename) + cached_file = os.path.join(self.cachedir, hash_value + self.cached_ext) + with cbook.Locked(self.cachedir): + if os.path.exists(cached_file): + shutil.copyfile(cached_file, newname) + self.hits.add(filename) + return True + else: + return False + + def put(self, original, converted): + """Insert a file into the cache. + + Parameters + ---------- + original : str + Full path to the original file. + converted : str + Full path to the png file converted from the original. + """ + hash_value = self._get_file_hash(original) + cached_file = os.path.join(self.cachedir, hash_value + self.cached_ext) + with cbook.Locked(self.cachedir): + shutil.copyfile(converted, cached_file) + + def _get_file_hash(self, path, block_size=2 ** 20): + if path in self.hash_cache: + return self.hash_cache[path] + _, ext = os.path.splitext(path) + version_tag = self.converter_version.get(ext) + if version_tag is None: + warnings.warn( + ("Don't know the external converter for files with extension " + "%s, cannot ensure cache invalidation on version update. " + "Either the relevant conversion program is missing or caching" + " is attempted for an unknown file type.") + % ext) + result = self._get_file_hash_static(path, block_size, version_tag) + self.hash_cache[path] = result + return result + + @staticmethod + def _get_file_hash_static(path, block_size, version_tag): + # the parts of _get_file_hash that are called from the deprecated + # compare.get_file_hash; can merge into _get_file_hash once that + # function is removed + md5 = hashlib.md5() + with open(path, 'rb') as fd: + while True: + data = fd.read(block_size) + if not data: + break + md5.update(data) + if version_tag is not None: + md5.update(version_tag) + return md5.hexdigest() + + def report(self): + """Return information about the cache. + + Returns + ------- + r : dict + `r['gets']` is the set of files queried, + `r['hits']` is the set of files found in the cache + """ + return dict(hits=self.hits, gets=self.gets) + + def expire(self): + """Delete cached files until the disk usage is under the limit. + + Orders files by access time, so the least recently used files + get deleted first. + """ + with cbook.Locked(self.cachedir): + stats = {filename: os.stat(os.path.join(self.cachedir, filename)) + for filename in os.listdir(self.cachedir) + if filename.endswith(self.cached_ext)} + usage = sum(f.st_size for f in stats.values()) + to_free = usage - self.max_size + if to_free <= 0: + return + + files = sorted(stats.keys(), + key=lambda f: stats[f].st_atime, + reverse=True) + while to_free > 0: + filename = files.pop() + os.remove(os.path.join(self.cachedir, filename)) + to_free -= stats[filename].st_size + + @staticmethod + def get_cache_dir(): + cachedir = _get_cachedir() + if cachedir is None: + raise _CacheError('No suitable configuration directory') + cachedir = os.path.join(cachedir, 'test_cache') + return cachedir + + def ensure_cache_dir(self): + if not os.path.exists(self.cachedir): + try: + cbook.mkdirs(self.cachedir) + except OSError as e: + raise _CacheError("Error creating cache directory %s: %s" + % (self.cachedir, str(e))) + if not os.access(self.cachedir, os.W_OK): + raise _CacheError("Cache directory %s not writable" + % self.cachedir) + + +class _CacheError(Exception): + def __init__(self, message): + self.message = message + + def __str__(self): + return self.message + + +# A global cache instance, set by the appropriate test runner. +_conversion_cache = None diff --git a/lib/matplotlib/testing/compare.py b/lib/matplotlib/testing/compare.py index ab2f26fc0081..bf873d57ed54 100644 --- a/lib/matplotlib/testing/compare.py +++ b/lib/matplotlib/testing/compare.py @@ -5,21 +5,16 @@ from __future__ import (absolute_import, division, print_function, unicode_literals) -import six - -import hashlib import os -import shutil import numpy as np import matplotlib +from matplotlib import cbook from matplotlib.compat import subprocess +from matplotlib.testing import _conversion_cache as ccache from matplotlib.testing.exceptions import ImageComparisonFailure from matplotlib import _png -from matplotlib import _get_cachedir -from matplotlib import cbook -from distutils import version __all__ = ['compare_float', 'compare_images', 'comparable_formats'] @@ -76,40 +71,6 @@ def compare_float(expected, actual, relTol=None, absTol=None): return msg or None -def get_cache_dir(): - cachedir = _get_cachedir() - if cachedir is None: - raise RuntimeError('Could not find a suitable configuration directory') - cache_dir = os.path.join(cachedir, 'test_cache') - if not os.path.exists(cache_dir): - try: - cbook.mkdirs(cache_dir) - except IOError: - return None - if not os.access(cache_dir, os.W_OK): - return None - return cache_dir - - -def get_file_hash(path, block_size=2 ** 20): - md5 = hashlib.md5() - with open(path, 'rb') as fd: - while True: - data = fd.read(block_size) - if not data: - break - md5.update(data) - - if path.endswith('.pdf'): - from matplotlib import checkdep_ghostscript - md5.update(checkdep_ghostscript()[1].encode('utf-8')) - elif path.endswith('.svg'): - from matplotlib import checkdep_inkscape - md5.update(checkdep_inkscape().encode('utf-8')) - - return md5.hexdigest() - - def make_external_conversion_command(cmd): def convert(old, new): cmdline = cmd(old, new) @@ -160,16 +121,39 @@ def comparable_formats(): return ['png'] + list(converter) -def convert(filename, cache): +@cbook.deprecated('2.1') +def get_cache_dir(): + return ccache._ConversionCache.get_cache_dir() + + +@cbook.deprecated('2.1') +def get_file_hash(path, block_size=2 ** 20): + if path.endswith('.pdf'): + from matplotlib import checkdep_ghostscript + version_tag = checkdep_ghostscript()[1].encode('utf-8') + elif path.endswith('.svg'): + from matplotlib import checkdep_inkscape + version_tag = checkdep_inkscape().encode('utf-8') + else: + version_tag = None + return ccache._ConversionCache._get_file_hash_static( + path, block_size, version_tag) + + +def convert(filename, cache=None): """ Convert the named file into a png file. Returns the name of the created file. - If *cache* is True, the result of the conversion is cached in - `matplotlib._get_cachedir() + '/test_cache/'`. The caching is based - on a hash of the exact contents of the input file. The is no limit - on the size of the cache, so it may need to be manually cleared - periodically. + Parameters + ---------- + filename : str + cache : _ConversionCache, optional + + Returns + ------- + str + The converted file. """ base, extension = filename.rsplit('.', 1) @@ -190,23 +174,12 @@ def convert(filename, cache): # is out of date. if (not os.path.exists(newname) or os.stat(newname).st_mtime < os.stat(filename).st_mtime): - if cache: - cache_dir = get_cache_dir() - else: - cache_dir = None - - if cache_dir is not None: - hash_value = get_file_hash(filename) - new_ext = os.path.splitext(newname)[1] - cached_file = os.path.join(cache_dir, hash_value + new_ext) - if os.path.exists(cached_file): - shutil.copyfile(cached_file, newname) - return newname - + in_cache = cache and cache.get(filename, newname) + if in_cache: + return newname converter[extension](filename, newname) - - if cache_dir is not None: - shutil.copyfile(newname, cached_file) + if cache: + cache.put(filename, newname) return newname @@ -269,7 +242,7 @@ def calculate_rms(expectedImage, actualImage): return rms -def compare_images(expected, actual, tol, in_decorator=False): +def compare_images(expected, actual, tol, in_decorator=False, cache=None): """ Compare two "image" files checking differences within a tolerance. @@ -290,6 +263,7 @@ def compare_images(expected, actual, tol, in_decorator=False): in_decorator : bool If called from image_comparison decorator, this should be True. (default=False) + cache : matplotlib.testing._conversion_cache._ConversionCache, optional Example ------- @@ -311,8 +285,8 @@ def compare_images(expected, actual, tol, in_decorator=False): raise IOError('Baseline image %r does not exist.' % expected) if extension != 'png': - actual = convert(actual, False) - expected = convert(expected, True) + actual = convert(actual, cache) + expected = convert(expected, cache) # open the image files and remove the alpha channel (if it exists) expectedImage = _png.read_png_int(expected) diff --git a/lib/matplotlib/testing/conftest.py b/lib/matplotlib/testing/conftest.py index 918bafc873b9..2725f99e091e 100644 --- a/lib/matplotlib/testing/conftest.py +++ b/lib/matplotlib/testing/conftest.py @@ -1,9 +1,10 @@ from __future__ import (absolute_import, division, print_function, unicode_literals) - +from six.moves import reduce import pytest import matplotlib +from matplotlib.testing import _conversion_cache as ccache def pytest_configure(config): @@ -11,11 +12,45 @@ def pytest_configure(config): matplotlib._called_from_pytest = True matplotlib._init_tests() + max_size = config.getoption('--conversion-cache-max-size') + if max_size is not None: + ccache._conversion_cache = \ + ccache._ConversionCache(max_size=int(max_size)) + else: + ccache._conversion_cache = ccache._ConversionCache() + if config.pluginmanager.hasplugin('xdist'): + config.pluginmanager.register(DeferPlugin()) + def pytest_unconfigure(config): + ccache._conversion_cache.expire() matplotlib._called_from_pytest = False +def pytest_sessionfinish(session): + if hasattr(session.config, 'slaveoutput'): + session.config.slaveoutput['cache-report'] = \ + ccache._conversion_cache.report() + + +def pytest_terminal_summary(terminalreporter): + tr = terminalreporter + if hasattr(tr.config, 'cache_reports'): + reports = tr.config.cache_reports + data = {'hits': reduce(lambda x, y: x.union(y), + (rep['hits'] for rep in reports)), + 'gets': reduce(lambda x, y: x.union(y), + (rep['gets'] for rep in reports))} + else: + data = ccache._conversion_cache.report() + tr.write_sep('=', 'Image conversion cache report') + tr.write_line('Hit rate: %d/%d' % (len(data['hits']), len(data['gets']))) + if tr.config.getoption('--conversion-cache-report-misses'): + tr.write_line('Missed files:') + for filename in sorted(data['gets'].difference(data['hits'])): + tr.write_line(' %s' % filename) + + @pytest.fixture(autouse=True) def mpl_test_settings(request): from matplotlib.testing.decorators import _do_cleanup @@ -53,3 +88,10 @@ def mpl_test_settings(request): plt.switch_backend(prev_backend) _do_cleanup(original_units_registry, original_settings) + + +class DeferPlugin(object): + def pytest_testnodedown(self, node, error): + if not hasattr(node.config, 'cache_reports'): + node.config.cache_reports = [] + node.config.cache_reports.append(node.slaveoutput['cache-report']) diff --git a/lib/matplotlib/testing/decorators.py b/lib/matplotlib/testing/decorators.py index a026eb8c02ea..f6635a3dca84 100644 --- a/lib/matplotlib/testing/decorators.py +++ b/lib/matplotlib/testing/decorators.py @@ -28,6 +28,7 @@ make_test_filename from . import _copy_metadata, is_called_from_pytest from .exceptions import ImageComparisonFailure +from ._conversion_cache import _conversion_cache def _knownfailureif(fail_condition, msg=None, known_exception_class=None): @@ -197,7 +198,8 @@ def remove_ticks_and_titles(figure): def _raise_on_image_difference(expected, actual, tol): __tracebackhide__ = True - err = compare_images(expected, actual, tol, in_decorator=True) + err = compare_images(expected, actual, tol, in_decorator=True, + cache=_conversion_cache) if not os.path.exists(expected): raise ImageComparisonFailure('image does not exist: %s' % expected) diff --git a/lib/matplotlib/tests/conftest.py b/lib/matplotlib/tests/conftest.py index dcdc3612eecb..0e02d385d2ee 100644 --- a/lib/matplotlib/tests/conftest.py +++ b/lib/matplotlib/tests/conftest.py @@ -1,5 +1,6 @@ from __future__ import (absolute_import, division, print_function, unicode_literals) -from matplotlib.testing.conftest import (mpl_test_settings, - pytest_configure, pytest_unconfigure) +from matplotlib.testing.conftest import ( + mpl_test_settings, pytest_configure, pytest_unconfigure, + pytest_sessionfinish, pytest_terminal_summary) diff --git a/lib/matplotlib/tests/test_cache.py b/lib/matplotlib/tests/test_cache.py new file mode 100644 index 000000000000..d8b0dee7eeb9 --- /dev/null +++ b/lib/matplotlib/tests/test_cache.py @@ -0,0 +1,101 @@ +from __future__ import (absolute_import, division, print_function, + unicode_literals) +try: + from unittest import mock +except ImportError: + import mock + +import os + +from matplotlib import cbook +from matplotlib.testing._conversion_cache import _ConversionCache, _CacheError +import pytest + + +def test_cache_basic(tmpdir): + def intmp(f): + return tmpdir.join(f) + + def fname(f): + return str(intmp(f)) + + cache = _ConversionCache(fname('cache')) + intmp('fake.pdf').write_binary(b'this is a fake pdf file') + intmp('fake.svg').write_binary(b'this pretends to be an svg file') + assert not cache.get(fname('fake.pdf'), fname('fakepdf.png')) + assert not cache.get(fname('fake.svg'), fname('fakesvg.png')) + assert cache.report() == \ + {'gets': {fname('fake.pdf'), fname('fake.svg')}, + 'hits': set()} + + intmp('fakepdf.png').write_binary(b'generated from the pdf file') + cache.put(fname('fake.pdf'), fname('fakepdf.png')) + assert cache.get(fname('fake.pdf'), fname('copypdf.png')) + assert intmp('copypdf.png').read() == 'generated from the pdf file' + assert cache.report() == \ + {'gets': {fname('fake.pdf'), fname('fake.svg')}, + 'hits': {fname('fake.pdf')}} + + intmp('fakesvg.png').write_binary(b'generated from the svg file') + cache.put(fname('fake.svg'), fname('fakesvg.png')) + assert cache.get(fname('fake.svg'), fname('copysvg.png')) + assert intmp('copysvg.png').read() == 'generated from the svg file' + assert cache.report() == \ + {'gets': {fname('fake.pdf'), fname('fake.svg')}, + 'hits': {fname('fake.pdf'), fname('fake.svg')}} + + +def test_cache_expire(tmpdir): + def intmp(*f): + return tmpdir.join(*f) + + def fname(*f): + return str(intmp(*f)) + + cache = _ConversionCache(fname('cache'), 10) + for i in range(5): + pngfile = intmp('cache', 'file%d.png' % i) + pngfile.write_binary(b'1234') + os.utime(str(pngfile), (i*1000, i*1000)) + + cache.expire() + assert not os.path.exists(fname('cache', 'file0.png')) + assert not os.path.exists(fname('cache', 'file1.png')) + assert not os.path.exists(fname('cache', 'file2.png')) + assert os.path.exists(fname('cache', 'file3.png')) + assert os.path.exists(fname('cache', 'file4.png')) + + intmp('cache', 'onemore.png').write_binary(b'x' * 11) + os.utime(fname('cache', 'onemore.png'), (5000, 5000)) + + cache.expire() + assert not os.path.exists(fname('cache', 'file0.png')) + assert not os.path.exists(fname('cache', 'file1.png')) + assert not os.path.exists(fname('cache', 'file2.png')) + assert not os.path.exists(fname('cache', 'file3.png')) + assert not os.path.exists(fname('cache', 'file4.png')) + assert not os.path.exists(fname('cache', 'onemore.png')) + + +def test_cache_default_dir(): + try: + path = _ConversionCache.get_cache_dir() + assert path.endswith('test_cache') + except _CacheError: + pass + + +@mock.patch('matplotlib.testing._conversion_cache.cbook.mkdirs', + side_effect=OSError) +def test_cache_mkdir_error(mkdirs, tmpdir): + with pytest.raises(_CacheError): + c = _ConversionCache(str(tmpdir.join('cache'))) + + +@mock.patch('matplotlib.testing._conversion_cache.os.access', + side_effect=[False]) +def test_cache_unwritable_error(access, tmpdir): + with pytest.raises(_CacheError): + cachedir = tmpdir.join('cache') + cachedir.ensure(dir=True) + c = _ConversionCache(str(cachedir)) diff --git a/lib/mpl_toolkits/tests/conftest.py b/lib/mpl_toolkits/tests/conftest.py index dcdc3612eecb..0e02d385d2ee 100644 --- a/lib/mpl_toolkits/tests/conftest.py +++ b/lib/mpl_toolkits/tests/conftest.py @@ -1,5 +1,6 @@ from __future__ import (absolute_import, division, print_function, unicode_literals) -from matplotlib.testing.conftest import (mpl_test_settings, - pytest_configure, pytest_unconfigure) +from matplotlib.testing.conftest import ( + mpl_test_settings, pytest_configure, pytest_unconfigure, + pytest_sessionfinish, pytest_terminal_summary)