Factor the converted-image cache out of compare.py · jkseppan/matplotlib@39b3173 · GitHub
[go: up one dir, main page]

Skip to content

Commit 39b3173

Browse files
committed
Factor the converted-image cache out of compare.py
There is a cache of png files keyed by the MD5 hashes of the corresponding svg and pdf files, which helps reduce test suite running times for svg and pdf files that stay exactly the same from one run to the next. This patch enables caching of test results, not only expected results, which is only useful if the tests are mostly deterministic (see matplotlib#7748). It also adds reporting of cache misses, which can be helpful in getting tests to stay deterministic, and expiration of cached files, since the test results are going to change more often than the expected results.
1 parent 5e825bd commit 39b3173

File tree

7 files changed

+417
-68
lines changed

7 files changed

+417
-68
lines changed

lib/matplotlib/testing/_nose/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,10 @@
77
def get_extra_test_plugins():
88
from .plugins.performgc import PerformGC
99
from .plugins.knownfailure import KnownFailure
10+
from .plugins.conversion_cache import ConversionCache
1011
from nose.plugins import attrib
1112

12-
return [PerformGC, KnownFailure, attrib.Plugin]
13+
return [PerformGC, KnownFailure, attrib.Plugin, ConversionCache]
1314

1415

1516
def get_env():

lib/matplotlib/testing/compare.py

Lines changed: 19 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -5,21 +5,14 @@
55
from __future__ import (absolute_import, division, print_function,
66
unicode_literals)
77

8-
import six
9-
10-
import hashlib
118
import os
12-
import shutil
139

1410
import numpy as np
1511

1612
import matplotlib
1713
from matplotlib.compat import subprocess
1814
from matplotlib.testing.exceptions import ImageComparisonFailure
1915
from matplotlib import _png
20-
from matplotlib import _get_cachedir
21-
from matplotlib import cbook
22-
from distutils import version
2316

2417
__all__ = ['compare_float', 'compare_images', 'comparable_formats']
2518

@@ -76,40 +69,6 @@ def compare_float(expected, actual, relTol=None, absTol=None):
7669
return msg or None
7770

7871

79-
def get_cache_dir():
80-
cachedir = _get_cachedir()
81-
if cachedir is None:
82-
raise RuntimeError('Could not find a suitable configuration directory')
83-
cache_dir = os.path.join(cachedir, 'test_cache')
84-
if not os.path.exists(cache_dir):
85-
try:
86-
cbook.mkdirs(cache_dir)
87-
except IOError:
88-
return None
89-
if not os.access(cache_dir, os.W_OK):
90-
return None
91-
return cache_dir
92-
93-
94-
def get_file_hash(path, block_size=2 ** 20):
95-
md5 = hashlib.md5()
96-
with open(path, 'rb') as fd:
97-
while True:
98-
data = fd.read(block_size)
99-
if not data:
100-
break
101-
md5.update(data)
102-
103-
if path.endswith('.pdf'):
104-
from matplotlib import checkdep_ghostscript
105-
md5.update(checkdep_ghostscript()[1].encode('utf-8'))
106-
elif path.endswith('.svg'):
107-
from matplotlib import checkdep_inkscape
108-
md5.update(checkdep_inkscape().encode('utf-8'))
109-
110-
return md5.hexdigest()
111-
112-
11372
def make_external_conversion_command(cmd):
11473
def convert(old, new):
11574
cmdline = cmd(old, new)
@@ -160,16 +119,20 @@ def comparable_formats():
160119
return ['png'] + list(converter)
161120

162121

163-
def convert(filename, cache):
122+
def convert(filename, cache=None):
164123
"""
165124
Convert the named file into a png file. Returns the name of the
166125
created file.
167126
168-
If *cache* is True, the result of the conversion is cached in
169-
`matplotlib._get_cachedir() + '/test_cache/'`. The caching is based
170-
on a hash of the exact contents of the input file. The is no limit
171-
on the size of the cache, so it may need to be manually cleared
172-
periodically.
127+
Parameters
128+
----------
129+
filename : str
130+
cache : ConversionCache, optional
131+
132+
Returns
133+
-------
134+
str
135+
The converted file.
173136
174137
"""
175138
base, extension = filename.rsplit('.', 1)
@@ -190,23 +153,12 @@ def convert(filename, cache):
190153
# is out of date.
191154
if (not os.path.exists(newname) or
192155
os.stat(newname).st_mtime < os.stat(filename).st_mtime):
193-
if cache:
194-
cache_dir = get_cache_dir()
195-
else:
196-
cache_dir = None
197-
198-
if cache_dir is not None:
199-
hash_value = get_file_hash(filename)
200-
new_ext = os.path.splitext(newname)[1]
201-
cached_file = os.path.join(cache_dir, hash_value + new_ext)
202-
if os.path.exists(cached_file):
203-
shutil.copyfile(cached_file, newname)
204-
return newname
205-
156+
in_cache = cache and cache.get(filename, newname)
157+
if in_cache:
158+
return newname
206159
converter[extension](filename, newname)
207-
208-
if cache_dir is not None:
209-
shutil.copyfile(newname, cached_file)
160+
if cache:
161+
cache.put(filename, newname)
210162

211163
return newname
212164

@@ -269,7 +221,7 @@ def calculate_rms(expectedImage, actualImage):
269221
return rms
270222

271223

272-
def compare_images(expected, actual, tol, in_decorator=False):
224+
def compare_images(expected, actual, tol, in_decorator=False, cache=None):
273225
"""
274226
Compare two "image" files checking differences within a tolerance.
275227
@@ -290,6 +242,7 @@ def compare_images(expected, actual, tol, in_decorator=False):
290242
in_decorator : bool
291243
If called from image_comparison decorator, this should be
292244
True. (default=False)
245+
cache : conversion_cache.ConversionCache, optional
293246
294247
Example
295248
-------
@@ -311,8 +264,8 @@ def compare_images(expected, actual, tol, in_decorator=False):
311264
raise IOError('Baseline image %r does not exist.' % expected)
312265

313266
if extension != 'png':
314-
actual = convert(actual, False)
315-
expected = convert(expected, True)
267+
actual = convert(actual, cache)
268+
expected = convert(expected, cache)
316269

317270
# open the image files and remove the alpha channel (if it exists)
318271
expectedImage = _png.read_png_int(expected)
Lines changed: 196 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,196 @@
1+
"""
2+
A cache of png files keyed by the MD5 hashes of corresponding svg and
3+
pdf files, to reduce test suite running times for svg and pdf files
4+
that stay exactly the same from one run to the next.
5+
6+
There is a corresponding nose plugin in testing/_nose/plugins and
7+
similar pytest code in conftest.py.
8+
"""
9+
10+
from __future__ import (absolute_import, division, print_function,
11+
unicode_literals)
12+
13+
import hashlib
14+
import shutil
15+
import os
16+
import warnings
17+
18+
from matplotlib import _get_cachedir
19+
from matplotlib import cbook
20+
from matplotlib import checkdep_ghostscript
21+
from matplotlib import checkdep_inkscape
22+
23+
24+
class ConversionCache(object):
    """A cache that stores png files converted from svg or pdf formats.

    The image comparison test cases compare svg and pdf files by
    converting them to png files. When a test case has produced a
    file, e.g. result.pdf, it queries this cache by the pathname
    '/path/to/result_images/result.pdf'. The cache computes a hash of
    the file (and the version of the external software used to convert
    the file) and if a result by that hash value is available, writes
    the data to the output location supplied by the caller. Otherwise
    the test case has to run the conversion and can then insert the
    result into the cache.

    Parameters
    ----------
    directory : str, optional
        Files are stored in this directory, defaults to `'test_cache'` in
        the overall Matplotlib cache directory.
    max_size : int, optional
        The `expire` method will delete files until their combined size is
        under this limit, in bytes. Defaults to 100 megabytes.

    Raises
    ------
    ValueError
        If `max_size` is not an int.
    CacheError
        If no cache directory can be determined, or it cannot be created
        or is not writable.

    """

    def __init__(self, directory=None, max_size=int(1e8)):
        # Validate the arguments before touching the file system.
        if not isinstance(max_size, int):
            raise ValueError("max_size is %s, expected int" % type(max_size))
        self.gets = set()
        self.hits = set()
        if directory is not None:
            self.cachedir = directory
        else:
            self.cachedir = self.get_cache_dir()
        self.ensure_cache_dir()
        self.max_size = max_size
        self.cached_ext = '.png'
        # Maps a source extension to the (bytes) version string of the
        # external converter, which is mixed into the file hash.
        self.converter_version = {}
        version_probes = (
            ('.pdf', lambda: checkdep_ghostscript()[1]),
            ('.svg', checkdep_inkscape),
        )
        for ext, probe in version_probes:
            try:
                self.converter_version[ext] = probe().encode('utf-8')
            except Exception:
                # Best effort: a converter whose version cannot be
                # determined is simply left out; _get_file_hash warns
                # when it meets a file with such an extension.
                # KeyboardInterrupt/SystemExit are not swallowed.
                pass
        # Memoized hashes, keyed by path.
        self.hash_cache = {}

    def get(self, filename, newname):
        """Query the cache.

        Parameters
        ----------
        filename : str
            Full path to the original file.
        newname : str
            Path to which the result should be written.

        Returns
        -------
        bool
            True if the file was found in the cache and is now written
            to `newname`.
        """
        self.gets.add(filename)
        hash_value = self._get_file_hash(filename)
        cached_file = os.path.join(self.cachedir, hash_value + self.cached_ext)
        if not os.path.exists(cached_file):
            return False
        shutil.copyfile(cached_file, newname)
        self.hits.add(filename)
        return True

    def put(self, original, converted):
        """Insert a file into the cache.

        Parameters
        ----------
        original : str
            Full path to the original file.
        converted : str
            Full path to the png file converted from the original.
        """
        hash_value = self._get_file_hash(original)
        cached_file = os.path.join(self.cachedir, hash_value + self.cached_ext)
        shutil.copyfile(converted, cached_file)

    def _get_file_hash(self, path, block_size=2 ** 20):
        """Return the hex MD5 digest of the file at `path`.

        The digest covers the file contents followed by the version tag
        of the converter registered for the file's extension, if any.
        A warning is issued when no version tag is known. Results are
        memoized in `self.hash_cache`.
        """
        # NOTE(review): the memo is keyed by path only, so a file that is
        # rewritten in place after its first lookup keeps its old hash.
        if path in self.hash_cache:
            return self.hash_cache[path]
        md5 = hashlib.md5()
        with open(path, 'rb') as fd:
            # Read in blocks so large files are never held in memory whole.
            for data in iter(lambda: fd.read(block_size), b''):
                md5.update(data)
        _, ext = os.path.splitext(path)
        version_tag = self.converter_version.get(ext)
        if version_tag:
            md5.update(version_tag)
        else:
            warnings.warn(("Don't know the external converter for %s, cannot "
                           "ensure cache invalidation on version update.")
                          % path)

        result = md5.hexdigest()
        self.hash_cache[path] = result
        return result

    def report(self):
        """Return information about the cache.

        Returns
        -------
        r : dict
            `r['gets']` is the set of files queried,
            `r['hits']` is the set of files found in the cache
        """
        return dict(hits=self.hits, gets=self.gets)

    def expire(self):
        """Delete cached files until the disk usage is under the limit.

        Orders files by access time, so the least recently used files
        get deleted first.
        """
        stats = {filename: os.stat(os.path.join(self.cachedir, filename))
                 for filename in os.listdir(self.cachedir)}
        usage = sum(f.st_size for f in stats.values())
        to_free = usage - self.max_size
        if to_free <= 0:
            return

        # Iterate over the single directory snapshot taken above (oldest
        # access time first); listing the directory a second time could
        # yield names that have no entry in `stats`.
        for filename in sorted(stats, key=lambda f: stats[f].st_atime):
            if to_free <= 0:
                break
            os.remove(os.path.join(self.cachedir, filename))
            to_free -= stats[filename].st_size

    @staticmethod
    def get_cache_dir():
        """Return the default cache location.

        This is the `'test_cache'` subdirectory of the directory returned
        by `matplotlib._get_cachedir`.

        Raises
        ------
        CacheError
            If `_get_cachedir` returns None.
        """
        cachedir = _get_cachedir()
        if cachedir is None:
            raise CacheError('No suitable configuration directory')
        cachedir = os.path.join(cachedir, 'test_cache')
        return cachedir

    def ensure_cache_dir(self):
        """Create `self.cachedir` if needed and check that it is writable.

        Raises
        ------
        CacheError
            If the directory cannot be created or is not writable.
        """
        if not os.path.exists(self.cachedir):
            try:
                cbook.mkdirs(self.cachedir)
            except EnvironmentError as e:
                # EnvironmentError covers both IOError and OSError, which
                # are distinct classes on Python 2.
                raise CacheError("Error creating cache directory %s: %s"
                                 % (self.cachedir, str(e)))
        if not os.access(self.cachedir, os.W_OK):
            raise CacheError("Cache directory %s not writable" % self.cachedir)
185+
186+
187+
class CacheError(Exception):
    """Error raised when the conversion cache directory is unusable.

    Parameters
    ----------
    message : str
        Description of the problem; this is also what ``str()`` of the
        exception returns.
    """

    def __init__(self, message):
        # Chain to Exception so the base class is initialized normally.
        super(CacheError, self).__init__(message)
        self.message = message

    def __str__(self):
        return self.message
193+
194+
195+
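# A global cache instance, set by the appropriate test runner.
# NOTE: read this as a module attribute at call time; a
# ``from .conversion_cache import conversion_cache`` binds whatever value
# is current at import time (initially None) and will not see a later
# assignment made by the test runner.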
196+
conversion_cache = None

lib/matplotlib/testing/decorators.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
make_test_filename
2929
from . import _copy_metadata, is_called_from_pytest
3030
from .exceptions import ImageComparisonFailure
31+
from .conversion_cache import conversion_cache
3132

3233

3334
def _knownfailureif(fail_condition, msg=None, known_exception_class=None):
@@ -197,7 +198,8 @@ def remove_ticks_and_titles(figure):
197198
def _raise_on_image_difference(expected, actual, tol):
198199
__tracebackhide__ = True
199200

200-
err = compare_images(expected, actual, tol, in_decorator=True)
201+
err = compare_images(expected, actual, tol, in_decorator=True,
202+
cache=conversion_cache)
201203

202204
if not os.path.exists(expected):
203205
raise ImageComparisonFailure('image does not exist: %s' % expected)

0 commit comments

Comments
 (0)
0