From 7d0bca1e5a9b5fb1413fb9ee435949a4c39d14b5 Mon Sep 17 00:00:00 2001 From: Romain HERAULT Date: Wed, 21 Oct 2015 15:13:54 +0200 Subject: [PATCH 1/2] ENH: switch to 'with' statement for open/close file in lib.npyio._savez The function _savez stages data during archiving through temporary files. Before that commit these temp and zip files were handled through old open/close style. This commits switches to 'with' statement. which is the recommendation since python 2.5. It's simplifying A LOT exception handling and garbage collection. Due to uncompatibility between zipfile in python2.6 and with statement, ZipFile class has been subclassed to add needed stub methods. BUG: WinNT prevents a file to be opened twice Using contextlib.closing rather than own class --- numpy/lib/npyio.py | 65 ++++++++++++++++++++++++++++------------------ 1 file changed, 40 insertions(+), 25 deletions(-) diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index a6e4a8dac3b0..486d99bb2bbd 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -2,6 +2,8 @@ import sys import os +import os.path +import contextlib import re import itertools import warnings @@ -599,13 +601,41 @@ def savez_compressed(file, *args, **kwds): """ _savez(file, args, kwds, True) +@contextlib.contextmanager +def DeleteOnContextExitNamedTemporaryFile(*args, **kwds): + """ Factory for a NamedTemporaryFile that is + deleted on context exit but not on file close. + + Usefull replacement for NamedTemporaryFile, + as WinNT prevents a file to be opened twice. + + Typical case: + with DeleteOnExitNamedTemporaryFile() as fid: + foo(fid) + fid.close() + # File not deleted here + bar(fid.name) + # File deleted here + """ + # Import deferred for startup time improvement + import tempfile + local_kwds = kwds.copy() + local_kwds['delete'] = False + filename = None + with tempfile.NamedTemporaryFile(*args, **local_kwds) as fid: + filename = fid.name + yield fid + try: + if os.path.exists(filename): + os.unlink(filename) + except OSError: + pass + def _savez(file, args, kwds, compress, allow_pickle=True, pickle_kwargs=None): # Import is postponed to here since zipfile depends on gzip, an optional # component of the so-called standard library. import zipfile - # Import deferred for startup time improvement - import tempfile if isinstance(file, basestring): if not file.endswith('.npz'): @@ -624,35 +654,20 @@ def _savez(file, args, kwds, compress, allow_pickle=True, pickle_kwargs=None): else: compression = zipfile.ZIP_STORED - zipf = zipfile_factory(file, mode="w", compression=compression) - - # Stage arrays in a temporary file on disk, before writing to zip. - - # Since target file might be big enough to exceed capacity of a global - # temporary directory, create temp file side-by-side with the target file. - file_dir, file_prefix = os.path.split(file) if _is_string_like(file) else (None, 'tmp') - fd, tmpfile = tempfile.mkstemp(prefix=file_prefix, dir=file_dir, suffix='-numpy.npy') - os.close(fd) - try: + # Context manager compatible with the 'with' statement + # In python => 2.7 ZipFile class has been corrected + # No more need of contextlib.closing + with contextlib.closing( + zipfile_factory(file, mode="w", compression=compression)) as zipf: for key, val in namedict.items(): - fname = key + '.npy' - fid = open(tmpfile, 'wb') - try: + with DeleteOnContextExitNamedTemporaryFile() as fid: + fname = key + '.npy' format.write_array(fid, np.asanyarray(val), allow_pickle=allow_pickle, pickle_kwargs=pickle_kwargs) fid.close() - fid = None - zipf.write(tmpfile, arcname=fname) - except IOError as exc: - raise IOError("Failed to write to %s: %s" % (tmpfile, exc)) - finally: - if fid: - fid.close() - finally: - os.remove(tmpfile) + zipf.write(fid.name, arcname=fname) - zipf.close() def _getconv(dtype): From e17f22fdd266f4f907cc5ae0254d1506eaee0958 Mon Sep 17 00:00:00 2001 From: Romain HERAULT Date: Wed, 21 Oct 2015 15:18:35 +0200 Subject: [PATCH 2/2] ENH: add choice between in-disk/in-mem temp files in lib.npyio._savez Temporary files are used in the _savez function to stage data during archiving. The choice is done through new keyword arg: disk_temp_files If set to True, use in-disk temporary files (default_option). If set to False, use in-mem temporary files. Please note that the in-mem files are based on BytesIO. In python2 BytesIO lacks getbuffer method which returns the content of the file without copying it. Thus in python2, getvalue is used which is less memory efficient. In python3 getbuffer is privileged. WINNT bug --- numpy/lib/npyio.py | 27 +++++++++++++++++++++++---- numpy/lib/tests/test_io.py | 12 ++++++++++++ 2 files changed, 35 insertions(+), 4 deletions(-) diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index 486d99bb2bbd..1bacd92f1ee8 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -632,10 +632,30 @@ def DeleteOnContextExitNamedTemporaryFile(*args, **kwds): pass -def _savez(file, args, kwds, compress, allow_pickle=True, pickle_kwargs=None): +def _savez(file, args, kwds, compress, allow_pickle=True, pickle_kwargs=None, disk_temp_files=True): # Import is postponed to here since zipfile depends on gzip, an optional # component of the so-called standard library. import zipfile + if disk_temp_files: + # use in-disk temporary files + TempFile = DeleteOnContextExitNamedTemporaryFile + def _zipwrite(fid,fname): + fid.close() + zipf.write(fid.name, arcname=fname) + else: + # use in-mem temporary files + import io + TempFile = io.BytesIO + if sys.version_info[0] < 3: + def _zipwrite(fid,fname): + fid.flush() + zipf.writestr(fname, fid.getvalue()) + else: + # getbuffer() doesn't copy data, but only + # available in python 3 + def _zipwrite(fid,fname): + fid.flush() + zipf.writestr(fname, fid.getbuffer()) if isinstance(file, basestring): if not file.endswith('.npz'): @@ -660,13 +680,12 @@ def _savez(file, args, kwds, compress, allow_pickle=True, pickle_kwargs=None): with contextlib.closing( zipfile_factory(file, mode="w", compression=compression)) as zipf: for key, val in namedict.items(): - with DeleteOnContextExitNamedTemporaryFile() as fid: + with TempFile() as fid: fname = key + '.npy' format.write_array(fid, np.asanyarray(val), allow_pickle=allow_pickle, pickle_kwargs=pickle_kwargs) - fid.close() - zipf.write(fid.name, arcname=fname) + _zipwrite(fid,fname) diff --git a/numpy/lib/tests/test_io.py b/numpy/lib/tests/test_io.py index c0f8c1953829..bbf57a8f0cb0 100644 --- a/numpy/lib/tests/test_io.py +++ b/numpy/lib/tests/test_io.py @@ -14,6 +14,7 @@ import numpy as np import numpy.ma as ma from numpy.lib._iotools import ConverterError, ConversionWarning +from numpy.lib.npyio import _savez, DeleteOnContextExitNamedTemporaryFile from numpy.compat import asbytes, bytes, unicode from numpy.ma.testutils import assert_equal from numpy.testing import ( @@ -303,6 +304,17 @@ def test_closing_zipfile_after_load(self): data.close() assert_(fp.closed) + def test_in_mem_tempfiles(self): + # Check _savez with in-mem temporary files. + a = np.array([[1, 2], [3, 4]], float) + b = np.array([[1 + 2j, 2 + 7j], [3 - 6j, 4 + 12j]], complex) + with DeleteOnContextExitNamedTemporaryFile(suffix='.npz') as fid: + _savez(file=fid.name, args=[], kwds={'a':a,'b':b}, + compress=False, disk_temp_files=False) + l = np.load(fid.name) + assert_equal(a, l['a']) + assert_equal(b, l['b']) + class TestSaveTxt(TestCase): def test_array(self):