ENH: Using in-memory temporary files rather than on-disk ones for building zip archive in _savez by rherault-pro · Pull Request #6540 · numpy/numpy · GitHub
[go: up one dir, main page]

Skip to content

ENH: Using in-memory temporary files rather than on-disk ones for building zip archive in _savez #6540

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 61 additions & 27 deletions numpy/lib/npyio.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

import sys
import os
import os.path
import contextlib
import re
import itertools
import warnings
Expand Down Expand Up @@ -599,13 +601,61 @@ def savez_compressed(file, *args, **kwds):
"""
_savez(file, args, kwds, True)

@contextlib.contextmanager
def DeleteOnContextExitNamedTemporaryFile(*args, **kwds):
    """
    Context manager yielding a NamedTemporaryFile that is deleted on
    context exit rather than on file close.

    Useful replacement for NamedTemporaryFile, as Windows NT prevents a
    file from being opened a second time while it is still open.

    Parameters
    ----------
    *args, **kwds
        Forwarded to `tempfile.NamedTemporaryFile`.  Any ``delete``
        keyword supplied by the caller is overridden with ``False``;
        deletion is handled by this context manager instead.

    Yields
    ------
    fid : file-like object
        The open temporary file.  The caller may close it inside the
        ``with`` body; the file stays on disk until the body is left.

    Notes
    -----
    The file is removed when the ``with`` body is left, whether normally
    or through an exception.  Failure to remove the file (``OSError``)
    is silently ignored: cleanup is best effort.

    Examples
    --------
    Typical case::

        with DeleteOnContextExitNamedTemporaryFile() as fid:
            foo(fid)
            fid.close()
            # File not deleted here
            bar(fid.name)
        # File deleted here
    """
    # Import deferred for startup time improvement
    import tempfile

    # Copy so the caller's dict is untouched, and force delete=False so
    # that closing the file does not remove it.
    local_kwds = dict(kwds, delete=False)
    filename = None
    try:
        with tempfile.NamedTemporaryFile(*args, **local_kwds) as fid:
            filename = fid.name
            yield fid
    finally:
        # Runs even when the caller's body raised, so the file never leaks.
        # filename is None only if NamedTemporaryFile itself failed.
        if filename is not None:
            try:
                os.unlink(filename)
            except OSError:
                # Already gone, or still locked by someone else.
                pass

def _savez(file, args, kwds, compress, allow_pickle=True, pickle_kwargs=None):

def _savez(file, args, kwds, compress, allow_pickle=True, pickle_kwargs=None, disk_temp_files=True):
# Import is postponed to here since zipfile depends on gzip, an optional
# component of the so-called standard library.
import zipfile
# Import deferred for startup time improvement
import tempfile
if disk_temp_files:
# use in-disk temporary files
TempFile = DeleteOnContextExitNamedTemporaryFile
def _zipwrite(fid,fname):
fid.close()
zipf.write(fid.name, arcname=fname)
else:
# use in-mem temporary files
import io
TempFile = io.BytesIO
if sys.version_info[0] < 3:
def _zipwrite(fid,fname):
fid.flush()
zipf.writestr(fname, fid.getvalue())
else:
# getbuffer() doesn't copy data, but only
# available in python 3
def _zipwrite(fid,fname):
fid.flush()
zipf.writestr(fname, fid.getbuffer())

if isinstance(file, basestring):
if not file.endswith('.npz'):
Expand All @@ -624,35 +674,19 @@ def _savez(file, args, kwds, compress, allow_pickle=True, pickle_kwargs=None):
else:
compression = zipfile.ZIP_STORED

zipf = zipfile_factory(file, mode="w", compression=compression)

# Stage arrays in a temporary file on disk, before writing to zip.

# Since target file might be big enough to exceed capacity of a global
# temporary directory, create temp file side-by-side with the target file.
file_dir, file_prefix = os.path.split(file) if _is_string_like(file) else (None, 'tmp')
fd, tmpfile = tempfile.mkstemp(prefix=file_prefix, dir=file_dir, suffix='-numpy.npy')
os.close(fd)
try:
# Context manager compatible with the 'with' statement
# In python => 2.7 ZipFile class has been corrected
# No more need of contextlib.closing
with contextlib.closing(
zipfile_factory(file, mode="w", compression=compression)) as zipf:
for key, val in namedict.items():
fname = key + '.npy'
fid = open(tmpfile, 'wb')
try:
with TempFile() as fid:
fname = key + '.npy'
format.write_array(fid, np.asanyarray(val),
allow_pickle=allow_pickle,
pickle_kwargs=pickle_kwargs)
fid.close()
fid = None
zipf.write(tmpfile, arcname=fname)
except IOError as exc:
raise IOError("Failed to write to %s: %s" % (tmpfile, exc))
finally:
if fid:
fid.close()
finally:
os.remove(tmpfile)
_zipwrite(fid,fname)

zipf.close()


def _getconv(dtype):
Expand Down
12 changes: 12 additions & 0 deletions numpy/lib/tests/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import numpy as np
import numpy.ma as ma
from numpy.lib._iotools import ConverterError, ConversionWarning
from numpy.lib.npyio import _savez, DeleteOnContextExitNamedTemporaryFile
from numpy.compat import asbytes, bytes, unicode
from numpy.ma.testutils import assert_equal
from numpy.testing import (
Expand Down Expand Up @@ -303,6 +304,17 @@ def test_closing_zipfile_after_load(self):
data.close()
assert_(fp.closed)

def test_in_mem_tempfiles(self):
    """Round-trip two arrays through _savez using in-memory temp files."""
    a = np.array([[1, 2], [3, 4]], float)
    b = np.array([[1 + 2j, 2 + 7j], [3 - 6j, 4 + 12j]], complex)
    with DeleteOnContextExitNamedTemporaryFile(suffix='.npz') as fid:
        # Release our own handle first: Windows refuses to open a file
        # that is already open, and _savez reopens it by name.
        fid.close()
        _savez(file=fid.name, args=[], kwds={'a': a, 'b': b},
               compress=False, disk_temp_files=False)
        npz = np.load(fid.name)
        try:
            assert_equal(a, npz['a'])
            assert_equal(b, npz['b'])
        finally:
            # Close the archive so the temporary file can be deleted
            # when the context exits.
            npz.close()


class TestSaveTxt(TestCase):
def test_array(self):
Expand Down
0