From f5eee8637d2f9ea2d01578f344d1e09fe022311e Mon Sep 17 00:00:00 2001 From: Chris Markiewicz Date: Tue, 19 Sep 2023 15:12:26 -0400 Subject: [PATCH 1/3] ENH: Add copy() method to ArrayProxy --- nibabel/arrayproxy.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/nibabel/arrayproxy.py b/nibabel/arrayproxy.py index 12a0a7caf3..f123e98d75 100644 --- a/nibabel/arrayproxy.py +++ b/nibabel/arrayproxy.py @@ -217,6 +217,15 @@ def __init__(self, file_like, spec, *, mmap=True, order=None, keep_file_open=Non ) self._lock = RLock() + def copy(self): + spec = self._shape, self._dtype, self._offset, self._slope, self._inter + return ArrayProxy( + self.file_like, + spec, + mmap=self._mmap, + keep_file_open=self._keep_file_open, + ) + def __del__(self): """If this ``ArrayProxy`` was created with ``keep_file_open=True``, the open file object is closed if necessary. From 65228f041df0dc63bb20000dcd2a1571e47abc22 Mon Sep 17 00:00:00 2001 From: Chris Markiewicz Date: Fri, 22 Sep 2023 08:25:35 -0400 Subject: [PATCH 2/3] ENH: Copy lock if filehandle is shared, add tests --- nibabel/arrayproxy.py | 39 ++++++++++++++++++++------------ nibabel/tests/test_arrayproxy.py | 28 +++++++++++++++++++---- 2 files changed, 48 insertions(+), 19 deletions(-) diff --git a/nibabel/arrayproxy.py b/nibabel/arrayproxy.py index f123e98d75..57d8aa0f8b 100644 --- a/nibabel/arrayproxy.py +++ b/nibabel/arrayproxy.py @@ -58,6 +58,7 @@ if ty.TYPE_CHECKING: # pragma: no cover import numpy.typing as npt + from typing_extensions import Self # PY310 # Taken from numpy/__init__.pyi _DType = ty.TypeVar('_DType', bound=np.dtype[ty.Any]) @@ -212,19 +213,29 @@ def __init__(self, file_like, spec, *, mmap=True, order=None, keep_file_open=Non self.order = order # Flags to keep track of whether a single ImageOpener is created, and # whether a single underlying file handle is created. - self._keep_file_open, self._persist_opener = self._should_keep_file_open( - file_like, keep_file_open - ) + self._keep_file_open, self._persist_opener = self._should_keep_file_open(keep_file_open) self._lock = RLock() - def copy(self): + def _has_fh(self) -> bool: + """Determine if our file-like is a filehandle or path""" + return hasattr(self.file_like, 'read') and hasattr(self.file_like, 'seek') + + def copy(self) -> Self: + """Create a new ArrayProxy for the same file and parameters + + If the proxied file is an open file handle, the new ArrayProxy + will share a lock with the old one. + """ spec = self._shape, self._dtype, self._offset, self._slope, self._inter - return ArrayProxy( + new = self.__class__( self.file_like, spec, mmap=self._mmap, keep_file_open=self._keep_file_open, ) + if self._has_fh(): + new._lock = self._lock + return new def __del__(self): """If this ``ArrayProxy`` was created with ``keep_file_open=True``, @@ -245,13 +256,13 @@ def __setstate__(self, state): self.__dict__.update(state) self._lock = RLock() - def _should_keep_file_open(self, file_like, keep_file_open): + def _should_keep_file_open(self, keep_file_open): """Called by ``__init__``. This method determines how to manage ``ImageOpener`` instances, and the underlying file handles - the behaviour depends on: - - whether ``file_like`` is an an open file handle, or a path to a + - whether ``self.file_like`` is an an open file handle, or a path to a ``'.gz'`` file, or a path to a non-gzip file. - whether ``indexed_gzip`` is present (see :attr:`.openers.HAVE_INDEXED_GZIP`). @@ -270,24 +281,24 @@ def _should_keep_file_open(self, file_like, keep_file_open): and closed on each file access. The internal ``_keep_file_open`` flag is only relevant if - ``file_like`` is a ``'.gz'`` file, and the ``indexed_gzip`` library is + ``self.file_like`` is a ``'.gz'`` file, and the ``indexed_gzip`` library is present. This method returns the values to be used for the internal ``_persist_opener`` and ``_keep_file_open`` flags; these values are derived according to the following rules: - 1. If ``file_like`` is a file(-like) object, both flags are set to + 1. If ``self.file_like`` is a file(-like) object, both flags are set to ``False``. 2. If ``keep_file_open`` (as passed to :meth:``__init__``) is ``True``, both internal flags are set to ``True``. - 3. If ``keep_file_open`` is ``False``, but ``file_like`` is not a path + 3. If ``keep_file_open`` is ``False``, but ``self.file_like`` is not a path to a ``.gz`` file or ``indexed_gzip`` is not present, both flags are set to ``False``. - 4. If ``keep_file_open`` is ``False``, ``file_like`` is a path to a + 4. If ``keep_file_open`` is ``False``, ``self.file_like`` is a path to a ``.gz`` file, and ``indexed_gzip`` is present, ``_persist_opener`` is set to ``True``, and ``_keep_file_open`` is set to ``False``. In this case, file handle management is delegated to the @@ -296,8 +307,6 @@ def _should_keep_file_open(self, file_like, keep_file_open): Parameters ---------- - file_like : object - File-like object or filename, as passed to ``__init__``. keep_file_open : { True, False } Flag as passed to ``__init__``. @@ -320,10 +329,10 @@ def _should_keep_file_open(self, file_like, keep_file_open): raise ValueError('keep_file_open must be one of {None, True, False}') # file_like is a handle - keep_file_open is irrelevant - if hasattr(file_like, 'read') and hasattr(file_like, 'seek'): + if self._has_fh(): return False, False # if the file is a gzip file, and we have_indexed_gzip, - have_igzip = openers.HAVE_INDEXED_GZIP and file_like.endswith('.gz') + have_igzip = openers.HAVE_INDEXED_GZIP and self.file_like.endswith('.gz') persist_opener = keep_file_open or have_igzip return keep_file_open, persist_opener diff --git a/nibabel/tests/test_arrayproxy.py b/nibabel/tests/test_arrayproxy.py index e50caa54c9..acf6099859 100644 --- a/nibabel/tests/test_arrayproxy.py +++ b/nibabel/tests/test_arrayproxy.py @@ -553,16 +553,36 @@ def test_keep_file_open_true_false_invalid(): ArrayProxy(fname, ((10, 10, 10), dtype)) +def islock(l): + # isinstance doesn't work on threading.Lock? + return hasattr(l, 'acquire') and hasattr(l, 'release') + + def test_pickle_lock(): # Test that ArrayProxy can be pickled, and that thread lock is created - def islock(l): - # isinstance doesn't work on threading.Lock? - return hasattr(l, 'acquire') and hasattr(l, 'release') - proxy = ArrayProxy('dummyfile', ((10, 10, 10), np.float32)) assert islock(proxy._lock) pickled = pickle.dumps(proxy) unpickled = pickle.loads(pickled) assert islock(unpickled._lock) assert proxy._lock is not unpickled._lock + + +def test_copy(): + # Test copying array proxies + + # If the file-like is a file name, get a new lock + proxy = ArrayProxy('dummyfile', ((10, 10, 10), np.float32)) + assert islock(proxy._lock) + copied = proxy.copy() + assert islock(copied._lock) + assert proxy._lock is not copied._lock + + # If an open filehandle, the lock should be shared to + # avoid changing filehandle state in critical sections + proxy = ArrayProxy(BytesIO(), ((10, 10, 10), np.float32)) + assert islock(proxy._lock) + copied = proxy.copy() + assert islock(copied._lock) + assert proxy._lock is copied._lock From 1c1845f75c4e2cfacfa4fa8b485adf6b09b650a1 Mon Sep 17 00:00:00 2001 From: Chris Markiewicz Date: Wed, 6 Dec 2023 08:19:19 -0500 Subject: [PATCH 3/3] TEST: Check IndexedGzipFile ArrayProxys are copied properly --- nibabel/tests/test_arrayproxy.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/nibabel/tests/test_arrayproxy.py b/nibabel/tests/test_arrayproxy.py index acf6099859..a207e4ed6d 100644 --- a/nibabel/tests/test_arrayproxy.py +++ b/nibabel/tests/test_arrayproxy.py @@ -23,7 +23,7 @@ from .. import __version__ from ..arrayproxy import ArrayProxy, get_obj_dtype, is_proxy, reshape_dataobj from ..deprecator import ExpiredDeprecationError -from ..nifti1 import Nifti1Header +from ..nifti1 import Nifti1Header, Nifti1Image from ..openers import ImageOpener from ..testing import memmap_after_ufunc from ..tmpdirs import InTemporaryDirectory @@ -586,3 +586,20 @@ def test_copy(): copied = proxy.copy() assert islock(copied._lock) assert proxy._lock is copied._lock + + +def test_copy_with_indexed_gzip_handle(tmp_path): + indexed_gzip = pytest.importorskip('indexed_gzip') + + spec = ((50, 50, 50, 50), np.float32, 352, 1, 0) + data = np.arange(np.prod(spec[0]), dtype=spec[1]).reshape(spec[0]) + fname = str(tmp_path / 'test.nii.gz') + Nifti1Image(data, np.eye(4)).to_filename(fname) + + with indexed_gzip.IndexedGzipFile(fname) as fobj: + proxy = ArrayProxy(fobj, spec) + copied = proxy.copy() + + assert proxy.file_like is copied.file_like + assert np.array_equal(proxy[0, 0, 0], copied[0, 0, 0]) + assert np.array_equal(proxy[-1, -1, -1], copied[-1, -1, -1])