Start stop for iterator (#621) · trinetta/zarr-python@7c9dc78 · GitHub
[go: up one dir, main page]

Skip to content

Commit 7c9dc78

Browse files
Start stop for iterator (zarr-developers#621)
* added option to iterate over part of array * fixed a small error in islice * added test for array.islice * mimic itertools.islice input param edge case handling * add docstring for islice; make input argument error handling more verbose * add missing input param edge case to tests * fix islice docstring * add islice to API docs * API doc entry in wrong file fixed Co-authored-by: jmoore <josh@glencoesoftware.com> Co-authored-by: Josh Moore <j.a.moore@dundee.ac.uk>
1 parent 946ed95 commit 7c9dc78

File tree

4 files changed

+108
-4
lines changed

4 files changed

+108
-4
lines changed

docs/api/hierarchy.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,4 +38,4 @@ Groups (``zarr.hierarchy``)
3838
.. automethod:: zeros_like
3939
.. automethod:: ones_like
4040
.. automethod:: full_like
41-
.. automethod:: move
41+
.. automethod:: move

docs/release.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,10 @@ This release will be the last to support Python 3.5, next version of Zarr will b
116116
applies to reading.
117117
By :user:`Martin Durant <martindurant>`; :issue:`606`
118118

119+
* Efficient iteration expanded with option to pass start and stop index via
120+
``array.islice``.
121+
By :user:`Sebastian Grill <yetyetanotherusername>`, :issue:`615`.
122+
119123
.. _release_2.4.0:
120124

121125
2.4.0

zarr/core.py

Lines changed: 56 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -458,18 +458,72 @@ def __array__(self, *args):
458458
a = a.astype(args[0])
459459
return a
460460

461-
def __iter__(self):
461+
def islice(self, start=None, end=None):
462+
"""
463+
Yield a generator for iterating over the entire or parts of the
464+
array. Uses a cache so chunks only have to be decompressed once.
465+
466+
Parameters
467+
----------
468+
start : int, optional
469+
Start index for the generator to start at. Defaults to 0.
470+
end : int, optional
471+
End index for the generator to stop at. Defaults to self.shape[0].
472+
473+
Yields
474+
------
475+
out : generator
476+
A generator that can be used to iterate over the requested region
477+
of the array.
478+
479+
Examples
480+
--------
481+
Set up a 1-dimensional array::
482+
483+
>>> import zarr
484+
>>> import numpy as np
485+
>>> z = zarr.array(np.arange(100))
486+
487+
Iterate over part of the array:
488+
>>> for value in z.islice(25, 30): value;
489+
25
490+
26
491+
27
492+
28
493+
29
494+
"""
495+
462496
if len(self.shape) == 0:
463497
# Same error as numpy
464498
raise TypeError("iteration over a 0-d array")
499+
if start is None:
500+
start = 0
501+
if end is None or end > self.shape[0]:
502+
end = self.shape[0]
503+
504+
if not isinstance(start, int) or start < 0:
505+
raise ValueError('start must be a nonnegative integer')
506+
507+
if not isinstance(end, int) or end < 0:
508+
raise ValueError('end must be a nonnegative integer')
509+
465510
# Avoid repeatedly decompressing chunks by iterating over the chunks
466511
# in the first dimension.
467512
chunk_size = self.chunks[0]
468-
for j in range(self.shape[0]):
513+
chunk = None
514+
for j in range(start, end):
469515
if j % chunk_size == 0:
470516
chunk = self[j: j + chunk_size]
517+
# init chunk if we start offset of chunk borders
518+
elif chunk is None:
519+
chunk_start = j - j % chunk_size
520+
chunk_end = chunk_start + chunk_size
521+
chunk = self[chunk_start:chunk_end]
471522
yield chunk[j % chunk_size]
472523

524+
def __iter__(self):
525+
return self.islice()
526+
473527
def __len__(self):
474528
if self.shape:
475529
return self.shape[0]

zarr/tests/test_core.py

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1347,7 +1347,8 @@ def test_object_codec_warnings(self):
13471347
if hasattr(z.store, 'close'):
13481348
z.store.close()
13491349

1350-
def test_zero_d_iter(self):
1350+
def test_iteration_exceptions(self):
1351+
# zero d array
13511352
a = np.array(1, dtype=int)
13521353
z = self.create_array(shape=a.shape, dtype=int)
13531354
z[...] = a
@@ -1357,6 +1358,27 @@ def test_zero_d_iter(self):
13571358
with pytest.raises(TypeError):
13581359
# noinspection PyStatementEffect
13591360
list(z)
1361+
1362+
# input argument error handling
1363+
a = np.array((10, 10), dtype=int)
1364+
z = self.create_array(shape=a.shape, dtype=int)
1365+
z[...] = a
1366+
1367+
params = (
1368+
(-1, 0),
1369+
(0, -1),
1370+
(0.5, 1),
1371+
(0, 0.5)
1372+
)
1373+
1374+
for start, end in params:
1375+
with pytest.raises(ValueError):
1376+
# noinspection PyStatementEffect
1377+
list(z.islice(start, end))
1378+
1379+
# check behavior for start > end
1380+
assert [] == list(z.islice(6, 5))
1381+
13601382
if hasattr(z.store, 'close'):
13611383
z.store.close()
13621384

@@ -1385,6 +1407,30 @@ def test_iter(self):
13851407
if hasattr(z.store, 'close'):
13861408
z.store.close()
13871409

1410+
def test_islice(self):
1411+
params = (
1412+
((1,), (1,), 0, 1),
1413+
((2,), (1,), 0, 1),
1414+
((1,), (2,), 0, 1),
1415+
((3,), (3,), 1, 2),
1416+
((1000,), (100,), 150, 1050),
1417+
((100,), (1000,), 25, 75),
1418+
((1, 100), (1, 1), 0, 1),
1419+
((100, 1), (3, 1), 56, 100),
1420+
((100, 100), (10, 10), 13, 99),
1421+
((10, 10, 10), (3, 3, 3), 2, 4),
1422+
)
1423+
for shape, chunks, start, end in params:
1424+
z = self.create_array(shape=shape, chunks=chunks, dtype=int)
1425+
a = np.arange(np.product(shape)).reshape(shape)
1426+
z[:] = a
1427+
end_array = min(end, a.shape[0])
1428+
for expect, actual in zip_longest(a[start:end_array],
1429+
z.islice(start, end)):
1430+
assert_array_equal(expect, actual)
1431+
if hasattr(z.store, 'close'):
1432+
z.store.close()
1433+
13881434
def test_compressors(self):
13891435
compressors = [
13901436
None, BZ2(), Blosc(), LZ4(), Zlib(), GZip()

0 commit comments

Comments
 (0)
0