Add shards to array strategy (#2822) · jbms/zarr-python@3c25dac · GitHub
[go: up one dir, main page]

Skip to content

Commit 3c25dac

Browse files
dcherian and d-v-b authored
Add shards to array strategy (zarr-developers#2822)
* Add shards to array strategy
* Prioritize v3 over v2 in property tests

---------

Co-authored-by: Davis Bennett <davis.v.bennett@gmail.com>
1 parent 24ef221 commit 3c25dac

File tree

2 files changed

+39
-17
lines changed

2 files changed

+39
-17
lines changed

changes/2822.feature.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Add arbitrary `shards` to Hypothesis strategy for generating arrays.

src/zarr/testing/strategies.py

Lines changed: 38 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ def clear_store(x: Store) -> Store:
9696
# So we map a clear to reset the store.
9797
stores = st.builds(MemoryStore, st.just({})).map(clear_store)
9898
compressors = st.sampled_from([None, "default"])
99-
zarr_formats: st.SearchStrategy[ZarrFormat] = st.sampled_from([2, 3])
99+
zarr_formats: st.SearchStrategy[ZarrFormat] = st.sampled_from([3, 2])
100100
array_shapes = npst.array_shapes(max_dims=4, min_side=0)
101101

102102

@@ -166,6 +166,32 @@ def numpy_arrays(
166166
return draw(npst.arrays(dtype=dtype, shape=shapes))
167167

168168

169+
@st.composite # type: ignore[misc]
170+
def chunk_shapes(draw: st.DrawFn, *, shape: tuple[int, ...]) -> tuple[int, ...]:
171+
# We want this strategy to shrink towards arrays with smaller number of chunks
172+
# 1. st.integers() shrinks towards smaller values. So we use that to generate number of chunks
173+
numchunks = draw(
174+
st.tuples(*[st.integers(min_value=0 if size == 0 else 1, max_value=size) for size in shape])
175+
)
176+
# 2. and now generate the chunks tuple
177+
return tuple(
178+
size // nchunks if nchunks > 0 else 0
179+
for size, nchunks in zip(shape, numchunks, strict=True)
180+
)
181+
182+
183+
@st.composite # type: ignore[misc]
184+
def shard_shapes(
185+
draw: st.DrawFn, *, shape: tuple[int, ...], chunk_shape: tuple[int, ...]
186+
) -> tuple[int, ...]:
187+
# We want this strategy to shrink towards arrays with smaller number of shards
188+
# shards must be an integral number of chunks
189+
assert all(c != 0 for c in chunk_shape)
190+
numchunks = tuple(s // c for s, c in zip(shape, chunk_shape, strict=True))
191+
multiples = tuple(draw(st.integers(min_value=1, max_value=nc)) for nc in numchunks)
192+
return tuple(m * c for m, c in zip(multiples, chunk_shape, strict=True))
193+
194+
169195
@st.composite # type: ignore[misc]
170196
def np_array_and_chunks(
171197
draw: st.DrawFn, *, arrays: st.SearchStrategy[np.ndarray] = numpy_arrays
@@ -175,19 +201,7 @@ def np_array_and_chunks(
175201
Returns: a tuple of the array and a suitable random chunking for it.
176202
"""
177203
array = draw(arrays)
178-
# We want this strategy to shrink towards arrays with smaller number of chunks
179-
# 1. st.integers() shrinks towards smaller values. So we use that to generate number of chunks
180-
numchunks = draw(
181-
st.tuples(
182-
*[st.integers(min_value=0 if size == 0 else 1, max_value=size) for size in array.shape]
183-
)
184-
)
185-
# 2. and now generate the chunks tuple
186-
chunks = tuple(
187-
size // nchunks if nchunks > 0 else 0
188-
for size, nchunks in zip(array.shape, numchunks, strict=True)
189-
)
190-
return (array, chunks)
204+
return (array, draw(chunk_shapes(shape=array.shape)))
191205

192206

193207
@st.composite # type: ignore[misc]
@@ -210,7 +224,12 @@ def arrays(
210224
zarr_format = draw(zarr_formats)
211225
if arrays is None:
212226
arrays = numpy_arrays(shapes=shapes, zarr_formats=st.just(zarr_format))
213-
nparray, chunks = draw(np_array_and_chunks(arrays=arrays))
227+
nparray = draw(arrays)
228+
chunk_shape = draw(chunk_shapes(shape=nparray.shape))
229+
if zarr_format == 3 and all(c > 0 for c in chunk_shape):
230+
shard_shape = draw(st.none() | shard_shapes(shape=nparray.shape, chunk_shape=chunk_shape))
231+
else:
232+
shard_shape = None
214233
# test that None works too.
215234
fill_value = draw(st.one_of([st.none(), npst.from_dtype(nparray.dtype)]))
216235
# compressor = draw(compressors)
@@ -223,7 +242,8 @@ def arrays(
223242
a = root.create_array(
224243
array_path,
225244
shape=nparray.shape,
226-
chunks=chunks,
245+
chunks=chunk_shape,
246+
shards=shard_shape,
227247
dtype=nparray.dtype,
228248
attributes=attributes,
229249
# compressor=compressor, # FIXME
@@ -236,7 +256,8 @@ def arrays(
236256
assert a.name is not None
237257
assert isinstance(root[array_path], Array)
238258
assert nparray.shape == a.shape
239-
assert chunks == a.chunks
259+
assert chunk_shape == a.chunks
260+
assert shard_shape == a.shards
240261
assert array_path == a.path, (path, name, array_path, a.name, a.path)
241262
assert a.basename == name, (a.basename, name)
242263
assert dict(a.attrs) == expected_attrs

0 commit comments

Comments
 (0)
0