Added Array.info_complete (#2514) · zarr-developers/zarr-python@206d145 · GitHub
[go: up one dir, main page]

Skip to content

Commit 206d145

Browse files
TomAugspurger, d-v-b, normanrz
authored
Added Array.info_complete (#2514)
Now that Store.getsize is a thing, we can do info_complete which includes the number of chunks written and the size of those bytes. Co-authored-by: Davis Bennett <davis.v.bennett@gmail.com> Co-authored-by: Norman Rzepka <code@normanrz.com>
1 parent 2961246 commit 206d145

File tree

2 files changed

+127
-8
lines changed

2 files changed

+127
-8
lines changed

src/zarr/core/array.py

Lines changed: 45 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1346,18 +1346,53 @@ def info(self) -> Any:
13461346
AsyncArray.info_complete
13471347
All information about an array, including dynamic information
13481348
like the number of bytes and chunks written.
1349+
1350+
Examples
1351+
--------
1352+
1353+
>>> arr = await zarr.api.asynchronous.create(
1354+
... path="array", shape=(3, 4, 5), chunks=(2, 2, 2)
1355+
... )
1356+
>>> arr.info
1357+
Type : Array
1358+
Zarr format : 3
1359+
Data type : DataType.float64
1360+
Shape : (3, 4, 5)
1361+
Chunk shape : (2, 2, 2)
1362+
Order : C
1363+
Read-only : False
1364+
Store type : MemoryStore
1365+
Codecs : [{'endian': <Endian.little: 'little'>}]
1366+
No. bytes : 480
13491367
"""
13501368
return self._info()
13511369

13521370
async def info_complete(self) -> Any:
1353-
# TODO: get the size of the object from the store.
1354-
extra = {
1355-
"count_chunks_initialized": await self.nchunks_initialized(),
1356-
# count_bytes_stored isn't yet implemented.
1357-
}
1358-
return self._info(extra=extra)
1359-
1360-
def _info(self, extra: dict[str, int] | None = None) -> Any:
1371+
"""
1372+
Return all the information for an array, including dynamic information like the storage size.
1373+
1374+
In addition to the static information, this provides
1375+
1376+
- The count of chunks initialized
1377+
- The sum of the bytes written
1378+
1379+
Returns
1380+
-------
1381+
ArrayInfo
1382+
1383+
See Also
1384+
--------
1385+
AsyncArray.info
1386+
A property giving just the statically known information about an array.
1387+
"""
1388+
return self._info(
1389+
await self.nchunks_initialized(),
1390+
await self.store_path.store.getsize_prefix(self.store_path.path),
1391+
)
1392+
1393+
def _info(
1394+
self, count_chunks_initialized: int | None = None, count_bytes_stored: int | None = None
1395+
) -> Any:
13611396
kwargs: dict[str, Any] = {}
13621397
if self.metadata.zarr_format == 2:
13631398
assert isinstance(self.metadata, ArrayV2Metadata)
@@ -1386,6 +1421,8 @@ def _info(self, extra: dict[str, int] | None = None) -> Any:
13861421
_read_only=self.read_only,
13871422
_store_type=type(self.store_path.store).__name__,
13881423
_count_bytes=self.dtype.itemsize * self.size,
1424+
_count_bytes_stored=count_bytes_stored,
1425+
_count_chunks_initialized=count_chunks_initialized,
13891426
**kwargs,
13901427
)
13911428

tests/test_array.py

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import dataclasses
12
import json
23
import math
34
import pickle
@@ -474,6 +475,87 @@ def test_info_v3(self) -> None:
474475
)
475476
assert result == expected
476477

478+
def test_info_complete(self) -> None:
479+
arr = zarr.create(shape=(4, 4), chunks=(2, 2), zarr_format=3)
480+
result = arr.info_complete()
481+
expected = ArrayInfo(
482+
_zarr_format=3,
483+
_data_type=DataType.parse("float64"),
484+
_shape=(4, 4),
485+
_chunk_shape=(2, 2),
486+
_order="C",
487+
_read_only=False,
488+
_store_type="MemoryStore",
489+
_codecs=[BytesCodec()],
490+
_count_bytes=128,
491+
_count_chunks_initialized=0,
492+
_count_bytes_stored=373, # the metadata?
493+
)
494+
assert result == expected
495+
496+
arr[:2, :2] = 10
497+
result = arr.info_complete()
498+
expected = dataclasses.replace(
499+
expected, _count_chunks_initialized=1, _count_bytes_stored=405
500+
)
501+
assert result == expected
502+
503+
async def test_info_v2_async(self) -> None:
504+
arr = await zarr.api.asynchronous.create(shape=(4, 4), chunks=(2, 2), zarr_format=2)
505+
result = arr.info
506+
expected = ArrayInfo(
507+
_zarr_format=2,
508+
_data_type=np.dtype("float64"),
509+
_shape=(4, 4),
510+
_chunk_shape=(2, 2),
511+
_order="C",
512+
_read_only=False,
513+
_store_type="MemoryStore",
514+
_count_bytes=128,
515+
)
516+
assert result == expected
517+
518+
async def test_info_v3_async(self) -> None:
519+
arr = await zarr.api.asynchronous.create(shape=(4, 4), chunks=(2, 2), zarr_format=3)
520+
result = arr.info
521+
expected = ArrayInfo(
522+
_zarr_format=3,
523+
_data_type=DataType.parse("float64"),
524+
_shape=(4, 4),
525+
_chunk_shape=(2, 2),
526+
_order="C",
527+
_read_only=False,
528+
_store_type="MemoryStore",
529+
_codecs=[BytesCodec()],
530+
_count_bytes=128,
531+
)
532+
assert result == expected
533+
534+
async def test_info_complete_async(self) -> None:
535+
arr = await zarr.api.asynchronous.create(shape=(4, 4), chunks=(2, 2), zarr_format=3)
536+
result = await arr.info_complete()
537+
expected = ArrayInfo(
538+
_zarr_format=3,
539+
_data_type=DataType.parse("float64"),
540+
_shape=(4, 4),
541+
_chunk_shape=(2, 2),
542+
_order="C",
543+
_read_only=False,
544+
_store_type="MemoryStore",
545+
_codecs=[BytesCodec()],
546+
_count_bytes=128,
547+
_count_chunks_initialized=0,
548+
_count_bytes_stored=373, # the metadata?
549+
)
550+
assert result == expected
551+
552+
await arr.setitem((slice(2), slice(2)), 10)
553+
result = await arr.info_complete()
554+
expected = dataclasses.replace(
555+
expected, _count_chunks_initialized=1, _count_bytes_stored=405
556+
)
557+
assert result == expected
558+
477559

478560
@pytest.mark.parametrize("store", ["memory"], indirect=True)
479561
@pytest.mark.parametrize("zarr_format", [2, 3])

0 commit comments

Comments
 (0)
0