From 61f6dd00f78b06f3aef4b312e4ef82ee0c9d7870 Mon Sep 17 00:00:00 2001 From: William Moore Date: Mon, 9 Dec 2024 14:42:29 +0000 Subject: [PATCH 01/11] Fix usage of config json_indent in V3JsonEncoder --- src/zarr/core/metadata/v3.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zarr/core/metadata/v3.py b/src/zarr/core/metadata/v3.py index 6ea9ed69f1..8b645e3bb0 100644 --- a/src/zarr/core/metadata/v3.py +++ b/src/zarr/core/metadata/v3.py @@ -135,8 +135,8 @@ def parse_storage_transformers(data: object) -> tuple[dict[str, JSON], ...]: class V3JsonEncoder(json.JSONEncoder): def __init__(self, *args: Any, **kwargs: Any) -> None: - self.indent = kwargs.pop("indent", config.get("json_indent")) super().__init__(*args, **kwargs) + self.indent = config.get("json_indent") def default(self, o: object) -> Any: if isinstance(o, np.dtype): From 5ab3640665eb493c906aca2f3b0faa5df2a7d2d5 Mon Sep 17 00:00:00 2001 From: William Moore Date: Mon, 9 Dec 2024 15:29:07 +0000 Subject: [PATCH 02/11] Add test for json_indent --- tests/test_metadata/test_v3.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/tests/test_metadata/test_v3.py b/tests/test_metadata/test_v3.py index 4e4ba23313..560b19e4ef 100644 --- a/tests/test_metadata/test_v3.py +++ b/tests/test_metadata/test_v3.py @@ -10,7 +10,7 @@ from zarr.codecs.bytes import BytesCodec from zarr.core.buffer import default_buffer_prototype from zarr.core.chunk_key_encodings import DefaultChunkKeyEncoding, V2ChunkKeyEncoding -from zarr.core.group import parse_node_type +from zarr.core.group import parse_node_type, GroupMetadata from zarr.core.metadata.v3 import ( ArrayV3Metadata, DataType, @@ -304,6 +304,19 @@ def test_metadata_to_dict( assert observed == expected +def test_json_indent(): + m = GroupMetadata() + d = m.to_buffer_dict(default_buffer_prototype())["zarr.json"].to_bytes() + class TestIndentEncoder(json.JSONEncoder): + def __init__(self, *args: Any, **kwargs: Any) -> None: + super().__init__(*args, **kwargs) + self.indent = 2 + + # expected has extra ' ' on each line compared with json.dumps( indent=2) + expected = json.dumps(json.loads(d), cls=TestIndentEncoder).encode() + assert d == expected + + # @pytest.mark.parametrize("fill_value", [-1, 0, 1, 2932897]) # @pytest.mark.parametrize("precision", ["ns", "D"]) # async def test_datetime_metadata(fill_value: int, precision: str) -> None: From 37f96b011d353868ff24b0e3582fe62de12338ae Mon Sep 17 00:00:00 2001 From: William Moore Date: Mon, 9 Dec 2024 15:56:04 +0000 Subject: [PATCH 03/11] parametrize json indent --- tests/test_metadata/test_v3.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/tests/test_metadata/test_v3.py b/tests/test_metadata/test_v3.py index 560b19e4ef..0ddb9d18f3 100644 --- a/tests/test_metadata/test_v3.py +++ b/tests/test_metadata/test_v3.py @@ -10,6 +10,7 @@ from zarr.codecs.bytes import BytesCodec from zarr.core.buffer import default_buffer_prototype from zarr.core.chunk_key_encodings import DefaultChunkKeyEncoding, V2ChunkKeyEncoding +from zarr.core.config import config from zarr.core.group import parse_node_type, GroupMetadata from zarr.core.metadata.v3 import ( ArrayV3Metadata, @@ -304,17 +305,19 @@ def test_metadata_to_dict( assert observed == expected -def test_json_indent(): - m = GroupMetadata() - d = m.to_buffer_dict(default_buffer_prototype())["zarr.json"].to_bytes() - class TestIndentEncoder(json.JSONEncoder): - def __init__(self, *args: Any, **kwargs: Any) -> None: - super().__init__(*args, **kwargs) - self.indent = 2 +@pytest.mark.parametrize('indent', (2, 4)) +def test_json_indent(indent: int): + with config.set({"json_indent": indent}): + m = GroupMetadata() + d = m.to_buffer_dict(default_buffer_prototype())["zarr.json"].to_bytes() + class TestIndentEncoder(json.JSONEncoder): + def __init__(self, *args: Any, **kwargs: Any) -> None: + super().__init__(*args, **kwargs) + self.indent = indent - # expected has extra ' ' on each line compared with json.dumps( indent=2) - expected = json.dumps(json.loads(d), cls=TestIndentEncoder).encode() - assert d == expected + # expected has extra ' ' on each line compared with json.dumps( indent=2) + expected = json.dumps(json.loads(d), cls=TestIndentEncoder).encode() + assert d == expected # @pytest.mark.parametrize("fill_value", [-1, 0, 1, 2932897]) From 5af9d5fb2a1e6925eb3ba7bb3d8a9a13a67b21f7 Mon Sep 17 00:00:00 2001 From: William Moore Date: Mon, 9 Dec 2024 16:08:56 +0000 Subject: [PATCH 04/11] Add None to indent test parameters --- tests/test_metadata/test_v3.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/test_metadata/test_v3.py b/tests/test_metadata/test_v3.py index 0ddb9d18f3..64d732b21a 100644 --- a/tests/test_metadata/test_v3.py +++ b/tests/test_metadata/test_v3.py @@ -305,7 +305,7 @@ def test_metadata_to_dict( assert observed == expected -@pytest.mark.parametrize('indent', (2, 4)) +@pytest.mark.parametrize('indent', (2, 4, None)) def test_json_indent(indent: int): with config.set({"json_indent": indent}): m = GroupMetadata() @@ -315,9 +315,13 @@ def __init__(self, *args: Any, **kwargs: Any) -> None: super().__init__(*args, **kwargs) self.indent = indent - # expected has extra ' ' on each line compared with json.dumps( indent=2) + # using json.JSONEncoder adds an extra ' ' on each line + # compared with json.dumps(json.loads(d), indent=2)... expected = json.dumps(json.loads(d), cls=TestIndentEncoder).encode() assert d == expected + # ...but we can check that None really removes indent. + if indent is None: + assert d == json.dumps(json.loads(d), indent=indent).encode() # @pytest.mark.parametrize("fill_value", [-1, 0, 1, 2932897]) From 1e37dd22d5e6a7d4b1e7f0a7cc5bafd519dde17b Mon Sep 17 00:00:00 2001 From: William Moore Date: Mon, 9 Dec 2024 16:17:30 +0000 Subject: [PATCH 05/11] ruff fix --- tests/test_metadata/test_v3.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_metadata/test_v3.py b/tests/test_metadata/test_v3.py index 64d732b21a..fe28ec0f0b 100644 --- a/tests/test_metadata/test_v3.py +++ b/tests/test_metadata/test_v3.py @@ -305,7 +305,7 @@ def test_metadata_to_dict( assert observed == expected -@pytest.mark.parametrize('indent', (2, 4, None)) +@pytest.mark.parametrize('indent', [2, 4, None]) def test_json_indent(indent: int): with config.set({"json_indent": indent}): m = GroupMetadata() From 599eefca84574c354c157bc884427bb35751f991 Mon Sep 17 00:00:00 2001 From: William Moore Date: Mon, 9 Dec 2024 16:19:09 +0000 Subject: [PATCH 06/11] other ruff fixes --- tests/test_metadata/test_v3.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/test_metadata/test_v3.py b/tests/test_metadata/test_v3.py index fe28ec0f0b..817facfecc 100644 --- a/tests/test_metadata/test_v3.py +++ b/tests/test_metadata/test_v3.py @@ -11,7 +11,7 @@ from zarr.core.buffer import default_buffer_prototype from zarr.core.chunk_key_encodings import DefaultChunkKeyEncoding, V2ChunkKeyEncoding from zarr.core.config import config -from zarr.core.group import parse_node_type, GroupMetadata +from zarr.core.group import GroupMetadata, parse_node_type from zarr.core.metadata.v3 import ( ArrayV3Metadata, DataType, @@ -305,11 +305,12 @@ def test_metadata_to_dict( assert observed == expected -@pytest.mark.parametrize('indent', [2, 4, None]) +@pytest.mark.parametrize("indent", [2, 4, None]) def test_json_indent(indent: int): with config.set({"json_indent": indent}): m = GroupMetadata() d = m.to_buffer_dict(default_buffer_prototype())["zarr.json"].to_bytes() + class TestIndentEncoder(json.JSONEncoder): def __init__(self, *args: Any, **kwargs: Any) -> None: super().__init__(*args, **kwargs) From 263dac43f21753fa267d83584195175299843dfe Mon Sep 17 00:00:00 2001 From: Will Moore Date: Mon, 9 Dec 2024 22:59:42 +0000 Subject: [PATCH 07/11] Update src/zarr/core/metadata/v3.py Co-authored-by: Joe Hamman --- src/zarr/core/metadata/v3.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zarr/core/metadata/v3.py b/src/zarr/core/metadata/v3.py index 8b645e3bb0..2cfbab03c1 100644 --- a/src/zarr/core/metadata/v3.py +++ b/src/zarr/core/metadata/v3.py @@ -136,7 +136,7 @@ def parse_storage_transformers(data: object) -> tuple[dict[str, JSON], ...]: class V3JsonEncoder(json.JSONEncoder): def __init__(self, *args: Any, **kwargs: Any) -> None: super().__init__(*args, **kwargs) - self.indent = config.get("json_indent") + self.indent = kwargs.pop("indent") or config.get("json_indent") def default(self, o: object) -> Any: if isinstance(o, np.dtype): From 7a442e1b700e0d908732e21be89250ce48202560 Mon Sep 17 00:00:00 2001 From: William Moore Date: Mon, 9 Dec 2024 23:37:27 +0000 Subject: [PATCH 08/11] Use explicit json encoder args --- src/zarr/core/metadata/v3.py | 27 ++++++++++++++++++++++++--- tests/test_metadata/test_v3.py | 14 +------------- 2 files changed, 25 insertions(+), 16 deletions(-) diff --git a/src/zarr/core/metadata/v3.py b/src/zarr/core/metadata/v3.py index 2cfbab03c1..6efc11ed44 100644 --- a/src/zarr/core/metadata/v3.py +++ b/src/zarr/core/metadata/v3.py @@ -134,9 +134,30 @@ def parse_storage_transformers(data: object) -> tuple[dict[str, JSON], ...]: class V3JsonEncoder(json.JSONEncoder): - def __init__(self, *args: Any, **kwargs: Any) -> None: - super().__init__(*args, **kwargs) - self.indent = kwargs.pop("indent") or config.get("json_indent") + def __init__( + self, + *, + skipkeys=False, + ensure_ascii=True, + check_circular=True, + allow_nan=True, + sort_keys=False, + indent=None, + separators=None, + default=None, + ) -> None: + if indent is None: + indent = config.get("json_indent") + super().__init__( + skipkeys=skipkeys, + ensure_ascii=ensure_ascii, + check_circular=check_circular, + allow_nan=allow_nan, + sort_keys=sort_keys, + indent=indent, + separators=separators, + default=default, + ) def default(self, o: object) -> Any: if isinstance(o, np.dtype): diff --git a/tests/test_metadata/test_v3.py b/tests/test_metadata/test_v3.py index 817facfecc..9b1379689f 100644 --- a/tests/test_metadata/test_v3.py +++ b/tests/test_metadata/test_v3.py @@ -310,19 +310,7 @@ def test_json_indent(indent: int): with config.set({"json_indent": indent}): m = GroupMetadata() d = m.to_buffer_dict(default_buffer_prototype())["zarr.json"].to_bytes() - - class TestIndentEncoder(json.JSONEncoder): - def __init__(self, *args: Any, **kwargs: Any) -> None: - super().__init__(*args, **kwargs) - self.indent = indent - - # using json.JSONEncoder adds an extra ' ' on each line - # compared with json.dumps(json.loads(d), indent=2)... - expected = json.dumps(json.loads(d), cls=TestIndentEncoder).encode() - assert d == expected - # ...but we can check that None really removes indent. - if indent is None: - assert d == json.dumps(json.loads(d), indent=indent).encode() + assert d == json.dumps(json.loads(d), indent=indent).encode() # @pytest.mark.parametrize("fill_value", [-1, 0, 1, 2932897]) From 1442f4a5c762c922af90281b1c4da7c8a30b5269 Mon Sep 17 00:00:00 2001 From: William Moore Date: Mon, 9 Dec 2024 23:54:39 +0000 Subject: [PATCH 09/11] Add types --- src/zarr/core/metadata/v3.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/zarr/core/metadata/v3.py b/src/zarr/core/metadata/v3.py index 6efc11ed44..a22d81115b 100644 --- a/src/zarr/core/metadata/v3.py +++ b/src/zarr/core/metadata/v3.py @@ -7,6 +7,7 @@ from zarr.core.buffer.core import default_buffer_prototype if TYPE_CHECKING: + from collections.abc import Callable from typing import Self from zarr.core.buffer import Buffer, BufferPrototype @@ -137,14 +138,14 @@ class V3JsonEncoder(json.JSONEncoder): def __init__( self, *, - skipkeys=False, - ensure_ascii=True, - check_circular=True, - allow_nan=True, - sort_keys=False, - indent=None, - separators=None, - default=None, + skipkeys: bool = False, + ensure_ascii: bool = True, + check_circular: bool = True, + allow_nan: bool = True, + sort_keys: bool = False, + indent: int | None = None, + separators: tuple[str, str] | None = None, + default: Callable[[object], object] | None = None, ) -> None: if indent is None: indent = config.get("json_indent") From 7123ce354c62ed81f6c59a049893bc70de4ec2b8 Mon Sep 17 00:00:00 2001 From: William Moore Date: Tue, 17 Dec 2024 13:48:08 +0000 Subject: [PATCH 10/11] Update byte counts for tests --- tests/test_array.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tests/test_array.py b/tests/test_array.py index 86da801d1f..16f3ce5994 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -376,25 +376,25 @@ async def test_chunks_initialized() -> None: def test_nbytes_stored() -> None: arr = zarr.create(shape=(100,), chunks=(10,), dtype="i4") result = arr.nbytes_stored() - assert result == 366 # the size of the metadata document. This is a fragile test. + assert result == 502 # the size of the metadata document. This is a fragile test. arr[:50] = 1 result = arr.nbytes_stored() - assert result == 566 # the size with 5 chunks filled. + assert result == 702 # the size with 5 chunks filled. arr[50:] = 2 result = arr.nbytes_stored() - assert result == 766 # the size with all chunks filled. + assert result == 902 # the size with all chunks filled. async def test_nbytes_stored_async() -> None: arr = await zarr.api.asynchronous.create(shape=(100,), chunks=(10,), dtype="i4") result = await arr.nbytes_stored() - assert result == 366 # the size of the metadata document. This is a fragile test. + assert result == 502 # the size of the metadata document. This is a fragile test. await arr.setitem(slice(50), 1) result = await arr.nbytes_stored() - assert result == 566 # the size with 5 chunks filled. + assert result == 702 # the size with 5 chunks filled. await arr.setitem(slice(50, 100), 2) result = await arr.nbytes_stored() - assert result == 766 # the size with all chunks filled. + assert result == 902 # the size with all chunks filled. def test_default_fill_values() -> None: @@ -489,14 +489,14 @@ def test_info_complete(self) -> None: _codecs=[BytesCodec()], _count_bytes=128, _count_chunks_initialized=0, - _count_bytes_stored=373, # the metadata? + _count_bytes_stored=521, # the metadata? ) assert result == expected arr[:2, :2] = 10 result = arr.info_complete() expected = dataclasses.replace( - expected, _count_chunks_initialized=1, _count_bytes_stored=405 + expected, _count_chunks_initialized=1, _count_bytes_stored=553 ) assert result == expected @@ -545,14 +545,14 @@ async def test_info_complete_async(self) -> None: _codecs=[BytesCodec()], _count_bytes=128, _count_chunks_initialized=0, - _count_bytes_stored=373, # the metadata? + _count_bytes_stored=521, # the metadata? ) assert result == expected await arr.setitem((slice(2), slice(2)), 10) result = await arr.info_complete() expected = dataclasses.replace( - expected, _count_chunks_initialized=1, _count_bytes_stored=405 + expected, _count_chunks_initialized=1, _count_bytes_stored=553 ) assert result == expected From e2622357de190ce27561c3e883e55eb8e4907c59 Mon Sep 17 00:00:00 2001 From: David Stansby Date: Wed, 8 Jan 2025 10:12:35 +0000 Subject: [PATCH 11/11] Fix doctests --- docs/user-guide/arrays.rst | 4 ++-- docs/user-guide/groups.rst | 4 ++-- docs/user-guide/performance.rst | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/user-guide/arrays.rst b/docs/user-guide/arrays.rst index ba85ce1cda..ae2c4b47eb 100644 --- a/docs/user-guide/arrays.rst +++ b/docs/user-guide/arrays.rst @@ -209,7 +209,7 @@ prints additional diagnostics, e.g.:: Serializer : BytesCodec(endian=) Compressors : (BloscCodec(typesize=4, cname=, clevel=3, shuffle=, blocksize=0),) No. bytes : 400000000 (381.5M) - No. bytes stored : 9696302 + No. bytes stored : 9696520 Storage ratio : 41.3 Chunks Initialized : 100 @@ -611,7 +611,7 @@ Sharded arrays can be created by providing the ``shards`` parameter to :func:`za Serializer : BytesCodec(endian=) Compressors : (ZstdCodec(level=0, checksum=False),) No. bytes : 100000000 (95.4M) - No. bytes stored : 3981060 + No. bytes stored : 3981552 Storage ratio : 25.1 Shards Initialized : 100 diff --git a/docs/user-guide/groups.rst b/docs/user-guide/groups.rst index da5f393246..1e72df3478 100644 --- a/docs/user-guide/groups.rst +++ b/docs/user-guide/groups.rst @@ -113,8 +113,8 @@ property. E.g.:: Serializer : BytesCodec(endian=) Compressors : (ZstdCodec(level=0, checksum=False),) No. bytes : 8000000 (7.6M) - No. bytes stored : 1432 - Storage ratio : 5586.6 + No. bytes stored : 1614 + Storage ratio : 4956.6 Chunks Initialized : 0 >>> baz.info Type : Array diff --git a/docs/user-guide/performance.rst b/docs/user-guide/performance.rst index 265bef8efe..42d830780f 100644 --- a/docs/user-guide/performance.rst +++ b/docs/user-guide/performance.rst @@ -131,7 +131,7 @@ ratios, depending on the correlation structure within the data. E.g.:: Serializer : BytesCodec(endian=) Compressors : (ZstdCodec(level=0, checksum=False),) No. bytes : 400000000 (381.5M) - No. bytes stored : 342588717 + No. bytes stored : 342588911 Storage ratio : 1.2 Chunks Initialized : 100 >>> with zarr.config.set({'array.order': 'F'}): @@ -150,7 +150,7 @@ ratios, depending on the correlation structure within the data. E.g.:: Serializer : BytesCodec(endian=) Compressors : (ZstdCodec(level=0, checksum=False),) No. bytes : 400000000 (381.5M) - No. bytes stored : 342588717 + No. bytes stored : 342588911 Storage ratio : 1.2 Chunks Initialized : 100