Fix json indent by will-moore · Pull Request #2546 · zarr-developers/zarr-python · GitHub

Fix json indent #2546

Merged · 17 commits · Jan 8, 2025

docs/user-guide/arrays.rst (4 changes: 2 additions & 2 deletions)

@@ -209,7 +209,7 @@ prints additional diagnostics, e.g.::
Serializer : BytesCodec(endian=<Endian.little: 'little'>)
Compressors : (BloscCodec(typesize=4, cname=<BloscCname.zstd: 'zstd'>, clevel=3, shuffle=<BloscShuffle.bitshuffle: 'bitshuffle'>, blocksize=0),)
No. bytes : 400000000 (381.5M)
-No. bytes stored : 9696302
+No. bytes stored : 9696520
Storage ratio : 41.3
Chunks Initialized : 100

@@ -611,7 +611,7 @@ Sharded arrays can be created by providing the ``shards`` parameter to :func:`za
Serializer : BytesCodec(endian=<Endian.little: 'little'>)
Compressors : (ZstdCodec(level=0, checksum=False),)
No. bytes : 100000000 (95.4M)
-No. bytes stored : 3981060
+No. bytes stored : 3981552
Storage ratio : 25.1
Shards Initialized : 100

docs/user-guide/groups.rst (4 changes: 2 additions & 2 deletions)

@@ -113,8 +113,8 @@ property. E.g.::
Serializer : BytesCodec(endian=<Endian.little: 'little'>)
Compressors : (ZstdCodec(level=0, checksum=False),)
No. bytes : 8000000 (7.6M)
-No. bytes stored : 1432
-Storage ratio : 5586.6
+No. bytes stored : 1614
+Storage ratio : 4956.6
Chunks Initialized : 0
>>> baz.info
Type : Array
docs/user-guide/performance.rst (4 changes: 2 additions & 2 deletions)

@@ -131,7 +131,7 @@ ratios, depending on the correlation structure within the data. E.g.::
Serializer : BytesCodec(endian=<Endian.little: 'little'>)
Compressors : (ZstdCodec(level=0, checksum=False),)
No. bytes : 400000000 (381.5M)
-No. bytes stored : 342588717
+No. bytes stored : 342588911
Storage ratio : 1.2
Chunks Initialized : 100
>>> with zarr.config.set({'array.order': 'F'}):
@@ -150,7 +150,7 @@ ratios, depending on the correlation structure within the data. E.g.::
Serializer : BytesCodec(endian=<Endian.little: 'little'>)
Compressors : (ZstdCodec(level=0, checksum=False),)
No. bytes : 400000000 (381.5M)
-No. bytes stored : 342588717
+No. bytes stored : 342588911
Storage ratio : 1.2
Chunks Initialized : 100

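A note on the documentation updates above: every `No. bytes stored` value grows by a couple of hundred bytes while the data itself is unchanged, which is consistent with the metadata documents now being written with the configured JSON indent instead of in compact form. A toy illustration of why indented JSON is larger (the dictionary below is made up for the example, not a real zarr.json):

import json

# Illustrative metadata-like document; a real zarr.json is richer than this.
doc = {"zarr_format": 3, "node_type": "array", "shape": [10000, 10000], "data_type": "int32"}

compact = json.dumps(doc)             # indent=None: no extra whitespace
indented = json.dumps(doc, indent=2)  # indent=2: newlines plus two-space indentation
print(len(compact), len(indented))    # the indented form is strictly longer here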
src/zarr/core/metadata/v3.py (28 changes: 25 additions & 3 deletions)

@@ -7,6 +7,7 @@
from zarr.core.buffer.core import default_buffer_prototype

if TYPE_CHECKING:
+    from collections.abc import Callable
    from typing import Self

    from zarr.core.buffer import Buffer, BufferPrototype
@@ -143,9 +144,30 @@ def parse_storage_transformers(data: object) -> tuple[dict[str, JSON], ...]:


class V3JsonEncoder(json.JSONEncoder):
-    def __init__(self, *args: Any, **kwargs: Any) -> None:
-        self.indent = kwargs.pop("indent", config.get("json_indent"))
-        super().__init__(*args, **kwargs)
+    def __init__(
+        self,
+        *,
+        skipkeys: bool = False,
+        ensure_ascii: bool = True,
+        check_circular: bool = True,
+        allow_nan: bool = True,
+        sort_keys: bool = False,
+        indent: int | None = None,
+        separators: tuple[str, str] | None = None,
+        default: Callable[[object], object] | None = None,
+    ) -> None:
+        if indent is None:
+            indent = config.get("json_indent")
+        super().__init__(
+            skipkeys=skipkeys,
+            ensure_ascii=ensure_ascii,
+            check_circular=check_circular,
+            allow_nan=allow_nan,
+            sort_keys=sort_keys,
+            indent=indent,
+            separators=separators,
+            default=default,
+        )

    def default(self, o: object) -> Any:
        if isinstance(o, np.dtype):
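The heart of the fix is visible in the diff above: `json.dumps` always passes `indent=` (defaulting to `None`) when it instantiates the encoder class, so the old `kwargs.pop("indent", config.get("json_indent"))` popped that explicit `None` and the `json_indent` config entry was never consulted. With the keyword arguments spelled out, the encoder only falls back to the config when no indent was requested. A minimal sketch of the resulting behaviour, assuming the signature shown above (not code from the PR itself):

import json

from zarr.core.config import config
from zarr.core.metadata.v3 import V3JsonEncoder

# json.dumps hands indent=None through to the encoder class; with this change
# the encoder then substitutes the configured "json_indent" value.
with config.set({"json_indent": 2}):
    doc = json.dumps({"zarr_format": 3, "node_type": "group"}, cls=V3JsonEncoder)

print(doc)  # pretty-printed with a two-space indent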
tests/test_array.py (25 changes: 12 additions & 13 deletions)

@@ -399,27 +399,27 @@ async def test_chunks_initialized() -> None:
def test_nbytes_stored() -> None:
    arr = zarr.create(shape=(100,), chunks=(10,), dtype="i4", codecs=[BytesCodec()])
    result = arr.nbytes_stored()
-    assert result == 366 # the size of the metadata document. This is a fragile test.
+    assert result == 502 # the size of the metadata document. This is a fragile test.
    arr[:50] = 1
    result = arr.nbytes_stored()
-    assert result == 566 # the size with 5 chunks filled.
+    assert result == 702 # the size with 5 chunks filled.
    arr[50:] = 2
    result = arr.nbytes_stored()
-    assert result == 766 # the size with all chunks filled.
+    assert result == 902 # the size with all chunks filled.


async def test_nbytes_stored_async() -> None:
    arr = await zarr.api.asynchronous.create(
        shape=(100,), chunks=(10,), dtype="i4", codecs=[BytesCodec()]
    )
    result = await arr.nbytes_stored()
-    assert result == 366 # the size of the metadata document. This is a fragile test.
+    assert result == 502 # the size of the metadata document. This is a fragile test.
    await arr.setitem(slice(50), 1)
    result = await arr.nbytes_stored()
-    assert result == 566 # the size with 5 chunks filled.
+    assert result == 702 # the size with 5 chunks filled.
    await arr.setitem(slice(50, 100), 2)
    result = await arr.nbytes_stored()
-    assert result == 766 # the size with all chunks filled.
+    assert result == 902 # the size with all chunks filled.


def test_default_fill_values() -> None:
@@ -537,19 +537,19 @@ def test_info_complete(self, chunks: tuple[int, int], shards: tuple[int, int] |
_serializer=BytesCodec(),
_count_bytes=512,
_count_chunks_initialized=0,
-_count_bytes_stored=373 if shards is None else 578, # the metadata?
+_count_bytes_stored=521 if shards is None else 982, # the metadata?
)
assert result == expected

arr[:4, :4] = 10
result = arr.info_complete()
if shards is None:
expected = dataclasses.replace(
-expected, _count_chunks_initialized=4, _count_bytes_stored=501
+expected, _count_chunks_initialized=4, _count_bytes_stored=649
)
else:
expected = dataclasses.replace(
-expected, _count_chunks_initialized=1, _count_bytes_stored=774
+expected, _count_chunks_initialized=1, _count_bytes_stored=1178
)
assert result == expected

@@ -624,21 +624,20 @@ async def test_info_complete_async(
_serializer=BytesCodec(),
_count_bytes=512,
_count_chunks_initialized=0,
-_count_bytes_stored=373 if shards is None else 578, # the metadata?
+_count_bytes_stored=521 if shards is None else 982, # the metadata?
)
assert result == expected

await arr.setitem((slice(4), slice(4)), 10)
result = await arr.info_complete()
if shards is None:
expected = dataclasses.replace(
-expected, _count_chunks_initialized=4, _count_bytes_stored=501
+expected, _count_chunks_initialized=4, _count_bytes_stored=553
)
else:
expected = dataclasses.replace(
-expected, _count_chunks_initialized=1, _count_bytes_stored=774
+expected, _count_chunks_initialized=1, _count_bytes_stored=1178
)
assert result == expected


@pytest.mark.parametrize("store", ["memory"], indirect=True)
tests/test_metadata/test_v3.py (11 changes: 10 additions & 1 deletion)

@@ -10,7 +10,8 @@
from zarr.codecs.bytes import BytesCodec
from zarr.core.buffer import default_buffer_prototype
from zarr.core.chunk_key_encodings import DefaultChunkKeyEncoding, V2ChunkKeyEncoding
-from zarr.core.group import parse_node_type
+from zarr.core.config import config
+from zarr.core.group import GroupMetadata, parse_node_type
from zarr.core.metadata.v3 import (
    ArrayV3Metadata,
    DataType,
@@ -304,6 +305,14 @@ def test_metadata_to_dict(
    assert observed == expected


+@pytest.mark.parametrize("indent", [2, 4, None])
+def test_json_indent(indent: int):
+    with config.set({"json_indent": indent}):
+        m = GroupMetadata()
+        d = m.to_buffer_dict(default_buffer_prototype())["zarr.json"].to_bytes()
+        assert d == json.dumps(json.loads(d), indent=indent).encode()


# @pytest.mark.parametrize("fill_value", [-1, 0, 1, 2932897])
# @pytest.mark.parametrize("precision", ["ns", "D"])
# async def test_datetime_metadata(fill_value: int, precision: str) -> None:
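The new `test_json_indent` above checks the round-trip property directly on `GroupMetadata.to_buffer_dict`. An end-to-end variant of the same check against a directory store could look roughly like the sketch below; the `zarr.group(store=...)` call and the assumption that group creation writes `zarr.json` at the store root are mine, not part of the PR:

import json
import tempfile
from pathlib import Path

import zarr
from zarr.core.config import config

with tempfile.TemporaryDirectory() as tmp, config.set({"json_indent": 4}):
    zarr.group(store=tmp)  # creating a group should persist <tmp>/zarr.json
    raw = Path(tmp, "zarr.json").read_text()
    # Same round-trip property the unit test asserts, applied to the on-disk file.
    assert raw == json.dumps(json.loads(raw), indent=4)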