From 61f6dd00f78b06f3aef4b312e4ef82ee0c9d7870 Mon Sep 17 00:00:00 2001
From: William Moore <w.moore@dundee.ac.uk>
Date: Mon, 9 Dec 2024 14:42:29 +0000
Subject: [PATCH 01/11] Fix usage of config json_indent in V3JsonEncoder

---
 src/zarr/core/metadata/v3.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/zarr/core/metadata/v3.py b/src/zarr/core/metadata/v3.py
index 6ea9ed69f1..8b645e3bb0 100644
--- a/src/zarr/core/metadata/v3.py
+++ b/src/zarr/core/metadata/v3.py
@@ -135,8 +135,8 @@ def parse_storage_transformers(data: object) -> tuple[dict[str, JSON], ...]:
 
 class V3JsonEncoder(json.JSONEncoder):
     def __init__(self, *args: Any, **kwargs: Any) -> None:
-        self.indent = kwargs.pop("indent", config.get("json_indent"))
         super().__init__(*args, **kwargs)
+        self.indent = config.get("json_indent")
 
     def default(self, o: object) -> Any:
         if isinstance(o, np.dtype):

From 5ab3640665eb493c906aca2f3b0faa5df2a7d2d5 Mon Sep 17 00:00:00 2001
From: William Moore <w.moore@dundee.ac.uk>
Date: Mon, 9 Dec 2024 15:29:07 +0000
Subject: [PATCH 02/11] Add test for json_indent

---
 tests/test_metadata/test_v3.py | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/tests/test_metadata/test_v3.py b/tests/test_metadata/test_v3.py
index 4e4ba23313..560b19e4ef 100644
--- a/tests/test_metadata/test_v3.py
+++ b/tests/test_metadata/test_v3.py
@@ -10,7 +10,7 @@
 from zarr.codecs.bytes import BytesCodec
 from zarr.core.buffer import default_buffer_prototype
 from zarr.core.chunk_key_encodings import DefaultChunkKeyEncoding, V2ChunkKeyEncoding
-from zarr.core.group import parse_node_type
+from zarr.core.group import parse_node_type, GroupMetadata
 from zarr.core.metadata.v3 import (
     ArrayV3Metadata,
     DataType,
@@ -304,6 +304,19 @@ def test_metadata_to_dict(
     assert observed == expected
 
 
+def test_json_indent():
+    m = GroupMetadata()
+    d = m.to_buffer_dict(default_buffer_prototype())["zarr.json"].to_bytes()
+    class TestIndentEncoder(json.JSONEncoder):
+        def __init__(self, *args: Any, **kwargs: Any) -> None:
+            super().__init__(*args, **kwargs)
+            self.indent = 2
+
+    # expected has extra ' ' on each line compared with json.dumps( indent=2)
+    expected = json.dumps(json.loads(d), cls=TestIndentEncoder).encode()
+    assert d == expected
+
+
 # @pytest.mark.parametrize("fill_value", [-1, 0, 1, 2932897])
 # @pytest.mark.parametrize("precision", ["ns", "D"])
 # async def test_datetime_metadata(fill_value: int, precision: str) -> None:

From 37f96b011d353868ff24b0e3582fe62de12338ae Mon Sep 17 00:00:00 2001
From: William Moore <w.moore@dundee.ac.uk>
Date: Mon, 9 Dec 2024 15:56:04 +0000
Subject: [PATCH 03/11] parametrize json indent

---
 tests/test_metadata/test_v3.py | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/tests/test_metadata/test_v3.py b/tests/test_metadata/test_v3.py
index 560b19e4ef..0ddb9d18f3 100644
--- a/tests/test_metadata/test_v3.py
+++ b/tests/test_metadata/test_v3.py
@@ -10,6 +10,7 @@
 from zarr.codecs.bytes import BytesCodec
 from zarr.core.buffer import default_buffer_prototype
 from zarr.core.chunk_key_encodings import DefaultChunkKeyEncoding, V2ChunkKeyEncoding
+from zarr.core.config import config
 from zarr.core.group import parse_node_type, GroupMetadata
 from zarr.core.metadata.v3 import (
     ArrayV3Metadata,
@@ -304,17 +305,19 @@ def test_metadata_to_dict(
     assert observed == expected
 
 
-def test_json_indent():
-    m = GroupMetadata()
-    d = m.to_buffer_dict(default_buffer_prototype())["zarr.json"].to_bytes()
-    class TestIndentEncoder(json.JSONEncoder):
-        def __init__(self, *args: Any, **kwargs: Any) -> None:
-            super().__init__(*args, **kwargs)
-            self.indent = 2
+@pytest.mark.parametrize('indent', (2, 4))
+def test_json_indent(indent: int):
+    with config.set({"json_indent": indent}):
+        m = GroupMetadata()
+        d = m.to_buffer_dict(default_buffer_prototype())["zarr.json"].to_bytes()
+        class TestIndentEncoder(json.JSONEncoder):
+            def __init__(self, *args: Any, **kwargs: Any) -> None:
+                super().__init__(*args, **kwargs)
+                self.indent = indent
 
-    # expected has extra ' ' on each line compared with json.dumps( indent=2)
-    expected = json.dumps(json.loads(d), cls=TestIndentEncoder).encode()
-    assert d == expected
+        # expected has extra ' ' on each line compared with json.dumps( indent=2)
+        expected = json.dumps(json.loads(d), cls=TestIndentEncoder).encode()
+        assert d == expected
 
 
 # @pytest.mark.parametrize("fill_value", [-1, 0, 1, 2932897])

From 5af9d5fb2a1e6925eb3ba7bb3d8a9a13a67b21f7 Mon Sep 17 00:00:00 2001
From: William Moore <w.moore@dundee.ac.uk>
Date: Mon, 9 Dec 2024 16:08:56 +0000
Subject: [PATCH 04/11] Add None to indent test parameters

---
 tests/test_metadata/test_v3.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/tests/test_metadata/test_v3.py b/tests/test_metadata/test_v3.py
index 0ddb9d18f3..64d732b21a 100644
--- a/tests/test_metadata/test_v3.py
+++ b/tests/test_metadata/test_v3.py
@@ -305,7 +305,7 @@ def test_metadata_to_dict(
     assert observed == expected
 
 
-@pytest.mark.parametrize('indent', (2, 4))
+@pytest.mark.parametrize('indent', (2, 4, None))
 def test_json_indent(indent: int):
     with config.set({"json_indent": indent}):
         m = GroupMetadata()
@@ -315,9 +315,13 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:
                 super().__init__(*args, **kwargs)
                 self.indent = indent
 
-        # expected has extra ' ' on each line compared with json.dumps( indent=2)
+        # using json.JSONEncoder adds an extra ' ' on each line
+        # compared with json.dumps(json.loads(d), indent=2)...
         expected = json.dumps(json.loads(d), cls=TestIndentEncoder).encode()
         assert d == expected
+        # ...but we can check that None really removes indent.
+        if indent is None:
+            assert d == json.dumps(json.loads(d), indent=indent).encode()
 
 
 # @pytest.mark.parametrize("fill_value", [-1, 0, 1, 2932897])

From 1e37dd22d5e6a7d4b1e7f0a7cc5bafd519dde17b Mon Sep 17 00:00:00 2001
From: William Moore <w.moore@dundee.ac.uk>
Date: Mon, 9 Dec 2024 16:17:30 +0000
Subject: [PATCH 05/11] ruff fix: use a list for parametrize values

---
 tests/test_metadata/test_v3.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_metadata/test_v3.py b/tests/test_metadata/test_v3.py
index 64d732b21a..fe28ec0f0b 100644
--- a/tests/test_metadata/test_v3.py
+++ b/tests/test_metadata/test_v3.py
@@ -305,7 +305,7 @@ def test_metadata_to_dict(
     assert observed == expected
 
 
-@pytest.mark.parametrize('indent', (2, 4, None))
+@pytest.mark.parametrize('indent', [2, 4, None])
 def test_json_indent(indent: int):
     with config.set({"json_indent": indent}):
         m = GroupMetadata()

From 599eefca84574c354c157bc884427bb35751f991 Mon Sep 17 00:00:00 2001
From: William Moore <w.moore@dundee.ac.uk>
Date: Mon, 9 Dec 2024 16:19:09 +0000
Subject: [PATCH 06/11] ruff fixes: sort imports, use double quotes, add blank line before class

---
 tests/test_metadata/test_v3.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tests/test_metadata/test_v3.py b/tests/test_metadata/test_v3.py
index fe28ec0f0b..817facfecc 100644
--- a/tests/test_metadata/test_v3.py
+++ b/tests/test_metadata/test_v3.py
@@ -11,7 +11,7 @@
 from zarr.core.buffer import default_buffer_prototype
 from zarr.core.chunk_key_encodings import DefaultChunkKeyEncoding, V2ChunkKeyEncoding
 from zarr.core.config import config
-from zarr.core.group import parse_node_type, GroupMetadata
+from zarr.core.group import GroupMetadata, parse_node_type
 from zarr.core.metadata.v3 import (
     ArrayV3Metadata,
     DataType,
@@ -305,11 +305,12 @@ def test_metadata_to_dict(
     assert observed == expected
 
 
-@pytest.mark.parametrize('indent', [2, 4, None])
+@pytest.mark.parametrize("indent", [2, 4, None])
 def test_json_indent(indent: int):
     with config.set({"json_indent": indent}):
         m = GroupMetadata()
         d = m.to_buffer_dict(default_buffer_prototype())["zarr.json"].to_bytes()
+
         class TestIndentEncoder(json.JSONEncoder):
             def __init__(self, *args: Any, **kwargs: Any) -> None:
                 super().__init__(*args, **kwargs)

From 263dac43f21753fa267d83584195175299843dfe Mon Sep 17 00:00:00 2001
From: Will Moore <w.moore@dundee.ac.uk>
Date: Mon, 9 Dec 2024 22:59:42 +0000
Subject: [PATCH 07/11] Prefer explicit indent kwarg over config json_indent in V3JsonEncoder

Co-authored-by: Joe Hamman <jhamman1@gmail.com>
---
 src/zarr/core/metadata/v3.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/zarr/core/metadata/v3.py b/src/zarr/core/metadata/v3.py
index 8b645e3bb0..2cfbab03c1 100644
--- a/src/zarr/core/metadata/v3.py
+++ b/src/zarr/core/metadata/v3.py
@@ -136,7 +136,7 @@ def parse_storage_transformers(data: object) -> tuple[dict[str, JSON], ...]:
 class V3JsonEncoder(json.JSONEncoder):
     def __init__(self, *args: Any, **kwargs: Any) -> None:
         super().__init__(*args, **kwargs)
-        self.indent = config.get("json_indent")
+        self.indent = kwargs.pop("indent") or config.get("json_indent")
 
     def default(self, o: object) -> Any:
         if isinstance(o, np.dtype):

From 7a442e1b700e0d908732e21be89250ce48202560 Mon Sep 17 00:00:00 2001
From: William Moore <w.moore@dundee.ac.uk>
Date: Mon, 9 Dec 2024 23:37:27 +0000
Subject: [PATCH 08/11] Use explicit json encoder args

---
 src/zarr/core/metadata/v3.py   | 27 ++++++++++++++++++++++++---
 tests/test_metadata/test_v3.py | 14 +-------------
 2 files changed, 25 insertions(+), 16 deletions(-)

diff --git a/src/zarr/core/metadata/v3.py b/src/zarr/core/metadata/v3.py
index 2cfbab03c1..6efc11ed44 100644
--- a/src/zarr/core/metadata/v3.py
+++ b/src/zarr/core/metadata/v3.py
@@ -134,9 +134,30 @@ def parse_storage_transformers(data: object) -> tuple[dict[str, JSON], ...]:
 
 
 class V3JsonEncoder(json.JSONEncoder):
-    def __init__(self, *args: Any, **kwargs: Any) -> None:
-        super().__init__(*args, **kwargs)
-        self.indent = kwargs.pop("indent") or config.get("json_indent")
+    def __init__(
+        self,
+        *,
+        skipkeys=False,
+        ensure_ascii=True,
+        check_circular=True,
+        allow_nan=True,
+        sort_keys=False,
+        indent=None,
+        separators=None,
+        default=None,
+    ) -> None:
+        if indent is None:
+            indent = config.get("json_indent")
+        super().__init__(
+            skipkeys=skipkeys,
+            ensure_ascii=ensure_ascii,
+            check_circular=check_circular,
+            allow_nan=allow_nan,
+            sort_keys=sort_keys,
+            indent=indent,
+            separators=separators,
+            default=default,
+        )
 
     def default(self, o: object) -> Any:
         if isinstance(o, np.dtype):
diff --git a/tests/test_metadata/test_v3.py b/tests/test_metadata/test_v3.py
index 817facfecc..9b1379689f 100644
--- a/tests/test_metadata/test_v3.py
+++ b/tests/test_metadata/test_v3.py
@@ -310,19 +310,7 @@ def test_json_indent(indent: int):
     with config.set({"json_indent": indent}):
         m = GroupMetadata()
         d = m.to_buffer_dict(default_buffer_prototype())["zarr.json"].to_bytes()
-
-        class TestIndentEncoder(json.JSONEncoder):
-            def __init__(self, *args: Any, **kwargs: Any) -> None:
-                super().__init__(*args, **kwargs)
-                self.indent = indent
-
-        # using json.JSONEncoder adds an extra ' ' on each line
-        # compared with json.dumps(json.loads(d), indent=2)...
-        expected = json.dumps(json.loads(d), cls=TestIndentEncoder).encode()
-        assert d == expected
-        # ...but we can check that None really removes indent.
-        if indent is None:
-            assert d == json.dumps(json.loads(d), indent=indent).encode()
+        assert d == json.dumps(json.loads(d), indent=indent).encode()
 
 
 # @pytest.mark.parametrize("fill_value", [-1, 0, 1, 2932897])

From 1442f4a5c762c922af90281b1c4da7c8a30b5269 Mon Sep 17 00:00:00 2001
From: William Moore <w.moore@dundee.ac.uk>
Date: Mon, 9 Dec 2024 23:54:39 +0000
Subject: [PATCH 09/11] Add type annotations to V3JsonEncoder.__init__

---
 src/zarr/core/metadata/v3.py | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/src/zarr/core/metadata/v3.py b/src/zarr/core/metadata/v3.py
index 6efc11ed44..a22d81115b 100644
--- a/src/zarr/core/metadata/v3.py
+++ b/src/zarr/core/metadata/v3.py
@@ -7,6 +7,7 @@
 from zarr.core.buffer.core import default_buffer_prototype
 
 if TYPE_CHECKING:
+    from collections.abc import Callable
     from typing import Self
 
     from zarr.core.buffer import Buffer, BufferPrototype
@@ -137,14 +138,14 @@ class V3JsonEncoder(json.JSONEncoder):
     def __init__(
         self,
         *,
-        skipkeys=False,
-        ensure_ascii=True,
-        check_circular=True,
-        allow_nan=True,
-        sort_keys=False,
-        indent=None,
-        separators=None,
-        default=None,
+        skipkeys: bool = False,
+        ensure_ascii: bool = True,
+        check_circular: bool = True,
+        allow_nan: bool = True,
+        sort_keys: bool = False,
+        indent: int | None = None,
+        separators: tuple[str, str] | None = None,
+        default: Callable[[object], object] | None = None,
     ) -> None:
         if indent is None:
             indent = config.get("json_indent")

From 7123ce354c62ed81f6c59a049893bc70de4ec2b8 Mon Sep 17 00:00:00 2001
From: William Moore <w.moore@dundee.ac.uk>
Date: Tue, 17 Dec 2024 13:48:08 +0000
Subject: [PATCH 10/11] Update byte counts for tests

---
 tests/test_array.py | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/tests/test_array.py b/tests/test_array.py
index 86da801d1f..16f3ce5994 100644
--- a/tests/test_array.py
+++ b/tests/test_array.py
@@ -376,25 +376,25 @@ async def test_chunks_initialized() -> None:
 def test_nbytes_stored() -> None:
     arr = zarr.create(shape=(100,), chunks=(10,), dtype="i4")
     result = arr.nbytes_stored()
-    assert result == 366  # the size of the metadata document. This is a fragile test.
+    assert result == 502  # the size of the metadata document. This is a fragile test.
     arr[:50] = 1
     result = arr.nbytes_stored()
-    assert result == 566  # the size with 5 chunks filled.
+    assert result == 702  # the size with 5 chunks filled.
     arr[50:] = 2
     result = arr.nbytes_stored()
-    assert result == 766  # the size with all chunks filled.
+    assert result == 902  # the size with all chunks filled.
 
 
 async def test_nbytes_stored_async() -> None:
     arr = await zarr.api.asynchronous.create(shape=(100,), chunks=(10,), dtype="i4")
     result = await arr.nbytes_stored()
-    assert result == 366  # the size of the metadata document. This is a fragile test.
+    assert result == 502  # the size of the metadata document. This is a fragile test.
     await arr.setitem(slice(50), 1)
     result = await arr.nbytes_stored()
-    assert result == 566  # the size with 5 chunks filled.
+    assert result == 702  # the size with 5 chunks filled.
     await arr.setitem(slice(50, 100), 2)
     result = await arr.nbytes_stored()
-    assert result == 766  # the size with all chunks filled.
+    assert result == 902  # the size with all chunks filled.
 
 
 def test_default_fill_values() -> None:
@@ -489,14 +489,14 @@ def test_info_complete(self) -> None:
             _codecs=[BytesCodec()],
             _count_bytes=128,
             _count_chunks_initialized=0,
-            _count_bytes_stored=373,  # the metadata?
+            _count_bytes_stored=521,  # the metadata?
         )
         assert result == expected
 
         arr[:2, :2] = 10
         result = arr.info_complete()
         expected = dataclasses.replace(
-            expected, _count_chunks_initialized=1, _count_bytes_stored=405
+            expected, _count_chunks_initialized=1, _count_bytes_stored=553
         )
         assert result == expected
 
@@ -545,14 +545,14 @@ async def test_info_complete_async(self) -> None:
             _codecs=[BytesCodec()],
             _count_bytes=128,
             _count_chunks_initialized=0,
-            _count_bytes_stored=373,  # the metadata?
+            _count_bytes_stored=521,  # the metadata?
         )
         assert result == expected
 
         await arr.setitem((slice(2), slice(2)), 10)
         result = await arr.info_complete()
         expected = dataclasses.replace(
-            expected, _count_chunks_initialized=1, _count_bytes_stored=405
+            expected, _count_chunks_initialized=1, _count_bytes_stored=553
         )
         assert result == expected
 

From e2622357de190ce27561c3e883e55eb8e4907c59 Mon Sep 17 00:00:00 2001
From: David Stansby <dstansby@gmail.com>
Date: Wed, 8 Jan 2025 10:12:35 +0000
Subject: [PATCH 11/11] Fix doctests

---
 docs/user-guide/arrays.rst      | 4 ++--
 docs/user-guide/groups.rst      | 4 ++--
 docs/user-guide/performance.rst | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/docs/user-guide/arrays.rst b/docs/user-guide/arrays.rst
index ba85ce1cda..ae2c4b47eb 100644
--- a/docs/user-guide/arrays.rst
+++ b/docs/user-guide/arrays.rst
@@ -209,7 +209,7 @@ prints additional diagnostics, e.g.::
    Serializer         : BytesCodec(endian=<Endian.little: 'little'>)
    Compressors        : (BloscCodec(typesize=4, cname=<BloscCname.zstd: 'zstd'>, clevel=3, shuffle=<BloscShuffle.bitshuffle: 'bitshuffle'>, blocksize=0),)
    No. bytes          : 400000000 (381.5M)
-   No. bytes stored   : 9696302
+   No. bytes stored   : 9696520
    Storage ratio      : 41.3
    Chunks Initialized : 100
 
@@ -611,7 +611,7 @@ Sharded arrays can be created by providing the ``shards`` parameter to :func:`za
   Serializer         : BytesCodec(endian=<Endian.little: 'little'>)
   Compressors        : (ZstdCodec(level=0, checksum=False),)
   No. bytes          : 100000000 (95.4M)
-  No. bytes stored   : 3981060
+  No. bytes stored   : 3981552
   Storage ratio      : 25.1
   Shards Initialized : 100
 
diff --git a/docs/user-guide/groups.rst b/docs/user-guide/groups.rst
index da5f393246..1e72df3478 100644
--- a/docs/user-guide/groups.rst
+++ b/docs/user-guide/groups.rst
@@ -113,8 +113,8 @@ property. E.g.::
    Serializer         : BytesCodec(endian=<Endian.little: 'little'>)
    Compressors        : (ZstdCodec(level=0, checksum=False),)
    No. bytes          : 8000000 (7.6M)
-   No. bytes stored   : 1432
-   Storage ratio      : 5586.6
+   No. bytes stored   : 1614
+   Storage ratio      : 4956.6
    Chunks Initialized : 0
    >>> baz.info
    Type               : Array
diff --git a/docs/user-guide/performance.rst b/docs/user-guide/performance.rst
index 265bef8efe..42d830780f 100644
--- a/docs/user-guide/performance.rst
+++ b/docs/user-guide/performance.rst
@@ -131,7 +131,7 @@ ratios, depending on the correlation structure within the data. E.g.::
    Serializer         : BytesCodec(endian=<Endian.little: 'little'>)
    Compressors        : (ZstdCodec(level=0, checksum=False),)
    No. bytes          : 400000000 (381.5M)
-   No. bytes stored   : 342588717
+   No. bytes stored   : 342588911
    Storage ratio      : 1.2
    Chunks Initialized : 100
    >>> with zarr.config.set({'array.order': 'F'}):
@@ -150,7 +150,7 @@ ratios, depending on the correlation structure within the data. E.g.::
    Serializer         : BytesCodec(endian=<Endian.little: 'little'>)
    Compressors        : (ZstdCodec(level=0, checksum=False),)
    No. bytes          : 400000000 (381.5M)
-   No. bytes stored   : 342588717
+   No. bytes stored   : 342588911
    Storage ratio      : 1.2
    Chunks Initialized : 100