From 379ea1d0dfdc228a4dacbcab28671407be770e31 Mon Sep 17 00:00:00 2001 From: David Stansby Date: Tue, 6 May 2025 16:00:33 +0100 Subject: [PATCH 1/6] Allow no compressor for v2 arrays --- src/zarr/api/asynchronous.py | 6 ++++-- src/zarr/api/synchronous.py | 4 +++- src/zarr/core/array.py | 19 +++++++++++-------- tests/test_api.py | 6 ++++++ 4 files changed, 24 insertions(+), 11 deletions(-) diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py index 9b8b43a517..8c2cf71ea9 100644 --- a/src/zarr/api/asynchronous.py +++ b/src/zarr/api/asynchronous.py @@ -837,7 +837,9 @@ async def create( *, # Note: this is a change from v2 chunks: ChunkCoords | int | None = None, # TODO: v2 allowed chunks=True dtype: npt.DTypeLike | None = None, - compressor: dict[str, JSON] | None = None, # TODO: default and type change + compressor: dict[str, JSON] + | Literal["default"] + | None = "default", # TODO: default and type change fill_value: Any | None = 0, # TODO: need type order: MemoryOrder | None = None, store: str | StoreLike | None = None, @@ -990,7 +992,7 @@ async def create( dtype = parse_dtype(dtype, zarr_format) if not filters: filters = _default_filters(dtype) - if not compressor: + if compressor == "default": compressor = _default_compressor(dtype) elif zarr_format == 3 and chunk_shape is None: # type: ignore[redundant-expr] if chunks is not None: diff --git a/src/zarr/api/synchronous.py b/src/zarr/api/synchronous.py index 4c577936cd..ee0d71748f 100644 --- a/src/zarr/api/synchronous.py +++ b/src/zarr/api/synchronous.py @@ -598,7 +598,9 @@ def create( *, # Note: this is a change from v2 chunks: ChunkCoords | int | bool | None = None, dtype: npt.DTypeLike | None = None, - compressor: dict[str, JSON] | None = None, # TODO: default and type change + compressor: dict[str, JSON] + | Literal["default"] + | None = "default", # TODO: default and type change fill_value: Any | None = 0, # TODO: need type order: MemoryOrder | None = None, store: str | StoreLike | None = None, diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index b0e8b03cd7..231d795d37 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -301,7 +301,7 @@ async def create( dimension_separator: Literal[".", "/"] | None = None, order: MemoryOrder | None = None, filters: list[dict[str, JSON]] | None = None, - compressor: dict[str, JSON] | None = None, + compressor: dict[str, JSON] | Literal["default"] | None = "default", # runtime overwrite: bool = False, data: npt.ArrayLike | None = None, @@ -392,7 +392,7 @@ async def create( dimension_separator: Literal[".", "/"] | None = None, order: MemoryOrder | None = None, filters: list[dict[str, JSON]] | None = None, - compressor: dict[str, JSON] | None = None, + compressor: dict[str, JSON] | Literal["default"] | None = "default", # runtime overwrite: bool = False, data: npt.ArrayLike | None = None, @@ -427,7 +427,7 @@ async def create( dimension_separator: Literal[".", "/"] | None = None, order: MemoryOrder | None = None, filters: list[dict[str, JSON]] | None = None, - compressor: dict[str, JSON] | None = None, + compressor: dict[str, JSON] | Literal["default"] | None = "default", # runtime overwrite: bool = False, data: npt.ArrayLike | None = None, @@ -568,7 +568,7 @@ async def _create( dimension_separator: Literal[".", "/"] | None = None, order: MemoryOrder | None = None, filters: list[dict[str, JSON]] | None = None, - compressor: dict[str, JSON] | None = None, + compressor: dict[str, JSON] | Literal["default"] | None = "default", # runtime overwrite: bool = False, data: npt.ArrayLike | None = None, @@ -602,7 +602,7 @@ async def _create( raise ValueError( "filters cannot be used for arrays with zarr_format 3. Use array-to-array codecs instead." ) - if compressor is not None: + if compressor != "default": raise ValueError( "compressor cannot be used for arrays with zarr_format 3. Use bytes-to-bytes codecs instead." ) @@ -807,7 +807,7 @@ async def _create_v2( dimension_separator: Literal[".", "/"] | None = None, fill_value: float | None = None, filters: Iterable[dict[str, JSON] | numcodecs.abc.Codec] | None = None, - compressor: dict[str, JSON] | numcodecs.abc.Codec | None = None, + compressor: dict[str, JSON] | numcodecs.abc.Codec | Literal["default"] | None = None, attributes: dict[str, JSON] | None = None, overwrite: bool = False, ) -> AsyncArray[ArrayV2Metadata]: @@ -819,6 +819,9 @@ async def _create_v2( else: await ensure_no_existing_node(store_path, zarr_format=2) + if compressor == "default": + compressor = _default_compressor(dtype) + metadata = cls._create_metadata_v2( shape=shape, dtype=dtype, @@ -1749,7 +1752,7 @@ def create( dimension_separator: Literal[".", "/"] | None = None, order: MemoryOrder | None = None, filters: list[dict[str, JSON]] | None = None, - compressor: dict[str, JSON] | None = None, + compressor: dict[str, JSON] | Literal["default"] | None = "default", # runtime overwrite: bool = False, config: ArrayConfigLike | None = None, @@ -1878,7 +1881,7 @@ def _create( dimension_separator: Literal[".", "/"] | None = None, order: MemoryOrder | None = None, filters: list[dict[str, JSON]] | None = None, - compressor: dict[str, JSON] | None = None, + compressor: dict[str, JSON] | Literal["default"] | None = "default", # runtime overwrite: bool = False, config: ArrayConfigLike | None = None, diff --git a/tests/test_api.py b/tests/test_api.py index 9f03a1067a..fd3470ebec 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -1190,3 +1190,9 @@ def test_gpu_basic(store: Store, zarr_format: ZarrFormat | None) -> None: # assert_array_equal doesn't check the type assert isinstance(result, type(src)) cp.testing.assert_array_equal(result, src[:10, :10]) + + +def test_v2_without_copmpressor() -> None: + # Make sure it's possible to set no compressor for v2 arrays + arr = zarr.create(store={}, shape=(1), dtype="uint8", zarr_format=2, compressor=None) + assert arr.compressors == () From 4930ad1002ae1ff09224e1d952e5b7f7721f3efc Mon Sep 17 00:00:00 2001 From: David Stansby Date: Wed, 7 May 2025 17:41:51 +0100 Subject: [PATCH 2/6] Use typing aliases for compressors --- src/zarr/api/asynchronous.py | 15 +++++++++----- src/zarr/api/synchronous.py | 6 ++---- src/zarr/core/array.py | 39 ++++++++++++++++++++++++------------ src/zarr/core/metadata/v2.py | 10 ++++++--- 4 files changed, 45 insertions(+), 25 deletions(-) diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py index 8c2cf71ea9..1d73c17d73 100644 --- a/src/zarr/api/asynchronous.py +++ b/src/zarr/api/asynchronous.py @@ -9,7 +9,14 @@ import numpy.typing as npt from typing_extensions import deprecated -from zarr.core.array import Array, AsyncArray, create_array, from_array, get_array_metadata +from zarr.core.array import ( + Array, + AsyncArray, + CompressorLike, + create_array, + from_array, + get_array_metadata, +) from zarr.core.array_spec import ArrayConfig, ArrayConfigLike, ArrayConfigParams from zarr.core.buffer import NDArrayLike from zarr.core.common import ( @@ -837,9 +844,7 @@ async def create( *, # Note: this is a change from v2 chunks: ChunkCoords | int | None = None, # TODO: v2 allowed chunks=True dtype: npt.DTypeLike | None = None, - compressor: dict[str, JSON] - | Literal["default"] - | None = "default", # TODO: default and type change + compressor: CompressorLike = "auto", fill_value: Any | None = 0, # TODO: need type order: MemoryOrder | None = None, store: str | StoreLike | None = None, @@ -992,7 +997,7 @@ async def create( dtype = parse_dtype(dtype, zarr_format) if not filters: filters = _default_filters(dtype) - if compressor == "default": + if compressor == "auto": compressor = _default_compressor(dtype) elif zarr_format == 3 and chunk_shape is None: # type: ignore[redundant-expr] if chunks is not None: diff --git a/src/zarr/api/synchronous.py b/src/zarr/api/synchronous.py index ee0d71748f..f820af0310 100644 --- a/src/zarr/api/synchronous.py +++ b/src/zarr/api/synchronous.py @@ -7,7 +7,7 @@ import zarr.api.asynchronous as async_api import zarr.core.array from zarr._compat import _deprecate_positional_args -from zarr.core.array import Array, AsyncArray +from zarr.core.array import Array, AsyncArray, CompressorLike from zarr.core.group import Group from zarr.core.sync import sync from zarr.core.sync_group import create_hierarchy @@ -598,9 +598,7 @@ def create( *, # Note: this is a change from v2 chunks: ChunkCoords | int | bool | None = None, dtype: npt.DTypeLike | None = None, - compressor: dict[str, JSON] - | Literal["default"] - | None = "default", # TODO: default and type change + compressor: CompressorLike = "auto", fill_value: Any | None = 0, # TODO: need type order: MemoryOrder | None = None, store: str | StoreLike | None = None, diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 231d795d37..deef56dcaf 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -101,6 +101,7 @@ T_ArrayMetadata, ) from zarr.core.metadata.v2 import ( + CompressorLikev2, _default_compressor, _default_filters, parse_compressor, @@ -301,7 +302,7 @@ async def create( dimension_separator: Literal[".", "/"] | None = None, order: MemoryOrder | None = None, filters: list[dict[str, JSON]] | None = None, - compressor: dict[str, JSON] | Literal["default"] | None = "default", + compressor: CompressorLikev2 | Literal["auto"] = "auto", # runtime overwrite: bool = False, data: npt.ArrayLike | None = None, @@ -392,7 +393,7 @@ async def create( dimension_separator: Literal[".", "/"] | None = None, order: MemoryOrder | None = None, filters: list[dict[str, JSON]] | None = None, - compressor: dict[str, JSON] | Literal["default"] | None = "default", + compressor: CompressorLike = "auto", # runtime overwrite: bool = False, data: npt.ArrayLike | None = None, @@ -427,7 +428,7 @@ async def create( dimension_separator: Literal[".", "/"] | None = None, order: MemoryOrder | None = None, filters: list[dict[str, JSON]] | None = None, - compressor: dict[str, JSON] | Literal["default"] | None = "default", + compressor: CompressorLike = "auto", # runtime overwrite: bool = False, data: npt.ArrayLike | None = None, @@ -568,7 +569,7 @@ async def _create( dimension_separator: Literal[".", "/"] | None = None, order: MemoryOrder | None = None, filters: list[dict[str, JSON]] | None = None, - compressor: dict[str, JSON] | Literal["default"] | None = "default", + compressor: CompressorLike = "auto", # runtime overwrite: bool = False, data: npt.ArrayLike | None = None, @@ -602,7 +603,7 @@ async def _create( raise ValueError( "filters cannot be used for arrays with zarr_format 3. Use array-to-array codecs instead." ) - if compressor != "default": + if compressor != "auto": raise ValueError( "compressor cannot be used for arrays with zarr_format 3. Use bytes-to-bytes codecs instead." ) @@ -766,7 +767,7 @@ def _create_metadata_v2( dimension_separator: Literal[".", "/"] | None = None, fill_value: float | None = None, filters: Iterable[dict[str, JSON] | numcodecs.abc.Codec] | None = None, - compressor: dict[str, JSON] | numcodecs.abc.Codec | None = None, + compressor: CompressorLikev2 = None, attributes: dict[str, JSON] | None = None, ) -> ArrayV2Metadata: if dimension_separator is None: @@ -807,7 +808,7 @@ async def _create_v2( dimension_separator: Literal[".", "/"] | None = None, fill_value: float | None = None, filters: Iterable[dict[str, JSON] | numcodecs.abc.Codec] | None = None, - compressor: dict[str, JSON] | numcodecs.abc.Codec | Literal["default"] | None = None, + compressor: CompressorLike = "auto", attributes: dict[str, JSON] | None = None, overwrite: bool = False, ) -> AsyncArray[ArrayV2Metadata]: @@ -819,8 +820,16 @@ async def _create_v2( else: await ensure_no_existing_node(store_path, zarr_format=2) - if compressor == "default": - compressor = _default_compressor(dtype) + compressor_parsed: CompressorLikev2 + if compressor == "auto": + compressor_parsed = _default_compressor(dtype) + elif isinstance(compressor, BytesBytesCodec): + raise ValueError( + "Cannot use a BytesBytesCodec as a compressor for zarr v2 arrays. " + "Use a numcodecs codec directly instead." + ) + else: + compressor_parsed = compressor metadata = cls._create_metadata_v2( shape=shape, @@ -830,7 +839,7 @@ async def _create_v2( dimension_separator=dimension_separator, fill_value=fill_value, filters=filters, - compressor=compressor, + compressor=compressor_parsed, attributes=attributes, ) @@ -1752,7 +1761,7 @@ def create( dimension_separator: Literal[".", "/"] | None = None, order: MemoryOrder | None = None, filters: list[dict[str, JSON]] | None = None, - compressor: dict[str, JSON] | Literal["default"] | None = "default", + compressor: CompressorLike = "auto", # runtime overwrite: bool = False, config: ArrayConfigLike | None = None, @@ -1881,7 +1890,7 @@ def _create( dimension_separator: Literal[".", "/"] | None = None, order: MemoryOrder | None = None, filters: list[dict[str, JSON]] | None = None, - compressor: dict[str, JSON] | Literal["default"] | None = "default", + compressor: CompressorLike = "auto", # runtime overwrite: bool = False, config: ArrayConfigLike | None = None, @@ -3788,7 +3797,11 @@ def _get_default_codecs( | Literal["auto"] | None ) -CompressorLike: TypeAlias = dict[str, JSON] | BytesBytesCodec | numcodecs.abc.Codec | None +# Union of acceptable types for v2 and v3 compressors +CompressorLike: TypeAlias = ( + dict[str, JSON] | BytesBytesCodec | numcodecs.abc.Codec | Literal["auto"] | None +) + CompressorsLike: TypeAlias = ( Iterable[dict[str, JSON] | BytesBytesCodec | numcodecs.abc.Codec] | dict[str, JSON] diff --git a/src/zarr/core/metadata/v2.py b/src/zarr/core/metadata/v2.py index d19193963f..029a3e09a7 100644 --- a/src/zarr/core/metadata/v2.py +++ b/src/zarr/core/metadata/v2.py @@ -5,7 +5,7 @@ from collections.abc import Iterable, Sequence from enum import Enum from functools import cached_property -from typing import TYPE_CHECKING, Any, TypedDict, cast +from typing import TYPE_CHECKING, Any, TypeAlias, TypedDict, cast import numcodecs.abc @@ -43,6 +43,10 @@ class ArrayV2MetadataDict(TypedDict): attributes: dict[str, JSON] +# Union of acceptable types for v2 compressors +CompressorLikev2: TypeAlias = dict[str, JSON] | numcodecs.abc.Codec | None + + @dataclass(frozen=True, kw_only=True) class ArrayV2Metadata(Metadata): shape: ChunkCoords @@ -52,7 +56,7 @@ class ArrayV2Metadata(Metadata): order: MemoryOrder = "C" filters: tuple[numcodecs.abc.Codec, ...] | None = None dimension_separator: Literal[".", "/"] = "." - compressor: numcodecs.abc.Codec | None = None + compressor: CompressorLikev2 attributes: dict[str, JSON] = field(default_factory=dict) zarr_format: Literal[2] = field(init=False, default=2) @@ -65,7 +69,7 @@ def __init__( fill_value: Any, order: MemoryOrder, dimension_separator: Literal[".", "/"] = ".", - compressor: numcodecs.abc.Codec | dict[str, JSON] | None = None, + compressor: CompressorLikev2 = None, filters: Iterable[numcodecs.abc.Codec | dict[str, JSON]] | None = None, attributes: dict[str, JSON] | None = None, ) -> None: From ef37b33b466291db8b1052e488ca769f8b37b7e9 Mon Sep 17 00:00:00 2001 From: David Stansby Date: Thu, 8 May 2025 10:43:02 +0100 Subject: [PATCH 3/6] Test v2 array w/ v3 codec errors --- tests/test_api.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/test_api.py b/tests/test_api.py index fd3470ebec..68b8f2ade8 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -2,6 +2,8 @@ from typing import TYPE_CHECKING +import zarr.codecs + if TYPE_CHECKING: import pathlib @@ -1196,3 +1198,14 @@ def test_v2_without_copmpressor() -> None: # Make sure it's possible to set no compressor for v2 arrays arr = zarr.create(store={}, shape=(1), dtype="uint8", zarr_format=2, compressor=None) assert arr.compressors == () + + +def test_v2_with_v3_copmpressor() -> None: + # Check trying to create a v2 array with a v3 compressor fails + with pytest.raises( + ValueError, + match="Cannot use a BytesBytesCodec as a compressor for zarr v2 arrays. Use a numcodecs codec directly instead.", + ): + zarr.create( + store={}, shape=(1), dtype="uint8", zarr_format=2, compressor=zarr.codecs.BloscCodec() + ) From 2a4756a81b3580d63e01dad67c8158af85a32139 Mon Sep 17 00:00:00 2001 From: David Stansby Date: Thu, 8 May 2025 10:45:57 +0100 Subject: [PATCH 4/6] Add changelog entry --- changes/3039.bugfix.rst | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 changes/3039.bugfix.rst diff --git a/changes/3039.bugfix.rst b/changes/3039.bugfix.rst new file mode 100644 index 0000000000..be2b424cf5 --- /dev/null +++ b/changes/3039.bugfix.rst @@ -0,0 +1,5 @@ +It is now possible to specify no compressor when creating a zarr format 2 array. +This can be done by passing ``compressor=None`` to the various array creation routines. + +The default behaviour of automatically choosing a suitable default compressor remains if the compressor argument is not given. +To reproduce the behaviour in previous zarr-python versions when ``compressor=None`` was passed, pass ``compressor='auto'`` instead. From 14f84d307c5dd215127ef8809f2c185447097a4c Mon Sep 17 00:00:00 2001 From: David Stansby Date: Thu, 8 May 2025 12:27:05 +0100 Subject: [PATCH 5/6] Update type comment --- src/zarr/core/array.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index deef56dcaf..32e8643e25 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -3797,7 +3797,7 @@ def _get_default_codecs( | Literal["auto"] | None ) -# Union of acceptable types for v2 and v3 compressors +# Union of acceptable types for users to pass in for both v2 and v3 compressors CompressorLike: TypeAlias = ( dict[str, JSON] | BytesBytesCodec | numcodecs.abc.Codec | Literal["auto"] | None ) From 20d6a06c3e13865cbde0213e8ff2d777bc65a2a4 Mon Sep 17 00:00:00 2001 From: David Stansby Date: Tue, 13 May 2025 12:31:35 +0100 Subject: [PATCH 6/6] fix test names Co-authored-by: Davis Bennett --- tests/test_api.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_api.py b/tests/test_api.py index 68b8f2ade8..d1912f7238 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -1194,13 +1194,13 @@ def test_gpu_basic(store: Store, zarr_format: ZarrFormat | None) -> None: cp.testing.assert_array_equal(result, src[:10, :10]) -def test_v2_without_copmpressor() -> None: +def test_v2_without_compressor() -> None: # Make sure it's possible to set no compressor for v2 arrays arr = zarr.create(store={}, shape=(1), dtype="uint8", zarr_format=2, compressor=None) assert arr.compressors == () -def test_v2_with_v3_copmpressor() -> None: +def test_v2_with_v3_compressor() -> None: # Check trying to create a v2 array with a v3 compressor fails with pytest.raises( ValueError,