8000 Allow no compressor for v2 arrays by dstansby · Pull Request #3039 · zarr-developers/zarr-python · GitHub
[go: up one dir, main page]

Skip to content

Allow no compressor for v2 arrays #3039

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
May 14, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions changes/3039.bugfix.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
It is now possible to specify no compressor when creating a zarr format 2 array.
This can be done by passing ``compressor=None`` to the various array creation routines.

If the ``compressor`` argument is not given, a suitable default compressor is still chosen automatically, as before.
To reproduce the behaviour in previous zarr-python versions when ``compressor=None`` was passed, pass ``compressor='auto'`` instead.
13 changes: 10 additions & 3 deletions src/zarr/api/asynchronous.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,14 @@
import numpy.typing as npt
from typing_extensions import deprecated

from zarr.core.array import Array, AsyncArray, create_array, from_array, get_array_metadata
from zarr.core.array import (
Array,
AsyncArray,
CompressorLike,
create_array,
from_array,
get_array_metadata,
)
from zarr.core.array_spec import ArrayConfig, ArrayConfigLike, ArrayConfigParams
from zarr.core.buffer import NDArrayLike
from zarr.core.common import (
Expand Down Expand Up @@ -838,7 +845,7 @@ async def create(
*, # Note: this is a change from v2
chunks: ChunkCoords | int | None = None, # TODO: v2 allowed chunks=True
dtype: npt.DTypeLike | None = None,
compressor: dict[str, JSON] | None = None, # TODO: default and type change
compressor: CompressorLike = "auto",
fill_value: Any | None = 0, # TODO: need type
order: MemoryOrder | None = None,
store: str | StoreLike | None = None,
Expand Down Expand Up @@ -991,7 +998,7 @@ async def create(
dtype = parse_dtype(dtype, zarr_format)
if not filters:
filters = _default_filters(dtype)
if not compressor:
if compressor == "auto":
compressor = _default_compressor(dtype)
elif zarr_format == 3 and chunk_shape is None: # type: ignore[redundant-expr]
if chunks is not None:
Expand Down
4 changes: 2 additions & 2 deletions src/zarr/api/synchronous.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import zarr.api.asynchronous as async_api
import zarr.core.array
from zarr._compat import _deprecate_positional_args
from zarr.core.array import Array, AsyncArray
from zarr.core.array import Array, AsyncArray, CompressorLike
from zarr.core.group import Group
from zarr.core.sync import sync
from zarr.core.sync_group import create_hierarchy
Expand Down Expand Up @@ -599,7 +599,7 @@ def create(
*, # Note: this is a change from v2
chunks: ChunkCoords | int | bool | None = None,
dtype: npt.DTypeLike | None = None,
compressor: dict[str, JSON] | None = None, # TODO: default and type change
compressor: CompressorLike = "auto",
fill_value: Any | None = 0, # TODO: need type
order: MemoryOrder | None = None,
store: str | StoreLike | None = None,
Expand Down
38 changes: 27 additions & 11 deletions src/zarr/core/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@
T_ArrayMetadata,
)
from zarr.core.metadata.v2 import (
CompressorLikev2,
_default_compressor,
_default_filters,
parse_compressor,
Expand Down Expand Up @@ -303,7 +304,7 @@ async def create(
dimension_separator: Literal[".", "/"] | None = None,
order: MemoryOrder | None = None,
filters: list[dict[str, JSON]] | None = None,
compressor: dict[str, JSON] | None = None,
compressor: CompressorLikev2 | Literal["auto"] = "auto",
# runtime
overwrite: bool = False,
data: npt.ArrayLike | None = None,
Expand Down Expand Up @@ -394,7 +395,7 @@ async def create(
dimension_separator: Literal[".", "/"] | None = None,
order: MemoryOrder | None = None,
filters: list[dict[str, JSON]] | None = None,
compressor: dict[str, JSON] | None = None,
compressor: CompressorLike = "auto",
# runtime
overwrite: bool = False,
data: npt.ArrayLike | None = None,
Expand Down Expand Up @@ -429,7 +430,7 @@ async def create(
dimension_separator: Literal[".", "/"] | None = None,
order: MemoryOrder | None = None,
filters: list[dict[str, JSON]] | None = None,
compressor: dict[str, JSON] | None = None,
compressor: CompressorLike = "auto",
# runtime
overwrite: bool = False,
data: npt.ArrayLike | None = None,
Expand Down Expand Up @@ -570,7 +571,7 @@ async def _create(
dimension_separator: Literal[".", "/"] | None = None,
order: MemoryOrder | None = None,
filters: list[dict[str, JSON]] | None = None,
compressor: dict[str, JSON] | None = None,
compressor: CompressorLike = "auto",
# runtime
overwrite: bool = False,
data: npt.ArrayLike | None = None,
Expand Down Expand Up @@ -604,7 +605,7 @@ async def _create(
raise ValueError(
"filters cannot be used for arrays with zarr_format 3. Use array-to-array codecs instead."
)
if compressor is not None:
if compressor != "auto":
raise ValueError(
"compressor cannot be used for arrays with zarr_format 3. Use bytes-to-bytes codecs instead."
)
Expand Down Expand Up @@ -768,7 +769,7 @@ def _create_metadata_v2(
dimension_separator: Literal[".", "/"] | None = None,
fill_value: float | None = None,
filters: Iterable[dict[str, JSON] | numcodecs.abc.Codec] | None = None,
compressor: dict[str, JSON] | numcodecs.abc.Codec | None = None,
compressor: CompressorLikev2 = None,
attributes: dict[str, JSON] | None = None,
) -> ArrayV2Metadata:
if dimension_separator is None:
Expand Down Expand Up @@ -809,7 +810,7 @@ async def _create_v2(
dimension_separator: Literal[".", "/"] | None = None,
fill_value: float | None = None,
filters: Iterable[dict[str, JSON] | numcodecs.abc.Codec] | None = None,
compressor: dict[str, JSON] | numcodecs.abc.Codec | None = None,
compressor: CompressorLike = "auto",
attributes: dict[str, JSON] | None = None,
overwrite: bool = False,
) -> AsyncArray[ArrayV2Metadata]:
Expand All @@ -821,6 +822,17 @@ async def _create_v2(
else:
await ensure_no_existing_node(store_path, zarr_format=2)

compressor_parsed: CompressorLikev2
if compressor == "auto":
compressor_parsed = _default_compressor(dtype)
elif isinstance(compressor, BytesBytesCodec):
raise ValueError(
"Cannot use a BytesBytesCodec as a compressor for zarr v2 arrays. "
"Use a numcodecs codec directly instead."
)
else:
compressor_parsed = compressor

metadata = cls._create_metadata_v2(
shape=shape,
dtype=dtype,
Expand All @@ -829,7 +841,7 @@ async def _create_v2(
dimension_separator=dimension_separator,
fill_value=fill_value,
filters=filters,
compressor=compressor,
compressor=compressor_parsed,
attributes=attributes,
)

Expand Down Expand Up @@ -1751,7 +1763,7 @@ def create(
dimension_separator: Literal[".", "/"] | None = None,
order: MemoryOrder | None = None,
filters: list[dict[str, JSON]] | None = None,
compressor: dict[str, JSON] | None = None,
compressor: CompressorLike = "auto",
# runtime
overwrite: bool = False,
config: ArrayConfigLike | None = None,
Expand Down Expand Up @@ -1880,7 +1892,7 @@ def _create(
dimension_separator: Literal[".", "/"] | None = None,
order: MemoryOrder | None = None,
filters: list[dict[str, JSON]] | None = None,
compressor: dict[str, JSON] | None = None,
compressor: CompressorLike = "auto",
# runtime
overwrite: bool = False,
config: ArrayConfigLike | None = None,
Expand Down Expand Up @@ -3792,7 +3804,11 @@ def _get_default_codecs(
| Literal["auto"]
| None
)
CompressorLike: TypeAlias = dict[str, JSON] | BytesBytesCodec | numcodecs.abc.Codec | None
# Union of acceptable types for users to pass in for both v2 and v3 compressors
# For zarr format 2 arrays, "auto" selects a default compressor based on the
# dtype, and None means that no compressor is used.
CompressorLike: TypeAlias = (
dict[str, JSON] | BytesBytesCodec | numcodecs.abc.Codec | Literal["auto"] | None
)

CompressorsLike: TypeAlias = (
Iterable[dict[str, JSON] | BytesBytesCodec | numcodecs.abc.Codec]
| dict[str, JSON]
Expand Down
10 changes: 7 additions & 3 deletions src/zarr/core/metadata/v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from collections.abc import Iterable, Sequence
from enum import Enum
from functools import cached_property
from typing import TYPE_CHECKING, Any, TypedDict, cast
from typing import TYPE_CHECKING, Any, TypeAlias, TypedDict, cast

import numcodecs.abc

Expand Down Expand Up @@ -43,6 +43,10 @@ class ArrayV2MetadataDict(TypedDict):
attributes: dict[str, JSON]


# Union of acceptable types for v2 compressors: a JSON-like dict, a numcodecs
# codec instance, or None (no compressor). The "auto" sentinel is not part of
# this alias.
CompressorLikev2: TypeAlias = dict[str, JSON] | numcodecs.abc.Codec | None


@dataclass(frozen=True, kw_only=True)
class ArrayV2Metadata(Metadata):
shape: ChunkCoords
Expand All @@ -52,7 +56,7 @@ class ArrayV2Metadata(Metadata):
order: MemoryOrder = "C"
filters: tuple[numcodecs.abc.Codec, ...] | None = None
dimension_separator: Literal[".", "/"] = "."
compressor: numcodecs.abc.Codec | None = None
compressor: CompressorLikev2
attributes: dict[str, JSON] = field(default_factory=dict)
zarr_format: Literal[2] = field(init=False, default=2)

Expand All @@ -65,7 +69,7 @@ def __init__(
fill_value: Any,
order: MemoryOrder,
dimension_separator: Literal[".", "/"] = ".",
compressor: numcodecs.abc.Codec | dict[str, JSON] | None = None,
compressor: CompressorLikev2 = None,
filters: Iterable[numcodecs.abc.Codec | dict[str, JSON]] | None = None,
attributes: dict[str, JSON] | None = None,
) -> None:
Expand Down
19 changes: 19 additions & 0 deletions tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

from typing import TYPE_CHECKING

import zarr.codecs

if TYPE_CHECKING:
import pathlib

Expand Down Expand Up @@ -1190,3 +1192,20 @@ def test_gpu_basic(store: Store, zarr_format: ZarrFormat | None) -> None:
# assert_array_equal doesn't check the type
assert isinstance(result, type(src))
cp.testing.assert_array_equal(result, src[:10, :10])


def test_v2_without_compressor() -> None:
    """Check that a zarr format 2 array can be created with no compressor.

    Passing ``compressor=None`` must result in an array whose ``compressors``
    attribute is an empty tuple.
    """
    # ``shape`` is spelled as a real 1-tuple: ``(1)`` is just the integer 1.
    arr = zarr.create(store={}, shape=(1,), dtype="uint8", zarr_format=2, compressor=None)
    assert arr.compressors == ()


def test_v2_with_v3_compressor() -> None:
    """Check that creating a v2 array with a v3 compressor fails.

    ``zarr.codecs.BloscCodec`` is passed as the compressor of a zarr format 2
    array, and a ``ValueError`` with the expected message must be raised.
    """
    import re

    expected_message = (
        "Cannot use a BytesBytesCodec as a compressor for zarr v2 arrays. "
        "Use a numcodecs codec directly instead."
    )
    # ``match`` is treated as a regular expression, so the message is escaped
    # to make the periods match literally instead of matching any character.
    with pytest.raises(ValueError, match=re.escape(expected_message)):
        # ``shape`` is spelled as a real 1-tuple: ``(1)`` is just the integer 1.
        zarr.create(
            store={}, shape=(1,), dtype="uint8", zarr_format=2, compressor=zarr.codecs.BloscCodec()
        )
0