diff --git a/src/zarr/abc/codec.py b/src/zarr/abc/codec.py
index 8ce9cc0043..9223019fab 100644
--- a/src/zarr/abc/codec.py
+++ b/src/zarr/abc/codec.py
@@ -17,6 +17,7 @@
     from typing_extensions import Self

     from zarr.array_spec import ArraySpec
+    from zarr.common import JSON
     from zarr.indexing import SelectorTuple

 CodecInput = TypeVar("CodecInput", bound=NDBuffer | Buffer)
@@ -254,7 +255,7 @@ def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self:

     @classmethod
     @abstractmethod
-    def from_list(cls, codecs: list[Codec]) -> Self:
+    def from_list(cls, codecs: Iterable[Codec]) -> Self:
         """Creates a codec pipeline from a list of codecs.

         Parameters
@@ -390,6 +391,13 @@ async def write(
         """
         ...

+    @classmethod
+    def from_dict(cls, data: Iterable[JSON | Codec]) -> Self:
+        """
+        Create an instance of the pipeline from an iterable of JSON-encoded codecs.
+        """
+        ...
+

 async def batching_helper(
     func: Callable[[CodecInput, ArraySpec], Awaitable[CodecOutput | None]],
diff --git a/src/zarr/array.py b/src/zarr/array.py
index e366321b15..289c7e3ed9 100644
--- a/src/zarr/array.py
+++ b/src/zarr/array.py
@@ -25,7 +25,6 @@
 from zarr.chunk_key_encodings import ChunkKeyEncoding, DefaultChunkKeyEncoding, V2ChunkKeyEncoding
 from zarr.codecs import BytesCodec
 from zarr.codecs._v2 import V2Compressor, V2Filters
-from zarr.codecs.pipeline import BatchedCodecPipeline
 from zarr.common import (
     JSON,
     ZARR_JSON,
@@ -61,6 +60,7 @@
     pop_fields,
 )
 from zarr.metadata import ArrayMetadata, ArrayV2Metadata, ArrayV3Metadata
+from zarr.registry import get_pipeline_class
 from zarr.store import StoreLike, StorePath, make_store_path
 from zarr.store.core import (
     ensure_no_existing_node,
@@ -79,11 +79,11 @@ def parse_array_metadata(data: Any) -> ArrayV2Metadata | ArrayV3Metadata:
     raise TypeError


-def create_codec_pipeline(metadata: ArrayV2Metadata | ArrayV3Metadata) -> BatchedCodecPipeline:
+def create_codec_pipeline(metadata: ArrayV2Metadata | ArrayV3Metadata) -> CodecPipeline:
     if isinstance(metadata, ArrayV3Metadata):
-        return BatchedCodecPipeline.from_list(metadata.codecs)
+        return get_pipeline_class().from_list(metadata.codecs)
     elif isinstance(metadata, ArrayV2Metadata):
-        return BatchedCodecPipeline.from_list(
+        return get_pipeline_class().from_list(
             [V2Filters(metadata.filters or []), V2Compressor(metadata.compressor)]
         )
     else:
@@ -483,8 +483,13 @@ async def _get_selection(
         return out_buffer.as_ndarray_like()

     async def getitem(
-        self, selection: BasicSelection, *, prototype: BufferPrototype = default_buffer_prototype
+        self,
+        selection: BasicSelection,
+        *,
+        prototype: BufferPrototype | None = None,
     ) -> NDArrayLike:
+        if prototype is None:
+            prototype = default_buffer_prototype()
         indexer = BasicIndexer(
             selection,
             shape=self.metadata.shape,
@@ -493,7 +498,7 @@ async def getitem(
         return await self._get_selection(indexer, prototype=prototype)

     async def _save_metadata(self, metadata: ArrayMetadata) -> None:
-        to_save = metadata.to_buffer_dict()
+        to_save = metadata.to_buffer_dict(default_buffer_prototype())
         awaitables = [set_or_delete(self.store_path / key, value) for key, value in to_save.items()]
         await gather(*awaitables)

@@ -545,8 +550,10 @@ async def setitem(
         self,
         selection: BasicSelection,
         value: npt.ArrayLike,
-        prototype: BufferPrototype = default_buffer_prototype,
+        prototype: BufferPrototype | None = None,
     ) -> None:
+        if prototype is None:
+            prototype = default_buffer_prototype()
         indexer = BasicIndexer(
             selection,
             shape=self.metadata.shape,
@@ -1001,7 +1008,7 @@ def get_basic_selection(
         selection: BasicSelection = 
Ellipsis, *, out: NDBuffer | None = None, - prototype: BufferPrototype = default_buffer_prototype, + prototype: BufferPrototype | None = None, fields: Fields | None = None, ) -> NDArrayLike: """Retrieve data for an item or region of the array. @@ -1108,6 +1115,8 @@ def get_basic_selection( """ + if prototype is None: + prototype = default_buffer_prototype() return sync( self._async_array._get_selection( BasicIndexer(selection, self.shape, self.metadata.chunk_grid), @@ -1123,7 +1132,7 @@ def set_basic_selection( value: npt.ArrayLike, *, fields: Fields | None = None, - prototype: BufferPrototype = default_buffer_prototype, + prototype: BufferPrototype | None = None, ) -> None: """Modify data for an item or region of the array. @@ -1207,6 +1216,8 @@ def set_basic_selection( vindex, oindex, blocks, __getitem__, __setitem__ """ + if prototype is None: + prototype = default_buffer_prototype() indexer = BasicIndexer(selection, self.shape, self.metadata.chunk_grid) sync(self._async_array._set_selection(indexer, value, fields=fields, prototype=prototype)) @@ -1216,7 +1227,7 @@ def get_orthogonal_selection( *, out: NDBuffer | None = None, fields: Fields | None = None, - prototype: BufferPrototype = default_buffer_prototype, + prototype: BufferPrototype | None = None, ) -> NDArrayLike: """Retrieve data by making a selection for each dimension of the array. For example, if an array has 2 dimensions, allows selecting specific rows and/or @@ -1325,6 +1336,8 @@ def get_orthogonal_selection( vindex, oindex, blocks, __getitem__, __setitem__ """ + if prototype is None: + prototype = default_buffer_prototype() indexer = OrthogonalIndexer(selection, self.shape, self.metadata.chunk_grid) return sync( self._async_array._get_selection( @@ -1338,7 +1351,7 @@ def set_orthogonal_selection( value: npt.ArrayLike, *, fields: Fields | None = None, - prototype: BufferPrototype = default_buffer_prototype, + prototype: BufferPrototype | None = None, ) -> None: """Modify data via a selection for each dimension of the array. 
@@ -1435,6 +1448,8 @@ def set_orthogonal_selection( vindex, oindex, blocks, __getitem__, __setitem__ """ + if prototype is None: + prototype = default_buffer_prototype() indexer = OrthogonalIndexer(selection, self.shape, self.metadata.chunk_grid) return sync( self._async_array._set_selection(indexer, value, fields=fields, prototype=prototype) @@ -1446,7 +1461,7 @@ def get_mask_selection( *, out: NDBuffer | None = None, fields: Fields | None = None, - prototype: BufferPrototype = default_buffer_prototype, + prototype: BufferPrototype | None = None, ) -> NDArrayLike: """Retrieve a selection of individual items, by providing a Boolean array of the same shape as the array against which the selection is being made, where True @@ -1513,6 +1528,8 @@ def get_mask_selection( vindex, oindex, blocks, __getitem__, __setitem__ """ + if prototype is None: + prototype = default_buffer_prototype() indexer = MaskIndexer(mask, self.shape, self.metadata.chunk_grid) return sync( self._async_array._get_selection( @@ -1526,7 +1543,7 @@ def set_mask_selection( value: npt.ArrayLike, *, fields: Fields | None = None, - prototype: BufferPrototype = default_buffer_prototype, + prototype: BufferPrototype | None = None, ) -> None: """Modify a selection of individual items, by providing a Boolean array of the same shape as the array against which the selection is being made, where True @@ -1593,6 +1610,8 @@ def set_mask_selection( vindex, oindex, blocks, __getitem__, __setitem__ """ + if prototype is None: + prototype = default_buffer_prototype() indexer = MaskIndexer(mask, self.shape, self.metadata.chunk_grid) sync(self._async_array._set_selection(indexer, value, fields=fields, prototype=prototype)) @@ -1602,7 +1621,7 @@ def get_coordinate_selection( *, out: NDBuffer | None = None, fields: Fields | None = None, - prototype: BufferPrototype = default_buffer_prototype, + prototype: BufferPrototype | None = None, ) -> NDArrayLike: """Retrieve a selection of individual items, by providing the indices (coordinates) for each selected item. @@ -1671,6 +1690,8 @@ def get_coordinate_selection( vindex, oindex, blocks, __getitem__, __setitem__ """ + if prototype is None: + prototype = default_buffer_prototype() indexer = CoordinateIndexer(selection, self.shape, self.metadata.chunk_grid) out_array = sync( self._async_array._get_selection( @@ -1689,7 +1710,7 @@ def set_coordinate_selection( value: npt.ArrayLike, *, fields: Fields | None = None, - prototype: BufferPrototype = default_buffer_prototype, + prototype: BufferPrototype | None = None, ) -> None: """Modify a selection of individual items, by providing the indices (coordinates) for each item to be modified. @@ -1753,6 +1774,8 @@ def set_coordinate_selection( vindex, oindex, blocks, __getitem__, __setitem__ """ + if prototype is None: + prototype = default_buffer_prototype() # setup indexer indexer = CoordinateIndexer(selection, self.shape, self.metadata.chunk_grid) @@ -1776,7 +1799,7 @@ def get_block_selection( *, out: NDBuffer | None = None, fields: Fields | None = None, - prototype: BufferPrototype = default_buffer_prototype, + prototype: BufferPrototype | None = None, ) -> NDArrayLike: """Retrieve a selection of individual items, by providing the indices (coordinates) for each selected item. 
@@ -1859,6 +1882,8 @@ def get_block_selection(
         vindex, oindex, blocks, __getitem__, __setitem__

         """
+        if prototype is None:
+            prototype = default_buffer_prototype()
         indexer = BlockIndexer(selection, self.shape, self.metadata.chunk_grid)
         return sync(
             self._async_array._get_selection(
@@ -1872,7 +1897,7 @@ def set_block_selection(
         value: npt.ArrayLike,
         *,
         fields: Fields | None = None,
-        prototype: BufferPrototype = default_buffer_prototype,
+        prototype: BufferPrototype | None = None,
     ) -> None:
         """Modify a selection of individual blocks, by providing the chunk indices
         (coordinates) for each block to be modified.
@@ -1950,6 +1975,8 @@ def set_block_selection(
         vindex, oindex, blocks, __getitem__, __setitem__

         """
+        if prototype is None:
+            prototype = default_buffer_prototype()
         indexer = BlockIndexer(selection, self.shape, self.metadata.chunk_grid)
         sync(self._async_array._set_selection(indexer, value, fields=fields, prototype=prototype))

diff --git a/src/zarr/buffer.py b/src/zarr/buffer.py
index 86f9b53477..9c75ba1410 100644
--- a/src/zarr/buffer.py
+++ b/src/zarr/buffer.py
@@ -16,6 +16,12 @@
 import numpy.typing as npt

 from zarr.common import ChunkCoords
+from zarr.registry import (
+    get_buffer_class,
+    get_ndbuffer_class,
+    register_buffer,
+    register_ndbuffer,
+)

 if TYPE_CHECKING:
     from typing_extensions import Self
@@ -479,4 +485,14 @@ class BufferPrototype(NamedTuple):


 # The default buffer prototype used throughout the Zarr codebase.
-default_buffer_prototype = BufferPrototype(buffer=Buffer, nd_buffer=NDBuffer)
+def default_buffer_prototype() -> BufferPrototype:
+    return BufferPrototype(buffer=get_buffer_class(), nd_buffer=get_ndbuffer_class())
+
+
+# The numpy prototype, used e.g. when reading the shard index
+def numpy_buffer_prototype() -> BufferPrototype:
+    return BufferPrototype(buffer=Buffer, nd_buffer=NDBuffer)
+
+
+register_buffer(Buffer)
+register_ndbuffer(NDBuffer)
diff --git a/src/zarr/codecs/_v2.py b/src/zarr/codecs/_v2.py
index c43a087a94..60854bee34 100644
--- a/src/zarr/codecs/_v2.py
+++ b/src/zarr/codecs/_v2.py
@@ -7,8 +7,9 @@

 from zarr.abc.codec import ArrayArrayCodec, ArrayBytesCodec
 from zarr.array_spec import ArraySpec
-from zarr.buffer import Buffer, NDBuffer
+from zarr.buffer import Buffer, NDBuffer, default_buffer_prototype
 from zarr.common import JSON, to_thread
+from zarr.registry import get_ndbuffer_class


 @dataclass(frozen=True)
@@ -34,7 +35,7 @@ async def _decode_single(
         if str(chunk_numpy_array.dtype) != chunk_spec.dtype:
             chunk_numpy_array = chunk_numpy_array.view(chunk_spec.dtype)

-        return NDBuffer.from_numpy_array(chunk_numpy_array)
+        return get_ndbuffer_class().from_numpy_array(chunk_numpy_array)

     async def _encode_single(
         self,
@@ -55,7 +56,7 @@ async def _encode_single(
         else:
             encoded_chunk_bytes = ensure_bytes(chunk_numpy_array)

-        return Buffer.from_bytes(encoded_chunk_bytes)
+        return default_buffer_prototype().buffer.from_bytes(encoded_chunk_bytes)

     def compute_encoded_size(self, _input_byte_length: int, _chunk_spec: ArraySpec) -> int:
         raise NotImplementedError
@@ -86,7 +87,7 @@ async def _decode_single(
             order=chunk_spec.order,
         )

-        return NDBuffer.from_ndarray_like(chunk_ndarray)
+        return get_ndbuffer_class().from_ndarray_like(chunk_ndarray)

     async def _encode_single(
         self,
@@ -99,7 +100,7 @@ async def _encode_single(
         filter = numcodecs.get_codec(filter_metadata)
         chunk_ndarray = await to_thread(filter.encode, chunk_ndarray)

-        return NDBuffer.from_ndarray_like(chunk_ndarray)
+        return get_ndbuffer_class().from_ndarray_like(chunk_ndarray)

     def 
compute_encoded_size(self, _input_byte_length: int, _chunk_spec: ArraySpec) -> int: raise NotImplementedError diff --git a/src/zarr/codecs/blosc.py b/src/zarr/codecs/blosc.py index df1976d4c1..d03ecbcbf2 100644 --- a/src/zarr/codecs/blosc.py +++ b/src/zarr/codecs/blosc.py @@ -11,8 +11,8 @@ from zarr.abc.codec import BytesBytesCodec from zarr.array_spec import ArraySpec from zarr.buffer import Buffer, as_numpy_array_wrapper -from zarr.codecs.registry import register_codec from zarr.common import JSON, parse_enum, parse_named_configuration, to_thread +from zarr.registry import register_codec if TYPE_CHECKING: from typing_extensions import Self diff --git a/src/zarr/codecs/bytes.py b/src/zarr/codecs/bytes.py index 80b596a157..1fe9d900eb 100644 --- a/src/zarr/codecs/bytes.py +++ b/src/zarr/codecs/bytes.py @@ -10,8 +10,8 @@ from zarr.abc.codec import ArrayBytesCodec from zarr.array_spec import ArraySpec from zarr.buffer import Buffer, NDArrayLike, NDBuffer -from zarr.codecs.registry import register_codec from zarr.common import JSON, parse_enum, parse_named_configuration +from zarr.registry import register_codec if TYPE_CHECKING: from typing_extensions import Self diff --git a/src/zarr/codecs/crc32c_.py b/src/zarr/codecs/crc32c_.py index b670b25429..5c94558b00 100644 --- a/src/zarr/codecs/crc32c_.py +++ b/src/zarr/codecs/crc32c_.py @@ -9,8 +9,8 @@ from zarr.abc.codec import BytesBytesCodec from zarr.array_spec import ArraySpec from zarr.buffer import Buffer -from zarr.codecs.registry import register_codec from zarr.common import JSON, parse_named_configuration +from zarr.registry import register_codec if TYPE_CHECKING: from typing_extensions import Self diff --git a/src/zarr/codecs/gzip.py b/src/zarr/codecs/gzip.py index 0ad97c1207..915ae79832 100644 --- a/src/zarr/codecs/gzip.py +++ b/src/zarr/codecs/gzip.py @@ -8,8 +8,8 @@ from zarr.abc.codec import BytesBytesCodec from zarr.array_spec import ArraySpec from zarr.buffer import Buffer, as_numpy_array_wrapper -from zarr.codecs.registry import register_codec from zarr.common import JSON, parse_named_configuration, to_thread +from zarr.registry import register_codec if TYPE_CHECKING: from typing_extensions import Self diff --git a/src/zarr/codecs/pipeline.py b/src/zarr/codecs/pipeline.py index f75491d29f..8cda04c9ff 100644 --- a/src/zarr/codecs/pipeline.py +++ b/src/zarr/codecs/pipeline.py @@ -20,10 +20,10 @@ from zarr.abc.store import ByteGetter, ByteSetter from zarr.buffer import Buffer, BufferPrototype, NDBuffer from zarr.chunk_grids import ChunkGrid -from zarr.codecs.registry import get_codec_class from zarr.common import JSON, ChunkCoords, concurrent_map, parse_named_configuration from zarr.config import config from zarr.indexing import SelectorTuple, is_scalar, is_total_slice +from zarr.registry import get_codec_class, register_pipeline if TYPE_CHECKING: from typing_extensions import Self @@ -527,3 +527,6 @@ def codecs_from_list( raise ValueError("Required ArrayBytesCodec was not found.") else: return array_array, array_bytes_maybe, bytes_bytes + + +register_pipeline(BatchedCodecPipeline) diff --git a/src/zarr/codecs/registry.py b/src/zarr/codecs/registry.py index 0956ce75d0..e69de29bb2 100644 --- a/src/zarr/codecs/registry.py +++ b/src/zarr/codecs/registry.py @@ -1,38 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING - -if TYPE_CHECKING: - from zarr.abc.codec import Codec - -from importlib.metadata import EntryPoint -from importlib.metadata import entry_points as get_entry_points - -__codec_registry: dict[str, 
type[Codec]] = {} -__lazy_load_codecs: dict[str, EntryPoint] = {} - - -def _collect_entrypoints() -> dict[str, EntryPoint]: - entry_points = get_entry_points() - for e in entry_points.select(group="zarr.codecs"): - __lazy_load_codecs[e.name] = e - return __lazy_load_codecs - - -def register_codec(key: str, codec_cls: type[Codec]) -> None: - __codec_registry[key] = codec_cls - - -def get_codec_class(key: str) -> type[Codec]: - item = __codec_registry.get(key) - if item is None and key in __lazy_load_codecs: - # logger.debug("Auto loading codec '%s' from entrypoint", codec_id) - cls = __lazy_load_codecs[key].load() - register_codec(key, cls) - item = __codec_registry.get(key) - if item: - return item - raise KeyError(key) - - -_collect_entrypoints() diff --git a/src/zarr/codecs/sharding.py b/src/zarr/codecs/sharding.py index e3ef664b94..ef8b80c02d 100644 --- a/src/zarr/codecs/sharding.py +++ b/src/zarr/codecs/sharding.py @@ -15,15 +15,20 @@ ArrayBytesCodecPartialDecodeMixin, ArrayBytesCodecPartialEncodeMixin, Codec, + CodecPipeline, ) from zarr.abc.store import ByteGetter, ByteSetter from zarr.array_spec import ArraySpec -from zarr.buffer import Buffer, BufferPrototype, NDBuffer, default_buffer_prototype +from zarr.buffer import ( + Buffer, + BufferPrototype, + NDBuffer, + default_buffer_prototype, + numpy_buffer_prototype, +) from zarr.chunk_grids import ChunkGrid, RegularChunkGrid from zarr.codecs.bytes import BytesCodec from zarr.codecs.crc32c_ import Crc32cCodec -from zarr.codecs.pipeline import BatchedCodecPipeline -from zarr.codecs.registry import register_codec from zarr.common import ( ChunkCoords, ChunkCoordsLike, @@ -34,6 +39,7 @@ ) from zarr.indexing import BasicIndexer, SelectorTuple, c_order_iter, get_indexer, morton_order_iter from zarr.metadata import parse_codecs +from zarr.registry import get_ndbuffer_class, get_pipeline_class, register_codec if TYPE_CHECKING: from collections.abc import Awaitable, Callable, Iterator @@ -66,8 +72,8 @@ async def get( ) -> Buffer | None: assert byte_range is None, "byte_range is not supported within shards" assert ( - prototype is default_buffer_prototype - ), "prototype is not supported within shards currently" + prototype == default_buffer_prototype() + ), f"prototype is not supported within shards currently. 
diff: {prototype} != {default_buffer_prototype()}"
         return self.shard_dict.get(self.chunk_coords)

@@ -169,10 +175,14 @@ async def from_bytes(
         return obj

     @classmethod
-    def create_empty(cls, chunks_per_shard: ChunkCoords) -> _ShardReader:
+    def create_empty(
+        cls, chunks_per_shard: ChunkCoords, buffer_prototype: BufferPrototype | None = None
+    ) -> _ShardReader:
+        if buffer_prototype is None:
+            buffer_prototype = default_buffer_prototype()
         index = _ShardIndex.create_empty(chunks_per_shard)
         obj = cls()
-        obj.buf = Buffer.create_zero_length()
+        obj.buf = buffer_prototype.buffer.create_zero_length()
         obj.index = index
         return obj

@@ -215,9 +225,13 @@ def merge_with_morton_order(
         return obj

     @classmethod
-    def create_empty(cls, chunks_per_shard: ChunkCoords) -> _ShardBuilder:
+    def create_empty(
+        cls, chunks_per_shard: ChunkCoords, buffer_prototype: BufferPrototype | None = None
+    ) -> _ShardBuilder:
+        if buffer_prototype is None:
+            buffer_prototype = default_buffer_prototype()
         obj = cls()
-        obj.buf = Buffer.create_zero_length()
+        obj.buf = buffer_prototype.buffer.create_zero_length()
         obj.index = _ShardIndex.create_empty(chunks_per_shard)
         return obj

@@ -346,8 +360,8 @@ def from_dict(cls, data: dict[str, JSON]) -> Self:
         return cls(**configuration_parsed)  # type: ignore[arg-type]

     @property
-    def codec_pipeline(self) -> BatchedCodecPipeline:
-        return BatchedCodecPipeline.from_list(self.codecs)
+    def codec_pipeline(self) -> CodecPipeline:
+        return get_pipeline_class().from_list(self.codecs)

     def to_dict(self) -> dict[str, JSON]:
         return {
@@ -593,7 +607,9 @@ async def _decode_shard_index(
     ) -> _ShardIndex:
         index_array = next(
             iter(
-                await BatchedCodecPipeline.from_list(self.index_codecs).decode(
+                await get_pipeline_class()
+                .from_list(self.index_codecs)
+                .decode(
                     [(index_bytes, self._get_index_chunk_spec(chunks_per_shard))],
                 )
             )
@@ -604,10 +620,12 @@ async def _decode_shard_index(
     async def _encode_shard_index(self, index: _ShardIndex) -> Buffer:
         index_bytes = next(
             iter(
-                await BatchedCodecPipeline.from_list(self.index_codecs).encode(
+                await get_pipeline_class()
+                .from_list(self.index_codecs)
+                .encode(
                     [
                         (
-                            NDBuffer.from_numpy_array(index.offsets_and_lengths),
+                            get_ndbuffer_class().from_numpy_array(index.offsets_and_lengths),
                             self._get_index_chunk_spec(index.chunks_per_shard),
                         )
                     ],
@@ -619,8 +637,12 @@ async def _encode_shard_index(self, index: _ShardIndex) -> Buffer:
         return index_bytes

     def _shard_index_size(self, chunks_per_shard: ChunkCoords) -> int:
-        return BatchedCodecPipeline.from_list(self.index_codecs).compute_encoded_size(
-            16 * product(chunks_per_shard), self._get_index_chunk_spec(chunks_per_shard)
+        return (
+            get_pipeline_class()
+            .from_list(self.index_codecs)
+            .compute_encoded_size(
+                16 * product(chunks_per_shard), self._get_index_chunk_spec(chunks_per_shard)
+            )
         )

     def _get_index_chunk_spec(self, chunks_per_shard: ChunkCoords) -> ArraySpec:
@@ -629,7 +651,7 @@ def _get_index_chunk_spec(self, chunks_per_shard: ChunkCoords) -> ArraySpec:
             dtype=np.dtype("<u8"),
             fill_value=MAX_UINT_64,
             order="C",
-            prototype=default_buffer_prototype,
+            prototype=numpy_buffer_prototype(),
         )

     def _get_chunk_spec(self, shard_spec: ArraySpec) -> ArraySpec:
@@ -657,11 +679,11 @@ async def _load_shard_index_maybe(
         shard_index_size = self._shard_index_size(chunks_per_shard)
         if self.index_location == ShardingCodecIndexLocation.start:
             index_bytes = await byte_getter.get(
-                prototype=default_buffer_prototype, byte_range=(0, shard_index_size)
+                prototype=numpy_buffer_prototype(), byte_range=(0, shard_index_size)
             )
         else:
             index_bytes = await byte_getter.get(
-                prototype=default_buffer_prototype, byte_range=(-shard_index_size, None)
+                prototype=numpy_buffer_prototype(), byte_range=(-shard_index_size, None)
             )
         if index_bytes is not None:
             return await self._decode_shard_index(index_bytes, chunks_per_shard)
diff --git a/src/zarr/codecs/transpose.py b/src/zarr/codecs/transpose.py
index 9dad89002e..3f9ae61676 100644
--- a/src/zarr/codecs/transpose.py
+++ b/src/zarr/codecs/transpose.py
@@ -10,8 +10,8 @@
 from zarr.array_spec import ArraySpec
 from zarr.buffer import NDBuffer
 from zarr.chunk_grids import ChunkGrid
-from zarr.codecs.registry import register_codec
 from zarr.common import JSON, ChunkCoordsLike, parse_named_configuration
+from zarr.registry import register_codec

 if TYPE_CHECKING:
     from typing import Any
diff --git a/src/zarr/codecs/zstd.py b/src/zarr/codecs/zstd.py
index 4c5afba00b..b244ee703a 100644
--- a/src/zarr/codecs/zstd.py
+++ b/src/zarr/codecs/zstd.py
@@ -9,8 +9,8 @@
 from zarr.abc.codec import BytesBytesCodec
 from zarr.array_spec import ArraySpec
 from zarr.buffer import Buffer, as_numpy_array_wrapper
-from zarr.codecs.registry import register_codec
 from zarr.common import JSON, parse_named_configuration, to_thread
+from zarr.registry import register_codec

 if TYPE_CHECKING:
     from typing_extensions import Self
diff --git a/src/zarr/config.py b/src/zarr/config.py
index e711a98cb5..ec78747a6b 100644
--- a/src/zarr/config.py
+++ b/src/zarr/config.py
@@ -2,16 +2,66 @@
 from typing import Any, Literal, cast

-from donfig import Config
+from donfig import Config as DConfig
+
+
+class BadConfigError(ValueError):
+    _msg = "bad Config: %r"
+
+
+class Config(DConfig):  # type: ignore[misc]
+    """Will collect configuration from config files and environment variables
+
+    Example environment variables:
+    Grabs environment variables of the form "ZARR_FOO__BAR_BAZ=123" and
+    turns these into config variables of the form ``{"foo": {"bar-baz": 123}}``
+    It transforms the key and value in the following way:
+
+    -  Lower-cases the key text
+    -  Treats ``__`` (double-underscore) as nested access
+    -  Calls ``ast.literal_eval`` on the value
+
+    """
+
+    def reset(self) -> None:
+        self.clear()
+        self.refresh()
+
+
+"""
+The config module is responsible for managing the configuration of zarr and is based on the Donfig python library.
+For selecting custom implementations of codecs, pipelines, buffers and ndbuffers, first register the implementations
+in the registry and then select them in the config.
+e.g. selecting an implementation of the bytes codec in a class "package.NewBytesCodec" requires the value of
+codecs.bytes to be the fully qualified class name "package.NewBytesCodec".
+Donfig can be configured programmatically, by environment variables, or from YAML files in standard locations
+e.g. export ZARR_CODECS__BYTES="package.NewBytesCodec"
+(for more information see github.com/pytroll/donfig)
+Default values below point to the standard implementations of zarr-python
+"""
 config = Config(
     "zarr",
     defaults=[
         {
             "array": {"order": "C"},
             "async": {"concurrency": None, "timeout": None},
-            "codec_pipeline": {"batch_size": 1},
             "json_indent": 2,
+            "codec_pipeline": {
+                "path": "zarr.codecs.pipeline.BatchedCodecPipeline",
+                "batch_size": 1,
+            },
+            "codecs": {
+                "blosc": "zarr.codecs.blosc.BloscCodec",
+                "gzip": "zarr.codecs.gzip.GzipCodec",
+                "zstd": "zarr.codecs.zstd.ZstdCodec",
+                "bytes": "zarr.codecs.bytes.BytesCodec",
+                "endian": "zarr.codecs.bytes.BytesCodec",  # compatibility with earlier versions of ZEP1
+                "crc32c": "zarr.codecs.crc32c_.Crc32cCodec",
+                "sharding_indexed": "zarr.codecs.sharding.ShardingCodec",
+                "transpose": "zarr.codecs.transpose.TransposeCodec",
+            },
+            "buffer": "zarr.buffer.Buffer",
+            "ndbuffer": "zarr.buffer.NDBuffer",
         }
     ],
 )
diff --git a/src/zarr/group.py b/src/zarr/group.py
index 5361eb1345..0e6c3f63ab 100644
--- a/src/zarr/group.py
+++ b/src/zarr/group.py
@@ -15,7 +15,7 @@
 from zarr.abc.store import set_or_delete
 from zarr.array import Array, AsyncArray
 from zarr.attributes import Attributes
-from zarr.buffer import Buffer
+from zarr.buffer import Buffer, BufferPrototype, default_buffer_prototype
 from zarr.chunk_key_encodings import ChunkKeyEncoding
 from zarr.common import (
     JSON,
@@ -81,20 +81,20 @@ class GroupMetadata(Metadata):
     zarr_format: ZarrFormat = 3
     node_type: Literal["group"] = field(default="group", init=False)

-    def to_buffer_dict(self) -> dict[str, Buffer]:
+    def to_buffer_dict(self, prototype: BufferPrototype) -> dict[str, Buffer]:
         json_indent = config.get("json_indent")
         if self.zarr_format == 3:
             return {
-                ZARR_JSON: Buffer.from_bytes(
+                ZARR_JSON: prototype.buffer.from_bytes(
                     json.dumps(self.to_dict(), indent=json_indent).encode()
                 )
             }
         else:
             return {
-                ZGROUP_JSON: Buffer.from_bytes(
+                ZGROUP_JSON: prototype.buffer.from_bytes(
                     json.dumps({"zarr_format": self.zarr_format}, indent=json_indent).encode()
                 ),
-                ZATTRS_JSON: Buffer.from_bytes(
+                ZATTRS_JSON: prototype.buffer.from_bytes(
                     json.dumps(self.attributes, indent=json_indent).encode()
                 ),
             }
@@ -274,7 +274,7 @@ async def delitem(self, key: str) -> None:
             raise ValueError(f"unexpected zarr_format: {self.metadata.zarr_format}")

     async def _save_metadata(self) -> None:
-        to_save = self.metadata.to_buffer_dict()
+        to_save = self.metadata.to_buffer_dict(default_buffer_prototype())
         awaitables = [set_or_delete(self.store_path / key, value) for key, value in to_save.items()]
         await asyncio.gather(*awaitables)

@@ -583,7 +583,7 @@ async def update_attributes_async(self, new_attributes: dict[str, Any]) -> Group
         new_metadata = replace(self.metadata, attributes=new_attributes)

         # Write new metadata
-        to_save = new_metadata.to_buffer_dict()
+        to_save = new_metadata.to_buffer_dict(default_buffer_prototype())
         awaitables = [set_or_delete(self.store_path / key, value) for key, value in to_save.items()]
         await asyncio.gather(*awaitables)
diff --git a/src/zarr/metadata.py b/src/zarr/metadata.py
index 71462b6583..f19d230a60 100644
--- a/src/zarr/metadata.py
+++ b/src/zarr/metadata.py
@@ -15,8 +15,8 @@
 from zarr.buffer import Buffer, BufferPrototype, default_buffer_prototype
 from zarr.chunk_grids import ChunkGrid, RegularChunkGrid
 from zarr.chunk_key_encodings import ChunkKeyEncoding, parse_separator
-from zarr.codecs.registry import get_codec_class
 from zarr.config import config
+from zarr.registry import 
get_codec_class, get_pipeline_class if TYPE_CHECKING: from typing_extensions import Self @@ -40,7 +40,6 @@ # For type checking _bool = bool - __all__ = ["ArrayMetadata"] @@ -143,7 +142,7 @@ def encode_chunk_key(self, chunk_coords: ChunkCoords) -> str: pass @abstractmethod - def to_buffer_dict(self) -> dict[str, Buffer]: + def to_buffer_dict(self, prototype: BufferPrototype) -> dict[str, Buffer]: pass @abstractmethod @@ -197,7 +196,7 @@ def __init__( dtype=data_type_parsed, fill_value=fill_value_parsed, order="C", # TODO: order is not needed here. - prototype=default_buffer_prototype, # TODO: prototype is not needed here. + prototype=default_buffer_prototype(), # TODO: prototype is not needed here. ) codecs_parsed = [c.evolve_from_array_spec(array_spec) for c in codecs_parsed_partial] @@ -253,7 +252,7 @@ def get_chunk_spec( def encode_chunk_key(self, chunk_coords: ChunkCoords) -> str: return self.chunk_key_encoding.encode_chunk_key(chunk_coords) - def to_buffer_dict(self) -> dict[str, Buffer]: + def to_buffer_dict(self, prototype: BufferPrototype) -> dict[str, Buffer]: def _json_convert(o: Any) -> Any: if isinstance(o, np.dtype): return str(o) @@ -277,7 +276,7 @@ def _json_convert(o: Any) -> Any: json_indent = config.get("json_indent") return { - ZARR_JSON: Buffer.from_bytes( + ZARR_JSON: prototype.buffer.from_bytes( json.dumps(self.to_dict(), default=_json_convert, indent=json_indent).encode() ) } @@ -377,7 +376,7 @@ def dtype(self) -> np.dtype[Any]: def chunks(self) -> ChunkCoords: return self.chunk_grid.chunk_shape - def to_buffer_dict(self) -> dict[str, Buffer]: + def to_buffer_dict(self, prototype: BufferPrototype) -> dict[str, Buffer]: def _json_convert( o: Any, ) -> Any: @@ -398,10 +397,12 @@ def _json_convert( assert isinstance(zattrs_dict, dict) json_indent = config.get("json_indent") return { - ZARRAY_JSON: Buffer.from_bytes( + ZARRAY_JSON: prototype.buffer.from_bytes( json.dumps(zarray_dict, default=_json_convert, indent=json_indent).encode() ), - ZATTRS_JSON: Buffer.from_bytes(json.dumps(zattrs_dict, indent=json_indent).encode()), + ZATTRS_JSON: prototype.buffer.from_bytes( + json.dumps(zattrs_dict, indent=json_indent).encode() + ), } @classmethod @@ -506,11 +507,9 @@ def parse_v2_metadata(data: ArrayV2Metadata) -> ArrayV2Metadata: def create_pipeline(data: Iterable[Codec | JSON]) -> CodecPipeline: - from zarr.codecs import BatchedCodecPipeline - if not isinstance(data, Iterable): raise TypeError(f"Expected iterable, got {type(data)}") - return BatchedCodecPipeline.from_dict(data) + return get_pipeline_class().from_dict(data) def parse_codecs(data: Iterable[Codec | dict[str, JSON]]) -> tuple[Codec, ...]: diff --git a/src/zarr/registry.py b/src/zarr/registry.py new file mode 100644 index 0000000000..ac373f401d --- /dev/null +++ b/src/zarr/registry.py @@ -0,0 +1,180 @@ +from __future__ import annotations + +import warnings +from collections import defaultdict +from typing import TYPE_CHECKING, Any, Generic, TypeVar + +if TYPE_CHECKING: + from zarr.abc.codec import Codec, CodecPipeline + from zarr.buffer import Buffer, NDBuffer + +from importlib.metadata import EntryPoint +from importlib.metadata import entry_points as get_entry_points + +from zarr.config import BadConfigError, config + +T = TypeVar("T") + + +class Registry(Generic[T], dict[str, type[T]]): + def __init__(self) -> None: + super().__init__() + self.lazy_load_list: list[EntryPoint] = [] + + def lazy_load(self) -> None: + for e in self.lazy_load_list: + self.register(e.load()) + self.lazy_load_list.clear() + + def 
register(self, cls: type[T]) -> None: + self[fully_qualified_name(cls)] = cls + + +__codec_registries: dict[str, Registry[Codec]] = defaultdict(Registry) +__pipeline_registry: Registry[CodecPipeline] = Registry() +__buffer_registry: Registry[Buffer] = Registry() +__ndbuffer_registry: Registry[NDBuffer] = Registry() + +""" +The registry module is responsible for managing implementations of codecs, pipelines, buffers and ndbuffers and +collecting them from entrypoints. +The implementation used is determined by the config +""" + + +def _collect_entrypoints() -> list[Registry[Any]]: + """ + Collects codecs, pipelines, buffers and ndbuffers from entrypoints. + Entry points can either be single items or groups of items. + Allowed syntax for entry_points.txt is e.g. + + [zarr.codecs] + gzip = package:EntrypointGzipCodec1 + [zarr.codecs.gzip] + some_name = package:EntrypointGzipCodec2 + another = package:EntrypointGzipCodec3 + + [zarr] + buffer = package:TestBuffer1 + [zarr.buffer] + xyz = package:TestBuffer2 + abc = package:TestBuffer3 + ... + """ + entry_points = get_entry_points() + + __buffer_registry.lazy_load_list.extend(entry_points.select(group="zarr.buffer")) + __buffer_registry.lazy_load_list.extend(entry_points.select(group="zarr", name="buffer")) + __ndbuffer_registry.lazy_load_list.extend(entry_points.select(group="zarr.ndbuffer")) + __ndbuffer_registry.lazy_load_list.extend(entry_points.select(group="zarr", name="ndbuffer")) + __pipeline_registry.lazy_load_list.extend(entry_points.select(group="zarr.codec_pipeline")) + __pipeline_registry.lazy_load_list.extend( + entry_points.select(group="zarr", name="codec_pipeline") + ) + for e in entry_points.select(group="zarr.codecs"): + __codec_registries[e.name].lazy_load_list.append(e) + for group in entry_points.groups: + if group.startswith("zarr.codecs."): + codec_name = group.split(".")[2] + __codec_registries[codec_name].lazy_load_list.extend(entry_points.select(group=group)) + return [ + *__codec_registries.values(), + __pipeline_registry, + __buffer_registry, + __ndbuffer_registry, + ] + + +def _reload_config() -> None: + config.refresh() + + +def fully_qualified_name(cls: type) -> str: + module = cls.__module__ + return module + "." + cls.__qualname__ + + +def register_codec(key: str, codec_cls: type[Codec]) -> None: + if key not in __codec_registries.keys(): + __codec_registries[key] = Registry() + __codec_registries[key].register(codec_cls) + + +def register_pipeline(pipe_cls: type[CodecPipeline]) -> None: + __pipeline_registry.register(pipe_cls) + + +def register_ndbuffer(cls: type[NDBuffer]) -> None: + __ndbuffer_registry.register(cls) + + +def register_buffer(cls: type[Buffer]) -> None: + __buffer_registry.register(cls) + + +def get_codec_class(key: str, reload_config: bool = False) -> type[Codec]: + if reload_config: + _reload_config() + + if key in __codec_registries: + # logger.debug("Auto loading codec '%s' from entrypoint", codec_id) + __codec_registries[key].lazy_load() + + codec_classes = __codec_registries[key] + if not codec_classes: + raise KeyError(key) + + config_entry = config.get("codecs", {}).get(key) + if config_entry is None: + warnings.warn( + f"Codec '{key}' not configured in config. 
Selecting any implementation.", stacklevel=2
+        )
+        return list(codec_classes.values())[-1]
+    selected_codec_cls = codec_classes.get(config_entry)
+
+    if selected_codec_cls:
+        return selected_codec_cls
+    raise KeyError(key)
+
+
+def get_pipeline_class(reload_config: bool = False) -> type[CodecPipeline]:
+    if reload_config:
+        _reload_config()
+    __pipeline_registry.lazy_load()
+    path = config.get("codec_pipeline.path")
+    pipeline_class = __pipeline_registry.get(path)
+    if pipeline_class:
+        return pipeline_class
+    raise BadConfigError(
+        f"Pipeline class '{path}' not found in registered pipelines: {list(__pipeline_registry.keys())}."
+    )
+
+
+def get_buffer_class(reload_config: bool = False) -> type[Buffer]:
+    if reload_config:
+        _reload_config()
+    __buffer_registry.lazy_load()
+
+    path = config.get("buffer")
+    buffer_class = __buffer_registry.get(path)
+    if buffer_class:
+        return buffer_class
+    raise BadConfigError(
+        f"Buffer class '{path}' not found in registered buffers: {list(__buffer_registry.keys())}."
+    )
+
+
+def get_ndbuffer_class(reload_config: bool = False) -> type[NDBuffer]:
+    if reload_config:
+        _reload_config()
+    __ndbuffer_registry.lazy_load()
+    path = config.get("ndbuffer")
+    ndbuffer_class = __ndbuffer_registry.get(path)
+    if ndbuffer_class:
+        return ndbuffer_class
+    raise BadConfigError(
+        f"NDBuffer class '{path}' not found in registered ndbuffers: {list(__ndbuffer_registry.keys())}."
+    )
+
+
+_collect_entrypoints()
diff --git a/src/zarr/store/core.py b/src/zarr/store/core.py
index 85f85aabde..ade2ed4269 100644
--- a/src/zarr/store/core.py
+++ b/src/zarr/store/core.py
@@ -31,9 +31,11 @@ def __init__(self, store: Store, path: str | None = None):

     async def get(
         self,
-        prototype: BufferPrototype = default_buffer_prototype,
+        prototype: BufferPrototype | None = None,
         byte_range: tuple[int, int | None] | None = None,
     ) -> Buffer | None:
+        if prototype is None:
+            prototype = default_buffer_prototype()
         return await self.store.get(self.path, prototype=prototype, byte_range=byte_range)

     async def set(self, value: Buffer, byte_range: tuple[int, int] | None = None) -> None:
diff --git a/src/zarr/testing/buffer.py b/src/zarr/testing/buffer.py
new file mode 100644
index 0000000000..d2da1c5a6e
--- /dev/null
+++ b/src/zarr/testing/buffer.py
@@ -0,0 +1,66 @@
+# mypy: ignore-errors
+from __future__ import annotations
+
+from collections.abc import Iterable
+from typing import TYPE_CHECKING, Any, Literal
+
+import numpy as np
+import numpy.typing as npt
+
+from zarr.buffer import Buffer, BufferPrototype, NDBuffer
+from zarr.store import MemoryStore
+
+if TYPE_CHECKING:
+    from typing_extensions import Self
+
+
+class TestNDArrayLike(np.ndarray):
+    """An example of an ndarray-like class"""
+
+
+class TestBuffer(Buffer):
+    """Example of a custom Buffer that handles ArrayLike"""
+
+
+class NDBufferUsingTestNDArrayLike(NDBuffer):
+    """Example of a custom NDBuffer that handles TestNDArrayLike"""
+
+    @classmethod
+    def create(
+        cls,
+        *,
+        shape: Iterable[int],
+        dtype: npt.DTypeLike,
+        order: Literal["C", "F"] = "C",
+        fill_value: Any | None = None,
+    ) -> Self:
+        """Overwrite `NDBuffer.create` to create a TestNDArrayLike instance"""
+        ret = cls(TestNDArrayLike(shape=shape, dtype=dtype, order=order))
+        if fill_value is not None:
+            ret.fill(fill_value)
+        return ret
+
+
+class StoreExpectingTestBuffer(MemoryStore):
+    """Example of a custom Store that expects TestBuffer for all its non-metadata values
+
+    We assume that keys containing "json" are metadata
+    """
+
+    async def set(self, key: str, value: Buffer, 
byte_range: tuple[int, int] | None = None) -> None: + if "json" not in key: + assert isinstance(value, TestBuffer) + await super().set(key, value, byte_range) + + async def get( + self, + key: str, + prototype: BufferPrototype, + byte_range: tuple[int, int | None] | None = None, + ) -> Buffer | None: + if "json" not in key: + assert prototype.buffer is TestBuffer + ret = await super().get(key=key, prototype=prototype, byte_range=byte_range) + if ret is not None: + assert isinstance(ret, prototype.buffer) + return ret diff --git a/src/zarr/testing/store.py b/src/zarr/testing/store.py index a4e154bbc9..260d71cf0a 100644 --- a/src/zarr/testing/store.py +++ b/src/zarr/testing/store.py @@ -91,7 +91,7 @@ async def test_get( """ data_buf = Buffer.from_bytes(data) self.set(store, key, data_buf) - observed = await store.get(key, prototype=default_buffer_prototype, byte_range=byte_range) + observed = await store.get(key, prototype=default_buffer_prototype(), byte_range=byte_range) start, length = _normalize_interval_index(data_buf, interval=byte_range) expected = data_buf[start : start + length] assert_bytes_equal(observed, expected) @@ -126,7 +126,7 @@ async def test_get_partial_values( # read back just part of it observed_maybe = await store.get_partial_values( - prototype=default_buffer_prototype, key_ranges=key_ranges + prototype=default_buffer_prototype(), key_ranges=key_ranges ) observed: list[Buffer] = [] @@ -138,7 +138,9 @@ async def test_get_partial_values( for idx in range(len(observed)): key, byte_range = key_ranges[idx] - result = await store.get(key, prototype=default_buffer_prototype, byte_range=byte_range) + result = await store.get( + key, prototype=default_buffer_prototype(), byte_range=byte_range + ) assert result is not None expected.append(result) diff --git a/tests/v3/conftest.py b/tests/v3/conftest.py index 8b75d9f2f8..268281851c 100644 --- a/tests/v3/conftest.py +++ b/tests/v3/conftest.py @@ -6,6 +6,7 @@ from _pytest.compat import LEGACY_PATH +from zarr import config from zarr.abc.store import Store from zarr.common import ChunkCoords, MemoryOrder, ZarrFormat from zarr.group import AsyncGroup @@ -96,6 +97,13 @@ def xp(request: pytest.FixtureRequest) -> Iterator[ModuleType]: yield pytest.importorskip(request.param) +@pytest.fixture(autouse=True) +def reset_config(): + config.reset() + yield + config.reset() + + @dataclass class ArrayRequest: shape: ChunkCoords diff --git a/tests/v3/package_with_entrypoint-0.1.dist-info/entry_points.txt b/tests/v3/package_with_entrypoint-0.1.dist-info/entry_points.txt index 2c9dc375de..eee724c912 100644 --- a/tests/v3/package_with_entrypoint-0.1.dist-info/entry_points.txt +++ b/tests/v3/package_with_entrypoint-0.1.dist-info/entry_points.txt @@ -1,2 +1,14 @@ [zarr.codecs] -test = package_with_entrypoint:TestCodec +test = package_with_entrypoint:TestEntrypointCodec +[zarr.codecs.test] +another_codec = package_with_entrypoint:TestEntrypointGroup.Codec +[zarr] +codec_pipeline = package_with_entrypoint:TestEntrypointCodecPipeline +ndbuffer = package_with_entrypoint:TestEntrypointNDBuffer +buffer = package_with_entrypoint:TestEntrypointBuffer +[zarr.buffer] +another_buffer = package_with_entrypoint:TestEntrypointGroup.Buffer +[zarr.ndbuffer] +another_ndbuffer = package_with_entrypoint:TestEntrypointGroup.NDBuffer +[zarr.codec_pipeline] +another_pipeline = package_with_entrypoint:TestEntrypointGroup.Pipeline diff --git a/tests/v3/package_with_entrypoint/__init__.py b/tests/v3/package_with_entrypoint/__init__.py index 6368e5b236..4d626808d8 100644 --- 
a/tests/v3/package_with_entrypoint/__init__.py +++ b/tests/v3/package_with_entrypoint/__init__.py @@ -1,26 +1,65 @@ +from collections.abc import Iterable + from numpy import ndarray -from zarr.abc.codec import ArrayBytesCodec +from zarr.abc.codec import ArrayBytesCodec, CodecInput, CodecPipeline from zarr.array_spec import ArraySpec +from zarr.buffer import Buffer, NDBuffer +from zarr.codecs import BytesCodec from zarr.common import BytesLike -class TestCodec(ArrayBytesCodec): +class TestEntrypointCodec(ArrayBytesCodec): is_fixed_size = True async def encode( self, - chunk_array: ndarray, - chunk_spec: ArraySpec, + chunks_and_specs: Iterable[tuple[CodecInput | None, ArraySpec]], ) -> BytesLike | None: pass async def decode( self, - chunk_bytes: BytesLike, - chunk_spec: ArraySpec, + chunks_and_specs: Iterable[tuple[CodecInput | None, ArraySpec]], ) -> ndarray: pass def compute_encoded_size(self, input_byte_length: int, chunk_spec: ArraySpec) -> int: return input_byte_length + + +class TestEntrypointCodecPipeline(CodecPipeline): + def __init__(self, batch_size: int = 1): + pass + + async def encode( + self, chunks_and_specs: Iterable[tuple[CodecInput | None, ArraySpec]] + ) -> BytesLike: + pass + + async def decode( + self, chunks_and_specs: Iterable[tuple[CodecInput | None, ArraySpec]] + ) -> ndarray: + pass + + +class TestEntrypointBuffer(Buffer): + pass + + +class TestEntrypointNDBuffer(NDBuffer): + pass + + +class TestEntrypointGroup: + class Codec(BytesCodec): + pass + + class Buffer(Buffer): + pass + + class NDBuffer(NDBuffer): + pass + + class Pipeline(CodecPipeline): + pass diff --git a/tests/v3/test_buffer.py b/tests/v3/test_buffer.py index 77e1b6b688..d53e98d42d 100644 --- a/tests/v3/test_buffer.py +++ b/tests/v3/test_buffer.py @@ -1,14 +1,10 @@ from __future__ import annotations -from collections.abc import Iterable -from typing import TYPE_CHECKING, Any, Literal - import numpy as np -import numpy.typing as npt import pytest from zarr.array import AsyncArray -from zarr.buffer import ArrayLike, Buffer, BufferPrototype, NDArrayLike, NDBuffer +from zarr.buffer import ArrayLike, BufferPrototype, NDArrayLike, numpy_buffer_prototype from zarr.codecs.blosc import BloscCodec from zarr.codecs.bytes import BytesCodec from zarr.codecs.crc32c_ import Crc32cCodec @@ -16,62 +12,12 @@ from zarr.codecs.transpose import TransposeCodec from zarr.codecs.zstd import ZstdCodec from zarr.store.core import StorePath -from zarr.store.memory import MemoryStore - -if TYPE_CHECKING: - from typing_extensions import Self - - -class MyNDArrayLike(np.ndarray): - """An example of a ndarray-like class""" - - -class MyBuffer(Buffer): - """Example of a custom Buffer that handles ArrayLike""" - - -class MyNDBuffer(NDBuffer): - """Example of a custom NDBuffer that handles MyNDArrayLike""" - - @classmethod - def create( - cls, - *, - shape: Iterable[int], - dtype: npt.DTypeLike, - order: Literal["C", "F"] = "C", - fill_value: Any | None = None, - ) -> Self: - """Overwrite `NDBuffer.create` to create an MyNDArrayLike instance""" - ret = cls(MyNDArrayLike(shape=shape, dtype=dtype, order=order)) - if fill_value is not None: - ret.fill(fill_value) - return ret - - -class MyStore(MemoryStore): - """Example of a custom Store that expect MyBuffer for all its non-metadata - - We assume that keys containing "json" is metadata - """ - - async def set(self, key: str, value: Buffer, byte_range: tuple[int, int] | None = None) -> None: - if "json" not in key: - assert isinstance(value, MyBuffer) - await super().set(key, value, 
byte_range) - - async def get( - self, - key: str, - prototype: BufferPrototype, - byte_range: tuple[int, int | None] | None = None, - ) -> Buffer | None: - if "json" not in key: - assert prototype.buffer is MyBuffer - ret = await super().get(key=key, prototype=prototype, byte_range=byte_range) - if ret is not None: - assert isinstance(ret, prototype.buffer) - return ret +from zarr.testing.buffer import ( + NDBufferUsingTestNDArrayLike, + StoreExpectingTestBuffer, + TestBuffer, + TestNDArrayLike, +) def test_nd_array_like(xp): @@ -86,7 +32,7 @@ async def test_async_array_prototype(): expect = np.zeros((9, 9), dtype="uint16", order="F") a = await AsyncArray.create( - StorePath(MyStore(mode="w")) / "test_async_array_prototype", + StorePath(StoreExpectingTestBuffer(mode="w")) / "test_async_array_prototype", shape=expect.shape, chunk_shape=(5, 5), dtype=expect.dtype, @@ -94,7 +40,7 @@ async def test_async_array_prototype(): ) expect[1:4, 3:6] = np.ones((3, 3)) - my_prototype = BufferPrototype(buffer=MyBuffer, nd_buffer=MyNDBuffer) + my_prototype = BufferPrototype(buffer=TestBuffer, nd_buffer=NDBufferUsingTestNDArrayLike) await a.setitem( selection=(slice(1, 4), slice(3, 6)), @@ -102,7 +48,7 @@ async def test_async_array_prototype(): prototype=my_prototype, ) got = await a.getitem(selection=(slice(0, 9), slice(0, 9)), prototype=my_prototype) - assert isinstance(got, MyNDArrayLike) + assert isinstance(got, TestNDArrayLike) assert np.array_equal(expect, got) @@ -110,7 +56,7 @@ async def test_async_array_prototype(): async def test_codecs_use_of_prototype(): expect = np.zeros((10, 10), dtype="uint16", order="F") a = await AsyncArray.create( - StorePath(MyStore(mode="w")) / "test_codecs_use_of_prototype", + StorePath(StoreExpectingTestBuffer(mode="w")) / "test_codecs_use_of_prototype", shape=expect.shape, chunk_shape=(5, 5), dtype=expect.dtype, @@ -126,7 +72,7 @@ async def test_codecs_use_of_prototype(): ) expect[:] = np.arange(100).reshape(10, 10) - my_prototype = BufferPrototype(buffer=MyBuffer, nd_buffer=MyNDBuffer) + my_prototype = BufferPrototype(buffer=TestBuffer, nd_buffer=NDBufferUsingTestNDArrayLike) await a.setitem( selection=(slice(0, 10), slice(0, 10)), @@ -134,5 +80,12 @@ async def test_codecs_use_of_prototype(): prototype=my_prototype, ) got = await a.getitem(selection=(slice(0, 10), slice(0, 10)), prototype=my_prototype) - assert isinstance(got, MyNDArrayLike) + assert isinstance(got, TestNDArrayLike) assert np.array_equal(expect, got) + + +def test_numpy_buffer_prototype(): + buffer = numpy_buffer_prototype().buffer.create_zero_length() + ndbuffer = numpy_buffer_prototype().nd_buffer.create(shape=(1, 2), dtype=np.dtype("int64")) + assert isinstance(buffer.as_array_like(), np.ndarray) + assert isinstance(ndbuffer.as_ndarray_like(), np.ndarray) diff --git a/tests/v3/test_codec_entrypoints.py b/tests/v3/test_codec_entrypoints.py index 6b5c221f4d..9e2932fdd5 100644 --- a/tests/v3/test_codec_entrypoints.py +++ b/tests/v3/test_codec_entrypoints.py @@ -3,7 +3,8 @@ import pytest -import zarr.codecs.registry +import zarr.registry +from zarr import config here = os.path.abspath(os.path.dirname(__file__)) @@ -11,14 +12,38 @@ @pytest.fixture() def set_path(): sys.path.append(here) - zarr.codecs.registry._collect_entrypoints() + zarr.registry._collect_entrypoints() yield sys.path.remove(here) - entry_points = zarr.codecs.registry._collect_entrypoints() - entry_points.pop("test") + registries = zarr.registry._collect_entrypoints() + for registry in registries: + registry.lazy_load_list.clear() + 
config.reset() @pytest.mark.usefixtures("set_path") -def test_entrypoint_codec(): - cls = zarr.codecs.registry.get_codec_class("test") - assert cls.__name__ == "TestCodec" +@pytest.mark.parametrize("codec_name", ["TestEntrypointCodec", "TestEntrypointGroup.Codec"]) +def test_entrypoint_codec(codec_name): + config.set({"codecs.test": "package_with_entrypoint." + codec_name}) + cls_test = zarr.registry.get_codec_class("test") + assert cls_test.__qualname__ == codec_name + + +@pytest.mark.usefixtures("set_path") +def test_entrypoint_pipeline(): + config.set({"codec_pipeline.path": "package_with_entrypoint.TestEntrypointCodecPipeline"}) + cls = zarr.registry.get_pipeline_class() + assert cls.__name__ == "TestEntrypointCodecPipeline" + + +@pytest.mark.usefixtures("set_path") +@pytest.mark.parametrize("buffer_name", ["TestEntrypointBuffer", "TestEntrypointGroup.Buffer"]) +def test_entrypoint_buffer(buffer_name): + config.set( + { + "buffer": "package_with_entrypoint." + buffer_name, + "ndbuffer": "package_with_entrypoint.TestEntrypointNDBuffer", + } + ) + assert zarr.registry.get_buffer_class().__qualname__ == buffer_name + assert zarr.registry.get_ndbuffer_class().__name__ == "TestEntrypointNDBuffer" diff --git a/tests/v3/test_codecs/test_blosc.py b/tests/v3/test_codecs/test_blosc.py index 04c4c671c8..33ca9eba77 100644 --- a/tests/v3/test_codecs/test_blosc.py +++ b/tests/v3/test_codecs/test_blosc.py @@ -26,7 +26,7 @@ async def test_blosc_evolve(store: Store, dtype: str) -> None: ) zarr_json = json.loads( - (await store.get(f"{path}/zarr.json", prototype=default_buffer_prototype)).to_bytes() + (await store.get(f"{path}/zarr.json", prototype=default_buffer_prototype())).to_bytes() ) blosc_configuration_json = zarr_json["codecs"][1]["configuration"] assert blosc_configuration_json["typesize"] == typesize @@ -47,7 +47,7 @@ async def test_blosc_evolve(store: Store, dtype: str) -> None: ) zarr_json = json.loads( - (await store.get(f"{path2}/zarr.json", prototype=default_buffer_prototype)).to_bytes() + (await store.get(f"{path2}/zarr.json", prototype=default_buffer_prototype())).to_bytes() ) blosc_configuration_json = zarr_json["codecs"][0]["configuration"]["codecs"][1]["configuration"] assert blosc_configuration_json["typesize"] == typesize diff --git a/tests/v3/test_codecs/test_codecs.py b/tests/v3/test_codecs/test_codecs.py index 1104805d4b..a2b459f60d 100644 --- a/tests/v3/test_codecs/test_codecs.py +++ b/tests/v3/test_codecs/test_codecs.py @@ -127,7 +127,7 @@ async def test_order( ) z[:, :] = data assert_bytes_equal( - await store.get(f"{path}/0.0", prototype=default_buffer_prototype), z._store["0.0"] + await store.get(f"{path}/0.0", prototype=default_buffer_prototype()), z._store["0.0"] ) @@ -249,7 +249,7 @@ async def test_delete_empty_chunks(store: Store) -> None: await _AsyncArrayProxy(a)[:16, :16].set(np.zeros((16, 16))) await _AsyncArrayProxy(a)[:16, :16].set(data) assert np.array_equal(await _AsyncArrayProxy(a)[:16, :16].get(), data) - assert await store.get(f"{path}/c0/0", prototype=default_buffer_prototype) is None + assert await store.get(f"{path}/c0/0", prototype=default_buffer_prototype()) is None @pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) @@ -280,16 +280,16 @@ async def test_zarr_compat(store: Store) -> None: assert np.array_equal(data, z2[:16, :18]) assert_bytes_equal( - z2._store["0.0"], await store.get(f"{path}/0.0", prototype=default_buffer_prototype) + z2._store["0.0"], await store.get(f"{path}/0.0", prototype=default_buffer_prototype()) ) 
assert_bytes_equal( - z2._store["0.1"], await store.get(f"{path}/0.1", prototype=default_buffer_prototype) + z2._store["0.1"], await store.get(f"{path}/0.1", prototype=default_buffer_prototype()) ) assert_bytes_equal( - z2._store["1.0"], await store.get(f"{path}/1.0", prototype=default_buffer_prototype) + z2._store["1.0"], await store.get(f"{path}/1.0", prototype=default_buffer_prototype()) ) assert_bytes_equal( - z2._store["1.1"], await store.get(f"{path}/1.1", prototype=default_buffer_prototype) + z2._store["1.1"], await store.get(f"{path}/1.1", prototype=default_buffer_prototype()) ) @@ -323,16 +323,16 @@ async def test_zarr_compat_F(store: Store) -> None: assert np.array_equal(data, z2[:16, :18]) assert_bytes_equal( - z2._store["0.0"], await store.get(f"{path}/0.0", prototype=default_buffer_prototype) + z2._store["0.0"], await store.get(f"{path}/0.0", prototype=default_buffer_prototype()) ) assert_bytes_equal( - z2._store["0.1"], await store.get(f"{path}/0.1", prototype=default_buffer_prototype) + z2._store["0.1"], await store.get(f"{path}/0.1", prototype=default_buffer_prototype()) ) assert_bytes_equal( - z2._store["1.0"], await store.get(f"{path}/1.0", prototype=default_buffer_prototype) + z2._store["1.0"], await store.get(f"{path}/1.0", prototype=default_buffer_prototype()) ) assert_bytes_equal( - z2._store["1.1"], await store.get(f"{path}/1.1", prototype=default_buffer_prototype) + z2._store["1.1"], await store.get(f"{path}/1.1", prototype=default_buffer_prototype()) ) @@ -365,7 +365,7 @@ async def test_dimension_names(store: Store) -> None: ) assert (await AsyncArray.open(spath2)).metadata.dimension_names is None - zarr_json_buffer = await store.get(f"{path2}/zarr.json", prototype=default_buffer_prototype) + zarr_json_buffer = await store.get(f"{path2}/zarr.json", prototype=default_buffer_prototype()) assert zarr_json_buffer is not None assert "dimension_names" not in json.loads(zarr_json_buffer.to_bytes()) @@ -473,14 +473,14 @@ async def test_resize(store: Store) -> None: ) await _AsyncArrayProxy(a)[:16, :18].set(data) - assert await store.get(f"{path}/1.1", prototype=default_buffer_prototype) is not None - assert await store.get(f"{path}/0.0", prototype=default_buffer_prototype) is not None - assert await store.get(f"{path}/0.1", prototype=default_buffer_prototype) is not None - assert await store.get(f"{path}/1.0", prototype=default_buffer_prototype) is not None + assert await store.get(f"{path}/1.1", prototype=default_buffer_prototype()) is not None + assert await store.get(f"{path}/0.0", prototype=default_buffer_prototype()) is not None + assert await store.get(f"{path}/0.1", prototype=default_buffer_prototype()) is not None + assert await store.get(f"{path}/1.0", prototype=default_buffer_prototype()) is not None a = await a.resize((10, 12)) assert a.metadata.shape == (10, 12) - assert await store.get(f"{path}/0.0", prototype=default_buffer_prototype) is not None - assert await store.get(f"{path}/0.1", prototype=default_buffer_prototype) is not None - assert await store.get(f"{path}/1.0", prototype=default_buffer_prototype) is None - assert await store.get(f"{path}/1.1", prototype=default_buffer_prototype) is None + assert await store.get(f"{path}/0.0", prototype=default_buffer_prototype()) is not None + assert await store.get(f"{path}/0.1", prototype=default_buffer_prototype()) is not None + assert await store.get(f"{path}/1.0", prototype=default_buffer_prototype()) is None + assert await store.get(f"{path}/1.1", prototype=default_buffer_prototype()) is None diff --git 
a/tests/v3/test_codecs/test_endian.py b/tests/v3/test_codecs/test_endian.py index 8301a424b9..6f3e1c9482 100644 --- a/tests/v3/test_codecs/test_endian.py +++ b/tests/v3/test_codecs/test_endian.py @@ -44,7 +44,7 @@ async def test_endian(store: Store, endian: Literal["big", "little"]) -> None: ) z[:, :] = data assert_bytes_equal( - await store.get(f"{path}/0.0", prototype=default_buffer_prototype), z._store["0.0"] + await store.get(f"{path}/0.0", prototype=default_buffer_prototype()), z._store["0.0"] ) @@ -83,5 +83,5 @@ async def test_endian_write( ) z[:, :] = data assert_bytes_equal( - await store.get(f"{path}/0.0", prototype=default_buffer_prototype), z._store["0.0"] + await store.get(f"{path}/0.0", prototype=default_buffer_prototype()), z._store["0.0"] ) diff --git a/tests/v3/test_codecs/test_sharding.py b/tests/v3/test_codecs/test_sharding.py index f0031349cb..27667ca9dd 100644 --- a/tests/v3/test_codecs/test_sharding.py +++ b/tests/v3/test_codecs/test_sharding.py @@ -314,8 +314,8 @@ async def test_delete_empty_shards(store: Store) -> None: data = np.ones((16, 16), dtype="uint16") data[:8, :8] = 0 assert np.array_equal(data, await _AsyncArrayProxy(a)[:, :].get()) - assert await store.get(f"{path}/c/1/0", prototype=default_buffer_prototype) is None - chunk_bytes = await store.get(f"{path}/c/0/0", prototype=default_buffer_prototype) + assert await store.get(f"{path}/c/1/0", prototype=default_buffer_prototype()) is None + chunk_bytes = await store.get(f"{path}/c/0/0", prototype=default_buffer_prototype()) assert chunk_bytes is not None and len(chunk_bytes) == 16 * 2 + 8 * 8 * 2 + 4 diff --git a/tests/v3/test_codecs/test_transpose.py b/tests/v3/test_codecs/test_transpose.py index 3fd4350299..bea7435122 100644 --- a/tests/v3/test_codecs/test_transpose.py +++ b/tests/v3/test_codecs/test_transpose.py @@ -79,8 +79,8 @@ async def test_transpose( ) z[:, :] = data assert await store.get( - "transpose/0.0", prototype=default_buffer_prototype - ) == await store.get("transpose_zarr/0.0", default_buffer_prototype) + "transpose/0.0", prototype=default_buffer_prototype() + ) == await store.get("transpose_zarr/0.0", default_buffer_prototype()) @pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) diff --git a/tests/v3/test_config.py b/tests/v3/test_config.py index 684ab0dfce..8e7b868520 100644 --- a/tests/v3/test_config.py +++ b/tests/v3/test_config.py @@ -1,8 +1,38 @@ +import os +from collections.abc import Iterable from typing import Any +from unittest import mock +from unittest.mock import Mock +import numpy as np import pytest -from zarr.config import config +import zarr +from zarr import Array, zeros +from zarr.abc.codec import CodecInput, CodecOutput, CodecPipeline +from zarr.abc.store import ByteSetter +from zarr.array_spec import ArraySpec +from zarr.buffer import NDBuffer +from zarr.codecs import BatchedCodecPipeline, BloscCodec, BytesCodec, Crc32cCodec, ShardingCodec +from zarr.config import BadConfigError, config +from zarr.indexing import SelectorTuple +from zarr.registry import ( + fully_qualified_name, + get_buffer_class, + get_codec_class, + get_ndbuffer_class, + get_pipeline_class, + register_buffer, + register_codec, + register_ndbuffer, + register_pipeline, +) +from zarr.testing.buffer import ( + NDBufferUsingTestNDArrayLike, + StoreExpectingTestBuffer, + TestBuffer, + TestNDArrayLike, +) def test_config_defaults_set() -> None: @@ -11,8 +41,23 @@ def test_config_defaults_set() -> None: { "array": {"order": "C"}, "async": {"concurrency": None, "timeout": None}, - 
"codec_pipeline": {"batch_size": 1}, "json_indent": 2, + "codec_pipeline": { + "path": "zarr.codecs.pipeline.BatchedCodecPipeline", + "batch_size": 1, + }, + "buffer": "zarr.buffer.Buffer", + "ndbuffer": "zarr.buffer.NDBuffer", + "codecs": { + "blosc": "zarr.codecs.blosc.BloscCodec", + "gzip": "zarr.codecs.gzip.GzipCodec", + "zstd": "zarr.codecs.zstd.ZstdCodec", + "bytes": "zarr.codecs.bytes.BytesCodec", + "endian": "zarr.codecs.bytes.BytesCodec", + "crc32c": "zarr.codecs.crc32c_.Crc32cCodec", + "sharding_indexed": "zarr.codecs.sharding.ShardingCodec", + "transpose": "zarr.codecs.transpose.TransposeCodec", + }, } ] assert config.get("array.order") == "C" @@ -30,3 +75,154 @@ def test_config_defaults_can_be_overridden(key: str, old_val: Any, new_val: Any) assert config.get(key) == old_val with config.set({key: new_val}): assert config.get(key) == new_val + + +def test_fully_qualified_name(): + class MockClass: + pass + + assert "v3.test_config.test_fully_qualified_name..MockClass" == fully_qualified_name( + MockClass + ) + + +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +def test_config_codec_pipeline_class(store): + # has default value + assert get_pipeline_class().__name__ != "" + + config.set({"codec_pipeline.name": "zarr.codecs.pipeline.BatchedCodecPipeline"}) + assert get_pipeline_class() == zarr.codecs.pipeline.BatchedCodecPipeline + + _mock = Mock() + + class MockCodecPipeline(BatchedCodecPipeline): + async def write( + self, + batch_info: Iterable[tuple[ByteSetter, ArraySpec, SelectorTuple, SelectorTuple]], + value: NDBuffer, + drop_axes: tuple[int, ...] = (), + ) -> None: + _mock.call() + + register_pipeline(MockCodecPipeline) + config.set({"codec_pipeline.path": fully_qualified_name(MockCodecPipeline)}) + + assert get_pipeline_class() == MockCodecPipeline + + # test if codec is used + arr = Array.create( + store=store, + shape=(100,), + chunks=(10,), + zarr_format=3, + dtype="i4", + ) + arr[:] = range(100) + + _mock.call.assert_called() + + with pytest.raises(BadConfigError): + config.set({"codec_pipeline.path": "wrong_name"}) + get_pipeline_class() + + class MockEnvCodecPipeline(CodecPipeline): + pass + + register_pipeline(MockEnvCodecPipeline) + + with mock.patch.dict( + os.environ, {"ZARR_CODEC_PIPELINE__PATH": fully_qualified_name(MockEnvCodecPipeline)} + ): + assert get_pipeline_class(reload_config=True) == MockEnvCodecPipeline + + +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +def test_config_codec_implementation(store): + # has default value + assert fully_qualified_name(get_codec_class("blosc")) == config.defaults[0]["codecs"]["blosc"] + + _mock = Mock() + + class MockBloscCodec(BloscCodec): + async def _encode_single( + self, chunk_data: CodecInput, chunk_spec: ArraySpec + ) -> CodecOutput | None: + _mock.call() + + config.set({"codecs.blosc": fully_qualified_name(MockBloscCodec)}) + register_codec("blosc", MockBloscCodec) + assert get_codec_class("blosc") == MockBloscCodec + + # test if codec is used + arr = Array.create( + store=store, + shape=(100,), + chunks=(10,), + zarr_format=3, + dtype="i4", + codecs=[BytesCodec(), {"name": "blosc", "configuration": {}}], + ) + arr[:] = range(100) + _mock.call.assert_called() + + with mock.patch.dict(os.environ, {"ZARR_CODECS__BLOSC": fully_qualified_name(BloscCodec)}): + assert get_codec_class("blosc", reload_config=True) == BloscCodec + + +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +def test_config_ndbuffer_implementation(store): + # has 
+@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"])
+def test_config_ndbuffer_implementation(store):
+    # has default value
+    assert fully_qualified_name(get_ndbuffer_class()) == config.defaults[0]["ndbuffer"]
+
+    # set custom ndbuffer with TestNDArrayLike implementation
+    register_ndbuffer(NDBufferUsingTestNDArrayLike)
+    config.set({"ndbuffer": fully_qualified_name(NDBufferUsingTestNDArrayLike)})
+    assert get_ndbuffer_class() == NDBufferUsingTestNDArrayLike
+    arr = Array.create(
+        store=store,
+        shape=(100,),
+        chunks=(10,),
+        zarr_format=3,
+        dtype="i4",
+    )
+    got = arr[:]
+    assert isinstance(got, TestNDArrayLike)
+
+
+def test_config_buffer_implementation():
+    # has default value
+    assert fully_qualified_name(get_buffer_class()) == config.defaults[0]["buffer"]
+
+    arr = zeros(shape=(100), store=StoreExpectingTestBuffer(mode="w"))
+
+    # StoreExpectingTestBuffer raises an AssertionError when the default Buffer is used
+    with pytest.raises(AssertionError):
+        arr[:] = np.arange(100)
+
+    register_buffer(TestBuffer)
+    config.set({"buffer": fully_qualified_name(TestBuffer)})
+    assert get_buffer_class() == TestBuffer
+
+    # no error using TestBuffer
+    data = np.arange(100)
+    arr[:] = data
+    assert np.array_equal(arr[:], data)
+
+    data2d = np.arange(1000).reshape(100, 10)
+    arr_sharding = zeros(
+        shape=(100, 10),
+        store=StoreExpectingTestBuffer(mode="w"),
+        codecs=[ShardingCodec(chunk_shape=(10, 10))],
+    )
+    arr_sharding[:] = data2d
+    assert np.array_equal(arr_sharding[:], data2d)
+
+    arr_crc32c = zeros(
+        shape=(100, 10),
+        store=StoreExpectingTestBuffer(mode="w"),
+        codecs=[BytesCodec(), Crc32cCodec()],
+    )
+    arr_crc32c[:] = data2d
+    assert np.array_equal(arr_crc32c[:], data2d)
diff --git a/tests/v3/test_indexing.py b/tests/v3/test_indexing.py
index c84c091089..33dc2521e8 100644
--- a/tests/v3/test_indexing.py
+++ b/tests/v3/test_indexing.py
@@ -21,6 +21,7 @@
     oindex_set,
     replace_ellipsis,
 )
+from zarr.registry import get_ndbuffer_class
 from zarr.store.core import StorePath
 from zarr.store.memory import MemoryStore
@@ -1393,7 +1394,7 @@ def test_get_selection_out(store: StorePath):
     ]
     for selection in selections:
         expect = a[selection]
-        out = NDBuffer.from_numpy_array(np.empty(expect.shape))
+        out = get_ndbuffer_class().from_numpy_array(np.empty(expect.shape))
         z.get_basic_selection(selection, out=out)
         assert_array_equal(expect, out.as_numpy_array()[:])
@@ -1423,7 +1424,7 @@ def test_get_selection_out(store: StorePath):
     ]
     for selection in selections:
         expect = oindex(a, selection)
-        out = NDBuffer.from_numpy_array(np.zeros(expect.shape, dtype=expect.dtype))
+        out = get_ndbuffer_class().from_numpy_array(np.zeros(expect.shape, dtype=expect.dtype))
         z.get_orthogonal_selection(selection, out=out)
         assert_array_equal(expect, out.as_numpy_array()[:])
@@ -1445,7 +1446,7 @@ def test_get_selection_out(store: StorePath):
     ]
     for selection in selections:
         expect = a[selection]
-        out = NDBuffer.from_numpy_array(np.zeros(expect.shape, dtype=expect.dtype))
+        out = get_ndbuffer_class().from_numpy_array(np.zeros(expect.shape, dtype=expect.dtype))
         z.get_coordinate_selection(selection, out=out)
         assert_array_equal(expect, out.as_numpy_array()[:])
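The test_indexing.py hunks above show the consumer side of the registry: output buffers are no longer allocated through the NDBuffer class directly but via get_ndbuffer_class(), so a configured NDBuffer subclass is honoured everywhere. A sketch of the allocation pattern, using only names present in this diff:

    import numpy as np
    from zarr.registry import get_ndbuffer_class

    # Resolves the configured NDBuffer implementation at call time,
    # mirroring the replacement made in test_get_selection_out.
    out = get_ndbuffer_class().from_numpy_array(np.zeros((10, 10)))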
diff --git a/tests/v3/test_store/test_remote.py b/tests/v3/test_store/test_remote.py
index 0dc399be42..fa6fb3a5b7 100644
--- a/tests/v3/test_store/test_remote.py
+++ b/tests/v3/test_store/test_remote.py
@@ -88,9 +88,9 @@ async def test_basic():
     data = b"hello"
     await store.set("foo", Buffer.from_bytes(data))
     assert await store.exists("foo")
-    assert (await store.get("foo", prototype=default_buffer_prototype)).to_bytes() == data
+    assert (await store.get("foo", prototype=default_buffer_prototype())).to_bytes() == data
     out = await store.get_partial_values(
-        prototype=default_buffer_prototype, key_ranges=[("foo", (1, None))]
+        prototype=default_buffer_prototype(), key_ranges=[("foo", (1, None))]
     )
     assert out[0].to_bytes() == data[1:]
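Finally, the mock.patch.dict(os.environ, ...) blocks in test_config.py rely on donfig's environment-variable convention: a double underscore separates nesting levels, so ZARR_CODEC_PIPELINE__PATH maps onto the codec_pipeline.path key once the config is reloaded. A sketch of that override path, assuming only names shown in this diff (outside a test you would typically set the variable before the process starts rather than reloading explicitly):

    import os
    from zarr.registry import get_pipeline_class

    # ZARR_CODEC_PIPELINE__PATH -> config key "codec_pipeline.path"
    os.environ["ZARR_CODEC_PIPELINE__PATH"] = "zarr.codecs.pipeline.BatchedCodecPipeline"
    pipeline_cls = get_pipeline_class(reload_config=True)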