Implement `dimension_separator` for Python storage classes (See #715)… · rjgildea/zarr-python@2d0acfb · GitHub
[go: up one dir, main page]

Skip to content

Commit 2d0acfb

Browse files
authored
Implement dimension_separator for Python storage classes (See zarr-developers#715) (zarr-developers#716)
* Implement `dimension_separator` for Python storage classes (See zarr-developers#715)
  * All top-level storage classes now take an optional `dimension_separator` parameter which defaults to `None`, but can also be `.` or `/`.
  * A ValueError is raised at normalization time if this is not the case.
  * `None`s are normalized to the default of `.` in all except the NestedDirectoryStore case.
  * The value is stored as `self._dimension_separator` on participating classes so that array creation can look up the value.
  * This value deprecates the `key_separator` value from FSStore.
  * Wrapper classes like LRUCacheStore and ConsolidatedMetadataStore *do not* follow this pattern and instead rely on the value in the underlying store.
* Only store `dimension_separator` if not None
  All hexdigest tests were failing due to updated array metadata. In the case of NestedDirectoryStore and N5Store, this is necessary. If the dimension_separator key is excluded from the .zarray JSON when None, then most standard tests continue to pass.
* Fix doctests with optional key
* Add separator to missed LDBMStore
* Fix linting issue
* De-deprecate key_separator as public, non-null API
* Add test for normalize_dim_sep to appease codecov
* More tests for codecov
* Remove key from n5 array metadata
* Fix minor typo
* Cleanup DIGESTS in test_core.py
* Fix cut-n-paste error in test_utils.py
* And hopefully one last codecov fix
* Apply review changes
* Add 2.8.0 release notes
1 parent 4d4d833 commit 2d0acfb

File tree

11 files changed

+341
-92
lines changed

11 files changed

+341
-92
lines changed

docs/release.rst

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,20 @@
11
Release notes
22
=============
33

4+
.. _release_2.8.0:
5+
6+
2.8.0
7+
-----
8+
9+
V2 Specification Update
10+
~~~~~~~~~~~~~~~~~~~~~~~
11+
12+
* Introduce optional dimension_separator .zarray key for nested chunks.
13+
By :user:`Josh Moore <joshmoore>`; :issue:`715`, :issue:`716`.
14+
15+
.. _release_2.7.0:
16+
17+
418
.. _release_2.7.1:
519

620
2.7.1

zarr/core.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ class Array:
8989
dtype
9090
compression
9191
compression_opts
92+
dimension_separator
9293
fill_value
9394
order
9495
synchronizer
@@ -194,6 +195,7 @@ def _load_metadata_nosync(self):
194195
self._dtype = meta['dtype']
195196
self._fill_value = meta['fill_value']
196197
self._order = meta['order']
198+
self._dimension_separator = meta.get('dimension_separator', '.')
197199

198200
# setup compressor
199201
config = meta['compressor']

zarr/creation.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,13 +13,14 @@
1313
from zarr.storage import (DirectoryStore, ZipStore, contains_array,
1414
contains_group, default_compressor, init_array,
1515
normalize_storage_path, FSStore)
16+
from zarr.util import normalize_dimension_separator
1617

1718

1819
def create(shape, chunks=True, dtype=None, compressor='default',
1920
fill_value=0, order='C', store=None, synchronizer=None,
2021
overwrite=False, path=None, chunk_store=None, filters=None,
2122
cache_metadata=True, cache_attrs=True, read_only=False,
22-
object_codec=None, **kwargs):
23+
object_codec=None, dimension_separator=None, **kwargs):
2324
"""Create an array.
2425
2526
Parameters
@@ -66,6 +67,9 @@ def create(shape, chunks=True, dtype=None, compressor='default',
6667
True if array should be protected against modification.
6768
object_codec : Codec, optional
6869
A codec to encode object arrays, only needed if dtype=object.
70+
dimension_separator : {'.', '/'}, optional
71+
Separator placed between the dimensions of a chunk.
72+
.. versionadded:: 2.8
6973
7074
Returns
7175
-------
@@ -117,10 +121,16 @@ def create(shape, chunks=True, dtype=None, compressor='default',
117121
# API compatibility with h5py
118122
compressor, fill_value = _kwargs_compat(compressor, fill_value, kwargs)
119123

124+
# optional array metadata
125+
if dimension_separator is None:
126+
dimension_separator = getattr(store, "_dimension_separator", None)
127+
dimension_separator = normalize_dimension_separator(dimension_separator)
128+
120129
# initialize array metadata
121130
init_array(store, shape=shape, chunks=chunks, dtype=dtype, compressor=compressor,
122131
fill_value=fill_value, order=order, overwrite=overwrite, path=path,
123-
chunk_store=chunk_store, filters=filters, object_codec=object_codec)
132+
chunk_store=chunk_store, filters=filters, object_codec=object_codec,
133+
dimension_separator=dimension_separator)
124134

125135
# instantiate array
126136
z = Array(store, path=path, chunk_store=chunk_store, synchronizer=synchronizer,

zarr/hierarchy.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -783,6 +783,8 @@ def create_dataset(self, name, **kwargs):
783783
lifetime of the object. If False, array metadata will be reloaded
784784
prior to all data access and modification operations (may incur
785785
overhead depending on storage and data access pattern).
786+
dimension_separator : {'.', '/'}, optional
787+
Separator placed between the dimensions of a chunk.
786788
787789
Returns
788790
-------

zarr/meta.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,9 @@ def decode_array_metadata(s: Union[MappingType, str]) -> MappingType[str, Any]:
5050
fill_value=fill_value,
5151
order=meta['order'],
5252
filters=meta['filters'],
53+
dimension_separator=meta.get('dimension_separator', '.'),
5354
)
55+
5456
except Exception as e:
5557
raise MetadataError('error decoding metadata: %s' % e)
5658
else:
@@ -62,6 +64,9 @@ def encode_array_metadata(meta: MappingType[str, Any]) -> bytes:
6264
sdshape = ()
6365
if dtype.subdtype is not None:
6466
dtype, sdshape = dtype.subdtype
67+
68+
dimension_separator = meta.get('dimension_separator')
69+
6570
meta = dict(
6671
zarr_format=ZARR_FORMAT,
6772
shape=meta['shape'] + sdshape,
@@ -72,6 +77,10 @@ def encode_array_metadata(meta: MappingType[str, Any]) -> bytes:
7277
order=meta['order'],
7378
filters=meta['filters'],
7479
)
80+
81+
if dimension_separator:
82+
meta['dimension_separator'] = dimension_separator
83+
7584
return json_dumps(meta)
7685

7786

zarr/n5.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -355,6 +355,9 @@ def array_metadata_to_n5(array_metadata):
355355
compressor_config = compressor_config_to_n5(compressor_config)
356356
array_metadata['compression'] = compressor_config
357357

358+
if 'dimension_separator' in array_metadata:
359+
del array_metadata['dimension_separator']
360+
358361
return array_metadata
359362

360363

0 commit comments

Comments
 (0)
0