lazily create attrs · sailfish009/zarr-python@9919b92 · GitHub
[go: up one dir, main page]

Skip to content

Commit 9919b92

Browse files
committed
lazily create attrs
1 parent edf0d71 commit 9919b92

File tree

5 files changed

+47
-66
lines changed

5 files changed

+47
-66
lines changed

docs/spec/v2.rst

Lines changed: 18 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -275,7 +275,8 @@ Attributes
275275
An array or group can be associated with custom attributes, which are simple
276276
key/value items with application-specific meaning. Custom attributes are
277277
encoded as a JSON object and stored under the ".zattrs" key within an array
278-
store.
278+
store. The ".zattrs" key does not have to be present, and if it is absent the
279+
attributes should be treated as empty.
279280

280281
For example, the JSON object below encodes three attributes named
281282
"foo", "bar" and "baz"::
@@ -308,7 +309,7 @@ have been set in the store::
308309

309310
>>> import os
310311
>>> sorted(os.listdir('data/example.zarr'))
311-
['.zarray', '.zattrs']
312+
['.zarray']
312313

313314
Inspect the array metadata::
314315

@@ -333,23 +334,18 @@ Inspect the array metadata::
333334
"zarr_format": 2
334335
}
335336

336-
Inspect the array attributes::
337-
338-
>>> print(open('data/example.zarr/.zattrs').read())
339-
{}
340-
341337
Chunks are initialized on demand. E.g., set some data::
342338

343339
>>> a[0:10, 0:10] = 1
344340
>>> sorted(os.listdir('data/example.zarr'))
345-
['.zarray', '.zattrs', '0.0']
341+
['.zarray', '0.0']
346342

347343
Set some more data::
348344

349345
>>> a[0:10, 10:20] = 2
350346
>>> a[10:20, :] = 3
351347
>>> sorted(os.listdir('data/example.zarr'))
352-
['.zarray', '.zattrs', '0.0', '0.1', '1.0', '1.1']
348+
['.zarray', '0.0', '0.1', '1.0', '1.1']
353349

354350
Manually decompress a single chunk for illustration::
355351

@@ -369,6 +365,8 @@ Modify the array attributes::
369365
>>> a.attrs['foo'] = 42
370366
>>> a.attrs['bar'] = 'apples'
371367
>>> a.attrs['baz'] = [1, 2, 3, 4]
368+
>>> sorted(os.listdir('data/example.zarr'))
369+
['.zarray', '.zattrs', '0.0', '0.1', '1.0', '1.1']
372370
>>> print(open('data/example.zarr/.zattrs').read())
373371
{
374372
"bar": "apples",
@@ -398,12 +396,11 @@ Create the root group::
398396

399397
>>> root_grp = zarr.group(store, overwrite=True)
400398

401-
The metadata resource for the root group has been created, as well as a custom
402-
attributes resource::
399+
The metadata resource for the root group has been created::
403400

404401
>>> import os
405402
>>> sorted(os.listdir('data/group.zarr'))
406-
['.zattrs', '.zgroup']
403+
['.zgroup']
407404

408405
Inspect the group metadata::
409406

@@ -412,33 +409,32 @@ Inspect the group metadata::
412409
"zarr_format": 2
413410
}
414411

415-
Inspect the group attributes::
416-
417-
>>> print(open('data/group.zarr/.zattrs').read())
418-
{}
419-
420412
Create a sub-group::
421413

422414
>>> sub_grp = root_grp.create_group('foo')
423415

424416
What has been stored::
425417

426418
>>> sorted(os.listdir('data/group.zarr'))
427-
['.zattrs', '.zgroup', 'foo']
419+
['.zgroup', 'foo']
428420
>>> sorted(os.listdir('data/group.zarr/foo'))
429-
['.zattrs', '.zgroup']
421+
['.zgroup']
430422

431423
Create an array within the sub-group::
432424

433425
>>> a = sub_grp.create_dataset('bar', shape=(20, 20), chunks=(10, 10))
434426
>>> a[:] = 42
435427

428+
Set a custom attribute::
429+
430+
>>> a.attrs['comment'] = 'answer to life, the universe and everything'
431+
436432
What has been stored::
437433

438434
>>> sorted(os.listdir('data/group.zarr'))
439-
['.zattrs', '.zgroup', 'foo']
435+
['.zgroup', 'foo']
440436
>>> sorted(os.listdir('data/group.zarr/foo'))
441-
['.zattrs', '.zgroup', 'bar']
437+
['.zgroup', 'bar']
442438
>>> sorted(os.listdir('data/group.zarr/foo/bar'))
443439
['.zarray', '.zattrs', '0.0', '0.1', '1.0', '1.1']
444440

@@ -449,6 +445,7 @@ Here is the same example using a Zip file as storage::
449445
>>> sub_grp = root_grp.create_group('foo')
450446
>>> a = sub_grp.create_dataset('bar', shape=(20, 20), chunks=(10, 10))
451447
>>> a[:] = 42
448+
>>> a.attrs['comment'] = 'answer to life, the universe and everything'
452449
>>> store.close()
453450

454451
What has been stored::
@@ -457,9 +454,7 @@ What has been stored::
457454
>>> zf = zipfile.ZipFile('data/group.zip', mode='r')
458455
>>> for name in sorted(zf.namelist()):
459456
... print(name)
460-
.zattrs
461457
.zgroup
462-
foo/.zattrs
463458
foo/.zgroup
464459
foo/bar/.zarray
465460
foo/bar/.zattrs

docs/tutorial.rst

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -178,7 +178,7 @@ print some diagnostics, e.g.::
178178
: blocksize=0)
179179
Store type : builtins.dict
180180
No. bytes : 400000000 (381.5M)
181-
No. bytes stored : 4565055 (4.4M)
181+
No. bytes stored : 4565053 (4.4M)
182182
Storage ratio : 87.6
183183
Chunks initialized : 100/100
184184

@@ -270,7 +270,7 @@ Here is an example using a delta filter with the Blosc compressor::
270270
Compressor : Blosc(cname='zstd', clevel=1, shuffle=SHUFFLE, blocksize=0)
271271
Store type : builtins.dict
272272
No. bytes : 400000000 (381.5M)
273-
No. bytes stored : 648607 (633.4K)
273+
No. bytes stored : 648605 (633.4K)
274274
Storage ratio : 616.7
275275
Chunks initialized : 100/100
276276

@@ -394,7 +394,7 @@ property. E.g.::
394394
Compressor : Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0)
395395
Store type : zarr.storage.DictStore
396396
No. bytes : 8000000 (7.6M)
397-
No. bytes stored : 37482 (36.6K)
397+
No. bytes stored : 37480 (36.6K)
398398
Storage ratio : 213.4
399399
Chunks initialized : 10/10
400400

@@ -409,7 +409,7 @@ property. E.g.::
409409
Compressor : Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0)
410410
Store type : zarr.storage.DictStore
411411
No. bytes : 4000000 (3.8M)
412-
No. bytes stored : 23245 (22.7K)
412+
No. bytes stored : 23243 (22.7K)
413413
Storage ratio : 172.1
414414
Chunks initialized : 100/100
415415

@@ -898,7 +898,7 @@ ratios, depending on the correlation structure within the data. E.g.::
898898
Compressor : Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0)
899899
Store type : builtins.dict
900900
No. bytes : 400000000 (381.5M)
901-
No. bytes stored : 26805737 (25.6M)
901+
No. bytes stored : 26805735 (25.6M)
902902
Storage ratio : 14.9
903903
Chunks initialized : 100/100
904904
>>> f = zarr.array(a, chunks=(1000, 1000), order='F')
@@ -912,7 +912,7 @@ ratios, depending on the correlation structure within the data. E.g.::
912912
Compressor : Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0)
913913
Store type : builtins.dict
914914
No. bytes : 400000000 (381.5M)
915-
No. bytes stored : 9633603 (9.2M)
915+
No. bytes stored : 9633601 (9.2M)
916916
Storage ratio : 41.5
917917
Chunks initialized : 100/100
918918

zarr/storage.py

Lines changed: 11 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
from collections import MutableMapping
1111
import os
1212
import tempfile
13-
import json
1413
import zipfile
1514
import shutil
1615
import atexit
@@ -212,7 +211,7 @@ def init_array(store, shape, chunks=True, dtype=None, compressor='default',
212211
>>> store = dict()
213212
>>> init_array(store, shape=(10000, 10000), chunks=(1000, 1000))
214213
>>> sorted(store.keys())
215-
['.zarray', '.zattrs']
214+
['.zarray']
216215
217216
Array metadata is stored as JSON::
218217
@@ -240,17 +239,12 @@ def init_array(store, shape, chunks=True, dtype=None, compressor='default',
240239
"zarr_format": 2
241240
}
242241
243-
User-defined attributes are also stored as JSON, initially empty::
244-
245-
>>> print(store['.zattrs'].decode())
246-
{}
247-
248242
Initialize an array using a storage path::
249243
250244
>>> store = dict()
251245
>>> init_array(store, shape=100000000, chunks=1000000, dtype='i1', path='foo')
252246
>>> sorted(store.keys())
253-
['.zattrs', '.zgroup', 'foo/.zarray', 'foo/.zattrs']
247+
['.zgroup', 'foo/.zarray']
254248
>>> print(store['foo/.zarray'].decode())
255249
{
256250
"chunks": [
@@ -276,8 +270,7 @@ def init_array(store, shape, chunks=True, dtype=None, compressor='default',
276270
Notes
277271
-----
278272
The initialisation process involves normalising all array metadata, encoding
279-
as JSON and storing under the '.zarray' key. User attributes are also
280-
initialized and stored as JSON under the '.zattrs' key.
273+
as JSON and storing under the '.zarray' key.
281274
282275
"""
283276

@@ -349,10 +342,6 @@ def _init_array_metadata(store, shape, chunks=None, dtype=None, compressor='defa
349342
key = _path_to_prefix(path) + array_meta_key
350343
store[key] = encode_array_metadata(meta)
351344

352-
# initialize attributes
353-
key = _path_to_prefix(path) + attrs_key
354-
store[key] = json.dumps(dict()).encode('ascii')
355-
356345

357346
# backwards compatibility
358347
init_store = init_array
@@ -408,10 +397,6 @@ def _init_group_metadata(store, overwrite=False, path=None, chunk_store=None):
408397
key = _path_to_prefix(path) + group_meta_key
409398
store[key] = encode_group_metadata(meta)
410399

411-
# initialize attributes
412-
key = _path_to_prefix(path) + attrs_key
413-
store[key] = json.dumps(dict()).encode('ascii')
414-
415400

416401
def ensure_bytes(s):
417402
if isinstance(s, binary_type):
@@ -654,7 +639,7 @@ class DirectoryStore(MutableMapping):
654639
655640
>>> import os
656641
>>> sorted(os.listdir('data/array.zarr'))
657-
['.zarray', '.zattrs', '0.0', '0.1', '1.0', '1.1']
642+
['.zarray', '0.0', '0.1', '1.0', '1.1']
658643
659644
Store a group::
660645
@@ -668,11 +653,11 @@ class DirectoryStore(MutableMapping):
668653
directories on the file system, i.e.::
669654
670655
>>> sorted(os.listdir('data/group.zarr'))
671-
['.zattrs', '.zgroup', 'foo']
656+
['.zgroup', 'foo']
672657
>>> sorted(os.listdir('data/group.zarr/foo'))
673-
['.zattrs', '.zgroup', 'bar']
658+
['.zgroup', 'bar']
674659
>>> sorted(os.listdir('data/group.zarr/foo/bar'))
675-
['.zarray', '.zattrs', '0.0', '0.1', '1.0', '1.1']
660+
['.zarray', '0.0', '0.1', '1.0', '1.1']
676661
677662
Notes
678663
-----
@@ -909,7 +894,7 @@ class NestedDirectoryStore(DirectoryStore):
909894
910895
>>> import os
911896
>>> sorted(os.listdir('data/array.zarr'))
912-
['.zarray', '.zattrs', '0', '1']
897+
['.zarray', '0', '1']
913898
>>> sorted(os.listdir('data/array.zarr/0'))
914899
['0', '1']
915900
>>> sorted(os.listdir('data/array.zarr/1'))
@@ -927,11 +912,11 @@ class NestedDirectoryStore(DirectoryStore):
927912
directories on the file system, i.e.::
928913
929914
>>> sorted(os.listdir('data/group.zarr'))
930-
['.zattrs', '.zgroup', 'foo']
915+
['.zgroup', 'foo']
931916
>>> sorted(os.listdir('data/group.zarr/foo'))
932-
['.zattrs', '.zgroup', 'bar']
917+
['.zgroup', 'bar']
933918
>>> sorted(os.listdir('data/group.zarr/foo/bar'))
934-
['.zarray', '.zattrs', '0', '1']
919+
['.zarray', '0', '1']
935920
>>> sorted(os.listdir('data/group.zarr/foo/bar/0'))
936921
['0', '1']
937922
>>> sorted(os.listdir('data/group.zarr/foo/bar/1'))

zarr/tests/test_hierarchy.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818

1919

2020
from zarr.storage import (DictStore, DirectoryStore, ZipStore, init_group, init_array,
21-
attrs_key, array_meta_key, group_meta_key, atexit_rmtree,
21+
array_meta_key, group_meta_key, atexit_rmtree,
2222
NestedDirectoryStore, DBMStore, LMDBStore)
2323
from zarr.core import Array
2424
from zarr.compat import PY2, text_type
@@ -66,6 +66,8 @@ def test_group_init_1(self):
6666
eq('/', g.name)
6767
eq('', g.basename)
6868
assert_is_instance(g.attrs, Attributes)
69+
g.attrs['foo'] = 'bar'
70+
assert g.attrs['foo'] == 'bar'
6971
assert_is_instance(g.info, InfoReporter)
7072
assert_is_instance(repr(g.info), str)
7173
assert_is_instance(g.info._repr_html_(), str)
@@ -940,8 +942,7 @@ def test_chunk_store(self):
940942
assert_array_equal(np.arange(100), a[:])
941943

942944
# check store keys
943-
expect = sorted([attrs_key, group_meta_key, 'foo/' + attrs_key,
944-
'foo/' + array_meta_key])
945+
expect = sorted([group_meta_key, 'foo/' + array_meta_key])
945946
actual = sorted(store.keys())
946947
eq(expect, actual)
947948
expect = ['foo/' + str(i) for i in range(10)]

zarr/tests/test_storage.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
NestedDirectoryStore, default_compressor, DBMStore, LMDBStore)
2323
from zarr.meta import (decode_array_metadata, encode_array_metadata, ZARR_FORMAT,
2424
decode_group_metadata, encode_group_metadata)
25-
from zarr.compat import text_type, PY2
25+
from zarr.compat import PY2
2626
from zarr.codecs import Zlib, Blosc, BZ2
2727
from zarr.errors import PermissionError
2828
from zarr.hierarchy import group
@@ -310,8 +310,8 @@ def test_init_array(self):
310310
assert_is_none(meta['fill_value'])
311311

312312
# check attributes
313-
assert attrs_key in store
314-
eq(dict(), json.loads(text_type(store[attrs_key], 'ascii')))
313+
# assert attrs_key in store
314+
# eq(dict(), json.loads(text_type(store[attrs_key], 'ascii')))
315315

316316
def test_init_array_overwrite(self):
317317
# setup
@@ -361,9 +361,9 @@ def test_init_array_path(self):
361361
assert_is_none(meta['fill_value'])
362362

363363
# check attributes
364-
key = path + '/' + attrs_key
365-
assert key in store
366-
eq(dict(), json.loads(text_type(store[key], 'ascii')))
364+
# key = path + '/' + attrs_key
365+
# assert key in store
366+
# eq(dict(), json.loads(text_type(store[key], 'ascii')))
367367

368368
def test_init_array_overwrite_path(self):
369369
# setup
@@ -477,8 +477,8 @@ def test_init_group(self):
477477
eq(ZARR_FORMAT, meta['zarr_format'])
478478

479479
# check attributes
480-
assert attrs_key in store
481-
eq(dict(), json.loads(text_type(store[attrs_key], 'ascii')))
480+
# assert attrs_key in store
481+
# eq(dict(), json.loads(text_type(store[attrs_key], 'ascii')))
482482

483483
def test_init_group_overwrite(self):
484484
# setup

0 commit comments

Comments
 (0)
0