From fd5bfb5e79edbc22cf509a94076283d02bbf9dea Mon Sep 17 00:00:00 2001 From: Ryan Abernathey Date: Thu, 29 Mar 2018 15:05:07 -0400 Subject: [PATCH 01/21] basic implementation working --- zarr/storage.py | 154 +++++++++++++++++++++++++++++++++++++ zarr/tests/test_core.py | 27 ++++++- zarr/tests/test_storage.py | 32 +++++++- 3 files changed, 210 insertions(+), 3 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index 39a497d08b..a73e0ed032 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -1883,3 +1883,157 @@ def __delitem__(self, key): with self._mutex: self._invalidate_keys() self._invalidate_value(key) + + +# utility functions for object stores + + +def _strip_prefix_from_path(path, prefix): + # normalized things will not have any leading or trailing slashes + path_norm = normalize_storage_path(path) + prefix_norm = normalize_storage_path(prefix) + if path_norm.startswith(prefix_norm): + return path_norm[(len(prefix_norm)+1):] + else: + return path + + +def _append_path_to_prefix(path, prefix): + return '/'.join([normalize_storage_path(prefix), + normalize_storage_path(path)]) + + +def atexit_rmgcspath(bucket, path): + from google.cloud import storage + client = storage.Client() + bucket = client.get_bucket(bucket) + bucket.delete_blobs(bucket.list_blobs(prefix=path)) + print('deleted blobs') + + +class GCSStore(MutableMapping): + + def __init__(self, bucket_name, prefix=None, client_kwargs={}): + + self.bucket_name = bucket_name + self.prefix = normalize_storage_path(prefix) + self.client_kwargs = {} + self.initialize_bucket() + + def initialize_bucket(self): + from google.cloud import storage + # run `gcloud auth application-default login` from shell + client = storage.Client(**self.client_kwargs) + self.bucket = client.get_bucket(self.bucket_name) + # need to properly handle excpetions + import google.api_core.exceptions as exceptions + self.exceptions = exceptions + + # needed for pickling + def __getstate__(self): + state = self.__dict__.copy() + del state['bucket'] + del state['exceptions'] + return state + + def __setstate__(self, state): + self.__dict__.update(state) + self.initialize_bucket() + + def __enter__(self): + return self + + def __exit__(self, *args): + pass + + def __getitem__(self, key): + blob_name = _append_path_to_prefix(key, self.prefix) + blob = self.bucket.get_blob(blob_name) + if blob: + return blob.download_as_string() + else: + raise KeyError('Blob %s not found' % blob_name) + + def __setitem__(self, key, value): + blob_name = _append_path_to_prefix(key, self.prefix) + blob = self.bucket.blob(blob_name) + blob.upload_from_string(value) + + def __delitem__(self, key): + blob_name = _append_path_to_prefix(key, self.prefix) + try: + self.bucket.delete_blob(blob_name) + except self.exceptions.NotFound as er: + raise KeyError(er.message) + + def __contains__(self, key): + blob_name = _append_path_to_prefix(key, self.prefix) + return self.bucket.get_blob(blob_name) is not None + + def __eq__(self, other): + return ( + isinstance(other, GCSMap) and + self.bucket_name == other.bucket_name and + self.prefix == other.prefix + ) + + def __iter__(self): + blobs = self.bucket.list_blobs(prefix=self.prefix) + for blob in blobs: + yield _strip_prefix_from_path(blob.name, self.prefix) + + def __len__(self): + iterator = self.bucket.list_blobs(prefix=self.prefix) + return len(list(iterator)) + + def list_gcs_directory_blobs(self, path): + """Return list of all blobs *directly* under a gcs prefix.""" + prefix = normalize_storage_path(path) + '/' + return 
[blob.name for blob in + self.bucket.list_blobs(prefix=prefix, delimiter='/')] + + # from https://github.com/GoogleCloudPlatform/google-cloud-python/issues/920#issuecomment-326125992 + def list_gcs_subdirectories(self, path): + """Return set of all "subdirectories" from a gcs prefix.""" + prefix = normalize_storage_path(path) + '/' + iterator = self.bucket.list_blobs(prefix=prefix, delimiter='/') + prefixes = set() + for page in iterator.pages: + prefixes.update(page.prefixes) + # need to strip trailing slash to be consistent with os.listdir + return [path[:-1] for path in prefixes] + + def list_gcs_directory(self, prefix, strip_prefix=True): + """Return a list of all blobs and subdirectories from a gcs prefix.""" + items = set() + items.update(self.list_gcs_directory_blobs(prefix)) + items.update(self.list_gcs_subdirectories(prefix)) + items = list(items) + if strip_prefix: + items = [_strip_prefix_from_path(path, prefix) for path in items] + return items + + def dir_path(self, path=None): + dir_path = _append_path_to_prefix(path, self.prefix) + return dir_path + + def listdir(self, path=None): + dir_path = self.dir_path(path) + return sorted(self.list_gcs_directory(dir_path, strip_prefix=True)) + + def rename(self, src_path, dst_path): + raise NotImplementedErrror + + def rmdir(self, path=None): + dir_path = self.dir_path(path) + self.bucket.delete_blobs(self.bucket.list_blobs(prefix=dir_path)) + + def getsize(self, path=None): + dir_path = self.dir_path(path) + size = 0 + for blob in self.bucket.list_blobs(prefix=dir_path): + size += blob.size + return size + + def clear(self): + self.rmdir() diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py index 390f888287..6a02fd3821 100644 --- a/zarr/tests/test_core.py +++ b/zarr/tests/test_core.py @@ -7,7 +7,7 @@ import pickle import os import warnings - +import uuid import numpy as np from numpy.testing import assert_array_equal, assert_array_almost_equal @@ -16,7 +16,7 @@ from zarr.storage import (DirectoryStore, init_array, init_group, NestedDirectoryStore, DBMStore, LMDBStore, atexit_rmtree, atexit_rmglob, - LRUStoreCache) + LRUStoreCache, GCSStore, atexit_rmgcspath) from zarr.core import Array from zarr.errors import PermissionError from zarr.compat import PY2, text_type, binary_type @@ -1698,3 +1698,26 @@ def create_array(read_only=False, **kwargs): init_array(store, **kwargs) return Array(store, read_only=read_only, cache_metadata=cache_metadata, cache_attrs=cache_attrs) + +try: + from google.cloud import storage as gcstorage + # cleanup function + +except ImportError: # pragma: no cover + gcstorage = None + + +@unittest.skipIf(gcstorage is None, 'google-cloud-storage is not installed') +class TestGCSArray(TestArray): + + def create_array(self, read_only=False, **kwargs): + bucket = 'zarr-test' + prefix = uuid.uuid4() + atexit.register(atexit_rmgcspath, bucket, prefix) + store = GCSStore(bucket, prefix) + cache_metadata = kwargs.pop('cache_metadata', True) + cache_attrs = kwargs.pop('cache_attrs', True) + kwargs.setdefault('compressor', Zlib(1)) + init_array(store, **kwargs) + return Array(store, read_only=read_only, cache_metadata=cache_metadata, + cache_attrs=cache_attrs) diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index f68f8a6ed6..afa2d333b5 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -8,6 +8,7 @@ import array import shutil import os +import uuid import numpy as np @@ -19,7 +20,8 @@ DirectoryStore, ZipStore, init_group, group_meta_key, getsize, migrate_1to2, TempStore, 
atexit_rmtree, NestedDirectoryStore, default_compressor, DBMStore, - LMDBStore, atexit_rmglob, LRUStoreCache) + LMDBStore, atexit_rmglob, LRUStoreCache, GCSStore, + atexit_rmgcspath) from zarr.meta import (decode_array_metadata, encode_array_metadata, ZARR_FORMAT, decode_group_metadata, encode_group_metadata) from zarr.compat import PY2 @@ -1235,3 +1237,31 @@ def test_format_compatibility(): else: assert compressor.codec_id == z.compressor.codec_id assert compressor.get_config() == z.compressor.get_config() + + +try: + from google.cloud import storage as gcstorage + # cleanup function + +except ImportError: # pragma: no cover + gcstorage = None + + +@unittest.skipIf(gcstorage is None, 'google-cloud-storage is not installed') +class TestGCSStore(StoreTests, unittest.TestCase): + + def create_store(self): + # would need to be replaced with a dedicated test bucket + bucket = 'zarr-test' + prefix = uuid.uuid4() + + print('registering') + atexit.register(atexit_rmgcspath, bucket, prefix) + store = GCSStore(bucket, prefix) + return store + + def test_context_manager(self): + with self.create_store() as store: + store['foo'] = b'bar' + store['baz'] = b'qux' + assert 2 == len(store) From a26752c018bc7ab97d3906c7b08f806f109cd83b Mon Sep 17 00:00:00 2001 From: Ryan Abernathey Date: Thu, 29 Mar 2018 16:20:25 -0400 Subject: [PATCH 02/21] docs and cleanup --- zarr/storage.py | 134 ++++++++++++++++++++++--------------- zarr/tests/test_core.py | 3 +- zarr/tests/test_storage.py | 2 - 3 files changed, 80 insertions(+), 59 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index a73e0ed032..8f0c7e8a21 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -1908,10 +1908,29 @@ def atexit_rmgcspath(bucket, path): client = storage.Client() bucket = client.get_bucket(bucket) bucket.delete_blobs(bucket.list_blobs(prefix=path)) - print('deleted blobs') class GCSStore(MutableMapping): + """Storage class using a Google Cloud Storage (GCS) + + Parameters + ---------- + bucket_name : string + The name of the GCS bucket + prefix : string, optional + The prefix within the bucket (i.e. subdirectory) + client_kwargs : dict, optional + Extra options passed to ``google.cloud.storage.Client`` when connecting + to GCS + + Notes + ----- + In order to use this store, you must install the Google Cloud Storage + `Python Client Library `_. + You must also provide valid application credentials, either by setting the + ``GOOGLE_APPLICATION_CREDENTIALS`` environment variable or via + `default credentials `_. 
+ """ def __init__(self, bucket_name, prefix=None, client_kwargs={}): @@ -1946,45 +1965,8 @@ def __enter__(self): def __exit__(self, *args): pass - def __getitem__(self, key): - blob_name = _append_path_to_prefix(key, self.prefix) - blob = self.bucket.get_blob(blob_name) - if blob: - return blob.download_as_string() - else: - raise KeyError('Blob %s not found' % blob_name) - - def __setitem__(self, key, value): - blob_name = _append_path_to_prefix(key, self.prefix) - blob = self.bucket.blob(blob_name) - blob.upload_from_string(value) - - def __delitem__(self, key): - blob_name = _append_path_to_prefix(key, self.prefix) - try: - self.bucket.delete_blob(blob_name) - except self.exceptions.NotFound as er: - raise KeyError(er.message) - - def __contains__(self, key): - blob_name = _append_path_to_prefix(key, self.prefix) - return self.bucket.get_blob(blob_name) is not None - - def __eq__(self, other): - return ( - isinstance(other, GCSMap) and - self.bucket_name == other.bucket_name and - self.prefix == other.prefix - ) - - def __iter__(self): - blobs = self.bucket.list_blobs(prefix=self.prefix) - for blob in blobs: - yield _strip_prefix_from_path(blob.name, self.prefix) - - def __len__(self): - iterator = self.bucket.list_blobs(prefix=self.prefix) - return len(list(iterator)) + def full_path(self, path=None): + return _append_path_to_prefix(path, self.prefix) def list_gcs_directory_blobs(self, path): """Return list of all blobs *directly* under a gcs prefix.""" @@ -1992,7 +1974,7 @@ def list_gcs_directory_blobs(self, path): return [blob.name for blob in self.bucket.list_blobs(prefix=prefix, delimiter='/')] - # from https://github.com/GoogleCloudPlatform/google-cloud-python/issues/920#issuecomment-326125992 + # from https://github.com/GoogleCloudPlatform/google-cloud-python/issues/920 def list_gcs_subdirectories(self, path): """Return set of all "subdirectories" from a gcs prefix.""" prefix = normalize_storage_path(path) + '/' @@ -2013,27 +1995,69 @@ def list_gcs_directory(self, prefix, strip_prefix=True): items = [_strip_prefix_from_path(path, prefix) for path in items] return items - def dir_path(self, path=None): - dir_path = _append_path_to_prefix(path, self.prefix) - return dir_path - def listdir(self, path=None): - dir_path = self.dir_path(path) + dir_path = self.full_path(path) return sorted(self.list_gcs_directory(dir_path, strip_prefix=True)) - def rename(self, src_path, dst_path): - raise NotImplementedErrror - def rmdir(self, path=None): - dir_path = self.dir_path(path) + # make sure it's a directory + dir_path = normalize_storage_path(self.full_path(path)) + '/' self.bucket.delete_blobs(self.bucket.list_blobs(prefix=dir_path)) def getsize(self, path=None): - dir_path = self.dir_path(path) - size = 0 - for blob in self.bucket.list_blobs(prefix=dir_path): - size += blob.size - return size + # this function should *not* be recursive + # a lot of slash trickery is required to make this work right + full_path = self.full_path(path) + blob = self.bucket.get_blob(full_path) + if blob is not None: + return blob.size + else: + dir_path = normalize_storage_path(full_path) + '/' + blobs = self.bucket.list_blobs(prefix=dir_path, delimiter='/') + size = 0 + for blob in blobs: + size += blob.size + return size def clear(self): self.rmdir() + + def __getitem__(self, key): + blob_name = self.full_path(key) + blob = self.bucket.get_blob(blob_name) + if blob: + return blob.download_as_string() + else: + raise KeyError('Blob %s not found' % blob_name) + + def __setitem__(self, key, value): + 
blob_name = self.full_path(key) + blob = self.bucket.blob(blob_name) + blob.upload_from_string(value) + + def __delitem__(self, key): + blob_name = self.full_path(key) + try: + self.bucket.delete_blob(blob_name) + except self.exceptions.NotFound as er: + raise KeyError(er.message) + + def __contains__(self, key): + blob_name = self.full_path(key) + return self.bucket.get_blob(blob_name) is not None + + def __eq__(self, other): + return ( + isinstance(other, GCSStore) and + self.bucket_name == other.bucket_name and + self.prefix == other.prefix + ) + + def __iter__(self): + blobs = self.bucket.list_blobs(prefix=self.prefix) + for blob in blobs: + yield _strip_prefix_from_path(blob.name, self.prefix) + + def __len__(self): + iterator = self.bucket.list_blobs(prefix=self.prefix) + return len(list(iterator)) diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py index 6a02fd3821..23d64d1f2c 100644 --- a/zarr/tests/test_core.py +++ b/zarr/tests/test_core.py @@ -1699,10 +1699,9 @@ def create_array(read_only=False, **kwargs): return Array(store, read_only=read_only, cache_metadata=cache_metadata, cache_attrs=cache_attrs) + try: from google.cloud import storage as gcstorage - # cleanup function - except ImportError: # pragma: no cover gcstorage = None diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index afa2d333b5..ffc19822a5 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -1254,8 +1254,6 @@ def create_store(self): # would need to be replaced with a dedicated test bucket bucket = 'zarr-test' prefix = uuid.uuid4() - - print('registering') atexit.register(atexit_rmgcspath, bucket, prefix) store = GCSStore(bucket, prefix) return store From 23dd8f6f3effccd2903e871adbfcbedf7e1289c7 Mon Sep 17 00:00:00 2001 From: Ryan Abernathey Date: Fri, 30 Mar 2018 10:14:43 -0400 Subject: [PATCH 03/21] fixed client_kwargs bug --- zarr/storage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zarr/storage.py b/zarr/storage.py index 8f0c7e8a21..ac3e64558e 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -1936,7 +1936,7 @@ def __init__(self, bucket_name, prefix=None, client_kwargs={}): self.bucket_name = bucket_name self.prefix = normalize_storage_path(prefix) - self.client_kwargs = {} + self.client_kwargs = client_kwargs self.initialize_bucket() def initialize_bucket(self): From dec75dd6680b04f5f51c35b38685990bdf9ae430 Mon Sep 17 00:00:00 2001 From: Friedrich Knuth Date: Sun, 15 Apr 2018 13:01:43 -0400 Subject: [PATCH 04/21] Add ABSStore mutable mapping --- zarr/storage.py | 127 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 127 insertions(+) diff --git a/zarr/storage.py b/zarr/storage.py index ac3e64558e..bbfde16b1e 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -2061,3 +2061,130 @@ def __iter__(self): def __len__(self): iterator = self.bucket.list_blobs(prefix=self.prefix) return len(list(iterator)) + +class ABSStore(MutableMapping): + +#import logging +#logger = logging.getLogger(__name__) + + def __init__(self, container_name, prefix, user, token): + + self.user = user + self.token = token + self.container_name = container_name + self.prefix = normalize_storage_path(prefix) + self.initialize_container() + + def initialize_container(self): + from azure import storage + self.client = storage.blob.BlockBlobService(self.user, self.token) + # azure doesn't seem to be a way to initialize a container as google goes with get_bucket(). + # client needs to be used in functions and container name needs to be passed on. 
+        # could get rid of this function and consolidate.
+
+    # needed for pickling
+    def __getstate__(self):
+        state = self.__dict__.copy()
+        del state['container']
+        return state
+
+    def __setstate__(self, state):
+        self.__dict__.update(state)
+        self.initialize_container()
+
+
+    def __getitem__(self, key):
+        #logger.debug('__getitem__(%s)' % key) # not sure what logger returns. need to test live and adapt.
+        blob_name = '/'.join([self.prefix, key])
+        blob = self.client.get_blob_to_text(self.container_name, blob_name)
+        if blob:
+            return blob
+        else:
+            raise KeyError('Blob %s not found' % blob_name)
+
+    def __setitem__(self, key, value):
+        raise NotImplementedError
+
+    def __delitem__(self, key):
+        raise NotImplementedError
+
+    def __contains__(self, key):
+        #logger.debug('__contains__(%s)' % key)
+        blob_name = '/'.join([self.container_name, key])
+        return self.client.get_blob_to_text(blob_name) is not None
+
+    def __eq__(self, other):
+        return (
+            isinstance(other, ABSMap) and
+            self.container_name == other.container_name and
+            self.prefix == other.prefix
+        )
+
+    def keys(self):
+        raise NotImplementedError
+
+    def __iter__(self):
+        raise NotImplementedError
+
+    def __len__(self):
+        raise NotImplementedError
+
+    def __contains__(self, key):
+        #logger.debug('__contains__(%s)' % key)
+        blob_name = '/'.join([self.prefix, key])
+        return self.client.get_blob_to_text(blob_name) is not None
+
+    def list_abs_directory_blobs(self, prefix):
+        """Return list of all blobs under a abs prefix."""
+        return [blob.name for blob in
+                self.client.list_blobs(prefix=prefix)]
+
+    def list_abs_subdirectories(self, prefix):
+        """Return set of all "subdirectories" from a abs prefix."""
+        iterator = self.client.list_blobs(prefix=prefix, delimiter='/')
+
+        # here comes a hack. azure list_blobs() doesn't seems to have iterator.pages
+
+        return set([blob.name.rsplit('/',1)[:-1][0] for blob in iterator if '/' in blob.name])
+
+    def list_abs_directory(self, prefix, strip_prefix=True):
+        """Return a list of all blobs and subdirectories from a gcs prefix."""
+        items = set()
+        items.update(self.list_abs_directory_blobs(prefix))
+        items.update(self.list_abs_subdirectories(prefix))
+        items = list(items)
+        if strip_prefix:
+            items = [_strip_prefix_from_path(path, prefix) for path in items]
+        return items
+
+    def dir_path(self, path=None):
+        store_path = normalize_storage_path(path)
+        # prefix is normalized to not have a trailing slash
+        dir_path = self.prefix
+        if store_path:
+            dir_path = '/'.join(dir_path, store_path)
+        else:
+            dir_path += '/'
+        return dir_path
+
+    def listdir(self, path=None):
+        #logger.debug('listdir(%s)' % path)
+        dir_path = self.dir_path(path)
+        return sorted(self.list_abs_directory(dir_path, strip_prefix=True))
+
+    def rename(self, src_path, dst_path):
+        raise NotImplementedError
+
+    def rmdir(self, path=None):
+        raise NotImplementedError
+
+    def getsize(self, path=None):
+        #logger.debug('getsize %s' % path)
+        dir_path = self.dir_path(path)
+        size = 0
+        for blob in self.client.list_blobs(prefix=dir_path):
+            size += blob.properties.content_length # from https://stackoverflow.com/questions/47694592/get-container-sizes-in-azure-blob-storage-using-python
+        return size
+
+    def clear(self):
+        raise NotImplementedError

From 13c207751b28b01e46054d1b1c2dcdd8fbb5aa59 Mon Sep 17 00:00:00 2001
From: Friedrich Knuth
Date: Sun, 15 Apr 2018 13:21:25 -0400
Subject: [PATCH 05/21] Fix import syntax

---
 zarr/storage.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/zarr/storage.py b/zarr/storage.py
index bbfde16b1e..7e6b92afbb 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -2076,8 +2076,8 @@ def __init__(self, container_name, prefix, user, token): self.initialize_container() def initialize_container(self): - from azure import storage - self.client = storage.blob.BlockBlobService(self.user, self.token) + from azure.storage.blob import BlockBlobService + self.client = BlockBlobService(self.user, self.token) # azure doesn't seem to be a way to initialize a container as google goes with get_bucket(). # client needs to be used in functions and container name needs to be passed on. # could get rid of this function and consolidate. From 86603fae80650a5596e3751bcbb3b380937613cb Mon Sep 17 00:00:00 2001 From: Tim Crone Date: Mon, 16 Apr 2018 09:52:41 -0400 Subject: [PATCH 06/21] Get open_zarr() working --- zarr/storage.py | 38 ++++++++++++-------------------------- 1 file changed, 12 insertions(+), 26 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index 7e6b92afbb..7b1cb222e7 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -2064,9 +2064,6 @@ def __len__(self): class ABSStore(MutableMapping): -#import logging -#logger = logging.getLogger(__name__) - def __init__(self, container_name, prefix, user, token): self.user = user @@ -2076,6 +2073,7 @@ def __init__(self, container_name, prefix, user, token): self.initialize_container() def initialize_container(self): + from azure.storage.blob import BlockBlobService self.client = BlockBlobService(self.user, self.token) # azure doesn't seem to be a way to initialize a container as google goes with get_bucket(). @@ -2092,13 +2090,11 @@ def __setstate__(self, state): self.__dict__.update(state) self.initialize_container() - def __getitem__(self, key): - #logger.debug('__getitem__(%s)' % key) # not sure what logger returns. need to test live and adapt. blob_name = '/'.join([self.prefix, key]) - blob = self.client.get_blob_to_text(self.container_name, blob_name) + blob = self.client.get_blob_to_bytes(self.container_name, blob_name) if blob: - return blob + return blob.content else: raise KeyError('Blob %s not found' % blob_name) @@ -2108,14 +2104,9 @@ def __setitem__(self, key, value): def __delitem__(self, key): raise NotImplementedError - def __contains__(self, key): - #logger.debug('__contains__(%s)' % key) - blob_name = '/'.join([self.container_name, key]) - return self.client.get_blob_to_text(blob_name) is not None - def __eq__(self, other): return ( - isinstance(other, ABSMap) and + isinstance(other, ABSStore) and self.container_name == other.container_name and self.prefix == other.prefix ) @@ -2130,25 +2121,22 @@ def __len__(self): raise NotImplementedError def __contains__(self, key): - #logger.debug('__contains__(%s)' % key) blob_name = '/'.join([self.prefix, key]) - return self.client.get_blob_to_text(blob_name) is not None + try: + return self.client.get_blob_to_text(self.container_name, blob_name) + except: + return None def list_abs_directory_blobs(self, prefix): - """Return list of all blobs under a abs prefix.""" - return [blob.name for blob in - self.client.list_blobs(prefix=prefix)] + """Return list of all blobs under an abs prefix.""" + return [blob.name for blob in self.client.list_blobs(self.container_name)] def list_abs_subdirectories(self, prefix): """Return set of all "subdirectories" from a abs prefix.""" - iterator = self.client.list_blobs(prefix=prefix, delimiter='/') - - # here comes a hack. 
azure list_blobs() doesn't seems to have iterator.pages
-
-        return set([blob.name.rsplit('/',1)[:-1][0] for blob in iterator if '/' in blob.name])
+        return list(set([blob.name.rsplit('/', 1)[0] for blob in self.client.list_blobs(self.container_name) if '/' in blob.name]))
 
     def list_abs_directory(self, prefix, strip_prefix=True):
-        """Return a list of all blobs and subdirectories from a gcs prefix."""
+        """Return a list of all blobs and subdirectories from an abs prefix."""
         items = set()
         items.update(self.list_abs_directory_blobs(prefix))
         items.update(self.list_abs_subdirectories(prefix))
@@ -2168,7 +2156,6 @@ def dir_path(self, path=None):
         return dir_path
 
     def listdir(self, path=None):
-        #logger.debug('listdir(%s)' % path)
         dir_path = self.dir_path(path)
         return sorted(self.list_abs_directory(dir_path, strip_prefix=True))
 
@@ -2179,7 +2166,6 @@ def rmdir(self, path=None):
         raise NotImplementedError
 
     def getsize(self, path=None):
-        #logger.debug('getsize %s' % path)
        dir_path = self.dir_path(path)
        size = 0
        for blob in self.client.list_blobs(prefix=dir_path):

From 4b999baa4e140871de9c18907e5763062cbba7d8 Mon Sep 17 00:00:00 2001
From: Tim Crone
Date: Mon, 16 Apr 2018 13:35:48 -0400
Subject: [PATCH 07/21] Change account variable names

---
 zarr/storage.py | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/zarr/storage.py b/zarr/storage.py
index 7b1cb222e7..e95b1629bd 100644
--- a/zarr/storage.py
+++ b/zarr/storage.py
@@ -2064,10 +2064,9 @@ def __len__(self):
 
 class ABSStore(MutableMapping):
 
-    def __init__(self, container_name, prefix, user, token):
-
-        self.user = user
-        self.token = token
+    def __init__(self, container_name, prefix, account_name, account_key):
+        self.account_name = account_name
+        self.account_key = account_key
         self.container_name = container_name
         self.prefix = normalize_storage_path(prefix)
         self.initialize_container()

From 677ec1c726cd486bde3cabe1293e6b740c9266f1 Mon Sep 17 00:00:00 2001
From: Tim Crone
Date: Mon, 16 Apr 2018 15:11:30 -0400
Subject: [PATCH 08/21] Fix client.exists() logging issue

---
 zarr/storage.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/zarr/storage.py b/zarr/storage.py
index e95b1629bd..9867111733 100644
--- a/zarr/storage.py
+++ b/zarr/storage.py
@@ -2072,9 +2072,11 @@ def __init__(self, container_name, prefix, account_name, account_key):
         self.initialize_container()
 
     def initialize_container(self):
-
         from azure.storage.blob import BlockBlobService
         self.client = BlockBlobService(self.account_name, self.account_key)
+        # change logging level to deal with https://github.com/Azure/azure-storage-python/issues/437
+        import logging
+        logging.basicConfig(level=logging.CRITICAL)
 
     # needed for pickling
     def __getstate__(self):
@@ -2117,11 +2119,12 @@ def __len__(self):
         raise NotImplementedError
 
     def __contains__(self, key):
+        # this is where the logging error occurs.
not sure why we are looking for a .zarray below every blob blob_name = '/'.join([self.prefix, key]) - try: - return self.client.get_blob_to_text(self.container_name, blob_name) - except: - return None + if self.client.exists(self.container_name, blob_name): + return True + else: + return False def list_abs_directory_blobs(self, prefix): """Return list of all blobs under an abs prefix.""" From d9be9ba9a334d50d8321580abbd6f5d14360b78d Mon Sep 17 00:00:00 2001 From: Tim Crone Date: Mon, 16 Apr 2018 15:38:32 -0400 Subject: [PATCH 09/21] Minor comment changes --- zarr/storage.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index 9867111733..15a3412e0f 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -2127,11 +2127,11 @@ def __contains__(self, key): return False def list_abs_directory_blobs(self, prefix): - """Return list of all blobs under an abs prefix.""" + """Return list of all blobs from an abs prefix.""" return [blob.name for blob in self.client.list_blobs(self.container_name)] def list_abs_subdirectories(self, prefix): - """Return set of all "subdirectories" from a abs prefix.""" + """Return list of all "subdirectories" from an abs prefix.""" return list(set([blob.name.rsplit('/', 1)[0] for blob in self.client.list_blobs(self.container_name) if '/' in blob.name])) def list_abs_directory(self, prefix, strip_prefix=True): From 13b1ee8e4a666d14587748bef6e4b7d46a3fb27d Mon Sep 17 00:00:00 2001 From: Tim Crone Date: Mon, 16 Apr 2018 15:59:01 -0400 Subject: [PATCH 10/21] Get to_zarr() working --- zarr/storage.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/zarr/storage.py b/zarr/storage.py index 15a3412e0f..f36ec02139 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -2097,7 +2097,8 @@ def __getitem__(self, key): raise KeyError('Blob %s not found' % blob_name) def __setitem__(self, key, value): - raise NotImplementedError + blob_name = '/'.join([self.prefix, key]) + self.client.create_blob_from_text(self.container_name, blob_name, value) def __delitem__(self, key): raise NotImplementedError From e5564c36252a3b5f475431807483a9289ef6bf00 Mon Sep 17 00:00:00 2001 From: Tim Crone Date: Tue, 17 Apr 2018 05:12:34 -0400 Subject: [PATCH 11/21] Remove state['container'] delete --- zarr/storage.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/zarr/storage.py b/zarr/storage.py index f36ec02139..c4741eb7fb 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -2075,13 +2075,14 @@ def initialize_container(self): from azure.storage.blob import BlockBlobService self.client = BlockBlobService(self.account_name, self.account_key) # change logging level to deal with https://github.com/Azure/azure-storage-python/issues/437 + # it would be better to set up a logging filter that throws out just the + # error logged when calling exists(). 
import logging
         logging.basicConfig(level=logging.CRITICAL)
 
     # needed for pickling
     def __getstate__(self):
         state = self.__dict__.copy()
-        del state['container']
         return state

From a85e5595a1648ba0479278d5e629f8758cac2f8a Mon Sep 17 00:00:00 2001
From: Tim Crone
Date: Tue, 17 Apr 2018 08:35:49 -0400
Subject: [PATCH 12/21] Implement rmdir

---
 zarr/storage.py | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/zarr/storage.py b/zarr/storage.py
index c4741eb7fb..f4d83bd937 100644
--- a/zarr/storage.py
+++ b/zarr/storage.py
@@ -2089,6 +2089,15 @@ def __setstate__(self, state):
         self.__dict__.update(state)
         self.initialize_container()
 
+    def __enter__(self):
+        return self
+
+    def __exit__(self, *args):
+        pass
+
+    def full_path(self, path=None):
+        return _append_path_to_prefix(path, self.prefix)
+
     def __getitem__(self, key):
         blob_name = '/'.join([self.prefix, key])
         blob = self.client.get_blob_to_bytes(self.container_name, blob_name)
@@ -2164,7 +2173,9 @@ def rename(self, src_path, dst_path):
         raise NotImplementedError
 
     def rmdir(self, path=None):
-        raise NotImplementedError
+        dir_path = normalize_storage_path(self.full_path(path)) + '/'
+        for blob in self.client.list_blobs(self.container_name, dir_path):
+            self.client.delete_blob(self.container_name, blob.name)

From 272d234caed82c0e72a6d1a8df23c1855989206b Mon Sep 17 00:00:00 2001
From: Tim Crone
Date: Thu, 2 Aug 2018 16:57:00 -0400
Subject: [PATCH 13/21] Add docstring for ABSStore

---
 zarr/storage.py | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/zarr/storage.py b/zarr/storage.py
index f4d83bd937..c59d61c50d 100644
--- a/zarr/storage.py
+++ b/zarr/storage.py
@@ -2063,6 +2063,24 @@ def __len__(self):
         return len(list(iterator))
 
 class ABSStore(MutableMapping):
+    """Storage class using Azure Blob Storage (ABS)
+
+    Parameters
+    ----------
+    container_name : string
+        The name of the ABS container to use
+    prefix : string, optional
+        The prefix within the container (i.e. subdirectory)
+    account_name : string
+        The Azure blob storage account name
+    account_key : string
+        The Azure blob storage account access key
+
+    Notes
+    -----
+    In order to use this store, you must install the Azure Blob Storage
+    `Python Client Library `_.
+    """
 
     def __init__(self, container_name, prefix, account_name, account_key):
         self.account_name = account_name

From bb406a0e354d6026430f523d0192e5167c6dc9fb Mon Sep 17 00:00:00 2001
From: Tim Crone
Date: Thu, 2 Aug 2018 16:59:20 -0400
Subject: [PATCH 14/21] Remove GCSStore from this branch

---
 zarr/storage.py | 152 ------------------------------------------------
 1 file changed, 152 deletions(-)

diff --git a/zarr/storage.py b/zarr/storage.py
index c59d61c50d..dffc73749f 100644
--- a/zarr/storage.py
+++ b/zarr/storage.py
@@ -1910,158 +1910,6 @@ def atexit_rmgcspath(bucket, path):
     bucket.delete_blobs(bucket.list_blobs(prefix=path))
 
 
-class GCSStore(MutableMapping):
-    """Storage class using a Google Cloud Storage (GCS)
-
-    Parameters
-    ----------
-    bucket_name : string
-        The name of the GCS bucket
-    prefix : string, optional
-        The prefix within the bucket (i.e. subdirectory)
-    client_kwargs : dict, optional
-        Extra options passed to ``google.cloud.storage.Client`` when connecting
-        to GCS
-
-    Notes
-    -----
-    In order to use this store, you must install the Google Cloud Storage
-    `Python Client Library `_.
- You must also provide valid application credentials, either by setting the - ``GOOGLE_APPLICATION_CREDENTIALS`` environment variable or via - `default credentials `_. - """ - - def __init__(self, bucket_name, prefix=None, client_kwargs={}): - - self.bucket_name = bucket_name - self.prefix = normalize_storage_path(prefix) - self.client_kwargs = client_kwargs - self.initialize_bucket() - - def initialize_bucket(self): - from google.cloud import storage - # run `gcloud auth application-default login` from shell - client = storage.Client(**self.client_kwargs) - self.bucket = client.get_bucket(self.bucket_name) - # need to properly handle excpetions - import google.api_core.exceptions as exceptions - self.exceptions = exceptions - - # needed for pickling - def __getstate__(self): - state = self.__dict__.copy() - del state['bucket'] - del state['exceptions'] - return state - - def __setstate__(self, state): - self.__dict__.update(state) - self.initialize_bucket() - - def __enter__(self): - return self - - def __exit__(self, *args): - pass - - def full_path(self, path=None): - return _append_path_to_prefix(path, self.prefix) - - def list_gcs_directory_blobs(self, path): - """Return list of all blobs *directly* under a gcs prefix.""" - prefix = normalize_storage_path(path) + '/' - return [blob.name for blob in - self.bucket.list_blobs(prefix=prefix, delimiter='/')] - - # from https://github.com/GoogleCloudPlatform/google-cloud-python/issues/920 - def list_gcs_subdirectories(self, path): - """Return set of all "subdirectories" from a gcs prefix.""" - prefix = normalize_storage_path(path) + '/' - iterator = self.bucket.list_blobs(prefix=prefix, delimiter='/') - prefixes = set() - for page in iterator.pages: - prefixes.update(page.prefixes) - # need to strip trailing slash to be consistent with os.listdir - return [path[:-1] for path in prefixes] - - def list_gcs_directory(self, prefix, strip_prefix=True): - """Return a list of all blobs and subdirectories from a gcs prefix.""" - items = set() - items.update(self.list_gcs_directory_blobs(prefix)) - items.update(self.list_gcs_subdirectories(prefix)) - items = list(items) - if strip_prefix: - items = [_strip_prefix_from_path(path, prefix) for path in items] - return items - - def listdir(self, path=None): - dir_path = self.full_path(path) - return sorted(self.list_gcs_directory(dir_path, strip_prefix=True)) - - def rmdir(self, path=None): - # make sure it's a directory - dir_path = normalize_storage_path(self.full_path(path)) + '/' - self.bucket.delete_blobs(self.bucket.list_blobs(prefix=dir_path)) - - def getsize(self, path=None): - # this function should *not* be recursive - # a lot of slash trickery is required to make this work right - full_path = self.full_path(path) - blob = self.bucket.get_blob(full_path) - if blob is not None: - return blob.size - else: - dir_path = normalize_storage_path(full_path) + '/' - blobs = self.bucket.list_blobs(prefix=dir_path, delimiter='/') - size = 0 - for blob in blobs: - size += blob.size - return size - - def clear(self): - self.rmdir() - - def __getitem__(self, key): - blob_name = self.full_path(key) - blob = self.bucket.get_blob(blob_name) - if blob: - return blob.download_as_string() - else: - raise KeyError('Blob %s not found' % blob_name) - - def __setitem__(self, key, value): - blob_name = self.full_path(key) - blob = self.bucket.blob(blob_name) - blob.upload_from_string(value) - - def __delitem__(self, key): - blob_name = self.full_path(key) - try: - self.bucket.delete_blob(blob_name) - except 
self.exceptions.NotFound as er: - raise KeyError(er.message) - - def __contains__(self, key): - blob_name = self.full_path(key) - return self.bucket.get_blob(blob_name) is not None - - def __eq__(self, other): - return ( - isinstance(other, GCSStore) and - self.bucket_name == other.bucket_name and - self.prefix == other.prefix - ) - - def __iter__(self): - blobs = self.bucket.list_blobs(prefix=self.prefix) - for blob in blobs: - yield _strip_prefix_from_path(blob.name, self.prefix) - - def __len__(self): - iterator = self.bucket.list_blobs(prefix=self.prefix) - return len(list(iterator)) - class ABSStore(MutableMapping): """Storage class using Azure Blob Storage (ABS) From 937d16224db40b291c4f743a5924471890fd9a48 Mon Sep 17 00:00:00 2001 From: Zain Patel <30357972+mzjp2@users.noreply.github.com> Date: Thu, 2 Aug 2018 22:23:21 +0100 Subject: [PATCH 15/21] Fixed missing argument in getsize of ABStore Was missing self.container_name as an argument --- zarr/storage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zarr/storage.py b/zarr/storage.py index dffc73749f..68037b6971 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -2046,7 +2046,7 @@ def rmdir(self, path=None): def getsize(self, path=None): dir_path = self.dir_path(path) size = 0 - for blob in self.client.list_blobs(prefix=dir_path): + for blob in self.client.list_blobs(self.container_name, prefix=dir_path): size += blob.properties.content_length # from https://stackoverflow.com/questions/47694592/get-container-sizes-in-azure-blob-storage-using-python return size From 74920c43463a577e2d78507a85b4bd0d48d0f3fc Mon Sep 17 00:00:00 2001 From: Zain Patel <30357972+mzjp2@users.noreply.github.com> Date: Thu, 2 Aug 2018 22:23:46 +0100 Subject: [PATCH 16/21] Specified prefix argument in rmdir for abstore --- zarr/storage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zarr/storage.py b/zarr/storage.py index 68037b6971..e539ce7b65 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -2040,7 +2040,7 @@ def rename(self, src_path, dst_path): def rmdir(self, path=None): dir_path = normalize_storage_path(self.full_path(path)) + '/' - for blob in self.client.list_blobs(self.container_name, dir_path): + for blob in self.client.list_blobs(self.container_name, prefix=dir_path): self.client.delete_blob(self.container_name, blob.name) def getsize(self, path=None): From bd1648bfba6127accb9cc1464ac0f2b9f583b6d5 Mon Sep 17 00:00:00 2001 From: Zain Patel <30357972+mzjp2@users.noreply.github.com> Date: Thu, 2 Aug 2018 22:24:44 +0100 Subject: [PATCH 17/21] Fixed join string error in dir_path in ABStore Join only accepts one argument, using os.path.join(x,y) formats the string as a valid file path for us. 
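
For context, a minimal sketch of the difference (illustrative only, not part
of the patch itself):

    '/'.join('a/b', 'c')       # TypeError: join() takes exactly one argument (2 given)
    '/'.join(['a/b', 'c'])     # 'a/b/c' -- str.join expects a single iterable
    import os
    os.path.join('a/b', 'c')   # 'a/b/c' on POSIX (the form this commit adopts)

One caveat worth noting: os.path.join uses the platform separator, so on
Windows it would produce 'a/b\c', whereas blob keys always use '/'.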
---
 zarr/storage.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/zarr/storage.py b/zarr/storage.py
index e539ce7b65..dbbcb27dce 100644
--- a/zarr/storage.py
+++ b/zarr/storage.py
@@ -2026,7 +2026,7 @@ def dir_path(self, path=None):
         # prefix is normalized to not have a trailing slash
         dir_path = self.prefix
         if store_path:
-            dir_path = '/'.join(dir_path, store_path)
+            dir_path = os.path.join(dir_path, store_path)
         else:
             dir_path += '/'
         return dir_path

From 0e71f709a58161c05eb4023096b114d26e5db620 Mon Sep 17 00:00:00 2001
From: Tim Crone
Date: Fri, 3 Aug 2018 06:52:23 -0400
Subject: [PATCH 18/21] Remove logging work-around as the issue was fixed in
 azure-storage 1.3.0

---
 zarr/storage.py | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/zarr/storage.py b/zarr/storage.py
index dbbcb27dce..2d5ccd90e5 100644
--- a/zarr/storage.py
+++ b/zarr/storage.py
@@ -1940,11 +1940,6 @@ def __init__(self, container_name, prefix, account_name, account_key):
 
     def initialize_container(self):
         from azure.storage.blob import BlockBlobService
         self.client = BlockBlobService(self.account_name, self.account_key)
-        # change logging level to deal with https://github.com/Azure/azure-storage-python/issues/437
-        # it would be better to set up a logging filter that throws out just the
-        # error logged when calling exists().
-        import logging
-        logging.basicConfig(level=logging.CRITICAL)
 
     # needed for pickling
     def __getstate__(self):
@@ -1996,7 +1991,6 @@ def __len__(self):
         raise NotImplementedError
 
     def __contains__(self, key):
-        # this is where the logging error occurs. not sure why we are looking for a .zarray below every blob
         blob_name = '/'.join([self.prefix, key])
         if self.client.exists(self.container_name, blob_name):
             return True
         else:
             return False

From de5bb9c95dd10168ad7104056a0c6f46db4646d8 Mon Sep 17 00:00:00 2001
From: Tim Crone
Date: Fri, 3 Aug 2018 07:27:19 -0400
Subject: [PATCH 19/21] Clean up docstring

---
 zarr/storage.py | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/zarr/storage.py b/zarr/storage.py
index 2d5ccd90e5..0956eb738f 100644
--- a/zarr/storage.py
+++ b/zarr/storage.py
@@ -1911,23 +1911,25 @@ def atexit_rmgcspath(bucket, path):
 
 class ABSStore(MutableMapping):
-    """Storage class using Azure Blob Storage (ABS)
+    """Storage class using Azure Blob Storage (ABS).
 
     Parameters
     ----------
     container_name : string
-        The name of the ABS container to use
-    prefix : string, optional
-        The prefix within the container (i.e. subdirectory)
+        The name of the ABS container to use. Currently this must exist in the
+        storage account.
+    prefix : string
+        Location of the "directory" to use as the root of the storage hierarchy
+        within the container.
     account_name : string
-        The Azure blob storage account name
+        The Azure blob storage account name.
     account_key : string
-        The Azure blob storage account access key
+        The Azure blob storage account access key.
 
     Notes
     -----
     In order to use this store, you must install the Azure Blob Storage
-    `Python Client Library `_.
+    `Python Client Library `_ version >= 1.3.0.
""" def __init__(self, container_name, prefix, account_name, account_key): @@ -2041,7 +2043,7 @@ def getsize(self, path=None): dir_path = self.dir_path(path) size = 0 for blob in self.client.list_blobs(self.container_name, prefix=dir_path): - size += blob.properties.content_length # from https://stackoverflow.com/questions/47694592/get-container-sizes-in-azure-blob-storage-using-python + size += blob.properties.content_length return size def clear(self): From 13a6d3092463d9b43ce7e36653ec333b00511562 Mon Sep 17 00:00:00 2001 From: Tim Crone Date: Fri, 3 Aug 2018 07:30:24 -0400 Subject: [PATCH 20/21] Remove more GCSStore code --- zarr/storage.py | 7 ------- zarr/tests/test_core.py | 26 ++------------------------ zarr/tests/test_storage.py | 30 +----------------------------- 3 files changed, 3 insertions(+), 60 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index 0956eb738f..2ab4cf16c1 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -1903,13 +1903,6 @@ def _append_path_to_prefix(path, prefix): normalize_storage_path(path)]) -def atexit_rmgcspath(bucket, path): - from google.cloud import storage - client = storage.Client() - bucket = client.get_bucket(bucket) - bucket.delete_blobs(bucket.list_blobs(prefix=path)) - - class ABSStore(MutableMapping): """Storage class using Azure Blob Storage (ABS). diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py index 23d64d1f2c..390f888287 100644 --- a/zarr/tests/test_core.py +++ b/zarr/tests/test_core.py @@ -7,7 +7,7 @@ import pickle import os import warnings -import uuid + import numpy as np from numpy.testing import assert_array_equal, assert_array_almost_equal @@ -16,7 +16,7 @@ from zarr.storage import (DirectoryStore, init_array, init_group, NestedDirectoryStore, DBMStore, LMDBStore, atexit_rmtree, atexit_rmglob, - LRUStoreCache, GCSStore, atexit_rmgcspath) + LRUStoreCache) from zarr.core import Array from zarr.errors import PermissionError from zarr.compat import PY2, text_type, binary_type @@ -1698,25 +1698,3 @@ def create_array(read_only=False, **kwargs): init_array(store, **kwargs) return Array(store, read_only=read_only, cache_metadata=cache_metadata, cache_attrs=cache_attrs) - - -try: - from google.cloud import storage as gcstorage -except ImportError: # pragma: no cover - gcstorage = None - - -@unittest.skipIf(gcstorage is None, 'google-cloud-storage is not installed') -class TestGCSArray(TestArray): - - def create_array(self, read_only=False, **kwargs): - bucket = 'zarr-test' - prefix = uuid.uuid4() - atexit.register(atexit_rmgcspath, bucket, prefix) - store = GCSStore(bucket, prefix) - cache_metadata = kwargs.pop('cache_metadata', True) - cache_attrs = kwargs.pop('cache_attrs', True) - kwargs.setdefault('compressor', Zlib(1)) - init_array(store, **kwargs) - return Array(store, read_only=read_only, cache_metadata=cache_metadata, - cache_attrs=cache_attrs) diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index ffc19822a5..f68f8a6ed6 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -8,7 +8,6 @@ import array import shutil import os -import uuid import numpy as np @@ -20,8 +19,7 @@ DirectoryStore, ZipStore, init_group, group_meta_key, getsize, migrate_1to2, TempStore, atexit_rmtree, NestedDirectoryStore, default_compressor, DBMStore, - LMDBStore, atexit_rmglob, LRUStoreCache, GCSStore, - atexit_rmgcspath) + LMDBStore, atexit_rmglob, LRUStoreCache) from zarr.meta import (decode_array_metadata, encode_array_metadata, ZARR_FORMAT, decode_group_metadata, encode_group_metadata) 
from zarr.compat import PY2
@@ -1237,29 +1235,3 @@ def test_format_compatibility():
         else:
             assert compressor.codec_id == z.compressor.codec_id
             assert compressor.get_config() == z.compressor.get_config()
-
-
-try:
-    from google.cloud import storage as gcstorage
-    # cleanup function
-
-except ImportError:  # pragma: no cover
-    gcstorage = None
-
-
-@unittest.skipIf(gcstorage is None, 'google-cloud-storage is not installed')
-class TestGCSStore(StoreTests, unittest.TestCase):
-
-    def create_store(self):
-        # would need to be replaced with a dedicated test bucket
-        bucket = 'zarr-test'
-        prefix = uuid.uuid4()
-        atexit.register(atexit_rmgcspath, bucket, prefix)
-        store = GCSStore(bucket, prefix)
-        return store
-
-    def test_context_manager(self):
-        with self.create_store() as store:
-            store['foo'] = b'bar'
-            store['baz'] = b'qux'
-            assert 2 == len(store)

From 7b52e3923dfb036d0213e2a8281cd1876d5ec962 Mon Sep 17 00:00:00 2001
From: Tim Crone
Date: Mon, 6 Aug 2018 10:52:54 -0400
Subject: [PATCH 21/21] Move utility functions into ABSStore class

---
 zarr/storage.py | 31 +++++++++++++------------------
 1 file changed, 13 insertions(+), 18 deletions(-)

diff --git a/zarr/storage.py b/zarr/storage.py
index 2ab4cf16c1..7964e3dd01 100644
--- a/zarr/storage.py
+++ b/zarr/storage.py
@@ -1885,24 +1885,6 @@ def __delitem__(self, key):
             self._invalidate_value(key)
 
 
-# utility functions for object stores
-
-
-def _strip_prefix_from_path(path, prefix):
-    # normalized things will not have any leading or trailing slashes
-    path_norm = normalize_storage_path(path)
-    prefix_norm = normalize_storage_path(prefix)
-    if path_norm.startswith(prefix_norm):
-        return path_norm[(len(prefix_norm)+1):]
-    else:
-        return path
-
-
-def _append_path_to_prefix(path, prefix):
-    return '/'.join([normalize_storage_path(prefix),
-                     normalize_storage_path(path)])
-
-
 class ABSStore(MutableMapping):
     """Storage class using Azure Blob Storage (ABS).
 
@@ -1951,6 +1933,11 @@ def __enter__(self):
     def __exit__(self, *args):
         pass
 
+    @staticmethod
+    def _append_path_to_prefix(path, prefix):
+        return '/'.join([normalize_storage_path(prefix),
+                         normalize_storage_path(path)])
+
     def full_path(self, path=None):
-        return _append_path_to_prefix(path, self.prefix)
+        return self._append_path_to_prefix(path, self.prefix)
 
@@ -2000,6 +1986,16 @@ def list_abs_subdirectories(self, prefix):
         """Return list of all "subdirectories" from an abs prefix."""
         return list(set([blob.name.rsplit('/', 1)[0] for blob in self.client.list_blobs(self.container_name) if '/' in blob.name]))
 
+    @staticmethod
+    def _strip_prefix_from_path(path, prefix):
+        # normalized things will not have any leading or trailing slashes
+        path_norm = normalize_storage_path(path)
+        prefix_norm = normalize_storage_path(prefix)
+        if path_norm.startswith(prefix_norm):
+            return path_norm[(len(prefix_norm)+1):]
+        else:
+            return path
+
     def list_abs_directory(self, prefix, strip_prefix=True):
         """Return a list of all blobs and subdirectories from an abs prefix."""
         items = set()
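
For reference, a minimal sketch of how the ABSStore added by this series is
meant to be used with zarr once the full series is applied. All container and
account values below are placeholders, and the container must already exist in
the storage account (the store does not create it):

    import zarr
    from zarr.storage import ABSStore

    store = ABSStore(container_name='zarr-demo', prefix='example.zarr',
                     account_name='<account name>', account_key='<account key>')
    root = zarr.group(store=store)
    z = root.zeros('data', shape=(1000, 1000), chunks=(100, 100), dtype='f8')
    z[:] = 42.0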