@@ -1910,158 +1910,6 @@ def atexit_rmgcspath(bucket, path):
     bucket.delete_blobs(bucket.list_blobs(prefix=path))
 
 
-class GCSStore(MutableMapping):
-    """Storage class using Google Cloud Storage (GCS)
-
-    Parameters
-    ----------
-    bucket_name : string
-        The name of the GCS bucket
-    prefix : string, optional
-        The prefix within the bucket (i.e. subdirectory)
-    client_kwargs : dict, optional
-        Extra options passed to ``google.cloud.storage.Client`` when connecting
-        to GCS
-
-    Notes
-    -----
-    In order to use this store, you must install the Google Cloud Storage
-    `Python Client Library <https://cloud.google.com/storage/docs/reference/libraries>`_.
-    You must also provide valid application credentials, either by setting the
-    ``GOOGLE_APPLICATION_CREDENTIALS`` environment variable or via
-    `default credentials <https://cloud.google.com/sdk/gcloud/reference/auth/application-default/login>`_.
-    """
-
-    def __init__(self, bucket_name, prefix=None, client_kwargs={}):
-
-        self.bucket_name = bucket_name
-        self.prefix = normalize_storage_path(prefix)
-        self.client_kwargs = client_kwargs
-        self.initialize_bucket()
-
-    def initialize_bucket(self):
-        from google.cloud import storage
-        # run `gcloud auth application-default login` from shell
-        client = storage.Client(**self.client_kwargs)
-        self.bucket = client.get_bucket(self.bucket_name)
-        # need to properly handle exceptions
-        import google.api_core.exceptions as exceptions
-        self.exceptions = exceptions
-
-    # needed for pickling
-    def __getstate__(self):
-        state = self.__dict__.copy()
-        del state['bucket']
-        del state['exceptions']
-        return state
-
-    def __setstate__(self, state):
-        self.__dict__.update(state)
-        self.initialize_bucket()
-
-    def __enter__(self):
-        return self
-
-    def __exit__(self, *args):
-        pass
-
-    def full_path(self, path=None):
-        return _append_path_to_prefix(path, self.prefix)
-
-    def list_gcs_directory_blobs(self, path):
-        """Return list of all blobs *directly* under a gcs prefix."""
-        prefix = normalize_storage_path(path) + '/'
-        return [blob.name for blob in
-                self.bucket.list_blobs(prefix=prefix, delimiter='/')]
-
-    # from https://github.com/GoogleCloudPlatform/google-cloud-python/issues/920
-    def list_gcs_subdirectories(self, path):
-        """Return set of all "subdirectories" from a gcs prefix."""
-        prefix = normalize_storage_path(path) + '/'
-        iterator = self.bucket.list_blobs(prefix=prefix, delimiter='/')
-        prefixes = set()
-        for page in iterator.pages:
-            prefixes.update(page.prefixes)
-        # need to strip trailing slash to be consistent with os.listdir
-        return [path[:-1] for path in prefixes]
-
-    def list_gcs_directory(self, prefix, strip_prefix=True):
-        """Return a list of all blobs and subdirectories from a gcs prefix."""
-        items = set()
-        items.update(self.list_gcs_directory_blobs(prefix))
-        items.update(self.list_gcs_subdirectories(prefix))
-        items = list(items)
-        if strip_prefix:
-            items = [_strip_prefix_from_path(path, prefix) for path in items]
-        return items
-
-    def listdir(self, path=None):
-        dir_path = self.full_path(path)
-        return sorted(self.list_gcs_directory(dir_path, strip_prefix=True))
-
-    def rmdir(self, path=None):
-        # make sure it's a directory
-        dir_path = normalize_storage_path(self.full_path(path)) + '/'
-        self.bucket.delete_blobs(self.bucket.list_blobs(prefix=dir_path))
-
-    def getsize(self, path=None):
-        # this function should *not* be recursive
-        # a lot of slash trickery is required to make this work right
-        full_path = self.full_path(path)
-        blob = self.bucket.get_blob(full_path)
-        if blob is not None:
-            return blob.size
-        else:
-            dir_path = normalize_storage_path(full_path) + '/'
-            blobs = self.bucket.list_blobs(prefix=dir_path, delimiter='/')
-            size = 0
-            for blob in blobs:
-                size += blob.size
-            return size
-
-    def clear(self):
-        self.rmdir()
-
-    def __getitem__(self, key):
-        blob_name = self.full_path(key)
-        blob = self.bucket.get_blob(blob_name)
-        if blob:
-            return blob.download_as_string()
-        else:
-            raise KeyError('Blob %s not found' % blob_name)
-
-    def __setitem__(self, key, value):
-        blob_name = self.full_path(key)
-        blob = self.bucket.blob(blob_name)
-        blob.upload_from_string(value)
-
-    def __delitem__(self, key):
-        blob_name = self.full_path(key)
-        try:
-            self.bucket.delete_blob(blob_name)
-        except self.exceptions.NotFound as er:
-            raise KeyError(er.message)
-
-    def __contains__(self, key):
-        blob_name = self.full_path(key)
-        return self.bucket.get_blob(blob_name) is not None
-
-    def __eq__(self, other):
-        return (
-            isinstance(other, GCSStore) and
-            self.bucket_name == other.bucket_name and
-            self.prefix == other.prefix
-        )
-
-    def __iter__(self):
-        blobs = self.bucket.list_blobs(prefix=self.prefix)
-        for blob in blobs:
-            yield _strip_prefix_from_path(blob.name, self.prefix)
-
-    def __len__(self):
-        iterator = self.bucket.list_blobs(prefix=self.prefix)
-        return len(list(iterator))
-
 class ABSStore(MutableMapping):
     """Storage class using Azure Blob Storage (ABS)
 