From 6c160794afded5e8f4179399f1fe5248e32bf707 Mon Sep 17 00:00:00 2001 From: Chandra Shekhar Sirimala Date: Fri, 16 Jan 2026 01:17:59 +0530 Subject: [PATCH 01/23] fix(experimental): implement requests_done method to signal end of requests in async streams. Gracefully close streams. (#1700) fix(experimental): implement requests_done method to signal end of requests in async streams. Gracefully close streams. --- .../asyncio/async_read_object_stream.py | 7 +++++ .../asyncio/async_write_object_stream.py | 8 ++++++ .../asyncio/test_async_read_object_stream.py | 26 +++++++++++++++++++ .../asyncio/test_async_write_object_stream.py | 23 ++++++++++++++++ 4 files changed, 64 insertions(+) diff --git a/google/cloud/storage/_experimental/asyncio/async_read_object_stream.py b/google/cloud/storage/_experimental/asyncio/async_read_object_stream.py index 7adcdd1c9..5e84485d5 100644 --- a/google/cloud/storage/_experimental/asyncio/async_read_object_stream.py +++ b/google/cloud/storage/_experimental/asyncio/async_read_object_stream.py @@ -151,9 +151,16 @@ async def close(self) -> None: """Closes the bidi-gRPC connection.""" if not self._is_stream_open: raise ValueError("Stream is not open") + await self.requests_done() await self.socket_like_rpc.close() self._is_stream_open = False + async def requests_done(self): + """Signals that all requests have been sent.""" + + await self.socket_like_rpc.send(None) + await self.socket_like_rpc.recv() + async def send( self, bidi_read_object_request: _storage_v2.BidiReadObjectRequest ) -> None: diff --git a/google/cloud/storage/_experimental/asyncio/async_write_object_stream.py b/google/cloud/storage/_experimental/asyncio/async_write_object_stream.py index 183a8eeb1..a0ebaa498 100644 --- a/google/cloud/storage/_experimental/asyncio/async_write_object_stream.py +++ b/google/cloud/storage/_experimental/asyncio/async_write_object_stream.py @@ -152,9 +152,16 @@ async def close(self) -> None: """Closes the bidi-gRPC connection.""" if not 
self._is_stream_open: raise ValueError("Stream is not open") + await self.requests_done() await self.socket_like_rpc.close() self._is_stream_open = False + async def requests_done(self): + """Signals that all requests have been sent.""" + + await self.socket_like_rpc.send(None) + await self.socket_like_rpc.recv() + async def send( self, bidi_write_object_request: _storage_v2.BidiWriteObjectRequest ) -> None: @@ -186,3 +193,4 @@ async def recv(self) -> _storage_v2.BidiWriteObjectResponse: @property def is_stream_open(self) -> bool: return self._is_stream_open + diff --git a/tests/unit/asyncio/test_async_read_object_stream.py b/tests/unit/asyncio/test_async_read_object_stream.py index 4ba8d34a1..18e0c464d 100644 --- a/tests/unit/asyncio/test_async_read_object_stream.py +++ b/tests/unit/asyncio/test_async_read_object_stream.py @@ -197,15 +197,41 @@ async def test_close(mock_client, mock_cls_async_bidi_rpc): read_obj_stream = await instantiate_read_obj_stream( mock_client, mock_cls_async_bidi_rpc, open=True ) + read_obj_stream.requests_done = AsyncMock() # act await read_obj_stream.close() # assert + read_obj_stream.requests_done.assert_called_once() read_obj_stream.socket_like_rpc.close.assert_called_once() assert not read_obj_stream.is_stream_open +@mock.patch( + "google.cloud.storage._experimental.asyncio.async_read_object_stream.AsyncBidiRpc" +) +@mock.patch( + "google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" +) +@pytest.mark.asyncio +async def test_requests_done(mock_client, mock_cls_async_bidi_rpc): + """Test that requests_done signals the end of requests.""" + # Arrange + read_obj_stream = await instantiate_read_obj_stream( + mock_client, mock_cls_async_bidi_rpc, open=True + ) + read_obj_stream.socket_like_rpc.send = AsyncMock() + read_obj_stream.socket_like_rpc.recv = AsyncMock() + + # Act + await read_obj_stream.requests_done() + + # Assert + read_obj_stream.socket_like_rpc.send.assert_called_once_with(None) + 
read_obj_stream.socket_like_rpc.recv.assert_called_once() + + @mock.patch( "google.cloud.storage._experimental.asyncio.async_read_object_stream.AsyncBidiRpc" ) diff --git a/tests/unit/asyncio/test_async_write_object_stream.py b/tests/unit/asyncio/test_async_write_object_stream.py index c6ea8a8ff..63b5495bd 100644 --- a/tests/unit/asyncio/test_async_write_object_stream.py +++ b/tests/unit/asyncio/test_async_write_object_stream.py @@ -289,11 +289,13 @@ async def test_close(mock_cls_async_bidi_rpc, mock_client): write_obj_stream = await instantiate_write_obj_stream( mock_client, mock_cls_async_bidi_rpc, open=True ) + write_obj_stream.requests_done = AsyncMock() # Act await write_obj_stream.close() # Assert + write_obj_stream.requests_done.assert_called_once() write_obj_stream.socket_like_rpc.close.assert_called_once() assert not write_obj_stream.is_stream_open @@ -394,3 +396,24 @@ async def test_recv_without_open_should_raise_error( # Act & Assert with pytest.raises(ValueError, match="Stream is not open"): await write_obj_stream.recv() + + +@pytest.mark.asyncio +@mock.patch( + "google.cloud.storage._experimental.asyncio.async_write_object_stream.AsyncBidiRpc" +) +async def test_requests_done(mock_cls_async_bidi_rpc, mock_client): + """Test that requests_done signals the end of requests.""" + # Arrange + write_obj_stream = await instantiate_write_obj_stream( + mock_client, mock_cls_async_bidi_rpc, open=True + ) + write_obj_stream.socket_like_rpc.send = AsyncMock() + write_obj_stream.socket_like_rpc.recv = AsyncMock() + + # Act + await write_obj_stream.requests_done() + + # Assert + write_obj_stream.socket_like_rpc.send.assert_called_once_with(None) + write_obj_stream.socket_like_rpc.recv.assert_called_once() \ No newline at end of file From ea0f5bf8316f4bfcff2728d9d1baa68dde6ebdae Mon Sep 17 00:00:00 2001 From: Chandra Shekhar Sirimala Date: Fri, 16 Jan 2026 23:24:52 +0530 Subject: [PATCH 02/23] feat(ZonalBuckets): add support for `generation=0` to avoid overwriting 
existing objects and add `is_stream_open` support (#1709) feat(ZonalBuckets): add support for `generation=0` to prevent overwriting existing objects feat(ZonalBuckets): add `is_stream_open` property to AsyncAppendableObjectWriter for stream status check --------- Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- .../asyncio/async_appendable_object_writer.py | 34 ++++++--- .../asyncio/async_write_object_stream.py | 19 +++-- tests/system/test_zonal.py | 72 +++++++++++++++++++ .../test_async_appendable_object_writer.py | 12 ++-- .../asyncio/test_async_write_object_stream.py | 36 ++++++++++ 5 files changed, 155 insertions(+), 18 deletions(-) diff --git a/google/cloud/storage/_experimental/asyncio/async_appendable_object_writer.py b/google/cloud/storage/_experimental/asyncio/async_appendable_object_writer.py index c808cb52a..444e8d030 100644 --- a/google/cloud/storage/_experimental/asyncio/async_appendable_object_writer.py +++ b/google/cloud/storage/_experimental/asyncio/async_appendable_object_writer.py @@ -97,15 +97,29 @@ def __init__( :param object_name: The name of the GCS Appendable Object to be written. :type generation: int - :param generation: (Optional) If present, selects a specific revision of - that object. - If None, a new object is created. - If None and Object already exists then it'll will be - overwritten. + :param generation: (Optional) If present, creates writer for that + specific revision of that object. Use this to append data to an + existing Appendable Object. + + Setting to ``0`` makes the `writer.open()` succeed only if + object doesn't exist in the bucket (useful for not accidentally + overwriting existing objects). + + Warning: If `None`, a new object is created. If an object with the + same name already exists, it will be overwritten the moment + `writer.open()` is called. :type write_handle: bytes - :param write_handle: (Optional) An existing handle for writing the object. 
- If provided, opening the bidi-gRPC connection will be faster. + :param write_handle: (Optional) An handle for writing the object. + If provided, opening the bidi-gRPC connection will be faster. + + :type writer_options: dict + :param writer_options: (Optional) A dictionary of writer options. + Supported options: + - "FLUSH_INTERVAL_BYTES": int + The number of bytes to append before "persisting" data in GCS + servers. Default is `_DEFAULT_FLUSH_INTERVAL_BYTES`. + Must be a multiple of `_MAX_CHUNK_SIZE_BYTES`. """ raise_if_no_fast_crc32c() self.client = client @@ -133,7 +147,6 @@ def __init__( self.flush_interval = writer_options.get( "FLUSH_INTERVAL_BYTES", _DEFAULT_FLUSH_INTERVAL_BYTES ) - # TODO: add test case for this. if self.flush_interval < _MAX_CHUNK_SIZE_BYTES: raise exceptions.OutOfRange( f"flush_interval must be >= {_MAX_CHUNK_SIZE_BYTES} , but provided {self.flush_interval}" @@ -346,6 +359,11 @@ async def finalize(self) -> _storage_v2.Object: self.offset = None return self.object_resource + @property + def is_stream_open(self) -> bool: + return self._is_stream_open + + # helper methods. async def append_from_string(self, data: str): """ diff --git a/google/cloud/storage/_experimental/asyncio/async_write_object_stream.py b/google/cloud/storage/_experimental/asyncio/async_write_object_stream.py index a0ebaa498..1454803ad 100644 --- a/google/cloud/storage/_experimental/asyncio/async_write_object_stream.py +++ b/google/cloud/storage/_experimental/asyncio/async_write_object_stream.py @@ -47,8 +47,17 @@ class _AsyncWriteObjectStream(_AsyncAbstractObjectStream): :param object_name: The name of the GCS ``Appendable Object`` to be write. :type generation_number: int - :param generation_number: (Optional) If present, selects a specific revision of - this object. If None, a new object is created. + :param generation_number: (Optional) If present, creates writer for that + specific revision of that object. Use this to append data to an + existing Appendable Object. 
+ + Setting to ``0`` makes the `writer.open()` succeed only if + object doesn't exist in the bucket (useful for not accidentally + overwriting existing objects). + + Warning: If `None`, a new object is created. If an object with the + same name already exists, it will be overwritten the moment + `writer.open()` is called. :type write_handle: bytes :param write_handle: (Optional) An existing handle for writing the object. @@ -101,13 +110,16 @@ async def open(self) -> None: # Create a new object or overwrite existing one if generation_number # is None. This makes it consistent with GCS JSON API behavior. # Created object type would be Appendable Object. - if self.generation_number is None: + # if `generation_number` == 0 new object will be created only if there + # isn't any existing object. + if self.generation_number is None or self.generation_number == 0: self.first_bidi_write_req = _storage_v2.BidiWriteObjectRequest( write_object_spec=_storage_v2.WriteObjectSpec( resource=_storage_v2.Object( name=self.object_name, bucket=self._full_bucket_name ), appendable=True, + if_generation_match=self.generation_number, ), ) else: @@ -118,7 +130,6 @@ async def open(self) -> None: generation=self.generation_number, ), ) - self.socket_like_rpc = AsyncBidiRpc( self.rpc, initial_request=self.first_bidi_write_req, metadata=self.metadata ) diff --git a/tests/system/test_zonal.py b/tests/system/test_zonal.py index 2ade654ad..b5942365a 100644 --- a/tests/system/test_zonal.py +++ b/tests/system/test_zonal.py @@ -18,6 +18,7 @@ from google.cloud.storage._experimental.asyncio.async_multi_range_downloader import ( AsyncMultiRangeDownloader, ) +from google.api_core.exceptions import FailedPrecondition pytestmark = pytest.mark.skipif( @@ -360,3 +361,74 @@ async def test_append_flushes_and_state_lookup(storage_client, blobs_to_delete): await mrd.close() content = buffer.getvalue() assert content == full_data + +@pytest.mark.asyncio +async def test_open_with_generation_zero(storage_client, 
blobs_to_delete): + """Tests that using `generation=0` fails if the object already exists. + + This test verifies that: + 1. An object can be created using `AsyncAppendableObjectWriter` with `generation=0`. + 2. Attempting to create the same object again with `generation=0` raises a + `FailedPrecondition` error with a 400 status code, because the + precondition (object must not exist) is not met. + """ + object_name = f"test_append_with_generation-{uuid.uuid4()}" + grpc_client = AsyncGrpcClient().grpc_client + writer = AsyncAppendableObjectWriter(grpc_client, _ZONAL_BUCKET, object_name, generation=0) + + # Empty object is created. + await writer.open() + assert writer.is_stream_open + + await writer.close() + assert not writer.is_stream_open + + + with pytest.raises(FailedPrecondition) as exc_info: + writer = AsyncAppendableObjectWriter( + grpc_client, _ZONAL_BUCKET, object_name, generation=0 + ) + await writer.open() + assert exc_info.value.code == 400 + + # cleanup + del writer + gc.collect() + + blobs_to_delete.append(storage_client.bucket(_ZONAL_BUCKET).blob(object_name)) + +@pytest.mark.asyncio +async def test_open_existing_object_with_gen_None_overrides_existing(storage_client, blobs_to_delete): + """ + Test that a new writer when specifies `None` overrides the existing object. + """ + object_name = f"test_append_with_generation-{uuid.uuid4()}" + + grpc_client = AsyncGrpcClient().grpc_client + writer = AsyncAppendableObjectWriter(grpc_client, _ZONAL_BUCKET, object_name, generation=0) + + # Empty object is created. 
+ await writer.open() + assert writer.is_stream_open + old_gen = writer.generation + + + await writer.close() + assert not writer.is_stream_open + + + + new_writer = AsyncAppendableObjectWriter( + grpc_client, _ZONAL_BUCKET, object_name, generation=None + ) + await new_writer.open() + assert new_writer.generation != old_gen + + # assert exc_info.value.code == 400 + + # cleanup + del writer + del new_writer + gc.collect() + + blobs_to_delete.append(storage_client.bucket(_ZONAL_BUCKET).blob(object_name)) \ No newline at end of file diff --git a/tests/unit/asyncio/test_async_appendable_object_writer.py b/tests/unit/asyncio/test_async_appendable_object_writer.py index 07f7047d8..c19622238 100644 --- a/tests/unit/asyncio/test_async_appendable_object_writer.py +++ b/tests/unit/asyncio/test_async_appendable_object_writer.py @@ -55,7 +55,7 @@ def test_init(mock_write_object_stream, mock_client): assert writer.object_name == OBJECT assert writer.generation is None assert writer.write_handle is None - assert not writer._is_stream_open + assert not writer.is_stream_open assert writer.offset is None assert writer.persisted_size is None assert writer.bytes_appended_since_last_flush == 0 @@ -225,7 +225,7 @@ async def test_open_appendable_object_writer(mock_write_object_stream, mock_clie # Assert mock_stream.open.assert_awaited_once() - assert writer._is_stream_open + assert writer.is_stream_open assert writer.generation == GENERATION assert writer.write_handle == WRITE_HANDLE assert writer.persisted_size == 0 @@ -255,7 +255,7 @@ async def test_open_appendable_object_writer_existing_object( # Assert mock_stream.open.assert_awaited_once() - assert writer._is_stream_open + assert writer.is_stream_open assert writer.generation == GENERATION assert writer.write_handle == WRITE_HANDLE assert writer.persisted_size == PERSISTED_SIZE @@ -379,7 +379,7 @@ async def test_close(mock_write_object_stream, mock_client): mock_stream.close.assert_awaited_once() assert writer.offset is None assert 
persisted_size == 1024 - assert not writer._is_stream_open + assert not writer.is_stream_open @pytest.mark.asyncio @@ -415,7 +415,7 @@ async def test_finalize_on_close(mock_write_object_stream, mock_client): # Assert mock_stream.close.assert_awaited_once() - assert not writer._is_stream_open + assert not writer.is_stream_open assert writer.offset is None assert writer.object_resource == mock_resource assert writer.persisted_size == 2048 @@ -448,7 +448,7 @@ async def test_finalize(mock_write_object_stream, mock_client): assert writer.object_resource == mock_resource assert writer.persisted_size == 123 assert gcs_object == mock_resource - assert writer._is_stream_open is False + assert not writer.is_stream_open assert writer.offset is None diff --git a/tests/unit/asyncio/test_async_write_object_stream.py b/tests/unit/asyncio/test_async_write_object_stream.py index 63b5495bd..f0d90543f 100644 --- a/tests/unit/asyncio/test_async_write_object_stream.py +++ b/tests/unit/asyncio/test_async_write_object_stream.py @@ -148,6 +148,42 @@ async def test_open_for_new_object(mock_async_bidi_rpc, mock_client): assert stream.persisted_size == 0 +@pytest.mark.asyncio +@mock.patch( + "google.cloud.storage._experimental.asyncio.async_write_object_stream.AsyncBidiRpc" +) +async def test_open_for_new_object_with_generation_zero(mock_async_bidi_rpc, mock_client): + """Test opening a stream for a new object.""" + # Arrange + socket_like_rpc = mock.AsyncMock() + mock_async_bidi_rpc.return_value = socket_like_rpc + socket_like_rpc.open = mock.AsyncMock() + + mock_response = mock.MagicMock(spec=_storage_v2.BidiWriteObjectResponse) + mock_response.resource = mock.MagicMock(spec=_storage_v2.Object) + mock_response.resource.generation = GENERATION + mock_response.resource.size = 0 + mock_response.write_handle = WRITE_HANDLE + socket_like_rpc.recv = mock.AsyncMock(return_value=mock_response) + + stream = _AsyncWriteObjectStream(mock_client, BUCKET, OBJECT, generation_number=0) + + # Act + await 
stream.open() + + # Assert + mock_async_bidi_rpc.assert_called_once() + _, call_kwargs = mock_async_bidi_rpc.call_args + initial_request = call_kwargs["initial_request"] + assert initial_request.write_object_spec.if_generation_match == 0 + assert stream._is_stream_open + socket_like_rpc.open.assert_called_once() + socket_like_rpc.recv.assert_called_once() + assert stream.generation_number == GENERATION + assert stream.write_handle == WRITE_HANDLE + assert stream.persisted_size == 0 + + @pytest.mark.asyncio @mock.patch( "google.cloud.storage._experimental.asyncio.async_write_object_stream.AsyncBidiRpc" From 2e1a1eb5cbe1c909f1f892a0cc74fe63c8ef36ff Mon Sep 17 00:00:00 2001 From: Chandra Shekhar Sirimala Date: Tue, 20 Jan 2026 18:20:24 +0530 Subject: [PATCH 03/23] feat(samples): add samples for appendable objects writes and reads (#1705) feat(samples): add samples for appendable objects writes and reads --------- Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- cloudbuild/run_zonal_tests.sh | 1 + .../asyncio/async_multi_range_downloader.py | 10 +- .../asyncio/async_write_object_stream.py | 12 +- ...rage_create_and_write_appendable_object.py | 77 ++++++ ...orage_finalize_appendable_object_upload.py | 78 ++++++ ...orage_open_multiple_objects_ranged_read.py | 85 ++++++ ...torage_open_object_multiple_ranged_read.py | 85 ++++++ .../storage_open_object_read_full_object.py | 72 +++++ .../storage_open_object_single_ranged_read.py | 77 ++++++ ...rage_pause_and_resume_appendable_upload.py | 94 +++++++ .../storage_read_appendable_object_tail.py | 141 ++++++++++ .../zonal_buckets/zonal_snippets_test.py | 260 ++++++++++++++++++ setup.py | 18 ++ .../test_async_appendable_object_writer.py | 10 +- 14 files changed, 1010 insertions(+), 10 deletions(-) create mode 100644 samples/snippets/zonal_buckets/storage_create_and_write_appendable_object.py create mode 100644 
samples/snippets/zonal_buckets/storage_finalize_appendable_object_upload.py create mode 100644 samples/snippets/zonal_buckets/storage_open_multiple_objects_ranged_read.py create mode 100644 samples/snippets/zonal_buckets/storage_open_object_multiple_ranged_read.py create mode 100644 samples/snippets/zonal_buckets/storage_open_object_read_full_object.py create mode 100644 samples/snippets/zonal_buckets/storage_open_object_single_ranged_read.py create mode 100644 samples/snippets/zonal_buckets/storage_pause_and_resume_appendable_upload.py create mode 100644 samples/snippets/zonal_buckets/storage_read_appendable_object_tail.py create mode 100644 samples/snippets/zonal_buckets/zonal_snippets_test.py diff --git a/cloudbuild/run_zonal_tests.sh b/cloudbuild/run_zonal_tests.sh index 283ed6826..22ca8fe4b 100644 --- a/cloudbuild/run_zonal_tests.sh +++ b/cloudbuild/run_zonal_tests.sh @@ -26,3 +26,4 @@ export RUN_ZONAL_SYSTEM_TESTS=True CURRENT_ULIMIT=$(ulimit -n) echo '--- Running Zonal tests on VM with ulimit set to ---' $CURRENT_ULIMIT pytest -vv -s --log-format='%(asctime)s %(levelname)s %(message)s' --log-date-format='%H:%M:%S' tests/system/test_zonal.py +pytest -vv -s --log-format='%(asctime)s %(levelname)s %(message)s' --log-date-format='%H:%M:%S' samples/snippets/zonal_buckets/zonal_snippets_test.py diff --git a/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py b/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py index 8f16294d8..dedff67e2 100644 --- a/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py +++ b/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py @@ -303,11 +303,19 @@ async def download_ranges( :type read_ranges: List[Tuple[int, int, "BytesIO"]] :param read_ranges: A list of tuples, where each tuple represents a - combintaion of byte_range and writeable buffer in format - + combination of byte_range and writeable buffer in format - (`start_byte`, 
`bytes_to_read`, `writeable_buffer`). Buffer has to be provided by the user, and user has to make sure appropriate memory is available in the application to avoid out-of-memory crash. + Special cases: + if the value of `bytes_to_read` is 0, it'll be interpreted as + download all contents until the end of the file from `start_byte`. + Examples: + * (0, 0, buffer) : downloads 0 to end , i.e. entire object. + * (100, 0, buffer) : downloads from 100 to end. + + :type lock: asyncio.Lock :param lock: (Optional) An asyncio lock to synchronize sends and recvs on the underlying bidi-GRPC stream. This is required when multiple diff --git a/google/cloud/storage/_experimental/asyncio/async_write_object_stream.py b/google/cloud/storage/_experimental/asyncio/async_write_object_stream.py index 1454803ad..b79e707f2 100644 --- a/google/cloud/storage/_experimental/asyncio/async_write_object_stream.py +++ b/google/cloud/storage/_experimental/asyncio/async_write_object_stream.py @@ -101,8 +101,16 @@ def __init__( self.object_resource: Optional[_storage_v2.Object] = None async def open(self) -> None: - """Opening an object for write , should do it's state lookup - to know what's the persisted size is. + """ + Opens the bidi-gRPC connection to write to the object. + + This method sends an initial request to start the stream and receives + the first response containing metadata and a write handle. + + :rtype: None + :raises ValueError: If the stream is already open. + :raises google.api_core.exceptions.FailedPrecondition: + if `generation_number` is 0 and object already exists. 
""" if self._is_stream_open: raise ValueError("Stream is already open") diff --git a/samples/snippets/zonal_buckets/storage_create_and_write_appendable_object.py b/samples/snippets/zonal_buckets/storage_create_and_write_appendable_object.py new file mode 100644 index 000000000..87da6cfdd --- /dev/null +++ b/samples/snippets/zonal_buckets/storage_create_and_write_appendable_object.py @@ -0,0 +1,77 @@ +#!/usr/bin/env python + +# Copyright 2026 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import asyncio + +from google.cloud.storage._experimental.asyncio.async_appendable_object_writer import ( + AsyncAppendableObjectWriter, +) +from google.cloud.storage._experimental.asyncio.async_grpc_client import AsyncGrpcClient + + +# [START storage_create_and_write_appendable_object] + + +async def storage_create_and_write_appendable_object( + bucket_name, object_name, grpc_client=None +): + """Uploads an appendable object to zonal bucket. + + grpc_client: an existing grpc_client to use, this is only for testing. + """ + + if grpc_client is None: + grpc_client = AsyncGrpcClient().grpc_client + writer = AsyncAppendableObjectWriter( + client=grpc_client, + bucket_name=bucket_name, + object_name=object_name, + generation=0, # throws `FailedPrecondition` if object already exists. + ) + # This creates a new appendable object of size 0 and opens it for appending. 
+ await writer.open() + + # appends data to the object + # you can perform `.append` multiple times as needed. Data will be appended + # to the end of the object. + await writer.append(b"Some data") + + # Once all appends are done, close the gRPC bidirectional stream. + await writer.close() + + print( + f"Appended object {object_name} created of size {writer.persisted_size} bytes." + ) + + +# [END storage_create_and_write_appendable_object] + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter + ) + parser.add_argument("--bucket_name", help="Your Cloud Storage bucket name.") + parser.add_argument("--object_name", help="Your Cloud Storage object name.") + + args = parser.parse_args() + + asyncio.run( + storage_create_and_write_appendable_object( + bucket_name=args.bucket_name, + object_name=args.object_name, + ) + ) diff --git a/samples/snippets/zonal_buckets/storage_finalize_appendable_object_upload.py b/samples/snippets/zonal_buckets/storage_finalize_appendable_object_upload.py new file mode 100644 index 000000000..4a4b64289 --- /dev/null +++ b/samples/snippets/zonal_buckets/storage_finalize_appendable_object_upload.py @@ -0,0 +1,78 @@ +#!/usr/bin/env python + +# Copyright 2026 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import argparse +import asyncio + +from google.cloud.storage._experimental.asyncio.async_appendable_object_writer import ( + AsyncAppendableObjectWriter, +) +from google.cloud.storage._experimental.asyncio.async_grpc_client import AsyncGrpcClient + + +# [START storage_finalize_appendable_object_upload] +async def storage_finalize_appendable_object_upload( + bucket_name, object_name, grpc_client=None +): + """Creates, writes to, and finalizes an appendable object. + + grpc_client: an existing grpc_client to use, this is only for testing. + """ + + if grpc_client is None: + grpc_client = AsyncGrpcClient().grpc_client + writer = AsyncAppendableObjectWriter( + client=grpc_client, + bucket_name=bucket_name, + object_name=object_name, + generation=0, # throws `FailedPrecondition` if object already exists. + ) + # This creates a new appendable object of size 0 and opens it for appending. + await writer.open() + + # Appends data to the object. + await writer.append(b"Some data") + + # finalize the appendable object, + # NOTE: + # 1. once finalized no more appends can be done to the object. + # 2. If you don't want to finalize, you can simply call `writer.close` + # 3. calling `.finalize()` also closes the grpc-bidi stream, calling + # `.close` after `.finalize` may lead to undefined behavior. 
+ object_resource = await writer.finalize() + + print(f"Appendable object {object_name} created and finalized.") + print("Object Metadata:") + print(object_resource) + + +# [END storage_finalize_appendable_object_upload] + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter + ) + parser.add_argument("--bucket_name", help="Your Cloud Storage bucket name.") + parser.add_argument("--object_name", help="Your Cloud Storage object name.") + + args = parser.parse_args() + + asyncio.run( + storage_finalize_appendable_object_upload( + bucket_name=args.bucket_name, + object_name=args.object_name, + ) + ) diff --git a/samples/snippets/zonal_buckets/storage_open_multiple_objects_ranged_read.py b/samples/snippets/zonal_buckets/storage_open_multiple_objects_ranged_read.py new file mode 100644 index 000000000..c22023a5c --- /dev/null +++ b/samples/snippets/zonal_buckets/storage_open_multiple_objects_ranged_read.py @@ -0,0 +1,85 @@ +#!/usr/bin/env python + +# Copyright 2026 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Downloads a range of bytes from multiple objects concurrently.""" +import argparse +import asyncio +from io import BytesIO + +from google.cloud.storage._experimental.asyncio.async_grpc_client import ( + AsyncGrpcClient, +) +from google.cloud.storage._experimental.asyncio.async_multi_range_downloader import ( + AsyncMultiRangeDownloader, +) + + +# [START storage_open_multiple_objects_ranged_read] +async def storage_open_multiple_objects_ranged_read( + bucket_name, object_names, grpc_client=None +): + """Downloads a range of bytes from multiple objects concurrently. + + grpc_client: an existing grpc_client to use, this is only for testing. + """ + if grpc_client is None: + grpc_client = AsyncGrpcClient().grpc_client + + async def _download_range(object_name): + """Helper coroutine to download a range from a single object.""" + mrd = AsyncMultiRangeDownloader(grpc_client, bucket_name, object_name) + try: + # Open the object, mrd always opens in read mode. + await mrd.open() + + # Each object downloads the first 100 bytes. + start_byte = 0 + size = 100 + + # requested range will be downloaded into this buffer, user may provide + # their own buffer or file-like object. + output_buffer = BytesIO() + await mrd.download_ranges([(start_byte, size, output_buffer)]) + finally: + if mrd.is_stream_open: + await mrd.close() + + # Downloaded size can differ from requested size if object is smaller. + # mrd will download at most up to the end of the object. 
+ downloaded_size = output_buffer.getbuffer().nbytes + print(f"Downloaded {downloaded_size} bytes from {object_name}") + + download_tasks = [_download_range(name) for name in object_names] + await asyncio.gather(*download_tasks) + + +# [END storage_open_multiple_objects_ranged_read] + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter + ) + parser.add_argument("--bucket_name", help="Your Cloud Storage bucket name.") + parser.add_argument( + "--object_names", nargs="+", help="Your Cloud Storage object name(s)." + ) + + args = parser.parse_args() + + asyncio.run( + storage_open_multiple_objects_ranged_read(args.bucket_name, args.object_names) + ) diff --git a/samples/snippets/zonal_buckets/storage_open_object_multiple_ranged_read.py b/samples/snippets/zonal_buckets/storage_open_object_multiple_ranged_read.py new file mode 100644 index 000000000..55d92d9da --- /dev/null +++ b/samples/snippets/zonal_buckets/storage_open_object_multiple_ranged_read.py @@ -0,0 +1,85 @@ +#!/usr/bin/env python + +# Copyright 2026 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+import argparse
+import asyncio
+from io import BytesIO
+
+from google.cloud.storage._experimental.asyncio.async_grpc_client import AsyncGrpcClient
+from google.cloud.storage._experimental.asyncio.async_multi_range_downloader import (
+    AsyncMultiRangeDownloader,
+)
+
+
+# [START storage_open_object_multiple_ranged_read]
+async def storage_open_object_multiple_ranged_read(
+    bucket_name, object_name, grpc_client=None
+):
+    """Downloads multiple ranges of bytes from a single object into different buffers.
+
+    grpc_client: an existing grpc_client to use, this is only for testing.
+    """
+    if grpc_client is None:
+        grpc_client = AsyncGrpcClient().grpc_client
+
+    mrd = AsyncMultiRangeDownloader(grpc_client, bucket_name, object_name)
+
+    try:
+        # Open the object, mrd always opens in read mode.
+        await mrd.open()
+
+        # Specify four different buffers to download ranges into.
+        buffers = [BytesIO(), BytesIO(), BytesIO(), BytesIO()]
+
+        # Define the ranges to download. Each range is a tuple of (start_byte, size, buffer).
+        # All ranges will download 10 bytes from different starting positions.
+        # We choose arbitrary start bytes for this example. An object should be large enough.
+        # A user can choose any start byte between 0 and `object_size`.
+        # If `start_byte` is greater than `object_size`, mrd will throw an error.
+        ranges = [
+            (0, 10, buffers[0]),
+            (20, 10, buffers[1]),
+            (40, 10, buffers[2]),
+            (60, 10, buffers[3]),
+        ]
+
+        await mrd.download_ranges(ranges)
+
+    finally:
+        await mrd.close()
+
+    # Print the downloaded content from each buffer.
+ for i, output_buffer in enumerate(buffers): + downloaded_size = output_buffer.getbuffer().nbytes + print( + f"Downloaded {downloaded_size} bytes into buffer {i + 1} from start byte {ranges[i][0]}: {output_buffer.getvalue()}" + ) + + +# [END storage_open_object_multiple_ranged_read] + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter + ) + parser.add_argument("--bucket_name", help="Your Cloud Storage bucket name.") + parser.add_argument("--object_name", help="Your Cloud Storage object name.") + + args = parser.parse_args() + + asyncio.run( + storage_open_object_multiple_ranged_read(args.bucket_name, args.object_name) + ) diff --git a/samples/snippets/zonal_buckets/storage_open_object_read_full_object.py b/samples/snippets/zonal_buckets/storage_open_object_read_full_object.py new file mode 100644 index 000000000..044c42b71 --- /dev/null +++ b/samples/snippets/zonal_buckets/storage_open_object_read_full_object.py @@ -0,0 +1,72 @@ +#!/usr/bin/env python + +# Copyright 2026 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import argparse +import asyncio +from io import BytesIO + +from google.cloud.storage._experimental.asyncio.async_grpc_client import AsyncGrpcClient +from google.cloud.storage._experimental.asyncio.async_multi_range_downloader import ( + AsyncMultiRangeDownloader, +) + + +# [START storage_open_object_read_full_object] +async def storage_open_object_read_full_object( + bucket_name, object_name, grpc_client=None +): + """Downloads the entire content of an object using a multi-range downloader. + + grpc_client: an existing grpc_client to use, this is only for testing. + """ + if grpc_client is None: + grpc_client = AsyncGrpcClient().grpc_client + + # mrd = Multi-Range-Downloader + mrd = AsyncMultiRangeDownloader(grpc_client, bucket_name, object_name) + + try: + # Open the object, mrd always opens in read mode. + await mrd.open() + + # This could be any buffer or file-like object. + output_buffer = BytesIO() + # A download range of (0, 0) means to read from the beginning to the end. + await mrd.download_ranges([(0, 0, output_buffer)]) + finally: + if mrd.is_stream_open: + await mrd.close() + + downloaded_bytes = output_buffer.getvalue() + print( + f"Downloaded all {len(downloaded_bytes)} bytes from object {object_name} in bucket {bucket_name}." 
+ ) + + +# [END storage_open_object_read_full_object] + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter + ) + parser.add_argument("--bucket_name", help="Your Cloud Storage bucket name.") + parser.add_argument("--object_name", help="Your Cloud Storage object name.") + + args = parser.parse_args() + + asyncio.run( + storage_open_object_read_full_object(args.bucket_name, args.object_name) + ) diff --git a/samples/snippets/zonal_buckets/storage_open_object_single_ranged_read.py b/samples/snippets/zonal_buckets/storage_open_object_single_ranged_read.py new file mode 100644 index 000000000..41effce36 --- /dev/null +++ b/samples/snippets/zonal_buckets/storage_open_object_single_ranged_read.py @@ -0,0 +1,77 @@ +#!/usr/bin/env python + +# Copyright 2026 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import asyncio +from io import BytesIO + +from google.cloud.storage._experimental.asyncio.async_grpc_client import AsyncGrpcClient +from google.cloud.storage._experimental.asyncio.async_multi_range_downloader import ( + AsyncMultiRangeDownloader, +) + + +# [START storage_open_object_single_ranged_read] +async def storage_open_object_single_ranged_read( + bucket_name, object_name, start_byte, size, grpc_client=None +): + """Downloads a range of bytes from an object. + + grpc_client: an existing grpc_client to use, this is only for testing. 
+ """ + if grpc_client is None: + grpc_client = AsyncGrpcClient().grpc_client + + mrd = AsyncMultiRangeDownloader(grpc_client, bucket_name, object_name) + + try: + # Open the object, mrd always opens in read mode. + await mrd.open() + + # requested range will be downloaded into this buffer, user may provide + # their own buffer or file-like object. + output_buffer = BytesIO() + await mrd.download_ranges([(start_byte, size, output_buffer)]) + finally: + if mrd.is_stream_open: + await mrd.close() + + # Downloaded size can differ from requested size if object is smaller. + # mrd will download at most up to the end of the object. + downloaded_size = output_buffer.getbuffer().nbytes + print(f"Downloaded {downloaded_size} bytes from {object_name}") + + +# [END storage_open_object_single_ranged_read] + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter + ) + parser.add_argument("--bucket_name", help="Your Cloud Storage bucket name.") + parser.add_argument("--object_name", help="Your Cloud Storage object name.") + parser.add_argument( + "--start_byte", type=int, help="The starting byte of the range." + ) + parser.add_argument("--size", type=int, help="The number of bytes to download.") + + args = parser.parse_args() + + asyncio.run( + storage_open_object_single_ranged_read( + args.bucket_name, args.object_name, args.start_byte, args.size + ) + ) diff --git a/samples/snippets/zonal_buckets/storage_pause_and_resume_appendable_upload.py b/samples/snippets/zonal_buckets/storage_pause_and_resume_appendable_upload.py new file mode 100644 index 000000000..db54bb821 --- /dev/null +++ b/samples/snippets/zonal_buckets/storage_pause_and_resume_appendable_upload.py @@ -0,0 +1,94 @@ +#!/usr/bin/env python + +# Copyright 2026 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import asyncio
+
+from google.cloud.storage._experimental.asyncio.async_appendable_object_writer import (
+    AsyncAppendableObjectWriter,
+)
+from google.cloud.storage._experimental.asyncio.async_grpc_client import AsyncGrpcClient
+
+
+# [START storage_pause_and_resume_appendable_upload]
+async def storage_pause_and_resume_appendable_upload(
+    bucket_name, object_name, grpc_client=None
+):
+    """Demonstrates pausing and resuming an appendable object upload.
+
+    grpc_client: an existing grpc_client to use, this is only for testing.
+    """
+    if grpc_client is None:
+        grpc_client = AsyncGrpcClient().grpc_client
+
+    writer1 = AsyncAppendableObjectWriter(
+        client=grpc_client,
+        bucket_name=bucket_name,
+        object_name=object_name,
+    )
+    await writer1.open()
+    await writer1.append(b"First part of the data. ")
+    print(f"Appended {writer1.persisted_size} bytes with the first writer.")
+
+    # 2. After appending some data, close the writer to "pause" the upload.
+    # NOTE: you can pause indefinitely and still read the content uploaded so far using MRD.
+    await writer1.close()
+
+    print("First writer closed. Upload is 'paused'.")
+
+    # 3. Create a new writer, passing the generation number from the previous
+    # writer. This is a precondition to ensure that the object hasn't been
+    # modified since we last accessed it.
+ generation_to_resume = writer1.generation + print(f"Generation to resume from is: {generation_to_resume}") + + writer2 = AsyncAppendableObjectWriter( + client=grpc_client, + bucket_name=bucket_name, + object_name=object_name, + generation=generation_to_resume, + ) + # 4. Open the new writer. + try: + await writer2.open() + + # 5. Append some more data using the new writer. + await writer2.append(b"Second part of the data.") + print(f"Appended more data. Total size is now {writer2.persisted_size} bytes.") + finally: + # 6. Finally, close the new writer. + if writer2._is_stream_open: + await writer2.close() + print("Second writer closed. Full object uploaded.") + + +# [END storage_pause_and_resume_appendable_upload] + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter + ) + parser.add_argument("--bucket_name", help="Your Cloud Storage bucket name.") + parser.add_argument("--object_name", help="Your Cloud Storage object name.") + + args = parser.parse_args() + + asyncio.run( + storage_pause_and_resume_appendable_upload( + bucket_name=args.bucket_name, + object_name=args.object_name, + ) + ) diff --git a/samples/snippets/zonal_buckets/storage_read_appendable_object_tail.py b/samples/snippets/zonal_buckets/storage_read_appendable_object_tail.py new file mode 100644 index 000000000..587513056 --- /dev/null +++ b/samples/snippets/zonal_buckets/storage_read_appendable_object_tail.py @@ -0,0 +1,141 @@ +#!/usr/bin/env python + +# Copyright 2026 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import asyncio +import time +from datetime import datetime +from io import BytesIO + +from google.cloud.storage._experimental.asyncio.async_appendable_object_writer import ( + AsyncAppendableObjectWriter, +) +from google.cloud.storage._experimental.asyncio.async_grpc_client import AsyncGrpcClient +from google.cloud.storage._experimental.asyncio.async_multi_range_downloader import ( + AsyncMultiRangeDownloader, +) + +BYTES_TO_APPEND = b"fav_bytes." +NUM_BYTES_TO_APPEND_EVERY_SECOND = len(BYTES_TO_APPEND) + + +# [START storage_read_appendable_object_tail] +async def appender(writer: AsyncAppendableObjectWriter, duration: int): + """Appends 10 bytes to the object every second for a given duration.""" + print("Appender started.") + bytes_appended = 0 + for i in range(duration): + await writer.append(BYTES_TO_APPEND) + now = datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")[:-3] + bytes_appended += NUM_BYTES_TO_APPEND_EVERY_SECOND + print( + f"[{now}] Appended {NUM_BYTES_TO_APPEND_EVERY_SECOND} new bytes. Total appended: {bytes_appended} bytes." + ) + await asyncio.sleep(1) + print("Appender finished.") + + +async def tailer( + bucket_name: str, object_name: str, duration: int, client: AsyncGrpcClient +): + """Tails the object by reading new data as it is appended.""" + print("Tailer started.") + start_byte = 0 + start_time = time.monotonic() + mrd = AsyncMultiRangeDownloader(client, bucket_name, object_name) + try: + await mrd.open() + # Run the tailer for the specified duration. 
+ while time.monotonic() - start_time < duration: + output_buffer = BytesIO() + # A download range of (start, 0) means to read from 'start' to the end. + await mrd.download_ranges([(start_byte, 0, output_buffer)]) + + bytes_downloaded = output_buffer.getbuffer().nbytes + if bytes_downloaded > 0: + now = datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")[:-3] + print( + f"[{now}] Tailer read {bytes_downloaded} new bytes: {output_buffer.getvalue()}" + ) + start_byte += bytes_downloaded + + await asyncio.sleep(0.1) # Poll for new data every 0.1 seconds. + finally: + if mrd.is_stream_open: + await mrd.close() + print("Tailer finished.") + + +# read_appendable_object_tail simulates a "tail -f" command on a GCS object. It +# repeatedly polls an appendable object for new content. In a real +# application, the object would be written to by a separate process. +async def read_appendable_object_tail( + bucket_name: str, object_name: str, duration: int, grpc_client=None +): + """Main function to create an appendable object and run tasks. + + grpc_client: an existing grpc_client to use, this is only for testing. + """ + if grpc_client is None: + grpc_client = AsyncGrpcClient().grpc_client + writer = AsyncAppendableObjectWriter( + client=grpc_client, + bucket_name=bucket_name, + object_name=object_name, + ) + # 1. Create an empty appendable object. + try: + # 1. Create an empty appendable object. + await writer.open() + print(f"Created empty appendable object: {object_name}") + + # 2. Create the appender and tailer coroutines. + appender_task = asyncio.create_task(appender(writer, duration)) + tailer_task = asyncio.create_task( + tailer(bucket_name, object_name, duration, grpc_client) + ) + + # 3. Execute the coroutines concurrently. 
+ await asyncio.gather(appender_task, tailer_task) + finally: + if writer._is_stream_open: + await writer.close() + print("Writer closed.") + + +# [END storage_read_appendable_object_tail] + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Demonstrates tailing an appendable GCS object.", + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + parser.add_argument("--bucket_name", help="Your Cloud Storage bucket name.") + parser.add_argument( + "--object_name", help="Your Cloud Storage object name to be created." + ) + parser.add_argument( + "--duration", + type=int, + default=60, + help="Duration in seconds to run the demo.", + ) + + args = parser.parse_args() + + asyncio.run( + read_appendable_object_tail(args.bucket_name, args.object_name, args.duration) + ) diff --git a/samples/snippets/zonal_buckets/zonal_snippets_test.py b/samples/snippets/zonal_buckets/zonal_snippets_test.py new file mode 100644 index 000000000..e45d21c47 --- /dev/null +++ b/samples/snippets/zonal_buckets/zonal_snippets_test.py @@ -0,0 +1,260 @@ +# Copyright 2025 Google, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import asyncio +import uuid +import os + +import pytest +from google.cloud.storage import Client +import contextlib + +from google.cloud.storage._experimental.asyncio.async_grpc_client import AsyncGrpcClient +from google.cloud.storage._experimental.asyncio.async_appendable_object_writer import ( + AsyncAppendableObjectWriter, +) + +# Import all the snippets +import storage_create_and_write_appendable_object +import storage_finalize_appendable_object_upload +import storage_open_multiple_objects_ranged_read +import storage_open_object_multiple_ranged_read +import storage_open_object_read_full_object +import storage_open_object_single_ranged_read +import storage_pause_and_resume_appendable_upload +import storage_read_appendable_object_tail + +pytestmark = pytest.mark.skipif( + os.getenv("RUN_ZONAL_SYSTEM_TESTS") != "True", + reason="Zonal system tests need to be explicitly enabled. This helps scheduling tests in Kokoro and Cloud Build.", +) + + +# TODO: replace this with a fixture once zonal bucket creation / deletion +# is supported in grpc client or json client client. 
+_ZONAL_BUCKET = os.getenv("ZONAL_BUCKET")
+
+
+async def create_async_grpc_client():
+    """Initializes async client and gets the current event loop."""
+    return AsyncGrpcClient().grpc_client
+
+
+# Forcing a single event loop for the whole test session
+@pytest.fixture(scope="session")
+def event_loop():
+    """Redefine pytest-asyncio's event_loop fixture to be session-scoped."""
+    loop = asyncio.get_event_loop_policy().new_event_loop()
+    yield loop
+    loop.close()
+
+
+@pytest.fixture(scope="session")
+def async_grpc_client(event_loop):
+    """Yields a StorageAsyncClient that is closed after the test session."""
+    grpc_client = event_loop.run_until_complete(create_async_grpc_client())
+    yield grpc_client
+
+
+@pytest.fixture(scope="session")
+def json_client():
+    client = Client()
+    with contextlib.closing(client):
+        yield client
+
+
+async def create_appendable_object(grpc_client, object_name, data):
+    writer = AsyncAppendableObjectWriter(
+        client=grpc_client,
+        bucket_name=_ZONAL_BUCKET,
+        object_name=object_name,
+        generation=0, # throws `FailedPrecondition` if object already exists.
+    )
+    await writer.open()
+    await writer.append(data)
+    await writer.close()
+    return writer.generation
+
+
+# TODO: replace this with a fixture once zonal bucket creation / deletion
+# is supported in grpc client or json client.
+_ZONAL_BUCKET = os.getenv("ZONAL_BUCKET") + + +def test_storage_create_and_write_appendable_object( + async_grpc_client, json_client, event_loop, capsys +): + object_name = f"zonal-snippets-test-{uuid.uuid4()}" + + event_loop.run_until_complete( + storage_create_and_write_appendable_object.storage_create_and_write_appendable_object( + _ZONAL_BUCKET, object_name, grpc_client=async_grpc_client + ) + ) + out, _ = capsys.readouterr() + assert f"Appended object {object_name} created of size" in out + + blob = json_client.bucket(_ZONAL_BUCKET).blob(object_name) + blob.delete() + + +def test_storage_finalize_appendable_object_upload( + async_grpc_client, json_client, event_loop, capsys +): + object_name = f"test-finalize-appendable-{uuid.uuid4()}" + event_loop.run_until_complete( + storage_finalize_appendable_object_upload.storage_finalize_appendable_object_upload( + _ZONAL_BUCKET, object_name, grpc_client=async_grpc_client + ) + ) + out, _ = capsys.readouterr() + assert f"Appendable object {object_name} created and finalized." in out + blob = json_client.bucket(_ZONAL_BUCKET).get_blob(object_name) + blob.delete() + + +def test_storage_pause_and_resume_appendable_upload( + async_grpc_client, json_client, event_loop, capsys +): + object_name = f"test-pause-resume-{uuid.uuid4()}" + event_loop.run_until_complete( + storage_pause_and_resume_appendable_upload.storage_pause_and_resume_appendable_upload( + _ZONAL_BUCKET, object_name, grpc_client=async_grpc_client + ) + ) + out, _ = capsys.readouterr() + assert "First writer closed. Upload is 'paused'." in out + assert "Second writer closed. Full object uploaded." 
in out + + blob = json_client.bucket(_ZONAL_BUCKET).get_blob(object_name) + blob.delete() + + +def test_storage_read_appendable_object_tail( + async_grpc_client, json_client, event_loop, capsys +): + object_name = f"test-read-tail-{uuid.uuid4()}" + event_loop.run_until_complete( + storage_read_appendable_object_tail.read_appendable_object_tail( + _ZONAL_BUCKET, object_name, duration=3, grpc_client=async_grpc_client + ) + ) + out, _ = capsys.readouterr() + assert f"Created empty appendable object: {object_name}" in out + assert "Appender started." in out + assert "Tailer started." in out + assert "Tailer read" in out + assert "Tailer finished." in out + assert "Writer closed." in out + + bucket = json_client.bucket(_ZONAL_BUCKET) + blob = bucket.blob(object_name) + blob.delete() + + +def test_storage_open_object_read_full_object( + async_grpc_client, json_client, event_loop, capsys +): + object_name = f"test-open-read-full-{uuid.uuid4()}" + data = b"Hello, is it me you're looking for?" + event_loop.run_until_complete( + create_appendable_object(async_grpc_client, object_name, data) + ) + event_loop.run_until_complete( + storage_open_object_read_full_object.storage_open_object_read_full_object( + _ZONAL_BUCKET, object_name, grpc_client=async_grpc_client + ) + ) + out, _ = capsys.readouterr() + assert ( + f"Downloaded all {len(data)} bytes from object {object_name} in bucket {_ZONAL_BUCKET}." + in out + ) + blob = json_client.bucket(_ZONAL_BUCKET).blob(object_name) + blob.delete() + + +def test_storage_open_object_single_ranged_read( + async_grpc_client, json_client, event_loop, capsys +): + object_name = f"test-open-single-range-{uuid.uuid4()}" + event_loop.run_until_complete( + create_appendable_object( + async_grpc_client, object_name, b"Hello, is it me you're looking for?" 
+ ) + ) + download_size = 5 + event_loop.run_until_complete( + storage_open_object_single_ranged_read.storage_open_object_single_ranged_read( + _ZONAL_BUCKET, + object_name, + start_byte=0, + size=download_size, + grpc_client=async_grpc_client, + ) + ) + out, _ = capsys.readouterr() + assert f"Downloaded {download_size} bytes from {object_name}" in out + blob = json_client.bucket(_ZONAL_BUCKET).blob(object_name) + blob.delete() + + +def test_storage_open_object_multiple_ranged_read( + async_grpc_client, json_client, event_loop, capsys +): + object_name = f"test-open-multi-range-{uuid.uuid4()}" + data = b"a" * 100 + event_loop.run_until_complete( + create_appendable_object(async_grpc_client, object_name, data) + ) + event_loop.run_until_complete( + storage_open_object_multiple_ranged_read.storage_open_object_multiple_ranged_read( + _ZONAL_BUCKET, object_name, grpc_client=async_grpc_client + ) + ) + out, _ = capsys.readouterr() + assert "Downloaded 10 bytes into buffer 1 from start byte 0: b'aaaaaaaaaa'" in out + assert "Downloaded 10 bytes into buffer 2 from start byte 20: b'aaaaaaaaaa'" in out + assert "Downloaded 10 bytes into buffer 3 from start byte 40: b'aaaaaaaaaa'" in out + assert "Downloaded 10 bytes into buffer 4 from start byte 60: b'aaaaaaaaaa'" in out + blob = json_client.bucket(_ZONAL_BUCKET).blob(object_name) + blob.delete() + + +def test_storage_open_multiple_objects_ranged_read( + async_grpc_client, json_client, event_loop, capsys +): + blob1_name = f"multi-obj-1-{uuid.uuid4()}" + blob2_name = f"multi-obj-2-{uuid.uuid4()}" + data1 = b"Content of object 1" + data2 = b"Content of object 2" + event_loop.run_until_complete( + create_appendable_object(async_grpc_client, blob1_name, data1) + ) + event_loop.run_until_complete( + create_appendable_object(async_grpc_client, blob2_name, data2) + ) + + event_loop.run_until_complete( + storage_open_multiple_objects_ranged_read.storage_open_multiple_objects_ranged_read( + _ZONAL_BUCKET, [blob1_name, blob2_name], 
grpc_client=async_grpc_client + ) + ) + out, _ = capsys.readouterr() + assert f"Downloaded {len(data1)} bytes from {blob1_name}" in out + assert f"Downloaded {len(data2)} bytes from {blob2_name}" in out + blob1 = json_client.bucket(_ZONAL_BUCKET).blob(blob1_name) + blob2 = json_client.bucket(_ZONAL_BUCKET).blob(blob2_name) + blob1.delete() + blob2.delete() diff --git a/setup.py b/setup.py index b45053856..8ac6cc8b5 100644 --- a/setup.py +++ b/setup.py @@ -59,6 +59,24 @@ "tracing": [ "opentelemetry-api >= 1.1.0, < 2.0.0", ], + "testing": [ + "mock", + "pytest", + "pytest-cov", + "pytest-asyncio", + "pytest-rerunfailures", + "pytest-xdist", + "google-cloud-testutils", + "google-cloud-iam", + "google-cloud-pubsub", + "google-cloud-kms", + "brotli", + "coverage", + "pyopenssl", + "opentelemetry-sdk", + "flake8" + "black" + ], } diff --git a/tests/unit/asyncio/test_async_appendable_object_writer.py b/tests/unit/asyncio/test_async_appendable_object_writer.py index c19622238..2a9747f28 100644 --- a/tests/unit/asyncio/test_async_appendable_object_writer.py +++ b/tests/unit/asyncio/test_async_appendable_object_writer.py @@ -509,9 +509,7 @@ async def test_append_sends_data_in_chunks(mock_write_object_stream, mock_client data = b"a" * (_MAX_CHUNK_SIZE_BYTES + 1) mock_stream.recv = mock.AsyncMock( - return_value=_storage_v2.BidiWriteObjectResponse( - persisted_size=100 + len(data) - ) + return_value=_storage_v2.BidiWriteObjectResponse(persisted_size=100 + len(data)) ) await writer.append(data) @@ -620,9 +618,7 @@ async def test_append_data_two_times(mock_write_object_stream, mock_client): data1 = b"a" * (_MAX_CHUNK_SIZE_BYTES + 10) mock_stream.recv = mock.AsyncMock( - return_value=_storage_v2.BidiWriteObjectResponse( - persisted_size= len(data1) - ) + return_value=_storage_v2.BidiWriteObjectResponse(persisted_size=len(data1)) ) await writer.append(data1) @@ -634,7 +630,7 @@ async def test_append_data_two_times(mock_write_object_stream, mock_client): data2 = b"b" * 
(_MAX_CHUNK_SIZE_BYTES + 20) mock_stream.recv = mock.AsyncMock( return_value=_storage_v2.BidiWriteObjectResponse( - persisted_size= len(data2) + len(data1) + persisted_size=len(data2) + len(data1) ) ) await writer.append(data2) From cdb2486bb051dcbfbffc2510aff6aacede5e54d3 Mon Sep 17 00:00:00 2001 From: Chandra Shekhar Sirimala Date: Wed, 21 Jan 2026 12:01:06 +0530 Subject: [PATCH 04/23] feat(grpc_user_agent): send `user_agent` to grpc channel (#1712) * feat(async_grpc_client): send `user_agent` to grpc channel by providing it as channel arg - "grpc.primary_user_agent". * This will send "user_agent" to all RPC calls. --- .../asyncio/async_grpc_client.py | 15 ++++- tests/unit/asyncio/test_async_grpc_client.py | 67 ++++++++++++++++--- 2 files changed, 68 insertions(+), 14 deletions(-) diff --git a/google/cloud/storage/_experimental/asyncio/async_grpc_client.py b/google/cloud/storage/_experimental/asyncio/async_grpc_client.py index a5cccca59..b455b1c29 100644 --- a/google/cloud/storage/_experimental/asyncio/async_grpc_client.py +++ b/google/cloud/storage/_experimental/asyncio/async_grpc_client.py @@ -15,6 +15,9 @@ """An async client for interacting with Google Cloud Storage using the gRPC API.""" from google.cloud import _storage_v2 as storage_v2 +from google.cloud._storage_v2.services.storage.transports.base import ( + DEFAULT_CLIENT_INFO, +) class AsyncGrpcClient: @@ -30,7 +33,7 @@ class AsyncGrpcClient: The client info used to send a user-agent string along with API requests. If ``None``, then default info will be used. - :type client_options: :class:`~google.api_core.client_options.ClientOptions` or :class:`dict` + :type client_options: :class:`~google.api_core.client_options.ClientOptions` :param client_options: (Optional) Client options used to set user options on the client. @@ -39,7 +42,6 @@ class AsyncGrpcClient: (Optional) Whether to attempt to use DirectPath for gRPC connections. Defaults to ``True``. 
""" - def __init__( self, credentials=None, @@ -65,8 +67,15 @@ def _create_async_grpc_client( transport_cls = storage_v2.StorageAsyncClient.get_transport_class( "grpc_asyncio" ) + + if client_info is None: + client_info = DEFAULT_CLIENT_INFO + primary_user_agent = client_info.to_user_agent() + channel = transport_cls.create_channel( - attempt_direct_path=attempt_direct_path, credentials=credentials + attempt_direct_path=attempt_direct_path, + credentials=credentials, + options=(("grpc.primary_user_agent", primary_user_agent),), ) transport = transport_cls(channel=channel) diff --git a/tests/unit/asyncio/test_async_grpc_client.py b/tests/unit/asyncio/test_async_grpc_client.py index eb06ab938..02b416f6b 100644 --- a/tests/unit/asyncio/test_async_grpc_client.py +++ b/tests/unit/asyncio/test_async_grpc_client.py @@ -16,6 +16,11 @@ from unittest import mock from google.auth import credentials as auth_credentials from google.auth.credentials import AnonymousCredentials +from google.api_core import client_info as client_info_lib +from google.cloud.storage._experimental.asyncio import async_grpc_client +from google.cloud.storage._experimental.asyncio.async_grpc_client import ( + DEFAULT_CLIENT_INFO, +) def _make_credentials(spec=None): @@ -27,19 +32,25 @@ def _make_credentials(spec=None): class TestAsyncGrpcClient(unittest.TestCase): @mock.patch("google.cloud._storage_v2.StorageAsyncClient") def test_constructor_default_options(self, mock_async_storage_client): - from google.cloud.storage._experimental.asyncio import async_grpc_client - + # Arrange mock_transport_cls = mock.MagicMock() mock_async_storage_client.get_transport_class.return_value = mock_transport_cls mock_creds = _make_credentials() + primary_user_agent = DEFAULT_CLIENT_INFO.to_user_agent() + expected_options = (("grpc.primary_user_agent", primary_user_agent),) + + # Act async_grpc_client.AsyncGrpcClient(credentials=mock_creds) + # Assert mock_async_storage_client.get_transport_class.assert_called_once_with( 
"grpc_asyncio" ) mock_transport_cls.create_channel.assert_called_once_with( - attempt_direct_path=True, credentials=mock_creds + attempt_direct_path=True, + credentials=mock_creds, + options=expected_options, ) mock_channel = mock_transport_cls.create_channel.return_value mock_transport_cls.assert_called_once_with(channel=mock_channel) @@ -47,12 +58,34 @@ def test_constructor_default_options(self, mock_async_storage_client): mock_async_storage_client.assert_called_once_with( transport=mock_transport, client_options=None, - client_info=None, + client_info=DEFAULT_CLIENT_INFO, + ) + + @mock.patch("google.cloud._storage_v2.StorageAsyncClient") + def test_constructor_with_client_info(self, mock_async_storage_client): + + mock_transport_cls = mock.MagicMock() + mock_async_storage_client.get_transport_class.return_value = mock_transport_cls + mock_creds = _make_credentials() + client_info = client_info_lib.ClientInfo( + client_library_version="1.2.3", + ) + + async_grpc_client.AsyncGrpcClient( + credentials=mock_creds, client_info=client_info + ) + + primary_user_agent = client_info.to_user_agent() + expected_options = (("grpc.primary_user_agent", primary_user_agent),) + + mock_transport_cls.create_channel.assert_called_once_with( + attempt_direct_path=True, + credentials=mock_creds, + options=expected_options, ) @mock.patch("google.cloud._storage_v2.StorageAsyncClient") def test_constructor_disables_directpath(self, mock_async_storage_client): - from google.cloud.storage._experimental.asyncio import async_grpc_client mock_transport_cls = mock.MagicMock() mock_async_storage_client.get_transport_class.return_value = mock_transport_cls @@ -62,15 +95,19 @@ def test_constructor_disables_directpath(self, mock_async_storage_client): credentials=mock_creds, attempt_direct_path=False ) + primary_user_agent = DEFAULT_CLIENT_INFO.to_user_agent() + expected_options = (("grpc.primary_user_agent", primary_user_agent),) + mock_transport_cls.create_channel.assert_called_once_with( - 
attempt_direct_path=False, credentials=mock_creds + attempt_direct_path=False, + credentials=mock_creds, + options=expected_options, ) mock_channel = mock_transport_cls.create_channel.return_value mock_transport_cls.assert_called_once_with(channel=mock_channel) @mock.patch("google.cloud._storage_v2.StorageAsyncClient") def test_grpc_client_property(self, mock_grpc_gapic_client): - from google.cloud.storage._experimental.asyncio import async_grpc_client # Arrange mock_transport_cls = mock.MagicMock() @@ -81,7 +118,8 @@ def test_grpc_client_property(self, mock_grpc_gapic_client): mock_transport_cls.return_value = mock.sentinel.transport mock_creds = _make_credentials() - mock_client_info = mock.sentinel.client_info + mock_client_info = mock.MagicMock(spec=client_info_lib.ClientInfo) + mock_client_info.to_user_agent.return_value = "test-user-agent" mock_client_options = mock.sentinel.client_options mock_attempt_direct_path = mock.sentinel.attempt_direct_path @@ -102,8 +140,11 @@ def test_grpc_client_property(self, mock_grpc_gapic_client): retrieved_client = client.grpc_client # Assert + expected_options = (("grpc.primary_user_agent", "test-user-agent"),) mock_transport_cls.create_channel.assert_called_once_with( - attempt_direct_path=mock_attempt_direct_path, credentials=mock_creds + attempt_direct_path=mock_attempt_direct_path, + credentials=mock_creds, + options=expected_options, ) mock_transport_cls.assere_with(channel=channel_sentinel) mock_grpc_gapic_client.assert_called_once_with( @@ -115,7 +156,6 @@ def test_grpc_client_property(self, mock_grpc_gapic_client): @mock.patch("google.cloud._storage_v2.StorageAsyncClient") def test_grpc_client_with_anon_creds(self, mock_grpc_gapic_client): - from google.cloud.storage._experimental.asyncio import async_grpc_client # Arrange mock_transport_cls = mock.MagicMock() @@ -133,7 +173,12 @@ def test_grpc_client_with_anon_creds(self, mock_grpc_gapic_client): # Assert self.assertIs(retrieved_client, 
mock_grpc_gapic_client.return_value) + primary_user_agent = DEFAULT_CLIENT_INFO.to_user_agent() + expected_options = (("grpc.primary_user_agent", primary_user_agent),) + mock_transport_cls.create_channel.assert_called_once_with( - attempt_direct_path=True, credentials=anonymous_creds + attempt_direct_path=True, + credentials=anonymous_creds, + options=expected_options, ) mock_transport_cls.assert_called_once_with(channel=channel_sentinel) From cc4831d7e253b265b0b96e08b5479f4c759be442 Mon Sep 17 00:00:00 2001 From: Chandra Shekhar Sirimala Date: Wed, 21 Jan 2026 16:25:26 +0530 Subject: [PATCH 05/23] fix: Fix formatting in setup.py dependencies list (#1713) Fix formatting in setup.py dependencies list --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 8ac6cc8b5..8a10fc8de 100644 --- a/setup.py +++ b/setup.py @@ -74,8 +74,8 @@ "coverage", "pyopenssl", "opentelemetry-sdk", - "flake8" - "black" + "flake8", + "black", ], } From 1917649fac41481da1adea6c2a9f4ab1298a34c4 Mon Sep 17 00:00:00 2001 From: Chandra Shekhar Sirimala Date: Wed, 21 Jan 2026 16:51:18 +0530 Subject: [PATCH 06/23] feat: Add micro-benchmarks for reads comparing standard (regional) vs rapid (zonal) buckets. (#1697) * Add performance microbenchmarking suite for Sequential and Random Reads * This compares Regional (Standard Storage) with Zonal (Rapid Storage) buckets. 
* Regional uses JSON wheres Zonal uses gRPC bidi --------- Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- .gitignore | 3 + setup.py | 6 + tests/perf/__init__.py | 0 tests/perf/microbenchmarks/README.md | 38 ++ tests/perf/microbenchmarks/__init__.py | 13 + tests/perf/microbenchmarks/_utils.py | 163 +++++++ tests/perf/microbenchmarks/conftest.py | 144 ++++++ tests/perf/microbenchmarks/json_to_csv.py | 190 ++++++++ tests/perf/microbenchmarks/parameters.py | 28 ++ tests/perf/microbenchmarks/reads/__init__.py | 0 tests/perf/microbenchmarks/reads/config.py | 108 +++++ tests/perf/microbenchmarks/reads/config.yaml | 49 +++ .../perf/microbenchmarks/reads/parameters.py | 20 + .../perf/microbenchmarks/reads/test_reads.py | 415 ++++++++++++++++++ .../perf/microbenchmarks/resource_monitor.py | 99 +++++ 15 files changed, 1276 insertions(+) create mode 100644 tests/perf/__init__.py create mode 100644 tests/perf/microbenchmarks/README.md create mode 100644 tests/perf/microbenchmarks/__init__.py create mode 100644 tests/perf/microbenchmarks/_utils.py create mode 100644 tests/perf/microbenchmarks/conftest.py create mode 100644 tests/perf/microbenchmarks/json_to_csv.py create mode 100644 tests/perf/microbenchmarks/parameters.py create mode 100644 tests/perf/microbenchmarks/reads/__init__.py create mode 100644 tests/perf/microbenchmarks/reads/config.py create mode 100644 tests/perf/microbenchmarks/reads/config.yaml create mode 100644 tests/perf/microbenchmarks/reads/parameters.py create mode 100644 tests/perf/microbenchmarks/reads/test_reads.py create mode 100644 tests/perf/microbenchmarks/resource_monitor.py diff --git a/.gitignore b/.gitignore index d083ea1dd..220056c20 100644 --- a/.gitignore +++ b/.gitignore @@ -62,3 +62,6 @@ system_tests/local_test_setup # Make sure a generated file isn't accidentally committed. 
pylintrc pylintrc.test + +# Benchmarking results and logs +__benchmark_results__/** diff --git a/setup.py b/setup.py index 8a10fc8de..02cd11140 100644 --- a/setup.py +++ b/setup.py @@ -60,6 +60,12 @@ "opentelemetry-api >= 1.1.0, < 2.0.0", ], "testing": [ + "google-cloud-testutils", + "numpy", + "psutil", + "py-cpuinfo", + "pytest-benchmark", + "PyYAML", "mock", "pytest", "pytest-cov", diff --git a/tests/perf/__init__.py b/tests/perf/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/perf/microbenchmarks/README.md b/tests/perf/microbenchmarks/README.md new file mode 100644 index 000000000..0219a5bd9 --- /dev/null +++ b/tests/perf/microbenchmarks/README.md @@ -0,0 +1,38 @@ +# Performance Microbenchmarks + +This directory contains performance microbenchmarks for the Python Storage client library. + +## Usage + +To run the benchmarks, use `pytest` with the `--benchmark-json` flag to specify an output file for the results. + +Example: +```bash +pytest --benchmark-json=output.json -vv -s tests/perf/microbenchmarks/reads/test_reads.py +``` + +### Running a Specific Test + +To run a single test, append `::` followed by the test name to the file path. + +Example: +```bash +pytest --benchmark-json=output.json -vv -s tests/perf/microbenchmarks/reads/test_reads.py::test_downloads_single_proc_single_coro +``` + +## Configuration + +The benchmarks are configured using `config.yaml` files located in the respective subdirectories (e.g., `reads/config.yaml`). + +## Overriding Buckets + +You can override the buckets used in the benchmarks by setting environment variables. Please refer to the specific benchmark implementation for the environment variable names. + +## Output + +The benchmarks produce a JSON file with the results. This file can be converted to a CSV file for easier analysis in spreadsheets using the provided `json_to_csv.py` script. 
+ +Example: +```bash +python3 tests/perf/microbenchmarks/json_to_csv.py output.json +``` diff --git a/tests/perf/microbenchmarks/__init__.py b/tests/perf/microbenchmarks/__init__.py new file mode 100644 index 000000000..58d482ea3 --- /dev/null +++ b/tests/perf/microbenchmarks/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/tests/perf/microbenchmarks/_utils.py b/tests/perf/microbenchmarks/_utils.py new file mode 100644 index 000000000..b2589f2ac --- /dev/null +++ b/tests/perf/microbenchmarks/_utils.py @@ -0,0 +1,163 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, List +import statistics +import io +import os + + +def publish_benchmark_extra_info( + benchmark: Any, + params: Any, + benchmark_group: str = "read", + true_times: List[float] = [], +) -> None: + """ + Helper function to publish benchmark parameters to the extra_info property. 
+ """ + + benchmark.extra_info["num_files"] = params.num_files + benchmark.extra_info["file_size"] = params.file_size_bytes + benchmark.extra_info["chunk_size"] = params.chunk_size_bytes + if benchmark_group == "write": + benchmark.extra_info["pattern"] = "seq" + else: + benchmark.extra_info["pattern"] = params.pattern + benchmark.extra_info["coros"] = params.num_coros + benchmark.extra_info["rounds"] = params.rounds + benchmark.extra_info["bucket_name"] = params.bucket_name + benchmark.extra_info["bucket_type"] = params.bucket_type + benchmark.extra_info["processes"] = params.num_processes + benchmark.group = benchmark_group + + object_size = params.file_size_bytes + num_files = params.num_files + total_uploaded_mib = (object_size / (1024 * 1024) * num_files) + min_throughput = total_uploaded_mib / benchmark.stats["max"] + max_throughput = total_uploaded_mib / benchmark.stats["min"] + mean_throughput = total_uploaded_mib / benchmark.stats["mean"] + median_throughput = total_uploaded_mib / benchmark.stats["median"] + + benchmark.extra_info["throughput_MiB_s_min"] = min_throughput + benchmark.extra_info["throughput_MiB_s_max"] = max_throughput + benchmark.extra_info["throughput_MiB_s_mean"] = mean_throughput + benchmark.extra_info["throughput_MiB_s_median"] = median_throughput + + print("\nThroughput Statistics (MiB/s):") + print(f" Min: {min_throughput:.2f} (from max time)") + print(f" Max: {max_throughput:.2f} (from min time)") + print(f" Mean: {mean_throughput:.2f} (approx, from mean time)") + print(f" Median: {median_throughput:.2f} (approx, from median time)") + + if true_times: + throughputs = [total_uploaded_mib / t for t in true_times] + true_min_throughput = min(throughputs) + true_max_throughput = max(throughputs) + true_mean_throughput = statistics.mean(throughputs) + true_median_throughput = statistics.median(throughputs) + + benchmark.extra_info["true_throughput_MiB_s_min"] = true_min_throughput + benchmark.extra_info["true_throughput_MiB_s_max"] = 
true_max_throughput + benchmark.extra_info["true_throughput_MiB_s_mean"] = true_mean_throughput + benchmark.extra_info["true_throughput_MiB_s_median"] = true_median_throughput + + print("\nThroughput Statistics from true_times (MiB/s):") + print(f" Min: {true_min_throughput:.2f}") + print(f" Max: {true_max_throughput:.2f}") + print(f" Mean: {true_mean_throughput:.2f}") + print(f" Median: {true_median_throughput:.2f}") + + # Get benchmark name, rounds, and iterations + name = benchmark.name + rounds = benchmark.stats['rounds'] + iterations = benchmark.stats['iterations'] + + # Header for throughput table + header = "\n\n" + "-" * 125 + "\n" + header += "Throughput Benchmark (MiB/s)\n" + header += "-" * 125 + "\n" + header += f"{'Name':<50} {'Min':>10} {'Max':>10} {'Mean':>10} {'StdDev':>10} {'Median':>10} {'Rounds':>8} {'Iterations':>12}\n" + header += "-" * 125 + + # Data row for throughput table + # The table headers (Min, Max) refer to the throughput values. + row = f"{name:<50} {min_throughput:>10.4f} {max_throughput:>10.4f} {mean_throughput:>10.4f} {'N/A':>10} {median_throughput:>10.4f} {rounds:>8} {iterations:>12}" + + print(header) + print(row) + print("-" * 125) + +class RandomBytesIO(io.RawIOBase): + """ + A file-like object that generates random bytes using os.urandom. + It enforces a fixed size and an upper safety cap. + """ + # 10 GiB default safety cap + DEFAULT_CAP = 10 * 1024 * 1024 * 1024 + + def __init__(self, size, max_size=DEFAULT_CAP): + """ + Args: + size (int): The exact size of the virtual file in bytes. + max_size (int): The maximum allowed size to prevent safety issues. + """ + if size is None: + raise ValueError("Size must be defined (cannot be infinite).") + + if size > max_size: + raise ValueError(f"Requested size {size} exceeds the maximum limit of {max_size} bytes (10 GiB).") + + self._size = size + self._pos = 0 + + def read(self, n=-1): + # 1. Handle "read all" (n=-1) + if n is None or n < 0: + n = self._size - self._pos + + # 2. 
Handle EOF (End of File) + if self._pos >= self._size: + return b"" + + # 3. Clamp read amount to remaining size + # This ensures we stop exactly at `size` bytes. + n = min(n, self._size - self._pos) + + # 4. Generate data + data = os.urandom(n) + self._pos += len(data) + return data + + def readable(self): + return True + + def seekable(self): + return True + + def tell(self): + return self._pos + + def seek(self, offset, whence=io.SEEK_SET): + if whence == io.SEEK_SET: + new_pos = offset + elif whence == io.SEEK_CUR: + new_pos = self._pos + offset + elif whence == io.SEEK_END: + new_pos = self._size + offset + else: + raise ValueError(f"Invalid whence: {whence}") + + # Clamp position to valid range [0, size] + self._pos = max(0, min(new_pos, self._size)) + return self._pos diff --git a/tests/perf/microbenchmarks/conftest.py b/tests/perf/microbenchmarks/conftest.py new file mode 100644 index 000000000..7637d2e2a --- /dev/null +++ b/tests/perf/microbenchmarks/conftest.py @@ -0,0 +1,144 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import contextlib +from typing import Any +from tests.perf.microbenchmarks.resource_monitor import ResourceMonitor +import pytest +from tests.system._helpers import delete_blob + +import asyncio +import multiprocessing +import os +import uuid +from google.cloud import storage +from google.cloud.storage._experimental.asyncio.async_appendable_object_writer import ( + AsyncAppendableObjectWriter, +) +from google.cloud.storage._experimental.asyncio.async_grpc_client import AsyncGrpcClient + +_OBJECT_NAME_PREFIX = "micro-benchmark" + + +@pytest.fixture(scope="function") +def blobs_to_delete(): + blobs_to_delete = [] + + yield blobs_to_delete + + for blob in blobs_to_delete: + delete_blob(blob) + + +@pytest.fixture(scope="session") +def storage_client(): + from google.cloud.storage import Client + + client = Client() + with contextlib.closing(client): + yield client + +@pytest.fixture +def monitor(): + """ + Provides the ResourceMonitor class. + Usage: with monitor() as m: ... + """ + return ResourceMonitor + +def publish_resource_metrics(benchmark: Any, monitor: ResourceMonitor) -> None: + """ + Helper function to publish resource monitor results to the extra_info property. + """ + benchmark.extra_info.update( + { + "cpu_max_global": f"{monitor.max_cpu:.2f}", + "mem_max": f"{monitor.max_mem:.2f}", + "net_throughput_mb_s": f"{monitor.throughput_mb_s:.2f}", + "vcpus": monitor.vcpus, + } + ) + + +async def upload_appendable_object(bucket_name, object_name, object_size, chunk_size): + # flush interval set to little over 1GiB to minimize number of flushes. + # this method is to write "appendable" objects which will be used for + # benchmarking reads, hence not concerned performance of writes here. 
+ writer = AsyncAppendableObjectWriter( + AsyncGrpcClient().grpc_client, bucket_name, object_name, writer_options={"FLUSH_INTERVAL_BYTES": 1026 * 1024 ** 2} + ) + await writer.open() + uploaded_bytes = 0 + while uploaded_bytes < object_size: + bytes_to_upload = min(chunk_size, object_size - uploaded_bytes) + await writer.append(os.urandom(bytes_to_upload)) + uploaded_bytes += bytes_to_upload + object_metdata = await writer.close(finalize_on_close=True) + assert object_metdata.size == uploaded_bytes + return uploaded_bytes + + +def upload_simple_object(bucket_name, object_name, object_size, chunk_size): + storage_client = storage.Client() + bucket = storage_client.bucket(bucket_name) + blob = bucket.blob(object_name) + blob.chunk_size = chunk_size + data = os.urandom(object_size) + blob.upload_from_string(data) + return object_size + + +def _upload_worker(args): + bucket_name, object_name, object_size, chunk_size, bucket_type = args + if bucket_type == "zonal": + uploaded_bytes = asyncio.run( + upload_appendable_object(bucket_name, object_name, object_size, chunk_size) + ) + else: + uploaded_bytes = upload_simple_object(bucket_name, object_name, object_size, chunk_size) + return object_name, uploaded_bytes + + +def _create_files(num_files, bucket_name, bucket_type, object_size, chunk_size=1024 * 1024 * 1024): + """ + Create/Upload objects for benchmarking and return a list of their names. 
+ """ + object_names = [ + f"{_OBJECT_NAME_PREFIX}-{uuid.uuid4().hex[:5]}" for _ in range(num_files) + ] + + args_list = [ + (bucket_name, object_names[i], object_size, chunk_size, bucket_type) + for i in range(num_files) + ] + + ctx = multiprocessing.get_context("spawn") + with ctx.Pool() as pool: + results = pool.map(_upload_worker, args_list) + + total_uploaded_bytes = sum(r[1] for r in results) + assert total_uploaded_bytes == object_size * num_files + + return [r[0] for r in results] + + +@pytest.fixture +def workload_params(request): + params = request.param + files_names = _create_files( + params.num_files, + params.bucket_name, + params.bucket_type, + params.file_size_bytes, + ) + return params, files_names \ No newline at end of file diff --git a/tests/perf/microbenchmarks/json_to_csv.py b/tests/perf/microbenchmarks/json_to_csv.py new file mode 100644 index 000000000..1ef58f907 --- /dev/null +++ b/tests/perf/microbenchmarks/json_to_csv.py @@ -0,0 +1,190 @@ +import json +import csv +import argparse +import logging +import numpy as np + +MB = 1024 * 1024 + + +def _process_benchmark_result(bench, headers, extra_info_headers, stats_headers): + """ + Process a single benchmark result and prepare it for CSV reporting. + + This function extracts relevant statistics and metadata from a benchmark + run, calculates derived metrics like percentiles and throughput, and + formats it as a dictionary. + + Args: + bench (dict): The dictionary for a single benchmark from the JSON output. + headers (list): The list of all header names for the CSV. + extra_info_headers (list): Headers from the 'extra_info' section. + stats_headers (list): Headers from the 'stats' section. 
+ + """ + row = {h: "" for h in headers} + row["name"] = bench.get("name", "") + row["group"] = bench.get("group", "") + + extra_info = bench.get("extra_info", {}) + + # Populate extra_info and stats + for key in extra_info_headers: + row[key] = extra_info.get(key) + for key in stats_headers: + row[key] = bench.get("stats", {}).get(key) + + # Handle threads/coros mapping + if "threads" in row: + row["threads"] = extra_info.get("num_coros", extra_info.get("coros")) + + # Calculate percentiles + timings = bench.get("stats", {}).get("data") + if timings: + row["p90"] = np.percentile(timings, 90) + row["p95"] = np.percentile(timings, 95) + row["p99"] = np.percentile(timings, 99) + + # Calculate max throughput + file_size = extra_info.get("file_size_bytes", extra_info.get("file_size", 0)) + num_files = extra_info.get("num_files", 1) + total_bytes = file_size * num_files + + min_time = bench.get("stats", {}).get("min") + if min_time and min_time > 0: + row["max_throughput_mb_s"] = (total_bytes / min_time) / MB + else: + row["max_throughput_mb_s"] = 0.0 + + return row + + +def _generate_report(json_path, csv_path): + """Generate a CSV summary report from the pytest-benchmark JSON output. + + Args: + json_path (str): The path to the JSON file containing benchmark results. + csv_path (str): The path where the CSV report will be saved. + + Returns: + str: The path to the generated CSV report file. + + """ + logging.info(f"Generating CSV report from {json_path}") + + with open(json_path, "r") as f: + data = json.load(f) + + benchmarks = data.get("benchmarks", []) + if not benchmarks: + logging.warning("No benchmarks found in the JSON file.") + return + + # headers order - name group block_size bucket_name bucket_type chunk_size cpu_max_global file_size mem_max net_throughput_mb_s num_files pattern processes rounds threads vcpus min max mean median stddev p90 p95 p99 max_throughput_mb_s + # if there are any other column keep it at the afterwards. 
+ ordered_headers = [ + "name", + "group", + "block_size", + "bucket_name", + "bucket_type", + "chunk_size", + "cpu_max_global", + "file_size", + "mem_max", + "net_throughput_mb_s", + "num_files", + "pattern", + "processes", + "rounds", + "threads", + "vcpus", + "min", + "max", + "mean", + "median", + "stddev", + "p90", + "p95", + "p99", + "max_throughput_mb_s", + ] + + # Gather all available headers from the data + all_available_headers = set(["name", "group"]) + stats_headers = ["min", "max", "mean", "median", "stddev"] + custom_headers = ["p90", "p95", "p99", "max_throughput_mb_s"] + + all_available_headers.update(stats_headers) + all_available_headers.update(custom_headers) + + extra_info_keys = set() + for bench in benchmarks: + if "extra_info" in bench and isinstance(bench["extra_info"], dict): + extra_info_keys.update(bench["extra_info"].keys()) + all_available_headers.update(extra_info_keys) + + # Construct the final header list + final_headers = list(ordered_headers) + + # Add any headers from the data that are not in the ordered list + for header in sorted(list(all_available_headers)): + if header not in final_headers: + final_headers.append(header) + + # We still need the full list of extra_info headers for _process_benchmark_result + extra_info_headers = sorted(list(extra_info_keys)) + + with open(csv_path, "w", newline="") as f: + writer = csv.writer(f) + writer.writerow(final_headers) + + for bench in benchmarks: + row = _process_benchmark_result( + bench, final_headers, extra_info_headers, stats_headers + ) + writer.writerow([row.get(h, "") for h in final_headers]) + + logging.info(f"CSV report generated at {csv_path}") + return csv_path + + +def main(): + """ + Converts a JSON benchmark file to a CSV file. + + The CSV file will contain the 'name' of each benchmark and all fields + from the 'extra_info' section. 
+ """ + parser = argparse.ArgumentParser(description="Convert benchmark JSON to CSV.") + parser.add_argument( + "--input_file", + nargs="?", + default="output.json", + help="Path to the input JSON file (default: output.json)", + ) + parser.add_argument( + "--output_file", + nargs="?", + default="output.csv", + help="Path to the output CSV file (default: output.csv)", + ) + args = parser.parse_args() + + logging.basicConfig( + level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s" + ) + + try: + _generate_report(args.input_file, args.output_file) + print(f"Successfully converted {args.input_file} to {args.output_file}") + except FileNotFoundError: + logging.error(f"Error: Input file not found at {args.input_file}") + except json.JSONDecodeError: + logging.error(f"Error: Could not decode JSON from {args.input_file}") + except Exception as e: + logging.error(f"An unexpected error occurred: {e}") + + +if __name__ == "__main__": + main() diff --git a/tests/perf/microbenchmarks/parameters.py b/tests/perf/microbenchmarks/parameters.py new file mode 100644 index 000000000..72b8476b6 --- /dev/null +++ b/tests/perf/microbenchmarks/parameters.py @@ -0,0 +1,28 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from dataclasses import dataclass + + +@dataclass +class IOBenchmarkParameters: + name: str + workload_name: str + bucket_name: str + bucket_type: str + num_coros: int + num_processes: int + num_files: int + rounds: int + chunk_size_bytes: int + file_size_bytes: int diff --git a/tests/perf/microbenchmarks/reads/__init__.py b/tests/perf/microbenchmarks/reads/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/perf/microbenchmarks/reads/config.py b/tests/perf/microbenchmarks/reads/config.py new file mode 100644 index 000000000..b9cfb1a92 --- /dev/null +++ b/tests/perf/microbenchmarks/reads/config.py @@ -0,0 +1,108 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import itertools +import os +from typing import Dict, List + +import yaml + +try: + from tests.perf.microbenchmarks.reads.parameters import ReadParameters +except ModuleNotFoundError: + from reads.parameters import ReadParameters + + +def _get_params() -> Dict[str, List[ReadParameters]]: + """Generates a dictionary of benchmark parameters for read operations. + + This function reads configuration from a `config.yaml` file, which defines + common parameters (like bucket types, file sizes) and different workloads. + It then generates all possible combinations of these parameters for each + workload using `itertools.product`. 
+ + The resulting parameter sets are encapsulated in `ReadParameters` objects + and organized by workload name in the returned dictionary. + + Bucket names can be overridden by setting the `DEFAULT_RAPID_ZONAL_BUCKET` + and `DEFAULT_STANDARD_BUCKET` environment variables. + + Returns: + Dict[str, List[ReadParameters]]: A dictionary where keys are workload + names (e.g., 'read_seq', 'read_rand_multi_coros') and values are lists + of `ReadParameters` objects, each representing a unique benchmark scenario. + """ + params: Dict[str, List[ReadParameters]] = {} + config_path = os.path.join(os.path.dirname(__file__), "config.yaml") + with open(config_path, "r") as f: + config = yaml.safe_load(f) + + common_params = config["common"] + bucket_types = common_params["bucket_types"] + file_sizes_mib = common_params["file_sizes_mib"] + chunk_sizes_mib = common_params["chunk_sizes_mib"] + rounds = common_params["rounds"] + + bucket_map = { + "zonal": os.environ.get("DEFAULT_RAPID_ZONAL_BUCKET", config['defaults']['DEFAULT_RAPID_ZONAL_BUCKET']), + "regional": os.environ.get("DEFAULT_STANDARD_BUCKET", config['defaults']['DEFAULT_STANDARD_BUCKET']) + } + + for workload in config["workload"]: + workload_name = workload["name"] + params[workload_name] = [] + pattern = workload["pattern"] + processes = workload["processes"] + coros = workload["coros"] + + # Create a product of all parameter combinations + product = itertools.product( + bucket_types, + file_sizes_mib, + chunk_sizes_mib, + processes, + coros, + ) + + for ( + bucket_type, + file_size_mib, + chunk_size_mib, + num_processes, + num_coros, + ) in product: + file_size_bytes = file_size_mib * 1024 * 1024 + chunk_size_bytes = chunk_size_mib * 1024 * 1024 + bucket_name = bucket_map[bucket_type] + + num_files = num_processes * num_coros + + # Create a descriptive name for the parameter set + name = f"{pattern}_{bucket_type}_{num_processes}p_{num_coros}c" + + params[workload_name].append( + ReadParameters( + name=name, + 
workload_name=workload_name, + pattern=pattern, + bucket_name=bucket_name, + bucket_type=bucket_type, + num_coros=num_coros, + num_processes=num_processes, + num_files=num_files, + rounds=rounds, + chunk_size_bytes=chunk_size_bytes, + file_size_bytes=file_size_bytes, + ) + ) + return params diff --git a/tests/perf/microbenchmarks/reads/config.yaml b/tests/perf/microbenchmarks/reads/config.yaml new file mode 100644 index 000000000..25bfd92c8 --- /dev/null +++ b/tests/perf/microbenchmarks/reads/config.yaml @@ -0,0 +1,49 @@ +common: + bucket_types: + - "regional" + - "zonal" + file_sizes_mib: + - 1024 # 1GiB + chunk_sizes_mib: [100] + rounds: 10 + +workload: + + ############# single process single coroutine ######### + - name: "read_seq" + pattern: "seq" + coros: [1] + processes: [1] + + - name: "read_rand" + pattern: "rand" + coros: [1] + processes: [1] + + ############# single process multi coroutine ######### + + - name: "read_seq_multi_coros" + pattern: "seq" + coros: [2, 4, 8, 16] + processes: [1] + + - name: "read_rand_multi_coros" + pattern: "rand" + coros: [2, 4, 8, 16] + processes: [1] + + ############# multi process multi coroutine ######### + - name: "read_seq_multi_process" + pattern: "seq" + coros: [1, 2, 4] + processes: [2, 16, 48, 96] + + - name: "read_rand_multi_process" + pattern: "rand" + coros: [1, 2, 4] + processes: [2, 16, 48, 96] + + +defaults: + DEFAULT_RAPID_ZONAL_BUCKET: "chandrasiri-benchmarks-zb" + DEFAULT_STANDARD_BUCKET: "chandrasiri-benchmarks-rb" diff --git a/tests/perf/microbenchmarks/reads/parameters.py b/tests/perf/microbenchmarks/reads/parameters.py new file mode 100644 index 000000000..0785a4147 --- /dev/null +++ b/tests/perf/microbenchmarks/reads/parameters.py @@ -0,0 +1,20 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from dataclasses import dataclass +from ..parameters import IOBenchmarkParameters + + +@dataclass +class ReadParameters(IOBenchmarkParameters): + pattern: str diff --git a/tests/perf/microbenchmarks/reads/test_reads.py b/tests/perf/microbenchmarks/reads/test_reads.py new file mode 100644 index 000000000..81f180d75 --- /dev/null +++ b/tests/perf/microbenchmarks/reads/test_reads.py @@ -0,0 +1,415 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Microbenchmarks for Google Cloud Storage read operations. + +This module contains performance benchmarks for various read patterns from Google Cloud Storage. +It includes three main test functions: +- `test_downloads_single_proc_single_coro`: Benchmarks reads using a single process and a single coroutine. +- `test_downloads_single_proc_multi_coro`: Benchmarks reads using a single process and multiple coroutines. +- `test_downloads_multi_proc_multi_coro`: Benchmarks reads using multiple processes and multiple coroutines. 
+ +All other functions in this module are helper methods for these three tests. +""" + +import time +import asyncio +import random +from io import BytesIO +import logging + +import pytest + +from google.cloud.storage._experimental.asyncio.async_grpc_client import AsyncGrpcClient +from google.cloud.storage._experimental.asyncio.async_multi_range_downloader import ( + AsyncMultiRangeDownloader, +) +from tests.perf.microbenchmarks._utils import publish_benchmark_extra_info +from tests.perf.microbenchmarks.conftest import ( + publish_resource_metrics, +) +import tests.perf.microbenchmarks.reads.config as config +from concurrent.futures import ThreadPoolExecutor +import multiprocessing + +all_params = config._get_params() + + +async def create_client(): + """Initializes async client and gets the current event loop.""" + return AsyncGrpcClient().grpc_client + + +async def download_chunks_using_mrd_async(client, filename, other_params, chunks): + # start timer. + start_time = time.monotonic_ns() + + total_bytes_downloaded = 0 + mrd = AsyncMultiRangeDownloader(client, other_params.bucket_name, filename) + await mrd.open() + for offset, size in chunks: + buffer = BytesIO() + await mrd.download_ranges([(offset, size, buffer)]) + total_bytes_downloaded += buffer.tell() + await mrd.close() + + assert total_bytes_downloaded == other_params.file_size_bytes + + # end timer. 
+ end_time = time.monotonic_ns() + elapsed_time = end_time - start_time + return elapsed_time / 1_000_000_000 + + +def download_chunks_using_mrd(loop, client, filename, other_params, chunks): + return loop.run_until_complete( + download_chunks_using_mrd_async(client, filename, other_params, chunks) + ) + + +def download_chunks_using_json(_, json_client, filename, other_params, chunks): + bucket = json_client.bucket(other_params.bucket_name) + blob = bucket.blob(filename) + start_time = time.monotonic_ns() + for offset, size in chunks: + _ = blob.download_as_bytes(start=offset, end=offset + size - 1) + return (time.monotonic_ns() - start_time) / 1_000_000_000 + + +@pytest.mark.parametrize( + "workload_params", + all_params["read_rand"] + all_params["read_seq"], + indirect=True, + ids=lambda p: p.name, +) +def test_downloads_single_proc_single_coro( + benchmark, storage_client, blobs_to_delete, monitor, workload_params +): + """ + Benchmarks reads using a single process and a single coroutine. + It creates chunks based on object size and chunk_size, then passes them to either + `download_chunks_using_mrd` (for zonal buckets) or `download_chunks_using_json` (for regional buckets) + for benchmarking using `benchmark.pedantic`. 
+ """ + params, files_names = workload_params + + object_size = params.file_size_bytes + chunk_size = params.chunk_size_bytes + chunks = [] + for offset in range(0, object_size, chunk_size): + size = min(chunk_size, object_size - offset) + chunks.append((offset, size)) + + if params.pattern == "rand": + logging.info("randomizing chunks") + random.shuffle(chunks) + + if params.bucket_type == "zonal": + logging.info("bucket type zonal") + target_func = download_chunks_using_mrd + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + client = loop.run_until_complete(create_client()) + else: + logging.info("bucket type regional") + target_func = download_chunks_using_json + loop = None + client = storage_client + + output_times = [] + + def target_wrapper(*args, **kwargs): + result = target_func(*args, **kwargs) + output_times.append(result) + return output_times + + try: + with monitor() as m: + output_times = benchmark.pedantic( + target=target_wrapper, + iterations=1, + rounds=params.rounds, + args=( + loop, + client, + files_names[0], + params, + chunks, + ), + ) + finally: + if loop is not None: + tasks = asyncio.all_tasks(loop=loop) + for task in tasks: + task.cancel() + loop.run_until_complete(asyncio.gather(*tasks, return_exceptions=True)) + loop.close() + publish_benchmark_extra_info(benchmark, params, true_times=output_times) + publish_resource_metrics(benchmark, m) + + blobs_to_delete.extend( + storage_client.bucket(params.bucket_name).blob(f) for f in files_names + ) + + +def download_files_using_mrd_multi_coro(loop, client, files, other_params, chunks): + """ + Downloads multiple files concurrently using AsyncMultiRangeDownloader (MRD) with asyncio. + + For each file, it creates a coroutine to download its chunks using `download_chunks_using_mrd_async`. + All coroutines are then executed concurrently using `asyncio.gather`. + The function returns the maximum latency observed among all coroutines. + + Args: + loop: The asyncio event loop. 
+ client: The AsyncGrpcClient instance. + files (list): A list of filenames to download. + other_params: An object containing benchmark parameters (e.g., bucket_name, file_size_bytes). + chunks (list): A list of (offset, size) tuples representing the parts of each file to download. + + Returns: + float: The maximum latency (in seconds) among all coroutines. + """ + async def main(): + if len(files) == 1: + result = await download_chunks_using_mrd_async( + client, files[0], other_params, chunks + ) + return [result] + else: + tasks = [] + for f in files: + tasks.append( + download_chunks_using_mrd_async(client, f, other_params, chunks) + ) + return await asyncio.gather(*tasks) + + results = loop.run_until_complete(main()) + return max(results) + + +def download_files_using_json_multi_threaded( + _, json_client, files, other_params, chunks +): + """ + Downloads multiple files concurrently using the JSON API with a ThreadPoolExecutor. + + For each file, it submits a task to a `ThreadPoolExecutor` to download its chunks + using `download_chunks_using_json`. The number of concurrent downloads is + determined by `other_params.num_coros` (which acts as `max_workers`). + The function returns the maximum latency among all concurrent downloads. + + The `chunks` parameter is a list of (offset, size) tuples representing + the parts of each file to download. + """ + results = [] + # In the context of multi-coro, num_coros is the number of files to download concurrently. + # So we can use it as max_workers for the thread pool. 
+ with ThreadPoolExecutor(max_workers=other_params.num_coros) as executor: + futures = [] + for f in files: + future = executor.submit( + download_chunks_using_json, None, json_client, f, other_params, chunks + ) + futures.append(future) + + for future in futures: + results.append(future.result()) + + return max(results) + + +@pytest.mark.parametrize( + "workload_params", + all_params["read_seq_multi_coros"] + all_params["read_rand_multi_coros"], + indirect=True, + ids=lambda p: p.name, +) +def test_downloads_single_proc_multi_coro( + benchmark, storage_client, blobs_to_delete, monitor, workload_params +): + params, files_names = workload_params + + object_size = params.file_size_bytes + chunk_size = params.chunk_size_bytes + chunks = [] + for offset in range(0, object_size, chunk_size): + size = min(chunk_size, object_size - offset) + chunks.append((offset, size)) + + if params.pattern == "rand": + logging.info("randomizing chunks") + random.shuffle(chunks) + + if params.bucket_type == "zonal": + logging.info("bucket type zonal") + target_func = download_files_using_mrd_multi_coro + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + client = loop.run_until_complete(create_client()) + else: + logging.info("bucket type regional") + target_func = download_files_using_json_multi_threaded + loop = None + client = storage_client + + output_times = [] + + def target_wrapper(*args, **kwargs): + result = target_func(*args, **kwargs) + output_times.append(result) + return output_times + + try: + with monitor() as m: + output_times = benchmark.pedantic( + target=target_wrapper, + iterations=1, + rounds=params.rounds, + args=( + loop, + client, + files_names, + params, + chunks, + ), + ) + finally: + if loop is not None: + tasks = asyncio.all_tasks(loop=loop) + for task in tasks: + task.cancel() + loop.run_until_complete(asyncio.gather(*tasks, return_exceptions=True)) + loop.close() + publish_benchmark_extra_info(benchmark, params, true_times=output_times) + 
publish_resource_metrics(benchmark, m) + + blobs_to_delete.extend( + storage_client.bucket(params.bucket_name).blob(f) for f in files_names + ) + + +def _download_files_worker(files_to_download, other_params, chunks, bucket_type): + # For regional buckets, a new client must be created for each process. + # For zonal, the same is done for consistency. + if bucket_type == "zonal": + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + client = loop.run_until_complete(create_client()) + try: + # download_files_using_mrd_multi_coro returns max latency of coros + result = download_files_using_mrd_multi_coro( + loop, client, files_to_download, other_params, chunks + ) + finally: + tasks = asyncio.all_tasks(loop=loop) + for task in tasks: + task.cancel() + loop.run_until_complete(asyncio.gather(*tasks, return_exceptions=True)) + loop.close() + return result + else: # regional + from google.cloud import storage + + json_client = storage.Client() + # download_files_using_json_multi_threaded returns max latency of threads + return download_files_using_json_multi_threaded( + None, json_client, files_to_download, other_params, chunks + ) + + +def download_files_mp_mc_wrapper(files_names, params, chunks, bucket_type): + num_processes = params.num_processes + num_coros = params.num_coros # This is n, number of files per process + + # Distribute filenames to processes + filenames_per_process = [ + files_names[i : i + num_coros] for i in range(0, len(files_names), num_coros) + ] + + args = [ + ( + filenames, + params, + chunks, + bucket_type, + ) + for filenames in filenames_per_process + ] + + ctx = multiprocessing.get_context("spawn") + with ctx.Pool(processes=num_processes) as pool: + results = pool.starmap(_download_files_worker, args) + + return max(results) + + +@pytest.mark.parametrize( + "workload_params", + all_params["read_seq_multi_process"] + + all_params["read_rand_multi_process"], + indirect=True, + ids=lambda p: p.name, +) +def 
test_downloads_multi_proc_multi_coro( + benchmark, storage_client, blobs_to_delete, monitor, workload_params +): + """ + Benchmarks reads using multiple processes and multiple coroutines. + + This test distributes `m*n` files among `m` processes, where each process + downloads `n` files concurrently using `n` coroutines. The processes are spawned + in "spawn" mode. The reported latency for each round is the maximum latency + observed across all processes. + """ + params, files_names = workload_params + logging.info(f"num files: {len(files_names)}") + + object_size = params.file_size_bytes + chunk_size = params.chunk_size_bytes + chunks = [] + for offset in range(0, object_size, chunk_size): + size = min(chunk_size, object_size - offset) + chunks.append((offset, size)) + + if params.pattern == "rand": + logging.info("randomizing chunks") + random.shuffle(chunks) + + output_times = [] + + def target_wrapper(*args, **kwargs): + result = download_files_mp_mc_wrapper(*args, **kwargs) + output_times.append(result) + return output_times + + try: + with monitor() as m: + output_times = benchmark.pedantic( + target=target_wrapper, + iterations=1, + rounds=params.rounds, + args=( + files_names, + params, + chunks, + params.bucket_type, + ), + ) + finally: + publish_benchmark_extra_info(benchmark, params, true_times=output_times) + publish_resource_metrics(benchmark, m) + + blobs_to_delete.extend( + storage_client.bucket(params.bucket_name).blob(f) for f in files_names + ) \ No newline at end of file diff --git a/tests/perf/microbenchmarks/resource_monitor.py b/tests/perf/microbenchmarks/resource_monitor.py new file mode 100644 index 000000000..be6ae7025 --- /dev/null +++ b/tests/perf/microbenchmarks/resource_monitor.py @@ -0,0 +1,99 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import threading +import time + +import psutil + + +class ResourceMonitor: + def __init__(self): + self.interval = 1.0 + + self.vcpus = psutil.cpu_count() or 1 + self.max_cpu = 0.0 + self.max_mem = 0.0 + + # Network and Time tracking + self.start_time = 0.0 + self.duration = 0.0 + self.start_net = None + self.net_sent_mb = 0.0 + self.net_recv_mb = 0.0 + + self._stop_event = threading.Event() + self._thread = None + + def __enter__(self): + self.start_net = psutil.net_io_counters() + self.start_time = time.perf_counter() + self.start() + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.stop() + self.duration = time.perf_counter() - self.start_time + end_net = psutil.net_io_counters() + + self.net_sent_mb = (end_net.bytes_sent - self.start_net.bytes_sent) / ( + 1024 * 1024 + ) + self.net_recv_mb = (end_net.bytes_recv - self.start_net.bytes_recv) / ( + 1024 * 1024 + ) + + def _monitor(self): + psutil.cpu_percent(interval=None) + current_process = psutil.Process() + while not self._stop_event.is_set(): + try: + # CPU and Memory tracking for current process tree + total_cpu = current_process.cpu_percent(interval=None) + current_mem = current_process.memory_info().rss + for child in current_process.children(recursive=True): + try: + total_cpu += child.cpu_percent(interval=None) + current_mem += child.memory_info().rss + except (psutil.NoSuchProcess, psutil.AccessDenied): + continue + + # Normalize CPU by number of vcpus + global_cpu = total_cpu / self.vcpus + + mem = current_mem + + if global_cpu > self.max_cpu: + self.max_cpu = global_cpu + 
if mem > self.max_mem: + self.max_mem = mem + except psutil.NoSuchProcess: + pass + + time.sleep(self.interval) + + def start(self): + self._thread = threading.Thread(target=self._monitor, daemon=True) + self._thread.start() + + def stop(self): + self._stop_event.set() + if self._thread: + self._thread.join() + + @property + def throughput_mb_s(self): + """Calculates combined network throughput.""" + if self.duration <= 0: + return 0.0 + return (self.net_sent_mb + self.net_recv_mb) / self.duration \ No newline at end of file From dbe9d8b89d975dfbed8c830a5687ccfafea51d5f Mon Sep 17 00:00:00 2001 From: Chandra Shekhar Sirimala Date: Wed, 21 Jan 2026 21:04:18 +0530 Subject: [PATCH 07/23] feat: Add micro-benchmarks for writes comparing standard (regional) vs rapid (zonal) buckets. (#1707) * Add performance microbenchmarking suite for Writes * This compares Regional (Standard Storage) with Zonal (Rapid Storage) buckets. * Regional uses JSON whereas Zonal uses gRPC bidi --------- Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- tests/perf/microbenchmarks/README.md | 5 +- tests/perf/microbenchmarks/conftest.py | 19 +- tests/perf/microbenchmarks/writes/__init__.py | 0 tests/perf/microbenchmarks/writes/config.py | 100 +++++ tests/perf/microbenchmarks/writes/config.yaml | 34 ++ .../perf/microbenchmarks/writes/parameters.py | 20 + .../microbenchmarks/writes/test_writes.py | 414 ++++++++++++++++++ 7 files changed, 585 insertions(+), 7 deletions(-) diff --git a/tests/perf/microbenchmarks/README.md b/tests/perf/microbenchmarks/README.md index 0219a5bd9..a3e045682 100644 ---
a/tests/perf/microbenchmarks/README.md +++ b/tests/perf/microbenchmarks/README.md @@ -15,10 +15,13 @@ pytest --benchmark-json=output.json -vv -s tests/perf/microbenchmarks/reads/test To run a single test, append `::` followed by the test name to the file path. -Example: +Examples: ```bash pytest --benchmark-json=output.json -vv -s tests/perf/microbenchmarks/reads/test_reads.py::test_downloads_single_proc_single_coro ``` +```bash +pytest --benchmark-json=output.json -vv -s tests/perf/microbenchmarks/writes/test_writes.py::test_uploads_single_proc_single_coro +``` ## Configuration diff --git a/tests/perf/microbenchmarks/conftest.py b/tests/perf/microbenchmarks/conftest.py index 7637d2e2a..f66f9bf6c 100644 --- a/tests/perf/microbenchmarks/conftest.py +++ b/tests/perf/microbenchmarks/conftest.py @@ -26,6 +26,7 @@ AsyncAppendableObjectWriter, ) from google.cloud.storage._experimental.asyncio.async_grpc_client import AsyncGrpcClient +from tests.perf.microbenchmarks.writes.parameters import WriteParameters _OBJECT_NAME_PREFIX = "micro-benchmark" @@ -135,10 +136,16 @@ def _create_files(num_files, bucket_name, bucket_type, object_size, chunk_size=1 @pytest.fixture def workload_params(request): params = request.param - files_names = _create_files( - params.num_files, - params.bucket_name, - params.bucket_type, - params.file_size_bytes, - ) + if isinstance(params, WriteParameters): + files_names = [ + f"{_OBJECT_NAME_PREFIX}-{uuid.uuid4().hex[:5]}" + for _ in range(params.num_files) + ] + else: + files_names = _create_files( + params.num_files, + params.bucket_name, + params.bucket_type, + params.file_size_bytes, + ) return params, files_names \ No newline at end of file diff --git a/tests/perf/microbenchmarks/writes/__init__.py b/tests/perf/microbenchmarks/writes/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/perf/microbenchmarks/writes/config.py b/tests/perf/microbenchmarks/writes/config.py new file mode 100644 index 000000000..bc1548511 --- 
/dev/null +++ b/tests/perf/microbenchmarks/writes/config.py @@ -0,0 +1,100 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import itertools +import os +from typing import Dict, List + +import yaml + +try: + from tests.perf.microbenchmarks.writes.parameters import WriteParameters +except ModuleNotFoundError: + from parameters import WriteParameters + + +def get_write_params() -> Dict[str, List[WriteParameters]]: + """Generates benchmark parameters from a YAML configuration file. + + This function reads the configuration from `config.yaml`, located in the + same directory, and generates all possible combinations of write parameters + based on the defined workloads. It uses `itertools.product` to create + a Cartesian product of parameters like bucket types, file sizes, etc. + + Returns: + Dict[str, List[WriteParameters]]: A dictionary where keys are workload + names and values are lists of `WriteParameters` instances for that + workload. 
+ """ + params: Dict[str, List[WriteParameters]] = {} + config_path = os.path.join(os.path.dirname(__file__), "config.yaml") + with open(config_path, "r") as f: + config = yaml.safe_load(f) + + common_params = config["common"] + bucket_types = common_params["bucket_types"] + file_sizes_mib = common_params["file_sizes_mib"] + chunk_sizes_mib = common_params["chunk_sizes_mib"] + rounds = common_params["rounds"] + + bucket_map = { + "zonal": os.environ.get("DEFAULT_RAPID_ZONAL_BUCKET", config['defaults']['DEFAULT_RAPID_ZONAL_BUCKET']), + "regional": os.environ.get("DEFAULT_STANDARD_BUCKET", config['defaults']['DEFAULT_STANDARD_BUCKET']) + } + + for workload in config["workload"]: + workload_name = workload["name"] + params[workload_name] = [] + processes = workload["processes"] + coros = workload["coros"] + + # Create a product of all parameter combinations + product = itertools.product( + bucket_types, + file_sizes_mib, + chunk_sizes_mib, + processes, + coros, + ) + + for ( + bucket_type, + file_size_mib, + chunk_size_mib, + num_processes, + num_coros, + ) in product: + file_size_bytes = file_size_mib * 1024 * 1024 + chunk_size_bytes = chunk_size_mib * 1024 * 1024 + bucket_name = bucket_map[bucket_type] + + num_files = num_processes * num_coros + + # Create a descriptive name for the parameter set + name = f"{workload_name}_{bucket_type}_{num_processes}p_{num_coros}c" + + params[workload_name].append( + WriteParameters( + name=name, + workload_name=workload_name, + bucket_name=bucket_name, + bucket_type=bucket_type, + num_coros=num_coros, + num_processes=num_processes, + num_files=num_files, + rounds=rounds, + chunk_size_bytes=chunk_size_bytes, + file_size_bytes=file_size_bytes, + ) + ) + return params diff --git a/tests/perf/microbenchmarks/writes/config.yaml b/tests/perf/microbenchmarks/writes/config.yaml new file mode 100644 index 000000000..b4d93ba52 --- /dev/null +++ b/tests/perf/microbenchmarks/writes/config.yaml @@ -0,0 +1,34 @@ +common: + bucket_types: + - 
"regional" + - "zonal" + file_sizes_mib: + - 1024 # 1GiB + chunk_sizes_mib: [100] + rounds: 10 + +workload: + + ############# single proc single coroutines ######### + - name: "write_seq" + pattern: "seq" + coros: [1] + processes: [1] + + ############# single proc multiple coroutines ######### + + - name: "write_seq_multi_coros" + pattern: "seq" + coros: [2, 4, 8, 16] + processes: [1] + + ############# multiple proc multiple coroutines ######### + - name: "write_seq_multi_process" + pattern: "seq" + coros: [1, 2] + processes: [8, 16, 32, 64] + + +defaults: + DEFAULT_RAPID_ZONAL_BUCKET: "chandrasiri-benchmarks-zb" + DEFAULT_STANDARD_BUCKET: "chandrasiri-benchmarks-rb" diff --git a/tests/perf/microbenchmarks/writes/parameters.py b/tests/perf/microbenchmarks/writes/parameters.py new file mode 100644 index 000000000..8d44b93dc --- /dev/null +++ b/tests/perf/microbenchmarks/writes/parameters.py @@ -0,0 +1,20 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from dataclasses import dataclass +from ..parameters import IOBenchmarkParameters + + +@dataclass +class WriteParameters(IOBenchmarkParameters): + pass diff --git a/tests/perf/microbenchmarks/writes/test_writes.py b/tests/perf/microbenchmarks/writes/test_writes.py new file mode 100644 index 000000000..2b3c57abd --- /dev/null +++ b/tests/perf/microbenchmarks/writes/test_writes.py @@ -0,0 +1,414 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Microbenchmarks for Google Cloud Storage write operations. + +This module contains performance benchmarks for various write patterns to Google Cloud Storage. +It includes three main test functions: +- `test_uploads_single_proc_single_coro`: Benchmarks uploads using a single process and a single coroutine. +- `test_uploads_single_proc_multi_coro`: Benchmarks uploads using a single process and multiple coroutines. +- `test_uploads_multi_proc_multi_coro`: Benchmarks uploads using multiple processes and multiple coroutines. + +All other functions in this module are helper methods for these three tests. 
+""" + +import os +import time +import asyncio +from concurrent.futures import ThreadPoolExecutor +import multiprocessing +import logging + +import pytest +from google.cloud.storage._experimental.asyncio.async_grpc_client import AsyncGrpcClient +from google.cloud.storage._experimental.asyncio.async_appendable_object_writer import AsyncAppendableObjectWriter + +from tests.perf.microbenchmarks._utils import publish_benchmark_extra_info, RandomBytesIO +from tests.perf.microbenchmarks.conftest import publish_resource_metrics +import tests.perf.microbenchmarks.writes.config as config +from google.cloud import storage + +# Get write parameters +all_params = config.get_write_params() + +async def create_client(): + """Initializes async client and gets the current event loop.""" + return AsyncGrpcClient().grpc_client + +async def upload_chunks_using_grpc_async(client, filename, other_params): + """Uploads a file in chunks using the gRPC API asynchronously. + + Args: + client: The async gRPC client. + filename (str): The name of the object to create. + other_params: An object containing benchmark parameters like bucket_name, + file_size_bytes, and chunk_size_bytes. + + Returns: + float: The total time taken for the upload in seconds. 
+ """ + start_time = time.monotonic_ns() + + writer = AsyncAppendableObjectWriter( + client=client, bucket_name=other_params.bucket_name, object_name=filename + ) + await writer.open() + + uploaded_bytes = 0 + upload_size = other_params.file_size_bytes + chunk_size = other_params.chunk_size_bytes + + while uploaded_bytes < upload_size: + bytes_to_upload = min(chunk_size, upload_size - uploaded_bytes) + data = os.urandom(bytes_to_upload) + await writer.append(data) + uploaded_bytes += bytes_to_upload + await writer.close() + + assert uploaded_bytes == upload_size + + end_time = time.monotonic_ns() + elapsed_time = end_time - start_time + return elapsed_time / 1_000_000_000 + +def upload_chunks_using_grpc(loop, client, filename, other_params): + """Wrapper to run the async gRPC upload in a synchronous context. + + Args: + loop: The asyncio event loop. + client: The async gRPC client. + filename (str): The name of the object to create. + other_params: An object containing benchmark parameters. + + Returns: + float: The total time taken for the upload in seconds. + """ + return loop.run_until_complete( + upload_chunks_using_grpc_async(client, filename, other_params) + ) + +def upload_using_json(_, json_client, filename, other_params): + """Uploads a file using the JSON API. + + Args: + _ (any): Unused. + json_client: The standard Python Storage client. + filename (str): The name of the object to create. + other_params: An object containing benchmark parameters like bucket_name + and file_size_bytes. + + Returns: + float: The total time taken for the upload in seconds. + """ + start_time = time.monotonic_ns() + + bucket = json_client.bucket(other_params.bucket_name) + blob = bucket.blob(filename) + upload_size = other_params.file_size_bytes + # Don't use BytesIO because it'll report high memory usage for large files. + # `RandomBytesIO` generates random bytes on the fly. 
+ in_mem_file = RandomBytesIO(upload_size) + blob.upload_from_file(in_mem_file) + + end_time = time.monotonic_ns() + elapsed_time = end_time - start_time + return elapsed_time / 1_000_000_000 + +@pytest.mark.parametrize( + "workload_params", + all_params["write_seq"], + indirect=True, + ids=lambda p: p.name, +) +def test_uploads_single_proc_single_coro( + benchmark, storage_client, blobs_to_delete, monitor, workload_params +): + """ + Benchmarks uploads using a single process and a single coroutine. + It passes the workload to either `upload_chunks_using_grpc` (for zonal buckets) + or `upload_using_json` (for regional buckets) for benchmarking using `benchmark.pedantic`. + """ + params, files_names = workload_params + + if params.bucket_type == "zonal": + logging.info("bucket type zonal") + target_func = upload_chunks_using_grpc + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + client = loop.run_until_complete(create_client()) + else: + logging.info("bucket type regional") + target_func = upload_using_json + loop = None + client = storage_client + + output_times = [] + + def target_wrapper(*args, **kwargs): + result = target_func(*args, **kwargs) + output_times.append(result) + return output_times + + try: + with monitor() as m: + output_times = benchmark.pedantic( + target=target_wrapper, + iterations=1, + rounds=params.rounds, + args=( + loop, + client, + files_names[0], + params, + ), + ) + finally: + if loop is not None: + tasks = asyncio.all_tasks(loop=loop) + for task in tasks: + task.cancel() + loop.run_until_complete(asyncio.gather(*tasks, return_exceptions=True)) + loop.close() + publish_benchmark_extra_info(benchmark, params, benchmark_group="write", true_times=output_times) + publish_resource_metrics(benchmark, m) + + blobs_to_delete.extend( + storage_client.bucket(params.bucket_name).blob(f) for f in files_names + ) + +def upload_files_using_grpc_multi_coro(loop, client, files, other_params): + """Uploads multiple files concurrently 
using gRPC with asyncio. + + Args: + loop: The asyncio event loop. + client: The async gRPC client. + files (list): A list of filenames to upload. + other_params: An object containing benchmark parameters. + + Returns: + float: The maximum latency observed among all coroutines. + """ + async def main(): + tasks = [] + for f in files: + tasks.append( + upload_chunks_using_grpc_async(client, f, other_params) + ) + return await asyncio.gather(*tasks) + + results = loop.run_until_complete(main()) + return max(results) + +def upload_files_using_json_multi_threaded(_, json_client, files, other_params): + """Uploads multiple files concurrently using the JSON API with a ThreadPoolExecutor. + + Args: + _ (any): Unused. + json_client: The standard Python Storage client. + files (list): A list of filenames to upload. + other_params: An object containing benchmark parameters. + + Returns: + float: The maximum latency observed among all concurrent uploads. + """ + results = [] + with ThreadPoolExecutor(max_workers=other_params.num_coros) as executor: + futures = [] + for f in files: + future = executor.submit( + upload_using_json, None, json_client, f, other_params + ) + futures.append(future) + + for future in futures: + results.append(future.result()) + + return max(results) + +@pytest.mark.parametrize( + "workload_params", + all_params["write_seq_multi_coros"], + indirect=True, + ids=lambda p: p.name, +) +def test_uploads_single_proc_multi_coro( + benchmark, storage_client, blobs_to_delete, monitor, workload_params +): + """ + Benchmarks uploads using a single process and multiple coroutines. + + For zonal buckets, it uses `upload_files_using_grpc_multi_coro` to upload + multiple files concurrently with asyncio. For regional buckets, it uses + `upload_files_using_json_multi_threaded` with a ThreadPoolExecutor. 
+ """ + params, files_names = workload_params + + if params.bucket_type == "zonal": + logging.info("bucket type zonal") + target_func = upload_files_using_grpc_multi_coro + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + client = loop.run_until_complete(create_client()) + else: + logging.info("bucket type regional") + target_func = upload_files_using_json_multi_threaded + loop = None + client = storage_client + + output_times = [] + + def target_wrapper(*args, **kwargs): + result = target_func(*args, **kwargs) + output_times.append(result) + return output_times + + try: + with monitor() as m: + output_times = benchmark.pedantic( + target=target_wrapper, + iterations=1, + rounds=params.rounds, + args=( + loop, + client, + files_names, + params, + ), + ) + finally: + if loop is not None: + tasks = asyncio.all_tasks(loop=loop) + for task in tasks: + task.cancel() + loop.run_until_complete(asyncio.gather(*tasks, return_exceptions=True)) + loop.close() + publish_benchmark_extra_info(benchmark, params, benchmark_group="write", true_times=output_times) + publish_resource_metrics(benchmark, m) + + blobs_to_delete.extend( + storage_client.bucket(params.bucket_name).blob(f) for f in files_names + ) + +def _upload_files_worker(files_to_upload, other_params, bucket_type): + """A worker function for multi-processing uploads. + + Initializes a client and calls the appropriate multi-coroutine upload function. + This function is intended to be called in a separate process. + + Args: + files_to_upload (list): List of filenames for this worker to upload. + other_params: An object containing benchmark parameters. + bucket_type (str): The type of bucket ('zonal' or 'regional'). + + Returns: + float: The maximum latency from the uploads performed by this worker. 
+ """ + if bucket_type == "zonal": + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + client = loop.run_until_complete(create_client()) + try: + result = upload_files_using_grpc_multi_coro( + loop, client, files_to_upload, other_params + ) + finally: + # cleanup loop + tasks = asyncio.all_tasks(loop=loop) + for task in tasks: + task.cancel() + loop.run_until_complete(asyncio.gather(*tasks, return_exceptions=True)) + loop.close() + return result + else: # regional + + json_client = storage.Client() + return upload_files_using_json_multi_threaded( + None, json_client, files_to_upload, other_params + ) + +def upload_files_mp_mc_wrapper(files_names, params): + """Wrapper for multi-process, multi-coroutine uploads. + + Distributes files among a pool of processes and calls the worker function. + + Args: + files_names (list): The full list of filenames to upload. + params: An object containing benchmark parameters (num_processes, num_coros). + + Returns: + float: The maximum latency observed across all processes. + """ + num_processes = params.num_processes + num_coros = params.num_coros + + filenames_per_process = [ + files_names[i : i + num_coros] for i in range(0, len(files_names), num_coros) + ] + + args = [ + ( + filenames, + params, + params.bucket_type, + ) + for filenames in filenames_per_process + ] + + ctx = multiprocessing.get_context("spawn") + with ctx.Pool(processes=num_processes) as pool: + results = pool.starmap(_upload_files_worker, args) + + return max(results) + +@pytest.mark.parametrize( + "workload_params", + all_params["write_seq_multi_process"], + indirect=True, + ids=lambda p: p.name, +) +def test_uploads_multi_proc_multi_coro( + benchmark, storage_client, blobs_to_delete, monitor, workload_params +): + """ + Benchmarks uploads using multiple processes and multiple coroutines. + + This test distributes files among a pool of processes using `upload_files_mp_mc_wrapper`. 
+ The reported latency for each round is the maximum latency observed across all processes. + """ + params, files_names = workload_params + + output_times = [] + + def target_wrapper(*args, **kwargs): + result = upload_files_mp_mc_wrapper(*args, **kwargs) + output_times.append(result) + return output_times + + try: + with monitor() as m: + output_times = benchmark.pedantic( + target=target_wrapper, + iterations=1, + rounds=params.rounds, + args=( + files_names, + params, + ), + ) + finally: + publish_benchmark_extra_info(benchmark, params, benchmark_group="write", true_times=output_times) + publish_resource_metrics(benchmark, m) + + blobs_to_delete.extend( + storage_client.bucket(params.bucket_name).blob(f) for f in files_names + ) \ No newline at end of file From f751f1e8ff4b9b737edb310ca9e21985675cba3f Mon Sep 17 00:00:00 2001 From: Chandra Shekhar Sirimala Date: Thu, 22 Jan 2026 11:29:02 +0530 Subject: [PATCH 08/23] refactor: move system tests to single event loop (#1714) refactor: move system tests to single event loop. * Without this, adding more tests was "shaky"; some of them crashed due to a segmentation fault in gRPC's underlying C layer. 
--- tests/system/test_zonal.py | 692 +++++++++++++++++++++---------------- 1 file changed, 390 insertions(+), 302 deletions(-) diff --git a/tests/system/test_zonal.py b/tests/system/test_zonal.py index b5942365a..b5291ca08 100644 --- a/tests/system/test_zonal.py +++ b/tests/system/test_zonal.py @@ -2,6 +2,7 @@ import os import uuid from io import BytesIO +import asyncio # python additional imports import google_crc32c @@ -33,12 +34,55 @@ _BYTES_TO_UPLOAD = b"dummy_bytes_to_write_read_and_delete_appendable_object" +async def create_async_grpc_client(attempt_direct_path=True): + """Initializes and returns the async gRPC client.""" + return AsyncGrpcClient(attempt_direct_path=attempt_direct_path).grpc_client + + +@pytest.fixture(scope="session") +def event_loop(): + """Redefine pytest-asyncio's event_loop fixture to be session-scoped.""" + loop = asyncio.new_event_loop() + yield loop + loop.close() + + +@pytest.fixture(scope="session") +def grpc_clients(event_loop): + + # gRPC clients have to be instantiated in the event loop; + # otherwise gRPC creates its own event loop and attaches it to the client, + # which will lead to a deadlock because the client runs in one event loop and + # the MRD or Appendable-Writer in another. 
+ # https://github.com/grpc/grpc/blob/61fe9b40a986792ab7d4eb8924027b671faf26ba/src/python/grpcio/grpc/aio/_channel.py#L369 + # https://github.com/grpc/grpc/blob/61fe9b40a986792ab7d4eb8924027b671faf26ba/src/python/grpcio/grpc/_cython/_cygrpc/aio/common.pyx.pxi#L249 + clients = { + True: event_loop.run_until_complete( + create_async_grpc_client(attempt_direct_path=True) + ), + False: event_loop.run_until_complete( + create_async_grpc_client(attempt_direct_path=False) + ), + } + return clients + + +# This fixture is for tests that are NOT parametrized by attempt_direct_path +@pytest.fixture +def grpc_client(grpc_clients): + return grpc_clients[False] + + +@pytest.fixture +def grpc_client_direct(grpc_clients): + return grpc_clients[True] + + def _get_equal_dist(a: int, b: int) -> tuple[int, int]: step = (b - a) // 3 return a + step, a + 2 * step -@pytest.mark.asyncio @pytest.mark.parametrize( "object_size", [ @@ -51,47 +95,46 @@ def _get_equal_dist(a: int, b: int) -> tuple[int, int]: "attempt_direct_path", [True, False], ) -async def test_basic_wrd( - storage_client, blobs_to_delete, attempt_direct_path, object_size +def test_basic_wrd( + storage_client, + blobs_to_delete, + attempt_direct_path, + object_size, + event_loop, + grpc_clients, ): object_name = f"test_basic_wrd-{str(uuid.uuid4())}" - # Client instantiation; it cannot be part of fixture because. - # grpc_client's event loop and event loop of coroutine running it - # (i.e. this test) must be same. - # Note: - # 1. @pytest.mark.asyncio ensures new event loop for each test. - # 2. we can keep the same event loop for entire module but that may - # create issues if tests are run in parallel and one test hogs the event - # loop slowing down other tests. 
- object_data = os.urandom(object_size) - object_checksum = google_crc32c.value(object_data) - grpc_client = AsyncGrpcClient(attempt_direct_path=attempt_direct_path).grpc_client - - writer = AsyncAppendableObjectWriter(grpc_client, _ZONAL_BUCKET, object_name) - await writer.open() - await writer.append(object_data) - object_metadata = await writer.close(finalize_on_close=True) - assert object_metadata.size == object_size - assert int(object_metadata.checksums.crc32c) == object_checksum - - mrd = AsyncMultiRangeDownloader(grpc_client, _ZONAL_BUCKET, object_name) - buffer = BytesIO() - await mrd.open() - # (0, 0) means read the whole object - await mrd.download_ranges([(0, 0, buffer)]) - await mrd.close() - assert buffer.getvalue() == object_data - assert mrd.persisted_size == object_size - - # Clean up; use json client (i.e. `storage_client` fixture) to delete. - blobs_to_delete.append(storage_client.bucket(_ZONAL_BUCKET).blob(object_name)) - del writer - del mrd - gc.collect() - - -@pytest.mark.asyncio + async def _run(): + object_data = os.urandom(object_size) + object_checksum = google_crc32c.value(object_data) + grpc_client = grpc_clients[attempt_direct_path] + + writer = AsyncAppendableObjectWriter(grpc_client, _ZONAL_BUCKET, object_name) + await writer.open() + await writer.append(object_data) + object_metadata = await writer.close(finalize_on_close=True) + assert object_metadata.size == object_size + assert int(object_metadata.checksums.crc32c) == object_checksum + + mrd = AsyncMultiRangeDownloader(grpc_client, _ZONAL_BUCKET, object_name) + buffer = BytesIO() + await mrd.open() + # (0, 0) means read the whole object + await mrd.download_ranges([(0, 0, buffer)]) + await mrd.close() + assert buffer.getvalue() == object_data + assert mrd.persisted_size == object_size + + # Clean up; use json client (i.e. `storage_client` fixture) to delete. 
+ blobs_to_delete.append(storage_client.bucket(_ZONAL_BUCKET).blob(object_name)) + del writer + del mrd + gc.collect() + + event_loop.run_until_complete(_run()) + + @pytest.mark.parametrize( "object_size", [ @@ -100,48 +143,43 @@ async def test_basic_wrd( 20 * 1024 * 1024, # greater than _MAX_BUFFER_SIZE_BYTES ], ) -async def test_basic_wrd_in_slices(storage_client, blobs_to_delete, object_size): +def test_basic_wrd_in_slices( + storage_client, blobs_to_delete, object_size, event_loop, grpc_client +): object_name = f"test_basic_wrd-{str(uuid.uuid4())}" - # Client instantiation; it cannot be part of fixture because. - # grpc_client's event loop and event loop of coroutine running it - # (i.e. this test) must be same. - # Note: - # 1. @pytest.mark.asyncio ensures new event loop for each test. - # 2. we can keep the same event loop for entire module but that may - # create issues if tests are run in parallel and one test hogs the event - # loop slowing down other tests. - object_data = os.urandom(object_size) - object_checksum = google_crc32c.value(object_data) - grpc_client = AsyncGrpcClient().grpc_client - - writer = AsyncAppendableObjectWriter(grpc_client, _ZONAL_BUCKET, object_name) - await writer.open() - mark1, mark2 = _get_equal_dist(0, object_size) - await writer.append(object_data[0:mark1]) - await writer.append(object_data[mark1:mark2]) - await writer.append(object_data[mark2:]) - object_metadata = await writer.close(finalize_on_close=True) - assert object_metadata.size == object_size - assert int(object_metadata.checksums.crc32c) == object_checksum - - mrd = AsyncMultiRangeDownloader(grpc_client, _ZONAL_BUCKET, object_name) - buffer = BytesIO() - await mrd.open() - # (0, 0) means read the whole object - await mrd.download_ranges([(0, 0, buffer)]) - await mrd.close() - assert buffer.getvalue() == object_data - assert mrd.persisted_size == object_size - - # Clean up; use json client (i.e. `storage_client` fixture) to delete. 
- blobs_to_delete.append(storage_client.bucket(_ZONAL_BUCKET).blob(object_name)) - del writer - del mrd - gc.collect() - - -@pytest.mark.asyncio + async def _run(): + object_data = os.urandom(object_size) + object_checksum = google_crc32c.value(object_data) + + writer = AsyncAppendableObjectWriter(grpc_client, _ZONAL_BUCKET, object_name) + await writer.open() + mark1, mark2 = _get_equal_dist(0, object_size) + await writer.append(object_data[0:mark1]) + await writer.append(object_data[mark1:mark2]) + await writer.append(object_data[mark2:]) + object_metadata = await writer.close(finalize_on_close=True) + assert object_metadata.size == object_size + assert int(object_metadata.checksums.crc32c) == object_checksum + + mrd = AsyncMultiRangeDownloader(grpc_client, _ZONAL_BUCKET, object_name) + buffer = BytesIO() + await mrd.open() + # (0, 0) means read the whole object + await mrd.download_ranges([(0, 0, buffer)]) + await mrd.close() + assert buffer.getvalue() == object_data + assert mrd.persisted_size == object_size + + # Clean up; use json client (i.e. `storage_client` fixture) to delete. + blobs_to_delete.append(storage_client.bucket(_ZONAL_BUCKET).blob(object_name)) + del writer + del mrd + gc.collect() + + event_loop.run_until_complete(_run()) + + @pytest.mark.parametrize( "flush_interval", [ @@ -151,219 +189,264 @@ async def test_basic_wrd_in_slices(storage_client, blobs_to_delete, object_size) _DEFAULT_FLUSH_INTERVAL_BYTES, ], ) -async def test_wrd_with_non_default_flush_interval( +def test_wrd_with_non_default_flush_interval( storage_client, blobs_to_delete, flush_interval, + event_loop, + grpc_client, ): object_name = f"test_basic_wrd-{str(uuid.uuid4())}" object_size = 9 * 1024 * 1024 - # Client instantiation; it cannot be part of fixture because. - # grpc_client's event loop and event loop of coroutine running it - # (i.e. this test) must be same. - # Note: - # 1. @pytest.mark.asyncio ensures new event loop for each test. - # 2. 
we can keep the same event loop for entire module but that may - # create issues if tests are run in parallel and one test hogs the event - # loop slowing down other tests. - object_data = os.urandom(object_size) - object_checksum = google_crc32c.value(object_data) - grpc_client = AsyncGrpcClient().grpc_client - - writer = AsyncAppendableObjectWriter( - grpc_client, - _ZONAL_BUCKET, - object_name, - writer_options={"FLUSH_INTERVAL_BYTES": flush_interval}, - ) - await writer.open() - mark1, mark2 = _get_equal_dist(0, object_size) - await writer.append(object_data[0:mark1]) - await writer.append(object_data[mark1:mark2]) - await writer.append(object_data[mark2:]) - object_metadata = await writer.close(finalize_on_close=True) - assert object_metadata.size == object_size - assert int(object_metadata.checksums.crc32c) == object_checksum - - mrd = AsyncMultiRangeDownloader(grpc_client, _ZONAL_BUCKET, object_name) - buffer = BytesIO() - await mrd.open() - # (0, 0) means read the whole object - await mrd.download_ranges([(0, 0, buffer)]) - await mrd.close() - assert buffer.getvalue() == object_data - assert mrd.persisted_size == object_size - - # Clean up; use json client (i.e. `storage_client` fixture) to delete. 
- blobs_to_delete.append(storage_client.bucket(_ZONAL_BUCKET).blob(object_name)) - del writer - del mrd - gc.collect() - - -@pytest.mark.asyncio -async def test_read_unfinalized_appendable_object(storage_client, blobs_to_delete): - object_name = f"read_unfinalized_appendable_object-{str(uuid.uuid4())[:4]}" - grpc_client = AsyncGrpcClient(attempt_direct_path=True).grpc_client - - writer = AsyncAppendableObjectWriter(grpc_client, _ZONAL_BUCKET, object_name) - await writer.open() - await writer.append(_BYTES_TO_UPLOAD) - await writer.flush() - - mrd = AsyncMultiRangeDownloader(grpc_client, _ZONAL_BUCKET, object_name) - buffer = BytesIO() - await mrd.open() - assert mrd.persisted_size == len(_BYTES_TO_UPLOAD) - # (0, 0) means read the whole object - await mrd.download_ranges([(0, 0, buffer)]) - await mrd.close() - assert buffer.getvalue() == _BYTES_TO_UPLOAD - - # Clean up; use json client (i.e. `storage_client` fixture) to delete. - blobs_to_delete.append(storage_client.bucket(_ZONAL_BUCKET).blob(object_name)) - del writer - del mrd - gc.collect() - - -@pytest.mark.asyncio -async def test_mrd_open_with_read_handle(): - grpc_client = AsyncGrpcClient().grpc_client - object_name = f"test_read_handl-{str(uuid.uuid4())[:4]}" - writer = AsyncAppendableObjectWriter(grpc_client, _ZONAL_BUCKET, object_name) - await writer.open() - await writer.append(_BYTES_TO_UPLOAD) - await writer.close() - - mrd = AsyncMultiRangeDownloader(grpc_client, _ZONAL_BUCKET, object_name) - await mrd.open() - read_handle = mrd.read_handle - await mrd.close() - - # Open a new MRD using the `read_handle` obtained above - new_mrd = AsyncMultiRangeDownloader( - grpc_client, _ZONAL_BUCKET, object_name, read_handle=read_handle - ) - await new_mrd.open() - # persisted_size not set when opened with read_handle - assert new_mrd.persisted_size is None - buffer = BytesIO() - await new_mrd.download_ranges([(0, 0, buffer)]) - await new_mrd.close() - assert buffer.getvalue() == _BYTES_TO_UPLOAD - del mrd - del 
new_mrd - gc.collect() - - -@pytest.mark.asyncio -async def test_read_unfinalized_appendable_object_with_generation( - storage_client, blobs_to_delete + async def _run(): + object_data = os.urandom(object_size) + object_checksum = google_crc32c.value(object_data) + + writer = AsyncAppendableObjectWriter( + grpc_client, + _ZONAL_BUCKET, + object_name, + writer_options={"FLUSH_INTERVAL_BYTES": flush_interval}, + ) + await writer.open() + mark1, mark2 = _get_equal_dist(0, object_size) + await writer.append(object_data[0:mark1]) + await writer.append(object_data[mark1:mark2]) + await writer.append(object_data[mark2:]) + object_metadata = await writer.close(finalize_on_close=True) + assert object_metadata.size == object_size + assert int(object_metadata.checksums.crc32c) == object_checksum + + mrd = AsyncMultiRangeDownloader(grpc_client, _ZONAL_BUCKET, object_name) + buffer = BytesIO() + await mrd.open() + # (0, 0) means read the whole object + await mrd.download_ranges([(0, 0, buffer)]) + await mrd.close() + assert buffer.getvalue() == object_data + assert mrd.persisted_size == object_size + + # Clean up; use json client (i.e. `storage_client` fixture) to delete. 
+ blobs_to_delete.append(storage_client.bucket(_ZONAL_BUCKET).blob(object_name)) + del writer + del mrd + gc.collect() + + event_loop.run_until_complete(_run()) + + +def test_read_unfinalized_appendable_object( + storage_client, blobs_to_delete, event_loop, grpc_client_direct ): object_name = f"read_unfinalized_appendable_object-{str(uuid.uuid4())[:4]}" - grpc_client = AsyncGrpcClient(attempt_direct_path=True).grpc_client - async def _read_and_verify(expected_content, generation=None): - """Helper to read object content and verify against expected.""" - mrd = AsyncMultiRangeDownloader( - grpc_client, _ZONAL_BUCKET, object_name, generation + async def _run(): + grpc_client = grpc_client_direct + writer = AsyncAppendableObjectWriter(grpc_client, _ZONAL_BUCKET, object_name) + await writer.open() + await writer.append(_BYTES_TO_UPLOAD) + await writer.flush() + + mrd = AsyncMultiRangeDownloader(grpc_client, _ZONAL_BUCKET, object_name) + buffer = BytesIO() + await mrd.open() + assert mrd.persisted_size == len(_BYTES_TO_UPLOAD) + # (0, 0) means read the whole object + await mrd.download_ranges([(0, 0, buffer)]) + await mrd.close() + assert buffer.getvalue() == _BYTES_TO_UPLOAD + + # Clean up; use json client (i.e. `storage_client` fixture) to delete. 
+ blobs_to_delete.append(storage_client.bucket(_ZONAL_BUCKET).blob(object_name)) + del writer + del mrd + gc.collect() + + event_loop.run_until_complete(_run()) + + +def test_mrd_open_with_read_handle(event_loop, grpc_client_direct): + object_name = f"test_read_handl-{str(uuid.uuid4())[:4]}" + + async def _run(): + writer = AsyncAppendableObjectWriter( + grpc_client_direct, _ZONAL_BUCKET, object_name + ) + await writer.open() + await writer.append(_BYTES_TO_UPLOAD) + await writer.close() + + mrd = AsyncMultiRangeDownloader(grpc_client_direct, _ZONAL_BUCKET, object_name) + await mrd.open() + read_handle = mrd.read_handle + await mrd.close() + + # Open a new MRD using the `read_handle` obtained above + new_mrd = AsyncMultiRangeDownloader( + grpc_client_direct, _ZONAL_BUCKET, object_name, read_handle=read_handle ) + await new_mrd.open() + # persisted_size not set when opened with read_handle + assert new_mrd.persisted_size is None buffer = BytesIO() + await new_mrd.download_ranges([(0, 0, buffer)]) + await new_mrd.close() + assert buffer.getvalue() == _BYTES_TO_UPLOAD + del mrd + del new_mrd + gc.collect() + + event_loop.run_until_complete(_run()) + + +def test_mrd_open_with_read_handle_over_cloud_path(event_loop, grpc_client): + object_name = f"test_read_handl-{str(uuid.uuid4())[:4]}" + + async def _run(): + writer = AsyncAppendableObjectWriter(grpc_client, _ZONAL_BUCKET, object_name) + await writer.open() + await writer.append(_BYTES_TO_UPLOAD) + await writer.close() + + mrd = AsyncMultiRangeDownloader(grpc_client, _ZONAL_BUCKET, object_name) await mrd.open() - try: - assert mrd.persisted_size == len(expected_content) - await mrd.download_ranges([(0, 0, buffer)]) - assert buffer.getvalue() == expected_content - finally: - await mrd.close() - return mrd - - # First write - writer = AsyncAppendableObjectWriter(grpc_client, _ZONAL_BUCKET, object_name) - await writer.open() - await writer.append(_BYTES_TO_UPLOAD) - await writer.flush() - generation = writer.generation - 
- # First read - mrd = await _read_and_verify(_BYTES_TO_UPLOAD) - - # Second write, using generation from the first write. - writer_2 = AsyncAppendableObjectWriter( - grpc_client, _ZONAL_BUCKET, object_name, generation=generation - ) - await writer_2.open() - await writer_2.append(_BYTES_TO_UPLOAD) - await writer_2.flush() - - # Second read - mrd_2 = await _read_and_verify(_BYTES_TO_UPLOAD + _BYTES_TO_UPLOAD, generation) - - # Clean up - blobs_to_delete.append(storage_client.bucket(_ZONAL_BUCKET).blob(object_name)) - del writer - del writer_2 - del mrd - del mrd_2 - gc.collect() - - -@pytest.mark.asyncio -async def test_append_flushes_and_state_lookup(storage_client, blobs_to_delete): + read_handle = mrd.read_handle + await mrd.close() + + # Open a new MRD using the `read_handle` obtained above + new_mrd = AsyncMultiRangeDownloader( + grpc_client, _ZONAL_BUCKET, object_name, read_handle=read_handle + ) + await new_mrd.open() + # persisted_size is set regardless of whether we use read_handle or not + # because read_handle won't work in CLOUD_PATH. 
+ assert new_mrd.persisted_size == len(_BYTES_TO_UPLOAD) + buffer = BytesIO() + await new_mrd.download_ranges([(0, 0, buffer)]) + await new_mrd.close() + assert buffer.getvalue() == _BYTES_TO_UPLOAD + del mrd + del new_mrd + gc.collect() + + event_loop.run_until_complete(_run()) + + +def test_read_unfinalized_appendable_object_with_generation( + storage_client, blobs_to_delete, event_loop, grpc_client_direct +): + object_name = f"read_unfinalized_appendable_object-{str(uuid.uuid4())[:4]}" + grpc_client = grpc_client_direct + + async def _run(): + async def _read_and_verify(expected_content, generation=None): + """Helper to read object content and verify against expected.""" + mrd = AsyncMultiRangeDownloader( + grpc_client, _ZONAL_BUCKET, object_name, generation + ) + buffer = BytesIO() + await mrd.open() + try: + assert mrd.persisted_size == len(expected_content) + await mrd.download_ranges([(0, 0, buffer)]) + assert buffer.getvalue() == expected_content + finally: + await mrd.close() + return mrd + + # First write + writer = AsyncAppendableObjectWriter(grpc_client, _ZONAL_BUCKET, object_name) + await writer.open() + await writer.append(_BYTES_TO_UPLOAD) + await writer.flush() + generation = writer.generation + + # First read + mrd = await _read_and_verify(_BYTES_TO_UPLOAD) + + # Second write, using generation from the first write. 
+ writer_2 = AsyncAppendableObjectWriter( + grpc_client, _ZONAL_BUCKET, object_name, generation=generation + ) + await writer_2.open() + await writer_2.append(_BYTES_TO_UPLOAD) + await writer_2.flush() + + # Second read + mrd_2 = await _read_and_verify(_BYTES_TO_UPLOAD + _BYTES_TO_UPLOAD, generation) + + # Clean up + blobs_to_delete.append(storage_client.bucket(_ZONAL_BUCKET).blob(object_name)) + del writer + del writer_2 + del mrd + del mrd_2 + gc.collect() + + event_loop.run_until_complete(_run()) + + +def test_append_flushes_and_state_lookup( + storage_client, blobs_to_delete, event_loop, grpc_client +): """ System test for AsyncAppendableObjectWriter, verifying flushing behavior for both small and large appends. """ object_name = f"test-append-flush-varied-size-{uuid.uuid4()}" - grpc_client = AsyncGrpcClient().grpc_client - writer = AsyncAppendableObjectWriter(grpc_client, _ZONAL_BUCKET, object_name) - # Schedule for cleanup - blobs_to_delete.append(storage_client.bucket(_ZONAL_BUCKET).blob(object_name)) + async def _run(): + writer = AsyncAppendableObjectWriter(grpc_client, _ZONAL_BUCKET, object_name) + + # Schedule for cleanup + blobs_to_delete.append(storage_client.bucket(_ZONAL_BUCKET).blob(object_name)) + + # --- Part 1: Test with small data --- + small_data = b"small data" + + await writer.open() + assert writer._is_stream_open - # --- Part 1: Test with small data --- - small_data = b"small data" + await writer.append(small_data) + persisted_size = await writer.state_lookup() + assert persisted_size == len(small_data) - await writer.open() - assert writer._is_stream_open + # --- Part 2: Test with large data --- + large_data = os.urandom(38 * 1024 * 1024) - await writer.append(small_data) - persisted_size = await writer.state_lookup() - assert persisted_size == len(small_data) + # Append data larger than the default flush interval (16 MiB). + # This should trigger the interval-based flushing logic. 
+ await writer.append(large_data) - # --- Part 2: Test with large data --- - large_data = os.urandom(38 * 1024 * 1024) + # Verify the total data has been persisted. + total_size = len(small_data) + len(large_data) + persisted_size = await writer.state_lookup() + assert persisted_size == total_size - # Append data larger than the default flush interval (16 MiB). - # This should trigger the interval-based flushing logic. - await writer.append(large_data) + # --- Part 3: Finalize and verify --- + final_object = await writer.close(finalize_on_close=True) - # Verify the total data has been persisted. - total_size = len(small_data) + len(large_data) - persisted_size = await writer.state_lookup() - assert persisted_size == total_size + assert not writer._is_stream_open + assert final_object.size == total_size - # --- Part 3: Finalize and verify --- - final_object = await writer.close(finalize_on_close=True) + # Verify the full content of the object. + full_data = small_data + large_data + mrd = AsyncMultiRangeDownloader(grpc_client, _ZONAL_BUCKET, object_name) + buffer = BytesIO() + await mrd.open() + # (0, 0) means read the whole object + await mrd.download_ranges([(0, 0, buffer)]) + await mrd.close() + content = buffer.getvalue() + assert content == full_data - assert not writer._is_stream_open - assert final_object.size == total_size + event_loop.run_until_complete(_run()) - # Verify the full content of the object. 
- full_data = small_data + large_data - mrd = AsyncMultiRangeDownloader(grpc_client, _ZONAL_BUCKET, object_name) - buffer = BytesIO() - await mrd.open() - # (0, 0) means read the whole object - await mrd.download_ranges([(0, 0, buffer)]) - await mrd.close() - content = buffer.getvalue() - assert content == full_data -@pytest.mark.asyncio -async def test_open_with_generation_zero(storage_client, blobs_to_delete): +def test_open_with_generation_zero( + storage_client, blobs_to_delete, event_loop, grpc_client +): """Tests that using `generation=0` fails if the object already exists. This test verifies that: @@ -373,62 +456,67 @@ async def test_open_with_generation_zero(storage_client, blobs_to_delete): precondition (object must not exist) is not met. """ object_name = f"test_append_with_generation-{uuid.uuid4()}" - grpc_client = AsyncGrpcClient().grpc_client - writer = AsyncAppendableObjectWriter(grpc_client, _ZONAL_BUCKET, object_name, generation=0) - - # Empty object is created. - await writer.open() - assert writer.is_stream_open - await writer.close() - assert not writer.is_stream_open - - - with pytest.raises(FailedPrecondition) as exc_info: + async def _run(): writer = AsyncAppendableObjectWriter( grpc_client, _ZONAL_BUCKET, object_name, generation=0 ) + + # Empty object is created. 
await writer.open() - assert exc_info.value.code == 400 + assert writer.is_stream_open - # cleanup - del writer - gc.collect() + await writer.close() + assert not writer.is_stream_open - blobs_to_delete.append(storage_client.bucket(_ZONAL_BUCKET).blob(object_name)) + with pytest.raises(FailedPrecondition) as exc_info: + writer_fail = AsyncAppendableObjectWriter( + grpc_client, _ZONAL_BUCKET, object_name, generation=0 + ) + await writer_fail.open() + assert exc_info.value.code == 400 -@pytest.mark.asyncio -async def test_open_existing_object_with_gen_None_overrides_existing(storage_client, blobs_to_delete): + # cleanup + del writer + gc.collect() + + blobs_to_delete.append(storage_client.bucket(_ZONAL_BUCKET).blob(object_name)) + + event_loop.run_until_complete(_run()) + + +def test_open_existing_object_with_gen_None_overrides_existing( + storage_client, blobs_to_delete, event_loop, grpc_client +): """ Test that a new writer when specifies `None` overrides the existing object. """ object_name = f"test_append_with_generation-{uuid.uuid4()}" - grpc_client = AsyncGrpcClient().grpc_client - writer = AsyncAppendableObjectWriter(grpc_client, _ZONAL_BUCKET, object_name, generation=0) - - # Empty object is created. - await writer.open() - assert writer.is_stream_open - old_gen = writer.generation - - - await writer.close() - assert not writer.is_stream_open + async def _run(): + writer = AsyncAppendableObjectWriter( + grpc_client, _ZONAL_BUCKET, object_name, generation=0 + ) + # Empty object is created. 
+ await writer.open() + assert writer.is_stream_open + old_gen = writer.generation + await writer.close() + assert not writer.is_stream_open - new_writer = AsyncAppendableObjectWriter( + new_writer = AsyncAppendableObjectWriter( grpc_client, _ZONAL_BUCKET, object_name, generation=None - ) - await new_writer.open() - assert new_writer.generation != old_gen + ) + await new_writer.open() + assert new_writer.generation != old_gen - # assert exc_info.value.code == 400 + # cleanup + del writer + del new_writer + gc.collect() - # cleanup - del writer - del new_writer - gc.collect() + blobs_to_delete.append(storage_client.bucket(_ZONAL_BUCKET).blob(object_name)) - blobs_to_delete.append(storage_client.bucket(_ZONAL_BUCKET).blob(object_name)) \ No newline at end of file + event_loop.run_until_complete(_run()) From 2bc15fa570683ba584230c51b439d189dbdcd580 Mon Sep 17 00:00:00 2001 From: Chandra Shekhar Sirimala Date: Thu, 22 Jan 2026 13:00:03 +0530 Subject: [PATCH 09/23] feat: Add support for opening via `write_handle` and fix `write_handle` type (#1715) feat: Add support for opening via `write_handle` and fix `write_handle` type --- .../asyncio/async_abstract_object_stream.py | 6 +- .../asyncio/async_appendable_object_writer.py | 11 ++-- .../asyncio/async_multi_range_downloader.py | 12 ++-- .../asyncio/async_read_object_stream.py | 8 +-- .../asyncio/async_write_object_stream.py | 48 ++++++++------ tests/system/test_zonal.py | 50 +++++++++++++++ .../asyncio/test_async_write_object_stream.py | 64 ++++++++++++++----- 7 files changed, 146 insertions(+), 53 deletions(-) diff --git a/google/cloud/storage/_experimental/asyncio/async_abstract_object_stream.py b/google/cloud/storage/_experimental/asyncio/async_abstract_object_stream.py index 49d7a293a..26cbab7a0 100644 --- a/google/cloud/storage/_experimental/asyncio/async_abstract_object_stream.py +++ b/google/cloud/storage/_experimental/asyncio/async_abstract_object_stream.py @@ -32,7 +32,7 @@ class 
_AsyncAbstractObjectStream(abc.ABC): :param generation_number: (Optional) If present, selects a specific revision of this object. - :type handle: bytes + :type handle: Any :param handle: (Optional) The handle for the object, could be read_handle or write_handle, based on how the stream is used. """ @@ -42,13 +42,13 @@ def __init__( bucket_name: str, object_name: str, generation_number: Optional[int] = None, - handle: Optional[bytes] = None, + handle: Optional[Any] = None, ) -> None: super().__init__() self.bucket_name: str = bucket_name self.object_name: str = object_name self.generation_number: Optional[int] = generation_number - self.handle: Optional[bytes] = handle + self.handle: Optional[Any] = handle @abc.abstractmethod async def open(self) -> None: diff --git a/google/cloud/storage/_experimental/asyncio/async_appendable_object_writer.py b/google/cloud/storage/_experimental/asyncio/async_appendable_object_writer.py index 444e8d030..c961fbefb 100644 --- a/google/cloud/storage/_experimental/asyncio/async_appendable_object_writer.py +++ b/google/cloud/storage/_experimental/asyncio/async_appendable_object_writer.py @@ -49,8 +49,8 @@ def __init__( client: AsyncGrpcClient.grpc_client, bucket_name: str, object_name: str, - generation=None, - write_handle=None, + generation: Optional[int] = None, + write_handle: Optional[_storage_v2.BidiWriteHandle] = None, writer_options: Optional[dict] = None, ): """ @@ -96,7 +96,7 @@ def __init__( :type object_name: str :param object_name: The name of the GCS Appendable Object to be written. - :type generation: int + :type generation: Optional[int] :param generation: (Optional) If present, creates writer for that specific revision of that object. Use this to append data to an existing Appendable Object. @@ -106,10 +106,10 @@ def __init__( overwriting existing objects). Warning: If `None`, a new object is created. 
If an object with the - same name already exists, it will be overwritten the moment + same name already exists, it will be overwritten the moment `writer.open()` is called. - :type write_handle: bytes + :type write_handle: _storage_v2.BidiWriteHandle :param write_handle: (Optional) An handle for writing the object. If provided, opening the bidi-gRPC connection will be faster. @@ -363,7 +363,6 @@ async def finalize(self) -> _storage_v2.Object: def is_stream_open(self) -> bool: return self._is_stream_open - # helper methods. async def append_from_string(self, data: str): """ diff --git a/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py b/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py index dedff67e2..66a9f0057 100644 --- a/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py +++ b/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py @@ -129,7 +129,7 @@ async def create_mrd( bucket_name: str, object_name: str, generation_number: Optional[int] = None, - read_handle: Optional[bytes] = None, + read_handle: Optional[_storage_v2.BidiReadHandle] = None, retry_policy: Optional[AsyncRetry] = None, metadata: Optional[List[Tuple[str, str]]] = None, ) -> AsyncMultiRangeDownloader: @@ -149,7 +149,7 @@ async def create_mrd( :param generation_number: (Optional) If present, selects a specific revision of this object. - :type read_handle: bytes + :type read_handle: _storage_v2.BidiReadHandle :param read_handle: (Optional) An existing handle for reading the object. If provided, opening the bidi-gRPC connection will be faster. @@ -172,7 +172,7 @@ def __init__( bucket_name: str, object_name: str, generation_number: Optional[int] = None, - read_handle: Optional[bytes] = None, + read_handle: Optional[_storage_v2.BidiReadHandle] = None, ) -> None: """Constructor for AsyncMultiRangeDownloader, clients are not adviced to use it directly. Instead it's adviced to use the classmethod `create_mrd`. 
@@ -190,7 +190,7 @@ def __init__( :param generation_number: (Optional) If present, selects a specific revision of this object. - :type read_handle: bytes + :type read_handle: _storage_v2.BidiReadHandle :param read_handle: (Optional) An existing read handle. """ @@ -200,7 +200,7 @@ def __init__( self.bucket_name = bucket_name self.object_name = object_name self.generation_number = generation_number - self.read_handle = read_handle + self.read_handle: Optional[_storage_v2.BidiReadHandle] = read_handle self.read_obj_str: Optional[_AsyncReadObjectStream] = None self._is_stream_open: bool = False self._routing_token: Optional[str] = None @@ -493,4 +493,4 @@ async def close(self): @property def is_stream_open(self) -> bool: - return self._is_stream_open + return self._is_stream_open \ No newline at end of file diff --git a/google/cloud/storage/_experimental/asyncio/async_read_object_stream.py b/google/cloud/storage/_experimental/asyncio/async_read_object_stream.py index 5e84485d5..15772da87 100644 --- a/google/cloud/storage/_experimental/asyncio/async_read_object_stream.py +++ b/google/cloud/storage/_experimental/asyncio/async_read_object_stream.py @@ -51,7 +51,7 @@ class _AsyncReadObjectStream(_AsyncAbstractObjectStream): :param generation_number: (Optional) If present, selects a specific revision of this object. - :type read_handle: bytes + :type read_handle: _storage_v2.BidiReadHandle :param read_handle: (Optional) An existing handle for reading the object. If provided, opening the bidi-gRPC connection will be faster. 
""" @@ -62,7 +62,7 @@ def __init__( bucket_name: str, object_name: str, generation_number: Optional[int] = None, - read_handle: Optional[bytes] = None, + read_handle: Optional[_storage_v2.BidiReadHandle] = None, ) -> None: if client is None: raise ValueError("client must be provided") @@ -77,7 +77,7 @@ def __init__( generation_number=generation_number, ) self.client: AsyncGrpcClient.grpc_client = client - self.read_handle: Optional[bytes] = read_handle + self.read_handle: Optional[_storage_v2.BidiReadHandle] = read_handle self._full_bucket_name = f"projects/_/buckets/{self.bucket_name}" @@ -195,4 +195,4 @@ async def recv(self) -> _storage_v2.BidiReadObjectResponse: @property def is_stream_open(self) -> bool: - return self._is_stream_open + return self._is_stream_open \ No newline at end of file diff --git a/google/cloud/storage/_experimental/asyncio/async_write_object_stream.py b/google/cloud/storage/_experimental/asyncio/async_write_object_stream.py index b79e707f2..731b18e45 100644 --- a/google/cloud/storage/_experimental/asyncio/async_write_object_stream.py +++ b/google/cloud/storage/_experimental/asyncio/async_write_object_stream.py @@ -59,7 +59,7 @@ class _AsyncWriteObjectStream(_AsyncAbstractObjectStream): same name already exists, it will be overwritten the moment `writer.open()` is called. - :type write_handle: bytes + :type write_handle: _storage_v2.BidiWriteHandle :param write_handle: (Optional) An existing handle for writing the object. If provided, opening the bidi-gRPC connection will be faster. 
""" @@ -70,7 +70,7 @@ def __init__( bucket_name: str, object_name: str, generation_number: Optional[int] = None, # None means new object - write_handle: Optional[bytes] = None, + write_handle: Optional[_storage_v2.BidiWriteHandle] = None, ) -> None: if client is None: raise ValueError("client must be provided") @@ -85,7 +85,7 @@ def __init__( generation_number=generation_number, ) self.client: AsyncGrpcClient.grpc_client = client - self.write_handle: Optional[bytes] = write_handle + self.write_handle: Optional[_storage_v2.BidiWriteHandle] = write_handle self._full_bucket_name = f"projects/_/buckets/{self.bucket_name}" @@ -120,6 +120,9 @@ async def open(self) -> None: # Created object type would be Appendable Object. # if `generation_number` == 0 new object will be created only if there # isn't any existing object. + is_open_via_write_handle = ( + self.write_handle is not None and self.generation_number + ) if self.generation_number is None or self.generation_number == 0: self.first_bidi_write_req = _storage_v2.BidiWriteObjectRequest( write_object_spec=_storage_v2.WriteObjectSpec( @@ -136,6 +139,7 @@ async def open(self) -> None: bucket=self._full_bucket_name, object=self.object_name, generation=self.generation_number, + write_handle=self.write_handle, ), ) self.socket_like_rpc = AsyncBidiRpc( @@ -145,25 +149,32 @@ async def open(self) -> None: await self.socket_like_rpc.open() # this is actually 1 send response = await self.socket_like_rpc.recv() self._is_stream_open = True - - if not response.resource: - raise ValueError( - "Failed to obtain object resource after opening the stream" - ) - if not response.resource.generation: - raise ValueError( - "Failed to obtain object generation after opening the stream" - ) + if is_open_via_write_handle: + # Don't use if not response.persisted_size because this will be true + # if persisted_size==0 (0 is considered "Falsy" in Python) + if response.persisted_size is None: + raise ValueError( + "Failed to obtain persisted_size 
after opening the stream via write_handle" + ) + self.persisted_size = response.persisted_size + else: + if not response.resource: + raise ValueError( + "Failed to obtain object resource after opening the stream" + ) + if not response.resource.generation: + raise ValueError( + "Failed to obtain object generation after opening the stream" + ) + if not response.resource.size: + # Appending to a 0 byte appendable object. + self.persisted_size = 0 + else: + self.persisted_size = response.resource.size if not response.write_handle: raise ValueError("Failed to obtain write_handle after opening the stream") - if not response.resource.size: - # Appending to a 0 byte appendable object. - self.persisted_size = 0 - else: - self.persisted_size = response.resource.size - self.generation_number = response.resource.generation self.write_handle = response.write_handle @@ -212,4 +223,3 @@ async def recv(self) -> _storage_v2.BidiWriteObjectResponse: @property def is_stream_open(self) -> bool: return self._is_stream_open - diff --git a/tests/system/test_zonal.py b/tests/system/test_zonal.py index b5291ca08..0bd61ff53 100644 --- a/tests/system/test_zonal.py +++ b/tests/system/test_zonal.py @@ -333,6 +333,56 @@ async def _run(): event_loop.run_until_complete(_run()) +def test_wrd_open_with_write_handle( + event_loop, grpc_client_direct, storage_client, blobs_to_delete +): + object_name = f"test_write_handl-{str(uuid.uuid4())[:4]}" + + async def _run(): + # 1. Create an object and get its write_handle + writer = AsyncAppendableObjectWriter( + grpc_client_direct, _ZONAL_BUCKET, object_name + ) + await writer.open() + write_handle = writer.write_handle + await writer.close() + + # 2. 
Open a new writer using the obtained `write_handle` and generation + new_writer = AsyncAppendableObjectWriter( + grpc_client_direct, + _ZONAL_BUCKET, + object_name, + write_handle=write_handle, + generation=writer.generation, + ) + await new_writer.open() + # Verify that the new writer is open and has the same write_handle + assert new_writer.is_stream_open + assert new_writer.generation == writer.generation + + # 3. Append some data using the new writer + test_data = b"data_from_new_writer" + await new_writer.append(test_data) + await new_writer.close() + + # 4. Verify the data was written correctly by reading it back + mrd = AsyncMultiRangeDownloader(grpc_client_direct, _ZONAL_BUCKET, object_name) + buffer = BytesIO() + await mrd.open() + await mrd.download_ranges([(0, 0, buffer)]) + await mrd.close() + assert buffer.getvalue() == test_data + + # Clean up + blobs_to_delete.append(storage_client.bucket(_ZONAL_BUCKET).blob(object_name)) + del writer + del new_writer + del mrd + gc.collect() + + event_loop.run_until_complete(_run()) + + def test_read_unfinalized_appendable_object_with_generation( storage_client, blobs_to_delete, event_loop, grpc_client_direct ): diff --git a/tests/unit/asyncio/test_async_write_object_stream.py b/tests/unit/asyncio/test_async_write_object_stream.py index f0d90543f..619d5f7e6 100644 --- a/tests/unit/asyncio/test_async_write_object_stream.py +++ b/tests/unit/asyncio/test_async_write_object_stream.py @@ -25,6 +25,7 @@ OBJECT = "my-object" GENERATION = 12345 WRITE_HANDLE = b"test-handle" +WRITE_HANDLE_PROTO = _storage_v2.BidiWriteHandle(handle=WRITE_HANDLE) @pytest.fixture @@ -56,7 +57,7 @@ async def instantiate_write_obj_stream(mock_client, mock_cls_async_bidi_rpc, ope mock_response.resource = mock.MagicMock(spec=_storage_v2.Object) mock_response.resource.generation = GENERATION mock_response.resource.size = 0 - mock_response.write_handle = WRITE_HANDLE + mock_response.write_handle = WRITE_HANDLE_PROTO socket_like_rpc.recv = 
AsyncMock(return_value=mock_response) write_obj_stream = _AsyncWriteObjectStream(mock_client, BUCKET, OBJECT) @@ -90,18 +91,16 @@ def test_async_write_object_stream_init(mock_client): def test_async_write_object_stream_init_with_generation_and_handle(mock_client): """Test the constructor with optional arguments.""" - generation = 12345 - write_handle = b"test-handle" stream = _AsyncWriteObjectStream( mock_client, BUCKET, OBJECT, - generation_number=generation, - write_handle=write_handle, + generation_number=GENERATION, + write_handle=WRITE_HANDLE_PROTO, ) - assert stream.generation_number == generation - assert stream.write_handle == write_handle + assert stream.generation_number == GENERATION + assert stream.write_handle == WRITE_HANDLE_PROTO def test_async_write_object_stream_init_raises_value_error(): @@ -131,7 +130,7 @@ async def test_open_for_new_object(mock_async_bidi_rpc, mock_client): mock_response.resource = mock.MagicMock(spec=_storage_v2.Object) mock_response.resource.generation = GENERATION mock_response.resource.size = 0 - mock_response.write_handle = WRITE_HANDLE + mock_response.write_handle = WRITE_HANDLE_PROTO socket_like_rpc.recv = mock.AsyncMock(return_value=mock_response) stream = _AsyncWriteObjectStream(mock_client, BUCKET, OBJECT) @@ -144,7 +143,7 @@ async def test_open_for_new_object(mock_async_bidi_rpc, mock_client): socket_like_rpc.open.assert_called_once() socket_like_rpc.recv.assert_called_once() assert stream.generation_number == GENERATION - assert stream.write_handle == WRITE_HANDLE + assert stream.write_handle == WRITE_HANDLE_PROTO assert stream.persisted_size == 0 @@ -163,7 +162,7 @@ async def test_open_for_new_object_with_generation_zero(mock_async_bidi_rpc, moc mock_response.resource = mock.MagicMock(spec=_storage_v2.Object) mock_response.resource.generation = GENERATION mock_response.resource.size = 0 - mock_response.write_handle = WRITE_HANDLE + mock_response.write_handle = WRITE_HANDLE_PROTO socket_like_rpc.recv = 
mock.AsyncMock(return_value=mock_response) stream = _AsyncWriteObjectStream(mock_client, BUCKET, OBJECT, generation_number=0) @@ -180,7 +179,7 @@ async def test_open_for_new_object_with_generation_zero(mock_async_bidi_rpc, moc socket_like_rpc.open.assert_called_once() socket_like_rpc.recv.assert_called_once() assert stream.generation_number == GENERATION - assert stream.write_handle == WRITE_HANDLE + assert stream.write_handle == WRITE_HANDLE_PROTO assert stream.persisted_size == 0 @@ -199,7 +198,7 @@ async def test_open_for_existing_object(mock_async_bidi_rpc, mock_client): mock_response.resource = mock.MagicMock(spec=_storage_v2.Object) mock_response.resource.size = 1024 mock_response.resource.generation = GENERATION - mock_response.write_handle = WRITE_HANDLE + mock_response.write_handle = WRITE_HANDLE_PROTO socket_like_rpc.recv = mock.AsyncMock(return_value=mock_response) stream = _AsyncWriteObjectStream( @@ -214,7 +213,7 @@ async def test_open_for_existing_object(mock_async_bidi_rpc, mock_client): socket_like_rpc.open.assert_called_once() socket_like_rpc.recv.assert_called_once() assert stream.generation_number == GENERATION - assert stream.write_handle == WRITE_HANDLE + assert stream.write_handle == WRITE_HANDLE_PROTO assert stream.persisted_size == 1024 @@ -233,7 +232,7 @@ async def test_open_when_already_open_raises_error(mock_async_bidi_rpc, mock_cli mock_response.resource = mock.MagicMock(spec=_storage_v2.Object) mock_response.resource.generation = GENERATION mock_response.resource.size = 0 - mock_response.write_handle = WRITE_HANDLE + mock_response.write_handle = WRITE_HANDLE_PROTO socket_like_rpc.recv = mock.AsyncMock(return_value=mock_response) stream = _AsyncWriteObjectStream(mock_client, BUCKET, OBJECT) @@ -315,6 +314,41 @@ async def test_open_raises_error_on_missing_write_handle( await stream.open() +@pytest.mark.asyncio +@mock.patch( + "google.cloud.storage._experimental.asyncio.async_write_object_stream.AsyncBidiRpc" +) +async def 
test_open_raises_error_on_missing_persisted_size_with_write_handle( + mock_async_bidi_rpc, mock_client +): + """Test that open raises ValueError if persisted_size is None when opened via write_handle.""" + socket_like_rpc = mock.AsyncMock() + mock_async_bidi_rpc.return_value = socket_like_rpc + + # + mock_response = mock.MagicMock(spec=_storage_v2.BidiWriteObjectResponse) + mock_response.persisted_size = None # This is the key part of the test + mock_response.write_handle = ( + WRITE_HANDLE_PROTO # Ensure write_handle is present to avoid that error + ) + socket_like_rpc.recv.return_value = mock_response + + # ACT + stream = _AsyncWriteObjectStream( + mock_client, + BUCKET, + OBJECT, + write_handle=WRITE_HANDLE_PROTO, + generation_number=GENERATION, + ) + + with pytest.raises( + ValueError, + match="Failed to obtain persisted_size after opening the stream via write_handle", + ): + await stream.open() + + @pytest.mark.asyncio @mock.patch( "google.cloud.storage._experimental.asyncio.async_write_object_stream.AsyncBidiRpc" @@ -452,4 +486,4 @@ async def test_requests_done(mock_cls_async_bidi_rpc, mock_client): # Assert write_obj_stream.socket_like_rpc.send.assert_called_once_with(None) - write_obj_stream.socket_like_rpc.recv.assert_called_once() \ No newline at end of file + write_obj_stream.socket_like_rpc.recv.assert_called_once() From 5d9fafe1466b5ccb1db4a814967a5cc8465148a2 Mon Sep 17 00:00:00 2001 From: Chandra Shekhar Sirimala Date: Thu, 22 Jan 2026 14:56:53 +0530 Subject: [PATCH 10/23] fix: update write handle on every recv() (#1716) fix: update `write_handle` on every `recv()` from write object stream. 
--- .../storage/_experimental/asyncio/_utils.py | 6 ++ .../asyncio/async_appendable_object_writer.py | 8 +- .../asyncio/async_write_object_stream.py | 7 +- .../asyncio/test_async_write_object_stream.py | 102 +++++++++++++++++- 4 files changed, 116 insertions(+), 7 deletions(-) diff --git a/google/cloud/storage/_experimental/asyncio/_utils.py b/google/cloud/storage/_experimental/asyncio/_utils.py index 32d83a586..170a0cfae 100644 --- a/google/cloud/storage/_experimental/asyncio/_utils.py +++ b/google/cloud/storage/_experimental/asyncio/_utils.py @@ -33,3 +33,9 @@ def raise_if_no_fast_crc32c(): "C extension is required for faster data integrity checks." "For more information, see https://github.com/googleapis/python-crc32c." ) + + +def update_write_handle_if_exists(obj, response): + """Update the write_handle attribute of an object if it exists in the response.""" + if hasattr(response, "write_handle") and response.write_handle is not None: + obj.write_handle = response.write_handle diff --git a/google/cloud/storage/_experimental/asyncio/async_appendable_object_writer.py b/google/cloud/storage/_experimental/asyncio/async_appendable_object_writer.py index c961fbefb..eda21019d 100644 --- a/google/cloud/storage/_experimental/asyncio/async_appendable_object_writer.py +++ b/google/cloud/storage/_experimental/asyncio/async_appendable_object_writer.py @@ -27,7 +27,7 @@ from google_crc32c import Checksum from google.api_core import exceptions -from ._utils import raise_if_no_fast_crc32c +from . import _utils from google.cloud import _storage_v2 from google.cloud.storage._experimental.asyncio.async_grpc_client import ( AsyncGrpcClient, @@ -121,7 +121,7 @@ def __init__( servers. Default is `_DEFAULT_FLUSH_INTERVAL_BYTES`. Must be a multiple of `_MAX_CHUNK_SIZE_BYTES`. 
""" - raise_if_no_fast_crc32c() + _utils.raise_if_no_fast_crc32c() self.client = client self.bucket_name = bucket_name self.object_name = object_name @@ -175,6 +175,7 @@ async def state_lookup(self) -> int: ) ) response = await self.write_obj_stream.recv() + _utils.update_write_handle_if_exists(self, response) self.persisted_size = response.persisted_size return self.persisted_size @@ -253,6 +254,7 @@ async def append(self, data: bytes) -> None: if is_last_chunk: response = await self.write_obj_stream.recv() + _utils.update_write_handle_if_exists(self, response) self.persisted_size = response.persisted_size self.offset = self.persisted_size self.bytes_appended_since_last_flush = 0 @@ -295,6 +297,7 @@ async def flush(self) -> int: ) ) response = await self.write_obj_stream.recv() + _utils.update_write_handle_if_exists(self, response) self.persisted_size = response.persisted_size self.offset = self.persisted_size return self.persisted_size @@ -351,6 +354,7 @@ async def finalize(self) -> _storage_v2.Object: _storage_v2.BidiWriteObjectRequest(finish_write=True) ) response = await self.write_obj_stream.recv() + _utils.update_write_handle_if_exists(self, response) self.object_resource = response.resource self.persisted_size = self.object_resource.size await self.write_obj_stream.close() diff --git a/google/cloud/storage/_experimental/asyncio/async_write_object_stream.py b/google/cloud/storage/_experimental/asyncio/async_write_object_stream.py index 731b18e45..682438dea 100644 --- a/google/cloud/storage/_experimental/asyncio/async_write_object_stream.py +++ b/google/cloud/storage/_experimental/asyncio/async_write_object_stream.py @@ -22,6 +22,7 @@ """ from typing import Optional +from . 
import _utils from google.cloud import _storage_v2 from google.cloud.storage._experimental.asyncio.async_grpc_client import AsyncGrpcClient from google.cloud.storage._experimental.asyncio.async_abstract_object_stream import ( @@ -190,7 +191,7 @@ async def requests_done(self): """Signals that all requests have been sent.""" await self.socket_like_rpc.send(None) - await self.socket_like_rpc.recv() + _utils.update_write_handle_if_exists(self, await self.socket_like_rpc.recv()) async def send( self, bidi_write_object_request: _storage_v2.BidiWriteObjectRequest @@ -218,7 +219,9 @@ async def recv(self) -> _storage_v2.BidiWriteObjectResponse: """ if not self._is_stream_open: raise ValueError("Stream is not open") - return await self.socket_like_rpc.recv() + response = await self.socket_like_rpc.recv() + _utils.update_write_handle_if_exists(self, response) + return response @property def is_stream_open(self) -> bool: diff --git a/tests/unit/asyncio/test_async_write_object_stream.py b/tests/unit/asyncio/test_async_write_object_stream.py index 619d5f7e6..92ba2925a 100644 --- a/tests/unit/asyncio/test_async_write_object_stream.py +++ b/tests/unit/asyncio/test_async_write_object_stream.py @@ -24,8 +24,10 @@ BUCKET = "my-bucket" OBJECT = "my-object" GENERATION = 12345 -WRITE_HANDLE = b"test-handle" -WRITE_HANDLE_PROTO = _storage_v2.BidiWriteHandle(handle=WRITE_HANDLE) +WRITE_HANDLE_BYTES = b"test-handle" +NEW_WRITE_HANDLE_BYTES = b"new-test-handle" +WRITE_HANDLE_PROTO = _storage_v2.BidiWriteHandle(handle=WRITE_HANDLE_BYTES) +NEW_WRITE_HANDLE_PROTO = _storage_v2.BidiWriteHandle(handle=NEW_WRITE_HANDLE_BYTES) @pytest.fixture @@ -151,7 +153,9 @@ async def test_open_for_new_object(mock_async_bidi_rpc, mock_client): @mock.patch( "google.cloud.storage._experimental.asyncio.async_write_object_stream.AsyncBidiRpc" ) -async def test_open_for_new_object_with_generation_zero(mock_async_bidi_rpc, mock_client): +async def test_open_for_new_object_with_generation_zero( + mock_async_bidi_rpc, 
mock_client +): """Test opening a stream for a new object.""" # Arrange socket_like_rpc = mock.AsyncMock() @@ -487,3 +491,95 @@ async def test_requests_done(mock_cls_async_bidi_rpc, mock_client): # Assert write_obj_stream.socket_like_rpc.send.assert_called_once_with(None) write_obj_stream.socket_like_rpc.recv.assert_called_once() + + +@pytest.mark.asyncio +@mock.patch( + "google.cloud.storage._experimental.asyncio.async_write_object_stream.AsyncBidiRpc" +) +async def test_open_for_existing_object_with_none_size( + mock_async_bidi_rpc, mock_client +): + """Test opening a stream for an existing object where size is None.""" + # Arrange + socket_like_rpc = mock.AsyncMock() + mock_async_bidi_rpc.return_value = socket_like_rpc + socket_like_rpc.open = mock.AsyncMock() + + mock_response = mock.MagicMock(spec=_storage_v2.BidiWriteObjectResponse) + mock_response.resource = mock.MagicMock(spec=_storage_v2.Object) + mock_response.resource.size = None + mock_response.resource.generation = GENERATION + mock_response.write_handle = WRITE_HANDLE_PROTO + socket_like_rpc.recv = mock.AsyncMock(return_value=mock_response) + + stream = _AsyncWriteObjectStream( + mock_client, BUCKET, OBJECT, generation_number=GENERATION + ) + + # Act + await stream.open() + + # Assert + assert stream.persisted_size == 0 + + +@pytest.mark.asyncio +@mock.patch( + "google.cloud.storage._experimental.asyncio.async_write_object_stream.AsyncBidiRpc" +) +async def test_recv_updates_write_handle(mock_cls_async_bidi_rpc, mock_client): + """Test that recv updates the write_handle if present in the response.""" + # Arrange + write_obj_stream = await instantiate_write_obj_stream( + mock_client, mock_cls_async_bidi_rpc, open=True + ) + + assert write_obj_stream.write_handle == WRITE_HANDLE_PROTO # Initial handle + + # GCS can periodicallly update write handle in their responses. 
+ bidi_write_object_response = _storage_v2.BidiWriteObjectResponse( + write_handle=NEW_WRITE_HANDLE_PROTO + ) + write_obj_stream.socket_like_rpc.recv = AsyncMock( + return_value=bidi_write_object_response + ) + + # Act + response = await write_obj_stream.recv() + + # Assert + write_obj_stream.socket_like_rpc.recv.assert_called_once() + assert response == bidi_write_object_response + # asserts that new write handle has been updated. + assert write_obj_stream.write_handle == NEW_WRITE_HANDLE_PROTO + + +@pytest.mark.asyncio +@mock.patch( + "google.cloud.storage._experimental.asyncio.async_write_object_stream.AsyncBidiRpc" +) +async def test_requests_done_updates_write_handle(mock_cls_async_bidi_rpc, mock_client): + """Test that requests_done updates the write_handle if present in the response.""" + # Arrange + write_obj_stream = await instantiate_write_obj_stream( + mock_client, mock_cls_async_bidi_rpc, open=True + ) + assert write_obj_stream.write_handle == WRITE_HANDLE_PROTO # Initial handle + + # new_write_handle = b"new-test-handle" + bidi_write_object_response = _storage_v2.BidiWriteObjectResponse( + write_handle=NEW_WRITE_HANDLE_PROTO + ) + write_obj_stream.socket_like_rpc.send = AsyncMock() + write_obj_stream.socket_like_rpc.recv = AsyncMock( + return_value=bidi_write_object_response + ) + + # Act + await write_obj_stream.requests_done() + + # Assert + write_obj_stream.socket_like_rpc.send.assert_called_once_with(None) + write_obj_stream.socket_like_rpc.recv.assert_called_once() + assert write_obj_stream.write_handle == NEW_WRITE_HANDLE_PROTO From dbd162b3583e32e6f705a51f5c3fef333a9b89d0 Mon Sep 17 00:00:00 2001 From: Pulkit Aggarwal <54775856+Pulkit0110@users.noreply.github.com> Date: Wed, 28 Jan 2026 11:19:22 +0530 Subject: [PATCH 11/23] feat(experimental): integrate writes strategy and appendable object writer (#1695) Integrate all the components required for bidi writes retries --- .../asyncio/async_appendable_object_writer.py | 325 ++++-- 
.../asyncio/async_grpc_client.py | 1 + .../asyncio/async_multi_range_downloader.py | 2 +- .../asyncio/async_read_object_stream.py | 2 +- .../asyncio/async_write_object_stream.py | 68 +- .../_experimental/asyncio/retry/_helpers.py | 46 +- .../retry/writes_resumption_strategy.py | 75 +- tests/conformance/test_bidi_writes.py | 267 +++++ tests/perf/microbenchmarks/_utils.py | 22 +- tests/perf/microbenchmarks/conftest.py | 19 +- tests/perf/microbenchmarks/reads/config.py | 9 +- .../perf/microbenchmarks/reads/test_reads.py | 6 +- .../perf/microbenchmarks/resource_monitor.py | 2 +- tests/perf/microbenchmarks/writes/config.py | 9 +- .../microbenchmarks/writes/test_writes.py | 40 +- tests/system/test_zonal.py | 1 - .../retry/test_writes_resumption_strategy.py | 457 +++++---- .../test_async_appendable_object_writer.py | 963 +++++++----------- tests/unit/asyncio/test_async_grpc_client.py | 11 +- .../asyncio/test_async_write_object_stream.py | 716 ++++--------- 20 files changed, 1526 insertions(+), 1515 deletions(-) create mode 100644 tests/conformance/test_bidi_writes.py diff --git a/google/cloud/storage/_experimental/asyncio/async_appendable_object_writer.py b/google/cloud/storage/_experimental/asyncio/async_appendable_object_writer.py index eda21019d..7795a2ac5 100644 --- a/google/cloud/storage/_experimental/asyncio/async_appendable_object_writer.py +++ b/google/cloud/storage/_experimental/asyncio/async_appendable_object_writer.py @@ -22,10 +22,16 @@ """ from io import BufferedReader -from typing import Optional, Union +import io +import logging +from typing import List, Optional, Tuple, Union -from google_crc32c import Checksum from google.api_core import exceptions +from google.api_core.retry_async import AsyncRetry +from google.rpc import status_pb2 +from google.cloud._storage_v2.types import BidiWriteObjectRedirectedError +from google.cloud._storage_v2.types.storage import BidiWriteObjectRequest + from . 
import _utils from google.cloud import _storage_v2 @@ -35,10 +41,72 @@ from google.cloud.storage._experimental.asyncio.async_write_object_stream import ( _AsyncWriteObjectStream, ) +from google.cloud.storage._experimental.asyncio.retry.bidi_stream_retry_manager import ( + _BidiStreamRetryManager, +) +from google.cloud.storage._experimental.asyncio.retry.writes_resumption_strategy import ( + _WriteResumptionStrategy, + _WriteState, +) +from google.cloud.storage._experimental.asyncio.retry._helpers import ( + _extract_bidi_writes_redirect_proto, +) _MAX_CHUNK_SIZE_BYTES = 2 * 1024 * 1024 # 2 MiB _DEFAULT_FLUSH_INTERVAL_BYTES = 16 * 1024 * 1024 # 16 MiB +_BIDI_WRITE_REDIRECTED_TYPE_URL = ( + "type.googleapis.com/google.storage.v2.BidiWriteObjectRedirectedError" +) +logger = logging.getLogger(__name__) + + +def _is_write_retryable(exc): + """Predicate to determine if a write operation should be retried.""" + + if isinstance( + exc, + ( + exceptions.InternalServerError, + exceptions.ServiceUnavailable, + exceptions.DeadlineExceeded, + exceptions.TooManyRequests, + BidiWriteObjectRedirectedError, + ), + ): + logger.warning(f"Retryable write exception encountered: {exc}") + return True + + grpc_error = None + if isinstance(exc, exceptions.Aborted) and exc.errors: + grpc_error = exc.errors[0] + if isinstance(grpc_error, BidiWriteObjectRedirectedError): + return True + + trailers = grpc_error.trailing_metadata() + if not trailers: + return False + + status_details_bin = None + for key, value in trailers: + if key == "grpc-status-details-bin": + status_details_bin = value + break + + if status_details_bin: + status_proto = status_pb2.Status() + try: + status_proto.ParseFromString(status_details_bin) + for detail in status_proto.details: + if detail.type_url == _BIDI_WRITE_REDIRECTED_TYPE_URL: + return True + except Exception: + logger.error( + "Error unpacking redirect details from gRPC error. 
Exception: ", + {exc}, + ) + return False + return False class AsyncAppendableObjectWriter: @@ -128,13 +196,7 @@ def __init__( self.write_handle = write_handle self.generation = generation - self.write_obj_stream = _AsyncWriteObjectStream( - client=self.client, - bucket_name=self.bucket_name, - object_name=self.object_name, - generation_number=self.generation, - write_handle=self.write_handle, - ) + self.write_obj_stream: Optional[_AsyncWriteObjectStream] = None self._is_stream_open: bool = False # `offset` is the latest size of the object without staleless. self.offset: Optional[int] = None @@ -156,6 +218,8 @@ def __init__( f"flush_interval must be a multiple of {_MAX_CHUNK_SIZE_BYTES}, but provided {self.flush_interval}" ) self.bytes_appended_since_last_flush = 0 + self._routing_token: Optional[str] = None + self.object_resource: Optional[_storage_v2.Object] = None async def state_lookup(self) -> int: """Returns the persisted_size @@ -175,11 +239,25 @@ async def state_lookup(self) -> int: ) ) response = await self.write_obj_stream.recv() - _utils.update_write_handle_if_exists(self, response) self.persisted_size = response.persisted_size return self.persisted_size - async def open(self) -> None: + def _on_open_error(self, exc): + """Extracts routing token and write handle on redirect error during open.""" + redirect_proto = _extract_bidi_writes_redirect_proto(exc) + if redirect_proto: + if redirect_proto.routing_token: + self._routing_token = redirect_proto.routing_token + if redirect_proto.write_handle: + self.write_handle = redirect_proto.write_handle + if redirect_proto.generation: + self.generation = redirect_proto.generation + + async def open( + self, + retry_policy: Optional[AsyncRetry] = None, + metadata: Optional[List[Tuple[str, str]]] = None, + ) -> None: """Opens the underlying bidi-gRPC stream. :raises ValueError: If the stream is already open. 
@@ -188,15 +266,80 @@ async def open(self) -> None:
         if self._is_stream_open:
             raise ValueError("Underlying bidi-gRPC stream is already open")
 
-        await self.write_obj_stream.open()
-        self._is_stream_open = True
-        if self.generation is None:
-            self.generation = self.write_obj_stream.generation_number
-        self.write_handle = self.write_obj_stream.write_handle
-        self.persisted_size = self.write_obj_stream.persisted_size
+        if retry_policy is None:
+            retry_policy = AsyncRetry(
+                predicate=_is_write_retryable, on_error=self._on_open_error
+            )
+        else:
+            original_on_error = retry_policy._on_error
+
+            def combined_on_error(exc):
+                self._on_open_error(exc)
+                if original_on_error:
+                    original_on_error(exc)
+
+            retry_policy = AsyncRetry(
+                predicate=_is_write_retryable,
+                initial=retry_policy._initial,
+                maximum=retry_policy._maximum,
+                multiplier=retry_policy._multiplier,
+                deadline=retry_policy._deadline,
+                on_error=combined_on_error,
+            )
+
+        async def _do_open():
+            current_metadata = list(metadata) if metadata else []
+
+            # Cleanup stream from previous failed attempt, if any.
+            if self.write_obj_stream:
+                if self.write_obj_stream.is_stream_open:
+                    try:
+                        await self.write_obj_stream.close()
+                    except Exception as e:
+                        logger.warning(
+                            "Error closing previous write stream during open retry. Got exception: %s",
+                            e,
+                        )
+                self.write_obj_stream = None
+                self._is_stream_open = False
+
+            self.write_obj_stream = _AsyncWriteObjectStream(
+                client=self.client,
+                bucket_name=self.bucket_name,
+                object_name=self.object_name,
+                generation_number=self.generation,
+                write_handle=self.write_handle,
+                routing_token=self._routing_token,
+            )
+
+            if self._routing_token:
+                current_metadata.append(
+                    ("x-goog-request-params", f"routing_token={self._routing_token}")
+                )
+
+            await self.write_obj_stream.open(
+                metadata=current_metadata if current_metadata else None
+            )
+
+            if self.write_obj_stream.generation_number:
+                self.generation = self.write_obj_stream.generation_number
+            if self.write_obj_stream.write_handle:
+                self.write_handle = self.write_obj_stream.write_handle
+            if self.write_obj_stream.persisted_size is not None:
+                self.persisted_size = self.write_obj_stream.persisted_size
+
+            self._is_stream_open = True
+            self._routing_token = None
 
-    async def append(self, data: bytes) -> None:
-        """Appends data to the Appendable object.
+        await retry_policy(_do_open)()
+
+    async def append(
+        self,
+        data: bytes,
+        retry_policy: Optional[AsyncRetry] = None,
+        metadata: Optional[List[Tuple[str, str]]] = None,
+    ) -> None:
+        """Appends data to the Appendable object with automatic retries.
 
         calling `self.append` will append bytes at the end of the current size
         ie. `self.offset` bytes relative to the begining of the object.
@@ -209,56 +352,110 @@ async def append(self, data: bytes) -> None:
 
         :type data: bytes
         :param data: The bytes to append to the object.
 
-        :rtype: None
+        :type retry_policy: :class:`~google.api_core.retry_async.AsyncRetry`
+        :param retry_policy: (Optional) The retry policy to use for the operation.
 
-        :raises ValueError: If the stream is not open (i.e., `open()` has not
-            been called).
-        """
+        :type metadata: List[Tuple[str, str]]
+        :param metadata: (Optional) The metadata to be sent with the request.
 
+        :raises ValueError: If the stream is not open.
+ """ if not self._is_stream_open: raise ValueError("Stream is not open. Call open() before append().") - total_bytes = len(data) - if total_bytes == 0: - # TODO: add warning. + if not data: + logger.debug("No data provided to append; returning without action.") return - if self.offset is None: - assert self.persisted_size is not None - self.offset = self.persisted_size - - start_idx = 0 - while start_idx < total_bytes: - end_idx = min(start_idx + _MAX_CHUNK_SIZE_BYTES, total_bytes) - data_chunk = data[start_idx:end_idx] - is_last_chunk = end_idx == total_bytes - chunk_size = end_idx - start_idx - await self.write_obj_stream.send( - _storage_v2.BidiWriteObjectRequest( - write_offset=self.offset, - checksummed_data=_storage_v2.ChecksummedData( - content=data_chunk, - crc32c=int.from_bytes(Checksum(data_chunk).digest(), "big"), - ), - state_lookup=is_last_chunk, - flush=is_last_chunk - or ( - self.bytes_appended_since_last_flush + chunk_size - >= self.flush_interval - ), - ) - ) - self.offset += chunk_size - self.bytes_appended_since_last_flush += chunk_size - if self.bytes_appended_since_last_flush >= self.flush_interval: - self.bytes_appended_since_last_flush = 0 + if retry_policy is None: + retry_policy = AsyncRetry(predicate=_is_write_retryable) + + strategy = _WriteResumptionStrategy() + buffer = io.BytesIO(data) + attempt_count = 0 + + def send_and_recv_generator( + requests: List[BidiWriteObjectRequest], + state: dict[str, _WriteState], + metadata: Optional[List[Tuple[str, str]]] = None, + ): + async def generator(): + nonlocal attempt_count + nonlocal requests + attempt_count += 1 + resp = None + write_state = state["write_state"] + # If this is a retry or redirect, we must re-open the stream + if attempt_count > 1 or write_state.routing_token: + logger.info( + f"Re-opening the stream with attempt_count: {attempt_count}" + ) + if self.write_obj_stream and self.write_obj_stream.is_stream_open: + await self.write_obj_stream.close() + + current_metadata = 
list(metadata) if metadata else [] + if write_state.routing_token: + current_metadata.append( + ( + "x-goog-request-params", + f"routing_token={write_state.routing_token}", + ) + ) + self._routing_token = write_state.routing_token + + self._is_stream_open = False + await self.open(metadata=current_metadata) + + write_state.persisted_size = self.persisted_size + write_state.write_handle = self.write_handle + write_state.routing_token = None + + write_state.user_buffer.seek(write_state.persisted_size) + write_state.bytes_sent = write_state.persisted_size + write_state.bytes_since_last_flush = 0 + + requests = strategy.generate_requests(state) + + num_requests = len(requests) + for i, chunk_req in enumerate(requests): + if i == num_requests - 1: + chunk_req.state_lookup = True + chunk_req.flush = True + await self.write_obj_stream.send(chunk_req) + + resp = await self.write_obj_stream.recv() + if resp: + if resp.persisted_size is not None: + self.persisted_size = resp.persisted_size + state["write_state"].persisted_size = resp.persisted_size + self.offset = self.persisted_size + if resp.write_handle: + self.write_handle = resp.write_handle + state["write_state"].write_handle = resp.write_handle + self.bytes_appended_since_last_flush = 0 + + yield resp + + return generator() + + # State initialization + write_state = _WriteState(_MAX_CHUNK_SIZE_BYTES, buffer, self.flush_interval) + write_state.write_handle = self.write_handle + write_state.persisted_size = self.persisted_size + write_state.bytes_sent = self.persisted_size + write_state.bytes_since_last_flush = self.bytes_appended_since_last_flush + + retry_manager = _BidiStreamRetryManager( + _WriteResumptionStrategy(), + lambda r, s: send_and_recv_generator(r, s, metadata), + ) + await retry_manager.execute({"write_state": write_state}, retry_policy) - if is_last_chunk: - response = await self.write_obj_stream.recv() - _utils.update_write_handle_if_exists(self, response) - self.persisted_size = response.persisted_size 
- self.offset = self.persisted_size - self.bytes_appended_since_last_flush = 0 - start_idx = end_idx + # Sync local markers + self.write_obj_stream.persisted_size = write_state.persisted_size + self.write_obj_stream.write_handle = write_state.write_handle + self.bytes_appended_since_last_flush = write_state.bytes_since_last_flush + self.persisted_size = write_state.persisted_size + self.offset = write_state.persisted_size async def simple_flush(self) -> None: """Flushes the data to the server. @@ -277,6 +474,7 @@ async def simple_flush(self) -> None: flush=True, ) ) + self.bytes_appended_since_last_flush = 0 async def flush(self) -> int: """Flushes the data to the server. @@ -297,9 +495,9 @@ async def flush(self) -> int: ) ) response = await self.write_obj_stream.recv() - _utils.update_write_handle_if_exists(self, response) self.persisted_size = response.persisted_size self.offset = self.persisted_size + self.bytes_appended_since_last_flush = 0 return self.persisted_size async def close(self, finalize_on_close=False) -> Union[int, _storage_v2.Object]: @@ -354,7 +552,6 @@ async def finalize(self) -> _storage_v2.Object: _storage_v2.BidiWriteObjectRequest(finish_write=True) ) response = await self.write_obj_stream.recv() - _utils.update_write_handle_if_exists(self, response) self.object_resource = response.resource self.persisted_size = self.object_resource.size await self.write_obj_stream.close() diff --git a/google/cloud/storage/_experimental/asyncio/async_grpc_client.py b/google/cloud/storage/_experimental/asyncio/async_grpc_client.py index b455b1c29..e985f2252 100644 --- a/google/cloud/storage/_experimental/asyncio/async_grpc_client.py +++ b/google/cloud/storage/_experimental/asyncio/async_grpc_client.py @@ -42,6 +42,7 @@ class AsyncGrpcClient: (Optional) Whether to attempt to use DirectPath for gRPC connections. Defaults to ``True``. 
""" + def __init__( self, credentials=None, diff --git a/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py b/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py index 66a9f0057..1d1c63efd 100644 --- a/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py +++ b/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py @@ -493,4 +493,4 @@ async def close(self): @property def is_stream_open(self) -> bool: - return self._is_stream_open \ No newline at end of file + return self._is_stream_open diff --git a/google/cloud/storage/_experimental/asyncio/async_read_object_stream.py b/google/cloud/storage/_experimental/asyncio/async_read_object_stream.py index 15772da87..b59c7d162 100644 --- a/google/cloud/storage/_experimental/asyncio/async_read_object_stream.py +++ b/google/cloud/storage/_experimental/asyncio/async_read_object_stream.py @@ -195,4 +195,4 @@ async def recv(self) -> _storage_v2.BidiReadObjectResponse: @property def is_stream_open(self) -> bool: - return self._is_stream_open \ No newline at end of file + return self._is_stream_open diff --git a/google/cloud/storage/_experimental/asyncio/async_write_object_stream.py b/google/cloud/storage/_experimental/asyncio/async_write_object_stream.py index 682438dea..b73a43d1b 100644 --- a/google/cloud/storage/_experimental/asyncio/async_write_object_stream.py +++ b/google/cloud/storage/_experimental/asyncio/async_write_object_stream.py @@ -21,9 +21,9 @@ if you want to use these Rapid Storage APIs. """ -from typing import Optional -from . 
import _utils +from typing import List, Optional, Tuple from google.cloud import _storage_v2 +from google.cloud.storage._experimental.asyncio import _utils from google.cloud.storage._experimental.asyncio.async_grpc_client import AsyncGrpcClient from google.cloud.storage._experimental.asyncio.async_abstract_object_stream import ( _AsyncAbstractObjectStream, @@ -72,6 +72,7 @@ def __init__( object_name: str, generation_number: Optional[int] = None, # None means new object write_handle: Optional[_storage_v2.BidiWriteHandle] = None, + routing_token: Optional[str] = None, ) -> None: if client is None: raise ValueError("client must be provided") @@ -87,6 +88,7 @@ def __init__( ) self.client: AsyncGrpcClient.grpc_client = client self.write_handle: Optional[_storage_v2.BidiWriteHandle] = write_handle + self.routing_token: Optional[str] = routing_token self._full_bucket_name = f"projects/_/buckets/{self.bucket_name}" @@ -101,7 +103,7 @@ def __init__( self.persisted_size = 0 self.object_resource: Optional[_storage_v2.Object] = None - async def open(self) -> None: + async def open(self, metadata: Optional[List[Tuple[str, str]]] = None) -> None: """ Opens the bidi-gRPC connection to write to the object. @@ -110,7 +112,7 @@ async def open(self) -> None: :rtype: None :raises ValueError: If the stream is already open. - :raises google.api_core.exceptions.FailedPrecondition: + :raises google.api_core.exceptions.FailedPrecondition: if `generation_number` is 0 and object already exists. """ if self._is_stream_open: @@ -121,9 +123,6 @@ async def open(self) -> None: # Created object type would be Appendable Object. # if `generation_number` == 0 new object will be created only if there # isn't any existing object. 
- is_open_via_write_handle = ( - self.write_handle is not None and self.generation_number - ) if self.generation_number is None or self.generation_number == 0: self.first_bidi_write_req = _storage_v2.BidiWriteObjectRequest( write_object_spec=_storage_v2.WriteObjectSpec( @@ -140,44 +139,46 @@ async def open(self) -> None: bucket=self._full_bucket_name, object=self.object_name, generation=self.generation_number, - write_handle=self.write_handle, + write_handle=self.write_handle if self.write_handle else None, + routing_token=self.routing_token if self.routing_token else None, ), ) + + request_param_values = [f"bucket={self._full_bucket_name}"] + final_metadata = [] + if metadata: + for key, value in metadata: + if key == "x-goog-request-params": + request_param_values.append(value) + else: + final_metadata.append((key, value)) + + final_metadata.append(("x-goog-request-params", ",".join(request_param_values))) + self.socket_like_rpc = AsyncBidiRpc( - self.rpc, initial_request=self.first_bidi_write_req, metadata=self.metadata + self.rpc, + initial_request=self.first_bidi_write_req, + metadata=final_metadata, ) await self.socket_like_rpc.open() # this is actually 1 send response = await self.socket_like_rpc.recv() self._is_stream_open = True - if is_open_via_write_handle: - # Don't use if not response.persisted_size because this will be true - # if persisted_size==0 (0 is considered "Falsy" in Python) - if response.persisted_size is None: - raise ValueError( - "Failed to obtain persisted_size after opening the stream via write_handle" - ) + + if response.persisted_size: self.persisted_size = response.persisted_size - else: - if not response.resource: - raise ValueError( - "Failed to obtain object resource after opening the stream" - ) - if not response.resource.generation: - raise ValueError( - "Failed to obtain object generation after opening the stream" - ) + + if response.resource: if not response.resource.size: # Appending to a 0 byte appendable object. 
self.persisted_size = 0 else: self.persisted_size = response.resource.size - if not response.write_handle: - raise ValueError("Failed to obtain write_handle after opening the stream") + self.generation_number = response.resource.generation - self.generation_number = response.resource.generation - self.write_handle = response.write_handle + if response.write_handle: + self.write_handle = response.write_handle async def close(self) -> None: """Closes the bidi-gRPC connection.""" @@ -220,7 +221,14 @@ async def recv(self) -> _storage_v2.BidiWriteObjectResponse: if not self._is_stream_open: raise ValueError("Stream is not open") response = await self.socket_like_rpc.recv() - _utils.update_write_handle_if_exists(self, response) + # Update write_handle if present in response + if response: + if response.write_handle: + self.write_handle = response.write_handle + if response.persisted_size is not None: + self.persisted_size = response.persisted_size + if response.resource and response.resource.size: + self.persisted_size = response.resource.size return response @property diff --git a/google/cloud/storage/_experimental/asyncio/retry/_helpers.py b/google/cloud/storage/_experimental/asyncio/retry/_helpers.py index 627bf5944..d9ad2462e 100644 --- a/google/cloud/storage/_experimental/asyncio/retry/_helpers.py +++ b/google/cloud/storage/_experimental/asyncio/retry/_helpers.py @@ -18,12 +18,19 @@ from typing import Tuple, Optional from google.api_core import exceptions -from google.cloud._storage_v2.types import BidiReadObjectRedirectedError +from google.cloud._storage_v2.types import ( + BidiReadObjectRedirectedError, + BidiWriteObjectRedirectedError, +) from google.rpc import status_pb2 _BIDI_READ_REDIRECTED_TYPE_URL = ( "type.googleapis.com/google.storage.v2.BidiReadObjectRedirectedError" ) +_BIDI_WRITE_REDIRECTED_TYPE_URL = ( + "type.googleapis.com/google.storage.v2.BidiWriteObjectRedirectedError" +) +logger = logging.getLogger(__name__) def _handle_redirect( @@ -78,6 +85,41 
@@ def _handle_redirect( read_handle = redirect_proto.read_handle break except Exception as e: - logging.ERROR(f"Error unpacking redirect: {e}") + logger.error(f"Error unpacking redirect: {e}") return routing_token, read_handle + + +def _extract_bidi_writes_redirect_proto(exc: Exception): + grpc_error = None + if isinstance(exc, exceptions.Aborted) and exc.errors: + grpc_error = exc.errors[0] + + if grpc_error: + if isinstance(grpc_error, BidiWriteObjectRedirectedError): + return grpc_error + + if hasattr(grpc_error, "trailing_metadata"): + trailers = grpc_error.trailing_metadata() + if not trailers: + return + + status_details_bin = None + for key, value in trailers: + if key == "grpc-status-details-bin": + status_details_bin = value + break + + if status_details_bin: + status_proto = status_pb2.Status() + try: + status_proto.ParseFromString(status_details_bin) + for detail in status_proto.details: + if detail.type_url == _BIDI_WRITE_REDIRECTED_TYPE_URL: + redirect_proto = BidiWriteObjectRedirectedError.deserialize( + detail.value + ) + return redirect_proto + except Exception: + logger.error("Error unpacking redirect details from gRPC error.") + pass diff --git a/google/cloud/storage/_experimental/asyncio/retry/writes_resumption_strategy.py b/google/cloud/storage/_experimental/asyncio/retry/writes_resumption_strategy.py index c6ae36339..09b22cb8e 100644 --- a/google/cloud/storage/_experimental/asyncio/retry/writes_resumption_strategy.py +++ b/google/cloud/storage/_experimental/asyncio/retry/writes_resumption_strategy.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import Any, Dict, IO, Iterable, Optional, Union +from typing import Any, Dict, IO, List, Optional, Union import google_crc32c from google.cloud._storage_v2.types import storage as storage_type @@ -20,32 +20,36 @@ from google.cloud.storage._experimental.asyncio.retry.base_strategy import ( _BaseResumptionStrategy, ) +from google.cloud.storage._experimental.asyncio.retry._helpers import ( + _extract_bidi_writes_redirect_proto, +) class _WriteState: """A helper class to track the state of a single upload operation. - :type spec: :class:`google.cloud.storage_v2.types.AppendObjectSpec` - :param spec: The specification for the object to write. - :type chunk_size: int :param chunk_size: The size of chunks to write to the server. :type user_buffer: IO[bytes] :param user_buffer: The data source. + + :type flush_interval: int + :param flush_interval: The flush interval at which the data is flushed. """ def __init__( self, - spec: Union[storage_type.AppendObjectSpec, storage_type.WriteObjectSpec], chunk_size: int, user_buffer: IO[bytes], + flush_interval: int, ): - self.spec = spec self.chunk_size = chunk_size self.user_buffer = user_buffer self.persisted_size: int = 0 self.bytes_sent: int = 0 + self.bytes_since_last_flush: int = 0 + self.flush_interval: int = flush_interval self.write_handle: Union[bytes, storage_type.BidiWriteHandle, None] = None self.routing_token: Optional[str] = None self.is_finalized: bool = False @@ -56,31 +60,14 @@ class _WriteResumptionStrategy(_BaseResumptionStrategy): def generate_requests( self, state: Dict[str, Any] - ) -> Iterable[storage_type.BidiWriteObjectRequest]: + ) -> List[storage_type.BidiWriteObjectRequest]: """Generates BidiWriteObjectRequests to resume or continue the upload. - For Appendable Objects, every stream opening should send an - AppendObjectSpec. If resuming, the `write_handle` is added to that spec. - This method is not applicable for `open` methods. 
""" write_state: _WriteState = state["write_state"] - initial_request = storage_type.BidiWriteObjectRequest() - - # Determine if we need to send WriteObjectSpec or AppendObjectSpec - if isinstance(write_state.spec, storage_type.WriteObjectSpec): - initial_request.write_object_spec = write_state.spec - else: - if write_state.write_handle: - write_state.spec.write_handle = write_state.write_handle - - if write_state.routing_token: - write_state.spec.routing_token = write_state.routing_token - initial_request.append_object_spec = write_state.spec - - yield initial_request - + requests = [] # The buffer should already be seeked to the correct position (persisted_size) # by the `recover_state_on_failure` method before this is called. while not write_state.is_finalized: @@ -88,7 +75,7 @@ def generate_requests( # End of File detection if not chunk: - return + break checksummed_data = storage_type.ChecksummedData(content=chunk) checksum = google_crc32c.Checksum(chunk) @@ -98,16 +85,25 @@ def generate_requests( write_offset=write_state.bytes_sent, checksummed_data=checksummed_data, ) - write_state.bytes_sent += len(chunk) + chunk_len = len(chunk) + write_state.bytes_sent += chunk_len + write_state.bytes_since_last_flush += chunk_len + + if write_state.bytes_since_last_flush >= write_state.flush_interval: + request.flush = True + # reset counter after marking flush + write_state.bytes_since_last_flush = 0 - yield request + requests.append(request) + return requests def update_state_from_response( self, response: storage_type.BidiWriteObjectResponse, state: Dict[str, Any] ) -> None: """Processes a server response and updates the write state.""" write_state: _WriteState = state["write_state"] - + if response is None: + return if response.persisted_size: write_state.persisted_size = response.persisted_size @@ -129,18 +125,23 @@ async def recover_state_on_failure( last confirmed 'persisted_size' from the server. 
""" write_state: _WriteState = state["write_state"] - cause = getattr(error, "cause", error) - # Extract routing token and potentially a new write handle for redirection. - if isinstance(cause, BidiWriteObjectRedirectedError): - if cause.routing_token: - write_state.routing_token = cause.routing_token + redirect_proto = None - redirect_handle = getattr(cause, "write_handle", None) - if redirect_handle: - write_state.write_handle = redirect_handle + if isinstance(error, BidiWriteObjectRedirectedError): + redirect_proto = error + else: + redirect_proto = _extract_bidi_writes_redirect_proto(error) + + # Extract routing token and potentially a new write handle for redirection. + if redirect_proto: + if redirect_proto.routing_token: + write_state.routing_token = redirect_proto.routing_token + if redirect_proto.write_handle: + write_state.write_handle = redirect_proto.write_handle # We must assume any data sent beyond 'persisted_size' was lost. # Reset the user buffer to the last known good byte confirmed by the server. write_state.user_buffer.seek(write_state.persisted_size) write_state.bytes_sent = write_state.persisted_size + write_state.bytes_since_last_flush = 0 diff --git a/tests/conformance/test_bidi_writes.py b/tests/conformance/test_bidi_writes.py new file mode 100644 index 000000000..90dfaf5f8 --- /dev/null +++ b/tests/conformance/test_bidi_writes.py @@ -0,0 +1,267 @@ +import asyncio +import uuid +import grpc +import requests + +from google.api_core import exceptions +from google.auth import credentials as auth_credentials +from google.cloud import _storage_v2 as storage_v2 + +from google.api_core.retry_async import AsyncRetry +from google.cloud.storage._experimental.asyncio.async_appendable_object_writer import ( + AsyncAppendableObjectWriter, +) + +# --- Configuration --- +PROJECT_NUMBER = "12345" # A dummy project number is fine for the testbench. 
+GRPC_ENDPOINT = "localhost:8888" +HTTP_ENDPOINT = "http://localhost:9000" +CONTENT = b"A" * 1024 * 10 # 10 KB + + +def _is_retryable(exc): + return isinstance( + exc, + ( + exceptions.InternalServerError, + exceptions.ServiceUnavailable, + exceptions.DeadlineExceeded, + exceptions.TooManyRequests, + exceptions.Aborted, # For Redirects + ), + ) + + +async def run_test_scenario( + gapic_client, + http_client, + bucket_name, + object_name, + scenario, +): + """Runs a single fault-injection test scenario.""" + print(f"\n--- RUNNING SCENARIO: {scenario['name']} ---") + retry_count = 0 + + def on_retry_error(exc): + nonlocal retry_count + retry_count += 1 + print(f"Retry attempt {retry_count} triggered by: {type(exc).__name__}") + + custom_retry = AsyncRetry( + predicate=_is_retryable, + on_error=on_retry_error, + initial=0.1, # Short backoff for fast tests + multiplier=1.0, + ) + + use_default = scenario.get("use_default_policy", False) + policy_to_pass = None if use_default else custom_retry + + retry_test_id = None + try: + # 1. Create a Retry Test resource on the testbench. + retry_test_config = { + "instructions": {scenario["method"]: [scenario["instruction"]]}, + "transport": "GRPC", + } + resp = http_client.post(f"{HTTP_ENDPOINT}/retry_test", json=retry_test_config) + resp.raise_for_status() + retry_test_id = resp.json()["id"] + + # 2. Set up writer and metadata for fault injection. + writer = AsyncAppendableObjectWriter( + gapic_client, + bucket_name, + object_name, + ) + fault_injection_metadata = (("x-retry-test-id", retry_test_id),) + + # 3. Execute the write and assert the outcome. + try: + await writer.open( + metadata=fault_injection_metadata, retry_policy=policy_to_pass + ) + await writer.append( + CONTENT, metadata=fault_injection_metadata, retry_policy=policy_to_pass + ) + # await writer.finalize() + await writer.close(finalize_on_close=True) + + # If an exception was expected, this line should not be reached. 
+ if scenario["expected_error"] is not None: + raise AssertionError( + f"Expected exception {scenario['expected_error']} was not raised." + ) + + # 4. Verify the object content. + read_request = storage_v2.ReadObjectRequest( + bucket=f"projects/_/buckets/{bucket_name}", + object=object_name, + ) + read_stream = await gapic_client.read_object(request=read_request) + data = b"" + async for chunk in read_stream: + data += chunk.checksummed_data.content + assert data == CONTENT + if scenario["expected_error"] is None: + # Scenarios like 503, 500, smarter resumption, and redirects + # SHOULD trigger at least one retry attempt. + if not use_default: + assert ( + retry_count > 0 + ), f"Test passed but no retry was actually triggered for {scenario['name']}!" + else: + print("Successfully recovered using library's default policy.") + print(f"Success: {scenario['name']}") + + except Exception as e: + if scenario["expected_error"] is None or not isinstance( + e, scenario["expected_error"] + ): + raise + + if not use_default: + assert ( + retry_count == 0 + ), f"Retry was incorrectly triggered for non-retriable error in {scenario['name']}!" + print(f"Success: caught expected exception for {scenario['name']}: {e}") + + finally: + # 5. Clean up the Retry Test resource. 
+ if retry_test_id: + http_client.delete(f"{HTTP_ENDPOINT}/retry_test/{retry_test_id}") + + +async def main(): + """Main function to set up resources and run all test scenarios.""" + channel = grpc.aio.insecure_channel(GRPC_ENDPOINT) + creds = auth_credentials.AnonymousCredentials() + transport = storage_v2.services.storage.transports.StorageGrpcAsyncIOTransport( + channel=channel, + credentials=creds, + ) + gapic_client = storage_v2.StorageAsyncClient(transport=transport) + http_client = requests.Session() + + bucket_name = f"grpc-test-bucket-{uuid.uuid4().hex[:8]}" + object_name_prefix = "retry-test-object-" + + # Define all test scenarios + test_scenarios = [ + { + "name": "Retry on Service Unavailable (503)", + "method": "storage.objects.insert", + "instruction": "return-503", + "expected_error": None, + }, + { + "name": "Retry on 500", + "method": "storage.objects.insert", + "instruction": "return-500", + "expected_error": None, + }, + { + "name": "Retry on 504", + "method": "storage.objects.insert", + "instruction": "return-504", + "expected_error": None, + }, + { + "name": "Retry on 429", + "method": "storage.objects.insert", + "instruction": "return-429", + "expected_error": None, + }, + { + "name": "Smarter Resumption: Retry 503 after partial data", + "method": "storage.objects.insert", + "instruction": "return-503-after-2K", + "expected_error": None, + }, + { + "name": "Retry on BidiWriteObjectRedirectedError", + "method": "storage.objects.insert", + "instruction": "redirect-send-handle-and-token-tokenval", + "expected_error": None, + }, + { + "name": "Fail on 401", + "method": "storage.objects.insert", + "instruction": "return-401", + "expected_error": exceptions.Unauthorized, + }, + { + "name": "Default Policy: Retry on 503", + "method": "storage.objects.insert", + "instruction": "return-503", + "expected_error": None, + "use_default_policy": True, + }, + { + "name": "Default Policy: Retry on 503", + "method": "storage.objects.insert", + "instruction": 
"return-500", + "expected_error": None, + "use_default_policy": True, + }, + { + "name": "Default Policy: Retry on BidiWriteObjectRedirectedError", + "method": "storage.objects.insert", + "instruction": "redirect-send-handle-and-token-tokenval", + "expected_error": None, + "use_default_policy": True, + }, + { + "name": "Default Policy: Smarter Ressumption", + "method": "storage.objects.insert", + "instruction": "return-503-after-2K", + "expected_error": None, + "use_default_policy": True, + }, + ] + + try: + bucket_resource = storage_v2.Bucket(project=f"projects/{PROJECT_NUMBER}") + create_bucket_request = storage_v2.CreateBucketRequest( + parent="projects/_", bucket_id=bucket_name, bucket=bucket_resource + ) + await gapic_client.create_bucket(request=create_bucket_request) + + for i, scenario in enumerate(test_scenarios): + object_name = f"{object_name_prefix}{i}" + await run_test_scenario( + gapic_client, + http_client, + bucket_name, + object_name, + scenario, + ) + + except Exception: + import traceback + + traceback.print_exc() + finally: + # Clean up the test bucket. 
+ try: + list_objects_req = storage_v2.ListObjectsRequest( + parent=f"projects/_/buckets/{bucket_name}", + ) + list_objects_res = await gapic_client.list_objects(request=list_objects_req) + async for obj in list_objects_res: + delete_object_req = storage_v2.DeleteObjectRequest( + bucket=f"projects/_/buckets/{bucket_name}", object=obj.name + ) + await gapic_client.delete_object(request=delete_object_req) + + delete_bucket_req = storage_v2.DeleteBucketRequest( + name=f"projects/_/buckets/{bucket_name}" + ) + await gapic_client.delete_bucket(request=delete_bucket_req) + except Exception as e: + print(f"Warning: Cleanup failed: {e}") + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/tests/perf/microbenchmarks/_utils.py b/tests/perf/microbenchmarks/_utils.py index b2589f2ac..ff29b8783 100644 --- a/tests/perf/microbenchmarks/_utils.py +++ b/tests/perf/microbenchmarks/_utils.py @@ -43,7 +43,7 @@ def publish_benchmark_extra_info( object_size = params.file_size_bytes num_files = params.num_files - total_uploaded_mib = (object_size / (1024 * 1024) * num_files) + total_uploaded_mib = object_size / (1024 * 1024) * num_files min_throughput = total_uploaded_mib / benchmark.stats["max"] max_throughput = total_uploaded_mib / benchmark.stats["min"] mean_throughput = total_uploaded_mib / benchmark.stats["mean"] @@ -80,8 +80,8 @@ def publish_benchmark_extra_info( # Get benchmark name, rounds, and iterations name = benchmark.name - rounds = benchmark.stats['rounds'] - iterations = benchmark.stats['iterations'] + rounds = benchmark.stats["rounds"] + iterations = benchmark.stats["iterations"] # Header for throughput table header = "\n\n" + "-" * 125 + "\n" @@ -98,13 +98,15 @@ def publish_benchmark_extra_info( print(row) print("-" * 125) + class RandomBytesIO(io.RawIOBase): """ A file-like object that generates random bytes using os.urandom. It enforces a fixed size and an upper safety cap. 
""" + # 10 GiB default safety cap - DEFAULT_CAP = 10 * 1024 * 1024 * 1024 + DEFAULT_CAP = 10 * 1024 * 1024 * 1024 def __init__(self, size, max_size=DEFAULT_CAP): """ @@ -114,9 +116,11 @@ def __init__(self, size, max_size=DEFAULT_CAP): """ if size is None: raise ValueError("Size must be defined (cannot be infinite).") - + if size > max_size: - raise ValueError(f"Requested size {size} exceeds the maximum limit of {max_size} bytes (10 GiB).") + raise ValueError( + f"Requested size {size} exceeds the maximum limit of {max_size} bytes (10 GiB)." + ) self._size = size self._pos = 0 @@ -125,15 +129,15 @@ def read(self, n=-1): # 1. Handle "read all" (n=-1) if n is None or n < 0: n = self._size - self._pos - + # 2. Handle EOF (End of File) if self._pos >= self._size: return b"" - + # 3. Clamp read amount to remaining size # This ensures we stop exactly at `size` bytes. n = min(n, self._size - self._pos) - + # 4. Generate data data = os.urandom(n) self._pos += len(data) diff --git a/tests/perf/microbenchmarks/conftest.py b/tests/perf/microbenchmarks/conftest.py index f66f9bf6c..8caf4d656 100644 --- a/tests/perf/microbenchmarks/conftest.py +++ b/tests/perf/microbenchmarks/conftest.py @@ -49,6 +49,7 @@ def storage_client(): with contextlib.closing(client): yield client + @pytest.fixture def monitor(): """ @@ -57,6 +58,7 @@ def monitor(): """ return ResourceMonitor + def publish_resource_metrics(benchmark: Any, monitor: ResourceMonitor) -> None: """ Helper function to publish resource monitor results to the extra_info property. @@ -73,10 +75,13 @@ def publish_resource_metrics(benchmark: Any, monitor: ResourceMonitor) -> None: async def upload_appendable_object(bucket_name, object_name, object_size, chunk_size): # flush interval set to little over 1GiB to minimize number of flushes. 
- # this method is to write "appendable" objects which will be used for + # this method is to write "appendable" objects which will be used for # benchmarking reads, hence not concerned performance of writes here. writer = AsyncAppendableObjectWriter( - AsyncGrpcClient().grpc_client, bucket_name, object_name, writer_options={"FLUSH_INTERVAL_BYTES": 1026 * 1024 ** 2} + AsyncGrpcClient().grpc_client, + bucket_name, + object_name, + writer_options={"FLUSH_INTERVAL_BYTES": 1026 * 1024**2}, ) await writer.open() uploaded_bytes = 0 @@ -106,11 +111,15 @@ def _upload_worker(args): upload_appendable_object(bucket_name, object_name, object_size, chunk_size) ) else: - uploaded_bytes = upload_simple_object(bucket_name, object_name, object_size, chunk_size) + uploaded_bytes = upload_simple_object( + bucket_name, object_name, object_size, chunk_size + ) return object_name, uploaded_bytes -def _create_files(num_files, bucket_name, bucket_type, object_size, chunk_size=1024 * 1024 * 1024): +def _create_files( + num_files, bucket_name, bucket_type, object_size, chunk_size=1024 * 1024 * 1024 +): """ Create/Upload objects for benchmarking and return a list of their names. 
""" @@ -148,4 +157,4 @@ def workload_params(request): params.bucket_type, params.file_size_bytes, ) - return params, files_names \ No newline at end of file + return params, files_names diff --git a/tests/perf/microbenchmarks/reads/config.py b/tests/perf/microbenchmarks/reads/config.py index b9cfb1a92..7d83e3f8e 100644 --- a/tests/perf/microbenchmarks/reads/config.py +++ b/tests/perf/microbenchmarks/reads/config.py @@ -54,8 +54,13 @@ def _get_params() -> Dict[str, List[ReadParameters]]: rounds = common_params["rounds"] bucket_map = { - "zonal": os.environ.get("DEFAULT_RAPID_ZONAL_BUCKET", config['defaults']['DEFAULT_RAPID_ZONAL_BUCKET']), - "regional": os.environ.get("DEFAULT_STANDARD_BUCKET", config['defaults']['DEFAULT_STANDARD_BUCKET']) + "zonal": os.environ.get( + "DEFAULT_RAPID_ZONAL_BUCKET", + config["defaults"]["DEFAULT_RAPID_ZONAL_BUCKET"], + ), + "regional": os.environ.get( + "DEFAULT_STANDARD_BUCKET", config["defaults"]["DEFAULT_STANDARD_BUCKET"] + ), } for workload in config["workload"]: diff --git a/tests/perf/microbenchmarks/reads/test_reads.py b/tests/perf/microbenchmarks/reads/test_reads.py index 81f180d75..13a0a49b3 100644 --- a/tests/perf/microbenchmarks/reads/test_reads.py +++ b/tests/perf/microbenchmarks/reads/test_reads.py @@ -180,6 +180,7 @@ def download_files_using_mrd_multi_coro(loop, client, files, other_params, chunk Returns: float: The maximum latency (in seconds) among all coroutines. 
""" + async def main(): if len(files) == 1: result = await download_chunks_using_mrd_async( @@ -356,8 +357,7 @@ def download_files_mp_mc_wrapper(files_names, params, chunks, bucket_type): @pytest.mark.parametrize( "workload_params", - all_params["read_seq_multi_process"] + - all_params["read_rand_multi_process"], + all_params["read_seq_multi_process"] + all_params["read_rand_multi_process"], indirect=True, ids=lambda p: p.name, ) @@ -412,4 +412,4 @@ def target_wrapper(*args, **kwargs): blobs_to_delete.extend( storage_client.bucket(params.bucket_name).blob(f) for f in files_names - ) \ No newline at end of file + ) diff --git a/tests/perf/microbenchmarks/resource_monitor.py b/tests/perf/microbenchmarks/resource_monitor.py index be6ae7025..8ad2a27b7 100644 --- a/tests/perf/microbenchmarks/resource_monitor.py +++ b/tests/perf/microbenchmarks/resource_monitor.py @@ -96,4 +96,4 @@ def throughput_mb_s(self): """Calculates combined network throughput.""" if self.duration <= 0: return 0.0 - return (self.net_sent_mb + self.net_recv_mb) / self.duration \ No newline at end of file + return (self.net_sent_mb + self.net_recv_mb) / self.duration diff --git a/tests/perf/microbenchmarks/writes/config.py b/tests/perf/microbenchmarks/writes/config.py index bc1548511..d823260f9 100644 --- a/tests/perf/microbenchmarks/writes/config.py +++ b/tests/perf/microbenchmarks/writes/config.py @@ -48,8 +48,13 @@ def get_write_params() -> Dict[str, List[WriteParameters]]: rounds = common_params["rounds"] bucket_map = { - "zonal": os.environ.get("DEFAULT_RAPID_ZONAL_BUCKET", config['defaults']['DEFAULT_RAPID_ZONAL_BUCKET']), - "regional": os.environ.get("DEFAULT_STANDARD_BUCKET", config['defaults']['DEFAULT_STANDARD_BUCKET']) + "zonal": os.environ.get( + "DEFAULT_RAPID_ZONAL_BUCKET", + config["defaults"]["DEFAULT_RAPID_ZONAL_BUCKET"], + ), + "regional": os.environ.get( + "DEFAULT_STANDARD_BUCKET", config["defaults"]["DEFAULT_STANDARD_BUCKET"] + ), } for workload in config["workload"]: diff --git 
a/tests/perf/microbenchmarks/writes/test_writes.py b/tests/perf/microbenchmarks/writes/test_writes.py index 2b3c57abd..d952b1f87 100644 --- a/tests/perf/microbenchmarks/writes/test_writes.py +++ b/tests/perf/microbenchmarks/writes/test_writes.py @@ -31,9 +31,14 @@ import pytest from google.cloud.storage._experimental.asyncio.async_grpc_client import AsyncGrpcClient -from google.cloud.storage._experimental.asyncio.async_appendable_object_writer import AsyncAppendableObjectWriter +from google.cloud.storage._experimental.asyncio.async_appendable_object_writer import ( + AsyncAppendableObjectWriter, +) -from tests.perf.microbenchmarks._utils import publish_benchmark_extra_info, RandomBytesIO +from tests.perf.microbenchmarks._utils import ( + publish_benchmark_extra_info, + RandomBytesIO, +) from tests.perf.microbenchmarks.conftest import publish_resource_metrics import tests.perf.microbenchmarks.writes.config as config from google.cloud import storage @@ -41,10 +46,12 @@ # Get write parameters all_params = config.get_write_params() + async def create_client(): """Initializes async client and gets the current event loop.""" return AsyncGrpcClient().grpc_client + async def upload_chunks_using_grpc_async(client, filename, other_params): """Uploads a file in chunks using the gRPC API asynchronously. @@ -81,6 +88,7 @@ async def upload_chunks_using_grpc_async(client, filename, other_params): elapsed_time = end_time - start_time return elapsed_time / 1_000_000_000 + def upload_chunks_using_grpc(loop, client, filename, other_params): """Wrapper to run the async gRPC upload in a synchronous context. @@ -97,6 +105,7 @@ def upload_chunks_using_grpc(loop, client, filename, other_params): upload_chunks_using_grpc_async(client, filename, other_params) ) + def upload_using_json(_, json_client, filename, other_params): """Uploads a file using the JSON API. 
@@ -124,6 +133,7 @@ def upload_using_json(_, json_client, filename, other_params): elapsed_time = end_time - start_time return elapsed_time / 1_000_000_000 + @pytest.mark.parametrize( "workload_params", all_params["write_seq"], @@ -179,13 +189,16 @@ def target_wrapper(*args, **kwargs): task.cancel() loop.run_until_complete(asyncio.gather(*tasks, return_exceptions=True)) loop.close() - publish_benchmark_extra_info(benchmark, params, benchmark_group="write", true_times=output_times) + publish_benchmark_extra_info( + benchmark, params, benchmark_group="write", true_times=output_times + ) publish_resource_metrics(benchmark, m) blobs_to_delete.extend( storage_client.bucket(params.bucket_name).blob(f) for f in files_names ) + def upload_files_using_grpc_multi_coro(loop, client, files, other_params): """Uploads multiple files concurrently using gRPC with asyncio. @@ -198,17 +211,17 @@ def upload_files_using_grpc_multi_coro(loop, client, files, other_params): Returns: float: The maximum latency observed among all coroutines. """ + async def main(): tasks = [] for f in files: - tasks.append( - upload_chunks_using_grpc_async(client, f, other_params) - ) + tasks.append(upload_chunks_using_grpc_async(client, f, other_params)) return await asyncio.gather(*tasks) results = loop.run_until_complete(main()) return max(results) + def upload_files_using_json_multi_threaded(_, json_client, files, other_params): """Uploads multiple files concurrently using the JSON API with a ThreadPoolExecutor. 
@@ -235,6 +248,7 @@ def upload_files_using_json_multi_threaded(_, json_client, files, other_params): return max(results) + @pytest.mark.parametrize( "workload_params", all_params["write_seq_multi_coros"], @@ -292,13 +306,16 @@ def target_wrapper(*args, **kwargs): task.cancel() loop.run_until_complete(asyncio.gather(*tasks, return_exceptions=True)) loop.close() - publish_benchmark_extra_info(benchmark, params, benchmark_group="write", true_times=output_times) + publish_benchmark_extra_info( + benchmark, params, benchmark_group="write", true_times=output_times + ) publish_resource_metrics(benchmark, m) blobs_to_delete.extend( storage_client.bucket(params.bucket_name).blob(f) for f in files_names ) + def _upload_files_worker(files_to_upload, other_params, bucket_type): """A worker function for multi-processing uploads. @@ -330,12 +347,12 @@ def _upload_files_worker(files_to_upload, other_params, bucket_type): loop.close() return result else: # regional - json_client = storage.Client() return upload_files_using_json_multi_threaded( None, json_client, files_to_upload, other_params ) + def upload_files_mp_mc_wrapper(files_names, params): """Wrapper for multi-process, multi-coroutine uploads. 
@@ -370,6 +387,7 @@ def upload_files_mp_mc_wrapper(files_names, params): return max(results) + @pytest.mark.parametrize( "workload_params", all_params["write_seq_multi_process"], @@ -406,9 +424,11 @@ def target_wrapper(*args, **kwargs): ), ) finally: - publish_benchmark_extra_info(benchmark, params, benchmark_group="write", true_times=output_times) + publish_benchmark_extra_info( + benchmark, params, benchmark_group="write", true_times=output_times + ) publish_resource_metrics(benchmark, m) blobs_to_delete.extend( storage_client.bucket(params.bucket_name).blob(f) for f in files_names - ) \ No newline at end of file + ) diff --git a/tests/system/test_zonal.py b/tests/system/test_zonal.py index 0bd61ff53..40c84a2fb 100644 --- a/tests/system/test_zonal.py +++ b/tests/system/test_zonal.py @@ -49,7 +49,6 @@ def event_loop(): @pytest.fixture(scope="session") def grpc_clients(event_loop): - # grpc clients has to be instantiated in the event loop, # otherwise grpc creates it's own event loop and attaches to the client. # Which will lead to deadlock because client running in one event loop and diff --git a/tests/unit/asyncio/retry/test_writes_resumption_strategy.py b/tests/unit/asyncio/retry/test_writes_resumption_strategy.py index 7d8b7934e..ce48e21c7 100644 --- a/tests/unit/asyncio/retry/test_writes_resumption_strategy.py +++ b/tests/unit/asyncio/retry/test_writes_resumption_strategy.py @@ -13,12 +13,13 @@ # limitations under the License. 
import io -import unittest import unittest.mock as mock from datetime import datetime import pytest import google_crc32c +from google.rpc import status_pb2 +from google.api_core import exceptions from google.cloud._storage_v2.types import storage as storage_type from google.cloud.storage._experimental.asyncio.retry.writes_resumption_strategy import ( @@ -28,136 +29,173 @@ from google.cloud._storage_v2.types.storage import BidiWriteObjectRedirectedError -class TestWriteResumptionStrategy(unittest.TestCase): - def _get_target_class(self): - return _WriteResumptionStrategy +@pytest.fixture +def strategy(): + """Fixture to provide a WriteResumptionStrategy instance.""" + return _WriteResumptionStrategy() - def _make_one(self, *args, **kwargs): - return self._get_target_class()(*args, **kwargs) - def test_ctor(self): - strategy = self._make_one() - self.assertIsInstance(strategy, self._get_target_class()) +class TestWriteResumptionStrategy: + """Test suite for WriteResumptionStrategy.""" - def test_generate_requests_initial_new_object(self): - """ - Verify the initial request sequence for a new upload (WriteObjectSpec). 
- """ - strategy = self._make_one() - mock_buffer = io.BytesIO(b"0123456789") - # Use WriteObjectSpec for new objects - mock_spec = storage_type.WriteObjectSpec( - resource=storage_type.Object(name="test-object") + # ------------------------------------------------------------------------- + # Tests for generate_requests + # ------------------------------------------------------------------------- + + def test_generate_requests_initial_chunking(self, strategy): + """Verify initial data generation starts at offset 0 and chunks correctly.""" + mock_buffer = io.BytesIO(b"abcdefghij") + write_state = _WriteState( + chunk_size=3, user_buffer=mock_buffer, flush_interval=10 ) - state = { - "write_state": _WriteState( - mock_spec, chunk_size=4, user_buffer=mock_buffer - ), - } + state = {"write_state": write_state} + + requests = strategy.generate_requests(state) + + # Expected: 4 requests (3, 3, 3, 1) + assert len(requests) == 4 - requests = list(strategy.generate_requests(state)) + # Verify Request 1 + assert requests[0].write_offset == 0 + assert requests[0].checksummed_data.content == b"abc" - # Check first request (Spec) - self.assertEqual(requests[0].write_object_spec, mock_spec) - self.assertFalse(requests[0].state_lookup) + # Verify Request 2 + assert requests[1].write_offset == 3 + assert requests[1].checksummed_data.content == b"def" - # Check data chunks - self.assertEqual(requests[1].write_offset, 0) - self.assertEqual(requests[1].checksummed_data.content, b"0123") - self.assertEqual(requests[2].write_offset, 4) - self.assertEqual(requests[2].checksummed_data.content, b"4567") - self.assertEqual(requests[3].write_offset, 8) - self.assertEqual(requests[3].checksummed_data.content, b"89") + # Verify Request 3 + assert requests[2].write_offset == 6 + assert requests[2].checksummed_data.content == b"ghi" - # Total requests: 1 Spec + 3 Chunks - self.assertEqual(len(requests), 4) + # Verify Request 4 + assert requests[3].write_offset == 9 + assert 
requests[3].checksummed_data.content == b"j" - def test_generate_requests_initial_existing_object(self): + def test_generate_requests_resumption(self, strategy): """ - Verify the initial request sequence for appending to an existing object (AppendObjectSpec). + Verify request generation when resuming. + The strategy should generate chunks starting from the current 'bytes_sent'. """ - strategy = self._make_one() - mock_buffer = io.BytesIO(b"0123") - # Use AppendObjectSpec for existing objects - mock_spec = storage_type.AppendObjectSpec( - object_="test-object", bucket="test-bucket" + mock_buffer = io.BytesIO(b"0123456789") + write_state = _WriteState( + chunk_size=4, user_buffer=mock_buffer, flush_interval=10 ) - state = { - "write_state": _WriteState( - mock_spec, chunk_size=4, user_buffer=mock_buffer - ), - } - requests = list(strategy.generate_requests(state)) + # Simulate resumption state: 4 bytes already sent/persisted + write_state.persisted_size = 4 + write_state.bytes_sent = 4 + # Buffer must be seeked to 4 before calling generate + mock_buffer.seek(4) - # Check first request (Spec) - self.assertEqual(requests[0].append_object_spec, mock_spec) - self.assertFalse(requests[0].state_lookup) + state = {"write_state": write_state} - # Check data chunk - self.assertEqual(requests[1].write_offset, 0) - self.assertEqual(requests[1].checksummed_data.content, b"0123") + requests = strategy.generate_requests(state) - def test_generate_requests_empty_file(self): - """ - Verify request sequence for an empty file. Should just be the Spec. 
- """ - strategy = self._make_one() + # Since 4 bytes are done, we expect remaining 6 bytes: [4 bytes, 2 bytes] + assert len(requests) == 2 + + # Check first generated request starts at offset 4 + assert requests[0].write_offset == 4 + assert requests[0].checksummed_data.content == b"4567" + + # Check second generated request starts at offset 8 + assert requests[1].write_offset == 8 + assert requests[1].checksummed_data.content == b"89" + + def test_generate_requests_empty_file(self, strategy): + """Verify request sequence for an empty file.""" mock_buffer = io.BytesIO(b"") - mock_spec = storage_type.AppendObjectSpec(object_="test-object") - state = { - "write_state": _WriteState( - mock_spec, chunk_size=4, user_buffer=mock_buffer - ), - } + write_state = _WriteState( + chunk_size=4, user_buffer=mock_buffer, flush_interval=10 + ) + state = {"write_state": write_state} - requests = list(strategy.generate_requests(state)) + requests = strategy.generate_requests(state) - self.assertEqual(len(requests), 1) - self.assertEqual(requests[0].append_object_spec, mock_spec) + assert len(requests) == 0 - def test_generate_requests_resumption(self): - """ - Verify request sequence when resuming an upload. 
- """ - strategy = self._make_one() - mock_buffer = io.BytesIO(b"0123456789") - mock_spec = storage_type.AppendObjectSpec(object_="test-object") + def test_generate_requests_checksum_verification(self, strategy): + """Verify CRC32C is calculated correctly for each chunk.""" + chunk_data = b"test_data" + mock_buffer = io.BytesIO(chunk_data) + write_state = _WriteState( + chunk_size=10, user_buffer=mock_buffer, flush_interval=10 + ) + state = {"write_state": write_state} - write_state = _WriteState(mock_spec, chunk_size=4, user_buffer=mock_buffer) - write_state.persisted_size = 4 - write_state.bytes_sent = 4 - write_state.write_handle = storage_type.BidiWriteHandle(handle=b"test-handle") - mock_buffer.seek(4) + requests = strategy.generate_requests(state) + + expected_crc = google_crc32c.Checksum(chunk_data).digest() + expected_int = int.from_bytes(expected_crc, "big") + assert requests[0].checksummed_data.crc32c == expected_int + + def test_generate_requests_flush_logic_exact_interval(self, strategy): + """Verify the flush bit is set exactly when the interval is reached.""" + mock_buffer = io.BytesIO(b"A" * 12) + # 2 byte chunks, flush every 4 bytes + write_state = _WriteState( + chunk_size=2, user_buffer=mock_buffer, flush_interval=4 + ) + state = {"write_state": write_state} + + requests = strategy.generate_requests(state) + + # Request index 1 (4 bytes total) should have flush=True + assert requests[0].flush is False + assert requests[1].flush is True + + # Request index 2 (8 bytes total) should have flush=True + assert requests[2].flush is False + assert requests[3].flush is True + + # Request index 3 (12 bytes total) should have flush=True + assert requests[4].flush is False + assert requests[5].flush is True + + # Verify counter reset in state + assert write_state.bytes_since_last_flush == 0 + def test_generate_requests_flush_logic_data_less_than_interval(self, strategy): + """Verify flush is not set if data sent is less than interval.""" + mock_buffer = 
io.BytesIO(b"A" * 5) + # Flush every 10 bytes + write_state = _WriteState( + chunk_size=2, user_buffer=mock_buffer, flush_interval=10 + ) state = {"write_state": write_state} - requests = list(strategy.generate_requests(state)) + requests = strategy.generate_requests(state) + + # Total 5 bytes < 10 bytes interval + for req in requests: + assert req.flush is False + + assert write_state.bytes_since_last_flush == 5 - # Check first request has handle and lookup - self.assertEqual( - requests[0].append_object_spec.write_handle.handle, b"test-handle" + def test_generate_requests_honors_finalized_state(self, strategy): + """If state is already finalized, no requests should be generated.""" + mock_buffer = io.BytesIO(b"data") + write_state = _WriteState( + chunk_size=4, user_buffer=mock_buffer, flush_interval=10 ) + write_state.is_finalized = True + state = {"write_state": write_state} - # Check data starts from offset 4 - self.assertEqual(requests[1].write_offset, 4) - self.assertEqual(requests[1].checksummed_data.content, b"4567") - self.assertEqual(requests[2].write_offset, 8) - self.assertEqual(requests[2].checksummed_data.content, b"89") + requests = strategy.generate_requests(state) + assert len(requests) == 0 @pytest.mark.asyncio - async def test_generate_requests_after_failure_and_recovery(self): + async def test_generate_requests_after_failure_and_recovery(self, strategy): """ - Verify recovery and resumption flow. + Verify recovery and resumption flow (Integration of recover + generate). 
""" - strategy = self._make_one() - mock_buffer = io.BytesIO(b"0123456789abcdef") - mock_spec = storage_type.AppendObjectSpec(object_="test-object") - state = { - "write_state": _WriteState(mock_spec, chunk_size=4, user_buffer=mock_buffer) - } - write_state = state["write_state"] + mock_buffer = io.BytesIO(b"0123456789abcdef") # 16 bytes + write_state = _WriteState( + chunk_size=4, user_buffer=mock_buffer, flush_interval=10 + ) + state = {"write_state": write_state} + # Simulate initial progress: sent 8 bytes write_state.bytes_sent = 8 mock_buffer.seek(8) @@ -169,122 +207,167 @@ async def test_generate_requests_after_failure_and_recovery(self): state, ) + # Simulate Failure Triggering Recovery await strategy.recover_state_on_failure(Exception("network error"), state) - self.assertEqual(mock_buffer.tell(), 4) - self.assertEqual(write_state.bytes_sent, 4) + # Assertions after recovery + # 1. Buffer should rewind to persisted_size (4) + assert mock_buffer.tell() == 4 + # 2. bytes_sent should track persisted_size (4) + assert write_state.bytes_sent == 4 - requests = list(strategy.generate_requests(state)) + requests = strategy.generate_requests(state) - self.assertTrue(requests[0].state_lookup) - self.assertEqual( - requests[0].append_object_spec.write_handle.handle, b"handle-1" - ) + # Remaining data from offset 4 to 16 (12 bytes total) + # Chunks: [4-8], [8-12], [12-16] + assert len(requests) == 3 - self.assertEqual(requests[1].write_offset, 4) - self.assertEqual(requests[1].checksummed_data.content, b"4567") + # Verify resumption offset + assert requests[0].write_offset == 4 + assert requests[0].checksummed_data.content == b"4567" - def test_update_state_from_response(self): - """Verify state updates from server responses.""" - strategy = self._make_one() - mock_buffer = io.BytesIO(b"0123456789") - mock_spec = storage_type.AppendObjectSpec(object_="test-object") - state = { - "write_state": _WriteState( - mock_spec, chunk_size=4, user_buffer=mock_buffer - ), - } - 
write_state = state["write_state"] + # ------------------------------------------------------------------------- + # Tests for update_state_from_response + # ------------------------------------------------------------------------- - response1 = storage_type.BidiWriteObjectResponse( - write_handle=storage_type.BidiWriteHandle(handle=b"handle-1") + def test_update_state_from_response_all_fields(self, strategy): + """Verify all fields from a BidiWriteObjectResponse update the state.""" + write_state = _WriteState( + chunk_size=4, user_buffer=io.BytesIO(), flush_interval=10 ) - strategy.update_state_from_response(response1, state) - self.assertEqual(write_state.write_handle.handle, b"handle-1") + state = {"write_state": write_state} - response2 = storage_type.BidiWriteObjectResponse(persisted_size=1024) - strategy.update_state_from_response(response2, state) - self.assertEqual(write_state.persisted_size, 1024) + # 1. Update persisted_size + strategy.update_state_from_response( + storage_type.BidiWriteObjectResponse(persisted_size=123), state + ) + assert write_state.persisted_size == 123 - final_resource = storage_type.Object( - name="test-object", bucket="b", size=2048, finalize_time=datetime.now() + # 2. Update write_handle + handle = storage_type.BidiWriteHandle(handle=b"new-handle") + strategy.update_state_from_response( + storage_type.BidiWriteObjectResponse(write_handle=handle), state ) - response3 = storage_type.BidiWriteObjectResponse(resource=final_resource) - strategy.update_state_from_response(response3, state) - self.assertEqual(write_state.persisted_size, 2048) - self.assertTrue(write_state.is_finalized) + assert write_state.write_handle == handle - @pytest.mark.asyncio - async def test_recover_state_on_failure_handles_redirect(self): - """ - Verify redirection error handling. - """ - strategy = self._make_one() - mock_buffer = mock.MagicMock(spec=io.BytesIO) - mock_spec = storage_type.AppendObjectSpec(object_="test-object") + # 3. 
Update from Resource (finalization) + resource = storage_type.Object(size=1000, finalize_time=datetime.now()) + strategy.update_state_from_response( + storage_type.BidiWriteObjectResponse(resource=resource), state + ) + assert write_state.persisted_size == 1000 + assert write_state.is_finalized - write_state = _WriteState(mock_spec, chunk_size=4, user_buffer=mock_buffer) + def test_update_state_from_response_none(self, strategy): + """Verify None response doesn't crash.""" + write_state = _WriteState( + chunk_size=4, user_buffer=io.BytesIO(), flush_interval=10 + ) state = {"write_state": write_state} + strategy.update_state_from_response(None, state) + assert write_state.persisted_size == 0 - redirect_error = BidiWriteObjectRedirectedError(routing_token="new-token-123") - wrapped_error = Exception("RPC error") - wrapped_error.cause = redirect_error - - await strategy.recover_state_on_failure(wrapped_error, state) - - self.assertEqual(write_state.routing_token, "new-token-123") - mock_buffer.seek.assert_called_with(0) + # ------------------------------------------------------------------------- + # Tests for recover_state_on_failure + # ------------------------------------------------------------------------- @pytest.mark.asyncio - async def test_recover_state_on_failure_handles_redirect_with_handle(self): - """Verify redirection that includes a write handle.""" - strategy = self._make_one() - mock_buffer = mock.MagicMock(spec=io.BytesIO) - mock_spec = storage_type.AppendObjectSpec(object_="test-object") + async def test_recover_state_on_failure_rewind_logic(self, strategy): + """Verify buffer seek and counter resets on generic failure (Non-redirect).""" + mock_buffer = io.BytesIO(b"0123456789") + write_state = _WriteState( + chunk_size=2, user_buffer=mock_buffer, flush_interval=100 + ) - write_state = _WriteState(mock_spec, chunk_size=4, user_buffer=mock_buffer) - state = {"write_state": write_state} + # Simulate progress: sent 8 bytes, but server only persisted 4 
+ write_state.bytes_sent = 8 + write_state.persisted_size = 4 + write_state.bytes_since_last_flush = 2 + mock_buffer.seek(8) - redirect_error = BidiWriteObjectRedirectedError( - routing_token="new-token-456", write_handle=b"redirect-handle" + # Simulate generic 503 error without trailers + await strategy.recover_state_on_failure( + exceptions.ServiceUnavailable("busy"), {"write_state": write_state} ) - wrapped_error = Exception("RPC error") - wrapped_error.cause = redirect_error - await strategy.recover_state_on_failure(wrapped_error, state) + # Buffer must be seeked back to 4 + assert mock_buffer.tell() == 4 + assert write_state.bytes_sent == 4 + # Flush counter must be reset to avoid incorrect firing after resume + assert write_state.bytes_since_last_flush == 0 - self.assertEqual(write_state.routing_token, "new-token-456") - self.assertEqual(write_state.write_handle, b"redirect-handle") + @pytest.mark.asyncio + async def test_recover_state_on_failure_direct_redirect(self, strategy): + """Verify handling when the error is a BidiWriteObjectRedirectedError.""" + write_state = _WriteState( + chunk_size=4, user_buffer=io.BytesIO(), flush_interval=100 + ) + state = {"write_state": write_state} - mock_buffer.seek.assert_called_with(0) + redirect = BidiWriteObjectRedirectedError( + routing_token="tok-1", + write_handle=storage_type.BidiWriteHandle(handle=b"h-1"), + ) - def test_generate_requests_sends_crc32c_checksum(self): - strategy = self._make_one() - mock_buffer = io.BytesIO(b"0123") - mock_spec = storage_type.AppendObjectSpec(object_="test-object") - state = { - "write_state": _WriteState( - mock_spec, chunk_size=4, user_buffer=mock_buffer - ), - } + await strategy.recover_state_on_failure(redirect, state) - requests = list(strategy.generate_requests(state)) + assert write_state.routing_token == "tok-1" + assert write_state.write_handle.handle == b"h-1" - expected_crc = google_crc32c.Checksum(b"0123") - expected_crc_int = int.from_bytes(expected_crc.digest(), 
"big") - self.assertEqual(requests[1].checksummed_data.crc32c, expected_crc_int) + @pytest.mark.asyncio + async def test_recover_state_on_failure_wrapped_redirect(self, strategy): + """Verify handling when RedirectedError is inside Aborted.errors.""" + write_state = _WriteState( + chunk_size=4, user_buffer=io.BytesIO(), flush_interval=10 + ) - def test_generate_requests_with_routing_token(self): - strategy = self._make_one() - mock_buffer = io.BytesIO(b"") - mock_spec = storage_type.AppendObjectSpec(object_="test-object") + redirect = BidiWriteObjectRedirectedError(routing_token="tok-wrapped") + # google-api-core Aborted often wraps multiple errors + error = exceptions.Aborted("conflict", errors=[redirect]) - write_state = _WriteState(mock_spec, chunk_size=4, user_buffer=mock_buffer) - write_state.routing_token = "redirected-token" - state = {"write_state": write_state} + await strategy.recover_state_on_failure(error, {"write_state": write_state}) + + assert write_state.routing_token == "tok-wrapped" - requests = list(strategy.generate_requests(state)) + @pytest.mark.asyncio + async def test_recover_state_on_failure_trailer_metadata_redirect(self, strategy): + """Verify complex parsing from 'grpc-status-details-bin' in trailers.""" + write_state = _WriteState( + chunk_size=4, user_buffer=io.BytesIO(), flush_interval=10 + ) - self.assertEqual( - requests[0].append_object_spec.routing_token, "redirected-token" + redirect_proto = BidiWriteObjectRedirectedError(routing_token="metadata-token") + status = status_pb2.Status() + detail = status.details.add() + detail.type_url = ( + "type.googleapis.com/google.storage.v2.BidiWriteObjectRedirectedError" ) + detail.value = BidiWriteObjectRedirectedError.serialize(redirect_proto) + + # FIX: No spec= here, because Aborted doesn't have trailing_metadata in its base definition + mock_error = mock.MagicMock() + mock_error.errors = [] + mock_error.trailing_metadata.return_value = [ + ("grpc-status-details-bin", 
status.SerializeToString()) + ] + + with mock.patch( + "google.cloud.storage._experimental.asyncio.retry.writes_resumption_strategy._extract_bidi_writes_redirect_proto", + return_value=redirect_proto, + ): + await strategy.recover_state_on_failure( + mock_error, {"write_state": write_state} + ) + + assert write_state.routing_token == "metadata-token" + + def test_write_state_initialization(self): + """Verify WriteState starts with clean counters.""" + buffer = io.BytesIO(b"test") + ws = _WriteState(chunk_size=10, user_buffer=buffer, flush_interval=100) + + assert ws.persisted_size == 0 + assert ws.bytes_sent == 0 + assert ws.bytes_since_last_flush == 0 + assert ws.flush_interval == 100 + assert not ws.is_finalized diff --git a/tests/unit/asyncio/test_async_appendable_object_writer.py b/tests/unit/asyncio/test_async_appendable_object_writer.py index 2a9747f28..abb26be7a 100644 --- a/tests/unit/asyncio/test_async_appendable_object_writer.py +++ b/tests/unit/asyncio/test_async_appendable_object_writer.py @@ -12,23 +12,23 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from io import BytesIO +import io +import unittest.mock as mock +from unittest.mock import AsyncMock, MagicMock import pytest -from unittest import mock - -from google_crc32c import Checksum from google.api_core import exceptions +from google.rpc import status_pb2 +from google.cloud._storage_v2.types import storage as storage_type +from google.cloud._storage_v2.types.storage import BidiWriteObjectRedirectedError from google.cloud.storage._experimental.asyncio.async_appendable_object_writer import ( AsyncAppendableObjectWriter, -) -from google.cloud.storage._experimental.asyncio.async_appendable_object_writer import ( + _is_write_retryable, _MAX_CHUNK_SIZE_BYTES, _DEFAULT_FLUSH_INTERVAL_BYTES, ) -from google.cloud import _storage_v2 - +# Constants BUCKET = "test-bucket" OBJECT = "test-object" GENERATION = 123 @@ -37,637 +37,378 @@ EIGHT_MIB = 8 * 1024 * 1024 -@pytest.fixture -def mock_client(): - """Mock the async gRPC client.""" - return mock.AsyncMock() - - -@mock.patch( - "google.cloud.storage._experimental.asyncio.async_appendable_object_writer._AsyncWriteObjectStream" -) -def test_init(mock_write_object_stream, mock_client): - """Test the constructor of AsyncAppendableObjectWriter.""" - writer = AsyncAppendableObjectWriter(mock_client, BUCKET, OBJECT) - - assert writer.client == mock_client - assert writer.bucket_name == BUCKET - assert writer.object_name == OBJECT - assert writer.generation is None - assert writer.write_handle is None - assert not writer.is_stream_open - assert writer.offset is None - assert writer.persisted_size is None - assert writer.bytes_appended_since_last_flush == 0 - - mock_write_object_stream.assert_called_once_with( - client=mock_client, - bucket_name=BUCKET, - object_name=OBJECT, - generation_number=None, - write_handle=None, - ) - assert writer.write_obj_stream == mock_write_object_stream.return_value - - -@mock.patch( - "google.cloud.storage._experimental.asyncio.async_appendable_object_writer._AsyncWriteObjectStream" -) -def 
test_init_with_optional_args(mock_write_object_stream, mock_client): - """Test the constructor with optional arguments.""" - writer = AsyncAppendableObjectWriter( - mock_client, - BUCKET, - OBJECT, - generation=GENERATION, - write_handle=WRITE_HANDLE, - ) - - assert writer.generation == GENERATION - assert writer.write_handle == WRITE_HANDLE - assert writer.bytes_appended_since_last_flush == 0 - - mock_write_object_stream.assert_called_once_with( - client=mock_client, - bucket_name=BUCKET, - object_name=OBJECT, - generation_number=GENERATION, - write_handle=WRITE_HANDLE, - ) - - -@mock.patch( - "google.cloud.storage._experimental.asyncio.async_appendable_object_writer._AsyncWriteObjectStream" -) -def test_init_with_writer_options(mock_write_object_stream, mock_client): - """Test the constructor with optional arguments.""" - writer = AsyncAppendableObjectWriter( - mock_client, - BUCKET, - OBJECT, - writer_options={"FLUSH_INTERVAL_BYTES": EIGHT_MIB}, - ) - - assert writer.flush_interval == EIGHT_MIB - assert writer.bytes_appended_since_last_flush == 0 - - mock_write_object_stream.assert_called_once_with( - client=mock_client, - bucket_name=BUCKET, - object_name=OBJECT, - generation_number=None, - write_handle=None, - ) - - -@mock.patch( - "google.cloud.storage._experimental.asyncio.async_appendable_object_writer._AsyncWriteObjectStream" -) -def test_init_with_flush_interval_less_than_chunk_size_raises_error(mock_client): - """Test that an OutOfRange error is raised if flush_interval is less than the chunk size.""" - - with pytest.raises(exceptions.OutOfRange): - AsyncAppendableObjectWriter( - mock_client, - BUCKET, - OBJECT, - writer_options={"FLUSH_INTERVAL_BYTES": _MAX_CHUNK_SIZE_BYTES - 1}, +class TestIsWriteRetryable: + """Exhaustive tests for retry predicate logic.""" + + def test_standard_transient_errors(self, mock_appendable_writer): + for exc in [ + exceptions.InternalServerError("500"), + exceptions.ServiceUnavailable("503"), + 
exceptions.DeadlineExceeded("timeout"), + exceptions.TooManyRequests("429"), + ]: + assert _is_write_retryable(exc) + + def test_aborted_with_redirect_proto(self, mock_appendable_writer): + # Direct redirect error wrapped in Aborted + redirect = BidiWriteObjectRedirectedError(routing_token="token") + exc = exceptions.Aborted("aborted", errors=[redirect]) + assert _is_write_retryable(exc) + + def test_aborted_with_trailers(self, mock_appendable_writer): + # Setup Status with Redirect Detail + status = status_pb2.Status() + detail = status.details.add() + detail.type_url = ( + "type.googleapis.com/google.storage.v2.BidiWriteObjectRedirectedError" ) + # Mock error with trailing_metadata method + mock_grpc_error = MagicMock() + mock_grpc_error.trailing_metadata.return_value = [ + ("grpc-status-details-bin", status.SerializeToString()) + ] -@mock.patch( - "google.cloud.storage._experimental.asyncio.async_appendable_object_writer._AsyncWriteObjectStream" -) -def test_init_with_flush_interval_not_multiple_of_chunk_size_raises_error(mock_client): - """Test that an OutOfRange error is raised if flush_interval is not a multiple of the chunk size.""" - - with pytest.raises(exceptions.OutOfRange): - AsyncAppendableObjectWriter( - mock_client, - BUCKET, - OBJECT, - writer_options={"FLUSH_INTERVAL_BYTES": _MAX_CHUNK_SIZE_BYTES + 1}, - ) + # Aborted wraps the grpc error + exc = exceptions.Aborted("aborted", errors=[mock_grpc_error]) + assert _is_write_retryable(exc) + def test_aborted_without_metadata(self, mock_appendable_writer): + mock_grpc_error = MagicMock() + mock_grpc_error.trailing_metadata.return_value = [] + exc = exceptions.Aborted("bare aborted", errors=[mock_grpc_error]) + assert not _is_write_retryable(exc) -@mock.patch("google.cloud.storage._experimental.asyncio._utils.google_crc32c") -@mock.patch( - "google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" -) -def test_init_raises_if_crc32c_c_extension_is_missing( - 
mock_grpc_client, mock_google_crc32c -): - mock_google_crc32c.implementation = "python" - - with pytest.raises(exceptions.FailedPrecondition) as exc_info: - AsyncAppendableObjectWriter(mock_grpc_client, "bucket", "object") + def test_non_retryable_errors(self, mock_appendable_writer): + assert not _is_write_retryable(exceptions.BadRequest("400")) + assert not _is_write_retryable(exceptions.NotFound("404")) - assert "The google-crc32c package is not installed with C support" in str( - exc_info.value - ) - -@pytest.mark.asyncio -@mock.patch( - "google.cloud.storage._experimental.asyncio.async_appendable_object_writer._AsyncWriteObjectStream" -) -async def test_state_lookup(mock_write_object_stream, mock_client): - """Test state_lookup method.""" - # Arrange - writer = AsyncAppendableObjectWriter(mock_client, BUCKET, OBJECT) - writer._is_stream_open = True - mock_stream = mock_write_object_stream.return_value - mock_stream.send = mock.AsyncMock() - mock_stream.recv = mock.AsyncMock( - return_value=_storage_v2.BidiWriteObjectResponse(persisted_size=PERSISTED_SIZE) +@pytest.fixture +def mock_appendable_writer(): + """Fixture to provide a mock AsyncAppendableObjectWriter setup.""" + mock_client = mock.AsyncMock() + # Internal stream class patch + stream_patcher = mock.patch( + "google.cloud.storage._experimental.asyncio.async_appendable_object_writer._AsyncWriteObjectStream" ) + mock_stream_cls = stream_patcher.start() + mock_stream = mock_stream_cls.return_value - expected_request = _storage_v2.BidiWriteObjectRequest(state_lookup=True) - - # Act - response = await writer.state_lookup() - - # Assert - mock_stream.send.assert_awaited_once_with(expected_request) - mock_stream.recv.assert_awaited_once() - assert writer.persisted_size == PERSISTED_SIZE - assert response == PERSISTED_SIZE - - -@pytest.mark.asyncio -async def test_state_lookup_without_open_raises_value_error(mock_client): - """Test that state_lookup raises an error if the stream is not open.""" - writer = 
AsyncAppendableObjectWriter(mock_client, BUCKET, OBJECT) - with pytest.raises( - ValueError, - match="Stream is not open. Call open\\(\\) before state_lookup\\(\\).", - ): - await writer.state_lookup() - + # Configure all async methods explicitly + mock_stream.open = AsyncMock() + mock_stream.close = AsyncMock() + mock_stream.send = AsyncMock() + mock_stream.recv = AsyncMock() -@pytest.mark.asyncio -@mock.patch( - "google.cloud.storage._experimental.asyncio.async_appendable_object_writer._AsyncWriteObjectStream" -) -async def test_open_appendable_object_writer(mock_write_object_stream, mock_client): - """Test the open method.""" - # Arrange - writer = AsyncAppendableObjectWriter(mock_client, BUCKET, OBJECT) - mock_stream = mock_write_object_stream.return_value - mock_stream.open = mock.AsyncMock() - - mock_stream.generation_number = GENERATION - mock_stream.write_handle = WRITE_HANDLE + # Default mock properties + mock_stream.is_stream_open = False mock_stream.persisted_size = 0 - - # Act - await writer.open() - - # Assert - mock_stream.open.assert_awaited_once() - assert writer.is_stream_open - assert writer.generation == GENERATION - assert writer.write_handle == WRITE_HANDLE - assert writer.persisted_size == 0 - - -@pytest.mark.asyncio -@mock.patch( - "google.cloud.storage._experimental.asyncio.async_appendable_object_writer._AsyncWriteObjectStream" -) -async def test_open_appendable_object_writer_existing_object( - mock_write_object_stream, mock_client -): - """Test the open method.""" - # Arrange - writer = AsyncAppendableObjectWriter( - mock_client, BUCKET, OBJECT, generation=GENERATION - ) - mock_stream = mock_write_object_stream.return_value - mock_stream.open = mock.AsyncMock() - mock_stream.generation_number = GENERATION mock_stream.write_handle = WRITE_HANDLE - mock_stream.persisted_size = PERSISTED_SIZE - # Act - await writer.open() + yield { + "mock_client": mock_client, + "mock_stream_cls": mock_stream_cls, + "mock_stream": mock_stream, + } - # Assert 
- mock_stream.open.assert_awaited_once() - assert writer.is_stream_open - assert writer.generation == GENERATION - assert writer.write_handle == WRITE_HANDLE - assert writer.persisted_size == PERSISTED_SIZE - - -@pytest.mark.asyncio -@mock.patch( - "google.cloud.storage._experimental.asyncio.async_appendable_object_writer._AsyncWriteObjectStream" -) -async def test_open_when_already_open_raises_error( - mock_write_object_stream, mock_client -): - """Test that opening an already open writer raises a ValueError.""" - # Arrange - writer = AsyncAppendableObjectWriter(mock_client, BUCKET, OBJECT) - writer._is_stream_open = True # Manually set to open - - # Act & Assert - with pytest.raises(ValueError, match="Underlying bidi-gRPC stream is already open"): - await writer.open() + stream_patcher.stop() -@pytest.mark.asyncio -async def test_unimplemented_methods_raise_error(mock_client): - """Test that all currently unimplemented methods raise NotImplementedError.""" - writer = AsyncAppendableObjectWriter(mock_client, BUCKET, OBJECT) +class TestAsyncAppendableObjectWriter: + def _make_one(self, mock_client, **kwargs): + return AsyncAppendableObjectWriter(mock_client, BUCKET, OBJECT, **kwargs) - with pytest.raises(NotImplementedError): - await writer.append_from_string("data") + # ------------------------------------------------------------------------- + # Initialization & Configuration Tests + # ------------------------------------------------------------------------- - with pytest.raises(NotImplementedError): - await writer.append_from_stream(mock.Mock()) + def test_init_defaults(self, mock_appendable_writer): + writer = self._make_one(mock_appendable_writer["mock_client"]) + assert writer.bucket_name == BUCKET + assert writer.object_name == OBJECT + assert writer.persisted_size is None + assert writer.bytes_appended_since_last_flush == 0 + assert writer.flush_interval == _DEFAULT_FLUSH_INTERVAL_BYTES + def test_init_with_writer_options(self, mock_appendable_writer): + 
writer = self._make_one( + mock_appendable_writer["mock_client"], + writer_options={"FLUSH_INTERVAL_BYTES": EIGHT_MIB}, + ) + assert writer.flush_interval == EIGHT_MIB + + def test_init_validation_chunk_size_raises(self, mock_appendable_writer): + with pytest.raises(exceptions.OutOfRange): + self._make_one( + mock_appendable_writer["mock_client"], + writer_options={"FLUSH_INTERVAL_BYTES": _MAX_CHUNK_SIZE_BYTES - 1}, + ) + + def test_init_validation_multiple_raises(self, mock_appendable_writer): + with pytest.raises(exceptions.OutOfRange): + self._make_one( + mock_appendable_writer["mock_client"], + writer_options={"FLUSH_INTERVAL_BYTES": _MAX_CHUNK_SIZE_BYTES + 1}, + ) + + def test_init_raises_if_crc32c_missing(self, mock_appendable_writer): + with mock.patch( + "google.cloud.storage._experimental.asyncio._utils.google_crc32c" + ) as mock_crc: + mock_crc.implementation = "python" + with pytest.raises(exceptions.FailedPrecondition): + self._make_one(mock_appendable_writer["mock_client"]) + + # ------------------------------------------------------------------------- + # Stream Lifecycle Tests + # ------------------------------------------------------------------------- + + @pytest.mark.asyncio + async def test_state_lookup(self, mock_appendable_writer): + writer = self._make_one(mock_appendable_writer["mock_client"]) + writer._is_stream_open = True + writer.write_obj_stream = mock_appendable_writer["mock_stream"] + + mock_appendable_writer[ + "mock_stream" + ].recv.return_value = storage_type.BidiWriteObjectResponse(persisted_size=100) + + size = await writer.state_lookup() + + mock_appendable_writer["mock_stream"].send.assert_awaited_once() + assert size == 100 + assert writer.persisted_size == 100 + + @pytest.mark.asyncio + async def test_open_success(self, mock_appendable_writer): + writer = self._make_one(mock_appendable_writer["mock_client"]) + mock_appendable_writer["mock_stream"].generation_number = 456 + mock_appendable_writer["mock_stream"].write_handle = 
b"new-h" + mock_appendable_writer["mock_stream"].persisted_size = 0 -@pytest.mark.asyncio -@mock.patch( - "google.cloud.storage._experimental.asyncio.async_appendable_object_writer._AsyncWriteObjectStream" -) -async def test_flush(mock_write_object_stream, mock_client): - """Test that flush sends the correct request and updates state.""" - writer = AsyncAppendableObjectWriter(mock_client, BUCKET, OBJECT) - writer._is_stream_open = True - mock_stream = mock_write_object_stream.return_value - mock_stream.send = mock.AsyncMock() - mock_stream.recv = mock.AsyncMock( - return_value=_storage_v2.BidiWriteObjectResponse(persisted_size=1024) - ) - - persisted_size = await writer.flush() + await writer.open() - expected_request = _storage_v2.BidiWriteObjectRequest(flush=True, state_lookup=True) - mock_stream.send.assert_awaited_once_with(expected_request) - mock_stream.recv.assert_awaited_once() - assert writer.persisted_size == 1024 - assert writer.offset == 1024 - assert persisted_size == 1024 + assert writer._is_stream_open + assert writer.generation == 456 + assert writer.write_handle == b"new-h" + mock_appendable_writer["mock_stream"].open.assert_awaited_once() + + def test_on_open_error_redirection(self, mock_appendable_writer): + """Verify redirect info is extracted from helper.""" + writer = self._make_one(mock_appendable_writer["mock_client"]) + redirect = BidiWriteObjectRedirectedError( + routing_token="rt1", + write_handle=storage_type.BidiWriteHandle(handle=b"h1"), + generation=777, + ) + with mock.patch( + "google.cloud.storage._experimental.asyncio.async_appendable_object_writer._extract_bidi_writes_redirect_proto", + return_value=redirect, + ): + writer._on_open_error(exceptions.Aborted("redirect")) + + assert writer._routing_token == "rt1" + assert writer.write_handle.handle == b"h1" + assert writer.generation == 777 + + # ------------------------------------------------------------------------- + # Append Tests + # 
------------------------------------------------------------------------- + + @pytest.mark.asyncio + async def test_append_basic_success(self, mock_appendable_writer): + """Verify append orchestrates manager and drives the internal generator.""" + writer = self._make_one(mock_appendable_writer["mock_client"]) + writer._is_stream_open = True + writer.write_obj_stream = mock_appendable_writer["mock_stream"] + writer.persisted_size = 0 + + data = b"test-data" + + with mock.patch( + "google.cloud.storage._experimental.asyncio.async_appendable_object_writer._BidiStreamRetryManager" + ) as MockManager: + + async def mock_execute(state, policy): + factory = MockManager.call_args[0][1] + dummy_reqs = [storage_type.BidiWriteObjectRequest()] + gen = factory(dummy_reqs, state) + + mock_appendable_writer["mock_stream"].recv.side_effect = [ + storage_type.BidiWriteObjectResponse( + persisted_size=len(data), + write_handle=storage_type.BidiWriteHandle(handle=b"h2"), + ), + None, + ] + async for _ in gen: + pass + + MockManager.return_value.execute.side_effect = mock_execute + await writer.append(data) + + assert writer.persisted_size == len(data) + sent_req = mock_appendable_writer["mock_stream"].send.call_args[0][0] + assert sent_req.state_lookup + assert sent_req.flush + + @pytest.mark.asyncio + async def test_append_recovery_reopens_stream(self, mock_appendable_writer): + """Verifies re-opening logic on retry.""" + writer = self._make_one( + mock_appendable_writer["mock_client"], write_handle=b"h1" + ) + writer._is_stream_open = True + writer.write_obj_stream = mock_appendable_writer["mock_stream"] + # Setup mock to allow close() call + mock_appendable_writer["mock_stream"].is_stream_open = True + + async def mock_open(metadata=None): + writer.write_obj_stream = mock_appendable_writer["mock_stream"] + writer._is_stream_open = True + writer.persisted_size = 5 + writer.write_handle = b"h_recovered" + + with mock.patch.object( + writer, "open", side_effect=mock_open + ) as 
mock_writer_open: + with mock.patch( + "google.cloud.storage._experimental.asyncio.async_appendable_object_writer._BidiStreamRetryManager" + ) as MockManager: + + async def mock_execute(state, policy): + factory = MockManager.call_args[0][1] + # Simulate Attempt 1 fail + gen1 = factory([], state) + try: + await gen1.__anext__() + except Exception: + pass + # Simulate Attempt 2 + gen2 = factory([], state) + mock_appendable_writer["mock_stream"].recv.return_value = None + async for _ in gen2: + pass + + MockManager.return_value.execute.side_effect = mock_execute + await writer.append(b"0123456789") + + mock_appendable_writer["mock_stream"].close.assert_awaited() + mock_writer_open.assert_awaited() + assert writer.persisted_size == 5 + + @pytest.mark.asyncio + async def test_append_unimplemented_string_raises(self, mock_appendable_writer): + writer = self._make_one(mock_appendable_writer["mock_client"]) + with pytest.raises(NotImplementedError): + await writer.append_from_string("test") + + # ------------------------------------------------------------------------- + # Flush, Close, Finalize + # ------------------------------------------------------------------------- + + @pytest.mark.asyncio + async def test_flush_resets_counters(self, mock_appendable_writer): + writer = self._make_one(mock_appendable_writer["mock_client"]) + writer._is_stream_open = True + writer.write_obj_stream = mock_appendable_writer["mock_stream"] + writer.bytes_appended_since_last_flush = 100 + + mock_appendable_writer[ + "mock_stream" + ].recv.return_value = storage_type.BidiWriteObjectResponse(persisted_size=200) -@pytest.mark.asyncio -async def test_flush_without_open_raises_value_error(mock_client): - """Test that flush raises an error if the stream is not open.""" - writer = AsyncAppendableObjectWriter(mock_client, BUCKET, OBJECT) - with pytest.raises( - ValueError, match="Stream is not open. Call open\\(\\) before flush\\(\\)." 
- ): await writer.flush() + assert writer.bytes_appended_since_last_flush == 0 + assert writer.persisted_size == 200 -@pytest.mark.asyncio -@mock.patch( - "google.cloud.storage._experimental.asyncio.async_appendable_object_writer._AsyncWriteObjectStream" -) -async def test_simple_flush(mock_write_object_stream, mock_client): - """Test that flush sends the correct request and updates state.""" - # Arrange - writer = AsyncAppendableObjectWriter(mock_client, BUCKET, OBJECT) - writer._is_stream_open = True - mock_stream = mock_write_object_stream.return_value - mock_stream.send = mock.AsyncMock() - - # Act - await writer.simple_flush() - - # Assert - mock_stream.send.assert_awaited_once_with( - _storage_v2.BidiWriteObjectRequest(flush=True) - ) + @pytest.mark.asyncio + async def test_simple_flush(self, mock_appendable_writer): + writer = self._make_one(mock_appendable_writer["mock_client"]) + writer._is_stream_open = True + writer.write_obj_stream = mock_appendable_writer["mock_stream"] + writer.bytes_appended_since_last_flush = 50 - -@pytest.mark.asyncio -async def test_simple_flush_without_open_raises_value_error(mock_client): - """Test that flush raises an error if the stream is not open.""" - writer = AsyncAppendableObjectWriter(mock_client, BUCKET, OBJECT) - with pytest.raises( - ValueError, - match="Stream is not open. 
Call open\\(\\) before simple_flush\\(\\).", - ): await writer.simple_flush() - -@pytest.mark.asyncio -@mock.patch( - "google.cloud.storage._experimental.asyncio.async_appendable_object_writer._AsyncWriteObjectStream" -) -async def test_close(mock_write_object_stream, mock_client): - writer = AsyncAppendableObjectWriter(mock_client, BUCKET, OBJECT) - writer._is_stream_open = True - writer.offset = 1024 - writer.persisted_size = 1024 - mock_stream = mock_write_object_stream.return_value - mock_stream.send = mock.AsyncMock() - mock_stream.recv = mock.AsyncMock( - return_value=_storage_v2.BidiWriteObjectResponse(persisted_size=1024) - ) - mock_stream.close = mock.AsyncMock() - writer.finalize = mock.AsyncMock() - - persisted_size = await writer.close() - - writer.finalize.assert_not_awaited() - mock_stream.close.assert_awaited_once() - assert writer.offset is None - assert persisted_size == 1024 - assert not writer.is_stream_open - - -@pytest.mark.asyncio -async def test_close_without_open_raises_value_error(mock_client): - """Test that close raises an error if the stream is not open.""" - writer = AsyncAppendableObjectWriter(mock_client, BUCKET, OBJECT) - with pytest.raises( - ValueError, match="Stream is not open. Call open\\(\\) before close\\(\\)." 
- ): - await writer.close() - - -@pytest.mark.asyncio -@mock.patch( - "google.cloud.storage._experimental.asyncio.async_appendable_object_writer._AsyncWriteObjectStream" -) -async def test_finalize_on_close(mock_write_object_stream, mock_client): - """Test close with finalizing.""" - # Arrange - mock_resource = _storage_v2.Object(name=OBJECT, bucket=BUCKET, size=2048) - writer = AsyncAppendableObjectWriter(mock_client, BUCKET, OBJECT) - writer._is_stream_open = True - writer.offset = 1024 - mock_stream = mock_write_object_stream.return_value - mock_stream.send = mock.AsyncMock() - mock_stream.recv = mock.AsyncMock( - return_value=_storage_v2.BidiWriteObjectResponse(resource=mock_resource) - ) - mock_stream.close = mock.AsyncMock() - - # Act - result = await writer.close(finalize_on_close=True) - - # Assert - mock_stream.close.assert_awaited_once() - assert not writer.is_stream_open - assert writer.offset is None - assert writer.object_resource == mock_resource - assert writer.persisted_size == 2048 - assert result == mock_resource - - -@pytest.mark.asyncio -@mock.patch( - "google.cloud.storage._experimental.asyncio.async_appendable_object_writer._AsyncWriteObjectStream" -) -async def test_finalize(mock_write_object_stream, mock_client): - """Test that finalize sends the correct request and updates state.""" - writer = AsyncAppendableObjectWriter(mock_client, BUCKET, OBJECT) - writer._is_stream_open = True - mock_resource = _storage_v2.Object(name=OBJECT, bucket=BUCKET, size=123) - mock_stream = mock_write_object_stream.return_value - mock_stream.send = mock.AsyncMock() - mock_stream.recv = mock.AsyncMock( - return_value=_storage_v2.BidiWriteObjectResponse(resource=mock_resource) - ) - mock_stream.close = mock.AsyncMock() - - gcs_object = await writer.finalize() - - mock_stream.send.assert_awaited_once_with( - _storage_v2.BidiWriteObjectRequest(finish_write=True) - ) - mock_stream.recv.assert_awaited_once() - mock_stream.close.assert_awaited_once() - assert 
writer.object_resource == mock_resource - assert writer.persisted_size == 123 - assert gcs_object == mock_resource - assert not writer.is_stream_open - assert writer.offset is None - - -@pytest.mark.asyncio -async def test_finalize_without_open_raises_value_error(mock_client): - """Test that finalize raises an error if the stream is not open.""" - writer = AsyncAppendableObjectWriter(mock_client, BUCKET, OBJECT) - with pytest.raises( - ValueError, match="Stream is not open. Call open\\(\\) before finalize\\(\\)." - ): - await writer.finalize() - - -@pytest.mark.asyncio -@mock.patch( - "google.cloud.storage._experimental.asyncio.async_appendable_object_writer._AsyncWriteObjectStream" -) -async def test_append_raises_error_if_not_open(mock_write_object_stream, mock_client): - """Test that append raises an error if the stream is not open.""" - writer = AsyncAppendableObjectWriter(mock_client, BUCKET, OBJECT) - with pytest.raises( - ValueError, match="Stream is not open. Call open\\(\\) before append\\(\\)." 
- ): - await writer.append(b"some data") - - -@pytest.mark.asyncio -@mock.patch( - "google.cloud.storage._experimental.asyncio.async_appendable_object_writer._AsyncWriteObjectStream" -) -async def test_append_with_empty_data(mock_write_object_stream, mock_client): - """Test that append does nothing if data is empty.""" - writer = AsyncAppendableObjectWriter(mock_client, BUCKET, OBJECT) - writer._is_stream_open = True - mock_stream = mock_write_object_stream.return_value - mock_stream.send = mock.AsyncMock() - - await writer.append(b"") - - mock_stream.send.assert_not_awaited() - - -@pytest.mark.asyncio -@mock.patch( - "google.cloud.storage._experimental.asyncio.async_appendable_object_writer._AsyncWriteObjectStream" -) -async def test_append_sends_data_in_chunks(mock_write_object_stream, mock_client): - """Test that append sends data in chunks and updates offset.""" - from google.cloud.storage._experimental.asyncio.async_appendable_object_writer import ( - _MAX_CHUNK_SIZE_BYTES, - ) - - writer = AsyncAppendableObjectWriter(mock_client, BUCKET, OBJECT) - writer._is_stream_open = True - writer.persisted_size = 100 - mock_stream = mock_write_object_stream.return_value - mock_stream.send = mock.AsyncMock() - - data = b"a" * (_MAX_CHUNK_SIZE_BYTES + 1) - mock_stream.recv = mock.AsyncMock( - return_value=_storage_v2.BidiWriteObjectResponse(persisted_size=100 + len(data)) - ) - - await writer.append(data) - - assert mock_stream.send.await_count == 2 - first_request = mock_stream.send.await_args_list[0].args[0] - second_request = mock_stream.send.await_args_list[1].args[0] - - # First chunk - assert first_request.write_offset == 100 - assert len(first_request.checksummed_data.content) == _MAX_CHUNK_SIZE_BYTES - assert first_request.checksummed_data.crc32c == int.from_bytes( - Checksum(data[:_MAX_CHUNK_SIZE_BYTES]).digest(), byteorder="big" - ) - assert not first_request.flush - assert not first_request.state_lookup - - # Second chunk (last chunk) - assert 
second_request.write_offset == 100 + _MAX_CHUNK_SIZE_BYTES - assert len(second_request.checksummed_data.content) == 1 - assert second_request.checksummed_data.crc32c == int.from_bytes( - Checksum(data[_MAX_CHUNK_SIZE_BYTES:]).digest(), byteorder="big" - ) - assert second_request.flush - assert second_request.state_lookup - - assert writer.offset == 100 + len(data) - - -@pytest.mark.asyncio -@mock.patch( - "google.cloud.storage._experimental.asyncio.async_appendable_object_writer._AsyncWriteObjectStream" -) -async def test_append_flushes_when_buffer_is_full( - mock_write_object_stream, mock_client -): - """Test that append flushes the stream when the buffer size is reached.""" - - writer = AsyncAppendableObjectWriter(mock_client, BUCKET, OBJECT) - writer._is_stream_open = True - writer.persisted_size = 0 - mock_stream = mock_write_object_stream.return_value - mock_stream.send = mock.AsyncMock() - mock_stream.recv = mock.AsyncMock() - - data = b"a" * _DEFAULT_FLUSH_INTERVAL_BYTES - await writer.append(data) - - num_chunks = _DEFAULT_FLUSH_INTERVAL_BYTES // _MAX_CHUNK_SIZE_BYTES - assert mock_stream.send.await_count == num_chunks - - # All but the last request should not have flush or state_lookup set. - for i in range(num_chunks - 1): - request = mock_stream.send.await_args_list[i].args[0] - assert not request.flush - assert not request.state_lookup - - # The last request should have flush and state_lookup set. 
- last_request = mock_stream.send.await_args_list[-1].args[0] - assert last_request.flush - assert last_request.state_lookup - assert writer.bytes_appended_since_last_flush == 0 - - -@pytest.mark.asyncio -@mock.patch( - "google.cloud.storage._experimental.asyncio.async_appendable_object_writer._AsyncWriteObjectStream" -) -async def test_append_handles_large_data(mock_write_object_stream, mock_client): - """Test that append handles data larger than the buffer size.""" - - writer = AsyncAppendableObjectWriter(mock_client, BUCKET, OBJECT) - writer._is_stream_open = True - writer.persisted_size = 0 - mock_stream = mock_write_object_stream.return_value - mock_stream.send = mock.AsyncMock() - mock_stream.recv = mock.AsyncMock() - - data = b"a" * (_DEFAULT_FLUSH_INTERVAL_BYTES * 2 + 1) - await writer.append(data) - - flushed_requests = [ - call.args[0] for call in mock_stream.send.await_args_list if call.args[0].flush - ] - assert len(flushed_requests) == 3 - - last_request = mock_stream.send.await_args_list[-1].args[0] - assert last_request.state_lookup - - -@pytest.mark.asyncio -@mock.patch( - "google.cloud.storage._experimental.asyncio.async_appendable_object_writer._AsyncWriteObjectStream" -) -async def test_append_data_two_times(mock_write_object_stream, mock_client): - """Test that append sends data correctly when called multiple times.""" - from google.cloud.storage._experimental.asyncio.async_appendable_object_writer import ( - _MAX_CHUNK_SIZE_BYTES, - ) - - writer = AsyncAppendableObjectWriter(mock_client, BUCKET, OBJECT) - writer._is_stream_open = True - writer.persisted_size = 0 - mock_stream = mock_write_object_stream.return_value - mock_stream.send = mock.AsyncMock() - - data1 = b"a" * (_MAX_CHUNK_SIZE_BYTES + 10) - mock_stream.recv = mock.AsyncMock( - return_value=_storage_v2.BidiWriteObjectResponse(persisted_size=len(data1)) - ) - await writer.append(data1) - - assert mock_stream.send.await_count == 2 - last_request_data1 = 
mock_stream.send.await_args_list[-1].args[0] - assert last_request_data1.flush - assert last_request_data1.state_lookup - - data2 = b"b" * (_MAX_CHUNK_SIZE_BYTES + 20) - mock_stream.recv = mock.AsyncMock( - return_value=_storage_v2.BidiWriteObjectResponse( - persisted_size=len(data2) + len(data1) + mock_appendable_writer["mock_stream"].send.assert_awaited_with( + storage_type.BidiWriteObjectRequest(flush=True) ) - ) - await writer.append(data2) - - assert mock_stream.send.await_count == 4 - last_request_data2 = mock_stream.send.await_args_list[-1].args[0] - assert last_request_data2.flush - assert last_request_data2.state_lookup - - total_data_length = len(data1) + len(data2) - assert writer.offset == total_data_length - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "file_size, block_size", - [ - (10, 4 * 1024), - (0, _DEFAULT_FLUSH_INTERVAL_BYTES), - (20 * 1024 * 1024, _DEFAULT_FLUSH_INTERVAL_BYTES), - (16 * 1024 * 1024, _DEFAULT_FLUSH_INTERVAL_BYTES), - ], -) -async def test_append_from_file(file_size, block_size, mock_client): - # arrange - fp = BytesIO(b"a" * file_size) - writer = AsyncAppendableObjectWriter(mock_client, BUCKET, OBJECT) - writer._is_stream_open = True - writer.append = mock.AsyncMock() - - # act - await writer.append_from_file(fp, block_size=block_size) - - # assert - exepected_calls = ( - file_size // block_size - if file_size % block_size == 0 - else file_size // block_size + 1 - ) - assert writer.append.await_count == exepected_calls + assert writer.bytes_appended_since_last_flush == 0 + + @pytest.mark.asyncio + async def test_close_without_finalize(self, mock_appendable_writer): + writer = self._make_one(mock_appendable_writer["mock_client"]) + writer._is_stream_open = True + writer.write_obj_stream = mock_appendable_writer["mock_stream"] + writer.persisted_size = 50 + + size = await writer.close() + + mock_appendable_writer["mock_stream"].close.assert_awaited() + assert not writer._is_stream_open + assert size == 50 + + 
@pytest.mark.asyncio + async def test_finalize_lifecycle(self, mock_appendable_writer): + writer = self._make_one(mock_appendable_writer["mock_client"]) + writer._is_stream_open = True + writer.write_obj_stream = mock_appendable_writer["mock_stream"] + + resource = storage_type.Object(size=999) + mock_appendable_writer[ + "mock_stream" + ].recv.return_value = storage_type.BidiWriteObjectResponse(resource=resource) + + res = await writer.finalize() + + assert res == resource + assert writer.persisted_size == 999 + mock_appendable_writer["mock_stream"].send.assert_awaited_with( + storage_type.BidiWriteObjectRequest(finish_write=True) + ) + mock_appendable_writer["mock_stream"].close.assert_awaited() + assert not writer._is_stream_open + + @pytest.mark.asyncio + async def test_close_with_finalize_on_close(self, mock_appendable_writer): + writer = self._make_one(mock_appendable_writer["mock_client"]) + writer._is_stream_open = True + writer.finalize = AsyncMock() + + await writer.close(finalize_on_close=True) + writer.finalize.assert_awaited_once() + + # ------------------------------------------------------------------------- + # Helper Tests + # ------------------------------------------------------------------------- + + @pytest.mark.asyncio + async def test_append_from_file(self, mock_appendable_writer): + writer = self._make_one(mock_appendable_writer["mock_client"]) + writer._is_stream_open = True + writer.append = AsyncMock() + + fp = io.BytesIO(b"a" * 12) + await writer.append_from_file(fp, block_size=4) + + assert writer.append.await_count == 3 + + @pytest.mark.asyncio + async def test_methods_require_open_stream_raises(self, mock_appendable_writer): + writer = self._make_one(mock_appendable_writer["mock_client"]) + methods = [ + writer.append(b"data"), + writer.flush(), + writer.simple_flush(), + writer.close(), + writer.finalize(), + writer.state_lookup(), + ] + for coro in methods: + with pytest.raises(ValueError, match="Stream is not open"): + await coro 
diff --git a/tests/unit/asyncio/test_async_grpc_client.py b/tests/unit/asyncio/test_async_grpc_client.py index 02b416f6b..f94729516 100644 --- a/tests/unit/asyncio/test_async_grpc_client.py +++ b/tests/unit/asyncio/test_async_grpc_client.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import unittest from unittest import mock from google.auth import credentials as auth_credentials from google.auth.credentials import AnonymousCredentials @@ -29,7 +28,7 @@ def _make_credentials(spec=None): return mock.Mock(spec=spec) -class TestAsyncGrpcClient(unittest.TestCase): +class TestAsyncGrpcClient: @mock.patch("google.cloud._storage_v2.StorageAsyncClient") def test_constructor_default_options(self, mock_async_storage_client): # Arrange @@ -63,7 +62,6 @@ def test_constructor_default_options(self, mock_async_storage_client): @mock.patch("google.cloud._storage_v2.StorageAsyncClient") def test_constructor_with_client_info(self, mock_async_storage_client): - mock_transport_cls = mock.MagicMock() mock_async_storage_client.get_transport_class.return_value = mock_transport_cls mock_creds = _make_credentials() @@ -86,7 +84,6 @@ def test_constructor_with_client_info(self, mock_async_storage_client): @mock.patch("google.cloud._storage_v2.StorageAsyncClient") def test_constructor_disables_directpath(self, mock_async_storage_client): - mock_transport_cls = mock.MagicMock() mock_async_storage_client.get_transport_class.return_value = mock_transport_cls mock_creds = _make_credentials() @@ -108,7 +105,6 @@ def test_constructor_disables_directpath(self, mock_async_storage_client): @mock.patch("google.cloud._storage_v2.StorageAsyncClient") def test_grpc_client_property(self, mock_grpc_gapic_client): - # Arrange mock_transport_cls = mock.MagicMock() mock_grpc_gapic_client.get_transport_class.return_value = mock_transport_cls @@ -152,11 +148,10 @@ def test_grpc_client_property(self, mock_grpc_gapic_client): 
client_info=mock_client_info, client_options=mock_client_options, ) - self.assertIs(retrieved_client, mock_grpc_gapic_client.return_value) + assert retrieved_client is mock_grpc_gapic_client.return_value @mock.patch("google.cloud._storage_v2.StorageAsyncClient") def test_grpc_client_with_anon_creds(self, mock_grpc_gapic_client): - # Arrange mock_transport_cls = mock.MagicMock() mock_grpc_gapic_client.get_transport_class.return_value = mock_transport_cls @@ -171,7 +166,7 @@ def test_grpc_client_with_anon_creds(self, mock_grpc_gapic_client): retrieved_client = client.grpc_client # Assert - self.assertIs(retrieved_client, mock_grpc_gapic_client.return_value) + assert retrieved_client is mock_grpc_gapic_client.return_value primary_user_agent = DEFAULT_CLIENT_INFO.to_user_agent() expected_options = (("grpc.primary_user_agent", primary_user_agent),) diff --git a/tests/unit/asyncio/test_async_write_object_stream.py b/tests/unit/asyncio/test_async_write_object_stream.py index 92ba2925a..7bfa2cea0 100644 --- a/tests/unit/asyncio/test_async_write_object_stream.py +++ b/tests/unit/asyncio/test_async_write_object_stream.py @@ -12,10 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import unittest.mock as mock +from unittest.mock import AsyncMock, MagicMock import pytest -from unittest import mock -from unittest.mock import AsyncMock from google.cloud.storage._experimental.asyncio.async_write_object_stream import ( _AsyncWriteObjectStream, ) @@ -24,562 +24,196 @@ BUCKET = "my-bucket" OBJECT = "my-object" GENERATION = 12345 -WRITE_HANDLE_BYTES = b"test-handle" -NEW_WRITE_HANDLE_BYTES = b"new-test-handle" -WRITE_HANDLE_PROTO = _storage_v2.BidiWriteHandle(handle=WRITE_HANDLE_BYTES) -NEW_WRITE_HANDLE_PROTO = _storage_v2.BidiWriteHandle(handle=NEW_WRITE_HANDLE_BYTES) +WRITE_HANDLE = b"test-handle" +FULL_BUCKET_PATH = f"projects/_/buckets/{BUCKET}" @pytest.fixture def mock_client(): - """Mock the async gRPC client.""" - mock_transport = mock.AsyncMock() + """Fixture to provide a mock gRPC client.""" + client = MagicMock() + # Mocking transport internal structures + mock_transport = MagicMock() mock_transport.bidi_write_object = mock.sentinel.bidi_write_object mock_transport._wrapped_methods = { mock.sentinel.bidi_write_object: mock.sentinel.wrapped_bidi_write_object } - - mock_gapic_client = mock.AsyncMock() - mock_gapic_client._transport = mock_transport - - client = mock.AsyncMock() - client._client = mock_gapic_client + client._client._transport = mock_transport return client -async def instantiate_write_obj_stream(mock_client, mock_cls_async_bidi_rpc, open=True): - """Helper to create an instance of _AsyncWriteObjectStream and open it by default.""" - socket_like_rpc = AsyncMock() - mock_cls_async_bidi_rpc.return_value = socket_like_rpc - socket_like_rpc.open = AsyncMock() - socket_like_rpc.send = AsyncMock() - socket_like_rpc.close = AsyncMock() - - mock_response = mock.MagicMock(spec=_storage_v2.BidiWriteObjectResponse) - mock_response.resource = mock.MagicMock(spec=_storage_v2.Object) - mock_response.resource.generation = GENERATION - mock_response.resource.size = 0 - mock_response.write_handle = WRITE_HANDLE_PROTO - socket_like_rpc.recv = 
AsyncMock(return_value=mock_response) - - write_obj_stream = _AsyncWriteObjectStream(mock_client, BUCKET, OBJECT) - - if open: - await write_obj_stream.open() - - return write_obj_stream - - -def test_async_write_object_stream_init(mock_client): - """Test the constructor of _AsyncWriteObjectStream.""" - stream = _AsyncWriteObjectStream(mock_client, BUCKET, OBJECT) - - assert stream.client == mock_client - assert stream.bucket_name == BUCKET - assert stream.object_name == OBJECT - assert stream.generation_number is None - assert stream.write_handle is None - assert stream._full_bucket_name == f"projects/_/buckets/{BUCKET}" - assert stream.rpc == mock.sentinel.wrapped_bidi_write_object - assert stream.metadata == ( - ("x-goog-request-params", f"bucket=projects/_/buckets/{BUCKET}"), - ) - assert stream.socket_like_rpc is None - assert not stream._is_stream_open - assert stream.first_bidi_write_req is None - assert stream.persisted_size == 0 - assert stream.object_resource is None - - -def test_async_write_object_stream_init_with_generation_and_handle(mock_client): - """Test the constructor with optional arguments.""" - stream = _AsyncWriteObjectStream( - mock_client, - BUCKET, - OBJECT, - generation_number=GENERATION, - write_handle=WRITE_HANDLE_PROTO, - ) - - assert stream.generation_number == GENERATION - assert stream.write_handle == WRITE_HANDLE_PROTO - - -def test_async_write_object_stream_init_raises_value_error(): - """Test that the constructor raises ValueError for missing arguments.""" - with pytest.raises(ValueError, match="client must be provided"): - _AsyncWriteObjectStream(None, BUCKET, OBJECT) - - with pytest.raises(ValueError, match="bucket_name must be provided"): - _AsyncWriteObjectStream(mock.Mock(), None, OBJECT) - - with pytest.raises(ValueError, match="object_name must be provided"): - _AsyncWriteObjectStream(mock.Mock(), BUCKET, None) - - -@pytest.mark.asyncio -@mock.patch( - 
"google.cloud.storage._experimental.asyncio.async_write_object_stream.AsyncBidiRpc" -) -async def test_open_for_new_object(mock_async_bidi_rpc, mock_client): - """Test opening a stream for a new object.""" - # Arrange - socket_like_rpc = mock.AsyncMock() - mock_async_bidi_rpc.return_value = socket_like_rpc - socket_like_rpc.open = mock.AsyncMock() - - mock_response = mock.MagicMock(spec=_storage_v2.BidiWriteObjectResponse) - mock_response.resource = mock.MagicMock(spec=_storage_v2.Object) - mock_response.resource.generation = GENERATION - mock_response.resource.size = 0 - mock_response.write_handle = WRITE_HANDLE_PROTO - socket_like_rpc.recv = mock.AsyncMock(return_value=mock_response) - - stream = _AsyncWriteObjectStream(mock_client, BUCKET, OBJECT) - - # Act - await stream.open() - - # Assert - assert stream._is_stream_open - socket_like_rpc.open.assert_called_once() - socket_like_rpc.recv.assert_called_once() - assert stream.generation_number == GENERATION - assert stream.write_handle == WRITE_HANDLE_PROTO - assert stream.persisted_size == 0 - - -@pytest.mark.asyncio -@mock.patch( - "google.cloud.storage._experimental.asyncio.async_write_object_stream.AsyncBidiRpc" -) -async def test_open_for_new_object_with_generation_zero( - mock_async_bidi_rpc, mock_client -): - """Test opening a stream for a new object.""" - # Arrange - socket_like_rpc = mock.AsyncMock() - mock_async_bidi_rpc.return_value = socket_like_rpc - socket_like_rpc.open = mock.AsyncMock() - - mock_response = mock.MagicMock(spec=_storage_v2.BidiWriteObjectResponse) - mock_response.resource = mock.MagicMock(spec=_storage_v2.Object) - mock_response.resource.generation = GENERATION - mock_response.resource.size = 0 - mock_response.write_handle = WRITE_HANDLE_PROTO - socket_like_rpc.recv = mock.AsyncMock(return_value=mock_response) - - stream = _AsyncWriteObjectStream(mock_client, BUCKET, OBJECT, generation_number=0) - - # Act - await stream.open() - - # Assert - mock_async_bidi_rpc.assert_called_once() 
- _, call_kwargs = mock_async_bidi_rpc.call_args - initial_request = call_kwargs["initial_request"] - assert initial_request.write_object_spec.if_generation_match == 0 - assert stream._is_stream_open - socket_like_rpc.open.assert_called_once() - socket_like_rpc.recv.assert_called_once() - assert stream.generation_number == GENERATION - assert stream.write_handle == WRITE_HANDLE_PROTO - assert stream.persisted_size == 0 - - -@pytest.mark.asyncio -@mock.patch( - "google.cloud.storage._experimental.asyncio.async_write_object_stream.AsyncBidiRpc" -) -async def test_open_for_existing_object(mock_async_bidi_rpc, mock_client): - """Test opening a stream for an existing object.""" - # Arrange - socket_like_rpc = mock.AsyncMock() - mock_async_bidi_rpc.return_value = socket_like_rpc - socket_like_rpc.open = mock.AsyncMock() - - mock_response = mock.MagicMock(spec=_storage_v2.BidiWriteObjectResponse) - mock_response.resource = mock.MagicMock(spec=_storage_v2.Object) - mock_response.resource.size = 1024 - mock_response.resource.generation = GENERATION - mock_response.write_handle = WRITE_HANDLE_PROTO - socket_like_rpc.recv = mock.AsyncMock(return_value=mock_response) - - stream = _AsyncWriteObjectStream( - mock_client, BUCKET, OBJECT, generation_number=GENERATION - ) - - # Act - await stream.open() - - # Assert - assert stream._is_stream_open - socket_like_rpc.open.assert_called_once() - socket_like_rpc.recv.assert_called_once() - assert stream.generation_number == GENERATION - assert stream.write_handle == WRITE_HANDLE_PROTO - assert stream.persisted_size == 1024 - - -@pytest.mark.asyncio -@mock.patch( - "google.cloud.storage._experimental.asyncio.async_write_object_stream.AsyncBidiRpc" -) -async def test_open_when_already_open_raises_error(mock_async_bidi_rpc, mock_client): - """Test that opening an already open stream raises a ValueError.""" - # Arrange - socket_like_rpc = mock.AsyncMock() - mock_async_bidi_rpc.return_value = socket_like_rpc - socket_like_rpc.open = 
mock.AsyncMock() - - mock_response = mock.MagicMock(spec=_storage_v2.BidiWriteObjectResponse) - mock_response.resource = mock.MagicMock(spec=_storage_v2.Object) - mock_response.resource.generation = GENERATION - mock_response.resource.size = 0 - mock_response.write_handle = WRITE_HANDLE_PROTO - socket_like_rpc.recv = mock.AsyncMock(return_value=mock_response) - - stream = _AsyncWriteObjectStream(mock_client, BUCKET, OBJECT) - await stream.open() - - # Act & Assert - with pytest.raises(ValueError, match="Stream is already open"): - await stream.open() - +class TestAsyncWriteObjectStream: + """Test suite for AsyncWriteObjectStream.""" -@pytest.mark.asyncio -@mock.patch( - "google.cloud.storage._experimental.asyncio.async_write_object_stream.AsyncBidiRpc" -) -async def test_open_raises_error_on_missing_object_resource( - mock_async_bidi_rpc, mock_client -): - """Test that open raises ValueError if object_resource is not in the response.""" - socket_like_rpc = mock.AsyncMock() - mock_async_bidi_rpc.return_value = socket_like_rpc - - mock_reponse = mock.AsyncMock() - type(mock_reponse).resource = mock.PropertyMock(return_value=None) - socket_like_rpc.recv.return_value = mock_reponse - - # Note: Don't use below code as unittest library automatically assigns an - # `AsyncMock` object to an attribute, if not set. 
- # socket_like_rpc.recv.return_value = mock.AsyncMock( - # return_value=_storage_v2.BidiWriteObjectResponse(resource=None) - # ) - - stream = _AsyncWriteObjectStream(mock_client, BUCKET, OBJECT) - with pytest.raises( - ValueError, match="Failed to obtain object resource after opening the stream" - ): - await stream.open() + # ------------------------------------------------------------------------- + # Initialization Tests + # ------------------------------------------------------------------------- - -@pytest.mark.asyncio -@mock.patch( - "google.cloud.storage._experimental.asyncio.async_write_object_stream.AsyncBidiRpc" -) -async def test_open_raises_error_on_missing_generation( - mock_async_bidi_rpc, mock_client -): - """Test that open raises ValueError if generation is not in the response.""" - socket_like_rpc = mock.AsyncMock() - mock_async_bidi_rpc.return_value = socket_like_rpc - - # Configure the mock response object - mock_response = mock.AsyncMock() - type(mock_response.resource).generation = mock.PropertyMock(return_value=None) - socket_like_rpc.recv.return_value = mock_response - - stream = _AsyncWriteObjectStream(mock_client, BUCKET, OBJECT) - with pytest.raises( - ValueError, match="Failed to obtain object generation after opening the stream" - ): + def test_init_basic(self, mock_client): + stream = _AsyncWriteObjectStream(mock_client, BUCKET, OBJECT) + assert stream.bucket_name == BUCKET + assert stream.object_name == OBJECT + assert stream._full_bucket_name == FULL_BUCKET_PATH + assert stream.metadata == ( + ("x-goog-request-params", f"bucket={FULL_BUCKET_PATH}"), + ) + assert not stream.is_stream_open + + def test_init_raises_value_error(self, mock_client): + with pytest.raises(ValueError, match="client must be provided"): + _AsyncWriteObjectStream(None, BUCKET, OBJECT) + with pytest.raises(ValueError, match="bucket_name must be provided"): + _AsyncWriteObjectStream(mock_client, None, OBJECT) + with pytest.raises(ValueError, match="object_name must 
be provided"): + _AsyncWriteObjectStream(mock_client, BUCKET, None) + + # ------------------------------------------------------------------------- + # Open Stream Tests + # ------------------------------------------------------------------------- + + @mock.patch( + "google.cloud.storage._experimental.asyncio.async_write_object_stream.AsyncBidiRpc" + ) + @pytest.mark.asyncio + async def test_open_new_object(self, mock_rpc_cls, mock_client): + mock_rpc = mock_rpc_cls.return_value + mock_rpc.open = AsyncMock() + + # We don't use spec here to avoid descriptor issues with nested protos + mock_response = MagicMock() + mock_response.persisted_size = 0 + mock_response.resource.generation = GENERATION + mock_response.resource.size = 0 + mock_response.write_handle = WRITE_HANDLE + mock_rpc.recv = AsyncMock(return_value=mock_response) + + stream = _AsyncWriteObjectStream(mock_client, BUCKET, OBJECT) await stream.open() - -@pytest.mark.asyncio -@mock.patch( - "google.cloud.storage._experimental.asyncio.async_write_object_stream.AsyncBidiRpc" -) -async def test_open_raises_error_on_missing_write_handle( - mock_async_bidi_rpc, mock_client -): - """Test that open raises ValueError if write_handle is not in the response.""" - socket_like_rpc = mock.AsyncMock() - mock_async_bidi_rpc.return_value = socket_like_rpc - socket_like_rpc.recv = mock.AsyncMock( - return_value=_storage_v2.BidiWriteObjectResponse( - resource=_storage_v2.Object(generation=GENERATION), write_handle=None + # Check if BidiRpc was initialized with WriteObjectSpec + call_args = mock_rpc_cls.call_args + initial_request = call_args.kwargs["initial_request"] + assert initial_request.write_object_spec is not None + assert initial_request.write_object_spec.resource.name == OBJECT + assert initial_request.write_object_spec.appendable + + assert stream.is_stream_open + assert stream.write_handle == WRITE_HANDLE + assert stream.generation_number == GENERATION + + @mock.patch( + 
"google.cloud.storage._experimental.asyncio.async_write_object_stream.AsyncBidiRpc" + ) + @pytest.mark.asyncio + async def test_open_existing_object_with_token(self, mock_rpc_cls, mock_client): + mock_rpc = mock_rpc_cls.return_value + mock_rpc.open = AsyncMock() + + # Ensure resource is None so persisted_size logic doesn't get overwritten by child mocks + mock_response = MagicMock() + mock_response.persisted_size = 1024 + mock_response.resource = None + mock_response.write_handle = WRITE_HANDLE + mock_rpc.recv = AsyncMock(return_value=mock_response) + + stream = _AsyncWriteObjectStream( + mock_client, + BUCKET, + OBJECT, + generation_number=GENERATION, + routing_token="token-123", ) - ) - stream = _AsyncWriteObjectStream(mock_client, BUCKET, OBJECT) - with pytest.raises(ValueError, match="Failed to obtain write_handle"): await stream.open() - -@pytest.mark.asyncio -@mock.patch( - "google.cloud.storage._experimental.asyncio.async_write_object_stream.AsyncBidiRpc" -) -async def test_open_raises_error_on_missing_persisted_size_with_write_handle( - mock_async_bidi_rpc, mock_client -): - """Test that open raises ValueError if persisted_size is None when opened via write_handle.""" - socket_like_rpc = mock.AsyncMock() - mock_async_bidi_rpc.return_value = socket_like_rpc - - # - mock_response = mock.MagicMock(spec=_storage_v2.BidiWriteObjectResponse) - mock_response.persisted_size = None # This is the key part of the test - mock_response.write_handle = ( - WRITE_HANDLE_PROTO # Ensure write_handle is present to avoid that error - ) - socket_like_rpc.recv.return_value = mock_response - - # ACT - stream = _AsyncWriteObjectStream( - mock_client, - BUCKET, - OBJECT, - write_handle=WRITE_HANDLE_PROTO, - generation_number=GENERATION, - ) - - with pytest.raises( - ValueError, - match="Failed to obtain persisted_size after opening the stream via write_handle", - ): + # Verify AppendObjectSpec attributes + initial_request = mock_rpc_cls.call_args.kwargs["initial_request"] + assert 
initial_request.append_object_spec is not None + assert initial_request.append_object_spec.generation == GENERATION + assert initial_request.append_object_spec.routing_token == "token-123" + assert stream.persisted_size == 1024 + + @mock.patch( + "google.cloud.storage._experimental.asyncio.async_write_object_stream.AsyncBidiRpc" + ) + @pytest.mark.asyncio + async def test_open_metadata_merging(self, mock_rpc_cls, mock_client): + mock_rpc = mock_rpc_cls.return_value + mock_rpc.open = AsyncMock() + mock_rpc.recv = AsyncMock(return_value=MagicMock(resource=None)) + + stream = _AsyncWriteObjectStream(mock_client, BUCKET, OBJECT) + extra_metadata = [("x-custom", "val"), ("x-goog-request-params", "extra=param")] + + await stream.open(metadata=extra_metadata) + + # Verify that metadata combined bucket and extra params + passed_metadata = mock_rpc_cls.call_args.kwargs["metadata"] + meta_dict = dict(passed_metadata) + assert meta_dict["x-custom"] == "val" + # Params should be comma separated + params = meta_dict["x-goog-request-params"] + assert f"bucket={FULL_BUCKET_PATH}" in params + assert "extra=param" in params + + @pytest.mark.asyncio + async def test_open_already_open_raises(self, mock_client): + stream = _AsyncWriteObjectStream(mock_client, BUCKET, OBJECT) + stream._is_stream_open = True + with pytest.raises(ValueError, match="already open"): + await stream.open() + + # ------------------------------------------------------------------------- + # Send & Recv & Close Tests + # ------------------------------------------------------------------------- + + @mock.patch( + "google.cloud.storage._experimental.asyncio.async_write_object_stream.AsyncBidiRpc" + ) + @pytest.mark.asyncio + async def test_send_and_recv_logic(self, mock_rpc_cls, mock_client): + # Setup open stream + mock_rpc = mock_rpc_cls.return_value + mock_rpc.open = AsyncMock() + mock_rpc.send = AsyncMock() # Crucial: Must be AsyncMock + mock_rpc.recv = AsyncMock(return_value=MagicMock(resource=None)) + + 
stream = _AsyncWriteObjectStream(mock_client, BUCKET, OBJECT) await stream.open() - -@pytest.mark.asyncio -@mock.patch( - "google.cloud.storage._experimental.asyncio.async_write_object_stream.AsyncBidiRpc" -) -async def test_close(mock_cls_async_bidi_rpc, mock_client): - """Test that close successfully closes the stream.""" - # Arrange - write_obj_stream = await instantiate_write_obj_stream( - mock_client, mock_cls_async_bidi_rpc, open=True - ) - write_obj_stream.requests_done = AsyncMock() - - # Act - await write_obj_stream.close() - - # Assert - write_obj_stream.requests_done.assert_called_once() - write_obj_stream.socket_like_rpc.close.assert_called_once() - assert not write_obj_stream.is_stream_open - - -@pytest.mark.asyncio -@mock.patch( - "google.cloud.storage._experimental.asyncio.async_write_object_stream.AsyncBidiRpc" -) -async def test_close_without_open_should_raise_error( - mock_cls_async_bidi_rpc, mock_client -): - """Test that closing a stream that is not open raises a ValueError.""" - # Arrange - write_obj_stream = await instantiate_write_obj_stream( - mock_client, mock_cls_async_bidi_rpc, open=False - ) - - # Act & Assert - with pytest.raises(ValueError, match="Stream is not open"): - await write_obj_stream.close() - - -@pytest.mark.asyncio -@mock.patch( - "google.cloud.storage._experimental.asyncio.async_write_object_stream.AsyncBidiRpc" -) -async def test_send(mock_cls_async_bidi_rpc, mock_client): - """Test that send calls the underlying rpc's send method.""" - # Arrange - write_obj_stream = await instantiate_write_obj_stream( - mock_client, mock_cls_async_bidi_rpc, open=True - ) - - # Act - bidi_write_object_request = _storage_v2.BidiWriteObjectRequest() - await write_obj_stream.send(bidi_write_object_request) - - # Assert - write_obj_stream.socket_like_rpc.send.assert_called_once_with( - bidi_write_object_request - ) - - -@pytest.mark.asyncio -@mock.patch( - "google.cloud.storage._experimental.asyncio.async_write_object_stream.AsyncBidiRpc" -) 
-async def test_send_without_open_should_raise_error( - mock_cls_async_bidi_rpc, mock_client -): - """Test that sending on a stream that is not open raises a ValueError.""" - # Arrange - write_obj_stream = await instantiate_write_obj_stream( - mock_client, mock_cls_async_bidi_rpc, open=False - ) - - # Act & Assert - with pytest.raises(ValueError, match="Stream is not open"): - await write_obj_stream.send(_storage_v2.BidiWriteObjectRequest()) - - -@pytest.mark.asyncio -@mock.patch( - "google.cloud.storage._experimental.asyncio.async_write_object_stream.AsyncBidiRpc" -) -async def test_recv(mock_cls_async_bidi_rpc, mock_client): - """Test that recv calls the underlying rpc's recv method.""" - # Arrange - write_obj_stream = await instantiate_write_obj_stream( - mock_client, mock_cls_async_bidi_rpc, open=True - ) - bidi_write_object_response = _storage_v2.BidiWriteObjectResponse() - write_obj_stream.socket_like_rpc.recv = AsyncMock( - return_value=bidi_write_object_response - ) - - # Act - response = await write_obj_stream.recv() - - # Assert - write_obj_stream.socket_like_rpc.recv.assert_called_once() - assert response == bidi_write_object_response - - -@pytest.mark.asyncio -@mock.patch( - "google.cloud.storage._experimental.asyncio.async_write_object_stream.AsyncBidiRpc" -) -async def test_recv_without_open_should_raise_error( - mock_cls_async_bidi_rpc, mock_client -): - """Test that receiving on a stream that is not open raises a ValueError.""" - # Arrange - write_obj_stream = await instantiate_write_obj_stream( - mock_client, mock_cls_async_bidi_rpc, open=False - ) - - # Act & Assert - with pytest.raises(ValueError, match="Stream is not open"): - await write_obj_stream.recv() - - -@pytest.mark.asyncio -@mock.patch( - "google.cloud.storage._experimental.asyncio.async_write_object_stream.AsyncBidiRpc" -) -async def test_requests_done(mock_cls_async_bidi_rpc, mock_client): - """Test that requests_done signals the end of requests.""" - # Arrange - write_obj_stream = 
await instantiate_write_obj_stream( - mock_client, mock_cls_async_bidi_rpc, open=True - ) - write_obj_stream.socket_like_rpc.send = AsyncMock() - write_obj_stream.socket_like_rpc.recv = AsyncMock() - - # Act - await write_obj_stream.requests_done() - - # Assert - write_obj_stream.socket_like_rpc.send.assert_called_once_with(None) - write_obj_stream.socket_like_rpc.recv.assert_called_once() - - -@pytest.mark.asyncio -@mock.patch( - "google.cloud.storage._experimental.asyncio.async_write_object_stream.AsyncBidiRpc" -) -async def test_open_for_existing_object_with_none_size( - mock_async_bidi_rpc, mock_client -): - """Test opening a stream for an existing object where size is None.""" - # Arrange - socket_like_rpc = mock.AsyncMock() - mock_async_bidi_rpc.return_value = socket_like_rpc - socket_like_rpc.open = mock.AsyncMock() - - mock_response = mock.MagicMock(spec=_storage_v2.BidiWriteObjectResponse) - mock_response.resource = mock.MagicMock(spec=_storage_v2.Object) - mock_response.resource.size = None - mock_response.resource.generation = GENERATION - mock_response.write_handle = WRITE_HANDLE_PROTO - socket_like_rpc.recv = mock.AsyncMock(return_value=mock_response) - - stream = _AsyncWriteObjectStream( - mock_client, BUCKET, OBJECT, generation_number=GENERATION - ) - - # Act - await stream.open() - - # Assert - assert stream.persisted_size == 0 - - -@pytest.mark.asyncio -@mock.patch( - "google.cloud.storage._experimental.asyncio.async_write_object_stream.AsyncBidiRpc" -) -async def test_recv_updates_write_handle(mock_cls_async_bidi_rpc, mock_client): - """Test that recv updates the write_handle if present in the response.""" - # Arrange - write_obj_stream = await instantiate_write_obj_stream( - mock_client, mock_cls_async_bidi_rpc, open=True - ) - - assert write_obj_stream.write_handle == WRITE_HANDLE_PROTO # Initial handle - - # GCS can periodicallly update write handle in their responses. 
- bidi_write_object_response = _storage_v2.BidiWriteObjectResponse( - write_handle=NEW_WRITE_HANDLE_PROTO - ) - write_obj_stream.socket_like_rpc.recv = AsyncMock( - return_value=bidi_write_object_response - ) - - # Act - response = await write_obj_stream.recv() - - # Assert - write_obj_stream.socket_like_rpc.recv.assert_called_once() - assert response == bidi_write_object_response - # asserts that new write handle has been updated. - assert write_obj_stream.write_handle == NEW_WRITE_HANDLE_PROTO - - -@pytest.mark.asyncio -@mock.patch( - "google.cloud.storage._experimental.asyncio.async_write_object_stream.AsyncBidiRpc" -) -async def test_requests_done_updates_write_handle(mock_cls_async_bidi_rpc, mock_client): - """Test that requests_done updates the write_handle if present in the response.""" - # Arrange - write_obj_stream = await instantiate_write_obj_stream( - mock_client, mock_cls_async_bidi_rpc, open=True - ) - assert write_obj_stream.write_handle == WRITE_HANDLE_PROTO # Initial handle - - # new_write_handle = b"new-test-handle" - bidi_write_object_response = _storage_v2.BidiWriteObjectResponse( - write_handle=NEW_WRITE_HANDLE_PROTO - ) - write_obj_stream.socket_like_rpc.send = AsyncMock() - write_obj_stream.socket_like_rpc.recv = AsyncMock( - return_value=bidi_write_object_response - ) - - # Act - await write_obj_stream.requests_done() - - # Assert - write_obj_stream.socket_like_rpc.send.assert_called_once_with(None) - write_obj_stream.socket_like_rpc.recv.assert_called_once() - assert write_obj_stream.write_handle == NEW_WRITE_HANDLE_PROTO + # Test Send + req = _storage_v2.BidiWriteObjectRequest(write_offset=0) + await stream.send(req) + mock_rpc.send.assert_awaited_with(req) + + # Test Recv with state update + mock_response = MagicMock() + mock_response.persisted_size = 5000 + mock_response.write_handle = b"new-handle" + mock_response.resource = None + mock_rpc.recv.return_value = mock_response + + res = await stream.recv() + assert res.persisted_size == 
5000 + assert stream.persisted_size == 5000 + assert stream.write_handle == b"new-handle" + + @pytest.mark.asyncio + async def test_close_success(self, mock_client): + stream = _AsyncWriteObjectStream(mock_client, BUCKET, OBJECT) + stream._is_stream_open = True + stream.socket_like_rpc = AsyncMock() + stream.socket_like_rpc.close = AsyncMock() + + await stream.close() + stream.socket_like_rpc.close.assert_awaited_once() + assert not stream.is_stream_open + + @pytest.mark.asyncio + async def test_methods_require_open_raises(self, mock_client): + stream = _AsyncWriteObjectStream(mock_client, BUCKET, OBJECT) + with pytest.raises(ValueError, match="Stream is not open"): + await stream.send(MagicMock()) + with pytest.raises(ValueError, match="Stream is not open"): + await stream.recv() + with pytest.raises(ValueError, match="Stream is not open"): + await stream.close() From 5ac2808a69195c688ed42c3604d4bfadbb602a66 Mon Sep 17 00:00:00 2001 From: Chandra Shekhar Sirimala Date: Wed, 28 Jan 2026 15:36:37 +0530 Subject: [PATCH 12/23] feat: add context manager to mrd (#1724) feat: add context manager to mrd --- .../asyncio/async_multi_range_downloader.py | 10 +++++ tests/system/test_zonal.py | 14 +++---- .../test_async_multi_range_downloader.py | 41 +++++++++++++++++++ 3 files changed, 58 insertions(+), 7 deletions(-) diff --git a/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py b/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py index 1d1c63efd..5981ff7fc 100644 --- a/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py +++ b/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py @@ -209,6 +209,16 @@ def __init__( self._download_ranges_id_to_pending_read_ids = {} self.persisted_size: Optional[int] = None # updated after opening the stream + async def __aenter__(self): + """Opens the underlying bidi-gRPC connection to read from the object.""" + await self.open() + return self + + 
async def __aexit__(self, exc_type, exc_val, exc_tb): + """Closes the underlying bidi-gRPC connection.""" + if self.is_stream_open: + await self.close() + def _on_open_error(self, exc): """Extracts routing token and read handle on redirect error during open.""" routing_token, read_handle = _handle_redirect(exc) diff --git a/tests/system/test_zonal.py b/tests/system/test_zonal.py index 40c84a2fb..8019156dd 100644 --- a/tests/system/test_zonal.py +++ b/tests/system/test_zonal.py @@ -116,19 +116,19 @@ async def _run(): assert object_metadata.size == object_size assert int(object_metadata.checksums.crc32c) == object_checksum - mrd = AsyncMultiRangeDownloader(grpc_client, _ZONAL_BUCKET, object_name) buffer = BytesIO() - await mrd.open() - # (0, 0) means read the whole object - await mrd.download_ranges([(0, 0, buffer)]) - await mrd.close() + async with AsyncMultiRangeDownloader( + grpc_client, _ZONAL_BUCKET, object_name + ) as mrd: + # (0, 0) means read the whole object + await mrd.download_ranges([(0, 0, buffer)]) + assert mrd.persisted_size == object_size + assert buffer.getvalue() == object_data - assert mrd.persisted_size == object_size # Clean up; use json client (i.e. `storage_client` fixture) to delete. 
blobs_to_delete.append(storage_client.bucket(_ZONAL_BUCKET).blob(object_name)) del writer - del mrd gc.collect() event_loop.run_until_complete(_run()) diff --git a/tests/unit/asyncio/test_async_multi_range_downloader.py b/tests/unit/asyncio/test_async_multi_range_downloader.py index 2f0600f8d..5a8d6c6f1 100644 --- a/tests/unit/asyncio/test_async_multi_range_downloader.py +++ b/tests/unit/asyncio/test_async_multi_range_downloader.py @@ -401,3 +401,44 @@ async def test_download_ranges_raises_on_checksum_mismatch( assert "Checksum mismatch" in str(exc_info.value) mock_checksum_class.assert_called_once_with(test_data) + + @mock.patch( + "google.cloud.storage._experimental.asyncio.async_multi_range_downloader.AsyncMultiRangeDownloader.open", + new_callable=AsyncMock, + ) + @mock.patch( + "google.cloud.storage._experimental.asyncio.async_multi_range_downloader.AsyncMultiRangeDownloader.close", + new_callable=AsyncMock, + ) + @mock.patch( + "google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" + ) + @pytest.mark.asyncio + async def test_async_context_manager_calls_open_and_close( + self, mock_grpc_client, mock_close, mock_open + ): + # Arrange + mrd = AsyncMultiRangeDownloader( + mock_grpc_client, _TEST_BUCKET_NAME, _TEST_OBJECT_NAME + ) + + # To simulate the behavior of open and close changing the stream state + async def open_side_effect(): + mrd._is_stream_open = True + + async def close_side_effect(): + mrd._is_stream_open = False + + mock_open.side_effect = open_side_effect + mock_close.side_effect = close_side_effect + mrd._is_stream_open = False + + # Act + async with mrd as downloader: + # Assert + mock_open.assert_called_once() + assert downloader == mrd + assert mrd.is_stream_open + + mock_close.assert_called_once() + assert not mrd.is_stream_open From 721ea2dd6c6db2aa91fd3b90e56a831aaaa64061 Mon Sep 17 00:00:00 2001 From: Chandra Shekhar Sirimala Date: Wed, 28 Jan 2026 16:35:32 +0530 Subject: [PATCH 13/23] fix: instance grpc 
client once per process in benchmarks (#1725) fix: instance grpc client once per process in benchmarks. Other Changes * One gRPC client per process * One async io event loop per process * One process pool per benchmark. Fixes b/474536663 , b/473669384 --- .../perf/microbenchmarks/reads/test_reads.py | 63 ++++++++++--------- tests/system/test_zonal.py | 2 +- 2 files changed, 36 insertions(+), 29 deletions(-) diff --git a/tests/perf/microbenchmarks/reads/test_reads.py b/tests/perf/microbenchmarks/reads/test_reads.py index 13a0a49b3..324938e94 100644 --- a/tests/perf/microbenchmarks/reads/test_reads.py +++ b/tests/perf/microbenchmarks/reads/test_reads.py @@ -300,44 +300,46 @@ def target_wrapper(*args, **kwargs): ) -def _download_files_worker(files_to_download, other_params, chunks, bucket_type): - # For regional buckets, a new client must be created for each process. - # For zonal, the same is done for consistency. +# --- Global Variables for Worker Process --- +worker_loop = None +worker_client = None +worker_json_client = None + + +def _worker_init(bucket_type): + """Initializes a persistent event loop and client for each worker process.""" + global worker_loop, worker_client, worker_json_client if bucket_type == "zonal": - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - client = loop.run_until_complete(create_client()) - try: - # download_files_using_mrd_multi_coro returns max latency of coros - result = download_files_using_mrd_multi_coro( - loop, client, files_to_download, other_params, chunks - ) - finally: - tasks = asyncio.all_tasks(loop=loop) - for task in tasks: - task.cancel() - loop.run_until_complete(asyncio.gather(*tasks, return_exceptions=True)) - loop.close() - return result + worker_loop = asyncio.new_event_loop() + asyncio.set_event_loop(worker_loop) + worker_client = worker_loop.run_until_complete(create_client()) else: # regional from google.cloud import storage - json_client = storage.Client() + worker_json_client = 
storage.Client() + + +def _download_files_worker(files_to_download, other_params, chunks, bucket_type): + if bucket_type == "zonal": + # The loop and client are already initialized in _worker_init. + # download_files_using_mrd_multi_coro returns max latency of coros + return download_files_using_mrd_multi_coro( + worker_loop, worker_client, files_to_download, other_params, chunks + ) + else: # regional # download_files_using_json_multi_threaded returns max latency of threads return download_files_using_json_multi_threaded( - None, json_client, files_to_download, other_params, chunks + None, worker_json_client, files_to_download, other_params, chunks ) -def download_files_mp_mc_wrapper(files_names, params, chunks, bucket_type): - num_processes = params.num_processes +def download_files_mp_mc_wrapper(pool, files_names, params, chunks, bucket_type): num_coros = params.num_coros # This is n, number of files per process # Distribute filenames to processes filenames_per_process = [ files_names[i : i + num_coros] for i in range(0, len(files_names), num_coros) ] - args = [ ( filenames, @@ -348,10 +350,7 @@ def download_files_mp_mc_wrapper(files_names, params, chunks, bucket_type): for filenames in filenames_per_process ] - ctx = multiprocessing.get_context("spawn") - with ctx.Pool(processes=num_processes) as pool: - results = pool.starmap(_download_files_worker, args) - + results = pool.starmap(_download_files_worker, args) return max(results) @@ -386,10 +385,16 @@ def test_downloads_multi_proc_multi_coro( logging.info("randomizing chunks") random.shuffle(chunks) + ctx = multiprocessing.get_context("spawn") + pool = ctx.Pool( + processes=params.num_processes, + initializer=_worker_init, + initargs=(params.bucket_type,), + ) output_times = [] def target_wrapper(*args, **kwargs): - result = download_files_mp_mc_wrapper(*args, **kwargs) + result = download_files_mp_mc_wrapper(pool, *args, **kwargs) output_times.append(result) return output_times @@ -407,6 +412,8 @@ def 
target_wrapper(*args, **kwargs): ), ) finally: + pool.close() + pool.join() publish_benchmark_extra_info(benchmark, params, true_times=output_times) publish_resource_metrics(benchmark, m) diff --git a/tests/system/test_zonal.py b/tests/system/test_zonal.py index 8019156dd..eb9df582c 100644 --- a/tests/system/test_zonal.py +++ b/tests/system/test_zonal.py @@ -264,7 +264,7 @@ async def _run(): event_loop.run_until_complete(_run()) - +@pytest.mark.skip(reason='Flaky test b/478129078') def test_mrd_open_with_read_handle(event_loop, grpc_client_direct): object_name = f"test_read_handl-{str(uuid.uuid4())[:4]}" From 8e21a7fe54d0a043f31937671003630a1985a5d2 Mon Sep 17 00:00:00 2001 From: Chandra Shekhar Sirimala Date: Wed, 28 Jan 2026 16:54:59 +0530 Subject: [PATCH 14/23] feat: expose finalized_time in blob.py applicable for GET_OBJECT in ZB (#1719) feat: expose finalized_time in blob.py applicable for GET_OBJECT in ZB --- google/cloud/storage/blob.py | 13 +++++++++++++ samples/snippets/storage_get_metadata.py | 1 + tests/unit/test_blob.py | 23 +++++++++++++++++++++++ 3 files changed, 37 insertions(+) diff --git a/google/cloud/storage/blob.py b/google/cloud/storage/blob.py index 746334d1c..0b022985f 100644 --- a/google/cloud/storage/blob.py +++ b/google/cloud/storage/blob.py @@ -5034,6 +5034,19 @@ def hard_delete_time(self): if hard_delete_time is not None: return _rfc3339_nanos_to_datetime(hard_delete_time) + @property + def finalized_time(self): + """If this object has been soft-deleted, returns the time at which it will be permanently deleted. + + :rtype: :class:`datetime.datetime` or ``NoneType`` + :returns: + (readonly) The time that the object will be permanently deleted. + Note this property is only set for soft-deleted objects. + """ + finalize_time = self._properties.get("finalizedTime", None) + if finalize_time is not None: + return _rfc3339_nanos_to_datetime(finalize_time) + def _get_host_name(connection): """Returns the host name from the given connection. 
diff --git a/samples/snippets/storage_get_metadata.py b/samples/snippets/storage_get_metadata.py index 7216efdb4..1e332b445 100644 --- a/samples/snippets/storage_get_metadata.py +++ b/samples/snippets/storage_get_metadata.py @@ -34,6 +34,7 @@ def blob_metadata(bucket_name, blob_name): blob = bucket.get_blob(blob_name) print(f"Blob: {blob.name}") + print(f"Blob finalization: {blob.finalized_time}") print(f"Bucket: {blob.bucket.name}") print(f"Storage class: {blob.storage_class}") print(f"ID: {blob.id}") diff --git a/tests/unit/test_blob.py b/tests/unit/test_blob.py index a8abb1571..2359de501 100644 --- a/tests/unit/test_blob.py +++ b/tests/unit/test_blob.py @@ -228,6 +228,29 @@ def test__set_properties_w_kms_key_name(self): ) self._set_properties_helper(kms_key_name=kms_resource) + def test_finalized_time_property_is_none(self): + BLOB_NAME = "blob-name" + bucket = _Bucket() + blob = self._make_one(BLOB_NAME, bucket=bucket) + self.assertIsNone(blob.finalized_time) + + def test_finalized_time_property_is_not_none(self): + from google.cloud.storage import blob as blob_module + + BLOB_NAME = "blob-name" + bucket = _Bucket() + blob = self._make_one(BLOB_NAME, bucket=bucket) + + timestamp = "2024-07-29T12:34:56.123456Z" + blob._properties["finalizedTime"] = timestamp + + mock_datetime = mock.Mock() + with mock.patch.object( + blob_module, "_rfc3339_nanos_to_datetime", return_value=mock_datetime + ) as mocked: + self.assertEqual(blob.finalized_time, mock_datetime) + mocked.assert_called_once_with(timestamp) + def test_chunk_size_ctor(self): from google.cloud.storage.blob import Blob From e730bf50c4584f737ab86b2e409ddb27b40d2cec Mon Sep 17 00:00:00 2001 From: Chandra Shekhar Sirimala Date: Wed, 28 Jan 2026 19:21:08 +0530 Subject: [PATCH 15/23] fix!: Change contructors of MRD and AAOW AsyncGrpcClient.grpc_client to AsyncGrpcClient (#1727) Change contructors of MRD and AAOW `AsyncGrpcClient.grpc_client` to `AsyncGrpcClient`. 
This will help us extend multiple methods in AsyncGrpcClient in future. More details in b/479030029 --- .../asyncio/async_appendable_object_writer.py | 6 +- .../asyncio/async_multi_range_downloader.py | 12 +- ...rage_create_and_write_appendable_object.py | 2 +- ...orage_finalize_appendable_object_upload.py | 2 +- ...orage_open_multiple_objects_ranged_read.py | 2 +- ...torage_open_object_multiple_ranged_read.py | 2 +- .../storage_open_object_read_full_object.py | 2 +- .../storage_open_object_single_ranged_read.py | 2 +- ...rage_pause_and_resume_appendable_upload.py | 2 +- .../storage_read_appendable_object_tail.py | 2 +- .../zonal_buckets/zonal_snippets_test.py | 2 +- tests/perf/microbenchmarks/conftest.py | 2 +- .../perf/microbenchmarks/reads/test_reads.py | 2 +- .../microbenchmarks/writes/test_writes.py | 2 +- tests/system/test_zonal.py | 2 +- .../test_async_appendable_object_writer.py | 3 +- .../test_async_multi_range_downloader.py | 109 ++++++------------ 17 files changed, 60 insertions(+), 96 deletions(-) diff --git a/google/cloud/storage/_experimental/asyncio/async_appendable_object_writer.py b/google/cloud/storage/_experimental/asyncio/async_appendable_object_writer.py index 7795a2ac5..1cc099043 100644 --- a/google/cloud/storage/_experimental/asyncio/async_appendable_object_writer.py +++ b/google/cloud/storage/_experimental/asyncio/async_appendable_object_writer.py @@ -114,7 +114,7 @@ class AsyncAppendableObjectWriter: def __init__( self, - client: AsyncGrpcClient.grpc_client, + client: AsyncGrpcClient, bucket_name: str, object_name: str, generation: Optional[int] = None, @@ -155,7 +155,7 @@ def __init__( await writer.close() ``` - :type client: :class:`~google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client` + :type client: :class:`~google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient` :param client: async grpc client to use for making API requests. 
:type bucket_name: str @@ -304,7 +304,7 @@ async def _do_open(): self._is_stream_open = False self.write_obj_stream = _AsyncWriteObjectStream( - client=self.client, + client=self.client.grpc_client, bucket_name=self.bucket_name, object_name=self.object_name, generation_number=self.generation, diff --git a/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py b/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py index 5981ff7fc..340f087da 100644 --- a/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py +++ b/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py @@ -125,7 +125,7 @@ class AsyncMultiRangeDownloader: @classmethod async def create_mrd( cls, - client: AsyncGrpcClient.grpc_client, + client: AsyncGrpcClient, bucket_name: str, object_name: str, generation_number: Optional[int] = None, @@ -136,7 +136,7 @@ async def create_mrd( """Initializes a MultiRangeDownloader and opens the underlying bidi-gRPC object for reading. - :type client: :class:`~google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client` + :type client: :class:`~google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient` :param client: The asynchronous client to use for making API requests. :type bucket_name: str @@ -168,7 +168,7 @@ async def create_mrd( def __init__( self, - client: AsyncGrpcClient.grpc_client, + client: AsyncGrpcClient, bucket_name: str, object_name: str, generation_number: Optional[int] = None, @@ -177,7 +177,7 @@ def __init__( """Constructor for AsyncMultiRangeDownloader, clients are not adviced to use it directly. Instead it's adviced to use the classmethod `create_mrd`. 
- :type client: :class:`~google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client` + :type client: :class:`~google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient` :param client: The asynchronous client to use for making API requests. :type bucket_name: str @@ -273,7 +273,7 @@ async def _do_open(): self._is_stream_open = False self.read_obj_str = _AsyncReadObjectStream( - client=self.client, + client=self.client.grpc_client, bucket_name=self.bucket_name, object_name=self.object_name, generation_number=self.generation_number, @@ -432,7 +432,7 @@ async def generator(): # Re-initialize stream self.read_obj_str = _AsyncReadObjectStream( - client=self.client, + client=self.client.grpc_client, bucket_name=self.bucket_name, object_name=self.object_name, generation_number=self.generation_number, diff --git a/samples/snippets/zonal_buckets/storage_create_and_write_appendable_object.py b/samples/snippets/zonal_buckets/storage_create_and_write_appendable_object.py index 87da6cfdd..f00a6ba80 100644 --- a/samples/snippets/zonal_buckets/storage_create_and_write_appendable_object.py +++ b/samples/snippets/zonal_buckets/storage_create_and_write_appendable_object.py @@ -35,7 +35,7 @@ async def storage_create_and_write_appendable_object( """ if grpc_client is None: - grpc_client = AsyncGrpcClient().grpc_client + grpc_client = AsyncGrpcClient() writer = AsyncAppendableObjectWriter( client=grpc_client, bucket_name=bucket_name, diff --git a/samples/snippets/zonal_buckets/storage_finalize_appendable_object_upload.py b/samples/snippets/zonal_buckets/storage_finalize_appendable_object_upload.py index 4a4b64289..971658997 100644 --- a/samples/snippets/zonal_buckets/storage_finalize_appendable_object_upload.py +++ b/samples/snippets/zonal_buckets/storage_finalize_appendable_object_upload.py @@ -33,7 +33,7 @@ async def storage_finalize_appendable_object_upload( """ if grpc_client is None: - grpc_client = AsyncGrpcClient().grpc_client + 
grpc_client = AsyncGrpcClient() writer = AsyncAppendableObjectWriter( client=grpc_client, bucket_name=bucket_name, diff --git a/samples/snippets/zonal_buckets/storage_open_multiple_objects_ranged_read.py b/samples/snippets/zonal_buckets/storage_open_multiple_objects_ranged_read.py index c22023a5c..ce2ba678b 100644 --- a/samples/snippets/zonal_buckets/storage_open_multiple_objects_ranged_read.py +++ b/samples/snippets/zonal_buckets/storage_open_multiple_objects_ranged_read.py @@ -36,7 +36,7 @@ async def storage_open_multiple_objects_ranged_read( grpc_client: an existing grpc_client to use, this is only for testing. """ if grpc_client is None: - grpc_client = AsyncGrpcClient().grpc_client + grpc_client = AsyncGrpcClient() async def _download_range(object_name): """Helper coroutine to download a range from a single object.""" diff --git a/samples/snippets/zonal_buckets/storage_open_object_multiple_ranged_read.py b/samples/snippets/zonal_buckets/storage_open_object_multiple_ranged_read.py index 55d92d9da..02c3bace5 100644 --- a/samples/snippets/zonal_buckets/storage_open_object_multiple_ranged_read.py +++ b/samples/snippets/zonal_buckets/storage_open_object_multiple_ranged_read.py @@ -33,7 +33,7 @@ async def storage_open_object_multiple_ranged_read( grpc_client: an existing grpc_client to use, this is only for testing. """ if grpc_client is None: - grpc_client = AsyncGrpcClient().grpc_client + grpc_client = AsyncGrpcClient() mrd = AsyncMultiRangeDownloader(grpc_client, bucket_name, object_name) diff --git a/samples/snippets/zonal_buckets/storage_open_object_read_full_object.py b/samples/snippets/zonal_buckets/storage_open_object_read_full_object.py index 044c42b71..b4cad6718 100644 --- a/samples/snippets/zonal_buckets/storage_open_object_read_full_object.py +++ b/samples/snippets/zonal_buckets/storage_open_object_read_full_object.py @@ -33,7 +33,7 @@ async def storage_open_object_read_full_object( grpc_client: an existing grpc_client to use, this is only for testing. 
""" if grpc_client is None: - grpc_client = AsyncGrpcClient().grpc_client + grpc_client = AsyncGrpcClient() # mrd = Multi-Range-Downloader mrd = AsyncMultiRangeDownloader(grpc_client, bucket_name, object_name) diff --git a/samples/snippets/zonal_buckets/storage_open_object_single_ranged_read.py b/samples/snippets/zonal_buckets/storage_open_object_single_ranged_read.py index 41effce36..b013cc938 100644 --- a/samples/snippets/zonal_buckets/storage_open_object_single_ranged_read.py +++ b/samples/snippets/zonal_buckets/storage_open_object_single_ranged_read.py @@ -33,7 +33,7 @@ async def storage_open_object_single_ranged_read( grpc_client: an existing grpc_client to use, this is only for testing. """ if grpc_client is None: - grpc_client = AsyncGrpcClient().grpc_client + grpc_client = AsyncGrpcClient() mrd = AsyncMultiRangeDownloader(grpc_client, bucket_name, object_name) diff --git a/samples/snippets/zonal_buckets/storage_pause_and_resume_appendable_upload.py b/samples/snippets/zonal_buckets/storage_pause_and_resume_appendable_upload.py index db54bb821..3fb17ceae 100644 --- a/samples/snippets/zonal_buckets/storage_pause_and_resume_appendable_upload.py +++ b/samples/snippets/zonal_buckets/storage_pause_and_resume_appendable_upload.py @@ -32,7 +32,7 @@ async def storage_pause_and_resume_appendable_upload( grpc_client: an existing grpc_client to use, this is only for testing. 
""" if grpc_client is None: - grpc_client = AsyncGrpcClient().grpc_client + grpc_client = AsyncGrpcClient() writer1 = AsyncAppendableObjectWriter( client=grpc_client, diff --git a/samples/snippets/zonal_buckets/storage_read_appendable_object_tail.py b/samples/snippets/zonal_buckets/storage_read_appendable_object_tail.py index 587513056..1134f28d6 100644 --- a/samples/snippets/zonal_buckets/storage_read_appendable_object_tail.py +++ b/samples/snippets/zonal_buckets/storage_read_appendable_object_tail.py @@ -90,7 +90,7 @@ async def read_appendable_object_tail( grpc_client: an existing grpc_client to use, this is only for testing. """ if grpc_client is None: - grpc_client = AsyncGrpcClient().grpc_client + grpc_client = AsyncGrpcClient() writer = AsyncAppendableObjectWriter( client=grpc_client, bucket_name=bucket_name, diff --git a/samples/snippets/zonal_buckets/zonal_snippets_test.py b/samples/snippets/zonal_buckets/zonal_snippets_test.py index e45d21c47..736576eb5 100644 --- a/samples/snippets/zonal_buckets/zonal_snippets_test.py +++ b/samples/snippets/zonal_buckets/zonal_snippets_test.py @@ -48,7 +48,7 @@ async def create_async_grpc_client(): """Initializes async client and gets the current event loop.""" - return AsyncGrpcClient().grpc_client + return AsyncGrpcClient() # Forcing a single event loop for the whole test session diff --git a/tests/perf/microbenchmarks/conftest.py b/tests/perf/microbenchmarks/conftest.py index 8caf4d656..c09e9ce93 100644 --- a/tests/perf/microbenchmarks/conftest.py +++ b/tests/perf/microbenchmarks/conftest.py @@ -78,7 +78,7 @@ async def upload_appendable_object(bucket_name, object_name, object_size, chunk_ # this method is to write "appendable" objects which will be used for # benchmarking reads, hence not concerned performance of writes here. 
writer = AsyncAppendableObjectWriter( - AsyncGrpcClient().grpc_client, + AsyncGrpcClient(), bucket_name, object_name, writer_options={"FLUSH_INTERVAL_BYTES": 1026 * 1024**2}, diff --git a/tests/perf/microbenchmarks/reads/test_reads.py b/tests/perf/microbenchmarks/reads/test_reads.py index 324938e94..2b5f80d42 100644 --- a/tests/perf/microbenchmarks/reads/test_reads.py +++ b/tests/perf/microbenchmarks/reads/test_reads.py @@ -47,7 +47,7 @@ async def create_client(): """Initializes async client and gets the current event loop.""" - return AsyncGrpcClient().grpc_client + return AsyncGrpcClient() async def download_chunks_using_mrd_async(client, filename, other_params, chunks): diff --git a/tests/perf/microbenchmarks/writes/test_writes.py b/tests/perf/microbenchmarks/writes/test_writes.py index d952b1f87..5648ada0d 100644 --- a/tests/perf/microbenchmarks/writes/test_writes.py +++ b/tests/perf/microbenchmarks/writes/test_writes.py @@ -49,7 +49,7 @@ async def create_client(): """Initializes async client and gets the current event loop.""" - return AsyncGrpcClient().grpc_client + return AsyncGrpcClient() async def upload_chunks_using_grpc_async(client, filename, other_params): diff --git a/tests/system/test_zonal.py b/tests/system/test_zonal.py index eb9df582c..4d46353e2 100644 --- a/tests/system/test_zonal.py +++ b/tests/system/test_zonal.py @@ -36,7 +36,7 @@ async def create_async_grpc_client(attempt_direct_path=True): """Initializes async client and gets the current event loop.""" - return AsyncGrpcClient(attempt_direct_path=attempt_direct_path).grpc_client + return AsyncGrpcClient(attempt_direct_path=attempt_direct_path) @pytest.fixture(scope="session") diff --git a/tests/unit/asyncio/test_async_appendable_object_writer.py b/tests/unit/asyncio/test_async_appendable_object_writer.py index abb26be7a..88f4864de 100644 --- a/tests/unit/asyncio/test_async_appendable_object_writer.py +++ b/tests/unit/asyncio/test_async_appendable_object_writer.py @@ -87,7 +87,8 @@ def 
test_non_retryable_errors(self, mock_appendable_writer): @pytest.fixture def mock_appendable_writer(): """Fixture to provide a mock AsyncAppendableObjectWriter setup.""" - mock_client = mock.AsyncMock() + mock_client = mock.MagicMock() + mock_client.grpc_client = mock.AsyncMock() # Internal stream class patch stream_patcher = mock.patch( "google.cloud.storage._experimental.asyncio.async_appendable_object_writer._AsyncWriteObjectStream" diff --git a/tests/unit/asyncio/test_async_multi_range_downloader.py b/tests/unit/asyncio/test_async_multi_range_downloader.py index 5a8d6c6f1..09cf0fa09 100644 --- a/tests/unit/asyncio/test_async_multi_range_downloader.py +++ b/tests/unit/asyncio/test_async_multi_range_downloader.py @@ -46,13 +46,15 @@ def create_read_ranges(self, num_ranges): @pytest.mark.asyncio async def _make_mock_mrd( self, - mock_grpc_client, mock_cls_async_read_object_stream, bucket_name=_TEST_BUCKET_NAME, object_name=_TEST_OBJECT_NAME, generation_number=_TEST_GENERATION_NUMBER, read_handle=_TEST_READ_HANDLE, ): + mock_client = mock.MagicMock() + mock_client.grpc_client = mock.AsyncMock() + mock_stream = mock_cls_async_read_object_stream.return_value mock_stream.open = AsyncMock() mock_stream.generation_number = _TEST_GENERATION_NUMBER @@ -60,29 +62,22 @@ async def _make_mock_mrd( mock_stream.read_handle = _TEST_READ_HANDLE mrd = await AsyncMultiRangeDownloader.create_mrd( - mock_grpc_client, bucket_name, object_name, generation_number, read_handle + mock_client, bucket_name, object_name, generation_number, read_handle ) - return mrd + return mrd, mock_client @mock.patch( "google.cloud.storage._experimental.asyncio.async_multi_range_downloader._AsyncReadObjectStream" ) - @mock.patch( - "google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" - ) @pytest.mark.asyncio - async def test_create_mrd( - self, mock_grpc_client, mock_cls_async_read_object_stream - ): + async def test_create_mrd(self, 
mock_cls_async_read_object_stream): # Arrange & Act - mrd = await self._make_mock_mrd( - mock_grpc_client, mock_cls_async_read_object_stream - ) + mrd, mock_client = await self._make_mock_mrd(mock_cls_async_read_object_stream) # Assert mock_cls_async_read_object_stream.assert_called_once_with( - client=mock_grpc_client, + client=mock_client.grpc_client, bucket_name=_TEST_BUCKET_NAME, object_name=_TEST_OBJECT_NAME, generation_number=_TEST_GENERATION_NUMBER, @@ -91,7 +86,7 @@ async def test_create_mrd( mrd.read_obj_str.open.assert_called_once() - assert mrd.client == mock_grpc_client + assert mrd.client == mock_client assert mrd.bucket_name == _TEST_BUCKET_NAME assert mrd.object_name == _TEST_OBJECT_NAME assert mrd.generation_number == _TEST_GENERATION_NUMBER @@ -105,12 +100,9 @@ async def test_create_mrd( @mock.patch( "google.cloud.storage._experimental.asyncio.async_multi_range_downloader._AsyncReadObjectStream" ) - @mock.patch( - "google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" - ) @pytest.mark.asyncio async def test_download_ranges_via_async_gather( - self, mock_grpc_client, mock_cls_async_read_object_stream, mock_random_int + self, mock_cls_async_read_object_stream, mock_random_int ): # Arrange data = b"these_are_18_chars" @@ -120,9 +112,7 @@ async def test_download_ranges_via_async_gather( Checksum(data[10:16]).digest(), "big" ) - mock_mrd = await self._make_mock_mrd( - mock_grpc_client, mock_cls_async_read_object_stream - ) + mock_mrd, _ = await self._make_mock_mrd(mock_cls_async_read_object_stream) mock_random_int.side_effect = [456, 91011] @@ -181,21 +171,16 @@ async def test_download_ranges_via_async_gather( @mock.patch( "google.cloud.storage._experimental.asyncio.async_multi_range_downloader._AsyncReadObjectStream" ) - @mock.patch( - "google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" - ) @pytest.mark.asyncio async def test_download_ranges( - self, mock_grpc_client, 
mock_cls_async_read_object_stream, mock_random_int + self, mock_cls_async_read_object_stream, mock_random_int ): # Arrange data = b"these_are_18_chars" crc32c = Checksum(data).digest() crc32c_int = int.from_bytes(crc32c, "big") - mock_mrd = await self._make_mock_mrd( - mock_grpc_client, mock_cls_async_read_object_stream - ) + mock_mrd, _ = await self._make_mock_mrd(mock_cls_async_read_object_stream) mock_random_int.side_effect = [456] @@ -232,16 +217,12 @@ async def test_download_ranges( ) assert buffer.getvalue() == data - @mock.patch( - "google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" - ) @pytest.mark.asyncio - async def test_downloading_ranges_with_more_than_1000_should_throw_error( - self, mock_grpc_client - ): + async def test_downloading_ranges_with_more_than_1000_should_throw_error(self): # Arrange + mock_client = mock.MagicMock() mrd = AsyncMultiRangeDownloader( - mock_grpc_client, _TEST_BUCKET_NAME, _TEST_OBJECT_NAME + mock_client, _TEST_BUCKET_NAME, _TEST_OBJECT_NAME ) # Act + Assert @@ -257,16 +238,13 @@ async def test_downloading_ranges_with_more_than_1000_should_throw_error( @mock.patch( "google.cloud.storage._experimental.asyncio.async_multi_range_downloader._AsyncReadObjectStream" ) - @mock.patch( - "google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" - ) @pytest.mark.asyncio async def test_opening_mrd_more_than_once_should_throw_error( - self, mock_grpc_client, mock_cls_async_read_object_stream + self, mock_cls_async_read_object_stream ): # Arrange - mrd = await self._make_mock_mrd( - mock_grpc_client, mock_cls_async_read_object_stream + mrd, _ = await self._make_mock_mrd( + mock_cls_async_read_object_stream ) # mock mrd is already opened # Act + Assert @@ -279,14 +257,11 @@ async def test_opening_mrd_more_than_once_should_throw_error( @mock.patch( "google.cloud.storage._experimental.asyncio.async_multi_range_downloader._AsyncReadObjectStream" ) - @mock.patch( - 
"google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" - ) @pytest.mark.asyncio - async def test_close_mrd(self, mock_grpc_client, mock_cls_async_read_object_stream): + async def test_close_mrd(self, mock_cls_async_read_object_stream): # Arrange - mrd = await self._make_mock_mrd( - mock_grpc_client, mock_cls_async_read_object_stream + mrd, _ = await self._make_mock_mrd( + mock_cls_async_read_object_stream ) # mock mrd is already opened mrd.read_obj_str.close = AsyncMock() @@ -296,14 +271,12 @@ async def test_close_mrd(self, mock_grpc_client, mock_cls_async_read_object_stre # Assert assert not mrd.is_stream_open - @mock.patch( - "google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" - ) @pytest.mark.asyncio - async def test_close_mrd_not_opened_should_throw_error(self, mock_grpc_client): + async def test_close_mrd_not_opened_should_throw_error(self): # Arrange + mock_client = mock.MagicMock() mrd = AsyncMultiRangeDownloader( - mock_grpc_client, _TEST_BUCKET_NAME, _TEST_OBJECT_NAME + mock_client, _TEST_BUCKET_NAME, _TEST_OBJECT_NAME ) # Act + Assert with pytest.raises(ValueError) as exc: @@ -313,16 +286,12 @@ async def test_close_mrd_not_opened_should_throw_error(self, mock_grpc_client): assert str(exc.value) == "Underlying bidi-gRPC stream is not open" assert not mrd.is_stream_open - @mock.patch( - "google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" - ) @pytest.mark.asyncio - async def test_downloading_without_opening_should_throw_error( - self, mock_grpc_client - ): + async def test_downloading_without_opening_should_throw_error(self): # Arrange + mock_client = mock.MagicMock() mrd = AsyncMultiRangeDownloader( - mock_grpc_client, _TEST_BUCKET_NAME, _TEST_OBJECT_NAME + mock_client, _TEST_BUCKET_NAME, _TEST_OBJECT_NAME ) # Act + Assert @@ -334,16 +303,14 @@ async def test_downloading_without_opening_should_throw_error( assert not mrd.is_stream_open 
@mock.patch("google.cloud.storage._experimental.asyncio._utils.google_crc32c") - @mock.patch( - "google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" - ) def test_init_raises_if_crc32c_c_extension_is_missing( - self, mock_grpc_client, mock_google_crc32c + self, mock_google_crc32c ): mock_google_crc32c.implementation = "python" + mock_client = mock.MagicMock() with pytest.raises(exceptions.FailedPrecondition) as exc_info: - AsyncMultiRangeDownloader(mock_grpc_client, "bucket", "object") + AsyncMultiRangeDownloader(mock_client, "bucket", "object") assert "The google-crc32c package is not installed with C support" in str( exc_info.value @@ -353,16 +320,14 @@ def test_init_raises_if_crc32c_c_extension_is_missing( @mock.patch( "google.cloud.storage._experimental.asyncio.retry.reads_resumption_strategy.Checksum" ) - @mock.patch( - "google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" - ) async def test_download_ranges_raises_on_checksum_mismatch( - self, mock_client, mock_checksum_class + self, mock_checksum_class ): from google.cloud.storage._experimental.asyncio.async_multi_range_downloader import ( AsyncMultiRangeDownloader, ) + mock_client = mock.MagicMock() mock_stream = mock.AsyncMock( spec=async_read_object_stream._AsyncReadObjectStream ) @@ -410,16 +375,14 @@ async def test_download_ranges_raises_on_checksum_mismatch( "google.cloud.storage._experimental.asyncio.async_multi_range_downloader.AsyncMultiRangeDownloader.close", new_callable=AsyncMock, ) - @mock.patch( - "google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" - ) @pytest.mark.asyncio async def test_async_context_manager_calls_open_and_close( - self, mock_grpc_client, mock_close, mock_open + self, mock_close, mock_open ): # Arrange + mock_client = mock.MagicMock() mrd = AsyncMultiRangeDownloader( - mock_grpc_client, _TEST_BUCKET_NAME, _TEST_OBJECT_NAME + mock_client, _TEST_BUCKET_NAME, 
_TEST_OBJECT_NAME ) # To simulate the behavior of open and close changing the stream state From 7b319469d2e495ea0bf7367f3949190e8f5d9fff Mon Sep 17 00:00:00 2001 From: Pulkit Aggarwal <54775856+Pulkit0110@users.noreply.github.com> Date: Thu, 29 Jan 2026 08:41:32 +0530 Subject: [PATCH 16/23] feat: add default user agent for grpc (#1726) Adding a default gcloud-python/{__version__} for grpc client just like the json client --- .../asyncio/async_grpc_client.py | 19 ++-- tests/unit/asyncio/test_async_grpc_client.py | 90 +++++++++++++------ 2 files changed, 74 insertions(+), 35 deletions(-) diff --git a/google/cloud/storage/_experimental/asyncio/async_grpc_client.py b/google/cloud/storage/_experimental/asyncio/async_grpc_client.py index e985f2252..47c371cd6 100644 --- a/google/cloud/storage/_experimental/asyncio/async_grpc_client.py +++ b/google/cloud/storage/_experimental/asyncio/async_grpc_client.py @@ -18,25 +18,22 @@ from google.cloud._storage_v2.services.storage.transports.base import ( DEFAULT_CLIENT_INFO, ) +from google.cloud.storage import __version__ class AsyncGrpcClient: """An asynchronous client for interacting with Google Cloud Storage using the gRPC API. - :type credentials: :class:`~google.auth.credentials.Credentials` :param credentials: (Optional) The OAuth2 Credentials to use for this client. If not passed, falls back to the default inferred from the environment. - :type client_info: :class:`~google.api_core.client_info.ClientInfo` :param client_info: The client info used to send a user-agent string along with API requests. If ``None``, then default info will be used. - :type client_options: :class:`~google.api_core.client_options.ClientOptions` :param client_options: (Optional) Client options used to set user options on the client. - :type attempt_direct_path: bool :param attempt_direct_path: (Optional) Whether to attempt to use DirectPath for gRPC connections. 
@@ -51,6 +48,18 @@ def __init__( *, attempt_direct_path=True, ): + if client_info is None: + client_info = DEFAULT_CLIENT_INFO + else: + client_info = client_info + client_info.client_library_version = __version__ + # TODO: When metrics all use gccl, this should be removed #9552 + if client_info.user_agent is None: # pragma: no branch + client_info.user_agent = "" + agent_version = f"gcloud-python/{__version__}" + if agent_version not in client_info.user_agent: + client_info.user_agent += f" {agent_version} " + self._grpc_client = self._create_async_grpc_client( credentials=credentials, client_info=client_info, @@ -69,8 +78,6 @@ def _create_async_grpc_client( "grpc_asyncio" ) - if client_info is None: - client_info = DEFAULT_CLIENT_INFO primary_user_agent = client_info.to_user_agent() channel = transport_cls.create_channel( diff --git a/tests/unit/asyncio/test_async_grpc_client.py b/tests/unit/asyncio/test_async_grpc_client.py index f94729516..b0edfafa6 100644 --- a/tests/unit/asyncio/test_async_grpc_client.py +++ b/tests/unit/asyncio/test_async_grpc_client.py @@ -16,10 +16,8 @@ from google.auth import credentials as auth_credentials from google.auth.credentials import AnonymousCredentials from google.api_core import client_info as client_info_lib +from google.cloud.storage import __version__ from google.cloud.storage._experimental.asyncio import async_grpc_client -from google.cloud.storage._experimental.asyncio.async_grpc_client import ( - DEFAULT_CLIENT_INFO, -) def _make_credentials(spec=None): @@ -36,9 +34,6 @@ def test_constructor_default_options(self, mock_async_storage_client): mock_async_storage_client.get_transport_class.return_value = mock_transport_cls mock_creds = _make_credentials() - primary_user_agent = DEFAULT_CLIENT_INFO.to_user_agent() - expected_options = (("grpc.primary_user_agent", primary_user_agent),) - # Act async_grpc_client.AsyncGrpcClient(credentials=mock_creds) @@ -46,6 +41,13 @@ def test_constructor_default_options(self, 
mock_async_storage_client): mock_async_storage_client.get_transport_class.assert_called_once_with( "grpc_asyncio" ) + kwargs = mock_async_storage_client.call_args.kwargs + client_info = kwargs["client_info"] + agent_version = f"gcloud-python/{__version__}" + assert agent_version in client_info.user_agent + primary_user_agent = client_info.to_user_agent() + expected_options = (("grpc.primary_user_agent", primary_user_agent),) + mock_transport_cls.create_channel.assert_called_once_with( attempt_direct_path=True, credentials=mock_creds, @@ -54,11 +56,8 @@ def test_constructor_default_options(self, mock_async_storage_client): mock_channel = mock_transport_cls.create_channel.return_value mock_transport_cls.assert_called_once_with(channel=mock_channel) mock_transport = mock_transport_cls.return_value - mock_async_storage_client.assert_called_once_with( - transport=mock_transport, - client_options=None, - client_info=DEFAULT_CLIENT_INFO, - ) + assert kwargs["transport"] is mock_transport + assert kwargs["client_options"] is None @mock.patch("google.cloud._storage_v2.StorageAsyncClient") def test_constructor_with_client_info(self, mock_async_storage_client): @@ -73,6 +72,8 @@ def test_constructor_with_client_info(self, mock_async_storage_client): credentials=mock_creds, client_info=client_info ) + agent_version = f"gcloud-python/{__version__}" + assert agent_version in client_info.user_agent primary_user_agent = client_info.to_user_agent() expected_options = (("grpc.primary_user_agent", primary_user_agent),) @@ -92,7 +93,11 @@ def test_constructor_disables_directpath(self, mock_async_storage_client): credentials=mock_creds, attempt_direct_path=False ) - primary_user_agent = DEFAULT_CLIENT_INFO.to_user_agent() + kwargs = mock_async_storage_client.call_args.kwargs + client_info = kwargs["client_info"] + agent_version = f"gcloud-python/{__version__}" + assert agent_version in client_info.user_agent + primary_user_agent = client_info.to_user_agent() expected_options = 
(("grpc.primary_user_agent", primary_user_agent),) mock_transport_cls.create_channel.assert_called_once_with( @@ -109,43 +114,43 @@ def test_grpc_client_property(self, mock_grpc_gapic_client): mock_transport_cls = mock.MagicMock() mock_grpc_gapic_client.get_transport_class.return_value = mock_transport_cls channel_sentinel = mock.sentinel.channel - mock_transport_cls.create_channel.return_value = channel_sentinel - mock_transport_cls.return_value = mock.sentinel.transport + mock_transport_instance = mock.sentinel.transport + mock_transport_cls.return_value = mock_transport_instance mock_creds = _make_credentials() - mock_client_info = mock.MagicMock(spec=client_info_lib.ClientInfo) - mock_client_info.to_user_agent.return_value = "test-user-agent" + # Use a real ClientInfo instance instead of a mock to properly test user agent logic + client_info = client_info_lib.ClientInfo(user_agent="test-user-agent") mock_client_options = mock.sentinel.client_options mock_attempt_direct_path = mock.sentinel.attempt_direct_path # Act client = async_grpc_client.AsyncGrpcClient( credentials=mock_creds, - client_info=mock_client_info, + client_info=client_info, client_options=mock_client_options, attempt_direct_path=mock_attempt_direct_path, ) + retrieved_client = client.grpc_client # This is what is being tested - mock_grpc_gapic_client.get_transport_class.return_value = mock_transport_cls + # Assert - verify that gcloud-python agent version was added + agent_version = f"gcloud-python/{__version__}" + assert agent_version in client_info.user_agent + # Also verify original user_agent is still there + assert "test-user-agent" in client_info.user_agent - mock_transport_cls.create_channel.return_value = channel_sentinel - mock_transport_instance = mock.sentinel.transport - mock_transport_cls.return_value = mock_transport_instance - - retrieved_client = client.grpc_client + primary_user_agent = client_info.to_user_agent() + expected_options = (("grpc.primary_user_agent", 
primary_user_agent),) - # Assert - expected_options = (("grpc.primary_user_agent", "test-user-agent"),) mock_transport_cls.create_channel.assert_called_once_with( attempt_direct_path=mock_attempt_direct_path, credentials=mock_creds, options=expected_options, ) - mock_transport_cls.assere_with(channel=channel_sentinel) + mock_transport_cls.assert_called_once_with(channel=channel_sentinel) mock_grpc_gapic_client.assert_called_once_with( transport=mock_transport_instance, - client_info=mock_client_info, + client_info=client_info, client_options=mock_client_options, ) assert retrieved_client is mock_grpc_gapic_client.return_value @@ -168,7 +173,11 @@ def test_grpc_client_with_anon_creds(self, mock_grpc_gapic_client): # Assert assert retrieved_client is mock_grpc_gapic_client.return_value - primary_user_agent = DEFAULT_CLIENT_INFO.to_user_agent() + kwargs = mock_grpc_gapic_client.call_args.kwargs + client_info = kwargs["client_info"] + agent_version = f"gcloud-python/{__version__}" + assert agent_version in client_info.user_agent + primary_user_agent = client_info.to_user_agent() expected_options = (("grpc.primary_user_agent", primary_user_agent),) mock_transport_cls.create_channel.assert_called_once_with( @@ -176,4 +185,27 @@ def test_grpc_client_with_anon_creds(self, mock_grpc_gapic_client): credentials=anonymous_creds, options=expected_options, ) - mock_transport_cls.assert_called_once_with(channel=channel_sentinel) + + @mock.patch("google.cloud._storage_v2.StorageAsyncClient") + def test_user_agent_with_custom_client_info(self, mock_async_storage_client): + """Test that gcloud-python user agent is appended to existing user agent. 
+ + Regression test similar to test__http.py::TestConnection::test_duplicate_user_agent + """ + mock_transport_cls = mock.MagicMock() + mock_async_storage_client.get_transport_class.return_value = mock_transport_cls + mock_creds = _make_credentials() + + # Create a client_info with an existing user_agent + client_info = client_info_lib.ClientInfo(user_agent="custom-app/1.0") + + # Act + async_grpc_client.AsyncGrpcClient( + credentials=mock_creds, + client_info=client_info, + ) + + # Assert - verify that gcloud-python version was appended + agent_version = f"gcloud-python/{__version__}" + expected_user_agent = f"custom-app/1.0 {agent_version} " + assert client_info.user_agent == expected_user_agent From 74c9ecc54173420bfcd48498a8956088a035af50 Mon Sep 17 00:00:00 2001 From: Chandra Shekhar Sirimala Date: Thu, 29 Jan 2026 14:28:28 +0530 Subject: [PATCH 17/23] feat: Move Zonal Buckets features of `_experimental` (#1728) feat: Move Zonal Buckets features of `_experimental` to --- .../storage/_experimental/asyncio/_utils.py | 48 +- .../asyncio/async_abstract_object_stream.py | 74 +-- .../asyncio/async_appendable_object_writer.py | 600 +----------------- .../asyncio/async_grpc_client.py | 112 +--- .../asyncio/async_multi_range_downloader.py | 511 +-------------- .../asyncio/async_read_object_stream.py | 203 +----- .../asyncio/async_write_object_stream.py | 241 +------ .../_experimental/asyncio/retry/_helpers.py | 130 +--- .../asyncio/retry/base_strategy.py | 90 +-- .../retry/bidi_stream_retry_manager.py | 74 +-- .../retry/reads_resumption_strategy.py | 162 +---- .../retry/writes_resumption_strategy.py | 152 +---- .../storage/_experimental/grpc_client.py | 129 +--- google/cloud/storage/asyncio/_utils.py | 41 ++ .../asyncio/async_abstract_object_stream.py | 67 ++ .../asyncio/async_appendable_object_writer.py | 586 +++++++++++++++++ .../storage/asyncio/async_grpc_client.py | 108 ++++ .../asyncio/async_multi_range_downloader.py | 506 +++++++++++++++ 
.../asyncio/async_read_object_stream.py | 188 ++++++ .../asyncio/async_write_object_stream.py | 227 +++++++ .../cloud/storage/asyncio/retry/_helpers.py | 125 ++++ .../storage/asyncio/retry/base_strategy.py | 83 +++ .../retry/bidi_stream_retry_manager.py | 69 ++ .../retry/reads_resumption_strategy.py | 157 +++++ .../retry/writes_resumption_strategy.py | 147 +++++ google/cloud/storage/grpc_client.py | 122 ++++ ...rage_create_and_write_appendable_object.py | 4 +- ...orage_finalize_appendable_object_upload.py | 4 +- ...orage_open_multiple_objects_ranged_read.py | 11 +- ...torage_open_object_multiple_ranged_read.py | 4 +- .../storage_open_object_read_full_object.py | 4 +- .../storage_open_object_single_ranged_read.py | 4 +- ...rage_pause_and_resume_appendable_upload.py | 4 +- .../storage_read_appendable_object_tail.py | 6 +- .../zonal_buckets/zonal_snippets_test.py | 4 +- tests/perf/microbenchmarks/conftest.py | 4 +- .../perf/microbenchmarks/reads/test_reads.py | 4 +- .../microbenchmarks/writes/test_writes.py | 4 +- tests/system/test_zonal.py | 6 +- .../retry/test_bidi_stream_retry_manager.py | 4 +- .../retry/test_reads_resumption_strategy.py | 4 +- .../retry/test_writes_resumption_strategy.py | 4 +- .../test_async_appendable_object_writer.py | 12 +- tests/unit/asyncio/test_async_grpc_client.py | 2 +- .../test_async_multi_range_downloader.py | 30 +- .../asyncio/test_async_read_object_stream.py | 52 +- .../asyncio/test_async_write_object_stream.py | 10 +- tests/unit/test_grpc_client.py | 17 +- 48 files changed, 2633 insertions(+), 2517 deletions(-) create mode 100644 google/cloud/storage/asyncio/_utils.py create mode 100644 google/cloud/storage/asyncio/async_abstract_object_stream.py create mode 100644 google/cloud/storage/asyncio/async_appendable_object_writer.py create mode 100644 google/cloud/storage/asyncio/async_grpc_client.py create mode 100644 google/cloud/storage/asyncio/async_multi_range_downloader.py create mode 100644 
google/cloud/storage/asyncio/async_read_object_stream.py create mode 100644 google/cloud/storage/asyncio/async_write_object_stream.py create mode 100644 google/cloud/storage/asyncio/retry/_helpers.py create mode 100644 google/cloud/storage/asyncio/retry/base_strategy.py create mode 100644 google/cloud/storage/asyncio/retry/bidi_stream_retry_manager.py create mode 100644 google/cloud/storage/asyncio/retry/reads_resumption_strategy.py create mode 100644 google/cloud/storage/asyncio/retry/writes_resumption_strategy.py create mode 100644 google/cloud/storage/grpc_client.py diff --git a/google/cloud/storage/_experimental/asyncio/_utils.py b/google/cloud/storage/_experimental/asyncio/_utils.py index 170a0cfae..7e81a4bc7 100644 --- a/google/cloud/storage/_experimental/asyncio/_utils.py +++ b/google/cloud/storage/_experimental/asyncio/_utils.py @@ -1,41 +1,11 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. +import warnings -import google_crc32c +# Import everything from the new stable module +from google.cloud.storage.asyncio._utils import * # noqa -from google.api_core import exceptions - - -def raise_if_no_fast_crc32c(): - """Check if the C-accelerated version of google-crc32c is available. - - If not, raise an error to prevent silent performance degradation. - - raises google.api_core.exceptions.FailedPrecondition: If the C extension is not available. - returns: True if the C extension is available. 
- rtype: bool - - """ - if google_crc32c.implementation != "c": - raise exceptions.FailedPrecondition( - "The google-crc32c package is not installed with C support. " - "C extension is required for faster data integrity checks." - "For more information, see https://github.com/googleapis/python-crc32c." - ) - - -def update_write_handle_if_exists(obj, response): - """Update the write_handle attribute of an object if it exists in the response.""" - if hasattr(response, "write_handle") and response.write_handle is not None: - obj.write_handle = response.write_handle +warnings.warn( + "google.cloud.storage._experimental.asyncio._utils has been moved to google.cloud.storage.asyncio._utils. " + "Please update your imports.", + DeprecationWarning, + stacklevel=2, +) diff --git a/google/cloud/storage/_experimental/asyncio/async_abstract_object_stream.py b/google/cloud/storage/_experimental/asyncio/async_abstract_object_stream.py index 26cbab7a0..538241bd2 100644 --- a/google/cloud/storage/_experimental/asyncio/async_abstract_object_stream.py +++ b/google/cloud/storage/_experimental/asyncio/async_abstract_object_stream.py @@ -1,67 +1,11 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
+import warnings -import abc -from typing import Any, Optional +# Import everything from the new stable module +from google.cloud.storage.asyncio.async_abstract_object_stream import * # noqa - -class _AsyncAbstractObjectStream(abc.ABC): - """Abstract base class to represent gRPC bidi-stream for GCS ``Object``. - - Concrete implementation of this class could be ``_AsyncReadObjectStream`` - or ``_AsyncWriteObjectStream``. - - :type bucket_name: str - :param bucket_name: (Optional) The name of the bucket containing the object. - - :type object_name: str - :param object_name: (Optional) The name of the object. - - :type generation_number: int - :param generation_number: (Optional) If present, selects a specific revision of - this object. - - :type handle: Any - :param handle: (Optional) The handle for the object, could be read_handle or - write_handle, based on how the stream is used. - """ - - def __init__( - self, - bucket_name: str, - object_name: str, - generation_number: Optional[int] = None, - handle: Optional[Any] = None, - ) -> None: - super().__init__() - self.bucket_name: str = bucket_name - self.object_name: str = object_name - self.generation_number: Optional[int] = generation_number - self.handle: Optional[Any] = handle - - @abc.abstractmethod - async def open(self) -> None: - pass - - @abc.abstractmethod - async def close(self) -> None: - pass - - @abc.abstractmethod - async def send(self, protobuf: Any) -> None: - pass - - @abc.abstractmethod - async def recv(self) -> Any: - pass +warnings.warn( + "google.cloud.storage._experimental.asyncio.async_abstract_object_stream has been moved to google.cloud.storage.asyncio.async_abstract_object_stream. 
" + "Please update your imports.", + DeprecationWarning, + stacklevel=2, +) diff --git a/google/cloud/storage/_experimental/asyncio/async_appendable_object_writer.py b/google/cloud/storage/_experimental/asyncio/async_appendable_object_writer.py index 1cc099043..53b813643 100644 --- a/google/cloud/storage/_experimental/asyncio/async_appendable_object_writer.py +++ b/google/cloud/storage/_experimental/asyncio/async_appendable_object_writer.py @@ -1,595 +1,11 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -NOTE: -This is _experimental module for upcoming support for Rapid Storage. -(https://cloud.google.com/blog/products/storage-data-transfer/high-performance-storage-innovations-for-ai-hpc#:~:text=your%20AI%20workloads%3A-,Rapid%20Storage,-%3A%20A%20new) +import warnings -APIs may not work as intended and are not stable yet. Feature is not -GA(Generally Available) yet, please contact your TAM (Technical Account Manager) -if you want to use these Rapid Storage APIs. 
+# Import everything from the new stable module +from google.cloud.storage.asyncio.async_appendable_object_writer import * # noqa -""" -from io import BufferedReader -import io -import logging -from typing import List, Optional, Tuple, Union - -from google.api_core import exceptions -from google.api_core.retry_async import AsyncRetry -from google.rpc import status_pb2 -from google.cloud._storage_v2.types import BidiWriteObjectRedirectedError -from google.cloud._storage_v2.types.storage import BidiWriteObjectRequest - - -from . import _utils -from google.cloud import _storage_v2 -from google.cloud.storage._experimental.asyncio.async_grpc_client import ( - AsyncGrpcClient, -) -from google.cloud.storage._experimental.asyncio.async_write_object_stream import ( - _AsyncWriteObjectStream, -) -from google.cloud.storage._experimental.asyncio.retry.bidi_stream_retry_manager import ( - _BidiStreamRetryManager, -) -from google.cloud.storage._experimental.asyncio.retry.writes_resumption_strategy import ( - _WriteResumptionStrategy, - _WriteState, -) -from google.cloud.storage._experimental.asyncio.retry._helpers import ( - _extract_bidi_writes_redirect_proto, +warnings.warn( + "google.cloud.storage._experimental.asyncio.async_appendable_object_writer has been moved to google.cloud.storage.asyncio.async_appendable_object_writer. 
" + "Please update your imports.", + DeprecationWarning, + stacklevel=2, ) - - -_MAX_CHUNK_SIZE_BYTES = 2 * 1024 * 1024 # 2 MiB -_DEFAULT_FLUSH_INTERVAL_BYTES = 16 * 1024 * 1024 # 16 MiB -_BIDI_WRITE_REDIRECTED_TYPE_URL = ( - "type.googleapis.com/google.storage.v2.BidiWriteObjectRedirectedError" -) -logger = logging.getLogger(__name__) - - -def _is_write_retryable(exc): - """Predicate to determine if a write operation should be retried.""" - - if isinstance( - exc, - ( - exceptions.InternalServerError, - exceptions.ServiceUnavailable, - exceptions.DeadlineExceeded, - exceptions.TooManyRequests, - BidiWriteObjectRedirectedError, - ), - ): - logger.warning(f"Retryable write exception encountered: {exc}") - return True - - grpc_error = None - if isinstance(exc, exceptions.Aborted) and exc.errors: - grpc_error = exc.errors[0] - if isinstance(grpc_error, BidiWriteObjectRedirectedError): - return True - - trailers = grpc_error.trailing_metadata() - if not trailers: - return False - - status_details_bin = None - for key, value in trailers: - if key == "grpc-status-details-bin": - status_details_bin = value - break - - if status_details_bin: - status_proto = status_pb2.Status() - try: - status_proto.ParseFromString(status_details_bin) - for detail in status_proto.details: - if detail.type_url == _BIDI_WRITE_REDIRECTED_TYPE_URL: - return True - except Exception: - logger.error( - "Error unpacking redirect details from gRPC error. Exception: ", - {exc}, - ) - return False - return False - - -class AsyncAppendableObjectWriter: - """Class for appending data to a GCS Appendable Object asynchronously.""" - - def __init__( - self, - client: AsyncGrpcClient, - bucket_name: str, - object_name: str, - generation: Optional[int] = None, - write_handle: Optional[_storage_v2.BidiWriteHandle] = None, - writer_options: Optional[dict] = None, - ): - """ - Class for appending data to a GCS Appendable Object. 
- - Example usage: - - ``` - - from google.cloud.storage._experimental.asyncio.async_grpc_client import AsyncGrpcClient - from google.cloud.storage._experimental.asyncio.async_appendable_object_writer import AsyncAppendableObjectWriter - import asyncio - - client = AsyncGrpcClient().grpc_client - bucket_name = "my-bucket" - object_name = "my-appendable-object" - - # instantiate the writer - writer = AsyncAppendableObjectWriter(client, bucket_name, object_name) - # open the writer, (underlying gRPC bidi-stream will be opened) - await writer.open() - - # append data, it can be called multiple times. - await writer.append(b"hello world") - await writer.append(b"some more data") - - # optionally flush data to persist. - await writer.flush() - - # close the gRPC stream. - # Please note closing the program will also close the stream, - # however it's recommended to close the stream if no more data to append - # to clean up gRPC connection (which means CPU/memory/network resources) - await writer.close() - ``` - - :type client: :class:`~google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient` - :param client: async grpc client to use for making API requests. - - :type bucket_name: str - :param bucket_name: The name of the GCS bucket containing the object. - - :type object_name: str - :param object_name: The name of the GCS Appendable Object to be written. - - :type generation: Optional[int] - :param generation: (Optional) If present, creates writer for that - specific revision of that object. Use this to append data to an - existing Appendable Object. - - Setting to ``0`` makes the `writer.open()` succeed only if - object doesn't exist in the bucket (useful for not accidentally - overwriting existing objects). - - Warning: If `None`, a new object is created. If an object with the - same name already exists, it will be overwritten the moment - `writer.open()` is called. 
- - :type write_handle: _storage_v2.BidiWriteHandle - :param write_handle: (Optional) An handle for writing the object. - If provided, opening the bidi-gRPC connection will be faster. - - :type writer_options: dict - :param writer_options: (Optional) A dictionary of writer options. - Supported options: - - "FLUSH_INTERVAL_BYTES": int - The number of bytes to append before "persisting" data in GCS - servers. Default is `_DEFAULT_FLUSH_INTERVAL_BYTES`. - Must be a multiple of `_MAX_CHUNK_SIZE_BYTES`. - """ - _utils.raise_if_no_fast_crc32c() - self.client = client - self.bucket_name = bucket_name - self.object_name = object_name - self.write_handle = write_handle - self.generation = generation - - self.write_obj_stream: Optional[_AsyncWriteObjectStream] = None - self._is_stream_open: bool = False - # `offset` is the latest size of the object without staleless. - self.offset: Optional[int] = None - # `persisted_size` is the total_bytes persisted in the GCS server. - # Please note: `offset` and `persisted_size` are same when the stream is - # opened. - self.persisted_size: Optional[int] = None - if writer_options is None: - writer_options = {} - self.flush_interval = writer_options.get( - "FLUSH_INTERVAL_BYTES", _DEFAULT_FLUSH_INTERVAL_BYTES - ) - if self.flush_interval < _MAX_CHUNK_SIZE_BYTES: - raise exceptions.OutOfRange( - f"flush_interval must be >= {_MAX_CHUNK_SIZE_BYTES} , but provided {self.flush_interval}" - ) - if self.flush_interval % _MAX_CHUNK_SIZE_BYTES != 0: - raise exceptions.OutOfRange( - f"flush_interval must be a multiple of {_MAX_CHUNK_SIZE_BYTES}, but provided {self.flush_interval}" - ) - self.bytes_appended_since_last_flush = 0 - self._routing_token: Optional[str] = None - self.object_resource: Optional[_storage_v2.Object] = None - - async def state_lookup(self) -> int: - """Returns the persisted_size - - :rtype: int - :returns: persisted size. - - :raises ValueError: If the stream is not open (i.e., `open()` has not - been called). 
- """ - if not self._is_stream_open: - raise ValueError("Stream is not open. Call open() before state_lookup().") - - await self.write_obj_stream.send( - _storage_v2.BidiWriteObjectRequest( - state_lookup=True, - ) - ) - response = await self.write_obj_stream.recv() - self.persisted_size = response.persisted_size - return self.persisted_size - - def _on_open_error(self, exc): - """Extracts routing token and write handle on redirect error during open.""" - redirect_proto = _extract_bidi_writes_redirect_proto(exc) - if redirect_proto: - if redirect_proto.routing_token: - self._routing_token = redirect_proto.routing_token - if redirect_proto.write_handle: - self.write_handle = redirect_proto.write_handle - if redirect_proto.generation: - self.generation = redirect_proto.generation - - async def open( - self, - retry_policy: Optional[AsyncRetry] = None, - metadata: Optional[List[Tuple[str, str]]] = None, - ) -> None: - """Opens the underlying bidi-gRPC stream. - - :raises ValueError: If the stream is already open. - - """ - if self._is_stream_open: - raise ValueError("Underlying bidi-gRPC stream is already open") - - if retry_policy is None: - retry_policy = AsyncRetry( - predicate=_is_write_retryable, on_error=self._on_open_error - ) - else: - original_on_error = retry_policy._on_error - - def combined_on_error(exc): - self._on_open_error(exc) - if original_on_error: - original_on_error(exc) - - retry_policy = AsyncRetry( - predicate=_is_write_retryable, - initial=retry_policy._initial, - maximum=retry_policy._maximum, - multiplier=retry_policy._multiplier, - deadline=retry_policy._deadline, - on_error=combined_on_error, - ) - - async def _do_open(): - current_metadata = list(metadata) if metadata else [] - - # Cleanup stream from previous failed attempt, if any. 
- if self.write_obj_stream: - if self.write_obj_stream.is_stream_open: - try: - await self.write_obj_stream.close() - except Exception as e: - logger.warning( - "Error closing previous write stream during open retry. Got exception: ", - {e}, - ) - self.write_obj_stream = None - self._is_stream_open = False - - self.write_obj_stream = _AsyncWriteObjectStream( - client=self.client.grpc_client, - bucket_name=self.bucket_name, - object_name=self.object_name, - generation_number=self.generation, - write_handle=self.write_handle, - routing_token=self._routing_token, - ) - - if self._routing_token: - current_metadata.append( - ("x-goog-request-params", f"routing_token={self._routing_token}") - ) - - await self.write_obj_stream.open( - metadata=current_metadata if metadata else None - ) - - if self.write_obj_stream.generation_number: - self.generation = self.write_obj_stream.generation_number - if self.write_obj_stream.write_handle: - self.write_handle = self.write_obj_stream.write_handle - if self.write_obj_stream.persisted_size is not None: - self.persisted_size = self.write_obj_stream.persisted_size - - self._is_stream_open = True - self._routing_token = None - - await retry_policy(_do_open)() - - async def append( - self, - data: bytes, - retry_policy: Optional[AsyncRetry] = None, - metadata: Optional[List[Tuple[str, str]]] = None, - ) -> None: - """Appends data to the Appendable object with automatic retries. - - calling `self.append` will append bytes at the end of the current size - ie. `self.offset` bytes relative to the begining of the object. - - This method sends the provided `data` to the GCS server in chunks. - and persists data in GCS at every `_DEFAULT_FLUSH_INTERVAL_BYTES` bytes - or at the last chunk whichever is earlier. Persisting is done by setting - `flush=True` on request. - - :type data: bytes - :param data: The bytes to append to the object. 
- - :type retry_policy: :class:`~google.api_core.retry_async.AsyncRetry` - :param retry_policy: (Optional) The retry policy to use for the operation. - - :type metadata: List[Tuple[str, str]] - :param metadata: (Optional) The metadata to be sent with the request. - - :raises ValueError: If the stream is not open. - """ - if not self._is_stream_open: - raise ValueError("Stream is not open. Call open() before append().") - if not data: - logger.debug("No data provided to append; returning without action.") - return - - if retry_policy is None: - retry_policy = AsyncRetry(predicate=_is_write_retryable) - - strategy = _WriteResumptionStrategy() - buffer = io.BytesIO(data) - attempt_count = 0 - - def send_and_recv_generator( - requests: List[BidiWriteObjectRequest], - state: dict[str, _WriteState], - metadata: Optional[List[Tuple[str, str]]] = None, - ): - async def generator(): - nonlocal attempt_count - nonlocal requests - attempt_count += 1 - resp = None - write_state = state["write_state"] - # If this is a retry or redirect, we must re-open the stream - if attempt_count > 1 or write_state.routing_token: - logger.info( - f"Re-opening the stream with attempt_count: {attempt_count}" - ) - if self.write_obj_stream and self.write_obj_stream.is_stream_open: - await self.write_obj_stream.close() - - current_metadata = list(metadata) if metadata else [] - if write_state.routing_token: - current_metadata.append( - ( - "x-goog-request-params", - f"routing_token={write_state.routing_token}", - ) - ) - self._routing_token = write_state.routing_token - - self._is_stream_open = False - await self.open(metadata=current_metadata) - - write_state.persisted_size = self.persisted_size - write_state.write_handle = self.write_handle - write_state.routing_token = None - - write_state.user_buffer.seek(write_state.persisted_size) - write_state.bytes_sent = write_state.persisted_size - write_state.bytes_since_last_flush = 0 - - requests = strategy.generate_requests(state) - - num_requests = 
len(requests) - for i, chunk_req in enumerate(requests): - if i == num_requests - 1: - chunk_req.state_lookup = True - chunk_req.flush = True - await self.write_obj_stream.send(chunk_req) - - resp = await self.write_obj_stream.recv() - if resp: - if resp.persisted_size is not None: - self.persisted_size = resp.persisted_size - state["write_state"].persisted_size = resp.persisted_size - self.offset = self.persisted_size - if resp.write_handle: - self.write_handle = resp.write_handle - state["write_state"].write_handle = resp.write_handle - self.bytes_appended_since_last_flush = 0 - - yield resp - - return generator() - - # State initialization - write_state = _WriteState(_MAX_CHUNK_SIZE_BYTES, buffer, self.flush_interval) - write_state.write_handle = self.write_handle - write_state.persisted_size = self.persisted_size - write_state.bytes_sent = self.persisted_size - write_state.bytes_since_last_flush = self.bytes_appended_since_last_flush - - retry_manager = _BidiStreamRetryManager( - _WriteResumptionStrategy(), - lambda r, s: send_and_recv_generator(r, s, metadata), - ) - await retry_manager.execute({"write_state": write_state}, retry_policy) - - # Sync local markers - self.write_obj_stream.persisted_size = write_state.persisted_size - self.write_obj_stream.write_handle = write_state.write_handle - self.bytes_appended_since_last_flush = write_state.bytes_since_last_flush - self.persisted_size = write_state.persisted_size - self.offset = write_state.persisted_size - - async def simple_flush(self) -> None: - """Flushes the data to the server. - Please note: Unlike `flush` it does not do `state_lookup` - - :rtype: None - - :raises ValueError: If the stream is not open (i.e., `open()` has not - been called). - """ - if not self._is_stream_open: - raise ValueError("Stream is not open. 
Call open() before simple_flush().") - - await self.write_obj_stream.send( - _storage_v2.BidiWriteObjectRequest( - flush=True, - ) - ) - self.bytes_appended_since_last_flush = 0 - - async def flush(self) -> int: - """Flushes the data to the server. - - :rtype: int - :returns: The persisted size after flush. - - :raises ValueError: If the stream is not open (i.e., `open()` has not - been called). - """ - if not self._is_stream_open: - raise ValueError("Stream is not open. Call open() before flush().") - - await self.write_obj_stream.send( - _storage_v2.BidiWriteObjectRequest( - flush=True, - state_lookup=True, - ) - ) - response = await self.write_obj_stream.recv() - self.persisted_size = response.persisted_size - self.offset = self.persisted_size - self.bytes_appended_since_last_flush = 0 - return self.persisted_size - - async def close(self, finalize_on_close=False) -> Union[int, _storage_v2.Object]: - """Closes the underlying bidi-gRPC stream. - - :type finalize_on_close: bool - :param finalize_on_close: Finalizes the Appendable Object. No more data - can be appended. - - rtype: Union[int, _storage_v2.Object] - returns: Updated `self.persisted_size` by default after closing the - bidi-gRPC stream. However, if `finalize_on_close=True` is passed, - returns the finalized object resource. - - :raises ValueError: If the stream is not open (i.e., `open()` has not - been called). - - """ - if not self._is_stream_open: - raise ValueError("Stream is not open. Call open() before close().") - - if finalize_on_close: - return await self.finalize() - - await self.write_obj_stream.close() - - self._is_stream_open = False - self.offset = None - return self.persisted_size - - async def finalize(self) -> _storage_v2.Object: - """Finalizes the Appendable Object. - - Note: Once finalized no more data can be appended. - This method is different from `close`. if `.close()` is called data may - still be appended to object at a later point in time by opening with - generation number. 
- (i.e. `open(..., generation=)`. - However if `.finalize()` is called no more data can be appended to the - object. - - rtype: google.cloud.storage_v2.types.Object - returns: The finalized object resource. - - :raises ValueError: If the stream is not open (i.e., `open()` has not - been called). - """ - if not self._is_stream_open: - raise ValueError("Stream is not open. Call open() before finalize().") - - await self.write_obj_stream.send( - _storage_v2.BidiWriteObjectRequest(finish_write=True) - ) - response = await self.write_obj_stream.recv() - self.object_resource = response.resource - self.persisted_size = self.object_resource.size - await self.write_obj_stream.close() - - self._is_stream_open = False - self.offset = None - return self.object_resource - - @property - def is_stream_open(self) -> bool: - return self._is_stream_open - - # helper methods. - async def append_from_string(self, data: str): - """ - str data will be encoded to bytes using utf-8 encoding calling - - self.append(data.encode("utf-8")) - """ - raise NotImplementedError("append_from_string is not implemented yet.") - - async def append_from_stream(self, stream_obj): - """ - At a time read a chunk of data (16MiB) from `stream_obj` - and call self.append(chunk) - """ - raise NotImplementedError("append_from_stream is not implemented yet.") - - async def append_from_file( - self, file_obj: BufferedReader, block_size: int = _DEFAULT_FLUSH_INTERVAL_BYTES - ): - """ - Appends data to an Appendable Object using file_handle which is opened - for reading in binary mode. - - :type file_obj: file - :param file_obj: A file handle opened in binary mode for reading. 
- - """ - while block := file_obj.read(block_size): - await self.append(block) diff --git a/google/cloud/storage/_experimental/asyncio/async_grpc_client.py b/google/cloud/storage/_experimental/asyncio/async_grpc_client.py index 47c371cd6..558ff0c5a 100644 --- a/google/cloud/storage/_experimental/asyncio/async_grpc_client.py +++ b/google/cloud/storage/_experimental/asyncio/async_grpc_client.py @@ -1,107 +1,11 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. +import warnings -"""An async client for interacting with Google Cloud Storage using the gRPC API.""" +# Import everything from the new stable module +from google.cloud.storage.asyncio.async_grpc_client import * # noqa -from google.cloud import _storage_v2 as storage_v2 -from google.cloud._storage_v2.services.storage.transports.base import ( - DEFAULT_CLIENT_INFO, +warnings.warn( + "google.cloud.storage._experimental.asyncio.async_grpc_client has been moved to google.cloud.storage.asyncio.async_grpc_client. " + "Please update your imports.", + DeprecationWarning, + stacklevel=2, ) -from google.cloud.storage import __version__ - - -class AsyncGrpcClient: - """An asynchronous client for interacting with Google Cloud Storage using the gRPC API. - :type credentials: :class:`~google.auth.credentials.Credentials` - :param credentials: (Optional) The OAuth2 Credentials to use for this - client. If not passed, falls back to the default - inferred from the environment. 
- :type client_info: :class:`~google.api_core.client_info.ClientInfo` - :param client_info: - The client info used to send a user-agent string along with API - requests. If ``None``, then default info will be used. - :type client_options: :class:`~google.api_core.client_options.ClientOptions` - :param client_options: (Optional) Client options used to set user options - on the client. - :type attempt_direct_path: bool - :param attempt_direct_path: - (Optional) Whether to attempt to use DirectPath for gRPC connections. - Defaults to ``True``. - """ - - def __init__( - self, - credentials=None, - client_info=None, - client_options=None, - *, - attempt_direct_path=True, - ): - if client_info is None: - client_info = DEFAULT_CLIENT_INFO - else: - client_info = client_info - client_info.client_library_version = __version__ - # TODO: When metrics all use gccl, this should be removed #9552 - if client_info.user_agent is None: # pragma: no branch - client_info.user_agent = "" - agent_version = f"gcloud-python/{__version__}" - if agent_version not in client_info.user_agent: - client_info.user_agent += f" {agent_version} " - - self._grpc_client = self._create_async_grpc_client( - credentials=credentials, - client_info=client_info, - client_options=client_options, - attempt_direct_path=attempt_direct_path, - ) - - def _create_async_grpc_client( - self, - credentials=None, - client_info=None, - client_options=None, - attempt_direct_path=True, - ): - transport_cls = storage_v2.StorageAsyncClient.get_transport_class( - "grpc_asyncio" - ) - - primary_user_agent = client_info.to_user_agent() - - channel = transport_cls.create_channel( - attempt_direct_path=attempt_direct_path, - credentials=credentials, - options=(("grpc.primary_user_agent", primary_user_agent),), - ) - transport = transport_cls(channel=channel) - - return storage_v2.StorageAsyncClient( - transport=transport, - client_info=client_info, - client_options=client_options, - ) - - @property - def grpc_client(self): - 
"""The underlying gRPC client. - - This property gives users direct access to the `_storage_v2.StorageAsyncClient` - instance. This can be useful for accessing - newly added or experimental RPCs that are not yet exposed through - the high-level GrpcClient. - Returns: - google.cloud._storage_v2.StorageAsyncClient: The configured GAPIC client. - """ - return self._grpc_client diff --git a/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py b/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py index 340f087da..bfc2c7c2b 100644 --- a/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py +++ b/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py @@ -1,506 +1,11 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
+import warnings -from __future__ import annotations -import asyncio -import logging -from google.api_core import exceptions -from google.api_core.retry_async import AsyncRetry -from google.cloud.storage._experimental.asyncio.retry._helpers import _handle_redirect -from google.rpc import status_pb2 +# Import everything from the new stable module +from google.cloud.storage.asyncio.async_multi_range_downloader import * # noqa -from typing import List, Optional, Tuple, Any, Dict - -from ._utils import raise_if_no_fast_crc32c -from google.cloud.storage._experimental.asyncio.async_read_object_stream import ( - _AsyncReadObjectStream, -) -from google.cloud.storage._experimental.asyncio.async_grpc_client import ( - AsyncGrpcClient, -) -from google.cloud.storage._experimental.asyncio.retry.bidi_stream_retry_manager import ( - _BidiStreamRetryManager, -) -from google.cloud.storage._experimental.asyncio.retry.reads_resumption_strategy import ( - _ReadResumptionStrategy, - _DownloadState, -) - -from io import BytesIO -from google.cloud import _storage_v2 -from google.cloud.storage._helpers import generate_random_56_bit_integer - - -_MAX_READ_RANGES_PER_BIDI_READ_REQUEST = 100 -_BIDI_READ_REDIRECTED_TYPE_URL = ( - "type.googleapis.com/google.storage.v2.BidiReadObjectRedirectedError" +warnings.warn( + "google.cloud.storage._experimental.asyncio.async_multi_range_downloader has been moved to google.cloud.storage.asyncio.async_multi_range_downloader. 
" + "Please update your imports.", + DeprecationWarning, + stacklevel=2, ) - -logger = logging.getLogger(__name__) - - -def _is_read_retryable(exc): - """Predicate to determine if a read operation should be retried.""" - if isinstance( - exc, - ( - exceptions.InternalServerError, - exceptions.ServiceUnavailable, - exceptions.DeadlineExceeded, - exceptions.TooManyRequests, - ), - ): - return True - - if not isinstance(exc, exceptions.Aborted) or not exc.errors: - return False - - try: - grpc_error = exc.errors[0] - trailers = grpc_error.trailing_metadata() - if not trailers: - return False - - status_details_bin = next( - (v for k, v in trailers if k == "grpc-status-details-bin"), None - ) - - if not status_details_bin: - return False - - status_proto = status_pb2.Status() - status_proto.ParseFromString(status_details_bin) - return any( - detail.type_url == _BIDI_READ_REDIRECTED_TYPE_URL - for detail in status_proto.details - ) - except Exception as e: - logger.error(f"Error parsing status_details_bin: {e}") - return False - - -class AsyncMultiRangeDownloader: - """Provides an interface for downloading multiple ranges of a GCS ``Object`` - concurrently. - - Example usage: - - .. code-block:: python - - client = AsyncGrpcClient().grpc_client - mrd = await AsyncMultiRangeDownloader.create_mrd( - client, bucket_name="chandrasiri-rs", object_name="test_open9" - ) - my_buff1 = open('my_fav_file.txt', 'wb') - my_buff2 = BytesIO() - my_buff3 = BytesIO() - my_buff4 = any_object_which_provides_BytesIO_like_interface() - await mrd.download_ranges( - [ - # (start_byte, bytes_to_read, writeable_buffer) - (0, 100, my_buff1), - (100, 20, my_buff2), - (200, 123, my_buff3), - (300, 789, my_buff4), - ] - ) - - # verify data in buffers... 
- assert my_buff2.getbuffer().nbytes == 20 - - - """ - - @classmethod - async def create_mrd( - cls, - client: AsyncGrpcClient, - bucket_name: str, - object_name: str, - generation_number: Optional[int] = None, - read_handle: Optional[_storage_v2.BidiReadHandle] = None, - retry_policy: Optional[AsyncRetry] = None, - metadata: Optional[List[Tuple[str, str]]] = None, - ) -> AsyncMultiRangeDownloader: - """Initializes a MultiRangeDownloader and opens the underlying bidi-gRPC - object for reading. - - :type client: :class:`~google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient` - :param client: The asynchronous client to use for making API requests. - - :type bucket_name: str - :param bucket_name: The name of the bucket containing the object. - - :type object_name: str - :param object_name: The name of the object to be read. - - :type generation_number: int - :param generation_number: (Optional) If present, selects a specific - revision of this object. - - :type read_handle: _storage_v2.BidiReadHandle - :param read_handle: (Optional) An existing handle for reading the object. - If provided, opening the bidi-gRPC connection will be faster. - - :type retry_policy: :class:`~google.api_core.retry_async.AsyncRetry` - :param retry_policy: (Optional) The retry policy to use for the ``open`` operation. - - :type metadata: List[Tuple[str, str]] - :param metadata: (Optional) The metadata to be sent with the ``open`` request. - - :rtype: :class:`~google.cloud.storage._experimental.asyncio.async_multi_range_downloader.AsyncMultiRangeDownloader` - :returns: An initialized AsyncMultiRangeDownloader instance for reading. 
- """ - mrd = cls(client, bucket_name, object_name, generation_number, read_handle) - await mrd.open(retry_policy=retry_policy, metadata=metadata) - return mrd - - def __init__( - self, - client: AsyncGrpcClient, - bucket_name: str, - object_name: str, - generation_number: Optional[int] = None, - read_handle: Optional[_storage_v2.BidiReadHandle] = None, - ) -> None: - """Constructor for AsyncMultiRangeDownloader, clients are not adviced to - use it directly. Instead it's adviced to use the classmethod `create_mrd`. - - :type client: :class:`~google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient` - :param client: The asynchronous client to use for making API requests. - - :type bucket_name: str - :param bucket_name: The name of the bucket containing the object. - - :type object_name: str - :param object_name: The name of the object to be read. - - :type generation_number: int - :param generation_number: (Optional) If present, selects a specific revision of - this object. - - :type read_handle: _storage_v2.BidiReadHandle - :param read_handle: (Optional) An existing read handle. 
- """ - - raise_if_no_fast_crc32c() - - self.client = client - self.bucket_name = bucket_name - self.object_name = object_name - self.generation_number = generation_number - self.read_handle: Optional[_storage_v2.BidiReadHandle] = read_handle - self.read_obj_str: Optional[_AsyncReadObjectStream] = None - self._is_stream_open: bool = False - self._routing_token: Optional[str] = None - self._read_id_to_writable_buffer_dict = {} - self._read_id_to_download_ranges_id = {} - self._download_ranges_id_to_pending_read_ids = {} - self.persisted_size: Optional[int] = None # updated after opening the stream - - async def __aenter__(self): - """Opens the underlying bidi-gRPC connection to read from the object.""" - await self.open() - return self - - async def __aexit__(self, exc_type, exc_val, exc_tb): - """Closes the underlying bidi-gRPC connection.""" - if self.is_stream_open: - await self.close() - - def _on_open_error(self, exc): - """Extracts routing token and read handle on redirect error during open.""" - routing_token, read_handle = _handle_redirect(exc) - if routing_token: - self._routing_token = routing_token - if read_handle: - self.read_handle = read_handle - - async def open( - self, - retry_policy: Optional[AsyncRetry] = None, - metadata: Optional[List[Tuple[str, str]]] = None, - ) -> None: - """Opens the bidi-gRPC connection to read from the object.""" - if self._is_stream_open: - raise ValueError("Underlying bidi-gRPC stream is already open") - - if retry_policy is None: - retry_policy = AsyncRetry( - predicate=_is_read_retryable, on_error=self._on_open_error - ) - else: - original_on_error = retry_policy._on_error - - def combined_on_error(exc): - self._on_open_error(exc) - if original_on_error: - original_on_error(exc) - - retry_policy = AsyncRetry( - predicate=_is_read_retryable, - initial=retry_policy._initial, - maximum=retry_policy._maximum, - multiplier=retry_policy._multiplier, - deadline=retry_policy._deadline, - on_error=combined_on_error, - ) - - 
async def _do_open(): - current_metadata = list(metadata) if metadata else [] - - # Cleanup stream from previous failed attempt, if any. - if self.read_obj_str: - if self.read_obj_str.is_stream_open: - try: - await self.read_obj_str.close() - except exceptions.GoogleAPICallError as e: - logger.warning( - f"Failed to close existing stream during resumption: {e}" - ) - self.read_obj_str = None - self._is_stream_open = False - - self.read_obj_str = _AsyncReadObjectStream( - client=self.client.grpc_client, - bucket_name=self.bucket_name, - object_name=self.object_name, - generation_number=self.generation_number, - read_handle=self.read_handle, - ) - - if self._routing_token: - current_metadata.append( - ("x-goog-request-params", f"routing_token={self._routing_token}") - ) - self._routing_token = None - - await self.read_obj_str.open( - metadata=current_metadata if current_metadata else None - ) - - if self.read_obj_str.generation_number: - self.generation_number = self.read_obj_str.generation_number - if self.read_obj_str.read_handle: - self.read_handle = self.read_obj_str.read_handle - if self.read_obj_str.persisted_size is not None: - self.persisted_size = self.read_obj_str.persisted_size - - self._is_stream_open = True - - await retry_policy(_do_open)() - - async def download_ranges( - self, - read_ranges: List[Tuple[int, int, BytesIO]], - lock: asyncio.Lock = None, - retry_policy: Optional[AsyncRetry] = None, - metadata: Optional[List[Tuple[str, str]]] = None, - ) -> None: - """Downloads multiple byte ranges from the object into the buffers - provided by user with automatic retries. - - :type read_ranges: List[Tuple[int, int, "BytesIO"]] - :param read_ranges: A list of tuples, where each tuple represents a - combination of byte_range and writeable buffer in format - - (`start_byte`, `bytes_to_read`, `writeable_buffer`). 
Buffer has - to be provided by the user, and user has to make sure appropriate - memory is available in the application to avoid out-of-memory crash. - - Special cases: - if the value of `bytes_to_read` is 0, it'll be interpreted as - download all contents until the end of the file from `start_byte`. - Examples: - * (0, 0, buffer) : downloads 0 to end , i.e. entire object. - * (100, 0, buffer) : downloads from 100 to end. - - - :type lock: asyncio.Lock - :param lock: (Optional) An asyncio lock to synchronize sends and recvs - on the underlying bidi-GRPC stream. This is required when multiple - coroutines are calling this method concurrently. - - i.e. Example usage with multiple coroutines: - - ``` - lock = asyncio.Lock() - task1 = asyncio.create_task(mrd.download_ranges(ranges1, lock)) - task2 = asyncio.create_task(mrd.download_ranges(ranges2, lock)) - await asyncio.gather(task1, task2) - - ``` - - If user want to call this method serially from multiple coroutines, - then providing a lock is not necessary. - - ``` - await mrd.download_ranges(ranges1) - await mrd.download_ranges(ranges2) - - # ... some other code code... - - ``` - - :type retry_policy: :class:`~google.api_core.retry_async.AsyncRetry` - :param retry_policy: (Optional) The retry policy to use for the operation. - - :raises ValueError: if the underlying bidi-GRPC stream is not open. - :raises ValueError: if the length of read_ranges is more than 1000. - :raises DataCorruption: if a checksum mismatch is detected while reading data. 
- - """ - - if len(read_ranges) > 1000: - raise ValueError( - "Invalid input - length of read_ranges cannot be more than 1000" - ) - - if not self._is_stream_open: - raise ValueError("Underlying bidi-gRPC stream is not open") - - if lock is None: - lock = asyncio.Lock() - - if retry_policy is None: - retry_policy = AsyncRetry(predicate=_is_read_retryable) - - # Initialize Global State for Retry Strategy - download_states = {} - for read_range in read_ranges: - read_id = generate_random_56_bit_integer() - download_states[read_id] = _DownloadState( - initial_offset=read_range[0], - initial_length=read_range[1], - user_buffer=read_range[2], - ) - - initial_state = { - "download_states": download_states, - "read_handle": self.read_handle, - "routing_token": None, - } - - # Track attempts to manage stream reuse - attempt_count = 0 - - def send_ranges_and_get_bytes( - requests: List[_storage_v2.ReadRange], - state: Dict[str, Any], - metadata: Optional[List[Tuple[str, str]]] = None, - ): - async def generator(): - nonlocal attempt_count - attempt_count += 1 - - if attempt_count > 1: - logger.info( - f"Resuming download (attempt {attempt_count - 1}) for {len(requests)} ranges." - ) - - async with lock: - current_handle = state.get("read_handle") - current_token = state.get("routing_token") - - # We reopen if it's a redirect (token exists) OR if this is a retry - # (not first attempt). This prevents trying to send data on a dead - # stream from a previous failed attempt. 
- should_reopen = ( - (attempt_count > 1) - or (current_token is not None) - or (metadata is not None) - ) - - if should_reopen: - if current_token: - logger.info( - f"Re-opening stream with routing token: {current_token}" - ) - # Close existing stream if any - if self.read_obj_str and self.read_obj_str.is_stream_open: - await self.read_obj_str.close() - - # Re-initialize stream - self.read_obj_str = _AsyncReadObjectStream( - client=self.client.grpc_client, - bucket_name=self.bucket_name, - object_name=self.object_name, - generation_number=self.generation_number, - read_handle=current_handle, - ) - - # Inject routing_token into metadata if present - current_metadata = list(metadata) if metadata else [] - if current_token: - current_metadata.append( - ( - "x-goog-request-params", - f"routing_token={current_token}", - ) - ) - - await self.read_obj_str.open( - metadata=current_metadata if current_metadata else None - ) - self._is_stream_open = True - - pending_read_ids = {r.read_id for r in requests} - - # Send Requests - for i in range( - 0, len(requests), _MAX_READ_RANGES_PER_BIDI_READ_REQUEST - ): - batch = requests[i : i + _MAX_READ_RANGES_PER_BIDI_READ_REQUEST] - await self.read_obj_str.send( - _storage_v2.BidiReadObjectRequest(read_ranges=batch) - ) - - while pending_read_ids: - response = await self.read_obj_str.recv() - if response is None: - break - if response.object_data_ranges: - for data_range in response.object_data_ranges: - if data_range.range_end: - pending_read_ids.discard( - data_range.read_range.read_id - ) - yield response - - return generator() - - strategy = _ReadResumptionStrategy() - retry_manager = _BidiStreamRetryManager( - strategy, lambda r, s: send_ranges_and_get_bytes(r, s, metadata=metadata) - ) - - await retry_manager.execute(initial_state, retry_policy) - - if initial_state.get("read_handle"): - self.read_handle = initial_state["read_handle"] - - async def close(self): - """ - Closes the underlying bidi-gRPC connection. 
- """ - if not self._is_stream_open: - raise ValueError("Underlying bidi-gRPC stream is not open") - - if self.read_obj_str: - await self.read_obj_str.close() - self.read_obj_str = None - self._is_stream_open = False - - @property - def is_stream_open(self) -> bool: - return self._is_stream_open diff --git a/google/cloud/storage/_experimental/asyncio/async_read_object_stream.py b/google/cloud/storage/_experimental/asyncio/async_read_object_stream.py index b59c7d162..cb39386f2 100644 --- a/google/cloud/storage/_experimental/asyncio/async_read_object_stream.py +++ b/google/cloud/storage/_experimental/asyncio/async_read_object_stream.py @@ -1,198 +1,11 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -NOTE: -This is _experimental module for upcoming support for Rapid Storage. -(https://cloud.google.com/blog/products/storage-data-transfer/high-performance-storage-innovations-for-ai-hpc#:~:text=your%20AI%20workloads%3A-,Rapid%20Storage,-%3A%20A%20new) +import warnings -APIs may not work as intended and are not stable yet. Feature is not -GA(Generally Available) yet, please contact your TAM(Technical Account Manager) -if you want to use these APIs. 
+# Import everything from the new stable module +from google.cloud.storage.asyncio.async_read_object_stream import * # noqa -""" - -from typing import List, Optional, Tuple -from google.cloud import _storage_v2 -from google.cloud.storage._experimental.asyncio.async_grpc_client import AsyncGrpcClient -from google.cloud.storage._experimental.asyncio.async_abstract_object_stream import ( - _AsyncAbstractObjectStream, +warnings.warn( + "google.cloud.storage._experimental.asyncio.async_read_object_stream has been moved to google.cloud.storage.asyncio.async_read_object_stream. " + "Please update your imports.", + DeprecationWarning, + stacklevel=2, ) - -from google.api_core.bidi_async import AsyncBidiRpc - - -class _AsyncReadObjectStream(_AsyncAbstractObjectStream): - """Class representing a gRPC bidi-stream for reading data from a GCS ``Object``. - - This class provides a unix socket-like interface to a GCS ``Object``, with - methods like ``open``, ``close``, ``send``, and ``recv``. - - :type client: :class:`~google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client` - :param client: async grpc client to use for making API requests. - - :type bucket_name: str - :param bucket_name: The name of the GCS ``bucket`` containing the object. - - :type object_name: str - :param object_name: The name of the GCS ``object`` to be read. - - :type generation_number: int - :param generation_number: (Optional) If present, selects a specific revision of - this object. - - :type read_handle: _storage_v2.BidiReadHandle - :param read_handle: (Optional) An existing handle for reading the object. - If provided, opening the bidi-gRPC connection will be faster. 
- """ - - def __init__( - self, - client: AsyncGrpcClient.grpc_client, - bucket_name: str, - object_name: str, - generation_number: Optional[int] = None, - read_handle: Optional[_storage_v2.BidiReadHandle] = None, - ) -> None: - if client is None: - raise ValueError("client must be provided") - if bucket_name is None: - raise ValueError("bucket_name must be provided") - if object_name is None: - raise ValueError("object_name must be provided") - - super().__init__( - bucket_name=bucket_name, - object_name=object_name, - generation_number=generation_number, - ) - self.client: AsyncGrpcClient.grpc_client = client - self.read_handle: Optional[_storage_v2.BidiReadHandle] = read_handle - - self._full_bucket_name = f"projects/_/buckets/{self.bucket_name}" - - self.rpc = self.client._client._transport._wrapped_methods[ - self.client._client._transport.bidi_read_object - ] - self.metadata = (("x-goog-request-params", f"bucket={self._full_bucket_name}"),) - self.socket_like_rpc: Optional[AsyncBidiRpc] = None - self._is_stream_open: bool = False - self.persisted_size: Optional[int] = None - - async def open(self, metadata: Optional[List[Tuple[str, str]]] = None) -> None: - """Opens the bidi-gRPC connection to read from the object. - - This method sends an initial request to start the stream and receives - the first response containing metadata and a read handle. - - Args: - metadata (Optional[List[Tuple[str, str]]]): Additional metadata - to send with the initial stream request, e.g., for routing tokens. 
- """ - if self._is_stream_open: - raise ValueError("Stream is already open") - - read_handle = self.read_handle if self.read_handle else None - - read_object_spec = _storage_v2.BidiReadObjectSpec( - bucket=self._full_bucket_name, - object=self.object_name, - generation=self.generation_number if self.generation_number else None, - read_handle=read_handle, - ) - self.first_bidi_read_req = _storage_v2.BidiReadObjectRequest( - read_object_spec=read_object_spec - ) - - # Build the x-goog-request-params header - request_params = [f"bucket={self._full_bucket_name}"] - other_metadata = [] - if metadata: - for key, value in metadata: - if key == "x-goog-request-params": - request_params.append(value) - else: - other_metadata.append((key, value)) - - current_metadata = other_metadata - current_metadata.append(("x-goog-request-params", ",".join(request_params))) - - self.socket_like_rpc = AsyncBidiRpc( - self.rpc, - initial_request=self.first_bidi_read_req, - metadata=current_metadata, - ) - await self.socket_like_rpc.open() # this is actually 1 send - response = await self.socket_like_rpc.recv() - # populated only in the first response of bidi-stream and when opened - # without using `read_handle` - if hasattr(response, "metadata") and response.metadata: - if self.generation_number is None: - self.generation_number = response.metadata.generation - # update persisted size - self.persisted_size = response.metadata.size - - if response and response.read_handle: - self.read_handle = response.read_handle - - self._is_stream_open = True - - async def close(self) -> None: - """Closes the bidi-gRPC connection.""" - if not self._is_stream_open: - raise ValueError("Stream is not open") - await self.requests_done() - await self.socket_like_rpc.close() - self._is_stream_open = False - - async def requests_done(self): - """Signals that all requests have been sent.""" - - await self.socket_like_rpc.send(None) - await self.socket_like_rpc.recv() - - async def send( - self, 
bidi_read_object_request: _storage_v2.BidiReadObjectRequest - ) -> None: - """Sends a request message on the stream. - - Args: - bidi_read_object_request (:class:`~google.cloud._storage_v2.types.BidiReadObjectRequest`): - The request message to send. This is typically used to specify - the read offset and limit. - """ - if not self._is_stream_open: - raise ValueError("Stream is not open") - await self.socket_like_rpc.send(bidi_read_object_request) - - async def recv(self) -> _storage_v2.BidiReadObjectResponse: - """Receives a response from the stream. - - This method waits for the next message from the server, which could - contain object data or metadata. - - Returns: - :class:`~google.cloud._storage_v2.types.BidiReadObjectResponse`: - The response message from the server. - """ - if not self._is_stream_open: - raise ValueError("Stream is not open") - response = await self.socket_like_rpc.recv() - # Update read_handle if present in response - if response and response.read_handle: - self.read_handle = response.read_handle - return response - - @property - def is_stream_open(self) -> bool: - return self._is_stream_open diff --git a/google/cloud/storage/_experimental/asyncio/async_write_object_stream.py b/google/cloud/storage/_experimental/asyncio/async_write_object_stream.py index b73a43d1b..132e2c9d0 100644 --- a/google/cloud/storage/_experimental/asyncio/async_write_object_stream.py +++ b/google/cloud/storage/_experimental/asyncio/async_write_object_stream.py @@ -1,236 +1,11 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -""" -NOTE: -This is _experimental module for upcoming support for Rapid Storage. -(https://cloud.google.com/blog/products/storage-data-transfer/high-performance-storage-innovations-for-ai-hpc#:~:text=your%20AI%20workloads%3A-,Rapid%20Storage,-%3A%20A%20new) +import warnings -APIs may not work as intended and are not stable yet. Feature is not -GA(Generally Available) yet, please contact your TAM(Technical Account Manager) -if you want to use these Rapid Storage APIs. +# Import everything from the new stable module +from google.cloud.storage.asyncio.async_write_object_stream import * # noqa -""" -from typing import List, Optional, Tuple -from google.cloud import _storage_v2 -from google.cloud.storage._experimental.asyncio import _utils -from google.cloud.storage._experimental.asyncio.async_grpc_client import AsyncGrpcClient -from google.cloud.storage._experimental.asyncio.async_abstract_object_stream import ( - _AsyncAbstractObjectStream, +warnings.warn( + "google.cloud.storage._experimental.asyncio.async_write_object_stream has been moved to google.cloud.storage.asyncio.async_write_object_stream. " + "Please update your imports.", + DeprecationWarning, + stacklevel=2, ) -from google.api_core.bidi_async import AsyncBidiRpc - - -class _AsyncWriteObjectStream(_AsyncAbstractObjectStream): - """Class representing a gRPC bidi-stream for writing data from a GCS - ``Appendable Object``. - - This class provides a unix socket-like interface to a GCS ``Object``, with - methods like ``open``, ``close``, ``send``, and ``recv``. - - :type client: :class:`~google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client` - :param client: async grpc client to use for making API requests. - - :type bucket_name: str - :param bucket_name: The name of the GCS ``bucket`` containing the object. 
- - :type object_name: str - :param object_name: The name of the GCS ``Appendable Object`` to be write. - - :type generation_number: int - :param generation_number: (Optional) If present, creates writer for that - specific revision of that object. Use this to append data to an - existing Appendable Object. - - Setting to ``0`` makes the `writer.open()` succeed only if - object doesn't exist in the bucket (useful for not accidentally - overwriting existing objects). - - Warning: If `None`, a new object is created. If an object with the - same name already exists, it will be overwritten the moment - `writer.open()` is called. - - :type write_handle: _storage_v2.BidiWriteHandle - :param write_handle: (Optional) An existing handle for writing the object. - If provided, opening the bidi-gRPC connection will be faster. - """ - - def __init__( - self, - client: AsyncGrpcClient.grpc_client, - bucket_name: str, - object_name: str, - generation_number: Optional[int] = None, # None means new object - write_handle: Optional[_storage_v2.BidiWriteHandle] = None, - routing_token: Optional[str] = None, - ) -> None: - if client is None: - raise ValueError("client must be provided") - if bucket_name is None: - raise ValueError("bucket_name must be provided") - if object_name is None: - raise ValueError("object_name must be provided") - - super().__init__( - bucket_name=bucket_name, - object_name=object_name, - generation_number=generation_number, - ) - self.client: AsyncGrpcClient.grpc_client = client - self.write_handle: Optional[_storage_v2.BidiWriteHandle] = write_handle - self.routing_token: Optional[str] = routing_token - - self._full_bucket_name = f"projects/_/buckets/{self.bucket_name}" - - self.rpc = self.client._client._transport._wrapped_methods[ - self.client._client._transport.bidi_write_object - ] - - self.metadata = (("x-goog-request-params", f"bucket={self._full_bucket_name}"),) - self.socket_like_rpc: Optional[AsyncBidiRpc] = None - self._is_stream_open: bool = False 
- self.first_bidi_write_req = None - self.persisted_size = 0 - self.object_resource: Optional[_storage_v2.Object] = None - - async def open(self, metadata: Optional[List[Tuple[str, str]]] = None) -> None: - """ - Opens the bidi-gRPC connection to write to the object. - - This method sends an initial request to start the stream and receives - the first response containing metadata and a write handle. - - :rtype: None - :raises ValueError: If the stream is already open. - :raises google.api_core.exceptions.FailedPrecondition: - if `generation_number` is 0 and object already exists. - """ - if self._is_stream_open: - raise ValueError("Stream is already open") - - # Create a new object or overwrite existing one if generation_number - # is None. This makes it consistent with GCS JSON API behavior. - # Created object type would be Appendable Object. - # if `generation_number` == 0 new object will be created only if there - # isn't any existing object. - if self.generation_number is None or self.generation_number == 0: - self.first_bidi_write_req = _storage_v2.BidiWriteObjectRequest( - write_object_spec=_storage_v2.WriteObjectSpec( - resource=_storage_v2.Object( - name=self.object_name, bucket=self._full_bucket_name - ), - appendable=True, - if_generation_match=self.generation_number, - ), - ) - else: - self.first_bidi_write_req = _storage_v2.BidiWriteObjectRequest( - append_object_spec=_storage_v2.AppendObjectSpec( - bucket=self._full_bucket_name, - object=self.object_name, - generation=self.generation_number, - write_handle=self.write_handle if self.write_handle else None, - routing_token=self.routing_token if self.routing_token else None, - ), - ) - - request_param_values = [f"bucket={self._full_bucket_name}"] - final_metadata = [] - if metadata: - for key, value in metadata: - if key == "x-goog-request-params": - request_param_values.append(value) - else: - final_metadata.append((key, value)) - - final_metadata.append(("x-goog-request-params", 
",".join(request_param_values))) - - self.socket_like_rpc = AsyncBidiRpc( - self.rpc, - initial_request=self.first_bidi_write_req, - metadata=final_metadata, - ) - - await self.socket_like_rpc.open() # this is actually 1 send - response = await self.socket_like_rpc.recv() - self._is_stream_open = True - - if response.persisted_size: - self.persisted_size = response.persisted_size - - if response.resource: - if not response.resource.size: - # Appending to a 0 byte appendable object. - self.persisted_size = 0 - else: - self.persisted_size = response.resource.size - - self.generation_number = response.resource.generation - - if response.write_handle: - self.write_handle = response.write_handle - - async def close(self) -> None: - """Closes the bidi-gRPC connection.""" - if not self._is_stream_open: - raise ValueError("Stream is not open") - await self.requests_done() - await self.socket_like_rpc.close() - self._is_stream_open = False - - async def requests_done(self): - """Signals that all requests have been sent.""" - - await self.socket_like_rpc.send(None) - _utils.update_write_handle_if_exists(self, await self.socket_like_rpc.recv()) - - async def send( - self, bidi_write_object_request: _storage_v2.BidiWriteObjectRequest - ) -> None: - """Sends a request message on the stream. - - Args: - bidi_write_object_request (:class:`~google.cloud._storage_v2.types.BidiReadObjectRequest`): - The request message to send. This is typically used to specify - the read offset and limit. - """ - if not self._is_stream_open: - raise ValueError("Stream is not open") - await self.socket_like_rpc.send(bidi_write_object_request) - - async def recv(self) -> _storage_v2.BidiWriteObjectResponse: - """Receives a response from the stream. - - This method waits for the next message from the server, which could - contain object data or metadata. - - Returns: - :class:`~google.cloud._storage_v2.types.BidiWriteObjectResponse`: - The response message from the server. 
- """ - if not self._is_stream_open: - raise ValueError("Stream is not open") - response = await self.socket_like_rpc.recv() - # Update write_handle if present in response - if response: - if response.write_handle: - self.write_handle = response.write_handle - if response.persisted_size is not None: - self.persisted_size = response.persisted_size - if response.resource and response.resource.size: - self.persisted_size = response.resource.size - return response - - @property - def is_stream_open(self) -> bool: - return self._is_stream_open diff --git a/google/cloud/storage/_experimental/asyncio/retry/_helpers.py b/google/cloud/storage/_experimental/asyncio/retry/_helpers.py index d9ad2462e..092986f58 100644 --- a/google/cloud/storage/_experimental/asyncio/retry/_helpers.py +++ b/google/cloud/storage/_experimental/asyncio/retry/_helpers.py @@ -1,125 +1,11 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
+import warnings -from __future__ import annotations +# Import everything from the new stable module +from google.cloud.storage.asyncio.retry._helpers import * # noqa -import logging -from typing import Tuple, Optional - -from google.api_core import exceptions -from google.cloud._storage_v2.types import ( - BidiReadObjectRedirectedError, - BidiWriteObjectRedirectedError, -) -from google.rpc import status_pb2 - -_BIDI_READ_REDIRECTED_TYPE_URL = ( - "type.googleapis.com/google.storage.v2.BidiReadObjectRedirectedError" +warnings.warn( + "google.cloud.storage._experimental.asyncio.retry._helpers has been moved to google.cloud.storage.asyncio.retry._helpers. " + "Please update your imports.", + DeprecationWarning, + stacklevel=2, ) -_BIDI_WRITE_REDIRECTED_TYPE_URL = ( - "type.googleapis.com/google.storage.v2.BidiWriteObjectRedirectedError" -) -logger = logging.getLogger(__name__) - - -def _handle_redirect( - exc: Exception, -) -> Tuple[Optional[str], Optional[bytes]]: - """ - Extracts routing token and read handle from a gRPC error. - - :type exc: Exception - :param exc: The exception to parse. - - :rtype: Tuple[Optional[str], Optional[bytes]] - :returns: A tuple of (routing_token, read_handle). 
- """ - routing_token = None - read_handle = None - - grpc_error = None - if isinstance(exc, exceptions.Aborted) and exc.errors: - grpc_error = exc.errors[0] - - if grpc_error: - if isinstance(grpc_error, BidiReadObjectRedirectedError): - routing_token = grpc_error.routing_token - if grpc_error.read_handle: - read_handle = grpc_error.read_handle - return routing_token, read_handle - - if hasattr(grpc_error, "trailing_metadata"): - trailers = grpc_error.trailing_metadata() - if not trailers: - return None, None - - status_details_bin = None - for key, value in trailers: - if key == "grpc-status-details-bin": - status_details_bin = value - break - - if status_details_bin: - status_proto = status_pb2.Status() - try: - status_proto.ParseFromString(status_details_bin) - for detail in status_proto.details: - if detail.type_url == _BIDI_READ_REDIRECTED_TYPE_URL: - redirect_proto = BidiReadObjectRedirectedError.deserialize( - detail.value - ) - if redirect_proto.routing_token: - routing_token = redirect_proto.routing_token - if redirect_proto.read_handle: - read_handle = redirect_proto.read_handle - break - except Exception as e: - logger.error(f"Error unpacking redirect: {e}") - - return routing_token, read_handle - - -def _extract_bidi_writes_redirect_proto(exc: Exception): - grpc_error = None - if isinstance(exc, exceptions.Aborted) and exc.errors: - grpc_error = exc.errors[0] - - if grpc_error: - if isinstance(grpc_error, BidiWriteObjectRedirectedError): - return grpc_error - - if hasattr(grpc_error, "trailing_metadata"): - trailers = grpc_error.trailing_metadata() - if not trailers: - return - - status_details_bin = None - for key, value in trailers: - if key == "grpc-status-details-bin": - status_details_bin = value - break - - if status_details_bin: - status_proto = status_pb2.Status() - try: - status_proto.ParseFromString(status_details_bin) - for detail in status_proto.details: - if detail.type_url == _BIDI_WRITE_REDIRECTED_TYPE_URL: - redirect_proto = 
BidiWriteObjectRedirectedError.deserialize( - detail.value - ) - return redirect_proto - except Exception: - logger.error("Error unpacking redirect details from gRPC error.") - pass diff --git a/google/cloud/storage/_experimental/asyncio/retry/base_strategy.py b/google/cloud/storage/_experimental/asyncio/retry/base_strategy.py index ff193f109..58c58136c 100644 --- a/google/cloud/storage/_experimental/asyncio/retry/base_strategy.py +++ b/google/cloud/storage/_experimental/asyncio/retry/base_strategy.py @@ -1,83 +1,11 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. +import warnings -import abc -from typing import Any, Iterable +# Import everything from the new stable module +from google.cloud.storage.asyncio.retry.base_strategy import * # noqa - -class _BaseResumptionStrategy(abc.ABC): - """Abstract base class defining the interface for a bidi stream resumption strategy. - - This class defines the skeleton for a pluggable strategy that contains - all the service-specific logic for a given bidi operation (e.g., reads - or writes). This allows a generic retry manager to handle the common - retry loop while sending the state management and request generation - to a concrete implementation of this class. - """ - - @abc.abstractmethod - def generate_requests(self, state: Any) -> Iterable[Any]: - """Generates the next batch of requests based on the current state. - - This method is called at the beginning of each retry attempt. 
It should - inspect the provided state object and generate the appropriate list of - request protos to send to the server. For example, a read strategy - would use this to implement "Smarter Resumption" by creating smaller - `ReadRange` requests for partially downloaded ranges. For bidi-writes, - it will set the `write_offset` field to the persisted size received - from the server in the next request. - - :type state: Any - :param state: An object containing all the state needed for the - operation (e.g., requested ranges, user buffers, - bytes written). - """ - pass - - @abc.abstractmethod - def update_state_from_response(self, response: Any, state: Any) -> None: - """Updates the state based on a successful server response. - - This method is called for every message received from the server. It is - responsible for processing the response and updating the shared state - object. - - :type response: Any - :param response: The response message received from the server. - - :type state: Any - :param state: The shared state object for the operation, which will be - mutated by this method. - """ - pass - - @abc.abstractmethod - async def recover_state_on_failure(self, error: Exception, state: Any) -> None: - """Prepares the state for the next retry attempt after a failure. - - This method is called when a retriable gRPC error occurs. It is - responsible for performing any necessary actions to ensure the next - retry attempt can succeed. For bidi reads, its primary role is to - handle the `BidiReadObjectRedirectError` by extracting the - `routing_token` and updating the state. For bidi writes, it will update - the state to reflect any bytes that were successfully persisted before - the failure. - - :type error: :class:`Exception` - :param error: The exception that was caught by the retry engine. - - :type state: Any - :param state: The shared state object for the operation. 
- """ - pass +warnings.warn( + "google.cloud.storage._experimental.asyncio.retry.base_strategy has been moved to google.cloud.storage.asyncio.retry.base_strategy. " + "Please update your imports.", + DeprecationWarning, + stacklevel=2, +) diff --git a/google/cloud/storage/_experimental/asyncio/retry/bidi_stream_retry_manager.py b/google/cloud/storage/_experimental/asyncio/retry/bidi_stream_retry_manager.py index a8caae4eb..331ee1326 100644 --- a/google/cloud/storage/_experimental/asyncio/retry/bidi_stream_retry_manager.py +++ b/google/cloud/storage/_experimental/asyncio/retry/bidi_stream_retry_manager.py @@ -1,69 +1,11 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. +import warnings -import logging -from typing import Any, AsyncIterator, Callable +# Import everything from the new stable module +from google.cloud.storage.asyncio.retry.bidi_stream_retry_manager import * # noqa -from google.cloud.storage._experimental.asyncio.retry.base_strategy import ( - _BaseResumptionStrategy, +warnings.warn( + "google.cloud.storage._experimental.asyncio.retry.bidi_stream_retry_manager has been moved to google.cloud.storage.asyncio.retry.bidi_stream_retry_manager. 
" + "Please update your imports.", + DeprecationWarning, + stacklevel=2, ) - -logger = logging.getLogger(__name__) - - -class _BidiStreamRetryManager: - """Manages the generic retry loop for a bidi streaming operation.""" - - def __init__( - self, - strategy: _BaseResumptionStrategy, - send_and_recv: Callable[..., AsyncIterator[Any]], - ): - """Initializes the retry manager. - Args: - strategy: The strategy for managing the state of a specific - bidi operation (e.g., reads or writes). - send_and_recv: An async callable that opens a new gRPC stream. - """ - self._strategy = strategy - self._send_and_recv = send_and_recv - - async def execute(self, initial_state: Any, retry_policy): - """ - Executes the bidi operation with the configured retry policy. - Args: - initial_state: An object containing all state for the operation. - retry_policy: The `google.api_core.retry.AsyncRetry` object to - govern the retry behavior for this specific operation. - """ - state = initial_state - - async def attempt(): - requests = self._strategy.generate_requests(state) - stream = self._send_and_recv(requests, state) - try: - async for response in stream: - self._strategy.update_state_from_response(response, state) - return - except Exception as e: - if retry_policy._predicate(e): - logger.info( - f"Bidi stream operation failed: {e}. Attempting state recovery and retry." 
- ) - await self._strategy.recover_state_on_failure(e, state) - raise e - - wrapped_attempt = retry_policy(attempt) - - await wrapped_attempt() diff --git a/google/cloud/storage/_experimental/asyncio/retry/reads_resumption_strategy.py b/google/cloud/storage/_experimental/asyncio/retry/reads_resumption_strategy.py index 916b82e6e..8f7051b6a 100644 --- a/google/cloud/storage/_experimental/asyncio/retry/reads_resumption_strategy.py +++ b/google/cloud/storage/_experimental/asyncio/retry/reads_resumption_strategy.py @@ -1,157 +1,11 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. +import warnings -from typing import Any, Dict, List, IO -import logging +# Import everything from the new stable module +from google.cloud.storage.asyncio.retry.reads_resumption_strategy import * # noqa -from google_crc32c import Checksum -from google.cloud import _storage_v2 as storage_v2 -from google.cloud.storage.exceptions import DataCorruption -from google.cloud.storage._experimental.asyncio.retry._helpers import ( - _handle_redirect, +warnings.warn( + "google.cloud.storage._experimental.asyncio.retry.reads_resumption_strategy has been moved to google.cloud.storage.asyncio.retry.reads_resumption_strategy. 
" + "Please update your imports.", + DeprecationWarning, + stacklevel=2, ) -from google.cloud.storage._experimental.asyncio.retry.base_strategy import ( - _BaseResumptionStrategy, -) - - -_BIDI_READ_REDIRECTED_TYPE_URL = ( - "type.googleapis.com/google.storage.v2.BidiReadObjectRedirectedError" -) -logger = logging.getLogger(__name__) - - -class _DownloadState: - """A helper class to track the state of a single range download.""" - - def __init__( - self, initial_offset: int, initial_length: int, user_buffer: IO[bytes] - ): - self.initial_offset = initial_offset - self.initial_length = initial_length - self.user_buffer = user_buffer - self.bytes_written = 0 - self.next_expected_offset = initial_offset - self.is_complete = False - - -class _ReadResumptionStrategy(_BaseResumptionStrategy): - """The concrete resumption strategy for bidi reads.""" - - def generate_requests(self, state: Dict[str, Any]) -> List[storage_v2.ReadRange]: - """Generates new ReadRange requests for all incomplete downloads. - - :type state: dict - :param state: A dictionary mapping a read_id to its corresponding - _DownloadState object. - """ - pending_requests = [] - download_states: Dict[int, _DownloadState] = state["download_states"] - - for read_id, read_state in download_states.items(): - if not read_state.is_complete: - new_offset = read_state.initial_offset + read_state.bytes_written - - # Calculate remaining length. If initial_length is 0 (read to end), - # it stays 0. Otherwise, subtract bytes_written. 
- new_length = 0 - if read_state.initial_length > 0: - new_length = read_state.initial_length - read_state.bytes_written - - new_request = storage_v2.ReadRange( - read_offset=new_offset, - read_length=new_length, - read_id=read_id, - ) - pending_requests.append(new_request) - return pending_requests - - def update_state_from_response( - self, response: storage_v2.BidiReadObjectResponse, state: Dict[str, Any] - ) -> None: - """Processes a server response, performs integrity checks, and updates state.""" - - # Capture read_handle if provided. - if response.read_handle: - state["read_handle"] = response.read_handle - - download_states = state["download_states"] - - for object_data_range in response.object_data_ranges: - # Ignore empty ranges or ranges for IDs not in our state - # (e.g., from a previously cancelled request on the same stream). - if not object_data_range.read_range: - logger.warning( - "Received response with missing read_range field; ignoring." - ) - continue - - read_id = object_data_range.read_range.read_id - if read_id not in download_states: - logger.warning( - f"Received data for unknown or stale read_id {read_id}; ignoring." - ) - continue - - read_state = download_states[read_id] - - # Offset Verification - chunk_offset = object_data_range.read_range.read_offset - if chunk_offset != read_state.next_expected_offset: - raise DataCorruption( - response, - f"Offset mismatch for read_id {read_id}. " - f"Expected {read_state.next_expected_offset}, got {chunk_offset}", - ) - - # Checksum Verification - # We must validate data before updating state or writing to buffer. - data = object_data_range.checksummed_data.content - server_checksum = object_data_range.checksummed_data.crc32c - - if server_checksum is not None: - client_checksum = int.from_bytes(Checksum(data).digest(), "big") - if server_checksum != client_checksum: - raise DataCorruption( - response, - f"Checksum mismatch for read_id {read_id}. 
" - f"Server sent {server_checksum}, client calculated {client_checksum}.", - ) - - # Update State & Write Data - chunk_size = len(data) - read_state.user_buffer.write(data) - read_state.bytes_written += chunk_size - read_state.next_expected_offset += chunk_size - - # Final Byte Count Verification - if object_data_range.range_end: - read_state.is_complete = True - if ( - read_state.initial_length != 0 - and read_state.bytes_written > read_state.initial_length - ): - raise DataCorruption( - response, - f"Byte count mismatch for read_id {read_id}. " - f"Expected {read_state.initial_length}, got {read_state.bytes_written}", - ) - - async def recover_state_on_failure(self, error: Exception, state: Any) -> None: - """Handles BidiReadObjectRedirectedError for reads.""" - routing_token, read_handle = _handle_redirect(error) - if routing_token: - state["routing_token"] = routing_token - if read_handle: - state["read_handle"] = read_handle diff --git a/google/cloud/storage/_experimental/asyncio/retry/writes_resumption_strategy.py b/google/cloud/storage/_experimental/asyncio/retry/writes_resumption_strategy.py index 09b22cb8e..7d2493841 100644 --- a/google/cloud/storage/_experimental/asyncio/retry/writes_resumption_strategy.py +++ b/google/cloud/storage/_experimental/asyncio/retry/writes_resumption_strategy.py @@ -1,147 +1,11 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
"""Deprecated alias module.

The implementation now lives in
``google.cloud.storage.asyncio.retry.writes_resumption_strategy``; this
module re-exports it and emits a :class:`DeprecationWarning` at import time.
"""

import warnings

# Import everything from the new stable module
from google.cloud.storage.asyncio.retry.writes_resumption_strategy import *  # noqa

# stacklevel=2 points the warning at the caller's import statement.
warnings.warn(
    "google.cloud.storage._experimental.asyncio.retry.writes_resumption_strategy has been moved to google.cloud.storage.asyncio.retry.writes_resumption_strategy. "
    "Please update your imports.",
    DeprecationWarning,
    stacklevel=2,
)
"""Deprecated alias module.

The implementation now lives in ``google.cloud.storage.grpc_client``; this
module re-exports it and emits a :class:`DeprecationWarning` at import time.
"""

import warnings

# Import everything from the new stable module
from google.cloud.storage.grpc_client import *  # noqa

# stacklevel=2 points the warning at the caller's import statement.
warnings.warn(
    "google.cloud.storage._experimental.grpc_client has been moved to google.cloud.storage.grpc_client. "
    "Please update your imports.",
    DeprecationWarning,
    stacklevel=2,
)
def raise_if_no_fast_crc32c():
    """Check if the C-accelerated version of google-crc32c is available.

    If not, raise an error to prevent silent performance degradation.

    :raises google.api_core.exceptions.FailedPrecondition: If the C extension
        is not available.
    :returns: ``True`` if the C extension is available.
    :rtype: bool
    """
    if google_crc32c.implementation != "c":
        raise exceptions.FailedPrecondition(
            "The google-crc32c package is not installed with C support. "
            # Fixed: trailing space so the concatenated sentences don't run
            # together ("checks.For more information").
            "C extension is required for faster data integrity checks. "
            "For more information, see https://github.com/googleapis/python-crc32c."
        )
    # The docstring documents a ``True`` result on success; previously the
    # function implicitly returned ``None``.
    return True


def update_write_handle_if_exists(obj, response):
    """Update the ``write_handle`` attribute of ``obj`` if present in ``response``.

    :param obj: Object whose ``write_handle`` attribute may be updated.
    :param response: A response message that may carry a ``write_handle``.
    """
    if hasattr(response, "write_handle") and response.write_handle is not None:
        obj.write_handle = response.write_handle
class _AsyncAbstractObjectStream(abc.ABC):
    """Abstract base class to represent gRPC bidi-stream for GCS ``Object``.

    Concrete implementation of this class could be ``_AsyncReadObjectStream``
    or ``_AsyncWriteObjectStream``.

    :type bucket_name: str
    :param bucket_name: (Optional) The name of the bucket containing the object.

    :type object_name: str
    :param object_name: (Optional) The name of the object.

    :type generation_number: int
    :param generation_number: (Optional) If present, selects a specific revision of
        this object.

    :type handle: Any
    :param handle: (Optional) The handle for the object, could be read_handle or
        write_handle, based on how the stream is used.
    """

    def __init__(
        self,
        bucket_name: str,
        object_name: str,
        generation_number: Optional[int] = None,
        handle: Optional[Any] = None,
    ) -> None:
        super().__init__()
        self.bucket_name: str = bucket_name
        self.object_name: str = object_name
        self.generation_number: Optional[int] = generation_number
        self.handle: Optional[Any] = handle

    @abc.abstractmethod
    async def open(self) -> None:
        """Open the underlying bidi-gRPC stream."""
        pass

    @abc.abstractmethod
    async def close(self) -> None:
        """Close the underlying bidi-gRPC stream."""
        pass

    @abc.abstractmethod
    async def send(self, protobuf: Any) -> None:
        """Send one request protobuf over the stream."""
        pass

    @abc.abstractmethod
    async def recv(self) -> Any:
        """Receive one response protobuf from the stream."""
        pass
def _is_write_retryable(exc):
    """Predicate to determine if a write operation should be retried.

    :param exc: The exception raised by the write attempt.
    :returns: ``True`` if the error is transient (5xx/429/deadline) or carries
        a ``BidiWriteObjectRedirectedError``, either directly, as the first
        wrapped gRPC error, or packed into the ``grpc-status-details-bin``
        trailer of an ``Aborted`` error.
    :rtype: bool
    """
    if isinstance(
        exc,
        (
            exceptions.InternalServerError,
            exceptions.ServiceUnavailable,
            exceptions.DeadlineExceeded,
            exceptions.TooManyRequests,
            BidiWriteObjectRedirectedError,
        ),
    ):
        logger.warning(f"Retryable write exception encountered: {exc}")
        return True

    # Redirects may arrive wrapped inside an Aborted error; inspect the first
    # underlying gRPC error and its trailing metadata.
    if isinstance(exc, exceptions.Aborted) and exc.errors:
        grpc_error = exc.errors[0]
        if isinstance(grpc_error, BidiWriteObjectRedirectedError):
            return True

        trailers = grpc_error.trailing_metadata()
        if not trailers:
            return False

        status_details_bin = None
        for key, value in trailers:
            if key == "grpc-status-details-bin":
                status_details_bin = value
                break

        if status_details_bin:
            status_proto = status_pb2.Status()
            try:
                status_proto.ParseFromString(status_details_bin)
                for detail in status_proto.details:
                    if detail.type_url == _BIDI_WRITE_REDIRECTED_TYPE_URL:
                        return True
            except Exception:
                # Fixed: previously passed a set literal ({exc}) as a lazy
                # format argument with no placeholder in the message.
                logger.error(
                    "Error unpacking redirect details from gRPC error: %s", exc
                )
                return False
    return False
class AsyncAppendableObjectWriter:
    """Class for appending data to a GCS Appendable Object asynchronously."""

    def __init__(
        self,
        client: AsyncGrpcClient,
        bucket_name: str,
        object_name: str,
        generation: Optional[int] = None,
        write_handle: Optional[_storage_v2.BidiWriteHandle] = None,
        writer_options: Optional[dict] = None,
    ):
        """
        Class for appending data to a GCS Appendable Object.

        Example usage:

        ```

        from google.cloud.storage.asyncio.async_grpc_client import AsyncGrpcClient
        from google.cloud.storage.asyncio.async_appendable_object_writer import AsyncAppendableObjectWriter
        import asyncio

        client = AsyncGrpcClient()
        bucket_name = "my-bucket"
        object_name = "my-appendable-object"

        # instantiate the writer
        writer = AsyncAppendableObjectWriter(client, bucket_name, object_name)
        # open the writer, (underlying gRPC bidi-stream will be opened)
        await writer.open()

        # append data, it can be called multiple times.
        await writer.append(b"hello world")
        await writer.append(b"some more data")

        # optionally flush data to persist.
        await writer.flush()

        # close the gRPC stream.
        # Please note closing the program will also close the stream,
        # however it's recommended to close the stream if no more data to append
        # to clean up gRPC connection (which means CPU/memory/network resources)
        await writer.close()
        ```

        :type client: :class:`~google.cloud.storage.asyncio.async_grpc_client.AsyncGrpcClient`
        :param client: async grpc client to use for making API requests.

        :type bucket_name: str
        :param bucket_name: The name of the GCS bucket containing the object.

        :type object_name: str
        :param object_name: The name of the GCS Appendable Object to be written.

        :type generation: Optional[int]
        :param generation: (Optional) If present, creates writer for that
            specific revision of that object. Use this to append data to an
            existing Appendable Object.

            Setting to ``0`` makes the `writer.open()` succeed only if
            object doesn't exist in the bucket (useful for not accidentally
            overwriting existing objects).

            Warning: If `None`, a new object is created. If an object with the
            same name already exists, it will be overwritten the moment
            `writer.open()` is called.

        :type write_handle: _storage_v2.BidiWriteHandle
        :param write_handle: (Optional) A handle for writing the object.
            If provided, opening the bidi-gRPC connection will be faster.

        :type writer_options: dict
        :param writer_options: (Optional) A dictionary of writer options.
            Supported options:
            - "FLUSH_INTERVAL_BYTES": int
                The number of bytes to append before "persisting" data in GCS
                servers. Default is `_DEFAULT_FLUSH_INTERVAL_BYTES`.
                Must be a multiple of `_MAX_CHUNK_SIZE_BYTES`.
        """
        _utils.raise_if_no_fast_crc32c()
        self.client = client
        self.bucket_name = bucket_name
        self.object_name = object_name
        self.write_handle = write_handle
        self.generation = generation

        self.write_obj_stream: Optional[_AsyncWriteObjectStream] = None
        self._is_stream_open: bool = False
        # `offset` is the latest size of the object without staleness.
        self.offset: Optional[int] = None
        # `persisted_size` is the total_bytes persisted in the GCS server.
        # Please note: `offset` and `persisted_size` are same when the stream is
        # opened.
        self.persisted_size: Optional[int] = None
        if writer_options is None:
            writer_options = {}
        self.flush_interval = writer_options.get(
            "FLUSH_INTERVAL_BYTES", _DEFAULT_FLUSH_INTERVAL_BYTES
        )
        if self.flush_interval < _MAX_CHUNK_SIZE_BYTES:
            raise exceptions.OutOfRange(
                f"flush_interval must be >= {_MAX_CHUNK_SIZE_BYTES} , but provided {self.flush_interval}"
            )
        if self.flush_interval % _MAX_CHUNK_SIZE_BYTES != 0:
            raise exceptions.OutOfRange(
                f"flush_interval must be a multiple of {_MAX_CHUNK_SIZE_BYTES}, but provided {self.flush_interval}"
            )
        self.bytes_appended_since_last_flush = 0
        self._routing_token: Optional[str] = None
        self.object_resource: Optional[_storage_v2.Object] = None

    async def state_lookup(self) -> int:
        """Returns the persisted_size

        :rtype: int
        :returns: persisted size.

        :raises ValueError: If the stream is not open (i.e., `open()` has not
            been called).
        """
        if not self._is_stream_open:
            raise ValueError("Stream is not open. Call open() before state_lookup().")

        await self.write_obj_stream.send(
            _storage_v2.BidiWriteObjectRequest(
                state_lookup=True,
            )
        )
        response = await self.write_obj_stream.recv()
        self.persisted_size = response.persisted_size
        return self.persisted_size

    def _on_open_error(self, exc):
        """Extracts routing token and write handle on redirect error during open."""
        redirect_proto = _extract_bidi_writes_redirect_proto(exc)
        if redirect_proto:
            if redirect_proto.routing_token:
                self._routing_token = redirect_proto.routing_token
            if redirect_proto.write_handle:
                self.write_handle = redirect_proto.write_handle
            if redirect_proto.generation:
                self.generation = redirect_proto.generation

    async def open(
        self,
        retry_policy: Optional[AsyncRetry] = None,
        metadata: Optional[List[Tuple[str, str]]] = None,
    ) -> None:
        """Opens the underlying bidi-gRPC stream.

        :type retry_policy: :class:`~google.api_core.retry_async.AsyncRetry`
        :param retry_policy: (Optional) Retry policy; a redirect-aware
            `on_error` handler is combined with any user-supplied one.

        :type metadata: List[Tuple[str, str]]
        :param metadata: (Optional) Metadata to send with the request.

        :raises ValueError: If the stream is already open.

        """
        if self._is_stream_open:
            raise ValueError("Underlying bidi-gRPC stream is already open")

        if retry_policy is None:
            retry_policy = AsyncRetry(
                predicate=_is_write_retryable, on_error=self._on_open_error
            )
        else:
            original_on_error = retry_policy._on_error

            # Chain our redirect handler in front of the caller's on_error so
            # routing token / write handle are always captured.
            def combined_on_error(exc):
                self._on_open_error(exc)
                if original_on_error:
                    original_on_error(exc)

            retry_policy = AsyncRetry(
                predicate=_is_write_retryable,
                initial=retry_policy._initial,
                maximum=retry_policy._maximum,
                multiplier=retry_policy._multiplier,
                deadline=retry_policy._deadline,
                on_error=combined_on_error,
            )

        async def _do_open():
            current_metadata = list(metadata) if metadata else []

            # Cleanup stream from previous failed attempt, if any.
            if self.write_obj_stream:
                if self.write_obj_stream.is_stream_open:
                    try:
                        await self.write_obj_stream.close()
                    except Exception as e:
                        # NOTE(review): `{e}` passes a set as the lazy-format
                        # argument and the message has no placeholder —
                        # probably meant `"... %s", e` — confirm.
                        logger.warning(
                            "Error closing previous write stream during open retry. Got exception: ",
                            {e},
                        )
                self.write_obj_stream = None
                self._is_stream_open = False

            self.write_obj_stream = _AsyncWriteObjectStream(
                client=self.client.grpc_client,
                bucket_name=self.bucket_name,
                object_name=self.object_name,
                generation_number=self.generation,
                write_handle=self.write_handle,
                routing_token=self._routing_token,
            )

            if self._routing_token:
                current_metadata.append(
                    ("x-goog-request-params", f"routing_token={self._routing_token}")
                )

            # NOTE(review): when `metadata` is None but a routing token was
            # appended above, `current_metadata` is discarded here — confirm
            # this is intended.
            await self.write_obj_stream.open(
                metadata=current_metadata if metadata else None
            )

            # Propagate server-assigned identity/state back onto the writer.
            if self.write_obj_stream.generation_number:
                self.generation = self.write_obj_stream.generation_number
            if self.write_obj_stream.write_handle:
                self.write_handle = self.write_obj_stream.write_handle
            if self.write_obj_stream.persisted_size is not None:
                self.persisted_size = self.write_obj_stream.persisted_size

            self._is_stream_open = True
            self._routing_token = None

        await retry_policy(_do_open)()

    async def append(
        self,
        data: bytes,
        retry_policy: Optional[AsyncRetry] = None,
        metadata: Optional[List[Tuple[str, str]]] = None,
    ) -> None:
        """Appends data to the Appendable object with automatic retries.

        calling `self.append` will append bytes at the end of the current size
        i.e. `self.offset` bytes relative to the beginning of the object.

        This method sends the provided `data` to the GCS server in chunks
        and persists data in GCS at every `_DEFAULT_FLUSH_INTERVAL_BYTES` bytes
        or at the last chunk whichever is earlier. Persisting is done by setting
        `flush=True` on request.

        :type data: bytes
        :param data: The bytes to append to the object.

        :type retry_policy: :class:`~google.api_core.retry_async.AsyncRetry`
        :param retry_policy: (Optional) The retry policy to use for the operation.

        :type metadata: List[Tuple[str, str]]
        :param metadata: (Optional) The metadata to be sent with the request.

        :raises ValueError: If the stream is not open.
        """
        if not self._is_stream_open:
            raise ValueError("Stream is not open. Call open() before append().")
        if not data:
            logger.debug("No data provided to append; returning without action.")
            return

        if retry_policy is None:
            retry_policy = AsyncRetry(predicate=_is_write_retryable)

        strategy = _WriteResumptionStrategy()
        buffer = io.BytesIO(data)
        attempt_count = 0

        def send_and_recv_generator(
            requests: List[BidiWriteObjectRequest],
            state: dict[str, _WriteState],
            metadata: Optional[List[Tuple[str, str]]] = None,
        ):
            async def generator():
                nonlocal attempt_count
                nonlocal requests
                attempt_count += 1
                resp = None
                write_state = state["write_state"]
                # If this is a retry or redirect, we must re-open the stream
                if attempt_count > 1 or write_state.routing_token:
                    logger.info(
                        f"Re-opening the stream with attempt_count: {attempt_count}"
                    )
                    if self.write_obj_stream and self.write_obj_stream.is_stream_open:
                        await self.write_obj_stream.close()

                    current_metadata = list(metadata) if metadata else []
                    if write_state.routing_token:
                        current_metadata.append(
                            (
                                "x-goog-request-params",
                                f"routing_token={write_state.routing_token}",
                            )
                        )
                        self._routing_token = write_state.routing_token

                    self._is_stream_open = False
                    await self.open(metadata=current_metadata)

                    # Rewind to the server-confirmed position; anything sent
                    # beyond persisted_size must be assumed lost.
                    write_state.persisted_size = self.persisted_size
                    write_state.write_handle = self.write_handle
                    write_state.routing_token = None

                    write_state.user_buffer.seek(write_state.persisted_size)
                    write_state.bytes_sent = write_state.persisted_size
                    write_state.bytes_since_last_flush = 0

                    requests = strategy.generate_requests(state)

                num_requests = len(requests)
                for i, chunk_req in enumerate(requests):
                    # Last chunk always flushes and requests a state lookup so
                    # we get a response to confirm the persisted size.
                    if i == num_requests - 1:
                        chunk_req.state_lookup = True
                        chunk_req.flush = True
                    await self.write_obj_stream.send(chunk_req)

                resp = await self.write_obj_stream.recv()
                if resp:
                    if resp.persisted_size is not None:
                        self.persisted_size = resp.persisted_size
                        state["write_state"].persisted_size = resp.persisted_size
                        self.offset = self.persisted_size
                    if resp.write_handle:
                        self.write_handle = resp.write_handle
                        state["write_state"].write_handle = resp.write_handle
                    self.bytes_appended_since_last_flush = 0

                yield resp

            return generator()

        # State initialization
        write_state = _WriteState(_MAX_CHUNK_SIZE_BYTES, buffer, self.flush_interval)
        write_state.write_handle = self.write_handle
        write_state.persisted_size = self.persisted_size
        write_state.bytes_sent = self.persisted_size
        write_state.bytes_since_last_flush = self.bytes_appended_since_last_flush

        retry_manager = _BidiStreamRetryManager(
            _WriteResumptionStrategy(),
            lambda r, s: send_and_recv_generator(r, s, metadata),
        )
        await retry_manager.execute({"write_state": write_state}, retry_policy)

        # Sync local markers
        self.write_obj_stream.persisted_size = write_state.persisted_size
        self.write_obj_stream.write_handle = write_state.write_handle
        self.bytes_appended_since_last_flush = write_state.bytes_since_last_flush
        self.persisted_size = write_state.persisted_size
        self.offset = write_state.persisted_size

    async def simple_flush(self) -> None:
        """Flushes the data to the server.
        Please note: Unlike `flush` it does not do `state_lookup`

        :rtype: None

        :raises ValueError: If the stream is not open (i.e., `open()` has not
            been called).
        """
        if not self._is_stream_open:
            raise ValueError("Stream is not open. Call open() before simple_flush().")

        await self.write_obj_stream.send(
            _storage_v2.BidiWriteObjectRequest(
                flush=True,
            )
        )
        self.bytes_appended_since_last_flush = 0

    async def flush(self) -> int:
        """Flushes the data to the server.

        :rtype: int
        :returns: The persisted size after flush.

        :raises ValueError: If the stream is not open (i.e., `open()` has not
            been called).
        """
        if not self._is_stream_open:
            raise ValueError("Stream is not open. Call open() before flush().")

        await self.write_obj_stream.send(
            _storage_v2.BidiWriteObjectRequest(
                flush=True,
                state_lookup=True,
            )
        )
        response = await self.write_obj_stream.recv()
        self.persisted_size = response.persisted_size
        self.offset = self.persisted_size
        self.bytes_appended_since_last_flush = 0
        return self.persisted_size

    async def close(self, finalize_on_close=False) -> Union[int, _storage_v2.Object]:
        """Closes the underlying bidi-gRPC stream.

        :type finalize_on_close: bool
        :param finalize_on_close: Finalizes the Appendable Object. No more data
            can be appended.

        :rtype: Union[int, _storage_v2.Object]
        :returns: Updated `self.persisted_size` by default after closing the
            bidi-gRPC stream. However, if `finalize_on_close=True` is passed,
            returns the finalized object resource.

        :raises ValueError: If the stream is not open (i.e., `open()` has not
            been called).

        """
        if not self._is_stream_open:
            raise ValueError("Stream is not open. Call open() before close().")

        if finalize_on_close:
            return await self.finalize()

        await self.write_obj_stream.close()

        self._is_stream_open = False
        self.offset = None
        return self.persisted_size

    async def finalize(self) -> _storage_v2.Object:
        """Finalizes the Appendable Object.

        Note: Once finalized no more data can be appended.
        This method is different from `close`. If `.close()` is called data may
        still be appended to object at a later point in time by opening with
        generation number.
        (i.e. `open(..., generation=)`.
        However if `.finalize()` is called no more data can be appended to the
        object.

        :rtype: google.cloud.storage_v2.types.Object
        :returns: The finalized object resource.

        :raises ValueError: If the stream is not open (i.e., `open()` has not
            been called).
        """
        if not self._is_stream_open:
            raise ValueError("Stream is not open. Call open() before finalize().")

        await self.write_obj_stream.send(
            _storage_v2.BidiWriteObjectRequest(finish_write=True)
        )
        response = await self.write_obj_stream.recv()
        self.object_resource = response.resource
        self.persisted_size = self.object_resource.size
        await self.write_obj_stream.close()

        self._is_stream_open = False
        self.offset = None
        return self.object_resource

    @property
    def is_stream_open(self) -> bool:
        # True between a successful open() and the matching close()/finalize().
        return self._is_stream_open

    # helper methods.
    async def append_from_string(self, data: str):
        """
        str data will be encoded to bytes using utf-8 encoding calling

        self.append(data.encode("utf-8"))
        """
        raise NotImplementedError("append_from_string is not implemented yet.")

    async def append_from_stream(self, stream_obj):
        """
        At a time read a chunk of data (16MiB) from `stream_obj`
        and call self.append(chunk)
        """
        raise NotImplementedError("append_from_stream is not implemented yet.")

    async def append_from_file(
        self, file_obj: BufferedReader, block_size: int = _DEFAULT_FLUSH_INTERVAL_BYTES
    ):
        """
        Appends data to an Appendable Object using file_handle which is opened
        for reading in binary mode.

        :type file_obj: file
        :param file_obj: A file handle opened in binary mode for reading.

        """
        while block := file_obj.read(block_size):
            await self.append(block)
class AsyncGrpcClient:
    """An asynchronous client for interacting with Google Cloud Storage using the gRPC API.

    :type credentials: :class:`~google.auth.credentials.Credentials`
    :param credentials: (Optional) The OAuth2 Credentials to use for this
        client. If not passed, falls back to the default
        inferred from the environment.

    :type client_info: :class:`~google.api_core.client_info.ClientInfo`
    :param client_info:
        The client info used to send a user-agent string along with API
        requests. If ``None``, then default info will be used.

    :type client_options: :class:`~google.api_core.client_options.ClientOptions`
    :param client_options: (Optional) Client options used to set user options
        on the client.

    :type attempt_direct_path: bool
    :param attempt_direct_path:
        (Optional) Whether to attempt to use DirectPath for gRPC connections.
        Defaults to ``True``.
    """

    def __init__(
        self,
        credentials=None,
        client_info=None,
        client_options=None,
        *,
        attempt_direct_path=True,
    ):
        import copy

        if client_info is None:
            # Fixed: copy the shared default instead of mutating the
            # module-level DEFAULT_CLIENT_INFO in place — previously every
            # client constructed without client_info appended to (and
            # permanently altered) the global default's user_agent.
            client_info = copy.copy(DEFAULT_CLIENT_INFO)
        client_info.client_library_version = __version__
        if client_info.user_agent is None:
            client_info.user_agent = ""
        agent_version = f"gcloud-python/{__version__}"
        # Avoid stacking duplicate agent tokens on repeated construction.
        if agent_version not in client_info.user_agent:
            client_info.user_agent += f" {agent_version} "

        self._grpc_client = self._create_async_grpc_client(
            credentials=credentials,
            client_info=client_info,
            client_options=client_options,
            attempt_direct_path=attempt_direct_path,
        )

    def _create_async_grpc_client(
        self,
        credentials=None,
        client_info=None,
        client_options=None,
        attempt_direct_path=True,
    ):
        """Create the low-level GAPIC async client over a grpc_asyncio transport."""
        transport_cls = storage_v2.StorageAsyncClient.get_transport_class(
            "grpc_asyncio"
        )

        primary_user_agent = client_info.to_user_agent()

        channel = transport_cls.create_channel(
            attempt_direct_path=attempt_direct_path,
            credentials=credentials,
            options=(("grpc.primary_user_agent", primary_user_agent),),
        )
        transport = transport_cls(channel=channel)

        return storage_v2.StorageAsyncClient(
            transport=transport,
            client_info=client_info,
            client_options=client_options,
        )

    @property
    def grpc_client(self):
        """The underlying gRPC client.

        This property gives users direct access to the `_storage_v2.StorageAsyncClient`
        instance. This can be useful for accessing
        newly added or experimental RPCs that are not yet exposed through
        the high-level GrpcClient.
        Returns:
            google.cloud._storage_v2.StorageAsyncClient: The configured GAPIC client.
        """
        return self._grpc_client
logging.getLogger(__name__) + + +def _is_read_retryable(exc): + """Predicate to determine if a read operation should be retried.""" + if isinstance( + exc, + ( + exceptions.InternalServerError, + exceptions.ServiceUnavailable, + exceptions.DeadlineExceeded, + exceptions.TooManyRequests, + ), + ): + return True + + if not isinstance(exc, exceptions.Aborted) or not exc.errors: + return False + + try: + grpc_error = exc.errors[0] + trailers = grpc_error.trailing_metadata() + if not trailers: + return False + + status_details_bin = next( + (v for k, v in trailers if k == "grpc-status-details-bin"), None + ) + + if not status_details_bin: + return False + + status_proto = status_pb2.Status() + status_proto.ParseFromString(status_details_bin) + return any( + detail.type_url == _BIDI_READ_REDIRECTED_TYPE_URL + for detail in status_proto.details + ) + except Exception as e: + logger.error(f"Error parsing status_details_bin: {e}") + return False + + +class AsyncMultiRangeDownloader: + """Provides an interface for downloading multiple ranges of a GCS ``Object`` + concurrently. + + Example usage: + + .. code-block:: python + + client = AsyncGrpcClient() + mrd = await AsyncMultiRangeDownloader.create_mrd( + client, bucket_name="chandrasiri-rs", object_name="test_open9" + ) + my_buff1 = open('my_fav_file.txt', 'wb') + my_buff2 = BytesIO() + my_buff3 = BytesIO() + my_buff4 = any_object_which_provides_BytesIO_like_interface() + await mrd.download_ranges( + [ + # (start_byte, bytes_to_read, writeable_buffer) + (0, 100, my_buff1), + (100, 20, my_buff2), + (200, 123, my_buff3), + (300, 789, my_buff4), + ] + ) + + # verify data in buffers... 
+ assert my_buff2.getbuffer().nbytes == 20 + + + """ + + @classmethod + async def create_mrd( + cls, + client: AsyncGrpcClient, + bucket_name: str, + object_name: str, + generation_number: Optional[int] = None, + read_handle: Optional[_storage_v2.BidiReadHandle] = None, + retry_policy: Optional[AsyncRetry] = None, + metadata: Optional[List[Tuple[str, str]]] = None, + ) -> AsyncMultiRangeDownloader: + """Initializes a MultiRangeDownloader and opens the underlying bidi-gRPC + object for reading. + + :type client: :class:`~google.cloud.storage.asyncio.async_grpc_client.AsyncGrpcClient` + :param client: The asynchronous client to use for making API requests. + + :type bucket_name: str + :param bucket_name: The name of the bucket containing the object. + + :type object_name: str + :param object_name: The name of the object to be read. + + :type generation_number: int + :param generation_number: (Optional) If present, selects a specific + revision of this object. + + :type read_handle: _storage_v2.BidiReadHandle + :param read_handle: (Optional) An existing handle for reading the object. + If provided, opening the bidi-gRPC connection will be faster. + + :type retry_policy: :class:`~google.api_core.retry_async.AsyncRetry` + :param retry_policy: (Optional) The retry policy to use for the ``open`` operation. + + :type metadata: List[Tuple[str, str]] + :param metadata: (Optional) The metadata to be sent with the ``open`` request. + + :rtype: :class:`~google.cloud.storage.asyncio.async_multi_range_downloader.AsyncMultiRangeDownloader` + :returns: An initialized AsyncMultiRangeDownloader instance for reading. 
+ """ + mrd = cls(client, bucket_name, object_name, generation_number, read_handle) + await mrd.open(retry_policy=retry_policy, metadata=metadata) + return mrd + + def __init__( + self, + client: AsyncGrpcClient, + bucket_name: str, + object_name: str, + generation_number: Optional[int] = None, + read_handle: Optional[_storage_v2.BidiReadHandle] = None, + ) -> None: + """Constructor for AsyncMultiRangeDownloader, clients are not advised to + use it directly. Instead it's advised to use the classmethod `create_mrd`. + + :type client: :class:`~google.cloud.storage.asyncio.async_grpc_client.AsyncGrpcClient` + :param client: The asynchronous client to use for making API requests. + + :type bucket_name: str + :param bucket_name: The name of the bucket containing the object. + + :type object_name: str + :param object_name: The name of the object to be read. + + :type generation_number: int + :param generation_number: (Optional) If present, selects a specific revision of + this object. + + :type read_handle: _storage_v2.BidiReadHandle + :param read_handle: (Optional) An existing read handle. 
+ """ + + raise_if_no_fast_crc32c() + + self.client = client + self.bucket_name = bucket_name + self.object_name = object_name + self.generation_number = generation_number + self.read_handle: Optional[_storage_v2.BidiReadHandle] = read_handle + self.read_obj_str: Optional[_AsyncReadObjectStream] = None + self._is_stream_open: bool = False + self._routing_token: Optional[str] = None + self._read_id_to_writable_buffer_dict = {} + self._read_id_to_download_ranges_id = {} + self._download_ranges_id_to_pending_read_ids = {} + self.persisted_size: Optional[int] = None # updated after opening the stream + + async def __aenter__(self): + """Opens the underlying bidi-gRPC connection to read from the object.""" + await self.open() + return self + + async def __aexit__(self, exc_type, exc_val, exc_tb): + """Closes the underlying bidi-gRPC connection.""" + if self.is_stream_open: + await self.close() + + def _on_open_error(self, exc): + """Extracts routing token and read handle on redirect error during open.""" + routing_token, read_handle = _handle_redirect(exc) + if routing_token: + self._routing_token = routing_token + if read_handle: + self.read_handle = read_handle + + async def open( + self, + retry_policy: Optional[AsyncRetry] = None, + metadata: Optional[List[Tuple[str, str]]] = None, + ) -> None: + """Opens the bidi-gRPC connection to read from the object.""" + if self._is_stream_open: + raise ValueError("Underlying bidi-gRPC stream is already open") + + if retry_policy is None: + retry_policy = AsyncRetry( + predicate=_is_read_retryable, on_error=self._on_open_error + ) + else: + original_on_error = retry_policy._on_error + + def combined_on_error(exc): + self._on_open_error(exc) + if original_on_error: + original_on_error(exc) + + retry_policy = AsyncRetry( + predicate=_is_read_retryable, + initial=retry_policy._initial, + maximum=retry_policy._maximum, + multiplier=retry_policy._multiplier, + deadline=retry_policy._deadline, + on_error=combined_on_error, + ) + + 
async def _do_open(): + current_metadata = list(metadata) if metadata else [] + + # Cleanup stream from previous failed attempt, if any. + if self.read_obj_str: + if self.read_obj_str.is_stream_open: + try: + await self.read_obj_str.close() + except exceptions.GoogleAPICallError as e: + logger.warning( + f"Failed to close existing stream during resumption: {e}" + ) + self.read_obj_str = None + self._is_stream_open = False + + self.read_obj_str = _AsyncReadObjectStream( + client=self.client.grpc_client, + bucket_name=self.bucket_name, + object_name=self.object_name, + generation_number=self.generation_number, + read_handle=self.read_handle, + ) + + if self._routing_token: + current_metadata.append( + ("x-goog-request-params", f"routing_token={self._routing_token}") + ) + self._routing_token = None + + await self.read_obj_str.open( + metadata=current_metadata if current_metadata else None + ) + + if self.read_obj_str.generation_number: + self.generation_number = self.read_obj_str.generation_number + if self.read_obj_str.read_handle: + self.read_handle = self.read_obj_str.read_handle + if self.read_obj_str.persisted_size is not None: + self.persisted_size = self.read_obj_str.persisted_size + + self._is_stream_open = True + + await retry_policy(_do_open)() + + async def download_ranges( + self, + read_ranges: List[Tuple[int, int, BytesIO]], + lock: asyncio.Lock = None, + retry_policy: Optional[AsyncRetry] = None, + metadata: Optional[List[Tuple[str, str]]] = None, + ) -> None: + """Downloads multiple byte ranges from the object into the buffers + provided by user with automatic retries. + + :type read_ranges: List[Tuple[int, int, "BytesIO"]] + :param read_ranges: A list of tuples, where each tuple represents a + combination of byte_range and writeable buffer in format - + (`start_byte`, `bytes_to_read`, `writeable_buffer`). 
Buffer has + to be provided by the user, and user has to make sure appropriate + memory is available in the application to avoid out-of-memory crash. + + Special cases: + if the value of `bytes_to_read` is 0, it'll be interpreted as + download all contents until the end of the file from `start_byte`. + Examples: + * (0, 0, buffer) : downloads 0 to end , i.e. entire object. + * (100, 0, buffer) : downloads from 100 to end. + + + :type lock: asyncio.Lock + :param lock: (Optional) An asyncio lock to synchronize sends and recvs + on the underlying bidi-GRPC stream. This is required when multiple + coroutines are calling this method concurrently. + + i.e. Example usage with multiple coroutines: + + ``` + lock = asyncio.Lock() + task1 = asyncio.create_task(mrd.download_ranges(ranges1, lock)) + task2 = asyncio.create_task(mrd.download_ranges(ranges2, lock)) + await asyncio.gather(task1, task2) + + ``` + + If the user wants to call this method serially from multiple coroutines, + then providing a lock is not necessary. + + ``` + await mrd.download_ranges(ranges1) + await mrd.download_ranges(ranges2) + + # ... some other code... + + ``` + + :type retry_policy: :class:`~google.api_core.retry_async.AsyncRetry` + :param retry_policy: (Optional) The retry policy to use for the operation. + + :raises ValueError: if the underlying bidi-GRPC stream is not open. + :raises ValueError: if the length of read_ranges is more than 1000. + :raises DataCorruption: if a checksum mismatch is detected while reading data. 
+ + """ + + if len(read_ranges) > 1000: + raise ValueError( + "Invalid input - length of read_ranges cannot be more than 1000" + ) + + if not self._is_stream_open: + raise ValueError("Underlying bidi-gRPC stream is not open") + + if lock is None: + lock = asyncio.Lock() + + if retry_policy is None: + retry_policy = AsyncRetry(predicate=_is_read_retryable) + + # Initialize Global State for Retry Strategy + download_states = {} + for read_range in read_ranges: + read_id = generate_random_56_bit_integer() + download_states[read_id] = _DownloadState( + initial_offset=read_range[0], + initial_length=read_range[1], + user_buffer=read_range[2], + ) + + initial_state = { + "download_states": download_states, + "read_handle": self.read_handle, + "routing_token": None, + } + + # Track attempts to manage stream reuse + attempt_count = 0 + + def send_ranges_and_get_bytes( + requests: List[_storage_v2.ReadRange], + state: Dict[str, Any], + metadata: Optional[List[Tuple[str, str]]] = None, + ): + async def generator(): + nonlocal attempt_count + attempt_count += 1 + + if attempt_count > 1: + logger.info( + f"Resuming download (attempt {attempt_count - 1}) for {len(requests)} ranges." + ) + + async with lock: + current_handle = state.get("read_handle") + current_token = state.get("routing_token") + + # We reopen if it's a redirect (token exists) OR if this is a retry + # (not first attempt). This prevents trying to send data on a dead + # stream from a previous failed attempt. 
+ should_reopen = ( + (attempt_count > 1) + or (current_token is not None) + or (metadata is not None) + ) + + if should_reopen: + if current_token: + logger.info( + f"Re-opening stream with routing token: {current_token}" + ) + # Close existing stream if any + if self.read_obj_str and self.read_obj_str.is_stream_open: + await self.read_obj_str.close() + + # Re-initialize stream + self.read_obj_str = _AsyncReadObjectStream( + client=self.client.grpc_client, + bucket_name=self.bucket_name, + object_name=self.object_name, + generation_number=self.generation_number, + read_handle=current_handle, + ) + + # Inject routing_token into metadata if present + current_metadata = list(metadata) if metadata else [] + if current_token: + current_metadata.append( + ( + "x-goog-request-params", + f"routing_token={current_token}", + ) + ) + + await self.read_obj_str.open( + metadata=current_metadata if current_metadata else None + ) + self._is_stream_open = True + + pending_read_ids = {r.read_id for r in requests} + + # Send Requests + for i in range( + 0, len(requests), _MAX_READ_RANGES_PER_BIDI_READ_REQUEST + ): + batch = requests[i : i + _MAX_READ_RANGES_PER_BIDI_READ_REQUEST] + await self.read_obj_str.send( + _storage_v2.BidiReadObjectRequest(read_ranges=batch) + ) + + while pending_read_ids: + response = await self.read_obj_str.recv() + if response is None: + break + if response.object_data_ranges: + for data_range in response.object_data_ranges: + if data_range.range_end: + pending_read_ids.discard( + data_range.read_range.read_id + ) + yield response + + return generator() + + strategy = _ReadResumptionStrategy() + retry_manager = _BidiStreamRetryManager( + strategy, lambda r, s: send_ranges_and_get_bytes(r, s, metadata=metadata) + ) + + await retry_manager.execute(initial_state, retry_policy) + + if initial_state.get("read_handle"): + self.read_handle = initial_state["read_handle"] + + async def close(self): + """ + Closes the underlying bidi-gRPC connection. 
+ """ + if not self._is_stream_open: + raise ValueError("Underlying bidi-gRPC stream is not open") + + if self.read_obj_str: + await self.read_obj_str.close() + self.read_obj_str = None + self._is_stream_open = False + + @property + def is_stream_open(self) -> bool: + return self._is_stream_open diff --git a/google/cloud/storage/asyncio/async_read_object_stream.py b/google/cloud/storage/asyncio/async_read_object_stream.py new file mode 100644 index 000000000..d456f16cc --- /dev/null +++ b/google/cloud/storage/asyncio/async_read_object_stream.py @@ -0,0 +1,188 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import List, Optional, Tuple +from google.cloud import _storage_v2 +from google.cloud.storage.asyncio.async_grpc_client import AsyncGrpcClient +from google.cloud.storage.asyncio.async_abstract_object_stream import ( + _AsyncAbstractObjectStream, +) + +from google.api_core.bidi_async import AsyncBidiRpc + + +class _AsyncReadObjectStream(_AsyncAbstractObjectStream): + """Class representing a gRPC bidi-stream for reading data from a GCS ``Object``. + + This class provides a unix socket-like interface to a GCS ``Object``, with + methods like ``open``, ``close``, ``send``, and ``recv``. + + :type client: :class:`~google.cloud.storage.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client` + :param client: async grpc client to use for making API requests. 
+ + :type bucket_name: str + :param bucket_name: The name of the GCS ``bucket`` containing the object. + + :type object_name: str + :param object_name: The name of the GCS ``object`` to be read. + + :type generation_number: int + :param generation_number: (Optional) If present, selects a specific revision of + this object. + + :type read_handle: _storage_v2.BidiReadHandle + :param read_handle: (Optional) An existing handle for reading the object. + If provided, opening the bidi-gRPC connection will be faster. + """ + + def __init__( + self, + client: AsyncGrpcClient.grpc_client, + bucket_name: str, + object_name: str, + generation_number: Optional[int] = None, + read_handle: Optional[_storage_v2.BidiReadHandle] = None, + ) -> None: + if client is None: + raise ValueError("client must be provided") + if bucket_name is None: + raise ValueError("bucket_name must be provided") + if object_name is None: + raise ValueError("object_name must be provided") + + super().__init__( + bucket_name=bucket_name, + object_name=object_name, + generation_number=generation_number, + ) + self.client: AsyncGrpcClient.grpc_client = client + self.read_handle: Optional[_storage_v2.BidiReadHandle] = read_handle + + self._full_bucket_name = f"projects/_/buckets/{self.bucket_name}" + + self.rpc = self.client._client._transport._wrapped_methods[ + self.client._client._transport.bidi_read_object + ] + self.metadata = (("x-goog-request-params", f"bucket={self._full_bucket_name}"),) + self.socket_like_rpc: Optional[AsyncBidiRpc] = None + self._is_stream_open: bool = False + self.persisted_size: Optional[int] = None + + async def open(self, metadata: Optional[List[Tuple[str, str]]] = None) -> None: + """Opens the bidi-gRPC connection to read from the object. + + This method sends an initial request to start the stream and receives + the first response containing metadata and a read handle. 
+ + Args: + metadata (Optional[List[Tuple[str, str]]]): Additional metadata + to send with the initial stream request, e.g., for routing tokens. + """ + if self._is_stream_open: + raise ValueError("Stream is already open") + + read_handle = self.read_handle if self.read_handle else None + + read_object_spec = _storage_v2.BidiReadObjectSpec( + bucket=self._full_bucket_name, + object=self.object_name, + generation=self.generation_number if self.generation_number else None, + read_handle=read_handle, + ) + self.first_bidi_read_req = _storage_v2.BidiReadObjectRequest( + read_object_spec=read_object_spec + ) + + # Build the x-goog-request-params header + request_params = [f"bucket={self._full_bucket_name}"] + other_metadata = [] + if metadata: + for key, value in metadata: + if key == "x-goog-request-params": + request_params.append(value) + else: + other_metadata.append((key, value)) + + current_metadata = other_metadata + current_metadata.append(("x-goog-request-params", ",".join(request_params))) + + self.socket_like_rpc = AsyncBidiRpc( + self.rpc, + initial_request=self.first_bidi_read_req, + metadata=current_metadata, + ) + await self.socket_like_rpc.open() # this is actually 1 send + response = await self.socket_like_rpc.recv() + # populated only in the first response of bidi-stream and when opened + # without using `read_handle` + if hasattr(response, "metadata") and response.metadata: + if self.generation_number is None: + self.generation_number = response.metadata.generation + # update persisted size + self.persisted_size = response.metadata.size + + if response and response.read_handle: + self.read_handle = response.read_handle + + self._is_stream_open = True + + async def close(self) -> None: + """Closes the bidi-gRPC connection.""" + if not self._is_stream_open: + raise ValueError("Stream is not open") + await self.requests_done() + await self.socket_like_rpc.close() + self._is_stream_open = False + + async def requests_done(self): + """Signals that all 
requests have been sent.""" + + await self.socket_like_rpc.send(None) + await self.socket_like_rpc.recv() + + async def send( + self, bidi_read_object_request: _storage_v2.BidiReadObjectRequest + ) -> None: + """Sends a request message on the stream. + + Args: + bidi_read_object_request (:class:`~google.cloud._storage_v2.types.BidiReadObjectRequest`): + The request message to send. This is typically used to specify + the read offset and limit. + """ + if not self._is_stream_open: + raise ValueError("Stream is not open") + await self.socket_like_rpc.send(bidi_read_object_request) + + async def recv(self) -> _storage_v2.BidiReadObjectResponse: + """Receives a response from the stream. + + This method waits for the next message from the server, which could + contain object data or metadata. + + Returns: + :class:`~google.cloud._storage_v2.types.BidiReadObjectResponse`: + The response message from the server. + """ + if not self._is_stream_open: + raise ValueError("Stream is not open") + response = await self.socket_like_rpc.recv() + # Update read_handle if present in response + if response and response.read_handle: + self.read_handle = response.read_handle + return response + + @property + def is_stream_open(self) -> bool: + return self._is_stream_open diff --git a/google/cloud/storage/asyncio/async_write_object_stream.py b/google/cloud/storage/asyncio/async_write_object_stream.py new file mode 100644 index 000000000..721183962 --- /dev/null +++ b/google/cloud/storage/asyncio/async_write_object_stream.py @@ -0,0 +1,227 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import List, Optional, Tuple +from google.cloud import _storage_v2 +from google.cloud.storage.asyncio import _utils +from google.cloud.storage.asyncio.async_grpc_client import AsyncGrpcClient +from google.cloud.storage.asyncio.async_abstract_object_stream import ( + _AsyncAbstractObjectStream, +) +from google.api_core.bidi_async import AsyncBidiRpc + + +class _AsyncWriteObjectStream(_AsyncAbstractObjectStream): + """Class representing a gRPC bidi-stream for writing data to a GCS + ``Appendable Object``. + + This class provides a unix socket-like interface to a GCS ``Object``, with + methods like ``open``, ``close``, ``send``, and ``recv``. + + :type client: :class:`~google.cloud.storage.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client` + :param client: async grpc client to use for making API requests. + + :type bucket_name: str + :param bucket_name: The name of the GCS ``bucket`` containing the object. + + :type object_name: str + :param object_name: The name of the GCS ``Appendable Object`` to be written. + + :type generation_number: int + :param generation_number: (Optional) If present, creates writer for that + specific revision of that object. Use this to append data to an + existing Appendable Object. + + Setting to ``0`` makes the `writer.open()` succeed only if + object doesn't exist in the bucket (useful for not accidentally + overwriting existing objects). + + Warning: If `None`, a new object is created. If an object with the + same name already exists, it will be overwritten the moment + `writer.open()` is called. 
+ + :type write_handle: _storage_v2.BidiWriteHandle + :param write_handle: (Optional) An existing handle for writing the object. + If provided, opening the bidi-gRPC connection will be faster. + """ + + def __init__( + self, + client: AsyncGrpcClient.grpc_client, + bucket_name: str, + object_name: str, + generation_number: Optional[int] = None, # None means new object + write_handle: Optional[_storage_v2.BidiWriteHandle] = None, + routing_token: Optional[str] = None, + ) -> None: + if client is None: + raise ValueError("client must be provided") + if bucket_name is None: + raise ValueError("bucket_name must be provided") + if object_name is None: + raise ValueError("object_name must be provided") + + super().__init__( + bucket_name=bucket_name, + object_name=object_name, + generation_number=generation_number, + ) + self.client: AsyncGrpcClient.grpc_client = client + self.write_handle: Optional[_storage_v2.BidiWriteHandle] = write_handle + self.routing_token: Optional[str] = routing_token + + self._full_bucket_name = f"projects/_/buckets/{self.bucket_name}" + + self.rpc = self.client._client._transport._wrapped_methods[ + self.client._client._transport.bidi_write_object + ] + + self.metadata = (("x-goog-request-params", f"bucket={self._full_bucket_name}"),) + self.socket_like_rpc: Optional[AsyncBidiRpc] = None + self._is_stream_open: bool = False + self.first_bidi_write_req = None + self.persisted_size = 0 + self.object_resource: Optional[_storage_v2.Object] = None + + async def open(self, metadata: Optional[List[Tuple[str, str]]] = None) -> None: + """ + Opens the bidi-gRPC connection to write to the object. + + This method sends an initial request to start the stream and receives + the first response containing metadata and a write handle. + + :rtype: None + :raises ValueError: If the stream is already open. + :raises google.api_core.exceptions.FailedPrecondition: + if `generation_number` is 0 and object already exists. 
+ """ + if self._is_stream_open: + raise ValueError("Stream is already open") + + # Create a new object or overwrite existing one if generation_number + # is None. This makes it consistent with GCS JSON API behavior. + # Created object type would be Appendable Object. + # if `generation_number` == 0 new object will be created only if there + # isn't any existing object. + if self.generation_number is None or self.generation_number == 0: + self.first_bidi_write_req = _storage_v2.BidiWriteObjectRequest( + write_object_spec=_storage_v2.WriteObjectSpec( + resource=_storage_v2.Object( + name=self.object_name, bucket=self._full_bucket_name + ), + appendable=True, + if_generation_match=self.generation_number, + ), + ) + else: + self.first_bidi_write_req = _storage_v2.BidiWriteObjectRequest( + append_object_spec=_storage_v2.AppendObjectSpec( + bucket=self._full_bucket_name, + object=self.object_name, + generation=self.generation_number, + write_handle=self.write_handle if self.write_handle else None, + routing_token=self.routing_token if self.routing_token else None, + ), + ) + + request_param_values = [f"bucket={self._full_bucket_name}"] + final_metadata = [] + if metadata: + for key, value in metadata: + if key == "x-goog-request-params": + request_param_values.append(value) + else: + final_metadata.append((key, value)) + + final_metadata.append(("x-goog-request-params", ",".join(request_param_values))) + + self.socket_like_rpc = AsyncBidiRpc( + self.rpc, + initial_request=self.first_bidi_write_req, + metadata=final_metadata, + ) + + await self.socket_like_rpc.open() # this is actually 1 send + response = await self.socket_like_rpc.recv() + self._is_stream_open = True + + if response.persisted_size: + self.persisted_size = response.persisted_size + + if response.resource: + if not response.resource.size: + # Appending to a 0 byte appendable object. 
+ self.persisted_size = 0 + else: + self.persisted_size = response.resource.size + + self.generation_number = response.resource.generation + + if response.write_handle: + self.write_handle = response.write_handle + + async def close(self) -> None: + """Closes the bidi-gRPC connection.""" + if not self._is_stream_open: + raise ValueError("Stream is not open") + await self.requests_done() + await self.socket_like_rpc.close() + self._is_stream_open = False + + async def requests_done(self): + """Signals that all requests have been sent.""" + + await self.socket_like_rpc.send(None) + _utils.update_write_handle_if_exists(self, await self.socket_like_rpc.recv()) + + async def send( + self, bidi_write_object_request: _storage_v2.BidiWriteObjectRequest + ) -> None: + """Sends a request message on the stream. + + Args: + bidi_write_object_request (:class:`~google.cloud._storage_v2.types.BidiWriteObjectRequest`): + The request message to send. This is typically used to carry + the object data to be written. + """ + if not self._is_stream_open: + raise ValueError("Stream is not open") + await self.socket_like_rpc.send(bidi_write_object_request) + + async def recv(self) -> _storage_v2.BidiWriteObjectResponse: + """Receives a response from the stream. + + This method waits for the next message from the server, which could + contain object data or metadata. + + Returns: + :class:`~google.cloud._storage_v2.types.BidiWriteObjectResponse`: + The response message from the server. 
+ """ + if not self._is_stream_open: + raise ValueError("Stream is not open") + response = await self.socket_like_rpc.recv() + # Update write_handle if present in response + if response: + if response.write_handle: + self.write_handle = response.write_handle + if response.persisted_size is not None: + self.persisted_size = response.persisted_size + if response.resource and response.resource.size: + self.persisted_size = response.resource.size + return response + + @property + def is_stream_open(self) -> bool: + return self._is_stream_open diff --git a/google/cloud/storage/asyncio/retry/_helpers.py b/google/cloud/storage/asyncio/retry/_helpers.py new file mode 100644 index 000000000..d9ad2462e --- /dev/null +++ b/google/cloud/storage/asyncio/retry/_helpers.py @@ -0,0 +1,125 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import annotations + +import logging +from typing import Tuple, Optional + +from google.api_core import exceptions +from google.cloud._storage_v2.types import ( + BidiReadObjectRedirectedError, + BidiWriteObjectRedirectedError, +) +from google.rpc import status_pb2 + +_BIDI_READ_REDIRECTED_TYPE_URL = ( + "type.googleapis.com/google.storage.v2.BidiReadObjectRedirectedError" +) +_BIDI_WRITE_REDIRECTED_TYPE_URL = ( + "type.googleapis.com/google.storage.v2.BidiWriteObjectRedirectedError" +) +logger = logging.getLogger(__name__) + + +def _handle_redirect( + exc: Exception, +) -> Tuple[Optional[str], Optional[bytes]]: + """ + Extracts routing token and read handle from a gRPC error. + + :type exc: Exception + :param exc: The exception to parse. + + :rtype: Tuple[Optional[str], Optional[bytes]] + :returns: A tuple of (routing_token, read_handle). + """ + routing_token = None + read_handle = None + + grpc_error = None + if isinstance(exc, exceptions.Aborted) and exc.errors: + grpc_error = exc.errors[0] + + if grpc_error: + if isinstance(grpc_error, BidiReadObjectRedirectedError): + routing_token = grpc_error.routing_token + if grpc_error.read_handle: + read_handle = grpc_error.read_handle + return routing_token, read_handle + + if hasattr(grpc_error, "trailing_metadata"): + trailers = grpc_error.trailing_metadata() + if not trailers: + return None, None + + status_details_bin = None + for key, value in trailers: + if key == "grpc-status-details-bin": + status_details_bin = value + break + + if status_details_bin: + status_proto = status_pb2.Status() + try: + status_proto.ParseFromString(status_details_bin) + for detail in status_proto.details: + if detail.type_url == _BIDI_READ_REDIRECTED_TYPE_URL: + redirect_proto = BidiReadObjectRedirectedError.deserialize( + detail.value + ) + if redirect_proto.routing_token: + routing_token = redirect_proto.routing_token + if redirect_proto.read_handle: + read_handle = redirect_proto.read_handle + break + except 
Exception as e: + logger.error(f"Error unpacking redirect: {e}") + + return routing_token, read_handle + + +def _extract_bidi_writes_redirect_proto(exc: Exception): + grpc_error = None + if isinstance(exc, exceptions.Aborted) and exc.errors: + grpc_error = exc.errors[0] + + if grpc_error: + if isinstance(grpc_error, BidiWriteObjectRedirectedError): + return grpc_error + + if hasattr(grpc_error, "trailing_metadata"): + trailers = grpc_error.trailing_metadata() + if not trailers: + return + + status_details_bin = None + for key, value in trailers: + if key == "grpc-status-details-bin": + status_details_bin = value + break + + if status_details_bin: + status_proto = status_pb2.Status() + try: + status_proto.ParseFromString(status_details_bin) + for detail in status_proto.details: + if detail.type_url == _BIDI_WRITE_REDIRECTED_TYPE_URL: + redirect_proto = BidiWriteObjectRedirectedError.deserialize( + detail.value + ) + return redirect_proto + except Exception: + logger.error("Error unpacking redirect details from gRPC error.") + pass diff --git a/google/cloud/storage/asyncio/retry/base_strategy.py b/google/cloud/storage/asyncio/retry/base_strategy.py new file mode 100644 index 000000000..ff193f109 --- /dev/null +++ b/google/cloud/storage/asyncio/retry/base_strategy.py @@ -0,0 +1,83 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import abc +from typing import Any, Iterable + + +class _BaseResumptionStrategy(abc.ABC): + """Abstract base class defining the interface for a bidi stream resumption strategy. + + This class defines the skeleton for a pluggable strategy that contains + all the service-specific logic for a given bidi operation (e.g., reads + or writes). This allows a generic retry manager to handle the common + retry loop while sending the state management and request generation + to a concrete implementation of this class. + """ + + @abc.abstractmethod + def generate_requests(self, state: Any) -> Iterable[Any]: + """Generates the next batch of requests based on the current state. + + This method is called at the beginning of each retry attempt. It should + inspect the provided state object and generate the appropriate list of + request protos to send to the server. For example, a read strategy + would use this to implement "Smarter Resumption" by creating smaller + `ReadRange` requests for partially downloaded ranges. For bidi-writes, + it will set the `write_offset` field to the persisted size received + from the server in the next request. + + :type state: Any + :param state: An object containing all the state needed for the + operation (e.g., requested ranges, user buffers, + bytes written). + """ + pass + + @abc.abstractmethod + def update_state_from_response(self, response: Any, state: Any) -> None: + """Updates the state based on a successful server response. + + This method is called for every message received from the server. It is + responsible for processing the response and updating the shared state + object. + + :type response: Any + :param response: The response message received from the server. + + :type state: Any + :param state: The shared state object for the operation, which will be + mutated by this method. 
+ """ + pass + + @abc.abstractmethod + async def recover_state_on_failure(self, error: Exception, state: Any) -> None: + """Prepares the state for the next retry attempt after a failure. + + This method is called when a retriable gRPC error occurs. It is + responsible for performing any necessary actions to ensure the next + retry attempt can succeed. For bidi reads, its primary role is to + handle the `BidiReadObjectRedirectError` by extracting the + `routing_token` and updating the state. For bidi writes, it will update + the state to reflect any bytes that were successfully persisted before + the failure. + + :type error: :class:`Exception` + :param error: The exception that was caught by the retry engine. + + :type state: Any + :param state: The shared state object for the operation. + """ + pass diff --git a/google/cloud/storage/asyncio/retry/bidi_stream_retry_manager.py b/google/cloud/storage/asyncio/retry/bidi_stream_retry_manager.py new file mode 100644 index 000000000..23bffb63d --- /dev/null +++ b/google/cloud/storage/asyncio/retry/bidi_stream_retry_manager.py @@ -0,0 +1,69 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import logging +from typing import Any, AsyncIterator, Callable + +from google.cloud.storage.asyncio.retry.base_strategy import ( + _BaseResumptionStrategy, +) + +logger = logging.getLogger(__name__) + + +class _BidiStreamRetryManager: + """Manages the generic retry loop for a bidi streaming operation.""" + + def __init__( + self, + strategy: _BaseResumptionStrategy, + send_and_recv: Callable[..., AsyncIterator[Any]], + ): + """Initializes the retry manager. + Args: + strategy: The strategy for managing the state of a specific + bidi operation (e.g., reads or writes). + send_and_recv: An async callable that opens a new gRPC stream. + """ + self._strategy = strategy + self._send_and_recv = send_and_recv + + async def execute(self, initial_state: Any, retry_policy): + """ + Executes the bidi operation with the configured retry policy. + Args: + initial_state: An object containing all state for the operation. + retry_policy: The `google.api_core.retry.AsyncRetry` object to + govern the retry behavior for this specific operation. + """ + state = initial_state + + async def attempt(): + requests = self._strategy.generate_requests(state) + stream = self._send_and_recv(requests, state) + try: + async for response in stream: + self._strategy.update_state_from_response(response, state) + return + except Exception as e: + if retry_policy._predicate(e): + logger.info( + f"Bidi stream operation failed: {e}. Attempting state recovery and retry." 
+ ) + await self._strategy.recover_state_on_failure(e, state) + raise e + + wrapped_attempt = retry_policy(attempt) + + await wrapped_attempt() diff --git a/google/cloud/storage/asyncio/retry/reads_resumption_strategy.py b/google/cloud/storage/asyncio/retry/reads_resumption_strategy.py new file mode 100644 index 000000000..468954332 --- /dev/null +++ b/google/cloud/storage/asyncio/retry/reads_resumption_strategy.py @@ -0,0 +1,157 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
from typing import Any, Dict, List, IO
import logging

from google_crc32c import Checksum
from google.cloud import _storage_v2 as storage_v2
from google.cloud.storage.exceptions import DataCorruption
from google.cloud.storage.asyncio.retry._helpers import (
    _handle_redirect,
)
from google.cloud.storage.asyncio.retry.base_strategy import (
    _BaseResumptionStrategy,
)


_BIDI_READ_REDIRECTED_TYPE_URL = (
    "type.googleapis.com/google.storage.v2.BidiReadObjectRedirectedError"
)
logger = logging.getLogger(__name__)


class _DownloadState:
    """Tracks the progress of one ranged download."""

    def __init__(
        self, initial_offset: int, initial_length: int, user_buffer: IO[bytes]
    ):
        # Parameters of the originally requested range.
        self.initial_offset = initial_offset
        self.initial_length = initial_length
        self.user_buffer = user_buffer
        # Progress bookkeeping, advanced as validated chunks arrive.
        self.bytes_written = 0
        self.next_expected_offset = initial_offset
        self.is_complete = False


class _ReadResumptionStrategy(_BaseResumptionStrategy):
    """The concrete resumption strategy for bidi reads."""

    def generate_requests(self, state: Dict[str, Any]) -> List[storage_v2.ReadRange]:
        """Builds fresh ReadRange requests for every unfinished download.

        Each request resumes at ``initial_offset + bytes_written``; a zero
        ``initial_length`` (read-to-end) stays zero, otherwise the length
        is shrunk by what has already been written.

        :type state: dict
        :param state: A dictionary mapping a read_id to its corresponding
            _DownloadState object.
        """
        download_states: Dict[int, _DownloadState] = state["download_states"]
        return [
            storage_v2.ReadRange(
                read_offset=progress.initial_offset + progress.bytes_written,
                read_length=(
                    progress.initial_length - progress.bytes_written
                    if progress.initial_length > 0
                    else 0
                ),
                read_id=read_id,
            )
            for read_id, progress in download_states.items()
            if not progress.is_complete
        ]

    def update_state_from_response(
        self, response: storage_v2.BidiReadObjectResponse, state: Dict[str, Any]
    ) -> None:
        """Validates a server response and folds it into the shared state."""
        # Capture read_handle if provided.
        if response.read_handle:
            state["read_handle"] = response.read_handle

        download_states = state["download_states"]

        for data_range in response.object_data_ranges:
            # Skip ranges with no descriptor, or for read_ids we don't
            # track (e.g. a previously cancelled request on this stream).
            if not data_range.read_range:
                logger.warning(
                    "Received response with missing read_range field; ignoring."
                )
                continue

            read_id = data_range.read_range.read_id
            progress = download_states.get(read_id)
            if progress is None:
                logger.warning(
                    f"Received data for unknown or stale read_id {read_id}; ignoring."
                )
                continue

            # Offset verification: chunks must arrive contiguously.
            chunk_offset = data_range.read_range.read_offset
            if chunk_offset != progress.next_expected_offset:
                raise DataCorruption(
                    response,
                    f"Offset mismatch for read_id {read_id}. "
                    f"Expected {progress.next_expected_offset}, got {chunk_offset}",
                )

            # Checksum verification happens before any state mutation or
            # buffer write, so corrupt data is never surfaced.
            payload = data_range.checksummed_data.content
            server_checksum = data_range.checksummed_data.crc32c
            if server_checksum is not None:
                client_checksum = int.from_bytes(Checksum(payload).digest(), "big")
                if server_checksum != client_checksum:
                    raise DataCorruption(
                        response,
                        f"Checksum mismatch for read_id {read_id}. "
                        f"Server sent {server_checksum}, client calculated {client_checksum}.",
                    )

            # Commit the validated chunk.
            progress.user_buffer.write(payload)
            chunk_size = len(payload)
            progress.bytes_written += chunk_size
            progress.next_expected_offset += chunk_size

            # Final byte-count verification once the server marks the range done.
            if data_range.range_end:
                progress.is_complete = True
                if (
                    progress.initial_length != 0
                    and progress.bytes_written > progress.initial_length
                ):
                    raise DataCorruption(
                        response,
                        f"Byte count mismatch for read_id {read_id}. "
                        f"Expected {progress.initial_length}, got {progress.bytes_written}",
                    )

    async def recover_state_on_failure(self, error: Exception, state: Any) -> None:
        """Handles BidiReadObjectRedirectedError for reads."""
        routing_token, read_handle = _handle_redirect(error)
        if routing_token:
            state["routing_token"] = routing_token
        if read_handle:
            state["read_handle"] = read_handle
from typing import Any, Dict, IO, List, Optional, Union

import google_crc32c
from google.cloud._storage_v2.types import storage as storage_type
from google.cloud._storage_v2.types.storage import BidiWriteObjectRedirectedError
from google.cloud.storage.asyncio.retry.base_strategy import (
    _BaseResumptionStrategy,
)
from google.cloud.storage.asyncio.retry._helpers import (
    _extract_bidi_writes_redirect_proto,
)


class _WriteState:
    """Tracks the state of a single upload operation.

    :type chunk_size: int
    :param chunk_size: The size of chunks to write to the server.

    :type user_buffer: IO[bytes]
    :param user_buffer: The data source.

    :type flush_interval: int
    :param flush_interval: The flush interval at which the data is flushed.
    """

    def __init__(
        self,
        chunk_size: int,
        user_buffer: IO[bytes],
        flush_interval: int,
    ):
        self.chunk_size = chunk_size
        self.user_buffer = user_buffer
        # Bytes the server has confirmed durable; the rewind point on failure.
        self.persisted_size: int = 0
        self.bytes_sent: int = 0
        self.bytes_since_last_flush: int = 0
        self.flush_interval: int = flush_interval
        self.write_handle: Union[bytes, storage_type.BidiWriteHandle, None] = None
        self.routing_token: Optional[str] = None
        self.is_finalized: bool = False


class _WriteResumptionStrategy(_BaseResumptionStrategy):
    """The concrete resumption strategy for bidi writes."""

    def generate_requests(
        self, state: Dict[str, Any]
    ) -> List[storage_type.BidiWriteObjectRequest]:
        """Builds BidiWriteObjectRequests to resume or continue the upload.

        This method is not applicable for `open` methods. The buffer is
        expected to be positioned at ``persisted_size`` already —
        ``recover_state_on_failure`` seeks it there before this runs.
        """
        write_state: _WriteState = state["write_state"]
        requests: List[storage_type.BidiWriteObjectRequest] = []

        while not write_state.is_finalized:
            data = write_state.user_buffer.read(write_state.chunk_size)
            if not data:
                # End of file: nothing more to send this attempt.
                break

            # Attach a CRC32C checksum so the server can validate the chunk.
            checksummed_data = storage_type.ChecksummedData(content=data)
            checksummed_data.crc32c = int.from_bytes(
                google_crc32c.Checksum(data).digest(), "big"
            )

            request = storage_type.BidiWriteObjectRequest(
                write_offset=write_state.bytes_sent,
                checksummed_data=checksummed_data,
            )
            size = len(data)
            write_state.bytes_sent += size
            write_state.bytes_since_last_flush += size

            # Ask the server to persist once enough unflushed bytes pile up.
            if write_state.bytes_since_last_flush >= write_state.flush_interval:
                request.flush = True
                write_state.bytes_since_last_flush = 0

            requests.append(request)
        return requests

    def update_state_from_response(
        self, response: storage_type.BidiWriteObjectResponse, state: Dict[str, Any]
    ) -> None:
        """Processes a server response and updates the write state."""
        write_state: _WriteState = state["write_state"]
        if response is None:
            return

        if response.persisted_size:
            write_state.persisted_size = response.persisted_size

        if response.write_handle:
            write_state.write_handle = response.write_handle

        if response.resource:
            # A resource message carries the authoritative size, and a
            # finalize_time means the object is complete.
            write_state.persisted_size = response.resource.size
            if response.resource.finalize_time:
                write_state.is_finalized = True

    async def recover_state_on_failure(
        self, error: Exception, state: Dict[str, Any]
    ) -> None:
        """
        Handles errors, specifically BidiWriteObjectRedirectedError, and rewinds state.

        This method rewinds the user buffer and internal byte tracking to the
        last confirmed 'persisted_size' from the server.
        """
        write_state: _WriteState = state["write_state"]

        if isinstance(error, BidiWriteObjectRedirectedError):
            redirect_proto = error
        else:
            redirect_proto = _extract_bidi_writes_redirect_proto(error)

        # Extract routing token and potentially a new write handle for redirection.
        if redirect_proto:
            if redirect_proto.routing_token:
                write_state.routing_token = redirect_proto.routing_token
            if redirect_proto.write_handle:
                write_state.write_handle = redirect_proto.write_handle

        # Anything sent beyond 'persisted_size' must be assumed lost:
        # rewind the buffer and counters to the last server-confirmed byte.
        write_state.user_buffer.seek(write_state.persisted_size)
        write_state.bytes_sent = write_state.persisted_size
        write_state.bytes_since_last_flush = 0
"""A client for interacting with Google Cloud Storage using the gRPC API."""

from google.cloud.client import ClientWithProject
from google.cloud import _storage_v2 as storage_v2

# Sentinel distinguishing "project not passed" from an explicit None.
_marker = object()


class GrpcClient(ClientWithProject):
    """A client for interacting with Google Cloud Storage using the gRPC API.

    :type project: str or None
    :param project: The project which the client acts on behalf of. If not
        passed, falls back to the default inferred from the environment.

    :type credentials: :class:`~google.auth.credentials.Credentials`
    :param credentials: (Optional) The OAuth2 Credentials to use for this
        client. If not passed, falls back to the default inferred from the
        environment.

    :type client_info: :class:`~google.api_core.client_info.ClientInfo`
    :param client_info: The client info used to send a user-agent string
        along with API requests. If ``None``, then default info will be
        used. Generally, you only need to set this if you're developing
        your own library or partner tool.

    :type client_options: :class:`~google.api_core.client_options.ClientOptions` or :class:`dict`
    :param client_options: (Optional) Client options used to set user options
        on the client. A non-default universe domain or API endpoint should be
        set through client_options.

    :type api_key: string
    :param api_key: (Optional) An API key. Mutually exclusive with any other
        credentials. This parameter is an alias for setting
        `client_options.api_key` and will supersede any API key set in the
        `client_options` parameter.

    :type attempt_direct_path: bool
    :param attempt_direct_path: (Optional) Whether to attempt to use
        DirectPath for gRPC connections. This provides a direct, unproxied
        connection to GCS for lower latency and higher throughput, and is
        highly recommended when running on Google Cloud infrastructure.
        Defaults to ``True``.
    """

    def __init__(
        self,
        project=_marker,
        credentials=None,
        client_info=None,
        client_options=None,
        *,
        api_key=None,
        attempt_direct_path=True,
    ):
        super(GrpcClient, self).__init__(project=project, credentials=credentials)

        if isinstance(client_options, dict):
            if api_key:
                # Copy before mutating so the caller's dict is not changed.
                client_options = dict(client_options)
                client_options["api_key"] = api_key
        elif client_options is None:
            client_options = {"api_key": api_key} if api_key else {}
        elif api_key:
            client_options.api_key = api_key

        self._grpc_client = self._create_gapic_client(
            credentials=credentials,
            client_info=client_info,
            client_options=client_options,
            attempt_direct_path=attempt_direct_path,
        )

    def _create_gapic_client(
        self,
        credentials=None,
        client_info=None,
        client_options=None,
        attempt_direct_path=True,
    ):
        """Creates and configures the low-level GAPIC `storage_v2` client."""
        transport_cls = storage_v2.StorageClient.get_transport_class("grpc")

        # DirectPath is negotiated at channel-creation time.
        channel = transport_cls.create_channel(attempt_direct_path=attempt_direct_path)

        transport = transport_cls(credentials=credentials, channel=channel)

        return storage_v2.StorageClient(
            credentials=credentials,
            transport=transport,
            client_info=client_info,
            client_options=client_options,
        )

    @property
    def grpc_client(self):
        """The underlying gRPC client.

        This property gives users direct access to the `storage_v2.StorageClient`
        instance. This can be useful for accessing newly added or experimental
        RPCs that are not yet exposed through the high-level GrpcClient.

        Returns:
            google.cloud.storage_v2.StorageClient: The configured GAPIC client.
        """
        return self._grpc_client
+ """ + return self._grpc_client diff --git a/samples/snippets/zonal_buckets/storage_create_and_write_appendable_object.py b/samples/snippets/zonal_buckets/storage_create_and_write_appendable_object.py index f00a6ba80..725eeb2bd 100644 --- a/samples/snippets/zonal_buckets/storage_create_and_write_appendable_object.py +++ b/samples/snippets/zonal_buckets/storage_create_and_write_appendable_object.py @@ -17,10 +17,10 @@ import argparse import asyncio -from google.cloud.storage._experimental.asyncio.async_appendable_object_writer import ( +from google.cloud.storage.asyncio.async_appendable_object_writer import ( AsyncAppendableObjectWriter, ) -from google.cloud.storage._experimental.asyncio.async_grpc_client import AsyncGrpcClient +from google.cloud.storage.asyncio.async_grpc_client import AsyncGrpcClient # [START storage_create_and_write_appendable_object] diff --git a/samples/snippets/zonal_buckets/storage_finalize_appendable_object_upload.py b/samples/snippets/zonal_buckets/storage_finalize_appendable_object_upload.py index 971658997..807fe40a5 100644 --- a/samples/snippets/zonal_buckets/storage_finalize_appendable_object_upload.py +++ b/samples/snippets/zonal_buckets/storage_finalize_appendable_object_upload.py @@ -17,10 +17,10 @@ import argparse import asyncio -from google.cloud.storage._experimental.asyncio.async_appendable_object_writer import ( +from google.cloud.storage.asyncio.async_appendable_object_writer import ( AsyncAppendableObjectWriter, ) -from google.cloud.storage._experimental.asyncio.async_grpc_client import AsyncGrpcClient +from google.cloud.storage.asyncio.async_grpc_client import AsyncGrpcClient # [START storage_finalize_appendable_object_upload] diff --git a/samples/snippets/zonal_buckets/storage_open_multiple_objects_ranged_read.py b/samples/snippets/zonal_buckets/storage_open_multiple_objects_ranged_read.py index ce2ba678b..bed580d36 100644 --- a/samples/snippets/zonal_buckets/storage_open_multiple_objects_ranged_read.py +++ 
b/samples/snippets/zonal_buckets/storage_open_multiple_objects_ranged_read.py @@ -14,15 +14,20 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Downloads a range of bytes from multiple objects concurrently.""" +"""Downloads a range of bytes from multiple objects concurrently. +Example usage: + ```python samples/snippets/zonal_buckets/storage_open_multiple_objects_ranged_read.py \ + --bucket_name \ + --object_names ``` +""" import argparse import asyncio from io import BytesIO -from google.cloud.storage._experimental.asyncio.async_grpc_client import ( +from google.cloud.storage.asyncio.async_grpc_client import ( AsyncGrpcClient, ) -from google.cloud.storage._experimental.asyncio.async_multi_range_downloader import ( +from google.cloud.storage.asyncio.async_multi_range_downloader import ( AsyncMultiRangeDownloader, ) diff --git a/samples/snippets/zonal_buckets/storage_open_object_multiple_ranged_read.py b/samples/snippets/zonal_buckets/storage_open_object_multiple_ranged_read.py index 02c3bace5..b0f64c486 100644 --- a/samples/snippets/zonal_buckets/storage_open_object_multiple_ranged_read.py +++ b/samples/snippets/zonal_buckets/storage_open_object_multiple_ranged_read.py @@ -18,8 +18,8 @@ import asyncio from io import BytesIO -from google.cloud.storage._experimental.asyncio.async_grpc_client import AsyncGrpcClient -from google.cloud.storage._experimental.asyncio.async_multi_range_downloader import ( +from google.cloud.storage.asyncio.async_grpc_client import AsyncGrpcClient +from google.cloud.storage.asyncio.async_multi_range_downloader import ( AsyncMultiRangeDownloader, ) diff --git a/samples/snippets/zonal_buckets/storage_open_object_read_full_object.py b/samples/snippets/zonal_buckets/storage_open_object_read_full_object.py index b4cad6718..2e18caabe 100644 --- a/samples/snippets/zonal_buckets/storage_open_object_read_full_object.py +++ 
b/samples/snippets/zonal_buckets/storage_open_object_read_full_object.py @@ -18,8 +18,8 @@ import asyncio from io import BytesIO -from google.cloud.storage._experimental.asyncio.async_grpc_client import AsyncGrpcClient -from google.cloud.storage._experimental.asyncio.async_multi_range_downloader import ( +from google.cloud.storage.asyncio.async_grpc_client import AsyncGrpcClient +from google.cloud.storage.asyncio.async_multi_range_downloader import ( AsyncMultiRangeDownloader, ) diff --git a/samples/snippets/zonal_buckets/storage_open_object_single_ranged_read.py b/samples/snippets/zonal_buckets/storage_open_object_single_ranged_read.py index b013cc938..74bec43f6 100644 --- a/samples/snippets/zonal_buckets/storage_open_object_single_ranged_read.py +++ b/samples/snippets/zonal_buckets/storage_open_object_single_ranged_read.py @@ -18,8 +18,8 @@ import asyncio from io import BytesIO -from google.cloud.storage._experimental.asyncio.async_grpc_client import AsyncGrpcClient -from google.cloud.storage._experimental.asyncio.async_multi_range_downloader import ( +from google.cloud.storage.asyncio.async_grpc_client import AsyncGrpcClient +from google.cloud.storage.asyncio.async_multi_range_downloader import ( AsyncMultiRangeDownloader, ) diff --git a/samples/snippets/zonal_buckets/storage_pause_and_resume_appendable_upload.py b/samples/snippets/zonal_buckets/storage_pause_and_resume_appendable_upload.py index 3fb17ceae..c758dc641 100644 --- a/samples/snippets/zonal_buckets/storage_pause_and_resume_appendable_upload.py +++ b/samples/snippets/zonal_buckets/storage_pause_and_resume_appendable_upload.py @@ -17,10 +17,10 @@ import argparse import asyncio -from google.cloud.storage._experimental.asyncio.async_appendable_object_writer import ( +from google.cloud.storage.asyncio.async_appendable_object_writer import ( AsyncAppendableObjectWriter, ) -from google.cloud.storage._experimental.asyncio.async_grpc_client import AsyncGrpcClient +from 
google.cloud.storage.asyncio.async_grpc_client import AsyncGrpcClient # [START storage_pause_and_resume_appendable_upload] diff --git a/samples/snippets/zonal_buckets/storage_read_appendable_object_tail.py b/samples/snippets/zonal_buckets/storage_read_appendable_object_tail.py index 1134f28d6..9e4dcd738 100644 --- a/samples/snippets/zonal_buckets/storage_read_appendable_object_tail.py +++ b/samples/snippets/zonal_buckets/storage_read_appendable_object_tail.py @@ -20,11 +20,11 @@ from datetime import datetime from io import BytesIO -from google.cloud.storage._experimental.asyncio.async_appendable_object_writer import ( +from google.cloud.storage.asyncio.async_appendable_object_writer import ( AsyncAppendableObjectWriter, ) -from google.cloud.storage._experimental.asyncio.async_grpc_client import AsyncGrpcClient -from google.cloud.storage._experimental.asyncio.async_multi_range_downloader import ( +from google.cloud.storage.asyncio.async_grpc_client import AsyncGrpcClient +from google.cloud.storage.asyncio.async_multi_range_downloader import ( AsyncMultiRangeDownloader, ) diff --git a/samples/snippets/zonal_buckets/zonal_snippets_test.py b/samples/snippets/zonal_buckets/zonal_snippets_test.py index 736576eb5..6852efe22 100644 --- a/samples/snippets/zonal_buckets/zonal_snippets_test.py +++ b/samples/snippets/zonal_buckets/zonal_snippets_test.py @@ -20,8 +20,8 @@ from google.cloud.storage import Client import contextlib -from google.cloud.storage._experimental.asyncio.async_grpc_client import AsyncGrpcClient -from google.cloud.storage._experimental.asyncio.async_appendable_object_writer import ( +from google.cloud.storage.asyncio.async_grpc_client import AsyncGrpcClient +from google.cloud.storage.asyncio.async_appendable_object_writer import ( AsyncAppendableObjectWriter, ) diff --git a/tests/perf/microbenchmarks/conftest.py b/tests/perf/microbenchmarks/conftest.py index c09e9ce93..e748c6e43 100644 --- a/tests/perf/microbenchmarks/conftest.py +++ 
b/tests/perf/microbenchmarks/conftest.py @@ -22,10 +22,10 @@ import os import uuid from google.cloud import storage -from google.cloud.storage._experimental.asyncio.async_appendable_object_writer import ( +from google.cloud.storage.asyncio.async_appendable_object_writer import ( AsyncAppendableObjectWriter, ) -from google.cloud.storage._experimental.asyncio.async_grpc_client import AsyncGrpcClient +from google.cloud.storage.asyncio.async_grpc_client import AsyncGrpcClient from tests.perf.microbenchmarks.writes.parameters import WriteParameters _OBJECT_NAME_PREFIX = "micro-benchmark" diff --git a/tests/perf/microbenchmarks/reads/test_reads.py b/tests/perf/microbenchmarks/reads/test_reads.py index 2b5f80d42..d51102cea 100644 --- a/tests/perf/microbenchmarks/reads/test_reads.py +++ b/tests/perf/microbenchmarks/reads/test_reads.py @@ -30,8 +30,8 @@ import pytest -from google.cloud.storage._experimental.asyncio.async_grpc_client import AsyncGrpcClient -from google.cloud.storage._experimental.asyncio.async_multi_range_downloader import ( +from google.cloud.storage.asyncio.async_grpc_client import AsyncGrpcClient +from google.cloud.storage.asyncio.async_multi_range_downloader import ( AsyncMultiRangeDownloader, ) from tests.perf.microbenchmarks._utils import publish_benchmark_extra_info diff --git a/tests/perf/microbenchmarks/writes/test_writes.py b/tests/perf/microbenchmarks/writes/test_writes.py index 5648ada0d..02a0f5e4f 100644 --- a/tests/perf/microbenchmarks/writes/test_writes.py +++ b/tests/perf/microbenchmarks/writes/test_writes.py @@ -30,8 +30,8 @@ import logging import pytest -from google.cloud.storage._experimental.asyncio.async_grpc_client import AsyncGrpcClient -from google.cloud.storage._experimental.asyncio.async_appendable_object_writer import ( +from google.cloud.storage.asyncio.async_grpc_client import AsyncGrpcClient +from google.cloud.storage.asyncio.async_appendable_object_writer import ( AsyncAppendableObjectWriter, ) diff --git 
a/tests/system/test_zonal.py b/tests/system/test_zonal.py index 4d46353e2..ed59f9a4a 100644 --- a/tests/system/test_zonal.py +++ b/tests/system/test_zonal.py @@ -11,12 +11,12 @@ import gc # current library imports -from google.cloud.storage._experimental.asyncio.async_grpc_client import AsyncGrpcClient -from google.cloud.storage._experimental.asyncio.async_appendable_object_writer import ( +from google.cloud.storage.asyncio.async_grpc_client import AsyncGrpcClient +from google.cloud.storage.asyncio.async_appendable_object_writer import ( AsyncAppendableObjectWriter, _DEFAULT_FLUSH_INTERVAL_BYTES, ) -from google.cloud.storage._experimental.asyncio.async_multi_range_downloader import ( +from google.cloud.storage.asyncio.async_multi_range_downloader import ( AsyncMultiRangeDownloader, ) from google.api_core.exceptions import FailedPrecondition diff --git a/tests/unit/asyncio/retry/test_bidi_stream_retry_manager.py b/tests/unit/asyncio/retry/test_bidi_stream_retry_manager.py index 6c837ec5c..e0eba9030 100644 --- a/tests/unit/asyncio/retry/test_bidi_stream_retry_manager.py +++ b/tests/unit/asyncio/retry/test_bidi_stream_retry_manager.py @@ -18,10 +18,10 @@ from google.api_core import exceptions from google.api_core.retry_async import AsyncRetry -from google.cloud.storage._experimental.asyncio.retry import ( +from google.cloud.storage.asyncio.retry import ( bidi_stream_retry_manager as manager, ) -from google.cloud.storage._experimental.asyncio.retry import base_strategy +from google.cloud.storage.asyncio.retry import base_strategy def _is_retriable(exc): diff --git a/tests/unit/asyncio/retry/test_reads_resumption_strategy.py b/tests/unit/asyncio/retry/test_reads_resumption_strategy.py index 62a05f19a..1e31961b6 100644 --- a/tests/unit/asyncio/retry/test_reads_resumption_strategy.py +++ b/tests/unit/asyncio/retry/test_reads_resumption_strategy.py @@ -20,7 +20,7 @@ from google.api_core import exceptions from google.cloud import _storage_v2 as storage_v2 -from 
google.cloud.storage._experimental.asyncio.retry.reads_resumption_strategy import ( +from google.cloud.storage.asyncio.retry.reads_resumption_strategy import ( _DownloadState, _ReadResumptionStrategy, ) @@ -28,7 +28,7 @@ _READ_ID = 1 LOGGER_NAME = ( - "google.cloud.storage._experimental.asyncio.retry.reads_resumption_strategy" + "google.cloud.storage.asyncio.retry.reads_resumption_strategy" ) diff --git a/tests/unit/asyncio/retry/test_writes_resumption_strategy.py b/tests/unit/asyncio/retry/test_writes_resumption_strategy.py index ce48e21c7..ca354e84a 100644 --- a/tests/unit/asyncio/retry/test_writes_resumption_strategy.py +++ b/tests/unit/asyncio/retry/test_writes_resumption_strategy.py @@ -22,7 +22,7 @@ from google.api_core import exceptions from google.cloud._storage_v2.types import storage as storage_type -from google.cloud.storage._experimental.asyncio.retry.writes_resumption_strategy import ( +from google.cloud.storage.asyncio.retry.writes_resumption_strategy import ( _WriteState, _WriteResumptionStrategy, ) @@ -352,7 +352,7 @@ async def test_recover_state_on_failure_trailer_metadata_redirect(self, strategy ] with mock.patch( - "google.cloud.storage._experimental.asyncio.retry.writes_resumption_strategy._extract_bidi_writes_redirect_proto", + "google.cloud.storage.asyncio.retry.writes_resumption_strategy._extract_bidi_writes_redirect_proto", return_value=redirect_proto, ): await strategy.recover_state_on_failure( diff --git a/tests/unit/asyncio/test_async_appendable_object_writer.py b/tests/unit/asyncio/test_async_appendable_object_writer.py index 88f4864de..02fb3238d 100644 --- a/tests/unit/asyncio/test_async_appendable_object_writer.py +++ b/tests/unit/asyncio/test_async_appendable_object_writer.py @@ -21,7 +21,7 @@ from google.rpc import status_pb2 from google.cloud._storage_v2.types import storage as storage_type from google.cloud._storage_v2.types.storage import BidiWriteObjectRedirectedError -from 
google.cloud.storage._experimental.asyncio.async_appendable_object_writer import ( +from google.cloud.storage.asyncio.async_appendable_object_writer import ( AsyncAppendableObjectWriter, _is_write_retryable, _MAX_CHUNK_SIZE_BYTES, @@ -91,7 +91,7 @@ def mock_appendable_writer(): mock_client.grpc_client = mock.AsyncMock() # Internal stream class patch stream_patcher = mock.patch( - "google.cloud.storage._experimental.asyncio.async_appendable_object_writer._AsyncWriteObjectStream" + "google.cloud.storage.asyncio.async_appendable_object_writer._AsyncWriteObjectStream" ) mock_stream_cls = stream_patcher.start() mock_stream = mock_stream_cls.return_value @@ -156,7 +156,7 @@ def test_init_validation_multiple_raises(self, mock_appendable_writer): def test_init_raises_if_crc32c_missing(self, mock_appendable_writer): with mock.patch( - "google.cloud.storage._experimental.asyncio._utils.google_crc32c" + "google.cloud.storage.asyncio._utils.google_crc32c" ) as mock_crc: mock_crc.implementation = "python" with pytest.raises(exceptions.FailedPrecondition): @@ -206,7 +206,7 @@ def test_on_open_error_redirection(self, mock_appendable_writer): ) with mock.patch( - "google.cloud.storage._experimental.asyncio.async_appendable_object_writer._extract_bidi_writes_redirect_proto", + "google.cloud.storage.asyncio.async_appendable_object_writer._extract_bidi_writes_redirect_proto", return_value=redirect, ): writer._on_open_error(exceptions.Aborted("redirect")) @@ -230,7 +230,7 @@ async def test_append_basic_success(self, mock_appendable_writer): data = b"test-data" with mock.patch( - "google.cloud.storage._experimental.asyncio.async_appendable_object_writer._BidiStreamRetryManager" + "google.cloud.storage.asyncio.async_appendable_object_writer._BidiStreamRetryManager" ) as MockManager: async def mock_execute(state, policy): @@ -277,7 +277,7 @@ async def mock_open(metadata=None): writer, "open", side_effect=mock_open ) as mock_writer_open: with mock.patch( - 
"google.cloud.storage._experimental.asyncio.async_appendable_object_writer._BidiStreamRetryManager" + "google.cloud.storage.asyncio.async_appendable_object_writer._BidiStreamRetryManager" ) as MockManager: async def mock_execute(state, policy): diff --git a/tests/unit/asyncio/test_async_grpc_client.py b/tests/unit/asyncio/test_async_grpc_client.py index b0edfafa6..3d897d336 100644 --- a/tests/unit/asyncio/test_async_grpc_client.py +++ b/tests/unit/asyncio/test_async_grpc_client.py @@ -16,8 +16,8 @@ from google.auth import credentials as auth_credentials from google.auth.credentials import AnonymousCredentials from google.api_core import client_info as client_info_lib +from google.cloud.storage.asyncio import async_grpc_client from google.cloud.storage import __version__ -from google.cloud.storage._experimental.asyncio import async_grpc_client def _make_credentials(spec=None): diff --git a/tests/unit/asyncio/test_async_multi_range_downloader.py b/tests/unit/asyncio/test_async_multi_range_downloader.py index 09cf0fa09..8d4d6d31d 100644 --- a/tests/unit/asyncio/test_async_multi_range_downloader.py +++ b/tests/unit/asyncio/test_async_multi_range_downloader.py @@ -20,10 +20,10 @@ from google.api_core import exceptions from google_crc32c import Checksum -from google.cloud.storage._experimental.asyncio.async_multi_range_downloader import ( +from google.cloud.storage.asyncio.async_multi_range_downloader import ( AsyncMultiRangeDownloader, ) -from google.cloud.storage._experimental.asyncio import async_read_object_stream +from google.cloud.storage.asyncio import async_read_object_stream from io import BytesIO from google.cloud.storage.exceptions import DataCorruption @@ -68,7 +68,7 @@ async def _make_mock_mrd( return mrd, mock_client @mock.patch( - "google.cloud.storage._experimental.asyncio.async_multi_range_downloader._AsyncReadObjectStream" + "google.cloud.storage.asyncio.async_multi_range_downloader._AsyncReadObjectStream" ) @pytest.mark.asyncio async def 
test_create_mrd(self, mock_cls_async_read_object_stream): @@ -95,10 +95,10 @@ async def test_create_mrd(self, mock_cls_async_read_object_stream): assert mrd.is_stream_open @mock.patch( - "google.cloud.storage._experimental.asyncio.async_multi_range_downloader.generate_random_56_bit_integer" + "google.cloud.storage.asyncio.async_multi_range_downloader.generate_random_56_bit_integer" ) @mock.patch( - "google.cloud.storage._experimental.asyncio.async_multi_range_downloader._AsyncReadObjectStream" + "google.cloud.storage.asyncio.async_multi_range_downloader._AsyncReadObjectStream" ) @pytest.mark.asyncio async def test_download_ranges_via_async_gather( @@ -166,10 +166,10 @@ async def test_download_ranges_via_async_gather( assert second_buffer.getvalue() == data[10:16] @mock.patch( - "google.cloud.storage._experimental.asyncio.async_multi_range_downloader.generate_random_56_bit_integer" + "google.cloud.storage.asyncio.async_multi_range_downloader.generate_random_56_bit_integer" ) @mock.patch( - "google.cloud.storage._experimental.asyncio.async_multi_range_downloader._AsyncReadObjectStream" + "google.cloud.storage.asyncio.async_multi_range_downloader._AsyncReadObjectStream" ) @pytest.mark.asyncio async def test_download_ranges( @@ -236,7 +236,7 @@ async def test_downloading_ranges_with_more_than_1000_should_throw_error(self): ) @mock.patch( - "google.cloud.storage._experimental.asyncio.async_multi_range_downloader._AsyncReadObjectStream" + "google.cloud.storage.asyncio.async_multi_range_downloader._AsyncReadObjectStream" ) @pytest.mark.asyncio async def test_opening_mrd_more_than_once_should_throw_error( @@ -255,7 +255,7 @@ async def test_opening_mrd_more_than_once_should_throw_error( assert str(exc.value) == "Underlying bidi-gRPC stream is already open" @mock.patch( - "google.cloud.storage._experimental.asyncio.async_multi_range_downloader._AsyncReadObjectStream" + "google.cloud.storage.asyncio.async_multi_range_downloader._AsyncReadObjectStream" ) @pytest.mark.asyncio 
async def test_close_mrd(self, mock_cls_async_read_object_stream): @@ -302,7 +302,7 @@ async def test_downloading_without_opening_should_throw_error(self): assert str(exc.value) == "Underlying bidi-gRPC stream is not open" assert not mrd.is_stream_open - @mock.patch("google.cloud.storage._experimental.asyncio._utils.google_crc32c") + @mock.patch("google.cloud.storage.asyncio._utils.google_crc32c") def test_init_raises_if_crc32c_c_extension_is_missing( self, mock_google_crc32c ): @@ -318,12 +318,12 @@ def test_init_raises_if_crc32c_c_extension_is_missing( @pytest.mark.asyncio @mock.patch( - "google.cloud.storage._experimental.asyncio.retry.reads_resumption_strategy.Checksum" + "google.cloud.storage.asyncio.retry.reads_resumption_strategy.Checksum" ) async def test_download_ranges_raises_on_checksum_mismatch( self, mock_checksum_class ): - from google.cloud.storage._experimental.asyncio.async_multi_range_downloader import ( + from google.cloud.storage.asyncio.async_multi_range_downloader import ( AsyncMultiRangeDownloader, ) @@ -359,7 +359,7 @@ async def test_download_ranges_raises_on_checksum_mismatch( with pytest.raises(DataCorruption) as exc_info: with mock.patch( - "google.cloud.storage._experimental.asyncio.async_multi_range_downloader.generate_random_56_bit_integer", + "google.cloud.storage.asyncio.async_multi_range_downloader.generate_random_56_bit_integer", return_value=0, ): await mrd.download_ranges([(0, len(test_data), BytesIO())]) @@ -368,11 +368,11 @@ async def test_download_ranges_raises_on_checksum_mismatch( mock_checksum_class.assert_called_once_with(test_data) @mock.patch( - "google.cloud.storage._experimental.asyncio.async_multi_range_downloader.AsyncMultiRangeDownloader.open", + "google.cloud.storage.asyncio.async_multi_range_downloader.AsyncMultiRangeDownloader.open", new_callable=AsyncMock, ) @mock.patch( - "google.cloud.storage._experimental.asyncio.async_multi_range_downloader.AsyncMultiRangeDownloader.close", + 
"google.cloud.storage.asyncio.async_multi_range_downloader.AsyncMultiRangeDownloader.close", new_callable=AsyncMock, ) @pytest.mark.asyncio diff --git a/tests/unit/asyncio/test_async_read_object_stream.py b/tests/unit/asyncio/test_async_read_object_stream.py index 18e0c464d..4e8a494b5 100644 --- a/tests/unit/asyncio/test_async_read_object_stream.py +++ b/tests/unit/asyncio/test_async_read_object_stream.py @@ -17,8 +17,8 @@ from unittest.mock import AsyncMock from google.cloud import _storage_v2 -from google.cloud.storage._experimental.asyncio import async_read_object_stream -from google.cloud.storage._experimental.asyncio.async_read_object_stream import ( +from google.cloud.storage.asyncio import async_read_object_stream +from google.cloud.storage.asyncio.async_read_object_stream import ( _AsyncReadObjectStream, ) @@ -80,10 +80,10 @@ async def instantiate_read_obj_stream_with_read_handle( @mock.patch( - "google.cloud.storage._experimental.asyncio.async_read_object_stream.AsyncBidiRpc" + "google.cloud.storage.asyncio.async_read_object_stream.AsyncBidiRpc" ) @mock.patch( - "google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" + "google.cloud.storage.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" ) def test_init_with_bucket_object_generation(mock_client, mock_async_bidi_rpc): # Arrange @@ -111,10 +111,10 @@ def test_init_with_bucket_object_generation(mock_client, mock_async_bidi_rpc): @mock.patch( - "google.cloud.storage._experimental.asyncio.async_read_object_stream.AsyncBidiRpc" + "google.cloud.storage.asyncio.async_read_object_stream.AsyncBidiRpc" ) @mock.patch( - "google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" + "google.cloud.storage.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" ) @pytest.mark.asyncio async def test_open(mock_client, mock_cls_async_bidi_rpc): @@ -137,10 +137,10 @@ async def test_open(mock_client, mock_cls_async_bidi_rpc): @mock.patch( - 
"google.cloud.storage._experimental.asyncio.async_read_object_stream.AsyncBidiRpc" + "google.cloud.storage.asyncio.async_read_object_stream.AsyncBidiRpc" ) @mock.patch( - "google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" + "google.cloud.storage.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" ) @pytest.mark.asyncio async def test_open_with_read_handle(mock_client, mock_cls_async_bidi_rpc): @@ -163,10 +163,10 @@ async def test_open_with_read_handle(mock_client, mock_cls_async_bidi_rpc): @mock.patch( - "google.cloud.storage._experimental.asyncio.async_read_object_stream.AsyncBidiRpc" + "google.cloud.storage.asyncio.async_read_object_stream.AsyncBidiRpc" ) @mock.patch( - "google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" + "google.cloud.storage.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" ) @pytest.mark.asyncio async def test_open_when_already_open_should_raise_error( @@ -186,10 +186,10 @@ async def test_open_when_already_open_should_raise_error( @mock.patch( - "google.cloud.storage._experimental.asyncio.async_read_object_stream.AsyncBidiRpc" + "google.cloud.storage.asyncio.async_read_object_stream.AsyncBidiRpc" ) @mock.patch( - "google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" + "google.cloud.storage.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" ) @pytest.mark.asyncio async def test_close(mock_client, mock_cls_async_bidi_rpc): @@ -209,10 +209,10 @@ async def test_close(mock_client, mock_cls_async_bidi_rpc): @mock.patch( - "google.cloud.storage._experimental.asyncio.async_read_object_stream.AsyncBidiRpc" + "google.cloud.storage.asyncio.async_read_object_stream.AsyncBidiRpc" ) @mock.patch( - "google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" + "google.cloud.storage.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" ) @pytest.mark.asyncio async def test_requests_done(mock_client, 
mock_cls_async_bidi_rpc): @@ -233,10 +233,10 @@ async def test_requests_done(mock_client, mock_cls_async_bidi_rpc): @mock.patch( - "google.cloud.storage._experimental.asyncio.async_read_object_stream.AsyncBidiRpc" + "google.cloud.storage.asyncio.async_read_object_stream.AsyncBidiRpc" ) @mock.patch( - "google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" + "google.cloud.storage.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" ) @pytest.mark.asyncio async def test_close_without_open_should_raise_error( @@ -256,10 +256,10 @@ async def test_close_without_open_should_raise_error( @mock.patch( - "google.cloud.storage._experimental.asyncio.async_read_object_stream.AsyncBidiRpc" + "google.cloud.storage.asyncio.async_read_object_stream.AsyncBidiRpc" ) @mock.patch( - "google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" + "google.cloud.storage.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" ) @pytest.mark.asyncio async def test_send(mock_client, mock_cls_async_bidi_rpc): @@ -279,10 +279,10 @@ async def test_send(mock_client, mock_cls_async_bidi_rpc): @mock.patch( - "google.cloud.storage._experimental.asyncio.async_read_object_stream.AsyncBidiRpc" + "google.cloud.storage.asyncio.async_read_object_stream.AsyncBidiRpc" ) @mock.patch( - "google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" + "google.cloud.storage.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" ) @pytest.mark.asyncio async def test_send_without_open_should_raise_error( @@ -302,10 +302,10 @@ async def test_send_without_open_should_raise_error( @mock.patch( - "google.cloud.storage._experimental.asyncio.async_read_object_stream.AsyncBidiRpc" + "google.cloud.storage.asyncio.async_read_object_stream.AsyncBidiRpc" ) @mock.patch( - "google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" + 
"google.cloud.storage.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" ) @pytest.mark.asyncio async def test_recv(mock_client, mock_cls_async_bidi_rpc): @@ -327,10 +327,10 @@ async def test_recv(mock_client, mock_cls_async_bidi_rpc): @mock.patch( - "google.cloud.storage._experimental.asyncio.async_read_object_stream.AsyncBidiRpc" + "google.cloud.storage.asyncio.async_read_object_stream.AsyncBidiRpc" ) @mock.patch( - "google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" + "google.cloud.storage.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" ) @pytest.mark.asyncio async def test_recv_without_open_should_raise_error( @@ -350,10 +350,10 @@ async def test_recv_without_open_should_raise_error( @mock.patch( - "google.cloud.storage._experimental.asyncio.async_read_object_stream.AsyncBidiRpc" + "google.cloud.storage.asyncio.async_read_object_stream.AsyncBidiRpc" ) @mock.patch( - "google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" + "google.cloud.storage.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" ) @pytest.mark.asyncio async def test_recv_updates_read_handle_on_refresh( diff --git a/tests/unit/asyncio/test_async_write_object_stream.py b/tests/unit/asyncio/test_async_write_object_stream.py index 7bfa2cea0..f4a7862d6 100644 --- a/tests/unit/asyncio/test_async_write_object_stream.py +++ b/tests/unit/asyncio/test_async_write_object_stream.py @@ -16,7 +16,7 @@ from unittest.mock import AsyncMock, MagicMock import pytest -from google.cloud.storage._experimental.asyncio.async_write_object_stream import ( +from google.cloud.storage.asyncio.async_write_object_stream import ( _AsyncWriteObjectStream, ) from google.cloud import _storage_v2 @@ -72,7 +72,7 @@ def test_init_raises_value_error(self, mock_client): # ------------------------------------------------------------------------- @mock.patch( - "google.cloud.storage._experimental.asyncio.async_write_object_stream.AsyncBidiRpc" + 
"google.cloud.storage.asyncio.async_write_object_stream.AsyncBidiRpc" ) @pytest.mark.asyncio async def test_open_new_object(self, mock_rpc_cls, mock_client): @@ -102,7 +102,7 @@ async def test_open_new_object(self, mock_rpc_cls, mock_client): assert stream.generation_number == GENERATION @mock.patch( - "google.cloud.storage._experimental.asyncio.async_write_object_stream.AsyncBidiRpc" + "google.cloud.storage.asyncio.async_write_object_stream.AsyncBidiRpc" ) @pytest.mark.asyncio async def test_open_existing_object_with_token(self, mock_rpc_cls, mock_client): @@ -133,7 +133,7 @@ async def test_open_existing_object_with_token(self, mock_rpc_cls, mock_client): assert stream.persisted_size == 1024 @mock.patch( - "google.cloud.storage._experimental.asyncio.async_write_object_stream.AsyncBidiRpc" + "google.cloud.storage.asyncio.async_write_object_stream.AsyncBidiRpc" ) @pytest.mark.asyncio async def test_open_metadata_merging(self, mock_rpc_cls, mock_client): @@ -167,7 +167,7 @@ async def test_open_already_open_raises(self, mock_client): # ------------------------------------------------------------------------- @mock.patch( - "google.cloud.storage._experimental.asyncio.async_write_object_stream.AsyncBidiRpc" + "google.cloud.storage.asyncio.async_write_object_stream.AsyncBidiRpc" ) @pytest.mark.asyncio async def test_send_and_recv_logic(self, mock_rpc_cls, mock_client): diff --git a/tests/unit/test_grpc_client.py b/tests/unit/test_grpc_client.py index 9eca1b280..eb048ff42 100644 --- a/tests/unit/test_grpc_client.py +++ b/tests/unit/test_grpc_client.py @@ -16,6 +16,7 @@ from unittest import mock from google.auth import credentials as auth_credentials from google.api_core import client_options as client_options_lib +from google.cloud.storage import grpc_client def _make_credentials(spec=None): @@ -30,7 +31,6 @@ class TestGrpcClient(unittest.TestCase): def test_constructor_defaults_and_options( self, mock_storage_client, mock_base_client ): - from 
google.cloud.storage._experimental import grpc_client mock_transport_cls = mock.MagicMock() mock_storage_client.get_transport_class.return_value = mock_transport_cls @@ -71,12 +71,11 @@ def test_constructor_defaults_and_options( # 4. Assert the client instance holds the mocked GAPIC client. self.assertIs(client.grpc_client, mock_storage_client.return_value) - @mock.patch("google.cloud.storage._experimental.grpc_client.ClientWithProject") + @mock.patch("google.cloud.storage.grpc_client.ClientWithProject") @mock.patch("google.cloud._storage_v2.StorageClient") def test_constructor_disables_direct_path( self, mock_storage_client, mock_base_client ): - from google.cloud.storage._experimental import grpc_client mock_transport_cls = mock.MagicMock() mock_storage_client.get_transport_class.return_value = mock_transport_cls @@ -94,12 +93,11 @@ def test_constructor_disables_direct_path( attempt_direct_path=False ) - @mock.patch("google.cloud.storage._experimental.grpc_client.ClientWithProject") + @mock.patch("google.cloud.storage.grpc_client.ClientWithProject") @mock.patch("google.cloud._storage_v2.StorageClient") def test_constructor_initialize_with_api_key( self, mock_storage_client, mock_base_client ): - from google.cloud.storage._experimental import grpc_client mock_transport_cls = mock.MagicMock() mock_storage_client.get_transport_class.return_value = mock_transport_cls @@ -124,10 +122,9 @@ def test_constructor_initialize_with_api_key( client_options={"api_key": "test-api-key"}, ) - @mock.patch("google.cloud.storage._experimental.grpc_client.ClientWithProject") + @mock.patch("google.cloud.storage.grpc_client.ClientWithProject") @mock.patch("google.cloud._storage_v2.StorageClient") def test_grpc_client_property(self, mock_storage_client, mock_base_client): - from google.cloud.storage._experimental import grpc_client mock_creds = _make_credentials() mock_base_client.return_value._credentials = mock_creds @@ -138,12 +135,11 @@ def test_grpc_client_property(self, 
mock_storage_client, mock_base_client): self.assertIs(retrieved_client, mock_storage_client.return_value) - @mock.patch("google.cloud.storage._experimental.grpc_client.ClientWithProject") + @mock.patch("google.cloud.storage.grpc_client.ClientWithProject") @mock.patch("google.cloud._storage_v2.StorageClient") def test_constructor_with_api_key_and_client_options( self, mock_storage_client, mock_base_client ): - from google.cloud.storage._experimental import grpc_client mock_transport_cls = mock.MagicMock() mock_storage_client.get_transport_class.return_value = mock_transport_cls @@ -173,12 +169,11 @@ def test_constructor_with_api_key_and_client_options( ) self.assertEqual(client_options_obj.api_key, "new-test-key") - @mock.patch("google.cloud.storage._experimental.grpc_client.ClientWithProject") + @mock.patch("google.cloud.storage.grpc_client.ClientWithProject") @mock.patch("google.cloud._storage_v2.StorageClient") def test_constructor_with_api_key_and_dict_options( self, mock_storage_client, mock_base_client ): - from google.cloud.storage._experimental import grpc_client mock_creds = _make_credentials() mock_base_instance = mock_base_client.return_value From 08bc7082db7392f13bc8c51511b4afa9c7b157c9 Mon Sep 17 00:00:00 2001 From: Pulkit Aggarwal <54775856+Pulkit0110@users.noreply.github.com> Date: Thu, 29 Jan 2026 16:09:57 +0530 Subject: [PATCH 18/23] feat: update generation for MRD (#1730) Adding a support to pass `generation` in the constructor of MRD. It is done to make it compatible with the other public interfaces. Earlier, `generation_number` was exposed as the parameter which would still be valid after these changes. But using `generation` is more preferred. 
--- .../asyncio/async_multi_range_downloader.py | 38 ++++++++---- tests/system/test_zonal.py | 3 +- .../retry/test_reads_resumption_strategy.py | 4 +- .../test_async_multi_range_downloader.py | 58 ++++++++++++++++--- .../asyncio/test_async_read_object_stream.py | 48 ++++----------- .../asyncio/test_async_write_object_stream.py | 16 ++--- tests/unit/test_grpc_client.py | 6 -- 7 files changed, 95 insertions(+), 78 deletions(-) diff --git a/google/cloud/storage/asyncio/async_multi_range_downloader.py b/google/cloud/storage/asyncio/async_multi_range_downloader.py index 993fc9522..8ad4d319f 100644 --- a/google/cloud/storage/asyncio/async_multi_range_downloader.py +++ b/google/cloud/storage/asyncio/async_multi_range_downloader.py @@ -128,10 +128,11 @@ async def create_mrd( client: AsyncGrpcClient, bucket_name: str, object_name: str, - generation_number: Optional[int] = None, + generation: Optional[int] = None, read_handle: Optional[_storage_v2.BidiReadHandle] = None, retry_policy: Optional[AsyncRetry] = None, metadata: Optional[List[Tuple[str, str]]] = None, + **kwargs, ) -> AsyncMultiRangeDownloader: """Initializes a MultiRangeDownloader and opens the underlying bidi-gRPC object for reading. @@ -145,8 +146,8 @@ async def create_mrd( :type object_name: str :param object_name: The name of the object to be read. - :type generation_number: int - :param generation_number: (Optional) If present, selects a specific + :type generation: int + :param generation: (Optional) If present, selects a specific revision of this object. :type read_handle: _storage_v2.BidiReadHandle @@ -162,7 +163,14 @@ async def create_mrd( :rtype: :class:`~google.cloud.storage.asyncio.async_multi_range_downloader.AsyncMultiRangeDownloader` :returns: An initialized AsyncMultiRangeDownloader instance for reading. 
""" - mrd = cls(client, bucket_name, object_name, generation_number, read_handle) + mrd = cls( + client, + bucket_name, + object_name, + generation=generation, + read_handle=read_handle, + **kwargs, + ) await mrd.open(retry_policy=retry_policy, metadata=metadata) return mrd @@ -171,8 +179,9 @@ def __init__( client: AsyncGrpcClient, bucket_name: str, object_name: str, - generation_number: Optional[int] = None, + generation: Optional[int] = None, read_handle: Optional[_storage_v2.BidiReadHandle] = None, + **kwargs, ) -> None: """Constructor for AsyncMultiRangeDownloader, clients are not adviced to use it directly. Instead it's adviced to use the classmethod `create_mrd`. @@ -186,20 +195,27 @@ def __init__( :type object_name: str :param object_name: The name of the object to be read. - :type generation_number: int - :param generation_number: (Optional) If present, selects a specific revision of + :type generation: int + :param generation: (Optional) If present, selects a specific revision of this object. :type read_handle: _storage_v2.BidiReadHandle :param read_handle: (Optional) An existing read handle. """ + if "generation_number" in kwargs: + if generation is not None: + raise TypeError( + "Cannot set both 'generation' and 'generation_number'. " + "Use 'generation' for new code." 
+ ) + generation = kwargs.pop("generation_number") raise_if_no_fast_crc32c() self.client = client self.bucket_name = bucket_name self.object_name = object_name - self.generation_number = generation_number + self.generation = generation self.read_handle: Optional[_storage_v2.BidiReadHandle] = read_handle self.read_obj_str: Optional[_AsyncReadObjectStream] = None self._is_stream_open: bool = False @@ -276,7 +292,7 @@ async def _do_open(): client=self.client.grpc_client, bucket_name=self.bucket_name, object_name=self.object_name, - generation_number=self.generation_number, + generation_number=self.generation, read_handle=self.read_handle, ) @@ -291,7 +307,7 @@ async def _do_open(): ) if self.read_obj_str.generation_number: - self.generation_number = self.read_obj_str.generation_number + self.generation = self.read_obj_str.generation_number if self.read_obj_str.read_handle: self.read_handle = self.read_obj_str.read_handle if self.read_obj_str.persisted_size is not None: @@ -435,7 +451,7 @@ async def generator(): client=self.client.grpc_client, bucket_name=self.bucket_name, object_name=self.object_name, - generation_number=self.generation_number, + generation_number=self.generation, read_handle=current_handle, ) diff --git a/tests/system/test_zonal.py b/tests/system/test_zonal.py index ed59f9a4a..42164d364 100644 --- a/tests/system/test_zonal.py +++ b/tests/system/test_zonal.py @@ -264,7 +264,8 @@ async def _run(): event_loop.run_until_complete(_run()) -@pytest.mark.skip(reason='Flaky test b/478129078') + +@pytest.mark.skip(reason="Flaky test b/478129078") def test_mrd_open_with_read_handle(event_loop, grpc_client_direct): object_name = f"test_read_handl-{str(uuid.uuid4())[:4]}" diff --git a/tests/unit/asyncio/retry/test_reads_resumption_strategy.py b/tests/unit/asyncio/retry/test_reads_resumption_strategy.py index 1e31961b6..1055127eb 100644 --- a/tests/unit/asyncio/retry/test_reads_resumption_strategy.py +++ b/tests/unit/asyncio/retry/test_reads_resumption_strategy.py 
@@ -27,9 +27,7 @@ from google.cloud._storage_v2.types.storage import BidiReadObjectRedirectedError _READ_ID = 1 -LOGGER_NAME = ( - "google.cloud.storage.asyncio.retry.reads_resumption_strategy" -) +LOGGER_NAME = "google.cloud.storage.asyncio.retry.reads_resumption_strategy" class TestDownloadState(unittest.TestCase): diff --git a/tests/unit/asyncio/test_async_multi_range_downloader.py b/tests/unit/asyncio/test_async_multi_range_downloader.py index 8d4d6d31d..c912dc995 100644 --- a/tests/unit/asyncio/test_async_multi_range_downloader.py +++ b/tests/unit/asyncio/test_async_multi_range_downloader.py @@ -49,7 +49,7 @@ async def _make_mock_mrd( mock_cls_async_read_object_stream, bucket_name=_TEST_BUCKET_NAME, object_name=_TEST_OBJECT_NAME, - generation_number=_TEST_GENERATION_NUMBER, + generation=_TEST_GENERATION_NUMBER, read_handle=_TEST_READ_HANDLE, ): mock_client = mock.MagicMock() @@ -62,7 +62,7 @@ async def _make_mock_mrd( mock_stream.read_handle = _TEST_READ_HANDLE mrd = await AsyncMultiRangeDownloader.create_mrd( - mock_client, bucket_name, object_name, generation_number, read_handle + mock_client, bucket_name, object_name, generation, read_handle ) return mrd, mock_client @@ -89,7 +89,7 @@ async def test_create_mrd(self, mock_cls_async_read_object_stream): assert mrd.client == mock_client assert mrd.bucket_name == _TEST_BUCKET_NAME assert mrd.object_name == _TEST_OBJECT_NAME - assert mrd.generation_number == _TEST_GENERATION_NUMBER + assert mrd.generation == _TEST_GENERATION_NUMBER assert mrd.read_handle == _TEST_READ_HANDLE assert mrd.persisted_size == _TEST_OBJECT_SIZE assert mrd.is_stream_open @@ -303,9 +303,7 @@ async def test_downloading_without_opening_should_throw_error(self): assert not mrd.is_stream_open @mock.patch("google.cloud.storage.asyncio._utils.google_crc32c") - def test_init_raises_if_crc32c_c_extension_is_missing( - self, mock_google_crc32c - ): + def test_init_raises_if_crc32c_c_extension_is_missing(self, mock_google_crc32c): 
mock_google_crc32c.implementation = "python" mock_client = mock.MagicMock() @@ -317,9 +315,7 @@ def test_init_raises_if_crc32c_c_extension_is_missing( ) @pytest.mark.asyncio - @mock.patch( - "google.cloud.storage.asyncio.retry.reads_resumption_strategy.Checksum" - ) + @mock.patch("google.cloud.storage.asyncio.retry.reads_resumption_strategy.Checksum") async def test_download_ranges_raises_on_checksum_mismatch( self, mock_checksum_class ): @@ -405,3 +401,47 @@ async def close_side_effect(): mock_close.assert_called_once() assert not mrd.is_stream_open + + @mock.patch( + "google.cloud.storage.asyncio.async_multi_range_downloader._AsyncReadObjectStream" + ) + @pytest.mark.asyncio + async def test_create_mrd_with_generation_number( + self, mock_cls_async_read_object_stream + ): + # Arrange + mock_client = mock.MagicMock() + mock_client.grpc_client = mock.AsyncMock() + + mock_stream = mock_cls_async_read_object_stream.return_value + mock_stream.open = AsyncMock() + mock_stream.generation_number = _TEST_GENERATION_NUMBER + mock_stream.persisted_size = _TEST_OBJECT_SIZE + mock_stream.read_handle = _TEST_READ_HANDLE + + # Act + mrd = await AsyncMultiRangeDownloader.create_mrd( + mock_client, + _TEST_BUCKET_NAME, + _TEST_OBJECT_NAME, + generation_number=_TEST_GENERATION_NUMBER, + read_handle=_TEST_READ_HANDLE, + ) + + # Assert + assert mrd.generation == _TEST_GENERATION_NUMBER + + @pytest.mark.asyncio + async def test_create_mrd_with_both_generation_and_generation_number(self): + # Arrange + mock_client = mock.MagicMock() + + # Act & Assert + with pytest.raises(TypeError): + await AsyncMultiRangeDownloader.create_mrd( + mock_client, + _TEST_BUCKET_NAME, + _TEST_OBJECT_NAME, + generation=_TEST_GENERATION_NUMBER, + generation_number=_TEST_GENERATION_NUMBER, + ) diff --git a/tests/unit/asyncio/test_async_read_object_stream.py b/tests/unit/asyncio/test_async_read_object_stream.py index 4e8a494b5..2d2f28edd 100644 --- a/tests/unit/asyncio/test_async_read_object_stream.py +++ 
b/tests/unit/asyncio/test_async_read_object_stream.py @@ -79,9 +79,7 @@ async def instantiate_read_obj_stream_with_read_handle( return read_obj_stream -@mock.patch( - "google.cloud.storage.asyncio.async_read_object_stream.AsyncBidiRpc" -) +@mock.patch("google.cloud.storage.asyncio.async_read_object_stream.AsyncBidiRpc") @mock.patch( "google.cloud.storage.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" ) @@ -110,9 +108,7 @@ def test_init_with_bucket_object_generation(mock_client, mock_async_bidi_rpc): assert read_obj_stream.rpc == rpc_sentinel -@mock.patch( - "google.cloud.storage.asyncio.async_read_object_stream.AsyncBidiRpc" -) +@mock.patch("google.cloud.storage.asyncio.async_read_object_stream.AsyncBidiRpc") @mock.patch( "google.cloud.storage.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" ) @@ -136,9 +132,7 @@ async def test_open(mock_client, mock_cls_async_bidi_rpc): assert read_obj_stream.is_stream_open -@mock.patch( - "google.cloud.storage.asyncio.async_read_object_stream.AsyncBidiRpc" -) +@mock.patch("google.cloud.storage.asyncio.async_read_object_stream.AsyncBidiRpc") @mock.patch( "google.cloud.storage.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" ) @@ -162,9 +156,7 @@ async def test_open_with_read_handle(mock_client, mock_cls_async_bidi_rpc): assert read_obj_stream.is_stream_open -@mock.patch( - "google.cloud.storage.asyncio.async_read_object_stream.AsyncBidiRpc" -) +@mock.patch("google.cloud.storage.asyncio.async_read_object_stream.AsyncBidiRpc") @mock.patch( "google.cloud.storage.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" ) @@ -185,9 +177,7 @@ async def test_open_when_already_open_should_raise_error( assert str(exc.value) == "Stream is already open" -@mock.patch( - "google.cloud.storage.asyncio.async_read_object_stream.AsyncBidiRpc" -) +@mock.patch("google.cloud.storage.asyncio.async_read_object_stream.AsyncBidiRpc") @mock.patch( "google.cloud.storage.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" ) @@ -208,9 
+198,7 @@ async def test_close(mock_client, mock_cls_async_bidi_rpc): assert not read_obj_stream.is_stream_open -@mock.patch( - "google.cloud.storage.asyncio.async_read_object_stream.AsyncBidiRpc" -) +@mock.patch("google.cloud.storage.asyncio.async_read_object_stream.AsyncBidiRpc") @mock.patch( "google.cloud.storage.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" ) @@ -232,9 +220,7 @@ async def test_requests_done(mock_client, mock_cls_async_bidi_rpc): read_obj_stream.socket_like_rpc.recv.assert_called_once() -@mock.patch( - "google.cloud.storage.asyncio.async_read_object_stream.AsyncBidiRpc" -) +@mock.patch("google.cloud.storage.asyncio.async_read_object_stream.AsyncBidiRpc") @mock.patch( "google.cloud.storage.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" ) @@ -255,9 +241,7 @@ async def test_close_without_open_should_raise_error( assert str(exc.value) == "Stream is not open" -@mock.patch( - "google.cloud.storage.asyncio.async_read_object_stream.AsyncBidiRpc" -) +@mock.patch("google.cloud.storage.asyncio.async_read_object_stream.AsyncBidiRpc") @mock.patch( "google.cloud.storage.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" ) @@ -278,9 +262,7 @@ async def test_send(mock_client, mock_cls_async_bidi_rpc): ) -@mock.patch( - "google.cloud.storage.asyncio.async_read_object_stream.AsyncBidiRpc" -) +@mock.patch("google.cloud.storage.asyncio.async_read_object_stream.AsyncBidiRpc") @mock.patch( "google.cloud.storage.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" ) @@ -301,9 +283,7 @@ async def test_send_without_open_should_raise_error( assert str(exc.value) == "Stream is not open" -@mock.patch( - "google.cloud.storage.asyncio.async_read_object_stream.AsyncBidiRpc" -) +@mock.patch("google.cloud.storage.asyncio.async_read_object_stream.AsyncBidiRpc") @mock.patch( "google.cloud.storage.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" ) @@ -326,9 +306,7 @@ async def test_recv(mock_client, mock_cls_async_bidi_rpc): assert response == 
bidi_read_object_response -@mock.patch( - "google.cloud.storage.asyncio.async_read_object_stream.AsyncBidiRpc" -) +@mock.patch("google.cloud.storage.asyncio.async_read_object_stream.AsyncBidiRpc") @mock.patch( "google.cloud.storage.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" ) @@ -349,9 +327,7 @@ async def test_recv_without_open_should_raise_error( assert str(exc.value) == "Stream is not open" -@mock.patch( - "google.cloud.storage.asyncio.async_read_object_stream.AsyncBidiRpc" -) +@mock.patch("google.cloud.storage.asyncio.async_read_object_stream.AsyncBidiRpc") @mock.patch( "google.cloud.storage.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client" ) diff --git a/tests/unit/asyncio/test_async_write_object_stream.py b/tests/unit/asyncio/test_async_write_object_stream.py index f4a7862d6..77e2ef091 100644 --- a/tests/unit/asyncio/test_async_write_object_stream.py +++ b/tests/unit/asyncio/test_async_write_object_stream.py @@ -71,9 +71,7 @@ def test_init_raises_value_error(self, mock_client): # Open Stream Tests # ------------------------------------------------------------------------- - @mock.patch( - "google.cloud.storage.asyncio.async_write_object_stream.AsyncBidiRpc" - ) + @mock.patch("google.cloud.storage.asyncio.async_write_object_stream.AsyncBidiRpc") @pytest.mark.asyncio async def test_open_new_object(self, mock_rpc_cls, mock_client): mock_rpc = mock_rpc_cls.return_value @@ -101,9 +99,7 @@ async def test_open_new_object(self, mock_rpc_cls, mock_client): assert stream.write_handle == WRITE_HANDLE assert stream.generation_number == GENERATION - @mock.patch( - "google.cloud.storage.asyncio.async_write_object_stream.AsyncBidiRpc" - ) + @mock.patch("google.cloud.storage.asyncio.async_write_object_stream.AsyncBidiRpc") @pytest.mark.asyncio async def test_open_existing_object_with_token(self, mock_rpc_cls, mock_client): mock_rpc = mock_rpc_cls.return_value @@ -132,9 +128,7 @@ async def test_open_existing_object_with_token(self, mock_rpc_cls, mock_client): 
assert initial_request.append_object_spec.routing_token == "token-123" assert stream.persisted_size == 1024 - @mock.patch( - "google.cloud.storage.asyncio.async_write_object_stream.AsyncBidiRpc" - ) + @mock.patch("google.cloud.storage.asyncio.async_write_object_stream.AsyncBidiRpc") @pytest.mark.asyncio async def test_open_metadata_merging(self, mock_rpc_cls, mock_client): mock_rpc = mock_rpc_cls.return_value @@ -166,9 +160,7 @@ async def test_open_already_open_raises(self, mock_client): # Send & Recv & Close Tests # ------------------------------------------------------------------------- - @mock.patch( - "google.cloud.storage.asyncio.async_write_object_stream.AsyncBidiRpc" - ) + @mock.patch("google.cloud.storage.asyncio.async_write_object_stream.AsyncBidiRpc") @pytest.mark.asyncio async def test_send_and_recv_logic(self, mock_rpc_cls, mock_client): # Setup open stream diff --git a/tests/unit/test_grpc_client.py b/tests/unit/test_grpc_client.py index eb048ff42..6dbbfbaa6 100644 --- a/tests/unit/test_grpc_client.py +++ b/tests/unit/test_grpc_client.py @@ -31,7 +31,6 @@ class TestGrpcClient(unittest.TestCase): def test_constructor_defaults_and_options( self, mock_storage_client, mock_base_client ): - mock_transport_cls = mock.MagicMock() mock_storage_client.get_transport_class.return_value = mock_transport_cls mock_creds = _make_credentials(spec=["_base", "_get_project_id"]) @@ -76,7 +75,6 @@ def test_constructor_defaults_and_options( def test_constructor_disables_direct_path( self, mock_storage_client, mock_base_client ): - mock_transport_cls = mock.MagicMock() mock_storage_client.get_transport_class.return_value = mock_transport_cls mock_creds = _make_credentials() @@ -98,7 +96,6 @@ def test_constructor_disables_direct_path( def test_constructor_initialize_with_api_key( self, mock_storage_client, mock_base_client ): - mock_transport_cls = mock.MagicMock() mock_storage_client.get_transport_class.return_value = mock_transport_cls mock_creds = _make_credentials() @@ 
-125,7 +122,6 @@ def test_constructor_initialize_with_api_key( @mock.patch("google.cloud.storage.grpc_client.ClientWithProject") @mock.patch("google.cloud._storage_v2.StorageClient") def test_grpc_client_property(self, mock_storage_client, mock_base_client): - mock_creds = _make_credentials() mock_base_client.return_value._credentials = mock_creds @@ -140,7 +136,6 @@ def test_grpc_client_property(self, mock_storage_client, mock_base_client): def test_constructor_with_api_key_and_client_options( self, mock_storage_client, mock_base_client ): - mock_transport_cls = mock.MagicMock() mock_storage_client.get_transport_class.return_value = mock_transport_cls mock_transport = mock_transport_cls.return_value @@ -174,7 +169,6 @@ def test_constructor_with_api_key_and_client_options( def test_constructor_with_api_key_and_dict_options( self, mock_storage_client, mock_base_client ): - mock_creds = _make_credentials() mock_base_instance = mock_base_client.return_value mock_base_instance._credentials = mock_creds From c8dd7a0b124c395b7b60189ee78f47aba8d51f7d Mon Sep 17 00:00:00 2001 From: Chandra Shekhar Sirimala Date: Fri, 30 Jan 2026 12:09:13 +0530 Subject: [PATCH 19/23] feat: expose `DELETE_OBJECT` in `AsyncGrpcClient` (#1718) Expose `DELETE_OBJECT` in `AsyncGrpcClient` --- .../storage/asyncio/async_grpc_client.py | 52 +++++++++++++++++++ tests/system/test_zonal.py | 33 +++++++++++- tests/unit/asyncio/test_async_grpc_client.py | 45 ++++++++++++++++ 3 files changed, 129 insertions(+), 1 deletion(-) diff --git a/google/cloud/storage/asyncio/async_grpc_client.py b/google/cloud/storage/asyncio/async_grpc_client.py index 474b961b4..3ff846b5f 100644 --- a/google/cloud/storage/asyncio/async_grpc_client.py +++ b/google/cloud/storage/asyncio/async_grpc_client.py @@ -106,3 +106,55 @@ def grpc_client(self): google.cloud._storage_v2.StorageAsyncClient: The configured GAPIC client. 
""" return self._grpc_client + + async def delete_object( + self, + bucket_name, + object_name, + generation=None, + if_generation_match=None, + if_generation_not_match=None, + if_metageneration_match=None, + if_metageneration_not_match=None, + **kwargs, + ): + """Deletes an object and its metadata. + + :type bucket_name: str + :param bucket_name: The name of the bucket in which the object resides. + + :type object_name: str + :param object_name: The name of the object to delete. + + :type generation: int + :param generation: + (Optional) If present, permanently deletes a specific generation + of an object. + + :type if_generation_match: int + :param if_generation_match: (Optional) + + :type if_generation_not_match: int + :param if_generation_not_match: (Optional) + + :type if_metageneration_match: int + :param if_metageneration_match: (Optional) + + :type if_metageneration_not_match: int + :param if_metageneration_not_match: (Optional) + + + """ + # The gRPC API requires the bucket name to be in the format "projects/_/buckets/bucket_name" + bucket_path = f"projects/_/buckets/{bucket_name}" + request = storage_v2.DeleteObjectRequest( + bucket=bucket_path, + object=object_name, + generation=generation, + if_generation_match=if_generation_match, + if_generation_not_match=if_generation_not_match, + if_metageneration_match=if_metageneration_match, + if_metageneration_not_match=if_metageneration_not_match, + **kwargs, + ) + await self._grpc_client.delete_object(request=request) diff --git a/tests/system/test_zonal.py b/tests/system/test_zonal.py index 42164d364..9bd870860 100644 --- a/tests/system/test_zonal.py +++ b/tests/system/test_zonal.py @@ -19,7 +19,7 @@ from google.cloud.storage.asyncio.async_multi_range_downloader import ( AsyncMultiRangeDownloader, ) -from google.api_core.exceptions import FailedPrecondition +from google.api_core.exceptions import FailedPrecondition, NotFound pytestmark = pytest.mark.skipif( @@ -570,3 +570,34 @@ async def _run(): 
blobs_to_delete.append(storage_client.bucket(_ZONAL_BUCKET).blob(object_name)) event_loop.run_until_complete(_run()) + + +def test_delete_object_using_grpc_client(event_loop, grpc_client_direct): + """ + Test deleting an object and its metadata using the gRPC client. + """ + object_name = f"test_append_with_generation-{uuid.uuid4()}" + + async def _run(): + writer = AsyncAppendableObjectWriter( + grpc_client_direct, _ZONAL_BUCKET, object_name, generation=0 + ) + + # Empty object is created. + await writer.open() + await writer.append(b"some_bytes") + await writer.close() + + await grpc_client_direct.delete_object(_ZONAL_BUCKET, object_name) + + # trying to get the deleted object raises 404. + with pytest.raises(NotFound): + # TODO: Remove this once GET_OBJECT is exposed in `AsyncGrpcClient` + await grpc_client_direct._grpc_client.get_object( + bucket=f"projects/_/buckets/{_ZONAL_BUCKET}", object_=object_name + ) + # cleanup + del writer + gc.collect() + + event_loop.run_until_complete(_run()) diff --git a/tests/unit/asyncio/test_async_grpc_client.py b/tests/unit/asyncio/test_async_grpc_client.py index 3d897d336..e7d649e72 100644 --- a/tests/unit/asyncio/test_async_grpc_client.py +++ b/tests/unit/asyncio/test_async_grpc_client.py @@ -13,6 +13,7 @@ # limitations under the License.
from unittest import mock +import pytest from google.auth import credentials as auth_credentials from google.auth.credentials import AnonymousCredentials from google.api_core import client_info as client_info_lib @@ -185,6 +186,7 @@ def test_grpc_client_with_anon_creds(self, mock_grpc_gapic_client): credentials=anonymous_creds, options=expected_options, ) + mock_transport_cls.assert_called_once_with(channel=channel_sentinel) @mock.patch("google.cloud._storage_v2.StorageAsyncClient") def test_user_agent_with_custom_client_info(self, mock_async_storage_client): @@ -209,3 +211,46 @@ def test_user_agent_with_custom_client_info(self, mock_async_storage_client): agent_version = f"gcloud-python/{__version__}" expected_user_agent = f"custom-app/1.0 {agent_version} " assert client_info.user_agent == expected_user_agent + + @mock.patch("google.cloud._storage_v2.StorageAsyncClient") + @pytest.mark.asyncio + async def test_delete_object(self, mock_async_storage_client): + # Arrange + mock_transport_cls = mock.MagicMock() + mock_async_storage_client.get_transport_class.return_value = mock_transport_cls + mock_gapic_client = mock.AsyncMock() + mock_async_storage_client.return_value = mock_gapic_client + + client = async_grpc_client.AsyncGrpcClient( + credentials=_make_credentials(spec=AnonymousCredentials) + ) + + bucket_name = "bucket" + object_name = "object" + generation = 123 + if_generation_match = 456 + if_generation_not_match = 789 + if_metageneration_match = 111 + if_metageneration_not_match = 222 + + # Act + await client.delete_object( + bucket_name, + object_name, + generation=generation, + if_generation_match=if_generation_match, + if_generation_not_match=if_generation_not_match, + if_metageneration_match=if_metageneration_match, + if_metageneration_not_match=if_metageneration_not_match, + ) + + # Assert + call_args, call_kwargs = mock_gapic_client.delete_object.call_args + request = call_kwargs["request"] + assert request.bucket == "projects/_/buckets/bucket" + 
assert request.object == "object" + assert request.generation == generation + assert request.if_generation_match == if_generation_match + assert request.if_generation_not_match == if_generation_not_match + assert request.if_metageneration_match == if_metageneration_match + assert request.if_metageneration_not_match == if_metageneration_not_match From 2ef63396dca1c36f9b0f0f3cf87a61b5aa4bd465 Mon Sep 17 00:00:00 2001 From: Pulkit Aggarwal <54775856+Pulkit0110@users.noreply.github.com> Date: Mon, 2 Feb 2026 13:38:37 +0530 Subject: [PATCH 20/23] fix: receive eof while closing reads stream (#1733) When `writer.close()` is called without setting finalize_on_close flag, we need to get two responses: 1) to get the persisted_size 2) eof response That's why added a check if the first response is not eof, then again receive the response from the stream. --- .../asyncio/async_write_object_stream.py | 13 ++++- .../asyncio/test_async_write_object_stream.py | 48 +++++++++++++++++++ 2 files changed, 59 insertions(+), 2 deletions(-) diff --git a/google/cloud/storage/asyncio/async_write_object_stream.py b/google/cloud/storage/asyncio/async_write_object_stream.py index 721183962..319f394dd 100644 --- a/google/cloud/storage/asyncio/async_write_object_stream.py +++ b/google/cloud/storage/asyncio/async_write_object_stream.py @@ -13,6 +13,7 @@ # limitations under the License. 
from typing import List, Optional, Tuple +import grpc from google.cloud import _storage_v2 from google.cloud.storage.asyncio import _utils from google.cloud.storage.asyncio.async_grpc_client import AsyncGrpcClient @@ -181,9 +182,17 @@ async def close(self) -> None: async def requests_done(self): """Signals that all requests have been sent.""" - await self.socket_like_rpc.send(None) - _utils.update_write_handle_if_exists(self, await self.socket_like_rpc.recv()) + + # The server may send a final "EOF" response immediately, or it may + # first send an intermediate response followed by the EOF response depending on whether the object was finalized or not. + first_resp = await self.socket_like_rpc.recv() + _utils.update_write_handle_if_exists(self, first_resp) + + if first_resp != grpc.aio.EOF: + self.persisted_size = first_resp.persisted_size + second_resp = await self.socket_like_rpc.recv() + assert second_resp == grpc.aio.EOF async def send( self, bidi_write_object_request: _storage_v2.BidiWriteObjectRequest diff --git a/tests/unit/asyncio/test_async_write_object_stream.py b/tests/unit/asyncio/test_async_write_object_stream.py index 77e2ef091..4e952336b 100644 --- a/tests/unit/asyncio/test_async_write_object_stream.py +++ b/tests/unit/asyncio/test_async_write_object_stream.py @@ -15,6 +15,8 @@ import unittest.mock as mock from unittest.mock import AsyncMock, MagicMock import pytest +import grpc + from google.cloud.storage.asyncio.async_write_object_stream import ( _AsyncWriteObjectStream, @@ -194,11 +196,57 @@ async def test_close_success(self, mock_client): stream = _AsyncWriteObjectStream(mock_client, BUCKET, OBJECT) stream._is_stream_open = True stream.socket_like_rpc = AsyncMock() + + stream.socket_like_rpc.send = AsyncMock() + first_resp = _storage_v2.BidiWriteObjectResponse(persisted_size=100) + stream.socket_like_rpc.recv = AsyncMock(side_effect=[first_resp, grpc.aio.EOF]) stream.socket_like_rpc.close = AsyncMock() await stream.close() 
stream.socket_like_rpc.close.assert_awaited_once() assert not stream.is_stream_open + assert stream.persisted_size == 100 + + @pytest.mark.asyncio + async def test_close_with_persisted_size_then_eof(self, mock_client): + """Test close when first recv has persisted_size, second is EOF.""" + stream = _AsyncWriteObjectStream(mock_client, BUCKET, OBJECT) + stream._is_stream_open = True + stream.socket_like_rpc = AsyncMock() + + # First response has persisted_size (NOT EOF, intermediate) + persisted_resp = _storage_v2.BidiWriteObjectResponse(persisted_size=500) + # Second response is EOF (None) + eof_resp = grpc.aio.EOF + + stream.socket_like_rpc.send = AsyncMock() + stream.socket_like_rpc.recv = AsyncMock(side_effect=[persisted_resp, eof_resp]) + stream.socket_like_rpc.close = AsyncMock() + + await stream.close() + + # Verify two recv calls: first has persisted_size (NOT EOF), so read second (EOF) + assert stream.socket_like_rpc.recv.await_count == 2 + assert stream.persisted_size == 500 + assert not stream.is_stream_open + + @pytest.mark.asyncio + async def test_close_with_grpc_aio_eof_response(self, mock_client): + """Test close when first recv is grpc.aio.EOF sentinel.""" + stream = _AsyncWriteObjectStream(mock_client, BUCKET, OBJECT) + stream._is_stream_open = True + stream.socket_like_rpc = AsyncMock() + + # First recv returns grpc.aio.EOF (explicit sentinel from finalize) + stream.socket_like_rpc.send = AsyncMock() + stream.socket_like_rpc.recv = AsyncMock(return_value=grpc.aio.EOF) + stream.socket_like_rpc.close = AsyncMock() + + await stream.close() + + # Verify only one recv call (grpc.aio.EOF=EOF, so don't read second) + assert stream.socket_like_rpc.recv.await_count == 1 + assert not stream.is_stream_open @pytest.mark.asyncio async def test_methods_require_open_raises(self, mock_client): From 7a00dfb73696d6db98747509e9f2f2bd0c94c53a Mon Sep 17 00:00:00 2001 From: Chandra Shekhar Sirimala Date: Mon, 2 Feb 2026 14:40:44 +0530 Subject: [PATCH 21/23] chore: Add 
README for running zonal buckets samples (#1734) chore: Add README for running zonal buckets samples --- samples/README.md | 9 ++- samples/snippets/zonal_buckets/README.md | 78 ++++++++++++++++++++++++ 2 files changed, 86 insertions(+), 1 deletion(-) create mode 100644 samples/snippets/zonal_buckets/README.md diff --git a/samples/README.md b/samples/README.md index 490af710a..118a778cb 100644 --- a/samples/README.md +++ b/samples/README.md @@ -34,8 +34,15 @@ for more detailed instructions. ``` source /bin/activate ``` +3. To run samples for [Zonal Buckets](https://github.com/googleapis/python-storage/tree/main/samples/snippets/zonal_buckets) -3. Install the dependencies needed to run the samples. + ``` + pip install "google-cloud-storage[grpc]" + python samples/snippets/zonal_buckets/storage_create_and_write_appendable_object.py --bucket_name --object_name + + ``` + +4. Install the dependencies needed to run the samples. ``` cd samples/snippets pip install -r requirements.txt diff --git a/samples/snippets/zonal_buckets/README.md b/samples/snippets/zonal_buckets/README.md new file mode 100644 index 000000000..71c17e5c3 --- /dev/null +++ b/samples/snippets/zonal_buckets/README.md @@ -0,0 +1,78 @@ +# Google Cloud Storage - Zonal Buckets Snippets + +This directory contains snippets for interacting with Google Cloud Storage zonal buckets. + +## Prerequisites + +- A Google Cloud Platform project with the Cloud Storage API enabled. +- A zonal Google Cloud Storage bucket. + +## Running the snippets + +### Create and write to an appendable object + +This snippet uploads an appendable object to a zonal bucket. + +```bash +python samples/snippets/zonal_buckets/storage_create_and_write_appendable_object.py --bucket_name --object_name +``` + +### Finalize an appendable object upload + +This snippet creates, writes to, and finalizes an appendable object. 
+ +```bash +python samples/snippets/zonal_buckets/storage_finalize_appendable_object_upload.py --bucket_name --object_name +``` + +### Pause and resume an appendable object upload + +This snippet demonstrates pausing and resuming an appendable object upload. + +```bash +python samples/snippets/zonal_buckets/storage_pause_and_resume_appendable_upload.py --bucket_name --object_name +``` + +### Tail an appendable object + +This snippet demonstrates tailing an appendable GCS object, similar to `tail -f`. + +```bash +python samples/snippets/zonal_buckets/storage_read_appendable_object_tail.py --bucket_name --object_name --duration +``` + + +### Download a range of bytes from an object + +This snippet downloads a range of bytes from an object. + +```bash +python samples/snippets/zonal_buckets/storage_open_object_single_ranged_read.py --bucket_name --object_name --start_byte --size +``` + + +### Download multiple ranges of bytes from a single object + +This snippet downloads multiple ranges of bytes from a single object into different buffers. + +```bash +python samples/snippets/zonal_buckets/storage_open_object_multiple_ranged_read.py --bucket_name --object_name +``` + +### Download the entire content of an object + +This snippet downloads the entire content of an object using a multi-range downloader. + +```bash +python samples/snippets/zonal_buckets/storage_open_object_read_full_object.py --bucket_name --object_name +``` + + + +### Download a range of bytes from multiple objects concurrently + +This snippet downloads a range of bytes from multiple objects concurrently. 
+ +```bash +python samples/snippets/zonal_buckets/storage_open_multiple_objects_ranged_read.py --bucket_name --object_names +``` \ No newline at end of file From 0e5ec29bc6a31b77bcfba4254cef5bffb199095c Mon Sep 17 00:00:00 2001 From: Pulkit Aggarwal <54775856+Pulkit0110@users.noreply.github.com> Date: Mon, 2 Feb 2026 18:07:16 +0530 Subject: [PATCH 22/23] feat: add get_object method for async grpc client (#1735) This method can be used to fetch the metadata of an object using the async grpc API. --- .../storage/asyncio/async_grpc_client.py | 63 ++++++++++++++++ tests/system/test_zonal.py | 53 +++++++++++++ tests/unit/asyncio/test_async_grpc_client.py | 75 +++++++++++++++++++ 3 files changed, 191 insertions(+) diff --git a/google/cloud/storage/asyncio/async_grpc_client.py b/google/cloud/storage/asyncio/async_grpc_client.py index 3ff846b5f..640e7fe38 100644 --- a/google/cloud/storage/asyncio/async_grpc_client.py +++ b/google/cloud/storage/asyncio/async_grpc_client.py @@ -158,3 +158,66 @@ async def delete_object( **kwargs, ) await self._grpc_client.delete_object(request=request) + + async def get_object( + self, + bucket_name, + object_name, + generation=None, + if_generation_match=None, + if_generation_not_match=None, + if_metageneration_match=None, + if_metageneration_not_match=None, + soft_deleted=None, + **kwargs, + ): + """Retrieves an object's metadata. + + In the gRPC API, this is performed by the GetObject RPC, which + returns the object resource (metadata) without the object's data. + + :type bucket_name: str + :param bucket_name: The name of the bucket in which the object resides. + + :type object_name: str + :param object_name: The name of the object. + + :type generation: int + :param generation: + (Optional) If present, selects a specific generation of an object. + + :type if_generation_match: int + :param if_generation_match: (Optional) Precondition for object generation match. 
+ + :type if_generation_not_match: int + :param if_generation_not_match: (Optional) Precondition for object generation mismatch. + + :type if_metageneration_match: int + :param if_metageneration_match: (Optional) Precondition for metageneration match. + + :type if_metageneration_not_match: int + :param if_metageneration_not_match: (Optional) Precondition for metageneration mismatch. + + :type soft_deleted: bool + :param soft_deleted: + (Optional) If True, return the soft-deleted version of this object. + + :rtype: :class:`google.cloud._storage_v2.types.Object` + :returns: The object metadata resource. + """ + bucket_path = f"projects/_/buckets/{bucket_name}" + + request = storage_v2.GetObjectRequest( + bucket=bucket_path, + object=object_name, + generation=generation, + if_generation_match=if_generation_match, + if_generation_not_match=if_generation_not_match, + if_metageneration_match=if_metageneration_match, + if_metageneration_not_match=if_metageneration_not_match, + soft_deleted=soft_deleted or False, + **kwargs, + ) + + # Calls the underlying GAPIC StorageAsyncClient.get_object method + return await self._grpc_client.get_object(request=request) diff --git a/tests/system/test_zonal.py b/tests/system/test_zonal.py index 9bd870860..fd5d841c3 100644 --- a/tests/system/test_zonal.py +++ b/tests/system/test_zonal.py @@ -601,3 +601,56 @@ async def _run(): gc.collect() event_loop.run_until_complete(_run()) + +def test_get_object_after_appendable_write( + grpc_clients, + grpc_client_direct, + event_loop, + storage_client, + blobs_to_delete, +): + """Test getting object metadata after writing with AsyncAppendableObjectWriter. + + This test: + 1. Creates a test object using AsyncAppendableObjectWriter + 2. Appends content to the object (without finalizing) + 3. Closes the write stream + 4. Fetches the object metadata using AsyncGrpcClient.get_object() + 5. 
Verifies the object size matches the written data + """ + + async def _run(): + grpc_client = grpc_client_direct + object_name = f"test-get-object-{uuid.uuid4().hex}" + test_data = b"Some test data bytes." + expected_size = len(test_data) + + writer = AsyncAppendableObjectWriter( + grpc_client, + _ZONAL_BUCKET, + object_name, + ) + + await writer.open() + await writer.append(test_data) + await writer.close(finalize_on_close=False) + + obj = await grpc_client.get_object( + bucket_name=_ZONAL_BUCKET, + object_name=object_name, + ) + + # Assert + assert obj is not None + assert obj.name == object_name + assert obj.bucket == f"projects/_/buckets/{_ZONAL_BUCKET}" + assert obj.size == expected_size, ( + f"Expected object size {expected_size}, got {obj.size}" + ) + + # Cleanup + blobs_to_delete.append(storage_client.bucket(_ZONAL_BUCKET).blob(object_name)) + del writer + gc.collect() + + event_loop.run_until_complete(_run()) diff --git a/tests/unit/asyncio/test_async_grpc_client.py b/tests/unit/asyncio/test_async_grpc_client.py index e7d649e72..f193acb60 100644 --- a/tests/unit/asyncio/test_async_grpc_client.py +++ b/tests/unit/asyncio/test_async_grpc_client.py @@ -254,3 +254,78 @@ async def test_delete_object(self, mock_async_storage_client): assert request.if_generation_not_match == if_generation_not_match assert request.if_metageneration_match == if_metageneration_match assert request.if_metageneration_not_match == if_metageneration_not_match + + @mock.patch("google.cloud._storage_v2.StorageAsyncClient") + @pytest.mark.asyncio + async def test_get_object(self, mock_async_storage_client): + # Arrange + mock_transport_cls = mock.MagicMock() + mock_async_storage_client.get_transport_class.return_value = mock_transport_cls + mock_gapic_client = mock.AsyncMock() + mock_async_storage_client.return_value = mock_gapic_client + + client = async_grpc_client.AsyncGrpcClient( + credentials=_make_credentials(spec=AnonymousCredentials) + ) + + bucket_name = "bucket" + object_name = 
"object" + + # Act + await client.get_object( + bucket_name, + object_name, + ) + + # Assert + call_args, call_kwargs = mock_gapic_client.get_object.call_args + request = call_kwargs["request"] + assert request.bucket == "projects/_/buckets/bucket" + assert request.object == "object" + assert request.soft_deleted is False + + @mock.patch("google.cloud._storage_v2.StorageAsyncClient") + @pytest.mark.asyncio + async def test_get_object_with_all_parameters(self, mock_async_storage_client): + # Arrange + mock_transport_cls = mock.MagicMock() + mock_async_storage_client.get_transport_class.return_value = mock_transport_cls + mock_gapic_client = mock.AsyncMock() + mock_async_storage_client.return_value = mock_gapic_client + + client = async_grpc_client.AsyncGrpcClient( + credentials=_make_credentials(spec=AnonymousCredentials) + ) + + bucket_name = "bucket" + object_name = "object" + generation = 123 + if_generation_match = 456 + if_generation_not_match = 789 + if_metageneration_match = 111 + if_metageneration_not_match = 222 + soft_deleted = True + + # Act + await client.get_object( + bucket_name, + object_name, + generation=generation, + if_generation_match=if_generation_match, + if_generation_not_match=if_generation_not_match, + if_metageneration_match=if_metageneration_match, + if_metageneration_not_match=if_metageneration_not_match, + soft_deleted=soft_deleted, + ) + + # Assert + call_args, call_kwargs = mock_gapic_client.get_object.call_args + request = call_kwargs["request"] + assert request.bucket == "projects/_/buckets/bucket" + assert request.object == "object" + assert request.generation == generation + assert request.if_generation_match == if_generation_match + assert request.if_generation_not_match == if_generation_not_match + assert request.if_metageneration_match == if_metageneration_match + assert request.if_metageneration_not_match == if_metageneration_not_match + assert request.soft_deleted is True From 77c1edaa6de4058e545ad1b68bde5507b75b8e37 Mon Sep 
17 00:00:00 2001 From: Chandra Shekhar Sirimala Date: Mon, 2 Feb 2026 18:46:22 +0530 Subject: [PATCH 23/23] chore: librarian release pull request: 20260202T123858Z (#1736) PR created by the Librarian CLI to initialize a release. Merging this PR will auto trigger a release. Librarian Version: v1.0.2-0.20251119154421-36c3e21ad3ac Language Image: us-central1-docker.pkg.dev/cloud-sdk-librarian-prod/images-prod/python-librarian-generator@sha256:8e2c32496077054105bd06c54a59d6a6694287bc053588e24debe6da6920ad91
google-cloud-storage: 3.9.0 ## [3.9.0](https://github.com/googleapis/python-storage/compare/v3.8.0...v3.9.0) (2026-02-02) ### Features * update generation for MRD (#1730) ([08bc7082](https://github.com/googleapis/python-storage/commit/08bc7082)) * add get_object method for async grpc client (#1735) ([0e5ec29b](https://github.com/googleapis/python-storage/commit/0e5ec29b)) * Add micro-benchmarks for reads comparing standard (regional) vs rapid (zonal) buckets. (#1697) ([1917649f](https://github.com/googleapis/python-storage/commit/1917649f)) * Add support for opening via `write_handle` and fix `write_handle` type (#1715) ([2bc15fa5](https://github.com/googleapis/python-storage/commit/2bc15fa5)) * add samples for appendable objects writes and reads ([2e1a1eb5](https://github.com/googleapis/python-storage/commit/2e1a1eb5)) * add samples for appendable objects writes and reads (#1705) ([2e1a1eb5](https://github.com/googleapis/python-storage/commit/2e1a1eb5)) * add context manager to mrd (#1724) ([5ac2808a](https://github.com/googleapis/python-storage/commit/5ac2808a)) * Move Zonal Buckets features of `_experimental` (#1728) ([74c9ecc5](https://github.com/googleapis/python-storage/commit/74c9ecc5)) * add default user agent for grpc (#1726) ([7b319469](https://github.com/googleapis/python-storage/commit/7b319469)) * expose finalized_time in blob.py applicable for GET_OBJECT in ZB (#1719) ([8e21a7fe](https://github.com/googleapis/python-storage/commit/8e21a7fe)) * expose `DELETE_OBJECT` in `AsyncGrpcClient` (#1718) ([c8dd7a0b](https://github.com/googleapis/python-storage/commit/c8dd7a0b)) * send `user_agent` to grpc channel (#1712) ([cdb2486b](https://github.com/googleapis/python-storage/commit/cdb2486b)) * integrate writes strategy and appendable object writer (#1695) ([dbd162b3](https://github.com/googleapis/python-storage/commit/dbd162b3)) * Add micro-benchmarks for writes comparing standard (regional) vs rapid (zonal) buckets. 
(#1707) ([dbe9d8b8](https://github.com/googleapis/python-storage/commit/dbe9d8b8)) * add support for `generation=0` to avoid overwriting existing objects and add `is_stream_open` support (#1709) ([ea0f5bf8](https://github.com/googleapis/python-storage/commit/ea0f5bf8)) * add support for `generation=0` to prevent overwriting existing objects ([ea0f5bf8](https://github.com/googleapis/python-storage/commit/ea0f5bf8)) * add `is_stream_open` property to AsyncAppendableObjectWriter for stream status check ([ea0f5bf8](https://github.com/googleapis/python-storage/commit/ea0f5bf8)) ### Bug Fixes * receive eof while closing reads stream (#1733) ([2ef63396](https://github.com/googleapis/python-storage/commit/2ef63396)) * update write handle on every recv() (#1716) ([5d9fafe1](https://github.com/googleapis/python-storage/commit/5d9fafe1)) * implement requests_done method to signal end of requests in async streams. Gracefully close streams. (#1700) ([6c160794](https://github.com/googleapis/python-storage/commit/6c160794)) * implement requests_done method to signal end of requests in async streams. Gracefully close streams. ([6c160794](https://github.com/googleapis/python-storage/commit/6c160794)) * instance grpc client once per process in benchmarks (#1725) ([721ea2dd](https://github.com/googleapis/python-storage/commit/721ea2dd)) * Fix formatting in setup.py dependencies list (#1713) ([cc4831d7](https://github.com/googleapis/python-storage/commit/cc4831d7)) * Change contructors of MRD and AAOW AsyncGrpcClient.grpc_client to AsyncGrpcClient (#1727) ([e730bf50](https://github.com/googleapis/python-storage/commit/e730bf50))
--- .librarian/state.yaml | 2 +- CHANGELOG.md | 34 +++++++++++++++++++ google/cloud/_storage_v2/gapic_version.py | 2 +- google/cloud/storage/version.py | 2 +- .../snippet_metadata_google.storage.v2.json | 2 +- 5 files changed, 38 insertions(+), 4 deletions(-) diff --git a/.librarian/state.yaml b/.librarian/state.yaml index e20e4bc40..80e2355be 100644 --- a/.librarian/state.yaml +++ b/.librarian/state.yaml @@ -1,7 +1,7 @@ image: us-central1-docker.pkg.dev/cloud-sdk-librarian-prod/images-prod/python-librarian-generator@sha256:8e2c32496077054105bd06c54a59d6a6694287bc053588e24debe6da6920ad91 libraries: - id: google-cloud-storage - version: 3.8.0 + version: 3.9.0 last_generated_commit: 5400ccce473c439885bd6bf2924fd242271bfcab apis: - path: google/storage/v2 diff --git a/CHANGELOG.md b/CHANGELOG.md index e371dd766..4c46db115 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,40 @@ [1]: https://pypi.org/project/google-cloud-storage/#history +## [3.9.0](https://github.com/googleapis/python-storage/compare/v3.8.0...v3.9.0) (2026-02-02) + + +### Features + +* add get_object method for async grpc client (#1735) ([0e5ec29bc6a31b77bcfba4254cef5bffb199095c](https://github.com/googleapis/python-storage/commit/0e5ec29bc6a31b77bcfba4254cef5bffb199095c)) +* expose `DELETE_OBJECT` in `AsyncGrpcClient` (#1718) ([c8dd7a0b124c395b7b60189ee78f47aba8d51f7d](https://github.com/googleapis/python-storage/commit/c8dd7a0b124c395b7b60189ee78f47aba8d51f7d)) +* update generation for MRD (#1730) ([08bc7082db7392f13bc8c51511b4afa9c7b157c9](https://github.com/googleapis/python-storage/commit/08bc7082db7392f13bc8c51511b4afa9c7b157c9)) +* Move Zonal Buckets features of `_experimental` (#1728) ([74c9ecc54173420bfcd48498a8956088a035af50](https://github.com/googleapis/python-storage/commit/74c9ecc54173420bfcd48498a8956088a035af50)) +* add default user agent for grpc (#1726) 
([7b319469d2e495ea0bf7367f3949190e8f5d9fff](https://github.com/googleapis/python-storage/commit/7b319469d2e495ea0bf7367f3949190e8f5d9fff)) +* expose finalized_time in blob.py applicable for GET_OBJECT in ZB (#1719) ([8e21a7fe54d0a043f31937671003630a1985a5d2](https://github.com/googleapis/python-storage/commit/8e21a7fe54d0a043f31937671003630a1985a5d2)) +* add context manager to mrd (#1724) ([5ac2808a69195c688ed42c3604d4bfadbb602a66](https://github.com/googleapis/python-storage/commit/5ac2808a69195c688ed42c3604d4bfadbb602a66)) +* integrate writes strategy and appendable object writer (#1695) ([dbd162b3583e32e6f705a51f5c3fef333a9b89d0](https://github.com/googleapis/python-storage/commit/dbd162b3583e32e6f705a51f5c3fef333a9b89d0)) +* Add support for opening via `write_handle` and fix `write_handle` type (#1715) ([2bc15fa570683ba584230c51b439d189dbdcd580](https://github.com/googleapis/python-storage/commit/2bc15fa570683ba584230c51b439d189dbdcd580)) +* Add micro-benchmarks for writes comparing standard (regional) vs rapid (zonal) buckets. (#1707) ([dbe9d8b89d975dfbed8c830a5687ccfafea51d5f](https://github.com/googleapis/python-storage/commit/dbe9d8b89d975dfbed8c830a5687ccfafea51d5f)) +* Add micro-benchmarks for reads comparing standard (regional) vs rapid (zonal) buckets. 
(#1697) ([1917649fac41481da1adea6c2a9f4ab1298a34c4](https://github.com/googleapis/python-storage/commit/1917649fac41481da1adea6c2a9f4ab1298a34c4)) +* send `user_agent` to grpc channel (#1712) ([cdb2486bb051dcbfbffc2510aff6aacede5e54d3](https://github.com/googleapis/python-storage/commit/cdb2486bb051dcbfbffc2510aff6aacede5e54d3)) +* add samples for appendable objects writes and reads (#1705) ([2e1a1eb5cbe1c909f1f892a0cc74fe63c8ef36ff](https://github.com/googleapis/python-storage/commit/2e1a1eb5cbe1c909f1f892a0cc74fe63c8ef36ff)) +* add samples for appendable objects writes and reads ([2e1a1eb5cbe1c909f1f892a0cc74fe63c8ef36ff](https://github.com/googleapis/python-storage/commit/2e1a1eb5cbe1c909f1f892a0cc74fe63c8ef36ff)) +* add support for `generation=0` to avoid overwriting existing objects and add `is_stream_open` support (#1709) ([ea0f5bf8316f4bfcff2728d9d1baa68dde6ebdae](https://github.com/googleapis/python-storage/commit/ea0f5bf8316f4bfcff2728d9d1baa68dde6ebdae)) +* add support for `generation=0` to prevent overwriting existing objects ([ea0f5bf8316f4bfcff2728d9d1baa68dde6ebdae](https://github.com/googleapis/python-storage/commit/ea0f5bf8316f4bfcff2728d9d1baa68dde6ebdae)) +* add `is_stream_open` property to AsyncAppendableObjectWriter for stream status check ([ea0f5bf8316f4bfcff2728d9d1baa68dde6ebdae](https://github.com/googleapis/python-storage/commit/ea0f5bf8316f4bfcff2728d9d1baa68dde6ebdae)) + + +### Bug Fixes + +* receive eof while closing reads stream (#1733) ([2ef63396dca1c36f9b0f0f3cf87a61b5aa4bd465](https://github.com/googleapis/python-storage/commit/2ef63396dca1c36f9b0f0f3cf87a61b5aa4bd465)) +* Change contructors of MRD and AAOW AsyncGrpcClient.grpc_client to AsyncGrpcClient (#1727) ([e730bf50c4584f737ab86b2e409ddb27b40d2cec](https://github.com/googleapis/python-storage/commit/e730bf50c4584f737ab86b2e409ddb27b40d2cec)) +* instance grpc client once per process in benchmarks (#1725) 
([721ea2dd6c6db2aa91fd3b90e56a831aaaa64061](https://github.com/googleapis/python-storage/commit/721ea2dd6c6db2aa91fd3b90e56a831aaaa64061)) +* update write handle on every recv() (#1716) ([5d9fafe1466b5ccb1db4a814967a5cc8465148a2](https://github.com/googleapis/python-storage/commit/5d9fafe1466b5ccb1db4a814967a5cc8465148a2)) +* Fix formatting in setup.py dependencies list (#1713) ([cc4831d7e253b265b0b96e08b5479f4c759be442](https://github.com/googleapis/python-storage/commit/cc4831d7e253b265b0b96e08b5479f4c759be442)) +* implement requests_done method to signal end of requests in async streams. Gracefully close streams. (#1700) ([6c160794afded5e8f4179399f1fe5248e32bf707](https://github.com/googleapis/python-storage/commit/6c160794afded5e8f4179399f1fe5248e32bf707)) +* implement requests_done method to signal end of requests in async streams. Gracefully close streams. ([6c160794afded5e8f4179399f1fe5248e32bf707](https://github.com/googleapis/python-storage/commit/6c160794afded5e8f4179399f1fe5248e32bf707)) + ## [3.8.0](https://github.com/googleapis/python-storage/compare/v3.7.0...v3.8.0) (2026-01-13) diff --git a/google/cloud/_storage_v2/gapic_version.py b/google/cloud/_storage_v2/gapic_version.py index 4a7c666d6..0d5599e8b 100644 --- a/google/cloud/_storage_v2/gapic_version.py +++ b/google/cloud/_storage_v2/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -__version__ = "3.8.0" # {x-release-please-version} +__version__ = "3.9.0" # {x-release-please-version} diff --git a/google/cloud/storage/version.py b/google/cloud/storage/version.py index 8f4ba4810..0bc275357 100644 --- a/google/cloud/storage/version.py +++ b/google/cloud/storage/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-__version__ = "3.8.0" +__version__ = "3.9.0" diff --git a/samples/generated_samples/snippet_metadata_google.storage.v2.json b/samples/generated_samples/snippet_metadata_google.storage.v2.json index 7fcd587f3..1889f0c5d 100644 --- a/samples/generated_samples/snippet_metadata_google.storage.v2.json +++ b/samples/generated_samples/snippet_metadata_google.storage.v2.json @@ -8,7 +8,7 @@ ], "language": "PYTHON", "name": "google-cloud-storage", - "version": "3.8.0" + "version": "3.9.0" }, "snippets": [ {