diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index 6df17303f..77a642659 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -3,10 +3,9 @@
#
# For syntax help see:
# https://help.github.com/en/github/creating-cloning-and-archiving-repositories/about-code-owners#codeowners-syntax
-# Note: This file is autogenerated. To make changes to the codeowner team, please update .repo-metadata.json.
# @googleapis/yoshi-python @googleapis/gcs-sdk-team are the default owners for changes in this repo
-* @googleapis/yoshi-python @googleapis/gcs-sdk-team
+* @googleapis/yoshi-python @googleapis/gcs-sdk-team @googleapis/gcs-fs
# @googleapis/python-samples-reviewers @googleapis/gcs-sdk-team are the default owners for samples changes
/samples/ @googleapis/python-samples-reviewers @googleapis/gcs-sdk-team
diff --git a/.github/sync-repo-settings.yaml b/.github/sync-repo-settings.yaml
index 0d304cfe2..19c1d0ba4 100644
--- a/.github/sync-repo-settings.yaml
+++ b/.github/sync-repo-settings.yaml
@@ -9,12 +9,6 @@ branchProtectionRules:
requiredStatusCheckContexts:
- 'Kokoro'
- 'cla/google'
- - 'Kokoro system-3.12'
+ - 'Kokoro system-3.14'
+ - 'Kokoro system-3.9'
- 'OwlBot Post Processor'
-- pattern: python2
- requiresCodeOwnerReviews: true
- requiresStrictStatusChecks: true
- requiredStatusCheckContexts:
- - 'Kokoro'
- - 'cla/google'
- - 'Kokoro system-2.7'
diff --git a/.kokoro/presubmit/prerelease-deps.cfg b/.kokoro/presubmit/prerelease-deps.cfg
new file mode 100644
index 000000000..3595fb43f
--- /dev/null
+++ b/.kokoro/presubmit/prerelease-deps.cfg
@@ -0,0 +1,7 @@
+# Format: //devtools/kokoro/config/proto/build.proto
+
+# Only run this nox session.
+env_vars: {
+ key: "NOX_SESSION"
+ value: "prerelease_deps"
+}
diff --git a/.kokoro/presubmit/presubmit.cfg b/.kokoro/presubmit/presubmit.cfg
index b158096f0..5423df92a 100644
--- a/.kokoro/presubmit/presubmit.cfg
+++ b/.kokoro/presubmit/presubmit.cfg
@@ -1,6 +1,7 @@
# Format: //devtools/kokoro/config/proto/build.proto
-# Disable system tests.
+# Disable system tests in this presubmit because they are run in separate
+# presubmit jobs, whose configs are in system-3.xx.cfg files.
env_vars: {
key: "RUN_SYSTEM_TESTS"
value: "false"
diff --git a/.kokoro/presubmit/system-3.12.cfg b/.kokoro/presubmit/system-3.14.cfg
similarity index 91%
rename from .kokoro/presubmit/system-3.12.cfg
rename to .kokoro/presubmit/system-3.14.cfg
index d4cca031b..fcc70a922 100644
--- a/.kokoro/presubmit/system-3.12.cfg
+++ b/.kokoro/presubmit/system-3.14.cfg
@@ -3,7 +3,7 @@
# Only run this nox session.
env_vars: {
key: "NOX_SESSION"
- value: "system-3.12"
+ value: "system-3.14"
}
# Credentials needed to test universe domain.
diff --git a/.kokoro/presubmit/system-3.9.cfg b/.kokoro/presubmit/system-3.9.cfg
new file mode 100644
index 000000000..d21467d02
--- /dev/null
+++ b/.kokoro/presubmit/system-3.9.cfg
@@ -0,0 +1,13 @@
+# Format: //devtools/kokoro/config/proto/build.proto
+
+# Only run this nox session.
+env_vars: {
+ key: "NOX_SESSION"
+ value: "system-3.9"
+}
+
+# Credentials needed to test universe domain.
+env_vars: {
+ key: "SECRET_MANAGER_KEYS"
+ value: "client-library-test-universe-domain-credential"
+}
\ No newline at end of file
diff --git a/.librarian/generator-input/.repo-metadata.json b/.librarian/generator-input/.repo-metadata.json
index f644429bc..bd870f959 100644
--- a/.librarian/generator-input/.repo-metadata.json
+++ b/.librarian/generator-input/.repo-metadata.json
@@ -12,7 +12,7 @@
"api_id": "storage.googleapis.com",
"requires_billing": true,
"default_version": "v2",
- "codeowner_team": "@googleapis/gcs-sdk-team",
+ "codeowner_team": "@googleapis/yoshi-python @googleapis/gcs-sdk-team @googleapis/gcs-fs",
"api_shortname": "storage",
"api_description": "is a durable and highly available object storage service. Google Cloud Storage is almost infinitely scalable and guarantees consistency: when a write succeeds, the latest copy of the object will be returned to any GET, globally."
}
diff --git a/.librarian/generator-input/noxfile.py b/.librarian/generator-input/noxfile.py
index 16cf97b01..ca527decd 100644
--- a/.librarian/generator-input/noxfile.py
+++ b/.librarian/generator-input/noxfile.py
@@ -26,9 +26,9 @@
BLACK_VERSION = "black==23.7.0"
BLACK_PATHS = ["docs", "google", "tests", "noxfile.py", "setup.py"]
-DEFAULT_PYTHON_VERSION = "3.12"
-SYSTEM_TEST_PYTHON_VERSIONS = ["3.12"]
-UNIT_TEST_PYTHON_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13"]
+DEFAULT_PYTHON_VERSION = "3.14"
+SYSTEM_TEST_PYTHON_VERSIONS = ["3.9", "3.14"]
+UNIT_TEST_PYTHON_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13", "3.14"]
CONFORMANCE_TEST_PYTHON_VERSIONS = ["3.12"]
CURRENT_DIRECTORY = pathlib.Path(__file__).parent.absolute()
@@ -51,6 +51,7 @@
"unit-3.11",
"unit-3.12",
"unit-3.13",
+ "unit-3.14",
# cover must be last to avoid error `No data to report`
"cover",
]
diff --git a/.librarian/generator-input/setup.py b/.librarian/generator-input/setup.py
index 2c4504749..89971aa33 100644
--- a/.librarian/generator-input/setup.py
+++ b/.librarian/generator-input/setup.py
@@ -94,6 +94,7 @@
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
+ "Programming Language :: Python :: 3.14",
"Operating System :: OS Independent",
"Topic :: Internet",
],
diff --git a/.librarian/state.yaml b/.librarian/state.yaml
index 1502e804d..e20e4bc40 100644
--- a/.librarian/state.yaml
+++ b/.librarian/state.yaml
@@ -1,7 +1,7 @@
image: us-central1-docker.pkg.dev/cloud-sdk-librarian-prod/images-prod/python-librarian-generator@sha256:8e2c32496077054105bd06c54a59d6a6694287bc053588e24debe6da6920ad91
libraries:
- id: google-cloud-storage
- version: 3.6.0
+ version: 3.8.0
last_generated_commit: 5400ccce473c439885bd6bf2924fd242271bfcab
apis:
- path: google/storage/v2
diff --git a/.repo-metadata.json b/.repo-metadata.json
index f644429bc..bd870f959 100644
--- a/.repo-metadata.json
+++ b/.repo-metadata.json
@@ -12,7 +12,7 @@
"api_id": "storage.googleapis.com",
"requires_billing": true,
"default_version": "v2",
- "codeowner_team": "@googleapis/gcs-sdk-team",
+ "codeowner_team": "@googleapis/yoshi-python @googleapis/gcs-sdk-team @googleapis/gcs-fs",
"api_shortname": "storage",
"api_description": "is a durable and highly available object storage service. Google Cloud Storage is almost infinitely scalable and guarantees consistency: when a write succeeds, the latest copy of the object will be returned to any GET, globally."
}
diff --git a/CHANGELOG.md b/CHANGELOG.md
index da1f2149b..e371dd766 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,26 @@
[1]: https://pypi.org/project/google-cloud-storage/#history
+## [3.8.0](https://github.com/googleapis/python-storage/compare/v3.7.0...v3.8.0) (2026-01-13)
+
+
+### Features
+
+* flush the last chunk in append method (#1699) ([89bfe7a5fcd0391da35e9ceccc185279782b5420](https://github.com/googleapis/python-storage/commit/89bfe7a5fcd0391da35e9ceccc185279782b5420))
+* add write resumption strategy (#1663) ([a57ea0ec786a84c7ae9ed82c6ae5d38ecadba4af](https://github.com/googleapis/python-storage/commit/a57ea0ec786a84c7ae9ed82c6ae5d38ecadba4af))
+* add bidi stream retry manager. (#1632) ([d90f0ee09902a21b186106bcf0a8cb0b81b34340](https://github.com/googleapis/python-storage/commit/d90f0ee09902a21b186106bcf0a8cb0b81b34340))
+* implement "append_from_file" (#1686) ([1333c956da18b4db753cda98c41c3619c84caf69](https://github.com/googleapis/python-storage/commit/1333c956da18b4db753cda98c41c3619c84caf69))
+* make flush size configurable (#1677) ([f7095faf0a81239894ff9d277849788b62eb6ac5](https://github.com/googleapis/python-storage/commit/f7095faf0a81239894ff9d277849788b62eb6ac5))
+* compute chunk wise checksum for bidi_writes (#1675) ([139390cb01f93a2d61e7ec201e3637dffe0b2a34](https://github.com/googleapis/python-storage/commit/139390cb01f93a2d61e7ec201e3637dffe0b2a34))
+* expose persisted size in mrd (#1671) ([0e2961bef285fc064174a5c18e3db05c7a682521](https://github.com/googleapis/python-storage/commit/0e2961bef285fc064174a5c18e3db05c7a682521))
+
+
+### Bug Fixes
+
+* add system test for opening with read_handle (#1672) ([6dc711dacd4d38c573aa4ca9ad71fe412c0e49c1](https://github.com/googleapis/python-storage/commit/6dc711dacd4d38c573aa4ca9ad71fe412c0e49c1))
+* no state lookup while opening bidi-write stream (#1636) ([2d5a7b16846a69f3a911844971241899f60cce14](https://github.com/googleapis/python-storage/commit/2d5a7b16846a69f3a911844971241899f60cce14))
+* close write object stream always (#1661) ([4a609a4b3f4ba1396825911cb02f8a9649135cd5](https://github.com/googleapis/python-storage/commit/4a609a4b3f4ba1396825911cb02f8a9649135cd5))
+
## [3.7.0](https://github.com/googleapis/python-storage/compare/v3.6.0...v3.7.0) (2025-12-09)
diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst
index 316d8b266..1c1817212 100644
--- a/CONTRIBUTING.rst
+++ b/CONTRIBUTING.rst
@@ -22,7 +22,7 @@ In order to add a feature:
documentation.
- The feature must work fully on the following CPython versions:
- 3.7, 3.8, 3.9, 3.10, 3.11, 3.12 and 3.13 on both UNIX and Windows.
+ 3.7, 3.8, 3.9, 3.10, 3.11, 3.12, 3.13 and 3.14 on both UNIX and Windows.
- The feature must not add unnecessary dependencies (where
"unnecessary" is of course subjective, but new dependencies should
@@ -69,8 +69,7 @@ We use `nox `__ to instrument our tests.
- To test your changes, run unit tests with ``nox``::
- $ nox -s unit-2.7
- $ nox -s unit-3.7
+ $ nox -s unit-3.9
$ ...
.. note::
@@ -133,14 +132,11 @@ Running System Tests
- To run system tests, you can execute::
- $ nox -s system-3.8
- $ nox -s system-2.7
+ $ nox -s system-3.14
.. note::
- System tests are only configured to run under Python 2.7 and
- Python 3.8. For expediency, we do not run them in older versions
- of Python 3.
+   System tests are configured to run under Python 3.9 and 3.14 in ``noxfile.py``.
This alone will not run the tests. You'll need to change some local
auth settings and change some configuration in your project to
@@ -202,25 +198,27 @@ Supported Python Versions
We support:
-- `Python 3.5`_
-- `Python 3.6`_
- `Python 3.7`_
- `Python 3.8`_
+- `Python 3.9`_
+- `Python 3.10`_
+- `Python 3.11`_
+- `Python 3.12`_
+- `Python 3.13`_
+- `Python 3.14`_
-.. _Python 3.5: https://docs.python.org/3.5/
-.. _Python 3.6: https://docs.python.org/3.6/
.. _Python 3.7: https://docs.python.org/3.7/
.. _Python 3.8: https://docs.python.org/3.8/
-
+.. _Python 3.9: https://docs.python.org/3.9/
+.. _Python 3.10: https://docs.python.org/3.10/
+.. _Python 3.11: https://docs.python.org/3.11/
+.. _Python 3.12: https://docs.python.org/3.12/
+.. _Python 3.13: https://docs.python.org/3.13/
+.. _Python 3.14: https://docs.python.org/3.14/
Supported versions can be found in our ``noxfile.py`` `config`_.
-.. _config: https://github.com/googleapis/python-storage/blob/main/noxfile.py
-
-Python 2.7 support is deprecated. All code changes should maintain Python 2.7 compatibility until January 1, 2020.
-
-We also explicitly decided to support Python 3 beginning with version
-3.5. Reasons for this include:
+We also explicitly decided to support Python 3 beginning with version 3.9. Reasons for this include:
- Encouraging use of newest versions of Python 3
- Taking the lead of `prominent`_ open-source `projects`_
diff --git a/cloudbuild/run_zonal_tests.sh b/cloudbuild/run_zonal_tests.sh
index ef94e629b..283ed6826 100644
--- a/cloudbuild/run_zonal_tests.sh
+++ b/cloudbuild/run_zonal_tests.sh
@@ -6,6 +6,7 @@ sudo apt-get update && sudo apt-get install -y git python3-pip python3-venv
# Clone the repository and checkout the specific commit from the build trigger.
git clone https://github.com/googleapis/python-storage.git
cd python-storage
+git fetch origin "refs/pull/${_PR_NUMBER}/head"
git checkout ${COMMIT_SHA}
@@ -22,5 +23,6 @@ pip install -e .
echo '--- Setting up environment variables on VM ---'
export ZONAL_BUCKET=${_ZONAL_BUCKET}
export RUN_ZONAL_SYSTEM_TESTS=True
-echo '--- Running Zonal tests on VM ---'
+CURRENT_ULIMIT=$(ulimit -n)
+echo '--- Running Zonal tests on VM with ulimit set to ---' $CURRENT_ULIMIT
pytest -vv -s --log-format='%(asctime)s %(levelname)s %(message)s' --log-date-format='%H:%M:%S' tests/system/test_zonal.py
diff --git a/cloudbuild/zb-system-tests-cloudbuild.yaml b/cloudbuild/zb-system-tests-cloudbuild.yaml
index be790ebd4..562eae175 100644
--- a/cloudbuild/zb-system-tests-cloudbuild.yaml
+++ b/cloudbuild/zb-system-tests-cloudbuild.yaml
@@ -2,8 +2,26 @@ substitutions:
_REGION: "us-central1"
_ZONE: "us-central1-a"
_SHORT_BUILD_ID: ${BUILD_ID:0:8}
+ _VM_NAME: "py-sdk-sys-test-${_SHORT_BUILD_ID}"
+ _ULIMIT: "10000" # 10k, for gRPC bidi streams
+
+
steps:
+ # Step 0: Generate a persistent SSH key for this build run.
+ # This prevents gcloud from adding a new key to the OS Login profile on every ssh/scp command.
+ - name: "gcr.io/google.com/cloudsdktool/cloud-sdk"
+ id: "generate-ssh-key"
+ entrypoint: "bash"
+ args:
+ - "-c"
+ - |
+ mkdir -p /workspace/.ssh
+ # Generate the SSH key
+ ssh-keygen -t rsa -f /workspace/.ssh/google_compute_engine -N '' -C gcb
+ # Save the public key content to a file for the cleanup step
+ cat /workspace/.ssh/google_compute_engine.pub > /workspace/gcb_ssh_key.pub
+ waitFor: ["-"]
# Step 1 Create a GCE VM to run the tests.
# The VM is created in the same zone as the buckets to test rapid storage features.
@@ -15,7 +33,7 @@ steps:
- "compute"
- "instances"
- "create"
- - "gcsfs-test-vm-${_SHORT_BUILD_ID}"
+ - "${_VM_NAME}"
- "--project=${PROJECT_ID}"
- "--zone=${_ZONE}"
- "--machine-type=e2-medium"
@@ -38,33 +56,46 @@ steps:
set -e
# Wait for the VM to be fully initialized and SSH to be ready.
for i in {1..10}; do
- if gcloud compute ssh gcsfs-test-vm-${_SHORT_BUILD_ID} --zone=${_ZONE} --internal-ip --command="echo VM is ready"; then
+ if gcloud compute ssh ${_VM_NAME} --zone=${_ZONE} --internal-ip --ssh-key-file=/workspace/.ssh/google_compute_engine --command="echo VM is ready"; then
break
fi
echo "Waiting for VM to become available... (attempt $i/10)"
sleep 15
done
# copy the script to the VM
- gcloud compute scp cloudbuild/run_zonal_tests.sh gcsfs-test-vm-${_SHORT_BUILD_ID}:~ --zone=${_ZONE} --internal-ip
+ gcloud compute scp cloudbuild/run_zonal_tests.sh ${_VM_NAME}:~ --zone=${_ZONE} --internal-ip --ssh-key-file=/workspace/.ssh/google_compute_engine
# Execute the script on the VM via SSH.
# Capture the exit code to ensure cleanup happens before the build fails.
set +e
- gcloud compute ssh gcsfs-test-vm-${_SHORT_BUILD_ID} --zone=${_ZONE} --internal-ip --command="COMMIT_SHA=${COMMIT_SHA} _ZONAL_BUCKET=${_ZONAL_BUCKET} bash run_zonal_tests.sh"
+    gcloud compute ssh ${_VM_NAME} --zone=${_ZONE} --internal-ip --ssh-key-file=/workspace/.ssh/google_compute_engine --command="ulimit -n ${_ULIMIT}; COMMIT_SHA=${COMMIT_SHA} _ZONAL_BUCKET=${_ZONAL_BUCKET} _PR_NUMBER=${_PR_NUMBER} bash run_zonal_tests.sh"
EXIT_CODE=$?
set -e
echo "--- Deleting GCE VM ---"
- gcloud compute instances delete "gcsfs-test-vm-${_SHORT_BUILD_ID}" --zone=${_ZONE} --quiet
+ gcloud compute instances delete "${_VM_NAME}" --zone=${_ZONE} --quiet
# Exit with the original exit code from the test script.
exit $$EXIT_CODE
waitFor:
- "create-vm"
+ - "generate-ssh-key"
+
+ - name: "gcr.io/google.com/cloudsdktool/cloud-sdk"
+ id: "cleanup-ssh-key"
+ entrypoint: "bash"
+ args:
+ - "-c"
+ - |
+ echo "--- Removing SSH key from OS Login profile to prevent accumulation ---"
+ gcloud compute os-login ssh-keys remove \
+ --key-file=/workspace/gcb_ssh_key.pub || true
+ waitFor:
+ - "run-tests-and-delete-vm"
timeout: "3600s" # 60 minutes
options:
logging: CLOUD_LOGGING_ONLY
pool:
- name: "projects/${PROJECT_ID}/locations/us-central1/workerPools/cloud-build-worker-pool"
\ No newline at end of file
+ name: "projects/${PROJECT_ID}/locations/us-central1/workerPools/cloud-build-worker-pool"
diff --git a/google/cloud/_storage_v2/gapic_version.py b/google/cloud/_storage_v2/gapic_version.py
index d69b0530e..4a7c666d6 100644
--- a/google/cloud/_storage_v2/gapic_version.py
+++ b/google/cloud/_storage_v2/gapic_version.py
@@ -13,4 +13,4 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
-__version__ = "3.6.0" # {x-release-please-version}
+__version__ = "3.8.0" # {x-release-please-version}
diff --git a/google/cloud/storage/_experimental/asyncio/_utils.py b/google/cloud/storage/_experimental/asyncio/_utils.py
new file mode 100644
index 000000000..32d83a586
--- /dev/null
+++ b/google/cloud/storage/_experimental/asyncio/_utils.py
@@ -0,0 +1,35 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import google_crc32c
+
+from google.api_core import exceptions
+
+
+def raise_if_no_fast_crc32c():
+ """Check if the C-accelerated version of google-crc32c is available.
+
+ If not, raise an error to prevent silent performance degradation.
+
+ raises google.api_core.exceptions.FailedPrecondition: If the C extension is not available.
+ returns: True if the C extension is available.
+ rtype: bool
+
+ """
+ if google_crc32c.implementation != "c":
+ raise exceptions.FailedPrecondition(
+ "The google-crc32c package is not installed with C support. "
+ "C extension is required for faster data integrity checks."
+ "For more information, see https://github.com/googleapis/python-crc32c."
+ )
diff --git a/google/cloud/storage/_experimental/asyncio/async_appendable_object_writer.py b/google/cloud/storage/_experimental/asyncio/async_appendable_object_writer.py
index d34c844d5..c808cb52a 100644
--- a/google/cloud/storage/_experimental/asyncio/async_appendable_object_writer.py
+++ b/google/cloud/storage/_experimental/asyncio/async_appendable_object_writer.py
@@ -21,7 +21,13 @@
if you want to use these Rapid Storage APIs.
"""
+from io import BufferedReader
from typing import Optional, Union
+
+from google_crc32c import Checksum
+from google.api_core import exceptions
+
+from ._utils import raise_if_no_fast_crc32c
from google.cloud import _storage_v2
from google.cloud.storage._experimental.asyncio.async_grpc_client import (
AsyncGrpcClient,
@@ -32,7 +38,7 @@
_MAX_CHUNK_SIZE_BYTES = 2 * 1024 * 1024 # 2 MiB
-_MAX_BUFFER_SIZE_BYTES = 16 * 1024 * 1024 # 16 MiB
+_DEFAULT_FLUSH_INTERVAL_BYTES = 16 * 1024 * 1024 # 16 MiB
class AsyncAppendableObjectWriter:
@@ -45,6 +51,7 @@ def __init__(
object_name: str,
generation=None,
write_handle=None,
+ writer_options: Optional[dict] = None,
):
"""
Class for appending data to a GCS Appendable Object.
@@ -100,6 +107,7 @@ def __init__(
:param write_handle: (Optional) An existing handle for writing the object.
If provided, opening the bidi-gRPC connection will be faster.
"""
+ raise_if_no_fast_crc32c()
self.client = client
self.bucket_name = bucket_name
self.object_name = object_name
@@ -114,8 +122,27 @@ def __init__(
write_handle=self.write_handle,
)
self._is_stream_open: bool = False
+        # `offset` is the latest size of the object without staleness.
self.offset: Optional[int] = None
+ # `persisted_size` is the total_bytes persisted in the GCS server.
+        # Please note: `offset` and `persisted_size` are the same when the stream is
+ # opened.
self.persisted_size: Optional[int] = None
+ if writer_options is None:
+ writer_options = {}
+ self.flush_interval = writer_options.get(
+ "FLUSH_INTERVAL_BYTES", _DEFAULT_FLUSH_INTERVAL_BYTES
+ )
+ # TODO: add test case for this.
+ if self.flush_interval < _MAX_CHUNK_SIZE_BYTES:
+ raise exceptions.OutOfRange(
+ f"flush_interval must be >= {_MAX_CHUNK_SIZE_BYTES} , but provided {self.flush_interval}"
+ )
+ if self.flush_interval % _MAX_CHUNK_SIZE_BYTES != 0:
+ raise exceptions.OutOfRange(
+ f"flush_interval must be a multiple of {_MAX_CHUNK_SIZE_BYTES}, but provided {self.flush_interval}"
+ )
+ self.bytes_appended_since_last_flush = 0
async def state_lookup(self) -> int:
"""Returns the persisted_size
@@ -152,17 +179,18 @@ async def open(self) -> None:
if self.generation is None:
self.generation = self.write_obj_stream.generation_number
self.write_handle = self.write_obj_stream.write_handle
-
- # Update self.persisted_size
- _ = await self.state_lookup()
+ self.persisted_size = self.write_obj_stream.persisted_size
async def append(self, data: bytes) -> None:
"""Appends data to the Appendable object.
- This method sends the provided data to the GCS server in chunks. It
- maintains an internal threshold `_MAX_BUFFER_SIZE_BYTES` and will
- automatically flush the data to make it visible to readers when that
- threshold has reached.
+ calling `self.append` will append bytes at the end of the current size
+        i.e. `self.offset` bytes relative to the beginning of the object.
+
+        This method sends the provided `data` to the GCS server in chunks
+        and persists data in GCS at every `flush_interval` bytes
+ or at the last chunk whichever is earlier. Persisting is done by setting
+ `flush=True` on request.
:type data: bytes
:param data: The bytes to append to the object.
@@ -184,23 +212,37 @@ async def append(self, data: bytes) -> None:
self.offset = self.persisted_size
start_idx = 0
- bytes_to_flush = 0
while start_idx < total_bytes:
end_idx = min(start_idx + _MAX_CHUNK_SIZE_BYTES, total_bytes)
+ data_chunk = data[start_idx:end_idx]
+ is_last_chunk = end_idx == total_bytes
+ chunk_size = end_idx - start_idx
await self.write_obj_stream.send(
_storage_v2.BidiWriteObjectRequest(
write_offset=self.offset,
checksummed_data=_storage_v2.ChecksummedData(
- content=data[start_idx:end_idx]
+ content=data_chunk,
+ crc32c=int.from_bytes(Checksum(data_chunk).digest(), "big"),
+ ),
+ state_lookup=is_last_chunk,
+ flush=is_last_chunk
+ or (
+ self.bytes_appended_since_last_flush + chunk_size
+ >= self.flush_interval
),
)
)
- chunk_size = end_idx - start_idx
self.offset += chunk_size
- bytes_to_flush += chunk_size
- if bytes_to_flush >= _MAX_BUFFER_SIZE_BYTES:
- await self.simple_flush()
- bytes_to_flush = 0
+ self.bytes_appended_since_last_flush += chunk_size
+
+ if self.bytes_appended_since_last_flush >= self.flush_interval:
+ self.bytes_appended_since_last_flush = 0
+
+ if is_last_chunk:
+ response = await self.write_obj_stream.recv()
+ self.persisted_size = response.persisted_size
+ self.offset = self.persisted_size
+ self.bytes_appended_since_last_flush = 0
start_idx = end_idx
async def simple_flush(self) -> None:
@@ -264,19 +306,24 @@ async def close(self, finalize_on_close=False) -> Union[int, _storage_v2.Object]
raise ValueError("Stream is not open. Call open() before close().")
if finalize_on_close:
- await self.finalize()
- else:
- await self.flush()
- await self.write_obj_stream.close()
+ return await self.finalize()
+
+ await self.write_obj_stream.close()
self._is_stream_open = False
self.offset = None
- return self.object_resource if finalize_on_close else self.persisted_size
+ return self.persisted_size
async def finalize(self) -> _storage_v2.Object:
"""Finalizes the Appendable Object.
Note: Once finalized no more data can be appended.
+        This method is different from `close`. If `.close()` is called, data may
+ still be appended to object at a later point in time by opening with
+ generation number.
+        (i.e. `open(..., generation=<generation_number>)`).
+ However if `.finalize()` is called no more data can be appended to the
+ object.
rtype: google.cloud.storage_v2.types.Object
returns: The finalized object resource.
@@ -293,6 +340,10 @@ async def finalize(self) -> _storage_v2.Object:
response = await self.write_obj_stream.recv()
self.object_resource = response.resource
self.persisted_size = self.object_resource.size
+ await self.write_obj_stream.close()
+
+ self._is_stream_open = False
+ self.offset = None
return self.object_resource
# helper methods.
@@ -311,6 +362,16 @@ async def append_from_stream(self, stream_obj):
"""
raise NotImplementedError("append_from_stream is not implemented yet.")
- async def append_from_file(self, file_path: str):
- """Create a file object from `file_path` and call append_from_stream(file_obj)"""
- raise NotImplementedError("append_from_file is not implemented yet.")
+ async def append_from_file(
+ self, file_obj: BufferedReader, block_size: int = _DEFAULT_FLUSH_INTERVAL_BYTES
+ ):
+ """
+        Appends data to an Appendable Object using `file_obj` which is opened
+ for reading in binary mode.
+
+ :type file_obj: file
+ :param file_obj: A file handle opened in binary mode for reading.
+
+ """
+ while block := file_obj.read(block_size):
+ await self.append(block)
diff --git a/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py b/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py
index 16dce5025..8f16294d8 100644
--- a/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py
+++ b/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py
@@ -14,54 +14,80 @@
from __future__ import annotations
import asyncio
-import google_crc32c
+import logging
from google.api_core import exceptions
-from google_crc32c import Checksum
+from google.api_core.retry_async import AsyncRetry
+from google.cloud.storage._experimental.asyncio.retry._helpers import _handle_redirect
+from google.rpc import status_pb2
-from typing import List, Optional, Tuple
+from typing import List, Optional, Tuple, Any, Dict
+from ._utils import raise_if_no_fast_crc32c
from google.cloud.storage._experimental.asyncio.async_read_object_stream import (
_AsyncReadObjectStream,
)
from google.cloud.storage._experimental.asyncio.async_grpc_client import (
AsyncGrpcClient,
)
+from google.cloud.storage._experimental.asyncio.retry.bidi_stream_retry_manager import (
+ _BidiStreamRetryManager,
+)
+from google.cloud.storage._experimental.asyncio.retry.reads_resumption_strategy import (
+ _ReadResumptionStrategy,
+ _DownloadState,
+)
from io import BytesIO
from google.cloud import _storage_v2
-from google.cloud.storage.exceptions import DataCorruption
from google.cloud.storage._helpers import generate_random_56_bit_integer
_MAX_READ_RANGES_PER_BIDI_READ_REQUEST = 100
+_BIDI_READ_REDIRECTED_TYPE_URL = (
+ "type.googleapis.com/google.storage.v2.BidiReadObjectRedirectedError"
+)
+logger = logging.getLogger(__name__)
+
+
+def _is_read_retryable(exc):
+ """Predicate to determine if a read operation should be retried."""
+ if isinstance(
+ exc,
+ (
+ exceptions.InternalServerError,
+ exceptions.ServiceUnavailable,
+ exceptions.DeadlineExceeded,
+ exceptions.TooManyRequests,
+ ),
+ ):
+ return True
+
+ if not isinstance(exc, exceptions.Aborted) or not exc.errors:
+ return False
+
+ try:
+ grpc_error = exc.errors[0]
+ trailers = grpc_error.trailing_metadata()
+ if not trailers:
+ return False
+
+ status_details_bin = next(
+ (v for k, v in trailers if k == "grpc-status-details-bin"), None
+ )
-class Result:
- """An instance of this class will be populated and retured for each
- `read_range` provided to ``download_ranges`` method.
-
- """
-
- def __init__(self, bytes_requested: int):
- # only while instantiation, should not be edited later.
- # hence there's no setter, only getter is provided.
- self._bytes_requested: int = bytes_requested
- self._bytes_written: int = 0
-
- @property
- def bytes_requested(self) -> int:
- return self._bytes_requested
-
- @property
- def bytes_written(self) -> int:
- return self._bytes_written
-
- @bytes_written.setter
- def bytes_written(self, value: int):
- self._bytes_written = value
+ if not status_details_bin:
+ return False
- def __repr__(self):
- return f"bytes_requested: {self._bytes_requested}, bytes_written: {self._bytes_written}"
+ status_proto = status_pb2.Status()
+ status_proto.ParseFromString(status_details_bin)
+ return any(
+ detail.type_url == _BIDI_READ_REDIRECTED_TYPE_URL
+ for detail in status_proto.details
+ )
+ except Exception as e:
+ logger.error(f"Error parsing status_details_bin: {e}")
+ return False
class AsyncMultiRangeDownloader:
@@ -104,6 +130,8 @@ async def create_mrd(
object_name: str,
generation_number: Optional[int] = None,
read_handle: Optional[bytes] = None,
+ retry_policy: Optional[AsyncRetry] = None,
+ metadata: Optional[List[Tuple[str, str]]] = None,
) -> AsyncMultiRangeDownloader:
"""Initializes a MultiRangeDownloader and opens the underlying bidi-gRPC
object for reading.
@@ -125,11 +153,17 @@ async def create_mrd(
:param read_handle: (Optional) An existing handle for reading the object.
If provided, opening the bidi-gRPC connection will be faster.
+ :type retry_policy: :class:`~google.api_core.retry_async.AsyncRetry`
+ :param retry_policy: (Optional) The retry policy to use for the ``open`` operation.
+
+ :type metadata: List[Tuple[str, str]]
+ :param metadata: (Optional) The metadata to be sent with the ``open`` request.
+
:rtype: :class:`~google.cloud.storage._experimental.asyncio.async_multi_range_downloader.AsyncMultiRangeDownloader`
:returns: An initialized AsyncMultiRangeDownloader instance for reading.
"""
mrd = cls(client, bucket_name, object_name, generation_number, read_handle)
- await mrd.open()
+ await mrd.open(retry_policy=retry_policy, metadata=metadata)
return mrd
def __init__(
@@ -160,14 +194,7 @@ def __init__(
:param read_handle: (Optional) An existing read handle.
"""
- # Verify that the fast, C-accelerated version of crc32c is available.
- # If not, raise an error to prevent silent performance degradation.
- if google_crc32c.implementation != "c":
- raise exceptions.NotFound(
- "The google-crc32c package is not installed with C support. "
- "Bidi reads require the C extension for data integrity checks."
- "For more information, see https://github.com/googleapis/python-crc32c."
- )
+ raise_if_no_fast_crc32c()
self.client = client
self.bucket_name = bucket_name
@@ -176,24 +203,65 @@ def __init__(
self.read_handle = read_handle
self.read_obj_str: Optional[_AsyncReadObjectStream] = None
self._is_stream_open: bool = False
-
+ self._routing_token: Optional[str] = None
self._read_id_to_writable_buffer_dict = {}
self._read_id_to_download_ranges_id = {}
self._download_ranges_id_to_pending_read_ids = {}
+ self.persisted_size: Optional[int] = None # updated after opening the stream
- async def open(self) -> None:
- """Opens the bidi-gRPC connection to read from the object.
-
- This method initializes and opens an `_AsyncReadObjectStream` (bidi-gRPC stream) to
- for downloading ranges of data from GCS ``Object``.
+ def _on_open_error(self, exc):
+ """Extracts routing token and read handle on redirect error during open."""
+ routing_token, read_handle = _handle_redirect(exc)
+ if routing_token:
+ self._routing_token = routing_token
+ if read_handle:
+ self.read_handle = read_handle
- "Opening" constitutes fetching object metadata such as generation number
- and read handle and sets them as attributes if not already set.
- """
+ async def open(
+ self,
+ retry_policy: Optional[AsyncRetry] = None,
+ metadata: Optional[List[Tuple[str, str]]] = None,
+ ) -> None:
+ """Opens the bidi-gRPC connection to read from the object."""
if self._is_stream_open:
raise ValueError("Underlying bidi-gRPC stream is already open")
- if self.read_obj_str is None:
+ if retry_policy is None:
+ retry_policy = AsyncRetry(
+ predicate=_is_read_retryable, on_error=self._on_open_error
+ )
+ else:
+ original_on_error = retry_policy._on_error
+
+ def combined_on_error(exc):
+ self._on_open_error(exc)
+ if original_on_error:
+ original_on_error(exc)
+
+ retry_policy = AsyncRetry(
+ predicate=_is_read_retryable,
+ initial=retry_policy._initial,
+ maximum=retry_policy._maximum,
+ multiplier=retry_policy._multiplier,
+ deadline=retry_policy._deadline,
+ on_error=combined_on_error,
+ )
+
+ async def _do_open():
+ current_metadata = list(metadata) if metadata else []
+
+ # Cleanup stream from previous failed attempt, if any.
+ if self.read_obj_str:
+ if self.read_obj_str.is_stream_open:
+ try:
+ await self.read_obj_str.close()
+ except exceptions.GoogleAPICallError as e:
+ logger.warning(
+ f"Failed to close existing stream during resumption: {e}"
+ )
+ self.read_obj_str = None
+ self._is_stream_open = False
+
self.read_obj_str = _AsyncReadObjectStream(
client=self.client,
bucket_name=self.bucket_name,
@@ -201,22 +269,42 @@ async def open(self) -> None:
generation_number=self.generation_number,
read_handle=self.read_handle,
)
- await self.read_obj_str.open()
- self._is_stream_open = True
- if self.generation_number is None:
- self.generation_number = self.read_obj_str.generation_number
- self.read_handle = self.read_obj_str.read_handle
- return
+
+ if self._routing_token:
+ current_metadata.append(
+ ("x-goog-request-params", f"routing_token={self._routing_token}")
+ )
+ self._routing_token = None
+
+ await self.read_obj_str.open(
+ metadata=current_metadata if current_metadata else None
+ )
+
+ if self.read_obj_str.generation_number:
+ self.generation_number = self.read_obj_str.generation_number
+ if self.read_obj_str.read_handle:
+ self.read_handle = self.read_obj_str.read_handle
+ if self.read_obj_str.persisted_size is not None:
+ self.persisted_size = self.read_obj_str.persisted_size
+
+ self._is_stream_open = True
+
+ await retry_policy(_do_open)()
async def download_ranges(
- self, read_ranges: List[Tuple[int, int, BytesIO]], lock: asyncio.Lock = None
+ self,
+ read_ranges: List[Tuple[int, int, BytesIO]],
+ lock: asyncio.Lock = None,
+ retry_policy: Optional[AsyncRetry] = None,
+ metadata: Optional[List[Tuple[str, str]]] = None,
) -> None:
"""Downloads multiple byte ranges from the object into the buffers
- provided by user.
+ provided by user with automatic retries.
:type read_ranges: List[Tuple[int, int, "BytesIO"]]
:param read_ranges: A list of tuples, where each tuple represents a
- byte range (start_byte, bytes_to_read, writeable_buffer). Buffer has
+        combination of byte_range and writeable buffer in format -
+ (`start_byte`, `bytes_to_read`, `writeable_buffer`). Buffer has
to be provided by the user, and user has to make sure appropriate
memory is available in the application to avoid out-of-memory crash.
@@ -246,6 +334,8 @@ async def download_ranges(
```
+ :type retry_policy: :class:`~google.api_core.retry_async.AsyncRetry`
+ :param retry_policy: (Optional) The retry policy to use for the operation.
:raises ValueError: if the underlying bidi-GRPC stream is not open.
:raises ValueError: if the length of read_ranges is more than 1000.
@@ -264,72 +354,122 @@ async def download_ranges(
if lock is None:
lock = asyncio.Lock()
- _func_id = generate_random_56_bit_integer()
- read_ids_in_current_func = set()
- for i in range(0, len(read_ranges), _MAX_READ_RANGES_PER_BIDI_READ_REQUEST):
- read_ranges_segment = read_ranges[
- i : i + _MAX_READ_RANGES_PER_BIDI_READ_REQUEST
- ]
+ if retry_policy is None:
+ retry_policy = AsyncRetry(predicate=_is_read_retryable)
+
+ # Initialize Global State for Retry Strategy
+ download_states = {}
+ for read_range in read_ranges:
+ read_id = generate_random_56_bit_integer()
+ download_states[read_id] = _DownloadState(
+ initial_offset=read_range[0],
+ initial_length=read_range[1],
+ user_buffer=read_range[2],
+ )
- read_ranges_for_bidi_req = []
- for j, read_range in enumerate(read_ranges_segment):
- read_id = generate_random_56_bit_integer()
- read_ids_in_current_func.add(read_id)
- self._read_id_to_download_ranges_id[read_id] = _func_id
- self._read_id_to_writable_buffer_dict[read_id] = read_range[2]
- bytes_requested = read_range[1]
- read_ranges_for_bidi_req.append(
- _storage_v2.ReadRange(
- read_offset=read_range[0],
- read_length=bytes_requested,
- read_id=read_id,
+ initial_state = {
+ "download_states": download_states,
+ "read_handle": self.read_handle,
+ "routing_token": None,
+ }
+
+ # Track attempts to manage stream reuse
+ attempt_count = 0
+
+ def send_ranges_and_get_bytes(
+ requests: List[_storage_v2.ReadRange],
+ state: Dict[str, Any],
+ metadata: Optional[List[Tuple[str, str]]] = None,
+ ):
+ async def generator():
+ nonlocal attempt_count
+ attempt_count += 1
+
+ if attempt_count > 1:
+ logger.info(
+ f"Resuming download (attempt {attempt_count - 1}) for {len(requests)} ranges."
)
- )
- async with lock:
- await self.read_obj_str.send(
- _storage_v2.BidiReadObjectRequest(
- read_ranges=read_ranges_for_bidi_req
- )
- )
- self._download_ranges_id_to_pending_read_ids[
- _func_id
- ] = read_ids_in_current_func
-
- while len(self._download_ranges_id_to_pending_read_ids[_func_id]) > 0:
- async with lock:
- response = await self.read_obj_str.recv()
-
- if response is None:
- raise Exception("None response received, something went wrong.")
-
- for object_data_range in response.object_data_ranges:
- if object_data_range.read_range is None:
- raise Exception("Invalid response, read_range is None")
- checksummed_data = object_data_range.checksummed_data
- data = checksummed_data.content
- server_checksum = checksummed_data.crc32c
-
- client_crc32c = Checksum(data).digest()
- client_checksum = int.from_bytes(client_crc32c, "big")
-
- if server_checksum != client_checksum:
- raise DataCorruption(
- response,
- f"Checksum mismatch for read_id {object_data_range.read_range.read_id}. "
- f"Server sent {server_checksum}, client calculated {client_checksum}.",
+ async with lock:
+ current_handle = state.get("read_handle")
+ current_token = state.get("routing_token")
+
+ # We reopen if it's a redirect (token exists) OR if this is a retry
+ # (not first attempt). This prevents trying to send data on a dead
+ # stream from a previous failed attempt.
+ should_reopen = (
+ (attempt_count > 1)
+ or (current_token is not None)
+ or (metadata is not None)
)
- read_id = object_data_range.read_range.read_id
- buffer = self._read_id_to_writable_buffer_dict[read_id]
- buffer.write(data)
+ if should_reopen:
+ if current_token:
+ logger.info(
+ f"Re-opening stream with routing token: {current_token}"
+ )
+ # Close existing stream if any
+ if self.read_obj_str and self.read_obj_str.is_stream_open:
+ await self.read_obj_str.close()
+
+ # Re-initialize stream
+ self.read_obj_str = _AsyncReadObjectStream(
+ client=self.client,
+ bucket_name=self.bucket_name,
+ object_name=self.object_name,
+ generation_number=self.generation_number,
+ read_handle=current_handle,
+ )
+
+ # Inject routing_token into metadata if present
+ current_metadata = list(metadata) if metadata else []
+ if current_token:
+ current_metadata.append(
+ (
+ "x-goog-request-params",
+ f"routing_token={current_token}",
+ )
+ )
+
+ await self.read_obj_str.open(
+ metadata=current_metadata if current_metadata else None
+ )
+ self._is_stream_open = True
+
+ pending_read_ids = {r.read_id for r in requests}
+
+ # Send Requests
+ for i in range(
+ 0, len(requests), _MAX_READ_RANGES_PER_BIDI_READ_REQUEST
+ ):
+ batch = requests[i : i + _MAX_READ_RANGES_PER_BIDI_READ_REQUEST]
+ await self.read_obj_str.send(
+ _storage_v2.BidiReadObjectRequest(read_ranges=batch)
+ )
+
+ while pending_read_ids:
+ response = await self.read_obj_str.recv()
+ if response is None:
+ break
+ if response.object_data_ranges:
+ for data_range in response.object_data_ranges:
+ if data_range.range_end:
+ pending_read_ids.discard(
+ data_range.read_range.read_id
+ )
+ yield response
+
+ return generator()
+
+ strategy = _ReadResumptionStrategy()
+ retry_manager = _BidiStreamRetryManager(
+ strategy, lambda r, s: send_ranges_and_get_bytes(r, s, metadata=metadata)
+ )
- if object_data_range.range_end:
- tmp_dn_ranges_id = self._read_id_to_download_ranges_id[read_id]
- self._download_ranges_id_to_pending_read_ids[
- tmp_dn_ranges_id
- ].remove(read_id)
- del self._read_id_to_download_ranges_id[read_id]
+ await retry_manager.execute(initial_state, retry_policy)
+
+ if initial_state.get("read_handle"):
+ self.read_handle = initial_state["read_handle"]
async def close(self):
"""
@@ -337,7 +477,10 @@ async def close(self):
"""
if not self._is_stream_open:
raise ValueError("Underlying bidi-gRPC stream is not open")
- await self.read_obj_str.close()
+
+ if self.read_obj_str:
+ await self.read_obj_str.close()
+ self.read_obj_str = None
self._is_stream_open = False
@property
diff --git a/google/cloud/storage/_experimental/asyncio/async_read_object_stream.py b/google/cloud/storage/_experimental/asyncio/async_read_object_stream.py
index ddaaf9a54..7adcdd1c9 100644
--- a/google/cloud/storage/_experimental/asyncio/async_read_object_stream.py
+++ b/google/cloud/storage/_experimental/asyncio/async_read_object_stream.py
@@ -22,7 +22,7 @@
"""
-from typing import Optional
+from typing import List, Optional, Tuple
from google.cloud import _storage_v2
from google.cloud.storage._experimental.asyncio.async_grpc_client import AsyncGrpcClient
from google.cloud.storage._experimental.asyncio.async_abstract_object_stream import (
@@ -84,32 +84,66 @@ def __init__(
self.rpc = self.client._client._transport._wrapped_methods[
self.client._client._transport.bidi_read_object
]
- self.first_bidi_read_req = _storage_v2.BidiReadObjectRequest(
- read_object_spec=_storage_v2.BidiReadObjectSpec(
- bucket=self._full_bucket_name, object=object_name
- ),
- )
self.metadata = (("x-goog-request-params", f"bucket={self._full_bucket_name}"),)
self.socket_like_rpc: Optional[AsyncBidiRpc] = None
self._is_stream_open: bool = False
+ self.persisted_size: Optional[int] = None
- async def open(self) -> None:
+ async def open(self, metadata: Optional[List[Tuple[str, str]]] = None) -> None:
"""Opens the bidi-gRPC connection to read from the object.
This method sends an initial request to start the stream and receives
the first response containing metadata and a read handle.
+
+ Args:
+ metadata (Optional[List[Tuple[str, str]]]): Additional metadata
+ to send with the initial stream request, e.g., for routing tokens.
"""
if self._is_stream_open:
raise ValueError("Stream is already open")
+
+ read_handle = self.read_handle if self.read_handle else None
+
+ read_object_spec = _storage_v2.BidiReadObjectSpec(
+ bucket=self._full_bucket_name,
+ object=self.object_name,
+ generation=self.generation_number if self.generation_number else None,
+ read_handle=read_handle,
+ )
+ self.first_bidi_read_req = _storage_v2.BidiReadObjectRequest(
+ read_object_spec=read_object_spec
+ )
+
+ # Build the x-goog-request-params header
+ request_params = [f"bucket={self._full_bucket_name}"]
+ other_metadata = []
+ if metadata:
+ for key, value in metadata:
+ if key == "x-goog-request-params":
+ request_params.append(value)
+ else:
+ other_metadata.append((key, value))
+
+ current_metadata = other_metadata
+ current_metadata.append(("x-goog-request-params", ",".join(request_params)))
+
self.socket_like_rpc = AsyncBidiRpc(
- self.rpc, initial_request=self.first_bidi_read_req, metadata=self.metadata
+ self.rpc,
+ initial_request=self.first_bidi_read_req,
+ metadata=current_metadata,
)
await self.socket_like_rpc.open() # this is actually 1 send
response = await self.socket_like_rpc.recv()
- if self.generation_number is None:
- self.generation_number = response.metadata.generation
+ # populated only in the first response of bidi-stream and when opened
+ # without using `read_handle`
+ if hasattr(response, "metadata") and response.metadata:
+ if self.generation_number is None:
+ self.generation_number = response.metadata.generation
+ # update persisted size
+ self.persisted_size = response.metadata.size
- self.read_handle = response.read_handle
+ if response and response.read_handle:
+ self.read_handle = response.read_handle
self._is_stream_open = True
diff --git a/google/cloud/storage/_experimental/asyncio/async_write_object_stream.py b/google/cloud/storage/_experimental/asyncio/async_write_object_stream.py
index 6d1fd5b31..183a8eeb1 100644
--- a/google/cloud/storage/_experimental/asyncio/async_write_object_stream.py
+++ b/google/cloud/storage/_experimental/asyncio/async_write_object_stream.py
@@ -117,7 +117,6 @@ async def open(self) -> None:
object=self.object_name,
generation=self.generation_number,
),
- state_lookup=True,
)
self.socket_like_rpc = AsyncBidiRpc(
@@ -136,11 +135,17 @@ async def open(self) -> None:
raise ValueError(
"Failed to obtain object generation after opening the stream"
)
- self.generation_number = response.resource.generation
if not response.write_handle:
raise ValueError("Failed to obtain write_handle after opening the stream")
+ if not response.resource.size:
+ # Appending to a 0 byte appendable object.
+ self.persisted_size = 0
+ else:
+ self.persisted_size = response.resource.size
+
+ self.generation_number = response.resource.generation
self.write_handle = response.write_handle
async def close(self) -> None:
diff --git a/google/cloud/storage/_experimental/asyncio/retry/_helpers.py b/google/cloud/storage/_experimental/asyncio/retry/_helpers.py
new file mode 100644
index 000000000..627bf5944
--- /dev/null
+++ b/google/cloud/storage/_experimental/asyncio/retry/_helpers.py
@@ -0,0 +1,83 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+import logging
+from typing import Tuple, Optional
+
+from google.api_core import exceptions
+from google.cloud._storage_v2.types import BidiReadObjectRedirectedError
+from google.rpc import status_pb2
+
+_BIDI_READ_REDIRECTED_TYPE_URL = (
+ "type.googleapis.com/google.storage.v2.BidiReadObjectRedirectedError"
+)
+
+
+def _handle_redirect(
+ exc: Exception,
+) -> Tuple[Optional[str], Optional[bytes]]:
+ """
+ Extracts routing token and read handle from a gRPC error.
+
+ :type exc: Exception
+ :param exc: The exception to parse.
+
+ :rtype: Tuple[Optional[str], Optional[bytes]]
+ :returns: A tuple of (routing_token, read_handle).
+ """
+ routing_token = None
+ read_handle = None
+
+ grpc_error = None
+ if isinstance(exc, exceptions.Aborted) and exc.errors:
+ grpc_error = exc.errors[0]
+
+ if grpc_error:
+ if isinstance(grpc_error, BidiReadObjectRedirectedError):
+ routing_token = grpc_error.routing_token
+ if grpc_error.read_handle:
+ read_handle = grpc_error.read_handle
+ return routing_token, read_handle
+
+ if hasattr(grpc_error, "trailing_metadata"):
+ trailers = grpc_error.trailing_metadata()
+ if not trailers:
+ return None, None
+
+ status_details_bin = None
+ for key, value in trailers:
+ if key == "grpc-status-details-bin":
+ status_details_bin = value
+ break
+
+ if status_details_bin:
+ status_proto = status_pb2.Status()
+ try:
+ status_proto.ParseFromString(status_details_bin)
+ for detail in status_proto.details:
+ if detail.type_url == _BIDI_READ_REDIRECTED_TYPE_URL:
+ redirect_proto = BidiReadObjectRedirectedError.deserialize(
+ detail.value
+ )
+ if redirect_proto.routing_token:
+ routing_token = redirect_proto.routing_token
+ if redirect_proto.read_handle:
+ read_handle = redirect_proto.read_handle
+ break
+ except Exception as e:
+                logging.error(f"Error unpacking redirect: {e}")
+
+ return routing_token, read_handle
diff --git a/google/cloud/storage/_experimental/asyncio/retry/base_strategy.py b/google/cloud/storage/_experimental/asyncio/retry/base_strategy.py
index e32125069..ff193f109 100644
--- a/google/cloud/storage/_experimental/asyncio/retry/base_strategy.py
+++ b/google/cloud/storage/_experimental/asyncio/retry/base_strategy.py
@@ -1,3 +1,17 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
import abc
from typing import Any, Iterable
diff --git a/google/cloud/storage/_experimental/asyncio/retry/bidi_stream_retry_manager.py b/google/cloud/storage/_experimental/asyncio/retry/bidi_stream_retry_manager.py
new file mode 100644
index 000000000..a8caae4eb
--- /dev/null
+++ b/google/cloud/storage/_experimental/asyncio/retry/bidi_stream_retry_manager.py
@@ -0,0 +1,69 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+from typing import Any, AsyncIterator, Callable
+
+from google.cloud.storage._experimental.asyncio.retry.base_strategy import (
+ _BaseResumptionStrategy,
+)
+
+logger = logging.getLogger(__name__)
+
+
+class _BidiStreamRetryManager:
+ """Manages the generic retry loop for a bidi streaming operation."""
+
+ def __init__(
+ self,
+ strategy: _BaseResumptionStrategy,
+ send_and_recv: Callable[..., AsyncIterator[Any]],
+ ):
+ """Initializes the retry manager.
+ Args:
+ strategy: The strategy for managing the state of a specific
+ bidi operation (e.g., reads or writes).
+ send_and_recv: An async callable that opens a new gRPC stream.
+ """
+ self._strategy = strategy
+ self._send_and_recv = send_and_recv
+
+ async def execute(self, initial_state: Any, retry_policy):
+ """
+ Executes the bidi operation with the configured retry policy.
+ Args:
+ initial_state: An object containing all state for the operation.
+ retry_policy: The `google.api_core.retry.AsyncRetry` object to
+ govern the retry behavior for this specific operation.
+ """
+ state = initial_state
+
+ async def attempt():
+ requests = self._strategy.generate_requests(state)
+ stream = self._send_and_recv(requests, state)
+ try:
+ async for response in stream:
+ self._strategy.update_state_from_response(response, state)
+ return
+ except Exception as e:
+ if retry_policy._predicate(e):
+ logger.info(
+ f"Bidi stream operation failed: {e}. Attempting state recovery and retry."
+ )
+ await self._strategy.recover_state_on_failure(e, state)
+ raise e
+
+ wrapped_attempt = retry_policy(attempt)
+
+ await wrapped_attempt()
diff --git a/google/cloud/storage/_experimental/asyncio/retry/reads_resumption_strategy.py b/google/cloud/storage/_experimental/asyncio/retry/reads_resumption_strategy.py
index d5d080358..916b82e6e 100644
--- a/google/cloud/storage/_experimental/asyncio/retry/reads_resumption_strategy.py
+++ b/google/cloud/storage/_experimental/asyncio/retry/reads_resumption_strategy.py
@@ -1,11 +1,35 @@
-from typing import Any, List, IO
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing import Any, Dict, List, IO
+import logging
+
+from google_crc32c import Checksum
from google.cloud import _storage_v2 as storage_v2
from google.cloud.storage.exceptions import DataCorruption
+from google.cloud.storage._experimental.asyncio.retry._helpers import (
+ _handle_redirect,
+)
from google.cloud.storage._experimental.asyncio.retry.base_strategy import (
_BaseResumptionStrategy,
)
-from google.cloud._storage_v2.types.storage import BidiReadObjectRedirectedError
+
+
+_BIDI_READ_REDIRECTED_TYPE_URL = (
+ "type.googleapis.com/google.storage.v2.BidiReadObjectRedirectedError"
+)
+logger = logging.getLogger(__name__)
class _DownloadState:
@@ -25,7 +49,7 @@ def __init__(
class _ReadResumptionStrategy(_BaseResumptionStrategy):
"""The concrete resumption strategy for bidi reads."""
- def generate_requests(self, state: dict) -> List[storage_v2.ReadRange]:
+ def generate_requests(self, state: Dict[str, Any]) -> List[storage_v2.ReadRange]:
"""Generates new ReadRange requests for all incomplete downloads.
:type state: dict
@@ -33,10 +57,17 @@ def generate_requests(self, state: dict) -> List[storage_v2.ReadRange]:
_DownloadState object.
"""
pending_requests = []
- for read_id, read_state in state.items():
+ download_states: Dict[int, _DownloadState] = state["download_states"]
+
+ for read_id, read_state in download_states.items():
if not read_state.is_complete:
new_offset = read_state.initial_offset + read_state.bytes_written
- new_length = read_state.initial_length - read_state.bytes_written
+
+ # Calculate remaining length. If initial_length is 0 (read to end),
+ # it stays 0. Otherwise, subtract bytes_written.
+ new_length = 0
+ if read_state.initial_length > 0:
+ new_length = read_state.initial_length - read_state.bytes_written
new_request = storage_v2.ReadRange(
read_offset=new_offset,
@@ -47,39 +78,80 @@ def generate_requests(self, state: dict) -> List[storage_v2.ReadRange]:
return pending_requests
def update_state_from_response(
- self, response: storage_v2.BidiReadObjectResponse, state: dict
+ self, response: storage_v2.BidiReadObjectResponse, state: Dict[str, Any]
) -> None:
"""Processes a server response, performs integrity checks, and updates state."""
+
+ # Capture read_handle if provided.
+ if response.read_handle:
+ state["read_handle"] = response.read_handle
+
+ download_states = state["download_states"]
+
for object_data_range in response.object_data_ranges:
+ # Ignore empty ranges or ranges for IDs not in our state
+ # (e.g., from a previously cancelled request on the same stream).
+ if not object_data_range.read_range:
+ logger.warning(
+ "Received response with missing read_range field; ignoring."
+ )
+ continue
+
read_id = object_data_range.read_range.read_id
- read_state = state[read_id]
+ if read_id not in download_states:
+ logger.warning(
+ f"Received data for unknown or stale read_id {read_id}; ignoring."
+ )
+ continue
+
+ read_state = download_states[read_id]
# Offset Verification
chunk_offset = object_data_range.read_range.read_offset
if chunk_offset != read_state.next_expected_offset:
- raise DataCorruption(response, f"Offset mismatch for read_id {read_id}")
+ raise DataCorruption(
+ response,
+ f"Offset mismatch for read_id {read_id}. "
+ f"Expected {read_state.next_expected_offset}, got {chunk_offset}",
+ )
+ # Checksum Verification
+ # We must validate data before updating state or writing to buffer.
data = object_data_range.checksummed_data.content
+ server_checksum = object_data_range.checksummed_data.crc32c
+
+ if server_checksum is not None:
+ client_checksum = int.from_bytes(Checksum(data).digest(), "big")
+ if server_checksum != client_checksum:
+ raise DataCorruption(
+ response,
+ f"Checksum mismatch for read_id {read_id}. "
+ f"Server sent {server_checksum}, client calculated {client_checksum}.",
+ )
+
+ # Update State & Write Data
chunk_size = len(data)
+ read_state.user_buffer.write(data)
read_state.bytes_written += chunk_size
read_state.next_expected_offset += chunk_size
- read_state.user_buffer.write(data)
# Final Byte Count Verification
if object_data_range.range_end:
read_state.is_complete = True
if (
read_state.initial_length != 0
- and read_state.bytes_written != read_state.initial_length
+ and read_state.bytes_written > read_state.initial_length
):
raise DataCorruption(
- response, f"Byte count mismatch for read_id {read_id}"
+ response,
+ f"Byte count mismatch for read_id {read_id}. "
+ f"Expected {read_state.initial_length}, got {read_state.bytes_written}",
)
async def recover_state_on_failure(self, error: Exception, state: Any) -> None:
"""Handles BidiReadObjectRedirectedError for reads."""
- # This would parse the gRPC error details, extract the routing_token,
- # and store it on the shared state object.
- cause = getattr(error, "cause", error)
- if isinstance(cause, BidiReadObjectRedirectedError):
- state["routing_token"] = cause.routing_token
+ routing_token, read_handle = _handle_redirect(error)
+ if routing_token:
+ state["routing_token"] = routing_token
+ if read_handle:
+ state["read_handle"] = read_handle
diff --git a/google/cloud/storage/_experimental/asyncio/retry/writes_resumption_strategy.py b/google/cloud/storage/_experimental/asyncio/retry/writes_resumption_strategy.py
new file mode 100644
index 000000000..c6ae36339
--- /dev/null
+++ b/google/cloud/storage/_experimental/asyncio/retry/writes_resumption_strategy.py
@@ -0,0 +1,146 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Any, Dict, IO, Iterable, Optional, Union
+
+import google_crc32c
+from google.cloud._storage_v2.types import storage as storage_type
+from google.cloud._storage_v2.types.storage import BidiWriteObjectRedirectedError
+from google.cloud.storage._experimental.asyncio.retry.base_strategy import (
+ _BaseResumptionStrategy,
+)
+
+
+class _WriteState:
+ """A helper class to track the state of a single upload operation.
+
+ :type spec: :class:`google.cloud.storage_v2.types.AppendObjectSpec`
+ :param spec: The specification for the object to write.
+
+ :type chunk_size: int
+ :param chunk_size: The size of chunks to write to the server.
+
+ :type user_buffer: IO[bytes]
+ :param user_buffer: The data source.
+ """
+
+ def __init__(
+ self,
+ spec: Union[storage_type.AppendObjectSpec, storage_type.WriteObjectSpec],
+ chunk_size: int,
+ user_buffer: IO[bytes],
+ ):
+ self.spec = spec
+ self.chunk_size = chunk_size
+ self.user_buffer = user_buffer
+ self.persisted_size: int = 0
+ self.bytes_sent: int = 0
+ self.write_handle: Union[bytes, storage_type.BidiWriteHandle, None] = None
+ self.routing_token: Optional[str] = None
+ self.is_finalized: bool = False
+
+
+class _WriteResumptionStrategy(_BaseResumptionStrategy):
+ """The concrete resumption strategy for bidi writes."""
+
+ def generate_requests(
+ self, state: Dict[str, Any]
+ ) -> Iterable[storage_type.BidiWriteObjectRequest]:
+ """Generates BidiWriteObjectRequests to resume or continue the upload.
+
+ For Appendable Objects, every stream opening should send an
+ AppendObjectSpec. If resuming, the `write_handle` is added to that spec.
+
+ This method is not applicable for `open` methods.
+ """
+ write_state: _WriteState = state["write_state"]
+
+ initial_request = storage_type.BidiWriteObjectRequest()
+
+ # Determine if we need to send WriteObjectSpec or AppendObjectSpec
+ if isinstance(write_state.spec, storage_type.WriteObjectSpec):
+ initial_request.write_object_spec = write_state.spec
+ else:
+ if write_state.write_handle:
+ write_state.spec.write_handle = write_state.write_handle
+
+ if write_state.routing_token:
+ write_state.spec.routing_token = write_state.routing_token
+ initial_request.append_object_spec = write_state.spec
+
+ yield initial_request
+
+ # The buffer should already be seeked to the correct position (persisted_size)
+ # by the `recover_state_on_failure` method before this is called.
+ while not write_state.is_finalized:
+ chunk = write_state.user_buffer.read(write_state.chunk_size)
+
+ # End of File detection
+ if not chunk:
+ return
+
+ checksummed_data = storage_type.ChecksummedData(content=chunk)
+ checksum = google_crc32c.Checksum(chunk)
+ checksummed_data.crc32c = int.from_bytes(checksum.digest(), "big")
+
+ request = storage_type.BidiWriteObjectRequest(
+ write_offset=write_state.bytes_sent,
+ checksummed_data=checksummed_data,
+ )
+ write_state.bytes_sent += len(chunk)
+
+ yield request
+
+ def update_state_from_response(
+ self, response: storage_type.BidiWriteObjectResponse, state: Dict[str, Any]
+ ) -> None:
+ """Processes a server response and updates the write state."""
+ write_state: _WriteState = state["write_state"]
+
+ if response.persisted_size:
+ write_state.persisted_size = response.persisted_size
+
+ if response.write_handle:
+ write_state.write_handle = response.write_handle
+
+ if response.resource:
+ write_state.persisted_size = response.resource.size
+ if response.resource.finalize_time:
+ write_state.is_finalized = True
+
+ async def recover_state_on_failure(
+ self, error: Exception, state: Dict[str, Any]
+ ) -> None:
+ """
+ Handles errors, specifically BidiWriteObjectRedirectedError, and rewinds state.
+
+ This method rewinds the user buffer and internal byte tracking to the
+ last confirmed 'persisted_size' from the server.
+ """
+ write_state: _WriteState = state["write_state"]
+ cause = getattr(error, "cause", error)
+
+ # Extract routing token and potentially a new write handle for redirection.
+ if isinstance(cause, BidiWriteObjectRedirectedError):
+ if cause.routing_token:
+ write_state.routing_token = cause.routing_token
+
+ redirect_handle = getattr(cause, "write_handle", None)
+ if redirect_handle:
+ write_state.write_handle = redirect_handle
+
+ # We must assume any data sent beyond 'persisted_size' was lost.
+ # Reset the user buffer to the last known good byte confirmed by the server.
+ write_state.user_buffer.seek(write_state.persisted_size)
+ write_state.bytes_sent = write_state.persisted_size
diff --git a/google/cloud/storage/_media/requests/download.py b/google/cloud/storage/_media/requests/download.py
index 13e049bd3..c5686fcb7 100644
--- a/google/cloud/storage/_media/requests/download.py
+++ b/google/cloud/storage/_media/requests/download.py
@@ -774,6 +774,5 @@ def flush(self):
def has_unconsumed_tail(self) -> bool:
return self._decoder.has_unconsumed_tail
-
else: # pragma: NO COVER
_BrotliDecoder = None # type: ignore # pragma: NO COVER
diff --git a/google/cloud/storage/version.py b/google/cloud/storage/version.py
index dc87b3c5b..8f4ba4810 100644
--- a/google/cloud/storage/version.py
+++ b/google/cloud/storage/version.py
@@ -12,4 +12,4 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-__version__ = "3.7.0"
+__version__ = "3.8.0"
diff --git a/noxfile.py b/noxfile.py
index b89b9d319..1cef2a75f 100644
--- a/noxfile.py
+++ b/noxfile.py
@@ -26,9 +26,16 @@
BLACK_VERSION = "black==23.7.0"
BLACK_PATHS = ["docs", "google", "tests", "noxfile.py", "setup.py"]
-DEFAULT_PYTHON_VERSION = "3.12"
-SYSTEM_TEST_PYTHON_VERSIONS = ["3.12"]
-UNIT_TEST_PYTHON_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13"]
+DEFAULT_PYTHON_VERSION = "3.14"
+SYSTEM_TEST_PYTHON_VERSIONS = ["3.9", "3.14"]
+UNIT_TEST_PYTHON_VERSIONS = [
+ "3.9",
+ "3.10",
+ "3.11",
+ "3.12",
+ "3.13",
+ "3.14",
+]
CONFORMANCE_TEST_PYTHON_VERSIONS = ["3.12"]
CURRENT_DIRECTORY = pathlib.Path(__file__).parent.absolute()
@@ -51,6 +58,7 @@
"unit-3.11",
"unit-3.12",
"unit-3.13",
+ "unit-3.14",
# cover must be last to avoid error `No data to report`
"cover",
]
@@ -65,7 +73,7 @@ def lint(session):
"""
# Pin flake8 to 6.0.0
# See https://github.com/googleapis/python-storage/issues/1102
- session.install("flake8==6.0.0", BLACK_VERSION)
+ session.install("flake8", BLACK_VERSION)
session.run(
"black",
"--check",
@@ -118,6 +126,8 @@ def default(session, install_extras=True):
session.install("-e", ".", "-c", constraints_path)
+ session.run("python", "-m", "pip", "freeze")
+
# This dependency is included in setup.py for backwards compatibility only
# and the client library is expected to pass all tests without it. See
# setup.py and README for details.
@@ -182,7 +192,14 @@ def system(session):
# 2021-05-06: defer installing 'google-cloud-*' to after this package,
# in order to work around Python 2.7 googolapis-common-protos
# issue.
- session.install("mock", "pytest", "pytest-rerunfailures", "-c", constraints_path)
+ session.install(
+ "mock",
+ "pytest",
+ "pytest-rerunfailures",
+ "pytest-asyncio",
+ "-c",
+ constraints_path,
+ )
session.install("-e", ".", "-c", constraints_path)
session.install(
"google-cloud-testutils",
@@ -215,10 +232,22 @@ def conftest_retry(session):
if not conformance_test_folder_exists:
session.skip("Conformance tests were not found")
+ constraints_path = str(
+ CURRENT_DIRECTORY / "testing" / f"constraints-{session.python}.txt"
+ )
+
# Install all test dependencies and pytest plugin to run tests in parallel.
# Then install this package in-place.
- session.install("pytest", "pytest-xdist")
- session.install("-e", ".")
+ session.install(
+ "pytest",
+ "pytest-xdist",
+ "grpcio",
+ "grpcio-status",
+ "grpc-google-iam-v1",
+ "-c",
+ constraints_path,
+ )
+ session.install("-e", ".", "-c", constraints_path)
# Run #CPU processes in parallel if no test session arguments are passed in.
if session.posargs:
diff --git a/samples/generated_samples/snippet_metadata_google.storage.v2.json b/samples/generated_samples/snippet_metadata_google.storage.v2.json
index 4af7ef641..7fcd587f3 100644
--- a/samples/generated_samples/snippet_metadata_google.storage.v2.json
+++ b/samples/generated_samples/snippet_metadata_google.storage.v2.json
@@ -8,7 +8,7 @@
],
"language": "PYTHON",
"name": "google-cloud-storage",
- "version": "3.6.0"
+ "version": "3.8.0"
},
"snippets": [
{
diff --git a/samples/snippets/snippets_test.py b/samples/snippets/snippets_test.py
index 91018f3dd..1d3c8c1c4 100644
--- a/samples/snippets/snippets_test.py
+++ b/samples/snippets/snippets_test.py
@@ -18,6 +18,7 @@
import tempfile
import time
import uuid
+import sys
from google.cloud import storage
import google.cloud.exceptions
@@ -99,8 +100,10 @@
import storage_upload_with_kms_key
KMS_KEY = os.environ.get("CLOUD_KMS_KEY")
+IS_PYTHON_3_14 = sys.version_info[:2] == (3, 14)
+@pytest.mark.skipif(IS_PYTHON_3_14, reason="b/470276398")
def test_enable_default_kms_key(test_bucket):
storage_set_bucket_default_kms_key.enable_default_kms_key(
bucket_name=test_bucket.name, kms_key_name=KMS_KEY
@@ -305,6 +308,7 @@ def test_upload_blob_from_stream(test_bucket, capsys):
assert "Stream data uploaded to test_upload_blob" in out
+@pytest.mark.skipif(IS_PYTHON_3_14, reason="b/470276398")
def test_upload_blob_with_kms(test_bucket):
blob_name = f"test_upload_with_kms_{uuid.uuid4().hex}"
with tempfile.NamedTemporaryFile() as source_file:
@@ -399,6 +403,7 @@ def test_delete_blob(test_blob):
storage_delete_file.delete_blob(test_blob.bucket.name, test_blob.name)
+@pytest.mark.xfail(reason="wait until b/469643064 is fixed")
def test_make_blob_public(test_public_blob):
storage_make_public.make_blob_public(
test_public_blob.bucket.name, test_public_blob.name
@@ -597,6 +602,7 @@ def test_create_bucket_dual_region(test_bucket_create, capsys):
assert "dual-region" in out
+@pytest.mark.skipif(IS_PYTHON_3_14, reason="b/470276398")
def test_bucket_delete_default_kms_key(test_bucket, capsys):
test_bucket.default_kms_key_name = KMS_KEY
test_bucket.patch()
@@ -620,6 +626,7 @@ def test_get_service_account(capsys):
assert "@gs-project-accounts.iam.gserviceaccount.com" in out
+@pytest.mark.xfail(reason="wait until b/469643064 is fixed")
def test_download_public_file(test_public_blob):
storage_make_public.make_blob_public(
test_public_blob.bucket.name, test_public_blob.name
@@ -644,6 +651,7 @@ def test_define_bucket_website_configuration(test_bucket):
assert bucket._properties["website"] == website_val
+@pytest.mark.skipif(IS_PYTHON_3_14, reason="b/470276398")
def test_object_get_kms_key(test_bucket):
with tempfile.NamedTemporaryFile() as source_file:
storage_upload_with_kms_key.upload_blob_with_kms(
diff --git a/setup.py b/setup.py
index 374a71cf4..b45053856 100644
--- a/setup.py
+++ b/setup.py
@@ -106,6 +106,7 @@
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
+ "Programming Language :: Python :: 3.14",
"Operating System :: OS Independent",
"Topic :: Internet",
],
diff --git a/testing/constraints-3.14.txt b/testing/constraints-3.14.txt
index 2ae5a677e..62739fc5d 100644
--- a/testing/constraints-3.14.txt
+++ b/testing/constraints-3.14.txt
@@ -7,7 +7,7 @@
# Then this file should have google-cloud-foo>=1
google-api-core>=2
google-auth>=2
-grpcio>=1
+grpcio>=1.75.1
proto-plus>=1
protobuf>=6
grpc-google-iam-v1>=0
diff --git a/tests/conformance/test_bidi_reads.py b/tests/conformance/test_bidi_reads.py
new file mode 100644
index 000000000..4157182cb
--- /dev/null
+++ b/tests/conformance/test_bidi_reads.py
@@ -0,0 +1,266 @@
+import asyncio
+import io
+import uuid
+import grpc
+import requests
+
+from google.api_core import exceptions
+from google.auth import credentials as auth_credentials
+from google.cloud import _storage_v2 as storage_v2
+
+from google.cloud.storage._experimental.asyncio.async_multi_range_downloader import (
+ AsyncMultiRangeDownloader,
+)
+
+# --- Configuration ---
+PROJECT_NUMBER = "12345" # A dummy project number is fine for the testbench.
+GRPC_ENDPOINT = "localhost:8888"
+HTTP_ENDPOINT = "http://localhost:9000"
+CONTENT_LENGTH = 1024 * 10 # 10 KB
+
+
+def _is_retriable(exc):
+ """Predicate for identifying retriable errors."""
+ return isinstance(
+ exc,
+ (
+ exceptions.ServiceUnavailable,
+ exceptions.Aborted, # Required to retry on redirect
+ exceptions.InternalServerError,
+ exceptions.ResourceExhausted,
+ ),
+ )
+
+
+async def run_test_scenario(
+ gapic_client, http_client, bucket_name, object_name, scenario
+):
+ """Runs a single fault-injection test scenario."""
+ print(f"\n--- RUNNING SCENARIO: {scenario['name']} ---")
+
+ retry_test_id = None
+ try:
+ # 1. Create a Retry Test resource on the testbench.
+ retry_test_config = {
+ "instructions": {scenario["method"]: [scenario["instruction"]]},
+ "transport": "GRPC",
+ }
+ resp = http_client.post(f"{HTTP_ENDPOINT}/retry_test", json=retry_test_config)
+ resp.raise_for_status()
+ retry_test_id = resp.json()["id"]
+
+ # 2. Set up downloader and metadata for fault injection.
+ downloader = await AsyncMultiRangeDownloader.create_mrd(
+ gapic_client, bucket_name, object_name
+ )
+ fault_injection_metadata = (("x-retry-test-id", retry_test_id),)
+
+ buffer = io.BytesIO()
+
+ # 3. Execute the download and assert the outcome.
+ try:
+ await downloader.download_ranges(
+ [(0, 5 * 1024, buffer), (6 * 1024, 4 * 1024, buffer)],
+ metadata=fault_injection_metadata,
+ )
+ # If an exception was expected, this line should not be reached.
+ if scenario["expected_error"] is not None:
+ raise AssertionError(
+ f"Expected exception {scenario['expected_error']} was not raised."
+ )
+
+ assert len(buffer.getvalue()) == 9 * 1024
+
+ except scenario["expected_error"] as e:
+ print(f"Caught expected exception for {scenario['name']}: {e}")
+
+ await downloader.close()
+
+ finally:
+ # 4. Clean up the Retry Test resource.
+ if retry_test_id:
+ http_client.delete(f"{HTTP_ENDPOINT}/retry_test/{retry_test_id}")
+
+
+async def main():
+ """Main function to set up resources and run all test scenarios."""
+ channel = grpc.aio.insecure_channel(GRPC_ENDPOINT)
+ creds = auth_credentials.AnonymousCredentials()
+ transport = storage_v2.services.storage.transports.StorageGrpcAsyncIOTransport(
+ channel=channel, credentials=creds
+ )
+ gapic_client = storage_v2.StorageAsyncClient(transport=transport)
+ http_client = requests.Session()
+
+ bucket_name = f"grpc-test-bucket-{uuid.uuid4().hex[:8]}"
+ object_name = "retry-test-object"
+
+ # Define all test scenarios
+ test_scenarios = [
+ {
+ "name": "Retry on Service Unavailable (503)",
+ "method": "storage.objects.get",
+ "instruction": "return-503",
+ "expected_error": None,
+ },
+ {
+ "name": "Retry on 500",
+ "method": "storage.objects.get",
+ "instruction": "return-500",
+ "expected_error": None,
+ },
+ {
+ "name": "Retry on 504",
+ "method": "storage.objects.get",
+ "instruction": "return-504",
+ "expected_error": None,
+ },
+ {
+ "name": "Retry on 429",
+ "method": "storage.objects.get",
+ "instruction": "return-429",
+ "expected_error": None,
+ },
+ {
+ "name": "Smarter Resumption: Retry 503 after partial data",
+ "method": "storage.objects.get",
+ "instruction": "return-broken-stream-after-2K",
+ "expected_error": None,
+ },
+ {
+ "name": "Retry on BidiReadObjectRedirectedError",
+ "method": "storage.objects.get",
+ "instruction": "redirect-send-handle-and-token-tokenval", # Testbench instruction for redirect
+ "expected_error": None,
+ },
+ ]
+
+ try:
+ # Create a single bucket and object for all tests to use.
+ content = b"A" * CONTENT_LENGTH
+ bucket_resource = storage_v2.Bucket(project=f"projects/{PROJECT_NUMBER}")
+ create_bucket_request = storage_v2.CreateBucketRequest(
+ parent="projects/_", bucket_id=bucket_name, bucket=bucket_resource
+ )
+ await gapic_client.create_bucket(request=create_bucket_request)
+
+ write_spec = storage_v2.WriteObjectSpec(
+ resource=storage_v2.Object(
+ bucket=f"projects/_/buckets/{bucket_name}", name=object_name
+ )
+ )
+
+ async def write_req_gen():
+ yield storage_v2.WriteObjectRequest(
+ write_object_spec=write_spec,
+ checksummed_data={"content": content},
+ finish_write=True,
+ )
+
+ await gapic_client.write_object(requests=write_req_gen())
+
+ # Run all defined test scenarios.
+ for scenario in test_scenarios:
+ await run_test_scenario(
+ gapic_client, http_client, bucket_name, object_name, scenario
+ )
+
+ # Define and run test scenarios specifically for the open() method
+ open_test_scenarios = [
+ {
+ "name": "Open: Retry on 503",
+ "method": "storage.objects.get",
+ "instruction": "return-503",
+ "expected_error": None,
+ },
+ {
+ "name": "Open: Retry on BidiReadObjectRedirectedError",
+ "method": "storage.objects.get",
+ "instruction": "redirect-send-handle-and-token-tokenval",
+ "expected_error": None,
+ },
+ {
+ "name": "Open: Fail Fast on 401",
+ "method": "storage.objects.get",
+ "instruction": "return-401",
+ "expected_error": exceptions.Unauthorized,
+ },
+ ]
+ for scenario in open_test_scenarios:
+ await run_open_test_scenario(
+ gapic_client, http_client, bucket_name, object_name, scenario
+ )
+
+ except Exception:
+ import traceback
+
+ traceback.print_exc()
+ finally:
+ # Clean up the test bucket.
+ try:
+ delete_object_req = storage_v2.DeleteObjectRequest(
+ bucket="projects/_/buckets/" + bucket_name, object=object_name
+ )
+ await gapic_client.delete_object(request=delete_object_req)
+
+ delete_bucket_req = storage_v2.DeleteBucketRequest(
+ name=f"projects/_/buckets/{bucket_name}"
+ )
+ await gapic_client.delete_bucket(request=delete_bucket_req)
+ except Exception as e:
+ print(f"Warning: Cleanup failed: {e}")
+
+
+async def run_open_test_scenario(
+ gapic_client, http_client, bucket_name, object_name, scenario
+):
+ """Runs a fault-injection test scenario specifically for the open() method."""
+ print(f"\n--- RUNNING SCENARIO: {scenario['name']} ---")
+
+ retry_test_id = None
+ try:
+ # 1. Create a Retry Test resource on the testbench.
+ retry_test_config = {
+ "instructions": {scenario["method"]: [scenario["instruction"]]},
+ "transport": "GRPC",
+ }
+ resp = http_client.post(f"{HTTP_ENDPOINT}/retry_test", json=retry_test_config)
+ resp.raise_for_status()
+ retry_test_id = resp.json()["id"]
+ print(f"Retry Test created with ID: {retry_test_id}")
+
+ # 2. Set up metadata for fault injection.
+ fault_injection_metadata = (("x-retry-test-id", retry_test_id),)
+
+ # 3. Execute the open (via create_mrd) and assert the outcome.
+ try:
+ downloader = await AsyncMultiRangeDownloader.create_mrd(
+ gapic_client,
+ bucket_name,
+ object_name,
+ metadata=fault_injection_metadata,
+ )
+
+ # If open was successful, perform a simple download to ensure the stream is usable.
+ buffer = io.BytesIO()
+ await downloader.download_ranges([(0, 1024, buffer)])
+ await downloader.close()
+ assert len(buffer.getvalue()) == 1024
+
+ # If an exception was expected, this line should not be reached.
+ if scenario["expected_error"] is not None:
+ raise AssertionError(
+ f"Expected exception {scenario['expected_error']} was not raised."
+ )
+
+ except scenario["expected_error"] as e:
+ print(f"Caught expected exception for {scenario['name']}: {e}")
+
+ finally:
+ # 4. Clean up the Retry Test resource.
+ if retry_test_id:
+ http_client.delete(f"{HTTP_ENDPOINT}/retry_test/{retry_test_id}")
+
+
+if __name__ == "__main__":
+ asyncio.run(main())
diff --git a/tests/system/test_notification.py b/tests/system/test_notification.py
index 9b631c29b..48c6c4ba8 100644
--- a/tests/system/test_notification.py
+++ b/tests/system/test_notification.py
@@ -60,13 +60,19 @@ def topic_path(storage_client, topic_name):
@pytest.fixture(scope="session")
def notification_topic(storage_client, publisher_client, topic_path, no_mtls):
_helpers.retry_429(publisher_client.create_topic)(request={"name": topic_path})
- policy = publisher_client.get_iam_policy(request={"resource": topic_path})
- binding = policy.bindings.add()
- binding.role = "roles/pubsub.publisher"
- binding.members.append(
- f"serviceAccount:{storage_client.get_service_account_email()}"
- )
- publisher_client.set_iam_policy(request={"resource": topic_path, "policy": policy})
+ try:
+ policy = publisher_client.get_iam_policy(request={"resource": topic_path})
+ binding = policy.bindings.add()
+ binding.role = "roles/pubsub.publisher"
+ binding.members.append(
+ f"serviceAccount:{storage_client.get_service_account_email()}"
+ )
+ publisher_client.set_iam_policy(
+ request={"resource": topic_path, "policy": policy}
+ )
+ yield topic_path
+ finally:
+ publisher_client.delete_topic(request={"topic": topic_path})
def test_notification_create_minimal(
diff --git a/tests/system/test_zonal.py b/tests/system/test_zonal.py
index 909b9ddf1..2ade654ad 100644
--- a/tests/system/test_zonal.py
+++ b/tests/system/test_zonal.py
@@ -4,17 +4,22 @@
from io import BytesIO
# python additional imports
+import google_crc32c
+
import pytest
+import gc
# current library imports
from google.cloud.storage._experimental.asyncio.async_grpc_client import AsyncGrpcClient
from google.cloud.storage._experimental.asyncio.async_appendable_object_writer import (
AsyncAppendableObjectWriter,
+ _DEFAULT_FLUSH_INTERVAL_BYTES,
)
from google.cloud.storage._experimental.asyncio.async_multi_range_downloader import (
AsyncMultiRangeDownloader,
)
+
pytestmark = pytest.mark.skipif(
os.getenv("RUN_ZONAL_SYSTEM_TESTS") != "True",
reason="Zonal system tests need to be explicitly enabled. This helps scheduling tests in Kokoro and Cloud Build.",
@@ -24,28 +29,100 @@
# TODO: replace this with a fixture once zonal bucket creation / deletion
# is supported in grpc client or json client client.
_ZONAL_BUCKET = os.getenv("ZONAL_BUCKET")
+_BYTES_TO_UPLOAD = b"dummy_bytes_to_write_read_and_delete_appendable_object"
+
+
+def _get_equal_dist(a: int, b: int) -> tuple[int, int]:
+ step = (b - a) // 3
+ return a + step, a + 2 * step
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+ "object_size",
+ [
+ 256, # less than _chunk size
+ 10 * 1024 * 1024, # less than _MAX_BUFFER_SIZE_BYTES
+        20 * 1024 * 1024,  # greater than _MAX_BUFFER_SIZE_BYTES
+ ],
+)
+@pytest.mark.parametrize(
+ "attempt_direct_path",
+ [True, False],
+)
+async def test_basic_wrd(
+ storage_client, blobs_to_delete, attempt_direct_path, object_size
+):
+ object_name = f"test_basic_wrd-{str(uuid.uuid4())}"
+
+    # Client instantiation; it cannot be part of a fixture because
+    # grpc_client's event loop and the event loop of the coroutine running it
+    # (i.e. this test) must be the same.
+ # Note:
+ # 1. @pytest.mark.asyncio ensures new event loop for each test.
+ # 2. we can keep the same event loop for entire module but that may
+ # create issues if tests are run in parallel and one test hogs the event
+ # loop slowing down other tests.
+ object_data = os.urandom(object_size)
+ object_checksum = google_crc32c.value(object_data)
+ grpc_client = AsyncGrpcClient(attempt_direct_path=attempt_direct_path).grpc_client
+
+ writer = AsyncAppendableObjectWriter(grpc_client, _ZONAL_BUCKET, object_name)
+ await writer.open()
+ await writer.append(object_data)
+ object_metadata = await writer.close(finalize_on_close=True)
+ assert object_metadata.size == object_size
+ assert int(object_metadata.checksums.crc32c) == object_checksum
+
+ mrd = AsyncMultiRangeDownloader(grpc_client, _ZONAL_BUCKET, object_name)
+ buffer = BytesIO()
+ await mrd.open()
+ # (0, 0) means read the whole object
+ await mrd.download_ranges([(0, 0, buffer)])
+ await mrd.close()
+ assert buffer.getvalue() == object_data
+ assert mrd.persisted_size == object_size
+
+ # Clean up; use json client (i.e. `storage_client` fixture) to delete.
+ blobs_to_delete.append(storage_client.bucket(_ZONAL_BUCKET).blob(object_name))
+ del writer
+ del mrd
+ gc.collect()
@pytest.mark.asyncio
-async def test_basic_wrd(storage_client, blobs_to_delete):
- bytes_to_upload = b"dummy_bytes_to_write_read_and_delete_appendable_object"
+@pytest.mark.parametrize(
+ "object_size",
+ [
+        10,  # less than _chunk size
+ 10 * 1024 * 1024, # less than _MAX_BUFFER_SIZE_BYTES
+ 20 * 1024 * 1024, # greater than _MAX_BUFFER_SIZE_BYTES
+ ],
+)
+async def test_basic_wrd_in_slices(storage_client, blobs_to_delete, object_size):
object_name = f"test_basic_wrd-{str(uuid.uuid4())}"
# Client instantiation; it cannot be part of fixture because.
# grpc_client's event loop and event loop of coroutine running it
# (i.e. this test) must be same.
# Note:
- # 1. @pytest.mark.asyncio ensures new event for each test.
+ # 1. @pytest.mark.asyncio ensures new event loop for each test.
# 2. we can keep the same event loop for entire module but that may
# create issues if tests are run in parallel and one test hogs the event
# loop slowing down other tests.
+ object_data = os.urandom(object_size)
+ object_checksum = google_crc32c.value(object_data)
grpc_client = AsyncGrpcClient().grpc_client
writer = AsyncAppendableObjectWriter(grpc_client, _ZONAL_BUCKET, object_name)
await writer.open()
- await writer.append(bytes_to_upload)
+ mark1, mark2 = _get_equal_dist(0, object_size)
+ await writer.append(object_data[0:mark1])
+ await writer.append(object_data[mark1:mark2])
+ await writer.append(object_data[mark2:])
object_metadata = await writer.close(finalize_on_close=True)
- assert object_metadata.size == len(bytes_to_upload)
+ assert object_metadata.size == object_size
+ assert int(object_metadata.checksums.crc32c) == object_checksum
mrd = AsyncMultiRangeDownloader(grpc_client, _ZONAL_BUCKET, object_name)
buffer = BytesIO()
@@ -53,7 +130,233 @@ async def test_basic_wrd(storage_client, blobs_to_delete):
# (0, 0) means read the whole object
await mrd.download_ranges([(0, 0, buffer)])
await mrd.close()
- assert buffer.getvalue() == bytes_to_upload
+ assert buffer.getvalue() == object_data
+ assert mrd.persisted_size == object_size
# Clean up; use json client (i.e. `storage_client` fixture) to delete.
blobs_to_delete.append(storage_client.bucket(_ZONAL_BUCKET).blob(object_name))
+ del writer
+ del mrd
+ gc.collect()
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+ "flush_interval",
+ [
+ 2 * 1024 * 1024,
+ 4 * 1024 * 1024,
+ 8 * 1024 * 1024,
+ _DEFAULT_FLUSH_INTERVAL_BYTES,
+ ],
+)
+async def test_wrd_with_non_default_flush_interval(
+ storage_client,
+ blobs_to_delete,
+ flush_interval,
+):
+ object_name = f"test_basic_wrd-{str(uuid.uuid4())}"
+ object_size = 9 * 1024 * 1024
+
+    # Client instantiation; it cannot be part of a fixture because
+    # grpc_client's event loop and the event loop of the coroutine running it
+    # (i.e. this test) must be the same.
+ # Note:
+ # 1. @pytest.mark.asyncio ensures new event loop for each test.
+ # 2. we can keep the same event loop for entire module but that may
+ # create issues if tests are run in parallel and one test hogs the event
+ # loop slowing down other tests.
+ object_data = os.urandom(object_size)
+ object_checksum = google_crc32c.value(object_data)
+ grpc_client = AsyncGrpcClient().grpc_client
+
+ writer = AsyncAppendableObjectWriter(
+ grpc_client,
+ _ZONAL_BUCKET,
+ object_name,
+ writer_options={"FLUSH_INTERVAL_BYTES": flush_interval},
+ )
+ await writer.open()
+ mark1, mark2 = _get_equal_dist(0, object_size)
+ await writer.append(object_data[0:mark1])
+ await writer.append(object_data[mark1:mark2])
+ await writer.append(object_data[mark2:])
+ object_metadata = await writer.close(finalize_on_close=True)
+ assert object_metadata.size == object_size
+ assert int(object_metadata.checksums.crc32c) == object_checksum
+
+ mrd = AsyncMultiRangeDownloader(grpc_client, _ZONAL_BUCKET, object_name)
+ buffer = BytesIO()
+ await mrd.open()
+ # (0, 0) means read the whole object
+ await mrd.download_ranges([(0, 0, buffer)])
+ await mrd.close()
+ assert buffer.getvalue() == object_data
+ assert mrd.persisted_size == object_size
+
+ # Clean up; use json client (i.e. `storage_client` fixture) to delete.
+ blobs_to_delete.append(storage_client.bucket(_ZONAL_BUCKET).blob(object_name))
+ del writer
+ del mrd
+ gc.collect()
+
+
+@pytest.mark.asyncio
+async def test_read_unfinalized_appendable_object(storage_client, blobs_to_delete):
+ object_name = f"read_unfinalized_appendable_object-{str(uuid.uuid4())[:4]}"
+ grpc_client = AsyncGrpcClient(attempt_direct_path=True).grpc_client
+
+ writer = AsyncAppendableObjectWriter(grpc_client, _ZONAL_BUCKET, object_name)
+ await writer.open()
+ await writer.append(_BYTES_TO_UPLOAD)
+ await writer.flush()
+
+ mrd = AsyncMultiRangeDownloader(grpc_client, _ZONAL_BUCKET, object_name)
+ buffer = BytesIO()
+ await mrd.open()
+ assert mrd.persisted_size == len(_BYTES_TO_UPLOAD)
+ # (0, 0) means read the whole object
+ await mrd.download_ranges([(0, 0, buffer)])
+ await mrd.close()
+ assert buffer.getvalue() == _BYTES_TO_UPLOAD
+
+ # Clean up; use json client (i.e. `storage_client` fixture) to delete.
+ blobs_to_delete.append(storage_client.bucket(_ZONAL_BUCKET).blob(object_name))
+ del writer
+ del mrd
+ gc.collect()
+
+
+@pytest.mark.asyncio
+async def test_mrd_open_with_read_handle():
+ grpc_client = AsyncGrpcClient().grpc_client
+ object_name = f"test_read_handl-{str(uuid.uuid4())[:4]}"
+ writer = AsyncAppendableObjectWriter(grpc_client, _ZONAL_BUCKET, object_name)
+ await writer.open()
+ await writer.append(_BYTES_TO_UPLOAD)
+ await writer.close()
+
+ mrd = AsyncMultiRangeDownloader(grpc_client, _ZONAL_BUCKET, object_name)
+ await mrd.open()
+ read_handle = mrd.read_handle
+ await mrd.close()
+
+ # Open a new MRD using the `read_handle` obtained above
+ new_mrd = AsyncMultiRangeDownloader(
+ grpc_client, _ZONAL_BUCKET, object_name, read_handle=read_handle
+ )
+ await new_mrd.open()
+ # persisted_size not set when opened with read_handle
+ assert new_mrd.persisted_size is None
+ buffer = BytesIO()
+ await new_mrd.download_ranges([(0, 0, buffer)])
+ await new_mrd.close()
+ assert buffer.getvalue() == _BYTES_TO_UPLOAD
+ del mrd
+ del new_mrd
+ gc.collect()
+
+
+@pytest.mark.asyncio
+async def test_read_unfinalized_appendable_object_with_generation(
+ storage_client, blobs_to_delete
+):
+ object_name = f"read_unfinalized_appendable_object-{str(uuid.uuid4())[:4]}"
+ grpc_client = AsyncGrpcClient(attempt_direct_path=True).grpc_client
+
+ async def _read_and_verify(expected_content, generation=None):
+ """Helper to read object content and verify against expected."""
+ mrd = AsyncMultiRangeDownloader(
+ grpc_client, _ZONAL_BUCKET, object_name, generation
+ )
+ buffer = BytesIO()
+ await mrd.open()
+ try:
+ assert mrd.persisted_size == len(expected_content)
+ await mrd.download_ranges([(0, 0, buffer)])
+ assert buffer.getvalue() == expected_content
+ finally:
+ await mrd.close()
+ return mrd
+
+ # First write
+ writer = AsyncAppendableObjectWriter(grpc_client, _ZONAL_BUCKET, object_name)
+ await writer.open()
+ await writer.append(_BYTES_TO_UPLOAD)
+ await writer.flush()
+ generation = writer.generation
+
+ # First read
+ mrd = await _read_and_verify(_BYTES_TO_UPLOAD)
+
+ # Second write, using generation from the first write.
+ writer_2 = AsyncAppendableObjectWriter(
+ grpc_client, _ZONAL_BUCKET, object_name, generation=generation
+ )
+ await writer_2.open()
+ await writer_2.append(_BYTES_TO_UPLOAD)
+ await writer_2.flush()
+
+ # Second read
+ mrd_2 = await _read_and_verify(_BYTES_TO_UPLOAD + _BYTES_TO_UPLOAD, generation)
+
+ # Clean up
+ blobs_to_delete.append(storage_client.bucket(_ZONAL_BUCKET).blob(object_name))
+ del writer
+ del writer_2
+ del mrd
+ del mrd_2
+ gc.collect()
+
+
+@pytest.mark.asyncio
+async def test_append_flushes_and_state_lookup(storage_client, blobs_to_delete):
+ """
+ System test for AsyncAppendableObjectWriter, verifying flushing behavior
+ for both small and large appends.
+ """
+ object_name = f"test-append-flush-varied-size-{uuid.uuid4()}"
+ grpc_client = AsyncGrpcClient().grpc_client
+ writer = AsyncAppendableObjectWriter(grpc_client, _ZONAL_BUCKET, object_name)
+
+ # Schedule for cleanup
+ blobs_to_delete.append(storage_client.bucket(_ZONAL_BUCKET).blob(object_name))
+
+ # --- Part 1: Test with small data ---
+ small_data = b"small data"
+
+ await writer.open()
+ assert writer._is_stream_open
+
+ await writer.append(small_data)
+ persisted_size = await writer.state_lookup()
+ assert persisted_size == len(small_data)
+
+ # --- Part 2: Test with large data ---
+ large_data = os.urandom(38 * 1024 * 1024)
+
+ # Append data larger than the default flush interval (16 MiB).
+ # This should trigger the interval-based flushing logic.
+ await writer.append(large_data)
+
+ # Verify the total data has been persisted.
+ total_size = len(small_data) + len(large_data)
+ persisted_size = await writer.state_lookup()
+ assert persisted_size == total_size
+
+ # --- Part 3: Finalize and verify ---
+ final_object = await writer.close(finalize_on_close=True)
+
+ assert not writer._is_stream_open
+ assert final_object.size == total_size
+
+ # Verify the full content of the object.
+ full_data = small_data + large_data
+ mrd = AsyncMultiRangeDownloader(grpc_client, _ZONAL_BUCKET, object_name)
+ buffer = BytesIO()
+ await mrd.open()
+ # (0, 0) means read the whole object
+ await mrd.download_ranges([(0, 0, buffer)])
+ await mrd.close()
+ content = buffer.getvalue()
+ assert content == full_data
diff --git a/tests/unit/asyncio/retry/test_bidi_stream_retry_manager.py b/tests/unit/asyncio/retry/test_bidi_stream_retry_manager.py
new file mode 100644
index 000000000..6c837ec5c
--- /dev/null
+++ b/tests/unit/asyncio/retry/test_bidi_stream_retry_manager.py
@@ -0,0 +1,156 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from unittest import mock
+
+import pytest
+from google.api_core import exceptions
+from google.api_core.retry_async import AsyncRetry
+
+from google.cloud.storage._experimental.asyncio.retry import (
+ bidi_stream_retry_manager as manager,
+)
+from google.cloud.storage._experimental.asyncio.retry import base_strategy
+
+
+def _is_retriable(exc):
+ return isinstance(exc, exceptions.ServiceUnavailable)
+
+
+DEFAULT_TEST_RETRY = AsyncRetry(predicate=_is_retriable, deadline=1)
+
+
+class TestBidiStreamRetryManager:
+ @pytest.mark.asyncio
+ async def test_execute_success_on_first_try(self):
+ mock_strategy = mock.AsyncMock(spec=base_strategy._BaseResumptionStrategy)
+
+ async def mock_send_and_recv(*args, **kwargs):
+ yield "response_1"
+
+ retry_manager = manager._BidiStreamRetryManager(
+ strategy=mock_strategy, send_and_recv=mock_send_and_recv
+ )
+ await retry_manager.execute(initial_state={}, retry_policy=DEFAULT_TEST_RETRY)
+ mock_strategy.generate_requests.assert_called_once()
+ mock_strategy.update_state_from_response.assert_called_once_with(
+ "response_1", {}
+ )
+ mock_strategy.recover_state_on_failure.assert_not_called()
+
+ @pytest.mark.asyncio
+ async def test_execute_success_on_empty_stream(self):
+ mock_strategy = mock.AsyncMock(spec=base_strategy._BaseResumptionStrategy)
+
+ async def mock_send_and_recv(*args, **kwargs):
+ if False:
+ yield
+
+ retry_manager = manager._BidiStreamRetryManager(
+ strategy=mock_strategy, send_and_recv=mock_send_and_recv
+ )
+ await retry_manager.execute(initial_state={}, retry_policy=DEFAULT_TEST_RETRY)
+
+ mock_strategy.generate_requests.assert_called_once()
+ mock_strategy.update_state_from_response.assert_not_called()
+ mock_strategy.recover_state_on_failure.assert_not_called()
+
+ @pytest.mark.asyncio
+ async def test_execute_retries_on_initial_failure_and_succeeds(self):
+ mock_strategy = mock.AsyncMock(spec=base_strategy._BaseResumptionStrategy)
+ attempt_count = 0
+
+ async def mock_send_and_recv(*args, **kwargs):
+ nonlocal attempt_count
+ attempt_count += 1
+ if attempt_count == 1:
+ raise exceptions.ServiceUnavailable("Service is down")
+ else:
+ yield "response_2"
+
+ retry_manager = manager._BidiStreamRetryManager(
+ strategy=mock_strategy, send_and_recv=mock_send_and_recv
+ )
+ retry_policy = AsyncRetry(predicate=_is_retriable, initial=0.01)
+
+ with mock.patch("asyncio.sleep", new_callable=mock.AsyncMock):
+ await retry_manager.execute(initial_state={}, retry_policy=retry_policy)
+
+ assert attempt_count == 2
+ assert mock_strategy.generate_requests.call_count == 2
+ mock_strategy.recover_state_on_failure.assert_called_once()
+ mock_strategy.update_state_from_response.assert_called_once_with(
+ "response_2", {}
+ )
+
+ @pytest.mark.asyncio
+ async def test_execute_retries_and_succeeds_mid_stream(self):
+ """Test retry logic for a stream that fails after yielding some data."""
+ mock_strategy = mock.AsyncMock(spec=base_strategy._BaseResumptionStrategy)
+ attempt_count = 0
+ # Use a list to simulate stream content for each attempt
+ stream_content = [
+ ["response_1", exceptions.ServiceUnavailable("Service is down")],
+ ["response_2"],
+ ]
+
+ async def mock_send_and_recv(*args, **kwargs):
+ nonlocal attempt_count
+ content = stream_content[attempt_count]
+ attempt_count += 1
+ for item in content:
+ if isinstance(item, Exception):
+ raise item
+ else:
+ yield item
+
+ retry_manager = manager._BidiStreamRetryManager(
+ strategy=mock_strategy, send_and_recv=mock_send_and_recv
+ )
+ retry_policy = AsyncRetry(predicate=_is_retriable, initial=0.01)
+
+ with mock.patch("asyncio.sleep", new_callable=mock.AsyncMock) as mock_sleep:
+ await retry_manager.execute(initial_state={}, retry_policy=retry_policy)
+
+ assert attempt_count == 2
+ mock_sleep.assert_called_once()
+
+ assert mock_strategy.generate_requests.call_count == 2
+ mock_strategy.recover_state_on_failure.assert_called_once()
+ assert mock_strategy.update_state_from_response.call_count == 2
+ mock_strategy.update_state_from_response.assert_has_calls(
+ [
+ mock.call("response_1", {}),
+ mock.call("response_2", {}),
+ ]
+ )
+
+ @pytest.mark.asyncio
+ async def test_execute_fails_immediately_on_non_retriable_error(self):
+ mock_strategy = mock.AsyncMock(spec=base_strategy._BaseResumptionStrategy)
+
+ async def mock_send_and_recv(*args, **kwargs):
+ if False:
+ yield
+ raise exceptions.PermissionDenied("Auth error")
+
+ retry_manager = manager._BidiStreamRetryManager(
+ strategy=mock_strategy, send_and_recv=mock_send_and_recv
+ )
+ with pytest.raises(exceptions.PermissionDenied):
+ await retry_manager.execute(
+ initial_state={}, retry_policy=DEFAULT_TEST_RETRY
+ )
+
+ mock_strategy.recover_state_on_failure.assert_not_called()
diff --git a/tests/unit/asyncio/retry/test_reads_resumption_strategy.py b/tests/unit/asyncio/retry/test_reads_resumption_strategy.py
index e6b343f86..62a05f19a 100644
--- a/tests/unit/asyncio/retry/test_reads_resumption_strategy.py
+++ b/tests/unit/asyncio/retry/test_reads_resumption_strategy.py
@@ -12,9 +12,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import asyncio
import io
import unittest
-import pytest
+from google_crc32c import Checksum
from google.cloud.storage.exceptions import DataCorruption
from google.api_core import exceptions
@@ -26,6 +27,9 @@
from google.cloud._storage_v2.types.storage import BidiReadObjectRedirectedError
_READ_ID = 1
+LOGGER_NAME = (
+ "google.cloud.storage._experimental.asyncio.retry.reads_resumption_strategy"
+)
class TestDownloadState(unittest.TestCase):
@@ -45,14 +49,67 @@ def test_initialization(self):
class TestReadResumptionStrategy(unittest.TestCase):
+ def setUp(self):
+ self.strategy = _ReadResumptionStrategy()
+
+ self.state = {"download_states": {}, "read_handle": None, "routing_token": None}
+
+ def _add_download(self, read_id, offset=0, length=100, buffer=None):
+ """Helper to inject a download state into the correct nested location."""
+ if buffer is None:
+ buffer = io.BytesIO()
+ state = _DownloadState(
+ initial_offset=offset, initial_length=length, user_buffer=buffer
+ )
+ self.state["download_states"][read_id] = state
+ return state
+
+ def _create_response(
+ self,
+ content,
+ read_id,
+ offset,
+ crc=None,
+ range_end=False,
+ handle=None,
+ has_read_range=True,
+ ):
+ """Helper to create a response object."""
+ checksummed_data = None
+ if content is not None:
+ if crc is None:
+ c = Checksum(content)
+ crc = int.from_bytes(c.digest(), "big")
+ checksummed_data = storage_v2.ChecksummedData(content=content, crc32c=crc)
+
+ read_range = None
+ if has_read_range:
+ read_range = storage_v2.ReadRange(read_id=read_id, read_offset=offset)
+
+ read_handle_message = None
+ if handle:
+ read_handle_message = storage_v2.BidiReadHandle(handle=handle)
+ self.state["read_handle"] = handle
+
+ return storage_v2.BidiReadObjectResponse(
+ object_data_ranges=[
+ storage_v2.ObjectRangeData(
+ checksummed_data=checksummed_data,
+ read_range=read_range,
+ range_end=range_end,
+ )
+ ],
+ read_handle=read_handle_message,
+ )
+
+ # --- Request Generation Tests ---
+
def test_generate_requests_single_incomplete(self):
"""Test generating a request for a single incomplete download."""
- read_state = _DownloadState(0, 100, io.BytesIO())
+ read_state = self._add_download(_READ_ID, offset=0, length=100)
read_state.bytes_written = 20
- state = {_READ_ID: read_state}
- read_strategy = _ReadResumptionStrategy()
- requests = read_strategy.generate_requests(state)
+ requests = self.strategy.generate_requests(self.state)
self.assertEqual(len(requests), 1)
self.assertEqual(requests[0].read_offset, 20)
@@ -62,173 +119,244 @@ def test_generate_requests_single_incomplete(self):
def test_generate_requests_multiple_incomplete(self):
"""Test generating requests for multiple incomplete downloads."""
read_id2 = 2
- read_state1 = _DownloadState(0, 100, io.BytesIO())
- read_state1.bytes_written = 50
- read_state2 = _DownloadState(200, 100, io.BytesIO())
- state = {_READ_ID: read_state1, read_id2: read_state2}
+ rs1 = self._add_download(_READ_ID, offset=0, length=100)
+ rs1.bytes_written = 50
- read_strategy = _ReadResumptionStrategy()
- requests = read_strategy.generate_requests(state)
+ self._add_download(read_id2, offset=200, length=100)
+
+ requests = self.strategy.generate_requests(self.state)
self.assertEqual(len(requests), 2)
- req1 = next(request for request in requests if request.read_id == _READ_ID)
- req2 = next(request for request in requests if request.read_id == read_id2)
+ requests.sort(key=lambda r: r.read_id)
+
+ req1 = requests[0]
+ req2 = requests[1]
+ self.assertEqual(req1.read_id, _READ_ID)
self.assertEqual(req1.read_offset, 50)
self.assertEqual(req1.read_length, 50)
+
+ self.assertEqual(req2.read_id, read_id2)
self.assertEqual(req2.read_offset, 200)
self.assertEqual(req2.read_length, 100)
+ def test_generate_requests_read_to_end_resumption(self):
+ """Test resumption for 'read to end' (length=0) requests."""
+ read_state = self._add_download(_READ_ID, offset=0, length=0)
+ read_state.bytes_written = 500
+
+ requests = self.strategy.generate_requests(self.state)
+
+ self.assertEqual(len(requests), 1)
+ self.assertEqual(requests[0].read_offset, 500)
+ self.assertEqual(requests[0].read_length, 0)
+
def test_generate_requests_with_complete(self):
"""Test that no request is generated for a completed download."""
- read_state = _DownloadState(0, 100, io.BytesIO())
+ read_state = self._add_download(_READ_ID)
read_state.is_complete = True
- state = {_READ_ID: read_state}
-
- read_strategy = _ReadResumptionStrategy()
- requests = read_strategy.generate_requests(state)
+ requests = self.strategy.generate_requests(self.state)
self.assertEqual(len(requests), 0)
+ def test_generate_requests_multiple_mixed_states(self):
+ """Test generating requests with mixed complete, partial, and fresh states."""
+ s1 = self._add_download(1, length=100)
+ s1.is_complete = True
+
+ s2 = self._add_download(2, offset=0, length=100)
+ s2.bytes_written = 50
+
+ s3 = self._add_download(3, offset=200, length=100)
+ s3.bytes_written = 0
+
+ requests = self.strategy.generate_requests(self.state)
+
+ self.assertEqual(len(requests), 2)
+ requests.sort(key=lambda r: r.read_id)
+
+ self.assertEqual(requests[0].read_id, 2)
+ self.assertEqual(requests[1].read_id, 3)
+
def test_generate_requests_empty_state(self):
"""Test generating requests with an empty state."""
- read_strategy = _ReadResumptionStrategy()
- requests = read_strategy.generate_requests({})
+ requests = self.strategy.generate_requests(self.state)
self.assertEqual(len(requests), 0)
+ # --- Update State and response processing Tests ---
+
def test_update_state_processes_single_chunk_successfully(self):
"""Test updating state from a successful response."""
- buffer = io.BytesIO()
- read_state = _DownloadState(0, 100, buffer)
- state = {_READ_ID: read_state}
+ read_state = self._add_download(_READ_ID, offset=0, length=100)
data = b"test_data"
- read_strategy = _ReadResumptionStrategy()
- response = storage_v2.BidiReadObjectResponse(
- object_data_ranges=[
- storage_v2.types.ObjectRangeData(
- read_range=storage_v2.ReadRange(
- read_id=_READ_ID, read_offset=0, read_length=len(data)
- ),
- checksummed_data=storage_v2.ChecksummedData(content=data),
- )
- ]
- )
+ response = self._create_response(data, _READ_ID, offset=0)
- read_strategy.update_state_from_response(response, state)
+ self.strategy.update_state_from_response(response, self.state)
self.assertEqual(read_state.bytes_written, len(data))
self.assertEqual(read_state.next_expected_offset, len(data))
self.assertFalse(read_state.is_complete)
- self.assertEqual(buffer.getvalue(), data)
+ self.assertEqual(read_state.user_buffer.getvalue(), data)
+
+ def test_update_state_accumulates_chunks(self):
+ """Verify that state updates correctly over multiple chunks."""
+ read_state = self._add_download(_READ_ID, offset=0, length=8)
+
+ resp1 = self._create_response(b"test", _READ_ID, offset=0)
+ self.strategy.update_state_from_response(resp1, self.state)
+
+ self.assertEqual(read_state.bytes_written, 4)
+ self.assertEqual(read_state.user_buffer.getvalue(), b"test")
+
+ resp2 = self._create_response(b"data", _READ_ID, offset=4, range_end=True)
+ self.strategy.update_state_from_response(resp2, self.state)
+
+ self.assertEqual(read_state.bytes_written, 8)
+ self.assertTrue(read_state.is_complete)
+ self.assertEqual(read_state.user_buffer.getvalue(), b"testdata")
+
+ def test_update_state_captures_read_handle(self):
+ """Verify read_handle is extracted from the response."""
+ self._add_download(_READ_ID)
+
+ new_handle = b"optimized_handle"
+ response = self._create_response(b"data", _READ_ID, 0, handle=new_handle)
- def test_update_state_from_response_offset_mismatch(self):
+ self.strategy.update_state_from_response(response, self.state)
+ self.assertEqual(self.state["read_handle"].handle, new_handle)
+
+ def test_update_state_unknown_id(self):
+ """Verify we ignore data for IDs not in our tracking state."""
+ self._add_download(_READ_ID)
+ response = self._create_response(b"ghost", read_id=999, offset=0)
+
+ self.strategy.update_state_from_response(response, self.state)
+ self.assertEqual(self.state["download_states"][_READ_ID].bytes_written, 0)
+
+ def test_update_state_missing_read_range(self):
+ """Verify we ignore ranges without read_range metadata."""
+ response = self._create_response(b"data", _READ_ID, 0, has_read_range=False)
+ self.strategy.update_state_from_response(response, self.state)
+
+ def test_update_state_offset_mismatch(self):
"""Test that an offset mismatch raises DataCorruption."""
- read_state = _DownloadState(0, 100, io.BytesIO())
+ read_state = self._add_download(_READ_ID, offset=0)
read_state.next_expected_offset = 10
- state = {_READ_ID: read_state}
- read_strategy = _ReadResumptionStrategy()
- response = storage_v2.BidiReadObjectResponse(
- object_data_ranges=[
- storage_v2.types.ObjectRangeData(
- read_range=storage_v2.ReadRange(
- read_id=_READ_ID, read_offset=0, read_length=4
- ),
- checksummed_data=storage_v2.ChecksummedData(content=b"data"),
- )
- ]
- )
+ response = self._create_response(b"data", _READ_ID, offset=0)
- with pytest.raises(DataCorruption) as exc_info:
- read_strategy.update_state_from_response(response, state)
- assert "Offset mismatch" in str(exc_info.value)
+ with self.assertRaisesRegex(DataCorruption, "Offset mismatch"):
+ self.strategy.update_state_from_response(response, self.state)
- def test_update_state_from_response_final_byte_count_mismatch(self):
- """Test that a final byte count mismatch raises DataCorruption."""
- read_state = _DownloadState(0, 100, io.BytesIO())
- state = {_READ_ID: read_state}
- read_strategy = _ReadResumptionStrategy()
+ def test_update_state_checksum_mismatch(self):
+ """Test that a CRC32C mismatch raises DataCorruption."""
+ self._add_download(_READ_ID)
+ response = self._create_response(b"data", _READ_ID, offset=0, crc=999999)
- response = storage_v2.BidiReadObjectResponse(
- object_data_ranges=[
- storage_v2.types.ObjectRangeData(
- read_range=storage_v2.ReadRange(
- read_id=_READ_ID, read_offset=0, read_length=4
- ),
- checksummed_data=storage_v2.ChecksummedData(content=b"data"),
- range_end=True,
- )
- ]
- )
+ with self.assertRaisesRegex(DataCorruption, "Checksum mismatch"):
+ self.strategy.update_state_from_response(response, self.state)
+
+ def test_update_state_final_byte_count_mismatch(self):
+ """Test mismatch between expected length and actual bytes written on completion."""
+ self._add_download(_READ_ID, length=100)
+
+ data = b"data" * 30
+ response = self._create_response(data, _READ_ID, offset=0, range_end=True)
- with pytest.raises(DataCorruption) as exc_info:
- read_strategy.update_state_from_response(response, state)
- assert "Byte count mismatch" in str(exc_info.value)
+ with self.assertRaisesRegex(DataCorruption, "Byte count mismatch"):
+ self.strategy.update_state_from_response(response, self.state)
- def test_update_state_from_response_completes_download(self):
+ def test_update_state_completes_download(self):
"""Test that the download is marked complete on range_end."""
- buffer = io.BytesIO()
data = b"test_data"
- read_state = _DownloadState(0, len(data), buffer)
- state = {_READ_ID: read_state}
- read_strategy = _ReadResumptionStrategy()
+ read_state = self._add_download(_READ_ID, length=len(data))
- response = storage_v2.BidiReadObjectResponse(
- object_data_ranges=[
- storage_v2.types.ObjectRangeData(
- read_range=storage_v2.ReadRange(
- read_id=_READ_ID, read_offset=0, read_length=len(data)
- ),
- checksummed_data=storage_v2.ChecksummedData(content=data),
- range_end=True,
- )
- ]
- )
+ response = self._create_response(data, _READ_ID, offset=0, range_end=True)
- read_strategy.update_state_from_response(response, state)
+ self.strategy.update_state_from_response(response, self.state)
self.assertTrue(read_state.is_complete)
self.assertEqual(read_state.bytes_written, len(data))
- self.assertEqual(buffer.getvalue(), data)
- def test_update_state_from_response_completes_download_zero_length(self):
+ def test_update_state_completes_download_zero_length(self):
"""Test completion for a download with initial_length of 0."""
- buffer = io.BytesIO()
+ read_state = self._add_download(_READ_ID, length=0)
data = b"test_data"
- read_state = _DownloadState(0, 0, buffer)
- state = {_READ_ID: read_state}
- read_strategy = _ReadResumptionStrategy()
- response = storage_v2.BidiReadObjectResponse(
- object_data_ranges=[
- storage_v2.types.ObjectRangeData(
- read_range=storage_v2.ReadRange(
- read_id=_READ_ID, read_offset=0, read_length=len(data)
- ),
- checksummed_data=storage_v2.ChecksummedData(content=data),
- range_end=True,
- )
- ]
- )
+ response = self._create_response(data, _READ_ID, offset=0, range_end=True)
- read_strategy.update_state_from_response(response, state)
+ self.strategy.update_state_from_response(response, self.state)
self.assertTrue(read_state.is_complete)
self.assertEqual(read_state.bytes_written, len(data))
- async def test_recover_state_on_failure_handles_redirect(self):
+ def test_update_state_zero_byte_file(self):
+ """Test downloading a completely empty file."""
+ read_state = self._add_download(_READ_ID, length=0)
+
+ response = self._create_response(b"", _READ_ID, offset=0, range_end=True)
+
+ self.strategy.update_state_from_response(response, self.state)
+
+ self.assertTrue(read_state.is_complete)
+ self.assertEqual(read_state.bytes_written, 0)
+ self.assertEqual(read_state.user_buffer.getvalue(), b"")
+
+ def test_update_state_missing_read_range_logs_warning(self):
+ """Verify we log a warning and continue when read_range is missing."""
+ response = self._create_response(b"data", _READ_ID, 0, has_read_range=False)
+
+ # assertLogs captures logs for the given logger name and minimum level
+ with self.assertLogs(LOGGER_NAME, level="WARNING") as cm:
+ self.strategy.update_state_from_response(response, self.state)
+
+ self.assertTrue(
+ any("missing read_range field" in output for output in cm.output)
+ )
+
+ def test_update_state_unknown_id_logs_warning(self):
+ """Verify we log a warning and continue when read_id is unknown."""
+ unknown_id = 999
+ self._add_download(_READ_ID)
+ response = self._create_response(b"ghost", read_id=unknown_id, offset=0)
+
+ with self.assertLogs(LOGGER_NAME, level="WARNING") as cm:
+ self.strategy.update_state_from_response(response, self.state)
+
+ self.assertTrue(
+ any(
+ f"unknown or stale read_id {unknown_id}" in output
+ for output in cm.output
+ )
+ )
+
+ # --- Recovery Tests ---
+
+ def test_recover_state_on_failure_handles_redirect(self):
"""Verify recover_state_on_failure correctly extracts routing_token."""
- strategy = _ReadResumptionStrategy()
+ token = "dummy-routing-token"
+ redirect_error = BidiReadObjectRedirectedError(routing_token=token)
+ final_error = exceptions.Aborted("Retry failed", errors=[redirect_error])
+
+ async def run():
+ await self.strategy.recover_state_on_failure(final_error, self.state)
+
+ asyncio.new_event_loop().run_until_complete(run())
+
+ self.assertEqual(self.state["routing_token"], token)
- state = {}
- self.assertIsNone(state.get("routing_token"))
+ def test_recover_state_ignores_standard_errors(self):
+ """Verify that non-redirect errors do not corrupt the routing token."""
+ self.state["routing_token"] = "existing-token"
- dummy_token = "dummy-routing-token"
- redirect_error = BidiReadObjectRedirectedError(routing_token=dummy_token)
+ std_error = exceptions.ServiceUnavailable("Maintenance")
+ final_error = exceptions.RetryError("Retry failed", cause=std_error)
- final_error = exceptions.RetryError("Retry failed", cause=redirect_error)
+ async def run():
+ await self.strategy.recover_state_on_failure(final_error, self.state)
- await strategy.recover_state_on_failure(final_error, state)
+ asyncio.new_event_loop().run_until_complete(run())
- self.assertEqual(state.get("routing_token"), dummy_token)
+ # Token should remain unchanged
+ self.assertEqual(self.state["routing_token"], "existing-token")
diff --git a/tests/unit/asyncio/retry/test_writes_resumption_strategy.py b/tests/unit/asyncio/retry/test_writes_resumption_strategy.py
new file mode 100644
index 000000000..7d8b7934e
--- /dev/null
+++ b/tests/unit/asyncio/retry/test_writes_resumption_strategy.py
@@ -0,0 +1,290 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import io
+import unittest
+import unittest.mock as mock
+from datetime import datetime
+
+import pytest
+import google_crc32c
+
+from google.cloud._storage_v2.types import storage as storage_type
+from google.cloud.storage._experimental.asyncio.retry.writes_resumption_strategy import (
+ _WriteState,
+ _WriteResumptionStrategy,
+)
+from google.cloud._storage_v2.types.storage import BidiWriteObjectRedirectedError
+
+
+class TestWriteResumptionStrategy(unittest.TestCase):
+ def _get_target_class(self):
+ return _WriteResumptionStrategy
+
+ def _make_one(self, *args, **kwargs):
+ return self._get_target_class()(*args, **kwargs)
+
+ def test_ctor(self):
+ strategy = self._make_one()
+ self.assertIsInstance(strategy, self._get_target_class())
+
+ def test_generate_requests_initial_new_object(self):
+ """
+ Verify the initial request sequence for a new upload (WriteObjectSpec).
+ """
+ strategy = self._make_one()
+ mock_buffer = io.BytesIO(b"0123456789")
+ # Use WriteObjectSpec for new objects
+ mock_spec = storage_type.WriteObjectSpec(
+ resource=storage_type.Object(name="test-object")
+ )
+ state = {
+ "write_state": _WriteState(
+ mock_spec, chunk_size=4, user_buffer=mock_buffer
+ ),
+ }
+
+ requests = list(strategy.generate_requests(state))
+
+ # Check first request (Spec)
+ self.assertEqual(requests[0].write_object_spec, mock_spec)
+ self.assertFalse(requests[0].state_lookup)
+
+ # Check data chunks
+ self.assertEqual(requests[1].write_offset, 0)
+ self.assertEqual(requests[1].checksummed_data.content, b"0123")
+ self.assertEqual(requests[2].write_offset, 4)
+ self.assertEqual(requests[2].checksummed_data.content, b"4567")
+ self.assertEqual(requests[3].write_offset, 8)
+ self.assertEqual(requests[3].checksummed_data.content, b"89")
+
+ # Total requests: 1 Spec + 3 Chunks
+ self.assertEqual(len(requests), 4)
+
+ def test_generate_requests_initial_existing_object(self):
+ """
+ Verify the initial request sequence for appending to an existing object (AppendObjectSpec).
+ """
+ strategy = self._make_one()
+ mock_buffer = io.BytesIO(b"0123")
+ # Use AppendObjectSpec for existing objects
+ mock_spec = storage_type.AppendObjectSpec(
+ object_="test-object", bucket="test-bucket"
+ )
+ state = {
+ "write_state": _WriteState(
+ mock_spec, chunk_size=4, user_buffer=mock_buffer
+ ),
+ }
+
+ requests = list(strategy.generate_requests(state))
+
+ # Check first request (Spec)
+ self.assertEqual(requests[0].append_object_spec, mock_spec)
+ self.assertFalse(requests[0].state_lookup)
+
+ # Check data chunk
+ self.assertEqual(requests[1].write_offset, 0)
+ self.assertEqual(requests[1].checksummed_data.content, b"0123")
+
+ def test_generate_requests_empty_file(self):
+ """
+ Verify request sequence for an empty file. Should just be the Spec.
+ """
+ strategy = self._make_one()
+ mock_buffer = io.BytesIO(b"")
+ mock_spec = storage_type.AppendObjectSpec(object_="test-object")
+ state = {
+ "write_state": _WriteState(
+ mock_spec, chunk_size=4, user_buffer=mock_buffer
+ ),
+ }
+
+ requests = list(strategy.generate_requests(state))
+
+ self.assertEqual(len(requests), 1)
+ self.assertEqual(requests[0].append_object_spec, mock_spec)
+
+ def test_generate_requests_resumption(self):
+ """
+ Verify request sequence when resuming an upload.
+ """
+ strategy = self._make_one()
+ mock_buffer = io.BytesIO(b"0123456789")
+ mock_spec = storage_type.AppendObjectSpec(object_="test-object")
+
+ write_state = _WriteState(mock_spec, chunk_size=4, user_buffer=mock_buffer)
+ write_state.persisted_size = 4
+ write_state.bytes_sent = 4
+ write_state.write_handle = storage_type.BidiWriteHandle(handle=b"test-handle")
+ mock_buffer.seek(4)
+
+ state = {"write_state": write_state}
+
+ requests = list(strategy.generate_requests(state))
+
+ # Check first request has handle and lookup
+ self.assertEqual(
+ requests[0].append_object_spec.write_handle.handle, b"test-handle"
+ )
+
+ # Check data starts from offset 4
+ self.assertEqual(requests[1].write_offset, 4)
+ self.assertEqual(requests[1].checksummed_data.content, b"4567")
+ self.assertEqual(requests[2].write_offset, 8)
+ self.assertEqual(requests[2].checksummed_data.content, b"89")
+
+ @pytest.mark.asyncio
+ async def test_generate_requests_after_failure_and_recovery(self):
+ """
+ Verify recovery and resumption flow.
+ """
+ strategy = self._make_one()
+ mock_buffer = io.BytesIO(b"0123456789abcdef")
+ mock_spec = storage_type.AppendObjectSpec(object_="test-object")
+ state = {
+ "write_state": _WriteState(mock_spec, chunk_size=4, user_buffer=mock_buffer)
+ }
+ write_state = state["write_state"]
+
+ write_state.bytes_sent = 8
+ mock_buffer.seek(8)
+
+ strategy.update_state_from_response(
+ storage_type.BidiWriteObjectResponse(
+ persisted_size=4,
+ write_handle=storage_type.BidiWriteHandle(handle=b"handle-1"),
+ ),
+ state,
+ )
+
+ await strategy.recover_state_on_failure(Exception("network error"), state)
+
+ self.assertEqual(mock_buffer.tell(), 4)
+ self.assertEqual(write_state.bytes_sent, 4)
+
+ requests = list(strategy.generate_requests(state))
+
+ self.assertTrue(requests[0].state_lookup)
+ self.assertEqual(
+ requests[0].append_object_spec.write_handle.handle, b"handle-1"
+ )
+
+ self.assertEqual(requests[1].write_offset, 4)
+ self.assertEqual(requests[1].checksummed_data.content, b"4567")
+
+ def test_update_state_from_response(self):
+ """Verify state updates from server responses."""
+ strategy = self._make_one()
+ mock_buffer = io.BytesIO(b"0123456789")
+ mock_spec = storage_type.AppendObjectSpec(object_="test-object")
+ state = {
+ "write_state": _WriteState(
+ mock_spec, chunk_size=4, user_buffer=mock_buffer
+ ),
+ }
+ write_state = state["write_state"]
+
+ response1 = storage_type.BidiWriteObjectResponse(
+ write_handle=storage_type.BidiWriteHandle(handle=b"handle-1")
+ )
+ strategy.update_state_from_response(response1, state)
+ self.assertEqual(write_state.write_handle.handle, b"handle-1")
+
+ response2 = storage_type.BidiWriteObjectResponse(persisted_size=1024)
+ strategy.update_state_from_response(response2, state)
+ self.assertEqual(write_state.persisted_size, 1024)
+
+ final_resource = storage_type.Object(
+ name="test-object", bucket="b", size=2048, finalize_time=datetime.now()
+ )
+ response3 = storage_type.BidiWriteObjectResponse(resource=final_resource)
+ strategy.update_state_from_response(response3, state)
+ self.assertEqual(write_state.persisted_size, 2048)
+ self.assertTrue(write_state.is_finalized)
+
+ @pytest.mark.asyncio
+ async def test_recover_state_on_failure_handles_redirect(self):
+ """
+ Verify redirection error handling.
+ """
+ strategy = self._make_one()
+ mock_buffer = mock.MagicMock(spec=io.BytesIO)
+ mock_spec = storage_type.AppendObjectSpec(object_="test-object")
+
+ write_state = _WriteState(mock_spec, chunk_size=4, user_buffer=mock_buffer)
+ state = {"write_state": write_state}
+
+ redirect_error = BidiWriteObjectRedirectedError(routing_token="new-token-123")
+ wrapped_error = Exception("RPC error")
+ wrapped_error.cause = redirect_error
+
+ await strategy.recover_state_on_failure(wrapped_error, state)
+
+ self.assertEqual(write_state.routing_token, "new-token-123")
+ mock_buffer.seek.assert_called_with(0)
+
+ @pytest.mark.asyncio
+ async def test_recover_state_on_failure_handles_redirect_with_handle(self):
+ """Verify redirection that includes a write handle."""
+ strategy = self._make_one()
+ mock_buffer = mock.MagicMock(spec=io.BytesIO)
+ mock_spec = storage_type.AppendObjectSpec(object_="test-object")
+
+ write_state = _WriteState(mock_spec, chunk_size=4, user_buffer=mock_buffer)
+ state = {"write_state": write_state}
+
+ redirect_error = BidiWriteObjectRedirectedError(
+ routing_token="new-token-456", write_handle=b"redirect-handle"
+ )
+ wrapped_error = Exception("RPC error")
+ wrapped_error.cause = redirect_error
+
+ await strategy.recover_state_on_failure(wrapped_error, state)
+
+ self.assertEqual(write_state.routing_token, "new-token-456")
+ self.assertEqual(write_state.write_handle, b"redirect-handle")
+
+ mock_buffer.seek.assert_called_with(0)
+
+ def test_generate_requests_sends_crc32c_checksum(self):
+ strategy = self._make_one()
+ mock_buffer = io.BytesIO(b"0123")
+ mock_spec = storage_type.AppendObjectSpec(object_="test-object")
+ state = {
+ "write_state": _WriteState(
+ mock_spec, chunk_size=4, user_buffer=mock_buffer
+ ),
+ }
+
+ requests = list(strategy.generate_requests(state))
+
+ expected_crc = google_crc32c.Checksum(b"0123")
+ expected_crc_int = int.from_bytes(expected_crc.digest(), "big")
+ self.assertEqual(requests[1].checksummed_data.crc32c, expected_crc_int)
+
+ def test_generate_requests_with_routing_token(self):
+ strategy = self._make_one()
+ mock_buffer = io.BytesIO(b"")
+ mock_spec = storage_type.AppendObjectSpec(object_="test-object")
+
+ write_state = _WriteState(mock_spec, chunk_size=4, user_buffer=mock_buffer)
+ write_state.routing_token = "redirected-token"
+ state = {"write_state": write_state}
+
+ requests = list(strategy.generate_requests(state))
+
+ self.assertEqual(
+ requests[0].append_object_spec.routing_token, "redirected-token"
+ )
diff --git a/tests/unit/asyncio/test_async_appendable_object_writer.py b/tests/unit/asyncio/test_async_appendable_object_writer.py
index a75824f8b..07f7047d8 100644
--- a/tests/unit/asyncio/test_async_appendable_object_writer.py
+++ b/tests/unit/asyncio/test_async_appendable_object_writer.py
@@ -12,12 +12,20 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+from io import BytesIO
import pytest
from unittest import mock
+from google_crc32c import Checksum
+
+from google.api_core import exceptions
from google.cloud.storage._experimental.asyncio.async_appendable_object_writer import (
AsyncAppendableObjectWriter,
)
+from google.cloud.storage._experimental.asyncio.async_appendable_object_writer import (
+ _MAX_CHUNK_SIZE_BYTES,
+ _DEFAULT_FLUSH_INTERVAL_BYTES,
+)
from google.cloud import _storage_v2
@@ -26,6 +34,7 @@
GENERATION = 123
WRITE_HANDLE = b"test-write-handle"
PERSISTED_SIZE = 456
+EIGHT_MIB = 8 * 1024 * 1024
@pytest.fixture
@@ -49,6 +58,7 @@ def test_init(mock_write_object_stream, mock_client):
assert not writer._is_stream_open
assert writer.offset is None
assert writer.persisted_size is None
+ assert writer.bytes_appended_since_last_flush == 0
mock_write_object_stream.assert_called_once_with(
client=mock_client,
@@ -75,6 +85,7 @@ def test_init_with_optional_args(mock_write_object_stream, mock_client):
assert writer.generation == GENERATION
assert writer.write_handle == WRITE_HANDLE
+ assert writer.bytes_appended_since_last_flush == 0
mock_write_object_stream.assert_called_once_with(
client=mock_client,
@@ -85,6 +96,77 @@ def test_init_with_optional_args(mock_write_object_stream, mock_client):
)
+@mock.patch(
+ "google.cloud.storage._experimental.asyncio.async_appendable_object_writer._AsyncWriteObjectStream"
+)
+def test_init_with_writer_options(mock_write_object_stream, mock_client):
+ """Test the constructor with a writer_options flush-interval override."""
+ writer = AsyncAppendableObjectWriter(
+ mock_client,
+ BUCKET,
+ OBJECT,
+ writer_options={"FLUSH_INTERVAL_BYTES": EIGHT_MIB},
+ )
+
+ assert writer.flush_interval == EIGHT_MIB
+ assert writer.bytes_appended_since_last_flush == 0
+
+ mock_write_object_stream.assert_called_once_with(
+ client=mock_client,
+ bucket_name=BUCKET,
+ object_name=OBJECT,
+ generation_number=None,
+ write_handle=None,
+ )
+
+
+@mock.patch(
+ "google.cloud.storage._experimental.asyncio.async_appendable_object_writer._AsyncWriteObjectStream"
+)
+def test_init_with_flush_interval_less_than_chunk_size_raises_error(mock_client):
+ """Test that an OutOfRange error is raised if flush_interval is less than the chunk size."""
+
+ with pytest.raises(exceptions.OutOfRange):
+ AsyncAppendableObjectWriter(
+ mock_client,
+ BUCKET,
+ OBJECT,
+ writer_options={"FLUSH_INTERVAL_BYTES": _MAX_CHUNK_SIZE_BYTES - 1},
+ )
+
+
+@mock.patch(
+ "google.cloud.storage._experimental.asyncio.async_appendable_object_writer._AsyncWriteObjectStream"
+)
+def test_init_with_flush_interval_not_multiple_of_chunk_size_raises_error(mock_client):
+ """Test that an OutOfRange error is raised if flush_interval is not a multiple of the chunk size."""
+
+ with pytest.raises(exceptions.OutOfRange):
+ AsyncAppendableObjectWriter(
+ mock_client,
+ BUCKET,
+ OBJECT,
+ writer_options={"FLUSH_INTERVAL_BYTES": _MAX_CHUNK_SIZE_BYTES + 1},
+ )
+
+
+@mock.patch("google.cloud.storage._experimental.asyncio._utils.google_crc32c")
+@mock.patch(
+ "google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client"
+)
+def test_init_raises_if_crc32c_c_extension_is_missing(
+ mock_grpc_client, mock_google_crc32c
+):
+ mock_google_crc32c.implementation = "python"
+
+ with pytest.raises(exceptions.FailedPrecondition) as exc_info:
+ AsyncAppendableObjectWriter(mock_grpc_client, "bucket", "object")
+
+ assert "The google-crc32c package is not installed with C support" in str(
+ exc_info.value
+ )
+
+
@pytest.mark.asyncio
@mock.patch(
"google.cloud.storage._experimental.asyncio.async_appendable_object_writer._AsyncWriteObjectStream"
@@ -133,15 +215,10 @@ async def test_open_appendable_object_writer(mock_write_object_stream, mock_clie
writer = AsyncAppendableObjectWriter(mock_client, BUCKET, OBJECT)
mock_stream = mock_write_object_stream.return_value
mock_stream.open = mock.AsyncMock()
- mock_stream.send = mock.AsyncMock()
- mock_stream.recv = mock.AsyncMock()
-
- mock_state_response = mock.MagicMock()
- mock_state_response.persisted_size = 1024
- mock_stream.recv.return_value = mock_state_response
mock_stream.generation_number = GENERATION
mock_stream.write_handle = WRITE_HANDLE
+ mock_stream.persisted_size = 0
# Act
await writer.open()
@@ -151,11 +228,37 @@ async def test_open_appendable_object_writer(mock_write_object_stream, mock_clie
assert writer._is_stream_open
assert writer.generation == GENERATION
assert writer.write_handle == WRITE_HANDLE
+ assert writer.persisted_size == 0
- expected_request = _storage_v2.BidiWriteObjectRequest(state_lookup=True)
- mock_stream.send.assert_awaited_once_with(expected_request)
- mock_stream.recv.assert_awaited_once()
- assert writer.persisted_size == 1024
+
+@pytest.mark.asyncio
+@mock.patch(
+ "google.cloud.storage._experimental.asyncio.async_appendable_object_writer._AsyncWriteObjectStream"
+)
+async def test_open_appendable_object_writer_existing_object(
+ mock_write_object_stream, mock_client
+):
+ """Test the open method when appending to an existing object generation."""
+ # Arrange
+ writer = AsyncAppendableObjectWriter(
+ mock_client, BUCKET, OBJECT, generation=GENERATION
+ )
+ mock_stream = mock_write_object_stream.return_value
+ mock_stream.open = mock.AsyncMock()
+
+ mock_stream.generation_number = GENERATION
+ mock_stream.write_handle = WRITE_HANDLE
+ mock_stream.persisted_size = PERSISTED_SIZE
+
+ # Act
+ await writer.open()
+
+ # Assert
+ mock_stream.open.assert_awaited_once()
+ assert writer._is_stream_open
+ assert writer.generation == GENERATION
+ assert writer.write_handle == WRITE_HANDLE
+ assert writer.persisted_size == PERSISTED_SIZE
@pytest.mark.asyncio
@@ -186,9 +289,6 @@ async def test_unimplemented_methods_raise_error(mock_client):
with pytest.raises(NotImplementedError):
await writer.append_from_stream(mock.Mock())
- with pytest.raises(NotImplementedError):
- await writer.append_from_file("file.txt")
-
@pytest.mark.asyncio
@mock.patch(
@@ -264,6 +364,7 @@ async def test_close(mock_write_object_stream, mock_client):
writer = AsyncAppendableObjectWriter(mock_client, BUCKET, OBJECT)
writer._is_stream_open = True
writer.offset = 1024
+ writer.persisted_size = 1024
mock_stream = mock_write_object_stream.return_value
mock_stream.send = mock.AsyncMock()
mock_stream.recv = mock.AsyncMock(
@@ -313,7 +414,7 @@ async def test_finalize_on_close(mock_write_object_stream, mock_client):
result = await writer.close(finalize_on_close=True)
# Assert
- mock_stream.close.assert_not_awaited() # Based on new implementation
+ mock_stream.close.assert_awaited_once()
assert not writer._is_stream_open
assert writer.offset is None
assert writer.object_resource == mock_resource
@@ -335,6 +436,7 @@ async def test_finalize(mock_write_object_stream, mock_client):
mock_stream.recv = mock.AsyncMock(
return_value=_storage_v2.BidiWriteObjectResponse(resource=mock_resource)
)
+ mock_stream.close = mock.AsyncMock()
gcs_object = await writer.finalize()
@@ -342,9 +444,12 @@ async def test_finalize(mock_write_object_stream, mock_client):
_storage_v2.BidiWriteObjectRequest(finish_write=True)
)
mock_stream.recv.assert_awaited_once()
+ mock_stream.close.assert_awaited_once()
assert writer.object_resource == mock_resource
assert writer.persisted_size == 123
assert gcs_object == mock_resource
+ assert writer._is_stream_open is False
+ assert writer.offset is None
@pytest.mark.asyncio
@@ -401,25 +506,39 @@ async def test_append_sends_data_in_chunks(mock_write_object_stream, mock_client
writer.persisted_size = 100
mock_stream = mock_write_object_stream.return_value
mock_stream.send = mock.AsyncMock()
- writer.simple_flush = mock.AsyncMock()
data = b"a" * (_MAX_CHUNK_SIZE_BYTES + 1)
+ mock_stream.recv = mock.AsyncMock(
+ return_value=_storage_v2.BidiWriteObjectResponse(
+ persisted_size=100 + len(data)
+ )
+ )
+
await writer.append(data)
assert mock_stream.send.await_count == 2
- first_call = mock_stream.send.await_args_list[0]
- second_call = mock_stream.send.await_args_list[1]
+ first_request = mock_stream.send.await_args_list[0].args[0]
+ second_request = mock_stream.send.await_args_list[1].args[0]
# First chunk
- assert first_call[0][0].write_offset == 100
- assert len(first_call[0][0].checksummed_data.content) == _MAX_CHUNK_SIZE_BYTES
-
- # Second chunk
- assert second_call[0][0].write_offset == 100 + _MAX_CHUNK_SIZE_BYTES
- assert len(second_call[0][0].checksummed_data.content) == 1
+ assert first_request.write_offset == 100
+ assert len(first_request.checksummed_data.content) == _MAX_CHUNK_SIZE_BYTES
+ assert first_request.checksummed_data.crc32c == int.from_bytes(
+ Checksum(data[:_MAX_CHUNK_SIZE_BYTES]).digest(), byteorder="big"
+ )
+ assert not first_request.flush
+ assert not first_request.state_lookup
+
+ # Second chunk (last chunk)
+ assert second_request.write_offset == 100 + _MAX_CHUNK_SIZE_BYTES
+ assert len(second_request.checksummed_data.content) == 1
+ assert second_request.checksummed_data.crc32c == int.from_bytes(
+ Checksum(data[_MAX_CHUNK_SIZE_BYTES:]).digest(), byteorder="big"
+ )
+ assert second_request.flush
+ assert second_request.state_lookup
assert writer.offset == 100 + len(data)
- writer.simple_flush.assert_not_awaited()
@pytest.mark.asyncio
@@ -430,21 +549,31 @@ async def test_append_flushes_when_buffer_is_full(
mock_write_object_stream, mock_client
):
"""Test that append flushes the stream when the buffer size is reached."""
- from google.cloud.storage._experimental.asyncio.async_appendable_object_writer import (
- _MAX_BUFFER_SIZE_BYTES,
- )
writer = AsyncAppendableObjectWriter(mock_client, BUCKET, OBJECT)
writer._is_stream_open = True
writer.persisted_size = 0
mock_stream = mock_write_object_stream.return_value
mock_stream.send = mock.AsyncMock()
- writer.simple_flush = mock.AsyncMock()
+ mock_stream.recv = mock.AsyncMock()
- data = b"a" * _MAX_BUFFER_SIZE_BYTES
+ data = b"a" * _DEFAULT_FLUSH_INTERVAL_BYTES
await writer.append(data)
- writer.simple_flush.assert_awaited_once()
+ num_chunks = _DEFAULT_FLUSH_INTERVAL_BYTES // _MAX_CHUNK_SIZE_BYTES
+ assert mock_stream.send.await_count == num_chunks
+
+ # All but the last request should not have flush or state_lookup set.
+ for i in range(num_chunks - 1):
+ request = mock_stream.send.await_args_list[i].args[0]
+ assert not request.flush
+ assert not request.state_lookup
+
+ # The last request should have flush and state_lookup set.
+ last_request = mock_stream.send.await_args_list[-1].args[0]
+ assert last_request.flush
+ assert last_request.state_lookup
+ assert writer.bytes_appended_since_last_flush == 0
@pytest.mark.asyncio
@@ -453,21 +582,24 @@ async def test_append_flushes_when_buffer_is_full(
)
async def test_append_handles_large_data(mock_write_object_stream, mock_client):
"""Test that append handles data larger than the buffer size."""
- from google.cloud.storage._experimental.asyncio.async_appendable_object_writer import (
- _MAX_BUFFER_SIZE_BYTES,
- )
writer = AsyncAppendableObjectWriter(mock_client, BUCKET, OBJECT)
writer._is_stream_open = True
writer.persisted_size = 0
mock_stream = mock_write_object_stream.return_value
mock_stream.send = mock.AsyncMock()
- writer.simple_flush = mock.AsyncMock()
+ mock_stream.recv = mock.AsyncMock()
- data = b"a" * (_MAX_BUFFER_SIZE_BYTES * 2 + 1)
+ data = b"a" * (_DEFAULT_FLUSH_INTERVAL_BYTES * 2 + 1)
await writer.append(data)
- assert writer.simple_flush.await_count == 2
+ flushed_requests = [
+ call.args[0] for call in mock_stream.send.await_args_list if call.args[0].flush
+ ]
+ assert len(flushed_requests) == 3
+
+ last_request = mock_stream.send.await_args_list[-1].args[0]
+ assert last_request.state_lookup
@pytest.mark.asyncio
@@ -485,14 +617,61 @@ async def test_append_data_two_times(mock_write_object_stream, mock_client):
writer.persisted_size = 0
mock_stream = mock_write_object_stream.return_value
mock_stream.send = mock.AsyncMock()
- writer.simple_flush = mock.AsyncMock()
data1 = b"a" * (_MAX_CHUNK_SIZE_BYTES + 10)
+ mock_stream.recv = mock.AsyncMock(
+ return_value=_storage_v2.BidiWriteObjectResponse(
+            persisted_size=len(data1)
+ )
+ )
await writer.append(data1)
+ assert mock_stream.send.await_count == 2
+ last_request_data1 = mock_stream.send.await_args_list[-1].args[0]
+ assert last_request_data1.flush
+ assert last_request_data1.state_lookup
+
data2 = b"b" * (_MAX_CHUNK_SIZE_BYTES + 20)
+ mock_stream.recv = mock.AsyncMock(
+ return_value=_storage_v2.BidiWriteObjectResponse(
+            persisted_size=len(data2) + len(data1)
+ )
+ )
await writer.append(data2)
+ assert mock_stream.send.await_count == 4
+ last_request_data2 = mock_stream.send.await_args_list[-1].args[0]
+ assert last_request_data2.flush
+ assert last_request_data2.state_lookup
+
total_data_length = len(data1) + len(data2)
assert writer.offset == total_data_length
- assert writer.simple_flush.await_count == 0
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+ "file_size, block_size",
+ [
+ (10, 4 * 1024),
+ (0, _DEFAULT_FLUSH_INTERVAL_BYTES),
+ (20 * 1024 * 1024, _DEFAULT_FLUSH_INTERVAL_BYTES),
+ (16 * 1024 * 1024, _DEFAULT_FLUSH_INTERVAL_BYTES),
+ ],
+)
+async def test_append_from_file(file_size, block_size, mock_client):
+ # arrange
+ fp = BytesIO(b"a" * file_size)
+ writer = AsyncAppendableObjectWriter(mock_client, BUCKET, OBJECT)
+ writer._is_stream_open = True
+ writer.append = mock.AsyncMock()
+
+ # act
+ await writer.append_from_file(fp, block_size=block_size)
+
+ # assert
+    expected_calls = (
+        file_size // block_size
+        if file_size % block_size == 0
+        else file_size // block_size + 1
+    )
+    assert writer.append.await_count == expected_calls
diff --git a/tests/unit/asyncio/test_async_multi_range_downloader.py b/tests/unit/asyncio/test_async_multi_range_downloader.py
index 668006627..2f0600f8d 100644
--- a/tests/unit/asyncio/test_async_multi_range_downloader.py
+++ b/tests/unit/asyncio/test_async_multi_range_downloader.py
@@ -30,6 +30,7 @@
_TEST_BUCKET_NAME = "test-bucket"
_TEST_OBJECT_NAME = "test-object"
+_TEST_OBJECT_SIZE = 1024 * 1024 # 1 MiB
_TEST_GENERATION_NUMBER = 123456789
_TEST_READ_HANDLE = b"test-handle"
@@ -38,9 +39,7 @@ class TestAsyncMultiRangeDownloader:
def create_read_ranges(self, num_ranges):
ranges = []
for i in range(num_ranges):
- ranges.append(
- _storage_v2.ReadRange(read_offset=i, read_length=1, read_id=i)
- )
+ ranges.append((i, 1, BytesIO()))
return ranges
# helper method
@@ -57,6 +56,7 @@ async def _make_mock_mrd(
mock_stream = mock_cls_async_read_object_stream.return_value
mock_stream.open = AsyncMock()
mock_stream.generation_number = _TEST_GENERATION_NUMBER
+ mock_stream.persisted_size = _TEST_OBJECT_SIZE
mock_stream.read_handle = _TEST_READ_HANDLE
mrd = await AsyncMultiRangeDownloader.create_mrd(
@@ -89,16 +89,6 @@ async def test_create_mrd(
read_handle=_TEST_READ_HANDLE,
)
- mrd.read_obj_str.open.assert_called_once()
- # Assert
- mock_cls_async_read_object_stream.assert_called_once_with(
- client=mock_grpc_client,
- bucket_name=_TEST_BUCKET_NAME,
- object_name=_TEST_OBJECT_NAME,
- generation_number=_TEST_GENERATION_NUMBER,
- read_handle=_TEST_READ_HANDLE,
- )
-
mrd.read_obj_str.open.assert_called_once()
assert mrd.client == mock_grpc_client
@@ -106,6 +96,7 @@ async def test_create_mrd(
assert mrd.object_name == _TEST_OBJECT_NAME
assert mrd.generation_number == _TEST_GENERATION_NUMBER
assert mrd.read_handle == _TEST_READ_HANDLE
+ assert mrd.persisted_size == _TEST_OBJECT_SIZE
assert mrd.is_stream_open
@mock.patch(
@@ -132,7 +123,9 @@ async def test_download_ranges_via_async_gather(
mock_mrd = await self._make_mock_mrd(
mock_grpc_client, mock_cls_async_read_object_stream
)
- mock_random_int.side_effect = [123, 456, 789, 91011] # for _func_id and read_id
+
+ mock_random_int.side_effect = [456, 91011]
+
mock_mrd.read_obj_str.send = AsyncMock()
mock_mrd.read_obj_str.recv = AsyncMock()
@@ -164,12 +157,14 @@ async def test_download_ranges_via_async_gather(
)
],
),
+ None,
]
# Act
buffer = BytesIO()
second_buffer = BytesIO()
lock = asyncio.Lock()
+
task1 = asyncio.create_task(mock_mrd.download_ranges([(0, 18, buffer)], lock))
task2 = asyncio.create_task(
mock_mrd.download_ranges([(10, 6, second_buffer)], lock)
@@ -177,18 +172,6 @@ async def test_download_ranges_via_async_gather(
await asyncio.gather(task1, task2)
# Assert
- mock_mrd.read_obj_str.send.side_effect = [
- _storage_v2.BidiReadObjectRequest(
- read_ranges=[
- _storage_v2.ReadRange(read_offset=0, read_length=18, read_id=456)
- ]
- ),
- _storage_v2.BidiReadObjectRequest(
- read_ranges=[
- _storage_v2.ReadRange(read_offset=10, read_length=6, read_id=91011)
- ]
- ),
- ]
assert buffer.getvalue() == data
assert second_buffer.getvalue() == data[10:16]
@@ -213,22 +196,27 @@ async def test_download_ranges(
mock_mrd = await self._make_mock_mrd(
mock_grpc_client, mock_cls_async_read_object_stream
)
- mock_random_int.side_effect = [123, 456] # for _func_id and read_id
+
+ mock_random_int.side_effect = [456]
+
mock_mrd.read_obj_str.send = AsyncMock()
mock_mrd.read_obj_str.recv = AsyncMock()
- mock_mrd.read_obj_str.recv.return_value = _storage_v2.BidiReadObjectResponse(
- object_data_ranges=[
- _storage_v2.ObjectRangeData(
- checksummed_data=_storage_v2.ChecksummedData(
- content=data, crc32c=crc32c_int
- ),
- range_end=True,
- read_range=_storage_v2.ReadRange(
- read_offset=0, read_length=18, read_id=456
- ),
- )
- ],
- )
+ mock_mrd.read_obj_str.recv.side_effect = [
+ _storage_v2.BidiReadObjectResponse(
+ object_data_ranges=[
+ _storage_v2.ObjectRangeData(
+ checksummed_data=_storage_v2.ChecksummedData(
+ content=data, crc32c=crc32c_int
+ ),
+ range_end=True,
+ read_range=_storage_v2.ReadRange(
+ read_offset=0, read_length=18, read_id=456
+ ),
+ )
+ ],
+ ),
+ None,
+ ]
# Act
buffer = BytesIO()
@@ -317,7 +305,6 @@ async def test_close_mrd_not_opened_should_throw_error(self, mock_grpc_client):
mrd = AsyncMultiRangeDownloader(
mock_grpc_client, _TEST_BUCKET_NAME, _TEST_OBJECT_NAME
)
-
# Act + Assert
with pytest.raises(ValueError) as exc:
await mrd.close()
@@ -346,9 +333,7 @@ async def test_downloading_without_opening_should_throw_error(
assert str(exc.value) == "Underlying bidi-gRPC stream is not open"
assert not mrd.is_stream_open
- @mock.patch(
- "google.cloud.storage._experimental.asyncio.async_multi_range_downloader.google_crc32c"
- )
+ @mock.patch("google.cloud.storage._experimental.asyncio._utils.google_crc32c")
@mock.patch(
"google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client"
)
@@ -357,7 +342,7 @@ def test_init_raises_if_crc32c_c_extension_is_missing(
):
mock_google_crc32c.implementation = "python"
- with pytest.raises(exceptions.NotFound) as exc_info:
+ with pytest.raises(exceptions.FailedPrecondition) as exc_info:
AsyncMultiRangeDownloader(mock_grpc_client, "bucket", "object")
assert "The google-crc32c package is not installed with C support" in str(
@@ -366,7 +351,7 @@ def test_init_raises_if_crc32c_c_extension_is_missing(
@pytest.mark.asyncio
@mock.patch(
- "google.cloud.storage._experimental.asyncio.async_multi_range_downloader.Checksum"
+ "google.cloud.storage._experimental.asyncio.retry.reads_resumption_strategy.Checksum"
)
@mock.patch(
"google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client"
@@ -374,6 +359,10 @@ def test_init_raises_if_crc32c_c_extension_is_missing(
async def test_download_ranges_raises_on_checksum_mismatch(
self, mock_client, mock_checksum_class
):
+ from google.cloud.storage._experimental.asyncio.async_multi_range_downloader import (
+ AsyncMultiRangeDownloader,
+ )
+
mock_stream = mock.AsyncMock(
spec=async_read_object_stream._AsyncReadObjectStream
)
@@ -389,7 +378,9 @@ async def test_download_ranges_raises_on_checksum_mismatch(
checksummed_data=_storage_v2.ChecksummedData(
content=test_data, crc32c=server_checksum
),
- read_range=_storage_v2.ReadRange(read_id=0),
+ read_range=_storage_v2.ReadRange(
+ read_id=0, read_offset=0, read_length=len(test_data)
+ ),
range_end=True,
)
]
@@ -402,7 +393,11 @@ async def test_download_ranges_raises_on_checksum_mismatch(
mrd._is_stream_open = True
with pytest.raises(DataCorruption) as exc_info:
- await mrd.download_ranges([(0, len(test_data), BytesIO())])
+ with mock.patch(
+ "google.cloud.storage._experimental.asyncio.async_multi_range_downloader.generate_random_56_bit_integer",
+ return_value=0,
+ ):
+ await mrd.download_ranges([(0, len(test_data), BytesIO())])
assert "Checksum mismatch" in str(exc_info.value)
mock_checksum_class.assert_called_once_with(test_data)
diff --git a/tests/unit/asyncio/test_async_read_object_stream.py b/tests/unit/asyncio/test_async_read_object_stream.py
index 4e4c93dd3..4ba8d34a1 100644
--- a/tests/unit/asyncio/test_async_read_object_stream.py
+++ b/tests/unit/asyncio/test_async_read_object_stream.py
@@ -25,7 +25,9 @@
_TEST_BUCKET_NAME = "test-bucket"
_TEST_OBJECT_NAME = "test-object"
_TEST_GENERATION_NUMBER = 12345
+_TEST_OBJECT_SIZE = 1024 * 1024 # 1 MiB
_TEST_READ_HANDLE = b"test-read-handle"
+_TEST_READ_HANDLE_NEW = b"test-read-handle-new"
async def instantiate_read_obj_stream(mock_client, mock_cls_async_bidi_rpc, open=True):
@@ -37,6 +39,7 @@ async def instantiate_read_obj_stream(mock_client, mock_cls_async_bidi_rpc, open
recv_response = mock.MagicMock(spec=_storage_v2.BidiReadObjectResponse)
recv_response.metadata = mock.MagicMock(spec=_storage_v2.Object)
recv_response.metadata.generation = _TEST_GENERATION_NUMBER
+ recv_response.metadata.size = _TEST_OBJECT_SIZE
recv_response.read_handle = _TEST_READ_HANDLE
socket_like_rpc.recv = AsyncMock(return_value=recv_response)
@@ -52,6 +55,30 @@ async def instantiate_read_obj_stream(mock_client, mock_cls_async_bidi_rpc, open
return read_obj_stream
+async def instantiate_read_obj_stream_with_read_handle(
+ mock_client, mock_cls_async_bidi_rpc, open=True
+):
+ """Helper to create an instance of _AsyncReadObjectStream and open it by default."""
+ socket_like_rpc = AsyncMock()
+ mock_cls_async_bidi_rpc.return_value = socket_like_rpc
+ socket_like_rpc.open = AsyncMock()
+
+ recv_response = mock.MagicMock(spec=_storage_v2.BidiReadObjectResponse)
+ recv_response.read_handle = _TEST_READ_HANDLE_NEW
+ socket_like_rpc.recv = AsyncMock(return_value=recv_response)
+
+ read_obj_stream = _AsyncReadObjectStream(
+ client=mock_client,
+ bucket_name=_TEST_BUCKET_NAME,
+ object_name=_TEST_OBJECT_NAME,
+ )
+
+ if open:
+ await read_obj_stream.open()
+
+ return read_obj_stream
+
+
@mock.patch(
"google.cloud.storage._experimental.asyncio.async_read_object_stream.AsyncBidiRpc"
)
@@ -65,12 +92,6 @@ def test_init_with_bucket_object_generation(mock_client, mock_async_bidi_rpc):
mock_client._client._transport._wrapped_methods = {
"bidi_read_object_rpc": rpc_sentinel,
}
- full_bucket_name = f"projects/_/buckets/{_TEST_BUCKET_NAME}"
- first_bidi_read_req = _storage_v2.BidiReadObjectRequest(
- read_object_spec=_storage_v2.BidiReadObjectSpec(
- bucket=full_bucket_name, object=_TEST_OBJECT_NAME
- ),
- )
# Act
read_obj_stream = _AsyncReadObjectStream(
@@ -86,7 +107,6 @@ def test_init_with_bucket_object_generation(mock_client, mock_async_bidi_rpc):
assert read_obj_stream.object_name == _TEST_OBJECT_NAME
assert read_obj_stream.generation_number == _TEST_GENERATION_NUMBER
assert read_obj_stream.read_handle == _TEST_READ_HANDLE
- assert read_obj_stream.first_bidi_read_req == first_bidi_read_req
assert read_obj_stream.rpc == rpc_sentinel
@@ -112,6 +132,33 @@ async def test_open(mock_client, mock_cls_async_bidi_rpc):
assert read_obj_stream.generation_number == _TEST_GENERATION_NUMBER
assert read_obj_stream.read_handle == _TEST_READ_HANDLE
+ assert read_obj_stream.persisted_size == _TEST_OBJECT_SIZE
+ assert read_obj_stream.is_stream_open
+
+
+@mock.patch(
+ "google.cloud.storage._experimental.asyncio.async_read_object_stream.AsyncBidiRpc"
+)
+@mock.patch(
+ "google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client"
+)
+@pytest.mark.asyncio
+async def test_open_with_read_handle(mock_client, mock_cls_async_bidi_rpc):
+ # arrange
+ read_obj_stream = await instantiate_read_obj_stream_with_read_handle(
+ mock_client, mock_cls_async_bidi_rpc, open=False
+ )
+
+ # act
+ await read_obj_stream.open()
+
+ # assert
+ read_obj_stream.socket_like_rpc.open.assert_called_once()
+ read_obj_stream.socket_like_rpc.recv.assert_called_once()
+
+ assert read_obj_stream.generation_number is None
+ assert read_obj_stream.persisted_size is None
+ assert read_obj_stream.read_handle == _TEST_READ_HANDLE_NEW
assert read_obj_stream.is_stream_open
diff --git a/tests/unit/asyncio/test_async_write_object_stream.py b/tests/unit/asyncio/test_async_write_object_stream.py
index 7fa2123c5..c6ea8a8ff 100644
--- a/tests/unit/asyncio/test_async_write_object_stream.py
+++ b/tests/unit/asyncio/test_async_write_object_stream.py
@@ -55,6 +55,7 @@ async def instantiate_write_obj_stream(mock_client, mock_cls_async_bidi_rpc, ope
mock_response = mock.MagicMock(spec=_storage_v2.BidiWriteObjectResponse)
mock_response.resource = mock.MagicMock(spec=_storage_v2.Object)
mock_response.resource.generation = GENERATION
+ mock_response.resource.size = 0
mock_response.write_handle = WRITE_HANDLE
socket_like_rpc.recv = AsyncMock(return_value=mock_response)
@@ -129,6 +130,7 @@ async def test_open_for_new_object(mock_async_bidi_rpc, mock_client):
mock_response = mock.MagicMock(spec=_storage_v2.BidiWriteObjectResponse)
mock_response.resource = mock.MagicMock(spec=_storage_v2.Object)
mock_response.resource.generation = GENERATION
+ mock_response.resource.size = 0
mock_response.write_handle = WRITE_HANDLE
socket_like_rpc.recv = mock.AsyncMock(return_value=mock_response)
@@ -143,6 +145,7 @@ async def test_open_for_new_object(mock_async_bidi_rpc, mock_client):
socket_like_rpc.recv.assert_called_once()
assert stream.generation_number == GENERATION
assert stream.write_handle == WRITE_HANDLE
+ assert stream.persisted_size == 0
@pytest.mark.asyncio
@@ -158,6 +161,7 @@ async def test_open_for_existing_object(mock_async_bidi_rpc, mock_client):
mock_response = mock.MagicMock(spec=_storage_v2.BidiWriteObjectResponse)
mock_response.resource = mock.MagicMock(spec=_storage_v2.Object)
+ mock_response.resource.size = 1024
mock_response.resource.generation = GENERATION
mock_response.write_handle = WRITE_HANDLE
socket_like_rpc.recv = mock.AsyncMock(return_value=mock_response)
@@ -175,6 +179,7 @@ async def test_open_for_existing_object(mock_async_bidi_rpc, mock_client):
socket_like_rpc.recv.assert_called_once()
assert stream.generation_number == GENERATION
assert stream.write_handle == WRITE_HANDLE
+ assert stream.persisted_size == 1024
@pytest.mark.asyncio
@@ -191,6 +196,7 @@ async def test_open_when_already_open_raises_error(mock_async_bidi_rpc, mock_cli
mock_response = mock.MagicMock(spec=_storage_v2.BidiWriteObjectResponse)
mock_response.resource = mock.MagicMock(spec=_storage_v2.Object)
mock_response.resource.generation = GENERATION
+ mock_response.resource.size = 0
mock_response.write_handle = WRITE_HANDLE
socket_like_rpc.recv = mock.AsyncMock(return_value=mock_response)
diff --git a/tests/unit/test_blob.py b/tests/unit/test_blob.py
index cbf53b398..a8abb1571 100644
--- a/tests/unit/test_blob.py
+++ b/tests/unit/test_blob.py
@@ -3064,7 +3064,13 @@ def _make_resumable_transport(
fake_response2 = self._mock_requests_response(
http.client.PERMANENT_REDIRECT, headers2
)
- json_body = json.dumps({"size": str(total_bytes), "md5Hash": md5_checksum_value, "crc32c": crc32c_checksum_value})
+ json_body = json.dumps(
+ {
+ "size": str(total_bytes),
+ "md5Hash": md5_checksum_value,
+ "crc32c": crc32c_checksum_value,
+ }
+ )
if data_corruption:
fake_response3 = DataCorruption(None)
else: