8000 feat(blob.py): auto-populate standard headers for non-chunked downloa… · googleapis/python-storage@d8432cd · GitHub
[go: up one dir, main page]

Skip to content

Commit d8432cd

Browse files
feat(blob.py): auto-populate standard headers for non-chunked downloads (#204)
Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [x] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-storage/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [x] Ensure the tests and linter pass - [x] Code coverage does not decrease (if any source code was changed) - [x] Appropriate docs were updated (if necessary) Fixes #24 🦕 This PR autopopulates the following fields for non-chunked downloads based on the server header response: ``` blob.content_encoding blob.content_type blob.cache_control blob.storage_class blob.content_language blob.md5_hash blob.crc32c ```
1 parent 6eeb855 commit d8432cd

File tree

2 files changed

+62
-2
lines changed

2 files changed

+62
-2
lines changed

google/cloud/storage/blob.py

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
from io import BytesIO
3232
import mimetypes
3333
import os
34+
import re
3435
import warnings
3536
import six
3637

@@ -783,6 +784,34 @@ def _get_download_url(
783784
)
784785
return _add_query_parameters(base_url, name_value_pairs)
785786

787+
def _extract_headers_from_download(self, response):
788+
"""Extract headers from a non-chunked request's http object.
789+
790+
This avoids the need to make a second request for commonly used
791+
headers.
792+
793+
:type response:
794+
:class requests.models.Response
795+
:param response: The server response from downloading a non-chunked file
796+
"""
797+
self.content_encoding = response.headers.get("Content-Encoding", None)
798+
self.content_type = response.headers.get("Content-Type", None)
799+
self.cache_control = response.headers.get("Cache-Control", None)
800+
self.storage_class = response.headers.get("X-Goog-Storage-Class", None)
801+
self.content_language = response.headers.get("Content-Language", None)
802+
# 'X-Goog-Hash': 'crc32c=4gcgLQ==,md5=CS9tHYTtyFntzj7B9nkkJQ==',
803+
x_goog_hash = response.headers.get("X-Goog-Hash", "")
804+
805+
digests = {}
806+
for encoded_digest in x_goog_hash.split(","):
807+
match = re.match(r"(crc32c|md5)=([\w\d]+)==", encoded_digest)
808+
if match:
809+
method, digest = match.groups()
810+
digests[method] = digest
811+
812+
self.crc32c = digests.get("crc32c", None)
813+
self.md5_hash = digests.get("md5", None)
814+
786815
def _do_download(
787816
self,
788817
transport,
@@ -840,8 +869,8 @@ def _do_download(
840869
download = klass(
841870
download_url, stream=file_obj, headers=headers, start=start, end=end
842871
)
843-
download.consume(transport, timeout=timeout)
844-
872+
response = download.consume(transport, timeout=timeout)
873+
self._extract_headers_from_download(response)
845874
else:
846875

847876
if raw_download:

tests/unit/test_blob.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1451,6 +1451,37 @@ def _download_as_string_helper(self, raw_download, timeout=None):
14511451
stream = blob._do_download.mock_calls[0].args[1]
14521452
self.assertIsInstance(stream, io.BytesIO)
14531453

1454+
def test_download_as_string_w_response_headers(self):
1455+
blob_name = "blob-name"
1456+
client = mock.Mock(spec=["_http"])
1457+
bucket = _Bucket(client)
1458+
media_link = "http://example.com/media/"
1459+
properties = {"mediaLink": media_link}
1460+
blob = self._make_one(blob_name, bucket=bucket, properties=properties)
1461+
1462+
response = self._mock_requests_response(
1463+
http_client.OK,
1464+
headers={
1465+
"Content-Type": "application/json",
1466+
"Content-Language": "ko-kr",
1467+
"Cache-Control": "max-age=1337;public",
1468+
"Content-Encoding": "gzip",
1469+
"X-Goog-Storage-Class": "STANDARD",
1470+
"X-Goog-Hash": "crc32c=4gcgLQ==,md5=CS9tHYTtyFntzj7B9nkkJQ==",
1471+
},
1472+
# { "x": 5 } gzipped
1473+
content=b"\x1f\x8b\x08\x00\xcfo\x17_\x02\xff\xabVP\xaaP\xb2R0U\xa8\x05\x00\xa1\xcaQ\x93\n\x00\x00\x00",
1474+
)
1475+
blob._extract_headers_from_download(response)
1476+
1477+
self.assertEqual(blob.content_type, "application/json")
1478+
self.assertEqual(blob.content_language, "ko-kr")
1479+
self.assertEqual(blob.content_encoding, "gzip")
1480+
self.assertEqual(blob.cache_control, "max-age=1337;public")
1481+
self.assertEqual(blob.storage_class, "STANDARD")
1482+
self.assertEqual(blob.md5_hash, "CS9tHYTtyFntzj7B9nkkJQ")
1483+
self.assertEqual(blob.crc32c, "4gcgLQ")
1484+
14541485
def test_download_as_string_w_generation_match(self):
14551486
GENERATION_NUMBER = 6
14561487
MEDIA_LINK = "http://example.com/media/"

0 commit comments

Comments
 (0)
0