implement Content-MD5 check for PutObject by bentsku · Pull Request #9064 · localstack/localstack · GitHub
[go: up one dir, main page]

Skip to content

implement Content-MD5 check for PutObject #9064

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Sep 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 7 additions & 4 deletions localstack/aws/api/s3/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,8 +232,6 @@ class BucketLocationConstraint(str):
us_gov_west_1 = "us-gov-west-1"
us_west_1 = "us-west-1"
us_west_2 = "us-west-2"
ap_south_2 = "ap-south-2"
eu_south_2 = "eu-south-2"


class BucketLogsPermission(str):
Expand Down Expand Up @@ -370,8 +368,6 @@ class InventoryOptionalField(str):
IntelligentTieringAccessTier = "IntelligentTieringAccessTier"
BucketKeyStatus = "BucketKeyStatus"
ChecksumAlgorithm = "ChecksumAlgorithm"
ObjectAccessControlList = "ObjectAccessControlList"
ObjectOwner = "ObjectOwner"


class JSONType(str):
Expand Down Expand Up @@ -882,6 +878,13 @@ class NoSuchBucketPolicy(ServiceException):
BucketName: Optional[BucketName]


class InvalidDigest(ServiceException):
    """
    S3 service exception signalling that the Content-MD5 supplied with a request was rejected.

    The S3 providers raise it from ``put_object`` with the message
    "The Content-MD5 you specified was invalid." when the MD5 derived from the stored
    object's ETag differs from the ``ContentMD5`` request value.
    """

    # error code identifying this exception
    code: str = "InvalidDigest"
    sender_fault: bool = False
    # HTTP status returned with the error
    status_code: int = 400
    # the Content-MD5 value the error is raised for (the spec patch gives this member the
    # location name "Content-MD5")
    Content_MD5: Optional[ContentMD5]


AbortDate = datetime


Expand Down
15 changes: 15 additions & 0 deletions localstack/aws/spec-patches.json
Original file line number Diff line number Diff line change
Expand Up @@ -1134,6 +1134,21 @@
"value": {
"httpStatusCode": 403
}
},
{
"op": "add",
"path": "/shapes/InvalidDigest",
"value": {
"type": "structure",
"members": {
"Content_MD5": {
"shape": "ContentMD5",
"locationName":"Content-MD5"
}
},
"documentation": "<p>The Content-MD5 you specified was invalid.</p>",
"exception": true
}
}
]
}
19 changes: 19 additions & 0 deletions localstack/services/s3/provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@
IntelligentTieringConfigurationList,
IntelligentTieringId,
InvalidArgument,
InvalidDigest,
InvalidPartOrder,
InvalidStorageClass,
InvalidTargetBucketForLogging,
Expand Down Expand Up @@ -156,6 +157,7 @@
from localstack.services.s3.utils import (
capitalize_header_name_from_snake_case,
create_redirect_for_post_request,
etag_to_base_64_content_md5,
extract_bucket_key_version_id_from_copy_source,
get_bucket_from_moto,
get_failed_precondition_copy_source,
Expand Down Expand Up @@ -534,6 +536,23 @@ def put_object(
)
raise

# TODO: handle ContentMD5 and ChecksumAlgorithm in a handler for all requests except requests with a streaming
# body. We can use the specs to verify which operations needs to have the checksum validated
# verify content_md5
if content_md5 := request.get("ContentMD5"):
calculated_md5 = etag_to_base_64_content_md5(response["ETag"].strip('"'))
if calculated_md5 != content_md5:
moto_backend.delete_object(
bucket_name=request["Bucket"],
key_name=request["Key"],
version_id=response.get("VersionId"),
bypass=True,
)
raise InvalidDigest(
"The Content-MD5 you specified was invalid.",
Content_MD5=content_md5,
)

# moto interprets the Expires in query string for presigned URL as an Expires header and use it for the object
# we set it to the correctly parsed value in Request, else we remove it from moto metadata
# we are getting the last set key here so no need for versionId when getting the key
Expand Down
17 changes: 17 additions & 0 deletions localstack/services/s3/provider_stream.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
CopyObjectOutput,
CopyObjectRequest,
InvalidArgument,
InvalidDigest,
InvalidStorageClass,
NoSuchUpload,
PreconditionFailed,
Expand All @@ -44,6 +45,7 @@
from localstack.services.s3.provider import S3Provider
from localstack.services.s3.utils import (
InvalidRequest,
etag_to_base_64_content_md5,
extract_bucket_key_version_id_from_copy_source,
get_bucket_from_moto,
get_key_from_moto_bucket,
Expand Down Expand Up @@ -151,6 +153,21 @@ def put_object(
# the etag is recalculated
response["ETag"] = key_object.etag

# verify content_md5
if content_md5 := request.get("ContentMD5"):
calculated_md5 = etag_to_base_64_content_md5(key_object.etag.strip('"'))
if calculated_md5 != content_md5:
moto_backend.delete_object(
bucket_name=request["Bucket"],
key_name=request["Key"],
version_id=key_object.version_id,
bypass=True,
)
raise InvalidDigest(
"The Content-MD5 you specified was invalid.",
Content_MD5=content_md5,
)

if expires := request.get("Expires"):
key_object.set_expiry(expires)
elif "expires" in key_object.metadata: # if it got added from query string parameter
Expand Down
22 changes: 21 additions & 1 deletion localstack/services/s3/utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import base64
import codecs
import datetime
import hashlib
import logging
Expand Down Expand Up @@ -64,7 +66,14 @@
from localstack.services.s3.exceptions import InvalidRequest, MalformedXML
from localstack.utils.aws import arns
from localstack.utils.aws.arns import parse_arn
from localstack.utils.strings import checksum_crc32, checksum_crc32c, hash_sha1, hash_sha256
from localstack.utils.strings import (
checksum_crc32,
checksum_crc32c,
hash_sha1,
hash_sha256,
to_bytes,
to_str,
)
from localstack.utils.urls import localstack_host

LOG = logging.getLogger(__name__)
Expand Down Expand Up @@ -274,6 +283,17 @@ def verify_checksum(checksum_algorithm: str, data: bytes, request: Dict):
)


def etag_to_base_64_content_md5(etag: ETag) -> str:
    """
    Build the base64 ``Content-MD5`` value that corresponds to an ETag holding an md5 hexdigest.

    :param etag: the ETag to convert; surrounding double quotes, if any, are dropped first
    :return: the base64 encoding of the digest, as a string
    """
    unquoted_hexdigest = to_bytes(etag.strip('"'))
    # the ETag is hexadecimal text: turn it back into the raw digest bytes before base64-encoding
    raw_digest = codecs.decode(unquoted_hexdigest, "hex")
    encoded_digest = base64.b64encode(raw_digest)
    return to_str(encoded_digest)


def decode_aws_chunked_object(
stream: IO[bytes],
buffer: IO[bytes],
Expand Down
13 changes: 13 additions & 0 deletions localstack/services/s3/v3/provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@
IntelligentTieringId,
InvalidArgument,
InvalidBucketName,
InvalidDigest,
InvalidObjectState,
InvalidPartNumber,
InvalidPartOrder,
Expand Down Expand Up @@ -222,6 +223,7 @@
add_expiration_days_to_datetime,
create_redirect_for_post_request,
create_s3_kms_managed_key_for_region,
etag_to_base_64_content_md5,
extract_bucket_key_version_id_from_copy_source,
get_canned_acl,
get_class_attrs_from_spec_class,
Expand Down Expand Up @@ -634,6 +636,17 @@ def put_object(
f"Value for x-amz-checksum-{checksum_algorithm.lower()} header is invalid."
)

# TODO: handle ContentMD5 and ChecksumAlgorithm in a handler for all requests except requests with a streaming
# body. We can use the specs to verify which operations needs to have the checksum validated
if content_md5 := request.get("ContentMD5"):
calculated_md5 = etag_to_base_64_content_md5(s3_stored_object.etag)
if calculated_md5 != content_md5:
self._storage_backend.remove(bucket_name, s3_object)
raise InvalidDigest(
"The Content-MD5 you specified was invalid.",
Content_MD5=content_md5,
)

s3_bucket.objects.set(key, s3_object)

# in case we are overriding an object, delete the tags entry
Expand Down
35 changes: 32 additions & 3 deletions tests/aws/services/s3/test_s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,11 @@
LAMBDA_RUNTIME_PYTHON39,
)
from localstack.services.s3 import constants as s3_constants
from localstack.services.s3.utils import parse_expiration_header, rfc_1123_datetime
from localstack.services.s3.utils import (
etag_to_base_64_content_md5,
parse_expiration_header,
rfc_1123_datetime,
)
from localstack.testing.aws.util import is_aws_cloud
from localstack.testing.pytest import markers
from localstack.testing.snapshots.transformer_utility import TransformerUtility
Expand Down Expand Up @@ -3225,21 +3229,46 @@ def test_precondition_failed_error(self, s3_create_bucket, snapshot, aws_client)
snapshot.match("get-object-if-match", e.value.response)

@markers.aws.validated
@pytest.mark.xfail(reason="Error format is wrong and missing keys")
@pytest.mark.xfail(
condition=LEGACY_S3_PROVIDER, reason="Error format is wrong and missing keys"
)
@markers.snapshot.skip_snapshot_verify(
condition=lambda: not is_native_provider(),
paths=["$..ServerSideEncryption"],
)
def test_s3_invalid_content_md5(self, s3_bucket, snapshot, aws_client):
# put object with invalid content MD5
# TODO: implement ContentMD5 in ASF
content = "something"
response = aws_client.s3.put_object(
Bucket=s3_bucket,
Key="test-key",
Body=content,
)
md = hashlib.md5(content.encode("utf-8")).digest()
content_md5 = base64.b64encode(md).decode("utf-8")
base_64_content_md5 = etag_to_base_64_content_md5(response["ETag"])
assert content_md5 == base_64_content_md5

hashes = ["__invalid__", "000", "not base64 encoded checksum", "MTIz"]
for index, md5hash in enumerate(hashes):
with pytest.raises(ClientError) as e:
aws_client.s3.put_object(
Bucket=s3_bucket,
Key="test-key",
Body="something",
Body=content,
ContentMD5=md5hash,
)
snapshot.match(f"md5-error-{index}", e.value.response)

response = aws_client.s3.put_object(
Bucket=s3_bucket,
Key="test-key",
Body=content,
ContentMD5=base_64_content_md5,
)
snapshot.match("success-put-object-md5", response)

@markers.aws.validated
@markers.snapshot.skip_snapshot_verify(
condition=is_old_provider, paths=["$..VersionId", "$..ContentLanguage"]
Expand Down
10 changes: 9 additions & 1 deletion tests/aws/services/s3/test_s3.snapshot.json
Original file line number Diff line number Diff line change
Expand Up @@ -1613,7 +1613,7 @@
}
},
"tests/aws/services/s3/test_s3.py::TestS3::test_s3_invalid_content_md5": {
"recorded-date": "03-08-2023, 04:17:53",
"recorded-date": "05-09-2023, 02:58:55",
"recorded-content": {
"md5-error-0": {
"Error": {
Expand Down Expand Up @@ -1658,6 +1658,14 @@
"HTTPHeaders": {},
"HTTPStatusCode": 400
}
},
"success-put-object-md5": {
"ETag": "\"437b930db84b8079c2dd804a71936b5f\"",
"ServerSideEncryption": "AES256",
"ResponseMetadata": {
"HTTPHeaders": {},
"HTTPStatusCode": 200
}
}
}
},
Expand Down
0