implement S3 MD5 checksum check for UploadPart and improve logic by bentsku · Pull Request #11795 · localstack/localstack · GitHub
[go: up one dir, main page]

Skip to content

implement S3 MD5 checksum check for UploadPart and improve logic #11795

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Nov 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions localstack-core/localstack/aws/api/s3/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -974,6 +974,14 @@ class ConditionalRequestConflict(ServiceException):
Key: Optional[ObjectKey]


class BadDigest(ServiceException):
    """
    S3 error returned when the Content-MD5 specified in the request did not match the MD5 of the data that was
    received.
    """

    # error code set on the exception
    code: str = "BadDigest"
    sender_fault: bool = False
    # HTTP status code set on the exception
    status_code: int = 400
    # the digest the client announced in its request
    ExpectedDigest: Optional[ContentMD5]
    # the digest computed from the data that was actually received
    CalculatedDigest: Optional[ContentMD5]


AbortDate = datetime


Expand Down
20 changes: 20 additions & 0 deletions localstack-core/localstack/aws/spec-patches.json
Original file line number Diff line number Diff line change
Expand Up @@ -1297,6 +1297,26 @@
"documentation": "<p>The conditional request cannot succeed due to a conflicting operation against this resource.</p>",
"exception": true
}
},
{
"op": "add",
"path": "/shapes/BadDigest",
"value": {
"type": "structure",
"members": {
"ExpectedDigest": {
"shape": "ContentMD5"
},
"CalculatedDigest": {
"shape": "ContentMD5"
}
},
"error": {
"httpStatusCode": 400
},
"documentation": "<p>The Content-MD5 you specified did not match what we received.</p>",
"exception": true
}
}
],
"apigatewayv2/2018-11-29/service-2": [
Expand Down
39 changes: 35 additions & 4 deletions localstack-core/localstack/services/s3/provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
AccountId,
AnalyticsConfiguration,
AnalyticsId,
BadDigest,
Body,
Bucket,
BucketAlreadyExists,
Expand Down Expand Up @@ -250,6 +251,7 @@
from localstack.services.s3.utils import (
ObjectRange,
add_expiration_days_to_datetime,
base_64_content_md5_to_etag,
create_redirect_for_post_request,
create_s3_kms_managed_key_for_region,
etag_to_base_64_content_md5,
Expand Down Expand Up @@ -653,6 +655,16 @@ def put_object(

version_id = generate_version_id(s3_bucket.versioning_status)

etag_content_md5 = ""
if content_md5 := request.get("ContentMD5"):
# assert that the received ContentMD5 is a properly b64 encoded value that fits a MD5 hash length
etag_content_md5 = base_64_content_md5_to_etag(content_md5)
if not etag_content_md5:
raise InvalidDigest(
"The Content-MD5 you specified was invalid.",
Content_MD5=content_md5,
)

checksum_algorithm = get_s3_checksum_algorithm_from_request(request)
checksum_value = (
request.get(f"Checksum{checksum_algorithm.upper()}") if checksum_algorithm else None
Expand Down Expand Up @@ -741,13 +753,14 @@ def put_object(

# TODO: handle ContentMD5 and ChecksumAlgorithm in a handler for all requests except requests with a
# streaming body. We can use the specs to verify which operations needs to have the checksum validated
if content_md5 := request.get("ContentMD5"):
if content_md5:
calculated_md5 = etag_to_base_64_content_md5(s3_stored_object.etag)
if calculated_md5 != content_md5:
self._storage_backend.remove(bucket_name, s3_object)
raise InvalidDigest(
"The Content-MD5 you specified was invalid.",
Content_MD5=content_md5,
raise BadDigest(
"The Content-MD5 you specified did not match what we received.",
ExpectedDigest=etag_content_md5,
CalculatedDigest=calculated_md5,
)

s3_bucket.objects.set(key, s3_object)
Expand Down Expand Up @@ -2114,6 +2127,14 @@ def upload_part(
ArgumentValue=part_number,
)

if content_md5 := request.get("ContentMD5"):
# assert that the received ContentMD5 is a properly b64 encoded value that fits a MD5 hash length
if not base_64_content_md5_to_etag(content_md5):
raise InvalidDigest(
"The Content-MD5 you specified was invalid.",
Content_MD5=content_md5,
)

checksum_algorithm = get_s3_checksum_algorithm_from_request(request)
checksum_value = (
request.get(f"Checksum{checksum_algorithm.upper()}") if checksum_algorithm else None
Expand Down Expand Up @@ -2190,6 +2211,16 @@ def upload_part(
f"Value for x-amz-checksum-{checksum_algorithm.lower()} header is invalid."
)

if content_md5:
calculated_md5 = etag_to_base_64_content_md5(s3_part.etag)
if calculated_md5 != content_md5:
stored_multipart.remove_part(s3_part)
raise BadDigest(
"The Content-MD5 you specified did not match what we received.",
ExpectedDigest=content_md5,
CalculatedDigest=calculated_md5,
)

s3_multipart.parts[part_number] = s3_part

response = UploadPartOutput(
Expand Down
23 changes: 22 additions & 1 deletion localstack-core/localstack/services/s3/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
BucketCannedACL,
BucketName,
ChecksumAlgorithm,
ContentMD5,
CopyObjectRequest,
CopySource,
ETag,
Expand Down Expand Up @@ -72,6 +73,7 @@
checksum_crc32c,
hash_sha1,
hash_sha256,
is_base64,
to_bytes,
to_str,
)
Expand Down Expand Up @@ -419,7 +421,7 @@ def verify_checksum(checksum_algorithm: str, data: bytes, request: Dict):

def etag_to_base_64_content_md5(etag: ETag) -> str:
"""
Convert an ETag, representing an md5 hexdigest (might be quoted), to its base64 encoded representation
Convert an ETag, representing a MD5 hexdigest (might be quoted), to its base64 encoded representation
:param etag: an ETag, might be quoted
:return: the base64 value
"""
Expand All @@ -428,6 +430,25 @@ def etag_to_base_64_content_md5(etag: ETag) -> str:
return to_str(base64.b64encode(byte_digest))


def base_64_content_md5_to_etag(content_md5: ContentMD5) -> str | None:
    """
    Translate the value of a ContentMD5 header (the base64 encoding of a binary MD5 digest) into the matching ETag
    value, which is the hex encoding of that same digest.
    :param content_md5: the base64 encoded ContentMD5 header value
    :return: the hex encoded MD5 digest usable as an ETag, or None when the input is not valid base64 or does not
    decode to something with the length of a MD5 hash
    """
    if is_base64(content_md5):
        raw_digest = base64.b64decode(content_md5)
        etag = to_str(codecs.encode(raw_digest, "hex"))
        # a MD5 digest is 16 bytes long, which gives 32 characters once hex encoded
        if len(etag) == 32:
            return etag

    return None


def decode_aws_chunked_object(
stream: IO[bytes],
buffer: IO[bytes],
Expand Down
59 changes: 58 additions & 1 deletion tests/aws/services/s3/test_s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -3965,7 +3965,18 @@ def test_s3_invalid_content_md5(self, s3_bucket, snapshot, aws_client):
base_64_content_md5 = etag_to_base_64_content_md5(response["ETag"])
assert content_md5 == base_64_content_md5

hashes = ["__invalid__", "000", "not base64 encoded checksum", "MTIz"]
bad_digest_md5 = base64.b64encode(
hashlib.md5(f"{content}1".encode("utf-8")).digest()
).decode("utf-8")

hashes = [
"__invalid__",
"000",
"not base64 encoded checksum",
"MTIz",
base64.b64encode(b"test-string").decode("utf-8"),
]

for index, md5hash in enumerate(hashes):
with pytest.raises(ClientError) as e:
aws_client.s3.put_object(
Expand All @@ -3976,6 +3987,15 @@ def test_s3_invalid_content_md5(self, s3_bucket, snapshot, aws_client):
)
snapshot.match(f"md5-error-{index}", e.value.response)

with pytest.raises(ClientError) as e:
aws_client.s3.put_object(
Bucket=s3_bucket,
Key="test-key",
Body=content,
ContentMD5=bad_digest_md5,
)
snapshot.match("md5-error-bad-digest", e.value.response)

response = aws_client.s3.put_object(
Bucket=s3_bucket,
Key="test-key",
Expand All @@ -3984,6 +4004,43 @@ def test_s3_invalid_content_md5(self, s3_bucket, snapshot, aws_client):
)
snapshot.match("success-put-object-md5", response)

# also try with UploadPart, same logic
create_multipart = aws_client.s3.create_multipart_upload(Bucket=s3_bucket, Key="multi-key")
upload_id = create_multipart["UploadId"]

for index, md5hash in enumerate(hashes):
with pytest.raises(ClientError) as e:
aws_client.s3.upload_part(
Bucket=s3_bucket,
Key="multi-key",
Body=content,
UploadId=upload_id,
PartNumber=1,
ContentMD5=md5hash,
)
snapshot.match(f"upload-part-md5-error-{index}", e.value.response)

with pytest.raises(ClientError) as e:
aws_client.s3.upload_part(
Bucket=s3_bucket,
Key="multi-key",
Body=content,
UploadId=upload_id,
PartNumber=1,
ContentMD5=bad_digest_md5,
)
snapshot.match("upload-part-md5-bad-digest", e.value.response)

response = aws_client.s3.upload_part(
Bucket=s3_bucket,
Key="multi-key",
Body=content,
UploadId=upload_id,
PartNumber=1,
ContentMD5=base_64_content_md5,
)
snapshot.match("success-upload-part-md5", response)

@markers.aws.validated
@markers.snapshot.skip_snapshot_verify(
condition=is_v2_provider,
Expand Down
100 changes: 99 additions & 1 deletion tests/aws/services/s3/test_s3.snapshot.json
Original file line number Diff line number Diff line change
Expand Up @@ -1581,7 +1581,7 @@
}
},
"tests/aws/services/s3/test_s3.py::TestS3::test_s3_invalid_content_md5": {
"recorded-date": "05-09-2023, 02:58:55",
"recorded-date": "06-11-2024, 18:40:12",
"recorded-content": {
"md5-error-0": {
"Error": {
Expand Down Expand Up @@ -1627,13 +1627,111 @@
"HTTPStatusCode": 400
}
},
"md5-error-4": {
"Error": {
"Code": "InvalidDigest",
"Content-MD5": "dGVzdC1zdHJpbmc=",
"Message": "The Content-MD5 you specified was invalid."
},
"ResponseMetadata": {
"HTTPHeaders": {},
"HTTPStatusCode": 400
}
},
"md5-error-bad-digest": {
"Error": {
"CalculatedDigest": "Q3uTDbhLgHnC3YBKcZNrXw==",
"Code": "BadDigest",
"ExpectedDigest": "09891eb590524e35fc73372cddc5d596",
"Message": "The Content-MD5 you specified did not match what we received."
},
"ResponseMetadata": {
"HTTPHeaders": {},
"HTTPStatusCode": 400
}
},
"success-put-object-md5": {
"ETag": "\"437b930db84b8079c2dd804a71936b5f\"",
"ServerSideEncryption": "AES256",
"ResponseMetadata": {
"HTTPHeaders": {},
"HTTPStatusCode": 200
}
},
"upload-part-md5-error-0": {
"Error": {
"Code": "InvalidDigest",
"Content-MD5": "__invalid__",
"Message": "The Content-MD5 you specified was invalid."
},
"ResponseMetadata": {
"HTTPHeaders": {},
"HTTPStatusCode": 400
}
},
"upload-part-md5-error-1": {
"Error": {
"Code": "InvalidDigest",
"Content-MD5": "000",
"Message": "The Content-MD5 you specified was invalid."
},
"ResponseMetadata": {
"HTTPHeaders": {},
"HTTPStatusCode": 400
}
},
"upload-part-md5-error-2": {
"Error": {
"Code": "InvalidDigest",
"Content-MD5": "not base64 encoded checksum",
"Message": "The Content-MD5 you specified was invalid."
},
"ResponseMetadata": {
"HTTPHeaders": {},
"HTTPStatusCode": 400
}
},
"upload-part-md5-error-3": {
"Error": {
"Code": "InvalidDigest",
"Content-MD5": "MTIz",
"Message": "The Content-MD5 you specified was invalid."
},
"ResponseMetadata": {
"HTTPHeaders": {},
"HTTPStatusCode": 400
}
},
"upload-part-md5-error-4": {
"Error": {
"Code": "InvalidDigest",
"Content-MD5": "dGVzdC1zdHJpbmc=",
"Message": "The Content-MD5 you specified was invalid."
},
"ResponseMetadata": {
"HTTPHeaders": {},
"HTTPStatusCode": 400
}
},
"upload-part-md5-bad-digest": {
"Error": {
"CalculatedDigest": "Q3uTDbhLgHnC3YBKcZNrXw==",
"Code": "BadDigest",
"ExpectedDigest": "CYketZBSTjX8czcs3cXVlg==",
"Message": "The Content-MD5 you specified did not match what we received."
},
"ResponseMetadata": {
"HTTPHeaders": {},
"HTTPStatusCode": 400
}
},
"success-upload-part-md5": {
"ETag": "\"437b930db84b8079c2dd804a71936b5f\"",
"ServerSideEncryption": "AES256",
"ResponseMetadata": {
"HTTPHeaders": {},
"HTTPStatusCode": 200
}
}
}
},
Expand Down
2 changes: 1 addition & 1 deletion tests/aws/services/s3/test_s3.validation.json
Original file line number Diff line number Diff line change
Expand Up @@ -375,7 +375,7 @@
"last_validated_date": "2023-08-03T02:25:47+00:00"
},
"tests/aws/services/s3/test_s3.py::TestS3::test_s3_invalid_content_md5": {
"last_validated_date": "2023-09-05T00:58:55+00:00"
"last_validated_date": "2024-11-06T18:40:12+00:00"
},
"tests/aws/services/s3/test_s3.py::TestS3::test_s3_inventory_report_crud": {
"last_validated_date": "2023-08-03T02:26:19+00:00"
Expand Down
Loading
0