S3: implement ObjectParts in GetObjectAttributes (#12764) · localstack/localstack@99cd6da · GitHub
[go: up one dir, main page]

Skip to content

Commit 99cd6da

Browse files
authored
S3: implement ObjectParts in GetObjectAttributes (#12764)
1 parent ea0a194 commit 99cd6da

File tree

8 files changed

+871
-109
lines changed

8 files changed

+871
-109
lines changed

localstack-core/localstack/services/s3/models.py

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@
5252
ObjectStorageClass,
5353
ObjectVersionId,
5454
Owner,
55+
Part,
5556
PartNumber,
5657
Payer,
5758
Policy,
@@ -91,6 +92,10 @@
9192
_gmt_zone_info = ZoneInfo("GMT")
9293

9394

95+
class InternalObjectPart(Part):
96+
_position: int
97+
98+
9499
# note: not really a need to use a dataclass here, as it has a lot of fields, but only a few are set at creation
95100
class S3Bucket:
96101
name: BucketName
@@ -275,7 +280,7 @@ class S3Object:
275280
website_redirect_location: Optional[WebsiteRedirectLocation]
276281
acl: Optional[AccessControlPolicy]
277282
is_current: bool
278-
parts: Optional[dict[int, tuple[int, int]]]
283+
parts: Optional[dict[int, InternalObjectPart]]
279284
restore: Optional[Restore]
280285
internal_last_modified: int
281286

@@ -498,14 +503,16 @@ def complete_multipart(
498503
object_etag = hashlib.md5(usedforsecurity=False)
499504
has_checksum = self.checksum_algorithm is not None
500505
checksum_hash = None
506+
checksum_key = None
501507
if has_checksum:
508+
checksum_key = f"Checksum{self.checksum_algorithm.upper()}"
502509
if self.checksum_type == ChecksumType.COMPOSITE:
503510
checksum_hash = get_s3_checksum(self.checksum_algorithm)
504511
else:
505512
checksum_hash = CombinedCrcHash(self.checksum_algorithm)
506513

507514
pos = 0
508-
parts_map = {}
515+
parts_map: dict[int, InternalObjectPart] = {}
509516
for index, part in enumerate(parts):
510517
part_number = part["PartNumber"]
511518
part_etag = part["ETag"].strip('"')
@@ -526,7 +533,6 @@ def complete_multipart(
526533
)
527534

528535
if has_checksum:
529-
checksum_key = f"Checksum{self.checksum_algorithm.upper()}"
530536
if not (part_checksum := part.get(checksum_key)):
531537
if self.checksum_type == ChecksumType.COMPOSITE:
532538
# weird case, they still try to validate a different checksum type than the multipart
@@ -575,7 +581,16 @@ def complete_multipart(
575581

576582
object_etag.update(bytes.fromhex(s3_part.etag))
577583
# keep track of the parts size, as it can be queried afterward on the object as a Range
578-
parts_map[part_number] = (pos, s3_part.size)
584+
internal_part = InternalObjectPart(
585+
_position=pos,
586+
Size=s3_part.size,
587+
ETag=s3_part.etag,
588+
PartNumber=s3_part.part_number,
589+
)
590+
if has_checksum and self.checksum_type == ChecksumType.COMPOSITE:
591+
internal_part[checksum_key] = s3_part.checksum_value
592+
593+
parts_map[part_number] = internal_part
579594
pos += s3_part.size
580595

581596
if mpu_size and mpu_size != pos:

localstack-core/localstack/services/s3/provider.py

Lines changed: 64 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,7 @@
167167
ObjectLockRetention,
168168
ObjectLockToken,
169169
ObjectOwnership,
170+
ObjectPart,
170171
ObjectVersion,
171172
ObjectVersionId,
172173
ObjectVersionStorageClass,
@@ -317,6 +318,7 @@
317318
from localstack.services.s3.website_hosting import register_website_hosting_routes
318319
from localstack.state import AssetDirectory, StateVisitor
319320
from localstack.utils.aws.arns import s3_bucket_name
321+
from localstack.utils.collections import select_from_typed_dict
320322
from localstack.utils.strings import short_uid, to_bytes, to_str
321323

322324
LOG = logging.getLogger(__name__)
@@ -2032,6 +2034,7 @@ def get_object_attributes(
20322034

20332035
object_attrs = request.get("ObjectAttributes", [])
20342036
response = GetObjectAttributesOutput()
2037+
object_checksum_type = getattr(s3_object, "checksum_type", ChecksumType.FULL_OBJECT)
20352038
if "ETag" in object_attrs:
20362039
response["ETag"] = s3_object.etag
20372040
if "StorageClass" in object_attrs:
@@ -2045,7 +2048,7 @@ def get_object_attributes(
20452048
checksum_value = s3_object.checksum_value
20462049
response["Checksum"] = {
20472050
f"Checksum{checksum_algorithm.upper()}": checksum_value,
2048-
"ChecksumType": getattr(s3_object, "checksum_type", ChecksumType.FULL_OBJECT),
2051+
"ChecksumType": object_checksum_type,
20492052
}
20502053

20512054
response["LastModified"] = s3_object.last_modified
@@ -2054,9 +2057,55 @@ def get_object_attributes(
20542057
response["VersionId"] = s3_object.version_id
20552058

20562059
if "ObjectParts" in object_attrs and s3_object.parts:
2057-
# TODO: implements ObjectParts, this is basically a simplified `ListParts` call on the object, we might
2058-
# need to store more data about the Parts once we implement checksums for them
2059-
response["ObjectParts"] = GetObjectAttributesParts(TotalPartsCount=len(s3_object.parts))
2060+
if object_checksum_type == ChecksumType.FULL_OBJECT:
2061+
response["ObjectParts"] = GetObjectAttributesParts(
2062+
TotalPartsCount=len(s3_object.parts)
2063+
)
2064+
else:
2065+
# this is basically a simplified `ListParts` call on the object, only returned when the checksum type is
2066+
# COMPOSITE
2067+
count = 0
2068+
is_truncated = False
2069+
part_number_marker = request.get("PartNumberMarker") or 0
2070+
max_parts = request.get("MaxParts") or 1000
2071+
2072+
parts = []
2073+
all_parts = sorted(s3_object.parts.items())
2074+
last_part_number, last_part = all_parts[-1]
2075+
2076+
# TODO: remove this backward compatibility hack needed for state created with <= 4.5
2077+
# the parts would only be a tuple and would not store the proper state for 4.5 and earlier, so we need
2078+
# to return early
2079+
if isinstance(last_part, tuple):
2080+
response["ObjectParts"] = GetObjectAttributesParts(
2081+
TotalPartsCount=len(s3_object.parts)
2082+
)
2083+
return response
2084+
2085+
for part_number, part in all_parts:
2086+
if part_number <= part_number_marker:
2087+
continue
2088+
part_item = select_from_typed_dict(ObjectPart, part)
2089+
2090+
parts.append(part_item)
2091+
count += 1
2092+
2093+
if count >= max_parts and part["PartNumber"] != last_part_number:
2094+
is_truncated = True
2095+
break
2096+
2097+
object_parts = GetObjectAttributesParts(
2098+
TotalPartsCount=len(s3_object.parts),
2099+
IsTruncated=is_truncated,
2100+
MaxParts=max_parts,
2101+
PartNumberMarker=part_number_marker,
2102+
NextPartNumberMarker=0,
2103+
)
2104+
if parts:
2105+
object_parts["Parts"] = parts
2106+
object_parts["NextPartNumberMarker"] = parts[-1]["PartNumber"]
2107+
2108+
response["ObjectParts"] = object_parts
20602109

20612110
return response
20622111

@@ -2729,8 +2778,6 @@ def list_parts(
27292778
sse_customer_key_md5: SSECustomerKeyMD5 = None,
27302779
**kwargs,
27312780
) -> ListPartsOutput:
2732-
# TODO: implement MaxParts
2733-
# TODO: implements PartNumberMarker
27342781
store, s3_bucket = self._get_cross_account_bucket(context, bucket)
27352782

27362783
if (
@@ -2743,10 +2790,6 @@ def list_parts(
27432790
UploadId=upload_id,
27442791
)
27452792

2746-
# AbortDate: Optional[AbortDate] TODO: lifecycle
2747-
# AbortRuleId: Optional[AbortRuleId] TODO: lifecycle
2748-
# RequestCharged: Optional[RequestCharged]
2749-
27502793
count = 0
27512794
is_truncated = False
27522795
part_number_marker = part_number_marker or 0
@@ -2797,6 +2840,10 @@ def list_parts(
27972840
response["ChecksumAlgorithm"] = s3_multipart.object.checksum_algorithm
27982841
response["ChecksumType"] = getattr(s3_multipart, "checksum_type", None)
27992842

2843+
# AbortDate: Optional[AbortDate] TODO: lifecycle
2844+
# AbortRuleId: Optional[AbortRuleId] TODO: lifecycle
2845+
# RequestCharged: Optional[RequestCharged]
2846+
28002847
return response
28012848

28022849
def list_multipart_uploads(
@@ -4680,7 +4727,13 @@ def get_part_range(s3_object: S3Object, part_number: PartNumber) -> ObjectRange:
46804727
ActualPartCount=len(s3_object.parts),
46814728
)
46824729

4683-
begin, part_length = part_data
4730+
# TODO: remove for next major version 5.0, compatibility for <= 4.5
4731+
if isinstance(part_data, tuple):
4732+
begin, part_length = part_data
4733+
else:
4734+
begin = part_data["_position"]
4735+
part_length = part_data["Size"]
4736+
46844737
end = begin + part_length - 1
46854738
return ObjectRange(
46864739
begin=begin,

tests/aws/services/s3/test_s3.py

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -12298,7 +12298,7 @@ def test_complete_multipart_parts_checksum_composite(
1229812298
object_attrs = aws_client.s3.get_object_attributes(
1229912299
Bucket=s3_bucket,
1230012300
Key=key_name,
12301-
ObjectAttributes=["Checksum", "ETag"],
12301+
ObjectAttributes=["Checksum", "ETag", "ObjectParts"],
1230212302
)
1230312303
snapshot.match("get-object-attrs", object_attrs)
1230412304

@@ -12311,7 +12311,7 @@ def test_complete_multipart_parts_checksum_composite(
1231112311
object_attrs = aws_client.s3.get_object_attributes(
1231212312
Bucket=s3_bucket,
1231312313
Key=dest_key,
12314-
ObjectAttributes=["Checksum", "ETag"],
12314+
ObjectAttributes=["Checksum", "ETag", "ObjectParts"],
1231512315
)
1231612316
snapshot.match("get-copy-object-attrs", object_attrs)
1231712317

@@ -12595,7 +12595,7 @@ def test_complete_multipart_parts_checksum_full_object(
1259512595
object_attrs = aws_client.s3.get_object_attributes(
1259612596
Bucket=s3_bucket,
1259712597
Key=key_name,
12598-
ObjectAttributes=["Checksum", "ETag"],
12598+
ObjectAttributes=["Checksum", "ETag", "ObjectParts"],
1259912599
)
1260012600
snapshot.match("get-object-attrs", object_attrs)
1260112601

@@ -12608,7 +12608,7 @@ def test_complete_multipart_parts_checksum_full_object(
1260812608
object_attrs = aws_client.s3.get_object_attributes(
1260912609
Bucket=s3_bucket,
1261012610
Key=dest_key,
12611-
ObjectAttributes=["Checksum", "ETag"],
12611+
ObjectAttributes=["Checksum", "ETag", "ObjectParts"],
1261212612
)
1261312613
snapshot.match("get-copy-object-attrs", object_attrs)
1261412614

@@ -12877,7 +12877,7 @@ def test_complete_multipart_parts_checksum_default(self, s3_bucket, snapshot, aw
1287712877
object_attrs = aws_client.s3.get_object_attributes(
1287812878
Bucket=s3_bucket,
1287912879
Key=key_name,
12880-
ObjectAttributes=["Checksum", "ETag"],
12880+
ObjectAttributes=["Checksum", "ETag", "ObjectParts"],
1288112881
)
1288212882
snapshot.match("get-object-attrs", object_attrs)
1288312883

@@ -12890,7 +12890,7 @@ def test_complete_multipart_parts_checksum_default(self, s3_bucket, snapshot, aw
1289012890
object_attrs = aws_client.s3.get_object_attributes(
1289112891
Bucket=s3_bucket,
1289212892
Key=dest_key,
12893-
ObjectAttributes=["Checksum", "ETag"],
12893+
ObjectAttributes=["Checksum", "ETag", "ObjectParts"],
1289412894
)
1289512895
snapshot.match("get-copy-object-attrs", object_attrs)
1289612896

@@ -12959,7 +12959,7 @@ def test_complete_multipart_parts_checksum_full_object_default(
1295912959
object_attrs = aws_client.s3.get_object_attributes(
1296012960
Bucket=s3_bucket,
1296112961
Key=key_name,
12962-
ObjectAttributes=["Checksum", "ETag"],
12962+
ObjectAttributes=["Checksum", "ETag", "ObjectParts"],
1296312963
)
1296412964
snapshot.match("get-object-attrs", object_attrs)
1296512965

@@ -13023,7 +13023,10 @@ def test_multipart_size_validation(self, aws_client, s3_bucket, snapshot):
1302313023
snapshot.match("get-object-attrs", object_attrs)
1302413024

1302513025
@markers.aws.validated
13026-
def test_multipart_upload_part_copy_checksum(self, s3_bucket, snapshot, aws_client):
13026+
@pytest.mark.parametrize("checksum_type", ("COMPOSITE", "FULL_OBJECT"))
13027+
def test_multipart_upload_part_copy_checksum(
13028+
self, s3_bucket, snapshot, aws_client, checksum_type
13029+
):
1302713030
snapshot.add_transformer(
1302813031
[
1302913032
snapshot.transform.key_value("Bucket", reference_replacement=False),
@@ -13044,7 +13047,7 @@ def test_multipart_upload_part_copy_checksum(self, s3_bucket, snapshot, aws_clie
1304413047

1304513048
key_name = "test-multipart-checksum"
1304613049
response = aws_client.s3.create_multipart_upload(
13047-
Bucket=s3_bucket, Key=key_name, ChecksumAlgorithm="SHA256"
13050+
Bucket=s3_bucket, Key=key_name, ChecksumAlgorithm="CRC32C", ChecksumType=checksum_type
1304813051
)
1304913052
snapshot.match("create-mpu-checksum-sha256", response)
1305013053
upload_id = response["UploadId"]
@@ -13075,7 +13078,7 @@ def test_multipart_upload_part_copy_checksum(self, s3_bucket, snapshot, aws_clie
1307513078
{
1307613079
"ETag": upload_part_copy["CopyPartResult"]["ETag"],
1307713080
"PartNumber": 1,
13078-
"ChecksumSHA256": upload_part_copy["CopyPartResult"]["ChecksumSHA256"],
13081+
"ChecksumCRC32C": upload_part_copy["CopyPartResult"]["ChecksumCRC32C"],
1307913082
}
1308013083
]
1308113084
},
@@ -13096,7 +13099,7 @@ def test_multipart_upload_part_copy_checksum(self, s3_bucket, snapshot, aws_clie
1309613099
object_attrs = aws_client.s3.get_object_attributes(
1309713100
Bucket=s3_bucket,
1309813101
Key=key_name,
13099-
ObjectAttributes=["Checksum", "ETag"],
13102+
ObjectAttributes=["Checksum", "ETag", "ObjectParts"],
1310013103
)
1310113104
snapshot.match("get-object-attrs", object_attrs)
1310213105

0 commit comments

Comments
 (0)
0