S3: implement ObjectParts in GetObjectAttributes by bentsku · Pull Request #12764 · localstack/localstack · GitHub
[go: up one dir, main page]

Skip to content

S3: implement ObjectParts in GetObjectAttributes #12764

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jun 18, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 19 additions & 4 deletions localstack-core/localstack/services/s3/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
ObjectStorageClass,
ObjectVersionId,
Owner,
Part,
PartNumber,
Payer,
Policy,
Expand Down Expand Up @@ -89,6 +90,10 @@
_gmt_zone_info = ZoneInfo("GMT")


class InternalObjectPart(Part):
_position: int


# note: not really a need to use a dataclass here, as it has a lot of fields, but only a few are set at creation
class S3Bucket:
name: BucketName
Expand Down Expand Up @@ -271,7 +276,7 @@ class S3Object:
website_redirect_location: Optional[WebsiteRedirectLocation]
acl: Optional[AccessControlPolicy]
is_current: bool
parts: Optional[dict[int, tuple[int, int]]]
parts: Optional[dict[int, InternalObjectPart]]
restore: Optional[Restore]
internal_last_modified: int

Expand Down Expand Up @@ -494,14 +499,16 @@ def complete_multipart(
object_etag = hashlib.md5(usedforsecurity=False)
has_checksum = self.checksum_algorithm is not None
checksum_hash = None
checksum_key = None
if has_checksum:
checksum_key = f"Checksum{self.checksum_algorithm.upper()}"
if self.checksum_type == ChecksumType.COMPOSITE:
checksum_hash = get_s3_checksum(self.checksum_algorithm)
else:
checksum_hash = CombinedCrcHash(self.checksum_algorithm)

pos = 0
parts_map = {}
parts_map: dict[int, InternalObjectPart] = {}
for index, part in enumerate(parts):
part_number = part["PartNumber"]
part_etag = part["ETag"].strip('"')
Expand All @@ -522,7 +529,6 @@ def complete_multipart(
)

if has_checksum:
checksum_key = f"Checksum{self.checksum_algorithm.upper()}"
if not (part_checksum := part.get(checksum_key)):
if self.checksum_type == ChecksumType.COMPOSITE:
# weird case, they still try to validate a different checksum type than the multipart
Expand Down Expand Up @@ -571,7 +577,16 @@ def complete_multipart(

object_etag.update(bytes.fromhex(s3_part.etag))
# keep track of the parts size, as it can be queried afterward on the object as a Range
parts_map[part_number] = (pos, s3_part.size)
internal_part = InternalObjectPart(
_position=pos,
Size=s3_part.size,
ETag=s3_part.etag,
PartNumber=s3_part.part_number,
)
if has_checksum and self.checksum_type == ChecksumType.COMPOSITE:
internal_part[checksum_key] = s3_part.checksum_value

parts_map[part_number] = internal_part
pos += s3_part.size

if mpu_size and mpu_size != pos:
Expand Down
75 changes: 64 additions & 11 deletions localstack-core/localstack/services/s3/provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,7 @@
ObjectLockRetention,
ObjectLockToken,
ObjectOwnership,
ObjectPart,
ObjectVersion,
ObjectVersionId,
ObjectVersionStorageClass,
Expand Down Expand Up @@ -312,6 +313,7 @@
from localstack.services.s3.website_hosting import register_website_hosting_routes
from localstack.state import AssetDirectory, StateVisitor
from localstack.utils.aws.arns import s3_bucket_name
from localstack.utils.collections import select_from_typed_dict
from localstack.utils.strings import short_uid, to_bytes, to_str

LOG = logging.getLogger(__name__)
Expand Down Expand Up @@ -2027,6 +2029,7 @@ def get_object_attributes(

object_attrs = request.get("ObjectAttributes", [])
response = GetObjectAttributesOutput()
object_checksum_type = getattr(s3_object, "checksum_type", ChecksumType.FULL_OBJECT)
if "ETag" in object_attrs:
response["ETag"] = s3_object.etag
if "StorageClass" in object_attrs:
Expand All @@ -2040,7 +2043,7 @@ def get_object_attributes(
checksum_value = s3_object.checksum_value
response["Checksum"] = {
f"Checksum{checksum_algorithm.upper()}": checksum_value,
"ChecksumType": getattr(s3_object, "checksum_type", ChecksumType.FULL_OBJECT),
"ChecksumType": object_checksum_type,
}

response["LastModified"] = s3_object.last_modified
Expand All @@ -2049,9 +2052,55 @@ def get_object_attributes(
response["VersionId"] = s3_object.version_id

if "ObjectParts" in object_attrs and s3_object.parts:
# TODO: implements ObjectParts, this is basically a simplified `ListParts` call on the object, we might
# need to store more data about the Parts once we implement checksums for them
response["ObjectParts"] = GetObjectAttributesParts(TotalPartsCount=len(s3_object.parts))
if object_checksum_type == ChecksumType.FULL_OBJECT:
response["ObjectParts"] = GetObjectAttributesParts(
TotalPartsCount=len(s3_object.parts)
)
else:
# this is basically a simplified `ListParts` call on the object, only returned when the checksum type is
# COMPOSITE
count = 0
is_truncated = False
part_number_marker = request.get("PartNumberMarker") or 0
max_parts = request.get("MaxParts") or 1000

parts = []
all_parts = sorted(s3_object.parts.items())
last_part_number, last_part = all_parts[-1]

# TODO: remove this backward compatibility hack needed for state created with <= 4.5
# the parts would only be a tuple and would not store the proper state for 4.5 and earlier, so we need
# to return early
if isinstance(last_part, tuple):
response["ObjectParts"] = GetObjectAttributesParts(
TotalPartsCount=len(s3_object.parts)
)
return response

for part_number, part in all_parts:
if part_number <= part_number_marker:
continue
part_item = select_from_typed_dict(ObjectPart, part)

parts.append(part_item)
count += 1

if count >= max_parts and part["PartNumber"] != last_part_number:
is_truncated = True
break

object_parts = GetObjectAttributesParts(
TotalPartsCount=len(s3_object.parts),
IsTruncated=is_truncated,
MaxParts=max_parts,
PartNumberMarker=part_number_marker,
NextPartNumberMarker=0,
)
if parts:
object_parts["Parts"] = parts
object_parts["NextPartNumberMarker"] = parts[-1]["PartNumber"]

response["ObjectParts"] = object_parts

return response

Expand Down Expand Up @@ -2724,8 +2773,6 @@ def list_parts(
sse_customer_key_md5: SSECustomerKeyMD5 = None,
**kwargs,
) -> ListPartsOutput:
# TODO: implement MaxParts
# TODO: implements PartNumberMarker
store, s3_bucket = self._get_cross_account_bucket(context, bucket)

if (
Expand All @@ -2738,10 +2785,6 @@ def list_parts(
UploadId=upload_id,
)

# AbortDate: Optional[AbortDate] TODO: lifecycle
# AbortRuleId: Optional[AbortRuleId] TODO: lifecycle
# RequestCharged: Optional[RequestCharged]

count = 0
is_truncated = False
part_number_marker = part_number_marker or 0
Expand Down Expand Up @@ -2792,6 +2835,10 @@ def list_parts(
response["ChecksumAlgorithm"] = s3_multipart.object.checksum_algorithm
response["ChecksumType"] = getattr(s3_multipart, "checksum_type", None)

# AbortDate: Optional[AbortDate] TODO: lifecycle
# AbortRuleId: Optional[AbortRuleId] TODO: lifecycle
# RequestCharged: Optional[RequestCharged]

return response

def list_multipart_uploads(
Expand Down Expand Up @@ -4538,7 +4585,13 @@ def get_part_range(s3_object: S3Object, part_number: PartNumber) -> ObjectRange:
ActualPartCount=len(s3_object.parts),
)

begin, part_length = part_data
# TODO: remove for next major version 5.0, compatibility for <= 4.5
if isinstance(part_data, tuple):
begin, part_length = part_data
else:
begin = part_data["_position"]
part_length = part_data["Size"]

end = begin + part_length - 1
return ObjectRange(
begin=begin,
Expand Down
25 changes: 14 additions & 11 deletions tests/aws/services/s3/test_s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -12298,7 +12298,7 @@ def test_complete_multipart_parts_checksum_composite(
object_attrs = aws_client.s3.get_object_attributes(
Bucket=s3_bucket,
Key=key_name,
ObjectAttributes=["Checksum", "ETag"],
ObjectAttributes=["Checksum", "ETag", "ObjectParts"],
)
snapshot.match("get-object-attrs", object_attrs)

Expand All @@ -12311,7 +12311,7 @@ def test_complete_multipart_parts_checksum_composite(
object_attrs = aws_client.s3.get_object_attributes(
Bucket=s3_bucket,
Key=dest_key,
ObjectAttributes=["Checksum", "ETag"],
ObjectAttributes=["Checksum", "ETag", "ObjectParts"],
)
snapshot.match("get-copy-object-attrs", object_attrs)

Expand Down Expand Up @@ -12595,7 +12595,7 @@ def test_complete_multipart_parts_checksum_full_object(
object_attrs = aws_client.s3.get_object_attributes(
Bucket=s3_bucket,
Key=key_name,
ObjectAttributes=["Checksum", "ETag"],
ObjectAttributes=["Checksum", "ETag", "ObjectParts"],
)
snapshot.match("get-object-attrs", object_attrs)

Expand All @@ -12608,7 +12608,7 @@ def test_complete_multipart_parts_checksum_full_object(
object_attrs = aws_client.s3.get_object_attributes(
Bucket=s3_bucket,
Key=dest_key,
ObjectAttributes=["Checksum", "ETag"],
ObjectAttributes=["Checksum", "ETag", "ObjectParts"],
)
snapshot.match("get-copy-object-attrs", object_attrs)

Expand Down Expand Up @@ -12877,7 +12877,7 @@ def test_complete_multipart_parts_checksum_default(self, s3_bucket, snapshot, aw
object_attrs = aws_client.s3.get_object_attributes(
Bucket=s3_bucket,
Key=key_name,
ObjectAttributes=["Checksum", "ETag"],
ObjectAttributes=["Checksum", "ETag", "ObjectParts"],
)
snapshot.match("get-object-attrs", object_attrs)

Expand All @@ -12890,7 +12890,7 @@ def test_complete_multipart_parts_checksum_default(self, s3_bucket, snapshot, aw
object_attrs = aws_client.s3.get_object_attributes(
Bucket=s3_bucket,
Key=dest_key,
ObjectAttributes=["Checksum", "ETag"],
ObjectAttributes=["Checksum", "ETag", "ObjectParts"],
)
snapshot.match("get-copy-object-attrs", object_attrs)

Expand Down Expand Up @@ -12959,7 +12959,7 @@ def test_complete_multipart_parts_checksum_full_object_default(
object_attrs = aws_client.s3.get_object_attributes(
Bucket=s3_bucket,
Key=key_name,
ObjectAttributes=["Checksum", "ETag"],
ObjectAttributes=["Checksum", "ETag", "ObjectParts"],
)
snapshot.match("get-object-attrs", object_attrs)

Expand Down Expand Up @@ -13023,7 +13023,10 @@ def test_multipart_size_validation(self, aws_client, s3_bucket, snapshot):
snapshot.match("get-object-attrs", object_attrs)

@markers.aws.validated
def test_multipart_upload_part_copy_checksum(self, s3_bucket, snapshot, aws_client):
@pytest.mark.parametrize("checksum_type", ("COMPOSITE", "FULL_OBJECT"))
def test_multipart_upload_part_copy_checksum(
self, s3_bucket, snapshot, aws_client, checksum_type
):
snapshot.add_transformer(
[
snapshot.transform.key_value("Bucket", reference_replacement=False),
Expand All @@ -13044,7 +13047,7 @@ def test_multipart_upload_part_copy_checksum(self, s3_bucket, snapshot, aws_clie

key_name = "test-multipart-checksum"
response = aws_client.s3.create_multipart_upload(
Bucket=s3_bucket, Key=key_name, ChecksumAlgorithm="SHA256"
Bucket=s3_bucket, Key=key_name, ChecksumAlgorithm="CRC32C", ChecksumType=checksum_type
)
snapshot.match("create-mpu-checksum-sha256", response)
upload_id = response["UploadId"]
Expand Down Expand Up @@ -13075,7 +13078,7 @@ def test_multipart_upload_part_copy_checksum(self, s3_bucket, snapshot, aws_clie
{
"ETag": upload_part_copy["CopyPartResult"]["ETag"],
"PartNumber": 1,
"ChecksumSHA256": upload_part_copy["CopyPartResult"]["ChecksumSHA256"],
"ChecksumCRC32C": upload_part_copy["CopyPartResult"]["ChecksumCRC32C"],
}
]
},
Expand All @@ -13096,7 +13099,7 @@ def test_multipart_upload_part_copy_checksum(self, s3_bucket, snapshot, aws_clie
object_attrs = aws_client.s3.get_object_attributes(
Bucket=s3_bucket,
Key=key_name,
ObjectAttributes=["Checksum", "ETag"],
ObjectAttributes=["Checksum", "ETag", "ObjectParts"],
)
snapshot.match("get-object-attrs", object_attrs)

Expand Down
Loading
Loading
0