8000 fix ListObjectVersions pagination comparison (#12270) · localstack/localstack@c3866e1 · GitHub
[go: up one dir, main page]

Skip to content

Commit c3866e1

Browse files
authored
fix ListObjectVersions pagination comparison (#12270)
1 parent 904f55a commit c3866e1

File tree

5 files changed

+75
-8
lines changed

5 files changed

+75
-8
lines changed

localstack-core/localstack/services/s3/provider.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -282,6 +282,7 @@
282282
get_system_metadata_from_request,
283283
get_unique_key_id,
284284
is_bucket_name_valid,
285+
is_version_older_than_other,
285286
parse_copy_source_range_header,
286287
parse_post_object_tagging_xml,
287288
parse_range_header,
@@ -1856,9 +1857,9 @@ def list_object_versions(
18561857
continue
18571858

18581859
# it is possible that the version_id_marker related object has been deleted, in that case, start
1859-
# as soon as the next version id is smaller than the version id marker (meaning this version was
1860+
# as soon as the next version id is older than the version id marker (meaning this version was
18601861
# next after the now-deleted version)
1861-
elif version.version_id < version_id_marker:
1862+
elif is_version_older_than_other(version.version_id, version_id_marker):
18621863
version_key_marker_found = True
18631864

18641865
elif not version_key_marker_found:

localstack-core/localstack/services/s3/utils.py

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1039,11 +1039,14 @@ def parse_post_object_tagging_xml(tagging: str) -> Optional[dict]:
10391039

10401040

10411041
def generate_safe_version_id() -> str:
1042-
# the safe b64 encoding is inspired by the stdlib base64.urlsafe_b64encode
1043-
# and also using stdlib secrets.token_urlsafe, but with a different alphabet adapted for S3
1044-
# VersionId cannot have `-` in it, as it fails in XML
1045-
# we need an ever-increasing number, in order to properly implement pagination around ListObjectVersions
1046-
# by prepending the version-id with a global increasing number, we can lexicographically sort the versions
1042+
"""
1043+
Generate a safe version id for XML rendering.
1044+
VersionId cannot have `-` in it, as it fails in XML
1045+
Combine an ever-increasing part in the 8 first characters, and a random element.
1046+
We need the sequence part in order to properly implement pagination around ListObjectVersions.
1047+
By prefixing the version-id with a global increasing number, we can sort the versions
1048+
:return: an S3 VersionId containing a timestamp part in the first 8 characters
1049+
"""
10471050
tok = next(global_version_id_sequence()).to_bytes(length=6) + token_bytes(18)
10481051
return base64.b64encode(tok, altchars=b"._").rstrip(b"=").decode("ascii")
10491052

@@ -1053,3 +1056,11 @@ def global_version_id_sequence():
10531056
start = int(time.time() * 1000)
10541057
# itertools.count is thread safe over the GIL since its getAndIncrement operation is a single python bytecode op
10551058
return itertools.count(start)
1059+
1060+
1061+
def is_version_older_than_other(version_id: str, other: str):
1062+
"""
1063+
Compare the sequence part of a VersionId against the sequence part of a VersionIdMarker. Used for pagination
1064+
See `generate_safe_version_id`
1065+
"""
1066+
return base64.b64decode(version_id, altchars=b"._") < base64.b64decode(other, altchars=b"._")

tests/aws/services/s3/test_s3_list_operations.py

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -492,7 +492,10 @@ def test_list_objects_versions_with_prefix(
492492

493493
@markers.aws.validated
494494
def test_list_objects_versions_with_prefix_only_and_pagination(
495-
self, s3_bucket, snapshot, aws_client, aws_http_client_factory
495+
self,
496+
s3_bucket,
497+
snapshot,
498+
aws_client,
496499
):
497500
snapshot.add_transformer(snapshot.transform.s3_api())
498501
aws_client.s3.put_bucket_versioning(
@@ -559,6 +562,36 @@ def test_list_objects_versions_with_prefix_only_and_pagination(
559562
)
560563
snapshot.match("list-object-version-prefix-page-2-after-delete", page_2_response)
561564

565+
@markers.aws.validated
566+
def test_list_objects_versions_with_prefix_only_and_pagination_many_versions(
567+
self,
568+
s3_bucket,
569+
aws_client,
570+
):
571+
aws_client.s3.put_bucket_versioning(
572+
Bucket=s3_bucket,
573+
VersioningConfiguration={"Status": "Enabled"},
574+
)
575+
# with our internal pagination system, we use characters from the alphabet (lower and upper) + digits
576+
# by creating more than 100 objects, we can make sure we circle all the way around our sequencing, and properly
577+
# paginate over all of them
578+
for _ in range(101):
579+
aws_client.s3.put_object(Bucket=s3_bucket, Key="prefixed_key")
580+
581+
paginator = aws_client.s3.get_paginator("list_object_versions")
582+
# even if the PageIterator looks like it should be an iterator, it's actually an iterable and needs to be
583+
# wrapped in `iter`
584+
page_iterator = iter(
585+
paginator.paginate(
586+
Bucket=s3_bucket, Prefix="prefix", PaginationConfig={"PageSize": 100}
587+
)
588+
)
589+
page_1 = next(page_iterator)
590+
assert len(page_1["Versions"]) == 100
591+
592+
page_2 = next(page_iterator)
593+
assert len(page_2["Versions"]) == 1
594+
562595
@markers.aws.validated
563596
def test_s3_list_object_versions_timestamp_precision(
564597
self, s3_bucket, aws_client, aws_http_client_factory

tests/aws/services/s3/test_s3_list_operations.validation.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,9 @@
2323
"tests/aws/services/s3/test_s3_list_operations.py::TestS3ListObjectVersions::test_list_objects_versions_with_prefix_only_and_pagination": {
2424
"last_validated_date": "2025-02-13T03:52:21+00:00"
2525
},
26+
"tests/aws/services/s3/test_s3_list_operations.py::TestS3ListObjectVersions::test_list_objects_versions_with_prefix_only_and_pagination_many_versions": {
27+
"last_validated_date": "2025-02-13T20:24:26+00:00"
28+
},
2629
"tests/aws/services/s3/test_s3_list_operations.py::TestS3ListObjectVersions::test_s3_list_object_versions_timestamp_precision": {
2730
"last_validated_date": "2025-01-21T18:15:06+00:00"
2831
},

tests/unit/services/s3/test_s3.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import datetime
22
import os
33
import re
4+
import string
45
import zoneinfo
56
from io import BytesIO
67
from urllib.parse import urlparse
@@ -707,3 +708,21 @@ def test_s3_context_manager(self, tmpdir):
707708
pass
708709

709710
temp_storage_backend.close()
711+
712+
713+
class TestS3VersionIdGenerator:
714+
def test_version_is_xml_safe(self):
715+
# assert than we don't have unsafe characters in 500 different versions id
716+
safe_characters = string.ascii_letters + string.digits + "._"
717+
assert all(
718+
all(char in safe_characters for char in s3_utils.generate_safe_version_id())
719+
for _ in range(500)
720+
)
721+
722+
def test_version_id_ordering(self):
723+
version_ids = [s3_utils.generate_safe_version_id() for _ in range(500)]
724+
725+
# assert that every version id can be ordered with each other
726+
for index, version_id in enumerate(version_ids[1:]):
727+
previous_version = version_ids[index]
728+
assert s3_utils.is_version_older_than_other(previous_version, version_id)

0 commit comments

Comments
 (0)
0