8000 fix S3 handling of special character and trailing slash (#9143) · codeperl/localstack@6a192a7 · GitHub
[go: up one dir, main page]

Skip to content

Commit 6a192a7

Browse files
authored
fix S3 handling of special character and trailing slash (localstack#9143)
1 parent 485f9f0 commit 6a192a7

File tree

5 files changed

+194
-17
lines changed

5 files changed

+194
-17
lines changed

localstack/aws/protocol/parser.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@
7070
from email.utils import parsedate_to_datetime
7171
from typing import Any, Dict, List, Mapping, Optional, Tuple, Union
7272
from typing.io import IO
73+
from urllib.parse import unquote
7374
from xml.etree import ElementTree as ETree
7475

7576
import cbor2
@@ -1067,14 +1068,14 @@ def _parse_shape(
10671068
Special handling of parsing the shape for s3 object-names (=key):
10681069
trailing '/' are valid and need to be preserved, however, the url-matcher removes it from the key
10691070
we check the request.url to verify the name.
1070-
We might want to encode the key to take into account the ones with special characters.
1071+
We decode the key for the comparison with `base_url` in case of special characters.
10711072
"""
10721073
if (
10731074
shape is not None
10741075
and uri_params is not None
10751076
and shape.serialization.get("location") == "uri"
10761077
and shape.serialization.get("name") == "Key"
1077-
and request.base_url.endswith(f"{uri_params['Key']}/")
1078+
and request.base_url.endswith(f"{unquote(uri_para 8000 ms['Key'])}/")
10781079
):
10791080
uri_params = dict(uri_params)
10801081
uri_params["Key"] = uri_params["Key"] + "/"

localstack/services/moto.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
from localstack.aws.skeleton import DispatchTable
3131
from localstack.constants import DEFAULT_AWS_ACCOUNT_ID
3232
from localstack.http import Response
33+
from localstack.http.request import get_full_raw_path, get_raw_current_url
3334

3435
MotoDispatcher = Callable[[HttpRequest, str, dict], Response]
3536

@@ -110,9 +111,12 @@ def dispatch_to_moto(context: RequestContext) -> Response:
110111

111112
# this is where we skip the HTTP roundtrip between the moto server and the boto client
112113
dispatch = get_dispatcher(service.service_name, request.path)
113-
114114
try:
115-
response = dispatch(request, request.url, request.headers)
115+
# we use the full_raw_url as moto might do some path decoding (in S3 for example)
116+
raw_url = get_raw_current_url(
117+
request.scheme, request.host, request.root_path, get_full_raw_path(request)
118+
)
119+
response = dispatch(request, raw_url, request.headers)
116120
if not response:
117121
# some operations are only partially implemented by moto
118122
# e.g. the request will be resolved, but then the request method is not handled

localstack/services/s3/v3/provider.py

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1291,7 +1291,6 @@ def list_objects(
12911291
) -> ListObjectsOutput:
12921292
store, s3_bucket = self._get_cross_account_bucket(context, bucket)
12931293

1294-
# TODO: URL encode keys (is it done already in serializer?)
12951294
common_prefixes = set()
12961295
count = 0
12971296
is_truncated = False
@@ -1334,7 +1333,7 @@ def list_objects(
13341333

13351334
# TODO: add RestoreStatus if present
13361335
object_data = Object(
1337-
Key=s3_object.key,
1336+
Key=key,
13381337
ETag=s3_object.quoted_etag,
13391338
Owner=s3_bucket.owner, # TODO: verify reality
13401339
Size=s3_object.size,
@@ -1393,7 +1392,6 @@ def list_objects_v2(
13931392
if continuation_token and continuation_token == "":
13941393
raise InvalidArgument("The continuation token provided is incorrect")
13951394

1396-
# TODO: URL encode keys (is it done already in serializer?)
13971395
common_prefixes = set()
13981396
count = 0
13991397
is_truncated = False
@@ -1447,7 +1445,7 @@ def list_objects_v2(
14471445

14481446
# TODO: add RestoreStatus if present
14491447
object_data = Object(
1450-
Key=s3_object.key,
1448+
Key=key,
14511449
ETag=s3_object.quoted_etag,
14521450
Size=s3_object.size,
14531451
LastModified=s3_object.last_modified,
@@ -1506,7 +1504,6 @@ def list_object_versions(
15061504
) -> ListObjectVersionsOutput:
15071505
store, s3_bucket = self._get_cross_account_bucket(context, bucket)
15081506

1509-
# TODO: URL encode keys (is it done already in serializer?)
15101507
common_prefixes = set()
15111508
count = 0
15121509
is_truncated = False
@@ -1557,7 +1554,7 @@ def list_object_versions(
15571554

15581555
if isinstance(version, S3DeleteMarker):
15591556
delete_marker = DeleteMarkerEntry(
1560-
Key=version.key,
1557+
Key=key,
15611558
Owner=s3_bucket.owner,
15621559
VersionId=version.version_id,
15631560
IsLatest=version.is_current,
@@ -1568,7 +1565,7 @@ def list_object_versions(
15681565

15691566
# TODO: add RestoreStatus if present
15701567
object_version = ObjectVersion(
1571-
Key=version.key,
1568+
Key=key,
15721569
ETag=version.quoted_etag,
15731570
Owner=s3_bucket.owner, # TODO: verify reality
15741571
Size=version.size,

tests/aws/services/s3/test_s3.py

Lines changed: 31 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -531,20 +531,47 @@ def test_upload_file_multipart(self, s3_bucket, tmpdir, snapshot, aws_client):
531531
condition=lambda: not is_native_provider(),
532532
paths=["$..ServerSideEncryption"],
533533
)
534-
@pytest.mark.parametrize("key", ["file%2Fname", "test@key/"])
534+
@pytest.mark.parametrize("key", ["file%2Fname", "test@key/", "test%123", "test%percent"])
535535
def test_put_get_object_special_character(self, s3_bucket, aws_client, snapshot, key):
536536
snapshot.add_transformer(snapshot.transform.s3_api())
537537
resp = aws_client.s3.put_object(Bucket=s3_bucket, Key=key, Body=b"test")
538538
snapshot.match("put-object-special-char", resp)
539539
resp = aws_client.s3.list_objects_v2(Bucket=s3_bucket)
540-
# FIXME: Moto will by default clean up key name, but they will return the cleaned up key name in ListObject...
541-
if "%" not in key or is_aws_cloud():
542-
snapshot.match("list-object-special-char", resp)
540+
snapshot.match("list-object-special-char", resp)
543541
resp = aws_client.s3.get_object(Bucket=s3_bucket, Key=key)
544542
snapshot.match("get-object-special-char", resp)
545543
resp = aws_client.s3.delete_object(Bucket=s3_bucket, Key=key)
546544
snapshot.match("del-object-special-char", resp)
547545

546+
@markers.aws.validated
547+
def test_url_encoded_key(self, s3_bucket, aws_client, snapshot):
548+
"""Boto adds a trailing slash always?"""
549+
snapshot.add_transformer(snapshot.transform.key_value("Name"))
550+
key = "test@key/"
551+
aws_client.s3.put_object(Bucket=s3_bucket, Key=key, Body=b"test-non-encoded")
552+
encoded_key = "test%40key/"
553+
aws_client.s3.put_object(Bucket=s3_bucket, Key=encoded_key, Body=b"test-encoded")
554+
encoded_key_no_trailing = "test%40key"
555+
aws_client.s3.put_object(
556+
Bucket=s3_bucket, Key=encoded_key_no_trailing, Body=b"test-encoded-no-trailing"
557+
)
558+
# assert that one did not override the over, and that both key are different
559+
assert (
560+
aws_client.s3.get_object(Bucket=s3_bucket, Key=key)["Body"].read()
561+
== b"test-non-encoded"
562+
)
563+
assert (
564+
aws_client.s3.get_object(Bucket=s3_bucket, Key=encoded_key)["Body"].read()
565+
== b"test-encoded"
566+
)
567+
assert (
568+
aws_client.s3.get_object(Bucket=s3_bucket, Key=encoded_key_no_trailing)["Body"].read()
569+
== b"test-encoded-no-trailing"
570+
)
571+
572+
resp = aws_client.s3.list_objects_v2(Bucket=s3_bucket)
573+
snapshot.match("list-object-encoded-char", resp)
574+
548575
@markers.aws.validated
549576
@pytest.mark.parametrize("delimiter", ["/", "%2F"])
550577
def test_list_objects_with_prefix(self, s3_create_bucket, delimiter, snapshot, aws_client):

tests/aws/services/s3/test_s3.snapshot.json

Lines changed: 150 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7404,7 +7404,7 @@
74047404
}
74057405
},
74067406
"tests/aws/services/s3/test_s3.py::TestS3::test_put_get_object_special_character[file%2Fname]": {
7407-
"recorded-date": "03-08-2023, 04:13:34",
7407+
"recorded-date": "13-09-2023, 22:47:29",
74087408
"recorded-content": {
74097409
"put-object-special-char": {
74107410
"ETag": "\"098f6bcd4621d373cade4e832627b4f6\"",
@@ -7458,7 +7458,7 @@
74587458
}
74597459
},
74607460
"tests/aws/services/s3/test_s3.py::TestS3::test_put_get_object_special_character[test@key/]": {
7461-
"recorded-date": "03-08-2023, 04:13:37",
7461+
"recorded-date": "13-09-2023, 22:47:32",
74627462
"recorded-content": {
74637463
"put-object-special-char": {
74647464
"ETag": "\"098f6bcd4621d373cade4e832627b4f6\"",
@@ -7511,6 +7511,114 @@
75117511
}
75127512
}
75137513
},
7514+
"tests/aws/services/s3/test_s3.py::TestS3::test_put_get_object_special_character[test%123]": {
7515+
"recorded-date": "13-09-2023, 22:47:34",
7516+
"recorded-content": {
7517+
"put-object-special-char": {
7518+
"ETag": "\"098f6bcd4621d373cade4e832627b4f6\"",
7519+
"ServerSideEncryption": "AES256",
7520+
"ResponseMetadata": {
7521+
"HTTPHeaders": {},
7522+
"HTTPStatusCode": 200
7523+
}
7524+
},
7525+
"list-object-special-char": {
7526+
"Contents": [
7527+
{
7528+
"ETag": "\"098f6bcd4621d373cade4e832627b4f6\"",
7529+
"Key": "test%123",
7530+
"LastModified": "datetime",
7531+
"Size": 4,
7532+
"StorageClass": "STANDARD"
7533+
}
7534+
],
7535+
"EncodingType": "url",
7536+
"IsTruncated": false,
7537+
"KeyCount": 1,
7538+
"MaxKeys": 1000,
7539+
"Name": "<bucket-name:1>",
7540+
"Prefix": "",
7541+
"ResponseMetadata": {
7542+
"HTTPHeaders": {},
7543+
"HTTPStatusCode": 200
7544+
}
7545+
},
7546+
"get-object-special-char": {
7547+
"AcceptRanges": "bytes",
7548+
"Body": "test",
7549+
"ContentLength": 4,
7550+
"ContentType": "binary/octet-stream",
7551+
"ETag": "\"098f6bcd4621d373cade4e832627b4f6\"",
7552+
"LastModified": "datetime",
7553+
"Metadata": {},
7554+
"ServerSideEncryption": "AES256",
7555+
"ResponseMetadata": {
7556+
"HTTPHeaders": {},
7557+
"HTTPStatusCode": 200
7558+
}
7559+
},
7560+
"del-object-special-char": {
7561+
"ResponseMetadata": {
7562+
"HTTPHeaders": {},
7563+
"HTTPStatusCode": 204
7564+
}
7565+
}
7566+
}
7567+
},
7568+
"tests/aws/services/s3/test_s3.py::TestS3::test_put_get_object_special_character[test%percent]": {
7569+
"recorded-date": "13-09-2023, 22:47:36",
7570+
"recorded-content": {
7571+
"put-object-special-char": {
7572+
"ETag": "\"098f6bcd4621d373cade4e832627b4f6\"",
7573+
"ServerSideEncryption": "AES256",
7574+
"ResponseMetadata": {
7575+
"HTTPHeaders": {},
7576+
"HTTPStatusCode": 200
7577+
}
7578+
},
7579+
"list-object-special-char": {
7580+
"Contents": [
7581+
{
7582+
"ETag": "\"098f6bcd4621d373cade4e832627b4f6\"",
7583+
"Key": "test%percent",
7584+
"LastModified": "datetime",
7585+
"Size": 4,
7586+
"StorageClass": "STANDARD"
7587+
}
7588+
],
7589+
"EncodingType": "url",
7590+
"IsTruncated": false,
7591+
"KeyCount": 1,
7592+
"MaxKeys": 1000,
7593+
"Name": "<bucket-name:1>",
7594+
"Prefix": "",
7595+
"ResponseMetadata": {
7596+
"HTTPHeaders": {},
7597+
"HTTPStatusCode": 200
7598+
}
7599+
},
7600+
"get-object-special-char": {
7601+
"AcceptRanges": "bytes",
7602+
"Body": "test",
7603+
"ContentLength": 4,
7604+
"ContentType": "binary/octet-stream",
7605+
"ETag": "\"098f6bcd4621d373cade4e832627b4f6\"",
7606+
"LastModified": "datetime",
7607+
"Metadata": {},
7608+
"ServerSideEncryption": "AES256",
7609+
"ResponseMetadata": {
7610+
"HTTPHeaders": {},
7611+
"HTTPStatusCode": 200
7612+
}
7613+
},
7614+
"del-object-special-char": {
7615+
"ResponseMetadata": {
7616+
"HTTPHeaders": {},
7617+
"HTTPStatusCode": 204
7618+
}
7619+
}
7620+
}
7621+
},
75147622
"tests/aws/services/s3/test_s3.py::TestS3::test_s3_get_object_headers": {
75157623
"recorded-date": "03-08-2023, 04:25:53",
75167624
"recorded-content": {
@@ -10315,5 +10423,45 @@
1031510423
}
1031610424
}
1031710425
}
10426+
},
10427+
"tests/aws/services/s3/test_s3.py::TestS3::test_url_encoded_key": {
10428+
"recorded-date": "14-09-2023, 00:01:41",
10429+
"recorded-content": {
10430+
"list-object-encoded-char": {
10431+
"Contents": [
10432+
{
10433+
"ETag": "\"03dc4443b5f395b54d011fdb7d9e0ae1\"",
10434+
"Key": "test%40key",
10435+
"LastModified": "datetime",
10436+
"Size": 24,
10437+
"StorageClass": "STANDARD"
10438+
},
10439+
{
10440+
"ETag": "\"51a6065890415b4b299dec1aa33d712c\"",
10441+
"Key": "test%40key/",
10442+
"LastModified": "datetime",
10443+
"Size": 12,
10444+
"StorageClass": "STANDARD"
10445+
},
10446+
{
10447+
"ETag": "\"b792145c4a8e8d9ac95d3c2f9f0ac42d\"",
10448+
"Key": "test@key/",
10449+
"LastModified": "datetime",
10450+
"Size": 16,
10451+
"StorageClass": "STANDARD"
10452+
}
10453+
],
10454+
"EncodingType": "url",
10455+
"IsTruncated": false,
10456+
"KeyCount": 3,
10457+
"MaxKeys": 1000,
10458+
"Name": "<name:1>",
10459+
"Prefix": "",
10460+
"ResponseMetadata": {
10461+
"HTTPHeaders": {},
10462+
"HTTPStatusCode": 200
10463+
}
10464+
}
10465+
}
1031810466
}
1031910467
}

0 commit comments

Comments
 (0)
0