From f75b7c4796bfcad60c0bb418e441c00e41a1527e Mon Sep 17 00:00:00 2001 From: Benjamin Simon Date: Tue, 14 Mar 2023 23:36:54 +0100 Subject: [PATCH 1/2] rework s3 virtual host addressing --- localstack/services/s3/virtual_host.py | 27 ++++++++++++++++++-------- tests/integration/s3/test_s3.py | 19 +++++++++++++++--- 2 files changed, 35 insertions(+), 11 deletions(-) diff --git a/localstack/services/s3/virtual_host.py b/localstack/services/s3/virtual_host.py index 6afcfb35c5427..a2cc2312ea354 100644 --- a/localstack/services/s3/virtual_host.py +++ b/localstack/services/s3/virtual_host.py @@ -2,7 +2,7 @@ import logging from urllib.parse import urlsplit, urlunsplit -from localstack.config import LEGACY_S3_PROVIDER +from localstack import config from localstack.constants import LOCALHOST_HOSTNAME from localstack.http import Request, Response from localstack.http.proxy import Proxy @@ -13,13 +13,15 @@ LOG = logging.getLogger(__name__) -# virtual-host style: https://{bucket-name}.s3.{region}.localhost.localstack.cloud.com/{key-name} -VHOST_REGEX_PATTERN = f".s3.{LOCALHOST_HOSTNAME}" +# virtual-host style: https://{bucket-name}.s3.{region?}.{domain}:{port?}/{key-name} +# ex: https://{bucket-name}.s3.{region}.localhost.localstack.cloud.com:4566/{key-name} +# ex: https://{bucket-name}.s3.{region}.amazonaws.com/{key-name} +VHOST_REGEX_PATTERN = f".s3." # path addressed request with the region in the hostname # https://s3.{region}.localhost.localstack.cloud.com/{bucket-name}/{key-name} PATH_WITH_REGION_PATTERN = ( - f"s3.{LOCALHOST_HOSTNAME}" + f"s3." 
) @@ -31,7 +33,7 @@ class S3VirtualHostProxyHandler: def __call__(self, request: Request, **kwargs) -> Response: # TODO region pattern currently not working -> removing it from url - rewritten_url = self._rewrite_url(request.url, kwargs.get("bucket"), kwargs.get("region")) + rewritten_url = self._rewrite_url(url=request.url, **kwargs) LOG.debug(f"Rewritten original host url: {request.url} to path-style url: {rewritten_url}") @@ -53,16 +55,18 @@ def __call__(self, request: Request, **kwargs) -> Response: return forwarded @staticmethod - def _rewrite_url(url: str, bucket: str, region: str) -> str: + def _rewrite_url(url: str, domain: str, bucket: str, region: str, port: str, **kwargs) -> str: """ Rewrites the url so that it can be forwarded to moto. Used for vhost-style and for any url that contains the region. For vhost style: removes the bucket-name from the host-name and adds it as path - E.g. http://my-bucket.s3.localhost.localstack.cloud:4566 -> http://s3.localhost.localstack.cloud:4566/my-bucket + E.g. https://bucket.s3.localhost.localstack.cloud:4566 -> https://s3.localhost.localstack.cloud:4566/bucket + E.g. 
https://bucket.s3.amazonaws.com -> https://s3.localhost.localstack.cloud:4566/bucket If the region is contained in the host-name we remove it (for now) as moto cannot handle the region correctly :param url: the original url + :param domain: the domain name :param bucket: the bucket name :param region: the region name :return: re-written url as string @@ -79,10 +83,17 @@ def _rewrite_url(url: str, bucket: str, region: str) -> str: if region: netloc = netloc.replace(f"{region}", "") + # the user can specify whatever domain & port they want in the Host header + # we need to make sure we're redirecting the request to our edge URL, possibly s3.localhost.localstack.cloud + host = f"{domain}:{port}" if port else domain + edge_host = f"{LOCALHOST_HOSTNAME}:{config.get_edge_port_http()}" + if host != edge_host: + netloc = netloc.replace(host, edge_host) + return urlunsplit((splitted.scheme, netloc, path, splitted.query, splitted.fragment)) -@hooks.on_infra_ready(should_load=not LEGACY_S3_PROVIDER) +@hooks.on_infra_ready(should_load=not config.LEGACY_S3_PROVIDER) def register_virtual_host_routes(): """ Registers the S3 virtual host handler into the edge router. 
diff --git a/tests/integration/s3/test_s3.py b/tests/integration/s3/test_s3.py index 48fefde7c5aa1..a0bab130a7126 100644 --- a/tests/integration/s3/test_s3.py +++ b/tests/integration/s3/test_s3.py @@ -6034,7 +6034,19 @@ def _get_static_hosting_transformers(snapshot): class TestS3Routing: - def test_access_favicon_via_aws_endpoints(self, s3_bucket, s3_client): + @pytest.mark.only_localstack + @pytest.mark.parametrize( + "domain, use_virtual_address", + [ + ("s3.amazonaws.com", False), + ("s3.amazonaws.com", True), + ("s3.us-west-2.amazonaws.com", False), + ("s3.us-west-2.amazonaws.com", True), + ], + ) + def test_access_favicon_via_aws_endpoints( + self, s3_bucket, s3_client, domain, use_virtual_address + ): """Assert that /favicon.ico objects can be created/accessed/deleted using amazonaws host headers""" s3_key = "favicon.ico" @@ -6042,9 +6054,10 @@ def test_access_favicon_via_aws_endpoints(self, s3_bucket, s3_client): s3_client.put_object(Bucket=s3_bucket, Key=s3_key, Body=content) s3_client.head_object(Bucket=s3_bucket, Key=s3_key) - url = f"{config.get_edge_url()}/{s3_key}" + path = s3_key if use_virtual_address else f"{s3_bucket}/{s3_key}" + url = f"{config.get_edge_url()}/{path}" headers = aws_stack.mock_aws_request_headers("s3") - headers["host"] = f"{s3_bucket}.s3.amazonaws.com" + headers["host"] = f"{s3_bucket}.{domain}" if use_virtual_address else domain # get object via *.amazonaws.com host header result = requests.get(url, headers=headers) From 97c345b14068af8625273cbb12081de3dcc83994 Mon Sep 17 00:00:00 2001 From: Benjamin Simon Date: Wed, 15 Mar 2023 02:43:54 +0100 Subject: [PATCH 2/2] use config.get_edge_url() as forward_base_url --- localstack/services/s3/virtual_host.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/localstack/services/s3/virtual_host.py b/localstack/services/s3/virtual_host.py index a2cc2312ea354..9bc2cbc90f9d5 100644 --- a/localstack/services/s3/virtual_host.py +++ b/localstack/services/s3/virtual_host.py @@ 
-43,7 +43,7 @@ def __call__(self, request: Request, **kwargs) -> Response: copied_headers[S3_VIRTUAL_HOST_FORWARDED_HEADER] = request.headers["host"] # do not preserve the Host when forwarding (to avoid an endless loop) with Proxy( - forward_base_url=f"{forward_to_url.scheme}://{forward_to_url.netloc}", + forward_base_url=config.get_edge_url(), preserve_host=False, ) as proxy: forwarded = proxy.forward(