improve S3 vhost matching on any domain by bentsku · Pull Request #7870 · localstack/localstack · GitHub
[go: up one dir, main page]

Skip to content

improve S3 vhost matching on any domain #7870

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Mar 15, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 20 additions & 9 deletions localstack/services/s3/virtual_host.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import logging
from urllib.parse import urlsplit, urlunsplit

from localstack.config import LEGACY_S3_PROVIDER
from localstack import config
from localstack.constants import LOCALHOST_HOSTNAME
from localstack.http import Request, Response
from localstack.http.proxy import Proxy
Expand All @@ -13,13 +13,15 @@

LOG = logging.getLogger(__name__)

# virtual-host style: https://{bucket-name}.s3.{region}.localhost.localstack.cloud.com/{key-name}
VHOST_REGEX_PATTERN = f"<regex('.*'):bucket>.s3.<regex('({AWS_REGION_REGEX}\\.)?'):region>{LOCALHOST_HOSTNAME}<regex('(?::\\d+)?'):port>"
# virtual-host style: https://{bucket-name}.s3.{region?}.{domain}:{port?}/{key-name}
# ex: https://{bucket-name}.s3.{region}.localhost.localstack.cloud:4566/{key-name}
# ex: https://{bucket-name}.s3.{region}.amazonaws.com/{key-name}
VHOST_REGEX_PATTERN = f"<regex('.*'):bucket>.s3.<regex('({AWS_REGION_REGEX}\\.)?'):region><regex('.*'):domain><regex('(?::\\d+)?'):port>"

# path addressed request with the region in the hostname
# https://s3.{region}.localhost.localstack.cloud/{bucket-name}/{key-name}
PATH_WITH_REGION_PATTERN = (
f"s3.<regex('({AWS_REGION_REGEX}\\.)'):region>{LOCALHOST_HOSTNAME}<regex('(?::\\d+)?'):port>"
f"s3.<regex('({AWS_REGION_REGEX}\\.)'):region><regex('.*'):domain><regex('(?::\\d+)?'):port>"
)


Expand All @@ -31,7 +33,7 @@ class S3VirtualHostProxyHandler:

def __call__(self, request: Request, **kwargs) -> Response:
# TODO region pattern currently not working -> removing it from url
rewritten_url = self._rewrite_url(request.url, kwargs.get("bucket"), kwargs.get("region"))
rewritten_url = self._rewrite_url(url=request.url, **kwargs)

LOG.debug(f"Rewritten original host url: {request.url} to path-style url: {rewritten_url}")

Expand All @@ -41,7 +43,7 @@ def __call__(self, request: Request, **kwargs) -> Response:
copied_headers[S3_VIRTUAL_HOST_FORWARDED_HEADER] = request.headers["host"]
# do not preserve the Host when forwarding (to avoid an endless loop)
with Proxy(
forward_base_url=f"{forward_to_url.scheme}://{forward_to_url.netloc}",
forward_base_url=config.get_edge_url(),
preserve_host=False,
) as proxy:
forwarded = proxy.forward(
Expand All @@ -53,16 +55,18 @@ def __call__(self, request: Request, **kwargs) -> Response:
return forwarded

@staticmethod
def _rewrite_url(url: str, bucket: str, region: str) -> str:
def _rewrite_url(url: str, domain: str, bucket: str, region: str, port: str, **kwargs) -> str:
"""
Rewrites the url so that it can be forwarded to moto. Used for vhost-style and for any url that contains the region.

For vhost style: removes the bucket-name from the host-name and adds it as path
E.g. http://my-bucket.s3.localhost.localstack.cloud:4566 -> http://s3.localhost.localstack.cloud:4566/my-bucket
E.g. https://bucket.s3.localhost.localstack.cloud:4566 -> https://s3.localhost.localstack.cloud:4566/bucket
E.g. https://bucket.s3.amazonaws.com -> https://s3.localhost.localstack.cloud:4566/bucket

If the region is contained in the host-name we remove it (for now) as moto cannot handle the region correctly

:param url: the original url
:param domain: the domain name
:param bucket: the bucket name
:param region: the region name
:return: re-written url as string
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: could add the port parameter to the docstring as well

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oops, thanks for catching that!

Expand All @@ -79,10 +83,17 @@ def _rewrite_url(url: str, bucket: str, region: str) -> str:
if region:
netloc = netloc.replace(f"{region}", "")

# the user can specify whatever domain & port they want in the Host header
# we need to make sure we're redirecting the request to our edge URL, possibly s3.localhost.localstack.cloud
host = f"{domain}:{port}" if port else domain
edge_host = f"{LOCALHOST_HOSTNAME}:{config.get_edge_port_http()}"
if host != edge_host:
netloc = netloc.replace(host, edge_host)

return urlunsplit((splitted.scheme, netloc, path, splitted.query, splitted.fragment))


@hooks.on_infra_ready(should_load=not LEGACY_S3_PROVIDER)
@hooks.on_infra_ready(should_load=not config.LEGACY_S3_PROVIDER)
def register_virtual_host_routes():
"""
Registers the S3 virtual host handler into the edge router.
Expand Down
19 changes: 16 additions & 3 deletions tests/integration/s3/test_s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -6034,17 +6034,30 @@ def _get_static_hosting_transformers(snapshot):


class TestS3Routing:
def test_access_favicon_via_aws_endpoints(self, s3_bucket, s3_client):
@pytest.mark.only_localstack
@pytest.mark.parametrize(
"domain, use_virtual_address",
[
("s3.amazonaws.com", False),
("s3.amazonaws.com", True),
("s3.us-west-2.amazonaws.com", False),
("s3.us-west-2.amazonaws.com", True),
],
)
def test_access_favicon_via_aws_endpoints(
self, s3_bucket, s3_client, domain, use_virtual_address
):
"""Assert that /favicon.ico objects can be created/accessed/deleted using amazonaws host headers"""

s3_key = "favicon.ico"
content = b"test 123"
s3_client.put_object(Bucket=s3_bucket, Key=s3_key, Body=content)
s3_client.head_object(Bucket=s3_bucket, Key=s3_key)

url = f"{config.get_edge_url()}/{s3_key}"
path = s3_key if use_virtual_address else f"{s3_bucket}/{s3_key}"
url = f"{config.get_edge_url()}/{path}"
headers = aws_stack.mock_aws_request_headers("s3")
headers["host"] = f"{s3_bucket}.s3.amazonaws.com"
headers["host"] = f"{s3_bucket}.{domain}" if use_virtual_address else domain

# get object via *.amazonaws.com host header
result = requests.get(url, headers=headers)
Expand Down
0