Fix chunked encoding in S3 responses (#2537) · localstack/localstack@6a380e5 · GitHub
[go: up one dir, main page]

Skip to content

Commit 6a380e5

Browse files
authored
Fix chunked encoding in S3 responses (#2537)
1 parent 59bf9ec commit 6a380e5

File tree

8 files changed

+130
-55
lines changed

8 files changed

+130
-55
lines changed

localstack/config.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,9 @@
128128
# path prefix for windows volume mounting
129129
WINDOWS_DOCKER_MOUNT_PREFIX = os.environ.get('WINDOWS_DOCKER_MOUNT_PREFIX', '/host_mnt')
130130

131+
# whether to use a proxy server with HTTP/2 support. TODO: remove in the future
132+
USE_HTTP2_SERVER = os.environ.get('USE_HTTP2_SERVER', '').strip() not in FALSE_STRINGS
133+
131134

132135
def has_docker():
133136
try:
@@ -169,7 +172,7 @@ def is_linux():
169172
'START_WEB', 'DOCKER_BRIDGE_IP', 'DEFAULT_REGION', 'LAMBDA_JAVA_OPTS', 'LOCALSTACK_API_KEY',
170173
'LAMBDA_CONTAINER_REGISTRY', 'TEST_AWS_ACCOUNT_ID', 'DISABLE_EVENTS', 'EDGE_PORT',
171174
'EDGE_PORT_HTTP', 'SKIP_INFRA_DOWNLOADS', 'STEPFUNCTIONS_LAMBDA_ENDPOINT',
172-
'WINDOWS_DOCKER_MOUNT_PREFIX']
175+
'WINDOWS_DOCKER_MOUNT_PREFIX', 'USE_HTTP2_SERVER']
173176

174177
for key, value in six.iteritems(DEFAULT_SERVICE_PORTS):
175178
clean_key = key.upper().replace('-', '_')

localstack/services/awslambda/lambda_api.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,9 +41,9 @@
4141
from localstack.services.awslambda.multivalue_transformer import multi_value_dict_for_list
4242
from localstack.utils.common import (to_str, load_file, save_file, TMP_FILES, ensure_readable,
4343
mkdir, unzip, is_zip_file, zip_contains_jar_entries, run, short_uid,
44-
timestamp_millis, parse_chunked_data, now_utc, safe_requests, FuncThread,
45-
isoformat_milliseconds)
44+
timestamp_millis, now_utc, safe_requests, FuncThread, isoformat_milliseconds)
4645
from localstack.utils.analytics import event_publisher
46+
from localstack.utils.http_utils import parse_chunked_data
4747
from localstack.utils.aws.aws_models import LambdaFunction
4848
from localstack.utils.cloudwatch.cloudwatch_util import cloudwatched
4949

localstack/services/generic_proxy.py

Lines changed: 28 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
from localstack.constants import ENV_INTERNAL_TEST_RUN, APPLICATION_JSON
2323
from localstack.utils.server import http2_server
2424
from localstack.utils.common import FuncThread, generate_ssl_cert, to_bytes, json_safe, TMP_THREADS
25+
from localstack.utils.http_utils import uses_chunked_encoding, create_chunked_data
2526
from localstack.utils.aws.aws_responses import LambdaResponse
2627

2728
# set up logger
@@ -31,7 +32,7 @@
3132
SERVER_CERT_PEM_FILE = 'server.test.pem'
3233

3334
# whether to use a proxy server with HTTP/2 support
34-
USE_HTTP2_SERVER = True
35+
USE_HTTP2_SERVER = config.USE_HTTP2_SERVER
3536

3637
# CORS constants
3738
CORS_ALLOWED_HEADERS = ['authorization', 'content-type', 'content-md5', 'cache-control',
@@ -218,22 +219,26 @@ def forward(self, method):
218219
# copy headers and return response
219220
self.send_response(response.status_code)
220221

222+
# set content for chunked encoding
223+
is_chunked = uses_chunked_encoding(response)
224+
if is_chunked:
225+
response._content = create_chunked_data(response._content)
226+
227+
# send headers
221228
content_length_sent = False
222229
for header_key, header_value in iteritems(response.headers):
223230
# filter out certain headers that we don't want to transmit
224231
if header_key.lower() not in ('transfer-encoding', 'date', 'server'):
225232
self.send_header(header_key, header_value)
226233
content_length_sent = content_length_sent or header_key.lower() == 'content-length'
227234

228-
if not content_length_sent:
235+
# fix content-length header if needed
236+
if not content_length_sent and not is_chunked:
229237
self.send_header('Content-Length', '%s' % len(response.content) if response.content else 0)
230238

231239
if isinstance(response, LambdaResponse):
232240
self.send_multi_value_headers(response.multi_value_headers)
233241

234-
# allow pre-flight CORS headers by default
235-
self._send_cors_headers(response)
236-
237242
self.end_headers()
238243
if response.content and len(response.content):
239244
self.wfile.write(to_bytes(response.content))
@@ -263,20 +268,6 @@ def forward(self, method):
263268
except Exception as e:
264269
LOG.warning('Unable to flush write file: %s' % e)
265270

266-
def _send_cors_headers(self, response=None):
267-
# Note: Use "response is not None" here instead of "not response"!
268-
headers = response is not None and response.headers or {}
269-
if 'Access-Control-Allow-Origin' not in headers:
270-
self.send_header('Access-Control-Allow-Origin', '*')
271-
if 'Access-Control-Allow-Methods' not in headers:
272-
self.send_header('Access-Control-Allow-Methods', ','.join(CORS_ALLOWED_METHODS))
273-
if 'Access-Control-Allow-Headers' not in headers:
274-
requested_headers = self.headers.get('Access-Control-Request-Headers', '')
275-
requested_headers = re.split(r'[,\s]+', requested_headers) + CORS_ALLOWED_HEADERS
276-
self.send_header('Access-Control-Allow-Headers', ','.join([h for h in requested_headers if h]))
277-
if 'Access-Control-Expose-Headers' not in headers:
278-
self.send_header('Access-Control-Expose-Headers', ','.join(CORS_EXPOSE_HEADERS))
279-
280271
def _listeners(self):
281272
return self.DEFAULT_LISTENERS + [self.proxy.update_listener]
282273

@@ -289,6 +280,21 @@ def send_multi_value_headers(self, multi_value_headers):
289280
self.send_header(key, value)
290281

291282

283+
def append_cors_headers(response=None, request_headers=None):
    """ Add default (permissive) CORS headers to the given response, in place.

    Headers that are already present on the response are left untouched.

    :param response: response object exposing a mutable ``headers`` mapping;
        if None, nothing is done.
    :param request_headers: optional headers of the incoming request, used to
        echo back the entries of ``Access-Control-Request-Headers``. If
        omitted, the response headers are consulted instead, as before.
    """
    # Note: Use "response is None" here instead of "not response"! Likewise,
    # do not use "response.headers or {}": an empty header mapping is falsy and
    # would be replaced by a throwaway dict, silently dropping the CORS headers.
    if response is None:
        return
    headers = response.headers
    if request_headers is None:
        request_headers = headers

    if 'Access-Control-Allow-Origin' not in headers:
        headers['Access-Control-Allow-Origin'] = '*'
    if 'Access-Control-Allow-Methods' not in headers:
        headers['Access-Control-Allow-Methods'] = ','.join(CORS_ALLOWED_METHODS)
    if 'Access-Control-Allow-Headers' not in headers:
        # allow whatever the client asked for, plus the default allowed headers
        requested_headers = request_headers.get('Access-Control-Request-Headers', '')
        requested_headers = re.split(r'[,\s]+', requested_headers) + CORS_ALLOWED_HEADERS
        headers['Access-Control-Allow-Headers'] = ','.join([h for h in requested_headers if h])
    if 'Access-Control-Expose-Headers' not in headers:
        headers['Access-Control-Expose-Headers'] = ','.join(CORS_EXPOSE_HEADERS)
296+
297+
292298
def modify_and_forward(method=None, path=None, data_bytes=None, headers=None, forward_base_url=None,
293299
listeners=None, request_handler=None, client_address=None, server_address=None):
294300
listeners = GenericProxyHandler.DEFAULT_LISTENERS + (listeners or [])
@@ -388,6 +394,9 @@ def is_full_url(url):
388394
if isinstance(updated_response, Response):
389395
response = updated_response
390396

397+
# allow pre-flight CORS headers by default
398+
append_cors_headers(response)
399+
391400
return response
392401

393402

localstack/services/s3/s3_listener.py

Lines changed: 22 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19,13 +19,14 @@
1919
from localstack import config, constants
2020
from localstack.config import HOSTNAME, HOSTNAME_EXTERNAL
2121
from localstack.utils.aws import aws_stack
22+
from localstack.services.s3 import multipart_content
2223
from localstack.utils.common import (
2324
short_uid, timestamp_millis, to_str, to_bytes, clone, md5, get_service_protocol
2425
)
2526
from localstack.utils.analytics import event_publisher
26-
from localstack.utils.aws.aws_responses import requests_response
27+
from localstack.utils.http_utils import uses_chunked_encoding
2728
from localstack.utils.persistence import PersistingProxyListener
28-
from localstack.services.s3 import multipart_content
29+
from localstack.utils.aws.aws_responses import requests_response
2930

3031
CONTENT_SHA256_HEADER = 'x-amz-content-sha256'
3132
STREAMING_HMAC_PAYLOAD = 'STREAMING-AWS4-HMAC-SHA256-PAYLOAD'
@@ -357,7 +358,8 @@ def add_reponse_metadata_headers(response):
357358
if response.headers.get('cache-control') is None:
358359
response.headers['cache-control'] = 'no-cache'
359360
if response.headers.get('content-encoding') is None:
360-
response.headers['content-encoding'] = 'identity'
361+
if not uses_chunked_encoding(response):
362+
response.headers['content-encoding'] = 'identity'
361363

362364

363365
def append_last_modified_headers(response, content=None):
@@ -395,7 +397,7 @@ def append_list_objects_marker(method, path, data, response):
395397
query_map = urlparse.parse_qs(parsed.query)
396398
insert = '<Marker>%s</Marker>' % query_map.get('marker')[0]
397399
response._content = content.replace('</ListBucketResult>', '%s</ListBucketResult>' % insert)
398-
response.headers['Content-Length'] = str(len(response._content))
400+
response.headers.pop('Content-Length', None)
399401

400402

401403
def append_metadata_headers(method, query_map, headers):
@@ -482,6 +484,16 @@ def fix_creation_date(method, path, response):
482484
response._content = re.sub(r'([0-9])</CreationDate>', r'\1Z</CreationDate>', to_str(response._content))
483485

484486

487+
def convert_to_chunked_encoding(method, path, response):
    """ Mark the response of a "GET /" request as using chunked transfer encoding.

    Only applies when the method is GET and the path is exactly "/", and the
    response is not already flagged as chunked. In that case the
    Transfer-Encoding header is set to "chunked" and the Content-Encoding and
    Content-Length headers are removed. The response is modified in place.
    """
    is_root_get = method == 'GET' and path == '/'
    if not is_root_get:
        return

    resp_headers = response.headers
    current_encoding = resp_headers.get('Transfer-Encoding', '')
    if current_encoding.lower() == 'chunked':
        # already chunked - nothing to do
        return

    resp_headers['Transfer-Encoding'] = 'chunked'
    # these headers do not apply to a chunked body
    for obsolete_header in ('Content-Encoding', 'Content-Length'):
        resp_headers.pop(obsolete_header, None)
495+
496+
485497
def fix_etag_for_multipart(data, headers, response):
486498
# Fix for https://github.com/localstack/localstack/issues/1978
487499
if headers.get(CONTENT_SHA256_HEADER) == STREAMING_HMAC_PAYLOAD:
@@ -952,7 +964,7 @@ def forward_request(self, method, path, data, headers):
952964
# https://github.com/scality/S3/issues/237
953965
if headers.get(CONTENT_SHA256_HEADER) == STREAMING_HMAC_PAYLOAD:
954966
modified_data = strip_chunk_signatures(modified_data or data)
955-
headers['content-length'] = headers.get('x-amz-decoded-content-length')
967+
headers['Content-Length'] = headers.get('x-amz-decoded-content-length')
956968

957969
# POST requests to S3 may include a "${filename}" placeholder in the
958970
# key, which should be replaced with an actual file name before storing.
@@ -1126,7 +1138,7 @@ def return_response(self, method, path, data, headers, response, request_handler
11261138
error_object = s3_client.get_object(Bucket=bucket_name, Key=error_doc_key)
11271139
response.status_code = 200
11281140
response._content = error_object['Body'].read()
1129-
response.headers['content-length'] = len(response._content)
1141+
response.headers['Content-Length'] = str(len(response._content))
11301142
except ClientError:
11311143
# Pass on the 404 as usual
11321144
pass
@@ -1209,7 +1221,10 @@ def return_response(self, method, path, data, headers, response, request_handler
12091221
reset_content_length = True
12101222

12111223
if reset_content_length:
1212-
response.headers['content-length'] = len(response._content)
1224+
response.headers['Content-Length'] = str(len(response._content))
1225+
1226+
# convert to chunked encoding, for compatibility with certain SDKs (e.g., AWS PHP SDK)
1227+
convert_to_chunked_encoding(method, path, response)
12131228

12141229

12151230
# instantiate listener

localstack/utils/common.py

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -655,22 +655,6 @@ def download(url, path, verify_ssl=True):
655655
s.close()
656656

657657

658-
def parse_chunked_data(data):
659-
""" Parse the body of an HTTP message transmitted with chunked transfer encoding. """
660-
data = (data or '').strip()
661-
chunks = []
662-
while data:
663-
length = re.match(r'^([0-9a-zA-Z]+)\r\n.*', data)
664-
if not length:
665-
break
666-
length = length.group(1).lower()
667-
length = int(length, 16)
668-
data = data.partition('\r\n')[2]
669-
chunks.append(data[:length])
670-
data = data[length:].strip()
671-
return ''.join(chunks)
672-
673-
674658
def first_char_to_lower(s):
675659
return '%s%s' % (s[0].lower(), s[1:])
676660

localstack/utils/http_utils.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
import re
2+
3+
4+
def uses_chunked_encoding(response):
    """ Return True if the response's Transfer-Encoding header equals 'chunked' (case-insensitive). """
    transfer_encoding = response.headers.get('Transfer-Encoding', '')
    return 'chunked' == transfer_encoding.lower()
6+
7+
8+
def parse_chunked_data(data):
    """ Parse the body of an HTTP message transmitted with chunked transfer encoding.

    Accepts text or bytes and returns the concatenated chunk payloads as the
    same type ('' for empty or None input). Parsing stops at the terminating
    zero-size chunk, or at the first line that is not a valid hexadecimal
    chunk size (instead of raising ValueError on such input).

    Note: whitespace around the message and between chunks is stripped
    leniently, so non-standard extra line breaks between chunks are tolerated.
    """
    if not data:
        return ''
    if isinstance(data, bytearray):
        data = bytes(data)
    is_bytes = isinstance(data, bytes)
    crlf = b'\r\n' if is_bytes else '\r\n'
    # a chunk size consists of hex digits only; anything else ends parsing
    size_line = re.compile(br'^([0-9a-fA-F]+)\r\n' if is_bytes else r'^([0-9a-fA-F]+)\r\n')

    data = data.strip()
    chunks = []
    while data:
        match = size_line.match(data)
        if not match:
            break
        length = int(match.group(1), 16)
        if length == 0:
            # last-chunk marker: whatever follows (trailers) is not payload
            break
        data = data.partition(crlf)[2]
        chunks.append(data[:length])
        data = data[length:].strip()
    return (b'' if is_bytes else '').join(chunks)
22+
23+
24+
def create_chunked_data(data, chunk_size=80):
25+
dl = len(data)
26+
ret = ''
27+
for i in range(dl // chunk_size):
28+
ret += '%s\r\n' % (hex(chunk_size)[2:])
29+
ret += '%s\r\n\r\n' % (data[i * chunk_size: (i + 1) * chunk_size])
30+
31+
if len(data) % chunk_size != 0:
32+
ret += '%s\r\n' % (hex(len(data) % chunk_size)[2:])
33+
ret += '%s\r\n' % (data[-(len(data) % chunk_size):])
34+
35+
ret += '0\r\n\r\n'
36+
return ret

localstack/utils/server/http2_server.py

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
# import types
1+
import os
2+
import ssl
23
import asyncio
34
import logging
45
import traceback
@@ -10,7 +11,8 @@
1011
from hypercorn.config import Config
1112
from hypercorn.asyncio import serve
1213
from localstack import config
13-
from localstack.utils.common import TMP_THREADS, FuncThread
14+
from localstack.utils.common import TMP_THREADS, FuncThread, load_file
15+
from localstack.utils.http_utils import uses_chunked_encoding
1416

1517
LOG = logging.getLogger(__name__)
1618

@@ -73,13 +75,23 @@ async def index(path=None):
7375
response.status_code = 500
7476
return response
7577
if result is not None:
76-
response = await make_response(result.content or '')
77-
multi_value_headers = getattr(result, 'multi_value_headers', {})
78+
is_chunked = uses_chunked_encoding(result)
79+
result_content = result.content or ''
80+
response = await make_response(result_content)
81+
response.status_code = result.status_code
82+
if is_chunked:
83+
response.headers.pop('Content-Length', None)
84+
result.headers.pop('Server', None)
85+
result.headers.pop('Date', None)
7886
response.headers.update(dict(result.headers))
87+
# set multi-value headers
88+
multi_value_headers = getattr(result, 'multi_value_headers', {})
7989
for key, values in multi_value_headers.items():
8090
for value in values:
8191
response.headers.add_header(key, value)
82-
response.status_code = result.status_code
92+
# set default headers, if required
93+
if 'Content-Length' not in response.headers and not is_chunked:
94+
response.headers['Content-Length'] = str(len(result_content) if result_content else 0)
8395
return response
8496

8597
def run_app_sync(*args, loop=None, shutdown_event=None):
@@ -99,7 +111,16 @@ def run_app_sync(*args, loop=None, shutdown_event=None):
99111
if shutdown_event:
100112
run_kwargs['shutdown_trigger'] = shutdown_event.wait
101113
try:
102-
return loop.run_until_complete(serve(app, config, **run_kwargs))
114+
try:
115+
return loop.run_until_complete(serve(app, config, **run_kwargs))
116+
except ssl.SSLError:
117+
c_exists = os.path.exists(cert_file_name)
118+
k_exists = os.path.exists(key_file_name)
119+
c_size = len(load_file(cert_file_name)) if c_exists else 0
120+
k_size = len(load_file(key_file_name)) if k_exists else 0
121+
LOG.warning('Unable to create SSL context. Cert files exist: %s %s (%sB), %s %s (%sB)' %
122+
(cert_file_name, c_exists, c_size, key_file_name, k_exists, k_size))
123+
raise
103124
finally:
104125
try:
105126
_cancel_all_tasks(loop)

tests/unit/test_misc.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,13 @@
33
import datetime
44
import unittest
55
from requests.models import Response
6+
from localstack import config
7+
from localstack.services import infra
68
from localstack.utils.aws import aws_stack
79
from localstack.utils.bootstrap import PortMappings
810
from localstack.services.generic_proxy import GenericProxy, ProxyListener
9-
from localstack.utils.common import (
10-
download, parallelize, TMP_FILES, load_file, parse_chunked_data, json_safe, now_utc)
11-
from localstack.services import infra
12-
from localstack import config
11+
from localstack.utils.common import download, parallelize, TMP_FILES, load_file, json_safe, now_utc
12+
from localstack.utils.http_utils import parse_chunked_data, create_chunked_data
1313

1414

1515
class TestMisc(unittest.TestCase):
@@ -24,9 +24,16 @@ def test_parse_chunked_data(self):
2424
# See: https://en.wikipedia.org/wiki/Chunked_transfer_encoding
2525
chunked = '4\r\nWiki\r\n5\r\npedia\r\nE\r\n in\r\n\r\nchunks.\r\n0\r\n\r\n'
2626
expected = 'Wikipedia in\r\n\r\nchunks.'
27+
28+
# test parsing
2729
parsed = parse_chunked_data(chunked)
2830
self.assertEqual(parsed.strip(), expected.strip())
2931

32+
# test roundtrip
33+
chunked_computed = create_chunked_data(expected)
34+
parsed = parse_chunked_data(chunked_computed)
35+
self.assertEqual(parsed.strip(), expected.strip())
36+
3037
def test_convert_yaml_date_strings(self):
3138
yaml_source = 'Version: 2012-10-17'
3239
obj = yaml.safe_load(yaml_source)

0 commit comments

Comments
 (0)
0