8000 Move the check to http.client where it belongs. · python/cpython@bde613c · GitHub
[go: up one dir, main page]

Skip to content

Commit bde613c

Browse files
committed
Move the check to http.client where it belongs.
1 parent bba966c commit bde613c

File tree

3 files changed

+33
-22
lines changed

3 files changed

+33
-22
lines changed

Lib/http/client.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,14 @@
137137
_is_legal_header_name = re.compile(rb'[^:\s][^:\r\n]*').fullmatch
138138
_is_illegal_header_value = re.compile(rb'\n(?![ \t])|\r(?![ \t\n])').search
139139

140+
# These characters are not allowed within http URL paths.
141+
# https://tools.ietf.org/html/rfc3986#section-3.3
142+
# in order to prevent CVE-2019-9740.
143+
_contains_disallowed_url_pchar_re = re.compile('[\x00-\x20\x7f-\x9f]')
144+
# Arguably only these _should_ allowed:
145+
# _is_allowed_url_pchars_re = re.compile(r"^[/!$&'()*+,;=:@%a-zA-Z0-9._~-]+$")
146+
# We are more lenient for assumed real world compatibility purposes.
147+
140148
# We always set the Content-Length header for these methods because some
141149
# servers will otherwise respond with a 411
142150
_METHODS_EXPECTING_BODY = {'PATCH', 'POST', 'PUT'}
@@ -1079,6 +1087,9 @@ def putrequest(self, method, url, skip_host=False,
10791087
self._method = method
10801088
if not url:
10811089
url = '/'
1090+
# Prevent CVE-2019-9740.
1091+
if _contains_disallowed_url_pchar_re.search(url):
1092+
raise ValueError("URL can't contain control characters. {url!r}")
10821093
request = '%s %s %s' % (method, url, self._http_vsn_str)
10831094

10841095
# Non-ASCII characters should have been eliminated earlier

Lib/test/test_urllib.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -329,6 +329,27 @@ def test_willclose(self):
329329
finally:
330330
self.unfakehttp()
331331

332+
def test_url_with_newline_header_injection_rejected(self):
333+
self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
334+
host = "localhost:7777?a=1 HTTP/1.1\r\nX-injected: header\r\nTEST: 123"
335+
schemeless_url = "//" + host + ":8080/test/?test=a"
336+
try:
337+
# We explicitly test urllib.request.urlopen() instead of the top
338+
# level 'def urlopen()' function defined in this... (quite ugly)
339+
# test suite. they use different url opening codepaths. plain
340+
# urlopen uses FancyURLOpener which goes via a codepath that
341+
# calls urllib.parse.quote() on the URL which makes all of the
342+
# above attempts at injection within the url _path_ safe.
343+
with self.assertRaisesRegex(ValueError, "can't contain control"):
344+
urllib.request.urlopen(f"http:{schemeless_url}")
345+
with self.assertRaisesRegex(ValueError, "can't contain control"):
346+
urllib.request.urlopen(f"https:{schemeless_url}")
347+
resp = urlopen(f"http:{schemeless_url}")
348+
self.assertNotIn('\r', resp.geturl())
349+
self.assertNotIn('\n', resp.geturl())
350+
finally:
351+
self.unfakehttp()
352+
332353
def test_read_0_9(self):
333354
# "0.9" response accepted (but not "simple responses" without
334355
# a status line)

Lib/urllib/parse.py

Lines changed: 1 addition & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -991,8 +991,6 @@ 106EE def splittype(url):
991991

992992

993993
_typeprog = None
994-
_control_char_re = None
995-
_schemes_disallowing_control_chars = frozenset({'http', 'https', 'ftp'})
996994
def _splittype(url):
997995
"""splittype('type:opaquestring') --> 'type', 'opaquestring'."""
998996
global _typeprog
@@ -1002,26 +1000,7 @@ def _splittype(url):
10021000
match = _typeprog.match(url)
10031001
if match:
10041002
scheme, data = match.groups()
1005-
scheme = scheme.lower()
1006-
if scheme in _schemes_disallowing_control_chars:
1007-
# Sanity check url data to avoid control characters.
1008-
# https://bugs.python.org/issue14826
1009-
# https://bugs.python.org/issue36276
1010-
# The same control characters check was adopted by Golang in:
1011-
# https://go-review.googlesource.com/c/go/+/159157
1012-
# Isn't it odd to be performing validation within this utility
1013-
# function? Yes... but it is in wide use in all of the right
1014-
# places where URLs need a sanity check to avoid potential security
1015-
# issues in newline delimited text based protocol implementations.
1016-
# This way many things get it for free without every use needing to
1017-
# be updated to explicitly sanity check the path contents.
1018-
global _control_char_re
1019-
if _control_char_re is None:
1020-
_control_char_re = re.compile('[\x00-\x1f\x7f-\x9f]')
1021-
if _control_char_re.search(data):
1022-
raise ValueError(f"{scheme} URL can't contain control "
1023-
f"characters. {data!r}")
1024-
return scheme, data
1003+
return scheme.lower(), data
10251004
return None, url
10261005

10271006

0 commit comments

Comments
 (0)
0