8000 bpo-30458: Disallow control chars in http URLs. by gpshead · Pull Request #12755 · python/cpython · GitHub
[go: up one dir, main page]

Skip to content

bpo-30458: Disallow control chars in http URLs. #12755

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
May 1, 2019
Merged
Prev Previous commit
Next Next commit
Move the check to http.client where it belongs.
  • Loading branch information
gpshead committed Apr 10, 2019
commit bde613c092c9836ad9756aba96c251c99fc16d91
11 changes: 11 additions & 0 deletions Lib/http/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,14 @@
_is_legal_header_name = re.compile(rb'[^:\s][^:\r\n]*').fullmatch
_is_illegal_header_value = re.compile(rb'\n(?![ \t])|\r(?![ \t\n])').search

# These characters are not allowed within http URL paths.
# https://tools.ietf.org/html/rfc3986#section-3.3
# in order to prevent CVE-2019-9740.
_contains_disallowed_url_pchar_re = re.compile('[\x00-\x20\x7f-\x9f]')
# Arguably only these _should_ allowed:
# _is_allowed_url_pchars_re = re.compile(r"^[/!$&'()*+,;=:@%a-zA-Z0-9._~-]+$")
# We are more lenient for assumed real world compatibility purposes.

# We always set the Content-Length header for these methods because some
# servers will otherwise respond with a 411
_METHODS_EXPECTING_BODY = {'PATCH', 'POST', 'PUT'}
Expand Down Expand Up @@ -1079,6 +1087,9 @@ def putrequest(self, method, url, skip_host=False,
self._method = method
if not url:
url = '/'
# Prevent CVE-2019-9740.
if _contains_disallowed_url_pchar_re.search(url):
raise ValueError("URL can't contain control characters. {url!r}")
request = '%s %s %s' % (method, url, self._http_vsn_str)

# Non-ASCII characters should have been eliminated earlier
Expand Down
21 changes: 21 additions & 0 deletions Lib/test/test_urllib.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,27 @@ def test_willclose(self):
finally:
self.unfakehttp()

def test_url_with_newline_header_injection_rejected(self):
self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
host = "localhost:7777?a=1 HTTP/1.1\r\nX-injected: header\r\nTEST: 123"
schemeless_url = "//" + host + ":8080/test/?test=a"
try:
# We explicitly test urllib.request.urlopen() instead of the top
# level 'def urlopen()' function defined in this... (quite ugly)
# test suite. they use different url opening codepaths. plain
# urlopen uses FancyURLOpener which goes via a codepath that
# calls urllib.parse.quote() on the URL which makes all of the
# above attempts at injection within the url _path_ safe.
with self.assertRaisesRegex(ValueError, "can't contain control"):
urllib.request.urlopen(f"http:{schemeless_url}")
with self.assertRaisesRegex(ValueError, "can't contain control"):
urllib.request.urlopen(f"https:{schemeless_url}")
resp = urlopen(f"http:{schemeless_url}")
self.assertNotIn('\r', resp.geturl())
self.assertNotIn('\n', resp.geturl())
finally:
self.unfakehttp()

def test_read_0_9(self):
# "0.9" response accepted (but not "simple responses" without
# a status line)
Expand Down
23 changes: 1 addition & 22 deletions Lib/urllib/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -991,8 +991,6 @@ def splittype(url):


_typeprog = None
_control_char_re = None
_schemes_disallowing_control_chars = frozenset({'http', 'https', 'ftp'})
def _splittype(url):
"""splittype('type:opaquestring') --> 'type', 'opaquestring'."""
global _typeprog
Expand All @@ -1002,26 +1000,7 @@ def _splittype(url):
match = _typeprog.match(url)
if match:
scheme, data = match.groups()
scheme = scheme.lower()
if scheme in _schemes_disallowing_control_chars:
# Sanity check url data to avoid control characters.
# https://bugs.python.org/issue14826
# https://bugs.python.org/issue36276
# The same control characters check was adopted by Golang in:
# https://go-review.googlesource.com/c/go/+/159157
# Isn't it odd to be performing validation within this utility
# function? Yes... but it is in wide use in all of the right
# places where URLs need a sanity check to avoid potential security
# issues in newline delimited text based protocol implementations.
# This way many things get it for free without every use needing to
# be updated to explicitly sanity check the path contents.
global _control_char_re
if _control_char_re is None:
_control_char_re = re.compile('[\x00-\x1f\x7f-\x9f]')
if _control_char_re.search(data):
raise ValueError(f"{scheme} URL can't contain control "
f"characters. {data!r}")
return scheme, data
return scheme.lower(), data
return None, url


Expand Down
0