From 16bf7809ebba7c3b2b2d3e813777ba83b3819eaf Mon Sep 17 00:00:00 2001 From: matthewbelisle-wf Date: Fri, 19 Oct 2018 05:52:59 -0500 Subject: [PATCH 1/5] bpo-34866: Adding max_num_fields to cgi.FieldStorage (GH-9660) Adding `max_num_fields` to `cgi.FieldStorage` to make DOS attacks harder by limiting the number of `MiniFieldStorage` objects created by `FieldStorage`. (cherry picked from commit 209144831b0a19715bda3bd72b14a3e6192d9cc1) --- Lib/cgi.py | 48 ++++++++++++---- Lib/test/test_cgi.py | 55 +++++++++++++++++++ Lib/urlparse.py | 21 ++++++- .../2018-10-03-11-07-28.bpo-34866.ML6KpJ.rst | 2 + 4 files changed, 111 insertions(+), 15 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2018-10-03-11-07-28.bpo-34866.ML6KpJ.rst diff --git a/Lib/cgi.py b/Lib/cgi.py index 7c51b44db1a9bd..93b0f12c1fec14 100755 --- a/Lib/cgi.py +++ b/Lib/cgi.py @@ -184,11 +184,12 @@ def parse_qs(qs, keep_blank_values=0, strict_parsing=0): return urlparse.parse_qs(qs, keep_blank_values, strict_parsing) -def parse_qsl(qs, keep_blank_values=0, strict_parsing=0): +def parse_qsl(qs, keep_blank_values=0, strict_parsing=0, max_num_fields=None): """Parse a query given as a string argument.""" warn("cgi.parse_qsl is deprecated, use urlparse.parse_qsl instead", PendingDeprecationWarning, 2) - return urlparse.parse_qsl(qs, keep_blank_values, strict_parsing) + return urlparse.parse_qsl(qs, keep_blank_values, strict_parsing, + max_num_fields) def parse_multipart(fp, pdict): """Parse multipart input. @@ -393,7 +394,8 @@ class FieldStorage: """ def __init__(self, fp=None, headers=None, outerboundary="", - environ=os.environ, keep_blank_values=0, strict_parsing=0): + environ=os.environ, keep_blank_values=0, strict_parsing=0, + max_num_fields=None): """Constructor. Read multipart/* until last part. Arguments, all optional: @@ -420,10 +422,14 @@ def __init__(self, fp=None, headers=None, outerboundary="", If false (the default), errors are silently ignored. If true, errors raise a ValueError exception. + max_num_fields: int. If set, then __init__ throws a ValueError + if there are more than n fields read by parse_qsl(). + """ method = 'GET' self.keep_blank_values = keep_blank_values self.strict_parsing = strict_parsing + self.max_num_fields = max_num_fields if 'REQUEST_METHOD' in environ: method = environ['REQUEST_METHOD'].upper() self.qs_on_post = None @@ -606,10 +612,9 @@ def read_urlencoded(self): qs = self.fp.read(self.length) if self.qs_on_post: qs += '&' + self.qs_on_post - self.list = list = [] - for key, value in urlparse.parse_qsl(qs, self.keep_blank_values, - self.strict_parsing): - list.append(MiniFieldStorage(key, value)) + query = urlparse.parse_qsl(qs, self.keep_blank_values, + self.strict_parsing, self.max_num_fields) + self.list = [MiniFieldStorage(key, value) for key, value in query] self.skip_lines() FieldStorageClass = None @@ -621,20 +626,39 @@ def read_multi(self, environ, keep_blank_values, strict_parsing): raise ValueError, 'Invalid boundary in multipart form: %r' % (ib,) self.list = [] if self.qs_on_post: - for key, value in urlparse.parse_qsl(self.qs_on_post, - self.keep_blank_values, self.strict_parsing): - self.list.append(MiniFieldStorage(key, value)) + query = urlparse.parse_qsl(self.qs_on_post, + self.keep_blank_values, + self.strict_parsing, + self.max_num_fields) + self.list.extend(MiniFieldStorage(key, value) + for key, value in query) FieldStorageClass = None + # Propagate max_num_fields into the sub class appropriately + max_num_fields = self.max_num_fields + sub_max_num_fields = self.max_num_fields + if max_num_fields is not None: + sub_max_num_fields -= len(self.list) + klass = self.FieldStorageClass or self.__class__ part = klass(self.fp, {}, ib, - environ, keep_blank_values, strict_parsing) + environ, keep_blank_values, strict_parsing, + sub_max_num_fields) + # Throw first part away while not part.done: headers = rfc822.Message(self.fp) part = klass(self.fp, headers, ib, - environ, keep_blank_values, strict_parsing) + environ, keep_blank_values, strict_parsing, + sub_max_num_fields) + + if max_num_fields is not None and part.list: + max_num_fields -= len(part.list) + sub_max_num_fields -= len(part.list) + self.list.append(part) + if max_num_fields is not None and max_num_fields < len(self.list): + raise ValueError('Max number of fields exceeded') self.skip_lines() def read_single(self): diff --git a/Lib/test/test_cgi.py b/Lib/test/test_cgi.py index c9cf09525d713a..743c2afbd4cd24 100644 --- a/Lib/test/test_cgi.py +++ b/Lib/test/test_cgi.py @@ -1,3 +1,4 @@ +from io import BytesIO from test.test_support import run_unittest, check_warnings import cgi import os @@ -316,6 +317,60 @@ def testQSAndUrlEncode(self): v = gen_result(data, environ) self.assertEqual(self._qs_result, v) + def test_max_num_fields(self): + # For application/x-www-form-urlencoded + data = '&'.join(['a=a']*11) + environ = { + 'CONTENT_LENGTH': str(len(data)), + 'CONTENT_TYPE': 'application/x-www-form-urlencoded', + 'REQUEST_METHOD': 'POST', + } + + with self.assertRaises(ValueError): + cgi.FieldStorage( + fp=BytesIO(data.encode()), + environ=environ, + max_num_fields=10, + ) + + # For multipart/form-data + data = """---123 +Content-Disposition: form-data; name="a" + +3 +---123 +Content-Type: application/x-www-form-urlencoded + +a=4 +---123 +Content-Type: application/x-www-form-urlencoded + +a=5 +---123-- +""" + environ = { + 'CONTENT_LENGTH': str(len(data)), + 'CONTENT_TYPE': 'multipart/form-data; boundary=-123', + 'QUERY_STRING': 'a=1&a=2', + 'REQUEST_METHOD': 'POST', + } + + # 2 GET entities + # 1 top level POST entities + # 1 entity within the second POST entity + # 1 entity within the third POST entity + with self.assertRaises(ValueError): + cgi.FieldStorage( + fp=BytesIO(data.encode()), + environ=environ, + max_num_fields=4, + ) + cgi.FieldStorage( + fp=BytesIO(data.encode()), + environ=environ, + max_num_fields=5, + ) + def testQSAndFormData(self): data = """ ---123 diff --git a/Lib/urlparse.py b/Lib/urlparse.py index 4cd3d6743a960f..f7c2b032b0971e 100644 --- a/Lib/urlparse.py +++ b/Lib/urlparse.py @@ -361,7 +361,7 @@ def unquote(s): append(item) return ''.join(res) -def parse_qs(qs, keep_blank_values=0, strict_parsing=0): +def parse_qs(qs, keep_blank_values=0, strict_parsing=0, max_num_fields=None): """Parse a query given as a string argument. Arguments: @@ -378,16 +378,20 @@ def parse_qs(qs, keep_blank_values=0, strict_parsing=0): strict_parsing: flag indicating what to do with parsing errors. If false (the default), errors are silently ignored. If true, errors raise a ValueError exception. + + max_num_fields: int. If set, then throws a ValueError if there + are more than n fields read by parse_qsl(). """ dict = {} - for name, value in parse_qsl(qs, keep_blank_values, strict_parsing): + for name, value in parse_qsl(qs, keep_blank_values, strict_parsing, + max_num_fields): if name in dict: dict[name].append(value) else: dict[name] = [value] return dict -def parse_qsl(qs, keep_blank_values=0, strict_parsing=0): +def parse_qsl(qs, keep_blank_values=0, strict_parsing=0, max_num_fields=None): """Parse a query given as a string argument. Arguments: @@ -404,8 +408,19 @@ def parse_qsl(qs, keep_blank_values=0, strict_parsing=0): false (the default), errors are silently ignored. If true, errors raise a ValueError exception. + max_num_fields: int. If set, then throws a ValueError if there + are more than n fields read by parse_qsl(). + Returns a list, as G-d intended. """ + # If max_num_fields is defined then check that the number of fields + # is less than max_num_fields. This prevents a memory exhaustion DOS + # attack via post bodies with many fields. + if max_num_fields is not None: + num_fields = 1 + qs.count('&') + qs.count(';') + if max_num_fields < num_fields: + raise ValueError('Max number of fields exceeded') + pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')] r = [] for name_value in pairs: diff --git a/Misc/NEWS.d/next/Library/2018-10-03-11-07-28.bpo-34866.ML6KpJ.rst b/Misc/NEWS.d/next/Library/2018-10-03-11-07-28.bpo-34866.ML6KpJ.rst new file mode 100644 index 00000000000000..90c146ce834edf --- /dev/null +++ b/Misc/NEWS.d/next/Library/2018-10-03-11-07-28.bpo-34866.ML6KpJ.rst @@ -0,0 +1,2 @@ +Adding ``max_num_fields`` to ``cgi.FieldStorage`` to make DOS attacks harder by +limiting the number of ``MiniFieldStorage`` objects created by ``FieldStorage``. From f8966df2bdd7507589b5eaa66b02860f12b57135 Mon Sep 17 00:00:00 2001 From: Matt Belisle Date: Mon, 22 Oct 2018 10:06:43 -0500 Subject: [PATCH 2/5] Simpler max_num_fields logic --- Lib/cgi.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/Lib/cgi.py b/Lib/cgi.py index 93b0f12c1fec14..5b903e0347739c 100755 --- a/Lib/cgi.py +++ b/Lib/cgi.py @@ -636,29 +636,29 @@ def read_multi(self, environ, keep_blank_values, strict_parsing): # Propagate max_num_fields into the sub class appropriately max_num_fields = self.max_num_fields - sub_max_num_fields = self.max_num_fields if max_num_fields is not None: - sub_max_num_fields -= len(self.list) + max_num_fields -= len(self.list) klass = self.FieldStorageClass or self.__class__ part = klass(self.fp, {}, ib, environ, keep_blank_values, strict_parsing, - sub_max_num_fields) + max_num_fields) # Throw first part away while not part.done: headers = rfc822.Message(self.fp) part = klass(self.fp, headers, ib, environ, keep_blank_values, strict_parsing, - sub_max_num_fields) + max_num_fields) - if max_num_fields is not None and part.list: - max_num_fields -= len(part.list) - sub_max_num_fields -= len(part.list) + if max_num_fields is not None: + max_num_fields -= 1 + if part.list: + max_num_fields -= len(part.list) + if max_num_fields < 0: + raise ValueError('Max number of fields exceeded') self.list.append(part) - if max_num_fields is not None and max_num_fields < len(self.list): - raise ValueError('Max number of fields exceeded') self.skip_lines() def read_single(self): From 90ab0d5acd4662e6d176674c76e2c55b05892492 Mon Sep 17 00:00:00 2001 From: Matt Belisle Date: Tue, 30 Oct 2018 10:20:23 -0500 Subject: [PATCH 3/5] Adding Doc/library entries for max_num_fields --- Doc/library/cgi.rst | 4 ++-- Doc/library/urlparse.rst | 16 ++++++++++++++-- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/Doc/library/cgi.rst b/Doc/library/cgi.rst index 1bfdb390678492..ecd62c8c019463 100644 --- a/Doc/library/cgi.rst +++ b/Doc/library/cgi.rst @@ -292,12 +292,12 @@ algorithms implemented in this module in other circumstances. passed to :func:`urlparse.parse_qs` unchanged. -.. function:: parse_qs(qs[, keep_blank_values[, strict_parsing]]) +.. function:: parse_qs(qs[, keep_blank_values[, strict_parsing[, max_num_fields]]]) This function is deprecated in this module. Use :func:`urlparse.parse_qs` instead. It is maintained here only for backward compatibility. -.. function:: parse_qsl(qs[, keep_blank_values[, strict_parsing]]) +.. function:: parse_qsl(qs[, keep_blank_values[, strict_parsing[, max_num_fields]]]) This function is deprecated in this module. Use :func:`urlparse.parse_qsl` instead. It is maintained here only for backward compatibility. diff --git a/Doc/library/urlparse.rst b/Doc/library/urlparse.rst index b933dda3d24259..9519b5b8fab9bd 100644 --- a/Doc/library/urlparse.rst +++ b/Doc/library/urlparse.rst @@ -126,7 +126,7 @@ The :mod:`urlparse` module defines the following functions: Added IPv6 URL parsing capabilities. -.. function:: parse_qs(qs[, keep_blank_values[, strict_parsing]]) +.. function:: parse_qs(qs[, keep_blank_values[, strict_parsing[, max_num_fields]]]) Parse a query string given as a string argument (data of type :mimetype:`application/x-www-form-urlencoded`). Data are returned as a @@ -143,14 +143,20 @@ The :mod:`urlparse` module defines the following functions: parsing errors. If false (the default), errors are silently ignored. If true, errors raise a :exc:`ValueError` exception. + The optional argument *max_num_fields* is a flag for the maximum number of + fields to read. If set, then throws a :exc:`ValueError` if there are more + than *max_num_fields* fields read. Default is *None*. + Use the :func:`urllib.urlencode` function to convert such dictionaries into query strings. .. versionadded:: 2.6 Copied from the :mod:`cgi` module. + .. versionchanged:: 2.7.16 + Added *max_num_fields* param. -.. function:: parse_qsl(qs[, keep_blank_values[, strict_parsing]]) +.. function:: parse_qsl(qs[, keep_blank_values[, strict_parsing[, max_num_fields]]]) Parse a query string given as a string argument (data of type :mimetype:`application/x-www-form-urlencoded`). Data are returned as a list of @@ -166,12 +172,18 @@ The :mod:`urlparse` module defines the following functions: parsing errors. If false (the default), errors are silently ignored. If true, errors raise a :exc:`ValueError` exception. + The optional argument *max_num_fields* is a flag for the maximum number of + fields to read. If set, then throws a :exc:`ValueError` if there are more + than *max_num_fields* fields read. Default is *None*. + Use the :func:`urllib.urlencode` function to convert such lists of pairs into query strings. .. versionadded:: 2.6 Copied from the :mod:`cgi` module. + .. versionchanged:: 2.7.16 + Added *max_num_fields* param. .. function:: urlunparse(parts) From 1767079519278d86331ee3622fd4f9f36093f823 Mon Sep 17 00:00:00 2001 From: Matt Belisle Date: Tue, 30 Oct 2018 14:53:22 -0500 Subject: [PATCH 4/5] param -> parameter --- Doc/library/urlparse.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Doc/library/urlparse.rst b/Doc/library/urlparse.rst index 9519b5b8fab9bd..7fcd6b8b7290e3 100644 --- a/Doc/library/urlparse.rst +++ b/Doc/library/urlparse.rst @@ -154,7 +154,7 @@ The :mod:`urlparse` module defines the following functions: Copied from the :mod:`cgi` module. .. versionchanged:: 2.7.16 - Added *max_num_fields* param. + Added *max_num_fields* parameter. .. function:: parse_qsl(qs[, keep_blank_values[, strict_parsing[, max_num_fields]]]) @@ -183,7 +183,7 @@ The :mod:`urlparse` module defines the following functions: Copied from the :mod:`cgi` module. .. versionchanged:: 2.7.16 - Added *max_num_fields* param. + Added *max_num_fields* parameter. .. function:: urlunparse(parts) From c5d4fe3adc0f969f285a478538c5bd649acf36cb Mon Sep 17 00:00:00 2001 From: Matt Belisle Date: Tue, 30 Oct 2018 15:38:09 -0500 Subject: [PATCH 5/5] Changing max_num_fields wording --- Doc/library/urlparse.rst | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Doc/library/urlparse.rst b/Doc/library/urlparse.rst index 7fcd6b8b7290e3..22249da54fbb7d 100644 --- a/Doc/library/urlparse.rst +++ b/Doc/library/urlparse.rst @@ -143,9 +143,9 @@ The :mod:`urlparse` module defines the following functions: parsing errors. If false (the default), errors are silently ignored. If true, errors raise a :exc:`ValueError` exception. - The optional argument *max_num_fields* is a flag for the maximum number of - fields to read. If set, then throws a :exc:`ValueError` if there are more - than *max_num_fields* fields read. Default is *None*. + The optional argument *max_num_fields* is the maximum number of fields to + read. If set, then throws a :exc:`ValueError` if there are more than + *max_num_fields* fields read. Use the :func:`urllib.urlencode` function to convert such dictionaries into query strings. @@ -172,9 +172,9 @@ The :mod:`urlparse` module defines the following functions: parsing errors. If false (the default), errors are silently ignored. If true, errors raise a :exc:`ValueError` exception. - The optional argument *max_num_fields* is a flag for the maximum number of - fields to read. If set, then throws a :exc:`ValueError` if there are more - than *max_num_fields* fields read. Default is *None*. + The optional argument *max_num_fields* is the maximum number of fields to + read. If set, then throws a :exc:`ValueError` if there are more than + *max_num_fields* fields read. Use the :func:`urllib.urlencode` function to convert such lists of pairs into query strings.