8000 [2.7] bpo-34866: Adding max_num_fields to cgi.FieldStorage (GH-9660) by matthewbelisle-wf · Pull Request #9969 · python/cpython · GitHub
[go: up one dir, main page]

Skip to content

[2.7] bpo-34866: Adding max_num_fields to cgi.FieldStorage (GH-9660) #9969

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Oct 30, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Doc/library/cgi.rst
Original file line number Diff line number Diff line change
Expand Up @@ -292,12 +292,12 @@ algorithms implemented in this module in other circumstances.
passed to :func:`urlparse.parse_qs` unchanged.


.. function:: parse_qs(qs[, keep_blank_values[, strict_parsing]])
.. function:: parse_qs(qs[, keep_blank_values[, strict_parsing[, max_num_fields]]])

This function is deprecated in this module. Use :func:`urlparse.parse_qs`
instead. It is maintained here only for backward compatibility.

.. function:: parse_qsl(qs[, keep_blank_values[, strict_parsing]])
.. function:: parse_qsl(qs[, keep_blank_values[, strict_parsing[, max_num_fields]]])

This function is deprecated in this module. Use :func:`urlparse.parse_qsl`
instead. It is maintained here only for backward compatibility.
Expand Down
16 changes: 14 additions & 2 deletions Doc/library/urlparse.rst
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ The :mod:`urlparse` module defines the following functions:
Added IPv6 URL parsing capabilities.


.. function:: parse_qs(qs[, keep_blank_values[, strict_parsing]])
.. function:: parse_qs(qs[, keep_blank_values[, strict_parsing[, max_num_fields]]])

Parse a query string given as a string argument (data of type
:mimetype:`application/x-www-form-urlencoded`). Data are returned as a
Expand All @@ -143,14 +143,20 @@ The :mod:`urlparse` module defines the following functions:
parsing errors. If false (the default), errors are silently ignored. If true,
errors raise a :exc:`ValueError` exception.

The optional argument *max_num_fields* is the maximum number of fields to
read. If set, then throws a :exc:`ValueError` if there are more than
*max_num_fields* fields read.

Use the :func:`urllib.urlencode` function to convert such dictionaries into
query strings.

.. versionadded:: 2.6
Copied from the :mod:`cgi` module.

.. versionchanged:: 2.7.16
Added *max_num_fields* parameter.

.. function:: parse_qsl(qs[, keep_blank_values[, strict_parsing]])
.. function:: parse_qsl(qs[, keep_blank_values[, strict_parsing[, max_num_fields]]])

Parse a query string given as a string argument (data of type
:mimetype:`application/x-www-form-urlencoded`). Data are returned as a list of
Expand All @@ -166,12 +172,18 @@ The :mod:`urlparse` module defines the following functions:
parsing errors. If false (the default), errors are silently ignored. If true,
errors raise a :exc:`ValueError` exception.

The optional argument *max_num_fields* is the maximum number of fields to
read. If set, then throws a :exc:`ValueError` if there are more than
*max_num_fields* fields read.

Use the :func:`urllib.urlencode` function to convert such lists of pairs into
query strings.

.. versionadded:: 2.6
Copied from the :mod:`cgi` module.

.. versionchanged:: 2.7.16
Added *max_num_fields* parameter.

.. function:: urlunparse(parts)

Expand Down
48 changes: 36 additions & 12 deletions Lib/cgi.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,11 +184,12 @@ def parse_qs(qs, keep_blank_values=0, strict_parsing=0):
return urlparse.parse_qs(qs, keep_blank_values, strict_parsing)


def parse_qsl(qs, keep_blank_values=0, strict_parsing=0):
def parse_qsl(qs, keep_blank_values=0, strict_parsing=0, max_num_fields=None):
"""Parse a query given as a string argument."""
warn("cgi.parse_qsl is deprecated, use urlparse.parse_qsl instead",
PendingDeprecationWarning, 2)
return urlparse.parse_qsl(qs, keep_blank_values, strict_parsing)
return urlparse.parse_qsl(qs, keep_blank_values, strict_parsing,
max_num_fields)

def parse_multipart(fp, pdict):
"""Parse multipart input.
Expand Down Expand Up @@ -393,7 +394,8 @@ class FieldStorage:
"""

def __init__(self, fp=None, headers=None, outerboundary="",
environ=os.environ, keep_blank_values=0, strict_parsing=0):
environ=os.environ, keep_blank_values=0, strict_parsing=0,
max_num_fields=None):
"""Constructor. Read multipart/* until last part.

Arguments, all optional:
Expand All @@ -420,10 +422,14 @@ def __init__(self, fp=None, headers=None, outerboundary="",
If false (the default), errors are silently ignored.
If true, errors raise a ValueError exception.

max_num_fields: int. If set, then __init__ throws a ValueError
if there are more than n fields read by parse_qsl().

"""
method = 'GET'
self.keep_blank_values = keep_blank_values
self.strict_parsing = strict_parsing
self.max_num_fields = max_num_fields
if 'REQUEST_METHOD' in environ:
method = environ['REQUEST_METHOD'].upper()
self.qs_on_post = None
Expand Down Expand Up @@ -606,10 +612,9 @@ def read_urlencoded(self):
qs = self.fp.read(self.length)
if self.qs_on_post:
qs += '&' + self.qs_on_post
self.list = list = []
for key, value in urlparse.parse_qsl(qs, self.keep_blank_values,
self.strict_parsing):
list.append(MiniFieldStorage(key, value))
query = urlparse.parse_qsl(qs, self.keep_blank_values,
self.strict_parsing, self.max_num_fields)
self.list = [MiniFieldStorage(key, value) for key, value in query]
self.skip_lines()

FieldStorageClass = None
Expand All @@ -621,19 +626,38 @@ def read_multi(self, environ, keep_blank_values, strict_parsing):
raise ValueError, 'Invalid boundary in multipart form: %r' % (ib,)
self.list = []
if self.qs_on_post:
for key, value in urlparse.parse_qsl(self.qs_on_post,
self.keep_blank_values, self.strict_parsing):
self.list.append(MiniFieldStorage(key, value))
query = urlparse.parse_qsl(self.qs_on_post,
self.keep_blank_values,
self.strict_parsing,
self.max_num_fields)
self.list.extend(MiniFieldStorage(key, value)
for key, value in query)
FieldStorageClass = None

# Propagate max_num_fields into the sub class appropriately
max_num_fields = self.max_num_fields
if max_num_fields is not None:
max_num_fields -= len(self.list)

klass = self.FieldStorageClass or self.__class__
part = klass(self.fp, {}, ib,
environ, keep_blank_values, strict_parsing)
environ, keep_blank_values, strict_parsing,
max_num_fields)

# Throw first part away
while not part.done:
headers = rfc822.Message(self.fp)
part = klass(self.fp, headers, ib,
environ, keep_blank_values, strict_parsing)
environ, keep_blank_values, strict_parsing,
max_num_fields)

if max_num_fields is not None:
max_num_fields -= 1
if part.list:
max_num_fields -= len(part.list)
if max_num_fields < 0:
raise ValueError('Max number of fields exceeded')

self.list.append(part)
self.skip_lines()

Expand Down
55 changes: 55 additions & 0 deletions Lib/test/test_cgi.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from io import BytesIO
from test.test_support import run_unittest, check_warnings
import cgi
import os
Expand Down Expand Up @@ -316,6 +317,60 @@ def testQSAndUrlEncode(self):
v = gen_result(data, environ)
self.assertEqual(self._qs_result, v)

def test_max_num_fields(self):
# For application/x-www-form-urlencoded
data = '&'.join(['a=a']*11)
environ = {
'CONTENT_LENGTH': str(len(data)),
'CONTENT_TYPE': 'application/x-www-form-urlencoded',
'REQUEST_METHOD': 'POST',
}

with self.assertRaises(ValueError):
cgi.FieldStorage(
fp=BytesIO(data.encode()),
environ=environ,
max_num_fields=10,
)

# For multipart/form-data
data = """---123
Content-Disposition: form-data; name="a"

3
---123
Content-Type: application/x-www-form-urlencoded

a=4
---123
Content-Type: application/x-www-form-urlencoded

a=5
---123--
"""
environ = {
'CONTENT_LENGTH': str(len(data)),
'CONTENT_TYPE': 'multipart/form-data; boundary=-123',
'QUERY_STRING': 'a=1&a=2',
'REQUEST_METHOD': 'POST',
}

# 2 GET entities
# 1 top level POST entities
# 1 entity within the second POST entity
# 1 entity within the third POST entity
with self.assertRaises(ValueError):
cgi.FieldStorage(
fp=BytesIO(data.encode()),
environ=environ,
max_num_fields=4,
)
cgi.FieldStorage(
fp=BytesIO(data.encode()),
environ=environ,
max_num_fields=5,
)

def testQSAndFormData(self):
data = """
---123
Expand Down
21 changes: 18 additions & 3 deletions Lib/urlparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -361,7 +361,7 @@ def unquote(s):
append(item)
return ''.join(res)

def parse_qs(qs, keep_blank_values=0, strict_parsing=0):
def parse_qs(qs, keep_blank_values=0, strict_parsing=0, max_num_fields=None):
"""Parse a query given as a string argument.

Arguments:
Expand All @@ -378,16 +378,20 @@ def parse_qs(qs, keep_blank_values=0, strict_parsing=0):
strict_parsing: flag indicating what to do with parsing errors.
If false (the default), errors are silently ignored.
If true, errors raise a ValueError exception.

max_num_fields: int. If set, then throws a ValueError if there
are more than n fields read by parse_qsl().
"""
dict = {}
for name, value in parse_qsl(qs, keep_blank_values, strict_parsing):
for name, value in parse_qsl(qs, keep_blank_values, strict_parsing,
max_num_fields):
if name in dict:
dict[name].append(value)
else:
dict[name] = [value]
return dict

def parse_qsl(qs, keep_blank_values=0, strict_parsing=0):
def parse_qsl(qs, keep_blank_values=0, strict_parsing=0, max_num_fields=None):
"""Parse a query given as a string argument.

Arguments:
Expand All @@ -404,8 +408,19 @@ def parse_qsl(qs, keep_blank_values=0, strict_parsing=0):
false (the default), errors are silently ignored. If true,
errors raise a ValueError exception.

max_num_fields: int. If set, then throws a ValueError if there
are more than n fields read by parse_qsl().

Returns a list, as G-d intended.
"""
# If max_num_fields is defined then check that the number of fields
# is less than max_num_fields. This prevents a memory exhaustion DOS
# attack via post bodies with many fields.
if max_num_fields is not None:
num_fields = 1 + qs.count('&') + qs.count(';')
if max_num_fields < num_fields:
raise ValueError('Max number of fields exceeded')

pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
r = []
for name_value in pairs:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Adding ``max_num_fields`` to ``cgi.FieldStorage`` to make DOS attacks harder by
limiting the number of ``MiniFieldStorage`` objects created by ``FieldStorage``.
0