From 91b5b3772307ebb80cd519feeddc4ecf52f11a84 Mon Sep 17 00:00:00 2001 From: Thomas Dwyer Date: Mon, 21 Aug 2023 19:13:05 -0500 Subject: [PATCH 1/3] gh-102988: Detect email address parsing errors and return empty tuple to indicate the parsing error (old API) --- Doc/whatsnew/3.13.rst | 8 +++ Lib/email/utils.py | 71 ++++++++++++++++++-- Lib/test/test_email/test_email.py | 108 ++++++++++++++++++++++-------- 3 files changed, 152 insertions(+), 35 deletions(-) diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index bfab868d1c5b62..98dc01fc63bbe8 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -122,6 +122,14 @@ dbm from the database. (Contributed by Dong-hee Na in :gh:`107122`.) +email +----- + +* :func:`email.utils.getaddresses` and :func:`email.utils.parseaddr` now return + ``('', '')`` 2-tuples in more situations where invalid email addresses are + encountered instead of potentially inaccurate values. + (Contributed by Thomas Dwyer for :gh:`102988` to ameliorate CVE-2023-27043.) 
+ io -- diff --git a/Lib/email/utils.py b/Lib/email/utils.py index 81da5394ea1695..d592456d02388b 100644 --- a/Lib/email/utils.py +++ b/Lib/email/utils.py @@ -106,12 +106,62 @@ def formataddr(pair, charset='utf-8'): return address +def _pre_parse_validation(email_header_fields): + accepted_values = [] + for v in email_header_fields: + s = v.replace('\\(', '').replace('\\)', '') + if s.count('(') != s.count(')'): + v = "('', '')" + accepted_values.append(v) + + return accepted_values + + +def _post_parse_validation(parsed_email_header_tuples): + accepted_values = [] + # The parser would have parsed a correctly formatted domain-literal + # The existence of an [ after parsing indicates a parsing failure + for v in parsed_email_header_tuples: + if '[' in v[1]: + v = ('', '') + accepted_values.append(v) + + return accepted_values + def getaddresses(fieldvalues): - """Return a list of (REALNAME, EMAIL) for each fieldvalue.""" - all = COMMASPACE.join(str(v) for v in fieldvalues) + """Return a list of (REALNAME, EMAIL) or ('','') for each fieldvalue. + + When parsing fails for a fieldvalue, a 2-tuple of ('', '') is returned in + its place. + + If the resulting list of parsed address is greater than number of + fieldvalues in the input list a parsing error has occurred, so a list + containing a single empty 2-tuple [('', '')] is returned in its place. + This is done to avoid invalid output. 
+ + Malformed input: getaddresses(['alice@example.com <bob@example.com>']) + Invalid output: [('', 'alice@example.com'), ('', 'bob@example.com')] + Safe output: [('', '')] + """ + fieldvalues = [str(v) for v in fieldvalues] + fieldvalues = _pre_parse_validation(fieldvalues) + all = COMMASPACE.join(v for v in fieldvalues) a = _AddressList(all) - return a.addresslist + result = _post_parse_validation(a.addresslist) + + # When a comma is used in the Real Name part it is not a deliminator + # So strip those out before counting the commas + pattern = r'"[^"]*,[^"]*"|\'[^\']*,[^\']\'*' + n = 0 + for v in fieldvalues: + v = re.sub(pattern, '', v) + n += v.count(',') + 1 + + if len(result) != n: + return [('', '')] + + return result def _format_timetuple_and_zone(timetuple, zone): @@ -212,9 +262,18 @@ def parseaddr(addr): Return a tuple of realname and email address, unless the parse fails, in which case return a 2-tuple of ('', ''). """ - addrs = _AddressList(addr).addresslist - if not addrs: - return '', '' + if isinstance(addr, list): + addr = addr[0] + + if not isinstance(addr, str): + return ('', '') + + addr = _pre_parse_validation([addr])[0] + addrs = _post_parse_validation(_AddressList(addr).addresslist) + + if not addrs or len(addrs) > 1: + return ('', '') + return addrs[0] diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py index cdb6ef1275e520..27286db025c8a9 100644 --- a/Lib/test/test_email/test_email.py +++ b/Lib/test/test_email/test_email.py @@ -3319,32 +3319,92 @@ def test_getaddresses(self): [('Al Person', 'aperson@dom.ain'), ('Bud Person', 'bperson@dom.ain')]) - def test_getaddresses_comma_in_name(self): - """GH-106669 regression test.""" - self.assertEqual( - utils.getaddresses( - [ - '"Bud, Person" <bperson@dom.ain>', - 'aperson@dom.ain (Al Person)', - '"Mariusz Felisiak" <to@example.com>', - ] - ), - [ - ('Bud, Person', 'bperson@dom.ain'), - ('Al Person', 'aperson@dom.ain'), - ('Mariusz Felisiak', 'to@example.com'), - ], - ) + def test_getaddresses_parsing_errors(self): + 
"""Test for parsing errors from CVE-2023-27043""" + eq = self.assertEqual + eq(utils.getaddresses(['alice@example.org(<bob@example.com>']), + [('', '')]) + eq(utils.getaddresses(['alice@example.org)<bob@example.com>']), + [('', '')]) + eq(utils.getaddresses(['alice@example.org<<bob@example.com>']), + [('', '')]) + eq(utils.getaddresses(['alice@example.org><bob@example.com>']), + [('', '')]) + eq(utils.getaddresses(['alice@example.org@<bob@example.com>']), + [('', '')]) + eq(utils.getaddresses(['alice@example.org,<bob@example.com>']), + [('', 'alice@example.org'), ('', 'bob@example.com')]) + eq(utils.getaddresses(['alice@example.org;<bob@example.com>']), + [('', '')]) + eq(utils.getaddresses(['alice@example.org:<bob@example.com>']), + [('', '')]) + eq(utils.getaddresses(['alice@example.org.<bob@example.com>']), + [('', '')]) + eq(utils.getaddresses(['alice@example.org"<bob@example.com>']), + [('', '')]) + eq(utils.getaddresses(['alice@example.org[<bob@example.com>']), + [('', '')]) + eq(utils.getaddresses(['alice@example.org]<bob@example.com>']), + [('', '')]) + + def test_parseaddr_parsing_errors(self): + """Test for parsing errors from CVE-2023-27043""" + eq = self.assertEqual + eq(utils.parseaddr(['alice@example.org(<bob@example.com>']), + ('', '')) + eq(utils.parseaddr(['alice@example.org)<bob@example.com>']), + ('', '')) + eq(utils.parseaddr(['alice@example.org<<bob@example.com>']), + ('', '')) + eq(utils.parseaddr(['alice@example.org><bob@example.com>']), + ('', '')) + eq(utils.parseaddr(['alice@example.org@<bob@example.com>']), + ('', '')) + eq(utils.parseaddr(['alice@example.org,<bob@example.com>']), + ('', '')) + eq(utils.parseaddr(['alice@example.org;<bob@example.com>']), + ('', '')) + eq(utils.parseaddr(['alice@example.org:<bob@example.com>']), + ('', '')) + eq(utils.parseaddr(['alice@example.org.<bob@example.com>']), + ('', '')) + eq(utils.parseaddr(['alice@example.org"<bob@example.com>']), + ('', '')) + eq(utils.parseaddr(['alice@example.org[<bob@example.com>']), + ('', '')) + eq(utils.parseaddr(['alice@example.org]<bob@example.com>']), + ('', '')) def test_getaddresses_nasty(self): eq = self.assertEqual + eq(utils.getaddresses(['"Sürname, Firstname" <to@example.com>']), + [('Sürname, Firstname', 'to@example.com')]) eq(utils.getaddresses(['foo: ;']), [('', '')]) - eq(utils.getaddresses( - ['[]*-- =~$']), - [('', ''), ('', ''), ('', '*--')]) + 
eq(utils.getaddresses(['[]*-- =~$']), [('', '')]) eq(utils.getaddresses( ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']), [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')]) + eq(utils.getaddresses( + [r'Pete(A nice \) chap) <pete(his account)@silly.test(his host)>']), + [('Pete (A nice ) chap his account his host)', 'pete@silly.test')]) + eq(utils.getaddresses( + ['(Empty list)(start)Undisclosed recipients :(nobody(I know))']), + [('', '')]) + eq(utils.getaddresses( + ['Mary <@machine.tld:mary@example.net>, , jdoe@test . example']), + [('Mary', 'mary@example.net'), ('', ''), ('', 'jdoe@test.example')]) + eq(utils.getaddresses( + ['John Doe <jdoe@machine(comment). example>']), + [('John Doe (comment)', 'jdoe@machine.example')]) + eq(utils.getaddresses( + ['"Mary Smith: Personal Account" <smith@home.example>']), + [('Mary Smith: Personal Account', 'smith@home.example')]) + eq(utils.getaddresses( + ['Undisclosed recipients:;']), + [('', '')]) + eq(utils.getaddresses( + [r'<boss@nil.test>, "Giant; \"Big\" Box" <bob@example.net>']), + [('', 'boss@nil.test'), ('Giant; "Big" Box', 'bob@example.net')]) def test_getaddresses_embedded_comment(self): """Test proper handling of a nested comment""" @@ -3712,16 +3772,6 @@ def test_bytes_header_parser(self): self.assertIsInstance(msg.get_payload(), str) self.assertIsInstance(msg.get_payload(decode=True), bytes) - def test_header_parser_multipart_is_valid(self): - # Don't flag valid multipart emails as having defects - with openfile('msg_47.txt', encoding="utf-8") as fp: - msgdata = fp.read() - - parser = email.parser.Parser(policy=email.policy.default) - parsed_msg = parser.parsestr(msgdata, headersonly=True) - - self.assertEqual(parsed_msg.defects, []) - def test_bytes_parser_does_not_close_file(self): with openfile('msg_02.txt', 'rb') as fp: email.parser.BytesParser().parse(fp) From 4afc95234d26d1ce7158a631cb79bd2c27c1b6de Mon Sep 17 00:00:00 2001 From: Thomas Dwyer Date: Mon, 21 Aug 2023 19:27:23 -0500 Subject: [PATCH 2/3] Also strip out escaped commas before counting --- Lib/email/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/Lib/email/utils.py b/Lib/email/utils.py index d592456d02388b..a392da514b17b4 100644 --- a/Lib/email/utils.py +++ b/Lib/email/utils.py @@ -152,7 +152,7 @@ def getaddresses(fieldvalues): # When a comma is used in the Real Name part it is not a deliminator # So strip those out before counting the commas - pattern = r'"[^"]*,[^"]*"|\'[^\']*,[^\']\'*' + pattern = r'"[^"]*,[^"]*"|\'[^\']*,[^\']\'*|\\,' n = 0 for v in fieldvalues: v = re.sub(pattern, '', v) From 30d3ed76d254da1d30f3e8b28546789c8a875640 Mon Sep 17 00:00:00 2001 From: Thomas Dwyer Date: Fri, 8 Sep 2023 19:08:13 -0500 Subject: [PATCH 3/3] Only account for comma in double-quotes and added tests for this --- Lib/email/utils.py | 2 +- Lib/test/test_email/test_email.py | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/Lib/email/utils.py b/Lib/email/utils.py index a392da514b17b4..c37fea3ea8056e 100644 --- a/Lib/email/utils.py +++ b/Lib/email/utils.py @@ -152,7 +152,7 @@ def getaddresses(fieldvalues): # When a comma is used in the Real Name part it is not a deliminator # So strip those out before counting the commas - pattern = r'"[^"]*,[^"]*"|\'[^\']*,[^\']\'*|\\,' + pattern = r'"[^"]*,[^"]*"' n = 0 for v in fieldvalues: v = re.sub(pattern, '', v) diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py index 27286db025c8a9..992f5e961cac09 100644 --- a/Lib/test/test_email/test_email.py +++ b/Lib/test/test_email/test_email.py @@ -3320,7 +3320,7 @@ def test_getaddresses(self): ('Bud Person', 'bperson@dom.ain')]) def test_getaddresses_parsing_errors(self): - """Test for parsing errors from CVE-2023-27043""" + """Test for parsing errors from CVE-2023-27043 and CVE-2019-16056""" eq = self.assertEqual eq(utils.getaddresses(['alice@example.org(']), [('', '')]) @@ -3346,9 +3346,11 @@ def test_getaddresses_parsing_errors(self): [('', '')]) eq(utils.getaddresses(['alice@example.org]']), [('', '')]) + eq(utils.getaddresses(['"Alice, alice@example.org" <bob@example.com>']), + [('Alice, 
alice@example.org', 'bob@example.com')]) def test_parseaddr_parsing_errors(self): - """Test for parsing errors from CVE-2023-27043""" + """Test for parsing errors from CVE-2023-27043 and CVE-2019-16056""" eq = self.assertEqual eq(utils.parseaddr(['alice@example.org(']), ('', '')) @@ -3374,6 +3376,8 @@ def test_parseaddr_parsing_errors(self): ('', '')) eq(utils.parseaddr(['alice@example.org]']), ('', '')) + eq(utils.parseaddr(['"Alice, alice@example.org" <bob@example.com>']), + ('Alice, alice@example.org', 'bob@example.com')) def test_getaddresses_nasty(self): eq = self.assertEqual