From e429b2a9222039d266f1f2d2810ae8b383696095 Mon Sep 17 00:00:00 2001 From: Sergey Miryanov Date: Thu, 5 Jun 2025 10:28:11 -0700 Subject: [PATCH] gh-134155: fix AttributeError in email._header_value_parser.get_address (GH-134194) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Append the defect to defects instead of to the parse tree. (cherry picked from commit d9cad074d52fe31327429fd81e4d2eeea3dbe35b) Co-authored-by: Sergey Miryanov Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> Co-authored-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> --- Lib/email/_header_value_parser.py | 4 +- .../test_email/test__header_value_parser.py | 45 +++++++++++++++++++ ...-05-18-23-46-21.gh-issue-134152.30HwbX.rst | 1 + 3 files changed, 48 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-05-18-23-46-21.gh-issue-134152.30HwbX.rst diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index f11fa83d45ed2d..91243378dc0441 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -1575,7 +1575,7 @@ def get_dtext(value): def _check_for_early_dl_end(value, domain_literal): if value: return False - domain_literal.append(errors.InvalidHeaderDefect( + domain_literal.defects.append(errors.InvalidHeaderDefect( "end of input inside domain-literal")) domain_literal.append(ValueTerminal(']', 'domain-literal-end')) return True @@ -1594,9 +1594,9 @@ def get_domain_literal(value): raise errors.HeaderParseError("expected '[' at start of domain-literal " "but found '{}'".format(value)) value = value[1:] + domain_literal.append(ValueTerminal('[', 'domain-literal-start')) if _check_for_early_dl_end(value, domain_literal): return domain_literal, value - domain_literal.append(ValueTerminal('[', 'domain-literal-start')) if value[0] in WSP: token, value = get_fws(value) domain_literal.append(token) diff --git 
a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py index fd4ac2c404ce47..179e236ecdfd7f 100644 --- a/Lib/test/test_email/test__header_value_parser.py +++ b/Lib/test/test_email/test__header_value_parser.py @@ -2491,6 +2491,38 @@ def test_get_address_quoted_strings_in_atom_list(self): self.assertEqual(address.all_mailboxes[0].domain, 'example.com') self.assertEqual(address.all_mailboxes[0].addr_spec, '"example example"@example.com') + def test_get_address_with_invalid_domain(self): + address = self._test_get_x(parser.get_address, + '<T@[', + '<T@[]>', + '<T@[]>', + [errors.InvalidHeaderDefect, # missing trailing '>' on angle-addr + errors.InvalidHeaderDefect, # end of input inside domain-literal + ], + '') + self.assertEqual(address.token_type, 'address') + self.assertEqual(len(address.mailboxes), 0) + self.assertEqual(len(address.all_mailboxes), 1) + self.assertEqual(address.all_mailboxes[0].domain, '[]') + self.assertEqual(address.all_mailboxes[0].local_part, 'T') + self.assertEqual(address.all_mailboxes[0].token_type, 'invalid-mailbox') + self.assertEqual(address[0].token_type, 'invalid-mailbox') + + address = self._test_get_x(parser.get_address, + '!an??:=m==fr2@[C', + '!an??:=m==fr2@[C];', + '!an??:=m==fr2@[C];', + [errors.InvalidHeaderDefect, # end of header in group + errors.InvalidHeaderDefect, # end of input inside domain-literal + ], + '') + self.assertEqual(address.token_type, 'address') + self.assertEqual(len(address.mailboxes), 0) + self.assertEqual(len(address.all_mailboxes), 1) + self.assertEqual(address.all_mailboxes[0].domain, '[C]') + self.assertEqual(address.all_mailboxes[0].local_part, '=m==fr2') + self.assertEqual(address.all_mailboxes[0].token_type, 'invalid-mailbox') + self.assertEqual(address[0].token_type, 'group') # get_address_list @@ -2765,6 +2797,19 @@ def test_parse_valid_message_id(self): ) self.assertEqual(message_id.token_type, 'message-id') + def test_parse_message_id_with_invalid_domain(self): + 
message_id = self._test_parse_x( + parser.parse_message_id, + "<T@[", + "<T@[]>", + "<T@[]>", + [errors.ObsoleteHeaderDefect] + [errors.InvalidHeaderDefect] * 2, + [], + ) + self.assertEqual(message_id.token_type, 'message-id') + self.assertEqual(str(message_id.all_defects[-1]), + "end of input inside domain-literal") + def test_parse_message_id_with_remaining(self): message_id = self._test_parse_x( parser.parse_message_id, diff --git a/Misc/NEWS.d/next/Library/2025-05-18-23-46-21.gh-issue-134152.30HwbX.rst b/Misc/NEWS.d/next/Library/2025-05-18-23-46-21.gh-issue-134152.30HwbX.rst new file mode 100644 index 00000000000000..911a4a59ea6079 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-18-23-46-21.gh-issue-134152.30HwbX.rst @@ -0,0 +1 @@ +:mod:`email`: Fix parsing of email message ID with invalid domain.