[3.11] gh-80222: Fix email address header folding with long quoted-string (GH-122753) by miss-islington · Pull Request #129009 · python/cpython · GitHub
[go: up one dir, main page]

Skip to content

[3.11] gh-80222: Fix email address header folding with long quoted-string (GH-122753) #129009

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Feb 19, 2025
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
gh-80222: Fix email address header folding with long quoted-string (GH-122753)

Email generators using email.policy.default could incorrectly omit the
quote ('"') characters from a quoted-string during header refolding,
leading to invalid address headers and enabling header spoofing. This
change restores the quote characters on a bare-quoted-string as the
header is refolded, and escapes backslash and quote chars in the string.
(cherry picked from commit 5aaf416)

Co-authored-by: Mike Edmunds <medmunds@gmail.com>
  • Loading branch information
medmunds authored and miss-islington committed Jan 19, 2025
commit fb09ca718a33abbf25e2739a0ade4d7eabfea41b
19 changes: 18 additions & 1 deletion Lib/email/_header_value_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,8 +95,16 @@
NLSET = {'\n', '\r'}
SPECIALSNL = SPECIALS | NLSET


def make_quoted_pairs(value):
    """Escape dquote and backslash for use within a quoted-string.

    *value* is converted with ``str()`` first, so any object may be
    passed.  The result does not include the surrounding double quotes;
    it is only the escaped content.
    """
    # Backslashes must be doubled before quotes are escaped; otherwise the
    # backslash inserted in front of each '"' would itself be doubled.
    return str(value).replace('\\', '\\\\').replace('"', '\\"')


def quote_string(value):
    """Return *value* wrapped in double quotes, with its content escaped.

    Backslash and double-quote characters inside the value are escaped
    by make_quoted_pairs() before the surrounding quotes are added.
    """
    escaped = make_quoted_pairs(value)
    return f'"{escaped}"'


# Match a RFC 2047 word, looks like =?utf-8?q?someword?=
rfc2047_matcher = re.compile(r'''
Expand Down Expand Up @@ -2866,6 +2874,15 @@ def _refold_parse_tree(parse_tree, *, policy):
if not hasattr(part, 'encode'):
# It's not a terminal, try folding the subparts.
newparts = list(part)
if part.token_type == 'bare-quoted-string':
# To fold a quoted string we need to create a list of terminal
# tokens that will render the leading and trailing quotes
# and use quoted pairs in the value as appropriate.
newparts = (
[ValueTerminal('"', 'ptext')] +
[ValueTerminal(make_quoted_pairs(p), 'ptext')
for p in newparts] +
[ValueTerminal('"', 'ptext')])
if not part.as_ew_allowed:
wrap_as_ew_blocked += 1
newparts.append(end_ew_not_allowed)
Expand Down
31 changes: 29 additions & 2 deletions Lib/test/test_email/test__header_value_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -2991,13 +2991,40 @@ def test_address_list_with_list_separator_after_fold(self):
self._test(parser.get_address_list(to)[0],
f'{a},\n =?utf-8?q?H=C3=BCbsch?= Kaktus <beautiful@example.com>\n')

a = '.' * 79
a = '.' * 79 # ('.' is a special, so must be in quoted-string.)
to = f'"{a}" <xyz@example.com>, "Hübsch Kaktus" <beautiful@example.com>'
self._test(parser.get_address_list(to)[0],
f'{a}\n'
f'"{a}"\n'
' <xyz@example.com>, =?utf-8?q?H=C3=BCbsch?= Kaktus '
'<beautiful@example.com>\n')

def test_address_list_with_specials_in_long_quoted_string(self):
    """Check that folded quoted-strings keep their quotes and escapes."""
    # Regression for gh-80222.
    # A short max_line_length forces every case below (except the last,
    # which has no folding whitespace) to be folded.
    policy = self.policy.clone(max_line_length=40)
    cases = [
        # (to, folded)
        ('"Exfiltrator <spy@example.org> (unclosed comment?" <to@example.com>',
         '"Exfiltrator <spy@example.org> (unclosed\n'
         ' comment?" <to@example.com>\n'),
        ('"Escaped \\" chars \\\\ in quoted-string stay escaped" <to@example.com>',
         '"Escaped \\" chars \\\\ in quoted-string\n'
         ' stay escaped" <to@example.com>\n'),
        ('This long display name does not need quotes <to@example.com>',
         'This long display name does not need\n'
         ' quotes <to@example.com>\n'),
        ('"Quotes are not required but are retained here" <to@example.com>',
         '"Quotes are not required but are\n'
         ' retained here" <to@example.com>\n'),
        ('"A quoted-string, it can be a valid local-part"@example.com',
         '"A quoted-string, it can be a valid\n'
         ' local-part"@example.com\n'),
        ('"local-part-with-specials@but-no-fws.cannot-fold"@example.com',
         '"local-part-with-specials@but-no-fws.cannot-fold"@example.com\n'),
    ]
    for (to, folded) in cases:
        with self.subTest(to=to):
            self._test(parser.get_address_list(to)[0], folded, policy=policy)

# XXX Need tests with comments on various sides of a unicode token,
# and with unicode tokens in the comments. Spaces inside the quotes
# currently don't do the right thing.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
Fix bug in the folding of quoted strings when flattening an email message using
a modern email policy. Previously when a quoted string was folded so that
it spanned more than one line, the surrounding quotes and internal escapes
would be omitted. This could theoretically be used to spoof header lines
using a carefully constructed quoted string if the resulting rendered email
was transmitted or re-parsed.
Loading
0