[3.11] gh-121284: Fix email address header folding with parsed encode… · python/cpython@0a66052 · GitHub
[go: up one dir, main page]

Skip to content

Commit 0a66052

Browse files
miss-islington, medmunds, bitdancer, encukou
authored
[3.11] gh-121284: Fix email address header folding with parsed encoded-word (GH-122754) (GH-131405)
Email generators using email.policy.default may convert an RFC 2047 encoded-word to unencoded form during header refolding. In a structured header, this could allow 'specials' chars outside a quoted-string, leading to invalid address headers and enabling spoofing. This change ensures a parsed encoded-word that contains specials is kept as an encoded-word while the header is refolded.

[Better fix from @bitdancer.]

(cherry picked from commit 295b53d)

Co-authored-by: Mike Edmunds <medmunds@gmail.com>
Co-authored-by: R David Murray <rdmurray@bitdance.com>
Co-authored-by: Petr Viktorin <encukou@gmail.com>
1 parent 4588712 commit 0a66052

File tree

3 files changed

+37
-5
lines changed

3 files changed

+37
-5
lines changed

Lib/email/_header_value_parser.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1047,7 +1047,7 @@ def get_fws(value):
10471047
fws = WhiteSpaceTerminal(value[:len(value)-len(newvalue)], 'fws')
10481048
return fws, newvalue
10491049

1050-
def get_encoded_word(value):
1050+
def get_encoded_word(value, terminal_type='vtext'):
10511051
""" encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
10521052
10531053
"""
@@ -1086,7 +1086,7 @@ def get_encoded_word(value):
10861086
ew.append(token)
10871087
continue
10881088
chars, *remainder = _wsp_splitter(text, 1)
1089-
vtext = ValueTerminal(chars, 'vtext')
1089+
vtext = ValueTerminal(chars, terminal_type)
10901090
_validate_xtext(vtext)
10911091
ew.append(vtext)
10921092
text = ''.join(remainder)
@@ -1128,7 +1128,7 @@ def get_unstructured(value):
11281128
valid_ew = True
11291129
if value.startswith('=?'):
11301130
try:
1131-
token, value = get_encoded_word(value)
1131+
token, value = get_encoded_word(value, 'utext')
11321132
except _InvalidEwError:
11331133
valid_ew = False
11341134
except errors.HeaderParseError:
@@ -1157,7 +1157,7 @@ def get_unstructured(value):
11571157
# the parser to go in an infinite loop.
11581158
if valid_ew and rfc2047_matcher.search(tok):
11591159
tok, *remainder = value.partition('=?')
1160-
vtext = ValueTerminal(tok, 'vtext')
1160+
vtext = ValueTerminal(tok, 'utext')
11611161
_validate_xtext(vtext)
11621162
unstructured.append(vtext)
11631163
value = ''.join(remainder)
@@ -2792,7 +2792,7 @@ def _refold_parse_tree(parse_tree, *, policy):
27922792
continue
27932793
tstr = str(part)
27942794
if not want_encoding:
2795-
if part.token_type == 'ptext':
2795+
if part.token_type in ('ptext', 'vtext'):
27962796
# Encode if tstr contains special characters.
27972797
want_encoding = not SPECIALSNL.isdisjoint(tstr)
27982798
else:

Lib/test/test_email/test__header_value_parser.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2985,6 +2985,31 @@ def test_address_list_with_unicode_names_in_quotes(self):
29852985
'=?utf-8?q?H=C3=BCbsch?= Kaktus <beautiful@example.com>,\n'
29862986
' =?utf-8?q?bei=C3=9Ft_bei=C3=9Ft?= <biter@example.com>\n')
29872987

2988+
def test_address_list_with_specials_in_encoded_word(self):
2989+
# An encoded-word parsed from a structured header must remain
2990+
# encoded when it contains specials. Regression for gh-121284.
2991+
policy = self.policy.clone(max_line_length=40)
2992+
cases = [
2993+
# (to, folded)
2994+
('=?utf-8?q?A_v=C3=A9ry_long_name_with=2C_comma?= <to@example.com>',
2995+
'A =?utf-8?q?v=C3=A9ry_long_name_with?=\n'
2996+
' =?utf-8?q?=2C?= comma <to@example.com>\n'),
2997+
('=?utf-8?q?This_long_name_does_not_need_encoded=2Dword?= <to@example.com>',
2998+
'This long name does not need\n'
2999+
' encoded-word <to@example.com>\n'),
3000+
('"A véry long name with, comma" <to@example.com>',
3001+
# (This isn't the best fold point, but it's not invalid.)
3002+
'A =?utf-8?q?v=C3=A9ry_long_name_with?=\n'
3003+
' =?utf-8?q?=2C?= comma <to@example.com>\n'),
3004+
('"A véry long name containing a, comma" <to@example.com>',
3005+
'A =?utf-8?q?v=C3=A9ry?= long name\n'
3006+
' containing =?utf-8?q?a=2C?= comma\n'
3007+
' <to@example.com>\n'),
3008+
]
3009+
for (to, folded) in cases:
3010+
with self.subTest(to=to):
3011+
self._test(parser.get_address_list(to)[0], folded, policy=policy)
3012+
29883013
def test_address_list_with_list_separator_after_fold(self):
29893014
a = 'x' * 66 + '@example.com'
29903015
to = f'{a}, "Hübsch Kaktus" <beautiful@example.com>'
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
Fix bug in the folding of rfc2047 encoded-words when flattening an email message
2+
using a modern email policy. Previously when an encoded-word was too long
3+
for a line, it would be decoded, split across lines, and re-encoded. But commas
4+
and other special characters in the original text could be left unencoded and
5+
unquoted. This could theoretically be used to spoof header lines using
6+
a carefully constructed encoded-word if the resulting rendered email was
7+
transmitted or re-parsed.

0 commit comments

Comments
 (0)
0