GH-100884: email/_header_value_parser: don't encode list separators

t-8ch · t-8ch · commit 75a89ca883f6 · 2023-01-09T14:08:19.000Z
ListSeparator should never be encoded. This could happen when a long line pushed its separator onto the next line, and that next line was then encoded. Fixes #100884
diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py
@@ -949,6 +949,7 @@ class _InvalidEwError(errors.HeaderParseError):
 # up other parse trees.  Maybe should have  tests for that, too.
 DOT = ValueTerminal('.', 'dot')
 ListSeparator = ValueTerminal(',', 'list-separator')
+ListSeparator.as_ew_allowed = False
 RouteComponentMarker = ValueTerminal('@', 'route-component-marker')
 
 #
diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py
@@ -2946,6 +2946,11 @@ def test_address_list_with_unicode_names_in_quotes(self):
             '=?utf-8?q?H=C3=BCbsch?= Kaktus <beautiful@example.com>,\n'
                 ' =?utf-8?q?bei=C3=9Ft_bei=C3=9Ft?= <biter@example.com>\n')
 
+    def test_address_list_with_list_separator_after_fold(self):
+        to = '0123456789' * 8 + '@foo, ä <foo@bar>'
+        self._test(parser.get_address_list(to)[0],
+                   '0123456789' * 8 + '@foo,\n =?utf-8?q?=C3=A4?= <foo@bar>\n')
+
     # XXX Need tests with comments on various sides of a unicode token,
     # and with unicode tokens in the comments.  Spaces inside the quotes
     # currently don't do the right thing.
diff --git a/Misc/NEWS.d/next/Library/2023-01-09-14-08-02.gh-issue-100884.DcmdLl.rst b/Misc/NEWS.d/next/Library/2023-01-09-14-08-02.gh-issue-100884.DcmdLl.rst
@@ -0,0 +1,2 @@
+email: fix misfolding of comma in address-lists over multiple lines in
+combination with unicode encoding.

Original file line number	Diff line number	Diff line change
`@@ -949,6 +949,7 @@ class _InvalidEwError(errors.HeaderParseError):`
`949`	`949`	`# up other parse trees. Maybe should have tests for that, too.`
`950`	`950`	`DOT = ValueTerminal('.', 'dot')`
`951`	`951`	`ListSeparator = ValueTerminal(',', 'list-separator')`
	`952`	`+ListSeparator.as_ew_allowed = False`
`952`	`953`	`RouteComponentMarker = ValueTerminal('@', 'route-component-marker')`
`953`	`954`
`954`	`955`	`#`
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+email: fix misfolding of comma in address-lists over multiple lines in`
	`2`	`+combination with unicode encoding.`