From c9187e23b32fd7509973bbbaf1f56d8760459818 Mon Sep 17 00:00:00 2001 From: Sam Ezeh Date: Tue, 24 May 2022 15:46:29 +0100 Subject: [PATCH 1/7] Write tests for fieldnames that are followed by whitespace --- Lib/test/test_email/data/msg_47.txt | 7 +++++++ Lib/test/test_email/test_email.py | 7 +++++++ 2 files changed, 14 insertions(+) create mode 100644 Lib/test/test_email/data/msg_47.txt diff --git a/Lib/test/test_email/data/msg_47.txt b/Lib/test/test_email/data/msg_47.txt new file mode 100644 index 00000000000000..999b32b08c8c6a --- /dev/null +++ b/Lib/test/test_email/data/msg_47.txt @@ -0,0 +1,7 @@ +Subject: Regarding messages containing whitespace that follow field names +To: receiver@example.org +x-whitespace-after-fieldname : value +Date: Fri, 20 May 2022 18:13:19 +1200 +From: sender@example.org + +Field names can be followed by arbitrary whitespace diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py index 69f883a3673f26..1744094659ca62 100644 --- a/Lib/test/test_email/test_email.py +++ b/Lib/test/test_email/test_email.py @@ -429,6 +429,13 @@ def test_get_param_funky_continuation_lines(self): msg = self._msgobj('msg_22.txt') self.assertEqual(msg.get_payload(1).get_param('name'), 'wibble.JPG') + def test_whitespace_after_fieldname(self): + # As part of obsolete email syntax, fieldnames can be followed by arbitrary whitespace + msg = self._msgobj("msg_47.txt") + + self.assertEqual(msg["x-whitespace-after-fieldname"], "value") + self.assertEqual(msg.get_payload(), "Field names can be followed by arbitrary whitespace\n") + # test_headerregistry.TestContentTypeHeader.semis_inside_quotes def test_get_param_with_semis_in_quotes(self): msg = email.message_from_string( From e250213a2251ce147893ae7cc83d33de26014212 Mon Sep 17 00:00:00 2001 From: Sam Ezeh Date: Tue, 24 May 2022 15:47:03 +0100 Subject: [PATCH 2/7] Support obsolete email syntax Field names that are followed by whitespace should be accepted by the parser --- Lib/email/_policybase.py | 10 +++++----- Lib/email/feedparser.py | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Lib/email/_policybase.py b/Lib/email/_policybase.py index c9cbadd2a80c48..a7c1310fefee22 100644 --- a/Lib/email/_policybase.py +++ b/Lib/email/_policybase.py @@ -292,15 +292,15 @@ def _sanitize_header(self, name, value): def header_source_parse(self, sourcelines): """+ - The name is parsed as everything up to the ':' and returned unmodified. - The value is determined by stripping leading whitespace off the - remainder of the first line, joining all subsequent lines together, and - stripping any trailing carriage return or linefeed characters. + The name is parsed as everything up to the ':' and returned stripped + of any trailing whitespace. The value is determined by stripping leading + whitespace off the remainder of the first line, joining all subsequent + lines together, and stripping any trailing carriage return or linefeed characters. """ name, value = sourcelines[0].split(':', 1) value = value.lstrip(' \t') + ''.join(sourcelines[1:]) - return (name, value.rstrip('\r\n')) + return (name.rstrip(' \t'), value.rstrip('\r\n')) def header_store_parse(self, name, value): """+ diff --git a/Lib/email/feedparser.py b/Lib/email/feedparser.py index 97d3f5144d606f..c1476386f2ac3c 100644 --- a/Lib/email/feedparser.py +++ b/Lib/email/feedparser.py @@ -34,7 +34,7 @@ NLCRE_crack = re.compile(r'(\r\n|\r|\n)') # RFC 2822 $3.6.8 Optional fields. ftext is %d33-57 / %d59-126, Any character # except controls, SP, and ":". -headerRE = re.compile(r'^(From |[\041-\071\073-\176]*:|[\t ])') +headerRE = re.compile(r'^(From |[\041-\071\073-\176]*[ \t]*:|[\t ])') EMPTYSTRING = '' NL = '\n' From 07e93faed26a5833caea593e72a2d9d105fb1f84 Mon Sep 17 00:00:00 2001 From: Sam Ezeh Date: Tue, 24 May 2022 15:55:15 +0100 Subject: [PATCH 3/7] Add news entry --- .../next/Library/2022-05-24-15-55-09.gh-issue-93158.KH0YPY.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2022-05-24-15-55-09.gh-issue-93158.KH0YPY.rst diff --git a/Misc/NEWS.d/next/Library/2022-05-24-15-55-09.gh-issue-93158.KH0YPY.rst b/Misc/NEWS.d/next/Library/2022-05-24-15-55-09.gh-issue-93158.KH0YPY.rst new file mode 100644 index 00000000000000..2e8eb9ddeb03b9 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-05-24-15-55-09.gh-issue-93158.KH0YPY.rst @@ -0,0 +1,2 @@ +The emamil library now parses messages that use obsolete email syntac where +header field names can be followed by whitespace From 63ee1e4bc6dc70d3e6c80831c2b1270082d64cf6 Mon Sep 17 00:00:00 2001 From: Sam Ezeh Date: Tue, 24 May 2022 15:56:39 +0100 Subject: [PATCH 4/7] Update documentation --- Doc/library/email.policy.rst | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Doc/library/email.policy.rst b/Doc/library/email.policy.rst index bf53b9520fc723..5b727c08161ea3 100644 --- a/Doc/library/email.policy.rst +++ b/Doc/library/email.policy.rst @@ -454,9 +454,10 @@ added matters. To illustrate:: The name is parsed as everything up to the '``:``' and returned - unmodified. The value is determined by stripping leading whitespace off - the remainder of the first line, joining all subsequent lines together, - and stripping any trailing carriage return or linefeed characters. + stripped of trailing whitespace. The value is determined by stripping + leading whitespace off the remainder of the first line, joining all + subsequent lines together, and stripping any trailing carriage + return or linefeed characters. .. method:: header_store_parse(name, value) From e096ed3c70a61ff00e984bab4a146c8392cb077c Mon Sep 17 00:00:00 2001 From: Sam Ezeh Date: Wed, 25 May 2022 18:12:03 +0100 Subject: [PATCH 5/7] Fix typos in news entry --- .../next/Library/2022-05-24-15-55-09.gh-issue-93158.KH0YPY.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2022-05-24-15-55-09.gh-issue-93158.KH0YPY.rst b/Misc/NEWS.d/next/Library/2022-05-24-15-55-09.gh-issue-93158.KH0YPY.rst index 2e8eb9ddeb03b9..78569f809445e6 100644 --- a/Misc/NEWS.d/next/Library/2022-05-24-15-55-09.gh-issue-93158.KH0YPY.rst +++ b/Misc/NEWS.d/next/Library/2022-05-24-15-55-09.gh-issue-93158.KH0YPY.rst @@ -1,2 +1,2 @@ -The emamil library now parses messages that use obsolete email syntac where +The email library now parses messages that use obsolete email syntax where header field names can be followed by whitespace From e03401d1fe5d24ae7f7f06b645488b6408ca630e Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Wed, 21 Feb 2024 22:25:41 +0200 Subject: [PATCH 6/7] Update Misc/NEWS.d/next/Library/2022-05-24-15-55-09.gh-issue-93158.KH0YPY.rst --- .../Library/2022-05-24-15-55-09.gh-issue-93158.KH0YPY.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Misc/NEWS.d/next/Library/2022-05-24-15-55-09.gh-issue-93158.KH0YPY.rst b/Misc/NEWS.d/next/Library/2022-05-24-15-55-09.gh-issue-93158.KH0YPY.rst index 78569f809445e6..7cbfbd7443a46e 100644 --- a/Misc/NEWS.d/next/Library/2022-05-24-15-55-09.gh-issue-93158.KH0YPY.rst +++ b/Misc/NEWS.d/next/Library/2022-05-24-15-55-09.gh-issue-93158.KH0YPY.rst @@ -1,2 +1,2 @@ -The email library now parses messages that use obsolete email syntax where -header field names can be followed by whitespace +The :mod:`email` library now parses messages that use obsolete email syntax where +header field names can be followed by whitespace. From f05798b3a41477b170338f4e93b10874bdf8c748 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Wed, 21 Feb 2024 22:28:52 +0200 Subject: [PATCH 7/7] Resolve conflict with test file name. --- Lib/test/test_email/data/{msg_47.txt => msg_48.txt} | 0 Lib/test/test_email/test_email.py | 5 +++-- 2 files changed, 3 insertions(+), 2 deletions(-) rename Lib/test/test_email/data/{msg_47.txt => msg_48.txt} (100%) diff --git a/Lib/test/test_email/data/msg_47.txt b/Lib/test/test_email/data/msg_48.txt similarity index 100% rename from Lib/test/test_email/data/msg_47.txt rename to Lib/test/test_email/data/msg_48.txt diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py index 1744094659ca62..48bdc65e21207b 100644 --- a/Lib/test/test_email/test_email.py +++ b/Lib/test/test_email/test_email.py @@ -431,10 +431,11 @@ def test_get_param_funky_continuation_lines(self): def test_whitespace_after_fieldname(self): # As part of obsolete email syntax, fieldnames can be followed by arbitrary whitespace - msg = self._msgobj("msg_47.txt") + msg = self._msgobj("msg_48.txt") self.assertEqual(msg["x-whitespace-after-fieldname"], "value") - self.assertEqual(msg.get_payload(), "Field names can be followed by arbitrary whitespace\n") + self.assertEqual(msg.get_payload(), + "Field names can be followed by arbitrary whitespace\n") # test_headerregistry.TestContentTypeHeader.semis_inside_quotes def test_get_param_with_semis_in_quotes(self):