Changes as per review · python/cpython@f1015d8 · GitHub
[go: up one dir, main page]

Skip to content

Commit f1015d8

Browse files
committed
Changes as per review
1 parent 5f0f8f3 commit f1015d8

File tree

2 files changed

+20
-7
lines changed

2 files changed

+20
-7
lines changed

Lib/email/_header_value_parser.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,6 @@
7575
from email import _encoded_words as _ew
7676
from email import errors
7777
from email import utils
78-
from email.header import ecre as rfc2047_matcher
7978
#
8079
# Useful constants and functions
8180
#
@@ -96,6 +95,18 @@
9695
def quote_string(value):
9796
return '"'+str(value).replace('\\', '\\\\').replace('"', r'\"')+'"'
9897

98+
# Match an RFC 2047 encoded word, which looks like =?utf-8?q?someword?=
99+
rfc2047_matcher = re.compile(r'''
100+
=\? # literal =?
101+
[^?]* # charset
102+
\? # literal ?
103+
[qQbB] # literal 'q' or 'b', case insensitive
104+
\? # literal ?
105+
.*? # encoded word
106+
\?= # literal ?=
107+
''', re.VERBOSE | re.MULTILINE)
108+
109+
99110
#
100111
# TokenList and its subclasses
101112
#
@@ -1049,8 +1060,8 @@ def get_encoded_word(value):
10491060
_validate_xtext(vtext)
10501061
ew.append(vtext)
10511062
text = ''.join(remainder)
1052-
# Encoded words should be followed by a LWS.
1053-
if value and value[0] != ' ':
1063+
# Encoded words should be followed by WSP.
1064+
if value and value[0] not in WSP:
10541065
ew.defects.append(errors.InvalidHeaderDefect(
10551066
"missing trailing whitespace after encoded-word"))
10561067
return ew, value
@@ -1106,7 +1117,8 @@ def get_unstructured(value):
11061117
continue
11071118
tok, *remainder = _wsp_splitter(value, 1)
11081119
# Split in the middle of an atom if there is a rfc2047 encoded word
1109-
# which does not have WS on both sides.
1120+
# which does not have WSP on both sides. The defect will be registered
1121+
# the next time through the loop.
11101122
if rfc2047_matcher.search(tok):
11111123
tok, *remainder = value.partition('=?')
11121124
vtext = ValueTerminal(tok, 'vtext')
Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1-
Email headers containing 2047 encoded words with no leading whitespace are
2-
parsed correctly. Also, missing trailing whitespaces now register a defect
3-
instead of silently ignoring.
1+
Email headers containing RFC 2047 encoded words are parsed despite the missing
2+
whitespace, and a defect is registered. Also, missing trailing whitespace after
3+
encoded words is now registered as a defect.
4+

0 commit comments

Comments
 (0)
0