Changes as per review · python/cpython@b7fa036 · GitHub
[go: up one dir, main page]

Skip to content

Commit b7fa036

Browse files
committed
Changes as per review
1 parent 5f0f8f3 commit b7fa036

File tree

2 files changed

+21
-7
lines changed

2 files changed

+21
-7
lines changed

Lib/email/_header_value_parser.py

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@
7575
from email import _encoded_words as _ew
7676
from email import errors
7777
from email import utils
78-
from email.header import ecre as rfc2047_matcher
78+
7979
#
8080
# Useful constants and functions
8181
#
@@ -96,6 +96,18 @@
9696
def quote_string(value):
9797
return '"'+str(value).replace('\\', '\\\\').replace('"', r'\"')+'"'
9898

99+
# Match an RFC 2047 encoded word, which looks like =?utf-8?q?someword?=
100+
rfc2047_matcher = re.compile(r'''
101+
=\? # literal =?
102+
[^?]* # charset
103+
\? # literal ?
104+
[qQbB] # literal 'q' or 'b', case insensitive
105+
\? # literal ?
106+
.*? # encoded word
107+
\?= # literal ?=
108+
''', re.VERBOSE | re.MULTILINE)
109+
110+
99111
#
100112
# TokenList and its subclasses
101113
#
@@ -1049,8 +1061,8 @@ def get_encoded_word(value):
10491061
_validate_xtext(vtext)
10501062
ew.append(vtext)
10511063
text = ''.join(remainder)
1052-
# Encoded words should be followed by a LWS.
1053-
if value and value[0] != ' ':
1064+
# Encoded words should be followed by a WSP character.
1065+
if value and value[0] not in WSP:
10541066
ew.defects.append(errors.InvalidHeaderDefect(
10551067
"missing trailing whitespace after encoded-word"))
10561068
return ew, value
@@ -1106,7 +1118,8 @@ def get_unstructured(value):
11061118
continue
11071119
tok, *remainder = _wsp_splitter(value, 1)
11081120
# Split in the middle of an atom if there is a rfc2047 encoded word
1109-
# which does not have WS on both sides.
1121+
# which does not have WSP on both sides. The defect will be registered
1122+
# the next time through the loop.
11101123
if rfc2047_matcher.search(tok):
11111124
tok, *remainder = value.partition('=?')
11121125
vtext = ValueTerminal(tok, 'vtext')
Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1-
Email headers containing 2047 encoded words with no leading whitespace are
2-
parsed correctly. Also, missing trailing whitespaces now register a defect
3-
instead of silently ignoring.
1+
Email headers containing RFC 2047 encoded words are parsed despite the missing
2+
whitespace, and a defect is registered. Also, missing trailing whitespace after
3+
encoded words is now registered as a defect.
4+

0 commit comments

Comments
 (0)
0