|
75 | 75 | from email import _encoded_words as _ew
|
76 | 76 | from email import errors
|
77 | 77 | from email import utils
|
78 |
| -from email.header import ecre as rfc2047_matcher |
79 | 78 | #
|
80 | 79 | # Useful constants and functions
|
81 | 80 | #
|
|
96 | 95 | def quote_string(value):
|
97 | 96 | return '"'+str(value).replace('\\', '\\\\').replace('"', r'\"')+'"'
|
98 | 97 |
|
| 98 | +# Match an RFC 2047 word, which looks like =?utf-8?q?someword?= |
| 99 | +rfc2047_matcher = re.compile(r''' |
| 100 | + =\? # literal =? |
| 101 | + [^?]* # charset |
| 102 | + \? # literal ? |
| 103 | + [qQbB] # literal 'q' or 'b', case insensitive |
| 104 | + \? # literal ? |
| 105 | + .*? # encoded word |
| 106 | + \?= # literal ?= |
| 107 | +''', re.VERBOSE | re.MULTILINE) |
| 108 | + |
| 109 | + |
99 | 110 | #
|
100 | 111 | # TokenList and its subclasses
|
101 | 112 | #
|
@@ -1049,8 +1060,8 @@ def get_encoded_word(value):
|
1049 | 1060 | _validate_xtext(vtext)
|
1050 | 1061 | ew.append(vtext)
|
1051 | 1062 | text = ''.join(remainder)
|
1052 |
| - # Encoded words should be followed by a LWS. |
1053 |
| - if value and value[0] != ' ': |
| 1063 | + # Encoded words should be followed by whitespace (WSP). |
| 1064 | + if value and value[0] not in WSP: |
1054 | 1065 | ew.defects.append(errors.InvalidHeaderDefect(
|
1055 | 1066 | "missing trailing whitespace after encoded-word"))
|
1056 | 1067 | return ew, value
|
@@ -1106,7 +1117,8 @@ def get_unstructured(value):
|
1106 | 1117 | continue
|
1107 | 1118 | tok, *remainder = _wsp_splitter(value, 1)
|
1108 | 1119 | # Split in the middle of an atom if there is an RFC 2047 encoded word
|
1109 |
| - # which does not have WS on both sides. |
| 1120 | + # which does not have WSP on both sides. The defect will be registered |
| 1121 | + # the next time through the loop. |
1110 | 1122 | if rfc2047_matcher.search(tok):
|
1111 | 1123 | tok, *remainder = value.partition('=?')
|
1112 | 1124 | vtext = ValueTerminal(tok, 'vtext')
|
|
0 commit comments