|
62 | 62 |
|
63 | 63 | # regex based decoder.
|
64 | 64 | _q_byte_subber = functools.partial(re.compile(br'=([a-fA-F0-9]{2})').sub,
|
65 |
| - lambda m: bytes([int(m.group(1), 16)])) |
| 65 | + lambda m: bytes.fromhex(m.group(1).decode())) |
66 | 66 |
|
67 | 67 | def decode_q(encoded):
|
68 | 68 | encoded = encoded.replace(b'_', b' ')
|
@@ -98,30 +98,42 @@ def len_q(bstring):
|
98 | 98 | #
|
99 | 99 |
|
100 | 100 | def decode_b(encoded):
|
101 |
| - defects = [] |
| 101 | + # First try decoding with validate=True, fixing the padding if needed. |
| 102 | + # This will succeed only if encoded includes no invalid characters. |
102 | 103 | pad_err = len(encoded) % 4
|
103 |
| - if pad_err: |
104 |
| - defects.append(errors.InvalidBase64PaddingDefect()) |
105 |
| - padded_encoded = encoded + b'==='[:4-pad_err] |
106 |
| - else: |
107 |
| - padded_encoded = encoded |
| 104 | + missing_padding = b'==='[:4-pad_err] if pad_err else b'' |
108 | 105 | try:
|
109 |
| - return base64.b64decode(padded_encoded, validate=True), defects |
| 106 | + return ( |
| 107 | + base64.b64decode(encoded + missing_padding, validate=True), |
| 108 | + [errors.InvalidBase64PaddingDefect()] if pad_err else [], |
| 109 | + ) |
110 | 110 | except binascii.Error:
|
111 |
| - # Since we had correct padding, this must an invalid char error. |
112 |
| - defects = [errors.InvalidBase64CharactersDefect()] |
| 111 | + # Since we had correct padding, this is likely an invalid char error. |
| 112 | + # |
113 | 113 | # The non-alphabet characters are ignored as far as padding
|
114 |
| - # goes, but we don't know how many there are. So we'll just |
115 |
| - # try various padding lengths until something works. |
116 |
| - for i in 0, 1, 2, 3: |
| 114 | + # goes, but we don't know how many there are. So try without adding |
| 115 | + # padding to see if it works. |
| 116 | + try: |
| 117 | + return ( |
| 118 | + base64.b64decode(encoded, validate=False), |
| 119 | + [errors.InvalidBase64CharactersDefect()], |
| 120 | + ) |
| 121 | + except binascii.Error: |
| 122 | + # Add as much padding as could possibly be necessary (extra padding |
| 123 | + # is ignored). |
117 | 124 | try:
|
118 |
| - return base64.b64decode(encoded+b'='*i, validate=False), defects |
| 125 | + return ( |
| 126 | + base64.b64decode(encoded + b'==', validate=False), |
| 127 | + [errors.InvalidBase64CharactersDefect(), |
| 128 | + errors.InvalidBase64PaddingDefect()], |
| 129 | + ) |
119 | 130 | except binascii.Error:
|
120 |
| - if i==0: |
121 |
| - defects.append(errors.InvalidBase64PaddingDefect()) |
122 |
| - else: |
123 |
| - # This should never happen. |
124 |
| - raise AssertionError("unexpected binascii.Error") |
| 131 | + # This only happens when the encoded string's length is 1 more |
| 132 | + # than a multiple of 4, which is invalid. |
| 133 | + # |
| 134 | + # bpo-27397: Just return the encoded string since there's no |
| 135 | + # way to decode. |
| 136 | + return encoded, [errors.InvalidBase64LengthDefect()] |
125 | 137 |
|
126 | 138 | def encode_b(bstring):
|
127 | 139 | return base64.b64encode(bstring).decode('ascii')
|
@@ -167,15 +179,15 @@ def decode(ew):
|
167 | 179 | # Turn the CTE decoded bytes into unicode.
|
168 | 180 | try:
|
169 | 181 | string = bstring.decode(charset)
|
170 |
| - except UnicodeError: |
| 182 | + except UnicodeDecodeError: |
171 | 183 | defects.append(errors.UndecodableBytesDefect("Encoded word "
|
172 |
| - "contains bytes not decodable using {} charset".format(charset))) |
| 184 | + f"contains bytes not decodable using {charset!r} charset")) |
173 | 185 | string = bstring.decode(charset, 'surrogateescape')
|
174 |
| - except LookupError: |
| 186 | + except (LookupError, UnicodeEncodeError): |
175 | 187 | string = bstring.decode('ascii', 'surrogateescape')
|
176 | 188 | if charset.lower() != 'unknown-8bit':
|
177 |
| - defects.append(errors.CharsetError("Unknown charset {} " |
178 |
| - "in encoded word; decoded as unknown bytes".format(charset))) |
| 189 | + defects.append(errors.CharsetError(f"Unknown charset {charset!r} " |
| 190 | + f"in encoded word; decoded as unknown bytes")) |
179 | 191 | return string, charset, lang, defects
|
180 | 192 |
3D8B
|
181 | 193 |
|
|
0 commit comments