|
3 | 3 | from six.moves import http_client
|
4 | 4 |
|
5 | 5 | import codecs
|
6 |
| -import platform |
7 | 6 | import re
|
8 | 7 |
|
9 | 8 | from .constants import EOF, spaceCharacters, asciiLetters, asciiUppercase
|
@@ -32,16 +31,15 @@ class BufferedIOBase(object):
|
32 | 31 |
|
33 | 32 | invalid_unicode_template = "[\u0001-\u0008\u000B\u000E-\u001F\u007F-\u009F\uFDD0-\uFDEF\uFFFE\uFFFF\U0001FFFE\U0001FFFF\U0002FFFE\U0002FFFF\U0003FFFE\U0003FFFF\U0004FFFE\U0004FFFF\U0005FFFE\U0005FFFF\U0006FFFE\U0006FFFF\U0007FFFE\U0007FFFF\U0008FFFE\U0008FFFF\U0009FFFE\U0009FFFF\U000AFFFE\U000AFFFF\U000BFFFE\U000BFFFF\U000CFFFE\U000CFFFF\U000DFFFE\U000DFFFF\U000EFFFE\U000EFFFF\U000FFFFE\U000FFFFF\U0010FFFE\U0010FFFF%s]"
|
34 | 33 |
|
35 |
| -if platform.python_implementation() == "Jython": |
36 |
| - # Jython does not allow the use of solitary surrogate escapes |
37 |
| - # (\uD800-\uDFFF) in literals or other usage. This is because it |
38 |
| - # uses UTF-16, which is based on the use of such surrogates. |
39 |
| - invalid_unicode_re = re.compile(invalid_unicode_template % "") |
40 |
| -else: |
41 |
| - # Instead use one extra step of indirection and create surrogates with |
42 |
| - # unichr |
| 34 | +if utils.supports_lone_surrogates: |
| 35 | + # Use one extra step of indirection and create surrogates with |
| 36 | + # unichr. Not using this indirection would introduce an illegal |
| 37 | + # unicode literal on platforms not supporting such lone |
| 38 | + # surrogates. |
43 | 39 | invalid_unicode_re = re.compile(invalid_unicode_template % (
|
44 | 40 | "%s-%s" % (unichr(0xD800), unichr(0xDFFF)),))
|
| 41 | +else: |
| 42 | + invalid_unicode_re = re.compile(invalid_unicode_template % "") |
45 | 43 |
|
46 | 44 | non_bmp_invalid_codepoints = set([0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE,
|
47 | 45 | 0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE, 0x5FFFF,
|
@@ -177,8 +175,8 @@ def __init__(self, source):
|
177 | 175 |
|
178 | 176 | """
|
179 | 177 |
|
180 |
| - if platform.python_implementation() == "Jython": |
181 |
| - # By its nature Jython's UTF-16 support does not allow |
| 178 | + if not utils.supports_lone_surrogates: |
| 179 | + # Such platforms will have already checked for such |
182 | 180 | # surrogate errors, so no need to do this checking.
|
183 | 181 | self.reportCharacterErrors = None
|
184 | 182 | self.replaceCharactersRegexp = None
|
@@ -288,9 +286,7 @@ def readChunk(self, chunkSize=None):
|
288 | 286 | self._bufferedCharacter = data[-1]
|
289 | 287 | data = data[:-1]
|
290 | 288 |
|
291 |
| - if platform.python_implementation() != "Jython": |
292 |
| - # data is already Unicode, so Jython already has dealt |
293 |
| - # with any surrogate character errors, no need to go here |
| 289 | + if utils.supports_lone_surrogates: |
294 | 290 | self.reportCharacterErrors(data)
|
295 | 291 |
|
296 | 292 | # Replace invalid characters
|
|
0 commit comments