Add assertions that we have a byte-string before detecting encoding. · awesome-python/html5lib-python@7d24149 · GitHub
[go: up one dir, main page]

Skip to content

Commit 7d24149

Browse files
committed
Add assertions that we have a byte-string before detecting encoding.
Also add code-path for when we have StringIO but not cStringIO.
1 parent fec1bb5 commit 7d24149

File tree

1 file changed

+8
-2
lines changed

1 file changed

+8
-2
lines changed

html5lib/inputstream.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -198,8 +198,11 @@ def openStream(self, source):
198198
try:
199199
from io import BytesIO
200200
except:
201-
# 2to3 converts this line to: from io import StringIO
202-
from cStringIO import StringIO as BytesIO
201+
try:
202+
# 2to3 converts this line to: from io import StringIO
203+
from cStringIO import StringIO as BytesIO
204+
except:
205+
from StringIO import StringIO as BytesIO
203206
stream = BytesIO(source)
204207

205208
if (not(hasattr(stream, "tell") and hasattr(stream, "seek")) or
@@ -227,6 +230,7 @@ def detectEncoding(self, parseMeta=True, chardet=True):
227230
detector = UniversalDetector()
228231
while not detector.done:
229232
buffer = self.rawStream.read(self.numBytesChardet)
233+
assert isinstance(buffer, str)
230234
if not buffer:
231235
break
232236
buffers.append(buffer)
@@ -275,6 +279,7 @@ def detectBOM(self):
275279

276280
# Go to beginning of file and read in 4 bytes
277281
string = self.rawStream.read(4)
282+
assert isinstance(string, str)
278283

279284
# Try detecting the BOM using bytes from the string
280285
encoding = bomDict.get(string[:3]) # UTF-8
@@ -297,6 +302,7 @@ def detectEncodingMeta(self):
297302
"""Report the encoding declared by the meta element
298303
"""
299304
buffer = self.rawStream.read(self.numBytesMeta)
305+
assert isinstance(buffer, str)
300306
parser = EncodingParser(buffer)
301307
self.rawStream.seek(0)
302308
encoding = parser.getEncoding()

0 commit comments

Comments
 (0)
0