detect UTF-32 before UTF-16 · awesome-python/html5lib-python@d071325 · GitHub
[go: up one dir, main page]

Skip to content

Commit d071325

Browse files
committed
detect UTF-32 before UTF-16
--HG-- extra : convert_revision : svn%3Aacbfec75-9323-0410-a652-858a13e371e0/trunk%40597
1 parent 346cc04 commit d071325

File tree

1 file changed

+7
-5
lines changed

1 file changed

+7
-5
lines changed

src/inputstream.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -123,14 +123,16 @@ def detectBOM(self):
123123
string = self.rawStream.read(4)
124124

125125
# Try detecting the BOM using bytes from the string
126-
encoding = bomDict.get(string[:3]) # UTF-8
126+
encoding = bomDict.get(string[:3]) # UTF-8
127127
seek = 3
128128
if not encoding:
129-
encoding = bomDict.get(string[:2]) # UTF-16
130-
seek = 2
129+
# Need to detect UTF-32 before UTF-16
130+
encoding = bomDict.get(string) # UTF-32
131+
seek = 4
131132
if not encoding:
132-
encoding = bomDict.get(string) # UTF-32
133-
seek = 4
133+
encoding = bomDict.get(string[:2]) # UTF-16
134+
seek = 2
135+
134136

135137
#AT - move this to the caller?
136138
# Set the read position past the BOM if one was found, otherwise

0 commit comments

Comments
 (0)
0