Generate parse errors for nulls encountered in the input stream · html5lib/html5lib-python@2350d56 · GitHub
[go: up one dir, main page]

Skip to content

Commit 2350d56

Browse files
committed
Generate parse errors for nulls encountered in the input stream
--HG-- extra : convert_revision : svn%3Aacbfec75-9323-0410-a652-858a13e371e0/trunk%40833
1 parent b32b741 commit 2350d56

File tree

2 files changed

+7
-1
lines changed

2 files changed

+7
-1
lines changed

src/html5lib/inputstream.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ def __init__(self, source, encoding=None, parseMeta=True, chardet=True):
5353
self.dataStream = codecs.getreader(self.charEncoding)(self.rawStream, 'replace')
5454

5555
self.queue = []
56+
self.errors = []
5657

5758
self.line = self.col = 0
5859
self.lineLengths = []
@@ -214,7 +215,10 @@ def char(self):
214215
return EOF
215216

216217
# Normalize newlines and null characters
217-
if c == '\x00': c = u'\uFFFD'
218+
if c == '\x00':
219+
self.errors.append('null character found in input stream, '
220+
'replaced with U+FFFD')
221+
c = u'\uFFFD'
218222
if c == '\r':
219223
c = self.dataStream.read(1, 1)
220224
if c != '\n':

src/html5lib/tokenizer.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,8 @@ def __iter__(self):
9393
# Start processing. When EOF is reached self.state will return False
9494
# instead of True and the loop will terminate.
9595
while self.state():
96+
while self.stream.errors:
97+
yield {"type": "ParseError", "data": self.stream.errors.pop(0)}
9698
while self.tokenQueue:
9799
yield self.tokenQueue.pop(0)
98100

0 commit comments

Comments
 (0)
0