fix processDoctype; remove atheistParseError; introduce correct flag · awesome-python/html5lib-python@bec1488 · GitHub
[go: up one dir, main page]

Skip to content

Commit bec1488

Browse files
committed
fix processDoctype; remove atheistParseError; introduce correct flag
--HG-- extra : convert_revision : svn%3Aacbfec75-9323-0410-a652-858a13e371e0/trunk%40748
1 parent 28edc78 commit bec1488

File tree

3 files changed

+25
-25
lines changed

3 files changed

+25
-25
lines changed

src/html5parser.py

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -116,10 +116,12 @@ def _parse(self, stream, innerHTML=False, container="div",
116116
method = getattr(self.phase, "process%s" % type, None)
117117
if type in ("Characters", "SpaceCharacters", "Comment"):
118118
method(token["data"])
119-
elif type in ("StartTag", "Doctype"):
119+
elif type == "StartTag":
120120
method(token["name"], token["data"])
121121
elif type == "EndTag":
122122
method(token["name"])
123+
elif type == "Doctype":
124+
method(token["name"], token["publicId"], token["systemId"], token["correct"])
123125
else:
124126
self.parseError(token["data"])
125127

@@ -161,10 +163,6 @@ def parseError(self, data="XXX ERROR MESSAGE NEEDED"):
161163
if self.strict:
162164
raise ParseError
163165

164-
def atheistParseError(self):
165-
"""This error is not an error"""
166-
pass
167-
168166
def normalizeToken(self, token):
169167
""" HTML5 specific normalizations to the token stream """
170168

@@ -174,9 +172,7 @@ def normalizeToken(self, token):
174172
# element. If it matches a void element atheists did the wrong
175173
# thing and if it doesn't it's wrong for everyone.
176174

177-
if token["name"] in voidElements:
178-
self.atheistParseError()
179-
else:
175+
if token["name"] not in voidElements:
180176
self.parseError(_("Solidus (/) incorrectly placed in tag."))
181177

182178
token["type"] = "StartTag"
@@ -286,7 +282,7 @@ def processComment(self, data):
286282
# overridden.
287283
self.tree.insertComment(data, self.tree.openElements[-1])
288284

289-
def processDoctype(self, name, error):
285+
def processDoctype(self, name, publicId, systemId, correct):
290286
self.parser.parseError(_("Unexpected DOCTYPE. Ignored."))
291287

292288
def processSpaceCharacters(self, data):
@@ -322,9 +318,11 @@ def processEOF(self):
322318
def processComment(self, data):
323319
self.tree.insertComment(data, self.tree.document)
324320

325-
def processDoctype(self, name, error):
326-
if error:
321+
def processDoctype(self, name, publicId, systemId, correct):
322+
if name.translate(asciiUpper2Lower) != "html" or publicId != None or\
323+
systemId != None:
327324
self.parser.parseError(_("Erroneous DOCTYPE."))
325+
# XXX need to check quirks mode here
328326
self.tree.insertDoctype(name)
329327
self.parser.phase = self.parser.phases["rootElement"]
330328

src/tokenizer.py

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -608,8 +608,8 @@ def markupDeclarationOpenState(self):
608608
# Put in explicit EOF check
609609
if (not EOF in charStack and
610610
"".join(charStack).upper() == u"DOCTYPE"):
611-
self.currentToken = {"type":"Doctype", "name":"", "publicId":"",
612-
"systemId":"", "data":True}
611+
self.currentToken = {"type":"Doctype", "name":"",
612+
"publicId":None, "systemId":None, "correct":True}
613613
self.state = self.states["doctype"]
614614
else:
615615
self.tokenQueue.append({"type": "ParseError", "data":
@@ -689,11 +689,13 @@ def beforeDoctypeNameState(self):
689689
elif data == u">":
690690
self.tokenQueue.append({"type": "ParseError", "data":
691691
_("Unexpected > character. Expected DOCTYPE name.")})
692+
self.currentToken["correct"] = False
692693
self.tokenQueue.append(self.currentToken)
693694
self.state = self.states["data"]
694695
elif data == EOF:
695696
self.tokenQueue.append({"type": "ParseError", "data":
696697
_("Unexpected end of file. Expected DOCTYPE name.")})
698+
self.currentToken["correct"] = False
697699
self.tokenQueue.append(self.currentToken)
698700
self.state = self.states["data"]
699701
else:
@@ -725,7 +727,7 @@ def afterDoctypeNameState(self):
725727
self.tokenQueue.append(self.currentToken)
726728
self.state = self.states["data"]
727729
elif data == EOF:
728-
self.currentToken["data"] = True
730+
# XXX check spec ... self.currentToken["correct"] = False
729731
self.stream.queue.append(data)
730732
self.tokenQueue.append({"type": "ParseError", "data":
731733
_("Unexpected end of file in DOCTYPE.")})
@@ -759,13 +761,13 @@ def beforeDoctypePublicIdentifierState(self):
759761
elif data == ">":
760762
self.tokenQueue.append({"type": "ParseError", "data":
761763
_("Unexpected end of DOCTYPE.")})
762-
self.currentToken["data"] = True
764+
self.currentToken["correct"] = False
763765
self.tokenQueue.append(self.currentToken)
764766
self.state = self.states["data"]
765767
elif data == EOF:
766768
self.tokenQueue.append({"type": "ParseError", "data":
767769
_("Unexpected end of file in DOCTYPE.")})
768-
self.currentToken["data"] = True
770+
self.currentToken["correct"] = False
769771
self.tokenQueue.append(self.currentToken)
770772
self.state = self.states["data"]
771773
else:
@@ -781,7 +783,7 @@ def doctypePublicIdentifierDoubleQuotedState(self):
781783
elif data == EOF:
782784
self.tokenQueue.append({"type": "ParseError", "data":
783785
_("Unexpected end of file in DOCTYPE.")})
784-
self.currentToken["data"] = True
786+
self.currentToken["correct"] = False
785787
self.tokenQueue.append(self.currentToken)
786788
self.state = self.states["data"]
787789
else:
@@ -795,7 +797,7 @@ def doctypePublicIdentifierSingleQuotedState(self):
795797
elif data == EOF:
796798
self.tokenQueue.append({"type": "ParseError", "data":
797799
_("Unexpected end of file in DOCTYPE.")})
798-
self.currentToken["data"] = True
800+
self.currentToken["correct"] = False
799801
self.tokenQueue.append(self.currentToken)
800802
self.state = self.states["data"]
801803
else:
@@ -816,7 +818,7 @@ def afterDoctypePublicIdentifierState(self):
816818
elif data == EOF:
817819
self.tokenQueue.append({"type": "ParseError", "data":
818820
_("Unexpected end of file in DOCTYPE.")})
819-
self.currentToken["data"] = True
821+
self.currentToken["correct"] = False
820822
self.tokenQueue.append(self.currentToken)
821823
self.state = self.states["data"]
822824
else:
@@ -838,7 +840,7 @@ def beforeDoctypeSystemIdentifierState(self):
838840
elif data == EOF:
839841
self.tokenQueue.append({"type": "ParseError", "data":
840842
_("Unexpected end of file in DOCTYPE.")})
841-
self.currentToken["data"] = True
843+
self.currentToken["correct"] = False
842844
self.tokenQueue.append(self.currentToken)
843845
self.state = self.states["data"]
844846
else:
@@ -854,7 +856,7 @@ def doctypeSystemIdentifierDoubleQuotedState(self):
854856
elif data == EOF:
855857
self.tokenQueue.append({"type": "ParseError", "data":
856858
_("Unexpected end of file in DOCTYPE.")})
857-
self.currentToken["data"] = True
859+
self.currentToken["correct"] = False
858860
self.tokenQueue.append(self.currentToken)
859861
self.state = self.states["data"]
860862
else:
@@ -868,7 +870,7 @@ def doctypeSystemIdentifierSingleQuotedState(self):
868870
elif data == EOF:
869871
self.tokenQueue.append({"type": "ParseError", "data":
870872
_("Unexpected end of file in DOCTYPE.")})
871-
self.currentToken["data"] = True
873+
self.currentToken["correct"] = False
872874
self.tokenQueue.append(self.currentToken)
873875
self.state = self.states["data"]
874876
else:
@@ -885,7 +887,7 @@ def afterDoctypeSystemIdentifierState(self):
885887
elif data == EOF:
886888
self.tokenQueue.append({"type": "ParseError", "data":
887889
_("Unexpected end of file in DOCTYPE.")})
888-
self.currentToken["data"] = True
890+
self.currentToken["correct"] = False
889891
self.tokenQueue.append(self.currentToken)
890892
self.state = self.states["data"]
891893
else:
@@ -896,7 +898,7 @@ def afterDoctypeSystemIdentifierState(self):
896898

897899
def bogusDoctypeState(self):
898900
data = self.stream.char()
899-
self.currentToken["data"] = True
901+
self.currentToken["correct"] = False
900902
if data == u">":
901903
self.tokenQueue.append(self.currentToken)
902904
self.state = self.states["data"]

tests/test_tokenizer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ def parse(self, stream, encoding=None, innerHTML=False):
3333
return self.outputTokens
3434

3535
def processDoctype(self, token):
36-
self.outputTokens.append([u"DOCTYPE", token["name"], token["data"]])
36+
self.outputTokens.append([u"DOCTYPE", token["name"], token["correct"]])
3737

3838
def processStartTag(self, token):
3939
self.outputTokens.append([u"StartTag", token["name"], token["data"]])

0 commit comments

Comments
 (0)
0