Teach lint & treewalkers that elements are only void in HTML ns · ezc/html5lib-python@66ef026 · GitHub
[go: up one dir, main page]

Skip to content

Commit 66ef026

Browse files
committed
Teach lint & treewalkers that elements are only void in HTML ns
1 parent 270a2ca commit 66ef026

File tree

3 files changed: +20 -10 lines changed

html5lib/filters/lint.py

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
from six import text_type
44

55
from . import _base
6-
from ..constants import voidElements
6+
from ..constants import namespaces, voidElements
77

88
from ..constants import spaceCharacters
99
spaceCharacters = "".join(spaceCharacters)
@@ -19,17 +19,22 @@ def __iter__(self):
1919
for token in _base.Filter.__iter__(self):
2020
type = token["type"]
2121
if type in ("StartTag", "EmptyTag"):
22+
namespace = token["namespace"]
2223
name = token["name"]
24+
if namespace is not None and not isinstance(namespace, text_type):
25+
raise LintError("Tag namespace is not a string or None: %(name)r" % {"name": namespace})
26+
if namespace == "":
27+
raise LintError("Empty tag namespace")
2328
if not isinstance(name, text_type):
2429
raise LintError("Tag name is not a string: %(tag)r" % {"tag": name})
2530
if not name:
2631
raise LintError("Empty tag name")
27-
if type == "StartTag" and name in voidElements:
32+
if type == "StartTag" and (not namespace or namespace == namespaces["html"]) and name in voidElements:
2833
raise LintError("Void element reported as StartTag token: %(tag)s" % {"tag": name})
29-
elif type == "EmptyTag" and name not in voidElements:
34+
elif type == "EmptyTag" and (not namespace or namespace == namespaces["html"]) and name not in voidElements:
3035
raise LintError("Non-void element reported as EmptyTag token: %(tag)s" % {"tag": token["name"]})
3136
if type == "StartTag":
32-
open_elements.append(name)
37+
open_elements.append((namespace, name))
3338
for (namespace, localname), value in token["data"].items():
3439
if namespace is not None and not isinstance(namespace, text_type):
3540
raise LintError("Attribute namespace is not a string or None: %(name)r" % {"name": namespace})
@@ -43,15 +48,20 @@ def __iter__(self):
4348
raise LintError("Attribute value is not a string: %(value)r" % {"value": value})
4449

4550
elif type == "EndTag":
51+
namespace = token["namespace"]
4652
name = token["name"]
53+
if namespace is not None and not isinstance(namespace, text_type):
54+
raise LintError("Tag namespace is not a string or None: %(name)r" % {"name": namespace})
55+
if namespace == "":
56+
raise LintError("Empty tag namespace")
4757
if not isinstance(name, text_type):
4858
raise LintError("Tag name is not a string: %(tag)r" % {"tag": name})
4959
if not name:
5060
raise LintError("Empty tag name")
51-
if name in voidElements:
61+
if (not namespace or namespace == namespaces["html"]) and name in voidElements:
5262
raise LintError("Void element reported as EndTag token: %(tag)s" % {"tag": name})
5363
start_name = open_elements.pop()
54-
if start_name != name:
64+
if start_name != (namespace, name):
5565
raise LintError("EndTag (%(end)s) does not match StartTag (%(start)s)" % {"end": name, "start": start_name})
5666

5767
elif type == "Comment":

html5lib/treewalkers/_base.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
from six import text_type, string_types
33

44
from xml.dom import Node
5-
from ..constants import voidElements, spaceCharacters
5+
from ..constants import namespaces, voidElements, spaceCharacters
66

77
__all__ = ["DOCUMENT", "DOCTYPE", "TEXT", "ELEMENT", "COMMENT", "ENTITY", "UNKNOWN",
88
"TreeWalker", "NonRecursiveTreeWalker"]
@@ -154,7 +154,7 @@ def __iter__(self):
154154

155155
elif type == ELEMENT:
156156
namespace, name, attributes, hasChildren = details
157-
if name in voidElements:
157+
if (not namespace or namespace == namespaces["html"]) and name in voidElements:
158158
for token in self.emptyTag(namespace, name, attributes,
159159
hasChildren):
160160
yield token
@@ -187,7 +187,7 @@ def __iter__(self):
187187
type, details = details[0], details[1:]
188188
if type == ELEMENT:
189189
namespace, name, attributes, hasChildren = details
190-
if name not in voidElements:
190+
if (namespace and namespace != namespaces["html"]) or name not in voidElements:
191191
yield self.endTag(namespace, name)
192192
if self.tree is currentNode:
193193
currentNode = None

html5lib/treewalkers/genshistream.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ def tokens(self, event, next):
4848
elif kind == END:
4949
name = data.localname
5050
namespace = data.namespace
51-
if name not in voidElements:
51+
if namespace != namespaces["html"] or name not in voidElements:
5252
yield self.endTag(namespace, name)
5353

5454
elif kind == COMMENT:

0 commit comments

Comments
 (0)
0