Start working on making treewalkers work with namespaces. WIP · html5lib/html5lib-python@8d2f6b0 · GitHub
[go: up one dir, main page]

Skip to content

Commit 8d2f6b0

Browse files
committed
Start working on making treewalkers work with namespaces. WIP
--HG-- branch : svgmathml extra : convert_revision : svn%3Aacbfec75-9323-0410-a652-858a13e371e0/branches/svgmathml%401303
1 parent ad36429 commit 8d2f6b0

File tree

5 files changed

+40
-18
lines changed

5 files changed

+40
-18
lines changed

src/html5lib/treewalkers/_base.py

Lines changed: 18 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -22,17 +22,22 @@ def normalizeAttrs(self, attrs):
2222
return [(unicode(name),unicode(value)) for name,value in attrs]
2323

2424
def emptyTag(self, name, attrs, hasChildren=False):
25-
yield {"type": "EmptyTag", "name": unicode(name), \
26-
"data": self.normalizeAttrs(attrs)}
25+
yield {"type": "EmptyTag", "name": unicode(name),
26+
"data": self.normalizeAttrs(attrs)}
2727
if hasChildren:
2828
yield self.error(_("Void element has children"))
2929

30-
def startTag(self, name, attrs):
31-
return {"type": "StartTag", "name": unicode(name), \
32-
"data": self.normalizeAttrs(attrs)}
30+
def startTag(self, namespace, name, attrs):
31+
return {"type": "StartTag",
32+
"name": unicode(name),
33+
"namespace":unicode(namespace),
34+
"data": self.normalizeAttrs(attrs)}
3335

34-
def endTag(self, name):
35-
return {"type": "EndTag", "name": unicode(name), "data": []}
36+
def endTag(self, namespace, name):
37+
return {"type": "EndTag",
38+
"name": unicode(name),
39+
"namespace":unicode(namespace)
40+
"data": []}
3641

3742
def text(self, data):
3843
data = unicode(data)
@@ -64,9 +69,9 @@ class RecursiveTreeWalker(TreeWalker):
6469
def walkChildren(self, node):
6570
raise NodeImplementedError
6671

67-
def element(self, node, name, attrs, hasChildren):
72+
def element(self, node, namespace, name, attrs, hasChildren):
6873
if name in voidElements:
69-
for token in self.emptyTag(name, attrs, hasChildren):
74+
for token in self.emptyTag(namespace, name, attrs, hasChildren):
7075
yield token
7176
else:
7277
yield self.startTag(name, attrs)
@@ -113,14 +118,14 @@ def __iter__(self):
113118
yield token
114119

115120
elif type == ELEMENT:
116-
name, attributes, hasChildren = details
121+
namespace, name, attributes, hasChildren = details
117122
if name in voidElements:
118-
for token in self.emptyTag(name, attributes, hasChildren):
123+
for token in self.emptyTag(namespace, name, attributes, hasChildren):
119124
yield token
120125
hasChildren = False
121126
else:
122127
endTag = name
123-
yield self.startTag(name, attributes)
128+
yield self.startTag(namespace, name, attributes)
124129

125130
elif type == COMMENT:
126131
yield self.comment(details[0])
@@ -143,7 +148,7 @@ def __iter__(self):
143148
details = self.getNodeDetails(currentNode)
144149
type, details = details[0], details[1:]
145150
if type == ELEMENT:
146-
name, attributes, hasChildren = details
151+
namespace, name, attributes, hasChildren = details
147152
if name not in voidElements:
148153
yield self.endTag(name)
149154
nextSibling = self.getNextSibling(currentNode)

src/html5lib/treewalkers/dom.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,8 @@ def getNodeDetails(self, node):
1616
return _base.TEXT, node.nodeValue
1717

1818
elif node.nodeType == Node.ELEMENT_NODE:
19-
return _base.ELEMENT, node.nodeName, node.attributes.items(), node.hasChildNodes
19+
return (_base.ELEMENT, node.namespaceURI, node.nodeName,
20+
node.attributes.items(), node.hasChildNodes)
2021

2122
elif node.nodeType == Node.COMMENT_NODE:
2223
return _base.COMMENT, node.nodeValue

src/html5lib/treewalkers/etree.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,13 @@
33

44
import new
55
import copy
6+
import re
67

78
import _base
89
from html5lib.constants import voidElements
910

11+
tag_regexp = re.compile("{([^}]*)}(.*)")
12+
1013
moduleCache = {}
1114

1215
def getETreeModule(ElementTreeImplementation):
@@ -60,7 +63,14 @@ def getNodeDetails(self, node):
6063

6164
else:
6265
#This is assumed to be an ordinary element
63-
return _base.ELEMENT, node.tag, node.attrib.items(), len(node) or node.text
66+
match = tag_regexp.match(node.tag)
67+
if match:
68+
namespace, tag = match.groups()
69+
else:
70+
namespace = None
71+
tag = node.tag
72+
return (_base.ELEMENT, namespace, tag,
73+
node.attrib.items(), len(node) or node.text)
6474

6575
def getFirstChild(self, node):
6676
if isinstance(node, tuple):

src/html5lib/treewalkers/lxmletree.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,13 @@ def getNodeDetails(self, node):
127127

128128
else:
129129
#This is assumed to be an ordinary element
130-
return (_base.ELEMENT, self.filter.fromXmlName(node.tag),
130+
match = tag_regexp.match(node.tag)
131+
if match:
132+
namespace, tag = match.groups()
133+
else:
134+
namespace = None
135+
tag = node.tag
136+
return (_base.ELEMENT, namespace, self.filter.fromXmlName(tag),
131137
[(self.filter.fromXmlName(name), value) for
132138
name,value in node.attrib.iteritems()],
133139
len(node) > 0 or node.text)

src/html5lib/treewalkers/simpletree.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,8 @@ def getNodeDetails(self, node):
3232
return _base.TEXT, node.value
3333

3434
elif node.type == 5: # Element
35-
return _base.ELEMENT, node.name, \
36-
node.attributes.items(), node.hasContent()
35+
return (_base.ELEMENT, node.namespace, node.name,
36+
node.attributes.items(), node.hasContent())
3737

3838
elif node.type == 6: # CommentNode
3939
return _base.COMMENT, node.data

0 commit comments

Comments
 (0)
0