8000 Support for multiple types of elementtree. Warning - interface change… · awesome-python/html5lib-python@0939d37 · GitHub
[go: up one dir, main page]

Skip to content

Commit 0939d37

Browse files
committed
Support for multiple types of elementtree. Warning - interface changed! One new test failure, but it is only an attribute-order difference, so not significant (bug in test suite)
--HG-- extra : convert_revision : svn%3Aacbfec75-9323-0410-a652-858a13e371e0/trunk%40555
1 parent be3ab83 commit 0939d37

File tree

4 files changed

+281
-234
lines changed

4 files changed

+281
-234
lines changed

src/treebuilders/__init__.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,12 @@
3535

3636
import dom
3737
import simpletree
38+
import etree
3839

39-
try:
40-
import etree
41-
except:
42-
pass
40+
41+
def getTreebuilder(treeType, implementation=None, **kwargs):
    """Return the TreeBuilder class for the requested kind of tree.

    treeType -- case-insensitive name of the tree flavour: "dom",
                "simpletree" or "etree".
    implementation -- only used for "etree": the ElementTree-style module
                      that is handed to etree.getETreeModule.
    kwargs -- extra keyword arguments forwarded to etree.getETreeModule.

    Raises ValueError when treeType is not one of the names above.
    """
    treeType = treeType.lower()
    if treeType in ("dom", "simpletree"):
        # These builder modules are imported at module level under the same
        # names as the tree types, so the module is looked up by name.
        return globals()[treeType].TreeBuilder
    elif treeType == "etree":
        return etree.getETreeModule(implementation, **kwargs).TreeBuilder
    # The previous condition was the always-true string literal
    # "name == etree", which sent every unknown name down the etree path.
    raise ValueError("Unrecognised treebuilder \"%s\"" % (treeType,))

src/treebuilders/etree.py

Lines changed: 239 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,240 @@
1-
import etreefull
1+
import _base
2+
import new
23

3-
class TreeBuilder(etreefull.TreeBuilder):
    """Variant of etreefull.TreeBuilder whose getDocument returns the
    "html" element rather than the whole wrapped document element."""

    def getDocument(self):
        """Return the result of find("html") on the document's wrapped
        element."""
        wrapped = self.document._element
        return wrapped.find("html")
4+
def getETreeModule(ElementTreeImplementation, fullTree=False):
    """Create a synthetic module of tree-builder objects bound to one
    ElementTree implementation.

    ElementTreeImplementation -- module object providing the ElementTree
                                 API; its __name__ is used to name the
                                 generated module.
    fullTree -- passed straight through to getETreeBuilder.

    Returns a new module named "_<implementation name>builder" whose
    namespace contains everything getETreeBuilder returned.
    """
    # types.ModuleType is the very type the deprecated ``new.module``
    # aliased, so this is a drop-in replacement that does not rely on the
    # ``new`` module.
    import types

    moduleName = "_" + ElementTreeImplementation.__name__ + "builder"
    mod = types.ModuleType(moduleName)
    mod.__dict__.update(getETreeBuilder(ElementTreeImplementation, fullTree))
    return mod
9+
10+
def getETreeBuilder(ElementTreeImplementation, fullTree=False):
11+
ElementTree = ElementTreeImplementation
12+
class Element(_base.Node):
    """Tree-builder node that wraps one element of the selected ElementTree
    implementation.

    The wrapped element lives in ``_element``. ``_childNodes`` mirrors the
    wrapped element's child elements as wrapper nodes, in the same order.
    """

    def __init__(self, name):
        self._element = ElementTree.Element(name)
        self.name = name
        self.parent = None
        self._childNodes = []
        self._flags = []

    def _setName(self, name):
        self._element.tag = name

    def _getName(self):
        return self._element.tag

    # The node name is stored as the tag of the wrapped element.
    name = property(_getName, _setName)

    def _getAttributes(self):
        return self._element.attrib

    def _setAttributes(self, attributes):
        # Snapshot the new values first: the caller may hand us this
        # element's own attrib mapping, which clear() would empty before
        # we got to copy anything out of it.
        replacement = list(attributes.items())
        self._element.attrib.clear()
        for key, value in replacement:
            self._element.set(key, value)

    # Reading returns the wrapped element's live attrib mapping; assigning
    # replaces its contents.
    attributes = property(_getAttributes, _setAttributes)

    def _getChildNodes(self):
        return self._childNodes

    def _setChildNodes(self, value):
        # Drop the child elements only; text and attributes are untouched.
        del self._element[:]
        self._childNodes = []
        for element in value:
            # NOTE(review): insertChild is not defined in this class;
            # presumably it comes from _base.Node -- confirm.
            self.insertChild(element)

    childNodes = property(_getChildNodes, _setChildNodes)

    def hasContent(self):
        """Return true if the node has children or text"""
        return bool(self._element.text or len(self._element))

    def appendChild(self, node):
        """Add node as the last child of this node."""
        self._childNodes.append(node)
        self._element.append(node._element)
        node.parent = self

    def insertBefore(self, node, refNode):
        """Insert node immediately before the existing child refNode.

        Raises ValueError if refNode's element is not a child of this
        node's element.
        """
        index = list(self._element).index(refNode._element)
        self._element.insert(index, node._element)
        # Keep the wrapper list in step with the wrapped element so that
        # childNodes does not silently lose the inserted node.
        self._childNodes.insert(index, node)
        node.parent = self

    def removeChild(self, node):
        """Detach the child node from this node."""
        self._element.remove(node._element)
        # Also forget the wrapper; otherwise childNodes keeps reporting a
        # node that is no longer in the tree.
        if node in self._childNodes:
            self._childNodes.remove(node)
        node.parent = None

    def insertText(self, data, insertBefore=None):
        """Add character data to this node.

        ElementTree has no text nodes, so data is appended to this
        element's ``text`` or to the ``tail`` of the child element that
        precedes the insertion point. With insertBefore=None the data goes
        at the end; otherwise it goes before the child node insertBefore.
        """
        if not len(self._element):
            # No child elements: all text belongs to the element itself.
            if not self._element.text:
                self._element.text = ""
            self._element.text += data
        elif insertBefore is None:
            # Insert the text as the tail of the last child element
            if not self._element[-1].tail:
                self._element[-1].tail = ""
            self._element[-1].tail += data
        else:
            # Insert the text before the specified node
            children = list(self._element)
            index = children.index(insertBefore._element)
            if index > 0:
                if not self._element[index - 1].tail:
                    self._element[index - 1].tail = ""
                self._element[index - 1].tail += data
            else:
                if not self._element.text:
                    self._element.text = ""
                self._element.text += data

    def cloneNode(self):
        """Return a new Element with the same name and attributes; text and
        children are not copied."""
        element = Element(self.name)
        element.attributes = self.attributes
        return element

    def reparentChildren(self, newParent):
        """Move this node's leading text and its children to newParent."""
        text = self._element.text
        if newParent.childNodes:
            # The text has to follow newParent's current last child, i.e.
            # go into that child's tail. Either side may be None, so guard
            # instead of concatenating blindly.
            if text:
                last = newParent.childNodes[-1]._element
                last.tail = (last.tail or "") + text
        else:
            if not newParent._element.text:
                newParent._element.text = ""
            if text is not None:
                newParent._element.text += text
        self._element.text = ""
        _base.Node.reparentChildren(self, newParent)
108+
109+
class Comment(Element):
    """Node wrapping a comment element created with ElementTree.Comment."""

    def __init__(self, data):
        # Element.__init__ is not invoked here; the same bookkeeping
        # attributes are set by hand around a comment element instead of
        # a named element.
        self.parent = None
        self._flags = []
        self._childNodes = []
        self._element = ElementTree.Comment(data)

    def _getData(self):
        return self._element.text

    def _setData(self, value):
        self._element.text = value

    # The comment text is stored as the text of the wrapped element.
    data = property(fget=_getData, fset=_setData)
125+
126+
class DocumentType(Element):
    """Doctype node: an element tagged "<!DOCTYPE>" whose text holds the
    doctype name."""

    def __init__(self, name):
        doctypeTag = "<!DOCTYPE>"
        Element.__init__(self, doctypeTag)
        self._element.text = name
130+
131+
class Document(Element):
    """Document node: an element tagged "<DOCUMENT_ROOT>"."""

    def __init__(self):
        rootTag = "<DOCUMENT_ROOT>"
        Element.__init__(self, rootTag)
134+
135+
class DocumentFragment(Element):
    """Fragment node: an element tagged "<DOCUMENT_FRAGMENT>"."""

    def __init__(self):
        fragmentTag = "<DOCUMENT_FRAGMENT>"
        Element.__init__(self, fragmentTag)
138+
139+
def testSerializer(element):
140+
rv = []
141+
finalText = None
142+
def serializeElement(element, indent=0):
143+
if not(hasattr(element, "tag")):
144+
element = element.getroot()
145+
if element.tag == "<!DOCTYPE>":
146+
rv.append("|%s<!DOCTYPE %s>"%(' '*indent, element.text))
147+
elif element.tag == "<DOCUMENT_ROOT>":
148+
rv.append("#document")
149+
if element.text:
150+
rv.append("|%s\"%s\""%(' '*(indent+2), element.text))
151+
if element.tail:
152+
finalText = element.tail
153+
elif type(element.tag) == type(ElementTree.Comment):
154+
rv.append("|%s<!-- %s -->"%(' '*indent, element.text))
155+
else:
156+
rv.append("|%s<%s>"%(' '*indent, element.tag))
157+
if hasattr(element, "attrib"):
158+
for name, value in element.attrib.iteritems():
159+
rv.append('|%s%s="%s"' % (' '*(indent+2), name, value))
160+
if element.text:
161+
rv.append("|%s\"%s\"" %(' '*(indent+2), element.text))
162+
indent += 2
163+
for child in element.getchildren():
164+
serializeElement(child, indent)
165+
if element.tail:
166+
rv.append("|%s\"%s\"" %(' '*(indent-2), element.tail))
167+
serializeElement(element, 0)
168+
169+
if finalText is not None:
170+
rv.append("|%s\"%s\""%(' '*2, finalText))
171+
172+
return "\n".join(rv)
173+
174+
def tostring(element):
    """Serialize an element and its child nodes to a string

    element may also be a tree object exposing getroot(); it is unwrapped
    to its root element first.
    """
    rv = []

    def serializeElement(element):
        # Same test testSerializer uses: tree objects have no tag. The old
        # type() comparison against the ElementTree class never matched,
        # so tree objects were never unwrapped.
        if not hasattr(element, "tag"):
            element = element.getroot()

        if element.tag == "<!DOCTYPE>":
            rv.append("<!DOCTYPE %s>" % (element.text,))
        elif element.tag == "<DOCUMENT_ROOT>":
            # The synthetic root produces no markup of its own.
            if element.text:
                rv.append(element.text)

            for child in element:
                serializeElement(child)

        elif type(element.tag) == type(ElementTree.Comment):
            # Comment elements carry a function, not a string, as tag.
            rv.append("<!--%s-->" % (element.text,))
        else:
            # This is assumed to be an ordinary element
            if not element.attrib:
                rv.append("<%s>" % (element.tag,))
            else:
                attr = " ".join(["%s=\"%s\"" % (name, value)
                                 for name, value in element.attrib.items()])
                rv.append("<%s %s>" % (element.tag, attr))
            if element.text:
                rv.append(element.text)

            for child in element:
                serializeElement(child)

            rv.append("</%s>" % (element.tag,))

        # Tail text of any node, the root included, is written here. The
        # former "finalText" bookkeeping only rebound a local of this
        # nested function, so it never fired; it has been removed together
        # with its malformed format string.
        if element.tail:
            rv.append(element.tail)

    serializeElement(element)

    return "".join(rv)
220+
221+
class TreeBuilder(_base.TreeBuilder):
    """Tree builder wired to the ElementTree-backed node classes defined
    alongside it."""

    # Node classes handed to the base tree builder.
    elementClass = Element
    commentClass = Comment
    doctypeClass = DocumentType
    documentClass = Document
    fragmentClass = DocumentFragment

    def testSerializer(self, element):
        """Return the test-format serialization of element."""
        return testSerializer(element)

    def getDocument(self):
        """Return the wrapped document element when fullTree is set,
        otherwise the result of find("html") on it."""
        wrapped = self.document._element
        if not fullTree:
            return wrapped.find("html")
        return wrapped

    def getFragment(self):
        """Return the base builder's fragment, with its wrapped element
        placed inside an ElementTree object."""
        fragment = _base.TreeBuilder.getFragment(self)
        return ElementTree.ElementTree(fragment._element)
239+
240+
return locals()

0 commit comments

Comments
 (0)
0