Add html serializer to dom · awesome-python/html5lib-python@83ccd6c · GitHub
[go: up one dir, main page]

Skip to content

Commit 83ccd6c

Browse files
committed
Add html serializer to dom
--HG-- extra : convert_revision : svn%3Aacbfec75-9323-0410-a652-858a13e371e0/trunk%40549
1 parent fd655cc commit 83ccd6c

File tree

3 files changed

+38
-8
lines changed

3 files changed

+38
-8
lines changed

examples/sanitizer/sanitizer.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,9 @@ def _sanitizeTree(self, tree, escapeRemovedMarkup):
118118
return tree
119119

120120
def acceptableURI(self, uri):
121-
return urlparse.urlparse(uri)[0] in self.acceptable_schemes
121+
#This is wrong
122+
parsedURI = urlparse.urlparse(uri)
123+
return parsedURI[0] in self.acceptable_schemes or not parsedURI[0]
122124

123125
def nodeToText(self, node, endTag=False):
124126
"""Create an unescaped text node containing a serialization of node's

src/treebuilders/dom.py

Lines changed: 34 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
import _base
22
from xml.dom import minidom, Node, XML_NAMESPACE, XMLNS_NAMESPACE
33
import new
4+
from xml.sax.saxutils import escape
5+
from constants import voidElements
46

57
import re
68
illegal_xml_chars = re.compile("[\x01-\x08\x0B\x0C\x0E-\x1F]")
@@ -86,9 +88,9 @@ def elementClass(self, name):
8688
return NodeBuilder(self.dom.createElement(name))
8789

8890
def commentClass(self, data):
89-
return NodeBuilder(self.dom.createComment(data))
90-
91-
def fragmentClass(self):
91+
return NodeBuilder(self.dom.createComment(data))
92+
93+
def fragmentClass(self):
9294
return NodeBuilder(self.dom.createDocumentFragment())
9395

9496
def appendChild(self, node):
@@ -98,9 +100,9 @@ def testSerializer(self, element):
98100
return testSerializer(element)
99101

100102
def getDocument(self):
101-
return self.dom
102-
103-
def getFragment(self):
103+
return self.dom
104+
105+
def getFragment(self):
104106
return _base.TreeBuilder.getFragment(self).element
105107

106108
def insertText(self, data, parent=None):
@@ -143,6 +145,32 @@ def serializeElement(element, indent=0):
143145

144146
return "\n".join(rv)
145147

148+
class HTMLSerializer(object):
    """Serialize a DOM node and its descendants to an HTML string.

    Works on any node exposing the xml.dom interface used below
    (nodeType, nodeName, nodeValue, childNodes, parentNode, attributes).
    """

    # Elements whose text children must be written out verbatim: an HTML
    # parser does not decode entities inside them, so escaping their
    # content would change its meaning.
    rawTextElements = frozenset(("script", "style", "xmp", "iframe",
                                 "noembed", "noframes", "plaintext"))

    def serialize(self, node):
        """Return the markup for node followed by that of all descendants.

        Elements not listed in voidElements are closed with an end tag
        followed by a newline; void elements get no end tag.
        """
        # Collect the pieces and join once instead of growing a string
        # with += at every level of the recursion.
        parts = [self.serializeNode(node)]
        parts.extend([self.serialize(child) for child in node.childNodes])
        if (node.nodeType == Node.ELEMENT_NODE and
                node.nodeName not in voidElements):
            parts.append("</%s>\n" % node.nodeName)
        return "".join(parts)

    def serializeNode(self, node):
        """Return the markup for node itself, ignoring its children.

        Text yields its (escaped) value, elements yield their start tag,
        comments and doctypes yield their declarations, and every other
        node type (e.g. the document node) yields an empty string.
        """
        if node.nodeType == Node.TEXT_NODE:
            parent = node.parentNode
            if (parent is not None and
                    parent.nodeType == Node.ELEMENT_NODE and
                    parent.nodeName in self.rawTextElements):
                rv = node.nodeValue
            else:
                # Without escaping, "<" and "&" in text would be re-read
                # as markup when the output is parsed again.
                rv = escape(node.nodeValue)
        elif node.nodeType == Node.ELEMENT_NODE:
            rv = "<%s" % node.nodeName
            if node.hasAttributes():
                # Values are wrapped in single quotes, so the apostrophe
                # must be escaped too; escape() alone only handles
                # "&", "<" and ">".
                rv += "".join([" %s='%s'" % (key,
                                             escape(value, {"'": "&#39;"}))
                               for key, value in node.attributes.items()])
            rv += ">"
        elif node.nodeType == Node.COMMENT_NODE:
            rv = "<!-- %s -->" % escape(node.nodeValue)
        elif node.nodeType == Node.DOCUMENT_TYPE_NODE:
            rv = "<!DOCTYPE %s>" % node.name
        else:
            rv = ""
        return rv
173+
146174
def dom2sax(node, handler, nsmap={'xml':XML_NAMESPACE}):
147175
if node.nodeType == Node.ELEMENT_NODE:
148176
if not nsmap:

src/treebuilders/simpletree.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -210,7 +210,7 @@ def serializeNode(self, node):
210210
node.attributes.iteritems()])
211211
rv += ">"
212212 elif node.type == CommentNode.type:
213-
rv = "<!-- %s -->" % escape(self.data)
213+
rv = "<!-- %s -->" % escape(node.data)
214214
elif node.type == DocumentType.type:
215215
rv = "<!DOCTYPE %s>" % node.name
216216
else:

0 commit comments

Comments
 (0)
0