Ensure that the XML prolog doesn't make it into the DOM · html5lib/html5lib-python@2edd450 · GitHub
[go: up one dir, main page]

Skip to content

Commit 2edd450

Browse files
committed
Ensure that the XML prolog doesn't make it into the DOM
--HG-- extra : convert_revision : svn%3Aacbfec75-9323-0410-a652-858a13e371e0/trunk%40834
1 parent 2350d56 commit 2edd450

File tree

2 files changed

+24
-0
lines changed

2 files changed

+24
-0
lines changed

src/html5lib/liberalxmlparser.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ class XHTMLParser(XMLParser):
6969

7070
def __init__(self, *args, **kwargs):
7171
html5parser.HTMLParser.__init__(self, *args, **kwargs)
72+
self.phases["initial"] = XmlInitialPhase(self, self.tree)
7273
self.phases["rootElement"] = XhmlRootPhase(self, self.tree)
7374

7475
def normalizeToken(self, token):
@@ -101,7 +102,19 @@ def insertHtmlElement(self):
101102
self.tree.document.appendChild(element)
102103
self.parser.phase = self.parser.phases["beforeHead"]
103104

105+
class XmlInitialPhase(html5parser.InitialPhase):
106+
""" Consume XML Prologs """
107+
def processComment(self, data):
108+
if not data.startswith('?xml') or not data.endswith('?'):
109+
html5parser.InitialPhase.processComment(self, data)
110+
104111
class XmlRootPhase(html5parser.Phase):
112+
""" Consume XML Prologs """
113+
def processComment(self, data):
114+
print repr(data)
115+
if not data.startswith('?xml') or not data.endswith('?'):
116+
html5parser.InitialPhase.processComment(self, data)
117+
105118
""" Prime the Xml parser """
106119
def __getattr__(self, name):
107120
self.tree.openElements.append(self.tree.document)

tests/test_lxp.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,17 @@ def test_title(self):
214214
self.assertXhtmlEquals("""<html xmlns="http://www.w3.org/1999/xhtml">
215215
<head><title>1 &lt; 2 &amp; 3</title></head>
216216
<body>
217+
</body></html>""")
218+
219+
def test_prolog(self):
220+
self.assertXhtmlEquals("""<?xml version="1.0" encoding="UTF-8" ?>
221+
<html xmlns="http://www.w3.org/1999/xhtml">
222+
<head><title>PROLOG</title></head>
223+
<body>
224+
</body></html>""",
225+
"""<html xmlns="http://www.w3.org/1999/xhtml">
226+
<head><title>PROLOG</title></head>
227+
<body>
217228
</body></html>""")
218229

219230
def buildTestSuite():

0 commit comments

Comments
 (0)
0