undo cdata processing in liberal xml parsing · html5lib/html5lib-python@e30e89a · GitHub
[go: up one dir, main page]

Skip to content

Commit e30e89a

Browse files
committed
undo cdata processing in liberal xml parsing
--HG-- extra : convert_revision : svn%3Aacbfec75-9323-0410-a652-858a13e371e0/trunk%40806
1 parent cfe209b commit e30e89a

File tree

2 files changed

+22
-3
lines changed

2 files changed

+22
-3
lines changed

src/liberalxmlparser.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,12 @@
1515
"""
1616

1717
import html5parser
18-
from constants import voidElements
18+
from constants import voidElements, contentModelFlags
1919
import gettext
2020
_ = gettext.gettext
2121

2222
from xml.dom import XHTML_NAMESPACE
23+
from xml.sax.saxutils import unescape
2324

2425
class XMLParser(html5parser.HTMLParser):
2526
""" liberal XML parser """
@@ -47,6 +48,11 @@ def normalizeToken(self, token):
4748
if token["data"]:
4849
self.parseError(_("End tag contains unexpected attributes."))
4950

51+
elif token["type"] == "Characters":
52+
# un-escape rcdataElements (e.g. style, script)
53+
if self.tokenizer.contentModelFlag == contentModelFlags["CDATA"]:
54+
token["data"] = unescape(token["data"])
55+
5056
elif token["type"] == "Comment":
5157
# Rescue CDATA from the comments
5258
if (token["data"].startswith("[CDATA[") and

tests/test_lxp.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -193,16 +193,29 @@ def test_xlink(self):
193193

194194
def test_br(self):
195195
self.assertXhtmlEquals("""<html xmlns="http://www.w3.org/1999/xhtml">
196-
<head><title>XLINK</title></head>
196+
<head><title>BR</title></head>
197197
<body>
198198
<br/>
199199
</body></html>""")
200200

201201
def test_strong(self):
202202
self.assertXhtmlEquals("""<html xmlns="http://www.w3.org/1999/xhtml">
203-
<head><title>XLINK</title></head>
203+
<head><title>STRONG</title></head>
204204
<body>
205205
<strong></strong>
206+
</body></html>""")
207+
208+
def test_script(self):
209+
self.assertXhtmlEquals("""<html xmlns="http://www.w3.org/1999/xhtml">
210+
<head><title>SCRIPT</title></head>
211+
<body>
212+
<script>1 &lt; 2 &amp; 3</script>
213+
</body></html>""")
214+
215+
def test_title(self):
216+
self.assertXhtmlEquals("""<html xmlns="http://www.w3.org/1999/xhtml">
217+
<head><title>1 &lt; 2 &amp; 3</title></head>
218+
<body>
206219
</body></html>""")
207220

208221
def buildTestSuite():

0 commit comments

Comments
 (0)
0