Escape angle brackets even when a character encoding is specified. · awesome-python/html5lib-python@e52b6c8 · GitHub
[go: up one dir, main page]

Skip to content

Commit e52b6c8

Browse files
committed
Escape angle brackets even when a character encoding is specified.
Ultimately we will need an option to escape brackets when found in attribute values to keep the XHTML crowd happy. --HG-- extra : convert_revision : svn%3Aacbfec75-9323-0410-a652-858a13e371e0/trunk%40780
1 parent 50f68b2 commit e52b6c8

File tree

1 file changed

+11
-13
lines changed

1 file changed

+11
-13
lines changed

src/serializer.py

Lines changed: 11 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@
1212
from filters.inject_meta_charset import Filter as InjectMetaCharsetFilter
1313

1414
from constants import voidElements, booleanAttributes, spaceCharacters
15+
from constants import rcdataElements
16+
17+
from xml.sax.saxutils import escape
1518

1619
spaceCharacters = u"".join(spaceCharacters)
1720

@@ -49,11 +52,10 @@ def htmlentityreplace_errors(exc):
4952

5053
del register_error
5154

52-
def escape_text(text, encoding):
53-
return text.replace("&", "&amp;").encode(encoding, unicode_encode_errors)
55+
def encode(text, encoding):
56+
return text.encode(encoding, unicode_encode_errors)
5457

5558
class HTMLSerializer(object):
56-
cdata_elements = frozenset(("style", "script", "xmp", "iframe", "noembed", "noframes", "noscript"))
5759

5860
quote_attr_values = False
5961
quote_char = '"'
@@ -109,16 +111,13 @@ def serialize(self, treewalker, encoding=None):
109111
else:
110112
yield token["data"]
111113
elif encoding:
112-
yield escape_text(token["data"], encoding)
114+
yield encode(escape(token["data"]), encoding)
113115
else:
114-
yield token["data"] \
115-
.replace("&", "&amp;") \
116-
.replace("<", "&lt;") \
117-
.replace(">", "&gt;")
116+
yield escape(token["data"])
118117

119118
elif type in ("StartTag", "EmptyTag"):
120119
name = token["name"]
121-
if name in self.cdata_elements:
120+
if name in rcdataElements:
122121
in_cdata = True
123122
elif in_cdata:
124123
self.serializeError(_("Unexpected child element of a CDATA element"))
@@ -142,10 +141,9 @@ def serialize(self, treewalker, encoding=None):
142141
else:
143142
quote_attr = reduce(lambda x,y: x or (y in v),
144143
spaceCharacters + "<>\"'", False)
144+
v = v.replace("&", "&amp;")
145145
if encoding:
146-
v = escape_text(v, encoding)
147-
else:
148-
v = v.replace("&", "&amp;")
146+
v = encode(v, encoding)
149147
if quote_attr:
150148
quote_char = self.quote_char
151149
if self.use_best_quote_char:
@@ -174,7 +172,7 @@ def serialize(self, treewalker, encoding=None):
174172

175173
elif type == "EndTag":
176174
name = token["name"]
177-
if name in self.cdata_elements:
175+
if name in rcdataElements:
178176
in_cdata = False
179177
elif in_cdata:
180178
self.serializeError(_("Unexpected child element of a CDATA element"))

0 commit comments

Comments
 (0)
0