12
12
from filters .inject_meta_charset import Filter as InjectMetaCharsetFilter
13
13
14
14
from constants import voidElements , booleanAttributes , spaceCharacters
15
+ from constants import rcdataElements
16
+
17
+ from xml .sax .saxutils import escape
15
18
16
19
spaceCharacters = u"" .join (spaceCharacters )
17
20
@@ -49,11 +52,10 @@ def htmlentityreplace_errors(exc):
49
52
50
53
del register_error
51
54
52
- def escape_text (text , encoding ):
53
- return text .replace ( "&" , "&amp;" ). encode (encoding , unicode_encode_errors )
55
+ def encode (text , encoding ):
56
+ return text .encode (encoding , unicode_encode_errors )
54
57
55
58
class HTMLSerializer (object ):
56
- cdata_elements = frozenset (("style" , "script" , "xmp" , "iframe" , "noembed" , "noframes" , "noscript" ))
57
59
58
60
quote_attr_values = False
59
61
quote_char = '"'
@@ -109,16 +111,13 @@ def serialize(self, treewalker, encoding=None):
109
111
else :
110
112
yield token ["data" ]
111
113
elif encoding :
112
- yield escape_text ( token ["data" ], encoding )
114
+ yield encode ( escape ( token ["data" ]) , encoding )
113
115
else :
114
- yield token ["data" ] \
115
- .replace ("&" , "&amp;" ) \
116
- .replace ("<" , "&lt;" ) \
117
- .replace (">" , "&gt;" )
116
+ yield escape (token ["data" ])
118
117
119
118
elif type in ("StartTag" , "EmptyTag" ):
120
119
name = token ["name" ]
121
- if name in self . cdata_elements :
120
+ if name in rcdataElements :
122
121
in_cdata = True
123
122
elif in_cdata :
124
123
self .serializeError (_ ("Unexpected child element of a CDATA element" ))
@@ -142,10 +141,9 @@ def serialize(self, treewalker, encoding=None):
142
141
else :
143
142
quote_attr = reduce (lambda x ,y : x or (y in v ),
144
143
spaceCharacters + "<>\" '" , False )
144
+ v = v .replace ("&" , "&amp;" )
145
145
if encoding :
146
- v = escape_text (v , encoding )
147
- else :
148
- v = v .replace ("&" , "&amp;" )
146
+ v = encode (v , encoding )
149
147
if quote_attr :
150
148
quote_char = self .quote_char
151
149
if self .use_best_quote_char :
@@ -174,7 +172,7 @@ def serialize(self, treewalker, encoding=None):
174
172
175
173
elif type == "EndTag" :
176
174
name = token ["name" ]
177
- if name in self . cdata_elements :
175
+ if name in rcdataElements :
178
176
in_cdata = False
179
177
elif in_cdata :
180
178
self .serializeError (_ ("Unexpected child element of a CDATA element" ))
0 commit comments