port inject_meta_charset filter to Python · awesome-python/html5lib-python@9673c0c · GitHub
[go: up one dir, main page]

Skip to content

Commit 9673c0c

Browse files
committed
port inject_meta_charset filter to Python
--HG-- extra : convert_revision : svn%3Aacbfec75-9323-0410-a652-858a13e371e0/trunk%40733
1 parent aca1b91 commit 9673c0c

File tree

2 files changed

+54
-10
lines changed

2 files changed

+54
-10
lines changed

src/filters/inject_meta_charset.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
import _base
2+
3+
class Filter(_base.Filter):
    """Token filter that writes the output encoding into the document head.

    While iterating, every ``charset`` attribute found on a ``meta`` empty
    tag is rewritten to ``self.encoding``.  If the head closes (or the stream
    ends inside the head) without such an attribute having been seen, a
    ``<meta charset=...>`` empty tag is emitted right after the head's
    start tag.
    """

    def __init__(self, source, encoding):
        """Wrap a token source.

        source   -- token iterable, handed to ``_base.Filter.__init__``
        encoding -- value written into ``charset`` attributes; ``None``
                    disables the injection of a new ``meta`` tag
        """
        _base.Filter.__init__(self, source)
        self.encoding = encoding

    def _meta_token(self):
        """Return a fresh ``meta`` empty-tag token carrying the encoding."""
        return {"type": "EmptyTag", "name": "meta",
                "data": [["charset", self.encoding]]}

    def _flush_head(self, pending, meta_found):
        """Return the buffered head tokens in output order.

        The first buffered token is the head start tag; when no charset has
        been seen, the injected ``meta`` token goes directly after it.
        """
        tokens = pending[:1]
        if not meta_found:
            tokens.append(self._meta_token())
        tokens.extend(pending[1:])
        return tokens

    def __iter__(self):
        """Yield the tokens of the wrapped source with the charset applied."""
        state = "pre_head"
        # With no encoding there is nothing to inject, so pretend it is done.
        meta_found = (self.encoding is None)
        # Tokens seen since the head start tag; held back until we know
        # whether a meta token has to be inserted in front of them.
        pending = []

        for token in _base.Filter.__iter__(self):
            token_type = token["type"]
            if token_type == "StartTag":
                if token["name"].lower() == "head":
                    state = "in_head"

            elif token_type == "EmptyTag":
                tag_name = token["name"].lower()
                if tag_name == "meta":
                    # replace charset with actual encoding
                    for i, (name, _value) in enumerate(token["data"]):
                        # Compare case-insensitively, like the tag names.
                        if name.lower() == "charset":
                            token["data"][i] = (name, self.encoding)
                            meta_found = True

                elif tag_name == "head" and not meta_found:
                    # insert meta into empty head
                    yield {"type": "StartTag", "name": "head",
                           "data": token["data"]}
                    yield self._meta_token()
                    yield {"type": "EndTag", "name": "head"}
                    meta_found = True
                    # The empty head token has been replaced; do not emit it.
                    continue

            elif token_type == "EndTag":
                if token["name"].lower() == "head" and pending:
                    # insert meta into head (if necessary) and flush the
                    # pending queue; the end tag itself is yielded below
                    for queued in self._flush_head(pending, meta_found):
                        yield queued
                    pending = []
                    meta_found = True
                    state = "post_head"

            if state == "in_head":
                pending.append(token)
            else:
                yield token

        # The source ended while the head was still open: emit what was
        # buffered instead of silently dropping it.
        if pending:
            for queued in self._flush_head(pending, meta_found):
                yield queued

src/serializer.py

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
from filters.whitespace import Filter as WhitespaceFilter
1111
from filters.optionaltags import Filter as OptionalTagFilter
12+
from filters.inject_meta_charset import Filter as InjectMetaCharsetFilter
1213

1314
from constants import voidElements, booleanAttributes, spaceCharacters
1415

@@ -83,7 +84,7 @@ def serialize(self, treewalker, encoding=None):
8384
in_cdata = False
8485
self.errors = []
8586
if encoding and self.inject_meta_charset:
86-
treewalker = self.filter_inject_meta_charset(treewalker, encoding)
87+
treewalker = InjectMetaCharsetFilter(treewalker, encoding)
8788
# XXX: WhitespaceFilter should be used before OptionalTagFilter
8889
# for maximum efficiency of this latter filter
8990
if self.strip_whitespace:
@@ -206,15 +207,6 @@ def serializeError(self, data="XXX ERROR MESSAGE NEEDED"):
206207
if self.strict:
207208
raise SerializeError
208209

209-
def filter_inject_meta_charset(self, treewalker, encoding):
210-
done = False
211-
for token in treewalker:
212-
if not done and token["type"] == "StartTag" \
213-
and token["name"].lower() == "head":
214-
yield {"type": "EmptyTag", "name": "meta", \
215-
"data": {"charset": encoding}}
216-
yield token
217-
218210
class SerializeError(Exception):
    """Error in serialized tree"""
    # Declared as a class (not a function) so that ``raise SerializeError``
    # raises this exception instead of failing with a TypeError.
    pass

0 commit comments

Comments
 (0)
0