|
| 1 | +import _base |
| 2 | + |
class Filter(_base.Filter):
    """Token filter that makes the ``<meta charset>`` in ``<head>`` match
    a given encoding.

    Tokens are dicts with at least ``"type"`` and ``"name"`` keys; tag
    tokens also carry ``"data"``, a list of ``(name, value)`` attribute
    pairs.  While iterating:

    * every ``meta`` EmptyTag that has a ``charset`` attribute gets that
      attribute's value replaced by ``encoding`` (the token is modified
      in place);
    * if the ``head`` element contains no such ``meta``, a new
      ``<meta charset=encoding>`` is inserted right after the ``head``
      start tag (an empty ``<head/>`` is expanded into start tag, meta,
      end tag).

    If ``encoding`` is ``None`` there is nothing to inject and the token
    stream is passed through untouched.
    """

    def __init__(self, source, encoding):
        """Wrap *source* (an iterable of tokens) and remember *encoding*.

        ``encoding`` is the value to write into the ``charset`` attribute,
        or ``None`` to disable the filter.
        """
        _base.Filter.__init__(self, source)
        self.encoding = encoding

    def _meta_token(self):
        """Build the ``<meta charset=...>`` token that gets injected."""
        return {"type": "EmptyTag", "name": "meta",
                "data": [["charset", self.encoding]]}

    def _flush_head(self, pending, meta_found):
        """Yield the buffered head tokens in order.

        The first buffered token is the ``head`` start tag; when no meta
        charset was seen, the injected meta goes directly after it.
        """
        for index, buffered in enumerate(pending):
            yield buffered
            if index == 0 and not meta_found:
                yield self._meta_token()

    def __iter__(self):
        """Yield the filtered token stream (see the class docstring)."""
        # NOTE(review): _base.Filter.__iter__ is assumed to iterate the
        # wrapped source unchanged -- confirm against _base.
        if self.encoding is None:
            # Nothing to inject; in particular do not overwrite an existing
            # charset value with None.
            for token in _base.Filter.__iter__(self):
                yield token
            return

        state = "pre_head"
        meta_found = False
        # Tokens seen since the <head> start tag.  They are held back until
        # </head> so that we know whether a meta still has to be inserted.
        pending = []

        for token in _base.Filter.__iter__(self):
            token_type = token["type"]

            if token_type == "StartTag":
                if token["name"].lower() == "head":
                    state = "in_head"

            elif token_type == "EmptyTag":
                tag_name = token["name"].lower()
                if tag_name == "meta":
                    # replace charset with actual encoding
                    for i, (name, _value) in enumerate(token["data"]):
                        if name == "charset":
                            token["data"][i] = (name, self.encoding)
                            meta_found = True

                elif tag_name == "head" and not meta_found:
                    # insert meta into empty head
                    yield {"type": "StartTag", "name": "head",
                           "data": token["data"]}
                    yield self._meta_token()
                    yield {"type": "EndTag", "name": "head"}
                    meta_found = True
                    continue

            elif token_type == "EndTag":
                if token["name"].lower() == "head" and pending:
                    # insert meta into head (if necessary) and flush the
                    # pending queue; the end tag itself is yielded below.
                    for buffered in self._flush_head(pending, meta_found):
                        yield buffered
                    pending = []
                    meta_found = True
                    state = "post_head"

            if state == "in_head":
                pending.append(token)
            else:
                yield token

        # The stream ended while still inside <head> (no matching end tag):
        # emit what was buffered instead of silently dropping it.
        for buffered in self._flush_head(pending, meta_found):
            yield buffered
0 commit comments