Closed
Description
Reported by devin.bayer, Jun 7, 2011
version html5lib-0.95_dev
Traceback:
  /html5lib/html5parser.py line 242 in parseFragment
    self._parse(stream, True, container=container, encoding=encoding)
  /html5lib/html5parser.py line 110 in _parse
    parser=self, **kwargs)
TypeError: __init__() got an unexpected keyword argument 'parser'
Jun 7, 2011 devin.bayer
This is a workaround and a slightly safer design. There is no need for the mixin or to hardcode the `__init__` arguments:
from html5lib import HTMLParser
from html5lib.tokenizer import HTMLTokenizer
from html5lib.sanitizer import HTMLSanitizerMixin
from cgi import escape
class Sanitizer(HTMLTokenizer):
    """Tokenizer that passes every token through an HTMLSanitizerMixin.

    The sanitizer is held as a separate object (composition) instead of
    being mixed into the class, and the constructor forwards whatever
    arguments it receives so the base tokenizer's signature is not
    hardcoded here.
    """

    def __init__(self, *args, **kwargs):
        """Initialise the base tokenizer and create the sanitizer helper.

        All positional and keyword arguments are forwarded unchanged to
        ``HTMLTokenizer.__init__``.
        """
        HTMLTokenizer.__init__(self, *args, **kwargs)
        self._saner = HTMLSanitizerMixin()

    def __iter__(self):
        """Yield the sanitized form of each token from the base tokenizer.

        Tokens for which ``sanitize_token`` returns a falsy value are
        skipped.
        """
        for raw_token in HTMLTokenizer.__iter__(self):
            cleaned = self._saner.sanitize_token(raw_token)
            if not cleaned:
                continue
            yield cleaned
# Module-level parser instance configured to use Sanitizer as its tokenizer
# class; sanitize() below parses fragments through it.
PARSER = HTMLParser(tokenizer=Sanitizer)
def sanitize(html):
    """Parse *html* as a fragment with PARSER and return its ``toxml()`` output."""
    fragment = PARSER.parseFragment(html)
    return fragment.toxml()