diff --git a/.gitignore b/.gitignore index 73d97fec..ce463d19 100644 --- a/.gitignore +++ b/.gitignore @@ -18,3 +18,5 @@ stats.prof # We have no interest in built Sphinx files /doc/_build + +venv diff --git a/html5lib/constants.py b/html5lib/constants.py index 5735d7b6..d1477b31 100644 --- a/html5lib/constants.py +++ b/html5lib/constants.py @@ -3088,12 +3088,19 @@ "ParseError": 7, "JinjaStatementStartTag": 8, "JinjaStatementEndTag": 9, - "JinjaStatementTag": 10, + "JinjaStatement": 10, "JinjaVariableStartTag": 11, "JinjaVariableEndTag": 12, "JinjaVariable": 13, "JinjaFilter": 14, - "JinjaPipe": 15 + "JinjaPipe": 15, + "JinjaArgumentStartTag": 16, + "JinjaArgumentEndTag": 17, + "JinjaArgument": 18, + "JinjaExtendTag": 19, + "JinjaIncludeTag": 20, + "JinjaImportTag": 21, + "JinjaComment": 22 } tagTokenTypes = frozenset((tokenTypes["StartTag"], tokenTypes["EndTag"], diff --git a/html5lib/html5parser.py b/html5lib/html5parser.py index 9d836e16..9768ab06 100644 --- a/html5lib/html5parser.py +++ b/html5lib/html5parser.py @@ -168,12 +168,19 @@ def mainLoop(self): ParseErrorToken = tokenTypes["ParseError"] JinjaStatementStartTag = tokenTypes["JinjaStatementStartTag"] JinjaStatementEndTag = tokenTypes["JinjaStatementEndTag"] - JinjaStatementTag = tokenTypes["JinjaStatementTag"] + JinjaStatement = tokenTypes["JinjaStatement"] JinjaVariableStartTag = tokenTypes["JinjaVariableStartTag"] JinjaVariableEndTag = tokenTypes["JinjaVariableEndTag"] JinjaVariable = tokenTypes["JinjaVariable"] JinjaPipe = tokenTypes["JinjaPipe"] JinjaFilter = tokenTypes["JinjaFilter"] + JinjaArgumentStartTag = tokenTypes["JinjaArgumentStartTag"] + JinjaArgumentEndTag = tokenTypes["JinjaArgumentEndTag"] + JinjaArgument = tokenTypes["JinjaArgument"] + JinjaExtendTag = tokenTypes["JinjaExtendTag"] + JinjaIncludeTag = tokenTypes["JinjaIncludeTag"] + JinjaImportTag = tokenTypes["JinjaImportTag"] + JinjaComment = tokenTypes["JinjaComment"] for token in self.normalizedTokens(): new_token = token @@ -190,8 +197,11 @@ def mainLoop(self): new_token = None else: if type in (JinjaVariableStartTag, JinjaVariableEndTag, JinjaVariable, JinjaFilter, JinjaPipe): - log.debug(u"Type is a jinja tag") phase = self.phases["inJinjaVariable"] + elif type in (JinjaStatementStartTag, JinjaStatementEndTag, JinjaStatement): + phase = self.phases["inJinjaStatement"] + elif type in (JinjaArgumentStartTag, JinjaArgumentEndTag, JinjaArgument): + phase = self.phases["inJinjaArgument"] elif ( len(self.tree.openElements) == 0 or currentNodeNamespace == self.tree.defaultNamespace or @@ -224,8 +234,8 @@ def mainLoop(self): new_token = phase.processJinjaStatementStartTag(new_token) elif type == JinjaStatementEndTag: new_token = phase.processJinjaStatementEndTag(new_token) - elif type == JinjaStatementTag: - new_token = phase.processJinjaStatementTag(new_token) + elif type == JinjaStatement: + new_token = phase.processJinjaStatement(new_token) elif type == JinjaVariableStartTag: new_token = phase.processJinjaVariableStartTag(new_token) elif type == JinjaVariableEndTag: @@ -234,8 +244,22 @@ def mainLoop(self): new_token = phase.processJinjaVariable(new_token) elif type == JinjaPipe: new_token = phase.processJinjaPipe(new_token) + elif type == JinjaComment: + new_token = phase.processJinjaComment(new_token) elif type == JinjaFilter: new_token = phase.processJinjaFilter(new_token) + elif type == JinjaArgumentStartTag: + new_token = phase.processJinjaArgumentStartTag(new_token) + elif type == JinjaArgumentEndTag: + new_token = phase.processJinjaArgumentEndTag(new_token) + elif type == JinjaArgument: + new_token = phase.processJinjaArgument(new_token) + elif type == JinjaExtendTag: + new_token = phase.processJinjaExtendTag(new_token) + elif type == JinjaIncludeTag: + new_token = phase.processJinjaIncludeTag(new_token) + elif type == JinjaImportTag: + new_token = phase.processJinjaImportTag(new_token) if (type == StartTagToken and token["selfClosing"] and not token["selfClosingAcknowledged"]): @@ -432,7 +456,6 @@ def resetInsertionMode(self): new_phase = self.phases["inBody"] break - #log.debug(u"Changing phase to {}".format(new_phase)) self.phase = new_phase def parseRCDataRawtext(self, token, contentType): @@ -450,7 +473,6 @@ def parseRCDataRawtext(self, token, contentType): self.originalPhase = self.phase - log.debug(u"Changing phase to text") self.phase = self.phases["text"] @@ -517,7 +539,7 @@ def processJinjaStatementStartTag(self, token): def processJinjaStatementEndTag(self, token): pass - def processJinjaStatementTag(self, token): + def processJinjaStatement(self, token): pass def processJinjaVariableStartTag(self, token): @@ -529,6 +551,31 @@ def processJinjaVariableEndTag(self, token): def processJinjaVariable(self, token): pass + def processJinjaExtendTag(self, token): + element = self.tree.createElementWithoutNamespace(token) + self.tree.openElements[-1].appendChild(element) + + def processJinjaIncludeTag(self, token): + element = self.tree.createElementWithoutNamespace(token) + self.tree.openElements[-1].appendChild(element) + + def processJinjaComment(self, token): + element = self.tree.createElementWithoutNamespace(token) + self.tree.openElements[-1].appendChild(element) + + def processJinjaImportTag(self, token): + element = self.tree.createElementWithoutNamespace(token) + self.tree.openElements[-1].appendChild(element) + + def processJinjaArgumentStartTag(self, token): + pass + + def processJinjaArgumentEndTag(self, token): + pass + + def processJinjaArgument(self, token): + pass + def processJinjaPipe(self, token): pass @@ -553,19 +600,13 @@ def processEndTag(self, token): class InJinjaVariablePhase(Phase): def processJinjaVariableStartTag(self, token): - log = logging.getLogger('html5lib') - log.debug(u"InJinja: Start Tag") self.tree.reconstructActiveFormattingElements() self.tree.insertElement(token) def processJinjaVariableEndTag(self, token): - log = logging.getLogger('html5lib') - log.debug(u"InJinja: End Tag {}".format(token["name"])) for node in self.tree.openElements[::-1]: - log.debug(u"InJinja: Open tag {} token {}".format(node, token)) if node.name == token["name"]: self.tree.generateImpliedEndTags(exclude=token["name"]) - log.debug(u"InJinja: Implied end tag {} {}".format(self.tree.openElements[-1].name, token["name"])) if self.tree.openElements[-1].name != token["name"]: self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) while self.tree.openElements.pop() != node: @@ -573,7 +614,6 @@ def processJinjaVariableEndTag(self, token): break else: if node.nameTuple in specialElements: - log.debug(u"Nametuple {} in {}".format(node.nameTuple, specialElements)) self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) break @@ -589,6 +629,68 @@ def processJinjaFilter(self, token): element = self.tree.createElementWithoutNamespace(token) self.tree.openElements[-1].appendChild(element) + class InJinjaStatementPhase(Phase): + def processJinjaStatementStartTag(self, token): + if token['name'] == 'jinjaelse': + self.closeOpenIf(token) + elif token['name'] == 'jinjaelif': + self.closeOpenIf(token) + + self.tree.reconstructActiveFormattingElements() + self.tree.insertElement(token) + + def closeOpenIf(self, token): + #import logging + #log = logging.getLogger(u"html5lib") + + for node in self.tree.openElements[::-1]: + #log.debug(u"Prev {} Cur {}".format(node.name, token['name'])) + + if node.name == token["name"] or (node.name in ["jinjaif", "jinjaelif"] and token["name"] in ["jinjaelse", "jinjaelif"]): + self.tree.generateImpliedEndTags(exclude=token["name"]) + + if self.tree.openElements[-1].name in ["jinjaif", "jinjaelif"] and token["name"] in ["jinjaelse", "jinjaelif"]: + pass + elif self.tree.openElements[-1].name != token["name"]: + self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) + while self.tree.openElements.pop() != node: + pass + + break + else: + if node.nameTuple in specialElements: + self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) + break + + def processJinjaStatementEndTag(self, token): + import logging + log = logging.getLogger(u"html5lib") + + for node in self.tree.openElements[::-1]: + if node.name == token["name"] or (node.name in ["jinjaelse", "jinjaelif"] and token["name"] == "jinjaif"): + self.tree.generateImpliedEndTags(exclude=token["name"]) + + if self.tree.openElements[-1].name in ["jinjaelse", "jinjaelif"] and token["name"] == "jinjaif": + pass + elif self.tree.openElements[-1].name != token["name"]: + self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) + + while self.tree.openElements.pop() != node: + pass + break + else: + log.debug(u"Node {}".format(node.name)) + self.tree.openElements.pop() + + def processJinjaStatement(self, token): + element = self.tree.createElementWithoutNamespace(token) + self.tree.openElements[-1].appendChild(element) + + class InJinjaArgumentPhase(Phase): + def processJinjaArgument(self, token): + element = self.tree.createElementWithoutNamespace(token) + self.tree.openElements[-1].childNodes[-1].appendChild(element) + class InitialPhase(Phase): def processSpaceCharacters(self, token): pass @@ -882,8 +984,6 @@ def startTagOther(self, token): def endTagHead(self, token): node = self.parser.tree.openElements.pop() assert node.name == "head", "Expected head got %s" % node.name - log = logging.getLogger(u"html5lib") - log.debug(u"Switching phase to afterHead") self.parser.phase = self.parser.phases["afterHead"] def endTagHtmlBodyBr(self, token): @@ -894,8 +994,6 @@ def endTagOther(self, token): self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) def anythingElse(self): - log = logging.getLogger(u"html5lib") - log.debug(u"Implied end head tag") self.endTagHead(impliedTagToken("head")) # XXX If we implement a parser for which scripting is disabled we need to @@ -966,8 +1064,6 @@ def endTagOther(self, token): def anythingElse(self): self.tree.insertElement(impliedTagToken("body", "StartTag")) - log = logging.getLogger(u"html5lib") - log.debug(u"Changing phase to body") self.parser.phase = self.parser.phases["inBody"] self.parser.framesetOK = True @@ -1080,6 +1176,9 @@ def processEOF(self): "tfoot", "th", "thead", "tr", "body", "html")) for node in self.tree.openElements[::-1]: + if node.name.startswith("jinja"): + continue + if node.name not in allowed_elements: self.parser.parseError("expected-closing-tag-but-got-eof") break @@ -2794,6 +2893,8 @@ def processEndTag(self, token): # XXX "inHeadNoscript": InHeadNoScriptPhase, "afterHead": AfterHeadPhase, "inJinjaVariable": InJinjaVariablePhase, + "inJinjaStatement": InJinjaStatementPhase, + "inJinjaArgument": InJinjaArgumentPhase, "inBody": InBodyPhase, "text": TextPhase, "inTable": InTablePhase, diff --git a/html5lib/inputstream.py b/html5lib/inputstream.py index 9e03b931..6eb74fbd 100644 --- a/html5lib/inputstream.py +++ b/html5lib/inputstream.py @@ -4,6 +4,7 @@ import codecs import re +import logging from .constants import EOF, spaceCharacters, asciiLetters, asciiUppercase from .constants import encodings, ReparseException @@ -43,6 +44,8 @@ class BufferedIOBase(object): # Cache for charsUntil() charsUntilRegEx = {} +log = logging.getLogger(u"html5lib") + class BufferedStream(object): """Buffering for streams that do not have buffering of their own diff --git a/html5lib/tests/test_jinja.py b/html5lib/tests/test_jinja.py new file mode 100644 index 00000000..9194faf8 --- /dev/null +++ b/html5lib/tests/test_jinja.py @@ -0,0 +1,549 @@ +import html5lib +import unittest +import logging + +log = logging.getLogger(__name__) + + +def dump(tree, tabs=0): + log.debug(u"{}Tag '{}' - {} children - Value = {} - Text = {}".format( + "".join(["\t" for i in range(tabs)]), tree.tag, len(tree), tree.attrib['value'] if 'value' in tree.attrib else None, tree.text)) + + for child in tree: + dump(child, tabs + 1) + + +class JinjaTestCase(unittest.TestCase): + def setUp(self): + self.parser = html5lib.HTMLParser(strict=True, namespaceHTMLElements=False, tree=html5lib.treebuilders.getTreeBuilder("etree", fullTree=True)) + + def assertTree(self, root, spec): + self.assertEqual(len(root), len(spec)) + + for child, spec_child in zip(root, spec): + self.assertEqual(child.tag, spec_child['tag']) + + if 'text' in spec_child: + self.assertEqual(child.text, spec_child['text']) + + if 'value' in spec_child: + self.assertEqual(child.attrib['value'], spec_child['value']) + + if 'children' in spec_child: + self.assertTree(child, spec_child['children']) + else: + self.assertEqual(len(child), 0) + + if 'attrs' in spec_child: + for k, v in spec_child['attrs'].iteritems(): + self.assertIn(k, child.attrib) + self.assertEqual(v, child.attrib[k]) + + def test_var_1(self): + html_string = """

{{ hi }}

""" + + tree = self.parser.parseFragment(html_string) + dump(tree) + + self.assertTree(tree, [{ + 'tag': 'h1', + 'children': [{ + 'tag': 'jinjavariabletag', + 'children': [{ + 'tag': 'jinjavariable', + 'value': 'hi' + }] + }] + }]) + + def test_var_2(self): + html_string = """

{{ a.b }}

""" + + tree = self.parser.parseFragment(html_string) + dump(tree) + + self.assertTree(tree, [{ + 'tag': 'h1', + 'children': [{ + 'tag': 'jinjavariabletag', + 'children': [{ + 'tag': 'jinjavariable', + 'value': 'a.b' + }] + }] + }]) + + def test_filter_1(self): + html_string = """

{{ hi | yo }}

""" + + tree = self.parser.parseFragment(html_string) + + self.assertTree(tree, [{ + 'tag': 'h1', + 'children': [{ + 'tag': 'jinjavariabletag', + 'children': [{ + 'tag': 'jinjavariable', + 'value': 'hi' + }, { + 'tag': 'jinjapipe', + 'value': '|' + }, { + 'tag': 'jinjafilter', + 'value': 'yo' + }] + }] + }]) + + def test_filter_2(self): + html_string = """

{{ hi | yo("hi") }}

""" + + tree = self.parser.parseFragment(html_string) + dump(tree) + + self.assertTree(tree, [{ + 'tag': 'h1', + 'children': [{ + 'tag': 'jinjavariabletag', + 'children': [{ + 'tag': 'jinjavariable', + 'value': 'hi' + }, { + 'tag': 'jinjapipe', + 'value': '|' + }, { + 'tag': 'jinjafilter', + 'value': 'yo', + 'children': [{ + 'tag': 'jinjaargument', + 'value': '"hi"' + }] + }] + }] + }]) + + def test_filter_3(self): + html_string = """

{{ hi | yo("hi", "mike") }}

""" + + tree = self.parser.parseFragment(html_string) + dump(tree) + + self.assertTree(tree, [{ + 'tag': 'h1', + 'children': [{ + 'tag': 'jinjavariabletag', + 'children': [{ + 'tag': 'jinjavariable', + 'value': 'hi' + }, { + 'tag': 'jinjapipe', + 'value': '|' + }, { + 'tag': 'jinjafilter', + 'value': 'yo', + 'children': [{ + 'tag': 'jinjaargument', + 'value': '"hi"' + }, { + 'tag': 'jinjaargument', + 'value': '"mike"' + }] + }] + }] + }]) + + def test_jinja_block(self): + html_string = """ + {% block title %}Hi{% endblock %} + """ + + tree = self.parser.parseFragment(html_string) + + self.assertTree(tree, [{ + 'tag': 'jinjablock', + 'text': 'Hi' + }]) + + def test_jinja_block_named(self): + html_string = """ + {% block title %}Hi{% endblock title %} + """ + + tree = self.parser.parseFragment(html_string) + + self.assertTree(tree, [{ + 'tag': 'jinjablock', + 'text': 'Hi' + }]) + + def test_jinja_block_in_title(self): + html_string = """ + {% block title %}{% endblock %} + """ + + tree = self.parser.parseFragment(html_string) + + self.assertTree(tree, [{ + 'tag': 'title', + 'children': [{ + 'tag': 'jinjablock', + 'value': 'title' + }] + }]) + + def test_jinja_for(self): + html_string = """ + {% for a in b %} + {{ a }} + {% endfor %} + """ + + tree = self.parser.parseFragment(html_string) + + self.assertTree(tree, [{ + 'tag': 'jinjafor', + 'value': 'a in b', + 'children': [{ + 'tag': 'jinjavariabletag', + 'children': [{ + 'tag': 'jinjavariable', + 'value': 'a' + }] + }] + }]) + + def test_complete_doc(self): + html_string = """ + + + + My Webpage + + + + +

My Webpage

+ {{ a_variable }} + + + """ + + tree = self.parser.parse(html_string) + + self.assertTree(tree, [{ + 'tag': '', + 'text': 'html' + }, { + 'tag': 'html', + 'children': [{ + 'tag': 'head', + 'children': [{ + 'tag': 'title', + 'text': 'My Webpage' + }] + }, { + 'tag': 'body', + 'children': [{ + 'tag': 'ul', + 'children': [{ + 'tag': 'jinjafor', + 'value': 'item in navigation', + 'children': [{ + 'tag': 'li', + 'children': [{ + 'tag': 'a', + 'children': [{ + 'tag': 'jinjavariabletag', + 'children': [{ + 'tag': 'jinjavariable', + 'value': 'item.caption' + }] + }] + }] + }] + }] + }, { + 'tag': 'h1', + 'text': 'My Webpage' + }, { + 'tag': 'jinjavariabletag', + 'children': [{ + 'tag': 'jinjavariable', + 'value': 'a_variable' + }] + }] + }] + }]) + + def test_jinja_if(self): + html_string = """ + {% if True %}yay{% endif %} + """ + + tree = self.parser.parseFragment(html_string) + dump(tree) + + self.assertTree(tree, [{ + 'tag': 'jinjaif', + 'text': 'yay' + }]) + + def test_jinja_if_else(self): + html_string = """ + {% if True %}yay{% else %}boo{% endif %} + """ + + tree = self.parser.parseFragment(html_string) + dump(tree) + + self.assertTree(tree, [{ + 'tag': 'jinjaif', + 'text': 'yay' + }, { + 'tag': 'jinjaelse', + 'text': 'boo' + }]) + + def test_jinja_if_elif_else(self): + html_string = """ + {% if True %}yay{% elif False %}too{% else %}boo{% endif %} + """ + + tree = self.parser.parseFragment(html_string) + dump(tree) + + self.assertTree(tree, [{ + 'tag': 'jinjaif', + 'text': 'yay' + }, { + 'tag': 'jinjaelif', + 'text': 'too' + }, { + 'tag': 'jinjaelse', + 'text': 'boo' + }]) + + def test_jinja_if_lstrip(self): + html_string = """ + {%+ if True %}yay{% endif %} + """ + + tree = self.parser.parseFragment(html_string) + dump(tree) + + self.assertTree(tree, [{ + 'tag': 'jinjaif', + 'text': 'yay', + 'attrs': { + 'lstrip': False + } + }]) + + def test_jinja_strip_blocks(self): + html_string = """ + {% for item in seq -%} + {{ item }} + {%- endfor %} + """ + + tree = self.parser.parseFragment(html_string) + dump(tree) + + self.assertTree(tree, [{ + 'tag': 'jinjafor', + 'attrs': { + 'rstrip': True + }, + 'children': [{ + 'tag': 'jinjavariabletag', + 'children': [{ + 'tag': 'jinjavariable', + 'value': 'item' + }] + }] + }]) + + def test_jinja_extend(self): + html_string = """ + {% extends "base.html" %} + """ + + tree = self.parser.parseFragment(html_string) + dump(tree) + + self.assertTree(tree, [{ + 'tag': 'jinjaextends', + 'value': '"base.html"' + }]) + + def test_jinja_include(self): + html_string = """ + {% include ['special_sidebar.html', 'sidebar.html'] ignore missing %} + """ + + tree = self.parser.parseFragment(html_string) + dump(tree) + + self.assertTree(tree, [{ + 'tag': 'jinjainclude', + 'value': "['special_sidebar.html', 'sidebar.html'] ignore missing" + }]) + + def test_jinja_import(self): + html_string = """ + {% import 'forms.html' as forms %} + {% from 'forms.html' import input as input_field, textarea %} + """ + + tree = self.parser.parseFragment(html_string) + dump(tree) + + self.assertTree(tree, [{ + 'tag': 'jinjaimport', + 'value': "'forms.html' as forms" + }, { + 'tag': 'jinjaimport', + 'value': "'forms.html' import input as input_field, textarea" + }]) + + def test_inline_if(self): + html_string = """ + {{ '[%s]' % page.title if page.title }} + """ + + tree = self.parser.parseFragment(html_string) + dump(tree) + + self.assertTree(tree, [{ + 'tag': 'jinjavariabletag', + 'children': [{ + 'tag': 'jinjavariable', + 'value': "'[%s]'" + }, { + 'tag': 'jinjavariable', + 'value': "%" + }, { + 'tag': 'jinjavariable', + 'value': "page.title" + }, { + 'tag': 'jinjavariable', + 'value': "if" + }, { + 'tag': 'jinjavariable', + 'value': "page.title" + }] + }]) + + def test_comment(self): + html_string = """ + {# {{ '[%s]' % page.title if page.title }} #} + """ + + tree = self.parser.parseFragment(html_string) + dump(tree) + + self.assertTree(tree, [{ + 'tag': 'jinjacomment', + 'value': "{{ '[%s]' % page.title if page.title }} " + }]) + + def test_file(self): + html_string = """ +

{{ (term_price.price / term_price.term.num_cycles) | currency }}/month

+ + """ + + tree = self.parser.parseFragment(html_string) + dump(tree) + + self.assertTree(tree, [{ + 'tag': 'h4', + 'children': [{ + 'tag': 'jinjavariabletag', + 'children': [{ + 'tag': 'jinjavariable', + 'value': '(term_price.price' + }, { + 'tag': 'jinjavariable', + 'value': '/' + }, { + 'tag': 'jinjavariable', + 'value': 'term_price.term.num_cycles)' + }, { + 'tag': 'jinjapipe', + 'value': '|' + }, { + 'tag': 'jinjafilter', + 'value': 'currency' + }] + }] + }]) + + def test_embedded_block(self): + html_string = """ + Whatever + + """ + + tree = self.parser.parseFragment(html_string) + dump(tree) + #self.fail() + + #self.assertTree(tree, [{ + #'tag': 'jinjacomment', + #'value': "{{ '[%s]' % page.title if page.title }} " + #}]) + + def test_open_block(self): + html_string = """ + + + {% extends "base.html" %} + + {% block header_tag %} +
+ {% endblock %} + + {% block header_content %} + {{ super() }} + +
+
+

Get handpicked books delivered every month.

+

Praesent dapibus, neque id cursus faucibus, tortor neque egestas augue, eu vulputate magna eros eu erat. Faucibus, tortor praesent neque id dapibus.

+
+ +
+
+ + {% endblock %} + + {% block page_content %} + +
+ +
+ +
+
+
+

What is BookSea?

+

Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Donec odio. Quisque volutpat tmattis eros. Nullam malesuada erat ut turpis. Suspendisse urna nibh, viverra non, semper suscipit, posuere a, pede. Donec nec justo eget felis facilisis fermentum. Aliquam porttitor mauris sit amet orci. Aenean dignissim pellentesque felis.Morbi in sem quis dui placerat ornare. Pellentesque odio nisi, euismod in, pharetra a, ultricies in, diam. Sed arcu. Cras consequat.

+

Praesent dapibus, neque id cursus faucibus, tortor neque egestas augue, eu vulputate magna eros eu erat. Aliquam erat volutpat. Nam dui mi, tincidunt quis, accumsan porttitor, facilisis luctus, metus.

+
+
+
+ + {% include "components/descriptions.html" %} + """ + tree = self.parser.parseFragment(html_string) + dump(tree) diff --git a/html5lib/tests/test_parser2.py b/html5lib/tests/test_parser2.py index 20bbdf31..c90ca678 100644 --- a/html5lib/tests/test_parser2.py +++ b/html5lib/tests/test_parser2.py @@ -40,7 +40,7 @@ def test_namespace_html_elements_1_dom(self): def test_namespace_html_elements_0_etree(self): parser = html5parser.HTMLParser(namespaceHTMLElements=True) doc = parser.parse("") - self.assertTrue(list(doc)[0].tag == "{%s}html" % (namespaces["html"],)) + self.assertEqual(list(doc)[0].tag, "{%s}html" % (namespaces["html"],)) def test_namespace_html_elements_1_etree(self): parser = html5parser.HTMLParser(namespaceHTMLElements=False) diff --git a/html5lib/tokenizer.py b/html5lib/tokenizer.py index 09e705ff..411bd026 100644 --- a/html5lib/tokenizer.py +++ b/html5lib/tokenizer.py @@ -233,7 +233,7 @@ def processEntityInAttribute(self, allowedChar): """ self.consumeEntity(allowedChar=allowedChar, fromAttribute=True) - def emitCurrentToken(self): + def emitCurrentToken(self, resetState=True): """This method is a generic handler for emitting the tags. It also sets the state to "data" because that's what's needed after a token has been emitted. @@ -251,16 +251,20 @@ def emitCurrentToken(self): self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "self-closing-flag-on-end-tag"}) self.tokenQueue.append(token) - self.state = self.dataState + + if resetState: + self.state = self.dataState # Below are the various tokenizer states worked out. def dataState(self): data = self.stream.char() + if data == "&": self.state = self.entityDataState elif data == "<": self.state = self.tagOpenState elif data == "{": + self.prevState = self.state self.state = self.jinjaOpenState elif data == "\u0000": self.tokenQueue.append({"type": tokenTypes["ParseError"], @@ -280,7 +284,7 @@ def dataState(self): # have already been appended to lastFourChars and will have broken # any sequences else: - chars = self.stream.charsUntil(("&", "<", "\u0000")) + chars = self.stream.charsUntil(("&", "<", "\u0000", "{")) self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data + chars}) return True @@ -305,16 +309,155 @@ def jinjaOpenState(self): self.state = self.jinjaVariableState elif data == "%": + self.state = self.jinjaStatementStartState + elif data == "#": + self.state = self.jinjaCommentStartState + else: + self.stream.unget(data) + self.stream.unget("{") + chars = self.stream.charsUntil(("&", "<", "\u0000", "{")) + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": + data + chars}) + + return True + + def jinjaStatementStartState(self): + data = self.stream.char() + + if data in spaceCharacters: + pass + elif data is EOF: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "eof-in-jinja-statement"}) + self.state = self.prevState + else: + attrs = {} + + if data in ['-', '+']: + attrs['lstrip'] = False + + data = self.stream.char() + while data in spaceCharacters: + data = self.stream.char() + + block_type = data + self.stream.charsUntil(frozenset(("%")) | spaceCharacters) + + block_definition = self.stream.charsUntil(frozenset(("%", "\u0000"))) + + block_definition = block_definition.strip(" \t") + + if block_definition and block_definition[-1] == '-': + attrs['rstrip'] = True + block_definition = block_definition[:-1].rstrip() + + attrs.update({ + "value": block_definition, + "position": self.stream.position() + }) + + if block_type.startswith("end"): + block_type = block_type.replace("end", "") + attrs['value'] = block_type.lower() + + self.tokenQueue.append({ + "type": tokenTypes["JinjaStatementEndTag"], + 'name': u"jinja{}".format(block_type.lower()), + "data": attrs, + "selfClosing": False + }) + elif block_type == "extends": + self.tokenQueue.append({ + "type": tokenTypes["JinjaExtendTag"], + 'name': u"jinja{}".format(block_type.lower()), + "data": attrs, + "selfClosing": True + }) + elif block_type == "include": + self.tokenQueue.append({ + "type": tokenTypes["JinjaIncludeTag"], + 'name': u"jinja{}".format(block_type.lower()), + "data": attrs, + "selfClosing": True + }) + elif block_type in ["import", "from"]: + self.tokenQueue.append({ + "type": tokenTypes["JinjaImportTag"], + 'name': u"jinjaimport", + "data": attrs, + "selfClosing": True + }) + else: + self.tokenQueue.append({ + "type": tokenTypes["JinjaStatementStartTag"], + 'name': u"jinja{}".format(block_type.lower()), + "data": attrs, + "selfClosing": False + }) + + data = self.stream.char() + if data != '%': + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "no-close-of-jinja-statement"}) + data = self.stream.char() + if data != '}': + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "no-close-of-jinja-statement"}) + + self.state = self.dataState + + return True + + def jinjaCommentStartState(self): + data = self.stream.char() + + if data in spaceCharacters: + pass + elif data is EOF: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "eof-in-jinja-statement"}) + self.state = self.prevState + else: + comment_text = data + self.stream.charsUntil(frozenset(("#", "\u0000"))) + next_two = self.stream.char() + + if next_two: + next_two += self.stream.char() + + if not next_two or len(next_two) < 2: + log.debug(u"Comment text {} = {}".format(comment_text, len(self.stream.chunk))) + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "expected-jinja-comment-closing-tag-but-got-eof", + "datavars": {"data": data}}) + self.state = self.bogusCommentState + return True + + while next_two != "#}": + comment_text += self.stream.chunk + self.stream.charsUntil(frozenset(("#", "\u0000"))) + + next_two = self.stream.char() + + if next_two: + next_two += self.stream.char() + + if not next_two or len(next_two) < 2: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "expected-jinja-comment-closing-tag-but-got-eof", + "datavars": {"data": data}}) + self.state = self.bogusCommentState + return True + self.tokenQueue.append({ - "type": tokenTypes["JinjaStatementStartTag"], - "name": "{%", "data": {}, - "namespace": None, - "selfClosing": False + "type": tokenTypes["JinjaComment"], + 'name': u"jinjacomment", + "data": { + "value": comment_text, + "position": self.stream.position() + }, + "selfClosing": True }) - self.state = self.jinjaStatementState + self.state = self.dataState - #self.state = self.dataState return True def jinjaStatementEndState(self): @@ -324,15 +467,15 @@ def jinjaStatementEndState(self): if data == "}": self.tokenQueue.append({ "type": tokenTypes["JinjaStatementEndTag"], - "name": "%}", "data": [], + "name": "jinjastatementend", "data": [], "selfClosing": False }) - self.state = self.dataState + self.state = self.prevState elif data is EOF: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "expected-jinja-statement-closing-tag-but-got-eof", "datavars": {"data": data}}) - self.state = self.dataState + self.state = self.prevState else: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "expected-jinja-statement-closing-tag-but-got-char", @@ -340,7 +483,6 @@ def jinjaStatementEndState(self): self.stream.unget(data) self.state = self.bogusCommentState - #self.state = self.dataState return True def jinjaVariableEndState(self): @@ -353,12 +495,12 @@ def jinjaVariableEndState(self): "name": u"jinjavariabletag", "data": [], "selfClosing": False }) - self.state = self.dataState + self.state = self.prevState elif data is EOF: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "expected-jinja-variable-closing-tag-but-got-eof", "datavars": {"data": data}}) - self.state = self.dataState + self.state = self.prevState else: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "expected-jinja-variable-closing-tag-but-got-char", @@ -366,7 +508,6 @@ def jinjaVariableEndState(self): self.stream.unget(data) self.state = self.bogusCommentState - #self.state = self.dataState return True def jinjaStatementState(self): @@ -377,11 +518,18 @@ def jinjaStatementState(self): elif data is EOF: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "eof-in-jinja-statement"}) - self.state = self.dataState + self.state = self.prevState else: chars = self.stream.charsUntil(("%", "\u0000")) - self.tokenQueue.append({"type": tokenTypes["JinjaStatementTag"], "data": - data + chars}) + self.tokenQueue.append({ + "type": tokenTypes["JinjaStatement"], + 'name': "jinjastatement", + "data": { + "value": data + chars, + "position": self.stream.position() + }, + "selfClosing": False + }) return True @@ -390,12 +538,21 @@ def jinjaVariableState(self): if data == "}": self.state = self.jinjaVariableEndState - #elif data == "(": - #self.state = self.jinjaArgState + elif data == "(" and self.currentToken['type'] in [tokenTypes["JinjaVariable"], tokenTypes["JinjaFilter"]]: + self.currentToken = { + "type": tokenTypes["JinjaArgumentStartTag"], + "name": u"jinjaargumentstarttag", "data": {}, + "namespace": None, + "selfClosing": False + } + + self.tokenQueue.append(self.currentToken) + + self.state = self.jinjaArgState elif data is EOF: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "eof-in-jinja-variable"}) - self.state = self.dataState + self.state = self.prevState elif data in spaceCharacters: # Skip spaces pass @@ -407,8 +564,9 @@ def jinjaVariableState(self): }} self.tokenQueue.append(self.currentToken) # If this is the first token after the variable start tag - elif self.currentToken['type'] == tokenTypes["JinjaVariableStartTag"]: - #log.debug(u"Got start tag {}".format(("|", "}", "\u0000") | spaceCharacters)) + elif self.currentToken['type'] == tokenTypes["JinjaVariableStartTag"]\ + or self.currentToken['type'] == tokenTypes["JinjaVariable"]: + #log.debug(u"Got start tag {}".format(("|", "}", "\u0000") | spaceCharacters)) chars = self.stream.charsUntil(frozenset(("(", "|", "}", "\u0000")) | spaceCharacters) self.currentToken = {"type": tokenTypes["JinjaVariable"], @@ -433,12 +591,44 @@ def jinjaVariableState(self): return True + def jinjaArgState(self): + data = self.stream.char() + + if data == ")": + self.tokenQueue.append({ + "type": tokenTypes["JinjaArgumentEndTag"], + "name": u"jinjaargumentendtag", "data": [], + "selfClosing": False + }) + self.state = self.jinjaVariableState + elif data is EOF: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "eof-in-jinja-argument"}) + self.state = self.prevState + elif data in spaceCharacters or data in [',']: + # Skip spaces + pass + else: + chars = self.stream.charsUntil(frozenset((",", ")"))) + + self.currentToken = {"type": tokenTypes["JinjaArgument"], + "name": "jinjaargument", "selfClosing": True, "data": { + "value": data + chars, + "position": self.stream.position(), + }} + self.tokenQueue.append(self.currentToken) + + return True + def rcdataState(self): data = self.stream.char() if data == "&": self.state = self.characterReferenceInRcdata elif data == "<": self.state = self.rcdataLessThanSignState + elif data == "{": + self.prevState = self.state + self.state = self.jinjaOpenState elif data == EOF: # Tokenization ends. return False @@ -471,6 +661,9 @@ def rawtextState(self): data = self.stream.char() if data == "<": self.state = self.rawtextLessThanSignState + elif data == "{": + self.prevState = self.state + self.state = self.jinjaOpenState elif data == "\u0000": self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"}) @@ -489,6 +682,9 @@ def scriptDataState(self): data = self.stream.char() if data == "<": self.state = self.scriptDataLessThanSignState + elif data == "{": + self.prevState = self.state + self.state = self.jinjaOpenState elif data == "\u0000": self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"}) @@ -508,6 +704,9 @@ def plaintextState(self): if data == EOF: # Tokenization ends. return False + elif data == "{": + self.prevState = self.state + self.state = self.jinjaOpenState elif data == "\u0000": self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"}) diff --git a/html5lib/treebuilders/etree.py b/html5lib/treebuilders/etree.py index 03d51275..5d68fcd8 100644 --- a/html5lib/treebuilders/etree.py +++ b/html5lib/treebuilders/etree.py @@ -63,7 +63,6 @@ def _getAttributes(self): return self._element.attrib def _setAttributes(self, attributes): - log.debug(u"Attributes {}".format(attributes)) # Delete existing attributes first # XXX - there may be a better way to do this... for key in list(self._element.attrib.keys()):