Jinja by aelaguiz · Pull Request #1 · cratejoy/html5lib-python · GitHub
[go: up one dir, main page]

Skip to content

Jinja #1

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Parsing works a lot better
  • Loading branch information
aelaguiz committed Dec 26, 2014
commit 24ecb5bc0ba5269b071060fa79b6468d0c9e1002
2 changes: 1 addition & 1 deletion html5lib/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -3088,7 +3088,7 @@
"ParseError": 7,
"JinjaStatementStartTag": 8,
"JinjaStatementEndTag": 9,
"JinjaStatementTag": 10,
"JinjaStatement": 10,
"JinjaVariableStartTag": 11,
"JinjaVariableEndTag": 12,
"JinjaVariable": 13,
Expand Down
47 changes: 43 additions & 4 deletions html5lib/html5parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ def mainLoop(self):
ParseErrorToken = tokenTypes["ParseError"]
JinjaStatementStartTag = tokenTypes["JinjaStatementStartTag"]
JinjaStatementEndTag = tokenTypes["JinjaStatementEndTag"]
JinjaStatementTag = tokenTypes["JinjaStatementTag"]
JinjaStatement = tokenTypes["JinjaStatement"]
JinjaVariableStartTag = tokenTypes["JinjaVariableStartTag"]
JinjaVariableEndTag = tokenTypes["JinjaVariableEndTag"]
JinjaVariable = tokenTypes["JinjaVariable"]
Expand All @@ -195,6 +195,9 @@ def mainLoop(self):
if type in (JinjaVariableStartTag, JinjaVariableEndTag, JinjaVariable, JinjaFilter, JinjaPipe):
log.debug(u"Type is a jinja variable tag")
phase = self.phases["inJinjaVariable"]
elif type in (JinjaStatementStartTag, JinjaStatementEndTag, JinjaStatement):
log.debug(u"Type is a jinja statement tag")
phase = self.phases["inJinjaStatement"]
elif type in (JinjaArgumentStartTag, JinjaArgumentEndTag, JinjaArgument):
log.debug(u"Type is a jinja argument tag")
phase = self.phases["inJinjaArgument"]
Expand Down Expand Up @@ -230,8 +233,8 @@ def mainLoop(self):
new_token = phase.processJinjaStatementStartTag(new_token)
elif type == JinjaStatementEndTag:
new_token = phase.processJinjaStatementEndTag(new_token)
elif type == JinjaStatementTag:
new_token = phase.processJinjaStatementTag(new_token)
elif type == JinjaStatement:
new_token = phase.processJinjaStatement(new_token)
elif type == JinjaVariableStartTag:
new_token = phase.processJinjaVariableStartTag(new_token)
elif type == JinjaVariableEndTag:
Expand Down Expand Up @@ -518,6 +521,8 @@ def processDoctype(self, token):
self.parser.parseError("unexpected-doctype")

def processCharacters(self, token):
    """Log a character token and hand its text to the tree builder.

    :arg token: character token; its ``"data"`` entry is the text passed
        to ``self.tree.insertText``.
    """
    log = logging.getLogger(u"html5lib")
    # ``.format`` has to be applied to the literal. It used to sit inside
    # the quotes, so the message was the raw text "{}.format(token)" and
    # never showed the token being inserted.
    log.debug(u"Inserting text {}".format(token))
    self.tree.insertText(token["data"])

def processSpaceCharacters(self, token):
Expand All @@ -529,7 +534,7 @@ def processJinjaStatementStartTag(self, token):
def processJinjaStatementEndTag(self, token):
pass

def processJinjaStatementTag(self, token):
def processJinjaStatement(self, token):
pass

def processJinjaVariableStartTag(self, token):
Expand Down Expand Up @@ -610,6 +615,36 @@ def processJinjaFilter(self, token):
element = self.tree.createElementWithoutNamespace(token)
self.tree.openElements[-1].appendChild(element)

class InJinjaStatementPhase(Phase):
    """Phase that the main loop selects for Jinja statement tokens
    (statement start tag, statement end tag, and statement)."""

    def processJinjaStatementStartTag(self, token):
        """Open a statement element: reconstruct the active formatting
        elements, then insert an element for ``token``."""
        logger = logging.getLogger('html5lib')
        logger.debug(u"InJinjaStatement: Start Tag")
        self.tree.reconstructActiveFormattingElements()
        self.tree.insertElement(token)

    def processJinjaStatementEndTag(self, token):
        """Close the nearest open element whose name matches ``token["name"]``.

        Walks a reversed copy of the open-element stack. On a name match it
        generates implied end tags, reports ``unexpected-end-tag`` if the
        stack top still has a different name, and pops up to and including
        the matched node. If a non-matching element found in
        ``specialElements`` is met first, it reports ``unexpected-end-tag``
        and stops without popping anything.
        """
        logger = logging.getLogger('html5lib')
        logger.debug(u"InJinjaStatement: End Tag {}".format(token["name"]))

        # Iterate over a reversed copy: the stack itself is mutated below.
        for candidate in self.tree.openElements[::-1]:
            logger.debug(u"InJinjaStatement: Open tag {} token {}".format(candidate, token))

            if candidate.name != token["name"]:
                if candidate.nameTuple in specialElements:
                    logger.debug(u"InJinjaStatement Nametuple {} in {}".format(candidate.nameTuple, specialElements))
                    self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
                    break
                continue

            # Matching element found.
            self.tree.generateImpliedEndTags(exclude=token["name"])
            logger.debug(u"InJinjaStatement: Implied end tag {} {}".format(self.tree.openElements[-1].name, token["name"]))
            if self.tree.openElements[-1].name != token["name"]:
                self.parser.parseError("unexpected-end-tag", {"name": token["name"]})

            # Pop until the matched node itself has been removed.
            popped = self.tree.openElements.pop()
            while popped != candidate:
                popped = self.tree.openElements.pop()
            break

    def processJinjaStatement(self, token):
        """Create a namespace-less element for ``token`` and append it to
        the element on top of the open-element stack."""
        statement = self.tree.createElementWithoutNamespace(token)
        parent = self.tree.openElements[-1]
        parent.appendChild(statement)

class InJinjaArgumentPhase(Phase):
def processJinjaArgumentStartTag(self, token):
log = logging.getLogger('html5lib')
Expand Down Expand Up @@ -1136,6 +1171,9 @@ def processSpaceCharactersDropNewline(self, token):
self.tree.insertText(data)

def processCharacters(self, token):
import logging
log = logging.getLogger(u"html5lib")
log.debug(u"In Body phase processing Characters {}".format(token))
if token["data"] == "\u0000":
# The tokenizer should always emit null on its own
return
Expand Down Expand Up @@ -2831,6 +2869,7 @@ def processEndTag(self, token):
# XXX "inHeadNoscript": InHeadNoScriptPhase,
"afterHead": AfterHeadPhase,
"inJinjaVariable": InJinjaVariablePhase,
"inJinjaStatement": InJinjaStatementPhase,
"inJinjaArgument": InJinjaArgumentPhase,
"inBody": InBodyPhase,
"text": TextPhase,
Expand Down
119 changes: 105 additions & 14 deletions html5lib/tests/test_jinja.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,33 +6,48 @@


def dump(tree, tabs=0):
log.debug(u"{}Tag '{}' - {} children - Value = {}".format(
"".join(["\t" for i in range(tabs)]), tree.tag, len(tree), tree.attrib['value'] if 'value' in tree.attrib else None))
log.debug(u"{}Tag '{}' - {} children - Value = {} - Text = {}".format(
"".join(["\t" for i in range(tabs)]), tree.tag, len(tree), tree.attrib['value'] if 'value' in tree.attrib else None, tree.text))

for child in tree:
dump(child, tabs + 1)


class JinjaTestCase(unittest.TestCase):
def test_var_1(self):
parser = html5lib.HTMLParser(strict=False)
def setUp(self):
self.parser = html5lib.HTMLParser(strict=False, namespaceHTMLElements=False)

def test_var_1(self):
html_string = """<h1>{{ hi }}</h1>"""

tree = parser.parseFragment(html_string)
tree = self.parser.parseFragment(html_string)

h1 = tree[0]
self.assertEqual(h1.tag, "{http://www.w3.org/1999/xhtml}h1")
jt = h1[0]
var1 = jt[0]
self.assertEqual(h1.tag, "h1")
self.assertEqual(var1.tag, 'jinjavariable')
self.assertEqual(var1.attrib['value'], 'hi')

def test_filter_1(self):
parser = html5lib.HTMLParser(strict=False)
def test_var_2(self):
html_string = """<h1>{{ a.b }}</h1>"""

tree = self.parser.parseFragment(html_string)

h1 = tree[0]
jt = h1[0]
var1 = jt[0]
self.assertEqual(h1.tag, "h1")
self.assertEqual(var1.tag, 'jinjavariable')
self.assertEqual(var1.attrib['value'], 'a.b')

def test_filter_1(self):
html_string = """<h1>{{ hi | yo }}</h1>"""

tree = parser.parseFragment(html_string)
tree = self.parser.parseFragment(html_string)

h1 = tree[0]
self.assertEqual(h1.tag, "{http://www.w3.org/1999/xhtml}h1")
self.assertEqual(h1.tag, "h1")

jt = h1[0]

Expand All @@ -48,15 +63,40 @@ def test_filter_1(self):
self.assertEqual(yo.attrib['value'], 'yo')

def test_filter_2(self):
parser = html5lib.HTMLParser(strict=False)

html_string = """<h1>{{ hi | yo("hi") }}</h1>"""

tree = parser.parseFragment(html_string)
tree = self.parser.parseFragment(html_string)
dump(tree)

h1 = tree[0]
self.assertEqual(h1.tag, "h1")

jt = h1[0]

hi = jt[0]
pipe1 = jt[1]
yo = jt[2]

self.assertEqual(hi.tag, 'jinjavariable')
self.assertEqual(hi.attrib['value'], 'hi')
self.assertEqual(pipe1.tag, 'jinjapipe')
self.assertEqual(pipe1.attrib['value'], '|')
self.assertEqual(yo.tag, 'jinjafilter')
self.assertEqual(yo.attrib['value'], 'yo')

arg1 = yo[0]

self.assertEqual(arg1.tag, 'jinjaargument')
self.assertEqual(arg1.attrib['value'], '"hi"')

def test_filter_3(self):
html_string = """<h1>{{ hi | yo("hi", "mike") }}</h1>"""

tree = self.parser.parseFragment(html_string)
dump(tree)

h1 = tree[0]
self.assertEqual(h1.tag, "{http://www.w3.org/1999/xhtml}h1")
self.assertEqual(h1.tag, "h1")

jt = h1[0]

Expand All @@ -72,6 +112,57 @@ def test_filter_2(self):
self.assertEqual(yo.attrib['value'], 'yo')

arg1 = yo[0]
arg2 = yo[1]

self.assertEqual(arg1.tag, 'jinjaargument')
self.assertEqual(arg1.attrib['value'], '"hi"')
self.assertEqual(arg2.tag, 'jinjaargument')
self.assertEqual(arg2.attrib['value'], '"mike"')

def test_jinja_block(self):
html_string = """
{% block title %}Hi{% endblock %}
"""

tree = self.parser.parseFragment(html_string)
dump(tree)

block = tree[0]

self.assertEqual(block.tag, 'jinjablock')
self.assertEqual(block.text, 'Hi')

def test_jinja_block_in_title(self):
html_string = """
<title>{% block title %}{% endblock %}</title>
"""

tree = self.parser.parseFragment(html_string)
dump(tree)

title = tree[0]
block = title[0]

self.assertEqual(title.tag, 'title')
self.assertEqual(block.tag, 'jinjablock')
self.assertEqual(block.attrib['value'], 'title')

def test_jinja_for(self):
html_string = """
{% for a in b %}
{{ a }}
{% endfor %}
"""

tree = self.parser.parseFragment(html_string)
dump(tree)

block = tree[0]
var = block[0]
var1 = var[0]

self.assertEqual(block.tag, 'jinjafor')
self.assertEqual(block.attrib['value'], 'a in b')
self.assertEqual(var.tag, 'jinjavariabletag')
self.assertEqual(var1.tag, 'jinjavariable')
self.assertEqual(var1.attrib['value'], 'a')
Loading
0