WIP - Jinja parsing · cratejoy/html5lib-python@86c6f80 · GitHub
[go: up one dir, main page]

Skip to content

Commit 86c6f80

Browse files
committed
WIP - Jinja parsing
1 parent f756cab commit 86c6f80

File tree

5 files changed

+172
-9
lines changed

5 files changed

+172
-9
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,3 +18,5 @@ stats.prof
1818

1919
# We have no interest in built Sphinx files
2020
/doc/_build
21+
22+
venv

html5lib/constants.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3093,7 +3093,10 @@
30933093
"JinjaVariableEndTag": 12,
30943094
"JinjaVariable": 13,
30953095
"JinjaFilter": 14,
3096-
"JinjaPipe": 15
3096+
"JinjaPipe": 15,
3097+
"JinjaArgumentStartTag": 16,
3098+
"JinjaArgumentEndTag": 17,
3099+
"JinjaArgument": 18
30973100
}
30983101

30993102
tagTokenTypes = frozenset((tokenTypes["StartTag"], tokenTypes["EndTag"],

html5lib/html5parser.py

Lines changed: 44 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,9 @@ def mainLoop(self):
174174
JinjaVariable = tokenTypes["JinjaVariable"]
175175
JinjaPipe = tokenTypes["JinjaPipe"]
176176
JinjaFilter = tokenTypes["JinjaFilter"]
177+
JinjaArgumentStartTag = tokenTypes["JinjaArgumentStartTag"]
178+
JinjaArgumentEndTag = tokenTypes["JinjaArgumentEndTag"]
179+
JinjaArgument = tokenTypes["JinjaArgument"]
177180

178181
for token in self.normalizedTokens():
179182
new_token = token
@@ -190,8 +193,11 @@ def mainLoop(self):
190193
new_token = None
191194
else:
192195
if type in (JinjaVariableStartTag, JinjaVariableEndTag, JinjaVariable, JinjaFilter, JinjaPipe):
193-
log.debug(u"Type is a jinja tag")
196+
log.debug(u"Type is a jinja variable tag")
194197
phase = self.phases["inJinjaVariable"]
198+
elif type in (JinjaArgumentStartTag, JinjaArgumentEndTag, JinjaArgument):
199+
log.debug(u"Type is a jinja argument tag")
200+
phase = self.phases["inJinjaArgument"]
195201
elif (
196202
len(self.tree.openElements) == 0 or
197203
currentNodeNamespace == self.tree.defaultNamespace or
@@ -236,6 +242,12 @@ def mainLoop(self):
236242
new_token = phase.processJinjaPipe(new_token)
237243
elif type == JinjaFilter:
238244
new_token = phase.processJinjaFilter(new_token)
245+
elif type == JinjaArgumentStartTag:
246+
new_token = phase.processJinjaArgumentStartTag(new_token)
247+
elif type == JinjaArgumentEndTag:
248+
new_token = phase.processJinjaArgumentEndTag(new_token)
249+
elif type == JinjaArgument:
250+
new_token = phase.processJinjaArgument(new_token)
239251

240252
if (type == StartTagToken and token["selfClosing"]
241253
and not token["selfClosingAcknowledged"]):
@@ -529,6 +541,15 @@ def processJinjaVariableEndTag(self, token):
529541
def processJinjaVariable(self, token):
    """No-op handler for a JinjaVariable token.

    The token is ignored and None is returned implicitly.
    NOTE(review): presumably the default that Jinja-aware phases
    override -- the enclosing class is not visible here; confirm.
    """
    pass
531543

544+
def processJinjaArgumentStartTag(self, token):
    """No-op handler for a JinjaArgumentStartTag token.

    mainLoop dispatches tokens of that type to this method and uses the
    return value as the new token; this implementation ignores the
    token and returns None implicitly.
    NOTE(review): presumably the default that InJinjaArgumentPhase
    overrides -- the enclosing class is not visible here; confirm.
    """
    pass
546+
547+
def processJinjaArgumentEndTag(self, token):
    """No-op handler for a JinjaArgumentEndTag token.

    mainLoop dispatches tokens of that type to this method and uses the
    return value as the new token; this implementation ignores the
    token and returns None implicitly.
    NOTE(review): presumably the default that InJinjaArgumentPhase
    overrides -- the enclosing class is not visible here; confirm.
    """
    pass
549+
550+
def processJinjaArgument(self, token):
    """No-op handler for a JinjaArgument token.

    mainLoop dispatches tokens of that type to this method and uses the
    return value as the new token; this implementation ignores the
    token and returns None implicitly.
    NOTE(review): presumably the default that InJinjaArgumentPhase
    overrides -- the enclosing class is not visible here; confirm.
    """
    pass
552+
532553
def processJinjaPipe(self, token):
    """No-op handler for a JinjaPipe token.

    mainLoop dispatches tokens of that type to this method and uses the
    return value as the new token; this implementation ignores the
    token and returns None implicitly.
    NOTE(review): presumably the default that Jinja-aware phases
    override -- the enclosing class is not visible here; confirm.
    """
    pass
534555

@@ -554,26 +575,26 @@ def processEndTag(self, token):
554575
class InJinjaVariablePhase(Phase):
555576
def processJinjaVariableStartTag(self, token):
556577
log = logging.getLogger('html5lib')
557-
log.debug(u"InJinja: Start Tag")
578+
log.debug(u"InJinjaVariable: Start Tag")
558579
self.tree.reconstructActiveFormattingElements()
559580
self.tree.insertElement(token)
560581

561582
def processJinjaVariableEndTag(self, token):
562583
log = logging.getLogger('html5lib')
563-
log.debug(u"InJinja: End Tag {}".format(token["name"]))
584+
log.debug(u"InJinjaVariable: End Tag {}".format(token["name"]))
564585
for node in self.tree.openElements[::-1]:
565-
log.debug(u"InJinja: Open tag {} token {}".format(node, token))
586+
log.debug(u"InJinjaVariable: Open tag {} token {}".format(node, token))
566587
if node.name == token["name"]:
567588
self.tree.generateImpliedEndTags(exclude=token["name"])
568-
log.debug(u"InJinja: Implied end tag {} {}".format(self.tree.openElements[-1].name, token["name"]))
589+
log.debug(u"InJinjaVariable: Implied end tag {} {}".format(self.tree.openElements[-1].name, token["name"]))
569590
if self.tree.openElements[-1].name != token["name"]:
570591
self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
571592
while self.tree.openElements.pop() != node:
572593
pass
573594
break
574595
else:
575596
if node.nameTuple in specialElements:
576-
log.debug(u"Nametuple {} in {}".format(node.nameTuple, specialElements))
597+
log.debug(u"InJinjaVariable Nametuple {} in {}".format(node.nameTuple, specialElements))
577598
self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
578599
break
579600

@@ -589,6 +610,22 @@ def processJinjaFilter(self, token):
589610
element = self.tree.createElementWithoutNamespace(token)
590611
self.tree.openElements[-1].appendChild(element)
591612

613+
class InJinjaArgumentPhase(Phase):
    """Parser phase for the tokens of a Jinja filter argument list.

    The start- and end-tag handlers only write debug log lines and do not
    touch the tree.  Each argument token becomes an element appended to the
    last child node of the current open element.
    NOTE(review): that last child is presumably the filter element the
    arguments belong to -- confirm against the tokenizer's token order.
    """

    def processJinjaArgumentStartTag(self, token):
        """Log that an argument list has been opened."""
        logging.getLogger('html5lib').debug(u"InJinjaArgument: Start Tag")

    def processJinjaArgumentEndTag(self, token):
        """Log that an argument list has been closed, naming the token."""
        message = u"InJinjaArgument: End Tag {}".format(token["name"])
        logging.getLogger('html5lib').debug(message)

    def processJinjaArgument(self, token):
        """Create an element for *token* and attach it to the tree.

        The element is appended to the last child of the current open
        element, so that child must already exist.
        """
        message = u"InJinjaArgument: Process Jinja Argument {}".format(token["name"])
        logging.getLogger('html5lib').debug(message)

        # Build the element first, then locate where it goes.
        argument = self.tree.createElementWithoutNamespace(token)
        current_node = self.tree.openElements[-1]
        current_node.childNodes[-1].appendChild(argument)
628+
592629
class InitialPhase(Phase):
593630
def processSpaceCharacters(self, token):
594631
pass
@@ -2794,6 +2831,7 @@ def processEndTag(self, token):
27942831
# XXX "inHeadNoscript": InHeadNoScriptPhase,
27952832
"afterHead": AfterHeadPhase,
27962833
"inJinjaVariable": InJinjaVariablePhase,
2834+
"inJinjaArgument": InJinjaArgumentPhase,
27972835
"inBody": InBodyPhase,
27982836
"text": TextPhase,
27992837
"inTable": InTablePhase,

html5lib/tests/test_jinja.py

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
import html5lib
2+
import unittest
3+
import logging
4+
5+
log = logging.getLogger(__name__)
6+
7+
8+
def dump(tree, tabs=0):
    """Log *tree* and all of its descendants at debug level.

    One line is written per node, indented with one tab per nesting level,
    showing the node's tag, its number of children and its ``value``
    attribute (``None`` when the attribute is absent).

    :arg tree: element-like node exposing ``tag``, ``attrib``, ``len()``
        and iteration over its children
    :arg tabs: nesting depth of *tree*, used for indentation
    """
    indent = "\t" * tabs
    log.debug(u"{}Tag '{}' - {} children - Value = {}".format(
        indent, tree.tag, len(tree), tree.attrib.get('value')))

    # Children are logged one level deeper than their parent.
    for child in tree:
        dump(child, tabs + 1)
14+
15+
16+
class JinjaTestCase(unittest.TestCase):
    """Checks on how ``{{ ... }}`` Jinja expressions inside HTML are parsed."""

    def _parse(self, html_string):
        """Parse *html_string* as a fragment with a fresh non-strict parser."""
        return html5lib.HTMLParser(strict=False).parseFragment(html_string)

    def _first_h1(self, tree):
        """Return the first child of *tree* after asserting it is an XHTML h1."""
        h1 = tree[0]
        self.assertEqual(h1.tag, "{http://www.w3.org/1999/xhtml}h1")
        return h1

    def _check_hi_pipe_yo(self, h1):
        """Assert the ``hi | yo`` children of the Jinja node in *h1*.

        Returns the filter element so callers can inspect its children.
        """
        jt = h1[0]
        hi, pipe1, yo = jt[0], jt[1], jt[2]

        expectations = (
            (hi, 'jinjavariable', 'hi'),
            (pipe1, 'jinjapipe', '|'),
            (yo, 'jinjafilter', 'yo'),
        )
        for node, tag, value in expectations:
            self.assertEqual(node.tag, tag)
            self.assertEqual(node.attrib['value'], value)

        return yo

    def test_var_1(self):
        """A bare variable still leaves the surrounding h1 in place."""
        tree = self._parse("""<h1>{{ hi }}</h1>""")

        self._first_h1(tree)

    def test_filter_1(self):
        """A variable piped through a filter yields variable, pipe, filter."""
        tree = self._parse("""<h1>{{ hi | yo }}</h1>""")

        h1 = self._first_h1(tree)
        self._check_hi_pipe_yo(h1)

    def test_filter_2(self):
        """A filter call with one argument nests the argument under the filter."""
        tree = self._parse("""<h1>{{ hi | yo("hi") }}</h1>""")
        dump(tree)

        h1 = self._first_h1(tree)
        yo = self._check_hi_pipe_yo(h1)

        arg1 = yo[0]

        self.assertEqual(arg1.tag, 'jinjaargument')
        self.assertEqual(arg1.attrib['value'], '"hi"')

html5lib/tokenizer.py

Lines changed: 45 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -390,8 +390,17 @@ def jinjaVariableState(self):
390390

391391
if data == "}":
392392
self.state = self.jinjaVariableEndState
393-
#elif data == "(":
394-
#self.state = self.jinjaArgState
393+
elif data == "(":
394+
self.currentToken = {
395+
"type": tokenTypes["JinjaArgumentStartTag"],
396+
"name": u"jinjaargumentstarttag", "data": {},
397+
"namespace": None,
398+
"selfClosing": False
399+
}
400+
401+
self.tokenQueue.append(self.currentToken)
402+
403+
self.state = self.jinjaArgState
395404
elif data is EOF:
396405
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
397406
"eof-in-jinja-variable"})
@@ -433,6 +442,40 @@ def jinjaVariableState(self):
433442

434443
return True
435444

445+
def jinjaArgState(self):
    """Tokenize one step of a Jinja filter argument list such as ``("hi")``.

    Reads a single character from the stream and acts on it:

    * ``)`` queues a JinjaArgumentEndTag token and switches back to
      ``jinjaVariableState``.
    * EOF queues an ``eof-in-jinja-argument`` ParseError token and switches
      to ``dataState``.
    * Whitespace and the ``,`` separator are skipped.
    * Anything else starts an argument: the rest of it is read up to (but
      not including) the next ``,`` or ``)`` and queued as a JinjaArgument
      token.

    Always returns True, as ``jinjaVariableState`` does.
    NOTE(review): presumably this tells the tokenizer loop to keep going --
    confirm against the loop that drives the state methods.
    """
    data = self.stream.char()

    # Stray ``print`` debugging was removed: it wrote to stdout and used
    # Python 2-only statement syntax.  The debug log line is kept.
    log.debug(u"Arg {}".format(data))

    if data == ")":
        self.tokenQueue.append({
            "type": tokenTypes["JinjaArgumentEndTag"],
            "name": u"jinjaargumentendtag", "data": [],
            "selfClosing": False
        })
        self.state = self.jinjaVariableState
    elif data is EOF:
        self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                "eof-in-jinja-argument"})
        self.state = self.dataState
    elif data in spaceCharacters or data == ",":
        # Skip spaces and argument separators.  charsUntil below stops
        # *before* the ",", so without this the separator would become the
        # first character of the next argument's value.
        pass
    else:
        # NOTE: this stops at any "," or ")", including ones inside a
        # quoted string argument; quoting is not handled here.
        chars = self.stream.charsUntil(frozenset((",", ")")))

        self.currentToken = {"type": tokenTypes["JinjaArgument"],
                             "name": "jinjaargument", "selfClosing": True, "data": {
                                 "value": data + chars,
                                 "position": self.stream.position(),
                             }}
        self.tokenQueue.append(self.currentToken)

    return True
478+
436479
def rcdataState(self):
437480
data = self.stream.char()
438481
if data == "&":

0 commit comments

Comments
 (0)
0