Added in jinja parsing · cratejoy/html5lib-python@25555a8 · GitHub
[go: up one dir, main page]

Skip to content

Commit 25555a8

Browse files
committed
Added in jinja parsing
1 parent f5fd711 commit 25555a8

File tree

3 files changed

+139
-1
lines changed

3 files changed

+139
-1
lines changed

html5lib/constants.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3085,7 +3085,13 @@
30853085
"EndTag": 4,
30863086
"EmptyTag": 5,
30873087
"Comment": 6,
3088-
"ParseError": 7
3088+
"ParseError": 7,
3089+
"JinjaStatementStartTag": 8,
3090+
"JinjaStatementEndTag": 9,
3091+
"JinjaStatementTag": 10,
3092+
"JinjaVariableStartTag": 11,
3093+
"JinjaVariableEndTag": 12,
3094+
"JinjaVariableTag": 13
30893095
}
30903096

30913097
tagTokenTypes = frozenset((tokenTypes["StartTag"], tokenTypes["EndTag"],

html5lib/html5parser.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
from __future__ import absolute_import, division, unicode_literals
22
from six import with_metaclass
33

4+
import logging
5+
46
import types
57

68
from . import inputstream
@@ -20,6 +22,9 @@
2022
from .constants import adjustForeignAttributes as adjustForeignAttributesMap
2123

2224

25+
log = logging.getLogger(u"html5lib")
26+
27+
2328
def parse(doc, treebuilder="etree", encoding=None,
2429
namespaceHTMLElements=True):
2530
"""Parse a string or file-like object into a tree"""
@@ -161,6 +166,12 @@ def mainLoop(self):
161166
CommentToken = tokenTypes["Comment"]
162167
DoctypeToken = tokenTypes["Doctype"]
163168
ParseErrorToken = tokenTypes["ParseError"]
169+
JinjaStatementStartTag = tokenTypes["JinjaStatementStartTag"]
170+
JinjaStatementEndTag = tokenTypes["JinjaStatementEndTag"]
171+
JinjaStatementTag = tokenTypes["JinjaStatementTag"]
172+
JinjaVariableStartTag = tokenTypes["JinjaVariableStartTag"]
173+
JinjaVariableEndTag = tokenTypes["JinjaVariableEndTag"]
174+
JinjaVariableTag = tokenTypes["JinjaVariableTag"]
164175

165176
for token in self.normalizedTokens():
166177
new_token = token
@@ -202,6 +213,18 @@ def mainLoop(self):
202213
new_token = phase.processComment(new_token)
203214
elif type == DoctypeToken:
204215
new_token = phase.processDoctype(new_token)
216+
elif type == JinjaStatementStartTag:
217+
new_token = phase.processJinjaStatementStartTag(new_token)
218+
elif type == JinjaStatementEndTag:
219+
new_token = phase.processJinjaStatementEndTag(new_token)
220+
elif type == JinjaStatementTag:
221+
new_token = phase.processJinjaStatementTag(new_token)
222+
elif type == JinjaVariableStartTag:
223+
new_token = phase.processJinjaVariableStartTag(new_token)
224+
elif type == JinjaVariableEndTag:
225+
new_token = phase.processJinjaVariableEndTag(new_token)
226+
elif type == JinjaVariableTag:
227+
new_token = phase.processJinjaVariableTag(new_token)
205228

206229
if (type == StartTagToken and token["selfClosing"]
207230
and not token["selfClosingAcknowledged"]):
@@ -475,6 +498,24 @@ def processCharacters(self, token):
475498
def processSpaceCharacters(self, token):
    """Hand the token's ``"data"`` text to ``self.tree.insertText``.

    Nothing is returned explicitly, so the result is ``None``.
    """
    insert_text = self.tree.insertText
    insert_text(token["data"])
477500

501+
def processJinjaStatementStartTag(self, token):
    """Placeholder handler for ``JinjaStatementStartTag`` tokens.

    The token is accepted and ignored; the method always returns ``None``.
    """
    return None
503+
504+
def processJinjaStatementEndTag(self, token):
    """Placeholder handler for ``JinjaStatementEndTag`` tokens.

    The token is accepted and ignored; the method always returns ``None``.
    """
    return None
506+
507+
def processJinjaStatementTag(self, token):
    """Placeholder handler for ``JinjaStatementTag`` tokens.

    The token is accepted and ignored; the method always returns ``None``.
    """
    return None
509+
510+
def processJinjaVariableStartTag(self, token):
    """Placeholder handler for ``JinjaVariableStartTag`` tokens.

    The token is accepted and ignored; the method always returns ``None``.
    """
    return None
512+
513+
def processJinjaVariableEndTag(self, token):
    """Placeholder handler for ``JinjaVariableEndTag`` tokens.

    The token is accepted and ignored; the method always returns ``None``.
    """
    return None
515+
516+
def processJinjaVariableTag(self, token):
    """Placeholder handler for ``JinjaVariableTag`` tokens.

    The token is accepted and ignored; the method always returns ``None``.
    """
    return None
518+
478519
def processStartTag(self, token):
479520
return self.startTagHandler[token["name"]](token)
480521

html5lib/tokenizer.py

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,10 @@
55
except NameError:
66
pass
77

8+
9+
import sys
10+
import logging
11+
812
from collections import deque
913

1014
from .constants import spaceCharacters
@@ -20,6 +24,8 @@
2024

2125
entitiesTrie = Trie(entities)
2226

27+
log = logging.getLogger(u"html5lib")
28+
2329

2430
class HTMLTokenizer(object):
2531
""" This class takes care of tokenizing HTML.
@@ -254,6 +260,8 @@ def dataState(self):
254260
self.state = self.entityDataState
255261
elif data == "<":
256262
self.state = self.tagOpenState
263+
elif data == "{":
264+
self.state = self.jinjaOpenState
257265
elif data == "\u0000":
258266
self.tokenQueue.append({"type": tokenTypes["ParseError"],
259267
"data": "invalid-codepoint"})
@@ -282,6 +290,89 @@ def entityDataState(self):
282290
self.state = self.dataState
283291
return True
284292

293+
def jinjaOpenState(self):
    """Tokenizer state entered from dataState after a single "{" was read.

    Reads one more character from the stream and decides what the "{"
    opened:

    * "{" -- queue a JinjaVariableStartTag token (name "{{") and switch
      to jinjaVariableState.
    * "%" -- queue a JinjaStatementStartTag token (name "{%") and switch
      to jinjaStatementState.
    * anything else, including EOF -- the "{" was ordinary text: queue it
      as a Characters token, push the character just read back onto the
      stream and return to dataState.

    Always returns True; end of input is left for dataState to detect.
    """
    data = self.stream.char()

    if data == "{":
        self.tokenQueue.append({
            "type": tokenTypes["JinjaVariableStartTag"],
            "name": "{{", "data": [],
            "selfClosing": False
        })
        self.state = self.jinjaVariableState
    elif data == "%":
        self.tokenQueue.append({
            "type": tokenTypes["JinjaStatementStartTag"],
            "name": "{%", "data": [],
            "selfClosing": False
        })
        self.state = self.jinjaStatementState
    else:
        # Not a Jinja delimiter. Without this branch the state never
        # changed: the "{" and everything after it was discarded, and at
        # EOF the method kept returning True forever.
        self.tokenQueue.append({"type": tokenTypes["Characters"],
                                "data": "{"})
        # NOTE(review): relies on stream.unget() ignoring EOF, as the
        # stock html5lib input stream does -- confirm for this fork.
        self.stream.unget(data)
        self.state = self.dataState

    return True
315+
316+
def jinjaStatementEndState(self):
    """Tokenizer state entered from jinjaStatementState after a "%".

    Reads the next character:

    * "}" -- the statement is closed: queue a JinjaStatementEndTag token
      (name "%}") and return to dataState.
    * anything else, including EOF -- the "%" was part of the statement
      body (e.g. a modulo operator): queue it as JinjaStatementTag
      content, push the character just read back onto the stream and
      resume jinjaStatementState.

    Always returns True; end of input is left for jinjaStatementState to
    detect.
    """
    data = self.stream.char()

    if data == "}":
        self.tokenQueue.append({
            "type": tokenTypes["JinjaStatementEndTag"],
            "name": "%}", "data": [],
            "selfClosing": False
        })
        self.state = self.dataState
    else:
        # A lone "%". Without this branch the state never changed, so
        # the rest of the statement was silently dropped and EOF caused
        # an endless loop.
        self.tokenQueue.append({"type": tokenTypes["JinjaStatementTag"],
                                "data": "%"})
        # NOTE(review): relies on stream.unget() ignoring EOF, as the
        # stock html5lib input stream does -- confirm for this fork.
        self.stream.unget(data)
        self.state = self.jinjaStatementState

    return True
330+
331+
def jinjaVariableEndState(self):
    """Tokenizer state entered from jinjaVariableState after a "}".

    Reads the next character:

    * "}" -- the expression is closed: queue a JinjaVariableEndTag token
      (name "}}") and return to dataState.
    * anything else, including EOF -- the "}" was part of the expression
      body (e.g. the end of a dict literal): queue it as JinjaVariableTag
      content, push the character just read back onto the stream and
      resume jinjaVariableState.

    Always returns True; end of input is left for jinjaVariableState to
    detect.
    """
    data = self.stream.char()

    if data == "}":
        self.tokenQueue.append({
            "type": tokenTypes["JinjaVariableEndTag"],
            "name": "}}", "data": [],
            "selfClosing": False
        })
        self.state = self.dataState
    else:
        # A lone "}". Without this branch the state never changed, so
        # the rest of the expression was silently dropped and EOF caused
        # an endless loop.
        self.tokenQueue.append({"type": tokenTypes["JinjaVariableTag"],
                                "data": "}"})
        # NOTE(review): relies on stream.unget() ignoring EOF, as the
        # stock html5lib input stream does -- confirm for this fork.
        self.stream.unget(data)
        self.state = self.jinjaVariableState

    return True
345+
346+
def jinjaStatementState(self):
    """Tokenizer state for the body of a "{% ... %}" statement.

    Reads one character from the stream:

    * EOF -- returns False, which ends tokenization.
    * "%" -- switches to jinjaStatementEndState and returns True.
    * otherwise -- reads on up to the next "%" or U+0000, queues the
      text (first character included) as a JinjaStatementTag token and
      returns True.
    """
    first = self.stream.char()

    if first is EOF:
        # Input ran out inside the statement.
        return False

    if first == "%":
        self.state = self.jinjaStatementEndState
        return True

    rest = self.stream.charsUntil(("%", "\u0000"))
    self.tokenQueue.append({
        "type": tokenTypes["JinjaStatementTag"],
        "data": first + rest,
    })
    return True
360+
361+
def jinjaVariableState(self):
    """Tokenizer state for the body of a "{{ ... }}" expression.

    Reads one character from the stream:

    * EOF -- returns False, which ends tokenization.
    * "}" -- switches to jinjaVariableEndState and returns True.
    * otherwise -- reads on up to the next "}" or U+0000, queues the
      text (first character included) as a JinjaVariableTag token and
      returns True.
    """
    first = self.stream.char()

    if first is EOF:
        # Input ran out inside the expression.
        return False

    if first == "}":
        self.state = self.jinjaVariableEndState
        return True

    rest = self.stream.charsUntil(("}", "\u0000"))
    self.tokenQueue.append({
        "type": tokenTypes["JinjaVariableTag"],
        "data": first + rest,
    })
    return True
375+
285376
def rcdataState(self):
286377
data = self.stream.char()
287378
if data == "&":

0 commit comments

Comments
 (0)
0