8000 Added a filters submodule with a "lint" filter · awesome-python/html5lib-python@ef0112c · GitHub
[go: up one dir, main page]

Skip to content
8000

Commit ef0112c

Browse files
committed
Added a filters submodule with a "lint" filter
--HG-- extra : convert_revision : svn%3Aacbfec75-9323-0410-a652-858a13e371e0/trunk%40684
1 parent d142228 commit ef0112c

File tree

4 files changed

+99
-1
lines changed

4 files changed

+99
-1
lines changed

src/filters/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
2+
import os.path
3+
# Append the directory that contains this package to the package's own
# __path__, so submodule imports are also searched for there.
# NOTE(review): presumably this lets modules in this package import siblings
# of the package (e.g. `constants`) by bare name -- confirm against the
# source tree layout.
__path__.append(os.path.dirname(__path__[0]))

src/filters/_base.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
2+
class Filter(object):
    """Base class for filters that wrap an iterable source.

    Iterating the filter iterates the wrapped source unchanged; subclasses
    override ``__iter__`` to inspect or transform the items. Attributes that
    are not found on the filter itself are looked up on the wrapped source.
    """

    def __init__(self, source):
        """Store *source*, the iterable this filter wraps."""
        self.source = source

    def __iter__(self):
        """Return an iterator over the items of the wrapped source."""
        return iter(self.source)

    def __getattr__(self, name):
        """Delegate lookup of attributes missing on the filter to the source.

        Raises:
            AttributeError: if the source lacks *name*, or if ``source``
                itself has not been set on this instance.
        """
        # __getattr__ only runs when normal lookup fails, so being asked for
        # "source" means it was never assigned (e.g. an instance created
        # without __init__). Reading self.source below would then re-enter
        # this method and recurse without bound.
        if name == "source":
            raise AttributeError(name)
        return getattr(self.source, name)

src/filters/lint.py

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
from gettext import gettext
2+
_ = gettext
3+
4+
import _base
5+
from constants import cdataElements, rcdataElements, voidElements
6+
7+
from constants import spaceCharacters
8+
spaceCharacters = u"".join(spaceCharacters)
9+
10+
class LintError(Exception):
    """Raised by the lint filter when a token fails validation."""
11+
12+
class Filter(_base.Filter):
    """Filter that validates each token and passes it through unchanged."""

    def __iter__(self):
        """Yield every token of the wrapped source after validating it.

        Checks performed, by token type:

        * ``StartTag`` / ``EmptyTag``: the tag name is a non-empty string;
          names in ``voidElements`` must be reported as ``EmptyTag`` and all
          other names as ``StartTag``; attribute names are non-empty strings
          and attribute values are strings.
        * ``EndTag``: the tag name is a non-empty string, is not a void
          element, and matches the most recently opened ``StartTag``.
        * ``Characters`` / ``SpaceCharacters``: the data is a non-empty
          string; ``SpaceCharacters`` data consists solely of characters
          from ``spaceCharacters``.
        * ``Doctype``: the name is a non-empty string.
        * ``Comment``: accepted without checks.

        Raises:
            LintError: on the first token that fails a check, or whose type
                is none of the above.
        """
        # Names of StartTag elements that have not been closed yet.
        open_elements = []
        # Updated as tags are seen; nothing in this method reads it yet.
        contentModelFlag = "PCDATA"
        for token in _base.Filter.__iter__(self):
            token_type = token["type"]
            if token_type in ("StartTag", "EmptyTag"):
                name = token["name"]
                if not isinstance(name, basestring):
                    # One-element tuple: a bare tuple value would otherwise
                    # be unpacked as the format arguments.
                    raise LintError(_(u"Tag name is not a string: %r") % (name,))
                if not name:
                    raise LintError(_(u"Empty tag name"))
                if token_type == "StartTag" and name in voidElements:
                    raise LintError(_(u"Void element reported as StartTag token: %s") % name)
                elif token_type == "EmptyTag" and name not in voidElements:
                    raise LintError(_(u"Non-void element reported as EmptyTag token: %s") % token["name"])
                # Distinct loop names so the tag name in `name` stays intact
                # for the bookkeeping below.
                for attr_name, attr_value in token["data"]:
                    if not isinstance(attr_name, basestring):
                        raise LintError(_("Attribute name is not a string: %r") % (attr_name,))
                    if not attr_name:
                        raise LintError(_(u"Empty attribute name"))
                    if not isinstance(attr_value, basestring):
                        raise LintError(_("Attribute value is not a string: %r") % (attr_value,))
                # Void elements never get an EndTag (one is rejected below),
                # so only real start tags go on the stack.
                if token_type == "StartTag":
                    open_elements.append(name)
                if name in cdataElements:
                    contentModelFlag = "CDATA"
                elif name in rcdataElements:
                    contentModelFlag = "RCDATA"
                elif name == "plaintext":
                    # PLAINTEXT is entered by the <plaintext> element.
                    contentModelFlag = "PLAINTEXT"

            elif token_type == "EndTag":
                name = token["name"]
                if not isinstance(name, basestring):
                    raise LintError(_(u"Tag name is not a string: %r") % (name,))
                if not name:
                    raise LintError(_(u"Empty tag name"))
                if name in voidElements:
                    raise LintError(_(u"Void element reported as EndTag token: %s") % name)
                # An end tag with nothing open is a mismatch too; report it
                # as a LintError instead of letting pop() raise IndexError.
                if not open_elements or open_elements.pop() != name:
                    raise LintError(_(u"EndTag does not match StartTag: %s") % name)
                contentModelFlag = "PCDATA"

            elif token_type == "Comment":
                # XXX: rejecting "--" inside comments makes the tests fail,
                # so this check stays disabled:
                # if token["data"].find("--") >= 0:
                #     raise LintError(_(u"Comment contains double-dash"))
                pass

            elif token_type in ("Characters", "SpaceCharacters"):
                data = token["data"]
                if not isinstance(data, basestring):
                    raise LintError(_(u"%s token data is not a string: %r") % (token_type, data))
                if not data:
                    raise LintError(_(u"%s token with empty data") % token_type)
                if token_type == "SpaceCharacters":
                    data = data.strip(spaceCharacters)
                    if data:
                        raise LintError(_(u"Non-space character(s) found in SpaceCharacters token: %r") % data)

            elif token_type == "Doctype":
                name = token["name"]
                if not isinstance(name, basestring):
                    raise LintError(_(u"Tag name is not a string: %r") % (name,))
                if not name:
                    raise LintError(_(u"Empty tag name"))
                # XXX: what to do with token["data"] ?

            else:
                raise LintError(_(u"Unknown token type: %s") % token_type)

            yield token

tests/test_treewalkers.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,13 @@
1919

2020
import treewalkers
2121
import treebuilders
22+
from filters.lint import Filter as LintFilter
2223
#END RELEASE
2324

2425
#RELEASE add
2526
#import html5lib
2627
#from html5lib import html5parser, serializer, treewalkers, treebuilders
28+
#from html5lib.filters.lint import Filter as LintFilter
2729
#END RELEASE
2830

2931
def PullDOMAdapter(node):
@@ -220,7 +222,7 @@ def runTest(self, innerHTML, input, expected, errors, treeClass):
220222
document = p.parse(StringIO.StringIO(input))
221223
document = treeClass.get("adapter", lambda x: x)(document)
222224
try:
223-
output = convertTokens(treeClass["walker"](document))
225+
output = convertTokens(LintFilter(treeClass["walker"](document)))
224226
output = attrlist.sub(sortattrs, output)
225227
expected = attrlist.sub(sortattrs, expected)
226228
errorMsg = "\n".join(["\n\nExpected:", expected,

0 commit comments

Comments
 (0)
0