The Lint Filter ensures that all data (tag and attribute names, attri… · awesome-python/html5lib-python@3ce5bd2 · GitHub
[go: up one dir, main page]

Skip to content

Commit 3ce5bd2

Browse files
committed
The Lint Filter ensures that all data (tag and attribute names, attribute values, comment data, text and doctype names) are unicode() strings.
_base.TreeWalker ensures only unicode() strings are emitted. This is probably not the best way but it works and is better than having tokens with non-unicode strings live on token streams. --HG-- extra : convert_revision : svn%3Aacbfec75-9323-0410-a652-858a13e371e0/trunk%40687
1 parent 77303eb commit 3ce5bd2

File tree

2 files changed

+13
-12
lines changed

2 files changed

+13
-12
lines changed

src/filters/lint.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ def __iter__(self):
1919
name = token["name"]
2020
if contentModelFlag != "PCDATA":
2121
raise LintError(_("StartTag not in PCDATA content model flag: %s") % name)
22-
if not isinstance(name, basestring):
22+
if not isinstance(name, unicode):
2323
raise LintError(_(u"Tag name is not a string: %r") % name)
2424
if not name:
2525
raise LintError(_(u"Empty tag name"))
@@ -30,11 +30,11 @@ def __iter__(self):
3030
if type == "StartTag":
3131
open_elements.append(name)
3232
for name, value in token["data"]:
33-
if not isinstance(name, basestring):
33+
if not isinstance(name, unicode):
3434
raise LintError(_("Attribute name is not a string: %r") % name)
3535
if not name:
3636
raise LintError(_(u"Empty attribute name"))
37-
if not isinstance(value, basestring):
37+
if not isinstance(value, unicode):
3838
raise LintError(_("Attribute value is not a string: %r") % value)
3939
if name in cdataElements:
4040
contentModelFlag = "CDATA"
@@ -45,7 +45,7 @@ def __iter__(self):
4545

4646
elif type == "EndTag":
4747
name = token["name"]
48-
if not isinstance(name, basestring):
48+
if not isinstance(name, unicode):
4949
raise LintError(_(u"Tag name is not a string: %r") % name)
5050
if not name:
5151
raise LintError(_(u"Empty tag name"))
@@ -62,7 +62,7 @@ def __iter__(self):
6262

6363
elif type in ("Characters", "SpaceCharacters"):
6464
data = token["data"]
65-
if not isinstance(data, basestring):
65+
if not isinstance(data, unicode):
6666
raise LintError(_("Attribute name is not a string: %r") % data)
6767
if not data:
6868
raise LintError(_(u"%s token with empty data") % type)
@@ -75,7 +75,7 @@ def __iter__(self):
7575
name = token["name"]
7676
if contentModelFlag != "PCDATA":
7777
raise LintError(_("Doctype not in PCDATA content model flag: %s") % name)
78-
if not isinstance(name, basestring):
78+
if not isinstance(name, unicode):
7979
raise LintError(_(u"Tag name is not a string: %r") % name)
8080
if not name:
8181
raise LintError(_(u"Empty tag name"))

src/treewalkers/_base.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,22 +19,23 @@ def normalizeAttrs(self, attrs):
1919
attrs = []
2020
elif hasattr(attrs, 'items'):
2121
attrs = attrs.items()
22-
return attrs
22+
return [(unicode(name),unicode(value)) for name,value in attrs]
2323

2424
def emptyTag(self, name, attrs, hasChildren=False):
25-
yield {"type": "EmptyTag", "name": name, \
25+
yield {"type": "EmptyTag", "name": unicode(name), \
2626
"data": self.normalizeAttrs(attrs)}
2727
if hasChildren:
2828
yield self.error(_("Void element has children"))
2929

3030
def startTag(self, name, attrs):
31-
return {"type": "StartTag", "name": name, \
31+
return {"type": "StartTag", "name": unicode(name), \
3232
"data": self.normalizeAttrs(attrs)}
3333

3434
def endTag(self, name):
35-
return {"type": "EndTag", "name": name, "data": []}
35+
return {"type": "EndTag", "name": unicode(name), "data": []}
3636

3737
def text(self, data):
38+
data = unicode(data)
3839
middle = data.lstrip(spaceCharacters)
3940
left = data[:len(data)-len(middle)]
4041
if left:
@@ -48,10 +49,10 @@ def text(self, data):
4849
yield {"type": "SpaceCharacters", "data": right}
4950

5051
def comment(self, data):
51-
return {"type": "Comment", "data": data}
52+
return {"type": "Comment", "data": unicode(data)}
5253

5354
def doctype(self, name):
54-
return {"type": "Doctype", "name": name, "data": name.upper() == "HTML"}
55+
return {"type": "Doctype", "name": unicode(name), "data": name.upper() == "HTML"}
5556

5657
def unknown(self, nodeType):
5758
return self.error(_("Unknown node type: ") + nodeType)

0 commit comments

Comments
 (0)
0