The Lint Filter ensures that all data (tag and attribute names, attri… · html5lib/html5lib-python@3ce5bd2 · GitHub
[go: up one dir, main page]

Skip to content

Commit 3ce5bd2

Browse files
committed
The Lint Filter ensures that all data (tag and attribute names, attribute values, comment data, text and doctype names) are unicode() strings.
_base.TreeWalker ensures only unicode() strings are emitted. This is probably not the best way, but it works and is better than having tokens with non-unicode strings live on token streams.

--HG--
extra : convert_revision : svn%3Aacbfec75-9323-0410-a652-858a13e371e0/trunk%40687
1 parent 77303eb commit 3ce5bd2

File tree

2 files changed

+13
-12
lines changed

2 files changed

+13
-12
lines changed

src/filters/lint.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ def __iter__(self):
1919
name = token["name"]
2020
if contentModelFlag != "PCDATA":
2121
raise LintError(_("StartTag not in PCDATA content model flag: %s") % name)
22-
if not isinstance(name, basestring):
22+
if not isinstance(name, unicode):
2323
raise LintError(_(u"Tag name is not a string: %r") % name)
2424
if not name:
2525
raise LintError(_(u"Empty tag name"))
@@ -30,11 +30,11 @@ def __iter__(self):
3030
if type == "StartTag":
3131
open_elements.append(name)
3232
for name, value in token["data"]:
33-
if not isinstance(name, basestring):
33+
if not isinstance(name, unicode):
3434
raise LintError(_("Attribute name is not a string: %r") % name)
3535
if not name:
3636
raise LintError(_(u"Empty attribute name"))
37-
if not isinstance(value, basestring):
37+
if not isinstance(value, unicode):
3838
raise LintError(_("Attribute value is not a string: %r") % value)
3939
if name in cdataElements:
4040
contentModelFlag = "CDATA"
@@ -45,7 +45,7 @@ def __iter__(self):
4545

4646
elif type == "EndTag":
4747
name = token["name"]
48-
if not isinstance(name, basestring):
48+
if not isinstance(name, unicode):
4949
raise LintError(_(u"Tag name is not a string: %r") % name)
5050
if not name:
5151
raise LintError(_(u"Empty tag name"))
@@ -62,7 +62,7 @@ def __iter__(self):
6262

6363
elif type in ("Characters", "SpaceCharacters"):
6464
data = token["data"]
65-
if not isinstance(data, basestring):
65+
if not isinstance(data, unicode):
6666
raise LintError(_("Attribute name is not a string: %r") % data)
6767
if not data:
6868
raise LintError(_(u"%s token with empty data") % type)
@@ -75,7 +75,7 @@ def __iter__(self):
7575
name = token["name"]
7676
if contentModelFlag != "PCDATA":
7777
raise LintError(_("Doctype not in PCDATA content model flag: %s") % name)
78-
if not isinstance(name, basestring):
78+
if not isinstance(name, unicode):
7979
raise LintError(_(u"Tag name is not a string: %r") % name)
8080
if not name:
8181
raise LintError(_(u"Empty tag name"))

src/treewalkers/_base.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,22 +19,23 @@ def normalizeAttrs(self, attrs):
1919
attrs = []
2020
elif hasattr(attrs, 'items'):
2121
attrs = attrs.items()
22-
return attrs
22+
return [(unicode(name),unicode(value)) for name,value in attrs]
2323

2424
def emptyTag(self, name, attrs, hasChildren=False):
25-
yield {"type": "EmptyTag", "name": name, \
25+
yield {"type": "EmptyTag", "name": unicode(name), \
2626
"data": self.normalizeAttrs(attrs)}
2727
if hasChildren:
2828
yield self.error(_("Void element has children"))
2929

3030
def startTag(self, name, attrs):
31-
return {"type": "StartTag", "name": name, \
31+
return {"type": "StartTag", "name": unicode(name), \
3232
"data": self.normalizeAttrs(attrs)}
3333

3434
def endTag(self, name):
35-
return {"type": "EndTag", "name": name, "data": []}
35+
return {"type": "EndTag", "name": unicode(name), "data": []}
3636

3737
def text(self, data):
38+
data = unicode(data)
3839
middle = data.lstrip(spaceCharacters)
3940
left = data[:len(data)-len(middle)]
4041
if left:
@@ -48,10 +49,10 @@ def text(self, data):
4849
yield {"type": "SpaceCharacters", "data": right}
4950

5051
def comment(self, data):
51-
return {"type": "Comment", "data": data}
52+
return {"type": "Comment", "data": unicode(data)}
5253

5354
def doctype(self, name):
54-
return {"type": "Doctype", "name": name, "data": name.upper() == "HTML"}
55+
return {"type": "Doctype", "name": unicode(name), "data": name.upper() == "HTML"}
5556

5657
def unknown(self, nodeType):
5758
return self.error(_("Unknown node type: ") + nodeType)

0 commit comments

Comments
 (0)
0