Welcome to Python 3. · awesome-python/html5lib-python@a897f19 · GitHub
[go: up one dir, main page]

Skip to content

Commit a897f19

Browse files
committed
Welcome to Python 3.
We now fail the same three tests on both Py2 and Py3. I'm fairly certain the meta-preparser, among other things, is broken on Py3, but we have no tests for it. (We should fix that.)
1 parent eb7f702 commit a897f19

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

52 files changed

+3469
-3446
lines changed

html5lib/__init__.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
tree = html5lib.parse(f)
1212
"""
1313
__version__ = "0.95-dev"
14-
from html5parser import HTMLParser, parse, parseFragment
15-
from treebuilders import getTreeBuilder
16-
from treewalkers import getTreeWalker
17-
from serializer import serialize
14+
from .html5parser import HTMLParser, parse, parseFragment
15+
from .treebuilders import getTreeBuilder
16+
from .treewalkers import getTreeWalker
17+
from .serializer import serialize

html5lib/constants.py

Lines changed: 2423 additions & 2423 deletions
Large diffs are not rendered by default.

html5lib/filters/inject_meta_charset.py

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import _base
1+
from . import _base
22

33
class Filter(_base.Filter):
44
def __init__(self, source, encoding):
@@ -13,44 +13,44 @@ def __iter__(self):
1313
for token in _base.Filter.__iter__(self):
1414
type = token["type"]
1515
if type == "StartTag":
16-
if token["name"].lower() == u"head":
16+
if token["name"].lower() == "head":
1717
state = "in_head"
1818

1919
elif type == "EmptyTag":
20-
if token["name"].lower() == u"meta":
20+
if token["name"].lower() == "meta":
2121
# replace charset with actual encoding
2222
has_http_equiv_content_type = False
23-
for (namespace,name),value in token["data"].iteritems():
23+
for (namespace,name),value in token["data"].items():
2424
if namespace != None:
2525
continue
26-
elif name.lower() == u'charset':
26+
elif name.lower() == 'charset':
2727
token["data"][(namespace,name)] = self.encoding
2828
meta_found = True
2929
break
30-
elif name == u'http-equiv' and value.lower() == u'content-type':
30+
elif name == 'http-equiv' and value.lower() == 'content-type':
3131
has_http_equiv_content_type = True
3232
else:
33-
if has_http_equiv_content_type and (None, u"content") in token["data"]:
34-
token["data"][(None, u"content")] = u'text/html; charset=%s' % self.encoding
33+
if has_http_equiv_content_type and (None, "content") in token["data"]:
34+
token["data"][(None, "content")] = 'text/html; charset=%s' % self.encoding
3535
meta_found = True
3636

37-
elif token["name"].lower() == u"head" and not meta_found:
37+
elif token["name"].lower() == "head" and not meta_found:
3838
# insert meta into empty head
39-
yield {"type": "StartTag", "name": u"head",
39+
yield {"type": "StartTag", "name": "head",
4040
"data": token["data"]}
41-
yield {"type": "EmptyTag", "name": u"meta",
42-
"data": {(None, u"charset"): self.encoding}}
43-
yield {"type": "EndTag", "name": u"head"}
41+
yield {"type": "EmptyTag", "name": "meta",
42+
"data": {(None, "charset"): self.encoding}}
43+
yield {"type": "EndTag", "name": "head"}
4444
meta_found = True
4545
continue
4646

4747
elif type == "EndTag":
48-
if token["name"].lower() == u"head" and pending:
48+
if token["name"].lower() == "head" and pending:
4949
# insert meta into head (if necessary) and flush pending queue
5050
yield pending.pop(0)
5151
if not meta_found:
52-
yield {"type": "EmptyTag", "name": u"meta",
53-
"data": {(None, u"charset"): self.encoding}}
52+
yield {"type": "EmptyTag", "name": "meta",
53+
"data": {(None, "charset"): self.encoding}}
5454
while pending:
5555
yield pending.pop(0)
5656
meta_found = True

html5lib/filters/lint.py

Lines changed: 21 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
from gettext import gettext
22
_ = gettext
33

4-
import _base
4+
from . import _base
55
from html5lib.constants import cdataElements, rcdataElements, voidElements
66

77
from html5lib.constants import spaceCharacters
8-
spaceCharacters = u"".join(spaceCharacters)
8+
spaceCharacters = "".join(spaceCharacters)
99

1010
class LintError(Exception): pass
1111

@@ -19,22 +19,22 @@ def __iter__(self):
1919
name = token["name"]
2020
if contentModelFlag != "PCDATA":
2121
raise LintError(_("StartTag not in PCDATA content model flag: %s") % name)
22-
if not isinstance(name, unicode):
23-
raise LintError(_(u"Tag name is not a string: %r") % name)
22+
if not isinstance(name, str):
23+
raise LintError(_("Tag name is not a string: %r") % name)
2424
if not name:
25-
raise LintError(_(u"Empty tag name"))
25+
raise LintError(_("Empty tag name"))
2626
if type == "StartTag" and name in voidElements:
27-
raise LintError(_(u"Void element reported as StartTag token: %s") % name)
27+
raise LintError(_("Void element reported as StartTag token: %s") % name)
2828
elif type == "EmptyTag" and name not in voidElements:
29-
raise LintError(_(u"Non-void element reported as EmptyTag token: %s") % token["name"])
29+
raise LintError(_("Non-void element reported as EmptyTag token: %s") % token["name"])
3030
if type == "StartTag":
3131
open_elements.append(name)
3232
for name, value in token["data"]:
33-
if not isinstance(name, unicode):
33+
if not isinstance(name, str):
3434
raise LintError(_("Attribute name is not a string: %r") % name)
3535
if not name:
36-
raise LintError(_(u"Empty attribute name"))
37-
if not isinstance(value, unicode):
36+
raise LintError(_("Empty attribute name"))
37+
if not isinstance(value, str):
3838
raise LintError(_("Attribute value is not a string: %r") % value)
3939
if name in cdataElements:
4040
contentModelFlag = "CDATA"
@@ -45,15 +45,15 @@ def __iter__(self):
4545

4646
elif type == "EndTag":
4747
name = token["name"]
48-
if not isinstance(name, unicode):
49-
raise LintError(_(u"Tag name is not a string: %r") % name)
48+
if not isinstance(name, str):
49+
raise LintError(_("Tag name is not a string: %r") % name)
5050
if not name:
51-
raise LintError(_(u"Empty tag name"))
51+
raise LintError(_("Empty tag name"))
5252
if name in voidElements:
53-
raise LintError(_(u"Void element reported as EndTag token: %s") % name)
53+
raise LintError(_("Void element reported as EndTag token: %s") % name)
5454
start_name = open_elements.pop()
5555
if start_name != name:
56-
raise LintError(_(u"EndTag (%s) does not match StartTag (%s)") % (name, start_name))
56+
raise LintError(_("EndTag (%s) does not match StartTag (%s)") % (name, start_name))
5757
contentModelFlag = "PCDATA"
5858

5959
elif type == "Comment":
@@ -62,27 +62,27 @@ def __iter__(self):
6262

6363
elif type in ("Characters", "SpaceCharacters"):
6464
data = token["data"]
65-
if not isinstance(data, unicode):
65+
if not isinstance(data, str):
6666
raise LintError(_("Attribute name is not a string: %r") % data)
6767
if not data:
68-
raise LintError(_(u"%s token with empty data") % type)
68+
raise LintError(_("%s token with empty data") % type)
6969
if type == "SpaceCharacters":
7070
data = data.strip(spaceCharacters)
7171
if data:
72-
raise LintError(_(u"Non-space character(s) found in SpaceCharacters token: ") % data)
72+
raise LintError(_("Non-space character(s) found in SpaceCharacters token: ") % data)
7373

7474
elif type == "Doctype":
7575
name = token["name"]
7676
if contentModelFlag != "PCDATA":
7777
raise LintError(_("Doctype not in PCDATA content model flag: %s") % name)
78-
if not isinstance(name, unicode):
79-
raise LintError(_(u"Tag name is not a string: %r") % name)
78+
if not isinstance(name, str):
79+
raise LintError(_("Tag name is not a string: %r") % name)
8080
# XXX: what to do with token["data"] ?
8181

8282
elif type in ("ParseError", "SerializeError"):
8383
pass
8484

8585
else:
86-
raise LintError(_(u"Unknown token type: %s") % type)
86+
raise LintError(_("Unknown token type: %s") % type)
8787

8888
yield token

html5lib/filters/optionaltags.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import _base
1+
from . import _base
22

33
class Filter(_base.Filter):
44
def slider(self):

html5lib/filters/sanitizer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import _base
1+
from . import _base
22
from html5lib.sanitizer import HTMLSanitizerMixin
33

44
class Filter(_base.Filter, HTMLSanitizerMixin):

html5lib/filters/whitespace.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,11 @@
66

77
import re
88

9-
import _base
9+
from . import _base
1010
from html5lib.constants import rcdataElements, spaceCharacters
11-
spaceCharacters = u"".join(spaceCharacters)
11+
spaceCharacters = "".join(spaceCharacters)
1212

13-
SPACES_REGEX = re.compile(u"[%s]+" % spaceCharacters)
13+
SPACES_REGEX = re.compile("[%s]+" % spaceCharacters)
1414

1515
class Filter(_base.Filter):
1616

@@ -29,7 +29,7 @@ def __iter__(self):
2929

3030
elif not preserve and type == "SpaceCharacters" and token["data"]:
3131
# Test on token["data"] above to not introduce spaces where there were not
32-
token["data"] = u" "
32+
token["data"] = " "
3333

3434
elif not preserve and type == "Characters":
3535
token["data"] = collapse_spaces(token["data"])

0 commit comments

Comments
 (0)
0