diff --git a/html5lib/_tokenizer.py b/html5lib/_tokenizer.py
index 4748a197..dd2a7a49 100644
--- a/html5lib/_tokenizer.py
+++ b/html5lib/_tokenizer.py
@@ -7,7 +7,7 @@
from .constants import spaceCharacters
from .constants import entities
-from .constants import asciiLetters, asciiUpper2Lower
+from .constants import asciiLetters
from .constants import digits, hexDigits, EOF
from .constants import tokenTypes, tagTokenTypes
from .constants import replacementCharacters
@@ -233,7 +233,7 @@ def emitCurrentToken(self):
token = self.currentToken
# Add token to the queue to be yielded
if (token["type"] in tagTokenTypes):
- token["name"] = token["name"].translate(asciiUpper2Lower)
+ token["name"] = token["name"].lower()
if token["type"] == tokenTypes["StartTag"]:
raw = token["data"]
data = attributeMap(raw)
@@ -927,7 +927,7 @@ def attributeNameState(self):
# start tag token is emitted so values can still be safely appended
# to attributes, but we do want to report the parse error in time.
self.currentToken["data"][-1][0] = (
- self.currentToken["data"][-1][0].translate(asciiUpper2Lower))
+ self.currentToken["data"][-1][0].lower())
for name, _ in self.currentToken["data"][:-1]:
if self.currentToken["data"][-1][0] == name:
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
@@ -1348,10 +1348,10 @@ def beforeDoctypeNameState(self):
def doctypeNameState(self):
data = self.stream.char()
if data in spaceCharacters:
- self.currentToken["name"] = self.currentToken["name"].translate(asciiUpper2Lower)
+ self.currentToken["name"] = self.currentToken["name"].lower()
self.state = self.afterDoctypeNameState
elif data == ">":
- self.currentToken["name"] = self.currentToken["name"].translate(asciiUpper2Lower)
+ self.currentToken["name"] = self.currentToken["name"].lower()
self.tokenQueue.append(self.currentToken)
self.state = self.dataState
elif data == "\u0000":
@@ -1363,7 +1363,7 @@ def doctypeNameState(self):
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"eof-in-doctype-name"})
self.currentToken["correct"] = False
- self.currentToken["name"] = self.currentToken["name"].translate(asciiUpper2Lower)
+ self.currentToken["name"] = self.currentToken["name"].lower()
self.tokenQueue.append(self.currentToken)
self.state = self.dataState
else:
diff --git a/html5lib/constants.py b/html5lib/constants.py
index fe3e237c..d6f8cef1 100644
--- a/html5lib/constants.py
+++ b/html5lib/constants.py
@@ -538,14 +538,11 @@
"tr"
])
-asciiLowercase = frozenset(string.ascii_lowercase)
asciiUppercase = frozenset(string.ascii_uppercase)
asciiLetters = frozenset(string.ascii_letters)
digits = frozenset(string.digits)
hexDigits = frozenset(string.hexdigits)
-asciiUpper2Lower = {ord(c): ord(c.lower()) for c in string.ascii_uppercase}
-
# Heading elements need to be ordered
headingElements = (
"h1",
diff --git a/html5lib/html5parser.py b/html5lib/html5parser.py
index 74d829d9..8967f1e2 100644
--- a/html5lib/html5parser.py
+++ b/html5lib/html5parser.py
@@ -11,7 +11,7 @@
from . import _utils
from .constants import (
- spaceCharacters, asciiUpper2Lower,
+ spaceCharacters,
specialElements, headingElements, cdataElements, rcdataElements,
tokenTypes, tagTokenTypes,
namespaces,
@@ -183,8 +183,7 @@ def isHTMLIntegrationPoint(self, element):
if (element.name == "annotation-xml" and
element.namespace == namespaces["mathml"]):
return ("encoding" in element.attributes and
- element.attributes["encoding"].translate(
- asciiUpper2Lower) in
+ element.attributes["encoding"].lower() in
("text/html", "application/xhtml+xml"))
else:
return (element.namespace, element.name) in htmlIntegrationPointElements
@@ -520,7 +519,7 @@ def processDoctype(self, token):
self.tree.insertDoctype(token)
if publicId != "":
- publicId = publicId.translate(asciiUpper2Lower)
+ publicId = publicId.lower()
if (not correct or token["name"] != "html" or
publicId.startswith(
@@ -1165,7 +1164,7 @@ def startTagInput(self, token):
framesetOK = self.parser.framesetOK
self.startTagVoidFormatting(token)
if ("type" in token["data"] and
- token["data"]["type"].translate(asciiUpper2Lower) == "hidden"):
+ token["data"]["type"].lower() == "hidden"):
# input type=hidden doesn't change framesetOK
self.parser.framesetOK = framesetOK
@@ -1771,7 +1770,7 @@ def startTagStyleScript(self, token):
def startTagInput(self, token):
if ("type" in token["data"] and
- token["data"]["type"].translate(asciiUpper2Lower) == "hidden"):
+ token["data"]["type"].lower() == "hidden"):
self.parser.parseError("unexpected-hidden-input-in-table")
self.tree.insertElement(token)
# XXX associate with form
@@ -2512,11 +2511,11 @@ def processStartTag(self, token):
def processEndTag(self, token):
nodeIndex = len(self.tree.openElements) - 1
node = self.tree.openElements[-1]
- if node.name.translate(asciiUpper2Lower) != token["name"]:
+ if node.name.lower() != token["name"]:
self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
while True:
- if node.name.translate(asciiUpper2Lower) == token["name"]:
+ if node.name.lower() == token["name"]:
# XXX this isn't in the spec but it seems necessary
if self.parser.phase == self.parser.phases["inTableText"]:
self.parser.phase.flushCharacters()