Get encoding tests running again under nose and Py3 · html5lib/html5lib-python@2816de7 · GitHub
[go: up one dir, main page]

Skip to content

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 2816de7

Browse files
committed
Get encoding tests running again under nose and Py3
1 parent a32418c commit 2816de7

File tree

2 files changed

+32
-39
lines changed

2 files changed

+32
-39
lines changed

html5lib/tests/support.py

Lines changed: 10 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -74,8 +74,12 @@ def __getitem__(self, key):
7474
return dict.get(self, key, self.default)
7575

7676
class TestData(object):
77-
def __init__(self, filename, newTestHeading="data"):
78-
self.f = codecs.open(filename, encoding="utf8")
77+
def __init__(self, filename, newTestHeading="data", encoding="utf8"):
78+
if encoding == None:
79+
self.f = open(filename, mode="rb")
80+
else:
81+
self.f = codecs.open(filename, encoding=encoding)
82+
self.encoding = encoding
7983
self.newTestHeading = newTestHeading
8084

8185
def __del__(self):
@@ -93,7 +97,7 @@ def __iter__(self):
9397
yield self.normaliseOutput(data)
9498
data = DefaultDict(None)
9599
key = heading
96-
data[key]=""
100+
data[key]="" if self.encoding else b""
97101
elif key is not None:
98102
data[key] += line
99103
if data:
@@ -102,15 +106,16 @@ def __iter__(self):
102106
def isSectionHeading(self, line):
103107
"""If the current heading is a test section heading return the heading,
104108
otherwise return False"""
105-
if line.startswith("#"):
109+
#print(line)
110+
if line.startswith("#" if self.encoding else b"#"):
106111
return line[1:].strip()
107112
else:
108113
return False
109114

110115
def normaliseOutput(self, data):
111116
#Remove trailing newlines
112117
for key,value in data.items():
113-
if value.endswith("\n"):
118+
if value.endswith("\n" if self.encoding else b"\n"):
114119
data[key] = value[:-1]
115120
return data
116121

html5lib/tests/test_encoding.py

Lines changed: 22 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -23,42 +23,30 @@ def test_codec_name_c(self):
2323
def test_codec_name_d(self):
2424
self.assertEqual(inputstream.codecName("ISO_8859--1"), "windows-1252")
2525

26-
def buildTestSuite():
26+
def runEncodingTest(data, encoding):
27+
p = HTMLParser()
28+
t = p.parse(data, useChardet=False)
29+
encoding = encoding.lower().decode("ascii")
30+
31+
errorMessage = ("Input:\n%s\nExpected:\n%s\nRecieved\n%s\n"%
32+
(data, repr(encoding),
33+
repr(p.tokenizer.stream.charEncoding[0])))
34+
assert encoding == p.tokenizer.stream.charEncoding[0], errorMessage
35+
36+
def test_encoding():
2737
for filename in get_data_files("encoding"):
2838
test_name = os.path.basename(filename).replace('.dat',''). \
2939
replace('-','')
30-
tests = TestData(filename, "data")
40+
tests = TestData(filename, b"data", encoding=None)
3141
for idx, test in enumerate(tests):
32-
def encodingTest(self, data=test['data'],
33-
encoding=test['encoding']):
34-
p = HTMLParser()
35-
t = p.parse(data, useChardet=False)
36-
37-
errorMessage = ("Input:\n%s\nExpected:\n%s\nRecieved\n%s\n"%
38-
(data, repr(encoding.lower()),
39-
repr(p.tokenizer.stream.charEncoding)))
40-
self.assertEquals(encoding.lower(),
41-
p.tokenizer.stream.charEncoding[0],
42-
errorMessage)
43-
setattr(Html5EncodingTestCase, 'test_%s_%d' % (test_name, idx+1),
44-
encodingTest)
45-
46-
try:
47-
import chardet
48-
def test_chardet(self):
49-
data = open(os.path.join(test_dir, "encoding" , "chardet", "test_big5.txt")).read()
50-
encoding = inputstream.HTMLInputStream(data).charEncoding
51-
assert encoding[0].lower() == "big5"
52-
setattr(Html5EncodingTestCase, 'test_chardet', test_chardet)
53-
except ImportError:
54-
print("chardet not found, skipping chardet tests")
55-
42+
yield (runEncodingTest, test[b'data'], test[b'encoding'])
5643

57-
return unittest.defaultTestLoader.loadTestsFromName(__name__)
58-
59-
def main():
60-
buildTestSuite()
61-
unittest.main()
62-
63-
if __name__ == "__main__":
64-
main()
44+
try:
45+
import chardet
46+
def test_chardet(self):
47+
data = open(os.path.join(test_dir, "encoding" , "chardet", "test_big5.txt")).read()
48+
encoding = inputstream.HTMLInputStream(data).charEncoding
49+
assert encoding[0].lower() == "big5"
50+
setattr(Html5EncodingTestCase, 'test_chardet', test_chardet)
51+
except ImportError:
52+
print("chardet not found, skipping chardet tests")

0 commit comments

Comments
 (0)
0