Move to pytest4/5 · Matt5j/html5lib-python@93c3555 · GitHub
[go: up one dir, main page]

Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 93c3555

Browse files
authored
Move to pytest4/5
This largely involves moving away from using generators as tests
1 parent 5cd73ef commit 93c3555

7 files changed

+98
-119
lines changed

.pytest.expect

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
pytest-expect file v1
2-
(2, 7, 11, 'final', 0)
3-
b'html5lib/tests/test_encoding.py::test_encoding::[110]': FAIL
4-
b'html5lib/tests/test_encoding.py::test_encoding::[111]': FAIL
2+
(2, 7, 18, 'final', 0)
3+
b'html5lib/tests/test_encoding.py::test_parser_encoding[<!DOCTYPE HTML>\\n<script>document.write(\'<meta charset="ISO-8859-\' + \'2">\')</script>-iso-8859-2]': FAIL
4+
b'html5lib/tests/test_encoding.py::test_prescan_encoding[<!DOCTYPE HTML>\\n<script>document.write(\'<meta charset="ISO-8859-\' + \'2">\')</script>-iso-8859-2]': FAIL
55
u'html5lib/tests/testdata/tokenizer/test2.test::0::dataState': FAIL
66
u'html5lib/tests/testdata/tokenizer/test3.test::228::dataState': FAIL
77
u'html5lib/tests/testdata/tokenizer/test3.test::231::dataState': FAIL

html5lib/tests/test_encoding.py

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,15 @@ def test_parser_args_raises(kwargs):
7575
assert exc_info.value.args[0].startswith("Cannot set an encoding with a unicode input")
7676

7777

78-
def runParserEncodingTest(data, encoding):
78+
def param_encoding():
79+
for filename in get_data_files("encoding"):
80+
tests = _TestData(filename, b"data", encoding=None)
81+
for test in tests:
82+
yield test[b'data'], test[b'encoding']
83+
84+
85+
@pytest.mark.parametrize("data, encoding", param_encoding())
86+
def test_parser_encoding(data, encoding):
7987
p = HTMLParser()
8088
assert p.documentEncoding is None
8189
p.parse(data, useChardet=False)
@@ -84,7 +92,8 @@ def runParserEncodingTest(data, encoding):
8492
assert encoding == p.documentEncoding, errorMessage(data, encoding, p.documentEncoding)
8593

8694

87-
def runPreScanEncodingTest(data, encoding):
95+
@pytest.mark.parametrize("data, encoding", param_encoding())
96+
def test_prescan_encoding(data, encoding):
8897
stream = _inputstream.HTMLBinaryInputStream(data, useChardet=False)
8998
encoding = encoding.lower().decode("ascii")
9099

@@ -95,14 +104,6 @@ def runPreScanEncodingTest(data, encoding):
95104
assert encoding == stream.charEncoding[0].name, errorMessage(data, encoding, stream.charEncoding[0].name)
96105

97106

98-
def test_encoding():
99-
for filename in get_data_files("encoding"):
100-
tests = _TestData(filename, b"data", encoding=None)
101-
for test in tests:
102-
yield (runParserEncodingTest, test[b'data'], test[b'encoding'])
103-
yield (runPreScanEncodingTest, test[b'data'], test[b'encoding'])
104-
105-
106107
# pylint:disable=wrong-import-position
107108
try:
108109
import chardet # noqa

html5lib/tests/test_sanitizer.py

Lines changed: 25 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,11 @@
11
from __future__ import absolute_import, division, unicode_literals
22

3+
import pytest
4+
35
from html5lib import constants, parseFragment, serialize
46
from html5lib.filters import sanitizer
57

68

7-
def runSanitizerTest(_, expected, input):
8-
parsed = parseFragment(expected)
9-
expected = serialize(parsed,
10-
omit_optional_tags=False,
11-
use_trailing_solidus=True,
12-
space_before_trailing_solidus=False,
13-
quote_attr_values="always",
14-
quote_char='"',
15-
alphabetical_attributes=True)
16-
assert expected == sanitize_html(input)
17-
18-
199
def sanitize_html(stream):
2010
parsed = parseFragment(stream)
2111
serialized = serialize(parsed,
@@ -59,27 +49,27 @@ def test_data_uri_disallowed_type():
5949
assert expected == sanitized
6050

6151

62-
def test_sanitizer():
52+
def param_sanitizer():
6353
for ns, tag_name in sanitizer.allowed_elements:
6454
if ns != constants.namespaces["html"]:
6555
continue
6656
if tag_name in ['caption', 'col', 'colgroup', 'optgroup', 'option', 'table', 'tbody', 'td',
6757
'tfoot', 'th', 'thead', 'tr', 'select']:
6858
continue # TODO
6959
if tag_name == 'image':
70-
yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
60+
yield ("test_should_allow_%s_tag" % tag_name,
7161
"<img title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz",
7262
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name))
7363
elif tag_name == 'br':
74-
yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
64+
yield ("test_should_allow_%s_tag" % tag_name,
7565
"<br title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz<br/>",
7666
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name))
7767
elif tag_name in constants.voidElements:
78-
yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
68+
yield ("test_should_allow_%s_tag" % tag_name,
7969
"<%s title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz" % tag_name,
8070
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name))
8171
else:
82-
yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
72+
yield ("test_should_allow_%s_tag" % tag_name,
8373
"<%s title=\"1\">foo &lt;bad&gt;bar&lt;/bad&gt; baz</%s>" % (tag_name, tag_name),
8474
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name))
8575

@@ -93,15 +83,15 @@ def test_sanitizer():
9383
attribute_value = 'foo'
9484
if attribute_name in sanitizer.attr_val_is_uri:
9585
attribute_value = '%s://sub.domain.tld/path/object.ext' % sanitizer.allowed_protocols[0]
96-
yield (runSanitizerTest, "test_should_allow_%s_attribute" % attribute_name,
86+
yield ("test_should_allow_%s_attribute" % attribute_name,
9787
"<p %s=\"%s\">foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>" % (attribute_name, attribute_value),
9888
"<p %s='%s'>foo <bad>bar</bad> baz</p>" % (attribute_name, attribute_value))
9989

10090
for protocol in sanitizer.allowed_protocols:
10191
rest_of_uri = '//sub.domain.tld/path/object.ext'
10292
if protocol == 'data':
10393
rest_of_uri = 'image/png;base64,aGVsbG8gd29ybGQ='
104-
yield (runSanitizerTest, "test_should_allow_uppercase_%s_uris" % protocol,
94+
yield ("test_should_allow_uppercase_%s_uris" % protocol,
10595
"<img src=\"%s:%s\">foo</a>" % (protocol, rest_of_uri),
10696
"""<img src="%s:%s">foo</a>""" % (protocol, rest_of_uri))
10797

@@ -110,11 +100,26 @@ def test_sanitizer():
110100
if protocol == 'data':
111101
rest_of_uri = 'image/png;base64,aGVsbG8gd29ybGQ='
112102
protocol = protocol.upper()
113-
yield (runSanitizerTest, "test_should_allow_uppercase_%s_uris" % protocol,
103+
yield ("test_should_allow_uppercase_%s_uris" % protocol,
114104
"<img src=\"%s:%s\">foo</a>" % (protocol, rest_of_uri),
115105
"""<img src="%s:%s">foo</a>""" % (protocol, rest_of_uri))
116106

117107

108+
@pytest.mark.parametrize("expected, input",
109+
(pytest.param(expected, input, id=id)
110+
for id, expected, input in param_sanitizer()))
111+
def test_sanitizer(expected, input):
112+
parsed = parseFragment(expected)
113+
expected = serialize(parsed,
114+
omit_optional_tags=False,
115+
use_trailing_solidus=True,
116+
space_before_trailing_solidus=False,
117+
quote_attr_values="always",
118+
quote_char='"',
119+
alphabetical_attributes=True)
120+
assert expected == sanitize_html(input)
121+
122+
118123
def test_lowercase_color_codes_in_style():
119124
sanitized = sanitize_html("<p style=\"border: 1px solid #a2a2a2;\"></p>")
120125
expected = '<p style=\"border: 1px solid #a2a2a2;\"></p>'

html5lib/tests/test_serializer.py

Lines changed: 25 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -89,19 +89,6 @@ def serialize_html(input, options):
8989
return serializer.render(stream, encoding)
9090

9191

92-
def runSerializerTest(input, expected, options):
93-
encoding = options.get("encoding", None)
94-
95-
if encoding:
96-
expected = list(map(lambda x: x.encode(encoding), expected))
97-
98-
result = serialize_html(input, options)
99-
if len(expected) == 1:
100-
assert expected[0] == result, "Expected:\n%s\nActual:\n%s\nOptions:\n%s" % (expected[0], result, str(options))
101-
elif result not in expected:
102-
assert False, "Expected: %s, Received: %s" % (expected, result)
103-
104-
10592
def throwsWithLatin1(input):
10693
with pytest.raises(UnicodeEncodeError):
10794
serialize_html(input, {"encoding": "iso-8859-1"})
@@ -120,13 +107,13 @@ def testDoctypeSystemId():
120107

121108

122109
def testCdataCharacters():
123-
runSerializerTest([["StartTag", "http://www.w3.org/1999/xhtml", "style", {}], ["Characters", "\u0101"]],
124-
["<style>&amacr;"], {"encoding": "iso-8859-1"})
110+
test_serializer([["StartTag", "http://www.w3.org/1999/xhtml", "style", {}], ["Characters", "\u0101"]],
111+
["<style>&amacr;"], {"encoding": "iso-8859-1"})
125112

126113

127114
def testCharacters():
128-
runSerializerTest([["Characters", "\u0101"]],
129-
["&amacr;"], {"encoding": "iso-8859-1"})
115+
test_serializer([["Characters", "\u0101"]],
116+
["&amacr;"], {"encoding": "iso-8859-1"})
130117

131118

132119
def testStartTagName():
@@ -138,9 +125,9 @@ def testAttributeName():
138125

139126

140127
def testAttributeValue():
141-
runSerializerTest([["StartTag", "http://www.w3.org/1999/xhtml", "span",
142-
[{"namespace": None, "name": "potato", "value": "\u0101"}]]],
143-
["<span potato=&amacr;>"], {"encoding": "iso-8859-1"})
128+
test_serializer([["StartTag", "http://www.w3.org/1999/xhtml", "span",
129+
[{"namespace": None, "name": "potato", "value": "\u0101"}]]],
130+
["<span potato=&amacr;>"], {"encoding": "iso-8859-1"})
144131

145132

146133
def testEndTagName():
@@ -165,7 +152,7 @@ def testSpecQuoteAttribute(c):
165152
else:
166153
output_ = ['<span foo="%s">' % c]
167154
options_ = {"quote_attr_values": "spec"}
168-
runSerializerTest(input_, output_, options_)
155+
test_serializer(input_, output_, options_)
169156

170157

171158
@pytest.mark.parametrize("c", list("\t\n\u000C\x20\r\"'=<>`"
@@ -184,7 +171,7 @@ def testLegacyQuoteAttribute(c):
184171
else:
185172
output_ = ['<span foo="%s">' % c]
186173
options_ = {"quote_attr_values": "legacy"}
187-
runSerializerTest(input_, output_, options_)
174+
test_serializer(input_, output_, options_)
188175

189176

190177
@pytest.fixture
@@ -217,9 +204,23 @@ def testEntityNoResolve(lxml_parser):
217204
assert result == '<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&beta;</html>'
218205

219206

220-
def test_serializer():
207+
def param_serializer():
221208
for filename in get_data_files('serializer-testdata', '*.test', os.path.dirname(__file__)):
222209
with open(filename) as fp:
223210
tests = json.load(fp)
224211
for test in tests['tests']:
225-
yield runSerializerTest, test["input"], test["expected"], test.get("options", {})
212+
yield test["input"], test["expected"], test.get("options", {})
213+
214+
215+
@pytest.mark.parametrize("input, expected, options", param_serializer())
216+
def test_serializer(input, expected, options):
217+
encoding = options.get("encoding", None)
218+
219+
if encoding:
220+
expected = list(map(lambda x: x.encode(encoding), expected))
221+
222+
result = serialize_html(input, options)
223+
if len(expected) == 1:
224+
assert expected[0] == result, "Expected:\n%s\nActual:\n%s\nOptions:\n%s" % (expected[0], result, str(options))
225+
elif result not in expected:
226+
assert False, "Expected: %s, Received: %s" % (expected, result)

html5lib/tests/test_treewalkers.py

Lines changed: 20 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -61,24 +61,7 @@ def set_attribute_on_first_child(docfrag, name, value, treeName):
6161
setter['ElementTree'](docfrag)(name, value)
6262

6363

64-
def runTreewalkerEditTest(intext, expected, attrs_to_add, tree):
65-
"""tests what happens when we add attributes to the intext"""
66-
treeName, treeClass = tree
67-
if treeClass is None:
68-
pytest.skip("Treebuilder not loaded")
69-
parser = html5parser.HTMLParser(tree=treeClass["builder"])
70-
document = parser.parseFragment(intext)
71-
for nom, val in attrs_to_add:
72-
set_attribute_on_first_child(document, nom, val, treeName)
73-
74-
document = treeClass.get("adapter", lambda x: x)(document)
75-
output = treewalkers.pprint(treeClass["walker"](document))
76-
output = attrlist.sub(sortattrs, output)
77-
if output not in expected:
78-
raise AssertionError("TreewalkerEditTest: %s\nExpected:\n%s\nReceived:\n%s" % (treeName, expected, output))
79-
80-
81-
def test_treewalker_six_mix():
64+
def param_treewalker_six_mix():
8265
"""Str/Unicode mix. If str attrs added to tree"""
8366

8467
# On Python 2.x string literals are of type str. Unless, like this
@@ -99,7 +82,25 @@ def test_treewalker_six_mix():
9982

10083
for tree in sorted(treeTypes.items()):
10184
for intext, attrs, expected in sm_tests:
102-
yield runTreewalkerEditTest, intext, expected, attrs, tree
85+
yield intext, expected, attrs, tree
86+
87+
88+
@pytest.mark.parametrize("intext, expected, attrs_to_add, tree", param_treewalker_six_mix())
89+
def test_treewalker_six_mix(intext, expected, attrs_to_add, tree):
90+
"""tests what happens when we add attributes to the intext"""
91+
treeName, treeClass = tree
92+
if treeClass is None:
93+
pytest.skip("Treebuilder not loaded")
94+
parser = html5parser.HTMLParser(tree=treeClass["builder"])
95+
document = parser.parseFragment(intext)
96+
for nom, val in attrs_to_add:
97+
set_attribute_on_first_child(document, nom, val, treeName)
98+
99+
document = treeClass.get("adapter", lambda x: x)(document)
100+
output = treewalkers.pprint(treeClass["walker"](document))
101+
output = attrlist.sub(sortattrs, output)
102+
if output not in expected:
103+
raise AssertionError("TreewalkerEditTest: %s\nExpected:\n%s\nReceived:\n%s" % (treeName, expected, output))
103104

104105

105106
@pytest.mark.parametrize("tree,char", itertools.product(sorted(treeTypes.items()), ["x", "\u1234"]))

html5lib/tests/tree_construction.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -57,8 +57,6 @@ def _getParserTests(self, treeName, treeAPIs):
5757
item.add_marker(pytest.mark.parser)
5858
if namespaceHTMLElements:
5959
item.add_marker(pytest.mark.namespaced)
60-
if treeAPIs is None:
61-
item.add_marker(pytest.mark.skipif(True, reason="Treebuilder not loaded"))
6260
yield item
6361

6462
def _getTreeWalkerTests(self, treeName, treeAPIs):
@@ -69,8 +67,6 @@ def _getTreeWalkerTests(self, treeName, treeAPIs):
6967
treeAPIs)
7068
item.add_marker(getattr(pytest.mark, treeName))
7169
item.add_marker(pytest.mark.treewalker)
72-
if treeAPIs is None:
73-
item.add_marker(pytest.mark.skipif(True, reason="Treebuilder not loaded"))
7470
yield item
7571

7672

@@ -84,12 +80,14 @@ def convertTreeDump(data):
8480
class ParserTest(pytest.Item):
8581
def __init__(self, name, parent, test, treeClass, namespaceHTMLElements):
8682
super(ParserTest, self).__init__(name, parent)
87-
self.obj = lambda: 1 # this is to hack around skipif needing a function!
8883
self.test = test
8984
self.treeClass = treeClass
9085
self.namespaceHTMLElements = namespaceHTMLElements
9186

9287
def runtest(self):
88+
if self.treeClass is None:
89+
pytest.skip("Treebuilder not loaded")
90+
9391
p = html5parser.HTMLParser(tree=self.treeClass,
9492
namespaceHTMLElements=self.namespaceHTMLElements)
9593

@@ -147,11 +145,13 @@ def repr_failure(self, excinfo):
147145
class TreeWalkerTest(pytest.Item):
148146
def __init__(self, name, parent, test, treeAPIs):
149147
super(TreeWalkerTest, self).__init__(name, parent)
150-
self.obj = lambda: 1 # this is to hack around skipif needing a function!
151148
self.test = test
152149
self.treeAPIs = treeAPIs
153150

154151
def runtest(self):
152+
if self.treeAPIs is None:
153+
pytest.skip("Treebuilder not loaded")
154+
155155
p = html5parser.HTMLParser(tree=self.treeAPIs["builder"])
156156

157157
input = self.test['data']

0 commit comments

Comments
 (0)
0