From 5bb2dcda8e9154c28cc6a50a35504ea001c35240 Mon Sep 17 00:00:00 2001 From: Sam Sneddon Date: Thu, 21 May 2020 20:18:57 +0100 Subject: [PATCH 1/6] Fix pytest 4 support --- html5lib/tests/test_treewalkers.py | 39 +++++++++++++++--------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/html5lib/tests/test_treewalkers.py b/html5lib/tests/test_treewalkers.py index 67fc89e5..81d5132c 100644 --- a/html5lib/tests/test_treewalkers.py +++ b/html5lib/tests/test_treewalkers.py @@ -61,24 +61,7 @@ def set_attribute_on_first_child(docfrag, name, value, treeName): setter['ElementTree'](docfrag)(name, value) -def runTreewalkerEditTest(intext, expected, attrs_to_add, tree): - """tests what happens when we add attributes to the intext""" - treeName, treeClass = tree - if treeClass is None: - pytest.skip("Treebuilder not loaded") - parser = html5parser.HTMLParser(tree=treeClass["builder"]) - document = parser.parseFragment(intext) - for nom, val in attrs_to_add: - set_attribute_on_first_child(document, nom, val, treeName) - - document = treeClass.get("adapter", lambda x: x)(document) - output = treewalkers.pprint(treeClass["walker"](document)) - output = attrlist.sub(sortattrs, output) - if output not in expected: - raise AssertionError("TreewalkerEditTest: %s\nExpected:\n%s\nReceived:\n%s" % (treeName, expected, output)) - - -def test_treewalker_six_mix(): +def param_treewalker_six_mix(): """Str/Unicode mix. If str attrs added to tree""" # On Python 2.x string literals are of type str. Unless, like this @@ -99,7 +82,25 @@ def test_treewalker_six_mix(): for tree in sorted(treeTypes.items()): for intext, attrs, expected in sm_tests: - yield runTreewalkerEditTest, intext, expected, attrs, tree + yield intext, expected, attrs, tree + + +@pytest.mark.parametrize("intext, expected, attrs_to_add, tree", param_treewalker_six_mix()) +def test_treewalker_six_mix(intext, expected, attrs_to_add, tree): + """tests what happens when we add attributes to the intext""" + treeName, treeClass = tree + if treeClass is None: + pytest.skip("Treebuilder not loaded") + parser = html5parser.HTMLParser(tree=treeClass["builder"]) + document = parser.parseFragment(intext) + for nom, val in attrs_to_add: + set_attribute_on_first_child(document, nom, val, treeName) + + document = treeClass.get("adapter", lambda x: x)(document) + output = treewalkers.pprint(treeClass["walker"](document)) + output = attrlist.sub(sortattrs, output) + if output not in expected: + raise AssertionError("TreewalkerEditTest: %s\nExpected:\n%s\nReceived:\n%s" % (treeName, expected, output)) @pytest.mark.parametrize("tree,char", itertools.product(sorted(treeTypes.items()), ["x", "\u1234"])) From 38bb175c92924767d616a68c630752b07d5548d0 Mon Sep 17 00:00:00 2001 From: Sam Sneddon Date: Thu, 21 May 2020 20:27:03 +0100 Subject: [PATCH 2/6] Fix test_encoding pytest4 --- .pytest.expect | 6 +++--- html5lib/tests/test_encoding.py | 21 +++++++++++---------- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/.pytest.expect b/.pytest.expect index 0fa326f0..8fe88aa9 100644 --- a/.pytest.expect +++ b/.pytest.expect @@ -1,7 +1,7 @@ pytest-expect file v1 -(2, 7, 11, 'final', 0) -b'html5lib/tests/test_encoding.py::test_encoding::[110]': FAIL -b'html5lib/tests/test_encoding.py::test_encoding::[111]': FAIL +(2, 7, 18, 'final', 0) +b'html5lib/tests/test_encoding.py::test_parser_encoding[\n-iso-8859-2]': FAIL +b'html5lib/tests/test_encoding.py::test_prescan_encoding[\n-iso-8859-2]': FAIL u'html5lib/tests/testdata/tokenizer/test2.test::0::dataState': FAIL u'html5lib/tests/testdata/tokenizer/test3.test::228::dataState': FAIL u'html5lib/tests/testdata/tokenizer/test3.test::231::dataState': FAIL diff --git a/html5lib/tests/test_encoding.py b/html5lib/tests/test_encoding.py index 9a411c77..47c4814a 100644 --- a/html5lib/tests/test_encoding.py +++ b/html5lib/tests/test_encoding.py @@ -75,7 +75,15 @@ def test_parser_args_raises(kwargs): assert exc_info.value.args[0].startswith("Cannot set an encoding with a unicode input") -def runParserEncodingTest(data, encoding): +def param_encoding(): + for filename in get_data_files("encoding"): + tests = _TestData(filename, b"data", encoding=None) + for test in tests: + yield test[b'data'], test[b'encoding'] + + +@pytest.mark.parametrize("data, encoding", param_encoding()) +def test_parser_encoding(data, encoding): p = HTMLParser() assert p.documentEncoding is None p.parse(data, useChardet=False) @@ -84,7 +92,8 @@ def runParserEncodingTest(data, encoding): assert encoding == p.documentEncoding, errorMessage(data, encoding, p.documentEncoding) -def runPreScanEncodingTest(data, encoding): +@pytest.mark.parametrize("data, encoding", param_encoding()) +def test_prescan_encoding(data, encoding): stream = _inputstream.HTMLBinaryInputStream(data, useChardet=False) encoding = encoding.lower().decode("ascii") @@ -95,14 +104,6 @@ def runPreScanEncodingTest(data, encoding): assert encoding == stream.charEncoding[0].name, errorMessage(data, encoding, stream.charEncoding[0].name) -def test_encoding(): - for filename in get_data_files("encoding"): - tests = _TestData(filename, b"data", encoding=None) - for test in tests: - yield (runParserEncodingTest, test[b'data'], test[b'encoding']) - yield (runPreScanEncodingTest, test[b'data'], test[b'encoding']) - - # pylint:disable=wrong-import-position try: import chardet # noqa From b45b18649eb8441a278dde76d8284f56d63948e9 Mon Sep 17 00:00:00 2001 From: Sam Sneddon Date: Thu, 21 May 2020 22:50:42 +0100 Subject: [PATCH 3/6] serializer --- html5lib/tests/test_serializer.py | 49 ++++++++++++++++--------------- 1 file changed, 25 insertions(+), 24 deletions(-) diff --git a/html5lib/tests/test_serializer.py b/html5lib/tests/test_serializer.py index c23592af..bce62459 100644 --- a/html5lib/tests/test_serializer.py +++ b/html5lib/tests/test_serializer.py @@ -89,19 +89,6 @@ def serialize_html(input, options): return serializer.render(stream, encoding) -def runSerializerTest(input, expected, options): - encoding = options.get("encoding", None) - - if encoding: - expected = list(map(lambda x: x.encode(encoding), expected)) - - result = serialize_html(input, options) - if len(expected) == 1: - assert expected[0] == result, "Expected:\n%s\nActual:\n%s\nOptions:\n%s" % (expected[0], result, str(options)) - elif result not in expected: - assert False, "Expected: %s, Received: %s" % (expected, result) - - def throwsWithLatin1(input): with pytest.raises(UnicodeEncodeError): serialize_html(input, {"encoding": "iso-8859-1"}) @@ -120,13 +107,13 @@ def testDoctypeSystemId(): def testCdataCharacters(): - runSerializerTest([["StartTag", "http://www.w3.org/1999/xhtml", "style", {}], ["Characters", "\u0101"]], - ["