From 5bb2dcda8e9154c28cc6a50a35504ea001c35240 Mon Sep 17 00:00:00 2001
From: Sam Sneddon <me@gsnedders.com>
Date: Thu, 21 May 2020 20:18:57 +0100
Subject: [PATCH 1/6] Fix pytest 4 support

---
 html5lib/tests/test_treewalkers.py | 39 +++++++++++++++---------------
 1 file changed, 20 insertions(+), 19 deletions(-)
diff --git a/html5lib/tests/test_treewalkers.py b/html5lib/tests/test_treewalkers.py
index 67fc89e5..81d5132c 100644
--- a/html5lib/tests/test_treewalkers.py
+++ b/html5lib/tests/test_treewalkers.py
@@ -61,24 +61,7 @@ def set_attribute_on_first_child(docfrag, name, value, treeName):
         setter['ElementTree'](docfrag)(name, value)
 
 
-def runTreewalkerEditTest(intext, expected, attrs_to_add, tree):
-    """tests what happens when we add attributes to the intext"""
-    treeName, treeClass = tree
-    if treeClass is None:
-        pytest.skip("Treebuilder not loaded")
-    parser = html5parser.HTMLParser(tree=treeClass["builder"])
-    document = parser.parseFragment(intext)
-    for nom, val in attrs_to_add:
-        set_attribute_on_first_child(document, nom, val, treeName)
-
-    document = treeClass.get("adapter", lambda x: x)(document)
-    output = treewalkers.pprint(treeClass["walker"](document))
-    output = attrlist.sub(sortattrs, output)
-    if output not in expected:
-        raise AssertionError("TreewalkerEditTest: %s\nExpected:\n%s\nReceived:\n%s" % (treeName, expected, output))
-
-
-def test_treewalker_six_mix():
+def param_treewalker_six_mix():
     """Str/Unicode mix. If str attrs added to tree"""
 
     # On Python 2.x string literals are of type str. Unless, like this
@@ -99,7 +82,25 @@ def test_treewalker_six_mix():
 
     for tree in sorted(treeTypes.items()):
         for intext, attrs, expected in sm_tests:
-            yield runTreewalkerEditTest, intext, expected, attrs, tree
+            yield intext, expected, attrs, tree
+
+
+@pytest.mark.parametrize("intext, expected, attrs_to_add, tree", param_treewalker_six_mix())
+def test_treewalker_six_mix(intext, expected, attrs_to_add, tree):
+    """tests what happens when we add attributes to the intext"""
+    treeName, treeClass = tree
+    if treeClass is None:
+        pytest.skip("Treebuilder not loaded")
+    parser = html5parser.HTMLParser(tree=treeClass["builder"])
+    document = parser.parseFragment(intext)
+    for nom, val in attrs_to_add:
+        set_attribute_on_first_child(document, nom, val, treeName)
+
+    document = treeClass.get("adapter", lambda x: x)(document)
+    output = treewalkers.pprint(treeClass["walker"](document))
+    output = attrlist.sub(sortattrs, output)
+    if output not in expected:
+        raise AssertionError("TreewalkerEditTest: %s\nExpected:\n%s\nReceived:\n%s" % (treeName, expected, output))
 
 
 @pytest.mark.parametrize("tree,char", itertools.product(sorted(treeTypes.items()), ["x", "\u1234"]))

From 38bb175c92924767d616a68c630752b07d5548d0 Mon Sep 17 00:00:00 2001
From: Sam Sneddon <me@gsnedders.com>
Date: Thu, 21 May 2020 20:27:03 +0100
Subject: [PATCH 2/6] Fix test_encoding for pytest 4

---
 .pytest.expect                  |  6 +++---
 html5lib/tests/test_encoding.py | 21 +++++++++++----------
 2 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/.pytest.expect b/.pytest.expect
index 0fa326f0..8fe88aa9 100644
--- a/.pytest.expect
+++ b/.pytest.expect
@@ -1,7 +1,7 @@
 pytest-expect file v1
-(2, 7, 11, 'final', 0)
-b'html5lib/tests/test_encoding.py::test_encoding::[110]': FAIL
-b'html5lib/tests/test_encoding.py::test_encoding::[111]': FAIL
+(2, 7, 18, 'final', 0)
+b'html5lib/tests/test_encoding.py::test_parser_encoding[<!DOCTYPE HTML>\n<script>document.write(\'<meta charset="ISO-8859-\' + \'2">\')</script>-iso-8859-2]': FAIL
+b'html5lib/tests/test_encoding.py::test_prescan_encoding[<!DOCTYPE HTML>\n<script>document.write(\'<meta charset="ISO-8859-\' + \'2">\')</script>-iso-8859-2]': FAIL
 u'html5lib/tests/testdata/tokenizer/test2.test::0::dataState': FAIL
 u'html5lib/tests/testdata/tokenizer/test3.test::228::dataState': FAIL
 u'html5lib/tests/testdata/tokenizer/test3.test::231::dataState': FAIL
diff --git a/html5lib/tests/test_encoding.py b/html5lib/tests/test_encoding.py
index 9a411c77..47c4814a 100644
--- a/html5lib/tests/test_encoding.py
+++ b/html5lib/tests/test_encoding.py
@@ -75,7 +75,15 @@ def test_parser_args_raises(kwargs):
     assert exc_info.value.args[0].startswith("Cannot set an encoding with a unicode input")
 
 
-def runParserEncodingTest(data, encoding):
+def param_encoding():
+    for filename in get_data_files("encoding"):
+        tests = _TestData(filename, b"data", encoding=None)
+        for test in tests:
+            yield test[b'data'], test[b'encoding']
+
+
+@pytest.mark.parametrize("data, encoding", param_encoding())
+def test_parser_encoding(data, encoding):
     p = HTMLParser()
     assert p.documentEncoding is None
     p.parse(data, useChardet=False)
@@ -84,7 +92,8 @@ def runParserEncodingTest(data, encoding):
     assert encoding == p.documentEncoding, errorMessage(data, encoding, p.documentEncoding)
 
 
-def runPreScanEncodingTest(data, encoding):
+@pytest.mark.parametrize("data, encoding", param_encoding())
+def test_prescan_encoding(data, encoding):
     stream = _inputstream.HTMLBinaryInputStream(data, useChardet=False)
     encoding = encoding.lower().decode("ascii")
 
@@ -95,14 +104,6 @@ def runPreScanEncodingTest(data, encoding):
     assert encoding == stream.charEncoding[0].name, errorMessage(data, encoding, stream.charEncoding[0].name)
 
 
-def test_encoding():
-    for filename in get_data_files("encoding"):
-        tests = _TestData(filename, b"data", encoding=None)
-        for test in tests:
-            yield (runParserEncodingTest, test[b'data'], test[b'encoding'])
-            yield (runPreScanEncodingTest, test[b'data'], test[b'encoding'])
-
-
 # pylint:disable=wrong-import-position
 try:
     import chardet  # noqa

From b45b18649eb8441a278dde76d8284f56d63948e9 Mon Sep 17 00:00:00 2001
From: Sam Sneddon <me@gsnedders.com>
Date: Thu, 21 May 2020 22:50:42 +0100
Subject: [PATCH 3/6] Convert serializer yield tests to parametrize

---
 html5lib/tests/test_serializer.py | 49 ++++++++++++++++---------------
 1 file changed, 25 insertions(+), 24 deletions(-)

diff --git a/html5lib/tests/test_serializer.py b/html5lib/tests/test_serializer.py
index c23592af..bce62459 100644
--- a/html5lib/tests/test_serializer.py
+++ b/html5lib/tests/test_serializer.py
@@ -89,19 +89,6 @@ def serialize_html(input, options):
     return serializer.render(stream, encoding)
 
 
-def runSerializerTest(input, expected, options):
-    encoding = options.get("encoding", None)
-
-    if encoding:
-        expected = list(map(lambda x: x.encode(encoding), expected))
-
-    result = serialize_html(input, options)
-    if len(expected) == 1:
-        assert expected[0] == result, "Expected:\n%s\nActual:\n%s\nOptions:\n%s" % (expected[0], result, str(options))
-    elif result not in expected:
-        assert False, "Expected: %s, Received: %s" % (expected, result)
-
-
 def throwsWithLatin1(input):
     with pytest.raises(UnicodeEncodeError):
         serialize_html(input, {"encoding": "iso-8859-1"})
@@ -120,13 +107,13 @@ def testDoctypeSystemId():
 
 
 def testCdataCharacters():
-    runSerializerTest([["StartTag", "http://www.w3.org/1999/xhtml", "style", {}], ["Characters", "\u0101"]],
-                      ["<style>&amacr;"], {"encoding": "iso-8859-1"})
+    test_serializer([["StartTag", "http://www.w3.org/1999/xhtml", "style", {}], ["Characters", "\u0101"]],
+                    ["<style>&amacr;"], {"encoding": "iso-8859-1"})
 
 
 def testCharacters():
-    runSerializerTest([["Characters", "\u0101"]],
-                      ["&amacr;"], {"encoding": "iso-8859-1"})
+    test_serializer([["Characters", "\u0101"]],
+                    ["&amacr;"], {"encoding": "iso-8859-1"})
 
 
 def testStartTagName():
@@ -138,9 +125,9 @@ def testAttributeName():
 
 
 def testAttributeValue():
-    runSerializerTest([["StartTag", "http://www.w3.org/1999/xhtml", "span",
-                        [{"namespace": None, "name": "potato", "value": "\u0101"}]]],
-                      ["<span potato=&amacr;>"], {"encoding": "iso-8859-1"})
+    test_serializer([["StartTag", "http://www.w3.org/1999/xhtml", "span",
+                      [{"namespace": None, "name": "potato", "value": "\u0101"}]]],
+                    ["<span potato=&amacr;>"], {"encoding": "iso-8859-1"})
 
 
 def testEndTagName():
@@ -165,7 +152,7 @@ def testSpecQuoteAttribute(c):
     else:
         output_ = ['<span foo="%s">' % c]
     options_ = {"quote_attr_values": "spec"}
-    runSerializerTest(input_, output_, options_)
+    test_serializer(input_, output_, options_)
 
 
 @pytest.mark.parametrize("c", list("\t\n\u000C\x20\r\"'=<>`"
@@ -184,7 +171,7 @@ def testLegacyQuoteAttribute(c):
     else:
         output_ = ['<span foo="%s">' % c]
     options_ = {"quote_attr_values": "legacy"}
-    runSerializerTest(input_, output_, options_)
+    test_serializer(input_, output_, options_)
 
 
 @pytest.fixture
@@ -217,9 +204,23 @@ def testEntityNoResolve(lxml_parser):
     assert result == '<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&beta;</html>'
 
 
-def test_serializer():
+def param_serializer():
     for filename in get_data_files('serializer-testdata', '*.test', os.path.dirname(__file__)):
         with open(filename) as fp:
             tests = json.load(fp)
             for test in tests['tests']:
-                yield runSerializerTest, test["input"], test["expected"], test.get("options", {})
+                yield test["input"], test["expected"], test.get("options", {})
+
+
+@pytest.mark.parametrize("input, expected, options", param_serializer())
+def test_serializer(input, expected, options):
+    encoding = options.get("encoding", None)
+
+    if encoding:
+        expected = list(map(lambda x: x.encode(encoding), expected))
+
+    result = serialize_html(input, options)
+    if len(expected) == 1:
+        assert expected[0] == result, "Expected:\n%s\nActual:\n%s\nOptions:\n%s" % (expected[0], result, str(options))
+    elif result not in expected:
+        assert False, "Expected: %s, Received: %s" % (expected, result)

From 288de28378a25033bae981682f7fa4a4de0f8ac3 Mon Sep 17 00:00:00 2001
From: Sam Sneddon <me@gsnedders.com>
Date: Fri, 22 May 2020 19:21:00 +0100
Subject: [PATCH 4/6] Convert sanitizer yield tests to parametrize

---
 html5lib/tests/test_sanitizer.py | 45 ++++++++++++++++++--------------
 1 file changed, 25 insertions(+), 20 deletions(-)

diff --git a/html5lib/tests/test_sanitizer.py b/html5lib/tests/test_sanitizer.py
index 45046d57..9a8e7f2d 100644
--- a/html5lib/tests/test_sanitizer.py
+++ b/html5lib/tests/test_sanitizer.py
@@ -1,21 +1,11 @@
 from __future__ import absolute_import, division, unicode_literals
 
+import pytest
+
 from html5lib import constants, parseFragment, serialize
 from html5lib.filters import sanitizer
 
 
-def runSanitizerTest(_, expected, input):
-    parsed = parseFragment(expected)
-    expected = serialize(parsed,
-                         omit_optional_tags=False,
-                         use_trailing_solidus=True,
-                         space_before_trailing_solidus=False,
-                         quote_attr_values="always",
-                         quote_char='"',
-                         alphabetical_attributes=True)
-    assert expected == sanitize_html(input)
-
-
 def sanitize_html(stream):
     parsed = parseFragment(stream)
     serialized = serialize(parsed,
@@ -59,7 +49,7 @@ def test_data_uri_disallowed_type():
     assert expected == sanitized
 
 
-def test_sanitizer():
+def param_sanitizer():
     for ns, tag_name in sanitizer.allowed_elements:
         if ns != constants.namespaces["html"]:
             continue
@@ -67,19 +57,19 @@ def test_sanitizer():
                         'tfoot', 'th', 'thead', 'tr', 'select']:
             continue  # TODO
         if tag_name == 'image':
-            yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
+            yield ("test_should_allow_%s_tag" % tag_name,
                    "<img title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz",
                    "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name))
         elif tag_name == 'br':
-            yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
+            yield ("test_should_allow_%s_tag" % tag_name,
                    "<br title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz<br/>",
                    "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name))
         elif tag_name in constants.voidElements:
-            yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
+            yield ("test_should_allow_%s_tag" % tag_name,
                    "<%s title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz" % tag_name,
                    "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name))
         else:
-            yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
+            yield ("test_should_allow_%s_tag" % tag_name,
                    "<%s title=\"1\">foo &lt;bad&gt;bar&lt;/bad&gt; baz</%s>" % (tag_name, tag_name),
                    "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name))
 
@@ -93,7 +83,7 @@ def test_sanitizer():
         attribute_value = 'foo'
         if attribute_name in sanitizer.attr_val_is_uri:
             attribute_value = '%s://sub.domain.tld/path/object.ext' % sanitizer.allowed_protocols[0]
-        yield (runSanitizerTest, "test_should_allow_%s_attribute" % attribute_name,
+        yield ("test_should_allow_%s_attribute" % attribute_name,
                "<p %s=\"%s\">foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>" % (attribute_name, attribute_value),
                "<p %s='%s'>foo <bad>bar</bad> baz</p>" % (attribute_name, attribute_value))
 
@@ -101,7 +91,7 @@ def test_sanitizer():
         rest_of_uri = '//sub.domain.tld/path/object.ext'
         if protocol == 'data':
             rest_of_uri = 'image/png;base64,aGVsbG8gd29ybGQ='
-        yield (runSanitizerTest, "test_should_allow_uppercase_%s_uris" % protocol,
+        yield ("test_should_allow_uppercase_%s_uris" % protocol,
                "<img src=\"%s:%s\">foo</a>" % (protocol, rest_of_uri),
                """<img src="%s:%s">foo</a>""" % (protocol, rest_of_uri))
 
@@ -110,11 +100,26 @@ def test_sanitizer():
         if protocol == 'data':
             rest_of_uri = 'image/png;base64,aGVsbG8gd29ybGQ='
         protocol = protocol.upper()
-        yield (runSanitizerTest, "test_should_allow_uppercase_%s_uris" % protocol,
+        yield ("test_should_allow_uppercase_%s_uris" % protocol,
                "<img src=\"%s:%s\">foo</a>" % (protocol, rest_of_uri),
                """<img src="%s:%s">foo</a>""" % (protocol, rest_of_uri))
 
 
+@pytest.mark.parametrize("expected, input",
+                         (pytest.param(expected, input, id=id)
+                          for id, expected, input in param_sanitizer()))
+def test_sanitizer(expected, input):
+    parsed = parseFragment(expected)
+    expected = serialize(parsed,
+                         omit_optional_tags=False,
+                         use_trailing_solidus=True,
+                         space_before_trailing_solidus=False,
+                         quote_attr_values="always",
+                         quote_char='"',
+                         alphabetical_attributes=True)
+    assert expected == sanitize_html(input)
+
+
 def test_lowercase_color_codes_in_style():
     sanitized = sanitize_html("<p style=\"border: 1px solid #a2a2a2;\"></p>")
     expected = '<p style=\"border: 1px solid #a2a2a2;\"></p>'

From 3f10121d98ce8ca36b4b856275093dd189fdf1d2 Mon Sep 17 00:00:00 2001
From: Sam Sneddon <me@gsnedders.com>
Date: Fri, 22 May 2020 20:37:19 +0100
Subject: [PATCH 5/6] Update pytest

---
 .pytest.expect                      |  4 +--
 html5lib/tests/tree_construction.py | 12 ++++-----
 requirements-test.txt               | 41 +++++------------------------
 3 files changed, 15 insertions(+), 42 deletions(-)

diff --git a/.pytest.expect b/.pytest.expect
index 8fe88aa9..1b3705a7 100644
--- a/.pytest.expect
+++ b/.pytest.expect
@@ -1,7 +1,7 @@
 pytest-expect file v1
 (2, 7, 18, 'final', 0)
-b'html5lib/tests/test_encoding.py::test_parser_encoding[<!DOCTYPE HTML>\n<script>document.write(\'<meta charset="ISO-8859-\' + \'2">\')</script>-iso-8859-2]': FAIL
-b'html5lib/tests/test_encoding.py::test_prescan_encoding[<!DOCTYPE HTML>\n<script>document.write(\'<meta charset="ISO-8859-\' + \'2">\')</script>-iso-8859-2]': FAIL
+b'html5lib/tests/test_encoding.py::test_parser_encoding[<!DOCTYPE HTML>\\n<script>document.write(\'<meta charset="ISO-8859-\' + \'2">\')</script>-iso-8859-2]': FAIL
+b'html5lib/tests/test_encoding.py::test_prescan_encoding[<!DOCTYPE HTML>\\n<script>document.write(\'<meta charset="ISO-8859-\' + \'2">\')</script>-iso-8859-2]': FAIL
 u'html5lib/tests/testdata/tokenizer/test2.test::0::dataState': FAIL
 u'html5lib/tests/testdata/tokenizer/test3.test::228::dataState': FAIL
 u'html5lib/tests/testdata/tokenizer/test3.test::231::dataState': FAIL
diff --git a/html5lib/tests/tree_construction.py b/html5lib/tests/tree_construction.py
index 6112d11d..1ef6e725 100644
--- a/html5lib/tests/tree_construction.py
+++ b/html5lib/tests/tree_construction.py
@@ -57,8 +57,6 @@ def _getParserTests(self, treeName, treeAPIs):
             item.add_marker(pytest.mark.parser)
             if namespaceHTMLElements:
                 item.add_marker(pytest.mark.namespaced)
-            if treeAPIs is None:
-                item.add_marker(pytest.mark.skipif(True, reason="Treebuilder not loaded"))
             yield item
 
     def _getTreeWalkerTests(self, treeName, treeAPIs):
@@ -69,8 +67,6 @@ def _getTreeWalkerTests(self, treeName, treeAPIs):
                               treeAPIs)
         item.add_marker(getattr(pytest.mark, treeName))
         item.add_marker(pytest.mark.treewalker)
-        if treeAPIs is None:
-            item.add_marker(pytest.mark.skipif(True, reason="Treebuilder not loaded"))
         yield item
 
 
@@ -84,12 +80,14 @@ def convertTreeDump(data):
 class ParserTest(pytest.Item):
     def __init__(self, name, parent, test, treeClass, namespaceHTMLElements):
         super(ParserTest, self).__init__(name, parent)
-        self.obj = lambda: 1  # this is to hack around skipif needing a function!
         self.test = test
         self.treeClass = treeClass
         self.namespaceHTMLElements = namespaceHTMLElements
 
     def runtest(self):
+        if self.treeClass is None:
+            pytest.skip("Treebuilder not loaded")
+
         p = html5parser.HTMLParser(tree=self.treeClass,
                                    namespaceHTMLElements=self.namespaceHTMLElements)
 
@@ -147,11 +145,13 @@ def repr_failure(self, excinfo):
 class TreeWalkerTest(pytest.Item):
     def __init__(self, name, parent, test, treeAPIs):
         super(TreeWalkerTest, self).__init__(name, parent)
-        self.obj = lambda: 1  # this is to hack around skipif needing a function!
         self.test = test
         self.treeAPIs = treeAPIs
 
     def runtest(self):
+        if self.treeAPIs is None:
+            pytest.skip("Treebuilder not loaded")
+
         p = html5parser.HTMLParser(tree=self.treeAPIs["builder"])
 
         input = self.test['data']
diff --git a/requirements-test.txt b/requirements-test.txt
index c3aa391d..97065bd9 100644
--- a/requirements-test.txt
+++ b/requirements-test.txt
@@ -3,37 +3,10 @@
 # make sure we have a deterministic test setup
 
 # pin all of our direct dependencies
-tox==3.14.5
-flake8==3.7.9
-pytest==3.10.1,<4  # see https://github.com/html5lib/html5lib-python/pull/429
-coverage==5.0.3
-pytest-expect==1.1.0
-mock==3.0.5
-
-# and all recursive dependencies
-appdirs==1.4.3
-configparser==4.0.2
-contextlib2==0.6.0.post1
-distlib==0.3.0
-entrypoints==0.3
-enum34==1.1.9
-filelock==3.0.12
-funcsigs==1.0.2
-functools32==3.2.3.post2 ; python_version < '3'
-importlib-metadata==1.5.0
-importlib-resources==1.0.2
-mccabe==0.6.1
-packaging==20.1
-pathlib2==2.3.5
-pluggy==0.13.1
-py==1.8.1
-pycodestyle==2.5.0
-pyflakes==2.1.1
-pyparsing==2.4.6
-scandir==1.10.0
-# six==1.14.0  # ignored because it's also in requirements.txt
-toml==0.10.0
-typing==3.7.4.1
-u-msgpack-python==2.5.2
-virtualenv==20.0.6
-zipp==1.2.0
+tox>=3.15.1,<4
+flake8>=3.8.1,<3.9
+pytest>=4.6.10,<5 ; python_version < '3'
+pytest>=5.4.2,<6 ; python_version >= '3'
+coverage>=5.1,<6
+pytest-expect>=1.1.0,<2
+mock>=3.0.5,<4

From b0073d6e9ef2da814145783a56f3c33dfa1804b2 Mon Sep 17 00:00:00 2001
From: Sam Sneddon <me@gsnedders.com>
Date: Sat, 23 May 2020 00:29:34 +0100
Subject: [PATCH 6/6] Use up to date mock when possible

---
 requirements-test.txt | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/requirements-test.txt b/requirements-test.txt
index 97065bd9..703d0e69 100644
--- a/requirements-test.txt
+++ b/requirements-test.txt
@@ -1,12 +1,10 @@
 -r requirements.txt
 
-# make sure we have a deterministic test setup
-
-# pin all of our direct dependencies
 tox>=3.15.1,<4
 flake8>=3.8.1,<3.9
 pytest>=4.6.10,<5 ; python_version < '3'
 pytest>=5.4.2,<6 ; python_version >= '3'
 coverage>=5.1,<6
 pytest-expect>=1.1.0,<2
-mock>=3.0.5,<4
+mock>=3.0.5,<4 ; python_version < '3.6'
+mock>=4.0.2,<5 ; python_version >= '3.6'