diff --git a/.github/workflows/downstream.yml b/.github/workflows/downstream.yml
new file mode 100644
index 00000000..59f121f0
--- /dev/null
+++ b/.github/workflows/downstream.yml
@@ -0,0 +1,76 @@
+name: downstream
+
+concurrency:
+  group: "${{github.workflow}}-${{github.ref}}"
+  cancel-in-progress: true
+
+on:
+  workflow_dispatch:
+  push:
+    branches:
+      - master
+  pull_request:
+    types: [opened, synchronize]
+    branches:
+      - '*'
+
+jobs:
+  skeleton:
+    runs-on: ubuntu-latest
+    steps:
+      - run: echo hello world
+
+  parse5:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3  # same major version as the nokogiri job
+        with:
+          repository: inikulin/parse5
+          submodules: recursive
+      - run: rm -rf test/data/html5lib-tests/  # drop the copy fetched above
+      - uses: actions/checkout@v3  # no `repository`: checks out this repo
+        with:
+          path: test/data/html5lib-tests/
+      - uses: actions/setup-node@v3
+        with:
+          node-version: lts/*
+          cache: npm
+      - run: npm ci
+      - run: npm run build --if-present
+      - run: npm run unit-tests
+
+  html5gum:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3  # same major version as the nokogiri job
+        with:
+          repository: untitaker/html5gum
+      - run: rm -rf tests/html5lib-tests/  # clear the path reused below
+      - uses: actions/checkout@v3  # no `repository`: checks out this repo
+        with:
+          path: tests/html5lib-tests/
+      - uses: actions-rs/toolchain@v1
+        with:
+          profile: minimal
+          toolchain: stable
+          override: true
+      - run: cargo test
+
+  nokogiri:
+    runs-on: ubuntu-latest
+    container:
+      image: ghcr.io/sparklemotion/nokogiri-test:mri-3.2
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          repository: sparklemotion/nokogiri
+          path: nokogiri
+      - uses: actions/checkout@v3
+        with:
+          path: nokogiri/test/html5lib-tests
+      - working-directory: nokogiri
+        name: "Run the Nokogiri test suite"
+        run: |
+          bundle install
+          bundle exec rake compile -- --enable-system-libraries
+          bundle exec rake test
diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
new file mode 100644
index 00000000..99f67c50
--- /dev/null
+++ b/.github/workflows/lint.yml
@@ -0,0 +1,25 @@
+name: lint
+
+concurrency:
+  group: "${{github.workflow}}-${{github.ref}}"
+  cancel-in-progress: true
+
+on:
+  workflow_dispatch:
+  push:
+    branches:
+      - master
+  pull_request:
+    types: [opened, synchronize]
+    branches:
+      - '*'
+
+jobs:
+  lint:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - uses: actions/setup-python@v4
+        with:
+          python-version: '3.11'
+      - run: ./lint
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 00000000..f8b56708
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,79 @@
+# Copyright (c) 2014 GitHub, Inc.
+#
+# Permission is hereby granted,  free of charge,  to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to  use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+env/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*,cover
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+
+# Sphinx documentation
+doc/_build/
+
+# PyBuilder
+target/
diff --git a/encoding/scripted/tests1.dat b/encoding/scripted/tests1.dat
new file mode 100644
index 00000000..04d18bb9
--- /dev/null
+++ b/encoding/scripted/tests1.dat
@@ -0,0 +1,5 @@
+#data
+<!DOCTYPE HTML>
+<script>document.write('<meta charset="ISO-8859-' + '2">')</script>
+#encoding
+iso-8859-2
diff --git a/encoding/tests1.dat b/encoding/tests1.dat
index 77b0e41d..7aa9586d 100644
--- a/encoding/tests1.dat
+++ b/encoding/tests1.dat
@@ -356,12 +356,6 @@ iso-8859-2
 #encoding
 iso-8859-2
 
-#data
-<!DOCTYPE HTML>
-<script>document.write('<meta charset="ISO-8859-' + '2">')</script>
-#encoding
-iso-8859-2
-
 #data
 <!DOCTYPE HTML>
 <script>document.write('<meta charset="iso8859-2">')</script>
diff --git a/lint b/lint
new file mode 100755
index 00000000..19b7f50c
--- /dev/null
+++ b/lint
@@ -0,0 +1,6 @@
+#!/usr/bin/env python3
+import sys
+
+import lint_lib.lint as lint
+
+sys.exit(lint.main())
diff --git a/lint_lib/__init__.py b/lint_lib/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/lint_lib/_vendor-patches/funcparserlib.patch b/lint_lib/_vendor-patches/funcparserlib.patch
new file mode 100644
index 00000000..fc294880
--- /dev/null
+++ b/lint_lib/_vendor-patches/funcparserlib.patch
@@ -0,0 +1,24 @@
+diff --git a/lint_lib/_vendor/funcparserlib/parser.py b/lint_lib/_vendor/funcparserlib/parser.py
+index eb2f53f..0f86e6c 100644
+--- a/lint_lib/_vendor/funcparserlib/parser.py
++++ b/lint_lib/_vendor/funcparserlib/parser.py
+@@ -137,19 +137,6 @@ class Parser(object):
+         "('x', 'y')"
+ 
+         ```
+-
+-        !!! Note
+-
+-            You can enable the parsing log this way:
+-
+-            ```python
+-            import logging
+-            logging.basicConfig(level=logging.DEBUG)
+-            import funcparserlib.parser
+-            funcparserlib.parser.debug = True
+-            ```
+-
+-            The way to enable the parsing log may be changed in future versions.
+         """
+         self.name = name
+         return self
diff --git a/lint_lib/_vendor/__init__.py b/lint_lib/_vendor/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/lint_lib/_vendor/funcparserlib/LICENSE b/lint_lib/_vendor/funcparserlib/LICENSE
new file mode 100644
index 00000000..31d3a95b
--- /dev/null
+++ b/lint_lib/_vendor/funcparserlib/LICENSE
@@ -0,0 +1,18 @@
+Copyright © 2009/2021 Andrey Vlasovskikh
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of this
+software and associated documentation files (the "Software"), to deal in the Software
+without restriction, including without limitation the rights to use, copy, modify,
+merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to the following
+conditions:
+
+The above copyright notice and this permission notice shall be included in all copies or
+substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
+PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
+OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.
diff --git a/lint_lib/_vendor/funcparserlib/__init__.py b/lint_lib/_vendor/funcparserlib/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/lint_lib/_vendor/funcparserlib/lexer.py b/lint_lib/_vendor/funcparserlib/lexer.py
new file mode 100644
index 00000000..0a5b5e9e
--- /dev/null
+++ b/lint_lib/_vendor/funcparserlib/lexer.py
@@ -0,0 +1,211 @@
+# -*- coding: utf-8 -*-
+
+# Copyright © 2009/2021 Andrey Vlasovskikh
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of this
+# software and associated documentation files (the "Software"), to deal in the Software
+# without restriction, including without limitation the rights to use, copy, modify,
+# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to the following
+# conditions:
+#
+# The above copyright notice and this permission notice shall be included in all copies
+# or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+# PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
+# OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+from __future__ import unicode_literals
+
+__all__ = ["make_tokenizer", "TokenSpec", "Token", "LexerError"]
+
+import re
+
+
+class LexerError(Exception):
+    def __init__(self, place, msg):
+        self.place = place
+        self.msg = msg
+
+    def __str__(self):
+        s = "cannot tokenize data"
+        line, pos = self.place
+        return '%s: %d,%d: "%s"' % (s, line, pos, self.msg)
+
+
+class TokenSpec(object):
+    """A token specification for generating a lexer via `make_tokenizer()`."""
+
+    def __init__(self, type, pattern, flags=0):
+        """Initialize a `TokenSpec` object.
+
+        Parameters:
+            type (str): User-defined type of the token (e.g. `"name"`, `"number"`,
+                `"operator"`)
+            pattern (str): Regexp for matching this token type
+            flags (int, optional): Regexp flags, the second argument of `re.compile()`
+        """
+        self.type = type
+        self.pattern = pattern
+        self.flags = flags
+
+    def __repr__(self):
+        return "TokenSpec(%r, %r, %r)" % (self.type, self.pattern, self.flags)
+
+
+class Token(object):
+    """A token object that represents a substring of certain type in your text.
+
+    You can compare tokens for equality using the `==` operator. Tokens also define
+    custom `repr()` and `str()`.
+
+    Attributes:
+        type (str): User-defined type of the token (e.g. `"name"`, `"number"`,
+            `"operator"`)
+        value (str): Text value of the token
+        start (Optional[Tuple[int, int]]): Start position (_line_, _column_)
+        end (Optional[Tuple[int, int]]): End position (_line_, _column_)
+    """
+
+    def __init__(self, type, value, start=None, end=None):
+        """Initialize a `Token` object."""
+        self.type = type
+        self.value = value
+        self.start = start
+        self.end = end
+
+    def __repr__(self):
+        return "Token(%r, %r)" % (self.type, self.value)
+
+    def __eq__(self, other):
+        # FIXME: Case sensitivity is assumed here
+        if other is None:
+            return False
+        else:
+            return self.type == other.type and self.value == other.value
+
+    def _pos_str(self):
+        if self.start is None or self.end is None:
+            return ""
+        else:
+            sl, sp = self.start
+            el, ep = self.end
+            return "%d,%d-%d,%d:" % (sl, sp, el, ep)
+
+    def __str__(self):
+        s = "%s %s '%s'" % (self._pos_str(), self.type, self.value)
+        return s.strip()
+
+    @property
+    def name(self):
+        return self.value
+
+    def pformat(self):
+        return "%s %s '%s'" % (
+            self._pos_str().ljust(20),  # noqa
+            self.type.ljust(14),
+            self.value,
+        )
+
+
+def make_tokenizer(specs):
+    # noinspection GrazieInspection
+    """Make a function that tokenizes text based on the regexp specs.
+
+    Type: `(Sequence[TokenSpec | Tuple]) -> Callable[[str], Iterable[Token]]`
+
+    A token spec is a `TokenSpec` instance.
+
+    !!! Note
+
+        For legacy reasons, a token spec may also be a tuple of (_type_, _args_), where
+        _type_ sets the value of `Token.type` for the token, and _args_ are the
+        positional arguments for `re.compile()`: either just (_pattern_,) or
+        (_pattern_, _flags_).
+
+    It returns a tokenizer function that takes a string and returns an iterable of
+    `Token` objects, or raises `LexerError` if it cannot tokenize the string according
+    to its token specs.
+
+    Examples:
+
+    ```pycon
+    >>> tokenize = make_tokenizer([
+    ...     TokenSpec("space", r"\\s+"),
+    ...     TokenSpec("id", r"\\w+"),
+    ...     TokenSpec("op", r"[,!]"),
+    ... ])
+    >>> text = "Hello, World!"
+    >>> [t for t in tokenize(text) if t.type != "space"]  # noqa
+    [Token('id', 'Hello'), Token('op', ','), Token('id', 'World'), Token('op', '!')]
+    >>> text = "Bye?"
+    >>> list(tokenize(text))
+    Traceback (most recent call last):
+        ...
+    lexer.LexerError: cannot tokenize data: 1,4: "Bye?"
+
+    ```
+    """
+    compiled = []
+    for spec in specs:
+        if isinstance(spec, TokenSpec):
+            c = spec.type, re.compile(spec.pattern, spec.flags)
+        else:
+            name, args = spec
+            c = name, re.compile(*args)
+        compiled.append(c)
+
+    def match_specs(s, i, position):
+        line, pos = position
+        for type, regexp in compiled:
+            m = regexp.match(s, i)
+            if m is not None:
+                value = m.group()
+                nls = value.count("\n")
+                n_line = line + nls
+                if nls == 0:
+                    n_pos = pos + len(value)
+                else:
+                    n_pos = len(value) - value.rfind("\n") - 1
+                return Token(type, value, (line, pos + 1), (n_line, n_pos))
+        else:
+            err_line = s.splitlines()[line - 1]
+            raise LexerError((line, pos + 1), err_line)
+
+    def f(s):
+        length = len(s)
+        line, pos = 1, 0
+        i = 0
+        while i < length:
+            t = match_specs(s, i, (line, pos))
+            yield t
+            line, pos = t.end
+            i += len(t.value)
+
+    return f
+
+
+# This is an example of token specs. See also [this article][1] for a
+# discussion of searching for multiline comments using regexps (including `*?`).
+#
+#   [1]: http://ostermiller.org/findcomment.html
+_example_token_specs = [
+    TokenSpec("COMMENT", r"\(\*(.|[\r\n])*?\*\)", re.MULTILINE),
+    TokenSpec("COMMENT", r"\{(.|[\r\n])*?\}", re.MULTILINE),
+    TokenSpec("COMMENT", r"//.*"),
+    TokenSpec("NL", r"[\r\n]+"),
+    TokenSpec("SPACE", r"[ \t\r\n]+"),
+    TokenSpec("NAME", r"[A-Za-z_][A-Za-z_0-9]*"),
+    TokenSpec("REAL", r"[0-9]+\.[0-9]*([Ee][+\-]?[0-9]+)*"),
+    TokenSpec("INT", r"[0-9]+"),
+    TokenSpec("INT", r"\$[0-9A-Fa-f]+"),
+    TokenSpec("OP", r"(\.\.)|(<>)|(<=)|(>=)|(:=)|[;,=\(\):\[\]\.+\-<>\*/@\^]"),
+    TokenSpec("STRING", r"'([^']|(''))*'"),
+    TokenSpec("CHAR", r"#[0-9]+"),
+    TokenSpec("CHAR", r"#\$[0-9A-Fa-f]+"),
+]
+# tokenize = make_tokenizer(_example_token_specs)
diff --git a/lint_lib/_vendor/funcparserlib/lexer.pyi b/lint_lib/_vendor/funcparserlib/lexer.pyi
new file mode 100644
index 00000000..b1e88fe7
--- /dev/null
+++ b/lint_lib/_vendor/funcparserlib/lexer.pyi
@@ -0,0 +1,34 @@
+from typing import Tuple, Optional, Callable, Iterable, Text, Sequence
+
+_Place = Tuple[int, int]
+_Spec = Tuple[Text, Tuple]
+
+class Token:
+    type: Text
+    value: Text
+    start: Optional[_Place]
+    end: Optional[_Place]
+    name: Text
+    def __init__(
+        self,
+        type: Text,
+        value: Text,
+        start: Optional[_Place] = ...,
+        end: Optional[_Place] = ...,
+    ) -> None: ...
+    def pformat(self) -> Text: ...
+
+class TokenSpec:
+    type: Text  # named `type`, not `name`, to match TokenSpec in lexer.py
+    pattern: Text
+    flags: int
+    def __init__(self, type: Text, pattern: Text, flags: int = ...) -> None: ...
+
+def make_tokenizer(
+    specs: Sequence[TokenSpec | _Spec],
+) -> Callable[[Text], Iterable[Token]]: ...
+
+class LexerError(Exception):
+    place: Tuple[int, int]
+    msg: Text
+    def __init__(self, place: _Place, msg: Text) -> None: ...
diff --git a/lint_lib/_vendor/funcparserlib/parser.py b/lint_lib/_vendor/funcparserlib/parser.py
new file mode 100644
index 00000000..0bbac7f5
--- /dev/null
+++ b/lint_lib/_vendor/funcparserlib/parser.py
@@ -0,0 +1,872 @@
+# -*- coding: utf-8 -*-
+
+# Copyright © 2009/2021 Andrey Vlasovskikh
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of this
+# software and associated documentation files (the "Software"), to deal in the Software
+# without restriction, including without limitation the rights to use, copy, modify,
+# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to the following
+# conditions:
+#
+# The above copyright notice and this permission notice shall be included in all copies
+# or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+# PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
+# OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+"""Functional parsing combinators.
+
+Parsing combinators define an internal domain-specific language (DSL) for describing
+the parsing rules of a grammar. The DSL allows you to start with a few primitive
+parsers, then combine your parsers to get more complex ones, and finally cover
+the whole grammar you want to parse.
+
+The structure of the language:
+
+* Class `Parser`
+    * All the primitives and combinators of the language return `Parser` objects
+    * It defines the main `Parser.parse(tokens)` method
+* Primitive parsers
+    * `tok(type, value)`, `a(value)`, `some(pred)`, `forward_decl()`, `finished`
+* Parser combinators
+    * `p1 + p2`, `p1 | p2`, `p >> f`, `-p`, `maybe(p)`, `many(p)`, `oneplus(p)`,
+      `skip(p)`
+* Abstraction
+    * Use regular Python variables `p = ...  # Expression of type Parser` to define new
+      rules (non-terminals) of your grammar
+
+Every time you apply one of the combinators, you get a new `Parser` object. In other
+words, the set of `Parser` objects is closed under the means of combination.
+
+!!! Note
+
+    We took the parsing combinators language from the book [Introduction to Functional
+    Programming][1] and translated it from ML into Python.
+
+  [1]: https://www.cl.cam.ac.uk/teaching/Lectures/funprog-jrh-1996/
+"""
+
+from __future__ import unicode_literals
+
+__all__ = [
+    "some",
+    "a",
+    "tok",
+    "many",
+    "pure",
+    "finished",
+    "maybe",
+    "skip",
+    "oneplus",
+    "forward_decl",
+    "NoParseError",
+    "Parser",
+]
+
+import sys
+import logging
+import warnings
+
+from lint_lib._vendor.funcparserlib.lexer import Token
+
+log = logging.getLogger("funcparserlib")
+
+debug = False
+if sys.version_info < (3,):
+    string_types = (str, unicode)  # noqa
+else:
+    string_types = str
+
+
+class Parser(object):
+    """A parser object that can parse a sequence of tokens or can be combined with
+    other parsers using `+`, `|`, `>>`, `many()`, and other parsing combinators.
+
+    Type: `Parser[A, B]`
+
+    The generic variables in the type are: `A` — the type of the tokens in the
+    sequence to parse, `B` — the type of the parsed value.
+
+    In order to define a parser for your grammar:
+
+    1. You start with primitive parsers by calling `a(value)`, `some(pred)`,
+       `forward_decl()`, `finished`
+    2. You use parsing combinators `p1 + p2`, `p1 | p2`, `p >> f`, `many(p)`, and
+       others to combine parsers into a more complex parser
+    3. You can assign complex parsers to variables to define names that correspond to
+       the rules of your grammar
+
+    !!! Note
+
+        The constructor `Parser.__init__()` is considered **internal** and may be
+        changed in future versions. Use primitive parsers and parsing combinators to
+        construct new parsers.
+    """
+
+    def __init__(self, p):
+        """Wrap the parser function `p` into a `Parser` object."""
+        self.name = ""
+        self.define(p)
+
+    def named(self, name):
+        # noinspection GrazieInspection
+        """Specify the name of the parser for easier debugging.
+
+        Type: `(str) -> Parser[A, B]`
+
+        This name is used in the debug-level parsing log. You can also get it via the
+        `Parser.name` attribute.
+
+        Examples:
+
+        ```pycon
+        >>> expr = (a("x") + a("y")).named("expr")
+        >>> expr.name
+        'expr'
+
+        ```
+
+        ```pycon
+        >>> expr = a("x") + a("y")
+        >>> expr.name
+        "('x', 'y')"
+
+        ```
+        """
+        self.name = name
+        return self
+
+    def define(self, p):
+        """Define the parser created earlier as a forward declaration.
+
+        Type: `(Parser[A, B]) -> None`
+
+        Use `p = forward_decl()` in combination with `p.define(...)` to define
+        recursive parsers.
+
+        See the examples in the docs for `forward_decl()`.
+        """
+        f = getattr(p, "run", p)
+        if debug:
+            setattr(self, "_run", f)
+        else:
+            setattr(self, "run", f)
+        self.named(getattr(p, "name", p.__doc__))
+
+    def run(self, tokens, s):
+        """Run the parser against the tokens with the specified parsing state.
+
+        Type: `(Sequence[A], State) -> Tuple[B, State]`
+
+        The parsing state includes the current position in the sequence being parsed,
+        and the position of the rightmost token that has been consumed while parsing for
+        better error messages.
+
+        If the parser fails to parse the tokens, it raises `NoParseError`.
+
+        !!! Warning
+
+            This method is **internal** and may be changed in future versions. Use
+            `Parser.parse(tokens)` instead and let the parser object take care of
+            updating the parsing state.
+        """
+        if debug:
+            log.debug("trying %s" % self.name)
+        return self._run(tokens, s)  # noqa
+
+    def _run(self, tokens, s):
+        raise NotImplementedError("you must define() a parser")
+
+    def parse(self, tokens):
+        """Parse the sequence of tokens and return the parsed value.
+
+        Type: `(Sequence[A]) -> B`
+
+        It takes a sequence of tokens of arbitrary type `A` and returns the parsed value
+        of arbitrary type `B`.
+
+        If the parser fails to parse the tokens, it raises `NoParseError`.
+
+        !!! Note
+
+            Although `Parser.parse()` can parse sequences of any objects (including
+            `str` which is a sequence of `str` chars), **the recommended way** is
+            parsing sequences of `Token` objects.
+
+            You **should** use a regexp-based tokenizer `make_tokenizer()` defined in
+            `funcparserlib.lexer` to convert your text into a sequence of `Token`
+            objects before parsing it. You will get more readable parsing error messages
+            (as `Token` objects contain their position in the source file) and good
+            separation of the lexical and syntactic levels of the grammar.
+        """
+        try:
+            (tree, _) = self.run(tokens, State(0, 0, None))
+            return tree
+        except NoParseError as e:
+            max = e.state.max
+            if len(tokens) > max:
+                t = tokens[max]
+                if isinstance(t, Token):
+                    if t.start is None or t.end is None:
+                        loc = ""
+                    else:
+                        s_line, s_pos = t.start
+                        e_line, e_pos = t.end
+                        loc = "%d,%d-%d,%d: " % (s_line, s_pos, e_line, e_pos)
+                    msg = "%s%s: %r" % (loc, e.msg, t.value)
+                elif isinstance(t, string_types):
+                    msg = "%s: %r" % (e.msg, t)
+                else:
+                    msg = "%s: %s" % (e.msg, t)
+            else:
+                msg = "got unexpected end of input"
+            if e.state.parser is not None:
+                msg = "%s, expected: %s" % (msg, e.state.parser.name)
+            e.msg = msg
+            raise
+
+    def __add__(self, other):
+        """Sequential combination of parsers. It runs this parser, then the other
+        parser.
+
+        The return value of the resulting parser is a tuple of each parsed value in
+        the sum of parsers. We merge all parsing results of `p1 + p2 + ... + pN` into a
+        single tuple. It means that the parsing result may be a 2-tuple, a 3-tuple,
+        a 4-tuple, etc. of parsed values. You avoid this by transforming the parsed
+        pair into a new value using the `>>` combinator.
+
+        You can also skip some parsing results in the resulting parsers by using `-p`
+        or `skip(p)` for some parsers in your sum of parsers. It means that the parsing
+        result might be a single value, not a tuple of parsed values. See the docs
+        for `Parser.__neg__()` for more examples.
+
+        Overloaded types (lots of them to provide stricter checking for the quite
+        dynamic return type of this method):
+
+        * `(self: Parser[A, B], _IgnoredParser[A]) -> Parser[A, B]`
+        * `(self: Parser[A, B], Parser[A, C]) -> _TupleParser[A, Tuple[B, C]]`
+        * `(self: _TupleParser[A, B], _IgnoredParser[A]) -> _TupleParser[A, B]`
+        * `(self: _TupleParser[A, B], Parser[A, Any]) -> Parser[A, Any]`
+        * `(self: _IgnoredParser[A], _IgnoredParser[A]) -> _IgnoredParser[A]`
+        * `(self: _IgnoredParser[A], Parser[A, C]) -> Parser[A, C]`
+
+        Examples:
+
+        ```pycon
+        >>> expr = a("x") + a("y")
+        >>> expr.parse("xy")
+        ('x', 'y')
+
+        ```
+
+        ```pycon
+        >>> expr = a("x") + a("y") + a("z")
+        >>> expr.parse("xyz")
+        ('x', 'y', 'z')
+
+        ```
+
+        ```pycon
+        >>> expr = a("x") + a("y")
+        >>> expr.parse("xz")
+        Traceback (most recent call last):
+            ...
+        parser.NoParseError: got unexpected token: 'z', expected: 'y'
+
+        ```
+        """
+
+        def magic(v1, v2):
+            if isinstance(v1, _Tuple):
+                return _Tuple(v1 + (v2,))
+            else:
+                return _Tuple((v1, v2))
+
+        @_TupleParser
+        def _add(tokens, s):
+            (v1, s2) = self.run(tokens, s)
+            (v2, s3) = other.run(tokens, s2)
+            return magic(v1, v2), s3
+
+        @Parser
+        def ignored_right(tokens, s):
+            v, s2 = self.run(tokens, s)
+            _, s3 = other.run(tokens, s2)
+            return v, s3
+
+        name = "(%s, %s)" % (self.name, other.name)
+        if isinstance(other, _IgnoredParser):
+            return ignored_right.named(name)
+        else:
+            return _add.named(name)
+
+    def __or__(self, other):
+        """Choice combination of parsers.
+
+        It runs this parser and returns its result. If the parser fails, it runs the
+        other parser.
+
+        Examples:
+
+        ```pycon
+        >>> expr = a("x") | a("y")
+        >>> expr.parse("x")
+        'x'
+        >>> expr.parse("y")
+        'y'
+        >>> expr.parse("z")
+        Traceback (most recent call last):
+            ...
+        parser.NoParseError: got unexpected token: 'z', expected: 'x' or 'y'
+
+        ```
+        """
+
+        @Parser
+        def _or(tokens, s):
+            try:
+                return self.run(tokens, s)
+            except NoParseError as e:
+                state = e.state
+            try:
+                return other.run(tokens, State(s.pos, state.max, state.parser))
+            except NoParseError as e:
+                if s.pos == e.state.max:
+                    e.state = State(e.state.pos, e.state.max, _or)
+                raise
+
+        _or.name = "%s or %s" % (self.name, other.name)
+        return _or
+
+    def __rshift__(self, f):
+        """Transform the parsing result by applying the specified function.
+
+        Type: `(Callable[[B], C]) -> Parser[A, C]`
+
+        You can use it for transforming the parsed value into another value before
+        including it into the parse tree (the AST).
+
+        Examples:
+
+        ```pycon
+        >>> def make_canonical_name(s):
+        ...     return s.lower()
+        >>> expr = (a("D") | a("d")) >> make_canonical_name
+        >>> expr.parse("D")
+        'd'
+        >>> expr.parse("d")
+        'd'
+
+        ```
+        """
+
+        @Parser
+        def _shift(tokens, s):
+            (v, s2) = self.run(tokens, s)
+            return f(v), s2
+
+        return _shift.named(self.name)
+
+    def bind(self, f):
+        """Bind the parser to a monadic function that returns a new parser.
+
+        Type: `(Callable[[B], Parser[A, C]]) -> Parser[A, C]`
+
+        Also known as `>>=` in Haskell.
+
+        !!! Note
+
+            You can parse any context-free grammar without resorting to `bind`. Due
+            to its poor performance please use it only when you really need it.
+        """
+
+        @Parser
+        def _bind(tokens, s):
+            (v, s2) = self.run(tokens, s)
+            return f(v).run(tokens, s2)
+
+        _bind.name = "(%s >>=)" % (self.name,)
+        return _bind
+
+    def __neg__(self):
+        """Return a parser that parses the same tokens, but its parsing result is
+        ignored by the sequential `+` combinator.
+
+        Type: `(Parser[A, B]) -> _IgnoredParser[A]`
+
+        You can use it for throwing away elements of concrete syntax (e.g. `","`,
+        `";"`).
+
+        Examples:
+
+        ```pycon
+        >>> expr = -a("x") + a("y")
+        >>> expr.parse("xy")
+        'y'
+
+        ```
+
+        ```pycon
+        >>> expr = a("x") + -a("y")
+        >>> expr.parse("xy")
+        'x'
+
+        ```
+
+        ```pycon
+        >>> expr = a("x") + -a("y") + a("z")
+        >>> expr.parse("xyz")
+        ('x', 'z')
+
+        ```
+
+        ```pycon
+        >>> expr = -a("x") + a("y") + -a("z")
+        >>> expr.parse("xyz")
+        'y'
+
+        ```
+
+        ```pycon
+        >>> expr = -a("x") + a("y")
+        >>> expr.parse("yz")
+        Traceback (most recent call last):
+            ...
+        parser.NoParseError: got unexpected token: 'y', expected: 'x'
+
+        ```
+
+        ```pycon
+        >>> expr = a("x") + -a("y")
+        >>> expr.parse("xz")
+        Traceback (most recent call last):
+            ...
+        parser.NoParseError: got unexpected token: 'z', expected: 'y'
+
+        ```
+
+        !!! Note
+
+            You **should not** pass the resulting parser to any combinators other than
+            `+`. You **should** have at least one non-skipped value in your
+            `p1 + p2 + ... + pN`. The parsed value of `-p` is an **internal** `_Ignored`
+            object, not intended for actual use.
+        """
+        # _IgnoredParser tags the parsed value as `_Ignored`; its own `__add__` drops
+        # that value when it is the left operand of `+`.
+        return _IgnoredParser(self)
+
+    def __class_getitem__(cls, key):
+        # Makes subscription such as `Parser[str, int]` valid at runtime: the key is
+        # ignored and the class itself is returned.
+        return cls
+
+
+class State(object):
+    """Parsing state that is maintained basically for error reporting.
+
+    It consists of the current position `pos` in the sequence being parsed, and the
+    position `max` of the rightmost token that has been consumed while parsing.
+    """
+
+    def __init__(self, pos, max, parser=None):
+        self.pos = pos
+        self.max = max
+        # Parser recorded alongside the positions. `finished` and `some()` store
+        # themselves here when they fail with pos == max.
+        # NOTE(review): presumably this is the parser reported as "expected" in
+        # error messages; the message formatting is outside this chunk.
+        self.parser = parser
+
+    def __str__(self):
+        # Only (pos, max) are shown; `parser` is not part of the text form.
+        return str((self.pos, self.max))
+
+    def __repr__(self):
+        return "State(%r, %r)" % (self.pos, self.max)
+
+
+class NoParseError(Exception):
+    """Raised by the parsers in this module when the tokens cannot be parsed.
+
+    `msg` is the human-readable message and `state` is the `State` object supplied
+    by the code that raised the error.
+    """
+
+    def __init__(self, msg, state):
+        self.msg = msg
+        self.state = state
+
+    def __str__(self):
+        # The text form is the message only; the state is not included.
+        return self.msg
+
+
+class _Tuple(tuple):
+    # Marker subclass of `tuple` with no behaviour of its own.
+    # NOTE(review): presumably lets the `+` combinator (defined outside this chunk)
+    # tell accumulated sequence results apart from ordinary tuples; confirm there.
+    pass
+
+
+class _TupleParser(Parser):
+    # Marker subclass of `Parser` with no behaviour of its own.
+    # NOTE(review): presumably identifies parsers whose result is a `_Tuple` built
+    # by `+` (defined outside this chunk); confirm there.
+    pass
+
+
+class _Ignored(object):
+    # Wrapper around a parsed value that marks it as ignored; produced by
+    # `_IgnoredParser` (i.e. by `-p` / `skip(p)`).
+    def __init__(self, value):
+        self.value = value
+
+    def __repr__(self):
+        return "_Ignored(%s)" % repr(self.value)
+
+    def __eq__(self, other):
+        # Two wrappers are equal when their wrapped values are equal; a wrapper is
+        # never equal to a bare value.
+        return isinstance(other, _Ignored) and self.value == other.value
+
+
+@Parser
+def finished(tokens, s):
+    """A parser that throws an exception if there are any unparsed tokens left in the
+    sequence."""
+    if s.pos >= len(tokens):
+        # Nothing left to consume: succeed with no value and an unchanged state.
+        return None, s
+    else:
+        # Record `finished` as the failing parser only when the failure happens at
+        # the rightmost position reached so far; otherwise keep the recorded parser.
+        s2 = State(s.pos, s.max, finished if s.pos == s.max else s.parser)
+        raise NoParseError("got unexpected token", s2)
+
+
+# Label used for this parser (other parsers in this module read `.name` when
+# building their own names).
+finished.name = "end of input"
+
+
+def many(p):
+    """Return a parser that applies the parser `p` as many times as it succeeds at
+    parsing the tokens.
+
+    Return a parser that infinitely applies the parser `p` to the input sequence
+    of tokens as long as it successfully parses them. The parsed value is a list of
+    the sequentially parsed values.
+
+    Examples:
+
+    ```pycon
+    >>> expr = many(a("x"))
+    >>> expr.parse("x")
+    ['x']
+    >>> expr.parse("xx")
+    ['x', 'x']
+    >>> expr.parse("xxxy")  # noqa
+    ['x', 'x', 'x']
+    >>> expr.parse("y")
+    []
+
+    ```
+    """
+
+    @Parser
+    def _many(tokens, s):
+        res = []
+        try:
+            while True:
+                # `s` is rebound on every success, so when `p` finally fails it
+                # still holds the state after the last successful application.
+                (v, s) = p.run(tokens, s)
+                res.append(v)
+        except NoParseError as e:
+            # A failure of `p` ends the repetition and is not an error here. Keep
+            # the position of the last success, but take `max` and `parser` from
+            # the failure's state.
+            s2 = State(s.pos, e.state.max, e.state.parser)
+            if debug:
+                log.debug(
+                    "*matched* %d instances of %s, new state = %s"
+                    % (len(res), _many.name, s2)
+                )
+            return res, s2
+
+    _many.name = "{ %s }" % p.name
+    return _many
+
+
+def some(pred):
+    """Return a parser that parses a token if it satisfies the predicate `pred`.
+
+    Type: `(Callable[[A], bool]) -> Parser[A, A]`
+
+    Examples:
+
+    ```pycon
+    >>> expr = some(lambda s: s.isalpha()).named('alpha')
+    >>> expr.parse("x")
+    'x'
+    >>> expr.parse("y")
+    'y'
+    >>> expr.parse("1")
+    Traceback (most recent call last):
+        ...
+    parser.NoParseError: got unexpected token: '1', expected: alpha
+
+    ```
+
+    !!! Warning
+
+        The `some()` combinator is quite slow and may be changed or removed in future
+        versions. If you need a parser for a token by its type (e.g. any identifier)
+        and maybe its value, use `tok(type[, value])` instead. You should use
+        `make_tokenizer()` from `funcparserlib.lexer` to tokenize your text first.
+    """
+
+    @Parser
+    def _some(tokens, s):
+        if s.pos >= len(tokens):
+            # No token left to test. This parser is recorded as the failing one
+            # only when the failure is at the rightmost position reached so far.
+            s2 = State(s.pos, s.max, _some if s.pos == s.max else s.parser)
+            raise NoParseError("got unexpected end of input", s2)
+        else:
+            t = tokens[s.pos]
+            if pred(t):
+                # Consume exactly one token; `max` only ever moves to the right.
+                pos = s.pos + 1
+                s2 = State(pos, max(pos, s.max), s.parser)
+                if debug:
+                    log.debug("*matched* %r, new state = %s" % (t, s2))
+                return t, s2
+            else:
+                # The token is rejected: the position is left unchanged.
+                s2 = State(s.pos, s.max, _some if s.pos == s.max else s.parser)
+                if debug:
+                    log.debug(
+                        "failed %r, state = %s, expected = %s" % (t, s2, s2.parser.name)
+                    )
+                raise NoParseError("got unexpected token", s2)
+
+    # Generic label; callers can replace it with `.named(...)` as `a()` and `tok()` do.
+    _some.name = "some(...)"
+    return _some
+
+
+def a(value):
+    """Return a parser that parses a token if it's equal to `value`.
+
+    Type: `(A) -> Parser[A, A]`
+
+    Examples:
+
+    ```pycon
+    >>> expr = a("x")
+    >>> expr.parse("x")
+    'x'
+    >>> expr.parse("y")
+    Traceback (most recent call last):
+        ...
+    parser.NoParseError: got unexpected token: 'y', expected: 'x'
+
+    ```
+
+    !!! Note
+
+        Although `Parser.parse()` can parse sequences of any objects (including
+        `str` which is a sequence of `str` chars), **the recommended way** is
+        parsing sequences of `Token` objects.
+
+        You **should** use a regexp-based tokenizer `make_tokenizer()` defined in
+        `funcparserlib.lexer` to convert your text into a sequence of `Token` objects
+        before parsing it. You will get more readable parsing error messages (as `Token`
+        objects contain their position in the source file) and good separation of the
+        lexical and syntactic levels of the grammar.
+    """
+    # Label the parser with the value's own `name` attribute when it has one,
+    # otherwise with the value itself; the label is the repr of that.
+    name = getattr(value, "name", value)
+    return some(lambda t: t == value).named(repr(name))
+
+
+def tok(type, value=None):
+    """Return a parser that parses a `Token` and returns the string value of the token.
+
+    Type: `(str, Optional[str]) -> Parser[Token, str]`
+
+    You can match any token of the specified `type` or you can match a specific token by
+    its `type` and `value`.
+
+    Examples:
+
+    ```pycon
+    >>> expr = tok("expr")
+    >>> expr.parse([Token("expr", "foo")])
+    'foo'
+    >>> expr.parse([Token("expr", "bar")])
+    'bar'
+    >>> expr.parse([Token("op", "=")])
+    Traceback (most recent call last):
+        ...
+    parser.NoParseError: got unexpected token: '=', expected: expr
+
+    ```
+
+    ```pycon
+    >>> expr = tok("op", "=")
+    >>> expr.parse([Token("op", "=")])
+    '='
+    >>> expr.parse([Token("op", "+")])
+    Traceback (most recent call last):
+        ...
+    parser.NoParseError: got unexpected token: '+', expected: '='
+
+    ```
+
+    !!! Note
+
+        In order to convert your text to parse into a sequence of `Token` objects,
+        use a regexp-based tokenizer `make_tokenizer()` defined in
+        `funcparserlib.lexer`. You will get more readable parsing error messages (as
+        `Token` objects contain their position in the source file) and good separation
+        of the lexical and syntactic levels of the grammar.
+    """
+    if value is not None:
+        # Match by equality with a reference token (`Token` equality is defined in
+        # the lexer module, outside this chunk).
+        p = a(Token(type, value))
+    else:
+        # Match on the token type alone and label the parser with the type name.
+        p = some(lambda t: t.type == type).named(type)
+    # Map the matched token to its `value`, keeping the label of the matcher.
+    return (p >> (lambda t: t.value)).named(p.name)
+
+
+def pure(x):
+    """Wrap any object into a parser.
+
+    Type: `(A) -> Parser[A, A]`
+
+    A pure parser doesn't touch the tokens sequence, it just returns its pure `x`
+    value.
+
+    Also known as `return` in Haskell.
+    """
+
+    @Parser
+    def _pure(_, s):
+        # The tokens are ignored and the state is handed back unchanged.
+        return x, s
+
+    _pure.name = "(pure %r)" % (x,)
+    return _pure
+
+
+def maybe(p):
+    """Return a parser that returns `None` if the parser `p` fails.
+
+    Examples:
+
+    ```pycon
+    >>> expr = maybe(a("x"))
+    >>> expr.parse("x")
+    'x'
+    >>> expr.parse("y") is None
+    True
+
+    ```
+    """
+    # `pure(None)` returns None without touching the state, so it serves as the
+    # alternative to `p`; the result is labelled "[ <p's name> ]".
+    return (p | pure(None)).named("[ %s ]" % (p.name,))
+
+
+def skip(p):
+    """An alias for `-p`.
+
+    See also the docs for `Parser.__neg__()`.
+    """
+    # Unary minus dispatches to `p.__neg__()`.
+    return -p
+
+
+class _IgnoredParser(Parser):
+    # Parser whose parsed value is wrapped in `_Ignored`; created by
+    # `Parser.__neg__` (`-p`) and therefore by `skip(p)`.
+    def __init__(self, p):
+        super(_IgnoredParser, self).__init__(p)
+        # NOTE(review): `run` is captured before `define(ignored)` below, presumably
+        # so that `ignored` calls the original parser rather than itself; confirm
+        # against `Parser.define`, which is outside this chunk.
+        run = self._run if debug else self.run
+
+        def ignored(tokens, s):
+            v, s2 = run(tokens, s)
+            # Avoid double wrapping: a value that is already `_Ignored` is passed
+            # through unchanged.
+            return v if isinstance(v, _Ignored) else _Ignored(v), s2
+
+        self.define(ignored)
+        # `p` may be a parser (has `.name`) or a plain function (fall back to its
+        # docstring).
+        self.name = getattr(p, "name", p.__doc__)
+
+    def __add__(self, other):
+        def ignored_left(tokens, s):
+            # Run both parsers in sequence but keep only the right-hand value.
+            _, s2 = self.run(tokens, s)
+            v, s3 = other.run(tokens, s2)
+            return v, s3
+
+        if isinstance(other, _IgnoredParser):
+            # ignored + ignored stays ignored.
+            return _IgnoredParser(ignored_left).named(
+                "(%s, %s)" % (self.name, other.name)
+            )
+        else:
+            return Parser(ignored_left).named("(%s, %s)" % (self.name, other.name))
+
+
+def oneplus(p):
+    """Return a parser that applies the parser `p` one or more times.
+
+    A similar parser combinator `many(p)` means apply `p` zero or more times, whereas
+    `oneplus(p)` means apply `p` one or more times.
+
+    Examples:
+
+    ```pycon
+    >>> expr = oneplus(a("x"))
+    >>> expr.parse("x")
+    ['x']
+    >>> expr.parse("xx")
+    ['x', 'x']
+    >>> expr.parse("y")
+    Traceback (most recent call last):
+        ...
+    parser.NoParseError: got unexpected token: 'y', expected: 'x'
+
+    ```
+    """
+
+    @Parser
+    def _oneplus(tokens, s):
+        # The first application must succeed; its failure propagates to the caller.
+        (v1, s2) = p.run(tokens, s)
+        # Note that a fresh `many(p)` parser is built on every run.
+        (v2, s3) = many(p).run(tokens, s2)
+        return [v1] + v2, s3
+
+    _oneplus.name = "(%s, { %s })" % (p.name, p.name)
+    return _oneplus
+
+
+def with_forward_decls(suspension):
+    """Deprecated: wrap a zero-argument callable that returns a parser.
+
+    Calling this function emits a `DeprecationWarning` that points to
+    `forward_decl()`. The returned parser calls `suspension()` on every run and
+    delegates to the parser it returns.
+    """
+    warnings.warn(
+        "Use forward_decl() instead:\n"
+        "\n"
+        "    p = forward_decl()\n"
+        "    ...\n"
+        "    p.define(parser_value)\n",
+        DeprecationWarning,
+    )
+
+    @Parser
+    def f(tokens, s):
+        # The suspension is evaluated lazily, at parse time.
+        return suspension().run(tokens, s)
+
+    return f
+
+
+def forward_decl():
+    """Return an undefined parser that can be used as a forward declaration.
+
+    Type: `Parser[Any, Any]`
+
+    Use `p = forward_decl()` in combination with `p.define(...)` to define recursive
+    parsers.
+
+
+    Examples:
+
+    ```pycon
+    >>> expr = forward_decl()
+    >>> expr.define(a("x") + maybe(expr) + a("y"))
+    >>> expr.parse("xxyy")  # noqa
+    ('x', ('x', None, 'y'), 'y')
+    >>> expr.parse("xxy")
+    Traceback (most recent call last):
+        ...
+    parser.NoParseError: got unexpected end of input, expected: 'y'
+
+    ```
+
+    !!! Note
+
+        If you care about static types, you should add a type hint for your forward
+        declaration, so that your type checker can check types in `p.define(...)` later:
+
+        ```python
+        p: Parser[str, int] = forward_decl()
+        p.define(a("x"))  # Type checker error
+        p.define(a("1") >> int)  # OK
+        ```
+    """
+
+    @Parser
+    def f(_tokens, _s):
+        # Placeholder body: it fails loudly if the declaration is used before
+        # `define()` has supplied the real parser.
+        raise NotImplementedError("you must define() a forward_decl somewhere")
+
+    f.name = "forward_decl()"
+    return f
+
+
+# When executed as a script, run the doctest examples embedded in the docstrings of
+# this module.
+if __name__ == "__main__":
+    import doctest
+
+    doctest.testmod()
diff --git a/lint_lib/_vendor/funcparserlib/parser.pyi b/lint_lib/_vendor/funcparserlib/parser.pyi
new file mode 100644
index 00000000..e21ded5a
--- /dev/null
+++ b/lint_lib/_vendor/funcparserlib/parser.pyi
@@ -0,0 +1,83 @@
+from typing import (
+    Optional,
+    Generic,
+    TypeVar,
+    Union,
+    Callable,
+    Tuple,
+    Sequence,
+    Any,
+    List,
+    Text,
+    overload,
+)
+from funcparserlib.lexer import Token
+
+# Type variables for the generic signatures below. In `Parser[_A, _B]`, `_A` is the
+# token type and `_B` the type of the parsed value (see `Parser.run`).
+_A = TypeVar("_A")
+_B = TypeVar("_B")
+_C = TypeVar("_C")
+_D = TypeVar("_D")
+
+# Parsing state: current position, rightmost position reached, and an optional
+# parser (or raw parser function) recorded with it.
+class State:
+    pos: int
+    max: int
+    parser: Union[Parser, _ParserCallable, None]
+    def __init__(
+        self,
+        pos: int,
+        max: int,
+        parser: Union[Parser, _ParserCallable, None] = ...,
+    ) -> None: ...
+
+# Type of a raw parser function: it takes the input and a State and returns the
+# parsed value together with the new State.
+_ParserCallable = Callable[[_A, State], Tuple[_B, State]]
+
+# A parser over tokens of type `_A` that produces a value of type `_B`.
+class Parser(Generic[_A, _B]):
+    name: Text
+    def __init__(self, p: Union[Parser[_A, _B], _ParserCallable]) -> None: ...
+    def named(self, name: Text) -> Parser[_A, _B]: ...
+    def define(self, p: Union[Parser[_A, _B], _ParserCallable]) -> None: ...
+    def run(self, tokens: Sequence[_A], s: State) -> Tuple[_B, State]: ...
+    def parse(self, tokens: Sequence[_A]) -> _B: ...
+    # `p + -q`: adding an ignored parser keeps the result type of the left operand.
+    @overload
+    def __add__(  # type: ignore[misc]
+        self, other: _IgnoredParser[_A]
+    ) -> Parser[_A, _B]: ...
+    # `p + q`: the results of both operands are paired in a tuple.
+    @overload
+    def __add__(self, other: Parser[_A, _C]) -> _TupleParser[_A, Tuple[_B, _C]]: ...
+    def __or__(self, other: Parser[_A, _C]) -> Parser[_A, Union[_B, _C]]: ...
+    def __rshift__(self, f: Callable[[_B], _C]) -> Parser[_A, _C]: ...
+    def bind(self, f: Callable[[_B], Parser[_A, _C]]) -> Parser[_A, _C]: ...
+    def __neg__(self) -> _IgnoredParser[_A]: ...
+
+# Wrapper type for values produced by ignored parsers (`-p`).
+class _Ignored:
+    value: Any
+    def __init__(self, value: Any) -> None: ...
+
+# Parser whose result is an `_Ignored` wrapper.
+class _IgnoredParser(Parser[_A, _Ignored]):
+    # ignored + ignored is still an ignored parser.
+    @overload  # type: ignore[override]
+    def __add__(self, other: _IgnoredParser[_A]) -> _IgnoredParser[_A]: ...
+    # ignored + p yields only the result of `p`.
+    @overload  # type: ignore[override]
+    def __add__(self, other: Parser[_A, _C]) -> Parser[_A, _C]: ...
+
+# Parser returned by `Parser.__add__` for two non-ignored operands.
+class _TupleParser(Parser[_A, _B]):
+    # Adding an ignored parser leaves the result type unchanged.
+    @overload  # type: ignore[override]
+    def __add__(self, other: _IgnoredParser[_A]) -> _TupleParser[_A, _B]: ...
+    # Adding a further parser is typed loosely, as `Any`.
+    @overload
+    def __add__(self, other: Parser[_A, Any]) -> Parser[_A, Any]: ...
+
+# Module-level parser object and combinator signatures.
+finished: Parser[Any, None]
+
+def many(p: Parser[_A, _B]) -> Parser[_A, List[_B]]: ...
+def some(pred: Callable[[_A], bool]) -> Parser[_A, _A]: ...
+def a(value: _A) -> Parser[_A, _A]: ...
+def tok(type: Text, value: Optional[Text] = ...) -> Parser[Token, Text]: ...
+def pure(x: _A) -> Parser[_A, _A]: ...
+def maybe(p: Parser[_A, _B]) -> Parser[_A, Optional[_B]]: ...
+def skip(p: Parser[_A, Any]) -> _IgnoredParser[_A]: ...
+def oneplus(p: Parser[_A, _B]) -> Parser[_A, List[_B]]: ...
+def forward_decl() -> Parser[Any, Any]: ...
+
+# Parse failure carrying a message and the State it was raised with.
+class NoParseError(Exception):
+    msg: Text
+    state: State
+    def __init__(self, msg: Text, state: State) -> None: ...
diff --git a/lint_lib/_vendor/funcparserlib/py.typed b/lint_lib/_vendor/funcparserlib/py.typed
new file mode 100644
index 00000000..e69de29b
diff --git a/lint_lib/_vendor/funcparserlib/util.py b/lint_lib/_vendor/funcparserlib/util.py
new file mode 100644
index 00000000..5c9ea51e
--- /dev/null
+++ b/lint_lib/_vendor/funcparserlib/util.py
@@ -0,0 +1,72 @@
+# -*- coding: utf-8 -*-
+
+# Copyright © 2009/2021 Andrey Vlasovskikh
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of this
+# software and associated documentation files (the "Software"), to deal in the Software
+# without restriction, including without limitation the rights to use, copy, modify,
+# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to the following
+# conditions:
+#
+# The above copyright notice and this permission notice shall be included in all copies
+# or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+# PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
+# OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+from __future__ import unicode_literals
+
+
+def pretty_tree(x, kids, show):
+    """Return a pseudo-graphic tree representation of the object `x` similar to the
+    `tree` command in Unix.
+
+    Type: `(T, Callable[[T], List[T]], Callable[[T], str]) -> str`
+
+    It applies the parameter `show` (which is a function of type `(T) -> str`) to get a
+    textual representation of the objects to show.
+
+    It applies the parameter `kids` (which is a function of type `(T) -> List[T]`) to
+    list the children of the object to show.
+
+    Examples:
+
+    ```pycon
+    >>> print(pretty_tree(
+    ...     ["foo", ["bar", "baz"], "quux"],
+    ...     lambda obj: obj if isinstance(obj, list) else [],
+    ...     lambda obj: "[]" if isinstance(obj, list) else str(obj),
+    ... ))
+    []
+    |-- foo
+    |-- []
+    |   |-- bar
+    |   `-- baz
+    `-- quux
+
+    ```
+    """
+    # MID / END introduce a non-last / last child. CONT / LAST are the indentation
+    # carried below a non-last / last child. ROOT is the (empty) prefix of the root.
+    (MID, END, CONT, LAST, ROOT) = ("|-- ", "`-- ", "|   ", "    ", "")
+
+    def rec(obj, indent, sym):
+        # Render `obj` on one line, then its children (if any) recursively below it.
+        line = indent + sym + show(obj)
+        obj_kids = kids(obj)
+        if len(obj_kids) == 0:
+            return line
+        else:
+            # The children's indentation depends on how `obj` itself was introduced.
+            if sym == MID:
+                next_indent = indent + CONT
+            elif sym == ROOT:
+                next_indent = indent + ROOT
+            else:
+                next_indent = indent + LAST
+            # Every child but the last gets MID; the last one gets END.
+            chars = [MID] * (len(obj_kids) - 1) + [END]
+            lines = [rec(kid, next_indent, sym) for kid, sym in zip(obj_kids, chars)]
+            return "\n".join([line] + lines)
+
+    return rec(x, "", ROOT)
diff --git a/lint_lib/_vendor/funcparserlib/util.pyi b/lint_lib/_vendor/funcparserlib/util.pyi
new file mode 100644
index 00000000..cf6a3d48
--- /dev/null
+++ b/lint_lib/_vendor/funcparserlib/util.pyi
@@ -0,0 +1,7 @@
+from typing import TypeVar, Callable, List, Text
+
+_A = TypeVar("_A")
+
+# `kids` lists the children of a node and `show` renders one node as text; the
+# result is the whole tree as a single string.
+def pretty_tree(
+    x: _A, kids: Callable[[_A], List[_A]], show: Callable[[_A], Text]
+) -> Text: ...
diff --git a/lint_lib/_vendor/vendor.txt b/lint_lib/_vendor/vendor.txt
new file mode 100644
index 00000000..8af787f1
--- /dev/null
+++ b/lint_lib/_vendor/vendor.txt
@@ -0,0 +1 @@
+funcparserlib==1.0.1
diff --git a/lint_lib/lint.py b/lint_lib/lint.py
new file mode 100644
index 00000000..de4ccd09
--- /dev/null
+++ b/lint_lib/lint.py
@@ -0,0 +1,280 @@
+import codecs
+import contextlib
+import io
+import json
+import os
+import re
+import sys
+from collections import Counter
+from os.path import dirname, join, pardir, relpath
+from typing import Any, Dict, List, Optional, Set, TypeVar
+
+from . import parser
+from ._vendor.funcparserlib.parser import NoParseError
+
+# Aliases for the text and binary string types.
+text_type = str
+binary_type = bytes
+
+# Constrained type variable: a given call works on str throughout or on bytes
+# throughout.
+StringLike = TypeVar("StringLike", str, bytes)
+
+# Directory above this package; `clean_path()` reports paths relative to it.
+base = join(dirname(__file__), pardir)
+
+# Matches a literal \uXXXX escape, optionally followed immediately by a second one.
+_surrogateRe = re.compile(r"\\u([0-9A-Fa-f]{4})(?:\\u([0-9A-Fa-f]{4}))?")
+
+
+def clean_path(path: str) -> str:
+    """Return `path` expressed relative to `base`, the directory above this package."""
+    return relpath(path, start=base)
+
+
+def is_subsequence(l1: List[StringLike], l2: List[StringLike]) -> bool:
+    """Return True if `l1` is a subsequence of `l2`.
+
+    A subsequence keeps the relative order of its elements but need not be
+    contiguous: `["a", "c"]` is a subsequence of `["a", "b", "c"]`, whereas
+    `["c", "a"]` is not. The empty list is a subsequence of every list.
+
+    Args:
+        l1: The candidate subsequence.
+        l2: The list to search in.
+
+    Returns:
+        True when every element of `l1` can be found in `l2` in the same order.
+    """
+    remaining = iter(l2)
+    # `x in remaining` consumes the iterator up to and including the first match, so
+    # each lookup resumes where the previous one stopped and can only move forward.
+    # With an empty `l1`, all() is vacuously True (the old code raised IndexError).
+    return all(x in remaining for x in l1)
+
+
+def unescape_json(obj: Any) -> Any:
+    """Return a copy of `obj` with literal \\uXXXX escapes in its strings decoded.
+
+    Dicts (keys and values) and lists are processed recursively; strings are
+    decoded; every other value is returned unchanged.
+
+    An escaped high surrogate immediately followed by an escaped low surrogate is
+    decoded into the single non-BMP character they encode. Any other escape is
+    decoded on its own, so a lone surrogate escape yields a lone surrogate.
+    """
+    # This cannot be implemented using the unicode_escape codec because that
+    # requires its input be ISO-8859-1, and we need arbitrary unicode as input.
+    #
+    # The first alternative only matches a genuine high+low surrogate pair. The
+    # previous pattern paired *any* two adjacent escapes, so in
+    # "\\u0041\\uD83D\\uDE00" the high surrogate was consumed together with
+    # "\\u0041" and could no longer be joined with its low surrogate.
+    escape_re = re.compile(
+        r"\\u([Dd][89ABab][0-9A-Fa-f]{2})\\u([Dd][C-Fc-f][0-9A-Fa-f]{2})"
+        r"|\\u([0-9A-Fa-f]{4})"
+    )
+
+    def repl(m):
+        if m.group(3) is not None:
+            # A single escape, including lone surrogates.
+            return chr(int(m.group(3), 16))
+        high = int(m.group(1), 16)
+        low = int(m.group(2), 16)
+        # Standard UTF-16 surrogate-pair arithmetic. Python 3 strings always cover
+        # the full range up to U+10FFFF, so no narrow-build fallback is needed.
+        return chr(((high - 0xD800) << 10) + (low - 0xDC00) + 0x10000)
+
+    def decode_str(inp):
+        return escape_re.sub(repl, inp)
+
+    def walk(node):
+        if isinstance(node, dict):
+            return {decode_str(k): walk(v) for k, v in node.items()}
+        elif isinstance(node, list):
+            return [walk(x) for x in node]
+        elif isinstance(node, str):
+            return decode_str(node)
+        else:
+            return node
+
+    return walk(obj)
+
+
+def lint_dat_format(
+    path: str,
+    encoding: Optional[str],
+    first_header: StringLike,
+    expected_headers: Optional[List[StringLike]] = None,
+    input_headers: Optional[Set[StringLike]] = None,
+) -> Optional[List[Dict[StringLike, StringLike]]]:
+    """Lint one `.dat` test file and return its items as dicts.
+
+    Problems are reported by printing one line per finding to stdout.
+
+    Args:
+        path: File to lint.
+        encoding: Text encoding used to decode the file, or None to read and parse
+            it as bytes (headers must then be bytes as well).
+        first_header: Header that starts a new item.
+        expected_headers: Allowed headers in their required relative order, or None
+            to skip the header check.
+        input_headers: Headers that identify an item for duplicate detection.
+            Defaults to `expected_headers`; if that is None too, whole items are
+            compared.
+
+    Returns:
+        One dict per item mapping header to body, or None if the file could not be
+        parsed (the parse error has then already been printed).
+
+    Raises:
+        ValueError: If the header configuration is inconsistent.
+    """
+    if expected_headers is not None and first_header not in expected_headers:
+        raise ValueError("First header must be an expected header. (lint config error)")
+
+    # `<=` rather than `<`: input headers that equal the expected headers are a
+    # subset too, exactly what the default below produces.
+    if (
+        input_headers is not None
+        and expected_headers is not None
+        and not (set(input_headers) <= set(expected_headers))
+    ):
+        raise ValueError(
+            "Input header must be a subset of expected headers. (lint config error)"
+        )
+
+    if expected_headers is not None and len(set(expected_headers)) < len(
+        expected_headers
+    ):
+        raise ValueError(
+            "Can't expect a single header multiple times. (lint config error)"
+        )
+
+    # Only derive the default when there is something to derive it from;
+    # set(None) would raise TypeError.
+    if input_headers is None and expected_headers is not None:
+        input_headers = set(expected_headers)
+
+    try:
+        if encoding is not None:
+            with codecs.open(path, "r", encoding=encoding) as fp:
+                dat = fp.read()
+        else:
+            with open(path, "rb") as fp:
+                dat = fp.read()
+        parsed = parser.parse(dat, first_header)
+    except NoParseError as e:
+        print("Parse error in {}, {}".format(path, e))
+        return None
+
+    # Maps the identifying (header, body) pairs of an item to the line it was first
+    # seen on.
+    seen_items = {}
+
+    for item in parsed:
+        # Check we don't have duplicate headers within one item.
+        header_counts = Counter(x[0] for x in item.data)
+        for header, count in header_counts.items():
+            if count > 1:
+                print(
+                    f"Duplicate header {header!r} occurs {count} times in one item in {path} at line {item.lineno}"
+                )
+
+        item_dict = dict(item.data)
+
+        # Check we only have expected headers, in the expected order.
+        if expected_headers is not None:
+            if not is_subsequence(
+                list(item_dict.keys()),
+                expected_headers,
+            ):
+                unexpected = item_dict.keys()
+                print(
+                    f"Unexpected item headings in {list(unexpected)!r} in {path} at line {item.lineno}"
+                )
+
+        # Check for duplicated items.
+        if input_headers is not None:
+            found_input = {
+                (input_header, item_dict.get(input_header))
+                for input_header in input_headers
+            }
+        else:
+            found_input = set(item_dict.items())
+
+        first_line = seen_items.setdefault(frozenset(found_input), item.lineno)
+        if first_line is not None and first_line != item.lineno:
+            print(
+                f"Duplicate item in {path} at line {item.lineno} previously seen on line {first_line}"
+            )
+
+    return [dict(x.data) for x in parsed]
+
+
+def lint_encoding_test(path: str) -> None:
+    """Lint a single encoding test file, which is read and parsed as bytes."""
+    header_order = [b"data", b"encoding"]
+    identifying = {b"data"}
+    items = lint_dat_format(
+        path, None, b"data", expected_headers=header_order, input_headers=identifying
+    )
+    if not items:
+        # Nothing more to do; a parse error has already been reported.
+        return
+
+    # Checks specific to the encoding tests would go here, should any ever be
+    # needed.
+
+
+def lint_encoding_tests(path: str) -> None:
+    """Lint every `.dat` file found under `path`, including subdirectories."""
+    for dirpath, _subdirs, filenames in os.walk(path):
+        for filename in sorted(filenames):
+            if filename.endswith(".dat"):
+                lint_encoding_test(clean_path(join(dirpath, filename)))
+
+
+def lint_tokenizer_test(path: str) -> None:
+    """Lint one tokenizer `.test` file (JSON) and print any problems found.
+
+    The file must contain a JSON object whose values are lists of test objects.
+    Every test object must have the `input` and `output` properties and may only
+    use the known properties listed below.
+
+    Args:
+        path: File to lint; it is read as UTF-8.
+    """
+    all_keys = {
+        "description",
+        "input",
+        "output",
+        "initialStates",
+        "lastStartTag",
+        "ignoreErrorOrder",
+        "doubleEscaped",
+        "errors",
+    }
+    required = {"input", "output"}
+    with codecs.open(path, "r", "utf-8") as fp:
+        parsed = json.load(fp)
+    # Check the type before anything else, so that an empty non-object top level
+    # (e.g. `[]` or `null`) is reported rather than silently accepted. An empty
+    # object simply has nothing to lint.
+    if not isinstance(parsed, dict):
+        print("Top-level must be an object in %s" % path)
+        return
+    for test_group in parsed.values():
+        if not isinstance(test_group, list):
+            print("Test groups must be lists in %s" % path)
+            continue
+        for test in test_group:
+            # Report malformed entries instead of crashing on `.keys()` below.
+            if not isinstance(test, dict):
+                print("Tests must be objects in %s" % path)
+                continue
+            if test.get("doubleEscaped") is True:
+                test = unescape_json(test)
+            keys = set(test.keys())
+            if not (required <= keys):
+                print(
+                    "missing test properties {!r} in {}".format(required - keys, path)
+                )
+            if not (keys <= all_keys):
+                print(
+                    "unknown test properties {!r} in {}".format(keys - all_keys, path)
+                )
+
+
+def lint_tokenizer_tests(path: str) -> None:
+    """Lint every `.test` file found under `path`, including subdirectories."""
+    for dirpath, _subdirs, filenames in os.walk(path):
+        for filename in sorted(filenames):
+            if filename.endswith(".test"):
+                lint_tokenizer_test(clean_path(join(dirpath, filename)))
+
+
+def lint_tree_construction_test(path: str) -> None:
+    """Lint a single tree-construction test file, which is read as UTF-8 text."""
+    header_order = [
+        "data",
+        "errors",
+        "new-errors",
+        "document-fragment",
+        "script-off",
+        "script-on",
+        "document",
+    ]
+    identifying = {"data", "document-fragment", "script-on", "script-off"}
+    items = lint_dat_format(
+        path, "utf-8", "data", expected_headers=header_order, input_headers=identifying
+    )
+    if not items:
+        # Nothing more to do; a parse error has already been reported.
+        return
+
+    # Checks specific to the tree construction tests would go here, should any
+    # ever be needed.
+
+
+def lint_tree_construction_tests(path: str) -> None:
+    """Lint every `.dat` file found under `path`, including subdirectories."""
+    for dirpath, _subdirs, filenames in os.walk(path):
+        for filename in sorted(filenames):
+            if filename.endswith(".dat"):
+                lint_tree_construction_test(clean_path(join(dirpath, filename)))
+
+
+def main() -> int:
+    """Run all linters and return the process exit status.
+
+    Everything the linters print is captured first and echoed afterwards; the
+    status is 0 when nothing was printed and 1 otherwise.
+    """
+    suites = (
+        (lint_encoding_tests, "encoding"),
+        (lint_tokenizer_tests, "tokenizer"),
+        (lint_tree_construction_tests, "tree-construction"),
+    )
+    captured = io.StringIO()
+    with contextlib.redirect_stdout(captured):
+        for lint_suite, subdir in suites:
+            lint_suite(join(base, subdir))
+
+    report = captured.getvalue()
+    print(report, end="")
+    return 1 if report != "" else 0
+
+
+# Script entry point: the return value of main() becomes the process exit status.
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/lint_lib/parser.py b/lint_lib/parser.py
new file mode 100644
index 00000000..d18605a6
--- /dev/null
+++ b/lint_lib/parser.py
@@ -0,0 +1,177 @@
+import re
+from typing import Callable, List, Optional, Tuple, Type, TypeVar, Union
+
+from ._vendor.funcparserlib.lexer import LexerError, Token
+from ._vendor.funcparserlib.parser import (
+    NoParseError,
+    Parser,
+    _Tuple,
+    finished,
+    many,
+    pure,
+    skip,
+    some,
+    tok,
+)
+
+# Constrained type variable: a given call works on str throughout or on bytes
+# throughout.
+StringLike = TypeVar("StringLike", str, bytes)
+
+
+class Test:
+    """One parsed test item.
+
+    `data` holds the item's (header, body) pairs in file order and `lineno` is an
+    optional line number for the item.
+    """
+
+    def __init__(
+        self, data: List[Tuple[StringLike, StringLike]], lineno: Optional[int] = None
+    ) -> None:
+        self.lineno = lineno
+        self.data = data
+
+
+def _make_tokenizer(specs: List[Tuple[str, Tuple[StringLike]]]) -> Callable:
+    """Build a tokenizer function from `(token type, re.compile arguments)` specs.
+
+    The returned function takes a str or bytes object and yields `Token` objects
+    covering the input from start to end. At each position the first spec whose
+    regexp matches wins. Token positions are 1-based `(line, column)` pairs where
+    lines are delimited by "\\n" only.
+
+    Raises (from the returned generator):
+        LexerError: If no spec matches at the current position.
+    """
+    # Forked from upstream funcparserlib.lexer to fix #46
+    compiled = [(name, re.compile(*args)) for name, args in specs]
+
+    def match_specs(s, i, position, lf):
+        line, pos = position
+        for type, regexp in compiled:
+            m = regexp.match(s, i)
+            if m is not None:
+                value = m.group()
+                nls = value.count(lf)
+                n_line = line + nls
+                if nls == 0:
+                    n_pos = pos + len(value)
+                else:
+                    # Column restarts after the last newline inside the token.
+                    n_pos = len(value) - value.rfind(lf) - 1
+                return Token(type, value, (line, pos + 1), (n_line, n_pos))
+        # Lines are counted by `lf` above, so the offending line has to be looked
+        # up the same way. str.splitlines() also breaks on "\r", "\x0b", "\x0c",
+        # "\x1c".. and would select the wrong line (or raise IndexError).
+        errline = s.split(lf)[line - 1]
+        raise LexerError((line, pos + 1), errline)
+
+    def f(s):
+        # The newline literal must have the same type as the input.
+        lf = "\n" if isinstance(s, str) else b"\n"
+        length = len(s)
+        line, pos = 1, 0
+        i = 0
+        while i < length:
+            t = match_specs(s, i, (line, pos), lf)
+            yield t
+            line, pos = t.end
+            i += len(t.value)
+
+    return f
+
+
+# Token specs for text input. Order matters: the tokenizer uses the first spec
+# whose regexp matches at the current position.
+_token_specs_u = [
+    # Optional leading spaces/tabs, then "#" and the rest of the line.
+    ("HEADER", (r"[ \t]*#[^\n]*",)),
+    # Any other non-empty line content (not starting with "#").
+    ("BODY", (r"[^#\n][^\n]*",)),
+    # A single newline.
+    ("EOL", (r"\n",)),
+]
+
+# The same specs with the patterns encoded as ASCII bytes, for bytes input.
+_token_specs_b = [
+    (name, (regexp.encode("ascii"),)) for (name, (regexp,)) in _token_specs_u
+]
+
+# Tokenizers for str and bytes input respectively.
+_tokenizer_u = _make_tokenizer(_token_specs_u)
+_tokenizer_b = _make_tokenizer(_token_specs_b)
+
+
+def _many_merge(toks: _Tuple) -> List[Test]:
+    """Flatten a `(head, tail_list)` parse result into one list."""
+    head, tail = toks
+    return [head, *tail]
+
+
+def _notFollowedBy(p: Parser) -> Parser:
+    """Return a negative-lookahead parser for `p`.
+
+    The returned parser succeeds, consuming nothing and producing an ignored
+    value, exactly when `p` fails at the current position. When `p` succeeds it
+    raises `NoParseError`.
+    """
+    # Built once instead of on every run: succeeds without touching the state and
+    # yields a value that the `+` combinator drops.
+    nothing = skip(pure(None))
+
+    @Parser
+    def __notFollowedBy(tokens, s):
+        try:
+            p.run(tokens, s)
+        except NoParseError:
+            # `p` does not match here, so the lookahead succeeds from the original
+            # state `s`.
+            return nothing.run(tokens, s)
+        else:
+            raise NoParseError("is followed by", s)
+
+    # Use the parser's label, as the other combinators do, instead of formatting
+    # the parser object itself.
+    __notFollowedBy.name = "(notFollowedBy {})".format(p.name)
+    return __notFollowedBy
+
+
+def _trim_prefix(s: StringLike, prefix: StringLike) -> StringLike:
+    """Return `s` without its leading `prefix`, or `s` unchanged if it has none."""
+    return s[len(prefix) :] if s.startswith(prefix) else s
+
+
+def _make_test(result: _Tuple) -> Test:
+    """Build a `Test` from a `(first_segment, other_segments)` parse result.
+
+    The first segment is `((header, lineno), body)`; the line number found there
+    becomes the line number of the test.
+    """
+    head, tail = result
+    header_info, body = head
+    header, lineno = header_info
+    return Test([(header, body), *tail], lineno=lineno)
+
+
+def _parser(
+    tokens: List[Token],
+    new_test_header: StringLike,
+    tok_type: Union[Type[str], Type[bytes]],
+) -> List[Test]:
+    # Parse a token list into tests. `new_test_header` is the header (without "#")
+    # that starts a new test and `tok_type` is the string type of the token values.
+    if tok_type is str:
+        header_prefix = "#"
+    elif tok_type is bytes:
+        header_prefix = b"#"
+    else:
+        # NOTE(review): under `python -O` asserts are removed, which would leave
+        # `header_prefix` unbound here.
+        assert False, "unreachable"
+
+    # The header line that starts a test, mapped to (header without "#", start
+    # line or None); the newline after it is dropped.
+    first_header = (
+        some(
+            lambda tok: tok.type == "HEADER"
+            and tok.value == header_prefix + new_test_header
+        )
+        >> (
+            lambda x: (
+                _trim_prefix(x.value, header_prefix),
+                x.start[0] if x.start is not None else None,
+            )
+        )
+    ) + skip(tok("EOL"))
+
+    # Any other header line, mapped to the header without "#".
+    header = (
+        some(
+            lambda tok: tok.type == "HEADER"
+            and tok.value != header_prefix + new_test_header
+        )
+        >> (lambda x: _trim_prefix(x.value, header_prefix))
+    ) + skip(tok("EOL"))
+
+    # `+` binds tighter than `>>`, so the lambda receives the (BODY, EOL) pair and
+    # joins it back into one line of text.
+    body = tok("BODY") + tok("EOL") >> (lambda x: x[0] + x[1])
+    empty = tok("EOL")
+
+    # Body lines, plus empty lines that are not directly followed by the start of
+    # a new test. The lines are joined with an empty string of the token type and
+    # the final character is then dropped by the `[:-1]` slice.
+    actual_body = many(body | (empty + skip(_notFollowedBy(first_header)))) >> (
+        lambda xs: tok_type().join(xs)[:-1]
+    )
+
+    # Again `+` binds tighter than `>>`: each segment is header + body as a tuple.
+    first_segment = first_header + actual_body >> tuple
+    rest_segment = header + actual_body >> tuple
+
+    test = first_segment + many(rest_segment) >> _make_test
+
+    # Further tests are each preceded by one empty line, which is dropped.
+    tests = (test + many(skip(empty) + test)) >> _many_merge
+
+    # The whole input must be consumed.
+    toplevel = tests + skip(finished)
+
+    return toplevel.parse(tokens)
+
+
+def parse(s: StringLike, new_test_header: StringLike) -> List[Test]:
+    """Tokenize and parse the contents of a `.dat` file into a list of tests.
+
+    `s` and `new_test_header` must both be str or both be bytes; a `TypeError` is
+    raised otherwise.
+    """
+    if type(s) != type(new_test_header):
+        raise TypeError("s and new_test_header must have same type")
+
+    if isinstance(s, str):
+        tokenize, tok_type = _tokenizer_u, str
+    elif isinstance(s, bytes):
+        tokenize, tok_type = _tokenizer_b, bytes
+    else:
+        raise TypeError("s must be unicode or bytes object")
+
+    return _parser(list(tokenize(s)), new_test_header, tok_type)
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 00000000..a68f7874
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,7 @@
+[tool.vendoring]
+destination = "lint_lib/_vendor/"
+requirements = "lint_lib/_vendor/vendor.txt"
+namespace = "lint_lib._vendor"
+
+protected-files = ["__init__.py", "vendor.txt"]
+patches-dir = "lint_lib/_vendor-patches"
diff --git a/serializer/core.test b/serializer/core.test
index c0b4222d..a6fa0754 100644
--- a/serializer/core.test
+++ b/serializer/core.test
@@ -112,12 +112,12 @@
  "expected": ["<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">"]
 },
 
-{"description": "HTML 4.01 DOCTYPE without system identifer",
+{"description": "HTML 4.01 DOCTYPE without system identifier",
  "input": [["Doctype", "HTML",  "-//W3C//DTD HTML 4.01//EN"]],
  "expected": ["<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\">"]
 },
 
-{"description": "IBM DOCTYPE without public identifer",
+{"description": "IBM DOCTYPE without public identifier",
  "input": [["Doctype", "html",  "", "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"]],
  "expected": ["<!DOCTYPE html SYSTEM \"http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd\">"]
 }
diff --git a/tokenizer/contentModelFlags.test b/tokenizer/contentModelFlags.test
index 5197b68e..9cf7c8bd 100644
--- a/tokenizer/contentModelFlags.test
+++ b/tokenizer/contentModelFlags.test
@@ -6,6 +6,12 @@
 "input":"<head>&body;",
 "output":[["Character", "<head>&body;"]]},
 
+{"description":"PLAINTEXT with seeming close tag",
+"initialStates":["PLAINTEXT state"],
+"lastStartTag":"plaintext",
+"input":"</plaintext>&body;",
+"output":[["Character", "</plaintext>&body;"]]},
+
 {"description":"End tag closing RCDATA or RAWTEXT",
 "initialStates":["RCDATA state", "RAWTEXT state"],
 "lastStartTag":"xmp",
diff --git a/tokenizer/domjs.test b/tokenizer/domjs.test
index b17a5df5..1a0824d7 100644
--- a/tokenizer/domjs.test
+++ b/tokenizer/domjs.test
@@ -25,7 +25,7 @@
             ]
         },
         {
-            "description":"NUL in RCDATA, RAWTEXT, PLAINTEXT and Script data",
+            "description":"Raw NUL replacement",
             "doubleEscaped":true,
             "initialStates":["RCDATA state", "RAWTEXT state", "PLAINTEXT state", "Script data state"],
             "input":"\\u0000",
@@ -34,6 +34,13 @@
                 { "code": "unexpected-null-character", "line": 1, "col": 1 }
             ]
         },
+        {
+            "description":"NUL in CDATA section",
+            "doubleEscaped":true,
+            "initialStates":["CDATA section state"],
+            "input":"\\u0000]]>",
+            "output":[["Character", "\\u0000"]]
+        },
         {
            "description":"NUL in script HTML comment",
            "doubleEscaped":true,
@@ -112,20 +119,95 @@
                { "code": "eof-in-script-html-comment-like-text", "line": 1, "col": 13 }
            ]
         },
+        {
+            "description":"Dash in script HTML comment",
+            "initialStates":["Script data state"],
+            "input":"<!-- - -->",
+            "output":[["Character", "<!-- - -->"]]
+        },
+        {
+            "description":"Dash less-than in script HTML comment",
+            "initialStates":["Script data state"],
+            "input":"<!-- -< -->",
+            "output":[["Character", "<!-- -< -->"]]
+        },
+        {
+            "description":"Dash at end of script HTML comment",
+            "initialStates":["Script data state"],
+            "input":"<!--test--->",
+            "output":[["Character", "<!--test--->"]]
+        },
+        {
+            "description":"</script> in script HTML comment",
+            "initialStates":["Script data state"],
+            "lastStartTag":"script",
+            "input":"<!-- </script> --></script>",
+            "output":[["Character", "<!-- "], ["EndTag", "script"], ["Character", " -->"], ["EndTag", "script"]]
+        },
+        {
+            "description":"</script> in script HTML comment - double escaped",
+            "initialStates":["Script data state"],
+            "lastStartTag":"script",
+            "input":"<!-- <script></script> --></script>",
+            "output":[["Character", "<!-- <script></script> -->"], ["EndTag", "script"]]
+        },
+        {
+            "description":"</script> in script HTML comment - double escaped with nested <script>",
+            "initialStates":["Script data state"],
+            "lastStartTag":"script",
+            "input":"<!-- <script><script></script></script> --></script>",
+            "output":[["Character", "<!-- <script><script></script>"], ["EndTag", "script"], ["Character", " -->"], ["EndTag", "script"]]
+        },
+        {
+            "description":"</script> in script HTML comment - double escaped with abrupt end",
+            "initialStates":["Script data state"],
+            "lastStartTag":"script",
+            "input":"<!-- <script>--></script> --></script>",
+            "output":[["Character", "<!-- <script>-->"], ["EndTag", "script"], ["Character", " -->"], ["EndTag", "script"]]
+        },
+        {
+            "description":"Incomplete start tag in script HTML comment double escaped",
+            "initialStates":["Script data state"],
+            "lastStartTag":"script",
+            "input":"<!--<scrip></script>-->",
+            "output":[["Character", "<!--<scrip>"], ["EndTag", "script"], ["Character", "-->"]]
+        },
+        {
+            "description":"Unclosed start tag in script HTML comment double escaped",
+            "initialStates":["Script data state"],
+            "lastStartTag":"script",
+            "input":"<!--<script</script>-->",
+            "output":[["Character", "<!--<script"], ["EndTag", "script"], ["Character", "-->"]]
+        },
+        {
+            "description":"Incomplete end tag in script HTML comment double escaped",
+            "initialStates":["Script data state"],
+            "lastStartTag":"script",
+            "input":"<!--<script></scrip>-->",
+            "output":[["Character", "<!--<script></scrip>-->"]]
+        },
+        {
+            "description":"Unclosed end tag in script HTML comment double escaped",
+            "initialStates":["Script data state"],
+            "lastStartTag":"script",
+            "input":"<!--<script></script-->",
+            "output":[["Character", "<!--<script></script-->"]]
+        },
         {
             "description":"leading U+FEFF must pass through",
+            "initialStates":["Data state", "RCDATA state", "RAWTEXT state", "Script data state"],
             "doubleEscaped":true,
             "input":"\\uFEFFfoo\\uFEFFbar",
             "output":[["Character", "\\uFEFFfoo\\uFEFFbar"]]
         },
         {
-            "description":"Non BMP-charref in in RCDATA",
+            "description":"Non BMP-charref in RCDATA",
             "initialStates":["RCDATA state"],
             "input":"&NotEqualTilde;",
             "output":[["Character", "\u2242\u0338"]]
         },
         {
-            "description":"Bad charref in in RCDATA",
+            "description":"Bad charref in RCDATA",
             "initialStates":["RCDATA state"],
             "input":"&NotEqualTild;",
             "output":[["Character", "&NotEqualTild;"]],
@@ -134,36 +216,36 @@
             ]
         },
         {
-            "description":"lowercase endtags in RCDATA and RAWTEXT",
-            "initialStates":["RCDATA state", "RAWTEXT state"],
+            "description":"lowercase endtags",
+            "initialStates":["RCDATA state", "RAWTEXT state", "Script data state"],
             "lastStartTag":"xmp",
             "input":"</XMP>",
             "output":[["EndTag","xmp"]]
         },
         {
-            "description":"bad endtag in RCDATA and RAWTEXT",
-            "initialStates":["RCDATA state", "RAWTEXT state"],
+            "description":"bad endtag (space before name)",
+            "initialStates":["RCDATA state", "RAWTEXT state", "Script data state"],
             "lastStartTag":"xmp",
             "input":"</ XMP>",
             "output":[["Character","</ XMP>"]]
         },
         {
-            "description":"bad endtag in RCDATA and RAWTEXT",
-            "initialStates":["RCDATA state", "RAWTEXT state"],
+            "description":"bad endtag (not matching last start tag)",
+            "initialStates":["RCDATA state", "RAWTEXT state", "Script data state"],
             "lastStartTag":"xmp",
             "input":"</xm>",
             "output":[["Character","</xm>"]]
         },
         {
-            "description":"bad endtag in RCDATA and RAWTEXT",
-            "initialStates":["RCDATA state", "RAWTEXT state"],
+            "description":"bad endtag (without close bracket)",
+            "initialStates":["RCDATA state", "RAWTEXT state", "Script data state"],
             "lastStartTag":"xmp",
             "input":"</xm ",
             "output":[["Character","</xm "]]
         },
         {
-            "description":"bad endtag in RCDATA and RAWTEXT",
-            "initialStates":["RCDATA state", "RAWTEXT state"],
+            "description":"bad endtag (trailing solidus)",
+            "initialStates":["RCDATA state", "RAWTEXT state", "Script data state"],
             "lastStartTag":"xmp",
             "input":"</xm/",
             "output":[["Character","</xm/"]]
@@ -200,13 +282,54 @@
         },
         {
             "description":"CDATA content",
-            "input":"foo&bar",
+            "input":"foo&#32;]]>",
+            "initialStates":["CDATA section state"],
+            "output":[["Character", "foo&#32;"]]
+        },
+        {
+            "description":"CDATA followed by HTML content",
+            "input":"foo&#32;]]>&#32;",
+            "initialStates":["CDATA section state"],
+            "output":[["Character", "foo&#32; "]]
+        },
+        {
+            "description":"CDATA with extra bracket",
+            "input":"foo]]]>",
+            "initialStates":["CDATA section state"],
+            "output":[["Character", "foo]"]]
+        },
+        {
+            "description":"CDATA without end marker",
+            "input":"foo",
             "initialStates":["CDATA section state"],
-            "output":[["Character", "foo&bar"]],
+            "output":[["Character", "foo"]],
             "errors":[
-                { "code": "eof-in-cdata", "line": 1, "col": 8 }
+                { "code": "eof-in-cdata", "line": 1, "col": 4 }
             ]
+        },
+        {
+            "description":"CDATA with single bracket ending",
+            "input":"foo]",
+            "initialStates":["CDATA section state"],
+            "output":[["Character", "foo]"]],
+            "errors":[
+                { "code": "eof-in-cdata", "line": 1, "col": 5 }
+            ]
+        },
+        {
+            "description":"CDATA with two brackets ending",
+            "input":"foo]]",
+            "initialStates":["CDATA section state"],
+            "output":[["Character", "foo]]"]],
+            "errors":[
+                { "code": "eof-in-cdata", "line": 1, "col": 6 }
+            ]
+        },
+        {
+            "description": "HTML tag in script data",
+            "input": "<b>hello world</b>",
+            "initialStates": ["Script data state"],
+            "output": [["Character", "<b>hello world</b>"]]
         }
-
     ]
 }
diff --git a/tokenizer/entities.test b/tokenizer/entities.test
index 7c514563..a6469cd0 100644
--- a/tokenizer/entities.test
+++ b/tokenizer/entities.test
@@ -1,13 +1,47 @@
 {"tests": [
 
-{"description": "Undefined named entity in attribute value ending in semicolon and whose name starts with a known entity name.",
+{"description": "Undefined named entity in a double-quoted attribute value ending in semicolon and whose name starts with a known entity name.",
+"input":"<h a=\"&noti;\">",
+"output": [["StartTag", "h", {"a": "&noti;"}]]},
+
+{"description": "Entity name requiring semicolon instead followed by the equals sign in a double-quoted attribute value.",
+"input":"<h a=\"&lang=\">",
+"output": [["StartTag", "h", {"a": "&lang="}]]},
+
+{"description": "Valid entity name followed by the equals sign in a double-quoted attribute value.",
+"input":"<h a=\"&not=\">",
+"output": [["StartTag", "h", {"a": "&not="}]]},
+
+{"description": "Undefined named entity in a single-quoted attribute value ending in semicolon and whose name starts with a known entity name.",
 "input":"<h a='&noti;'>",
 "output": [["StartTag", "h", {"a": "&noti;"}]]},
 
-{"description": "Entity name followed by the equals sign in an attribute value.",
+{"description": "Entity name requiring semicolon instead followed by the equals sign in a single-quoted attribute value.",
 "input":"<h a='&lang='>",
 "output": [["StartTag", "h", {"a": "&lang="}]]},
 
+{"description": "Valid entity name followed by the equals sign in a single-quoted attribute value.",
+"input":"<h a='&not='>",
+"output": [["StartTag", "h", {"a": "&not="}]]},
+
+{"description": "Undefined named entity in an unquoted attribute value ending in semicolon and whose name starts with a known entity name.",
+"input":"<h a=&noti;>",
+"output": [["StartTag", "h", {"a": "&noti;"}]]},
+
+{"description": "Entity name requiring semicolon instead followed by the equals sign in an unquoted attribute value.",
+"input":"<h a=&lang=>",
+"output": [["StartTag", "h", {"a": "&lang="}]],
+"errors":[
+    { "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 11 }
+]},
+
+{"description": "Valid entity name followed by the equals sign in an unquoted attribute value.",
+"input":"<h a=&not=>",
+"output": [["StartTag", "h", {"a": "&not="}]],
+"errors":[
+    { "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 10 }
+]},
+
 {"description": "Ambiguous ampersand.",
 "input":"&rrrraannddom;",
 "output": [["Character", "&rrrraannddom;"]],
diff --git a/tokenizer/test1.test b/tokenizer/test1.test
index 8b85050f..5323fbbe 100644
--- a/tokenizer/test1.test
+++ b/tokenizer/test1.test
@@ -102,6 +102,10 @@
 "input":"<!-- --comment -->",
 "output":[["Comment", " --comment "]]},
 
+{"description":"Comment, central less-than bang",
+"input":"<!--<!-->",
+"output":[["Comment", "<!"]]},
+
 {"description":"Unfinished comment",
 "input":"<!--comment",
 "output":[["Comment", "comment"]],
@@ -109,6 +113,13 @@
     { "code": "eof-in-comment", "line": 1, "col": 12 }
 ]},
 
+{"description":"Unfinished comment after start of nested comment",
+"input":"<!-- <!--",
+"output":[["Comment", " <!"]],
+"errors":[
+    { "code": "eof-in-comment", "line": 1, "col": 10 }
+]},
+
 {"description":"Start of a comment",
 "input":"<!-",
 "output":[["Comment", "-"]],
@@ -123,7 +134,6 @@
     { "code": "abrupt-closing-of-empty-comment", "line": 1, "col": 5 }
 ]},
 
-
 {"description":"Short comment two",
 "input":"<!--->",
 "output":[["Comment", ""]],
@@ -135,6 +145,22 @@
  "input":"<!---->",
  "output":[["Comment", ""]]},
 
+{"description":"< in comment",
+"input":"<!-- <test-->",
+"output":[["Comment", " <test"]]},
+
+{"description":"<< in comment",
+"input":"<!--<<-->",
+"output":[["Comment", "<<"]]},
+
+{"description":"<! in comment",
+"input":"<!-- <!test-->",
+"output":[["Comment", " <!test"]]},
+
+{"description":"<!- in comment",
+"input":"<!-- <!-test-->",
+"output":[["Comment", " <!-test"]]},
+
 {"description":"Nested comment",
 "input":"<!-- <!--test-->",
 "output":[["Comment", " <!--test"]],
@@ -142,6 +168,78 @@
     { "code": "nested-comment", "line": 1, "col": 10 }
 ]},
 
+{"description":"Nested comment with extra <",
+"input":"<!-- <<!--test-->",
+"output":[["Comment", " <<!--test"]],
+"errors":[
+    { "code": "nested-comment", "line": 1, "col": 11 }
+]},
+
+{"description":"< in script data",
+"initialStates":["Script data state"],
+"input":"<test-->",
+"output":[["Character", "<test-->"]]},
+
+{"description":"<! in script data",
+"initialStates":["Script data state"],
+"input":"<!test-->",
+"output":[["Character", "<!test-->"]]},
+
+{"description":"<!- in script data",
+"initialStates":["Script data state"],
+"input":"<!-test-->",
+"output":[["Character", "<!-test-->"]]},
+
+{"description":"Escaped script data",
+"initialStates":["Script data state"],
+"input":"<!--test-->",
+"output":[["Character", "<!--test-->"]]},
+
+{"description":"< in script HTML comment",
+"initialStates":["Script data state"],
+"input":"<!-- < test -->",
+"output":[["Character", "<!-- < test -->"]]},
+
+{"description":"</ in script HTML comment",
+"initialStates":["Script data state"],
+"input":"<!-- </ test -->",
+"output":[["Character", "<!-- </ test -->"]]},
+
+{"description":"Start tag in script HTML comment",
+"initialStates":["Script data state"],
+"input":"<!-- <test> -->",
+"output":[["Character", "<!-- <test> -->"]]},
+
+{"description":"End tag in script HTML comment",
+"initialStates":["Script data state"],
+"input":"<!-- </test> -->",
+"output":[["Character", "<!-- </test> -->"]]},
+
+{"description":"- in script HTML comment double escaped",
+"initialStates":["Script data state"],
+"input":"<!--<script>-</script>-->",
+"output":[["Character", "<!--<script>-</script>-->"]]},
+
+{"description":"-- in script HTML comment double escaped",
+"initialStates":["Script data state"],
+"input":"<!--<script>--</script>-->",
+"output":[["Character", "<!--<script>--</script>-->"]]},
+
+{"description":"--- in script HTML comment double escaped",
+"initialStates":["Script data state"],
+"input":"<!--<script>---</script>-->",
+"output":[["Character", "<!--<script>---</script>-->"]]},
+
+{"description":"- spaced in script HTML comment double escaped",
+"initialStates":["Script data state"],
+"input":"<!--<script> - </script>-->",
+"output":[["Character", "<!--<script> - </script>-->"]]},
+
+{"description":"-- spaced in script HTML comment double escaped",
+"initialStates":["Script data state"],
+"input":"<!--<script> -- </script>-->",
+"output":[["Character", "<!--<script> -- </script>-->"]]},
+
 {"description":"Ampersand EOF",
 "input":"&",
 "output":[["Character", "&"]]},
diff --git a/tokenizer/test2.test b/tokenizer/test2.test
index 521694ca..c29e4c31 100644
--- a/tokenizer/test2.test
+++ b/tokenizer/test2.test
@@ -50,6 +50,10 @@
 "input":"<!DOCTYPE html SYSTEM \"-//W3C//DTD HTML Transitional 4.01//EN\">",
 "output":[["DOCTYPE", "html", null, "-//W3C//DTD HTML Transitional 4.01//EN", true]]},
 
+{"description":"DOCTYPE with single-quoted systemId",
+"input":"<!DOCTYPE html SYSTEM '-//W3C//DTD HTML Transitional 4.01//EN'>",
+"output":[["DOCTYPE", "html", null, "-//W3C//DTD HTML Transitional 4.01//EN", true]]},
+
 {"description":"DOCTYPE with publicId and systemId",
 "input":"<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML Transitional 4.01//EN\" \"-//W3C//DTD HTML Transitional 4.01//EN\">",
 "output":[["DOCTYPE", "html", "-//W3C//DTD HTML Transitional 4.01//EN", "-//W3C//DTD HTML Transitional 4.01//EN", true]]},
@@ -186,7 +190,7 @@
     { "code": "unexpected-question-mark-instead-of-tag-name", "line": 1, "col": 2 }
 ]},
 
-{"description":"A bogus comment stops at >, even if preceeded by two dashes",
+{"description":"A bogus comment stops at >, even if preceded by two dashes",
 "input":"<?foo-->",
 "output":[["Comment", "?foo--"]],
 "errors":[
diff --git a/tokenizer/test3.test b/tokenizer/test3.test
index 721f21de..901a581e 100644
--- a/tokenizer/test3.test
+++ b/tokenizer/test3.test
@@ -1,84 +1,451 @@
 {"tests": [
 
-{"description":"",
+{"description":"[empty]",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
 "input":"",
 "output":[]},
 
+{"description":"[empty]",
+"initialStates":["CDATA section state"],
+"input":"",
+"output":[],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 1 }
+]},
+
 {"description":"\\u0009",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
 "input":"\u0009",
 "output":[["Character", "\u0009"]]},
 
+{"description":"\\u0009",
+"initialStates":["CDATA section state"],
+"input":"\u0009",
+"output":[["Character", "\u0009"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 2 }
+]},
+
 {"description":"\\u000A",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
 "input":"\u000A",
 "output":[["Character", "\u000A"]]},
 
+{"description":"\\u000A",
+"initialStates":["CDATA section state"],
+"input":"\u000A",
+"output":[["Character", "\u000A"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 2, "col": 1 }
+]},
+
 {"description":"\\u000B",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
 "input":"\u000B",
 "output":[["Character", "\u000B"]],
 "errors":[
     { "code": "control-character-in-input-stream", "line": 1, "col": 1 }
 ]},
 
+{"description":"\\u000B",
+"initialStates":["CDATA section state"],
+"input":"\u000B",
+"output":[["Character", "\u000B"]],
+"errors":[
+    { "code": "control-character-in-input-stream", "line": 1, "col": 1 },
+    { "code": "eof-in-cdata", "line": 1, "col": 2 }
+]},
+
 {"description":"\\u000C",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
 "input":"\u000C",
 "output":[["Character", "\u000C"]]},
 
+{"description":"\\u000C",
+"initialStates":["CDATA section state"],
+"input":"\u000C",
+"output":[["Character", "\u000C"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 2 }
+]},
+
 {"description":" ",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
 "input":" ",
 "output":[["Character", " "]]},
 
+{"description":" ",
+"initialStates":["CDATA section state"],
+"input":" ",
+"output":[["Character", " "]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 2 }
+]},
+
 {"description":"!",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
 "input":"!",
 "output":[["Character", "!"]]},
 
+{"description":"!",
+"initialStates":["CDATA section state"],
+"input":"!",
+"output":[["Character", "!"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 2 }
+]},
+
 {"description":"\"",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
 "input":"\"",
 "output":[["Character", "\""]]},
 
+{"description":"\"",
+"initialStates":["CDATA section state"],
+"input":"\"",
+"output":[["Character", "\""]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 2 }
+]},
+
 {"description":"%",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
 "input":"%",
 "output":[["Character", "%"]]},
 
+{"description":"%",
+"initialStates":["CDATA section state"],
+"input":"%",
+"output":[["Character", "%"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 2 }
+]},
+
 {"description":"&",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
 "input":"&",
 "output":[["Character", "&"]]},
 
+{"description":"&",
+"initialStates":["CDATA section state"],
+"input":"&",
+"output":[["Character", "&"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 2 }
+]},
+
 {"description":"'",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
 "input":"'",
 "output":[["Character", "'"]]},
 
+{"description":"'",
+"initialStates":["CDATA section state"],
+"input":"'",
+"output":[["Character", "'"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 2 }
+]},
+
 {"description":",",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
 "input":",",
 "output":[["Character", ","]]},
 
+{"description":",",
+"initialStates":["CDATA section state"],
+"input":",",
+"output":[["Character", ","]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 2 }
+]},
+
 {"description":"-",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
 "input":"-",
 "output":[["Character", "-"]]},
 
+{"description":"-",
+"initialStates":["CDATA section state"],
+"input":"-",
+"output":[["Character", "-"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 2 }
+]},
+
 {"description":".",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
 "input":".",
 "output":[["Character", "."]]},
 
+{"description":".",
+"initialStates":["CDATA section state"],
+"input":".",
+"output":[["Character", "."]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 2 }
+]},
+
 {"description":"/",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
 "input":"/",
 "output":[["Character", "/"]]},
 
+{"description":"/",
+"initialStates":["CDATA section state"],
+"input":"/",
+"output":[["Character", "/"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 2 }
+]},
+
 {"description":"0",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
 "input":"0",
 "output":[["Character", "0"]]},
 
+{"description":"0",
+"initialStates":["CDATA section state"],
+"input":"0",
+"output":[["Character", "0"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 2 }
+]},
+
 {"description":"1",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
 "input":"1",
 "output":[["Character", "1"]]},
 
+{"description":"1",
+"initialStates":["CDATA section state"],
+"input":"1",
+"output":[["Character", "1"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 2 }
+]},
+
 {"description":"9",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
 "input":"9",
 "output":[["Character", "9"]]},
 
+{"description":"9",
+"initialStates":["CDATA section state"],
+"input":"9",
+"output":[["Character", "9"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 2 }
+]},
+
 {"description":";",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
 "input":";",
 "output":[["Character", ";"]]},
 
+{"description":";",
+"initialStates":["CDATA section state"],
+"input":";",
+"output":[["Character", ";"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 2 }
+]},
+
+{"description":";=",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
+"input":";=",
+"output":[["Character", ";="]]},
+
+{"description":";=",
+"initialStates":["CDATA section state"],
+"input":";=",
+"output":[["Character", ";="]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 3 }
+]},
+
+{"description":";>",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
+"input":";>",
+"output":[["Character", ";>"]]},
+
+{"description":";>",
+"initialStates":["CDATA section state"],
+"input":";>",
+"output":[["Character", ";>"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 3 }
+]},
+
+{"description":";?",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
+"input":";?",
+"output":[["Character", ";?"]]},
+
+{"description":";?",
+"initialStates":["CDATA section state"],
+"input":";?",
+"output":[["Character", ";?"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 3 }
+]},
+
+{"description":";@",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
+"input":";@",
+"output":[["Character", ";@"]]},
+
+{"description":";@",
+"initialStates":["CDATA section state"],
+"input":";@",
+"output":[["Character", ";@"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 3 }
+]},
+
+{"description":";A",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
+"input":";A",
+"output":[["Character", ";A"]]},
+
+{"description":";A",
+"initialStates":["CDATA section state"],
+"input":";A",
+"output":[["Character", ";A"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 3 }
+]},
+
+{"description":";B",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
+"input":";B",
+"output":[["Character", ";B"]]},
+
+{"description":";B",
+"initialStates":["CDATA section state"],
+"input":";B",
+"output":[["Character", ";B"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 3 }
+]},
+
+{"description":";Y",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
+"input":";Y",
+"output":[["Character", ";Y"]]},
+
+{"description":";Y",
+"initialStates":["CDATA section state"],
+"input":";Y",
+"output":[["Character", ";Y"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 3 }
+]},
+
+{"description":";Z",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
+"input":";Z",
+"output":[["Character", ";Z"]]},
+
+{"description":";Z",
+"initialStates":["CDATA section state"],
+"input":";Z",
+"output":[["Character", ";Z"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 3 }
+]},
+
+{"description":";`",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
+"input":";`",
+"output":[["Character", ";`"]]},
+
+{"description":";`",
+"initialStates":["CDATA section state"],
+"input":";`",
+"output":[["Character", ";`"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 3 }
+]},
+
+{"description":";a",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
+"input":";a",
+"output":[["Character", ";a"]]},
+
+{"description":";a",
+"initialStates":["CDATA section state"],
+"input":";a",
+"output":[["Character", ";a"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 3 }
+]},
+
+{"description":";b",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
+"input":";b",
+"output":[["Character", ";b"]]},
+
+{"description":";b",
+"initialStates":["CDATA section state"],
+"input":";b",
+"output":[["Character", ";b"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 3 }
+]},
+
+{"description":";y",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
+"input":";y",
+"output":[["Character", ";y"]]},
+
+{"description":";y",
+"initialStates":["CDATA section state"],
+"input":";y",
+"output":[["Character", ";y"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 3 }
+]},
+
+{"description":";z",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
+"input":";z",
+"output":[["Character", ";z"]]},
+
+{"description":";z",
+"initialStates":["CDATA section state"],
+"input":";z",
+"output":[["Character", ";z"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 3 }
+]},
+
+{"description":";{",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
+"input":";{",
+"output":[["Character", ";{"]]},
+
+{"description":";{",
+"initialStates":["CDATA section state"],
+"input":";{",
+"output":[["Character", ";{"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 3 }
+]},
+
+{"description":";\\uDBC0\\uDC00",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
+"input":";\uDBC0\uDC00",
+"output":[["Character", ";\uDBC0\uDC00"]]},
+
+{"description":";\\uDBC0\\uDC00",
+"initialStates":["CDATA section state"],
+"input":";\uDBC0\uDC00",
+"output":[["Character", ";\uDBC0\uDC00"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 4 }
+]},
+
 {"description":"<",
 "input":"<",
 "output":[["Character", "<"]],
@@ -958,28 +1325,28 @@
 "input":"<!----! >",
 "output":[["Comment", "--! >"]],
 "errors":[
-    { "code": "eof-in-comment", "line": 1, "col": 9 }
+    { "code": "eof-in-comment", "line": 1, "col": 10 }
 ]},
 
 {"description":"<!----!LF>",
 "input":"<!----!\n>",
 "output":[["Comment", "--!\n>"]],
 "errors":[
-    { "code": "eof-in-comment", "line": 1, "col": 9 }
+    { "code": "eof-in-comment", "line": 2, "col": 2 }
 ]},
 
 {"description":"<!----!CR>",
 "input":"<!----!\r>",
 "output":[["Comment", "--!\n>"]],
 "errors":[
-    { "code": "eof-in-comment", "line": 1, "col": 9 }
+    { "code": "eof-in-comment", "line": 2, "col": 2 }
 ]},
 
 {"description":"<!----!CRLF>",
 "input":"<!----!\r\n>",
 "output":[["Comment", "--!\n>"]],
 "errors":[
-    { "code": "eof-in-comment", "line": 1, "col": 9 }
+    { "code": "eof-in-comment", "line": 2, "col": 2 }
 ]},
 
 {"description":"<!----!a",
@@ -10669,63 +11036,198 @@
 ]},
 
 {"description":"=",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
 "input":"=",
 "output":[["Character", "="]]},
 
+{"description":"=",
+"initialStates":["CDATA section state"],
+"input":"=",
+"output":[["Character", "="]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 2 }
+]},
+
 {"description":">",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
 "input":">",
 "output":[["Character", ">"]]},
 
+{"description":">",
+"initialStates":["CDATA section state"],
+"input":">",
+"output":[["Character", ">"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 2 }
+]},
+
 {"description":"?",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
 "input":"?",
 "output":[["Character", "?"]]},
 
+{"description":"?",
+"initialStates":["CDATA section state"],
+"input":"?",
+"output":[["Character", "?"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 2 }
+]},
+
 {"description":"@",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
 "input":"@",
 "output":[["Character", "@"]]},
 
+{"description":"@",
+"initialStates":["CDATA section state"],
+"input":"@",
+"output":[["Character", "@"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 2 }
+]},
+
 {"description":"A",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
 "input":"A",
 "output":[["Character", "A"]]},
 
+{"description":"A",
+"initialStates":["CDATA section state"],
+"input":"A",
+"output":[["Character", "A"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 2 }
+]},
+
 {"description":"B",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
 "input":"B",
 "output":[["Character", "B"]]},
 
+{"description":"B",
+"initialStates":["CDATA section state"],
+"input":"B",
+"output":[["Character", "B"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 2 }
+]},
+
 {"description":"Y",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
 "input":"Y",
 "output":[["Character", "Y"]]},
 
+{"description":"Y",
+"initialStates":["CDATA section state"],
+"input":"Y",
+"output":[["Character", "Y"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 2 }
+]},
+
 {"description":"Z",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
 "input":"Z",
 "output":[["Character", "Z"]]},
 
+{"description":"Z",
+"initialStates":["CDATA section state"],
+"input":"Z",
+"output":[["Character", "Z"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 2 }
+]},
+
 {"description":"`",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
 "input":"`",
 "output":[["Character", "`"]]},
 
+{"description":"`",
+"initialStates":["CDATA section state"],
+"input":"`",
+"output":[["Character", "`"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 2 }
+]},
+
 {"description":"a",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
 "input":"a",
 "output":[["Character", "a"]]},
 
+{"description":"a",
+"initialStates":["CDATA section state"],
+"input":"a",
+"output":[["Character", "a"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 2 }
+]},
+
 {"description":"b",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
 "input":"b",
 "output":[["Character", "b"]]},
 
+{"description":"b",
+"initialStates":["CDATA section state"],
+"input":"b",
+"output":[["Character", "b"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 2 }
+]},
+
 {"description":"y",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
 "input":"y",
 "output":[["Character", "y"]]},
 
+{"description":"y",
+"initialStates":["CDATA section state"],
+"input":"y",
+"output":[["Character", "y"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 2 }
+]},
+
 {"description":"z",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
 "input":"z",
 "output":[["Character", "z"]]},
 
+{"description":"z",
+"initialStates":["CDATA section state"],
+"input":"z",
+"output":[["Character", "z"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 2 }
+]},
+
 {"description":"{",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
 "input":"{",
 "output":[["Character", "{"]]},
 
+{"description":"{",
+"initialStates":["CDATA section state"],
+"input":"{",
+"output":[["Character", "{"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 2 }
+]},
+
 {"description":"\\uDBC0\\uDC00",
+"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"],
 "input":"\uDBC0\uDC00",
-"output":[["Character", "\uDBC0\uDC00"]]}
+"output":[["Character", "\uDBC0\uDC00"]]},
+
+{"description":"\\uDBC0\\uDC00",
+"initialStates":["CDATA section state"],
+"input":"\uDBC0\uDC00",
+"output":[["Character", "\uDBC0\uDC00"]],
+"errors":[
+    { "code": "eof-in-cdata", "line": 1, "col": 3 }
+]}
 
 ]}
diff --git a/tokenizer/test4.test b/tokenizer/test4.test
index 77706b72..8963c747 100644
--- a/tokenizer/test4.test
+++ b/tokenizer/test4.test
@@ -8,7 +8,7 @@
     { "code": "unexpected-character-in-attribute-name", "line": 1, "col": 7 }
 ]},
 
-{"description":"",
+{"description":"< in unquoted attribute value",
 "input":"<z x=<>",
 "output":[["StartTag", "z", {"x": "<"}]],
 "errors":[
diff --git a/tree-construction/blocks.dat b/tree-construction/blocks.dat
index 5d3871ea..a1a9c752 100644
--- a/tree-construction/blocks.dat
+++ b/tree-construction/blocks.dat
@@ -2,7 +2,6 @@
 <!doctype html><p>foo<address>bar<p>baz
 #errors
 (1,39): expected-closing-tag-but-got-eof
-30: Unclosed element “address”.
 #document
 | <!DOCTYPE html>
 | <html>
@@ -32,7 +31,6 @@
 <!doctype html><p>foo<article>bar<p>baz
 #errors
 (1,39): expected-closing-tag-but-got-eof
-30: Unclosed element “article”.
 #document
 | <!DOCTYPE html>
 | <html>
@@ -62,7 +60,6 @@
 <!doctype html><p>foo<aside>bar<p>baz
 #errors
 (1,37): expected-closing-tag-but-got-eof
-28: Unclosed element “aside”.
 #document
 | <!DOCTYPE html>
 | <html>
@@ -92,7 +89,6 @@
 <!doctype html><p>foo<blockquote>bar<p>baz
 #errors
 (1,42): expected-closing-tag-but-got-eof
-33: Unclosed element “blockquote”.
 #document
 | <!DOCTYPE html>
 | <html>
@@ -122,7 +118,6 @@
 <!doctype html><p>foo<center>bar<p>baz
 #errors
 (1,38): expected-closing-tag-but-got-eof
-29: Unclosed element “center”.
 #document
 | <!DOCTYPE html>
 | <html>
@@ -152,7 +147,6 @@
 <!doctype html><p>foo<details>bar<p>baz
 #errors
 (1,39): expected-closing-tag-but-got-eof
-30: Unclosed element “details”.
 #document
 | <!DOCTYPE html>
 | <html>
@@ -182,7 +176,6 @@
 <!doctype html><p>foo<dialog>bar<p>baz
 #errors
 (1,38): expected-closing-tag-but-got-eof
-29: Unclosed element “dialog”.
 #document
 | <!DOCTYPE html>
 | <html>
@@ -212,7 +205,6 @@
 <!doctype html><p>foo<dir>bar<p>baz
 #errors
 (1,35): expected-closing-tag-but-got-eof
-26: Unclosed element “dir”.
 #document
 | <!DOCTYPE html>
 | <html>
@@ -242,7 +234,6 @@
 <!doctype html><p>foo<div>bar<p>baz
 #errors
 (1,35): expected-closing-tag-but-got-eof
-26: Unclosed element “div”.
 #document
 | <!DOCTYPE html>
 | <html>
@@ -272,7 +263,6 @@
 <!doctype html><p>foo<dl>bar<p>baz
 #errors
 (1,34): expected-closing-tag-but-got-eof
-25: Unclosed element “dl”.
 #document
 | <!DOCTYPE html>
 | <html>
@@ -302,7 +292,6 @@
 <!doctype html><p>foo<fieldset>bar<p>baz
 #errors
 (1,40): expected-closing-tag-but-got-eof
-31: Unclosed element “fieldset”.
 #document
 | <!DOCTYPE html>
 | <html>
@@ -332,7 +321,6 @@
 <!doctype html><p>foo<figcaption>bar<p>baz
 #errors
 (1,42): expected-closing-tag-but-got-eof
-33: Unclosed element “figcaption”.
 #document
 | <!DOCTYPE html>
 | <html>
@@ -362,7 +350,6 @@
 <!doctype html><p>foo<figure>bar<p>baz
 #errors
 (1,38): expected-closing-tag-but-got-eof
-29: Unclosed element “figure”.
 #document
 | <!DOCTYPE html>
 | <html>
@@ -392,7 +379,6 @@
 <!doctype html><p>foo<footer>bar<p>baz
 #errors
 (1,38): expected-closing-tag-but-got-eof
-29: Unclosed element “footer”.
 #document
 | <!DOCTYPE html>
 | <html>
@@ -422,7 +408,6 @@
 <!doctype html><p>foo<header>bar<p>baz
 #errors
 (1,38): expected-closing-tag-but-got-eof
-29: Unclosed element “header”.
 #document
 | <!DOCTYPE html>
 | <html>
@@ -452,7 +437,6 @@
 <!doctype html><p>foo<hgroup>bar<p>baz
 #errors
 (1,38): expected-closing-tag-but-got-eof
-29: Unclosed element “hgroup”.
 #document
 | <!DOCTYPE html>
 | <html>
@@ -482,7 +466,6 @@
 <!doctype html><p>foo<listing>bar<p>baz
 #errors
 (1,39): expected-closing-tag-but-got-eof
-30: Unclosed element “listing”.
 #document
 | <!DOCTYPE html>
 | <html>
@@ -512,7 +495,6 @@
 <!doctype html><p>foo<menu>bar<p>baz
 #errors
 (1,36): expected-closing-tag-but-got-eof
-27: Unclosed element “menu”.
 #document
 | <!DOCTYPE html>
 | <html>
@@ -542,7 +524,6 @@
 <!doctype html><p>foo<nav>bar<p>baz
 #errors
 (1,35): expected-closing-tag-but-got-eof
-26: Unclosed element “nav”.
 #document
 | <!DOCTYPE html>
 | <html>
@@ -572,7 +553,6 @@
 <!doctype html><p>foo<ol>bar<p>baz
 #errors
 (1,34): expected-closing-tag-but-got-eof
-25: Unclosed element “ol”.
 #document
 | <!DOCTYPE html>
 | <html>
@@ -602,7 +582,6 @@
 <!doctype html><p>foo<pre>bar<p>baz
 #errors
 (1,35): expected-closing-tag-but-got-eof
-26: Unclosed element “pre”.
 #document
 | <!DOCTYPE html>
 | <html>
@@ -632,7 +611,6 @@
 <!doctype html><p>foo<section>bar<p>baz
 #errors
 (1,39): expected-closing-tag-but-got-eof
-30: Unclosed element “section”.
 #document
 | <!DOCTYPE html>
 | <html>
@@ -662,7 +640,6 @@
 <!doctype html><p>foo<summary>bar<p>baz
 #errors
 (1,39): expected-closing-tag-but-got-eof
-30: Unclosed element “summary”.
 #document
 | <!DOCTYPE html>
 | <html>
@@ -692,7 +669,6 @@
 <!doctype html><p>foo<ul>bar<p>baz
 #errors
 (1,34): expected-closing-tag-but-got-eof
-25: Unclosed element “ul”.
 #document
 | <!DOCTYPE html>
 | <html>
diff --git a/tree-construction/comments01.dat b/tree-construction/comments01.dat
index fa79c2b1..4b9ff957 100644
--- a/tree-construction/comments01.dat
+++ b/tree-construction/comments01.dat
@@ -29,8 +29,9 @@ FOO<!-- BAR --!>BAZ
 FOO<!-- BAR --! >BAZ
 #errors
 (1,3): expected-doctype-but-got-chars
+(1:21) eof-in-comment
 #new-errors
-(1:20) eof-in-comment
+(1:21) eof-in-comment
 #document
 | <html>
 |   <head>
@@ -43,8 +44,9 @@ FOO<!-- BAR --!
 >BAZ
 #errors
 (1,3): expected-doctype-but-got-chars
+(2:5) eof-in-comment
 #new-errors
-(1:20) eof-in-comment
+(2:5) eof-in-comment
 #document
 | <html>
 |   <head>
@@ -57,7 +59,6 @@ FOO<!-- BAR --!
 FOO<!-- BAR --   >BAZ
 #errors
 (1,3): expected-doctype-but-got-chars
-(1,15): unexpected-char-in-comment
 (1,21): eof-in-comment
 #new-errors
 (1:22) eof-in-comment
@@ -72,8 +73,6 @@ FOO<!-- BAR --   >BAZ
 FOO<!-- BAR -- <QUX> -- MUX -->BAZ
 #errors
 (1,3): expected-doctype-but-got-chars
-(1,15): unexpected-char-in-comment
-(1,24): unexpected-char-in-comment
 #document
 | <html>
 |   <head>
@@ -86,8 +85,6 @@ FOO<!-- BAR -- <QUX> -- MUX -->BAZ
 FOO<!-- BAR -- <QUX> -- MUX --!>BAZ
 #errors
 (1,3): expected-doctype-but-got-chars
-(1,15): unexpected-char-in-comment
-(1,24): unexpected-char-in-comment
 (1,31): unexpected-bang-after-double-dash-in-comment
 #new-errors
 (1:32) incorrectly-closed-comment
@@ -103,9 +100,6 @@ FOO<!-- BAR -- <QUX> -- MUX --!>BAZ
 FOO<!-- BAR -- <QUX> -- MUX -- >BAZ
 #errors
 (1,3): expected-doctype-but-got-chars
-(1,15): unexpected-char-in-comment
-(1,24): unexpected-char-in-comment
-(1,31): unexpected-char-in-comment
 (1,35): eof-in-comment
 #new-errors
 (1:36) eof-in-comment
@@ -202,7 +196,6 @@ FOO<!-->BAZ
 FOO<!----->BAZ
 #errors
 (1,3): expected-doctype-but-got-chars
-(1,10): unexpected-dash-after-double-dash-in-comment
 #document
 | <html>
 |   <head>
diff --git a/tree-construction/doctype01.dat b/tree-construction/doctype01.dat
index c845becf..9efdaf70 100644
--- a/tree-construction/doctype01.dat
+++ b/tree-construction/doctype01.dat
@@ -34,7 +34,6 @@
 #data
 <!DOCTYPE>Hello
 #errors
-(1,9): need-space-after-doctype
 (1,10): expected-doctype-name-but-got-right-bracket
 (1,10): unknown-doctype
 #new-errors
@@ -337,6 +336,7 @@
 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
    "http://www.w3.org/TR/html4/strict.dtd">Hello
 #errors
+(2,43): unknown-doctype
 #document
 | <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
 | <html>
@@ -421,6 +421,7 @@
 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN""http://www.w3.org/TR/html4/strict.dtd">
 #errors
 (1,50): unexpected-char-in-doctype
+(1,89): unknown-doctype
 #new-errors
 (1:50) missing-whitespace-between-doctype-public-and-system-identifiers
 #document
@@ -433,6 +434,7 @@
 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"'http://www.w3.org/TR/html4/strict.dtd'>
 #errors
 (1,50): unexpected-char-in-doctype
+(1,89): unknown-doctype
 #new-errors
 (1:50) missing-whitespace-between-doctype-public-and-system-identifiers
 #document
@@ -446,6 +448,7 @@
 #errors
 (1,21): unexpected-char-in-doctype
 (1,49): unexpected-char-in-doctype
+(1,88): unknown-doctype
 #new-errors
 (1:22) missing-whitespace-after-doctype-public-keyword
 (1:49) missing-whitespace-between-doctype-public-and-system-identifiers
@@ -460,6 +463,7 @@
 #errors
 (1,21): unexpected-char-in-doctype
 (1,49): unexpected-char-in-doctype
+(1,88): unknown-doctype
 #new-errors
 (1:22) missing-whitespace-after-doctype-public-keyword
 (1:49) missing-whitespace-between-doctype-public-and-system-identifiers
diff --git a/tree-construction/entities02.dat b/tree-construction/entities02.dat
index 0c6e898c..74965a35 100644
--- a/tree-construction/entities02.dat
+++ b/tree-construction/entities02.dat
@@ -45,7 +45,6 @@
 #data
 <div bar="ZZ&gt=YY"></div>
 #errors
-(1,15): named-entity-without-semicolon
 (1,20): expected-doctype-but-got-start-tag
 #document
 | <html>
@@ -204,7 +203,6 @@
 #data
 <div bar="ZZ&pound=23"></div>
 #errors
-(1,18): named-entity-without-semicolon
 (1,23): expected-doctype-but-got-start-tag
 #document
 | <html>
@@ -299,6 +297,8 @@
 #data
 <div>ZZ&AElig=</div>
 #errors
+(1,5): expected-doctype-but-got-start-tag
+(1:14) missing-semicolon-after-character-reference
 #new-errors
 (1:14) missing-semicolon-after-character-reference
 #document
diff --git a/tree-construction/foreign-fragment.dat b/tree-construction/foreign-fragment.dat
index c81ae817..e562c6b8 100644
--- a/tree-construction/foreign-fragment.dat
+++ b/tree-construction/foreign-fragment.dat
@@ -3,11 +3,10 @@
 #errors
 6: HTML start tag “nobr” in a foreign namespace context.
 7: End of file seen and there were open elements.
-6: Unclosed element “nobr”.
 #document-fragment
 svg path
 #document
-| <svg nobr>
+| <nobr>
 |   "X"
 
 #data
@@ -17,7 +16,7 @@ svg path
 #document-fragment
 svg path
 #document
-| <svg font>
+| <font>
 |   color=""
 | "X"
 
@@ -35,7 +34,6 @@ svg path
 #errors
 10: End tag “path” did not match the name of the current open element (“g”).
 11: End of file seen and there were open elements.
-3: Unclosed element “g”.
 #document-fragment
 svg path
 #document
@@ -173,7 +171,6 @@ math ms
 #errors
 51: Self-closing syntax (“/>”) used on a non-void HTML element. Ignoring the slash and treating as a start tag.
 52: End of file seen and there were open elements.
-51: Unclosed element “ms”.
 #new-errors
 (1:44-1:49) non-void-html-element-start-tag-with-trailing-solidus
 #document-fragment
@@ -216,7 +213,6 @@ math ms
 #errors
 51: Self-closing syntax (“/>”) used on a non-void HTML element. Ignoring the slash and treating as a start tag.
 52: End of file seen and there were open elements.
-51: Unclosed element “mn”.
 #new-errors
 (1:44-1:49) non-void-html-element-start-tag-with-trailing-solidus
 #document-fragment
@@ -259,7 +255,6 @@ math mn
 #errors
 51: Self-closing syntax (“/>”) used on a non-void HTML element. Ignoring the slash and treating as a start tag.
 52: End of file seen and there were open elements.
-51: Unclosed element “mo”.
 #new-errors
 (1:44-1:49) non-void-html-element-start-tag-with-trailing-solidus
 #document-fragment
@@ -302,7 +297,6 @@ math mo
 #errors
 51: Self-closing syntax (“/>”) used on a non-void HTML element. Ignoring the slash and treating as a start tag.
 52: End of file seen and there were open elements.
-51: Unclosed element “mi”.
 #new-errors
 (1:44-1:49) non-void-html-element-start-tag-with-trailing-solidus
 #document-fragment
@@ -345,7 +339,6 @@ math mi
 #errors
 51: Self-closing syntax (“/>”) used on a non-void HTML element. Ignoring the slash and treating as a start tag.
 52: End of file seen and there were open elements.
-51: Unclosed element “mtext”.
 #new-errors
 (1:44-1:52) non-void-html-element-start-tag-with-trailing-solidus
 #document-fragment
@@ -390,7 +383,7 @@ math mtext
 #document-fragment
 math annotation-xml
 #document
-| <math div>
+| <div>
 
 #data
 <figure></figure>
@@ -407,7 +400,7 @@ math annotation-xml
 #document-fragment
 math math
 #document
-| <math div>
+| <div>
 
 #data
 <figure></figure>
@@ -461,12 +454,11 @@ svg desc
 <div><h1>X</h1></div>
 #errors
 5: HTML start tag “div” in a foreign namespace context.
-9: HTML start tag “h1” in a foreign namespace context.
 #document-fragment
 svg svg
 #document
-| <svg div>
-|   <svg h1>
+| <div>
+|   <h1>
 |     "X"
 
 #data
@@ -476,7 +468,7 @@ svg svg
 #document-fragment
 svg svg
 #document
-| <svg div>
+| <div>
 
 #data
 <div></div>
@@ -486,14 +478,6 @@ svg desc
 #document
 | <div>
 
-#data
-<figure></figure>
-#errors
-#document-fragment
-svg desc
-#document
-| <figure>
-
 #data
 <plaintext><foo>
 #errors
@@ -557,3 +541,105 @@ svg desc
 svg desc
 #document
 | "X"
+
+#data
+<svg><p>
+#errors
+8: HTML start tag “p” in a foreign namespace context.
+#document-fragment
+div
+#document
+| <svg svg>
+| <p>
+
+#data
+<p>
+#errors
+3: HTML start tag “p” in a foreign namespace context.
+#document-fragment
+svg svg
+#document
+| <p>
+
+#data
+<svg></p><foo>
+#errors
+9: HTML end tag “p” in a foreign namespace context.
+(1:6) Unexpected </p> from in body insertion mode
+(1:15) Unexpected EOF
+#document-fragment
+div
+#document
+| <svg svg>
+| <p>
+| <foo>
+
+#data
+<svg></br><foo>
+#errors
+10: HTML end tag “br” in a foreign namespace context.
+(1:6) Unexpected </br> from in body insertion mode
+(1:16) Unexpected EOF
+#document-fragment
+div
+#document
+| <svg svg>
+| <br>
+| <foo>
+
+#data
+</p><foo>
+#errors
+4: HTML end tag “p” in a foreign namespace context.
+(1:1) Unexpected </p> from in body insertion mode
+(1:10) Unexpected EOF
+#document-fragment
+svg svg
+#document
+| <p>
+| <svg foo>
+
+#data
+</br><foo>
+#errors
+5: HTML end tag “br” in a foreign namespace context.
+(1:1) Unexpected </br> from in body insertion mode
+(1:11) Unexpected EOF
+#document-fragment
+svg svg
+#document
+| <br>
+| <svg foo>
+
+#data
+<body><foo>
+#errors
+6: HTML start tag “body” in a foreign namespace context.
+(1:1) Unexpected <body> from in body insertion mode
+(1:12) Unexpected EOF
+#document-fragment
+svg svg
+#document
+| <svg foo>
+
+#data
+<p><foo>
+#errors
+3: HTML start tag “p” in a foreign namespace context.
+(1:9) Unexpected EOF
+#document-fragment
+svg svg
+#document
+| <p>
+|   <foo>
+
+#data
+<p></p><foo>
+#errors
+3: HTML start tag “p” in a foreign namespace context.
+(1:13) Unexpected EOF
+#document-fragment
+svg svg
+#document
+| <p>
+| <svg foo>
diff --git a/tree-construction/html5test-com.dat b/tree-construction/html5test-com.dat
index f7380101..48d0bf95 100644
--- a/tree-construction/html5test-com.dat
+++ b/tree-construction/html5test-com.dat
@@ -142,7 +142,6 @@
 #data
 <!--foo--bar-->
 #errors
-(1,10): unexpected-char-in-comment
 (1,15): expected-doctype-but-got-eof
 #document
 | <!-- foo--bar -->
diff --git a/tree-construction/math.dat b/tree-construction/math.dat
index ae9cd7c6..d6a8ae56 100644
--- a/tree-construction/math.dat
+++ b/tree-construction/math.dat
@@ -1,6 +1,8 @@
 #data
 <math><tr><td><mo><tr>
 #errors
+(1,22): unexpected-start-tag
+(1,23): expected-closing-tag-but-got-eof
 #document-fragment
 td
 #document
@@ -12,6 +14,9 @@ td
 #data
 <math><tr><td><mo><tr>
 #errors
+(1,6): foster-parenting-start-tag
+(1,22): expected-tr-in-table-scope
+(1,23): expected-closing-tag-but-got-eof
 #document-fragment
 tr
 #document
@@ -23,6 +28,9 @@ tr
 #data
 <math><thead><mo><tbody>
 #errors
+(1,6): foster-parenting-start-tag
+(1,24): expected-table-part-in-table-scope
+(1,25): expected-closing-tag-but-got-eof
 #document-fragment
 thead
 #document
@@ -33,6 +41,9 @@ thead
 #data
 <math><tfoot><mo><tbody>
 #errors
+(1,6): foster-parenting-start-tag
+(1,24): expected-table-part-in-table-scope
+(1,25): expected-closing-tag-but-got-eof
 #document-fragment
 tfoot
 #document
@@ -43,6 +54,9 @@ tfoot
 #data
 <math><tbody><mo><tfoot>
 #errors
+(1,6): foster-parenting-start-tag
+(1,24): expected-table-part-in-table-scope
+(1,25): expected-closing-tag-but-got-eof
 #document-fragment
 tbody
 #document
@@ -53,6 +67,9 @@ tbody
 #data
 <math><tbody><mo></table>
 #errors
+(1,6): foster-parenting-start-tag
+(1,25): unexpected-end-tag-in-math
+(1,26): expected-closing-tag-but-got-eof
 #document-fragment
 tbody
 #document
@@ -63,6 +80,9 @@ tbody
 #data
 <math><thead><mo></table>
 #errors
+(1,6): foster-parenting-start-tag
+(1,25): unexpected-end-tag-in-math
+(1,26): expected-closing-tag-but-got-eof
 #document-fragment
 tbody
 #document
@@ -73,6 +93,9 @@ tbody
 #data
 <math><tfoot><mo></table>
 #errors
+(1,6): foster-parenting-start-tag
+(1,25): unexpected-end-tag-in-math
+(1,26): expected-closing-tag-but-got-eof
 #document-fragment
 tbody
 #document
diff --git a/tree-construction/menuitem-element.dat b/tree-construction/menuitem-element.dat
index 43aa0c67..fb13c3c3 100644
--- a/tree-construction/menuitem-element.dat
+++ b/tree-construction/menuitem-element.dat
@@ -3,7 +3,6 @@
 #errors
 10: Start tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”.
 10: End of file seen and there were open elements.
-10: Unclosed element “menuitem”.
 #document
 | <html>
 |   <head>
@@ -24,7 +23,6 @@
 <!DOCTYPE html><body><menuitem>A
 #errors
 32: End of file seen and there were open elements.
-31: Unclosed element “menuitem”.
 #document
 | <!DOCTYPE html>
 | <html>
@@ -37,8 +35,6 @@
 <!DOCTYPE html><body><menuitem>A<menuitem>B
 #errors
 43: End of file seen and there were open elements.
-42: Unclosed element “menuitem”.
-31: Unclosed element “menuitem”.
 #document
 | <!DOCTYPE html>
 | <html>
@@ -53,7 +49,6 @@
 <!DOCTYPE html><body><menuitem>A<menu>B</menu>
 #errors
 46: End of file seen and there were open elements.
-31: Unclosed element “menuitem”.
 #document
 | <!DOCTYPE html>
 | <html>
@@ -68,7 +63,6 @@
 <!DOCTYPE html><body><menuitem>A<hr>B
 #errors
 37: End of file seen and there were open elements.
-31: Unclosed element “menuitem”.
 #document
 | <!DOCTYPE html>
 | <html>
@@ -83,7 +77,6 @@
 <!DOCTYPE html><li><menuitem><li>
 #errors
 33: End tag “li” implied, but there were open elements.
-29: Unclosed element “menuitem”.
 #document
 | <!DOCTYPE html>
 | <html>
@@ -98,7 +91,6 @@
 #errors
 39: Stray end tag “menuitem”.
 40: End of file seen and there were open elements.
-25: Unclosed element “menuitem”.
 #document
 | <!DOCTYPE html>
 | <html>
@@ -112,9 +104,7 @@
 <!DOCTYPE html><p><b></p><menuitem>
 #errors
 25: End tag “p” seen, but there were open elements.
-21: Unclosed element “b”.
 35: End of file seen and there were open elements.
-35: Unclosed element “menuitem”.
 #document
 | <!DOCTYPE html>
 | <html>
@@ -129,7 +119,6 @@
 <!DOCTYPE html><menuitem><asdf></menuitem>x
 #errors
 42: End tag “menuitem” seen, but there were open elements.
-31: Unclosed element “asdf”.
 #document
 | <!DOCTYPE html>
 | <html>
@@ -184,7 +173,6 @@
 <!DOCTYPE html><option><menuitem>
 #errors
 33: End of file seen and there were open elements.
-33: Unclosed element “menuitem”.
 #document
 | <!DOCTYPE html>
 | <html>
@@ -197,7 +185,6 @@
 <!DOCTYPE html><menuitem><option>
 #errors
 33: End of file seen and there were open elements.
-25: Unclosed element “menuitem”.
 #document
 | <!DOCTYPE html>
 | <html>
@@ -210,7 +197,6 @@
 <!DOCTYPE html><menuitem></body>
 #errors
 32: End tag for  “body” seen, but there were unclosed elements.
-25: Unclosed element “menuitem”.
 #document
 | <!DOCTYPE html>
 | <html>
@@ -222,7 +208,6 @@
 <!DOCTYPE html><menuitem></html>
 #errors
 32: End tag for  “html” seen, but there were unclosed elements.
-25: Unclosed element “menuitem”.
 #document
 | <!DOCTYPE html>
 | <html>
@@ -234,7 +219,6 @@
 <!DOCTYPE html><menuitem><p>
 #errors
 28: End of file seen and there were open elements.
-25: Unclosed element “menuitem”.
 #document
 | <!DOCTYPE html>
 | <html>
@@ -247,7 +231,6 @@
 <!DOCTYPE html><menuitem><li>
 #errors
 29: End of file seen and there were open elements.
-25: Unclosed element “menuitem”.
 #document
 | <!DOCTYPE html>
 | <html>
diff --git a/tree-construction/namespace-sensitivity.dat b/tree-construction/namespace-sensitivity.dat
index ca35c0e7..050dca75 100644
--- a/tree-construction/namespace-sensitivity.dat
+++ b/tree-construction/namespace-sensitivity.dat
@@ -1,6 +1,12 @@
 #data
 <body><table><tr><td><svg><td><foreignObject><span></td>Foo
 #errors
+(1,6): expected-doctype-but-got-start-tag
+(1,56): unexpected-end-tag
+(1,60): foster-parenting-character
+(1,60): foster-parenting-character
+(1,60): foster-parenting-character
+(1,60): expected-closing-tag-but-got-eof
 #document
 | <html>
 |   <head>
diff --git a/tree-construction/plain-text-unsafe.dat b/tree-construction/plain-text-unsafe.dat
index dfb5cb63..e904eff0 100644
Binary files a/tree-construction/plain-text-unsafe.dat and b/tree-construction/plain-text-unsafe.dat differ
diff --git a/tree-construction/quirks01.dat b/tree-construction/quirks01.dat
new file mode 100644
index 00000000..bc58de5c
--- /dev/null
+++ b/tree-construction/quirks01.dat
@@ -0,0 +1,53 @@
+#data
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN"
+"http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd"><p><table>
+#errors
+(2,54): unknown-doctype
+(2,64): eof-in-table
+#document
+| <!DOCTYPE html "-//W3C//DTD XHTML 1.0 Frameset//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd">
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|     <table>
+
+#data
+<!DOCTYPE html SYSTEM "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"><p><table>
+#errors
+(1,83): unknown-doctype
+(1,93): eof-in-table
+#document
+| <!DOCTYPE html "" "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd">
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <table>
+
+#data
+<!DOCTYPE html PUBLIC "html"><p><table>
+#errors
+(1,30): unknown-doctype
+(1,39): eof-in-table
+#document
+| <!DOCTYPE html "html" "">
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <table>
+
+#data
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2//EN"
+   "http://www.w3.org/TR/html4/strict.dtd"><p><table>
+#errors
+(2,43): unknown-doctype
+(2,53): eof-in-table
+#document
+| <!DOCTYPE html "-//W3C//DTD HTML 3.2//EN" "http://www.w3.org/TR/html4/strict.dtd">
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <table>
diff --git a/tree-construction/ruby.dat b/tree-construction/ruby.dat
index 696782f0..f4e5e4e4 100644
--- a/tree-construction/ruby.dat
+++ b/tree-construction/ruby.dat
@@ -203,6 +203,7 @@
 <html><ruby>a<rtc>b<span></ruby></html>
 #errors
 (1,6): expected-doctype-but-got-start-tag
+(1,32): unexpected-end-tag
 #document
 | <html>
 |   <head>
diff --git a/tree-construction/scriptdata01.dat b/tree-construction/scriptdata01.dat
index e5708589..6abcb657 100644
--- a/tree-construction/scriptdata01.dat
+++ b/tree-construction/scriptdata01.dat
@@ -172,19 +172,6 @@ FOO<script>'<!-->'</script>BAR
 |       "'<!-->'"
 |     "BAR"
 
-#data
-FOO<script>'<!-->'</script>BAR
-#errors
-(1,3): expected-doctype-but-got-chars
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO"
-|     <script>
-|       "'<!-->'"
-|     "BAR"
-
 #data
 FOO<script>'<!-- potato'</script>BAR
 #errors
diff --git a/tree-construction/search-element.dat b/tree-construction/search-element.dat
new file mode 100644
index 00000000..2866d7ec
--- /dev/null
+++ b/tree-construction/search-element.dat
@@ -0,0 +1,46 @@
+#data
+<!doctype html><p>foo<search>bar<p>baz
+#errors
+(1,38): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       "foo"
+|     <search>
+|       "bar"
+|       <p>
+|         "baz"
+
+#data
+<!doctype html><search><p>foo</search>bar
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <search>
+|       <p>
+|         "foo"
+|     "bar"
+
+#data
+<!DOCTYPE html>xxx<svg><x><g><a><search><b>
+#errors
+ * (1,44) unexpected HTML-like start tag token in foreign content
+ * (1,44) unexpected end of file
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "xxx"
+|     <svg svg>
+|       <svg x>
+|         <svg g>
+|           <svg a>
+|             <svg search>
+|     <b>
diff --git a/tree-construction/svg.dat b/tree-construction/svg.dat
new file mode 100644
index 00000000..a452e7af
--- /dev/null
+++ b/tree-construction/svg.dat
@@ -0,0 +1,104 @@
+#data
+<svg><tr><td><title><tr>
+#errors
+(1:21) Unexpected <tr> tag
+(1:25) Unexpected EOF
+#document-fragment
+td
+#document
+| <svg svg>
+|   <svg tr>
+|     <svg td>
+|       <svg title>
+
+#data
+<svg><tr><td><title><tr>
+#errors
+(1:1) Unexpected <svg> tag
+(1:21) Unexpected <tr> tag
+(1:25) Unexpected EOF
+#document-fragment
+tr
+#document
+| <svg svg>
+|   <svg tr>
+|     <svg td>
+|       <svg title>
+
+#data
+<svg><thead><title><tbody>
+#errors
+(1:1) Unexpected <svg> tag
+(1:20) Unexpected <tbody> tag
+(1:27) Unexpected EOF
+#document-fragment
+thead
+#document
+| <svg svg>
+|   <svg thead>
+|     <svg title>
+
+#data
+<svg><tfoot><title><tbody>
+#errors
+(1:1) Unexpected <svg> tag
+(1:20) Unexpected <tbody> tag
+(1:27) Unexpected EOF
+#document-fragment
+tfoot
+#document
+| <svg svg>
+|   <svg tfoot>
+|     <svg title>
+
+#data
+<svg><tbody><title><tfoot>
+#errors
+(1:1) Unexpected <svg> tag
+(1:20) Unexpected <tfoot> tag
+(1:27) Unexpected EOF
+#document-fragment
+tbody
+#document
+| <svg svg>
+|   <svg tbody>
+|     <svg title>
+
+#data
+<svg><tbody><title></table>
+#errors
+(1:1) Unexpected <svg> tag
+(1:20) Unexpected </table> tag
+(1:28) Unexpected EOF
+#document-fragment
+tbody
+#document
+| <svg svg>
+|   <svg tbody>
+|     <svg title>
+
+#data
+<svg><thead><title></table>
+#errors
+(1:1) Unexpected <svg> tag
+(1:20) Unexpected </table> tag
+(1:28) Unexpected EOF
+#document-fragment
+tbody
+#document
+| <svg svg>
+|   <svg thead>
+|     <svg title>
+
+#data
+<svg><tfoot><title></table>
+#errors
+(1:1) Unexpected <svg> tag
+(1:20) Unexpected </table> tag
+(1:28) Unexpected EOF
+#document-fragment
+tbody
+#document
+| <svg svg>
+|   <svg tfoot>
+|     <svg title>
diff --git a/tree-construction/tables01.dat b/tree-construction/tables01.dat
index f0caaa3c..aa7915eb 100644
--- a/tree-construction/tables01.dat
+++ b/tree-construction/tables01.dat
@@ -284,3 +284,39 @@
 |             <svg svg>
 |               <svg desc>
 |           <td>
+
+#data
+<div><table><svg><foreignObject><select><table><s>
+#errors
+1:1: Expected a doctype token
+1:13: 'svg' tag isn't allowed here. Currently open tags: html, body, div, table.
+1:33: 'select' tag isn't allowed here. Currently open tags: html, body, div, table, svg, foreignobject.
+1:41: 'table' tag isn't allowed here. Currently open tags: html, body, div, table, svg, foreignobject, select.
+1:41: 'table' tag isn't allowed here. Currently open tags: html, body, div, table, svg, foreignobject.
+1:48: 's' tag isn't allowed here. Currently open tags: html, body, div, table.
+1:51: Premature end of file. Currently open tags: html, body, div, table, s.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       <svg svg>
+|         <svg foreignObject>
+|           <select>
+|       <table>
+|       <s>
+|       <table>
+
+#data
+<table>a<!doctype html>
+#errors
+(1,1): expected-doctype-but-got-start-tag
+(1,8): illegal-character-token
+(1,9): illegal-doctype
+(1,24): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     "a"
+|     <table>
diff --git a/tree-construction/template.dat b/tree-construction/template.dat
index b38d4f58..45fb507c 100644
--- a/tree-construction/template.dat
+++ b/tree-construction/template.dat
@@ -867,21 +867,6 @@ no doctype
 |         <link>
 |         <td>
 
-#data
-<body><template><template><tr></tr></template><td></td></template>
-#errors
-no doctype
-#document
-| <html>
-|   <head>
-|   <body>
-|     <template>
-|       content
-|         <template>
-|           content
-|             <tr>
-|         <td>
-
 #data
 <body><table><colgroup><template><col></col></template></colgroup></table></body>
 #errors
@@ -1089,7 +1074,11 @@ eof in template
 <body><template><col>Hello
 #errors
 no doctype
-unexpected text
+(1,27): foster-parenting-character
+(1,27): foster-parenting-character
+(1,27): foster-parenting-character
+(1,27): foster-parenting-character
+(1,27): foster-parenting-character
 eof in template
 #document
 | <html>
@@ -1103,7 +1092,7 @@ eof in template
 <body><template><i><menu>Foo</i>
 #errors
 no doctype
-mising /menu
+missing /menu
 eof in template
 #document
 | <html>
@@ -1568,6 +1557,19 @@ no doctype
 |         "Foo"
 |   <body>
 
+#data
+<html><head></head><template></template><head>
+#errors
+no doctype
+template-after-head
+head-after-head
+#document
+| <html>
+|   <head>
+|     <template>
+|       content
+|   <body>
+
 #data
 <!DOCTYPE HTML><dummy><table><template><table><template><table><script>
 #errors
@@ -1593,6 +1595,11 @@ eof table
 #data
 <template><a><table><a>
 #errors
+(1,10): expected-doctype-but-got-start-tag
+(1,23): foster-parenting-start-tag
+(1,23): unexpected-start-tag
+(1,23): formatting-element-not-in-scope
+(1,24): eof-in-template
 #document
 | <html>
 |   <head>
@@ -1602,3 +1609,65 @@ eof table
 |           <a>
 |           <table>
 |   <body>
+
+#data
+<template><form><input name="q"></form><div>second</div></template>
+#errors
+#document-fragment
+template
+#document
+| <template>
+|   content
+|     <form>
+|       <input>
+|         name="q"
+|     <div>
+|       "second"
+
+#data
+<!DOCTYPE HTML><template><tr><td>cell</td></tr></template>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <template>
+|       content
+|         <tr>
+|           <td>
+|             "cell"
+|   <body>
+
+#data
+<!DOCTYPE HTML><template> <tr> <td>cell</td> </tr> </template>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <template>
+|       content
+|         " "
+|         <tr>
+|           " "
+|           <td>
+|             "cell"
+|           " "
+|         " "
+|   <body>
+
+#data
+<!DOCTYPE HTML><template><tr><td>cell</td></tr>a</template>
+#errors
+(1,59): foster-parenting-character
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <template>
+|       content
+|         <tr>
+|           <td>
+|             "cell"
+|         "a"
+|   <body>
diff --git a/tree-construction/tests1.dat b/tree-construction/tests1.dat
index 1c36c1b8..e80e6401 100644
--- a/tree-construction/tests1.dat
+++ b/tree-construction/tests1.dat
@@ -425,7 +425,6 @@ Line1<br>Line2<br>Line3<br>Line4
 #data
 <!-----><font><div>hello<table>excite!<b>me!<th><i>please!</tr><!--X-->
 #errors
-(1,7): unexpected-dash-after-double-dash-in-comment
 (1,14): expected-doctype-but-got-start-tag
 (1,41): unexpected-start-tag-implies-table-voodoo
 (1,48): foster-parenting-character-in-table
@@ -1434,24 +1433,6 @@ Line1<br>Line2<br>Line3<br>Line4
 |     <meta>
 |     <p>
 
-#data
-<b><table><td><i></table>
-#errors
-(1,3): expected-doctype-but-got-start-tag
-(1,14): unexpected-cell-in-table-body
-(1,25): unexpected-cell-end-tag
-(1,25): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <b>
-|       <table>
-|         <tbody>
-|           <tr>
-|             <td>
-|               <i>
-
 #data
 <b><table><td></b><i></table>
 #errors
@@ -1548,19 +1529,6 @@ Line1<br>Line2<br>Line3<br>Line4
 |     <p>
 |     <p>
 
-#data
-<p><hr></p>
-#errors
-(1,3): expected-doctype-but-got-start-tag
-(1,11): unexpected-end-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|     <hr>
-|     <p>
-
 #data
 <select><b><option><select><option></b></select>
 #errors
diff --git a/tree-construction/tests16.dat b/tree-construction/tests16.dat
index cea7340a..05f34c13 100644
--- a/tree-construction/tests16.dat
+++ b/tree-construction/tests16.dat
@@ -221,7 +221,6 @@
 <!doctype html><script><!
 #errors
 (1,25): expected-script-data-but-got-eof
-(1,25): expected-named-closing-tag-but-got-eof
 #document
 | <!DOCTYPE html>
 | <html>
@@ -1525,7 +1524,6 @@
 #errors
 (1,8): expected-doctype-but-got-start-tag
 (1,10): expected-script-data-but-got-eof
-(1,10): expected-named-closing-tag-but-got-eof
 #document
 | <html>
 |   <head>
diff --git a/tree-construction/tests18.dat b/tree-construction/tests18.dat
index 05363b39..0b6d5dc4 100644
--- a/tree-construction/tests18.dat
+++ b/tree-construction/tests18.dat
@@ -3,7 +3,6 @@
 #errors
 11: Start tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”.
 23: End of file seen and there were open elements.
-11: Unclosed element “plaintext”.
 #document
 | <html>
 |   <head>
@@ -27,7 +26,6 @@
 <!doctype html><html><plaintext></plaintext>
 #errors
 44: End of file seen and there were open elements.
-32: Unclosed element “plaintext”.
 #document
 | <!DOCTYPE html>
 | <html>
@@ -40,7 +38,6 @@
 <!doctype html><head><plaintext></plaintext>
 #errors
 44: End of file seen and there were open elements.
-32: Unclosed element “plaintext”.
 #document
 | <!DOCTYPE html>
 | <html>
@@ -54,7 +51,6 @@
 #errors
 42: Bad start tag in “plaintext” in “head”.
 54: End of file seen and there were open elements.
-42: Unclosed element “plaintext”.
 #script-off
 #document
 | <!DOCTYPE html>
@@ -69,7 +65,6 @@
 <!doctype html></head><plaintext></plaintext>
 #errors
 45: End of file seen and there were open elements.
-33: Unclosed element “plaintext”.
 #document
 | <!DOCTYPE html>
 | <html>
@@ -82,7 +77,6 @@
 <!doctype html><body><plaintext></plaintext>
 #errors
 44: End of file seen and there were open elements.
-32: Unclosed element “plaintext”.
 #document
 | <!DOCTYPE html>
 | <html>
@@ -95,8 +89,19 @@
 <!doctype html><table><plaintext></plaintext>
 #errors
 (1,33): foster-parenting-start-tag
-(1,45): foster-parenting-character
-(1,45): eof-in-table
+(1,46): foster-parenting-character
+(1,46): foster-parenting-character
+(1,46): foster-parenting-character
+(1,46): foster-parenting-character
+(1,46): foster-parenting-character
+(1,46): foster-parenting-character
+(1,46): foster-parenting-character
+(1,46): foster-parenting-character
+(1,46): foster-parenting-character
+(1,46): foster-parenting-character
+(1,46): foster-parenting-character
+(1,46): foster-parenting-character
+(1,46): eof-in-table
 #document
 | <!DOCTYPE html>
 | <html>
@@ -110,8 +115,19 @@
 <!doctype html><table><tbody><plaintext></plaintext>
 #errors
 (1,40): foster-parenting-start-tag
-(1,41): foster-parenting-character
-(1,52): eof-in-table
+(1,53): foster-parenting-character
+(1,53): foster-parenting-character
+(1,53): foster-parenting-character
+(1,53): foster-parenting-character
+(1,53): foster-parenting-character
+(1,53): foster-parenting-character
+(1,53): foster-parenting-character
+(1,53): foster-parenting-character
+(1,53): foster-parenting-character
+(1,53): foster-parenting-character
+(1,53): foster-parenting-character
+(1,53): foster-parenting-character
+(1,53): eof-in-table
 #document
 | <!DOCTYPE html>
 | <html>
@@ -126,8 +142,19 @@
 <!doctype html><table><tbody><tr><plaintext></plaintext>
 #errors
 (1,44): foster-parenting-start-tag
-(1,56): foster-parenting-character
-(1,56): eof-in-table
+(1,57): foster-parenting-character
+(1,57): foster-parenting-character
+(1,57): foster-parenting-character
+(1,57): foster-parenting-character
+(1,57): foster-parenting-character
+(1,57): foster-parenting-character
+(1,57): foster-parenting-character
+(1,57): foster-parenting-character
+(1,57): foster-parenting-character
+(1,57): foster-parenting-character
+(1,57): foster-parenting-character
+(1,57): foster-parenting-character
+(1,57): eof-in-table
 #document
 | <!DOCTYPE html>
 | <html>
@@ -173,11 +200,20 @@
 #data
 <!doctype html><table><colgroup><plaintext></plaintext>
 #errors
-43: Start tag “plaintext” seen in “table”.
-55: Misplaced non-space characters inside a table.
+(1,43): foster-parenting-start-tag
+(1,56): foster-parenting-character
+(1,56): foster-parenting-character
+(1,56): foster-parenting-character
+(1,56): foster-parenting-character
+(1,56): foster-parenting-character
+(1,56): foster-parenting-character
+(1,56): foster-parenting-character
+(1,56): foster-parenting-character
+(1,56): foster-parenting-character
+(1,56): foster-parenting-character
+(1,56): foster-parenting-character
+(1,56): foster-parenting-character
 55: End of file seen and there were open elements.
-43: Unclosed element “plaintext”.
-22: Unclosed element “table”.
 #document
 | <!DOCTYPE html>
 | <html>
@@ -194,7 +230,6 @@
 34: Stray start tag “plaintext”.
 46: Stray end tag “plaintext”.
 47: End of file seen and there were open elements.
-23: Unclosed element “select”.
 #document
 | <!DOCTYPE html>
 | <html>
@@ -210,8 +245,6 @@
 41: Stray start tag “plaintext”.
 51: “caption” start tag with “select” open.
 52: End of file seen and there were open elements.
-51: Unclosed element “caption”.
-22: Unclosed element “table”.
 #document
 | <!DOCTYPE html>
 | <html>
@@ -227,8 +260,6 @@
 <!doctype html><template><plaintext>a</template>b
 #errors
 49: End of file seen and there were open elements.
-36: Unclosed element “plaintext”.
-25: Unclosed element “template”.
 #document
 | <!DOCTYPE html>
 | <html>
@@ -244,7 +275,6 @@
 #errors
 39: Stray start tag “plaintext”.
 51: End of file seen and there were open elements.
-39: Unclosed element “plaintext”.
 #document
 | <!DOCTYPE html>
 | <html>
@@ -259,7 +289,6 @@
 36: Stray start tag “plaintext”.
 48: Stray end tag “plaintext”.
 48: End of file seen and there were open elements.
-25: Unclosed element “frameset”.
 #document
 | <!DOCTYPE html>
 | <html>
@@ -282,7 +311,6 @@
 #errors
 46: Stray start tag “plaintext”.
 58: End of file seen and there were open elements.
-46: Unclosed element “plaintext”.
 #document
 | <!DOCTYPE html>
 | <html>
@@ -306,7 +334,6 @@
 <!doctype html><svg><plaintext>a</plaintext>b
 #errors
 45: End of file seen and there were open elements.
-20: Unclosed element “svg”.
 #document
 | <!DOCTYPE html>
 | <html>
@@ -321,9 +348,6 @@
 <!doctype html><svg><title><plaintext>a</plaintext>b
 #errors
 52: End of file seen and there were open elements.
-38: Unclosed element “plaintext”.
-27: Unclosed element “title”.
-20: Unclosed element “svg”.
 #document
 | <!DOCTYPE html>
 | <html>
diff --git a/tree-construction/tests19.dat b/tree-construction/tests19.dat
index a1897774..20cdeabc 100644
--- a/tree-construction/tests19.dat
+++ b/tree-construction/tests19.dat
@@ -387,19 +387,6 @@
 |     <select>
 |       <option>
 
-#data
-<!doctype html><select><option></optgroup>
-#errors
-(1,42): unexpected-end-tag-in-select
-(1,42): eof-in-select
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <select>
-|       <option>
-
 #data
 <!doctype html><dd><optgroup><dd>
 #errors
@@ -1015,7 +1002,6 @@
 <!doctype html><p><math></p>a
 #errors
 (1,28): unexpected-end-tag
-(1,28): unexpected-end-tag
 #document
 | <!DOCTYPE html>
 | <html>
@@ -1236,48 +1222,6 @@
 |           "c"
 |     <table>
 
-#data
-<!doctype html><table><i>a<b>b<div>c<a>d</i>e</b>f
-#errors
-(1,25): foster-parenting-start-tag
-(1,26): foster-parenting-character
-(1,29): foster-parenting-start-tag
-(1,30): foster-parenting-character
-(1,35): foster-parenting-start-tag
-(1,36): foster-parenting-character
-(1,39): foster-parenting-start-tag
-(1,40): foster-parenting-character
-(1,44): foster-parenting-end-tag
-(1,44): adoption-agency-1.3
-(1,44): adoption-agency-1.3
-(1,45): foster-parenting-character
-(1,49): foster-parenting-end-tag
-(1,44): adoption-agency-1.3
-(1,44): adoption-agency-1.3
-(1,50): foster-parenting-character
-(1,50): eof-in-table
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <i>
-|       "a"
-|       <b>
-|         "b"
-|     <b>
-|     <div>
-|       <b>
-|         <i>
-|           "c"
-|           <a>
-|             "d"
-|         <a>
-|           "e"
-|       <a>
-|         "f"
-|     <table>
-
 #data
 <!doctype html><table><i>a<div>b<tr>c<b>d</i>e
 #errors
diff --git a/tree-construction/tests2.dat b/tree-construction/tests2.dat
index b44fec4d..11ef9b16 100644
--- a/tree-construction/tests2.dat
+++ b/tree-construction/tests2.dat
@@ -584,6 +584,16 @@
 |   <head>
 |   <body>
 
+#data
+<!DOCTYPE html> <!DOCTYPE html>
+#errors
+Line: 1 Col: 31 Unexpected DOCTYPE. Ignored.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+
 #data
 test
 test
diff --git a/tree-construction/tests20.dat b/tree-construction/tests20.dat
index afdae743..80c57d1a 100644
--- a/tree-construction/tests20.dat
+++ b/tree-construction/tests20.dat
@@ -25,6 +25,32 @@
 |       <button>
 |         <address>
 
+#data
+<!doctype html><p><button><article>
+#errors
+(1,36): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <button>
+|         <article>
+
+#data
+<!doctype html><p><button><aside>
+#errors
+(1,34): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <button>
+|         <aside>
+
 #data
 <!doctype html><p><button><blockquote>
 #errors
@@ -38,6 +64,175 @@
 |       <button>
 |         <blockquote>
 
+#data
+<!doctype html><p><button><center>
+#errors
+(1,35): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <button>
+|         <center>
+
+#data
+<!doctype html><p><button><details>
+#errors
+(1,36): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <button>
+|         <details>
+
+#data
+<!doctype html><p><button><dialog>
+#errors
+(1,35): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <button>
+|         <dialog>
+
+#data
+<!doctype html><p><button><dir>
+#errors
+(1,32): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <button>
+|         <dir>
+
+#data
+<!doctype html><p><button><div>
+#errors
+(1,32): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <button>
+|         <div>
+
+#data
+<!doctype html><p><button><dl>
+#errors
+(1,31): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <button>
+|         <dl>
+
+#data
+<!doctype html><p><button><fieldset>
+#errors
+(1,37): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <button>
+|         <fieldset>
+
+#data
+<!doctype html><p><button><figcaption>
+#errors
+(1,39): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <button>
+|         <figcaption>
+
+#data
+<!doctype html><p><button><figure>
+#errors
+(1,35): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <button>
+|         <figure>
+
+#data
+<!doctype html><p><button><footer>
+#errors
+(1,35): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <button>
+|         <footer>
+
+#data
+<!doctype html><p><button><header>
+#errors
+(1,35): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <button>
+|         <header>
+
+#data
+<!doctype html><p><button><hgroup>
+#errors
+(1,35): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <button>
+|         <hgroup>
+
+#data
+<!doctype html><p><button><main>
+#errors
+(1,33): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <button>
+|         <main>
+
 #data
 <!doctype html><p><button><menu>
 #errors
@@ -51,6 +246,32 @@
 |       <button>
 |         <menu>
 
+#data
+<!doctype html><p><button><nav>
+#errors
+(1,32): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <button>
+|         <nav>
+
+#data
+<!doctype html><p><button><ol>
+#errors
+(1,31): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <button>
+|         <ol>
+
 #data
 <!doctype html><p><button><p>
 #errors
@@ -64,6 +285,45 @@
 |       <button>
 |         <p>
 
+#data
+<!doctype html><p><button><search>
+#errors
+(1,35): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <button>
+|         <search>
+
+#data
+<!doctype html><p><button><section>
+#errors
+(1,36): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <button>
+|         <section>
+
+#data
+<!doctype html><p><button><summary>
+#errors
+(1,36): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <button>
+|         <summary>
+
 #data
 <!doctype html><p><button><ul>
 #errors
@@ -249,17 +509,16 @@
 |         <p>
 
 #data
-<!doctype html><address><button></address>a
+<!doctype html><button><p></button>x
 #errors
-(1,42): end-tag-too-early
 #document
 | <!DOCTYPE html>
 | <html>
 |   <head>
 |   <body>
-|     <address>
-|       <button>
-|     "a"
+|     <button>
+|       <p>
+|     "x"
 
 #data
 <!doctype html><address><button></address>a
@@ -557,6 +816,7 @@
 <math><annotation-xml></svg>x
 #errors
 (1,6): expected-doctype-but-got-start-tag
+(1,28): unexpected-end-tag-in-math
 (1,28): unexpected-end-tag
 (1,29): expected-closing-tag-but-got-eof
 #document
diff --git a/tree-construction/tests21.dat b/tree-construction/tests21.dat
index 1e2af7c1..a926b138 100644
--- a/tree-construction/tests21.dat
+++ b/tree-construction/tests21.dat
@@ -41,20 +41,7 @@
 <svg><![CDATA[foo
 #errors
 (1,5): expected-doctype-but-got-start-tag
-(1,17): expected-closing-tag-but-got-eof
-#new-errors
 (1:18) eof-in-cdata
-#document
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       "foo"
-
-#data
-<svg><![CDATA[foo
-#errors
-(1,5): expected-doctype-but-got-start-tag
 (1,17): expected-closing-tag-but-got-eof
 #new-errors
 (1:18) eof-in-cdata
@@ -69,6 +56,7 @@
 <svg><![CDATA[
 #errors
 (1,5): expected-doctype-but-got-start-tag
+(1:15) eof-in-cdata
 (1,14): expected-closing-tag-but-got-eof
 #new-errors
 (1:15) eof-in-cdata
@@ -101,22 +89,11 @@
 |     <svg svg>
 |       "]] >"
 
-#data
-<svg><![CDATA[]] >]]>
-#errors
-(1,5): expected-doctype-but-got-start-tag
-(1,21): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       "]] >"
-
 #data
 <svg><![CDATA[]]
 #errors
 (1,5): expected-doctype-but-got-start-tag
+(1:17) eof-in-cdata
 (1,16): expected-closing-tag-but-got-eof
 #new-errors
 (1:17) eof-in-cdata
@@ -131,6 +108,7 @@
 <svg><![CDATA[]
 #errors
 (1,5): expected-doctype-but-got-start-tag
+(1:16) eof-in-cdata
 (1,15): expected-closing-tag-but-got-eof
 #new-errors
 (1:16) eof-in-cdata
@@ -145,6 +123,7 @@
 <svg><![CDATA[]>a
 #errors
 (1,5): expected-doctype-but-got-start-tag
+(1:16) eof-in-cdata
 (1,17): expected-closing-tag-but-got-eof
 #new-errors
 (1:18) eof-in-cdata
@@ -236,6 +215,7 @@
 <svg><![CDATA[<svg>a
 #errors
 (1,5): expected-doctype-but-got-start-tag
+(1:21) eof-in-cdata
 (1,20): expected-closing-tag-but-got-eof
 #new-errors
 (1:21) eof-in-cdata
@@ -250,6 +230,7 @@
 <svg><![CDATA[</svg>a
 #errors
 (1,5): expected-doctype-but-got-start-tag
+(1:22) eof-in-cdata
 (1,21): expected-closing-tag-but-got-eof
 #new-errors
 (1:22) eof-in-cdata
diff --git a/tree-construction/tests26.dat b/tree-construction/tests26.dat
index de453b9c..1ba2be2d 100644
--- a/tree-construction/tests26.dat
+++ b/tree-construction/tests26.dat
@@ -391,3 +391,63 @@ Line 1 Col 19 Expected closing tag. Unexpected end of file.
 |     <button>
 |       <p>
 |     <button>
+
+#data
+<svg></p><foo>
+#errors
+(1:1) Missing doctype
+9: HTML end tag “p” in a foreign namespace context.
+(1:6) Unexpected </p> from in body insertion mode
+(1:16) Unexpected EOF
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|     <p>
+|     <foo>
+
+#data
+<svg></br><foo>
+#errors
+(1:1) Missing doctype
+10: HTML end tag “br” in a foreign namespace context.
+(1:6) Unexpected </br> from in body insertion mode
+(1:16) Unexpected EOF
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|     <br>
+|     <foo>
+
+#data
+<math></p><foo>
+#errors
+(1:1) Missing doctype
+10: HTML end tag “p” in a foreign namespace context.
+(1:7) Unexpected </p> from in body insertion mode
+(1:16) Unexpected EOF
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|     <p>
+|     <foo>
+
+#data
+<math></br><foo>
+#errors
+(1:1) Missing doctype
+11: HTML end tag “br” in a foreign namespace context.
+(1:7) Unexpected </br> from in body insertion mode
+(1:17) Unexpected EOF
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|     <br>
+|     <foo>
diff --git a/tree-construction/tests4.dat b/tree-construction/tests4.dat
index 0a6174c3..4f0cf70e 100644
--- a/tree-construction/tests4.dat
+++ b/tree-construction/tests4.dat
@@ -56,3 +56,19 @@ head
 #document
 | <title>
 |   "setting head's innerHTML"
+
+#data
+direct <title> content
+#errors
+#document-fragment
+title
+#document
+| "direct <title> content"
+
+#data
+<!-- inside </script> -->
+#errors
+#document-fragment
+script
+#document
+| "<!-- inside </script> -->"
diff --git a/tree-construction/tests6.dat b/tree-construction/tests6.dat
index f3991232..8c36dd3d 100644
--- a/tree-construction/tests6.dat
+++ b/tree-construction/tests6.dat
@@ -48,7 +48,6 @@
 #data
 <!doctype>
 #errors
-(1,9): need-space-after-doctype
 (1,10): expected-doctype-name-but-got-right-bracket
 (1,10): unknown-doctype
 #new-errors
@@ -604,6 +603,7 @@ html
 #data
 <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"><html></html>
 #errors
+(1,50): doctype-has-public-identifier
 #document
 | <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "">
 | <html>
diff --git a/tree-construction/tests7.dat b/tree-construction/tests7.dat
index 395dc72b..b2db4de1 100644
--- a/tree-construction/tests7.dat
+++ b/tree-construction/tests7.dat
@@ -46,6 +46,42 @@
 |       "X"
 |   <body>
 
+#data
+<!doctype html></head><base>X
+#errors
+(1,28): unexpected-start-tag-out-of-my-head
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <base>
+|   <body>
+|     "X"
+
+#data
+<!doctype html></head><basefont>X
+#errors
+(1,32): unexpected-start-tag-out-of-my-head
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <basefont>
+|   <body>
+|     "X"
+
+#data
+<!doctype html></head><bgsound>X
+#errors
+(1,31): unexpected-start-tag-out-of-my-head
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <bgsound>
+|   <body>
+|     "X"
+
 #data
 <!doctype html><table><meta></table>
 #errors
@@ -391,7 +427,6 @@ A<table><tr> B</tr> </em>C</table>
 (1,1): expected-doctype-but-got-chars
 (1,13): foster-parenting-character
 (1,14): foster-parenting-character
-(1,20): foster-parenting-character
 (1,25): unexpected-end-tag
 (1,25): unexpected-end-tag-in-special-element
 (1,26): foster-parenting-character
diff --git a/tree-construction/tests8.dat b/tree-construction/tests8.dat
index ba2e63dd..d532801e 100644
--- a/tree-construction/tests8.dat
+++ b/tree-construction/tests8.dat
@@ -90,6 +90,9 @@ x"
 #data
 <table><li><li></table>
 #errors
+(1,7): expected-doctype-but-got-start-tag
+(1,11): foster-parenting-start-tag
+(1,15): foster-parenting-start-tag
 #document
 | <html>
 |   <head>
diff --git a/tree-construction/tests_innerHTML_1.dat b/tree-construction/tests_innerHTML_1.dat
index 54f43684..1a37ee52 100644
--- a/tree-construction/tests_innerHTML_1.dat
+++ b/tree-construction/tests_innerHTML_1.dat
@@ -110,16 +110,6 @@ table
 #document
 | <a>
 
-#data
-<a>
-#errors
-(1,3): unexpected-start-tag-implies-table-voodoo
-(1,3): eof-in-table
-#document-fragment
-table
-#document
-| <a>
-
 #data
 <a><caption>a
 #errors
@@ -502,30 +492,6 @@ tbody
 | <tr>
 |   <td>
 
-#data
-<a><td>
-#errors
-(1,3): unexpected-start-tag-implies-table-voodoo
-(1,7): unexpected-cell-in-table-body
-#document-fragment
-tbody
-#document
-| <a>
-| <tr>
-|   <td>
-
-#data
-<a><td>
-#errors
-(1,3): unexpected-start-tag-implies-table-voodoo
-(1,7): unexpected-cell-in-table-body
-#document-fragment
-tbody
-#document
-| <a>
-| <tr>
-|   <td>
-
 #data
 <td><table><tbody><a><tr>
 #errors
@@ -648,16 +614,6 @@ tr
 |   <table>
 | <td>
 
-#data
-<td><table></table><td>
-#errors
-#document-fragment
-tr
-#document
-| <td>
-|   <table>
-| <td>
-
 #data
 <caption><a>
 #errors
diff --git a/tree-construction/webkit01.dat b/tree-construction/webkit01.dat
index b5fafdc7..d30e12e5 100644
--- a/tree-construction/webkit01.dat
+++ b/tree-construction/webkit01.dat
@@ -307,6 +307,20 @@ console.log("FOO<span>BAR</span>BAZ");
 |   <body>
 | <!--  Hi there  -->
 
+#data
+<html><body></body></html><!-- Comment A --><!-- Comment B --><!-- Comment C --><!-- Comment D --><!-- Comment E -->
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+| <!--  Comment A  -->
+| <!--  Comment B  -->
+| <!--  Comment C  -->
+| <!--  Comment D  -->
+| <!--  Comment E  -->
+
 #data
 <html><body></body></html>x<!-- Hi there -->
 #errors
@@ -345,6 +359,32 @@ console.log("FOO<span>BAR</span>BAZ");
 |     <!--  Hi there  -->
 | <!--  Again  -->
 
+#data
+<html><body></body>
+   <!-- Hi there --></html>
+#errors
+no-doctype
+#document
+| <html>
+|   <head>
+|   <body>
+|     "
+   "
+|   <!--  Hi there  -->
+
+#data
+<html><body></body></html>
+   <!-- Hi there -->
+#errors
+no-doctype
+#document
+| <html>
+|   <head>
+|   <body>
+|     "
+   "
+| <!--  Hi there  -->
+
 #data
 <html><body><ruby><div><rp>xx</rp></div></ruby></body></html>
 #errors
@@ -673,6 +713,10 @@ console.log("FOO<span>BAR</span>BAZ");
 #data
 <table><tr><td><svg><desc><td></desc><circle>
 #errors
+(1,7): expected-doctype-but-got-start-tag
+(1,30): unexpected-start-tag
+(1,37): unexpected-end-tag
+(1,22): expected-closing-tag-but-got-eof
 #document
 | <html>
 |   <head>
diff --git a/tree-construction/webkit02.dat b/tree-construction/webkit02.dat
index 791991d2..7d817ec6 100644
--- a/tree-construction/webkit02.dat
+++ b/tree-construction/webkit02.dat
@@ -138,6 +138,7 @@
 #data
 <legend>test</legend>
 #errors
+(1,7): expected-doctype-but-got-start-tag
 #document
 | <html>
 |   <head>
@@ -148,6 +149,9 @@
 #data
 <table><input>
 #errors
+(1,7): expected-doctype-but-got-start-tag
+(1,14): foster-parenting-start-tag
+(1,15): expected-closing-tag-but-got-eof
 #document
 | <html>
 |   <head>
@@ -155,9 +159,36 @@
 |     <input>
 |     <table>
 
+#data
+<b><em><dcell><postfield><postfield><postfield><postfield><missing_glyph><missing_glyph><missing_glyph><missing_glyph><hkern><aside></b></em>
+#errors
+unexpected-b-end-tag
+unexpected-em-end-tag
+eof-in-aside
+#document-fragment
+div
+#document
+| <b>
+|   <em>
+|     <dcell>
+|       <postfield>
+|         <postfield>
+|           <postfield>
+|             <postfield>
+|               <missing_glyph>
+|                 <missing_glyph>
+|                   <missing_glyph>
+|                     <missing_glyph>
+|                       <hkern>
+| <aside>
+|   <b>
+
 #data
 <b><em><foo><foo><aside></b>
 #errors
+(1,3): expected-doctype-but-got-start-tag
+(1,28): adoption-agency-9
+(1,29): expected-closing-tag-but-got-eof
 #document
 | <html>
 |   <head>
@@ -173,6 +204,10 @@
 #data
 <b><em><foo><foo><aside></b></em>
 #errors
+(1,3): expected-doctype-but-got-start-tag
+(1,28): adoption-agency-9
+(1,33): adoption-agency-9
+(1,34): expected-closing-tag-but-got-eof
 #document
 | <html>
 |   <head>
@@ -189,6 +224,9 @@
 #data
 <b><em><foo><foo><foo><aside></b>
 #errors
+(1,3): expected-doctype-but-got-start-tag
+(1,33): adoption-agency-9
+(1,34): expected-closing-tag-but-got-eof
 #document
 | <html>
 |   <head>
@@ -204,6 +242,10 @@
 #data
 <b><em><foo><foo><foo><aside></b></em>
 #errors
+(1,3): expected-doctype-but-got-start-tag
+(1,33): adoption-agency-9
+(1,38): adoption-agency-9
+(1,39): expected-closing-tag-but-got-eof
 #document
 | <html>
 |   <head>
@@ -219,6 +261,9 @@
 #data
 <b><em><foo><foo><foo><foo><foo><foo><foo><foo><foo><foo><aside></b></em>
 #errors
+(1,68): adoption-agency-9
+(1,73): adoption-agency-9
+(1,74): expected-closing-tag-but-got-eof
 #document-fragment
 div
 #document
@@ -240,6 +285,9 @@ div
 #data
 <b><em><foo><foob><foob><foob><foob><fooc><fooc><fooc><fooc><food><aside></b></em>
 #errors
+(1,77): adoption-agency-9
+(1,82): adoption-agency-9
+(1,83): expected-closing-tag-but-got-eof
 #document-fragment
 div
 #document
@@ -261,6 +309,8 @@ div
 #data
 <option><XH<optgroup></optgroup>
 #errors
+(1,21): unexpected-start-tag-in-select
+(1,32): unexpected-end-tag-in-select
 #document-fragment
 select
 #document
@@ -269,6 +319,8 @@ select
 #data
 <svg><foreignObject><div>foo</div><plaintext></foreignObject></svg><div>bar</div>
 #errors
+(1,5): expected-doctype-but-got-start-tag
+(1,82): expected-closing-tag-but-got-eof
 #document
 | <html>
 |   <head>
@@ -283,6 +335,8 @@ select
 #data
 <svg><foreignObject></foreignObject><title></svg>foo
 #errors
+(1,5): expected-doctype-but-got-start-tag
+(1,49): expected-one-end-tag-but-got-another
 #document
 | <html>
 |   <head>
@@ -295,9 +349,206 @@ select
 #data
 </foreignObject><plaintext><div>foo</div>
 #errors
+(1,16): expected-doctype-but-got-end-tag
+(1,16): unexpected-end-tag-before-html
+(1,42): expected-closing-tag-but-got-eof
 #document
 | <html>
 |   <head>
 |   <body>
 |     <plaintext>
 |       "<div>foo</div>"
+
+#data
+<svg xml:base xml:lang xml:space xml:baaah definitionurl>
+#errors
+no-doctype
+eof-in-svg
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       definitionurl=""
+|       xml lang=""
+|       xml space=""
+|       xml:baaah=""
+|       xml:base=""
+
+#data
+<math definitionurl xlink:title xlink:show>
+#errors
+no-doctype
+eof-in-math
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       definitionURL=""
+|       xlink show=""
+|       xlink title=""
+
+#data
+<math DEFINITIONURL>
+#errors
+no-doctype
+eof-in-math
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       definitionURL=""
+
+#data
+<select><hr>
+#errors
+1:1: ERROR: Expected a doctype token
+1:13: ERROR: Premature end of file. Currently open tags: html, body, select.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <hr>
+
+#data
+<select><option><hr>
+#errors
+1:1: ERROR: Expected a doctype token
+1:21: ERROR: Premature end of file. Currently open tags: html, body, select.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <option>
+|       <hr>
+
+#data
+<select><optgroup><option><hr>
+#errors
+1:1: ERROR: Expected a doctype token
+1:31: ERROR: Premature end of file. Currently open tags: html, body, select.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <optgroup>
+|         <option>
+|       <hr>
+
+#data
+<select><optgroup><hr>
+#errors
+1:1: ERROR: Expected a doctype token
+1:23: ERROR: Premature end of file. Currently open tags: html, body, select.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <optgroup>
+|       <hr>
+
+#data
+<select><option><optgroup><hr>
+#errors
+1:1: ERROR: Expected a doctype token
+1:31: ERROR: Premature end of file. Currently open tags: html, body, select.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <option>
+|       <optgroup>
+|       <hr>
+
+#data
+<table><tr><td><select><hr>
+#errors
+1:1: ERROR: Expected a doctype token
+1:28: ERROR: Premature end of file. Currently open tags: html, body, table, tbody, tr, td, select.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <select>
+|               <hr>
+
+#data
+<table><tr><td><select><option><hr>
+#errors
+1:1: ERROR: Expected a doctype token
+1:36: ERROR: Premature end of file. Currently open tags: html, body, table, tbody, tr, td, select.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <select>
+|               <option>
+|               <hr>
+
+#data
+<table><tr><td><select><optgroup><option><hr>
+#errors
+1:1: ERROR: Expected a doctype token
+1:46: ERROR: Premature end of file. Currently open tags: html, body, table, tbody, tr, td, select.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <select>
+|               <optgroup>
+|                 <option>
+|               <hr>
+
+#data
+<table><tr><td><select><optgroup><hr>
+#errors
+1:1: ERROR: Expected a doctype token
+1:38: ERROR: Premature end of file. Currently open tags: html, body, table, tbody, tr, td, select.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <select>
+|               <optgroup>
+|               <hr>
+
+#data
+<table><tr><td><select><option><optgroup><hr>
+#errors
+1:1: ERROR: Expected a doctype token
+1:46: ERROR: Premature end of file. Currently open tags: html, body, table, tbody, tr, td, select.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <select>
+|               <option>
+|               <optgroup>
+|               <hr>