From a439a5b65e8213154e71644017122f435d815cce Mon Sep 17 00:00:00 2001 From: Samuel May Date: Thu, 16 May 2019 01:13:46 -0700 Subject: [PATCH 01/59] Name unnamed tests for ease of discovery in test harnesses --- tokenizer/test3.test | 2 +- tokenizer/test4.test | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tokenizer/test3.test b/tokenizer/test3.test index 721f21de..d1b323a5 100644 --- a/tokenizer/test3.test +++ b/tokenizer/test3.test @@ -1,6 +1,6 @@ {"tests": [ -{"description":"", +{"description":"[empty]", "input":"", "output":[]}, diff --git a/tokenizer/test4.test b/tokenizer/test4.test index 77706b72..8963c747 100644 --- a/tokenizer/test4.test +++ b/tokenizer/test4.test @@ -8,7 +8,7 @@ { "code": "unexpected-character-in-attribute-name", "line": 1, "col": 7 } ]}, -{"description":"", +{"description":"< in unquoted attribute value", "input":"", "output":[["StartTag", "z", {"x": "<"}]], "errors":[ From 1ddd636281ec97705faf74a021071f483ce3f941 Mon Sep 17 00:00:00 2001 From: Samuel May Date: Thu, 16 May 2019 01:15:31 -0700 Subject: [PATCH 02/59] Improve coverage of the Tokenizer tests Slight overkill in places, but I figured it's better to err on the side of too many tests than too little. --- tokenizer/contentModelFlags.test | 6 + tokenizer/domjs.test | 150 ++++++++- tokenizer/entities.test | 38 ++- tokenizer/test1.test | 96 +++++- tokenizer/test2.test | 4 + tokenizer/test3.test | 504 ++++++++++++++++++++++++++++++- 6 files changed, 778 insertions(+), 20 deletions(-) diff --git a/tokenizer/contentModelFlags.test b/tokenizer/contentModelFlags.test index 5197b68e..9cf7c8bd 100644 --- a/tokenizer/contentModelFlags.test +++ b/tokenizer/contentModelFlags.test @@ -6,6 +6,12 @@ "input":"&body;", "output":[["Character", "&body;"]]}, +{"description":"PLAINTEXT with seeming close tag", +"initialStates":["PLAINTEXT state"], +"lastStartTag":"plaintext", +"input":"&body;", +"output":[["Character", "&body;"]]}, + {"description":"End tag closing RCDATA or RAWTEXT", "initialStates":["RCDATA state", "RAWTEXT state"], "lastStartTag":"xmp", diff --git a/tokenizer/domjs.test b/tokenizer/domjs.test index b17a5df5..1373b27f 100644 --- a/tokenizer/domjs.test +++ b/tokenizer/domjs.test @@ -25,7 +25,7 @@ ] }, { - "description":"NUL in RCDATA, RAWTEXT, PLAINTEXT and Script data", + "description":"Raw NUL replacement", "doubleEscaped":true, "initialStates":["RCDATA state", "RAWTEXT state", "PLAINTEXT state", "Script data state"], "input":"\\u0000", @@ -34,6 +34,13 @@ { "code": "unexpected-null-character", "line": 1, "col": 1 } ] }, + { + "description":"NUL in CDATA section", + "doubleEscaped":true, + "initialStates":["CDATA section state"], + "input":"\\u0000]]>", + "output":[["Character", "\\u0000"]] + }, { "description":"NUL in script HTML comment", "doubleEscaped":true, @@ -112,20 +119,95 @@ { "code": "eof-in-script-html-comment-like-text", "line": 1, "col": 13 } ] }, + { + "description":"Dash in script HTML comment", + "initialStates":["Script data state"], + "input":"", + "output":[["Character", ""]] + }, + { + "description":"Dash less-than in script HTML comment", + "initialStates":["Script data state"], + "input":"", + "output":[["Character", ""]] + }, + { + "description":"Dash at end of script HTML comment", + "initialStates":["Script data state"], + "input":"", + "output":[["Character", ""]] + }, + { + "description":" in script HTML comment", + "initialStates":["Script data state"], + "lastStartTag":"script", + "input":"", + "output":[["Character", ""], ["EndTag", "script"]] + }, + { + "description":" in script HTML comment - double escaped", + "initialStates":["Script data state"], + "lastStartTag":"script", + "input":"", + "output":[["Character", ""], ["EndTag", "script"]] + }, + { + "description":" in script HTML comment - double escaped with nested -->", + "output":[["Character", ""], ["EndTag", "script"]] + }, + { + "description":" in script HTML comment - double escaped with abrupt end", + "initialStates":["Script data state"], + "lastStartTag":"script", + "input":" -->", + "output":[["Character", ""], ["EndTag", "script"], ["Character", " -->"], ["EndTag", "script"]] + }, + { + "description":"Incomplete start tag in script HTML comment double escaped", + "initialStates":["Script data state"], + "lastStartTag":"script", + "input":"", + "output":[["Character", ""]] + }, + { + "description":"Unclosed start tag in script HTML comment double escaped", + "initialStates":["Script data state"], + "lastStartTag":"script", + "input":"", + "output":[["Character", ""]] + }, + { + "description":"Incomplete end tag in script HTML comment double escaped", + "initialStates":["Script data state"], + "lastStartTag":"script", + "input":"", + "output":[["Character", ""]] + }, + { + "description":"Unclosed end tag in script HTML comment double escaped", + "initialStates":["Script data state"], + "lastStartTag":"script", + "input":"", + "output":[["Character", ""]] + }, { "description":"leading U+FEFF must pass through", + "initialStates":["Data state", "RCDATA state", "RAWTEXT state", "Script data state"], "doubleEscaped":true, "input":"\\uFEFFfoo\\uFEFFbar", "output":[["Character", "\\uFEFFfoo\\uFEFFbar"]] }, { - "description":"Non BMP-charref in in RCDATA", + "description":"Non BMP-charref in RCDATA", "initialStates":["RCDATA state"], "input":"≂̸", "output":[["Character", "\u2242\u0338"]] }, { - "description":"Bad charref in in RCDATA", + "description":"Bad charref in RCDATA", "initialStates":["RCDATA state"], "input":"&NotEqualTild;", "output":[["Character", "&NotEqualTild;"]], @@ -134,36 +216,36 @@ ] }, { - "description":"lowercase endtags in RCDATA and RAWTEXT", - "initialStates":["RCDATA state", "RAWTEXT state"], + "description":"lowercase endtags", + "initialStates":["RCDATA state", "RAWTEXT state", "Script data state"], "lastStartTag":"xmp", "input":"", "output":[["EndTag","xmp"]] }, { - "description":"bad endtag in RCDATA and RAWTEXT", - "initialStates":["RCDATA state", "RAWTEXT state"], + "description":"bad endtag (space before name)", + "initialStates":["RCDATA state", "RAWTEXT state", "Script data state"], "lastStartTag":"xmp", "input":"", "output":[["Character",""]] }, { - "description":"bad endtag in RCDATA and RAWTEXT", - "initialStates":["RCDATA state", "RAWTEXT state"], + "description":"bad endtag (not matching last start tag)", + "initialStates":["RCDATA state", "RAWTEXT state", "Script data state"], "lastStartTag":"xmp", "input":"", "output":[["Character",""]] }, { - "description":"bad endtag in RCDATA and RAWTEXT", - "initialStates":["RCDATA state", "RAWTEXT state"], + "description":"bad endtag (without close bracket)", + "initialStates":["RCDATA state", "RAWTEXT state", "Script data state"], "lastStartTag":"xmp", "input":"", + "initialStates":["CDATA section state"], + "output":[["Character", "foo "]] + }, + { + "description":"CDATA followed by HTML content", + "input":"foo ]]> ", + "initialStates":["CDATA section state"], + "output":[["Character", "foo "]] + }, + { + "description":"CDATA with extra bracket", + "input":"foo]]]>", + "initialStates":["CDATA section state"], + "output":[["Character", "foo]"]] + }, + { + "description":"CDATA without end marker", + "input":"foo", + "initialStates":["CDATA section state"], + "output":[["Character", "foo"]], + "errors":[ + { "code": "eof-in-cdata", "line": 1, "col": 4 } + ] + }, + { + "description":"CDATA with single bracket ending", + "input":"foo]", + "initialStates":["CDATA section state"], + "output":[["Character", "foo]"]], + "errors":[ + { "code": "eof-in-cdata", "line": 1, "col": 5 } + ] + }, + { + "description":"CDATA with two brackets ending", + "input":"foo]]", "initialStates":["CDATA section state"], - "output":[["Character", "foo&bar"]], + "output":[["Character", "foo]]"]], "errors":[ - { "code": "eof-in-cdata", "line": 1, "col": 8 } + { "code": "eof-in-cdata", "line": 1, "col": 6 } ] } diff --git a/tokenizer/entities.test b/tokenizer/entities.test index 7c514563..a6469cd0 100644 --- a/tokenizer/entities.test +++ b/tokenizer/entities.test @@ -1,13 +1,47 @@ {"tests": [ -{"description": "Undefined named entity in attribute value ending in semicolon and whose name starts with a known entity name.", +{"description": "Undefined named entity in a double-quoted attribute value ending in semicolon and whose name starts with a known entity name.", +"input":"", +"output": [["StartTag", "h", {"a": "¬i;"}]]}, + +{"description": "Entity name requiring semicolon instead followed by the equals sign in a double-quoted attribute value.", +"input":"", +"output": [["StartTag", "h", {"a": "&lang="}]]}, + +{"description": "Valid entity name followed by the equals sign in a double-quoted attribute value.", +"input":"", +"output": [["StartTag", "h", {"a": "¬="}]]}, + +{"description": "Undefined named entity in a single-quoted attribute value ending in semicolon and whose name starts with a known entity name.", "input":"", "output": [["StartTag", "h", {"a": "¬i;"}]]}, -{"description": "Entity name followed by the equals sign in an attribute value.", +{"description": "Entity name requiring semicolon instead followed by the equals sign in a single-quoted attribute value.", "input":"", "output": [["StartTag", "h", {"a": "&lang="}]]}, +{"description": "Valid entity name followed by the equals sign in a single-quoted attribute value.", +"input":"", +"output": [["StartTag", "h", {"a": "¬="}]]}, + +{"description": "Undefined named entity in an unquoted attribute value ending in semicolon and whose name starts with a known entity name.", +"input":"", +"output": [["StartTag", "h", {"a": "¬i;"}]]}, + +{"description": "Entity name requiring semicolon instead followed by the equals sign in an unquoted attribute value.", +"input":"", +"output": [["StartTag", "h", {"a": "&lang="}]], +"errors":[ + { "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 11 } +]}, + +{"description": "Valid entity name followed by the equals sign in an unquoted attribute value.", +"input":"", +"output": [["StartTag", "h", {"a": "¬="}]], +"errors":[ + { "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 10 } +]}, + {"description": "Ambiguous ampersand.", "input":"&rrrraannddom;", "output": [["Character", "&rrrraannddom;"]], diff --git a/tokenizer/test1.test b/tokenizer/test1.test index 8b85050f..cb0eb48a 100644 --- a/tokenizer/test1.test +++ b/tokenizer/test1.test @@ -102,6 +102,10 @@ "input":"", "output":[["Comment", " --comment "]]}, +{"description":"Comment, central less-than bang", +"input":"", +"output":[["Comment", "", "output":[["Comment", ""]], @@ -135,6 +145,18 @@ "input":"", "output":[["Comment", ""]]}, +{"description":"< in comment", +"input":"", +"output":[["Comment", " ", +"output":[["Comment", " ", +"output":[["Comment", " ", "output":[["Comment", " ", +"output":[["Comment", " <", +"output":[["Character", ""]]}, + +{"description":"", +"output":[["Character", ""]]}, + +{"description":"", +"output":[["Character", ""]]}, + +{"description":"Escaped script data", +"initialStates":["Script data state"], +"input":"", +"output":[["Character", ""]]}, + +{"description":"< in script HTML comment", +"initialStates":["Script data state"], +"input":"", +"output":[["Character", ""]]}, + +{"description":"", +"output":[["Character", ""]]}, + +{"description":"Start tag in script HTML comment", +"initialStates":["Script data state"], +"input":"", +"output":[["Character", ""]]}, + +{"description":"End tag in script HTML comment", +"initialStates":["Script data state"], +"input":"", +"output":[["Character", ""]]}, + +{"description":"- in script HTML comment double escaped", +"initialStates":["Script data state"], +"input":"", +"output":[["Character", ""]]}, + +{"description":"-- in script HTML comment double escaped", +"initialStates":["Script data state"], +"input":"", +"output":[["Character", ""]]}, + +{"description":"--- in script HTML comment double escaped", +"initialStates":["Script data state"], +"input":"", +"output":[["Character", ""]]}, + +{"description":"- spaced in script HTML comment double escaped", +"initialStates":["Script data state"], +"input":"", +"output":[["Character", ""]]}, + +{"description":"-- spaced in script HTML comment double escaped", +"initialStates":["Script data state"], +"input":"", +"output":[["Character", ""]]}, + {"description":"Ampersand EOF", "input":"&", "output":[["Character", "&"]]}, diff --git a/tokenizer/test2.test b/tokenizer/test2.test index 521694ca..f80f27d1 100644 --- a/tokenizer/test2.test +++ b/tokenizer/test2.test @@ -50,6 +50,10 @@ "input":"", "output":[["DOCTYPE", "html", null, "-//W3C//DTD HTML Transitional 4.01//EN", true]]}, +{"description":"DOCTYPE with single-quoted systemId", +"input":"", +"output":[["DOCTYPE", "html", null, "-//W3C//DTD HTML Transitional 4.01//EN", true]]}, + {"description":"DOCTYPE with publicId and systemId", "input":"", "output":[["DOCTYPE", "html", "-//W3C//DTD HTML Transitional 4.01//EN", "-//W3C//DTD HTML Transitional 4.01//EN", true]]}, diff --git a/tokenizer/test3.test b/tokenizer/test3.test index d1b323a5..814482c4 100644 --- a/tokenizer/test3.test +++ b/tokenizer/test3.test @@ -1,84 +1,451 @@ {"tests": [ {"description":"[empty]", +"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"], "input":"", "output":[]}, +{"description":"[empty]", +"initialStates":["CDATA section state"], +"input":"", +"output":[], +"errors":[ + { "code": "eof-in-cdata", "line": 1, "col": 2 } +]}, + {"description":"\\u0009", +"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"], "input":"\u0009", "output":[["Character", "\u0009"]]}, +{"description":"\\u0009", +"initialStates":["CDATA section state"], +"input":"\u0009", +"output":[["Character", "\u0009"]], +"errors":[ + { "code": "eof-in-cdata", "line": 1, "col": 2 } +]}, + {"description":"\\u000A", +"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"], "input":"\u000A", "output":[["Character", "\u000A"]]}, +{"description":"\\u000A", +"initialStates":["CDATA section state"], +"input":"\u000A", +"output":[["Character", "\u000A"]], +"errors":[ + { "code": "eof-in-cdata", "line": 1, "col": 2 } +]}, + {"description":"\\u000B", +"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"], "input":"\u000B", "output":[["Character", "\u000B"]], "errors":[ { "code": "control-character-in-input-stream", "line": 1, "col": 1 } ]}, +{"description":"\\u000B", +"initialStates":["CDATA section state"], +"input":"\u000B", +"output":[["Character", "\u000B"]], +"errors":[ + { "code": "control-character-in-input-stream", "line": 1, "col": 1 }, + { "code": "eof-in-cdata", "line": 1, "col": 2 } +]}, + {"description":"\\u000C", +"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"], "input":"\u000C", "output":[["Character", "\u000C"]]}, +{"description":"\\u000C", +"initialStates":["CDATA section state"], +"input":"\u000C", +"output":[["Character", "\u000C"]], +"errors":[ + { "code": "eof-in-cdata", "line": 1, "col": 2 } +]}, + {"description":" ", +"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"], "input":" ", "output":[["Character", " "]]}, +{"description":" ", +"initialStates":["CDATA section state"], +"input":" ", +"output":[["Character", " "]], +"errors":[ + { "code": "eof-in-cdata", "line": 1, "col": 2 } +]}, + {"description":"!", +"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"], "input":"!", "output":[["Character", "!"]]}, +{"description":"!", +"initialStates":["CDATA section state"], +"input":"!", +"output":[["Character", "!"]], +"errors":[ + { "code": "eof-in-cdata", "line": 1, "col": 2 } +]}, + {"description":"\"", +"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"], "input":"\"", "output":[["Character", "\""]]}, +{"description":"\"", +"initialStates":["CDATA section state"], +"input":"\"", +"output":[["Character", "\""]], +"errors":[ + { "code": "eof-in-cdata", "line": 1, "col": 2 } +]}, + {"description":"%", +"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"], "input":"%", "output":[["Character", "%"]]}, +{"description":"%", +"initialStates":["CDATA section state"], +"input":"%", +"output":[["Character", "%"]], +"errors":[ + { "code": "eof-in-cdata", "line": 1, "col": 2 } +]}, + {"description":"&", +"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"], "input":"&", "output":[["Character", "&"]]}, +{"description":"&", +"initialStates":["CDATA section state"], +"input":"&", +"output":[["Character", "&"]], +"errors":[ + { "code": "eof-in-cdata", "line": 1, "col": 2 } +]}, + {"description":"'", +"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"], "input":"'", "output":[["Character", "'"]]}, +{"description":"'", +"initialStates":["CDATA section state"], +"input":"'", +"output":[["Character", "'"]], +"errors":[ + { "code": "eof-in-cdata", "line": 1, "col": 2 } +]}, + {"description":",", +"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"], "input":",", "output":[["Character", ","]]}, +{"description":",", +"initialStates":["CDATA section state"], +"input":",", +"output":[["Character", ","]], +"errors":[ + { "code": "eof-in-cdata", "line": 1, "col": 2 } +]}, + {"description":"-", +"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"], "input":"-", "output":[["Character", "-"]]}, +{"description":"-", +"initialStates":["CDATA section state"], +"input":"-", +"output":[["Character", "-"]], +"errors":[ + { "code": "eof-in-cdata", "line": 1, "col": 2 } +]}, + {"description":".", +"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"], "input":".", "output":[["Character", "."]]}, +{"description":".", +"initialStates":["CDATA section state"], +"input":".", +"output":[["Character", "."]], +"errors":[ + { "code": "eof-in-cdata", "line": 1, "col": 2 } +]}, + {"description":"/", +"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"], "input":"/", "output":[["Character", "/"]]}, +{"description":"/", +"initialStates":["CDATA section state"], +"input":"/", +"output":[["Character", "/"]], +"errors":[ + { "code": "eof-in-cdata", "line": 1, "col": 2 } +]}, + {"description":"0", +"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"], "input":"0", "output":[["Character", "0"]]}, +{"description":"0", +"initialStates":["CDATA section state"], +"input":"0", +"output":[["Character", "0"]], +"errors":[ + { "code": "eof-in-cdata", "line": 1, "col": 2 } +]}, + {"description":"1", +"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"], "input":"1", "output":[["Character", "1"]]}, +{"description":"1", +"initialStates":["CDATA section state"], +"input":"1", +"output":[["Character", "1"]], +"errors":[ + { "code": "eof-in-cdata", "line": 1, "col": 2 } +]}, + {"description":"9", +"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"], "input":"9", "output":[["Character", "9"]]}, +{"description":"9", +"initialStates":["CDATA section state"], +"input":"9", +"output":[["Character", "9"]], +"errors":[ + { "code": "eof-in-cdata", "line": 1, "col": 2 } +]}, + {"description":";", +"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"], "input":";", "output":[["Character", ";"]]}, +{"description":";", +"initialStates":["CDATA section state"], +"input":";", +"output":[["Character", ";"]], +"errors":[ + { "code": "eof-in-cdata", "line": 1, "col": 2 } +]}, + +{"description":";=", +"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"], +"input":";=", +"output":[["Character", ";="]]}, + +{"description":";=", +"initialStates":["CDATA section state"], +"input":";=", +"output":[["Character", ";="]], +"errors":[ + { "code": "eof-in-cdata", "line": 1, "col": 3 } +]}, + +{"description":";>", +"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"], +"input":";>", +"output":[["Character", ";>"]]}, + +{"description":";>", +"initialStates":["CDATA section state"], +"input":";>", +"output":[["Character", ";>"]], +"errors":[ + { "code": "eof-in-cdata", "line": 1, "col": 3 } +]}, + +{"description":";?", +"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"], +"input":";?", +"output":[["Character", ";?"]]}, + +{"description":";?", +"initialStates":["CDATA section state"], +"input":";?", +"output":[["Character", ";?"]], +"errors":[ + { "code": "eof-in-cdata", "line": 1, "col": 3 } +]}, + +{"description":";@", +"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"], +"input":";@", +"output":[["Character", ";@"]]}, + +{"description":";@", +"initialStates":["CDATA section state"], +"input":";@", +"output":[["Character", ";@"]], +"errors":[ + { "code": "eof-in-cdata", "line": 1, "col": 3 } +]}, + +{"description":";A", +"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"], +"input":";A", +"output":[["Character", ";A"]]}, + +{"description":";A", +"initialStates":["CDATA section state"], +"input":";A", +"output":[["Character", ";A"]], +"errors":[ + { "code": "eof-in-cdata", "line": 1, "col": 3 } +]}, + +{"description":";B", +"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"], +"input":";B", +"output":[["Character", ";B"]]}, + +{"description":";B", +"initialStates":["CDATA section state"], +"input":";B", +"output":[["Character", ";B"]], +"errors":[ + { "code": "eof-in-cdata", "line": 1, "col": 3 } +]}, + +{"description":";Y", +"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"], +"input":";Y", +"output":[["Character", ";Y"]]}, + +{"description":";Y", +"initialStates":["CDATA section state"], +"input":";Y", +"output":[["Character", ";Y"]], +"errors":[ + { "code": "eof-in-cdata", "line": 1, "col": 3 } +]}, + +{"description":";Z", +"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"], +"input":";Z", +"output":[["Character", ";Z"]]}, + +{"description":";Z", +"initialStates":["CDATA section state"], +"input":";Z", +"output":[["Character", ";Z"]], +"errors":[ + { "code": "eof-in-cdata", "line": 1, "col": 3 } +]}, + +{"description":";`", +"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"], +"input":";`", +"output":[["Character", ";`"]]}, + +{"description":";`", +"initialStates":["CDATA section state"], +"input":";`", +"output":[["Character", ";`"]], +"errors":[ + { "code": "eof-in-cdata", "line": 1, "col": 3 } +]}, + +{"description":";a", +"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"], +"input":";a", +"output":[["Character", ";a"]]}, + +{"description":";a", +"initialStates":["CDATA section state"], +"input":";a", +"output":[["Character", ";a"]], +"errors":[ + { "code": "eof-in-cdata", "line": 1, "col": 3 } +]}, + +{"description":";b", +"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"], +"input":";b", +"output":[["Character", ";b"]]}, + +{"description":";b", +"initialStates":["CDATA section state"], +"input":";b", +"output":[["Character", ";b"]], +"errors":[ + { "code": "eof-in-cdata", "line": 1, "col": 3 } +]}, + +{"description":";y", +"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"], +"input":";y", +"output":[["Character", ";y"]]}, + +{"description":";y", +"initialStates":["CDATA section state"], +"input":";y", +"output":[["Character", ";y"]], +"errors":[ + { "code": "eof-in-cdata", "line": 1, "col": 3 } +]}, + +{"description":";z", +"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"], +"input":";z", +"output":[["Character", ";z"]]}, + +{"description":";z", +"initialStates":["CDATA section state"], +"input":";z", +"output":[["Character", ";z"]], +"errors":[ + { "code": "eof-in-cdata", "line": 1, "col": 3 } +]}, + +{"description":";{", +"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"], +"input":";{", +"output":[["Character", ";{"]]}, + +{"description":";{", +"initialStates":["CDATA section state"], +"input":";{", +"output":[["Character", ";{"]], +"errors":[ + { "code": "eof-in-cdata", "line": 1, "col": 3 } +]}, + +{"description":";\\uDBC0\\uDC00", +"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"], +"input":";\uDBC0\uDC00", +"output":[["Character", ";\uDBC0\uDC00"]]}, + +{"description":";\\uDBC0\\uDC00", +"initialStates":["CDATA section state"], +"input":";\uDBC0\uDC00", +"output":[["Character", ";\uDBC0\uDC00"]], +"errors":[ + { "code": "eof-in-cdata", "line": 1, "col": 3 } +]}, + {"description":"<", "input":"<", "output":[["Character", "<"]], @@ -10669,63 +11036,198 @@ ]}, {"description":"=", +"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"], "input":"=", "output":[["Character", "="]]}, +{"description":"=", +"initialStates":["CDATA section state"], +"input":"=", +"output":[["Character", "="]], +"errors":[ + { "code": "eof-in-cdata", "line": 1, "col": 2 } +]}, + {"description":">", +"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"], "input":">", "output":[["Character", ">"]]}, +{"description":">", +"initialStates":["CDATA section state"], +"input":">", +"output":[["Character", ">"]], +"errors":[ + { "code": "eof-in-cdata", "line": 1, "col": 2 } +]}, + {"description":"?", +"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"], "input":"?", "output":[["Character", "?"]]}, +{"description":"?", +"initialStates":["CDATA section state"], +"input":"?", +"output":[["Character", "?"]], +"errors":[ + { "code": "eof-in-cdata", "line": 1, "col": 2 } +]}, + {"description":"@", +"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"], "input":"@", "output":[["Character", "@"]]}, +{"description":"@", +"initialStates":["CDATA section state"], +"input":"@", +"output":[["Character", "@"]], +"errors":[ + { "code": "eof-in-cdata", "line": 1, "col": 2 } +]}, + {"description":"A", +"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"], "input":"A", "output":[["Character", "A"]]}, +{"description":"A", +"initialStates":["CDATA section state"], +"input":"A", +"output":[["Character", "A"]], +"errors":[ + { "code": "eof-in-cdata", "line": 1, "col": 2 } +]}, + {"description":"B", +"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"], "input":"B", "output":[["Character", "B"]]}, +{"description":"B", +"initialStates":["CDATA section state"], +"input":"B", +"output":[["Character", "B"]], +"errors":[ + { "code": "eof-in-cdata", "line": 1, "col": 2 } +]}, + {"description":"Y", +"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"], "input":"Y", "output":[["Character", "Y"]]}, +{"description":"Y", +"initialStates":["CDATA section state"], +"input":"Y", +"output":[["Character", "Y"]], +"errors":[ + { "code": "eof-in-cdata", "line": 1, "col": 2 } +]}, + {"description":"Z", +"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"], "input":"Z", "output":[["Character", "Z"]]}, +{"description":"Z", +"initialStates":["CDATA section state"], +"input":"Z", +"output":[["Character", "Z"]], +"errors":[ + { "code": "eof-in-cdata", "line": 1, "col": 2 } +]}, + {"description":"`", +"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"], "input":"`", "output":[["Character", "`"]]}, +{"description":"`", +"initialStates":["CDATA section state"], +"input":"`", +"output":[["Character", "`"]], +"errors":[ + { "code": "eof-in-cdata", "line": 1, "col": 2 } +]}, + {"description":"a", +"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"], "input":"a", "output":[["Character", "a"]]}, +{"description":"a", +"initialStates":["CDATA section state"], +"input":"a", +"output":[["Character", "a"]], +"errors":[ + { "code": "eof-in-cdata", "line": 1, "col": 2 } +]}, + {"description":"b", +"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"], "input":"b", "output":[["Character", "b"]]}, +{"description":"b", +"initialStates":["CDATA section state"], +"input":"b", +"output":[["Character", "b"]], +"errors":[ + { "code": "eof-in-cdata", "line": 1, "col": 2 } +]}, + {"description":"y", +"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"], "input":"y", "output":[["Character", "y"]]}, +{"description":"y", +"initialStates":["CDATA section state"], +"input":"y", +"output":[["Character", "y"]], +"errors":[ + { "code": "eof-in-cdata", "line": 1, "col": 2 } +]}, + {"description":"z", +"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"], "input":"z", "output":[["Character", "z"]]}, +{"description":"z", +"initialStates":["CDATA section state"], +"input":"z", +"output":[["Character", "z"]], +"errors":[ + { "code": "eof-in-cdata", "line": 1, "col": 2 } +]}, + {"description":"{", +"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"], "input":"{", "output":[["Character", "{"]]}, +{"description":"{", +"initialStates":["CDATA section state"], +"input":"{", +"output":[["Character", "{"]], +"errors":[ + { "code": "eof-in-cdata", "line": 1, "col": 2 } +]}, + {"description":"\\uDBC0\\uDC00", +"initialStates":["Data state", "PLAINTEXT state", "RCDATA state", "RAWTEXT state", "Script data state"], "input":"\uDBC0\uDC00", -"output":[["Character", "\uDBC0\uDC00"]]} +"output":[["Character", "\uDBC0\uDC00"]]}, + +{"description":"\\uDBC0\\uDC00", +"initialStates":["CDATA section state"], +"input":"\uDBC0\uDC00", +"output":[["Character", "\uDBC0\uDC00"]], +"errors":[ + { "code": "eof-in-cdata", "line": 1, "col": 2 } +]} ]} From 71eebd59772d1d39aced0c0582ae9c09acf3ce6e Mon Sep 17 00:00:00 2001 From: Sam Sneddon Date: Tue, 26 May 2020 23:28:15 +0100 Subject: [PATCH 03/59] Add a test for order of comments after Notably, html5lib-python's lxml treebuilder gets this wrong: https://github.com/html5lib/html5lib-python/issues/488 --- tree-construction/webkit01.dat | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tree-construction/webkit01.dat b/tree-construction/webkit01.dat index b5fafdc7..2127cfe1 100644 --- a/tree-construction/webkit01.dat +++ b/tree-construction/webkit01.dat @@ -307,6 +307,20 @@ console.log("FOOBARBAZ"); | | +#data + +#errors +(1,6): expected-doctype-but-got-start-tag +#document +| +| +| +| +| +| +| +| + #data x #errors From bef9ad1e6ffe8ed6a084be7fbc3ba521eab70844 Mon Sep 17 00:00:00 2001 From: "Michael[tm] Smith" Date: Thu, 13 Aug 2020 11:23:40 +0900 Subject: [PATCH 04/59] Test SVG fragment parsing w/ td/tr/tbody context MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change adds a new tree-construction/svg.dat file that’s essentially an analogue of the existing tree-construction/math.dat file. It contains tests for fragment parsing of SVG content with td, tr, and tbody/thead/tfoot context elements. --- tree-construction/svg.dat | 81 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) create mode 100644 tree-construction/svg.dat diff --git a/tree-construction/svg.dat b/tree-construction/svg.dat new file mode 100644 index 00000000..8e9a2bbb --- /dev/null +++ b/tree-construction/svg.dat @@ -0,0 +1,81 @@ +#data +
+#errors +#document-fragment +td +#document +| +| +| +| + +#data +
+#errors +#document-fragment +tr +#document +| +| +| +| + +#data + +#errors +#document-fragment +thead +#document +| +| +| + +#data + +#errors +#document-fragment +tfoot +#document +| +| +| + +#data + +#errors +#document-fragment +tbody +#document +| +| +| + +#data + +#errors +#document-fragment +tbody +#document +| +| +| + +#data + +#errors +#document-fragment +tbody +#document +| +| +| + +#data + +#errors +#document-fragment +tbody +#document +| +| +| From 0e9ed8efea41217469502098aa448327868c38a9 Mon Sep 17 00:00:00 2001 From: "Michael[tm] Smith" Date: Fri, 21 Aug 2020 13:05:17 +0900 Subject: [PATCH 05/59] Test scripted encoding support This change adds a `scripted` subdirectory in the `encoding` directory, with tests for which the expected results require a system with scripting support. The change removes an existing test from the `encoding/tests1.dat` file, and moves it to the `encoding/scripted/tests1.dat` file. --- encoding/scripted/tests1.dat | 5 +++++ encoding/tests1.dat | 6 ------ 2 files changed, 5 insertions(+), 6 deletions(-) create mode 100644 encoding/scripted/tests1.dat diff --git a/encoding/scripted/tests1.dat b/encoding/scripted/tests1.dat new file mode 100644 index 00000000..04d18bb9 --- /dev/null +++ b/encoding/scripted/tests1.dat @@ -0,0 +1,5 @@ +#data + +') +#encoding +iso-8859-2 diff --git a/encoding/tests1.dat b/encoding/tests1.dat index 77b0e41d..7aa9586d 100644 --- a/encoding/tests1.dat +++ b/encoding/tests1.dat @@ -356,12 +356,6 @@ iso-8859-2 #encoding iso-8859-2 -#data - - -#encoding -iso-8859-2 - #data From accc80388699156bed78de98d1e885068efd6b1b Mon Sep 17 00:00:00 2001 From: Simon Pieters Date: Fri, 26 Feb 2021 13:46:34 +0100 Subject: [PATCH 06/59] Update existing tests per spec change See https://github.com/whatwg/html/pull/6399 --- tree-construction/foreign-fragment.dat | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/tree-construction/foreign-fragment.dat b/tree-construction/foreign-fragment.dat index c81ae817..3f7b2063 100644 --- a/tree-construction/foreign-fragment.dat +++ b/tree-construction/foreign-fragment.dat @@ -7,7 +7,7 @@ #document-fragment svg path #document -| +| | "X" #data @@ -17,7 +17,7 @@ svg path #document-fragment svg path #document -| +| | color="" | "X" @@ -390,7 +390,7 @@ math mtext #document-fragment math annotation-xml #document -| +|
#data
@@ -407,7 +407,7 @@ math annotation-xml #document-fragment math math #document -| +|
#data
@@ -461,12 +461,11 @@ svg desc

X

#errors 5: HTML start tag “div” in a foreign namespace context. -9: HTML start tag “h1” in a foreign namespace context. #document-fragment svg svg #document -| -| +|
+|

| "X" #data @@ -476,7 +475,7 @@ svg svg #document-fragment svg svg #document -| +|
#data
From 1a26b47a4cafc918a4d85428e6d0c3f5cfdb04cf Mon Sep 17 00:00:00 2001 From: Simon Pieters Date: Fri, 26 Feb 2021 14:04:13 +0100 Subject: [PATCH 07/59] Add new tests These currently pass in Chromium and Webkit, and fail in Gecko --- tree-construction/foreign-fragment.dat | 48 ++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/tree-construction/foreign-fragment.dat b/tree-construction/foreign-fragment.dat index 3f7b2063..7aff409c 100644 --- a/tree-construction/foreign-fragment.dat +++ b/tree-construction/foreign-fragment.dat @@ -556,3 +556,51 @@ svg desc svg desc #document | "X" + +#data +

+#errors +8: HTML start tag “p” in a foreign namespace context. +#document-fragment +div +#document +| +|

+ +#data +

+#errors +3: HTML start tag “p” in a foreign namespace context. +#document-fragment +svg svg +#document +|

+ +#data + +#errors +6: HTML start tag “body” in a foreign namespace context. +#document-fragment +svg svg +#document +| + +#data +

+#errors +3: HTML start tag “p” in a foreign namespace context. +#document-fragment +svg svg +#document +|

+| + +#data +

+#errors +3: HTML start tag “p” in a foreign namespace context. +#document-fragment +svg svg +#document +|

+| From 9b4a29c943b3c905e46b26569bae16de8b373516 Mon Sep 17 00:00:00 2001 From: Simon Pieters Date: Fri, 11 Jun 2021 13:23:50 +0200 Subject: [PATCH 08/59] Test

and
in SVG (#135) See https://github.com/whatwg/html/pull/6736 --- tree-construction/foreign-fragment.dat | 42 ++++++++++++++++++++++ tree-construction/tests26.dat | 48 ++++++++++++++++++++++++++ 2 files changed, 90 insertions(+) diff --git a/tree-construction/foreign-fragment.dat b/tree-construction/foreign-fragment.dat index 7aff409c..d5bc22e5 100644 --- a/tree-construction/foreign-fragment.dat +++ b/tree-construction/foreign-fragment.dat @@ -576,6 +576,48 @@ svg svg #document |

+#data +

+#errors +9: HTML end tag “p” in a foreign namespace context. +#document-fragment +div +#document +| +|

+| + +#data +
+#errors +10: HTML end tag “br” in a foreign namespace context. +#document-fragment +div +#document +| +|

+| + +#data +

+#errors +4: HTML end tag “p” in a foreign namespace context. +#document-fragment +svg svg +#document +|

+| + +#data +
+#errors +5: HTML end tag “br” in a foreign namespace context. +#document-fragment +svg svg +#document +|

+| + #data #errors diff --git a/tree-construction/tests26.dat b/tree-construction/tests26.dat index de453b9c..e6f71f6a 100644 --- a/tree-construction/tests26.dat +++ b/tree-construction/tests26.dat @@ -391,3 +391,51 @@ Line 1 Col 19 Expected closing tag. Unexpected end of file. |