Implement revised table foster parenting algo from r3382 · html5lib/html5lib-php@8e12e67 · GitHub
[go: up one dir, main page]

Skip to content

Commit 8e12e67

Browse files
author
Edward Z. Yang
committed
Implement revised table foster parenting algo from r3382
1 parent e130ac0 commit 8e12e67

File tree

2 files changed

+90
-32
lines changed

2 files changed

+90
-32
lines changed

SPEC

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1-
3354
1+
3382
2+
3+
This is the last revision of the spec this library has been audited against.
4+
5+
Excluding: 3374
26

3-
(this is the last revision of the spec this library has been audited against)

library/HTML5/TreeBuilder.php

Lines changed: 85 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,9 @@ class HTML5_TreeBuilder {
7070
'p','param','plaintext','pre','script','select','spacer','style',
7171
'tbody','textarea','tfoot','thead','title','tr','ul','wbr');
7272

73+
private $pendingTableCharacters;
74+
private $pendingTableCharactersDirty;
75+
7376
// Tree construction modes
7477
const INITIAL = 0;
7578
const BEFORE_HTML = 1;
@@ -80,19 +83,20 @@ class HTML5_TreeBuilder {
8083
const IN_BODY = 6;
8184
const IN_CDATA_RCDATA = 7;
8285
const IN_TABLE = 8;
83-
const IN_CAPTION = 9;
84-
const IN_COLUMN_GROUP = 10;
85-
const IN_TABLE_BODY = 11;
86-
const IN_ROW = 12;
87-
const IN_CELL = 13;
88-
const IN_SELECT = 14;
89-
const IN_SELECT_IN_TABLE= 15;
90-
const IN_FOREIGN_CONTENT= 16;
91-
const AFTER_BODY = 17;
92-
const IN_FRAMESET = 18;
93-
const AFTER_FRAMESET = 19;
94-
const AFTER_AFTER_BODY = 20;
95-
const AFTER_AFTER_FRAMESET = 21;
86+
const IN_TABLE_TEXT = 9;
87+
const IN_CAPTION = 10;
88+
const IN_COLUMN_GROUP = 11;
89+
const IN_TABLE_BODY = 12;
90+
const IN_ROW = 13;
91+
const IN_CELL = 14;
92+
const IN_SELECT = 15;
93+
const IN_SELECT_IN_TABLE= 16;
94+
const IN_FOREIGN_CONTENT= 17;
95+
const AFTER_BODY = 18;
96+
const IN_FRAMESET = 19;
97+
const AFTER_FRAMESET = 20;
98+
const AFTER_AFTER_BODY = 21;
99+
const AFTER_AFTER_FRAMESET = 22;
96100

97101
/**
98102
* Converts a magic number to a readable name. Use for debugging.
@@ -1940,17 +1944,21 @@ public function emitToken($token, $mode = null) {
19401944
case self::IN_TABLE:
19411945
$clear = array('html', 'table');
19421946

1943-
/* A character token that is one of one of U+0009 CHARACTER TABULATION,
1944-
U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
1945-
or U+0020 SPACE */
1946-
if($token['type'] === HTML5_Tokenizer::SPACECHARACTER &&
1947-
/* If the current table is tainted, then act as described in
1948-
* the "anything else" entry below. */
1949-
// Note: hsivonen has a test that fails due to this line
1950-
// because he wants to convince Hixie not to do taint
1951-
!$this->currentTableIsTainted()) {
1952-
/* Append the character to the current node. */
1953-
$this->insertText($token['data']);
1947+
/* A character token */
1948+
if ($token['type'] === HTML5_Tokenizer::CHARACTER ||
1949+
$token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
1950+
/* Let the pending table character tokens
1951+
* be an empty list of tokens. */
1952+
$this->pendingTableCharacters = "";
1953+
$this->pendingTableCharactersDirty = false;
1954+
/* Let the original insertion mode be the current
1955+
* insertion mode. */
1956+
$this->original_mode = $this->mode;
1957+
/* Switch the insertion mode to
1958+
* "in table text" and
1959+
* reprocess the token. */
1960+
$this->mode = self::IN_TABLE_TEXT;
1961+
$this->emitToken($token);
19541962

19551963
/* A comment token */
19561964
} elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
@@ -2096,6 +2104,57 @@ public function emitToken($token, $mode = null) {
20962104
}
20972105
break;
20982106

2107+
case self::IN_TABLE_TEXT:
2108+
/* A character token */
2109+
if($token['type'] === HTML5_Tokenizer::CHARACTER) {
2110+
/* Append the character token to the pending table
2111+
* character tokens list. */
2112+
$this->pendingTableCharacters .= $token['data'];
2113+
$this->pendingTableCharactersDirty = true;
2114+
} elseif ($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
2115+
$this->pendingTableCharacters .= $token['data'];
2116+
/* Anything else */
2117+
} else {
2118+
if ($this->pendingTableCharacters !== '' && is_string($this->pendingTableCharacters)) {
2119+
/* If any of the tokens in the pending table character tokens list
2120+
* are character tokens that are not one of U+0009 CHARACTER
2121+
* TABULATION, U+000A LINE FEED (LF), U+000C FORM FEED (FF), or
2122+
* U+0020 SPACE, then reprocess those character tokens using the
2123+
* rules given in the "anything else" entry in the "in table"
2124+
* insertion mode.*/
2125+
if ($this->pendingTableCharactersDirty) {
2126+
/* Parse error. Process the token using the rules for the
2127+
* "in body" insertion mode, except that if the current
2128+
* node is a table, tbody, tfoot, thead, or tr element,
2129+
* then, whenever a node would be inserted into the current
2130+
* node, it must instead be foster parented. */
2131+
// XERROR
2132+
$old = $this->foster_parent;
2133+
$this->foster_parent = true;
2134+
$text_token = array(
2135+
'type' => HTML5_Tokenizer::CHARACTER,
2136+
'data' => $this->pendingTableCharacters,
2137+
);
2138+
$this->processWithRulesFor($text_token, self::IN_BODY);
2139+
$this->foster_parent = $old;
2140+
2141+
/* Otherwise, insert the characters given by the pending table
2142+
* character tokens list into the current node. */
2143+
} else {
2144+
$this->insertText($this->pendingTableCharacters);
2145+
}
2146+
$this->pendingTableCharacters = null;
2147+
$this->pendingTableCharactersDirty = null; // clear the declared dirty flag together with the buffer (was a typo'd, undeclared property)
2148+
}
2149+
2150+
/* Switch the insertion mode to the original insertion mode and
2151+
* reprocess the token.
2152+
*/
2153+
$this->mode = $this->original_mode;
2154+
$this->emitToken($token);
2155+
}
2156+
break;
2157+
20992158
case self::IN_CAPTION:
21002159
/* An end tag whose tag name is "caption" */
21012160
if($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'caption') {
@@ -3458,12 +3517,8 @@ private function getFosterParent() {
34583517
public function fosterParent($node) {
34593518
$foster_parent = $this->getFosterParent();
34603519
$table = $this->getCurrentTable(); // almost equivalent to last table element, except it can be html
3461-
/* When a node node is to be foster parented, the node node must be
3462-
* inserted into the foster parent element, and the current table must
3463-
* be marked as tainted. (Once the current table has been tainted,
3464-
* whitespace characters are inserted into the foster parent element
3465-
* instead of the current node.) */
3466-
$table->tainted = true;
3520+
/* When a node node is to be foster parented, the node node must be
3521+
* inserted into the foster parent element. */
34673522
/* If the foster parent element is the parent element of the last table
34683523
* element in the stack of open elements, then node must be inserted
34693524
* immediately before the last table element in the stack of open

0 commit comments

Comments
 (0)
0