Convert to use content_model semaphore instead of returning the code. · Nimbleworks/html5lib-php@8e6d677 · GitHub
[go: up one dir, main page]

Skip to content

Commit 8e6d677

Browse files
author
Edward Z. Yang
committed
Convert to use content_model semaphore instead of returning the code.
1 parent e7e7133 commit 8e6d677

File tree

2 files changed

+47
-44
lines changed

2 files changed

+47
-44
lines changed

library/HTML5/Tokenizer.php

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2255,10 +2255,11 @@ protected function emitToken($token, $checkStream = true) {
22552255
}
22562256

22572257
// the current structure of attributes is not a terribly good one
2258-
$emit = $this->tree->emitToken($token);
2258+
$this->tree->emitToken($token);
22592259

2260-
if(is_int($emit)) {
2261-
$this->content_model = $emit;
2260+
if(is_int($this->tree->content_model)) {
2261+
$this->content_model = $this->tree->content_model;
2262+
$this->tree->content_model = null;
22622263

22632264
} elseif($token['type'] === self::ENDTAG) {
22642265
$this->content_model = self::PCDATA;

library/HTML5/TreeConstructer.php

Lines changed: 43 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636

3737
class HTML5_TreeConstructer {
3838
public $stack = array();
39+
public $content_model;
3940

4041
private $mode;
4142
private $original_mode;
@@ -138,7 +139,9 @@ public function emitToken($token, $mode = null) {
138139
/*
139140
$backtrace = debug_backtrace();
140141
if ($backtrace[1]['class'] !== 'HTML5_TreeConstructer') echo "--\n";
141-
echo $this->strConst($mode) . "\n ";
142+
echo $this->strConst($mode);
143+
if ($this->original_mode) echo " (originally ".$this->strConst($this->original_mode).")";
144+
echo "\n ";
142145
token_dump($token);
143146
$this->printStack();
144147
$this->printActiveFormattingElements();
@@ -206,7 +209,7 @@ public function emitToken($token, $mode = null) {
206209
/* Switch the insertion mode to "before html", then reprocess the
207210
* current token. */
208211
$this->mode = self::BEFORE_HTML;
209-
return $this->emitToken($token);
212+
$this->emitToken($token);
210213
}
211214
break;
212215

@@ -253,7 +256,7 @@ public function emitToken($token, $mode = null) {
253256
/* Switch the insertion mode to "before head", then reprocess the
254257
* current token. */
255258
$this->mode = self::BEFORE_HEAD;
256-
return $this->emitToken($token);
259+
$this->emitToken($token);
257260
}
258261
break;
259262

@@ -283,7 +286,7 @@ public function emitToken($token, $mode = null) {
283286
} elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
284287
/* Process the token using the rules for the "in body"
285288
* insertion mode. */
286-
return $this->processWithRulesFor($token, self::IN_BODY);
289+
$this->processWithRulesFor($token, self::IN_BODY);
287290

288291
/* A start tag token with the tag name "head" */
289292
} elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'head') {
@@ -309,7 +312,7 @@ public function emitToken($token, $mode = null) {
309312
'type' => HTML5_Tokenizer::STARTTAG,
310313
'attr' => array()
311314
));
312-
return $this->emitToken($token);
315+
$this->emitToken($token);
313316

314317
/* Any other end tag */
315318
} elseif($token['type'] === HTML5_Tokenizer::ENDTAG) {
@@ -327,7 +330,7 @@ public function emitToken($token, $mode = null) {
327330
'type' => HTML5_Tokenizer::STARTTAG,
328331
'attr' => array()
329332
));
330-
return $this->emitToken($token);
333+
$this->emitToken($token);
331334
}
332335
break;
333336

@@ -356,7 +359,7 @@ public function emitToken($token, $mode = null) {
356359
/* A start tag whose tag name is "html" */
357360
} elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
358361
$token['name'] === 'html') {
359-
return $this->processWithRulesFor($token, self::IN_BODY);
362+
$this->processWithRulesFor($token, self::IN_BODY);
360363

361364
/* A start tag whose tag name is one of: "base", "command", "link" */
362365
} elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
@@ -391,14 +394,14 @@ public function emitToken($token, $mode = null) {
391394

392395
/* A start tag with the tag name "title" */
393396
} elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'title') {
394-
return $this->insertRCDATAElement($token);
397+
$this->insertRCDATAElement($token);
395398

396399
/* A start tag whose tag name is "noscript", if the scripting flag is enabled, or
397400
* A start tag whose tag name is one of: "noframes", "style" */
398401
} elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
399402
($token['name'] === 'noscript' || $token['name'] === 'noframes' || $token['name'] === 'style')) {
400403
// XSCRIPT: Scripting flag not respected
401-
return $this->insertCDATAElement($token);
404+
$this->insertCDATAElement($token);
402405

403406
// XSCRIPT: Scripting flag disable not implemented
404407

@@ -426,7 +429,7 @@ public function emitToken($token, $mode = null) {
426429
/* 7. Switch the insertion mode to "in CDATA/RCDATA" */
427430
$this->mode = self::IN_CDATA_RCDATA;
428431
/* 5. Switch the tokeniser's content model flag to the CDATA state. */
429-
return HTML5_Tokenizer::CDATA;
432+
$this->content_model = HTML5_Tokenizer::CDATA;
430433

431434
/* An end tag with the tag name "head" */
432435
} elseif($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'head') {
@@ -455,15 +458,15 @@ public function emitToken($token, $mode = null) {
455458
));
456459

457460
/* Then, reprocess the current token. */
458-
return $this->emitToken($token);
461+
$this->emitToken($token);
459462
}
460463
break;
461464

462465
case self::IN_HEAD_NOSCRIPT:
463466
if ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
464467
// parse error
465468
} elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
466-
return $this->processWithRulesFor($token, self::IN_BODY);
469+
$this->processWithRulesFor($token, self::IN_BODY);
467470
} elseif ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'noscript') {
468471
/* Pop the current node (which will be a noscript element) from the
469472
* stack of open elements; the new current node will be a head
@@ -477,7 +480,7 @@ public function emitToken($token, $mode = null) {
477480
($token['type'] === HTML5_Tokenizer::STARTTAG && (
478481
$token['name'] === 'link' || $token['name'] === 'meta' ||
479482
$token['name'] === 'noframes' || $token['name'] === 'style'))) {
480-
return $this->processWithRulesFor($token, self::IN_HEAD);
483+
$this->processWithRulesFor($token, self::IN_HEAD);
481484
// inverted logic
482485
} elseif (
483486
($token['type'] === HTML5_Tokenizer::STARTTAG && (
@@ -491,7 +494,7 @@ public function emitToken($token, $mode = null) {
491494
'type' => HTML5_Tokenizer::ENDTAG,
492495
'name' => 'noscript',
493496
));
494-
return $this->emitToken($token);
497+
$this->emitToken($token);
495498
}
496499
break;
497500

@@ -516,7 +519,7 @@ public function emitToken($token, $mode = null) {
516519
// parse error
517520

518521
} elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
519-
return $this->processWithRulesFor($token, self::IN_BODY);
522+
$this->processWithRulesFor($token, self::IN_BODY);
520523

521524
/* A start tag token with the tag name "body" */
522525
} elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'body') {
@@ -544,9 +547,8 @@ public function emitToken($token, $mode = null) {
544547
/* Push the node pointed to by the head element pointer onto the
545548
* stack of open elements. */
546549
$this->stack[] = $this->head_pointer;
547-
$out = $this->processWithRulesFor($token, self::IN_HEAD);
550+
$this->processWithRulesFor($token, self::IN_HEAD);
548551
array_pop($this->stack);
549-
return $out;
550552

551553
// inversion of specification
552554
} elseif(
@@ -564,7 +566,7 @@ public function emitToken($token, $mode = null) {
564566
'attr' => array()
565567
));
566568
$this->flag_frameset_ok = true;
567-
return $this->emitToken($token);
569+
$this->emitToken($token);
568570
}
569571
break;
570572

@@ -616,7 +618,7 @@ public function emitToken($token, $mode = null) {
616618
case 'script': case 'style': case 'title':
617619
/* Process the token as if the insertion mode had been "in
618620
head". */
619-
return $this->processWithRulesFor($token, self::IN_HEAD);
621+
$this->processWithRulesFor($token, self::IN_HEAD);
620622
break;
621623

622624
/* A start tag token with the tag name "body" */
@@ -839,7 +841,7 @@ public function emitToken($token, $mode = null) {
839841
/* Insert an HTML element for the token. */
840842
$this->insertElement($token);
841843

842-
return HTML5_Tokenizer::PLAINTEXT;
844+
$this->content_model = HTML5_Tokenizer::PLAINTEXT;
843845
break;
844846

845847
// more diversions
@@ -1049,7 +1051,7 @@ public function emitToken($token, $mode = null) {
10491051
/* Parse error. Change the token's tag name to "img" and
10501052
reprocess it. (Don't ask.) */
10511053
$token['name'] = 'img';
1052-
return $this->emitToken($token);
1054+
$this->emitToken($token);
10531055
break;
10541056

10551057
/* A start tag whose tag name is "isindex" */
@@ -1178,7 +1180,7 @@ public function emitToken($token, $mode = null) {
11781180

11791181
/* Switch the tokeniser's content model flag to the
11801182
RCDATA state. */
1181-
return HTML5_Tokenizer::RCDATA;
1183+
$this->content_model = HTML5_Tokenizer::RCDATA;
11821184
break;
11831185

11841186
/* A start tag token whose tag name is "xmp" */
@@ -1188,16 +1190,17 @@ public function emitToken($token, $mode = null) {
11881190

11891191
$this->flag_frameset_ok = false;
11901192

1191-
return $this->insertCDATAElement($token);
1193+
$this->insertCDATAElement($token);
11921194
break;
11931195

11941196
case 'iframe':
11951197
$this->flag_frameset_ok = false;
1196-
return $this->insertCDATAElement($token);
1198+
$this->insertCDATAElement($token);
1199+
break;
11971200

11981201
case 'noembed': case 'noscript':
11991202
// XSCRIPT: should check scripting flag
1200-
return $this->insertCDATAElement($token);
1203+
$this->insertCDATAElement($token);
12011204
break;
12021205

12031206
/* A start tag whose tag name is "select" */
@@ -1319,7 +1322,7 @@ public function emitToken($token, $mode = null) {
13191322
'type' => HTML5_Tokenizer::ENDTAG
13201323
));
13211324

1322-
if (!$this->ignored) return $this->emitToken($token);
1325+
if (!$this->ignored) $this->emitToken($token);
13231326
break;
13241327

13251328
case 'address': case 'article': case 'aside': case 'blockquote':
@@ -1844,7 +1847,7 @@ public function emitToken($token, $mode = null) {
18441847
'attr' => array()
18451848
));
18461849

1847-
return $this->emitToken($token);
1850+
$this->emitToken($token);
18481851

18491852
/* A start tag whose tag name is one of: "tbody", "tfoot", "thead" */
18501853
} elseif($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'],
@@ -1868,7 +1871,7 @@ public function emitToken($token, $mode = null) {
18681871
'attr' => array()
18691872
));
18701873

1871-
return $this->emitToken($token);
1874+
$this->emitToken($token);
18721875

18731876
/* A start tag whose tag name is "table" */
18741877
} elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
@@ -1881,7 +1884,7 @@ public function emitToken($token, $mode = null) {
18811884
'type' => HTML5_Tokenizer::ENDTAG
18821885
));
18831886

1884-
if (!$this->ignored) return $this->emitToken($token);
1887+
if (!$this->ignored) $this->emitToken($token);
18851888

18861889
/* An end tag whose tag name is "table" */
18871890
} elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
@@ -1991,7 +1994,7 @@ public function emitToken($token, $mode = null) {
19911994
'type' => HTML5_Tokenizer::ENDTAG
19921995
));
19931996

1994-
if (!$this->ignored) return $this->emitToken($token);
1997+
if (!$this->ignored) $this->emitToken($token);
19951998

19961999
/* An end tag whose tag name is one of: "body", "col", "colgroup",
19972000
"html", "tbody", "td", "tfoot", "th", "thead", "tr" */
@@ -2072,7 +2075,7 @@ public function emitToken($token, $mode = null) {
20722075
'type' => HTML5_Tokenizer::ENDTAG
20732076
));
20742077

2075-
if (!$this->ignored) return $this->emitToken($token);
2078+
if (!$this->ignored) $this->emitToken($token);
20762079
}
20772080
break;
20782081

@@ -2100,7 +2103,7 @@ public function emitToken($token, $mode = null) {
21002103
'attr' => array()
21012104
));
21022105

2103-
return $this->emitToken($token);
2106+
$this->emitToken($token);
21042107

21052108
/* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */
21062109
} elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
@@ -2148,7 +2151,7 @@ public function emitToken($token, $mode = null) {
21482151
'type' => HTML5_Tokenizer::ENDTAG
21492152
));
21502153

2151-
return $this->emitToken($token);
2154+
$this->emitToken($token);
21522155
}
21532156

21542157
/* An end tag whose tag name is one of: "body", "caption", "col",
@@ -2215,7 +2218,7 @@ public function emitToken($token, $mode = null) {
22152218
'name' => 'tr',
22162219
'type' => HTML5_Tokenizer::ENDTAG
22172220
));
2218-
if (!$this->ignored) return $this->emitToken($token);
2221+
if (!$this->ignored) $this->emitToken($token);
22192222

22202223
/* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */
22212224
} elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
@@ -2235,7 +2238,7 @@ public function emitToken($token, $mode = null) {
22352238
'type' => HTML5_Tokenizer::ENDTAG
22362239
));
22372240

2238-
return $this->emitToken($token);
2241+
$this->emitToken($token);
22392242
}
22402243

22412244
/* An end tag whose tag name is one of: "body", "caption", "col",
@@ -2303,7 +2306,7 @@ public function emitToken($token, $mode = null) {
23032306
token. */
23042307
} else {
23052308
$this->closeCell();
2306-
return $this->emitToken($token);
2309+
$this->emitToken($token);
23072310
}
23082311

23092312
/* An end tag whose tag name is one of: "body", "caption", "col",
@@ -2328,7 +2331,7 @@ public function emitToken($token, $mode = null) {
23282331
token. */
23292332
} else {
23302333
$this->closeCell();
2331-
return $this->emitToken($token);
2334+
$this->emitToken($token);
23322335
}
23332336

23342337
/* Anything else */
@@ -2577,7 +2580,7 @@ public function emitToken($token, $mode = null) {
25772580
/* Parse error. Set the insertion mode to "in body" and reprocess
25782581
the token. */
25792582
$this->mode = self::IN_BODY;
2580-
return $this->emitToken($token);
2583+
$this->emitToken($token);
25812584
}
25822585
break;
25832586

@@ -3121,14 +3124,14 @@ private function insertCDATAElement($token) {
31213124
$this->insertElement($token);
31223125
$this->original_mode = $this->mode;
31233126
$this->mode = self::IN_CDATA_RCDATA;
3124-
return HTML5_Tokenizer::CDATA;
3127+
$this->content_model = HTML5_Tokenizer::CDATA;
31253128
}
31263129

31273130
private function insertRCDATAElement($token) {
31283131
$this->insertElement($token);
31293132
$this->original_mode = $this->mode;
31303133
$this->mode = self::IN_CDATA_RCDATA;
3131-
return HTML5_Tokenizer::RCDATA;
3134+
$this->content_model = HTML5_Tokenizer::RCDATA;
31323135
}
31333136

31343137
private function getAttr($token, $key) {
@@ -3137,7 +3140,6 @@ private function getAttr($token, $key) {
31373140
foreach ($token['attr'] as $keypair) {
31383141
if ($keypair['name'] === $key) $ret = $keypair['value'];
31393142
}
3140-
var_dump($ret);
31413143
return $ret;
31423144
}
31433145

0 commit comments

Comments
 (0)
0