Fill in some missing tokenizer error emissions. · html5lib/html5lib-php@1cbacc5 · GitHub
[go: up one dir, main page]

Skip to content

Commit 1cbacc5

Browse files
author
Edward Z. Yang
committed
Fill in some missing tokenizer error emissions.
1 parent fd6660f commit 1cbacc5

File tree

2 files changed

+36
-22
lines changed

2 files changed

+36
-22
lines changed

library/HTML5/Tokenizer.php

Lines changed: 34 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1109,14 +1109,6 @@ public function parse() {
11091109
Emit the current tag token. Switch to the data state. */
11101110
// not sure if this is the name we want
11111111
$this->token['self-closing'] = true;
1112-
/* When an end tag token is emitted with its self-closing flag set,
1113-
that is a parse error. */
1114-
if ($this->token['type'] === self::ENDTAG) {
1115-
$this->emitToken(array(
1116-
'type' => self::PARSEERROR,
1117-
'data' => 'self-closing-end-tag'
1118-
));
1119-
}
11201112
$this->emitToken($this->token);
11211113
$state = 'data';
11221114

@@ -2367,18 +2359,48 @@ private function characterReferenceInAttributeValue($allowed = false) {
23672359
/**
23682360
* Emits a token, passing it on to the tree builder.
23692361
*/
2370-
protected function emitToken($token, $checkStream = true) {
2362+
protected function emitToken($token, $checkStream = true, $dry = false) {
23712363
if ($checkStream) {
23722364
// Emit errors from input stream.
23732365
while ($this->stream->errors) {
23742366
$this->emitToken(array_shift($this->stream->errors), false);
23752367
}
23762368
}
2369+
if($token['type'] === self::ENDTAG && !empty($token['attr'])) {
2370+
for ($i = 0; $i < count($token['attr']); $i++) {
2371+
$this->emitToken(array(
2372+
'type' => self::PARSEERROR,
2373+
'data' => 'attributes-in-end-tag'
2374+
));
2375+
}
2376+
}
2377+
if($token['type'] === self::ENDTAG && !empty($token['self-closing'])) {
2378+
$this->emitToken(array(
2379+
'type' => self::PARSEERROR,
2380+
'data' => 'self-closing-flag-on-end-tag',
2381+
));
2382+
}
2383+
if($token['type'] === self::STARTTAG) {
2384+
// This could be changed to actually pass the tree-builder a hash
2385+
$hash = array();
2386+
foreach ($token['attr'] as $keypair) {
2387+
if (isset($hash[$keypair['name']])) {
2388+
$this->emitToken(array(
2389+
'type' => self::PARSEERROR,
2390+
'data' => 'duplicate-attribute',
2391+
));
2392+
} else {
2393+
$hash[$keypair['name']] = $keypair['value'];
2394+
}
2395+
}
2396+
}
23772397

2378-
// the current structure of attributes is not a terribly good one
2379-
$this->tree->emitToken($token);
2398+
if(!$dry) {
2399+
// the current structure of attributes is not a terribly good one
2400+
$this->tree->emitToken($token);
2401+
}
23802402

2381-
if(is_int($this->tree->content_model)) {
2403+
if(!$dry && is_int($this->tree->content_model)) {
23822404
$this->content_model = $this->tree->content_model;
23832405
$this->tree->content_model = null;
23842406

tests/HTML5/TestableTokenizer.php

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -25,13 +25,8 @@ public function parse() {
2525
}
2626
// --end mismatched interface
2727

28-
protected function emitToken($token, $checkStream = true) {
29-
if ($checkStream) {
30-
// Emit errors from input stream.
31-
while ($this->stream->errors) {
32-
$this->emitToken(array_shift($this->stream->errors), false);
33-
}
34-
}
28+
protected function emitToken($token, $checkStream = true, $dry = false) {
29+
parent::emitToken($token, $checkStream, true);
3530

3631
// tree handling code omitted
3732
switch ($token['type']) {
@@ -56,9 +51,6 @@ protected function emitToken($token, $checkStream = true) {
5651
break;
5752
case self::ENDTAG:
5853
$this->outputTokens[] = array('EndTag', $token['name']);
59-
// this is logic in the parent emitToken algorithm, but
60-
// for optimization reasons we haven't factored it out
61-
$this->content_model = self::PCDATA;
6254
break;
6355
case self::COMMENT:
6456
$this->outputTokens[] = array('Comment', $token['data']);

0 commit comments

Comments
 (0)
0