Add quirks handling, update Python doctype list, move some tests to 99. · Nimbleworks/html5lib-php@4e149f1 · GitHub
[go: up one dir, main page]

Skip to content

Commit 4e149f1

Browse files
author
Edward Z. Yang
committed
Add quirks handling, update Python doctype list, move some tests to 99.
1 parent 9799e6f commit 4e149f1

File tree

1 file changed

+129
-5
lines changed

1 file changed

+129
-5
lines changed

library/HTML5/TreeConstructer.php

Lines changed: 129 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@
2929
// Tags for FIX ME!!!: (in order of priority)
3030
// XXX - should be fixed NAO!
3131
// XFOREIGN - with regards to SVG and MathML
32-
// XQUIRKS - with regards to quirks mode
3332
// XERROR - with regards to parse errors
3433
// XSCRIPT - with regards to scripting mode
3534
// XENCODING - with regards to encoding (for reparsing tests)
@@ -203,14 +202,139 @@ public function emitToken($token, $mode = null) {
203202
// It looks like libxml's not actually *able* to express this case.
204203
// So... don't. XXX
205204
}
206-
// XQUIRKS: Implement quirks mode
205+
$public = is_null($token['public']) ? false : strtolower($token['public']);
206+
$system = is_null($token['system']) ? false : strtolower($token['system']);
207+
$publicStartsWithForQuirks = array(
208+
"+//silmaril//dtd html pro v0r11 19970101//",
209+
"-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
210+
"-//as//dtd html 3.0 aswedit + extensions//",
211+
"-//ietf//dtd html 2.0 level 1//",
212+
"-//ietf//dtd html 2.0 level 2//",
213+
"-//ietf//dtd html 2.0 strict level 1//",
214+
"-//ietf//dtd html 2.0 strict level 2//",
215+
"-//ietf//dtd html 2.0 strict//",
216+
"-//ietf//dtd html 2.0//",
217+
"-//ietf//dtd html 2.1e//",
218+
"-//ietf//dtd html 3.0//",
219+
"-//ietf//dtd html 3.2 final//",
220+
"-//ietf//dtd html 3.2//",
221+
"-//ietf//dtd html 3//",
222+
"-//ietf//dtd html level 0//",
223+
"-//ietf//dtd html level 1//",
224+
"-//ietf//dtd html level 2//",
225+
"-//ietf//dtd html level 3//",
226+
"-//ietf//dtd html strict level 0//",
227+
"-//ietf//dtd html strict level 1//",
228+
"-//ietf//dtd html strict level 2//",
229+
"-//ietf//dtd html strict level 3//",
230+
"-//ietf//dtd html strict//",
231+
"-//ietf//dtd html//",
232+
"-//metrius//dtd metrius presentational//",
233+
"-//microsoft//dtd internet explorer 2.0 html strict//",
234+
"-//microsoft//dtd internet explorer 2.0 html//",
235+
"-//microsoft//dtd internet explorer 2.0 tables//",
236+
"-//microsoft//dtd internet explorer 3.0 html strict//",
237+
"-//microsoft//dtd internet explorer 3.0 html//",
238+
"-//microsoft//dtd internet explorer 3.0 tables//",
239+
"-//netscape comm. corp.//dtd html//",
240+
"-//netscape comm. corp.//dtd strict html//",
241+
"-//o'reilly and associates//dtd html 2.0//",
242+
"-//o'reilly and associates//dtd html extended 1.0//",
243+
"-//o'reilly and associates//dtd html extended relaxed 1.0//",
244+
"-//spyglass//dtd html 2.0 extended//",
245+
"-//sq//dtd html 2.0 hotmetal + extensions//",
246+
"-//sun microsystems corp.//dtd hotjava html//",
247+
"-//sun microsystems corp.//dtd hotjava strict html//",
248+
"-//w3c//dtd html 3 1995-03-24//",
249+
"-//w3c//dtd html 3.2 draft//",
250+
"-//w3c//dtd html 3.2 final//",
251+
"-//w3c//dtd html 3.2//",
252+
"-//w3c//dtd html 3.2s draft//",
253+
"-//w3c//dtd html 4.0 frameset//",
254+
"-//w3c//dtd html 4.0 transitional//",
255+
"-//w3c//dtd html experimental 19960712//",
256+
"-//w3c//dtd html experimental 970421//",
257+
"-//w3c//dtd w3 html//",
258+
"-//w3o//dtd w3 html 3.0//",
259+
"-//webtechs//dtd mozilla html 2.0//",
260+
"-//webtechs//dtd mozilla html//",
261+
);
262+
$publicSetToForQuirks = array(
263+
"-//w3o//dtd w3 html strict 3.0//",
264+
"-/w3c/dtd html 4.0 transitional/en",
265+
"html",
266+
);
267+
$publicStartsWithAndSystemForQuirks = array(
268+
"-//w3c//dtd html 4.01 frameset//",
269+
"-//w3c//dtd html 4.01 transitional//",
270+
);
271+
$publicStartsWithForLimitedQuirks = array(
272+
"-//w3c//dtd xhtml 1.0 frameset//",
273+
"-//w3c//dtd xhtml 1.0 transitional//",
274+
);
275+
$publicStartsWithAndSystemForLimitedQuirks = array(
276+
"-//w3c//dtd html 4.01 frameset//",
277+
"-//w3c//dtd html 4.01 transitional//",
278+
);
279+
// first, do easy checks
280+
if (
281+
!empty($token['force-quirks']) ||
282+
strtolower($token['name']) !== 'html'
283+
) {
284+
$this->quirks_mode = self::QUIRKS_MODE;
285+
} else {
286+
do {
287+
if ($system) {
288+
foreach ($publicStartsWithAndSystemForQuirks as $x) {
289+
if (strncmp($public, $x, strlen($x)) === 0) {
290+
$this->quirks_mode = self::QUIRKS_MODE;
291+
break;
292+
}
293+
}
294+
if (!is_null($this->quirks_mode)) break;
295+
foreach ($publicStartsWithAndSystemForLimitedQuirks as $x) {
296+
if (strncmp($public, $x, strlen($x)) === 0) {
297+
$this->quirks_mode = self::LIMITED_QUIRKS_MODE;
298+
break;
299+
}
300+
}
301+
if (!is_null($this->quirks_mode)) break;
302+
}
303+
foreach ($publicSetToForQuirks as $x) {
304+
if ($public === $x) {
305+
$this->quirks_mode = self::QUIRKS_MODE;
306+
break;
307+
}
308+
}
309+
if (!is_null($this->quirks_mode)) break;
310+
foreach ($publicStartsWithForLimitedQuirks as $x) {
311+
if (strncmp($public, $x, strlen($x)) === 0) {
312+
$this->quirks_mode = self::LIMITED_QUIRKS_MODE;
313+
}
314+
}
315+
if (!is_null($this->quirks_mode)) break;
316+
if ($system === "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd") {
317+
$this->quirks_mode = self::QUIRKS_MODE;
318+
break;
319+
}
320+
foreach ($publicStartsWithForQuirks as $x) {
321+
if (strncmp($public, $x, strlen($x)) === 0) {
322+
$this->quirks_mode = self::QUIRKS_MODE;
323+
break;
324+
}
325+
}
326+
if (is_null($this->quirks_mode)) {
327+
$this->quirks_mode = self::NO_QUIRKS;
328+
}
329+
} while (false);
330+
}
207331
$this->mode = self::BEFORE_HTML;
208332
} else {
209333
// parse error
210-
// XQUIRKS: Implement quirks mode
211334
/* Switch the insertion mode to "before html", then reprocess the
212335
* current token. */
213336
$this->mode = self::BEFORE_HTML;
337+
$this->quirks_mode = self::QUIRKS_MODE;
214338
$this->emitToken($token);
215339
}
216340
break;
@@ -979,10 +1103,10 @@ public function emitToken($token, $mode = null) {
9791103

9801104
/* A start tag whose tag name is "table" */
9811105
case 'table':
982-
// XQUIRKS: If NOT in quirks mode
9831106
/* If the stack of open elements has a p element in scope,
9841107
then act as if an end tag with the tag name p had been seen. */
985-
if($this->elementInScope('p')) {
1108+
if($this->quirks_mode !== self::QUIRKS_MODE &&
1109+
$this->elementInScope('p')) {
9861110
$this->emitToken(array(
9871111
'name' => 'p',
9881112
'type' => HTML5_Tokenizer::ENDTAG

0 commit comments

Comments
 (0)
0