Updates to match current spec including end tag handling for inForeign · akhil-vader/html5lib-python@ae51b0a · GitHub
[go: up one dir, main page]

Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit ae51b0a

Browse files
author
James Graham
committed
Updates to match current spec including end tag handling for inForeign
1 parent 1221a95 commit ae51b0a

File tree

1 file changed

+70
-52
lines changed

1 file changed

+70
-52
lines changed

html5lib/html5parser.py

Lines changed: 70 additions & 52 deletions
<tr class="diff-line-row">
Original file line numberDiff line numberDiff line change
@@ -461,6 +461,12 @@ def wrapped(self, *args, **kwargs):
461461
return function(self, *args, **kwargs)
462462
return wrapped
463463

464+
def getMetaclass(use_metaclass, metaclass_func):
465+
if use_metaclass:
466+
return method_decorator_metaclass(metaclass_func)
467+
else:
468+
return type
469+
464470
class Phase(object):
465471
"""Base class for helper object that implements each phase of processing
466472
"""
@@ -475,8 +481,7 @@ class Phase(object):
475481
# * EndTag
476482
# - endTag* methods
477483

478-
if debug_log:
479-
__metaclass__ = method_decorator_metaclass(log)
484+
__metaclass__ = getMetaclass(debug_log, log)
480485

481486
def __init__(self, parser, tree):
482487
self.parser = parser
@@ -851,6 +856,9 @@ def processCharacters(self, token):
851856
self.anythingElse()
852857
self.parser.phase.processCharacters(token)
853858

859+
def startTagHtml(self, token):
860+
self.parser.phases["inBody"].processStartTag(token)
861+
854862
def startTagBody(self, token):
855863
self.parser.framesetOK = False
856864
self.tree.insertElement(token)
@@ -956,7 +964,7 @@ def __init__(self, parser, tree):
956964
(headingElements, self.endTagHeading),
957965
(("a", "b", "big", "code", "em", "font", "i", "nobr", "s", "small",
958966
"strike", "strong", "tt", "u"), self.endTagFormatting),
959-
(("applet", "button", "marquee", "object"), self.endTagAppletButtonMarqueeObject),
967+
(("applet", "marquee", "object"), self.endTagAppletMarqueeObject),
960968
("br", self.endTagBr),
961969
])
962970
self.endTagHandler.default = self.endTagOther
@@ -1120,7 +1128,6 @@ def startTagButton(self, token):
11201128
else:
11211129
self.tree.reconstructActiveFormattingElements()
11221130
self.tree.insertElement(token)
1123-
self.tree.activeFormattingElements.append(Marker)
11241131
self.parser.framesetOK = False
11251132

11261133
def startTagAppletMarqueeObject(self, token):
@@ -1395,10 +1402,9 @@ def endTagFormatting(self, token):
13951402
# Step 1 paragraph 1
13961403
formattingElement = self.tree.elementInActiveFormattingElements(
13971404
token["name"])
1398-
if not formattingElement or (formattingElement in
1399-
self.tree.openElements and
1400-
not self.tree.elementInScope(
1401-
formattingElement.name)):
1405+
if (not formattingElement or
1406+
(formattingElement in self.tree.openElements and
1407+
not self.tree.elementInScope(formattingElement.name))):
14021408
self.parser.parseError("adoption-agency-1.1", {"name": token["name"]})
14031409
return
14041410

@@ -1421,7 +1427,6 @@ def endTagFormatting(self, token):
14211427
specialElements | scopingElements):
14221428
furthestBlock = element
14231429
break
1424-
14251430
# Step 3
14261431
if furthestBlock is None:
14271432
element = self.tree.openElements.pop()
@@ -1487,7 +1492,12 @@ def endTagFormatting(self, token):
14871492
# lastNode
14881493
if lastNode.parent:
14891494
lastNode.parent.removeChild(lastNode)
1490-
commonAncestor.appendChild(lastNode)
1495+
1496+
if commonAncestor.name in frozenset(("table", "tbody", "tfoot", "thead", "tr")):
1497+
parent, insertBefore = self.tree.getTableMisnestedNodePosition()
1498+
parent.insertBefore(lastNode, insertBefore)
1499+
else:
1500+
commonAncestor.appendChild(lastNode)
14911501

14921502
# Step 8
14931503
clone = formattingElement.cloneNode()
@@ -1507,7 +1517,7 @@ def endTagFormatting(self, token):
15071517
self.tree.openElements.insert(
15081518
self.tree.openElements.index(furthestBlock) + 1, clone)
15091519

1510-
def endTagAppletButtonMarqueeObject(self, token):
1520+
def endTagAppletMarqueeObject(self, token):
15111521
if self.tree.elementInScope(token["name"]):
15121522
self.tree.generateImpliedEndTags()
15131523
if self.tree.openElements[-1].name != token["name"]:
@@ -1532,7 +1542,6 @@ def endTagOther(self, token):
15321542
self.tree.generateImpliedEndTags(exclude=token["name"])
15331543
if self.tree.openElements[-1].name != token["name"]:
15341544
self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
1535-
print self.tree.openElements
15361545
while self.tree.openElements.pop() != node:
15371546
pass
15381547
break
@@ -1957,6 +1966,7 @@ def startTagTableOther(self, token):
19571966
self.parser.phase.processStartTag(token)
19581967
else:
19591968
# innerHTML case
1969+
assert self.parser.innerHTML
19601970
self.parser.parseError()
19611971

19621972
def startTagOther(self, token):
@@ -1981,6 +1991,7 @@ def endTagTable(self, token):
19811991
self.parser.phase.processEndTag(token)
19821992
else:
19831993
# innerHTML case
1994+
assert self.parser.innerHTML
19841995
self.parser.parseError()
19851996

19861997
def endTagIgnore(self, token):
@@ -2072,6 +2083,7 @@ def endTagTableRowGroup(self, token):
20722083
self.parser.phase.processEndTag(token)
20732084
else:
20742085
# innerHTML case
2086+
assert self.parser.innerHTML
20752087
self.parser.parseError()
20762088

20772089
def endTagIgnore(self, token):
@@ -2120,6 +2132,7 @@ def startTagTableOther(self, token):
21202132
self.parser.phase.processStartTag(token)
21212133
else:
21222134
# innerHTML case
2135+
assert self.parser.innerHTML
21232136
self.parser.parseError()
21242137

21252138
def startTagOther(self, token):
@@ -2181,9 +2194,7 @@ def __init__(self, parser, tree):
21812194
self.endTagHandler = utils.MethodDispatcher([
21822195
("option", self.endTagOption),
21832196
("optgroup", self.endTagOptgroup),
2184-
("select", self.endTagSelect),
2185-
(("caption", "table", "tbody", "tfoot", "thead", "tr", "td",
2186-
"th"), self.endTagTableElements)
2197+
("select", self.endTagSelect)
21872198
])
21882199
self.endTagHandler.default = self.endTagOther
21892200

@@ -2255,15 +2266,9 @@ def endTagSelect(self, token):
22552266
self.parser.resetInsertionMode()
22562267
else:
22572268
# innerHTML case
2269+
assert self.parser.innerHTML
22582270
self.parser.parseError()
22592271

2260-
def endTagTableElements(self, token):
2261-
self.parser.parseError("unexpected-end-tag-in-select",
2262-
{"name": token["name"]})
2263-
if self.tree.elementInScope(token["name"], variant="table"):
2264-
self.endTagSelect("select")
2265-
self.parser.phase.processEndTag(token)
2266-
22672272
def endTagOther(self, token):
22682273
self.parser.parseError("unexpected-end-tag-in-select",
22692274
{"name": token["name"]})
@@ -2322,16 +2327,13 @@ def __init__(self, parser, tree):
23222327
Phase.__init__(self, parser, tree)
23232328

23242329
def nonHTMLElementInScope(self):
2330+
rv = False
23252331
for element in self.tree.openElements[::-1]:
2326-
if element.namespace == self.tree.defaultNamespace:
2327-
return self.tree.elementInScope(element)
2328-
assert False
2329-
for item in self.tree.openElements[::-1]:
2330-
if item.namespace == self.tree.defaultNamespace:
2331-
return True
2332-
elif item.nameTuple in scopingElements:
2333-
return False
2334-
return False
2332+
if element.namespace != self.tree.defaultNamespace:
2333+
rv = self.tree.elementInScope(element.name)
2334+
if rv:
2335+
break
2336+
return rv
23352337

23362338
def adjustSVGTagNames(self, token):
23372339
replacements = {"altglyph":"altGlyph",
@@ -2383,21 +2385,23 @@ def processEOF(self):
23832385

23842386
def processStartTag(self, token):
23852387
currentNode = self.tree.openElements[-1]
2386-
if (currentNode.namespace == self.tree.defaultNamespace or
2387-
(currentNode.namespace == namespaces["mathml"] and
2388+
currentNodeNamespace = currentNode.namespace
2389+
currentNodeName = currentNode.name
2390+
if (currentNodeNamespace == self.tree.defaultNamespace or
2391+
(currentNodeNamespace == namespaces["mathml"] and
23882392
token["name"] not in frozenset(["mglyph", "malignmark"]) and
2389-
currentNode.name in frozenset(["mi", "mo", "mn",
2393+
currentNodeName in frozenset(["mi", "mo", "mn",
23902394
"ms", "mtext"])) or
2391-
(currentNode.namespace == namespaces["mathml"] and
2392-
currentNode.name == "annotation-xml" and
2395+
(currentNodeNamespace == namespaces["mathml"] and
2396+
currentNodeName == "annotation-xml" and
23932397
token["name"] == "svg") or
23942398
(currentNode.namespace == namespaces["svg"] and
23952399
currentNode.name in frozenset(["foreignObject",
23962400
"desc", "title"])
23972401
)):
23982402
assert self.parser.secondaryPhase != self
23992403
self.parser.secondaryPhase.processStartTag(token)
2400-
if self.parser.phase == self and self.nonHTMLElementInScope():
2404+
if self.parser.phase == self and not self.nonHTMLElementInScope():
24012405
self.parser.phase = self.parser.secondaryPhase
24022406
elif token["name"] in self.breakoutElements:
24032407
self.parser.parseError("unexpected-html-element-in-foreign-content",
@@ -2421,10 +2425,34 @@ def processStartTag(self, token):
24212425
token["selfClosingAcknowledged"] = True
24222426

24232427
def processEndTag(self, token):
2424-
self.adjustSVGTagNames(token)
2425-
self.parser.secondaryPhase.processEndTag(token)
2426-
if self.parser.phase == self and self.nonHTMLElementInScope():
2427-
self.parser.phase = self.parser.secondaryPhase
2428+
if self.tree.openElements[-1].namespace != self.tree.defaultNamespace:
2429+
nodeIndex = len(self.tree.openElements) - 1
2430+
node = self.tree.openElements[-1]
2431+
if node.name != token["name"]:
2432+
self.parser.parseError("unexpected-end-tag", token["name"])
2433+
2434+
while True:
2435+
if node.name == token["name"]:
2436+
popped = self.tree.openElements.pop()
2437+
while popped != node:
2438+
popped = self.tree.openElements.pop()
2439+
assert self.tree.openElements
2440+
break
2441+
nodeIndex -= 1
2442+
2443+
node = self.tree.openElements[nodeIndex]
2444+
if node.namespace == self.tree.defaultNamespace:
2445+
assert self.parser.secondaryPhase != self
2446+
self.parser.secondaryPhase.processEndTag(token)
2447+
if self.parser.phase == self and not self.nonHTMLElementInScope():
2448+
self.parser.phase = self.parser.secondaryPhase
2449+
break
2450+
2451+
else:
2452+
assert self.parser.secondaryPhase != self
2453+
self.parser.secondaryPhase.processEndTag(token)
2454+
if self.parser.phase == self and not self.nonHTMLElementInScope():
2455+
self.parser.phase = self.parser.secondaryPhase
24282456

24292457
class AfterBodyPhase(Phase):
24302458
def __init__(self, parser, tree):
@@ -2487,8 +2515,7 @@ def __init__(self, parser, tree):
24872515
self.startTagHandler.default = self.startTagOther
24882516

24892517
self.endTagHandler = utils.MethodDispatcher([
2490-
("frameset", self.endTagFrameset),
2491-
("noframes", self.endTagNoframes)
2518+
("frameset", self.endTagFrameset)
24922519
])
24932520
self.endTagHandler.default = self.endTagOther
24942521

@@ -2527,9 +2554,6 @@ def endTagFrameset(self, token):
25272554
# "frameset" element (anymore) then switch.
25282555
self.parser.phase = self.parser.phases["afterFrameset"]
25292556

2530-
def endTagNoframes(self, token):
2531-
self.parser.phases["inBody"].processEndTag(token)
2532-
25332557
def endTagOther(self, token):
25342558
self.parser.parseError("unexpected-end-tag-in-frameset",
25352559
{"name": token["name"]})
@@ -2632,8 +2656,6 @@ def processSpaceCharacters(self, token):
26322656

26332657
def processCharacters(self, token):
26342658
self.parser.parseError("expected-eof-but-got-char")
2635-
self.parser.phase = self.parser.phases["inBody"]
2636-
self.parser.phase.processCharacters(token)
26372659

26382660
def startTagHtml(self, token):
26392661
self.parser.phases["inBody"].processStartTag(token)
@@ -2644,14 +2666,10 @@ def startTagNoFrames(self, token):
26442666
def startTagOther(self, token):
26452667
self.parser.parseError("expected-eof-but-got-start-tag",
26462668
{"name": token["name"]})
2647-
self.parser.phase = self.parser.phases["inBody"]
2648-
self.parser.phase.processStartTag(token)
26492669

26502670
def processEndTag(self, token):
26512671
self.parser.parseError("expected-eof-but-got-end-tag",
26522672
{"name": token["name"]})
2653-
self.parser.phase = self.parser.phases["inBody"]
2654-
self.parser.phase.processEndTag(token)
26552673

26562674
def impliedTagToken(name, type="EndTag", attributes = None,
26572675
selfClosing = False):

0 commit comments

Comments
 (0)
0