|
18 | 18 | from constants import scopingElements, formattingElements, specialElements
|
19 | 19 | from constants import headingElements, tableInsertModeElements
|
20 | 20 | from constants import cdataElements, rcdataElements, voidElements
|
21 |
| -from constants import tokenTypes |
| 21 | +from constants import tokenTypes, namespaces |
22 | 22 |
|
23 | 23 | def parse(doc, treebuilderName="simpletree", encoding=None):
|
24 | 24 | tb = treebuilders.getTreeBuilder(treebuilderName)
|
@@ -68,6 +68,7 @@ def __init__(self, tree = simpletree.TreeBuilder,
|
68 | 68 | "inCell": InCellPhase(self, self.tree),
|
69 | 69 | "inSelect": InSelectPhase(self, self.tree),
|
70 | 70 | "inSelectInTable": InSelectInTablePhase(self, self.tree),
|
| 71 | + "inForeignContent": InForeignContentPhase(self, self.tree), |
71 | 72 | "afterBody": AfterBodyPhase(self, self.tree),
|
72 | 73 | "inFrameset": InFramesetPhase(self, self.tree),
|
73 | 74 | "afterFrameset": AfterFramesetPhase(self, self.tree),
|
@@ -109,9 +110,8 @@ def _parse(self, stream, innerHTML=False, container="div",
|
109 | 110 | self.innerHTML = False
|
110 | 111 | self.phase = self.phases["initial"]
|
111 | 112 |
|
112 |
| - # We only seem to have InBodyPhase testcases where the following is |
113 |
| - # relevant ... need others too |
114 | 113 | self.lastPhase = None
|
| 114 | + self.secondaryPhase = None |
115 | 115 |
|
116 | 116 | self.beforeRCDataPhase = None
|
117 | 117 |
|
@@ -196,6 +196,105 @@ def normalizeToken(self, token):
|
196 | 196 |
|
197 | 197 | return token
|
198 | 198 |
|
def adjustMathMLAttributes(self, token):
    """Restore the mixed-case spelling of MathML attribute names.

    ``token["data"]`` is used as a mapping of attribute name to value and
    is updated in place: a ``definitionurl`` key is renamed to
    ``definitionURL`` and keeps its value.  Returns None.
    """
    replacements = {"definitionurl": "definitionURL"}
    attributes = token["data"]
    # items() instead of the Python-2-only iteritems(); the loop walks the
    # fixed replacement table, never the mapping that is being rewritten.
    for lowerName, mathmlName in replacements.items():
        if lowerName in attributes:
            attributes[mathmlName] = attributes.pop(lowerName)
| 205 | + |
def adjustSVGAttributes(self, token):
    """Restore the camelCase spelling of SVG attribute names.

    ``token["data"]`` is used as a mapping of attribute name to value and
    is updated in place: every key found in the replacement table is
    renamed to its mixed-case form and keeps its value.  Keys not in the
    table are left alone.  Returns None.
    """
    replacements = {
        "attributename": "attributeName",
        "attributetype": "attributeType",
        "basefrequency": "baseFrequency",
        "baseprofile": "baseProfile",
        "calcmode": "calcMode",
        "clippathunits": "clipPathUnits",
        "contentscripttype": "contentScriptType",
        "contentstyletype": "contentStyleType",
        "diffuseconstant": "diffuseConstant",
        "edgemode": "edgeMode",
        "externalresourcesrequired": "externalResourcesRequired",
        "filterres": "filterRes",
        "filterunits": "filterUnits",
        "glyphref": "glyphRef",
        "gradienttransform": "gradientTransform",
        "gradientunits": "gradientUnits",
        "kernelmatrix": "kernelMatrix",
        "kernelunitlength": "kernelUnitLength",
        "keypoints": "keyPoints",
        "keysplines": "keySplines",
        "keytimes": "keyTimes",
        "lengthadjust": "lengthAdjust",
        "limitingconeangle": "limitingConeAngle",
        "markerheight": "markerHeight",
        "markerunits": "markerUnits",
        "markerwidth": "markerWidth",
        "maskcontentunits": "maskContentUnits",
        "maskunits": "maskUnits",
        "numoctaves": "numOctaves",
        "pathlength": "pathLength",
        "patterncontentunits": "patternContentUnits",
        "patterntransform": "patternTransform",
        "patternunits": "patternUnits",
        "pointsatx": "pointsAtX",
        "pointsaty": "pointsAtY",
        "pointsatz": "pointsAtZ",
        "preservealpha": "preserveAlpha",
        "preserveaspectratio": "preserveAspectRatio",
        "primitiveunits": "primitiveUnits",
        "refx": "refX",
        "refy": "refY",
        "repeatcount": "repeatCount",
        "repeatdur": "repeatDur",
        "requiredextensions": "requiredExtensions",
        "requiredfeatures": "requiredFeatures",
        "specularconstant": "specularConstant",
        "specularexponent": "specularExponent",
        "spreadmethod": "spreadMethod",
        "startoffset": "startOffset",
        "stddeviation": "stdDeviation",
        "stitchtiles": "stitchTiles",
        "surfacescale": "surfaceScale",
        "systemlanguage": "systemLanguage",
        "tablevalues": "tableValues",
        "targetx": "targetX",
        "targety": "targetY",
        "textlength": "textLength",
        "viewbox": "viewBox",
        "viewtarget": "viewTarget",
        "xchannelselector": "xChannelSelector",
        "ychannelselector": "yChannelSelector",
        "zoomandpan": "zoomAndPan"
    }
    attributes = token["data"]
    # Walk a snapshot of the keys: renaming inserts and removes entries,
    # and changing a dict while iterating over it raises RuntimeError.
    for originalName in list(attributes):
        if originalName in replacements:
            svgName = replacements[originalName]
            attributes[svgName] = attributes.pop(originalName)
| 276 | + |
def adjustForeignAttributes(self, token):
    """Re-key namespaced attributes as (prefix, local name, namespace).

    ``token["data"]`` is used as a mapping of attribute name to value and
    is updated in place: each ``xlink:*``, ``xml:*`` and ``xmlns``
    attribute listed in the table is re-keyed under a 3-tuple taken from
    the table and keeps its value.  Other attributes are left alone.
    Returns None.
    """
    replacements = {
        "xlink:actuate": ("xlink", "actuate", namespaces["xlink"]),
        "xlink:arcrole": ("xlink", "arcrole", namespaces["xlink"]),
        "xlink:href": ("xlink", "href", namespaces["xlink"]),
        "xlink:role": ("xlink", "role", namespaces["xlink"]),
        "xlink:show": ("xlink", "show", namespaces["xlink"]),
        "xlink:title": ("xlink", "title", namespaces["xlink"]),
        "xlink:type": ("xlink", "type", namespaces["xlink"]),
        "xml:base": ("xml", "base", namespaces["xml"]),
        "xml:lang": ("xml", "lang", namespaces["xml"]),
        "xml:space": ("xml", "space", namespaces["xml"]),
        "xmlns": (None, "xmlns", namespaces["xmlns"]),
        "xmlns:xlink": ("xmlns", "xlink", namespaces["xmlns"])
    }

    attributes = token["data"]
    # Walk a snapshot of the keys: re-keying inserts and removes entries,
    # and changing a dict while iterating over it raises RuntimeError.
    for originalName in list(attributes):
        if originalName in replacements:
            foreignName = replacements[originalName]
            attributes[foreignName] = attributes.pop(originalName)
199 | 298 |
|
200 | 299 | def resetInsertionMode(self):
|
201 | 300 | # The name of this method is mostly historical. (It's also used in the
|
@@ -296,6 +395,9 @@ def processComment(self, token):
|
def processDoctype(self, token):
    """Report a DOCTYPE token as the ``unexpected-doctype`` parse error.

    The token itself is not used; nothing is inserted into the tree.
    """
    self.parser.parseError("unexpected-doctype")
|
298 | 397 |
|
def processCharacters(self, token):
    """Insert the token's character data into the tree as text."""
    self.tree.insertText(token["data"])
| 400 | + |
299 | 401 | def processSpaceCharacters(self, token):
|
300 | 402 | self.tree.insertText(token["data"])
|
301 | 403 |
|
@@ -745,6 +847,8 @@ def __init__(self, parser, tree):
|
745 | 847 | ("select", self.startTagSelect),
|
746 | 848 | (("rp", "rt"), self.startTagRpRt),
|
747 | 849 | (("option", "optgroup"), self.startTagOpt),
|
| 850 | + (("math"), self.startTagMath), |
| 851 | + (("svg"), self.startTagSvg), |
748 | 852 | (("caption", "col", "colgroup", "frame", "frameset", "head",
|
749 | 853 | "tbody", "td", "tfoot", "th", "thead",
|
750 | 854 | "tr"), self.startTagMisplaced),
|
@@ -1030,6 +1134,34 @@ def startTagRpRt(self, token):
|
1030 | 1134 | self.tree.openElements.pop()
|
1031 | 1135 | self.tree.insertElement(token)
|
1032 | 1136 |
|
def startTagMath(self, token):
    """Insert a ``<math>`` element and switch to the foreign-content phase.

    The token's attributes are adjusted for MathML and for namespaced
    attributes, the token is tagged with the MathML namespace and the
    element is inserted.  A self-closing token is popped straight off the
    open-element stack and its self-closing flag acknowledged.
    """
    self.tree.reconstructActiveFormattingElements()
    self.parser.adjustMathMLAttributes(token)
    self.parser.adjustForeignAttributes(token)
    token["namespace"] = namespaces["mathml"]
    self.tree.insertElement(token)
    #Need to get the parse error right for the case where the token
    #has a namespace not equal to the xmlns attribute
    foreignPhase = self.parser.phases["inForeignContent"]
    # Remember the phase the parser is really in, not this handler's own
    # phase: another phase may have delegated the tag here.  When already
    # in foreign content the saved phase must be kept as it is.
    if self.parser.phase != foreignPhase:
        self.parser.secondaryPhase = self.parser.phase
        self.parser.phase = foreignPhase
    if token["selfClosing"]:
        self.tree.openElements.pop()
        token["selfClosingAcknowledged"] = True
<
2851
/td> | 1150 | + |
def startTagSvg(self, token):
    """Insert an ``<svg>`` element and switch to the foreign-content phase.

    The token's attributes are adjusted for SVG and for namespaced
    attributes, the token is tagged with the SVG namespace and the
    element is inserted.  A self-closing token is popped straight off the
    open-element stack and its self-closing flag acknowledged.
    """
    self.tree.reconstructActiveFormattingElements()
    self.parser.adjustSVGAttributes(token)
    self.parser.adjustForeignAttributes(token)
    token["namespace"] = namespaces["svg"]
    self.tree.insertElement(token)
    #Need to get the parse error right for the case where the token
    #has a namespace not equal to the xmlns attribute
    foreignPhase = self.parser.phases["inForeignContent"]
    # Remember the phase the parser is really in, not this handler's own
    # phase: another phase may have delegated the tag here.  When already
    # in foreign content the saved phase must be kept as it is.
    if self.parser.phase != foreignPhase:
        self.parser.secondaryPhase = self.parser.phase
        self.parser.phase = foreignPhase
    if token["selfClosing"]:
        self.tree.openElements.pop()
        token["selfClosingAcknowledged"] = True
| 1164 | + |
1033 | 1165 | def startTagMisplaced(self, token):
|
1034 | 1166 | """ Elements that should be children of other elements that have a
|
1035 | 1167 | different insertion mode; here they are ignored
|
@@ -2015,27 +2147,118 @@ def startTagOther(self, token):
|
def endTagTable(self, token):
    """Handle a table-structure end tag while inside a select in a table.

    The ``unexpected-table-element-end-tag-in-select-in-table`` parse
    error is always recorded.  If an element with the token's name is in
    scope, the select is closed through an implied ``select`` end tag and
    the token is then handed to whichever phase is current; otherwise the
    token is dropped.
    """
    self.parser.parseError("unexpected-table-element-end-tag-in-select-in-table", {"name": token["name"]})
    # NOTE(review): the HTML parsing algorithm tests *table* scope at this
    # step -- confirm that elementInScope's default matches that.
    if self.tree.elementInScope(token["name"]):
        self.endTagOther(impliedTagToken("select"))
        self.parser.phase.processEndTag(token)
|
2020 | 2152 |
|
def endTagOther(self, token):
    """Hand any other end tag to the parser's ``inSelect`` phase."""
    selectPhase = self.parser.phases["inSelect"]
    selectPhase.processEndTag(token)
|
2023 | 2155 |
|
2024 | 2156 |
|
class InForeignContentPhase(Phase):
    """Phase used while a MathML or SVG element is being parsed.

    A start tag is handled in one of three ways: forwarded to
    ``parser.secondaryPhase``, used to break out of the foreign subtree,
    or inserted in the namespace of the current node.  End tags are always
    forwarded to ``parser.secondaryPhase``.  After forwarding, the parser
    returns to the secondary phase once no element outside the tree's
    default namespace remains open.
    """

    # Start tags that pop every non-default-namespace element off the
    # stack and are then reprocessed by the secondary phase.
    breakoutElements = frozenset(["b", "big", "blockquote", "body", "br",
                                  "center", "code", "dd", "div", "dl", "dt",
                                  "em", "embed", "font", "h1", "h2", "h3",
                                  "h4", "h5", "h6", "head", "hr", "i", "img",
                                  "li", "listing", "menu", "meta", "nobr",
                                  "ol", "p", "pre", "ruby", "s", "small",
                                  "span", "strong", "strike", "sub", "sup",
                                  "table", "tt", "u", "ul", "var"])

    # Lookup tables built once per class rather than once per start tag.
    _mathmlTextElements = frozenset(["mi", "mo", "mn", "ms", "mtext"])
    _mathmlOnlyChildren = frozenset(["mglyph", "malignmark"])
    _svgHTMLContainers = frozenset(["foreignObject", "desc", "title"])
    _svgTagNames = {"altglyph": "altGlyph",
                    "altglyphdef": "altGlyphDef",
                    "altglyphitem": "altGlyphItem",
                    "animatecolor": "animateColor",
                    "animatemotion": "animateMotion",
                    "animatetransform": "animateTransform",
                    "clippath": "clipPath",
                    "feblend": "feBlend",
                    "fecolormatrix": "feColorMatrix",
                    "fecomponenttransfer": "feComponentTransfer",
                    "fecomposite": "feComposite",
                    "feconvolvematrix": "feConvolveMatrix",
                    "fediffuselighting": "feDiffuseLighting",
                    "fedisplacementmap": "feDisplacementMap",
                    "fedistantlight": "feDistantLight",
                    "feflood": "feFlood",
                    "fefunca": "feFuncA",
                    "fefuncb": "feFuncB",
                    "fefuncg": "feFuncG",
                    "fefuncr": "feFuncR",
                    "fegaussianblur": "feGaussianBlur",
                    "feimage": "feImage",
                    "femerge": "feMerge",
                    "femergenode": "feMergeNode",
                    "femorphology": "feMorphology",
                    "feoffset": "feOffset",
                    "fepointlight": "fePointLight",
                    "fespecularlighting": "feSpecularLighting",
                    "fespotlight": "feSpotLight",
                    "fetile": "feTile",
                    "feturbulence": "feTurbulence",
                    "foreignobject": "foreignObject",
                    "glyphref": "glyphRef",
                    "lineargradient": "linearGradient",
                    "radialgradient": "radialGradient",
                    "textpath": "textPath"}

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

    def nonHTMLElementOpen(self):
        """Return True if any open element is outside the default namespace."""
        defaultNamespace = self.tree.defaultNamespace
        for item in self.tree.openElements[::-1]:
            if item.namespace != defaultNamespace:
                return True
        return False

    def adjustSVGTagNames(self, token):
        """Rewrite ``token["name"]`` to its camelCase SVG form, if it has one."""
        if token["name"] in self._svgTagNames:
            token["name"] = self._svgTagNames[token["name"]]

    def processEOF(self):
        """Do nothing at end of file."""
        pass

    def _handledBySecondaryPhase(self, currentNode, token):
        """Return True if the start tag must go to the secondary phase.

        That is the case when the current node is in the default
        namespace, is a MathML text element receiving anything other than
        mglyph/malignmark, is a MathML annotation-xml receiving svg, or is
        an SVG foreignObject, desc or title element.
        """
        nodeNamespace = currentNode.namespace
        if nodeNamespace == self.tree.defaultNamespace:
            return True
        if nodeNamespace == namespaces["mathml"]:
            if (token["name"] not in self._mathmlOnlyChildren and
                currentNode.name in self._mathmlTextElements):
                return True
            if token["name"] == "svg" and currentNode.name == "annotation-xml":
                return True
        if (nodeNamespace == namespaces["svg"] and
            currentNode.name in self._svgHTMLContainers):
            return True
        return False

    def processStartTag(self, token):
        """Dispatch a start tag seen while in foreign content.

        The token is forwarded to the secondary phase, or breaks out of
        the foreign subtree (recording a parse error), or is inserted as
        an element in the current node's namespace after its tag name and
        attributes have been adjusted.
        """
        currentNode = self.tree.openElements[-1]
        if self._handledBySecondaryPhase(currentNode, token):
            self.parser.secondaryPhase.processStartTag(token)
            # The secondary phase may itself have switched phases; only
            # leave foreign content if it did not and nothing foreign is
            # still open.
            if self.parser.phase == self and not self.nonHTMLElementOpen():
                self.parser.phase = self.parser.secondaryPhase
        elif token["name"] in self.breakoutElements:
            # datavars is a mapping, as in the other parseError calls.
            self.parser.parseError("unexpected_html_element_in_foreign_content",
                                   {"name": token["name"]})
            while (self.tree.openElements[-1].namespace !=
                   self.tree.defaultNamespace):
                self.tree.openElements.pop()
            self.parser.phase = self.parser.secondaryPhase
            self.parser.phase.processStartTag(token)
        else:
            if currentNode.namespace == namespaces["mathml"]:
                self.parser.adjustMathMLAttributes(token)
            elif currentNode.namespace == namespaces["svg"]:
                self.adjustSVGTagNames(token)
                self.parser.adjustSVGAttributes(token)
            self.parser.adjustForeignAttributes(token)
            token["namespace"] = currentNode.namespace
            self.tree.insertElement(token)
            if token["selfClosing"]:
                self.tree.openElements.pop()
                token["selfClosingAcknowledged"] = True

    def processEndTag(self, token):
        """Forward an end tag to the secondary phase.

        Afterwards the parser returns to the secondary phase if it is
        still in this phase and no non-default-namespace element is open.
        """
        self.parser.secondaryPhase.processEndTag(token)
        if self.parser.phase == self and not self.nonHTMLElementOpen():
            self.parser.phase = self.parser.secondaryPhase
2039 | 2262 |
|
2040 | 2263 | class AfterBodyPhase(Phase):
|
2041 | 2264 | def __init__(self, parser, tree):
|
|
0 commit comments