Getting closer, just a few bugs to fix · awesome-python/html5lib-python@595e72d · GitHub
[go: up one dir, main page]

Skip to content

Commit 595e72d

Browse files
committed
Getting closer, just a few bugs to fix
--HG-- branch : svgmathml extra : convert_revision : svn%3Aacbfec75-9323-0410-a652-858a13e371e0/branches/svgmathml%401268
1 parent 9411041 commit 595e72d

File tree

7 files changed

+288
-36
lines changed

7 files changed

+288
-36
lines changed

src/html5lib/constants.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1111,6 +1111,8 @@
11111111
"xmlns":"http://www.w3.org/2000/xmlns/"
11121112
}
11131113

1114+
prefixes = dict([(v,k) for k,v in namespaces.iteritems()])
1115+
prefixes["http://www.w3.org/1998/Math/MathML"] = "math"
11141116

11151117
class DataLossWarning(UserWarning):
11161118
pass

src/html5lib/html5parser.py

Lines changed: 236 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
from constants import scopingElements, formattingElements, specialElements
1919
from constants import headingElements, tableInsertModeElements
2020
from constants import cdataElements, rcdataElements, voidElements
21-
from constants import tokenTypes
21+
from constants import tokenTypes, namespaces
2222

2323
def parse(doc, treebuilderName="simpletree", encoding=None):
2424
tb = treebuilders.getTreeBuilder(treebuilderName)
@@ -68,6 +68,7 @@ def __init__(self, tree = simpletree.TreeBuilder,
6868
"inCell": InCellPhase(self, self.tree),
6969
"inSelect": InSelectPhase(self, self.tree),
7070
"inSelectInTable": InSelectInTablePhase(self, self.tree),
71+
"inForeignContent": InForeignContentPhase(self, self.tree),
7172
"afterBody": AfterBodyPhase(self, self.tree),
7273
"inFrameset": InFramesetPhase(self, self.tree),
7374
"afterFrameset": AfterFramesetPhase(self, self.tree),
@@ -109,9 +110,8 @@ def _parse(self, stream, innerHTML=False, container="div",
109110
self.innerHTML = False
110111
self.phase = self.phases["initial"]
111112

112-
# We only seem to have InBodyPhase testcases where the following is
113-
# relevant ... need others too
114113
self.lastPhase = None
114+
self.secondaryPhase = None
115115

116116
self.beforeRCDataPhase = None
117117

@@ -196,6 +196,105 @@ def normalizeToken(self, token):
196196

197197
return token
198198

199+
def adjustMathMLAttributes(self, token):
    """Rename MathML attributes to their mixed-case spelling, in place.

    The only adjustment is ``definitionurl`` -> ``definitionURL``.

    :arg token: token whose ``"data"`` entry maps attribute names to
        values; that mapping is modified in place.
    :returns: None
    """
    replacements = {"definitionurl": "definitionURL"}
    attributes = token["data"]
    # .items() instead of .iteritems(): the latter only exists on Python 2.
    # Iterating the replacement table (not the attributes) keeps the rename
    # below safe.
    for lowered, adjusted in replacements.items():
        if lowered in attributes:
            attributes[adjusted] = attributes.pop(lowered)
205+
206+
def adjustSVGAttributes(self, token):
    """Rename SVG attributes to their mixed-case spelling, in place.

    Every key of ``token["data"]`` that appears in the replacement table
    below is replaced by the table's mixed-case name; the value is kept.
    Keys not in the table are left untouched.

    :arg token: token whose ``"data"`` entry maps attribute names to
        values; that mapping is modified in place.
    :returns: None
    """
    replacements = {
        "attributename": "attributeName",
        "attributetype": "attributeType",
        "basefrequency": "baseFrequency",
        "baseprofile": "baseProfile",
        "calcmode": "calcMode",
        "clippathunits": "clipPathUnits",
        "contentscripttype": "contentScriptType",
        "contentstyletype": "contentStyleType",
        "diffuseconstant": "diffuseConstant",
        "edgemode": "edgeMode",
        "externalresourcesrequired": "externalResourcesRequired",
        "filterres": "filterRes",
        "filterunits": "filterUnits",
        "glyphref": "glyphRef",
        "gradienttransform": "gradientTransform",
        "gradientunits": "gradientUnits",
        "kernelmatrix": "kernelMatrix",
        "kernelunitlength": "kernelUnitLength",
        "keypoints": "keyPoints",
        "keysplines": "keySplines",
        "keytimes": "keyTimes",
        "lengthadjust": "lengthAdjust",
        "limitingconeangle": "limitingConeAngle",
        "markerheight": "markerHeight",
        "markerunits": "markerUnits",
        "markerwidth": "markerWidth",
        "maskcontentunits": "maskContentUnits",
        "maskunits": "maskUnits",
        "numoctaves": "numOctaves",
        "pathlength": "pathLength",
        "patterncontentunits": "patternContentUnits",
        "patterntransform": "patternTransform",
        "patternunits": "patternUnits",
        "pointsatx": "pointsAtX",
        "pointsaty": "pointsAtY",
        "pointsatz": "pointsAtZ",
        "preservealpha": "preserveAlpha",
        "preserveaspectratio": "preserveAspectRatio",
        "primitiveunits": "primitiveUnits",
        "refx": "refX",
        "refy": "refY",
        "repeatcount": "repeatCount",
        "repeatdur": "repeatDur",
        "requiredextensions": "requiredExtensions",
        "requiredfeatures": "requiredFeatures",
        "specularconstant": "specularConstant",
        "specularexponent": "specularExponent",
        "spreadmethod": "spreadMethod",
        "startoffset": "startOffset",
        "stddeviation": "stdDeviation",
        "stitchtiles": "stitchTiles",
        "surfacescale": "surfaceScale",
        "systemlanguage": "systemLanguage",
        "tablevalues": "tableValues",
        "targetx": "targetX",
        "targety": "targetY",
        "textlength": "textLength",
        "viewbox": "viewBox",
        "viewtarget": "viewTarget",
        "xchannelselector": "xChannelSelector",
        "ychannelselector": "yChannelSelector",
        "zoomandpan": "zoomAndPan"
    }
    attributes = token["data"]
    # Walk a snapshot of the keys: the rename below inserts and deletes
    # entries, and doing that while iterating the live dict can skip or
    # repeat keys (or raise RuntimeError).
    for originalName in list(attributes):
        if originalName in replacements:
            svgName = replacements[originalName]
            attributes[svgName] = attributes.pop(originalName)
276+
277+
def adjustForeignAttributes(self, token):
    """Replace prefixed attribute names with namespace-aware tuples, in place.

    Each key of ``token["data"]`` listed in the table below (for example
    ``"xlink:href"``) is replaced by a ``(prefix, local name, namespace)``
    tuple; the value is kept. Other keys are left untouched.

    :arg token: token whose ``"data"`` entry maps attribute names to
        values; that mapping is modified in place.
    :returns: None
    """
    replacements = {
        "xlink:actuate": ("xlink", "actuate", namespaces["xlink"]),
        "xlink:arcrole": ("xlink", "arcrole", namespaces["xlink"]),
        "xlink:href": ("xlink", "href", namespaces["xlink"]),
        "xlink:role": ("xlink", "role", namespaces["xlink"]),
        "xlink:show": ("xlink", "show", namespaces["xlink"]),
        "xlink:title": ("xlink", "title", namespaces["xlink"]),
        "xlink:type": ("xlink", "type", namespaces["xlink"]),
        "xml:base": ("xml", "base", namespaces["xml"]),
        "xml:lang": ("xml", "lang", namespaces["xml"]),
        "xml:space": ("xml", "space", namespaces["xml"]),
        "xmlns": (None, "xmlns", namespaces["xmlns"]),
        "xmlns:xlink": ("xmlns", "xlink", namespaces["xmlns"])
    }

    attributes = token["data"]
    # Walk a snapshot of the keys: the rename below inserts and deletes
    # entries, and doing that while iterating the live dict can skip or
    # repeat keys (or raise RuntimeError).
    for originalName in list(attributes):
        if originalName in replacements:
            foreignName = replacements[originalName]
            attributes[foreignName] = attributes.pop(originalName)
199298

200299
def resetInsertionMode(self):
201300
# The name of this method is mostly historical. (It's also used in the
@@ -296,6 +395,9 @@ def processComment(self, token):
296395
def processDoctype(self, token):
297396
self.parser.parseError("unexpected-doctype")
298397

398+
def processCharacters(self, token):
    """Insert the token's ``"data"`` payload into the tree as text."""
    text = token["data"]
    self.tree.insertText(text)
400+
299401
def processSpaceCharacters(self, token):
300402
self.tree.insertText(token["data"])
301403

@@ -745,6 +847,8 @@ def __init__(self, parser, tree):
745847
("select", self.startTagSelect),
746848
(("rp", "rt"), self.startTagRpRt),
747849
(("option", "optgroup"), self.startTagOpt),
850+
(("math"), self.startTagMath),
851+
(("svg"), self.startTagSvg),
748852
(("caption", "col", "colgroup", "frame", "frameset", "head",
749853
"tbody", "td", "tfoot", "th", "thead",
750854
"tr"), self.startTagMisplaced),
@@ -1030,6 +1134,34 @@ def startTagRpRt(self, token):
10301134
self.tree.openElements.pop()
10311135
self.tree.insertElement(token)
10321136

1137+
def startTagMath(self, token):
    """Handle a ``math`` start tag.

    Adjusts the token's attributes, tags it with the MathML namespace,
    inserts it, and switches the parser to the "inForeignContent" phase
    with this phase recorded as the secondary phase.
    """
    parser = self.parser
    tree = self.tree

    tree.reconstructActiveFormattingElements()
    for adjust in (parser.adjustMathMLAttributes,
                   parser.adjustForeignAttributes):
        adjust(token)
    token["namespace"] = namespaces["mathml"]
    tree.insertElement(token)

    # Need to get the parse error right for the case where the token
    # has a namespace not equal to the xmlns attribute
    parser.phase = parser.phases["inForeignContent"]
    parser.secondaryPhase = self

    if not token["selfClosing"]:
        return
    # A self-closing element is popped straight off the open-element stack.
    tree.openElements.pop()
    token["selfClosingAcknowledged"] = True
1150+
1151+
def startTagSvg(self, token):
    """Handle an ``svg`` start tag.

    Adjusts the token's attributes, tags it with the SVG namespace,
    inserts it, and switches the parser to the "inForeignContent" phase
    with this phase recorded as the secondary phase.
    """
    parser = self.parser
    tree = self.tree

    tree.reconstructActiveFormattingElements()
    for adjust in (parser.adjustSVGAttributes,
                   parser.adjustForeignAttributes):
        adjust(token)
    token["namespace"] = namespaces["svg"]
    tree.insertElement(token)

    # Need to get the parse error right for the case where the token
    # has a namespace not equal to the xmlns attribute
    parser.phase = parser.phases["inForeignContent"]
    parser.secondaryPhase = self

    if not token["selfClosing"]:
        return
    # A self-closing element is popped straight off the open-element stack.
    tree.openElements.pop()
    token["selfClosingAcknowledged"] = True
1164+
10331165
def startTagMisplaced(self, token):
10341166
""" Elements that should be children of other elements that have a
10351167
different insertion mode; here they are ignored
@@ -2015,27 +2147,118 @@ def startTagOther(self, token):
20152147
def endTagTable(self, token):
20162148
self.parser.parseError("unexpected-table-element-end-tag-in-select-in-table", {"name": token["name"]})
20172149
if self.tree.elementInScope(token["name"]):
2018-
self.endTagOther(impliedTgToken("select"))
2150+
self.endTagOther(impliedTagToken("select"))
20192151
self.parser.phase.processEndTag(token)
20202152

20212153
def endTagOther(self, token):
20222154
self.parser.phases["inSelect"].processEndTag(token)
20232155

20242156

20252157
class InForeignContentPhase(Phase):
2026-
def __init__(self, parser, tree):
2158+
breakoutElements = frozenset(["b", "big", "blockquote", "body", "br",
2159+
"center", "code", "dd", "div", "dl", "dt",
2160+
"em", "embed", "font", "h1", "h2", "h3",
2161+
"h4", "h5", "h6", "head", "hr", "i", "img",
2162+
"li", "listing", "menu", "meta", "nobr",
2163+
"ol", "p", "pre", "ruby", "s", "small",
2164+
"span", "strong", "strike", "sub", "sup",
2165+
"table", "tt", "u", "ul", "var"])
2166+
def __init__(self, parser, tree):
20272167
Phase.__init__(self, parser, tree)
2028-
self.secondardInsertionMode = None
20292168

2030-
def processStartTag(self, token):
2169+
def nonHTMLElementOpen(self):
    """Report whether any open element lies outside the default namespace.

    :returns: True if at least one entry of ``self.tree.openElements`` has
        a ``namespace`` different from ``self.tree.defaultNamespace``,
        otherwise False (including when the stack is empty).
    """
    # Scan from the most recently opened element backwards.
    return any(node.namespace != self.tree.defaultNamespace
               for node in reversed(self.tree.openElements))
2174+
2175+
def adjustSVGTagNames(self, token):
    """Rewrite ``token["name"]`` to its mixed-case SVG spelling, in place.

    Names that are not keys of the table below are left unchanged.

    :arg token: token whose ``"name"`` entry is looked up and possibly
        replaced.
    :returns: None
    """
    camelCased = {
        "altglyph": "altGlyph",
        "altglyphdef": "altGlyphDef",
        "altglyphitem": "altGlyphItem",
        "animatecolor": "animateColor",
        "animatemotion": "animateMotion",
        "animatetransform": "animateTransform",
        "clippath": "clipPath",
        "feblend": "feBlend",
        "fecolormatrix": "feColorMatrix",
        "fecomponenttransfer": "feComponentTransfer",
        "fecomposite": "feComposite",
        "feconvolvematrix": "feConvolveMatrix",
        "fediffuselighting": "feDiffuseLighting",
        "fedisplacementmap": "feDisplacementMap",
        "fedistantlight": "feDistantLight",
        "feflood": "feFlood",
        "fefunca": "feFuncA",
        "fefuncb": "feFuncB",
        "fefuncg": "feFuncG",
        "fefuncr": "feFuncR",
        "fegaussianblur": "feGaussianBlur",
        "feimage": "feImage",
        "femerge": "feMerge",
        "femergenode": "feMergeNode",
        "femorphology": "feMorphology",
        "feoffset": "feOffset",
        "fepointlight": "fePointLight",
        "fespecularlighting": "feSpecularLighting",
        "fespotlight": "feSpotLight",
        "fetile": "feTile",
        "feturbulence": "feTurbulence",
        "foreignobject": "foreignObject",
        "glyphref": "glyphRef",
        "lineargradient": "linearGradient",
        "radialgradient": "radialGradient",
        "textpath": "textPath",
    }

    svgName = camelCased.get(token["name"])
    if svgName is not None:
        token["name"] = svgName
2215+
2216+
def processEOF(self):
    """Do nothing at end of file in this phase."""
    return None
2218+
2219+
def processStartTag(self, token):
2220+
currentNode = self.tree.openElements[-1]
2221+
if (currentNode.namespace == self.tree.defaultNamespace or
2222+
(currentNode.namespace == namespaces["mathml"] and
2223+
token["name"] not in frozenset(["mglyph", "malignmark"]) and
2224+
currentNode.name in frozenset(["mi", "mo", "mn",
2225+
"ms", "mtext"])) or
2226+
(currentNode.namespace == namespaces["mathml"] and
2227+
token["name"] == "svg" and
2228+
currentNode.name == "annotation-xml") or
2229+
(currentNode.namespace == namespaces["svg"] and
2230+
currentNode.name in frozenset(["foreignObject",
2231+
"desc", "title"])
2232+
)):
20312233

2032-
self.startTagHandler = utils.MethodDispatcher([
2033-
("html", self.startTagHtml)
2034-
])
2035-
self.startTagHandler.default = self.startTagOther
2234+
self.parser.secondaryPhase.processStartTag(token)
2235+
if self.parser.phase == self and not self.nonHTMLElementOpen():
2236+
self.parser.phase = self.parser.secondaryPhase
2237+
elif token["name"] in self.breakoutElements:
2238+
self.parser.parseError("unexpected_html_element_in_foreign_content",
2239+
token["name"])
2240+
while (self.tree.openElements[-1].namespace !=
2241+
self.tree.defaultNamespace):
2242+
self.tree.openElements.pop()
2243+
self.parser.phase = self.parser.secondaryPhase
2244+
self.parser.phase.processStartTag(token)
2245+
else:
2246+
if currentNode.namespace == namespaces["mathml"]:
2247+
self.parser.adjustMathMLAttributes(token)
2248+
elif currentNode.namespace == namespaces["svg"]:
2249+
self.adjustSVGTagNames(token)
2250+
self.parser.adjustSVGAttributes(token)
2251+
self.parser.adjustForeignAttributes(token)
2252+
token["namespace"] = currentNode.namespace
2253+
self.tree.insertElement(token)
2254+
if token["selfClosing"]:
2255+
self.tree.openElements.pop()
2256+
token["selfClosingAcknowledged"] = True
20362257

2037-
self.endTagHandler = utils.MethodDispatcher([("html", self.endTagHtml)])
2038-
self.endTagHandler.default = self.endTagOther
2258+
def processEndTag(self, token):
    """Delegate an end tag to the secondary phase, then maybe leave this phase.

    After the secondary phase has processed the token, the parser is
    switched back to the secondary phase if this phase is still the
    current one and ``nonHTMLElementOpen()`` reports no foreign element.
    """
    parser = self.parser
    parser.secondaryPhase.processEndTag(token)

    if not (parser.phase == self):
        # The secondary phase already moved the parser elsewhere.
        return
    if self.nonHTMLElementOpen():
        return
    parser.phase = parser.secondaryPhase
20392262

20402263
class AfterBodyPhase(Phase):
20412264
def __init__(self, parser, tree):

src/html5lib/treebuilders/dom.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,15 @@ def setAttributes(self, attributes):
7979
if attributes:
8080
for name, value in attributes.items():
8181
value=illegal_xml_chars.sub(u'\uFFFD',value)
82-
self.element.setAttribute(name, value)
82+
if isinstance(name, tuple):
83+
if name[0] is not None:
84+
qualifiedName = name[0] + ":" + name[1]
85+
else:
86+
qualifiedName = name[1]
87+
self.element.setAttributeNS(name[2], qualifiedName,
88+
value)
89+
else:
90+
self.element.setAttribute(name, value)
8391

8492
attributes = property(getAttributes, setAttributes)
8593

@@ -240,6 +248,7 @@ def dom2sax(node, handler, nsmap={'xml':XML_NAMESPACE}):
240248

241249
return locals()
242250

243-
# XXX: Keep backwards compatibility with things that directly load classes/functions from this module
251+
# Keep backwards compatibility with things that directly load
252+
# classes/functions from this module
244253
for key, value in getDomModule(minidom).__dict__.items():
245254
globals()[key] = value

src/html5lib/treebuilders/etree.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,11 @@ def _setAttributes(self, attributes):
4848
for key in self._element.attrib.keys():
4949
del self._element.attrib[key]
5050
for key, value in attributes.iteritems():
51-
self._element.set(key, value)
51+
if isinstance(key, tuple):
52+
name = "{%s}%s"%(key[2], key[1])
53+
else:
54+
name = key
55+
self._element.set(name, value)
5256

5357
attributes = property(_getAttributes, _setAttributes)
5458

@@ -193,6 +197,12 @@ def serializeElement(element, indent=0):
193197
elif type(element.tag) == type(ElementTree.Comment):
194198
rv.append("|%s<!-- %s -->"%(' '*indent, element.text))
195199
else:
200+
if element.namespave == self.defaultNamespace:
201+
name = element.tag
202+
else:
203+
ns, name = element.tag.split("}")
204+
ns = ns[1:]
205+
name = "%s %s"%(ns, name)
196206
rv.append("|%s<%s>"%(' '*indent, element.tag))
197207
if hasattr(element, "attrib"):
198208
for name, value in element.attrib.iteritems():

src/html5lib/treebuilders/etree_lxml.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -158,11 +158,19 @@ class Attributes(dict):
158158
def __init__(self, element, value={}):
159159
self._element = element
160160
dict.__init__(self, value)
161-
for k, v in self.iteritems():
162-
self._element._element.attrib[filter.coerceAttribute(k)] = v
161+
for key, value in self.iteritems():
162+
if isinstance(key, tuple):
163+
name = "{%s}%s"%(key[2], key[1])
164+
else:
165+
name = key
166+
self._element._element.attrib[filter.coerceAttribute(name)] = value
163167

164168
def __setitem__(self, key, value):
    """Store an attribute and mirror it onto the wrapped element.

    :arg key: attribute name, either a plain string or a tuple whose
        items at index 1 and 2 are the local name and the namespace
        (index 0 is not used here).
    :arg value: attribute value.
    """
    dict.__setitem__(self, key, value)
    if isinstance(key, tuple):
        # Namespaced attribute: build the "{namespace}localname" form.
        name = "{%s}%s" % (key[2], key[1])
    else:
        name = key
    # Pass the computed name, not the raw key: the original handed `key`
    # to coerceAttribute, so tuple keys were never translated.
    # NOTE(review): `filter` is presumably a module-level object defined
    # elsewhere in this file, not the builtin -- confirm.
    self._element._element.attrib[filter.coerceAttribute(name)] = value
167175

168176
class Element(builder.Element):

0 commit comments

Comments
 (0)
0