|
19 | 19 | import gettext
|
20 | 20 | _ = gettext.gettext
|
21 | 21 |
|
| 22 | +from xml.dom import XHTML_NAMESPACE |
| 23 | + |
22 | 24 | class XMLParser(html5parser.HTMLParser):
|
23 | 25 | """ liberal XML parser """
|
24 | 26 |
|
@@ -66,16 +68,21 @@ def normalizeToken(self, token):
|
66 | 68 |
|
67 | 69 | # ensure that non-void XHTML elements have content so that separate
|
68 | 70 | # open and close tags are emitted
|
69 |
| - if token["type"] == "EndTag" and \ |
70 |
| - token["name"] not in voidElements and \ |
71 |
| - token["name"] == self.tree.openElements[-1].name and \ |
72 |
| - not self.tree.openElements[-1].hasContent():
73 |
| - for e in self.tree.openElements: |
74 |
| - if 'xmlns' in e.attributes.keys(): |
75 |
| - if e.attributes['xmlns'] <> 'http://www.w3.org/1999/xhtml': |
76 |
| - break |
| 71 | + if token["type"] == "EndTag": |
| 72 | + if token["name"] in voidElements: |
| 73 | + if not self.tree.openElements or \ |
| 74 | + self.tree.openElements[-1].name != token["name"]: |
| 75 | + token["type"] = "EmptyTag" |
| 76 | + if not token.has_key("data"): token["data"] = {} |
77 | 77 | else:
|
78 |
| - self.tree.insertText('') |
| 78 | + if token["name"] == self.tree.openElements[-1].name and \ |
| 79 | + not self.tree.openElements[-1].hasContent(): |
| 80 | + for e in self.tree.openElements: |
| 81 | + if 'xmlns' in e.attributes.keys(): |
| 82 | + if e.attributes['xmlns'] != XHTML_NAMESPACE: |
| 83 | + break |
| 84 | + else: |
| 85 | + self.tree.insertText('') |
79 | 86 |
|
80 | 87 | return token
|
81 | 88 |
|
|
0 commit comments