|
1 |
| -import _base |
2 | 1 | import new
|
3 | 2 | import warnings
|
| 3 | +import re |
| 4 | + |
| 5 | +import _base |
4 | 6 | from html5lib.constants import DataLossWarning
|
| 7 | +import html5lib.constants as constants |
5 | 8 | import etree as etree_builders
|
6 | 9 | from html5lib import ihatexml
|
7 | 10 |
|
|
25 | 28 |
|
26 | 29 | class DocumentType(object):
|
27 | 30 | def __init__(self, name, publicId, systemId):
|
28 |
| - self.name = name |
29 |
| - if name != name.lower(): |
30 |
| - warnings.warn("lxml does not preserve doctype case", DataLossWarning) |
| 31 | + self.name = name |
31 | 32 | self.publicId = publicId
|
32 | 33 | self.systemId = systemId
|
33 | 34 |
|
@@ -80,11 +81,36 @@ def serializeElement(element, indent=0):
|
80 | 81 | elif type(element.tag) == type(etree.Comment):
|
81 | 82 | rv.append("|%s<!-- %s -->"%(' '*indent, element.text))
|
82 | 83 | else:
|
83 |
| - rv.append("|%s<%s>"%(' '*indent, filter.fromXmlName(element.tag))) |
| 84 | + nsmatch = etree_builders.tag_regexp.match(element.tag) |
| 85 | + if nsmatch is not None: |
| 86 | + ns = nsmatch.group(1) |
| 87 | + tag = nsmatch.group(2) |
| 88 | + prefix = constants.prefixes[ns] |
| 89 | + if prefix != "html": |
| 90 | + rv.append("|%s<%s %s>"%(' '*indent, prefix, |
| 91 | + filter.fromXmlName(tag))) |
| 92 | + else: |
| 93 | + rv.append("|%s<%s>"%(' '*indent, |
| 94 | + filter.fromXmlName(tag))) |
| 95 | + else: |
| 96 | + rv.append("|%s<%s>"%(' '*indent, |
| 97 | + filter.fromXmlName(element.tag))) |
| 98 | + |
84 | 99 | if hasattr(element, "attrib"):
|
85 | 100 | for name, value in element.attrib.iteritems():
|
86 |
| - rv.append('|%s%s="%s"' % (' '*(indent+2), |
87 |
| - filter.fromXmlName(name), value)) |
| 101 | + nsmatch = etree_builders.tag_regexp.match(name) |
| 102 | + if nsmatch: |
| 103 | + ns = nsmatch.group(1) |
| 104 | + name = nsmatch.group(2) |
| 105 | + prefix = constants.prefixes[ns] |
| 106 | + rv.append('|%s%s %s="%s"' % (' '*(indent+2), |
| 107 | + prefix, |
| 108 | + filter.fromXmlName(name), |
| 109 | + value)) |
| 110 | + else: |
| 111 | + rv.append('|%s%s="%s"' % (' '*(indent+2), |
| 112 | + filter.fromXmlName(name), |
| 113 | + value)) |
88 | 114 | if element.text:
|
89 | 115 | rv.append("|%s\"%s\"" %(' '*(indent+2), element.text))
|
90 | 116 | indent += 2
|
@@ -160,38 +186,33 @@ def __init__(self, element, value={}):
|
160 | 186 | dict.__init__(self, value)
|
161 | 187 | for key, value in self.iteritems():
|
162 | 188 | if isinstance(key, tuple):
|
163 |
| - name = "{%s}%s"%(key[2], key[1]) |
| 189 | + name = "{%s}%s"%(key[2], filter.coerceAttribute(key[1])) |
164 | 190 | else:
|
165 |
| - name = key |
166 |
| - self._element._element.attrib[filter.coerceAttribute(name)] = value |
| 191 | + name = filter.coerceAttribute(key) |
| 192 | + self._element._element.attrib[name] = value |
167 | 193 |
|
168 | 194 | def __setitem__(self, key, value):
|
169 | 195 | dict.__setitem__(self, key, value)
|
170 | 196 | if isinstance(key, tuple):
|
171 |
| - name = "{%s}%s"%(key[2], key[1]) |
| 197 | + name = "{%s}%s"%(key[2], filter.coerceAttribute(key[1])) |
172 | 198 | else:
|
173 |
| - name = key |
174 |
| - self._element._element.attrib[filter.coerceAttribute(key)] = value |
| 199 | + name = filter.coerceAttribute(key) |
| 200 | + self._element._element.attrib[name] = value |
175 | 201 |
|
176 | 202 | class Element(builder.Element):
|
177 |
| - def __init__(self, name, namespace = None): |
| 203 | + def __init__(self, name, namespace): |
178 | 204 | name = filter.coerceElement(name)
|
179 |
| - if namespace is None: |
180 |
| - etree_tag = name |
181 |
| - else: |
182 |
| - etree_tag = "{%s}%s"%(namespace, name) |
183 |
| - self._name = name |
184 |
| - self.namespace = namespace |
185 |
| - builder.Element.__init__(self, name) |
| 205 | + builder.Element.__init__(self, name, namespace=namespace) |
186 | 206 | self._attributes = Attributes(self)
|
187 | 207 |
|
188 | 208 | def _setName(self, name):
|
189 |
| - self._name = name |
190 |
| - self._element.tag = filter.coerceElement(name) |
191 |
| - |
| 209 | + self._name = filter.coerceElement(name) |
| 210 | + self._element.tag = self._getETreeTag( |
| 211 | + self._name, self._namespace) |
| 212 | + |
192 | 213 | def _getName(self):
|
193 | 214 | return self._name
|
194 |
| - |
| 215 | + |
195 | 216 | name = property(_getName, _setName)
|
196 | 217 |
|
197 | 218 | def _getAttributes(self):
|
@@ -281,7 +302,8 @@ def insertRoot(self, token):
|
281 | 302 | docStr += ' PUBLIC "%s" "%s"'%(self.doctype.publicId or "",
|
282 | 303 | self.doctype.systemId or "")
|
283 | 304 | docStr += ">"
|
284 |
| - docStr += "<html></html>" |
| 305 | + #TODO - this needs to work when elements are not put into the default ns |
| 306 | + docStr += "<html xmlns='http://www.w3.org/1999/xhtml'></html>" |
285 | 307 |
|
286 | 308 | try:
|
287 | 309 | root = etree.fromstring(docStr)
|
|
0 commit comments