More crlf and svn ignore · awesome-python/html5lib-python@4017fa8 · GitHub
[go: up one dir, main page]

Skip to content

Commit 4017fa8

Browse files
committed
More crlf and svn ignore
--HG-- extra : convert_revision : svn%3Aacbfec75-9323-0410-a652-858a13e371e0/trunk%40618
1 parent 0804ead commit 4017fa8

File tree

7 files changed

+269
-270
lines changed

7 files changed

+269
-270
lines changed

src/treewalkers/__init__.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
1-
"""A collection of modules for iterating through different kinds of
2-
tree, generating tokens identical to those produced by the tokenizer
1+
"""A collection of modules for iterating through different kinds of
2+
tree, generating tokens identical to those produced by the tokenizer
33
module.
44
55
To create a tree walker for a new type of tree, you need to do
66
implement a tree walker object (called TreeWalker by convention) that
7-
implements a 'serialize' method taking a tree as sole argument and
7+
implements a 'serialize' method taking a tree as sole argument and
88
returning an iterator generating tokens.
99
"""
1010

@@ -13,7 +13,7 @@
1313

1414
import dom
1515
import simpletree
16-
import etree
16+
import etree
1717
import pulldom
1818
try:
1919
import soup as beautifulsoup
@@ -23,25 +23,25 @@
2323

2424
def getTreeWalker(treeType, implementation=None, **kwargs):
2525
"""Get a TreeWalker class for various types of tree with built-in support
26-
26+
2727
treeType - the name of the tree type required (case-insensitive). Supported
2828
values are "simpletree", "dom", "etree" and "beautifulsoup"
29-
29+
3030
"simpletree" - a built-in DOM-ish tree type with support for some
3131
more pythonic idioms.
32-
"dom" - The xml.dom.minidom DOM implementation
32+
"dom" - The xml.dom.minidom DOM implementation
3333
"pulldom" - The xml.dom.pulldom event stream
3434
"etree" - A generic builder for tree implementations exposing an
3535
elementtree-like interface (known to work with
3636
ElementTree, cElementTree and lxml.etree).
3737
"beautifulsoup" - Beautiful soup (if installed)
38-
38+
3939
implementation - (Currently applies to the "etree" tree type only). A module
4040
implementing the tree type e.g. xml.etree.ElementTree or
4141
lxml.etree."""
42-
42+
4343
treeType = treeType.lower()
4444
if treeType in ("dom", "pulldom", "simpletree", "beautifulsoup"):
4545
return globals()[treeType].TreeWalker
4646
elif treeType == "etree":
47-
return etree.getETreeModule(implementation, **kwargs).TreeWalker
47+
return etree.getETreeModule(implementation, **kwargs).TreeWalker

src/treewalkers/_base.py

Lines changed: 66 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -1,67 +1,66 @@
1-
from constants import voidElements, spaceCharacters
2-
spaceCharacters = u"".join(spaceCharacters)
3-
4-
class TreeWalker(object):
5-
def walk(self, node):
6-
raise NotImplementedError
7-
8-
def walkChildren(self, node):
9-
raise NodeImplementedError
10-
11-
def error(self, msg):
12-
yield {"type": "SerializeError", "data": msg}
13-
14-
def normalizeAttrs(self, attrs):
15-
if not attrs:
16-
attrs = []
17-
elif hasattr(attrs, 'items'):
18-
attrs = attrs.items()
19-
return attrs
20-
21-
def element(self, name, attrs, hasChildren):
22-
if name in voidElements:
23-
for token in self.emptyTag(name, attrs, hasChildren):
24-
yield token
25-
else:
26-
yield self.startTag(name, attrs)
27-
if hasChildren:
28-
for token in self.serializeChildren(node):
29-
yield token
30-
yield self.endTag(name)
31-
32-
def emptyTag(self, name, attrs, hasChildren=False):
33-
yield {"type": "EmptyTag", "name": name, \
34-
"data": self.normalizeAttrs(attrs)}
35-
if hasChildren:
36-
yield self.error(_("Void element has children"))
37-
38-
def startTag(self, name, attrs):
39-
return {"type": "StartTag", "name": name, \
40-
"data": self.normalizeAttrs(attrs)}
41-
42-
def endTag(self, name):
43-
return {"type": "EndTag", "name": name, "data": []}
44-
45-
def text(self, data):
46-
middle = data.lstrip(spaceCharacters)
47-
left = data[:len(data)-len(middle)]
48-
if left:
49-
yield {"type": "SpaceCharacters", "data": left}
50-
if middle:
51-
data = middle
52-
middle = data.rstrip(spaceCharacters)
53-
right = data[len(data)-len(middle):]
54-
if middle:
55-
yield {"type": "Characters", "data": middle}
56-
if right:
57-
yield {"type": "SpaceCharacters", "data": right}
58-
59-
def comment(self, data):
60-
return {"type": "Comment", "data": data}
61-
62-
def doctype(self, name):
63-
return {"type": "Doctype", "name": name, "data": name.upper() == "HTML"}
64-
65-
def unknown(self, nodeType):
66-
return self.error(_("Unknown node type: ") + nodeType)
67-
1+
from constants import voidElements, spaceCharacters
2+
spaceCharacters = u"".join(spaceCharacters)
3+
4+
class TreeWalker(object):
5+
def walk(self, node):
6+
raise NotImplementedError
7+
8+
def walkChildren(self, node):
9+
raise NodeImplementedError
10+
11+
def error(self, msg):
12+
yield {"type": "SerializeError", "data": msg}
13+
14+
def normalizeAttrs(self, attrs):
15+
if not attrs:
16+
attrs = []
17+
elif hasattr(attrs, 'items'):
18+
attrs = attrs.items()
19+
return attrs
20+
21+
def element(self, name, attrs, hasChildren):
22+
if name in voidElements:
23+
for token in self.emptyTag(name, attrs, hasChildren):
24+
yield token
25+
else:
26+
yield self.startTag(name, attrs)
27+
if hasChildren:
28+
for token in self.serializeChildren(node):
29+
yield token
30+
yield self.endTag(name)
31+
32+
def emptyTag(self, name, attrs, hasChildren=False):
33+
yield {"type": "EmptyTag", "name": name, \
34+
"data": self.normalizeAttrs(attrs)}
35+
if hasChildren:
36+
yield self.error(_("Void element has children"))
37+
38+
def startTag(self, name, attrs):
39+
return {"type": "StartTag", "name": name, \
40+
"data": self.normalizeAttrs(attrs)}
41+
42+
def endTag(self, name):
43+
return {"type": "EndTag", "name": name, "data": []}
44+
45+
def text(self, data):
46+
middle = data.lstrip(spaceCharacters)
47+
left = data[:len(data)-len(middle)]
48+
if left:
49+
yield {"type": "SpaceCharacters", "data": left}
50+
if middle:
51+
data = middle
52+
middle = data.rstrip(spaceCharacters)
53+
right = data[len(data)-len(middle):]
54+
if middle:
55+
yield {"type": "Characters", "data": middle}
56+
if right:
57+
yield {"type": "SpaceCharacters", "data": right}
58+
59+
def comment(self, data):
60+
return {"type": "Comment", "data": data}
61+
62+
def doctype(self, name):
63+
return {"type": "Doctype", "name": name, "data": name.upper() == "HTML"}
64+
65+
def unknown(self, nodeType):
66+
return self.error(_("Unknown node type: ") + nodeType)

src/treewalkers/dom.py

Lines changed: 35 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,35 +1,35 @@
1-
from xml.dom import Node
2-
3-
import gettext
4-
_ = gettext.gettext
5-
6-
import _base
7-
8-
class TreeWalker(_base.TreeWalker):
9-
def walk(self, node):
10-
if node.nodeType in (Node.DOCUMENT_NODE, Node.DOCUMENT_FRAGMENT_NODE):
11-
for token in self.walkChildren(node):
12-
yield token
13-
14-
elif node.nodeType == Node.DOCUMENT_TYPE_NODE:
15-
yield self.doctype(node.nodeName)
16-
17-
elif node.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
18-
for token in self.text(node.nodeValue):
19-
yield token
20-
21-
elif node.nodeType == Node.ELEMENT_NODE:
22-
for token in self.element(node.nodeName, \
23-
node.attributes.items(), node.childNodes):
24-
yield token
25-
26-
elif node.nodeType == Node.COMMENT_NODE:
27-
yield self.comment(node.nodeValue)
28-
29-
else:
30-
yield self.unknown(node.nodeType)
31-
32-
def walkChildren(self, node):
33-
for childNode in node.childNodes:
34-
for token in self.walk(node):
35-
yield token
1+
from xml.dom import Node
2+
3+
import gettext
4+
_ = gettext.gettext
5+
6+
import _base
7+
8+
class TreeWalker(_base.TreeWalker):
9+
def walk(self, node):
10+
if node.nodeType in (Node.DOCUMENT_NODE, Node.DOCUMENT_FRAGMENT_NODE):
11+
for token in self.walkChildren(node):
12+
yield token
13+
14+
elif node.nodeType == Node.DOCUMENT_TYPE_NODE:
15+
yield self.doctype(node.nodeName)
16+
17+
elif node.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
18+
for token in self.text(node.nodeValue):
19+
yield token
20+
21+
elif node.nodeType == Node.ELEMENT_NODE:
22+
for token in self.element(node.nodeName, \
23+
node.attributes.items(), node.childNodes):
24+
yield token
25+
26+
elif node.nodeType == Node.COMMENT_NODE:
27+
yield self.comment(node.nodeValue)
28+
29+
else:
30+
yield self.unknown(node.nodeType)
31+
32+
def walkChildren(self, node):
33+
for childNode in node.childNodes:
34+
for token in self.walk(node):
35+
yield token

src/treewalkers/etree.py

Lines changed: 44 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
1-
import gettext
2-
_ = gettext.gettext
3-
1+
import gettext
2+
_ = gettext.gettext
3+
44
import new
5-
import copy
6-
5+
import copy
6+
77
import _base
88

99
moduleCache = {}
@@ -16,47 +16,47 @@ def getETreeModule(ElementTreeImplementation):
1616
mod = new.module("_" + ElementTreeImplementation.__name__+"builder")
1717
objs = getETreeBuilder(ElementTreeImplementation)
1818
mod.__dict__.update(objs)
19-
moduleCache[name] = mod
19+
moduleCache[name] = mod
2020
return mod
2121

2222
def getETreeBuilder(ElementTreeImplementation):
2323
ElementTree = ElementTreeImplementation
24-
25-
class TreeWalker(_base.TreeWalker):
26-
def walk(self, node):
24+
25+
class TreeWalker(_base.TreeWalker):
26+
def walk(self, node):
2727
if type(element) == type(ElementTree.ElementTree):
28-
element = element.getroot()
29-
30-
if node.tag in ("<DOCUMENT_ROOT>", "<DOCUMENT_FRAGMENT>"):
31-
for token in self.walkChildren(node):
32-
yield token
33-
34-
elif node.tag == "<!DOCTYPE>":
35-
yield self.doctype(node.text)
36-
if node.tail:
37-
for token in self.text(node.tail):
38-
yield token
39-
40-
elif type(node.tag) == type(ElementTree.Comment):
41-
yield self.comment(node.text)
42-
if node.tail:
43-
for token in self.text(node.tail):
44-
yield token
45-
46-
else:
47-
#This is assumed to be an ordinary element
48-
for token in self.element(node):
49-
yield token
50-
51-
def walkChildren(self, node):
52-
if node.text:
53-
for token in self.text(node.text):
54-
yield token
55-
for childNode in node.getchildren():
56-
for token in self.walk(childNode):
57-
yield token
58-
if node.tail:
59-
for token in self.text(node.tail):
60-
yield token
61-
62-
return locals()
28+
element = element.getroot()
29+
30+
if node.tag in ("<DOCUMENT_ROOT>", "<DOCUMENT_FRAGMENT>"):
31+
for token in self.walkChildren(node):
32+
yield token
33+
34+
elif node.tag == "<!DOCTYPE>":
35+
yield self.doctype(node.text)
36+
if node.tail:
37+
for token in self.text(node.tail):
38+
yield token
39+
40+
elif type(node.tag) == type(ElementTree.Comment):
41+
yield self.comment(node.text)
42+
if node.tail:
43+
for token in self.text(node.tail):
44+
yield token
45+
46+
else:
47+
#This is assumed to be an ordinary element
48+
for token in self.element(node):
49+
yield token
50+
51+
def walkChildren(self, node):
52+
if node.text:
53+
for token in self.text(node.text):
54+
yield token
55+
for childNode in node.getchildren():
56+
for token in self.walk(childNode):
57+
yield token
58+
if node.tail:
59+
for token in self.text(node.tail):
60+
yield token
61+
62+
return locals()

0 commit comments

Comments
 (0)
0