Optionally produce html as output · awesome-python/html5lib-python@d2a2a36 · GitHub
[go: up one dir, main page]

Skip to content

Commit d2a2a36

Browse files
committed
Optionally produce html as output
--HG-- extra : convert_revision : svn%3Aacbfec75-9323-0410-a652-858a13e371e0/trunk%40658
1 parent c98097f commit d2a2a36

File tree

3 files changed

+13
-14
lines changed

3 files changed

+13
-14
lines changed

parse.py

Lines changed: 11 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,11 @@
1111

1212
#RELEASE remove
1313
from src import html5parser, liberalxmlparser
14-
from src import treebuilders
14+
from src import treebuilders, serializer, treewalkers
1515
#END RELEASE
1616
#RELEASE add
1717
#from html5lib import html5parser, liberalxmlparser
18-
#from html5lib import treebuilders
18+
#from html5lib import treebuilders, serializer, treewalkers
1919
#END RELEASE
2020

2121
def convertTreeDump(treedump):
@@ -51,16 +51,7 @@ def parse():
5151
sys.stderr.write("No filename provided. Use -h for help\n")
5252
sys.exit(1)
5353

54-
if opts.treebuilder is not None:
55-
try:
56-
treebuilder = eval("treebuilders." + opts.treebuilder).TreeBuilder
57-
except ImportError, name:
58-
sys.stderr.write("Treebuilder %s not found\n"%name)
59-
raise
60-
except Exception, foo:
61-
treebuilder = treebuilders.simpletree.TreeBuilder
62-
else:
63-
treebuilder = treebuilders.simpletree.TreeBuilder
54+
treebuilder = treebuilders.getTreeBuilder(opts.treebuilder)
6455

6556
if opts.xml:
6657
p = liberalxmlparser.XHTMLParser(tree=treebuilder)
@@ -96,6 +87,10 @@ def printOutput(parser, document, opts):
9687
if not opts.no_tree:
9788
if opts.xml:
9889
sys.stdout.write(document.toxml("utf-8"))
90+
elif opts.html:
91+
tokens = treewalkers.getTreeWalker(opts.treebuilder)(document)
92+
for text in serializer.HTMLSerializer().serialize(tokens):
93+
sys.stdout.write(text.encode('utf-8'))
9994
elif opts.hilite:
10095
sys.stdout.write(document.hilite("utf-8"))
10196
else:
@@ -121,14 +116,17 @@ def getOptParser():
121116
dest="no_tree", help="Do not print output tree")
122117

123118
parser.add_option("-b", "--treebuilder", action="store", type="string",
124-
dest="treebuilder")
119+
dest="treebuilder", default="simpleTree")
125120

126121
parser.add_option("-e", "--error", action="store_true", default=False,
127122
dest="error", help="Print a list of parse errors")
128123

129124
parser.add_option("-x", "--xml", action="store_true", default=False,
130125
dest="xml", help="Output as xml")
131126

127+
parser.add_option("", "--html", action="store_true", default=False,
128+
dest="html", help="Output as html")
129+
132130
parser.add_option("", "--hilite", action="store_true", default=False,
133131
dest="hilite", help="Output as formatted highlighted code.")
134132

src/serializer.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,7 @@ def __init__(self, **kwargs):
249249
if attr in kwargs:
250250
setattr(self, attr, kwargs[attr])
251251
self.errors = []
252+
self.strict = False
252253

253254
def serialize(self, treewalker, encoding=None):
254255
in_cdata = False

src/treewalkers/_base.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ def getNextSibling(self, node):
9595
def getParentNode(self, node):
9696
raise NotImplementedError
9797

98-
def walk(self):
98+
def __iter__(self):
9999
currentNode = self.tree
100100
while currentNode is not None:
101101
details = self.getNodeDetails(currentNode)

0 commit comments

Comments
 (0)
0