@@ -38,56 +38,87 @@ def load(f):
38
38
#from html5lib import html5parser, serializer, treewalkers, treebuilders
39
39
#END RELEASE
40
40
41
def PullDOMAdapter(node):
    """Generate pulldom-style ``(event, node)`` pairs for a DOM subtree.

    The subtree rooted at *node* is visited depth-first in document order:

    * document and document-fragment nodes emit nothing themselves, only
      the events of their children;
    * elements emit ``START_ELEMENT``, then their children's events, then
      ``END_ELEMENT``;
    * comments emit ``COMMENT``;
    * text and CDATA section nodes emit ``CHARACTERS``.

    Raises NotImplementedError when a DOCTYPE node, or any node type not
    listed above, is reached during iteration.
    """
    from xml.dom import Node
    from xml.dom.pulldom import START_ELEMENT, END_ELEMENT, COMMENT, CHARACTERS

    kind = node.nodeType
    isElement = kind == Node.ELEMENT_NODE
    isContainer = kind in (Node.DOCUMENT_NODE, Node.DOCUMENT_FRAGMENT_NODE)

    if isElement or isContainer:
        # Only elements are bracketed by start/end events; documents and
        # fragments are transparent wrappers around their children.
        if isElement:
            yield START_ELEMENT, node
        for child in node.childNodes:
            for pair in PullDOMAdapter(child):
                yield pair
        if isElement:
            yield END_ELEMENT, node
    elif kind == Node.COMMENT_NODE:
        yield COMMENT, node
    elif kind in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
        yield CHARACTERS, node
    elif kind == Node.DOCUMENT_TYPE_NODE:
        raise NotImplementedError("DOCTYPE nodes are not supported by PullDOM")
    else:
        raise NotImplementedError("Node type not supported: " + str(kind))
68
+
41
69
# Registry of the tree implementations exercised by the tests.  Each entry
# is a dict with a "builder" (result of treebuilders.getTreeBuilder) and a
# "walker" (result of treewalkers.getTreeWalker).  An entry may also carry
# an "adapter": a callable applied to the parsed document before it is
# handed to the walker.
treeTypes = {
    "simpletree": dict(
        builder=treebuilders.getTreeBuilder("simpletree"),
        walker=treewalkers.getTreeWalker("simpletree")),
    "DOM": dict(
        builder=treebuilders.getTreeBuilder("dom"),
        walker=treewalkers.getTreeWalker("dom")),
    "PullDOM": dict(
        builder=treebuilders.getTreeBuilder("dom"),
        adapter=PullDOMAdapter,
        walker=treewalkers.getTreeWalker("pulldom")),
}

# Register whichever of the etree implementations that are "supposed" to
# work can actually be imported; missing ones are silently skipped.

# ElementTree: standard-library module first, standalone package second.
try:
    import xml.etree.ElementTree as ElementTree
    treeTypes['ElementTree'] = dict(
        builder=treebuilders.getTreeBuilder("etree", ElementTree),
        walker=treewalkers.getTreeWalker("etree", ElementTree))
except ImportError:
    try:
        import elementtree.ElementTree as ElementTree
        treeTypes['ElementTree'] = dict(
            builder=treebuilders.getTreeBuilder("etree", ElementTree),
            walker=treewalkers.getTreeWalker("etree", ElementTree))
    except ImportError:
        pass

# cElementTree: standard-library module first, standalone package second.
try:
    import xml.etree.cElementTree as cElementTree
    treeTypes['cElementTree'] = dict(
        builder=treebuilders.getTreeBuilder("etree", cElementTree),
        walker=treewalkers.getTreeWalker("etree", cElementTree))
except ImportError:
    try:
        import cElementTree
        treeTypes['cElementTree'] = dict(
            builder=treebuilders.getTreeBuilder("etree", cElementTree),
            walker=treewalkers.getTreeWalker("etree", cElementTree))
    except ImportError:
        pass

# lxml's etree module goes through the same "etree" builder and walker.
try:
    import lxml.etree as lxml
    treeTypes['lxml'] = dict(
        builder=treebuilders.getTreeBuilder("etree", lxml),
        walker=treewalkers.getTreeWalker("etree", lxml))
except ImportError:
    pass

# BeautifulSoup is registered only when the package is importable.
try:
    import BeautifulSoup
    treeTypes["beautifulsoup"] = dict(
        builder=treebuilders.getTreeBuilder("beautifulsoup"),
        walker=treewalkers.getTreeWalker("beautifulsoup"))
except ImportError:
    pass
93
124
@@ -146,12 +177,14 @@ def sortattrs(x):
146
177
147
178
class TestCase (unittest .TestCase ):
148
179
def runTest (self , innerHTML , input , expected , errors , treeClass ):
149
- p = html5parser .HTMLParser (tree = treeClass [0 ])
180
+ p = html5parser .HTMLParser (tree = treeClass ["builder" ])
181
+
150
182
if innerHTML :
151
183
document = p .parseFragment (StringIO .StringIO (input ), innerHTML )
152
184
else :
153
185
document = p .parse (StringIO .StringIO (input ))
154
- output = convertTokens (treeClass [1 ]().walk (document ))
186
+ document = treeClass .get ("adapter" , lambda x : x )(document )
187
+ output = convertTokens (treeClass ["walker" ]().walk (document ))
155
188
output = attrlist .sub (sortattrs , output )
156
189
expected = attrlist .sub (sortattrs , expected )
157
190
errorMsg = "\n " .join (["\n \n Expected:" , expected ,
0 commit comments