6
6
# Run from the directory that contains this file so that relative paths
# used below resolve the same way regardless of where the tests are started.
os.chdir(os.path.dirname(os.path.abspath(__file__)))

# Give the sibling "src" directory top priority on the import path.
sys.path.insert(
    0,
    os.path.abspath(os.path.join(os.pardir, "src")),
)
8
8
9
+ #RELEASE remove
10
+ import html5parser
11
+ #Run tests over all treebuilders
12
+ #XXX - it would be nice to automate finding all treebuilders or to allow running just one
13
+
14
+ import treebuilders
15
+ #END RELEASE
16
+
17
+ #RELEASE add
18
+ #import html5lib
19
+ #from html5lib import html5parser
20
+ #from html5lib.treebuilders import simpletree, etreefull, dom
21
+ #END RELEASE
22
+
9
23
try :
10
24
import simplejson
11
25
except :
@@ -17,5 +31,85 @@ def load(f):
17
31
return eval (input .replace ('\r ' ,'' ))
18
32
load = staticmethod (load )
19
33
34
# Build a dict of available trees, keyed by a display name.
# These two builders are always registered.
treeTypes = dict(
    simpletree=treebuilders.getTreeBuilder("simpletree"),
    DOM=treebuilders.getTreeBuilder("dom"),
)

# Try whatever etree implementations are available from a list that are
# "supposed" to work.  Each one is registered only when its import succeeds;
# an ImportError simply leaves that entry out.

# ElementTree: the xml.etree version first, then the standalone package.
try:
    from xml.etree import ElementTree
    treeTypes['ElementTree'] = treebuilders.getTreeBuilder(
        "etree", ElementTree, fullTree=True)
except ImportError:
    try:
        from elementtree import ElementTree
        treeTypes['ElementTree'] = treebuilders.getTreeBuilder(
            "etree", ElementTree, fullTree=True)
    except ImportError:
        pass

# cElementTree: the xml.etree version first, then the standalone module.
try:
    from xml.etree import cElementTree
    treeTypes['cElementTree'] = treebuilders.getTreeBuilder(
        "etree", cElementTree, fullTree=True)
except ImportError:
    try:
        import cElementTree
        treeTypes['cElementTree'] = treebuilders.getTreeBuilder(
            "etree", cElementTree, fullTree=True)
    except ImportError:
        pass

# lxml's etree module, bound to the name "lxml".
try:
    from lxml import etree as lxml
    treeTypes['lxml'] = treebuilders.getTreeBuilder(
        "etree", lxml, fullTree=True)
except ImportError:
    pass

# BeautifulSoup: the import acts as an availability check before the
# matching builder is requested.
try:
    import BeautifulSoup
    treeTypes["beautifulsoup"] = treebuilders.getTreeBuilder(
        "beautifulsoup", fullTree=True)
except ImportError:
    pass
71
+
20
72
def html5lib_test_files(subdirectory, files='*.dat'):
    """Return the test-data files of one subdirectory, in sorted order.

    The glob pattern is built as ``../../testdata/<subdirectory>/<files>``,
    i.e. relative to the current working directory.

    subdirectory -- name of the directory below ``testdata`` to search
    files        -- glob pattern for the file names (default ``'*.dat'``)

    Returns a list of matching paths; the list is empty when nothing
    matches.
    """
    pattern = os.path.join(os.path.pardir, os.path.pardir, 'testdata',
                           subdirectory, files)
    # glob.glob() returns matches in arbitrary order; sort them so the test
    # files are always visited in the same sequence.
    return sorted(glob.glob(pattern))
74
+
75
class TestData(object):
    """Iterate over the tests stored in a sectioned test-data file.

    A section starts at a line of the form ``#<name>`` where ``<name>`` is
    one of ``sections``; the lines that follow, up to the next section
    heading, make up that section's text.  A new test begins each time the
    first entry of ``sections`` is seen again.  Every test is yielded as a
    dict that maps each section name to its text, or to ``None`` when the
    section does not occur in that test.
    """

    def __init__(self, filename, sections):
        """Open ``filename`` for reading.

        filename -- path of the test-data file
        sections -- indexable collection of section names; its first entry
                    is the heading that starts a new test
        """
        self.f = open(filename)
        self.sections = sections

    def __iter__(self):
        """Yield one normalised dict per test found in the file.

        The underlying file is closed once iteration finishes (or the
        generator is closed), so the data can be consumed only once; a
        later iteration yields nothing.
        """
        if self.f.closed:
            # Already consumed on an earlier pass.
            return
        try:
            data = {}
            key = None
            for line in self.f:
                heading = self.isSectionHeading(line)
                if heading:
                    if data and heading == self.sections[0]:
                        # The first section heading starts the next test:
                        # drop the last character of the section collected
                        # most recently (the newline of the final line read
                        # into it) and emit the finished test.
                        data[key] = data[key][:-1]
                        yield self.normaliseOutput(data)
                        data = {}
                    key = heading
                    data[key] = ""
                elif key is not None:
                    # Lines before the first heading are ignored.
                    data[key] += line
            if data:
                # The last test has no following heading to trigger it.
                yield self.normaliseOutput(data)
        finally:
            # Do not leak the file handle opened in __init__.
            self.f.close()

    def isSectionHeading(self, line):
        """If the current line is a test section heading return the heading
        name (without the leading "#"), otherwise return False"""
        line = line.strip()
        if line.startswith("#") and line[1:] in self.sections:
            return line[1:]
        else:
            return False

    def normaliseOutput(self, data):
        """Tidy up the sections of one test and return the same dict.

        One trailing newline is removed from every section's text, and every
        name in ``self.sections`` that is missing from ``data`` is added with
        the value ``None``.  ``data`` is modified in place.
        """
        # Remove trailing newlines.  Iterate over a snapshot of the items:
        # it is safe while assigning into the dict and does not depend on
        # the Python 2-only iteritems().
        for key, value in list(data.items()):
            if value.endswith("\n"):
                data[key] = value[:-1]
        for heading in self.sections:
            if heading not in data:
                data[heading] = None
        return data
0 commit comments