@@ -6,78 +6,55 @@ does not report any of the other (many) possible types of conformance
6
6
errors that may exist in an HTML5 document"""
7
7
8
8
import sys
9
- import urllib2
10
9
import cgi
10
+ import copy
11
11
12
- import html5lib
13
-
14
# Page skeleton used for every response written by this script.
# %(title)s is substituted into both <title> and the <h1>; %(body)s is
# inserted verbatim inside <body>, so callers must supply ready-made HTML.
htmlTemplate = u"""<html>
<head>
<title>%(title)s</title>
</head>
<body>
<h1>%(title)s</h1>
%(body)s
</body>
</html>"""
23
-
24
def parseDocument(document):
    """Run *document* through html5lib and report what the parser found.

    Returns a 2-tuple ``(errors, tree)``: the parser's ``errors`` attribute
    and the printed parse tree, HTML-escaped (quotes included) so it can be
    embedded directly in a page.
    """
    parser = html5lib.HTMLParser()
    parsed = parser.parse(document)
    found = parser.errors
    escapedTree = cgi.escape(parsed.printTree(), True)
    return found, escapedTree
29
-
30
def getDocument(uri):
    """Fetch the resource at *uri* and return its body.

    Only ``http://`` and ``https://`` URIs are accepted.

    Raises:
        ValueError: if *uri* does not start with an accepted scheme.

    Anything raised by ``urllib2.urlopen`` or by reading the response is
    propagated unchanged; a non-string *uri* (e.g. ``None``) fails on the
    ``startswith`` call.
    """
    if not uri.startswith(("http://", "https://")):
        # Call form of raise: valid on both Python 2 and Python 3.
        raise ValueError("Unrecognised URI type")
    response = urllib2.urlopen(uri)
    try:
        # read() hands back the body untouched; nothing is trimmed from the
        # end, so a document that does not finish with a newline keeps its
        # last character.
        return response.read()
    finally:
        # Release the connection whether or not the read succeeded.
        response.close()
38
-
39
def writeValid(uri, treeStr):
    """Write the results page for a document that parsed without errors.

    uri     -- address of the checked document. It originates from the
               request, so it is HTML-escaped before being placed in the page.
    treeStr -- parse tree text, inserted as-is inside <pre>; the caller in
               this script passes the already-escaped tree returned by
               parseDocument.
    """
    bodyText = """<p><strong>%s is valid HTML5!</strong></p>
<h2>Parse Tree:</h2>
<pre>
%s
</pre>""" % (cgi.escape(uri, True), treeStr)
    writeOutput(htmlTemplate % {"title": "Validation Results", "body": bodyText})
46
-
47
def writeInvalid(uri, treeStr, errors):
    """Write the results page for a document that produced parse errors.

    uri     -- address of the checked document; HTML-escaped here because it
               comes from the request.
    treeStr -- parse tree text, inserted as-is inside <pre>; the caller in
               this script passes the already-escaped tree returned by
               parseDocument.
    errors  -- iterable of ``(pos, message)`` pairs, where ``pos`` is a
               ``(line, column)`` pair of integers.
    """
    # Messages can quote fragments of the checked document, so escape them
    # before they are joined into markup.
    errList = ["Line %i Col %i" % pos + " " + cgi.escape(message, True)
               for pos, message in errors]
    errStr = "<br>\n ".join(errList)
    bodyText = """<p><strong>%s is not valid HTML5</strong></p>
<h2>Errors:</h2>
%s
<h2>Parse Tree:</h2>
<pre>
%s
</pre>""" % (cgi.escape(uri, True), errStr, treeStr)
    writeOutput(htmlTemplate % {"title": "Validation Results", "body": bodyText})
12
+ import httplib2
13
+ import lxml
14
+ from genshi .template import MarkupTemplate
60
15
61
def writeErr(uri):
    """Write the error page shown when *uri* could not be loaded.

    *uri* may be ``None`` (no ``uri`` field in the request) or any
    user-supplied string, so it is converted to text first and then
    HTML-escaped before being placed in the page.
    """
    safeUri = cgi.escape("%s" % (uri,), True)
    bodyText = "<p>Failed to load URI %s</p>" % (safeUri,)
    writeOutput(htmlTemplate % {"title": "Error", "body": bodyText})
64
-
65
def writeOutput(s):
    """Send *s* to standard output, UTF-8 encoded, followed by a newline."""
    encoded = s.encode('utf-8')
    sys.stdout.write(encoded + "\n")
67
-
68
# CGI entry point: emit the response header (terminated by a blank line),
# then fetch, parse and report on the document named by the "uri" field.
sys.stdout.write("Content-type: text/html\n\n")

# Bound before the try block so the error page can still be produced if
# reading the form itself fails.
uri = None
try:
    form = cgi.FieldStorage()
    uri = form.getvalue("uri")
    document = getDocument(uri)
except Exception:
    # Any failure to obtain the document ends the request with the error
    # page. SystemExit and KeyboardInterrupt are deliberately not caught.
    writeErr(uri)
    sys.exit(1)

errors, tree = parseDocument(document)
if errors:
    writeInvalid(uri, tree, errors)
else:
    writeValid(uri, tree)
16
+ import html5lib
17
+ from html5lib import treebuilders
18
+
19
class Resource(object):
    """Base class for something identified by a URI that can be loaded and parsed.

    Attributes set by this class:
        uri      -- the address passed to the constructor.
        response -- first value returned by the HTTP request; ``None`` until
                    load() has run.
        content  -- second value returned by the HTTP request (the body);
                    ``None`` until load() has run.

    Subclasses must override parse().
    """

    # A single httplib2 client created at class-definition time and shared
    # by every instance (and subclass).
    http = httplib2.Http()

    def __init__(self, uri):
        self.uri = uri
        # Initialise both load() results so either attribute can be
        # inspected before (or without) a network load.
        self.response = None
        self.content = None

    def load(self):
        """Request ``self.uri`` and store the response and body on the instance."""
        self.response, self.content = self.http.request(self.uri)

    def parse(self):
        """Turn ``self.content`` into a tree; implemented by subclasses."""
        raise NotImplementedError
30
+
31
class Schema(Resource):
    """A RELAX NG schema read from a local file.

    Here ``uri`` is used as a filesystem path. After parse() the instance
    has ``tree`` (the schema as an lxml tree) and ``relaxng`` (the compiled
    ``lxml.etree.RelaxNG`` validator).
    """

    def load(self):
        """Read the schema file named by ``self.uri`` into ``self.content``."""
        #This will just be a network operation eventually
        # Binary mode hands lxml raw bytes, letting it honour any encoding
        # declared in the XML prolog; the context manager closes the file.
        with open(self.uri, "rb") as schemaFile:
            self.content = schemaFile.read()

    def parse(self):
        """Parse the loaded schema text and compile it into a validator."""
        # "import lxml" on its own does not load the etree submodule, so
        # bring it in explicitly here.
        from lxml import etree

        # self.content holds the document text, not a path, so it has to go
        # through fromstring() rather than etree.parse(). base_url tells
        # lxml where the text came from so that relative references inside
        # the schema can be resolved against the schema file's location.
        root = etree.fromstring(self.content, base_url=self.uri)
        self.tree = root.getroottree()
        self.relaxng = etree.RelaxNG(self.tree)
39
+
40
class Document(Resource):
    """An HTML document that can be parsed and checked against a Schema.

    After parse():
        tree            -- result of the html5lib parse, built with the
                           lxml-backed "etree" tree builder.
        parseErrors     -- the errors recorded by the parser.
        hasSyntaxErrors -- True when parseErrors is non-empty.

    After check():
        hasConformanceErrors -- True when the schema rejected the tree.
        hasConformaceErrors  -- same value under the original (misspelled)
                                name, kept for existing users.
        relaxErrors          -- the validator's error log.
    """

    def parse(self):
        """Parse ``self.content`` with html5lib and record any parse errors."""
        # "import lxml" on its own does not load the etree submodule, so
        # bring it in explicitly here.
        from lxml import etree

        parser = html5lib.HTMLParser(
            tree=treebuilders.getTreeBuilder("etree", etree))
        self.tree = parser.parse(self.content)
        # html5lib collects parse errors on the parser's ``errors`` attribute.
        self.parseErrors = parser.errors
        self.hasSyntaxErrors = bool(self.parseErrors)

    def check(self, schema):
        """Validate the parsed tree against *schema* (a parsed Schema).

        Requires parse() to have been called on both this document and
        *schema*.
        """
        # validate() returns True when the tree conforms, so the "has
        # errors" flag is its negation.
        isValid = schema.relaxng.validate(self.tree)
        self.hasConformanceErrors = not isValid
        self.hasConformaceErrors = self.hasConformanceErrors
        self.relaxErrors = schema.relaxng.error_log
52
+
53
class Response(object):
    """Renders the validation result page from a Genshi markup template."""

    # Template path; opened relative to the current working directory.
    templateFilename = "response.html"

    def __init__(self):
        # Read the template source once per Response; the context manager
        # closes the file as soon as its text has been read.
        with open(self.templateFilename) as templateFile:
            self.template = MarkupTemplate(templateFile.read())

    def render(self, document):
        """Return the page generated for *document*.

        *document* is exposed to the template under the name ``doc``. The
        output is rendered with the doctype tuple ``("html", "", "")``.
        """
        stream = self.template.generate(doc=document)
        return stream.render(doctype=("html", "", ""))
0 commit comments