awesome-python
diff --git a/examples/validate.cgi b/examples/validate.cgi
Lines changed: 49 additions & 72 deletions
diff --git a/src/treebuilders/soup.py b/src/treebuilders/soup.py
Lines changed: 0 additions & 1 deletion
diff --git a/src/treewalkers/soup.py b/src/treewalkers/soup.py
Lines changed: 5 additions & 5 deletions
@@ -6,78 +6,55 @@ does not report any of the other (many) possible types of conformance
 errors that may exist in a HTML5 document"""
 
 import sys
-import urllib2
 import cgi
+import copy
 
-import html5lib
-
-htmlTemplate = u"""<html>
-<head>
-<title>%(title)s</title>
-</head>
-<body>
-<h1>%(title)s</h1>
-%(body)s
-</body>
-</html>"""
-
-def parseDocument(document):
-    """Parse the document and return a list of errors and a parse tree"""
-    p = html5lib.HTMLParser()
-    tree = p.parse(document)
-    return p.errors, cgi.escape(tree.printTree(), True)
-
-def getDocument(uri):
-    if uri.startswith("http://") or uri.startswith("https://"):
-        #Why is string conversion necessary here?
-        document = "".join(urllib2.urlopen(uri).readlines())[:-1]
-        #print "<--!%s-->"%(document,)
-    else:
-        raise ValueError, "Unrecognised URI type"
-    return document
-
-def writeValid(uri, treeStr):
-    bodyText = """<p><strong>%s is valid HTML5!</strong></p>
-<h2>Parse Tree:</h2>
-<pre>
-%s
-</pre>"""%(uri, treeStr)
-    writeOutput(htmlTemplate%{"title":"Validation Results", "body":bodyText})
-
-def writeInvalid(uri, treeStr, errors):
-    errList=[]
-    for pos, message in errors:
-        errList.append("Line %i Col %i"%pos + " " + message)
-    errStr = "<br>\n".join(errList)
-    bodyText = """<p><strong>%s is not valid HTML5</strong></p>
-<h2>Errors:</h2>
-%s
-<h2>Parse Tree:</h2>
-<pre>
-%s
-</pre>"""%(uri, errStr, treeStr)
-    writeOutput(htmlTemplate%{"title":"Validation Results", "body":bodyText})
+import httplib2
+import lxml
+from genshi.template import MarkupTemplate
 
-def writeErr(uri):
-    bodyText = "<p>Failed to load URI %s</p>"%(uri,)
-    writeOutput(htmlTemplate%{"title":"Error", "body":bodyText})
-
-def writeOutput(s):
-    print s.encode('utf-8')
-
-print "Content-type: text/html"
-print ""
-
-try:
-    form = cgi.FieldStorage()
-    uri = form.getvalue("uri")
-    document = getDocument(uri)
-except:
-    writeErr(uri)
-    sys.exit(1)
-
-errors, tree = parseDocument(document)
-if errors:
-    writeInvalid(uri, tree, errors)
-else:
-    writeValid(uri, tree)
+import html5lib
+from html5lib import treebuilders
+
+class Resource(object):
+    http = httplib2.Http()
+    def __init__(self, uri):
+        self.uri = uri
+        self.content = None
+    
+    def load(self):
+        self.response, self.content = self.http.request(self.uri)
+
+    def parse(self):
+        raise NotImplementedError
+
+class Schema(Resource):
+    def load(self):
+        #This will just be a network operation eventually
+        self.content = open(self.uri).read()
+    
+    def parse(self):
+        self.tree = lxml.etree.parse(self.content)
+        self.relaxng = lxml.etree.RelaxNG(self.tree)
+
+class Document(Resource):
+    
+    def parse(self):
+        parser = html5lib.HTMLParser(
+            tree=treebuilders.getTreeBuilder("etree", lxml.etree))
+        self.tree = parser.parse(self.content)
+        self.parseErrors = parser.parseErrors
+        self.hasSyntaxErrors = not(self.parseErrors)
+    
+    def check(self, schema):
+        self.hasConformaceErrors = schema.relaxng.validate(self.tree)
+        self.relaxErrors = schema.relaxng.error_log
+
+class Response(object):
+    templateFilename = "response.html"
+    def __init__(self):
+        self.template = MarkupTemplate(open(self.templateFilename).read())
+    
+    def render(self, document):
+        stream = self.template.generate(doc = document)
+        return stream.render(doctype=("html","",""))
@@ -60,7 +60,6 @@ def insertText(self, data, insertBefore=None):
             self.appendChild(text)
 
     def insertBefore(self, node, refNode):
-        #XXX What exception should we throw here?
         index = self.element.contents.index(refNode.element)
         if (node.element.__class__ == NavigableString and self.element.contents
             and self.element.contents[index-1].__class__ == NavigableString):
 
@@ -1,7 +1,7 @@
 import gettext
 _ = gettext.gettext
 
-from BeautifulSoup import BeautifulSoup, Declaration, Comment
+from BeautifulSoup import BeautifulSoup, Declaration, Comment, Tag
 
 import _base
 
@@ -11,18 +11,18 @@ def getNodeDetails(self, node):
             return (_base.DOCUMENT,)
 
         elif isinstance(node, Declaration): # DocumentType
-            return _base.DOCTYPE, node.string
+            #Slice needed to remove markup added during unicode conversion
+            return _base.DOCTYPE, unicode(node.string)[2:-1]
 
         elif isinstance(node, Comment):
-            return _base.COMMENT, node.data
+            return _base.COMMENT, unicode(node.string)[4:-3]
 
         elif isinstance(node, unicode): # TextNode
             return _base.TEXT, node
 
         elif isinstance(node, Tag): # Element
             return _base.ELEMENT, node.name, \
-              node.attrs.items(), node.contents
-
+                dict(node.attrs).items(), node.contents
         else:
             return _base.UNKNOWN, node.__class__.__name__