@@ -69,6 +69,7 @@ class XHTMLParser(XMLParser):
69
69
70
70
def __init__(self, *args, **kwargs):
    """Initialise the underlying HTML parser, then swap in two phases.

    Every positional and keyword argument is forwarded unchanged to
    html5parser.HTMLParser.__init__.
    """
    html5parser.HTMLParser.__init__(self, *args, **kwargs)

    # Replace the "initial" and "rootElement" entries of the phase table,
    # in that order; each replacement is built from this parser and its
    # tree.
    replacements = (
        ("initial", XmlInitialPhase),
        ("rootElement", XhmlRootPhase),
    )
    for phase_name, phase_class in replacements:
        self.phases[phase_name] = phase_class(self, self.tree)
73
74
74
75
def normalizeToken (self , token ):
@@ -101,7 +102,19 @@ def insertHtmlElement(self):
101
102
self .tree .document .appendChild (element )
102
103
self .parser .phase = self .parser .phases ["beforeHead" ]
103
104
105
class XmlInitialPhase(html5parser.InitialPhase):
    """Initial phase that consumes XML prologs."""

    def processComment(self, data):
        """Discard prolog-shaped comment data; delegate everything else.

        Comment data that both starts with '?xml' and ends with '?' is
        dropped without further processing.  Any other comment is handed
        to html5parser.InitialPhase.processComment unchanged.
        """
        looks_like_prolog = data.startswith('?xml') and data.endswith('?')
        if looks_like_prolog:
            return
        html5parser.InitialPhase.processComment(self, data)
110
+
104
111
class XmlRootPhase (html5parser .Phase ):
112
+ """ Consume XML Prologs """
113
def processComment(self, data):
    """Discard prolog-shaped comment data; delegate everything else.

    Comment data that both starts with '?xml' and ends with '?' is
    dropped.  Any other comment is forwarded to
    html5parser.InitialPhase.processComment.

    A leftover debugging ``print repr(data)`` that echoed every comment
    token to stdout has been removed.
    """
    if data.startswith('?xml') and data.endswith('?'):
        return
    # NOTE(review): the enclosing class derives from html5parser.Phase,
    # not html5parser.InitialPhase.  On Python 2, calling an unbound
    # method with an instance of an unrelated class raises TypeError --
    # confirm this delegation is reachable and works as intended.
    html5parser.InitialPhase.processComment(self, data)
117
+
105
118
""" Prime the Xml parser """
106
119
def __getattr__ (self , name ):
107
120
self .tree .openElements .append (self .tree .document )
0 commit comments