@@ -71,6 +71,8 @@ def __init__(self, stream, encoding=None, parseMeta=True, useChardet=True,
71
71
"comment" :self .commentState ,
72
72
"commentEndDash" :self .commentEndDashState ,
73
73
"commentEnd" :self .commentEndState ,
74
+ "commentEndBang" :self .commentEndBangState ,
75
+ "commentEndSpace" :self .commentEndSpaceState ,
74
76
"doctype" :self .doctypeState ,
75
77
"beforeDoctypeName" :self .beforeDoctypeNameState ,
76
78
"doctypeName" :self .doctypeNameState ,
@@ -476,7 +478,7 @@ def tagNameState(self):
476
478
elif data is EOF :
477
479
self .tokenQueue .append ({"type" : tokenTypes ["ParseError" ], "data" :
478
480
"eof-in-tag-name" })
479
- self .emitCurrentToken ()
481
+ self .state = self . states [ "data" ]
480
482
elif data == u"/" :
481
483
self .state = self .states ["selfClosingStartTag" ]
482
484
else :
@@ -504,7 +506,7 @@ def beforeAttributeNameState(self):
504
506
elif data is EOF :
505
507
self .tokenQueue .append ({"type" : tokenTypes ["ParseError" ], "data" :
506
508
"expected-attribute-name-but-got-eof" })
507
- self .emitCurrentToken ()
509
+ self .state = self . states [ "data" ]
508
510
else :
509
511
self .currentToken ["data" ].append ([data , "" ])
510
512
self .state = self .states ["attributeName" ]
@@ -654,7 +656,7 @@ def attributeValueUnQuotedState(self):
654
656
self .processEntityInAttribute (None )
655
657
elif data == u">" :
656
658
self .emitCurrentToken ()
657
- elif data == u'"' or data == u"'" or data == u"=" :
659
+ elif data in ( u'"' , u"'" , u"=" , u"<" ) :
658
660
self .tokenQueue .append ({"type" : tokenTypes ["ParseError" ], "data" :
659
661
"unexpected-character-in-unquoted-attribute-value" })
660
662
self .currentToken ["data" ][- 1 ][1 ] += data
@@ -697,7 +699,6 @@ def selfClosingStartTagState(self):
697
699
self .tokenQueue .append ({"type" : tokenTypes ["ParseError" ],
698
700
"data" :
699
701
"unexpected-EOF-after-solidus-in-tag" })
700
- self .emitCurrentToken ()
701
702
self .stream .unget (data )
702
703
self .state = self .states ["data" ]
703
704
else :
@@ -846,6 +847,15 @@ def commentEndState(self):
846
847
self .tokenQueue .append ({"type" : tokenTypes ["ParseError" ], "data" :
847
848
"unexpected-dash-after-double-dash-in-comment" })
848
849
self .currentToken ["data" ] += data
850
+ elif data in spaceCharacters :
851
+ self .currentToken ["data" ] += "--" + data
852
+ self .tokenQueue .append ({"type" : tokenTypes ["ParseError" ], "data" :
853
+ "unexpected-space-after-double-dash-in-comment" })
854
+ self .state = self .states ["commentEndSpace" ]
855
+ elif data == "!" :
856
+ self .tokenQueue .append ({"type" : tokenTypes ["ParseError" ], "data" :
857
+ "unexpected-bang-after-double-dash-in-comment" })
858
+ self .state = self .states ["commentEndBang" ]
849
859
elif data is EOF :
850
860
self .tokenQueue .append ({"type" : tokenTypes ["ParseError" ], "data" :
851
861
"eof-in-comment-double-dash" })
@@ -859,10 +869,53 @@ def commentEndState(self):
859
869
self .state = self .states ["comment" ]
860
870
return True
861
871
872
def commentEndBangState(self):
    """Consume one character after ``--!`` has been seen inside a comment.

    commentEndState switches here when ``!`` follows ``--``; it does not
    add anything to the comment text itself, so this state is the one
    that writes the pending ``--!`` back into the comment data whenever
    the comment turns out not to be finished.

    Transitions:
      * ``>``  -- queue the current comment token, go to "data".
      * ``-``  -- append ``--!`` to the comment data, go to
                  "commentEndDash".
      * EOF    -- queue an "eof-in-comment-end-bang-state" parse error,
                  queue the current comment token, go to "data".
      * other  -- append ``--!`` plus the character to the comment data,
                  go back to "comment".

    Always returns True.
    """
    char = self.stream.char()

    if char == u">":
        # "--!>" ends the comment; the "--!" itself is not stored.
        self.tokenQueue.append(self.currentToken)
        self.state = self.states["data"]
        return True

    if char == u"-":
        # The dash is not stored here; only the deferred "--!" is.
        self.currentToken["data"] += "--!"
        self.state = self.states["commentEndDash"]
        return True

    if char is EOF:
        # Report the truncated comment, then still emit what we have.
        self.tokenQueue.append({
            "type": tokenTypes["ParseError"],
            "data": "eof-in-comment-end-bang-state",
        })
        self.tokenQueue.append(self.currentToken)
        self.state = self.states["data"]
        return True

    # Ordinary character: "--!" was plain comment text after all.
    self.currentToken["data"] += u"--!" + char
    self.state = self.states["comment"]
    return True
889
+
890
def commentEndSpaceState(self):
    """Consume one character after ``--`` plus whitespace in a comment.

    commentEndState switches here when a space character follows ``--``;
    it has already appended ``--`` and that space to the comment data,
    so this state only needs to store the characters it reads itself.

    Transitions:
      * ``>``    -- queue the current comment token, go to "data".
      * ``-``    -- go to "commentEndDash" (nothing is appended).
      * space    -- append it to the comment data, stay in this state.
      * EOF      -- queue an "eof-in-comment-end-space-state" parse
                    error, queue the current comment token, go to "data".
      * other    -- append it to the comment data, go back to "comment".

    Always returns True.
    """
    char = self.stream.char()

    if char == u">":
        # Whitespace between "--" and ">" still closes the comment.
        self.tokenQueue.append(self.currentToken)
        self.state = self.states["data"]
        return True

    if char == u"-":
        self.state = self.states["commentEndDash"]
        return True

    if char in spaceCharacters:
        # Further whitespace is kept; the state is deliberately unchanged.
        self.currentToken["data"] += char
        return True

    if char is EOF:
        # Report the truncated comment, then still emit what we have.
        self.tokenQueue.append({
            "type": tokenTypes["ParseError"],
            "data": "eof-in-comment-end-space-state",
        })
        self.tokenQueue.append(self.currentToken)
        self.state = self.states["data"]
        return True

    # Any other character means the comment simply continues.
    self.currentToken["data"] += char
    self.state = self.states["comment"]
    return True
908
+
862
909
def doctypeState (self ):
863
910
data = self .stream .char ()
864
911
if data in spaceCharacters :
865
912
self .state = self .states ["beforeDoctypeName" ]
913
+ elif data is EOF :
914
+ self .tokenQueue .append ({"type" : tokenTypes ["ParseError" ], "data" :
915
+ "expected-doctype-name-but-got-eof" })
916
+ self .currentToken ["correct" ] = False
917
+ self .tokenQueue .append (self .currentToken )
918
+ self .state = self .states ["data" ]
866
919
else :
867
920
self .tokenQueue .append ({"type" : tokenTypes ["ParseError" ], "data" :
868
921
"need-space-after-doctype" })
0 commit comments