File tree Expand file tree Collapse file tree 2 files changed +14
-1
lines changed Expand file tree Collapse file tree 2 files changed +14
-1
lines changed Original file line number Diff line number Diff line change @@ -509,8 +509,8 @@ def changeEncoding(self, newEncoding):
509
509
self .charEncoding = (self .charEncoding [0 ], "certain" )
510
510
else :
511
511
self .rawStream .seek (0 )
512
- self .reset ()
513
512
self .charEncoding = (newEncoding , "certain" )
513
+ self .reset ()
514
514
raise ReparseException ("Encoding changed from %s to %s" % (self .charEncoding [0 ], newEncoding ))
515
515
516
516
def detectBOM (self ):
Original file line number Diff line number Diff line change @@ -21,6 +21,19 @@ def test_basic_prescan_length():
21
21
assert 'utf-8' == stream .charEncoding [0 ].name
22
22
23
23
24
+ def test_parser_reparse ():
25
+ data = "<title>Caf\u00E9 </title><!--a--><meta charset='utf-8'>" .encode ('utf-8' )
26
+ pad = 10240 - len (data ) + 1
27
+ data = data .replace (b"-a-" , b"-" + (b"a" * pad ) + b"-" )
28
+ assert len (data ) == 10240 # Sanity
29
+ stream = inputstream .HTMLBinaryInputStream (data , chardet = False )
30
+ assert 'windows-1252' == stream .charEncoding [0 ].name
31
+ p = HTMLParser (namespaceHTMLElements = False )
32
+ doc = p .parse (data , useChardet = False )
33
+ assert 'utf-8' == p .documentEncoding
34
+ assert doc .find (".//title" ).text == "Caf\u00E9 "
35
+
36
+
24
37
def runParserEncodingTest (data , encoding ):
25
38
p = HTMLParser ()
26
39
assert p .documentEncoding is None
You can’t perform that action at this time.
0 commit comments