2 files changed: +4 -14 lines changed
Original file line number Diff line number Diff line change @@ -11,20 +11,10 @@ def build_doc(page):
11
11
if isinstance (page , unicode ):
12
12
page_unicode = page
13
13
else :
14
- enc = get_encoding (page )
15
- if enc :
16
- page_unicode = page .decode (enc , 'replace' )
17
- encoding = enc
18
- else :
19
- try :
20
- #try utf-8
21
- page_unicode = page .decode ('utf-8' , 'strict' )
22
- encoding = 'utf-8'
23
- except UnicodeDecodeError :
24
- page_unicode = page .decode ('utf-8' , 'replace' )
25
- encoding = 'utf-8'
14
+ enc = get_encoding (page ) or 'utf-8'
15
+ page_unicode = page .decode (enc , 'replace' )
26
16
doc = lxml .html .document_fromstring (page_unicode .encode ('utf-8' , 'replace' ), parser = utf8_parser )
27
- return doc , encoding
17
+ return doc , enc
28
18
29
19
def js_re(src, pattern, flags, repl):
    """Apply a JavaScript-style regex replacement to ``src``.

    Intended as the equivalent of JavaScript's ``src.replace(pattern, repl)``.

    Args:
        src: The subject string to search.
        pattern: Regular-expression source, compiled with ``re.compile``.
        flags: ``re`` module flags passed through to ``re.compile``.
        repl: Replacement template using ``$`` for group references
            (e.g. ``"$2 $1"``).

    Returns:
        ``src`` with every match of ``pattern`` replaced.
    """
    # Translate "$1"-style references into Python's "\1" form. Each "$" is
    # swapped for a single backslash with no trailing space, otherwise "\ 1"
    # would not be a group reference.
    # NOTE(review): only numbered references translate this way; JavaScript
    # forms such as "$&" have no direct "\&" equivalent in Python.
    template = repl.replace('$', '\\')
    # Pattern.sub takes (replacement, subject) -- the replacement template
    # comes first, the string being searched second.
    return re.compile(pattern, flags).sub(template, src)
Original file line number Diff line number Diff line change 9
9
10
10
setup (
11
11
name = "readability-lxml" ,
12
- version = "0.3" ,
12
+ version = "0.3.0.1 " ,
13
13
author = "Yuri Baburov" ,
14
14
author_email = "burchik@gmail.com" ,
15
15
description = "fast python port of arc90's readability tool" ,
You can’t perform that action at this time.
0 commit comments