17
17
from encoding import get_encoding
18
18
from debug import describe , text_content , open_in_browser
19
19
20
- log = logging .getLogger ('readbility.readability' )
21
- StandardError = Exception in python3
20
+ log = logging .getLogger (__file__ )
F621
22
21
23
22
REGEXES = {
24
23
'unlikelyCandidatesRe' : re .compile ('combx|comment|community|disqus|extra|foot|header|menu|remark|rss|shoutbox|sidebar|sponsor|ad-break|agegate|pagination|pager|popup|tweet|twitter' , re .I ),
@@ -190,7 +189,7 @@ def summary(self, html_partial=False):
190
189
continue
191
190
else :
192
191
return cleaned_article
193
- except StandardError as e :
192
+ except Exception as e :
194
193
log .exception ('error getting summary: ' )
195
194
raise Unparseable (str (e )), None , sys .exc_info ()[2 ]
196
195
@@ -635,9 +634,8 @@ def main():
635
634
file = urllib .urlopen (options .url )
636
635
else :
637
636
file = open (args [0 ], 'rt' )
638
- output_encoding = sys .__stdout__ .encoding or 'utf-8'
639
- # XXX: a hack, better set PYTHONIOENCODING explicitly
640
637
html = file .read () # bytes object
638
+
641
639
encoding = get_encoding (html )
642
640
html = html .decode (encoding )
643
641
try :
@@ -648,6 +646,9 @@ def main():
648
646
result = 'Title: ' + doc .short_title () + '<br/>' + doc .summary ()
649
647
open_in_browser (result )
650
648
else :
649
+ # XXX: a hack, better to set PYTHONIOENCODING explicitly
650
+ output_encoding = sys .__stdout__ .encoding or 'utf-8'
651
+
651
652
print 'Title:' , doc .short_title ().encode (output_encoding , 'replace' )
652
653
print doc .summary ().encode (output_encoding , 'replace' )
653
654
finally :
0 commit comments