Several updates from dev version. · Harry0201/python-readability@e8f86bd · GitHub
[go: up one dir, main page]

Skip to content

Commit e8f86bd

Browse files
committed
Several updates from dev version.
1 parent 40e430c commit e8f86bd

File tree

2 files changed

+6
-6
lines changed

2 files changed

+6
-6
lines changed

readability/encoding.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,6 @@ def get_encoding(page):
4545
except UnicodeDecodeError:
4646
log.info('Encoding "%s", specified in the document as "%s" '
4747
'didn\'t work' % (encoding, declared_encoding))
48-
print "Content encoding didn't work:", encoding
4948

5049
# Fallback to chardet if declared encodings fail
5150
text = re.sub('</?[^>]*>\s*', ' ', page)

readability/readability.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,7 @@
1717
from encoding import get_encoding
1818
from debug import describe, text_content, open_in_browser
1919

20-
log = logging.getLogger('readbility.readability')
21-
StandardError = Exception in python3
20+
log = logging.getLogger(__file__)
2221

2322
REGEXES = {
2423
'unlikelyCandidatesRe': re.compile('combx|comment|community|disqus|extra|foot|header|menu|remark|rss|shoutbox|sidebar|sponsor|ad-break|agegate|pagination|pager|popup|tweet|twitter', re.I),
@@ -190,7 +189,7 @@ def summary(self, html_partial=False):
190189
continue
191190
else:
192191
return cleaned_article
193-
except StandardError as e:
192+
except Exception as e:
194193
log.exception('error getting summary: ')
195194
raise Unparseable(str(e)), None, sys.exc_info()[2]
196195

@@ -635,9 +634,8 @@ def main():
635634
file = urllib.urlopen(options.url)
636635
else:
637636
file = open(args[0], 'rt')
638-
output_encoding = sys.__stdout__.encoding or 'utf-8'
639-
# XXX: a hack, better set PYTHONIOENCODING explicitly
640637
html = file.read() # bytes object
638+
641639
encoding = get_encoding(html)
642640
html = html.decode(encoding)
643641
try:
@@ -648,6 +646,9 @@ def main():
648646
result = 'Title: ' + doc.short_title() + '<br/>' + doc.summary()
649647
open_in_browser(result)
650648
else:
649+
# XXX: a hack, better to set PYTHONIOENCODING explicitly
650+
output_encoding = sys.__stdout__.encoding or 'utf-8'
651+
651652
print 'Title:', doc.short_title().encode(output_encoding, 'replace')
652653
print doc.summary().encode(output_encoding, 'replace')
653654
finally:

0 commit comments

Comments
 (0)
0