Merge pull request #20 from andreypopp/master · Harry0201/python-readability@2e49e34 · GitHub
[go: up one dir, main page]

Skip to content

Commit 2e49e34

Browse files
committed
Merge pull request buriy#20 from andreypopp/master
readability.htmls: some docs do not have title elem
2 parents 274b60c + 95852d5 commit 2e49e34

File tree

1 file changed

+9
-9
lines changed

1 file changed

+9
-9
lines changed

readability/htmls.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -43,11 +43,11 @@ def norm_title(title):
4343
return normalize_entities(normalize_spaces(title))
4444

4545
def get_title(doc):
46-
title = doc.find('.//title').text
47-
if not title:
46+
title = doc.find('.//title')
47+
if not title or not title.text:
4848
return '[no-title]'
49-
50-
return norm_title(title)
49+
50+
return norm_title(title.text)
5151

5252
def add_match(collection, text, orig):
5353
text = norm_title(text)
@@ -56,11 +56,11 @@ def add_match(collection, text, orig):
5656
collection.add(text)
5757

5858
def shorten_title(doc):
59-
title = doc.find('.//title').text
60-
if not title:
59+
title = doc.find('.//title')
60+
if not title or not title.text:
6161
return ''
62-
63-
title = orig = norm_title(title)
62+
63+
title = orig = norm_title(title.text)
6464

6565
candidates = set()
6666

@@ -77,7 +77,7 @@ def shorten_title(doc):
7777
add_match(candidates, e.text, orig)
7878
if e.text_content():
7979
add_match(candidates, e.text_content(), orig)
80-
80+
8181
if candidates:
8282
title = sorted(candidates, key=len)[-1]
8383
else:

0 commit comments

Comments (0)