Garden · gui199/python-readability@9765d13 · GitHub
[go: up one dir, main page]

Skip to content {"props":{"docsUrl":"https://docs.github.com/get-started/accessibility/keyboard-shortcuts"}}

Commit 9765d13

Browse files
committed
Garden
1 parent 32d1764 commit 9765d13

File tree

1 file changed

+3
-21
lines changed

1 file changed

+3
-21
lines changed

src/readability_lxml/readability.py

Lines changed: 3 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
import sys
55
import urlparse
66

7-
from collections import defaultdict
87
from collections import namedtuple
98
from lxml.etree import tostring
109
from lxml.etree import tounicode
@@ -232,9 +231,11 @@ def same_domain(lhs, rhs):
232231
else:
233232
return split_lhs.netloc == split_rhs.netloc
234233

234+
235235
def strip_trailing_slash(s):
236236
return re.sub(r'/$', '', s)
237237

238+
238239
def eval_possible_next_page_link(
239240
parsed_urls,
240241
url,
@@ -336,6 +337,7 @@ def eval_possible_next_page_link(
336337
except ValueError as e:
337338
pass
338339

340+
339341
def find_next_page_link(parsed_urls, url, elem):
340342
links = tags(elem, 'a')
341343
base_url = find_base_url(url)
@@ -814,26 +816,6 @@ def sanitize(self, node, candidates):
814816
' many <embed>s')
815817
to_remove = True
816818

817-
818-
# if el.tag == 'div' and counts['img'] >= 1 and to_remove:
819-
# imgs = el.findall('.//img')
820-
# valid_img = False
821-
# self.debug(tounicode(el))
822-
# for img in imgs:
823-
#
824-
# height = img.get('height')
825-
# text_length = img.get('text_length')
826-
# self.debug ("height %s text_length %s" %(repr(height), repr(text_length)))
827-
# if to_int(height) >= 100 or to_int(text_length) >= 100:
828-
# valid_img = True
829-
# self.debug("valid image" + tounicode(img))
830-
# break
831-
# if valid_img:
832-
# to_remove = False
833-
# self.debug("Allowing %s" %el.text_content())
834-
# for desnode in tags(el, "table", "ul", "div"):
835-
# allowed[desnode] = True
836-
837819
# don't really understand what this is doing. Originally
838820
# the i/j were =+ which sets the value to 1. I think that
839821
# was supposed to be += which would increment. But then

0 commit comments

Comments
 (0)
0