Move the tests to the testfile · Harry0201/python-readability@1e30e33 · GitHub
[go: up one dir, main page]

Skip to content

Commit 1e30e33

Browse files
jcharummitechie
authored and committed
Move the tests to the testfile
1 parent e8a6250 commit 1e30e33

File tree

2 files changed

+23
-1
lines changed

2 files changed

+23
-1
lines changed

src/readability_lxml/readability.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,13 @@ def clean_segment_number(num_segments, index, segment):
139139
return segment
140140

141141

142+
def clean_segment_index(num_segments, index, segment):
143+
if index == (num_segments - 1) and segment.lower() == 'index':
144+
return None
145+
else:
146+
return segment
147+
148+
142149
def clean_segment(num_segments, index, segment):
143150
"""
144151
Cleans a single segment of a URL to find the base URL. The base URL is as
@@ -150,7 +157,8 @@ def clean_segment(num_segments, index, segment):
150157
clean_segment_extension,
151158
clean_segment_ewcms,
152159
clean_segment_page_number,
153-
clean_segment_number
160+
clean_segment_number,
161+
clean_segment_index
154162
]
155163
cleaned_segment = segment
156164
for func in funcs:

src/tests/test_readability.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,3 +110,17 @@ def test_numbers(self):
110110
]
111111
self._run_urls(specs)
112112

113+
def test_index(self):
114+
specs = [
115+
(
116+
'http://foo.com/index.html',
117+
'http://foo.com',
118+
'index should be stripped'
119+
),
120+
(
121+
'http://foo.com/path/to/index.html',
122+
'http://foo.com/path/to',
123+
'index should be stripped'
124+
)
125+
]
126+
self._run_urls(specs)

0 commit comments

Comments
 (0)
0