Clean up merge, put tests in right place, adjust imports · gui199/python-readability@e8a6250 · GitHub
[go: up one dir, main page]

Skip to content

Commit e8a6250

Browse files
committed
Clean up merge, put tests in right place, adjust imports
1 parent 62df355 commit e8a6250

File tree

3 files changed

+104
-2
lines changed

3 files changed

+104
-2
lines changed

src/tests/__init__.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +0,0 @@
1-
from .readability import Document

src/tests/gen_test.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,20 +8,24 @@
88
import errno
99
import os
1010
import os.path
11-
import readability
1211
import sys
1312
import test
1413
import urllib2
1514
import yaml
1615

16+
from readability_lxml import readability
17+
18+
1719
OVERWRITE_QUESTION = '%s exists; overwrite and continue (y/n)? '
1820

21+
1922
def y_or_n(question):
    """Prompt with *question* until a non-blank answer is given.

    Returns True when the first character of the stripped answer is
    'y' or 'Y', and False for any other non-blank answer.  Blank
    answers (empty or whitespace only) cause the prompt to be repeated.
    """
    while True:
        response = raw_input(question).strip()
        # An empty string is falsy, so blank input simply re-prompts.
        if response:
            return response[0] in ('y', 'Y')
2427

28+
2529
def write_file(test_name, suffix, data):
2630
path = os.path.join(test.TEST_DATA_PATH, test_name + suffix)
2731
mode = 0644

src/tests/test_readability.py

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import unittest
22

33
from readability_lxml.readability import Document
4+
from readability_lxml import readability as r
45

56

67
class TestReadabilityDocument(unittest.TestCase):
@@ -11,3 +12,101 @@ def test_none_input_raises_exception(self):
1112

1213
doc = None
1314
self.assertRaises(ValueError, Document, doc)
15+
16+
17+
class TestFindBaseUrl(unittest.TestCase):
    """Exercise ``r.find_base_url`` with (URL, expected base URL) pairs."""

    def setUp(self):
        # With longMessage on, unittest appends a custom ``msg`` to its
        # standard failure text instead of replacing it.
        self.longMessage = True

    def _assert_url(self, url, expected_base_url, msg=None):
        """Assert that ``r.find_base_url(url)`` equals *expected_base_url*."""
        actual_base_url = r.find_base_url(url)
        self.assertEqual(expected_base_url, actual_base_url, msg)

    def _run_urls(self, specs):
        """
        Asserts expected results on a sequence of specs, where each spec is a
        pair: (URL, expected base URL).  A spec may carry an optional third
        element that is used as the failure message.
        """
        for spec in specs:
            url, expected = spec[0], spec[1]
            # The message is optional; two-element specs get no message.
            msg = spec[2] if len(spec) > 2 else None
            self._assert_url(url, expected, msg)

    def test_none(self):
        self._assert_url(None, None)

    def test_no_change(self):
        url = 'http://foo.com/article'
        self._assert_url(url, url)

    def test_extension_stripping(self):
        specs = [
            (
                'http://foo.com/article.html',
                'http://foo.com/article',
                'extension should be stripped'
            ),
            (
                'http://foo.com/path/to/article.html',
                'http://foo.com/path/to/article',
                'extension should be stripped'
            ),
            (
                'http://foo.com/article.123not',
                'http://foo.com/article.123not',
                '123not is not extension'
            ),
            (
                'http://foo.com/path/to/article.123not',
                'http://foo.com/path/to/article.123not',
                '123not is not extension'
            )
        ]
        self._run_urls(specs)

    def test_ewcms(self):
        self._assert_url(
            'http://www.ew.com/ew/article/0,,20313460_20369436,00.html',
            'http://www.ew.com/ew/article/0,,20313460_20369436'
        )

    def test_page_numbers(self):
        specs = [
            (
                'http://foo.com/page5.html',
                'http://foo.com',
                'page number should be stripped'
            ),
            (
                'http://foo.com/path/to/page5.html',
                'http://foo.com/path/to',
                'page number should be stripped'
            ),
            (
                'http://foo.com/article-5.html',
                'http://foo.com/article',
                'page number should be stripped'
            )
        ]
        self._run_urls(specs)

    def test_numbers(self):
        specs = [
            (
                'http://foo.com/5.html',
                'http://foo.com',
                'number should be stripped'
            ),
            (
                'http://foo.com/path/to/5.html',
                'http://foo.com/path/to',
                'number should be stripped'
            )
        ]
        self._run_urls(specs)
112+

0 commit comments

Comments
 (0)
0