gui199
diff --git a/src/tests/__init__.py b/src/tests/__init__.py
Lines changed: 0 additions & 1 deletion
diff --git a/src/tests/gen_test.py b/src/tests/gen_test.py
Lines changed: 5 additions & 1 deletion
diff --git a/src/tests/test_readability.py b/src/tests/test_readability.py
Lines changed: 99 additions & 0 deletions
@@ -1 +0,0 @@
-from .readability import Document
@@ -8,20 +8,24 @@
 import errno
 import os
 import os.path
-import readability
 import sys
 import test
 import urllib2
 import yaml
 
+from readability_lxml import readability
+
+
 OVERWRITE_QUESTION = '%s exists; overwrite and continue (y/n)? '
 
+
 def y_or_n(question):
     """Prompt with *question* until a non-blank answer is entered.
     Returns True when the answer's first character is 'y' or 'Y' and
     False for any other non-blank answer.
     """
     while True:
         answer = raw_input(question).strip()
         if not answer:
             # Blank (or whitespace-only) input: ask again.
             continue
         return answer[0] in ('y', 'Y')
 
+
 def write_file(test_name, suffix, data):
     path = os.path.join(test.TEST_DATA_PATH, test_name + suffix)
     mode = 0644
 
@@ -1,6 +1,7 @@
 import unittest
 
 from readability_lxml.readability import Document
+from readability_lxml import readability as r
 
 
 class TestReadabilityDocument(unittest.TestCase):
@@ -11,3 +12,101 @@ def test_none_input_raises_exception(self):
 
         doc = None
         self.assertRaises(ValueError, Document, doc)
+
+
+class TestFindBaseUrl(unittest.TestCase):
+    """Tests for ``find_base_url`` from the ``readability`` module.
+
+    Every case passes a URL to ``find_base_url`` and compares the returned
+    value with the expected base URL.
+    """
+
+    def setUp(self):
+        # With longMessage enabled, unittest appends a custom ``msg`` to its
+        # standard failure text instead of replacing it.
+        self.longMessage = True
+
+    def _assert_url(self, url, expected_base_url, msg=None):
+        """
+        Asserts that ``find_base_url(url)`` equals ``expected_base_url``;
+        ``msg`` is an optional failure message.
+        """
+        actual_base_url = r.find_base_url(url)
+        self.assertEqual(expected_base_url, actual_base_url, msg)
+
+    def _run_urls(self, specs):
+        """
+        Asserts expected results on a sequence of specs, where each spec is
+        a tuple: (URL, expected base URL[, failure message]).  The failure
+        message is optional and defaults to None.
+        """
+        for spec in specs:
+            url, expected = spec[0], spec[1]
+            msg = spec[2] if len(spec) > 2 else None
+            self._assert_url(url, expected, msg)
+
+    def test_none(self):
+        self._assert_url(None, None)
+
+    def test_no_change(self):
+        url = 'http://foo.com/article'
+        self._assert_url(url, url)
+
+    def test_extension_stripping(self):
+        specs = [
+            (
+                'http://foo.com/article.html',
+                'http://foo.com/article',
+                'extension should be stripped',
+            ),
+            (
+                'http://foo.com/path/to/article.html',
+                'http://foo.com/path/to/article',
+                'extension should be stripped',
+            ),
+            (
+                'http://foo.com/article.123not',
+                'http://foo.com/article.123not',
+                '123not is not extension',
+            ),
+            (
+                'http://foo.com/path/to/article.123not',
+                'http://foo.com/path/to/article.123not',
+                '123not is not extension',
+            ),
+        ]
+        self._run_urls(specs)
+
+    def test_ewcms(self):
+        self._assert_url(
+            'http://www.ew.com/ew/article/0,,20313460_20369436,00.html',
+            'http://www.ew.com/ew/article/0,,20313460_20369436',
+        )
+
+    def test_page_numbers(self):
+        specs = [
+            (
+                'http://foo.com/page5.html',
+                'http://foo.com',
+                'page number should be stripped',
+            ),
+            (
+                'http://foo.com/path/to/page5.html',
+                'http://foo.com/path/to',
+                'page number should be stripped',
+            ),
+            (
+                'http://foo.com/article-5.html',
+                'http://foo.com/article',
+                'page number should be stripped',
+            ),
+        ]
+        self._run_urls(specs)
+
+    def test_numbers(self):
+        specs = [
+            (
+                'http://foo.com/5.html',
+                'http://foo.com',
+                'number should be stripped',
+            ),
+            (
+                'http://foo.com/path/to/5.html',
+                'http://foo.com/path/to',
+                'number should be stripped',
+            ),
+        ]
+        self._run_urls(specs)
+