gui199
diff --git a/‎src/readability_lxml/readability.py
Lines changed: 19 additions & 4 deletions b/‎src/readability_lxml/readability.py
Lines changed: 19 additions & 4 deletions
diff --git a/‎src/tests/test_readability.py
Lines changed: 23 additions & 3 deletions b/‎src/tests/test_readability.py
Lines changed: 23 additions & 3 deletions
diff --git a/‎test_data/basic-multi-page-3.html
Lines changed: 0 additions & 60 deletions b/‎test_data/basic-multi-page-3.html
Lines changed: 0 additions & 60 deletions
@@ -9,7 +9,6 @@
 from collections import namedtuple
 from lxml.etree import tostring
 from lxml.etree import tounicode
-from lxml.html.diff import htmldiff
 from lxml.html import document_fromstring
 from lxml.html import fragment_fromstring
 
@@ -448,7 +447,11 @@ def get_raw_article(candidates, best_candidate, enclose_with_html_tag=True):
                 # We don't want to append directly to output, but the div
                 # in html->body->div
                 if enclose_with_html_tag:
-                    output.getchildren()[0].getchildren()[0].append(sibling)
+                    if sibling.tag == 'body':
+                        for elem in sibling.getchildren():
+                            output.getchildren()[0].getchildren()[0].append(elem)
+                    else:
+                        output.getchildren()[0].getchildren()[0].append(sibling)
                 else:
                     output.append(sibling)
 
@@ -824,8 +827,20 @@ def append_next_page(parsed_urls, page_url, doc, options):
         # page_doc is a singular element containing the page article elements.  We
         # want to add its children to the main article document to which we are
         # appending a page.
-        for elem in page_doc:
-            doc.append(elem)
+        if doc.tag == 'html':
+            children = doc.getchildren()
+            if children[0].tag == 'head':
+                # <head> comes first; append to the second child (presumably <body>).
+                for elem in page_doc:
+                    doc.getchildren()[1].append(elem)
+            else:
+                # No leading <head>; append to the first child (presumably <body>).
+                for elem in page_doc:
+                    doc.getchildren()[0].append(elem)
+        else:
+            # doc is not an <html> root, so append the page elements onto it directly.
+            for elem in page_doc:
+                doc.append(elem)
     if next_page_url is not None:
         append_next_page(parsed_urls, next_page_url, doc, options)
 
 
@@ -1,6 +1,9 @@
 import os
 import unittest
 
+from lxml.html import document_fromstring
+from lxml.html.diff import htmldiff
+
 from helpers import load_regression_data
 from helpers import REGRESSION_DATA
 from readability_lxml.readability import Document
@@ -209,7 +212,24 @@ def test_basic(self):
                 'urlfetch': fetcher
                 }
         doc = Document(html, **options)
-        res = doc.summary()
+        res = doc.summary_with_metadata()
+
+        self.assertIn('Page 2', res.html, 'Should find the page 2 heading')
+        self.assertIn('Page 3', res.html, 'Should find the page 3 heading')
+
+        expected_html = load_regression_data('basic-multi-page-expected.html')
+        diff_html = htmldiff(expected_html, res.html)
+        diff_doc = document_fromstring(diff_html)
+
+        insertions = diff_doc.xpath('//ins')
+        deletions = diff_doc.xpath('//del')
+
+        if len(insertions) != 0:
+            for i in insertions:
+                print('unexpected insertion: %s' % i.xpath('string()'))
+            self.fail('readability result does not match expected')
 
-        self.assertIn('Page 2', res, 'Should find the page 2 heading')
-        self.assertIn('Page 3', res, 'Should find the page 3 heading')
+        if len(deletions) != 0:
+            for i in deletions:
+                print('unexpected deletion: %s' % i.xpath('string()'))
+            self.fail('readability result does not match expected')