This repository was archived by the owner on Jan 4, 2022. It is now read-only.
2 files changed: 7 additions and 3 deletions.
Original file line number Diff line number Diff line change 5
5
syntax that can only be solved by conditionally importing different functions.
6
6
"""
7
7
import sys
8
+ from lxml .etree import tostring
8
9
9
10
if sys .version_info [0 ] == 2 :
10
11
bytes_ = str
11
12
str_ = unicode
13
+ def tostring_ (s ):
14
+ return tostring (s , encoding = 'utf-8' ).decode ('utf-8' )
12
15
13
16
elif sys .version_info [0 ] == 3 :
14
17
bytes_ = bytes
15
18
str_ = str
19
+ def tostring_ (s ):
20
+ return tostring (s , encoding = 'utf-8' )
Original file line number Diff line number Diff line change 4
4
import re
5
5
import sys
6
6
7
- from lxml .etree import tostring
8
7
from lxml .etree import tounicode
9
8
from lxml .html import document_fromstring
10
9
from lxml .html import fragment_fromstring
15
14
from .htmls import get_body
16
15
from .htmls import get_title
17
16
from .htmls import shorten_title
18
- from .compat import str_ , bytes_
17
+ from .compat import str_ , bytes_ , tostring_
19
18
from .debug import describe , text_content
20
19
21
20
@@ -464,7 +463,7 @@ def transform_misused_divs_into_paragraphs(self):
464
463
# This results in incorrect results in case there is an <img>
465
464
# buried within an <a> for example
466
465
if not REGEXES ["divToPElementsRe" ].search (
467
- str_ (b"" .join (map (lambda it : tostring ( it , encoding = "utf-8" ) , list (elem ))))
466
+ str_ (b"" .join (map (tostring_ , list (elem ))))
468
467
):
469
468
# log.debug("Altering %s to p" % (describe(elem)))
470
469
elem .tag = "p"
You can’t perform that action at this time.
0 commit comments