@@ -36,7 +36,10 @@ def __init__(self, source, encoding=None, parseMeta=True, chardet=True):
36
36
# List of where new lines occur
37
37
self .newLines = [0 ]
38
38
39
- # Raw Stream
39
+ self .charEncoding = encoding
40
+
41
+ # Raw Stream - for unicode objects this will encode to utf-8 and set
42
+ # self.charEncoding as appropriate
40
43
self .rawStream = self .openStream (source )
41
44
42
45
# Encoding Information
@@ -49,9 +52,8 @@ def __init__(self, source, encoding=None, parseMeta=True, chardet=True):
49
52
self .defaultEncoding = "windows-1252"
50
53
51
54
# Detect the encoding only when no valid explicit "transport level" encoding is supplied
52
- if encoding is None or not isValidEncoding (encoding ):
53
- encoding = self .detectEncoding (parseMeta , chardet )
54
- self .charEncoding = encoding
55
+ if self .charEncoding is None or not isValidEncoding (self .charEncoding ):
56
+ self .charEncoding = self .detectEncoding (parseMeta , chardet )
55
57
56
58
self .dataStream = codecs .getreader (self .charEncoding )(self .rawStream , 'replace' )
57
59
@@ -74,6 +76,7 @@ def openStream(self, source):
74
76
# Otherwise treat source as a string and convert to a file object
75
77
if isinstance (source , unicode ):
76
78
source = source .encode ('utf-8' )
79
+ self .charEncoding = "utf-8"
77
80
import cStringIO
78
81
stream = cStringIO .StringIO (str (source ))
79
82
return stream
0 commit comments