@@ -87,7 +87,7 @@ def detectEncoding(self, parseMeta=True, chardet=True):
87
87
import chardet
88
88
buffer = self .rawStream .read ()
89
89
encoding = chardet .detect (buffer )['encoding' ]
90
- self .rawStream = self . openStream (buffer )
90
+ self .seek (buffer , 0 )
91
91
except ImportError :
92
92
pass
93
93
# If all else fails use the default encoding
@@ -127,18 +127,50 @@ def detectBOM(self):
127
127
seek = 2
128
128
129
129
130
- #AT - move this to the caller?
131
- # Set the read position past the BOM if one was found, otherwise
132
- # set it to the start of the stream
133
- self .rawStream .seek (encoding and seek or 0 )
130
+ self .seek (string , encoding and seek or 0 )
134
131
135
132
return encoding
136
133
134
def seek(self, buffer, n):
    """Make the next read from ``self.rawStream`` start at ``buffer[n:]``.

    Three strategies are tried in order:

    1. If the raw stream has an ``unget`` method, ``buffer[n:]`` is pushed
       back onto it.
    2. Otherwise the raw stream is asked to ``seek(n)``.
    3. If seeking is impossible (the stream raises ``IOError`` or has no
       ``seek`` method at all), ``self.rawStream`` is replaced by a small
       wrapper that serves ``buffer[n:]`` first and then continues with
       the wrapped stream.

    NOTE(review): strategy 2 moves to the absolute offset ``n``, so it
    matches the other two only when ``buffer`` was read from the very
    start of the stream -- confirm this holds for every caller.

    :arg buffer: data previously read from ``self.rawStream``
    :arg n: offset into ``buffer`` from which reading should resume
    """
    raw = self.rawStream
    if hasattr(raw, 'unget'):
        raw.unget(buffer[n:])
        return

    try:
        raw.seek(n)
    except (IOError, AttributeError):
        # AttributeError covers file-like objects that simply have no
        # seek() method; they need the same fallback as streams whose
        # seek() fails.
        class BufferedStream(object):
            """Read-only wrapper serving pushed-back data before the stream."""

            def __init__(self, data, stream):
                self.data = data      # pushed-back data, served first
                self.stream = stream  # underlying stream, served afterwards

            def read(self, chars=-1):
                """Return up to ``chars`` items; None or negative reads all."""
                pending = self.data
                if not pending:
                    # Nothing buffered: delegate untouched.
                    return self.stream.read(chars)
                read_all = chars is None or chars < 0
                if read_all or chars > len(pending):
                    # Drain the buffer, keeping its type (str or bytes).
                    self.data = pending[:0]
                    if read_all:
                        return pending + self.stream.read()
                    return pending + self.stream.read(chars - len(pending))
                self.data = pending[chars:]
                return pending[:chars]

            def unget(self, data):
                """Push ``data`` back so that it is read before anything else."""
                if self.data:
                    # Pushed-back data was read earlier than what is still
                    # pending, so it has to go in front of it.
                    self.data = data + self.data
                else:
                    self.data = data

        self.rawStream = BufferedStream(buffer[n:], raw)
167
+
137
168
def detectEncodingMeta(self):
    """Return the encoding that EncodingParser extracts from the stream head.

    The data is obtained with ``self.rawStream.read(self.numBytesMeta)`` and
    is handed back through ``self.seek(..., 0)`` before the parser result
    is returned, so the stream is rewound regardless of what the parser
    reports.
    """
    head = self.rawStream.read(self.numBytesMeta)
    metaParser = EncodingParser(head)
    # Push the sniffed data back before asking the parser for its answer.
    self.seek(head, 0)
    return metaParser.getEncoding()
143
175
144
176
def position (self ):
@@ -195,18 +227,9 @@ def charsUntil(self, characters, opposite = False):
195
227
# Put the character stopped on back to the front of the queue
196
228
# from where it came.
197
229
c = charStack .pop ()
198
- if c != EOF :
199
- self .queue .insert (0 , c )
230
+ if c != EOF :
231
+ self .queue .insert (0 , c )
200
232
201
- # XXX the following is need for correct line number reporting apparently
202
- # but it causes to break other tests with the fixes in tokenizer. I have
203
- # no idea why...
204
- #
205
- #if c != EOF and self.tell <= len(self.dataStream) and \
206
- # self.dataStream[self.tell - 1] == c[0]:
207
- # self.tell -= 1
208
- #else:
209
- # self.queue.insert(0, c)
210
233
return u"" .join (charStack )
211
234
212
235
class EncodingBytes (str ):
0 commit comments