@@ -226,8 +226,8 @@ def char(self):
226
226
self .col += 1
227
227
return char
228
228
229
- def readChunk (self , chunkSize = 1024 ):
230
- data = self .dataStream .read (1024 )
229
+ def readChunk (self , chunkSize = 10240 ):
230
+ data = self .dataStream .read (chunkSize )
231
231
if not data :
232
232
return
233
233
#Replace null characters
@@ -250,18 +250,40 @@ def charsUntil(self, characters, opposite = False):
250
250
including any character in characters or EOF. characters can be
251
251
any container that supports the in method being called on it.
252
252
"""
253
- charStack = [self .char ()]
254
253
255
- while charStack [- 1 ] and (charStack [- 1 ] in characters ) == opposite :
256
- charStack .append (self .char ())
254
+ #This method is currently 40-50% of our total runtime and badly needs
255
+ #optimizing
256
+ #Possible improvements:
257
+ # - use regexp to find characters that match the required character set
258
+ # - compute line positions in a single pass at the end
259
+ # - improve EOF handling for fewer if statements
257
260
258
- # Put the character stopped on back to the front of the queue
259
- # from where it came.
260
- c = charStack . pop ()
261
- if c != EOF :
262
- self . unget ( c )
261
+ if not self . queue :
262
+ self . readChunk ()
263
+ #Break if we have reached EOF
264
+ if not self . queue or self . queue [ 0 ] == None :
265
+ return u""
263
266
264
- return u"" .join (charStack )
267
+ i = 0
268
+ while (self .queue [i ] in characters ) == opposite :
269
+ #Working out positions like this really sucks
270
+ if self .queue [i ] == '\n ' :
271
+ self .lineLengths .append (self .col )
272
+ self .line += 1
273
+ self .col = 0
274
+ else :
275
+ self .col += 1
276
+ i += 1
277
+ if i == len (self .queue ):
278
+ self .readChunk ()
279
+ #If the queue doesn't grow we have reached EOF
280
+ if i == len (self .queue ) or self .queue [i ] is EOF :
281
+ break
282
+
283
+ rv = u"" .join (self .queue [:i ])
284
+ self .queue = self .queue [i :]
285
+
286
+ return rv
265
287
266
288
def unget (self , chars ):
267
289
if chars :
0 commit comments