@@ -207,24 +207,25 @@ def char(self):
207
207
EOF when EOF is reached.
208
208
"""
209
209
if self .queue :
210
- return self .queue .pop (0 )
210
+ char = self .queue .pop (0 )
211
+ if char == "\n " :
212
+ self .lineLengths .append (self .col )
213
+ self .line += 1
214
+ self .col = 0
215
+ return char
211
216
else :
212
- c = self .dataStream .read (1 , 1 )
213
- if not c :
214
- self .col += 1
215
- return EOF
216
-
217
- # Normalize newlines and null characters
218
- if c == '\x00 ' :
219
- self .errors .append ('null character found in input stream, '
220
- 'replaced with U+FFFD' )
221
- c = u'\uFFFD '
217
+ c = self .readChar ()
218
+ if c is EOF :
219
+ return c
220
+
222
221
if c == '\r ' :
223
222
#XXX This isn't right in the case with multiple CR in a row
224
223
#also recursing here isn't ideal + not sure what happens to input position
225
- c = self .char ()
226
- if c and c != '\n ' :
224
+ c = self .readChar ()
225
+ if c is not EOF and c not in ( '\n ' , ' \r ' ) :
227
226
self .queue .insert (0 , unicode (c ))
227
+ elif c == '\r ' :
228
+ self .queue .insert (0 , u'\n ' )
228
229
c = '\n '
229
230
230
231
# update position in stream
@@ -236,6 +237,21 @@ def char(self):
236
237
self .col += 1
237
238
return unicode (c )
238
239
240
def readChar(self):
    """Read one character from the data stream, normalizing NUL only.

    A NUL character (U+0000) is replaced with U+FFFD and a message is
    appended to ``self.errors``. Carriage returns are returned unchanged;
    this method performs no newline handling.

    Returns the character read, or ``EOF`` when the underlying stream
    yields nothing. ``self.col`` is incremented by one in the EOF case.
    """
    c = self.dataStream.read(1, 1)
    if not c:
        # An empty read means end of input; the column is still advanced.
        self.col += 1
        return EOF

    # Normalize null characters only (CR is deliberately left alone here).
    if c == '\x00':
        self.errors.append('null character found in input stream, '
                           'replaced with U+FFFD')
        c = u'\uFFFD'
    return c
254
+
239
255
def charsUntil (self , characters , opposite = False ):
240
256
""" Returns a string of characters from the stream up to but not
241
257
including any character in characters or EOF. characters can be
0 commit comments