@@ -214,6 +214,7 @@ tokenizeriter_next(tokenizeriterobject *it)
     const char *line_start = ISSTRINGLIT(type) ? it->tok->multi_line_start : it->tok->line_start;
     PyObject *line = NULL;
+    int line_changed = 1;
     if (it->tok->tok_extra_tokens && is_trailing_token) {
         line = PyUnicode_FromString("");
     } else {
@@ -228,12 +229,11 @@ tokenizeriter_next(tokenizeriterobject *it)
             Py_XDECREF(it->last_line);
             line = PyUnicode_DecodeUTF8(line_start, size, "replace");
             it->last_line = line;
-            if (it->tok->lineno != it->last_end_lineno) {
-                it->byte_col_offset_diff = 0;
-            }
+            it->byte_col_offset_diff = 0;
         } else {
             // Line hasn't changed so we reuse the cached one.
             line = it->last_line;
+            line_changed = 0;
         }
     }
     if (line == NULL) {
@@ -251,7 +251,13 @@ tokenizeriter_next(tokenizeriterobject *it)
     Py_ssize_t byte_offset = -1;
     if (token.start != NULL && token.start >= line_start) {
         byte_offset = token.start - line_start;
-        col_offset = byte_offset - it->byte_col_offset_diff;
+        if (line_changed) {
+            col_offset = _PyPegen_byte_offset_to_character_offset_line(line, 0, byte_offset);
+            it->byte_col_offset_diff = byte_offset - col_offset;
+        }
+        else {
+            col_offset = byte_offset - it->byte_col_offset_diff;
+        }
     }
     if (token.end != NULL && token.end >= it->tok->line_start) {
         Py_ssize_t end_byte_offset = token.end - it->tok->line_start;
0 commit comments