@@ -131,31 +131,6 @@ STATIC bool is_tail_of_identifier(mp_lexer_t *lex) {
131
131
return is_head_of_identifier (lex ) || is_digit (lex );
132
132
}
133
133
134
- #if MICROPY_PY_FSTRING
135
- STATIC void swap_char_banks (mp_lexer_t * lex ) {
136
- if (lex -> vstr_postfix_processing ) {
137
- lex -> chr3 = lex -> chr0 ;
138
- lex -> chr4 = lex -> chr1 ;
139
- lex -> chr5 = lex -> chr2 ;
140
- lex -> chr0 = lex -> vstr_postfix .buf [0 ];
141
- lex -> chr1 = lex -> vstr_postfix .buf [1 ];
142
- lex -> chr2 = lex -> vstr_postfix .buf [2 ];
143
-
144
- lex -> vstr_postfix_idx = 3 ;
145
- } else {
146
- // blindly reset to the "backup" bank when done postfix processing
147
- // this restores control to the mp_reader
148
- lex -> chr0 = lex -> chr3 ;
149
- lex -> chr1 = lex -> chr4 ;
150
- lex -> chr2 = lex -> chr5 ;
151
- // willfully ignoring setting chr3-5 here - WARNING consider those garbage data now
152
-
153
- vstr_reset (& lex -> vstr_postfix );
154
- lex -> vstr_postfix_idx = 0 ;
155
- }
156
- }
157
- #endif
158
-
159
134
STATIC void next_char (mp_lexer_t * lex ) {
160
135
if (lex -> chr0 == '\n' ) {
161
136
// a new line
@@ -169,15 +144,29 @@ STATIC void next_char(mp_lexer_t *lex) {
169
144
++ lex -> column ;
170
145
}
171
146
147
+ // shift the input queue forward
172
148
lex -> chr0 = lex -> chr1 ;
173
149
lex -> chr1 = lex -> chr2 ;
174
150
151
+ // and add the next byte from either the fstring args or the reader
175
152
#if MICROPY_PY_FSTRING
176
- if (lex -> vstr_postfix_processing ) {
177
- if (lex -> vstr_postfix_idx == lex -> vstr_postfix .len ) {
178
- lex -> chr2 = '\0' ;
153
+ if (lex -> fstring_args_idx ) {
154
+ // if there are saved chars, then we're currently injecting fstring args
155
+ if (lex -> fstring_args_idx < lex -> fstring_args .len ) {
156
+ lex -> chr2 = lex -> fstring_args .buf [lex -> fstring_args_idx ++ ];
179
157
} else {
180
- lex -> chr2 = lex -> vstr_postfix .buf [lex -> vstr_postfix_idx ++ ];
158
+ // no more fstring arg bytes
159
+ lex -> chr2 = '\0' ;
160
+ }
161
+
162
+ if (lex -> chr0 == '\0' ) {
163
+ // consumed all fstring data, restore saved input queue
164
+ lex -> chr0 = lex -> chr0_saved ;
165
+ lex -> chr1 = lex -> chr1_saved ;
166
+ lex -> chr2 = lex -> chr2_saved ;
167
+ // stop consuming fstring arg data
168
+ vstr_reset (& lex -> fstring_args );
169
+ lex -> fstring_args_idx = 0 ;
181
170
}
182
171
} else
183
172
#endif
@@ -198,13 +187,6 @@ STATIC void next_char(mp_lexer_t *lex) {
198
187
if (lex -> chr2 == MP_LEXER_EOF && lex -> chr1 != MP_LEXER_EOF && lex -> chr1 != '\n' ) {
199
188
lex -> chr2 = '\n' ;
200
189
}
201
-
202
- #if MICROPY_PY_FSTRING
203
- if (lex -> vstr_postfix_processing && lex -> chr0 == '\0' ) {
204
- lex -> vstr_postfix_processing = false;
205
- swap_char_banks (lex );
206
- }
207
- #endif
208
190
}
209
191
210
192
STATIC void indent_push (mp_lexer_t * lex , size_t indent ) {
@@ -350,8 +332,13 @@ STATIC void parse_string_literal(mp_lexer_t *lex, bool is_raw, bool is_fstring)
350
332
351
333
size_t n_closing = 0 ;
352
334
#if MICROPY_PY_FSTRING
353
- bool in_expression = false;
354
- bool expression_eat = true;
335
+ if (is_fstring ) {
336
+ // assume there's going to be interpolation, so prep the injection data
337
+ // fstring_args_idx==0 && len(fstring_args)>0 means we're extracting the args.
338
+ // only when fstring_args_idx>0 will we consume the arg data
339
+ // note: lex->fstring_args will be empty already (it's reset when finished)
340
+ vstr_add_str (& lex -> fstring_args , ".format(" );
341
+ }
355
342
#endif
356
343
357
344
while (!is_end (lex ) && (num_quotes > 1 || !is_char (lex , '\n' )) && n_closing < num_quotes ) {
@@ -362,49 +349,36 @@ STATIC void parse_string_literal(mp_lexer_t *lex, bool is_raw, bool is_fstring)
362
349
n_closing = 0 ;
363
350
364
351
#if MICROPY_PY_FSTRING
365
- if (is_fstring && is_char (lex , '{' )) {
366
- vstr_add_char (& lex -> vstr , CUR_CHAR (lex ));
367
- in_expression = !in_expression ;
368
- expression_eat = in_expression ;
369
-
370
- if (lex -> vstr_postfix .len == 0 ) {
371
- vstr_add_str (& lex -> vstr_postfix , ".format(" );
372
- }
373
-
352
+ while (is_fstring && is_char (lex , '{' )) {
374
353
next_char (lex );
375
- continue ;
376
- }
377
-
378
- if (is_fstring && is_char (lex , '}' )) {
379
- vstr_add_char (& lex -> vstr , CUR_CHAR (lex ));
380
-
381
- if (in_expression ) {
382
- in_expression = false;
383
- vstr_add_char (& lex -> vstr_postfix , ',' );
384
- }
385
-
386
- next_char (lex );
387
- continue ;
388
- }
389
-
390
- if (in_expression ) {
391
- // throw errors for illegal chars inside f-string expressions
392
- if (is_char (lex , '#' ) || is_char (lex , '\\' )) {
393
- lex -> tok_kind = MP_TOKEN_MALFORMED_FSTRING ;
394
- return ;
395
- } else if (is_char (lex , ':' )) {
396
- expression_eat = false;
397
- }
398
-
399
- unichar c = CUR_CHAR (lex );
400
- if (expression_eat ) {
401
- vstr_add_char (& lex -> vstr_postfix , c );
354
+ if (is_char (lex , '{' )) {
355
+ // "{{" is passed through unchanged to be handled by str.format
356
+ vstr_add_byte (& lex -> vstr , '{' );
357
+ next_char (lex );
402
358
} else {
403
- vstr_add_char (& lex -> vstr , c );
359
+ // remember the start of this argument (if we need it for f'{a=}').
360
+ size_t i = lex -> fstring_args .len ;
361
+ // extract characters inside the { until we reach the
362
+ // format specifier or closing }.
363
+ // (MicroPython limitation) note: this is completely unaware of
364
+ // Python syntax and will not handle any expression containing '}' or ':'.
365
+ // e.g. f'{"}"}' or f'{foo({})}'.
366
+ while (!is_end (lex ) && !is_char_or (lex , ':' , '}' )) {
367
+ // like the default case at the end of this function, stay 8-bit clean
368
+ vstr_add_byte (& lex -> fstring_args , CUR_CHAR (lex ));
369
+ next_char (lex );
370
+ }
371
+ if (lex -> fstring_args .buf [lex -> fstring_args .len - 1 ] == '=' ) {
372
+ // if the last character of the arg was '=', then inject "arg=" before the '{'.
373
+ // f'{a=}' --> 'a={}'.format(a)
374
+ vstr_add_strn (& lex -> vstr , lex -> fstring_args .buf + i , lex -> fstring_args .len - i );
375
+ // remove the trailing '='
376
+ lex -> fstring_args .len -- ;
377
+ }
378
+ // comma-separate args
379
+ vstr_add_byte (& lex -> fstring_args , ',' );
404
380
}
405
-
406
- next_char (lex );
407
- continue ;
381
+ vstr_add_byte (& lex -> vstr , '{' );
408
382
}
409
383
#endif
410
384
@@ -562,11 +536,19 @@ STATIC bool skip_whitespace(mp_lexer_t *lex, bool stop_at_newline) {
562
536
563
537
void mp_lexer_to_next (mp_lexer_t * lex ) {
564
538
#if MICROPY_PY_FSTRING
565
- if (lex -> vstr_postfix .len && !lex -> vstr_postfix_processing ) {
566
- // end format call injection
567
- vstr_add_char (& lex -> vstr_postfix , ')' );
568
- lex -> vstr_postfix_processing = true;
569
- swap_char_banks (lex );
539
+ if (lex -> fstring_args .len && lex -> fstring_args_idx == 0 ) {
540
+ // moving on to the next token means the literal string is complete.
541
+ // switch into injecting the format args.
542
+ vstr_add_byte (& lex -> fstring_args , ')' );
543
+ lex -> chr0_saved = lex -> chr0 ;
544
+ lex -> chr1_saved = lex -> chr1 ;
545
+ lex -> chr2_saved = lex -> chr2 ;
546
+ lex -> chr0 = lex -> fstring_args .buf [0 ];
547
+ lex -> chr1 = lex -> fstring_args .buf [1 ];
548
+ lex -> chr2 = lex -> fstring_args .buf [2 ];
549
+ // we've already extracted 3 chars, but setting this non-zero also
550
+ // means we'll start consuming the fstring data
551
+ lex -> fstring_args_idx = 3 ;
570
552
}
571
553
#endif
572
554
@@ -646,6 +628,7 @@ void mp_lexer_to_next(mp_lexer_t *lex) {
646
628
}
647
629
#if MICROPY_PY_FSTRING
648
630
if (is_char_following (lex , 'f' )) {
631
+ // raw-f-strings unsupported, immediately return (invalid) token.
649
632
lex -> tok_kind = MP_TOKEN_FSTRING_RAW ;
650
633
break ;
651
634
}
@@ -654,6 +637,7 @@ void mp_lexer_to_next(mp_lexer_t *lex) {
654
637
#if MICROPY_PY_FSTRING
655
638
else if (is_char (lex , 'f ')) {
656
639
if (is_char_following (lex , 'r' )) {
640
+ // raw-f-strings unsupported, immediately return (invalid) token.
657
641
lex -> tok_kind = MP_TOKEN_FSTRING_RAW ;
658
642
break ;
659
643
}
@@ -839,7 +823,7 @@ mp_lexer_t *mp_lexer_new(qstr src_name, mp_reader_t reader) {
839
823
lex -> indent_level = m_new (uint16_t , lex -> alloc_indent_level );
840
824
vstr_init (& lex -> vstr , 32 );
841
825
#if MICROPY_PY_FSTRING
842
- vstr_init (& lex -> vstr_postfix , 0 );
826
+ vstr_init (& lex -> fstring_args , 0 );
843
827
#endif
844
828
845
829
// store sentinel for first indentation level
0 commit comments