@@ -62,6 +62,12 @@ STATIC bool is_char_or3(mp_lexer_t *lex, byte c1, byte c2, byte c3) {
62
62
return lex -> chr0 == c1 || lex -> chr0 == c2 || lex -> chr0 == c3 ;
63
63
}
64
64
65
#if MICROPY_PY_FSTRING
// Report whether the lexer's current character (chr0) equals any one of the
// four candidate bytes. Only compiled when f-string support is enabled.
STATIC bool is_char_or4(mp_lexer_t *lex, byte c1, byte c2, byte c3, byte c4) {
    if (lex->chr0 == c1 || lex->chr0 == c2) {
        return true;
    }
    return lex->chr0 == c3 || lex->chr0 == c4;
}
#endif
70
+
65
71
// Report whether the first lookahead character (chr1, the one right after
// the current character) equals c.
STATIC bool is_char_following(mp_lexer_t *lex, byte c) {
    if (lex->chr1 == c) {
        return true;
    }
    return false;
}
@@ -105,7 +111,13 @@ STATIC bool is_following_odigit(mp_lexer_t *lex) {
105
111
106
112
// Decide whether the lexer is positioned at the start of a string or bytes
// literal: a bare quote, a one-letter prefix followed by a quote, or a
// two-letter prefix followed by a quote. With MICROPY_PY_FSTRING enabled the
// 'f' prefix (and the 'rf'/'fr' pairs) are recognised as well.
// The predicates used here are helpers defined elsewhere in this file.
STATIC bool is_string_or_bytes(mp_lexer_t *lex) {
    // case 1: the literal starts directly with a quote character
    if (is_char_or(lex, '\'', '\"')) {
        return true;
    }

    // case 2: a single prefix letter immediately followed by a quote
    #if MICROPY_PY_FSTRING
    if (is_char_or4(lex, 'r', 'u', 'b', 'f') && is_char_following_or(lex, '\'', '\"')) {
        return true;
    }
    #else
    if (is_char_or3(lex, 'r', 'u', 'b') && is_char_following_or(lex, '\'', '\"')) {
        return true;
    }
    #endif

    // case 3: a two-letter prefix; every such form needs a quote in the
    // second lookahead position, so reject early when it is missing
    if (!is_char_following_following_or(lex, '\'', '\"')) {
        return false;
    }
    #if MICROPY_PY_FSTRING
    if (is_char_and(lex, 'r', 'f') || is_char_and(lex, 'f', 'r')) {
        return true;
    }
    #endif
    return is_char_and(lex, 'r', 'b') || is_char_and(lex, 'b', 'r');
}
@@ -132,9 +144,35 @@ STATIC void next_char(mp_lexer_t *lex) {
132
144
++ lex -> column ;
133
145
}
134
146
147
+ // shift the input queue forward
135
148
lex -> chr0 = lex -> chr1 ;
136
149
lex -> chr1 = lex -> chr2 ;
137
- lex -> chr2 = lex -> reader .readbyte (lex -> reader .data );
150
+
151
+ // and add the next byte from either the fstring args or the reader
152
+ #if MICROPY_PY_FSTRING
153
+ if (lex -> fstring_args_idx ) {
154
+ // if there are saved chars, then we're currently injecting fstring args
155
+ if (lex -> fstring_args_idx < lex -> fstring_args .len ) {
156
+ lex -> chr2 = lex -> fstring_args .buf [lex -> fstring_args_idx ++ ];
157
+ } else {
158
+ // no more fstring arg bytes
159
+ lex -> chr2 = '\0' ;
160
+ }
161
+
162
+ if (lex -> chr0 == '\0' ) {
163
+ // consumed all fstring data, restore saved input queue
164
+ lex -> chr0 = lex -> chr0_saved ;
165
+ lex -> chr1 = lex -> chr1_saved ;
166
+ lex -> chr2 = lex -> chr2_saved ;
167
+ // stop consuming fstring arg data
168
+ vstr_reset (& lex -> fstring_args );
169
+ lex -> fstring_args_idx = 0 ;
170
+ }
171
+ } else
172
+ #endif
173
+ {
174
+ lex -> chr2 = lex -> reader .readbyte (lex -> reader .data );
175
+ }
138
176
139
177
if (lex -> chr1 == '\r' ) {
140
178
// CR is a new line, converted to LF
@@ -272,7 +310,7 @@ STATIC bool get_hex(mp_lexer_t *lex, size_t num_digits, mp_uint_t *result) {
272
310
return true;
273
311
}
274
312
275
- STATIC void parse_string_literal (mp_lexer_t * lex , bool is_raw ) {
313
+ STATIC void parse_string_literal (mp_lexer_t * lex , bool is_raw , bool is_fstring ) {
276
314
// get first quoting character
277
315
char quote_char = '\'' ;
278
316
if (is_char (lex , '\"' )) {
@@ -293,15 +331,61 @@ STATIC void parse_string_literal(mp_lexer_t *lex, bool is_raw) {
293
331
}
294
332
295
333
size_t n_closing = 0 ;
334
+ #if MICROPY_PY_FSTRING
335
+ if (is_fstring ) {
336
+ // assume there's going to be interpolation, so prep the injection data
337
+ // fstring_args_idx==0 && len(fstring_args)>0 means we're extracting the args.
338
+ // only when fstring_args_idx>0 will we consume the arg data
339
+ // note: lex->fstring_args will be empty already (it's reset when finished)
340
+ vstr_add_str (& lex -> fstring_args , ".format(" );
341
+ }
342
+ #endif
343
+
296
344
while (!is_end (lex ) && (num_quotes > 1 || !is_char (lex , '\n' )) && n_closing < num_quotes ) {
297
345
if (is_char (lex , quote_char )) {
298
346
n_closing += 1 ;
299
347
vstr_add_char (& lex -> vstr , CUR_CHAR (lex ));
300
348
} else {
301
349
n_closing = 0 ;
350
+
351
+ #if MICROPY_PY_FSTRING
352
+ while (is_fstring && is_char (lex , '{' )) {
353
+ next_char (lex );
354
+ if (is_char (lex , '{' )) {
355
+ // "{{" is passed through unchanged to be handled by str.format
356
+ vstr_add_byte (& lex -> vstr , '{' );
357
+ next_char (lex );
358
+ } else {
359
+ // remember the start of this argument (if we need it for f'{a=}').
360
+ size_t i = lex -> fstring_args .len ;
361
+ // extract characters inside the { until we reach the
362
+ // format specifier or closing }.
363
+ // (MicroPython limitation) note: this is completely unaware of
364
+ // Python syntax and will not handle any expression containing '}' or ':'.
365
+ // e.g. f'{"}"}' or f'{foo({})}'.
366
+ while (!is_end (lex ) && !is_char_or (lex , ':' , '}' )) {
367
+ // like the default case at the end of this function, stay 8-bit clean
368
+ vstr_add_byte (& lex -> fstring_args , CUR_CHAR (lex ));
369
+ next_char (lex );
370
+ }
371
+ if (lex -> fstring_args .buf [lex -> fstring_args .len - 1 ] == '=' ) {
372
+ // if the last character of the arg was '=', then inject "arg=" before the '{'.
373
+ // f'{a=}' --> 'a={}'.format(a)
374
+ vstr_add_strn (& lex -> vstr , lex -> fstring_args .buf + i , lex -> fstring_args .len - i );
375
+ // remove the trailing '='
376
+ lex -> fstring_args .len -- ;
377
+ }
378
+ // comma-separate args
379
+ vstr_add_byte (& lex -> fstring_args , ',' );
380
+ }
381
+ vstr_add_byte (& lex -> vstr , '{' );
382
+ }
383
+ #endif
384
+
302
385
if (is_char (lex , '\\' )) {
303
386
next_char (lex );
304
387
unichar c = CUR_CHAR (lex );
388
+
305
389
if (is_raw ) {
306
390
// raw strings allow escaping of quotes, but the backslash is also emitted
307
391
vstr_add_char (& lex -> vstr , '\\' );
@@ -451,6 +535,23 @@ STATIC bool skip_whitespace(mp_lexer_t *lex, bool stop_at_newline) {
451
535
}
452
536
453
537
void mp_lexer_to_next (mp_lexer_t * lex ) {
538
+ #if MICROPY_PY_FSTRING
539
+ if (lex -> fstring_args .len && lex -> fstring_args_idx == 0 ) {
540
+ // moving onto the next token means the literal string is complete.
541
+ // switch into injecting the format args.
542
+ vstr_add_byte (& lex -> fstring_args , ')' );
543
+ lex -> chr0_saved = lex -> chr0 ;
544
+ lex -> chr1_saved = lex -> chr1 ;
545
+ lex -> chr2_saved = lex -> chr2 ;
546
+ lex -> chr0 = lex -> fstring_args .buf [0 ];
547
+ lex -> chr1 = lex -> fstring_args .buf [1 ];
548
+ lex -> chr2 = lex -> fstring_args .buf [2 ];
549
+ // we've already extracted 3 chars, but setting this non-zero also
550
+ // means we'll start consuming the fstring data
551
+ lex -> fstring_args_idx = 3 ;
552
+ }
553
+ #endif
554
+
454
555
// start new token text
455
556
vstr_reset (& lex -> vstr );
456
557
@@ -506,6 +607,7 @@ void mp_lexer_to_next(mp_lexer_t *lex) {
506
607
do {
507
608
// parse type codes
508
609
bool is_raw = false;
610
+ bool is_fstring = false;
509
611
mp_token_kind_t kind = MP_TOKEN_STRING ;
510
612
int n_char = 0 ;
511
613
if (is_char (lex , 'u' )) {
@@ -524,7 +626,25 @@ void mp_lexer_to_next(mp_lexer_t *lex) {
524
626
kind = MP_TOKEN_BYTES ;
525
627
n_char = 2 ;
526
628
}
629
+ #if MICROPY_PY_FSTRING
630
+ if (is_char_following (lex , 'f' )) {
631
+ // raw-f-strings unsupported, immediately return (invalid) token.
632
+ lex -> tok_kind = MP_TOKEN_FSTRING_RAW ;
633
+ break ;
634
+ }
635
+ #endif
636
+ }
637
+ #if MICROPY_PY_FSTRING
638
+ else if (is_char (lex , 'f ')) {
639
+ if (is_char_following (lex , 'r' )) {
640
+ // raw-f-strings unsupported, immediately return (invalid) token.
641
+ lex -> tok_kind = MP_TOKEN_FSTRING_RAW ;
642
+ break ;
643
+ }
644
+ n_char = 1 ;
645
+ is_fstring = true;
527
646
}
647
+ #endif
528
648
529
649
// Set or check token kind
530
650
if (lex -> tok_kind == MP_TOKEN_END ) {
@@ -543,13 +663,12 @@ void mp_lexer_to_next(mp_lexer_t *lex) {
543
663
}
544
664
545
665
// Parse the literal
546
- parse_string_literal (lex , is_raw );
666
+ parse_string_literal (lex , is_raw , is_fstring );
547
667
548
668
// Skip whitespace so we can check if there's another string following
549
669
skip_whitespace (lex , true);
550
670
551
671
} while (is_string_or_bytes (lex ));
552
-
553
672
} else if (is_head_of_identifier (lex )) {
554
673
lex -> tok_kind = MP_TOKEN_NAME ;
555
674
@@ -703,6 +822,9 @@ mp_lexer_t *mp_lexer_new(qstr src_name, mp_reader_t reader) {
703
822
lex -> num_indent_level = 1 ;
704
823
lex -> indent_level = m_new (uint16_t , lex -> alloc_indent_level );
705
824
vstr_init (& lex -> vstr , 32 );
825
+ #if MICROPY_PY_FSTRING
826
+ vstr_init (& lex -> fstring_args , 0 );
827
+ #endif
706
828
707
829
// store sentinel for first indentation level
708
830
lex -> indent_level [0 ] = 0 ;
0 commit comments