8000 py/parse: Break rule data into separate act and arg arrays. · neverhover/micropython@845511a · GitHub
[go: up one dir, main page]

Skip to content

Commit 845511a

Browse files
committed
py/parse: Break rule data into separate act and arg arrays.
Instead of each rule being stored in ROM as a struct with rule_id, act and arg, the act and arg parts are now in separate arrays and the rule_id part is removed because it's not needed. This reduces code size, by roughly one byte per grammar rule, around 150 bytes.
1 parent 1039c5e commit 845511a

File tree

1 file changed

+57
-27
lines changed

1 file changed

+57
-27
lines changed

py/parse.c

Lines changed: 57 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@
6161
typedef struct _rule_t {
6262
byte rule_id;
6363
byte act;
64-
uint16_t arg[];
64+
const uint16_t *arg;
6565
} rule_t;
6666

6767
enum {
@@ -81,42 +81,64 @@ enum {
8181
#undef DEF_RULE_NC
8282
};
8383

84+
// Define an array of actions corresponding to each rule
85+
STATIC const uint8_t rule_act_table[] = {
8486
#define or(n) (RULE_ACT_OR | n)
8587
#define and(n) (RULE_ACT_AND | n)
8688
#define and_ident(n) (RULE_ACT_AND | n | RULE_ACT_ALLOW_IDENT)
8789
#define and_blank(n) (RULE_ACT_AND | n | RULE_ACT_ADD_BLANK)
8890
#define one_or_more (RULE_ACT_LIST | 2)
8991
#define list (RULE_ACT_LIST | 1)
9092
#define list_with_end (RULE_ACT_LIST | 3)
91-
#define tok(t) (RULE_ARG_TOK | MP_TOKEN_##t)
92-
#define rule(r) (RULE_ARG_RULE | RULE_##r)
93-
#define opt_rule(r) (RULE_ARG_OPT_RULE | RULE_##r)
94-
#define DEF_RULE(rule, comp, kind, ...) static const rule_t rule_##rule = { RULE_##rule, kind, { __VA_ARGS__ } };
95-
#define DEF_RULE_NC(rule, kind, ...) static const rule_t rule_##rule = { RULE_##rule, kind, { __VA_ARGS__ } };
93+
94+
#define DEF_RULE(rule, comp, kind, ...) kind,
95+
#define DEF_RULE_NC(rule, kind, ...)
9696
#include "py/grammar.h"
97+
#undef DEF_RULE
98+
#undef DEF_RULE_NC
99+
100+
0, // RULE_const_object
101+
102+
#define DEF_RULE(rule, comp, kind, ...)
103+
#define DEF_RULE_NC(rule, kind, ...) kind,
104+
#include "py/grammar.h"
105+
#undef DEF_RULE
106+
#undef DEF_RULE_NC
107+
97108
#undef or
98109
#undef and
110+
#undef and_ident
111+
#undef and_blank
112+
#undef one_or_more
99113
#undef list
100114
#undef list_with_end
115+
};
116+
117+
// Define the argument data for each rule
118+
#define tok(t) (RULE_ARG_TOK | MP_TOKEN_##t)
119+
#define rule(r) (RULE_ARG_RULE | RULE_##r)
120+
#define opt_rule(r) (RULE_ARG_OPT_RULE | RULE_##r)
121+
122+
#define DEF_RULE(rule, comp, kind, ...) static const uint16_t const rule_arg_##rule[] = { __VA_ARGS__ };
123+
#define DEF_RULE_NC(rule, kind, ...) static const uint16_t const rule_arg_##rule[] = { __VA_ARGS__ };
124+
#include "py/grammar.h"
125+
#undef DEF_RULE
126+
#undef DEF_RULE_NC
127+
101128
#undef tok
102129
#undef rule
103130
#undef opt_rule
104-
#undef one_or_more
105-
#undef DEF_RULE
106-
#undef DEF_RULE_NC
107131

108-
STATIC const rule_t *const rules[] = {
109-
// define rules with a compile function
110-
#define DEF_RULE(rule, comp, kind, ...) &rule_##rule,
132+
// Define an array of pointers to corresponding rule data
133+
STATIC const uint16_t *const rule_arg_table[] = {
134+
#define DEF_RULE(rule, comp, kind, ...) &rule_arg_##rule[0],
111135
#define DEF_RULE_NC(rule, kind, ...)
112136
#include "py/grammar.h"
113137
#undef DEF_RULE
114138
#undef DEF_RULE_NC
115139
NULL, // RULE_const_object
116-
117-
// define rules without a compile function
118140
#define DEF_RULE(rule, comp, kind, ...)
119-
#define DEF_RULE_NC(rule, kind, ...) &rule_##rule,
141+
#define DEF_RULE_NC(rule, kind, ...) &rule_arg_##rule[0],
120142
#include "py/grammar.h"
121143
#undef DEF_RULE
122144
#undef DEF_RULE_NC
@@ -139,6 +161,10 @@ STATIC const char *const rule_name_table[] = {
139161
};
140162
#endif
141163

164+
#if (MICROPY_COMP_CONST_FOLDING && MICROPY_COMP_CONST) || !MICROPY_ENABLE_DOC_STRING
165+
static const rule_t rule_pass_stmt = {RULE_pass_stmt, (RULE_ACT_AND | 1), &rule_arg_pass_stmt[0]};
166+
#endif
167+
142168
typedef struct _rule_stack_t {
143169
size_t src_line : 8 * sizeof(size_t) - 8; // maximum bits storing source line number
144170
size_t rule_id : 8; // this must be large enough to fit largest rule number
@@ -214,27 +240,29 @@ STATIC void *parser_alloc(parser_t *parser, size_t num_bytes) {
214240
return ret;
215241
}
216242

217-
STATIC void push_rule(parser_t *parser, size_t src_line, const rule_t *rule, size_t arg_i) {
243+
STATIC void push_rule(parser_t *parser, size_t src_line, uint8_t rule_id, size_t arg_i) {
218244
if (parser->rule_stack_top >= parser->rule_stack_alloc) {
219245
rule_stack_t *rs = m_renew(rule_stack_t, parser->rule_stack, parser->rule_stack_alloc, parser->rule_stack_alloc + MICROPY_ALLOC_PARSE_RULE_INC);
220246
parser->rule_stack = rs;
221247
parser->rule_stack_alloc += MICROPY_ALLOC_PARSE_RULE_INC;
222248
}
223249
rule_stack_t *rs = &parser->rule_stack[parser->rule_stack_top++];
224250
rs->src_line = src_line;
225-
rs->rule_id = rule->rule_id;
251+
rs->rule_id = rule_id;
226252
rs->arg_i = arg_i;
227253
}
228254

229255
STATIC void push_rule_from_arg(parser_t *parser, size_t arg) {
230256
assert((arg & RULE_ARG_KIND_MASK) == RULE_ARG_RULE || (arg & RULE_ARG_KIND_MASK) == RULE_ARG_OPT_RULE);
231257
size_t rule_id = arg & RULE_ARG_ARG_MASK;
232-
push_rule(parser, parser->lexer->tok_line, rules[rule_id], 0);
258+
push_rule(parser, parser->lexer->tok_line, rule_id, 0);
233259
}
234260

235-
STATIC void pop_rule(parser_t *parser, const rule_t **rule, size_t *arg_i, size_t *src_line) {
261+
STATIC void pop_rule(parser_t *parser, rule_t *rule, size_t *arg_i, size_t *src_line) {
236262
parser->rule_stack_top -= 1;
237-
*rule = rules[parser->rule_stack[parser->rule_stack_top].rule_id];
263+
rule->rule_id = parser->rule_stack[parser->rule_stack_top].rule_id;
264+
rule->act = rule_act_table[rule->rule_id];
265+
rule->arg = rule_arg_table[rule->rule_id];
238266
*arg_i = parser->rule_stack[parser->rule_stack_top].arg_i;
239267
*src_line = parser->rule_stack[parser->rule_stack_top].src_line;
240268
}
@@ -656,7 +684,7 @@ STATIC bool fold_constants(parser_t *parser, const rule_t *rule, size_t num_args
656684
if (qstr_str(id)[0] == '_') {
657685
pop_result(parser); // pop const(value)
658686
pop_result(parser); // pop id
659-
push_result_rule(parser, 0, rules[RULE_pass_stmt], 0); // replace with "pass"
687+
push_result_rule(parser, 0, &rule_pass_stmt, 0); // replace with "pass"
660688
return true;
661689
}
662690

@@ -782,7 +810,7 @@ mp_parse_tree_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) {
782810
case MP_PARSE_EVAL_INPUT: top_level_rule = RULE_eval_input; break;
783811
default: top_level_rule = RULE_file_input;
784812
}
785-
push_rule(&parser, lex->tok_line, rules[top_level_rule], 0);
813+
push_rule(&parser, lex->tok_line, top_level_rule, 0);
786814

787815
// parse!
788816

@@ -797,7 +825,9 @@ mp_parse_tree_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) {
797825
break;
798826
}
799827

800-
pop_rule(&parser, &rule, &i, &rule_src_line);
828+
rule_t rule_data;
829+
pop_rule(&parser, &rule_data, &i, &rule_src_line);
830+
rule = &rule_data;
801831
n = rule->act & RULE_ACT_ARG_MASK;
802832

803833
/*
@@ -827,7 +857,7 @@ mp_parse_tree_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) {
827857
} else {
828858
assert(kind == RULE_ARG_RULE);
829859
if (i + 1 < n) {
830-
push_rule(&parser, rule_src_line, rule, i + 1); // save this or-rule
860+
push_rule(&parser, rule_src_line, rule->rule_id, i + 1); // save this or-rule
831861
}
832862
push_rule_from_arg(&parser, rule->arg[i]); // push child of or-rule
833863
goto next_rule;
@@ -879,7 +909,7 @@ mp_parse_tree_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) {
879909
}
880910
}
881911
} else {
882-
push_rule(&parser, rule_src_line, rule, i + 1); // save this and-rule
912+
push_rule(&parser, rule_src_line, rule->rule_id, i + 1); // save this and-rule
883913
push_rule_from_arg(&parser, rule->arg[i]); // push child of and-rule
884914
goto next_rule;
885915
}
@@ -900,7 +930,7 @@ mp_parse_tree_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) {
900930
// Pushing the "pass" rule here will overwrite any RULE_const_object
901931
// entry that was on the result stack, allowing the GC to reclaim
902932
// the memory from the const object when needed.
903-
push_result_rule(&parser, rule_src_line, rules[RULE_pass_stmt], 0);
933+
push_result_rule(&parser, rule_src_line, &rule_pass_stmt, 0);
904934
break;
905935
}
906936
}
@@ -1009,7 +1039,7 @@ mp_parse_tree_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) {
10091039
}
10101040
} else {
10111041
assert((arg & RULE_ARG_KIND_MASK) == RULE_ARG_RULE);
1012-
push_rule(&parser, rule_src_line, rule, i + 1); // save this list-rule
1042+
push_rule(&parser, rule_src_line, rule->rule_id, i + 1); // save this list-rule
10131043
push_rule_from_arg(&parser, arg); // push child of list-rule
10141044
goto next_rule;
10151045
}

0 commit comments

Comments
 (0)
0