8000 py/parse: Compress rule pointer table to table of offsets. · neverhover/micropython@0016a45 · GitHub
[go: up one dir, main page]

Skip to content

Commit 0016a45

Browse files
committed
py/parse: Compress rule pointer table to table of offsets.
This is the sixth and final patch in a series of patches to the parser that aims to reduce code size by compressing the data corresponding to the rules of the grammar. Prior to this set of patches the rules were stored as rule_t structs with rule_id, act and arg members. And then there was a big table of pointers which allowed to lookup the address of a rule_t struct given the id of that rule. The changes that have been made are: - Breaking up of the rule_t struct into individual components, with each component in a separate array. - Removal of the rule_id part of the struct because it's not needed. - Put all the rule arg data in a big array. - Change the table of pointers to rules to a table of offsets within the array of rule arg data. The last point is what is done in this patch here and brings about the biggest decreases in code size, because an array of pointers is now an array of bytes. Code size changes for the six patches combined is: bare-arm: -644 minimal x86: -1856 unix x64: -5408 unix nanbox: -2080 stm32: -720 esp8266: -812 cc3200: -712 For the change in parser performance: it was measured on pyboard that these six patches combined gave an increase in script parse time of about 0.4%. This is due to the slightly more complicated way of looking up the data for a rule (since the 9th bit of the offset into the rule arg data table is calculated with an if statement). This is an acceptable increase in parse time considering that parsing is only done once per script (if compiled on the target).
1 parent c2c92ce commit 0016a45

File tree

1 file changed

+63
-10
lines changed

1 file changed

+63
-10
lines changed

py/parse.c

Lines changed: 63 additions & 10 deletions
10000
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
*
44
* The MIT License (MIT)
55
*
6-
* Copyright (c) 2013-2015 Damien P. George
6+
* Copyright (c) 2013-2017 Damien P. George
77
*
88
* Permission is hereby granted, free of charge, to any person obtaining a copy
99
* of this software and associated documentation files (the "Software"), to deal
@@ -108,36 +108,81 @@ STATIC const uint8_t rule_act_table[] = {
108108
#undef list_with_end
109109
};
110110

111-
// Define the argument data for each rule
111+
// Define the argument data for each rule, as a combined array
112+
STATIC const uint16_t rule_arg_combined_table[] = {
112113
#define tok(t) (RULE_ARG_TOK | MP_TOKEN_##t)
113114
#define rule(r) (RULE_ARG_RULE | RULE_##r)
114115
#define opt_rule(r) (RULE_ARG_OPT_RULE | RULE_##r)
115116

116-
#define DEF_RULE(rule, comp, kind, ...) static const uint16_t const rule_arg_##rule[] = { __VA_ARGS__ };
117-
#define DEF_RULE_NC(rule, kind, ...) static const uint16_t const rule_arg_##rule[] = { __VA_ARGS__ };
117+
#define DEF_RULE(rule, comp, kind, ...) __VA_ARGS__,
118+
#define DEF_RULE_NC(rule, kind, ...)
119+
#include "py/grammar.h"
120+
#undef DEF_RULE
121+
#undef DEF_RULE_NC
122+
123+
#define DEF_RULE(rule, comp, kind, ...)
124+
#define DEF_RULE_NC(rule, kind, ...) __VA_ARGS__,
118125
#include "py/grammar.h"
119126
#undef DEF_RULE
120127
#undef DEF_RULE_NC
121128

122129
#undef tok
123130
#undef rule
124131
#undef opt_rule
132+
};
133+
134+
// Macro to create a list of N-1 identifiers where N is the number of variable arguments to the macro
135+
#define RULE_PADDING(rule, ...) RULE_PADDING2(rule, __VA_ARGS__, RULE_PADDING_IDS(rule))
136+
#define RULE_PADDING2(rule, ...) RULE_PADDING3(rule, __VA_ARGS__)
137+
#define RULE_PADDING3(rule, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, ...) __VA_ARGS__
138+
#define RULE_PADDING_IDS(r) PAD12_##r, PAD11_##r, PAD10_##r, PAD9_##r, PAD8_##r, PAD7_##r, PAD6_##r, PAD5_##r, PAD4_##r, PAD3_##r, PAD2_##r, PAD1_##r,
139+
140+
// Use an enum to create constants that specify where in rule_arg_combined_table a given rule starts
141+
enum {
142+
#define DEF_RULE(rule, comp, kind, ...) RULE_ARG_OFFSET_##rule, RULE_PADDING(rule, __VA_ARGS__)
143+
#define DEF_RULE_NC(rule, kind, ...)
144+
#include "py/grammar.h"
145+
#undef DEF_RULE
146+
#undef DEF_RULE_NC
147+
#define DEF_RULE(rule, comp, kind, ...)
148+
#define DEF_RULE_NC(rule, kind, ...) RULE_ARG_OFFSET_##rule, RULE_PADDING(rule, __VA_ARGS__)
149+
#include "py/grammar.h"
150+
#undef DEF_RULE
151+
#undef DEF_RULE_NC
152+
};
125153

126-
// Define an array of pointers to corresponding rule data
127-
STATIC const uint16_t *const rule_arg_table[] = {
128-
#define DEF_RULE(rule, comp, kind, ...) &rule_arg_##rule[0],
154+
// Use the above enum values to create a table of offsets for each rule's arg
155+
// data, which indexes rule_arg_combined_table. The offsets require 9 bits of
156+
// storage but only the lower 8 bits are stored here. The 9th bit is computed
157+
// in get_rule_arg using the FIRST_RULE_WITH_OFFSET_ABOVE_255 constant.
158+
STATIC const uint8_t rule_arg_offset_table[] = {
159+
#define DEF_RULE(rule, comp, kind, ...) RULE_ARG_OFFSET_##rule & 0xff,
129160
#define DEF_RULE_NC(rule, kind, ...)
130161
#include "py/grammar.h"
131162
#undef DEF_RULE
132163
#undef DEF_RULE_NC
133-
NULL, // RULE_const_object
164+
0, // RULE_const_object
134165
#define DEF_RULE(rule, comp, kind, ...)
135-
#define DEF_RULE_NC(rule, kind, ...) &rule_arg_##rule[0],
166+
#define DEF_RULE_NC(rule, kind, ...) RULE_ARG_OFFSET_##rule & 0xff,
136167
#include "py/grammar.h"
137168
#undef DEF_RULE
138169
#undef DEF_RULE_NC
139170
};
140171

172+
// Define a constant that's used to determine the 9th bit of the values in rule_arg_offset_table
173+
static const size_t FIRST_RULE_WITH_OFFSET_ABOVE_255 =
174+
#define DEF_RULE(rule, comp, kind, ...) RULE_ARG_OFFSET_##rule >= 0x100 ? RULE_##rule :
175+
#define DEF_RULE_NC(rule, kind, ...)
176+
#include "py/grammar.h"
177+
#undef DEF_RULE
178+
#undef DEF_RULE_NC
179+
#define DEF_RULE(rule, comp, kind, ...)
180+
#define DEF_RULE_NC(rule, kind, ...) RULE_ARG_OFFSET_##rule >= 0x100 ? RULE_##rule :
181+
#include "py/grammar.h"
182+
#undef DEF_RULE
183+
#undef DEF_RULE_NC
184+
0;
185+
141186
#if USE_RULE_NAME
142187
// Define an array of rule names corresponding to each rule
143188
STATIC const char *const rule_name_table[] = {
@@ -189,6 +234,14 @@ typedef struct _parser_t {
189234
#endif
190235
} parser_t;
191236

237+
STATIC const uint16_t *get_rule_arg(uint8_t r_id) {
238+
size_t off = rule_arg_offset_table[r_id];
239+
if (r_id >= FIRST_RULE_WITH_OFFSET_ABOVE_255) {
240+
off |= 0x100;
241+
}
242+
return &rule_arg_combined_table[off];
243+
}
244+
192245
STATIC void *parser_alloc(parser_t *parser, size_t num_bytes) {
193246
// use a custom memory allocator to store parse nodes sequentially in large chunks
194247

@@ -816,7 +869,7 @@ mp_parse_tree_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) {
816869
size_t rule_src_line; // source line for the first token matched by the current rule
817870
uint8_t rule_id = pop_rule(&parser, &i, &rule_src_line);
818871
uint8_t rule_act = rule_act_table[rule_id];
819-
const uint16_t *rule_arg = rule_arg_table[rule_id];
872+
const uint16_t *rule_arg = get_rule_arg(rule_id);
820873
size_t n = rule_act & RULE_ACT_ARG_MASK;
821874

822875
/*

0 commit comments

Comments
 (0)
0