8000 py/parse: Simplify parse nodes representing a list. · micropython/micropython@e685083 · GitHub
[go: up one dir, main page]

Skip to content

Commit e685083

Browse files
committed
py/parse: Simplify parse nodes representing a list.
This commit simplifies and optimises the parse tree in-memory representation of lists of expressions, for tuples and lists, and when tuples are used on the left-hand-side of assignments and within del statements. This reduces memory usage of the parse tree when such code is compiled, and also reduces the size of the compiler. For example, (1,) was previously the following parse tree: expr_stmt(5) (n=2) atom_paren(45) (n=1) testlist_comp(146) (n=2) int(1) testlist_comp_3b(149) (n=1) NULL NULL and with this commit is now: expr_stmt(5) (n=2) atom_paren(45) (n=1) testlist_comp(146) (n=1) int(1) NULL Similarly, (1, 2, 3) was previously: expr_stmt(5) (n=2) atom_paren(45) (n=1) testlist_comp(146) (n=2) int(1) testlist_comp_3c(150) (n=2) int(2) int(3) NULL and is now: expr_stmt(5) (n=2) atom_paren(45) (n=1) testlist_comp(146) (n=3) int(1) int(2) int(3) NULL Signed-off-by: Damien George <damien@micropython.org>
1 parent 61b7c09 commit e685083

File tree

2 files changed

+75
-132
lines changed

2 files changed

+75
-132
lines changed

py/compile.c

Lines changed: 39 additions & 130 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,12 @@ typedef enum {
5959
#undef DEF_RULE_NC
6060
} pn_kind_t;
6161

62+
// Whether a mp_parse_node_struct_t that has pns->kind == PN_testlist_comp
63+
// corresponds to a list comprehension or generator.
64+
#define MP_PARSE_NODE_TESTLIST_COMP_HAS_COMP_FOR(pns) \
65+
(MP_PARSE_NODE_STRUCT_NUM_NODES(pns) == 2 && \
66+
MP_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[1], PN_comp_for))
67+
6268
#define NEED_METHOD_TABLE MICROPY_EMIT_NATIVE
6369

6470
#if NEED_METHOD_TABLE
@@ -317,25 +323,13 @@ STATIC void compile_delete_id(compiler_t *comp, qstr qst) {
317323
}
318324
}
319325

320-
STATIC void c_tuple(compiler_t *comp, mp_parse_node_t pn, mp_parse_node_struct_t *pns_list) {
321-
int total = 0;
322-
if (!MP_PARSE_NODE_IS_NULL(pn)) {
323-
compile_node(comp, pn);
324-
total += 1;
325-
}
326-
if (pns_list != NULL) {
327-
int n = MP_PARSE_NODE_STRUCT_NUM_NODES(pns_list);
328-
for (int i = 0; i < n; i++) {
329-
compile_node(comp, pns_list->nodes[i]);
330-
}
331-
total += n;
332-
}
333-
EMIT_ARG(build, total, MP_EMIT_BUILD_TUPLE);
334-
}
335-
336326
STATIC void compile_generic_tuple(compiler_t *comp, mp_parse_node_struct_t *pns) {
337327
// a simple tuple expression
338-
c_tuple(comp, MP_PARSE_NODE_NULL, pns);
328+
size_t num_nodes = MP_PARSE_NODE_STRUCT_NUM_NODES(pns);
329+
for (size_t i = 0; i < num_nodes; i++) {
330+
compile_node(comp, pns->nodes[i]);
331+
}
332+
EMIT_ARG(build, num_nodes, MP_EMIT_BUILD_TUPLE);
339333
}
340334

341335
STATIC void c_if_cond(compiler_t *comp, mp_parse_node_t pn, bool jump_if, int label) {
@@ -452,39 +446,25 @@ STATIC void c_assign_atom_expr(compiler_t *comp, mp_parse_node_struct_t *pns, as
452446
compile_syntax_error(comp, (mp_parse_node_t)pns, MP_ERROR_TEXT("can't assign to expression"));
453447
}
454448

455- 8000
// we need to allow for a caller passing in 1 initial node (node_head) followed by an array of nodes (nodes_tail)
456-
STATIC void c_assign_tuple(compiler_t *comp, mp_parse_node_t node_head, uint num_tail, mp_parse_node_t *nodes_tail) {
457-
uint num_head = (node_head == MP_PARSE_NODE_NULL) ? 0 : 1;
458-
449+
STATIC void c_assign_tuple(compiler_t *comp, uint num_tail, mp_parse_node_t *nodes_tail) {
459450
// look for star expression
460451
uint have_star_index = -1;
461-
if (num_head != 0 && MP_PARSE_NODE_IS_STRUCT_KIND(node_head, PN_star_expr)) {
462-
EMIT_ARG(unpack_ex, 0, num_tail);
463-
have_star_index = 0;
464-
}
465452
for (uint i = 0; i < num_tail; i++) {
466453
if (MP_PARSE_NODE_IS_STRUCT_KIND(nodes_tail[i], PN_star_expr)) {
467454
if (have_star_index == (uint)-1) {
468-
EMIT_ARG(unpack_ex, num_head + i, num_tail - i - 1);
469-
have_star_index = num_head + i;
455+
EMIT_ARG(unpack_ex, i, num_tail - i - 1);
456+
have_star_index = i;
470457
} else {
471458
compile_syntax_error(comp, nodes_tail[i], MP_ERROR_TEXT("multiple *x in assignment"));
472459
return;
473460
}
474461
}
475462
}
476463
if (have_star_index == (uint)-1) {
477-
EMIT_ARG(unpack_sequence, num_head + num_tail);
478-
}
479-
if (num_head != 0) {
480-
if (0 == have_star_index) {
481-
c_assign(comp, ((mp_parse_node_struct_t *)node_head)->nodes[0], ASSIGN_STORE);
482-
} else {
483-
c_assign(comp, node_head, ASSIGN_STORE);
484-
}
464+
EMIT_ARG(unpack_sequence, num_tail);
485465
}
486466
for (uint i = 0; i < num_tail; i++) {
487-
if (num_head + i == have_star_index) {
467+
if (i == have_star_index) {
488468
c_assign(comp, ((mp_parse_node_struct_t *)nodes_tail[i])->nodes[0], ASSIGN_STORE);
489469
} else {
490470
c_assign(comp, nodes_tail[i], ASSIGN_STORE);
@@ -526,7 +506,7 @@ STATIC void c_assign(compiler_t *comp, mp_parse_node_t pn, assign_kind_t assign_
526506
if (assign_kind != ASSIGN_STORE) {
527507
goto cannot_assign;
528508
}
529-
c_assign_tuple(comp, MP_PARSE_NODE_NULL, MP_PARSE_NODE_STRUCT_NUM_NODES(pns), pns->nodes);
509+
c_assign_tuple(comp, MP_PARSE_NODE_STRUCT_NUM_NODES(pns), pns->nodes);
530510
break;
531511

532512
case PN_atom_paren:
@@ -551,13 +531,13 @@ STATIC void c_assign(compiler_t *comp, mp_parse_node_t pn, assign_kind_t assign_
551531
}
552532
if (MP_PARSE_NODE_IS_NULL(pns->nodes[0])) {
553533
// empty list, assignment allowed
554-
c_assign_tuple(comp, MP_PARSE_NODE_NULL, 0, NULL);
534+
c_assign_tuple(comp, 0, NULL);
555535
} else if (MP_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[0], PN_testlist_comp)) {
556536
pns = (mp_parse_node_struct_t *)pns->nodes[0];
557537
goto testlist_comp;
558538
} else {
559539
// brackets around 1 item
560-
c_assign_tuple(comp, pns->nodes[0], 0, NULL);
540+
c_assign_tuple(comp, 1, pns->nodes);
561541
}
562542
break;
563543

@@ -568,27 +548,10 @@ STATIC void c_assign(compiler_t *comp, mp_parse_node_t pn, assign_kind_t assign_
568548

569549
testlist_comp:
570550
// lhs is a sequence
571-
if (MP_PARSE_NODE_IS_STRUCT(pns->nodes[1])) {
572-
mp_parse_node_struct_t *pns2 = (mp_parse_node_struct_t *)pns->nodes[1];
573-
if (MP_PARSE_NODE_STRUCT_KIND(pns2) == PN_testlist_comp_3b) {
574-
// sequence of one item, with trailing comma
575-
assert(MP_PARSE_NODE_IS_NULL(pns2->nodes[0]));
576-
c_assign_tuple(comp, pns->nodes[0], 0, NULL);
577-
} else if (MP_PARSE_NODE_STRUCT_KIND(pns2) == PN_testlist_comp_3c) {
578-
// sequence of many items
579-
uint n = MP_PARSE_NODE_STRUCT_NUM_NODES(pns2);
580-
c_assign_tuple(comp, pns->nodes[0], n, pns2->nodes);
581-
} else if (MP_PARSE_NODE_STRUCT_KIND(pns2) == PN_comp_for) {
582-
goto cannot_assign;
583-
} else {
584-
// sequence with 2 items
585-
goto sequence_with_2_items;
586-
}
587-
} else {
588-
// sequence with 2 items
589-
sequence_with_2_items:
590-
c_assign_tuple(comp, MP_PARSE_NODE_NULL, 2, pns->nodes);
551+
if (MP_PARSE_NODE_TESTLIST_COMP_HAS_COMP_FOR(pns)) {
552+
goto cannot_assign;
591553
}
554+
c_assign_tuple(comp, MP_PARSE_NODE_STRUCT_NUM_NODES(pns), pns->nodes);
592555
return;
593556
}
594557
return;
@@ -983,32 +946,11 @@ STATIC void c_del_stmt(compiler_t *comp, mp_parse_node_t pn) {
983946
} else {
984947
assert(MP_PARSE_NODE_IS_STRUCT_KIND(pn, PN_testlist_comp));
985948
mp_parse_node_struct_t *pns = (mp_parse_node_struct_t *)pn;
986-
// TODO perhaps factorise testlist_comp code with other uses of PN_testlist_comp
987-
988-
if (MP_PARSE_NODE_IS_STRUCT(pns->nodes[1])) {
989-
mp_parse_node_struct_t *pns1 = (mp_parse_node_struct_t *)pns->nodes[1];
990-
if (MP_PARSE_NODE_STRUCT_KIND(pns1) == PN_testlist_comp_3b) {
991-
// sequence of one item, with trailing comma
992-
assert(MP_PARSE_NODE_IS_NULL(pns1->nodes[0]));
993-
c_del_stmt(comp, pns->nodes[0]);
994-
} < 10000 span class=pl-k>else if (MP_PARSE_NODE_STRUCT_KIND(pns1) == PN_testlist_comp_3c) {
995-
// sequence of many items
996-
int n = MP_PARSE_NODE_STRUCT_NUM_NODES(pns1);
997-
c_del_stmt(comp, pns->nodes[0]);
998-
for (int i = 0; i < n; i++) {
999-
c_del_stmt(comp, pns1->nodes[i]);
1000-
}
1001-
} else if (MP_PARSE_NODE_STRUCT_KIND(pns1) == PN_comp_for) {
1002-
goto cannot_delete;
1003-
} else {
1004-
// sequence with 2 items
1005-
goto sequence_with_2_items;
1006-
}
1007-
} else {
1008-
// sequence with 2 items
1009-
sequence_with_2_items:
1010-
c_del_stmt(comp, pns->nodes[0]);
1011-
c_del_stmt(comp, pns->nodes[1]);
949+
if (MP_PARSE_NODE_TESTLIST_COMP_HAS_COMP_FOR(pns)) {
950+
goto cannot_delete;
951+
}
952+
for (size_t i = 0; i < MP_PARSE_NODE_STRUCT_NUM_NODES(pns); ++i) {
953+
c_del_stmt(comp, pns->nodes[i]);
1012954
}
1013955
}
1014956
} else {
@@ -2490,31 +2432,16 @@ STATIC void compile_comprehension(compiler_t *comp, mp_parse_node_struct_t *pns,
24902432
STATIC void compile_atom_paren(compiler_t *comp, mp_parse_node_struct_t *pns) {
24912433
if (MP_PARSE_NODE_IS_NULL(pns->nodes[0])) {
24922434
// an empty tuple
2493-
c_tuple(comp, MP_PARSE_NODE_NULL, NULL);
2435+
EMIT_ARG(build, 0, MP_EMIT_BUILD_TUPLE);
24942436
} else {
24952437
assert(MP_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[0], PN_testlist_comp));
24962438
pns = (mp_parse_node_struct_t *)pns->nodes[0];
2497-
assert(!MP_PARSE_NODE_IS_NULL(pns->nodes[1]));
2498-
if (MP_PARSE_NODE_IS_STRUCT(pns->nodes[1])) {
2499-
mp_parse_node_struct_t *pns2 = (mp_parse_node_struct_t *)pns->nodes[1];
2500-
if (MP_PARSE_NODE_STRUCT_KIND(pns2) == PN_testlist_comp_3b) {
2501-
// tuple of one item, with trailing comma
2502-
assert(MP_PARSE_NODE_IS_NULL(pns2->nodes[0]));
2503-
c_tuple(comp, pns->nodes[0], NULL);
2504-
} else if (MP_PARSE_NODE_STRUCT_KIND(pns2) == PN_testlist_comp_3c) {
2505-
// tuple of many items
2506-
c_tuple(comp, pns->nodes[0], pns2);
2507-
} else if (MP_PARSE_NODE_STRUCT_KIND(pns2) == PN_comp_for) {
2508-
// generator expression
2509-
compile_comprehension(comp, pns, SCOPE_GEN_EXPR);
2510-
} else {
2511-
// tuple with 2 items
2512-
goto tuple_with_2_items;
2513-
}
2439+
if (MP_PARSE_NODE_TESTLIST_COMP_HAS_COMP_FOR(pns)) {
2440+
// generator expression
2441+
compile_comprehension(comp, pns, SCOPE_GEN_EXPR);
25142442
} else {
2515-
// tuple with 2 items
2516-
tuple_with_2_items:
2517-
c_tuple(comp, MP_PARSE_NODE_NULL, pns);
2443+
// tuple with N items
2444+
compile_generic_tuple(comp, pns);
25182445
}
25192446
}
25202447
}
@@ -2525,31 +2452,13 @@ STATIC void compile_atom_bracket(compiler_t *comp, mp_parse_node_struct_t *pns)
25252452
EMIT_ARG(build, 0, MP_EMIT_BUILD_LIST);
25262453
} else if (MP_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[0], PN_testlist_comp)) {
25272454
mp_parse_node_struct_t *pns2 = (mp_parse_node_struct_t *)pns->nodes[0];
2528-
if (MP_PARSE_NODE_IS_STRUCT(pns2->nodes[1])) {
2529-
mp_parse_node_struct_t *pns3 = (mp_parse_node_struct_t *)pns2->nodes[1];
2530-
if (MP_PARSE_NODE_STRUCT_KIND(pns3) == PN_testlist_comp_3b) {
2531-
// list of one item, with trailing comma
2532-
assert(MP_PARSE_NODE_IS_NULL(pns3->nodes[0]));
2533-
compile_node(comp, pns2->nodes[0]);
2534-
EMIT_ARG(build, 1, MP_EMIT_BUILD_LIST);
2535-
} else if (MP_PARSE_NODE_STRUCT_KIND(pns3) == PN_testlist_comp_3c) {
2536-
// list of many items
2537-
compile_node(comp, pns2->nodes[0]);
2538-
compile_generic_all_nodes(comp, pns3);
2539-
EMIT_ARG(build, 1 + MP_PARSE_NODE_STRUCT_NUM_NODES(pns3), MP_EMIT_BUILD_LIST);
2540-
} else if (MP_PARSE_NODE_STRUCT_KIND(pns3) == PN_comp_for) {
2541-
// list comprehension
2542-
compile_comprehension(comp, pns2, SCOPE_LIST_COMP);
2543-
} else {
2544-
// list with 2 items
2545-
goto list_with_2_items;
2546-
}
2455+
if (MP_PARSE_NODE_TESTLIST_COMP_HAS_COMP_FOR(pns2)) {
2456+
// list comprehension
2457+
compile_comprehension(comp, pns2, SCOPE_LIST_COMP);
25472458
} else {
2548-
// list with 2 items
2549-
list_with_2_items:
2550-
compile_node(comp, pns2->nodes[0]);
2551-
compile_node(comp, pns2->nodes[1]);
2552-
EMIT_ARG(build, 2, MP_EMIT_BUILD_LIST);
2459+
// list with N items
2460+
compile_generic_all_nodes(comp, pns2);
2461+
EMIT_ARG(build, MP_PARSE_NODE_STRUCT_NUM_NODES(pns2), MP_EMIT_BUILD_LIST);
25532462
}
25542463
} else {
25552464
// list with 1 item

py/parse.c

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -796,9 +796,11 @@ STATIC bool fold_constants(parser_t *parser, uint8_t rule_id, size_t num_args) {
796796
#endif
797797

798798
STATIC void push_result_rule(parser_t *parser, size_t src_line, uint8_t rule_id, size_t num_args) {
799-
// optimise away parenthesis around an expression if possible
799+
// Simplify and optimise certain rules, to reduce memory usage and simplify the compiler.
800800
if (rule_id == RULE_atom_paren) {
801-
// there should be just 1 arg for this rule
801+
// Remove parenthesis around a single expression if possible.
802+
// This atom_paren rule always has a single argument, and after this
803+
// optimisation that argument is either NULL or testlist_comp.
802804
mp_parse_node_t pn = peek_result(parser, 0);
803805
if (MP_PARSE_NODE_IS_NULL(pn)) {
804806
// need to keep parenthesis for ()
@@ -808,6 +810,34 @@ STATIC void push_result_rule(parser_t *parser, size_t src_line, uint8_t rule_id,
808810
// parenthesis around a single expression, so it's just the expression
809811
return;
810812
}
813+
} else if (rule_id == RULE_testlist_comp) {
814+
// The testlist_comp rule can be the sole argument to either atom_parent
815+
// or atom_bracket, for (...) and [...] respectively.
816+
assert(num_args == 2);
817+
mp_parse_node_t pn = peek_result(parser, 0);
818+
if (MP_PARSE_NODE_IS_STRUCT(pn)) {
819+
mp_parse_node_struct_t *pns = (mp_parse_node_struct_t *)pn;
820+
if (MP_PARSE_NODE_STRUCT_KIND(pns) == RULE_testlist_comp_3b) {
821+
// tuple of one item, with trailing comma
822+
pop_result(parser);
823+
--num_args;
824+
} else if (MP_PARSE_NODE_STRUCT_KIND(pns) == RULE_testlist_comp_3c) {
825+
// tuple of many items, convert testlist_comp_3c to testlist_comp
826+
pop_result(parser);
827+
assert(pn == peek_result(parser, 0));
828+
pns->kind_num_nodes = rule_id | MP_PARSE_NODE_STRUCT_NUM_NODES(pns) << 8;
829+
return;
830+
} else if (MP_PARSE_NODE_STRUCT_KIND(pns) == RULE_comp_for) {
831+
// generator expression
832+
} else {
833+
// tuple with 2 items
834+
}
835+
} else {
836+
// tuple with 2 items
837+
}
838+
} else if (rule_id == RULE_testlist_comp_3c) {
839+
// steal first arg of outer testlist_comp rule
840+
++num_args;
811841
}
812842

813843
#if MICROPY_COMP_CONST_FOLDING
@@ -827,6 +857,10 @@ STATIC void push_result_rule(parser_t *parser, size_t src_line, uint8_t rule_id,
827857
for (size_t i = num_args; i > 0; i--) {
828858
pn->nodes[i - 1] = pop_result(parser);
829859
}
860+
if (rule_id == RULE_testlist_comp_3c) {
861+
// need to push something non-null to replace stolen first arg of testlist_comp
862+
push_result_node(parser, (mp_parse_node_t)pn);
863+
}
830864
push_result_node(parser, (mp_parse_node_t)pn);
831865
}
832866

0 commit comments

Comments
 (0)
0