From ee0d675573cf63ca12e3021f718b55f3cf41a28f Mon Sep 17 00:00:00 2001 From: Damien George Date: Wed, 18 Mar 2015 17:47:47 +0000 Subject: [PATCH 01/12] py: Reorganise bytecode layout so it's more structured, easier to edit. --- py/bc.c | 34 +++++++++------ py/bc.h | 24 +++++++++++ py/emitbc.c | 106 ++++++++++++++++++++-------------------------- py/emitnative.c | 17 +++----- py/objfun.c | 33 ++++++--------- py/objgenerator.c | 9 +--- py/showbc.c | 30 +++++++------ 7 files changed, 130 insertions(+), 123 deletions(-) diff --git a/py/bc.c b/py/bc.c index 3e9125d5cde1e..a4ee847098301 100644 --- a/py/bc.c +++ b/py/bc.c @@ -84,10 +84,8 @@ STATIC void dump_args(const mp_obj_t *a, mp_uint_t sz) { // On entry code_state should be allocated somewhere (stack/heap) and // contain the following valid entries: -// - code_state->code_info should be the offset in bytes from the start of -// the bytecode chunk to the start of the code-info within the bytecode // - code_state->ip should contain the offset in bytes from the start of -// the bytecode chunk to the start of the prelude within the bytecode +// the bytecode chunk to just after n_state and n_exc_stack // - code_state->n_state should be set to the state size (locals plus stack) void mp_setup_code_state(mp_code_state *code_state, mp_obj_t self_in, mp_uint_t n_args, mp_uint_t n_kw, const mp_obj_t *args) { // This function is pretty complicated. 
It's main aim is to be efficient in speed and RAM @@ -95,10 +93,16 @@ void mp_setup_code_state(mp_code_state *code_state, mp_obj_t self_in, mp_uint_t mp_obj_fun_bc_t *self = self_in; mp_uint_t n_state = code_state->n_state; + // ip comes in as an offset into bytecode, so turn it into a true pointer + code_state->ip = self->bytecode + (mp_uint_t)code_state->ip; + #if MICROPY_STACKLESS code_state->prev = NULL; #endif - code_state->code_info = self->bytecode + (mp_uint_t)code_state->code_info; + + // align ip + code_state->ip = MP_ALIGN(code_state->ip, sizeof(mp_uint_t)); + code_state->sp = &code_state->state[0] - 1; code_state->exc_sp = (mp_exc_stack_t*)(code_state->state + n_state) - 1; @@ -156,13 +160,8 @@ void mp_setup_code_state(mp_code_state *code_state, mp_obj_t self_in, mp_uint_t *var_pos_kw_args = dict; } - // get pointer to arg_names array at start of bytecode prelude - const mp_obj_t *arg_names; - { - const byte *code_info = code_state->code_info; - mp_uint_t code_info_size = mp_decode_uint(&code_info); - arg_names = (const mp_obj_t*)(code_state->code_info + code_info_size); - } + // get pointer to arg_names array + const mp_obj_t *arg_names = (const mp_obj_t*)code_state->ip; for (mp_uint_t i = 0; i < n_kw; i++) { mp_obj_t wanted_arg_name = kwargs[2 * i]; @@ -235,8 +234,19 @@ continue2:; } } + // get the ip and skip argument names + const byte *ip = code_state->ip; + ip += (self->n_pos_args + self->n_kwonly_args) * sizeof(mp_uint_t); + + // store pointer to code_info and jump over it + { + code_state->code_info = ip; + const byte *ip2 = ip; + mp_uint_t code_info_size = mp_decode_uint(&ip2); + ip += code_info_size; + } + // bytecode prelude: initialise closed over variables - const byte *ip = self->bytecode + (mp_uint_t)code_state->ip; mp_uint_t local_num; while ((local_num = *ip++) != 255) { code_state->state[n_state - 1 - local_num] = diff --git a/py/bc.h b/py/bc.h index b4b4d8c550a88..73b67bc1053e6 100644 --- a/py/bc.h +++ b/py/bc.h @@ -29,6 +29,30 @@ 
#include "py/runtime.h" #include "py/obj.h" +// bytecode layout: +// +// n_state : var uint +// n_exc_stack : var uint +// +// +// +// argname0 : obj (qstr) +// ... : obj (qstr) +// argnameN : obj (qstr) N = num_pos_args + num_kwonly_args +// +// code_info_size : var uint | code_info_size counts bytes in this chunk +// simple_name : var qstr | +// source_file : var qstr | +// | +// | +// +// num_cells : byte number of locals that are cells +// local_num0 : byte +// ... : byte +// local_numN : byte N = num_cells +// +// + // Exception stack entry typedef struct _mp_exc_stack { const byte *handler; diff --git a/py/emitbc.c b/py/emitbc.c index 71ed4afd866ae..0ed7828f9f300 100644 --- a/py/emitbc.c +++ b/py/emitbc.c @@ -111,7 +111,11 @@ STATIC void emit_align_code_info_to_machine_word(emit_t *emit) { emit->code_info_offset = (emit->code_info_offset + sizeof(mp_uint_t) - 1) & (~(sizeof(mp_uint_t) - 1)); } -STATIC void emit_write_code_info_uint(emit_t *emit, mp_uint_t val) { +STATIC void emit_write_code_info_byte(emit_t* emit, byte val) { + *emit_get_cur_to_write_code_info(emit, 1) = val; +} + +STATIC void emit_write_code_info_uint(emit_t* emit, mp_uint_t val) { emit_write_uint(emit, emit_get_cur_to_write_code_info, val); } @@ -119,6 +123,13 @@ STATIC void emit_write_code_info_qstr(emit_t *emit, qstr qst) { emit_write_uint(emit, emit_get_cur_to_write_code_info, qst); } +STATIC void emit_write_code_info_prealigned_ptr(emit_t* emit, void *ptr) { + mp_uint_t *c = (mp_uint_t*)emit_get_cur_to_write_code_info(emit, sizeof(mp_uint_t)); + // Verify thar c is already uint-aligned + assert(c == MP_ALIGN(c, sizeof(mp_uint_t))); + *c = (mp_uint_t)ptr; +} + #if MICROPY_ENABLE_SOURCE_LINE STATIC void emit_write_code_info_bytes_lines(emit_t *emit, mp_uint_t bytes_to_skip, mp_uint_t lines_to_skip) { assert(bytes_to_skip > 0 || lines_to_skip > 0); @@ -167,11 +178,7 @@ STATIC void emit_write_bytecode_byte(emit_t *emit, byte b1) { c[0] = b1; } -STATIC void emit_write_bytecode_uint(emit_t 
*emit, mp_uint_t val) { - emit_write_uint(emit, emit_get_cur_to_write_bytecode, val); -} - -STATIC void emit_write_bytecode_byte_byte(emit_t *emit, byte b1, byte b2) { +STATIC void emit_write_bytecode_byte_byte(emit_t* emit, byte b1, byte b2) { assert((b2 & (~0xff)) == 0); byte *c = emit_get_cur_to_write_bytecode(emit, 2); c[0] = b1; @@ -210,13 +217,6 @@ STATIC void emit_write_bytecode_byte_uint(emit_t *emit, byte b, mp_uint_t val) { emit_write_uint(emit, emit_get_cur_to_write_bytecode, val); } -STATIC void emit_write_bytecode_prealigned_ptr(emit_t *emit, void *ptr) { - mp_uint_t *c = (mp_uint_t*)emit_get_cur_to_write_bytecode(emit, sizeof(mp_uint_t)); - // Verify thar c is already uint-aligned - assert(c == MP_ALIGN(c, sizeof(mp_uint_t))); - *c = (mp_uint_t)ptr; -} - // aligns the pointer so it is friendly to GC STATIC void emit_write_bytecode_byte_ptr(emit_t *emit, byte b, void *ptr) { emit_write_bytecode_byte(emit, b); @@ -227,15 +227,7 @@ STATIC void emit_write_bytecode_byte_ptr(emit_t *emit, byte b, void *ptr) { *c = (mp_uint_t)ptr; } -/* currently unused -STATIC void emit_write_bytecode_byte_uint_uint(emit_t *emit, byte b, mp_uint_t num1, mp_uint_t num2) { - emit_write_bytecode_byte(emit, b); - emit_write_bytecode_byte_uint(emit, num1); - emit_write_bytecode_byte_uint(emit, num2); -} -*/ - -STATIC void emit_write_bytecode_byte_qstr(emit_t *emit, byte b, qstr qst) { +STATIC void emit_write_bytecode_byte_qstr(emit_t* emit, byte b, qstr qst) { emit_write_bytecode_byte_uint(emit, b, qst); } @@ -289,19 +281,26 @@ void mp_emit_bc_start_pass(emit_t *emit, pass_kind_t pass, scope_t *scope) { emit->bytecode_offset = 0; emit->code_info_offset = 0; - // Write code info size as compressed uint. If we are not in the final pass - // then space for this uint is reserved in emit_bc_end_pass. - if (pass == MP_PASS_EMIT) { - emit_write_code_info_uint(emit, emit->code_info_size); + // Write local state size and exception stack size. 
+ { + mp_uint_t n_state = scope->num_locals + scope->stack_size; + if (n_state == 0) { + // Need at least 1 entry in the state, in the case an exception is + // propagated through this function, the exception is returned in + // the highest slot in the state (fastn[0], see vm.c). + n_state = 1; + } + emit_write_code_info_uint(emit, n_state); + emit_write_code_info_uint(emit, scope->exc_stack_size); } - // write the name and source file of this function - emit_write_code_info_qstr(emit, scope->simple_name); - emit_write_code_info_qstr(emit, scope->source_file); + // Align code-info so that following pointers are aligned on a machine word. + emit_align_code_info_to_machine_word(emit); - // bytecode prelude: argument names (needed to resolve positional args passed as keywords) - // we store them as full word-sized objects for efficient access in mp_setup_code_state - // this is the start of the prelude and is guaranteed to be aligned on a word boundary + // Write argument names (needed to resolve positional args passed as + // keywords). We store them as full word-sized objects for efficient access + // in mp_setup_code_state this is the start of the prelude and is guaranteed + // to be aligned on a word boundary. { // For a given argument position (indexed by i) we need to find the // corresponding id_info which is a parameter, as it has the correct @@ -322,23 +321,23 @@ void mp_emit_bc_start_pass(emit_t *emit, pass_kind_t pass, scope_t *scope) { break; } } - emit_write_bytecode_prealigned_ptr(emit, MP_OBJ_NEW_QSTR(qst)); + emit_write_code_info_prealigned_ptr(emit, MP_OBJ_NEW_QSTR(qst)); } } - // bytecode prelude: local state size and exception stack size - { - mp_uint_t n_state = scope->num_locals + scope->stack_size; - if (n_state == 0) { - // Need at least 1 entry in the state, in the case an exception is - // propagated through this function, the exception is returned in - // the highest slot in the state (fastn[0], see vm.c). 
- n_state = 1; - } - emit_write_bytecode_uint(emit, n_state); - emit_write_bytecode_uint(emit, scope->exc_stack_size); + // Write size of the rest of the code info. We don't know how big this + // variable uint will be on the MP_PASS_CODE_SIZE pass so we reserve 2 bytes + // for it and hope that is enough! TODO assert this or something. + if (pass == MP_PASS_EMIT) { + emit_write_code_info_uint(emit, emit->code_info_size - emit->code_info_offset); + } else { + emit_get_cur_to_write_code_info(emit, 2); } + // Write the name and source file of this function. + emit_write_code_info_qstr(emit, scope->simple_name); + emit_write_code_info_qstr(emit, scope->source_file); + // bytecode prelude: initialise closed over variables for (int i = 0; i < scope->id_info_len; i++) { id_info_t *id = &scope->id_info[i]; @@ -360,25 +359,10 @@ void mp_emit_bc_end_pass(emit_t *emit) { mp_printf(&mp_plat_print, "ERROR: stack size not back to zero; got %d\n", emit->stack_size); } - *emit_get_cur_to_write_code_info(emit, 1) = 0; // end of line number info + emit_write_code_info_byte(emit, 0); // end of line number info if (emit->pass == MP_PASS_CODE_SIZE) { - // Need to make sure we have enough room in the code-info block to write - // the size of the code-info block. Since the size is written as a - // compressed uint, we don't know its size until we write it! Thus, we - // take the biggest possible value it could be and write that here. - // Then there will be enough room to write the value, and any leftover - // space will be absorbed in the alignment at the end of the code-info - // block. - mp_uint_t max_code_info_size = - emit->code_info_offset // current code-info size - + BYTES_FOR_INT // maximum space for compressed uint - + BYTES_PER_WORD - 1; // maximum space for alignment padding - emit_write_code_info_uint(emit, max_code_info_size); - - // Align code-info so that following bytecode is aligned on a machine word. 
- // We don't need to write anything here, it's just dead space between the - // code-info block and the bytecode block that follows it. + // so bytecode is aligned emit_align_code_info_to_machine_word(emit); // calculate size of total code-info + bytecode, in bytes diff --git a/py/emitnative.c b/py/emitnative.c index d0614d303aa17..1fcc8433850a2 100644 --- a/py/emitnative.c +++ b/py/emitnative.c @@ -566,8 +566,6 @@ struct _emit_t { stack_info_t *stack_info; vtype_kind_t saved_stack_vtype; - int code_info_size; - int code_info_offset; int prelude_offset; int n_state; int stack_start; @@ -774,10 +772,6 @@ STATIC void emit_native_start_pass(emit_t *emit, pass_kind_t pass, scope_t *scop ASM_MOV_REG_REG(emit->as, REG_ARG_2, REG_ARG_1); #endif - // set code_state.code_info (offset from start of this function to code_info data) - // XXX this encoding may change size - ASM_MOV_IMM_TO_LOCAL_USING(emit->as, emit->code_info_offset, offsetof(mp_code_state, code_info) / sizeof(mp_uint_t), REG_ARG_1); - // set code_state.ip (offset from start of this function to prelude info) // XXX this encoding may change size ASM_MOV_IMM_TO_LOCAL_USING(emit->as, emit->prelude_offset, offsetof(mp_code_state, ip) / sizeof(mp_uint_t), REG_ARG_1); @@ -829,11 +823,10 @@ STATIC void emit_native_end_pass(emit_t *emit) { } if (!emit->do_viper_types) { - // write dummy code info (for mp_setup_code_state to parse) and arg names - emit->code_info_offset = ASM_GET_CODE_POS(emit->as); - ASM_DATA(emit->as, 1, emit->code_info_size); + emit->prelude_offset = ASM_GET_CODE_POS(emit->as); ASM_ALIGN(emit->as, ASM_WORD_SIZE); - emit->code_info_size = ASM_GET_CODE_POS(emit->as) - emit->code_info_offset; + + // write argument names as qstr objects // see comment in corresponding part of emitbc.c about the logic here for (int i = 0; i < emit->scope->num_pos_args + emit->scope->num_kwonly_args; i++) { qstr qst = MP_QSTR__star_; @@ -847,8 +840,10 @@ STATIC void emit_native_end_pass(emit_t *emit) { ASM_DATA(emit->as, 
ASM_WORD_SIZE, (mp_uint_t)MP_OBJ_NEW_QSTR(qst)); } + // write dummy code info (for mp_setup_code_state to parse) + ASM_DATA(emit->as, 1, 1); + // bytecode prelude: initialise closed over variables - emit->prelude_offset = ASM_GET_CODE_POS(emit->as); for (int i = 0; i < emit->scope->id_info_len; i++) { id_info_t *id = &emit->scope->id_info[i]; if (id->kind == ID_INFO_KIND_CELL) { diff --git a/py/objfun.c b/py/objfun.c index 774d8e64c4de5..53ddb0a7b90ca 100644 --- a/py/objfun.c +++ b/py/objfun.c @@ -121,8 +121,13 @@ qstr mp_obj_fun_get_name(mp_const_obj_t fun_in) { return MP_QSTR_; } #endif - const byte *code_info = fun->bytecode; - return mp_obj_code_get_name(code_info); + + const byte *bc = fun->bytecode; + mp_decode_uint(&bc); // skip n_state + mp_decode_uint(&bc); // skip n_exc_stack + bc = MP_ALIGN(bc, sizeof(mp_uint_t)); // align + bc += (fun->n_pos_args + fun->n_kwonly_args) * sizeof(mp_uint_t); // skip arg names + return mp_obj_code_get_name(bc); } #if MICROPY_CPYTHON_COMPAT @@ -158,13 +163,8 @@ mp_code_state *mp_obj_fun_bc_prepare_codestate(mp_obj_t self_in, mp_uint_t n_arg MP_STACK_CHECK(); mp_obj_fun_bc_t *self = self_in; - // skip code-info block - const byte *code_info = self->bytecode; - mp_uint_t code_info_size = mp_decode_uint(&code_info); - const byte *ip = self->bytecode + code_info_size; - - // bytecode prelude: skip arg names - ip += (self->n_pos_args + self->n_kwonly_args) * sizeof(mp_obj_t); + // get start of bytecode + const byte *ip = self->bytecode; // bytecode prelude: state size and exception stack size mp_uint_t n_state = mp_decode_uint(&ip); @@ -178,9 +178,8 @@ mp_code_state *mp_obj_fun_bc_prepare_codestate(mp_obj_t self_in, mp_uint_t n_arg return NULL; } + code_state->ip = (byte*)(ip - self->bytecode); // offset to after n_state/n_exc_stack code_state->n_state = n_state; - code_state->code_info = 0; // offset to code-info - code_state->ip = (byte*)(ip - self->bytecode); // offset to prelude mp_setup_code_state(code_state, self_in, 
n_args, n_kw, args); // execute the byte code with the correct globals context @@ -202,13 +201,8 @@ STATIC mp_obj_t fun_bc_call(mp_obj_t self_in, mp_uint_t n_args, mp_uint_t n_kw, mp_obj_fun_bc_t *self = self_in; DEBUG_printf("Func n_def_args: %d\n", self->n_def_args); - // skip code-info block - const byte *code_info = self->bytecode; - mp_uint_t code_info_size = mp_decode_uint(&code_info); - const byte *ip = self->bytecode + code_info_size; - - // bytecode prelude: skip arg names - ip += (self->n_pos_args + self->n_kwonly_args) * sizeof(mp_obj_t); + // get start of bytecode + const byte *ip = self->bytecode; // bytecode prelude: state size and exception stack size mp_uint_t n_state = mp_decode_uint(&ip); @@ -229,9 +223,8 @@ STATIC mp_obj_t fun_bc_call(mp_obj_t self_in, mp_uint_t n_args, mp_uint_t n_kw, state_size = 0; // indicate that we allocated using alloca } + code_state->ip = (byte*)(ip - self->bytecode); // offset to after n_state/n_exc_stack code_state->n_state = n_state; - code_state->code_info = 0; // offset to code-info - code_state->ip = (byte*)(ip - self->bytecode); // offset to prelude mp_setup_code_state(code_state, self_in, n_args, n_kw, args); // execute the byte code with the correct globals context diff --git a/py/objgenerator.c b/py/objgenerator.c index 4ba6bf76836e8..f7b637e471977 100644 --- a/py/objgenerator.c +++ b/py/objgenerator.c @@ -54,13 +54,8 @@ STATIC mp_obj_t gen_wrap_call(mp_obj_t self_in, mp_uint_t n_args, mp_uint_t n_kw mp_obj_fun_bc_t *self_fun = (mp_obj_fun_bc_t*)self->fun; assert(MP_OBJ_IS_TYPE(self_fun, &mp_type_fun_bc)); - // skip code-info block - const byte *code_info = self_fun->bytecode; - mp_uint_t code_info_size = mp_decode_uint(&code_info); - const byte *ip = self_fun->bytecode + code_info_size; - - // bytecode prelude: skip arg names - ip += (self_fun->n_pos_args + self_fun->n_kwonly_args) * sizeof(mp_obj_t); + // get start of bytecode + const byte *ip = self_fun->bytecode; // bytecode prelude: get state size and 
exception stack size mp_uint_t n_state = mp_decode_uint(&ip); diff --git a/py/showbc.c b/py/showbc.c index 2da8d3febec00..87e7c6af46cc3 100644 --- a/py/showbc.c +++ b/py/showbc.c @@ -57,7 +57,16 @@ const byte *mp_showbc_code_start; void mp_bytecode_print(const void *descr, mp_uint_t n_total_args, const byte *ip, mp_uint_t len) { mp_showbc_code_start = ip; - // get code info size + // get state size and exception stack size + mp_uint_t n_state = mp_decode_uint(&ip); + mp_uint_t n_exc_stack = mp_decode_uint(&ip); + + ip = MP_ALIGN(ip, sizeof(mp_uint_t)); + + // get and skip arg names + const mp_obj_t *arg_names = (const mp_obj_t*)ip; + ip += n_total_args * sizeof(mp_uint_t); + const byte *code_info = ip; mp_uint_t code_info_size = mp_decode_uint(&code_info); ip += code_info_size; @@ -65,7 +74,7 @@ void mp_bytecode_print(const void *descr, mp_uint_t n_total_args, const byte *ip qstr block_name = mp_decode_uint(&code_info); qstr source_file = mp_decode_uint(&code_info); printf("File %s, code block '%s' (descriptor: %p, bytecode @%p " UINT_FMT " bytes)\n", - qstr_str(source_file), qstr_str(block_name), descr, code_info, len); + qstr_str(source_file), qstr_str(block_name), descr, mp_showbc_code_start, len); // raw bytecode dump printf("Raw bytecode (code_info_size=" UINT_FMT ", bytecode_size=" UINT_FMT "):\n", code_info_size, len - code_info_size); @@ -80,18 +89,15 @@ void mp_bytecode_print(const void *descr, mp_uint_t n_total_args, const byte *ip // bytecode prelude: arg names (as qstr objects) printf("arg names:"); for (mp_uint_t i = 0; i < n_total_args; i++) { - printf(" %s", qstr_str(MP_OBJ_QSTR_VALUE(*(mp_obj_t*)ip))); - ip += sizeof(mp_obj_t); + printf(" %s", qstr_str(MP_OBJ_QSTR_VALUE(arg_names[i]))); } printf("\n"); - // bytecode prelude: state size and exception stack size; 16 bit uints - { - uint n_state = mp_decode_uint(&ip); - uint n_exc_stack = mp_decode_uint(&ip); - printf("(N_STATE %u)\n", n_state); - printf("(N_EXC_STACK %u)\n", n_exc_stack); - } + 
printf("(N_STATE " UINT_FMT ")\n", n_state); + printf("(N_EXC_STACK " UINT_FMT ")\n", n_exc_stack); + + // for printing line number info + const byte *bytecode_start = ip; // bytecode prelude: initialise closed over variables { @@ -104,7 +110,7 @@ void mp_bytecode_print(const void *descr, mp_uint_t n_total_args, const byte *ip // print out line number info { - mp_int_t bc = (mp_showbc_code_start + code_info_size) - ip; // start counting from the prelude + mp_int_t bc = bytecode_start - ip; mp_uint_t source_line = 1; printf(" bc=" INT_FMT " line=" UINT_FMT "\n", bc, source_line); for (const byte* ci = code_info; *ci;) { From e6aae9e80d9e8e0ca807409287a9ecbe055a692a Mon Sep 17 00:00:00 2001 From: Damien George Date: Thu, 22 Oct 2015 23:45:37 +0100 Subject: [PATCH 02/12] py: Put all bytecode state (arg count, etc) in bytecode. --- py/bc.c | 73 ++++++++++++++++++++++++-------------------- py/bc.h | 6 +++- py/compile.c | 6 ++++ py/emitbc.c | 11 +++++-- py/emitglue.c | 22 +++++-------- py/emitglue.h | 4 +-- py/emitinlinethumb.c | 2 +- py/emitnative.c | 7 +++-- py/obj.h | 4 +-- py/objfun.c | 18 +++++------ py/objfun.h | 6 ---- py/runtime0.h | 1 + py/scope.h | 1 + py/showbc.c | 12 +++++--- 14 files changed, 95 insertions(+), 78 deletions(-) diff --git a/py/bc.c b/py/bc.c index a4ee847098301..da0ea78765146 100644 --- a/py/bc.c +++ b/py/bc.c @@ -31,6 +31,7 @@ #include "py/nlr.h" #include "py/objfun.h" +#include "py/runtime0.h" #include "py/bc.h" #if 0 // print debugging info @@ -100,6 +101,12 @@ void mp_setup_code_state(mp_code_state *code_state, mp_obj_t self_in, mp_uint_t code_state->prev = NULL; #endif + // get params + mp_uint_t scope_flags = *code_state->ip++; + mp_uint_t n_pos_args = *code_state->ip++; + mp_uint_t n_kwonly_args = *code_state->ip++; + mp_uint_t n_def_pos_args = *code_state->ip++; + // align ip code_state->ip = MP_ALIGN(code_state->ip, sizeof(mp_uint_t)); @@ -112,33 +119,33 @@ void mp_setup_code_state(mp_code_state *code_state, mp_obj_t self_in, 
mp_uint_t const mp_obj_t *kwargs = args + n_args; // var_pos_kw_args points to the stack where the var-args tuple, and var-kw dict, should go (if they are needed) - mp_obj_t *var_pos_kw_args = &code_state->state[n_state - 1 - self->n_pos_args - self->n_kwonly_args]; + mp_obj_t *var_pos_kw_args = &code_state->state[n_state - 1 - n_pos_args - n_kwonly_args]; // check positional arguments - if (n_args > self->n_pos_args) { + if (n_args > n_pos_args) { // given more than enough arguments - if (!self->takes_var_args) { - fun_pos_args_mismatch(self, self->n_pos_args, n_args); + if ((scope_flags & MP_SCOPE_FLAG_VARARGS) == 0) { + fun_pos_args_mismatch(self, n_pos_args, n_args); } // put extra arguments in varargs tuple - *var_pos_kw_args-- = mp_obj_new_tuple(n_args - self->n_pos_args, args + self->n_pos_args); - n_args = self->n_pos_args; + *var_pos_kw_args-- = mp_obj_new_tuple(n_args - n_pos_args, args + n_pos_args); + n_args = n_pos_args; } else { - if (self->takes_var_args) { + if ((scope_flags & MP_SCOPE_FLAG_VARARGS) != 0) { DEBUG_printf("passing empty tuple as *args\n"); *var_pos_kw_args-- = mp_const_empty_tuple; } // Apply processing and check below only if we don't have kwargs, // otherwise, kw handling code below has own extensive checks. 
- if (n_kw == 0 && !self->has_def_kw_args) { - if (n_args >= (mp_uint_t)(self->n_pos_args - self->n_def_args)) { + if (n_kw == 0 && (scope_flags & MP_SCOPE_FLAG_DEFKWARGS) == 0) { + if (n_args >= (mp_uint_t)(n_pos_args - n_def_pos_args)) { // given enough arguments, but may need to use some default arguments - for (mp_uint_t i = n_args; i < self->n_pos_args; i++) { - code_state->state[n_state - 1 - i] = self->extra_args[i - (self->n_pos_args - self->n_def_args)]; + for (mp_uint_t i = n_args; i < n_pos_args; i++) { + code_state->state[n_state - 1 - i] = self->extra_args[i - (n_pos_args - n_def_pos_args)]; } } else { - fun_pos_args_mismatch(self, self->n_pos_args - self->n_def_args, n_args); + fun_pos_args_mismatch(self, n_pos_args - n_def_pos_args, n_args); } } } @@ -150,12 +157,12 @@ void mp_setup_code_state(mp_code_state *code_state, mp_obj_t self_in, mp_uint_t // check keyword arguments - if (n_kw != 0 || self->has_def_kw_args) { + if (n_kw != 0 || (scope_flags & MP_SCOPE_FLAG_DEFKWARGS) != 0) { DEBUG_printf("Initial args: "); - dump_args(code_state->state + n_state - self->n_pos_args - self->n_kwonly_args, self->n_pos_args + self->n_kwonly_args); + dump_args(code_state->state + n_state - n_pos_args - n_kwonly_args, n_pos_args + n_kwonly_args); mp_obj_t dict = MP_OBJ_NULL; - if (self->takes_kw_args) { + if ((scope_flags & MP_SCOPE_FLAG_VARKEYWORDS) != 0) { dict = mp_obj_new_dict(n_kw); // TODO: better go conservative with 0? 
*var_pos_kw_args = dict; } @@ -165,7 +172,7 @@ void mp_setup_code_state(mp_code_state *code_state, mp_obj_t self_in, mp_uint_t for (mp_uint_t i = 0; i < n_kw; i++) { mp_obj_t wanted_arg_name = kwargs[2 * i]; - for (mp_uint_t j = 0; j < self->n_pos_args + self->n_kwonly_args; j++) { + for (mp_uint_t j = 0; j < n_pos_args + n_kwonly_args; j++) { if (wanted_arg_name == arg_names[j]) { if (code_state->state[n_state - 1 - j] != MP_OBJ_NULL) { nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_TypeError, @@ -176,7 +183,7 @@ void mp_setup_code_state(mp_code_state *code_state, mp_obj_t self_in, mp_uint_t } } // Didn't find name match with positional args - if (!self->takes_kw_args) { + if ((scope_flags & MP_SCOPE_FLAG_VARKEYWORDS) == 0) { nlr_raise(mp_obj_new_exception_msg(&mp_type_TypeError, "function does not take keyword arguments")); } mp_obj_dict_store(dict, kwargs[2 * i], kwargs[2 * i + 1]); @@ -184,19 +191,19 @@ continue2:; } DEBUG_printf("Args with kws flattened: "); - dump_args(code_state->state + n_state - self->n_pos_args - self->n_kwonly_args, self->n_pos_args + self->n_kwonly_args); + dump_args(code_state->state + n_state - n_pos_args - n_kwonly_args, n_pos_args + n_kwonly_args); // fill in defaults for positional args - mp_obj_t *d = &code_state->state[n_state - self->n_pos_args]; - mp_obj_t *s = &self->extra_args[self->n_def_args - 1]; - for (mp_uint_t i = self->n_def_args; i > 0; i--, d++, s--) { + mp_obj_t *d = &code_state->state[n_state - n_pos_args]; + mp_obj_t *s = &self->extra_args[n_def_pos_args - 1]; + for (mp_uint_t i = n_def_pos_args; i > 0; i--, d++, s--) { if (*d == MP_OBJ_NULL) { *d = *s; } } DEBUG_printf("Args after filling default positional: "); - dump_args(code_state->state + n_state - self->n_pos_args - self->n_kwonly_args, self->n_pos_args + self->n_kwonly_args); + dump_args(code_state->state + n_state - n_pos_args - n_kwonly_args, n_pos_args + n_kwonly_args); // Check that all mandatory positional args are specified while (d < 
&code_state->state[n_state]) { @@ -208,35 +215,35 @@ continue2:; // Check that all mandatory keyword args are specified // Fill in default kw args if we have them - for (mp_uint_t i = 0; i < self->n_kwonly_args; i++) { - if (code_state->state[n_state - 1 - self->n_pos_args - i] == MP_OBJ_NULL) { + for (mp_uint_t i = 0; i < n_kwonly_args; i++) { + if (code_state->state[n_state - 1 - n_pos_args - i] == MP_OBJ_NULL) { mp_map_elem_t *elem = NULL; - if (self->has_def_kw_args) { - elem = mp_map_lookup(&((mp_obj_dict_t*)self->extra_args[self->n_def_args])->map, arg_names[self->n_pos_args + i], MP_MAP_LOOKUP); + if ((scope_flags & MP_SCOPE_FLAG_DEFKWARGS) != 0) { + elem = mp_map_lookup(&((mp_obj_dict_t*)self->extra_args[n_def_pos_args])->map, arg_names[n_pos_args + i], MP_MAP_LOOKUP); } if (elem != NULL) { - code_state->state[n_state - 1 - self->n_pos_args - i] = elem->value; + code_state->state[n_state - 1 - n_pos_args - i] = elem->value; } else { nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_TypeError, - "function missing required keyword argument '%q'", MP_OBJ_QSTR_VALUE(arg_names[self->n_pos_args + i]))); + "function missing required keyword argument '%q'", MP_OBJ_QSTR_VALUE(arg_names[n_pos_args + i]))); } } } } else { // no keyword arguments given - if (self->n_kwonly_args != 0) { + if (n_kwonly_args != 0) { nlr_raise(mp_obj_new_exception_msg(&mp_type_TypeError, "function missing keyword-only argument")); } - if (self->takes_kw_args) { + if ((scope_flags & MP_SCOPE_FLAG_VARKEYWORDS) != 0) { *var_pos_kw_args = mp_obj_new_dict(0); } } // get the ip and skip argument names const byte *ip = code_state->ip; - ip += (self->n_pos_args + self->n_kwonly_args) * sizeof(mp_uint_t); + ip += (n_pos_args + n_kwonly_args) * sizeof(mp_uint_t); // store pointer to code_info and jump over it { @@ -256,7 +263,7 @@ continue2:; // now that we skipped over the prelude, set the ip for the VM code_state->ip = ip; - DEBUG_printf("Calling: n_pos_args=%d, n_kwonly_args=%d\n", 
self->n_pos_args, self->n_kwonly_args); - dump_args(code_state->state + n_state - self->n_pos_args - self->n_kwonly_args, self->n_pos_args + self->n_kwonly_args); + DEBUG_printf("Calling: n_pos_args=%d, n_kwonly_args=%d\n", n_pos_args, n_kwonly_args); + dump_args(code_state->state + n_state - n_pos_args - n_kwonly_args, n_pos_args + n_kwonly_args); dump_args(code_state->state, n_state); } diff --git a/py/bc.h b/py/bc.h index 73b67bc1053e6..5824688d85994 100644 --- a/py/bc.h +++ b/py/bc.h @@ -33,6 +33,10 @@ // // n_state : var uint // n_exc_stack : var uint +// scope_flags : byte +// n_pos_args : byte number of arguments this function takes +// n_kwonly_args : byte number of keyword-only arguments this function takes +// n_def_pos_args : byte number of default positional arguments // // // @@ -85,7 +89,7 @@ mp_uint_t mp_decode_uint(const byte **ptr); mp_vm_return_kind_t mp_execute_bytecode(mp_code_state *code_state, volatile mp_obj_t inject_exc); mp_code_state *mp_obj_fun_bc_prepare_codestate(mp_obj_t func, mp_uint_t n_args, mp_uint_t n_kw, const mp_obj_t *args); void mp_setup_code_state(mp_code_state *code_state, mp_obj_t self_in, mp_uint_t n_args, mp_uint_t n_kw, const mp_obj_t *args); -void mp_bytecode_print(const void *descr, mp_uint_t n_total_args, const byte *code, mp_uint_t len); +void mp_bytecode_print(const void *descr, const byte *code, mp_uint_t len); void mp_bytecode_print2(const byte *code, mp_uint_t len); const byte *mp_bytecode_print_str(const byte *ip); #define mp_bytecode_print_inst(code) mp_bytecode_print2(code, 1) diff --git a/py/compile.c b/py/compile.c index f10b378074c80..e0561e6c322f0 100644 --- a/py/compile.c +++ b/py/compile.c @@ -539,6 +539,12 @@ STATIC void close_over_variables_etc(compiler_t *comp, scope_t *this_scope, int assert(n_pos_defaults >= 0); assert(n_kw_defaults >= 0); + // set flags + if (n_kw_defaults > 0) { + this_scope->scope_flags |= MP_SCOPE_FLAG_DEFKWARGS; + } + this_scope->num_def_pos_args = n_pos_defaults; + // make 
closed over variables, if any // ensure they are closed over in the order defined in the outer scope (mainly to agree with CPython) int nfree = 0; diff --git a/py/emitbc.c b/py/emitbc.c index 0ed7828f9f300..6b45019015376 100644 --- a/py/emitbc.c +++ b/py/emitbc.c @@ -294,6 +294,13 @@ void mp_emit_bc_start_pass(emit_t *emit, pass_kind_t pass, scope_t *scope) { emit_write_code_info_uint(emit, scope->exc_stack_size); } + // Write scope flags and number of arguments. + // TODO check that num args all fit in a byte + emit_write_code_info_byte(emit, emit->scope->scope_flags); + emit_write_code_info_byte(emit, emit->scope->num_pos_args); + emit_write_code_info_byte(emit, emit->scope->num_kwonly_args); + emit_write_code_info_byte(emit, emit->scope->num_def_pos_args); + // Align code-info so that following pointers are aligned on a machine word. emit_align_code_info_to_machine_word(emit); @@ -372,9 +379,7 @@ void mp_emit_bc_end_pass(emit_t *emit) { } else if (emit->pass == MP_PASS_EMIT) { mp_emit_glue_assign_bytecode(emit->scope->raw_code, emit->code_base, - emit->code_info_size + emit->bytecode_size, - emit->scope->num_pos_args, emit->scope->num_kwonly_args, - emit->scope->scope_flags); + emit->code_info_size + emit->bytecode_size, emit->scope->scope_flags); } } diff --git a/py/emitglue.c b/py/emitglue.c index 610e76d5345f0..83fe420b7b7aa 100644 --- a/py/emitglue.c +++ b/py/emitglue.c @@ -48,11 +48,9 @@ struct _mp_raw_code_t { mp_raw_code_kind_t kind : 3; mp_uint_t scope_flags : 7; mp_uint_t n_pos_args : 11; - mp_uint_t n_kwonly_args : 11; union { struct { - byte *code; - mp_uint_t len; + const byte *code; } u_byte; struct { void *fun_data; @@ -67,36 +65,32 @@ mp_raw_code_t *mp_emit_glue_new_raw_code(void) { return rc; } -void mp_emit_glue_assign_bytecode(mp_raw_code_t *rc, byte *code, mp_uint_t len, mp_uint_t n_pos_args, mp_uint_t n_kwonly_args, mp_uint_t scope_flags) { +void mp_emit_glue_assign_bytecode(mp_raw_code_t *rc, byte *code, mp_uint_t len, mp_uint_t scope_flags) 
{ rc->kind = MP_CODE_BYTECODE; rc->scope_flags = scope_flags; - rc->n_pos_args = n_pos_args; - rc->n_kwonly_args = n_kwonly_args; rc->data.u_byte.code = code; - rc->data.u_byte.len = len; #ifdef DEBUG_PRINT - DEBUG_printf("assign byte code: code=%p len=" UINT_FMT " n_pos_args=" UINT_FMT " n_kwonly_args=" UINT_FMT " flags=%x\n", code, len, n_pos_args, n_kwonly_args, (uint)scope_flags); + DEBUG_printf("assign byte code: code=%p len=" UINT_FMT " flags=%x\n", code, len, (uint)scope_flags); #endif #if MICROPY_DEBUG_PRINTERS if (mp_verbose_flag >= 2) { - mp_bytecode_print(rc, n_pos_args + n_kwonly_args, code, len); + mp_bytecode_print(rc, code, len); } #endif } #if MICROPY_EMIT_NATIVE || MICROPY_EMIT_INLINE_THUMB -void mp_emit_glue_assign_native(mp_raw_code_t *rc, mp_raw_code_kind_t kind, void *fun_data, mp_uint_t fun_len, mp_uint_t n_pos_args, mp_uint_t n_kwonly_args, mp_uint_t scope_flags, mp_uint_t type_sig) { +void mp_emit_glue_assign_native(mp_raw_code_t *rc, mp_raw_code_kind_t kind, void *fun_data, mp_uint_t fun_len, mp_uint_t n_pos_args, mp_uint_t scope_flags, mp_uint_t type_sig) { assert(kind == MP_CODE_NATIVE_PY || kind == MP_CODE_NATIVE_VIPER || kind == MP_CODE_NATIVE_ASM); rc->kind = kind; rc->scope_flags = scope_flags; rc->n_pos_args = n_pos_args; - rc->n_kwonly_args = n_kwonly_args; rc->data.u_native.fun_data = fun_data; rc->data.u_native.type_sig = type_sig; #ifdef DEBUG_PRINT - DEBUG_printf("assign native: kind=%d fun=%p len=" UINT_FMT " n_pos_args=" UINT_FMT " n_kwonly_args=" UINT_FMT " flags=%x\n", kind, fun_data, fun_len, n_pos_args, n_kwonly_args, (uint)scope_flags); + DEBUG_printf("assign native: kind=%d fun=%p len=" UINT_FMT " n_pos_args=" UINT_FMT " flags=%x\n", kind, fun_data, fun_len, n_pos_args, (uint)scope_flags); for (mp_uint_t i = 0; i < fun_len; i++) { if (i > 0 && i % 16 == 0) { DEBUG_printf("\n"); @@ -131,11 +125,11 @@ mp_obj_t mp_make_function_from_raw_code(mp_raw_code_t *rc, mp_obj_t def_args, mp switch (rc->kind) { case MP_CODE_BYTECODE: 
no_other_choice: - fun = mp_obj_new_fun_bc(rc->scope_flags, rc->n_pos_args, rc->n_kwonly_args, def_args, def_kw_args, rc->data.u_byte.code); + fun = mp_obj_new_fun_bc(def_args, def_kw_args, rc->data.u_byte.code); break; #if MICROPY_EMIT_NATIVE case MP_CODE_NATIVE_PY: - fun = mp_obj_new_fun_native(rc->scope_flags, rc->n_pos_args, rc->n_kwonly_args, def_args, def_kw_args, rc->data.u_native.fun_data); + fun = mp_obj_new_fun_native(def_args, def_kw_args, rc->data.u_native.fun_data); break; case MP_CODE_NATIVE_VIPER: fun = mp_obj_new_fun_viper(rc->n_pos_args, rc->data.u_native.fun_data, rc->data.u_native.type_sig); diff --git a/py/emitglue.h b/py/emitglue.h index 97e680b4c4c63..56029b3a9bd7c 100644 --- a/py/emitglue.h +++ b/py/emitglue.h @@ -43,8 +43,8 @@ typedef struct _mp_raw_code_t mp_raw_code_t; mp_raw_code_t *mp_emit_glue_new_raw_code(void); -void mp_emit_glue_assign_bytecode(mp_raw_code_t *rc, byte *code, mp_uint_t len, mp_uint_t n_pos_args, mp_uint_t n_kwonly_args, mp_uint_t scope_flags); -void mp_emit_glue_assign_native(mp_raw_code_t *rc, mp_raw_code_kind_t kind, void *fun_data, mp_uint_t fun_len, mp_uint_t n_pos_args, mp_uint_t n_kwonly_args, mp_uint_t scope_flags, mp_uint_t type_sig); +void mp_emit_glue_assign_bytecode(mp_raw_code_t *rc, byte *code, mp_uint_t len, mp_uint_t scope_flags); +void mp_emit_glue_assign_native(mp_raw_code_t *rc, mp_raw_code_kind_t kind, void *fun_data, mp_uint_t fun_len, mp_uint_t n_pos_args, mp_uint_t scope_flags, mp_uint_t type_sig); mp_obj_t mp_make_function_from_raw_code(mp_raw_code_t *rc, mp_obj_t def_args, mp_obj_t def_kw_args); mp_obj_t mp_make_closure_from_raw_code(mp_raw_code_t *rc, mp_uint_t n_closed_over, const mp_obj_t *args); diff --git a/py/emitinlinethumb.c b/py/emitinlinethumb.c index 6e341d0b0a6cc..ad1ad98549101 100644 --- a/py/emitinlinethumb.c +++ b/py/emitinlinethumb.c @@ -90,7 +90,7 @@ STATIC void emit_inline_thumb_end_pass(emit_inline_asm_t *emit) { if (emit->pass == MP_PASS_EMIT) { void *f = 
asm_thumb_get_code(emit->as); - mp_emit_glue_assign_native(emit->scope->raw_code, MP_CODE_NATIVE_ASM, f, asm_thumb_get_code_size(emit->as), emit->scope->num_pos_args, 0, 0, 0); + mp_emit_glue_assign_native(emit->scope->raw_code, MP_CODE_NATIVE_ASM, f, asm_thumb_get_code_size(emit->as), emit->scope->num_pos_args, 0, 0); } } diff --git a/py/emitnative.c b/py/emitnative.c index 1fcc8433850a2..99eac79253528 100644 --- a/py/emitnative.c +++ b/py/emitnative.c @@ -824,6 +824,10 @@ STATIC void emit_native_end_pass(emit_t *emit) { if (!emit->do_viper_types) { emit->prelude_offset = ASM_GET_CODE_POS(emit->as); + ASM_DATA(emit->as, 1, emit->scope->scope_flags); + ASM_DATA(emit->as, 1, emit->scope->num_pos_args); + ASM_DATA(emit->as, 1, emit->scope->num_kwonly_args); + ASM_DATA(emit->as, 1, emit->scope->num_def_pos_args); ASM_ALIGN(emit->as, ASM_WORD_SIZE); // write argument names as qstr objects @@ -874,8 +878,7 @@ STATIC void emit_native_end_pass(emit_t *emit) { mp_emit_glue_assign_native(emit->scope->raw_code, emit->do_viper_types ? MP_CODE_NATIVE_VIPER : MP_CODE_NATIVE_PY, - f, f_len, emit->scope->num_pos_args, emit->scope->num_kwonly_args, - emit->scope->scope_flags, type_sig); + f, f_len, emit->scope->num_pos_args, emit->scope->scope_flags, type_sig); } } diff --git a/py/obj.h b/py/obj.h index af5652c4ec4c9..af156e8f9c5c6 100644 --- a/py/obj.h +++ b/py/obj.h @@ -537,8 +537,8 @@ mp_obj_t mp_obj_new_exception_arg1(const mp_obj_type_t *exc_type, mp_obj_t arg); mp_obj_t mp_obj_new_exception_args(const mp_obj_type_t *exc_type, mp_uint_t n_args, const mp_obj_t *args); mp_obj_t mp_obj_new_exception_msg(const mp_obj_type_t *exc_type, const char *msg); mp_obj_t mp_obj_new_exception_msg_varg(const mp_obj_type_t *exc_type, const char *fmt, ...); // counts args by number of % symbols in fmt, excluding %%; can only handle void* sizes (ie no float/double!) 
-mp_obj_t mp_obj_new_fun_bc(mp_uint_t scope_flags, mp_uint_t n_pos_args, mp_uint_t n_kwonly_args, mp_obj_t def_args, mp_obj_t def_kw_args, const byte *code); -mp_obj_t mp_obj_new_fun_native(mp_uint_t scope_flags, mp_uint_t n_pos_args, mp_uint_t n_kwonly_args, mp_obj_t def_args_in, mp_obj_t def_kw_args, const void *fun_data); +mp_obj_t mp_obj_new_fun_bc(mp_obj_t def_args, mp_obj_t def_kw_args, const byte *code); +mp_obj_t mp_obj_new_fun_native(mp_obj_t def_args_in, mp_obj_t def_kw_args, const void *fun_data); mp_obj_t mp_obj_new_fun_viper(mp_uint_t n_args, void *fun_data, mp_uint_t type_sig); mp_obj_t mp_obj_new_fun_asm(mp_uint_t n_args, void *fun_data); mp_obj_t mp_obj_new_gen_wrap(mp_obj_t fun); diff --git a/py/objfun.c b/py/objfun.c index 53ddb0a7b90ca..a54e50d2cdca4 100644 --- a/py/objfun.c +++ b/py/objfun.c @@ -125,8 +125,12 @@ qstr mp_obj_fun_get_name(mp_const_obj_t fun_in) { const byte *bc = fun->bytecode; mp_decode_uint(&bc); // skip n_state mp_decode_uint(&bc); // skip n_exc_stack + bc++; // skip scope_params + mp_uint_t n_pos_args = *bc++; + mp_uint_t n_kwonly_args = *bc++; + bc++; // skip n_def_pos_args bc = MP_ALIGN(bc, sizeof(mp_uint_t)); // align - bc += (fun->n_pos_args + fun->n_kwonly_args) * sizeof(mp_uint_t); // skip arg names + bc += (n_pos_args + n_kwonly_args) * sizeof(mp_uint_t); // skip arg names return mp_obj_code_get_name(bc); } @@ -316,7 +320,7 @@ const mp_obj_type_t mp_type_fun_bc = { #endif }; -mp_obj_t mp_obj_new_fun_bc(mp_uint_t scope_flags, mp_uint_t n_pos_args, mp_uint_t n_kwonly_args, mp_obj_t def_args_in, mp_obj_t def_kw_args, const byte *code) { +mp_obj_t mp_obj_new_fun_bc(mp_obj_t def_args_in, mp_obj_t def_kw_args, const byte *code) { mp_uint_t n_def_args = 0; mp_uint_t n_extra_args = 0; mp_obj_tuple_t *def_args = def_args_in; @@ -331,12 +335,6 @@ mp_obj_t mp_obj_new_fun_bc(mp_uint_t scope_flags, mp_uint_t n_pos_args, mp_uint_ mp_obj_fun_bc_t *o = m_new_obj_var(mp_obj_fun_bc_t, mp_obj_t, n_extra_args); o->base.type = 
&mp_type_fun_bc; o->globals = mp_globals_get(); - o->n_pos_args = n_pos_args; - o->n_kwonly_args = n_kwonly_args; - o->n_def_args = n_def_args; - o->has_def_kw_args = def_kw_args != MP_OBJ_NULL; - o->takes_var_args = (scope_flags & MP_SCOPE_FLAG_VARARGS) != 0; - o->takes_kw_args = (scope_flags & MP_SCOPE_FLAG_VARKEYWORDS) != 0; o->bytecode = code; if (def_args != MP_OBJ_NULL) { memcpy(o->extra_args, def_args->items, n_def_args * sizeof(mp_obj_t)); @@ -366,8 +364,8 @@ STATIC const mp_obj_type_t mp_type_fun_native = { .unary_op = mp_generic_unary_op, }; -mp_obj_t mp_obj_new_fun_native(mp_uint_t scope_flags, mp_uint_t n_pos_args, mp_uint_t n_kwonly_args, mp_obj_t def_args_in, mp_obj_t def_kw_args, const void *fun_data) { - mp_obj_fun_bc_t *o = mp_obj_new_fun_bc(scope_flags, n_pos_args, n_kwonly_args, def_args_in, def_kw_args, (const byte*)fun_data); +mp_obj_t mp_obj_new_fun_native(mp_obj_t def_args_in, mp_obj_t def_kw_args, const void *fun_data) { + mp_obj_fun_bc_t *o = mp_obj_new_fun_bc(def_args_in, def_kw_args, (const byte*)fun_data); o->base.type = &mp_type_fun_native; return o; } diff --git a/py/objfun.h b/py/objfun.h index a5b9f2b5fd4a7..cdc495e5be9a2 100644 --- a/py/objfun.h +++ b/py/objfun.h @@ -31,12 +31,6 @@ typedef struct _mp_obj_fun_bc_t { mp_obj_base_t base; mp_obj_dict_t *globals; // the context within which this function was defined - mp_uint_t n_pos_args : 8; // number of arguments this function takes - mp_uint_t n_kwonly_args : 8; // number of keyword-only arguments this function takes - mp_uint_t n_def_args : 8; // number of default arguments - mp_uint_t has_def_kw_args : 1; // set if this function has default keyword args - mp_uint_t takes_var_args : 1; // set if this function takes variable args - mp_uint_t takes_kw_args : 1; // set if this function takes keyword args const byte *bytecode; // bytecode for the function // the following extra_args array is allocated space to take (in order): // - values of positional default args (if any) diff --git 
a/py/runtime0.h b/py/runtime0.h index 1d7d64bea6ccc..d00949f92958c 100644 --- a/py/runtime0.h +++ b/py/runtime0.h @@ -30,6 +30,7 @@ #define MP_SCOPE_FLAG_VARARGS (0x01) #define MP_SCOPE_FLAG_VARKEYWORDS (0x02) #define MP_SCOPE_FLAG_GENERATOR (0x04) +#define MP_SCOPE_FLAG_DEFKWARGS (0x08) // types for native (viper) function signature #define MP_NATIVE_TYPE_OBJ (0x00) diff --git a/py/scope.h b/py/scope.h index 0ea003516a7ad..fac936a729d70 100644 --- a/py/scope.h +++ b/py/scope.h @@ -66,6 +66,7 @@ typedef struct _scope_t { uint8_t emit_options; // see compile.h uint16_t num_pos_args; uint16_t num_kwonly_args; + uint16_t num_def_pos_args; uint16_t num_locals; uint16_t stack_size; // maximum size of the locals stack uint16_t exc_stack_size; // maximum size of the exception stack diff --git a/py/showbc.c b/py/showbc.c index 87e7c6af46cc3..538eddc40fcfa 100644 --- a/py/showbc.c +++ b/py/showbc.c @@ -54,18 +54,22 @@ const byte *mp_showbc_code_start; -void mp_bytecode_print(const void *descr, mp_uint_t n_total_args, const byte *ip, mp_uint_t len) { +void mp_bytecode_print(const void *descr, const byte *ip, mp_uint_t len) { mp_showbc_code_start = ip; - // get state size and exception stack size + // get bytecode parameters mp_uint_t n_state = mp_decode_uint(&ip); mp_uint_t n_exc_stack = mp_decode_uint(&ip); + /*mp_uint_t scope_flags =*/ ip++; + mp_uint_t n_pos_args = *ip++; + mp_uint_t n_kwonly_args = *ip++; + /*mp_uint_t n_def_pos_args =*/ ip++; ip = MP_ALIGN(ip, sizeof(mp_uint_t)); // get and skip arg names const mp_obj_t *arg_names = (const mp_obj_t*)ip; - ip += n_total_args * sizeof(mp_uint_t); + ip += (n_pos_args + n_kwonly_args) * sizeof(mp_uint_t); const byte *code_info = ip; mp_uint_t code_info_size = mp_decode_uint(&code_info); @@ -88,7 +92,7 @@ void mp_bytecode_print(const void *descr, mp_uint_t n_total_args, const byte *ip // bytecode prelude: arg names (as qstr objects) printf("arg names:"); - for (mp_uint_t i = 0; i < n_total_args; i++) { + for (mp_uint_t i = 
0; i < n_pos_args + n_kwonly_args; i++) { printf(" %s", qstr_str(MP_OBJ_QSTR_VALUE(arg_names[i]))); } printf("\n"); From 910febb39e3b237e9a71ba4be0d1f5aab4aad1c9 Mon Sep 17 00:00:00 2001 From: Damien George Date: Fri, 23 Oct 2015 01:23:11 +0100 Subject: [PATCH 03/12] py: Add constant table to bytecode. Contains just argument names at the moment but makes it easy to add arbitrary constants. --- py/bc.c | 9 +++--- py/bc.h | 29 ++++++++++-------- py/emitbc.c | 73 +++++++++++++++++++++----------------------- py/emitglue.c | 14 ++++++--- py/emitglue.h | 4 +-- py/emitinlinethumb.c | 2 +- py/emitnative.c | 33 ++++++++++++-------- py/obj.h | 4 +-- py/objfun.c | 13 ++++---- py/objfun.h | 1 + py/objgenerator.c | 1 - py/showbc.c | 10 ++---- 12 files changed, 97 insertions(+), 96 deletions(-) diff --git a/py/bc.c b/py/bc.c index da0ea78765146..9f0e79c677dd1 100644 --- a/py/bc.c +++ b/py/bc.c @@ -97,6 +97,9 @@ void mp_setup_code_state(mp_code_state *code_state, mp_obj_t self_in, mp_uint_t // ip comes in as an offset into bytecode, so turn it into a true pointer code_state->ip = self->bytecode + (mp_uint_t)code_state->ip; + // store pointer to constant table + code_state->const_table = self->const_table; + #if MICROPY_STACKLESS code_state->prev = NULL; #endif @@ -107,9 +110,6 @@ void mp_setup_code_state(mp_code_state *code_state, mp_obj_t self_in, mp_uint_t mp_uint_t n_kwonly_args = *code_state->ip++; mp_uint_t n_def_pos_args = *code_state->ip++; - // align ip - code_state->ip = MP_ALIGN(code_state->ip, sizeof(mp_uint_t)); - code_state->sp = &code_state->state[0] - 1; code_state->exc_sp = (mp_exc_stack_t*)(code_state->state + n_state) - 1; @@ -168,7 +168,7 @@ void mp_setup_code_state(mp_code_state *code_state, mp_obj_t self_in, mp_uint_t } // get pointer to arg_names array - const mp_obj_t *arg_names = (const mp_obj_t*)code_state->ip; + const mp_obj_t *arg_names = (const mp_obj_t*)code_state->const_table; for (mp_uint_t i = 0; i < n_kw; i++) { mp_obj_t wanted_arg_name = kwargs[2 
* i]; @@ -243,7 +243,6 @@ continue2:; // get the ip and skip argument names const byte *ip = code_state->ip; - ip += (n_pos_args + n_kwonly_args) * sizeof(mp_uint_t); // store pointer to code_info and jump over it { diff --git a/py/bc.h b/py/bc.h index 5824688d85994..4c8401b9fa3b1 100644 --- a/py/bc.h +++ b/py/bc.h @@ -38,24 +38,26 @@ // n_kwonly_args : byte number of keyword-only arguments this function takes // n_def_pos_args : byte number of default positional arguments // -// -// -// argname0 : obj (qstr) -// ... : obj (qstr) -// argnameN : obj (qstr) N = num_pos_args + num_kwonly_args -// // code_info_size : var uint | code_info_size counts bytes in this chunk // simple_name : var qstr | // source_file : var qstr | // | -// | +// | only needed if bytecode contains pointers // -// num_cells : byte number of locals that are cells -// local_num0 : byte -// ... : byte -// local_numN : byte N = num_cells +// local_num0 : byte | +// ... : byte | +// local_numN : byte | N = num_cells +// 255 : byte | end of list sentinel +// | // -// +// +// constant table layout: +// +// argname0 : obj (qstr) +// ... 
: obj (qstr) +// argnameN : obj (qstr) N = num_pos_args + num_kwonly_args +// const0 : obj +// constN : obj // Exception stack entry typedef struct _mp_exc_stack { @@ -70,6 +72,7 @@ typedef struct _mp_exc_stack { typedef struct _mp_code_state { const byte *code_info; const byte *ip; + const mp_uint_t *const_table; mp_obj_t *sp; // bit 0 is saved currently_in_except_block value mp_exc_stack_t *exc_sp; @@ -89,7 +92,7 @@ mp_uint_t mp_decode_uint(const byte **ptr); mp_vm_return_kind_t mp_execute_bytecode(mp_code_state *code_state, volatile mp_obj_t inject_exc); mp_code_state *mp_obj_fun_bc_prepare_codestate(mp_obj_t func, mp_uint_t n_args, mp_uint_t n_kw, const mp_obj_t *args); void mp_setup_code_state(mp_code_state *code_state, mp_obj_t self_in, mp_uint_t n_args, mp_uint_t n_kw, const mp_obj_t *args); -void mp_bytecode_print(const void *descr, const byte *code, mp_uint_t len); +void mp_bytecode_print(const void *descr, const byte *code, mp_uint_t len, const mp_uint_t *const_table); void mp_bytecode_print2(const byte *code, mp_uint_t len); const byte *mp_bytecode_print_str(const byte *ip); #define mp_bytecode_print_inst(code) mp_bytecode_print2(code, 1) diff --git a/py/emitbc.c b/py/emitbc.c index 6b45019015376..abe782b0d3ef2 100644 --- a/py/emitbc.c +++ b/py/emitbc.c @@ -56,6 +56,7 @@ struct _emit_t { mp_uint_t bytecode_offset; mp_uint_t bytecode_size; byte *code_base; // stores both byte code and code info + mp_uint_t *const_table; // Accessed as mp_uint_t, so must be aligned as such byte dummy_data[DUMMY_DATA_SIZE]; }; @@ -123,13 +124,6 @@ STATIC void emit_write_code_info_qstr(emit_t *emit, qstr qst) { emit_write_uint(emit, emit_get_cur_to_write_code_info, qst); } -STATIC void emit_write_code_info_prealigned_ptr(emit_t* emit, void *ptr) { - mp_uint_t *c = (mp_uint_t*)emit_get_cur_to_write_code_info(emit, sizeof(mp_uint_t)); - // Verify thar c is already uint-aligned - assert(c == MP_ALIGN(c, sizeof(mp_uint_t))); - *c = (mp_uint_t)ptr; -} - #if 
MICROPY_ENABLE_SOURCE_LINE STATIC void emit_write_code_info_bytes_lines(emit_t *emit, mp_uint_t bytes_to_skip, mp_uint_t lines_to_skip) { assert(bytes_to_skip > 0 || lines_to_skip > 0); @@ -301,37 +295,6 @@ void mp_emit_bc_start_pass(emit_t *emit, pass_kind_t pass, scope_t *scope) { emit_write_code_info_byte(emit, emit->scope->num_kwonly_args); emit_write_code_info_byte(emit, emit->scope->num_def_pos_args); - // Align code-info so that following pointers are aligned on a machine word. - emit_align_code_info_to_machine_word(emit); - - // Write argument names (needed to resolve positional args passed as - // keywords). We store them as full word-sized objects for efficient access - // in mp_setup_code_state this is the start of the prelude and is guaranteed - // to be aligned on a word boundary. - { - // For a given argument position (indexed by i) we need to find the - // corresponding id_info which is a parameter, as it has the correct - // qstr name to use as the argument name. Note that it's not a simple - // 1-1 mapping (ie i!=j in general) because of possible closed-over - // variables. In the case that the argument i has no corresponding - // parameter we use "*" as its name (since no argument can ever be named - // "*"). We could use a blank qstr but "*" is better for debugging. - // Note: there is some wasted RAM here for the case of storing a qstr - // for each closed-over variable, and maybe there is a better way to do - // it, but that would require changes to mp_setup_code_state. - for (int i = 0; i < scope->num_pos_args + scope->num_kwonly_args; i++) { - qstr qst = MP_QSTR__star_; - for (int j = 0; j < scope->id_info_len; ++j) { - id_info_t *id = &scope->id_info[j]; - if ((id->flags & ID_FLAG_IS_PARAM) && id->local_num == i) { - qst = id->qst; - break; - } - } - emit_write_code_info_prealigned_ptr(emit, MP_OBJ_NEW_QSTR(qst)); - } - } - // Write size of the rest of the code info. 
We don't know how big this // variable uint will be on the MP_PASS_CODE_SIZE pass so we reserve 2 bytes // for it and hope that is enough! TODO assert this or something. @@ -354,6 +317,35 @@ void mp_emit_bc_start_pass(emit_t *emit, pass_kind_t pass, scope_t *scope) { } } emit_write_bytecode_byte(emit, 255); // end of list sentinel + + if (pass == MP_PASS_EMIT) { + // Write argument names (needed to resolve positional args passed as + // keywords). We store them as full word-sized objects for efficient access + // in mp_setup_code_state this is the start of the prelude and is guaranteed + // to be aligned on a word boundary. + + // For a given argument position (indexed by i) we need to find the + // corresponding id_info which is a parameter, as it has the correct + // qstr name to use as the argument name. Note that it's not a simple + // 1-1 mapping (ie i!=j in general) because of possible closed-over + // variables. In the case that the argument i has no corresponding + // parameter we use "*" as its name (since no argument can ever be named + // "*"). We could use a blank qstr but "*" is better for debugging. + // Note: there is some wasted RAM here for the case of storing a qstr + // for each closed-over variable, and maybe there is a better way to do + // it, but that would require changes to mp_setup_code_state. 
+ for (int i = 0; i < scope->num_pos_args + scope->num_kwonly_args; i++) { + qstr qst = MP_QSTR__star_; + for (int j = 0; j < scope->id_info_len; ++j) { + id_info_t *id = &scope->id_info[j]; + if ((id->flags & ID_FLAG_IS_PARAM) && id->local_num == i) { + qst = id->qst; + break; + } + } + emit->const_table[i] = (mp_uint_t)MP_OBJ_NEW_QSTR(qst); + } + } } void mp_emit_bc_end_pass(emit_t *emit) { @@ -377,9 +369,12 @@ void mp_emit_bc_end_pass(emit_t *emit) { emit->bytecode_size = emit->bytecode_offset; emit->code_base = m_new0(byte, emit->code_info_size + emit->bytecode_size); + emit->const_table = m_new0(mp_uint_t, emit->scope->num_pos_args + emit->scope->num_kwonly_args); + } else if (emit->pass == MP_PASS_EMIT) { mp_emit_glue_assign_bytecode(emit->scope->raw_code, emit->code_base, - emit->code_info_size + emit->bytecode_size, emit->scope->scope_flags); + emit->code_info_size + emit->bytecode_size, + emit->const_table, emit->scope->scope_flags); } } diff --git a/py/emitglue.c b/py/emitglue.c index 83fe420b7b7aa..feed5d99a282a 100644 --- a/py/emitglue.c +++ b/py/emitglue.c @@ -51,9 +51,11 @@ struct _mp_raw_code_t { union { struct { const byte *code; + const mp_uint_t *const_table; } u_byte; struct { void *fun_data; + const mp_uint_t *const_table; mp_uint_t type_sig; // for viper, compressed as 2-bit types; ret is MSB, then arg0, arg1, etc } u_native; } data; @@ -65,28 +67,30 @@ mp_raw_code_t *mp_emit_glue_new_raw_code(void) { return rc; } -void mp_emit_glue_assign_bytecode(mp_raw_code_t *rc, byte *code, mp_uint_t len, mp_uint_t scope_flags) { +void mp_emit_glue_assign_bytecode(mp_raw_code_t *rc, const byte *code, mp_uint_t len, const mp_uint_t *const_table, mp_uint_t scope_flags) { rc->kind = MP_CODE_BYTECODE; rc->scope_flags = scope_flags; rc->data.u_byte.code = code; + rc->data.u_byte.const_table = const_table; #ifdef DEBUG_PRINT DEBUG_printf("assign byte code: code=%p len=" UINT_FMT " flags=%x\n", code, len, (uint)scope_flags); #endif #if MICROPY_DEBUG_PRINTERS if 
(mp_verbose_flag >= 2) { - mp_bytecode_print(rc, code, len); + mp_bytecode_print(rc, code, len, const_table); } #endif } #if MICROPY_EMIT_NATIVE || MICROPY_EMIT_INLINE_THUMB -void mp_emit_glue_assign_native(mp_raw_code_t *rc, mp_raw_code_kind_t kind, void *fun_data, mp_uint_t fun_len, mp_uint_t n_pos_args, mp_uint_t scope_flags, mp_uint_t type_sig) { +void mp_emit_glue_assign_native(mp_raw_code_t *rc, mp_raw_code_kind_t kind, void *fun_data, mp_uint_t fun_len, const mp_uint_t *const_table, mp_uint_t n_pos_args, mp_uint_t scope_flags, mp_uint_t type_sig) { assert(kind == MP_CODE_NATIVE_PY || kind == MP_CODE_NATIVE_VIPER || kind == MP_CODE_NATIVE_ASM); rc->kind = kind; rc->scope_flags = scope_flags; rc->n_pos_args = n_pos_args; rc->data.u_native.fun_data = fun_data; + rc->data.u_native.const_table = const_table; rc->data.u_native.type_sig = type_sig; #ifdef DEBUG_PRINT @@ -125,11 +129,11 @@ mp_obj_t mp_make_function_from_raw_code(mp_raw_code_t *rc, mp_obj_t def_args, mp switch (rc->kind) { case MP_CODE_BYTECODE: no_other_choice: - fun = mp_obj_new_fun_bc(def_args, def_kw_args, rc->data.u_byte.code); + fun = mp_obj_new_fun_bc(def_args, def_kw_args, rc->data.u_byte.code, rc->data.u_byte.const_table); break; #if MICROPY_EMIT_NATIVE case MP_CODE_NATIVE_PY: - fun = mp_obj_new_fun_native(def_args, def_kw_args, rc->data.u_native.fun_data); + fun = mp_obj_new_fun_native(def_args, def_kw_args, rc->data.u_native.fun_data, rc->data.u_native.const_table); break; case MP_CODE_NATIVE_VIPER: fun = mp_obj_new_fun_viper(rc->n_pos_args, rc->data.u_native.fun_data, rc->data.u_native.type_sig); diff --git a/py/emitglue.h b/py/emitglue.h index 56029b3a9bd7c..9bb2ba2d74f97 100644 --- a/py/emitglue.h +++ b/py/emitglue.h @@ -43,8 +43,8 @@ typedef struct _mp_raw_code_t mp_raw_code_t; mp_raw_code_t *mp_emit_glue_new_raw_code(void); -void mp_emit_glue_assign_bytecode(mp_raw_code_t *rc, byte *code, mp_uint_t len, mp_uint_t scope_flags); -void mp_emit_glue_assign_native(mp_raw_code_t *rc, 
mp_raw_code_kind_t kind, void *fun_data, mp_uint_t fun_len, mp_uint_t n_pos_args, mp_uint_t scope_flags, mp_uint_t type_sig); +void mp_emit_glue_assign_bytecode(mp_raw_code_t *rc, const byte *code, mp_uint_t len, const mp_uint_t *const_table, mp_uint_t scope_flags); +void mp_emit_glue_assign_native(mp_raw_code_t *rc, mp_raw_code_kind_t kind, void *fun_data, mp_uint_t fun_len, const mp_uint_t *const_table, mp_uint_t n_pos_args, mp_uint_t scope_flags, mp_uint_t type_sig); mp_obj_t mp_make_function_from_raw_code(mp_raw_code_t *rc, mp_obj_t def_args, mp_obj_t def_kw_args); mp_obj_t mp_make_closure_from_raw_code(mp_raw_code_t *rc, mp_uint_t n_closed_over, const mp_obj_t *args); diff --git a/py/emitinlinethumb.c b/py/emitinlinethumb.c index ad1ad98549101..d6e07985a4423 100644 --- a/py/emitinlinethumb.c +++ b/py/emitinlinethumb.c @@ -90,7 +90,7 @@ STATIC void emit_inline_thumb_end_pass(emit_inline_asm_t *emit) { if (emit->pass == MP_PASS_EMIT) { void *f = asm_thumb_get_code(emit->as); - mp_emit_glue_assign_native(emit->scope->raw_code, MP_CODE_NATIVE_ASM, f, asm_thumb_get_code_size(emit->as), emit->scope->num_pos_args, 0, 0); + mp_emit_glue_assign_native(emit->scope->raw_code, MP_CODE_NATIVE_ASM, f, asm_thumb_get_code_size(emit->as), NULL, emit->scope->num_pos_args, 0, 0); } } diff --git a/py/emitnative.c b/py/emitnative.c index 99eac79253528..d8f1640c0f4bb 100644 --- a/py/emitnative.c +++ b/py/emitnative.c @@ -567,6 +567,7 @@ struct _emit_t { vtype_kind_t saved_stack_vtype; int prelude_offset; + int const_table_offset; int n_state; int stack_start; int stack_size; @@ -828,7 +829,24 @@ STATIC void emit_native_end_pass(emit_t *emit) { ASM_DATA(emit->as, 1, emit->scope->num_pos_args); ASM_DATA(emit->as, 1, emit->scope->num_kwonly_args); ASM_DATA(emit->as, 1, emit->scope->num_def_pos_args); + + // write code info (just contains block name and source file) + ASM_DATA(emit->as, 1, 5); + ASM_DATA(emit->as, 2, emit->scope->simple_name); + ASM_DATA(emit->as, 2, 
emit->scope->source_file); + + // bytecode prelude: initialise closed over variables + for (int i = 0; i < emit->scope->id_info_len; i++) { + id_info_t *id = &emit->scope->id_info[i]; + if (id->kind == ID_INFO_KIND_CELL) { + assert(id->local_num < 255); + ASM_DATA(emit->as, 1, id->local_num); // write the local which should be converted to a cell + } + } + ASM_DATA(emit->as, 1, 255); // end of list sentinel + ASM_ALIGN(emit->as, ASM_WORD_SIZE); + emit->const_table_offset = ASM_GET_CODE_POS(emit->as); // write argument names as qstr objects // see comment in corresponding part of emitbc.c about the logic here @@ -844,18 +862,6 @@ STATIC void emit_native_end_pass(emit_t *emit) { ASM_DATA(emit->as, ASM_WORD_SIZE, (mp_uint_t)MP_OBJ_NEW_QSTR(qst)); } - // write dummy code info (for mp_setup_code_state to parse) - ASM_DATA(emit->as, 1, 1); - - // bytecode prelude: initialise closed over variables - for (int i = 0; i < emit->scope->id_info_len; i++) { - id_info_t *id = &emit->scope->id_info[i]; - if (id->kind == ID_INFO_KIND_CELL) { - assert(id->local_num < 255); - ASM_DATA(emit->as, 1, id->local_num); // write the local which should be converted to a cell - } - } - ASM_DATA(emit->as, 1, 255); // end of list sentinel } ASM_END_PASS(emit->as); @@ -878,7 +884,8 @@ STATIC void emit_native_end_pass(emit_t *emit) { mp_emit_glue_assign_native(emit->scope->raw_code, emit->do_viper_types ? 
MP_CODE_NATIVE_VIPER : MP_CODE_NATIVE_PY, - f, f_len, emit->scope->num_pos_args, emit->scope->scope_flags, type_sig); + f, f_len, (mp_uint_t*)((byte*)f + emit->const_table_offset), + emit->scope->num_pos_args, emit->scope->scope_flags, type_sig); } } diff --git a/py/obj.h b/py/obj.h index af156e8f9c5c6..f03615ca0ce7f 100644 --- a/py/obj.h +++ b/py/obj.h @@ -537,8 +537,8 @@ mp_obj_t mp_obj_new_exception_arg1(const mp_obj_type_t *exc_type, mp_obj_t arg); mp_obj_t mp_obj_new_exception_args(const mp_obj_type_t *exc_type, mp_uint_t n_args, const mp_obj_t *args); mp_obj_t mp_obj_new_exception_msg(const mp_obj_type_t *exc_type, const char *msg); mp_obj_t mp_obj_new_exception_msg_varg(const mp_obj_type_t *exc_type, const char *fmt, ...); // counts args by number of % symbols in fmt, excluding %%; can only handle void* sizes (ie no float/double!) -mp_obj_t mp_obj_new_fun_bc(mp_obj_t def_args, mp_obj_t def_kw_args, const byte *code); -mp_obj_t mp_obj_new_fun_native(mp_obj_t def_args_in, mp_obj_t def_kw_args, const void *fun_data); +mp_obj_t mp_obj_new_fun_bc(mp_obj_t def_args, mp_obj_t def_kw_args, const byte *code, const mp_uint_t *const_table); +mp_obj_t mp_obj_new_fun_native(mp_obj_t def_args_in, mp_obj_t def_kw_args, const void *fun_data, const mp_uint_t *const_table); mp_obj_t mp_obj_new_fun_viper(mp_uint_t n_args, void *fun_data, mp_uint_t type_sig); mp_obj_t mp_obj_new_fun_asm(mp_uint_t n_args, void *fun_data); mp_obj_t mp_obj_new_gen_wrap(mp_obj_t fun); diff --git a/py/objfun.c b/py/objfun.c index a54e50d2cdca4..f55d44ca293a6 100644 --- a/py/objfun.c +++ b/py/objfun.c @@ -126,11 +126,9 @@ qstr mp_obj_fun_get_name(mp_const_obj_t fun_in) { mp_decode_uint(&bc); // skip n_state mp_decode_uint(&bc); // skip n_exc_stack bc++; // skip scope_params - mp_uint_t n_pos_args = *bc++; - mp_uint_t n_kwonly_args = *bc++; + bc++; // skip n_pos_args + bc++; // skip n_kwonly_args bc++; // skip n_def_pos_args - bc = MP_ALIGN(bc, sizeof(mp_uint_t)); // align - bc += (n_pos_args + 
n_kwonly_args) * sizeof(mp_uint_t); // skip arg names return mp_obj_code_get_name(bc); } @@ -320,7 +318,7 @@ const mp_obj_type_t mp_type_fun_bc = { #endif }; -mp_obj_t mp_obj_new_fun_bc(mp_obj_t def_args_in, mp_obj_t def_kw_args, const byte *code) { +mp_obj_t mp_obj_new_fun_bc(mp_obj_t def_args_in, mp_obj_t def_kw_args, const byte *code, const mp_uint_t *const_table) { mp_uint_t n_def_args = 0; mp_uint_t n_extra_args = 0; mp_obj_tuple_t *def_args = def_args_in; @@ -336,6 +334,7 @@ mp_obj_t mp_obj_new_fun_bc(mp_obj_t def_args_in, mp_obj_t def_kw_args, const byt o->base.type = &mp_type_fun_bc; o->globals = mp_globals_get(); o->bytecode = code; + o->const_table = const_table; if (def_args != MP_OBJ_NULL) { memcpy(o->extra_args, def_args->items, n_def_args * sizeof(mp_obj_t)); } @@ -364,8 +363,8 @@ STATIC const mp_obj_type_t mp_type_fun_native = { .unary_op = mp_generic_unary_op, }; -mp_obj_t mp_obj_new_fun_native(mp_obj_t def_args_in, mp_obj_t def_kw_args, const void *fun_data) { - mp_obj_fun_bc_t *o = mp_obj_new_fun_bc(def_args_in, def_kw_args, (const byte*)fun_data); +mp_obj_t mp_obj_new_fun_native(mp_obj_t def_args_in, mp_obj_t def_kw_args, const void *fun_data, const mp_uint_t *const_table) { + mp_obj_fun_bc_t *o = mp_obj_new_fun_bc(def_args_in, def_kw_args, (const byte*)fun_data, const_table); o->base.type = &mp_type_fun_native; return o; } diff --git a/py/objfun.h b/py/objfun.h index cdc495e5be9a2..d02fada9b1921 100644 --- a/py/objfun.h +++ b/py/objfun.h @@ -32,6 +32,7 @@ typedef struct _mp_obj_fun_bc_t { mp_obj_base_t base; mp_obj_dict_t *globals; // the context within which this function was defined const byte *bytecode; // bytecode for the function + const mp_uint_t *const_table; // constant table // the following extra_args array is allocated space to take (in order): // - values of positional default args (if any) // - a single slot for default kw args dict (if it has them) diff --git a/py/objgenerator.c b/py/objgenerator.c index 
f7b637e471977..59ca6b85fc022 100644 --- a/py/objgenerator.c +++ b/py/objgenerator.c @@ -68,7 +68,6 @@ STATIC mp_obj_t gen_wrap_call(mp_obj_t self_in, mp_uint_t n_args, mp_uint_t n_kw o->globals = self_fun->globals; o->code_state.n_state = n_state; - o->code_state.code_info = 0; // offset to code-info o->code_state.ip = (byte*)(ip - self_fun->bytecode); // offset to prelude mp_setup_code_state(&o->code_state, self_fun, n_args, n_kw, args); return o; diff --git a/py/showbc.c b/py/showbc.c index 538eddc40fcfa..62c6168b78209 100644 --- a/py/showbc.c +++ b/py/showbc.c @@ -54,7 +54,7 @@ const byte *mp_showbc_code_start; -void mp_bytecode_print(const void *descr, const byte *ip, mp_uint_t len) { +void mp_bytecode_print(const void *descr, const byte *ip, mp_uint_t len, const mp_uint_t *const_table) { mp_showbc_code_start = ip; // get bytecode parameters @@ -65,12 +65,6 @@ void mp_bytecode_print(const void *descr, const byte *ip, mp_uint_t len) { mp_uint_t n_kwonly_args = *ip++; /*mp_uint_t n_def_pos_args =*/ ip++; - ip = MP_ALIGN(ip, sizeof(mp_uint_t)); - - // get and skip arg names - const mp_obj_t *arg_names = (const mp_obj_t*)ip; - ip += (n_pos_args + n_kwonly_args) * sizeof(mp_uint_t); - const byte *code_info = ip; mp_uint_t code_info_size = mp_decode_uint(&code_info); ip += code_info_size; @@ -93,7 +87,7 @@ void mp_bytecode_print(const void *descr, const byte *ip, mp_uint_t len) { // bytecode prelude: arg names (as qstr objects) printf("arg names:"); for (mp_uint_t i = 0; i < n_pos_args + n_kwonly_args; i++) { - printf(" %s", qstr_str(MP_OBJ_QSTR_VALUE(arg_names[i]))); + printf(" %s", qstr_str(MP_OBJ_QSTR_VALUE(const_table[i]))); } printf("\n"); From 475b674eb22e00d54aaa4b5d5248a88d3eab7669 Mon Sep 17 00:00:00 2001 From: Damien George Date: Mon, 2 Nov 2015 17:27:18 +0000 Subject: [PATCH 04/12] py: Add MICROPY_PORTABLE_CODE, for compiler to produce portable bytecode. 
Main changes when MICROPY_PORTABLE_CODE is enabled are: - qstrs are encoded as 2-byte fixed width in the bytecode - all pointers are removed from bytecode and put in const_table (this includes const objects and raw code pointers) --- py/emitbc.c | 98 ++++++++++++++++++++++++++++++++++++++++--------- py/emitnative.c | 12 ++++-- py/mpconfig.h | 5 +++ py/objfun.c | 4 ++ py/showbc.c | 21 +++++++++++ py/vm.c | 21 +++++++++++ 6 files changed, 140 insertions(+), 21 deletions(-) diff --git a/py/emitbc.c b/py/emitbc.c index abe782b0d3ef2..1662ea38c3a80 100644 --- a/py/emitbc.c +++ b/py/emitbc.c @@ -56,7 +56,14 @@ struct _emit_t { mp_uint_t bytecode_offset; mp_uint_t bytecode_size; byte *code_base; // stores both byte code and code info + + #if MICROPY_PORTABLE_CODE + uint16_t ct_cur_obj; + uint16_t ct_num_obj; + uint16_t ct_cur_raw_code; + #endif mp_uint_t *const_table; + // Accessed as mp_uint_t, so must be aligned as such byte dummy_data[DUMMY_DATA_SIZE]; }; @@ -108,10 +115,6 @@ STATIC byte *emit_get_cur_to_write_code_info(emit_t *emit, int num_bytes_to_writ } } -STATIC void emit_align_code_info_to_machine_word(emit_t *emit) { - emit->code_info_offset = (emit->code_info_offset + sizeof(mp_uint_t) - 1) & (~(sizeof(mp_uint_t) - 1)); -} - STATIC void emit_write_code_info_byte(emit_t* emit, byte val) { *emit_get_cur_to_write_code_info(emit, 1) = val; } @@ -121,7 +124,14 @@ STATIC void emit_write_code_info_uint(emit_t* emit, mp_uint_t val) { } STATIC void emit_write_code_info_qstr(emit_t *emit, qstr qst) { + #if MICROPY_PORTABLE_CODE + assert((qst >> 16) == 0); + byte *c = emit_get_cur_to_write_code_info(emit, 2); + c[0] = qst; + c[1] = qst >> 8; + #else emit_write_uint(emit, emit_get_cur_to_write_code_info, qst); + #endif } #if MICROPY_ENABLE_SOURCE_LINE @@ -163,10 +173,6 @@ STATIC byte *emit_get_cur_to_write_bytecode(emit_t *emit, int num_bytes_to_write } } -STATIC void emit_align_bytecode_to_machine_word(emit_t *emit) { - emit->bytecode_offset = (emit->bytecode_offset + 
sizeof(mp_uint_t) - 1) & (~(sizeof(mp_uint_t) - 1)); -} - STATIC void emit_write_bytecode_byte(emit_t *emit, byte b1) { byte *c = emit_get_cur_to_write_bytecode(emit, 1); c[0] = b1; @@ -211,18 +217,55 @@ STATIC void emit_write_bytecode_byte_uint(emit_t *emit, byte b, mp_uint_t val) { emit_write_uint(emit, emit_get_cur_to_write_bytecode, val); } -// aligns the pointer so it is friendly to GC +#if MICROPY_PORTABLE_CODE +STATIC void emit_write_bytecode_byte_const(emit_t *emit, byte b, mp_uint_t n, mp_uint_t c) { + if (emit->pass == MP_PASS_EMIT) { + emit->const_table[n] = c; + } + emit_write_bytecode_byte_uint(emit, b, n); +} +#else STATIC void emit_write_bytecode_byte_ptr(emit_t *emit, byte b, void *ptr) { + // aligns the pointer so it is friendly to GC emit_write_bytecode_byte(emit, b); - emit_align_bytecode_to_machine_word(emit); + emit->bytecode_offset = (mp_uint_t)MP_ALIGN(emit->bytecode_offset, sizeof(mp_uint_t)); mp_uint_t *c = (mp_uint_t*)emit_get_cur_to_write_bytecode(emit, sizeof(mp_uint_t)); // Verify thar c is already uint-aligned assert(c == MP_ALIGN(c, sizeof(mp_uint_t))); *c = (mp_uint_t)ptr; } +#endif STATIC void emit_write_bytecode_byte_qstr(emit_t* emit, byte b, qstr qst) { + #if MICROPY_PORTABLE_CODE + assert((qst >> 16) == 0); + byte *c = emit_get_cur_to_write_bytecode(emit, 3); + c[0] = b; + c[1] = qst; + c[2] = qst >> 8; + #else emit_write_bytecode_byte_uint(emit, b, qst); + #endif +} + +STATIC void emit_write_bytecode_byte_obj(emit_t *emit, byte b, void *ptr) { + #if MICROPY_PORTABLE_CODE + emit_write_bytecode_byte_const(emit, b, + emit->scope->num_pos_args + emit->scope->num_kwonly_args + + emit->ct_cur_obj++, (mp_uint_t)ptr); + #else + emit_write_bytecode_byte_ptr(emit, b, ptr); + #endif +} + +STATIC void emit_write_bytecode_byte_raw_code(emit_t *emit, byte b, mp_raw_code_t *rc) { + #if MICROPY_PORTABLE_CODE + emit_write_bytecode_byte_const(emit, b, + emit->scope->num_pos_args + emit->scope->num_kwonly_args + + emit->ct_num_obj + 
emit->ct_cur_raw_code++, (mp_uint_t)rc); + #else + emit_write_bytecode_byte_ptr(emit, b, rc); + #endif } // unsigned labels are relative to ip following this instruction, stored as 16 bits @@ -318,6 +361,11 @@ void mp_emit_bc_start_pass(emit_t *emit, pass_kind_t pass, scope_t *scope) { } emit_write_bytecode_byte(emit, 255); // end of list sentinel + #if MICROPY_PORTABLE_CODE + emit->ct_cur_obj = 0; + emit->ct_cur_raw_code = 0; + #endif + if (pass == MP_PASS_EMIT) { // Write argument names (needed to resolve positional args passed as // keywords). We store them as full word-sized objects for efficient access @@ -360,16 +408,30 @@ void mp_emit_bc_end_pass(emit_t *emit) { emit_write_code_info_byte(emit, 0); // end of line number info + #if MICROPY_PORTABLE_CODE + assert(emit->pass <= MP_PASS_STACK_SIZE || (emit->ct_num_obj == emit->ct_cur_obj)); + emit->ct_num_obj = emit->ct_cur_obj; + #endif + if (emit->pass == MP_PASS_CODE_SIZE) { + #if !MICROPY_PORTABLE_CODE // so bytecode is aligned - emit_align_code_info_to_machine_word(emit); + emit->code_info_offset = (mp_uint_t)MP_ALIGN(emit->code_info_offset, sizeof(mp_uint_t)); + #endif // calculate size of total code-info + bytecode, in bytes emit->code_info_size = emit->code_info_offset; emit->bytecode_size = emit->bytecode_offset; emit->code_base = m_new0(byte, emit->code_info_size + emit->bytecode_size); - emit->const_table = m_new0(mp_uint_t, emit->scope->num_pos_args + emit->scope->num_kwonly_args); + #if MICROPY_PORTABLE_CODE + emit->const_table = m_new0(mp_uint_t, + emit->scope->num_pos_args + emit->scope->num_kwonly_args + + emit->ct_cur_obj + emit->ct_cur_raw_code); + #else + emit->const_table = m_new0(mp_uint_t, + emit->scope->num_pos_args + emit->scope->num_kwonly_args); + #endif } else if (emit->pass == MP_PASS_EMIT) { mp_emit_glue_assign_bytecode(emit->scope->raw_code, emit->code_base, @@ -457,7 +519,7 @@ void mp_emit_bc_load_const_tok(emit_t *emit, mp_token_kind_t tok) { case MP_TOKEN_KW_NONE: 
emit_write_bytecode_byte(emit, MP_BC_LOAD_CONST_NONE); break; case MP_TOKEN_KW_TRUE: emit_write_bytecode_byte(emit, MP_BC_LOAD_CONST_TRUE); break; no_other_choice: - case MP_TOKEN_ELLIPSIS: emit_write_bytecode_byte_ptr(emit, MP_BC_LOAD_CONST_OBJ, (void*)&mp_const_ellipsis_obj); break; + case MP_TOKEN_ELLIPSIS: emit_write_bytecode_byte_obj(emit, MP_BC_LOAD_CONST_OBJ, (void*)&mp_const_ellipsis_obj); break; default: assert(0); goto no_other_choice; // to help flow control analysis } } @@ -478,7 +540,7 @@ void mp_emit_bc_load_const_str(emit_t *emit, qstr qst) { void mp_emit_bc_load_const_obj(emit_t *emit, void *obj) { emit_bc_pre(emit, 1); - emit_write_bytecode_byte_ptr(emit, MP_BC_LOAD_CONST_OBJ, obj); + emit_write_bytecode_byte_obj(emit, MP_BC_LOAD_CONST_OBJ, obj); } void mp_emit_bc_load_null(emit_t *emit) { @@ -821,22 +883,22 @@ void mp_emit_bc_unpack_ex(emit_t *emit, mp_uint_t n_left, mp_uint_t n_right) { void mp_emit_bc_make_function(emit_t *emit, scope_t *scope, mp_uint_t n_pos_defaults, mp_uint_t n_kw_defaults) { if (n_pos_defaults == 0 && n_kw_defaults == 0) { emit_bc_pre(emit, 1); - emit_write_bytecode_byte_ptr(emit, MP_BC_MAKE_FUNCTION, scope->raw_code); + emit_write_bytecode_byte_raw_code(emit, MP_BC_MAKE_FUNCTION, scope->raw_code); } else { emit_bc_pre(emit, -1); - emit_write_bytecode_byte_ptr(emit, MP_BC_MAKE_FUNCTION_DEFARGS, scope->raw_code); + emit_write_bytecode_byte_raw_code(emit, MP_BC_MAKE_FUNCTION_DEFARGS, scope->raw_code); } } void mp_emit_bc_make_closure(emit_t *emit, scope_t *scope, mp_uint_t n_closed_over, mp_uint_t n_pos_defaults, mp_uint_t n_kw_defaults) { if (n_pos_defaults == 0 && n_kw_defaults == 0) { emit_bc_pre(emit, -n_closed_over + 1); - emit_write_bytecode_byte_ptr(emit, MP_BC_MAKE_CLOSURE, scope->raw_code); + emit_write_bytecode_byte_raw_code(emit, MP_BC_MAKE_CLOSURE, scope->raw_code); emit_write_bytecode_byte(emit, n_closed_over); } else { assert(n_closed_over <= 255); emit_bc_pre(emit, -2 - n_closed_over + 1); - 
emit_write_bytecode_byte_ptr(emit, MP_BC_MAKE_CLOSURE_DEFARGS, scope->raw_code); + emit_write_bytecode_byte_raw_code(emit, MP_BC_MAKE_CLOSURE_DEFARGS, scope->raw_code); emit_write_bytecode_byte(emit, n_closed_over); } } diff --git a/py/emitnative.c b/py/emitnative.c index d8f1640c0f4bb..3dd1013b427fa 100644 --- a/py/emitnative.c +++ b/py/emitnative.c @@ -830,10 +830,16 @@ STATIC void emit_native_end_pass(emit_t *emit) { ASM_DATA(emit->as, 1, emit->scope->num_kwonly_args); ASM_DATA(emit->as, 1, emit->scope->num_def_pos_args); - // write code info (just contains block name and source file) + // write code info + #if MICROPY_PORTABLE_CODE ASM_DATA(emit->as, 1, 5); - ASM_DATA(emit->as, 2, emit->scope->simple_name); - ASM_DATA(emit->as, 2, emit->scope->source_file); + ASM_DATA(emit->as, 1, emit->scope->simple_name); + ASM_DATA(emit->as, 1, emit->scope->simple_name >> 8); + ASM_DATA(emit->as, 1, emit->scope->source_file); + ASM_DATA(emit->as, 1, emit->scope->source_file >> 8); + #else + ASM_DATA(emit->as, 1, 1); + #endif // bytecode prelude: initialise closed over variables for (int i = 0; i < emit->scope->id_info_len; i++) { diff --git a/py/mpconfig.h b/py/mpconfig.h index 98e9406c6bb1d..588bceb901202 100644 --- a/py/mpconfig.h +++ b/py/mpconfig.h @@ -189,6 +189,11 @@ /*****************************************************************************/ /* Micro Python emitters */ +// Whether generated bytecode is portable from one VM/runtime to another +#ifndef MICROPY_PORTABLE_CODE +#define MICROPY_PORTABLE_CODE (0) +#endif + // Whether to emit x64 native code #ifndef MICROPY_EMIT_X64 #define MICROPY_EMIT_X64 (0) diff --git a/py/objfun.c b/py/objfun.c index f55d44ca293a6..14c41eeccf773 100644 --- a/py/objfun.c +++ b/py/objfun.c @@ -106,7 +106,11 @@ const mp_obj_type_t mp_type_fun_builtin = { qstr mp_obj_code_get_name(const byte *code_info) { mp_decode_uint(&code_info); // skip code_info_size entry + #if MICROPY_PORTABLE_CODE + return code_info[0] | (code_info[1] << 8); + 
#else return mp_decode_uint(&code_info); + #endif } #if MICROPY_EMIT_NATIVE diff --git a/py/showbc.c b/py/showbc.c index 62c6168b78209..6ed3e4aa3f207 100644 --- a/py/showbc.c +++ b/py/showbc.c @@ -40,6 +40,18 @@ } #define DECODE_ULABEL do { unum = (ip[0] | (ip[1] << 8)); ip += 2; } while (0) #define DECODE_SLABEL do { unum = (ip[0] | (ip[1] << 8)) - 0x8000; ip += 2; } while (0) + +#if MICROPY_PORTABLE_CODE + +#define DECODE_QSTR \ + qst = ip[0] | ip[1] << 8; \ + ip += 2; +#define DECODE_PTR \ + DECODE_UINT; \ + unum = mp_showbc_const_table[unum] + +#else + #define DECODE_QSTR { \ qst = 0; \ do { \ @@ -52,10 +64,14 @@ ip += sizeof(mp_uint_t); \ } while (0) +#endif + const byte *mp_showbc_code_start; +const mp_uint_t *mp_showbc_const_table; void mp_bytecode_print(const void *descr, const byte *ip, mp_uint_t len, const mp_uint_t *const_table) { mp_showbc_code_start = ip; + mp_showbc_const_table = const_table; // get bytecode parameters mp_uint_t n_state = mp_decode_uint(&ip); @@ -69,8 +85,13 @@ void mp_bytecode_print(const void *descr, const byte *ip, mp_uint_t len, const m mp_uint_t code_info_size = mp_decode_uint(&code_info); ip += code_info_size; + #if MICROPY_PORTABLE_CODE + qstr block_name = code_info[0] | (code_info[1] << 8); + qstr source_file = code_info[2] | (code_info[3] << 8); + #else qstr block_name = mp_decode_uint(&code_info); qstr source_file = mp_decode_uint(&code_info); + #endif printf("File %s, code block '%s' (descriptor: %p, bytecode @%p " UINT_FMT " bytes)\n", qstr_str(source_file), qstr_str(block_name), descr, mp_showbc_code_start, len); diff --git a/py/vm.c b/py/vm.c index 393b8a1db7de8..83561eb9f8a14 100644 --- a/py/vm.c +++ b/py/vm.c @@ -65,6 +65,18 @@ typedef enum { } while ((*ip++ & 0x80) != 0) #define DECODE_ULABEL mp_uint_t ulab = (ip[0] | (ip[1] << 8)); ip += 2 #define DECODE_SLABEL mp_uint_t slab = (ip[0] | (ip[1] << 8)) - 0x8000; ip += 2 + +#if MICROPY_PORTABLE_CODE + +#define DECODE_QSTR \ + qstr qst = ip[0] | ip[1] << 8; \ + ip += 2; 
+#define DECODE_PTR \ + DECODE_UINT; \ + void *ptr = (void*)code_state->const_table[unum] + +#else + #define DECODE_QSTR qstr qst = 0; \ do { \ qst = (qst << 7) + (*ip & 0x7f); \ @@ -73,6 +85,9 @@ typedef enum { ip = (byte*)(((mp_uint_t)ip + sizeof(mp_uint_t) - 1) & (~(sizeof(mp_uint_t) - 1))); /* align ip */ \ void *ptr = (void*)*(mp_uint_t*)ip; \ ip += sizeof(mp_uint_t) + +#endif + #define PUSH(val) *++sp = (val) #define POP() (*sp--) #define TOP() (*sp) @@ -1280,8 +1295,14 @@ unwind_jump:; if (mp_obj_is_exception_instance(nlr.ret_val) && nlr.ret_val != &mp_const_GeneratorExit_obj && nlr.ret_val != &mp_const_MemoryError_obj) { const byte *ip = code_state->code_info; mp_uint_t code_info_size = mp_decode_uint(&ip); + #if MICROPY_PORTABLE_CODE + qstr block_name = ip[0] | (ip[1] << 8); + qstr source_file = ip[2] | (ip[3] << 8); + ip += 4; + #else qstr block_name = mp_decode_uint(&ip); qstr source_file = mp_decode_uint(&ip); + #endif mp_uint_t bc = code_state->ip - code_state->code_info - code_info_size; mp_uint_t source_line = 1; mp_uint_t c; From 36b31d2e691fa0be853991336b2a0042e653a039 Mon Sep 17 00:00:00 2001 From: Damien George Date: Mon, 2 Nov 2015 21:55:42 +0000 Subject: [PATCH 05/12] py: Add MICROPY_PORTABLE_CODE_SAVE, for ability to load/save bytecode. With MICROPY_PORTABLE_CODE, bytecode can be read from a .mpc file and executed. With MICROPY_PORTABLE_CODE_SAVE enabled as well, bytecode can be saved to a .mpc file. 
--- py/emitbc.c | 6 +- py/emitglue.c | 517 +++++++++++++++++++++++++++++++++++++++++++++++++- py/emitglue.h | 26 ++- py/mpconfig.h | 5 + 4 files changed, 548 insertions(+), 6 deletions(-) diff --git a/py/emitbc.c b/py/emitbc.c index 1662ea38c3a80..5bd3c1589e7d2 100644 --- a/py/emitbc.c +++ b/py/emitbc.c @@ -436,7 +436,11 @@ void mp_emit_bc_end_pass(emit_t *emit) { } else if (emit->pass == MP_PASS_EMIT) { mp_emit_glue_assign_bytecode(emit->scope->raw_code, emit->code_base, emit->code_info_size + emit->bytecode_size, - emit->const_table, emit->scope->scope_flags); + emit->const_table, + #if MICROPY_PORTABLE_CODE_SAVE + emit->ct_cur_obj, emit->ct_cur_raw_code, + #endif + emit->scope->scope_flags); } } diff --git a/py/emitglue.c b/py/emitglue.c index feed5d99a282a..a011ad16937ca 100644 --- a/py/emitglue.c +++ b/py/emitglue.c @@ -50,8 +50,13 @@ struct _mp_raw_code_t { mp_uint_t n_pos_args : 11; union { struct { - const byte *code; + const byte *bytecode; const mp_uint_t *const_table; + #if MICROPY_PORTABLE_CODE_SAVE + mp_uint_t bc_len; + uint16_t n_obj; + uint16_t n_raw_code; + #endif } u_byte; struct { void *fun_data; @@ -67,11 +72,22 @@ mp_raw_code_t *mp_emit_glue_new_raw_code(void) { return rc; } -void mp_emit_glue_assign_bytecode(mp_raw_code_t *rc, const byte *code, mp_uint_t len, const mp_uint_t *const_table, mp_uint_t scope_flags) { +void mp_emit_glue_assign_bytecode(mp_raw_code_t *rc, const byte *code, mp_uint_t len, + const mp_uint_t *const_table, + #if MICROPY_PORTABLE_CODE_SAVE + uint16_t n_obj, uint16_t n_raw_code, + #endif + mp_uint_t scope_flags) { + rc->kind = MP_CODE_BYTECODE; rc->scope_flags = scope_flags; - rc->data.u_byte.code = code; + rc->data.u_byte.bytecode = code; rc->data.u_byte.const_table = const_table; + #if MICROPY_PORTABLE_CODE_SAVE + rc->data.u_byte.bc_len = len; + rc->data.u_byte.n_obj = n_obj; + rc->data.u_byte.n_raw_code = n_raw_code; + #endif #ifdef DEBUG_PRINT DEBUG_printf("assign byte code: code=%p len=" UINT_FMT " flags=%x\n", code, 
len, (uint)scope_flags); @@ -129,7 +145,7 @@ mp_obj_t mp_make_function_from_raw_code(mp_raw_code_t *rc, mp_obj_t def_args, mp switch (rc->kind) { case MP_CODE_BYTECODE: no_other_choice: - fun = mp_obj_new_fun_bc(def_args, def_kw_args, rc->data.u_byte.code, rc->data.u_byte.const_table); + fun = mp_obj_new_fun_bc(def_args, def_kw_args, rc->data.u_byte.bytecode, rc->data.u_byte.const_table); break; #if MICROPY_EMIT_NATIVE case MP_CODE_NATIVE_PY: @@ -172,3 +188,496 @@ mp_obj_t mp_make_closure_from_raw_code(mp_raw_code_t *rc, mp_uint_t n_closed_ove // wrap function in closure object return mp_obj_new_closure(ffun, n_closed_over & 0xff, args + ((n_closed_over >> 7) & 2)); } + +#if MICROPY_PORTABLE_CODE + +#include "py/bc0.h" + +// The following table encodes the number of bytes that a specific opcode +// takes up. There are 3 special opcodes that have an extra byte: +// MP_BC_MAKE_CLOSURE +// MP_BC_MAKE_CLOSURE_DEFARGS +// MP_BC_RAISE_VARARGS +#define OC4(a, b, c, d) (a | (b << 2) | (c << 4) | (d << 6)) +#define U (0) // undefined opcode +#define B (0) // single byte +#define Q (1) // single byte plus 2-byte qstr +#define V (2) // single byte plus variable encoded unsigned int +#define O (3) // single byte plus 2-byte bytecode offset +STATIC const byte opcode_format[64] = { + OC4(U, U, U, U), // 0x00-0x03 + OC4(U, U, U, U), // 0x04-0x07 + OC4(U, U, U, U), // 0x08-0x0b + OC4(U, U, U, U), // 0x0c-0x0f + OC4(B, B, B, U), // 0x10-0x13 + OC4(V, U, Q, V), // 0x14-0x17 + OC4(B, U, V, V), // 0x18-0x1b + OC4(Q, Q, Q, Q), // 0x1c-0x1f + OC4(B, B, V, V), // 0x20-0x23 + OC4(Q, Q, Q, B), // 0x24-0x27 + OC4(V, V, Q, Q), // 0x28-0x2b + OC4(U, U, U, U), // 0x2c-0x2f + OC4(B, B, B, B), // 0x30-0x33 + OC4(B, O, O, O), // 0x34-0x37 + OC4(O, O, U, U), // 0x38-0x3b + OC4(U, O, B, O), // 0x3c-0x3f + OC4(O, B, B, O), // 0x40-0x43 + OC4(B, B, O, B), // 0x44-0x47 + OC4(U, U, U, U), // 0x48-0x4b + OC4(U, U, U, U), // 0x4c-0x4f + OC4(V, V, V, V), // 0x50-0x53 + OC4(B, V, V, V), // 0x54-0x57 + 
OC4(V, V, V, B), // 0x58-0x5b + OC4(B, B, B, U), // 0x5c-0x5f + OC4(V, V, V, V), // 0x60-0x63 + OC4(V, V, V, V), // 0x64-0x67 + OC4(Q, Q, B, U), // 0x68-0x6b + OC4(U, U, U, U), // 0x6c-0x6f + + OC4(B, B, B, B), // 0x70-0x73 + OC4(B, B, B, B), // 0x74-0x77 + OC4(B, B, B, B), // 0x78-0x7b + OC4(B, B, B, B), // 0x7c-0x7f + OC4(B, B, B, B), // 0x80-0x83 + OC4(B, B, B, B), // 0x84-0x87 + OC4(B, B, B, B), // 0x88-0x8b + OC4(B, B, B, B), // 0x8c-0x8f + OC4(B, B, B, B), // 0x90-0x93 + OC4(B, B, B, B), // 0x94-0x97 + OC4(B, B, B, B), // 0x98-0x9b + OC4(B, B, B, B), // 0x9c-0x9f + OC4(B, B, B, B), // 0xa0-0xa3 + OC4(B, B, B, B), // 0xa4-0xa7 + OC4(B, B, B, B), // 0xa8-0xab + OC4(B, B, B, B), // 0xac-0xaf + + OC4(B, B, B, B), // 0xb0-0xb3 + OC4(B, B, B, B), // 0xb4-0xb7 + OC4(B, B, B, B), // 0xb8-0xbb + OC4(B, B, B, B), // 0xbc-0xbf + + OC4(B, B, B, B), // 0xc0-0xc3 + OC4(B, B, B, B), // 0xc4-0xc7 + OC4(B, B, B, B), // 0xc8-0xcb + OC4(B, B, B, B), // 0xcc-0xcf + + OC4(B, B, B, B), // 0xd0-0xd3 + OC4(B, B, B, B), // 0xd4-0xd7 + OC4(B, B, B, B), // 0xd8-0xdb + OC4(B, B, B, B), // 0xdc-0xdf + + OC4(B, B, B, B), // 0xe0-0xe3 + OC4(B, B, B, B), // 0xe4-0xe7 + OC4(B, B, B, B), // 0xe8-0xeb + OC4(B, B, B, B), // 0xec-0xef + + OC4(B, B, B, B), // 0xf0-0xf3 + OC4(B, B, B, B), // 0xf4-0xf7 + OC4(B, B, U, U), // 0xf8-0xfb + OC4(U, U, U, U), // 0xfc-0xff +}; +#undef OC4 +#undef U +#undef B +#undef Q +#undef V +#undef O + +STATIC void read_bytes(mp_reader_t *reader, byte *buf, size_t len) { + while (len-- > 0) { + *buf++ = reader->read_byte(reader->data); + } +} + +STATIC mp_uint_t read_uint(mp_reader_t *reader) { + mp_uint_t unum = 0; + for (;;) { + byte b = reader->read_byte(reader->data); + unum = (unum << 7) | (b & 0x7f); + if ((b & 0x80) == 0) { + break; + } + } + return unum; +} + +STATIC qstr load_qstr(mp_reader_t *reader) { + mp_uint_t len = read_uint(reader); + char *str = m_new(char, len); + read_bytes(reader, (byte*)str, len); + qstr qst = qstr_from_strn(str, len); + 
m_del(char, str, len); + return qst; +} + +STATIC mp_obj_t load_obj(mp_reader_t *reader) { + assert(0); + return MP_OBJ_NULL; +} + +STATIC void load_bytecode_qstrs(mp_reader_t *reader, byte *ip, byte *ip_top) { + while (ip < ip_top) { + int f = (opcode_format[*ip >> 2] >> (2 * (*ip & 3))) & 3; + if (f == 1) { + // qstr + qstr qst = load_qstr(reader); + ip[1] = qst; + ip[2] = qst >> 8; + ip += 3; + } else { + int extra_byte = (*ip == MP_BC_RAISE_VARARGS + || *ip == MP_BC_MAKE_CLOSURE + || *ip == MP_BC_MAKE_CLOSURE_DEFARGS); + ip += 1; + if (f == 2) { + // var-uint + while ((*ip++ & 0x80) != 0) { + } + } else if (f == 3) { + // bc offset + ip += 2; + } + ip += extra_byte; + } + } +} + +typedef struct _bytecode_prelude_t { + uint n_state; + uint n_exc_stack; + uint scope_flags; + uint n_pos_args; + uint n_kwonly_args; + uint n_def_pos_args; + uint code_info_size; +} bytecode_prelude_t; + +// ip will point to start of opcodes +// ip2 will point to simple_name, source_file qstrs +STATIC void extract_prelude(const byte **ip, const byte **ip2, bytecode_prelude_t *prelude) { + prelude->n_state = mp_decode_uint(ip); + prelude->n_exc_stack = mp_decode_uint(ip); + prelude->scope_flags = *(*ip)++; + prelude->n_pos_args = *(*ip)++; + prelude->n_kwonly_args = *(*ip)++; + prelude->n_def_pos_args = *(*ip)++; + *ip2 = *ip; + prelude->code_info_size = mp_decode_uint(ip2); + *ip += prelude->code_info_size; + while (*(*ip)++ != 255) { + } +} + +STATIC mp_raw_code_t *load_raw_code(mp_reader_t *reader) { + // load bytecode + mp_uint_t bc_len = read_uint(reader); + byte *bytecode = m_new(byte, bc_len); + read_bytes(reader, bytecode, bc_len); + + // extract prelude + const byte *ip = bytecode; + const byte *ip2; + bytecode_prelude_t prelude; + extract_prelude(&ip, &ip2, &prelude); + + // load qstrs and link global qstr ids into bytecode + qstr simple_name = load_qstr(reader); + qstr source_file = load_qstr(reader); + ((byte*)ip2)[0] = simple_name; ((byte*)ip2)[1] = simple_name >> 8; + 
((byte*)ip2)[2] = source_file; ((byte*)ip2)[3] = source_file >> 8; + load_bytecode_qstrs(reader, (byte*)ip, bytecode + bc_len); + + // load constant table + mp_uint_t n_obj = read_uint(reader); + mp_uint_t n_raw_code = read_uint(reader); + mp_uint_t *const_table = m_new(mp_uint_t, prelude.n_pos_args + prelude.n_kwonly_args + n_obj + n_raw_code); + mp_uint_t *ct = const_table; + for (mp_uint_t i = 0; i < prelude.n_pos_args + prelude.n_kwonly_args; ++i) { + *ct++ = (mp_uint_t)MP_OBJ_NEW_QSTR(load_qstr(reader)); + } + for (mp_uint_t i = 0; i < n_obj; ++i) { + *ct++ = (mp_uint_t)load_obj(reader); + } + for (mp_uint_t i = 0; i < n_raw_code; ++i) { + *ct++ = (mp_uint_t)load_raw_code(reader); + } + + // create raw_code and return it + mp_raw_code_t *rc = mp_emit_glue_new_raw_code(); + mp_emit_glue_assign_bytecode(rc, bytecode, bc_len, const_table, + #if MICROPY_PORTABLE_CODE_SAVE + n_obj, n_raw_code, + #endif + prelude.scope_flags); + return rc; +} + +mp_raw_code_t *mp_raw_code_load(mp_reader_t *reader) { + byte header[6]; + read_bytes(reader, header, 6); + if (strncmp((char*)header, "MPC001", 6) != 0) { + nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, + "invalid bytecode file")); + } + return load_raw_code(reader); +} + +// here we define mp_raw_code_load_file depending on the port +// TODO abstract this away properly + +#if defined(__i386__) || defined(__x86_64__) +// unix file reader + +#include +#include + +typedef struct _mp_lexer_file_buf_t { + int fd; + byte buf[20]; + mp_uint_t len; + mp_uint_t pos; +} mp_lexer_file_buf_t; + +STATIC mp_uint_t file_buf_next_byte(void *fb_in) { + mp_lexer_file_buf_t *fb = fb_in; + if (fb->pos >= fb->len) { + if (fb->len == 0) { + return (mp_uint_t)-1; + } else { + int n = read(fb->fd, fb->buf, sizeof(fb->buf)); + if (n <= 0) { + fb->len = 0; + return (mp_uint_t)-1; + } + fb->len = n; + fb->pos = 0; + } + } + return fb->buf[fb->pos++]; +} + +STATIC const byte* file_buf_get_ptr(void *fb, size_t len) { + (void)fb; + (void)len; 
+ return NULL; +} + +mp_raw_code_t *mp_raw_code_load_file(const char *filename) { + mp_lexer_file_buf_t fb; + fb.fd = open(filename, O_RDONLY, 0644); + int n = read(fb.fd, fb.buf, sizeof(fb.buf)); + fb.len = n; + fb.pos = 0; + mp_reader_t reader; + reader.data = &fb; + reader.read_byte = file_buf_next_byte; + reader.get_ptr = file_buf_get_ptr; + mp_raw_code_t *rc = mp_raw_code_load(&reader); + close(fb.fd); + return rc; +} + +#else +// fatfs file reader + +#include "lib/fatfs/ff.h" + +typedef struct _mp_lexer_file_buf_t { + FIL fp; + byte buf[20]; + uint16_t len; + uint16_t pos; +} mp_lexer_file_buf_t; + +STATIC mp_uint_t file_buf_next_byte(void *fb_in) { + mp_lexer_file_buf_t *fb = fb_in; + if (fb->pos >= fb->len) { + if (fb->len < sizeof(fb->buf)) { + return (mp_uint_t)-1; + } else { + UINT n; + f_read(&fb->fp, fb->buf, sizeof(fb->buf), &n); + if (n == 0) { + return (mp_uint_t)-1; + } + fb->len = n; + fb->pos = 0; + } + } + return fb->buf[fb->pos++]; +} + +STATIC const byte* file_buf_get_ptr(void *fb, size_t len) { + (void)fb; + (void)len; + return NULL; +} + +mp_raw_code_t *mp_raw_code_load_file(const char *filename) { + mp_lexer_file_buf_t fb; + /*FRESULT res =*/ f_open(&fb.fp, filename, FA_READ); + UINT n; + f_read(&fb.fp, fb.buf, sizeof(fb.buf), &n); + fb.len = n; + fb.pos = 0; + + mp_reader_t reader; + reader.data = &fb; + reader.read_byte = file_buf_next_byte; + reader.get_ptr = file_buf_get_ptr; + mp_raw_code_t *rc = mp_raw_code_load(&reader); + + f_close(&fb.fp); + + return rc; +} + +#endif + +#endif // MICROPY_PORTABLE_CODE + +#if MICROPY_PORTABLE_CODE_SAVE +STATIC void mp_print_bytes(mp_print_t *print, const byte *data, size_t len) { + print->print_strn(print->data, (const char*)data, len); +} + +#define BYTES_FOR_INT ((BYTES_PER_WORD * 8 + 6) / 7) +STATIC void mp_print_uint(mp_print_t *print, mp_uint_t n) { + byte buf[BYTES_FOR_INT]; + byte *p = buf + sizeof(buf); + for (;;) { + *--p = n & 0x7f; + n >>= 7; + if (n == 0) { + break; + } + *p |= 0x80; + } 
+ print->print_strn(print->data, (char*)p, buf + sizeof(buf) - p); +} + +STATIC void save_qstr(mp_print_t *print, qstr qst) { + mp_uint_t len; + const byte *str = qstr_data(qst, &len); + mp_print_uint(print, len); + mp_print_bytes(print, str, len); +} + +STATIC void save_obj(mp_print_t *print, mp_obj_t o) { + if (MP_OBJ_IS_STR(o)) { + byte buf[] = {'s'}; + mp_print_bytes(print, buf, 1); + mp_uint_t len; + const char *str = mp_obj_str_get_data(o, &len); + mp_print_uint(print, len); + mp_print_bytes(print, (const byte*)str, len); + } else if (MP_OBJ_IS_TYPE(o, &mp_type_bytes)) { + byte buf[] = {'b'}; + mp_print_bytes(print, buf, 1); + mp_uint_t len; + const char *str = mp_obj_str_get_data(o, &len); + mp_print_uint(print, len); + mp_print_bytes(print, (const byte*)str, len); + } else if (MP_OBJ_IS_TYPE(o, &mp_type_int)) { + byte buf[] = {'i'}; + mp_print_bytes(print, buf, 1); + // TODO + } else if (MP_OBJ_IS_TYPE(o, &mp_type_float)) { + byte buf[] = {'f'}; + mp_print_bytes(print, buf, 1); + // TODO + } else if (MP_OBJ_IS_TYPE(o, &mp_type_complex)) { + byte buf[] = {'c'}; + mp_print_bytes(print, buf, 1); + // TODO + } else if (o == &mp_const_ellipsis_obj) { + byte buf[] = {'e'}; + mp_print_bytes(print, buf, 1); + } else { + mp_obj_print(o, PRINT_STR); + assert(0); + } +} + +STATIC void save_bytecode_qstrs(mp_print_t *print, const byte *ip, const byte *ip_top) { + while (ip < ip_top) { + int f = (opcode_format[*ip >> 2] >> (2 * (*ip & 3))) & 3; + if (f == 1) { + // qstr opcode + qstr qst = ip[1] | (ip[2] << 8); + save_qstr(print, qst); + ip += 3; + } else { + // non-qstr opcode + int extra_byte = (ip[0] == MP_BC_RAISE_VARARGS + || ip[0] == MP_BC_MAKE_CLOSURE + || ip[0] == MP_BC_MAKE_CLOSURE_DEFARGS); + ip += 1; + if (f == 2) { + // var-uint + while ((*ip++ & 0x80) != 0) { + } + } else if (f == 3) { + // bc offset + ip += 2; + } + ip += extra_byte; + } + } +} + +void save_raw_code(mp_print_t *print, mp_raw_code_t *rc) { + if (rc->kind != MP_CODE_BYTECODE) { + 
nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, + "can only save bytecode")); + } + + // save bytecode + mp_print_uint(print, rc->data.u_byte.bc_len); + mp_print_bytes(print, rc->data.u_byte.bytecode, rc->data.u_byte.bc_len); + + // extract prelude + const byte *ip = rc->data.u_byte.bytecode; + const byte *ip2; + bytecode_prelude_t prelude; + extract_prelude(&ip, &ip2, &prelude); + + // save qstrs + save_qstr(print, ip2[0] | (ip2[1] << 8)); // simple_name + save_qstr(print, ip2[2] | (ip2[3] << 8)); // source_file + save_bytecode_qstrs(print, ip, rc->data.u_byte.bytecode + rc->data.u_byte.bc_len); + + // save constant table + mp_print_uint(print, rc->data.u_byte.n_obj); + mp_print_uint(print, rc->data.u_byte.n_raw_code); + const mp_uint_t *const_table = rc->data.u_byte.const_table; + for (uint i = 0; i < prelude.n_pos_args + prelude.n_kwonly_args; ++i) { + mp_obj_t o = (mp_obj_t)*const_table++; + save_qstr(print, MP_OBJ_QSTR_VALUE(o)); + } + for (uint i = 0; i < rc->data.u_byte.n_obj; ++i) { + save_obj(print, (mp_obj_t)*const_table++); + } + for (uint i = 0; i < rc->data.u_byte.n_raw_code; ++i) { + save_raw_code(print, (mp_raw_code_t*)*const_table++); + } +} + +void mp_raw_code_save(mp_raw_code_t *rc, mp_print_t *print) { + mp_print_bytes(print, (const byte*)"MPC001", 6); + save_raw_code(print, rc); +} + +STATIC void fd_print_strn(void *env, const char *str, mp_uint_t len) { + int fd = (mp_int_t)env; + write(fd, str, len); +} + +void mp_raw_code_save_file(mp_raw_code_t *rc, const char *filename) { + int fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC, 0644); + mp_print_t fd_print = {(void*)(mp_int_t)fd, fd_print_strn}; + mp_raw_code_save(rc, &fd_print); + close(fd); +} + +#endif diff --git a/py/emitglue.h b/py/emitglue.h index 9bb2ba2d74f97..609c72b9ae6cd 100644 --- a/py/emitglue.h +++ b/py/emitglue.h @@ -43,10 +43,34 @@ typedef struct _mp_raw_code_t mp_raw_code_t; mp_raw_code_t *mp_emit_glue_new_raw_code(void); -void 
mp_emit_glue_assign_bytecode(mp_raw_code_t *rc, const byte *code, mp_uint_t len, const mp_uint_t *const_table, mp_uint_t scope_flags); +void mp_emit_glue_assign_bytecode(mp_raw_code_t *rc, const byte *code, mp_uint_t len, + const mp_uint_t *const_table, + #if MICROPY_PORTABLE_CODE_SAVE + uint16_t n_obj, uint16_t n_raw_code, + #endif + mp_uint_t scope_flags); void mp_emit_glue_assign_native(mp_raw_code_t *rc, mp_raw_code_kind_t kind, void *fun_data, mp_uint_t fun_len, const mp_uint_t *const_table, mp_uint_t n_pos_args, mp_uint_t scope_flags, mp_uint_t type_sig); mp_obj_t mp_make_function_from_raw_code(mp_raw_code_t *rc, mp_obj_t def_args, mp_obj_t def_kw_args); mp_obj_t mp_make_closure_from_raw_code(mp_raw_code_t *rc, mp_uint_t n_closed_over, const mp_obj_t *args); +#if MICROPY_PORTABLE_CODE +typedef mp_uint_t (*mp_reader_byte_t)(void *data); +typedef const byte *(*mp_reader_ptr_t)(void *data, size_t len); + +typedef struct _mp_reader_t { + void *data; + mp_reader_byte_t read_byte; + mp_reader_ptr_t get_ptr; +} mp_reader_t; + +mp_raw_code_t *mp_raw_code_load(mp_reader_t *reader); +mp_raw_code_t *mp_raw_code_load_file(const char *filename); +#endif + +#if MICROPY_PORTABLE_CODE_SAVE +void mp_raw_code_save(mp_raw_code_t *rc, mp_print_t *print); +void mp_raw_code_save_file(mp_raw_code_t *rc, const char *filename); +#endif + #endif // __MICROPY_INCLUDED_PY_EMITGLUE_H__ diff --git a/py/mpconfig.h b/py/mpconfig.h index 588bceb901202..77fea1dcebc93 100644 --- a/py/mpconfig.h +++ b/py/mpconfig.h @@ -194,6 +194,11 @@ #define MICROPY_PORTABLE_CODE (0) #endif +// Whether to support saving of portable code +#ifndef MICROPY_PORTABLE_CODE_SAVE +#define MICROPY_PORTABLE_CODE_SAVE (0) +#endif + // Whether to emit x64 native code #ifndef MICROPY_EMIT_X64 #define MICROPY_EMIT_X64 (0) From c074af4425a720cd76aa75c1bc8a5ba2c58c56b4 Mon Sep 17 00:00:00 2001 From: Damien George Date: Mon, 2 Nov 2015 21:57:08 +0000 Subject: [PATCH 06/12] py: Automatically save bytecode after compiling it. 
--- py/compile.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/py/compile.c b/py/compile.c index e0561e6c322f0..6c9cb0e778b83 100644 --- a/py/compile.c +++ b/py/compile.c @@ -3265,6 +3265,17 @@ mp_obj_t mp_compile(mp_parse_tree_t *parse_tree, qstr source_file, uint emit_opt nlr_raise(comp->compile_error); } else { // return function that executes the outer module + #if MICROPY_PORTABLE_CODE_SAVE + if (!is_repl) { + vstr_t vstr; + vstr_init(&vstr, 16); + vstr_add_str(&vstr, qstr_str(source_file)); + vstr_cut_tail_bytes(&vstr, 2); + vstr_add_str(&vstr, "mpc"); + mp_raw_code_save_file(outer_raw_code, vstr_null_terminated_str(&vstr)); + vstr_clear(&vstr); + } + #endif return mp_make_function_from_raw_code(outer_raw_code, MP_OBJ_NULL, MP_OBJ_NULL); } } From 522e4f86a7b5a27b1c028aabca1b7ed47872d9ac Mon Sep 17 00:00:00 2001 From: Damien George Date: Mon, 2 Nov 2015 21:57:42 +0000 Subject: [PATCH 07/12] py: Allow to import compiled bytecode files. --- py/builtinimport.c | 59 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 57 insertions(+), 2 deletions(-) diff --git a/py/builtinimport.c b/py/builtinimport.c index d5a7d56defc72..4248a012f2559 100644 --- a/py/builtinimport.c +++ b/py/builtinimport.c @@ -66,6 +66,16 @@ STATIC mp_import_stat_t stat_dir_or_file(vstr_t *path) { if (stat == MP_IMPORT_STAT_DIR) { return stat; } + + #if MICROPY_PORTABLE_CODE + vstr_add_str(path, ".mpc"); + stat = mp_import_stat(vstr_null_terminated_str(path)); + if (stat == MP_IMPORT_STAT_FILE) { + return stat; + } + vstr_cut_tail_bytes(path, 4); + #endif + vstr_add_str(path, ".py"); stat = mp_import_stat(vstr_null_terminated_str(path)); if (stat == MP_IMPORT_STAT_FILE) { @@ -132,11 +142,56 @@ STATIC void do_load_from_lexer(mp_obj_t module_obj, mp_lexer_t *lex, const char mp_parse_compile_execute(lex, MP_PARSE_FILE_INPUT, mod_globals, mod_globals); } +#if MICROPY_PORTABLE_CODE +STATIC void do_load_from_mpc(mp_obj_t module_obj, const char *fname) { + #if 
MICROPY_PY___FILE__ + //TODO + //qstr source_name = lex->source_name; + //mp_store_attr(module_obj, MP_QSTR___file__, MP_OBJ_NEW_QSTR(source_name)); + #endif + + // execute the module in its context + mp_obj_dict_t *mod_globals = mp_obj_module_get_globals(module_obj); + + // save context + mp_obj_dict_t *volatile old_globals = mp_globals_get(); + mp_obj_dict_t *volatile old_locals = mp_locals_get(); + + // set new context + mp_globals_set(mod_globals); + mp_locals_set(mod_globals); + + nlr_buf_t nlr; + if (nlr_push(&nlr) == 0) { + mp_raw_code_t *outer_raw_code = mp_raw_code_load_file(fname); + mp_obj_t module_fun = mp_make_function_from_raw_code(outer_raw_code, MP_OBJ_NULL, MP_OBJ_NULL); + mp_call_function_0(module_fun); + + // finish nlr block, restore context + nlr_pop(); + mp_globals_set(old_globals); + mp_locals_set(old_locals); + } else { + // exception; restore context and re-raise same exception + mp_globals_set(old_globals); + mp_locals_set(old_locals); + nlr_raise(nlr.ret_val); + } +} +#endif + STATIC void do_load(mp_obj_t module_obj, vstr_t *file) { // create the lexer char *file_str = vstr_null_terminated_str(file); - mp_lexer_t *lex = mp_lexer_new_from_file(file_str); - do_load_from_lexer(module_obj, lex, file_str); + #if MICROPY_PORTABLE_CODE + if (file_str[file->len - 1] == 'c') { + do_load_from_mpc(module_obj, file_str); + } else + #endif + { + mp_lexer_t *lex = mp_lexer_new_from_file(file_str); + do_load_from_lexer(module_obj, lex, file_str); + } } STATIC void chop_component(const char *start, const char **end) { From 2b48de9a8edc45f79be4172600eb13bd60190a5b Mon Sep 17 00:00:00 2001 From: Damien George Date: Mon, 2 Nov 2015 21:58:33 +0000 Subject: [PATCH 08/12] unix: Allow to execute compiled .mpc files from the command line. 
---
Notes (this area is not part of the commit):
execute_from_mpc() loads the raw code with mp_raw_code_load_file(), wraps it
in a function object and calls it unless compile_only is set; an uncaught
exception is passed to handle_uncaught_exception().  The interrupt character
is set to CHAR_CTRL_C on entry and reset to -1 on both exits.
do_file() selects execute_from_mpc() when the last character of the file
name is 'c', and the lexer path otherwise.
NOTE(review): that test matches any name ending in 'c' and indexes
file[strlen(file) - 1], which is out of bounds for an empty name; a check
for the full ".mpc" suffix looks like what is intended - confirm.
The unix port config enables MICROPY_PORTABLE_CODE and
MICROPY_PORTABLE_CODE_SAVE and sets MICROPY_OPT_CACHE_MAP_LOOKUP_IN_BYTECODE
to 0.

 unix/main.c         | 36 ++++++++++++++++++++++++++++++++++--
 unix/mpconfigport.h |  4 +++-
 2 files changed, 37 insertions(+), 3 deletions(-)

diff --git a/unix/main.c b/unix/main.c
index 1dc54198e54d3..4a758bdd7b1c4 100644
--- a/unix/main.c
+++ b/unix/main.c
@@ -134,6 +134,34 @@ STATIC int execute_from_lexer(mp_lexer_t *lex, mp_parse_input_kind_t input_kind,
     }
 }
 
+STATIC int execute_from_mpc(const char *filename) {
+    mp_hal_set_interrupt_char(CHAR_CTRL_C);
+
+    nlr_buf_t nlr;
+    if (nlr_push(&nlr) == 0) {
+        mp_raw_code_t *outer_raw_code = mp_raw_code_load_file(filename);
+        #if MICROPY_PY___FILE__
+        //if (input_kind == MP_PARSE_FILE_INPUT) {
+            //mp_store_global(MP_QSTR___file__, MP_OBJ_NEW_QSTR(source_name)); TODO
+        //}
+        #endif
+        mp_obj_t module_fun = mp_make_function_from_raw_code(outer_raw_code, MP_OBJ_NULL, MP_OBJ_NULL);
+        if (!compile_only) {
+            // execute it
+            mp_call_function_0(module_fun);
+        }
+
+        mp_hal_set_interrupt_char(-1);
+        nlr_pop();
+        return 0;
+
+    } else {
+        // uncaught exception
+        mp_hal_set_interrupt_char(-1);
+        return handle_uncaught_exception((mp_obj_t)nlr.ret_val);
+    }
+}
+
 #if MICROPY_USE_READLINE == 1
 #include "lib/mp-readline/readline.h"
 #else
@@ -264,8 +292,12 @@ STATIC int do_repl(void) {
 }
 
 STATIC int do_file(const char *file) {
-    mp_lexer_t *lex = mp_lexer_new_from_file(file);
-    return execute_from_lexer(lex, MP_PARSE_FILE_INPUT, false);
+    if (file[strlen(file) - 1] == 'c') {
+        return execute_from_mpc(file);
+    } else {
+        mp_lexer_t *lex = mp_lexer_new_from_file(file);
+        return execute_from_lexer(lex, MP_PARSE_FILE_INPUT, false);
+    }
 }
 
 STATIC int do_str(const char *str) {
diff --git a/unix/mpconfigport.h b/unix/mpconfigport.h
index 0e1fb6a401d58..a7602fa7101e9 100644
--- a/unix/mpconfigport.h
+++ b/unix/mpconfigport.h
@@ -27,6 +27,8 @@
 // options to control how Micro Python is built
 
 #define MICROPY_ALLOC_PATH_MAX (PATH_MAX)
+#define MICROPY_PORTABLE_CODE (1)
+#define MICROPY_PORTABLE_CODE_SAVE (1)
 #if !defined(MICROPY_EMIT_X64) && defined(__x86_64__)
     #define MICROPY_EMIT_X64 (1)
 #endif
@@ -60,7 +62,7 @@
 #define MICROPY_LONGINT_IMPL (MICROPY_LONGINT_IMPL_MPZ)
 #define MICROPY_STREAMS_NON_BLOCK (1)
 #define MICROPY_OPT_COMPUTED_GOTO (1)
-#define MICROPY_OPT_CACHE_MAP_LOOKUP_IN_BYTECODE (1)
+#define MICROPY_OPT_CACHE_MAP_LOOKUP_IN_BYTECODE (0)
 #define MICROPY_CAN_OVERRIDE_BUILTINS (1)
 #define MICROPY_PY_FUNCTION_ATTRS (1)
 #define MICROPY_PY_DESCRIPTORS (1)

From 1770b408729f489980fe8ed968531a3c38c7224f Mon Sep 17 00:00:00 2001
From: Damien George
Date: Mon, 2 Nov 2015 21:58:49 +0000
Subject: [PATCH 09/12] stmhal: Enable MICROPY_PORTABLE_CODE to load/import .mpc files.

---
Notes (this area is not part of the commit):
Defines MICROPY_PORTABLE_CODE as (1) for the stmhal port.
MICROPY_PORTABLE_CODE_SAVE is not defined by this patch.

 stmhal/mpconfigport.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/stmhal/mpconfigport.h b/stmhal/mpconfigport.h
index 3848daa8faf69..c720194846309 100644
--- a/stmhal/mpconfigport.h
+++ b/stmhal/mpconfigport.h
@@ -31,6 +31,7 @@
 // options to control how Micro Python is built
 
 #define MICROPY_ALLOC_PATH_MAX (128)
+#define MICROPY_PORTABLE_CODE (1)
 #define MICROPY_EMIT_THUMB (1)
 #define MICROPY_EMIT_INLINE_THUMB (1)
 #define MICROPY_COMP_MODULE_CONST (1)

From 41aaf53616523e5bbebc7cab03d3d889c71aec2f Mon Sep 17 00:00:00 2001
From: Damien George
Date: Mon, 2 Nov 2015 22:03:59 +0000
Subject: [PATCH 10/12] tools: Add mpcdump.py to dump .mpc files for inspection and freezing.

---
Notes (this area is not part of the commit):
read_uint() raises a clear error on a truncated file, the generated C
identifiers escape the source file name as well as the scope name, and
running the script without exactly one argument prints a usage line.

 tools/mpcdump.py | 332 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 332 insertions(+)
 create mode 100644 tools/mpcdump.py

diff --git a/tools/mpcdump.py b/tools/mpcdump.py
new file mode 100644
index 0000000000000..5376724c42e70
--- /dev/null
+++ b/tools/mpcdump.py
@@ -0,0 +1,332 @@
+# Dump a compiled .mpc file as C source: the bytecode array, constant table
+# and mp_raw_code_t structure of every scope, for inspection or freezing.
+#
+# usage: mpcdump.py <file.mpc>
+
+import sys
+
+################################################################
+# this stuff is copied from makeqstrdata.py
+
+import re
+
+# codepoint2name is different in Python 2 to Python 3
+import platform
+if platform.python_version_tuple()[0] == '2':
+    from htmlentitydefs import codepoint2name
+elif platform.python_version_tuple()[0] == '3':
+    from html.entities import codepoint2name
+codepoint2name[ord('-')] = 'hyphen'
+
+# add some custom names to map characters that aren't in HTML
+codepoint2name[ord(' ')] = 'space'
+codepoint2name[ord('\'')] = 'squot'
+codepoint2name[ord(',')] = 'comma'
+codepoint2name[ord('.')] = 'dot'
+codepoint2name[ord(':')] = 'colon'
+codepoint2name[ord('/')] = 'slash'
+codepoint2name[ord('%')] = 'percent'
+codepoint2name[ord('#')] = 'hash'
+codepoint2name[ord('(')] = 'paren_open'
+codepoint2name[ord(')')] = 'paren_close'
+codepoint2name[ord('[')] = 'bracket_open'
+codepoint2name[ord(']')] = 'bracket_close'
+codepoint2name[ord('{')] = 'brace_open'
+codepoint2name[ord('}')] = 'brace_close'
+codepoint2name[ord('*')] = 'star'
+codepoint2name[ord('!')] = 'bang'
+codepoint2name[ord('\\')] = 'backslash'
+codepoint2name[ord('+')] = 'plus'
+
+def qstr_escape(qst):
+    return re.sub(r'[^A-Za-z0-9_]', lambda s: "_" + codepoint2name[ord(s.group(0))] + '_', qst)
+
+def qstr_id(qst):
+    return 'MP_QSTR_' + qstr_escape(qst)
+
+################################################################
+
+# extra bytes:
+MP_BC_MAKE_CLOSURE = 0x62
+MP_BC_MAKE_CLOSURE_DEFARGS = 0x63
+MP_BC_RAISE_VARARGS = 0x5c
+
+# Build the opcode format table: one byte per four opcodes, two bits each.
+# Codes: 1 = qstr argument, 2 = var-uint, 3 = bytecode offset, 0 = the rest.
+def make_opcode_format():
+    def OC4(a, b, c, d):
+        return a | (b << 2) | (c << 4) | (d << 6)
+    U = 0
+    B = 0
+    Q = 1
+    V = 2
+    O = 3
+    return bytes((
+        OC4(U, U, U, U), # 0x00-0x03
+        OC4(U, U, U, U), # 0x04-0x07
+        OC4(U, U, U, U), # 0x08-0x0b
+        OC4(U, U, U, U), # 0x0c-0x0f
+        OC4(B, B, B, U), # 0x10-0x13
+        OC4(V, U, Q, V), # 0x14-0x17
+        OC4(B, U, V, V), # 0x18-0x1b
+        OC4(Q, Q, Q, Q), # 0x1c-0x1f
+        OC4(B, B, V, V), # 0x20-0x23
+        OC4(Q, Q, Q, B), # 0x24-0x27
+        OC4(V, V, Q, Q), # 0x28-0x2b
+        OC4(U, U, U, U), # 0x2c-0x2f
+        OC4(B, B, B, B), # 0x30-0x33
+        OC4(B, O, O, O), # 0x34-0x37
+        OC4(O, O, U, U), # 0x38-0x3b
+        OC4(U, O, B, O), # 0x3c-0x3f
+        OC4(O, B, B, O), # 0x40-0x43
+        OC4(B, B, O, B), # 0x44-0x47
+        OC4(U, U, U, U), # 0x48-0x4b
+        OC4(U, U, U, U), # 0x4c-0x4f
+        OC4(V, V, V, V), # 0x50-0x53
+        OC4(B, V, V, V), # 0x54-0x57
+        OC4(V, V, V, B), # 0x58-0x5b
+        OC4(B, B, B, U), # 0x5c-0x5f
+        OC4(V, V, V, V), # 0x60-0x63
+        OC4(V, V, V, V), # 0x64-0x67
+        OC4(Q, Q, B, U), # 0x68-0x6b
+        OC4(U, U, U, U), # 0x6c-0x6f
+
+        OC4(B, B, B, B), # 0x70-0x73
+        OC4(B, B, B, B), # 0x74-0x77
+        OC4(B, B, B, B), # 0x78-0x7b
+        OC4(B, B, B, B), # 0x7c-0x7f
+        OC4(B, B, B, B), # 0x80-0x83
+        OC4(B, B, B, B), # 0x84-0x87
+        OC4(B, B, B, B), # 0x88-0x8b
+        OC4(B, B, B, B), # 0x8c-0x8f
+        OC4(B, B, B, B), # 0x90-0x93
+        OC4(B, B, B, B), # 0x94-0x97
+        OC4(B, B, B, B), # 0x98-0x9b
+        OC4(B, B, B, B), # 0x9c-0x9f
+        OC4(B, B, B, B), # 0xa0-0xa3
+        OC4(B, B, B, B), # 0xa4-0xa7
+        OC4(B, B, B, B), # 0xa8-0xab
+        OC4(B, B, B, B), # 0xac-0xaf
+
+        OC4(B, B, B, B), # 0xb0-0xb3
+        OC4(B, B, B, B), # 0xb4-0xb7
+        OC4(B, B, B, B), # 0xb8-0xbb
+        OC4(B, B, B, B), # 0xbc-0xbf
+
+        OC4(B, B, B, B), # 0xc0-0xc3
+        OC4(B, B, B, B), # 0xc4-0xc7
+        OC4(B, B, B, B), # 0xc8-0xcb
+        OC4(B, B, B, B), # 0xcc-0xcf
+
+        OC4(B, B, B, B), # 0xd0-0xd3
+        OC4(B, B, B, B), # 0xd4-0xd7
+        OC4(B, B, B, B), # 0xd8-0xdb
+        OC4(B, B, B, B), # 0xdc-0xdf
+
+        OC4(B, B, B, B), # 0xe0-0xe3
+        OC4(B, B, B, B), # 0xe4-0xe7
+        OC4(B, B, B, B), # 0xe8-0xeb
+        OC4(B, B, B, B), # 0xec-0xef
+
+        OC4(B, B, B, B), # 0xf0-0xf3
+        OC4(B, B, B, B), # 0xf4-0xf7
+        OC4(B, B, U, U), # 0xf8-0xfb
+        OC4(U, U, U, U), # 0xfc-0xff
+    ))
+
+# Return (format, size) for the opcode at bytecode[ip]: format is its 2-bit
+# code from make_opcode_format() and size is the opcode's length in bytes.
+def get_opcode_info(bytecode, ip, opcode_format=make_opcode_format()):
+    opcode = bytecode[ip]
+    ip_start = ip
+    f = (opcode_format[opcode >> 2] >> (2 * (opcode & 3))) & 3
+    if f == 1:
+        # qstr
+        ip += 3
+    else:
+        extra_byte = (opcode == MP_BC_RAISE_VARARGS
+            or opcode == MP_BC_MAKE_CLOSURE
+            or opcode == MP_BC_MAKE_CLOSURE_DEFARGS)
+        ip += 1
+        if f == 2:
+            # var-uint
+            while bytecode[ip] & 0x80 != 0:
+                ip += 1
+            ip += 1
+        elif f == 3:
+            # bc offset
+            ip += 2
+        ip += extra_byte
+    return f, ip - ip_start
+
+# Decode a uint stored 7 bits per byte, most significant group first, with
+# the high bit marking continuation; returns (ip after the number, value).
+def decode_uint(bytecode, ip):
+    unum = 0
+    while True:
+        val = bytecode[ip]
+        ip += 1
+        unum = (unum << 7) | (val & 0x7f)
+        if not (val & 0x80):
+            break
+    return ip, unum
+
+# Walk the prelude at the start of bytecode; returns (ip of the first opcode,
+# ip of the simple_name qstr, tuple of the decoded prelude fields).
+def extract_prelude(bytecode):
+    ip = 0
+    ip, n_state = decode_uint(bytecode, ip)
+    ip, n_exc_stack = decode_uint(bytecode, ip)
+    scope_flags = bytecode[ip]; ip += 1
+    n_pos_args = bytecode[ip]; ip += 1
+    n_kwonly_args = bytecode[ip]; ip += 1
+    n_def_pos_args = bytecode[ip]; ip += 1
+    ip2, code_info_size = decode_uint(bytecode, ip)
+    ip += code_info_size
+    while bytecode[ip] != 0xff:
+        ip += 1
+    ip += 1
+    # ip now points to first opcode
+    # ip2 points to simple_name qstr
+    return ip, ip2, (n_state, n_exc_stack, scope_flags, n_pos_args, n_kwonly_args, n_def_pos_args, code_info_size)
+
+class RawCode:
+    def __init__(self, bytecode, qstrs, objs, raw_codes):
+        self.bytecode = bytecode
+        self.qstrs = qstrs
+        self.objs = objs
+        self.raw_codes = raw_codes
+
+    def _unpack_qstr(self, ip):
+        qst = self.bytecode[ip] | self.bytecode[ip + 1] << 8
+        return global_qstrs[qst]
+
+    def dump_c(self):
+        # emit children first
+        child_names = []
+        for rc in self.raw_codes:
+            child_names.append(rc.dump_c())
+
+        ip, ip2, prelude = extract_prelude(self.bytecode)
+        simple_name = self._unpack_qstr(ip2)
+        source_file = self._unpack_qstr(ip2 + 2)
+        # escape the file part too: a path can hold characters invalid in C names
+        esc_name = qstr_escape(source_file[:-3]) + '_' + qstr_escape(simple_name)
+
+        print()
+        print('// frozen bytecode for file %s, scope %s' % (source_file, simple_name))
+        print('STATIC const byte bytecode_data_%s[%u] = {' % (esc_name, len(self.bytecode)))
+        print(' ', end='')
+        for i in range(ip2):
+            print(' 0x%02x,' % self.bytecode[i], end='')
+        print()
+        print(' ', qstr_id(simple_name), '& 0xff,', qstr_id(simple_name), '>> 8,')
+        print(' ', qstr_id(source_file), '& 0xff,', qstr_id(source_file), '>> 8,')
+        print(' ', end='')
+        for i in range(ip2 + 4, ip):
+            print(' 0x%02x,' % self.bytecode[i], end='')
+        print()
+        while ip < len(self.bytecode):
+            f, sz = get_opcode_info(self.bytecode, ip)
+            if f == 1:
+                qst = self._unpack_qstr(ip + 1)
+                print(' ', '0x%02x,' % self.bytecode[ip], qstr_id(qst), '& 0xff,', qstr_id(qst), '>> 8,')
+            else:
+                print(' ', ''.join('0x%02x, ' % self.bytecode[ip + i] for i in range(sz)))
+            ip += sz
+        print('};')
+        print('STATIC const mp_uint_t const_table_data_%s[%u] = {' % (esc_name, len(self.qstrs) + len(self.objs) + len(self.raw_codes)))
+        for qst in self.qstrs:
+            print(' (mp_uint_t)MP_OBJ_NEW_QSTR(%s),' % qstr_id(global_qstrs[qst]))
+        for obj in self.objs:
+            assert 0
+        for n in child_names:
+            print(' (mp_uint_t)&raw_code_%s,' % n)
+        print('};')
+        if simple_name != '':
+            print('STATIC ', end='')
+        print('const mp_raw_code_t raw_code_%s = {' % esc_name)
+        print(' .kind = MP_CODE_BYTECODE,')
+        print(' .scope_flags = 0x%02x,' % prelude[2])
+        print(' .n_pos_args = %u,' % prelude[3])
+        print(' .data.u_byte = {')
+        print(' .bytecode = bytecode_data_%s,' % esc_name)
+        print(' .const_table = const_table_data_%s,' % esc_name)
+        print(' #if MICROPY_PORTABLE_CODE_SAVE')
+        print(' .bc_len = %u,' % len(self.bytecode))
+        print(' .n_obj = %u,' % len(self.objs))
+        print(' .n_raw_code = %u,' % len(self.raw_codes))
+        print(' #endif')
+        print(' },')
+        print('};')
+
+        return esc_name
+
+def read_uint(f):
+    # same encoding as decode_uint, but read byte by byte from a file object
+    i = 0
+    while True:
+        data = f.read(1)
+        if not data:
+            raise Exception('unexpected end of file')
+        b = data[0]
+        i = (i << 7) | (b & 0x7f)
+        if b & 0x80 == 0:
+            break
+    return i
+
+global_qstrs = []
+def read_qstr(f):
+    ln = read_uint(f)
+    data = str(f.read(ln), 'utf8')
+    global_qstrs.append(data)
+    return len(global_qstrs) - 1
+
+def read_obj(f):
+    assert 0
+
+def read_qstr_and_pack(f, bytecode, ip):
+    qst = read_qstr(f)
+    bytecode[ip] = qst & 0xff
+    bytecode[ip + 1] = qst >> 8
+
+def read_bytecode_qstrs(file, bytecode, ip):
+    while ip < len(bytecode):
+        f, sz = get_opcode_info(bytecode, ip)
+        if f == 1:
+            read_qstr_and_pack(file, bytecode, ip + 1)
+        ip += sz
+
+def read_raw_code(f):
+    bc_len = read_uint(f)
+    bytecode = bytearray(f.read(bc_len))
+    ip, ip2, prelude = extract_prelude(bytecode)
+    read_qstr_and_pack(f, bytecode, ip2) # simple_name
+    read_qstr_and_pack(f, bytecode, ip2 + 2) # source_file
+    read_bytecode_qstrs(f, bytecode, ip)
+    n_obj = read_uint(f)
+    n_raw_code = read_uint(f)
+    qstrs = [read_qstr(f) for _ in range(prelude[3] + prelude[4])]
+    objs = [read_obj(f) for _ in range(n_obj)]
+    raw_codes = [read_raw_code(f) for _ in range(n_raw_code)]
+    return RawCode(bytecode, qstrs, objs, raw_codes)
+
+def read_mpc(filename):
+    with open(filename, 'rb') as f:
+        header = f.read(6)
+        if header[:3] != b'MPC':
+            raise Exception('not an MPC file')
+        if header[3:6] != b'001':
+            raise Exception('incompatible version')
+        return read_raw_code(f)
+
+if __name__ == '__main__':
+    if len(sys.argv) != 2:
+        print('usage: %s <file.mpc>' % sys.argv[0], file=sys.stderr)
+        sys.exit(1)
+    mpc = read_mpc(sys.argv[1])
+    print('#include "py/emitglue.h"')
+    print()
+    for q in global_qstrs:
+        print('// Q(%s)' % q)
+    mpc.dump_c()

From 09778f1ad85196bbd2dc5227210e7440ce76b7f3 Mon Sep 17 00:00:00 2001
From: Damien George
Date: Mon, 2 Nov 2015 23:00:56 +0000
Subject: [PATCH 11/12] fix some compiler warnings in emitglue.c (to be squashed)

---
Notes (this area is not part of the commit):
NOTE(review): fd_print_strn() stores the result of write() only to silence
the unused-result warning; a short or failed write is still not reported.

 py/emitglue.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/py/emitglue.c b/py/emitglue.c
index a011ad16937ca..d19a9899b21ee 100644
--- a/py/emitglue.c
+++ b/py/emitglue.c
@@ -311,6 +311,7 @@ STATIC qstr load_qstr(mp_reader_t *reader) {
 }
 
 STATIC mp_obj_t load_obj(mp_reader_t *reader) {
+    (void)reader;
     assert(0);
     return MP_OBJ_NULL;
 }
@@ -626,7 +627,7 @@ STATIC void save_bytecode_qstrs(mp_print_t *print, const byte *ip, const byte *i
     }
 }
 
-void save_raw_code(mp_print_t *print, mp_raw_code_t *rc) {
+STATIC void save_raw_code(mp_print_t *print, mp_raw_code_t *rc) {
     if (rc->kind != MP_CODE_BYTECODE) {
         nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError,
             "can only save bytecode"));
@@ -670,7 +671,8 @@ void mp_raw_code_save(mp_raw_code_t *rc, mp_print_t *print) {
 
 STATIC void fd_print_strn(void *env, const char *str, mp_uint_t len) {
     int fd = (mp_int_t)env;
-    write(fd, str, len);
+    ssize_t ret = write(fd, str, len);
+    (void)ret;
 }
 
 void mp_raw_code_save_file(mp_raw_code_t *rc, const char *filename) {

From b74e0a0cfd28514aebaae750b83b07471441dc05 Mon Sep 17 00:00:00 2001
From: Damien George
Date: Mon, 2 Nov 2015 23:01:09 +0000
Subject: [PATCH 12/12] fix some compiler warnings in main.c (to be squashed)

---
Notes (this area is not part of the commit):
Besides guarding the .mpc code with MICROPY_PORTABLE_CODE, do_file() now
dispatches on the full ".mpc" suffix.  The earlier test on the last
character matched any name ending in 'c' and read file[-1] for an empty
name.

 unix/main.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/unix/main.c b/unix/main.c
index 4a758bdd7b1c4..f26bf97e0d6f5 100644
--- a/unix/main.c
+++ b/unix/main.c
@@ -134,6 +134,7 @@ STATIC int execute_from_lexer(mp_lexer_t *lex, mp_parse_input_kind_t input_kind,
     }
 }
 
+#if MICROPY_PORTABLE_CODE
 STATIC int execute_from_mpc(const char *filename) {
     mp_hal_set_interrupt_char(CHAR_CTRL_C);
 
@@ -161,6 +162,7 @@ STATIC int execute_from_mpc(const char *filename) {
         return handle_uncaught_exception((mp_obj_t)nlr.ret_val);
     }
 }
+#endif
 
 #if MICROPY_USE_READLINE == 1
 #include "lib/mp-readline/readline.h"
@@ -292,9 +294,14 @@ STATIC int do_repl(void) {
 }
 
 STATIC int do_file(const char *file) {
-    if (file[strlen(file) - 1] == 'c') {
+    #if MICROPY_PORTABLE_CODE
+    // only a name ending in ".mpc" is run as saved bytecode
+    size_t len = strlen(file);
+    if (len >= 4 && strcmp(file + len - 4, ".mpc") == 0) {
         return execute_from_mpc(file);
-    } else {
+    } else
+    #endif
+    {
         mp_lexer_t *lex = mp_lexer_new_from_file(file);
         return execute_from_lexer(lex, MP_PARSE_FILE_INPUT, false);
     }