diff --git a/ports/bare-arm/Makefile b/ports/bare-arm/Makefile index 1a21eb56a867b..e48342924b951 100644 --- a/ports/bare-arm/Makefile +++ b/ports/bare-arm/Makefile @@ -16,7 +16,6 @@ PYDFU ?= $(TOP)/tools/pydfu.py CFLAGS += -I. -I$(TOP) -I$(BUILD) CFLAGS += -Wall -Werror -std=c99 -nostdlib CFLAGS += -mthumb -mtune=cortex-m4 -mcpu=cortex-m4 -msoft-float -CSUPEROPT = -Os # save some code space for performance-critical code # Select debugging or optimisation build. ifeq ($(DEBUG), 1) diff --git a/ports/bare-arm/mpconfigport.h b/ports/bare-arm/mpconfigport.h index 65bb67f7b9a70..7a8fa55d30209 100644 --- a/ports/bare-arm/mpconfigport.h +++ b/ports/bare-arm/mpconfigport.h @@ -37,6 +37,9 @@ // Python internal features #define MICROPY_ERROR_REPORTING (MICROPY_ERROR_REPORTING_NONE) +// Just use -Os for everything to generate the smallest possible binary. +#define MICROPY_APPLY_COMPILER_OPTIMISATIONS(f) f + // Type definitions for the specific machine typedef int32_t mp_int_t; // must be pointer size diff --git a/ports/esp32/mpconfigport.h b/ports/esp32/mpconfigport.h index 5dc4a9c758790..624415c08b085 100644 --- a/ports/esp32/mpconfigport.h +++ b/ports/esp32/mpconfigport.h @@ -231,18 +231,12 @@ void *esp_native_code_commit(void *, size_t, void *); #endif // Functions that should go in IRAM +#define MICROPY_PERFORMANCE_CRITICAL_LEVEL_1(f) IRAM_ATTR MICROPY_APPLY_COMPILER_OPTIMISATIONS(f) +#if !(CONFIG_IDF_TARGET_ESP32 && CONFIG_SPIRAM && CONFIG_SPIRAM_CACHE_WORKAROUND) // For ESP32 with SPIRAM workaround, firmware is larger and uses more static IRAM, // so in that configuration don't put too many functions in IRAM.
-#if !(CONFIG_IDF_TARGET_ESP32 && CONFIG_SPIRAM && CONFIG_SPIRAM_CACHE_WORKAROUND) -#define MICROPY_WRAP_MP_BINARY_OP(f) IRAM_ATTR f +#define MICROPY_PERFORMANCE_CRITICAL_LEVEL_2(f) IRAM_ATTR MICROPY_APPLY_COMPILER_OPTIMISATIONS(f) #endif -#define MICROPY_WRAP_MP_EXECUTE_BYTECODE(f) IRAM_ATTR f -#define MICROPY_WRAP_MP_LOAD_GLOBAL(f) IRAM_ATTR f -#define MICROPY_WRAP_MP_LOAD_NAME(f) IRAM_ATTR f -#define MICROPY_WRAP_MP_MAP_LOOKUP(f) IRAM_ATTR f -#define MICROPY_WRAP_MP_OBJ_GET_TYPE(f) IRAM_ATTR f -#define MICROPY_WRAP_MP_SCHED_EXCEPTION(f) IRAM_ATTR f -#define MICROPY_WRAP_MP_SCHED_KEYBOARD_INTERRUPT(f) IRAM_ATTR f #define UINT_FMT "%u" #define INT_FMT "%d" diff --git a/ports/minimal/Makefile b/ports/minimal/Makefile index 050c4ddf526a5..554a024c66041 100644 --- a/ports/minimal/Makefile +++ b/ports/minimal/Makefile @@ -31,8 +31,6 @@ CFLAGS += $(INC) -Wall -Werror -Wdouble-promotion -Wfloat-conversion -std=c99 $( LDFLAGS += -Wl,-Map=$@.map,--cref -Wl,--gc-sections endif -CSUPEROPT = -Os # save some code space - # Tune for Debugging or Optimization CFLAGS += -g # always include debug info in the ELF ifeq ($(DEBUG), 1) diff --git a/ports/minimal/mpconfigport.h b/ports/minimal/mpconfigport.h index 56bef165facda..eaf5abe76397a 100644 --- a/ports/minimal/mpconfigport.h +++ b/ports/minimal/mpconfigport.h @@ -21,6 +21,9 @@ // Use the minimum headroom in the chunk allocator for parse nodes. #define MICROPY_ALLOC_PARSE_CHUNK_INIT (16) +// Just use -Os for everything to generate the smallest possible binary. 
+#define MICROPY_APPLY_COMPILER_OPTIMISATIONS(f) f + // type definitions for the specific machine typedef intptr_t mp_int_t; // must be pointer size diff --git a/ports/stm32/Makefile b/ports/stm32/Makefile index e44a542395182..edc59b742a875 100644 --- a/ports/stm32/Makefile +++ b/ports/stm32/Makefile @@ -284,7 +284,6 @@ SRC_O += \ $(SYSTEM_FILE) ifeq ($(MCU_SERIES),$(filter $(MCU_SERIES),f0 g0 l0)) -CSUPEROPT = -Os # save some code space SRC_O += \ resethandler_m0.o \ shared/runtime/gchelper_thumb1.o diff --git a/ports/stm32/mpconfigport.h b/ports/stm32/mpconfigport.h index 300ad086bf473..0a1ce27c2258d 100644 --- a/ports/stm32/mpconfigport.h +++ b/ports/stm32/mpconfigport.h @@ -55,6 +55,12 @@ #define MICROPY_OPT_MAP_LOOKUP_CACHE (__CORTEX_M > 0) #endif +#if __CORTEX_M == 0 +// Just use -Os for everything to avoid using extra flash. Using M0 as a stand-in +// for "likely has small flash", this could be moved to the board config instead. +#define MICROPY_APPLY_COMPILER_OPTIMISATIONS(f) f +#endif + // emitters #define MICROPY_PERSISTENT_CODE_LOAD (1) #ifndef MICROPY_EMIT_THUMB diff --git a/py/gc.c b/py/gc.c index b6969dfd42429..f331c6ee0bbf7 100644 --- a/py/gc.c +++ b/py/gc.c @@ -723,7 +723,12 @@ void gc_info(gc_info_t *info) { GC_EXIT(); } -void *gc_alloc(size_t n_bytes, unsigned int alloc_flags) { +#ifndef MICROPY_WRAP_GC_ALLOC +// Optimising gc for speed; 5ms down to 4ms on pybv2 +#define MICROPY_WRAP_GC_ALLOC(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_3(f) +#endif + +void *MICROPY_WRAP_GC_ALLOC(gc_alloc)(size_t n_bytes, unsigned int alloc_flags) { bool has_finaliser = alloc_flags & GC_ALLOC_FLAG_HAS_FINALISER; size_t n_blocks = ((n_bytes + BYTES_PER_BLOCK - 1) & (~(BYTES_PER_BLOCK - 1))) / BYTES_PER_BLOCK; DEBUG_printf("gc_alloc(" UINT_FMT " bytes -> " UINT_FMT " blocks)\n", n_bytes, n_blocks); @@ -890,9 +895,13 @@ void *gc_alloc_with_finaliser(mp_uint_t n_bytes) { } */ +#ifndef MICROPY_WRAP_GC_FREE +#define MICROPY_WRAP_GC_FREE(f)
MICROPY_PERFORMANCE_CRITICAL_LEVEL_3(f) +#endif + // force the freeing of a piece of memory // TODO: freeing here does not call finaliser -void gc_free(void *ptr) { +void MICROPY_WRAP_GC_FREE(gc_free)(void *ptr) { if (MP_STATE_THREAD(gc_lock_depth) > 0) { // Cannot free while the GC is locked. However free is an optimisation // to reclaim the memory immediately, this means it will now be left @@ -1021,7 +1030,11 @@ void *gc_realloc(void *ptr, mp_uint_t n_bytes) { #else // Alternative gc_realloc impl -void *gc_realloc(void *ptr_in, size_t n_bytes, bool allow_move) { +#ifndef MICROPY_WRAP_GC_REALLOC +#define MICROPY_WRAP_GC_REALLOC(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_3(f) +#endif + +void *MICROPY_WRAP_GC_REALLOC(gc_realloc)(void *ptr_in, size_t n_bytes, bool allow_move) { // check for pure allocation if (ptr_in == NULL) { return gc_alloc(n_bytes, false); diff --git a/py/map.c b/py/map.c index c18df5a9f333c..2157c4d7d823b 100644 --- a/py/map.c +++ b/py/map.c @@ -147,6 +147,10 @@ STATIC void mp_map_rehash(mp_map_t *map) { m_del(mp_map_elem_t, old_table, old_alloc); } +#ifndef MICROPY_WRAP_MP_MAP_LOOKUP +#define MICROPY_WRAP_MP_MAP_LOOKUP(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_1(f) +#endif + // MP_MAP_LOOKUP behaviour: // - returns NULL if not found, else the slot it was found in with key,value non-null // MP_MAP_LOOKUP_ADD_IF_NOT_FOUND behaviour: diff --git a/py/mpconfig.h b/py/mpconfig.h index a1e9660bf46a6..47534a64cb5c2 100644 --- a/py/mpconfig.h +++ b/py/mpconfig.h @@ -1784,46 +1784,39 @@ typedef double mp_float_t; #endif /*****************************************************************************/ -/* Hooks for a port to wrap functions with attributes */ +/* Hooks for a port to wrap functions with performance-tuning attributes */ -#ifndef MICROPY_WRAP_MP_BINARY_OP -#define MICROPY_WRAP_MP_BINARY_OP(f) f -#endif - -#ifndef MICROPY_WRAP_MP_EXECUTE_BYTECODE -#define MICROPY_WRAP_MP_EXECUTE_BYTECODE(f) f -#endif - -#ifndef MICROPY_WRAP_MP_LOAD_GLOBAL -#define 
MICROPY_WRAP_MP_LOAD_GLOBAL(f) f -#endif - -#ifndef MICROPY_WRAP_MP_LOAD_NAME -#define MICROPY_WRAP_MP_LOAD_NAME(f) f -#endif - -#ifndef MICROPY_WRAP_MP_MAP_LOOKUP -#define MICROPY_WRAP_MP_MAP_LOOKUP(f) f +#ifndef MICROPY_APPLY_COMPILER_OPTIMISATIONS +#if defined(__GNUC__) && !defined(__clang__) +// Enable -O2 optimisations. +#define MICROPY_APPLY_COMPILER_OPTIMISATIONS(f) __attribute__((optimize("O2"))) f +#else +// Unsupported on other compilers, will use global optimisation setting (typically -Os). +#define MICROPY_APPLY_COMPILER_OPTIMISATIONS(f) f #endif - -#ifndef MICROPY_WRAP_MP_OBJ_GET_TYPE -#define MICROPY_WRAP_MP_OBJ_GET_TYPE(f) f #endif -#ifndef MICROPY_WRAP_MP_SCHED_EXCEPTION -#define MICROPY_WRAP_MP_SCHED_EXCEPTION(f) f +// Ideally apply full compiler optimisations and place in RAM. +// Use this on small functions that need the highest possible performance. +#ifndef MICROPY_PERFORMANCE_CRITICAL_LEVEL_1 +#define MICROPY_PERFORMANCE_CRITICAL_LEVEL_1(f) MICROPY_APPLY_COMPILER_OPTIMISATIONS(f) #endif -#ifndef MICROPY_WRAP_MP_SCHED_KEYBOARD_INTERRUPT -#define MICROPY_WRAP_MP_SCHED_KEYBOARD_INTERRUPT(f) f +// Ideally apply full compiler optimisations and optionally place in RAM (if IRAM available). +// Use this on larger functions that should go in RAM if possible. +#ifndef MICROPY_PERFORMANCE_CRITICAL_LEVEL_2 +#define MICROPY_PERFORMANCE_CRITICAL_LEVEL_2(f) MICROPY_APPLY_COMPILER_OPTIMISATIONS(f) #endif -#ifndef MICROPY_WRAP_MP_SCHED_SCHEDULE -#define MICROPY_WRAP_MP_SCHED_SCHEDULE(f) f +// Ideally apply full compiler optimisation if flash available. +// Use this on functions that are not important enough to place in RAM. +#ifndef MICROPY_PERFORMANCE_CRITICAL_LEVEL_3 +#define MICROPY_PERFORMANCE_CRITICAL_LEVEL_3(f) MICROPY_APPLY_COMPILER_OPTIMISATIONS(f) #endif -#ifndef MICROPY_WRAP_MP_SCHED_VM_ABORT -#define MICROPY_WRAP_MP_SCHED_VM_ABORT(f) f +// Ideally apply full compiler optimisation if flash available (but lower priority than level 3).
+#ifndef MICROPY_PERFORMANCE_CRITICAL_LEVEL_4 +#define MICROPY_PERFORMANCE_CRITICAL_LEVEL_4(f) f #endif /*****************************************************************************/ diff --git a/py/mpz.c b/py/mpz.c index b61997e2fd4ed..c274457d00de6 100644 --- a/py/mpz.c +++ b/py/mpz.c @@ -36,18 +36,16 @@ #define DIG_MSB (MPZ_LONG_1 << (DIG_SIZE - 1)) #define DIG_BASE (MPZ_LONG_1 << DIG_SIZE) -/* - mpz is an arbitrary precision integer type with a public API. +// mpz is an arbitrary precision integer type with a public API. - mpn functions act on non-negative integers represented by an array of generalised - digits (eg a word per digit). You also need to specify separately the length of the - array. There is no public API for mpn. Rather, the functions are used by mpz to - implement its features. +// mpn functions act on non-negative integers represented by an array of generalised +// digits (eg a word per digit). You also need to specify separately the length of the +// array. There is no public API for mpn. Rather, the functions are used by mpz to +// implement its features. - Integer values are stored little endian (first digit is first in memory). +// Integer values are stored little endian (first digit is first in memory). - Definition of normalise: ? -*/ +// Definition of normalise: ? 
STATIC size_t mpn_remove_trailing_zeros(mpz_dig_t *oidig, mpz_dig_t *idig) { for (--idig; idig >= oidig && *idig == 0; --idig) { @@ -55,10 +53,9 @@ STATIC size_t mpn_remove_trailing_zeros(mpz_dig_t *oidig, mpz_dig_t *idig) { return idig + 1 - oidig; } -/* compares i with j - returns sign(i - j) - assumes i, j are normalised -*/ +// Compares i with j +// Returns sign(i - j) +// Assumes i, j are normalised STATIC int mpn_cmp(const mpz_dig_t *idig, size_t ilen, const mpz_dig_t *jdig, size_t jlen) { if (ilen < jlen) { return -1; @@ -80,11 +77,10 @@ STATIC int mpn_cmp(const mpz_dig_t *idig, size_t ilen, const mpz_dig_t *jdig, si return 0; } -/* computes i = j << n - returns number of digits in i - assumes enough memory in i; assumes normalised j; assumes n > 0 - can have i, j pointing to same memory -*/ +// Computes i = j << n +// Returns number of digits in i +// Assumes enough memory in i; assumes normalised j; assumes n > 0 +// Can have i, j pointing to same memory STATIC size_t mpn_shl(mpz_dig_t *idig, mpz_dig_t *jdig, size_t jlen, mp_uint_t n) { mp_uint_t n_whole = (n + DIG_SIZE - 1) / DIG_SIZE; mp_uint_t n_part = n % DIG_SIZE; @@ -119,11 +115,10 @@ STATIC size_t mpn_shl(mpz_dig_t *idig, mpz_dig_t *jdig, size_t jlen, mp_uint_t n return jlen; } -/* computes i = j >> n - returns number of digits in i - assumes enough memory in i; assumes normalised j; assumes n > 0 - can have i, j pointing to same memory -*/ +// Computes i = j >> n +// Returns number of digits in i +// Assumes enough memory in i; assumes normalised j; assumes n > 0 +// Can have i, j pointing to same memory STATIC size_t mpn_shr(mpz_dig_t *idig, mpz_dig_t *jdig, size_t jlen, mp_uint_t n) { mp_uint_t n_whole = n / DIG_SIZE; mp_uint_t n_part = n % DIG_SIZE; @@ -151,11 +146,10 @@ STATIC size_t mpn_shr(mpz_dig_t *idig, mpz_dig_t *jdig, size_t jlen, mp_uint_t n return jlen; } -/* computes i = j + k - returns number of digits in i - assumes enough memory in i; assumes normalised j, k; assumes jlen >= klen - 
can have i, j, k pointing to same memory -*/ +// Computes i = j + k +// Returns number of digits in i +// Assumes enough memory in i; assumes normalised j, k; assumes jlen >= klen +// Can have i, j, k pointing to same memory STATIC size_t mpn_add(mpz_dig_t *idig, const mpz_dig_t *jdig, size_t jlen, const mpz_dig_t *kdig, size_t klen) { mpz_dig_t *oidig = idig; mpz_dbl_dig_t carry = 0; @@ -181,11 +175,10 @@ STATIC size_t mpn_add(mpz_dig_t *idig, const mpz_dig_t *jdig, size_t jlen, const return idig - oidig; } -/* computes i = j - k - returns number of digits in i - assumes enough memory in i; assumes normalised j, k; assumes j >= k - can have i, j, k pointing to same memory -*/ +// Computes i = j - k +// Returns number of digits in i +// Assumes enough memory in i; assumes normalised j, k; assumes j >= k +// Can have i, j, k pointing to same memory STATIC size_t mpn_sub(mpz_dig_t *idig, const mpz_dig_t *jdig, size_t jlen, const mpz_dig_t *kdig, size_t klen) { mpz_dig_t *oidig = idig; mpz_dbl_dig_signed_t borrow = 0; @@ -208,12 +201,10 @@ STATIC size_t mpn_sub(mpz_dig_t *idig, const mpz_dig_t *jdig, size_t jlen, const } #if MICROPY_OPT_MPZ_BITWISE - -/* computes i = j & k - returns number of digits in i - assumes enough memory in i; assumes normalised j, k; assumes jlen >= klen (jlen argument not needed) - can have i, j, k pointing to same memory -*/ +// Computes i = j & k +// Returns number of digits in i +// Assumes enough memory in i; assumes normalised j, k; assumes jlen >= klen (jlen argument not needed) +// Can have i, j, k pointing to same memory STATIC size_t mpn_and(mpz_dig_t *idig, const mpz_dig_t *jdig, const mpz_dig_t *kdig, size_t klen) { mpz_dig_t *oidig = idig; @@ -223,18 +214,16 @@ STATIC size_t mpn_and(mpz_dig_t *idig, const mpz_dig_t *jdig, const mpz_dig_t *k return mpn_remove_trailing_zeros(oidig, idig); } - #endif -/* i = -((-j) & (-k)) = ~((~j + 1) & (~k + 1)) + 1 - i = (j & (-k)) = (j & (~k + 1)) = ( j & (~k + 1)) - i = ((-j) & k) = ((~j + 1) & 
k) = ((~j + 1) & k ) - computes general form: - i = (im ^ (((j ^ jm) + jc) & ((k ^ km) + kc))) + ic where Xm = Xc == 0 ? 0 : DIG_MASK - returns number of digits in i - assumes enough memory in i; assumes normalised j, k; assumes length j >= length k - can have i, j, k pointing to same memory -*/ +// i = -((-j) & (-k)) = ~((~j + 1) & (~k + 1)) + 1 +// i = (j & (-k)) = (j & (~k + 1)) = ( j & (~k + 1)) +// i = ((-j) & k) = ((~j + 1) & k) = ((~j + 1) & k ) +// Computes general form: +// i = (im ^ (((j ^ jm) + jc) & ((k ^ km) + kc))) + ic where Xm = Xc == 0 ? 0 : DIG_MASK +// Returns number of digits in i +// Assumes enough memory in i; assumes normalised j, k; assumes length j >= length k +// Can have i, j, k pointing to same memory STATIC size_t mpn_and_neg(mpz_dig_t *idig, const mpz_dig_t *jdig, size_t jlen, const mpz_dig_t *kdig, size_t klen, mpz_dbl_dig_t carryi, mpz_dbl_dig_t carryj, mpz_dbl_dig_t carryk) { mpz_dig_t *oidig = idig; @@ -260,12 +249,10 @@ STATIC size_t mpn_and_neg(mpz_dig_t *idig, const mpz_dig_t *jdig, size_t jlen, c } #if MICROPY_OPT_MPZ_BITWISE - -/* computes i = j | k - returns number of digits in i - assumes enough memory in i; assumes normalised j, k; assumes jlen >= klen - can have i, j, k pointing to same memory -*/ +// Computes i = j | k +// Returns number of digits in i +// Assumes enough memory in i; assumes normalised j, k; assumes jlen >= klen +// Can have i, j, k pointing to same memory STATIC size_t mpn_or(mpz_dig_t *idig, const mpz_dig_t *jdig, size_t jlen, const mpz_dig_t *kdig, size_t klen) { mpz_dig_t *oidig = idig; @@ -281,21 +268,17 @@ STATIC size_t mpn_or(mpz_dig_t *idig, const mpz_dig_t *jdig, size_t jlen, const return idig - oidig; } - #endif -/* i = -((-j) | (-k)) = ~((~j + 1) | (~k + 1)) + 1 - i = -(j | (-k)) = -(j | (~k + 1)) = ~( j | (~k + 1)) + 1 - i = -((-j) | k) = -((~j + 1) | k) = ~((~j + 1) | k ) + 1 - computes general form: - i = ~(((j ^ jm) + jc) | ((k ^ km) + kc)) + 1 where Xm = Xc == 0 ? 
0 : DIG_MASK - returns number of digits in i - assumes enough memory in i; assumes normalised j, k; assumes length j >= length k - can have i, j, k pointing to same memory -*/ - +// i = -((-j) | (-k)) = ~((~j + 1) | (~k + 1)) + 1 +// i = -(j | (-k)) = -(j | (~k + 1)) = ~( j | (~k + 1)) + 1 +// i = -((-j) | k) = -((~j + 1) | k) = ~((~j + 1) | k ) + 1 +// Computes general form: +// i = ~(((j ^ jm) + jc) | ((k ^ km) + kc)) + 1 where Xm = Xc == 0 ? 0 : DIG_MASK +// Returns number of digits in i +// Assumes enough memory in i; assumes normalised j, k; assumes length j >= length k +// Can have i, j, k pointing to same memory #if MICROPY_OPT_MPZ_BITWISE - STATIC size_t mpn_or_neg(mpz_dig_t *idig, const mpz_dig_t *jdig, size_t jlen, const mpz_dig_t *kdig, size_t klen, mpz_dbl_dig_t carryj, mpz_dbl_dig_t carryk) { mpz_dig_t *oidig = idig; @@ -323,9 +306,7 @@ STATIC size_t mpn_or_neg(mpz_dig_t *idig, const mpz_dig_t *jdig, size_t jlen, co return mpn_remove_trailing_zeros(oidig, idig); } - #else - STATIC size_t mpn_or_neg(mpz_dig_t *idig, const mpz_dig_t *jdig, size_t jlen, const mpz_dig_t *kdig, size_t klen, mpz_dbl_dig_t carryi, mpz_dbl_dig_t carryj, mpz_dbl_dig_t carryk) { mpz_dig_t *oidig = idig; @@ -348,16 +329,13 @@ STATIC size_t mpn_or_neg(mpz_dig_t *idig, const mpz_dig_t *jdig, size_t jlen, co return mpn_remove_trailing_zeros(oidig, idig); } - #endif #if MICROPY_OPT_MPZ_BITWISE - -/* computes i = j ^ k - returns number of digits in i - assumes enough memory in i; assumes normalised j, k; assumes jlen >= klen - can have i, j, k pointing to same memory -*/ +// Computes i = j ^ k +// Returns number of digits in i +// Assumes enough memory in i; assumes normalised j, k; assumes jlen >= klen +// Can have i, j, k pointing to same memory STATIC size_t mpn_xor(mpz_dig_t *idig, const mpz_dig_t *jdig, size_t jlen, const mpz_dig_t *kdig, size_t klen) { mpz_dig_t *oidig = idig; @@ -373,18 +351,16 @@ STATIC size_t mpn_xor(mpz_dig_t *idig, const mpz_dig_t *jdig, size_t jlen, const 
return mpn_remove_trailing_zeros(oidig, idig); } - #endif -/* i = (-j) ^ (-k) = ~(j - 1) ^ ~(k - 1) = (j - 1) ^ (k - 1) - i = -(j ^ (-k)) = -(j ^ ~(k - 1)) = ~(j ^ ~(k - 1)) + 1 = (j ^ (k - 1)) + 1 - i = -((-j) ^ k) = -(~(j - 1) ^ k) = ~(~(j - 1) ^ k) + 1 = ((j - 1) ^ k) + 1 - computes general form: - i = ((j - 1 + jc) ^ (k - 1 + kc)) + ic - returns number of digits in i - assumes enough memory in i; assumes normalised j, k; assumes length j >= length k - can have i, j, k pointing to same memory -*/ +// i = (-j) ^ (-k) = ~(j - 1) ^ ~(k - 1) = (j - 1) ^ (k - 1) +// i = -(j ^ (-k)) = -(j ^ ~(k - 1)) = ~(j ^ ~(k - 1)) + 1 = (j ^ (k - 1)) + 1 +// i = -((-j) ^ k) = -(~(j - 1) ^ k) = ~(~(j - 1) ^ k) + 1 = ((j - 1) ^ k) + 1 +// Computes general form: +// i = ((j - 1 + jc) ^ (k - 1 + kc)) + ic +// Returns number of digits in i +// Assumes enough memory in i; assumes normalised j, k; assumes length j >= length k +// Can have i, j, k pointing to same memory STATIC size_t mpn_xor_neg(mpz_dig_t *idig, const mpz_dig_t *jdig, size_t jlen, const mpz_dig_t *kdig, size_t klen, mpz_dbl_dig_t carryi, mpz_dbl_dig_t carryj, mpz_dbl_dig_t carryk) { mpz_dig_t *oidig = idig; @@ -406,10 +382,9 @@ STATIC size_t mpn_xor_neg(mpz_dig_t *idig, const mpz_dig_t *jdig, size_t jlen, c return mpn_remove_trailing_zeros(oidig, idig); } -/* computes i = i * d1 + d2 - returns number of digits in i - assumes enough memory in i; assumes normalised i; assumes dmul != 0 -*/ +// Computes i = i * dmul + dadd +// Returns number of digits in i +// Assumes enough memory in i; assumes normalised i; assumes dmul != 0 STATIC size_t mpn_mul_dig_add_dig(mpz_dig_t *idig, size_t ilen, mpz_dig_t dmul, mpz_dig_t dadd) { mpz_dig_t *oidig = idig; mpz_dbl_dig_t carry = dadd; @@ -427,11 +402,10 @@ STATIC size_t mpn_mul_dig_add_dig(mpz_dig_t *idig, size_t ilen, mpz_dig_t dmul, return idig - oidig; } -/* computes i = j * k - returns number of digits in i - assumes enough memory in i; assumes i is zeroed; assumes normalised j, k -
can have j, k point to same memory -*/ +// Computes i = j * k +// Returns number of digits in i +// Assumes enough memory in i; assumes i is zeroed; assumes normalised j, k +// Can have j, k point to same memory STATIC size_t mpn_mul(mpz_dig_t *idig, mpz_dig_t *jdig, size_t jlen, mpz_dig_t *kdig, size_t klen) { mpz_dig_t *oidig = idig; size_t ilen = 0; @@ -457,12 +431,11 @@ STATIC size_t mpn_mul(mpz_dig_t *idig, mpz_dig_t *jdig, size_t jlen, mpz_dig_t * return ilen; } -/* natural_div - quo * den + new_num = old_num (ie num is replaced with rem) - assumes den != 0 - assumes num_dig has enough memory to be extended by 1 digit - assumes quo_dig has enough memory (as many digits as num) - assumes quo_dig is filled with zeros -*/ +// natural_div - quo * den + new_num = old_num (ie num is replaced with rem) +// Assumes den != 0 +// Assumes num_dig has enough memory to be extended by 1 digit +// Assumes quo_dig has enough memory (as many digits as num) +// Assumes quo_dig is filled with zeros STATIC void mpn_div(mpz_dig_t *num_dig, size_t *num_len, const mpz_dig_t *den_dig, size_t den_len, mpz_dig_t *quo_dig, size_t *quo_len) { mpz_dig_t *orig_num_dig = num_dig; mpz_dig_t *orig_quo_dig = quo_dig; @@ -632,42 +605,6 @@ void mpz_deinit(mpz_t *z) { } } -#if 0 -these functions are unused - -mpz_t *mpz_zero(void) { - mpz_t *z = m_new_obj(mpz_t); - mpz_init_zero(z); - return z; -} - -mpz_t *mpz_from_int(mp_int_t val) { - mpz_t *z = mpz_zero(); - mpz_set_from_int(z, val); - return z; -} - -mpz_t *mpz_from_ll(long long val, bool is_signed) { - mpz_t *z = mpz_zero(); - mpz_set_from_ll(z, val, is_signed); - return z; -} - -#if MICROPY_PY_BUILTINS_FLOAT -mpz_t *mpz_from_float(mp_float_t val) { - mpz_t *z = mpz_zero(); - mpz_set_from_float(z, val); - return z; -} -#endif - -mpz_t *mpz_from_str(const char *str, size_t len, bool neg, unsigned int base) { - mpz_t *z = mpz_zero(); - mpz_set_from_str(z, str, len, neg, base); - return z; -} -#endif - STATIC void mpz_free(mpz_t *z) { if (z 
!= NULL) { m_del(mpz_dig_t, z->dig, z->alloc); @@ -701,17 +638,116 @@ STATIC mpz_t *mpz_clone(const mpz_t *src) { return z; } -/* sets dest = src - can have dest, src the same -*/ -void mpz_set(mpz_t *dest, const mpz_t *src) { +// Sets dest = src +// Can have dest, src the same +STATIC void mpz_set(mpz_t *dest, const mpz_t *src) { mpz_need_dig(dest, src->len); dest->neg = src->neg; dest->len = src->len; memcpy(dest->dig, src->dig, src->len * sizeof(mpz_dig_t)); } -void mpz_set_from_int(mpz_t *z, mp_int_t val) { +#ifndef MICROPY_WRAP_MPZ_SET_FROM_INT +#define MICROPY_WRAP_MPZ_SET_FROM_INT(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_4(f) +#endif + +#ifndef MICROPY_WRAP_MPZ_SET_FROM_LL +#define MICROPY_WRAP_MPZ_SET_FROM_LL(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_4(f) +#endif + +#ifndef MICROPY_WRAP_MPZ_SET_FROM_FLOAT +#define MICROPY_WRAP_MPZ_SET_FROM_FLOAT(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_4(f) +#endif + +#ifndef MICROPY_WRAP_MPZ_SET_FROM_STR +#define MICROPY_WRAP_MPZ_SET_FROM_STR(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_4(f) +#endif + +#ifndef MICROPY_WRAP_MPZ_SET_FROM_BYTES +#define MICROPY_WRAP_MPZ_SET_FROM_BYTES(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_4(f) +#endif + +#ifndef MICROPY_WRAP_MPZ_CMP +#define MICROPY_WRAP_MPZ_CMP(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_4(f) +#endif + +#ifndef MICROPY_WRAP_MPZ_ABS +#define MICROPY_WRAP_MPZ_ABS(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_4(f) +#endif + +#ifndef MICROPY_WRAP_MPZ_NEG +#define MICROPY_WRAP_MPZ_NEG(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_4(f) +#endif + +#ifndef MICROPY_WRAP_MPZ_NOT +#define MICROPY_WRAP_MPZ_NOT(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_4(f) +#endif + +#ifndef MICROPY_WRAP_MPZ_SHL +#define MICROPY_WRAP_MPZ_SHL(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_4(f) +#endif + +#ifndef MICROPY_WRAP_MPZ_SHR +#define MICROPY_WRAP_MPZ_SHR(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_4(f) +#endif + +#ifndef MICROPY_WRAP_MPZ_ADD +#define MICROPY_WRAP_MPZ_ADD(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_4(f) +#endif + +#ifndef MICROPY_WRAP_MPZ_SUB 
+#define MICROPY_WRAP_MPZ_SUB(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_4(f) +#endif + +#ifndef MICROPY_WRAP_MPZ_AND +#define MICROPY_WRAP_MPZ_AND(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_4(f) +#endif + +#ifndef MICROPY_WRAP_MPZ_OR +#define MICROPY_WRAP_MPZ_OR(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_4(f) +#endif + +#ifndef MICROPY_WRAP_MPZ_XOR +#define MICROPY_WRAP_MPZ_XOR(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_4(f) +#endif + +#ifndef MICROPY_WRAP_MPZ_MUL +#define MICROPY_WRAP_MPZ_MUL(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_4(f) +#endif + +#ifndef MICROPY_WRAP_MPZ_POW +#define MICROPY_WRAP_MPZ_POW(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_4(f) +#endif + +#ifndef MICROPY_WRAP_MPZ_POW3 +#define MICROPY_WRAP_MPZ_POW3(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_4(f) +#endif + +#ifndef MICROPY_WRAP_MPZ_DIVMOD +#define MICROPY_WRAP_MPZ_DIVMOD(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_4(f) +#endif + +#ifndef MICROPY_WRAP_MPZ_HASH +#define MICROPY_WRAP_MPZ_HASH(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_4(f) +#endif + +#ifndef MICROPY_WRAP_MPZ_AS_INT +#define MICROPY_WRAP_MPZ_AS_INT(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_4(f) +#endif + +#ifndef MICROPY_WRAP_MPZ_AS_UINT +#define MICROPY_WRAP_MPZ_AS_UINT(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_4(f) +#endif + +#ifndef MICROPY_WRAP_MPZ_AS_BYTES +#define MICROPY_WRAP_MPZ_AS_BYTES(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_4(f) +#endif + +#ifndef MICROPY_WRAP_MPZ_AS_STR +#define MICROPY_WRAP_MPZ_AS_STR(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_4(f) +#endif + +void MICROPY_WRAP_MPZ_SET_FROM_INT(mpz_set_from_int)(mpz_t * z, mp_int_t val) { if (val == 0) { z->neg = 0; z->len = 0; @@ -736,7 +772,7 @@ void mpz_set_from_int(mpz_t *z, mp_int_t val) { } } -void mpz_set_from_ll(mpz_t *z, long long val, bool is_signed) { +void MICROPY_WRAP_MPZ_SET_FROM_LL(mpz_set_from_ll)(mpz_t * z, long long val, bool is_signed) { mpz_need_dig(z, MPZ_NUM_DIG_FOR_LL); unsigned long long uval; @@ -756,7 +792,7 @@ void mpz_set_from_ll(mpz_t *z, long long val, bool is_signed) { } #if 
MICROPY_PY_BUILTINS_FLOAT -void mpz_set_from_float(mpz_t *z, mp_float_t src) { +void MICROPY_WRAP_MPZ_SET_FROM_FLOAT(mpz_set_from_float)(mpz_t * z, mp_float_t src) { mp_float_union_t u = {src}; z->neg = u.p.sgn; if (u.p.exp == 0) { @@ -813,8 +849,8 @@ void mpz_set_from_float(mpz_t *z, mp_float_t src) { } #endif -// returns number of bytes from str that were processed -size_t mpz_set_from_str(mpz_t *z, const char *str, size_t len, bool neg, unsigned int base) { +// Returns number of bytes from str that were processed +size_t MICROPY_WRAP_MPZ_SET_FROM_STR(mpz_set_from_str)(mpz_t * z, const char *str, size_t len, bool neg, unsigned int base) { assert(base <= 36); const char *cur = str; @@ -850,7 +886,7 @@ size_t mpz_set_from_str(mpz_t *z, const char *str, size_t len, bool neg, unsigne return cur - str; } -void mpz_set_from_bytes(mpz_t *z, bool big_endian, size_t len, const byte *buf) { +void MICROPY_WRAP_MPZ_SET_FROM_BYTES(mpz_set_from_bytes)(mpz_t * z, bool big_endian, size_t len, const byte *buf) { int delta = 1; if (big_endian) { buf += len - 1; @@ -883,23 +919,7 @@ void mpz_set_from_bytes(mpz_t *z, bool big_endian, size_t len, const byte *buf) z->len = mpn_remove_trailing_zeros(z->dig, z->dig + z->len); } -#if 0 -these functions are unused - -bool mpz_is_pos(const mpz_t *z) { - return z->len > 0 && z->neg == 0; -} - -bool mpz_is_odd(const mpz_t *z) { - return z->len > 0 && (z->dig[0] & 1) != 0; -} - -bool mpz_is_even(const mpz_t *z) { - return z->len == 0 || (z->dig[0] & 1) == 0; -} -#endif - -int mpz_cmp(const mpz_t *z1, const mpz_t *z2) { +int MICROPY_WRAP_MPZ_CMP(mpz_cmp)(const mpz_t * z1, const mpz_t *z2) { int cmp = (int)z2->neg - (int)z1->neg; if (cmp != 0) { return cmp; @@ -911,141 +931,18 @@ int mpz_cmp(const mpz_t *z1, const mpz_t *z2) { return cmp; } -#if 0 -// obsolete -// compares mpz with an integer that fits within DIG_SIZE bits -mp_int_t mpz_cmp_sml_int(const mpz_t *z, mp_int_t sml_int) { - mp_int_t cmp; - if (z->neg == 0) { - if (sml_int < 0) { - 
return 1; - } - if (sml_int == 0) { - if (z->len == 0) { - return 0; - } - return 1; - } - if (z->len == 0) { - return -1; - } - assert(sml_int < (1 << DIG_SIZE)); - if (z->len != 1) { - return 1; - } - cmp = z->dig[0] - sml_int; - } else { - if (sml_int > 0) { - return -1; - } - if (sml_int == 0) { - if (z->len == 0) { - return 0; - } - return -1; - } - if (z->len == 0) { - return 1; - } - assert(sml_int > -(1 << DIG_SIZE)); - if (z->len != 1) { - return -1; - } - cmp = -z->dig[0] - sml_int; - } - if (cmp < 0) { - return -1; - } - if (cmp > 0) { - return 1; - } - return 0; -} -#endif - -#if 0 -these functions are unused - -/* returns abs(z) -*/ -mpz_t *mpz_abs(const mpz_t *z) { - // TODO: handle case of z->alloc=0 - mpz_t *z2 = mpz_clone(z); - z2->neg = 0; - return z2; -} - -/* returns -z -*/ -mpz_t *mpz_neg(const mpz_t *z) { - // TODO: handle case of z->alloc=0 - mpz_t *z2 = mpz_clone(z); - z2->neg = 1 - z2->neg; - return z2; -} - -/* returns lhs + rhs - can have lhs, rhs the same -*/ -mpz_t *mpz_add(const mpz_t *lhs, const mpz_t *rhs) { - mpz_t *z = mpz_zero(); - mpz_add_inpl(z, lhs, rhs); - return z; -} - -/* returns lhs - rhs - can have lhs, rhs the same -*/ -mpz_t *mpz_sub(const mpz_t *lhs, const mpz_t *rhs) { - mpz_t *z = mpz_zero(); - mpz_sub_inpl(z, lhs, rhs); - return z; -} - -/* returns lhs * rhs - can have lhs, rhs the same -*/ -mpz_t *mpz_mul(const mpz_t *lhs, const mpz_t *rhs) { - mpz_t *z = mpz_zero(); - mpz_mul_inpl(z, lhs, rhs); - return z; -} - -/* returns lhs ** rhs - can have lhs, rhs the same -*/ -mpz_t *mpz_pow(const mpz_t *lhs, const mpz_t *rhs) { - mpz_t *z = mpz_zero(); - mpz_pow_inpl(z, lhs, rhs); - return z; -} - -/* computes new integers in quo and rem such that: - quo * rhs + rem = lhs - 0 <= rem < rhs - can have lhs, rhs the same -*/ -void mpz_divmod(const mpz_t *lhs, const mpz_t *rhs, mpz_t **quo, mpz_t **rem) { - *quo = mpz_zero(); - *rem = mpz_zero(); - mpz_divmod_inpl(*quo, *rem, lhs, rhs); -} -#endif - -/* computes dest = abs(z) - 
can have dest, z the same -*/ -void mpz_abs_inpl(mpz_t *dest, const mpz_t *z) { +// Computes dest = abs(z) +// Can have dest, z the same +void MICROPY_WRAP_MPZ_ABS(mpz_abs_inpl)(mpz_t * dest, const mpz_t *z) { if (dest != z) { mpz_set(dest, z); } dest->neg = 0; } -/* computes dest = -z - can have dest, z the same -*/ -void mpz_neg_inpl(mpz_t *dest, const mpz_t *z) { +// Computes dest = -z +// Can have dest, z the same +void MICROPY_WRAP_MPZ_NEG(mpz_neg_inpl)(mpz_t * dest, const mpz_t *z) { if (dest != z) { mpz_set(dest, z); } @@ -1054,10 +951,9 @@ void mpz_neg_inpl(mpz_t *dest, const mpz_t *z) { } } -/* computes dest = ~z (= -z - 1) - can have dest, z the same -*/ -void mpz_not_inpl(mpz_t *dest, const mpz_t *z) { +// Computes dest = ~z (= -z - 1) +// Can have dest, z the same +void MICROPY_WRAP_MPZ_NOT(mpz_not_inpl)(mpz_t * dest, const mpz_t *z) { if (dest != z) { mpz_set(dest, z); } @@ -1078,10 +974,9 @@ void mpz_not_inpl(mpz_t *dest, const mpz_t *z) { } } -/* computes dest = lhs << rhs - can have dest, lhs the same -*/ -void mpz_shl_inpl(mpz_t *dest, const mpz_t *lhs, mp_uint_t rhs) { +// Computes dest = lhs << rhs +// Can have dest, lhs the same +void MICROPY_WRAP_MPZ_SHL(mpz_shl_inpl)(mpz_t * dest, const mpz_t *lhs, mp_uint_t rhs) { if (lhs->len == 0 || rhs == 0) { mpz_set(dest, lhs); } else { @@ -1091,10 +986,9 @@ void mpz_shl_inpl(mpz_t *dest, const mpz_t *lhs, mp_uint_t rhs) { } } -/* computes dest = lhs >> rhs - can have dest, lhs the same -*/ -void mpz_shr_inpl(mpz_t *dest, const mpz_t *lhs, mp_uint_t rhs) { +// Computes dest = lhs >> rhs +// Can have dest, lhs the same +void MICROPY_WRAP_MPZ_SHR(mpz_shr_inpl)(mpz_t * dest, const mpz_t *lhs, mp_uint_t rhs) { if (lhs->len == 0 || rhs == 0) { mpz_set(dest, lhs); } else { @@ -1129,10 +1023,9 @@ void mpz_shr_inpl(mpz_t *dest, const mpz_t *lhs, mp_uint_t rhs) { } } -/* computes dest = lhs + rhs - can have dest, lhs, rhs the same -*/ -void mpz_add_inpl(mpz_t *dest, const mpz_t *lhs, const mpz_t *rhs) { +// 
Computes dest = lhs + rhs +// Can have dest, lhs, rhs the same +void MICROPY_WRAP_MPZ_ADD(mpz_add_inpl)(mpz_t * dest, const mpz_t *lhs, const mpz_t *rhs) { if (mpn_cmp(lhs->dig, lhs->len, rhs->dig, rhs->len) < 0) { const mpz_t *temp = lhs; lhs = rhs; @@ -1150,10 +1043,9 @@ void mpz_add_inpl(mpz_t *dest, const mpz_t *lhs, const mpz_t *rhs) { dest->neg = lhs->neg & !!dest->len; } -/* computes dest = lhs - rhs - can have dest, lhs, rhs the same -*/ -void mpz_sub_inpl(mpz_t *dest, const mpz_t *lhs, const mpz_t *rhs) { +// Computes dest = lhs - rhs +// Can have dest, lhs, rhs the same +void MICROPY_WRAP_MPZ_SUB(mpz_sub_inpl)(mpz_t * dest, const mpz_t *lhs, const mpz_t *rhs) { bool neg = false; if (mpn_cmp(lhs->dig, lhs->len, rhs->dig, rhs->len) < 0) { @@ -1180,10 +1072,9 @@ void mpz_sub_inpl(mpz_t *dest, const mpz_t *lhs, const mpz_t *rhs) { } } -/* computes dest = lhs & rhs - can have dest, lhs, rhs the same -*/ -void mpz_and_inpl(mpz_t *dest, const mpz_t *lhs, const mpz_t *rhs) { +// Computes dest = lhs & rhs +// Can have dest, lhs, rhs the same +void MICROPY_WRAP_MPZ_AND(mpz_and_inpl)(mpz_t * dest, const mpz_t *lhs, const mpz_t *rhs) { // make sure lhs has the most digits if (lhs->len < rhs->len) { const mpz_t *temp = lhs; @@ -1214,10 +1105,9 @@ void mpz_and_inpl(mpz_t *dest, const mpz_t *lhs, const mpz_t *rhs) { #endif } -/* computes dest = lhs | rhs - can have dest, lhs, rhs the same -*/ -void mpz_or_inpl(mpz_t *dest, const mpz_t *lhs, const mpz_t *rhs) { +// Computes dest = lhs | rhs +// Can have dest, lhs, rhs the same +void MICROPY_WRAP_MPZ_OR(mpz_or_inpl)(mpz_t * dest, const mpz_t *lhs, const mpz_t *rhs) { // make sure lhs has the most digits if (lhs->len < rhs->len) { const mpz_t *temp = lhs; @@ -1248,10 +1138,9 @@ void mpz_or_inpl(mpz_t *dest, const mpz_t *lhs, const mpz_t *rhs) { #endif } -/* computes dest = lhs ^ rhs - can have dest, lhs, rhs the same -*/ -void mpz_xor_inpl(mpz_t *dest, const mpz_t *lhs, const mpz_t *rhs) { +// Computes dest = lhs ^ rhs +// 
Can have dest, lhs, rhs the same +void MICROPY_WRAP_MPZ_XOR(mpz_xor_inpl)(mpz_t * dest, const mpz_t *lhs, const mpz_t *rhs) { // make sure lhs has the most digits if (lhs->len < rhs->len) { const mpz_t *temp = lhs; @@ -1286,10 +1175,9 @@ void mpz_xor_inpl(mpz_t *dest, const mpz_t *lhs, const mpz_t *rhs) { #endif } -/* computes dest = lhs * rhs - can have dest, lhs, rhs the same -*/ -void mpz_mul_inpl(mpz_t *dest, const mpz_t *lhs, const mpz_t *rhs) { +// Computes dest = lhs * rhs +// Can have dest, lhs, rhs the same +void MICROPY_WRAP_MPZ_MUL(mpz_mul_inpl)(mpz_t * dest, const mpz_t *lhs, const mpz_t *rhs) { if (lhs->len == 0 || rhs->len == 0) { mpz_set_from_int(dest, 0); return; @@ -1318,10 +1206,9 @@ void mpz_mul_inpl(mpz_t *dest, const mpz_t *lhs, const mpz_t *rhs) { mpz_free(temp); } -/* computes dest = lhs ** rhs - can have dest, lhs, rhs the same -*/ -void mpz_pow_inpl(mpz_t *dest, const mpz_t *lhs, const mpz_t *rhs) { +// Computes dest = lhs ** rhs +// Can have dest, lhs, rhs the same +void MICROPY_WRAP_MPZ_POW(mpz_pow_inpl)(mpz_t * dest, const mpz_t *lhs, const mpz_t *rhs) { if (lhs->len == 0 || rhs->neg != 0) { mpz_set_from_int(dest, 0); return; @@ -1352,10 +1239,9 @@ void mpz_pow_inpl(mpz_t *dest, const mpz_t *lhs, const mpz_t *rhs) { mpz_free(n); } -/* computes dest = (lhs ** rhs) % mod - can have dest, lhs, rhs the same; mod can't be the same as dest -*/ -void mpz_pow3_inpl(mpz_t *dest, const mpz_t *lhs, const mpz_t *rhs, const mpz_t *mod) { +// Computes dest = (lhs ** rhs) % mod +// Can have dest, lhs, rhs the same; mod can't be the same as dest +void MICROPY_WRAP_MPZ_POW3(mpz_pow3_inpl)(mpz_t * dest, const mpz_t *lhs, const mpz_t *rhs, const mpz_t *mod) { if (lhs->len == 0 || rhs->neg != 0 || (mod->len == 1 && mod->dig[0] == 1)) { mpz_set_from_int(dest, 0); return; @@ -1390,97 +1276,12 @@ void mpz_pow3_inpl(mpz_t *dest, const mpz_t *lhs, const mpz_t *rhs, const mpz_t mpz_free(n); } -#if 0 -these functions are unused - -/* computes gcd(z1, z2) - based 
on Knuth's modified gcd algorithm (I think?) - gcd(z1, z2) >= 0 - gcd(0, 0) = 0 - gcd(z, 0) = abs(z) -*/ -mpz_t *mpz_gcd(const mpz_t *z1, const mpz_t *z2) { - if (z1->len == 0) { - // TODO: handle case of z2->alloc=0 - mpz_t *a = mpz_clone(z2); - a->neg = 0; - return a; - } else if (z2->len == 0) { - mpz_t *a = mpz_clone(z1); - a->neg = 0; - return a; - } - - mpz_t *a = mpz_clone(z1); - mpz_t *b = mpz_clone(z2); - mpz_t c; - mpz_init_zero(&c); - a->neg = 0; - b->neg = 0; - - for (;;) { - if (mpz_cmp(a, b) < 0) { - if (a->len == 0) { - mpz_free(a); - mpz_deinit(&c); - return b; - } - mpz_t *t = a; - a = b; - b = t; - } - if (!(b->len >= 2 || (b->len == 1 && b->dig[0] > 1))) { // compute b > 0; could be mpz_cmp_small_int(b, 1) > 0 - break; - } - mpz_set(&c, b); - do { - mpz_add_inpl(&c, &c, &c); - } while (mpz_cmp(&c, a) <= 0); - c.len = mpn_shr(c.dig, c.dig, c.len, 1); - mpz_sub_inpl(a, a, &c); - } - - mpz_deinit(&c); - - if (b->len == 1 && b->dig[0] == 1) { // compute b == 1; could be mpz_cmp_small_int(b, 1) == 0 - mpz_free(a); - return b; - } else { - mpz_free(b); - return a; - } -} - -/* computes lcm(z1, z2) - = abs(z1) / gcd(z1, z2) * abs(z2) - lcm(z1, z1) >= 0 - lcm(0, 0) = 0 - lcm(z, 0) = 0 -*/ -mpz_t *mpz_lcm(const mpz_t *z1, const mpz_t *z2) { - if (z1->len == 0 || z2->len == 0) { - return mpz_zero(); - } - - mpz_t *gcd = mpz_gcd(z1, z2); - mpz_t *quo = mpz_zero(); - mpz_t *rem = mpz_zero(); - mpz_divmod_inpl(quo, rem, z1, gcd); - mpz_mul_inpl(rem, quo, z2); - mpz_free(gcd); - mpz_free(quo); - rem->neg = 0; - return rem; -} -#endif - -/* computes new integers in quo and rem such that: - quo * rhs + rem = lhs - 0 <= rem < rhs - can have lhs, rhs the same - assumes rhs != 0 (undefined behaviour if it is) -*/ -void mpz_divmod_inpl(mpz_t *dest_quo, mpz_t *dest_rem, const mpz_t *lhs, const mpz_t *rhs) { +// Computes new integers in quo and rem such that: +// quo * rhs + rem = lhs +// 0 <= rem < rhs +// Can have lhs, rhs the same +// Assumes rhs != 0 (undefined 
behaviour if it is) +void MICROPY_WRAP_MPZ_DIVMOD(mpz_divmod_inpl)(mpz_t * dest_quo, mpz_t *dest_rem, const mpz_t *lhs, const mpz_t *rhs) { assert(!mpz_is_zero(rhs)); mpz_need_dig(dest_quo, lhs->len + 1); // +1 necessary? @@ -1504,36 +1305,8 @@ void mpz_divmod_inpl(mpz_t *dest_quo, mpz_t *dest_rem, const mpz_t *lhs, const m } } -#if 0 -these functions are unused - -/* computes floor(lhs / rhs) - can have lhs, rhs the same -*/ -mpz_t *mpz_div(const mpz_t *lhs, const mpz_t *rhs) { - mpz_t *quo = mpz_zero(); - mpz_t rem; - mpz_init_zero(&rem); - mpz_divmod_inpl(quo, &rem, lhs, rhs); - mpz_deinit(&rem); - return quo; -} - -/* computes lhs % rhs ( >= 0) - can have lhs, rhs the same -*/ -mpz_t *mpz_mod(const mpz_t *lhs, const mpz_t *rhs) { - mpz_t quo; - mpz_init_zero(&quo); - mpz_t *rem = mpz_zero(); - mpz_divmod_inpl(&quo, rem, lhs, rhs); - mpz_deinit(&quo); - return rem; -} -#endif - -// must return actual int value if it fits in mp_int_t -mp_int_t mpz_hash(const mpz_t *z) { +// Must return actual int value if it fits in mp_int_t +mp_int_t MICROPY_WRAP_MPZ_HASH(mpz_hash)(const mpz_t * z) { mp_uint_t val = 0; mpz_dig_t *d = z->dig + z->len; @@ -1548,7 +1321,7 @@ mp_int_t mpz_hash(const mpz_t *z) { return val; } -bool mpz_as_int_checked(const mpz_t *i, mp_int_t *value) { +bool MICROPY_WRAP_MPZ_AS_INT(mpz_as_int_checked)(const mpz_t * i, mp_int_t *value) { mp_uint_t val = 0; mpz_dig_t *d = i->dig + i->len; @@ -1568,7 +1341,7 @@ bool mpz_as_int_checked(const mpz_t *i, mp_int_t *value) { return true; } -bool mpz_as_uint_checked(const mpz_t *i, mp_uint_t *value) { +bool MICROPY_WRAP_MPZ_AS_UINT(mpz_as_uint_checked)(const mpz_t * i, mp_uint_t *value) { if (i->neg != 0) { // can't represent signed values return false; @@ -1589,7 +1362,7 @@ bool mpz_as_uint_checked(const mpz_t *i, mp_uint_t *value) { return true; } -void mpz_as_bytes(const mpz_t *z, bool big_endian, size_t len, byte *buf) { +void MICROPY_WRAP_MPZ_AS_BYTES(mpz_as_bytes)(const mpz_t * z, bool big_endian, size_t 
len, byte *buf) { byte *b = buf; if (big_endian) { b += len; @@ -1648,19 +1421,10 @@ mp_float_t mpz_as_float(const mpz_t *i) { } #endif -#if 0 -this function is unused -char *mpz_as_str(const mpz_t *i, unsigned int base) { - char *s = m_new(char, mp_int_format_size(mpz_max_num_bits(i), base, NULL, '\0')); - mpz_as_str_inpl(i, base, NULL, 'a', '\0', s); - return s; -} -#endif - -// assumes enough space in str as calculated by mp_int_format_size +// Assumes enough space in str as calculated by mp_int_format_size // base must be between 2 and 32 inclusive -// returns length of string, not including null byte -size_t mpz_as_str_inpl(const mpz_t *i, unsigned int base, const char *prefix, char base_char, char comma, char *str) { +// Returns length of string, not including null byte +size_t MICROPY_WRAP_MPZ_AS_STR(mpz_as_str_inpl)(const mpz_t * i, unsigned int base, const char *prefix, char base_char, char comma, char *str) { assert(str != NULL); assert(2 <= base && base <= 32); diff --git a/py/mpz.h b/py/mpz.h index d27f5724047ae..d01d646ccefd3 100644 --- a/py/mpz.h +++ b/py/mpz.h @@ -107,7 +107,6 @@ void mpz_init_from_int(mpz_t *z, mp_int_t val); void mpz_init_fixed_from_int(mpz_t *z, mpz_dig_t *dig, size_t dig_alloc, mp_int_t val); void mpz_deinit(mpz_t *z); -void mpz_set(mpz_t *dest, const mpz_t *src); void mpz_set_from_int(mpz_t *z, mp_int_t src); void mpz_set_from_ll(mpz_t *z, long long i, bool is_signed); #if MICROPY_PY_BUILTINS_FLOAT diff --git a/py/obj.c b/py/obj.c index 5e01198b6fb4c..6b620170739d1 100644 --- a/py/obj.c +++ b/py/obj.c @@ -44,6 +44,10 @@ MP_NOINLINE void *mp_obj_malloc_helper(size_t num_bytes, const mp_obj_type_t *ty return base; } +#ifndef MICROPY_WRAP_MP_OBJ_GET_TYPE +#define MICROPY_WRAP_MP_OBJ_GET_TYPE(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_1(f) +#endif + const mp_obj_type_t *MICROPY_WRAP_MP_OBJ_GET_TYPE(mp_obj_get_type)(mp_const_obj_t o_in) { #if MICROPY_OBJ_IMMEDIATE_OBJS && MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_A diff --git a/py/py.mk 
b/py/py.mk index e81df52fb7f99..18beed3452961 100644 --- a/py/py.mk +++ b/py/py.mk @@ -18,9 +18,6 @@ endif QSTR_GLOBAL_DEPENDENCIES += $(PY_SRC)/mpconfig.h mpconfigport.h QSTR_GLOBAL_REQUIREMENTS += $(HEADER_BUILD)/mpversion.h -# some code is performance bottleneck and compiled with other optimization options -CSUPEROPT = -O3 - # Enable building 32-bit code on 64-bit host. ifeq ($(MICROPY_FORCE_32BIT),1) CC += -m32 @@ -254,17 +251,3 @@ $(BUILD)/shared/libc/string0.o: CFLAGS += $(CFLAGS_BUILTIN) # Force nlr code to always be compiled with space-saving optimisation so # that the function preludes are of a minimal and predictable form. $(PY_BUILD)/nlr%.o: CFLAGS += -Os - -# optimising gc for speed; 5ms down to 4ms on pybv2 -$(PY_BUILD)/gc.o: CFLAGS += $(CSUPEROPT) - -# optimising vm for speed, adds only a small amount to code size but makes a huge difference to speed (20% faster) -$(PY_BUILD)/vm.o: CFLAGS += $(CSUPEROPT) -# Optimizing vm.o for modern deeply pipelined CPUs with branch predictors -# may require disabling tail jump optimization. This will make sure that -# each opcode has its own dispatching jump which will improve branch -# branch predictor efficiency. 
-# https://marc.info/?l=lua-l&m=129778596120851 -# http://hg.python.org/cpython/file/b127046831e2/Python/ceval.c#l828 -# http://www.emulators.com/docs/nx25_nostradamus.htm -#-fno-crossjumping diff --git a/py/runtime.c b/py/runtime.c index 6d8eddedc8646..5342dd9251bca 100644 --- a/py/runtime.c +++ b/py/runtime.c @@ -203,6 +203,10 @@ void mp_call_function_1_from_nlr_jump_callback(void *ctx_in) { ctx->func(ctx->arg); } +#ifndef MICROPY_WRAP_MP_LOAD_NAME +#define MICROPY_WRAP_MP_LOAD_NAME(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_1(f) +#endif + mp_obj_t MICROPY_WRAP_MP_LOAD_NAME(mp_load_name)(qstr qst) { // logic: search locals, globals, builtins DEBUG_OP_printf("load name %s\n", qstr_str(qst)); @@ -216,6 +220,10 @@ mp_obj_t MICROPY_WRAP_MP_LOAD_NAME(mp_load_name)(qstr qst) { return mp_load_global(qst); } +#ifndef MICROPY_WRAP_MP_LOAD_GLOBAL +#define MICROPY_WRAP_MP_LOAD_GLOBAL(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_1(f) +#endif + mp_obj_t MICROPY_WRAP_MP_LOAD_GLOBAL(mp_load_global)(qstr qst) { // logic: search globals, builtins DEBUG_OP_printf("load global %s\n", qstr_str(qst)); @@ -360,6 +368,10 @@ mp_obj_t mp_unary_op(mp_unary_op_t op, mp_obj_t arg) { } } +#ifndef MICROPY_WRAP_MP_BINARY_OP +#define MICROPY_WRAP_MP_BINARY_OP(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_2(f) +#endif + mp_obj_t MICROPY_WRAP_MP_BINARY_OP(mp_binary_op)(mp_binary_op_t op, mp_obj_t lhs, mp_obj_t rhs) { DEBUG_OP_printf("binary " UINT_FMT " %q %p %p\n", op, mp_binary_op_method_name[op], lhs, rhs); diff --git a/py/scheduler.c b/py/scheduler.c index 3eae8b4fa366c..682b8dbd70b6c 100644 --- a/py/scheduler.c +++ b/py/scheduler.c @@ -29,6 +29,10 @@ #include "py/mphal.h" #include "py/runtime.h" +#ifndef MICROPY_WRAP_MP_SCHED_EXCEPTION +#define MICROPY_WRAP_MP_SCHED_EXCEPTION(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_1(f) +#endif + // Schedules an exception on the main thread (for exceptions "thrown" by async // sources such as interrupts and UNIX signal handlers). 
void MICROPY_WRAP_MP_SCHED_EXCEPTION(mp_sched_exception)(mp_obj_t exc) { @@ -45,6 +49,10 @@ void MICROPY_WRAP_MP_SCHED_EXCEPTION(mp_sched_exception)(mp_obj_t exc) { } #if MICROPY_KBD_EXCEPTION +#ifndef MICROPY_WRAP_MP_SCHED_KEYBOARD_INTERRUPT +#define MICROPY_WRAP_MP_SCHED_KEYBOARD_INTERRUPT(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_1(f) +#endif + // This function may be called asynchronously at any time so only do the bare minimum. void MICROPY_WRAP_MP_SCHED_KEYBOARD_INTERRUPT(mp_sched_keyboard_interrupt)(void) { MP_STATE_VM(mp_kbd_exception).traceback_data = NULL; @@ -53,6 +61,10 @@ void MICROPY_WRAP_MP_SCHED_KEYBOARD_INTERRUPT(mp_sched_keyboard_interrupt)(void) #endif #if MICROPY_ENABLE_VM_ABORT +#ifndef MICROPY_WRAP_MP_SCHED_VM_ABORT +#define MICROPY_WRAP_MP_SCHED_VM_ABORT(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_1(f) +#endif + void MICROPY_WRAP_MP_SCHED_VM_ABORT(mp_sched_vm_abort)(void) { MP_STATE_VM(vm_abort) = true; } @@ -156,6 +168,10 @@ void mp_sched_unlock(void) { MICROPY_END_ATOMIC_SECTION(atomic_state); } +#ifndef MICROPY_WRAP_MP_SCHED_SCHEDULE +#define MICROPY_WRAP_MP_SCHED_SCHEDULE(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_1(f) +#endif + bool MICROPY_WRAP_MP_SCHED_SCHEDULE(mp_sched_schedule)(mp_obj_t function, mp_obj_t arg) { mp_uint_t atomic_state = MICROPY_BEGIN_ATOMIC_SECTION(); bool ret; diff --git a/py/vm.c b/py/vm.c index a7902d9276732..9e816b4e80bf1 100644 --- a/py/vm.c +++ b/py/vm.c @@ -195,6 +195,21 @@ #define TRACE_TICK(current_ip, current_sp, is_exception) #endif // MICROPY_PY_SYS_SETTRACE +#ifndef MICROPY_WRAP_MP_EXECUTE_BYTECODE +// Using -O3 (rather than -Os) adds only a small amount to code size but makes a huge difference to speed (20% faster) +#define MICROPY_WRAP_MP_EXECUTE_BYTECODE(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_1(f) + +// Note: +// Optimizing vm.o for modern deeply pipelined CPUs with branch predictors +// may require disabling tail jump optimization.
This will make sure that +// each opcode has its own dispatching jump which will improve +// branch predictor efficiency. +// https://marc.info/?l=lua-l&m=129778596120851 +// http://hg.python.org/cpython/file/b127046831e2/Python/ceval.c#l828 +// http://www.emulators.com/docs/nx25_nostradamus.htm +// -fno-crossjumping +#endif + // fastn has items in reverse order (fastn[0] is local[0], fastn[-1] is local[1], etc) // sp points to bottom of stack which grows up // returns: