py/mpconfig.h: Finer-grained super-opt setting. by jimmo · Pull Request #12644 · micropython/micropython · GitHub
[go: up one dir, main page]

Skip to content

py/mpconfig.h: Finer-grained super-opt setting. #12644

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
1 change: 0 additions & 1 deletion ports/bare-arm/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ PYDFU ?= $(TOP)/tools/pydfu.py
CFLAGS += -I. -I$(TOP) -I$(BUILD)
CFLAGS += -Wall -Werror -std=c99 -nostdlib
CFLAGS += -mthumb -mtune=cortex-m4 -mcpu=cortex-m4 -msoft-float
CSUPEROPT = -Os # save some code space for performance-critical code

# Select debugging or optimisation build.
ifeq ($(DEBUG), 1)
Expand Down
3 changes: 3 additions & 0 deletions ports/bare-arm/mpconfigport.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@
// Python internal features
#define MICROPY_ERROR_REPORTING (MICROPY_ERROR_REPORTING_NONE)

// Just use -Os for everything to generate the smallest possible binary: make the
// optimisation hook a plain pass-through so no per-function attribute is added.
// The name must be MICROPY_APPLY_COMPILER_OPTIMISATIONS, which is the macro that
// py/mpconfig.h guards with #ifndef; any other spelling is silently ignored.
#define MICROPY_APPLY_COMPILER_OPTIMISATIONS(f) f

// Type definitions for the specific machine

typedef int32_t mp_int_t; // must be pointer size
Expand Down
12 changes: 3 additions & 9 deletions ports/esp32/mpconfigport.h
Original file line number Diff line number Diff line change
Expand Up @@ -231,18 +231,12 @@ void *esp_native_code_commit(void *, size_t, void *);
#endif

// Functions that should go in IRAM
#define MICROPY_PERFORMANCE_CRITICAL_LEVEL_1(f) IRAM_ATTR MICROPY_APPLY_COMPILER_OPTIMISATIONS(f)
#if !(CONFIG_IDF_TARGET_ESP32 && CONFIG_SPIRAM && CONFIG_SPIRAM_CACHE_WORKAROUND)
// For ESP32 with SPIRAM workaround, firmware is larger and uses more static IRAM,
// so in that configuration don't put too many functions in IRAM.
#if !(CONFIG_IDF_TARGET_ESP32 && CONFIG_SPIRAM && CONFIG_SPIRAM_CACHE_WORKAROUND)
#define MICROPY_WRAP_MP_BINARY_OP(f) IRAM_ATTR f
#define MICROPY_PERFORMANCE_CRITICAL_LEVEL_2(f) IRAM_ATTR MICROPY_APPLY_COMPILER_OPTIMISATIONS(f)
#endif
#define MICROPY_WRAP_MP_EXECUTE_BYTECODE(f) IRAM_ATTR f
#define MICROPY_WRAP_MP_LOAD_GLOBAL(f) IRAM_ATTR f
#define MICROPY_WRAP_MP_LOAD_NAME(f) IRAM_ATTR f
#define MICROPY_WRAP_MP_MAP_LOOKUP(f) IRAM_ATTR f
#define MICROPY_WRAP_MP_OBJ_GET_TYPE(f) IRAM_ATTR f
#define MICROPY_WRAP_MP_SCHED_EXCEPTION(f) IRAM_ATTR f
#define MICROPY_WRAP_MP_SCHED_KEYBOARD_INTERRUPT(f) IRAM_ATTR f

#define UINT_FMT "%u"
#define INT_FMT "%d"
Expand Down
2 changes: 0 additions & 2 deletions ports/minimal/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,6 @@ CFLAGS += $(INC) -Wall -Werror -Wdouble-promotion -Wfloat-conversion -std=c99 $(
LDFLAGS += -Wl,-Map=$@.map,--cref -Wl,--gc-sections
endif

CSUPEROPT = -Os # save some code space

# Tune for Debugging or Optimization
CFLAGS += -g # always include debug info in the ELF
ifeq ($(DEBUG), 1)
Expand Down
3 changes: 3 additions & 0 deletions ports/minimal/mpconfigport.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@
// Use the minimum headroom in the chunk allocator for parse nodes.
#define MICROPY_ALLOC_PARSE_CHUNK_INIT (16)

// Just use -Os for everything to generate the smallest possible binary: make the
// optimisation hook a plain pass-through so no per-function attribute is added.
// The name must be MICROPY_APPLY_COMPILER_OPTIMISATIONS, which is the macro that
// py/mpconfig.h guards with #ifndef; any other spelling is silently ignored.
#define MICROPY_APPLY_COMPILER_OPTIMISATIONS(f) f

// type definitions for the specific machine

typedef intptr_t mp_int_t; // must be pointer size
Expand Down
1 change: 0 additions & 1 deletion ports/stm32/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,6 @@ SRC_O += \
$(SYSTEM_FILE)

ifeq ($(MCU_SERIES),$(filter $(MCU_SERIES),f0 g0 l0))
CSUPEROPT = -Os # save some code space
SRC_O += \
resethandler_m0.o \
shared/runtime/gchelper_thumb1.o
Expand Down
6 changes: 6 additions & 0 deletions ports/stm32/mpconfigport.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,12 @@
#define MICROPY_OPT_MAP_LOOKUP_CACHE (__CORTEX_M > 0)
#endif

#if __CORTEX_M == 0
// Just use -Os for everything to avoid using extra flash. Cortex-M0 is used here as
// a stand-in for "likely has small flash"; this could be moved to the board config
// instead.
// The name must be MICROPY_APPLY_COMPILER_OPTIMISATIONS, which is the macro that
// py/mpconfig.h guards with #ifndef; any other spelling is silently ignored.
#define MICROPY_APPLY_COMPILER_OPTIMISATIONS(f) f
#endif

// emitters
#define MICROPY_PERSISTENT_CODE_LOAD (1)
#ifndef MICROPY_EMIT_THUMB
Expand Down
19 changes: 16 additions & 3 deletions py/gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -723,7 +723,12 @@ void gc_info(gc_info_t *info) {
GC_EXIT();
}

void *gc_alloc(size_t n_bytes, unsigned int alloc_flags) {
// Wrapper applied to the gc_alloc definition. A port may pre-define it; otherwise
// it falls back to performance-critical level 3.
// Optimising gc for speed; 5ms down to 4ms on pybv2
#if !defined(MICROPY_WRAP_GC_ALLOC)
#define MICROPY_WRAP_GC_ALLOC(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_3(f)
#endif

void *MICROPY_WRAP_GC_ALLOC(gc_alloc)(size_t n_bytes, unsigned int alloc_flags) {
bool has_finaliser = alloc_flags & GC_ALLOC_FLAG_HAS_FINALISER;
size_t n_blocks = ((n_bytes + BYTES_PER_BLOCK - 1) & (~(BYTES_PER_BLOCK - 1))) / BYTES_PER_BLOCK;
DEBUG_printf("gc_alloc(" UINT_FMT " bytes -> " UINT_FMT " blocks)\n", n_bytes, n_blocks);
Expand Down Expand Up @@ -890,9 +895,13 @@ void *gc_alloc_with_finaliser(mp_uint_t n_bytes) {
}
*/

// Wrapper applied to the gc_free definition. A port may pre-define it; otherwise
// it falls back to performance-critical level 3.
#if !defined(MICROPY_WRAP_GC_FREE)
#define MICROPY_WRAP_GC_FREE(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_3(f)
#endif

// force the freeing of a piece of memory
// TODO: freeing here does not call finaliser
void gc_free(void *ptr) {
void MICROPY_WRAP_GC_FREE(gc_free)(void *ptr) {
if (MP_STATE_THREAD(gc_lock_depth) > 0) {
// Cannot free while the GC is locked. However free is an optimisation
// to reclaim the memory immediately, this means it will now be left
Expand Down Expand Up @@ -1021,7 +1030,11 @@ void *gc_realloc(void *ptr, mp_uint_t n_bytes) {

#else // Alternative gc_realloc impl

void *gc_realloc(void *ptr_in, size_t n_bytes, bool allow_move) {
// Wrapper applied to the gc_realloc definition. A port may pre-define it; otherwise
// it falls back to performance-critical level 3.
#if !defined(MICROPY_WRAP_GC_REALLOC)
#define MICROPY_WRAP_GC_REALLOC(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_3(f)
#endif

void *MICROPY_WRAP_GC_REALLOC(gc_realloc)(void *ptr_in, size_t n_bytes, bool allow_move) {
// check for pure allocation
if (ptr_in == NULL) {
return gc_alloc(n_bytes, false);
Expand Down
4 changes: 4 additions & 0 deletions py/map.c
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,10 @@ STATIC void mp_map_rehash(mp_map_t *map) {
m_del(mp_map_elem_t, old_table, old_alloc);
}

// Wrapper hook for the map lookup function. A port may pre-define it; otherwise
// it falls back to performance-critical level 1.
#if !defined(MICROPY_WRAP_MP_MAP_LOOKUP)
#define MICROPY_WRAP_MP_MAP_LOOKUP(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_1(f)
#endif

// MP_MAP_LOOKUP behaviour:
// - returns NULL if not found, else the slot it was found in with key,value non-null
// MP_MAP_LOOKUP_ADD_IF_NOT_FOUND behaviour:
Expand Down
53 changes: 23 additions & 30 deletions py/mpconfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -1784,46 +1784,39 @@ typedef double mp_float_t;
#endif

/*****************************************************************************/
/* Hooks for a port to wrap functions with attributes */
/* Hooks for a port to wrap functions with performance-tuning attributes */

#ifndef MICROPY_WRAP_MP_BINARY_OP
#define MICROPY_WRAP_MP_BINARY_OP(f) f
#endif

#ifndef MICROPY_WRAP_MP_EXECUTE_BYTECODE
#define MICROPY_WRAP_MP_EXECUTE_BYTECODE(f) f
#endif

#ifndef MICROPY_WRAP_MP_LOAD_GLOBAL
#define MICROPY_WRAP_MP_LOAD_GLOBAL(f) f
#endif

#ifndef MICROPY_WRAP_MP_LOAD_NAME
#define MICROPY_WRAP_MP_LOAD_NAME(f) f
#endif

#ifndef MICROPY_WRAP_MP_MAP_LOOKUP
#define MICROPY_WRAP_MP_MAP_LOOKUP(f) f
#ifndef MICROPY_APPLY_COMPILER_OPTIMISATIONS
#if defined(__GNUC__) && !defined(__clang__)
// Enable -O3 optimisations for the wrapped function only, using GCC's per-function
// optimize attribute. The level requested here must stay in step with this comment.
#define MICROPY_APPLY_COMPILER_OPTIMISATIONS(f) __attribute__((optimize("O3"))) f
#else
// Unsupported on other compilers, will use global optimisation setting (typically -Os).
#define MICROPY_APPLY_COMPILER_OPTIMISATIONS(f) f
#endif

#ifndef MICROPY_WRAP_MP_OBJ_GET_TYPE
#define MICROPY_WRAP_MP_OBJ_GET_TYPE(f) f
#endif

#ifndef MICROPY_WRAP_MP_SCHED_EXCEPTION
#define MICROPY_WRAP_MP_SCHED_EXCEPTION(f) f
// Level 1: small functions that need the highest possible performance.
// Ideally these get full compiler optimisations and are placed in RAM. This default
// only applies MICROPY_APPLY_COMPILER_OPTIMISATIONS; a port can pre-define the macro
// to add its own placement attribute.
#if !defined(MICROPY_PERFORMANCE_CRITICAL_LEVEL_1)
#define MICROPY_PERFORMANCE_CRITICAL_LEVEL_1(f) MICROPY_APPLY_COMPILER_OPTIMISATIONS(f)
#endif

#ifndef MICROPY_WRAP_MP_SCHED_KEYBOARD_INTERRUPT
#define MICROPY_WRAP_MP_SCHED_KEYBOARD_INTERRUPT(f) f
// Level 2: larger functions that should go in RAM if possible.
// Ideally these get full compiler optimisations and, where IRAM is available, RAM
// placement. This default only applies MICROPY_APPLY_COMPILER_OPTIMISATIONS; a port
// can pre-define the macro to add its own placement attribute.
#if !defined(MICROPY_PERFORMANCE_CRITICAL_LEVEL_2)
#define MICROPY_PERFORMANCE_CRITICAL_LEVEL_2(f) MICROPY_APPLY_COMPILER_OPTIMISATIONS(f)
#endif

#ifndef MICROPY_WRAP_MP_SCHED_SCHEDULE
#define MICROPY_WRAP_MP_SCHED_SCHEDULE(f) f
// Level 3: functions that are not important enough to place in RAM.
// Ideally these get full compiler optimisation if flash is available. This default
// applies MICROPY_APPLY_COMPILER_OPTIMISATIONS unless a port pre-defines the macro.
#if !defined(MICROPY_PERFORMANCE_CRITICAL_LEVEL_3)
#define MICROPY_PERFORMANCE_CRITICAL_LEVEL_3(f) MICROPY_APPLY_COMPILER_OPTIMISATIONS(f)
#endif

#ifndef MICROPY_WRAP_MP_SCHED_VM_ABORT
#define MICROPY_WRAP_MP_SCHED_VM_ABORT(f) f
// Level 4: ideally apply full compiler optimisation if flash is available, but at a
// lower priority than level 3. This default is a plain pass-through that leaves the
// function unchanged; a port can pre-define the macro to opt in.
#if !defined(MICROPY_PERFORMANCE_CRITICAL_LEVEL_4)
#define MICROPY_PERFORMANCE_CRITICAL_LEVEL_4(f) f
#endif

/*****************************************************************************/
Expand Down
Loading
0