py/gc & esp32: Add MICROPY_GC_SPLIT_HEAP_AUTO to automatically grow heap on esp32 by projectgus · Pull Request #12141 · micropython/micropython · GitHub
[go: up one dir, main page]

Skip to content

py/gc & esp32: Add MICROPY_GC_SPLIT_HEAP_AUTO to automatically grow heap on esp32 #12141

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Aug 15, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions docs/library/esp32.rst
Original file line number Diff line number Diff line change
Expand Up @@ -51,13 +51,20 @@ Functions
buffers and other data. This data is useful to get a sense of how much memory
is available to ESP-IDF and the networking stack in particular. It may shed
some light on situations where ESP-IDF operations fail due to allocation failures.
The information returned is *not* useful to troubleshoot Python allocation failures,
use `micropython.mem_info()` instead.

The capabilities parameter corresponds to ESP-IDF's ``MALLOC_CAP_XXX`` values but the
two most useful ones are predefined as `esp32.HEAP_DATA` for data heap regions and
`esp32.HEAP_EXEC` for executable regions as used by the native code emitter.

Free IDF heap memory in the `esp32.HEAP_DATA` region is available to be
automatically added to the MicroPython heap to prevent a MicroPython
allocation from failing. However, the information returned here is otherwise
*not* useful for troubleshooting Python allocation failures; use
`micropython.mem_info()` for that instead. The "max new split" value in
`micropython.mem_info()` output corresponds to the largest free block of
ESP-IDF heap that could be automatically added on demand to the MicroPython
heap.

The return value is a list of 4-tuples, where each 4-tuple corresponds to one heap
and contains: the total bytes, the free bytes, the largest free block, and
the minimum free seen over time.
Expand Down
9 changes: 9 additions & 0 deletions ports/esp32/boards/sdkconfig.base
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,15 @@ CONFIG_LWIP_PPP_CHAP_SUPPORT=y
# Use 4kiB output buffer instead of default 16kiB
CONFIG_MBEDTLS_ASYMMETRIC_CONTENT_LEN=y

# Allow mbedTLS to allocate from PSRAM or internal memory
#
# (The ESP-IDF default is internal-only, partly for physical security to prevent
# possible information leakage from unencrypted PSRAM contents on the original
# ESP32 - no PSRAM encryption on that chip. MicroPython doesn't support flash
# encryption and is already storing the Python heap in PSRAM so this isn't a
# significant factor in overall physical security.)
CONFIG_MBEDTLS_DEFAULT_MEM_ALLOC=y

# ULP coprocessor support
# Only on: ESP32, ESP32S2, ESP32S3
CONFIG_ULP_COPROC_ENABLED=y
Expand Down
6 changes: 5 additions & 1 deletion ports/esp32/boards/sdkconfig.spiram
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,11 @@
CONFIG_SPIRAM=y
CONFIG_SPIRAM_CACHE_WORKAROUND=y
CONFIG_SPIRAM_IGNORE_NOTFOUND=y
CONFIG_SPIRAM_USE_CAPS_ALLOC=y
CONFIG_SPIRAM_USE_MALLOC=y

# This is the threshold for preferring small allocations from internal memory
# first, before failing over to PSRAM.
CONFIG_SPIRAM_MALLOC_ALWAYSINTERNAL=8192

# SPIRAM increases the size of the firmware and overflows iram0_0_seg, due
# to PSRAM bug workarounds. Apply some options to reduce the firmware size.
Expand Down
6 changes: 5 additions & 1 deletion ports/esp32/boards/sdkconfig.spiram_sx
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,8 @@ CONFIG_SPIRAM_SPEED_80M=y
CONFIG_SPIRAM=y
CONFIG_SPIRAM_BOOT_INIT=y
CONFIG_SPIRAM_IGNORE_NOTFOUND=y
CONFIG_SPIRAM_USE_CAPS_ALLOC=y
CONFIG_SPIRAM_USE_MALLOC=y

# This is the threshold for preferring small allocations from internal memory
# first, before failing over to PSRAM.
CONFIG_SPIRAM_MALLOC_ALWAYSINTERNAL=8192
10 changes: 10 additions & 0 deletions ports/esp32/gccollect.c
Original file line number Diff line number Diff line change
Expand Up @@ -80,3 +80,13 @@ void gc_collect(void) {
}

#endif

#if MICROPY_GC_SPLIT_HEAP_AUTO

// Report the upper bound on the size of a new Python heap area: the largest
// free block currently available in the ESP-IDF system heap, queried with
// the MALLOC_CAP_DEFAULT capability.
size_t gc_get_max_new_split(void) {
    size_t largest_free = heap_caps_get_largest_free_block(MALLOC_CAP_DEFAULT);
    return largest_free;
}

#endif
14 changes: 6 additions & 8 deletions ports/esp32/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,10 @@
#define MP_TASK_STACK_LIMIT_MARGIN (1024)
#endif

// Initial Python heap size. This starts small but adds new heap areas on
// demand due to settings MICROPY_GC_SPLIT_HEAP & MICROPY_GC_SPLIT_HEAP_AUTO
#define MP_TASK_HEAP_SIZE (64 * 1024)

int vprintf_null(const char *format, va_list ap) {
// do nothing: this is used as a log target during raw repl mode
return 0;
Expand All @@ -100,19 +104,13 @@ void mp_task(void *pvParameter) {
ESP_LOGE("esp_init", "can't create event loop: 0x%x\n", err);
}

// Allocate the uPy heap using malloc and get the largest available region,
// limiting to 1/2 total available memory to leave memory for the OS.
// When SPIRAM is enabled, this will allocate from SPIRAM.
uint32_t caps = MALLOC_CAP_8BIT;
size_t heap_total = heap_caps_get_total_size(caps);
size_t mp_task_heap_size = MIN(heap_caps_get_largest_free_block(caps), heap_total / 2);
void *mp_task_heap = heap_caps_malloc(mp_task_heap_size, caps);
void *mp_task_heap = MP_PLAT_ALLOC_HEAP(MP_TASK_HEAP_SIZE);

soft_reset:
// initialise the stack pointer for the main thread
mp_stack_set_top((void *)sp);
mp_stack_set_limit(MP_TASK_STACK_SIZE - MP_TASK_STACK_LIMIT_MARGIN);
gc_init(mp_task_heap, mp_task_heap + mp_task_heap_size);
gc_init(mp_task_heap, mp_task_heap + MP_TASK_HEAP_SIZE);
mp_init();
mp_obj_list_append(mp_sys_path, MP_OBJ_NEW_QSTR(MP_QSTR__slash_lib));
readline_init0();
Expand Down
3 changes: 3 additions & 0 deletions ports/esp32/mpconfigport.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,9 @@
#define MICROPY_PY_THREAD_GIL (1)
#define MICROPY_PY_THREAD_GIL_VM_DIVISOR (32)

#define MICROPY_GC_SPLIT_HEAP (1)
#define MICROPY_GC_SPLIT_HEAP_AUTO (1)

// extended modules
#ifndef MICROPY_ESPNOW
#define MICROPY_ESPNOW (1)
Expand Down
131 changes: 124 additions & 7 deletions py/gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@
#define ATB_3_IS_FREE(a) (((a) & ATB_MASK_3) == 0)

#if MICROPY_GC_SPLIT_HEAP
#define NEXT_AREA(area) (area->next)
#define NEXT_AREA(area) ((area)->next)
#else
#define NEXT_AREA(area) (NULL)
#endif
Expand Down Expand Up @@ -129,7 +129,13 @@ STATIC void gc_setup_area(mp_state_mem_area_t *area, void *start, void *end) {
// => T = A * (1 + BLOCKS_PER_ATB / BLOCKS_PER_FTB + BLOCKS_PER_ATB * BYTES_PER_BLOCK)
size_t total_byte_len = (byte *)end - (byte *)start;
#if MICROPY_ENABLE_FINALISER
area->gc_alloc_table_byte_len = (total_byte_len - ALLOC_TABLE_GAP_BYTE) * MP_BITS_PER_BYTE / (MP_BITS_PER_BYTE + MP_BITS_PER_BYTE * BLOCKS_PER_ATB / BLOCKS_PER_FTB + MP_BITS_PER_BYTE * BLOCKS_PER_ATB * BYTES_PER_BLOCK);
area->gc_alloc_table_byte_len = (total_byte_len - ALLOC_TABLE_GAP_BYTE)
* MP_BITS_PER_BYTE
/ (
MP_BITS_PER_BYTE
+ MP_BITS_PER_BYTE * BLOCKS_PER_ATB / BLOCKS_PER_FTB
+ MP_BITS_PER_BYTE * BLOCKS_PER_ATB * BYTES_PER_BLOCK
);
#else
area->gc_alloc_table_byte_len = (total_byte_len - ALLOC_TABLE_GAP_BYTE) / (1 + MP_BITS_PER_BYTE / 2 * BYTES_PER_BLOCK);
#endif
Expand Down Expand Up @@ -165,11 +171,19 @@ STATIC void gc_setup_area(mp_state_mem_area_t *area, void *start, void *end) {
#endif

DEBUG_printf("GC layout:\n");
DEBUG_printf(" alloc table at %p, length " UINT_FMT " bytes, " UINT_FMT " blocks\n", MP_STATE_MEM(area).gc_alloc_table_start, MP_STATE_MEM(area).gc_alloc_table_byte_len, MP_STATE_MEM(area).gc_alloc_table_byte_len * BLOCKS_PER_ATB);
DEBUG_printf(" alloc table at %p, length " UINT_FMT " bytes, "
UINT_FMT " blocks\n",
area->gc_alloc_table_start, area->gc_alloc_table_byte_len,
area->gc_alloc_table_byte_len * BLOCKS_PER_ATB);
#if MICROPY_ENABLE_FINALISER
DEBUG_printf(" finaliser table at %p, length " UINT_FMT " bytes, " UINT_FMT " blocks\n", MP_STATE_MEM(area).gc_finaliser_table_start, gc_finaliser_table_byte_len, gc_finaliser_table_byte_len * BLOCKS_PER_FTB);
DEBUG_printf(" finaliser table at %p, length " UINT_FMT " bytes, "
UINT_FMT " blocks\n", area->gc_finaliser_table_start,
gc_finaliser_table_byte_len,
gc_finaliser_table_byte_len * BLOCKS_PER_FTB);
#endif
DEBUG_printf(" pool at %p, length " UINT_FMT " bytes, " UINT_FMT " blocks\n", MP_STATE_MEM(area).gc_pool_start, gc_pool_block_len * BYTES_PER_BLOCK, gc_pool_block_len);
DEBUG_printf(" pool at %p, length " UINT_FMT " bytes, "
UINT_FMT " blocks\n", area->gc_pool_start,
gc_pool_block_len * BYTES_PER_BLOCK, gc_pool_block_len);
}

void gc_init(void *start, void *end) {
Expand Down Expand Up @@ -222,6 +236,83 @@ void gc_add(void *start, void *end) {
// Add this area to the linked list
prev_area->next = area;
}

#if MICROPY_GC_SPLIT_HEAP_AUTO
// Try to automatically add a heap area large enough to fulfill 'failed_alloc'.
//
// 'failed_alloc' is the size in bytes of the allocation that could not be
// satisfied from the existing heap areas.
//
// Returns true if a new area was obtained from MP_PLAT_ALLOC_HEAP() and
// registered with gc_add(). Returns false, leaving the heap unchanged, if the
// required size cannot be represented, if gc_get_max_new_split() reports less
// space than required, or if MP_PLAT_ALLOC_HEAP() returns NULL.
STATIC bool gc_try_add_heap(size_t failed_alloc) {
    // 'needed' is the size of a heap large enough to hold failed_alloc, with
    // the additional metadata overheads as calculated in gc_setup_area().
    //
    // Rather than reproduce all of that logic here, we approximate that adding
    // (13/512) is enough overhead for sufficiently large heap areas (the
    // overhead converges to 3/128, but there's some fixed overhead and some
    // rounding up of partial block sizes).
    //
    // The 13/512 factor is applied to the quotient and remainder separately.
    // This yields exactly floor(failed_alloc * 13 / 512) but never forms the
    // intermediate product, which could wrap around for very large requests.
    size_t overhead = (failed_alloc / 512) * 13 + ((failed_alloc % 512) * 13) / 512;
    if (overhead < 2048) {
        overhead = 2048;
    }

    size_t needed = failed_alloc + overhead;
    if (needed < failed_alloc) {
        // The sum wrapped around size_t: no heap area can be this large, so
        // don't waste system heap on an area that can't hold the allocation.
        return false;
    }

    size_t avail = gc_get_max_new_split();

    DEBUG_printf("gc_try_add_heap failed_alloc " UINT_FMT ", "
        "needed " UINT_FMT ", avail " UINT_FMT " bytes \n",
        failed_alloc,
        needed,
        avail);

    if (avail < needed) {
        // Can't fit this allocation, or system heap has nearly run out anyway
        return false;
    }

    // Deciding how much to grow the total heap by each time is tricky:
    //
    // - Growing by amounts that are too small leads to heap fragmentation
    //   issues.
    //
    // - Growing by amounts that are too large may lead to the system heap
    //   running out of space.
    //
    // Currently, this implementation is:
    //
    // - At minimum, aim to double the total heap size each time we add a new
    //   heap. i.e. without any large single allocations, total size will be
    //   64KB -> 128KB -> 256KB -> 512KB -> 1MB, etc
    //
    // - If the failed allocation is too large to fit in that size, the new
    //   heap is made exactly large enough for that allocation. Future growth
    //   will double the total heap size again.
    //
    // - If the new heap won't fit in the available free space, add the largest
    //   new heap that will fit (this may lead to failed system heap allocations
    //   elsewhere, but some allocation will likely fail in this circumstance!)
    size_t total_heap = 0;
    for (mp_state_mem_area_t *area = &MP_STATE_MEM(area);
         area != NULL;
         area = NEXT_AREA(area)) {
        // Span covered by this area's tables and pool, plus the gap bytes and
        // the area descriptor itself.
        total_heap += area->gc_pool_end - area->gc_alloc_table_start;
        total_heap += ALLOC_TABLE_GAP_BYTE + sizeof(mp_state_mem_area_t);
    }

    DEBUG_printf("total_heap " UINT_FMT " bytes\n", total_heap);

    size_t to_alloc = MIN(avail, MAX(total_heap, needed));

    mp_state_mem_area_t *new_heap = MP_PLAT_ALLOC_HEAP(to_alloc);

    DEBUG_printf("MP_PLAT_ALLOC_HEAP " UINT_FMT " = %p\n",
        to_alloc, new_heap);

    if (new_heap == NULL) {
        // This should only fail:
        // - In a threaded environment if another thread has
        //   allocated while this function ran.
        // - If there is a bug in gc_get_max_new_split().
        return false;
    }

    // Compute the end address with a character pointer: arithmetic on void *
    // is a compiler extension rather than standard C.
    gc_add(new_heap, (char *)new_heap + to_alloc);

    return true;
}
#endif

#endif

void gc_lock(void) {
Expand Down Expand Up @@ -378,6 +469,9 @@ STATIC void gc_sweep(void) {
#endif
// free unmarked heads and their tails
int free_tail = 0;
#if MICROPY_GC_SPLIT_HEAP_AUTO
mp_state_mem_area_t *prev_area = NULL;
#endif
for (mp_state_mem_area_t *area = &MP_STATE_MEM(area); area != NULL; area = NEXT_AREA(area)) {
size_t end_block = area->gc_alloc_table_byte_len * BLOCKS_PER_ATB;
if (area->gc_last_used_block < end_block) {
Expand Down Expand Up @@ -440,6 +534,17 @@ STATIC void gc_sweep(void) {
}

area->gc_last_used_block = last_used_block;

#if MICROPY_GC_SPLIT_HEAP_AUTO
// Free any empty area, aside from the first one
if (last_used_block == 0 && prev_area != NULL) {
DEBUG_printf("gc_sweep free empty area %p\n", area);
NEXT_AREA(prev_area) = NEXT_AREA(area);
MP_PLAT_FREE_HEAP(area);
area = prev_area;
}
prev_area = area;
#endif
}
}

Expand Down Expand Up @@ -622,6 +727,9 @@ void *gc_alloc(size_t n_bytes, unsigned int alloc_flags) {
size_t start_block;
size_t n_free;
int collected = !MP_STATE_MEM(gc_auto_collect_enabled);
#if MICROPY_GC_SPLIT_HEAP_AUTO
bool added = false;
#endif

#if MICROPY_GC_ALLOC_THRESHOLD
if (!collected && MP_STATE_MEM(gc_alloc_amount) >= MP_STATE_MEM(gc_alloc_threshold)) {
Expand Down Expand Up @@ -667,6 +775,12 @@ void *gc_alloc(size_t n_bytes, unsigned int alloc_flags) {
GC_EXIT();
// nothing found!
if (collected) {
#if MICROPY_GC_SPLIT_HEAP_AUTO
if (!added && gc_try_add_heap(n_bytes)) {
added = true;
continue;
}
#endif
return NULL;
}
DEBUG_printf("gc_alloc(" UINT_FMT "): no free mem, triggering GC\n", n_bytes);
Expand Down Expand Up @@ -1042,9 +1156,12 @@ void *gc_realloc(void *ptr_in, size_t n_bytes, bool allow_move) {
void gc_dump_info(const mp_print_t *print) {
gc_info_t info;
gc_info(&info);
mp_printf(print, "GC: total: %u, used: %u, free: %u\n",
mp_printf(print, "GC: total: %u, used: %u, free: %u",
(uint)info.total, (uint)info.used, (uint)info.free);
mp_printf(print, " No. of 1-blocks: %u, 2-blocks: %u, max blk sz: %u, max free sz: %u\n",
#if MICROPY_GC_SPLIT_HEAP_AUTO
mp_printf(print, ", max new split: %u", (uint)gc_get_max_new_split());
#endif
mp_printf(print, "\n No. of 1-blocks: %u, 2-blocks: %u, max blk sz: %u, max free sz: %u\n",
(uint)info.num_1block, (uint)info.num_2block, (uint)info.max_block, (uint)info.max_free);
}

Expand Down
8 changes: 7 additions & 1 deletion py/gc.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,13 @@ void gc_init(void *start, void *end);
#if MICROPY_GC_SPLIT_HEAP
// Used to add additional memory areas to the heap.
void gc_add(void *start, void *end);
#endif

#if MICROPY_GC_SPLIT_HEAP_AUTO
// Port must implement this function to return the maximum available block of
// RAM to allocate a new heap area into using MP_PLAT_ALLOC_HEAP.
size_t gc_get_max_new_split(void);
#endif // MICROPY_GC_SPLIT_HEAP_AUTO
#endif // MICROPY_GC_SPLIT_HEAP

// These lock/unlock functions can be nested.
// They can be used to prevent the GC from allocating/freeing.
Expand Down
15 changes: 15 additions & 0 deletions py/mpconfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -616,6 +616,11 @@
#define MICROPY_GC_SPLIT_HEAP (0)
#endif

// Whether regions should be added/removed from the split heap as needed.
// When enabled, the port must implement gc_get_max_new_split(), and heap
// areas are obtained and released through MP_PLAT_ALLOC_HEAP and
// MP_PLAT_FREE_HEAP. Disabled (0) unless the port defines it otherwise.
// NOTE(review): the supporting code sits inside MICROPY_GC_SPLIT_HEAP
// sections, so this presumably requires that option too - confirm.
#ifndef MICROPY_GC_SPLIT_HEAP_AUTO
#define MICROPY_GC_SPLIT_HEAP_AUTO (0)
#endif

// Hook to run code during time consuming garbage collector operations
// *i* is the loop index variable (e.g. can be used to run every x loops)
#ifndef MICROPY_GC_HOOK_LOOP
Expand Down Expand Up @@ -1896,6 +1901,16 @@ typedef double mp_float_t;
#define MP_PLAT_FREE_EXEC(ptr, size) m_del(byte, ptr, size)
#endif

// Allocating a new heap area at runtime requires the port to be able to
// allocate from the system heap. A port may define either macro before this
// point to override the defaults below.
#if MICROPY_GC_SPLIT_HEAP_AUTO
// Allocate 'size' bytes for a new heap area; defaults to malloc().
#ifndef MP_PLAT_ALLOC_HEAP
#define MP_PLAT_ALLOC_HEAP(size) malloc(size)
#endif
// Release a heap area obtained from MP_PLAT_ALLOC_HEAP; defaults to free().
#ifndef MP_PLAT_FREE_HEAP
#define MP_PLAT_FREE_HEAP(ptr) free(ptr)
#endif
#endif

// This macro is used to do all output (except when MICROPY_PY_IO is defined)
#ifndef MP_PLAT_PRINT_STRN
#define MP_PLAT_PRINT_STRN(str, len) mp_hal_stdout_tx_strn_cooked(str, len)
Expand Down
0