From 871dc4c13bc84bdb93a30d5d5c3d59ac8fe1de17 Mon Sep 17 00:00:00 2001 From: Kirill Podoprigora Date: Mon, 17 Jun 2024 14:59:47 +0300 Subject: [PATCH 01/11] First try to fix.. --- Include/cpython/optimizer.h | 1 + Python/optimizer.c | 10 ++++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/Include/cpython/optimizer.h b/Include/cpython/optimizer.h index f2093a1e5f6aa4..cc7a2d6bc4e1b9 100644 --- a/Include/cpython/optimizer.h +++ b/Include/cpython/optimizer.h @@ -109,6 +109,7 @@ PyAPI_FUNC(_PyExecutorObject *) PyUnstable_GetExecutor(PyCodeObject *code, int o void _Py_ExecutorInit(_PyExecutorObject *, const _PyBloomFilter *); void _Py_ExecutorDetach(_PyExecutorObject *); +int _Py_ExecutorClear(_PyExecutorObject *); void _Py_BloomFilter_Init(_PyBloomFilter *); void _Py_BloomFilter_Add(_PyBloomFilter *bloom, void *obj); PyAPI_FUNC(void) _Py_Executor_DependsOn(_PyExecutorObject *executor, void *obj); diff --git a/Python/optimizer.c b/Python/optimizer.c index 4dc3438b6c23a4..07103bd4ab5d52 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1617,8 +1617,9 @@ _Py_ExecutorDetach(_PyExecutorObject *executor) Py_DECREF(executor); } -static int -executor_clear(_PyExecutorObject *executor) + +int +_Py_ExecutorClear(_PyExecutorObject *executor) { if (!executor->vm_data.valid) { return 0; @@ -1644,6 +1645,11 @@ executor_clear(_PyExecutorObject *executor) Py_DECREF(executor); return 0; } +static int +executor_clear(_PyExecutorObject *executor) +{ + return _Py_ExecutorClear(executor); +} void _Py_Executor_DependsOn(_PyExecutorObject *executor, void *obj) From 08c26d3fd30fa29b3ddc8deb00c10540c09ab3d9 Mon Sep 17 00:00:00 2001 From: Kirill Podoprigora Date: Mon, 17 Jun 2024 18:38:33 +0300 Subject: [PATCH 02/11] Forgot to commit codeobject.c --- Objects/codeobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Objects/codeobject.c b/Objects/codeobject.c index e3e306bfe810c4..d06967bfc4a6ea 100644 --- a/Objects/codeobject.c +++ 
b/Objects/codeobject.c @@ -1607,7 +1607,7 @@ clear_executors(PyCodeObject *co) assert(co->co_executors); for (int i = 0; i < co->co_executors->size; i++) { if (co->co_executors->executors[i]) { - _Py_ExecutorDetach(co->co_executors->executors[i]); + _Py_ExecutorClear(co->co_executors->executors[i]); assert(co->co_executors->executors[i] == NULL); } } From 08cb747c833d9332b6aa53a32fc025f10cf28ec1 Mon Sep 17 00:00:00 2001 From: Kirill Podoprigora Date: Sun, 23 Jun 2024 18:50:01 +0000 Subject: [PATCH 03/11] Add a newline --- Python/optimizer.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Python/optimizer.c b/Python/optimizer.c index deaadbcda0b758..b09334bd1c5eeb 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -537,7 +537,7 @@ add_to_trace( // Reserve space for N uops, plus 3 for _SET_IP, _CHECK_VALIDITY and _EXIT_TRACE #define RESERVE(needed) RESERVE_RAW((needed) + 3, _PyUOpName(opcode)) -// Trace stack operations (used by _PUSH_FRAME, _RETURN_VALUE) +// Trace stack operations (used by _PUSH_FRAME, _POP_FRAME) #define TRACE_STACK_PUSH() \ if (trace_stack_depth >= TRACE_STACK_SIZE) { \ DPRINTF(2, "Trace stack overflow\n"); \ @@ -748,10 +748,10 @@ translate_bytecode_to_trace( int nuops = expansion->nuops; RESERVE(nuops + 1); /* One extra for exit */ int16_t last_op = expansion->uops[nuops-1].uop; - if (last_op == _RETURN_VALUE || last_op == _RETURN_GENERATOR || last_op == _YIELD_VALUE) { + if (last_op == _POP_FRAME || last_op == _RETURN_GENERATOR || last_op == _YIELD_VALUE) { // Check for trace stack underflow now: // We can't bail e.g. in the middle of - // LOAD_CONST + _RETURN_VALUE. + // LOAD_CONST + _POP_FRAME. 
if (trace_stack_depth == 0) { DPRINTF(2, "Trace stack underflow\n"); OPT_STAT_INC(trace_stack_underflow); @@ -810,7 +810,7 @@ translate_bytecode_to_trace( Py_FatalError("garbled expansion"); } - if (uop == _RETURN_VALUE || uop == _RETURN_GENERATOR || uop == _YIELD_VALUE) { + if (uop == _POP_FRAME || uop == _RETURN_GENERATOR || uop == _YIELD_VALUE) { TRACE_STACK_POP(); /* Set the operand to the function or code object returned to, * to assist optimization passes. (See _PUSH_FRAME below.) @@ -1644,7 +1644,7 @@ _Py_ExecutorClear(_PyExecutorObject *executor) _Py_ExecutorDetach(executor); Py_DECREF(executor); return 0; -} +}] static int executor_clear(_PyExecutorObject *executor) { From 9926e1c01321d37e75b7ff4a379b079e8fa77d8f Mon Sep 17 00:00:00 2001 From: Kirill Podoprigora Date: Sun, 23 Jun 2024 18:57:57 +0000 Subject: [PATCH 04/11] Add a newline (2) --- Python/optimizer.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/Python/optimizer.c b/Python/optimizer.c index b09334bd1c5eeb..137126d43095c8 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -537,7 +537,7 @@ add_to_trace( // Reserve space for N uops, plus 3 for _SET_IP, _CHECK_VALIDITY and _EXIT_TRACE #define RESERVE(needed) RESERVE_RAW((needed) + 3, _PyUOpName(opcode)) -// Trace stack operations (used by _PUSH_FRAME, _POP_FRAME) +// Trace stack operations (used by _PUSH_FRAME, _RETURN_VALUE) #define TRACE_STACK_PUSH() \ if (trace_stack_depth >= TRACE_STACK_SIZE) { \ DPRINTF(2, "Trace stack overflow\n"); \ @@ -748,10 +748,10 @@ translate_bytecode_to_trace( int nuops = expansion->nuops; RESERVE(nuops + 1); /* One extra for exit */ int16_t last_op = expansion->uops[nuops-1].uop; - if (last_op == _POP_FRAME || last_op == _RETURN_GENERATOR || last_op == _YIELD_VALUE) { + if (last_op == _RETURN_VALUE || last_op == _RETURN_GENERATOR || last_op == _YIELD_VALUE) { // Check for trace stack underflow now: // We can't bail e.g. in the middle of - // LOAD_CONST + _POP_FRAME. 
+ // LOAD_CONST + _RETURN_VALUE. if (trace_stack_depth == 0) { DPRINTF(2, "Trace stack underflow\n"); OPT_STAT_INC(trace_stack_underflow); @@ -810,7 +810,7 @@ translate_bytecode_to_trace( Py_FatalError("garbled expansion"); } - if (uop == _POP_FRAME || uop == _RETURN_GENERATOR || uop == _YIELD_VALUE) { + if (uop == _RETURN_VALUE || uop == _RETURN_GENERATOR || uop == _YIELD_VALUE) { TRACE_STACK_POP(); /* Set the operand to the function or code object returned to, * to assist optimization passes. (See _PUSH_FRAME below.) @@ -1644,7 +1644,8 @@ _Py_ExecutorClear(_PyExecutorObject *executor) _Py_ExecutorDetach(executor); Py_DECREF(executor); return 0; -}] +} + static int executor_clear(_PyExecutorObject *executor) { From 326401bff6c11dc82e7ff622538d5acb5a465bb9 Mon Sep 17 00:00:00 2001 From: Kirill Podoprigora Date: Fri, 25 Oct 2024 20:57:12 +0300 Subject: [PATCH 05/11] Clear child executors from side exits Co-authored-by: Brandt Bucher --- Python/optimizer.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Python/optimizer.c b/Python/optimizer.c index df6b43a820da9f..f001f304688f7a 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -257,6 +257,10 @@ uop_dealloc(_PyExecutorObject *self) { _PyObject_GC_UNTRACK(self); assert(self->vm_data.code == NULL); unlink_executor(self); + for (uint32_t i = 0; i < self->exit_count; i++) { + self->exits[i].temperature = initial_unreachable_backoff_counter(); + Py_CLEAR(self->exits[i].executor); + } #ifdef _Py_JIT _PyJIT_Free(self); #endif From a7957fa3845ca16fea548d94df69142355046e34 Mon Sep 17 00:00:00 2001 From: Kirill Podoprigora Date: Sun, 12 Jan 2025 13:11:54 +0200 Subject: [PATCH 06/11] Remove unnecessary incref --- Python/optimizer.c | 1 - 1 file changed, 1 deletion(-) diff --git a/Python/optimizer.c b/Python/optimizer.c index 95180e55af2506..9e6c2b058d5cd6 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1387,7 +1387,6 @@ counter_optimize( if (executor == NULL) { return -1; } - Py_INCREF(self); 
Py_SET_TYPE(executor, &_PyCounterExecutor_Type); *exec_ptr = executor; return 1; From 7d908441e9fe83ae317edcc17ea50514693101fc Mon Sep 17 00:00:00 2001 From: Kirill Podoprigora Date: Sun, 12 Jan 2025 13:54:56 +0200 Subject: [PATCH 07/11] Fix incorrect conversion specifier --- Python/bytecodes.c | 4 ++-- Python/executor_cases.c.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 8bab4ea16b629b..5d0a754337af3c 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4937,7 +4937,7 @@ dummy_func( if (lltrace >= 2) { printf("SIDE EXIT: [UOp "); _PyUOpPrint(&next_uop[-1]); - printf(", exit %u, temp %d, target %d -> %s]\n", + printf(", exit %ld, temp %d, target %d -> %s]\n", exit - current_executor->exits, exit->temperature.value_and_backoff, (int)(target - _PyFrame_GetBytecode(frame)), _PyOpcode_OpName[target->op.code]); @@ -5065,7 +5065,7 @@ dummy_func( if (lltrace >= 2) { printf("DYNAMIC EXIT: [UOp "); _PyUOpPrint(&next_uop[-1]); - printf(", exit %u, temp %d, target %d -> %s]\n", + printf(", exit %ld, temp %d, target %d -> %s]\n", exit - current_executor->exits, exit->temperature.value_and_backoff, (int)(target - _PyFrame_GetBytecode(frame)), _PyOpcode_OpName[target->op.code]); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index e40fa88be89172..2b704c89f7a27b 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -5864,7 +5864,7 @@ _PyFrame_SetStackPointer(frame, stack_pointer); printf("SIDE EXIT: [UOp "); _PyUOpPrint(&next_uop[-1]); - printf(", exit %u, temp %d, target %d -> %s]\n", + printf(", exit %ld, temp %d, target %d -> %s]\n", exit - current_executor->exits, exit->temperature.value_and_backoff, (int)(target - _PyFrame_GetBytecode(frame)), _PyOpcode_OpName[target->op.code]); @@ -6082,7 +6082,7 @@ _PyFrame_SetStackPointer(frame, stack_pointer); printf("DYNAMIC EXIT: [UOp "); _PyUOpPrint(&next_uop[-1]); - printf(", exit %u, temp %d, target %d -> %s]\n", 
+ printf(", exit %ld, temp %d, target %d -> %s]\n", exit - current_executor->exits, exit->temperature.value_and_backoff, (int)(target - _PyFrame_GetBytecode(frame)), _PyOpcode_OpName[target->op.code]); From f2cb39d382e3ff8b631fc320ada2c39d3e79cc46 Mon Sep 17 00:00:00 2001 From: Kirill Podoprigora Date: Tue, 14 Jan 2025 12:02:29 +0200 Subject: [PATCH 08/11] Steal reference instead of increfing --- Include/internal/pycore_uop_ids.h | 117 +++++++++++++------------ Include/internal/pycore_uop_metadata.h | 4 + Python/bytecodes.c | 4 + Python/executor_cases.c.h | 10 +++ Python/optimizer.c | 3 +- Python/optimizer_cases.c.h | 9 ++ 6 files changed, 88 insertions(+), 59 deletions(-) diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index 21690a28839565..e6fd186f1f261a 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -205,100 +205,101 @@ extern "C" { #define _LOAD_CONST_INLINE 429 #define _LOAD_CONST_INLINE_BORROW 430 #define _LOAD_CONST_INLINE_BORROW_WITH_NULL 431 -#define _LOAD_CONST_INLINE_WITH_NULL 432 +#define _LOAD_CONST_INLINE_STEAL 432 +#define _LOAD_CONST_INLINE_WITH_NULL 433 #define _LOAD_DEREF LOAD_DEREF -#define _LOAD_FAST 433 -#define _LOAD_FAST_0 434 -#define _LOAD_FAST_1 435 -#define _LOAD_FAST_2 436 -#define _LOAD_FAST_3 437 -#define _LOAD_FAST_4 438 -#define _LOAD_FAST_5 439 -#define _LOAD_FAST_6 440 -#define _LOAD_FAST_7 441 +#define _LOAD_FAST 434 +#define _LOAD_FAST_0 435 +#define _LOAD_FAST_1 436 +#define _LOAD_FAST_2 437 +#define _LOAD_FAST_3 438 +#define _LOAD_FAST_4 439 +#define _LOAD_FAST_5 440 +#define _LOAD_FAST_6 441 +#define _LOAD_FAST_7 442 #define _LOAD_FAST_AND_CLEAR LOAD_FAST_AND_CLEAR #define _LOAD_FAST_CHECK LOAD_FAST_CHECK #define _LOAD_FAST_LOAD_FAST LOAD_FAST_LOAD_FAST #define _LOAD_FROM_DICT_OR_DEREF LOAD_FROM_DICT_OR_DEREF #define _LOAD_FROM_DICT_OR_GLOBALS LOAD_FROM_DICT_OR_GLOBALS -#define _LOAD_GLOBAL 442 -#define _LOAD_GLOBAL_BUILTINS 443 -#define 
_LOAD_GLOBAL_BUILTINS_FROM_KEYS 444 -#define _LOAD_GLOBAL_MODULE 445 -#define _LOAD_GLOBAL_MODULE_FROM_KEYS 446 +#define _LOAD_GLOBAL 443 +#define _LOAD_GLOBAL_BUILTINS 444 +#define _LOAD_GLOBAL_BUILTINS_FROM_KEYS 445 +#define _LOAD_GLOBAL_MODULE 446 +#define _LOAD_GLOBAL_MODULE_FROM_KEYS 447 #define _LOAD_LOCALS LOAD_LOCALS #define _LOAD_NAME LOAD_NAME -#define _LOAD_SMALL_INT 447 -#define _LOAD_SMALL_INT_0 448 -#define _LOAD_SMALL_INT_1 449 -#define _LOAD_SMALL_INT_2 450 -#define _LOAD_SMALL_INT_3 451 +#define _LOAD_SMALL_INT 448 +#define _LOAD_SMALL_INT_0 449 +#define _LOAD_SMALL_INT_1 450 +#define _LOAD_SMALL_INT_2 451 +#define _LOAD_SMALL_INT_3 452 #define _LOAD_SPECIAL LOAD_SPECIAL #define _LOAD_SUPER_ATTR_ATTR LOAD_SUPER_ATTR_ATTR #define _LOAD_SUPER_ATTR_METHOD LOAD_SUPER_ATTR_METHOD -#define _MAKE_CALLARGS_A_TUPLE 452 +#define _MAKE_CALLARGS_A_TUPLE 453 #define _MAKE_CELL MAKE_CELL #define _MAKE_FUNCTION MAKE_FUNCTION -#define _MAKE_WARM 453 +#define _MAKE_WARM 454 #define _MAP_ADD MAP_ADD #define _MATCH_CLASS MATCH_CLASS #define _MATCH_KEYS MATCH_KEYS #define _MATCH_MAPPING MATCH_MAPPING #define _MATCH_SEQUENCE MATCH_SEQUENCE -#define _MAYBE_EXPAND_METHOD 454 -#define _MAYBE_EXPAND_METHOD_KW 455 -#define _MONITOR_CALL 456 -#define _MONITOR_JUMP_BACKWARD 457 -#define _MONITOR_RESUME 458 +#define _MAYBE_EXPAND_METHOD 455 +#define _MAYBE_EXPAND_METHOD_KW 456 +#define _MONITOR_CALL 457 +#define _MONITOR_JUMP_BACKWARD 458 +#define _MONITOR_RESUME 459 #define _NOP NOP #define _POP_EXCEPT POP_EXCEPT -#define _POP_JUMP_IF_FALSE 459 -#define _POP_JUMP_IF_TRUE 460 +#define _POP_JUMP_IF_FALSE 460 +#define _POP_JUMP_IF_TRUE 461 #define _POP_TOP POP_TOP -#define _POP_TOP_LOAD_CONST_INLINE_BORROW 461 +#define _POP_TOP_LOAD_CONST_INLINE_BORROW 462 #define _PUSH_EXC_INFO PUSH_EXC_INFO -#define _PUSH_FRAME 462 +#define _PUSH_FRAME 463 #define _PUSH_NULL PUSH_NULL -#define _PY_FRAME_GENERAL 463 -#define _PY_FRAME_KW 464 -#define _QUICKEN_RESUME 465 -#define 
_REPLACE_WITH_TRUE 466 +#define _PY_FRAME_GENERAL 464 +#define _PY_FRAME_KW 465 +#define _QUICKEN_RESUME 466 +#define _REPLACE_WITH_TRUE 467 #define _RESUME_CHECK RESUME_CHECK #define _RETURN_GENERATOR RETURN_GENERATOR #define _RETURN_VALUE RETURN_VALUE -#define _SAVE_RETURN_OFFSET 467 -#define _SEND 468 -#define _SEND_GEN_FRAME 469 +#define _SAVE_RETURN_OFFSET 468 +#define _SEND 469 +#define _SEND_GEN_FRAME 470 #define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS #define _SET_ADD SET_ADD #define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE #define _SET_UPDATE SET_UPDATE -#define _START_EXECUTOR 470 -#define _STORE_ATTR 471 -#define _STORE_ATTR_INSTANCE_VALUE 472 -#define _STORE_ATTR_SLOT 473 -#define _STORE_ATTR_WITH_HINT 474 +#define _START_EXECUTOR 471 +#define _STORE_ATTR 472 +#define _STORE_ATTR_INSTANCE_VALUE 473 +#define _STORE_ATTR_SLOT 474 +#define _STORE_ATTR_WITH_HINT 475 #define _STORE_DEREF STORE_DEREF -#define _STORE_FAST 475 -#define _STORE_FAST_0 476 -#define _STORE_FAST_1 477 -#define _STORE_FAST_2 478 -#define _STORE_FAST_3 479 -#define _STORE_FAST_4 480 -#define _STORE_FAST_5 481 -#define _STORE_FAST_6 482 -#define _STORE_FAST_7 483 +#define _STORE_FAST 476 +#define _STORE_FAST_0 477 +#define _STORE_FAST_1 478 +#define _STORE_FAST_2 479 +#define _STORE_FAST_3 480 +#define _STORE_FAST_4 481 +#define _STORE_FAST_5 482 +#define _STORE_FAST_6 483 +#define _STORE_FAST_7 484 #define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST #define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST #define _STORE_GLOBAL STORE_GLOBAL #define _STORE_NAME STORE_NAME -#define _STORE_SLICE 484 -#define _STORE_SUBSCR 485 +#define _STORE_SLICE 485 +#define _STORE_SUBSCR 486 #define _STORE_SUBSCR_DICT STORE_SUBSCR_DICT #define _STORE_SUBSCR_LIST_INT STORE_SUBSCR_LIST_INT #define _SWAP SWAP -#define _TIER2_RESUME_CHECK 486 -#define _TO_BOOL 487 +#define _TIER2_RESUME_CHECK 487 +#define _TO_BOOL 488 #define _TO_BOOL_BOOL TO_BOOL_BOOL #define _TO_BOOL_INT TO_BOOL_INT #define _TO_BOOL_LIST 
TO_BOOL_LIST @@ -308,13 +309,13 @@ extern "C" { #define _UNARY_NEGATIVE UNARY_NEGATIVE #define _UNARY_NOT UNARY_NOT #define _UNPACK_EX UNPACK_EX -#define _UNPACK_SEQUENCE 488 +#define _UNPACK_SEQUENCE 489 #define _UNPACK_SEQUENCE_LIST UNPACK_SEQUENCE_LIST #define _UNPACK_SEQUENCE_TUPLE UNPACK_SEQUENCE_TUPLE #define _UNPACK_SEQUENCE_TWO_TUPLE UNPACK_SEQUENCE_TWO_TUPLE #define _WITH_EXCEPT_START WITH_EXCEPT_START #define _YIELD_VALUE YIELD_VALUE -#define MAX_UOP_ID 488 +#define MAX_UOP_ID 489 #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 83e578cdd76fbd..7ac9541b70c9e7 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -278,6 +278,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_EXIT_TRACE] = HAS_ESCAPES_FLAG, [_CHECK_VALIDITY] = HAS_DEOPT_FLAG, [_LOAD_CONST_INLINE] = HAS_PURE_FLAG, + [_LOAD_CONST_INLINE_STEAL] = HAS_PURE_FLAG, [_LOAD_CONST_INLINE_BORROW] = HAS_PURE_FLAG, [_POP_TOP_LOAD_CONST_INLINE_BORROW] = HAS_PURE_FLAG, [_LOAD_CONST_INLINE_WITH_NULL] = HAS_PURE_FLAG, @@ -479,6 +480,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_LOAD_CONST_INLINE] = "_LOAD_CONST_INLINE", [_LOAD_CONST_INLINE_BORROW] = "_LOAD_CONST_INLINE_BORROW", [_LOAD_CONST_INLINE_BORROW_WITH_NULL] = "_LOAD_CONST_INLINE_BORROW_WITH_NULL", + [_LOAD_CONST_INLINE_STEAL] = "_LOAD_CONST_INLINE_STEAL", [_LOAD_CONST_INLINE_WITH_NULL] = "_LOAD_CONST_INLINE_WITH_NULL", [_LOAD_DEREF] = "_LOAD_DEREF", [_LOAD_FAST] = "_LOAD_FAST", @@ -1103,6 +1105,8 @@ int _PyUop_num_popped(int opcode, int oparg) return 0; case _LOAD_CONST_INLINE: return 0; + case _LOAD_CONST_INLINE_STEAL: + return 0; case _LOAD_CONST_INLINE_BORROW: return 0; case _POP_TOP_LOAD_CONST_INLINE_BORROW: diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 5d0a754337af3c..b49b1b8e8e10a9 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4989,6 +4989,10 @@ dummy_func( value = 
PyStackRef_FromPyObjectNew(ptr); } + tier2 pure op(_LOAD_CONST_INLINE_STEAL, (ptr/4 -- value)) { + value = PyStackRef_FromPyObjectSteal(ptr); + } + tier2 pure op(_LOAD_CONST_INLINE_BORROW, (ptr/4 -- value)) { value = PyStackRef_FromPyObjectImmortal(ptr); } diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 2b704c89f7a27b..deee9240afe69b 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -5930,6 +5930,16 @@ break; } + case _LOAD_CONST_INLINE_STEAL: { + _PyStackRef value; + PyObject *ptr = (PyObject *)CURRENT_OPERAND0(); + value = PyStackRef_FromPyObjectSteal(ptr); + stack_pointer[0] = value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + case _LOAD_CONST_INLINE_BORROW: { _PyStackRef value; PyObject *ptr = (PyObject *)CURRENT_OPERAND0(); diff --git a/Python/optimizer.c b/Python/optimizer.c index 9e6c2b058d5cd6..858d9c70407a14 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1379,7 +1379,7 @@ counter_optimize( _Py_CODEUNIT *target = instr + 1 + _PyOpcode_Caches[JUMP_BACKWARD] - oparg; _PyUOpInstruction buffer[4] = { { .opcode = _START_EXECUTOR, .jump_target = 3, .format=UOP_FORMAT_JUMP }, - { .opcode = _LOAD_CONST_INLINE, .operand0 = (uintptr_t)self }, + { .opcode = _LOAD_CONST_INLINE_STEAL, .operand0 = (uintptr_t)self }, { .opcode = _INTERNAL_INCREMENT_OPT_COUNTER }, { .opcode = _EXIT_TRACE, .target = (uint32_t)(target - _PyCode_CODE(code)), .format=UOP_FORMAT_TARGET } }; @@ -1387,6 +1387,7 @@ counter_optimize( if (executor == NULL) { return -1; } + Py_INCREF(self); Py_SET_TYPE(executor, &_PyCounterExecutor_Type); *exec_ptr = executor; return 1; diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index c72ae7b6281e80..334993953934e1 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -2535,6 +2535,15 @@ break; } + case _LOAD_CONST_INLINE_STEAL: { + _Py_UopsSymbol *value; + value = sym_new_not_null(ctx); + stack_pointer[0] = value; + stack_pointer 
+= 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + case _LOAD_CONST_INLINE_BORROW: { _Py_UopsSymbol *value; PyObject *ptr = (PyObject *)this_instr->operand0; From 09f7314b9551c55e49a8a459b58c8767b5f5609d Mon Sep 17 00:00:00 2001 From: Kirill Podoprigora Date: Mon, 20 Jan 2025 15:34:10 +0200 Subject: [PATCH 09/11] Revert changes because counter optimizer has been removed --- Include/internal/pycore_uop_ids.h | 117 ++++++++++++------------- Include/internal/pycore_uop_metadata.h | 4 - Python/bytecodes.c | 4 - Python/executor_cases.c.h | 10 --- Python/optimizer.c | 2 +- Python/optimizer_cases.c.h | 9 -- 6 files changed, 59 insertions(+), 87 deletions(-) diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index e6fd186f1f261a..21690a28839565 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -205,101 +205,100 @@ extern "C" { #define _LOAD_CONST_INLINE 429 #define _LOAD_CONST_INLINE_BORROW 430 #define _LOAD_CONST_INLINE_BORROW_WITH_NULL 431 -#define _LOAD_CONST_INLINE_STEAL 432 -#define _LOAD_CONST_INLINE_WITH_NULL 433 +#define _LOAD_CONST_INLINE_WITH_NULL 432 #define _LOAD_DEREF LOAD_DEREF -#define _LOAD_FAST 434 -#define _LOAD_FAST_0 435 -#define _LOAD_FAST_1 436 -#define _LOAD_FAST_2 437 -#define _LOAD_FAST_3 438 -#define _LOAD_FAST_4 439 -#define _LOAD_FAST_5 440 -#define _LOAD_FAST_6 441 -#define _LOAD_FAST_7 442 +#define _LOAD_FAST 433 +#define _LOAD_FAST_0 434 +#define _LOAD_FAST_1 435 +#define _LOAD_FAST_2 436 +#define _LOAD_FAST_3 437 +#define _LOAD_FAST_4 438 +#define _LOAD_FAST_5 439 +#define _LOAD_FAST_6 440 +#define _LOAD_FAST_7 441 #define _LOAD_FAST_AND_CLEAR LOAD_FAST_AND_CLEAR #define _LOAD_FAST_CHECK LOAD_FAST_CHECK #define _LOAD_FAST_LOAD_FAST LOAD_FAST_LOAD_FAST #define _LOAD_FROM_DICT_OR_DEREF LOAD_FROM_DICT_OR_DEREF #define _LOAD_FROM_DICT_OR_GLOBALS LOAD_FROM_DICT_OR_GLOBALS -#define _LOAD_GLOBAL 443 -#define _LOAD_GLOBAL_BUILTINS 444 -#define _LOAD_GLOBAL_BUILTINS_FROM_KEYS 
445 -#define _LOAD_GLOBAL_MODULE 446 -#define _LOAD_GLOBAL_MODULE_FROM_KEYS 447 +#define _LOAD_GLOBAL 442 +#define _LOAD_GLOBAL_BUILTINS 443 +#define _LOAD_GLOBAL_BUILTINS_FROM_KEYS 444 +#define _LOAD_GLOBAL_MODULE 445 +#define _LOAD_GLOBAL_MODULE_FROM_KEYS 446 #define _LOAD_LOCALS LOAD_LOCALS #define _LOAD_NAME LOAD_NAME -#define _LOAD_SMALL_INT 448 -#define _LOAD_SMALL_INT_0 449 -#define _LOAD_SMALL_INT_1 450 -#define _LOAD_SMALL_INT_2 451 -#define _LOAD_SMALL_INT_3 452 +#define _LOAD_SMALL_INT 447 +#define _LOAD_SMALL_INT_0 448 +#define _LOAD_SMALL_INT_1 449 +#define _LOAD_SMALL_INT_2 450 +#define _LOAD_SMALL_INT_3 451 #define _LOAD_SPECIAL LOAD_SPECIAL #define _LOAD_SUPER_ATTR_ATTR LOAD_SUPER_ATTR_ATTR #define _LOAD_SUPER_ATTR_METHOD LOAD_SUPER_ATTR_METHOD -#define _MAKE_CALLARGS_A_TUPLE 453 +#define _MAKE_CALLARGS_A_TUPLE 452 #define _MAKE_CELL MAKE_CELL #define _MAKE_FUNCTION MAKE_FUNCTION -#define _MAKE_WARM 454 +#define _MAKE_WARM 453 #define _MAP_ADD MAP_ADD #define _MATCH_CLASS MATCH_CLASS #define _MATCH_KEYS MATCH_KEYS #define _MATCH_MAPPING MATCH_MAPPING #define _MATCH_SEQUENCE MATCH_SEQUENCE -#define _MAYBE_EXPAND_METHOD 455 -#define _MAYBE_EXPAND_METHOD_KW 456 -#define _MONITOR_CALL 457 -#define _MONITOR_JUMP_BACKWARD 458 -#define _MONITOR_RESUME 459 +#define _MAYBE_EXPAND_METHOD 454 +#define _MAYBE_EXPAND_METHOD_KW 455 +#define _MONITOR_CALL 456 +#define _MONITOR_JUMP_BACKWARD 457 +#define _MONITOR_RESUME 458 #define _NOP NOP #define _POP_EXCEPT POP_EXCEPT -#define _POP_JUMP_IF_FALSE 460 -#define _POP_JUMP_IF_TRUE 461 +#define _POP_JUMP_IF_FALSE 459 +#define _POP_JUMP_IF_TRUE 460 #define _POP_TOP POP_TOP -#define _POP_TOP_LOAD_CONST_INLINE_BORROW 462 +#define _POP_TOP_LOAD_CONST_INLINE_BORROW 461 #define _PUSH_EXC_INFO PUSH_EXC_INFO -#define _PUSH_FRAME 463 +#define _PUSH_FRAME 462 #define _PUSH_NULL PUSH_NULL -#define _PY_FRAME_GENERAL 464 -#define _PY_FRAME_KW 465 -#define _QUICKEN_RESUME 466 -#define _REPLACE_WITH_TRUE 467 +#define 
_PY_FRAME_GENERAL 463 +#define _PY_FRAME_KW 464 +#define _QUICKEN_RESUME 465 +#define _REPLACE_WITH_TRUE 466 #define _RESUME_CHECK RESUME_CHECK #define _RETURN_GENERATOR RETURN_GENERATOR #define _RETURN_VALUE RETURN_VALUE -#define _SAVE_RETURN_OFFSET 468 -#define _SEND 469 -#define _SEND_GEN_FRAME 470 +#define _SAVE_RETURN_OFFSET 467 +#define _SEND 468 +#define _SEND_GEN_FRAME 469 #define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS #define _SET_ADD SET_ADD #define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE #define _SET_UPDATE SET_UPDATE -#define _START_EXECUTOR 471 -#define _STORE_ATTR 472 -#define _STORE_ATTR_INSTANCE_VALUE 473 -#define _STORE_ATTR_SLOT 474 -#define _STORE_ATTR_WITH_HINT 475 +#define _START_EXECUTOR 470 +#define _STORE_ATTR 471 +#define _STORE_ATTR_INSTANCE_VALUE 472 +#define _STORE_ATTR_SLOT 473 +#define _STORE_ATTR_WITH_HINT 474 #define _STORE_DEREF STORE_DEREF -#define _STORE_FAST 476 -#define _STORE_FAST_0 477 -#define _STORE_FAST_1 478 -#define _STORE_FAST_2 479 -#define _STORE_FAST_3 480 -#define _STORE_FAST_4 481 -#define _STORE_FAST_5 482 -#define _STORE_FAST_6 483 -#define _STORE_FAST_7 484 +#define _STORE_FAST 475 +#define _STORE_FAST_0 476 +#define _STORE_FAST_1 477 +#define _STORE_FAST_2 478 +#define _STORE_FAST_3 479 +#define _STORE_FAST_4 480 +#define _STORE_FAST_5 481 +#define _STORE_FAST_6 482 +#define _STORE_FAST_7 483 #define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST #define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST #define _STORE_GLOBAL STORE_GLOBAL #define _STORE_NAME STORE_NAME -#define _STORE_SLICE 485 -#define _STORE_SUBSCR 486 +#define _STORE_SLICE 484 +#define _STORE_SUBSCR 485 #define _STORE_SUBSCR_DICT STORE_SUBSCR_DICT #define _STORE_SUBSCR_LIST_INT STORE_SUBSCR_LIST_INT #define _SWAP SWAP -#define _TIER2_RESUME_CHECK 487 -#define _TO_BOOL 488 +#define _TIER2_RESUME_CHECK 486 +#define _TO_BOOL 487 #define _TO_BOOL_BOOL TO_BOOL_BOOL #define _TO_BOOL_INT TO_BOOL_INT #define _TO_BOOL_LIST TO_BOOL_LIST @@ -309,13 +308,13 
@@ extern "C" { #define _UNARY_NEGATIVE UNARY_NEGATIVE #define _UNARY_NOT UNARY_NOT #define _UNPACK_EX UNPACK_EX -#define _UNPACK_SEQUENCE 489 +#define _UNPACK_SEQUENCE 488 #define _UNPACK_SEQUENCE_LIST UNPACK_SEQUENCE_LIST #define _UNPACK_SEQUENCE_TUPLE UNPACK_SEQUENCE_TUPLE #define _UNPACK_SEQUENCE_TWO_TUPLE UNPACK_SEQUENCE_TWO_TUPLE #define _WITH_EXCEPT_START WITH_EXCEPT_START #define _YIELD_VALUE YIELD_VALUE -#define MAX_UOP_ID 489 +#define MAX_UOP_ID 488 #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 7ac9541b70c9e7..83e578cdd76fbd 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -278,7 +278,6 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_EXIT_TRACE] = HAS_ESCAPES_FLAG, [_CHECK_VALIDITY] = HAS_DEOPT_FLAG, [_LOAD_CONST_INLINE] = HAS_PURE_FLAG, - [_LOAD_CONST_INLINE_STEAL] = HAS_PURE_FLAG, [_LOAD_CONST_INLINE_BORROW] = HAS_PURE_FLAG, [_POP_TOP_LOAD_CONST_INLINE_BORROW] = HAS_PURE_FLAG, [_LOAD_CONST_INLINE_WITH_NULL] = HAS_PURE_FLAG, @@ -480,7 +479,6 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_LOAD_CONST_INLINE] = "_LOAD_CONST_INLINE", [_LOAD_CONST_INLINE_BORROW] = "_LOAD_CONST_INLINE_BORROW", [_LOAD_CONST_INLINE_BORROW_WITH_NULL] = "_LOAD_CONST_INLINE_BORROW_WITH_NULL", - [_LOAD_CONST_INLINE_STEAL] = "_LOAD_CONST_INLINE_STEAL", [_LOAD_CONST_INLINE_WITH_NULL] = "_LOAD_CONST_INLINE_WITH_NULL", [_LOAD_DEREF] = "_LOAD_DEREF", [_LOAD_FAST] = "_LOAD_FAST", @@ -1105,8 +1103,6 @@ int _PyUop_num_popped(int opcode, int oparg) return 0; case _LOAD_CONST_INLINE: return 0; - case _LOAD_CONST_INLINE_STEAL: - return 0; case _LOAD_CONST_INLINE_BORROW: return 0; case _POP_TOP_LOAD_CONST_INLINE_BORROW: diff --git a/Python/bytecodes.c b/Python/bytecodes.c index b49b1b8e8e10a9..5d0a754337af3c 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4989,10 +4989,6 @@ dummy_func( value = PyStackRef_FromPyObjectNew(ptr); } - tier2 pure 
op(_LOAD_CONST_INLINE_STEAL, (ptr/4 -- value)) { - value = PyStackRef_FromPyObjectSteal(ptr); - } - tier2 pure op(_LOAD_CONST_INLINE_BORROW, (ptr/4 -- value)) { value = PyStackRef_FromPyObjectImmortal(ptr); } diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index deee9240afe69b..2b704c89f7a27b 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -5930,16 +5930,6 @@ break; } - case _LOAD_CONST_INLINE_STEAL: { - _PyStackRef value; - PyObject *ptr = (PyObject *)CURRENT_OPERAND0(); - value = PyStackRef_FromPyObjectSteal(ptr); - stack_pointer[0] = value; - stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); - break; - } - case _LOAD_CONST_INLINE_BORROW: { _PyStackRef value; PyObject *ptr = (PyObject *)CURRENT_OPERAND0(); diff --git a/Python/optimizer.c b/Python/optimizer.c index 858d9c70407a14..95180e55af2506 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1379,7 +1379,7 @@ counter_optimize( _Py_CODEUNIT *target = instr + 1 + _PyOpcode_Caches[JUMP_BACKWARD] - oparg; _PyUOpInstruction buffer[4] = { { .opcode = _START_EXECUTOR, .jump_target = 3, .format=UOP_FORMAT_JUMP }, - { .opcode = _LOAD_CONST_INLINE_STEAL, .operand0 = (uintptr_t)self }, + { .opcode = _LOAD_CONST_INLINE, .operand0 = (uintptr_t)self }, { .opcode = _INTERNAL_INCREMENT_OPT_COUNTER }, { .opcode = _EXIT_TRACE, .target = (uint32_t)(target - _PyCode_CODE(code)), .format=UOP_FORMAT_TARGET } }; diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 334993953934e1..c72ae7b6281e80 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -2535,15 +2535,6 @@ break; } - case _LOAD_CONST_INLINE_STEAL: { - _Py_UopsSymbol *value; - value = sym_new_not_null(ctx); - stack_pointer[0] = value; - stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); - break; - } - case _LOAD_CONST_INLINE_BORROW: { _Py_UopsSymbol *value; PyObject *ptr = (PyObject *)this_instr->operand0; From b10a0c91ce28ab8d89dc24e90117b5cdd06a5100 Mon Sep 17 00:00:00 
2001 From: Kirill Podoprigora Date: Mon, 20 Jan 2025 16:43:34 +0200 Subject: [PATCH 10/11] Revert unnecessary changes --- Python/bytecodes.c | 4 ++-- Python/executor_cases.c.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 663eefbf36c2ac..c0ef767a9dd68b 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4964,7 +4964,7 @@ dummy_func( if (lltrace >= 2) { printf("SIDE EXIT: [UOp "); _PyUOpPrint(&next_uop[-1]); - printf(", exit %ld, temp %d, target %d -> %s]\n", + printf(", exit %u, temp %d, target %d -> %s]\n", exit - current_executor->exits, exit->temperature.value_and_backoff, (int)(target - _PyFrame_GetBytecode(frame)), _PyOpcode_OpName[target->op.code]); @@ -5085,7 +5085,7 @@ dummy_func( if (lltrace >= 2) { printf("DYNAMIC EXIT: [UOp "); _PyUOpPrint(&next_uop[-1]); - printf(", exit %ld, temp %d, target %d -> %s]\n", + printf(", exit %u, temp %d, target %d -> %s]\n", exit - current_executor->exits, exit->temperature.value_and_backoff, (int)(target - _PyFrame_GetBytecode(frame)), _PyOpcode_OpName[target->op.code]); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 72e533b7cd4c1b..e2eaca2c90fa76 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -6012,7 +6012,7 @@ _PyFrame_SetStackPointer(frame, stack_pointer); printf("SIDE EXIT: [UOp "); _PyUOpPrint(&next_uop[-1]); - printf(", exit %ld, temp %d, target %d -> %s]\n", + printf(", exit %u, temp %d, target %d -> %s]\n", exit - current_executor->exits, exit->temperature.value_and_backoff, (int)(target - _PyFrame_GetBytecode(frame)), _PyOpcode_OpName[target->op.code]); @@ -6220,7 +6220,7 @@ _PyFrame_SetStackPointer(frame, stack_pointer); printf("DYNAMIC EXIT: [UOp "); _PyUOpPrint(&next_uop[-1]); - printf(", exit %ld, temp %d, target %d -> %s]\n", + printf(", exit %u, temp %d, target %d -> %s]\n", exit - current_executor->exits, exit->temperature.value_and_backoff, (int)(target - 
_PyFrame_GetBytecode(frame)), _PyOpcode_OpName[target->op.code]); From 7ef50091a36199ab3a26df766f33c36bd0cdc553 Mon Sep 17 00:00:00 2001 From: Kirill Podoprigora Date: Sun, 13 Apr 2025 17:32:03 +0300 Subject: [PATCH 11/11] Resolve merge conflict --- Include/internal/pycore_optimizer.h | 2 +- Python/optimizer.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index db2bf3111f63e5..77a7809c417652 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -90,7 +90,7 @@ PyAPI_FUNC(_PyExecutorObject*) _Py_GetExecutor(PyCodeObject *code, int offset); void _Py_ExecutorInit(_PyExecutorObject *, const _PyBloomFilter *); void _Py_ExecutorDetach(_PyExecutorObject *); -int _Py_ExecutorClear(_PyExecutorObject *); +int _Py_ExecutorClear(PyObject *); void _Py_BloomFilter_Init(_PyBloomFilter *); void _Py_BloomFilter_Add(_PyBloomFilter *bloom, void *obj); PyAPI_FUNC(void) _Py_Executor_DependsOn(_PyExecutorObject *executor, void *obj); diff --git a/Python/optimizer.c b/Python/optimizer.c index 16b43c60e63bc6..2fe3d404137bb4 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1429,7 +1429,7 @@ _Py_ExecutorDetach(_PyExecutorObject *executor) } int -_Py_ExecutorClear(_PyExecutorObject *executor) +_Py_ExecutorClear(PyObject *op) { _PyExecutorObject *executor = _PyExecutorObject_CAST(op); if (!executor->vm_data.valid) { @@ -1453,7 +1453,7 @@ _Py_ExecutorClear(_PyExecutorObject *executor) } static int -executor_clear(_PyExecutorObject *executor) +executor_clear(PyObject *executor) { return _Py_ExecutorClear(executor); }