From f8637ce644cc45044be839e8b3b81427521070f2 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Wed, 17 Jul 2024 12:46:25 -0700 Subject: [PATCH 1/9] Allow one backward jump per trace, anywhere --- Objects/genobject.c | 2 -- Python/optimizer.c | 40 ++++++++++++++++++---------------------- 2 files changed, 18 insertions(+), 24 deletions(-) diff --git a/Objects/genobject.c b/Objects/genobject.c index c204ac04b480a4..eff9cf333aece0 100644 --- a/Objects/genobject.c +++ b/Objects/genobject.c @@ -342,8 +342,6 @@ _PyGen_yf(PyGenObject *gen) { if (gen->gi_frame_state == FRAME_SUSPENDED_YIELD_FROM) { _PyInterpreterFrame *frame = &gen->gi_iframe; - assert(is_resume(frame->instr_ptr)); - assert((frame->instr_ptr->op.arg & RESUME_OPARG_LOCATION_MASK) >= RESUME_AFTER_YIELD_FROM); return PyStackRef_AsPyObjectNew(_PyFrame_StackPeek(frame)); } return NULL; diff --git a/Python/optimizer.c b/Python/optimizer.c index 73316b3587f221..081a76bf1ffb0d 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -550,6 +550,7 @@ translate_bytecode_to_trace( } trace_stack[TRACE_STACK_SIZE]; int trace_stack_depth = 0; int confidence = CONFIDENCE_RANGE; // Adjusted by branch instructions + bool jump_seen = false; #ifdef Py_DEBUG char *python_lltrace = Py_GETENV("PYTHON_LLTRACE"); @@ -577,6 +578,13 @@ translate_bytecode_to_trace( uint32_t opcode = instr->op.code; uint32_t oparg = instr->op.arg; + if (!progress_needed && instr == initial_instr) { + // We have looped round to the start + RESERVE(1); + ADD_TO_TRACE(_JUMP_TO_TOP, 0, 0, 0); + goto done; + } + DPRINTF(2, "%d: %s(%d)\n", target, _PyOpcode_OpName[opcode], oparg); if (opcode == ENTER_EXECUTOR) { @@ -604,21 +612,11 @@ translate_bytecode_to_trace( * so that we can guarantee forward progress */ if (progress_needed) { progress_needed = false; - if (opcode == JUMP_BACKWARD || opcode == JUMP_BACKWARD_NO_INTERRUPT) { - instr += 1 + _PyOpcode_Caches[opcode] - (int32_t)oparg; - initial_instr = instr; - if (opcode == JUMP_BACKWARD) { - ADD_TO_TRACE(_TIER2_RESUME_CHECK, 0, 0, target); - } - continue; - } - else { - if (OPCODE_HAS_EXIT(opcode) || OPCODE_HAS_DEOPT(opcode)) { - opcode = _PyOpcode_Deopt[opcode]; - } - assert(!OPCODE_HAS_EXIT(opcode)); - assert(!OPCODE_HAS_DEOPT(opcode)); + if (OPCODE_HAS_EXIT(opcode) || OPCODE_HAS_DEOPT(opcode)) { + opcode = _PyOpcode_Deopt[opcode]; } + assert(!OPCODE_HAS_EXIT(opcode)); + assert(!OPCODE_HAS_DEOPT(opcode)); } if (OPCODE_HAS_EXIT(opcode)) { @@ -672,19 +670,17 @@ translate_bytecode_to_trace( } case JUMP_BACKWARD: + ADD_TO_TRACE(_CHECK_PERIODIC, 0, 0, target); case JUMP_BACKWARD_NO_INTERRUPT: { - _Py_CODEUNIT *target = instr + 1 + _PyOpcode_Caches[opcode] - (int)oparg; - if (target == initial_instr) { - /* We have looped round to the start */ - RESERVE(1); - ADD_TO_TRACE(_JUMP_TO_TOP, 0, 0, 0); - } - else { + instr += 1 + _PyOpcode_Caches[_PyOpcode_Deopt[opcode]] - (int)oparg; + if (jump_seen) { OPT_STAT_INC(inner_loop); DPRINTF(2, "JUMP_BACKWARD not to top ends trace\n"); + goto done; } - goto done; + jump_seen = true; + goto top; } case JUMP_FORWARD: From f1f5d13d738d4d3512fd872196826cc7525082cb Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Wed, 17 Jul 2024 12:51:18 -0700 Subject: [PATCH 2/9] Use progress, rather than trace length, to reject traces --- Python/optimizer.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Python/optimizer.c b/Python/optimizer.c index 081a76bf1ffb0d..d82f5b92184236 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -503,7 +503,7 @@ add_to_trace( if (trace_stack_depth >= TRACE_STACK_SIZE) { \ DPRINTF(2, "Trace stack overflow\n"); \ OPT_STAT_INC(trace_stack_overflow); \ - trace_length = 0; \ + progress_needed = true; \ goto done; \ } \ assert(func == NULL || func->func_code == (PyObject *)code); \ @@ -569,7 +569,6 @@ translate_bytecode_to_trace( ADD_TO_TRACE(_START_EXECUTOR, 0, (uintptr_t)instr, INSTR_IP(instr, code)); uint32_t target = 0; -top: // Jump here after _PUSH_FRAME or likely branches for (;;) { target = INSTR_IP(instr, code); // Need space for _DEOPT @@ -611,7 +610,6 @@ translate_bytecode_to_trace( /* Special case the first instruction, * so that we can guarantee forward progress */ if (progress_needed) { - progress_needed = false; if (OPCODE_HAS_EXIT(opcode) || OPCODE_HAS_DEOPT(opcode)) { opcode = _PyOpcode_Deopt[opcode]; } @@ -888,6 +886,9 @@ translate_bytecode_to_trace( assert(instr->op.code == POP_TOP); instr++; } + top: + // Jump here after _PUSH_FRAME or likely branches. + progress_needed = false; } // End for (;;) done: @@ -895,16 +896,15 @@ translate_bytecode_to_trace( TRACE_STACK_POP(); } assert(code == initial_code); - // Skip short traces like _SET_IP, LOAD_FAST, _SET_IP, _EXIT_TRACE - if (progress_needed || trace_length < 5) { + // Skip short traces where we can't even translate a single instruction: + if (progress_needed) { OPT_STAT_INC(trace_too_short); DPRINTF(2, - "No trace for %s (%s:%d) at byte offset %d (%s)\n", + "No trace for %s (%s:%d) at byte offset %d (no progress)\n", PyUnicode_AsUTF8(code->co_qualname), PyUnicode_AsUTF8(code->co_filename), code->co_firstlineno, - 2 * INSTR_IP(initial_instr, code), - progress_needed ? "no progress" : "too short"); + 2 * INSTR_IP(initial_instr, code)); return 0; } if (trace[trace_length-1].opcode != _JUMP_TO_TOP) { From 159e46b5f9215155cb582bb5e3ab69fece064a7a Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Wed, 24 Jul 2024 22:08:44 -0700 Subject: [PATCH 3/9] Add missing RESERVEs --- Python/optimizer.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/Python/optimizer.c b/Python/optimizer.c index d82f5b92184236..4c6c66e708783b 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -578,7 +578,7 @@ translate_bytecode_to_trace( uint32_t oparg = instr->op.arg; if (!progress_needed && instr == initial_instr) { - // We have looped round to the start + // We have looped around to the start: RESERVE(1); ADD_TO_TRACE(_JUMP_TO_TOP, 0, 0, 0); goto done; @@ -618,11 +618,13 @@ translate_bytecode_to_trace( } if (OPCODE_HAS_EXIT(opcode)) { - // Make space for exit code + // Make space for side exit and final _EXIT_TRACE: + RESERVE_RAW(2, "_EXIT_TRACE"); max_length--; } if (OPCODE_HAS_ERROR(opcode)) { - // Make space for error code + // Make space for error stub and final _EXIT_TRACE: + RESERVE_RAW(2, "_ERROR_POP_N"); max_length--; } switch (opcode) { @@ -669,6 +671,7 @@ translate_bytecode_to_trace( case JUMP_BACKWARD: ADD_TO_TRACE(_CHECK_PERIODIC, 0, 0, target); + _Py_FALLTHROUGH; case JUMP_BACKWARD_NO_INTERRUPT: { instr += 1 + _PyOpcode_Caches[_PyOpcode_Deopt[opcode]] - (int)oparg; From 511ec09538d61734476f16614822ff33a6c5236e Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Thu, 25 Jul 2024 13:31:03 -0700 Subject: [PATCH 4/9] Clean up overflow recovery --- Python/optimizer.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Python/optimizer.c b/Python/optimizer.c index 4c6c66e708783b..64be860874919a 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -503,8 +503,7 @@ add_to_trace( if (trace_stack_depth >= TRACE_STACK_SIZE) { \ DPRINTF(2, "Trace stack overflow\n"); \ OPT_STAT_INC(trace_stack_overflow); \ - progress_needed = true; \ - goto done; \ + return 0; \ } \ assert(func == NULL || func->func_code == (PyObject *)code); \ trace_stack[trace_stack_depth].func = func; \ From ba3d68becca72bb2755d92334345aba0aa3bd5e5 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Fri, 26 Jul 2024 11:20:55 -0700 Subject: [PATCH 5/9] Add asserts back and teach is_resume about ENTER_EXECUTOR --- Objects/genobject.c | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/Objects/genobject.c b/Objects/genobject.c index eff9cf333aece0..0e637a338200cf 100644 --- a/Objects/genobject.c +++ b/Objects/genobject.c @@ -10,6 +10,7 @@ #include "pycore_gc.h" // _PyGC_CLEAR_FINALIZED() #include "pycore_modsupport.h" // _PyArg_CheckPositional() #include "pycore_object.h" // _PyObject_GC_UNTRACK() +#include "pycore_opcode_metadata.h" // _PyOpcode_Deopt #include "pycore_opcode_utils.h" // RESUME_AFTER_YIELD_FROM #include "pycore_pyatomic_ft_wrappers.h" // FT_ATOMIC_* #include "pycore_pyerrors.h" // _PyErr_ClearExcState() @@ -327,14 +328,23 @@ gen_close_iter(PyObject *yf) } static inline bool -is_resume(_Py_CODEUNIT *instr) +is_resume(_PyInterpreterFrame *frame, uint8_t *oparg_p) { - uint8_t code = FT_ATOMIC_LOAD_UINT8_RELAXED(instr->op.code); - return ( - code == RESUME || - code == RESUME_CHECK || - code == INSTRUMENTED_RESUME - ); + PyCodeObject *code = _PyFrame_GetCode(frame); + int offset = frame->instr_ptr - _PyCode_CODE(code); + uint8_t opcode = _Py_GetBaseOpcode(code, offset); + uint8_t oparg = frame->instr_ptr->op.arg; + if (opcode == ENTER_EXECUTOR) { + _PyExecutorObject *executor = _Py_GetExecutor(code, sizeof(_Py_CODEUNIT) * offset); + opcode = _PyOpcode_Deopt[executor->vm_data.opcode]; + oparg = executor->vm_data.oparg; + Py_DECREF(executor); + } + if (opcode == RESUME) { + *oparg_p = oparg; + return true; + } + return false; } PyObject * @@ -342,6 +352,11 @@ _PyGen_yf(PyGenObject *gen) { if (gen->gi_frame_state == FRAME_SUSPENDED_YIELD_FROM) { _PyInterpreterFrame *frame = &gen->gi_iframe; + #ifndef NDEBUG + uint8_t oparg; + assert(is_resume(frame, &oparg)); + assert((oparg & RESUME_OPARG_LOCATION_MASK) >= RESUME_AFTER_YIELD_FROM); + #endif return PyStackRef_AsPyObjectNew(_PyFrame_StackPeek(frame)); } return NULL; @@ -370,11 +385,11 @@ gen_close(PyGenObject *gen, PyObject *args) Py_DECREF(yf); } _PyInterpreterFrame *frame = &gen->gi_iframe; - if (is_resume(frame->instr_ptr)) { + uint8_t oparg; + if (is_resume(frame, &oparg)) { /* We can safely ignore the outermost try block * as it is automatically generated to handle * StopIteration. */ - int oparg = frame->instr_ptr->op.arg; if (oparg & RESUME_OPARG_DEPTH1_MASK) { // RESUME after YIELD_VALUE and exception depth is 1 assert((oparg & RESUME_OPARG_LOCATION_MASK) != RESUME_AT_FUNC_START); From 7c404a8dea42300d90f81602c09058898fc0bcf5 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Fri, 26 Jul 2024 11:27:59 -0700 Subject: [PATCH 6/9] Guard _Py_GetExecutor on non-tier-two builds --- Objects/genobject.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Objects/genobject.c b/Objects/genobject.c index 0e637a338200cf..0bc295e0d9d999 100644 --- a/Objects/genobject.c +++ b/Objects/genobject.c @@ -334,12 +334,14 @@ is_resume(_PyInterpreterFrame *frame, uint8_t *oparg_p) int offset = frame->instr_ptr - _PyCode_CODE(code); uint8_t opcode = _Py_GetBaseOpcode(code, offset); uint8_t oparg = frame->instr_ptr->op.arg; +#ifdef _Py_TIER2 if (opcode == ENTER_EXECUTOR) { _PyExecutorObject *executor = _Py_GetExecutor(code, sizeof(_Py_CODEUNIT) * offset); opcode = _PyOpcode_Deopt[executor->vm_data.opcode]; oparg = executor->vm_data.oparg; Py_DECREF(executor); } +#endif if (opcode == RESUME) { *oparg_p = oparg; return true; From 1606abd27b05b558de8344f4a8d2f46c2d24091a Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Mon, 29 Jul 2024 09:04:20 -0700 Subject: [PATCH 7/9] Revert "Guard _Py_GetExecutor on non-tier-two builds" This reverts commit 7c404a8dea42300d90f81602c09058898fc0bcf5. --- Objects/genobject.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/Objects/genobject.c b/Objects/genobject.c index 0bc295e0d9d999..0e637a338200cf 100644 --- a/Objects/genobject.c +++ b/Objects/genobject.c @@ -334,14 +334,12 @@ is_resume(_PyInterpreterFrame *frame, uint8_t *oparg_p) int offset = frame->instr_ptr - _PyCode_CODE(code); uint8_t opcode = _Py_GetBaseOpcode(code, offset); uint8_t oparg = frame->instr_ptr->op.arg; -#ifdef _Py_TIER2 if (opcode == ENTER_EXECUTOR) { _PyExecutorObject *executor = _Py_GetExecutor(code, sizeof(_Py_CODEUNIT) * offset); opcode = _PyOpcode_Deopt[executor->vm_data.opcode]; oparg = executor->vm_data.oparg; Py_DECREF(executor); } -#endif if (opcode == RESUME) { *oparg_p = oparg; return true; From 6901f6009801859e1aac23a64ce052b35293e68b Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Mon, 29 Jul 2024 09:04:45 -0700 Subject: [PATCH 8/9] Revert "Add asserts back and teach is_resume about ENTER_EXECUTOR" This reverts commit ba3d68becca72bb2755d92334345aba0aa3bd5e5. --- Objects/genobject.c | 33 +++++++++------------------------ 1 file changed, 9 insertions(+), 24 deletions(-) diff --git a/Objects/genobject.c b/Objects/genobject.c index 0e637a338200cf..eff9cf333aece0 100644 --- a/Objects/genobject.c +++ b/Objects/genobject.c @@ -10,7 +10,6 @@ #include "pycore_gc.h" // _PyGC_CLEAR_FINALIZED() #include "pycore_modsupport.h" // _PyArg_CheckPositional() #include "pycore_object.h" // _PyObject_GC_UNTRACK() -#include "pycore_opcode_metadata.h" // _PyOpcode_Deopt #include "pycore_opcode_utils.h" // RESUME_AFTER_YIELD_FROM #include "pycore_pyatomic_ft_wrappers.h" // FT_ATOMIC_* #include "pycore_pyerrors.h" // _PyErr_ClearExcState() @@ -328,23 +327,14 @@ gen_close_iter(PyObject *yf) } static inline bool -is_resume(_PyInterpreterFrame *frame, uint8_t *oparg_p) +is_resume(_Py_CODEUNIT *instr) { - PyCodeObject *code = _PyFrame_GetCode(frame); - int offset = frame->instr_ptr - _PyCode_CODE(code); - uint8_t opcode = _Py_GetBaseOpcode(code, offset); - uint8_t oparg = frame->instr_ptr->op.arg; - if (opcode == ENTER_EXECUTOR) { - _PyExecutorObject *executor = _Py_GetExecutor(code, sizeof(_Py_CODEUNIT) * offset); - opcode = _PyOpcode_Deopt[executor->vm_data.opcode]; - oparg = executor->vm_data.oparg; - Py_DECREF(executor); - } - if (opcode == RESUME) { - *oparg_p = oparg; - return true; - } - return false; + uint8_t code = FT_ATOMIC_LOAD_UINT8_RELAXED(instr->op.code); + return ( + code == RESUME || + code == RESUME_CHECK || + code == INSTRUMENTED_RESUME + ); } PyObject * @@ -352,11 +342,6 @@ _PyGen_yf(PyGenObject *gen) { if (gen->gi_frame_state == FRAME_SUSPENDED_YIELD_FROM) { _PyInterpreterFrame *frame = &gen->gi_iframe; - #ifndef NDEBUG - uint8_t oparg; - assert(is_resume(frame, &oparg)); - assert((oparg & RESUME_OPARG_LOCATION_MASK) >= RESUME_AFTER_YIELD_FROM); - #endif return PyStackRef_AsPyObjectNew(_PyFrame_StackPeek(frame)); } return NULL; @@ -385,11 +370,11 @@ gen_close(PyGenObject *gen, PyObject *args) Py_DECREF(yf); } _PyInterpreterFrame *frame = &gen->gi_iframe; - uint8_t oparg; - if (is_resume(frame, &oparg)) { + if (is_resume(frame->instr_ptr)) { /* We can safely ignore the outermost try block * as it is automatically generated to handle * StopIteration. */ + int oparg = frame->instr_ptr->op.arg; if (oparg & RESUME_OPARG_DEPTH1_MASK) { // RESUME after YIELD_VALUE and exception depth is 1 assert((oparg & RESUME_OPARG_LOCATION_MASK) != RESUME_AT_FUNC_START); From 9bcc6435540e67fa88a293b63bb75dab50380e8b Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Mon, 29 Jul 2024 09:54:09 -0700 Subject: [PATCH 9/9] Add comment to removed asserts --- Objects/genobject.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Objects/genobject.c b/Objects/genobject.c index eff9cf333aece0..b281af8d7e1f4e 100644 --- a/Objects/genobject.c +++ b/Objects/genobject.c @@ -342,6 +342,9 @@ _PyGen_yf(PyGenObject *gen) { if (gen->gi_frame_state == FRAME_SUSPENDED_YIELD_FROM) { _PyInterpreterFrame *frame = &gen->gi_iframe; + // GH-122390: These asserts are wrong in the presence of ENTER_EXECUTOR! + // assert(is_resume(frame->instr_ptr)); + // assert((frame->instr_ptr->op.arg & RESUME_OPARG_LOCATION_MASK) >= RESUME_AFTER_YIELD_FROM); return PyStackRef_AsPyObjectNew(_PyFrame_StackPeek(frame)); } return NULL;