8000 GH-118093: Better handling of short and mid-loop traces by brandtbucher · Pull Request #122252 · python/cpython · GitHub
[go: up one dir, main page]

Skip to content

GH-118093: Better handling of short and mid-loop traces #122252

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Jul 29, 2024
5 changes: 3 additions & 2 deletions Objects/genobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -342,8 +342,9 @@ _PyGen_yf(PyGenObject *gen)
{
if (gen->gi_frame_state == FRAME_SUSPENDED_YIELD_FROM) {
_PyInterpreterFrame *frame = &gen->gi_iframe;
assert(is_resume(frame->instr_ptr));
assert((frame->instr_ptr->op.arg & RESUME_OPARG_LOCATION_MASK) >= RESUME_AFTER_YIELD_FROM);
// GH-122390: These asserts are wrong in the presence of ENTER_EXECUTOR!
// assert(is_resume(frame->instr_ptr));
// assert((frame->instr_ptr->op.arg & RESUME_OPARG_LOCATION_MASK) >= RESUME_AFTER_YIELD_FROM);
return PyStackRef_AsPyObjectNew(_PyFrame_StackPeek(frame));
}
return NULL;
Expand Down
64 changes: 31 additions & 33 deletions Python/optimizer.c
Original file line number Diff line number Diff line change
Expand Up @@ -503,8 +503,7 @@ add_to_trace(
if (trace_stack_depth >= TRACE_STACK_SIZE) { \
DPRINTF(2, "Trace stack overflow\n"); \
OPT_STAT_INC(trace_stack_overflow); \
trace_length = 0; \
goto done; \
return 0; \
} \
assert(func == NULL || func->func_code == (PyObject *)code); \
trace_stack[trace_stack_depth].func = func; \
Expand Down Expand Up @@ -550,6 +549,7 @@ translate_bytecode_to_trace(
} trace_stack[TRACE_STACK_SIZE];
int trace_stack_depth = 0;
int confidence = CONFIDENCE_RANGE; // Adjusted by branch instructions
bool jump_seen = false;

#ifdef Py_DEBUG
char *python_lltrace = Py_GETENV("PYTHON_LLTRACE");
Expand All @@ -568,7 +568,6 @@ translate_bytecode_to_trace(
ADD_TO_TRACE(_START_EXECUTOR, 0, (uintptr_t)instr, INSTR_IP(instr, code));
uint32_t target = 0;

top: // Jump here after _PUSH_FRAME or likely branches
for (;;) {
target = INSTR_IP(instr, code);
// Need space for _DEOPT
Expand All @@ -577,6 +576,13 @@ translate_bytecode_to_trace(
uint32_t opcode = instr->op.code;
uint32_t oparg = instr->op.arg;

if (!progress_needed && instr == initial_instr) {
// We have looped around to the start:
RESERVE(1);
ADD_TO_TRACE(_JUMP_TO_TOP, 0, 0, 0);
goto done;
}

DPRINTF(2, "%d: %s(%d)\n", target, _PyOpcode_OpName[opcode], oparg);

if (opcode == ENTER_EXECUTOR) {
Expand All @@ -603,30 +609,21 @@ translate_bytecode_to_trace(
/* Special case the first instruction,
* so that we can guarantee forward progress */
if (progress_needed) {
progress_needed = false;
if (opcode == JUMP_BACKWARD || opcode == JUMP_BACKWARD_NO_INTERRUPT) {
instr += 1 + _PyOpcode_Caches[opcode] - (int32_t)oparg;
initial_instr = instr;
if (opcode == JUMP_BACKWARD) {
ADD_TO_TRACE(_TIER2_RESUME_CHECK, 0, 0, target);
}
continue;
}
else {
if (OPCODE_HAS_EXIT(opcode) || OPCODE_HAS_DEOPT(opcode)) {
opcode = _PyOpcode_Deopt[opcode];
}
assert(!OPCODE_HAS_EXIT(opcode));
assert(!OPCODE_HAS_DEOPT(opcode));
if (OPCODE_HAS_EXIT(opcode) || OPCODE_HAS_DEOPT(opcode)) {
opcode = _PyOpcode_Deopt[opcode];
}
assert(!OPCODE_HAS_EXIT(opcode));
assert(!OPCODE_HAS_DEOPT(opcode));
}

if (OPCODE_HAS_EXIT(opcode)) {
// Make space for exit code
// Make space for side exit and final _EXIT_TRACE:
RESERVE_RAW(2, "_EXIT_TRACE");
max_length--;
}
if (OPCODE_HAS_ERROR(opcode)) {
// Make space for error code
// Make space for error stub and final _EXIT_TRACE:
RESERVE_RAW(2, "_ERROR_POP_N");
max_length--;
}
switch (opcode) {
Expand Down Expand Up @@ -672,19 +669,18 @@ translate_bytecode_to_trace(
}

case JUMP_BACKWARD:
ADD_TO_TRACE(_CHECK_PERIODIC, 0, 0, target);
_Py_FALLTHROUGH;
case JUMP_BACKWARD_NO_INTERRUPT:
{
_Py_CODEUNIT *target = instr + 1 + _PyOpcode_Caches[opcode] - (int)oparg;
if (target == initial_instr) {
/* We have looped round to the start */
RESERVE(1);
ADD_TO_TRACE(_JUMP_TO_TOP, 0, 0, 0);
}
else {
instr += 1 + _PyOpcode_Caches[_PyOpcode_Deopt[opcode]] - (int)oparg;
if (jump_seen) {
OPT_STAT_INC(inner_loop);
DPRINTF(2, "JUMP_BACKWARD not to top ends trace\n");
goto done;
}
goto done;
jump_seen = true;
goto top;
}

case JUMP_FORWARD:
Expand Down Expand Up @@ -904,23 +900,25 @@ translate_bytecode_to_trace(
assert(instr->op.code == POP_TOP);
instr++;
}
top:
// Jump here after _PUSH_FRAME or likely branches.
progress_needed = false;
} // End for (;;)

done:
while (trace_stack_depth > 0) {
TRACE_STACK_POP();
}
assert(code == initial_code);
// Skip short traces like _SET_IP, LOAD_FAST, _SET_IP, _EXIT_TRACE
if (progress_needed || trace_length < 5) {
// Skip short traces where we can't even translate a single instruction:
if (progress_needed) {
OPT_STAT_INC(trace_too_short);
DPRINTF(2,
"No trace for %s (%s:%d) at byte offset %d (%s)\n",
"No trace for %s (%s:%d) at byte offset %d (no progress)\n",
PyUnicode_AsUTF8(code->co_qualname),
PyUnicode_AsUTF8(code->co_filename),
code->co_firstlineno,
2 * INSTR_IP(initial_instr, code),
progress_needed ? "no progress" : "too short");
2 * INSTR_IP(initial_instr, code));
return 0;
}
if (trace[trace_length-1].opcode != _JUMP_TO_TOP) {
Expand Down
Loading
0