gh-106529: Split FOR_ITER_{LIST,TUPLE} into uops by gvanrossum · Pull Request #106696 · python/cpython · GitHub
[go: up one dir, main page]

Skip to content

gh-106529: Split FOR_ITER_{LIST,TUPLE} into uops #106696

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
Jul 14, 2023
Merged
Prev Previous commit
Next Next commit
Make reserving space less error-prone
  • Loading branch information
gvanrossum committed Jul 12, 2023
commit a94e583d7443b86c1ce8b78a64831c53310ad7c8
64 changes: 39 additions & 25 deletions Python/optimizer.c
Original file line number Diff line number Diff line change
Expand Up @@ -378,13 +378,17 @@ translate_bytecode_to_trace(
_Py_CODEUNIT *initial_instr = instr;
int trace_length = 0;
int max_length = buffer_size;
int reserved = 0;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The point of reserve is to make sure that you won't error out in the middle of an opcode's translation to uops?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Correct. I'd rather reserve space ahead of time than have to bail out in the middle and undo some work already done (the undoing feels more brittle).


#ifdef Py_DEBUG
char *uop_debug = Py_GETENV("PYTHONUOPSDEBUG");
int lltrace = 0;
if (uop_debug != NULL && *uop_debug >= '0') {
lltrace = *uop_debug - '0'; // TODO: Parse an int and all that
}
#endif

#ifdef Py_DEBUG
#define DPRINTF(level, ...) \
if (lltrace >= (level)) { fprintf(stderr, __VA_ARGS__); }
#else
Expand All @@ -397,6 +401,8 @@ translate_bytecode_to_trace(
uop_name(OPCODE), \
(uint64_t)(OPERAND)); \
assert(trace_length < max_length); \
assert(reserved > 0); \
reserved--; \
trace[trace_length].opcode = (OPCODE); \
trace[trace_length].operand = (OPERAND); \
trace_length++;
Expand All @@ -409,9 +415,23 @@ translate_bytecode_to_trace(
(INDEX), \
uop_name(OPCODE), \
(uint64_t)(OPERAND)); \
assert(reserved > 0); \
reserved--; \
trace[(INDEX)].opcode = (OPCODE); \
trace[(INDEX)].operand = (OPERAND);

// Reserve space for n uops.
//
// If fewer than n slots remain between trace_length and max_length, log a
// level-2 debug message naming `opname` and jump to the `done` label.
// Otherwise set `reserved` to n; ADD_TO_TRACE / ADD_TO_STUB each assert
// reserved > 0 and decrement it, so emitting more uops than were reserved
// trips an assertion.
//
// Wrapped in do { ... } while (0) so the expansion is a single statement:
// it is safe under an unbraced if/else and requires the trailing semicolon.
// The loop body contains no break/continue, only `goto done`, so the wrapper
// does not change control flow.
//
// NOTE: `n` is evaluated more than once; pass an expression without side
// effects.
#define RESERVE_RAW(n, opname) \
    do { \
        if (trace_length + (n) > max_length) { \
            DPRINTF(2, "No room for %s (need %d, got %d)\n", \
                    (opname), (n), max_length - trace_length); \
            goto done; \
        } \
        reserved = (n);  /* Keep ADD_TO_TRACE / ADD_TO_STUB honest */ \
    } while (0)

// Reserve space for main+stub uops, plus 2 for SAVE_IP and EXIT_TRACE.
// The debug message is labelled with uop_name(opcode), so a variable named
// `opcode` must be in scope at the point of use.
#define RESERVE(main, stub) RESERVE_RAW((main) + (stub) + 2, uop_name(opcode))

DPRINTF(4,
"Optimizing %s (%s:%d) at byte offset %ld\n",
PyUnicode_AsUTF8(code->co_qualname),
Expand All @@ -420,34 +440,35 @@ translate_bytecode_to_trace(
2 * INSTR_IP(initial_instr, code));

for (;;) {
RESERVE_RAW(2, "epilogue"); // Always need space for SAVE_IP and EXIT_TRACE
ADD_TO_TRACE(SAVE_IP, INSTR_IP(instr, code));

int opcode = instr->op.code;
int oparg = instr->op.arg;
int extras = 0;

while (opcode == EXTENDED_ARG) {
instr++;
extras += 1;
opcode = instr->op.code;
oparg = (oparg << 8) | instr->op.arg;
}

if (opcode == ENTER_EXECUTOR) {
_PyExecutorObject *executor =
(_PyExecutorObject *)code->co_executors->executors[oparg&255];
opcode = executor->vm_data.opcode;
DPRINTF(2, " * ENTER_EXECUTOR -> %s\n", _PyOpcode_OpName[opcode]);
oparg = (oparg & 0xffffff00) | executor->vm_data.oparg;
}

switch (opcode) {

case POP_JUMP_IF_FALSE:
case POP_JUMP_IF_TRUE:
{
// Assume jump unlikely (TODO: handle jump likely case)
// Reserve 5 entries (1 here, 2 stub, plus SAVE_IP + EXIT_TRACE)
if (trace_length + 5 > max_length) {
DPRINTF(1, "Ran out of space for POP_JUMP_IF_FALSE\n");
goto done;
}
RESERVE(1, 2);
_Py_CODEUNIT *target_instr =
instr + 1 + _PyOpcode_Caches[_PyOpcode_Deopt[opcode]] + oparg;
max_length -= 2; // Really the start of the stubs
Expand All @@ -461,9 +482,8 @@ translate_bytecode_to_trace(

case JUMP_BACKWARD:
{
if (instr + 2 - oparg == initial_instr
&& trace_length + 3 <= max_length)
{
if (instr + 2 - oparg == initial_instr) {
RESERVE(1, 0);
ADD_TO_TRACE(JUMP_TO_TOP, 0);
}
else {
Expand All @@ -474,11 +494,7 @@ translate_bytecode_to_trace(

case JUMP_FORWARD:
{
// Reserve 2 entries (SAVE_IP + EXIT_TRACE)
if (trace_length + 2 > max_length) {
DPRINTF(1, "Ran out of space for JUMP_FORWARD\n");
goto done;
}
RESERVE(0, 0);
// This will emit two SAVE_IP instructions; leave it to the optimizer
instr += oparg;
break;
Expand All @@ -487,6 +503,7 @@ translate_bytecode_to_trace(
case FOR_ITER_LIST:
case FOR_ITER_RANGE:
{
RESERVE(4, 3);
int check_op, exhausted_op, next_op;
switch (opcode) {
case FOR_ITER_LIST:
Expand All @@ -503,11 +520,6 @@ translate_bytecode_to_trace(
assert(0);
}
// Assume jump unlikely (can a for-loop exit be likely?)
// Reserve 9 entries (4 here, 3 stub, plus SAVE_IP + EXIT_TRACE)
if (trace_length + 9 > max_length) {
DPRINTF(1, "Ran out of space for %s\n", uop_name(opcode));
goto done;
}
_Py_CODEUNIT *target_instr = // +1 at the end skips over END_FOR
instr + 1 + _PyOpcode_Caches[_PyOpcode_Deopt[opcode]] + oparg + 1;
max_length -= 3; // Really the start of the stubs
Expand All @@ -528,10 +540,7 @@ translate_bytecode_to_trace(
if (expansion->nuops > 0) {
// Reserve space for nuops (+ SAVE_IP + EXIT_TRACE)
int nuops = expansion->nuops;
if (trace_length + nuops + 2 > max_length) {
DPRINTF(1, "Ran out of space for %s\n", uop_name(opcode));
goto done;
}
RESERVE(nuops, 0);
for (int i = 0; i < nuops; i++) {
uint64_t operand;
int offset = expansion->uops[i].offset;
Expand Down Expand Up @@ -577,12 +586,14 @@ translate_bytecode_to_trace(
}
DPRINTF(2, "Unsupported opcode %s\n", uop_name(opcode));
goto done; // Break out of loop
}
}
} // End default

} // End switch (opcode)

instr++;
// Add cache size for opcode
instr += _PyOpcode_Caches[_PyOpcode_Deopt[opcode]];
}
} // End for (;;)

done:
// Skip short traces like SAVE_IP, LOAD_FAST, SAVE_IP, EXIT_TRACE
Expand Down Expand Up @@ -631,6 +642,9 @@ translate_bytecode_to_trace(
}
return 0;

#undef RESERVE
#undef RESERVE_RAW
#undef INSTR_IP
#undef ADD_TO_TRACE
#undef DPRINTF
}
Expand Down
0