8000 gh-106149: Simplify stack depth calculation. Replace asserts by exceptions. by iritkatriel · Pull Request #107255 · python/cpython · GitHub
[go: up one dir, main page]

Skip to content

gh-106149: Simplify stack depth calculation. Replace asserts by exceptions. #107255

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Jul 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Include/internal/pycore_flowgraph.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,8 +89,8 @@ int _PyCfgBuilder_Init(_PyCfgBuilder *g);
void _PyCfgBuilder_Fini(_PyCfgBuilder *g);

int _PyCfg_OptimizeCodeUnit(_PyCfgBuilder *g, PyObject *consts, PyObject *const_cache,
int code_flags, int nlocals, int nparams, int firstlineno);
int _PyCfg_Stackdepth(_PyCfgBasicblock *entryblock, int code_flags);
int nlocals, int nparams, int firstlineno);
int _PyCfg_Stackdepth(_PyCfgBuilder *g);
void _PyCfg_ConvertPseudoOps(_PyCfgBasicblock *entryblock);
int _PyCfg_ResolveJumps(_PyCfgBuilder *g);

Expand Down
21 changes: 17 additions & 4 deletions Lib/test/test_peepholer.py
Original file line number Diff line number Diff line change
Expand Up @@ -991,14 +991,15 @@ def test_conditional_jump_forward_non_const_condition(self):
('LOAD_NAME', 1, 11),
('POP_JUMP_IF_TRUE', lbl := self.Label(), 12),
('LOAD_CONST', 2, 13),
('RETURN_VALUE', 13),
lbl,
('LOAD_CONST', 3, 14),
('RETURN_VALUE', 14),
]
expected_insts = [
('LOAD_NAME', 1, 11),
('POP_JUMP_IF_TRUE', lbl := self.Label(), 12),
('LOAD_CONST', 1, 13),
('RETURN_CONST', 1, 13),
lbl,
('RETURN_CONST', 2, 14),
]
Expand Down Expand Up @@ -1072,6 +1073,7 @@ def test_no_unsafe_static_swap(self):
('STORE_FAST', 1, 4),
('STORE_FAST', 1, 4),
('POP_TOP', 0, 4),
('LOAD_CONST', 0, 5),
('RETURN_VALUE', 5)
]
expected_insts = [
Expand All @@ -1080,7 +1082,7 @@ def test_no_unsafe_static_swap(self):
('NOP', 0, 3),
('STORE_FAST', 1, 4),
('POP_TOP', 0, 4),
('RETURN_VALUE', 5)
('RETURN_CONST', 0)
]
self.cfg_optimization_test(insts, expected_insts, consts=list(range(3)), nlocals=1)

Expand All @@ -1092,6 +1094,7 @@ def test_dead_store_elimination_in_same_lineno(self):
('STORE_FAST', 1, 4),
('STORE_FAST', 1, 4),
('STORE_FAST', 1, 4),
('LOAD_CONST', 0, 5),
('RETURN_VALUE', 5)
]
expected_insts = [
Expand All @@ -1100,7 +1103,7 @@ def test_dead_store_elimination_in_same_lineno(self):
('NOP', 0, 3),
('POP_TOP', 0, 4),
('STORE_FAST', 1, 4),
('RETURN_VALUE', 5)
('RETURN_CONST', 0, 5)
]
self.cfg_optimization_test(insts, expected_insts, consts=list(range(3)), nlocals=1)

Expand All @@ -1112,9 +1115,19 @@ def test_no_dead_store_elimination_in_different_lineno(self):
('STORE_FAST', 1, 4),
('STORE_FAST', 1, 5),
('STORE_FAST', 1, 6),
('LOAD_CONST', 0, 5),
('RETURN_VALUE', 5)
]
self.cfg_optimization_test(insts, insts, consts=list(range(3)), nlocals=1)
expected_insts = [
('LOAD_CONST', 0, 1),
('LOAD_CONST', 1, 2),
('LOAD_CONST', 2, 3),
('STORE_FAST', 1, 4),
('STORE_FAST', 1, 5),
('STORE_FAST', 1, 6),
('RETURN_CONST', 0, 5)
]
self.cfg_optimization_test(insts, expected_insts, consts=list(range(3)), nlocals=1)


if __name__ == "__main__":
Expand Down
47 changes: 31 additions & 16 deletions Python/compile.c
Original file line number Diff line number Diff line change
Expand Up @@ -7527,14 +7527,21 @@ build_cellfixedoffsets(_PyCompile_CodeUnitMetadata *umd)
return fixed;
}

#define IS_GENERATOR(CF) \
((CF) & (CO_GENERATOR | CO_COROUTINE | CO_ASYNC_GENERATOR))

static int
insert_prefix_instructions(_PyCompile_CodeUnitMetadata *umd, basicblock *entryblock,
int *fixed, int nfreevars, int code_flags)
{
assert(umd->u_firstlineno > 0);

/* Add the generator prefix instructions. */
if (code_flags & (CO_GENERATOR | CO_COROUTINE | CO_ASYNC_GENERATOR)) {
if (IS_GENERATOR(code_flags)) {
/* Note that RETURN_GENERATOR + POP_TOP have a net stack effect
* of 0. This is because RETURN_GENERATOR pushes an element
* with _PyFrame_StackPush before switching stacks.
*/
cfg_instr make_gen = {
.i_opcode = RETURN_GENERATOR,
.i_oparg = 0,
Expand Down Expand Up @@ -7715,19 +7722,27 @@ optimize_and_assemble_code_unit(struct compiler_unit *u, PyObject *const_cache,
int nparams = (int)PyList_GET_SIZE(u->u_ste->ste_varnames);
int nlocals = (int)PyDict_GET_SIZE(u->u_metadata.u_varnames);
assert(u->u_metadata.u_firstlineno);
if (_PyCfg_OptimizeCodeUnit(&g, consts, const_cache, code_flags, nlocals,
if (_PyCfg_OptimizeCodeUnit(&g, consts, const_cache, nlocals,
nparams, u->u_metadata.u_firstlineno) < 0) {
goto error;
}

/** Assembly **/
int nlocalsplus = prepare_localsplus(&u->u_metadata, &g, code_flags);
if (nlocalsplus < 0) {
int stackdepth = _PyCfg_Stackdepth(&g);
if (stackdepth < 0) {
goto error;
}

int maxdepth = _PyCfg_Stackdepth(g.g_entryblock, code_flags);
if (maxdepth < 0) {
/* prepare_localsplus adds instructions for generators that push
* and pop an item on the stack. This assertion makes sure there
* is space on the stack for that.
* It should always be true, because at least one expression is
* required to turn a function into a generator.
Comment on lines +7738 to +7739
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure this explanation is accurate? This function is a generator:

async def f():
    return

Here is the disassembly of it:

>>> dis.dis(f)
  1           0 RETURN_GENERATOR
              2 POP_TOP
              4 RESUME                   0

  2           6 RETURN_CONST             0 (None)
        >>    8 CALL_INTRINSIC_1         3 (INTRINSIC_STOPITERATION_ERROR)
             10 RERAISE                  1
ExceptionTable:
  4 to 6 -> 8 [0] lasti

I think if RETURN_CONST didn't exist, it would be enough to say "all functions must return a value, which always must go on the stack before it is returned." But with RETURN_CONST that's not true either. It really does seem like the StopIteration handler is what we are relying on here.

*/
assert(!(IS_GENERATOR(code_flags) && stackdepth == 0));

/** Assembly **/
int nlocalsplus = prepare_localsplus(&u->u_metadata, &g, code_flags);
if (nlocalsplus < 0) {
goto error;
}

Expand All @@ -7746,7 +7761,7 @@ optimize_and_assemble_code_unit(struct compiler_unit *u, PyObject *const_cache,
}

co = _PyAssemble_MakeCodeObject(&u->u_metadata, const_cache, consts,
maxdepth, &optimized_instrs, nlocalsplus,
stackdepth, &optimized_instrs, nlocalsplus,
code_flags, filename);

error:
Expand Down Expand Up @@ -8190,8 +8205,8 @@ _PyCompile_OptimizeCfg(PyObject *instructions, PyObject *consts, int nlocals)
if (instructions_to_cfg(instructions, &g) < 0) {
goto error;
}
int code_flags = 0, nparams = 0, firstlineno = 1;
if (_PyCfg_OptimizeCodeUnit(&g, consts, const_cache, code_flags, nlocals,
int nparams = 0, firstlineno = 1;
if (_PyCfg_OptimizeCodeUnit(&g, consts, const_cache, nlocals,
nparams, firstlineno) < 0) {
goto error;
}
Expand Down Expand Up @@ -8226,14 +8241,14 @@ _PyCompile_Assemble(_PyCompile_CodeUnitMetadata *umd, PyObject *filename,
goto error;
}

int code_flags = 0;
int nlocalsplus = prepare_localsplus(umd, &g, code_flags);
if (nlocalsplus < 0) {
int stackdepth = _PyCfg_Stackdepth(&g);
if (stackdepth < 0) {
goto error;
}

int maxdepth = _PyCfg_Stackdepth(g.g_entryblock, code_flags);
if (maxdepth < 0) {
int code_flags = 0;
int nlocalsplus = prepare_localsplus(umd, &g, code_flags);
if (nlocalsplus < 0) {
goto error;
}

Expand All @@ -8256,7 +8271,7 @@ _PyCompile_Assemble(_PyCompile_CodeUnitMetadata *umd, PyObject *filename,
goto error;
}
co = _PyAssemble_MakeCodeObject(umd, const_cache,
consts, maxdepth, &optimized_instrs,
consts, stackdepth, &optimized_instrs,
nlocalsplus, code_flags, filename);
Py_DECREF(consts);

Expand Down
61 changes: 41 additions & 20 deletions Python/flowgraph.c
Original file line number Diff line number Diff line change
Expand Up @@ -615,23 +615,28 @@ make_cfg_traversal_stack(basicblock *entryblock) {
return stack;
}

Py_LOCAL_INLINE(void)
Py_LOCAL_INLINE(int)
stackdepth_push(basicblock ***sp, basicblock *b, int depth)
{
assert(b->b_startdepth < 0 || b->b_startdepth == depth);
if (!(b->b_startdepth < 0 || b->b_startdepth == depth)) {
PyErr_Format(PyExc_ValueError, "Invalid CFG, inconsistent stackdepth");
return ERROR;
}
if (b->b_startdepth < depth && b->b_startdepth < 100) {
assert(b->b_startdepth < 0);
b->b_startdepth = depth;
*(*sp)++ = b;
}
return SUCCESS;
}

/* Find the flow path that needs the largest stack. We assume that
* cycles in the flow graph have no net effect on the stack depth.
*/
int
_PyCfg_Stackdepth(basicblock *entryblock, int code_flags)
_PyCfg_Stackdepth(cfg_builder *g)
{
basicblock *entryblock = g->g_entryblock;
for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
b->b_startdepth = INT_MIN;
}
Expand All @@ -640,42 +645,54 @@ _PyCfg_Stackdepth(basicblock *entryblock, int code_flags)
return ERROR;
}


int stackdepth = -1;
int maxdepth = 0;
basicblock **sp = stack;
if (code_flags & (CO_GENERATOR | CO_COROUTINE | CO_ASYNC_GENERATOR)) {
stackdepth_push(&sp, entryblock, 1);
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm confused about this. Why is it 1 if the RETURN_GENERATOR+POP_TOP add up to 0?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if it's somehow related to the fact that stack_effect() returns 0 for RETURN_GENERATOR?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added some prints to the old and new versions, to see how the stackdepth calculation proceeds. So here is the output for def f(): yield 12.

Old:

1 [0]  opcode = 75  depth = 1 effect = 0  new_depth = 1  maxdepth = 1
1 [1]  opcode = 1  depth = 1 effect = -1  new_depth = 0  maxdepth = 1
1 [2]  opcode = 257  depth = 0 effect = 0  new_depth = 0  maxdepth = 1
2 [2]  opcode = 257  depth = 0 effect = 2  target_depth = 2  maxdepth = 2
1 [3]  opcode = 151  depth = 0 effect = 0  new_depth = 0  maxdepth = 2
1 [4]  opcode = 100  depth = 0 effect = 1  new_depth = 1  maxdepth = 2
1 [5]  opcode = 150  depth = 1 effect = 0  new_depth = 1  maxdepth = 2
1 [6]  opcode = 151  depth = 1 effect = 0  new_depth = 1  maxdepth = 2
1 [7]  opcode = 1  depth = 1 effect = -1  new_depth = 0  maxdepth = 2
1 [8]  opcode = 121  depth = 0 effect = 0  new_depth = 0  maxdepth = 2
1 [0]  opcode = 173  depth = 2 effect = 0  new_depth = 2  maxdepth = 2
1 [1]  opcode = 119  depth = 2 effect = -1  new_depth = 1  maxdepth = 2
1 [0]  opcode = 151  depth = 0 effect = 0  new_depth = 0  maxdepth = 0
1 [1]  opcode = 100  depth = 0 effect = 1  new_depth = 1  maxdepth = 1
1 [2]  opcode = 24  depth = 1 effect = 0  new_depth = 1  maxdepth = 1
1 [3]  opcode = 90  depth = 1 effect = -1  new_depth = 0  maxdepth = 1
1 [4]  opcode = 121  depth = 0 effect = 0  new_depth = 0  maxdepth = 1

New:

1 [0]  opcode = 257  depth = 0 effect = 0  new_depth = 0  maxdepth = 0
2 [0]  opcode = 257  depth = 0 effect = 2  target_depth = 2  maxdepth = 2
1 [1]  opcode = 151  depth = 0 effect = 0  new_depth = 0  maxdepth = 2
1 [2]  opcode = 100  depth = 0 effect = 1  new_depth = 1  maxdepth = 2
1 [3]  opcode = 150  depth = 1 effect = 0  new_depth = 1  maxdepth = 2
1 [4]  opcode = 151  depth = 1 effect = 0  new_depth = 1  maxdepth = 2
1 [5]  opcode = 1  depth = 1 effect = -1  new_depth = 0  maxdepth = 2
1 [6]  opcode = 121  depth = 0 effect = 0  new_depth = 0  maxdepth = 2
1 [0]  opcode = 173  depth = 2 effect = 0  new_depth = 2  maxdepth = 2
1 [1]  opcode = 119  depth = 2 effect = -1  new_depth = 1  maxdepth = 2
1 [0]  opcode = 151  depth = 0 effect = 0  new_depth = 0  maxdepth = 0
1 [1]  opcode = 100  depth = 0 effect = 1  new_depth = 1  maxdepth = 1
1 [2]  opcode = 24  depth = 1 effect = 0  new_depth = 1  maxdepth = 1
1 [3]  opcode = 90  depth = 1 effect = -1  new_depth = 0  maxdepth = 1
1 [4]  opcode = 121  depth = 0 effect = 0  new_depth = 0  maxdepth = 1

So the old code looks at those two instructions which are now added after the stackdepth calculation. And indeed stackdepth(RETURN_GENERATOR) returns 0, so the additional 1 in the initialisation compensated for that. So the old code is a messy hack.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wondered the same thing! Thanks for tracking this down. Separately, is there any reason not to fix num_pushed metadata for RETURN_GENERATOR? I guess with this PR it now hardly matters what that metadata says for it...

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The problem is that the stack effect is derived from the bytecodes.c file, from the signature of the bytecode. If we add stuff to the signature the code generator will do other stuff with it.

} else {
stackdepth_push(&sp, entryblock, 0);
if (stackdepth_push(&sp, entryblock, 0) < 0) {
goto error;
}

while (sp != stack) {
basicblock *b = *--sp;
int depth = b->b_startdepth;
assert(depth >= 0);
basicblock *next = b->b_next;
for (int i = 0; i < b->b_iused; i++) {
cfg_instr *instr = &b->b_instr[i];
int effect = PyCompile_OpcodeStackEffectWithJump(instr->i_opcode, instr->i_oparg, 0);
int effect = PyCompile_OpcodeStackEffectWithJump(
instr->i_opcode, instr->i_oparg, 0);
if (effect == PY_INVALID_STACK_EFFECT) {
PyErr_Format(PyExc_SystemError,
"compiler PyCompile_OpcodeStackEffectWithJump(opcode=%d, arg=%i) failed",
"Invalid stack effect for opcode=%d, arg=%i",
instr->i_opcode, instr->i_oparg);
return ERROR;
goto error;
}
int new_depth = depth + effect;
assert(new_depth >= 0); /* invalid code or bug in stackdepth() */
if (new_depth < 0) {
PyErr_Format(PyExc_ValueError,
"Invalid CFG, stack underflow");
goto error;
}
if (new_depth > maxdepth) {
maxdepth = new_depth;
}
if (HAS_TARGET(instr->i_opcode)) {
effect = PyCompile_OpcodeStackEffectWithJump(instr->i_opcode, instr->i_oparg, 1);
assert(effect != PY_INVALID_STACK_EFFECT);
effect = PyCompile_OpcodeStackEffectWithJump(
instr->i_opcode, instr->i_oparg, 1);
if (effect == PY_INVALID_STACK_EFFECT) {
PyErr_Format(PyExc_SystemError,
"Invalid stack effect for opcode=%d, arg=%i",
instr->i_opcode, instr->i_oparg);
goto error;
}
int target_depth = depth + effect;
assert(target_depth >= 0); /* invalid code or bug in stackdepth() */
if (target_depth > maxdepth) {
maxdepth = target_depth;
}
stackdepth_push(&sp, instr->i_target, target_depth);
if (stackdepth_push(&sp, instr->i_target, target_depth) < 0) {
goto error;
}
}
depth = new_depth;
assert(!IS_ASSEMBLER_OPCODE(instr->i_opcode));
Expand All @@ -689,11 +706,15 @@ _PyCfg_Stackdepth(basicblock *entryblock, int code_flags)
}
if (next != NULL) {
assert(BB_HAS_FALLTHROUGH(b));
stackdepth_push(&sp, next, depth);
if (stackdepth_push(&sp, next, depth) < 0) {
goto error;
}
}
}
stackdepth = maxdepth;
error:
PyMem_Free(stack);
return maxdepth;
return stackdepth;
}

static int
Expand Down Expand Up @@ -2009,7 +2030,7 @@ mark_cold(basicblock *entryblock) {


static int
push_cold_blocks_to_end(cfg_builder *g, int code_flags) {
push_cold_blocks_to_end(cfg_builder *g) {
basicblock *entryblock = g->g_entryblock;
if (entryblock->b_next == NULL) {
/* single basicblock, no need to reorder */
Expand Down Expand Up @@ -2251,7 +2272,7 @@ resolve_line_numbers(cfg_builder *g, int firstlineno)

int
_PyCfg_OptimizeCodeUnit(cfg_builder *g, PyObject *consts, PyObject *const_cache,
int code_flags, int nlocals, int nparams, int firstlineno)
int nlocals, int nparams, int firstlineno)
{
assert(cfg_builder_check(g));
/** Preprocessing **/
Expand All @@ -2268,7 +2289,7 @@ _PyCfg_OptimizeCodeUnit(cfg_builder *g, PyObject *consts, PyObject *const_cache,
g->g_entryblock, nlocals, nparams));
insert_superinstructions(g);

RETURN_IF_ERROR(push_cold_blocks_to_end(g, code_flags));
RETURN_IF_ERROR(push_cold_blocks_to_end(g));
RETURN_IF_ERROR(resolve_line_numbers(g, firstlineno));
return SUCCESS;
}
0