8000 gh-99254: remove all unused consts from code objects (GH-99255) · python/cpython@3dd6ee2 · GitHub
[go: up one dir, main page]

Skip to content

Commit 3dd6ee2

Browse files
authored
gh-99254: remove all unused consts from code objects (GH-99255)
1 parent faf7dfa commit 3dd6ee2

File tree

5 files changed

+142
-33
lines changed

5 files changed

+142
-33
lines changed

Lib/importlib/_bootstrap_external.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -425,6 +425,7 @@ def _write_atomic(path, data, mode=0o666):
425425
# Python 3.12a1 3509 (Conditional jumps only jump forward)
426426
# Python 3.12a1 3510 (FOR_ITER leaves iterator on the stack)
427427
# Python 3.12a1 3511 (Add STOPITERATION_ERROR instruction)
428+
# Python 3.12a1 3512 (Remove all unused consts from code objects)
428429

429430
# Python 3.13 will start with 3550
430431

@@ -437,7 +438,7 @@ def _write_atomic(path, data, mode=0o666):
437438
# Whenever MAGIC_NUMBER is changed, the ranges in the magic_values array
438439
# in PC/launcher.c must also be updated.
439440

440-
MAGIC_NUMBER = (3511).to_bytes(2, 'little') + b'\r\n'
441+
MAGIC_NUMBER = (3512).to_bytes(2, 'little') + b'\r\n'
441442

442443
_RAW_MAGIC_NUMBER = int.from_bytes(MAGIC_NUMBER, 'little') # For import.c
443444

Lib/test/test_compile.py

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -670,7 +670,7 @@ def test_merge_code_attrs(self):
670670
self.assertIs(f1.__code__.co_linetable, f2.__code__.co_linetable)
671671

672672
@support.cpython_only
673-
def test_strip_unused_consts(self):
673+
def test_remove_unused_consts(self):
674674
def f():
675675
"docstring"
676676
if True:
@@ -679,7 +679,41 @@ def f():
679679
return "unused"
680680

681681
self.assertEqual(f.__code__.co_consts,
682-
("docstring", True, "used"))
682+
("docstring", "used"))
683+
684+
@support.cpython_only
685+
def test_remove_unused_consts_no_docstring(self):
686+
# the first item (None for no docstring in this case) is
687+
# always retained.
688+
def f():
689+
if True:
690+
return "used"
691+
else:
692+
return "unused"
693+
694+
self.assertEqual(f.__code__.co_consts,
695+
(None, "used"))
696+
697+
@support.cpython_only
698+
def test_remove_unused_consts_extended_args(self):
699+
N = 1000
700+
code = ["def f():\n"]
701+
code.append("\ts = ''\n")
702+
code.append("\tfor i in range(1):\n")
703+
for i in range(N):
704+
code.append(f"\t\tif True: s += 't{i}'\n")
705+
code.append(f"\t\tif False: s += 'f{i}'\n")
706+
code.append("\treturn s\n")
707+
708+
code = "".join(code)
709+
g = {}
710+
eval(compile(code, "file.py", "exec"), g)
711+
exec(code, g)
712+
f = g['f']
713+
expected = tuple([None, '', 1] + [f't{i}' for i in range(N)])
714+
self.assertEqual(f.__code__.co_consts, expected)
715+
expected = "".join(expected[3:])
716+
self.assertEqual(expected, f())
683717

684718
# Stripping unused constants is not a strict requirement for the
685719
# Python semantics, it's a more an implementation detail.

Lib/test/test_dis.py

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -168,13 +168,13 @@ def bug1333982(x=[]):
168168
%3d RESUME 0
169169
170170
%3d LOAD_ASSERTION_ERROR
171-
LOAD_CONST 2 (<code object <listcomp> at 0x..., file "%s", line %d>)
171+
LOAD_CONST 1 (<code object <listcomp> at 0x..., file "%s", line %d>)
172172
MAKE_FUNCTION 0
173173
LOAD_FAST 0 (x)
174174
GET_ITER
175175
CALL 0
176176
177-
%3d LOAD_CONST 3 (1)
177+
%3d LOAD_CONST 2 (1)
178178
179179
%3d BINARY_OP 0 (+)
180180
CALL 0
@@ -1446,9 +1446,9 @@ def jumpy():
14461446
# End fodder for opinfo generation tests
14471447
expected_outer_line = 1
14481448
_line_offset = outer.__code__.co_firstlineno - 1
1449-
code_object_f = outer.__code__.co_consts[3]
1449+
code_object_f = outer.__code__.co_consts[1]
14501450
expected_f_line = code_object_f.co_firstlineno - _line_offset
1451-
code_object_inner = code_object_f.co_consts[3]
1451+
code_object_inner = code_object_f.co_consts[1]
14521452
expected_inner_line = code_object_inner.co_firstlineno - _line_offset
14531453
expected_jumpy_line = 1
14541454

@@ -1485,21 +1485,21 @@ def _prepare_test_cases():
14851485
Instruction(opname='MAKE_CELL', opcode=135, arg=0, argval='a', argrepr='a', offset=0, starts_line=None, is_jump_target=False, positions=None),
14861486
Instruction(opname='MAKE_CELL', opcode=135, arg=1, argval='b', argrepr='b', offset=2, starts_line=None, is_jump_target=False, positions=None),
14871487
Instruction(opname='RESUME', opcode=151, arg=0, argval=0, argrepr='', offset=4, starts_line=1, is_jump_target=False, positions=None),
1488-
Instruction(opname='LOAD_CONST', opcode=100, arg=7, argval=(3, 4), argrepr='(3, 4)', offset=6, starts_line=2, is_jump_target=False, positions=None),
1488+
Instruction(opname='LOAD_CONST', opcode=100, arg=5, argval=(3, 4), argrepr='(3, 4)', offset=6, starts_line=2, is_jump_target=False, positions=None),
14891489
Instruction(opname='LOAD_CLOSURE', opcode=136, arg=0, argval='a', argrepr='a', offset=8, starts_line=None, is_jump_target=False, positions=None),
14901490
Instruction(opname='LOAD_CLOSURE', opcode=136, arg=1, argval='b', argrepr='b', offset=10, starts_line=None, is_jump_target=False, positions=None),
14911491
Instruction(opname='BUILD_TUPLE', opcode=102, arg=2, argval=2, argrepr='', offset=12, starts_line=None, is_jump_target=False, positions=None),
1492-
Instruction(opname='LOAD_CONST', opcode=100, arg=3, argval=code_object_f, argrepr=repr(code_object_f), offset=14, starts_line=None, is_jump_target=False, positions=None),
1492+
Instruction(opname='LOAD_CONST', opcode=100, arg=1, argval=code_object_f, argrepr=repr(code_object_f), offset=14, starts_line=None, is_jump_target=False, positions=None),
14931493
Instruction(opname='MAKE_FUNCTION', opcode=132, arg=9, argval=9, argrepr='defaults, closure', offset=16, starts_line=None, is_jump_target=False, positions=None),
14941494
Instruction(opname='STORE_FAST', opcode=125, arg=2, argval='f', argrepr='f', offset=18, starts_line=None, is_jump_target=False, positions=None),
14951495
Instruction(opname='LOAD_GLOBAL', opcode=116, arg=1, argval='print', argrepr='NULL + print', offset=20, starts_line=7, is_jump_target=False, positions=None),
14961496
Instruction(opname='LOAD_DEREF', opcode=137, arg=0, argval='a', argrepr='a', offset=32, starts_line=None, is_jump_target=False, positions=None),
14971497
Instruction(opname='LOAD_DEREF', opcode=137, arg=1, argval='b', argrepr='b', offset=34, starts_line=None, is_jump_target=False, positions=None),
1498-
Instruction(opname='LOAD_CONST', opcode=100, arg=4, argval='', argrepr="''", offset=36, starts_line=None, is_jump_target=False, positions=None),
1499-
Instruction(opname='LOAD_CONST', opcode=100, arg=5, argval=1, argrepr='1', offset=38, starts_line=None, is_jump_target=False, positions=None),
1498+
Instruction(opname='LOAD_CONST', opcode=100, arg=2, argval='', argrepr="''", offset=36, starts_line=None, is_jump_target=False, positions=None),
1499+
Instruction(opname='LOAD_CONST', opcode=100, arg=3, argval=1, argrepr='1', offset=38, starts_line=None, is_jump_target=False, positions=None),
15001500
Instruction(opname='BUILD_LIST', opcode=103, arg=0, argval=0, argrepr='', offset=40, starts_line=None, is_jump_target=False, positions=None),
15011501
Instruction(opname='BUILD_MAP', opcode=105, arg=0, argval=0, argrepr='', offset=42, starts_line=None, is_jump_target=False, positions=None),
1502-
Instruction(opname='LOAD_CONST', opcode=100, arg=6, argval='Hello world!', argrepr="'Hello world!'", offset=44, starts_line=None, is_jump_target=False, positions=None),
1502+
Instruction(opname='LOAD_CONST', opcode=100, arg=4, argval='Hello world!', argrepr="'Hello world!'", offset=44, starts_line=None, is_jump_target=False, positions=None),
15031503
Instruction(opname='CALL', opcode=171, arg=7, argval=7, argrepr='', offset=46, starts_line=None, is_jump_target=False, positions=None),
15041504
Instruction(opname='POP_TOP', opcode=1, arg=None, argval=None, argrepr='', offset=56, starts_line=None, is_jump_target=False, positions=None),
15051505
Instruction(opname='LOAD_FAST', opcode=124, arg=2, argval='f', argrepr='f', offset=58, starts_line=8, is_jump_target=False, positions=None),
@@ -1511,13 +1511,13 @@ def _prepare_test_cases():
15111511
Instruction(opname='MAKE_CELL', opcode=135, arg=0, argval='c', argrepr='c', offset=2, starts_line=None, is_jump_target=False, positions=None),
15121512
Instruction(opname='MAKE_CELL', opcode=135, arg=1, argval='d', argrepr='d', offset=4, starts_line=None, is_jump_target=False, positions=None),
15131513
Instruction(opname='RESUME', opcode=151, arg=0, argval=0, argrepr='', offset=6, starts_line=2, is_jump_target=False, positions=None),
1514-
Instruction(opname='LOAD_CONST', opcode=100, arg=4, argval=(5, 6), argrepr='(5, 6)', offset=8, starts_line=3, is_jump_target=False, positions=None),
1514+
Instruction(opname='LOAD_CONST', opcode=100, arg=2, argval=(5, 6), argrepr='(5, 6)', offset=8, starts_line=3, is_jump_target=False, positions=None),
15151515
Instruction(opname='LOAD_CLOSURE', opcode=136, arg=3, argval='a', argrepr='a', offset=10, starts_line=None, is_jump_target=False, positions=None),
15161516
Instruction(opname='LOAD_CLOSURE', opcode=136, arg=4, argval='b', argrepr='b', offset=12, starts_line=None, is_jump_target=False, positions=None),
15171517
Instruction(opname='LOAD_CLOSURE', opcode=136, arg=0, argval='c', argrepr='c', offset=14, starts_line=None, is_jump_target=False, positions=None),
15181518
Instruction(opname='LOAD_CLOSURE', opcode=136, arg=1, argval='d', argrepr='d', offset=16, starts_line=None, is_jump_target=False, positions=None),
15191519
Instruction(opname='BUILD_TUPLE', opcode=102, arg=4, argval=4, argrepr='', offset=18, starts_line=None, is_jump_target=False, positions=None),
1520-
Instruction(opname='LOAD_CONST', opcode=100, arg=3, argval=code_object_inner, argrepr=repr(code_object_inner), offset=20, starts_line=None, is_jump_target=False, positions=None),
1520+
Instruction(opname='LOAD_CONST', opcode=100, arg=1, argval=code_object_inner, argrepr=repr(code_object_inner), offset=20, starts_line=None, is_jump_target=False, positions=None),
15211521
Instruction(opname='MAKE_FUNCTION', opcode=132, arg=9, argval=9, argrepr='defaults, closure', offset=22, starts_line=None, is_jump_target=False, positions=None),
15221522
Instruction(opname='STORE_FAST', opcode=125, arg=2, argval='inner', argrepr='inner', offset=24, starts_line=None, is_jump_target=False, positions=None),
15231523
Instruction(opname='LOAD_GLOBAL', opcode=116, arg=1, argval='print', argrepr='NULL + print', offset=26, starts_line=5, is_jump_target=False, positions=None),
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
The compiler now removes all unused constants from code objects (except the first one, which may be a docstring).

Python/compile.c

Lines changed: 92 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -8472,7 +8472,7 @@ static int
84728472
optimize_cfg(cfg_builder *g, PyObject *consts, PyObject *const_cache);
84738473

84748474
static int
8475-
trim_unused_consts(basicblock *entryblock, PyObject *consts);
8475+
remove_unused_consts(basicblock *entryblock, PyObject *consts);
84768476

84778477
/* Duplicates exit BBs, so that line numbers can be propagated to them */
84788478
static int
@@ -8813,6 +8813,9 @@ assemble(struct compiler *c, int addNone)
88138813
if (add_checks_for_loads_of_uninitialized_variables(g->g_entryblock, c) < 0) {
88148814
goto error;
88158815
}
8816+
if (remove_unused_consts(g->g_entryblock, consts)) {
8817+
goto error;
8818+
}
88168819

88178820
/** line numbers (TODO: move this before optimization stage) */
88188821
if (duplicate_exits_without_lineno(g) < 0) {
@@ -8844,10 +8847,6 @@ assemble(struct compiler *c, int addNone)
88448847
/* Can't modify the bytecode after computing jump offsets. */
88458848
assemble_jump_offsets(g->g_entryblock);
88468849

8847-
if (trim_unused_consts(g->g_entryblock, consts)) {
8848-
goto error;
8849-
}
8850-
88518850
/* Create assembler */
88528851
if (!assemble_init(&a, c->u->u_firstlineno))
88538852
goto error;
@@ -9706,32 +9705,106 @@ optimize_cfg(cfg_builder *g, PyObject *consts, PyObject *const_cache)
97069705
return 0;
97079706
}
97089707

9709-
// Remove trailing unused constants.
9708+
97109709
static int
9711-
trim_unused_consts(basicblock *entryblock, PyObject *consts)
9710+
remove_unused_consts(basicblock *entryblock, PyObject *consts)
97129711
{
97139712
assert(PyList_CheckExact(consts));
9713+
Py_ssize_t nconsts = PyList_GET_SIZE(consts);
9714+
if (nconsts == 0) {
9715+
return 0; /* nothing to do */
9716+
}
97149717

9718+
Py_ssize_t *index_map = NULL;
9719+
Py_ssize_t *reverse_index_map = NULL;
9720+
int err = 1;
9721+
9722+
index_map = PyMem_Malloc(nconsts * sizeof(Py_ssize_t));
9723+
if (index_map == NULL) {
9724+
goto end;
9725+
}
9726+
for (Py_ssize_t i = 1; i < nconsts; i++) {
9727+
index_map[i] = -1;
9728+
}
97159729
// The first constant may be docstring; keep it always.
9716-
int max_const_index = 0;
9730+
index_map[0] = 0;
9731+
9732+
/* mark used consts */
97179733
for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
97189734
for (int i = 0; i < b->b_iused; i++) {
9719-
if ((b->b_instr[i].i_opcode == LOAD_CONST ||
9720-
b->b_instr[i].i_opcode == KW_NAMES) &&
9721-
b->b_instr[i].i_oparg > max_const_index) {
9722-
max_const_index = b->b_instr[i].i_oparg;
9735+
if (b->b_instr[i].i_opcode == LOAD_CONST ||
9736+
b->b_instr[i].i_opcode == KW_NAMES) {
9737+
9738+
int index = b->b_instr[i].i_oparg;
9739+
index_map[index] = index;
97239740
}
97249741
}
97259742
}
9726-
if (max_const_index+1 < PyList_GET_SIZE(consts)) {
9727-
//fprintf(stderr, "removing trailing consts: max=%d, size=%d\n",
9728-
// max_const_index, (int)PyList_GET_SIZE(consts));
9729-
if (PyList_SetSlice(consts, max_const_index+1,
9730-
PyList_GET_SIZE(consts), NULL) < 0) {
9731-
return 1;
9743+
/* now index_map[i] == i if consts[i] is used, -1 otherwise */
9744+
9745+
/* condense consts */
9746+
Py_ssize_t n_used_consts = 0;
9747+
for (int i = 0; i < nconsts; i++) {
9748+
if (index_map[i] != -1) {
9749+
assert(index_map[i] == i);
9750+
index_map[n_used_consts++] = index_map[i];
97329751
}
97339752
}
9734-
return 0;
9753+
if (n_used_consts == nconsts) {
9754+
/* nothing to do */
9755+
err = 0;
9756+
goto end;
9757+
}
9758+
9759+
/* move all used consts to the beginning of the consts list */
9760+
assert(n_used_consts < nconsts);
9761+
for (Py_ssize_t i = 0; i < n_used_consts; i++) {
9762+
Py_ssize_t old_index = index_map[i];
9763+
assert(i <= old_index && old_index < nconsts);
9764+
if (i != old_index) {
9765+
PyObject *value = PyList_GET_ITEM(consts, index_map[i]);
9766+
assert(value != NULL);
9767+
PyList_SetItem(consts, i, Py_NewRef(value));
9768+
}
9769+
}
9770+
9771+
/* truncate the consts list at its new size */
9772+
if (PyList_SetSlice(consts, n_used_consts, nconsts, NULL) < 0) {
9773+
goto end;
9774+
}
9775+
9776+
/* adjust const indices in the bytecode */
9777+
reverse_index_map = PyMem_Malloc(nconsts * sizeof(Py_ssize_t));
9778+
if (reverse_index_map == NULL) {
9779+
goto end;
9780+
}
9781+
for (Py_ssize_t i = 0; i < nconsts; i++) {
9782+
reverse_index_map[i] = -1;
9783+
}
9784+
for (Py_ssize_t i = 0; i < n_used_consts; i++) {
9785+
assert(index_map[i] != -1);
9786+
assert(reverse_index_map[index_map[i]] == -1);
9787+
reverse_index_map[index_map[i]] = i;
9788+
}
9789+
9790+
for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
9791+
for (int i = 0; i < b->b_iused; i++) {
9792+
if (b->b_instr[i].i_opcode == LOAD_CONST ||
9793+
b->b_instr[i].i_opcode == KW_NAMES) {
9794+
9795+
int index = b->b_instr[i].i_oparg;
9796+
assert(reverse_index_map[index] >= 0);
9797+
assert(reverse_index_map[index] < n_used_consts);
9798+
b->b_instr[i].i_oparg = (int)reverse_index_map[index];
9799+
}
9800+
}
9801+
}
9802+
9803+
err = 0;
9804+
end:
9805+
PyMem_Free(index_map);
9806+
PyMem_Free(reverse_index_map);
9807+
return err;
97359808
}
97369809

97379810
static inline int

0 commit comments

Comments
 (0)
0