8000 gh-114058: Foundations of the Tier2 redundancy eliminator by Fidget-Spinner · Pull Request #115085 · python/cpython · GitHub
[go: up one dir, main page]

Skip to content

gh-114058: Foundations of the Tier2 redundancy eliminator #115085

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 54 commits into from
Feb 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
54 commits
Select commit Hold shift + click to select a range
169e08d
bring over changes from old branch
Fidget-Spinner Feb 6, 2024
e270364
Port over to Mark's DSL
Fidget-Spinner Feb 6, 2024
e8a8b78
reduce diff
Fidget-Spinner Feb 6, 2024
bb6137a
fix passthrough
Fidget-Spinner Feb 6, 2024
9b15f9e
fix tests
Fidget-Spinner Feb 6, 2024
66bfce5
fix mypy issues
Fidget-Spinner Feb 6, 2024
3789cd9
fix tests and address review
Fidget-Spinner Feb 6, 2024
20d087b
add tests for abstract cases generator
Fidget-Spinner Feb 6, 2024
0f58526
patchcheck
Fidget-Spinner Feb 6, 2024
c55ba14
Address review
Fidget-Spinner Feb 7, 2024
e34a0f5
reduce diff, add tests
Fidget-Spinner Feb 7, 2024
229aab0
fix compilation problems
Fidget-Spinner Feb 7, 2024
3c8def4
don't needlessly clear aliasing information
Fidget-Spinner Feb 7, 2024
649b0c7
remove is_const for now
Fidget-Spinner Feb 7, 2024
2805755
remove type prop (address review)
Fidget-Spinner Feb 7, 2024
54e2f64
Add back SELF_OR_NULL :(
Fidget-Spinner Feb 7, 2024
dcddb2d
remove broken tests
Fidget-Spinner Feb 7, 2024
6e5ed12
address most reviews (except LOAD_ATTR todo)
Fidget-Spinner Feb 8, 2024
a8b27fc
implement NOT_NULL for attributes
Fidget-Spinner Feb 8, 2024
e00ec6f
revert function version cache size
Fidget-Spinner Feb 8, 2024
b8f0af1
remove unused stats
Fidget-Spinner Feb 8, 2024
9a9033f
remove unused macro
Fidget-Spinner Feb 8, 2024
710f609
remove unused types
Fidget-Spinner Feb 8, 2024
eaeb293
remove unused import
Fidget-Spinner Feb 8, 2024
0bb9539
rename file to tier2_redundancy_eliminator
Fidget-Spinner Feb 8, 2024
82405fa
fix the type system
Fidget-Spinner Feb 8, 2024
2e31906
fix cases generator
Fidget-Spinner Feb 8, 2024
ed25ce0
remove whitespace
Fidget-Spinner Feb 8, 2024
3bf3c7d
check all overridden uops are originally defined
Fidget-Spinner Feb 8, 2024
2af5b74
error -> out of space
Fidget-Spinner Feb 8, 2024
cb1d5fe
fix test cases
Fidget-Spinner Feb 8, 2024
5013027
remove extra lines
Fidget-Spinner Feb 8, 2024
1283660
fix out_of_space vs error
Fidget-Spinner Feb 8, 2024
d14dac6
fix CI
Fidget-Spinner Feb 8, 2024
5394cdd
move macros out, remove temp writebuffer
Fidget-Spinner Feb 9, 2024
0729778
update comment
Fidget-Spinner Feb 10, 2024
35227f8
Address review
Fidget-Spinner Feb 12, 2024
40a19f7
set IS_NULL as a flag
Fidget-Spinner Feb 12, 2024
0a4b6be
Merge remote-tracking branch 'upstream/main' into tier2_redundancy_el…
Fidget-Spinner Feb 12, 2024
aaeb4cd
Fix a bug
Fidget-Spinner Feb 12, 2024
6143256
fix a bug in frames
Fidget-Spinner Feb 12, 2024
35fff8f
address review
Fidget-Spinner Feb 12, 2024
2b6eff4
remove sym sharing
Fidget-Spinner Feb 12, 2024
3b5b498
Address changes
Fidget-Spinner Feb 12, 2024
702a6fc
Address review
Fidget-Spinner Feb 12, 2024
77e56ff
address review
Fidget-Spinner Feb 13, 2024
c457098
Merge remote-tracking branch 'upstream/main' into tier2_redundancy_el…
Fidget-Spinner Feb 13, 2024
fcd31af
rename inst -> this_instr
Fidget-Spinner Feb 13, 2024
92b211c
address review
Fidget-Spinner Feb 13, 2024
1bf6ef8
remove whitespace
Fidget-Spinner Feb 13, 2024
ea3755f
remove assert
Fidget-Spinner Feb 13, 2024
f09e3c3
remove enabled
Fidget-Spinner Feb 13, 2024
9670b23
fix up error codes
Fidget-Spinner Feb 13, 2024
38420c3
fix error code again
Fidget-Spinner Feb 13, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ Programs/test_frozenmain.h generated
Python/Python-ast.c generated
Python/executor_cases.c.h generated
Python/generated_cases.c.h generated
Python/tier2_redundancy_eliminator_bytecodes.c.h generated
Python/opcode_targets.h generated
Python/stdlib_module_names.h generated
Tools/peg_generator/pegen/grammar_parser.py generated
Expand Down
3 changes: 3 additions & 0 deletions Include/cpython/pystats.h
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,9 @@ typedef struct _optimization_stats {
uint64_t trace_length_hist[_Py_UOP_HIST_SIZE];
uint64_t trace_run_length_hist[_Py_UOP_HIST_SIZE];
uint64_t optimized_trace_length_hist[_Py_UOP_HIST_SIZE];
uint64_t optimizer_attempts;
uint64_t optimizer_successes;
uint64_t optimizer_failure_reason_no_memory;
} OptimizationStats;

typedef struct _rare_event_stats {
Expand Down
10 changes: 5 additions & 5 deletions Include/internal/pycore_opcode_metadata.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions Include/internal/pycore_optimizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,13 @@ extern "C" {
# error "this header requires Py_BUILD_CORE define"
#endif

#include "pycore_uop_ids.h"

// This is the length of the trace we project initially.
#define UOP_MAX_TRACE_LENGTH 512

#define TRACE_STACK_SIZE 5

int _Py_uop_analyze_and_optimize(_PyInterpreterFrame *frame,
_PyUOpInstruction *trace, int trace_len, int curr_stackentries,
_PyBloomFilter *dependencies);
Expand Down
10 changes: 5 additions & 5 deletions Include/internal/pycore_uop_metadata.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ extern const char * const _PyOpcode_uop_name[MAX_UOP_ID+1];

#ifdef NEED_OPCODE_METADATA
const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = {
[_NOP] = 0,
[_NOP] = HAS_PURE_FLAG,
[_RESUME_CHECK] = HAS_DEOPT_FLAG,
[_LOAD_FAST_CHECK] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ERROR_FLAG,
[_LOAD_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_PURE_FLAG,
Expand Down Expand Up @@ -202,10 +202,10 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = {
[_SAVE_RETURN_OFFSET] = HAS_ARG_FLAG,
[_EXIT_TRACE] = HAS_DEOPT_FLAG,
[_CHECK_VALIDITY] = HAS_DEOPT_FLAG,
[_LOAD_CONST_INLINE] = 0,
[_LOAD_CONST_INLINE_BORROW] = 0,
[_LOAD_CONST_INLINE_WITH_NULL] = 0,
[_LOAD_CONST_INLINE_BORROW_WITH_NULL] = 0,
[_LOAD_CONST_INLINE] = HAS_PURE_FLAG,
[_LOAD_CONST_INLINE_BORROW] = HAS_PURE_FLAG,
[_LOAD_CONST_INLINE_WITH_NULL] = HAS_PURE_FLAG,
[_LOAD_CONST_INLINE_BORROW_WITH_NULL] = HAS_PURE_FLAG,
[_CHECK_GLOBALS] = HAS_DEOPT_FLAG,
[_CHECK_BUILTINS] = HAS_DEOPT_FLAG,
[_INTERNAL_INCREMENT_OPT_COUNTER] = 0,
Expand Down
209 changes: 209 additions & 0 deletions Lib/test/test_capi/test_opt.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import sys
import textwrap
import unittest
import gc

import _testinternalcapi

Expand Down Expand Up @@ -556,6 +557,214 @@ def testfunc(n):
# too much already.
self.assertEqual(count, 1)

class TestUopsOptimization(unittest.TestCase):

def test_int_type_propagation(self):
def testfunc(loops):
num = 0
while num < loops:
x = num + num
a = x + 1
num += 1
return a

opt = _testinternalcapi.get_uop_optimizer()
res = None
with temporary_optimizer(opt):
res = testfunc(32)

ex = get_first_executor(testfunc)
self.assertIsNotNone(ex)
self.assertEqual(res, 63)
binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"]
guard_both_int_count = [opname for opname, _, _ in ex if opname == "_GUARD_BOTH_INT"]
self.assertGreaterEqual(len(binop_count), 3)
self.assertLessEqual(len(guard_both_int_count), 1)

def test_int_type_propagation_through_frame(self):
def double(x):
return x + x
def testfunc(loops):
num = 0
while num < loops:
x = num + num
a = double(x)
num += 1
return a

opt = _testinternalcapi.get_uop_optimizer()
res = None
with temporary_optimizer(opt):
res = testfunc(32)

ex = get_first_executor(testfunc)
self.assertIsNotNone(ex)
self.assertEqual(res, 124)
binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"]
guard_both_int_count = [opname for opname, _, _ in ex if opname == "_GUARD_BOTH_INT"]
self.assertGreaterEqual(len(binop_count), 3)
self.assertLessEqual(len(guard_both_int_count), 1)

def test_int_type_propagation_from_frame(self):
def double(x):
return x + x
def testfunc(loops):
num = 0
while num < loops:
a = double(num)
x = a + a
num += 1
return x

opt = _testinternalcapi.get_uop_optimizer()
res = None
with temporary_optimizer(opt):
res = testfunc(32)

ex = get_first_executor(testfunc)
self.assertIsNotNone(ex)
self.assertEqual(res, 124)
binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"]
guard_both_int_count = [opname for opname, _, _ in ex if opname == "_GUARD_BOTH_INT"]
self.assertGreaterEqual(len(binop_count), 3)
self.assertLessEqual(len(guard_both_int_count), 1)

def test_int_impure_region(self):
def testfunc(loops):
num = 0
while num < loops:
x = num + num
y = 1
x // 2
a = x + y
num += 1
return a

opt = _testinternalcapi.get_uop_optimizer()
res = None
with temporary_optimizer(opt):
res = testfunc(64)

ex = get_first_executor(testfunc)
self.assertIsNotNone(ex)
binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"]
self.assertGreaterEqual(len(binop_count), 3)

def test_call_py_exact_args(self):
def testfunc(n):
def dummy(x):
return x+1
for i in range(n):
dummy(i)

opt = _testinternalcapi.get_uop_optimizer()
with temporary_optimizer(opt):
testfunc(20)

ex = get_first_executor(testfunc)
self.assertIsNotNone(ex)
uops = {opname for opname, _, _ in ex}
self.assertIn("_PUSH_FRAME", uops)
self.assertIn("_BINARY_OP_ADD_INT", uops)
self.assertNotIn("_CHECK_PEP_523", uops)

def test_int_type_propagate_through_range(self):
def testfunc(n):

for i in range(n):
x = i + i
return x

opt = _testinternalcapi.get_uop_optimizer()
with temporary_optimizer(opt):
res = testfunc(20)

ex = get_first_executor(testfunc)
self.assertEqual(res, 19 * 2)
self.assertIsNotNone(ex)
uops = {opname for opname, _, _ in ex}
self.assertNotIn("_GUARD_BOTH_INT", uops)

def test_int_value_numbering(self):
def testfunc(n):

y = 1
for i in range(n):
x = y
z = x
a = z
b = a
res = x + z + a + b
return res

opt = _testinternalcapi.get_uop_optimizer()
with temporary_optimizer(opt):
res = testfunc(20)

ex = get_first_executor(testfunc)
self.assertEqual(res, 4)
self.assertIsNotNone(ex)
uops = {opname for opname, _, _ in ex}
self.assertIn("_GUARD_BOTH_INT", uops)
guard_count = [opname for opname, _, _ in ex if opname == "_GUARD_BOTH_INT"]
self.assertEqual(len(guard_count), 1)

def test_comprehension(self):
def testfunc(n):
for _ in range(n):
return [i for i in range(n)]

opt = _testinternalcapi.get_uop_optimizer()
with temporary_optimizer(opt):
testfunc(20)

ex = get_first_executor(testfunc)
self.assertIsNotNone(ex)
uops = {opname for opname, _, _ in ex}
self.assertNotIn("_BINARY_OP_ADD_INT", uops)

def test_call_py_exact_args_disappearing(self):
def dummy(x):
return x+1

def testfunc(n):
for i in range(n):
dummy(i)

opt = _testinternalcapi.get_uop_optimizer()
# Trigger specialization
testfunc(8)
with temporary_optimizer(opt):
del dummy
gc.collect()

def dummy(x):
return x + 2
testfunc(10)

ex = get_first_executor(testfunc)
# Honestly as long as it doesn't crash it's fine.
# Whether we get an executor or not is non-deterministic,
# because it's decided by when the function is freed.
# This test is a little implementation specific.

def test_promote_globals_to_constants(self):
def testfunc(n):
for i in range(n):
x = range(i)
return x

opt = _testinternalcapi.get_uop_optimizer()
with temporary_optimizer(opt):
testfunc(20)

ex = get_first_executor(testfunc)
self.assertIsNotNone(ex)
uops = {opname for opname, _, _ in ex}
self.assertNotIn("_LOAD_GLOBAL_BUILTIN", uops)
self.assertIn("_LOAD_CONST_INLINE_BORROW_WITH_NULL", uops)



if __name__ == "__main__":
unittest.main()
Loading
0