From e7b7d5d9b607181e1ef5ffc0c0e6b963df877d82 Mon Sep 17 00:00:00 2001 From: Saul Shanabrook Date: Mon, 20 May 2024 11:44:29 -0400 Subject: [PATCH 01/39] Add type version and offset to optimizer header --- Include/internal/pycore_optimizer.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index 76123987ac99f5..e19e416c25b0cd 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -33,6 +33,8 @@ struct _Py_UopsSymbol { int flags; // 0 bits: Top; 2 or more bits: Bottom PyTypeObject *typ; // Borrowed reference PyObject *const_val; // Owned reference (!) + int32_t typ_version; // currently stores type version + int32_t typ_version_offset; // bytecode offset where the last type version was set }; #define UOP_FORMAT_TARGET 0 From c90227661ec1c97c03d5f001a9477113d3c291e1 Mon Sep 17 00:00:00 2001 From: Saul Shanabrook Date: Mon, 20 May 2024 12:16:40 -0400 Subject: [PATCH 02/39] Start adding type version --- Python/optimizer_bytecodes.c | 7 +++++++ Python/optimizer_symbols.c | 1 + 2 files changed, 8 insertions(+) diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index e5c982befb2411..83a8f7917f8252 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -113,6 +113,13 @@ dummy_func(void) { sym_set_type(right, &PyLong_Type); } + op(_GUARD_TYPE_VERSION, (type_version/2, owner -- owner)) { + if (sym_matches_type_version(owner, type_version)) { + REPLACE_OP(this_instr, _NOP, 0, 0); + } + sym_set_type_version(owner, type_version); + } + op(_GUARD_BOTH_FLOAT, (left, right -- left, right)) { if (sym_matches_type(left, &PyFloat_Type)) { if (sym_matches_type(right, &PyFloat_Type)) { diff --git a/Python/optimizer_symbols.c b/Python/optimizer_symbols.c index e546eef306eeca..d02314b0ee4ba1 100644 --- a/Python/optimizer_symbols.c +++ b/Python/optimizer_symbols.c @@ -76,6 +76,7 @@ sym_new(_Py_UOpsContext *ctx) self->flags = 0; self->typ = NULL; self->const_val = NULL; + self->typ_version = return self; } From 89e3649ef1ac9e47054f8f5edafc6bf5b2b8a0cc Mon Sep 17 00:00:00 2001 From: Saul Shanabrook Date: Mon, 20 May 2024 13:11:22 -0400 Subject: [PATCH 03/39] Add type version functions and struct initialization --- Python/optimizer_bytecodes.c | 2 ++ Python/optimizer_symbols.c | 26 ++++++++++++++++++++++++-- 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 83a8f7917f8252..e87472c36be1a1 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -21,11 +21,13 @@ typedef struct _Py_UOpsAbstractFrame _Py_UOpsAbstractFrame; #define sym_new_const _Py_uop_sym_new_const #define sym_new_null _Py_uop_sym_new_null #define sym_matches_type _Py_uop_sym_matches_type +#define sym_matches_type_version _Py_uop_sym_matches_type_version #define sym_get_type _Py_uop_sym_get_type #define sym_has_type _Py_uop_sym_has_type #define sym_set_null(SYM) _Py_uop_sym_set_null(ctx, SYM) #define sym_set_non_null(SYM) _Py_uop_sym_set_non_null(ctx, SYM) #define sym_set_type(SYM, TYPE) _Py_uop_sym_set_type(ctx, SYM, TYPE) +#define sym_set_type_version(SYM, VERSION) _Py_uop_sym_set_type_version(ctx, SYM, VERSION) #define sym_set_const(SYM, CNST) _Py_uop_sym_set_const(ctx, SYM, CNST) #define sym_is_bottom _Py_uop_sym_is_bottom #define frame_new _Py_uop_frame_new diff --git a/Python/optimizer_symbols.c b/Python/optimizer_symbols.c index d02314b0ee4ba1..c5d197e2f323ff 100644 --- a/Python/optimizer_symbols.c +++ b/Python/optimizer_symbols.c @@ -52,7 +52,9 @@ static inline int get_lltrace(void) { static _Py_UopsSymbol NO_SPACE_SYMBOL = { .flags = IS_NULL | NOT_NULL | NO_SPACE, .typ = NULL, - .const_val = NULL + .const_val = NULL, + .typ_version = 0, + .typ_version_offset = 0, }; _Py_UopsSymbol * @@ -76,7 +78,8 @@ sym_new(_Py_UOpsContext *ctx) self->flags = 0; self->typ = NULL; self->const_val = NULL; - self->typ_version = + self->typ_version = 0; + self->typ_version_offset = 0; return self; } @@ -153,6 +156,12 @@ _Py_uop_sym_set_type(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, PyTypeObject *ty } } +void +_Py_uop_sym_set_type_version(_Py_UopsContext *ctx, _Py_UopsSymbol *sym, int32_t version) +{ + sym->typ_version = version; +} + void _Py_uop_sym_set_const(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, PyObject *const_val) { @@ -257,6 +266,12 @@ _Py_uop_sym_get_type(_Py_UopsSymbol *sym) return sym->typ; } +int32_t +_Py_uop_sym_get_type_version(_Py_UopsSymbol *sym) +{ + return sym->typ_version; +} + bool _Py_uop_sym_has_type(_Py_UopsSymbol *sym) { @@ -273,6 +288,13 @@ _Py_uop_sym_matches_type(_Py_UopsSymbol *sym, PyTypeObject *typ) return _Py_uop_sym_get_type(sym) == typ; } +bool +_Py_uop_sym_matches_type_version(_Py_UopsSymbol *sym, int32_t version) +{ + return _Py_uop_sym_get_type_version(sym) == version; +} + + int _Py_uop_sym_truthiness(_Py_UopsSymbol *sym) { From b8525bef8196d1ca4984a854d6ee432cf3b5d1fc Mon Sep 17 00:00:00 2001 From: Saul Shanabrook Date: Mon, 20 May 2024 13:52:08 -0400 Subject: [PATCH 04/39] fix typo --- Python/optimizer_symbols.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/optimizer_symbols.c b/Python/optimizer_symbols.c index c5d197e2f323ff..2a56eb5d5f9fed 100644 --- a/Python/optimizer_symbols.c +++ b/Python/optimizer_symbols.c @@ -157,7 +157,7 @@ _Py_uop_sym_set_type(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, PyTypeObject *ty } void -_Py_uop_sym_set_type_version(_Py_UopsContext *ctx, _Py_UopsSymbol *sym, int32_t version) +_Py_uop_sym_set_type_version(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, int32_t version) { sym->typ_version = version; } From 4bd1608802dae24e0fda2e6b2126f10d3d5d4e46 Mon Sep 17 00:00:00 2001 From: Saul Shanabrook Date: Mon, 20 May 2024 14:16:52 -0400 Subject: [PATCH 05/39] Add failing test case --- Lib/test/test_capi/test_opt.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 0491ff9b84d486..c8e749b7150453 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -1333,6 +1333,27 @@ def test_modified_local_is_seen_by_optimized_code(self): self.assertIs(type(s), float) self.assertEqual(s, 1024.0) + def test_guard_type_version_removed(self): + def thing(a): + x = 0 + for _ in range(100): + x += a.attr + x += a.attr + return x + + class Foo: + attr = 1 + + res, ex = self._run_with_optimizer(thing, Foo()) + opnames = list(iter_opnames(ex)) + for i in iter_opnames(ex): + print(i) + + self.assertIsNotNone(ex) + self.assertEqual(res, 200) + guard_type_version_count = opnames.count("_GUARD_TYPE_VERSION") + self.assertEqual(guard_type_version_count, 1) + if __name__ == "__main__": unittest.main() From ddbfd9443c4c39463f6bdbbbc000992b383e7f1a Mon Sep 17 00:00:00 2001 From: parmeggiani Date: Mon, 20 May 2024 20:31:14 +0200 Subject: [PATCH 06/39] breakpoints and prints --- Lib/test/test_capi/test_opt.py | 2 ++ Python/optimizer_bytecodes.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index c8e749b7150453..9ec842862e54d9 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -1344,6 +1344,8 @@ def thing(a): class Foo: attr = 1 + breakpoint() + res, ex = self._run_with_optimizer(thing, Foo()) opnames = list(iter_opnames(ex)) for i in iter_opnames(ex): diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index e87472c36be1a1..8a8158c3c50cf1 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -116,6 +116,7 @@ dummy_func(void) { } op(_GUARD_TYPE_VERSION, (type_version/2, owner -- owner)) { + printf("%d %d %d\n", type_version, owner->typ_version, owner->typ_version_offset); if (sym_matches_type_version(owner, type_version)) { REPLACE_OP(this_instr, _NOP, 0, 0); } @@ -711,6 +712,7 @@ dummy_func(void) { } op(_ITER_NEXT_RANGE, (iter -- iter, next)) { + printf("ciao"); next = sym_new_type(ctx, &PyLong_Type); (void)iter; } From f22130b0bffb11aaa7a66ff7056a798611ce395d Mon Sep 17 00:00:00 2001 From: Saul Shanabrook Date: Mon, 20 May 2024 14:41:41 -0400 Subject: [PATCH 07/39] Add function definitions --- Include/internal/pycore_optimizer.h | 2 ++ Python/optimizer_analysis.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index e19e416c25b0cd..138c202db8a58b 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -125,9 +125,11 @@ extern _Py_UopsSymbol *_Py_uop_sym_new_const(_Py_UOpsContext *ctx, PyObject *con extern _Py_UopsSymbol *_Py_uop_sym_new_null(_Py_UOpsContext *ctx); extern bool _Py_uop_sym_has_type(_Py_UopsSymbol *sym); extern bool _Py_uop_sym_matches_type(_Py_UopsSymbol *sym, PyTypeObject *typ); +extern bool _Py_uop_sym_matches_type_version(_Py_UopsSymbol *sym, int32_t version); extern void _Py_uop_sym_set_null(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym); extern void _Py_uop_sym_set_non_null(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym); extern void _Py_uop_sym_set_type(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, PyTypeObject *typ); +extern void _Py_uop_sym_set_type_version(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, int32_t version); extern void _Py_uop_sym_set_const(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, PyObject *const_val); extern bool _Py_uop_sym_is_bottom(_Py_UopsSymbol *sym); extern int _Py_uop_sym_truthiness(_Py_UopsSymbol *sym); diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index e5d3793bd4d204..047d1a6a8cf9f0 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -310,9 +310,11 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, #define sym_has_type _Py_uop_sym_has_type #define sym_get_type _Py_uop_sym_get_type #define sym_matches_type _Py_uop_sym_matches_type +#define sym_matches_type_version _Py_uop_sym_matches_type_version #define sym_set_null(SYM) _Py_uop_sym_set_null(ctx, SYM) #define sym_set_non_null(SYM) _Py_uop_sym_set_non_null(ctx, SYM) #define sym_set_type(SYM, TYPE) _Py_uop_sym_set_type(ctx, SYM, TYPE) +#define sym_set_type_version(SYM, VERSION) _Py_uop_sym_set_type_version(ctx, SYM, VERSION) #define sym_set_const(SYM, CNST) _Py_uop_sym_set_const(ctx, SYM, CNST) #define sym_is_bottom _Py_uop_sym_is_bottom #define sym_truthiness _Py_uop_sym_truthiness From 77bc293e6a959f8fca87a7d65acb47c8c079b4dd Mon Sep 17 00:00:00 2001 From: dpdani Date: Mon, 20 May 2024 21:31:24 +0200 Subject: [PATCH 08/39] wooohoooo --- Include/internal/pycore_optimizer.h | 7 ++++--- Lib/test/test_capi/test_opt.py | 27 +++++++++++++++++++++++++++ Python/optimizer_analysis.c | 8 +++++++- Python/optimizer_bytecodes.c | 6 +++--- Python/optimizer_cases.c.h | 9 +++++++++ Python/optimizer_symbols.c | 14 +++++++++++--- 6 files changed, 61 insertions(+), 10 deletions(-) diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index 138c202db8a58b..0aec3d7250d914 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -34,7 +34,7 @@ struct _Py_UopsSymbol { PyTypeObject *typ; // Borrowed reference PyObject *const_val; // Owned reference (!) int32_t typ_version; // currently stores type version - int32_t typ_version_offset; // bytecode offset where the last type version was set + int typ_version_offset; // bytecode offset where the last type version was set }; #define UOP_FORMAT_TARGET 0 @@ -98,6 +98,7 @@ struct _Py_UOpsContext { char done; char out_of_space; bool contradiction; + int64_t latest_escape_offset; // The current "executing" frame. _Py_UOpsAbstractFrame *frame; _Py_UOpsAbstractFrame frames[MAX_ABSTRACT_FRAME_DEPTH]; @@ -125,11 +126,11 @@ extern _Py_UopsSymbol *_Py_uop_sym_new_const(_Py_UOpsContext *ctx, PyObject *con extern _Py_UopsSymbol *_Py_uop_sym_new_null(_Py_UOpsContext *ctx); extern bool _Py_uop_sym_has_type(_Py_UopsSymbol *sym); extern bool _Py_uop_sym_matches_type(_Py_UopsSymbol *sym, PyTypeObject *typ); -extern bool _Py_uop_sym_matches_type_version(_Py_UopsSymbol *sym, int32_t version); +extern bool _Py_uop_sym_matches_type_version(_Py_UopsSymbol *sym, int32_t version, int offset); extern void _Py_uop_sym_set_null(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym); extern void _Py_uop_sym_set_non_null(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym); extern void _Py_uop_sym_set_type(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, PyTypeObject *typ); -extern void _Py_uop_sym_set_type_version(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, int32_t version); +extern void _Py_uop_sym_set_type_version(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, int32_t version, int offset); extern void _Py_uop_sym_set_const(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, PyObject *const_val); extern bool _Py_uop_sym_is_bottom(_Py_UopsSymbol *sym); extern int _Py_uop_sym_truthiness(_Py_UopsSymbol *sym); diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 9ec842862e54d9..78208eff729a4c 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -1356,6 +1356,33 @@ class Foo: guard_type_version_count = opnames.count("_GUARD_TYPE_VERSION") self.assertEqual(guard_type_version_count, 1) + def test_guard_type_version_not_removed(self): + def fn(): + pass + + def thing(a): + x = 0 + for _ in range(100): + x += a.attr + fn() + x += a.attr + return x + + class Foo: + attr = 1 + + breakpoint() + + res, ex = self._run_with_optimizer(thing, Foo()) + opnames = list(iter_opnames(ex)) + for i in iter_opnames(ex): + print(i) + + self.assertIsNotNone(ex) + self.assertEqual(res, 200) + guard_type_version_count = opnames.count("_GUARD_TYPE_VERSION") + self.assertEqual(guard_type_version_count, 2) + if __name__ == "__main__": unittest.main() diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 047d1a6a8cf9f0..6d7736ed15bd22 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -314,7 +314,7 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, #define sym_set_null(SYM) _Py_uop_sym_set_null(ctx, SYM) #define sym_set_non_null(SYM) _Py_uop_sym_set_non_null(ctx, SYM) #define sym_set_type(SYM, TYPE) _Py_uop_sym_set_type(ctx, SYM, TYPE) -#define sym_set_type_version(SYM, VERSION) _Py_uop_sym_set_type_version(ctx, SYM, VERSION) +#define sym_set_type_version(SYM, VERSION, OFFSET) _Py_uop_sym_set_type_version(ctx, SYM, VERSION, OFFSET) #define sym_set_const(SYM, CNST) _Py_uop_sym_set_const(ctx, SYM, CNST) #define sym_is_bottom _Py_uop_sym_is_bottom #define sym_truthiness _Py_uop_sym_truthiness @@ -406,6 +406,7 @@ optimize_uops( ctx->done = false; ctx->out_of_space = false; ctx->contradiction = false; + ctx->latest_escape_offset = 0; _PyUOpInstruction *this_instr = NULL; for (int i = 0; !ctx->done; i++) { @@ -416,6 +417,11 @@ optimize_uops( opcode = this_instr->opcode; _Py_UopsSymbol **stack_pointer = ctx->frame->stack_pointer; + if (_PyUop_Flags[opcode] & HAS_ESCAPES_FLAG) { + printf("opcode: %d ", opcode); + ctx->latest_escape_offset = i; // i is the offset we're looping on + } + #ifdef Py_DEBUG if (get_lltrace() >= 3) { printf("%4d abs: ", (int)(this_instr - trace)); diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 8a8158c3c50cf1..980d6e4bf1dd84 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -27,7 +27,7 @@ typedef struct _Py_UOpsAbstractFrame _Py_UOpsAbstractFrame; #define sym_set_null(SYM) _Py_uop_sym_set_null(ctx, SYM) #define sym_set_non_null(SYM) _Py_uop_sym_set_non_null(ctx, SYM) #define sym_set_type(SYM, TYPE) _Py_uop_sym_set_type(ctx, SYM, TYPE) -#define sym_set_type_version(SYM, VERSION) _Py_uop_sym_set_type_version(ctx, SYM, VERSION) +#define sym_set_type_version(SYM, VERSION, OFFSET) _Py_uop_sym_set_type_version(ctx, SYM, VERSION, OFFSET) #define sym_set_const(SYM, CNST) _Py_uop_sym_set_const(ctx, SYM, CNST) #define sym_is_bottom _Py_uop_sym_is_bottom #define frame_new _Py_uop_frame_new @@ -117,10 +117,10 @@ dummy_func(void) { op(_GUARD_TYPE_VERSION, (type_version/2, owner -- owner)) { printf("%d %d %d\n", type_version, owner->typ_version, owner->typ_version_offset); - if (sym_matches_type_version(owner, type_version)) { + if (sym_matches_type_version(owner, type_version, ctx->latest_escape_offset)) { REPLACE_OP(this_instr, _NOP, 0, 0); } - sym_set_type_version(owner, type_version); + sym_set_type_version(owner, type_version, i); } op(_GUARD_BOTH_FLOAT, (left, right -- left, right)) { diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index cd4431d55ad908..26e72ec4e7a987 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -935,6 +935,14 @@ } case _GUARD_TYPE_VERSION: { + _Py_UopsSymbol *owner; + owner = stack_pointer[-1]; + uint32_t type_version = (uint32_t)this_instr->operand; + printf("%d %d %d\n", type_version, owner->typ_version, owner->typ_version_offset); + if (sym_matches_type_version(owner, type_version, ctx->latest_escape_offset)) { + REPLACE_OP(this_instr, _NOP, 0, 0); + } + sym_set_type_version(owner, type_version, i); break; } @@ -1335,6 +1343,7 @@ _Py_UopsSymbol *iter; _Py_UopsSymbol *next; iter = stack_pointer[-1]; + printf("ciao"); next = sym_new_type(ctx, &PyLong_Type); (void)iter; stack_pointer[0] = next; diff --git a/Python/optimizer_symbols.c b/Python/optimizer_symbols.c index 2a56eb5d5f9fed..47ccc908fcccdb 100644 --- a/Python/optimizer_symbols.c +++ b/Python/optimizer_symbols.c @@ -157,9 +157,10 @@ _Py_uop_sym_set_type(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, PyTypeObject *ty } void -_Py_uop_sym_set_type_version(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, int32_t version) +_Py_uop_sym_set_type_version(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, int32_t version, int offset) { sym->typ_version = version; + sym->typ_version_offset = offset; } void @@ -272,6 +273,12 @@ _Py_uop_sym_get_type_version(_Py_UopsSymbol *sym) return sym->typ_version; } +int +_Py_uop_sym_get_type_version_offset(_Py_UopsSymbol *sym) +{ + return sym->typ_version_offset; +} + bool _Py_uop_sym_has_type(_Py_UopsSymbol *sym) { @@ -289,9 +296,10 @@ _Py_uop_sym_matches_type(_Py_UopsSymbol *sym, PyTypeObject *typ) } bool -_Py_uop_sym_matches_type_version(_Py_UopsSymbol *sym, int32_t version) +_Py_uop_sym_matches_type_version(_Py_UopsSymbol *sym, int32_t version, int offset) { - return _Py_uop_sym_get_type_version(sym) == version; + return _Py_uop_sym_get_type_version(sym) == version + && _Py_uop_sym_get_type_version_offset(sym) > offset; } From a2ebc49f8c385ae07f1ebaf890dc1272cbfcc787 Mon Sep 17 00:00:00 2001 From: dpdani Date: Mon, 20 May 2024 21:33:59 +0200 Subject: [PATCH 09/39] unprint --- Python/optimizer_analysis.c | 1 - Python/optimizer_bytecodes.c | 2 -- 2 files changed, 3 deletions(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 6d7736ed15bd22..75a8aa8f8573fe 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -418,7 +418,6 @@ optimize_uops( _Py_UopsSymbol **stack_pointer = ctx->frame->stack_pointer; if (_PyUop_Flags[opcode] & HAS_ESCAPES_FLAG) { - printf("opcode: %d ", opcode); ctx->latest_escape_offset = i; // i is the offset we're looping on } diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 980d6e4bf1dd84..c679f6b5674a59 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -116,7 +116,6 @@ dummy_func(void) { } op(_GUARD_TYPE_VERSION, (type_version/2, owner -- owner)) { - printf("%d %d %d\n", type_version, owner->typ_version, owner->typ_version_offset); if (sym_matches_type_version(owner, type_version, ctx->latest_escape_offset)) { REPLACE_OP(this_instr, _NOP, 0, 0); } @@ -712,7 +711,6 @@ dummy_func(void) { } op(_ITER_NEXT_RANGE, (iter -- iter, next)) { - printf("ciao"); next = sym_new_type(ctx, &PyLong_Type); (void)iter; } From e982837c15e0d846055a72c873279aedb1dfc1f1 Mon Sep 17 00:00:00 2001 From: Saul Shanabrook Date: Mon, 20 May 2024 15:35:33 -0400 Subject: [PATCH 10/39] Remove generated printf --- Python/optimizer_cases.c.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 26e72ec4e7a987..c4c820ba1d1e41 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -938,7 +938,6 @@ _Py_UopsSymbol *owner; owner = stack_pointer[-1]; uint32_t type_version = (uint32_t)this_instr->operand; - printf("%d %d %d\n", type_version, owner->typ_version, owner->typ_version_offset); if (sym_matches_type_version(owner, type_version, ctx->latest_escape_offset)) { REPLACE_OP(this_instr, _NOP, 0, 0); } @@ -1343,7 +1342,6 @@ _Py_UopsSymbol *iter; _Py_UopsSymbol *next; iter = stack_pointer[-1]; - printf("ciao"); next = sym_new_type(ctx, &PyLong_Type); (void)iter; stack_pointer[0] = next; From 0e7e7eb572d776d449fa7d64f51c879c734b88d6 Mon Sep 17 00:00:00 2001 From: Saul Shanabrook Date: Mon, 20 May 2024 15:35:44 -0400 Subject: [PATCH 11/39] Remove test breakpoints --- Lib/test/test_capi/test_opt.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 78208eff729a4c..07ec1f0ac2f22f 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -1344,13 +1344,8 @@ def thing(a): class Foo: attr = 1 - breakpoint() - res, ex = self._run_with_optimizer(thing, Foo()) opnames = list(iter_opnames(ex)) - for i in iter_opnames(ex): - print(i) - self.assertIsNotNone(ex) self.assertEqual(res, 200) guard_type_version_count = opnames.count("_GUARD_TYPE_VERSION") @@ -1371,12 +1366,8 @@ def thing(a): class Foo: attr = 1 - breakpoint() - res, ex = self._run_with_optimizer(thing, Foo()) opnames = list(iter_opnames(ex)) - for i in iter_opnames(ex): - print(i) self.assertIsNotNone(ex) self.assertEqual(res, 200) From 4060ab8e64488cf236dbbf8c4230501e8767443c Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Mon, 20 May 2024 20:15:49 +0000 Subject: [PATCH 12/39] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20b?= =?UTF-8?q?lurb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../2024-05-20-20-15-47.gh-issue-119258.6R2Vf4.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-05-20-20-15-47.gh-issue-119258.6R2Vf4.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-05-20-20-15-47.gh-issue-119258.6R2Vf4.rst b/Misc/NEWS.d/next/Core and Builtins/2024-05-20-20-15-47.gh-issue-119258.6R2Vf4.rst new file mode 100644 index 00000000000000..1f744f95805a0f --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-05-20-20-15-47.gh-issue-119258.6R2Vf4.rst @@ -0,0 +1 @@ +Tier 2 optimizer eliminate type version guards From 1520928862135e8420e0cadbfc2a63ce07972f92 Mon Sep 17 00:00:00 2001 From: Saul Shanabrook Date: Mon, 20 May 2024 16:55:29 -0400 Subject: [PATCH 13/39] =?UTF-8?q?Revert=20"=F0=9F=93=9C=F0=9F=A4=96=20Adde?= =?UTF-8?q?d=20by=20blurb=5Fit."?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 4060ab8e64488cf236dbbf8c4230501e8767443c --- .../2024-05-20-20-15-47.gh-issue-119258.6R2Vf4.rst | 1 - 1 file changed, 1 deletion(-) delete mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-05-20-20-15-47.gh-issue-119258.6R2Vf4.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-05-20-20-15-47.gh-issue-119258.6R2Vf4.rst b/Misc/NEWS.d/next/Core and Builtins/2024-05-20-20-15-47.gh-issue-119258.6R2Vf4.rst deleted file mode 100644 index 1f744f95805a0f..00000000000000 --- a/Misc/NEWS.d/next/Core and Builtins/2024-05-20-20-15-47.gh-issue-119258.6R2Vf4.rst +++ /dev/null @@ -1 +0,0 @@ -Tier 2 optimizer eliminate type version guards From 5e0792b5d2d319cf046b306f2cec0d609a710add Mon Sep 17 00:00:00 2001 From: Saul Shanabrook Date: Tue, 21 May 2024 17:24:23 -0400 Subject: [PATCH 14/39] Use global type watcher to invalidate type versions --- Include/internal/pycore_optimizer.h | 6 +- Include/internal/pycore_typeobject.h | 12 + Lib/test/test_capi/test_opt.py | 362 +++++++++++++++++++-------- Objects/typeobject.c | 91 ++++++- Python/optimizer_analysis.c | 19 +- Python/optimizer_bytecodes.c | 16 +- Python/optimizer_cases.c.h | 13 +- Python/optimizer_symbols.c | 16 +- 8 files changed, 392 insertions(+), 143 deletions(-) diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index 0aec3d7250d914..50c700a8b548c2 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -34,7 +34,6 @@ struct _Py_UopsSymbol { PyTypeObject *typ; // Borrowed reference PyObject *const_val; // Owned reference (!) int32_t typ_version; // currently stores type version - int typ_version_offset; // bytecode offset where the last type version was set }; #define UOP_FORMAT_TARGET 0 @@ -98,7 +97,6 @@ struct _Py_UOpsContext { char done; char out_of_space; bool contradiction; - int64_t latest_escape_offset; // The current "executing" frame. _Py_UOpsAbstractFrame *frame; _Py_UOpsAbstractFrame frames[MAX_ABSTRACT_FRAME_DEPTH]; @@ -126,11 +124,11 @@ extern _Py_UopsSymbol *_Py_uop_sym_new_const(_Py_UOpsContext *ctx, PyObject *con extern _Py_UopsSymbol *_Py_uop_sym_new_null(_Py_UOpsContext *ctx); extern bool _Py_uop_sym_has_type(_Py_UopsSymbol *sym); extern bool _Py_uop_sym_matches_type(_Py_UopsSymbol *sym, PyTypeObject *typ); -extern bool _Py_uop_sym_matches_type_version(_Py_UopsSymbol *sym, int32_t version, int offset); +extern bool _Py_uop_sym_matches_type_version(_Py_UopsSymbol *sym, int32_t version); extern void _Py_uop_sym_set_null(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym); extern void _Py_uop_sym_set_non_null(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym); extern void _Py_uop_sym_set_type(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, PyTypeObject *typ); -extern void _Py_uop_sym_set_type_version(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, int32_t version, int offset); +extern void _Py_uop_sym_set_type_version(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, int32_t version); extern void _Py_uop_sym_set_const(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, PyObject *const_val); extern bool _Py_uop_sym_is_bottom(_Py_UopsSymbol *sym); extern int _Py_uop_sym_truthiness(_Py_UopsSymbol *sym); diff --git a/Include/internal/pycore_typeobject.h b/Include/internal/pycore_typeobject.h index 7e533bd138469b..394400ba4ba08d 100644 --- a/Include/internal/pycore_typeobject.h +++ b/Include/internal/pycore_typeobject.h @@ -61,6 +61,8 @@ typedef struct { PyObject *tp_weaklist; } static_builtin_state; +#define TYPE_VERSION_CACHE_SIZE (1<<12) /* Must be a power of 2 */ + struct types_state { /* Used to set PyTypeObject.tp_version_tag. It starts at _Py_MAX_GLOBAL_TYPE_VERSION_TAG + 1, @@ -108,6 +110,12 @@ struct types_state { size_t num_builtins_initialized; static_builtin_state builtins[_Py_MAX_STATIC_BUILTIN_TYPES]; PyMutex mutex; + + // Borrowed references to type objects whose + // func_version % TYPE_VERSION_CACHE_SIZE + // once was equal to the index in the table. + // They are cleared when the type object is deallocated. + PyTypeObject* type_version_cache[TYPE_VERSION_CACHE_SIZE]; }; @@ -202,6 +210,10 @@ extern void _PyType_SetFlags(PyTypeObject *self, unsigned long mask, extern void _PyType_SetFlagsRecursive(PyTypeObject *self, unsigned long mask, unsigned long flags); +extern unsigned int _PyType_GetVersionForCurrentState(PyTypeObject *tp); +PyAPI_FUNC(void) _PyType_SetVersion(PyTypeObject *tp, unsigned int version); +void _PyType_ClearCodeByVersion(unsigned int version); +PyTypeObject *_PyType_LookupByVersion(unsigned int version); #ifdef __cplusplus } diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 07ec1f0ac2f22f..3ae128d81e8462 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -13,6 +13,7 @@ from _testinternalcapi import TIER2_THRESHOLD + @contextlib.contextmanager def temporary_optimizer(opt): old_opt = _testinternalcapi.get_optimizer() @@ -34,10 +35,10 @@ def clear_executors(func): @requires_specialization -@unittest.skipUnless(hasattr(_testinternalcapi, "get_optimizer"), - "Requires optimizer infrastructure") +@unittest.skipUnless( + hasattr(_testinternalcapi, "get_optimizer"), "Requires optimizer infrastructure" +) class TestOptimizerAPI(unittest.TestCase): - def test_new_counter_optimizer_dealloc(self): # See gh-108727 def f(): @@ -56,16 +57,21 @@ def test_get_set_optimizer(self): finally: _testinternalcapi.set_optimizer(old) - def test_counter_optimizer(self): # Generate a new function at each call ns = {} - exec(textwrap.dedent(""" + exec( + textwrap.dedent( + """ def loop(): for _ in range(1000): pass - """), ns, ns) - loop = ns['loop'] + """ + ), + ns, + ns, + ) + loop = ns["loop"] for repeat in range(5): opt = _testinternalcapi.new_counter_optimizer() @@ -81,7 +87,9 @@ def test_long_loop(self): # Generate a new function at each call ns = {} - exec(textwrap.dedent(""" + exec( + textwrap.dedent( + """ def nop(): pass @@ -94,14 +102,20 @@ def long_loop(): nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop(); - """), ns, ns) - long_loop = ns['long_loop'] + """ + ), + ns, + ns, + ) + long_loop = ns["long_loop"] opt = _testinternalcapi.new_counter_optimizer() with temporary_optimizer(opt): self.assertEqual(opt.get_count(), 0) long_loop() - self.assertEqual(opt.get_count(), 20 - TIER2_THRESHOLD) # Need iterations to warm up + self.assertEqual( + opt.get_count(), 20 - TIER2_THRESHOLD + ) # Need iterations to warm up def test_code_restore_for_ENTER_EXECUTOR(self): def testfunc(x): @@ -112,7 +126,7 @@ def testfunc(x): opt = _testinternalcapi.new_counter_optimizer() with temporary_optimizer(opt): testfunc(1000) - code, replace_code = testfunc.__code__, testfunc.__code__.replace() + code, replace_code = testfunc.__code__, testfunc.__code__.replace() self.assertEqual(code, replace_code) self.assertEqual(hash(code), hash(replace_code)) @@ -138,10 +152,10 @@ def get_opnames(ex): @requires_specialization -@unittest.skipUnless(hasattr(_testinternalcapi, "get_optimizer"), - "Requires optimizer infrastructure") +@unittest.skipUnless( + hasattr(_testinternalcapi, "get_optimizer"), "Requires optimizer infrastructure" +) class TestExecutorInvalidation(unittest.TestCase): - def setUp(self): self.old = _testinternalcapi.get_optimizer() self.opt = _testinternalcapi.new_counter_optimizer() @@ -158,10 +172,11 @@ def test_invalidate_object(self): def f{n}(): for _ in range(1000): pass - """ for n in range(5) + """ + for n in range(5) ) exec(textwrap.dedent(func_src), ns, ns) - funcs = [ ns[f'f{n}'] for n in range(5)] + funcs = [ns[f"f{n}"] for n in range(5)] objects = [object() for _ in range(5)] for f in funcs: @@ -171,13 +186,13 @@ def f{n}(): # with an equal or lower index. for i, exe in enumerate(executors): self.assertTrue(exe.is_valid()) - for obj in objects[:i+1]: + for obj in objects[: i + 1]: _testinternalcapi.add_executor_dependency(exe, obj) self.assertTrue(exe.is_valid()) # Assert that the correct executors are invalidated # and check that nothing crashes when we invalidate # an executor mutliple times. - for i in (4,3,2,1,0): + for i in (4, 3, 2, 1, 0): _testinternalcapi.invalidate_executors(objects[i]) for exe in executors[i:]: self.assertFalse(exe.is_valid()) @@ -187,12 +202,18 @@ def f{n}(): def test_uop_optimizer_invalidation(self): # Generate a new function at each call ns = {} - exec(textwrap.dedent(""" + exec( + textwrap.dedent( + """ def f(): for i in range(1000): pass - """), ns, ns) - f = ns['f'] + """ + ), + ns, + ns, + ) + f = ns["f"] opt = _testinternalcapi.new_uop_optimizer() with temporary_optimizer(opt): f() @@ -206,6 +227,7 @@ def test_sys__clear_internal_caches(self): def f(): for _ in range(1000): pass + opt = _testinternalcapi.new_uop_optimizer() with temporary_optimizer(opt): f() @@ -219,11 +241,13 @@ def f(): @requires_specialization -@unittest.skipUnless(hasattr(_testinternalcapi, "get_optimizer"), - "Requires optimizer infrastructure") -@unittest.skipIf(os.getenv("PYTHON_UOPS_OPTIMIZE") == "0", "Needs uop optimizer to run.") +@unittest.skipUnless( + hasattr(_testinternalcapi, "get_optimizer"), "Requires optimizer infrastructure" +) +@unittest.skipIf( + os.getenv("PYTHON_UOPS_OPTIMIZE") == "0", "Needs uop optimizer to run." +) class TestUops(unittest.TestCase): - def test_basic_loop(self): def testfunc(x): i = 0 @@ -243,7 +267,9 @@ def testfunc(x): def test_extended_arg(self): "Check EXTENDED_ARG handling in superblock creation" ns = {} - exec(textwrap.dedent(""" + exec( + textwrap.dedent( + """ def many_vars(): # 260 vars, so z9 should have index 259 a0 = a1 = a2 = a3 = a4 = a5 = a6 = a7 = a8 = a9 = 42 @@ -274,7 +300,11 @@ def many_vars(): z0 = z1 = z2 = z3 = z4 = z5 = z6 = z7 = z8 = z9 = 42 while z9 > 0: z9 = z9 - 1 - """), ns, ns) + """ + ), + ns, + ns, + ) many_vars = ns["many_vars"] opt = _testinternalcapi.new_uop_optimizer() @@ -285,8 +315,12 @@ def many_vars(): ex = get_first_executor(many_vars) self.assertIsNotNone(ex) - self.assertTrue(any((opcode, oparg, operand) == ("_LOAD_FAST", 259, 0) - for opcode, oparg, _, operand in list(ex))) + self.assertTrue( + any( + (opcode, oparg, operand) == ("_LOAD_FAST", 259, 0) + for opcode, oparg, _, operand in list(ex) + ) + ) def test_unspecialized_unpack(self): # An example of an unspecialized opcode @@ -492,7 +526,8 @@ def testfunc(it): def test_call_py_exact_args(self): def testfunc(n): def dummy(x): - return x+1 + return x + 1 + for i in range(n): dummy(i) @@ -527,8 +562,10 @@ def test_for_iter_tier_two(self): class MyIter: def __init__(self, n): self.n = n + def __iter__(self): return self + def __next__(self): self.n -= 1 if self.n < 0: @@ -539,7 +576,7 @@ def testfunc(n, m): x = 0 for i in range(m): for j in MyIter(n): - x += 1000*i + j + x += 1000 * i + j return x opt = _testinternalcapi.new_uop_optimizer() @@ -561,13 +598,13 @@ def testfunc(n): bits += 1 if i & 0x02: bits += 1 - if i&0x04: + if i & 0x04: bits += 1 - if i&0x08: + if i & 0x08: bits += 1 - if i&0x10: + if i & 0x10: bits += 1 - if i&0x20: + if i & 0x20: bits += 1 return bits @@ -579,17 +616,19 @@ def testfunc(n): ex = get_first_executor(testfunc) self.assertIsNotNone(ex) ops = list(iter_opnames(ex)) - #Since branch is 50/50 the trace could go either way. + # Since branch is 50/50 the trace could go either way. count = ops.count("_GUARD_IS_TRUE_POP") + ops.count("_GUARD_IS_FALSE_POP") self.assertLessEqual(count, 2) @requires_specialization -@unittest.skipUnless(hasattr(_testinternalcapi, "get_optimizer"), - "Requires optimizer infrastructure") -@unittest.skipIf(os.getenv("PYTHON_UOPS_OPTIMIZE") == "0", "Needs uop optimizer to run.") +@unittest.skipUnless( + hasattr(_testinternalcapi, "get_optimizer"), "Requires optimizer infrastructure" +) +@unittest.skipIf( + os.getenv("PYTHON_UOPS_OPTIMIZE") == "0", "Needs uop optimizer to run." +) class TestUopsOptimization(unittest.TestCase): - def _run_with_optimizer(self, testfunc, arg): res = None opt = _testinternalcapi.new_uop_optimizer() @@ -599,7 +638,6 @@ def _run_with_optimizer(self, testfunc, arg): ex = get_first_executor(testfunc) return res, ex - def test_int_type_propagation(self): def testfunc(loops): num = 0 @@ -612,14 +650,19 @@ def testfunc(loops): res, ex = self._run_with_optimizer(testfunc, 32) self.assertIsNotNone(ex) self.assertEqual(res, 63) - binop_count = [opname for opname in iter_opnames(ex) if opname == "_BINARY_OP_ADD_INT"] - guard_both_int_count = [opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_INT"] + binop_count = [ + opname for opname in iter_opnames(ex) if opname == "_BINARY_OP_ADD_INT" + ] + guard_both_int_count = [ + opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_INT" + ] self.assertGreaterEqual(len(binop_count), 3) self.assertLessEqual(len(guard_both_int_count), 1) def test_int_type_propagation_through_frame(self): def double(x): return x + x + def testfunc(loops): num = 0 for i in range(loops): @@ -636,14 +679,19 @@ def testfunc(loops): ex = get_first_executor(testfunc) self.assertIsNotNone(ex) self.assertEqual(res, 124) - binop_count = [opname for opname in iter_opnames(ex) if opname == "_BINARY_OP_ADD_INT"] - guard_both_int_count = [opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_INT"] + binop_count = [ + opname for opname in iter_opnames(ex) if opname == "_BINARY_OP_ADD_INT" + ] + guard_both_int_count = [ + opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_INT" + ] self.assertGreaterEqual(len(binop_count), 3) self.assertLessEqual(len(guard_both_int_count), 1) def test_int_type_propagation_from_frame(self): def double(x): return x + x + def testfunc(loops): num = 0 for i in range(loops): @@ -660,8 +708,12 @@ def testfunc(loops): ex = get_first_executor(testfunc) self.assertIsNotNone(ex) self.assertEqual(res, 124) - binop_count = [opname for opname in iter_opnames(ex) if opname == "_BINARY_OP_ADD_INT"] - guard_both_int_count = [opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_INT"] + binop_count = [ + opname for opname in iter_opnames(ex) if opname == "_BINARY_OP_ADD_INT" + ] + guard_both_int_count = [ + opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_INT" + ] self.assertGreaterEqual(len(binop_count), 3) self.assertLessEqual(len(guard_both_int_count), 1) @@ -678,13 +730,16 @@ def testfunc(loops): res, ex = self._run_with_optimizer(testfunc, 64) self.assertIsNotNone(ex) - binop_count = [opname for opname in iter_opnames(ex) if opname == "_BINARY_OP_ADD_INT"] + binop_count = [ + opname for opname in iter_opnames(ex) if opname == "_BINARY_OP_ADD_INT" + ] self.assertGreaterEqual(len(binop_count), 3) def test_call_py_exact_args(self): def testfunc(n): def dummy(x): - return x+1 + return x + 1 + for i in range(n): dummy(i) @@ -697,7 +752,6 @@ def dummy(x): def test_int_type_propagate_through_range(self): def testfunc(n): - for i in range(n): x = i + i return x @@ -710,7 +764,6 @@ def testfunc(n): def test_int_value_numbering(self): def testfunc(n): - y = 1 for i in range(n): x = y @@ -725,7 +778,9 @@ def testfunc(n): self.assertIsNotNone(ex) uops = get_opnames(ex) self.assertIn("_GUARD_BOTH_INT", uops) - guard_count = [opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_INT"] + guard_count = [ + opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_INT" + ] self.assertEqual(len(guard_count), 1) def test_comprehension(self): @@ -741,7 +796,7 @@ def testfunc(n): def test_call_py_exact_args_disappearing(self): def dummy(x): - return x+1 + return x + 1 def testfunc(n): for i in range(n): @@ -756,6 +811,7 @@ def testfunc(n): def dummy(x): return x + 2 + testfunc(32) ex = get_first_executor(testfunc) @@ -765,8 +821,10 @@ def dummy(x): # This test is a little implementation specific. def test_promote_globals_to_constants(self): - - result = script_helper.run_python_until_end('-c', textwrap.dedent(""" + result = script_helper.run_python_until_end( + "-c", + textwrap.dedent( + """ import _testinternalcapi import opcode import _opcode @@ -798,7 +856,9 @@ def testfunc(n): uops = get_opnames(ex) assert "_LOAD_GLOBAL_BUILTINS" not in uops assert "_LOAD_CONST_INLINE_BORROW_WITH_NULL" in uops - """)) + """ + ), + ) self.assertEqual(result[0].rc, 0, result) def test_float_add_constant_propagation(self): @@ -815,7 +875,9 @@ def testfunc(n): self.assertAlmostEqual(res, 33.0) self.assertIsNotNone(ex) uops = get_opnames(ex) - guard_both_float_count = [opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_FLOAT"] + guard_both_float_count = [ + opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_FLOAT" + ] self.assertLessEqual(len(guard_both_float_count), 1) # TODO gh-115506: this assertion may change after propagating constants. # We'll also need to verify that propagation actually occurs. @@ -835,7 +897,9 @@ def testfunc(n): self.assertAlmostEqual(res, -31.0) self.assertIsNotNone(ex) uops = get_opnames(ex) - guard_both_float_count = [opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_FLOAT"] + guard_both_float_count = [ + opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_FLOAT" + ] self.assertLessEqual(len(guard_both_float_count), 1) # TODO gh-115506: this assertion may change after propagating constants. # We'll also need to verify that propagation actually occurs. @@ -855,7 +919,9 @@ def testfunc(n): self.assertAlmostEqual(res, 1.0) self.assertIsNotNone(ex) uops = get_opnames(ex) - guard_both_float_count = [opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_FLOAT"] + guard_both_float_count = [ + opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_FLOAT" + ] self.assertLessEqual(len(guard_both_float_count), 1) # TODO gh-115506: this assertion may change after propagating constants. # We'll also need to verify that propagation actually occurs. @@ -875,7 +941,9 @@ def testfunc(n): self.assertEqual(res, "") self.assertIsNotNone(ex) uops = get_opnames(ex) - guard_both_unicode_count = [opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_UNICODE"] + guard_both_unicode_count = [ + opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_UNICODE" + ] self.assertLessEqual(len(guard_both_unicode_count), 1) self.assertIn("_BINARY_OP_ADD_UNICODE", uops) @@ -893,7 +961,9 @@ def testfunc(n): self.assertTrue(res) self.assertIsNotNone(ex) uops = get_opnames(ex) - guard_both_float_count = [opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_FLOAT"] + guard_both_float_count = [ + opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_FLOAT" + ] self.assertLessEqual(len(guard_both_float_count), 1) self.assertIn("_COMPARE_OP_FLOAT", uops) @@ -911,7 +981,9 @@ def testfunc(n): self.assertTrue(res) self.assertIsNotNone(ex) uops = get_opnames(ex) - guard_both_int_count = [opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_INT"] + guard_both_int_count = [ + opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_INT" + ] self.assertLessEqual(len(guard_both_int_count), 1) self.assertIn("_COMPARE_OP_INT", uops) @@ -929,8 +1001,12 @@ def testfunc(n): self.assertEqual(res, 1) self.assertIsNotNone(ex) uops = get_opnames(ex) - guard_left_int_count = [opname for opname in iter_opnames(ex) if opname == "_GUARD_NOS_INT"] - guard_both_int_count = [opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_INT"] + guard_left_int_count = [ + opname for opname in iter_opnames(ex) if opname == "_GUARD_NOS_INT" + ] + guard_both_int_count = [ + opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_INT" + ] self.assertLessEqual(len(guard_left_int_count), 1) self.assertEqual(len(guard_both_int_count), 0) self.assertIn("_COMPARE_OP_INT", uops) @@ -949,8 +1025,12 @@ def testfunc(n): self.assertEqual(res, 1) self.assertIsNotNone(ex) uops = get_opnames(ex) - guard_left_float_count = [opname for opname in iter_opnames(ex) if opname == "_GUARD_NOS_FLOAT"] - guard_both_float_count = [opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_FLOAT"] + guard_left_float_count = [ + opname for opname in iter_opnames(ex) if opname == "_GUARD_NOS_FLOAT" + ] + guard_both_float_count = [ + opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_FLOAT" + ] self.assertLessEqual(len(guard_left_float_count), 1) self.assertEqual(len(guard_both_float_count), 0) self.assertIn("_COMPARE_OP_FLOAT", uops) @@ -969,23 +1049,27 @@ def testfunc(n): self.assertTrue(res) self.assertIsNotNone(ex) uops = get_opnames(ex) - guard_both_float_count = [opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_UNICODE"] + guard_both_float_count = [ + opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_UNICODE" + ] self.assertLessEqual(len(guard_both_float_count), 1) self.assertIn("_COMPARE_OP_STR", uops) def test_type_inconsistency(self): ns = {} - src = textwrap.dedent(""" + src = textwrap.dedent( + """ def testfunc(n): for i in range(n): x = _test_global + _test_global - """) + """ + ) exec(src, ns, ns) - testfunc = ns['testfunc'] - ns['_test_global'] = 0 + testfunc = ns["testfunc"] + ns["_test_global"] = 0 _, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD) self.assertIsNone(ex) - ns['_test_global'] = 1 + ns["_test_global"] = 1 _, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD) self.assertIsNotNone(ex) uops = get_opnames(ex) @@ -995,20 +1079,22 @@ def testfunc(n): # This should result in no executor the second time. ns = {} exec(src, ns, ns) - testfunc = ns['testfunc'] - ns['_test_global'] = 0 + testfunc = ns["testfunc"] + ns["_test_global"] = 0 _, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD) self.assertIsNone(ex) - ns['_test_global'] = 3.14 + ns["_test_global"] = 3.14 _, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD) self.assertIsNone(ex) def test_combine_stack_space_checks_sequential(self): def dummy12(x): return x - 1 + def dummy13(y): z = y + 2 return y, z + def testfunc(n): a = 0 for _ in range(n): @@ -1034,9 +1120,11 @@ def testfunc(n): def test_combine_stack_space_checks_nested(self): def dummy12(x): return x + 3 + def dummy15(y): z = dummy12(y) return y, z + def testfunc(n): a = 0 for _ in range(n): @@ -1055,22 +1143,24 @@ def testfunc(n): self.assertEqual(uop_names.count("_CHECK_STACK_SPACE"), 0) self.assertEqual(uop_names.count("_CHECK_STACK_SPACE_OPERAND"), 1) # nested calls: 15 + 12 == 27 - largest_stack = ( - _testinternalcapi.get_co_framesize(dummy15.__code__) + - _testinternalcapi.get_co_framesize(dummy12.__code__) - ) + largest_stack = _testinternalcapi.get_co_framesize( + dummy15.__code__ + ) + _testinternalcapi.get_co_framesize(dummy12.__code__) self.assertIn(("_CHECK_STACK_SPACE_OPERAND", largest_stack), uops_and_operands) def test_combine_stack_space_checks_several_calls(self): def dummy12(x): return x + 3 + def dummy13(y): z = y + 2 return y, z + def dummy18(y): z = dummy12(y) x, w = dummy13(z) return z, x, w + def testfunc(n): a = 0 for _ in range(n): @@ -1090,23 +1180,25 @@ def testfunc(n): self.assertEqual(uop_names.count("_CHECK_STACK_SPACE"), 0) self.assertEqual(uop_names.count("_CHECK_STACK_SPACE_OPERAND"), 1) # max(12, 18 + max(12, 13)) == 31 - largest_stack = ( - _testinternalcapi.get_co_framesize(dummy18.__code__) + - _testinternalcapi.get_co_framesize(dummy13.__code__) - ) + largest_stack = _testinternalcapi.get_co_framesize( + dummy18.__code__ + ) + _testinternalcapi.get_co_framesize(dummy13.__code__) self.assertIn(("_CHECK_STACK_SPACE_OPERAND", largest_stack), uops_and_operands) def test_combine_stack_space_checks_several_calls_different_order(self): # same as `several_calls` but with top-level calls reversed def dummy12(x): return x + 3 + def dummy13(y): z = y + 2 return y, z + def dummy18(y): z = dummy12(y) x, w = dummy13(z) return z, x, w + def testfunc(n): a = 0 for _ in range(n): @@ -1126,32 +1218,38 @@ def testfunc(n): self.assertEqual(uop_names.count("_CHECK_STACK_SPACE"), 0) self.assertEqual(uop_names.count("_CHECK_STACK_SPACE_OPERAND"), 1) # max(18 + max(12, 13), 12) == 31 - largest_stack = ( - _testinternalcapi.get_co_framesize(dummy18.__code__) + - _testinternalcapi.get_co_framesize(dummy13.__code__) - ) + largest_stack = _testinternalcapi.get_co_framesize( + dummy18.__code__ + ) + _testinternalcapi.get_co_framesize(dummy13.__code__) self.assertIn(("_CHECK_STACK_SPACE_OPERAND", largest_stack), uops_and_operands) def test_combine_stack_space_complex(self): def dummy0(x): return x + def dummy1(x): return dummy0(x) + def dummy2(x): return dummy1(x) + def dummy3(x): return dummy0(x) + def dummy4(x): y = dummy0(x) return dummy3(y) + def dummy5(x): return dummy2(x) + def dummy6(x): y = dummy5(x) z = dummy0(y) return dummy4(z) + def testfunc(n): - a = 0; + a = 0 for _ in range(32): b = dummy5(1) c = dummy0(1) @@ -1171,15 +1269,13 @@ def testfunc(n): self.assertEqual(uop_names.count("_CHECK_STACK_SPACE"), 0) self.assertEqual(uop_names.count("_CHECK_STACK_SPACE_OPERAND"), 1) largest_stack = ( - _testinternalcapi.get_co_framesize(dummy6.__code__) + - _testinternalcapi.get_co_framesize(dummy5.__code__) + - _testinternalcapi.get_co_framesize(dummy2.__code__) + - _testinternalcapi.get_co_framesize(dummy1.__code__) + - _testinternalcapi.get_co_framesize(dummy0.__code__) - ) - self.assertIn( - ("_CHECK_STACK_SPACE_OPERAND", largest_stack), uops_and_operands + _testinternalcapi.get_co_framesize(dummy6.__code__) + + _testinternalcapi.get_co_framesize(dummy5.__code__) + + _testinternalcapi.get_co_framesize(dummy2.__code__) + + _testinternalcapi.get_co_framesize(dummy1.__code__) + + _testinternalcapi.get_co_framesize(dummy0.__code__) ) + self.assertIn(("_CHECK_STACK_SPACE_OPERAND", largest_stack), uops_and_operands) def test_combine_stack_space_checks_large_framesize(self): # Create a function with a large framesize. This ensures _CHECK_STACK_SPACE is @@ -1191,14 +1287,19 @@ def test_combine_stack_space_checks_large_framesize(self): header = """ def dummy_large(a0): """ - body = "".join([f""" + body = "".join( + [ + f""" a{n+1} = a{n} + 1 - """ for n in range(repetitions)]) + """ + for n in range(repetitions) + ] + ) return_ = f""" return a{repetitions-1} """ exec(textwrap.dedent(header + body + return_), ns, ns) - dummy_large = ns['dummy_large'] + dummy_large = ns["dummy_large"] # this is something like: # @@ -1232,21 +1333,19 @@ def testfunc(n): # so we need to account for both possibilities self.assertIn(uop_names.count("_CHECK_STACK_SPACE"), [0, 1]) if uop_names.count("_CHECK_STACK_SPACE") == 0: - largest_stack = ( - _testinternalcapi.get_co_framesize(dummy15.__code__) + - _testinternalcapi.get_co_framesize(dummy_large.__code__) - ) + largest_stack = _testinternalcapi.get_co_framesize( + dummy15.__code__ + ) + _testinternalcapi.get_co_framesize(dummy_large.__code__) else: largest_stack = _testinternalcapi.get_co_framesize(dummy15.__code__) - self.assertIn( - ("_CHECK_STACK_SPACE_OPERAND", largest_stack), uops_and_operands - ) + self.assertIn(("_CHECK_STACK_SPACE_OPERAND", largest_stack), uops_and_operands) def test_combine_stack_space_checks_recursion(self): def dummy15(x): while x > 0: return dummy15(x - 1) return 42 + def testfunc(n): a = 0 for _ in range(n): @@ -1270,20 +1369,28 @@ def test_many_nested(self): # overflow the trace_stack def dummy_a(x): return x + def dummy_b(x): return dummy_a(x) + def dummy_c(x): return dummy_b(x) + def dummy_d(x): return dummy_c(x) + def dummy_e(x): return dummy_d(x) + def dummy_f(x): return dummy_e(x) + def dummy_g(x): return dummy_f(x) + def dummy_h(x): return dummy_g(x) + def testfunc(n): a = 0 for _ in range(n): @@ -1297,10 +1404,12 @@ def testfunc(n): def test_return_generator(self): def gen(): yield None + def testfunc(n): for i in range(n): gen() return i + res, ex = self._run_with_optimizer(testfunc, 20) self.assertEqual(res, 19) self.assertIsNotNone(ex) @@ -1310,12 +1419,14 @@ def test_for_iter_gen(self): def gen(n): for i in range(n): yield i + def testfunc(n): g = gen(n) s = 0 for i in g: s += i return s + res, ex = self._run_with_optimizer(testfunc, 20) self.assertEqual(res, 190) self.assertIsNotNone(ex) @@ -1351,7 +1462,11 @@ class Foo: guard_type_version_count = opnames.count("_GUARD_TYPE_VERSION") self.assertEqual(guard_type_version_count, 1) - def test_guard_type_version_not_removed(self): + def test_guard_type_version_removed_exiting(self): + """ + Verify that the guard type version if we have an exiting function + """ + def fn(): pass @@ -1368,10 +1483,39 @@ class Foo: res, ex = self._run_with_optimizer(thing, Foo()) opnames = list(iter_opnames(ex)) - self.assertIsNotNone(ex) self.assertEqual(res, 200) guard_type_version_count = opnames.count("_GUARD_TYPE_VERSION") + self.assertEqual(guard_type_version_count, 1) + + def test_guard_type_version_not_remove(self): + """ + Verify that the guard type version is not removed if we Modify the class + """ + + def thing(a): + x = 0 + for i in range(100): + x += a.attr + # for the first 90 iterations we set the attribute on this dummy function which shouldn't + # trigger the type watcher + # then after 90 it should trigger it and stop optimizing + setattr((Foo, xxx)[i < 90], "attr", 0) + x += a.attr + return x + + class Foo: + attr = 1 + + def xxx(): + pass + + res, ex = self._run_with_optimizer(thing, Foo()) + opnames = list(iter_opnames(ex)) + + self.assertIsNotNone(ex) + self.assertEqual(res, 181) + guard_type_version_count = opnames.count("_GUARD_TYPE_VERSION") self.assertEqual(guard_type_version_count, 2) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index b7c3fcf47f23fc..cacc49e243de59 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -787,7 +787,8 @@ PyType_AddWatcher(PyType_WatchCallback callback) { PyInterpreterState *interp = _PyInterpreterState_GET(); - for (int i = 0; i < TYPE_MAX_WATCHERS; i++) { + // start at 1, 0 is reserved for cpython optimizer + for (int i = 1; i < TYPE_MAX_WATCHERS; i++) { if (!interp->type_watchers[i]) { interp->type_watchers[i] = callback; return i; @@ -1043,6 +1044,82 @@ type_mro_modified(PyTypeObject *type, PyObject *bases) { } } +/* +The Tier 2 interpreter requires looking up the type object by the type version, so it can install +watchers to understand when they change. + +So we add a global cache from type version to weak references of type objects. + +This is similar to how the cache used with function objects and versions. +*/ + +void +_PyType_SetVersion(PyTypeObject *tp, unsigned int version) +{ +#ifndef Py_GIL_DISABLED + PyInterpreterState *interp = _PyInterpreterState_GET(); + // lookup the old version and set to null + if (tp->tp_version_tag != 0) { + PyTypeObject **slot = + interp->types.type_version_cache + + (tp->tp_version_tag % TYPE_VERSION_CACHE_SIZE); + *slot = NULL; + } +#endif + tp->tp_version_tag = version; +#ifndef Py_GIL_DISABLED + if (version != 0) { + PyTypeObject **slot = + interp->types.type_version_cache + + (version % TYPE_VERSION_CACHE_SIZE); + *slot = tp; + } +#endif +} + +void +_PyType_ClearCodeByVersion(unsigned int version) +{ +#ifndef Py_GIL_DISABLED + PyInterpreterState *interp = _PyInterpreterState_GET(); + PyTypeObject **slot = + interp->types.type_version_cache + + (version % TYPE_VERSION_CACHE_SIZE); + if (*slot) { + assert(PyType_Check(*slot)); + if ((*slot)->tp_version_tag == version) { + *slot = NULL; + } + } +#endif +} + +PyTypeObject * +_PyType_LookupByVersion(unsigned int version) +{ +#ifdef Py_GIL_DISABLED + return NULL; +#else + PyInterpreterState *interp = _PyInterpreterState_GET(); + PyTypeObject **slot = + interp->types.type_version_cache + + (version % TYPE_VERSION_CACHE_SIZE); + printf("slot: %p\n", slot); + if (*slot && (*slot)->tp_version_tag == version) { + return *slot; + } + return NULL; +#endif +} + +unsigned int +_PyType_GetVersionForCurrentState(PyTypeObject *tp) +{ + return tp->tp_version_tag; +} + + + #define MAX_VERSIONS_PER_CLASS 1000 static int @@ -1071,8 +1148,9 @@ assign_version_tag(PyInterpreterState *interp, PyTypeObject *type) /* We have run out of version numbers */ return 0; } - FT_ATOMIC_STORE_UINT32_RELAXED(type->tp_version_tag, - NEXT_GLOBAL_VERSION_TAG++); + _PyType_SetVersion(type, NEXT_GLOBAL_VERSION_TAG++); + // FT_ATOMIC_STORE_UINT32_RELAXED(type->tp_version_tag, + // NEXT_GLOBAL_VERSION_TAG++); assert (type->tp_version_tag <= _Py_MAX_GLOBAL_TYPE_VERSION_TAG); } else { @@ -1081,8 +1159,9 @@ assign_version_tag(PyInterpreterState *interp, PyTypeObject *type) /* We have run out of version numbers */ return 0; } - FT_ATOMIC_STORE_UINT32_RELAXED(type->tp_version_tag, - NEXT_VERSION_TAG(interp)++); + _PyType_SetVersion(type, NEXT_VERSION_TAG(interp)++); + // FT_ATOMIC_STORE_UINT32_RELAXED(type->tp_version_tag, + // NEXT_VERSION_TAG(interp)++); assert (type->tp_version_tag != 0); } @@ -5605,7 +5684,7 @@ type_dealloc(PyObject *self) _PyObject_ASSERT((PyObject *)type, type->tp_flags & Py_TPFLAGS_HEAPTYPE); _PyObject_GC_UNTRACK(type); - + _PyType_ClearCodeByVersion(type->tp_version_tag); type_dealloc_common(type); // PyObject_ClearWeakRefs() raises an exception if Py_REFCNT() != 0 diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 75a8aa8f8573fe..e057c5bf02c5e6 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -79,6 +79,7 @@ increment_mutations(PyObject* dict) { * so we don't need to check that they haven't been used */ #define BUILTINS_WATCHER_ID 0 #define GLOBALS_WATCHER_ID 1 +#define TYPE_WATCHER_ID 0 static int globals_watcher_callback(PyDict_WatchEvent event, PyObject* dict, @@ -92,6 +93,14 @@ globals_watcher_callback(PyDict_WatchEvent event, PyObject* dict, return 0; } +static int +type_watcher_callback(PyTypeObject* type) +{ + _Py_Executors_InvalidateDependency(_PyInterpreterState_GET(), type, 1); + PyType_Unwatch(TYPE_WATCHER_ID, (PyObject *)type); + return 0; +} + static PyObject * convert_global_to_const(_PyUOpInstruction *inst, PyObject *obj) { @@ -167,6 +176,9 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, if (interp->dict_state.watchers[GLOBALS_WATCHER_ID] == NULL) { interp->dict_state.watchers[GLOBALS_WATCHER_ID] = globals_watcher_callback; } + if (interp->type_watchers[TYPE_WATCHER_ID] == NULL) { + interp->type_watchers[TYPE_WATCHER_ID] = type_watcher_callback; + } for (int pc = 0; pc < buffer_size; pc++) { _PyUOpInstruction *inst = &buffer[pc]; int opcode = inst->opcode; @@ -314,7 +326,7 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, #define sym_set_null(SYM) _Py_uop_sym_set_null(ctx, SYM) #define sym_set_non_null(SYM) _Py_uop_sym_set_non_null(ctx, SYM) #define sym_set_type(SYM, TYPE) _Py_uop_sym_set_type(ctx, SYM, TYPE) -#define sym_set_type_version(SYM, VERSION, OFFSET) _Py_uop_sym_set_type_version(ctx, SYM, VERSION, OFFSET) +#define sym_set_type_version(SYM, VERSION) _Py_uop_sym_set_type_version(ctx, SYM, VERSION) #define sym_set_const(SYM, CNST) _Py_uop_sym_set_const(ctx, SYM, CNST) #define sym_is_bottom _Py_uop_sym_is_bottom #define sym_truthiness _Py_uop_sym_truthiness @@ -406,7 +418,6 @@ optimize_uops( ctx->done = false; ctx->out_of_space = false; ctx->contradiction = false; - ctx->latest_escape_offset = 0; _PyUOpInstruction *this_instr = NULL; for (int i = 0; !ctx->done; i++) { @@ -417,10 +428,6 @@ optimize_uops( opcode = this_instr->opcode; _Py_UopsSymbol **stack_pointer = ctx->frame->stack_pointer; - if (_PyUop_Flags[opcode] & HAS_ESCAPES_FLAG) { - ctx->latest_escape_offset = i; // i is the offset we're looping on - } - #ifdef Py_DEBUG if (get_lltrace() >= 3) { printf("%4d abs: ", (int)(this_instr - trace)); diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index c679f6b5674a59..cec036905daab6 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -27,7 +27,7 @@ typedef struct _Py_UOpsAbstractFrame _Py_UOpsAbstractFrame; #define sym_set_null(SYM) _Py_uop_sym_set_null(ctx, SYM) #define sym_set_non_null(SYM) _Py_uop_sym_set_non_null(ctx, SYM) #define sym_set_type(SYM, TYPE) _Py_uop_sym_set_type(ctx, SYM, TYPE) -#define sym_set_type_version(SYM, VERSION, OFFSET) _Py_uop_sym_set_type_version(ctx, SYM, VERSION, OFFSET) +#define sym_set_type_version(SYM, VERSION) _Py_uop_sym_set_type_version(ctx, SYM, VERSION) #define sym_set_const(SYM, CNST) _Py_uop_sym_set_const(ctx, SYM, CNST) #define sym_is_bottom _Py_uop_sym_is_bottom #define frame_new _Py_uop_frame_new @@ -116,10 +116,20 @@ dummy_func(void) { } op(_GUARD_TYPE_VERSION, (type_version/2, owner -- owner)) { - if (sym_matches_type_version(owner, type_version, ctx->latest_escape_offset)) { + if (sym_matches_type_version(owner, type_version)) { REPLACE_OP(this_instr, _NOP, 0, 0); + } else { + // add watcher so that whenever the type changes we invalide this + PyTypeObject *type = _PyType_LookupByVersion(type_version); + // if the type is null, it was not found in the cache (there was a conflict) + // with the key, in which case we can't trust the version + if (type) { + sym_set_type_version(owner, type_version); + PyType_Watch(TYPE_WATCHER_ID, type); + _Py_BloomFilter_Add(dependencies, type); + } + } - sym_set_type_version(owner, type_version, i); } op(_GUARD_BOTH_FLOAT, (left, right -- left, right)) { diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index c4c820ba1d1e41..3f08405b260a82 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -938,10 +938,19 @@ _Py_UopsSymbol *owner; owner = stack_pointer[-1]; uint32_t type_version = (uint32_t)this_instr->operand; - if (sym_matches_type_version(owner, type_version, ctx->latest_escape_offset)) { + if (sym_matches_type_version(owner, type_version)) { REPLACE_OP(this_instr, _NOP, 0, 0); + } else { + // add watcher so that whenever the type changes we invalide this + PyTypeObject *type = _PyType_LookupByVersion(type_version); + // if the type is null, it was not found in the cache (there was a conflict) + // with the key, in which case we can't trust the version + if (type) { + sym_set_type_version(owner, type_version); + PyType_Watch(TYPE_WATCHER_ID, (PyObject *)type); + _Py_BloomFilter_Add(dependencies, type); + } } - sym_set_type_version(owner, type_version, i); break; } diff --git a/Python/optimizer_symbols.c b/Python/optimizer_symbols.c index 47ccc908fcccdb..f4b6b87e7555d6 100644 --- a/Python/optimizer_symbols.c +++ b/Python/optimizer_symbols.c @@ -54,7 +54,6 @@ static _Py_UopsSymbol NO_SPACE_SYMBOL = { .typ = NULL, .const_val = NULL, .typ_version = 0, - .typ_version_offset = 0, }; _Py_UopsSymbol * @@ -79,7 +78,6 @@ sym_new(_Py_UOpsContext *ctx) self->typ = NULL; self->const_val = NULL; self->typ_version = 0; - self->typ_version_offset = 0; return self; } @@ -157,10 +155,9 @@ _Py_uop_sym_set_type(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, PyTypeObject *ty } void -_Py_uop_sym_set_type_version(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, int32_t version, int offset) +_Py_uop_sym_set_type_version(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, int32_t version) { sym->typ_version = version; - sym->typ_version_offset = offset; } void @@ -273,12 +270,6 @@ _Py_uop_sym_get_type_version(_Py_UopsSymbol *sym) return sym->typ_version; } -int -_Py_uop_sym_get_type_version_offset(_Py_UopsSymbol *sym) -{ - return sym->typ_version_offset; -} - bool _Py_uop_sym_has_type(_Py_UopsSymbol *sym) { @@ -296,10 +287,9 @@ _Py_uop_sym_matches_type(_Py_UopsSymbol *sym, PyTypeObject *typ) } bool -_Py_uop_sym_matches_type_version(_Py_UopsSymbol *sym, int32_t version, int offset) +_Py_uop_sym_matches_type_version(_Py_UopsSymbol *sym, int32_t version) { - return _Py_uop_sym_get_type_version(sym) == version - && _Py_uop_sym_get_type_version_offset(sym) > offset; + return _Py_uop_sym_get_type_version(sym) == version; } From fd979f0d0adbd6f674bd215da672c01fd6f80fc1 Mon Sep 17 00:00:00 2001 From: Saul Shanabrook Date: Tue, 21 May 2024 17:32:19 -0400 Subject: [PATCH 15/39] Revert formatting changes to tests --- Lib/test/test_capi/test_opt.py | 325 +++++++++++---------------------- 1 file changed, 107 insertions(+), 218 deletions(-) diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 3ae128d81e8462..2b7fee5a2d220e 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -13,7 +13,6 @@ from _testinternalcapi import TIER2_THRESHOLD - @contextlib.contextmanager def temporary_optimizer(opt): old_opt = _testinternalcapi.get_optimizer() @@ -35,10 +34,10 @@ def clear_executors(func): @requires_specialization -@unittest.skipUnless( - hasattr(_testinternalcapi, "get_optimizer"), "Requires optimizer infrastructure" -) +@unittest.skipUnless(hasattr(_testinternalcapi, "get_optimizer"), + "Requires optimizer infrastructure") class TestOptimizerAPI(unittest.TestCase): + def test_new_counter_optimizer_dealloc(self): # See gh-108727 def f(): @@ -57,21 +56,16 @@ def test_get_set_optimizer(self): finally: _testinternalcapi.set_optimizer(old) + def test_counter_optimizer(self): # Generate a new function at each call ns = {} - exec( - textwrap.dedent( - """ + exec(textwrap.dedent(""" def loop(): for _ in range(1000): pass - """ - ), - ns, - ns, - ) - loop = ns["loop"] + """), ns, ns) + loop = ns['loop'] for repeat in range(5): opt = _testinternalcapi.new_counter_optimizer() @@ -87,9 +81,7 @@ def test_long_loop(self): # Generate a new function at each call ns = {} - exec( - textwrap.dedent( - """ + exec(textwrap.dedent(""" def nop(): pass @@ -102,20 +94,14 @@ def long_loop(): nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop(); - """ - ), - ns, - ns, - ) - long_loop = ns["long_loop"] + """), ns, ns) + long_loop = ns['long_loop'] opt = _testinternalcapi.new_counter_optimizer() with temporary_optimizer(opt): self.assertEqual(opt.get_count(), 0) long_loop() - self.assertEqual( - opt.get_count(), 20 - TIER2_THRESHOLD - ) # Need iterations to warm up + self.assertEqual(opt.get_count(), 20 - TIER2_THRESHOLD) # Need iterations to warm up def test_code_restore_for_ENTER_EXECUTOR(self): def testfunc(x): @@ -126,7 +112,7 @@ def testfunc(x): opt = _testinternalcapi.new_counter_optimizer() with temporary_optimizer(opt): testfunc(1000) - code, replace_code = testfunc.__code__, testfunc.__code__.replace() + code, replace_code = testfunc.__code__, testfunc.__code__.replace() self.assertEqual(code, replace_code) self.assertEqual(hash(code), hash(replace_code)) @@ -152,10 +138,10 @@ def get_opnames(ex): @requires_specialization -@unittest.skipUnless( - hasattr(_testinternalcapi, "get_optimizer"), "Requires optimizer infrastructure" -) +@unittest.skipUnless(hasattr(_testinternalcapi, "get_optimizer"), + "Requires optimizer infrastructure") class TestExecutorInvalidation(unittest.TestCase): + def setUp(self): self.old = _testinternalcapi.get_optimizer() self.opt = _testinternalcapi.new_counter_optimizer() @@ -172,11 +158,10 @@ def test_invalidate_object(self): def f{n}(): for _ in range(1000): pass - """ - for n in range(5) + """ for n in range(5) ) exec(textwrap.dedent(func_src), ns, ns) - funcs = [ns[f"f{n}"] for n in range(5)] + funcs = [ ns[f'f{n}'] for n in range(5)] objects = [object() for _ in range(5)] for f in funcs: @@ -186,13 +171,13 @@ def f{n}(): # with an equal or lower index. for i, exe in enumerate(executors): self.assertTrue(exe.is_valid()) - for obj in objects[: i + 1]: + for obj in objects[:i+1]: _testinternalcapi.add_executor_dependency(exe, obj) self.assertTrue(exe.is_valid()) # Assert that the correct executors are invalidated # and check that nothing crashes when we invalidate # an executor mutliple times. - for i in (4, 3, 2, 1, 0): + for i in (4,3,2,1,0): _testinternalcapi.invalidate_executors(objects[i]) for exe in executors[i:]: self.assertFalse(exe.is_valid()) @@ -202,18 +187,12 @@ def f{n}(): def test_uop_optimizer_invalidation(self): # Generate a new function at each call ns = {} - exec( - textwrap.dedent( - """ + exec(textwrap.dedent(""" def f(): for i in range(1000): pass - """ - ), - ns, - ns, - ) - f = ns["f"] + """), ns, ns) + f = ns['f'] opt = _testinternalcapi.new_uop_optimizer() with temporary_optimizer(opt): f() @@ -227,7 +206,6 @@ def test_sys__clear_internal_caches(self): def f(): for _ in range(1000): pass - opt = _testinternalcapi.new_uop_optimizer() with temporary_optimizer(opt): f() @@ -241,13 +219,11 @@ def f(): @requires_specialization -@unittest.skipUnless( - hasattr(_testinternalcapi, "get_optimizer"), "Requires optimizer infrastructure" -) -@unittest.skipIf( - os.getenv("PYTHON_UOPS_OPTIMIZE") == "0", "Needs uop optimizer to run." -) +@unittest.skipUnless(hasattr(_testinternalcapi, "get_optimizer"), + "Requires optimizer infrastructure") +@unittest.skipIf(os.getenv("PYTHON_UOPS_OPTIMIZE") == "0", "Needs uop optimizer to run.") class TestUops(unittest.TestCase): + def test_basic_loop(self): def testfunc(x): i = 0 @@ -267,9 +243,7 @@ def testfunc(x): def test_extended_arg(self): "Check EXTENDED_ARG handling in superblock creation" ns = {} - exec( - textwrap.dedent( - """ + exec(textwrap.dedent(""" def many_vars(): # 260 vars, so z9 should have index 259 a0 = a1 = a2 = a3 = a4 = a5 = a6 = a7 = a8 = a9 = 42 @@ -300,11 +274,7 @@ def many_vars(): z0 = z1 = z2 = z3 = z4 = z5 = z6 = z7 = z8 = z9 = 42 while z9 > 0: z9 = z9 - 1 - """ - ), - ns, - ns, - ) + """), ns, ns) many_vars = ns["many_vars"] opt = _testinternalcapi.new_uop_optimizer() @@ -315,12 +285,8 @@ def many_vars(): ex = get_first_executor(many_vars) self.assertIsNotNone(ex) - self.assertTrue( - any( - (opcode, oparg, operand) == ("_LOAD_FAST", 259, 0) - for opcode, oparg, _, operand in list(ex) - ) - ) + self.assertTrue(any((opcode, oparg, operand) == ("_LOAD_FAST", 259, 0) + for opcode, oparg, _, operand in list(ex))) def test_unspecialized_unpack(self): # An example of an unspecialized opcode @@ -526,8 +492,7 @@ def testfunc(it): def test_call_py_exact_args(self): def testfunc(n): def dummy(x): - return x + 1 - + return x+1 for i in range(n): dummy(i) @@ -562,10 +527,8 @@ def test_for_iter_tier_two(self): class MyIter: def __init__(self, n): self.n = n - def __iter__(self): return self - def __next__(self): self.n -= 1 if self.n < 0: @@ -576,7 +539,7 @@ def testfunc(n, m): x = 0 for i in range(m): for j in MyIter(n): - x += 1000 * i + j + x += 1000*i + j return x opt = _testinternalcapi.new_uop_optimizer() @@ -598,13 +561,13 @@ def testfunc(n): bits += 1 if i & 0x02: bits += 1 - if i & 0x04: + if i&0x04: bits += 1 - if i & 0x08: + if i&0x08: bits += 1 - if i & 0x10: + if i&0x10: bits += 1 - if i & 0x20: + if i&0x20: bits += 1 return bits @@ -616,19 +579,17 @@ def testfunc(n): ex = get_first_executor(testfunc) self.assertIsNotNone(ex) ops = list(iter_opnames(ex)) - # Since branch is 50/50 the trace could go either way. + #Since branch is 50/50 the trace could go either way. count = ops.count("_GUARD_IS_TRUE_POP") + ops.count("_GUARD_IS_FALSE_POP") self.assertLessEqual(count, 2) @requires_specialization -@unittest.skipUnless( - hasattr(_testinternalcapi, "get_optimizer"), "Requires optimizer infrastructure" -) -@unittest.skipIf( - os.getenv("PYTHON_UOPS_OPTIMIZE") == "0", "Needs uop optimizer to run." -) +@unittest.skipUnless(hasattr(_testinternalcapi, "get_optimizer"), + "Requires optimizer infrastructure") +@unittest.skipIf(os.getenv("PYTHON_UOPS_OPTIMIZE") == "0", "Needs uop optimizer to run.") class TestUopsOptimization(unittest.TestCase): + def _run_with_optimizer(self, testfunc, arg): res = None opt = _testinternalcapi.new_uop_optimizer() @@ -638,6 +599,7 @@ def _run_with_optimizer(self, testfunc, arg): ex = get_first_executor(testfunc) return res, ex + def test_int_type_propagation(self): def testfunc(loops): num = 0 @@ -650,19 +612,14 @@ def testfunc(loops): res, ex = self._run_with_optimizer(testfunc, 32) self.assertIsNotNone(ex) self.assertEqual(res, 63) - binop_count = [ - opname for opname in iter_opnames(ex) if opname == "_BINARY_OP_ADD_INT" - ] - guard_both_int_count = [ - opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_INT" - ] + binop_count = [opname for opname in iter_opnames(ex) if opname == "_BINARY_OP_ADD_INT"] + guard_both_int_count = [opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_INT"] self.assertGreaterEqual(len(binop_count), 3) self.assertLessEqual(len(guard_both_int_count), 1) def test_int_type_propagation_through_frame(self): def double(x): return x + x - def testfunc(loops): num = 0 for i in range(loops): @@ -679,19 +636,14 @@ def testfunc(loops): ex = get_first_executor(testfunc) self.assertIsNotNone(ex) self.assertEqual(res, 124) - binop_count = [ - opname for opname in iter_opnames(ex) if opname == "_BINARY_OP_ADD_INT" - ] - guard_both_int_count = [ - opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_INT" - ] + binop_count = [opname for opname in iter_opnames(ex) if opname == "_BINARY_OP_ADD_INT"] + guard_both_int_count = [opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_INT"] self.assertGreaterEqual(len(binop_count), 3) self.assertLessEqual(len(guard_both_int_count), 1) def test_int_type_propagation_from_frame(self): def double(x): return x + x - def testfunc(loops): num = 0 for i in range(loops): @@ -708,12 +660,8 @@ def testfunc(loops): ex = get_first_executor(testfunc) self.assertIsNotNone(ex) self.assertEqual(res, 124) - binop_count = [ - opname for opname in iter_opnames(ex) if opname == "_BINARY_OP_ADD_INT" - ] - guard_both_int_count = [ - opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_INT" - ] + binop_count = [opname for opname in iter_opnames(ex) if opname == "_BINARY_OP_ADD_INT"] + guard_both_int_count = [opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_INT"] self.assertGreaterEqual(len(binop_count), 3) self.assertLessEqual(len(guard_both_int_count), 1) @@ -730,16 +678,13 @@ def testfunc(loops): res, ex = self._run_with_optimizer(testfunc, 64) self.assertIsNotNone(ex) - binop_count = [ - opname for opname in iter_opnames(ex) if opname == "_BINARY_OP_ADD_INT" - ] + binop_count = [opname for opname in iter_opnames(ex) if opname == "_BINARY_OP_ADD_INT"] self.assertGreaterEqual(len(binop_count), 3) def test_call_py_exact_args(self): def testfunc(n): def dummy(x): - return x + 1 - + return x+1 for i in range(n): dummy(i) @@ -752,6 +697,7 @@ def dummy(x): def test_int_type_propagate_through_range(self): def testfunc(n): + for i in range(n): x = i + i return x @@ -764,6 +710,7 @@ def testfunc(n): def test_int_value_numbering(self): def testfunc(n): + y = 1 for i in range(n): x = y @@ -778,9 +725,7 @@ def testfunc(n): self.assertIsNotNone(ex) uops = get_opnames(ex) self.assertIn("_GUARD_BOTH_INT", uops) - guard_count = [ - opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_INT" - ] + guard_count = [opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_INT"] self.assertEqual(len(guard_count), 1) def test_comprehension(self): @@ -796,7 +741,7 @@ def testfunc(n): def test_call_py_exact_args_disappearing(self): def dummy(x): - return x + 1 + return x+1 def testfunc(n): for i in range(n): @@ -811,7 +756,6 @@ def testfunc(n): def dummy(x): return x + 2 - testfunc(32) ex = get_first_executor(testfunc) @@ -821,10 +765,8 @@ def dummy(x): # This test is a little implementation specific. def test_promote_globals_to_constants(self): - result = script_helper.run_python_until_end( - "-c", - textwrap.dedent( - """ + + result = script_helper.run_python_until_end('-c', textwrap.dedent(""" import _testinternalcapi import opcode import _opcode @@ -856,9 +798,7 @@ def testfunc(n): uops = get_opnames(ex) assert "_LOAD_GLOBAL_BUILTINS" not in uops assert "_LOAD_CONST_INLINE_BORROW_WITH_NULL" in uops - """ - ), - ) + """)) self.assertEqual(result[0].rc, 0, result) def test_float_add_constant_propagation(self): @@ -875,9 +815,7 @@ def testfunc(n): self.assertAlmostEqual(res, 33.0) self.assertIsNotNone(ex) uops = get_opnames(ex) - guard_both_float_count = [ - opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_FLOAT" - ] + guard_both_float_count = [opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_FLOAT"] self.assertLessEqual(len(guard_both_float_count), 1) # TODO gh-115506: this assertion may change after propagating constants. # We'll also need to verify that propagation actually occurs. @@ -897,9 +835,7 @@ def testfunc(n): self.assertAlmostEqual(res, -31.0) self.assertIsNotNone(ex) uops = get_opnames(ex) - guard_both_float_count = [ - opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_FLOAT" - ] + guard_both_float_count = [opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_FLOAT"] self.assertLessEqual(len(guard_both_float_count), 1) # TODO gh-115506: this assertion may change after propagating constants. # We'll also need to verify that propagation actually occurs. @@ -919,9 +855,7 @@ def testfunc(n): self.assertAlmostEqual(res, 1.0) self.assertIsNotNone(ex) uops = get_opnames(ex) - guard_both_float_count = [ - opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_FLOAT" - ] + guard_both_float_count = [opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_FLOAT"] self.assertLessEqual(len(guard_both_float_count), 1) # TODO gh-115506: this assertion may change after propagating constants. # We'll also need to verify that propagation actually occurs. @@ -941,9 +875,7 @@ def testfunc(n): self.assertEqual(res, "") self.assertIsNotNone(ex) uops = get_opnames(ex) - guard_both_unicode_count = [ - opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_UNICODE" - ] + guard_both_unicode_count = [opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_UNICODE"] self.assertLessEqual(len(guard_both_unicode_count), 1) self.assertIn("_BINARY_OP_ADD_UNICODE", uops) @@ -961,9 +893,7 @@ def testfunc(n): self.assertTrue(res) self.assertIsNotNone(ex) uops = get_opnames(ex) - guard_both_float_count = [ - opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_FLOAT" - ] + guard_both_float_count = [opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_FLOAT"] self.assertLessEqual(len(guard_both_float_count), 1) self.assertIn("_COMPARE_OP_FLOAT", uops) @@ -981,9 +911,7 @@ def testfunc(n): self.assertTrue(res) self.assertIsNotNone(ex) uops = get_opnames(ex) - guard_both_int_count = [ - opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_INT" - ] + guard_both_int_count = [opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_INT"] self.assertLessEqual(len(guard_both_int_count), 1) self.assertIn("_COMPARE_OP_INT", uops) @@ -1001,12 +929,8 @@ def testfunc(n): self.assertEqual(res, 1) self.assertIsNotNone(ex) uops = get_opnames(ex) - guard_left_int_count = [ - opname for opname in iter_opnames(ex) if opname == "_GUARD_NOS_INT" - ] - guard_both_int_count = [ - opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_INT" - ] + guard_left_int_count = [opname for opname in iter_opnames(ex) if opname == "_GUARD_NOS_INT"] + guard_both_int_count = [opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_INT"] self.assertLessEqual(len(guard_left_int_count), 1) self.assertEqual(len(guard_both_int_count), 0) self.assertIn("_COMPARE_OP_INT", uops) @@ -1025,12 +949,8 @@ def testfunc(n): self.assertEqual(res, 1) self.assertIsNotNone(ex) uops = get_opnames(ex) - guard_left_float_count = [ - opname for opname in iter_opnames(ex) if opname == "_GUARD_NOS_FLOAT" - ] - guard_both_float_count = [ - opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_FLOAT" - ] + guard_left_float_count = [opname for opname in iter_opnames(ex) if opname == "_GUARD_NOS_FLOAT"] + guard_both_float_count = [opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_FLOAT"] self.assertLessEqual(len(guard_left_float_count), 1) self.assertEqual(len(guard_both_float_count), 0) self.assertIn("_COMPARE_OP_FLOAT", uops) @@ -1049,27 +969,23 @@ def testfunc(n): self.assertTrue(res) self.assertIsNotNone(ex) uops = get_opnames(ex) - guard_both_float_count = [ - opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_UNICODE" - ] + guard_both_float_count = [opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_UNICODE"] self.assertLessEqual(len(guard_both_float_count), 1) self.assertIn("_COMPARE_OP_STR", uops) def test_type_inconsistency(self): ns = {} - src = textwrap.dedent( - """ + src = textwrap.dedent(""" def testfunc(n): for i in range(n): x = _test_global + _test_global - """ - ) + """) exec(src, ns, ns) - testfunc = ns["testfunc"] - ns["_test_global"] = 0 + testfunc = ns['testfunc'] + ns['_test_global'] = 0 _, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD) self.assertIsNone(ex) - ns["_test_global"] = 1 + ns['_test_global'] = 1 _, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD) self.assertIsNotNone(ex) uops = get_opnames(ex) @@ -1079,22 +995,20 @@ def testfunc(n): # This should result in no executor the second time. ns = {} exec(src, ns, ns) - testfunc = ns["testfunc"] - ns["_test_global"] = 0 + testfunc = ns['testfunc'] + ns['_test_global'] = 0 _, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD) self.assertIsNone(ex) - ns["_test_global"] = 3.14 + ns['_test_global'] = 3.14 _, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD) self.assertIsNone(ex) def test_combine_stack_space_checks_sequential(self): def dummy12(x): return x - 1 - def dummy13(y): z = y + 2 return y, z - def testfunc(n): a = 0 for _ in range(n): @@ -1120,11 +1034,9 @@ def testfunc(n): def test_combine_stack_space_checks_nested(self): def dummy12(x): return x + 3 - def dummy15(y): z = dummy12(y) return y, z - def testfunc(n): a = 0 for _ in range(n): @@ -1143,24 +1055,22 @@ def testfunc(n): self.assertEqual(uop_names.count("_CHECK_STACK_SPACE"), 0) self.assertEqual(uop_names.count("_CHECK_STACK_SPACE_OPERAND"), 1) # nested calls: 15 + 12 == 27 - largest_stack = _testinternalcapi.get_co_framesize( - dummy15.__code__ - ) + _testinternalcapi.get_co_framesize(dummy12.__code__) + largest_stack = ( + _testinternalcapi.get_co_framesize(dummy15.__code__) + + _testinternalcapi.get_co_framesize(dummy12.__code__) + ) self.assertIn(("_CHECK_STACK_SPACE_OPERAND", largest_stack), uops_and_operands) def test_combine_stack_space_checks_several_calls(self): def dummy12(x): return x + 3 - def dummy13(y): z = y + 2 return y, z - def dummy18(y): z = dummy12(y) x, w = dummy13(z) return z, x, w - def testfunc(n): a = 0 for _ in range(n): @@ -1180,25 +1090,23 @@ def testfunc(n): self.assertEqual(uop_names.count("_CHECK_STACK_SPACE"), 0) self.assertEqual(uop_names.count("_CHECK_STACK_SPACE_OPERAND"), 1) # max(12, 18 + max(12, 13)) == 31 - largest_stack = _testinternalcapi.get_co_framesize( - dummy18.__code__ - ) + _testinternalcapi.get_co_framesize(dummy13.__code__) + largest_stack = ( + _testinternalcapi.get_co_framesize(dummy18.__code__) + + _testinternalcapi.get_co_framesize(dummy13.__code__) + ) self.assertIn(("_CHECK_STACK_SPACE_OPERAND", largest_stack), uops_and_operands) def test_combine_stack_space_checks_several_calls_different_order(self): # same as `several_calls` but with top-level calls reversed def dummy12(x): return x + 3 - def dummy13(y): z = y + 2 return y, z - def dummy18(y): z = dummy12(y) x, w = dummy13(z) return z, x, w - def testfunc(n): a = 0 for _ in range(n): @@ -1218,38 +1126,32 @@ def testfunc(n): self.assertEqual(uop_names.count("_CHECK_STACK_SPACE"), 0) self.assertEqual(uop_names.count("_CHECK_STACK_SPACE_OPERAND"), 1) # max(18 + max(12, 13), 12) == 31 - largest_stack = _testinternalcapi.get_co_framesize( - dummy18.__code__ - ) + _testinternalcapi.get_co_framesize(dummy13.__code__) + largest_stack = ( + _testinternalcapi.get_co_framesize(dummy18.__code__) + + _testinternalcapi.get_co_framesize(dummy13.__code__) + ) self.assertIn(("_CHECK_STACK_SPACE_OPERAND", largest_stack), uops_and_operands) def test_combine_stack_space_complex(self): def dummy0(x): return x - def dummy1(x): return dummy0(x) - def dummy2(x): return dummy1(x) - def dummy3(x): return dummy0(x) - def dummy4(x): y = dummy0(x) return dummy3(y) - def dummy5(x): return dummy2(x) - def dummy6(x): y = dummy5(x) z = dummy0(y) return dummy4(z) - def testfunc(n): - a = 0 + a = 0; for _ in range(32): b = dummy5(1) c = dummy0(1) @@ -1269,13 +1171,15 @@ def testfunc(n): self.assertEqual(uop_names.count("_CHECK_STACK_SPACE"), 0) self.assertEqual(uop_names.count("_CHECK_STACK_SPACE_OPERAND"), 1) largest_stack = ( - _testinternalcapi.get_co_framesize(dummy6.__code__) - + _testinternalcapi.get_co_framesize(dummy5.__code__) - + _testinternalcapi.get_co_framesize(dummy2.__code__) - + _testinternalcapi.get_co_framesize(dummy1.__code__) - + _testinternalcapi.get_co_framesize(dummy0.__code__) + _testinternalcapi.get_co_framesize(dummy6.__code__) + + _testinternalcapi.get_co_framesize(dummy5.__code__) + + _testinternalcapi.get_co_framesize(dummy2.__code__) + + _testinternalcapi.get_co_framesize(dummy1.__code__) + + _testinternalcapi.get_co_framesize(dummy0.__code__) + ) + self.assertIn( + ("_CHECK_STACK_SPACE_OPERAND", largest_stack), uops_and_operands ) - self.assertIn(("_CHECK_STACK_SPACE_OPERAND", largest_stack), uops_and_operands) def test_combine_stack_space_checks_large_framesize(self): # Create a function with a large framesize. This ensures _CHECK_STACK_SPACE is @@ -1287,19 +1191,14 @@ def test_combine_stack_space_checks_large_framesize(self): header = """ def dummy_large(a0): """ - body = "".join( - [ - f""" + body = "".join([f""" a{n+1} = a{n} + 1 - """ - for n in range(repetitions) - ] - ) + """ for n in range(repetitions)]) return_ = f""" return a{repetitions-1} """ exec(textwrap.dedent(header + body + return_), ns, ns) - dummy_large = ns["dummy_large"] + dummy_large = ns['dummy_large'] # this is something like: # @@ -1333,19 +1232,21 @@ def testfunc(n): # so we need to account for both possibilities self.assertIn(uop_names.count("_CHECK_STACK_SPACE"), [0, 1]) if uop_names.count("_CHECK_STACK_SPACE") == 0: - largest_stack = _testinternalcapi.get_co_framesize( - dummy15.__code__ - ) + _testinternalcapi.get_co_framesize(dummy_large.__code__) + largest_stack = ( + _testinternalcapi.get_co_framesize(dummy15.__code__) + + _testinternalcapi.get_co_framesize(dummy_large.__code__) + ) else: largest_stack = _testinternalcapi.get_co_framesize(dummy15.__code__) - self.assertIn(("_CHECK_STACK_SPACE_OPERAND", largest_stack), uops_and_operands) + self.assertIn( + ("_CHECK_STACK_SPACE_OPERAND", largest_stack), uops_and_operands + ) def test_combine_stack_space_checks_recursion(self): def dummy15(x): while x > 0: return dummy15(x - 1) return 42 - def testfunc(n): a = 0 for _ in range(n): @@ -1369,28 +1270,20 @@ def test_many_nested(self): # overflow the trace_stack def dummy_a(x): return x - def dummy_b(x): return dummy_a(x) - def dummy_c(x): return dummy_b(x) - def dummy_d(x): return dummy_c(x) - def dummy_e(x): return dummy_d(x) - def dummy_f(x): return dummy_e(x) - def dummy_g(x): return dummy_f(x) - def dummy_h(x): return dummy_g(x) - def testfunc(n): a = 0 for _ in range(n): @@ -1404,12 +1297,10 @@ def testfunc(n): def test_return_generator(self): def gen(): yield None - def testfunc(n): for i in range(n): gen() return i - res, ex = self._run_with_optimizer(testfunc, 20) self.assertEqual(res, 19) self.assertIsNotNone(ex) @@ -1419,14 +1310,12 @@ def test_for_iter_gen(self): def gen(n): for i in range(n): yield i - def testfunc(n): g = gen(n) s = 0 for i in g: s += i return s - res, ex = self._run_with_optimizer(testfunc, 20) self.assertEqual(res, 190) self.assertIsNotNone(ex) From 5da1a40b71ddde388faa96d358e6efff67719d86 Mon Sep 17 00:00:00 2001 From: Saul Shanabrook Date: Tue, 21 May 2024 17:39:11 -0400 Subject: [PATCH 16/39] Add additional test --- Lib/test/test_capi/test_opt.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 2b7fee5a2d220e..0cd9c49ed77f57 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -1407,6 +1407,29 @@ def xxx(): guard_type_version_count = opnames.count("_GUARD_TYPE_VERSION") self.assertEqual(guard_type_version_count, 2) + def test_guard_type_version_executor_invalidated(self): + """ + Verify that the executor is invalided on a type change. + """ + + def thing(a): + x = 0 + for i in range(100): + x += a.attr + x += a.attr + return x + + class Foo: + attr = 1 + + res, ex = self._run_with_optimizer(thing, Foo()) + self.assertEqual(res, 200) + self.assertIsNotNone(ex) + self.assertEqual(list(iter_opnames(ex)).count("_GUARD_TYPE_VERSION"), 1) + self.assertTrue(ex.is_valid()) + Foo.attr = 0 + self.assertFalse(ex.is_valid()) + if __name__ == "__main__": unittest.main() From 84860ea1ea5e0c0cc43dfe45766f13e31b847828 Mon Sep 17 00:00:00 2001 From: Saul Shanabrook Date: Tue, 21 May 2024 17:53:59 -0400 Subject: [PATCH 17/39] Remove printf and make slightly more threadsafe --- Objects/typeobject.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index cacc49e243de59..48c1c45366e254 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -1066,7 +1066,7 @@ _PyType_SetVersion(PyTypeObject *tp, unsigned int version) *slot = NULL; } #endif - tp->tp_version_tag = version; + FT_ATOMIC_STORE_UINT32_RELAXED(tp->tp_version_tag, version); #ifndef Py_GIL_DISABLED if (version != 0) { PyTypeObject **slot = @@ -1104,7 +1104,6 @@ _PyType_LookupByVersion(unsigned int version) PyTypeObject **slot = interp->types.type_version_cache + (version % TYPE_VERSION_CACHE_SIZE); - printf("slot: %p\n", slot); if (*slot && (*slot)->tp_version_tag == version) { return *slot; } @@ -1149,8 +1148,6 @@ assign_version_tag(PyInterpreterState *interp, PyTypeObject *type) return 0; } _PyType_SetVersion(type, NEXT_GLOBAL_VERSION_TAG++); - // FT_ATOMIC_STORE_UINT32_RELAXED(type->tp_version_tag, - // NEXT_GLOBAL_VERSION_TAG++); assert (type->tp_version_tag <= _Py_MAX_GLOBAL_TYPE_VERSION_TAG); } else { @@ -1160,8 +1157,6 @@ assign_version_tag(PyInterpreterState *interp, PyTypeObject *type) return 0; } _PyType_SetVersion(type, NEXT_VERSION_TAG(interp)++); - // FT_ATOMIC_STORE_UINT32_RELAXED(type->tp_version_tag, - // NEXT_VERSION_TAG(interp)++); assert (type->tp_version_tag != 0); } From c12bb90d827b1988e3719cfdae1412758441e06f Mon Sep 17 00:00:00 2001 From: Saul Shanabrook Date: Tue, 21 May 2024 17:57:33 -0400 Subject: [PATCH 18/39] fix typo --- Include/internal/pycore_typeobject.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Include/internal/pycore_typeobject.h b/Include/internal/pycore_typeobject.h index 394400ba4ba08d..ffea0064792f7c 100644 --- a/Include/internal/pycore_typeobject.h +++ b/Include/internal/pycore_typeobject.h @@ -112,7 +112,7 @@ struct types_state { PyMutex mutex; // Borrowed references to type objects whose - // func_version % TYPE_VERSION_CACHE_SIZE + // tp_version_tag % TYPE_VERSION_CACHE_SIZE // once was equal to the index in the table. // They are cleared when the type object is deallocated. PyTypeObject* type_version_cache[TYPE_VERSION_CACHE_SIZE]; From 53ab26294ced214a434bd254ac0e4388e78e6dc9 Mon Sep 17 00:00:00 2001 From: Saul Shanabrook Date: Wed, 22 May 2024 10:14:02 -0400 Subject: [PATCH 19/39] Update type watcher ID in test bc we know have a default one at 0 --- Lib/test/test_capi/test_watchers.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_capi/test_watchers.py b/Lib/test/test_capi/test_watchers.py index 90665a7561b316..709b5e1c4b716a 100644 --- a/Lib/test/test_capi/test_watchers.py +++ b/Lib/test/test_capi/test_watchers.py @@ -282,8 +282,10 @@ class C: pass self.watch(wid, C) with catch_unraisable_exception() as cm: C.foo = "bar" - self.assertEqual(cm.unraisable.err_msg, - f"Exception ignored in type watcher callback #0 for {C!r}") + self.assertEqual( + cm.unraisable.err_msg, + f"Exception ignored in type watcher callback #1 for {C!r}", + ) self.assertIs(cm.unraisable.object, None) self.assertEqual(str(cm.unraisable.exc_value), "boom!") self.assert_events([]) From 88e2e59d3225a59b7c52eb36d1230d03c696b326 Mon Sep 17 00:00:00 2001 From: Saul Shanabrook Date: Wed, 22 May 2024 15:31:21 -0400 Subject: [PATCH 20/39] Fix panic and add test for it --- Lib/test/test_capi/test_opt.py | 21 +++++++++++++++++++++ Python/optimizer_bytecodes.c | 2 +- Python/optimizer_cases.c.h | 2 +- 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 0cd9c49ed77f57..98a33dca6bd221 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -1430,6 +1430,27 @@ class Foo: Foo.attr = 0 self.assertFalse(ex.is_valid()) + def test_type_version_doesnt_segfault(self): + """ + Tests that setting a type version doesn't cause a segfault when later looking at the stack. + """ + + # Minimized from mdp.py benchmark + + class A: + def __init__(self): + self.attr = {} + + def method(self, arg): + self.attr[arg] = None + + def fn(a): + for _ in range(100): + (_ for _ in []) + (_ for _ in [a.method(None)]) + + fn(A()) + if __name__ == "__main__": unittest.main() diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index cec036905daab6..14d8efe938a4b5 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -588,7 +588,7 @@ dummy_func(void) { // Can determine statically, so we interleave the new locals // and make the current stack the new locals. // This also sets up for true call inlining. - if (sym_is_null(self_or_null) || sym_is_not_null(self_or_null)) { + if (sym_is_null(self_or_null)) { localsplus_start = args; n_locals_already_filled = argcount; } diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 3f08405b260a82..1467def4efa84d 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -1608,7 +1608,7 @@ // Can determine statically, so we interleave the new locals // and make the current stack the new locals. // This also sets up for true call inlining. - if (sym_is_null(self_or_null) || sym_is_not_null(self_or_null)) { + if (sym_is_null(self_or_null)) { localsplus_start = args; n_locals_already_filled = argcount; } From 1eabca9433196da5f0190b3a4b119a4f9b082968 Mon Sep 17 00:00:00 2001 From: Saul Shanabrook Date: Wed, 22 May 2024 15:31:28 -0400 Subject: [PATCH 21/39] add manual cast --- Python/optimizer_bytecodes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 14d8efe938a4b5..94ad69c438ae0c 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -125,7 +125,7 @@ dummy_func(void) { // with the key, in which case we can't trust the version if (type) { sym_set_type_version(owner, type_version); - PyType_Watch(TYPE_WATCHER_ID, type); + PyType_Watch(TYPE_WATCHER_ID, (PyObject *)type); _Py_BloomFilter_Add(dependencies, type); } From 1f7cc74bd29113c6759884956de83d90d82bf3c6 Mon Sep 17 00:00:00 2001 From: Saul Shanabrook Date: Wed, 22 May 2024 17:39:56 -0400 Subject: [PATCH 22/39] Properly fixed the frame creation with discussion with Ken Jin --- Include/internal/pycore_optimizer.h | 6 +++--- Python/optimizer_analysis.c | 2 +- Python/optimizer_bytecodes.c | 16 ++++++---------- Python/optimizer_cases.c.h | 13 ++++--------- Python/optimizer_symbols.c | 17 ++++++++++------- 5 files changed, 24 insertions(+), 30 deletions(-) diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index 50c700a8b548c2..5bce857c6714e3 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -141,9 +141,9 @@ extern void _Py_uop_abstractcontext_fini(_Py_UOpsContext *ctx); extern _Py_UOpsAbstractFrame *_Py_uop_frame_new( _Py_UOpsContext *ctx, PyCodeObject *co, - _Py_UopsSymbol **localsplus_start, - int n_locals_already_filled, - int curr_stackentries); + int curr_stackentries, + _Py_UopsSymbol **args, + int arg_len); extern int _Py_uop_frame_pop(_Py_UOpsContext *ctx); PyAPI_FUNC(PyObject *) _Py_uop_symbols_test(PyObject *self, PyObject *ignored); diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index e057c5bf02c5e6..75d1d9f6b2a794 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -409,7 +409,7 @@ optimize_uops( _PyUOpInstruction *corresponding_check_stack = NULL; _Py_uop_abstractcontext_init(ctx); - _Py_UOpsAbstractFrame *frame = _Py_uop_frame_new(ctx, co, ctx->n_consumed, 0, curr_stacklen); + _Py_UOpsAbstractFrame *frame = _Py_uop_frame_new(ctx, co, curr_stacklen, NULL, 0); if (frame == NULL) { return -1; } diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 94ad69c438ae0c..28bbdfe7b7300f 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -583,16 +583,12 @@ dummy_func(void) { argcount++; } - _Py_UopsSymbol **localsplus_start = ctx->n_consumed; - int n_locals_already_filled = 0; - // Can determine statically, so we interleave the new locals - // and make the current stack the new locals. - // This also sets up for true call inlining. - if (sym_is_null(self_or_null)) { - localsplus_start = args; - n_locals_already_filled = argcount; - } - new_frame = frame_new(ctx, co, localsplus_start, n_locals_already_filled, 0); + if (sym_is_null(self_or_null) || sym_is_not_null(self_or_null)) { + new_frame = frame_new(ctx, co, 0, args, argcount); + } else { + new_frame = frame_new(ctx, co, 0, NULL, 0); + + } } op(_PY_FRAME_GENERAL, (callable, self_or_null, args[oparg] -- new_frame: _Py_UOpsAbstractFrame *)) { diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 1467def4efa84d..fb5455154995fc 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -1603,16 +1603,11 @@ args--; argcount++; } - _Py_UopsSymbol **localsplus_start = ctx->n_consumed; - int n_locals_already_filled = 0; - // Can determine statically, so we interleave the new locals - // and make the current stack the new locals. - // This also sets up for true call inlining. - if (sym_is_null(self_or_null)) { - localsplus_start = args; - n_locals_already_filled = argcount; + if (sym_is_null(self_or_null) || sym_is_not_null(self_or_null)) { + new_frame = frame_new(ctx, co, 0, args, argcount); + } else { + new_frame = frame_new(ctx, co, 0, NULL, 0); } - new_frame = frame_new(ctx, co, localsplus_start, n_locals_already_filled, 0); stack_pointer[-2 - oparg] = (_Py_UopsSymbol *)new_frame; stack_pointer += -1 - oparg; break; diff --git a/Python/optimizer_symbols.c b/Python/optimizer_symbols.c index f4b6b87e7555d6..2e66441e86271e 100644 --- a/Python/optimizer_symbols.c +++ b/Python/optimizer_symbols.c @@ -332,9 +332,9 @@ _Py_UOpsAbstractFrame * _Py_uop_frame_new( _Py_UOpsContext *ctx, PyCodeObject *co, - _Py_UopsSymbol **localsplus_start, - int n_locals_already_filled, - int curr_stackentries) + int curr_stackentries, + _Py_UopsSymbol **args, + int arg_len) { assert(ctx->curr_frame_depth < MAX_ABSTRACT_FRAME_DEPTH); _Py_UOpsAbstractFrame *frame = &ctx->frames[ctx->curr_frame_depth]; @@ -342,19 +342,22 @@ _Py_uop_frame_new( frame->stack_len = co->co_stacksize; frame->locals_len = co->co_nlocalsplus; - frame->locals = localsplus_start; + frame->locals = ctx->n_consumed; frame->stack = frame->locals + co->co_nlocalsplus; frame->stack_pointer = frame->stack + curr_stackentries; - ctx->n_consumed = localsplus_start + (co->co_nlocalsplus + co->co_stacksize); + ctx->n_consumed = ctx->n_consumed + (co->co_nlocalsplus + co->co_stacksize); if (ctx->n_consumed >= ctx->limit) { ctx->done = true; ctx->out_of_space = true; return NULL; } - // Initialize with the initial state of all local variables - for (int i = n_locals_already_filled; i < co->co_nlocalsplus; i++) { + for (int i = 0; i < arg_len; i++) { + frame->locals[i] = args[i]; + } + + for (int i = arg_len; i < co->co_nlocalsplus; i++) { _Py_UopsSymbol *local = _Py_uop_sym_new_unknown(ctx); frame->locals[i] = local; } From f892ea738072f73e25a44a814eedbcf22a548b72 Mon Sep 17 00:00:00 2001 From: Saul Shanabrook Date: Thu, 23 May 2024 16:00:10 -0400 Subject: [PATCH 23/39] Change type_assign_specific_version_unsafe to use safe setting We need to move type_assign_specific_version_unsafe to a different module so we can use the private API to set the type version now, so it tracks cleanup --- Lib/test/test_type_cache.py | 3 ++- Modules/_testcapimodule.c | 17 ----------------- Modules/_testinternalcapi.c | 20 ++++++++++++++++++++ 3 files changed, 22 insertions(+), 18 deletions(-) diff --git a/Lib/test/test_type_cache.py b/Lib/test/test_type_cache.py index e90e315c808361..edaf076707ad8b 100644 --- a/Lib/test/test_type_cache.py +++ b/Lib/test/test_type_cache.py @@ -10,8 +10,9 @@ # Skip this test if the _testcapi module isn't available. _testcapi = import_helper.import_module("_testcapi") +_testinternalcapi = import_helper.import_module("_testinternalcapi") type_get_version = _testcapi.type_get_version -type_assign_specific_version_unsafe = _testcapi.type_assign_specific_version_unsafe +type_assign_specific_version_unsafe = _testinternalcapi.type_assign_specific_version_unsafe type_assign_version = _testcapi.type_assign_version type_modified = _testcapi.type_modified diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c index f99ebf0dde4f9e..752b9693342da5 100644 --- a/Modules/_testcapimodule.c +++ b/Modules/_testcapimodule.c @@ -2395,21 +2395,6 @@ type_modified(PyObject *self, PyObject *type) Py_RETURN_NONE; } -// Circumvents standard version assignment machinery - use with caution and only on -// short-lived heap types -static PyObject * -type_assign_specific_version_unsafe(PyObject *self, PyObject *args) -{ - PyTypeObject *type; - unsigned int version; - if (!PyArg_ParseTuple(args, "Oi:type_assign_specific_version_unsafe", &type, &version)) { - return NULL; - } - assert(!PyType_HasFeature(type, Py_TPFLAGS_IMMUTABLETYPE)); - type->tp_version_tag = version; - type->tp_flags |= Py_TPFLAGS_VALID_VERSION_TAG; - Py_RETURN_NONE; -} static PyObject * type_assign_version(PyObject *self, PyObject *type) @@ -3418,8 +3403,6 @@ static PyMethodDef TestMethods[] = { {"test_py_is_funcs", test_py_is_funcs, METH_NOARGS}, {"type_get_version", type_get_version, METH_O, PyDoc_STR("type->tp_version_tag")}, {"type_modified", type_modified, METH_O, PyDoc_STR("PyType_Modified")}, - {"type_assign_specific_version_unsafe", type_assign_specific_version_unsafe, METH_VARARGS, - PyDoc_STR("forcefully assign type->tp_version_tag")}, {"type_assign_version", type_assign_version, METH_O, PyDoc_STR("PyUnstable_Type_AssignVersionTag")}, {"type_get_tp_bases", type_get_tp_bases, METH_O}, {"type_get_tp_mro", type_get_tp_mro, METH_O}, diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index e209c7e05264f2..1d71e1dc0338ec 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -2006,6 +2006,23 @@ has_inline_values(PyObject *self, PyObject *obj) Py_RETURN_FALSE; } + +// Circumvents standard version assignment machinery - use with caution and only on +// short-lived heap types +static PyObject * +type_assign_specific_version_unsafe(PyObject *self, PyObject *args) +{ + PyTypeObject *type; + unsigned int version; + if (!PyArg_ParseTuple(args, "Oi:type_assign_specific_version_unsafe", &type, &version)) { + return NULL; + } + assert(!PyType_HasFeature(type, Py_TPFLAGS_IMMUTABLETYPE)); + _PyType_SetVersion(type, version); + type->tp_flags |= Py_TPFLAGS_VALID_VERSION_TAG; + Py_RETURN_NONE; +} + static PyMethodDef module_functions[] = { {"get_configs", get_configs, METH_NOARGS}, {"get_recursion_depth", get_recursion_depth, METH_NOARGS}, @@ -2088,6 +2105,9 @@ static PyMethodDef module_functions[] = { {"get_rare_event_counters", get_rare_event_counters, METH_NOARGS}, {"reset_rare_event_counters", reset_rare_event_counters, METH_NOARGS}, {"has_inline_values", has_inline_values, METH_O}, + {"type_assign_specific_version_unsafe", type_assign_specific_version_unsafe, METH_VARARGS, + PyDoc_STR("forcefully assign type->tp_version_tag")}, + #ifdef Py_GIL_DISABLED {"py_thread_id", get_py_thread_id, METH_NOARGS}, #endif From 06ed18fcd146a07d117c4214f7b6d7f5c50068e1 Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Thu, 23 May 2024 20:17:40 +0000 Subject: [PATCH 24/39] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20b?= =?UTF-8?q?lurb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../2024-05-23-20-17-37.gh-issue-119258.wZFIpt.rst | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-05-23-20-17-37.gh-issue-119258.wZFIpt.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-05-23-20-17-37.gh-issue-119258.wZFIpt.rst b/Misc/NEWS.d/next/Core and Builtins/2024-05-23-20-17-37.gh-issue-119258.wZFIpt.rst new file mode 100644 index 00000000000000..68f1ec1efa5751 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-05-23-20-17-37.gh-issue-119258.wZFIpt.rst @@ -0,0 +1,3 @@ +Eliminate type version guards in the tier two interpreter. + +Note that setting the ``tp_version_tag`` manually (which has never been supported) may result in crashes. From 134357120183fd3985195cc8d992b0bbed81aa91 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Thu, 23 May 2024 13:29:30 -0700 Subject: [PATCH 25/39] Fix more places where tp_version_tag is being set manually --- Objects/typeobject.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 48c1c45366e254..1965c53afafd92 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -895,7 +895,7 @@ type_modification_starting_unlocked(PyTypeObject *type) } /* 0 is not a valid version tag */ - _Py_atomic_store_uint32_release(&type->tp_version_tag, 0); + _PyType_SetVersion(type, 0); } #endif @@ -959,7 +959,7 @@ type_modified_unlocked(PyTypeObject *type) } type->tp_flags &= ~Py_TPFLAGS_VALID_VERSION_TAG; - FT_ATOMIC_STORE_UINT32_RELAXED(type->tp_version_tag, 0); /* 0 is not a valid version tag */ + _PyType_SetVersion(type, 0); /* 0 is not a valid version tag */ if (PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) { // This field *must* be invalidated if the type is modified (see the // comment on struct _specialization_cache): @@ -1036,7 +1036,7 @@ type_mro_modified(PyTypeObject *type, PyObject *bases) { clear: assert(!(type->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN)); type->tp_flags &= ~Py_TPFLAGS_VALID_VERSION_TAG; - FT_ATOMIC_STORE_UINT32_RELAXED(type->tp_version_tag, 0); /* 0 is not a valid version tag */ + _PyType_SetVersion(type, 0); /* 0 is not a valid version tag */ if (PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) { // This field *must* be invalidated if the type is modified (see the // comment on struct _specialization_cache): @@ -5661,7 +5661,7 @@ _PyStaticType_Dealloc(PyInterpreterState *interp, PyTypeObject *type) if (_Py_IsMainInterpreter(interp)) { type->tp_flags &= ~Py_TPFLAGS_READY; type->tp_flags &= ~Py_TPFLAGS_VALID_VERSION_TAG; - type->tp_version_tag = 0; + _PyType_SetVersion(type, 0); } _PyStaticType_ClearWeakRefs(interp, type); @@ -8244,7 +8244,7 @@ _PyStaticType_InitBuiltin(PyInterpreterState *interp, PyTypeObject *self) self->tp_flags |= Py_TPFLAGS_IMMUTABLETYPE; assert(NEXT_GLOBAL_VERSION_TAG <= _Py_MAX_GLOBAL_TYPE_VERSION_TAG); - self->tp_version_tag = NEXT_GLOBAL_VERSION_TAG++; + _PyType_SetVersion(self, NEXT_GLOBAL_VERSION_TAG++); self->tp_flags |= Py_TPFLAGS_VALID_VERSION_TAG; } else { From 37aeea5c87178314455af42ea39570b9bbc5baa7 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Thu, 23 May 2024 14:03:04 -0700 Subject: [PATCH 26/39] _PyType_ClearCodeByVersion isn't necessary --- Include/internal/pycore_typeobject.h | 1 - Objects/typeobject.c | 18 ------------------ 2 files changed, 19 deletions(-) diff --git a/Include/internal/pycore_typeobject.h b/Include/internal/pycore_typeobject.h index ffea0064792f7c..8c24fbbedee842 100644 --- a/Include/internal/pycore_typeobject.h +++ b/Include/internal/pycore_typeobject.h @@ -212,7 +212,6 @@ extern void _PyType_SetFlagsRecursive(PyTypeObject *self, unsigned long mask, extern unsigned int _PyType_GetVersionForCurrentState(PyTypeObject *tp); PyAPI_FUNC(void) _PyType_SetVersion(PyTypeObject *tp, unsigned int version); -void _PyType_ClearCodeByVersion(unsigned int version); PyTypeObject *_PyType_LookupByVersion(unsigned int version); #ifdef __cplusplus diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 86508ece58be7a..4f443bc883b1ea 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -1077,23 +1077,6 @@ _PyType_SetVersion(PyTypeObject *tp, unsigned int version) #endif } -void -_PyType_ClearCodeByVersion(unsigned int version) -{ -#ifndef Py_GIL_DISABLED - PyInterpreterState *interp = _PyInterpreterState_GET(); - PyTypeObject **slot = - interp->types.type_version_cache - + (version % TYPE_VERSION_CACHE_SIZE); - if (*slot) { - assert(PyType_Check(*slot)); - if ((*slot)->tp_version_tag == version) { - *slot = NULL; - } - } -#endif -} - PyTypeObject * _PyType_LookupByVersion(unsigned int version) { @@ -5786,7 +5769,6 @@ type_dealloc(PyObject *self) _PyObject_ASSERT((PyObject *)type, type->tp_flags & Py_TPFLAGS_HEAPTYPE); _PyObject_GC_UNTRACK(type); - _PyType_ClearCodeByVersion(type->tp_version_tag); type_dealloc_common(type); // PyObject_ClearWeakRefs() raises an exception if Py_REFCNT() != 0 From c4436eb21c88d92b9899edc80f35d485363fe549 Mon Sep 17 00:00:00 2001 From: Saul Shanabrook Date: Tue, 4 Jun 2024 10:41:22 -0400 Subject: [PATCH 27/39] Rename typ_version to type_version suggested by @brandtbucher --- Include/internal/pycore_optimizer.h | 2 +- Python/optimizer_symbols.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index 5bce857c6714e3..05a15ab197d159 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -33,7 +33,7 @@ struct _Py_UopsSymbol { int flags; // 0 bits: Top; 2 or more bits: Bottom PyTypeObject *typ; // Borrowed reference PyObject *const_val; // Owned reference (!) - int32_t typ_version; // currently stores type version + int32_t type_version; // currently stores type version }; #define UOP_FORMAT_TARGET 0 diff --git a/Python/optimizer_symbols.c b/Python/optimizer_symbols.c index 2e66441e86271e..f42d773ade4c45 100644 --- a/Python/optimizer_symbols.c +++ b/Python/optimizer_symbols.c @@ -53,7 +53,7 @@ static _Py_UopsSymbol NO_SPACE_SYMBOL = { .flags = IS_NULL | NOT_NULL | NO_SPACE, .typ = NULL, .const_val = NULL, - .typ_version = 0, + .type_version = 0, }; _Py_UopsSymbol * @@ -77,7 +77,7 @@ sym_new(_Py_UOpsContext *ctx) self->flags = 0; self->typ = NULL; self->const_val = NULL; - self->typ_version = 0; + self->type_version = 0; return self; } @@ -157,7 +157,7 @@ _Py_uop_sym_set_type(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, PyTypeObject *ty void _Py_uop_sym_set_type_version(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, int32_t version) { - sym->typ_version = version; + sym->type_version = version; } void @@ -267,7 +267,7 @@ _Py_uop_sym_get_type(_Py_UopsSymbol *sym) int32_t _Py_uop_sym_get_type_version(_Py_UopsSymbol *sym) { - return sym->typ_version; + return sym->type_version; } bool From 0d0c647cb781e23156a5a70b7e288043c16c6bc7 Mon Sep 17 00:00:00 2001 From: Saul Shanabrook Date: Tue, 4 Jun 2024 10:48:01 -0400 Subject: [PATCH 28/39] Change type of type_version to unsigned int to match `typ_version_tag` suggested by @brandtbucher --- Include/internal/pycore_optimizer.h | 6 +++--- Python/optimizer_symbols.c | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index 05a15ab197d159..b62fc7d028411d 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -33,7 +33,7 @@ struct _Py_UopsSymbol { int flags; // 0 bits: Top; 2 or more bits: Bottom PyTypeObject *typ; // Borrowed reference PyObject *const_val; // Owned reference (!) - int32_t type_version; // currently stores type version + unsigned int type_version; // currently stores type version }; #define UOP_FORMAT_TARGET 0 @@ -124,11 +124,11 @@ extern _Py_UopsSymbol *_Py_uop_sym_new_const(_Py_UOpsContext *ctx, PyObject *con extern _Py_UopsSymbol *_Py_uop_sym_new_null(_Py_UOpsContext *ctx); extern bool _Py_uop_sym_has_type(_Py_UopsSymbol *sym); extern bool _Py_uop_sym_matches_type(_Py_UopsSymbol *sym, PyTypeObject *typ); -extern bool _Py_uop_sym_matches_type_version(_Py_UopsSymbol *sym, int32_t version); +extern bool _Py_uop_sym_matches_type_version(_Py_UopsSymbol *sym, unsigned int version); extern void _Py_uop_sym_set_null(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym); extern void _Py_uop_sym_set_non_null(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym); extern void _Py_uop_sym_set_type(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, PyTypeObject *typ); -extern void _Py_uop_sym_set_type_version(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, int32_t version); +extern void _Py_uop_sym_set_type_version(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, unsigned int version); extern void _Py_uop_sym_set_const(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, PyObject *const_val); extern bool _Py_uop_sym_is_bottom(_Py_UopsSymbol *sym); extern int _Py_uop_sym_truthiness(_Py_UopsSymbol *sym); diff --git a/Python/optimizer_symbols.c b/Python/optimizer_symbols.c index f42d773ade4c45..5ce88a433440a9 100644 --- a/Python/optimizer_symbols.c +++ b/Python/optimizer_symbols.c @@ -155,7 +155,7 @@ _Py_uop_sym_set_type(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, PyTypeObject *ty } void -_Py_uop_sym_set_type_version(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, int32_t version) +_Py_uop_sym_set_type_version(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, unsigned int version) { sym->type_version = version; } @@ -264,7 +264,7 @@ _Py_uop_sym_get_type(_Py_UopsSymbol *sym) return sym->typ; } -int32_t +unsigned int _Py_uop_sym_get_type_version(_Py_UopsSymbol *sym) { return sym->type_version; @@ -287,7 +287,7 @@ _Py_uop_sym_matches_type(_Py_UopsSymbol *sym, PyTypeObject *typ) } bool -_Py_uop_sym_matches_type_version(_Py_UopsSymbol *sym, int32_t version) +_Py_uop_sym_matches_type_version(_Py_UopsSymbol *sym, unsigned int version) { return _Py_uop_sym_get_type_version(sym) == version; } From 9b80acbd3f4b7abd31158f7a76b1e731a18e1320 Mon Sep 17 00:00:00 2001 From: Saul Shanabrook Date: Tue, 4 Jun 2024 10:50:28 -0400 Subject: [PATCH 29/39] C formatting fix suggested by @brandtbucher --- Include/internal/pycore_typeobject.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Include/internal/pycore_typeobject.h b/Include/internal/pycore_typeobject.h index 8c24fbbedee842..e1a488155817e2 100644 --- a/Include/internal/pycore_typeobject.h +++ b/Include/internal/pycore_typeobject.h @@ -115,7 +115,7 @@ struct types_state { // tp_version_tag % TYPE_VERSION_CACHE_SIZE // once was equal to the index in the table. // They are cleared when the type object is deallocated. - PyTypeObject* type_version_cache[TYPE_VERSION_CACHE_SIZE]; + PyTypeObject *type_version_cache[TYPE_VERSION_CACHE_SIZE]; }; From 6ddb1e7a7cc682179eb00b9752bd142ad8bfc85d Mon Sep 17 00:00:00 2001 From: Saul Shanabrook Date: Tue, 4 Jun 2024 10:51:39 -0400 Subject: [PATCH 30/39] Rephrase test names suggested by @brandtbucher --- Lib/test/test_capi/test_opt.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 98a33dca6bd221..0791b52a0ec897 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -1351,9 +1351,9 @@ class Foo: guard_type_version_count = opnames.count("_GUARD_TYPE_VERSION") self.assertEqual(guard_type_version_count, 1) - def test_guard_type_version_removed_exiting(self): + def test_guard_type_version_removed_inlined(self): """ - Verify that the guard type version if we have an exiting function + Verify that the guard type version if we have an inlined function """ def fn(): @@ -1377,7 +1377,7 @@ class Foo: guard_type_version_count = opnames.count("_GUARD_TYPE_VERSION") self.assertEqual(guard_type_version_count, 1) - def test_guard_type_version_not_remove(self): + def test_guard_type_version_not_removed(self): """ Verify that the guard type version is not removed if we Modify the class """ From 9d3f914606fb12d0188f465d43cb2efc69e48dec Mon Sep 17 00:00:00 2001 From: Saul Shanabrook Date: Tue, 4 Jun 2024 10:53:14 -0400 Subject: [PATCH 31/39] Rephrase comments suggested by @brandtbucher --- Objects/typeobject.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index eba72bd0047370..fd9853c28dc9f0 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -1048,9 +1048,9 @@ type_mro_modified(PyTypeObject *type, PyObject *bases) { The Tier 2 interpreter requires looking up the type object by the type version, so it can install watchers to understand when they change. -So we add a global cache from type version to weak references of type objects. +So we add a global cache from type version to borrowed references of type objects. -This is similar to how the cache used with function objects and versions. +This is similar to func_version_cache. */ void From 3e0a1e78ff88f89ea60c43080c28beddafd01467 Mon Sep 17 00:00:00 2001 From: Saul Shanabrook Date: Tue, 4 Jun 2024 11:00:15 -0400 Subject: [PATCH 32/39] Make existing inline test clear As per comment by @brandtbucher: > One that performs the mutation partway through execution like this. However, maybe change Foo.attr to 2 so we know it keeps running, but can see the effect of the changed value? I'd also make xxx just another class Bar, since I'm worried that our function watcher branch might invalidate this or something weird since it's just a function. --- Lib/test/test_capi/test_opt.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 0791b52a0ec897..699552ce94b486 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -1379,7 +1379,7 @@ class Foo: def test_guard_type_version_not_removed(self): """ - Verify that the guard type version is not removed if we Modify the class + Verify that the guard type version is not removed if we modify the class """ def thing(a): @@ -1389,21 +1389,22 @@ def thing(a): # for the first 90 iterations we set the attribute on this dummy function which shouldn't # trigger the type watcher # then after 90 it should trigger it and stop optimizing - setattr((Foo, xxx)[i < 90], "attr", 0) + # Note that the code needs to be in this weird form so it's optimized inline without any control flow + setattr((Foo, Bar)[i < 90], "attr", 2) x += a.attr return x class Foo: attr = 1 - def xxx(): + class Bar: pass res, ex = self._run_with_optimizer(thing, Foo()) opnames = list(iter_opnames(ex)) self.assertIsNotNone(ex) - self.assertEqual(res, 181) + self.assertEqual(res, 219) guard_type_version_count = opnames.count("_GUARD_TYPE_VERSION") self.assertEqual(guard_type_version_count, 2) From 3420a42543500d8de37047a523e52bc8bfe63ade Mon Sep 17 00:00:00 2001 From: Saul Shanabrook Date: Tue, 4 Jun 2024 11:30:10 -0400 Subject: [PATCH 33/39] Add a test for escaping behavior, but allow it to fail for now suggested by @brandtbucher --- Lib/test/test_capi/test_opt.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 699552ce94b486..fc6d8b0a3f01d2 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -1408,6 +1408,34 @@ class Bar: guard_type_version_count = opnames.count("_GUARD_TYPE_VERSION") self.assertEqual(guard_type_version_count, 2) + + @unittest.expectedFailure + def test_guard_type_version_not_removed_escaping(self): + """ + Verify that the guard type version is not removed if have an escaping function + """ + + def thing(a): + x = 0 + for i in range(100): + x += a.attr + # eval should be escaping and so should cause optimization to stop and preserve both type versions + eval("None") + x += a.attr + return x + + class Foo: + attr = 1 + res, ex = self._run_with_optimizer(thing, Foo()) + opnames = list(iter_opnames(ex)) + self.assertIsNotNone(ex) + self.assertEqual(res, 200) + guard_type_version_count = opnames.count("_GUARD_TYPE_VERSION") + # Note: This will actually be 1 for noe + # https://github.com/python/cpython/pull/119365#discussion_r1626220129 + self.assertEqual(guard_type_version_count, 2) + + def test_guard_type_version_executor_invalidated(self): """ Verify that the executor is invalided on a type change. From 6886b36fd8ba281a74e0c174f08aca59286764b6 Mon Sep 17 00:00:00 2001 From: Saul Shanabrook Date: Tue, 4 Jun 2024 11:32:58 -0400 Subject: [PATCH 34/39] Verify that expected type version is non zero suggested by @brandtbucher --- Python/optimizer_bytecodes.c | 1 + Python/optimizer_cases.c.h | 1 + 2 files changed, 2 insertions(+) diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index bd69f69c909819..e28963eab475d3 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -116,6 +116,7 @@ dummy_func(void) { } op(_GUARD_TYPE_VERSION, (type_version/2, owner -- owner)) { + assert(type_version); if (sym_matches_type_version(owner, type_version)) { REPLACE_OP(this_instr, _NOP, 0, 0); } else { diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 82704fa82370d8..afd2eaede488a1 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -938,6 +938,7 @@ _Py_UopsSymbol *owner; owner = stack_pointer[-1]; uint32_t type_version = (uint32_t)this_instr->operand; + assert(type_version); if (sym_matches_type_version(owner, type_version)) { REPLACE_OP(this_instr, _NOP, 0, 0); } else { From 376bb5e75769f8441b172a9f85c427d3a4418642 Mon Sep 17 00:00:00 2001 From: Saul Shanabrook Date: Tue, 4 Jun 2024 12:11:09 -0400 Subject: [PATCH 35/39] Fix typo --- Python/optimizer_bytecodes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index e28963eab475d3..fc0a0dbaeb9fab 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -120,7 +120,7 @@ dummy_func(void) { if (sym_matches_type_version(owner, type_version)) { REPLACE_OP(this_instr, _NOP, 0, 0); } else { - // add watcher so that whenever the type changes we invalide this + // add watcher so that whenever the type changes we invalidate this PyTypeObject *type = _PyType_LookupByVersion(type_version); // if the type is null, it was not found in the cache (there was a conflict) // with the key, in which case we can't trust the version From e025cc7d51b5168f0ecb3341e027ec0f8c47e127 Mon Sep 17 00:00:00 2001 From: Saul Shanabrook Date: Tue, 4 Jun 2024 12:20:41 -0400 Subject: [PATCH 36/39] Assert pytype watch never errors --- Python/optimizer_bytecodes.c | 3 ++- Python/optimizer_cases.c.h | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index fc0a0dbaeb9fab..43ddbfe9b486ab 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -126,7 +126,8 @@ dummy_func(void) { // with the key, in which case we can't trust the version if (type) { sym_set_type_version(owner, type_version); - PyType_Watch(TYPE_WATCHER_ID, (PyObject *)type); + // should never fail because we reserved a slot + assert(PyType_Watch(TYPE_WATCHER_ID, (PyObject *)type) == 0); _Py_BloomFilter_Add(dependencies, type); } diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index afd2eaede488a1..a08020f0e1d128 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -942,13 +942,13 @@ if (sym_matches_type_version(owner, type_version)) { REPLACE_OP(this_instr, _NOP, 0, 0); } else { - // add watcher so that whenever the type changes we invalide this + // add watcher so that whenever the type changes we invalidate this PyTypeObject *type = _PyType_LookupByVersion(type_version); // if the type is null, it was not found in the cache (there was a conflict) // with the key, in which case we can't trust the version if (type) { sym_set_type_version(owner, type_version); - PyType_Watch(TYPE_WATCHER_ID, (PyObject *)type); + assert(PyType_Watch(TYPE_WATCHER_ID, (PyObject *)type) == 0); _Py_BloomFilter_Add(dependencies, type); } } From 88f2d895b5311a65964c810c1db02fe9976f7291 Mon Sep 17 00:00:00 2001 From: Saul Shanabrook Date: Tue, 4 Jun 2024 13:12:23 -0400 Subject: [PATCH 37/39] Fix assertion --- Python/optimizer_bytecodes.c | 3 ++- Python/optimizer_cases.c.h | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 43ddbfe9b486ab..c273d73c58f050 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -126,8 +126,9 @@ dummy_func(void) { // with the key, in which case we can't trust the version if (type) { sym_set_type_version(owner, type_version); + int res = PyType_Watch(TYPE_WATCHER_ID, (PyObject *)type); // should never fail because we reserved a slot - assert(PyType_Watch(TYPE_WATCHER_ID, (PyObject *)type) == 0); + assert(res == 0); _Py_BloomFilter_Add(dependencies, type); } diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index a08020f0e1d128..1e86ad769fb195 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -948,7 +948,9 @@ // with the key, in which case we can't trust the version if (type) { sym_set_type_version(owner, type_version); - assert(PyType_Watch(TYPE_WATCHER_ID, (PyObject *)type) == 0); + int res = PyType_Watch(TYPE_WATCHER_ID, (PyObject *)type); + // should never fail because we reserved a slot + assert(res == 0); _Py_BloomFilter_Add(dependencies, type); } } From 82bc177e9b116b4e816957a58105ebbd3ac21f81 Mon Sep 17 00:00:00 2001 From: Saul Shanabrook Date: Tue, 4 Jun 2024 15:34:40 -0400 Subject: [PATCH 38/39] Remove unnecessary assert --- Python/optimizer_bytecodes.c | 4 +--- Python/optimizer_cases.c.h | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index c273d73c58f050..fc0a0dbaeb9fab 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -126,9 +126,7 @@ dummy_func(void) { // with the key, in which case we can't trust the version if (type) { sym_set_type_version(owner, type_version); - int res = PyType_Watch(TYPE_WATCHER_ID, (PyObject *)type); - // should never fail because we reserved a slot - assert(res == 0); + PyType_Watch(TYPE_WATCHER_ID, (PyObject *)type); _Py_BloomFilter_Add(dependencies, type); } diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 1e86ad769fb195..eee33989105f5a 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -948,9 +948,7 @@ // with the key, in which case we can't trust the version if (type) { sym_set_type_version(owner, type_version); - int res = PyType_Watch(TYPE_WATCHER_ID, (PyObject *)type); - // should never fail because we reserved a slot - assert(res == 0); + PyType_Watch(TYPE_WATCHER_ID, (PyObject *)type); _Py_BloomFilter_Add(dependencies, type); } } From bdf0a4a3f8fbfed3831b03ff5d1bf028c3cb0d9f Mon Sep 17 00:00:00 2001 From: Saul Shanabrook Date: Fri, 7 Jun 2024 09:50:17 -0400 Subject: [PATCH 39/39] Handle if type versions are different https://github.com/python/cpython/pull/119365#discussion_r1630593619 --- Include/internal/pycore_optimizer.h | 2 +- Python/optimizer_bytecodes.c | 11 ++++++++--- Python/optimizer_cases.c.h | 11 ++++++++--- Python/optimizer_symbols.c | 8 +++++++- 4 files changed, 24 insertions(+), 8 deletions(-) diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index b62fc7d028411d..fd7833fd231299 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -128,7 +128,7 @@ extern bool _Py_uop_sym_matches_type_version(_Py_UopsSymbol *sym, unsigned int v extern void _Py_uop_sym_set_null(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym); extern void _Py_uop_sym_set_non_null(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym); extern void _Py_uop_sym_set_type(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, PyTypeObject *typ); -extern void _Py_uop_sym_set_type_version(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, unsigned int version); +extern bool _Py_uop_sym_set_type_version(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, unsigned int version); extern void _Py_uop_sym_set_const(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, PyObject *const_val); extern bool _Py_uop_sym_is_bottom(_Py_UopsSymbol *sym); extern int _Py_uop_sym_truthiness(_Py_UopsSymbol *sym); diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index fc0a0dbaeb9fab..e6fb85a90603eb 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -125,9 +125,14 @@ dummy_func(void) { // if the type is null, it was not found in the cache (there was a conflict) // with the key, in which case we can't trust the version if (type) { - sym_set_type_version(owner, type_version); - PyType_Watch(TYPE_WATCHER_ID, (PyObject *)type); - _Py_BloomFilter_Add(dependencies, type); + // if the type version was set properly, then add a watcher + // if it wasn't this means that the type version was previously set to something else + // and we set the owner to bottom, so we don't need to add a watcher because we must have + // already added one earlier. + if (sym_set_type_version(owner, type_version)) { + PyType_Watch(TYPE_WATCHER_ID, (PyObject *)type); + _Py_BloomFilter_Add(dependencies, type); + } } } diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index eee33989105f5a..78ef02016e026b 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -947,9 +947,14 @@ // if the type is null, it was not found in the cache (there was a conflict) // with the key, in which case we can't trust the version if (type) { - sym_set_type_version(owner, type_version); - PyType_Watch(TYPE_WATCHER_ID, (PyObject *)type); - _Py_BloomFilter_Add(dependencies, type); + // if the type version was set properly, then add a watcher + // if it wasn't this means that the type version was previously set to something else + // and we set the owner to bottom, so we don't need to add a watcher because we must have + // already added one earlier. + if (sym_set_type_version(owner, type_version)) { + PyType_Watch(TYPE_WATCHER_ID, (PyObject *)type); + _Py_BloomFilter_Add(dependencies, type); + } } } break; diff --git a/Python/optimizer_symbols.c b/Python/optimizer_symbols.c index 5ce88a433440a9..f3d4078bf1a890 100644 --- a/Python/optimizer_symbols.c +++ b/Python/optimizer_symbols.c @@ -154,10 +154,16 @@ _Py_uop_sym_set_type(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, PyTypeObject *ty } } -void +bool _Py_uop_sym_set_type_version(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, unsigned int version) { + // if the type version was already set, then it must be different and we should set it to bottom + if (sym->type_version) { + sym_set_bottom(ctx, sym); + return false; + } sym->type_version = version; + return true; } void