diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index 76123987ac99f5..0aec3d7250d914 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -33,6 +33,8 @@ struct _Py_UopsSymbol { int flags; // 0 bits: Top; 2 or more bits: Bottom PyTypeObject *typ; // Borrowed reference PyObject *const_val; // Owned reference (!) + int32_t typ_version; // currently stores type version + int typ_version_offset; // bytecode offset where the last type version was set }; #define UOP_FORMAT_TARGET 0 @@ -96,6 +98,7 @@ struct _Py_UOpsContext { char done; char out_of_space; bool contradiction; + int64_t latest_escape_offset; // The current "executing" frame. _Py_UOpsAbstractFrame *frame; _Py_UOpsAbstractFrame frames[MAX_ABSTRACT_FRAME_DEPTH]; @@ -123,9 +126,11 @@ extern _Py_UopsSymbol *_Py_uop_sym_new_const(_Py_UOpsContext *ctx, PyObject *con extern _Py_UopsSymbol *_Py_uop_sym_new_null(_Py_UOpsContext *ctx); extern bool _Py_uop_sym_has_type(_Py_UopsSymbol *sym); extern bool _Py_uop_sym_matches_type(_Py_UopsSymbol *sym, PyTypeObject *typ); +extern bool _Py_uop_sym_matches_type_version(_Py_UopsSymbol *sym, int32_t version, int offset); extern void _Py_uop_sym_set_null(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym); extern void _Py_uop_sym_set_non_null(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym); extern void _Py_uop_sym_set_type(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, PyTypeObject *typ); +extern void _Py_uop_sym_set_type_version(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, int32_t version, int offset); extern void _Py_uop_sym_set_const(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, PyObject *const_val); extern bool _Py_uop_sym_is_bottom(_Py_UopsSymbol *sym); extern int _Py_uop_sym_truthiness(_Py_UopsSymbol *sym); diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 0491ff9b84d486..07ec1f0ac2f22f 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -1333,6 +1333,47 @@ def test_modified_local_is_seen_by_optimized_code(self): self.assertIs(type(s), float) self.assertEqual(s, 1024.0) + def test_guard_type_version_removed(self): + def thing(a): + x = 0 + for _ in range(100): + x += a.attr + x += a.attr + return x + + class Foo: + attr = 1 + + res, ex = self._run_with_optimizer(thing, Foo()) + opnames = list(iter_opnames(ex)) + self.assertIsNotNone(ex) + self.assertEqual(res, 200) + guard_type_version_count = opnames.count("_GUARD_TYPE_VERSION") + self.assertEqual(guard_type_version_count, 1) + + def test_guard_type_version_not_removed(self): + def fn(): + pass + + def thing(a): + x = 0 + for _ in range(100): + x += a.attr + fn() + x += a.attr + return x + + class Foo: + attr = 1 + + res, ex = self._run_with_optimizer(thing, Foo()) + opnames = list(iter_opnames(ex)) + + self.assertIsNotNone(ex) + self.assertEqual(res, 200) + guard_type_version_count = opnames.count("_GUARD_TYPE_VERSION") + self.assertEqual(guard_type_version_count, 2) + if __name__ == "__main__": unittest.main() diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index e5d3793bd4d204..75a8aa8f8573fe 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -310,9 +310,11 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, #define sym_has_type _Py_uop_sym_has_type #define sym_get_type _Py_uop_sym_get_type #define sym_matches_type _Py_uop_sym_matches_type +#define sym_matches_type_version _Py_uop_sym_matches_type_version #define sym_set_null(SYM) _Py_uop_sym_set_null(ctx, SYM) #define sym_set_non_null(SYM) _Py_uop_sym_set_non_null(ctx, SYM) #define sym_set_type(SYM, TYPE) _Py_uop_sym_set_type(ctx, SYM, TYPE) +#define sym_set_type_version(SYM, VERSION, OFFSET) _Py_uop_sym_set_type_version(ctx, SYM, VERSION, OFFSET) #define sym_set_const(SYM, CNST) _Py_uop_sym_set_const(ctx, SYM, CNST) #define sym_is_bottom _Py_uop_sym_is_bottom #define sym_truthiness _Py_uop_sym_truthiness @@ -404,6 +406,7 @@ optimize_uops( ctx->done = false; ctx->out_of_space = false; ctx->contradiction = false; + ctx->latest_escape_offset = 0; _PyUOpInstruction *this_instr = NULL; for (int i = 0; !ctx->done; i++) { @@ -414,6 +417,10 @@ optimize_uops( opcode = this_instr->opcode; _Py_UopsSymbol **stack_pointer = ctx->frame->stack_pointer; + if (_PyUop_Flags[opcode] & HAS_ESCAPES_FLAG) { + ctx->latest_escape_offset = i; // i is the offset we're looping on + } + #ifdef Py_DEBUG if (get_lltrace() >= 3) { printf("%4d abs: ", (int)(this_instr - trace)); diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index e5c982befb2411..c679f6b5674a59 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -21,11 +21,13 @@ typedef struct _Py_UOpsAbstractFrame _Py_UOpsAbstractFrame; #define sym_new_const _Py_uop_sym_new_const #define sym_new_null _Py_uop_sym_new_null #define sym_matches_type _Py_uop_sym_matches_type +#define sym_matches_type_version _Py_uop_sym_matches_type_version #define sym_get_type _Py_uop_sym_get_type #define sym_has_type _Py_uop_sym_has_type #define sym_set_null(SYM) _Py_uop_sym_set_null(ctx, SYM) #define sym_set_non_null(SYM) _Py_uop_sym_set_non_null(ctx, SYM) #define sym_set_type(SYM, TYPE) _Py_uop_sym_set_type(ctx, SYM, TYPE) +#define sym_set_type_version(SYM, VERSION, OFFSET) _Py_uop_sym_set_type_version(ctx, SYM, VERSION, OFFSET) #define sym_set_const(SYM, CNST) _Py_uop_sym_set_const(ctx, SYM, CNST) #define sym_is_bottom _Py_uop_sym_is_bottom #define frame_new _Py_uop_frame_new @@ -113,6 +115,13 @@ dummy_func(void) { sym_set_type(right, &PyLong_Type); } + op(_GUARD_TYPE_VERSION, (type_version/2, owner -- owner)) { + if (sym_matches_type_version(owner, type_version, ctx->latest_escape_offset)) { + REPLACE_OP(this_instr, _NOP, 0, 0); + } + sym_set_type_version(owner, type_version, i); + } + op(_GUARD_BOTH_FLOAT, (left, right -- left, right)) { if (sym_matches_type(left, &PyFloat_Type)) { if (sym_matches_type(right, &PyFloat_Type)) { diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index cd4431d55ad908..c4c820ba1d1e41 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -935,6 +935,13 @@ } case _GUARD_TYPE_VERSION: { + _Py_UopsSymbol *owner; + owner = stack_pointer[-1]; + uint32_t type_version = (uint32_t)this_instr->operand; + if (sym_matches_type_version(owner, type_version, ctx->latest_escape_offset)) { + REPLACE_OP(this_instr, _NOP, 0, 0); + } + sym_set_type_version(owner, type_version, i); break; } diff --git a/Python/optimizer_symbols.c b/Python/optimizer_symbols.c index e546eef306eeca..47ccc908fcccdb 100644 --- a/Python/optimizer_symbols.c +++ b/Python/optimizer_symbols.c @@ -52,7 +52,9 @@ static inline int get_lltrace(void) { static _Py_UopsSymbol NO_SPACE_SYMBOL = { .flags = IS_NULL | NOT_NULL | NO_SPACE, .typ = NULL, - .const_val = NULL + .const_val = NULL, + .typ_version = 0, + .typ_version_offset = 0, }; _Py_UopsSymbol * @@ -76,6 +78,8 @@ sym_new(_Py_UOpsContext *ctx) self->flags = 0; self->typ = NULL; self->const_val = NULL; + self->typ_version = 0; + self->typ_version_offset = 0; return self; } @@ -152,6 +156,13 @@ _Py_uop_sym_set_type(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, PyTypeObject *ty } } +void +_Py_uop_sym_set_type_version(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, int32_t version, int offset) +{ + sym->typ_version = version; + sym->typ_version_offset = offset; +} + void _Py_uop_sym_set_const(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, PyObject *const_val) { @@ -256,6 +267,18 @@ _Py_uop_sym_get_type(_Py_UopsSymbol *sym) return sym->typ; } +int32_t +_Py_uop_sym_get_type_version(_Py_UopsSymbol *sym) +{ + return sym->typ_version; +} + +int +_Py_uop_sym_get_type_version_offset(_Py_UopsSymbol *sym) +{ + return sym->typ_version_offset; +} + bool _Py_uop_sym_has_type(_Py_UopsSymbol *sym) { @@ -272,6 +295,14 @@ _Py_uop_sym_matches_type(_Py_UopsSymbol *sym, PyTypeObject *typ) return _Py_uop_sym_get_type(sym) == typ; } +bool +_Py_uop_sym_matches_type_version(_Py_UopsSymbol *sym, int32_t version, int offset) +{ + return _Py_uop_sym_get_type_version(sym) == version + && _Py_uop_sym_get_type_version_offset(sym) > offset; +} + + int _Py_uop_sym_truthiness(_Py_UopsSymbol *sym) {