diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index dd1bf2d1d2b51a..358c2f44855c22 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1322,7 +1322,7 @@ _PyOpcode_macro_expansion[256] = { [BINARY_OP] = { .nuops = 1, .uops = { { _BINARY_OP, OPARG_SIMPLE, 4 } } }, [BINARY_OP_ADD_FLOAT] = { .nuops = 3, .uops = { { _GUARD_TOS_FLOAT, OPARG_SIMPLE, 0 }, { _GUARD_NOS_FLOAT, OPARG_SIMPLE, 0 }, { _BINARY_OP_ADD_FLOAT, OPARG_SIMPLE, 5 } } }, [BINARY_OP_ADD_INT] = { .nuops = 3, .uops = { { _GUARD_TOS_INT, OPARG_SIMPLE, 0 }, { _GUARD_NOS_INT, OPARG_SIMPLE, 0 }, { _BINARY_OP_ADD_INT, OPARG_SIMPLE, 5 } } }, - [BINARY_OP_ADD_UNICODE] = { .nuops = 3, .uops = { { _GUARD_TOS_UNICODE, OPARG_SIMPLE, 0 }, { _GUARD_NOS_UNICODE, OPARG_SIMPLE, 0 }, { _BINARY_OP_ADD_UNICODE, OPARG_SIMPLE, 5 } } }, + [BINARY_OP_ADD_UNICODE] = { .nuops = 5, .uops = { { _GUARD_TOS_UNICODE, OPARG_SIMPLE, 0 }, { _GUARD_NOS_UNICODE, OPARG_SIMPLE, 0 }, { _BINARY_OP_ADD_UNICODE, OPARG_SIMPLE, 5 }, { _POP_TOP_UNICODE, OPARG_SIMPLE, 5 }, { _POP_TOP_UNICODE, OPARG_SIMPLE, 5 } } }, [BINARY_OP_EXTEND] = { .nuops = 2, .uops = { { _GUARD_BINARY_OP_EXTEND, 4, 1 }, { _BINARY_OP_EXTEND, 4, 1 } } }, [BINARY_OP_INPLACE_ADD_UNICODE] = { .nuops = 3, .uops = { { _GUARD_TOS_UNICODE, OPARG_SIMPLE, 0 }, { _GUARD_NOS_UNICODE, OPARG_SIMPLE, 0 }, { _BINARY_OP_INPLACE_ADD_UNICODE, OPARG_SIMPLE, 5 } } }, [BINARY_OP_MULTIPLY_FLOAT] = { .nuops = 3, .uops = { { _GUARD_TOS_FLOAT, OPARG_SIMPLE, 0 }, { _GUARD_NOS_FLOAT, OPARG_SIMPLE, 0 }, { _BINARY_OP_MULTIPLY_FLOAT, OPARG_SIMPLE, 5 } } }, diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 0a85b19e06f158..eb1691775488cc 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -5,6 +5,8 @@ import unittest import gc import os +import random +import string import _opcode @@ -2362,6 +2364,24 @@ def testfunc(n): 
self.assertNotIn("_GUARD_TOS_INT", uops) self.assertNotIn("_GUARD_NOS_INT", uops) + def test_store_fast_pop_top_specialize_unicode(self): + def random_str(n): + return ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(n)) + def testfunc(args): + a, b, n = args + c = '' + for _ in range(n): + c += a + b + return c + + r0, r1 = random_str(32), random_str(32) + res, ex = self._run_with_optimizer(testfunc, (r0, r1, TIER2_THRESHOLD)) + self.assertEqual(res, TIER2_THRESHOLD * (r0 + r1)) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + self.assertIn("_POP_TOP_NOP", uops) + self.assertNotIn("_POP_TOP_UNICODE", uops) + def test_attr_promotion_failure(self): # We're not testing for any specific uops here, just # testing it doesn't crash. diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index 27524d86355b9c..3b5411f8615d62 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -1671,7 +1671,7 @@ def func(): INTERPRETER_FRAME = '9PihcP' else: INTERPRETER_FRAME = '9PhcP' - check(x, size('3PiccPPP' + INTERPRETER_FRAME + 'P')) + check(x, size('3PiccPPP' + INTERPRETER_FRAME + 'PP')) # function def func(): pass check(func, size('16Pi')) @@ -1688,7 +1688,7 @@ def bar(cls): check(bar, size('PP')) # generator def get_gen(): yield 1 - check(get_gen(), size('6P4c' + INTERPRETER_FRAME + 'P')) + check(get_gen(), size('6P4c' + INTERPRETER_FRAME + 'PP')) # iterator check(iter('abc'), size('lP')) # callable-iterator diff --git a/Programs/test_frozenmain.h b/Programs/test_frozenmain.h index dbeedb7ffe0ce6..4e8d43d62c1e3e 100644 --- a/Programs/test_frozenmain.h +++ b/Programs/test_frozenmain.h @@ -1,6 +1,6 @@ // Auto-generated by Programs/freeze_test_frozenmain.py unsigned char M_test_frozenmain[] = { - 227,0,0,0,0,0,0,0,0,0,0,0,0,9,0,0, + 227,0,0,0,0,0,0,0,0,0,0,0,0,10,0,0, 0,0,0,0,0,243,184,0,0,0,128,0,94,0,82,1, 73,0,116,0,94,0,82,1,73,1,116,1,93,2,33,0, 82,2,52,1,0,0,0,0,0,0,31,0,93,2,33,0, diff --git a/Python/bytecodes.c 
b/Python/bytecodes.c index 535e552e047475..ae7816901f55d4 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -772,7 +772,7 @@ dummy_func( macro(BINARY_OP_SUBTRACT_FLOAT) = _GUARD_TOS_FLOAT + _GUARD_NOS_FLOAT + unused/5 + _BINARY_OP_SUBTRACT_FLOAT; - pure op(_BINARY_OP_ADD_UNICODE, (left, right -- res)) { + pure op(_BINARY_OP_ADD_UNICODE, (left, right -- res, l, r)) { PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); assert(PyUnicode_CheckExact(left_o)); @@ -780,15 +780,15 @@ dummy_func( STAT_INC(BINARY_OP, hit); PyObject *res_o = PyUnicode_Concat(left_o, right_o); - PyStackRef_CLOSE_SPECIALIZED(right, _PyUnicode_ExactDealloc); - PyStackRef_CLOSE_SPECIALIZED(left, _PyUnicode_ExactDealloc); INPUTS_DEAD(); ERROR_IF(res_o == NULL); + l = left; + r = right; res = PyStackRef_FromPyObjectSteal(res_o); } macro(BINARY_OP_ADD_UNICODE) = - _GUARD_TOS_UNICODE + _GUARD_NOS_UNICODE + unused/5 + _BINARY_OP_ADD_UNICODE; + _GUARD_TOS_UNICODE + _GUARD_NOS_UNICODE + unused/5 + _BINARY_OP_ADD_UNICODE + _POP_TOP_UNICODE + _POP_TOP_UNICODE; // This is a subtle one. It's a super-instruction for // BINARY_OP_ADD_UNICODE followed by STORE_FAST diff --git a/Python/compile.c b/Python/compile.c index c04391e682f9ac..d05a67421a5918 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -1431,6 +1431,10 @@ optimize_and_assemble_code_unit(struct compiler_unit *u, PyObject *const_cache, &optimized_instrs) < 0) { goto error; } + /* Reserve an extra word on the stack to ensure there is space for uops to + pass at least one item on the stack to a subsequent uop. 
+ */ + stackdepth++; /** Assembly **/ co = _PyAssemble_MakeCodeObject(&u->u_metadata, const_cache, consts, diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 46fc164a5b3bc2..f5c628a6dbeb27 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -1197,6 +1197,8 @@ _PyStackRef right; _PyStackRef left; _PyStackRef res; + _PyStackRef l; + _PyStackRef r; right = stack_pointer[-1]; left = stack_pointer[-2]; PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); @@ -1205,16 +1207,18 @@ assert(PyUnicode_CheckExact(right_o)); STAT_INC(BINARY_OP, hit); PyObject *res_o = PyUnicode_Concat(left_o, right_o); - PyStackRef_CLOSE_SPECIALIZED(right, _PyUnicode_ExactDealloc); - PyStackRef_CLOSE_SPECIALIZED(left, _PyUnicode_ExactDealloc); if (res_o == NULL) { stack_pointer += -2; assert(WITHIN_STACK_BOUNDS()); JUMP_TO_ERROR(); } + l = left; + r = right; res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[-2] = res; - stack_pointer += -1; + stack_pointer[-1] = l; + stack_pointer[0] = r; + stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); break; } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 8f7932f0033c6f..d014f8b5edf327 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -215,6 +215,8 @@ _PyStackRef left; _PyStackRef right; _PyStackRef res; + _PyStackRef l; + _PyStackRef r; // _GUARD_TOS_UNICODE { value = stack_pointer[-1]; @@ -246,13 +248,25 @@ assert(PyUnicode_CheckExact(right_o)); STAT_INC(BINARY_OP, hit); PyObject *res_o = PyUnicode_Concat(left_o, right_o); - PyStackRef_CLOSE_SPECIALIZED(right, _PyUnicode_ExactDealloc); - PyStackRef_CLOSE_SPECIALIZED(left, _PyUnicode_ExactDealloc); if (res_o == NULL) { JUMP_TO_LABEL(pop_2_error); } + l = left; + r = right; res = PyStackRef_FromPyObjectSteal(res_o); } + // _POP_TOP_UNICODE + { + value = r; + assert(PyUnicode_CheckExact(PyStackRef_AsPyObjectBorrow(value))); + PyStackRef_CLOSE_SPECIALIZED(value, _PyUnicode_ExactDealloc); + } + // 
_POP_TOP_UNICODE + { + value = l; + assert(PyUnicode_CheckExact(PyStackRef_AsPyObjectBorrow(value))); + PyStackRef_CLOSE_SPECIALIZED(value, _PyUnicode_ExactDealloc); + } stack_pointer[-2] = res; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index f8fbaf232ffa2e..7dc15fe7abdcad 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -318,7 +318,7 @@ dummy_func(void) { } } - op(_BINARY_OP_ADD_UNICODE, (left, right -- res)) { + op(_BINARY_OP_ADD_UNICODE, (left, right -- res, l, r)) { if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) { assert(PyUnicode_CheckExact(sym_get_const(ctx, left))); assert(PyUnicode_CheckExact(sym_get_const(ctx, right))); @@ -332,6 +332,8 @@ dummy_func(void) { else { res = sym_new_type(ctx, &PyUnicode_Type); } + l = left; + r = right; } op(_BINARY_OP_INPLACE_ADD_UNICODE, (left, right -- )) { @@ -561,6 +563,13 @@ dummy_func(void) { value = PyJitRef_Borrow(sym_new_const(ctx, ptr)); } + op(_POP_TOP_UNICODE, (value -- )) { + if (PyJitRef_IsBorrowed(value) || + sym_is_immortal(PyJitRef_Unwrap(value))) { + REPLACE_OP(this_instr, _POP_TOP_NOP, 0, 0); + } + } + op(_POP_TOP, (value -- )) { PyTypeObject *typ = sym_get_type(value); if (PyJitRef_IsBorrowed(value) || diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 1e581afadc9569..cc2b5ed32ae910 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -141,6 +141,12 @@ } case _POP_TOP_UNICODE: { + JitOptRef value; + value = stack_pointer[-1]; + if (PyJitRef_IsBorrowed(value) || + sym_is_immortal(PyJitRef_Unwrap(value))) { + REPLACE_OP(this_instr, _POP_TOP_NOP, 0, 0); + } stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); break; @@ -558,6 +564,8 @@ JitOptRef right; JitOptRef left; JitOptRef res; + JitOptRef l; + JitOptRef r; right = stack_pointer[-1]; left = stack_pointer[-2]; if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) { @@ -577,7 +585,13 @@ 
res = sym_new_type(ctx, &PyUnicode_Type); stack_pointer += -1; } + l = left; + r = right; stack_pointer[-1] = res; + stack_pointer[0] = l; + stack_pointer[1] = r; + stack_pointer += 2; + assert(WITHIN_STACK_BOUNDS()); break; }