diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index dd1bf2d1d2b51a..0c9c2c0ef7264a 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1368,7 +1368,7 @@ _PyOpcode_macro_expansion[256] = { [CALL_PY_GENERAL] = { .nuops = 6, .uops = { { _CHECK_PEP_523, OPARG_SIMPLE, 1 }, { _CHECK_FUNCTION_VERSION, 2, 1 }, { _CHECK_RECURSION_REMAINING, OPARG_SIMPLE, 3 }, { _PY_FRAME_GENERAL, OPARG_SIMPLE, 3 }, { _SAVE_RETURN_OFFSET, OPARG_SAVE_RETURN_OFFSET, 3 }, { _PUSH_FRAME, OPARG_SIMPLE, 3 } } }, [CALL_STR_1] = { .nuops = 4, .uops = { { _GUARD_NOS_NULL, OPARG_SIMPLE, 3 }, { _GUARD_CALLABLE_STR_1, OPARG_SIMPLE, 3 }, { _CALL_STR_1, OPARG_SIMPLE, 3 }, { _CHECK_PERIODIC, OPARG_SIMPLE, 3 } } }, [CALL_TUPLE_1] = { .nuops = 4, .uops = { { _GUARD_NOS_NULL, OPARG_SIMPLE, 3 }, { _GUARD_CALLABLE_TUPLE_1, OPARG_SIMPLE, 3 }, { _CALL_TUPLE_1, OPARG_SIMPLE, 3 }, { _CHECK_PERIODIC, OPARG_SIMPLE, 3 } } }, - [CALL_TYPE_1] = { .nuops = 3, .uops = { { _GUARD_NOS_NULL, OPARG_SIMPLE, 3 }, { _GUARD_CALLABLE_TYPE_1, OPARG_SIMPLE, 3 }, { _CALL_TYPE_1, OPARG_SIMPLE, 3 } } }, + [CALL_TYPE_1] = { .nuops = 4, .uops = { { _GUARD_NOS_NULL, OPARG_SIMPLE, 3 }, { _GUARD_CALLABLE_TYPE_1, OPARG_SIMPLE, 3 }, { _CALL_TYPE_1, OPARG_SIMPLE, 3 }, { _POP_TOP, OPARG_SIMPLE, 3 } } }, [CHECK_EG_MATCH] = { .nuops = 1, .uops = { { _CHECK_EG_MATCH, OPARG_SIMPLE, 0 } } }, [CHECK_EXC_MATCH] = { .nuops = 1, .uops = { { _CHECK_EXC_MATCH, OPARG_SIMPLE, 0 } } }, [COMPARE_OP] = { .nuops = 1, .uops = { { _COMPARE_OP, OPARG_SIMPLE, 0 } } }, diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index a9432401525ebb..f2cf4668e2f3da 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -336,24 +336,25 @@ extern "C" { #define _SWAP 537 #define _SWAP_2 538 #define _SWAP_3 539 -#define _TIER2_RESUME_CHECK 540 -#define _TO_BOOL 541 +#define _SWAP_CALL_ONE_LOAD_CONST_INLINE_BORROW 540 +#define _TIER2_RESUME_CHECK 541 +#define _TO_BOOL 542 #define _TO_BOOL_BOOL TO_BOOL_BOOL #define _TO_BOOL_INT TO_BOOL_INT -#define _TO_BOOL_LIST 542 +#define _TO_BOOL_LIST 543 #define _TO_BOOL_NONE TO_BOOL_NONE -#define _TO_BOOL_STR 543 +#define _TO_BOOL_STR 544 #define _UNARY_INVERT UNARY_INVERT #define _UNARY_NEGATIVE UNARY_NEGATIVE #define _UNARY_NOT UNARY_NOT #define _UNPACK_EX UNPACK_EX -#define _UNPACK_SEQUENCE 544 -#define _UNPACK_SEQUENCE_LIST 545 -#define _UNPACK_SEQUENCE_TUPLE 546 -#define _UNPACK_SEQUENCE_TWO_TUPLE 547 +#define _UNPACK_SEQUENCE 545 +#define _UNPACK_SEQUENCE_LIST 546 +#define _UNPACK_SEQUENCE_TUPLE 547 +#define _UNPACK_SEQUENCE_TWO_TUPLE 548 #define _WITH_EXCEPT_START WITH_EXCEPT_START #define _YIELD_VALUE YIELD_VALUE -#define MAX_UOP_ID 547 +#define MAX_UOP_ID 548 #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 52cbc2fffe484e..05844715d72c9f 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -261,7 +261,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_GUARD_NOS_NOT_NULL] = HAS_EXIT_FLAG, [_GUARD_THIRD_NULL] = HAS_DEOPT_FLAG, [_GUARD_CALLABLE_TYPE_1] = HAS_DEOPT_FLAG, - [_CALL_TYPE_1] = HAS_ARG_FLAG | HAS_ESCAPES_FLAG, + [_CALL_TYPE_1] = HAS_ARG_FLAG, [_GUARD_CALLABLE_STR_1] = HAS_DEOPT_FLAG, [_CALL_STR_1] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_GUARD_CALLABLE_TUPLE_1] = HAS_DEOPT_FLAG, @@ -327,6 +327,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_POP_CALL_LOAD_CONST_INLINE_BORROW] = HAS_ESCAPES_FLAG, [_POP_CALL_ONE_LOAD_CONST_INLINE_BORROW] = HAS_ESCAPES_FLAG, [_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW] = HAS_ESCAPES_FLAG, + [_SWAP_CALL_ONE_LOAD_CONST_INLINE_BORROW] = HAS_ESCAPES_FLAG, [_LOAD_CONST_UNDER_INLINE] = 0, [_LOAD_CONST_UNDER_INLINE_BORROW] = 0, [_CHECK_FUNCTION] = HAS_DEOPT_FLAG, @@ -648,6 +649,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_SWAP] = "_SWAP", [_SWAP_2] = "_SWAP_2", [_SWAP_3] = "_SWAP_3", + [_SWAP_CALL_ONE_LOAD_CONST_INLINE_BORROW] = "_SWAP_CALL_ONE_LOAD_CONST_INLINE_BORROW", [_TIER2_RESUME_CHECK] = "_TIER2_RESUME_CHECK", [_TO_BOOL] = "_TO_BOOL", [_TO_BOOL_BOOL] = "_TO_BOOL_BOOL", @@ -1283,6 +1285,8 @@ int _PyUop_num_popped(int opcode, int oparg) return 3; case _POP_CALL_TWO_LOAD_CONST_INLINE_BORROW: return 4; + case _SWAP_CALL_ONE_LOAD_CONST_INLINE_BORROW: + return 3; case _LOAD_CONST_UNDER_INLINE: return 1; case _LOAD_CONST_UNDER_INLINE_BORROW: diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 0a85b19e06f158..4831206cecdfd1 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -1819,9 +1819,10 @@ def testfunc(n): self.assertIsNotNone(ex) uops = get_opnames(ex) # When the result of type(...) is known, _CALL_TYPE_1 is replaced with - # _POP_CALL_ONE_LOAD_CONST_INLINE_BORROW which is optimized away in + # _SWAP_CALL_ONE_LOAD_CONST_INLINE_BORROW which is optimized away in # remove_unneeded_uops. self.assertNotIn("_CALL_TYPE_1", uops) + self.assertNotIn("_SWAP_CALL_ONE_LOAD_CONST_INLINE_BORROW", uops) self.assertNotIn("_POP_CALL_ONE_LOAD_CONST_INLINE_BORROW", uops) self.assertNotIn("_POP_CALL_LOAD_CONST_INLINE_BORROW", uops) self.assertNotIn("_POP_TOP_LOAD_CONST_INLINE_BORROW", uops) @@ -1841,6 +1842,21 @@ def testfunc(n): uops = get_opnames(ex) self.assertNotIn("_GUARD_IS_NOT_NONE_POP", uops) + def test_call_type_1_pop_top(self): + def testfunc(n): + x = 0 + for _ in range(n): + foo = eval('42') + x += type(foo) is int + return x + + res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD) + self.assertEqual(res, TIER2_THRESHOLD) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + self.assertIn("_CALL_TYPE_1", uops) + self.assertIn("_POP_TOP_NOP", uops) + def test_call_str_1(self): def testfunc(n): x = 0 diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-23-20-54-15.gh-issue-134584.ZNcziF.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-23-20-54-15.gh-issue-134584.ZNcziF.rst new file mode 100644 index 00000000000000..520690bc25c4cf --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-23-20-54-15.gh-issue-134584.ZNcziF.rst @@ -0,0 +1 @@ +Eliminate redundant refcounting from ``_CALL_TYPE_1``. Patch by Tomas Roun diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 535e552e047475..54ac0cce9a980d 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4030,17 +4030,14 @@ dummy_func( DEOPT_IF(callable_o != (PyObject *)&PyType_Type); } - op(_CALL_TYPE_1, (callable, null, arg -- res)) { + op(_CALL_TYPE_1, (callable, null, arg -- res, a)) { PyObject *arg_o = PyStackRef_AsPyObjectBorrow(arg); assert(oparg == 1); - DEAD(null); - DEAD(callable); - (void)callable; // Silence compiler warnings about unused variables - (void)null; STAT_INC(CALL, hit); + INPUTS_DEAD(); res = PyStackRef_FromPyObjectNew(Py_TYPE(arg_o)); - PyStackRef_CLOSE(arg); + a = arg; } macro(CALL_TYPE_1) = @@ -4048,7 +4045,8 @@ dummy_func( unused/2 + _GUARD_NOS_NULL + _GUARD_CALLABLE_TYPE_1 + - _CALL_TYPE_1; + _CALL_TYPE_1 + + POP_TOP; op(_GUARD_CALLABLE_STR_1, (callable, unused, unused -- callable, unused, unused)) { PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); @@ -5360,6 +5358,17 @@ dummy_func( value = PyStackRef_FromPyObjectBorrow(ptr); } + tier2 op(_SWAP_CALL_ONE_LOAD_CONST_INLINE_BORROW, (ptr/4, callable, null, arg -- value, a)) { + PyStackRef_CLOSE(arg); + (void)null; // Silence compiler warnings about unused variables + (void)callable; + DEAD(null); + DEAD(callable); + assert(_Py_IsImmortal(PyStackRef_AsPyObjectBorrow(callable))); + value = PyStackRef_FromPyObjectBorrow(ptr); + a = arg; + } + tier2 op(_LOAD_CONST_UNDER_INLINE, (ptr/4, old -- value, new)) { new = old; DEAD(old); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 46fc164a5b3bc2..5617cbc4b6b750 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -5408,25 +5408,19 @@ case _CALL_TYPE_1: { _PyStackRef arg; - _PyStackRef null; - _PyStackRef callable; _PyStackRef res; + _PyStackRef a; oparg = CURRENT_OPARG(); arg = stack_pointer[-1]; - null = stack_pointer[-2]; - callable = stack_pointer[-3]; PyObject *arg_o = PyStackRef_AsPyObjectBorrow(arg); assert(oparg == 1); - (void)callable; - (void)null; STAT_INC(CALL, hit); res = PyStackRef_FromPyObjectNew(Py_TYPE(arg_o)); + a = arg; stack_pointer[-3] = res; - stack_pointer += -2; + stack_pointer[-2] = a; + stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); - _PyFrame_SetStackPointer(frame, stack_pointer); - PyStackRef_CLOSE(arg); - stack_pointer = _PyFrame_GetStackPointer(frame); break; } @@ -7402,6 +7396,31 @@ break; } + case _SWAP_CALL_ONE_LOAD_CONST_INLINE_BORROW: { + _PyStackRef arg; + _PyStackRef null; + _PyStackRef callable; + _PyStackRef value; + _PyStackRef a; + arg = stack_pointer[-1]; + null = stack_pointer[-2]; + callable = stack_pointer[-3]; + PyObject *ptr = (PyObject *)CURRENT_OPERAND0(); + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(arg); + stack_pointer = _PyFrame_GetStackPointer(frame); + (void)null; + (void)callable; + assert(_Py_IsImmortal(PyStackRef_AsPyObjectBorrow(callable))); + value = PyStackRef_FromPyObjectBorrow(ptr); + a = arg; + stack_pointer[-2] = value; + stack_pointer[-1] = a; + break; + } + case _LOAD_CONST_UNDER_INLINE: { _PyStackRef old; _PyStackRef value; diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 8f7932f0033c6f..bb21a9c6c87d79 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -4539,6 +4539,8 @@ _PyStackRef callable; _PyStackRef arg; _PyStackRef res; + _PyStackRef a; + _PyStackRef value; /* Skip 1 cache entry */ /* Skip 2 cache entries */ // _GUARD_NOS_NULL @@ -4565,15 +4567,18 @@ arg = stack_pointer[-1]; PyObject *arg_o = PyStackRef_AsPyObjectBorrow(arg); assert(oparg == 1); - (void)callable; - (void)null; STAT_INC(CALL, hit); res = PyStackRef_FromPyObjectNew(Py_TYPE(arg_o)); + a = arg; + } + // _POP_TOP + { + value = a; stack_pointer[-3] = res; stack_pointer += -2; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); - PyStackRef_CLOSE(arg); + PyStackRef_XCLOSE(value); stack_pointer = _PyFrame_GetStackPointer(frame); } DISPATCH(); diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 145a8c118d3612..fab6ad9486903c 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -566,7 +566,9 @@ const uint16_t op_without_push[MAX_UOP_ID + 1] = { [_POP_TOP_LOAD_CONST_INLINE] = _POP_TOP, [_POP_TOP_LOAD_CONST_INLINE_BORROW] = _POP_TOP, [_POP_TWO_LOAD_CONST_INLINE_BORROW] = _POP_TWO, + [_POP_CALL_ONE_LOAD_CONST_INLINE_BORROW] = _POP_CALL_ONE, [_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW] = _POP_CALL_TWO, + [_SWAP_CALL_ONE_LOAD_CONST_INLINE_BORROW] = _POP_CALL_ONE_LOAD_CONST_INLINE_BORROW, }; const bool op_skip[MAX_UOP_ID + 1] = { @@ -578,6 +580,10 @@ const bool op_skip[MAX_UOP_ID + 1] = { const uint16_t op_without_pop[MAX_UOP_ID + 1] = { [_POP_TOP] = _NOP, + [_POP_TOP_NOP] = _NOP, + [_POP_TOP_INT] = _NOP, + [_POP_TOP_FLOAT] = _NOP, + [_POP_TOP_UNICODE] = _NOP, [_POP_TOP_LOAD_CONST_INLINE] = _LOAD_CONST_INLINE, [_POP_TOP_LOAD_CONST_INLINE_BORROW] = _LOAD_CONST_INLINE_BORROW, [_POP_TWO] = _POP_TOP, diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index f8fbaf232ffa2e..87ec40ad56d45d 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -561,6 +561,11 @@ dummy_func(void) { value = PyJitRef_Borrow(sym_new_const(ctx, ptr)); } + op(_SWAP_CALL_ONE_LOAD_CONST_INLINE_BORROW, (ptr/4, unused, unused, arg -- value, a)) { + value = PyJitRef_Borrow(sym_new_const(ctx, ptr)); + a = arg; + } + op(_POP_TOP, (value -- )) { PyTypeObject *typ = sym_get_type(value); if (PyJitRef_IsBorrowed(value) || @@ -969,16 +974,17 @@ dummy_func(void) { next = sym_new_type(ctx, &PyLong_Type); } - op(_CALL_TYPE_1, (unused, unused, arg -- res)) { + op(_CALL_TYPE_1, (unused, unused, arg -- res, a)) { PyObject* type = (PyObject *)sym_get_type(arg); if (type) { res = sym_new_const(ctx, type); - REPLACE_OP(this_instr, _POP_CALL_ONE_LOAD_CONST_INLINE_BORROW, 0, + REPLACE_OP(this_instr, _SWAP_CALL_ONE_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)type); } else { res = sym_new_not_null(ctx); } + a = arg; } op(_CALL_STR_1, (unused, unused, arg -- res)) { diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 1e581afadc9569..8753d89633ba75 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -2133,18 +2133,21 @@ case _CALL_TYPE_1: { JitOptRef arg; JitOptRef res; + JitOptRef a; arg = stack_pointer[-1]; PyObject* type = (PyObject *)sym_get_type(arg); if (type) { res = sym_new_const(ctx, type); - REPLACE_OP(this_instr, _POP_CALL_ONE_LOAD_CONST_INLINE_BORROW, 0, + REPLACE_OP(this_instr, _SWAP_CALL_ONE_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)type); } else { res = sym_new_not_null(ctx); } + a = arg; stack_pointer[-3] = res; - stack_pointer += -2; + stack_pointer[-2] = a; + stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); break; } @@ -2791,6 +2794,21 @@ break; } + case _SWAP_CALL_ONE_LOAD_CONST_INLINE_BORROW: { + JitOptRef arg; + JitOptRef value; + JitOptRef a; + arg = stack_pointer[-1]; + PyObject *ptr = (PyObject *)this_instr->operand0; + value = PyJitRef_Borrow(sym_new_const(ctx, ptr)); + a = arg; + stack_pointer[-3] = value; + stack_pointer[-2] = a; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + case _LOAD_CONST_UNDER_INLINE: { JitOptRef value; JitOptRef new;