From a2817753a9d4a5bfc8873d60a72309efa731cb4a Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 7 Jul 2025 16:56:11 +0100 Subject: [PATCH 1/8] Faster side exits. --- Include/cpython/pystate.h | 3 + Include/internal/pycore_interp_structs.h | 1 + Include/internal/pycore_optimizer.h | 11 +- Include/internal/pycore_uop_ids.h | 379 ++++++++++++----------- Include/internal/pycore_uop_metadata.h | 4 + Python/bytecodes.c | 66 ++-- Python/ceval.c | 2 +- Python/ceval_macros.h | 5 +- Python/executor_cases.c.h | 71 +++-- Python/generated_cases.c.h | 2 + Python/optimizer.c | 46 ++- Python/optimizer_bytecodes.c | 2 +- Python/optimizer_cases.c.h | 4 + Python/pystate.c | 1 + Tools/jit/template.c | 1 - 15 files changed, 338 insertions(+), 260 deletions(-) diff --git a/Include/cpython/pystate.h b/Include/cpython/pystate.h index be582122118e44..e13b2b373c47b1 100644 --- a/Include/cpython/pystate.h +++ b/Include/cpython/pystate.h @@ -198,6 +198,9 @@ struct _ts { PyObject *current_executor; + /* Internal to the JIT */ + struct _PyExitData *jit_exit; + uint64_t dict_global_version; /* Used to store/retrieve `threading.local` keys/values for this thread */ diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h index f1f427d99dea69..dbf702d27762b1 100644 --- a/Include/internal/pycore_interp_structs.h +++ b/Include/internal/pycore_interp_structs.h @@ -944,6 +944,7 @@ struct _is { bool jit; struct _PyExecutorObject *executor_list_head; struct _PyExecutorObject *executor_deletion_list_head; + struct _PyExecutorObject *cold_executor; int executor_deletion_list_remaining_capacity; size_t trace_run_counter; _rare_events rare_events; diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index 8b7f12bf03d624..f644551c48fe5b 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -67,8 +67,9 @@ typedef struct { #endif } _PyUOpInstruction; -typedef struct { +typedef struct _PyExitData { uint32_t target; + uint16_t index; _Py_BackoffCounter temperature; struct _PyExecutorObject *executor; } _PyExitData; @@ -354,6 +355,14 @@ PyAPI_FUNC(PyObject *) _Py_uop_symbols_test(PyObject *self, PyObject *ignored); PyAPI_FUNC(int) _PyOptimizer_Optimize(_PyInterpreterFrame *frame, _Py_CODEUNIT *start, _PyExecutorObject **exec_ptr, int chain_depth); +static inline _PyExecutorObject *_PyExecutor_FromExit(_PyExitData *exit) +{ + _PyExitData *exit0 = exit - exit->index; + return (_PyExecutorObject *)(((char *)exit0) - offsetof(_PyExecutorObject, exits)); +} + +extern _PyExecutorObject *_PyExecutor_GetColdExecutor(void); + static inline int is_terminator(const _PyUOpInstruction *uop) { int opcode = uop->opcode; diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index a9432401525ebb..684969a23c4d22 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -81,99 +81,100 @@ extern "C" { #define _CHECK_STACK_SPACE 357 #define _CHECK_STACK_SPACE_OPERAND 358 #define _CHECK_VALIDITY 359 -#define _COMPARE_OP 360 -#define _COMPARE_OP_FLOAT 361 -#define _COMPARE_OP_INT 362 -#define _COMPARE_OP_STR 363 -#define _CONTAINS_OP 364 -#define _CONTAINS_OP_DICT 365 -#define _CONTAINS_OP_SET 366 +#define _COLD_EXIT 360 +#define _COMPARE_OP 361 +#define _COMPARE_OP_FLOAT 362 +#define _COMPARE_OP_INT 363 +#define _COMPARE_OP_STR 364 +#define _CONTAINS_OP 365 +#define _CONTAINS_OP_DICT 366 +#define _CONTAINS_OP_SET 367 #define _CONVERT_VALUE CONVERT_VALUE -#define _COPY 367 -#define _COPY_1 368 -#define _COPY_2 369 -#define _COPY_3 370 +#define _COPY 368 +#define _COPY_1 369 +#define _COPY_2 370 +#define _COPY_3 371 #define _COPY_FREE_VARS COPY_FREE_VARS -#define _CREATE_INIT_FRAME 371 +#define _CREATE_INIT_FRAME 372 #define _DELETE_ATTR DELETE_ATTR #define _DELETE_DEREF DELETE_DEREF #define _DELETE_FAST DELETE_FAST #define _DELETE_GLOBAL DELETE_GLOBAL #define _DELETE_NAME DELETE_NAME #define _DELETE_SUBSCR DELETE_SUBSCR -#define _DEOPT 372 +#define _DEOPT 373 #define _DICT_MERGE DICT_MERGE #define _DICT_UPDATE DICT_UPDATE -#define _DO_CALL 373 -#define _DO_CALL_FUNCTION_EX 374 -#define _DO_CALL_KW 375 +#define _DO_CALL 374 +#define _DO_CALL_FUNCTION_EX 375 +#define _DO_CALL_KW 376 #define _END_FOR END_FOR #define _END_SEND END_SEND -#define _ERROR_POP_N 376 +#define _ERROR_POP_N 377 #define _EXIT_INIT_CHECK EXIT_INIT_CHECK -#define _EXPAND_METHOD 377 -#define _EXPAND_METHOD_KW 378 -#define _FATAL_ERROR 379 +#define _EXPAND_METHOD 378 +#define _EXPAND_METHOD_KW 379 +#define _FATAL_ERROR 380 #define _FORMAT_SIMPLE FORMAT_SIMPLE #define _FORMAT_WITH_SPEC FORMAT_WITH_SPEC -#define _FOR_ITER 380 -#define _FOR_ITER_GEN_FRAME 381 -#define _FOR_ITER_TIER_TWO 382 +#define _FOR_ITER 381 +#define _FOR_ITER_GEN_FRAME 382 +#define _FOR_ITER_TIER_TWO 383 #define _GET_AITER GET_AITER #define _GET_ANEXT GET_ANEXT #define _GET_AWAITABLE GET_AWAITABLE #define _GET_ITER GET_ITER #define _GET_LEN GET_LEN #define _GET_YIELD_FROM_ITER GET_YIELD_FROM_ITER -#define _GUARD_BINARY_OP_EXTEND 383 -#define _GUARD_CALLABLE_ISINSTANCE 384 -#define _GUARD_CALLABLE_LEN 385 -#define _GUARD_CALLABLE_LIST_APPEND 386 -#define _GUARD_CALLABLE_STR_1 387 -#define _GUARD_CALLABLE_TUPLE_1 388 -#define _GUARD_CALLABLE_TYPE_1 389 -#define _GUARD_DORV_NO_DICT 390 -#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 391 -#define _GUARD_GLOBALS_VERSION 392 -#define _GUARD_IS_FALSE_POP 393 -#define _GUARD_IS_NONE_POP 394 -#define _GUARD_IS_NOT_NONE_POP 395 -#define _GUARD_IS_TRUE_POP 396 -#define _GUARD_KEYS_VERSION 397 -#define _GUARD_NOS_DICT 398 -#define _GUARD_NOS_FLOAT 399 -#define _GUARD_NOS_INT 400 -#define _GUARD_NOS_LIST 401 -#define _GUARD_NOS_NOT_NULL 402 -#define _GUARD_NOS_NULL 403 -#define _GUARD_NOS_OVERFLOWED 404 -#define _GUARD_NOS_TUPLE 405 -#define _GUARD_NOS_UNICODE 406 -#define _GUARD_NOT_EXHAUSTED_LIST 407 -#define _GUARD_NOT_EXHAUSTED_RANGE 408 -#define _GUARD_NOT_EXHAUSTED_TUPLE 409 -#define _GUARD_THIRD_NULL 410 -#define _GUARD_TOS_ANY_SET 411 -#define _GUARD_TOS_DICT 412 -#define _GUARD_TOS_FLOAT 413 -#define _GUARD_TOS_INT 414 -#define _GUARD_TOS_LIST 415 -#define _GUARD_TOS_OVERFLOWED 416 -#define _GUARD_TOS_SLICE 417 -#define _GUARD_TOS_TUPLE 418 -#define _GUARD_TOS_UNICODE 419 -#define _GUARD_TYPE_VERSION 420 -#define _GUARD_TYPE_VERSION_AND_LOCK 421 +#define _GUARD_BINARY_OP_EXTEND 384 +#define _GUARD_CALLABLE_ISINSTANCE 385 +#define _GUARD_CALLABLE_LEN 386 +#define _GUARD_CALLABLE_LIST_APPEND 387 +#define _GUARD_CALLABLE_STR_1 388 +#define _GUARD_CALLABLE_TUPLE_1 389 +#define _GUARD_CALLABLE_TYPE_1 390 +#define _GUARD_DORV_NO_DICT 391 +#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 392 +#define _GUARD_GLOBALS_VERSION 393 +#define _GUARD_IS_FALSE_POP 394 +#define _GUARD_IS_NONE_POP 395 +#define _GUARD_IS_NOT_NONE_POP 396 +#define _GUARD_IS_TRUE_POP 397 +#define _GUARD_KEYS_VERSION 398 +#define _GUARD_NOS_DICT 399 +#define _GUARD_NOS_FLOAT 400 +#define _GUARD_NOS_INT 401 +#define _GUARD_NOS_LIST 402 +#define _GUARD_NOS_NOT_NULL 403 +#define _GUARD_NOS_NULL 404 +#define _GUARD_NOS_OVERFLOWED 405 +#define _GUARD_NOS_TUPLE 406 +#define _GUARD_NOS_UNICODE 407 +#define _GUARD_NOT_EXHAUSTED_LIST 408 +#define _GUARD_NOT_EXHAUSTED_RANGE 409 +#define _GUARD_NOT_EXHAUSTED_TUPLE 410 +#define _GUARD_THIRD_NULL 411 +#define _GUARD_TOS_ANY_SET 412 +#define _GUARD_TOS_DICT 413 +#define _GUARD_TOS_FLOAT 414 +#define _GUARD_TOS_INT 415 +#define _GUARD_TOS_LIST 416 +#define _GUARD_TOS_OVERFLOWED 417 +#define _GUARD_TOS_SLICE 418 +#define _GUARD_TOS_TUPLE 419 +#define _GUARD_TOS_UNICODE 420 +#define _GUARD_TYPE_VERSION 421 +#define _GUARD_TYPE_VERSION_AND_LOCK 422 #define _IMPORT_FROM IMPORT_FROM #define _IMPORT_NAME IMPORT_NAME -#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 422 -#define _INIT_CALL_PY_EXACT_ARGS 423 -#define _INIT_CALL_PY_EXACT_ARGS_0 424 -#define _INIT_CALL_PY_EXACT_ARGS_1 425 -#define _INIT_CALL_PY_EXACT_ARGS_2 426 -#define _INIT_CALL_PY_EXACT_ARGS_3 427 -#define _INIT_CALL_PY_EXACT_ARGS_4 428 -#define _INSERT_NULL 429 +#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 423 +#define _INIT_CALL_PY_EXACT_ARGS 424 +#define _INIT_CALL_PY_EXACT_ARGS_0 425 +#define _INIT_CALL_PY_EXACT_ARGS_1 426 +#define _INIT_CALL_PY_EXACT_ARGS_2 427 +#define _INIT_CALL_PY_EXACT_ARGS_3 428 +#define _INIT_CALL_PY_EXACT_ARGS_4 429 +#define _INSERT_NULL 430 #define _INSTRUMENTED_FOR_ITER INSTRUMENTED_FOR_ITER #define _INSTRUMENTED_INSTRUCTION INSTRUMENTED_INSTRUCTION #define _INSTRUMENTED_JUMP_FORWARD INSTRUMENTED_JUMP_FORWARD @@ -183,177 +184,177 @@ extern "C" { #define _INSTRUMENTED_POP_JUMP_IF_NONE INSTRUMENTED_POP_JUMP_IF_NONE #define _INSTRUMENTED_POP_JUMP_IF_NOT_NONE INSTRUMENTED_POP_JUMP_IF_NOT_NONE #define _INSTRUMENTED_POP_JUMP_IF_TRUE INSTRUMENTED_POP_JUMP_IF_TRUE -#define _IS_NONE 430 +#define _IS_NONE 431 #define _IS_OP IS_OP -#define _ITER_CHECK_LIST 431 -#define _ITER_CHECK_RANGE 432 -#define _ITER_CHECK_TUPLE 433 -#define _ITER_JUMP_LIST 434 -#define _ITER_JUMP_RANGE 435 -#define _ITER_JUMP_TUPLE 436 -#define _ITER_NEXT_LIST 437 -#define _ITER_NEXT_LIST_TIER_TWO 438 -#define _ITER_NEXT_RANGE 439 -#define _ITER_NEXT_TUPLE 440 -#define _JUMP_TO_TOP 441 +#define _ITER_CHECK_LIST 432 +#define _ITER_CHECK_RANGE 433 +#define _ITER_CHECK_TUPLE 434 +#define _ITER_JUMP_LIST 435 +#define _ITER_JUMP_RANGE 436 +#define _ITER_JUMP_TUPLE 437 +#define _ITER_NEXT_LIST 438 +#define _ITER_NEXT_LIST_TIER_TWO 439 +#define _ITER_NEXT_RANGE 440 +#define _ITER_NEXT_TUPLE 441 +#define _JUMP_TO_TOP 442 #define _LIST_APPEND LIST_APPEND #define _LIST_EXTEND LIST_EXTEND -#define _LOAD_ATTR 442 -#define _LOAD_ATTR_CLASS 443 +#define _LOAD_ATTR 443 +#define _LOAD_ATTR_CLASS 444 #define _LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN -#define _LOAD_ATTR_INSTANCE_VALUE 444 -#define _LOAD_ATTR_METHOD_LAZY_DICT 445 -#define _LOAD_ATTR_METHOD_NO_DICT 446 -#define _LOAD_ATTR_METHOD_WITH_VALUES 447 -#define _LOAD_ATTR_MODULE 448 -#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 449 -#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 450 -#define _LOAD_ATTR_PROPERTY_FRAME 451 -#define _LOAD_ATTR_SLOT 452 -#define _LOAD_ATTR_WITH_HINT 453 +#define _LOAD_ATTR_INSTANCE_VALUE 445 +#define _LOAD_ATTR_METHOD_LAZY_DICT 446 +#define _LOAD_ATTR_METHOD_NO_DICT 447 +#define _LOAD_ATTR_METHOD_WITH_VALUES 448 +#define _LOAD_ATTR_MODULE 449 +#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 450 +#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 451 +#define _LOAD_ATTR_PROPERTY_FRAME 452 +#define _LOAD_ATTR_SLOT 453 +#define _LOAD_ATTR_WITH_HINT 454 #define _LOAD_BUILD_CLASS LOAD_BUILD_CLASS -#define _LOAD_BYTECODE 454 +#define _LOAD_BYTECODE 455 #define _LOAD_COMMON_CONSTANT LOAD_COMMON_CONSTANT #define _LOAD_CONST LOAD_CONST -#define _LOAD_CONST_INLINE 455 -#define _LOAD_CONST_INLINE_BORROW 456 -#define _LOAD_CONST_UNDER_INLINE 457 -#define _LOAD_CONST_UNDER_INLINE_BORROW 458 +#define _LOAD_CONST_INLINE 456 +#define _LOAD_CONST_INLINE_BORROW 457 +#define _LOAD_CONST_UNDER_INLINE 458 +#define _LOAD_CONST_UNDER_INLINE_BORROW 459 #define _LOAD_DEREF LOAD_DEREF -#define _LOAD_FAST 459 -#define _LOAD_FAST_0 460 -#define _LOAD_FAST_1 461 -#define _LOAD_FAST_2 462 -#define _LOAD_FAST_3 463 -#define _LOAD_FAST_4 464 -#define _LOAD_FAST_5 465 -#define _LOAD_FAST_6 466 -#define _LOAD_FAST_7 467 +#define _LOAD_FAST 460 +#define _LOAD_FAST_0 461 +#define _LOAD_FAST_1 462 +#define _LOAD_FAST_2 463 +#define _LOAD_FAST_3 464 +#define _LOAD_FAST_4 465 +#define _LOAD_FAST_5 466 +#define _LOAD_FAST_6 467 +#define _LOAD_FAST_7 468 #define _LOAD_FAST_AND_CLEAR LOAD_FAST_AND_CLEAR -#define _LOAD_FAST_BORROW 468 -#define _LOAD_FAST_BORROW_0 469 -#define _LOAD_FAST_BORROW_1 470 -#define _LOAD_FAST_BORROW_2 471 -#define _LOAD_FAST_BORROW_3 472 -#define _LOAD_FAST_BORROW_4 473 -#define _LOAD_FAST_BORROW_5 474 -#define _LOAD_FAST_BORROW_6 475 -#define _LOAD_FAST_BORROW_7 476 +#define _LOAD_FAST_BORROW 469 +#define _LOAD_FAST_BORROW_0 470 +#define _LOAD_FAST_BORROW_1 471 +#define _LOAD_FAST_BORROW_2 472 +#define _LOAD_FAST_BORROW_3 473 +#define _LOAD_FAST_BORROW_4 474 +#define _LOAD_FAST_BORROW_5 475 +#define _LOAD_FAST_BORROW_6 476 +#define _LOAD_FAST_BORROW_7 477 #define _LOAD_FAST_BORROW_LOAD_FAST_BORROW LOAD_FAST_BORROW_LOAD_FAST_BORROW #define _LOAD_FAST_CHECK LOAD_FAST_CHECK #define _LOAD_FAST_LOAD_FAST LOAD_FAST_LOAD_FAST #define _LOAD_FROM_DICT_OR_DEREF LOAD_FROM_DICT_OR_DEREF #define _LOAD_FROM_DICT_OR_GLOBALS LOAD_FROM_DICT_OR_GLOBALS -#define _LOAD_GLOBAL 477 -#define _LOAD_GLOBAL_BUILTINS 478 -#define _LOAD_GLOBAL_MODULE 479 +#define _LOAD_GLOBAL 478 +#define _LOAD_GLOBAL_BUILTINS 479 +#define _LOAD_GLOBAL_MODULE 480 #define _LOAD_LOCALS LOAD_LOCALS #define _LOAD_NAME LOAD_NAME -#define _LOAD_SMALL_INT 480 -#define _LOAD_SMALL_INT_0 481 -#define _LOAD_SMALL_INT_1 482 -#define _LOAD_SMALL_INT_2 483 -#define _LOAD_SMALL_INT_3 484 -#define _LOAD_SPECIAL 485 +#define _LOAD_SMALL_INT 481 +#define _LOAD_SMALL_INT_0 482 +#define _LOAD_SMALL_INT_1 483 +#define _LOAD_SMALL_INT_2 484 +#define _LOAD_SMALL_INT_3 485 +#define _LOAD_SPECIAL 486 #define _LOAD_SUPER_ATTR_ATTR LOAD_SUPER_ATTR_ATTR #define _LOAD_SUPER_ATTR_METHOD LOAD_SUPER_ATTR_METHOD -#define _MAKE_CALLARGS_A_TUPLE 486 +#define _MAKE_CALLARGS_A_TUPLE 487 #define _MAKE_CELL MAKE_CELL #define _MAKE_FUNCTION MAKE_FUNCTION -#define _MAKE_WARM 487 +#define _MAKE_WARM 488 #define _MAP_ADD MAP_ADD #define _MATCH_CLASS MATCH_CLASS #define _MATCH_KEYS MATCH_KEYS #define _MATCH_MAPPING MATCH_MAPPING #define _MATCH_SEQUENCE MATCH_SEQUENCE -#define _MAYBE_EXPAND_METHOD 488 -#define _MAYBE_EXPAND_METHOD_KW 489 -#define _MONITOR_CALL 490 -#define _MONITOR_CALL_KW 491 -#define _MONITOR_JUMP_BACKWARD 492 -#define _MONITOR_RESUME 493 +#define _MAYBE_EXPAND_METHOD 489 +#define _MAYBE_EXPAND_METHOD_KW 490 +#define _MONITOR_CALL 491 +#define _MONITOR_CALL_KW 492 +#define _MONITOR_JUMP_BACKWARD 493 +#define _MONITOR_RESUME 494 #define _NOP NOP -#define _POP_CALL 494 -#define _POP_CALL_LOAD_CONST_INLINE_BORROW 495 -#define _POP_CALL_ONE 496 -#define _POP_CALL_ONE_LOAD_CONST_INLINE_BORROW 497 -#define _POP_CALL_TWO 498 -#define _POP_CALL_TWO_LOAD_CONST_INLINE_BORROW 499 +#define _POP_CALL 495 +#define _POP_CALL_LOAD_CONST_INLINE_BORROW 496 +#define _POP_CALL_ONE 497 +#define _POP_CALL_ONE_LOAD_CONST_INLINE_BORROW 498 +#define _POP_CALL_TWO 499 +#define _POP_CALL_TWO_LOAD_CONST_INLINE_BORROW 500 #define _POP_EXCEPT POP_EXCEPT #define _POP_ITER POP_ITER -#define _POP_JUMP_IF_FALSE 500 -#define _POP_JUMP_IF_TRUE 501 +#define _POP_JUMP_IF_FALSE 501 +#define _POP_JUMP_IF_TRUE 502 #define _POP_TOP POP_TOP -#define _POP_TOP_FLOAT 502 -#define _POP_TOP_INT 503 -#define _POP_TOP_LOAD_CONST_INLINE 504 -#define _POP_TOP_LOAD_CONST_INLINE_BORROW 505 -#define _POP_TOP_NOP 506 -#define _POP_TOP_UNICODE 507 -#define _POP_TWO 508 -#define _POP_TWO_LOAD_CONST_INLINE_BORROW 509 +#define _POP_TOP_FLOAT 503 +#define _POP_TOP_INT 504 +#define _POP_TOP_LOAD_CONST_INLINE 505 +#define _POP_TOP_LOAD_CONST_INLINE_BORROW 506 +#define _POP_TOP_NOP 507 +#define _POP_TOP_UNICODE 508 +#define _POP_TWO 509 +#define _POP_TWO_LOAD_CONST_INLINE_BORROW 510 #define _PUSH_EXC_INFO PUSH_EXC_INFO -#define _PUSH_FRAME 510 +#define _PUSH_FRAME 511 #define _PUSH_NULL PUSH_NULL -#define _PUSH_NULL_CONDITIONAL 511 -#define _PY_FRAME_GENERAL 512 -#define _PY_FRAME_KW 513 -#define _QUICKEN_RESUME 514 -#define _REPLACE_WITH_TRUE 515 +#define _PUSH_NULL_CONDITIONAL 512 +#define _PY_FRAME_GENERAL 513 +#define _PY_FRAME_KW 514 +#define _QUICKEN_RESUME 515 +#define _REPLACE_WITH_TRUE 516 #define _RESUME_CHECK RESUME_CHECK #define _RETURN_GENERATOR RETURN_GENERATOR #define _RETURN_VALUE RETURN_VALUE -#define _SAVE_RETURN_OFFSET 516 -#define _SEND 517 -#define _SEND_GEN_FRAME 518 +#define _SAVE_RETURN_OFFSET 517 +#define _SEND 518 +#define _SEND_GEN_FRAME 519 #define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS #define _SET_ADD SET_ADD #define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE #define _SET_UPDATE SET_UPDATE -#define _START_EXECUTOR 519 -#define _STORE_ATTR 520 -#define _STORE_ATTR_INSTANCE_VALUE 521 -#define _STORE_ATTR_SLOT 522 -#define _STORE_ATTR_WITH_HINT 523 +#define _START_EXECUTOR 520 +#define _STORE_ATTR 521 +#define _STORE_ATTR_INSTANCE_VALUE 522 +#define _STORE_ATTR_SLOT 523 +#define _STORE_ATTR_WITH_HINT 524 #define _STORE_DEREF STORE_DEREF -#define _STORE_FAST 524 -#define _STORE_FAST_0 525 -#define _STORE_FAST_1 526 -#define _STORE_FAST_2 527 -#define _STORE_FAST_3 528 -#define _STORE_FAST_4 529 -#define _STORE_FAST_5 530 -#define _STORE_FAST_6 531 -#define _STORE_FAST_7 532 +#define _STORE_FAST 525 +#define _STORE_FAST_0 526 +#define _STORE_FAST_1 527 +#define _STORE_FAST_2 528 +#define _STORE_FAST_3 529 +#define _STORE_FAST_4 530 +#define _STORE_FAST_5 531 +#define _STORE_FAST_6 532 +#define _STORE_FAST_7 533 #define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST #define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST #define _STORE_GLOBAL STORE_GLOBAL #define _STORE_NAME STORE_NAME -#define _STORE_SLICE 533 -#define _STORE_SUBSCR 534 -#define _STORE_SUBSCR_DICT 535 -#define _STORE_SUBSCR_LIST_INT 536 -#define _SWAP 537 -#define _SWAP_2 538 -#define _SWAP_3 539 -#define _TIER2_RESUME_CHECK 540 -#define _TO_BOOL 541 +#define _STORE_SLICE 534 +#define _STORE_SUBSCR 535 +#define _STORE_SUBSCR_DICT 536 +#define _STORE_SUBSCR_LIST_INT 537 +#define _SWAP 538 +#define _SWAP_2 539 +#define _SWAP_3 540 +#define _TIER2_RESUME_CHECK 541 +#define _TO_BOOL 542 #define _TO_BOOL_BOOL TO_BOOL_BOOL #define _TO_BOOL_INT TO_BOOL_INT -#define _TO_BOOL_LIST 542 +#define _TO_BOOL_LIST 543 #define _TO_BOOL_NONE TO_BOOL_NONE -#define _TO_BOOL_STR 543 +#define _TO_BOOL_STR 544 #define _UNARY_INVERT UNARY_INVERT #define _UNARY_NEGATIVE UNARY_NEGATIVE #define _UNARY_NOT UNARY_NOT #define _UNPACK_EX UNPACK_EX -#define _UNPACK_SEQUENCE 544 -#define _UNPACK_SEQUENCE_LIST 545 -#define _UNPACK_SEQUENCE_TUPLE 546 -#define _UNPACK_SEQUENCE_TWO_TUPLE 547 +#define _UNPACK_SEQUENCE 545 +#define _UNPACK_SEQUENCE_LIST 546 +#define _UNPACK_SEQUENCE_TUPLE 547 +#define _UNPACK_SEQUENCE_TWO_TUPLE 548 #define _WITH_EXCEPT_START WITH_EXCEPT_START #define _YIELD_VALUE YIELD_VALUE -#define MAX_UOP_ID 547 +#define MAX_UOP_ID 548 #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index ff7e800aa9bb1a..28ddd6e8f86482 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -336,6 +336,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_DEOPT] = 0, [_ERROR_POP_N] = HAS_ARG_FLAG, [_TIER2_RESUME_CHECK] = HAS_DEOPT_FLAG, + [_COLD_EXIT] = HAS_ESCAPES_FLAG, }; const ReplicationRange _PyUop_Replication[MAX_UOP_ID+1] = { @@ -419,6 +420,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_CHECK_STACK_SPACE] = "_CHECK_STACK_SPACE", [_CHECK_STACK_SPACE_OPERAND] = "_CHECK_STACK_SPACE_OPERAND", [_CHECK_VALIDITY] = "_CHECK_VALIDITY", + [_COLD_EXIT] = "_COLD_EXIT", [_COMPARE_OP] = "_COMPARE_OP", [_COMPARE_OP_FLOAT] = "_COMPARE_OP_FLOAT", [_COMPARE_OP_INT] = "_COMPARE_OP_INT", @@ -1301,6 +1303,8 @@ int _PyUop_num_popped(int opcode, int oparg) return 0; case _TIER2_RESUME_CHECK: return 0; + case _COLD_EXIT: + return 0; default: return -1; } diff --git a/Python/bytecodes.c b/Python/bytecodes.c index d9abc4c53d1f50..b3c8e750c41714 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2964,6 +2964,7 @@ dummy_func( else { this_instr[1].counter = initial_jump_backoff_counter(); assert(tstate->current_executor == NULL); + tstate->jit_exit = NULL; GOTO_TIER_TWO(executor); } } @@ -3028,6 +3029,7 @@ dummy_func( } DISPATCH_GOTO(); } + tstate->jit_exit = NULL; GOTO_TIER_TWO(executor); #else Py_FatalError("ENTER_EXECUTOR is not supported in this build"); @@ -5236,11 +5238,10 @@ dummy_func( #endif } - tier2 op(_EXIT_TRACE, (exit_p/4 --)) { + tier2 op(_EXIT_TRACE, (exit_p/4 -- )) { _PyExitData *exit = (_PyExitData *)exit_p; - PyCodeObject *code = _PyFrame_GetCode(frame); - _Py_CODEUNIT *target = _PyFrame_GetBytecode(frame) + exit->target; #if defined(Py_DEBUG) && !defined(_Py_JIT) + _Py_CODEUNIT *target = _PyFrame_GetBytecode(frame) + exit->target; OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); if (frame->lltrace >= 2) { printf("SIDE EXIT: [UOp "); @@ -5251,32 +5252,7 @@ dummy_func( _PyOpcode_OpName[target->op.code]); } #endif - if (exit->executor && !exit->executor->vm_data.valid) { - exit->temperature = initial_temperature_backoff_counter(); - Py_CLEAR(exit->executor); - } - if (exit->executor == NULL) { - _Py_BackoffCounter temperature = exit->temperature; - if (!backoff_counter_triggers(temperature)) { - exit->temperature = advance_backoff_counter(temperature); - GOTO_TIER_ONE(target); - } - _PyExecutorObject *executor; - if (target->op.code == ENTER_EXECUTOR) { - executor = code->co_executors->executors[target->op.arg]; - Py_INCREF(executor); - } - else { - int chain_depth = current_executor->vm_data.chain_depth + 1; - int optimized = _PyOptimizer_Optimize(frame, target, &executor, chain_depth); - if (optimized <= 0) { - exit->temperature = restart_backoff_counter(temperature); - GOTO_TIER_ONE(optimized < 0 ? NULL : target); - } - exit->temperature = initial_temperature_backoff_counter(); - } - exit->executor = executor; - } + tstate->jit_exit = exit; GOTO_TIER_TWO(exit->executor); } @@ -5375,6 +5351,7 @@ dummy_func( #ifndef _Py_JIT current_executor = (_PyExecutorObject*)executor; #endif + tstate->current_executor = (PyObject *)executor; assert(((_PyExecutorObject *)executor)->vm_data.valid); } @@ -5414,6 +5391,37 @@ dummy_func( assert(tstate->tracing || eval_breaker == FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(_PyFrame_GetCode(frame)->_co_instrumentation_version)); } + tier2 op(_COLD_EXIT, ( -- )) { + _PyExitData *exit = tstate->jit_exit; + assert(exit != NULL); + _Py_CODEUNIT *target = _PyFrame_GetBytecode(frame) + exit->target; + _Py_BackoffCounter temperature = exit->temperature; + if (!backoff_counter_triggers(temperature)) { + exit->temperature = advance_backoff_counter(temperature); + GOTO_TIER_ONE(target); + } + _PyExecutorObject *previous_executor = _PyExecutor_FromExit(exit); + assert(tstate->current_executor == (PyObject *)previous_executor); + _PyExecutorObject *executor; + if (target->op.code == ENTER_EXECUTOR) { + PyCodeObject *code = _PyFrame_GetCode(frame); + executor = code->co_executors->executors[target->op.arg]; + Py_INCREF(executor); + } + else { + int chain_depth = previous_executor->vm_data.chain_depth + 1; + int optimized = _PyOptimizer_Optimize(frame, target, &executor, chain_depth); + if (optimized <= 0) { + exit->temperature = restart_backoff_counter(temperature); + GOTO_TIER_ONE(optimized < 0 ? NULL : target); + } + exit->temperature = initial_temperature_backoff_counter(); + } + assert(tstate->jit_exit == exit); + exit->executor = executor; + GOTO_TIER_TWO(exit->executor); + } + label(pop_2_error) { stack_pointer -= 2; assert(WITHIN_STACK_BOUNDS()); diff --git a/Python/ceval.c b/Python/ceval.c index 50665defd382a2..a502cb6714c70e 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1152,7 +1152,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int uint64_t trace_uop_execution_counter = 0; #endif - assert(next_uop->opcode == _START_EXECUTOR); + assert(next_uop->opcode == _START_EXECUTOR || next_uop->opcode == _COLD_EXIT); tier2_dispatch: for (;;) { uopcode = next_uop->opcode; diff --git a/Python/ceval_macros.h b/Python/ceval_macros.h index 187ec8fdd26584..9ebcefbcb85850 100644 --- a/Python/ceval_macros.h +++ b/Python/ceval_macros.h @@ -359,7 +359,6 @@ _PyFrame_SetStackPointer(frame, stack_pointer) do { \ OPT_STAT_INC(traces_executed); \ _PyExecutorObject *_executor = (EXECUTOR); \ - tstate->current_executor = (PyObject *)_executor; \ jit_func jitted = _executor->jit_code; \ /* Keep the shim frame alive via the executor: */ \ Py_INCREF(_executor); \ @@ -378,9 +377,8 @@ do { \ do { \ OPT_STAT_INC(traces_executed); \ _PyExecutorObject *_executor = (EXECUTOR); \ - tstate->current_executor = (PyObject *)_executor; \ next_uop = _executor->trace; \ - assert(next_uop->opcode == _START_EXECUTOR); \ + assert(next_uop->opcode == _START_EXECUTOR || next_uop->opcode == _COLD_EXIT); \ goto enter_tier_two; \ } while (0) #endif @@ -390,7 +388,6 @@ do { \ { \ tstate->current_executor = NULL; \ next_instr = (TARGET); \ - assert(tstate->current_executor == NULL); \ OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); \ _PyFrame_SetStackPointer(frame, stack_pointer); \ stack_pointer = _PyFrame_GetStackPointer(frame); \ diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index e152865e4ec9e8..b022f88515e346 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -7113,9 +7113,8 @@ case _EXIT_TRACE: { PyObject *exit_p = (PyObject *)CURRENT_OPERAND0(); _PyExitData *exit = (_PyExitData *)exit_p; - PyCodeObject *code = _PyFrame_GetCode(frame); - _Py_CODEUNIT *target = _PyFrame_GetBytecode(frame) + exit->target; #if defined(Py_DEBUG) && !defined(_Py_JIT) + _Py_CODEUNIT *target = _PyFrame_GetBytecode(frame) + exit->target; OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); if (frame->lltrace >= 2) { _PyFrame_SetStackPointer(frame, stack_pointer); @@ -7128,36 +7127,7 @@ stack_pointer = _PyFrame_GetStackPointer(frame); } #endif - if (exit->executor && !exit->executor->vm_data.valid) { - exit->temperature = initial_temperature_backoff_counter(); - _PyFrame_SetStackPointer(frame, stack_pointer); - Py_CLEAR(exit->executor); - stack_pointer = _PyFrame_GetStackPointer(frame); - } - if (exit->executor == NULL) { - _Py_BackoffCounter temperature = exit->temperature; - if (!backoff_counter_triggers(temperature)) { - exit->temperature = advance_backoff_counter(temperature); - GOTO_TIER_ONE(target); - } - _PyExecutorObject *executor; - if (target->op.code == ENTER_EXECUTOR) { - executor = code->co_executors->executors[target->op.arg]; - Py_INCREF(executor); - } - else { - int chain_depth = current_executor->vm_data.chain_depth + 1; - _PyFrame_SetStackPointer(frame, stack_pointer); - int optimized = _PyOptimizer_Optimize(frame, target, &executor, chain_depth); - stack_pointer = _PyFrame_GetStackPointer(frame); - if (optimized <= 0) { - exit->temperature = restart_backoff_counter(temperature); - GOTO_TIER_ONE(optimized < 0 ? NULL : target); - } - exit->temperature = initial_temperature_backoff_counter(); - } - exit->executor = executor; - } + tstate->jit_exit = exit; GOTO_TIER_TWO(exit->executor); break; } @@ -7438,6 +7408,7 @@ #ifndef _Py_JIT current_executor = (_PyExecutorObject*)executor; #endif + tstate->current_executor = (PyObject *)executor; assert(((_PyExecutorObject *)executor)->vm_data.valid); break; } @@ -7487,4 +7458,40 @@ break; } + case _COLD_EXIT: { + _PyExitData *exit = tstate->jit_exit; + assert(exit != NULL); + _Py_CODEUNIT *target = _PyFrame_GetBytecode(frame) + exit->target; + _Py_BackoffCounter temperature = exit->temperature; + if (!backoff_counter_triggers(temperature)) { + exit->temperature = advance_backoff_counter(temperature); + GOTO_TIER_ONE(target); + } + _PyFrame_SetStackPointer(frame, stack_pointer); + _PyExecutorObject *previous_executor = _PyExecutor_FromExit(exit); + stack_pointer = _PyFrame_GetStackPointer(frame); + assert(tstate->current_executor == (PyObject *)previous_executor); + _PyExecutorObject *executor; + if (target->op.code == ENTER_EXECUTOR) { + PyCodeObject *code = _PyFrame_GetCode(frame); + executor = code->co_executors->executors[target->op.arg]; + Py_INCREF(executor); + } + else { + int chain_depth = previous_executor->vm_data.chain_depth + 1; + _PyFrame_SetStackPointer(frame, stack_pointer); + int optimized = _PyOptimizer_Optimize(frame, target, &executor, chain_depth); + stack_pointer = _PyFrame_GetStackPointer(frame); + if (optimized <= 0) { + exit->temperature = restart_backoff_counter(temperature); + GOTO_TIER_ONE(optimized < 0 ? NULL : target); + } + exit->temperature = initial_temperature_backoff_counter(); + } + assert(tstate->jit_exit == exit); + exit->executor = executor; + GOTO_TIER_TWO(exit->executor); + break; + } + #undef TIER_TWO diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index aa1eb373b7ba4b..e78c53bf213d97 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -5595,6 +5595,7 @@ } DISPATCH_GOTO(); } + tstate->jit_exit = NULL; GOTO_TIER_TWO(executor); #else Py_FatalError("ENTER_EXECUTOR is not supported in this build"); @@ -7793,6 +7794,7 @@ this_instr[1].counter = initial_jump_backoff_counter(); stack_pointer = _PyFrame_GetStackPointer(frame); assert(tstate->current_executor == NULL); + tstate->jit_exit = NULL; GOTO_TIER_TWO(executor); } } diff --git a/Python/optimizer.c b/Python/optimizer.c index 8d01d605ef4a2a..6dfd52fe457211 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1182,9 +1182,11 @@ make_executor_from_uops(_PyUOpInstruction *buffer, int length, const _PyBloomFil } /* Initialize exits */ + _PyExecutorObject *cold = _PyExecutor_GetColdExecutor(); for (int i = 0; i < exit_count; i++) { - executor->exits[i].executor = NULL; + executor->exits[i].index = i; executor->exits[i].temperature = initial_temperature_backoff_counter(); + executor->exits[i].executor = cold; } int next_exit = exit_count-1; _PyUOpInstruction *dest = (_PyUOpInstruction *)&executor->trace[length]; @@ -1462,6 +1464,37 @@ _Py_ExecutorInit(_PyExecutorObject *executor, const _PyBloomFilter *dependency_s link_executor(executor); } +_PyExecutorObject * +_PyExecutor_GetColdExecutor(void) +{ + PyInterpreterState *interp = _PyInterpreterState_GET(); + if (interp->cold_executor != NULL) { + return interp->cold_executor; + } + _PyExecutorObject *cold = allocate_executor(0, 1); + assert((void *)cold->trace == (void *)cold->exits); + ((_PyUOpInstruction *)cold->exits)->opcode = _COLD_EXIT; + if (cold == NULL) { + Py_FatalError("Cannot allocate core JIT code"); + } + _Py_SetImmortal((PyObject *)cold); +#ifdef _Py_JIT + cold->jit_code = NULL; + cold->jit_side_entry = NULL; + cold->jit_size = 0; + // This is initialized to true so we can prevent the executor + // from being immediately detected as cold and invalidated. + cold->vm_data.warm = true; + if (_PyJIT_Compile(cold, cold->trace, length)) { + Py_DECREF(cold); + Py_FatalError("Cannot allocate core JIT code"); + } +#endif + interp->cold_executor = cold; + return cold; +} + + /* Detaches the executor from the code object (if any) that * holds a reference to it */ void @@ -1492,6 +1525,13 @@ executor_clear(PyObject *op) assert(executor->vm_data.valid == 1); unlink_executor(executor); executor->vm_data.valid = 0; + + _PyExecutorObject *cold = _PyExecutor_GetColdExecutor(); + ((_PyUOpInstruction *)executor->trace)->opcode = _COLD_EXIT; +#ifdef _Py_JIT + executor->jit_code = cold->jit_code; +#endif + /* It is possible for an executor to form a reference * cycle with itself, so decref'ing a side exit could * free the executor unless we hold a strong reference to it @@ -1499,7 +1539,9 @@ executor_clear(PyObject *op) Py_INCREF(executor); for (uint32_t i = 0; i < executor->exit_count; i++) { executor->exits[i].temperature = initial_unreachable_backoff_counter(); - Py_CLEAR(executor->exits[i].executor); + _PyExecutorObject *e = executor->exits[i].executor; + executor->exits[i].executor = cold; + Py_DECREF(e); } _Py_ExecutorDetach(executor); Py_DECREF(executor); diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index aeff76affd8ace..9b2f482b5ddede 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -1031,7 +1031,7 @@ dummy_func(void) { ctx->done = true; } - op(_EXIT_TRACE, (exit_p/4 --)) { + op(_EXIT_TRACE, (exit_p/4 -- )) { (void)exit_p; ctx->done = true; } diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 41402200c1683e..4346f8a59c0314 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -3045,3 +3045,7 @@ break; } + case _COLD_EXIT: { + break; + } + diff --git a/Python/pystate.c b/Python/pystate.c index 0d4c26f92cec90..f60220cff58426 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -1469,6 +1469,7 @@ init_threadstate(_PyThreadStateImpl *_tstate, tstate->datastack_limit = NULL; tstate->what_event = -1; tstate->current_executor = NULL; + tstate->jit_exit = NULL; tstate->dict_global_version = 0; _tstate->c_stack_soft_limit = UINTPTR_MAX; diff --git a/Tools/jit/template.c b/Tools/jit/template.c index 5ee26f93f1e266..5bb56163f9f4a6 100644 --- a/Tools/jit/template.c +++ b/Tools/jit/template.c @@ -51,7 +51,6 @@ do { \ OPT_STAT_INC(traces_executed); \ _PyExecutorObject *_executor = (EXECUTOR); \ - tstate->current_executor = (PyObject *)_executor; \ jit_func_preserve_none jitted = _executor->jit_side_entry; \ __attribute__((musttail)) return jitted(frame, stack_pointer, tstate); \ } while (0) From ec7f0e27b9f563d6c71c73932d686986492dfb17 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 7 Jul 2025 17:09:30 +0100 Subject: [PATCH 2/8] Fix copy-and-paste error --- Python/optimizer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/optimizer.c b/Python/optimizer.c index 6dfd52fe457211..822e2b81d5df2c 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1485,7 +1485,7 @@ _PyExecutor_GetColdExecutor(void) // This is initialized to true so we can prevent the executor // from being immediately detected as cold and invalidated. cold->vm_data.warm = true; - if (_PyJIT_Compile(cold, cold->trace, length)) { + if (_PyJIT_Compile(cold, cold->trace, 1)) { Py_DECREF(cold); Py_FatalError("Cannot allocate core JIT code"); } From 911520a2135b08903fac1ec1ae897aa925eacf43 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 7 Jul 2025 17:23:07 +0100 Subject: [PATCH 3/8] Check validity at start of trace --- Python/bytecodes.c | 1 - Python/executor_cases.c.h | 1 - Python/optimizer.c | 11 +++-------- 3 files changed, 3 insertions(+), 10 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index b3c8e750c41714..4f3fdeaa975ce7 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -5352,7 +5352,6 @@ dummy_func( current_executor = (_PyExecutorObject*)executor; #endif tstate->current_executor = (PyObject *)executor; - assert(((_PyExecutorObject *)executor)->vm_data.valid); } tier2 op(_MAKE_WARM, (--)) { diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index b022f88515e346..8d46fe340685d4 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -7409,7 +7409,6 @@ current_executor = (_PyExecutorObject*)executor; #endif tstate->current_executor = (PyObject *)executor; - assert(((_PyExecutorObject *)executor)->vm_data.valid); break; } diff --git a/Python/optimizer.c b/Python/optimizer.c index 822e2b81d5df2c..71864ff487e714 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -584,6 +584,7 @@ translate_bytecode_to_trace( code->co_firstlineno, 2 * INSTR_IP(initial_instr, code)); ADD_TO_TRACE(_START_EXECUTOR, 0, (uintptr_t)instr, INSTR_IP(instr, code)); + ADD_TO_TRACE(_CHECK_VALIDITY, 0, 0, 0); ADD_TO_TRACE(_MAKE_WARM, 0, 0, 0); uint32_t target = 0; @@ -1472,8 +1473,7 @@ _PyExecutor_GetColdExecutor(void) return interp->cold_executor; } _PyExecutorObject *cold = allocate_executor(0, 1); - assert((void *)cold->trace == (void *)cold->exits); - ((_PyUOpInstruction *)cold->exits)->opcode = _COLD_EXIT; + ((_PyUOpInstruction *)cold->trace)->opcode = _COLD_EXIT; if (cold == NULL) { Py_FatalError("Cannot allocate core JIT code"); } @@ -1526,16 +1526,11 @@ executor_clear(PyObject *op) unlink_executor(executor); executor->vm_data.valid = 0; - _PyExecutorObject *cold = _PyExecutor_GetColdExecutor(); - ((_PyUOpInstruction *)executor->trace)->opcode = _COLD_EXIT; -#ifdef _Py_JIT - executor->jit_code = cold->jit_code; -#endif - /* It is possible for an executor to form a reference * cycle with itself, so decref'ing a side exit could * free the executor unless we hold a strong reference to it */ + _PyExecutorObject *cold = _PyExecutor_GetColdExecutor(); Py_INCREF(executor); for (uint32_t i = 0; i < executor->exit_count; i++) { executor->exits[i].temperature = initial_unreachable_backoff_counter(); From 73832b244e3f634a4a2589908d53d50305fcb1fc Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 8 Jul 2025 10:26:43 +0100 Subject: [PATCH 4/8] Tidy up --- Python/bytecodes.c | 6 +++--- Python/generated_cases.c.h | 4 ++-- Python/optimizer_bytecodes.c | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 4f3fdeaa975ce7..0244320e58817d 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2964,7 +2964,7 @@ dummy_func( else { this_instr[1].counter = initial_jump_backoff_counter(); assert(tstate->current_executor == NULL); - tstate->jit_exit = NULL; + assert(executor != tstate->interp->cold_executor); GOTO_TIER_TWO(executor); } } @@ -3029,7 +3029,7 @@ dummy_func( } DISPATCH_GOTO(); } - tstate->jit_exit = NULL; + assert(executor != tstate->interp->cold_executor); GOTO_TIER_TWO(executor); #else Py_FatalError("ENTER_EXECUTOR is not supported in this build"); @@ -5238,7 +5238,7 @@ dummy_func( #endif } - tier2 op(_EXIT_TRACE, (exit_p/4 -- )) { + tier2 op(_EXIT_TRACE, (exit_p/4 --)) { _PyExitData *exit = (_PyExitData *)exit_p; #if defined(Py_DEBUG) && !defined(_Py_JIT) _Py_CODEUNIT *target = _PyFrame_GetBytecode(frame) + exit->target; diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index e78c53bf213d97..7886353105ef16 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -5595,7 +5595,7 @@ } DISPATCH_GOTO(); } - tstate->jit_exit = NULL; + assert(executor != tstate->interp->cold_executor); GOTO_TIER_TWO(executor); #else Py_FatalError("ENTER_EXECUTOR is not supported in this build"); @@ -7794,7 +7794,7 @@ this_instr[1].counter = initial_jump_backoff_counter(); stack_pointer = _PyFrame_GetStackPointer(frame); assert(tstate->current_executor == NULL); - tstate->jit_exit = NULL; + assert(executor != tstate->interp->cold_executor); GOTO_TIER_TWO(executor); } } diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 9b2f482b5ddede..aeff76affd8ace 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -1031,7 +1031,7 @@ dummy_func(void) { ctx->done = true; } - op(_EXIT_TRACE, (exit_p/4 -- )) { + op(_EXIT_TRACE, (exit_p/4 --)) { (void)exit_p; ctx->done = true; } From a9297a0ea0482c1fd27c1de377038c2c13740723 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 8 Jul 2025 10:56:25 +0100 Subject: [PATCH 5/8] Null test before use --- Python/optimizer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/optimizer.c b/Python/optimizer.c index 71864ff487e714..faeabd155e11be 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1473,10 +1473,10 @@ _PyExecutor_GetColdExecutor(void) return interp->cold_executor; } _PyExecutorObject *cold = allocate_executor(0, 1); - ((_PyUOpInstruction *)cold->trace)->opcode = _COLD_EXIT; if (cold == NULL) { Py_FatalError("Cannot allocate core JIT code"); } + ((_PyUOpInstruction *)cold->trace)->opcode = _COLD_EXIT; _Py_SetImmortal((PyObject *)cold); #ifdef _Py_JIT cold->jit_code = NULL; From 47b53d1063036123f3cf072eaeb3f29c9e1086ca Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 8 Jul 2025 10:59:00 +0100 Subject: [PATCH 6/8] Update asserts --- Python/jit.c | 2 +- Python/optimizer.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Python/jit.c b/Python/jit.c index e232cc1f7d9250..c0c2b35fcb3e91 100644 --- a/Python/jit.c +++ b/Python/jit.c @@ -553,7 +553,7 @@ _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction trace[], siz group->emit(code, data, executor, NULL, &state); code += group->code_size; data += group->data_size; - assert(trace[0].opcode == _START_EXECUTOR); + assert(trace[0].opcode == _START_EXECUTOR || trace[0].opcode == _COLD_EXIT); for (size_t i = 0; i < length; i++) { const _PyUOpInstruction *instruction = &trace[i]; group = &stencil_groups[instruction->opcode]; diff --git a/Python/optimizer.c b/Python/optimizer.c index faeabd155e11be..fc75ecb23dd331 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1130,7 +1130,7 @@ sanity_check(_PyExecutorObject *executor) } bool ended = false; uint32_t i = 0; - CHECK(executor->trace[0].opcode == _START_EXECUTOR); + CHECK(executor->trace[0].opcode == _START_EXECUTOR || trace[0].opcode == _COLD_EXIT); for (; i < executor->code_size; i++) { const _PyUOpInstruction *inst = &executor->trace[i]; uint16_t opcode = inst->opcode; From 2564d418b5aed4ff2023870a889a3ba9fb28346e Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 8 Jul 2025 11:01:37 +0100 Subject: [PATCH 7/8] Properly fix assert --- Python/optimizer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/optimizer.c b/Python/optimizer.c index fc75ecb23dd331..8c01c988b315ba 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1130,7 +1130,7 @@ sanity_check(_PyExecutorObject *executor) } bool ended = false; uint32_t i = 0; - CHECK(executor->trace[0].opcode == _START_EXECUTOR || trace[0].opcode == _COLD_EXIT); + CHECK(executor->trace[0].opcode == _START_EXECUTOR || executor->trace[0].opcode == _COLD_EXIT); for (; i < executor->code_size; i++) { const _PyUOpInstruction *inst = &executor->trace[i]; uint16_t opcode = inst->opcode; From 779bd7d9048a1f81b61f271bd66ac99ce17b1147 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 8 Jul 2025 11:37:07 +0100 Subject: [PATCH 8/8] Don't make object immortal until it is fully initialized --- Python/optimizer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/optimizer.c b/Python/optimizer.c index 8c01c988b315ba..abd7bef85e96f5 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1477,7 +1477,6 @@ _PyExecutor_GetColdExecutor(void) Py_FatalError("Cannot allocate core JIT code"); } ((_PyUOpInstruction *)cold->trace)->opcode = _COLD_EXIT; - _Py_SetImmortal((PyObject *)cold); #ifdef _Py_JIT cold->jit_code = NULL; cold->jit_side_entry = NULL; @@ -1490,6 +1489,7 @@ _PyExecutor_GetColdExecutor(void) Py_FatalError("Cannot allocate core JIT code"); } #endif + _Py_SetImmortal((PyObject *)cold); interp->cold_executor = cold; return cold; }