From 54c551ed96d2021235874856940cfbfc4d729eaf Mon Sep 17 00:00:00 2001 From: Thomas Wouters Date: Mon, 13 Jan 2025 01:58:59 +0100 Subject: [PATCH 01/13] Add free-threaded specialization for lists and tuples. --- Include/internal/pycore_list.h | 4 ++ Include/internal/pycore_uop_metadata.h | 4 +- Lib/test/libregrtest/tsan.py | 1 + Objects/listobject.c | 28 ++++++++++- Objects/tupleobject.c | 10 ++-- Python/bytecodes.c | 64 ++++++++++++++++++++++++-- Python/executor_cases.c.h | 58 ++++++++++++++++++++++- Python/generated_cases.c.h | 60 ++++++++++++++++++++++-- Python/specialize.c | 42 +++++++++-------- Tools/cases_generator/analyzer.py | 6 +++ 10 files changed, 242 insertions(+), 35 deletions(-) diff --git a/Include/internal/pycore_list.h b/Include/internal/pycore_list.h index 836ff30abfcedb..9a995471f063d5 100644 --- a/Include/internal/pycore_list.h +++ b/Include/internal/pycore_list.h @@ -13,6 +13,10 @@ extern void _PyList_DebugMallocStats(FILE *out); // _PyList_GetItemRef should be used only when the object is known as a list // because it doesn't raise TypeError when the object is not a list, whereas PyList_GetItemRef does. extern PyObject* _PyList_GetItemRef(PyListObject *, Py_ssize_t i); +#ifdef Py_GIL_DISABLED +// Returns -1 in case of races with other threads. +extern int _PyList_GetItemRefNoLock(PyListObject *, Py_ssize_t i, PyObject ** result); +#endif #define _PyList_ITEMS(op) _Py_RVALUE(_PyList_CAST(op)->ob_item) diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 83e578cdd76fbd..6519b5c4bcb4bb 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -193,10 +193,10 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_FOR_ITER_TIER_TWO] = HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, [_ITER_CHECK_LIST] = HAS_EXIT_FLAG, [_GUARD_NOT_EXHAUSTED_LIST] = HAS_EXIT_FLAG, - [_ITER_NEXT_LIST] = 0, + [_ITER_NEXT_LIST] = HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_EXIT_FLAG, [_ITER_CHECK_TUPLE] = HAS_EXIT_FLAG, [_GUARD_NOT_EXHAUSTED_TUPLE] = HAS_EXIT_FLAG, - [_ITER_NEXT_TUPLE] = 0, + [_ITER_NEXT_TUPLE] = HAS_ARG_FLAG | HAS_JUMP_FLAG, [_ITER_CHECK_RANGE] = HAS_EXIT_FLAG, [_GUARD_NOT_EXHAUSTED_RANGE] = HAS_EXIT_FLAG, [_ITER_NEXT_RANGE] = HAS_ERROR_FLAG, diff --git a/Lib/test/libregrtest/tsan.py b/Lib/test/libregrtest/tsan.py index 00d5779d950e72..5f83cd03067274 100644 --- a/Lib/test/libregrtest/tsan.py +++ b/Lib/test/libregrtest/tsan.py @@ -26,6 +26,7 @@ 'test_threadsignals', 'test_weakref', 'test_free_threading.test_slots', + 'test_free_threading.test_iteration', ] diff --git a/Objects/listobject.c b/Objects/listobject.c index bbd53e7de94a31..8d969c3e975ba8 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -357,7 +357,7 @@ list_get_item_ref(PyListObject *op, Py_ssize_t i) return NULL; } Py_ssize_t cap = list_capacity(ob_item); - assert(cap != -1 && cap >= size); + assert(cap != -1); if (!valid_index(i, cap)) { return NULL; } @@ -415,6 +415,32 @@ _PyList_GetItemRef(PyListObject *list, Py_ssize_t i) return list_get_item_ref(list, i); } +#ifdef Py_GIL_DISABLED +int +_PyList_GetItemRefNoLock(PyListObject *list, Py_ssize_t i, PyObject **result) +{ + assert(_Py_IsOwnedByCurrentThread((PyObject *)list) || + _PyObject_GC_IS_SHARED(list)); + if (!valid_index(i, PyList_GET_SIZE(list))) { + return 0; + } + PyObject **ob_item = _Py_atomic_load_ptr(&list->ob_item); + if (ob_item == NULL) { + return 0; + } + Py_ssize_t cap = list_capacity(ob_item); + assert(cap != -1); + if 
(!valid_index(i, cap)) { + return 0; + } + *result = _Py_TryXGetRef(&ob_item[i]); + if (*result == NULL) { + return -1; + } + return 1; +} +#endif + int PyList_SetItem(PyObject *op, Py_ssize_t i, PyObject *newitem) diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c index 002002eb455556..4e8afbce27681a 100644 --- a/Objects/tupleobject.c +++ b/Objects/tupleobject.c @@ -1020,13 +1020,15 @@ tupleiter_next(PyObject *self) return NULL; assert(PyTuple_Check(seq)); - if (it->it_index < PyTuple_GET_SIZE(seq)) { - item = PyTuple_GET_ITEM(seq, it->it_index); - ++it->it_index; + Py_ssize_t idx = FT_ATOMIC_LOAD_SSIZE_RELAXED(it->it_index); + if ((size_t)idx < (size_t)PyTuple_GET_SIZE(seq)) { + item = PyTuple_GET_ITEM(seq, idx++); + FT_ATOMIC_STORE_SSIZE_RELAXED(it->it_index, idx); return Py_NewRef(item); } - it->it_seq = NULL; + FT_ATOMIC_STORE_SSIZE_RELAXED(it->it_index, -1); + FT_ATOMIC_STORE_PTR_RELAXED(it->it_seq, NULL); Py_DECREF(seq); return NULL; } diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 8bab4ea16b629b..a4ecf04baadc04 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2929,7 +2929,7 @@ dummy_func( }; specializing op(_SPECIALIZE_FOR_ITER, (counter/1, iter -- iter)) { - #if ENABLE_SPECIALIZATION + #if ENABLE_SPECIALIZATION_FT if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { next_instr = this_instr; _Py_Specialize_ForIter(iter, next_instr, oparg); @@ -2937,7 +2937,7 @@ dummy_func( } OPCODE_DEFERRED_INC(FOR_ITER); ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); - #endif /* ENABLE_SPECIALIZATION */ + #endif /* ENABLE_SPECIALIZATION_FT */ } replaced op(_FOR_ITER, (iter -- iter, next)) { @@ -3015,10 +3015,20 @@ dummy_func( op(_ITER_CHECK_LIST, (iter -- iter)) { - EXIT_IF(Py_TYPE(PyStackRef_AsPyObjectBorrow(iter)) != &PyListIter_Type); + PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); + EXIT_IF(Py_TYPE(iter_o) != &PyListIter_Type); +#ifdef Py_GIL_DISABLED + _PyListIterObject *it = (_PyListIterObject *)iter_o; + EXIT_IF(it->it_seq == NULL || + !_Py_IsOwnedByCurrentThread((PyObject *)it->it_seq) || + !_PyObject_GC_IS_SHARED(it->it_seq)); +#endif } replaced op(_ITER_JUMP_LIST, (iter -- iter)) { +// For free-threaded Python, the loop exit can happen at any point during item +// retrieval, so separate ops don't make much sense. +#ifndef Py_GIL_DISABLED PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); _PyListIterObject *it = (_PyListIterObject *)iter_o; assert(Py_TYPE(iter_o) == &PyListIter_Type); @@ -3036,10 +3046,12 @@ dummy_func( JUMPBY(oparg + 1); DISPATCH(); } +#endif } // Only used by Tier 2 op(_GUARD_NOT_EXHAUSTED_LIST, (iter -- iter)) { +#ifndef Py_GIL_DISABLED PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); _PyListIterObject *it = (_PyListIterObject *)iter_o; assert(Py_TYPE(iter_o) == &PyListIter_Type); @@ -3049,6 +3061,7 @@ dummy_func( it->it_index = -1; EXIT_IF(1); } +#endif } op(_ITER_NEXT_LIST, (iter -- iter, next)) { @@ -3057,8 +3070,30 @@ dummy_func( assert(Py_TYPE(iter_o) == &PyListIter_Type); PyListObject *seq = it->it_seq; assert(seq); +#ifdef Py_GIL_DISABLED + assert(_Py_IsOwnedByCurrentThread((PyObject *)seq) || + _PyObject_GC_IS_SHARED(seq)); + STAT_INC(FOR_ITER, hit); + Py_ssize_t idx = _Py_atomic_load_ssize_relaxed(&it->it_index); + PyObject *item; + int result = _PyList_GetItemRefNoLock(it->it_seq, idx, &item); + // A negative result means we lost a race with another thread + // and we need to take the slow path. 
+ EXIT_IF(result < 0); + if (result == 0) { + _Py_atomic_store_ssize_relaxed(&it->it_index, -1); + PyStackRef_CLOSE(iter); + STACK_SHRINK(1); + /* Jump forward oparg, then skip following END_FOR and POP_TOP instructions */ + JUMPBY(oparg + 2); + DISPATCH(); + } + _Py_atomic_store_ssize_relaxed(&it->it_index, idx + 1); + next = PyStackRef_FromPyObjectSteal(item); +#else assert(it->it_index < PyList_GET_SIZE(seq)); next = PyStackRef_FromPyObjectNew(PyList_GET_ITEM(seq, it->it_index++)); +#endif } macro(FOR_ITER_LIST) = @@ -3073,8 +3108,11 @@ dummy_func( replaced op(_ITER_JUMP_TUPLE, (iter -- iter)) { PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); - _PyTupleIterObject *it = (_PyTupleIterObject *)iter_o; assert(Py_TYPE(iter_o) == &PyTupleIter_Type); +// For free-threaded Python, the loop exit can happen at any point during item +// retrieval, so separate ops don't make much sense. +#ifndef Py_GIL_DISABLED + _PyTupleIterObject *it = (_PyTupleIterObject *)iter_o; STAT_INC(FOR_ITER, hit); PyTupleObject *seq = it->it_seq; if (seq == NULL || it->it_index >= PyTuple_GET_SIZE(seq)) { @@ -3086,6 +3124,7 @@ dummy_func( JUMPBY(oparg + 1); DISPATCH(); } +#endif } // Only used by Tier 2 @@ -3093,9 +3132,11 @@ dummy_func( PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); _PyTupleIterObject *it = (_PyTupleIterObject *)iter_o; assert(Py_TYPE(iter_o) == &PyTupleIter_Type); +#ifndef Py_GIL_DISABLED PyTupleObject *seq = it->it_seq; EXIT_IF(seq == NULL); EXIT_IF(it->it_index >= PyTuple_GET_SIZE(seq)); +#ifndef Py_GIL_DISABLED } op(_ITER_NEXT_TUPLE, (iter -- iter, next)) { @@ -3103,9 +3144,24 @@ dummy_func( _PyTupleIterObject *it = (_PyTupleIterObject *)iter_o; assert(Py_TYPE(iter_o) == &PyTupleIter_Type); PyTupleObject *seq = it->it_seq; +#ifdef Py_GIL_DISABLED + STAT_INC(FOR_ITER, hit); + Py_ssize_t idx = _Py_atomic_load_ssize_relaxed(&it->it_index); + if (seq == NULL || (size_t)idx >= (size_t)PyTuple_GET_SIZE(seq)) { + _Py_atomic_store_ssize_relaxed(&it->it_index, -1); + PyStackRef_CLOSE(iter); + STACK_SHRINK(1); + /* Jump forward oparg, then skip following END_FOR and POP_TOP instructions */ + JUMPBY(oparg + 2); + DISPATCH(); + } + _Py_atomic_store_ssize_relaxed(&it->it_index, idx + 1); + next = PyStackRef_FromPyObjectNew(PyTuple_GET_ITEM(seq, idx)); +#else assert(seq); assert(it->it_index < PyTuple_GET_SIZE(seq)); next = PyStackRef_FromPyObjectNew(PyTuple_GET_ITEM(seq, it->it_index++)); +#endif } macro(FOR_ITER_TUPLE) = diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index e40fa88be89172..80a7dc4c9ee3d9 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -3626,10 +3626,20 @@ case _ITER_CHECK_LIST: { _PyStackRef iter; iter = stack_pointer[-1]; - if (Py_TYPE(PyStackRef_AsPyObjectBorrow(iter)) != &PyListIter_Type) { + PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); + if (Py_TYPE(iter_o) != &PyListIter_Type) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + #ifdef Py_GIL_DISABLED + _PyListIterObject *it = (_PyListIterObject *)iter_o; + if (it->it_seq == NULL || + !_Py_IsOwnedByCurrentThread((PyObject *)it->it_seq) || + !_PyObject_GC_IS_SHARED(it->it_seq)) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } + #endif break; } @@ -3638,6 +3648,7 @@ case _GUARD_NOT_EXHAUSTED_LIST: { _PyStackRef iter; iter = stack_pointer[-1]; + #ifndef Py_GIL_DISABLED PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); _PyListIterObject *it = (_PyListIterObject *)iter_o; assert(Py_TYPE(iter_o) == &PyListIter_Type); @@ -3653,20 +3664,47 @@ 
JUMP_TO_JUMP_TARGET(); } } + #endif break; } case _ITER_NEXT_LIST: { _PyStackRef iter; _PyStackRef next; + oparg = CURRENT_OPARG(); iter = stack_pointer[-1]; PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); _PyListIterObject *it = (_PyListIterObject *)iter_o; assert(Py_TYPE(iter_o) == &PyListIter_Type); PyListObject *seq = it->it_seq; assert(seq); + #ifdef Py_GIL_DISABLED + assert(_Py_IsOwnedByCurrentThread((PyObject *)seq) || + _PyObject_GC_IS_SHARED(seq)); + STAT_INC(FOR_ITER, hit); + Py_ssize_t idx = _Py_atomic_load_ssize_relaxed(&it->it_index); + PyObject *item; + int result = _PyList_GetItemRefNoLock(it->it_seq, idx, &item); + // A negative result means we lost a race with another thread + // and we need to take the slow path. + if (result < 0) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + if (result == 0) { + _Py_atomic_store_ssize_relaxed(&it->it_index, -1); + PyStackRef_CLOSE(iter); + STACK_SHRINK(1); + /* Jump forward oparg, then skip following END_FOR and POP_TOP instructions */ + JUMPBY(oparg + 2); + DISPATCH(); + } + _Py_atomic_store_ssize_relaxed(&it->it_index, idx + 1); + next = PyStackRef_FromPyObjectSteal(item); + #else assert(it->it_index < PyList_GET_SIZE(seq)); next = PyStackRef_FromPyObjectNew(PyList_GET_ITEM(seq, it->it_index++)); + #endif stack_pointer[0] = next; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -3691,6 +3729,7 @@ PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); _PyTupleIterObject *it = (_PyTupleIterObject *)iter_o; assert(Py_TYPE(iter_o) == &PyTupleIter_Type); + #ifndef Py_GIL_DISABLED PyTupleObject *seq = it->it_seq; if (seq == NULL) { UOP_STAT_INC(uopcode, miss); @@ -3700,20 +3739,37 @@ UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } + #ifndef Py_GIL_DISABLED break; } case _ITER_NEXT_TUPLE: { _PyStackRef iter; _PyStackRef next; + oparg = CURRENT_OPARG(); iter = stack_pointer[-1]; PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); _PyTupleIterObject *it = (_PyTupleIterObject *)iter_o; assert(Py_TYPE(iter_o) == &PyTupleIter_Type); PyTupleObject *seq = it->it_seq; + #ifdef Py_GIL_DISABLED + STAT_INC(FOR_ITER, hit); + Py_ssize_t idx = _Py_atomic_load_ssize_relaxed(&it->it_index); + if (seq == NULL || (size_t)idx >= (size_t)PyTuple_GET_SIZE(seq)) { + _Py_atomic_store_ssize_relaxed(&it->it_index, -1); + PyStackRef_CLOSE(iter); + STACK_SHRINK(1); + /* Jump forward oparg, then skip following END_FOR and POP_TOP instructions */ + JUMPBY(oparg + 2); + DISPATCH(); + } + _Py_atomic_store_ssize_relaxed(&it->it_index, idx + 1); + next = PyStackRef_FromPyObjectNew(PyTuple_GET_ITEM(seq, idx)); + #else assert(seq); assert(it->it_index < PyTuple_GET_SIZE(seq)); next = PyStackRef_FromPyObjectNew(PyTuple_GET_ITEM(seq, it->it_index++)); + #endif stack_pointer[0] = next; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 7028ba52faae96..6e16e49a70fcdf 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -3942,7 +3942,7 @@ iter = stack_pointer[-1]; uint16_t counter = read_u16(&this_instr[1].cache); (void)counter; - #if ENABLE_SPECIALIZATION + #if ENABLE_SPECIALIZATION_FT if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { next_instr = this_instr; _PyFrame_SetStackPointer(frame, stack_pointer); @@ -3952,7 +3952,7 @@ } OPCODE_DEFERRED_INC(FOR_ITER); ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); - #endif /* ENABLE_SPECIALIZATION */ + #endif /* ENABLE_SPECIALIZATION_FT */ } // _FOR_ITER { @@ -4049,10 +4049,20 @@ // _ITER_CHECK_LIST { iter = 
stack_pointer[-1]; - DEOPT_IF(Py_TYPE(PyStackRef_AsPyObjectBorrow(iter)) != &PyListIter_Type, FOR_ITER); + PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); + DEOPT_IF(Py_TYPE(iter_o) != &PyListIter_Type, FOR_ITER); + #ifdef Py_GIL_DISABLED + _PyListIterObject *it = (_PyListIterObject *)iter_o; + DEOPT_IF(it->it_seq == NULL || + !_Py_IsOwnedByCurrentThread((PyObject *)it->it_seq) || + !_PyObject_GC_IS_SHARED(it->it_seq), FOR_ITER); + #endif } // _ITER_JUMP_LIST { + // For free-threaded Python, the loop exit can happen at any point during item + // retrieval, so separate ops don't make much sense. + #ifndef Py_GIL_DISABLED PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); _PyListIterObject *it = (_PyListIterObject *)iter_o; assert(Py_TYPE(iter_o) == &PyListIter_Type); @@ -4070,6 +4080,7 @@ JUMPBY(oparg + 1); DISPATCH(); } + #endif } // _ITER_NEXT_LIST { @@ -4078,8 +4089,30 @@ assert(Py_TYPE(iter_o) == &PyListIter_Type); PyListObject *seq = it->it_seq; assert(seq); + #ifdef Py_GIL_DISABLED + assert(_Py_IsOwnedByCurrentThread((PyObject *)seq) || + _PyObject_GC_IS_SHARED(seq)); + STAT_INC(FOR_ITER, hit); + Py_ssize_t idx = _Py_atomic_load_ssize_relaxed(&it->it_index); + PyObject *item; + int result = _PyList_GetItemRefNoLock(it->it_seq, idx, &item); + // A negative result means we lost a race with another thread + // and we need to take the slow path. + DEOPT_IF(result < 0, FOR_ITER); + if (result == 0) { + _Py_atomic_store_ssize_relaxed(&it->it_index, -1); + PyStackRef_CLOSE(iter); + STACK_SHRINK(1); + /* Jump forward oparg, then skip following END_FOR and POP_TOP instructions */ + JUMPBY(oparg + 2); + DISPATCH(); + } + _Py_atomic_store_ssize_relaxed(&it->it_index, idx + 1); + next = PyStackRef_FromPyObjectSteal(item); + #else assert(it->it_index < PyList_GET_SIZE(seq)); next = PyStackRef_FromPyObjectNew(PyList_GET_ITEM(seq, it->it_index++)); + #endif } stack_pointer[0] = next; stack_pointer += 1; @@ -4146,8 +4179,11 @@ // _ITER_JUMP_TUPLE { PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); - _PyTupleIterObject *it = (_PyTupleIterObject *)iter_o; assert(Py_TYPE(iter_o) == &PyTupleIter_Type); + // For free-threaded Python, the loop exit can happen at any point during item + // retrieval, so separate ops don't make much sense. 
+ #ifndef Py_GIL_DISABLED + _PyTupleIterObject *it = (_PyTupleIterObject *)iter_o; STAT_INC(FOR_ITER, hit); PyTupleObject *seq = it->it_seq; if (seq == NULL || it->it_index >= PyTuple_GET_SIZE(seq)) { @@ -4159,6 +4195,7 @@ JUMPBY(oparg + 1); DISPATCH(); } + #endif } // _ITER_NEXT_TUPLE { @@ -4166,9 +4203,24 @@ _PyTupleIterObject *it = (_PyTupleIterObject *)iter_o; assert(Py_TYPE(iter_o) == &PyTupleIter_Type); PyTupleObject *seq = it->it_seq; + #ifdef Py_GIL_DISABLED + STAT_INC(FOR_ITER, hit); + Py_ssize_t idx = _Py_atomic_load_ssize_relaxed(&it->it_index); + if (seq == NULL || (size_t)idx >= (size_t)PyTuple_GET_SIZE(seq)) { + _Py_atomic_store_ssize_relaxed(&it->it_index, -1); + PyStackRef_CLOSE(iter); + STACK_SHRINK(1); + /* Jump forward oparg, then skip following END_FOR and POP_TOP instructions */ + JUMPBY(oparg + 2); + DISPATCH(); + } + _Py_atomic_store_ssize_relaxed(&it->it_index, idx + 1); + next = PyStackRef_FromPyObjectNew(PyTuple_GET_ITEM(seq, idx)); + #else assert(seq); assert(it->it_index < PyTuple_GET_SIZE(seq)); next = PyStackRef_FromPyObjectNew(PyTuple_GET_ITEM(seq, it->it_index++)); + #endif } stack_pointer[0] = next; stack_pointer += 1; diff --git a/Python/specialize.c b/Python/specialize.c index c9325c39210874..8282d41551e924 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -2647,22 +2647,31 @@ int void _Py_Specialize_ForIter(_PyStackRef iter, _Py_CODEUNIT *instr, int oparg) { - assert(ENABLE_SPECIALIZATION); + assert(ENABLE_SPECIALIZATION_FT); assert(_PyOpcode_Caches[FOR_ITER] == INLINE_CACHE_ENTRIES_FOR_ITER); - _PyForIterCache *cache = (_PyForIterCache *)(instr + 1); PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); PyTypeObject *tp = Py_TYPE(iter_o); if (tp == &PyListIter_Type) { - instr->op.code = FOR_ITER_LIST; - goto success; + _PyListIterObject *it = (_PyListIterObject *)iter_o; + if (!_Py_IsOwnedByCurrentThread((PyObject *)it->it_seq) && + !_PyObject_GC_IS_SHARED(it->it_seq)) { + // Maybe this should just set GC_IS_SHARED in a critical + // section, instead of leaving it to the first iteration? 
+ SPECIALIZATION_FAIL(FOR_ITER, SPEC_FAIL_ITER_LIST); + unspecialize(instr); + return; + } + specialize(instr, FOR_ITER_LIST); + return; } else if (tp == &PyTupleIter_Type) { - instr->op.code = FOR_ITER_TUPLE; - goto success; + specialize(instr, FOR_ITER_TUPLE); + return; } +#ifndef Py_GIL_DISABLED else if (tp == &PyRangeIter_Type) { - instr->op.code = FOR_ITER_RANGE; - goto success; + specialize(instr, FOR_ITER_RANGE); + return; } else if (tp == &PyGen_Type && oparg <= SHRT_MAX) { assert(instr[oparg + INLINE_CACHE_ENTRIES_FOR_ITER + 1].op.code == END_FOR || @@ -2671,21 +2680,16 @@ _Py_Specialize_ForIter(_PyStackRef iter, _Py_CODEUNIT *instr, int oparg) /* Don't specialize if PEP 523 is active */ if (_PyInterpreterState_GET()->eval_frame) { SPECIALIZATION_FAIL(FOR_ITER, SPEC_FAIL_OTHER); - goto failure; + unspecialize(instr); + return; } - instr->op.code = FOR_ITER_GEN; - goto success; + specialize(instr, FOR_ITER_GEN); + return; } +#endif SPECIALIZATION_FAIL(FOR_ITER, _PySpecialization_ClassifyIterator(iter_o)); -failure: - STAT_INC(FOR_ITER, failure); - instr->op.code = FOR_ITER; - cache->counter = adaptive_counter_backoff(cache->counter); - return; -success: - STAT_INC(FOR_ITER, success); - cache->counter = adaptive_counter_cooldown(); + unspecialize(instr); } void diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 679beca3ec3a9d..b743ad508c2b42 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -594,6 +594,7 @@ def has_error_without_pop(op: parser.InstDef) -> bool: "_PyInterpreterState_GET", "_PyList_AppendTakeRef", "_PyList_FromStackRefSteal", + "_PyList_GetItemRefNoLock", "_PyList_ITEMS", "_PyLong_Add", "_PyLong_CompactValue", @@ -605,6 +606,8 @@ def has_error_without_pop(op: parser.InstDef) -> bool: "_PyLong_Multiply", "_PyLong_Subtract", "_PyManagedDictPointer_IsValues", + "_PyObject_CAST", + "_PyObject_GC_IS_SHARED", "_PyObject_GC_IS_TRACKED", "_PyObject_GC_MAY_BE_TRACKED", "_PyObject_GC_TRACK", @@ -625,6 +628,7 @@ def has_error_without_pop(op: parser.InstDef) -> bool: "_Py_EnterRecursiveCallTstateUnchecked", "_Py_ID", "_Py_IsImmortal", + "_Py_IsOwnedByCurrentThread", "_Py_LeaveRecursiveCallPy", "_Py_LeaveRecursiveCallTstate", "_Py_NewRef", @@ -633,7 +637,9 @@ def has_error_without_pop(op: parser.InstDef) -> bool: "_Py_TryIncrefCompare", "_Py_TryIncrefCompareStackRef", "_Py_atomic_load_ptr_acquire", + "_Py_atomic_load_ssize_relaxed", "_Py_atomic_load_uintptr_relaxed", + "_Py_atomic_store_ssize_relaxed", "_Py_set_eval_breaker_bit", "advance_backoff_counter", "assert", From 1433cd3e47f62c7bdda8abcba7b66a1345c14414 Mon Sep 17 00:00:00 2001 From: Thomas Wouters Date: Mon, 13 Jan 2025 14:32:08 +0100 Subject: [PATCH 02/13] Add specialization for range iterators and generators, both about as thread-safe as without specialization (i.e. not much to none at all). --- Objects/rangeobject.c | 28 +++++++++++++++++----------- Python/bytecodes.c | 13 ++++++++----- Python/executor_cases.c.h | 11 +++++++---- Python/generated_cases.c.h | 11 +++++++---- Python/specialize.c | 4 ++-- 5 files changed, 41 insertions(+), 26 deletions(-) diff --git a/Objects/rangeobject.c b/Objects/rangeobject.c index 2942ab624edf72..ea6529cf99d18c 100644 --- a/Objects/rangeobject.c +++ b/Objects/rangeobject.c @@ -7,6 +7,7 @@ #include "pycore_modsupport.h" // _PyArg_NoKwnames() #include "pycore_range.h" #include "pycore_tuple.h" // _PyTuple_ITEMS() +#include "pycore_pyatomic_ft_wrappers.h" /* Support objects whose length is > PY_SSIZE_T_MAX. 
@@ -816,10 +817,12 @@ PyTypeObject PyRange_Type = { static PyObject * rangeiter_next(_PyRangeIterObject *r) { - if (r->len > 0) { - long result = r->start; - r->start = result + r->step; - r->len--; + long len = FT_ATOMIC_LOAD_LONG_RELAXED(r->len); + if (len > 0) { + long result = FT_ATOMIC_LOAD_LONG_RELAXED(r->start); + FT_ATOMIC_STORE_LONG_RELAXED(r->start, result + r->step); + // Relaxed ops for maximum speed and minimum thread-safety. + FT_ATOMIC_STORE_LONG_RELAXED(r->len, len - 1); return PyLong_FromLong(result); } return NULL; @@ -828,7 +831,7 @@ rangeiter_next(_PyRangeIterObject *r) static PyObject * rangeiter_len(_PyRangeIterObject *r, PyObject *Py_UNUSED(ignored)) { - return PyLong_FromLong(r->len); + return PyLong_FromLong(FT_ATOMIC_LOAD_LONG_RELAXED(r->len)); } PyDoc_STRVAR(length_hint_doc, @@ -841,10 +844,11 @@ rangeiter_reduce(_PyRangeIterObject *r, PyObject *Py_UNUSED(ignored)) PyObject *range; /* create a range object for pickling */ - start = PyLong_FromLong(r->start); + long lstart = FT_ATOMIC_LOAD_LONG_RELAXED(r->start); + start = PyLong_FromLong(lstart); if (start == NULL) goto err; - stop = PyLong_FromLong(r->start + r->len * r->step); + stop = PyLong_FromLong(lstart + FT_ATOMIC_LOAD_LONG_RELAXED(r->len) * r->step); if (stop == NULL) goto err; step = PyLong_FromLong(r->step); @@ -871,12 +875,14 @@ rangeiter_setstate(_PyRangeIterObject *r, PyObject *state) if (index == -1 && PyErr_Occurred()) return NULL; /* silently clip the index value */ + long len = FT_ATOMIC_LOAD_LONG_RELAXED(r->len); if (index < 0) index = 0; - else if (index > r->len) - index = r->len; /* exhausted iterator */ - r->start += index * r->step; - r->len -= index; + else if (index > len) + index = len; /* exhausted iterator */ + FT_ATOMIC_STORE_LONG_RELAXED(r->start, + FT_ATOMIC_LOAD_LONG_RELAXED(r->start) + index * r->step); + FT_ATOMIC_STORE_LONG_RELAXED(r->len, len - index); Py_RETURN_NONE; } diff --git a/Python/bytecodes.c b/Python/bytecodes.c index a4ecf04baadc04..344ece5ae574cc 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -3179,7 +3179,7 @@ dummy_func( _PyRangeIterObject *r = (_PyRangeIterObject *)PyStackRef_AsPyObjectBorrow(iter); assert(Py_TYPE(r) == &PyRangeIter_Type); STAT_INC(FOR_ITER, hit); - if (r->len <= 0) { + if (FT_ATOMIC_LOAD_LONG_RELAXED(r->len) <= 0) { // Jump over END_FOR instruction. 
JUMPBY(oparg + 1); DISPATCH(); @@ -3190,16 +3190,19 @@ dummy_func( op(_GUARD_NOT_EXHAUSTED_RANGE, (iter -- iter)) { _PyRangeIterObject *r = (_PyRangeIterObject *)PyStackRef_AsPyObjectBorrow(iter); assert(Py_TYPE(r) == &PyRangeIter_Type); - EXIT_IF(r->len <= 0); + EXIT_IF(FT_ATOMIC_LOAD_LONG_RELAXED(r->len) <= 0); } op(_ITER_NEXT_RANGE, (iter -- iter, next)) { _PyRangeIterObject *r = (_PyRangeIterObject *)PyStackRef_AsPyObjectBorrow(iter); assert(Py_TYPE(r) == &PyRangeIter_Type); +#ifndef Py_GIL_DISABLED assert(r->len > 0); - long value = r->start; - r->start = value + r->step; - r->len--; +#endif + long value = FT_ATOMIC_LOAD_LONG_RELAXED(r->start); + FT_ATOMIC_STORE_LONG_RELAXED(r->start, value + r->step); + FT_ATOMIC_STORE_LONG_RELAXED(r->len, + FT_ATOMIC_LOAD_LONG_RELAXED(r->len) - 1); PyObject *res = PyLong_FromLong(value); ERROR_IF(res == NULL, error); next = PyStackRef_FromPyObjectSteal(res); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 80a7dc4c9ee3d9..d82a43693546df 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -3794,7 +3794,7 @@ iter = stack_pointer[-1]; _PyRangeIterObject *r = (_PyRangeIterObject *)PyStackRef_AsPyObjectBorrow(iter); assert(Py_TYPE(r) == &PyRangeIter_Type); - if (r->len <= 0) { + if (FT_ATOMIC_LOAD_LONG_RELAXED(r->len) <= 0) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } @@ -3807,10 +3807,13 @@ iter = stack_pointer[-1]; _PyRangeIterObject *r = (_PyRangeIterObject *)PyStackRef_AsPyObjectBorrow(iter); assert(Py_TYPE(r) == &PyRangeIter_Type); + #ifndef Py_GIL_DISABLED assert(r->len > 0); - long value = r->start; - r->start = value + r->step; - r->len--; + #endif + long value = FT_ATOMIC_LOAD_LONG_RELAXED(r->start); + FT_ATOMIC_STORE_LONG_RELAXED(r->start, value + r->step); + FT_ATOMIC_STORE_LONG_RELAXED(r->len, + FT_ATOMIC_LOAD_LONG_RELAXED(r->len) - 1); PyObject *res = PyLong_FromLong(value); if (res == NULL) JUMP_TO_ERROR(); next = PyStackRef_FromPyObjectSteal(res); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 6e16e49a70fcdf..74883fafe96b57 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -4139,7 +4139,7 @@ _PyRangeIterObject *r = (_PyRangeIterObject *)PyStackRef_AsPyObjectBorrow(iter); assert(Py_TYPE(r) == &PyRangeIter_Type); STAT_INC(FOR_ITER, hit); - if (r->len <= 0) { + if (FT_ATOMIC_LOAD_LONG_RELAXED(r->len) <= 0) { // Jump over END_FOR instruction. 
JUMPBY(oparg + 1); DISPATCH(); @@ -4149,10 +4149,13 @@ { _PyRangeIterObject *r = (_PyRangeIterObject *)PyStackRef_AsPyObjectBorrow(iter); assert(Py_TYPE(r) == &PyRangeIter_Type); + #ifndef Py_GIL_DISABLED assert(r->len > 0); - long value = r->start; - r->start = value + r->step; - r->len--; + #endif + long value = FT_ATOMIC_LOAD_LONG_RELAXED(r->start); + FT_ATOMIC_STORE_LONG_RELAXED(r->start, value + r->step); + FT_ATOMIC_STORE_LONG_RELAXED(r->len, + FT_ATOMIC_LOAD_LONG_RELAXED(r->len) - 1); PyObject *res = PyLong_FromLong(value); if (res == NULL) goto error; next = PyStackRef_FromPyObjectSteal(res); diff --git a/Python/specialize.c b/Python/specialize.c index 8282d41551e924..d5f80839f7ec47 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -2668,12 +2668,13 @@ _Py_Specialize_ForIter(_PyStackRef iter, _Py_CODEUNIT *instr, int oparg) specialize(instr, FOR_ITER_TUPLE); return; } -#ifndef Py_GIL_DISABLED else if (tp == &PyRangeIter_Type) { specialize(instr, FOR_ITER_RANGE); return; } else if (tp == &PyGen_Type && oparg <= SHRT_MAX) { + // Generators are very much not thread-safe, so don't worry about + // the specialization not being thread-safe. assert(instr[oparg + INLINE_CACHE_ENTRIES_FOR_ITER + 1].op.code == END_FOR || instr[oparg + INLINE_CACHE_ENTRIES_FOR_ITER + 1].op.code == INSTRUMENTED_END_FOR ); @@ -2686,7 +2687,6 @@ _Py_Specialize_ForIter(_PyStackRef iter, _Py_CODEUNIT *instr, int oparg) specialize(instr, FOR_ITER_GEN); return; } -#endif SPECIALIZATION_FAIL(FOR_ITER, _PySpecialization_ClassifyIterator(iter_o)); unspecialize(instr); From a662ecf95f0beeae5a75e24585573c8c7b94aff8 Mon Sep 17 00:00:00 2001 From: Thomas Wouters Date: Mon, 13 Jan 2025 23:32:01 +0100 Subject: [PATCH 03/13] Add missing ifdef guard. --- Python/specialize.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Python/specialize.c b/Python/specialize.c index d5f80839f7ec47..f3eedd24df7c0e 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -2652,6 +2652,7 @@ _Py_Specialize_ForIter(_PyStackRef iter, _Py_CODEUNIT *instr, int oparg) PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); PyTypeObject *tp = Py_TYPE(iter_o); if (tp == &PyListIter_Type) { +#ifdef Py_GIL_DISABLED _PyListIterObject *it = (_PyListIterObject *)iter_o; if (!_Py_IsOwnedByCurrentThread((PyObject *)it->it_seq) && !_PyObject_GC_IS_SHARED(it->it_seq)) { @@ -2661,6 +2662,7 @@ _Py_Specialize_ForIter(_PyStackRef iter, _Py_CODEUNIT *instr, int oparg) unspecialize(instr); return; } +#endif specialize(instr, FOR_ITER_LIST); return; } From 2fef94bb01c68e7b7ecd7143d68334afc52748cb Mon Sep 17 00:00:00 2001 From: Thomas Wouters Date: Mon, 13 Jan 2025 23:33:28 +0100 Subject: [PATCH 04/13] Fix copy-paste mistake. --- Python/bytecodes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 344ece5ae574cc..73764351f67591 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -3136,7 +3136,7 @@ dummy_func( PyTupleObject *seq = it->it_seq; EXIT_IF(seq == NULL); EXIT_IF(it->it_index >= PyTuple_GET_SIZE(seq)); -#ifndef Py_GIL_DISABLED +#endif } op(_ITER_NEXT_TUPLE, (iter -- iter, next)) { From 0870ce78af9c363b6f9c8e6f097057931e0e4a90 Mon Sep 17 00:00:00 2001 From: Thomas Wouters Date: Tue, 14 Jan 2025 00:49:17 +0100 Subject: [PATCH 05/13] Regen cases. 
--- Python/executor_cases.c.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index d82a43693546df..1a21a4ca02b32e 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -3739,7 +3739,7 @@ UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } - #ifndef Py_GIL_DISABLED + #endif break; } From cf5fcd01ec39b5f0e8e57ba5ea24cc70eac5ef62 Mon Sep 17 00:00:00 2001 From: Thomas Wouters Date: Tue, 21 Jan 2025 13:32:26 +0100 Subject: [PATCH 06/13] Rework FOR_ITER specialization in the free-threaded build to only apply to uniquely referenced iterators. This handles the common case of 'for item in seq' (where 'seq' is a list, tuple or range object) and 'for item in generator_function()', but not, for example, 'g = gen(...); for item in g:'. --- Include/internal/pycore_list.h | 2 +- Include/internal/pycore_uop_metadata.h | 2 +- .../test_iteration_deopt.py | 155 ++++++++++++++++++ Lib/test/test_opcache.py | 38 +++++ Objects/rangeobject.c | 28 ++-- Objects/tupleobject.c | 10 +- Python/bytecodes.c | 84 +++++----- Python/executor_cases.c.h | 72 ++++---- Python/generated_cases.c.h | 74 +++++---- Python/specialize.c | 21 ++- Tools/cases_generator/analyzer.py | 4 +- 11 files changed, 352 insertions(+), 138 deletions(-) create mode 100644 Lib/test/test_free_threading/test_iteration_deopt.py diff --git a/Include/internal/pycore_list.h b/Include/internal/pycore_list.h index 9a995471f063d5..28bab04c6fdb7f 100644 --- a/Include/internal/pycore_list.h +++ b/Include/internal/pycore_list.h @@ -15,7 +15,7 @@ extern void _PyList_DebugMallocStats(FILE *out); extern PyObject* _PyList_GetItemRef(PyListObject *, Py_ssize_t i); #ifdef Py_GIL_DISABLED // Returns -1 in case of races with other threads. 
-extern int _PyList_GetItemRefNoLock(PyListObject *, Py_ssize_t i, PyObject ** result); +extern int _PyList_GetItemRefNoLock(PyListObject *, Py_ssize_t, PyObject **); #endif #define _PyList_ITEMS(op) _Py_RVALUE(_PyList_CAST(op)->ob_item) diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 6519b5c4bcb4bb..c18788df98e76b 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -196,7 +196,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_ITER_NEXT_LIST] = HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_EXIT_FLAG, [_ITER_CHECK_TUPLE] = HAS_EXIT_FLAG, [_GUARD_NOT_EXHAUSTED_TUPLE] = HAS_EXIT_FLAG, - [_ITER_NEXT_TUPLE] = HAS_ARG_FLAG | HAS_JUMP_FLAG, + [_ITER_NEXT_TUPLE] = 0, [_ITER_CHECK_RANGE] = HAS_EXIT_FLAG, [_GUARD_NOT_EXHAUSTED_RANGE] = HAS_EXIT_FLAG, [_ITER_NEXT_RANGE] = HAS_ERROR_FLAG, diff --git a/Lib/test/test_free_threading/test_iteration_deopt.py b/Lib/test/test_free_threading/test_iteration_deopt.py new file mode 100644 index 00000000000000..257fe99e3db102 --- /dev/null +++ b/Lib/test/test_free_threading/test_iteration_deopt.py @@ -0,0 +1,155 @@ +import dis +import queue +import threading +import time +import unittest +from test.support import (import_helper, cpython_only, Py_GIL_DISABLED, + requires_specialization_ft) + +_testinternalcapi = import_helper.import_module("_testinternalcapi") +_testlimitedcapi = import_helper.import_module("_testlimitedcapi") + +NUMTHREADS = 5 + +def get_tlbc_instructions(f): + co = dis._get_code_object(f) + tlbc = _testinternalcapi.get_tlbc(co) + return [i.opname for i in dis._get_instructions_bytes(tlbc)] + + +class IterationDeoptTests(unittest.TestCase): + def check_deopt(self, get_iter, opcode, is_generator=False): + input = range(100) + expected_len = len(input) + q = queue.Queue() + barrier = threading.Barrier(NUMTHREADS + 1) + done = threading.Event() + def worker(): + # A complicated dance to get a weak reference to an iterator + # _only_ (strongly) referenced by the for loop, so that we can + # force our loop to deopt mid-way through. + it = get_iter(input) + ref = _testlimitedcapi.pylong_fromvoidptr(it) + q.put(ref) + # We can't use enumerate without affecting the loop, so keep a + # manual counter. + i = 0 + loop_a_little_more = 5 + results = [] + try: + # Make sure we're not still specialized from a previous run. + ops = get_tlbc_instructions(worker) + self.assertIn('FOR_ITER', ops) + self.assertNotIn(opcode, ops) + for item in it: + results.append(item) + i += 1 + + # We have to be very careful exiting the loop, because + # if the main thread hasn't dereferenced the unsafe + # weakref to our iterator yet, exiting will make it + # invalid and cause a crash. Getting the timing right is + # difficult, though, since it depends on the OS + # scheduler and the system load. As a final safeguard, + # if we're close to finishing the loop, just wait for the + # main thread. + if i + loop_a_little_more > expected_len: + done.wait() + + if i == 1: + del it + # Warm up. The first iteration didn't count because of + # the extra reference to the iterator. + if i < 10: + continue + if i == 10: + ops = get_tlbc_instructions(worker) + self.assertIn(opcode, ops) + # Let the main thread know it's time to reference our iterator. + barrier.wait() + continue + # Continue iterating while at any time our loop may be + # forced to deopt, but try to get the thread scheduler + # to give the main thread a chance to run. 
+ if not done.is_set(): + time.sleep(0) + continue + if loop_a_little_more: + # Loop a little more after 'done' is set to make sure we + # introduce a tsan-detectable race if the loop isn't + # deopting appropriately. + loop_a_little_more -= 1 + continue + break + self.assertEqual(results, list(input)[:i]) + except threading.BrokenBarrierError: + return + except Exception as e: + # In case the exception happened before the last barrier, + # reset it so nothing is left hanging. + barrier.reset() + # In case it's the final assertion that failed, just add it + # to the result queue so it'll show up in the normal test + # output. + q.put(e) + raise + q.put("SUCCESS") + # Reset specialization and thread-local bytecode from previous runs. + worker.__code__ = worker.__code__.replace() + threads = [threading.Thread(target=worker) for i in range(NUMTHREADS)] + for t in threads: + t.start() + # Get the "weakrefs" from the worker threads. + refs = [q.get() for i in range(NUMTHREADS)] + # Wait for each thread to finish its specialization check. + barrier.wait() + # Dereference the "weakrefs" we were sent in an extremely unsafe way. + iterators = [_testlimitedcapi.pylong_asvoidptr(ref) for ref in refs] + done.set() + self.assertNotIn(None, iterators) + # Read data that the iteration writes, to trigger data races if they + # don't deopt appropriately. + if is_generator: + for it in iterators: + it.gi_running + else: + for it in iterators: + it.__reduce__() + for t in threads: + t.join() + results = [q.get() for i in range(NUMTHREADS)] + self.assertEqual(results, ["SUCCESS"] * NUMTHREADS) + + @cpython_only + @requires_specialization_ft + @unittest.skipIf(not Py_GIL_DISABLED, "requires free-threading") + def test_deopt_leaking_iterator_list(self): + def make_list_iter(input): + return iter(list(input)) + self.check_deopt(make_list_iter, 'FOR_ITER_LIST') + + @cpython_only + @requires_specialization_ft + @unittest.skipIf(not Py_GIL_DISABLED, "requires free-threading") + def test_deopt_leaking_iterator_tuple(self): + def make_tuple_iter(input): + return iter(tuple(input)) + self.check_deopt(make_tuple_iter, 'FOR_ITER_TUPLE') + + @cpython_only + @requires_specialization_ft + @unittest.skipIf(not Py_GIL_DISABLED, "requires free-threading") + def test_deopt_leaking_iterator_range(self): + def make_range_iter(input): + return iter(input) + self.check_deopt(make_range_iter, 'FOR_ITER_RANGE') + + @cpython_only + @requires_specialization_ft + @unittest.skipIf(not Py_GIL_DISABLED, "requires free-threading") + def test_deopt_leaking_iterator_generator(self): + def gen(input): + for item in input: + yield item + self.check_deopt(gen, 'FOR_ITER_GEN', is_generator=True) + diff --git a/Lib/test/test_opcache.py b/Lib/test/test_opcache.py index c7cd4c2e8a3146..b6064cebf0dcac 100644 --- a/Lib/test/test_opcache.py +++ b/Lib/test/test_opcache.py @@ -1631,6 +1631,44 @@ def compare_op_str(): self.assert_specialized(compare_op_str, "COMPARE_OP_STR") self.assert_no_opcode(compare_op_str, "COMPARE_OP") + @cpython_only + @requires_specialization_ft + def test_for_iter(self): + L = list(range(10)) + def for_iter_list(): + for i in L: + self.assertIn(i, L) + + for_iter_list() + self.assert_specialized(for_iter_list, "FOR_ITER_LIST") + self.assert_no_opcode(for_iter_list, "FOR_ITER") + + t = tuple(range(10)) + def for_iter_tuple(): + for i in t: + self.assertIn(i, t) + + for_iter_tuple() + self.assert_specialized(for_iter_tuple, "FOR_ITER_TUPLE") + self.assert_no_opcode(for_iter_tuple, "FOR_ITER") + + r = range(10) + def 
for_iter_range(): + for i in r: + self.assertIn(i, r) + + for_iter_range() + self.assert_specialized(for_iter_range, "FOR_ITER_RANGE") + self.assert_no_opcode(for_iter_range, "FOR_ITER") + + def for_iter_generator(): + for i in (i for i in range(10)): + i + 1 + + for_iter_generator() + self.assert_specialized(for_iter_generator, "FOR_ITER_GEN") + self.assert_no_opcode(for_iter_generator, "FOR_ITER") + if __name__ == "__main__": unittest.main() diff --git a/Objects/rangeobject.c b/Objects/rangeobject.c index ea6529cf99d18c..2942ab624edf72 100644 --- a/Objects/rangeobject.c +++ b/Objects/rangeobject.c @@ -7,7 +7,6 @@ #include "pycore_modsupport.h" // _PyArg_NoKwnames() #include "pycore_range.h" #include "pycore_tuple.h" // _PyTuple_ITEMS() -#include "pycore_pyatomic_ft_wrappers.h" /* Support objects whose length is > PY_SSIZE_T_MAX. @@ -817,12 +816,10 @@ PyTypeObject PyRange_Type = { static PyObject * rangeiter_next(_PyRangeIterObject *r) { - long len = FT_ATOMIC_LOAD_LONG_RELAXED(r->len); - if (len > 0) { - long result = FT_ATOMIC_LOAD_LONG_RELAXED(r->start); - FT_ATOMIC_STORE_LONG_RELAXED(r->start, result + r->step); - // Relaxed ops for maximum speed and minimum thread-safety. - FT_ATOMIC_STORE_LONG_RELAXED(r->len, len - 1); + if (r->len > 0) { + long result = r->start; + r->start = result + r->step; + r->len--; return PyLong_FromLong(result); } return NULL; @@ -831,7 +828,7 @@ rangeiter_next(_PyRangeIterObject *r) static PyObject * rangeiter_len(_PyRangeIterObject *r, PyObject *Py_UNUSED(ignored)) { - return PyLong_FromLong(FT_ATOMIC_LOAD_LONG_RELAXED(r->len)); + return PyLong_FromLong(r->len); } PyDoc_STRVAR(length_hint_doc, @@ -844,11 +841,10 @@ rangeiter_reduce(_PyRangeIterObject *r, PyObject *Py_UNUSED(ignored)) PyObject *range; /* create a range object for pickling */ - long lstart = FT_ATOMIC_LOAD_LONG_RELAXED(r->start); - start = PyLong_FromLong(lstart); + start = PyLong_FromLong(r->start); if (start == NULL) goto err; - stop = PyLong_FromLong(lstart + FT_ATOMIC_LOAD_LONG_RELAXED(r->len) * r->step); + stop = PyLong_FromLong(r->start + r->len * r->step); if (stop == NULL) goto err; step = PyLong_FromLong(r->step); @@ -875,14 +871,12 @@ rangeiter_setstate(_PyRangeIterObject *r, PyObject *state) if (index == -1 && PyErr_Occurred()) return NULL; /* silently clip the index value */ - long len = FT_ATOMIC_LOAD_LONG_RELAXED(r->len); if (index < 0) index = 0; - else if (index > len) - index = len; /* exhausted iterator */ - FT_ATOMIC_STORE_LONG_RELAXED(r->start, - FT_ATOMIC_LOAD_LONG_RELAXED(r->start) + index * r->step); - FT_ATOMIC_STORE_LONG_RELAXED(r->len, len - index); + else if (index > r->len) + index = r->len; /* exhausted iterator */ + r->start += index * r->step; + r->len -= index; Py_RETURN_NONE; } diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c index 4e8afbce27681a..002002eb455556 100644 --- a/Objects/tupleobject.c +++ b/Objects/tupleobject.c @@ -1020,15 +1020,13 @@ tupleiter_next(PyObject *self) return NULL; assert(PyTuple_Check(seq)); - Py_ssize_t idx = FT_ATOMIC_LOAD_SSIZE_RELAXED(it->it_index); - if ((size_t)idx < (size_t)PyTuple_GET_SIZE(seq)) { - item = PyTuple_GET_ITEM(seq, idx++); - FT_ATOMIC_STORE_SSIZE_RELAXED(it->it_index, idx); + if (it->it_index < PyTuple_GET_SIZE(seq)) { + item = PyTuple_GET_ITEM(seq, it->it_index); + ++it->it_index; return Py_NewRef(item); } - FT_ATOMIC_STORE_SSIZE_RELAXED(it->it_index, -1); - FT_ATOMIC_STORE_PTR_RELAXED(it->it_seq, NULL); + it->it_seq = NULL; Py_DECREF(seq); return NULL; } diff --git a/Python/bytecodes.c 
b/Python/bytecodes.c index 73764351f67591..3b477ba59481f5 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -3018,30 +3018,30 @@ dummy_func( PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); EXIT_IF(Py_TYPE(iter_o) != &PyListIter_Type); #ifdef Py_GIL_DISABLED + EXIT_IF(!_PyObject_IsUniquelyReferenced(iter_o)); _PyListIterObject *it = (_PyListIterObject *)iter_o; - EXIT_IF(it->it_seq == NULL || - !_Py_IsOwnedByCurrentThread((PyObject *)it->it_seq) || + EXIT_IF(!_Py_IsOwnedByCurrentThread((PyObject *)it->it_seq) || !_PyObject_GC_IS_SHARED(it->it_seq)); #endif } replaced op(_ITER_JUMP_LIST, (iter -- iter)) { + PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); + assert(Py_TYPE(iter_o) == &PyListIter_Type); // For free-threaded Python, the loop exit can happen at any point during item // retrieval, so separate ops don't make much sense. -#ifndef Py_GIL_DISABLED - PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); +#ifdef Py_GIL_DISABLED + assert(_PyObject_IsUniquelyReferenced(iter_o)); +#else _PyListIterObject *it = (_PyListIterObject *)iter_o; - assert(Py_TYPE(iter_o) == &PyListIter_Type); STAT_INC(FOR_ITER, hit); PyListObject *seq = it->it_seq; if (seq == NULL || (size_t)it->it_index >= (size_t)PyList_GET_SIZE(seq)) { it->it_index = -1; - #ifndef Py_GIL_DISABLED if (seq != NULL) { it->it_seq = NULL; Py_DECREF(seq); } - #endif /* Jump forward oparg, then skip following END_FOR instruction */ JUMPBY(oparg + 1); DISPATCH(); @@ -3071,24 +3071,24 @@ dummy_func( PyListObject *seq = it->it_seq; assert(seq); #ifdef Py_GIL_DISABLED + assert(_PyObject_IsUniquelyReferenced(iter_o)); assert(_Py_IsOwnedByCurrentThread((PyObject *)seq) || _PyObject_GC_IS_SHARED(seq)); STAT_INC(FOR_ITER, hit); - Py_ssize_t idx = _Py_atomic_load_ssize_relaxed(&it->it_index); PyObject *item; - int result = _PyList_GetItemRefNoLock(it->it_seq, idx, &item); + int result = _PyList_GetItemRefNoLock(seq, it->it_index, &item); // A negative result means we lost a race with another thread // and we need to take the slow path. EXIT_IF(result < 0); if (result == 0) { - _Py_atomic_store_ssize_relaxed(&it->it_index, -1); + it->it_index = -1; PyStackRef_CLOSE(iter); STACK_SHRINK(1); /* Jump forward oparg, then skip following END_FOR and POP_TOP instructions */ JUMPBY(oparg + 2); DISPATCH(); } - _Py_atomic_store_ssize_relaxed(&it->it_index, idx + 1); + it->it_index++; next = PyStackRef_FromPyObjectSteal(item); #else assert(it->it_index < PyList_GET_SIZE(seq)); @@ -3103,7 +3103,11 @@ dummy_func( _ITER_NEXT_LIST; op(_ITER_CHECK_TUPLE, (iter -- iter)) { - EXIT_IF(Py_TYPE(PyStackRef_AsPyObjectBorrow(iter)) != &PyTupleIter_Type); + PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); + EXIT_IF(Py_TYPE(iter_o) != &PyTupleIter_Type); +#ifdef Py_GIL_DISABLED + EXIT_IF(!_PyObject_IsUniquelyReferenced(iter_o)); +#endif } replaced op(_ITER_JUMP_TUPLE, (iter -- iter)) { @@ -3111,20 +3115,23 @@ dummy_func( assert(Py_TYPE(iter_o) == &PyTupleIter_Type); // For free-threaded Python, the loop exit can happen at any point during item // retrieval, so separate ops don't make much sense. 
-#ifndef Py_GIL_DISABLED +#ifdef Py_GIL_DISABLED + assert(_PyObject_IsUniquelyReferenced(iter_o)); +#endif _PyTupleIterObject *it = (_PyTupleIterObject *)iter_o; STAT_INC(FOR_ITER, hit); PyTupleObject *seq = it->it_seq; - if (seq == NULL || it->it_index >= PyTuple_GET_SIZE(seq)) { + if (seq == NULL || (size_t)it->it_index >= (size_t)PyTuple_GET_SIZE(seq)) { +#ifndef Py_GIL_DISABLED if (seq != NULL) { it->it_seq = NULL; Py_DECREF(seq); } +#endif /* Jump forward oparg, then skip following END_FOR instruction */ JUMPBY(oparg + 1); DISPATCH(); } -#endif } // Only used by Tier 2 @@ -3132,11 +3139,12 @@ dummy_func( PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); _PyTupleIterObject *it = (_PyTupleIterObject *)iter_o; assert(Py_TYPE(iter_o) == &PyTupleIter_Type); -#ifndef Py_GIL_DISABLED +#ifdef Py_GIL_DISABLED + assert(_PyObject_IsUniquelyReferenced(iter_o)); +#endif PyTupleObject *seq = it->it_seq; EXIT_IF(seq == NULL); EXIT_IF(it->it_index >= PyTuple_GET_SIZE(seq)); -#endif } op(_ITER_NEXT_TUPLE, (iter -- iter, next)) { @@ -3145,23 +3153,12 @@ dummy_func( assert(Py_TYPE(iter_o) == &PyTupleIter_Type); PyTupleObject *seq = it->it_seq; #ifdef Py_GIL_DISABLED - STAT_INC(FOR_ITER, hit); - Py_ssize_t idx = _Py_atomic_load_ssize_relaxed(&it->it_index); - if (seq == NULL || (size_t)idx >= (size_t)PyTuple_GET_SIZE(seq)) { - _Py_atomic_store_ssize_relaxed(&it->it_index, -1); - PyStackRef_CLOSE(iter); - STACK_SHRINK(1); - /* Jump forward oparg, then skip following END_FOR and POP_TOP instructions */ - JUMPBY(oparg + 2); - DISPATCH(); - } - _Py_atomic_store_ssize_relaxed(&it->it_index, idx + 1); - next = PyStackRef_FromPyObjectNew(PyTuple_GET_ITEM(seq, idx)); -#else + assert(_PyObject_IsUniquelyReferenced(iter_o)); +#endif assert(seq); + assert(it->it_index >= 0); assert(it->it_index < PyTuple_GET_SIZE(seq)); next = PyStackRef_FromPyObjectNew(PyTuple_GET_ITEM(seq, it->it_index++)); -#endif } macro(FOR_ITER_TUPLE) = @@ -3173,13 +3170,19 @@ dummy_func( op(_ITER_CHECK_RANGE, (iter -- iter)) { _PyRangeIterObject *r = (_PyRangeIterObject *)PyStackRef_AsPyObjectBorrow(iter); EXIT_IF(Py_TYPE(r) != &PyRangeIter_Type); +#ifdef Py_GIL_DISABLED + EXIT_IF(!_PyObject_IsUniquelyReferenced((PyObject *)r)); +#endif } replaced op(_ITER_JUMP_RANGE, (iter -- iter)) { _PyRangeIterObject *r = (_PyRangeIterObject *)PyStackRef_AsPyObjectBorrow(iter); assert(Py_TYPE(r) == &PyRangeIter_Type); +#ifdef Py_GIL_DISABLED + assert(_PyObject_IsUniquelyReferenced((PyObject *)r)); +#endif STAT_INC(FOR_ITER, hit); - if (FT_ATOMIC_LOAD_LONG_RELAXED(r->len) <= 0) { + if (r->len <= 0) { // Jump over END_FOR instruction. 
JUMPBY(oparg + 1); DISPATCH(); @@ -3190,19 +3193,19 @@ dummy_func( op(_GUARD_NOT_EXHAUSTED_RANGE, (iter -- iter)) { _PyRangeIterObject *r = (_PyRangeIterObject *)PyStackRef_AsPyObjectBorrow(iter); assert(Py_TYPE(r) == &PyRangeIter_Type); - EXIT_IF(FT_ATOMIC_LOAD_LONG_RELAXED(r->len) <= 0); + EXIT_IF(r->len <= 0); } op(_ITER_NEXT_RANGE, (iter -- iter, next)) { _PyRangeIterObject *r = (_PyRangeIterObject *)PyStackRef_AsPyObjectBorrow(iter); assert(Py_TYPE(r) == &PyRangeIter_Type); -#ifndef Py_GIL_DISABLED - assert(r->len > 0); +#ifdef Py_GIL_DISABLED + assert(_PyObject_IsUniquelyReferenced((PyObject *)r)); #endif - long value = FT_ATOMIC_LOAD_LONG_RELAXED(r->start); - FT_ATOMIC_STORE_LONG_RELAXED(r->start, value + r->step); - FT_ATOMIC_STORE_LONG_RELAXED(r->len, - FT_ATOMIC_LOAD_LONG_RELAXED(r->len) - 1); + assert(r->len > 0); + long value = r->start; + r->start = value + r->step; + r->len--; PyObject *res = PyLong_FromLong(value); ERROR_IF(res == NULL, error); next = PyStackRef_FromPyObjectSteal(res); @@ -3217,6 +3220,9 @@ dummy_func( op(_FOR_ITER_GEN_FRAME, (iter -- iter, gen_frame: _PyInterpreterFrame*)) { PyGenObject *gen = (PyGenObject *)PyStackRef_AsPyObjectBorrow(iter); DEOPT_IF(Py_TYPE(gen) != &PyGen_Type); +#ifdef Py_GIL_DISABLED + DEOPT_IF(!_PyObject_IsUniquelyReferenced((PyObject *)gen)); +#endif DEOPT_IF(gen->gi_frame_state >= FRAME_EXECUTING); STAT_INC(FOR_ITER, hit); gen_frame = &gen->gi_iframe; diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 1a21a4ca02b32e..e9e96201d38b0e 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -3632,6 +3632,10 @@ JUMP_TO_JUMP_TARGET(); } #ifdef Py_GIL_DISABLED + if (!_PyObject_IsUniquelyReferenced(iter_o)) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } _PyListIterObject *it = (_PyListIterObject *)iter_o; if (it->it_seq == NULL || !_Py_IsOwnedByCurrentThread((PyObject *)it->it_seq) || @@ -3679,12 +3683,12 @@ PyListObject *seq = it->it_seq; assert(seq); #ifdef Py_GIL_DISABLED + assert(_PyObject_IsUniquelyReferenced(iter_o)); assert(_Py_IsOwnedByCurrentThread((PyObject *)seq) || - _PyObject_GC_IS_SHARED(seq)); + _PyObject_GC_IS_SHARED(seq)); STAT_INC(FOR_ITER, hit); - Py_ssize_t idx = _Py_atomic_load_ssize_relaxed(&it->it_index); PyObject *item; - int result = _PyList_GetItemRefNoLock(it->it_seq, idx, &item); + int result = _PyList_GetItemRefNoLock(seq, it->it_index, &item); // A negative result means we lost a race with another thread // and we need to take the slow path. 
if (result < 0) { @@ -3692,14 +3696,14 @@ JUMP_TO_JUMP_TARGET(); } if (result == 0) { - _Py_atomic_store_ssize_relaxed(&it->it_index, -1); + it->it_index = -1; PyStackRef_CLOSE(iter); STACK_SHRINK(1); /* Jump forward oparg, then skip following END_FOR and POP_TOP instructions */ JUMPBY(oparg + 2); DISPATCH(); } - _Py_atomic_store_ssize_relaxed(&it->it_index, idx + 1); + it->it_index++; next = PyStackRef_FromPyObjectSteal(item); #else assert(it->it_index < PyList_GET_SIZE(seq)); @@ -3714,10 +3718,17 @@ case _ITER_CHECK_TUPLE: { _PyStackRef iter; iter = stack_pointer[-1]; - if (Py_TYPE(PyStackRef_AsPyObjectBorrow(iter)) != &PyTupleIter_Type) { + PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); + if (Py_TYPE(iter_o) != &PyTupleIter_Type) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } + #ifdef Py_GIL_DISABLED + if (!_PyObject_IsUniquelyReferenced(iter_o)) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + #endif break; } @@ -3729,7 +3740,9 @@ PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); _PyTupleIterObject *it = (_PyTupleIterObject *)iter_o; assert(Py_TYPE(iter_o) == &PyTupleIter_Type); - #ifndef Py_GIL_DISABLED + #ifdef Py_GIL_DISABLED + assert(_PyObject_IsUniquelyReferenced(iter_o)); + #endif PyTupleObject *seq = it->it_seq; if (seq == NULL) { UOP_STAT_INC(uopcode, miss); @@ -3739,37 +3752,24 @@ UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } - #endif break; } case _ITER_NEXT_TUPLE: { _PyStackRef iter; _PyStackRef next; - oparg = CURRENT_OPARG(); iter = stack_pointer[-1]; PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); _PyTupleIterObject *it = (_PyTupleIterObject *)iter_o; assert(Py_TYPE(iter_o) == &PyTupleIter_Type); PyTupleObject *seq = it->it_seq; #ifdef Py_GIL_DISABLED - STAT_INC(FOR_ITER, hit); - Py_ssize_t idx = _Py_atomic_load_ssize_relaxed(&it->it_index); - if (seq == NULL || (size_t)idx >= (size_t)PyTuple_GET_SIZE(seq)) { - _Py_atomic_store_ssize_relaxed(&it->it_index, -1); - PyStackRef_CLOSE(iter); - STACK_SHRINK(1); - /* Jump forward oparg, then skip following END_FOR and POP_TOP instructions */ - JUMPBY(oparg + 2); - DISPATCH(); - } - _Py_atomic_store_ssize_relaxed(&it->it_index, idx + 1); - next = PyStackRef_FromPyObjectNew(PyTuple_GET_ITEM(seq, idx)); - #else + assert(_PyObject_IsUniquelyReferenced(iter_o)); + #endif assert(seq); + assert(it->it_index >= 0); assert(it->it_index < PyTuple_GET_SIZE(seq)); next = PyStackRef_FromPyObjectNew(PyTuple_GET_ITEM(seq, it->it_index++)); - #endif stack_pointer[0] = next; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -3784,6 +3784,12 @@ UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } + #ifdef Py_GIL_DISABLED + if (!_PyObject_IsUniquelyReferenced((PyObject *)r)) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + #endif break; } @@ -3794,7 +3800,7 @@ iter = stack_pointer[-1]; _PyRangeIterObject *r = (_PyRangeIterObject *)PyStackRef_AsPyObjectBorrow(iter); assert(Py_TYPE(r) == &PyRangeIter_Type); - if (FT_ATOMIC_LOAD_LONG_RELAXED(r->len) <= 0) { + if (r->len <= 0) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } @@ -3807,13 +3813,13 @@ iter = stack_pointer[-1]; _PyRangeIterObject *r = (_PyRangeIterObject *)PyStackRef_AsPyObjectBorrow(iter); assert(Py_TYPE(r) == &PyRangeIter_Type); - #ifndef Py_GIL_DISABLED - assert(r->len > 0); + #ifdef Py_GIL_DISABLED + assert(_PyObject_IsUniquelyReferenced((PyObject *)r)); #endif - long value = FT_ATOMIC_LOAD_LONG_RELAXED(r->start); - FT_ATOMIC_STORE_LONG_RELAXED(r->start, value + r->step); - FT_ATOMIC_STORE_LONG_RELAXED(r->len, 
- FT_ATOMIC_LOAD_LONG_RELAXED(r->len) - 1); + assert(r->len > 0); + long value = r->start; + r->start = value + r->step; + r->len--; PyObject *res = PyLong_FromLong(value); if (res == NULL) JUMP_TO_ERROR(); next = PyStackRef_FromPyObjectSteal(res); @@ -3833,6 +3839,12 @@ UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } + #ifdef Py_GIL_DISABLED + if (!_PyObject_IsUniquelyReferenced((PyObject *)gen)) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + #endif if (gen->gi_frame_state >= FRAME_EXECUTING) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 74883fafe96b57..6a8b33e5debc31 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -4008,6 +4008,9 @@ iter = stack_pointer[-1]; PyGenObject *gen = (PyGenObject *)PyStackRef_AsPyObjectBorrow(iter); DEOPT_IF(Py_TYPE(gen) != &PyGen_Type, FOR_ITER); + #ifdef Py_GIL_DISABLED + DEOPT_IF(!_PyObject_IsUniquelyReferenced((PyObject *)gen), FOR_ITER); + #endif DEOPT_IF(gen->gi_frame_state >= FRAME_EXECUTING, FOR_ITER); STAT_INC(FOR_ITER, hit); gen_frame = &gen->gi_iframe; @@ -4052,6 +4055,7 @@ PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); DEOPT_IF(Py_TYPE(iter_o) != &PyListIter_Type, FOR_ITER); #ifdef Py_GIL_DISABLED + DEOPT_IF(!_PyObject_IsUniquelyReferenced(iter_o), FOR_ITER); _PyListIterObject *it = (_PyListIterObject *)iter_o; DEOPT_IF(it->it_seq == NULL || !_Py_IsOwnedByCurrentThread((PyObject *)it->it_seq) || @@ -4060,12 +4064,14 @@ } // _ITER_JUMP_LIST { + PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); + assert(Py_TYPE(iter_o) == &PyListIter_Type); // For free-threaded Python, the loop exit can happen at any point during item // retrieval, so separate ops don't make much sense. - #ifndef Py_GIL_DISABLED - PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); + #ifdef Py_GIL_DISABLED + assert(_PyObject_IsUniquelyReferenced(iter_o)); + #else _PyListIterObject *it = (_PyListIterObject *)iter_o; - assert(Py_TYPE(iter_o) == &PyListIter_Type); STAT_INC(FOR_ITER, hit); PyListObject *seq = it->it_seq; if (seq == NULL || (size_t)it->it_index >= (size_t)PyList_GET_SIZE(seq)) { @@ -4090,24 +4096,24 @@ PyListObject *seq = it->it_seq; assert(seq); #ifdef Py_GIL_DISABLED + assert(_PyObject_IsUniquelyReferenced(iter_o)); assert(_Py_IsOwnedByCurrentThread((PyObject *)seq) || - _PyObject_GC_IS_SHARED(seq)); + _PyObject_GC_IS_SHARED(seq)); STAT_INC(FOR_ITER, hit); - Py_ssize_t idx = _Py_atomic_load_ssize_relaxed(&it->it_index); PyObject *item; - int result = _PyList_GetItemRefNoLock(it->it_seq, idx, &item); + int result = _PyList_GetItemRefNoLock(seq, it->it_index, &item); // A negative result means we lost a race with another thread // and we need to take the slow path. 
DEOPT_IF(result < 0, FOR_ITER); if (result == 0) { - _Py_atomic_store_ssize_relaxed(&it->it_index, -1); + it->it_index = -1; PyStackRef_CLOSE(iter); STACK_SHRINK(1); /* Jump forward oparg, then skip following END_FOR and POP_TOP instructions */ JUMPBY(oparg + 2); DISPATCH(); } - _Py_atomic_store_ssize_relaxed(&it->it_index, idx + 1); + it->it_index++; next = PyStackRef_FromPyObjectSteal(item); #else assert(it->it_index < PyList_GET_SIZE(seq)); @@ -4133,13 +4139,19 @@ iter = stack_pointer[-1]; _PyRangeIterObject *r = (_PyRangeIterObject *)PyStackRef_AsPyObjectBorrow(iter); DEOPT_IF(Py_TYPE(r) != &PyRangeIter_Type, FOR_ITER); + #ifdef Py_GIL_DISABLED + DEOPT_IF(!_PyObject_IsUniquelyReferenced((PyObject *)r), FOR_ITER); + #endif } // _ITER_JUMP_RANGE { _PyRangeIterObject *r = (_PyRangeIterObject *)PyStackRef_AsPyObjectBorrow(iter); assert(Py_TYPE(r) == &PyRangeIter_Type); + #ifdef Py_GIL_DISABLED + assert(_PyObject_IsUniquelyReferenced((PyObject *)r)); + #endif STAT_INC(FOR_ITER, hit); - if (FT_ATOMIC_LOAD_LONG_RELAXED(r->len) <= 0) { + if (r->len <= 0) { // Jump over END_FOR instruction. JUMPBY(oparg + 1); DISPATCH(); @@ -4149,13 +4161,13 @@ { _PyRangeIterObject *r = (_PyRangeIterObject *)PyStackRef_AsPyObjectBorrow(iter); assert(Py_TYPE(r) == &PyRangeIter_Type); - #ifndef Py_GIL_DISABLED - assert(r->len > 0); + #ifdef Py_GIL_DISABLED + assert(_PyObject_IsUniquelyReferenced((PyObject *)r)); #endif - long value = FT_ATOMIC_LOAD_LONG_RELAXED(r->start); - FT_ATOMIC_STORE_LONG_RELAXED(r->start, value + r->step); - FT_ATOMIC_STORE_LONG_RELAXED(r->len, - FT_ATOMIC_LOAD_LONG_RELAXED(r->len) - 1); + assert(r->len > 0); + long value = r->start; + r->start = value + r->step; + r->len--; PyObject *res = PyLong_FromLong(value); if (res == NULL) goto error; next = PyStackRef_FromPyObjectSteal(res); @@ -4177,7 +4189,11 @@ // _ITER_CHECK_TUPLE { iter = stack_pointer[-1]; - DEOPT_IF(Py_TYPE(PyStackRef_AsPyObjectBorrow(iter)) != &PyTupleIter_Type, FOR_ITER); + PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); + DEOPT_IF(Py_TYPE(iter_o) != &PyTupleIter_Type, FOR_ITER); + #ifdef Py_GIL_DISABLED + DEOPT_IF(!_PyObject_IsUniquelyReferenced(iter_o), FOR_ITER); + #endif } // _ITER_JUMP_TUPLE { @@ -4185,20 +4201,23 @@ assert(Py_TYPE(iter_o) == &PyTupleIter_Type); // For free-threaded Python, the loop exit can happen at any point during item // retrieval, so separate ops don't make much sense. 
- #ifndef Py_GIL_DISABLED + #ifdef Py_GIL_DISABLED + assert(_PyObject_IsUniquelyReferenced(iter_o)); + #endif _PyTupleIterObject *it = (_PyTupleIterObject *)iter_o; STAT_INC(FOR_ITER, hit); PyTupleObject *seq = it->it_seq; - if (seq == NULL || it->it_index >= PyTuple_GET_SIZE(seq)) { + if (seq == NULL || (size_t)it->it_index >= (size_t)PyTuple_GET_SIZE(seq)) { + #ifndef Py_GIL_DISABLED if (seq != NULL) { it->it_seq = NULL; Py_DECREF(seq); } + #endif /* Jump forward oparg, then skip following END_FOR instruction */ JUMPBY(oparg + 1); DISPATCH(); } - #endif } // _ITER_NEXT_TUPLE { @@ -4207,23 +4226,12 @@ assert(Py_TYPE(iter_o) == &PyTupleIter_Type); PyTupleObject *seq = it->it_seq; #ifdef Py_GIL_DISABLED - STAT_INC(FOR_ITER, hit); - Py_ssize_t idx = _Py_atomic_load_ssize_relaxed(&it->it_index); - if (seq == NULL || (size_t)idx >= (size_t)PyTuple_GET_SIZE(seq)) { - _Py_atomic_store_ssize_relaxed(&it->it_index, -1); - PyStackRef_CLOSE(iter); - STACK_SHRINK(1); - /* Jump forward oparg, then skip following END_FOR and POP_TOP instructions */ - JUMPBY(oparg + 2); - DISPATCH(); - } - _Py_atomic_store_ssize_relaxed(&it->it_index, idx + 1); - next = PyStackRef_FromPyObjectNew(PyTuple_GET_ITEM(seq, idx)); - #else + assert(_PyObject_IsUniquelyReferenced(iter_o)); + #endif assert(seq); + assert(it->it_index >= 0); assert(it->it_index < PyTuple_GET_SIZE(seq)); next = PyStackRef_FromPyObjectNew(PyTuple_GET_ITEM(seq, it->it_index++)); - #endif } stack_pointer[0] = next; stack_pointer += 1; diff --git a/Python/specialize.c b/Python/specialize.c index f3eedd24df7c0e..74251b1ab6db3a 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -2651,6 +2651,15 @@ _Py_Specialize_ForIter(_PyStackRef iter, _Py_CODEUNIT *instr, int oparg) assert(_PyOpcode_Caches[FOR_ITER] == INLINE_CACHE_ENTRIES_FOR_ITER); PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); PyTypeObject *tp = Py_TYPE(iter_o); +#ifdef Py_GIL_DISABLED + // Only specialize for uniquely referenced iterators, so that we know + // they're only referenced by this one thread. This is more limiting + // than we need (event `it = iter(mylist); for item in it:` won't get + // specialized) but we don't have a way to check whether we're the only + // _thread_ who has access to the object. + if (!_PyObject_IsUniquelyReferenced(iter_o)) + goto failure; +#endif if (tp == &PyListIter_Type) { #ifdef Py_GIL_DISABLED _PyListIterObject *it = (_PyListIterObject *)iter_o; @@ -2658,9 +2667,7 @@ _Py_Specialize_ForIter(_PyStackRef iter, _Py_CODEUNIT *instr, int oparg) !_PyObject_GC_IS_SHARED(it->it_seq)) { // Maybe this should just set GC_IS_SHARED in a critical // section, instead of leaving it to the first iteration? 
- SPECIALIZATION_FAIL(FOR_ITER, SPEC_FAIL_ITER_LIST); - unspecialize(instr); - return; + goto failure; } #endif specialize(instr, FOR_ITER_LIST); @@ -2681,14 +2688,12 @@ _Py_Specialize_ForIter(_PyStackRef iter, _Py_CODEUNIT *instr, int oparg) instr[oparg + INLINE_CACHE_ENTRIES_FOR_ITER + 1].op.code == INSTRUMENTED_END_FOR ); /* Don't specialize if PEP 523 is active */ - if (_PyInterpreterState_GET()->eval_frame) { - SPECIALIZATION_FAIL(FOR_ITER, SPEC_FAIL_OTHER); - unspecialize(instr); - return; - } + if (_PyInterpreterState_GET()->eval_frame) + goto failure; specialize(instr, FOR_ITER_GEN); return; } +failure: SPECIALIZATION_FAIL(FOR_ITER, _PySpecialization_ClassifyIterator(iter_o)); unspecialize(instr); diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index b743ad508c2b42..1dda7862d68a40 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -606,13 +606,13 @@ def has_error_without_pop(op: parser.InstDef) -> bool: "_PyLong_Multiply", "_PyLong_Subtract", "_PyManagedDictPointer_IsValues", - "_PyObject_CAST", "_PyObject_GC_IS_SHARED", "_PyObject_GC_IS_TRACKED", "_PyObject_GC_MAY_BE_TRACKED", "_PyObject_GC_TRACK", "_PyObject_GetManagedDict", "_PyObject_InlineValues", + "_PyObject_IsUniquelyReferenced", "_PyObject_ManagedDictPointer", "_PyThreadState_HasStackSpace", "_PyTuple_FromArraySteal", @@ -637,9 +637,7 @@ def has_error_without_pop(op: parser.InstDef) -> bool: "_Py_TryIncrefCompare", "_Py_TryIncrefCompareStackRef", "_Py_atomic_load_ptr_acquire", - "_Py_atomic_load_ssize_relaxed", "_Py_atomic_load_uintptr_relaxed", - "_Py_atomic_store_ssize_relaxed", "_Py_set_eval_breaker_bit", "advance_backoff_counter", "assert", From bb495b05f9c1a3d5224b88587af0682c0f29a12c Mon Sep 17 00:00:00 2001 From: Thomas Wouters Date: Tue, 21 Jan 2025 16:05:24 +0100 Subject: [PATCH 07/13] Fix whitespace for linter. --- Lib/test/test_free_threading/test_iteration_deopt.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/Lib/test/test_free_threading/test_iteration_deopt.py b/Lib/test/test_free_threading/test_iteration_deopt.py index 257fe99e3db102..47549502163afb 100644 --- a/Lib/test/test_free_threading/test_iteration_deopt.py +++ b/Lib/test/test_free_threading/test_iteration_deopt.py @@ -127,7 +127,7 @@ def test_deopt_leaking_iterator_list(self): def make_list_iter(input): return iter(list(input)) self.check_deopt(make_list_iter, 'FOR_ITER_LIST') - + @cpython_only @requires_specialization_ft @unittest.skipIf(not Py_GIL_DISABLED, "requires free-threading") @@ -135,7 +135,7 @@ def test_deopt_leaking_iterator_tuple(self): def make_tuple_iter(input): return iter(tuple(input)) self.check_deopt(make_tuple_iter, 'FOR_ITER_TUPLE') - + @cpython_only @requires_specialization_ft @unittest.skipIf(not Py_GIL_DISABLED, "requires free-threading") @@ -143,7 +143,7 @@ def test_deopt_leaking_iterator_range(self): def make_range_iter(input): return iter(input) self.check_deopt(make_range_iter, 'FOR_ITER_RANGE') - + @cpython_only @requires_specialization_ft @unittest.skipIf(not Py_GIL_DISABLED, "requires free-threading") @@ -152,4 +152,3 @@ def gen(input): for item in input: yield item self.check_deopt(gen, 'FOR_ITER_GEN', is_generator=True) - From 940b7c984a9638c81ff2710f71ae88209c7b3d81 Mon Sep 17 00:00:00 2001 From: Thomas Wouters Date: Tue, 21 Jan 2025 16:31:07 +0100 Subject: [PATCH 08/13] Incorporate changes from #128445 into the free-threaded branch of FOR_ITER_LIST specialization. 
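
The net effect of pulling in #128445 here is that an exhausted free-threaded
list iterator is no longer closed and popped inside FOR_ITER_LIST itself:
like the default build, the iterator stays on the stack and the exhausted
branch jumps oparg + 1, skipping only the trailing END_FOR and leaving the
POP_TOP that follows to pop the iterator. Roughly, the fast path treats the
result of _PyList_GetItemRefNoLock as in the sketch below. This is an
illustrative toy model only, not CPython code; get_item_nolock() is a
hypothetical stand-in with the same contract (-1 = lost a race,
0 = exhausted, 1 = item fetched).

    #include <stdio.h>

    /* Hypothetical stand-in for _PyList_GetItemRefNoLock's result contract. */
    static int get_item_nolock(const int *items, int len, int idx, int *out)
    {
        if (idx >= len) {
            return 0;            /* exhausted */
        }
        *out = items[idx];       /* CPython: atomic load + conditional incref */
        return 1;                /* a real caller can also see -1 on a lost race */
    }

    int main(void)
    {
        int seq[] = {10, 20, 30};
        int index = 0;
        for (;;) {
            int item;
            int result = get_item_nolock(seq, 3, index, &item);
            if (result < 0) {
                puts("lost a race: deopt back to the generic FOR_ITER");
                break;
            }
            if (result == 0) {
                /* leave the iterator for the POP_TOP after the skipped END_FOR */
                puts("exhausted");
                break;
            }
            index++;
            printf("next = %d\n", item);
        }
        return 0;
    }
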
--- Python/bytecodes.c | 6 ++---- Python/executor_cases.c.h | 9 +++------ Python/generated_cases.c.h | 11 +++-------- 3 files changed, 8 insertions(+), 18 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 3b477ba59481f5..0fc248dd26fb73 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -3082,10 +3082,8 @@ dummy_func( EXIT_IF(result < 0); if (result == 0) { it->it_index = -1; - PyStackRef_CLOSE(iter); - STACK_SHRINK(1); - /* Jump forward oparg, then skip following END_FOR and POP_TOP instructions */ - JUMPBY(oparg + 2); + /* Jump forward oparg, then skip following END_FOR instruction */ + JUMPBY(oparg + 1); DISPATCH(); } it->it_index++; diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index e9e96201d38b0e..3397454156ee1e 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -3637,8 +3637,7 @@ JUMP_TO_JUMP_TARGET(); } _PyListIterObject *it = (_PyListIterObject *)iter_o; - if (it->it_seq == NULL || - !_Py_IsOwnedByCurrentThread((PyObject *)it->it_seq) || + if (!_Py_IsOwnedByCurrentThread((PyObject *)it->it_seq) || !_PyObject_GC_IS_SHARED(it->it_seq)) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); @@ -3697,10 +3696,8 @@ } if (result == 0) { it->it_index = -1; - PyStackRef_CLOSE(iter); - STACK_SHRINK(1); - /* Jump forward oparg, then skip following END_FOR and POP_TOP instructions */ - JUMPBY(oparg + 2); + /* Jump forward oparg, then skip following END_FOR instruction */ + JUMPBY(oparg + 1); DISPATCH(); } it->it_index++; diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 6a8b33e5debc31..c9f77804796f44 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -4057,8 +4057,7 @@ #ifdef Py_GIL_DISABLED DEOPT_IF(!_PyObject_IsUniquelyReferenced(iter_o), FOR_ITER); _PyListIterObject *it = (_PyListIterObject *)iter_o; - DEOPT_IF(it->it_seq == NULL || - !_Py_IsOwnedByCurrentThread((PyObject *)it->it_seq) || + DEOPT_IF(!_Py_IsOwnedByCurrentThread((PyObject *)it->it_seq) || !_PyObject_GC_IS_SHARED(it->it_seq), FOR_ITER); #endif } @@ -4076,12 +4075,10 @@ PyListObject *seq = it->it_seq; if (seq == NULL || (size_t)it->it_index >= (size_t)PyList_GET_SIZE(seq)) { it->it_index = -1; - #ifndef Py_GIL_DISABLED if (seq != NULL) { it->it_seq = NULL; Py_DECREF(seq); } - #endif /* Jump forward oparg, then skip following END_FOR instruction */ JUMPBY(oparg + 1); DISPATCH(); @@ -4107,10 +4104,8 @@ DEOPT_IF(result < 0, FOR_ITER); if (result == 0) { it->it_index = -1; - PyStackRef_CLOSE(iter); - STACK_SHRINK(1); - /* Jump forward oparg, then skip following END_FOR and POP_TOP instructions */ - JUMPBY(oparg + 2); + /* Jump forward oparg, then skip following END_FOR instruction */ + JUMPBY(oparg + 1); DISPATCH(); } it->it_index++; From a800d754c6aab5ce3c6b50b2106f1968a8160d6b Mon Sep 17 00:00:00 2001 From: Thomas Wouters Date: Tue, 21 Jan 2025 16:34:43 +0100 Subject: [PATCH 09/13] Drop redundant assert. 
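
The dropped assert(it->it_index >= 0) does not add coverage: the tuple
iterator's index only goes negative when the iterator is marked exhausted,
and exhaustion also clears it_seq, so the existing seq == NULL checks in the
guard ops already keep that case from reaching _ITER_NEXT_TUPLE. The
remaining bounds assert covers the in-range case.
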
--- Python/bytecodes.c | 1 - Python/executor_cases.c.h | 1 - Python/generated_cases.c.h | 1 - 3 files changed, 3 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 0fc248dd26fb73..05039de403ae48 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -3154,7 +3154,6 @@ dummy_func( assert(_PyObject_IsUniquelyReferenced(iter_o)); #endif assert(seq); - assert(it->it_index >= 0); assert(it->it_index < PyTuple_GET_SIZE(seq)); next = PyStackRef_FromPyObjectNew(PyTuple_GET_ITEM(seq, it->it_index++)); } diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 3397454156ee1e..2695e903d641b1 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -3764,7 +3764,6 @@ assert(_PyObject_IsUniquelyReferenced(iter_o)); #endif assert(seq); - assert(it->it_index >= 0); assert(it->it_index < PyTuple_GET_SIZE(seq)); next = PyStackRef_FromPyObjectNew(PyTuple_GET_ITEM(seq, it->it_index++)); stack_pointer[0] = next; diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index c9f77804796f44..0e0c1099c558a7 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -4224,7 +4224,6 @@ assert(_PyObject_IsUniquelyReferenced(iter_o)); #endif assert(seq); - assert(it->it_index >= 0); assert(it->it_index < PyTuple_GET_SIZE(seq)); next = PyStackRef_FromPyObjectNew(PyTuple_GET_ITEM(seq, it->it_index++)); } From 1781cad167dac5064493e5fc052c36c2dc1a353f Mon Sep 17 00:00:00 2001 From: Thomas Wouters Date: Fri, 24 Jan 2025 14:21:55 +0100 Subject: [PATCH 10/13] Don't mark _PyList_GetItemRefNoLock as non-escaping. --- Include/internal/pycore_opcode_metadata.h | 2 +- Include/internal/pycore_uop_metadata.h | 2 +- Python/executor_cases.c.h | 2 ++ Python/generated_cases.c.h | 2 ++ Tools/cases_generator/analyzer.py | 1 - 5 files changed, 6 insertions(+), 3 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 90d5e277d8d6ce..06cf525c48187f 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -2055,7 +2055,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[266] = { [FORMAT_WITH_SPEC] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [FOR_ITER] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [FOR_ITER_GEN] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG }, - [FOR_ITER_LIST] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_EXIT_FLAG }, + [FOR_ITER_LIST] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, [FOR_ITER_RANGE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG }, [FOR_ITER_TUPLE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_EXIT_FLAG }, [GET_AITER] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index c18788df98e76b..7612fc9846de16 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -193,7 +193,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_FOR_ITER_TIER_TWO] = HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, [_ITER_CHECK_LIST] = HAS_EXIT_FLAG, [_GUARD_NOT_EXHAUSTED_LIST] = HAS_EXIT_FLAG, - [_ITER_NEXT_LIST] = HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_EXIT_FLAG, + [_ITER_NEXT_LIST] = HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG, 
[_ITER_CHECK_TUPLE] = HAS_EXIT_FLAG, [_GUARD_NOT_EXHAUSTED_TUPLE] = HAS_EXIT_FLAG, [_ITER_NEXT_TUPLE] = 0, diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 2695e903d641b1..7d3667f387bdf8 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -3687,7 +3687,9 @@ _PyObject_GC_IS_SHARED(seq)); STAT_INC(FOR_ITER, hit); PyObject *item; + _PyFrame_SetStackPointer(frame, stack_pointer); int result = _PyList_GetItemRefNoLock(seq, it->it_index, &item); + stack_pointer = _PyFrame_GetStackPointer(frame); // A negative result means we lost a race with another thread // and we need to take the slow path. if (result < 0) { diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 0e0c1099c558a7..bc229929c453f8 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -4098,7 +4098,9 @@ _PyObject_GC_IS_SHARED(seq)); STAT_INC(FOR_ITER, hit); PyObject *item; + _PyFrame_SetStackPointer(frame, stack_pointer); int result = _PyList_GetItemRefNoLock(seq, it->it_index, &item); + stack_pointer = _PyFrame_GetStackPointer(frame); // A negative result means we lost a race with another thread // and we need to take the slow path. DEOPT_IF(result < 0, FOR_ITER); diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 1dda7862d68a40..8f361900d2dffa 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -594,7 +594,6 @@ def has_error_without_pop(op: parser.InstDef) -> bool: "_PyInterpreterState_GET", "_PyList_AppendTakeRef", "_PyList_FromStackRefSteal", - "_PyList_GetItemRefNoLock", "_PyList_ITEMS", "_PyLong_Add", "_PyLong_CompactValue", From 358199af22f1646ef4bf864a28f713bd235929cc Mon Sep 17 00:00:00 2001 From: Thomas Wouters Date: Sun, 26 Jan 2025 22:21:04 +0100 Subject: [PATCH 11/13] Address reviewer comments, and drop test_iteration_deopt. --- Include/internal/pycore_list.h | 6 +- Include/internal/pycore_opcode_metadata.h | 2 +- Include/internal/pycore_uop_metadata.h | 2 +- Lib/test/libregrtest/tsan.py | 1 - .../test_iteration_deopt.py | 154 ------------------ Objects/listobject.c | 6 +- Python/bytecodes.c | 19 ++- Python/executor_cases.c.h | 8 +- Python/generated_cases.c.h | 17 +- Python/specialize.c | 2 +- 10 files changed, 37 insertions(+), 180 deletions(-) delete mode 100644 Lib/test/test_free_threading/test_iteration_deopt.py diff --git a/Include/internal/pycore_list.h b/Include/internal/pycore_list.h index 28bab04c6fdb7f..f13a3bf34b0a4f 100644 --- a/Include/internal/pycore_list.h +++ b/Include/internal/pycore_list.h @@ -8,6 +8,10 @@ extern "C" { # error "this header requires Py_BUILD_CORE define" #endif +#ifdef Py_GIL_DISABLED +#include "pycore_stackref.h" +#endif + PyAPI_FUNC(PyObject*) _PyList_Extend(PyListObject *, PyObject *); extern void _PyList_DebugMallocStats(FILE *out); // _PyList_GetItemRef should be used only when the object is known as a list @@ -15,7 +19,7 @@ extern void _PyList_DebugMallocStats(FILE *out); extern PyObject* _PyList_GetItemRef(PyListObject *, Py_ssize_t i); #ifdef Py_GIL_DISABLED // Returns -1 in case of races with other threads. 
-extern int _PyList_GetItemRefNoLock(PyListObject *, Py_ssize_t, PyObject **); +extern int _PyList_GetItemRefNoLock(PyListObject *, Py_ssize_t, _PyStackRef *); #endif #define _PyList_ITEMS(op) _Py_RVALUE(_PyList_CAST(op)->ob_item) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 06cf525c48187f..e955abc6a64f9e 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -2055,7 +2055,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[266] = { [FORMAT_WITH_SPEC] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [FOR_ITER] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [FOR_ITER_GEN] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG }, - [FOR_ITER_LIST] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, + [FOR_ITER_LIST] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, [FOR_ITER_RANGE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG }, [FOR_ITER_TUPLE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_EXIT_FLAG }, [GET_AITER] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 7612fc9846de16..20d23e2625037b 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -193,7 +193,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_FOR_ITER_TIER_TWO] = HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, [_ITER_CHECK_LIST] = HAS_EXIT_FLAG, [_GUARD_NOT_EXHAUSTED_LIST] = HAS_EXIT_FLAG, - [_ITER_NEXT_LIST] = HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG, + [_ITER_NEXT_LIST] = HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, [_ITER_CHECK_TUPLE] = HAS_EXIT_FLAG, [_GUARD_NOT_EXHAUSTED_TUPLE] = HAS_EXIT_FLAG, [_ITER_NEXT_TUPLE] = 0, diff --git a/Lib/test/libregrtest/tsan.py b/Lib/test/libregrtest/tsan.py index 5f83cd03067274..00d5779d950e72 100644 --- a/Lib/test/libregrtest/tsan.py +++ b/Lib/test/libregrtest/tsan.py @@ -26,7 +26,6 @@ 'test_threadsignals', 'test_weakref', 'test_free_threading.test_slots', - 'test_free_threading.test_iteration', ] diff --git a/Lib/test/test_free_threading/test_iteration_deopt.py b/Lib/test/test_free_threading/test_iteration_deopt.py deleted file mode 100644 index 47549502163afb..00000000000000 --- a/Lib/test/test_free_threading/test_iteration_deopt.py +++ /dev/null @@ -1,154 +0,0 @@ -import dis -import queue -import threading -import time -import unittest -from test.support import (import_helper, cpython_only, Py_GIL_DISABLED, - requires_specialization_ft) - -_testinternalcapi = import_helper.import_module("_testinternalcapi") -_testlimitedcapi = import_helper.import_module("_testlimitedcapi") - -NUMTHREADS = 5 - -def get_tlbc_instructions(f): - co = dis._get_code_object(f) - tlbc = _testinternalcapi.get_tlbc(co) - return [i.opname for i in dis._get_instructions_bytes(tlbc)] - - -class IterationDeoptTests(unittest.TestCase): - def check_deopt(self, get_iter, opcode, is_generator=False): - input = range(100) - expected_len = len(input) - q = queue.Queue() - barrier = threading.Barrier(NUMTHREADS + 1) - done = threading.Event() - def worker(): - # A complicated dance to get a weak reference to an iterator - # _only_ (strongly) 
referenced by the for loop, so that we can - # force our loop to deopt mid-way through. - it = get_iter(input) - ref = _testlimitedcapi.pylong_fromvoidptr(it) - q.put(ref) - # We can't use enumerate without affecting the loop, so keep a - # manual counter. - i = 0 - loop_a_little_more = 5 - results = [] - try: - # Make sure we're not still specialized from a previous run. - ops = get_tlbc_instructions(worker) - self.assertIn('FOR_ITER', ops) - self.assertNotIn(opcode, ops) - for item in it: - results.append(item) - i += 1 - - # We have to be very careful exiting the loop, because - # if the main thread hasn't dereferenced the unsafe - # weakref to our iterator yet, exiting will make it - # invalid and cause a crash. Getting the timing right is - # difficult, though, since it depends on the OS - # scheduler and the system load. As a final safeguard, - # if we're close to finishing the loop, just wait for the - # main thread. - if i + loop_a_little_more > expected_len: - done.wait() - - if i == 1: - del it - # Warm up. The first iteration didn't count because of - # the extra reference to the iterator. - if i < 10: - continue - if i == 10: - ops = get_tlbc_instructions(worker) - self.assertIn(opcode, ops) - # Let the main thread know it's time to reference our iterator. - barrier.wait() - continue - # Continue iterating while at any time our loop may be - # forced to deopt, but try to get the thread scheduler - # to give the main thread a chance to run. - if not done.is_set(): - time.sleep(0) - continue - if loop_a_little_more: - # Loop a little more after 'done' is set to make sure we - # introduce a tsan-detectable race if the loop isn't - # deopting appropriately. - loop_a_little_more -= 1 - continue - break - self.assertEqual(results, list(input)[:i]) - except threading.BrokenBarrierError: - return - except Exception as e: - # In case the exception happened before the last barrier, - # reset it so nothing is left hanging. - barrier.reset() - # In case it's the final assertion that failed, just add it - # to the result queue so it'll show up in the normal test - # output. - q.put(e) - raise - q.put("SUCCESS") - # Reset specialization and thread-local bytecode from previous runs. - worker.__code__ = worker.__code__.replace() - threads = [threading.Thread(target=worker) for i in range(NUMTHREADS)] - for t in threads: - t.start() - # Get the "weakrefs" from the worker threads. - refs = [q.get() for i in range(NUMTHREADS)] - # Wait for each thread to finish its specialization check. - barrier.wait() - # Dereference the "weakrefs" we were sent in an extremely unsafe way. - iterators = [_testlimitedcapi.pylong_asvoidptr(ref) for ref in refs] - done.set() - self.assertNotIn(None, iterators) - # Read data that the iteration writes, to trigger data races if they - # don't deopt appropriately. 
- if is_generator: - for it in iterators: - it.gi_running - else: - for it in iterators: - it.__reduce__() - for t in threads: - t.join() - results = [q.get() for i in range(NUMTHREADS)] - self.assertEqual(results, ["SUCCESS"] * NUMTHREADS) - - @cpython_only - @requires_specialization_ft - @unittest.skipIf(not Py_GIL_DISABLED, "requires free-threading") - def test_deopt_leaking_iterator_list(self): - def make_list_iter(input): - return iter(list(input)) - self.check_deopt(make_list_iter, 'FOR_ITER_LIST') - - @cpython_only - @requires_specialization_ft - @unittest.skipIf(not Py_GIL_DISABLED, "requires free-threading") - def test_deopt_leaking_iterator_tuple(self): - def make_tuple_iter(input): - return iter(tuple(input)) - self.check_deopt(make_tuple_iter, 'FOR_ITER_TUPLE') - - @cpython_only - @requires_specialization_ft - @unittest.skipIf(not Py_GIL_DISABLED, "requires free-threading") - def test_deopt_leaking_iterator_range(self): - def make_range_iter(input): - return iter(input) - self.check_deopt(make_range_iter, 'FOR_ITER_RANGE') - - @cpython_only - @requires_specialization_ft - @unittest.skipIf(not Py_GIL_DISABLED, "requires free-threading") - def test_deopt_leaking_iterator_generator(self): - def gen(input): - for item in input: - yield item - self.check_deopt(gen, 'FOR_ITER_GEN', is_generator=True) diff --git a/Objects/listobject.c b/Objects/listobject.c index 8d969c3e975ba8..1d21f12a6b0860 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -417,7 +417,7 @@ _PyList_GetItemRef(PyListObject *list, Py_ssize_t i) #ifdef Py_GIL_DISABLED int -_PyList_GetItemRefNoLock(PyListObject *list, Py_ssize_t i, PyObject **result) +_PyList_GetItemRefNoLock(PyListObject *list, Py_ssize_t i, _PyStackRef *result) { assert(_Py_IsOwnedByCurrentThread((PyObject *)list) || _PyObject_GC_IS_SHARED(list)); @@ -433,8 +433,8 @@ _PyList_GetItemRefNoLock(PyListObject *list, Py_ssize_t i, PyObject **result) if (!valid_index(i, cap)) { return 0; } - *result = _Py_TryXGetRef(&ob_item[i]); - if (*result == NULL) { + PyObject *obj = _Py_atomic_load_ptr(&ob_item[i]); + if (obj == NULL || !_Py_TryIncrefCompareStackRef(&ob_item[i], obj, result)) { return -1; } return 1; diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 05039de403ae48..5c3326b0dc1871 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -3028,8 +3028,10 @@ dummy_func( replaced op(_ITER_JUMP_LIST, (iter -- iter)) { PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); assert(Py_TYPE(iter_o) == &PyListIter_Type); -// For free-threaded Python, the loop exit can happen at any point during item -// retrieval, so separate ops don't make much sense. +// For free-threaded Python, the loop exit can happen at any point during +// item retrieval, so it doesn't make much sense to check and jump +// separately before item retrieval. Any length check we do here can be +// invalid by the time we actually try to fetch the item. #ifdef Py_GIL_DISABLED assert(_PyObject_IsUniquelyReferenced(iter_o)); #else @@ -3075,11 +3077,10 @@ dummy_func( assert(_Py_IsOwnedByCurrentThread((PyObject *)seq) || _PyObject_GC_IS_SHARED(seq)); STAT_INC(FOR_ITER, hit); - PyObject *item; - int result = _PyList_GetItemRefNoLock(seq, it->it_index, &item); + int result = _PyList_GetItemRefNoLock(seq, it->it_index, &next); // A negative result means we lost a race with another thread // and we need to take the slow path. 
- EXIT_IF(result < 0); + DEOPT_IF(result < 0); if (result == 0) { it->it_index = -1; /* Jump forward oparg, then skip following END_FOR instruction */ @@ -3087,7 +3088,6 @@ dummy_func( DISPATCH(); } it->it_index++; - next = PyStackRef_FromPyObjectSteal(item); #else assert(it->it_index < PyList_GET_SIZE(seq)); next = PyStackRef_FromPyObjectNew(PyList_GET_ITEM(seq, it->it_index++)); @@ -3110,9 +3110,8 @@ dummy_func( replaced op(_ITER_JUMP_TUPLE, (iter -- iter)) { PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); + (void)iter_o; assert(Py_TYPE(iter_o) == &PyTupleIter_Type); -// For free-threaded Python, the loop exit can happen at any point during item -// retrieval, so separate ops don't make much sense. #ifdef Py_GIL_DISABLED assert(_PyObject_IsUniquelyReferenced(iter_o)); #endif @@ -3218,6 +3217,10 @@ dummy_func( PyGenObject *gen = (PyGenObject *)PyStackRef_AsPyObjectBorrow(iter); DEOPT_IF(Py_TYPE(gen) != &PyGen_Type); #ifdef Py_GIL_DISABLED + // Since generators can't be used by multiple threads anyway we + // don't need to deopt here, but this lets us work on making + // generators thread-safe without necessarily having to + // specialize them thread-safely as well. DEOPT_IF(!_PyObject_IsUniquelyReferenced((PyObject *)gen)); #endif DEOPT_IF(gen->gi_frame_state >= FRAME_EXECUTING); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 7d3667f387bdf8..fdd5f67b7333c3 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -3686,9 +3686,8 @@ assert(_Py_IsOwnedByCurrentThread((PyObject *)seq) || _PyObject_GC_IS_SHARED(seq)); STAT_INC(FOR_ITER, hit); - PyObject *item; _PyFrame_SetStackPointer(frame, stack_pointer); - int result = _PyList_GetItemRefNoLock(seq, it->it_index, &item); + int result = _PyList_GetItemRefNoLock(seq, it->it_index, &next); stack_pointer = _PyFrame_GetStackPointer(frame); // A negative result means we lost a race with another thread // and we need to take the slow path. @@ -3703,7 +3702,6 @@ DISPATCH(); } it->it_index++; - next = PyStackRef_FromPyObjectSteal(item); #else assert(it->it_index < PyList_GET_SIZE(seq)); next = PyStackRef_FromPyObjectNew(PyList_GET_ITEM(seq, it->it_index++)); @@ -3838,6 +3836,10 @@ JUMP_TO_JUMP_TARGET(); } #ifdef Py_GIL_DISABLED + // Since generators can't be used by multiple threads anyway we + // don't need to deopt here, but this lets us work on making + // generators thread-safe without necessarily having to + // specialize them thread-safely as well. if (!_PyObject_IsUniquelyReferenced((PyObject *)gen)) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index bc229929c453f8..9b16dc155c25d3 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -4009,6 +4009,10 @@ PyGenObject *gen = (PyGenObject *)PyStackRef_AsPyObjectBorrow(iter); DEOPT_IF(Py_TYPE(gen) != &PyGen_Type, FOR_ITER); #ifdef Py_GIL_DISABLED + // Since generators can't be used by multiple threads anyway we + // don't need to deopt here, but this lets us work on making + // generators thread-safe without necessarily having to + // specialize them thread-safely as well. 
DEOPT_IF(!_PyObject_IsUniquelyReferenced((PyObject *)gen), FOR_ITER); #endif DEOPT_IF(gen->gi_frame_state >= FRAME_EXECUTING, FOR_ITER); @@ -4065,8 +4069,10 @@ { PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); assert(Py_TYPE(iter_o) == &PyListIter_Type); - // For free-threaded Python, the loop exit can happen at any point during item - // retrieval, so separate ops don't make much sense. + // For free-threaded Python, the loop exit can happen at any point during + // item retrieval, so it doesn't make much sense to check and jump + // separately before item retrieval. Any length check we do here can be + // invalid by the time we actually try to fetch the item. #ifdef Py_GIL_DISABLED assert(_PyObject_IsUniquelyReferenced(iter_o)); #else @@ -4097,9 +4103,8 @@ assert(_Py_IsOwnedByCurrentThread((PyObject *)seq) || _PyObject_GC_IS_SHARED(seq)); STAT_INC(FOR_ITER, hit); - PyObject *item; _PyFrame_SetStackPointer(frame, stack_pointer); - int result = _PyList_GetItemRefNoLock(seq, it->it_index, &item); + int result = _PyList_GetItemRefNoLock(seq, it->it_index, &next); stack_pointer = _PyFrame_GetStackPointer(frame); // A negative result means we lost a race with another thread // and we need to take the slow path. @@ -4111,7 +4116,6 @@ DISPATCH(); } it->it_index++; - next = PyStackRef_FromPyObjectSteal(item); #else assert(it->it_index < PyList_GET_SIZE(seq)); next = PyStackRef_FromPyObjectNew(PyList_GET_ITEM(seq, it->it_index++)); @@ -4195,9 +4199,8 @@ // _ITER_JUMP_TUPLE { PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); + (void)iter_o; assert(Py_TYPE(iter_o) == &PyTupleIter_Type); - // For free-threaded Python, the loop exit can happen at any point during item - // retrieval, so separate ops don't make much sense. #ifdef Py_GIL_DISABLED assert(_PyObject_IsUniquelyReferenced(iter_o)); #endif diff --git a/Python/specialize.c b/Python/specialize.c index 74251b1ab6db3a..1df1131531c4e4 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -2654,7 +2654,7 @@ _Py_Specialize_ForIter(_PyStackRef iter, _Py_CODEUNIT *instr, int oparg) #ifdef Py_GIL_DISABLED // Only specialize for uniquely referenced iterators, so that we know // they're only referenced by this one thread. This is more limiting - // than we need (event `it = iter(mylist); for item in it:` won't get + // than we need (even `it = iter(mylist); for item in it:` won't get // specialized) but we don't have a way to check whether we're the only // _thread_ who has access to the object. if (!_PyObject_IsUniquelyReferenced(iter_o)) From 07d30334379ad4c92aefe95cf7adfeefc0cf5e14 Mon Sep 17 00:00:00 2001 From: Thomas Wouters Date: Tue, 18 Feb 2025 23:13:15 +0100 Subject: [PATCH 12/13] Regenerate cases after merge. 
--- Python/generated_cases.c.h | 68 +++++++++++++++++++++++++++++++------- 1 file changed, 56 insertions(+), 12 deletions(-) diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index c75fc4b0aa6044..1de68b13d76772 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -5506,15 +5506,27 @@ { iter = stack_pointer[-1]; PyGenObject *gen = (PyGenObject *)PyStackRef_AsPyObjectBorrow(iter); - DEOPT_IF(Py_TYPE(gen) != &PyGen_Type, FOR_ITER); + if (Py_TYPE(gen) != &PyGen_Type) { + UPDATE_MISS_STATS(FOR_ITER); + assert(_PyOpcode_Deopt[opcode] == (FOR_ITER)); + JUMP_TO_PREDICTED(FOR_ITER); + } #ifdef Py_GIL_DISABLED // Since generators can't be used by multiple threads anyway we // don't need to deopt here, but this lets us work on making // generators thread-safe without necessarily having to // specialize them thread-safely as well. - DEOPT_IF(!_PyObject_IsUniquelyReferenced((PyObject *)gen), FOR_ITER); + if (!_PyObject_IsUniquelyReferenced((PyObject *)gen)) { + UPDATE_MISS_STATS(FOR_ITER); + assert(_PyOpcode_Deopt[opcode] == (FOR_ITER)); + JUMP_TO_PREDICTED(FOR_ITER); + } #endif - DEOPT_IF(gen->gi_frame_state >= FRAME_EXECUTING, FOR_ITER); + if (gen->gi_frame_state >= FRAME_EXECUTING) { + UPDATE_MISS_STATS(FOR_ITER); + assert(_PyOpcode_Deopt[opcode] == (FOR_ITER)); + JUMP_TO_PREDICTED(FOR_ITER); + } STAT_INC(FOR_ITER, hit); gen_frame = &gen->gi_iframe; _PyFrame_StackPush(gen_frame, PyStackRef_None); @@ -5562,12 +5574,24 @@ { iter = stack_pointer[-1]; PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); - DEOPT_IF(Py_TYPE(iter_o) != &PyListIter_Type, FOR_ITER); + if (Py_TYPE(iter_o) != &PyListIter_Type) { + UPDATE_MISS_STATS(FOR_ITER); + assert(_PyOpcode_Deopt[opcode] == (FOR_ITER)); + JUMP_TO_PREDICTED(FOR_ITER); + } #ifdef Py_GIL_DISABLED - DEOPT_IF(!_PyObject_IsUniquelyReferenced(iter_o), FOR_ITER); + if (!_PyObject_IsUniquelyReferenced(iter_o)) { + UPDATE_MISS_STATS(FOR_ITER); + assert(_PyOpcode_Deopt[opcode] == (FOR_ITER)); + JUMP_TO_PREDICTED(FOR_ITER); + } _PyListIterObject *it = (_PyListIterObject *)iter_o; - DEOPT_IF(!_Py_IsOwnedByCurrentThread((PyObject *)it->it_seq) || - !_PyObject_GC_IS_SHARED(it->it_seq), FOR_ITER); + if (!_Py_IsOwnedByCurrentThread((PyObject *)it->it_seq) || + !_PyObject_GC_IS_SHARED(it->it_seq)) { + UPDATE_MISS_STATS(FOR_ITER); + assert(_PyOpcode_Deopt[opcode] == (FOR_ITER)); + JUMP_TO_PREDICTED(FOR_ITER); + } #endif } // _ITER_JUMP_LIST @@ -5615,7 +5639,11 @@ stack_pointer = _PyFrame_GetStackPointer(frame); // A negative result means we lost a race with another thread // and we need to take the slow path. 
- DEOPT_IF(result < 0, FOR_ITER); + if (result < 0) { + UPDATE_MISS_STATS(FOR_ITER); + assert(_PyOpcode_Deopt[opcode] == (FOR_ITER)); + JUMP_TO_PREDICTED(FOR_ITER); + } if (result == 0) { it->it_index = -1; /* Jump forward oparg, then skip following END_FOR instruction */ @@ -5652,9 +5680,17 @@ { iter = stack_pointer[-1]; _PyRangeIterObject *r = (_PyRangeIterObject *)PyStackRef_AsPyObjectBorrow(iter); - DEOPT_IF(Py_TYPE(r) != &PyRangeIter_Type, FOR_ITER); + if (Py_TYPE(r) != &PyRangeIter_Type) { + UPDATE_MISS_STATS(FOR_ITER); + assert(_PyOpcode_Deopt[opcode] == (FOR_ITER)); + JUMP_TO_PREDICTED(FOR_ITER); + } #ifdef Py_GIL_DISABLED - DEOPT_IF(!_PyObject_IsUniquelyReferenced((PyObject *)r), FOR_ITER); + if (!_PyObject_IsUniquelyReferenced((PyObject *)r)) { + UPDATE_MISS_STATS(FOR_ITER); + assert(_PyOpcode_Deopt[opcode] == (FOR_ITER)); + JUMP_TO_PREDICTED(FOR_ITER); + } #endif } // _ITER_JUMP_RANGE @@ -5712,9 +5748,17 @@ { iter = stack_pointer[-1]; PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); - DEOPT_IF(Py_TYPE(iter_o) != &PyTupleIter_Type, FOR_ITER); + if (Py_TYPE(iter_o) != &PyTupleIter_Type) { + UPDATE_MISS_STATS(FOR_ITER); + assert(_PyOpcode_Deopt[opcode] == (FOR_ITER)); + JUMP_TO_PREDICTED(FOR_ITER); + } #ifdef Py_GIL_DISABLED - DEOPT_IF(!_PyObject_IsUniquelyReferenced(iter_o), FOR_ITER); + if (!_PyObject_IsUniquelyReferenced(iter_o)) { + UPDATE_MISS_STATS(FOR_ITER); + assert(_PyOpcode_Deopt[opcode] == (FOR_ITER)); + JUMP_TO_PREDICTED(FOR_ITER); + } #endif } // _ITER_JUMP_TUPLE From 375399de26589dc74dee7f577bbd3faac8ec002f Mon Sep 17 00:00:00 2001 From: Thomas Wouters Date: Wed, 5 Mar 2025 16:29:33 +0100 Subject: [PATCH 13/13] Make the free-threaded FOR_ITER work in the tier 2 interpreter/jit. --- Include/internal/pycore_opcode_metadata.h | 2 +- Include/internal/pycore_uop_ids.h | 153 +++++++++++----------- Include/internal/pycore_uop_metadata.h | 6 +- Python/bytecodes.c | 30 ++++- Python/executor_cases.c.h | 12 +- Python/generated_cases.c.h | 1 + Python/optimizer.c | 1 + Python/optimizer_cases.c.h | 4 +- 8 files changed, 122 insertions(+), 87 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 3f5c8ecee51fc4..f353c0b946361a 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -2331,7 +2331,7 @@ _PyOpcode_macro_expansion[256] = { [FORMAT_WITH_SPEC] = { .nuops = 1, .uops = { { _FORMAT_WITH_SPEC, OPARG_SIMPLE, 0 } } }, [FOR_ITER] = { .nuops = 1, .uops = { { _FOR_ITER, OPARG_REPLACED, 0 } } }, [FOR_ITER_GEN] = { .nuops = 3, .uops = { { _CHECK_PEP_523, OPARG_SIMPLE, 1 }, { _FOR_ITER_GEN_FRAME, OPARG_SIMPLE, 1 }, { _PUSH_FRAME, OPARG_SIMPLE, 1 } } }, - [FOR_ITER_LIST] = { .nuops = 3, .uops = { { _ITER_CHECK_LIST, OPARG_SIMPLE, 1 }, { _ITER_JUMP_LIST, OPARG_REPLACED, 1 }, { _ITER_NEXT_LIST, OPARG_SIMPLE, 1 } } }, + [FOR_ITER_LIST] = { .nuops = 3, .uops = { { _ITER_CHECK_LIST, OPARG_SIMPLE, 1 }, { _ITER_JUMP_LIST, OPARG_REPLACED, 1 }, { _ITER_NEXT_LIST, OPARG_REPLACED, 1 } } }, [FOR_ITER_RANGE] = { .nuops = 3, .uops = { { _ITER_CHECK_RANGE, OPARG_SIMPLE, 1 }, { _ITER_JUMP_RANGE, OPARG_REPLACED, 1 }, { _ITER_NEXT_RANGE, OPARG_SIMPLE, 1 } } }, [FOR_ITER_TUPLE] = { .nuops = 3, .uops = { { _ITER_CHECK_TUPLE, OPARG_SIMPLE, 1 }, { _ITER_JUMP_TUPLE, OPARG_REPLACED, 1 }, { _ITER_NEXT_TUPLE, OPARG_SIMPLE, 1 } } }, [GET_AITER] = { .nuops = 1, .uops = { { _GET_AITER, OPARG_SIMPLE, 0 } } }, diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index 
5143b10def5396..095fd043090fbb 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -164,126 +164,127 @@ extern "C" { #define _ITER_JUMP_RANGE 398 #define _ITER_JUMP_TUPLE 399 #define _ITER_NEXT_LIST 400 -#define _ITER_NEXT_RANGE 401 -#define _ITER_NEXT_TUPLE 402 -#define _JUMP_TO_TOP 403 +#define _ITER_NEXT_LIST_TIER_TWO 401 +#define _ITER_NEXT_RANGE 402 +#define _ITER_NEXT_TUPLE 403 +#define _JUMP_TO_TOP 404 #define _LIST_APPEND LIST_APPEND #define _LIST_EXTEND LIST_EXTEND -#define _LOAD_ATTR 404 -#define _LOAD_ATTR_CLASS 405 +#define _LOAD_ATTR 405 +#define _LOAD_ATTR_CLASS 406 #define _LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN -#define _LOAD_ATTR_INSTANCE_VALUE 406 -#define _LOAD_ATTR_METHOD_LAZY_DICT 407 -#define _LOAD_ATTR_METHOD_NO_DICT 408 -#define _LOAD_ATTR_METHOD_WITH_VALUES 409 -#define _LOAD_ATTR_MODULE 410 -#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 411 -#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 412 -#define _LOAD_ATTR_PROPERTY_FRAME 413 -#define _LOAD_ATTR_SLOT 414 -#define _LOAD_ATTR_WITH_HINT 415 +#define _LOAD_ATTR_INSTANCE_VALUE 407 +#define _LOAD_ATTR_METHOD_LAZY_DICT 408 +#define _LOAD_ATTR_METHOD_NO_DICT 409 +#define _LOAD_ATTR_METHOD_WITH_VALUES 410 +#define _LOAD_ATTR_MODULE 411 +#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 412 +#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 413 +#define _LOAD_ATTR_PROPERTY_FRAME 414 +#define _LOAD_ATTR_SLOT 415 +#define _LOAD_ATTR_WITH_HINT 416 #define _LOAD_BUILD_CLASS LOAD_BUILD_CLASS -#define _LOAD_BYTECODE 416 +#define _LOAD_BYTECODE 417 #define _LOAD_COMMON_CONSTANT LOAD_COMMON_CONSTANT #define _LOAD_CONST LOAD_CONST #define _LOAD_CONST_IMMORTAL LOAD_CONST_IMMORTAL -#define _LOAD_CONST_INLINE 417 -#define _LOAD_CONST_INLINE_BORROW 418 +#define _LOAD_CONST_INLINE 418 +#define _LOAD_CONST_INLINE_BORROW 419 #define _LOAD_CONST_MORTAL LOAD_CONST_MORTAL #define _LOAD_DEREF LOAD_DEREF -#define _LOAD_FAST 419 -#define _LOAD_FAST_0 420 -#define _LOAD_FAST_1 421 -#define _LOAD_FAST_2 422 -#define _LOAD_FAST_3 423 -#define _LOAD_FAST_4 424 -#define _LOAD_FAST_5 425 -#define _LOAD_FAST_6 426 -#define _LOAD_FAST_7 427 +#define _LOAD_FAST 420 +#define _LOAD_FAST_0 421 +#define _LOAD_FAST_1 422 +#define _LOAD_FAST_2 423 +#define _LOAD_FAST_3 424 +#define _LOAD_FAST_4 425 +#define _LOAD_FAST_5 426 +#define _LOAD_FAST_6 427 +#define _LOAD_FAST_7 428 #define _LOAD_FAST_AND_CLEAR LOAD_FAST_AND_CLEAR #define _LOAD_FAST_CHECK LOAD_FAST_CHECK #define _LOAD_FAST_LOAD_FAST LOAD_FAST_LOAD_FAST #define _LOAD_FROM_DICT_OR_DEREF LOAD_FROM_DICT_OR_DEREF #define _LOAD_FROM_DICT_OR_GLOBALS LOAD_FROM_DICT_OR_GLOBALS -#define _LOAD_GLOBAL 428 -#define _LOAD_GLOBAL_BUILTINS 429 -#define _LOAD_GLOBAL_MODULE 430 +#define _LOAD_GLOBAL 429 +#define _LOAD_GLOBAL_BUILTINS 430 +#define _LOAD_GLOBAL_MODULE 431 #define _LOAD_LOCALS LOAD_LOCALS #define _LOAD_NAME LOAD_NAME -#define _LOAD_SMALL_INT 431 -#define _LOAD_SMALL_INT_0 432 -#define _LOAD_SMALL_INT_1 433 -#define _LOAD_SMALL_INT_2 434 -#define _LOAD_SMALL_INT_3 435 +#define _LOAD_SMALL_INT 432 +#define _LOAD_SMALL_INT_0 433 +#define _LOAD_SMALL_INT_1 434 +#define _LOAD_SMALL_INT_2 435 +#define _LOAD_SMALL_INT_3 436 #define _LOAD_SPECIAL LOAD_SPECIAL #define _LOAD_SUPER_ATTR_ATTR LOAD_SUPER_ATTR_ATTR #define _LOAD_SUPER_ATTR_METHOD LOAD_SUPER_ATTR_METHOD -#define _MAKE_CALLARGS_A_TUPLE 436 +#define _MAKE_CALLARGS_A_TUPLE 437 #define _MAKE_CELL MAKE_CELL #define _MAKE_FUNCTION MAKE_FUNCTION -#define _MAKE_WARM 437 +#define _MAKE_WARM 438 #define _MAP_ADD 
MAP_ADD #define _MATCH_CLASS MATCH_CLASS #define _MATCH_KEYS MATCH_KEYS #define _MATCH_MAPPING MATCH_MAPPING #define _MATCH_SEQUENCE MATCH_SEQUENCE -#define _MAYBE_EXPAND_METHOD 438 -#define _MAYBE_EXPAND_METHOD_KW 439 -#define _MONITOR_CALL 440 -#define _MONITOR_CALL_KW 441 -#define _MONITOR_JUMP_BACKWARD 442 -#define _MONITOR_RESUME 443 +#define _MAYBE_EXPAND_METHOD 439 +#define _MAYBE_EXPAND_METHOD_KW 440 +#define _MONITOR_CALL 441 +#define _MONITOR_CALL_KW 442 +#define _MONITOR_JUMP_BACKWARD 443 +#define _MONITOR_RESUME 444 #define _NOP NOP #define _POP_EXCEPT POP_EXCEPT -#define _POP_JUMP_IF_FALSE 444 -#define _POP_JUMP_IF_TRUE 445 +#define _POP_JUMP_IF_FALSE 445 +#define _POP_JUMP_IF_TRUE 446 #define _POP_TOP POP_TOP -#define _POP_TOP_LOAD_CONST_INLINE 446 -#define _POP_TOP_LOAD_CONST_INLINE_BORROW 447 +#define _POP_TOP_LOAD_CONST_INLINE 447 +#define _POP_TOP_LOAD_CONST_INLINE_BORROW 448 #define _PUSH_EXC_INFO PUSH_EXC_INFO -#define _PUSH_FRAME 448 +#define _PUSH_FRAME 449 #define _PUSH_NULL PUSH_NULL -#define _PUSH_NULL_CONDITIONAL 449 -#define _PY_FRAME_GENERAL 450 -#define _PY_FRAME_KW 451 -#define _QUICKEN_RESUME 452 -#define _REPLACE_WITH_TRUE 453 +#define _PUSH_NULL_CONDITIONAL 450 +#define _PY_FRAME_GENERAL 451 +#define _PY_FRAME_KW 452 +#define _QUICKEN_RESUME 453 +#define _REPLACE_WITH_TRUE 454 #define _RESUME_CHECK RESUME_CHECK #define _RETURN_GENERATOR RETURN_GENERATOR #define _RETURN_VALUE RETURN_VALUE -#define _SAVE_RETURN_OFFSET 454 -#define _SEND 455 -#define _SEND_GEN_FRAME 456 +#define _SAVE_RETURN_OFFSET 455 +#define _SEND 456 +#define _SEND_GEN_FRAME 457 #define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS #define _SET_ADD SET_ADD #define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE #define _SET_UPDATE SET_UPDATE -#define _START_EXECUTOR 457 -#define _STORE_ATTR 458 -#define _STORE_ATTR_INSTANCE_VALUE 459 -#define _STORE_ATTR_SLOT 460 -#define _STORE_ATTR_WITH_HINT 461 +#define _START_EXECUTOR 458 +#define _STORE_ATTR 459 +#define _STORE_ATTR_INSTANCE_VALUE 460 +#define _STORE_ATTR_SLOT 461 +#define _STORE_ATTR_WITH_HINT 462 #define _STORE_DEREF STORE_DEREF -#define _STORE_FAST 462 -#define _STORE_FAST_0 463 -#define _STORE_FAST_1 464 -#define _STORE_FAST_2 465 -#define _STORE_FAST_3 466 -#define _STORE_FAST_4 467 -#define _STORE_FAST_5 468 -#define _STORE_FAST_6 469 -#define _STORE_FAST_7 470 +#define _STORE_FAST 463 +#define _STORE_FAST_0 464 +#define _STORE_FAST_1 465 +#define _STORE_FAST_2 466 +#define _STORE_FAST_3 467 +#define _STORE_FAST_4 468 +#define _STORE_FAST_5 469 +#define _STORE_FAST_6 470 +#define _STORE_FAST_7 471 #define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST #define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST #define _STORE_GLOBAL STORE_GLOBAL #define _STORE_NAME STORE_NAME -#define _STORE_SLICE 471 -#define _STORE_SUBSCR 472 +#define _STORE_SLICE 472 +#define _STORE_SUBSCR 473 #define _STORE_SUBSCR_DICT STORE_SUBSCR_DICT #define _STORE_SUBSCR_LIST_INT STORE_SUBSCR_LIST_INT #define _SWAP SWAP -#define _TIER2_RESUME_CHECK 473 -#define _TO_BOOL 474 +#define _TIER2_RESUME_CHECK 474 +#define _TO_BOOL 475 #define _TO_BOOL_BOOL TO_BOOL_BOOL #define _TO_BOOL_INT TO_BOOL_INT #define _TO_BOOL_LIST TO_BOOL_LIST @@ -293,13 +294,13 @@ extern "C" { #define _UNARY_NEGATIVE UNARY_NEGATIVE #define _UNARY_NOT UNARY_NOT #define _UNPACK_EX UNPACK_EX -#define _UNPACK_SEQUENCE 475 +#define _UNPACK_SEQUENCE 476 #define _UNPACK_SEQUENCE_LIST UNPACK_SEQUENCE_LIST #define _UNPACK_SEQUENCE_TUPLE UNPACK_SEQUENCE_TUPLE #define _UNPACK_SEQUENCE_TWO_TUPLE 
UNPACK_SEQUENCE_TWO_TUPLE #define _WITH_EXCEPT_START WITH_EXCEPT_START #define _YIELD_VALUE YIELD_VALUE -#define MAX_UOP_ID 475 +#define MAX_UOP_ID 476 #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 6d475721387f79..23a5584e374feb 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -185,7 +185,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_FOR_ITER_TIER_TWO] = HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, [_ITER_CHECK_LIST] = HAS_EXIT_FLAG, [_GUARD_NOT_EXHAUSTED_LIST] = HAS_EXIT_FLAG, - [_ITER_NEXT_LIST] = HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, + [_ITER_NEXT_LIST_TIER_TWO] = HAS_EXIT_FLAG | HAS_ESCAPES_FLAG, [_ITER_CHECK_TUPLE] = HAS_EXIT_FLAG, [_GUARD_NOT_EXHAUSTED_TUPLE] = HAS_EXIT_FLAG, [_ITER_NEXT_TUPLE] = 0, @@ -428,7 +428,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_ITER_CHECK_LIST] = "_ITER_CHECK_LIST", [_ITER_CHECK_RANGE] = "_ITER_CHECK_RANGE", [_ITER_CHECK_TUPLE] = "_ITER_CHECK_TUPLE", - [_ITER_NEXT_LIST] = "_ITER_NEXT_LIST", + [_ITER_NEXT_LIST_TIER_TWO] = "_ITER_NEXT_LIST_TIER_TWO", [_ITER_NEXT_RANGE] = "_ITER_NEXT_RANGE", [_ITER_NEXT_TUPLE] = "_ITER_NEXT_TUPLE", [_JUMP_TO_TOP] = "_JUMP_TO_TOP", @@ -889,7 +889,7 @@ int _PyUop_num_popped(int opcode, int oparg) return 0; case _GUARD_NOT_EXHAUSTED_LIST: return 0; - case _ITER_NEXT_LIST: + case _ITER_NEXT_LIST_TIER_TWO: return 0; case _ITER_CHECK_TUPLE: return 0; diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 7eb1eec5af2652..e360452f3adfda 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -3094,6 +3094,7 @@ dummy_func( // invalid by the time we actually try to fetch the item. #ifdef Py_GIL_DISABLED assert(_PyObject_IsUniquelyReferenced(iter_o)); + (void)iter_o; #else _PyListIterObject *it = (_PyListIterObject *)iter_o; STAT_INC(FOR_ITER, hit); @@ -3126,7 +3127,7 @@ dummy_func( #endif } - op(_ITER_NEXT_LIST, (iter -- iter, next)) { + replaced op(_ITER_NEXT_LIST, (iter -- iter, next)) { PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); _PyListIterObject *it = (_PyListIterObject *)iter_o; assert(Py_TYPE(iter_o) == &PyListIter_Type); @@ -3154,6 +3155,33 @@ dummy_func( #endif } + // Only used by Tier 2 + op(_ITER_NEXT_LIST_TIER_TWO, (iter -- iter, next)) { + PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); + _PyListIterObject *it = (_PyListIterObject *)iter_o; + assert(Py_TYPE(iter_o) == &PyListIter_Type); + PyListObject *seq = it->it_seq; + assert(seq); +#ifdef Py_GIL_DISABLED + assert(_PyObject_IsUniquelyReferenced(iter_o)); + assert(_Py_IsOwnedByCurrentThread((PyObject *)seq) || + _PyObject_GC_IS_SHARED(seq)); + STAT_INC(FOR_ITER, hit); + int result = _PyList_GetItemRefNoLock(seq, it->it_index, &next); + // A negative result means we lost a race with another thread + // and we need to take the slow path. 
+ EXIT_IF(result < 0); + if (result == 0) { + it->it_index = -1; + EXIT_IF(1); + } + it->it_index++; +#else + assert(it->it_index < PyList_GET_SIZE(seq)); + next = PyStackRef_FromPyObjectNew(PyList_GET_ITEM(seq, it->it_index++)); +#endif + } + macro(FOR_ITER_LIST) = unused/1 + // Skip over the counter _ITER_CHECK_LIST + diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index bd9f350b4b90bf..a79ace955b3e11 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -4171,10 +4171,11 @@ break; } - case _ITER_NEXT_LIST: { + /* _ITER_NEXT_LIST is not a viable micro-op for tier 2 because it is replaced */ + + case _ITER_NEXT_LIST_TIER_TWO: { _PyStackRef iter; _PyStackRef next; - oparg = CURRENT_OPARG(); iter = stack_pointer[-1]; PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); _PyListIterObject *it = (_PyListIterObject *)iter_o; @@ -4197,9 +4198,10 @@ } if (result == 0) { it->it_index = -1; - /* Jump forward oparg, then skip following END_FOR instruction */ - JUMPBY(oparg + 1); - DISPATCH(); + if (1) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } } it->it_index++; #else diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 916cebbebdd280..a24e4caa52a6c9 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -5601,6 +5601,7 @@ // invalid by the time we actually try to fetch the item. #ifdef Py_GIL_DISABLED assert(_PyObject_IsUniquelyReferenced(iter_o)); + (void)iter_o; #else _PyListIterObject *it = (_PyListIterObject *)iter_o; STAT_INC(FOR_ITER, hit); diff --git a/Python/optimizer.c b/Python/optimizer.c index e05523451da859..6fc5eabdf8b44e 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -363,6 +363,7 @@ _PyUOp_Replacements[MAX_UOP_ID + 1] = { [_ITER_JUMP_LIST] = _GUARD_NOT_EXHAUSTED_LIST, [_ITER_JUMP_TUPLE] = _GUARD_NOT_EXHAUSTED_TUPLE, [_FOR_ITER] = _FOR_ITER_TIER_TWO, + [_ITER_NEXT_LIST] = _ITER_NEXT_LIST_TIER_TWO, }; static const uint8_t diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 306599bea77877..78d873eda7d631 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -1458,7 +1458,9 @@ break; } - case _ITER_NEXT_LIST: { + /* _ITER_NEXT_LIST is not a viable micro-op for tier 2 */ + + case _ITER_NEXT_LIST_TIER_TWO: { JitOptSymbol *next; next = sym_new_not_null(ctx); stack_pointer[0] = next;