From f14f0badfea9ba005bd01054d3dd49a85639bf5f Mon Sep 17 00:00:00 2001 From: Sergey Miryanov Date: Thu, 19 Jun 2025 23:57:00 +0500 Subject: [PATCH 01/19] Reset type cache after finalization --- Objects/typeobject.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index b9d549610693c1..2090db49e5ed2c 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -10767,6 +10767,8 @@ slot_tp_finalize(PyObject *self) } } + PyType_Modified(Py_TYPE(self)); + _PyThreadState_PopCStackRef(tstate, &cref); /* Restore the saved exception. */ From 61fc657b0833d19befe51d54e73c2f2831fa9e11 Mon Sep 17 00:00:00 2001 From: Mikhail Borisov <43937008+fxeqxmulfx@users.noreply.github.com> Date: Mon, 23 Jun 2025 00:27:39 +0500 Subject: [PATCH 02/19] Add tests --- Lib/test/test_gc.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index b4cbfb6d774080..710a2b3eb2af73 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -1517,6 +1517,23 @@ def test_ast_fini(self): """) assert_python_ok("-c", code) + def test_reset_type_cache_after_finalization(self): + # https://github.com/python/cpython/issues/135552 + code = textwrap.dedent(""" + class BaseNode: + def __del__(self): + BaseNode.next = BaseNode.next.next + + + class Node(BaseNode): + pass + + + BaseNode.next = Node() + BaseNode.next.next = Node() + """) + assert_python_ok("-c", code) + def setUpModule(): global enabled, debug From d8124a958df2ad12850fe1e8d03dcf79b65ba6c0 Mon Sep 17 00:00:00 2001 From: Sergey Miryanov Date: Mon, 23 Jun 2025 00:37:02 +0500 Subject: [PATCH 03/19] Organize tests --- Lib/test/test_gc.py | 47 ++++++++++++++++++++++++++++++--------------- 1 file changed, 31 insertions(+), 16 deletions(-) diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index 710a2b3eb2af73..58c1882886002e 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -1125,6 +1125,37 @@ def test_something(self): """) assert_python_ok("-c", source) + def test_do_not_cleanup_type_subclasses_before_finalization(self): + # https://github.com/python/cpython/issues/135552 + code = textwrap.dedent(""" + class BaseNode: + def __del__(self): + BaseNode.next = BaseNode.next.next + + class Node(BaseNode): + pass + + BaseNode.next = Node() + BaseNode.next.next = Node() + """) + assert_python_ok("-c", code) + + code_inside_function = textwrap.dedent(""" + def test(): + class BaseNode: + def __del__(self): + BaseNode.next = BaseNode.next.next + + class Node(BaseNode): + pass + + BaseNode.next = Node() + BaseNode.next.next = Node() + + test() + """) + assert_python_ok("-c", code_inside_function) + class IncrementalGCTests(unittest.TestCase): @unittest.skipIf(_testinternalcapi is None, "requires _testinternalcapi") @@ -1517,22 +1548,6 @@ def test_ast_fini(self): """) assert_python_ok("-c", code) - def test_reset_type_cache_after_finalization(self): - # https://github.com/python/cpython/issues/135552 - code = textwrap.dedent(""" - class BaseNode: - def __del__(self): - BaseNode.next = BaseNode.next.next - - - class Node(BaseNode): - pass - - - BaseNode.next = Node() - BaseNode.next.next = Node() - """) - assert_python_ok("-c", code) def setUpModule(): From bb215104e47a6ac8b91bba6d901954a77447bf22 Mon Sep 17 00:00:00 2001 From: Sergey Miryanov Date: Mon, 23 Jun 2025 00:37:20 +0500 Subject: [PATCH 04/19] Remove wrong solution --- Objects/typeobject.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 2090db49e5ed2c..b9d549610693c1 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -10767,8 +10767,6 @@ slot_tp_finalize(PyObject *self) } } - PyType_Modified(Py_TYPE(self)); - _PyThreadState_PopCStackRef(tstate, &cref); /* Restore the saved exception. */ From 0e649ab0424e69b3e403d1ddc54574ea447fe995 Mon Sep 17 00:00:00 2001 From: Sergey Miryanov Date: Mon, 23 Jun 2025 00:56:15 +0500 Subject: [PATCH 05/19] Do not clear weakrefs to types before finalization of instances --- Python/gc.c | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/Python/gc.c b/Python/gc.c index 7b0e6d6e803504..8f46bd39e54c8c 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -858,6 +858,21 @@ move_legacy_finalizer_reachable(PyGC_Head *finalizers) } } +/* Move types from unreachable set to prevent clearing of type's subclasses */ +static void +move_types_from_unreachable(PyGC_Head *unreachable, PyGC_Head *to) +{ + PyGC_Head *gc, *next; + for(gc = GC_NEXT(unreachable); gc != unreachable; gc = next) { + PyObject *op = FROM_GC(gc); + next = GC_NEXT(gc); + + if (PyType_Check(op)) { + gc_list_move(gc, to); + } + } +} + /* Clear all weakrefs to unreachable objects, and if such a weakref has a * callback, invoke it if necessary. Note that it's possible for such * weakrefs to be outside the unreachable set -- indeed, those are precisely @@ -1698,6 +1713,7 @@ gc_collect_region(PyThreadState *tstate, { PyGC_Head unreachable; /* non-problematic unreachable trash */ PyGC_Head finalizers; /* objects with, & reachable from, __del__ */ + PyGC_Head types; /* unreachable types */ PyGC_Head *gc; /* initialize to prevent a compiler warning */ GCState *gcstate = &tstate->interp->gc; @@ -1736,6 +1752,15 @@ gc_collect_region(PyThreadState *tstate, } } + /* All types in the unreachable set should be handled after the + * instances of those types are finalized. Otherwise, when we clear + * the weak references, the subclasses list will also be cleared, and + * the type's cache will not be properly invalidated from + * within the __del__ method. + */ + gc_list_init(&types); + move_types_from_unreachable(&unreachable, &types); + /* Clear weakrefs and invoke callbacks as necessary. */ stats->collected += handle_weakrefs(&unreachable, to); gc_list_validate_space(to, gcstate->visited_space); @@ -1744,6 +1769,20 @@ gc_collect_region(PyThreadState *tstate, /* Call tp_finalize on objects which have one. */ finalize_garbage(tstate, &unreachable); + + /* Clear weakrefs to types and invoke callbacks as necessary. */ + stats->collected += handle_weakrefs(&types, to); + gc_list_validate_space(to, gcstate->visited_space); + validate_list(to, collecting_clear_unreachable_clear); + + /* Call tp_finalize on types. */ + finalize_garbage(tstate, &types); + + /* Merge types back to unreachable to properly process resurected + * objects and so on. + */ + gc_list_merge(&types, &unreachable); + /* Handle any objects that may have resurrected after the call * to 'finalize_garbage' and continue the collection with the * objects that are still unreachable */ From ac394bc36fe5a3fc9dbb1d011eee6995b78b92d4 Mon Sep 17 00:00:00 2001 From: Sergey Miryanov Date: Mon, 23 Jun 2025 01:10:02 +0500 Subject: [PATCH 06/19] Add news --- .../2025-06-23-01-07-09.gh-issue-135552.eG9vzP.rst | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-06-23-01-07-09.gh-issue-135552.eG9vzP.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-23-01-07-09.gh-issue-135552.eG9vzP.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-23-01-07-09.gh-issue-135552.eG9vzP.rst new file mode 100644 index 00000000000000..cc7067540b9886 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-23-01-07-09.gh-issue-135552.eG9vzP.rst @@ -0,0 +1,4 @@ +Split the handling of weak references in the GC if both types and instances +within a generation are unreachable. This prevent the clearing of the +type's subclasses list too early. This fix a segfault that occurs when +__del__ modifies the base's attributes and tries to access them from self. From 3b1873893a547289252793e6f869f2392d4ee53d Mon Sep 17 00:00:00 2001 From: Sergey Miryanov Date: Mon, 23 Jun 2025 13:00:45 +0500 Subject: [PATCH 07/19] Update Misc/NEWS.d/next/Core_and_Builtins/2025-06-23-01-07-09.gh-issue-135552.eG9vzP.rst Co-authored-by: sobolevn --- .../2025-06-23-01-07-09.gh-issue-135552.eG9vzP.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-23-01-07-09.gh-issue-135552.eG9vzP.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-23-01-07-09.gh-issue-135552.eG9vzP.rst index cc7067540b9886..d13582bc14d13f 100644 --- a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-23-01-07-09.gh-issue-135552.eG9vzP.rst +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-23-01-07-09.gh-issue-135552.eG9vzP.rst @@ -1,4 +1,4 @@ Split the handling of weak references in the GC if both types and instances within a generation are unreachable. This prevent the clearing of the type's subclasses list too early. This fix a segfault that occurs when -__del__ modifies the base's attributes and tries to access them from self. +:meth:`~object.__del__` modifies the base's attributes and tries to access them from self. From de8841c95cfe80c172bd93fc362f30c56f17477b Mon Sep 17 00:00:00 2001 From: Sergey Miryanov Date: Mon, 23 Jun 2025 14:06:16 +0500 Subject: [PATCH 08/19] Update Misc/NEWS.d/next/Core_and_Builtins/2025-06-23-01-07-09.gh-issue-135552.eG9vzP.rst Co-authored-by: Mikhail Efimov --- .../2025-06-23-01-07-09.gh-issue-135552.eG9vzP.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-23-01-07-09.gh-issue-135552.eG9vzP.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-23-01-07-09.gh-issue-135552.eG9vzP.rst index d13582bc14d13f..2822bb576ee392 100644 --- a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-23-01-07-09.gh-issue-135552.eG9vzP.rst +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-23-01-07-09.gh-issue-135552.eG9vzP.rst @@ -1,4 +1,4 @@ Split the handling of weak references in the GC if both types and instances within a generation are unreachable. This prevent the clearing of the -type's subclasses list too early. This fix a segfault that occurs when +type's subclasses list too early. This fix a crash that occurs when :meth:`~object.__del__` modifies the base's attributes and tries to access them from self. From f8745e0614dc6a47268c7d21239da0865f6cd844 Mon Sep 17 00:00:00 2001 From: Sergey Miryanov Date: Mon, 23 Jun 2025 15:51:15 +0500 Subject: [PATCH 09/19] Split unreachable to types and objects while deduce --- Python/gc.c | 81 +++++++++++++++++++++++++++++------------------------ 1 file changed, 45 insertions(+), 36 deletions(-) diff --git a/Python/gc.c b/Python/gc.c index 8f46bd39e54c8c..5354a2e29ed80c 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -665,11 +665,13 @@ visit_reachable(PyObject *op, void *arg) * So we can not gc_list_* functions for unreachable until we remove the flag. */ static void -move_unreachable(PyGC_Head *young, PyGC_Head *unreachable) +move_unreachable(PyGC_Head *young, PyGC_Head *unreachable, PyGC_Head *unreachable_types) { // previous elem in the young list, used for restore gc_prev. PyGC_Head *prev = young; PyGC_Head *gc = GC_NEXT(young); + PyGC_Head *to; + PyObject *op; /* Invariants: all objects "to the left" of us in young are reachable * (directly or indirectly) from outside the young list as it was at entry. @@ -720,9 +722,17 @@ move_unreachable(PyGC_Head *young, PyGC_Head *unreachable) // No need to gc->next->prev = prev because it is single linked. prev->_gc_next = gc->_gc_next; + op = FROM_GC(gc); + if (unreachable_types != NULL && PyType_Check(op)) { + to = unreachable_types; + } + else { + to = unreachable; + } + // We can't use gc_list_append() here because we use // NEXT_MASK_UNREACHABLE here. - PyGC_Head *last = GC_PREV(unreachable); + PyGC_Head *last = GC_PREV(to); // NOTE: Since all objects in unreachable set has // NEXT_MASK_UNREACHABLE flag, we set it unconditionally. // But this may pollute the unreachable list head's 'next' pointer @@ -730,8 +740,8 @@ move_unreachable(PyGC_Head *young, PyGC_Head *unreachable) // damage is repaired when this function ends. last->_gc_next = flags | (uintptr_t)gc; _PyGCHead_SET_PREV(gc, last); - gc->_gc_next = flags | (uintptr_t)unreachable; - unreachable->_gc_prev = (uintptr_t)gc; + gc->_gc_next = flags | (uintptr_t)to; + to->_gc_prev = (uintptr_t)gc; } gc = _PyGCHead_NEXT(prev); } @@ -740,6 +750,9 @@ move_unreachable(PyGC_Head *young, PyGC_Head *unreachable) young->_gc_next &= _PyGC_PREV_MASK; // don't let the pollution of the list head's next pointer leak unreachable->_gc_next &= _PyGC_PREV_MASK; + if (unreachable_types != NULL) { + unreachable_types->_gc_next &= _PyGC_PREV_MASK; + } } /* In theory, all tuples should be younger than the @@ -858,21 +871,6 @@ move_legacy_finalizer_reachable(PyGC_Head *finalizers) } } -/* Move types from unreachable set to prevent clearing of type's subclasses */ -static void -move_types_from_unreachable(PyGC_Head *unreachable, PyGC_Head *to) -{ - PyGC_Head *gc, *next; - for(gc = GC_NEXT(unreachable); gc != unreachable; gc = next) { - PyObject *op = FROM_GC(gc); - next = GC_NEXT(gc); - - if (PyType_Check(op)) { - gc_list_move(gc, to); - } - } -} - /* Clear all weakrefs to unreachable objects, and if such a weakref has a * callback, invoke it if necessary. Note that it's possible for such * weakrefs to be outside the unreachable set -- indeed, those are precisely @@ -1184,6 +1182,8 @@ delete_garbage(PyThreadState *tstate, GCState *gcstate, them to the "unreachable" list. This step also needs to move back to "base" all objects that were initially marked as unreachable but are referred transitively by the reachable objects (the ones with strictly positive reference count). +4. Split unreachable objects and unreachable types to prevent clearing types + before instances. Contracts: @@ -1198,7 +1198,8 @@ flag is cleared (for example, by using 'clear_unreachable_mask' function or by a call to 'move_legacy_finalizers'), the 'unreachable' list is not a normal list and we can not use most gc_list_* functions for it. */ static inline void -deduce_unreachable(PyGC_Head *base, PyGC_Head *unreachable) { +deduce_unreachable(PyGC_Head *base, PyGC_Head *unreachable, + PyGC_Head* unreachable_types) { validate_list(base, collecting_clear_unreachable_clear); /* Using ob_refcnt and gc_refs, calculate which objects in the * container set are reachable from outside the set (i.e., have a @@ -1242,10 +1243,19 @@ deduce_unreachable(PyGC_Head *base, PyGC_Head *unreachable) { * objects will remain unreachable, so it would be more efficient to move * the reachable objects instead. But this is a one-time cost, probably not * worth complicating the code to speed just a little. + * + * Note on types: All types in the unreachable set should be handled after + * the instances of those types are finalized. Otherwise, when we clear + * the weak references, the subclasses list will also be cleared, and + * the type's cache will not be properly invalidated from + * within the __del__ method. */ - move_unreachable(base, unreachable); // gc_prev is pointer again + move_unreachable(base, unreachable, unreachable_types); // gc_prev is pointer again validate_list(base, collecting_clear_unreachable_clear); validate_list(unreachable, collecting_set_unreachable_set); + if (unreachable_types != NULL) { + validate_list(unreachable_types, collecting_set_unreachable_set); + } } /* Handle objects that may have resurrected after a call to 'finalize_garbage', moving @@ -1273,7 +1283,7 @@ handle_resurrected_objects(PyGC_Head *unreachable, PyGC_Head* still_unreachable, // have the PREV_MARK_COLLECTING set, but the objects are going to be // removed so we can skip the expense of clearing the flag. PyGC_Head* resurrected = unreachable; - deduce_unreachable(resurrected, still_unreachable); + deduce_unreachable(resurrected, still_unreachable, NULL); clear_unreachable_mask(still_unreachable); // Move the resurrected objects to the old generation for future collection. @@ -1713,7 +1723,7 @@ gc_collect_region(PyThreadState *tstate, { PyGC_Head unreachable; /* non-problematic unreachable trash */ PyGC_Head finalizers; /* objects with, & reachable from, __del__ */ - PyGC_Head types; /* unreachable types */ + PyGC_Head unreachable_types; /* unreachable types */ PyGC_Head *gc; /* initialize to prevent a compiler warning */ GCState *gcstate = &tstate->interp->gc; @@ -1721,7 +1731,8 @@ gc_collect_region(PyThreadState *tstate, assert(!_PyErr_Occurred(tstate)); gc_list_init(&unreachable); - deduce_unreachable(from, &unreachable); + gc_list_init(&unreachable_types); + deduce_unreachable(from, &unreachable, &unreachable_types); validate_consistent_old_space(from); untrack_tuples(from); validate_consistent_old_space(to); @@ -1738,6 +1749,7 @@ gc_collect_region(PyThreadState *tstate, // NEXT_MASK_UNREACHABLE is cleared here. // After move_legacy_finalizers(), unreachable is normal list. move_legacy_finalizers(&unreachable, &finalizers); + move_legacy_finalizers(&unreachable_types, &finalizers); /* finalizers contains the unreachable objects with a legacy finalizer; * unreachable objects reachable *from* those are also uncollectable, * and we move those into the finalizers list too. @@ -1745,22 +1757,18 @@ gc_collect_region(PyThreadState *tstate, move_legacy_finalizer_reachable(&finalizers); validate_list(&finalizers, collecting_clear_unreachable_clear); validate_list(&unreachable, collecting_set_unreachable_clear); + validate_list(&unreachable_types, collecting_set_unreachable_clear); /* Print debugging information. */ if (gcstate->debug & _PyGC_DEBUG_COLLECTABLE) { for (gc = GC_NEXT(&unreachable); gc != &unreachable; gc = GC_NEXT(gc)) { debug_cycle("collectable", FROM_GC(gc)); } + gc = GC_NEXT(&unreachable_types); + for (; gc != &unreachable_types; gc = GC_NEXT(gc)) { + debug_cycle("collectable", FROM_GC(gc)); + } } - /* All types in the unreachable set should be handled after the - * instances of those types are finalized. Otherwise, when we clear - * the weak references, the subclasses list will also be cleared, and - * the type's cache will not be properly invalidated from - * within the __del__ method. - */ - gc_list_init(&types); - move_types_from_unreachable(&unreachable, &types); - /* Clear weakrefs and invoke callbacks as necessary. */ stats->collected += handle_weakrefs(&unreachable, to); gc_list_validate_space(to, gcstate->visited_space); @@ -1771,17 +1779,18 @@ gc_collect_region(PyThreadState *tstate, finalize_garbage(tstate, &unreachable); /* Clear weakrefs to types and invoke callbacks as necessary. */ - stats->collected += handle_weakrefs(&types, to); + stats->collected += handle_weakrefs(&unreachable_types, to); gc_list_validate_space(to, gcstate->visited_space); validate_list(to, collecting_clear_unreachable_clear); + validate_list(&unreachable_types, collecting_set_unreachable_clear); /* Call tp_finalize on types. */ - finalize_garbage(tstate, &types); + finalize_garbage(tstate, &unreachable_types); /* Merge types back to unreachable to properly process resurected * objects and so on. */ - gc_list_merge(&types, &unreachable); + gc_list_merge(&unreachable_types, &unreachable); /* Handle any objects that may have resurrected after the call * to 'finalize_garbage' and continue the collection with the From 3fe0f152217bd8bdd939c9fb59ce7910db2275ca Mon Sep 17 00:00:00 2001 From: Sergey Miryanov Date: Tue, 24 Jun 2025 15:07:38 +0500 Subject: [PATCH 10/19] Simplify tests --- Lib/test/test_gc.py | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index 58c1882886002e..a6d9ce6f91ff2d 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -1127,30 +1127,23 @@ def test_something(self): def test_do_not_cleanup_type_subclasses_before_finalization(self): # https://github.com/python/cpython/issues/135552 - code = textwrap.dedent(""" + code = """ class BaseNode: def __del__(self): BaseNode.next = BaseNode.next.next + BaseNode.next.next class Node(BaseNode): pass BaseNode.next = Node() BaseNode.next.next = Node() - """) - assert_python_ok("-c", code) + """ + assert_python_ok("-c", textwrap.dedent(code)) - code_inside_function = textwrap.dedent(""" + code_inside_function = textwrap.dedent(F""" def test(): - class BaseNode: - def __del__(self): - BaseNode.next = BaseNode.next.next - - class Node(BaseNode): - pass - - BaseNode.next = Node() - BaseNode.next.next = Node() + {textwrap.indent(code, ' ')} test() """) From 1ad18ecdd66b786cf885fec97ded644357300b67 Mon Sep 17 00:00:00 2001 From: Sergey Miryanov Date: Tue, 1 Jul 2025 00:59:45 +0500 Subject: [PATCH 11/19] Remove unreachable_types pass --- Python/gc.c | 72 ++++++++++++++++------------------------------------- 1 file changed, 21 insertions(+), 51 deletions(-) diff --git a/Python/gc.c b/Python/gc.c index 5354a2e29ed80c..ee486cf56334b9 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -665,13 +665,11 @@ visit_reachable(PyObject *op, void *arg) * So we can not gc_list_* functions for unreachable until we remove the flag. */ static void -move_unreachable(PyGC_Head *young, PyGC_Head *unreachable, PyGC_Head *unreachable_types) +move_unreachable(PyGC_Head *young, PyGC_Head *unreachable) { // previous elem in the young list, used for restore gc_prev. PyGC_Head *prev = young; PyGC_Head *gc = GC_NEXT(young); - PyGC_Head *to; - PyObject *op; /* Invariants: all objects "to the left" of us in young are reachable * (directly or indirectly) from outside the young list as it was at entry. @@ -722,17 +720,9 @@ move_unreachable(PyGC_Head *young, PyGC_Head *unreachable, PyGC_Head *unreachabl // No need to gc->next->prev = prev because it is single linked. prev->_gc_next = gc->_gc_next; - op = FROM_GC(gc); - if (unreachable_types != NULL && PyType_Check(op)) { - to = unreachable_types; - } - else { - to = unreachable; - } - // We can't use gc_list_append() here because we use // NEXT_MASK_UNREACHABLE here. - PyGC_Head *last = GC_PREV(to); + PyGC_Head *last = GC_PREV(unreachable); // NOTE: Since all objects in unreachable set has // NEXT_MASK_UNREACHABLE flag, we set it unconditionally. // But this may pollute the unreachable list head's 'next' pointer @@ -740,8 +730,8 @@ move_unreachable(PyGC_Head *young, PyGC_Head *unreachable, PyGC_Head *unreachabl // damage is repaired when this function ends. last->_gc_next = flags | (uintptr_t)gc; _PyGCHead_SET_PREV(gc, last); - gc->_gc_next = flags | (uintptr_t)to; - to->_gc_prev = (uintptr_t)gc; + gc->_gc_next = flags | (uintptr_t)unreachable; + unreachable->_gc_prev = (uintptr_t)gc; } gc = _PyGCHead_NEXT(prev); } @@ -750,9 +740,6 @@ move_unreachable(PyGC_Head *young, PyGC_Head *unreachable, PyGC_Head *unreachabl young->_gc_next &= _PyGC_PREV_MASK; // don't let the pollution of the list head's next pointer leak unreachable->_gc_next &= _PyGC_PREV_MASK; - if (unreachable_types != NULL) { - unreachable_types->_gc_next &= _PyGC_PREV_MASK; - } } /* In theory, all tuples should be younger than the @@ -920,7 +907,12 @@ handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old) * will run and potentially cause a crash. See bpo-38006 for * one example. */ - _PyWeakref_ClearRef((PyWeakReference *)op); + if (!((PyWeakReference *)op)->is_subclass) { + _PyWeakref_ClearRef((PyWeakReference *)op); + } + else { + continue; + } } if (! _PyType_SUPPORTS_WEAKREFS(Py_TYPE(op))) { @@ -938,9 +930,15 @@ handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old) * all the weakrefs, and move the weakrefs with callbacks * that must be called into wrcb_to_call. */ - for (wr = *wrlist; wr != NULL; wr = *wrlist) { + PyWeakReference *wr_next = *wrlist; + for (wr = wr_next; wr != NULL; wr = wr_next) { PyGC_Head *wrasgc; /* AS_GC(wr) */ + wr_next = wr->wr_next; + if (wr->is_subclass == 1) { + continue; + } + /* _PyWeakref_ClearRef clears the weakref but leaves * the callback pointer intact. Obscure: it also * changes *wrlist. @@ -1182,8 +1180,6 @@ delete_garbage(PyThreadState *tstate, GCState *gcstate, them to the "unreachable" list. This step also needs to move back to "base" all objects that were initially marked as unreachable but are referred transitively by the reachable objects (the ones with strictly positive reference count). -4. Split unreachable objects and unreachable types to prevent clearing types - before instances. Contracts: @@ -1198,8 +1194,7 @@ flag is cleared (for example, by using 'clear_unreachable_mask' function or by a call to 'move_legacy_finalizers'), the 'unreachable' list is not a normal list and we can not use most gc_list_* functions for it. */ static inline void -deduce_unreachable(PyGC_Head *base, PyGC_Head *unreachable, - PyGC_Head* unreachable_types) { +deduce_unreachable(PyGC_Head *base, PyGC_Head *unreachable) { validate_list(base, collecting_clear_unreachable_clear); /* Using ob_refcnt and gc_refs, calculate which objects in the * container set are reachable from outside the set (i.e., have a @@ -1250,12 +1245,9 @@ deduce_unreachable(PyGC_Head *base, PyGC_Head *unreachable, * the type's cache will not be properly invalidated from * within the __del__ method. */ - move_unreachable(base, unreachable, unreachable_types); // gc_prev is pointer again + move_unreachable(base, unreachable); // gc_prev is pointer again validate_list(base, collecting_clear_unreachable_clear); validate_list(unreachable, collecting_set_unreachable_set); - if (unreachable_types != NULL) { - validate_list(unreachable_types, collecting_set_unreachable_set); - } } /* Handle objects that may have resurrected after a call to 'finalize_garbage', moving @@ -1283,7 +1275,7 @@ handle_resurrected_objects(PyGC_Head *unreachable, PyGC_Head* still_unreachable, // have the PREV_MARK_COLLECTING set, but the objects are going to be // removed so we can skip the expense of clearing the flag. PyGC_Head* resurrected = unreachable; - deduce_unreachable(resurrected, still_unreachable, NULL); + deduce_unreachable(resurrected, still_unreachable); clear_unreachable_mask(still_unreachable); // Move the resurrected objects to the old generation for future collection. @@ -1723,7 +1715,6 @@ gc_collect_region(PyThreadState *tstate, { PyGC_Head unreachable; /* non-problematic unreachable trash */ PyGC_Head finalizers; /* objects with, & reachable from, __del__ */ - PyGC_Head unreachable_types; /* unreachable types */ PyGC_Head *gc; /* initialize to prevent a compiler warning */ GCState *gcstate = &tstate->interp->gc; @@ -1731,8 +1722,7 @@ gc_collect_region(PyThreadState *tstate, assert(!_PyErr_Occurred(tstate)); gc_list_init(&unreachable); - gc_list_init(&unreachable_types); - deduce_unreachable(from, &unreachable, &unreachable_types); + deduce_unreachable(from, &unreachable); validate_consistent_old_space(from); untrack_tuples(from); validate_consistent_old_space(to); @@ -1749,7 +1739,6 @@ gc_collect_region(PyThreadState *tstate, // NEXT_MASK_UNREACHABLE is cleared here. // After move_legacy_finalizers(), unreachable is normal list. move_legacy_finalizers(&unreachable, &finalizers); - move_legacy_finalizers(&unreachable_types, &finalizers); /* finalizers contains the unreachable objects with a legacy finalizer; * unreachable objects reachable *from* those are also uncollectable, * and we move those into the finalizers list too. @@ -1757,16 +1746,11 @@ gc_collect_region(PyThreadState *tstate, move_legacy_finalizer_reachable(&finalizers); validate_list(&finalizers, collecting_clear_unreachable_clear); validate_list(&unreachable, collecting_set_unreachable_clear); - validate_list(&unreachable_types, collecting_set_unreachable_clear); /* Print debugging information. */ if (gcstate->debug & _PyGC_DEBUG_COLLECTABLE) { for (gc = GC_NEXT(&unreachable); gc != &unreachable; gc = GC_NEXT(gc)) { debug_cycle("collectable", FROM_GC(gc)); } - gc = GC_NEXT(&unreachable_types); - for (; gc != &unreachable_types; gc = GC_NEXT(gc)) { - debug_cycle("collectable", FROM_GC(gc)); - } } /* Clear weakrefs and invoke callbacks as necessary. */ @@ -1778,20 +1762,6 @@ gc_collect_region(PyThreadState *tstate, /* Call tp_finalize on objects which have one. */ finalize_garbage(tstate, &unreachable); - /* Clear weakrefs to types and invoke callbacks as necessary. */ - stats->collected += handle_weakrefs(&unreachable_types, to); - gc_list_validate_space(to, gcstate->visited_space); - validate_list(to, collecting_clear_unreachable_clear); - validate_list(&unreachable_types, collecting_set_unreachable_clear); - - /* Call tp_finalize on types. */ - finalize_garbage(tstate, &unreachable_types); - - /* Merge types back to unreachable to properly process resurected - * objects and so on. - */ - gc_list_merge(&unreachable_types, &unreachable); - /* Handle any objects that may have resurrected after the call * to 'finalize_garbage' and continue the collection with the * objects that are still unreachable */ From bc0297ca2e5a65d46abc5ef52b97c15db40af556 Mon Sep 17 00:00:00 2001 From: Sergey Miryanov Date: Tue, 1 Jul 2025 01:02:16 +0500 Subject: [PATCH 12/19] Remove obsolete comments --- Python/gc.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/Python/gc.c b/Python/gc.c index ee486cf56334b9..0f7fd7edcff615 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1238,12 +1238,6 @@ deduce_unreachable(PyGC_Head *base, PyGC_Head *unreachable) { * objects will remain unreachable, so it would be more efficient to move * the reachable objects instead. But this is a one-time cost, probably not * worth complicating the code to speed just a little. - * - * Note on types: All types in the unreachable set should be handled after - * the instances of those types are finalized. Otherwise, when we clear - * the weak references, the subclasses list will also be cleared, and - * the type's cache will not be properly invalidated from - * within the __del__ method. */ move_unreachable(base, unreachable); // gc_prev is pointer again validate_list(base, collecting_clear_unreachable_clear); @@ -1761,7 +1755,6 @@ gc_collect_region(PyThreadState *tstate, /* Call tp_finalize on objects which have one. */ finalize_garbage(tstate, &unreachable); - /* Handle any objects that may have resurrected after the call * to 'finalize_garbage' and continue the collection with the * objects that are still unreachable */ From 3294f2e4cec9c4705fdd338a8b49b6e1a6c84e9f Mon Sep 17 00:00:00 2001 From: Sergey Miryanov Date: Tue, 1 Jul 2025 01:03:30 +0500 Subject: [PATCH 13/19] Add is_subclass to PyWeakReference and do not clear those weakrefs in GC --- Include/cpython/weakrefobject.h | 1 + Objects/typeobject.c | 3 +++ Objects/weakrefobject.c | 1 + 3 files changed, 5 insertions(+) diff --git a/Include/cpython/weakrefobject.h b/Include/cpython/weakrefobject.h index da8e77cddaca63..853f6a0a3f1a3e 100644 --- a/Include/cpython/weakrefobject.h +++ b/Include/cpython/weakrefobject.h @@ -38,6 +38,7 @@ struct _PyWeakReference { */ PyMutex *weakrefs_lock; #endif + uint8_t is_subclass; }; PyAPI_FUNC(void) _PyWeakref_ClearRef(PyWeakReference *self); diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 6e7471cb5941a7..3e83a8dc1015b7 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -9265,6 +9265,9 @@ add_subclass(PyTypeObject *base, PyTypeObject *type) return -1; } + ((PyWeakReference*)ref)->is_subclass = 1; + + // Only get tp_subclasses after creating the key and value. // PyWeakref_NewRef() can trigger a garbage collection which can execute // arbitrary Python code and so modify base->tp_subclasses. diff --git a/Objects/weakrefobject.c b/Objects/weakrefobject.c index bd4c4ac9b3475a..f1fe12fc7043f2 100644 --- a/Objects/weakrefobject.c +++ b/Objects/weakrefobject.c @@ -72,6 +72,7 @@ init_weakref(PyWeakReference *self, PyObject *ob, PyObject *callback) _PyObject_SetMaybeWeakref(ob); _PyObject_SetMaybeWeakref((PyObject *)self); #endif + self->is_subclass = 0; } // Clear the weakref and steal its callback into `callback`, if provided. From 6c45159aeb4df37dc11dd2cc27641993ef72f961 Mon Sep 17 00:00:00 2001 From: Sergey Miryanov Date: Tue, 1 Jul 2025 23:49:53 +0500 Subject: [PATCH 14/19] Create weakrefs for subclasses with sentinel callback to properly handle them in GC --- Include/cpython/weakrefobject.h | 1 - Include/internal/pycore_interp_structs.h | 1 + Include/internal/pycore_weakref.h | 5 +++ Objects/typeobject.c | 5 +-- Objects/weakrefobject.c | 55 +++++++++++++++++++++++- Python/gc.c | 4 +- Python/pylifecycle.c | 6 +++ Python/pystate.c | 2 + 8 files changed, 71 insertions(+), 8 deletions(-) diff --git a/Include/cpython/weakrefobject.h b/Include/cpython/weakrefobject.h index 853f6a0a3f1a3e..da8e77cddaca63 100644 --- a/Include/cpython/weakrefobject.h +++ b/Include/cpython/weakrefobject.h @@ -38,7 +38,6 @@ struct _PyWeakReference { */ PyMutex *weakrefs_lock; #endif - uint8_t is_subclass; }; PyAPI_FUNC(void) _PyWeakref_ClearRef(PyWeakReference *self); diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h index f1f427d99dea69..a3357d65e7b754 100644 --- a/Include/internal/pycore_interp_structs.h +++ b/Include/internal/pycore_interp_structs.h @@ -701,6 +701,7 @@ struct _Py_interp_static_objects { _PyGC_Head_UNUSED _hamt_empty_gc_not_used; PyHamtObject hamt_empty; PyBaseExceptionObject last_resort_memory_error; + PyObject *subclasses_weakref_sentinel; } singletons; }; diff --git a/Include/internal/pycore_weakref.h b/Include/internal/pycore_weakref.h index 4ed8928c0b92a8..71c06ec75e6aaf 100644 --- a/Include/internal/pycore_weakref.h +++ b/Include/internal/pycore_weakref.h @@ -9,6 +9,7 @@ extern "C" { #endif #include "pycore_critical_section.h" // Py_BEGIN_CRITICAL_SECTION() +#include "pycore_interp_structs.h" // PyInterpreterState #include "pycore_lock.h" // PyMutex_LockFlags() #include "pycore_object.h" // _Py_REF_IS_MERGED() #include "pycore_pyatomic_ft_wrappers.h" @@ -127,6 +128,10 @@ extern void _PyWeakref_ClearWeakRefsNoCallbacks(PyObject *obj); PyAPI_FUNC(int) _PyWeakref_IsDead(PyObject *weakref); +int _PyWeakref_InitSubclassSentinel(PyInterpreterState *interp); +PyObject * _PyWeakref_NewSubclassRef(PyObject *ob); +int _PyWeakref_IsSubclassRef(PyWeakReference *weakref); + #ifdef __cplusplus } #endif diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 3e83a8dc1015b7..55a11a74ecacc8 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -9259,15 +9259,12 @@ add_subclass(PyTypeObject *base, PyTypeObject *type) if (key == NULL) return -1; - PyObject *ref = PyWeakref_NewRef((PyObject *)type, NULL); + PyObject *ref = _PyWeakref_NewSubclassRef((PyObject *)type); if (ref == NULL) { Py_DECREF(key); return -1; } - ((PyWeakReference*)ref)->is_subclass = 1; - - // Only get tp_subclasses after creating the key and value. // PyWeakref_NewRef() can trigger a garbage collection which can execute // arbitrary Python code and so modify base->tp_subclasses. diff --git a/Objects/weakrefobject.c b/Objects/weakrefobject.c index f1fe12fc7043f2..e5a8b33c2d2e93 100644 --- a/Objects/weakrefobject.c +++ b/Objects/weakrefobject.c @@ -1,5 +1,7 @@ #include "Python.h" #include "pycore_critical_section.h" +#include "pycore_global_objects.h" // _Py_INTERP_STATIC_OBJECT() +#include "pycore_interp_structs.h" // _PyInterpreterState_GET() #include "pycore_lock.h" #include "pycore_modsupport.h" // _PyArg_NoKwnames() #include "pycore_object.h" // _PyObject_GET_WEAKREFS_LISTPTR() @@ -72,7 +74,6 @@ init_weakref(PyWeakReference *self, PyObject *ob, PyObject *callback) _PyObject_SetMaybeWeakref(ob); _PyObject_SetMaybeWeakref((PyObject *)self); #endif - self->is_subclass = 0; } // Clear the weakref and steal its callback into `callback`, if provided. @@ -1133,3 +1134,55 @@ _PyWeakref_IsDead(PyObject *weakref) { return _PyWeakref_IS_DEAD(weakref); } + +int +_PyWeakref_InitSubclassSentinel(PyInterpreterState *interp) +{ + PyObject *code = (PyObject *)PyCode_NewEmpty( + "", + "", + 0); + if (code == NULL) { + return -1; + } + + PyObject *globals = PyDict_New(); + if (globals == NULL) { + Py_DECREF(code); + return -1; + } + + PyObject *func = PyFunction_New(code, globals); + Py_DECREF(globals); + Py_DECREF(code); + if (func == NULL) { + return -1; + } + + _Py_INTERP_SINGLETON(interp, subclasses_weakref_sentinel) = func; + return 0; +} + +PyObject * +_PyWeakref_NewSubclassRef(PyObject *ob) +{ + PyInterpreterState *interp = _PyInterpreterState_GET(); + if (interp != interp->runtime->interpreters.main) { + interp = interp->runtime->interpreters.main; + } + PyObject *func = _Py_INTERP_SINGLETON(interp, subclasses_weakref_sentinel); + return PyWeakref_NewRef(ob, func); +} + +int +_PyWeakref_IsSubclassRef(PyWeakReference *weakref) +{ + assert(weakref != NULL); + + PyInterpreterState *interp = _PyInterpreterState_GET(); + if (interp != interp->runtime->interpreters.main) { + interp = interp->runtime->interpreters.main; + } + PyObject *func = _Py_INTERP_SINGLETON(interp, subclasses_weakref_sentinel); + return weakref->wr_callback == func; +} diff --git a/Python/gc.c b/Python/gc.c index 0f7fd7edcff615..f46861f51fc1ee 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -907,7 +907,7 @@ handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old) * will run and potentially cause a crash. See bpo-38006 for * one example. */ - if (!((PyWeakReference *)op)->is_subclass) { + if (!_PyWeakref_IsSubclassRef((PyWeakReference *)op)) { _PyWeakref_ClearRef((PyWeakReference *)op); } else { @@ -935,7 +935,7 @@ handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old) PyGC_Head *wrasgc; /* AS_GC(wr) */ wr_next = wr->wr_next; - if (wr->is_subclass == 1) { + if (_PyWeakref_IsSubclassRef(wr) == 1) { continue; } diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 724fda63511282..4deca6cd924899 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -1376,6 +1376,12 @@ init_interp_main(PyThreadState *tstate) return _PyStatus_ERR("failed to set builtin dict watcher"); } + if (is_main_interp) { + if (_PyWeakref_InitSubclassSentinel(interp) < 0) { + return _PyStatus_ERR("failed to create subclasses weakref sentinel"); + } + } + assert(!_PyErr_Occurred(tstate)); return _PyStatus_OK(); diff --git a/Python/pystate.c b/Python/pystate.c index 0d4c26f92cec90..c782f79df0982c 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -760,6 +760,8 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate) Py_CLEAR(interp->audit_hooks); + Py_CLEAR(_Py_INTERP_SINGLETON(interp, subclasses_weakref_sentinel)); + // At this time, all the threads should be cleared so we don't need atomic // operations for instrumentation_version or eval_breaker. interp->ceval.instrumentation_version = 0; From e5e95ffeb1103807b6d274381ba7b44d8da71313 Mon Sep 17 00:00:00 2001 From: Sergey Miryanov Date: Tue, 1 Jul 2025 23:53:53 +0500 Subject: [PATCH 15/19] Update news entry --- .../2025-06-23-01-07-09.gh-issue-135552.eG9vzP.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-23-01-07-09.gh-issue-135552.eG9vzP.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-23-01-07-09.gh-issue-135552.eG9vzP.rst index 2822bb576ee392..3d8e2e906475d1 100644 --- a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-23-01-07-09.gh-issue-135552.eG9vzP.rst +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-23-01-07-09.gh-issue-135552.eG9vzP.rst @@ -1,4 +1,4 @@ -Split the handling of weak references in the GC if both types and instances -within a generation are unreachable. This prevent the clearing of the -type's subclasses list too early. This fix a crash that occurs when -:meth:`~object.__del__` modifies the base's attributes and tries to access them from self. +Do not clear type's subclasses weak references in the GC to prevent the clearing +of the type's subclasses list too early. This fix a crash that occurs when +:meth:`~object.__del__` modifies the base's attributes and tries to access them +from self. From edf634c2078700e509bf19985f59212f8ceb25e3 Mon Sep 17 00:00:00 2001 From: Sergey Miryanov Date: Wed, 2 Jul 2025 00:03:22 +0500 Subject: [PATCH 16/19] Add some comments --- Include/internal/pycore_weakref.h | 5 +++++ Python/gc.c | 7 +++++++ 2 files changed, 12 insertions(+) diff --git a/Include/internal/pycore_weakref.h b/Include/internal/pycore_weakref.h index 71c06ec75e6aaf..d97d60eed54ed0 100644 --- a/Include/internal/pycore_weakref.h +++ b/Include/internal/pycore_weakref.h @@ -128,8 +128,13 @@ extern void _PyWeakref_ClearWeakRefsNoCallbacks(PyObject *obj); PyAPI_FUNC(int) _PyWeakref_IsDead(PyObject *weakref); +// Create sentinel callback object for subclasses weakrefs int _PyWeakref_InitSubclassSentinel(PyInterpreterState *interp); + +// Create new PyWeakReference object with sentinel callback PyObject * _PyWeakref_NewSubclassRef(PyObject *ob); + +// Check if weakref has sentinel callback int _PyWeakref_IsSubclassRef(PyWeakReference *weakref); #ifdef __cplusplus diff --git a/Python/gc.c b/Python/gc.c index f46861f51fc1ee..da162d4c3fff91 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -906,6 +906,8 @@ handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old) * reference cycle. If we don't clear the weakref, the callback * will run and potentially cause a crash. See bpo-38006 for * one example. + * + * If this is a subclass weakref we can safely ignore it's cleanup. */ if (!_PyWeakref_IsSubclassRef((PyWeakReference *)op)) { _PyWeakref_ClearRef((PyWeakReference *)op); @@ -935,6 +937,11 @@ handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old) PyGC_Head *wrasgc; /* AS_GC(wr) */ wr_next = wr->wr_next; + + /* If this is a subclass weakref we can safely ignore it's cleanup. + * It has only sentinel callback (no-op) and we also can safely + * not invoke them. + */ if (_PyWeakref_IsSubclassRef(wr) == 1) { continue; } From bd7d9f3f82965da0d36ad65e80bec42f6cf6422c Mon Sep 17 00:00:00 2001 From: Sergey Miryanov Date: Wed, 2 Jul 2025 19:04:07 +0500 Subject: [PATCH 17/19] Crazy idea to use noop-callback as a sentinel --- Objects/weakrefobject.c | 134 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 123 insertions(+), 11 deletions(-) diff --git a/Objects/weakrefobject.c b/Objects/weakrefobject.c index e5a8b33c2d2e93..2fc1e002308094 100644 --- a/Objects/weakrefobject.c +++ b/Objects/weakrefobject.c @@ -5,10 +5,13 @@ #include "pycore_lock.h" #include "pycore_modsupport.h" // _PyArg_NoKwnames() #include "pycore_object.h" // _PyObject_GET_WEAKREFS_LISTPTR() +#include "pycore_opcode_metadata.h" // RESUME #include "pycore_pyerrors.h" // _PyErr_ChainExceptions1() #include "pycore_pystate.h" #include "pycore_weakref.h" // _PyWeakref_GET_REF() + + #ifdef Py_GIL_DISABLED /* * Thread-safety for free-threaded builds @@ -1135,32 +1138,141 @@ _PyWeakref_IsDead(PyObject *weakref) return _PyWeakref_IS_DEAD(weakref); } +static const uint8_t noop_func_bytecode[6] = { + RESUME, RESUME_AT_FUNC_START, + LOAD_CONST, 0, + RETURN_VALUE, 0 +}; + +static const uint8_t noop_func_linetable[2] = { + (1 << 7) // New entry. + | (PY_CODE_LOCATION_INFO_NO_COLUMNS << 3) + | (3 - 1), // Three code units. + 0, // Offset from co_firstlineno. +}; + +PyCodeObject * +create_new_noop_code(const char *filename, const char *funcname, int firstlineno) +{ + PyObject *nulltuple = NULL; + PyObject *filename_str = NULL; + PyObject *funcname_str = NULL; + PyObject *code_bytes = NULL; + PyObject *linetable_bytes = NULL; + PyObject *wr_str = NULL; + PyObject *consts = NULL; + PyObject *varnames = NULL; + PyCodeObject *result = NULL; + + nulltuple = PyTuple_New(0); + if (nulltuple == NULL) { + goto failed; + } + funcname_str = PyUnicode_FromString(funcname); + if (funcname_str == NULL) { + goto failed; + } + filename_str = PyUnicode_FromString(filename); + if (filename_str == NULL) { + goto failed; + } + code_bytes = PyBytes_FromStringAndSize((const char *)noop_func_bytecode, 6); + if (code_bytes == NULL) { + goto failed; + } + linetable_bytes = PyBytes_FromStringAndSize((const char *)noop_func_linetable, 2); + if (linetable_bytes == NULL) { + goto failed; + } + + consts = PyTuple_Pack(1, Py_None); + if (consts == NULL) { + goto failed; + } + + wr_str = PyUnicode_FromString("wr"); + if (wr_str == NULL) { + goto failed; + } + + varnames = PyTuple_Pack(1, wr_str); + if (varnames == NULL) { + goto failed; + } + +#define emptybytes (PyObject *)&_Py_SINGLETON(bytes_empty) + + int argcount = 1; + int posonlyargcount = 0; + int kwonlyargcount = 0; + int nlocals = 1; + int stacksize = 1; + int flags = CO_OPTIMIZED | CO_NEWLOCALS; + + result = PyUnstable_Code_NewWithPosOnlyArgs( + argcount, posonlyargcount, kwonlyargcount, nlocals, stacksize, flags, + code_bytes, consts, nulltuple, varnames, nulltuple, nulltuple, + filename_str, funcname_str, funcname_str, firstlineno, + linetable_bytes, emptybytes); + +#undef emptybytes + +failed: + Py_XDECREF(nulltuple); + Py_XDECREF(funcname_str); + Py_XDECREF(filename_str); + Py_XDECREF(code_bytes); + Py_XDECREF(linetable_bytes); + Py_XDECREF(consts); + Py_XDECREF(varnames); + Py_XDECREF(wr_str); + + return result; +} + int _PyWeakref_InitSubclassSentinel(PyInterpreterState *interp) { - PyObject *code = (PyObject *)PyCode_NewEmpty( + int status = 0; + + PyObject *globals = NULL; + PyObject *func = NULL; + PyObject *code = (PyObject *)create_new_noop_code( "", "", - 0); + 1); if (code == NULL) { return -1; } - PyObject *globals = PyDict_New(); + globals = PyDict_New(); if (globals == NULL) { - Py_DECREF(code); - return -1; + status = -1; + goto failed; } - PyObject *func = PyFunction_New(code, globals); - Py_DECREF(globals); - Py_DECREF(code); + func = PyFunction_New(code, globals); if (func == NULL) { - return -1; + status = -1; + goto failed; } +#ifdef Py_DEBUG + PyObject *result = PyObject_CallOneArg(func, Py_None); + if (!result) { + status = -1; + goto failed; + } + Py_DECREF(result); +#endif + _Py_INTERP_SINGLETON(interp, subclasses_weakref_sentinel) = func; - return 0; + +failed: + Py_XDECREF(code); + Py_XDECREF(globals); + + return status; } PyObject * @@ -1184,5 +1296,5 @@ _PyWeakref_IsSubclassRef(PyWeakReference *weakref) interp = interp->runtime->interpreters.main; } PyObject *func = _Py_INTERP_SINGLETON(interp, subclasses_weakref_sentinel); - return weakref->wr_callback == func; + return func != NULL && weakref->wr_callback == func; } From 987f6a38748ccc0df51743c0e47bed35d1b1d20d Mon Sep 17 00:00:00 2001 From: Sergey Miryanov Date: Wed, 2 Jul 2025 22:34:51 +0500 Subject: [PATCH 18/19] Immortalize sentinel callback --- Include/internal/pycore_weakref.h | 3 +++ Objects/weakrefobject.c | 9 +++++++++ Python/pystate.c | 3 ++- 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/Include/internal/pycore_weakref.h b/Include/internal/pycore_weakref.h index d97d60eed54ed0..fb2fd419f0ba78 100644 --- a/Include/internal/pycore_weakref.h +++ b/Include/internal/pycore_weakref.h @@ -131,6 +131,9 @@ PyAPI_FUNC(int) _PyWeakref_IsDead(PyObject *weakref); // Create sentinel callback object for subclasses weakrefs int _PyWeakref_InitSubclassSentinel(PyInterpreterState *interp); +// Clear sentinel callback object +void _PyWeakref_ClearSubclassSentinel(PyInterpreterState *interp); + // Create new PyWeakReference object with sentinel callback PyObject * _PyWeakref_NewSubclassRef(PyObject *ob); diff --git a/Objects/weakrefobject.c b/Objects/weakrefobject.c index 2fc1e002308094..976868a22795a7 100644 --- a/Objects/weakrefobject.c +++ b/Objects/weakrefobject.c @@ -1266,6 +1266,7 @@ _PyWeakref_InitSubclassSentinel(PyInterpreterState *interp) Py_DECREF(result); #endif + _Py_SetImmortal(func); _Py_INTERP_SINGLETON(interp, subclasses_weakref_sentinel) = func; failed: @@ -1275,6 +1276,14 @@ _PyWeakref_InitSubclassSentinel(PyInterpreterState *interp) return status; } +void +_PyWeakref_ClearSubclassSentinel(PyInterpreterState *interp) +{ + if (interp == interp->runtime->interpreters.main) { + Py_CLEAR(_Py_INTERP_SINGLETON(interp, subclasses_weakref_sentinel)); + } +} + PyObject * _PyWeakref_NewSubclassRef(PyObject *ob) { diff --git a/Python/pystate.c b/Python/pystate.c index c782f79df0982c..5500e183e2a835 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -24,6 +24,7 @@ #include "pycore_stackref.h" // Py_STACKREF_DEBUG #include "pycore_time.h" // _PyTime_Init() #include "pycore_uniqueid.h" // _PyObject_FinalizePerThreadRefcounts() +#include "pycore_weakref.h" // _PyWeakref_ClearSubclassSentinel() /* -------------------------------------------------------------------------- @@ -760,7 +761,7 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate) Py_CLEAR(interp->audit_hooks); - Py_CLEAR(_Py_INTERP_SINGLETON(interp, subclasses_weakref_sentinel)); + _PyWeakref_ClearSubclassSentinel(interp); // At this time, all the threads should be cleared so we don't need atomic // operations for instrumentation_version or eval_breaker. From be87675788644b0adc91dd861893dfd7625aca08 Mon Sep 17 00:00:00 2001 From: Sergey Miryanov Date: Thu, 3 Jul 2025 01:13:20 +0500 Subject: [PATCH 19/19] Clear weakref sentinel in the right place --- Objects/weakrefobject.c | 6 ++++++ Python/pylifecycle.c | 4 ++++ Python/pystate.c | 2 -- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/Objects/weakrefobject.c b/Objects/weakrefobject.c index 976868a22795a7..e3689a63afe9b1 100644 --- a/Objects/weakrefobject.c +++ b/Objects/weakrefobject.c @@ -1280,6 +1280,12 @@ void _PyWeakref_ClearSubclassSentinel(PyInterpreterState *interp) { if (interp == interp->runtime->interpreters.main) { + PyObject *func = _Py_INTERP_SINGLETON(interp, subclasses_weakref_sentinel); + + assert(PyObject_GC_IsTracked(func) == 0); + _Py_SetMortal(_Py_INTERP_SINGLETON(interp, subclasses_weakref_sentinel), 1); + PyObject_GC_Track(func); + Py_CLEAR(_Py_INTERP_SINGLETON(interp, subclasses_weakref_sentinel)); } } diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 4deca6cd924899..aa80c057dcfbc0 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -1876,6 +1876,10 @@ finalize_interp_types(PyInterpreterState *interp) _PyObject_FinalizeUniqueIdPool(interp); #endif + // We clear subclass sentinel here because some weakrefs may + // hold reference to it (for example, type's subclasses) + _PyWeakref_ClearSubclassSentinel(interp); + _PyCode_Fini(interp); // Call _PyUnicode_ClearInterned() before _PyDict_Fini() since it uses diff --git a/Python/pystate.c b/Python/pystate.c index 5500e183e2a835..d95831f8ae6773 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -761,8 +761,6 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate) Py_CLEAR(interp->audit_hooks); - _PyWeakref_ClearSubclassSentinel(interp); - // At this time, all the threads should be cleared so we don't need atomic // operations for instrumentation_version or eval_breaker. interp->ceval.instrumentation_version = 0;