From f14f0badfea9ba005bd01054d3dd49a85639bf5f Mon Sep 17 00:00:00 2001 From: Sergey Miryanov Date: Thu, 19 Jun 2025 23:57:00 +0500 Subject: [PATCH 01/10] Reset type cache after finalization --- Objects/typeobject.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index b9d549610693c1..2090db49e5ed2c 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -10767,6 +10767,8 @@ slot_tp_finalize(PyObject *self) } } + PyType_Modified(Py_TYPE(self)); + _PyThreadState_PopCStackRef(tstate, &cref); /* Restore the saved exception. */ From 61fc657b0833d19befe51d54e73c2f2831fa9e11 Mon Sep 17 00:00:00 2001 From: Mikhail Borisov <43937008+fxeqxmulfx@users.noreply.github.com> Date: Mon, 23 Jun 2025 00:27:39 +0500 Subject: [PATCH 02/10] Add tests --- Lib/test/test_gc.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index b4cbfb6d774080..710a2b3eb2af73 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -1517,6 +1517,23 @@ def test_ast_fini(self): """) assert_python_ok("-c", code) + def test_reset_type_cache_after_finalization(self): + # https://github.com/python/cpython/issues/135552 + code = textwrap.dedent(""" + class BaseNode: + def __del__(self): + BaseNode.next = BaseNode.next.next + + + class Node(BaseNode): + pass + + + BaseNode.next = Node() + BaseNode.next.next = Node() + """) + assert_python_ok("-c", code) + def setUpModule(): global enabled, debug From d8124a958df2ad12850fe1e8d03dcf79b65ba6c0 Mon Sep 17 00:00:00 2001 From: Sergey Miryanov Date: Mon, 23 Jun 2025 00:37:02 +0500 Subject: [PATCH 03/10] Organize tests --- Lib/test/test_gc.py | 47 ++++++++++++++++++++++++++++++--------------- 1 file changed, 31 insertions(+), 16 deletions(-) diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index 710a2b3eb2af73..58c1882886002e 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -1125,6 +1125,37 @@ def test_something(self): """) assert_python_ok("-c", source) + def test_do_not_cleanup_type_subclasses_before_finalization(self): + # https://github.com/python/cpython/issues/135552 + code = textwrap.dedent(""" + class BaseNode: + def __del__(self): + BaseNode.next = BaseNode.next.next + + class Node(BaseNode): + pass + + BaseNode.next = Node() + BaseNode.next.next = Node() + """) + assert_python_ok("-c", code) + + code_inside_function = textwrap.dedent(""" + def test(): + class BaseNode: + def __del__(self): + BaseNode.next = BaseNode.next.next + + class Node(BaseNode): + pass + + BaseNode.next = Node() + BaseNode.next.next = Node() + + test() + """) + assert_python_ok("-c", code_inside_function) + class IncrementalGCTests(unittest.TestCase): @unittest.skipIf(_testinternalcapi is None, "requires _testinternalcapi") @@ -1517,22 +1548,6 @@ def test_ast_fini(self): """) assert_python_ok("-c", code) - def test_reset_type_cache_after_finalization(self): - # https://github.com/python/cpython/issues/135552 - code = textwrap.dedent(""" - class BaseNode: - def __del__(self): - BaseNode.next = BaseNode.next.next - - - class Node(BaseNode): - pass - - - BaseNode.next = Node() - BaseNode.next.next = Node() - """) - assert_python_ok("-c", code) def setUpModule(): From bb215104e47a6ac8b91bba6d901954a77447bf22 Mon Sep 17 00:00:00 2001 From: Sergey Miryanov Date: Mon, 23 Jun 2025 00:37:20 +0500 Subject: [PATCH 04/10] Remove wrong solution --- Objects/typeobject.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 2090db49e5ed2c..b9d549610693c1 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -10767,8 +10767,6 @@ slot_tp_finalize(PyObject *self) } } - PyType_Modified(Py_TYPE(self)); - _PyThreadState_PopCStackRef(tstate, &cref); /* Restore the saved exception. */ From 0e649ab0424e69b3e403d1ddc54574ea447fe995 Mon Sep 17 00:00:00 2001 From: Sergey Miryanov Date: Mon, 23 Jun 2025 00:56:15 +0500 Subject: [PATCH 05/10] Do not clear weakrefs to types before finalization of instances --- Python/gc.c | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/Python/gc.c b/Python/gc.c index 7b0e6d6e803504..8f46bd39e54c8c 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -858,6 +858,21 @@ move_legacy_finalizer_reachable(PyGC_Head *finalizers) } } +/* Move types from unreachable set to prevent clearing of type's subclasses */ +static void +move_types_from_unreachable(PyGC_Head *unreachable, PyGC_Head *to) +{ + PyGC_Head *gc, *next; + for(gc = GC_NEXT(unreachable); gc != unreachable; gc = next) { + PyObject *op = FROM_GC(gc); + next = GC_NEXT(gc); + + if (PyType_Check(op)) { + gc_list_move(gc, to); + } + } +} + /* Clear all weakrefs to unreachable objects, and if such a weakref has a * callback, invoke it if necessary. Note that it's possible for such * weakrefs to be outside the unreachable set -- indeed, those are precisely @@ -1698,6 +1713,7 @@ gc_collect_region(PyThreadState *tstate, { PyGC_Head unreachable; /* non-problematic unreachable trash */ PyGC_Head finalizers; /* objects with, & reachable from, __del__ */ + PyGC_Head types; /* unreachable types */ PyGC_Head *gc; /* initialize to prevent a compiler warning */ GCState *gcstate = &tstate->interp->gc; @@ -1736,6 +1752,15 @@ gc_collect_region(PyThreadState *tstate, } } + /* All types in the unreachable set should be handled after the + * instances of those types are finalized. Otherwise, when we clear + * the weak references, the subclasses list will also be cleared, and + * the type's cache will not be properly invalidated from + * within the __del__ method. + */ + gc_list_init(&types); + move_types_from_unreachable(&unreachable, &types); + /* Clear weakrefs and invoke callbacks as necessary. */ stats->collected += handle_weakrefs(&unreachable, to); gc_list_validate_space(to, gcstate->visited_space); @@ -1744,6 +1769,20 @@ gc_collect_region(PyThreadState *tstate, /* Call tp_finalize on objects which have one. */ finalize_garbage(tstate, &unreachable); + + /* Clear weakrefs to types and invoke callbacks as necessary. */ + stats->collected += handle_weakrefs(&types, to); + gc_list_validate_space(to, gcstate->visited_space); + validate_list(to, collecting_clear_unreachable_clear); + + /* Call tp_finalize on types. */ + finalize_garbage(tstate, &types); + + /* Merge types back to unreachable to properly process resurected + * objects and so on. + */ + gc_list_merge(&types, &unreachable); + /* Handle any objects that may have resurrected after the call * to 'finalize_garbage' and continue the collection with the * objects that are still unreachable */ From ac394bc36fe5a3fc9dbb1d011eee6995b78b92d4 Mon Sep 17 00:00:00 2001 From: Sergey Miryanov Date: Mon, 23 Jun 2025 01:10:02 +0500 Subject: [PATCH 06/10] Add news --- .../2025-06-23-01-07-09.gh-issue-135552.eG9vzP.rst | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-06-23-01-07-09.gh-issue-135552.eG9vzP.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-23-01-07-09.gh-issue-135552.eG9vzP.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-23-01-07-09.gh-issue-135552.eG9vzP.rst new file mode 100644 index 00000000000000..cc7067540b9886 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-23-01-07-09.gh-issue-135552.eG9vzP.rst @@ -0,0 +1,4 @@ +Split the handling of weak references in the GC if both types and instances +within a generation are unreachable. This prevent the clearing of the +type's subclasses list too early. This fix a segfault that occurs when +__del__ modifies the base's attributes and tries to access them from self. From 3b1873893a547289252793e6f869f2392d4ee53d Mon Sep 17 00:00:00 2001 From: Sergey Miryanov Date: Mon, 23 Jun 2025 13:00:45 +0500 Subject: [PATCH 07/10] Update Misc/NEWS.d/next/Core_and_Builtins/2025-06-23-01-07-09.gh-issue-135552.eG9vzP.rst Co-authored-by: sobolevn --- .../2025-06-23-01-07-09.gh-issue-135552.eG9vzP.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-23-01-07-09.gh-issue-135552.eG9vzP.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-23-01-07-09.gh-issue-135552.eG9vzP.rst index cc7067540b9886..d13582bc14d13f 100644 --- a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-23-01-07-09.gh-issue-135552.eG9vzP.rst +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-23-01-07-09.gh-issue-135552.eG9vzP.rst @@ -1,4 +1,4 @@ Split the handling of weak references in the GC if both types and instances within a generation are unreachable. This prevent the clearing of the type's subclasses list too early. This fix a segfault that occurs when -__del__ modifies the base's attributes and tries to access them from self. +:meth:`~object.__del__` modifies the base's attributes and tries to access them from self. From de8841c95cfe80c172bd93fc362f30c56f17477b Mon Sep 17 00:00:00 2001 From: Sergey Miryanov Date: Mon, 23 Jun 2025 14:06:16 +0500 Subject: [PATCH 08/10] Update Misc/NEWS.d/next/Core_and_Builtins/2025-06-23-01-07-09.gh-issue-135552.eG9vzP.rst Co-authored-by: Mikhail Efimov --- .../2025-06-23-01-07-09.gh-issue-135552.eG9vzP.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-23-01-07-09.gh-issue-135552.eG9vzP.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-23-01-07-09.gh-issue-135552.eG9vzP.rst index d13582bc14d13f..2822bb576ee392 100644 --- a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-23-01-07-09.gh-issue-135552.eG9vzP.rst +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-23-01-07-09.gh-issue-135552.eG9vzP.rst @@ -1,4 +1,4 @@ Split the handling of weak references in the GC if both types and instances within a generation are unreachable. This prevent the clearing of the -type's subclasses list too early. This fix a segfault that occurs when +type's subclasses list too early. This fix a crash that occurs when :meth:`~object.__del__` modifies the base's attributes and tries to access them from self. From f8745e0614dc6a47268c7d21239da0865f6cd844 Mon Sep 17 00:00:00 2001 From: Sergey Miryanov Date: Mon, 23 Jun 2025 15:51:15 +0500 Subject: [PATCH 09/10] Split unreachable to types and objects while deduce --- Python/gc.c | 81 +++++++++++++++++++++++++++++------------------------ 1 file changed, 45 insertions(+), 36 deletions(-) diff --git a/Python/gc.c b/Python/gc.c index 8f46bd39e54c8c..5354a2e29ed80c 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -665,11 +665,13 @@ visit_reachable(PyObject *op, void *arg) * So we can not gc_list_* functions for unreachable until we remove the flag. */ static void -move_unreachable(PyGC_Head *young, PyGC_Head *unreachable) +move_unreachable(PyGC_Head *young, PyGC_Head *unreachable, PyGC_Head *unreachable_types) { // previous elem in the young list, used for restore gc_prev. PyGC_Head *prev = young; PyGC_Head *gc = GC_NEXT(young); + PyGC_Head *to; + PyObject *op; /* Invariants: all objects "to the left" of us in young are reachable * (directly or indirectly) from outside the young list as it was at entry. @@ -720,9 +722,17 @@ move_unreachable(PyGC_Head *young, PyGC_Head *unreachable) // No need to gc->next->prev = prev because it is single linked. prev->_gc_next = gc->_gc_next; + op = FROM_GC(gc); + if (unreachable_types != NULL && PyType_Check(op)) { + to = unreachable_types; + } + else { + to = unreachable; + } + // We can't use gc_list_append() here because we use // NEXT_MASK_UNREACHABLE here. - PyGC_Head *last = GC_PREV(unreachable); + PyGC_Head *last = GC_PREV(to); // NOTE: Since all objects in unreachable set has // NEXT_MASK_UNREACHABLE flag, we set it unconditionally. // But this may pollute the unreachable list head's 'next' pointer @@ -730,8 +740,8 @@ move_unreachable(PyGC_Head *young, PyGC_Head *unreachable) // damage is repaired when this function ends. last->_gc_next = flags | (uintptr_t)gc; _PyGCHead_SET_PREV(gc, last); - gc->_gc_next = flags | (uintptr_t)unreachable; - unreachable->_gc_prev = (uintptr_t)gc; + gc->_gc_next = flags | (uintptr_t)to; + to->_gc_prev = (uintptr_t)gc; } gc = _PyGCHead_NEXT(prev); } @@ -740,6 +750,9 @@ move_unreachable(PyGC_Head *young, PyGC_Head *unreachable) young->_gc_next &= _PyGC_PREV_MASK; // don't let the pollution of the list head's next pointer leak unreachable->_gc_next &= _PyGC_PREV_MASK; + if (unreachable_types != NULL) { + unreachable_types->_gc_next &= _PyGC_PREV_MASK; + } } /* In theory, all tuples should be younger than the @@ -858,21 +871,6 @@ move_legacy_finalizer_reachable(PyGC_Head *finalizers) } } -/* Move types from unreachable set to prevent clearing of type's subclasses */ -static void -move_types_from_unreachable(PyGC_Head *unreachable, PyGC_Head *to) -{ - PyGC_Head *gc, *next; - for(gc = GC_NEXT(unreachable); gc != unreachable; gc = next) { - PyObject *op = FROM_GC(gc); - next = GC_NEXT(gc); - - if (PyType_Check(op)) { - gc_list_move(gc, to); - } - } -} - /* Clear all weakrefs to unreachable objects, and if such a weakref has a * callback, invoke it if necessary. Note that it's possible for such * weakrefs to be outside the unreachable set -- indeed, those are precisely @@ -1184,6 +1182,8 @@ delete_garbage(PyThreadState *tstate, GCState *gcstate, them to the "unreachable" list. This step also needs to move back to "base" all objects that were initially marked as unreachable but are referred transitively by the reachable objects (the ones with strictly positive reference count). +4. Split unreachable objects and unreachable types to prevent clearing types + before instances. Contracts: @@ -1198,7 +1198,8 @@ flag is cleared (for example, by using 'clear_unreachable_mask' function or by a call to 'move_legacy_finalizers'), the 'unreachable' list is not a normal list and we can not use most gc_list_* functions for it. */ static inline void -deduce_unreachable(PyGC_Head *base, PyGC_Head *unreachable) { +deduce_unreachable(PyGC_Head *base, PyGC_Head *unreachable, + PyGC_Head* unreachable_types) { validate_list(base, collecting_clear_unreachable_clear); /* Using ob_refcnt and gc_refs, calculate which objects in the * container set are reachable from outside the set (i.e., have a @@ -1242,10 +1243,19 @@ deduce_unreachable(PyGC_Head *base, PyGC_Head *unreachable) { * objects will remain unreachable, so it would be more efficient to move * the reachable objects instead. But this is a one-time cost, probably not * worth complicating the code to speed just a little. + * + * Note on types: All types in the unreachable set should be handled after + * the instances of those types are finalized. Otherwise, when we clear + * the weak references, the subclasses list will also be cleared, and + * the type's cache will not be properly invalidated from + * within the __del__ method. */ - move_unreachable(base, unreachable); // gc_prev is pointer again + move_unreachable(base, unreachable, unreachable_types); // gc_prev is pointer again validate_list(base, collecting_clear_unreachable_clear); validate_list(unreachable, collecting_set_unreachable_set); + if (unreachable_types != NULL) { + validate_list(unreachable_types, collecting_set_unreachable_set); + } } /* Handle objects that may have resurrected after a call to 'finalize_garbage', moving @@ -1273,7 +1283,7 @@ handle_resurrected_objects(PyGC_Head *unreachable, PyGC_Head* still_unreachable, // have the PREV_MARK_COLLECTING set, but the objects are going to be // removed so we can skip the expense of clearing the flag. PyGC_Head* resurrected = unreachable; - deduce_unreachable(resurrected, still_unreachable); + deduce_unreachable(resurrected, still_unreachable, NULL); clear_unreachable_mask(still_unreachable); // Move the resurrected objects to the old generation for future collection. @@ -1713,7 +1723,7 @@ gc_collect_region(PyThreadState *tstate, { PyGC_Head unreachable; /* non-problematic unreachable trash */ PyGC_Head finalizers; /* objects with, & reachable from, __del__ */ - PyGC_Head types; /* unreachable types */ + PyGC_Head unreachable_types; /* unreachable types */ PyGC_Head *gc; /* initialize to prevent a compiler warning */ GCState *gcstate = &tstate->interp->gc; @@ -1721,7 +1731,8 @@ gc_collect_region(PyThreadState *tstate, assert(!_PyErr_Occurred(tstate)); gc_list_init(&unreachable); - deduce_unreachable(from, &unreachable); + gc_list_init(&unreachable_types); + deduce_unreachable(from, &unreachable, &unreachable_types); validate_consistent_old_space(from); untrack_tuples(from); validate_consistent_old_space(to); @@ -1738,6 +1749,7 @@ gc_collect_region(PyThreadState *tstate, // NEXT_MASK_UNREACHABLE is cleared here. // After move_legacy_finalizers(), unreachable is normal list. move_legacy_finalizers(&unreachable, &finalizers); + move_legacy_finalizers(&unreachable_types, &finalizers); /* finalizers contains the unreachable objects with a legacy finalizer; * unreachable objects reachable *from* those are also uncollectable, * and we move those into the finalizers list too. @@ -1745,22 +1757,18 @@ gc_collect_region(PyThreadState *tstate, move_legacy_finalizer_reachable(&finalizers); validate_list(&finalizers, collecting_clear_unreachable_clear); validate_list(&unreachable, collecting_set_unreachable_clear); + validate_list(&unreachable_types, collecting_set_unreachable_clear); /* Print debugging information. */ if (gcstate->debug & _PyGC_DEBUG_COLLECTABLE) { for (gc = GC_NEXT(&unreachable); gc != &unreachable; gc = GC_NEXT(gc)) { debug_cycle("collectable", FROM_GC(gc)); } + gc = GC_NEXT(&unreachable_types); + for (; gc != &unreachable_types; gc = GC_NEXT(gc)) { + debug_cycle("collectable", FROM_GC(gc)); + } } - /* All types in the unreachable set should be handled after the - * instances of those types are finalized. Otherwise, when we clear - * the weak references, the subclasses list will also be cleared, and - * the type's cache will not be properly invalidated from - * within the __del__ method. - */ - gc_list_init(&types); - move_types_from_unreachable(&unreachable, &types); - /* Clear weakrefs and invoke callbacks as necessary. */ stats->collected += handle_weakrefs(&unreachable, to); gc_list_validate_space(to, gcstate->visited_space); @@ -1771,17 +1779,18 @@ gc_collect_region(PyThreadState *tstate, finalize_garbage(tstate, &unreachable); /* Clear weakrefs to types and invoke callbacks as necessary. */ - stats->collected += handle_weakrefs(&types, to); + stats->collected += handle_weakrefs(&unreachable_types, to); gc_list_validate_space(to, gcstate->visited_space); validate_list(to, collecting_clear_unreachable_clear); + validate_list(&unreachable_types, collecting_set_unreachable_clear); /* Call tp_finalize on types. */ - finalize_garbage(tstate, &types); + finalize_garbage(tstate, &unreachable_types); /* Merge types back to unreachable to properly process resurected * objects and so on. */ - gc_list_merge(&types, &unreachable); + gc_list_merge(&unreachable_types, &unreachable); /* Handle any objects that may have resurrected after the call * to 'finalize_garbage' and continue the collection with the From 3fe0f152217bd8bdd939c9fb59ce7910db2275ca Mon Sep 17 00:00:00 2001 From: Sergey Miryanov Date: Tue, 24 Jun 2025 15:07:38 +0500 Subject: [PATCH 10/10] Simplify tests --- Lib/test/test_gc.py | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index 58c1882886002e..a6d9ce6f91ff2d 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -1127,30 +1127,23 @@ def test_something(self): def test_do_not_cleanup_type_subclasses_before_finalization(self): # https://github.com/python/cpython/issues/135552 - code = textwrap.dedent(""" + code = """ class BaseNode: def __del__(self): BaseNode.next = BaseNode.next.next + BaseNode.next.next class Node(BaseNode): pass BaseNode.next = Node() BaseNode.next.next = Node() - """) - assert_python_ok("-c", code) + """ + assert_python_ok("-c", textwrap.dedent(code)) - code_inside_function = textwrap.dedent(""" + code_inside_function = textwrap.dedent(F""" def test(): - class BaseNode: - def __del__(self): - BaseNode.next = BaseNode.next.next - - class Node(BaseNode): - pass - - BaseNode.next = Node() - BaseNode.next.next = Node() + {textwrap.indent(code, ' ')} test() """)