gh-135552: Skip clearing of tp_subclasses weakrefs in GC by sergey-miryanov · Pull Request #136147 · python/cpython · GitHub
[go: up one dir, main page]

Skip to content

gh-135552: Skip clearing of tp_subclasses weakrefs in GC #136147

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Changes from 1 commit
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
f14f0ba
Reset type cache after finalization
sergey-miryanov Jun 19, 2025
61fc657
Add tests
fxeqxmulfx Jun 22, 2025
d8124a9
Organize tests
sergey-miryanov Jun 22, 2025
bb21510
Remove wrong solution
sergey-miryanov Jun 22, 2025
0e649ab
Do not clear weakrefs to types before finalization of instances
sergey-miryanov Jun 22, 2025
ff79617
Merge branch 'main' into gh-135552-fix-gc-segfault
sergey-miryanov Jun 22, 2025
ac394bc
Add news
sergey-miryanov Jun 22, 2025
3b18738
Update Misc/NEWS.d/next/Core_and_Builtins/2025-06-23-01-07-09.gh-issu…
sergey-miryanov Jun 23, 2025
de8841c
Update Misc/NEWS.d/next/Core_and_Builtins/2025-06-23-01-07-09.gh-issu…
sergey-miryanov Jun 23, 2025
f8745e0
Split unreachable to types and objects while deduce
sergey-miryanov Jun 23, 2025
f166933
Merge branch 'gh-135552-fix-gc-segfault' of github.com:sergey-miryano…
sergey-miryanov Jun 23, 2025
3fe0f15
Simplify tests
sergey-miryanov Jun 24, 2025
96f09db
Merge branch 'main' into gh-135552-fix-gc-segfault-2
sergey-miryanov Jun 30, 2025
1ad18ec
Remove unreachable_types pass
sergey-miryanov Jun 30, 2025
bc0297c
Remove obsolete comments
sergey-miryanov Jun 30, 2025
3294f2e
Add is_subclass to PyWeakReference and do not clear those weakrefs in GC
sergey-miryanov Jun 30, 2025
6c45159
Create weakrefs for subclasses with sentinel callback to properly han…
sergey-miryanov Jul 1, 2025
e5e95ff
Update news entry
sergey-miryanov Jul 1, 2025
edf634c
Add some comments
sergey-miryanov Jul 1, 2025
bd7d9f3
Crazy idea to use noop-callback as a sentinel
sergey-miryanov Jul 2, 2025
987f6a3
Immortalize sentinel callback
sergey-miryanov Jul 2, 2025
be87675
Clear weakref sentinel in the right place
sergey-miryanov Jul 2, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Split unreachable to types and objects while deduce
  • Loading branch information
sergey-miryanov committed Jun 23, 2025
commit f8745e0614dc6a47268c7d21239da0865f6cd844
81 changes: 45 additions & 36 deletions Python/gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -665,11 +665,13 @@ visit_reachable(PyObject *op, void *arg)
* So we can not use gc_list_* functions for unreachable until we remove the flag.
*/
static void
move_unreachable(PyGC_Head *young, PyGC_Head *unreachable)
move_unreachable(PyGC_Head *young, PyGC_Head *unreachable, PyGC_Head *unreachable_types)
{
// previous elem in the young list, used for restore gc_prev.
PyGC_Head *prev = young;
PyGC_Head *gc = GC_NEXT(young);
PyGC_Head *to;
PyObject *op;

/* Invariants: all objects "to the left" of us in young are reachable
* (directly or indirectly) from outside the young list as it was at entry.
Expand Down Expand Up @@ -720,18 +722,26 @@ move_unreachable(PyGC_Head *young, PyGC_Head *unreachable)
// No need to gc->next->prev = prev because it is single linked.
prev->_gc_next = gc->_gc_next;

op = FROM_GC(gc);
if (unreachable_types != NULL && PyType_Check(op)) {
to = unreachable_types;
}
else {
to = unreachable;
}

// We can't use gc_list_append() here because we use
// NEXT_MASK_UNREACHABLE here.
PyGC_Head *last = GC_PREV(unreachable);
PyGC_Head *last = GC_PREV(to);
// NOTE: Since all objects in unreachable set have
// NEXT_MASK_UNREACHABLE flag, we set it unconditionally.
// But this may pollute the unreachable list head's 'next' pointer
// too. That's semantically senseless but expedient here - the
// damage is repaired when this function ends.
last->_gc_next = flags | (uintptr_t)gc;
_PyGCHead_SET_PREV(gc, last);
gc->_gc_next = flags | (uintptr_t)unreachable;
unreachable->_gc_prev = (uintptr_t)gc;
gc->_gc_next = flags | (uintptr_t)to;
to->_gc_prev = (uintptr_t)gc;
}
gc = _PyGCHead_NEXT(prev);
}
Expand All @@ -740,6 +750,9 @@ move_unreachable(PyGC_Head *young, PyGC_Head *unreachable)
young->_gc_next &= _PyGC_PREV_MASK;
// don't let the pollution of the list head's next pointer leak
unreachable->_gc_next &= _PyGC_PREV_MASK;
if (unreachable_types != NULL) {
unreachable_types->_gc_next &= _PyGC_PREV_MASK;
}
}

/* In theory, all tuples should be younger than the
Expand Down Expand Up @@ -858,21 +871,6 @@ move_legacy_finalizer_reachable(PyGC_Head *finalizers)
}
}

/* Move types from unreachable set to prevent clearing of type's subclasses.
 *
 * Walks the `unreachable` list from its first entry until the walk returns
 * to the list head.  Every entry whose object passes PyType_Check() is handed
 * to gc_list_move() with `to` as the destination; all other entries are left
 * where they are.
 */
static void
move_types_from_unreachable(PyGC_Head *unreachable, PyGC_Head *to)
{
PyGC_Head *gc, *next;
for(gc = GC_NEXT(unreachable); gc != unreachable; gc = next) {
PyObject *op = FROM_GC(gc);
// Fetch the successor before gc is possibly moved below; presumably
// gc_list_move() relinks gc into `to`, so GC_NEXT(gc) would no longer
// point into `unreachable` afterwards -- TODO confirm against gc_list_move().
next = GC_NEXT(gc);

if (PyType_Check(op)) {
gc_list_move(gc, to);
}
}
}

/* Clear all weakrefs to unreachable objects, and if such a weakref has a
* callback, invoke it if necessary. Note that it's possible for such
* weakrefs to be outside the unreachable set -- indeed, those are precisely
Expand Down Expand Up @@ -1184,6 +1182,8 @@ delete_garbage(PyThreadState *tstate, GCState *gcstate,
them to the "unreachable" list. This step also needs to move back to "base" all
objects that were initially marked as unreachable but are referred transitively
by the reachable objects (the ones with strictly positive reference count).
4. Split unreachable objects and unreachable types to prevent clearing types
before instances.

Contracts:

Expand All @@ -1198,7 +1198,8 @@ flag is cleared (for example, by using 'clear_unreachable_mask' function or
by a call to 'move_legacy_finalizers'), the 'unreachable' list is not a normal
list and we can not use most gc_list_* functions for it. */
static inline void
deduce_unreachable(PyGC_Head *base, PyGC_Head *unreachable) {
deduce_unreachable(PyGC_Head *base, PyGC_Head *unreachable,
PyGC_Head* unreachable_types) {
validate_list(base, collecting_clear_unreachable_clear);
/* Using ob_refcnt and gc_refs, calculate which objects in the
* container set are reachable from outside the set (i.e., have a
Expand Down Expand Up @@ -1242,10 +1243,19 @@ deduce_unreachable(PyGC_Head *base, PyGC_Head *unreachable) {
* objects will remain unreachable, so it would be more efficient to move
* the reachable objects instead. But this is a one-time cost, probably not
* worth complicating the code to speed just a little.
*
* Note on types: All types in the unreachable set should be handled after
* the instances of those types are finalized. Otherwise, when we clear
* the weak references, the subclasses list will also be cleared, and
* the type's cache will not be properly invalidated from
* within the __del__ method.
*/
move_unreachable(base, unreachable); // gc_prev is pointer again
move_unreachable(base, unreachable, unreachable_types); // gc_prev is pointer again
validate_list(base, collecting_clear_unreachable_clear);
validate_list(unreachable, collecting_set_unreachable_set);
if (unreachable_types != NULL) {
validate_list(unreachable_types, collecting_set_unreachable_set);
}
}

/* Handle objects that may have resurrected after a call to 'finalize_garbage', moving
Expand Down Expand Up @@ -1273,7 +1283,7 @@ handle_resurrected_objects(PyGC_Head *unreachable, PyGC_Head* still_unreachable,
// have the PREV_MARK_COLLECTING set, but the objects are going to be
// removed so we can skip the expense of clearing the flag.
PyGC_Head* resurrected = unreachable;
deduce_unreachable(resurrected, still_unreachable);
deduce_unreachable(resurrected, still_unreachable, NULL);
clear_unreachable_mask(still_unreachable);

// Move the resurrected objects to the old generation for future collection.
Expand Down Expand Up @@ -1713,15 +1723,16 @@ gc_collect_region(PyThreadState *tstate,
{
PyGC_Head unreachable; /* non-problematic unreachable trash */
PyGC_Head finalizers; /* objects with, & reachable from, __del__ */
PyGC_Head types; /* unreachable types */
PyGC_Head unreachable_types; /* unreachable types */
PyGC_Head *gc; /* initialize to prevent a compiler warning */
GCState *gcstate = &tstate->interp->gc;

assert(gcstate->garbage != NULL);
assert(!_PyErr_Occurred(tstate));

gc_list_init(&unreachable);
deduce_unreachable(from, &unreachable);
gc_list_init(&unreachable_types);
deduce_unreachable(from, &unreachable, &unreachable_types);
validate_consistent_old_space(from);
untrack_tuples(from);
validate_consistent_old_space(to);
Expand All @@ -1738,29 +1749,26 @@ gc_collect_region(PyThreadState *tstate,
// NEXT_MASK_UNREACHABLE is cleared here.
// After move_legacy_finalizers(), unreachable is normal list.
move_legacy_finalizers(&unreachable, &finalizers);
move_legacy_finalizers(&unreachable_types, &finalizers);
/* finalizers contains the unreachable objects with a legacy finalizer;
* unreachable objects reachable *from* those are also uncollectable,
* and we move those into the finalizers list too.
*/
move_legacy_finalizer_reachable(&finalizers);
validate_list(&finalizers, collecting_clear_unreachable_clear);
validate_list(&unreachable, collecting_set_unreachable_clear);
validate_list(&unreachable_types, collecting_set_unreachable_clear);
/* Print debugging information. */
if (gcstate->debug & _PyGC_DEBUG_COLLECTABLE) {
for (gc = GC_NEXT(&unreachable); gc != &unreachable; gc = GC_NEXT(gc)) {
debug_cycle("collectable", FROM_GC(gc));
}
gc = GC_NEXT(&unreachable_types);
for (; gc != &unreachable_types; gc = GC_NEXT(gc)) {
debug_cycle("collectable", FROM_GC(gc));
}
}

/* All types in the unreachable set should be handled after the
* instances of those types are finalized. Otherwise, when we clear
* the weak references, the subclasses list will also be cleared, and
* the type's cache will not be properly invalidated from
* within the __del__ method.
*/
gc_list_init(&types);
move_types_from_unreachable(&unreachable, &types);

/* Clear weakrefs and invoke callbacks as necessary. */
stats->collected += handle_weakrefs(&unreachable, to);
gc_list_validate_space(to, gcstate->visited_space);
Expand All @@ -1771,17 +1779,18 @@ gc_collect_region(PyThreadState *tstate,
finalize_garbage(tstate, &unreachable);

/* Clear weakrefs to types and invoke callbacks as necessary. */
stats->collected += handle_weakrefs(&types, to);
stats->collected += handle_weakrefs(&unreachable_types, to);
gc_list_validate_space(to, gcstate->visited_space);
validate_list(to, collecting_clear_unreachable_clear);
validate_list(&unreachable_types, collecting_set_unreachable_clear);

/* Call tp_finalize on types. */
finalize_garbage(tstate, &types);
finalize_garbage(tstate, &unreachable_types);

/* Merge types back to unreachable to properly process resurrected
* objects and so on.
*/
gc_list_merge(&types, &unreachable);
gc_list_merge(&unreachable_types, &unreachable);

/* Handle any objects that may have resurrected after the call
* to 'finalize_garbage' and continue the collection with the
Expand Down
0