diff --git a/Lib/test/test_finalization.py b/Lib/test/test_finalization.py index 42871f8a09b16b..f8f20d4ebd6bc0 100644 --- a/Lib/test/test_finalization.py +++ b/Lib/test/test_finalization.py @@ -276,12 +276,17 @@ def test_simple_resurrect(self): s = SelfCycleResurrector() ids = [id(s)] wr = weakref.ref(s) + wrc = weakref.ref(s, lambda x: None) del s gc.collect() self.assert_del_calls(ids) self.assert_survivors(ids) - # XXX is this desirable? - self.assertIs(wr(), None) + # This used to be None because weakrefs were cleared before + # calling finalizers. Now they are cleared after. + self.assertIsNot(wr(), None) + # A weakref with a callback is still cleared before calling + # finalizers. + self.assertIsNone(wrc()) # When trying to destroy the object a second time, __del__ # isn't called anymore (and the object isn't resurrected). self.clear_survivors() @@ -378,18 +383,27 @@ def check_non_resurrecting_chain(self, classes): def check_resurrecting_chain(self, classes): N = len(classes) + def dummy_callback(ref): + pass with SimpleBase.test(): nodes = self.build_chain(classes) N = len(nodes) ids = [id(s) for s in nodes] survivor_ids = [id(s) for s in nodes if isinstance(s, SimpleResurrector)] wrs = [weakref.ref(s) for s in nodes] + wrcs = [weakref.ref(s, dummy_callback) for s in nodes] del nodes gc.collect() self.assert_del_calls(ids) self.assert_survivors(survivor_ids) - # XXX desirable? - self.assertEqual([wr() for wr in wrs], [None] * N) + for wr in wrs: + # These values used to be None because weakrefs were cleared + # before calling finalizers. Now they are cleared after. + self.assertIsNotNone(wr()) + for wr in wrcs: + # Weakrefs with callbacks are still cleared before calling + # finalizers. + self.assertIsNone(wr()) self.clear_survivors() gc.collect() self.assert_del_calls(ids) diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index 85c43055d0dcec..ff60d7c7a8447d 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -309,6 +309,26 @@ def func(): self.assertRegex(stdout, rb"""\A\s*func=None""") self.assertFalse(stderr) + def test_datetime_weakref_cycle(self): + # https://github.com/python/cpython/issues/132413 + # If the weakref used by the datetime extension gets cleared by the GC (due to being + # in an unreachable cycle) then datetime functions would crash (get_module_state() + # was returning a NULL pointer). This bug is fixed by clearing weakrefs without + # callbacks *after* running finalizers. + code = """if 1: + import _datetime + class C: + def __del__(self): + print('__del__ called') + _datetime.timedelta(days=1) # crash? + + l = [C()] + l.append(l) + """ + rc, stdout, stderr = assert_python_ok("-c", code) + self.assertEqual(rc, 0) + self.assertEqual(stdout.strip(), b'__del__ called') + @refcount_test def test_frame(self): def f(): @@ -658,9 +678,8 @@ def callback(ignored): gc.collect() self.assertEqual(len(ouch), 2) # else the callbacks didn't run for x in ouch: - # If the callback resurrected one of these guys, the instance - # would be damaged, with an empty __dict__. - self.assertEqual(x, None) + # The weakref should be cleared before executing the callback. + self.assertIsNone(x) def test_bug21435(self): # This is a poor test - its only virtue is that it happened to @@ -1330,6 +1349,7 @@ def setUp(self): def tearDown(self): gc.disable() + @unittest.skipIf(Py_GIL_DISABLED, "requires GC generations or increments") def test_bug1055820c(self): # Corresponds to temp2c.py in the bug report. This is pretty # elaborate. @@ -1405,6 +1425,7 @@ def callback(ignored): self.assertEqual(x, None) @gc_threshold(1000, 0, 0) + @unittest.skipIf(Py_GIL_DISABLED, "requires GC generations or increments") def test_bug1055820d(self): # Corresponds to temp2d.py in the bug report. This is very much like # test_bug1055820c, but uses a __del__ method instead of a weakref diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py index b487bcabf01ca4..92be2763e5ed1e 100644 --- a/Lib/test/test_io.py +++ b/Lib/test/test_io.py @@ -808,7 +808,12 @@ def test_closefd_attr(self): def test_garbage_collection(self): # FileIO objects are collected, and collecting them flushes # all data to disk. - with warnings_helper.check_warnings(('', ResourceWarning)): + # + # Note that using warnings_helper.check_warnings() will keep the + # file alive due to the `source` argument to warn(). So, use + # catch_warnings() instead. + with warnings.catch_warnings(): + warnings.simplefilter("ignore", ResourceWarning) f = self.FileIO(os_helper.TESTFN, "wb") f.write(b"abcxxx") f.f = f @@ -1809,7 +1814,11 @@ def test_garbage_collection(self): # C BufferedReader objects are collected. # The Python version has __del__, so it ends into gc.garbage instead self.addCleanup(os_helper.unlink, os_helper.TESTFN) - with warnings_helper.check_warnings(('', ResourceWarning)): + # Note that using warnings_helper.check_warnings() will keep the + # file alive due to the `source` argument to warn(). So, use + # catch_warnings() instead. + with warnings.catch_warnings(): + warnings.simplefilter("ignore", ResourceWarning) rawio = self.FileIO(os_helper.TESTFN, "w+b") f = self.tp(rawio) f.f = f @@ -2158,7 +2167,11 @@ def test_garbage_collection(self): # all data to disk. # The Python version has __del__, so it ends into gc.garbage instead self.addCleanup(os_helper.unlink, os_helper.TESTFN) - with warnings_helper.check_warnings(('', ResourceWarning)): + # Note that using warnings_helper.check_warnings() will keep the + # file alive due to the `source` argument to warn(). So, use + # catch_warnings() instead. + with warnings.catch_warnings(): + warnings.simplefilter("ignore", ResourceWarning) rawio = self.FileIO(os_helper.TESTFN, "w+b") f = self.tp(rawio) f.write(b"123xxx") @@ -4080,7 +4093,8 @@ def test_garbage_collection(self): # C TextIOWrapper objects are collected, and collecting them flushes # all data to disk. # The Python version has __del__, so it ends in gc.garbage instead. - with warnings_helper.check_warnings(('', ResourceWarning)): + with warnings.catch_warnings(): + warnings.simplefilter("ignore", ResourceWarning) rawio = self.FileIO(os_helper.TESTFN, "wb") b = self.BufferedWriter(rawio) t = self.TextIOWrapper(b, encoding="ascii") diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-07-03-06-04-42.gh-issue-135552.CbBQof.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-07-03-06-04-42.gh-issue-135552.CbBQof.rst new file mode 100644 index 00000000000000..ea30a43fc25d41 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-07-03-06-04-42.gh-issue-135552.CbBQof.rst @@ -0,0 +1,7 @@ +Fix a bug caused by the garbage collector clearing weakrefs too early. The +weakrefs in the ``tp_subclasses`` dictionary are needed in order to correctly +invalidate type caches (for example, by calling ``PyType_Modified()``). +Clearing weakrefs before calling finalizers causes the caches to not be +correctly invalidated. That can cause crashes since the caches can refer to +invalid objects. Defer the clearing of weakrefs without callbacks until after +finalizers are executed. diff --git a/Modules/_datetimemodule.c b/Modules/_datetimemodule.c index 7a6426593d021f..b9f16f3210dc8d 100644 --- a/Modules/_datetimemodule.c +++ b/Modules/_datetimemodule.c @@ -218,7 +218,7 @@ clear_current_module(PyInterpreterState *interp, PyObject *expected) if (PyDict_GetItemRef(dict, INTERP_KEY, &ref) < 0) { goto error; } - if (ref != NULL) { + if (ref != NULL && ref != Py_None) { PyObject *current = NULL; int rc = PyWeakref_GetRef(ref, ¤t); /* We only need "current" for pointer comparison. */ diff --git a/Modules/gc_weakref.txt b/Modules/gc_weakref.txt index f53fb99dd6cdcb..c3b8cc743ccd21 100644 --- a/Modules/gc_weakref.txt +++ b/Modules/gc_weakref.txt @@ -1,6 +1,16 @@ Intro ===== +************************************************************************** +Note: this document was written long ago, before PEP 442 (safe object +finalization) was implemented. While that has changed some things, this +document is still mostly accurate. Just note that the rules being discussed +here apply to the unreachable set of objects *after* non-legacy finalizers +have been called. Also, the clearing of weakrefs has been changed to happen +later in the collection (after running finalizers but before tp_clear is +called). +************************************************************************** + The basic rule for dealing with weakref callbacks (and __del__ methods too, for that matter) during cyclic gc: diff --git a/Python/gc.c b/Python/gc.c index 88849a43680d2e..09c2da64414847 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -858,60 +858,31 @@ move_legacy_finalizer_reachable(PyGC_Head *finalizers) } } -/* Clear all weakrefs to unreachable objects, and if such a weakref has a - * callback, invoke it if necessary. Note that it's possible for such - * weakrefs to be outside the unreachable set -- indeed, those are precisely - * the weakrefs whose callbacks must be invoked. See gc_weakref.txt for - * overview & some details. Some weakrefs with callbacks may be reclaimed - * directly by this routine; the number reclaimed is the return value. Other - * weakrefs with callbacks may be moved into the `old` generation. Objects - * moved into `old` have gc_refs set to GC_REACHABLE; the objects remaining in - * unreachable are left at GC_TENTATIVELY_UNREACHABLE. When this returns, - * no object in `unreachable` is weakly referenced anymore. +/* Handle weakref callbacks. Note that it's possible for such weakrefs to be + * outside the unreachable set -- indeed, those are precisely the weakrefs + * whose callbacks must be invoked. See gc_weakref.txt for overview & some + * details. */ static int -handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old, bool allow_callbacks) +handle_weakref_callbacks(PyGC_Head *unreachable, PyGC_Head *old) { PyGC_Head *gc; - PyObject *op; /* generally FROM_GC(gc) */ - PyWeakReference *wr; /* generally a cast of op */ PyGC_Head wrcb_to_call; /* weakrefs with callbacks to call */ PyGC_Head *next; int num_freed = 0; - if (allow_callbacks) { - gc_list_init(&wrcb_to_call); - } + gc_list_init(&wrcb_to_call); - /* Clear all weakrefs to the objects in unreachable. If such a weakref - * also has a callback, move it into `wrcb_to_call` if the callback - * needs to be invoked. Note that we cannot invoke any callbacks until - * all weakrefs to unreachable objects are cleared, lest the callback - * resurrect an unreachable object via a still-active weakref. We - * make another pass over wrcb_to_call, invoking callbacks, after this - * pass completes. + /* Find all weakrefs with callbacks and move into `wrcb_to_call` if the + * callback needs to be invoked. We make another pass over wrcb_to_call, + * invoking callbacks, after this pass completes. */ for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) { PyWeakReference **wrlist; - op = FROM_GC(gc); + PyObject *op = FROM_GC(gc); next = GC_NEXT(gc); - if (PyWeakref_Check(op)) { - /* A weakref inside the unreachable set must be cleared. If we - * allow its callback to execute inside delete_garbage(), it - * could expose objects that have tp_clear already called on - * them. Or, it could resurrect unreachable objects. One way - * this can happen is if some container objects do not implement - * tp_traverse. Then, wr_object can be outside the unreachable - * set but can be deallocated as a result of breaking the - * reference cycle. If we don't clear the weakref, the callback - * will run and potentially cause a crash. See bpo-38006 for - * one example. - */ - _PyWeakref_ClearRef((PyWeakReference *)op); - } - if (! _PyType_SUPPORTS_WEAKREFS(Py_TYPE(op))) { continue; } @@ -923,23 +894,39 @@ handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old, bool allow_callbacks) */ wrlist = _PyObject_GET_WEAKREFS_LISTPTR_FROM_OFFSET(op); - /* `op` may have some weakrefs. March over the list, clear - * all the weakrefs, and move the weakrefs with callbacks - * that must be called into wrcb_to_call. + /* `op` may have some weakrefs. March over the list and move the + * weakrefs with callbacks that must be called into wrcb_to_call. */ - for (wr = *wrlist; wr != NULL; wr = *wrlist) { - PyGC_Head *wrasgc; /* AS_GC(wr) */ - - /* _PyWeakref_ClearRef clears the weakref but leaves - * the callback pointer intact. Obscure: it also - * changes *wrlist. - */ - _PyObject_ASSERT((PyObject *)wr, wr->wr_object == op); - _PyWeakref_ClearRef(wr); - _PyObject_ASSERT((PyObject *)wr, wr->wr_object == Py_None); - - if (!allow_callbacks) { - continue; + PyWeakReference *next_wr; + for (PyWeakReference *wr = *wrlist; wr != NULL; wr = next_wr) { + // Get the next list element to get iterator progress if we omit + // clearing of the weakref (because _PyWeakref_ClearRef changes + // next pointer in the wrlist). + next_wr = wr->wr_next; + + // Weakrefs with callbacks always need to be cleared before + // executing the callback. Sometimes the callback will call + // the ref object, to check if it's actually a dead reference + // (KeyedRef does this, for example). We want to indicate that it + // is dead, even though it is possible a finalizer might resurrect + // it. Clearing also prevents the callback from being executing + // more than once. + // + // Since Python 2.3, all weakrefs to cyclic garbage have + // been cleared *before* calling finalizers. However, since + // tp_subclasses started being necessary to invalidate caches + // (e.g. by PyType_Modified()), that clearing has created a bug. + // If the weakref to the subclass is cleared before a finalizer + // is called, the cache may not be correctly invalidated. That + // can lead to segfaults since the caches can refer to deallocated + // objects. Delaying the clear of weakrefs until *after* + // finalizers have been called fixes that bug. + if (wr->wr_callback != NULL) { + // _PyWeakref_ClearRef clears the weakref but leaves the + // callback pointer intact. Obscure: it also changes *wrlist. + _PyObject_ASSERT((PyObject *)wr, wr->wr_object == op); + _PyWeakref_ClearRef(wr); + _PyObject_ASSERT((PyObject *)wr, wr->wr_object == Py_None); } if (wr->wr_callback == NULL) { @@ -962,10 +949,10 @@ handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old, bool allow_callbacks) * outside the current generation, CT may be reachable from the * callback. Then the callback could resurrect insane objects. * - * Since the callback is never needed and may be unsafe in this case, - * wr is simply left in the unreachable set. Note that because we - * already called _PyWeakref_ClearRef(wr), its callback will never - * trigger. + * Since the callback is never needed and may be unsafe in this + * case, wr is simply left in the unreachable set. Note that + * clear_weakrefs() will ensure its callback will not trigger + * inside delete_garbage(). * * OTOH, if wr isn't part of CT, we should invoke the callback: the * weakref outlived the trash. Note that since wr isn't CT in this @@ -976,8 +963,6 @@ handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old, bool allow_callbacks) * is moved to wrcb_to_call in this case. */ if (gc_is_collecting(AS_GC((PyObject *)wr))) { - /* it should already have been cleared above */ - _PyObject_ASSERT((PyObject*)wr, wr->wr_object == Py_None); continue; } @@ -987,17 +972,13 @@ handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old, bool allow_callbacks) Py_INCREF(wr); /* Move wr to wrcb_to_call, for the next pass. */ - wrasgc = AS_GC((PyObject *)wr); + PyGC_Head *wrasgc = AS_GC((PyObject *)wr); // wrasgc is reachable, but next isn't, so they can't be the same _PyObject_ASSERT((PyObject *)wr, wrasgc != next); gc_list_move(wrasgc, &wrcb_to_call); } } - if (!allow_callbacks) { - return 0; - } - /* Invoke the callbacks we decided to honor. It's safe to invoke them * because they can't reference unreachable objects. */ @@ -1007,9 +988,9 @@ handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old, bool allow_callbacks) PyObject *callback; gc = (PyGC_Head*)wrcb_to_call._gc_next; - op = FROM_GC(gc); + PyObject *op = FROM_GC(gc); _PyObject_ASSERT(op, PyWeakref_Check(op)); - wr = (PyWeakReference *)op; + PyWeakReference *wr = (PyWeakReference *)op; callback = wr->wr_callback; _PyObject_ASSERT(op, callback != NULL); @@ -1048,6 +1029,62 @@ handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old, bool allow_callbacks) return num_freed; } +/* Clear all weakrefs to unreachable objects. When this returns, no object in + * `unreachable` is weakly referenced anymore. + */ +static void +clear_weakrefs(PyGC_Head *unreachable) +{ + PyGC_Head *gc; + PyGC_Head *next; + + for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) { + PyWeakReference **wrlist; + + PyObject *op = FROM_GC(gc); + next = GC_NEXT(gc); + + if (PyWeakref_Check(op)) { + /* A weakref inside the unreachable set must be cleared. If we + * allow its callback to execute inside delete_garbage(), it + * could expose objects that have tp_clear already called on + * them. Or, it could resurrect unreachable objects. One way + * this can happen is if some container objects do not implement + * tp_traverse. Then, wr_object can be outside the unreachable + * set but can be deallocated as a result of breaking the + * reference cycle. If we don't clear the weakref, the callback + * will run and potentially cause a crash. See bpo-38006 for + * one example. + */ + _PyWeakref_ClearRef((PyWeakReference *)op); + } + + if (! _PyType_SUPPORTS_WEAKREFS(Py_TYPE(op))) { + continue; + } + + /* It supports weakrefs. Does it have any? + * + * This is never triggered for static types so we can avoid the + * (slightly) more costly _PyObject_GET_WEAKREFS_LISTPTR(). + */ + wrlist = _PyObject_GET_WEAKREFS_LISTPTR_FROM_OFFSET(op); + + /* `op` may have some weakrefs. March over the list, clear + * all the weakrefs. + */ + for (PyWeakReference *wr = *wrlist; wr != NULL; wr = *wrlist) { + /* _PyWeakref_ClearRef clears the weakref but leaves + * the callback pointer intact. Obscure: it also + * changes *wrlist. + */ + _PyObject_ASSERT((PyObject *)wr, wr->wr_object == op); + _PyWeakref_ClearRef(wr); + _PyObject_ASSERT((PyObject *)wr, wr->wr_object == Py_None); + } + } +} + static void debug_cycle(const char *msg, PyObject *op) { @@ -1747,8 +1784,8 @@ gc_collect_region(PyThreadState *tstate, } } - /* Clear weakrefs and invoke callbacks as necessary. */ - stats->collected += handle_weakrefs(&unreachable, to, true); + /* Invoke weakref callbacks as necessary. */ + stats->collected += handle_weakref_callbacks(&unreachable, to); gc_list_validate_space(to, gcstate->visited_space); validate_list(to, collecting_clear_unreachable_clear); validate_list(&unreachable, collecting_set_unreachable_clear); @@ -1765,10 +1802,18 @@ gc_collect_region(PyThreadState *tstate, /* Clear weakrefs to objects in the unreachable set. No Python-level * code must be allowed to access those unreachable objects. During * delete_garbage(), finalizers outside the unreachable set might run - * and create new weakrefs. If those weakrefs were not cleared, they - * could reveal unreachable objects. Callbacks are not executed. + * and if those weakrefs were not cleared, that could reveal unreachable + * objects. + * + * We used to clear weakrefs earlier, before calling finalizers. That + * causes at least two problems. First, the finalizers could create + * new weakrefs, that refer to unreachable objects. Those would not be + * cleared and could cause the problem described above (see GH-91636 as + * an example). Second, we need the weakrefs in the tp_subclasses to + * *not* be cleared so that caches based on the type version are correctly + * invalidated (see GH-135552 as a bug caused by this). */ - handle_weakrefs(&final_unreachable, NULL, false); + clear_weakrefs(&final_unreachable); /* Call tp_clear on objects in the final_unreachable set. This will cause * the reference cycles to be broken. It may also cause some objects diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index d46598b23b3b2f..876bd29e803f47 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -1491,22 +1491,13 @@ move_legacy_finalizer_reachable(struct collection_state *state) return 0; } -// Clear all weakrefs to unreachable objects. Weakrefs with callbacks are -// optionally enqueued in `wrcb_to_call`, but not invoked yet. +// Weakrefs with callbacks are enqueued in `wrcb_to_call`, but not invoked +// yet. static void -clear_weakrefs(struct collection_state *state, bool enqueue_callbacks) +find_weakref_callbacks(struct collection_state *state) { PyObject *op; WORKSTACK_FOR_EACH(&state->unreachable, op) { - if (PyWeakref_Check(op)) { - // Clear weakrefs that are themselves unreachable to ensure their - // callbacks will not be executed later from a `tp_clear()` - // inside delete_garbage(). That would be unsafe: it could - // resurrect a dead object or access a an already cleared object. - // See bpo-38006 for one example. - _PyWeakref_ClearRef((PyWeakReference *)op); - } - if (!_PyType_SUPPORTS_WEAKREFS(Py_TYPE(op))) { continue; } @@ -1518,16 +1509,36 @@ clear_weakrefs(struct collection_state *state, bool enqueue_callbacks) // `op` may have some weakrefs. March over the list, clear // all the weakrefs, and enqueue the weakrefs with callbacks // that must be called into wrcb_to_call. - for (PyWeakReference *wr = *wrlist; wr != NULL; wr = *wrlist) { - // _PyWeakref_ClearRef clears the weakref but leaves - // the callback pointer intact. Obscure: it also - // changes *wrlist. - _PyObject_ASSERT((PyObject *)wr, wr->wr_object == op); - _PyWeakref_ClearRef(wr); - _PyObject_ASSERT((PyObject *)wr, wr->wr_object == Py_None); - - if (!enqueue_callbacks) { - continue; + PyWeakReference *next_wr; + for (PyWeakReference *wr = *wrlist; wr != NULL; wr = next_wr) { + // Get the next list element to get iterator progress if we omit + // clearing of the weakref (because _PyWeakref_ClearRef changes + // next pointer in the wrlist). + next_wr = wr->wr_next; + + // Weakrefs with callbacks always need to be cleared before + // executing the callback. Sometimes the callback will call + // the ref object, to check if it's actually a dead reference + // (KeyedRef does this, for example). We want to indicate that it + // is dead, even though it is possible a finalizer might resurrect + // it. Clearing also prevents the callback from being executing + // more than once. + // + // Since Python 2.3, all weakrefs to cyclic garbage have + // been cleared *before* calling finalizers. However, since + // tp_subclasses started being necessary to invalidate caches + // (e.g. by PyType_Modified()), that clearing has created a bug. + // If the weakref to the subclass is cleared before a finalizer + // is called, the cache may not be correctly invalidated. That + // can lead to segfaults since the caches can refer to deallocated + // objects. Delaying the clear of weakrefs until *after* + // finalizers have been called fixes that bug. + if (wr->wr_callback != NULL) { + // _PyWeakref_ClearRef clears the weakref but leaves the + // callback pointer intact. Obscure: it also changes *wrlist. + _PyObject_ASSERT((PyObject *)wr, wr->wr_object == op); + _PyWeakref_ClearRef(wr); + _PyObject_ASSERT((PyObject *)wr, wr->wr_object == Py_None); } // We do not invoke callbacks for weakrefs that are themselves @@ -1549,6 +1560,43 @@ clear_weakrefs(struct collection_state *state, bool enqueue_callbacks) } } +// Clear all weakrefs to unreachable objects. +static void +clear_weakrefs(struct collection_state *state) +{ + PyObject *op; + WORKSTACK_FOR_EACH(&state->unreachable, op) { + if (PyWeakref_Check(op)) { + // Clear weakrefs that are themselves unreachable to ensure their + // callbacks will not be executed later from a `tp_clear()` + // inside delete_garbage(). That would be unsafe: it could + // resurrect a dead object or access a an already cleared object. + // See bpo-38006 for one example. + _PyWeakref_ClearRef((PyWeakReference *)op); + } + + if (!_PyType_SUPPORTS_WEAKREFS(Py_TYPE(op))) { + continue; + } + + // NOTE: This is never triggered for static types so we can avoid the + // (slightly) more costly _PyObject_GET_WEAKREFS_LISTPTR(). + PyWeakReference **wrlist = _PyObject_GET_WEAKREFS_LISTPTR_FROM_OFFSET(op); + + // `op` may have some weakrefs. March over the list, clear + // all the weakrefs, and enqueue the weakrefs with callbacks + // that must be called into wrcb_to_call. + for (PyWeakReference *wr = *wrlist; wr != NULL; wr = *wrlist) { + // _PyWeakref_ClearRef clears the weakref but leaves + // the callback pointer intact. Obscure: it also + // changes *wrlist. + _PyObject_ASSERT((PyObject *)wr, wr->wr_object == op); + _PyWeakref_ClearRef(wr); + _PyObject_ASSERT((PyObject *)wr, wr->wr_object == Py_None); + } + } +} + static void call_weakref_callbacks(struct collection_state *state) { @@ -2214,8 +2262,8 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state, // Record the number of live GC objects interp->gc.long_lived_total = state->long_lived_total; - // Clear weakrefs and enqueue callbacks (but do not call them). - clear_weakrefs(state, true); + // Find weakref callbacks we will honor (but do not call them). + find_weakref_callbacks(state); _PyEval_StartTheWorld(interp); // Deallocate any object from the refcount merge step @@ -2235,9 +2283,18 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state, // Clear weakrefs to objects in the unreachable set. No Python-level // code must be allowed to access those unreachable objects. During // delete_garbage(), finalizers outside the unreachable set might - // run and create new weakrefs. If those weakrefs were not cleared, - // they could reveal unreachable objects. - clear_weakrefs(state, false); + // run and if those weakrefs were not cleared, that could reveal + // unreachable objects. + // + // We used to clear weakrefs earlier, before calling finalizers. + // That causes at least two problems. First, the finalizers could + // create new weakrefs, that refer to unreachable objects. Those + // would not be cleared and could cause the problem described above + // (see GH-91636 as an example). Second, we need the weakrefs in the + // tp_subclasses to *not* be cleared so that caches based on the type + // version are correctly invalidated (see GH-135552 as a bug caused by + // this). + clear_weakrefs(state); } _PyEval_StartTheWorld(interp);