diff --git a/Include/cpython/object.h b/Include/cpython/object.h index 9d092749b90096..7883f8417425e2 100644 --- a/Include/cpython/object.h +++ b/Include/cpython/object.h @@ -418,54 +418,7 @@ PyAPI_FUNC(void) _Py_NO_RETURN _PyObject_AssertFailed( const char *function); -/* Trashcan mechanism, thanks to Christian Tismer. - -When deallocating a container object, it's possible to trigger an unbounded -chain of deallocations, as each Py_DECREF in turn drops the refcount on "the -next" object in the chain to 0. This can easily lead to stack overflows, -especially in threads (which typically have less stack space to work with). - -A container object can avoid this by bracketing the body of its tp_dealloc -function with a pair of macros: - -static void -mytype_dealloc(mytype *p) -{ - ... declarations go here ... - - PyObject_GC_UnTrack(p); // must untrack first - Py_TRASHCAN_BEGIN(p, mytype_dealloc) - ... The body of the deallocator goes here, including all calls ... - ... to Py_DECREF on contained objects. ... - Py_TRASHCAN_END // there should be no code after this -} - -CAUTION: Never return from the middle of the body! If the body needs to -"get out early", put a label immediately before the Py_TRASHCAN_END -call, and goto it. Else the call-depth counter (see below) will stay -above 0 forever, and the trashcan will never get emptied. - -How it works: The BEGIN macro increments a call-depth counter. So long -as this counter is small, the body of the deallocator is run directly without -further ado. But if the counter gets large, it instead adds p to a list of -objects to be deallocated later, skips the body of the deallocator, and -resumes execution after the END macro. The tp_dealloc routine then returns -without deallocating anything (and so unbounded call-stack depth is avoided). - -When the call stack finishes unwinding again, code generated by the END macro -notices this, and calls another routine to deallocate all the objects that -may have been added to the list of deferred deallocations. In effect, a -chain of N deallocations is broken into (N-1)/(Py_TRASHCAN_HEADROOM-1) pieces, -with the call stack never exceeding a depth of Py_TRASHCAN_HEADROOM. - -Since the tp_dealloc of a subclass typically calls the tp_dealloc of the base -class, we need to ensure that the trashcan is only triggered on the tp_dealloc -of the actual class being deallocated. Otherwise we might end up with a -partially-deallocated object. To check this, the tp_dealloc function must be -passed as second argument to Py_TRASHCAN_BEGIN(). -*/ - -/* Python 3.9 private API, invoked by the macros below. */ +/* Python 3.9 private API */ PyAPI_FUNC(int) _PyTrash_begin(PyThreadState *tstate, PyObject *op); PyAPI_FUNC(void) _PyTrash_end(PyThreadState *tstate); @@ -473,28 +426,25 @@ PyAPI_FUNC(void) _PyTrash_thread_deposit_object(PyThreadState *tstate, PyObject PyAPI_FUNC(void) _PyTrash_thread_destroy_chain(PyThreadState *tstate); -/* Python 3.10 private API, invoked by the Py_TRASHCAN_BEGIN(). */ +/* Python 3.10 private API */ /* To avoid raising recursion errors during dealloc trigger trashcan before we reach * recursion limit. To avoid trashing, we don't attempt to empty the trashcan until - * we have headroom above the trigger limit */ + * we have headroom above the trigger limit + * + * FIXME: could this be moved into object.c? + */ #define Py_TRASHCAN_HEADROOM 50 +/* Backwards compatibility macros. The trashcan mechanism is now integrated + * with _Py_Dealloc and so these do nothing. The assert() is present to handle + * the case that a 'goto' statement precedes Py_TRASHCAN_END. */ #define Py_TRASHCAN_BEGIN(op, dealloc) \ -do { \ - PyThreadState *tstate = PyThreadState_Get(); \ - if (tstate->c_recursion_remaining <= Py_TRASHCAN_HEADROOM && Py_TYPE(op)->tp_dealloc == (destructor)dealloc) { \ - _PyTrash_thread_deposit_object(tstate, (PyObject *)op); \ - break; \ - } \ - tstate->c_recursion_remaining--; - /* The body of the deallocator is here. */ + do { + #define Py_TRASHCAN_END \ - tstate->c_recursion_remaining++; \ - if (tstate->delete_later && tstate->c_recursion_remaining > (Py_TRASHCAN_HEADROOM*2)) { \ - _PyTrash_thread_destroy_chain(tstate); \ - } \ -} while (0); + assert(1); \ + } while (0); PyAPI_FUNC(void *) PyObject_GetItemData(PyObject *obj); diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-09-27-15-41-16.gh-issue-124715.uzszn9.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-09-27-15-41-16.gh-issue-124715.uzszn9.rst new file mode 100644 index 00000000000000..2dc803ac2293af --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-09-27-15-41-16.gh-issue-124715.uzszn9.rst @@ -0,0 +1,7 @@ +Protect against stack overflows in tp_dealloc for all types that implement +GC (i.e. the "trashcan" mechanism). The `Py_TRASHCAN_BEGIN` and +`Py_TRASHCAN_END` macros are now only present for backwards compatibility and +don't need to be used. A suble API change is that tp_dealloc methods are now +called with GC objects untracked. Previously they would be tracked if the type +had them originally tracked. `PyObject_CallFinalizerFromDealloc()` will now +track GC objects if they are resurrected. diff --git a/Modules/_io/bufferedio.c b/Modules/_io/bufferedio.c index bc5fff54a62b6d..4224d4ce0a0865 100644 --- a/Modules/_io/bufferedio.c +++ b/Modules/_io/bufferedio.c @@ -412,9 +412,12 @@ buffered_dealloc(buffered *self) { PyTypeObject *tp = Py_TYPE(self); self->finalizing = 1; - if (_PyIOBase_finalize((PyObject *) self) < 0) + if (_PyIOBase_finalize((PyObject *) self) < 0) { + // if the object is resurrected, it will be tracked again by + // PyObject_CallFinalizerFromDealloc() + assert(_PyObject_GC_IS_TRACKED(self)); return; - _PyObject_GC_UNTRACK(self); + } self->ok = 0; if (self->weakreflist != NULL) PyObject_ClearWeakRefs((PyObject *)self); @@ -2298,7 +2301,6 @@ static void bufferedrwpair_dealloc(rwpair *self) { PyTypeObject *tp = Py_TYPE(self); - _PyObject_GC_UNTRACK(self); if (self->weakreflist != NULL) PyObject_ClearWeakRefs((PyObject *)self); (void)bufferedrwpair_clear(self); diff --git a/Modules/_io/bytesio.c b/Modules/_io/bytesio.c index fb66d3db0f7a1f..8e1d4d5c57e3b0 100644 --- a/Modules/_io/bytesio.c +++ b/Modules/_io/bytesio.c @@ -886,7 +886,6 @@ static void bytesio_dealloc(bytesio *self) { PyTypeObject *tp = Py_TYPE(self); - _PyObject_GC_UNTRACK(self); if (self->exports > 0) { PyErr_SetString(PyExc_SystemError, "deallocated BytesIO object has exported buffers"); diff --git a/Modules/_io/fileio.c b/Modules/_io/fileio.c index 8dae465fd20f8b..b92fe07cd45a31 100644 --- a/Modules/_io/fileio.c +++ b/Modules/_io/fileio.c @@ -558,9 +558,12 @@ fileio_dealloc(fileio *self) { PyTypeObject *tp = Py_TYPE(self); self->finalizing = 1; - if (_PyIOBase_finalize((PyObject *) self) < 0) + if (_PyIOBase_finalize((PyObject *) self) < 0) { + // if the object is resurrected, it will be tracked again by + // PyObject_CallFinalizerFromDealloc() + assert(_PyObject_GC_IS_TRACKED(self)); return; - _PyObject_GC_UNTRACK(self); + } if (self->stat_atopen != NULL) { PyMem_Free(self->stat_atopen); self->stat_atopen = NULL; diff --git a/Modules/_io/iobase.c b/Modules/_io/iobase.c index 419e5516b5c11e..68f855979caf47 100644 --- a/Modules/_io/iobase.c +++ b/Modules/_io/iobase.c @@ -372,10 +372,12 @@ iobase_dealloc(iobase *self) if (_PyType_HasFeature(Py_TYPE(self), Py_TPFLAGS_HEAPTYPE)) { Py_INCREF(Py_TYPE(self)); } + // if the object is resurrected, it will be tracked again by + // PyObject_CallFinalizerFromDealloc() + assert(_PyObject_GC_IS_TRACKED(self)); return; } PyTypeObject *tp = Py_TYPE(self); - _PyObject_GC_UNTRACK(self); if (self->weakreflist != NULL) PyObject_ClearWeakRefs((PyObject *) self); Py_CLEAR(self->dict); diff --git a/Modules/_io/stringio.c b/Modules/_io/stringio.c index 6d48bcb552b4bf..5946109d3ed105 100644 --- a/Modules/_io/stringio.c +++ b/Modules/_io/stringio.c @@ -609,7 +609,6 @@ static void stringio_dealloc(stringio *self) { PyTypeObject *tp = Py_TYPE(self); - _PyObject_GC_UNTRACK(self); self->ok = 0; if (self->buf) { PyMem_Free(self->buf); diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c index 439e26c5271939..8b2128b134d11d 100644 --- a/Modules/_io/textio.c +++ b/Modules/_io/textio.c @@ -284,7 +284,6 @@ static void incrementalnewlinedecoder_dealloc(nldecoder_object *self) { PyTypeObject *tp = Py_TYPE(self); - _PyObject_GC_UNTRACK(self); (void)incrementalnewlinedecoder_clear(self); tp->tp_free((PyObject *)self); Py_DECREF(tp); @@ -1456,10 +1455,13 @@ textiowrapper_dealloc(textio *self) { PyTypeObject *tp = Py_TYPE(self); self->finalizing = 1; - if (_PyIOBase_finalize((PyObject *) self) < 0) + if (_PyIOBase_finalize((PyObject *) self) < 0) { + // if the object is resurrected, it will be tracked again by + // PyObject_CallFinalizerFromDealloc() + assert(_PyObject_GC_IS_TRACKED(self)); return; + } self->ok = 0; - _PyObject_GC_UNTRACK(self); if (self->weakreflist != NULL) PyObject_ClearWeakRefs((PyObject *)self); (void)textiowrapper_clear(self); diff --git a/Modules/_io/winconsoleio.c b/Modules/_io/winconsoleio.c index ec5c298066a587..58d75fcc363ad4 100644 --- a/Modules/_io/winconsoleio.c +++ b/Modules/_io/winconsoleio.c @@ -463,9 +463,12 @@ winconsoleio_dealloc(winconsoleio *self) { PyTypeObject *tp = Py_TYPE(self); self->finalizing = 1; - if (_PyIOBase_finalize((PyObject *) self) < 0) + if (_PyIOBase_finalize((PyObject *) self) < 0) { + // if the object is resurrected, it will be tracked again by + // PyObject_CallFinalizerFromDealloc() + assert(_PyObject_GC_IS_TRACKED(self)); return; - _PyObject_GC_UNTRACK(self); + } if (self->weakreflist != NULL) PyObject_ClearWeakRefs((PyObject *) self); Py_CLEAR(self->dict); diff --git a/Objects/bytearrayobject.c b/Objects/bytearrayobject.c index a80e4670665a22..6c7b57ae308440 100644 --- a/Objects/bytearrayobject.c +++ b/Objects/bytearrayobject.c @@ -2440,7 +2440,6 @@ typedef struct { static void bytearrayiter_dealloc(bytesiterobject *it) { - _PyObject_GC_UNTRACK(it); Py_XDECREF(it->it_seq); PyObject_GC_Del(it); } diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index ba6636808d90e0..ce2a84100e3069 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -3223,7 +3223,6 @@ typedef struct { static void striter_dealloc(striterobject *it) { - _PyObject_GC_UNTRACK(it); Py_XDECREF(it->it_seq); PyObject_GC_Del(it); } diff --git a/Objects/cellobject.c b/Objects/cellobject.c index b1154e4ca4ace6..3d5e4268b90401 100644 --- a/Objects/cellobject.c +++ b/Objects/cellobject.c @@ -74,7 +74,6 @@ PyCell_Set(PyObject *op, PyObject *value) static void cell_dealloc(PyCellObject *op) { - _PyObject_GC_UNTRACK(op); Py_XDECREF(op->ob_ref); PyObject_GC_Del(op); } diff --git a/Objects/classobject.c b/Objects/classobject.c index 69a7d5f046e30d..d6f61085d40b6b 100644 --- a/Objects/classobject.c +++ b/Objects/classobject.c @@ -237,7 +237,6 @@ method_new_impl(PyTypeObject *type, PyObject *function, PyObject *instance) static void method_dealloc(PyMethodObject *im) { - _PyObject_GC_UNTRACK(im); if (im->im_weakreflist != NULL) PyObject_ClearWeakRefs((PyObject *)im); Py_DECREF(im->im_func); @@ -432,7 +431,6 @@ instancemethod_getattro(PyObject *self, PyObject *name) static void instancemethod_dealloc(PyObject *self) { - _PyObject_GC_UNTRACK(self); Py_DECREF(PyInstanceMethod_GET_FUNCTION(self)); PyObject_GC_Del(self); } diff --git a/Objects/descrobject.c b/Objects/descrobject.c index 4eccd1704eb95a..69d909e7060e60 100644 --- a/Objects/descrobject.c +++ b/Objects/descrobject.c @@ -22,7 +22,6 @@ static void descr_dealloc(PyObject *self) { PyDescrObject *descr = (PyDescrObject *)self; - _PyObject_GC_UNTRACK(descr); Py_XDECREF(descr->d_type); Py_XDECREF(descr->d_name); Py_XDECREF(descr->d_qualname); @@ -1187,7 +1186,6 @@ static void mappingproxy_dealloc(PyObject *self) { mappingproxyobject *pp = (mappingproxyobject *)self; - _PyObject_GC_UNTRACK(pp); Py_DECREF(pp->mapping); PyObject_GC_Del(pp); } @@ -1633,7 +1631,6 @@ property_dealloc(PyObject *self) { propertyobject *gs = (propertyobject *)self; - _PyObject_GC_UNTRACK(self); Py_XDECREF(gs->prop_get); Py_XDECREF(gs->prop_set); Py_XDECREF(gs->prop_del); diff --git a/Objects/dictobject.c b/Objects/dictobject.c index f38ab1b2865e99..fd047c9ecddb2f 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -5015,8 +5015,6 @@ static void dictiter_dealloc(PyObject *self) { dictiterobject *di = (dictiterobject *)self; - /* bpo-31095: UnTrack is needed before calling any callbacks */ - _PyObject_GC_UNTRACK(di); Py_XDECREF(di->di_dict); Py_XDECREF(di->di_result); PyObject_GC_Del(di); @@ -5815,8 +5813,6 @@ static void dictview_dealloc(PyObject *self) { _PyDictViewObject *dv = (_PyDictViewObject *)self; - /* bpo-31095: UnTrack is needed before calling any callbacks */ - _PyObject_GC_UNTRACK(dv); Py_XDECREF(dv->dv_dict); PyObject_GC_Del(dv); } diff --git a/Objects/exceptions.c b/Objects/exceptions.c index fda62f159c1540..117b62d09acb1b 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -698,7 +698,6 @@ SystemExit_clear(PySystemExitObject *self) static void SystemExit_dealloc(PySystemExitObject *self) { - _PyObject_GC_UNTRACK(self); SystemExit_clear(self); Py_TYPE(self)->tp_free((PyObject *)self); } @@ -887,7 +886,6 @@ BaseExceptionGroup_clear(PyBaseExceptionGroupObject *self) static void BaseExceptionGroup_dealloc(PyBaseExceptionGroupObject *self) { - _PyObject_GC_UNTRACK(self); BaseExceptionGroup_clear(self); Py_TYPE(self)->tp_free((PyObject *)self); } @@ -1605,7 +1603,6 @@ ImportError_clear(PyImportErrorObject *self) static void ImportError_dealloc(PyImportErrorObject *self) { - _PyObject_GC_UNTRACK(self); ImportError_clear(self); Py_TYPE(self)->tp_free((PyObject *)self); } @@ -1995,7 +1992,6 @@ OSError_clear(PyOSErrorObject *self) static void OSError_dealloc(PyOSErrorObject *self) { - _PyObject_GC_UNTRACK(self); OSError_clear(self); Py_TYPE(self)->tp_free((PyObject *)self); } @@ -2266,7 +2262,6 @@ NameError_clear(PyNameErrorObject *self) static void NameError_dealloc(PyNameErrorObject *self) { - _PyObject_GC_UNTRACK(self); NameError_clear(self); Py_TYPE(self)->tp_free((PyObject *)self); } @@ -2342,7 +2337,6 @@ AttributeError_clear(PyAttributeErrorObject *self) static void AttributeError_dealloc(PyAttributeErrorObject *self) { - _PyObject_GC_UNTRACK(self); AttributeError_clear(self); Py_TYPE(self)->tp_free((PyObject *)self); } @@ -2480,7 +2474,6 @@ SyntaxError_clear(PySyntaxErrorObject *self) static void SyntaxError_dealloc(PySyntaxErrorObject *self) { - _PyObject_GC_UNTRACK(self); SyntaxError_clear(self); Py_TYPE(self)->tp_free((PyObject *)self); } @@ -2930,7 +2923,6 @@ UnicodeError_clear(PyUnicodeErrorObject *self) static void UnicodeError_dealloc(PyUnicodeErrorObject *self) { - _PyObject_GC_UNTRACK(self); UnicodeError_clear(self); Py_TYPE(self)->tp_free((PyObject *)self); } @@ -3389,8 +3381,6 @@ _PyErr_NoMemory(PyThreadState *tstate) static void MemoryError_dealloc(PyBaseExceptionObject *self) { - _PyObject_GC_UNTRACK(self); - BaseException_clear(self); /* If this is a subclass of MemoryError, we don't need to diff --git a/Objects/frameobject.c b/Objects/frameobject.c index f3a66ffc9aac8f..18496fbd103f83 100644 --- a/Objects/frameobject.c +++ b/Objects/frameobject.c @@ -1637,10 +1637,6 @@ frame_dealloc(PyFrameObject *f) /* It is the responsibility of the owning generator/coroutine * to have cleared the generator pointer */ - if (_PyObject_GC_IS_TRACKED(f)) { - _PyObject_GC_UNTRACK(f); - } - Py_TRASHCAN_BEGIN(f, frame_dealloc); /* GH-106092: If f->f_frame was on the stack and we reached the maximum * nesting depth for deallocations, the trashcan may have delayed this diff --git a/Objects/funcobject.c b/Objects/funcobject.c index 8df0da800980a9..d8fa101c531d3b 100644 --- a/Objects/funcobject.c +++ b/Objects/funcobject.c @@ -1049,10 +1049,15 @@ func_dealloc(PyFunctionObject *op) handle_func_event(PyFunction_EVENT_DESTROY, op, NULL); if (Py_REFCNT(op) > 1) { Py_SET_REFCNT(op, Py_REFCNT(op) - 1); + // Ensure it's tracked again before we return. _Py_Dealloc untracks + // it but, when it's resurrected, it needs to be re-tracked to avoid + // memory leaks in the case of cycles. + if (!_PyObject_GC_IS_TRACKED(op)) { + PyObject_GC_Track(op); + } return; } Py_SET_REFCNT(op, 0); - _PyObject_GC_UNTRACK(op); if (op->func_weakreflist != NULL) { PyObject_ClearWeakRefs((PyObject *) op); } @@ -1254,7 +1259,6 @@ typedef struct { static void cm_dealloc(classmethod *cm) { - _PyObject_GC_UNTRACK((PyObject *)cm); Py_XDECREF(cm->cm_callable); Py_XDECREF(cm->cm_dict); Py_TYPE(cm)->tp_free((PyObject *)cm); @@ -1473,7 +1477,6 @@ typedef struct { static void sm_dealloc(staticmethod *sm) { - _PyObject_GC_UNTRACK((PyObject *)sm); Py_XDECREF(sm->sm_callable); Py_XDECREF(sm->sm_dict); Py_TYPE(sm)->tp_free((PyObject *)sm); diff --git a/Objects/genericaliasobject.c b/Objects/genericaliasobject.c index 64b4e2645cbaee..10296e25050039 100644 --- a/Objects/genericaliasobject.c +++ b/Objects/genericaliasobject.c @@ -31,7 +31,6 @@ ga_dealloc(PyObject *self) { gaobject *alias = (gaobject *)self; - _PyObject_GC_UNTRACK(self); if (alias->weakreflist != NULL) { PyObject_ClearWeakRefs((PyObject *)alias); } diff --git a/Objects/genobject.c b/Objects/genobject.c index 41cf8fdcc9dee8..b81753dcf7f356 100644 --- a/Objects/genobject.c +++ b/Objects/genobject.c @@ -127,17 +127,15 @@ gen_dealloc(PyGenObject *gen) { PyObject *self = (PyObject *) gen; - _PyObject_GC_UNTRACK(gen); - if (gen->gi_weakreflist != NULL) PyObject_ClearWeakRefs(self); - _PyObject_GC_TRACK(self); + _PyObject_GC_TRACK(self); // _Py_Dealloc calls untrack if (PyObject_CallFinalizerFromDealloc(self)) return; /* resurrected. :( */ - _PyObject_GC_UNTRACK(self); + _PyObject_GC_UNTRACK(self); // tp_del requires this if (PyAsyncGen_CheckExact(gen)) { /* We have to handle this case for asynchronous generators right here, because this code has to be between UNTRACK @@ -1222,7 +1220,6 @@ PyTypeObject PyCoro_Type = { static void coro_wrapper_dealloc(PyCoroWrapper *cw) { - _PyObject_GC_UNTRACK((PyObject *)cw); Py_CLEAR(cw->cw_coroutine); PyObject_GC_Del(cw); } @@ -1691,7 +1688,6 @@ async_gen_asend_dealloc(PyAsyncGenASend *o) return; } - _PyObject_GC_UNTRACK((PyObject *)o); Py_CLEAR(o->ags_gen); Py_CLEAR(o->ags_sendval); @@ -1913,7 +1909,6 @@ async_gen_asend_new(PyAsyncGenObject *gen, PyObject *sendval) static void async_gen_wrapped_val_dealloc(_PyAsyncGenWrappedValue *o) { - _PyObject_GC_UNTRACK((PyObject *)o); Py_CLEAR(o->agw_val); _Py_FREELIST_FREE(async_gens, o, PyObject_GC_Del); } @@ -1998,10 +1993,12 @@ static void async_gen_athrow_dealloc(PyAsyncGenAThrow *o) { if (PyObject_CallFinalizerFromDealloc((PyObject *)o)) { + // if the object is resurrected, it will be tracked again by + // PyObject_CallFinalizerFromDealloc() + assert(_PyObject_GC_IS_TRACKED(o)); return; } - _PyObject_GC_UNTRACK((PyObject *)o); Py_CLEAR(o->agt_gen); Py_CLEAR(o->agt_args); PyObject_GC_Del(o); diff --git a/Objects/iterobject.c b/Objects/iterobject.c index 135ced9ea1f268..8d8b9996828ecb 100644 --- a/Objects/iterobject.c +++ b/Objects/iterobject.c @@ -33,7 +33,6 @@ PySeqIter_New(PyObject *seq) static void iter_dealloc(seqiterobject *it) { - _PyObject_GC_UNTRACK(it); Py_XDECREF(it->it_seq); PyObject_GC_Del(it); } @@ -197,7 +196,6 @@ PyCallIter_New(PyObject *callable, PyObject *sentinel) static void calliter_dealloc(calliterobject *it) { - _PyObject_GC_UNTRACK(it); Py_XDECREF(it->it_callable); Py_XDECREF(it->it_sentinel); PyObject_GC_Del(it); @@ -307,7 +305,6 @@ typedef struct { static void anextawaitable_dealloc(anextawaitableobject *obj) { - _PyObject_GC_UNTRACK(obj); Py_XDECREF(obj->wrapped); Py_XDECREF(obj->default_value); PyObject_GC_Del(obj); diff --git a/Objects/listobject.c b/Objects/listobject.c index 8abe15d6674140..338a5b1e3c32b0 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -3854,7 +3854,6 @@ static void listiter_dealloc(PyObject *self) { _PyListIterObject *it = (_PyListIterObject *)self; - _PyObject_GC_UNTRACK(it); Py_XDECREF(it->it_seq); PyObject_GC_Del(it); } diff --git a/Objects/memoryobject.c b/Objects/memoryobject.c index a2472d4873641d..b4113b281c19f9 100644 --- a/Objects/memoryobject.c +++ b/Objects/memoryobject.c @@ -112,7 +112,6 @@ mbuf_release(_PyManagedBufferObject *self) self->flags |= _Py_MANAGED_BUFFER_RELEASED; /* PyBuffer_Release() decrements master->obj and sets it to NULL. */ - _PyObject_GC_UNTRACK(self); PyBuffer_Release(&self->master); } @@ -1141,7 +1140,6 @@ memory_dealloc(PyObject *_self) { PyMemoryViewObject *self = (PyMemoryViewObject *)_self; assert(self->exports == 0); - _PyObject_GC_UNTRACK(self); _memory_release(self); Py_CLEAR(self->mbuf); if (self->weakreflist != NULL) @@ -3307,7 +3305,6 @@ static void memoryiter_dealloc(PyObject *self) { memoryiterobject *it = (memoryiterobject *)self; - _PyObject_GC_UNTRACK(it); Py_XDECREF(it->it_seq); PyObject_GC_Del(it); } diff --git a/Objects/object.c b/Objects/object.c index 8a819dd336e421..46781fb415fcfc 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -526,9 +526,17 @@ PyObject_CallFinalizerFromDealloc(PyObject *self) * never happened. */ _Py_ResurrectReference(self); - _PyObject_ASSERT(self, - (!_PyType_IS_GC(Py_TYPE(self)) - || _PyObject_GC_IS_TRACKED(self))); + if (_PyObject_IS_GC(self)) { + /* If we get here, it must have originally been tracked when + * _Py_Dealloc was called. Make it tracked again so it can't become + * part of a reference cycle and leak. Some types expect the object + * to be tracked when this returns non-zero. + */ + if (!_PyObject_GC_IS_TRACKED(self)) { + _PyObject_GC_TRACK(self); + } + } + return -1; } @@ -2748,7 +2756,35 @@ Py_ReprLeave(PyObject *obj) PyErr_SetRaisedException(exc); } -/* Trashcan support. */ +/* Trashcan mechanism, thanks to Christian Tismer for the original. + +When deallocating a container object, it's possible to trigger an +unbounded chain of deallocations, as each Py_DECREF in turn drops the +refcount on "the next" object in the chain to 0. This can easily lead +to stack overflows, especially in threads (which typically have less +stack space to work with). + +We avoid this by checking the nesting level of dealloc_gc() calls. + +How it works: Entering dealloc_gc() increments a call-depth counter. +So long as this counter is small, the dealloc method of the type is run +directly without further ado. But if the counter gets large, it instead +adds the object to a list of objects to be deallocated later and skips +calling the deallocator. In that case, dealloc_gc() returns without +deallocating anything (and so unbounded call-stack depth is avoided). + +When the call stack finishes unwinding again, code generated by the END +macro notices this, and calls another routine to deallocate all the +objects that may have been added to the list of deferred deallocations. +In effect, a chain of N deallocations is broken into +(N-1)/(Py_TRASHCAN_HEADROOM-1) pieces, with the call stack never +exceeding a depth of Py_TRASHCAN_HEADROOM. +*/ + +/* To avoid raising recursion errors during dealloc trigger trashcan before we reach + * recursion limit. To avoid trashing, we don't attempt to empty the trashcan until + * we have headroom above the trigger limit */ +#define Py_TRASHCAN_HEADROOM 50 /* Add op to the gcstate->trash_delete_later list. Called when the current * call-stack depth gets large. op must be a currently untracked gc'ed @@ -2758,6 +2794,11 @@ void _PyTrash_thread_deposit_object(PyThreadState *tstate, PyObject *op) { _PyObject_ASSERT(op, _PyObject_IS_GC(op)); + /* untrack is done by dealloc_gc()() so the following assert must be + * true. Previously Py_TRASHCAN_BEGIN() did not untrack itself and it was + * the responsibility of the users of the trashcan mechanism to ensure that + * untrack was called first. + */ _PyObject_ASSERT(op, !_PyObject_GC_IS_TRACKED(op)); _PyObject_ASSERT(op, Py_REFCNT(op) == 0); #ifdef Py_GIL_DISABLED @@ -2810,6 +2851,47 @@ _PyTrash_thread_destroy_chain(PyThreadState *tstate) tstate->c_recursion_remaining++; } +static void +dealloc_gc(PyObject *op) +{ + if (_PyObject_GC_IS_TRACKED(op)) { + // The trashcan list re-uses the GC prev pointer and so we must + // untrack the object first, if it is tracked. + // + // It used to be the case that tp_dealloc was called with the + // object tracked (if it originally was) and the method had to + // make sure to call untrack before calling tp_free. Now we do + // the untrack here. Types using the private API need to make + // sure not to call _PyObject_GC_UNTRACK() since that will fail + // if the object is already untracked. Types using the public + // API are safe since PyObject_GC_UnTrack() can be called with + // the object already untracked. + _PyObject_GC_UNTRACK(op); + } + PyThreadState *tstate = PyThreadState_Get(); + if (tstate->c_recursion_remaining <= Py_TRASHCAN_HEADROOM) { + /* Store the object (to be deallocated later) */ + _PyTrash_thread_deposit_object(tstate, (PyObject *)op); + } + else { + tstate->c_recursion_remaining--; + destructor dealloc = Py_TYPE(op)->tp_dealloc; + (*dealloc)(op); + tstate->c_recursion_remaining++; + if (tstate->delete_later && tstate->c_recursion_remaining > (Py_TRASHCAN_HEADROOM*2)) { + _PyTrash_thread_destroy_chain(tstate); + } + } +} + +static void +dealloc_simple(PyObject *op) +{ + PyTypeObject *type = Py_TYPE(op); + destructor dealloc = type->tp_dealloc; + (*dealloc)(op); +} + void _Py_NO_RETURN _PyObject_AssertFailed(PyObject *obj, const char *expr, const char *msg, const char *file, int line, const char *function) @@ -2861,11 +2943,11 @@ _PyObject_AssertFailed(PyObject *obj, const char *expr, const char *msg, } +/* Called by Py_DECREF() when refcount goes to zero. */ void _Py_Dealloc(PyObject *op) { PyTypeObject *type = Py_TYPE(op); - destructor dealloc = type->tp_dealloc; #ifdef Py_DEBUG PyThreadState *tstate = _PyThreadState_GET(); PyObject *old_exc = tstate != NULL ? tstate->current_exception : NULL; @@ -2885,7 +2967,18 @@ _Py_Dealloc(PyObject *op) #ifdef Py_TRACE_REFS _Py_ForgetReference(op); #endif - (*dealloc)(op); + if (!_PyType_IS_GC(type)) { + // definitely non-GC object, just call tp_dealloc + dealloc_simple(op); + } + else if (type->tp_is_gc == NULL || type->tp_is_gc(op)) { + // supports GC track, gets trashcan protection for tp_dealloc + dealloc_gc(op); + } + else { + // actually doesn't support GC tracking + dealloc_simple(op); + } #ifdef Py_DEBUG // gh-89373: The tp_dealloc function must leave the current exception diff --git a/Objects/odictobject.c b/Objects/odictobject.c index e151023dd764bf..e54475916610b5 100644 --- a/Objects/odictobject.c +++ b/Objects/odictobject.c @@ -1635,7 +1635,6 @@ typedef struct { static void odictiter_dealloc(odictiterobject *di) { - _PyObject_GC_UNTRACK(di); Py_XDECREF(di->di_odict); Py_XDECREF(di->di_current); if ((di->kind & _odict_ITER_ITEMS) == _odict_ITER_ITEMS) { diff --git a/Objects/setobject.c b/Objects/setobject.c index c5f96d25585fa4..b8b0fc8c64153c 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -787,7 +787,6 @@ static void setiter_dealloc(setiterobject *si) { /* bpo-31095: UnTrack is needed before calling any callbacks */ - _PyObject_GC_UNTRACK(si); Py_XDECREF(si->si_set); PyObject_GC_Del(si); } diff --git a/Objects/sliceobject.c b/Objects/sliceobject.c index 1b6d35998c2b69..df5b510ccd96a2 100644 --- a/Objects/sliceobject.c +++ b/Objects/sliceobject.c @@ -343,7 +343,6 @@ Create a slice object. This is used for extended slicing (e.g. a[0:10:2])."); static void slice_dealloc(PySliceObject *r) { - _PyObject_GC_UNTRACK(r); Py_DECREF(r->step); Py_DECREF(r->start); Py_DECREF(r->stop); diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c index f14f10ab9c0a46..cb6dcd529732d0 100644 --- a/Objects/tupleobject.c +++ b/Objects/tupleobject.c @@ -986,7 +986,6 @@ _PyTuple_Resize(PyObject **pv, Py_ssize_t newsize) static void tupleiter_dealloc(_PyTupleIterObject *it) { - _PyObject_GC_UNTRACK(it); Py_XDECREF(it->it_seq); PyObject_GC_Del(it); } diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 3368c1ef577d14..42fbfc767fc5af 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -2434,10 +2434,12 @@ subtype_dealloc(PyObject *self) return; } if (type->tp_del) { + _PyObject_GC_TRACK(self); // _Py_Dealloc untracks type->tp_del(self); if (Py_REFCNT(self) > 0) { return; } + _PyObject_GC_UNTRACK(self); } /* Find the nearest base with a different tp_dealloc */ @@ -2475,11 +2477,6 @@ subtype_dealloc(PyObject *self) /* We get here only if the type has GC */ - /* UnTrack and re-Track around the trashcan macro, alas */ - /* See explanation at end of function for full disclosure */ - PyObject_GC_UnTrack(self); - Py_TRASHCAN_BEGIN(self, subtype_dealloc); - /* Find the nearest base with a different tp_dealloc */ base = type; while ((/*basedealloc =*/ base->tp_dealloc) == subtype_dealloc) { @@ -2490,12 +2487,10 @@ subtype_dealloc(PyObject *self) has_finalizer = type->tp_finalize || type->tp_del; if (type->tp_finalize) { - _PyObject_GC_TRACK(self); if (PyObject_CallFinalizerFromDealloc(self) < 0) { /* Resurrected */ - goto endlabel; + return; } - _PyObject_GC_UNTRACK(self); } /* If we added a weaklist, we clear it. Do this *before* calling tp_del, @@ -2511,11 +2506,11 @@ subtype_dealloc(PyObject *self) } if (type->tp_del) { - _PyObject_GC_TRACK(self); + _PyObject_GC_TRACK(self); // _Py_Dealloc untracks type->tp_del(self); if (Py_REFCNT(self) > 0) { /* Resurrected */ - goto endlabel; + return; } _PyObject_GC_UNTRACK(self); } @@ -2556,13 +2551,6 @@ subtype_dealloc(PyObject *self) /* Extract the type again; tp_del may have changed it */ type = Py_TYPE(self); - /* Call the base tp_dealloc(); first retrack self if - * basedealloc knows about gc. - */ - if (_PyType_IS_GC(base)) { - _PyObject_GC_TRACK(self); - } - // Don't read type memory after calling basedealloc() since basedealloc() // can deallocate the type and free its memory. int type_needs_decref = (type->tp_flags & Py_TPFLAGS_HEAPTYPE @@ -2578,46 +2566,6 @@ subtype_dealloc(PyObject *self) if (type_needs_decref) { _Py_DECREF_TYPE(type); } - - endlabel: - Py_TRASHCAN_END - - /* Explanation of the weirdness around the trashcan macros: - - Q. What do the trashcan macros do? - - A. Read the comment titled "Trashcan mechanism" in object.h. - For one, this explains why there must be a call to GC-untrack - before the trashcan begin macro. Without understanding the - trashcan code, the answers to the following questions don't make - sense. - - Q. Why do we GC-untrack before the trashcan and then immediately - GC-track again afterward? - - A. In the case that the base class is GC-aware, the base class - probably GC-untracks the object. If it does that using the - UNTRACK macro, this will crash when the object is already - untracked. Because we don't know what the base class does, the - only safe thing is to make sure the object is tracked when we - call the base class dealloc. But... The trashcan begin macro - requires that the object is *untracked* before it is called. So - the dance becomes: - - GC untrack - trashcan begin - GC track - - Q. Why did the last question say "immediately GC-track again"? - It's nowhere near immediately. - - A. Because the code *used* to re-track immediately. Bad Idea. - self has a refcount of 0, and if gc ever gets its hands on it - (which can happen if any weakref callback gets invoked), it - looks like trash to gc too, and gc also tries to delete self - then. But we're already deleting self. Double deallocation is - a subtle disaster. - */ } static PyTypeObject *solid_base(PyTypeObject *type); @@ -6048,7 +5996,6 @@ type_dealloc(PyObject *self) // Assert this is a heap-allocated type object _PyObject_ASSERT((PyObject *)type, type->tp_flags & Py_TPFLAGS_HEAPTYPE); - _PyObject_GC_UNTRACK(type); type_dealloc_common(type); // PyObject_ClearWeakRefs() raises an exception if Py_REFCNT() != 0 @@ -10237,7 +10184,6 @@ bufferwrapper_dealloc(PyObject *self) { PyBufferWrapper *bw = (PyBufferWrapper *)self; - _PyObject_GC_UNTRACK(self); Py_XDECREF(bw->mv); Py_XDECREF(bw->obj); Py_TYPE(self)->tp_free(self); @@ -11387,7 +11333,6 @@ super_dealloc(PyObject *self) { superobject *su = (superobject *)self; - _PyObject_GC_UNTRACK(self); Py_XDECREF(su->obj); Py_XDECREF(su->type); Py_XDECREF(su->obj_type); diff --git a/Objects/typevarobject.c b/Objects/typevarobject.c index 51d93ed8b5ba8c..d745fa4fa26b82 100644 --- a/Objects/typevarobject.c +++ b/Objects/typevarobject.c @@ -127,8 +127,6 @@ constevaluator_dealloc(PyObject *self) PyTypeObject *tp = Py_TYPE(self); constevaluatorobject *ce = (constevaluatorobject *)self; - _PyObject_GC_UNTRACK(self); - Py_XDECREF(ce->value); Py_TYPE(self)->tp_free(self); @@ -455,8 +453,6 @@ typevar_dealloc(PyObject *self) PyTypeObject *tp = Py_TYPE(self); typevarobject *tv = (typevarobject *)self; - _PyObject_GC_UNTRACK(self); - Py_DECREF(tv->name); Py_XDECREF(tv->bound); Py_XDECREF(tv->evaluate_bound); @@ -924,8 +920,6 @@ paramspecattr_dealloc(PyObject *self) PyTypeObject *tp = Py_TYPE(self); paramspecattrobject *psa = (paramspecattrobject *)self; - _PyObject_GC_UNTRACK(self); - Py_XDECREF(psa->__origin__); Py_TYPE(self)->tp_free(self); @@ -1147,8 +1141,6 @@ paramspec_dealloc(PyObject *self) PyTypeObject *tp = Py_TYPE(self); paramspecobject *ps = (paramspecobject *)self; - _PyObject_GC_UNTRACK(self); - Py_DECREF(ps->name); Py_XDECREF(ps->bound); Py_XDECREF(ps->default_value); @@ -1491,7 +1483,6 @@ static void typevartuple_dealloc(PyObject *self) { PyTypeObject *tp = Py_TYPE(self); - _PyObject_GC_UNTRACK(self); typevartupleobject *tvt = (typevartupleobject *)self; Py_DECREF(tvt->name); @@ -1803,7 +1794,6 @@ static void typealias_dealloc(PyObject *self) { PyTypeObject *tp = Py_TYPE(self); - _PyObject_GC_UNTRACK(self); typealiasobject *ta = (typealiasobject *)self; Py_DECREF(ta->name); Py_XDECREF(ta->type_params); @@ -2190,7 +2180,6 @@ static void generic_dealloc(PyObject *self) { PyTypeObject *tp = Py_TYPE(self); - _PyObject_GC_UNTRACK(self); Py_TYPE(self)->tp_free(self); Py_DECREF(tp); } diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 0f502ccdaf5767..93e1ce51a458f3 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -15737,7 +15737,6 @@ typedef struct { static void unicodeiter_dealloc(unicodeiterobject *it) { - _PyObject_GC_UNTRACK(it); Py_XDECREF(it->it_seq); PyObject_GC_Del(it); } diff --git a/Objects/unionobject.c b/Objects/unionobject.c index 6e65a653a95c46..eecab84d5a2be0 100644 --- a/Objects/unionobject.c +++ b/Objects/unionobject.c @@ -19,8 +19,6 @@ unionobject_dealloc(PyObject *self) { unionobject *alias = (unionobject *)self; - _PyObject_GC_UNTRACK(self); - Py_XDECREF(alias->args); Py_XDECREF(alias->parameters); Py_TYPE(self)->tp_free(self); diff --git a/Python/ceval.c b/Python/ceval.c index 6e62939adb3745..6cc93ae03a5b00 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -84,8 +84,7 @@ } \ _Py_DECREF_STAT_INC(); \ if (--op->ob_refcnt == 0) { \ - destructor dealloc = Py_TYPE(op)->tp_dealloc; \ - (*dealloc)(op); \ + _Py_Dealloc(op); \ } \ } while (0) diff --git a/Python/context.c b/Python/context.c index ddb03555f9e402..c6bc11134ce0a3 100644 --- a/Python/context.c +++ b/Python/context.c @@ -519,8 +519,6 @@ context_tp_traverse(PyContext *self, visitproc visit, void *arg) static void context_tp_dealloc(PyContext *self) { - _PyObject_GC_UNTRACK(self); - if (self->ctx_weakreflist != NULL) { PyObject_ClearWeakRefs((PyObject*)self); } diff --git a/Python/optimizer.c b/Python/optimizer.c index 978649faa04d45..0baec7afec83d6 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -254,7 +254,6 @@ static void unlink_executor(_PyExecutorObject *executor); static void uop_dealloc(_PyExecutorObject *self) { - _PyObject_GC_UNTRACK(self); assert(self->vm_data.code == NULL); unlink_executor(self); #ifdef _Py_JIT