From 628f6eb0035db9d716554bf53ce164a88b652eed Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Thu, 16 Nov 2023 16:41:21 -0500 Subject: [PATCH 1/4] gh-111924: Use PyMutex for runtime global locks. This replaces some usages of PyThread_type_lock with PyMutex, which does not require memory allocation to initialize. --- Include/internal/pycore_atexit.h | 5 +- Include/internal/pycore_ceval.h | 3 +- Include/internal/pycore_ceval_state.h | 3 +- Include/internal/pycore_crossinterp.h | 3 +- Include/internal/pycore_import.h | 3 +- Include/internal/pycore_lock.h | 10 ++ Include/internal/pycore_pymem.h | 5 +- Include/internal/pycore_pystate.h | 4 +- Include/internal/pycore_runtime.h | 4 +- Include/internal/pycore_unicodeobject.h | 3 +- Objects/obmalloc.c | 61 +++-------- Objects/unicodeobject.c | 4 +- Python/ceval_gil.c | 37 ++----- Python/crossinterp.c | 28 +---- Python/import.c | 10 +- Python/pylifecycle.c | 14 +-- Python/pystate.c | 133 +++--------------------- Python/sysmodule.c | 12 +-- 18 files changed, 91 insertions(+), 251 deletions(-) diff --git a/Include/internal/pycore_atexit.h b/Include/internal/pycore_atexit.h index 3966df70e2616f..4dcda8f517c787 100644 --- a/Include/internal/pycore_atexit.h +++ b/Include/internal/pycore_atexit.h @@ -1,5 +1,8 @@ #ifndef Py_INTERNAL_ATEXIT_H #define Py_INTERNAL_ATEXIT_H + +#include "pycore_lock.h" // PyMutex + #ifdef __cplusplus extern "C" { #endif @@ -15,7 +18,7 @@ extern "C" { typedef void (*atexit_callbackfunc)(void); struct _atexit_runtime_state { - PyThread_type_lock mutex; + PyMutex mutex; #define NEXITFUNCS 32 atexit_callbackfunc callbacks[NEXITFUNCS]; int ncallbacks; diff --git a/Include/internal/pycore_ceval.h b/Include/internal/pycore_ceval.h index c372b7224fb047..eeaf25b25e551f 100644 --- a/Include/internal/pycore_ceval.h +++ b/Include/internal/pycore_ceval.h @@ -41,8 +41,7 @@ PyAPI_FUNC(int) _PyEval_MakePendingCalls(PyThreadState *); #endif extern void _Py_FinishPendingCalls(PyThreadState *tstate); -extern void _PyEval_InitState(PyInterpreterState *, PyThread_type_lock); -extern void _PyEval_FiniState(struct _ceval_state *ceval); +extern void _PyEval_InitState(PyInterpreterState *); extern void _PyEval_SignalReceived(PyInterpreterState *interp); // bitwise flags: diff --git a/Include/internal/pycore_ceval_state.h b/Include/internal/pycore_ceval_state.h index 072bbcda0c3c82..28738980eb49be 100644 --- a/Include/internal/pycore_ceval_state.h +++ b/Include/internal/pycore_ceval_state.h @@ -8,6 +8,7 @@ extern "C" { # error "this header requires Py_BUILD_CORE define" #endif +#include "pycore_lock.h" // PyMutex #include "pycore_gil.h" // struct _gil_runtime_state @@ -15,7 +16,7 @@ typedef int (*_Py_pending_call_func)(void *); struct _pending_calls { int busy; - PyThread_type_lock lock; + PyMutex mutex; /* Request for running pending calls. */ int32_t calls_to_do; #define NPENDINGCALLS 32 diff --git a/Include/internal/pycore_crossinterp.h b/Include/internal/pycore_crossinterp.h index ee9ff0090c2484..41e685d4668ddd 100644 --- a/Include/internal/pycore_crossinterp.h +++ b/Include/internal/pycore_crossinterp.h @@ -8,6 +8,7 @@ extern "C" { # error "this header requires Py_BUILD_CORE define" #endif +#include "pycore_lock.h" // PyMutex #include "pycore_pyerrors.h" @@ -128,7 +129,7 @@ struct _xidregitem { struct _xidregistry { int global; /* builtin types or heap types */ int initialized; - PyThread_type_lock mutex; + PyMutex mutex; struct _xidregitem *head; }; diff --git a/Include/internal/pycore_import.h b/Include/internal/pycore_import.h index 117e46bb86285d..c84f87a831bf38 100644 --- a/Include/internal/pycore_import.h +++ b/Include/internal/pycore_import.h @@ -9,6 +9,7 @@ extern "C" { # error "this header requires Py_BUILD_CORE define" #endif +#include "pycore_lock.h" // PyMutex #include "pycore_hashtable.h" // _Py_hashtable_t #include "pycore_time.h" // _PyTime_t @@ -47,7 +48,7 @@ struct _import_runtime_state { Py_ssize_t last_module_index; struct { /* A lock to guard the cache. */ - PyThread_type_lock mutex; + PyMutex mutex; /* The actual cache of (filename, name, PyModuleDef) for modules. Only legacy (single-phase init) extension modules are added and only if they support multiple initialization (m_size >- 0) diff --git a/Include/internal/pycore_lock.h b/Include/internal/pycore_lock.h index 25c3cf5377b778..bd455ea57c521b 100644 --- a/Include/internal/pycore_lock.h +++ b/Include/internal/pycore_lock.h @@ -108,6 +108,16 @@ typedef enum _PyLockFlags { extern PyLockStatus _PyMutex_LockTimed(PyMutex *m, _PyTime_t timeout_ns, _PyLockFlags flags); +// Lock a mutex with aditional options. See _PyLockFlags for details. +static inline void +PyMutex_LockFlags(PyMutex *m, _PyLockFlags flags) +{ + uint8_t expected = _Py_UNLOCKED; + if (!_Py_atomic_compare_exchange_uint8(&m->v, &expected, _Py_LOCKED)) { + _PyMutex_LockTimed(m, -1, flags); + } +} + // Unlock a mutex, returns 0 if the mutex is not locked (used for improved // error messages). extern int _PyMutex_TryUnlock(PyMutex *m); diff --git a/Include/internal/pycore_pymem.h b/Include/internal/pycore_pymem.h index 6b5113714dbeb2..8631ca34a5e616 100644 --- a/Include/internal/pycore_pymem.h +++ b/Include/internal/pycore_pymem.h @@ -1,5 +1,8 @@ #ifndef Py_INTERNAL_PYMEM_H #define Py_INTERNAL_PYMEM_H + +#include "pycore_lock.h" // PyMutex + #ifdef __cplusplus extern "C" { #endif @@ -30,7 +33,7 @@ typedef struct { } debug_alloc_api_t; struct _pymem_allocators { - PyThread_type_lock mutex; + PyMutex mutex; struct { PyMemAllocatorEx raw; PyMemAllocatorEx mem; diff --git a/Include/internal/pycore_pystate.h b/Include/internal/pycore_pystate.h index 7fa952e371d7b4..c031a38cd6bfa3 100644 --- a/Include/internal/pycore_pystate.h +++ b/Include/internal/pycore_pystate.h @@ -220,9 +220,9 @@ PyAPI_FUNC(int) _PyState_AddModule( extern int _PyOS_InterruptOccurred(PyThreadState *tstate); #define HEAD_LOCK(runtime) \ - PyThread_acquire_lock((runtime)->interpreters.mutex, WAIT_LOCK) + PyMutex_LockFlags(&(runtime)->interpreters.mutex, _Py_LOCK_DONT_DETACH) #define HEAD_UNLOCK(runtime) \ - PyThread_release_lock((runtime)->interpreters.mutex) + PyMutex_Unlock(&(runtime)->interpreters.mutex) // Get the configuration of the current interpreter. // The caller must hold the GIL. diff --git a/Include/internal/pycore_runtime.h b/Include/internal/pycore_runtime.h index e6efe8b646e86f..d7d5b4c4df3824 100644 --- a/Include/internal/pycore_runtime.h +++ b/Include/internal/pycore_runtime.h @@ -174,7 +174,7 @@ typedef struct pyruntimestate { unsigned long _finalizing_id; struct pyinterpreters { - PyThread_type_lock mutex; + PyMutex mutex; /* The linked list of interpreters, newest first. */ PyInterpreterState *head; /* The runtime's initial interpreter, which has a special role @@ -236,7 +236,7 @@ typedef struct pyruntimestate { Py_OpenCodeHookFunction open_code_hook; void *open_code_userdata; struct { - PyThread_type_lock mutex; + PyMutex mutex; _Py_AuditHookEntry *head; } audit_hooks; diff --git a/Include/internal/pycore_unicodeobject.h b/Include/internal/pycore_unicodeobject.h index a0d00af92e0f5d..7ee540154b23d8 100644 --- a/Include/internal/pycore_unicodeobject.h +++ b/Include/internal/pycore_unicodeobject.h @@ -8,6 +8,7 @@ extern "C" { # error "this header requires Py_BUILD_CORE define" #endif +#include "pycore_lock.h" // PyMutex #include "pycore_fileutils.h" // _Py_error_handler #include "pycore_identifier.h" // _Py_Identifier #include "pycore_ucnhash.h" // _PyUnicode_Name_CAPI @@ -277,7 +278,7 @@ extern PyTypeObject _PyUnicodeASCIIIter_Type; /* --- Other API ---------------------------------------------------------- */ struct _Py_unicode_runtime_ids { - PyThread_type_lock lock; + PyMutex mutex; // next_index value must be preserved when Py_Initialize()/Py_Finalize() // is called multiple times: see _PyUnicode_FromId() implementation. Py_ssize_t next_index; diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index 2761c774209786..b737c030957564 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ -329,13 +329,9 @@ int _PyMem_SetDefaultAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *old_alloc) { - if (ALLOCATORS_MUTEX == NULL) { - /* The runtime must be initializing. */ - return set_default_allocator_unlocked(domain, pydebug, old_alloc); - } - PyThread_acquire_lock(ALLOCATORS_MUTEX, WAIT_LOCK); + PyMutex_Lock(&ALLOCATORS_MUTEX); int res = set_default_allocator_unlocked(domain, pydebug, old_alloc); - PyThread_release_lock(ALLOCATORS_MUTEX); + PyMutex_Unlock(&ALLOCATORS_MUTEX); return res; } @@ -467,9 +463,9 @@ set_up_allocators_unlocked(PyMemAllocatorName allocator) int _PyMem_SetupAllocators(PyMemAllocatorName allocator) { - PyThread_acquire_lock(ALLOCATORS_MUTEX, WAIT_LOCK); + PyMutex_Lock(&ALLOCATORS_MUTEX); int res = set_up_allocators_unlocked(allocator); - PyThread_release_lock(ALLOCATORS_MUTEX); + PyMutex_Unlock(&ALLOCATORS_MUTEX); return res; } @@ -554,9 +550,9 @@ get_current_allocator_name_unlocked(void) const char* _PyMem_GetCurrentAllocatorName(void) { - PyThread_acquire_lock(ALLOCATORS_MUTEX, WAIT_LOCK); + PyMutex_Lock(&ALLOCATORS_MUTEX); const char *name = get_current_allocator_name_unlocked(); - PyThread_release_lock(ALLOCATORS_MUTEX); + PyMutex_Unlock(&ALLOCATORS_MUTEX); return name; } @@ -653,14 +649,9 @@ set_up_debug_hooks_unlocked(void) void PyMem_SetupDebugHooks(void) { - if (ALLOCATORS_MUTEX == NULL) { - /* The runtime must not be completely initialized yet. */ - set_up_debug_hooks_unlocked(); - return; - } - PyThread_acquire_lock(ALLOCATORS_MUTEX, WAIT_LOCK); + PyMutex_Lock(&ALLOCATORS_MUTEX); set_up_debug_hooks_unlocked(); - PyThread_release_lock(ALLOCATORS_MUTEX); + PyMutex_Unlock(&ALLOCATORS_MUTEX); } static void @@ -696,53 +687,33 @@ set_allocator_unlocked(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator) void PyMem_GetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator) { - if (ALLOCATORS_MUTEX == NULL) { - /* The runtime must not be completely initialized yet. */ - get_allocator_unlocked(domain, allocator); - return; - } - PyThread_acquire_lock(ALLOCATORS_MUTEX, WAIT_LOCK); + PyMutex_Lock(&ALLOCATORS_MUTEX); get_allocator_unlocked(domain, allocator); - PyThread_release_lock(ALLOCATORS_MUTEX); + PyMutex_Unlock(&ALLOCATORS_MUTEX); } void PyMem_SetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator) { - if (ALLOCATORS_MUTEX == NULL) { - /* The runtime must not be completely initialized yet. */ - set_allocator_unlocked(domain, allocator); - return; - } - PyThread_acquire_lock(ALLOCATORS_MUTEX, WAIT_LOCK); + PyMutex_Lock(&ALLOCATORS_MUTEX); set_allocator_unlocked(domain, allocator); - PyThread_release_lock(ALLOCATORS_MUTEX); + PyMutex_Unlock(&ALLOCATORS_MUTEX); } void PyObject_GetArenaAllocator(PyObjectArenaAllocator *allocator) { - if (ALLOCATORS_MUTEX == NULL) { - /* The runtime must not be completely initialized yet. */ - *allocator = _PyObject_Arena; - return; - } - PyThread_acquire_lock(ALLOCATORS_MUTEX, WAIT_LOCK); + PyMutex_Lock(&ALLOCATORS_MUTEX); *allocator = _PyObject_Arena; - PyThread_release_lock(ALLOCATORS_MUTEX); + PyMutex_Unlock(&ALLOCATORS_MUTEX); } void PyObject_SetArenaAllocator(PyObjectArenaAllocator *allocator) { - if (ALLOCATORS_MUTEX == NULL) { - /* The runtime must not be completely initialized yet. */ - _PyObject_Arena = *allocator; - return; - } - PyThread_acquire_lock(ALLOCATORS_MUTEX, WAIT_LOCK); + PyMutex_Lock(&ALLOCATORS_MUTEX); _PyObject_Arena = *allocator; - PyThread_release_lock(ALLOCATORS_MUTEX); + PyMutex_Unlock(&ALLOCATORS_MUTEX); } diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index cffc06297a9aee..a1b23d209a54bb 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -1904,7 +1904,7 @@ _PyUnicode_FromId(_Py_Identifier *id) if (index < 0) { struct _Py_unicode_runtime_ids *rt_ids = &interp->runtime->unicode_state.ids; - PyThread_acquire_lock(rt_ids->lock, WAIT_LOCK); + PyMutex_Lock(&rt_ids->mutex); // Check again to detect concurrent access. Another thread can have // initialized the index while this thread waited for the lock. index = _Py_atomic_load_ssize(&id->index); @@ -1914,7 +1914,7 @@ _PyUnicode_FromId(_Py_Identifier *id) rt_ids->next_index++; _Py_atomic_store_ssize(&id->index, index); } - PyThread_release_lock(rt_ids->lock); + PyMutex_Unlock(&rt_ids->mutex); } assert(index >= 0); diff --git a/Python/ceval_gil.c b/Python/ceval_gil.c index 92c4b2fee9f863..255e838fd25655 100644 --- a/Python/ceval_gil.c +++ b/Python/ceval_gil.c @@ -589,9 +589,7 @@ _PyEval_ReInitThreads(PyThreadState *tstate) take_gil(tstate); struct _pending_calls *pending = &tstate->interp->ceval.pending; - if (_PyThread_at_fork_reinit(&pending->lock) < 0) { - return _PyStatus_ERR("Can't reinitialize pending calls lock"); - } + pending->mutex = (PyMutex){0}; /* Destroy all threads except the current one */ _PyThreadState_DeleteExcept(tstate); @@ -720,13 +718,10 @@ _PyEval_AddPendingCall(PyInterpreterState *interp, assert(_Py_IsMainInterpreter(interp)); pending = &_PyRuntime.ceval.pending_mainthread; } - /* Ensure that _PyEval_InitState() was called - and that _PyEval_FiniState() is not called yet. */ - assert(pending->lock != NULL); - PyThread_acquire_lock(pending->lock, WAIT_LOCK); + PyMutex_Lock(&pending->mutex); int result = _push_pending_call(pending, func, arg, flags); - PyThread_release_lock(pending->lock); + PyMutex_Unlock(&pending->mutex); /* signal main loop */ SIGNAL_PENDING_CALLS(interp); @@ -768,9 +763,9 @@ _make_pending_calls(struct _pending_calls *pending) int flags = 0; /* pop one item off the queue while holding the lock */ - PyThread_acquire_lock(pending->lock, WAIT_LOCK); + PyMutex_Lock(&pending->mutex); _pop_pending_call(pending, &func, &arg, &flags); - PyThread_release_lock(pending->lock); + PyMutex_Unlock(&pending->mutex); /* having released the lock, perform the callback */ if (func == NULL) { @@ -795,7 +790,7 @@ make_pending_calls(PyInterpreterState *interp) /* Only one thread (per interpreter) may run the pending calls at once. In the same way, we don't do recursive pending calls. */ - PyThread_acquire_lock(pending->lock, WAIT_LOCK); + PyMutex_Lock(&pending->mutex); if (pending->busy) { /* A pending call was added after another thread was already handling the pending calls (and had already "unsignaled"). @@ -807,11 +802,11 @@ make_pending_calls(PyInterpreterState *interp) care of any remaining pending calls. Until then, though, all the interpreter's threads will be tripping the eval breaker every time it's checked. */ - PyThread_release_lock(pending->lock); + PyMutex_Unlock(&pending->mutex); return 0; } pending->busy = 1; - PyThread_release_lock(pending->lock); + PyMutex_Unlock(&pending->mutex); /* unsignal before starting to call callbacks, so that any callback added in-between re-signals */ @@ -892,23 +887,9 @@ Py_MakePendingCalls(void) } void -_PyEval_InitState(PyInterpreterState *interp, PyThread_type_lock pending_lock) +_PyEval_InitState(PyInterpreterState *interp) { _gil_initialize(&interp->_gil); - - struct _pending_calls *pending = &interp->ceval.pending; - assert(pending->lock == NULL); - pending->lock = pending_lock; -} - -void -_PyEval_FiniState(struct _ceval_state *ceval) -{ - struct _pending_calls *pending = &ceval->pending; - if (pending->lock != NULL) { - PyThread_free_lock(pending->lock); - pending->lock = NULL; - } } diff --git a/Python/crossinterp.c b/Python/crossinterp.c index a908f9ae340ee9..6beb319782eeca 100644 --- a/Python/crossinterp.c +++ b/Python/crossinterp.c @@ -454,16 +454,16 @@ _xidregistry_clear(struct _xidregistry *xidregistry) static void _xidregistry_lock(struct _xidregistry *registry) { - if (registry->mutex != NULL) { - PyThread_acquire_lock(registry->mutex, WAIT_LOCK); + if (registry->global) { + PyMutex_Lock(®istry->mutex); } } static void _xidregistry_unlock(struct _xidregistry *registry) { - if (registry->mutex != NULL) { - PyThread_release_lock(registry->mutex); + if (registry->global) { + PyMutex_Unlock(®istry->mutex); } } @@ -856,19 +856,10 @@ _xidregistry_init(struct _xidregistry *registry) registry->initialized = 1; if (registry->global) { - // We manage the mutex lifecycle in pystate.c. - assert(registry->mutex != NULL); - // Registering the builtins is cheap so we don't bother doing it lazily. assert(registry->head == NULL); _register_builtins_for_crossinterpreter_data(registry); } - else { - // Within an interpreter we rely on the GIL instead of a separate lock. - assert(registry->mutex == NULL); - - // There's nothing else to initialize. - } } static void @@ -880,17 +871,6 @@ _xidregistry_fini(struct _xidregistry *registry) registry->initialized = 0; _xidregistry_clear(registry); - - if (registry->global) { - // We manage the mutex lifecycle in pystate.c. - assert(registry->mutex != NULL); - } - else { - // There's nothing else to finalize. - - // Within an interpreter we rely on the GIL instead of a separate lock. - assert(registry->mutex == NULL); - } } diff --git a/Python/import.c b/Python/import.c index f37393bbdc4269..1d6e0cf6e13fc3 100644 --- a/Python/import.c +++ b/Python/import.c @@ -415,11 +415,7 @@ remove_module(PyThreadState *tstate, PyObject *name) Py_ssize_t _PyImport_GetNextModuleIndex(void) { - PyThread_acquire_lock(EXTENSIONS.mutex, WAIT_LOCK); - LAST_MODULE_INDEX++; - Py_ssize_t index = LAST_MODULE_INDEX; - PyThread_release_lock(EXTENSIONS.mutex); - return index; + return _Py_atomic_add_ssize(&LAST_MODULE_INDEX, 1) + 1; } static const char * @@ -879,13 +875,13 @@ gets even messier. static inline void extensions_lock_acquire(void) { - PyThread_acquire_lock(_PyRuntime.imports.extensions.mutex, WAIT_LOCK); + PyMutex_Lock(&_PyRuntime.imports.extensions.mutex); } static inline void extensions_lock_release(void) { - PyThread_release_lock(_PyRuntime.imports.extensions.mutex); + PyMutex_Unlock(&_PyRuntime.imports.extensions.mutex); } /* Magic for extension modules (built-in as well as dynamically diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index ac8d5208322882..d763f444412920 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -3055,13 +3055,13 @@ wait_for_thread_shutdown(PyThreadState *tstate) int Py_AtExit(void (*func)(void)) { struct _atexit_runtime_state *state = &_PyRuntime.atexit; - PyThread_acquire_lock(state->mutex, WAIT_LOCK); + PyMutex_Lock(&state->mutex); if (state->ncallbacks >= NEXITFUNCS) { - PyThread_release_lock(state->mutex); + PyMutex_Unlock(&state->mutex); return -1; } state->callbacks[state->ncallbacks++] = func; - PyThread_release_lock(state->mutex); + PyMutex_Unlock(&state->mutex); return 0; } @@ -3071,18 +3071,18 @@ call_ll_exitfuncs(_PyRuntimeState *runtime) atexit_callbackfunc exitfunc; struct _atexit_runtime_state *state = &runtime->atexit; - PyThread_acquire_lock(state->mutex, WAIT_LOCK); + PyMutex_Lock(&state->mutex); while (state->ncallbacks > 0) { /* pop last function from the list */ state->ncallbacks--; exitfunc = state->callbacks[state->ncallbacks]; state->callbacks[state->ncallbacks] = NULL; - PyThread_release_lock(state->mutex); + PyMutex_Unlock(&state->mutex); exitfunc(); - PyThread_acquire_lock(state->mutex, WAIT_LOCK); + PyMutex_Lock(&state->mutex); } - PyThread_release_lock(state->mutex); + PyMutex_Unlock(&state->mutex); fflush(stdout); fflush(stderr); diff --git a/Python/pystate.c b/Python/pystate.c index 89e9bddc6de060..730e66f586dcae 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -379,49 +379,11 @@ _Py_COMP_DIAG_IGNORE_DEPR_DECLS static const _PyRuntimeState initial = _PyRuntimeState_INIT(_PyRuntime); _Py_COMP_DIAG_POP -#define NUMLOCKS 8 -#define LOCKS_INIT(runtime) \ - { \ - &(runtime)->interpreters.mutex, \ - &(runtime)->xi.registry.mutex, \ - &(runtime)->unicode_state.ids.lock, \ - &(runtime)->imports.extensions.mutex, \ - &(runtime)->ceval.pending_mainthread.lock, \ - &(runtime)->atexit.mutex, \ - &(runtime)->audit_hooks.mutex, \ - &(runtime)->allocators.mutex, \ - } - -static int -alloc_for_runtime(PyThread_type_lock locks[NUMLOCKS]) -{ - /* Force default allocator, since _PyRuntimeState_Fini() must - use the same allocator than this function. */ - PyMemAllocatorEx old_alloc; - _PyMem_SetDefaultAllocator(PYMEM_DOMAIN_RAW, &old_alloc); - - for (int i = 0; i < NUMLOCKS; i++) { - PyThread_type_lock lock = PyThread_allocate_lock(); - if (lock == NULL) { - for (int j = 0; j < i; j++) { - PyThread_free_lock(locks[j]); - locks[j] = NULL; - } - break; - } - locks[i] = lock; - } - - PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &old_alloc); - return 0; -} - static void init_runtime(_PyRuntimeState *runtime, void *open_code_hook, void *open_code_userdata, _Py_AuditHookEntry *audit_hook_head, - Py_ssize_t unicode_next_index, - PyThread_type_lock locks[NUMLOCKS]) + Py_ssize_t unicode_next_index) { assert(!runtime->preinitializing); assert(!runtime->preinitialized); @@ -435,12 +397,6 @@ init_runtime(_PyRuntimeState *runtime, PyPreConfig_InitPythonConfig(&runtime->preconfig); - PyThread_type_lock *lockptrs[NUMLOCKS] = LOCKS_INIT(runtime); - for (int i = 0; i < NUMLOCKS; i++) { - assert(locks[i] != NULL); - *lockptrs[i] = locks[i]; - } - // Set it to the ID of the main thread of the main interpreter. runtime->main_thread = PyThread_get_thread_ident(); @@ -466,11 +422,6 @@ _PyRuntimeState_Init(_PyRuntimeState *runtime) // is called multiple times. Py_ssize_t unicode_next_index = runtime->unicode_state.ids.next_index; - PyThread_type_lock locks[NUMLOCKS]; - if (alloc_for_runtime(locks) != 0) { - return _PyStatus_NO_MEMORY(); - } - if (runtime->_initialized) { // Py_Initialize() must be running again. // Reset to _PyRuntimeState_INIT. @@ -489,7 +440,7 @@ _PyRuntimeState_Init(_PyRuntimeState *runtime) } init_runtime(runtime, open_code_hook, open_code_userdata, audit_hook_head, - unicode_next_index, locks); + unicode_next_index); return _PyStatus_OK(); } @@ -509,23 +460,6 @@ _PyRuntimeState_Fini(_PyRuntimeState *runtime) if (PyThread_tss_is_created(&runtime->trashTSSkey)) { PyThread_tss_delete(&runtime->trashTSSkey); } - - /* Force the allocator used by _PyRuntimeState_Init(). */ - PyMemAllocatorEx old_alloc; - _PyMem_SetDefaultAllocator(PYMEM_DOMAIN_RAW, &old_alloc); -#define FREE_LOCK(LOCK) \ - if (LOCK != NULL) { \ - PyThread_free_lock(LOCK); \ - LOCK = NULL; \ - } - - PyThread_type_lock *lockptrs[NUMLOCKS] = LOCKS_INIT(runtime); - for (int i = 0; i < NUMLOCKS; i++) { - FREE_LOCK(*lockptrs[i]); - } - -#undef FREE_LOCK - PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &old_alloc); } #ifdef HAVE_FORK @@ -537,28 +471,23 @@ _PyRuntimeState_ReInitThreads(_PyRuntimeState *runtime) // This was initially set in _PyRuntimeState_Init(). runtime->main_thread = PyThread_get_thread_ident(); - /* Force default allocator, since _PyRuntimeState_Fini() must - use the same allocator than this function. */ - PyMemAllocatorEx old_alloc; - _PyMem_SetDefaultAllocator(PYMEM_DOMAIN_RAW, &old_alloc); - - PyThread_type_lock *lockptrs[NUMLOCKS] = LOCKS_INIT(runtime); - int reinit_err = 0; - for (int i = 0; i < NUMLOCKS; i++) { - reinit_err += _PyThread_at_fork_reinit(lockptrs[i]); - } - - PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &old_alloc); - // Clears the parking lot. Any waiting threads are dead. This must be // called before releasing any locks that use the parking lot. _PyParkingLot_AfterFork(); + // Re-initialize global locks + runtime->interpreters.mutex = (PyMutex){0}; + runtime->xi.registry.mutex = (PyMutex){0}; + runtime->unicode_state.ids.mutex = (PyMutex){0}; + runtime->imports.extensions.mutex = (PyMutex){0}; + runtime->ceval.pending_mainthread.mutex = (PyMutex){0}; + runtime->atexit.mutex = (PyMutex){0}; + runtime->audit_hooks.mutex = (PyMutex){0}; + runtime->allocators.mutex = (PyMutex){0}; + /* bpo-42540: id_mutex is freed by _PyInterpreterState_Delete, which does * not force the default allocator. */ - reinit_err += _PyThread_at_fork_reinit(&runtime->interpreters.main->id_mutex); - - if (reinit_err < 0) { + if (_PyThread_at_fork_reinit(&runtime->interpreters.main->id_mutex) < 0) { return _PyStatus_ERR("Failed to reinitialize runtime locks"); } @@ -594,24 +523,6 @@ _PyInterpreterState_Enable(_PyRuntimeState *runtime) { struct pyinterpreters *interpreters = &runtime->interpreters; interpreters->next_id = 0; - - /* Py_Finalize() calls _PyRuntimeState_Fini() which clears the mutex. - Create a new mutex if needed. */ - if (interpreters->mutex == NULL) { - /* Force default allocator, since _PyRuntimeState_Fini() must - use the same allocator than this function. */ - PyMemAllocatorEx old_alloc; - _PyMem_SetDefaultAllocator(PYMEM_DOMAIN_RAW, &old_alloc); - - interpreters->mutex = PyThread_allocate_lock(); - - PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &old_alloc); - - if (interpreters->mutex == NULL) { - return _PyStatus_ERR("Can't initialize threads for interpreter"); - } - } - return _PyStatus_OK(); } @@ -654,8 +565,7 @@ free_interpreter(PyInterpreterState *interp) static PyStatus init_interpreter(PyInterpreterState *interp, _PyRuntimeState *runtime, int64_t id, - PyInterpreterState *next, - PyThread_type_lock pending_lock) + PyInterpreterState *next) { if (interp->_initialized) { return _PyStatus_ERR("interpreter already initialized"); @@ -684,7 +594,7 @@ init_interpreter(PyInterpreterState *interp, return status; } - _PyEval_InitState(interp, pending_lock); + _PyEval_InitState(interp); _PyGC_InitState(&interp->gc); PyConfig_InitPythonConfig(&interp->config); _PyType_InitCache(interp); @@ -730,11 +640,6 @@ _PyInterpreterState_New(PyThreadState *tstate, PyInterpreterState **pinterp) } } - PyThread_type_lock pending_lock = PyThread_allocate_lock(); - if (pending_lock == NULL) { - return _PyStatus_NO_MEMORY(); - } - /* We completely serialize creation of multiple interpreters, since it simplifies things here and blocking concurrent calls isn't a problem. Regardless, we must fully block subinterpreter creation until @@ -781,11 +686,10 @@ _PyInterpreterState_New(PyThreadState *tstate, PyInterpreterState **pinterp) interpreters->head = interp; status = init_interpreter(interp, runtime, - id, old_head, pending_lock); + id, old_head); if (_PyStatus_EXCEPTION(status)) { goto error; } - pending_lock = NULL; HEAD_UNLOCK(runtime); @@ -796,9 +700,6 @@ _PyInterpreterState_New(PyThreadState *tstate, PyInterpreterState **pinterp) error: HEAD_UNLOCK(runtime); - if (pending_lock != NULL) { - PyThread_free_lock(pending_lock); - } if (interp != NULL) { free_interpreter(interp); } @@ -1003,8 +904,6 @@ PyInterpreterState_Delete(PyInterpreterState *interp) zapthreads(interp); - _PyEval_FiniState(&interp->ceval); - // XXX These two calls should be done at the end of clear_interpreter(), // but currently some objects get decref'ed after that. #ifdef Py_REF_DEBUG diff --git a/Python/sysmodule.c b/Python/sysmodule.c index c17de44731b703..b6b08b6ac16a1e 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -451,15 +451,9 @@ PySys_AddAuditHook(Py_AuditHookFunction hook, void *userData) e->hookCFunction = (Py_AuditHookFunction)hook; e->userData = userData; - if (runtime->audit_hooks.mutex == NULL) { - /* The runtime must not be initialized yet. */ - add_audit_hook_entry_unlocked(runtime, e); - } - else { - PyThread_acquire_lock(runtime->audit_hooks.mutex, WAIT_LOCK); - add_audit_hook_entry_unlocked(runtime, e); - PyThread_release_lock(runtime->audit_hooks.mutex); - } + PyMutex_Lock(&runtime->audit_hooks.mutex); + add_audit_hook_entry_unlocked(runtime, e); + PyMutex_Unlock(&runtime->audit_hooks.mutex); return 0; } From e2227da44cbdfc4c0898dc17b8bd2f00d6c81237 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Thu, 7 Dec 2023 13:09:00 -0500 Subject: [PATCH 2/4] Changes from review --- Include/internal/pycore_lock.h | 7 +++++++ Python/crossinterp.c | 1 + Python/pystate.c | 24 ++++++++++++++++-------- 3 files changed, 24 insertions(+), 8 deletions(-) diff --git a/Include/internal/pycore_lock.h b/Include/internal/pycore_lock.h index bd455ea57c521b..a10d20c8f19823 100644 --- a/Include/internal/pycore_lock.h +++ b/Include/internal/pycore_lock.h @@ -92,6 +92,13 @@ PyMutex_IsLocked(PyMutex *m) return (_Py_atomic_load_uint8(&m->v) & _Py_LOCKED) != 0; } +// Re-initializes the mutex after a fork to the unlocked state. +static inline void +_PyMutex_at_fork_reinit(PyMutex *m) +{ + *m = (PyMutex){0}; +} + typedef enum _PyLockFlags { // Do not detach/release the GIL when waiting on the lock. _Py_LOCK_DONT_DETACH = 0, diff --git a/Python/crossinterp.c b/Python/crossinterp.c index 6beb319782eeca..b2ec2fc23992b2 100644 --- a/Python/crossinterp.c +++ b/Python/crossinterp.c @@ -457,6 +457,7 @@ _xidregistry_lock(struct _xidregistry *registry) if (registry->global) { PyMutex_Lock(®istry->mutex); } + // else: Within an interpreter we rely on the GIL instead of a separate lock. } static void diff --git a/Python/pystate.c b/Python/pystate.c index 730e66f586dcae..18baefd56f9940 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -379,6 +379,18 @@ _Py_COMP_DIAG_IGNORE_DEPR_DECLS static const _PyRuntimeState initial = _PyRuntimeState_INIT(_PyRuntime); _Py_COMP_DIAG_POP +#define LOCKS_INIT(runtime) \ + { \ + &(runtime)->interpreters.mutex, \ + &(runtime)->xi.registry.mutex, \ + &(runtime)->unicode_state.ids.mutex, \ + &(runtime)->imports.extensions.mutex, \ + &(runtime)->ceval.pending_mainthread.mutex, \ + &(runtime)->atexit.mutex, \ + &(runtime)->audit_hooks.mutex, \ + &(runtime)->allocators.mutex, \ + } + static void init_runtime(_PyRuntimeState *runtime, void *open_code_hook, void *open_code_userdata, @@ -476,14 +488,10 @@ _PyRuntimeState_ReInitThreads(_PyRuntimeState *runtime) _PyParkingLot_AfterFork(); // Re-initialize global locks - runtime->interpreters.mutex = (PyMutex){0}; - runtime->xi.registry.mutex = (PyMutex){0}; - runtime->unicode_state.ids.mutex = (PyMutex){0}; - runtime->imports.extensions.mutex = (PyMutex){0}; - runtime->ceval.pending_mainthread.mutex = (PyMutex){0}; - runtime->atexit.mutex = (PyMutex){0}; - runtime->audit_hooks.mutex = (PyMutex){0}; - runtime->allocators.mutex = (PyMutex){0}; + PyMutex *locks[] = LOCKS_INIT(runtime); + for (size_t i = 0; i < Py_ARRAY_LENGTH(locks); i++) { + _PyMutex_at_fork_reinit(locks[i]); + } /* bpo-42540: id_mutex is freed by _PyInterpreterState_Delete, which does * not force the default allocator. */ From 3ab46f59334a1c985085d6627301405916ddab01 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Thu, 7 Dec 2023 13:11:53 -0500 Subject: [PATCH 3/4] Use _PyMutex_at_fork_reinit in ceval_gil.c --- Python/ceval_gil.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/ceval_gil.c b/Python/ceval_gil.c index 255e838fd25655..636e4db898f2d9 100644 --- a/Python/ceval_gil.c +++ b/Python/ceval_gil.c @@ -589,7 +589,7 @@ _PyEval_ReInitThreads(PyThreadState *tstate) take_gil(tstate); struct _pending_calls *pending = &tstate->interp->ceval.pending; - pending->mutex = (PyMutex){0}; + _PyMutex_at_fork_reinit(&pending->mutex); /* Destroy all threads except the current one */ _PyThreadState_DeleteExcept(tstate); From 8ef8314c1df493c1b079e91e149f8373f58f35e8 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Thu, 7 Dec 2023 13:20:25 -0500 Subject: [PATCH 4/4] Use memset instead C99 initializer to fix Windows build --- Include/internal/pycore_lock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Include/internal/pycore_lock.h b/Include/internal/pycore_lock.h index a10d20c8f19823..7a89410211b90a 100644 --- a/Include/internal/pycore_lock.h +++ b/Include/internal/pycore_lock.h @@ -96,7 +96,7 @@ PyMutex_IsLocked(PyMutex *m) static inline void _PyMutex_at_fork_reinit(PyMutex *m) { - *m = (PyMutex){0}; + memset(m, 0, sizeof(*m)); } typedef enum _PyLockFlags {