8000 gh-111569: Implement Python critical section API by colesbury · Pull Request #111571 · python/cpython · GitHub
[go: up one dir, main page]

Skip to content

gh-111569: Implement Python critical section API #111571

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Nov 8, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
gh-111569: Implement Python critical section API
Critical sections are helpers to replace the global interpreter lock
with finer grained locking. They provide similar guarantees to the GIL
and avoid the deadlock risk that plain locking involves. Critical
sections are implicitly ended whenever the GIL would be released. They
are resumed when the GIL would be acquired. Nested critical sections
behave as if the sections were interleaved.
  • Loading branch information
colesbury committed Oct 31, 2023
commit 6bf34f54d997b0557bf4d7bdf4a99cafb23c80d4
7 changes: 7 additions & 0 deletions Include/cpython/pystate.h
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,13 @@ struct _ts {

struct _py_trashcan trash;

/* Tagged pointer to top-most critical section, or zero if there is no
* active critical section. Critical sections are only used in
* `--disable-gil` builds (i.e., when Py_NOGIL is defined to 1). In the
* default build, this field is always zero.
*/
uintptr_t critical_section;

/* Called when a thread state is deleted normally, but not when it
* is destroyed after fork().
* Pain: to prevent rare but fatal shutdown errors (issue 18808),
Expand Down
204 changes: 204 additions & 0 deletions Include/internal/pycore_critical_section.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
#ifndef Py_INTERNAL_CRITICAL_SECTION_H
#define Py_INTERNAL_CRITICAL_SECTION_H

#ifndef Py_BUILD_CORE
# error "this header requires Py_BUILD_CORE define"
#endif

#include "pycore_lock.h" // PyMutex
#include "pycore_pystate.h" // _PyThreadState_GET()
#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

// Implementation of Python critical sections: helpers to replace the global
// interpreter lock with fine grained locking.
//
// A Python critical section is a region of code that can only be executed by
// a single thread at at time. The regions begin with a call to
// Py_BEGIN_CRITICAL_SECTION and ends with Py_END_CRITICAL_SECTION. Active
// critical sections are *implicitly* suspended whenever the thread calls
// `_PyThreadState_Detach()` (i.e., at tiems when the interpreter would have
// released the GIL).
//
// The most recent critical section is resumed when `_PyThreadState_Attach()`
// is called. See `_PyCriticalSection_Resume()`.
//
// The purpose of implicitly ending critical sections is to avoid deadlocks
// when locking multiple objects. Any time a thread would have released the
// GIL, it releases all locks from active critical sections. This includes
// times when a thread blocks while attempting to acquire a lock.
//
// Note that the macros Py_BEGIN_CRITICAL_SECTION and Py_END_CRITICAL_SECTION
// are no-ops in non-free-threaded builds.
//
// Example usage:
// Py_BEGIN_CRITICAL_SECTION(op);
// ...
// Py_END_CRITICAL_SECTION;
//
// To lock two objects at once:
// Py_BEGIN_CRITICAL_SECTION2(op1, op2);
// ...
// Py_END_CRITICAL_SECTION2;


// Tagged pointers to critical sections use the two least significant bits to
// mark if the pointed-to critical section is inactive and whether it is a
// _PyCriticalSection2 object.
#define _Py_CRITICAL_SECTION_INACTIVE 0x1
#define _Py_CRITICAL_SECTION_TWO_MUTEXES 0x2
#define _Py_CRITICAL_SECTION_MASK 0x3

#ifdef Py_NOGIL
#define Py_BEGIN_CRITICAL_SECTION(op) { \
_PyCriticalSection _cs; \
_PyCriticalSection_Begin(&_cs, &_PyObject_CAST(op)->ob_mutex)

#define Py_END_CRITICAL_SECTION \
_PyCriticalSection_End(&_cs); \
}

#define Py_BEGIN_CRITICAL_SECTION2(a, b) { \
_PyCriticalSection2 _cs2; \
_PyCriticalSection2_Begin(&_cs2, &_PyObject_CAST(a)->ob_mutex, &_PyObject_CAST(b)->ob_mutex)

#define Py_END_CRITICAL_SECTION2 \
_PyCriticalSection2_End(&_cs2); \
}
#else
// The critical section APIs are no-ops with the GIL.
#define Py_BEGIN_CRITICAL_SECTION(op)
#define Py_END_CRITICAL_SECTION
#define Py_BEGIN_CRITICAL_SECTION2(a, b)
#define Py_END_CRITICAL_SECTION2
#endif

typedef struct {
// Tagged pointer to an outer active critical section (or 0).
// The two least-significant-bits indicate whether the pointed-to critical
// section is inactive and whether it is a _PyCriticalSection2 object.
uintptr_t prev;

// Mutex used to protect critical section
PyMutex *mutex;
} _PyCriticalSection;

// A critical section protected by two mutexes. Use
// _PyCriticalSection2_Begin and _PyCriticalSection2_End.
typedef struct {
_PyCriticalSection base;

PyMutex *mutex2;
} _PyCriticalSection2;

static inline int
_PyCriticalSection_IsActive(uintptr_t tag)
{
return tag != 0 && (tag & _Py_CRITICAL_SECTION_INACTIVE) == 0;
}

// Resumes the top-most critical section.
PyAPI_FUNC(void)
_PyCriticalSection_Resume(PyThreadState *tstate);

// (private) slow path for locking the mutex
PyAPI_FUNC(void)
_PyCriticalSection_BeginSlow(_PyCriticalSection *c, PyMutex *m);

PyAPI_FUNC(void)
_PyCriticalSection2_BeginSlow(_PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2,
int is_m1_locked);

static inline void
_PyCriticalSection_Begin(_PyCriticalSection *c, PyMutex *m)
{
if (PyMutex_LockFast(&m->v)) {
PyThreadState *tstate = _PyThreadState_GET();
c->mutex = m;
c->prev = tstate->critical_section;
tstate->critical_section = (uintptr_t)c;
}
else {
_PyCriticalSection_BeginSlow(c, m);
}
}

// Removes the top-most critical section from the thread's of critical
// sections. If the new top-most critical section is inactive, then it is
// resumed.
static inline void
_PyCriticalSection_Pop(_PyCriticalSection *c)
{
PyThreadState *tstate = _PyThreadState_GET();
uintptr_t prev = c->prev;
tstate->critical_section = prev;

if ((prev & _Py_CRITICAL_SECTION_INACTIVE) != 0) {
_PyCriticalSection_Resume(tstate);
}
}

static inline void
_PyCriticalSection_End(_PyCriticalSection *c)
{
PyMutex_Unlock(c->mutex);
_PyCriticalSection_Pop(c);
}

static inline void
_PyCriticalSection2_Begin(_PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2)
{
if (m1 == m2) {
// If the two mutex arguments are the same, treat this as a critical
// section with a single mutex.
c->mutex2 = NULL;
_PyCriticalSection_Begin(&c->base, m1);
return;
}

if ((uintptr_t)m2 < (uintptr_t)m1) {
// Sort the mutexes so that the lower address is locked first.
PyMutex *tmp = m1;
m1 = m2;
m2 = tmp;
}

if (PyMutex_LockFast(&m1->v)) {
if (PyMutex_LockFast(&m2->v)) {
PyThreadState *tstate = _PyThreadState_GET();
c->base.mutex = m1;
c->mutex2 = m2;
c->base.prev = tstate->critical_section;

uintptr_t p = (uintptr_t)c | _Py_CRITICAL_SECTION_TWO_MUTEXES;
tstate->critical_section = p;
}
else {
_PyCriticalSection2_BeginSlow(c, m1, m2, 1);
}
}
else {
_PyCriticalSection2_BeginSlow(c, m1, m2, 0);
}
}

static inline void
_PyCriticalSection2_End(_PyCriticalSection2 *c)
{
if (c->mutex2) {
PyMutex_Unlock(c->mutex2);
}
PyMutex_Unlock(c->base.mutex);
_PyCriticalSection_Pop(&c->base);
}

PyAPI_FUNC(void)
_PyCriticalSection_SuspendAll(PyThreadState *tstate);

#ifdef __cplusplus
}
#endif
#endif /* !Py_INTERNAL_CRITICAL_SECTION_H */
17 changes: 14 additions & 3 deletions Include/internal/pycore_lock.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,13 @@ extern "C" {
// PyMutex_Lock(&m);
// ...
// PyMutex_Unlock(&m);
typedef struct _PyMutex {
uint8_t v;
} PyMutex;

// NOTE: In Py_NOGIL builds, struct _PyMutex is defined in Include/object.h
#ifndef Py_NOGIL
struct _PyMutex { uint8_t v; };
#endif

typedef struct _PyMutex PyMutex;

#define _Py_UNLOCKED 0
#define _Py_LOCKED 1
Expand All @@ -46,6 +50,13 @@ PyAPI_FUNC(void) _PyMutex_LockSlow(PyMutex *m);
// (private) slow path for unlocking the mutex
PyAPI_FUNC(void) _PyMutex_UnlockSlow(PyMutex *m);

static inline int
PyMutex_LockFast(uint8_t *lock_bits)
{
uint8_t expected = _Py_UNLOCKED;
return _Py_atomic_compare_exchange_uint8(lock_bits, &expected, _Py_LOCKED);
}

// Locks the mutex.
//
// If the mutex is currently locked, the calling thread will be parked until
Expand Down
6 changes: 4 additions & 2 deletions Include/object.h
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ check by comparing the reference count field to the immortality reference count.
{ \
0, \
0, \
0, \
{ 0 }, \
0, \
_Py_IMMORTAL_REFCNT_LOCAL, \
0, \
Expand Down Expand Up @@ -204,10 +204,12 @@ struct _object {
// Create a shared field from a refcnt and desired flags
#define _Py_REF_SHARED(refcnt, flags) (((refcnt) << _Py_REF_SHARED_SHIFT) + (flags))

struct _PyMutex { uint8_t v; };

struct _object {
uintptr_t ob_tid; // thread id (or zero)
uint16_t _padding;
uint8_t ob_mutex; // per-object lock
struct _PyMutex ob_mutex; // per-object lock
uint8_t ob_gc_bits; // gc-related state
uint32_t ob_ref_local; // local reference count
Py_ssize_t ob_ref_shared; // shared (atomic) reference count
Expand Down
2 changes: 2 additions & 0 deletions Makefile.pre.in
Original file line number Diff line number Diff line change
Expand Up @@ -409,6 +409,7 @@ PYTHON_OBJS= \
Python/codecs.o \
Python/compile.o \
Python/context.o \
Python/critical_section.o \
Python/crossinterp.o \
Python/dynamic_annotations.o \
Python/errors.o \
Expand Down Expand Up @@ -1801,6 +1802,7 @@ PYTHON_HEADERS= \
$(srcdir)/Include/internal/pycore_complexobject.h \
$(srcdir)/Include/internal/pycore_condvar.h \
$(srcdir)/Include/internal/pycore_context.h \
$(srcdir)/Include/internal/pycore_critical_section.h \
$(srcdir)/Include/internal/pycore_crossinterp.h \
$(srcdir)/Include/internal/pycore_dict.h \
$(srcdir)/Include/internal/pycore_dict_state.h \
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Implement "Python Critical Sections" from :pep:`703`. These are macros to
help replace the GIL with per-object locks in the ``--disable-gil`` build of
CPython. The macros are no-ops in the default build.
2 changes: 1 addition & 1 deletion Modules/Setup.stdlib.in
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@
@MODULE_XXSUBTYPE_TRUE@xxsubtype xxsubtype.c
@MODULE__XXTESTFUZZ_TRUE@_xxtestfuzz _xxtestfuzz/_xxtestfuzz.c _xxtestfuzz/fuzzer.c
@MODULE__TESTBUFFER_TRUE@_testbuffer _testbuffer.c
@MODULE__TESTINTERNALCAPI_TRUE@_testinternalcapi _testinternalcapi.c _testinternalcapi/test_lock.c _testinternalcapi/pytime.c _testinternalcapi/set.c
@MODULE__TESTINTERNALCAPI_TRUE@_testinternalcapi _testinternalcapi.c _testinternalcapi/test_lock.c _testinternalcapi/pytime.c _testinternalcapi/set.c _testinternalcapi/test_critical_sections.c
@MODULE__TESTCAPI_TRUE@_testcapi _testcapimodule.c _testcapi/vectorcall.c _testcapi/vectorcall_limited.c _testcapi/heaptype.c _testcapi/abstract.c _testcapi/unicode.c _testcapi/dict.c _testcapi/set.c _testcapi/getargs.c _testcapi/datetime.c _testcapi/docstring.c _testcapi/mem.c _testcapi/watchers.c _testcapi/long.c _testcapi/float.c _testcapi/structmember.c _testcapi/exceptions.c _testcapi/code.c _testcapi/buffer.c _testcapi/pyatomic.c _testcapi/pyos.c _testcapi/immortal.c _testcapi/heaptype_relative.c _testcapi/gc.c _testcapi/sys.c
@MODULE__TESTCLINIC_TRUE@_testclinic _testclinic.c
@MODULE__TESTCLINIC_LIMITED_TRUE@_testclinic_limited _testclinic_limited.c
Expand Down
3 changes: 3 additions & 0 deletions Modules/_testinternalcapi.c
Original file line number Diff line number Diff line change
Expand Up @@ -1719,6 +1719,9 @@ module_exec(PyObject *module)
if (_PyTestInternalCapi_Init_Set(module) < 0) {
return 1;
}
if (_PyTestInternalCapi_Init_CriticalSection(module) < 0) {
return 1;
}

if (PyModule_Add(module, "SIZEOF_PYGC_HEAD",
PyLong_FromSsize_t(sizeof(PyGC_Head))) < 0) {
Expand Down
1 change: 1 addition & 0 deletions Modules/_testinternalcapi/parts.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,6 @@
int _PyTestInternalCapi_Init_Lock(PyObject *module);
int _PyTestInternalCapi_Init_PyTime(PyObject *module);
int _PyTestInternalCapi_Init_Set(PyObject *module);
int _PyTestInternalCapi_Init_CriticalSection(PyObject *module);

#endif // Py_TESTINTERNALCAPI_PARTS_H
Loading
0