8000 GH-109369: Merge all eval-breaker flags and monitoring version into one word. by markshannon · Pull Request #109846 · python/cpython · GitHub
[go: up one dir, main page]

Skip to content

GH-109369: Merge all eval-breaker flags and monitoring version into one word. #109846

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 20 commits into from
Oct 4, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Use one bit per check in eval_breaker
  • Loading branch information
markshannon committed Sep 13, 2023
commit 2b8766e808782b742e47970b0b3c4601d5cf7dce
31 changes: 31 additions & 0 deletions Include/internal/pycore_ceval.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ extern "C" {

#include "pycore_interp.h" // PyInterpreterState.eval_frame
#include "pycore_pystate.h" // _PyThreadState_GET()
#include "cpython/pyatomic.h"

/* Forward declarations */
struct pyruntimestate;
Expand Down Expand Up @@ -193,6 +194,36 @@ int _PyEval_UnpackIterable(PyThreadState *tstate, PyObject *v, int argcnt, int a
void _PyEval_FrameClearAndPop(PyThreadState *tstate, _PyInterpreterFrame *frame);


#define _PY_GIL_DROP_REQUEST_BIT 0
#define _PY_SIGNALS_PENDING_BIT 1
#define _PY_CALLS_TO_DO_BIT 2
#define _PY_ASYNC_EXCEPTION_BIT 3
#define _PY_GC_SCHEDULED_BIT 4


static inline void
_Py_set_eval_breaker_bit(PyInterpreterState *interp, int32_t bit, int32_t set)
{
assert(set == 0 || set == 1);
int32_t to_set = set << bit;
int32_t mask = 1 << bit;
int32_t old = _Py_atomic_load_int32(&interp->ceval.eval_breaker2);
if ((old & mask) == to_set) {
return;
}
int32_t new;
do {
new = (old & ~mask) | (set << bit);
} while (!_Py_atomic_compare_exchange_int32(&interp->ceval.eval_breaker2, &old, new));
}

static inline bool
_Py_eval_breaker_bit_is_set(PyInterpreterState *interp, int32_t bit)
{
return _Py_atomic_load_int32(&interp->ceval.eval_breaker2) & (1 << bit);
}


#ifdef __cplusplus
}
#endif
Expand Down
15 changes: 4 additions & 11 deletions Include/internal/pycore_ceval_state.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ struct _pending_calls {
int busy;
PyThread_type_lock lock;
/* Request for running pending calls. */
_Py_atomic_int calls_to_do;
int32_t calls_to_do;
/* Request for looking at the `async_exc` field of the current
thread state.
Guarded by the GIL. */
Expand Down Expand Up @@ -60,11 +60,6 @@ struct _ceval_runtime_state {
int _not_used;
#endif
} perf;
/* Request for checking signals. It is shared by all interpreters (see
bpo-40513). Any thread of any interpreter can receive a signal, but only
the main thread of the main interpreter can handle signals: see
_Py_ThreadCanHandleSignals(). */
_Py_atomic_int signals_pending;
/* Pending calls to be made only on the main thread. */
struct _pending_calls pending_mainthread;
};
Expand All @@ -85,14 +80,12 @@ struct _ceval_state {
* the fast path in the eval loop.
* It is by far the hottest field in this struct and
* should be placed at the beginning. */
_Py_atomic_int eval_breaker;
/* Request for dropping the GIL */
_Py_atomic_int gil_drop_request;
int32_t eval_breaker2;
/* Avoid false sharing */
int64_t padding[7];
int recursion_limit;
struct _gil_runtime_state *gil;
int own_gil;
/* The GC is ready to be executed */
_Py_atomic_int gc_scheduled;
struct _pending_calls pending;
};

Expand Down
7 changes: 2 additions & 5 deletions Modules/gcmodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
*/

#include "Python.h"
#include "pycore_ceval.h" // _PyEval_SignalReceived()
#include "pycore_context.h"
#include "pycore_dict.h" // _PyDict_MaybeUntrack()
#include "pycore_initconfig.h"
Expand Down Expand Up @@ -2274,11 +2275,7 @@ _Py_ScheduleGC(PyInterpreterState *interp)
if (gcstate->collecting == 1) {
return;
}
struct _ceval_state *ceval = &interp->ceval;
if (!_Py_atomic_load_relaxed(&ceval->gc_scheduled)) {
_Py_atomic_store_relaxed(&ceval->gc_scheduled, 1);
_Py_atomic_store_relaxed(&ceval->eval_breaker, 1);
}
_Py_set_eval_breaker_bit(interp, _PY_GC_SCHEDULED_BIT, 1);
}

void
Expand Down
8000
5 changes: 2 additions & 3 deletions Modules/signalmodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -1767,9 +1767,8 @@ PyErr_CheckSignals(void)
Python code to ensure signals are handled. Checking for the GC here
allows long running native code to clean cycles created using the C-API
even if it doesn't run the evaluation loop */
struct _ceval_state *interp_ceval_state = &tstate->interp->ceval;
if (_Py_atomic_load_relaxed(&interp_ceval_state->gc_scheduled)) {
_Py_atomic_store_relaxed(&interp_ceval_state->gc_scheduled, 0);
if (_Py_eval_breaker_bit_is_set(tstate->interp, _PY_GC_SCHEDULED_BIT)) {
_Py_set_eval_breaker_bit(tstate->interp, _PY_GC_SCHEDULED_BIT, 0);
_Py_RunGC(tstate);
}

Expand Down
2 changes: 1 addition & 1 deletion Python/bytecodes.c
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ dummy_func(
/* Possibly combine these two checks */
DEOPT_IF(_PyFrame_GetCode(frame)->_co_instrumentation_version
!= tstate->interp->monitoring_version, RESUME);
DEOPT_IF(_Py_atomic_load_relaxed_int32(&tstate->interp->ceval.eval_breaker), RESUME);
DEOPT_IF(_Py_atomic_load_int32_relaxed(&tstate->interp->ceval.eval_breaker2), RESUME);
}

inst(INSTRUMENTED_RESUME, (--)) {
Expand Down
124 changes: 28 additions & 96 deletions Python/ceval_gil.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@

#include "Python.h"
#include "pycore_atomic.h" // _Py_atomic_int
#include "cpython/pyatomic.h" // _Py_atomic_load_int32
#include "pycore_ceval.h" // _PyEval_SignalReceived()
#include "pycore_initconfig.h" // _PyStatus_OK()
#include "pycore_interp.h" // _Py_RunGC()
Expand Down Expand Up @@ -57,89 +58,45 @@
#define _Py_atomic_load_relaxed_int32(ATOMIC_VAL) _Py_atomic_load_relaxed(ATOMIC_VAL)
#endif

/* This can set eval_breaker to 0 even though gil_drop_request became
1. We believe this is all right because the eval loop will release
the GIL eventually anyway. */
static inline void
COMPUTE_EVAL_BREAKER(PyInterpreterState *interp,
struct _ceval_runtime_state *ceval,
struct _ceval_state *ceval2)
{
_Py_atomic_store_relaxed(&ceval2->eval_breaker,
_Py_atomic_load_relaxed_int32(&ceval2->gil_drop_request)
| _Py_atomic_load_relaxed_int32(&ceval->signals_pending)
| (_Py_atomic_load_relaxed_int32(&ceval2->pending.calls_to_do))
| _Py_atomic_load_relaxed_int32(&ceval->pending_mainthread.calls_to_do)
| ceval2->pending.async_exc
| _Py_atomic_load_relaxed_int32(&ceval2->gc_scheduled));
}


static inline void
SET_GIL_DROP_REQUEST(PyInterpreterState *interp)
{
struct _ceval_state *ceval2 = &interp->ceval;
_Py_atomic_store_relaxed(&ceval2->gil_drop_request, 1);
_Py_atomic_store_relaxed(&ceval2->eval_breaker, 1);
_Py_set_eval_breaker_bit(interp, _PY_GIL_DROP_REQUEST_BIT, 1);
}


static inline void
RESET_GIL_DROP_REQUEST(PyInterpreterState *interp)
{
struct _ceval_runtime_state *ceval = &interp->runtime->ceval;
struct _ceval_state *ceval2 = &interp->ceval;
_Py_atomic_store_relaxed(&ceval2->gil_drop_request, 0);
COMPUTE_EVAL_BREAKER(interp, ceval, ceval2);
_Py_set_eval_breaker_bit(interp, _PY_GIL_DROP_REQUEST_BIT, 0);
}


static inline void
SIGNAL_PENDING_CALLS(struct _pending_calls *pending, PyInterpreterState *interp)
SIGNAL_PENDING_CALLS(PyInterpreterState *interp)
{
struct _ceval_runtime_state *ceval = &interp->runtime->ceval;
struct _ceval_state *ceval2 = &interp->ceval;
_Py_atomic_store_relaxed(&pending->calls_to_do, 1);
COMPUTE_EVAL_BREAKER(interp, ceval, ceval2);
_Py_set_eval_breaker_bit(interp, _PY_CALLS_TO_DO_BIT, 1);
}


static inline void
UNSIGNAL_PENDING_CALLS(PyInterpreterState *interp)
{
struct _ceval_runtime_state *ceval = &interp->runtime->ceval;
struct _ceval_state *ceval2 = &interp->ceval;
if (_Py_IsMainThread() && _Py_IsMainInterpreter(interp)) {
_Py_atomic_store_relaxed(&ceval->pending_mainthread.calls_to_do, 0);
}
_Py_atomic_store_relaxed(&ceval2->pending.calls_to_do, 0);
COMPUTE_EVAL_BREAKER(interp, ceval, ceval2);
_Py_set_eval_breaker_bit(interp, _PY_CALLS_TO_DO_BIT, 0);
}


static inline void
SIGNAL_PENDING_SIGNALS(PyInterpreterState *interp, int force)
{
struct _ceval_runtime_state *ceval = &interp->runtime->ceval;
struct _ceval_state *ceval2 = &interp->ceval;
_Py_atomic_store_relaxed(&ceval->signals_pending, 1);
if (force) {
_Py_atomic_store_relaxed(&ceval2->eval_breaker, 1);
}
else {
/* eval_breaker is not set to 1 if thread_can_handle_signals() is false */
COMPUTE_EVAL_BREAKER(interp, ceval, ceval2);
}
_Py_set_eval_breaker_bit(interp, _PY_SIGNALS_PENDING_BIT, 1);
}


static inline void
UNSIGNAL_PENDING_SIGNALS(PyInterpreterState *interp)
{
struct _ceval_runtime_state *ceval = &interp->runtime->ceval;
struct _ceval_state *ceval2 = &interp->ceval;
_Py_atomic_store_relaxed(&ceval->signals_pending, 0);
COMPUTE_EVAL_BREAKER(interp, ceval, ceval2);
_Py_set_eval_breaker_bit(interp, _PY_SIGNALS_PENDING_BIT, 0);
}


Expand All @@ -148,17 +105,16 @@ SIGNAL_ASYNC_EXC(PyInterpreterState *interp)
{
struct _ceval_state *ceval2 = &interp->ceval;
ceval2->pending.async_exc = 1;
_Py_atomic_store_relaxed(&ceval2->eval_breaker, 1);
_Py_set_eval_breaker_bit(interp, _PY_ASYNC_EXCEPTION_BIT, 1);
}


static inline void
UNSIGNAL_ASYNC_EXC(PyInterpreterState *interp)
{
struct _ceval_runtime_state *ceval = &interp->runtime->ceval;
struct _ceval_state *ceval2 = &interp->ceval;
ceval2->pending.async_exc = 0;
COMPUTE_EVAL_BREAKER(interp, ceval, ceval2);
_Py_set_eval_breaker_bit(interp, _PY_ASYNC_EXCEPTION_BIT, 0);
}


Expand Down Expand Up @@ -308,7 +264,7 @@ drop_gil(struct _ceval_state *ceval, PyThreadState *tstate)
the GIL, and that's the only time we might delete the
interpreter, so checking tstate first prevents the crash.
See https://github.com/python/cpython/issues/104341. */
if (tstate != NULL && _Py_atomic_load_relaxed(&ceval->gil_drop_request)) {
if (tstate != NULL && _Py_eval_breaker_bit_is_set(tstate->interp, _PY_GIL_DROP_REQUEST_BIT)) {
MUTEX_LOCK(gil->switch_mutex);
/* Not switched yet => wait */
if (((PyThreadState*)_Py_atomic_load_relaxed(&gil->last_holder)) == tstate)
Expand Down Expand Up @@ -434,17 +390,7 @@ take_gil(PyThreadState *tstate)
}
assert(_PyThreadState_CheckConsistency(tstate));

if (_Py_atomic_load_relaxed(&ceval->gil_drop_request)) {
RESET_GIL_DROP_REQUEST(interp);
}
else {
/* bpo-40010: eval_breaker should be recomputed to be set to 1 if there
is a pending signal: signal received by another thread which cannot
handle signals.

Note: RESET_GIL_DROP_REQUEST() calls COMPUTE_EVAL_BREAKER(). */
COMPUTE_EVAL_BREAKER(interp, &_PyRuntime.ceval, ceval);
}
RESET_GIL_DROP_REQUEST(interp);

/* Don't access tstate if the thread must exit */
if (tstate->async_exc != NULL) {
Expand Down Expand Up @@ -771,6 +717,8 @@ _push_pending_call(struct _pending_calls *pending,
pending->calls[i].func = func;
pending->calls[i].arg = arg;
pending->last = j;
assert(pending->calls_to_do < NPENDINGCALLS);
pending->calls_to_do++;
return 0;
}

Expand Down Expand Up @@ -798,6 +746,8 @@ _pop_pending_call(struct _pending_calls *pending,
if (i >= 0) {
pending->calls[i] = (struct _pending_call){0};
pending->first = (i + 1) % NPENDINGCALLS;
assert(pending->calls_to_do > 0);
pending->calls_to_do--;
}
}

Expand Down Expand Up @@ -827,7 +777,7 @@ _PyEval_AddPendingCall(PyInterpreterState *interp,
PyThread_release_lock(pending->lock);

/* signal main loop */
SIGNAL_PENDING_CALLS(pending, interp);
SIGNAL_PENDING_CALLS(interp);
return result;
}

Expand Down Expand Up @@ -860,15 +810,7 @@ handle_signals(PyThreadState *tstate)
static inline int
maybe_has_pending_calls(PyInterpreterState *interp)
{
struct _pending_calls *pending = &interp->ceval.pending;
if (_Py_atomic_load_relaxed_int32(&pending->calls_to_do)) {
return 1;
}
if (!_Py_IsMainThread() || !_Py_IsMainInterpreter(interp)) {
return 0;
}
pending = &_PyRuntime.ceval.pending_mainthread;
return _Py_atomic_load_relaxed_int32(&pending->calls_to_do);
return _Py_eval_breaker_bit_is_set(interp, _PY_CALLS_TO_DO_BIT) ? 1 : 0;
}

static int
Expand Down Expand Up @@ -928,18 +870,21 @@ make_pending_calls(PyInterpreterState *interp)
if (_make_pending_calls(pending) != 0) {
pending->busy = 0;
/* There might not be more calls to make, but we play it safe. */
SIGNAL_PENDING_CALLS(pending, interp);
SIGNAL_PENDING_CALLS(interp);
return -1;
}

if (_Py_IsMainThread() && _Py_IsMainInterpreter(interp)) {
if (_make_pending_calls(pending_main) != 0) {
pending->busy = 0;
/* There might not be more calls to make, but we play it safe. */
SIGNAL_PENDING_CALLS(pending_main, interp);
SIGNAL_PENDING_CALLS(interp);
return -1;
}
}
else if (pending_main->calls_to_do) {
SIGNAL_PENDING_CALLS(interp);
}

pending->busy = 0;
return 0;
Expand Down Expand Up @@ -1084,9 +1029,10 @@ _Py_HandlePending(PyThreadState *tstate)
_PyRuntimeState * const runtime = &_PyRuntime;
struct _ceval_runtime_state *ceval = &runtime->ceval;
struct _ceval_state *interp_ceval_state = &tstate->interp->ceval;
PyInterpreterState *interp = tstate->interp;

/* Pending signals */
if (_Py_atomic_load_relaxed_int32(&ceval->signals_pending)) {
if (_Py_eval_breaker_bit_is_set(interp, _PY_SIGNALS_PENDING_BIT)) {
if (handle_signals(tstate) != 0) {
return -1;
}
Expand All @@ -1100,14 +1046,13 @@ _Py_HandlePending(PyThreadState *tstate)
}

/* GC scheduled to run */
if (_Py_atomic_load_relaxed_int32(&interp_ceval_state->gc_scheduled)) {
_Py_atomic_store_relaxed(&interp_ceval_state->gc_scheduled, 0);
COMPUTE_EVAL_BREAKER(tstate->interp, ceval, interp_ceval_state);
if (_Py_eval_breaker_bit_is_set(tstate->interp, _PY_GC_SCHEDULED_BIT)) {
_Py_set_eval_breaker_bit(tstate->interp, _PY_GC_SCHEDULED_BIT, 0);
_Py_RunGC(tstate);
}

/* GIL drop request */
if (_Py_atomic_load_relaxed_int32(&interp_ceval_state->gil_drop_request)) {
if (_Py_eval_breaker_bit_is_set(tstate->interp, _PY_GIL_DROP_REQUEST_BIT)) {
/* Give another thread a chance */
if (_PyThreadState_SwapNoGIL(NULL) != tstate) {
Py_FatalError("tstate mix-up");
Expand All @@ -1132,19 +1077,6 @@ _Py_HandlePending(PyThreadState *tstate)
Py_DECREF(exc);
return -1;
}


// It is possible that some of the conditions that trigger the eval breaker
// are called in a different thread than the Python thread. An example of
// this is bpo-42296: On Windows, _PyEval_SignalReceived() can be called in
// a different thread than the Python thread, in which case
// _Py_ThreadCanHandleSignals() is wrong. Recompute eval_breaker in the
// current Python thread with the correct _Py_ThreadCanHandleSignals()
// value. It prevents to interrupt the eval loop at every instruction if
// the current Python thread cannot handle signals (if
// _Py_ThreadCanHandleSignals() is false).
COMPUTE_EVAL_BREAKER(tstate->interp, ceval, interp_ceval_state);

return 0;
}

Loading
0