GH-113710: Backedge counter improvements. (GH-115166) · fsc-eriker/cpython@a66c82f · GitHub
[go: up one dir, main page]

Skip to content

Commit a66c82f

Browse files
markshannon authored and fsc-eriker committed
pythonGH-113710: Backedge counter improvements. (pythonGH-115166)
1 parent 1e5b86d commit a66c82f

File tree

7 files changed

+81
-55
lines changed

7 files changed

+81
-55
lines changed

Include/cpython/optimizer.h

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,8 @@ typedef struct {
7171

7272
PyAPI_FUNC(int) PyUnstable_Replace_Executor(PyCodeObject *code, _Py_CODEUNIT *instr, _PyExecutorObject *executor);
7373

74+
_PyOptimizerObject *_Py_SetOptimizer(PyInterpreterState *interp, _PyOptimizerObject* optimizer);
75+
7476
PyAPI_FUNC(void) PyUnstable_SetOptimizer(_PyOptimizerObject* optimizer);
7577

7678
PyAPI_FUNC(_PyOptimizerObject *) PyUnstable_GetOptimizer(void);
@@ -80,8 +82,6 @@ PyAPI_FUNC(_PyExecutorObject *) PyUnstable_GetExecutor(PyCodeObject *code, int o
8082
int
8183
_PyOptimizer_Optimize(struct _PyInterpreterFrame *frame, _Py_CODEUNIT *start, PyObject **stack_pointer);
8284

83-
extern _PyOptimizerObject _PyOptimizer_Default;
84-
8585
void _Py_ExecutorInit(_PyExecutorObject *, _PyBloomFilter *);
8686
void _Py_ExecutorClear(_PyExecutorObject *);
8787
void _Py_BloomFilter_Init(_PyBloomFilter *);
@@ -96,7 +96,11 @@ PyAPI_FUNC(PyObject *)PyUnstable_Optimizer_NewUOpOptimizer(void);
9696

9797
#define OPTIMIZER_BITS_IN_COUNTER 4
9898
/* Minimum of 16 additional executions before retry */
99-
#define MINIMUM_TIER2_BACKOFF 4
99+
#define MIN_TIER2_BACKOFF 4
100+
#define MAX_TIER2_BACKOFF (15 - OPTIMIZER_BITS_IN_COUNTER)
101+
#define OPTIMIZER_BITS_MASK ((1 << OPTIMIZER_BITS_IN_COUNTER) - 1)
102+
/* A value <= UINT16_MAX but large enough that when shifted is > UINT16_MAX */
103+
#define OPTIMIZER_UNREACHABLE_THRESHOLD UINT16_MAX
100104

101105
#define _Py_MAX_ALLOWED_BUILTINS_MODIFICATIONS 3
102106
#define _Py_MAX_ALLOWED_GLOBALS_MODIFICATIONS 6

Include/internal/pycore_interp.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -239,8 +239,10 @@ struct _is {
239239
struct callable_cache callable_cache;
240240
_PyOptimizerObject *optimizer;
241241
_PyExecutorObject *executor_list_head;
242-
uint16_t optimizer_resume_threshold;
243-
uint16_t optimizer_backedge_threshold;
242+
/* These values are shifted and offset to speed up check in JUMP_BACKWARD */
243+
uint32_t optimizer_resume_threshold;
244+
uint32_t optimizer_backedge_threshold;
245+
244246
uint32_t next_func_version;
245247
_rare_events rare_events;
246248
PyDict_WatchCallback builtins_dict_watcher;

Python/bytecodes.c

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2318,13 +2318,16 @@ dummy_func(
23182318
assert(oparg <= INSTR_OFFSET());
23192319
JUMPBY(-oparg);
23202320
#if ENABLE_SPECIALIZATION
2321-
this_instr[1].cache += (1 << OPTIMIZER_BITS_IN_COUNTER);
2321+
uint16_t counter = this_instr[1].cache;
2322+
this_instr[1].cache = counter + (1 << OPTIMIZER_BITS_IN_COUNTER);
23222323
/* We are using unsigned values, but we really want signed values, so
2323-
* do the 2s complement comparison manually */
2324-
uint16_t ucounter = this_instr[1].cache + (1 << 15);
2325-
uint16_t threshold = tstate->interp->optimizer_backedge_threshold + (1 << 15);
2324+
* do the 2s complement adjustment manually */
2325+
uint32_t offset_counter = counter ^ (1 << 15);
2326+
uint32_t threshold = tstate->interp->optimizer_backedge_threshold;
2327+
assert((threshold & OPTIMIZER_BITS_MASK) == 0);
2328+
// Use '>=' not '>' so that the optimizer/backoff bits do not affect the result.
23262329
// Double-check that the opcode isn't instrumented or something:
2327-
if (ucounter > threshold && this_instr->op.code == JUMP_BACKWARD) {
2330+
if (offset_counter >= threshold && this_instr->op.code == JUMP_BACKWARD) {
23282331
OPT_STAT_INC(attempts);
23292332
_Py_CODEUNIT *start = this_instr;
23302333
/* Back up over EXTENDED_ARGs so optimizer sees the whole instruction */
@@ -2338,18 +2341,18 @@ dummy_func(
23382341
// Rewind and enter the executor:
23392342
assert(start->op.code == ENTER_EXECUTOR);
23402343
next_instr = start;
2341-
this_instr[1].cache &= ((1 << OPTIMIZER_BITS_IN_COUNTER) - 1);
2344+
this_instr[1].cache &= OPTIMIZER_BITS_MASK;
23422345
}
23432346
else {
2344-
int backoff = this_instr[1].cache & ((1 << OPTIMIZER_BITS_IN_COUNTER) - 1);
2345-
if (backoff < MINIMUM_TIER2_BACKOFF) {
2346-
backoff = MINIMUM_TIER2_BACKOFF;
2347+
int backoff = this_instr[1].cache & OPTIMIZER_BITS_MASK;
2348+
backoff++;
2349+
if (backoff < MIN_TIER2_BACKOFF) {
2350+
backoff = MIN_TIER2_BACKOFF;
23472351
}
2348-
else if (backoff < 15 - OPTIMIZER_BITS_IN_COUNTER) {
2349-
backoff++;
2352+
else if (backoff > MAX_TIER2_BACKOFF) {
2353+
backoff = MAX_TIER2_BACKOFF;
23502354
}
2351-
assert(backoff <= 15 - OPTIMIZER_BITS_IN_COUNTER);
2352-
this_instr[1].cache = ((1 << 16) - ((1 << OPTIMIZER_BITS_IN_COUNTER) << backoff)) | backoff;
2355+
this_instr[1].cache = ((UINT16_MAX << OPTIMIZER_BITS_IN_COUNTER) << backoff) | backoff;
23532356
}
23542357
}
23552358
#endif /* ENABLE_SPECIALIZATION */

Python/generated_cases.c.h

Lines changed: 16 additions & 13 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Python/optimizer.c

Lines changed: 33 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,9 @@ never_optimize(
109109
_PyExecutorObject **exec,
110110
int Py_UNUSED(stack_entries))
111111
{
112+
/* Although it should be benign for this to be called,
113+
* it shouldn't happen, so fail in debug builds. */
114+
assert(0 && "never optimize should never be called");
112115
return 0;
113116
}
114117

@@ -120,38 +123,53 @@ PyTypeObject _PyDefaultOptimizer_Type = {
120123
.tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION,
121124
};
122125

123-
_PyOptimizerObject _PyOptimizer_Default = {
126+
static _PyOptimizerObject _PyOptimizer_Default = {
124127
PyObject_HEAD_INIT(&_PyDefaultOptimizer_Type)
125128
.optimize = never_optimize,
126-
.resume_threshold = INT16_MAX,
127-
.backedge_threshold = INT16_MAX,
129+
.resume_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD,
130+
.backedge_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD,
128131
};
129132

133+
static uint32_t
134+
shift_and_offset_threshold(uint16_t threshold)
135+
{
136+
return (threshold << OPTIMIZER_BITS_IN_COUNTER) + (1 << 15);
137+
}
138+
130139
_PyOptimizerObject *
131140
PyUnstable_GetOptimizer(void)
132141
{
133142
PyInterpreterState *interp = _PyInterpreterState_GET();
134143
if (interp->optimizer == &_PyOptimizer_Default) {
135144
return NULL;
136145
}
137-
assert(interp->optimizer_backedge_threshold == interp->optimizer->backedge_threshold);
138-
assert(interp->optimizer_resume_threshold == interp->optimizer->resume_threshold);
146+
assert(interp->optimizer_backedge_threshold ==
147+
shift_and_offset_threshold(interp->optimizer->backedge_threshold));
148+
assert(interp->optimizer_resume_threshold ==
149+
shift_and_offset_threshold(interp->optimizer->resume_threshold));
139150
Py_INCREF(interp->optimizer);
140151
return interp->optimizer;
141152
}
142153

143-
void
144-
PyUnstable_SetOptimizer(_PyOptimizerObject *optimizer)
154+
_PyOptimizerObject *
155+
_Py_SetOptimizer(PyInterpreterState *interp, _PyOptimizerObject *optimizer)
145156
{
146-
PyInterpreterState *interp = _PyInterpreterState_GET();
147157
if (optimizer == NULL) {
148158
optimizer = &_PyOptimizer_Default;
149159
}
150160
_PyOptimizerObject *old = interp->optimizer;
151161
Py_INCREF(optimizer);
152162
interp->optimizer = optimizer;
153-
interp->optimizer_backedge_threshold = optimizer->backedge_threshold;
154-
interp->optimizer_resume_threshold = optimizer->resume_threshold;
163+
interp->optimizer_backedge_threshold = shift_and_offset_threshold(optimizer->backedge_threshold);
164+
interp->optimizer_resume_threshold = shift_and_offset_threshold(optimizer->resume_threshold);
165+
return old;
166+
}
167+
168+
void
169+
PyUnstable_SetOptimizer(_PyOptimizerObject *optimizer)
170+
{
171+
PyInterpreterState *interp = _PyInterpreterState_GET();
172+
_PyOptimizerObject *old = _Py_SetOptimizer(interp, optimizer);
155173
Py_DECREF(old);
156174
}
157175

@@ -860,10 +878,10 @@ PyUnstable_Optimizer_NewUOpOptimizer(void)
860878
return NULL;
861879
}
862880
opt->optimize = uop_optimize;
863-
opt->resume_threshold = INT16_MAX;
864-
// Need at least 3 iterations to settle specializations.
865-
// A few lower bits of the counter are reserved for other flags.
866-
opt->backedge_threshold = 16 << OPTIMIZER_BITS_IN_COUNTER;
881+
opt->resume_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD;
882+
// Need a few iterations to settle specializations,
883+
// and to amortize the cost of optimization.
884+
opt->backedge_threshold = 16;
867885
return (PyObject *)opt;
868886
}
869887

@@ -950,7 +968,7 @@ PyUnstable_Optimizer_NewCounter(void)
950968
return NULL;
951969
}
952970
opt->base.optimize = counter_optimize;
953-
opt->base.resume_threshold = INT16_MAX;
971+
opt->base.resume_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD;
954972
opt->base.backedge_threshold = 0;
955973
opt->count = 0;
956974
return (PyObject *)opt;

Python/pylifecycle.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1627,8 +1627,8 @@ finalize_modules(PyThreadState *tstate)
16271627

16281628
// Invalidate all executors and turn off tier 2 optimizer
16291629
_Py_Executors_InvalidateAll(interp);
1630-
Py_XDECREF(interp->optimizer);
1631-
interp->optimizer = &_PyOptimizer_Default;
1630+
_PyOptimizerObject *old = _Py_SetOptimizer(interp, NULL);
1631+
Py_XDECREF(old);
16321632

16331633
// Stop watching __builtin__ modifications
16341634
PyDict_Unwatch(0, interp->builtins);

Python/pystate.c

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -625,9 +625,7 @@ init_interpreter(PyInterpreterState *interp,
625625
}
626626
interp->sys_profile_initialized = false;
627627
interp->sys_trace_initialized = false;
628-
interp->optimizer = &_PyOptimizer_Default;
629-
interp->optimizer_backedge_threshold = _PyOptimizer_Default.backedge_threshold;
630-
interp->optimizer_resume_threshold = _PyOptimizer_Default.backedge_threshold;
628+
(void)_Py_SetOptimizer(interp, NULL);
631629
interp->next_func_version = 1;
632630
interp->executor_list_head = NULL;
633631
if (interp != &runtime->_main_interpreter) {
@@ -780,10 +778,8 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate)
780778
tstate->_status.cleared = 0;
781779
}
782780

783-
Py_CLEAR(interp->optimizer);
784-
interp->optimizer = &_PyOptimizer_Default;
785-
interp->optimizer_backedge_threshold = _PyOptimizer_Default.backedge_threshold;
786-
interp->optimizer_resume_threshold = _PyOptimizer_Default.backedge_threshold;
781+
_PyOptimizerObject *old = _Py_SetOptimizer(interp, NULL);
782+
Py_DECREF(old);
787783

788784
/* It is possible that any of the objects below have a finalizer
789785
that runs Python code or otherwise relies on a thread state

0 commit comments

Comments
 (0)
0