[3.11] GH-93354: Use exponential backoff to avoid excessive specializ… · python/cpython@852dca8 · GitHub

Commit 852dca8

[3.11] GH-93354: Use exponential backoff to avoid excessive specialization attempts. (GH-93355)

(cherry picked from commit eb618d5)
Co-authored-by: Mark Shannon <mark@hotpy.org>
1 parent 5024a9b commit 852dca8

4 files changed: +93 -46 lines changed

Include/internal/pycore_code.h

Lines changed: 44 additions & 3 deletions
@@ -233,9 +233,6 @@ extern void _PyLineTable_InitAddressRange(
 extern int _PyLineTable_NextAddressRange(PyCodeAddressRange *range);
 extern int _PyLineTable_PreviousAddressRange(PyCodeAddressRange *range);
 
-
-#define ADAPTIVE_CACHE_BACKOFF 64
-
 /* Specialization functions */
 
 extern int _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr,
@@ -475,6 +472,50 @@ write_location_entry_start(uint8_t *ptr, int code, int length)
 }
 
 
+/** Counters
+ * The first 16-bit value in each inline cache is a counter.
+ * When counting misses, the counter is treated as a simple unsigned value.
+ *
+ * When counting executions until the next specialization attempt,
+ * exponential backoff is used to reduce the number of specialization failures.
+ * The high 12 bits store the counter, the low 4 bits store the backoff exponent.
+ * On a specialization failure, the backoff exponent is incremented and the
+ * counter set to (2**backoff - 1).
+ * Backoff == 6 -> starting counter == 63, backoff == 10 -> starting counter == 1023.
+ */
+
+/* With a 16-bit counter, we have 12 bits for the counter value, and 4 bits for the backoff */
+#define ADAPTIVE_BACKOFF_BITS 4
+/* The initial counter value is 31 == 2**ADAPTIVE_BACKOFF_START - 1 */
+#define ADAPTIVE_BACKOFF_START 5
+
+#define MAX_BACKOFF_VALUE (16 - ADAPTIVE_BACKOFF_BITS)
+
+
+static inline uint16_t
+adaptive_counter_bits(int value, int backoff) {
+    return (value << ADAPTIVE_BACKOFF_BITS) |
+           (backoff & ((1<<ADAPTIVE_BACKOFF_BITS)-1));
+}
+
+static inline uint16_t
+adaptive_counter_start(void) {
+    unsigned int value = (1 << ADAPTIVE_BACKOFF_START) - 1;
+    return adaptive_counter_bits(value, ADAPTIVE_BACKOFF_START);
+}
+
+static inline uint16_t
+adaptive_counter_backoff(uint16_t counter) {
+    unsigned int backoff = counter & ((1<<ADAPTIVE_BACKOFF_BITS)-1);
+    backoff++;
+    if (backoff > MAX_BACKOFF_VALUE) {
+        backoff = MAX_BACKOFF_VALUE;
+    }
+    unsigned int value = (1 << backoff) - 1;
+    return adaptive_counter_bits(value, backoff);
+}
+
+
 #ifdef __cplusplus
 }
 #endif
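To make the new encoding concrete, the following standalone sketch (an illustration, not part of the commit) reimplements the three helpers above and prints how many executions are deferred before each retry. With the constants defined here the intervals grow 31, 63, 127, ... and cap at 4095 once the exponent reaches MAX_BACKOFF_VALUE (12).

/* demo_backoff.c -- standalone illustration of the counter encoding above */
#include <stdint.h>
#include <stdio.h>

#define ADAPTIVE_BACKOFF_BITS 4
#define ADAPTIVE_BACKOFF_START 5
#define MAX_BACKOFF_VALUE (16 - ADAPTIVE_BACKOFF_BITS)

static inline uint16_t
adaptive_counter_bits(int value, int backoff) {
    /* high 12 bits: executions left to defer; low 4 bits: backoff exponent */
    return (uint16_t)((value << ADAPTIVE_BACKOFF_BITS) |
                      (backoff & ((1 << ADAPTIVE_BACKOFF_BITS) - 1)));
}

static inline uint16_t
adaptive_counter_start(void) {
    return adaptive_counter_bits((1 << ADAPTIVE_BACKOFF_START) - 1,
                                 ADAPTIVE_BACKOFF_START);
}

static inline uint16_t
adaptive_counter_backoff(uint16_t counter) {
    unsigned int backoff = counter & ((1 << ADAPTIVE_BACKOFF_BITS) - 1);
    if (++backoff > MAX_BACKOFF_VALUE) {
        backoff = MAX_BACKOFF_VALUE;
    }
    return adaptive_counter_bits((1 << backoff) - 1, backoff);
}

int main(void) {
    uint16_t counter = adaptive_counter_start();
    /* Print the deferral interval after each simulated specialization failure:
       31, 63, 127, 255, 511, 1023, 2047, 4095, 4095, ... */
    for (int failures = 0; failures < 10; failures++) {
        printf("after %d failure(s): defer %u executions\n",
               failures, counter >> ADAPTIVE_BACKOFF_BITS);
        counter = adaptive_counter_backoff(counter);
    }
    return 0;
}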
Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
+Use exponential backoff for specialization counters in the interpreter. Can
+reduce the number of failed specializations significantly and avoid slowdown
+for those parts of a program that are not suitable for specialization.
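To put a rough number on this claim: the sketch below (a back-of-the-envelope illustration, not code from the commit) counts failed specialization attempts over one million executions of a single instruction that never specializes, comparing the old fixed ADAPTIVE_CACHE_BACKOFF of 64 with the new exponential scheme. The fixed scheme retries roughly 15,000 times; the exponential scheme settles at about 250.

/* demo_attempts.c -- rough comparison of retry counts, for illustration only */
#include <stdio.h>

int main(void) {
    const long executions = 1000000;

    /* Old scheme: fixed counter, re-attempt every 64 deferred executions. */
    long old_attempts = 0;
    for (long i = 0, counter = 64; i < executions; i++) {
        if (counter == 0) { old_attempts++; counter = 64; }
        else { counter--; }
    }

    /* New scheme: counter re-armed to 2**backoff - 1, backoff capped at 12. */
    long new_attempts = 0;
    int backoff = 5;
    for (long i = 0, counter = (1 << 5) - 1; i < executions; i++) {
        if (counter == 0) {
            new_attempts++;
            if (backoff < 12) backoff++;
            counter = (1 << backoff) - 1;
        }
        else { counter--; }
    }

    printf("fixed backoff:       %ld failed attempts\n", old_attempts);
    printf("exponential backoff: %ld failed attempts\n", new_attempts);
    return 0;
}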

Python/ceval.c

Lines changed: 25 additions & 21 deletions
@@ -1560,7 +1560,11 @@ eval_frame_handle_pending(PyThreadState *tstate)
         dtrace_function_entry(frame); \
     }
 
+#define ADAPTIVE_COUNTER_IS_ZERO(cache) \
+    (cache)->counter < (1<<ADAPTIVE_BACKOFF_BITS)
 
+#define DECREMENT_ADAPTIVE_COUNTER(cache) \
+    (cache)->counter -= (1<<ADAPTIVE_BACKOFF_BITS)
 
 static int
 trace_function_entry(PyThreadState *tstate, _PyInterpreterFrame *frame)
@@ -2155,7 +2159,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
 
         TARGET(BINARY_SUBSCR_ADAPTIVE) {
             _PyBinarySubscrCache *cache = (_PyBinarySubscrCache *)next_instr;
-            if (cache->counter == 0) {
+            if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
                 PyObject *sub = TOP();
                 PyObject *container = SECOND();
                 next_instr--;
@@ -2166,7 +2170,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
             }
             else {
                 STAT_INC(BINARY_SUBSCR, deferred);
-                cache->counter--;
+                DECREMENT_ADAPTIVE_COUNTER(cache);
                 JUMP_TO_INSTRUCTION(BINARY_SUBSCR);
             }
         }
@@ -2320,7 +2324,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
 
         TARGET(STORE_SUBSCR_ADAPTIVE) {
             _PyStoreSubscrCache *cache = (_PyStoreSubscrCache *)next_instr;
-            if (cache->counter == 0) {
+            if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
                 PyObject *sub = TOP();
                 PyObject *container = SECOND();
                 next_instr--;
@@ -2331,7 +2335,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
             }
             else {
                 STAT_INC(STORE_SUBSCR, deferred);
-                cache->counter--;
+                DECREMENT_ADAPTIVE_COUNTER(cache);
                 JUMP_TO_INSTRUCTION(STORE_SUBSCR);
             }
         }
@@ -2813,15 +2817,15 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
         TARGET(UNPACK_SEQUENCE_ADAPTIVE) {
             assert(cframe.use_tracing == 0);
             _PyUnpackSequenceCache *cache = (_PyUnpackSequenceCache *)next_instr;
-            if (cache->counter == 0) {
+            if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
                 PyObject *seq = TOP();
                 next_instr--;
                 _Py_Specialize_UnpackSequence(seq, next_instr, oparg);
                 NOTRACE_DISPATCH_SAME_OPARG();
             }
             else {
                 STAT_INC(UNPACK_SEQUENCE, deferred);
-                cache->counter--;
+                DECREMENT_ADAPTIVE_COUNTER(cache);
                 JUMP_TO_INSTRUCTION(UNPACK_SEQUENCE);
             }
         }
@@ -3054,7 +3058,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
         TARGET(LOAD_GLOBAL_ADAPTIVE) {
             assert(cframe.use_tracing == 0);
             _PyLoadGlobalCache *cache = (_PyLoadGlobalCache *)next_instr;
-            if (cache->counter == 0) {
+            if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
                 PyObject *name = GETITEM(names, oparg>>1);
                 next_instr--;
                 if (_Py_Specialize_LoadGlobal(GLOBALS(), BUILTINS(), next_instr, name) < 0) {
@@ -3064,7 +3068,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
             }
             else {
                 STAT_INC(LOAD_GLOBAL, deferred);
-                cache->counter--;
+                DECREMENT_ADAPTIVE_COUNTER(cache);
                 JUMP_TO_INSTRUCTION(LOAD_GLOBAL);
             }
         }
@@ -3478,7 +3482,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
         TARGET(LOAD_ATTR_ADAPTIVE) {
             assert(cframe.use_tracing == 0);
             _PyAttrCache *cache = (_PyAttrCache *)next_instr;
-            if (cache->counter == 0) {
+            if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
                 PyObject *owner = TOP();
                 PyObject *name = GETITEM(names, oparg);
                 next_instr--;
@@ -3489,7 +3493,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
             }
             else {
                 STAT_INC(LOAD_ATTR, deferred);
-                cache->counter--;
+                DECREMENT_ADAPTIVE_COUNTER(cache);
                 JUMP_TO_INSTRUCTION(LOAD_ATTR);
             }
         }
@@ -3587,7 +3591,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
         TARGET(STORE_ATTR_ADAPTIVE) {
             assert(cframe.use_tracing == 0);
             _PyAttrCache *cache = (_PyAttrCache *)next_instr;
-            if (cache->counter == 0) {
+            if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
                 PyObject *owner = TOP();
                 PyObject *name = GETITEM(names, oparg);
                 next_instr--;
@@ -3598,7 +3602,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
             }
             else {
                 STAT_INC(STORE_ATTR, deferred);
-                cache->counter--;
+                DECREMENT_ADAPTIVE_COUNTER(cache);
                 JUMP_TO_INSTRUCTION(STORE_ATTR);
             }
         }
@@ -3717,7 +3721,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
         TARGET(COMPARE_OP_ADAPTIVE) {
            assert(cframe.use_tracing == 0);
            _PyCompareOpCache *cache = (_PyCompareOpCache *)next_instr;
-            if (cache->counter == 0) {
+            if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
                PyObject *right = TOP();
                PyObject *left = SECOND();
                next_instr--;
@@ -3726,7 +3730,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
             }
             else {
                 STAT_INC(COMPARE_OP, deferred);
-                cache->counter--;
+                DECREMENT_ADAPTIVE_COUNTER(cache);
                 JUMP_TO_INSTRUCTION(COMPARE_OP);
             }
         }
@@ -4524,7 +4528,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
         TARGET(LOAD_METHOD_ADAPTIVE) {
             assert(cframe.use_tracing == 0);
             _PyLoadMethodCache *cache = (_PyLoadMethodCache *)next_instr;
-            if (cache->counter == 0) {
+            if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
                 PyObject *owner = TOP();
                 PyObject *name = GETITEM(names, oparg);
                 next_instr--;
@@ -4535,7 +4539,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
             }
             else {
                 STAT_INC(LOAD_METHOD, deferred);
-                cache->counter--;
+                DECREMENT_ADAPTIVE_COUNTER(cache);
                 JUMP_TO_INSTRUCTION(LOAD_METHOD);
             }
         }
@@ -4816,7 +4820,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
 
         TARGET(CALL_ADAPTIVE) {
             _PyCallCache *cache = (_PyCallCache *)next_instr;
-            if (cache->counter == 0) {
+            if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
                 next_instr--;
                 int is_meth = is_method(stack_pointer, oparg);
                 int nargs = oparg + is_meth;
@@ -4830,7 +4834,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
             }
             else {
                 STAT_INC(CALL, deferred);
-                cache->counter--;
+                DECREMENT_ADAPTIVE_COUNTER(cache);
                 goto call_function;
             }
         }
@@ -5561,7 +5565,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
         TARGET(BINARY_OP_ADAPTIVE) {
             assert(cframe.use_tracing == 0);
             _PyBinaryOpCache *cache = (_PyBinaryOpCache *)next_instr;
-            if (cache->counter == 0) {
+            if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
                 PyObject *lhs = SECOND();
                 PyObject *rhs = TOP();
                 next_instr--;
@@ -5570,7 +5574,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
             }
             else {
                 STAT_INC(BINARY_OP, deferred);
-                cache->counter--;
+                DECREMENT_ADAPTIVE_COUNTER(cache);
                 JUMP_TO_INSTRUCTION(BINARY_OP);
             }
         }
@@ -5698,7 +5702,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
                 assert(adaptive_opcode);
                 _Py_SET_OPCODE(next_instr[-1], adaptive_opcode);
                 STAT_INC(opcode, deopt);
-                *counter = ADAPTIVE_CACHE_BACKOFF;
+                *counter = adaptive_counter_start();
             }
             next_instr--;
             DISPATCH_GOTO();

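Every *_ADAPTIVE case above follows the same pattern: once the counter drains, re-run the specializer; otherwise record the execution as deferred, decrement, and jump to the generic instruction. Because the low 4 bits of the counter now hold the backoff exponent, DECREMENT_ADAPTIVE_COUNTER subtracts 1 << ADAPTIVE_BACKOFF_BITS and ADAPTIVE_COUNTER_IS_ZERO tests only the high 12 bits. A minimal sketch of that arithmetic, using a hypothetical toy_cache struct (and with outer parentheses added so the macros compose inside an expression):

/* demo_counter_macros.c -- illustration of the counter macros added above */
#include <stdint.h>
#include <stdio.h>

#define ADAPTIVE_BACKOFF_BITS 4

/* Hypothetical stand-in for one inline-cache entry. */
typedef struct { uint16_t counter; } toy_cache;

/* Same arithmetic as the ceval.c macros, parenthesized for use in expressions. */
#define ADAPTIVE_COUNTER_IS_ZERO(cache) \
    ((cache)->counter < (1 << ADAPTIVE_BACKOFF_BITS))
#define DECREMENT_ADAPTIVE_COUNTER(cache) \
    ((cache)->counter -= (1 << ADAPTIVE_BACKOFF_BITS))

int main(void) {
    /* Counter value 3, backoff exponent 5: (3 << 4) | 5. */
    toy_cache cache = { .counter = (3 << ADAPTIVE_BACKOFF_BITS) | 5 };
    int deferred = 0;
    while (!ADAPTIVE_COUNTER_IS_ZERO(&cache)) {
        DECREMENT_ADAPTIVE_COUNTER(&cache);  /* the "deferred" path */
        deferred++;
    }
    /* Draining the high bits leaves the exponent untouched in the low bits. */
    printf("deferred %d executions; remaining exponent = %u\n",
           deferred, cache.counter & ((1 << ADAPTIVE_BACKOFF_BITS) - 1));
    return 0;
}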
0 commit comments
