bpo-46841: Use inline caching for calls (GH-31709) · python-docs-tr/cpython@f193631


Commit f193631

bpo-46841: Use inline caching for calls (GH-31709)
1 parent 105b9ac commit f193631

16 files changed: +491 additions, -732 deletions

Include/cpython/code.h

Lines changed: 1 addition & 1 deletion
@@ -105,7 +105,7 @@ struct PyCodeObject {
     /* Quickened instructions and cache, or NULL
        This should be treated as opaque by all code except the specializer and
        interpreter. */
-    union _cache_or_instruction *co_quickened;
+    _Py_CODEUNIT *co_quickened;
 
 };

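A note on what this one-line change implies: quickened code is no longer an array of SpecializedCacheEntry unions paired with the instructions; it is a single flat array of 16-bit code units in which each specializable instruction is immediately followed by its own cache slots. The sketch below is illustrative only; the type, constant, and helper names are invented here and are not CPython's dispatch code.

/* Illustrative sketch, not CPython's dispatch code: with inline caches the
 * interpreter simply steps over the cache slots that follow an instruction. */
#include <stdint.h>

typedef uint16_t codeunit_t;        /* stand-in for _Py_CODEUNIT */
#define CALL_CACHE_UNITS 4          /* counter + func_version[2] + min_args */

static const codeunit_t *
skip_call_cache(const codeunit_t *next_instr)
{
    /* next_instr points just past the CALL opcode; the next CALL_CACHE_UNITS
       code units are its inline cache, not executable instructions. */
    return next_instr + CALL_CACHE_UNITS;
}
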
Include/internal/pycore_code.h

Lines changed: 26 additions & 115 deletions
@@ -8,50 +8,10 @@ extern "C" {
  * Specialization and quickening structs and helper functions
  */
 
-typedef struct {
-    int32_t cache_count;
-    int32_t _; /* Force 8 byte size */
-} _PyEntryZero;
-
-typedef struct {
-    uint8_t original_oparg;
-    uint8_t counter;
-    uint16_t index;
-    uint32_t version;
-} _PyAdaptiveEntry;
 
-typedef struct {
-    /* Borrowed ref */
-    PyObject *obj;
-} _PyObjectCache;
-
-typedef struct {
-    uint32_t func_version;
-    uint16_t min_args;
-    uint16_t defaults_len;
-} _PyCallCache;
-
-
-/* Add specialized versions of entries to this union.
- *
- * Do not break the invariant: sizeof(SpecializedCacheEntry) == 8
- * Preserving this invariant is necessary because:
-    - If any one form uses more space, then all must and on 64 bit machines
-      this is likely to double the memory consumption of caches
-    - The function for calculating the offset of caches assumes a 4:1
-      cache:instruction size ratio. Changing that would need careful
-      analysis to choose a new function.
- */
-typedef union {
-    _PyEntryZero zero;
-    _PyAdaptiveEntry adaptive;
-    _PyObjectCache obj;
-    _PyCallCache call;
-} SpecializedCacheEntry;
-
-#define INSTRUCTIONS_PER_ENTRY (sizeof(SpecializedCacheEntry)/sizeof(_Py_CODEUNIT))
-
-/* Inline caches */
+// Inline caches. If you change the number of cache entries for an instruction,
+// you must *also* update the number of cache entries in Lib/opcode.py and bump
+// the magic number in Lib/importlib/_bootstrap_external.py!
 
 #define CACHE_ENTRIES(cache) (sizeof(cache)/sizeof(_Py_CODEUNIT))

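The CACHE_ENTRIES macro kept above simply measures a cache struct in 16-bit code units, which is exactly the number of extra slots the compiler must emit after the opcode. A minimal, self-contained restatement of that arithmetic follows; the struct and macro names are local stand-ins chosen only to make the counting concrete, not the real header.

/* Standalone restatement of the idea behind CACHE_ENTRIES, using a
 * hypothetical cache struct. */
#include <assert.h>
#include <stdint.h>

typedef uint16_t codeunit_t;                       /* stand-in for _Py_CODEUNIT */
#define CACHE_UNITS(cache) (sizeof(cache) / sizeof(codeunit_t))

typedef struct {
    codeunit_t counter;                            /* 1 code unit  */
    codeunit_t version[2];                         /* 2 code units */
} example_cache_t;

static_assert(CACHE_UNITS(example_cache_t) == 3,
              "this cache occupies three code units after its instruction");
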
@@ -112,73 +72,22 @@ typedef struct {
 
 #define INLINE_CACHE_ENTRIES_LOAD_METHOD CACHE_ENTRIES(_PyLoadMethodCache)
 
-/* Maximum size of code to quicken, in code units. */
-#define MAX_SIZE_TO_QUICKEN 5000
-
-typedef union _cache_or_instruction {
-    _Py_CODEUNIT code[1];
-    SpecializedCacheEntry entry;
-} SpecializedCacheOrInstruction;
+typedef struct {
+    _Py_CODEUNIT counter;
+    _Py_CODEUNIT func_version[2];
+    _Py_CODEUNIT min_args;
+} _PyCallCache;
 
-/* Get pointer to the nth cache entry, from the first instruction and n.
- * Cache entries are indexed backwards, with [count-1] first in memory, and [0] last.
- * The zeroth entry immediately precedes the instructions.
- */
-static inline SpecializedCacheEntry *
-_GetSpecializedCacheEntry(const _Py_CODEUNIT *first_instr, Py_ssize_t n)
-{
-    SpecializedCacheOrInstruction *last_cache_plus_one = (SpecializedCacheOrInstruction *)first_instr;
-    assert(&last_cache_plus_one->code[0] == first_instr);
-    return &last_cache_plus_one[-1-n].entry;
-}
+#define INLINE_CACHE_ENTRIES_CALL CACHE_ENTRIES(_PyCallCache)
 
-/* Following two functions form a pair.
- *
- * oparg_from_offset_and_index() is used to compute the oparg
- * when quickening, so that offset_from_oparg_and_nexti()
- * can be used at runtime to compute the offset.
- *
- * The relationship between the three values is currently
- *     offset == (index>>1) + oparg
- * This relation is chosen based on the following observations:
- * 1. typically 1 in 4 instructions need a cache
- * 2. instructions that need a cache typically use 2 entries
- * These observations imply: offset ≈ index/2
- * We use the oparg to fine tune the relation to avoid wasting space
- * and allow consecutive instructions to use caches.
- *
- * If the number of cache entries < number of instructions/2 we will waste
- * some small amoount of space.
- * If the number of cache entries > (number of instructions/2) + 255, then
- * some instructions will not be able to use a cache.
- * In practice, we expect some small amount of wasted space in a shorter functions
- * and only functions exceeding a 1000 lines or more not to have enugh cache space.
- *
- */
-static inline int
-oparg_from_offset_and_nexti(int offset, int nexti)
-{
-    return offset-(nexti>>1);
-}
+typedef struct {
+    _Py_CODEUNIT counter;
+} _PyPrecallCache;
 
-static inline int
-offset_from_oparg_and_nexti(int oparg, int nexti)
-{
-    return (nexti>>1)+oparg;
-}
+#define INLINE_CACHE_ENTRIES_PRECALL CACHE_ENTRIES(_PyPrecallCache)
 
-/* Get pointer to the cache entry associated with an instruction.
- * nexti is the index of the instruction plus one.
- * nexti is used as it corresponds to the instruction pointer in the interpreter.
- * This doesn't check that an entry has been allocated for that instruction. */
-static inline SpecializedCacheEntry *
-_GetSpecializedCacheEntryForInstruction(const _Py_CODEUNIT *first_instr, int nexti, int oparg)
-{
-    return _GetSpecializedCacheEntry(
-        first_instr,
-        offset_from_oparg_and_nexti(oparg, nexti)
-    );
-}
+/* Maximum size of code to quicken, in code units. */
+#define MAX_SIZE_TO_QUICKEN 10000
 
 #define QUICKENING_WARMUP_DELAY 8

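One detail worth calling out in the new _PyCallCache: a code unit is 16 bits, so the 32-bit function version is spread across the two func_version slots. A hedged sketch of that split follows; the helper names are invented here, and the real specializer has its own equivalents.

/* Hypothetical helpers showing how a 32-bit value fits into two 16-bit cache
 * slots; names and layout choices are illustrative only. */
#include <stdint.h>

typedef uint16_t codeunit_t;                 /* stand-in for _Py_CODEUNIT */

static void
cache_write_u32(codeunit_t *slots, uint32_t value)
{
    slots[0] = (codeunit_t)(value & 0xFFFF); /* low half  */
    slots[1] = (codeunit_t)(value >> 16);    /* high half */
}

static uint32_t
cache_read_u32(const codeunit_t *slots)
{
    return (uint32_t)slots[0] | ((uint32_t)slots[1] << 16);
}
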
@@ -205,6 +114,13 @@ _Py_IncrementCountAndMaybeQuicken(PyCodeObject *code)
 
 extern Py_ssize_t _Py_QuickenedCount;
 
+// Borrowed references to common callables:
+struct callable_cache {
+    PyObject *isinstance;
+    PyObject *len;
+    PyObject *list_append;
+};
+
 /* "Locals plus" for a code object is the set of locals + cell vars +
  * free vars. This relates to variable names as well as offsets into
  * the "fast locals" storage array of execution frames. The compiler
@@ -332,11 +248,6 @@ extern int _PyLineTable_PreviousAddressRange(PyCodeAddressRange *range);
 
 #define ADAPTIVE_CACHE_BACKOFF 64
 
-static inline void
-cache_backoff(_PyAdaptiveEntry *entry) {
-    entry->counter = ADAPTIVE_CACHE_BACKOFF;
-}
-
 /* Specialization functions */
 
 extern int _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr,
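
cache_backoff() can be deleted because the adaptive counter no longer lives in a _PyAdaptiveEntry: with inline caches, the counter is simply the first code unit of the instruction's cache. The backoff pattern itself survives; below is a simplified, hypothetical sketch of what resetting it now amounts to, not the exact ceval.c logic.

/* Simplified, hypothetical version of the backoff step with inline caches. */
#include <stdint.h>

typedef uint16_t codeunit_t;          /* stand-in for _Py_CODEUNIT */
#define BACKOFF 64                    /* mirrors ADAPTIVE_CACHE_BACKOFF above */

static void
backoff_counter(codeunit_t *cache)
{
    /* cache[0] is the counter slot: wait BACKOFF more executions before the
       adaptive instruction tries to specialize again. */
    cache[0] = BACKOFF;
}
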
@@ -348,10 +259,10 @@ extern int _Py_Specialize_LoadMethod(PyObject *owner, _Py_CODEUNIT *instr,
                                      PyObject *name);
 extern int _Py_Specialize_BinarySubscr(PyObject *sub, PyObject *container, _Py_CODEUNIT *instr);
 extern int _Py_Specialize_StoreSubscr(PyObject *container, PyObject *sub, _Py_CODEUNIT *instr);
-extern int _Py_Specialize_Call(PyObject *callable, _Py_CODEUNIT *instr, int nargs,
-                               PyObject *kwnames, SpecializedCacheEntry *cache);
-extern int _Py_Specialize_Precall(PyObject *callable, _Py_CODEUNIT *instr, int nargs,
-                                  PyObject *kwnames, SpecializedCacheEntry *cache, PyObject *builtins);
+extern int _Py_Specialize_Call(PyObject *callable, _Py_CODEUNIT *instr,
+                               int nargs, PyObject *kwnames);
+extern int _Py_Specialize_Precall(PyObject *callable, _Py_CODEUNIT *instr,
+                                  int nargs, PyObject *kwnames, int oparg);
 extern void _Py_Specialize_BinaryOp(PyObject *lhs, PyObject *rhs, _Py_CODEUNIT *instr,
                                     int oparg);
 extern void _Py_Specialize_CompareOp(PyObject *lhs, PyObject *rhs,

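Note how the _Py_Specialize_Call and _Py_Specialize_Precall prototypes lose their SpecializedCacheEntry * parameter: since the cache now sits directly after the instruction, a pointer to the instruction is enough to locate it. An illustrative sketch of that address arithmetic follows, using local type names; the interpreter and specializer do the equivalent cast on the real _PyCallCache.

/* Illustrative only: locating an instruction's inline cache from the
 * instruction pointer itself. */
#include <stdint.h>

typedef uint16_t codeunit_t;               /* stand-in for _Py_CODEUNIT */

typedef struct {
    codeunit_t counter;
    codeunit_t func_version[2];
    codeunit_t min_args;
} call_cache_t;                            /* mirrors _PyCallCache above */

static call_cache_t *
call_cache_for(codeunit_t *instr)
{
    /* The cache occupies the code units immediately after the instruction. */
    return (call_cache_t *)(instr + 1);
}
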
Include/internal/pycore_global_strings.h

Lines changed: 2 additions & 0 deletions
@@ -269,6 +269,7 @@ struct _Py_global_strings {
         STRUCT_FOR_ID(inf)
         STRUCT_FOR_ID(intersection)
         STRUCT_FOR_ID(isatty)
+        STRUCT_FOR_ID(isinstance)
         STRUCT_FOR_ID(items)
         STRUCT_FOR_ID(iter)
         STRUCT_FOR_ID(join)
@@ -278,6 +279,7 @@ struct _Py_global_strings {
         STRUCT_FOR_ID(last_type)
         STRUCT_FOR_ID(last_value)
         STRUCT_FOR_ID(latin1)
+        STRUCT_FOR_ID(len)
         STRUCT_FOR_ID(line)
         STRUCT_FOR_ID(lineno)
         STRUCT_FOR_ID(listcomp)

Include/internal/pycore_interp.h

Lines changed: 2 additions & 0 deletions
@@ -12,6 +12,7 @@ extern "C" {
 
 #include "pycore_atomic.h"        // _Py_atomic_address
 #include "pycore_ast_state.h"     // struct ast_state
+#include "pycore_code.h"          // struct callable_cache
 #include "pycore_context.h"       // struct _Py_context_state
 #include "pycore_dict.h"          // struct _Py_dict_state
 #include "pycore_exceptions.h"    // struct _Py_exc_state
@@ -176,6 +177,7 @@ struct _is {
 
     struct ast_state ast;
     struct type_cache type_cache;
+    struct callable_cache callable_cache;
 
     /* The following fields are here to avoid allocation during init.
        The data is exposed through PyInterpreterState pointer fields.

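Adding struct callable_cache to the interpreter state, together with the isinstance and len string identifiers registered in the surrounding headers, lets each interpreter capture borrowed references to those builtins once at startup. Below is a hedged sketch of how such a cache could be filled, assuming the interpreter's builtins dict is at hand; the struct is a local mirror and the helper name is invented, while PyDict_GetItemString is real API that returns a borrowed reference.

/* Hypothetical init helper, for illustration only. */
#include <Python.h>

struct callable_cache_sketch {
    PyObject *isinstance;
    PyObject *len;
    PyObject *list_append;
};

static void
fill_callable_cache(struct callable_cache_sketch *cache, PyObject *builtins_dict)
{
    /* Borrowed references: the builtins dict keeps these objects alive for
       the lifetime of the interpreter. */
    cache->isinstance = PyDict_GetItemString(builtins_dict, "isinstance");
    cache->len = PyDict_GetItemString(builtins_dict, "len");
    /* list.append would be cached from the list type rather than from
       builtins; omitted in this sketch. */
    cache->list_append = NULL;
}
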
Include/internal/pycore_runtime_init.h

Lines changed: 2 additions & 0 deletions
@@ -884,6 +884,7 @@ extern "C" {
     INIT_ID(inf), \
     INIT_ID(intersection), \
     INIT_ID(isatty), \
+    INIT_ID(isinstance), \
     INIT_ID(items), \
     INIT_ID(iter), \
     INIT_ID(join), \
@@ -893,6 +894,7 @@ extern "C" {
     INIT_ID(last_type), \
     INIT_ID(last_value), \
     INIT_ID(latin1), \
+    INIT_ID(len), \
     INIT_ID(line), \
     INIT_ID(lineno), \
     INIT_ID(listcomp), \
