bpo-44338: Port LOAD_GLOBAL to PEP 659 adaptive interpreter (GH-26638) · python/cpython@eecbc7c · GitHub
[go: up one dir, main page]

Skip to content

Commit eecbc7c

Browse files
authored
bpo-44338: Port LOAD_GLOBAL to PEP 659 adaptive interpreter (GH-26638)
* Add specializations of LOAD_GLOBAL.
* Add more stats.
* Remove old opcache; it is no longer used.
* Add NEWS
1 parent fafcfff commit eecbc7c

File tree

9 files changed

+209
-337
lines changed

9 files changed

+209
-337
lines changed

Include/cpython/code.h

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -106,20 +106,6 @@ struct PyCodeObject {
106106
interpreter. */
107107
union _cache_or_instruction *co_quickened;
108108

109-
/* Per opcodes just-in-time cache
110-
*
111-
* To reduce cache size, we use indirect mapping from opcode index to
112-
* cache object:
113-
* cache = co_opcache[co_opcache_map[next_instr - first_instr] - 1]
114-
*/
115-
116-
// co_opcache_map is indexed by (next_instr - first_instr).
117-
// * 0 means there is no cache for this opcode.
118-
// * n > 0 means there is cache in co_opcache[n-1].
119-
unsigned char *co_opcache_map;
120-
_PyOpcache *co_opcache;
121-
int co_opcache_flag; // used to determine when create a cache.
122-
unsigned char co_opcache_size; // length of co_opcache.
123109
};
124110

125111
/* Masks for co_flags above */

Include/internal/pycore_code.h

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,11 @@ typedef struct {
4848
uint32_t dk_version_or_hint;
4949
} _PyLoadAttrCache;
5050

51+
typedef struct {
52+
uint32_t module_keys_version;
53+
uint32_t builtin_keys_version;
54+
} _PyLoadGlobalCache;
55+
5156
/* Add specialized versions of entries to this union.
5257
*
5358
* Do not break the invariant: sizeof(SpecializedCacheEntry) == 8
@@ -62,6 +67,7 @@ typedef union {
6267
_PyEntryZero zero;
6368
_PyAdaptiveEntry adaptive;
6469
_PyLoadAttrCache load_attr;
70+
_PyLoadGlobalCache load_global;
6571
} SpecializedCacheEntry;
6672

6773
#define INSTRUCTIONS_PER_ENTRY (sizeof(SpecializedCacheEntry)/sizeof(_Py_CODEUNIT))
@@ -254,8 +260,6 @@ PyAPI_FUNC(PyCodeObject *) _PyCode_New(struct _PyCodeConstructor *);
254260

255261
/* Private API */
256262

257-
int _PyCode_InitOpcache(PyCodeObject *co);
258-
259263
/* Getters for internal PyCodeObject data. */
260264
PyAPI_FUNC(PyObject *) _PyCode_GetVarnames(PyCodeObject *);
261265
PyAPI_FUNC(PyObject *) _PyCode_GetCellvars(PyCodeObject *);
@@ -318,24 +322,25 @@ cache_backoff(_PyAdaptiveEntry *entry) {
318322
/* Specialization functions */
319323

320324
int _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache);
325+
int _Py_Specialize_LoadGlobal(PyObject *globals, PyObject *builtins, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache);
321326

322327
#define SPECIALIZATION_STATS 0
323328
#if SPECIALIZATION_STATS
324329

325-
typedef struct _specialization_stats {
330+
typedef struct _stats {
326331
uint64_t specialization_success;
327332
uint64_t specialization_failure;
328-
uint64_t loadattr_hit;
329-
uint64_t loadattr_deferred;
330-
uint64_t loadattr_miss;
331-
uint64_t loadattr_deopt;
333+
uint64_t hit;
334+
uint64_t deferred;
335+
uint64_t miss;
336+
uint64_t deopt;
332337
} SpecializationStats;
333338

334-
extern SpecializationStats _specialization_stats;
335-
#define STAT_INC(name) _specialization_stats.name++
339+
extern SpecializationStats _specialization_stats[256];
340+
#define STAT_INC(opname, name) _specialization_stats[opname].name++
336341
void _Py_PrintSpecializationStats(void);
337342
#else
338-
#define STAT_INC(name) ((void)0)
343+
#define STAT_INC(opname, name) ((void)0)
339344
#endif
340345

341346

Include/opcode.h

Lines changed: 3 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Lib/opcode.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -226,4 +226,7 @@ def jabs_op(name, op):
226226
"LOAD_ATTR_WITH_HINT",
227227
"LOAD_ATTR_SLOT",
228228
"LOAD_ATTR_MODULE",
229+
"LOAD_GLOBAL_ADAPTIVE",
230+
"LOAD_GLOBAL_MODULE",
231+
"LOAD_GLOBAL_BUILTIN",
229232
]
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
Implement adaptive specialization for LOAD_GLOBAL
2+
3+
Two specialized forms of LOAD_GLOBAL are added:
4+
5+
* LOAD_GLOBAL_MODULE
6+
7+
* LOAD_GLOBAL_BUILTIN

Objects/codeobject.c

Lines changed: 6 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -350,10 +350,7 @@ init_code(PyCodeObject *co, struct _PyCodeConstructor *con)
350350
/* not set */
351351
co->co_weakreflist = NULL;
352352
co->co_extra = NULL;
353-
co->co_opcache_map = NULL;
354-
co->co_opcache = NULL;
355-
co->co_opcache_flag = 0;
356-
co->co_opcache_size = 0;
353+
357354
co->co_warmup = QUICKENING_INITIAL_WARMUP_VALUE;
358355
co->co_quickened = NULL;
359356
}
@@ -912,55 +909,6 @@ new_linesiterator(PyCodeObject *code)
912909
return li;
913910
}
914911

915-
916-
/******************
917-
* the opcache
918-
******************/
919-
920-
int
921-
_PyCode_InitOpcache(PyCodeObject *co)
922-
{
923-
Py_ssize_t co_size = PyBytes_Size(co->co_code) / sizeof(_Py_CODEUNIT);
924-
co->co_opcache_map = (unsigned char *)PyMem_Calloc(co_size, 1);
925-
if (co->co_opcache_map == NULL) {
926-
return -1;
927-
}
928-
929-
const _Py_CODEUNIT *opcodes = (const _Py_CODEUNIT*)PyBytes_AS_STRING(co->co_code);
930-
Py_ssize_t opts = 0;
931-
932-
for (Py_ssize_t i = 0; i < co_size;) {
933-
unsigned char opcode = _Py_OPCODE(opcodes[i]);
934-
i++; // 'i' is now aligned to (next_instr - first_instr)
935-
936-
// TODO: LOAD_METHOD
937-
if (opcode == LOAD_GLOBAL || opcode == LOAD_ATTR) {
938-
opts++;
939-
co->co_opcache_map[i] = (unsigned char)opts;
940-
if (opts > 254) {
941-
break;
942-
}
943-
}
944-
}
945-
946-
if (opts) {
947-
co->co_opcache = (_PyOpcache *)PyMem_Calloc(opts, sizeof(_PyOpcache));
948-
if (co->co_opcache == NULL) {
949-
PyMem_Free(co->co_opcache_map);
950-
return -1;
951-
}
952-
}
953-
else {
954-
PyMem_Free(co->co_opcache_map);
955-
co->co_opcache_map = NULL;
956-
co->co_opcache = NULL;
957-
}
958-
959-
co->co_opcache_size = (unsigned char)opts;
960-
return 0;
961-
}
962-
963-
964912
/******************
965913
* "extra" frame eval info (see PEP 523)
966914
******************/
@@ -1207,15 +1155,6 @@ code_new_impl(PyTypeObject *type, int argcount, int posonlyargcount,
12071155
static void
12081156
code_dealloc(PyCodeObject *co)
12091157
{
1210-
if (co->co_opcache != NULL) {
1211-
PyMem_Free(co->co_opcache);
1212-
}
1213-
if (co->co_opcache_map != NULL) {
1214-
PyMem_Free(co->co_opcache_map);
1215-
}
1216-
co->co_opcache_flag = 0;
1217-
co->co_opcache_size = 0;
1218-
12191158
if (co->co_extra != NULL) {
12201159
PyInterpreterState *interp = _PyInterpreterState_GET();
12211160
_PyCodeObjectExtra *co_extra = co->co_extra;
@@ -1442,12 +1381,11 @@ code_sizeof(PyCodeObject *co, PyObject *Py_UNUSED(args))
14421381
res += co->co_ncellvars * sizeof(Py_ssize_t);
14431382
}
14441383

1445-
if (co->co_opcache != NULL) {
1446-
assert(co->co_opcache_map != NULL);
1447-
// co_opcache_map
1448-
res += PyBytes_GET_SIZE(co->co_code) / sizeof(_Py_CODEUNIT);
1449-
// co_opcache
1450-
res += co->co_opcache_size * sizeof(_PyOpcache);
1384+
if (co->co_quickened != NULL) {
1385+
Py_ssize_t count = co->co_quickened[0].entry.zero.cache_count;
1386+
count += (PyBytes_GET_SIZE(co->co_code)+sizeof(SpecializedCacheEntry)-1)/
1387+
sizeof(SpecializedCacheEntry);
1388+
res += count * sizeof(SpecializedCacheEntry);
14511389
}
14521390

14531391
return PyLong_FromSsize_t(res);

0 commit comments

Comments
 (0)
0