bpo-44338: Port LOAD_GLOBAL to PEP 659 adaptive interpreter by markshannon · Pull Request #26638 · python/cpython · GitHub
[go: up one dir, main page]

Skip to content

bpo-44338: Port LOAD_GLOBAL to PEP 659 adaptive interpreter #26638

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Jun 14, 2021
Merged
Next Next commit
Add specializations of LOAD_GLOBAL.
  • Loading branch information
markshannon committed Jun 10, 2021
commit 6e57707f51ab5779f9f4142ef1765126a6c19033
7 changes: 7 additions & 0 deletions Include/internal/pycore_code.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,11 @@ typedef struct {
uint32_t dk_version_or_hint;
} _PyLoadAttrCache;

/* Cache entry for a specialized LOAD_GLOBAL.
 * Stores the dict-keys version tags captured at specialization time;
 * the specialized opcodes deopt if the current keys versions differ.
 * Must keep sizeof(SpecializedCacheEntry) == 8 (see union below). */
typedef struct {
/* Version tag of the module (globals) dict's keys at specialization. */
uint32_t module_keys_version;
/* Version tag of the builtins dict's keys at specialization. */
uint32_t builtin_keys_version;
} _PyLoadGlobalCache;

/* Add specialized versions of entries to this union.
*
* Do not break the invariant: sizeof(SpecializedCacheEntry) == 8
Expand All @@ -62,6 +67,7 @@ typedef union {
_PyEntryZero zero;
_PyAdaptiveEntry adaptive;
_PyLoadAttrCache load_attr;
_PyLoadGlobalCache load_global;
} SpecializedCacheEntry;

#define INSTRUCTIONS_PER_ENTRY (sizeof(SpecializedCacheEntry)/sizeof(_Py_CODEUNIT))
Expand Down Expand Up @@ -318,6 +324,7 @@ cache_backoff(_PyAdaptiveEntry *entry) {
/* Specialization functions */

int _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache);
int _Py_Specialize_LoadGlobal(PyObject *globals, PyObject *builtins, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache);

#define SPECIALIZATION_STATS 0
#if SPECIALIZATION_STATS
Expand Down
3 changes: 3 additions & 0 deletions Include/opcode.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions Lib/opcode.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,4 +226,7 @@ def jabs_op(name, op):
"LOAD_ATTR_WITH_HINT",
"LOAD_ATTR_SLOT",
"LOAD_ATTR_MODULE",
"LOAD_GLOBAL_ADAPTIVE",
"LOAD_GLOBAL_MODULE",
"LOAD_GLOBAL_BUILTIN",
]
105 changes: 66 additions & 39 deletions Python/ceval.c
Original file line number Diff line number Diff line change
Expand Up @@ -2974,30 +2974,12 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
}

case TARGET(LOAD_GLOBAL): {
PyObject *name;
PREDICTED(LOAD_GLOBAL);
PyObject *name = GETITEM(names, oparg);
PyObject *v;
if (PyDict_CheckExact(GLOBALS())
&& PyDict_CheckExact(BUILTINS()))
{
OPCACHE_CHECK();
if (co_opcache != NULL && co_opcache->optimized > 0) {
_PyOpcache_LoadGlobal *lg = &co_opcache->u.lg;

if (lg->globals_ver ==
((PyDictObject *)GLOBALS())->ma_version_tag
&& lg->builtins_ver ==
((PyDictObject *)BUILTINS())->ma_version_tag)
{
PyObject *ptr = lg->ptr;
OPCACHE_STAT_GLOBAL_HIT();
assert(ptr != NULL);
Py_INCREF(ptr);
PUSH(ptr);
DISPATCH();
}
}

name = GETITEM(names, oparg);
v = _PyDict_LoadGlobal((PyDictObject *)GLOBALS(),
(PyDictObject *)BUILTINS(),
name);
Expand All @@ -3010,25 +2992,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
}
goto error;
}

if (co_opcache != NULL) {
_PyOpcache_LoadGlobal *lg = &co_opcache->u.lg;

if (co_opcache->optimized == 0) {
/* Wasn't optimized before. */
OPCACHE_STAT_GLOBAL_OPT();
} else {
OPCACHE_STAT_GLOBAL_MISS();
}

co_opcache->optimized = 1;
lg->globals_ver =
((PyDictObject *)GLOBALS())->ma_version_tag;
lg->builtins_ver =
((PyDictObject *)BUILTINS())->ma_version_tag;
lg->ptr = v; /* borrowed */
}

Py_INCREF(v);
}
else {
Expand Down Expand Up @@ -3059,6 +3022,58 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
DISPATCH();
}

/* Adaptive counterpart of LOAD_GLOBAL: counts down on each execution and,
 * when the counter reaches zero, attempts to (re)specialize the
 * instruction in place via _Py_Specialize_LoadGlobal.  Until then it
 * falls through to the generic LOAD_GLOBAL implementation. */
case TARGET(LOAD_GLOBAL_ADAPTIVE): {
SpecializedCacheEntry *cache = GET_CACHE();
if (cache->adaptive.counter == 0) {
PyObject *name = GETITEM(names, cache->adaptive.original_oparg);
/* Back up so the specializer sees (and may rewrite) this instruction. */
next_instr--;
if (_Py_Specialize_LoadGlobal(GLOBALS(), BUILTINS(), next_instr, name, cache) < 0) {
goto error;
}
/* Re-dispatch: executes whatever opcode specialization installed. */
DISPATCH();
}
else {
cache->adaptive.counter--;
/* Restore the real oparg (the quickened instruction carries a cache
 * offset instead) and run the generic implementation. */
oparg = cache->adaptive.original_oparg;
JUMP_TO_INSTRUCTION(LOAD_GLOBAL);
}
}

/* Specialized LOAD_GLOBAL: at specialization time the name was found in
 * the module's globals dict.  cache0->index is the slot in the dict's
 * key-entry array; cache1->module_keys_version guards against the keys
 * object (names, order, kind) having changed since specialization.
 * Note we validate the *keys* version, not dict identity: two dicts
 * sharing an identical keys version have the same keys in the same
 * order, and we cache an index, not a value, so a different globals
 * dict with matching keys is still safe. */
case TARGET(LOAD_GLOBAL_MODULE): {
    DEOPT_IF(!PyDict_CheckExact(GLOBALS()), LOAD_GLOBAL);
    PyDictObject *dict = (PyDictObject *)GLOBALS();
    SpecializedCacheEntry *caches = GET_CACHE();
    _PyAdaptiveEntry *cache0 = &caches[0].adaptive;
    _PyLoadGlobalCache *cache1 = &caches[-1].load_global;
    DEOPT_IF(dict->ma_keys->dk_version != cache1->module_keys_version, LOAD_GLOBAL);
    PyDictKeyEntry *ep = DK_ENTRIES(dict->ma_keys) + cache0->index;
    PyObject *res = ep->me_value;
    /* Fixed: deopt to the LOAD_GLOBAL family, not LOAD_ATTR.  DEOPT_IF
     * jumps to <family>_miss, so naming the wrong family would run
     * LOAD_ATTR's miss handler against this instruction's cache. */
    DEOPT_IF(res == NULL, LOAD_GLOBAL);
    record_cache_hit(cache0);
    Py_INCREF(res);
    PUSH(res);
    DISPATCH();
}

/* Specialized LOAD_GLOBAL: at specialization time the name was absent
 * from globals and found in builtins.  Both keys versions are checked:
 * the module keys version guards against the name having since been
 * added to globals (which would shadow the builtin), and the builtin
 * keys version validates the cached index into the builtins dict. */
case TARGET(LOAD_GLOBAL_BUILTIN): {
    DEOPT_IF(!PyDict_CheckExact(GLOBALS()), LOAD_GLOBAL);
    DEOPT_IF(!PyDict_CheckExact(BUILTINS()), LOAD_GLOBAL);
    PyDictObject *mdict = (PyDictObject *)GLOBALS();
    PyDictObject *bdict = (PyDictObject *)BUILTINS();
    SpecializedCacheEntry *caches = GET_CACHE();
    _PyAdaptiveEntry *cache0 = &caches[0].adaptive;
    _PyLoadGlobalCache *cache1 = &caches[-1].load_global;
    DEOPT_IF(mdict->ma_keys->dk_version != cache1->module_keys_version, LOAD_GLOBAL);
    DEOPT_IF(bdict->ma_keys->dk_version != cache1->builtin_keys_version, LOAD_GLOBAL);
    PyDictKeyEntry *ep = DK_ENTRIES(bdict->ma_keys) + cache0->index;
    PyObject *res = ep->me_value;
    /* Fixed: deopt to the LOAD_GLOBAL family, not LOAD_ATTR.  DEOPT_IF
     * jumps to <family>_miss, so naming the wrong family would run
     * LOAD_ATTR's miss handler against this instruction's cache. */
    DEOPT_IF(res == NULL, LOAD_GLOBAL);
    record_cache_hit(cache0);
    Py_INCREF(res);
    PUSH(res);
    DISPATCH();
}

case TARGET(DELETE_FAST): {
PyObject *v = GETLOCAL(oparg);
if (v != NULL) {
Expand Down Expand Up @@ -4461,6 +4476,18 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
JUMP_TO_INSTRUCTION(LOAD_ATTR);
}

/* Miss handler for the LOAD_GLOBAL family (target of DEOPT_IF).
 * Records the miss; after too many misses, rewrites the instruction
 * back to the adaptive form so it can respecialize later, and resets
 * the counter via cache_backoff.  Then executes the generic
 * LOAD_GLOBAL with the original oparg restored. */
LOAD_GLOBAL_miss:
{
_PyAdaptiveEntry *cache = &GET_CACHE()->adaptive;
record_cache_miss(cache);
if (too_many_cache_misses(cache)) {
next_instr[-1] = _Py_MAKECODEUNIT(LOAD_GLOBAL_ADAPTIVE, _Py_OPARG(next_instr[-1]));
cache_backoff(cache);
}
oparg = cache->original_oparg;
JUMP_TO_INSTRUCTION(LOAD_GLOBAL);
}

error:
/* Double-check exception status. */
#ifdef NDEBUG
Expand Down
6 changes: 3 additions & 3 deletions Python/opcode_targets.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

67 changes: 67 additions & 0 deletions Python/specialize.c
Original file line number Diff line number Diff line change
Expand Up @@ -77,11 +77,13 @@ get_cache_count(SpecializedCacheOrInstruction *quickened) {
Values of zero are ignored. */
static uint8_t adaptive_opcodes[256] = {
[LOAD_ATTR] = LOAD_ATTR_ADAPTIVE,
[LOAD_GLOBAL] = LOAD_GLOBAL_ADAPTIVE,
};

/* The number of cache entries required for a "family" of instructions. */
static uint8_t cache_requirements[256] = {
[LOAD_ATTR] = 2,
[LOAD_GLOBAL] = 2,
};

/* Return the oparg for the cache_offset and instruction index.
Expand Down Expand Up @@ -368,3 +370,68 @@ _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, Sp
return 0;
}


/* Attempt to specialize a LOAD_GLOBAL instruction.
 *
 * On success, rewrites *instr to LOAD_GLOBAL_MODULE (name found in
 * globals) or LOAD_GLOBAL_BUILTIN (name found in builtins), storing the
 * entry index in cache0->index and the guarding dict-keys version(s) in
 * cache1.  On any condition that prevents specialization, backs off the
 * adaptive counter and leaves the instruction unchanged.
 *
 * Returns 0 on completion (specialized or not), -1 on error (with an
 * exception set, e.g. from hashing the name). */
int
_Py_Specialize_LoadGlobal(
    PyObject *globals, PyObject *builtins,
    _Py_CODEUNIT *instr, PyObject *name,
    SpecializedCacheEntry *cache)
{
    _PyAdaptiveEntry *cache0 = &cache->adaptive;
    _PyLoadGlobalCache *cache1 = &cache[-1].load_global;
    assert(PyUnicode_CheckExact(name));
    Py_hash_t hash = PyObject_Hash(name);
    if (hash == -1) {
        return -1;
    }
    if (!PyDict_CheckExact(globals)) {
        goto fail;
    }
    /* Split-keys dicts store values separately from the key entries,
     * so the index-into-DK_ENTRIES fast path does not apply. */
    if (((PyDictObject *)globals)->ma_keys->dk_kind == DICT_KEYS_SPLIT) {
        goto fail;
    }
    PyObject *value;
    /* Fixed: the lookup helper is _Py_dict_lookup (name was garbled). */
    Py_ssize_t index = _Py_dict_lookup((PyDictObject *)globals, name, hash, &value);
    assert (index != DKIX_ERROR);
    if (index != DKIX_EMPTY) {
        /* The cache can only hold a 16-bit index; skip huge modules. */
        if (index != (uint16_t)index) {
            goto fail;
        }
        uint32_t keys_version = _PyDictKeys_GetVersionForCurrentState((PyDictObject *)globals);
        if (keys_version == 0) {
            goto fail;
        }
        cache1->module_keys_version = keys_version;
        cache0->index = index;
        *instr = _Py_MAKECODEUNIT(LOAD_GLOBAL_MODULE, _Py_OPARG(*instr));
        goto success;
    }
    /* Not in globals: try builtins. */
    if (!PyDict_CheckExact(builtins)) {
        goto fail;
    }
    if (((PyDictObject *)builtins)->ma_keys->dk_kind == DICT_KEYS_SPLIT) {
        goto fail;
    }
    index = _Py_dict_lookup((PyDictObject *)builtins, name, hash, &value);
    assert (index != DKIX_ERROR);
    if (index != (uint16_t)index) {
        goto fail;
    }
    /* Fixed: check the globals keys version for failure (0) before
     * caching it, as the module path does.  Caching 0 could spuriously
     * match a keys object that has not been assigned a version yet. */
    uint32_t globals_version = _PyDictKeys_GetVersionForCurrentState((PyDictObject *)globals);
    if (globals_version == 0) {
        goto fail;
    }
    cache1->module_keys_version = globals_version;
    uint32_t keys_version = _PyDictKeys_GetVersionForCurrentState((PyDictObject *)builtins);
    if (keys_version == 0) {
        goto fail;
    }
    cache1->builtin_keys_version = keys_version;
    cache0->index = index;
    *instr = _Py_MAKECODEUNIT(LOAD_GLOBAL_BUILTIN, _Py_OPARG(*instr));
    goto success;
fail:
    assert(!PyErr_Occurred());
    cache_backoff(cache0);
    return 0;
success:
    assert(!PyErr_Occurred());
    cache0->counter = saturating_start();
    return 0;
}
0