bpo-44338: Port LOAD_GLOBAL to PEP 659 adaptive interpreter by markshannon · Pull Request #26638 · python/cpython · GitHub
[go: up one dir, main page]

Skip to content

bpo-44338: Port LOAD_GLOBAL to PEP 659 adaptive interpreter #26638

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Jun 14, 2021
Merged
Next Next commit
Add specializations of LOAD_GLOBAL.
  • Loading branch information
markshannon committed Jun 10, 2021
commit 6e57707f51ab5779f9f4142ef1765126a6c19033
7 changes: 7 additions & 0 deletions Include/internal/pycore_code.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,11 @@ typedef struct {
uint32_t dk_version_or_hint;
} _PyLoadAttrCache;

/* Cache entry for a specialized LOAD_GLOBAL.
 * Stores the dict-keys version tags captured at specialization time;
 * the specialized opcodes deopt if the current keys versions differ.
 * Must keep sizeof(SpecializedCacheEntry) == 8 (see union below). */
typedef struct {
/* Version tag of the module (globals) dict's keys at specialization. */
uint32_t module_keys_version;
/* Version tag of the builtins dict's keys at specialization. */
uint32_t builtin_keys_version;
} _PyLoadGlobalCache;

/* Add specialized versions of entries to this union.
*
* Do not break the invariant: sizeof(SpecializedCacheEntry) == 8
Expand All @@ -62,6 +67,7 @@ typedef union {
_PyEntryZero zero;
_PyAdaptiveEntry adaptive;
_PyLoadAttrCache load_attr;
_PyLoadGlobalCache load_global;
} SpecializedCacheEntry;

#define INSTRUCTIONS_PER_ENTRY (sizeof(SpecializedCacheEntry)/sizeof(_Py_CODEUNIT))
Expand Down Expand Up @@ -318,6 +324,7 @@ cache_backoff(_PyAdaptiveEntry *entry) {
/* Specialization functions */

int _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache);
int _Py_Specialize_LoadGlobal(PyObject *globals, PyObject *builtins, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache);

#define SPECIALIZATION_STATS 0
#if SPECIALIZATION_STATS
Expand Down
3 changes: 3 additions & 0 deletions Include/opcode.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions Lib/opcode.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,4 +226,7 @@ def jabs_op(name, op):
"LOAD_ATTR_WITH_HINT",
"LOAD_ATTR_SLOT",
"LOAD_ATTR_MODULE",
"LOAD_GLOBAL_ADAPTIVE",
"LOAD_GLOBAL_MODULE",
"LOAD_GLOBAL_BUILTIN",
]
105 changes: 66 additions & 39 deletions Python/ceval.c
Original file line number Diff line number Diff line change
Expand Up @@ -2974,30 +2974,12 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
}

case TARGET(LOAD_GLOBAL): {
PyObject *name;
PREDICTED(LOAD_GLOBAL);
PyObject *name = GETITEM(names, oparg);
PyObject *v;
if (PyDict_CheckExact(GLOBALS())
&& PyDict_CheckExact(BUILTINS()))
{
OPCACHE_CHECK();
if (co_opcache != NULL && co_opcache->optimized > 0) {
_PyOpcache_LoadGlobal *lg = &co_opcache->u.lg;

if (lg->globals_ver ==
((PyDictObject *)GLOBALS())->ma_version_tag
&& lg->builtins_ver ==
((PyDictObject *)BUILTINS())->ma_version_tag)
{
PyObject *ptr = lg->ptr;
OPCACHE_STAT_GLOBAL_HIT();
assert(ptr != NULL);
Py_INCREF(ptr);
PUSH(ptr);
DISPATCH();
}
}

name = GETITEM(names, oparg);
v = _PyDict_LoadGlobal((PyDictObject *)GLOBALS(),
(PyDictObject *)BUILTINS(),
name);
Expand All @@ -3010,25 +2992,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
}
goto error;
}

if (co_opcache != NULL) {
_PyOpcache_LoadGlobal *lg = &co_opcache->u.lg;

if (co_opcache->optimized == 0) {
/* Wasn't optimized before. */
OPCACHE_STAT_GLOBAL_OPT();
} else {
OPCACHE_STAT_GLOBAL_MISS();
}

co_opcache->optimized = 1;
lg->globals_ver =
((PyDictObject *)GLOBALS())->ma_version_tag;
lg->builtins_ver =
((PyDictObject *)BUILTINS())->ma_version_tag;
lg->ptr = v; /* borrowed */
}

Py_INCREF(v);
}
else {
Expand Down Expand Up @@ -3059,6 +3022,58 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
DISPATCH();
}

/* Adaptive counterpart of LOAD_GLOBAL: counts down on each execution and,
 * when the counter reaches zero, attempts to (re)specialize the
 * instruction in place via _Py_Specialize_LoadGlobal.  Until then it
 * falls through to the generic LOAD_GLOBAL implementation. */
case TARGET(LOAD_GLOBAL_ADAPTIVE): {
SpecializedCacheEntry *cache = GET_CACHE();
if (cache->adaptive.counter == 0) {
PyObject *name = GETITEM(names, cache->adaptive.original_oparg);
/* Back up so the specializer sees (and may rewrite) this instruction. */
next_instr--;
if (_Py_Specialize_LoadGlobal(GLOBALS(), BUILTINS(), next_instr, name, cache) < 0) {
goto error;
}
/* Re-dispatch: executes whatever opcode specialization installed. */
DISPATCH();
}
else {
cache->adaptive.counter--;
/* Restore the real oparg (the quickened instruction carries a cache
 * offset instead) and run the generic implementation. */
oparg = cache->adaptive.original_oparg;
JUMP_TO_INSTRUCTION(LOAD_GLOBAL);
}
}

/* Specialized LOAD_GLOBAL: at specialization time the name was found in
 * the module's globals dict.  cache0->index is the slot in the dict's
 * key-entry array; cache1->module_keys_version guards against the keys
 * object (names, order, kind) having changed since specialization.
 * Note we validate the *keys* version, not dict identity: two dicts
 * sharing an identical keys version have the same keys in the same
 * order, and we cache an index, not a value, so a different globals
 * dict with matching keys is still safe. */
case TARGET(LOAD_GLOBAL_MODULE): {
    DEOPT_IF(!PyDict_CheckExact(GLOBALS()), LOAD_GLOBAL);
    PyDictObject *dict = (PyDictObject *)GLOBALS();
    SpecializedCacheEntry *caches = GET_CACHE();
    _PyAdaptiveEntry *cache0 = &caches[0].adaptive;
    _PyLoadGlobalCache *cache1 = &caches[-1].load_global;
    DEOPT_IF(dict->ma_keys->dk_version != cache1->module_keys_version, LOAD_GLOBAL);
    PyDictKeyEntry *ep = DK_ENTRIES(dict->ma_keys) + cache0->index;
    PyObject *res = ep->me_value;
    /* Fixed: deopt to the LOAD_GLOBAL family, not LOAD_ATTR.  DEOPT_IF
     * jumps to <family>_miss, so naming the wrong family would run
     * LOAD_ATTR's miss handler against this instruction's cache. */
    DEOPT_IF(res == NULL, LOAD_GLOBAL);
    record_cache_hit(cache0);
    Py_INCREF(res);
    PUSH(res);
    DISPATCH();
}

/* Specialized LOAD_GLOBAL: at specialization time the name was absent
 * from globals and found in builtins.  Both keys versions are checked:
 * the module keys version guards against the name having since been
 * added to globals (which would shadow the builtin), and the builtin
 * keys version validates the cached index into the builtins dict. */
case TARGET(LOAD_GLOBAL_BUILTIN): {
    DEOPT_IF(!PyDict_CheckExact(GLOBALS()), LOAD_GLOBAL);
    DEOPT_IF(!PyDict_CheckExact(BUILTINS()), LOAD_GLOBAL);
    PyDictObject *mdict = (PyDictObject *)GLOBALS();
    PyDictObject *bdict = (PyDictObject *)BUILTINS();
    SpecializedCacheEntry *caches = GET_CACHE();
    _PyAdaptiveEntry *cache0 = &caches[0].adaptive;
    _PyLoadGlobalCache *cache1 = &caches[-1].load_global;
    DEOPT_IF(mdict->ma_keys->dk_version != cache1->module_keys_version, LOAD_GLOBAL);
    DEOPT_IF(bdict->ma_keys->dk_version != cache1->builtin_keys_version, LOAD_GLOBAL);
    PyDictKeyEntry *ep = DK_ENTRIES(bdict->ma_keys) + cache0->index;
    PyObject *res = ep->me_value;
    /* Fixed: deopt to the LOAD_GLOBAL family, not LOAD_ATTR.  DEOPT_IF
     * jumps to <family>_miss, so naming the wrong family would run
     * LOAD_ATTR's miss handler against this instruction's cache. */
    DEOPT_IF(res == NULL, LOAD_GLOBAL);
    record_cache_hit(cache0);
    Py_INCREF(res);
    PUSH(res);
    DISPATCH();
}

case TARGET(DELETE_FAST): {
PyObject *v = GETLOCAL(oparg);
if (v != NULL) {
Expand Down Expand Up @@ -4461,6 +4476,18 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
JUMP_TO_INSTRUCTION(LOAD_ATTR);
}

/* Miss handler for the LOAD_GLOBAL family (target of DEOPT_IF).
 * Records the miss; after too many misses, rewrites the instruction
 * back to the adaptive form so it can respecialize later, and resets
 * the counter via cache_backoff.  Then executes the generic
 * LOAD_GLOBAL with the original oparg restored. */
LOAD_GLOBAL_miss:
{
_PyAdaptiveEntry *cache = &GET_CACHE()->adaptive;
record_cache_miss(cache);
if (too_many_cache_misses(cache)) {
next_instr[-1] = _Py_MAKECODEUNIT(LOAD_GLOBAL_ADAPTIVE, _Py_OPARG(next_instr[-1]));
cache_backoff(cache);
}
oparg = cache->original_oparg;
JUMP_TO_INSTRUCTION(LOAD_GLOBAL);
}

error:
/* Double-check exception status. */
#ifdef NDEBUG
Expand Down
6 changes: 3 additions & 3 deletions Python/opcode_targets.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

67 changes: 67 additions & 0 deletions Python/specialize.c
Original file line number Diff line number Diff line change
Expand Up @@ -77,11 +77,13 @@ get_cache_count(SpecializedCacheOrInstruction *quickened) {
Values of zero are ignored. */
static uint8_t adaptive_opcodes[256] = {
[LOAD_ATTR] = LOAD_ATTR_ADAPTIVE,
[LOAD_GLOBAL] = LOAD_GLOBAL_ADAPTIVE,
};

/* The number of cache entries required for a "family" of instructions. */
static uint8_t cache_requirements[256] = {
[LOAD_ATTR] = 2,
[LOAD_GLOBAL] = 2,
};

/* Return the oparg for the cache_offset and instruction index.
Expand Down Expand Up @@ -368,3 +370,68 @@ _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, Sp
return 0;
}


/* Attempt to specialize a LOAD_GLOBAL instruction.
 *
 * On success, rewrites *instr to LOAD_GLOBAL_MODULE (name found in
 * globals) or LOAD_GLOBAL_BUILTIN (name found in builtins), storing the
 * entry index in cache0->index and the guarding dict-keys version(s) in
 * cache1.  On any condition that prevents specialization, backs off the
 * adaptive counter and leaves the instruction unchanged.
 *
 * Returns 0 on completion (specialized or not), -1 on error (with an
 * exception set, e.g. from hashing the name). */
int
_Py_Specialize_LoadGlobal(
    PyObject *globals, PyObject *builtins,
    _Py_CODEUNIT *instr, PyObject *name,
    SpecializedCacheEntry *cache)
{
    _PyAdaptiveEntry *cache0 = &cache->adaptive;
    _PyLoadGlobalCache *cache1 = &cache[-1].load_global;
    assert(PyUnicode_CheckExact(name));
    Py_hash_t hash = PyObject_Hash(name);
    if (hash == -1) {
        return -1;
    }
    if (!PyDict_CheckExact(globals)) {
        goto fail;
    }
    /* Split-keys dicts store values separately from the key entries,
     * so the index-into-DK_ENTRIES fast path does not apply. */
    if (((PyDictObject *)globals)->ma_keys->dk_kind == DICT_KEYS_SPLIT) {
        goto fail;
    }
    PyObject *value;
    /* Fixed: the lookup helper is _Py_dict_lookup (name was garbled). */
    Py_ssize_t index = _Py_dict_lookup((PyDictObject *)globals, name, hash, &value);
    assert (index != DKIX_ERROR);
    if (index != DKIX_EMPTY) {
        /* The cache can only hold a 16-bit index; skip huge modules. */
        if (index != (uint16_t)index) {
            goto fail;
        }
        uint32_t keys_version = _PyDictKeys_GetVersionForCurrentState((PyDictObject *)globals);
        if (keys_version == 0) {
            goto fail;
        }
        cache1->module_keys_version = keys_version;
        cache0->index = index;
        *instr = _Py_MAKECODEUNIT(LOAD_GLOBAL_MODULE, _Py_OPARG(*instr));
        goto success;
    }
    /* Not in globals: try builtins. */
    if (!PyDict_CheckExact(builtins)) {
        goto fail;
    }
    if (((PyDictObject *)builtins)->ma_keys->dk_kind == DICT_KEYS_SPLIT) {
        goto fail;
    }
    index = _Py_dict_lookup((PyDictObject *)builtins, name, hash, &value);
    assert (index != DKIX_ERROR);
    if (index != (uint16_t)index) {
        goto fail;
    }
    /* Fixed: check the globals keys version for failure (0) before
     * caching it, as the module path does.  Caching 0 could spuriously
     * match a keys object that has not been assigned a version yet. */
    uint32_t globals_version = _PyDictKeys_GetVersionForCurrentState((PyDictObject *)globals);
    if (globals_version == 0) {
        goto fail;
    }
    cache1->module_keys_version = globals_version;
    uint32_t keys_version = _PyDictKeys_GetVersionForCurrentState((PyDictObject *)builtins);
    if (keys_version == 0) {
        goto fail;
    }
    cache1->builtin_keys_version = keys_version;
    cache0->index = index;
    *instr = _Py_MAKECODEUNIT(LOAD_GLOBAL_BUILTIN, _Py_OPARG(*instr));
    goto success;
fail:
    assert(!PyErr_Occurred());
    cache_backoff(cache0);
    return 0;
success:
    assert(!PyErr_Occurred());
    cache0->counter = saturating_start();
    return 0;
}
0