bpo-46841: Inline cache for `BINARY_SUBSCR`. by markshannon · Pull Request #31618 · python/cpython · GitHub
[go: up one dir, main page]

Skip to content

bpo-46841: Inline cache for BINARY_SUBSCR. #31618

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Mar 1, 2022
Prev Previous commit
Next Next commit
Merge branch 'main' into inline-cache-binary-subscr
  • Loading branch information
markshannon committed Feb 28, 2022
commit 8381b7ff668d1550f387c9238fc5df023ff36d88
11 changes: 0 additions & 11 deletions Include/internal/pycore_code.h
Original file line number Diff line number Diff line change
Expand Up @@ -98,8 +98,6 @@ typedef struct {
_Py_CODEUNIT func_version;
} _PyBinarySubscrCache;

#define INLINE_CACHE_ENTRIES_BINARY_OP \
(sizeof(_PyBinaryOpCache) / sizeof(_Py_CODEUNIT))

#define INLINE_CACHE_ENTRIES_UNPACK_SEQUENCE \
(sizeof(_PyUnpackSequenceCache) / sizeof(_Py_CODEUNIT))
Expand Down Expand Up @@ -127,15 +125,6 @@ _GetSpecializedCacheEntry(const _Py_CODEUNIT *first_instr, Py_ssize_t n)
return &last_cache_plus_one[-1-n].entry;
}

/* Fetch the object stored in the quickened object-cache slot `index`,
 * counting backwards from just before the first instruction.
 * Returns a borrowed reference. */
static inline PyObject*
_PyQuickenedGetObject(const _Py_CODEUNIT *first_instr, uint16_t index)
{
    /* The caches occupy the entries immediately preceding the bytecode. */
    SpecializedCacheOrInstruction *cache_end = (SpecializedCacheOrInstruction *)first_instr;
    assert(&cache_end->code[0] == first_instr);
    SpecializedCacheOrInstruction *slot = &cache_end[-1 - index];
    return slot->entry.obj.obj;
}

/* Following two functions form a pair.
*
* oparg_from_offset_and_index() is used to compute the oparg
Expand Down
7 changes: 1 addition & 6 deletions Include/opcode.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions Objects/codeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,7 @@ init_code(PyCodeObject *co, struct _PyCodeConstructor *con)
/* not set */
co->co_weakreflist = NULL;
co->co_extra = NULL;
co->_co_obj_cache = NULL;

co->co_warmup = QUICKENING_INITIAL_WARMUP_VALUE;
co->co_quickened = NULL;
Expand Down Expand Up @@ -1369,6 +1370,9 @@ code_dealloc(PyCodeObject *co)
PyMem_Free(co->co_quickened);
_Py_QuickenedCount--;
}
if (co->_co_obj_cache) {
PyMem_Free(co->_co_obj_cache);
}
PyObject_Free(co);
}

Expand Down
2 changes: 1 addition & 1 deletion Python/ceval.c
Original file line number Diff line number Diff line change
Expand Up @@ -2197,7 +2197,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
PyObject *sub = TOP();
PyObject *container = SECOND();
_PyBinarySubscrCache *cache = (_PyBinarySubscrCache *)next_instr;
PyObject *cached = _PyQuickenedGetObject(first_instr, cache->object);
PyObject *cached = frame->f_code->_co_obj_cache[cache->object];
assert(PyFunction_Check(cached));
PyFunctionObject *getitem = (PyFunctionObject *)cached;
uint32_t type_version = read32(&cache->type_version);
Expand Down
44 changes: 28 additions & 16 deletions Python/specialize.c
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ static uint8_t cache_requirements[256] = {

/* The number of object cache entries required for a "family" of instructions. */
static const uint8_t object_cache_requirements[256] = {
[BINARY_SUBSCR] = 5,
[BINARY_SUBSCR] = 1,
};

Py_ssize_t _Py_QuickenedCount = 0;
Expand Down Expand Up @@ -290,14 +290,6 @@ _Py_PrintSpecializationStats(int to_file)
#define SPECIALIZATION_FAIL(opcode, kind) ((void)0)
#endif

/* Store `obj` in the quickened object-cache slot `index`, counting
 * backwards from just before the first instruction. Does not take a
 * reference to `obj`; the caller retains ownership. */
static void
_PyQuickenedSetObject(const _Py_CODEUNIT *first_instr, uint16_t index, PyObject *obj)
{
    /* The caches occupy the entries immediately preceding the bytecode. */
    SpecializedCacheOrInstruction *cache_end = (SpecializedCacheOrInstruction *)first_instr;
    assert(&cache_end->code[0] == first_instr);
    SpecializedCacheOrInstruction *slot = &cache_end[-1 - index];
    slot->entry.obj.obj = obj;
}

static SpecializedCacheOrInstruction *
allocate(int cache_count, int instruction_count)
{
Expand Down Expand Up @@ -364,17 +356,25 @@ entries_needed(const _Py_CODEUNIT *code, int len)
int previous_opcode = -1;
for (int i = 0; i < len; i++) {
uint8_t opcode = _Py_OPCODE(code[i]);
if (object_cache_requirements[opcode]) {
cache_offset += object_cache_requirements[opcode];
}
else if (previous_opcode != EXTENDED_ARG) {
if (previous_opcode != EXTENDED_ARG) {
oparg_from_instruction_and_update_offset(i, opcode, 0, &cache_offset);
}
previous_opcode = opcode;
}
return cache_offset + 1; // One extra for the count entry
}

/* Sum the per-opcode object-cache requirements over all `len` code
 * units, giving the number of object-cache slots this code object
 * needs after quickening. */
static int
object_slots_needed(const _Py_CODEUNIT *code, int len)
{
    int total = 0;
    const _Py_CODEUNIT *end = code + len;
    for (const _Py_CODEUNIT *instr = code; instr < end; instr++) {
        total += object_cache_requirements[_Py_OPCODE(*instr)];
    }
    return total;
}

static inline _Py_CODEUNIT *
first_instruction(SpecializedCacheOrInstruction *quickened)
{
Expand All @@ -392,6 +392,7 @@ optimize(SpecializedCacheOrInstruction *quickened, int len)
{
_Py_CODEUNIT *instructions = first_instruction(quickened);
int cache_offset = 0;
int object_offset = 0;
int previous_opcode = -1;
int previous_oparg = 0;
for(int i = 0; i < len; i++) {
Expand All @@ -403,9 +404,11 @@ optimize(SpecializedCacheOrInstruction *quickened, int len)
instructions[i] = _Py_MAKECODEUNIT(adaptive_opcode, oparg);
if (object_cache_requirements[opcode]) {
assert(_PyOpcode_InlineCacheEntries[opcode] >= 2);
instructions[i+2] = cache_offset;
cache_offset += object_cache_requirements[opcode];
instructions[i+2] = object_offset;
object_offset += object_cache_requirements[opcode];
}
previous_opcode = -1;
i += _PyOpcode_InlineCacheEntries[opcode];
}
else if (previous_opcode != EXTENDED_ARG) {
int new_oparg = oparg_from_instruction_and_update_offset(
Expand Down Expand Up @@ -484,9 +487,16 @@ _Py_Quicken(PyCodeObject *code) {
code->co_warmup = QUICKENING_WARMUP_COLDEST;
return 0;
}
int obj_count = object_slots_needed(code->co_firstinstr, instr_count);
code->_co_obj_cache = PyMem_Malloc(obj_count*sizeof(PyObject *));
code->_co_obj_cache_len = obj_count;
if (code->_co_obj_cache == NULL) {
return -1;
}
int entry_count = entries_needed(code->co_firstinstr, instr_count);
SpecializedCacheOrInstruction *quickened = allocate(entry_count, instr_count);
if (quickened == NULL) {
PyMem_Free(code->_co_obj_cache);
return -1;
}
_Py_CODEUNIT *new_instructions = first_instruction(quickened);
Expand Down Expand Up @@ -1406,7 +1416,9 @@ _Py_Specialize_BinarySubscr(
goto fail;
}
cache->func_version = version;
_PyQuickenedSetObject(code->co_firstinstr, cache->object, descriptor);
assert(code->_co_obj_cache != NULL);
assert(cache->object >= 0 && cache->object < code->_co_obj_cache_len);
code->_co_obj_cache[cache->object] = descriptor;
*instr = _Py_MAKECODEUNIT(BINARY_SUBSCR_GETITEM, _Py_OPARG(*instr));
goto success;
}
Expand Down
You are viewing a condensed version of this merge commit. You can view the full changes here.
0