bpo-46841: Inline cache for `BINARY_SUBSCR`. by markshannon · Pull Request #31618 · python/cpython · GitHub
[go: up one dir, main page]

Skip to content

bpo-46841: Inline cache for BINARY_SUBSCR. #31618

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Mar 1, 2022
Prev Previous commit
Next Next commit
Merge branch 'main' into inline-cache-binary-subscr
  • Loading branch information
markshannon committed Feb 28, 2022
commit 8381b7ff668d1550f387c9238fc5df023ff36d88
11 changes: 0 additions & 11 deletions Include/internal/pycore_code.h
Original file line number Diff line number Diff line change
Expand Up @@ -98,8 +98,6 @@ typedef struct {
_Py_CODEUNIT func_version;
} _PyBinarySubscrCache;

#define INLINE_CACHE_ENTRIES_BINARY_OP \
(sizeof(_PyBinaryOpCache) / sizeof(_Py_CODEUNIT))

#define INLINE_CACHE_ENTRIES_UNPACK_SEQUENCE \
(sizeof(_PyUnpackSequenceCache) / sizeof(_Py_CODEUNIT))
Expand Down Expand Up @@ -127,15 +125,6 @@ _GetSpecializedCacheEntry(const _Py_CODEUNIT *first_instr, Py_ssize_t n)
return &last_cache_plus_one[-1-n].entry;
}

/* Fetch the object stored in the quickened object-cache slot `index`,
 * counting backwards from just before the first instruction.
 * Returns a borrowed reference. */
static inline PyObject*
_PyQuickenedGetObject(const _Py_CODEUNIT *first_instr, uint16_t index)
{
    /* The caches occupy the entries immediately preceding the bytecode. */
    SpecializedCacheOrInstruction *cache_end = (SpecializedCacheOrInstruction *)first_instr;
    assert(&cache_end->code[0] == first_instr);
    SpecializedCacheOrInstruction *slot = &cache_end[-1 - index];
    return slot->entry.obj.obj;
}

/* Following two functions form a pair.
*
* oparg_from_offset_and_index() is used to compute the oparg
Expand Down
7 changes: 1 addition & 6 deletions Include/opcode.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions Objects/codeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,7 @@ init_code(PyCodeObject *co, struct _PyCodeConstructor *con)
/* not set */
co->co_weakreflist = NULL;
co->co_extra = NULL;
co->_co_obj_cache = NULL;

co->co_warmup = QUICKENING_INITIAL_WARMUP_VALUE;
co->co_quickened = NULL;
Expand Down Expand Up @@ -1369,6 +1370,9 @@ code_dealloc(PyCodeObject *co)
PyMem_Free(co->co_quickened);
_Py_QuickenedCount--;
}
if (co->_co_obj_cache) {
PyMem_Free(co->_co_obj_cache);
}
PyObject_Free(co);
}

Expand Down
2 changes: 1 addition & 1 deletion Python/ceval.c
Original file line number Diff line number Diff line change
Expand Up @@ -2197,7 +2197,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
PyObject *sub = TOP();
PyObject *container = SECOND();
_PyBinarySubscrCache *cache = (_PyBinarySubscrCache *)next_instr;
PyObject *cached = _PyQuickenedGetObject(first_instr, cache->object);
PyObject *cached = frame->f_code->_co_obj_cache[cache->object];
assert(PyFunction_Check(cached));
PyFunctionObject *getitem = (PyFunctionObject *)cached;
uint32_t type_version = read32(&cache->type_version);
Expand Down
44 changes: 28 additions & 16 deletions Python/specialize.c
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ static uint8_t cache_requirements[256] = {

/* The number of object cache entries required for a "family" of instructions. */
static const uint8_t object_cache_requirements[256] = {
[BINARY_SUBSCR] = 5,
[BINARY_SUBSCR] = 1,
};

Py_ssize_t _Py_QuickenedCount = 0;
Expand Down Expand Up @@ -290,14 +290,6 @@ _Py_PrintSpecializationStats(int to_file)
#define SPECIALIZATION_FAIL(opcode, kind) ((void)0)
#endif

/* Store `obj` in the quickened object-cache slot `index`, counting
 * backwards from just before the first instruction. Does not take a
 * reference to `obj`; the caller retains ownership. */
static void
_PyQuickenedSetObject(const _Py_CODEUNIT *first_instr, uint16_t index, PyObject *obj)
{
    /* The caches occupy the entries immediately preceding the bytecode. */
    SpecializedCacheOrInstruction *cache_end = (SpecializedCacheOrInstruction *)first_instr;
    assert(&cache_end->code[0] == first_instr);
    SpecializedCacheOrInstruction *slot = &cache_end[-1 - index];
    slot->entry.obj.obj = obj;
}

static SpecializedCacheOrInstruction *
allocate(int cache_count, int instruction_count)
{
Expand Down Expand Up @@ -364,17 +356,25 @@ entries_needed(const _Py_CODEUNIT *code, int len)
int previous_opcode = -1;
for (int i = 0; i < len; i++) {
uint8_t opcode = _Py_OPCODE(code[i]);
if (object_cache_requirements[opcode]) {
cache_offset += object_cache_requirements[opcode];
}
else if (previous_opcode != EXTENDED_ARG) {
if (previous_opcode != EXTENDED_ARG) {
oparg_from_instruction_and_update_offset(i, opcode, 0, &cache_offset);
}
previous_opcode = opcode;
}
return cache_offset + 1; // One extra for the count entry
}

/* Sum the per-opcode object-cache requirements over all `len` code
 * units, giving the number of object-cache slots this code object
 * needs after quickening. */
static int
object_slots_needed(const _Py_CODEUNIT *code, int len)
{
    int total = 0;
    const _Py_CODEUNIT *end = code + len;
    for (const _Py_CODEUNIT *instr = code; instr < end; instr++) {
        total += object_cache_requirements[_Py_OPCODE(*instr)];
    }
    return total;
}

static inline _Py_CODEUNIT *
first_instruction(SpecializedCacheOrInstruction *quickened)
{
Expand All @@ -392,6 +392,7 @@ optimize(SpecializedCacheOrInstruction *quickened, int len)
{
_Py_CODEUNIT *instructions = first_instruction(quickened);
int cache_offset = 0;
int object_offset = 0;
int previous_opcode = -1;
int previous_oparg = 0;
for(int i = 0; i < len; i++) {
Expand All @@ -403,9 +404,11 @@ optimize(SpecializedCacheOrInstruction *quickened, int len)
instructions[i] = _Py_MAKECODEUNIT(adaptive_opcode, oparg);
if (object_cache_requirements[opcode]) {
assert(_PyOpcode_InlineCacheEntries[opcode] >= 2);
instructions[i+2] = cache_offset;
cache_offset += object_cache_requirements[opcode];
instructions[i+2] = object_offset;
object_offset += object_cache_requirements[opcode];
}
previous_opcode = -1;
i += _PyOpcode_InlineCacheEntries[opcode];
}
else if (previous_opcode != EXTENDED_ARG) {
int new_oparg = oparg_from_instruction_and_update_offset(
Expand Down Expand Up @@ -484,9 +487,16 @@ _Py_Quicken(PyCodeObject *code) {
code->co_warmup = QUICKENING_WARMUP_COLDEST;
return 0;
}
int obj_count = object_slots_needed(code->co_firstinstr, instr_count);
code->_co_obj_cache = PyMem_Malloc(obj_count*sizeof(PyObject *));
code->_co_obj_cache_len = obj_count;
if (code->_co_obj_cache == NULL) {
return -1;
}
int entry_count = entries_needed(code->co_firstinstr, instr_count);
SpecializedCacheOrInstruction *quickened = allocate(entry_count, instr_count);
if (quickened == NULL) {
PyMem_Free(code->_co_obj_cache);
return -1;
}
_Py_CODEUNIT *new_instructions = first_instruction(quickened);
Expand Down Expand Up @@ -1406,7 +1416,9 @@ _Py_Specialize_BinarySubscr(
goto fail;
}
cache->func_version = version;
_PyQuickenedSetObject(code->co_firstinstr, cache->object, descriptor);
assert(code->_co_obj_cache != NULL);
assert(cache->object >= 0 && cache->object < code->_co_obj_cache_len);
code->_co_obj_cache[cache->object] = descriptor;
*instr = _Py_MAKECODEUNIT(BINARY_SUBSCR_GETITEM, _Py_OPARG(*instr));
goto success;
}
Expand Down
You are viewing a condensed version of this merge commit. You can view the full changes here.
0