bpo-46841: Inline cache for `BINARY_SUBSCR`. by markshannon · Pull Request #31618 · python/cpython · GitHub
[go: up one dir, main page]

Skip to content

bpo-46841: Inline cache for BINARY_SUBSCR. #31618

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Mar 1, 2022
2 changes: 2 additions & 0 deletions Include/cpython/code.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ struct PyCodeObject {
_Py_CODEUNIT *co_firstinstr; /* Pointer to first instruction, used for quickening.
Unlike the other "hot" fields, this one is
actually derived from co_code. */
PyObject **_co_obj_cache; /* Array of borrowed references to objects, for specialized code. */
PyObject *co_exceptiontable; /* Byte string encoding exception handling table */
int co_flags; /* CO_..., see below */
int co_warmup; /* Warmup counter for quickening */
Expand Down Expand Up @@ -90,6 +91,7 @@ struct PyCodeObject {
int co_nplaincellvars; /* number of non-arg cell variables */
int co_ncellvars; /* total number of cell variables */
int co_nfreevars; /* number of free variables */
int _co_obj_cache_len; /* number of entries in _co_obj_cache */
// lazily-computed values
PyObject *co_varnames; /* tuple of strings (local variable names) */
PyObject *co_cellvars; /* tuple of strings (cell variable names) */
Expand Down
16 changes: 13 additions & 3 deletions Include/internal/pycore_code.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,13 +77,23 @@ typedef struct {
} _PyBinaryOpCache;

#define INLINE_CACHE_ENTRIES_BINARY_OP CACHE_ENTRIES(_PyBinaryOpCache)

/* Inline cache layout for UNPACK_SEQUENCE: a single code unit holding a
 * counter. Its size in code units is what
 * INLINE_CACHE_ENTRIES_UNPACK_SEQUENCE evaluates to.
 * NOTE(review): presumably the counter plays the same adaptive-countdown
 * role as in the other inline caches -- confirm against ceval.c. */
typedef struct {
_Py_CODEUNIT counter;
} _PyUnpackSequenceCache;


#define INLINE_CACHE_ENTRIES_UNPACK_SEQUENCE \
(sizeof(_PyUnpackSequenceCache) / sizeof(_Py_CODEUNIT))
CACHE_ENTRIES(_PyUnpackSequenceCache)

/* Inline cache layout for BINARY_SUBSCR. The interpreter overlays this
 * struct on the code units that follow the instruction by casting
 * next_instr to (_PyBinarySubscrCache *). It spans five code units, which
 * matches the entry count passed to def_op('BINARY_SUBSCR', 25, 5). */
typedef struct {
/* Countdown checked by BINARY_SUBSCR_ADAPTIVE: specialization is attempted
 * when it is 0, otherwise it is decremented and the generic path runs. */
_Py_CODEUNIT counter;
/* Index into the owning code object's _co_obj_cache array; used by
 * BINARY_SUBSCR_GETITEM to fetch the cached function object. */
_Py_CODEUNIT object;
/* Read with read32() and compared to the container type's tp_version_tag,
 * so the 32-bit value presumably spans this field and _t1. */
_Py_CODEUNIT type_version;
/* NOTE(review): appears to be the second code unit of the 32-bit
 * type version -- confirm; it is never accessed by name in this change. */
_Py_CODEUNIT _t1;
/* Compared directly against the cached function's func_version.
 * NOTE(review): this is a single code unit, so the specializer presumably
 * only stores versions that fit -- confirm in specialize.c. */
_Py_CODEUNIT func_version;
} _PyBinarySubscrCache;

#define INLINE_CACHE_ENTRIES_BINARY_SUBSCR CACHE_ENTRIES(_PyBinarySubscrCache)

/* Maximum size of code to quicken, in code units. */
#define MAX_SIZE_TO_QUICKEN 5000
Expand Down Expand Up @@ -316,7 +326,7 @@ extern int _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObjec
extern int _Py_Specialize_StoreAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache);
extern int _Py_Specialize_LoadGlobal(PyObject *globals, PyObject *builtins, _Py_CODEUNIT *instr, PyObject *name);
extern int _Py_Specialize_LoadMethod(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache);
extern int _Py_Specialize_BinarySubscr(PyObject *sub, PyObject *container, _Py_CODEUNIT *instr, SpecializedCacheEntry *cache);
extern int _Py_Specialize_BinarySubscr(PyObject *sub, PyObject *container, _Py_CODEUNIT *instr, PyCodeObject *code);
extern int _Py_Specialize_StoreSubscr(PyObject *container, PyObject *sub, _Py_CODEUNIT *instr);
extern int _Py_Specialize_Call(PyObject *callable, _Py_CODEUNIT *instr, int nargs,
PyObject *kwnames, SpecializedCacheEntry *cache);
Expand Down
1 change: 1 addition & 0 deletions Include/opcode.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion Lib/importlib/_bootstrap_external.py
Original file line number Diff line number Diff line change
Expand Up @@ -389,6 +389,7 @@ def _write_atomic(path, data, mode=0o666):
# Python 3.11a5 3480 (New CALL opcodes, second iteration)
# Python 3.11a5 3481 (Use inline cache for BINARY_OP)
# Python 3.11a5 3482 (Use inline caching for UNPACK_SEQUENCE and LOAD_GLOBAL)
# Python 3.11a5 3483 (Use inline caching for BINARY_SUBSCR)

# Python 3.12 will start with magic number 3500

Expand All @@ -403,7 +404,7 @@ def _write_atomic(path, data, mode=0o666):
# Whenever MAGIC_NUMBER is changed, the ranges in the magic_values array
# in PC/launcher.c must also be updated.

MAGIC_NUMBER = (3482).to_bytes(2, 'little') + b'\r\n'
MAGIC_NUMBER = (3483).to_bytes(2, 'little') + b'\r\n'
_RAW_MAGIC_NUMBER = int.from_bytes(MAGIC_NUMBER, 'little') # For import.c

_PYCACHE = '__pycache__'
Expand Down
2 changes: 1 addition & 1 deletion Lib/opcode.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def jabs_op(name, op, entries=0):

def_op('UNARY_INVERT', 15)

def_op('BINARY_SUBSCR', 25)
def_op('BINARY_SUBSCR', 25, 5)

def_op('GET_LEN', 30)
def_op('MATCH_MAPPING', 31)
Expand Down
2 changes: 1 addition & 1 deletion Lib/test/test_capi.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,7 +335,7 @@ class C(): pass
*_, count = line.split(b' ')
count = int(count)
self.assertLessEqual(count, i*5)
self.assertGreaterEqual(count, i*5-1)
self.assertGreaterEqual(count, i*5-2)

def test_mapping_keys_values_items(self):
class Mapping1(dict):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Use inline cache for :opcode:`BINARY_SUBSCR`.
8 changes: 8 additions & 0 deletions Objects/codeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,7 @@ init_code(PyCodeObject *co, struct _PyCodeConstructor *con)
/* not set */
co->co_weakreflist = NULL;
co->co_extra = NULL;
co->_co_obj_cache = NULL;

co->co_warmup = QUICKENING_INITIAL_WARMUP_VALUE;
co->co_quickened = NULL;
Expand Down Expand Up @@ -1369,6 +1370,9 @@ code_dealloc(PyCodeObject *co)
PyMem_Free(co->co_quickened);
_Py_QuickenedCount--;
}
if (co->_co_obj_cache) {
PyMem_Free(co->_co_obj_cache);
}
PyObject_Free(co);
}

Expand Down Expand Up @@ -1921,6 +1925,10 @@ _PyStaticCode_Dealloc(PyCodeObject *co)
co->co_quickened = NULL;
_Py_QuickenedCount--;
}
if (co->_co_obj_cache) {
PyMem_Free(co->_co_obj_cache);
co->_co_obj_cache = NULL;
}
co->co_warmup = QUICKENING_INITIAL_WARMUP_VALUE;
PyMem_Free(co->co_extra);
co->co_extra = NULL;
Expand Down
63 changes: 33 additions & 30 deletions Programs/test_frozenmain.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

30 changes: 17 additions & 13 deletions Python/ceval.c
Original file line number Diff line number Diff line change
Expand Up @@ -2102,25 +2102,24 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
SET_TOP(res);
if (res == NULL)
goto error;
JUMPBY(INLINE_CACHE_ENTRIES_BINARY_SUBSCR);
DISPATCH();
}

TARGET(BINARY_SUBSCR_ADAPTIVE) {
SpecializedCacheEntry *cache = GET_CACHE();
if (cache->adaptive.counter == 0) {
_PyBinarySubscrCache *cache = (_PyBinarySubscrCache *)next_instr;
if (cache->counter == 0) {
PyObject *sub = TOP();
PyObject *container = SECOND();
next_instr--;
if (_Py_Specialize_BinarySubscr(container, sub, next_instr, cache) < 0) {
if (_Py_Specialize_BinarySubscr(container, sub, next_instr, frame->f_code) < 0) {
goto error;
}
DISPATCH();
}
else {
STAT_INC(BINARY_SUBSCR, deferred);
cache->adaptive.counter--;
assert(cache->adaptive.original_oparg == 0);
/* No need to set oparg here; it isn't used by BINARY_SUBSCR */
cache->counter--;
JUMP_TO_INSTRUCTION(BINARY_SUBSCR);
}
}
Expand All @@ -2146,6 +2145,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
Py_DECREF(sub);
SET_TOP(res);
Py_DECREF(list);
JUMPBY(INLINE_CACHE_ENTRIES_BINARY_SUBSCR);
NOTRACE_DISPATCH();
}

Expand All @@ -2170,6 +2170,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
Py_DECREF(sub);
SET_TOP(res);
Py_DECREF(tuple);
JUMPBY(INLINE_CACHE_ENTRIES_BINARY_SUBSCR);
NOTRACE_DISPATCH();
}

Expand All @@ -2188,18 +2189,20 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
Py_DECREF(sub);
SET_TOP(res);
Py_DECREF(dict);
JUMPBY(INLINE_CACHE_ENTRIES_BINARY_SUBSCR);
DISPATCH();
}

TARGET(BINARY_SUBSCR_GETITEM) {
PyObject *sub = TOP();
PyObject *container = SECOND();
SpecializedCacheEntry *caches = GET_CACHE();
_PyAdaptiveEntry *cache0 = &caches[0].adaptive;
_PyObjectCache *cache1 = &caches[-1].obj;
PyFunctionObject *getitem = (PyFunctionObject *)cache1->obj;
DEOPT_IF(Py_TYPE(container)->tp_version_tag != cache0->version, BINARY_SUBSCR);
DEOPT_IF(getitem->func_version != cache0->index, BINARY_SUBSCR);
_PyBinarySubscrCache *cache = (_PyBinarySubscrCache *)next_instr;
PyObject *cached = frame->f_code->_co_obj_cache[cache->object];
uint32_t type_version = read32(&cache->type_version);
DEOPT_IF(Py_TYPE(container)->tp_version_tag != type_version, BINARY_SUBSCR);
assert(PyFunction_Check(cached));
PyFunctionObject *getitem = (PyFunctionObject *)cached;
DEOPT_IF(getitem->func_version != cache->func_version, BINARY_SUBSCR);
PyCodeObject *code = (PyCodeObject *)getitem->func_code;
size_t size = code->co_nlocalsplus + code->co_stacksize + FRAME_SPECIALS_SIZE;
assert(code->co_argcount == 2);
Expand All @@ -2218,6 +2221,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
new_frame->localsplus[i] = NULL;
}
_PyFrame_SetStackPointer(frame, stack_pointer);
frame->f_lasti += INLINE_CACHE_ENTRIES_BINARY_SUBSCR;
new_frame->previous = frame;
frame = cframe.current_frame = new_frame;
CALL_STAT_INC(inlined_py_calls);
Expand Down Expand Up @@ -5602,7 +5606,7 @@ MISS_WITH_CACHE(PRECALL)
MISS_WITH_CACHE(CALL)
MISS_WITH_INLINE_CACHE(BINARY_OP)
MISS_WITH_CACHE(COMPARE_OP)
MISS_WITH_CACHE(BINARY_SUBSCR)
MISS_WITH_INLINE_CACHE(BINARY_SUBSCR)
MISS_WITH_INLINE_CACHE(UNPACK_SEQUENCE)
MISS_WITH_OPARG_COUNTER(STORE_SUBSCR)

Expand Down
Loading
0