bpo-46841: Inline cache for `BINARY_SUBSCR`. by markshannon · Pull Request #31618 · python/cpython · GitHub
[go: up one dir, main page]

Skip to content

bpo-46841: Inline cache for BINARY_SUBSCR. #31618

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Mar 1, 2022
Remove per-code-object-cache and add small per-heap-type cache.
  • Loading branch information
markshannon committed Mar 1, 2022
commit 05787b3cd7311c13440057e656d04ee6da44ac79
1 change: 0 additions & 1 deletion Include/cpython/code.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,6 @@ struct PyCodeObject {
_Py_CODEUNIT *co_firstinstr; /* Pointer to first instruction, used for quickening.
Unlike the other "hot" fields, this one is
actually derived from co_code. */
PyObject **_co_obj_cache; /* Array of borrowed references to objects, for specialized code. */
PyObject *co_exceptiontable; /* Byte string encoding exception handling table */
int co_flags; /* CO_..., see below */
int co_warmup; /* Warmup counter for quickening */
Expand Down
8 changes: 8 additions & 0 deletions Include/cpython/object.h
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,13 @@ struct _typeobject {
vectorcallfunc tp_vectorcall;
};

/* This struct is used by the specializer.
 * It should be treated as an opaque blob
 * by any other code. */
struct _specialization_cache {
/* Written by the BINARY_SUBSCR specializer with the function it looked up
 * as the type's __getitem__; read back by the specialized instruction.
 * NOTE(review): no reference appears to be taken when this is stored, so
 * it is presumably a borrowed reference -- confirm. */
PyObject *getitem;
};

/* The *real* layout of a type object when allocated on the heap */
typedef struct _heaptypeobject {
/* Note: there's a dependency on the order of these members
Expand All @@ -247,6 +254,7 @@ typedef struct _heaptypeobject {
struct _dictkeysobject *ht_cached_keys;
PyObject *ht_module;
char *_ht_tpname; // Storage for "tp_name"; see PyType_FromModuleAndSpec
struct _specialization_cache _spec_cache; // Internal -- DO NOT USE.
/* here are optional user slots, followed by the members. */
} PyHeapTypeObject;

Expand Down
1 change: 0 additions & 1 deletion Include/internal/pycore_code.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,6 @@ typedef struct {

typedef struct {
_Py_CODEUNIT counter;
_Py_CODEUNIT object;
_Py_CODEUNIT type_version;
_Py_CODEUNIT _t1;
_Py_CODEUNIT func_version;
Expand Down
2 changes: 1 addition & 1 deletion Include/opcode.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Lib/opcode.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def jabs_op(name, op, entries=0):

def_op('UNARY_INVERT', 15)

def_op('BINARY_SUBSCR', 25, 5)
def_op('BINARY_SUBSCR', 25, 4)

def_op('GET_LEN', 30)
def_op('MATCH_MAPPING', 31)
Expand Down
4 changes: 3 additions & 1 deletion Lib/test/test_sys.py
Original file line number Diff line number Diff line change
Expand Up @@ -1501,7 +1501,9 @@ def delx(self): del self.__x
'3P' # PyMappingMethods
'10P' # PySequenceMethods
'2P' # PyBufferProcs
'6P')
'6P'
'1P' # Specializer cache
)
class newstyleclass(object): pass
# Separate block for PyDictKeysObject with 8 keys and 5 entries
check(newstyleclass, s + calcsize(DICT_KEY_STRUCT_FORMAT) + 64 + 42*calcsize("n2P"))
Expand Down
8 changes: 0 additions & 8 deletions Objects/codeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -348,7 +348,6 @@ init_code(PyCodeObject *co, struct _PyCodeConstructor *con)
/* not set */
co->co_weakreflist = NULL;
co->co_extra = NULL;
co->_co_obj_cache = NULL;

co->co_warmup = QUICKENING_INITIAL_WARMUP_VALUE;
co->co_quickened = NULL;
Expand Down Expand Up @@ -1370,9 +1369,6 @@ code_dealloc(PyCodeObject *co)
PyMem_Free(co->co_quickened);
_Py_QuickenedCount--;
}
if (co->_co_obj_cache) {
PyMem_Free(co->_co_obj_cache);
}
PyObject_Free(co);
}

Expand Down Expand Up @@ -1925,10 +1921,6 @@ _PyStaticCode_Dealloc(PyCodeObject *co)
co->co_quickened = NULL;
_Py_QuickenedCount--;
}
if (co->_co_obj_cache) {
PyMem_Free(co->_co_obj_cache);
co->_co_obj_cache = NULL;
}
co->co_warmup = QUICKENING_INITIAL_WARMUP_VALUE;
PyMem_Free(co->co_extra);
co->co_extra = NULL;
Expand Down
65 changes: 32 additions & 33 deletions Programs/test_frozenmain.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 4 additions & 2 deletions Python/ceval.c
Original file line number Diff line number Diff line change
Expand Up @@ -2197,9 +2197,11 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
PyObject *sub = TOP();
PyObject *container = SECOND();
_PyBinarySubscrCache *cache = (_PyBinarySubscrCache *)next_instr;
PyObject *cached = frame->f_code->_co_obj_cache[cache->object];
uint32_t type_version = read32(&cache->type_version);
DEOPT_IF(Py_TYPE(container)->tp_version_tag != type_version, BINARY_SUBSCR);
PyTypeObject *tp = Py_TYPE(container);
DEOPT_IF(tp->tp_version_tag != type_version, BINARY_SUBSCR);
assert(tp->tp_flags & Py_TPFLAGS_HEAPTYPE);
PyObject *cached = ((PyHeapTypeObject *)tp)->_spec_cache.getitem;
assert(PyFunction_Check(cached));
PyFunctionObject *getitem = (PyFunctionObject *)cached;
DEOPT_IF(getitem->func_version != cache->func_version, BINARY_SUBSCR);
Expand Down
25 changes: 6 additions & 19 deletions Python/specialize.c
Original file line number Diff line number Diff line change
Expand Up @@ -364,17 +364,6 @@ entries_needed(const _Py_CODEUNIT *code, int len)
return cache_offset + 1; // One extra for the count entry
}

/* Return the sum of object_cache_requirements[opcode] over the first
 * `len` code units of `code`. A non-positive `len` yields 0. */
static int
object_slots_needed(const _Py_CODEUNIT *code, int len)
{
    int total = 0;
    int idx = 0;

    while (idx < len) {
        /* Narrow to uint8_t before indexing the per-opcode table. */
        uint8_t op = _Py_OPCODE(code[idx]);
        total = total + object_cache_requirements[op];
        idx++;
    }
    return total;
}

static inline _Py_CODEUNIT *
first_instruction(SpecializedCacheOrInstruction *quickened)
{
Expand Down Expand Up @@ -487,15 +476,9 @@ _Py_Quicken(PyCodeObject *code) {
code->co_warmup = QUICKENING_WARMUP_COLDEST;
return 0;
}
int obj_count = object_slots_needed(code->co_firstinstr, instr_count);
code->_co_obj_cache = PyMem_Malloc(obj_count*sizeof(PyObject *));
if (code->_co_obj_cache == NULL) {
return -1;
}
int entry_count = entries_needed(code->co_firstinstr, instr_count);
SpecializedCacheOrInstruction *quickened = allocate(entry_count, instr_count);
if (quickened == NULL) {
PyMem_Free(code->_co_obj_cache);
return -1;
}
_Py_CODEUNIT *new_instructions = first_instruction(quickened);
Expand Down Expand Up @@ -583,6 +566,7 @@ initial_counter_value(void) {
#define SPEC_FAIL_SUBSCR_PY_SIMPLE 20
#define SPEC_FAIL_SUBSCR_PY_OTHER 21
#define SPEC_FAIL_SUBSCR_DICT_SUBCLASS_NO_OVERRIDE 22
#define SPEC_FAIL_SUBSCR_NOT_HEAP_TYPE 23

/* Binary op */

Expand Down Expand Up @@ -1396,6 +1380,10 @@ _Py_Specialize_BinarySubscr(
PyTypeObject *cls = Py_TYPE(container);
PyObject *descriptor = _PyType_Lookup(cls, &_Py_ID(__getitem__));
if (descriptor && Py_TYPE(descriptor) == &PyFunction_Type) {
if (!(container_type->tp_flags & Py_TPFLAGS_HEAPTYPE)) {
SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_SUBSCR_NOT_HEAP_TYPE);
goto fail;
}
PyFunctionObject *func = (PyFunctionObject *)descriptor;
PyCodeObject *fcode = (PyCodeObject *)func->func_code;
int kind = function_kind(fcode);
Expand All @@ -1415,8 +1403,7 @@ _Py_Specialize_BinarySubscr(
goto fail;
}
cache->func_version = version;
assert(code->_co_obj_cache != NULL);
code->_co_obj_cache[cache->object] = descriptor;
((PyHeapTypeObject *)container_type)->_spec_cache.getitem = descriptor;
*instr = _Py_MAKECODEUNIT(BINARY_SUBSCR_GETITEM, _Py_OPARG(*instr));
goto success;
}
Expand Down
0