bpo-46841: Use inline caching for calls by brandtbucher · Pull Request #31709 · python/cpython · GitHub
[go: up one dir, main page]

Skip to content

bpo-46841: Use inline caching for calls #31709

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Mar 7, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Use a per-interpreter callable cache
  • Loading branch information
brandtbucher committed Mar 6, 2022
commit c5d6922739ce4545bf837be08c2e5947c42c8199
15 changes: 9 additions & 6 deletions Include/internal/pycore_code.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@ extern "C" {


// Inline caches. If you change the number of cache entries for an instruction,
// you must *also* bump the magic number in Lib/importlib/_bootstrap_external.py!
// you must *also* update the number of cache entries in Lib/opcode.py and bump
// the magic number in Lib/importlib/_bootstrap_external.py!

#define CACHE_ENTRIES(cache) (sizeof(cache)/sizeof(_Py_CODEUNIT))

Expand Down Expand Up @@ -113,9 +114,12 @@ _Py_IncrementCountAndMaybeQuicken(PyCodeObject *code)

extern Py_ssize_t _Py_QuickenedCount;

extern PyObject *builtin_isinstance;
extern PyObject *builtin_len;
extern PyObject *builtin_list_append;
// Borrowed references to common callables, kept per interpreter (this struct
// is embedded in struct _is as the callable_cache member). The specialized
// PRECALL instructions compare the callable on the stack against these
// pointers by identity and deoptimize when they differ.
struct callable_cache {
// The isinstance builtin, fetched from the builtins dict during builtins init.
PyObject *isinstance;
// The len builtin, fetched from the builtins dict during builtins init.
PyObject *len;
// list.append, obtained with _PyType_Lookup() on PyList_Type.
PyObject *list_append;
};
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the existence of PyList_Type as part of the API means that list.append must be per-process unique.
In other words, list_append could be static.

I'm happy to leave it as is for now, though. We should look to make the whole struct static, although the mutability of builtin functions makes that tricky for isinstance and len.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe each interpreter has its own builtins module (check out _PyBuiltin_Init), so making this static could be tricky. As you said, though: probably worth looking into in the future.


/* "Locals plus" for a code object is the set of locals + cell vars +
* free vars. This relates to variable names as well as offsets into
Expand Down Expand Up @@ -258,8 +262,7 @@ extern int _Py_Specialize_StoreSubscr(PyObject *container, PyObject *sub, _Py_CO
extern int _Py_Specialize_Call(PyObject *callable, _Py_CODEUNIT *instr,
int nargs, PyObject *kwnames);
extern int _Py_Specialize_Precall(PyObject *callable, _Py_CODEUNIT *instr,
int nargs, PyObject *kwnames,
PyObject *builtins, int oparg);
int nargs, PyObject *kwnames, int oparg);
extern void _Py_Specialize_BinaryOp(PyObject *lhs, PyObject *rhs, _Py_CODEUNIT *instr,
int oparg);
extern void _Py_Specialize_CompareOp(PyObject *lhs, PyObject *rhs,
Expand Down
2 changes: 2 additions & 0 deletions Include/internal/pycore_global_strings.h
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,7 @@ struct _Py_global_strings {
STRUCT_FOR_ID(inf)
STRUCT_FOR_ID(intersection)
STRUCT_FOR_ID(isatty)
STRUCT_FOR_ID(isinstance)
STRUCT_FOR_ID(items)
STRUCT_FOR_ID(iter)
STRUCT_FOR_ID(join)
Expand All @@ -278,6 +279,7 @@ struct _Py_global_strings {
STRUCT_FOR_ID(last_type)
STRUCT_FOR_ID(last_value)
STRUCT_FOR_ID(latin1)
STRUCT_FOR_ID(len)
STRUCT_FOR_ID(line)
STRUCT_FOR_ID(lineno)
STRUCT_FOR_ID(listcomp)
Expand Down
2 changes: 2 additions & 0 deletions Include/internal/pycore_interp.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ extern "C" {

#include "pycore_atomic.h" // _Py_atomic_address
#include "pycore_ast_state.h" // struct ast_state
#include "pycore_code.h" // struct callable_cache
#include "pycore_context.h" // struct _Py_context_state
#include "pycore_dict.h" // struct _Py_dict_state
#include "pycore_exceptions.h" // struct _Py_exc_state
Expand Down Expand Up @@ -176,6 +177,7 @@ struct _is {

struct ast_state ast;
struct type_cache type_cache;
struct callable_cache callable_cache;

/* The following fields are here to avoid allocation during init.
The data is exposed through PyInterpreterState pointer fields.
Expand Down
2 changes: 2 additions & 0 deletions Include/internal/pycore_runtime_init.h
Original file line number Diff line number Diff line change
Expand Up @@ -884,6 +884,7 @@ extern "C" {
INIT_ID(inf), \
INIT_ID(intersection), \
INIT_ID(isatty), \
INIT_ID(isinstance), \
INIT_ID(items), \
INIT_ID(iter), \
INIT_ID(join), \
Expand All @@ -893,6 +894,7 @@ extern "C" {
INIT_ID(last_type), \
INIT_ID(last_value), \
INIT_ID(latin1), \
INIT_ID(len), \
INIT_ID(line), \
INIT_ID(lineno), \
INIT_ID(listcomp), \
Expand Down
22 changes: 7 additions & 15 deletions Python/ceval.c
Original file line number Diff line number Diff line change
Expand Up @@ -4569,14 +4569,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
/* Move ownership of reference from stack to call_shape
* and make sure that NULL is cleared from stack */
PyObject *function = PEEK(nargs + 1);
#ifdef Py_STATS
extern int _PySpecialization_ClassifyCallable(PyObject *);
SpecializationStats *stats =
&_py_stats.opcode_stats[PRECALL].specialization;
stats->failure++;
int kind = _PySpecialization_ClassifyCallable(function);
stats->failure_kinds[kind]++;
#endif
if (!is_method && Py_TYPE(function) == &PyMethod_Type) {
PyObject *meth = ((PyMethodObject *)function)->im_func;
PyObject *self = ((PyMethodObject *)function)->im_self;
Expand Down Expand Up @@ -4694,8 +4686,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
int nargs = oparg + is_meth;
PyObject *callable = PEEK(nargs + 1);
int err = _Py_Specialize_Precall(callable, next_instr, nargs,
call_shape.kwnames,
BUILTINS(), oparg);
call_shape.kwnames, oparg);
if (err < 0) {
goto error;
}
Expand Down Expand Up @@ -5011,7 +5002,8 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
int total_args = oparg + is_meth;
DEOPT_IF(total_args != 1, PRECALL);
PyObject *callable = PEEK(total_args + 1);
DEOPT_IF(callable != builtin_len, PRECALL);
PyInterpreterState *interp = _PyInterpreterState_GET();
DEOPT_IF(callable != interp->callable_cache.len, PRECALL);
STAT_INC(PRECALL, hit);
SKIP_CALL();
PyObject *arg = TOP();
Expand Down Expand Up @@ -5040,8 +5032,8 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
int total_args = oparg + is_meth;
PyObject *callable = PEEK(total_args + 1);
DEOPT_IF(total_args != 2, PRECALL);

DEOPT_IF(callable != builtin_isinstance, PRECALL);
PyInterpreterState *interp = _PyInterpreterState_GET();
DEOPT_IF(callable != interp->callable_cache.isinstance, PRECALL);
STAT_INC(PRECALL, hit);
SKIP_CALL();
PyObject *cls = POP();
Expand Down Expand Up @@ -5070,8 +5062,8 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
assert(call_shape.kwnames == NULL);
assert(oparg == 1);
PyObject *callable = PEEK(3);
assert(builtin_list_append);
DEOPT_IF(callable != builtin_list_append, PRECALL);
PyInterpreterState *interp = _PyInterpreterState_GET();
DEOPT_IF(callable != interp->callable_cache.list_append, PRECALL);
PyObject *list = SECOND();
DEOPT_IF(!PyList_Check(list), PRECALL);
STAT_INC(PRECALL, hit);
Expand Down
10 changes: 10 additions & 0 deletions Python/pylifecycle.c
Original file line number Diff line number Diff line change
Expand Up @@ -774,6 +774,16 @@ pycore_init_builtins(PyThreadState *tstate)
Py_INCREF(builtins_dict);
interp->builtins = builtins_dict;

PyObject *isinstance = PyDict_GetItem(builtins_dict, &_Py_ID(isinstance));
assert(isinstance);
interp->callable_cache.isinstance = isinstance;
PyObject *len = PyDict_GetItem(builtins_dict, &_Py_ID(len));
assert(len);
interp->callable_cache.len = len;
PyObject *list_append = _PyType_Lookup(&PyList_Type, &_Py_ID(append));
assert(list_append);
interp->callable_cache.list_append = list_append;

if (_PyBuiltins_AddExceptions(bimod) < 0) {
return _PyStatus_ERR("failed to add exceptions to builtins");
}
Expand Down
32 changes: 9 additions & 23 deletions Python/specialize.c
Original file line number Diff line number Diff line change
Expand Up @@ -1577,7 +1577,7 @@ specialize_py_call(PyFunctionObject *func, _Py_CODEUNIT *instr, int nargs,

static int
specialize_c_call(PyObject *callable, _Py_CODEUNIT *instr, int nargs,
PyObject *kwnames, PyObject *builtins)
PyObject *kwnames)
{
assert(_Py_OPCODE(*instr) == PRECALL_ADAPTIVE);
if (PyCFunction_GET_FUNCTION(callable) == NULL) {
Expand All @@ -1596,12 +1596,8 @@ specialize_c_call(PyObject *callable, _Py_CODEUNIT *instr, int nargs,
return 1;
}
/* len(o) */
if (builtin_len == NULL) {
// Use builtins_copy to protect against mutated builtins:
builtin_len = PyDict_GetItemString(
_PyInterpreterState_GET()->builtins_copy, "len");
}
if (callable == builtin_len) {
PyInterpreterState *interp = _PyInterpreterState_GET();
if (callable == interp->callable_cache.len) {
*instr = _Py_MAKECODEUNIT(PRECALL_NO_KW_LEN,
_Py_OPARG(*instr));
return 0;
Expand All @@ -1617,12 +1613,8 @@ specialize_c_call(PyObject *callable, _Py_CODEUNIT *instr, int nargs,
}
if (nargs == 2) {
/* isinstance(o1, o2) */
if (builtin_isinstance == NULL) {
// Use builtins_copy to protect against mutated builtins:
builtin_isinstance = PyDict_GetItemString(
_PyInterpreterState_GET()->builtins_copy, "isinstance");
}
if (callable == builtin_isinstance) {
PyInterpreterState *interp = _PyInterpreterState_GET();
if (callable == interp->callable_cache.isinstance) {
*instr = _Py_MAKECODEUNIT(PRECALL_NO_KW_ISINSTANCE,
_Py_OPARG(*instr));
return 0;
Expand Down Expand Up @@ -1688,14 +1680,14 @@ call_fail_kind(PyObject *callable)

int
_Py_Specialize_Precall(PyObject *callable, _Py_CODEUNIT *instr, int nargs,
PyObject *kwnames, PyObject *builtins, int oparg)
PyObject *kwnames, int oparg)
{
assert(_PyOpcode_InlineCacheEntries[PRECALL] ==
INLINE_CACHE_ENTRIES_PRECALL);
_PyPrecallCache *cache = (_PyPrecallCache *)(instr + 1);
int fail;
if (PyCFunction_CheckExact(callable)) {
fail = specialize_c_call(callable, instr, nargs, kwnames, builtins);
fail = specialize_c_call(callable, instr, nargs, kwnames);
}
else if (PyFunction_Check(callable)) {
*instr = _Py_MAKECODEUNIT(PRECALL_PYFUNC, _Py_OPARG(*instr));
Expand All @@ -1717,12 +1709,12 @@ _Py_Specialize_Precall(PyObject *callable, _Py_CODEUNIT *instr, int nargs,
fail = -1;
}
if (fail) {
STAT_INC(CALL, failure);
STAT_INC(PRECALL, failure);
assert(!PyErr_Occurred());
cache->counter = ADAPTIVE_CACHE_BACKOFF;
}
else {
STAT_INC(CALL, success);
STAT_INC(PRECALL, success);
assert(!PyErr_Occurred());
cache->counter = initial_counter_value();
}
Expand Down Expand Up @@ -2132,10 +2124,4 @@ int
return SPEC_FAIL_OTHER;
}

int
_PySpecialization_ClassifyCallable(PyObject *callable)
{
return call_fail_kind(callable);
}

#endif
0