From 4e63a49e0bfd9d5acb855a90fcb2146a6a8e2775 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 16 Nov 2021 16:03:57 +0000 Subject: [PATCH 1/4] Specialize for calls to type and other builtin classes with 1 argument. --- Include/opcode.h | 48 +++++++++++++++++++++-------------------- Lib/opcode.py | 2 ++ Python/ceval.c | 32 +++++++++++++++++++++++++++ Python/opcode_targets.h | 24 ++++++++++----------- Python/specialize.c | 18 ++++++++++++++++ 5 files changed, 89 insertions(+), 35 deletions(-) diff --git a/Include/opcode.h b/Include/opcode.h index f22f7e94f6190c..7dfa8f3df969be 100644 --- a/Include/opcode.h +++ b/Include/opcode.h @@ -139,29 +139,31 @@ extern "C" { #define CALL_FUNCTION_LEN 43 #define CALL_FUNCTION_ISINSTANCE 44 #define CALL_FUNCTION_PY_SIMPLE 45 -#define JUMP_ABSOLUTE_QUICK 46 -#define LOAD_ATTR_ADAPTIVE 47 -#define LOAD_ATTR_INSTANCE_VALUE 48 -#define LOAD_ATTR_WITH_HINT 55 -#define LOAD_ATTR_SLOT 56 -#define LOAD_ATTR_MODULE 57 -#define LOAD_GLOBAL_ADAPTIVE 58 -#define LOAD_GLOBAL_MODULE 59 -#define LOAD_GLOBAL_BUILTIN 62 -#define LOAD_METHOD_ADAPTIVE 63 -#define LOAD_METHOD_CACHED 64 -#define LOAD_METHOD_CLASS 65 -#define LOAD_METHOD_MODULE 66 -#define LOAD_METHOD_NO_DICT 67 -#define STORE_ATTR_ADAPTIVE 75 -#define STORE_ATTR_INSTANCE_VALUE 76 -#define STORE_ATTR_SLOT 77 -#define STORE_ATTR_WITH_HINT 78 -#define LOAD_FAST__LOAD_FAST 79 -#define STORE_FAST__LOAD_FAST 80 -#define LOAD_FAST__LOAD_CONST 81 -#define LOAD_CONST__LOAD_FAST 87 -#define STORE_FAST__STORE_FAST 88 +#define CALL_FUNCTION_TYPE_1 46 +#define CALL_FUNCTION_BUILTIN_CLASS_1 47 +#define JUMP_ABSOLUTE_QUICK 48 +#define LOAD_ATTR_ADAPTIVE 55 +#define LOAD_ATTR_INSTANCE_VALUE 56 +#define LOAD_ATTR_WITH_HINT 57 +#define LOAD_ATTR_SLOT 58 +#define LOAD_ATTR_MODULE 59 +#define LOAD_GLOBAL_ADAPTIVE 62 +#define LOAD_GLOBAL_MODULE 63 +#define LOAD_GLOBAL_BUILTIN 64 +#define LOAD_METHOD_ADAPTIVE 65 +#define LOAD_METHOD_CACHED 66 +#define LOAD_METHOD_CLASS 67 +#define LOAD_METHOD_MODULE 
75 +#define LOAD_METHOD_NO_DICT 76 +#define STORE_ATTR_ADAPTIVE 77 +#define STORE_ATTR_INSTANCE_VALUE 78 +#define STORE_ATTR_SLOT 79 +#define STORE_ATTR_WITH_HINT 80 +#define LOAD_FAST__LOAD_FAST 81 +#define STORE_FAST__LOAD_FAST 87 +#define LOAD_FAST__LOAD_CONST 88 +#define LOAD_CONST__LOAD_FAST 123 +#define STORE_FAST__STORE_FAST 127 #define DO_TRACING 255 #ifdef NEED_OPCODE_JUMP_TABLES static uint32_t _PyOpcode_RelativeJump[8] = { diff --git a/Lib/opcode.py b/Lib/opcode.py index e5889bca4c161c..744c3c0ef436f8 100644 --- a/Lib/opcode.py +++ b/Lib/opcode.py @@ -252,6 +252,8 @@ def jabs_op(name, op): "CALL_FUNCTION_LEN", "CALL_FUNCTION_ISINSTANCE", "CALL_FUNCTION_PY_SIMPLE", + "CALL_FUNCTION_TYPE_1", + "CALL_FUNCTION_BUILTIN_CLASS_1", "JUMP_ABSOLUTE_QUICK", "LOAD_ATTR_ADAPTIVE", "LOAD_ATTR_INSTANCE_VALUE", diff --git a/Python/ceval.c b/Python/ceval.c index a8bbad33552e46..47232fd2b1d32e 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -4764,6 +4764,38 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr goto start_frame; } + TARGET(CALL_FUNCTION_TYPE_1) { + assert(GET_CACHE()->adaptive.original_oparg == 1); + PyObject *obj = TOP(); + PyObject *callable = SECOND(); + DEOPT_IF(callable != (PyObject *)&PyType_Type, CALL_FUNCTION); + PyObject *res = Py_NewRef(Py_TYPE(obj)); + STACK_SHRINK(1); + Py_DECREF(callable); + Py_DECREF(obj); + SET_TOP(res); + DISPATCH(); + } + + TARGET(CALL_FUNCTION_BUILTIN_CLASS_1) { + SpecializedCacheEntry *caches = GET_CACHE(); + _PyAdaptiveEntry *cache0 = &caches[0].adaptive; + assert(cache0->original_oparg == 1); + PyObject *callable = SECOND(); + PyObject *arg = TOP(); + PyTypeObject *tp = Py_TYPE(callable); + DEOPT_IF(tp->tp_version_tag != cache0->version, CALL_FUNCTION); + STACK_SHRINK(1); + PyObject *res = tp->tp_vectorcall((PyObject *)tp, stack_pointer, 1, NULL); + SET_TOP(res); + Py_DECREF(tp); + Py_DECREF(arg); + if (res == NULL) { + goto error; + } + DISPATCH(); + } + 
TARGET(CALL_FUNCTION_BUILTIN_O) { assert(cframe.use_tracing == 0); /* Builtin METH_O functions */ diff --git a/Python/opcode_targets.h b/Python/opcode_targets.h index 872a6883119926..c88bf7429e8357 100644 --- a/Python/opcode_targets.h +++ b/Python/opcode_targets.h @@ -45,28 +45,28 @@ static void *opcode_targets[256] = { &&TARGET_CALL_FUNCTION_LEN, &&TARGET_CALL_FUNCTION_ISINSTANCE, &&TARGET_CALL_FUNCTION_PY_SIMPLE, + &&TARGET_CALL_FUNCTION_TYPE_1, + &&TARGET_CALL_FUNCTION_BUILTIN_CLASS_1, &&TARGET_JUMP_ABSOLUTE_QUICK, - &&TARGET_LOAD_ATTR_ADAPTIVE, - &&TARGET_LOAD_ATTR_INSTANCE_VALUE, &&TARGET_WITH_EXCEPT_START, &&TARGET_GET_AITER, &&TARGET_GET_ANEXT, &&TARGET_BEFORE_ASYNC_WITH, &&TARGET_BEFORE_WITH, &&TARGET_END_ASYNC_FOR, + &&TARGET_LOAD_ATTR_ADAPTIVE, + &&TARGET_LOAD_ATTR_INSTANCE_VALUE, &&TARGET_LOAD_ATTR_WITH_HINT, &&TARGET_LOAD_ATTR_SLOT, &&TARGET_LOAD_ATTR_MODULE, - &&TARGET_LOAD_GLOBAL_ADAPTIVE, - &&TARGET_LOAD_GLOBAL_MODULE, &&TARGET_STORE_SUBSCR, &&TARGET_DELETE_SUBSCR, + &&TARGET_LOAD_GLOBAL_ADAPTIVE, + &&TARGET_LOAD_GLOBAL_MODULE, &&TARGET_LOAD_GLOBAL_BUILTIN, &&TARGET_LOAD_METHOD_ADAPTIVE, &&TARGET_LOAD_METHOD_CACHED, &&TARGET_LOAD_METHOD_CLASS, - &&TARGET_LOAD_METHOD_MODULE, - &&TARGET_LOAD_METHOD_NO_DICT, &&TARGET_GET_ITER, &&TARGET_GET_YIELD_FROM_ITER, &&TARGET_PRINT_EXPR, @@ -74,20 +74,20 @@ static void *opcode_targets[256] = { &&TARGET_YIELD_FROM, &&TARGET_GET_AWAITABLE, &&TARGET_LOAD_ASSERTION_ERROR, + &&TARGET_LOAD_METHOD_MODULE, + &&TARGET_LOAD_METHOD_NO_DICT, &&TARGET_STORE_ATTR_ADAPTIVE, &&TARGET_STORE_ATTR_INSTANCE_VALUE, &&TARGET_STORE_ATTR_SLOT, &&TARGET_STORE_ATTR_WITH_HINT, &&TARGET_LOAD_FAST__LOAD_FAST, - &&TARGET_STORE_FAST__LOAD_FAST, - &&TARGET_LOAD_FAST__LOAD_CONST, &&TARGET_LIST_TO_TUPLE, &&TARGET_RETURN_VALUE, &&TARGET_IMPORT_STAR, &&TARGET_SETUP_ANNOTATIONS, &&TARGET_YIELD_VALUE, - &&TARGET_LOAD_CONST__LOAD_FAST, - &&TARGET_STORE_FAST__STORE_FAST, + &&TARGET_STORE_FAST__LOAD_FAST, + &&TARGET_LOAD_FAST__LOAD_CONST, 
&&TARGET_POP_EXCEPT, &&TARGET_STORE_NAME, &&TARGET_DELETE_NAME, @@ -122,11 +122,11 @@ static void *opcode_targets[256] = { &&TARGET_COPY, &&TARGET_JUMP_IF_NOT_EXC_MATCH, &&TARGET_BINARY_OP, - &&_unknown_opcode, + &&TARGET_LOAD_CONST__LOAD_FAST, &&TARGET_LOAD_FAST, &&TARGET_STORE_FAST, &&TARGET_DELETE_FAST, - &&_unknown_opcode, + &&TARGET_STORE_FAST__STORE_FAST, &&_unknown_opcode, &&TARGET_GEN_START, &&TARGET_RAISE_VARARGS, diff --git a/Python/specialize.c b/Python/specialize.c index b384675560be79..286c8396f315a0 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -490,6 +490,7 @@ initial_counter_value(void) { #define SPEC_FAIL_PYCFUNCTION_NOARGS 16 #define SPEC_FAIL_BAD_CALL_FLAGS 17 #define SPEC_FAIL_CLASS 18 +#define SPEC_FAIL_PYTHON_CLASS 19 /* COMPARE_OP */ #define SPEC_FAIL_STRING_COMPARE 13 @@ -1289,6 +1290,23 @@ specialize_class_call( PyObject *callable, _Py_CODEUNIT *instr, int nargs, SpecializedCacheEntry *cache) { + assert(PyType_Check(callable)); + PyTypeObject *tp = (PyTypeObject *)callable; + if (tp->tp_new == PyBaseObject_Type.tp_new) { + SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_PYTHON_CLASS); + return -1; + } + if (nargs == 1) { + if (tp == &PyType_Type) { + *instr = _Py_MAKECODEUNIT(CALL_FUNCTION_TYPE_1, _Py_OPARG(*instr)); + return 0; + } + if ((tp->tp_flags & Py_TPFLAGS_IMMUTABLETYPE) && tp->tp_vectorcall != NULL) { + cache->adaptive.version = tp->tp_version_tag; + *instr = _Py_MAKECODEUNIT(CALL_FUNCTION_BUILTIN_CLASS_1, _Py_OPARG(*instr)); + return 0; + } + } SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_CLASS); return -1; } From 6b0738ca6097297de8a6fd8d7eb4595f6ee36d47 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 7 Dec 2021 11:04:36 +0000 Subject: [PATCH 2/4] Add news. 
--- .../Core and Builtins/2021-12-07-11-04-21.bpo-44525.6OWCgr.rst | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2021-12-07-11-04-21.bpo-44525.6OWCgr.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-12-07-11-04-21.bpo-44525.6OWCgr.rst b/Misc/NEWS.d/next/Core and Builtins/2021-12-07-11-04-21.bpo-44525.6OWCgr.rst new file mode 100644 index 00000000000000..8e1533f477e3d5 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2021-12-07-11-04-21.bpo-44525.6OWCgr.rst @@ -0,0 +1,3 @@ +Specialize the CALL_FUNCTION instruction for calls to builtin types with a +single argument. Speeds up ``range(x)``, ``list(x)``, and specifically +``type(obj)``. From 82bc7084ee3e776880ad238e847d893d847973f1 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 15 Dec 2021 12:19:30 +0000 Subject: [PATCH 3/4] Only specialize non-method calls to builtin classes. --- Python/ceval.c | 2 ++ Python/specialize.c | 5 +++++ 2 files changed, 7 insertions(+) diff --git a/Python/ceval.c b/Python/ceval.c index 3416084bb207d1..45800a8aab55e3 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -4855,6 +4855,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr } TARGET(CALL_NO_KW_TYPE_1) { + assert(STACK_ADJUST_IS_RESET); assert(GET_CACHE()->adaptive.original_oparg == 1); PyObject *obj = TOP(); PyObject *callable = SECOND(); @@ -4868,6 +4869,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr } TARGET(CALL_NO_KW_BUILTIN_CLASS_1) { + assert(STACK_ADJUST_IS_RESET); SpecializedCacheEntry *caches = GET_CACHE(); _PyAdaptiveEntry *cache0 = &caches[0].adaptive; assert(cache0->original_oparg == 1); diff --git a/Python/specialize.c b/Python/specialize.c index 2b8484d83d5b08..5cf327df475c7c 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -494,6 +494,7 @@ initial_counter_value(void) { #define SPEC_FAIL_PYTHON_CLASS 19 #define SPEC_FAIL_C_METHOD_CALL 20 #define 
SPEC_FAIL_METHDESCR_NON_METHOD 21 +#define SPEC_FAIL_METHOD_CALL_CLASS 22 /* COMPARE_OP */ #define SPEC_FAIL_STRING_COMPARE 13 @@ -1266,6 +1267,10 @@ specialize_class_call( { assert(PyType_Check(callable)); PyTypeObject *tp = (PyTypeObject *)callable; + if (_Py_OPCODE(instr[-1]) == PRECALL_METHOD) { + SPECIALIZATION_FAIL(CALL_NO_KW, SPEC_FAIL_METHOD_CALL_CLASS); + return -1; + } if (tp->tp_new == PyBaseObject_Type.tp_new) { SPECIALIZATION_FAIL(CALL_NO_KW, SPEC_FAIL_PYTHON_CLASS); return -1; From ac14f9b8cbfe935b70e30d0f1dc8073dbdf9cc31 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 15 Dec 2021 12:55:38 +0000 Subject: [PATCH 4/4] Fix CALL_NO_KW_BUILTIN_CLASS_1 to use the callable itself as the class, not Py_TYPE(callable) --- Python/ceval.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Python/ceval.c b/Python/ceval.c index 45800a8aab55e3..b9444b22138f02 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -4875,7 +4875,8 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr assert(cache0->original_oparg == 1); PyObject *callable = SECOND(); PyObject *arg = TOP(); - PyTypeObject *tp = Py_TYPE(callable); + DEOPT_IF(!PyType_Check(callable), CALL_NO_KW); + PyTypeObject *tp = (PyTypeObject *)callable; DEOPT_IF(tp->tp_version_tag != cache0->version, CALL_NO_KW); STACK_SHRINK(1); PyObject *res = tp->tp_vectorcall((PyObject *)tp, stack_pointer, 1, NULL);