From 158010e1f2de24fba72ac97a31c09e15b016ec5a Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 8 Dec 2021 14:20:52 +0000 Subject: [PATCH 1/3] Specialize calls to normal Python classes. --- Include/internal/pycore_ceval.h | 2 + Include/internal/pycore_code.h | 2 +- Include/internal/pycore_object.h | 2 + Include/opcode.h | 94 ++++++++++---------- Lib/importlib/_bootstrap_external.py | 3 +- Lib/opcode.py | 2 + Objects/dictobject.c | 8 +- Objects/typeobject.c | 20 +++++ Python/ceval.c | 90 +++++++++++++++++++ Python/compile.c | 3 + Python/opcode_targets.h | 32 +++---- Python/specialize.c | 127 +++++++++++++++++++++++++++ 12 files changed, 317 insertions(+), 68 deletions(-) diff --git a/Include/internal/pycore_ceval.h b/Include/internal/pycore_ceval.h index 20508d4a687475..cedbd59952bcaf 100644 --- a/Include/internal/pycore_ceval.h +++ b/Include/internal/pycore_ceval.h @@ -115,6 +115,8 @@ struct _interpreter_frame *_PyEval_GetFrame(void); PyObject *_Py_MakeCoro(PyFunctionObject *func); +extern PyFunctionObject *_Py_InitCleanupFunc; + #ifdef __cplusplus } #endif diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index 496d52f580f1f3..43b3cd2d5c39b2 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -288,7 +288,7 @@ void _Py_Specialize_CompareOp(PyObject *lhs, PyObject *rhs, _Py_CODEUNIT *instr, #define COLLECT_SPECIALIZATION_STATS_DETAILED PRINT_SPECIALIZATION_STATS_DETAILED #endif -#define SPECIALIZATION_FAILURE_KINDS 20 +#define SPECIALIZATION_FAILURE_KINDS 30 #if COLLECT_SPECIALIZATION_STATS diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index 9041a4dc8a3ce5..c5981a24de7ef2 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -188,8 +188,10 @@ extern int _Py_CheckSlotResult( #define _PyType_IsReady(type) ((type)->tp_dict != NULL) extern PyObject* _PyType_AllocNoTrack(PyTypeObject *type, Py_ssize_t nitems); +PyObject
*_PyType_NewManagedObject(PyTypeObject *type); extern int _PyObject_InitializeDict(PyObject *obj); +int _PyObject_InitInlineValues(PyObject *obj, PyTypeObject *tp); extern int _PyObject_StoreInstanceAttribute(PyObject *obj, PyDictValues *values, PyObject *name, PyObject *value); PyObject * _PyObject_GetInstanceAttribute(PyObject *obj, PyDictValues *values, diff --git a/Include/opcode.h b/Include/opcode.h index f22f7e94f6190c..40d05af9760abe 100644 --- a/Include/opcode.h +++ b/Include/opcode.h @@ -18,6 +18,7 @@ extern "C" { #define UNARY_NEGATIVE 11 #define UNARY_NOT 12 #define UNARY_INVERT 15 +#define EXIT_INIT_CHECK 16 #define BINARY_SUBSCR 25 #define GET_LEN 30 #define MATCH_MAPPING 31 @@ -116,52 +117,53 @@ extern "C" { #define BINARY_OP_ADD_INT 8 #define BINARY_OP_ADD_FLOAT 13 #define BINARY_OP_ADD_UNICODE 14 -#define BINARY_OP_INPLACE_ADD_UNICODE 16 -#define BINARY_OP_MULTIPLY_INT 17 -#define BINARY_OP_MULTIPLY_FLOAT 18 -#define BINARY_OP_SUBTRACT_INT 19 -#define BINARY_OP_SUBTRACT_FLOAT 20 -#define COMPARE_OP_ADAPTIVE 21 -#define COMPARE_OP_FLOAT_JUMP 22 -#define COMPARE_OP_INT_JUMP 23 -#define COMPARE_OP_STR_JUMP 24 -#define BINARY_SUBSCR_ADAPTIVE 26 -#define BINARY_SUBSCR_GETITEM 27 -#define BINARY_SUBSCR_LIST_INT 28 -#define BINARY_SUBSCR_TUPLE_INT 29 -#define BINARY_SUBSCR_DICT 34 -#define STORE_SUBSCR_ADAPTIVE 36 -#define STORE_SUBSCR_LIST_INT 38 -#define STORE_SUBSCR_DICT 39 -#define CALL_FUNCTION_ADAPTIVE 40 -#define CALL_FUNCTION_BUILTIN_O 41 -#define CALL_FUNCTION_BUILTIN_FAST 42 -#define CALL_FUNCTION_LEN 43 -#define CALL_FUNCTION_ISINSTANCE 44 -#define CALL_FUNCTION_PY_SIMPLE 45 -#define JUMP_ABSOLUTE_QUICK 46 -#define LOAD_ATTR_ADAPTIVE 47 -#define LOAD_ATTR_INSTANCE_VALUE 48 -#define LOAD_ATTR_WITH_HINT 55 -#define LOAD_ATTR_SLOT 56 -#define LOAD_ATTR_MODULE 57 -#define LOAD_GLOBAL_ADAPTIVE 58 -#define LOAD_GLOBAL_MODULE 59 -#define LOAD_GLOBAL_BUILTIN 62 -#define LOAD_METHOD_ADAPTIVE 63 -#define LOAD_METHOD_CACHED 64 -#define LOAD_METHOD_CLASS 65 
-#define LOAD_METHOD_MODULE 66 -#define LOAD_METHOD_NO_DICT 67 -#define STORE_ATTR_ADAPTIVE 75 -#define STORE_ATTR_INSTANCE_VALUE 76 -#define STORE_ATTR_SLOT 77 -#define STORE_ATTR_WITH_HINT 78 -#define LOAD_FAST__LOAD_FAST 79 -#define STORE_FAST__LOAD_FAST 80 -#define LOAD_FAST__LOAD_CONST 81 -#define LOAD_CONST__LOAD_FAST 87 -#define STORE_FAST__STORE_FAST 88 +#define BINARY_OP_INPLACE_ADD_UNICODE 17 +#define BINARY_OP_MULTIPLY_INT 18 +#define BINARY_OP_MULTIPLY_FLOAT 19 +#define BINARY_OP_SUBTRACT_INT 20 +#define BINARY_OP_SUBTRACT_FLOAT 21 +#define COMPARE_OP_ADAPTIVE 22 +#define COMPARE_OP_FLOAT_JUMP 23 +#define COMPARE_OP_INT_JUMP 24 +#define COMPARE_OP_STR_JUMP 26 +#define BINARY_SUBSCR_ADAPTIVE 27 +#define BINARY_SUBSCR_GETITEM 28 +#define BINARY_SUBSCR_LIST_INT 29 +#define BINARY_SUBSCR_TUPLE_INT 34 +#define BINARY_SUBSCR_DICT 36 +#define STORE_SUBSCR_ADAPTIVE 38 +#define STORE_SUBSCR_LIST_INT 39 +#define STORE_SUBSCR_DICT 40 +#define CALL_FUNCTION_ADAPTIVE 41 +#define CALL_FUNCTION_BUILTIN_O 42 +#define CALL_FUNCTION_BUILTIN_FAST 43 +#define CALL_FUNCTION_LEN 44 +#define CALL_FUNCTION_ISINSTANCE 45 +#define CALL_FUNCTION_PY_SIMPLE 46 +#define CALL_FUNCTION_ALLOC_AND_ENTER_INIT 47 +#define JUMP_ABSOLUTE_QUICK 48 +#define LOAD_ATTR_ADAPTIVE 55 +#define LOAD_ATTR_INSTANCE_VALUE 56 +#define LOAD_ATTR_WITH_HINT 57 +#define LOAD_ATTR_SLOT 58 +#define LOAD_ATTR_MODULE 59 +#define LOAD_GLOBAL_ADAPTIVE 62 +#define LOAD_GLOBAL_MODULE 63 +#define LOAD_GLOBAL_BUILTIN 64 +#define LOAD_METHOD_ADAPTIVE 65 +#define LOAD_METHOD_CACHED 66 +#define LOAD_METHOD_CLASS 67 +#define LOAD_METHOD_MODULE 75 +#define LOAD_METHOD_NO_DICT 76 +#define STORE_ATTR_ADAPTIVE 77 +#define STORE_ATTR_INSTANCE_VALUE 78 +#define STORE_ATTR_SLOT 79 +#define STORE_ATTR_WITH_HINT 80 +#define LOAD_FAST__LOAD_FAST 81 +#define STORE_FAST__LOAD_FAST 87 +#define LOAD_FAST__LOAD_CONST 88 +#define LOAD_CONST__LOAD_FAST 123 +#define STORE_FAST__STORE_FAST 127 #define DO_TRACING 255 #ifdef 
NEED_OPCODE_JUMP_TABLES static uint32_t _PyOpcode_RelativeJump[8] = { diff --git a/Lib/importlib/_bootstrap_external.py b/Lib/importlib/_bootstrap_external.py index 6970e9f0a94d49..790c82d716dc53 100644 --- a/Lib/importlib/_bootstrap_external.py +++ b/Lib/importlib/_bootstrap_external.py @@ -371,6 +371,7 @@ def _write_atomic(path, data, mode=0o666): # Python 3.11a3 3464 (bpo-45636: Merge numeric BINARY_*/INPLACE_* into # BINARY_OP) # Python 3.11a3 3465 (Add COPY_FREE_VARS opcode) +# Python 3.11a3 3469 (Add EXIT_INIT_CHECK opcode) # # MAGIC must change whenever the bytecode emitted by the compiler may no @@ -380,7 +381,7 @@ def _write_atomic(path, data, mode=0o666): # Whenever MAGIC_NUMBER is changed, the ranges in the magic_values array # in PC/launcher.c must also be updated. -MAGIC_NUMBER = (3465).to_bytes(2, 'little') + b'\r\n' +MAGIC_NUMBER = (3469).to_bytes(2, 'little') + b'\r\n' _RAW_MAGIC_NUMBER = int.from_bytes(MAGIC_NUMBER, 'little') # For import.c _PYCACHE = '__pycache__' diff --git a/Lib/opcode.py b/Lib/opcode.py index e5889bca4c161c..7b6d6994de5729 100644 --- a/Lib/opcode.py +++ b/Lib/opcode.py @@ -67,6 +67,7 @@ def jabs_op(name, op): def_op('UNARY_NOT', 12) def_op('UNARY_INVERT', 15) +def_op('EXIT_INIT_CHECK', 16) def_op('BINARY_SUBSCR', 25) @@ -252,6 +253,7 @@ def jabs_op(name, op): "CALL_FUNCTION_LEN", "CALL_FUNCTION_ISINSTANCE", "CALL_FUNCTION_PY_SIMPLE", + "CALL_FUNCTION_ALLOC_AND_ENTER_INIT", "JUMP_ABSOLUTE_QUICK", "LOAD_ATTR_ADAPTIVE", "LOAD_ATTR_INSTANCE_VALUE", diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 7ce4b9069f77ef..a2d16df94bcfdd 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -4957,11 +4957,10 @@ _PyDict_NewKeysForClass(void) #define CACHED_KEYS(tp) (((PyHeapTypeObject*)tp)->ht_cached_keys) -static int -init_inline_values(PyObject *obj, PyTypeObject *tp) +int +_PyObject_InitInlineValues(PyObject *obj, PyTypeObject *tp) { assert(tp->tp_flags & Py_TPFLAGS_HEAPTYPE); - // assert(type->tp_dictoffset > 0); -- 
TO DO Update this assert. assert(tp->tp_flags & Py_TPFLAGS_MANAGED_DICT); PyDictKeysObject *keys = CACHED_KEYS(tp); assert(keys != NULL); @@ -4972,6 +4971,7 @@ init_inline_values(PyObject *obj, PyTypeObject *tp) assert(size > 0); PyDictValues *values = new_values(size); if (values == NULL) { + *_PyObject_ValuesPointer(obj) = NULL; PyErr_NoMemory(); return -1; } @@ -4991,7 +4991,7 @@ _PyObject_InitializeDict(PyObject *obj) return 0; } if (tp->tp_flags & Py_TPFLAGS_MANAGED_DICT) { - return init_inline_values(obj, tp); + return _PyObject_InitInlineValues(obj, tp); } PyObject *dict; if (_PyType_HasFeature(tp, Py_TPFLAGS_HEAPTYPE) && CACHED_KEYS(tp)) { diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 2fd93b61c0b2b0..1a1972bfc7a8c9 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -1139,6 +1139,26 @@ type_call(PyTypeObject *type, PyObject *args, PyObject *kwds) return obj; } +PyObject * +_PyType_NewManagedObject(PyTypeObject *type) +{ + assert(type->tp_flags & Py_TPFLAGS_MANAGED_DICT); + assert(_PyType_IS_GC(type)); + assert(type->tp_new == PyBaseObject_Type.tp_new); + assert(type->tp_alloc == PyType_GenericAlloc); + assert(type->tp_itemsize == 0); + PyObject *obj = PyType_GenericAlloc(type, 0); + if (obj == NULL) { + return PyErr_NoMemory(); + } + *_PyObject_ManagedDictPointer(obj) = NULL; + if (_PyObject_InitInlineValues(obj, type)) { + Py_DECREF(obj); + return NULL; + } + return obj; +} + PyObject * _PyType_AllocNoTrack(PyTypeObject *type, Py_ssize_t nitems) { diff --git a/Python/ceval.c b/Python/ceval.c index 4f5ccf51e9cfe7..ffa987cf1c3bcb 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -102,6 +102,8 @@ static InterpreterFrame * _PyEvalFramePushAndInit(PyThreadState *tstate, PyFunctionObject *func, PyObject *locals, PyObject* const* args, size_t argcount, PyObject *kwnames); +static InterpreterFrame * +_PyEvalFramePush(PyThreadState *tstate, PyFunctionObject *func, PyObject *locals); static void _PyEvalFrameClearAndPop(PyThreadState 
*tstate, InterpreterFrame *frame); @@ -2720,6 +2722,20 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr DISPATCH(); } + TARGET(EXIT_INIT_CHECK) { + assert(STACK_LEVEL() == 2); + PyObject *should_be_none = TOP(); + if (should_be_none != Py_None) { + PyErr_Format(PyExc_TypeError, + "__init__() should return None, not '%.200s'", + Py_TYPE(should_be_none)->tp_name); + goto error; + } + Py_DECREF(Py_None); + STACK_SHRINK(1); + DISPATCH(); + } + TARGET(POP_EXCEPT) { PyObject *type, *value, *traceback; _PyErr_StackItem *exc_info; @@ -4741,6 +4757,63 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr goto start_frame; } + TARGET(CALL_FUNCTION_ALLOC_AND_ENTER_INIT) { + SpecializedCacheEntry *caches = GET_CACHE(); + _PyAdaptiveEntry *cache0 = &caches[0].adaptive; + int argcount = cache0->original_oparg; + _PyObjectCache *cache1 = &caches[-1].obj; + PyObject *callable = PEEK(argcount+1); + DEOPT_IF(!PyType_Check(callable), CALL_FUNCTION); + PyTypeObject *tp = (PyTypeObject *)callable; + DEOPT_IF(tp->tp_version_tag != cache0->version, CALL_FUNCTION); + PyFunctionObject *init = (PyFunctionObject *)cache1->obj; + PyCodeObject *code = (PyCodeObject *)init->func_code; + DEOPT_IF(code->co_argcount != argcount+1, CALL_FUNCTION); + PyObject *self = _PyType_NewManagedObject(tp); + if (self == NULL) { + goto error; + } + PEEK(argcount+1) = self; + Py_DECREF(tp); + assert(_Py_InitCleanupFunc != NULL); + InterpreterFrame *shim = _PyEvalFramePush(tstate, _Py_InitCleanupFunc, NULL); + if (shim == NULL) { + goto error; + } + shim->previous = frame; + shim->depth = frame->depth + 1; + shim->f_lasti = 1; + if (_Py_EnterRecursiveCall(tstate, "")) { + tstate->recursion_remaining--; + goto exit_unwind; + } + /* Push self onto stack of shim */ + Py_INCREF(self); + shim->stacktop = 1; + shim->localsplus[0] = self; + size_t size = code->co_nlocalsplus + code->co_stacksize + FRAME_SPECIALS_SIZE; + InterpreterFrame *init_frame = 
_PyThreadState_BumpFramePointer(tstate, size); + if (init_frame == NULL) { + _PyEvalFrameClearAndPop(tstate, shim); + goto error; + } + _PyFrame_InitializeSpecials(init_frame, init, + NULL, code->co_nlocalsplus); + /* Copy self followed by args to __init__ frame */ + STACK_SHRINK(argcount+1); + _PyFrame_SetStackPointer(frame, stack_pointer); + for (int i = 0; i < argcount+1; i++) { + init_frame->localsplus[i] = stack_pointer[i]; + } + for (int i = argcount+1; i < code->co_nlocalsplus; i++) { + init_frame->localsplus[i] = NULL; + } + init_frame->previous = shim; + init_frame->depth = shim->depth + 1; + frame = cframe.current_frame = init_frame; + goto start_frame; + } + TARGET(CALL_FUNCTION_BUILTIN_O) { assert(cframe.use_tracing == 0); /* Builtin METH_O functions */ @@ -5858,6 +5931,23 @@ make_coro(PyThreadState *tstate, PyFunctionObject *func, return gen; } +static InterpreterFrame * +_PyEvalFramePush(PyThreadState *tstate, PyFunctionObject *func, PyObject *locals) +{ + PyCodeObject * code = (PyCodeObject *)func->func_code; + size_t size = code->co_nlocalsplus + code->co_stacksize + FRAME_SPECIALS_SIZE; + InterpreterFrame *frame = _PyThreadState_BumpFramePointer(tstate, size); + if (frame == NULL) { + return NULL; + } + _PyFrame_InitializeSpecials(frame, func, locals, code->co_nlocalsplus); + PyObject **localsarray = &frame->localsplus[0]; + for (int i = 0; i < code->co_nlocalsplus; i++) { + localsarray[i] = NULL; + } + return frame; +} + /* Consumes all the references to the args */ static InterpreterFrame * _PyEvalFramePushAndInit(PyThreadState *tstate, PyFunctionObject *func, diff --git a/Python/compile.c b/Python/compile.c index 6138031833ac93..e370c5f232253d 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -1115,6 +1115,9 @@ stack_effect(int opcode, int oparg, int jump) case LOAD_GLOBAL: return 1; + case EXIT_INIT_CHECK: + return -1; + /* Exception handling pseudo-instructions */ case SETUP_FINALLY: /* 0 in the normal flow. 
diff --git a/Python/opcode_targets.h b/Python/opcode_targets.h index 872a6883119926..8c016460986948 100644 --- a/Python/opcode_targets.h +++ b/Python/opcode_targets.h @@ -15,6 +15,7 @@ static void *opcode_targets[256] = { &&TARGET_BINARY_OP_ADD_FLOAT, &&TARGET_BINARY_OP_ADD_UNICODE, &&TARGET_UNARY_INVERT, + &&TARGET_EXIT_INIT_CHECK, &&TARGET_BINARY_OP_INPLACE_ADD_UNICODE, &&TARGET_BINARY_OP_MULTIPLY_INT, &&TARGET_BINARY_OP_MULTIPLY_FLOAT, @@ -23,20 +24,20 @@ static void *opcode_targets[256] = { &&TARGET_COMPARE_OP_ADAPTIVE, &&TARGET_COMPARE_OP_FLOAT_JUMP, &&TARGET_COMPARE_OP_INT_JUMP, - &&TARGET_COMPARE_OP_STR_JUMP, &&TARGET_BINARY_SUBSCR, + &&TARGET_COMPARE_OP_STR_JUMP, &&TARGET_BINARY_SUBSCR_ADAPTIVE, &&TARGET_BINARY_SUBSCR_GETITEM, &&TARGET_BINARY_SUBSCR_LIST_INT, - &&TARGET_BINARY_SUBSCR_TUPLE_INT, &&TARGET_GET_LEN, &&TARGET_MATCH_MAPPING, &&TARGET_MATCH_SEQUENCE, &&TARGET_MATCH_KEYS, - &&TARGET_BINARY_SUBSCR_DICT, + &&TARGET_BINARY_SUBSCR_TUPLE_INT, &&TARGET_PUSH_EXC_INFO, - &&TARGET_STORE_SUBSCR_ADAPTIVE, + &&TARGET_BINARY_SUBSCR_DICT, &&TARGET_POP_EXCEPT_AND_RERAISE, + &&TARGET_STORE_SUBSCR_ADAPTIVE, &&TARGET_STORE_SUBSCR_LIST_INT, &&TARGET_STORE_SUBSCR_DICT, &&TARGET_CALL_FUNCTION_ADAPTIVE, @@ -45,28 +46,27 @@ static void *opcode_targets[256] = { &&TARGET_CALL_FUNCTION_LEN, &&TARGET_CALL_FUNCTION_ISINSTANCE, &&TARGET_CALL_FUNCTION_PY_SIMPLE, + &&TARGET_CALL_FUNCTION_ALLOC_AND_ENTER_INIT, &&TARGET_JUMP_ABSOLUTE_QUICK, - &&TARGET_LOAD_ATTR_ADAPTIVE, - &&TARGET_LOAD_ATTR_INSTANCE_VALUE, &&TARGET_WITH_EXCEPT_START, &&TARGET_GET_AITER, &&TARGET_GET_ANEXT, &&TARGET_BEFORE_ASYNC_WITH, &&TARGET_BEFORE_WITH, &&TARGET_END_ASYNC_FOR, + &&TARGET_LOAD_ATTR_ADAPTIVE, + &&TARGET_LOAD_ATTR_INSTANCE_VALUE, &&TARGET_LOAD_ATTR_WITH_HINT, &&TARGET_LOAD_ATTR_SLOT, &&TARGET_LOAD_ATTR_MODULE, - &&TARGET_LOAD_GLOBAL_ADAPTIVE, - &&TARGET_LOAD_GLOBAL_MODULE, &&TARGET_STORE_SUBSCR, &&TARGET_DELETE_SUBSCR, + &&TARGET_LOAD_GLOBAL_ADAPTIVE, + &&TARGET_LOAD_GLOBAL_MODULE, 
&&TARGET_LOAD_GLOBAL_BUILTIN, &&TARGET_LOAD_METHOD_ADAPTIVE, &&TARGET_LOAD_METHOD_CACHED, &&TARGET_LOAD_METHOD_CLASS, - &&TARGET_LOAD_METHOD_MODULE, - &&TARGET_LOAD_METHOD_NO_DICT, &&TARGET_GET_ITER, &&TARGET_GET_YIELD_FROM_ITER, &&TARGET_PRINT_EXPR, @@ -74,20 +74,20 @@ static void *opcode_targets[256] = { &&TARGET_YIELD_FROM, &&TARGET_GET_AWAITABLE, &&TARGET_LOAD_ASSERTION_ERROR, + &&TARGET_LOAD_METHOD_MODULE, + &&TARGET_LOAD_METHOD_NO_DICT, &&TARGET_STORE_ATTR_ADAPTIVE, &&TARGET_STORE_ATTR_INSTANCE_VALUE, &&TARGET_STORE_ATTR_SLOT, &&TARGET_STORE_ATTR_WITH_HINT, &&TARGET_LOAD_FAST__LOAD_FAST, - &&TARGET_STORE_FAST__LOAD_FAST, - &&TARGET_LOAD_FAST__LOAD_CONST, &&TARGET_LIST_TO_TUPLE, &&TARGET_RETURN_VALUE, &&TARGET_IMPORT_STAR, &&TARGET_SETUP_ANNOTATIONS, &&TARGET_YIELD_VALUE, - &&TARGET_LOAD_CONST__LOAD_FAST, - &&TARGET_STORE_FAST__STORE_FAST, + &&TARGET_STORE_FAST__LOAD_FAST, + &&TARGET_LOAD_FAST__LOAD_CONST, &&TARGET_POP_EXCEPT, &&TARGET_STORE_NAME, &&TARGET_DELETE_NAME, @@ -122,11 +122,11 @@ static void *opcode_targets[256] = { &&TARGET_COPY, &&TARGET_JUMP_IF_NOT_EXC_MATCH, &&TARGET_BINARY_OP, - &&_unknown_opcode, + &&TARGET_LOAD_CONST__LOAD_FAST, &&TARGET_LOAD_FAST, &&TARGET_STORE_FAST, &&TARGET_DELETE_FAST, - &&_unknown_opcode, + &&TARGET_STORE_FAST__STORE_FAST, &&_unknown_opcode, &&TARGET_GEN_START, &&TARGET_RAISE_VARARGS, diff --git a/Python/specialize.c b/Python/specialize.c index cdc535396fa762..1c92fd1192f572 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -6,6 +6,8 @@ #include "pycore_object.h" #include "opcode.h" #include "structmember.h" // struct PyMemberDef, T_OFFSET_EX +#include "pycore_function.h" // _PyFunction_FromConstructor() + #include // rand() @@ -491,6 +493,7 @@ initial_counter_value(void) { #define SPEC_FAIL_PYCFUNCTION_NOARGS 16 #define SPEC_FAIL_BAD_CALL_FLAGS 17 #define SPEC_FAIL_CLASS 18 +#define SPEC_FAIL_INIT_NOT_SIMPLE 25 /* COMPARE_OP */ #define SPEC_FAIL_STRING_COMPARE 13 @@ -1256,11 +1259,56 @@ 
_Py_Specialize_StoreSubscr(PyObject *container, PyObject *sub, _Py_CODEUNIT *ins return 0; } + +static PyFunctionObject * +get_init_for_simple_managed_python_class(PyTypeObject *tp) +{ + _Py_IDENTIFIER(__init__); + if (tp->tp_new != PyBaseObject_Type.tp_new) { + return NULL; + } + if (tp->tp_alloc != PyType_GenericAlloc) { + return NULL; + } + if ((tp->tp_flags & Py_TPFLAGS_MANAGED_DICT) == 0) { + return NULL; + } + PyObject *init = _PyType_LookupId(tp, &PyId___init__); + if (init == NULL || !PyFunction_Check(init)) { + return NULL; + } + int kind = function_kind((PyCodeObject *)PyFunction_GET_CODE(init)); + if (kind != SIMPLE_FUNCTION) { + return NULL; + } + return (PyFunctionObject *)init; +} + +static int +setup_init_cleanup_func(void); + static int specialize_class_call( PyObject *callable, _Py_CODEUNIT *instr, int nargs, SpecializedCacheEntry *cache) { + if (setup_init_cleanup_func()) { + return -1; + } + PyTypeObject *tp = (PyTypeObject *)callable; + PyFunctionObject *init = get_init_for_simple_managed_python_class(tp); + if (init) { + if (((PyCodeObject *)init->func_code)->co_argcount != nargs+1) { + SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_WRONG_NUMBER_ARGUMENTS); + return -1; + } + _PyAdaptiveEntry *cache0 = &cache[0].adaptive; + _PyObjectCache *cache1 = &cache[-1].obj; + cache0->version = tp->tp_version_tag; + cache1->obj = (PyObject *)init; /* borrowed */ + *instr = _Py_MAKECODEUNIT(CALL_FUNCTION_ALLOC_AND_ENTER_INIT, _Py_OPARG(*instr)); + return 0; + } SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_CLASS); return -1; } @@ -1587,3 +1635,82 @@ _Py_Specialize_CompareOp(PyObject *lhs, PyObject *rhs, STAT_INC(COMPARE_OP, specialization_success); adaptive->counter = initial_counter_value(); } + +PyFunctionObject *_Py_InitCleanupFunc = NULL; + +char INIT_CLEANUP_CODE[8] = { + LOAD_ASSERTION_ERROR, 0, + RAISE_VARARGS, 1, + EXIT_INIT_CHECK, 0, + RETURN_VALUE, 0 +}; + +static int +setup_init_cleanup_func(void) { + if (_Py_InitCleanupFunc != NULL) { + return 0; 
+ } + PyObject *empty_bytes = PyBytes_FromStringAndSize(NULL, 0); + PyObject *empty_tuple = PyTuple_New(0); + PyObject *empty_str = _PyUnicode_FromASCII("", 0); + PyObject *name = _PyUnicode_FromASCII("type.__call__", strlen("type.__call__")); + PyObject *code = PyBytes_FromStringAndSize(INIT_CLEANUP_CODE, 8); + if (empty_bytes == NULL || empty_str == NULL || name == NULL || code == NULL) { + goto cleanup; + } + struct _PyCodeConstructor con = { + .filename = empty_str, + .name = name, + .qualname = name, + .flags = CO_NEWLOCALS | CO_OPTIMIZED, + + .code = code, + .firstlineno = 1, + .linetable = empty_bytes, + .endlinetable = empty_bytes, + .columntable = empty_bytes, + + .consts = empty_tuple, + .names = empty_tuple, + + .localsplusnames = empty_tuple, + .localspluskinds = empty_bytes, + + .argcount = 0, + .posonlyargcount = 0, + .kwonlyargcount = 0, + + .stacksize = 2, + + .exceptiontable = empty_bytes, + }; + + PyCodeObject *codeobj = _PyCode_New(&con); + if (codeobj == NULL) { + goto cleanup; + } + PyObject *globals = PyDict_New(); + if (globals == NULL) { + Py_DECREF(codeobj); + goto cleanup; + } + PyFrameConstructor desc = { + .fc_globals = globals, + .fc_builtins = globals, + .fc_name = codeobj->co_name, + .fc_qualname = codeobj->co_name, + .fc_code = (PyObject *)codeobj, + .fc_defaults = NULL, + .fc_kwdefaults = NULL, + .fc_closure = NULL + }; + _Py_InitCleanupFunc = _PyFunction_FromConstructor(&desc); +cleanup: + Py_XDECREF(empty_bytes); + Py_XDECREF(empty_tuple); + Py_XDECREF(empty_str); + Py_XDECREF(name); + Py_XDECREF(code); + PyErr_Clear(); + return _Py_InitCleanupFunc == NULL ? -1 : 0; +} From c607dfe563707fcce00a5665fb8bae446db6bf7a Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 5 Jan 2022 10:56:38 +0000 Subject: [PATCH 2/3] Don't change magic number. 
--- Lib/importlib/_bootstrap_external.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Lib/importlib/_bootstrap_external.py b/Lib/importlib/_bootstrap_external.py index 5ead6caf9f3c75..29324664cea864 100644 --- a/Lib/importlib/_bootstrap_external.py +++ b/Lib/importlib/_bootstrap_external.py @@ -375,6 +375,8 @@ def _write_atomic(path, data, mode=0o666): # Python 3.11a4 3467 (Change CALL_xxx opcodes) # Python 3.11a4 3468 (Add SEND opcode) # Python 3.11a4 3469 (bpo-45711: remove type, traceback from exc_info) +# Python 3.11a4 3470 (bpo-46221: PREP_RERAISE_STAR no longer pushes lasti) +# Python 3.11a4 3471 (bpo-46202: remove pop POP_EXCEPT_AND_RERAISE) # # MAGIC must change whenever the bytecode emitted by the compiler may no @@ -384,7 +386,7 @@ def _write_atomic(path, data, mode=0o666): # Whenever MAGIC_NUMBER is changed, the ranges in the magic_values array # in PC/launcher.c must also be updated. -MAGIC_NUMBER = (3469).to_bytes(2, 'little') + b'\r\n' +MAGIC_NUMBER = (3471).to_bytes(2, 'little') + b'\r\n' _RAW_MAGIC_NUMBER = int.from_bytes(MAGIC_NUMBER, 'little') # For import.c _PYCACHE = '__pycache__' From e6002c4e4265761292a94772ff36b335c04c7770 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 5 Jan 2022 11:12:06 +0000 Subject: [PATCH 3/3] Add news --- .../2022-01-05-11-12-00.bpo-44525.4E3Pwn.rst | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2022-01-05-11-12-00.bpo-44525.4E3Pwn.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-01-05-11-12-00.bpo-44525.4E3Pwn.rst b/Misc/NEWS.d/next/Core and Builtins/2022-01-05-11-12-00.bpo-44525.4E3Pwn.rst new file mode 100644 index 00000000000000..5633097f4a3fdd --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2022-01-05-11-12-00.bpo-44525.4E3Pwn.rst @@ -0,0 +1,11 @@ +Specializes calls to most Python classes. Specifically, any class that +inherits from ``object``, or another Python class, and does not override +``__new__``. 
+ +The specialized instruction does the following: + +1. Creates the object (by calling ``object.__new__``) +2. Pushes a shim frame to the frame stack (to clean up after ``__init__``) +3. Pushes the frame for ``__init__`` to the frame stack + +Speeds up the instantiation of most Python classes.