From 79e45a1ea6c7aa7d6181da2355efdcea0b718c72 Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Thu, 24 Apr 2025 23:11:57 +0100 Subject: [PATCH 01/21] plumbing for registering specialization --- Include/internal/pycore_c_array.h | 2 +- Include/internal/pycore_code.h | 2 + Include/internal/pycore_interp_structs.h | 5 +++ Include/object.h | 2 + Modules/arraymodule.c | 34 ++++++++++++++- Python/pystate.c | 2 + Python/specialize.c | 53 +++++++++++++++++++++--- 7 files changed, 93 insertions(+), 7 deletions(-) diff --git a/Include/internal/pycore_c_array.h b/Include/internal/pycore_c_array.h index 7a10fc866c88f1..14c1fe03683db5 100644 --- a/Include/internal/pycore_c_array.h +++ b/Include/internal/pycore_c_array.h @@ -14,7 +14,7 @@ extern "C" { typedef struct { void *array; /* pointer to the array */ - int allocated_entries; /* pointer to the capacity of the array */ + int allocated_entries; /* the capacity of the array */ size_t item_size; /* size of each element */ int initial_num_entries; /* initial allocation size */ } _Py_c_array_t; diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index 635d2b24f4bdff..843086235598f5 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -491,6 +491,8 @@ typedef struct { binaryopactionfunc action; } _PyBinaryOpSpecializationDescr; +PyAPI_DATA(int) _Py_Specialize_AddBinaryOpExtention(_PyBinaryOpSpecializationDescr* descr); + /* Comparison bit masks. */ /* Note this evaluates its arguments twice each */ diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h index 45d878af967b86..f7189ca44247ad 100644 --- a/Include/internal/pycore_interp_structs.h +++ b/Include/internal/pycore_interp_structs.h @@ -8,6 +8,7 @@ extern "C" { #endif #include "pycore_ast_state.h" // struct ast_state +#include "pycore_c_array.h" // _Py_c_array_t #include "pycore_llist.h" // struct llist_node #include "pycore_opcode_utils.h" // NUM_COMMON_CONSTANTS #include "pycore_pymath.h" // _PY_SHORT_FLOAT_REPR @@ -951,6 +952,10 @@ struct _is { # endif #endif + /* Specialization extensions */ + Py_ssize_t num_binop_specializer_extentions; + _Py_c_array_t binop_specializer_extentions; + /* the initial PyInterpreterState.threads.head */ _PyThreadStateImpl _initial_thread; // _initial_thread should be the last field of PyInterpreterState. diff --git a/Include/object.h b/Include/object.h index 8cc83abb8574e3..47a33cf9632fd0 100644 --- a/Include/object.h +++ b/Include/object.h @@ -348,6 +348,8 @@ typedef int (*objobjproc)(PyObject *, PyObject *); typedef int (*visitproc)(PyObject *, void *); typedef int (*traverseproc)(PyObject *, visitproc, void *); +struct _PyBinaryOpSpecializationDescr; +typedef int (*binopspecfunc)(PyObject *lhs, PyObject *rhs, int oparg, struct _PyBinaryOpSpecializationDescr* descr); typedef void (*freefunc)(void *); typedef void (*destructor)(PyObject *); diff --git a/Modules/arraymodule.c b/Modules/arraymodule.c index 401a3a7072b846..0a76db412ed896 100644 --- a/Modules/arraymodule.c +++ b/Modules/arraymodule.c @@ -14,6 +14,8 @@ #include "pycore_modsupport.h" // _PyArg_NoKeywords() #include "pycore_moduleobject.h" // _PyModule_GetState() +#include "opcode.h" // binary op opargs (NB_*) + #include // offsetof() #include @@ -3201,6 +3203,33 @@ do { \ state->str_ ## string = tmp; \ } while (0) +static inline int +array_guard(PyObject *lhs, PyObject *rhs) +{ + fprintf(stderr, "array_guard\n"); + return 0; +} + +static PyObject * +array_action(PyObject *lhs, PyObject *rhs) +{ + return NULL; +} + +static int +array_register_specializations(void) +{ + _PyBinaryOpSpecializationDescr descr = { + .oparg = NB_MULTIPLY, + .guard = array_guard, + .action = array_action, + }; + if (_Py_Specialize_AddBinaryOpExtention(&descr) < 0) { + return -1; + } + return 0; +} + static int array_modexec(PyObject *m) { @@ -3240,6 +3269,10 @@ array_modexec(PyObject *m) } Py_DECREF(res); + if (array_register_specializations() < 0) { + return -1; + } + if (PyModule_AddType(m, state->ArrayType) < 0) { return -1; } @@ -3252,7 +3285,6 @@ array_modexec(PyObject *m) if (PyModule_Add(m, "typecodes", typecodes) < 0) { return -1; } - return 0; } diff --git a/Python/pystate.c b/Python/pystate.c index 1ac134400856d4..e2a77eb7fafdce 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -873,6 +873,8 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate) interp->ceval.instrumentation_version = 0; tstate->eval_breaker = 0; + _Py_CArray_Fini(&interp->binop_specializer_extentions); + for (int i = 0; i < _PY_MONITORING_UNGROUPED_EVENTS; i++) { interp->monitors.tools[i] = 0; } diff --git a/Python/specialize.c b/Python/specialize.c index 59ec9a4cad6b9c..e382cb1eaa3db5 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -2556,13 +2556,32 @@ static _PyBinaryOpSpecializationDescr binaryop_extend_descrs[] = { {NB_MULTIPLY, compactlong_float_guard, compactlong_float_multiply}, }; +int +_Py_Specialize_AddBinaryOpExtention(_PyBinaryOpSpecializationDescr* descr) +{ + PyThreadState *tstate = PyThreadState_Get(); + _Py_c_array_t *extensions = &tstate->interp->binop_specializer_extentions; + Py_ssize_t idx = tstate->interp->num_binop_specializer_extentions; + if (idx == 0) { + _Py_CArray_Init(extensions, sizeof(_PyBinaryOpSpecializationDescr), 10); + } + if (_Py_CArray_EnsureCapacity(extensions, idx) < 0) { + return -1; + } + _PyBinaryOpSpecializationDescr* descrs = (_PyBinaryOpSpecializationDescr*)extensions->array; + descrs[idx] = *descr; + tstate->interp->num_binop_specializer_extentions++; + return 0; +} + static int -binary_op_extended_specialization(PyObject *lhs, PyObject *rhs, int oparg, - _PyBinaryOpSpecializationDescr **descr) +binary_op_extended_specialization_from_list( + _PyBinaryOpSpecializationDescr *descrs, size_t size, + PyObject *lhs, PyObject *rhs, int oparg, + _PyBinaryOpSpecializationDescr **descr) { - size_t n = sizeof(binaryop_extend_descrs)/sizeof(_PyBinaryOpSpecializationDescr); - for (size_t i = 0; i < n; i++) { - _PyBinaryOpSpecializationDescr *d = &binaryop_extend_descrs[i]; + for (size_t i = 0; i < size; i++) { + _PyBinaryOpSpecializationDescr *d = &descrs[i]; if (d->oparg == oparg && d->guard(lhs, rhs)) { *descr = d; return 1; @@ -2571,6 +2590,30 @@ binary_op_extended_specialization(PyObject *lhs, PyObject *rhs, int oparg, return 0; } +static int +binary_op_extended_specialization(PyObject *lhs, PyObject *rhs, int oparg, + _PyBinaryOpSpecializationDescr **descr) +{ + if (binary_op_extended_specialization_from_list( + binaryop_extend_descrs, + sizeof(binaryop_extend_descrs)/sizeof(_PyBinaryOpSpecializationDescr), + lhs, rhs, oparg, descr)) + { + return 1; + } + + PyThreadState *tstate = PyThreadState_Get(); + _Py_c_array_t *extensions = &tstate->interp->binop_specializer_extentions; + if (binary_op_extended_specialization_from_list( + (_PyBinaryOpSpecializationDescr *)extensions->array, + tstate->interp->num_binop_specializer_extentions, + lhs, rhs, oparg, descr)) + { + return 1; + } + return 0; +} + Py_NO_INLINE void _Py_Specialize_BinaryOp(_PyStackRef lhs_st, _PyStackRef rhs_st, _Py_CODEUNIT *instr, int oparg, _PyStackRef *locals) From 72dfba429a4ddaf44bdcd4b91eec27513954c2b2 Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Mon, 28 Apr 2025 22:41:23 +0100 Subject: [PATCH 02/21] basic array plugin --- Include/internal/pycore_opcode_metadata.h | 2 +- Include/internal/pycore_uop_metadata.h | 4 ++-- Modules/arraymodule.c | 29 ++++++++++++++++------- Python/bytecodes.c | 4 +++- Python/executor_cases.c.h | 8 ++++++- Python/generated_cases.c.h | 8 ++++++- Python/specialize.c | 11 +++++---- 7 files changed, 48 insertions(+), 18 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index dc7ecc998c4477..f5604a047840f6 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1082,7 +1082,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[267] = { [BINARY_OP_ADD_FLOAT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ERROR_FLAG }, [BINARY_OP_ADD_INT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [BINARY_OP_ADD_UNICODE] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ERROR_FLAG }, - [BINARY_OP_EXTEND] = { true, INSTR_FMT_IXC0000, HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG }, + [BINARY_OP_EXTEND] = { true, INSTR_FMT_IXC0000, HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [BINARY_OP_INPLACE_ADD_UNICODE] = { true, INSTR_FMT_IXC0000, HAS_LOCAL_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [BINARY_OP_MULTIPLY_FLOAT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ERROR_FLAG }, [BINARY_OP_MULTIPLY_INT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 9b88763da07ba7..dce8f3ac4acf75 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -94,8 +94,8 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_BINARY_OP_SUBTRACT_FLOAT] = HAS_ERROR_FLAG | HAS_PURE_FLAG, [_BINARY_OP_ADD_UNICODE] = HAS_ERROR_FLAG | HAS_PURE_FLAG, [_BINARY_OP_INPLACE_ADD_UNICODE] = HAS_LOCAL_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, - [_GUARD_BINARY_OP_EXTEND] = HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, - [_BINARY_OP_EXTEND] = HAS_ESCAPES_FLAG | HAS_PURE_FLAG, + [_GUARD_BINARY_OP_EXTEND] = HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_BINARY_OP_EXTEND] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG, [_BINARY_SLICE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_STORE_SLICE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_BINARY_OP_SUBSCR_LIST_INT] = HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, diff --git a/Modules/arraymodule.c b/Modules/arraymodule.c index 0a76db412ed896..fb13f28c5f5857 100644 --- a/Modules/arraymodule.c +++ b/Modules/arraymodule.c @@ -3204,25 +3204,38 @@ do { \ } while (0) static inline int -array_guard(PyObject *lhs, PyObject *rhs) +array_subscr_guard(PyObject *lhs, PyObject *rhs) { - fprintf(stderr, "array_guard\n"); - return 0; + PyObject *exc = PyErr_GetRaisedException(); + PyObject *module = PyType_GetModuleByDef(Py_TYPE(lhs), &arraymodule); + if (module == NULL) { + if (!PyErr_Occurred() || PyErr_ExceptionMatches(PyExc_TypeError)) { + /* lhs is not an array instance - ignore the TypeError (if any) */ + PyErr_SetRaisedException(exc); + return 0; + } + else { + _PyErr_ChainExceptions1(exc); + return -1; + } + } + PyErr_SetRaisedException(exc); + return array_Check(lhs, get_array_state(module)); } static PyObject * -array_action(PyObject *lhs, PyObject *rhs) +array_subscr_action(PyObject *lhs, PyObject *rhs) { - return NULL; + return array_subscr(lhs, rhs); } static int array_register_specializations(void) { _PyBinaryOpSpecializationDescr descr = { - .oparg = NB_MULTIPLY, - .guard = array_guard, - .action = array_action, + .oparg = NB_SUBSCR, + .guard = array_subscr_guard, + .action = array_subscr_action, }; if (_Py_Specialize_AddBinaryOpExtention(&descr) < 0) { return -1; diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 5a52efaaec8042..2dfb36dd6a998f 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -803,7 +803,8 @@ dummy_func( assert(INLINE_CACHE_ENTRIES_BINARY_OP == 5); assert(d && d->guard); int res = d->guard(left_o, right_o); - DEOPT_IF(!res); + ERROR_IF(res < 0, error); + DEOPT_IF(res == 0); } pure op(_BINARY_OP_EXTEND, (descr/4, left, right -- res)) { @@ -816,6 +817,7 @@ dummy_func( PyObject *res_o = d->action(left_o, right_o); DECREF_INPUTS(); + ERROR_IF(res_o == NULL, error); res = PyStackRef_FromPyObjectSteal(res_o); } diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 7f3c3141ad00b9..3d9e54c066d783 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -1161,7 +1161,10 @@ _PyFrame_SetStackPointer(frame, stack_pointer); int res = d->guard(left_o, right_o); stack_pointer = _PyFrame_GetStackPointer(frame); - if (!res) { + if (res < 0) { + JUMP_TO_ERROR(); + } + if (res == 0) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } @@ -1193,6 +1196,9 @@ stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -2; assert(WITHIN_STACK_BOUNDS()); + if (res_o == NULL) { + JUMP_TO_ERROR(); + } res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[0] = res; stack_pointer += 1; diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index ee54b385b7064e..934ad3ad6a77dc 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -287,7 +287,10 @@ _PyFrame_SetStackPointer(frame, stack_pointer); int res = d->guard(left_o, right_o); stack_pointer = _PyFrame_GetStackPointer(frame); - if (!res) { + if (res < 0) { + JUMP_TO_LABEL(error); + } + if (res == 0) { UPDATE_MISS_STATS(BINARY_OP); assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); JUMP_TO_PREDICTED(BINARY_OP); @@ -315,6 +318,9 @@ stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -2; assert(WITHIN_STACK_BOUNDS()); + if (res_o == NULL) { + JUMP_TO_LABEL(error); + } res = PyStackRef_FromPyObjectSteal(res_o); } stack_pointer[0] = res; diff --git a/Python/specialize.c b/Python/specialize.c index e382cb1eaa3db5..968bd398c8db21 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -2534,7 +2534,7 @@ LONG_FLOAT_ACTION(compactlong_float_multiply, *) LONG_FLOAT_ACTION(compactlong_float_true_div, /) #undef LONG_FLOAT_ACTION -static _PyBinaryOpSpecializationDescr binaryop_extend_descrs[] = { +static _PyBinaryOpSpecializationDescr binaryop_extend_builtins[] = { /* long-long arithmetic */ {NB_OR, compactlongs_guard, compactlongs_or}, {NB_AND, compactlongs_guard, compactlongs_and}, @@ -2582,6 +2582,8 @@ binary_op_extended_specialization_from_list( { for (size_t i = 0; i < size; i++) { _PyBinaryOpSpecializationDescr *d = &descrs[i]; + assert(d != NULL); + assert(d->guard != NULL); if (d->oparg == oparg && d->guard(lhs, rhs)) { *descr = d; return 1; @@ -2594,9 +2596,10 @@ static int binary_op_extended_specialization(PyObject *lhs, PyObject *rhs, int oparg, _PyBinaryOpSpecializationDescr **descr) { + typedef _PyBinaryOpSpecializationDescr descr_type; if (binary_op_extended_specialization_from_list( - binaryop_extend_descrs, - sizeof(binaryop_extend_descrs)/sizeof(_PyBinaryOpSpecializationDescr), + binaryop_extend_builtins, + sizeof(binaryop_extend_builtins)/sizeof(descr_type), lhs, rhs, oparg, descr)) { return 1; @@ -2605,7 +2608,7 @@ binary_op_extended_specialization(PyObject *lhs, PyObject *rhs, int oparg, PyThreadState *tstate = PyThreadState_Get(); _Py_c_array_t *extensions = &tstate->interp->binop_specializer_extentions; if (binary_op_extended_specialization_from_list( - (_PyBinaryOpSpecializationDescr *)extensions->array, + (descr_type*)extensions->array, tstate->interp->num_binop_specializer_extentions, lhs, rhs, oparg, descr)) { From 70ebe719fdf09ddad96560c769e7005d76ece049 Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Tue, 29 Apr 2025 22:22:32 +0100 Subject: [PATCH 03/21] use PyType_GetBaseByToken --- Modules/arraymodule.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/Modules/arraymodule.c b/Modules/arraymodule.c index fb13f28c5f5857..210b2eabe6280e 100644 --- a/Modules/arraymodule.c +++ b/Modules/arraymodule.c @@ -2965,6 +2965,7 @@ static PyType_Slot array_slots[] = { {Py_tp_alloc, PyType_GenericAlloc}, {Py_tp_new, array_new}, {Py_tp_traverse, array_tp_traverse}, + {Py_tp_token, Py_TP_USE_SPEC}, /* as sequence */ {Py_sq_length, array_length}, @@ -3207,20 +3208,15 @@ static inline int array_subscr_guard(PyObject *lhs, PyObject *rhs) { PyObject *exc = PyErr_GetRaisedException(); - PyObject *module = PyType_GetModuleByDef(Py_TYPE(lhs), &arraymodule); - if (module == NULL) { - if (!PyErr_Occurred() || PyErr_ExceptionMatches(PyExc_TypeError)) { - /* lhs is not an array instance - ignore the TypeError (if any) */ - PyErr_SetRaisedException(exc); - return 0; - } - else { - _PyErr_ChainExceptions1(exc); - return -1; + int ret = PyType_GetBaseByToken(Py_TYPE(lhs), &array_spec, NULL); + if (ret < 0) { + if (PyErr_ExceptionMatches(PyExc_TypeError)) { + PyErr_Clear(); + ret = 0; } } - PyErr_SetRaisedException(exc); - return array_Check(lhs, get_array_state(module)); + _PyErr_ChainExceptions1(exc); + return ret; } static PyObject * From d42d2a6385504d73aaf84d29ce473a0e053f75e0 Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Fri, 2 May 2025 19:29:22 +0100 Subject: [PATCH 04/21] method on type --- Include/cpython/object.h | 8 ++ Include/internal/pycore_c_array.h | 2 +- Include/internal/pycore_code.h | 4 +- Include/internal/pycore_interp_structs.h | 5 -- Include/typeslots.h | 1 + Lib/test/test_sys.py | 2 +- Modules/arraymodule.c | 99 ++++++++++++++---------- Objects/typeslots.inc | 1 + Python/bytecodes.c | 3 +- Python/executor_cases.c.h | 3 +- Python/generated_cases.c.h | 3 +- Python/pystate.c | 2 - Python/specialize.c | 56 +++----------- 13 files changed, 90 insertions(+), 99 deletions(-) diff --git a/Include/cpython/object.h b/Include/cpython/object.h index 3a4d65f7712c63..ccbb831a18a4ad 100644 --- a/Include/cpython/object.h +++ b/Include/cpython/object.h @@ -143,6 +143,11 @@ typedef struct { * backwards-compatibility */ typedef Py_ssize_t printfunc; +/* Specialize a binary by setting the descriptor pointer */ +struct _PyBinopSpecializationDescr; +typedef int(*binop_specialize_func)(PyObject *v, PyObject *w, int oparg, + struct _PyBinopSpecializationDescr **descr); + // If this structure is modified, Doc/includes/typestruct.h should be updated // as well. struct _typeobject { @@ -233,6 +238,9 @@ struct _typeobject { /* bitset of which type-watchers care about this type */ unsigned char tp_watched; + /* callback that may specialize BINARY_OP */ + binop_specialize_func tp_binop_specialize; + /* Number of tp_version_tag values used. * Set to _Py_ATTR_CACHE_UNUSED if the attribute cache is * disabled for this type (e.g. due to custom MRO entries). diff --git a/Include/internal/pycore_c_array.h b/Include/internal/pycore_c_array.h index 14c1fe03683db5..7a10fc866c88f1 100644 --- a/Include/internal/pycore_c_array.h +++ b/Include/internal/pycore_c_array.h @@ -14,7 +14,7 @@ extern "C" { typedef struct { void *array; /* pointer to the array */ - int allocated_entries; /* the capacity of the array */ + int allocated_entries; /* pointer to the capacity of the array */ size_t item_size; /* size of each element */ int initial_num_entries; /* initial allocation size */ } _Py_c_array_t; diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index 843086235598f5..d2b837505f1d67 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -485,14 +485,12 @@ adaptive_counter_backoff(_Py_BackoffCounter counter) { typedef int (*binaryopguardfunc)(PyObject *lhs, PyObject *rhs); typedef PyObject *(*binaryopactionfunc)(PyObject *lhs, PyObject *rhs); -typedef struct { +typedef struct _PyBinopSpecializationDescr { int oparg; binaryopguardfunc guard; binaryopactionfunc action; } _PyBinaryOpSpecializationDescr; -PyAPI_DATA(int) _Py_Specialize_AddBinaryOpExtention(_PyBinaryOpSpecializationDescr* descr); - /* Comparison bit masks. */ /* Note this evaluates its arguments twice each */ diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h index f7189ca44247ad..45d878af967b86 100644 --- a/Include/internal/pycore_interp_structs.h +++ b/Include/internal/pycore_interp_structs.h @@ -8,7 +8,6 @@ extern "C" { #endif #include "pycore_ast_state.h" // struct ast_state -#include "pycore_c_array.h" // _Py_c_array_t #include "pycore_llist.h" // struct llist_node #include "pycore_opcode_utils.h" // NUM_COMMON_CONSTANTS #include "pycore_pymath.h" // _PY_SHORT_FLOAT_REPR @@ -952,10 +951,6 @@ struct _is { # endif #endif - /* Specialization extensions */ - Py_ssize_t num_binop_specializer_extentions; - _Py_c_array_t binop_specializer_extentions; - /* the initial PyInterpreterState.threads.head */ _PyThreadStateImpl _initial_thread; // _initial_thread should be the last field of PyInterpreterState. diff --git a/Include/typeslots.h b/Include/typeslots.h index a7f3017ec02e92..980e714714e786 100644 --- a/Include/typeslots.h +++ b/Include/typeslots.h @@ -93,4 +93,5 @@ #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030E0000 /* New in 3.14 */ #define Py_tp_token 83 +#define Py_tp_binop_specialize 84 #endif diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index 10c3e0e9a1d2bb..780062ec71f15b 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -1776,7 +1776,7 @@ def delx(self): del self.__x check((1,2,3), vsize('') + self.P + 3*self.P) # type # static type: PyTypeObject - fmt = 'P2nPI13Pl4Pn9Pn12PIPc' + fmt = 'P2nPI13Pl4Pn9Pn12PI3Pc' s = vsize(fmt) check(int, s) typeid = 'n' if support.Py_GIL_DISABLED else '' diff --git a/Modules/arraymodule.c b/Modules/arraymodule.c index 210b2eabe6280e..c6c41e200a7ba1 100644 --- a/Modules/arraymodule.c +++ b/Modules/arraymodule.c @@ -850,6 +850,10 @@ array_richcompare(PyObject *v, PyObject *w, int op) return res; } +static int +array_binop_specialize(PyObject *v, PyObject *w, int oparg, + _PyBinaryOpSpecializationDescr **descr); + static Py_ssize_t array_length(PyObject *op) { @@ -2966,6 +2970,7 @@ static PyType_Slot array_slots[] = { {Py_tp_new, array_new}, {Py_tp_traverse, array_tp_traverse}, {Py_tp_token, Py_TP_USE_SPEC}, + {Py_tp_binop_specialize, array_binop_specialize}, /* as sequence */ {Py_sq_length, array_length}, @@ -2998,6 +3003,61 @@ static PyType_Spec array_spec = { .slots = array_slots, }; +static inline int +array_subscr_guard(PyObject *lhs, PyObject *rhs) +{ + PyObject *exc = PyErr_GetRaisedException(); + int ret = PyType_GetBaseByToken(Py_TYPE(lhs), &array_spec, NULL); + if (ret < 0) { + if (PyErr_ExceptionMatches(PyExc_TypeError)) { + PyErr_Clear(); + ret = 0; + } + } + _PyErr_ChainExceptions1(exc); + return ret; +} + +static PyObject * +array_subscr_action(PyObject *lhs, PyObject *rhs) +{ + return array_subscr(lhs, rhs); +} + +static int +array_binop_specialize(PyObject *v, PyObject *w, int oparg, + _PyBinaryOpSpecializationDescr **descr) +{ + array_state *state = find_array_state_by_type(Py_TYPE(v)); + + if (!array_Check(v, state)) { + return 0; + } + + *descr = NULL; + switch(oparg) { + case NB_SUBSCR: + if (array_subscr_guard(v, w)) { + *descr = (_PyBinaryOpSpecializationDescr*)PyMem_Malloc( + sizeof(_PyBinaryOpSpecializationDescr)); + if (*descr == NULL) { + PyErr_NoMemory(); + return -1; + } + **descr = (_PyBinaryOpSpecializationDescr) { + .oparg = oparg, + .guard = array_subscr_guard, + .action = array_subscr_action, + }; + return 1; + } + break; + } + + return 0; +} + + /*********************** Array Iterator **************************/ /*[clinic input] @@ -3204,41 +3264,6 @@ do { \ state->str_ ## string = tmp; \ } while (0) -static inline int -array_subscr_guard(PyObject *lhs, PyObject *rhs) -{ - PyObject *exc = PyErr_GetRaisedException(); - int ret = PyType_GetBaseByToken(Py_TYPE(lhs), &array_spec, NULL); - if (ret < 0) { - if (PyErr_ExceptionMatches(PyExc_TypeError)) { - PyErr_Clear(); - ret = 0; - } - } - _PyErr_ChainExceptions1(exc); - return ret; -} - -static PyObject * -array_subscr_action(PyObject *lhs, PyObject *rhs) -{ - return array_subscr(lhs, rhs); -} - -static int -array_register_specializations(void) -{ - _PyBinaryOpSpecializationDescr descr = { - .oparg = NB_SUBSCR, - .guard = array_subscr_guard, - .action = array_subscr_action, - }; - if (_Py_Specialize_AddBinaryOpExtention(&descr) < 0) { - return -1; - } - return 0; -} - static int array_modexec(PyObject *m) { @@ -3278,10 +3303,6 @@ array_modexec(PyObject *m) } Py_DECREF(res); - if (array_register_specializations() < 0) { - return -1; - } - if (PyModule_AddType(m, state->ArrayType) < 0) { return -1; } diff --git a/Objects/typeslots.inc b/Objects/typeslots.inc index 642160fe0bd8bc..f197c3f5023670 100644 --- a/Objects/typeslots.inc +++ b/Objects/typeslots.inc @@ -82,3 +82,4 @@ {offsetof(PyAsyncMethods, am_send), offsetof(PyTypeObject, tp_as_async)}, {-1, offsetof(PyTypeObject, tp_vectorcall)}, {-1, offsetof(PyHeapTypeObject, ht_token)}, +{-1, offsetof(PyTypeObject, tp_binop_specialize)}, diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 2dfb36dd6a998f..f8ca8cc2546134 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -801,7 +801,8 @@ dummy_func( PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); _PyBinaryOpSpecializationDescr *d = (_PyBinaryOpSpecializationDescr*)descr; assert(INLINE_CACHE_ENTRIES_BINARY_OP == 5); - assert(d && d->guard); + assert(d); + assert(d->guard); int res = d->guard(left_o, right_o); ERROR_IF(res < 0, error); DEOPT_IF(res == 0); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 3d9e54c066d783..dc8201b5330608 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -1157,7 +1157,8 @@ PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); _PyBinaryOpSpecializationDescr *d = (_PyBinaryOpSpecializationDescr*)descr; assert(INLINE_CACHE_ENTRIES_BINARY_OP == 5); - assert(d && d->guard); + assert(d); + assert(d->guard); _PyFrame_SetStackPointer(frame, stack_pointer); int res = d->guard(left_o, right_o); stack_pointer = _PyFrame_GetStackPointer(frame); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 934ad3ad6a77dc..577a7b661f33fe 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -283,7 +283,8 @@ PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); _PyBinaryOpSpecializationDescr *d = (_PyBinaryOpSpecializationDescr*)descr; assert(INLINE_CACHE_ENTRIES_BINARY_OP == 5); - assert(d && d->guard); + assert(d); + assert(d->guard); _PyFrame_SetStackPointer(frame, stack_pointer); int res = d->guard(left_o, right_o); stack_pointer = _PyFrame_GetStackPointer(frame); diff --git a/Python/pystate.c b/Python/pystate.c index e2a77eb7fafdce..1ac134400856d4 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -873,8 +873,6 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate) interp->ceval.instrumentation_version = 0; tstate->eval_breaker = 0; - _Py_CArray_Fini(&interp->binop_specializer_extentions); - for (int i = 0; i < _PY_MONITORING_UNGROUPED_EVENTS; i++) { interp->monitors.tools[i] = 0; } diff --git a/Python/specialize.c b/Python/specialize.c index 968bd398c8db21..0bc91fa68e208c 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -2556,32 +2556,14 @@ static _PyBinaryOpSpecializationDescr binaryop_extend_builtins[] = { {NB_MULTIPLY, compactlong_float_guard, compactlong_float_multiply}, }; -int -_Py_Specialize_AddBinaryOpExtention(_PyBinaryOpSpecializationDescr* descr) -{ - PyThreadState *tstate = PyThreadState_Get(); - _Py_c_array_t *extensions = &tstate->interp->binop_specializer_extentions; - Py_ssize_t idx = tstate->interp->num_binop_specializer_extentions; - if (idx == 0) { - _Py_CArray_Init(extensions, sizeof(_PyBinaryOpSpecializationDescr), 10); - } - if (_Py_CArray_EnsureCapacity(extensions, idx) < 0) { - return -1; - } - _PyBinaryOpSpecializationDescr* descrs = (_PyBinaryOpSpecializationDescr*)extensions->array; - descrs[idx] = *descr; - tstate->interp->num_binop_specializer_extentions++; - return 0; -} - static int -binary_op_extended_specialization_from_list( - _PyBinaryOpSpecializationDescr *descrs, size_t size, - PyObject *lhs, PyObject *rhs, int oparg, - _PyBinaryOpSpecializationDescr **descr) +binary_op_extended_specialization(PyObject *lhs, PyObject *rhs, int oparg, + _PyBinaryOpSpecializationDescr **descr) { + typedef _PyBinaryOpSpecializationDescr descr_type; + size_t size = sizeof(binaryop_extend_builtins)/sizeof(descr_type); for (size_t i = 0; i < size; i++) { - _PyBinaryOpSpecializationDescr *d = &descrs[i]; + descr_type *d = &binaryop_extend_builtins[i]; assert(d != NULL); assert(d->guard != NULL); if (d->oparg == oparg && d->guard(lhs, rhs)) { @@ -2589,30 +2571,14 @@ binary_op_extended_specialization_from_list( return 1; } } - return 0; -} -static int -binary_op_extended_specialization(PyObject *lhs, PyObject *rhs, int oparg, - _PyBinaryOpSpecializationDescr **descr) -{ - typedef _PyBinaryOpSpecializationDescr descr_type; - if (binary_op_extended_specialization_from_list( - binaryop_extend_builtins, - sizeof(binaryop_extend_builtins)/sizeof(descr_type), - lhs, rhs, oparg, descr)) - { - return 1; - } + if (Py_TYPE(lhs)->tp_binop_specialize != NULL) { + int ret = Py_TYPE(lhs)->tp_binop_specialize(lhs, rhs, oparg, descr); + if (ret < 0) { + return -1; + } - PyThreadState *tstate = PyThreadState_Get(); - _Py_c_array_t *extensions = &tstate->interp->binop_specializer_extentions; - if (binary_op_extended_specialization_from_list( - (descr_type*)extensions->array, - tstate->interp->num_binop_specializer_extentions, - lhs, rhs, oparg, descr)) - { - return 1; + return ret; } return 0; } From 8fda4076fddd01ed2a177d73fea8985850b92eff Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Sun, 4 May 2025 20:30:47 +0100 Subject: [PATCH 05/21] fix error --- Python/bytecodes.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index f8ca8cc2546134..d48a41250c0568 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -804,7 +804,7 @@ dummy_func( assert(d); assert(d->guard); int res = d->guard(left_o, right_o); - ERROR_IF(res < 0, error); + ERROR_IF(res < 0); DEOPT_IF(res == 0); } @@ -818,7 +818,7 @@ dummy_func( PyObject *res_o = d->action(left_o, right_o); DECREF_INPUTS(); - ERROR_IF(res_o == NULL, error); + ERROR_IF(res_o == NULL); res = PyStackRef_FromPyObjectSteal(res_o); } From 726fa8144927a31578f30f713f373ba2692eaa72 Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Sun, 4 May 2025 19:32:53 +0000 Subject: [PATCH 06/21] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20b?= =?UTF-8?q?lurb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../2025-05-04-19-32-47.gh-issue-133395.VhWWEP.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-05-04-19-32-47.gh-issue-133395.VhWWEP.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-05-04-19-32-47.gh-issue-133395.VhWWEP.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-04-19-32-47.gh-issue-133395.VhWWEP.rst new file mode 100644 index 00000000000000..88c6fa4b1dc568 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-04-19-32-47.gh-issue-133395.VhWWEP.rst @@ -0,0 +1 @@ +Add option for extension modules to specialised `BINARY_OP` instructions. Applied to `array` objects. From b47c1493307d1bfb99f9f9b9396a3914ca1e4002 Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Sun, 4 May 2025 20:38:59 +0100 Subject: [PATCH 07/21] typo --- .../2025-05-04-19-32-47.gh-issue-133395.VhWWEP.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-05-04-19-32-47.gh-issue-133395.VhWWEP.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-04-19-32-47.gh-issue-133395.VhWWEP.rst index 88c6fa4b1dc568..8ab8c9b0d5ea3f 100644 --- a/Misc/NEWS.d/next/Core_and_Builtins/2025-05-04-19-32-47.gh-issue-133395.VhWWEP.rst +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-04-19-32-47.gh-issue-133395.VhWWEP.rst @@ -1 +1,2 @@ -Add option for extension modules to specialised `BINARY_OP` instructions. Applied to `array` objects. +Add option for extension modules to specialised ``BINARY_OP`` instructions. +Applied to ``array`` objects. From 0abb5098124128e24f2175203c8e3a2231622e8d Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Sun, 4 May 2025 21:08:18 +0100 Subject: [PATCH 08/21] const --- Python/specialize.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/specialize.c b/Python/specialize.c index 0bc91fa68e208c..4ed4b4631f8117 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -2534,7 +2534,7 @@ LONG_FLOAT_ACTION(compactlong_float_multiply, *) LONG_FLOAT_ACTION(compactlong_float_true_div, /) #undef LONG_FLOAT_ACTION -static _PyBinaryOpSpecializationDescr binaryop_extend_builtins[] = { +static const _PyBinaryOpSpecializationDescr binaryop_extend_builtins[] = { /* long-long arithmetic */ {NB_OR, compactlongs_guard, compactlongs_or}, {NB_AND, compactlongs_guard, compactlongs_and}, From 2b4c610f72eda12c4e65894022272dc3e2db5b33 Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Sun, 4 May 2025 21:20:40 +0100 Subject: [PATCH 09/21] const --- Python/specialize.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/specialize.c b/Python/specialize.c index 4ed4b4631f8117..6c521ab425a459 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -2563,7 +2563,7 @@ binary_op_extended_specialization(PyObject *lhs, PyObject *rhs, int oparg, typedef _PyBinaryOpSpecializationDescr descr_type; size_t size = sizeof(binaryop_extend_builtins)/sizeof(descr_type); for (size_t i = 0; i < size; i++) { - descr_type *d = &binaryop_extend_builtins[i]; + descr_type *d = (descr_type *)&binaryop_extend_builtins[i]; assert(d != NULL); assert(d->guard != NULL); if (d->oparg == oparg && d->guard(lhs, rhs)) { From 89b6c376f7f8112bd6699ce87990aa777a497bd6 Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Mon, 5 May 2025 10:21:58 +0100 Subject: [PATCH 10/21] set oparg, check *descr --- Python/specialize.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/Python/specialize.c b/Python/specialize.c index 6c521ab425a459..e92e9ca57ef7b8 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -2577,7 +2577,16 @@ binary_op_extended_specialization(PyObject *lhs, PyObject *rhs, int oparg, if (ret < 0) { return -1; } - + if (ret == 1) { + if (*descr == NULL) { + PyErr_Format( + PyExc_ValueError, + "tp_binop_specialize of '%.200s' returned 1 with *descr == NULL", + Py_TYPE(lhs)->tp_name); + return -1; + } + (*descr)->oparg = oparg; + } return ret; } return 0; From 6630a95dda49cc094f1e5d9b32dc947c06fce91c Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Mon, 5 May 2025 12:07:35 +0100 Subject: [PATCH 11/21] add free --- Include/internal/pycore_code.h | 7 ++++++- Modules/arraymodule.c | 9 +++++++++ Python/bytecodes.c | 7 ++++++- Python/executor_cases.c.h | 11 +++++++++-- Python/generated_cases.c.h | 13 ++++++++++--- 5 files changed, 40 insertions(+), 7 deletions(-) diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index d2b837505f1d67..1c77730e417ef7 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -482,13 +482,18 @@ adaptive_counter_backoff(_Py_BackoffCounter counter) { /* Specialization Extensions */ /* callbacks for an external specialization */ + +struct _PyBinopSpecializationDescr; + typedef int (*binaryopguardfunc)(PyObject *lhs, PyObject *rhs); -typedef PyObject *(*binaryopactionfunc)(PyObject *lhs, PyObject *rhs); +typedef PyObject* (*binaryopactionfunc)(PyObject *lhs, PyObject *rhs); +typedef void (*binaryopfreefunc)(struct _PyBinopSpecializationDescr *descr); typedef struct _PyBinopSpecializationDescr { int oparg; binaryopguardfunc guard; binaryopactionfunc action; + binaryopfreefunc free; } _PyBinaryOpSpecializationDescr; /* Comparison bit masks. */ diff --git a/Modules/arraymodule.c b/Modules/arraymodule.c index c6c41e200a7ba1..a5fe2ce58964a5 100644 --- a/Modules/arraymodule.c +++ b/Modules/arraymodule.c @@ -3024,6 +3024,14 @@ array_subscr_action(PyObject *lhs, PyObject *rhs) return array_subscr(lhs, rhs); } +static void +array_subscr_free(_PyBinaryOpSpecializationDescr* descr) +{ + if (descr != NULL) { + PyMem_Free(descr); + } +} + static int array_binop_specialize(PyObject *v, PyObject *w, int oparg, _PyBinaryOpSpecializationDescr **descr) @@ -3048,6 +3056,7 @@ array_binop_specialize(PyObject *v, PyObject *w, int oparg, .oparg = oparg, .guard = array_subscr_guard, .action = array_subscr_action, + .free = array_subscr_free, }; return 1; } diff --git a/Python/bytecodes.c b/Python/bytecodes.c index d48a41250c0568..a54b31f2ef8cd8 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -805,7 +805,12 @@ dummy_func( assert(d->guard); int res = d->guard(left_o, right_o); ERROR_IF(res < 0); - DEOPT_IF(res == 0); + if (res == 0) { + if (d->free) { + d->free(d); + } + DEOPT_IF(true); + } } pure op(_BINARY_OP_EXTEND, (descr/4, left, right -- res)) { diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index dc8201b5330608..334cb6f650ecfa 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -1166,8 +1166,15 @@ JUMP_TO_ERROR(); } if (res == 0) { - UOP_STAT_INC(uopcode, miss); - JUMP_TO_JUMP_TARGET(); + if (d->free) { + _PyFrame_SetStackPointer(frame, stack_pointer); + d->free(d); + stack_pointer = _PyFrame_GetStackPointer(frame); + } + if (true) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } } break; } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 577a7b661f33fe..efa6167b796de3 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -292,9 +292,16 @@ JUMP_TO_LABEL(error); } if (res == 0) { - UPDATE_MISS_STATS(BINARY_OP); - assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); - JUMP_TO_PREDICTED(BINARY_OP); + if (d->free) { + _PyFrame_SetStackPointer(frame, stack_pointer); + d->free(d); + stack_pointer = _PyFrame_GetStackPointer(frame); + } + if (true) { + UPDATE_MISS_STATS(BINARY_OP); + assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); + JUMP_TO_PREDICTED(BINARY_OP); + } } } /* Skip -4 cache entry */ From f42d42f37fa27b9ca39814d2d855c1783369c25d Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Mon, 5 May 2025 12:26:26 +0100 Subject: [PATCH 12/21] review comments --- Modules/arraymodule.c | 1 + Python/specialize.c | 9 +++++---- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/Modules/arraymodule.c b/Modules/arraymodule.c index a5fe2ce58964a5..4d2ff32cabe467 100644 --- a/Modules/arraymodule.c +++ b/Modules/arraymodule.c @@ -3324,6 +3324,7 @@ array_modexec(PyObject *m) if (PyModule_Add(m, "typecodes", typecodes) < 0) { return -1; } + return 0; } diff --git a/Python/specialize.c b/Python/specialize.c index e92e9ca57ef7b8..2801f3e9e79a81 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -2572,8 +2572,9 @@ binary_op_extended_specialization(PyObject *lhs, PyObject *rhs, int oparg, } } - if (Py_TYPE(lhs)->tp_binop_specialize != NULL) { - int ret = Py_TYPE(lhs)->tp_binop_specialize(lhs, rhs, oparg, descr); + PyTypeObject *lhs_type = Py_TYPE(lhs); + if (lhs_type->tp_binop_specialize != NULL) { + int ret = lhs_type->tp_binop_specialize(lhs, rhs, oparg, descr); if (ret < 0) { return -1; } @@ -2581,8 +2582,8 @@ binary_op_extended_specialization(PyObject *lhs, PyObject *rhs, int oparg, if (*descr == NULL) { PyErr_Format( PyExc_ValueError, - "tp_binop_specialize of '%.200s' returned 1 with *descr == NULL", - Py_TYPE(lhs)->tp_name); + "tp_binop_specialize of '%T' returned 1 with *descr == NULL", + lhs_type->tp_name); return -1; } (*descr)->oparg = oparg; From 509b27fed862c854a0e05c48a1033f2733f7a096 Mon Sep 17 00:00:00 2001 From: Irit Katriel <1055913+iritkatriel@users.noreply.github.com> Date: Mon, 5 May 2025 12:39:08 +0100 Subject: [PATCH 13/21] whitespace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> --- Include/cpython/object.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Include/cpython/object.h b/Include/cpython/object.h index ccbb831a18a4ad..0cdb4a801d378b 100644 --- a/Include/cpython/object.h +++ b/Include/cpython/object.h @@ -145,7 +145,7 @@ typedef Py_ssize_t printfunc; /* Specialize a binary by setting the descriptor pointer */ struct _PyBinopSpecializationDescr; -typedef int(*binop_specialize_func)(PyObject *v, PyObject *w, int oparg, +typedef int (*binop_specialize_func)(PyObject *v, PyObject *w, int oparg, struct _PyBinopSpecializationDescr **descr); // If this structure is modified, Doc/includes/typestruct.h should be updated From 5c1ed68e089ac16ec311d724e8a50177e91b7a0b Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Mon, 5 May 2025 12:43:14 +0100 Subject: [PATCH 14/21] fix error --- Python/specialize.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/specialize.c b/Python/specialize.c index 2801f3e9e79a81..6b485646ba8551 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -2583,7 +2583,7 @@ binary_op_extended_specialization(PyObject *lhs, PyObject *rhs, int oparg, PyErr_Format( PyExc_ValueError, "tp_binop_specialize of '%T' returned 1 with *descr == NULL", - lhs_type->tp_name); + lhs); return -1; } (*descr)->oparg = oparg; From fd24d7b8bddd377a85df269ee66c25f384471ebe Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Mon, 5 May 2025 12:49:46 +0100 Subject: [PATCH 15/21] remove unused --- Include/object.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/Include/object.h b/Include/object.h index 47a33cf9632fd0..8cc83abb8574e3 100644 --- a/Include/object.h +++ b/Include/object.h @@ -348,8 +348,6 @@ typedef int (*objobjproc)(PyObject *, PyObject *); typedef int (*visitproc)(PyObject *, void *); typedef int (*traverseproc)(PyObject *, visitproc, void *); -struct _PyBinaryOpSpecializationDescr; -typedef int (*binopspecfunc)(PyObject *lhs, PyObject *rhs, int oparg, struct _PyBinaryOpSpecializationDescr* descr); typedef void (*freefunc)(void *); typedef void (*destructor)(PyObject *); From b15ad6111b4d387bd63291929a7307ae3d516307 Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Mon, 5 May 2025 13:15:36 +0100 Subject: [PATCH 16/21] use Py_ARRAY_LENGTH --- Python/specialize.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/specialize.c b/Python/specialize.c index 6b485646ba8551..edcb6766b48fb2 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -2561,7 +2561,7 @@ binary_op_extended_specialization(PyObject *lhs, PyObject *rhs, int oparg, _PyBinaryOpSpecializationDescr **descr) { typedef _PyBinaryOpSpecializationDescr descr_type; - size_t size = sizeof(binaryop_extend_builtins)/sizeof(descr_type); + size_t size = Py_ARRAY_LENGTH(binaryop_extend_builtins); for (size_t i = 0; i < size; i++) { descr_type *d = (descr_type *)&binaryop_extend_builtins[i]; assert(d != NULL); From 81a300866d4ee89575097d5b329f40f7ca40de38 Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Mon, 5 May 2025 13:16:59 +0100 Subject: [PATCH 17/21] fix deopt --- Include/internal/pycore_opcode_metadata.h | 1 - Include/internal/pycore_uop_metadata.h | 4 --- Python/bytecodes.c | 3 +++ Python/executor_cases.c.h | 32 +---------------------- Python/generated_cases.c.h | 5 ++++ Python/optimizer_cases.c.h | 4 +-- 6 files changed, 10 insertions(+), 39 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index f5604a047840f6..fbb696e1755590 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1333,7 +1333,6 @@ _PyOpcode_macro_expansion[256] = { [BINARY_OP_ADD_FLOAT] = { .nuops = 3, .uops = { { _GUARD_TOS_FLOAT, OPARG_SIMPLE, 0 }, { _GUARD_NOS_FLOAT, OPARG_SIMPLE, 0 }, { _BINARY_OP_ADD_FLOAT, OPARG_SIMPLE, 5 } } }, [BINARY_OP_ADD_INT] = { .nuops = 3, .uops = { { _GUARD_TOS_INT, OPARG_SIMPLE, 0 }, { _GUARD_NOS_INT, OPARG_SIMPLE, 0 }, { _BINARY_OP_ADD_INT, OPARG_SIMPLE, 5 } } }, [BINARY_OP_ADD_UNICODE] = { .nuops = 3, .uops = { { _GUARD_TOS_UNICODE, OPARG_SIMPLE, 0 }, { _GUARD_NOS_UNICODE, OPARG_SIMPLE, 0 }, { _BINARY_OP_ADD_UNICODE, OPARG_SIMPLE, 5 } } }, - [BINARY_OP_EXTEND] = { .nuops = 2, .uops = { { _GUARD_BINARY_OP_EXTEND, 4, 1 }, { _BINARY_OP_EXTEND, 4, 1 } } }, [BINARY_OP_INPLACE_ADD_UNICODE] = { .nuops = 3, .uops = { { _GUARD_TOS_UNICODE, OPARG_SIMPLE, 0 }, { _GUARD_NOS_UNICODE, OPARG_SIMPLE, 0 }, { _BINARY_OP_INPLACE_ADD_UNICODE, OPARG_SIMPLE, 5 } } }, [BINARY_OP_MULTIPLY_FLOAT] = { .nuops = 3, .uops = { { _GUARD_TOS_FLOAT, OPARG_SIMPLE, 0 }, { _GUARD_NOS_FLOAT, OPARG_SIMPLE, 0 }, { _BINARY_OP_MULTIPLY_FLOAT, OPARG_SIMPLE, 5 } } }, [BINARY_OP_MULTIPLY_INT] = { .nuops = 3, .uops = { { _GUARD_TOS_INT, OPARG_SIMPLE, 0 }, { _GUARD_NOS_INT, OPARG_SIMPLE, 0 }, { _BINARY_OP_MULTIPLY_INT, OPARG_SIMPLE, 5 } } }, diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index dce8f3ac4acf75..922d8a6cadf08c 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -94,7 +94,6 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_BINARY_OP_SUBTRACT_FLOAT] = HAS_ERROR_FLAG | HAS_PURE_FLAG, [_BINARY_OP_ADD_UNICODE] = HAS_ERROR_FLAG | HAS_PURE_FLAG, [_BINARY_OP_INPLACE_ADD_UNICODE] = HAS_LOCAL_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, - [_GUARD_BINARY_OP_EXTEND] = HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_BINARY_OP_EXTEND] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG, [_BINARY_SLICE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_STORE_SLICE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, @@ -423,7 +422,6 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_GET_ITER] = "_GET_ITER", [_GET_LEN] = "_GET_LEN", [_GET_YIELD_FROM_ITER] = "_GET_YIELD_FROM_ITER", - [_GUARD_BINARY_OP_EXTEND] = "_GUARD_BINARY_OP_EXTEND", [_GUARD_CALLABLE_STR_1] = "_GUARD_CALLABLE_STR_1", [_GUARD_CALLABLE_TUPLE_1] = "_GUARD_CALLABLE_TUPLE_1", [_GUARD_CALLABLE_TYPE_1] = "_GUARD_CALLABLE_TYPE_1", @@ -760,8 +758,6 @@ int _PyUop_num_popped(int opcode, int oparg) return 2; case _BINARY_OP_INPLACE_ADD_UNICODE: return 2; - case _GUARD_BINARY_OP_EXTEND: - return 0; case _BINARY_OP_EXTEND: return 2; case _BINARY_SLICE: diff --git a/Python/bytecodes.c b/Python/bytecodes.c index a54b31f2ef8cd8..cc47e57175d727 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -809,6 +809,9 @@ dummy_func( if (d->free) { d->free(d); } + _PyBinaryOpCache *cache = (_PyBinaryOpCache *)(this_instr+1); + write_ptr(cache->external_cache, NULL); + this_instr->op.code = BINARY_OP; DEOPT_IF(true); } } diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 334cb6f650ecfa..662e050c5c478a 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -1147,37 +1147,7 @@ break; } - case _GUARD_BINARY_OP_EXTEND: { - _PyStackRef right; - _PyStackRef left; - right = stack_pointer[-1]; - left = stack_pointer[-2]; - PyObject *descr = (PyObject *)CURRENT_OPERAND0(); - PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); - PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); - _PyBinaryOpSpecializationDescr *d = (_PyBinaryOpSpecializationDescr*)descr; - assert(INLINE_CACHE_ENTRIES_BINARY_OP == 5); - assert(d); - assert(d->guard); - _PyFrame_SetStackPointer(frame, stack_pointer); - int res = d->guard(left_o, right_o); - stack_pointer = _PyFrame_GetStackPointer(frame); - if (res < 0) { - JUMP_TO_ERROR(); - } - if (res == 0) { - if (d->free) { - _PyFrame_SetStackPointer(frame, stack_pointer); - d->free(d); - stack_pointer = _PyFrame_GetStackPointer(frame); - } - if (true) { - UOP_STAT_INC(uopcode, miss); - JUMP_TO_JUMP_TARGET(); - } - } - break; - } + /* _GUARD_BINARY_OP_EXTEND is not a viable micro-op for tier 2 because it uses the 'this_instr' variable */ case _BINARY_OP_EXTEND: { _PyStackRef right; diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index efa6167b796de3..072951d2a5fcf6 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -297,6 +297,11 @@ d->free(d); stack_pointer = _PyFrame_GetStackPointer(frame); } + _PyBinaryOpCache *cache = (_PyBinaryOpCache *)(this_instr+1); + _PyFrame_SetStackPointer(frame, stack_pointer); + write_ptr(cache->external_cache, NULL); + stack_pointer = _PyFrame_GetStackPointer(frame); + this_instr->op.code = BINARY_OP; if (true) { UPDATE_MISS_STATS(BINARY_OP); assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 3f91f7eefc7bb0..8e8b2ecfa5b0cf 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -554,9 +554,7 @@ break; } - case _GUARD_BINARY_OP_EXTEND: { - break; - } + /* _GUARD_BINARY_OP_EXTEND is not a viable micro-op for tier 2 */ case _BINARY_OP_EXTEND: { JitOptSymbol *res; From 0190ecff0b0d55383c1c1c330ba3d963c5e92773 Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Mon, 5 May 2025 14:01:26 +0100 Subject: [PATCH 18/21] 'experimental' comment --- Include/cpython/object.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Include/cpython/object.h b/Include/cpython/object.h index 0cdb4a801d378b..86575df53bde3d 100644 --- a/Include/cpython/object.h +++ b/Include/cpython/object.h @@ -238,7 +238,7 @@ struct _typeobject { /* bitset of which type-watchers care about this type */ unsigned char tp_watched; - /* callback that may specialize BINARY_OP */ + /* callback that may specialize BINARY_OP -- Experimental API */ binop_specialize_func tp_binop_specialize; /* Number of tp_version_tag values used. From 78f05767c3ce312a4b9b937d9c45bd8e3bf9ef61 Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Mon, 5 May 2025 15:13:07 +0100 Subject: [PATCH 19/21] credits --- Include/cpython/object.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Include/cpython/object.h b/Include/cpython/object.h index 86575df53bde3d..fbbaccc9a06734 100644 --- a/Include/cpython/object.h +++ b/Include/cpython/object.h @@ -238,7 +238,11 @@ struct _typeobject { /* bitset of which type-watchers care about this type */ unsigned char tp_watched; - /* callback that may specialize BINARY_OP -- Experimental API */ + /* callback that may specialize BINARY_OP + * this is an experimental API based on the ideas in the paper + * Cross Module Quickening - The Curious Case of C Extensions + * by Felix Berlakovich and Stefan Brunthaler. + */ binop_specialize_func tp_binop_specialize; /* Number of tp_version_tag values used. From 90fb993189836e64acc1578c545400263055e297 Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Mon, 5 May 2025 15:18:40 +0100 Subject: [PATCH 20/21] comment on commutativity --- Python/specialize.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Python/specialize.c b/Python/specialize.c index edcb6766b48fb2..fe4a65ee5f8079 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -2560,6 +2560,11 @@ static int binary_op_extended_specialization(PyObject *lhs, PyObject *rhs, int oparg, _PyBinaryOpSpecializationDescr **descr) { + /* We are currently using this only for NB_SUBSCR, which is not + * commutative. Will need to revisit this function when we use + * this for operators which are. + */ + typedef _PyBinaryOpSpecializationDescr descr_type; size_t size = Py_ARRAY_LENGTH(binaryop_extend_builtins); for (size_t i = 0; i < size; i++) { From 454bfc54b6cd69b3d30ca7eacfadd3071d4af601 Mon Sep 17 00:00:00 2001 From: Irit Katriel <1055913+iritkatriel@users.noreply.github.com> Date: Mon, 5 May 2025 15:20:09 +0100 Subject: [PATCH 21/21] Apply suggestions from code review Co-authored-by: Michael Droettboom --- Include/cpython/object.h | 2 +- .../2025-05-04-19-32-47.gh-issue-133395.VhWWEP.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Include/cpython/object.h b/Include/cpython/object.h index fbbaccc9a06734..818fc7d05605ff 100644 --- a/Include/cpython/object.h +++ b/Include/cpython/object.h @@ -143,7 +143,7 @@ typedef struct { * backwards-compatibility */ typedef Py_ssize_t printfunc; -/* Specialize a binary by setting the descriptor pointer */ +/* Specialize a binary op by setting the descriptor pointer */ struct _PyBinopSpecializationDescr; typedef int (*binop_specialize_func)(PyObject *v, PyObject *w, int oparg, struct _PyBinopSpecializationDescr **descr); diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-05-04-19-32-47.gh-issue-133395.VhWWEP.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-04-19-32-47.gh-issue-133395.VhWWEP.rst index 8ab8c9b0d5ea3f..a391ce16339c26 100644 --- a/Misc/NEWS.d/next/Core_and_Builtins/2025-05-04-19-32-47.gh-issue-133395.VhWWEP.rst +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-04-19-32-47.gh-issue-133395.VhWWEP.rst @@ -1,2 +1,2 @@ -Add option for extension modules to specialised ``BINARY_OP`` instructions. +Add option for extension modules to specialize ``BINARY_OP`` instructions. Applied to ``array`` objects.