From 12a80bd2670f83bb7549ef8954a9881248d8ca55 Mon Sep 17 00:00:00 2001 From: Antony Lee Date: Mon, 16 Aug 2021 16:28:47 +0200 Subject: [PATCH 1/6] bpo-44850: Speedup methodcaller via vectorcall. --- .../2021-08-16-17-52-26.bpo-44850.r8jx5u.rst | 2 + Modules/_operator.c | 59 ++++++++++++++++++- 2 files changed, 59 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2021-08-16-17-52-26.bpo-44850.r8jx5u.rst diff --git a/Misc/NEWS.d/next/Library/2021-08-16-17-52-26.bpo-44850.r8jx5u.rst b/Misc/NEWS.d/next/Library/2021-08-16-17-52-26.bpo-44850.r8jx5u.rst new file mode 100644 index 00000000000000..09bb4b53860015 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2021-08-16-17-52-26.bpo-44850.r8jx5u.rst @@ -0,0 +1,2 @@ +Calls to ``operator.methodcaller`` are now 25-33% faster thanks to the use of +the vectorcall protocol. diff --git a/Modules/_operator.c b/Modules/_operator.c index f051513fc793a0..a2265821ee11aa 100644 --- a/Modules/_operator.c +++ b/Modules/_operator.c @@ -1,5 +1,6 @@ #include "Python.h" #include "pycore_moduleobject.h" // _PyModule_GetState() +#include "structmember.h" #include "clinic/_operator.c.h" typedef struct { @@ -1478,14 +1479,33 @@ typedef struct { PyObject *name; PyObject *args; PyObject *kwds; + PyObject **vectorcall_args; /* Borrowed references */ + PyObject *vectorcall_kwnames; + vectorcallfunc vectorcall; } methodcallerobject; +static PyObject * +methodcaller_vectorcall( + methodcallerobject *mc, PyObject *const *args, size_t nargsf, PyObject* kwnames) +{ + if (!_PyArg_CheckPositional("methodcaller", PyVectorcall_NARGS(nargsf), 1, 1) + || !_PyArg_NoKwnames("methodcaller", kwnames)) { + return NULL; + } + mc->vectorcall_args[0] = args[0]; + return PyObject_VectorcallMethod( + mc->name, mc->vectorcall_args, + (1 + PyTuple_GET_SIZE(mc->args)) | PY_VECTORCALL_ARGUMENTS_OFFSET, + mc->vectorcall_kwnames); +} + /* AC 3.5: variable number of arguments, not currently support by AC */ static PyObject * methodcaller_new(PyTypeObject 
*type, PyObject *args, PyObject *kwds) { methodcallerobject *mc; - PyObject *name; + PyObject *name, *key, *value; + Py_ssize_t nargs, i, ppos; if (PyTuple_GET_SIZE(args) < 1) { PyErr_SetString(PyExc_TypeError, "methodcaller needs at least " @@ -1521,6 +1541,32 @@ methodcaller_new(PyTypeObject *type, PyObject *args, PyObject *kwds) return NULL; } + nargs = PyTuple_GET_SIZE(args) - 1; + mc->vectorcall_args = PyMem_Calloc( + 1 + nargs + (kwds ? PyDict_Size(kwds) : 0), + sizeof(PyObject *)); + if (!mc->vectorcall_args) { + return PyErr_NoMemory(); + } + /* The first item of vectorcall_args will be filled with obj. */ + memcpy(mc->vectorcall_args + 1, PySequence_Fast_ITEMS(mc->args), + nargs * sizeof(PyObject *)); + if (kwds) { + mc->vectorcall_kwnames = PySequence_Tuple(kwds); + if (!mc->vectorcall_kwnames) { + return NULL; + } + i = ppos = 0; + while (PyDict_Next(kwds, &ppos, &key, &value)) { + mc->vectorcall_args[1 + nargs + i] = value; + ++i; + } + } + else { + mc->vectorcall_kwnames = NULL; + } + mc->vectorcall = (vectorcallfunc)methodcaller_vectorcall; + PyObject_GC_Track(mc); return (PyObject *)mc; } @@ -1531,6 +1577,7 @@ methodcaller_clear(methodcallerobject *mc) Py_CLEAR(mc->name); Py_CLEAR(mc->args); Py_CLEAR(mc->kwds); + Py_CLEAR(mc->vectorcall_kwnames); return 0; } @@ -1540,6 +1587,7 @@ methodcaller_dealloc(methodcallerobject *mc) PyTypeObject *tp = Py_TYPE(mc); PyObject_GC_UnTrack(mc); (void)methodcaller_clear(mc); + PyMem_Free(mc->vectorcall_args); tp->tp_free(mc); Py_DECREF(tp); } @@ -1696,6 +1744,12 @@ static PyMethodDef methodcaller_methods[] = { reduce_doc}, {NULL} }; + +static PyMemberDef methodcaller_members[] = { + {"__vectorcalloffset__", T_PYSSIZET, offsetof(methodcallerobject, vectorcall), READONLY}, + {NULL} +}; + PyDoc_STRVAR(methodcaller_doc, "methodcaller(name, ...) 
--> methodcaller object\n\ \n\ @@ -1711,6 +1765,7 @@ static PyType_Slot methodcaller_type_slots[] = { {Py_tp_traverse, methodcaller_traverse}, {Py_tp_clear, methodcaller_clear}, {Py_tp_methods, methodcaller_methods}, + {Py_tp_members, methodcaller_members}, {Py_tp_new, methodcaller_new}, {Py_tp_getattro, PyObject_GenericGetAttr}, {Py_tp_repr, methodcaller_repr}, @@ -1722,7 +1777,7 @@ static PyType_Spec methodcaller_type_spec = { .basicsize = sizeof(methodcallerobject), .itemsize = 0, .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | - Py_TPFLAGS_IMMUTABLETYPE), + Py_TPFLAGS_HAVE_VECTORCALL | Py_TPFLAGS_IMMUTABLETYPE), .slots = methodcaller_type_slots, }; From 8be0aed5baa41fb3a5544ce2b8b8969b21bf74be Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Fri, 21 Jul 2023 13:22:34 +0200 Subject: [PATCH 2/6] optimize methodcaller construction --- .../2021-08-16-17-52-26.bpo-44850.r8jx5u.rst | 3 +- Modules/_operator.c | 74 +++++++------------ 2 files changed, 29 insertions(+), 48 deletions(-) diff --git a/Misc/NEWS.d/next/Library/2021-08-16-17-52-26.bpo-44850.r8jx5u.rst b/Misc/NEWS.d/next/Library/2021-08-16-17-52-26.bpo-44850.r8jx5u.rst index 09bb4b53860015..97aff42c521af5 100644 --- a/Misc/NEWS.d/next/Library/2021-08-16-17-52-26.bpo-44850.r8jx5u.rst +++ b/Misc/NEWS.d/next/Library/2021-08-16-17-52-26.bpo-44850.r8jx5u.rst @@ -1,2 +1 @@ -Calls to ``operator.methodcaller`` are now 25-33% faster thanks to the use of -the vectorcall protocol. +Improve performance of ``operator.methodcaller`` by use of the vectorcall protocol. Patch by Antony Lee and Pieter Eendebak. 
diff --git a/Modules/_operator.c b/Modules/_operator.c index a548e26e520a1b..11e46007d6439f 100644 --- a/Modules/_operator.c +++ b/Modules/_operator.c @@ -2,6 +2,7 @@ #include "pycore_modsupport.h" // _PyArg_NoKwnames() #include "pycore_moduleobject.h" // _PyModule_GetState() #include "pycore_runtime.h" // _Py_ID() + #include "structmember.h" // PyMemberDef #include "clinic/_operator.c.h" @@ -1548,7 +1549,7 @@ static PyType_Spec attrgetter_type_spec = { typedef struct { PyObject_HEAD PyObject *name; - PyObject *args; + PyObject *xargs; // reference to arguments passed in constructor PyObject *kwds; PyObject **vectorcall_args; /* Borrowed references */ PyObject *vectorcall_kwnames; @@ -1566,7 +1567,7 @@ methodcaller_vectorcall( mc->vectorcall_args[0] = args[0]; return PyObject_VectorcallMethod( mc->name, mc->vectorcall_args, - (1 + PyTuple_GET_SIZE(mc->args)) | PY_VECTORCALL_ARGUMENTS_OFFSET, + (PyTuple_GET_SIZE(mc->xargs)) | PY_VECTORCALL_ARGUMENTS_OFFSET, mc->vectorcall_kwnames); } @@ -1576,7 +1577,6 @@ methodcaller_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { methodcallerobject *mc; PyObject *name, *key, *value; - Py_ssize_t nargs, i, ppos; if (PyTuple_GET_SIZE(args) < 1) { PyErr_SetString(PyExc_TypeError, "methodcaller needs at least " @@ -1598,37 +1598,34 @@ methodcaller_new(PyTypeObject *type, PyObject *args, PyObject *kwds) return NULL; } - name = PyTuple_GET_ITEM(args, 0); Py_INCREF(name); PyUnicode_InternInPlace(&name); mc->name = name; + mc->xargs = Py_XNewRef(args); // allows us to use borrowed references mc->kwds = Py_XNewRef(kwds); - mc->args = PyTuple_GetSlice(args, 1, PyTuple_GET_SIZE(args)); - if (mc->args == NULL) { - Py_DECREF(mc); - return NULL; - } - - nargs = PyTuple_GET_SIZE(args) - 1; + Py_ssize_t nargs = PyTuple_GET_SIZE(args); mc->vectorcall_args = PyMem_Calloc( - 1 + nargs + (kwds ? PyDict_Size(kwds) : 0), + nargs + (kwds ? 
PyDict_Size(kwds) : 0), sizeof(PyObject *)); if (!mc->vectorcall_args) { return PyErr_NoMemory(); } - /* The first item of vectorcall_args will be filled with obj. */ - memcpy(mc->vectorcall_args + 1, PySequence_Fast_ITEMS(mc->args), + /* The first item of vectorcall_args will be filled with obj later */ + if (nargs>1) { + memcpy(mc->vectorcall_args, PySequence_Fast_ITEMS(args), nargs * sizeof(PyObject *)); + } if (kwds) { mc->vectorcall_kwnames = PySequence_Tuple(kwds); if (!mc->vectorcall_kwnames) { return NULL; } - i = ppos = 0; + Py_ssize_t i = 0; + Py_ssize_t ppos = 0; while (PyDict_Next(kwds, &ppos, &key, &value)) { - mc->vectorcall_args[1 + nargs + i] = value; + mc->vectorcall_args[ nargs + i] = value; ++i; } } @@ -1645,7 +1642,7 @@ static int methodcaller_clear(methodcallerobject *mc) { Py_CLEAR(mc->name); - Py_CLEAR(mc->args); + Py_CLEAR(mc->xargs); Py_CLEAR(mc->kwds); Py_CLEAR(mc->vectorcall_kwnames); return 0; @@ -1666,30 +1663,12 @@ static int methodcaller_traverse(methodcallerobject *mc, visitproc visit, void *arg) { Py_VISIT(mc->name); - Py_VISIT(mc->args); + Py_VISIT(mc->xargs); Py_VISIT(mc->kwds); Py_VISIT(Py_TYPE(mc)); return 0; } -static PyObject * -methodcaller_call(methodcallerobject *mc, PyObject *args, PyObject *kw) -{ - PyObject *method, *obj, *result; - - if (!_PyArg_NoKeywords("methodcaller", kw)) - return NULL; - if (!_PyArg_CheckPositional("methodcaller", PyTuple_GET_SIZE(args), 1, 1)) - return NULL; - obj = PyTuple_GET_ITEM(args, 0); - method = PyObject_GetAttr(obj, mc->name); - if (method == NULL) - return NULL; - result = PyObject_Call(method, mc->args, mc->kwds); - Py_DECREF(method); - return result; -} - static PyObject * methodcaller_repr(methodcallerobject *mc) { @@ -1703,7 +1682,7 @@ methodcaller_repr(methodcallerobject *mc) } numkwdargs = mc->kwds != NULL ? 
PyDict_GET_SIZE(mc->kwds) : 0; - numposargs = PyTuple_GET_SIZE(mc->args); + numposargs = PyTuple_GET_SIZE(mc->xargs) - 1; numtotalargs = numposargs + numkwdargs; if (numtotalargs == 0) { @@ -1719,7 +1698,7 @@ methodcaller_repr(methodcallerobject *mc) } for (i = 0; i < numposargs; ++i) { - PyObject *onerepr = PyObject_Repr(PyTuple_GET_ITEM(mc->args, i)); + PyObject *onerepr = PyObject_Repr(PyTuple_GET_ITEM(mc->xargs, i+1)); if (onerepr == NULL) goto done; PyTuple_SET_ITEM(argreprs, i, onerepr); @@ -1769,17 +1748,16 @@ methodcaller_repr(methodcallerobject *mc) static PyObject * methodcaller_reduce(methodcallerobject *mc, PyObject *Py_UNUSED(ignored)) { - PyObject *newargs; if (!mc->kwds || PyDict_GET_SIZE(mc->kwds) == 0) { Py_ssize_t i; - Py_ssize_t callargcount = PyTuple_GET_SIZE(mc->args); - newargs = PyTuple_New(1 + callargcount); + Py_ssize_t newarg_size = PyTuple_GET_SIZE(mc->vectorcall_args); + PyObject * newargs = PyTuple_New(newarg_size); if (newargs == NULL) return NULL; PyTuple_SET_ITEM(newargs, 0, Py_NewRef(mc->name)); - for (i = 0; i < callargcount; ++i) { - PyObject *arg = PyTuple_GET_ITEM(mc->args, i); - PyTuple_SET_ITEM(newargs, i + 1, Py_NewRef(arg)); + for (i = 1; i < newarg_size; ++i) { + PyObject *arg = PyTuple_GET_ITEM(mc->xargs, i); + PyTuple_SET_ITEM(newargs, i, Py_NewRef(arg)); } return Py_BuildValue("ON", Py_TYPE(mc), newargs); } @@ -1797,7 +1775,12 @@ methodcaller_reduce(methodcallerobject *mc, PyObject *Py_UNUSED(ignored)) constructor = PyObject_VectorcallDict(partial, newargs, 2, mc->kwds); Py_DECREF(partial); - return Py_BuildValue("NO", constructor, mc->args); + PyObject *args = PyTuple_GetSlice(mc->xargs, 1, PyTuple_GET_SIZE(mc->xargs)); + if (!args) { + Py_DECREF(constructor); + return NULL; + } + return Py_BuildValue("NO", constructor, args); } } @@ -1822,7 +1805,6 @@ r.name('date', foo=1)."); static PyType_Slot methodcaller_type_slots[] = { {Py_tp_doc, (void *)methodcaller_doc}, {Py_tp_dealloc, methodcaller_dealloc}, - {Py_tp_call, 
methodcaller_call}, {Py_tp_traverse, methodcaller_traverse}, {Py_tp_clear, methodcaller_clear}, {Py_tp_methods, methodcaller_methods}, From 83c76bbf03215087252ccf710c3a022db33f3c6f Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Fri, 21 Jul 2023 13:39:13 +0200 Subject: [PATCH 3/6] fix bug in methodcaller_reduce --- Modules/_operator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/_operator.c b/Modules/_operator.c index 11e46007d6439f..78cd5135170d68 100644 --- a/Modules/_operator.c +++ b/Modules/_operator.c @@ -1750,7 +1750,7 @@ methodcaller_reduce(methodcallerobject *mc, PyObject *Py_UNUSED(ignored)) { if (!mc->kwds || PyDict_GET_SIZE(mc->kwds) == 0) { Py_ssize_t i; - Py_ssize_t newarg_size = PyTuple_GET_SIZE(mc->vectorcall_args); + Py_ssize_t newarg_size = PyTuple_GET_SIZE(mc->xargs); PyObject * newargs = PyTuple_New(newarg_size); if (newargs == NULL) return NULL; From 09e3ee329d799e22df8de6e7489caee97ac1ecbb Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Fri, 21 Jul 2023 13:53:46 +0200 Subject: [PATCH 4/6] restore methodcaller_call --- Modules/_operator.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/Modules/_operator.c b/Modules/_operator.c index 78cd5135170d68..69389e8c5b4080 100644 --- a/Modules/_operator.c +++ b/Modules/_operator.c @@ -1669,6 +1669,33 @@ methodcaller_traverse(methodcallerobject *mc, visitproc visit, void *arg) return 0; } +static PyObject * +methodcaller_call(methodcallerobject *mc, PyObject *args, PyObject *kw) +{ + PyObject *method, *obj, *result; + + if (!_PyArg_NoKeywords("methodcaller", kw)) + return NULL; + if (!_PyArg_CheckPositional("methodcaller", PyTuple_GET_SIZE(args), 1, 1)) + return NULL; + obj = PyTuple_GET_ITEM(args, 0); + method = PyObject_GetAttr(obj, mc->name); + if (method == NULL) + return NULL; + + + PyObject *cargs = PyTuple_GetSlice(mc->xargs, 1, PyTuple_GET_SIZE(mc->xargs)); + if (cargs == NULL) { + Py_DECREF(method); + return NULL; + } + + 
result = PyObject_Call(method, cargs, mc->kwds); + Py_DECREF(cargs); + Py_DECREF(method); + return result; +} + static PyObject * methodcaller_repr(methodcallerobject *mc) { @@ -1805,6 +1832,7 @@ r.name('date', foo=1)."); static PyType_Slot methodcaller_type_slots[] = { {Py_tp_doc, (void *)methodcaller_doc}, {Py_tp_dealloc, methodcaller_dealloc}, + {Py_tp_call, methodcaller_call}, {Py_tp_traverse, methodcaller_traverse}, {Py_tp_clear, methodcaller_clear}, {Py_tp_methods, methodcaller_methods}, From 5d715c52cc85425a807b208c26c2808a8744d4fc Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Sun, 23 Jul 2023 21:09:54 +0200 Subject: [PATCH 5/6] avoid looping over the dict twice --- Modules/_operator.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/Modules/_operator.c b/Modules/_operator.c index 69389e8c5b4080..d4a6a93a7c333f 100644 --- a/Modules/_operator.c +++ b/Modules/_operator.c @@ -1618,14 +1618,16 @@ methodcaller_new(PyTypeObject *type, PyObject *args, PyObject *kwds) nargs * sizeof(PyObject *)); } if (kwds) { - mc->vectorcall_kwnames = PySequence_Tuple(kwds); + const Py_ssize_t nkwds = PyDict_Size(kwds); + + mc->vectorcall_kwnames = PyTuple_New(nkwds); if (!mc->vectorcall_kwnames) { return NULL; } - Py_ssize_t i = 0; - Py_ssize_t ppos = 0; + Py_ssize_t i = 0, ppos = 0; while (PyDict_Next(kwds, &ppos, &key, &value)) { - mc->vectorcall_args[ nargs + i] = value; + PyTuple_SET_ITEM(mc->vectorcall_kwnames, i, Py_NewRef(key)); + mc->vectorcall_args[nargs + i] = value; // borrowed reference ++i; } } From 18806741d5925a376a3d4244d12507df9954b47f Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Mon, 24 Jul 2023 21:45:08 +0200 Subject: [PATCH 6/6] convert methodcaller_clear to void return type --- Modules/_operator.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Modules/_operator.c b/Modules/_operator.c index d4a6a93a7c333f..df1fba421aba41 100644 --- a/Modules/_operator.c +++ b/Modules/_operator.c @@ -1640,14 
+1640,13 @@ methodcaller_new(PyTypeObject *type, PyObject *args, PyObject *kwds) return (PyObject *)mc; } -static int +static void methodcaller_clear(methodcallerobject *mc) { Py_CLEAR(mc->name); Py_CLEAR(mc->xargs); Py_CLEAR(mc->kwds); Py_CLEAR(mc->vectorcall_kwnames); - return 0; } static void