bpo-37358: Use vectorcall for functools.partial (GH-14284) · python/cpython@ed184c0 · GitHub
[go: up one dir, main page]

Skip to content

Commit ed184c0

Browse files
jdemeyer authored and
miss-islington committed
bpo-37358: Use vectorcall for functools.partial (GH-14284)
https://bugs.python.org/issue37358
1 parent dc3f99f commit ed184c0

File tree

2 files changed

+99
-66
lines changed

2 files changed

+99
-66
lines changed
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Optimized ``functools.partial`` by using vectorcall.

Modules/_functoolsmodule.c

Lines changed: 98 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,15 @@ typedef struct {
1818
PyObject *fn;
1919
PyObject *args;
2020
PyObject *kw;
21-
PyObject *dict;
21+
PyObject *dict; /* __dict__ */
2222
PyObject *weakreflist; /* List of weak references */
23-
int use_fastcall;
23+
vectorcallfunc vectorcall;
2424
} partialobject;
2525

2626
static PyTypeObject partial_type;
2727

28+
static void partial_setvectorcall(partialobject *pto);
29+
2830
static PyObject *
2931
partial_new(PyTypeObject *type, PyObject *args, PyObject *kw)
3032
{
@@ -107,8 +109,7 @@ partial_new(PyTypeObject *type, PyObject *args, PyObject *kw)
107109
return NULL;
108110
}
109111

110-
pto->use_fastcall = (_PyVectorcall_Function(func) != NULL);
111-
112+
partial_setvectorcall(pto);
112113
return (PyObject *)pto;
113114
}
114115

@@ -126,77 +127,107 @@ partial_dealloc(partialobject *pto)
126127
Py_TYPE(pto)->tp_free(pto);
127128
}
128129

130+
131+
/* Merging keyword arguments using the vectorcall convention is messy, so
132+
* if we would need to do that, we stop using vectorcall and fall back
133+
* to using partial_call() instead. */
134+
_Py_NO_INLINE static PyObject *
135+
partial_vectorcall_fallback(partialobject *pto, PyObject *const *args,
136+
size_t nargsf, PyObject *kwnames)
137+
{
138+
pto->vectorcall = NULL;
139+
Py_ssize_t nargs = PyVectorcall_NARGS(nargsf);
140+
return _PyObject_MakeTpCall((PyObject *)pto, args, nargs, kwnames);
141+
}
142+
129143
static PyObject *
130-
partial_fastcall(partialobject *pto, PyObject **args, Py_ssize_t nargs,
131-
PyObject *kwargs)
144+
partial_vectorcall(partialobject *pto, PyObject *const *args,
145+
size_t nargsf, PyObject *kwnames)
132146
{
133-
PyObject *small_stack[_PY_FASTCALL_SMALL_STACK];
134-
PyObject *ret;
135-
PyObject **stack, **stack_buf = NULL;
136-
Py_ssize_t nargs2, pto_nargs;
147+
/* pto->kw is mutable, so need to check every time */
148+
if (PyDict_GET_SIZE(pto->kw)) {
149+
return partial_vectorcall_fallback(pto, args, nargsf, kwnames);
150+
}
151+
152+
Py_ssize_t nargs = PyVectorcall_NARGS(nargsf);
153+
Py_ssize_t nargs_total = nargs;
154+
if (kwnames != NULL) {
155+
nargs_total += PyTuple_GET_SIZE(kwnames);
156+
}
157+
158+
PyObject **pto_args = _PyTuple_ITEMS(pto->args);
159+
Py_ssize_t pto_nargs = PyTuple_GET_SIZE(pto->args);
137160

138-
pto_nargs = PyTuple_GET_SIZE(pto->args);
139-
nargs2 = pto_nargs + nargs;
161+
/* Fast path if we're called without arguments */
162+
if (nargs_total == 0) {
163+
return _PyObject_Vectorcall(pto->fn, pto_args, pto_nargs, NULL);
164+
}
140165

141-
if (pto_nargs == 0) {
142-
stack = args;
166+
/* Fast path using PY_VECTORCALL_ARGUMENTS_OFFSET to prepend a single
167+
* positional argument */
168+
if (pto_nargs == 1 && (nargsf & PY_VECTORCALL_ARGUMENTS_OFFSET)) {
169+
PyObject **newargs = (PyObject **)args - 1;
170+
PyObject *tmp = newargs[0];
171+
newargs[0] = pto_args[0];
172+
PyObject *ret = _PyObject_Vectorcall(pto->fn, newargs, nargs + 1, kwnames);
173+
newargs[0] = tmp;
174+
return ret;
143175
}
144-
else if (nargs == 0) {
145-
stack = _PyTuple_ITEMS(pto->args);
176+
177+
Py_ssize_t newnargs_total = pto_nargs + nargs_total;
178+
179+
PyObject *small_stack[_PY_FASTCALL_SMALL_STACK];
180+
PyObject *ret;
181+
PyObject **stack;
182+
183+
if (newnargs_total <= (Py_ssize_t)Py_ARRAY_LENGTH(small_stack)) {
184+
stack = small_stack;
146185
}
147186
else {
148-
if (nargs2 <= (Py_ssize_t)Py_ARRAY_LENGTH(small_stack)) {
149-
stack = small_stack;
150-
}
151-
else {
152-
stack_buf = PyMem_Malloc(nargs2 * sizeof(PyObject *));
153-
if (stack_buf == NULL) {
154-
PyErr_NoMemory();
155-
return NULL;
156-
}
157-
stack = stack_buf;
187+
stack = PyMem_Malloc(newnargs_total * sizeof(PyObject *));
188+
if (stack == NULL) {
189+
PyErr_NoMemory();
190+
return NULL;
158191
}
159-
160-
/* use borrowed references */
161-
memcpy(stack,
162-
_PyTuple_ITEMS(pto->args),
163-
pto_nargs * sizeof(PyObject*));
164-
memcpy(&stack[pto_nargs],
165-
args,
166-
nargs * sizeof(PyObject*));
167192
}
168193

169-
ret = _PyObject_FastCallDict(pto->fn, stack, nargs2, kwargs);
170-
PyMem_Free(stack_buf);
194+
/* Copy to new stack, using borrowed references */
195+
memcpy(stack, pto_args, pto_nargs * sizeof(PyObject*));
196+
memcpy(stack + pto_nargs, args, nargs_total * sizeof(PyObject*));
197+
198+
ret = _PyObject_Vectorcall(pto->fn, stack, pto_nargs + nargs, kwnames);
199+
if (stack != small_stack) {
200+
PyMem_Free(stack);
201+
}
171202
return ret;
172203
}
173204

174-
static PyObject *
175-
partial_call_impl(partialobject *pto, PyObject *args, PyObject *kwargs)
205+
/* Set pto->vectorcall depending on the parameters of the partial object */
206+
static void
207+
partial_setvectorcall(partialobject *pto)
176208
{
177-
PyObject *ret, *args2;
178-
179-
/* Note: tupleconcat() is optimized for empty tuples */
180-
args2 = PySequence_Concat(pto->args, args);
181-
if (args2 == NULL) {
182-
return NULL;
209+
if (_PyVectorcall_Function(pto->fn) == NULL) {
210+
/* Don't use vectorcall if the underlying function doesn't support it */
211+
pto->vectorcall = NULL;
212+
}
213+
/* We could have a special case if there are no arguments,
214+
* but that is unlikely (why use partial without arguments?),
215+
* so we don't optimize that */
216+
else {
217+
pto->vectorcall = (vectorcallfunc)partial_vectorcall;
183218
}
184-
assert(PyTuple_Check(args2));
185-
186-
ret = PyObject_Call(pto->fn, args2, kwargs);
187-
Py_DECREF(args2);
188-
return ret;
189219
}
190220

221+
191222
static PyObject *
192223
partial_call(partialobject *pto, PyObject *args, PyObject *kwargs)
193224
{
194-
PyObject *kwargs2, *res;
195-
196-
assert (PyCallable_Check(pto->fn));
197-
assert (PyTuple_Check(pto->args));
198-
assert (PyDict_Check(pto->kw));
225+
assert(PyCallable_Check(pto->fn));
226+
assert(PyTuple_Check(pto->args));
227+
assert(PyDict_Check(pto->kw));
199228

229+
/* Merge keywords */
230+
PyObject *kwargs2;
200231
if (PyDict_GET_SIZE(pto->kw) == 0) {
201232
/* kwargs can be NULL */
202233
kwargs2 = kwargs;
@@ -219,16 +250,16 @@ partial_call(partialobject *pto, PyObject *args, PyObject *kwargs)
219250
}
220251
}
221252

222-
223-
if (pto->use_fastcall) {
224-
res = partial_fastcall(pto,
225-
_PyTuple_ITEMS(args),
226-
PyTuple_GET_SIZE(args),
227-
kwargs2);
228-
}
229-
else {
230-
res = partial_call_impl(pto, args, kwargs2);
253+
/* Merge positional arguments */
254+
/* Note: tupleconcat() is optimized for empty tuples */
255+
PyObject *args2 = PySequence_Concat(pto->args, args);
256+
if (args2 == NULL) {
257+
Py_XDECREF(kwargs2);
258+
return NULL;
231259
}
260+
261+
PyObject *res = PyObject_Call(pto->fn, args2, kwargs2);
262+
Py_DECREF(args2);
232263
Py_XDECREF(kwargs2);
233264
return res;
234265
}
@@ -365,11 +396,11 @@ partial_setstate(partialobject *pto, PyObject *state)
365396
Py_INCREF(dict);
366397

367398
Py_INCREF(fn);
368-
pto->use_fastcall = (_PyVectorcall_Function(fn) != NULL);
369399
Py_SETREF(pto->fn, fn);
370400
Py_SETREF(pto->args, fnargs);
371401
Py_SETREF(pto->kw, kw);
372402
Py_XSETREF(pto->dict, dict);
403+
partial_setvectorcall(pto);
373404
Py_RETURN_NONE;
374405
}
375406

@@ -386,7 +417,7 @@ static PyTypeObject partial_type = {
386417
0, /* tp_itemsize */
387418
/* methods */
388419
(destructor)partial_dealloc, /* tp_dealloc */
389-
0, /* tp_vectorcall_offset */
420+
offsetof(partialobject, vectorcall),/* tp_vectorcall_offset */
390421
0, /* tp_getattr */
391422
0, /* tp_setattr */
392423
0, /* tp_as_async */
@@ -401,7 +432,8 @@ static PyTypeObject partial_type = {
401432
PyObject_GenericSetAttr, /* tp_setattro */
402433
0, /* tp_as_buffer */
403434
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
404-
Py_TPFLAGS_BASETYPE, /* tp_flags */
435+
Py_TPFLAGS_BASETYPE |
436+
_Py_TPFLAGS_HAVE_VECTORCALL, /* tp_flags */
405437
partial_doc, /* tp_doc */
406438
(traverseproc)partial_traverse, /* tp_traverse */
407439
0, /* tp_clear */

0 commit comments

Comments
 (0)
0