@@ -18,13 +18,15 @@ typedef struct {
18
18
PyObject * fn ;
19
19
PyObject * args ;
20
20
PyObject * kw ;
21
- PyObject * dict ;
21
+ PyObject * dict ; /* __dict__ */
22
22
PyObject * weakreflist ; /* List of weak references */
23
- int use_fastcall ;
23
+ vectorcallfunc vectorcall ;
24
24
} partialobject ;
25
25
26
26
static PyTypeObject partial_type ;
27
27
28
+ static void partial_setvectorcall (partialobject * pto );
29
+
28
30
static PyObject *
29
31
partial_new (PyTypeObject * type , PyObject * args , PyObject * kw )
30
32
{
@@ -107,8 +109,7 @@ partial_new(PyTypeObject *type, PyObject *args, PyObject *kw)
107
109
return NULL ;
108
110
}
109
111
110
- pto -> use_fastcall = (_PyVectorcall_Function (func ) != NULL );
111
-
112
+ partial_setvectorcall (pto );
112
113
return (PyObject * )pto ;
113
114
}
114
115
@@ -126,77 +127,107 @@ partial_dealloc(partialobject *pto)
126
127
Py_TYPE (pto )-> tp_free (pto );
127
128
}
128
129
130
+
131
+ /* Merging keyword arguments using the vectorcall convention is messy, so
132
+ * if we would need to do that, we stop using vectorcall and fall back
133
+ * to using partial_call() instead. */
134
+ _Py_NO_INLINE static PyObject *
135
+ partial_vectorcall_fallback (partialobject * pto , PyObject * const * args ,
136
+ size_t nargsf , PyObject * kwnames )
137
+ {
138
+ pto -> vectorcall = NULL ;
139
+ Py_ssize_t nargs = PyVectorcall_NARGS (nargsf );
140
+ return _PyObject_MakeTpCall ((PyObject * )pto , args , nargs , kwnames );
141
+ }
142
+
129
143
static PyObject *
130
- partial_fastcall (partialobject * pto , PyObject * * args , Py_ssize_t nargs ,
131
- PyObject * kwargs )
144
+ partial_vectorcall (partialobject * pto , PyObject * const * args ,
145
+ size_t nargsf , PyObject * kwnames )
132
146
{
133
- PyObject * small_stack [_PY_FASTCALL_SMALL_STACK ];
134
- PyObject * ret ;
135
- PyObject * * stack , * * stack_buf = NULL ;
136
- Py_ssize_t nargs2 , pto_nargs ;
147
+ /* pto->kw is mutable, so need to check every time */
148
+ if (PyDict_GET_SIZE (pto -> kw )) {
149
+ return partial_vectorcall_fallback (pto , args , nargsf , kwnames );
150
+ }
151
+
152
+ Py_ssize_t nargs = PyVectorcall_NARGS (nargsf );
153
+ Py_ssize_t nargs_total = nargs ;
154
+ if (kwnames != NULL ) {
155
+ nargs_total += PyTuple_GET_SIZE (kwnames );
156
+ }
157
+
158
+ PyObject * * pto_args = _PyTuple_ITEMS (pto -> args );
159
+ Py_ssize_t pto_nargs = PyTuple_GET_SIZE (pto -> args );
137
160
138
- pto_nargs = PyTuple_GET_SIZE (pto -> args );
139
- nargs2 = pto_nargs + nargs ;
161
+ /* Fast path if we're called without arguments */
162
+ if (nargs_total == 0 ) {
163
+ return _PyObject_Vectorcall (pto -> fn , pto_args , pto_nargs , NULL );
164
+ }
140
165
141
- if (pto_nargs == 0 ) {
142
- stack = args ;
166
+ /* Fast path using PY_VECTORCALL_ARGUMENTS_OFFSET to prepend a single
167
+ * positional argument */
168
+ if (pto_nargs == 1 && (nargsf & PY_VECTORCALL_ARGUMENTS_OFFSET )) {
169
+ PyObject * * newargs = (PyObject * * )args - 1 ;
170
+ PyObject * tmp = newargs [0 ];
171
+ newargs [0 ] = pto_args [0 ];
172
+ PyObject * ret = _PyObject_Vectorcall (pto -> fn , newargs , nargs + 1 , kwnames );
173
+ newargs [0 ] = tmp ;
174
+ return ret ;
143
175
}
144
- else if (nargs == 0 ) {
145
- stack = _PyTuple_ITEMS (pto -> args );
176
+
177
+ Py_ssize_t newnargs_total = pto_nargs + nargs_total ;
178
+
179
+ PyObject * small_stack [_PY_FASTCALL_SMALL_STACK ];
180
+ PyObject * ret ;
181
+ PyObject * * stack ;
182
+
183
+ if (newnargs_total <= (Py_ssize_t )Py_ARRAY_LENGTH (small_stack )) {
184
+ stack = small_stack ;
146
185
}
147
186
else {
148
- if (nargs2 <= (Py_ssize_t )Py_ARRAY_LENGTH (small_stack )) {
149
- stack = small_stack ;
150
- }
151
- else {
152
- stack_buf = PyMem_Malloc (nargs2 * sizeof (PyObject * ));
153
- if (stack_buf == NULL ) {
154
- PyErr_NoMemory ();
155
- return NULL ;
156
- }
157
- stack = stack_buf ;
187
+ stack = PyMem_Malloc (newnargs_total * sizeof (PyObject * ));
188
+ if (stack == NULL ) {
189
+ PyErr_NoMemory ();
190
+ return NULL ;
158
191
}
159
-
160
- /* use borrowed references */
161
- memcpy (stack ,
162
- _PyTuple_ITEMS (pto -> args ),
163
- pto_nargs * sizeof (PyObject * ));
164
- memcpy (& stack [pto_nargs ],
165
- args ,
166
- nargs * sizeof (PyObject * ));
167
192
}
168
193
169
- ret = _PyObject_FastCallDict (pto -> fn , stack , nargs2 , kwargs );
170
- PyMem_Free (stack_buf );
194
+ /* Copy to new stack, using borrowed references */
195
+ memcpy (stack , pto_args , pto_nargs * sizeof (PyObject * ));
196
+ memcpy (stack + pto_nargs , args , nargs_total * sizeof (PyObject * ));
197
+
198
+ ret = _PyObject_Vectorcall (pto -> fn , stack , pto_nargs + nargs , kwnames );
199
+ if (stack != small_stack ) {
200
+ PyMem_Free (stack );
201
+ }
171
202
return ret ;
172
203
}
173
204
174
- static PyObject *
175
- partial_call_impl (partialobject * pto , PyObject * args , PyObject * kwargs )
205
+ /* Set pto->vectorcall depending on the parameters of the partial object */
206
+ static void
207
+ partial_setvectorcall (partialobject * pto )
176
208
{
177
- PyObject * ret , * args2 ;
178
-
179
- /* Note: tupleconcat() is optimized for empty tuples */
180
- args2 = PySequence_Concat (pto -> args , args );
181
- if (args2 == NULL ) {
182
- return NULL ;
209
+ if (_PyVectorcall_Function (pto -> fn ) == NULL ) {
210
+ /* Don't use vectorcall if the underlying function doesn't support it */
211
+ pto -> vectorcall = NULL ;
212
+ }
213
+ /* We could have a special case if there are no arguments,
214
+ * but that is unlikely (why use partial without arguments?),
215
+ * so we don't optimize that */
216
+ else {
217
+ pto -> vectorcall = (vectorcallfunc )partial_vectorcall ;
183
218
}
184
- assert (PyTuple_Check (args2 ));
185
-
186
- ret = PyObject_Call (pto -> fn , args2 , kwargs );
187
- Py_DECREF (args2 );
188
- return ret ;
189
219
}
190
220
221
+
191
222
static PyObject *
192
223
partial_call (partialobject * pto , PyObject * args , PyObject * kwargs )
193
224
{
194
- PyObject * kwargs2 , * res ;
195
-
196
- assert (PyCallable_Check (pto -> fn ));
197
- assert (PyTuple_Check (pto -> args ));
198
- assert (PyDict_Check (pto -> kw ));
225
+ assert (PyCallable_Check (pto -> fn ));
226
+ assert (PyTuple_Check (pto -> args ));
227
+ assert (PyDict_Check (pto -> kw ));
199
228
229
+ /* Merge keywords */
230
+ PyObject * kwargs2 ;
200
231
if (PyDict_GET_SIZE (pto -> kw ) == 0 ) {
201
232
/* kwargs can be NULL */
202
233
kwargs2 = kwargs ;
@@ -219,16 +250,16 @@ partial_call(partialobject *pto, PyObject *args, PyObject *kwargs)
219
250
}
220
251
}
221
252
222
-
223
- if (pto -> use_fastcall ) {
224
- res = partial_fastcall (pto ,
225
- _PyTuple_ITEMS (args ),
226
- PyTuple_GET_SIZE (args ),
227
- kwargs2 );
228
- }
229
- else {
230
- res = partial_call_impl (pto , args , kwargs2 );
253
+ /* Merge positional arguments */
254
+ /* Note: tupleconcat() is optimized for empty tuples */
255
+ PyObject * args2 = PySequence_Concat (pto -> args , args );
256
+ if (args2 == NULL ) {
257
+ Py_XDECREF (kwargs2 );
258
+ return NULL ;
231
259
}
260
+
261
+ PyObject * res = PyObject_Call (pto -> fn , args2 , kwargs2 );
262
+ Py_DECREF (args2 );
232
263
Py_XDECREF (kwargs2 );
233
264
return res ;
234
265
}
@@ -365,11 +396,11 @@ partial_setstate(partialobject *pto, PyObject *state)
365
396
Py_INCREF (dict );
366
397
367
398
Py_INCREF (fn );
368
- pto -> use_fastcall = (_PyVectorcall_Function (fn ) != NULL );
369
399
Py_SETREF (pto -> fn , fn );
370
400
Py_SETREF (pto -> args , fnargs );
371
401
Py_SETREF (pto -> kw , kw );
372
402
Py_XSETREF (pto -> dict , dict );
403
+ partial_setvectorcall (pto );
373
404
Py_RETURN_NONE ;
374
405
}
375
406
@@ -386,7 +417,7 @@ static PyTypeObject partial_type = {
386
417
0 , /* tp_itemsize */
387
418
/* methods */
388
419
(destructor )partial_dealloc , /* tp_dealloc */
389
- 0 , /* tp_vectorcall_offset */
420
+ offsetof( partialobject , vectorcall ), /* tp_vectorcall_offset */
390
421
0 , /* tp_getattr */
391
422
0 , /* tp_setattr */
392
423
0 , /* tp_as_async */
@@ -401,7 +432,8 @@ static PyTypeObject partial_type = {
401
432
PyObject_GenericSetAttr , /* tp_setattro */
402
433
0 , /* tp_as_buffer */
403
434
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
404
- Py_TPFLAGS_BASETYPE , /* tp_flags */
435
+ Py_TPFLAGS_BASETYPE |
436
+ _Py_TPFLAGS_HAVE_VECTORCALL , /* tp_flags */
405
437
partial_doc , /* tp_doc */
406
438
(traverseproc )partial_traverse , /* tp_traverse */
407
439
0 , /* tp_clear */
0 commit comments