8000 Merge branch main into main-slp · stackless-dev/stackless@9ab9a2f · GitHub
[go: up one dir, main page]

Skip to content
This repository was archived by the owner on Feb 13, 2025. It is now read-only.

Commit 9ab9a2f

Browse files
author
Anselm Kruis
committed
Merge branch main into main-slp
2 parents 0feca7a + 91234a1 commit 9ab9a2f

File tree

13 files changed

+285
-16
lines changed

13 files changed

+285
-16
lines changed

Doc/whatsnew/3.8.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -860,6 +860,10 @@ Optimizations
860860
methods up to 20--50%. (Contributed by Serhiy Storchaka in :issue:`23867`,
861861
:issue:`35582` and :issue:`36127`.)
862862

863+
* The ``LOAD_GLOBAL`` instruction now uses the new "per opcode cache" mechanism.
864+
It is about 40% faster now. (Contributed by Yury Selivanov and Inada Naoki in
865+
:issue:`26219`.)
866+
863867

864868
Build and C API Changes
865869
=======================

Include/code.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ typedef uint16_t _Py_CODEUNIT;
1717
# define _Py_OPARG(word) ((word) >> 8)
1818
#endif
1919

20+
typedef struct _PyOpcache _PyOpcache;
21+
2022
/* Bytecode object */
2123
typedef struct {
2224
PyObject_HEAD
@@ -49,6 +51,21 @@ typedef struct {
4951
Type is a void* to keep the format private in codeobject.c to force
5052
people to go through the proper APIs. */
5153
void *co_extra;
54+
55+
/* Per-opcode just-in-time cache
56+
*
57+
* To reduce cache size, we use indirect mapping from opcode index to
58+
* cache object:
59+
* cache = co_opcache[co_opcache_map[next_instr - first_instr] - 1]
60+
*/
61+
62+
// co_opcache_map is indexed by (next_instr - first_instr).
63+
// * 0 means there is no cache for this opcode.
64+
// * n > 0 means there is cache in co_opcache[n-1].
65+
unsigned char *co_opcache_map;
66+
_PyOpcache *co_opcache;
67+
int co_opcache_flag; // used to determine when to create a cache.
68+
unsigned char co_opcache_size; // length of co_opcache.
5269
} PyCodeObject;
5370

5471
/* Masks for co_flags above */

Include/internal/pycore_ceval.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,9 @@ PyAPI_FUNC(void) _PyEval_SignalAsyncExc(
3131
PyAPI_FUNC(void) _PyEval_ReInitThreads(
3232
_PyRuntimeState *runtime);
3333

34+
/* Private function */
35+
void _PyEval_Fini(void);
36+
3437
#ifdef __cplusplus
3538
}
3639
#endif

Include/internal/pycore_code.h

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
#ifndef Py_INTERNAL_CODE_H
2+
#define Py_INTERNAL_CODE_H
3+
#ifdef __cplusplus
4+
extern "C" {
5+
#endif
6+
7+
typedef struct {
8+
PyObject *ptr; /* Cached pointer (borrowed reference) */
9+
uint64_t globals_ver; /* ma_version of global dict */
10+
uint64_t builtins_ver; /* ma_version of builtin dict */
11+
} _PyOpcache_LoadGlobal;
12+
13+
/* One per-opcode cache entry.  `u` is a union of opcode-specific payloads;
   its only member at present is `lg` (_PyOpcache_LoadGlobal). */
struct _PyOpcache {
14+
union {
15+
_PyOpcache_LoadGlobal lg;
16+
} u;
17+
/* NOTE(review): the meaning of `optimized` is not visible in this header;
   presumably a flag marking an entry that has been filled in -- confirm
   against the code that uses it. */
char optimized;
18+
};
19+
20+
/* Private API */
21+
int _PyCode_InitOpcache(PyCodeObject *co);
22+
23+
24+
#ifdef __cplusplus
25+
}
26+
#endif
27+
#endif /* !Py_INTERNAL_CODE_H */

Lib/test/test_dict_version.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -80,14 +80,14 @@ def test_setitem_same_value(self):
8080

8181
# setting a key to the same value with dict.__setitem__
8282
# must not change the version
83-
self.check_version_changed(d, d.__setitem__, 'key', value)
83+
self.check_version_dont_change(d, d.__setitem__, 'key', value)
8484

8585
# setting a key to the same value with dict.update
8686
# must not change the version
87-
self.check_version_changed(d, d.update, key=value)
87+
self.check_version_dont_change(d, d.update, key=value)
8888

8989
d2 = self.new_dict(key=value)
90-
self.check_version_changed(d, d.update, d2)
90+
self.check_version_dont_change(d, d.update, d2)
9191

9292
def test_setitem_equal(self):
9393
class AlwaysEqual:

Makefile.pre.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1122,6 +1122,7 @@ PYTHON_HEADERS= \
11221122
$(srcdir)/Include/internal/pycore_accu.h \
11231123
$(srcdir)/Include/internal/pycore_atomic.h \
11241124
$(srcdir)/Include/internal/pycore_ceval.h \
1125+
$(srcdir)/Include/internal/pycore_code.h \
11251126
$(srcdir)/Include/internal/pycore_condvar.h \
11261127
$(srcdir)/Include/internal/pycore_context.h \
11271128
$(srcdir)/Include/internal/pycore_fileutils.h \
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Implemented a per opcode cache mechanism; the ``LOAD_GLOBAL`` instruction uses
2+
it. ``LOAD_GLOBAL`` is now about 40% faster. Contributed by Yury Selivanov
3+
and Inada Naoki.

Objects/codeobject.c

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@
22

33
#include "Python.h"
44
#include "code.h"
5+
#include "opcode.h"
56
#include "structmember.h"
7+
#include "pycore_code.h"
68
#include "pycore_pystate.h"
79
#include "pycore_tupleobject.h"
810
#include "clinic/codeobject.c.h"
@@ -233,9 +235,56 @@ PyCode_New(int argcount, int posonlyargcount, int kwonlyargcount,
233235
co->co_zombieframe = NULL;
234236
co->co_weakreflist = NULL;
235237
co->co_extra = NULL;
238+
239+
co->co_opcache_map = NULL;
240+
co->co_opcache = NULL;
241+
co->co_opcache_flag = 0;
242+
co->co_opcache_size = 0;
236243
return co;
237244
}
238245

246+
int
247+
_PyCode_InitOpcache(PyCodeObject *co)
248+
{
249+
Py_ssize_t co_size = PyBytes_Size(co->co_code) / sizeof(_Py_CODEUNIT);
250+
co->co_opcache_map = (unsigned char *)PyMem_Calloc(co_size, 1);
251+
if (co->co_opcache_map == NULL) {
252+
return -1;
253+
}
254+
255+
_Py_CODEUNIT *opcodes = (_Py_CODEUNIT*)PyBytes_AS_STRING(co->co_code);
256+
Py_ssize_t opts = 0;
257+
258+
for (Py_ssize_t i = 0; i < co_size;) {
259+
unsigned char opcode = _Py_OPCODE(opcodes[i]);
260+
i++; // 'i' is now aligned to (next_instr - first_instr)
261+
262+
// TODO: LOAD_METHOD, LOAD_ATTR
263+
if (opcode == LOAD_GLOBAL) {
264+
co->co_opcache_map[i] = ++opts;
265+
if (opts > 254) {
266+
break;
267+
}
268+
}
269+
}
270+
271+
if (opts) {
272+
co->co_opcache = (_PyOpcache *)PyMem_Calloc(opts, sizeof(_PyOpcache));
273+
if (co->co_opcache == NULL) {
274+
PyMem_FREE(co->co_opcache_map);
275+
return -1;
276+
}
277+
}
278+
else {
279+
PyMem_FREE(co->co_opcache_map);
280+
co->co_opcache_map = NULL;
281+
co->co_opcache = NULL;
282+
}
283+
284+
co->co_opcache_size = opts;
285+
return 0;
286+
}
287+
239288
PyCodeObject *
240289
PyCode_NewEmpty(const char *filename, const char *funcname, int firstlineno)
241290
{
@@ -458,6 +507,15 @@ code_new(PyTypeObject *type, PyObject *args, PyObject *kw)
458507
static void
459508
code_dealloc(PyCodeObject *co)
460509
{
510+
if (co->co_opcache != NULL) {
511+
PyMem_FREE(co->co_opcache);
512+
}
513+
if (co->co_opcache_map != NULL) {
514+
PyMem_FREE(co->co_opcache_map);
515+
}
516+
co->co_opcache_flag = 0;
517+
co->co_opcache_size = 0;
518+
461519
if (co->co_extra != NULL) {
462520
PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
463521
_PyCodeObjectExtra *co_extra = co->co_extra;
@@ -504,6 +562,13 @@ code_sizeof(PyCodeObject *co, PyObject *Py_UNUSED(args))
504562
res += sizeof(_PyCodeObjectExtra) +
505563
(co_extra->ce_size-1) * sizeof(co_extra->ce_extras[0]);
506564
}
565+
if (co->co_opcache != NULL) {
566+
assert(co->co_opcache_map != NULL);
567+
// co_opcache_map
568+
res += PyBytes_GET_SIZE(co->co_code) / sizeof(_Py_CODEUNIT);
569+
// co_opcache
570+
res += co->co_opcache_size * sizeof(_PyOpcache);
571+
}
507572
return PyLong_FromSsize_t(res);
508573
}
509574

Objects/dictobject.c

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1080,20 +1080,21 @@ insertdict(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject *value)
10801080
return 0;
10811081
}
10821082

1083-
if (_PyDict_HasSplitTable(mp)) {
1084-
mp->ma_values[ix] = value;
1085-
if (old_value == NULL) {
1086-
/* pending state */
1087-
assert(ix == mp->ma_used);
1088-
mp->ma_used++;
1083+
if (old_value != value) {
1084+
if (_PyDict_HasSplitTable(mp)) {
1085+
mp->ma_values[ix] = value;
1086+
if (old_value == NULL) {
1087+
/* pending state */
1088+
assert(ix == mp->ma_used);
1089+
mp->ma_used++;
1090+
}
10891091
}
1092+
else {
1093+
assert(old_value != NULL);
1094+
DK_ENTRIES(mp->ma_keys)[ix].me_value = value;
1095+
}
1096+
mp->ma_version_tag = DICT_NEXT_VERSION();
10901097
}
1091-
else {
1092-
assert(old_value != NULL);
1093-
DK_ENTRIES(mp->ma_keys)[ix].me_value = value;
1094-
}
1095-
1096-
mp->ma_version_tag = DICT_NEXT_VERSION();
10971098
Py_XDECREF(old_value); /* which **CAN** re-enter (see issue #22653) */
10981099
ASSERT_CONSISTENT(mp);
10991100
Py_DECREF(key);

PCbuild/pythoncore.vcxproj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,7 @@
164164
<ClInclude Include="..\Include\import.h" />
165165
<ClInclude Include="..\Include\internal\pycore_accu.h" />
166166
<ClInclude Include="..\Include\internal\pycore_atomic.h" />
167+
<ClInclude Include="..\Include\internal\pycore_code.h" />
167168
<ClInclude Include="..\Include\internal\pycore_ceval.h" />
168169
<ClInclude Include="..\Include\internal\pycore_condvar.h" />
169170
<ClInclude Include="..\Include\internal\pycore_context.h" />

PCbuild/pythoncore.vcxproj.filters

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,9 @@
192192
<ClInclude Include="..\Include\internal\pycore_atomic.h">
193193
<Filter>Include</Filter>
194194
</ClInclude>
195+
<ClInclude Include="..\Include\internal\pycore_code.h">
196+
<Filter>Include</Filter>
197+
</ClInclude>
195198
<ClInclude Include="..\Include\internal\pycore_ceval.h">
196199
<Filter>Include</Filter>
197200
</ClInclude>

0 commit comments

Comments
 (0)
0