8000 py/qstr: Add support for sorted qstr pools. · micropython/micropython@64c79a5 · GitHub
[go: up one dir, main page]

Skip to content

Commit 64c79a5

Browse files
jimmodpgeorge
authored andcommitted
py/qstr: Add support for sorted qstr pools.
This provides a significant performance boost for qstr_find_strn, which is called a lot during parsing and loading of .mpy files, as well as interning of string objects (which happens in most string methods that return new strings). Also adds comments to explain the "static" qstrs. These are part of the .mpy ABI and avoid needing to duplicate string data for QSTRs known to already be in the firmware. The static pool isn't currently sorted, but in the future we could either split the static pool into the sorted regions, or in the next .mpy version just sort them. Based on initial work done by @amirgon in #6896. This work was funded through GitHub Sponsors. Signed-off-by: Jim Mussared <jim.mussared@gmail.com>
1 parent e910533 commit 64c79a5

File tree

5 files changed

+200
-54
lines changed

5 files changed

+200
-54
lines changed

py/makeqstrdata.py

Lines changed: 84 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,8 @@
5252
codepoint2name[ord("|")] = "pipe"
5353
codepoint2name[ord("~")] = "tilde"
5454

55-
# static qstrs, should be sorted
55+
# static qstrs, these must maintain a specific order for .mpy compatibility
56+
# See QSTR_LAST_STATIC at the top of py/persistentcode.c
5657

5758
static_qstr_list = [
5859
"",
@@ -222,6 +223,71 @@
222223
"zip",
223224
]
224225

226+
# Additional QSTRs that must have index <255 because they are stored in
227+
# `mp_binary_op_method_name` and `mp_unary_op_method_name` (see py/objtype.c).
228+
# These are not part of the .mpy compatibility list, but we place them in the
229+
# fixed unsorted pool (i.e. QDEF0) to ensure their indices are small.
230+
operator_qstr_list = {
231+
"__bool__",
232+
"__pos__",
233+
"__neg__",
234+
"__invert__",
235+
"__abs__",
236+
"__float__",
237+
"__complex__",
238+
"__sizeof__",
239+
"__lt__",
240+
"__gt__",
241+
"__eq__",
242+
"__le__",
243+
"__ge__",
244+
"__ne__",
245+
"__contains__",
246+
"__iadd__",
247+
"__isub__",
248+
"__imul__",
249+
"__imatmul__",
250+
"__ifloordiv__",
251+
"__itruediv__",
252+
"__imod__",
253+
"__ipow__",
254+
"__ior__",
255+
"__ixor__",
256+
"__iand__",
257+
"__ilshift__",
258+
"__irshift__",
259+
"__add__",
260+
"__sub__",
261+
"__mul__",
262+
"__matmul__",
263+
"__floordiv__",
264+
"__truediv__",
265+
"__mod__",
266+
"__divmod__",
267+
"__pow__",
268+
"__or__",
269+
"__xor__",
270+
"__and__",
271+
"__lshift__",
272+
"__rshift__",
273+
"__radd__",
274+
"__rsub__",
275+
"__rmul__",
276+
"__rmatmul__",
277+
"__rfloordiv__",
278+
"__rtruediv__",
279+
"__rmod__",
280+
"__rpow__",
281+
"__ror__",
282+
"__rxor__",
283+
"__rand__",
284+
"__rlshift__",
285+
"__rrshift__",
286+
"__get__",
287+
"__set__",
288+
"__delete__",
289+
}
290+
225291

226292
# this must match the equivalent function in qstr.c
227293
def compute_hash(qstr, bytes_hash):
@@ -244,22 +310,13 @@ def esc_char(m):
244310
return re.sub(r"[^A-Za-z0-9_]", esc_char, qst)
245311

246312

313+
static_qstr_list_ident = list(map(qstr_escape, static_qstr_list))
314+
315+
247316
def parse_input_headers(infiles):
248317
qcfgs = {}
249318
qstrs = {}
250319

251-
# add static qstrs
252-
for qstr in static_qstr_list:
253-
# work out the corresponding qstr name
254-
ident = qstr_escape(qstr)
255-
256-
# don't add duplicates
257-< F438 /span>
assert ident not in qstrs
258-
259-
# add the qstr to the list, with order number to retain original order in file
260-
order = len(qstrs) - 300000
261-
qstrs[ident] = (order, ident, qstr)
262-
263320
# read the qstrs in from the input files
264321
for infile in infiles:
265322
with open(infile, "rt") as f:
@@ -294,22 +351,12 @@ def parse_input_headers(infiles):
294351
ident = qstr_escape(qstr)
295352

296353
# don't add duplicates
354+
if ident in static_qstr_list_ident:
355+
continue
297356
if ident in qstrs:
298357
continue
299358

300-
# add the qstr to the list, with order number to retain original order in file
301-
order = len(qstrs)
302-
# but put special method names like __add__ at the top of list, so
303-
# that their id's fit into a byte
304-
if ident == "":
305-
# Sort empty qstr above all still
306-
order = -200000
307-
elif ident == "__dir__":
308-
# Put __dir__ after empty qstr for builtin dir() to work
309-
order = -190000
310-
elif ident.startswith("__"):
311-
order -= 100000
312-
qstrs[ident] = (order, ident, qstr)
359+
qstrs[ident] = (ident, qstr)
313360

314361
if not qcfgs:
315362
sys.stderr.write("ERROR: Empty preprocessor output - check for errors above\n")
@@ -348,12 +395,19 @@ def print_qstr_data(qcfgs, qstrs):
348395
print("")
349396

350397
# add NULL qstr with no hash or data
351-
print('QDEF(MP_QSTRnull, 0, 0, "")')
398+
print('QDEF0(MP_QSTRnull, 0, 0, "")')
399+
400+
# add static qstrs to the first unsorted pool
401+
for qstr in static_qstr_list:
402+
qbytes = make_bytes(cfg_bytes_len, cfg_bytes_hash, qstr)
403+
print("QDEF0(MP_QSTR_%s, %s)" % (qstr_escape(qstr), qbytes))
352404

353-
# go through each qstr and print it out
354-
for order, ident, qstr in sorted(qstrs.values(), key=lambda x: x[0]):
405+
# add remaining qstrs to the sorted (by value) pool (unless they're in
406+
# operator_qstr_list, in which case add them to the unsorted pool)
407+
for ident, qstr in sorted(qstrs.values(), key=lambda x: x[1]):
355408
qbytes = make_bytes(cfg_bytes_len, cfg_bytes_hash, qstr)
356-
print("QDEF(MP_QSTR_%s, %s)" % (ident, qbytes))
409+
pool = 0 if qstr in operator_qstr_list else 1
410+
print("QDEF%d(MP_QSTR_%s, %s)" % (pool, ident, qbytes))
357411

358412

359413
def do_work(infiles):

py/persistentcode.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,11 @@
4040

4141
#include "py/smallint.h"
4242

43+
// makeqstrdata.py has a fixed list of qstrs at the start that we can assume
44+
// are available with know indices on all MicroPython implementations, and
45+
// avoid needing to duplicate the string data in the .mpy file. This is the
46+
// last one in that list (anything with a qstr less than or equal to this is
47+
// assumed to be in the list).
4348
#define QSTR_LAST_STATIC MP_QSTR_zip
4449

4550
#if MICROPY_DYNAMIC_COMPILER

py/qstr.c

Lines changed: 84 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,6 @@
3333
#include "py/gc.h"
3434
#include "py/runtime.h"
3535

36-
// NOTE: we are using linear arrays to store and search for qstr's (unique strings, interned strings)
37-
// ultimately we will replace this with a static hash table of some kind
38-
3936
#if MICROPY_DEBUG_VERBOSE // print debugging info
4037
#define DEBUG_printf DEBUG_printf
4138
#else // don't print debugging info
@@ -74,38 +71,94 @@ size_t qstr_compute_hash(const byte *data, size_t len) {
7471
return hash;
7572
}
7673

74+
// The first pool is the static qstr table. The contents must remain stable as
75+
// it is part of the .mpy ABI. See the top of py/persistentcode.c and
76+
// static_qstr_list in makeqstrdata.py. This pool is unsorted (although in a
77+
// future .mpy version we could re-order them and make it sorted). It also
78+
// contains additional qstrs that must have IDs <256, see operator_qstr_list
79+
// in makeqstrdata.py.
80+
const qstr_hash_t mp_qstr_const_hashes_static[] = {
81+
#ifndef NO_QSTR
82+
#define QDEF0(id, hash, len, str) hash,
83+
#define QDEF1(id, hash, len, str)
84+
#include "genhdr/qstrdefs.generated.h"
85+
#undef QDEF0
86+
#undef QDEF1
87+
#endif
88+
};
89+
90+
const qstr_len_t mp_qstr_const_lengths_static[] = {
91+
#ifndef NO_QSTR
92+
#define QDEF0(id, hash, len, str) len,
93+
#define QDEF1(id, hash, len, str)
94+
#include "genhdr/qstrdefs.generated.h"
95+
#undef QDEF0
96+
#undef QDEF1
97+
#endif
98+
};
99+
100+
const qstr_pool_t mp_qstr_const_pool_static = {
101+
NULL, // no previous pool
102+
0, // no previous pool
103+
false, // is_sorted
104+
MICROPY_ALLOC_QSTR_ENTRIES_INIT,
105+
MP_QSTRnumber_of_static, // corresponds to number of strings in array just below
106+
(qstr_hash_t *)mp_qstr_const_hashes_static,
107+
(qstr_len_t *)mp_qstr_const_lengths_static,
108+
{
109+
#ifndef NO_QSTR
110+
#define QDEF0(id, hash, len, str) str,
111+
#define QDEF1(id, hash, len, str)
112+
#include "genhdr/qstrdefs.generated.h"
113+
#undef QDEF0
114+
#undef QDEF1
115+
#endif
116+
},
117+
};
118+
119+
// The next pool is the remainder of the qstrs defined in the firmware. This
120+
// is sorted.
77121
const qstr_hash_t mp_qstr_const_hashes[] = {
78122
#ifndef NO_QSTR
79-
#define QDEF(id, hash, len, str) hash,
123+
#define QDEF0(id, hash, len, str)
124+
#define QDEF1(id, hash, len, str) hash,
80125
#include "genhdr/qstrdefs.generated.h"
81-
#undef QDEF
126+
#undef QDEF0
127+
#undef QDEF1
82128
#endif
83129
};
84130

85131
const qstr_len_t mp_qstr_const_lengths[] = {
86132
#ifndef NO_QSTR
87-
#define QDEF(id, hash, len, str) len,
133+
#define QDEF0(id, hash, len, str)
134+
#define QDEF1(id, hash, len, str) len,
88135
#include "genhdr/qstrdefs.generated.h"
89-
#undef QDEF
136+
#undef QDEF0
137+
#undef QDEF1
90138
#endif
91139
};
92140

93141
const qstr_pool_t mp_qstr_const_pool = {
94-
NULL, // no previous pool
95-
0, // no previous pool
142+
&mp_qstr_const_pool_static,
143+
MP_QSTRnumber_of_static,
144+
true, // is_sorted
96145
MICROPY_ALLOC_QSTR_ENTRIES_INIT,
97-
MP_QSTRnumber_of, // corresponds to number of strings in array just below
146+
MP_QSTRnumber_of - MP_QSTRnumber_of_static, // corresponds to number of strings in array just below
98147
(qstr_hash_t *)mp_qstr_const_hashes,
99148
(qstr_len_t *)mp_qstr_const_lengths,
100149
{
101 10000 150
#ifndef NO_QSTR
102-
#define QDEF(id, hash, len, str) str,
151+
#define QDEF0(id, hash, len, str)
152+
#define QDEF1(id, hash, len, str) str,
103153
#include "genhdr/qstrdefs.generated.h"
104-
#undef QDEF
154+
#undef QDEF0
155+
#undef QDEF1
105156
#endif
106157
},
107158
};
108159

160+
// If frozen code is enabled, then there is an additional, sorted, ROM pool
161+
// containing additional qstrs required by the frozen code.
109162
#ifdef MICROPY_QSTR_EXTRA_POOL
110163
extern const qstr_pool_t MICROPY_QSTR_EXTRA_POOL;
111164
#define CONST_POOL MICROPY_QSTR_EXTRA_POOL
@@ -185,7 +238,24 @@ qstr qstr_find_strn(const char *str, size_t str_len) {
185238

186239
// search pools for the data
187240
for (const qstr_pool_t *pool = MP_STATE_VM(last_pool); pool != NULL; pool = pool->prev) {
188-
for (mp_uint_t at = 0, top = pool->len; at < top; at++) {
241+
size_t low = 0;
242+
size_t high = pool->len - 1;
243+
244+
// binary search inside the pool
245+
if (pool->is_sorted) {
246+
while (high - low > 1) {
247+
size_t mid = (low + high) / 2;
248+
int cmp = strncmp(str, pool->qstrs[mid], str_len);
249+
if (cmp <= 0) {
250+
high = mid;
251+
} else {
252+
low = mid;
253+
}
254+
}
255+
}
256+
257+
// sequential search for the remaining strings
258+
for (mp_uint_t at = low; at < high + 1; at++) {
189259
if (pool->hashes[at] == str_hash && pool->lengths[at] == str_len
190260
&& memcmp(pool->qstrs[at], str, str_len) == 0) {
191261
return pool->total_prev_len + at;
@@ -194,7 +264,7 @@ qstr qstr_find_strn(const char *str, size_t str_len) {
194264
}
195265

196266
// not found; return null qstr
197-
return 0;
267+
return MP_QSTRnull;
198268
}
199269

200270
qstr qstr_from_str(const char *str) {

py/qstr.h

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,9 +38,21 @@
3838
// first entry in enum will be MP_QSTRnull=0, which indicates invalid/no qstr
3939
enum {
4040
#ifndef NO_QSTR
41-
#define QDEF(id, hash, len, str) id,
41+
#define QDEF0(id, hash, len, str) id,
42+
#define QDEF1(id, hash, len, str)
4243
#include "genhdr/qstrdefs.generated.h"
43-
#undef QDEF
44+
#undef QDEF0
45+
#undef QDEF1
46+
#endif
47+
MP_QSTRnumber_of_static,
48+
MP_QSTRstart_of_main = MP_QSTRnumber_of_static - 1, // unused but shifts the enum counter back one
49+
50+
#ifndef NO_QSTR
51+
#define QDEF0(id, hash, len, str)
52+
#define QDEF1(id, hash, len, str) id,
53+
#include "genhdr/qstrdefs.generated.h"
54+
#undef QDEF0
55+
#undef QDEF1
4456
#endif
4557
MP_QSTRnumber_of, // no underscore so it can't clash with any of the above
4658
};
@@ -66,7 +78,8 @@ typedef uint16_t qstr_len_t;
6678

6779
typedef struct _qstr_pool_t {
6880
const struct _qstr_pool_t *prev;
69-
size_t total_prev_len;
81+
size_t total_prev_len : (8 * sizeof(size_t) - 1);
82+
size_t is_sorted : 1;
7083
size_t alloc;
7184
size_t len;
7285
qstr_hash_t *hashes;

0 commit comments

Comments
 (0)
0