8000 py: Faster qstr search. · micropython/micropython@c661055 · GitHub
[go: up one dir, main page]

Skip to content

Commit c661055

Browse files
committed
py: Faster qstr search.
Today qstr implementation scans strings sequntially. In cases there are many strings this can become very inefficient. This change improves qstr search performance by using binary search in sorted qstr pools, when possible. This change introduces an option to create a sorted string pool, which is then searched by a binary search instead of sequential search. qstr pool can be either "sorted" or "unsorted", whereas the unsorted is searched sequentally as today. Native modules (MP_ROM_QSTR) and frozen modules generate sorted pools. Currently runtime strings are unsorted. The constant string pools is split into two and a new pool is introduced, "special_const_pool". This is required because the first sequence of strings already requires special ordering therefore created unsorted, while the rest of the constants are generated sorted. qstr_find_strn searches strings in each pool. If the pool is sorted and larger than a threshold, it will be search using binary search instead of sequential search, significantly improving performance.
1 parent ff9c708 commit c661055

File tree

5 files changed

+125
-23
lines changed

5 files changed

+125
-23
lines changed

py/makeqstrdata.py

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -347,12 +347,21 @@ def print_qstr_data(qcfgs, qstrs):
347347
print("")
348348

349349
# add NULL qstr with no hash or data
350-
print('QDEF(MP_QSTRnull, 0, 0, "")')
350+
print('QDEF0(MP_QSTRnull, 0, 0, "")')
351351

352-
# go through each qstr and print it out
353-
for order, ident, qstr in sorted(qstrs.values(), key=lambda x: x[0]):
352+
# split qstr values into two pools. static consts first.
353+
q0_values = [q for q in qstrs.values() if q[0] < 0]
354+
q1_values = [q for q in qstrs.values() if q[0] >= 0]
355+
356+
# go through each qstr in pool 0 and print it out. pool0 has special sort.
357+
for order, ident, qstr in sorted(q0_values, key=lambda x: x[0]):
358+
qbytes = make_bytes(cfg_bytes_len, cfg_bytes_hash, qstr)
359+
print("QDEF0(MP_QSTR_%s, %s)" % (ident, qbytes))
360+
361+
# go through each qstr in pool 1 and print it out. pool1 is regularly sorted.
362+
for order, ident, qstr in sorted(q1_values, key=lambda x: x[2]):
354363
qbytes = make_bytes(cfg_bytes_len, cfg_bytes_hash, qstr)
355-
print("QDEF(MP_QSTR_%s, %s)" % (ident, qbytes))
364+
print("QDEF1(MP_QSTR_%s, %s)" % (ident, qbytes))
356365

357366

358367
def do_work(infiles):

py/qstr.c

Lines changed: 94 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -74,34 +74,82 @@ mp_uint_t qstr_compute_hash(const byte *data, size_t len) {
7474
return hash;
7575
}
7676

77-
const qstr_hash_t mp_qstr_const_hashes[] = {
77+
const qstr_hash_t mp_qstr_const_hashes0[] = {
7878
#ifndef NO_QSTR
79-
#define QDEF(id, hash, len, str) hash,
79+
#define QDEF0(id, hash, len, str) hash,
80+
#define QDEF1(id, hash, len, str)
8081
#include "genhdr/qstrdefs.generated.h"
81-
#undef QDEF
82+
#undef QDEF0
83+
#undef QDEF1
8284
#endif
8385
};
8486

85-
const qstr_len_t mp_qstr_const_lengths[] = {
87+
const qstr_hash_t mp_qstr_const_hashes1[] = {
8688
#ifndef NO_QSTR
87-
#define QDEF(id, hash, len, str) len,
89+
#define QDEF0(id, hash, len, str)
90+
#define QDEF1(id, hash, len, str) hash,
8891
#include "genhdr/qstrdefs.generated.h"
89-
#undef QDEF
92+
#undef QDEF0
93+
#undef QDEF1
9094
#endif
9195
};
9296

97+
const qstr_len_t mp_qstr_const_lengths0[] = {
98+
#ifndef NO_QSTR
99+
#define QDEF0(id, hash, len, str) len,
100+
#define QDEF1(id, hash, len, str)
101+
#include "genhdr/qstrdefs.generated.h"
102+
#undef QDEF0
103+
#undef QDEF1
104+
#endif
105+
};
106+
107+
const qstr_len_t mp_qstr_const_lengths1[] = {
108+
#ifndef NO_QSTR
109+
#define QDEF0(id, hash, len, str)
110+
#define QDEF1(id, hash, len, str) len,
111+
#include "genhdr/qstrdefs.generated.h"
112+
#undef QDEF0
113+
#undef QDEF1
114+
#endif
115+
};
116+
117+
const qstr_pool_t mp_qstr_special_const_pool = {
118+
NULL, // no previous pool
119+
0, // no previous pool
120+
MICROPY_ALLOC_QSTR_ENTRIES_INIT,
121+
MP_QSTRspecial_const_number_of + 1, // corresponds to number of strings in array just below
122+
(qstr_hash_t *)mp_qstr_const_hashes0,
123+
(qstr_len_t *)mp_qstr_const_lengths0,
124+
false, // special constant qstrs are not sorted
125+
{
126+
#ifndef NO_QSTR
127+
#define QDEF0(id, hash, len, str) str,
128+
#define QDEF1(id, hash, len, str)
129+
#include "genhdr/qstrdefs.generated.h"
130+
#undef QDEF0
131+
#undef QDEF1
132+
#endif
133+
(const char *)"", // spacer for MP_QSTRspecial_const_number_of
134+
},
135+
};
136+
93137
const qstr_pool_t mp_qstr_const_pool = {
94-
NULL, // no previous pool
95-
0, // no previous pool
138+
(qstr_pool_t *)&mp_qstr_special_const_pool,
139+
MP_QSTRspecial_const_number_of + 1,
96140
MICROPY_ALLOC_QSTR_ENTRIES_INIT,
97-
MP_QSTRnumber_of, // corresponds to number of strings in array just below
98-
(qstr_hash_t *)mp_qstr_const_hashes,
99-
(qstr_len_t *)mp_qstr_const_lengths,
141+
MP_QSTRnumber_of -
142+
(MP_QSTRspecial_const_number_of + 1), // corresponds to number of strings in array just below
143+
(qstr_hash_t *)mp_qstr_const_hashes1,
144+
(qstr_len_t *)mp_qstr_const_lengths1,
145+
true, // constant qstrs are sorted
100146
{
101147
#ifndef NO_QSTR
102-
#define QDEF(id, hash, len, str) str,
148+
#define QDEF0(id, hash, len, str)
149+
#define QDEF1(id, hash, len, str) str,
103150
#include "genhdr/qstrdefs.generated.h"
104-
#undef QDEF
151+
#undef QDEF0
152+
#undef QDEF1
105153
#endif
106154
},
107155
};
@@ -164,6 +212,7 @@ STATIC qstr qstr_add(mp_uint_t hash, mp_uint_t len, const char *q_ptr) {
164212
pool->total_prev_len = MP_STATE_VM(last_pool)->total_prev_len + MP_STATE_VM(last_pool)->len;
165213
pool->alloc = new_alloc;
166214
pool->len = 0;
215+
pool->sorted = false;
167216
MP_STATE_VM(last_pool) = pool;
168217
DEBUG_printf("QSTR: allocate new pool of size %d\n", MP_STATE_VM(last_pool)->alloc);
169218
}
@@ -179,13 +228,43 @@ STATIC qstr qstr_add(mp_uint_t hash, mp_uint_t len, const char *q_ptr) {
179228
return MP_STATE_VM(last_pool)->total_prev_len + at;
180229
}
181230

231+
#define MP_QSTR_SEARCH_THRESHOLD 10
232+
182233
qstr qstr_find_strn(const char *str, size_t str_len) {
183-
// work out hash of str
184234
mp_uint_t str_hash = qstr_compute_hash((const byte *)str, str_len);
185235

186236
// search pools for the data
187237
for (const qstr_pool_t *pool = MP_STATE_VM(last_pool); pool != NULL; pool = pool->prev) {
188-
for (mp_uint_t at = 0, top = pool->len; at < top; at++) {
238+
size_t low = 0;
239+
size_t high = pool->len - 1;
240+
241+
// binary search inside the pool
242+
if (pool->sorted) {
243+
while (high - low > MP_QSTR_SEARCH_THRESHOLD) {
244+
size_t mid = (low + high + 1) / 2;
245+
size_t len = pool->lengths[mid];
246+
if (len > str_len) {
247+
len = str_len;
248+
}
249+
int cmp = memcmp(pool->qstrs[mid], str, str_len);
250+
if (cmp < 0) {
251+
low = mid;
252+
} else if (cmp > 0) {
253+
high = mid;
254+
} else {
255+
if (pool->lengths[mid] < str_len) {
256+
low = mid;
257+
} else if (pool->lengths[mid] > str_len) {
258+
high = mid;
259+
} else {
260+
return pool->total_prev_len + mid;
261+
}
262+
}
263+
}
264+
}
265+
266+
// sequential search for the remaining strings
267+
for (mp_uint_t at = low; at < high + 1; at++) {
189268
if (pool->hashes[at] == str_hash && pool->lengths[at] == str_len
190269
&& memcmp(pool->qstrs[at], str, str_len) == 0) {
191270
return pool->total_prev_len + at;

py/qstr.h

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,9 +38,21 @@
3838
// first entry in enum will be MP_QSTRnull=0, which indicates invalid/no qstr
3939
enum {
4040
#ifndef NO_QSTR
41-
#define QDEF(id, hash, len, str) id,
41+
42+
#define QDEF0(id, hash, len, str) id,
43+
#define QDEF1(id, hash, len, str)
4244
#include "genhdr/qstrdefs.generated.h"
43-
#undef QDEF
45+
#undef QDEF0
46+
#undef QDEF1
47+
48+
MP_QSTRspecial_const_number_of, // no underscore so it can't clash with any of the above
49+
50+
#define QDEF0(id, hash, len, str)
51+
#define QDEF1(id, hash, len, str) id,
52+
#include "genhdr/qstrdefs.generated.h"
53+
#undef QDEF0
54+
#undef QDEF1
55+
4456
#endif
4557
MP_QSTRnumber_of, // no underscore so it can't clash with any of the above
4658
};
@@ -70,6 +82,7 @@ typedef struct _qstr_pool_t {
7082
size_t len;
7183
qstr_hash_t *hashes;
7284
qstr_len_t *lengths;
85+
bool sorted;
7386
const char *qstrs[];
7487
} qstr_pool_t;
7588

tools/makemanifest.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -415,7 +415,7 @@ def main():
415415
b'#include "py/emitglue.h"\n'
416416
b"extern const qstr_pool_t mp_qstr_const_pool;\n"
417417
b"const qstr_pool_t mp_qstr_frozen_const_pool = {\n"
418-
b" (qstr_pool_t*)&mp_qstr_const_pool, MP_QSTRnumber_of, 0, 0\n"
418+
b" (qstr_pool_t*)&mp_qstr_const_pool, MP_QSTRnumber_of, 0, false, 0\n"
419419
b"};\n"
420420
b'const char mp_frozen_names[] = { MP_FROZEN_STR_NAMES "\\0"};\n'
421421
b"const mp_raw_code_t *const mp_frozen_mpy_content[] = {NULL};\n"

tools/mpy-tool.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -815,7 +815,7 @@ def freeze_mpy(base_qstrs, raw_codes):
815815
if q is None or q.qstr_esc in base_qstrs or q.qstr_esc in new:
816816
continue
817817
new[q.qstr_esc] = (len(new), q.qstr_esc, q.str, bytes_cons(q.str, "utf8"))
818-
new = sorted(new.values(), key=lambda x: x[0])
818+
new = sorted(new.values(), key=lambda x: x[2])
819819

820820
print('#include "py/mpconfig.h"')
821821
print('#include "py/objint.h"')
@@ -889,6 +889,7 @@ def freeze_mpy(base_qstrs, raw_codes):
889889
print(" %u, // used entries" % len(new))
890890
print(" (qstr_hash_t *)mp_qstr_frozen_const_hashes,")
891891
print(" (qstr_len_t *)mp_qstr_frozen_const_lengths,")
892+
print(" true, // entries are sorted")
892893
print(" {")
893894
for _, _, qstr, qbytes in new:
894895
print(' "%s",' % qstrutil.escape_bytes(qstr, qbytes))

0 commit comments

Comments
 (0)
0