8000 py: Faster qstr search. · micropython/micropython@8532c21 · GitHub
[go: up one dir, main page]

Skip to content

Commit 8532c21

Browse files
committed
py: Faster qstr search.
Today qstr implementation scans strings sequntially. In cases there are many strings this can become very inefficient. This change improves qstr search performance by using binary search in sorted qstr pools, when possible. This change introduces an option to create a sorted string pool, which is then searched by a binary search instead of sequential search. qstr pool can be either "sorted" or "unsorted", whereas the unsorted is searched sequentally as today. Native modules (MP_ROM_QSTR) and frozen modules generate sorted pools. Currently runtime strings are unsorted. The constant string pools is split into two and a new pool is introduced, "special_const_pool". This is required because the first sequence of strings already requires special ordering therefore created unsorted, while the rest of the constants are generated sorted. qstr_find_strn searches strings in each pool. If the pool is sorted and larger than a threshold, it will be search using binary search instead of sequential search, significantly improving performance.
1 parent 902da05 commit 8532c21

File tree

5 files changed

+96
-16
lines changed

5 files changed

+96
-16
lines changed

py/makeqstrdata.py

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -350,14 +350,23 @@ def print_qstr_data(qcfgs, qstrs):
350350

351351
# add NULL qstr with no hash or data
352352
print(
353-
'QDEF(MP_QSTRnull, (const byte*)"%s%s" "")'
353+
'QDEF0(MP_QSTRnull, (const byte*)"%s%s" "")'
354354
% ("\\x00" * cfg_bytes_hash, "\\x00" * cfg_bytes_len)
355355
)
356356

357-
# go through each qstr and print it out
358-
for order, ident, qstr in sorted(qstrs.values(), key=lambda x: x[0]):
357+
# split qstr values into two pools. static consts first.
358+
q0_values = [q for q in qstrs.values() if q[0] < 0]
359+
q1_values = [q for q in qstrs.values() if q[0] >= 0]
360+
361+
# go through each qstr in pool 0 and print it out. pool0 has special sort.
362+
for order, ident, qstr in sorted(q0_values, key=lambda x: x[0]):
363+
qbytes = make_bytes(cfg_bytes_len, cfg_bytes_hash, qstr)
364+
print("QDEF0(MP_QSTR_%s, %s)" % (ident, qbytes))
365+
366+
# go through each qstr in pool 1 and print it out. pool1 is regularly sorted.
367+
for order, ident, qstr in sorted(q1_values, key=lambda x: x[2]):
359368
qbytes = make_bytes(cfg_bytes_len, cfg_bytes_hash, qstr)
360-
print("QDEF(MP_QSTR_%s, %s)" % (ident, qbytes))
369+
print("QDEF1(MP_QSTR_%s, %s)" % (ident, qbytes))
361370

362371

363372
def do_work(infiles):

py/qstr.c

Lines changed: 65 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -100,16 +100,38 @@ mp_uint_t qstr_compute_hash(const byte *data, size_t len) {
100100
return hash;
101101
}
102102

103+
const qstr_pool_t mp_qstr_special_const_pool = {
104+
NULL, // no previous pool
105+
0, // no previous pool
106+
MICROPY_ALLOC_QSTR_ENTRIES_INIT,
107+
MP_QSTRspecial_const_number_of + 1, // corresponds to number of strings in array just below
108+
false, // special constant qstrs are not sorted
109+
{
110+
#ifndef NO_QSTR
111+
#define QDEF0(id, str) str,
112+
#define QDEF1(id, str)
113+
#include "genhdr/qstrdefs.generated.h"
114+
#undef QDEF0
115+
#undef QDEF1
116+
#endif
117+
(const byte *)"", // spacer for MP_QSTRspecial_const_number_of
118+
},
119+
};
120+
103121
const qstr_pool_t mp_qstr_const_pool = {
104-
NULL, // no previous pool
105-
0, // no previous pool
122+
(qstr_pool_t *)&mp_qstr_special_const_pool,
123+
MP_QSTRspecial_const_number_of + 1,
106124
MICROPY_ALLOC_QSTR_ENTRIES_INIT,
107-
MP_QSTRnumber_of, // corresponds to number of strings in array just below
125+
MP_QSTRnumber_of -
126+
(MP_QSTRspecial_const_number_of + 1), // corresponds to number of strings in array just below
127+
true, // constant qstrs are sorted
108128
{
109129
#ifndef NO_QSTR
110-
#define QDEF(id, str) str,
130+
#define QDEF0(id, str)
131+
#define QDEF1(id, str) str,
111132
#include "genhdr/qstrdefs.generated.h"
112-
#undef QDEF
133+
#undef QDEF0
134+
#undef QDEF1
113135
#endif
114136
},
115137
};
@@ -160,6 +182,7 @@ STATIC qstr qstr_add(const byte *q_ptr) {
160182
pool->total_prev_len = MP_STATE_VM(last_pool)->total_prev_len + MP_STATE_VM(last_pool)->len;
161183
pool->alloc = new_alloc;
162184
pool->len = 0;
185+
pool->sorted = false;
163186
MP_STATE_VM(last_pool) = pool;
164187
DEBUG_printf("QSTR: allocate new pool of size %d\n", MP_STATE_VM(last_pool)->alloc);
165188
}
@@ -171,14 +194,48 @@ STATIC qstr qstr_add(const byte *q_ptr) {
171194
return MP_STATE_VM(last_pool)->total_prev_len + MP_STATE_VM(last_pool)->len - 1;
172195
}
173196

197+
#define MP_QSTR_SEARCH_THRESHOLD 10
198+
174199
qstr qstr_find_strn(const char *str, size_t str_len) {
175-
// work out hash of str
176200
mp_uint_t str_hash = qstr_compute_hash((const byte *)str, str_len);
177201

178202
// search pools for the data
179203
for (qstr_pool_t *pool = MP_STATE_VM(last_pool); pool != NULL; pool = pool->prev) {
180-
for (const byte **q = pool->qstrs, **q_top = pool->qstrs + pool->len; q < q_top; q++) {
181-
if (Q_GET_HASH(*q) == str_hash && Q_GET_LENGTH(*q) == str_len && memcmp(Q_GET_DATA(*q), str, str_len) == 0) {
204+
size_t low = 0;
205+
size_t high = pool->len - 1;
206+
207+
// binary search inside the pool
208+
if (pool->sorted) {
209+
while (high - low > MP_QSTR_SEARCH_THRESHOLD) {
210+
size_t mid = (low + high + 1) / 2;
211+
const byte **q = pool->qstrs + mid;
212+
size_t len = Q_GET_LENGTH(*q);
213+
if (len > str_len) {
214+
len = str_len;
215+
}
216+
int cmp = memcmp(Q_GET_DATA(*q), str, str_len);
217+
if (cmp < 0) {
218+
low = mid;
219+
} else if (cmp > 0) {
220+
high = mid;
221+
} else {
222+
if (Q_GET_LENGTH(*q) < str_len) {
223+
low = mid;
224+
} else if (Q_GET_LENGTH(*q) > str_len) {
225+
high = mid;
226+
} else {
227+
return pool->total_prev_len + (q - pool->qstrs);
228+
}
229+
}
230+
}
231+
}
232+
233+
// sequential search for the remaining strings
234+
for (const byte **q = pool->qstrs + low; q != pool->qstrs + high + 1; q++) {
235+
if (*q &&
236+
Q_GET_HASH(*q) == str_hash &&
237+
Q_GET_LENGTH(*q) == str_len &&
238+
memcmp(Q_GET_DATA(*q), str, str_len) == 0) {
182239
return pool->total_prev_len + (q - pool->qstrs);
183240
}
184241
}

py/qstr.h

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,9 +38,21 @@
3838
// first entry in enum will be MP_QSTRnull=0, which indicates invalid/no qstr
3939
enum {
4040
#ifndef NO_QSTR
41-
#define QDEF(id, str) id,
41+
42+
#define QDEF0(id, str) id,
43+
#define QDEF1(id, str)
4244
#include "genhdr/qstrdefs.generated.h"
43-
#undef QDEF
45+
#undef QDEF0
46+
#undef QDEF1
47+
48+
MP_QSTRspecial_const_number_of, // no underscore so it can't clash with any of the above
49+
50+
#define QDEF0(id, str)
51+
#define QDEF1(id, str) id,
52+
#include "genhdr/qstrdefs.generated.h"
53+
#undef QDEF0
54+
#undef QDEF1
55+
4456
#endif
4557
MP_QSTRnumber_of, // no underscore so it can't clash with any of the above
4658
};
@@ -52,6 +64,7 @@ typedef struct _qstr_pool_t {
5264
size_t total_prev_len;
5365
size_t alloc;
5466
size_t len;
67+
bool sorted;
5568
const byte *qstrs[];
5669
} qstr_pool_t;
5770

tools/makemanifest.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -326,7 +326,7 @@ def main():
326326
b'#include "py/emitglue.h"\n'
327327
b"extern const qstr_pool_t mp_qstr_const_pool;\n"
328328
b"const qstr_pool_t mp_qstr_frozen_const_pool = {\n"
329-
b" (qstr_pool_t*)&mp_qstr_const_pool, MP_QSTRnumber_of, 0, 0\n"
329+
b" (qstr_pool_t*)&mp_qstr_const_pool, MP_QSTRnumber_of, 0, false, 0\n"
330330
b"};\n"
331331
b'const char mp_frozen_mpy_names[1] = {"\\0"};\n'
332332
b"const mp_raw_code_t *const mp_frozen_mpy_content[1] = {NULL};\n"

tools/mpy-tool.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -826,7 +826,7 @@ def freeze_mpy(base_qstrs, raw_codes):
826826
if q is None or q.qstr_esc in base_qstrs or q.qstr_esc in new:
827827
continue
828828
new[q.qstr_esc] = (len(new), q.qstr_esc, q.str)
829-
new = sorted(new.values(), key=lambda x: x[0])
829+
new = sorted(new.values(), key=lambda x: x[2])
830830

831831
print('#include "py/mpconfig.h"')
832832
print('#include "py/objint.h"')
@@ -890,6 +890,7 @@ def freeze_mpy(base_qstrs, raw_codes):
890890
print(" MP_QSTRnumber_of, // previous pool size")
891891
print(" %u, // allocated entries" % qstr_pool_alloc)
892892
print(" %u, // used entries" % len(new))
893+
print(" true, // entries are sorted")
893894
print(" {")
894895
for _, _, qstr in new:
895896
print(

0 commit comments

Comments
 (0)
0