8000 py/qstr: Sort qstrs by hash. · micropython/micropython@cc808fe · GitHub
[go: up one dir, main page]

Skip to content

Commit cc808fe

Browse files
committed
py/qstr: Sort qstrs by hash.
Instead of sorting by string, sort qstrs by (hash, len). This allows faster binary serach on qstr_find_strn, since it's faster to compare hashes than strings. A few strings needed to be moved to special string pool (QDEF0) because their qstr is assumed to be small (8 bit) on py/scope.c
1 parent 5808635 commit cc808fe

File tree

3 files changed

+21
-14
lines changed

3 files changed

+21
-14
lines changed

py/makeqstrdata.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,12 @@
6161
" ",
6262
"*",
6363
"/",
64+
"<dictcomp>",
65+
"<genexpr>",
66+
"<lambda>",
67+
"<listcomp>",
6468
"<module>",
69+
"<setcomp>",
6570
"_",
6671
"__call__",
6772
"__class__",
@@ -373,7 +378,7 @@ def print_qstr_data(qstrs):
373378
print('QDEF0(MP_QSTR_%s, %d, %d, "%s")' % (q.ident, q.qhash, q.qlen, q.qdata))
374379

375380
# go through each qstr in pool 1 and print it out. pool1 is regularly sorted.
376-
for q in sorted(q1_values, key=lambda x: x.qstr):
381+
for q in sorted(q1_values, key=lambda x: (x.qhash, x.qlen)):
377382
print('QDEF1(MP_QSTR_%s, %d, %d, "%s")' % (q.ident, q.qhash, q.qlen, q.qdata))
378383

379384

py/qstr.c

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -242,23 +242,25 @@ qstr qstr_find_strn(const char *str, size_t str_len) {
242242
if (pool->sorted) {
243243
while (high - low > MP_QSTR_SEARCH_THRESHOLD) {
244244
size_t mid = (low + high + 1) / 2;
245-
size_t len = pool->lengths[mid];
246-
if (len > str_len) {
247-
len = str_len;
248-
}
249-
int cmp = memcmp(pool->qstrs[mid], str, str_len);
245+
int cmp = pool->hashes[mid] - str_hash;
246+
if (cmp == 0) cmp = pool->lengths[mid] - str_len;
250247
if (cmp < 0) {
251248
low = mid;
252249
} else if (cmp > 0) {
253250
high = mid;
254251
} else {
255-
if (pool->lengths[mid] < str_len) {
256-
low = mid;
257-
} else if (pool->lengths[mid] > str_len) {
258-
high = mid;
259-
} else {
260-
return pool->total_prev_len + mid;
261-
}
252+
// avoid a rare (hash,len) collisions
253+
while (MP_UNLIKELY(
254+
pool->lengths[mid] != str_len ||
255+
memcmp(pool->qstrs[mid], str, str_len) != 0)) {
256+
mid++;
257+
if (mid > high ||
258+
pool->hashes[mid] != str_hash ||
259+
pool->lengths[mid] != str_len) {
260+
return 0;
261+
}
262+
}
263+
return pool->total_prev_len + mid;
262264
}
263265
}
264266
}

tools/mpy-tool.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1273,7 +1273,7 @@ def freeze_mpy(base_qstrs, compiled_modules):
12731273
if q is None or q.qstr_esc in base_qstrs or q.qstr_esc in new:
12741274
continue
12751275
new[q.qstr_esc] = qstrutil.Qstr(len(new), q.qstr_esc, q.str)
1276-
new = sorted(new.values(), key=lambda x: x.qstr)
1276+
new = sorted(new.values(), key=lambda x: (x.qhash, x.qlen))
12771277

12781278
print('#include "py/mpconfig.h"')
12791279
print('#include "py/objint.h"')

0 commit comments

Comments
 (0)
0