10000 py: Faster qstr search by amirgon · Pull Request #6896 · micropython/micropython · GitHub
[go: up one dir, main page]

Skip to content

py: Faster qstr search #6896

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
156 changes: 117 additions & 39 deletions py/makeqstrdata.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@
codepoint2name[ord("|")] = "pipe"
codepoint2name[ord("~")] = "tilde"

# static qstrs, should be sorted
# static qstrs, unsorted.

static_qstr_list = [
"",
Expand All @@ -61,7 +61,12 @@
" ",
"*",
"/",
"<dictcomp>",
"<genexpr>",
"<lambda>",
"<listcomp>",
"<module>",
"<setcomp>",
"_",
"__call__",
"__class__",
Expand All @@ -84,6 +89,64 @@
"__repr__",
"__setitem__",
"__str__",
"__bool__",
"__pos__",
"__neg__",
"__invert__",
"__abs__",
"__float__",
"__complex__",
"__sizeof__",
"__lt__",
"__gt__",
"__eq__",
"__le__",
"__ge__",
"__ne__",
"__contains__",
"__iadd__",
"__isub__",
"__imul__",
"__imatmul__",
"__ifloordiv__",
"__itruediv__",
"__imod__",
"__ipow__",
"__ior__",
"__ixor__",
"__iand__",
"__ilshift__",
"__irshift__",
"__add__",
"__sub__",
"__mul__",
"__matmul__",
"__floordiv__",
"__truediv__",
"__mod__",
"__divmod__",
"__pow__",
"__or__",
"__xor__",
"__and__",
"__lshift__",
"__rshift__",
"__radd__",
"__rsub__",
"__rmul__",
"__rmatmul__",
"__rfloordiv__",
"__rtruediv__",
"__rmod__",
"__rpow__",
"__ror__",
"__rxor__",
"__rand__",
"__rlshift__",
"__rrshift__",
"__get__",
"__set__",
"__delete__",
"ArithmeticError",
"AssertionError",
"AttributeError",
Expand Down Expand Up @@ -257,7 +320,7 @@ def parse_input_headers(infiles):

# add the qstr to the list, with order number to retain original order in file
order = len(qstrs) - 300000
qstrs[ident] = (order, ident, qstr)
qstrs[ident] = Qstr(order, ident, qstr)

# read the qstrs in from the input files
for infile in infiles:
Expand Down Expand Up @@ -298,17 +361,7 @@ def parse_input_headers(infiles):

8000 # add the qstr to the list, with order number to retain original order in file
order = len(qstrs)
# but put special method names like __add__ at the top of list, so
# that their id's fit into a byte
if ident == "":
# Sort empty qstr above all still
order = -200000
elif ident == "__dir__":
# Put __dir__ after empty qstr for builtin dir() to work
order = -190000
elif ident.startswith("__"):
order -= 100000
qstrs[ident] = (order, ident, qstr)
qstrs[ident] = Qstr(order, ident, qstr)

if not qcfgs:
sys.stderr.write("ERROR: Empty preprocessor output - check for errors above\n")
Expand All @@ -317,47 +370,72 @@ def parse_input_headers(infiles):
return qcfgs, qstrs


def escape_bytes(qstr, qbytes):
if all(32 <= ord(c) <= 126 and c != "\\" and c != '"' for c in qstr):
# qstr is all printable ASCII so render it as-is (for easier debugging)
return qstr
else:
# qstr contains non-printable codes so render entire thing as hex pairs
return "".join(("\\x%02x" % b) for b in qbytes)
class Qstr:
cfg_bytes_len = 0
cfg_bytes_hash = 0

def __init__(self, order, ident, qstr):
self.order = order
self.ident = ident
self.qstr = qstr

def make_bytes(cfg_bytes_len, cfg_bytes_hash, qstr):
qbytes = bytes_cons(qstr, "utf8")
qlen = len(qbytes)
qhash = compute_hash(qbytes, cfg_bytes_hash)
if qlen >= (1 << (8 * cfg_bytes_len)):
print("qstr is too long:", qstr)
assert False
qdata = escape_bytes(qstr, qbytes)
return '%d, %d, "%s"' % (qhash, qlen, qdata)
@property
def qbytes(self):
return bytes_cons(self.qstr, "utf8")

@property
def qlen(self):
if len(self.qbytes) >= (1 << (8 * Qstr.cfg_bytes_len)):
print("qstr is too long:", self.qstr)
assert False
return len(self.qbytes)

def print_qstr_data(qcfgs, qstrs):
# get config variables
cfg_bytes_len = int(qcfgs["BYTES_IN_LEN"])
cfg_bytes_hash = int(qcfgs["BYTES_IN_HASH"])
@property
def qhash(self):
return compute_hash(self.qbytes, Qstr.cfg_bytes_hash)

def _escape_bytes(self):
if all(32 <= ord(c) <= 126 and c != "\\" and c != '"' for c in self.qstr):
# qstr is all printable ASCII so render it as-is (for easier debugging)
return self.qstr
else:
# qstr contains non-printable codes so render entire thing as hex pairs
return "".join(("\\x%02x" % b) for b in self.qbytes)

@property
def qdata(self):
return self._escape_bytes()


def print_qstr_data(qstrs):
# print out the starter of the generated C header file
print("// This file was automatically generated by makeqstrdata.py")
print("")

# add NULL qstr with no hash or data
print('QDEF(MP_QSTRnull, 0, 0, "")')
print('QDEF0(MP_QSTRnull, 0, 0, "")')

# split qstr values into two pools. static consts first.
q0_values = [q for q in qstrs.values() if q.order < 0]
q1_values = [q for q in qstrs.values() if q.order >= 0]

# go through each qstr and print it out
for order, ident, qstr in sorted(qstrs.values(), key=lambda x: x[0]):
qbytes = make_bytes(cfg_bytes_len, cfg_bytes_hash, qstr)
print("QDEF(MP_QSTR_%s, %s)" % (ident, qbytes))
# go through each qstr in pool 0 and print it out. pool0 has special sort.
for q in sorted(q0_values, key=lambda x: x.qhash):
print('QDEF0(MP_QSTR_%s, %d, %d, "%s")' % (q.ident, q.qhash, q.qlen, q.qdata))

# go through each qstr in pool 1 and print it out. pool1 is regularly sorted.
for q in sorted(q1_values, key=lambda x: x.qhash):
print('QDEF1(MP_QSTR_%s, %d, %d, "%s")' % (q.ident, q.qhash, q.qlen, q.qdata))


def do_work(infiles):
qcfgs, qstrs = parse_input_headers(infiles)
print_qstr_data(qcfgs, qstrs)

# get config variables
Qstr.cfg_bytes_len = int(qcfgs["BYTES_IN_LEN"])
Qstr.cfg_bytes_hash = int(qcfgs["BYTES_IN_HASH"])

print_qstr_data(qstrs)


if __name__ == "__main__":
Expand Down
100 changes: 86 additions & 14 deletions py/qstr.c
Original file line number Diff line number Diff line change
Expand Up @@ -74,34 +74,82 @@ size_t qstr_compute_hash(const byte *data, size_t len) {
return hash;
}

const qstr_hash_t mp_qstr_const_hashes[] = {
const qstr_hash_t mp_qstr_const_hashes0[] = {
#ifndef NO_QSTR
#define QDEF(id, hash, len, str) hash,
#define QDEF0(id, hash, len, str) hash,
#define QDEF1(id, hash, len, str)
#include "genhdr/qstrdefs.generated.h"
#undef QDEF
#undef QDEF0
#undef QDEF1
#endif
};

const qstr_len_t mp_qstr_const_lengths[] = {
const qstr_hash_t mp_qstr_const_hashes1[] = {
#ifndef NO_QSTR
#define QDEF(id, 57D8 hash, len, str) len,
#define QDEF0(id, hash, len, str)
#define QDEF1(id, hash, len, str) hash,
#include "genhdr/qstrdefs.generated.h"
#undef QDEF
#undef QDEF0
#undef QDEF1
#endif
};

const qstr_len_t mp_qstr_const_lengths0[] = {
#ifndef NO_QSTR
#define QDEF0(id, hash, len, str) len,
#define QDEF1(id, hash, len, str)
#include "genhdr/qstrdefs.generated.h"
#undef QDEF0
#undef QDEF1
#endif
};

const qstr_len_t mp_qstr_const_lengths1[] = {
#ifndef NO_QSTR
#define QDEF0(id, hash, len, str)
#define QDEF1(id, hash, len, str) len,
#include "genhdr/qstrdefs.generated.h"
#undef QDEF0
#undef QDEF1
#endif
};

const qstr_pool_t mp_qstr_special_const_pool = {
NULL, // no previous pool
0, // no previous pool
MICROPY_ALLOC_QSTR_ENTRIES_INIT,
MP_QSTRspecial_const_number_of + 1, // corresponds to number of strings in array just below
(qstr_hash_t *)mp_qstr_const_hashes0,
(qstr_len_t *)mp_qstr_const_lengths0,
true,
{
#ifndef NO_QSTR
#define QDEF0(id, hash, len, str) str,
#define QDEF1(id, hash, len, str)
#include "genhdr/qstrdefs.generated.h"
#undef QDEF0
#undef QDEF1
#endif
(const char *)"", // spacer for MP_QSTRspecial_const_number_of
},
};

const qstr_pool_t mp_qstr_const_pool = {
NULL, // no previous pool
0, // no previous pool
(qstr_pool_t *)&mp_qstr_special_const_pool,
MP_QSTRspecial_const_number_of + 1,
MICROPY_ALLOC_QSTR_ENTRIES_INIT,
MP_QSTRnumber_of, // corresponds to number of strings in array just below
(qstr_hash_t *)mp_qstr_const_hashes,
(qstr_len_t *)mp_qstr_const_lengths,
MP_QSTRnumber_of -
(MP_QSTRspecial_const_number_of + 1), // corresponds to number of strings in array just below
(qstr_hash_t *)mp_qstr_const_hashes1,
(qstr_len_t *)mp_qstr_const_lengths1,
true, // constant qstrs are sorted
{
#ifndef NO_QSTR
#define QDEF(id, hash, len, str) str,
#define QDEF0(id, hash, len, str)
#define QDEF1(id, hash, len, str) str,
#include "genhdr/qstrdefs.generated.h"
#undef QDEF
#undef QDEF0
#undef QDEF1
#endif
},
};
Expand Down Expand Up @@ -164,6 +212,7 @@ STATIC qstr qstr_add(mp_uint_t hash, mp_uint_t len, const char *q_ptr) {
pool->total_prev_len = MP_STATE_VM(last_pool)->total_prev_len + MP_STATE_VM(last_pool)->len;
pool->alloc = new_alloc;
pool->len = 0;
pool->sorted = false;
MP_STATE_VM(last_pool) = pool;
DEBUG_printf("QSTR: allocate new pool of size %d\n", MP_STATE_VM(last_pool)->alloc);
}
Expand All @@ -185,7 +234,30 @@ qstr qstr_find_strn(const char *str, size_t str_len) {

// search pools for the data
for (const qstr_pool_t *pool = MP_STATE_VM(last_pool); pool != NULL; pool = pool->prev) {
for (mp_uint_t at = 0, top = pool->len; at < top; at++) {
size_t low = 0;
size_t high = pool->len - 1;

// binary search inside the pool
if (pool->sorted) {
while (high - low > 1) {
size_t mid = (low + high) / 2;
int cmp = pool->hashes[mid] - str_hash;
if (cmp > 0) {
high = mid;
} else {
low = mid;
if (MP_UNLIKELY(cmp == 0)) {
while (low > 0 && pool->hashes[low - 1] == str_hash) {
low--;
}
break;
}
}
}
}

// sequential search for the remaining strings
for (mp_uint_t at = low; at < high + 1; at++) {
if (pool->hashes[at] == str_hash && pool->lengths[at] == str_len
&& memcmp(pool->qstrs[at], str, str_len) == 0) {
return pool->total_prev_len + at;
Expand Down
17 changes: 15 additions & 2 deletions py/qstr.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,21 @@
// first entry in enum will be MP_QSTRnull=0, which indicates invalid/no qstr
enum {
#ifndef NO_QSTR
#define QDEF(id, hash, len, str) id,

#define QDEF0(id, hash, len, str) id,
#define QDEF1(id, hash, len, str)
#include "genhdr/qstrdefs.generated.h"
#undef QDEF
#undef QDEF0
#undef QDEF1

MP_QSTRspecial_const_number_of, // no underscore so it can't clash with any of the above

#define QDEF0(id, hash, len, str)
#define QDEF1(id, hash, len, str) id,
#include "genhdr/qstrdefs.generated.h"
#undef QDEF0
#undef QDEF1

#endif
MP_QSTRnumber_of, // no underscore so it can't clash with any of the above
};
Expand Down Expand Up @@ -71,6 +83,7 @@ typedef struct _qstr_pool_t {
size_t len;
qstr_hash_t *hashes;
qstr_len_t *lengths;
bool sorted;
const char *qstrs[];
} qstr_pool_t;

Expand Down
2 changes: 1 addition & 1 deletion tools/makemanifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -415,7 +415,7 @@ def main():
b'#include "py/emitglue.h"\n'
b"extern const qstr_pool_t mp_qstr_const_pool;\n"
b"const qstr_pool_t mp_qstr_frozen_const_pool = {\n"
b" (qstr_pool_t*)&mp_qstr_const_pool, MP_QSTRnumber_of, 0, 0\n"
b" (qstr_pool_t*)&mp_qstr_const_pool, MP_QSTRnumber_of, 0, 0, NULL, NULL, false, NULL\n"
b"};\n"
b'const char mp_frozen_names[] = { MP_FROZEN_STR_NAMES "\\0"};\n'
b"const mp_raw_code_t *const mp_frozen_mpy_content[] = {NULL};\n"
Expand Down
Loading
0