Merge branch 'qstr_bsearch' into update_micropython_v1.19

This commit is contained in:
Amir Gonnen 2022-07-03 00:22:59 +03:00
commit b912b45c15
4 changed files with 185 additions and 61 deletions

View File

@ -61,7 +61,12 @@ static_qstr_list = [
" ", " ",
"*", "*",
"/", "/",
"<dictcomp>",
"<genexpr>",
"<lambda>",
"<listcomp>",
"<module>", "<module>",
"<setcomp>",
"_", "_",
"__call__", "__call__",
"__class__", "__class__",
@ -223,6 +228,8 @@ static_qstr_list = [
] ]
# this must match the equivalent function in qstr.c # this must match the equivalent function in qstr.c
def compute_hash(qstr, bytes_hash): def compute_hash(qstr, bytes_hash):
hash = 5381 hash = 5381
for b in qstr: for b in qstr:
@ -257,7 +264,7 @@ def parse_input_headers(infiles):
# add the qstr to the list, with order number to retain original order in file # add the qstr to the list, with order number to retain original order in file
order = len(qstrs) - 300000 order = len(qstrs) - 300000
qstrs[ident] = (order, ident, qstr) qstrs[ident] = Qstr(order, ident, qstr)
# read the qstrs in from the input files # read the qstrs in from the input files
for infile in infiles: for infile in infiles:
@ -308,7 +315,7 @@ def parse_input_headers(infiles):
order = -190000 order = -190000
elif ident.startswith("__"): elif ident.startswith("__"):
order -= 100000 order -= 100000
qstrs[ident] = (order, ident, qstr) qstrs[ident] = Qstr(order, ident, qstr)
if not qcfgs: if not qcfgs:
sys.stderr.write("ERROR: Empty preprocessor output - check for errors above\n") sys.stderr.write("ERROR: Empty preprocessor output - check for errors above\n")
@ -317,47 +324,72 @@ def parse_input_headers(infiles):
return qcfgs, qstrs return qcfgs, qstrs
def escape_bytes(qstr, qbytes): class Qstr:
if all(32 <= ord(c) <= 126 and c != "\\" and c != '"' for c in qstr): cfg_bytes_len = 0
# qstr is all printable ASCII so render it as-is (for easier debugging) cfg_bytes_hash = 0
return qstr
else: def __init__(self, order, ident, qstr):
# qstr contains non-printable codes so render entire thing as hex pairs self.order = order
return "".join(("\\x%02x" % b) for b in qbytes) self.ident = ident
self.qstr = qstr
@property
def qbytes(self):
return bytes_cons(self.qstr, "utf8")
@property
def qlen(self):
if len(self.qbytes) >= (1 << (8 * Qstr.cfg_bytes_len)):
print("qstr is too long:", self.qstr)
assert False
return len(self.qbytes)
@property
def qhash(self):
return compute_hash(self.qbytes, Qstr.cfg_bytes_hash)
def _escape_bytes(self):
if all(32 <= ord(c) <= 126 and c != "\\" and c != '"' for c in self.qstr):
# qstr is all printable ASCII so render it as-is (for easier debugging)
return self.qstr
else:
# qstr contains non-printable codes so render entire thing as hex pairs
return "".join(("\\x%02x" % b) for b in self.qbytes)
@property
def qdata(self):
return self._escape_bytes()
def make_bytes(cfg_bytes_len, cfg_bytes_hash, qstr): def print_qstr_data(qstrs):
qbytes = bytes_cons(qstr, "utf8")
qlen = len(qbytes)
qhash = compute_hash(qbytes, cfg_bytes_hash)
if qlen >= (1 << (8 * cfg_bytes_len)):
print("qstr is too long:", qstr)
assert False
qdata = escape_bytes(qstr, qbytes)
return '%d, %d, "%s"' % (qhash, qlen, qdata)
def print_qstr_data(qcfgs, qstrs):
# get config variables
cfg_bytes_len = int(qcfgs["BYTES_IN_LEN"])
cfg_bytes_hash = int(qcfgs["BYTES_IN_HASH"])
# print out the starter of the generated C header file # print out the starter of the generated C header file
print("// This file was automatically generated by makeqstrdata.py") print("// This file was automatically generated by makeqstrdata.py")
print("") print("")
# add NULL qstr with no hash or data # add NULL qstr with no hash or data
print('QDEF(MP_QSTRnull, 0, 0, "")') print('QDEF0(MP_QSTRnull, 0, 0, "")')
# go through each qstr and print it out # split qstr values into two pools. static consts first.
for order, ident, qstr in sorted(qstrs.values(), key=lambda x: x[0]): q0_values = [q for q in qstrs.values() if q.order < 0]
qbytes = make_bytes(cfg_bytes_len, cfg_bytes_hash, qstr) q1_values = [q for q in qstrs.values() if q.order >= 0]
print("QDEF(MP_QSTR_%s, %s)" % (ident, qbytes))
# go through each qstr in pool 0 and print it out. pool0 has special sort.
for q in sorted(q0_values, key=lambda x: x.order):
print('QDEF0(MP_QSTR_%s, %d, %d, "%s")' % (q.ident, q.qhash, q.qlen, q.qdata))
# go through each qstr in pool 1 and print it out. pool1 is regularly sorted.
for q in sorted(q1_values, key=lambda x: (x.qhash, x.qlen)):
print('QDEF1(MP_QSTR_%s, %d, %d, "%s")' % (q.ident, q.qhash, q.qlen, q.qdata))
def do_work(infiles): def do_work(infiles):
qcfgs, qstrs = parse_input_headers(infiles) qcfgs, qstrs = parse_input_headers(infiles)
print_qstr_data(qcfgs, qstrs)
# get config variables
Qstr.cfg_bytes_len = int(qcfgs["BYTES_IN_LEN"])
Qstr.cfg_bytes_hash = int(qcfgs["BYTES_IN_HASH"])
print_qstr_data(qstrs)
if __name__ == "__main__": if __name__ == "__main__":

103
py/qstr.c
View File

@ -74,34 +74,82 @@ mp_uint_t qstr_compute_hash(const byte *data, size_t len) {
return hash; return hash;
} }
const qstr_hash_t mp_qstr_const_hashes[] = { const qstr_hash_t mp_qstr_const_hashes0[] = {
#ifndef NO_QSTR #ifndef NO_QSTR
#define QDEF(id, hash, len, str) hash, #define QDEF0(id, hash, len, str) hash,
#define QDEF1(id, hash, len, str)
#include "genhdr/qstrdefs.generated.h" #include "genhdr/qstrdefs.generated.h"
#undef QDEF #undef QDEF0
#undef QDEF1
#endif #endif
}; };
const qstr_len_t mp_qstr_const_lengths[] = { const qstr_hash_t mp_qstr_const_hashes1[] = {
#ifndef NO_QSTR #ifndef NO_QSTR
#define QDEF(id, hash, len, str) len, #define QDEF0(id, hash, len, str)
#define QDEF1(id, hash, len, str) hash,
#include "genhdr/qstrdefs.generated.h" #include "genhdr/qstrdefs.generated.h"
#undef QDEF #undef QDEF0
#undef QDEF1
#endif #endif
}; };
const qstr_len_t mp_qstr_const_lengths0[] = {
#ifndef NO_QSTR
#define QDEF0(id, hash, len, str) len,
#define QDEF1(id, hash, len, str)
#include "genhdr/qstrdefs.generated.h"
#undef QDEF0
#undef QDEF1
#endif
};
const qstr_len_t mp_qstr_const_lengths1[] = {
#ifndef NO_QSTR
#define QDEF0(id, hash, len, str)
#define QDEF1(id, hash, len, str) len,
#include "genhdr/qstrdefs.generated.h"
#undef QDEF0
#undef QDEF1
#endif
};
const qstr_pool_t mp_qstr_special_const_pool = {
NULL, // no previous pool
0, // no previous pool
MICROPY_ALLOC_QSTR_ENTRIES_INIT,
MP_QSTRspecial_const_number_of + 1, // corresponds to number of strings in array just below
(qstr_hash_t *)mp_qstr_const_hashes0,
(qstr_len_t *)mp_qstr_const_lengths0,
false, // special constant qstrs are not sorted
{
#ifndef NO_QSTR
#define QDEF0(id, hash, len, str) str,
#define QDEF1(id, hash, len, str)
#include "genhdr/qstrdefs.generated.h"
#undef QDEF0
#undef QDEF1
#endif
(const char *)"", // spacer for MP_QSTRspecial_const_number_of
},
};
const qstr_pool_t mp_qstr_const_pool = { const qstr_pool_t mp_qstr_const_pool = {
NULL, // no previous pool (qstr_pool_t *)&mp_qstr_special_const_pool,
0, // no previous pool MP_QSTRspecial_const_number_of + 1,
MICROPY_ALLOC_QSTR_ENTRIES_INIT, MICROPY_ALLOC_QSTR_ENTRIES_INIT,
MP_QSTRnumber_of, // corresponds to number of strings in array just below MP_QSTRnumber_of -
(qstr_hash_t *)mp_qstr_const_hashes, (MP_QSTRspecial_const_number_of + 1), // corresponds to number of strings in array just below
(qstr_len_t *)mp_qstr_const_lengths, (qstr_hash_t *)mp_qstr_const_hashes1,
(qstr_len_t *)mp_qstr_const_lengths1,
true, // constant qstrs are sorted
{ {
#ifndef NO_QSTR #ifndef NO_QSTR
#define QDEF(id, hash, len, str) str, #define QDEF0(id, hash, len, str)
#define QDEF1(id, hash, len, str) str,
#include "genhdr/qstrdefs.generated.h" #include "genhdr/qstrdefs.generated.h"
#undef QDEF #undef QDEF0
#undef QDEF1
#endif #endif
}, },
}; };
@ -164,6 +212,7 @@ STATIC qstr qstr_add(mp_uint_t hash, mp_uint_t len, const char *q_ptr) {
pool->total_prev_len = MP_STATE_VM(last_pool)->total_prev_len + MP_STATE_VM(last_pool)->len; pool->total_prev_len = MP_STATE_VM(last_pool)->total_prev_len + MP_STATE_VM(last_pool)->len;
pool->alloc = new_alloc; pool->alloc = new_alloc;
pool->len = 0; pool->len = 0;
pool->sorted = false;
MP_STATE_VM(last_pool) = pool; MP_STATE_VM(last_pool) = pool;
DEBUG_printf("QSTR: allocate new pool of size %d\n", MP_STATE_VM(last_pool)->alloc); DEBUG_printf("QSTR: allocate new pool of size %d\n", MP_STATE_VM(last_pool)->alloc);
} }
@ -179,13 +228,37 @@ STATIC qstr qstr_add(mp_uint_t hash, mp_uint_t len, const char *q_ptr) {
return MP_STATE_VM(last_pool)->total_prev_len + at; return MP_STATE_VM(last_pool)->total_prev_len + at;
} }
#define MP_QSTR_SEARCH_THRESHOLD 10
qstr qstr_find_strn(const char *str, size_t str_len) { qstr qstr_find_strn(const char *str, size_t str_len) {
// work out hash of str
mp_uint_t str_hash = qstr_compute_hash((const byte *)str, str_len); mp_uint_t str_hash = qstr_compute_hash((const byte *)str, str_len);
// search pools for the data // search pools for the data
for (const qstr_pool_t *pool = MP_STATE_VM(last_pool); pool != NULL; pool = pool->prev) { for (const qstr_pool_t *pool = MP_STATE_VM(last_pool); pool != NULL; pool = pool->prev) {
for (mp_uint_t at = 0, top = pool->len; at < top; at++) { size_t low = 0;
size_t high = pool->len - 1;
// binary search inside the pool
if (pool->sorted) {
while (high - low > MP_QSTR_SEARCH_THRESHOLD) {
size_t mid = (low + high + 1) / 2;
int cmp = pool->hashes[mid] - str_hash;
if (cmp == 0) {
cmp = pool->lengths[mid] - str_len;
}
if (cmp > 0) {
high = mid;
} else {
low = mid;
if (cmp == 0) {
break;
}
}
}
}
// sequential search for the remaining strings
for (mp_uint_t at = low; at < high + 1; at++) {
if (pool->hashes[at] == str_hash && pool->lengths[at] == str_len if (pool->hashes[at] == str_hash && pool->lengths[at] == str_len
&& memcmp(pool->qstrs[at], str, str_len) == 0) { && memcmp(pool->qstrs[at], str, str_len) == 0) {
return pool->total_prev_len + at; return pool->total_prev_len + at;

View File

@ -38,9 +38,21 @@
// first entry in enum will be MP_QSTRnull=0, which indicates invalid/no qstr // first entry in enum will be MP_QSTRnull=0, which indicates invalid/no qstr
enum { enum {
#ifndef NO_QSTR #ifndef NO_QSTR
#define QDEF(id, hash, len, str) id,
#define QDEF0(id, hash, len, str) id,
#define QDEF1(id, hash, len, str)
#include "genhdr/qstrdefs.generated.h" #include "genhdr/qstrdefs.generated.h"
#undef QDEF #undef QDEF0
#undef QDEF1
MP_QSTRspecial_const_number_of, // no underscore so it can't clash with any of the above
#define QDEF0(id, hash, len, str)
#define QDEF1(id, hash, len, str) id,
#include "genhdr/qstrdefs.generated.h"
#undef QDEF0
#undef QDEF1
#endif #endif
MP_QSTRnumber_of, // no underscore so it can't clash with any of the above MP_QSTRnumber_of, // no underscore so it can't clash with any of the above
}; };
@ -71,6 +83,7 @@ typedef struct _qstr_pool_t {
size_t len; size_t len;
qstr_hash_t *hashes; qstr_hash_t *hashes;
qstr_len_t *lengths; qstr_len_t *lengths;
bool sorted;
const char *qstrs[]; const char *qstrs[];
} qstr_pool_t; } qstr_pool_t;

View File

@ -1396,8 +1396,8 @@ def freeze_mpy(base_qstrs, compiled_modules):
# don't add duplicates # don't add duplicates
if q is None or q.qstr_esc in base_qstrs or q.qstr_esc in new: if q is None or q.qstr_esc in base_qstrs or q.qstr_esc in new:
continue continue
new[q.qstr_esc] = (len(new), q.qstr_esc, q.str, bytes_cons(q.str, "utf8")) new[q.qstr_esc] = qstrutil.Qstr(len(new), q.qstr_esc, q.str)
new = sorted(new.values(), key=lambda x: x[0]) new = sorted(new.values(), key=lambda x: (x.qhash, x.qlen))
print('#include "py/mpconfig.h"') print('#include "py/mpconfig.h"')
print('#include "py/objint.h"') print('#include "py/objint.h"')
@ -1438,9 +1438,9 @@ def freeze_mpy(base_qstrs, compiled_modules):
print("enum {") print("enum {")
for i in range(len(new)): for i in range(len(new)):
if i == 0: if i == 0:
print(" MP_QSTR_%s = MP_QSTRnumber_of," % new[i][1]) print(" MP_QSTR_%s = MP_QSTRnumber_of," % new[i].ident)
else: else:
print(" MP_QSTR_%s," % new[i][1]) print(" MP_QSTR_%s," % new[i].ident)
print("};") print("};")
# As in qstr.c, set so that the first dynamically allocated pool is twice this size; must be <= the len # As in qstr.c, set so that the first dynamically allocated pool is twice this size; must be <= the len
@ -1460,18 +1460,17 @@ def freeze_mpy(base_qstrs, compiled_modules):
print() print()
print("const qstr_hash_t mp_qstr_frozen_const_hashes[] = {") print("const qstr_hash_t mp_qstr_frozen_const_hashes[] = {")
qstr_size = {"metadata": 0, "data": 0} qstr_size = {"metadata": 0, "data": 0}
for _, _, _, qbytes in new: for q in new:
qhash = qstrutil.compute_hash(qbytes, config.MICROPY_QSTR_BYTES_IN_HASH) print(" %d," % q.qhash)
print(" %d," % qhash)
print("};") print("};")
print() print()
print("const qstr_len_t mp_qstr_frozen_const_lengths[] = {") print("const qstr_len_t mp_qstr_frozen_const_lengths[] = {")
for _, _, _, qbytes in new: for q in new:
print(" %d," % len(qbytes)) print(" %d," % len(q.qbytes))
qstr_size["metadata"] += ( qstr_size["metadata"] += (
config.MICROPY_QSTR_BYTES_IN_LEN + config.MICROPY_QSTR_BYTES_IN_HASH config.MICROPY_QSTR_BYTES_IN_LEN + config.MICROPY_QSTR_BYTES_IN_HASH
) )
qstr_size["data"] += len(qbytes) qstr_size["data"] += len(q.qbytes)
print("};") print("};")
print() print()
print("extern const qstr_pool_t mp_qstr_const_pool;") print("extern const qstr_pool_t mp_qstr_const_pool;")
@ -1482,11 +1481,15 @@ def freeze_mpy(base_qstrs, compiled_modules):
print(" %u, // used entries" % len(new)) print(" %u, // used entries" % len(new))
print(" (qstr_hash_t *)mp_qstr_frozen_const_hashes,") print(" (qstr_hash_t *)mp_qstr_frozen_const_hashes,")
print(" (qstr_len_t *)mp_qstr_frozen_const_lengths,") print(" (qstr_len_t *)mp_qstr_frozen_const_lengths,")
print(" true, // entries are sorted")
print(" {") print(" {")
for _, _, qstr, qbytes in new: for q in new:
print(' "%s",' % qstrutil.escape_bytes(qstr, qbytes)) print(' "%s",' % q.qdata)
qstr_content += ( qstr_content += (
config.MICROPY_QSTR_BYTES_IN_LEN + config.MICROPY_QSTR_BYTES_IN_HASH + len(qbytes) + 1 config.MICROPY_QSTR_BYTES_IN_LEN
+ config.MICROPY_QSTR_BYTES_IN_HASH
+ len(q.qbytes)
+ 1
) )
print(" },") print(" },")
print("};") print("};")
@ -1781,6 +1784,8 @@ def main():
# Create initial list of global qstrs. # Create initial list of global qstrs.
global_qstrs = GlobalQStrList() global_qstrs = GlobalQStrList()
qstrutil.Qstr.cfg_bytes_len = config.MICROPY_QSTR_BYTES_IN_LEN
qstrutil.Qstr.cfg_bytes_hash = config.MICROPY_QSTR_BYTES_IN_HASH
# Load all .mpy files. # Load all .mpy files.
try: try:
@ -1788,6 +1793,7 @@ def main():
except MPYReadError as er: except MPYReadError as er:
print(er, file=sys.stderr) print(er, file=sys.stderr)
sys.exit(1) sys.exit(1)
base_qstrs = {}
if args.hexdump: if args.hexdump:
hexdump_mpy(compiled_modules) hexdump_mpy(compiled_modules)