Merge branch 'qstr_bsearch' into update_micropython_v1.19

This commit is contained in:
Amir Gonnen 2022-07-03 00:22:59 +03:00
commit b912b45c15
4 changed files with 185 additions and 61 deletions

View File

@ -61,7 +61,12 @@ static_qstr_list = [
" ",
"*",
"/",
"<dictcomp>",
"<genexpr>",
"<lambda>",
"<listcomp>",
"<module>",
"<setcomp>",
"_",
"__call__",
"__class__",
@ -223,6 +228,8 @@ static_qstr_list = [
]
# this must match the equivalent function in qstr.c
def compute_hash(qstr, bytes_hash):
hash = 5381
for b in qstr:
@ -257,7 +264,7 @@ def parse_input_headers(infiles):
# add the qstr to the list, with order number to retain original order in file
order = len(qstrs) - 300000
qstrs[ident] = (order, ident, qstr)
qstrs[ident] = Qstr(order, ident, qstr)
# read the qstrs in from the input files
for infile in infiles:
@ -308,7 +315,7 @@ def parse_input_headers(infiles):
order = -190000
elif ident.startswith("__"):
order -= 100000
qstrs[ident] = (order, ident, qstr)
qstrs[ident] = Qstr(order, ident, qstr)
if not qcfgs:
sys.stderr.write("ERROR: Empty preprocessor output - check for errors above\n")
@ -317,47 +324,72 @@ def parse_input_headers(infiles):
return qcfgs, qstrs
def _is_plain_ascii(qstr):
    """Return True if qstr is only printable ASCII without backslash or double quote."""
    return all(32 <= ord(c) <= 126 and c != "\\" and c != '"' for c in qstr)


def _hex_pairs(qbytes):
    """Return every byte of qbytes rendered as a ``\\xNN`` escape."""
    return "".join(("\\x%02x" % b) for b in qbytes)


def escape_bytes(qstr, qbytes):
    """Render a qstr as the body of a C string literal.

    qstr is the text and qbytes its encoded bytes.  Kept as a module-level
    function for callers that do not go through the Qstr class.
    """
    if _is_plain_ascii(qstr):
        # qstr is all printable ASCII so render it as-is (for easier debugging)
        return qstr
    # qstr contains non-printable codes so render entire thing as hex pairs
    return _hex_pairs(qbytes)


class Qstr:
    """One qstr together with the values derived from it for the generated tables.

    Attributes:
        order: sort key assigned by whoever builds the list of qstrs.
        ident: identifier used to form the ``MP_QSTR_<ident>`` name.
        qstr: the string itself.
    """

    # Widths (in bytes) of the stored length and hash.  They are class-wide and
    # must be assigned before qlen/qhash are read: with the initial value 0 the
    # length check in qlen rejects every non-empty string.
    cfg_bytes_len = 0
    cfg_bytes_hash = 0

    def __init__(self, order, ident, qstr):
        self.order = order
        self.ident = ident
        self.qstr = qstr

    @property
    def qbytes(self):
        """The qstr encoded as UTF-8 bytes."""
        return bytes_cons(self.qstr, "utf8")

    @property
    def qlen(self):
        """Length of the encoded qstr in bytes.

        Raises AssertionError if the length does not fit in cfg_bytes_len bytes.
        """
        length = len(self.qbytes)
        if length >= (1 << (8 * Qstr.cfg_bytes_len)):
            print("qstr is too long:", self.qstr)
            # raise explicitly: an assert statement is stripped under "python -O"
            raise AssertionError("qstr is too long: %r" % (self.qstr,))
        return length

    @property
    def qhash(self):
        """Hash of the encoded qstr, computed with cfg_bytes_hash."""
        return compute_hash(self.qbytes, Qstr.cfg_bytes_hash)

    def _escape_bytes(self):
        if _is_plain_ascii(self.qstr):
            # qstr is all printable ASCII so render it as-is (for easier debugging)
            return self.qstr
        # qstr contains non-printable codes so render entire thing as hex pairs
        return _hex_pairs(self.qbytes)

    @property
    def qdata(self):
        """The qstr rendered as the body of a C string literal."""
        return self._escape_bytes()
def make_bytes(cfg_bytes_len, cfg_bytes_hash, qstr):
    """Return the ``hash, len, "data"`` argument text for one qstr definition.

    cfg_bytes_len is the number of bytes available to store the length and
    cfg_bytes_hash is passed through to compute_hash.

    Raises AssertionError if the encoded qstr is too long for cfg_bytes_len bytes.
    """
    qbytes = bytes_cons(qstr, "utf8")
    qlen = len(qbytes)
    qhash = compute_hash(qbytes, cfg_bytes_hash)
    if qlen >= (1 << (8 * cfg_bytes_len)):
        print("qstr is too long:", qstr)
        # raise explicitly: an assert statement is stripped under "python -O"
        raise AssertionError("qstr is too long: %r" % (qstr,))
    qdata = escape_bytes(qstr, qbytes)
    return '%d, %d, "%s"' % (qhash, qlen, qdata)
def print_qstr_data(qstrs):
    """Print the generated qstr definitions to stdout.

    qstrs is a mapping whose values expose order, ident, qhash, qlen and
    qdata.  Values with a negative order are emitted as QDEF0 entries sorted
    by order; all others are emitted as QDEF1 entries sorted by (qhash, qlen).
    """
    # print out the starter of the generated C header file
    print("// This file was automatically generated by makeqstrdata.py")
    print("")

    # add NULL qstr with no hash or data
    print('QDEF0(MP_QSTRnull, 0, 0, "")')

    # split qstr values into two pools. static consts first.
    q0_values = [q for q in qstrs.values() if q.order < 0]
    q1_values = [q for q in qstrs.values() if q.order >= 0]

    # go through each qstr in pool 0 and print it out. pool0 has special sort.
    for q in sorted(q0_values, key=lambda x: x.order):
        print('QDEF0(MP_QSTR_%s, %d, %d, "%s")' % (q.ident, q.qhash, q.qlen, q.qdata))

    # go through each qstr in pool 1 and print it out. pool1 is regularly sorted.
    for q in sorted(q1_values, key=lambda x: (x.qhash, x.qlen)):
        print('QDEF1(MP_QSTR_%s, %d, %d, "%s")' % (q.ident, q.qhash, q.qlen, q.qdata))
def do_work(infiles):
    """Parse the input headers and print the generated qstr data for them."""
    qcfgs, qstrs = parse_input_headers(infiles)

    # get config variables; they must be set on Qstr before any qlen/qhash is read
    Qstr.cfg_bytes_len = int(qcfgs["BYTES_IN_LEN"])
    Qstr.cfg_bytes_hash = int(qcfgs["BYTES_IN_HASH"])

    print_qstr_data(qstrs)
if __name__ == "__main__":

101
py/qstr.c
View File

@ -74,34 +74,82 @@ mp_uint_t qstr_compute_hash(const byte *data, size_t len) {
return hash;
}
// Hashes of the qstrs in mp_qstr_special_const_pool: the generated header is
// included with QDEF0 expanding to its hash argument and QDEF1 to nothing.
const qstr_hash_t mp_qstr_const_hashes0[] = {
    #ifndef NO_QSTR
    #define QDEF0(id, hash, len, str) hash,
    #define QDEF1(id, hash, len, str)
    #include "genhdr/qstrdefs.generated.h"
    #undef QDEF0
    #undef QDEF1
    #endif
    // The pool declares MP_QSTRspecial_const_number_of + 1 entries and the
    // sequential search in qstr_find_strn reads hashes[] for every one of them,
    // so this array needs an element for the spacer slot as well.
    0, // spacer for MP_QSTRspecial_const_number_of
};
const qstr_len_t mp_qstr_const_lengths[] = {
const qstr_hash_t mp_qstr_const_hashes1[] = {
#ifndef NO_QSTR
#define QDEF(id, hash, len, str) len,
#define QDEF0(id, hash, len, str)
#define QDEF1(id, hash, len, str) hash,
#include "genhdr/qstrdefs.generated.h"
#undef QDEF
#undef QDEF0
#undef QDEF1
#endif
};
const qstr_pool_t mp_qstr_const_pool = {
const qstr_len_t mp_qstr_const_lengths0[] = {
#ifndef NO_QSTR
#define QDEF0(id, hash, len, str) len,
#define QDEF1(id, hash, len, str)
#include "genhdr/qstrdefs.generated.h"
#undef QDEF0
#undef QDEF1
#endif
};
// Lengths of the qstrs in mp_qstr_const_pool: the generated header is included
// with QDEF1 expanding to its len argument and QDEF0 expanding to nothing.
const qstr_len_t mp_qstr_const_lengths1[] = {
#ifndef NO_QSTR
#define QDEF0(id, hash, len, str)
#define QDEF1(id, hash, len, str) len,
#include "genhdr/qstrdefs.generated.h"
#undef QDEF0
#undef QDEF1
#endif
};
const qstr_pool_t mp_qstr_special_const_pool = {
NULL, // no previous pool
0, // no previous pool
MICROPY_ALLOC_QSTR_ENTRIES_INIT,
MP_QSTRnumber_of, // corresponds to number of strings in array just below
(qstr_hash_t *)mp_qstr_const_hashes,
(qstr_len_t *)mp_qstr_const_lengths,
MP_QSTRspecial_const_number_of + 1, // corresponds to number of strings in array just below
(qstr_hash_t *)mp_qstr_const_hashes0,
(qstr_len_t *)mp_qstr_const_lengths0,
false, // special constant qstrs are not sorted
{
#ifndef NO_QSTR
#define QDEF(id, hash, len, str) str,
#define QDEF0(id, hash, len, str) str,
#define QDEF1(id, hash, len, str)
#include "genhdr/qstrdefs.generated.h"
#undef QDEF
#undef QDEF0
#undef QDEF1
#endif
(const char *)"", // spacer for MP_QSTRspecial_const_number_of
},
};
// Second built-in pool, holding the QDEF1 entries of the generated header.
// It is linked to mp_qstr_special_const_pool and is flagged as sorted.
const qstr_pool_t mp_qstr_const_pool = {
(qstr_pool_t *)&mp_qstr_special_const_pool,
MP_QSTRspecial_const_number_of + 1, // same value as the special pool's entry count; presumably total_prev_len (confirm against qstr_pool_t)
MICROPY_ALLOC_QSTR_ENTRIES_INIT,
MP_QSTRnumber_of -
(MP_QSTRspecial_const_number_of + 1), // corresponds to number of strings in array just below
(qstr_hash_t *)mp_qstr_const_hashes1,
(qstr_len_t *)mp_qstr_const_lengths1,
true, // constant qstrs are sorted
{
#ifndef NO_QSTR
#define QDEF0(id, hash, len, str)
#define QDEF1(id, hash, len, str) str,
#include "genhdr/qstrdefs.generated.h"
#undef QDEF0
#undef QDEF1
#endif
},
};
@ -164,6 +212,7 @@ STATIC qstr qstr_add(mp_uint_t hash, mp_uint_t len, const char *q_ptr) {
pool->total_prev_len = MP_STATE_VM(last_pool)->total_prev_len + MP_STATE_VM(last_pool)->len;
pool->alloc = new_alloc;
pool->len = 0;
pool->sorted = false;
MP_STATE_VM(last_pool) = pool;
DEBUG_printf("QSTR: allocate new pool of size %d\n", MP_STATE_VM(last_pool)->alloc);
}
@ -179,13 +228,37 @@ STATIC qstr qstr_add(mp_uint_t hash, mp_uint_t len, const char *q_ptr) {
return MP_STATE_VM(last_pool)->total_prev_len + at;
}
#define MP_QSTR_SEARCH_THRESHOLD 10
qstr qstr_find_strn(const char *str, size_t str_len) {
// work out hash of str
mp_uint_t str_hash = qstr_compute_hash((const byte *)str, str_len);
// search pools for the data
for (const qstr_pool_t *pool = MP_STATE_VM(last_pool); pool != NULL; pool = pool->prev) {
for (mp_uint_t at = 0, top = pool->len; at < top; at++) {
size_t low = 0;
size_t high = pool->len - 1;
// binary search inside the pool
if (pool->sorted) {
while (high - low > MP_QSTR_SEARCH_THRESHOLD) {
size_t mid = (low + high + 1) / 2;
int cmp = pool->hashes[mid] - str_hash;
if (cmp == 0) {
cmp = pool->lengths[mid] - str_len;
}
if (cmp > 0) {
high = mid;
} else {
low = mid;
if (cmp == 0) {
break;
}
}
}
}
// sequential search for the remaining strings
for (mp_uint_t at = low; at < high + 1; at++) {
if (pool->hashes[at] == str_hash && pool->lengths[at] == str_len
&& memcmp(pool->qstrs[at], str, str_len) == 0) {
return pool->total_prev_len + at;

View File

@ -38,9 +38,21 @@
// first entry in enum will be MP_QSTRnull=0, which indicates invalid/no qstr
// The generated header is included twice so that every QDEF0 id is numbered
// before MP_QSTRspecial_const_number_of and every QDEF1 id after it.
enum {
    #ifndef NO_QSTR
    #define QDEF0(id, hash, len, str) id,
    #define QDEF1(id, hash, len, str)
    #include "genhdr/qstrdefs.generated.h"
    #undef QDEF0
    #undef QDEF1
    MP_QSTRspecial_const_number_of, // no underscore so it can't clash with any of the above
    #define QDEF0(id, hash, len, str)
    #define QDEF1(id, hash, len, str) id,
    #include "genhdr/qstrdefs.generated.h"
    #undef QDEF0
    #undef QDEF1
    #endif
    MP_QSTRnumber_of, // no underscore so it can't clash with any of the above
};
@ -71,6 +83,7 @@ typedef struct _qstr_pool_t {
size_t len;
qstr_hash_t *hashes;
qstr_len_t *lengths;
bool sorted;
const char *qstrs[];
} qstr_pool_t;

View File

@ -1396,8 +1396,8 @@ def freeze_mpy(base_qstrs, compiled_modules):
# don't add duplicates
if q is None or q.qstr_esc in base_qstrs or q.qstr_esc in new:
continue
new[q.qstr_esc] = (len(new), q.qstr_esc, q.str, bytes_cons(q.str, "utf8"))
new = sorted(new.values(), key=lambda x: x[0])
new[q.qstr_esc] = qstrutil.Qstr(len(new), q.qstr_esc, q.str)
new = sorted(new.values(), key=lambda x: (x.qhash, x.qlen))
print('#include "py/mpconfig.h"')
print('#include "py/objint.h"')
@ -1438,9 +1438,9 @@ def freeze_mpy(base_qstrs, compiled_modules):
print("enum {")
for i in range(len(new)):
if i == 0:
print(" MP_QSTR_%s = MP_QSTRnumber_of," % new[i][1])
print(" MP_QSTR_%s = MP_QSTRnumber_of," % new[i].ident)
else:
print(" MP_QSTR_%s," % new[i][1])
print(" MP_QSTR_%s," % new[i].ident)
print("};")
# As in qstr.c, set so that the first dynamically allocated pool is twice this size; must be <= the len
@ -1460,18 +1460,17 @@ def freeze_mpy(base_qstrs, compiled_modules):
print()
print("const qstr_hash_t mp_qstr_frozen_const_hashes[] = {")
qstr_size = {"metadata": 0, "data": 0}
for _, _, _, qbytes in new:
qhash = qstrutil.compute_hash(qbytes, config.MICROPY_QSTR_BYTES_IN_HASH)
print(" %d," % qhash)
for q in new:
print(" %d," % q.qhash)
print("};")
print()
print("const qstr_len_t mp_qstr_frozen_const_lengths[] = {")
for _, _, _, qbytes in new:
print(" %d," % len(qbytes))
for q in new:
print(" %d," % len(q.qbytes))
qstr_size["metadata"] += (
config.MICROPY_QSTR_BYTES_IN_LEN + config.MICROPY_QSTR_BYTES_IN_HASH
)
qstr_size["data"] += len(qbytes)
qstr_size["data"] += len(q.qbytes)
print("};")
print()
print("extern const qstr_pool_t mp_qstr_const_pool;")
@ -1482,11 +1481,15 @@ def freeze_mpy(base_qstrs, compiled_modules):
print(" %u, // used entries" % len(new))
print(" (qstr_hash_t *)mp_qstr_frozen_const_hashes,")
print(" (qstr_len_t *)mp_qstr_frozen_const_lengths,")
print(" true, // entries are sorted")
print(" {")
for _, _, qstr, qbytes in new:
print(' "%s",' % qstrutil.escape_bytes(qstr, qbytes))
for q in new:
print(' "%s",' % q.qdata)
qstr_content += (
config.MICROPY_QSTR_BYTES_IN_LEN + config.MICROPY_QSTR_BYTES_IN_HASH + len(qbytes) + 1
config.MICROPY_QSTR_BYTES_IN_LEN
+ config.MICROPY_QSTR_BYTES_IN_HASH
+ len(q.qbytes)
+ 1
)
print(" },")
print("};")
@ -1781,6 +1784,8 @@ def main():
# Create initial list of global qstrs.
global_qstrs = GlobalQStrList()
qstrutil.Qstr.cfg_bytes_len = config.MICROPY_QSTR_BYTES_IN_LEN
qstrutil.Qstr.cfg_bytes_hash = config.MICROPY_QSTR_BYTES_IN_HASH
# Load all .mpy files.
try:
@ -1788,6 +1793,7 @@ def main():
except MPYReadError as er:
print(er, file=sys.stderr)
sys.exit(1)
base_qstrs = {}
if args.hexdump:
hexdump_mpy(compiled_modules)