py/makeqstrdata.py: Refactor qstr class.

Use a Qstr class instead of a tuple, whose derived properties are calculated
only when accessed.
This is needed as preparation for using the hash as the sort key instead of
the qstr string. It also makes the code more readable when referring to a qstr
in py/makeqstrdata.py and tools/mpy-tool.py (for example, refer to q.order
instead of q[0], or q.qstr instead of q[2]).

Signed-off-by: Amir Gonnen <amirgonnen@gmail.com>
This commit is contained in:
Amir Gonnen 2022-02-20 00:10:25 +02:00
parent 18eefe7621
commit 497e519cbc
2 changed files with 70 additions and 47 deletions

View File

@ -223,6 +223,8 @@ static_qstr_list = [
]
# this must match the equivalent function in qstr.c
def compute_hash(qstr, bytes_hash):
hash = 5381
for b in qstr:
@ -257,7 +259,7 @@ def parse_input_headers(infiles):
# add the qstr to the list, with order number to retain original order in file
order = len(qstrs) - 300000
qstrs[ident] = (order, ident, qstr)
qstrs[ident] = Qstr(order, ident, qstr)
# read the qstrs in from the input files
for infile in infiles:
@ -308,7 +310,7 @@ def parse_input_headers(infiles):
order = -190000
elif ident.startswith("__"):
order -= 100000
qstrs[ident] = (order, ident, qstr)
qstrs[ident] = Qstr(order, ident, qstr)
if not qcfgs:
sys.stderr.write("ERROR: Empty preprocessor output - check for errors above\n")
@ -317,31 +319,44 @@ def parse_input_headers(infiles):
return qcfgs, qstrs
def escape_bytes(qstr, qbytes):
    """Return the text to place between double quotes for a qstr.

    ``qstr`` is the string form and ``qbytes`` its encoded bytes.  When every
    character of ``qstr`` is printable ASCII other than backslash and double
    quote, the string is returned unchanged (easier to read when debugging).
    Otherwise the whole value is rendered as ``\\xNN`` hex pairs, one per
    element of ``qbytes``.
    """

    def is_plain(ch):
        # Printable ASCII that needs no escaping inside a quoted literal.
        return 32 <= ord(ch) <= 126 and ch not in ("\\", '"')

    for ch in qstr:
        if not is_plain(ch):
            # One awkward character is enough: hex-encode the entire thing.
            return "".join("\\x%02x" % octet for octet in qbytes)
    return qstr
class Qstr:
    """One qstr entry: its sort order, C identifier suffix and string value.

    The derived fields (``qbytes``, ``qlen``, ``qhash``, ``qdata``) are
    properties, so they are computed on access rather than at construction.
    ``cfg_bytes_len`` and ``cfg_bytes_hash`` are class-wide settings that must
    be assigned by the caller before ``qlen`` / ``qhash`` are read.
    """

    # Number of bytes available to store a length; bounds the value of qlen.
    cfg_bytes_len = 0
    # Passed to compute_hash() as its bytes_hash argument.
    cfg_bytes_hash = 0

    def __init__(self, order, ident, qstr):
        self.order = order
        self.ident = ident
        self.qstr = qstr

    @property
    def qbytes(self):
        """The qstr encoded as UTF-8 bytes."""
        return bytes_cons(self.qstr, "utf8")

    @property
    def qlen(self):
        """Length of the encoded qstr in bytes.

        Raises AssertionError if the length does not fit in
        ``Qstr.cfg_bytes_len`` bytes.
        """
        # Encode once and reuse the result for both the check and the return.
        length = len(self.qbytes)
        if length >= (1 << (8 * Qstr.cfg_bytes_len)):
            print("qstr is too long:", self.qstr)
            # Explicit raise: a bare `assert False` is removed under `python -O`,
            # which would let an over-long qstr through unnoticed.
            raise AssertionError("qstr is too long: %r" % (self.qstr,))
        return length

    @property
    def qhash(self):
        """Hash of the encoded qstr as computed by compute_hash()."""
        return compute_hash(self.qbytes, Qstr.cfg_bytes_hash)

    def _escape_bytes(self):
        """Return the qstr as text suitable for a double-quoted literal."""
        if all(32 <= ord(c) <= 126 and c != "\\" and c != '"' for c in self.qstr):
            # qstr is all printable ASCII so render it as-is (for easier debugging)
            return self.qstr
        # qstr contains non-printable codes so render entire thing as hex pairs
        return "".join(("\\x%02x" % b) for b in self.qbytes)

    @property
    def qdata(self):
        """Escaped textual form of the qstr (see _escape_bytes)."""
        return self._escape_bytes()
def make_bytes(cfg_bytes_len, cfg_bytes_hash, qstr):
    """Return the ``<hash>, <len>, "<data>"`` text for one qstr.

    ``cfg_bytes_len`` is the number of bytes available to store the length and
    ``cfg_bytes_hash`` is forwarded to compute_hash().  Raises AssertionError
    if the UTF-8 encoded ``qstr`` is too long to fit in ``cfg_bytes_len`` bytes.
    """
    qbytes = bytes_cons(qstr, "utf8")
    qlen = len(qbytes)
    qhash = compute_hash(qbytes, cfg_bytes_hash)
    if qlen >= (1 << (8 * cfg_bytes_len)):
        print("qstr is too long:", qstr)
        # Explicit raise: a bare `assert False` is removed under `python -O`,
        # which would let an over-long qstr through unnoticed.
        raise AssertionError("qstr is too long: %r" % (qstr,))
    qdata = escape_bytes(qstr, qbytes)
    return '%d, %d, "%s"' % (qhash, qlen, qdata)
def print_qstr_data(qcfgs, qstrs):
# get config variables
cfg_bytes_len = int(qcfgs["BYTES_IN_LEN"])
cfg_bytes_hash = int(qcfgs["BYTES_IN_HASH"])
def print_qstr_data(qstrs):
# print out the starter of the generated C header file
print("// This file was automatically generated by makeqstrdata.py")
print("")
@ -350,23 +365,26 @@ def print_qstr_data(qcfgs, qstrs):
print('QDEF0(MP_QSTRnull, 0, 0, "")')
# split qstr values into two pools. static consts first.
q0_values = [q for q in qstrs.values() if q[0] < 0]
q1_values = [q for q in qstrs.values() if q[0] >= 0]
q0_values = [q for q in qstrs.values() if q.order < 0]
q1_values = [q for q in qstrs.values() if q.order >= 0]
# go through each qstr in pool 0 and print it out. pool0 has special sort.
for order, ident, qstr in sorted(q0_values, key=lambda x: x[0]):
qbytes = make_bytes(cfg_bytes_len, cfg_bytes_hash, qstr)
print("QDEF0(MP_QSTR_%s, %s)" % (ident, qbytes))
for q in sorted(q0_values, key=lambda x: x.order):
print('QDEF0(MP_QSTR_%s, %d, %d, "%s")' % (q.ident, q.qhash, q.qlen, q.qdata))
# go through each qstr in pool 1 and print it out. pool1 is regularly sorted.
for order, ident, qstr in sorted(q1_values, key=lambda x: x[2]):
qbytes = make_bytes(cfg_bytes_len, cfg_bytes_hash, qstr)
print("QDEF1(MP_QSTR_%s, %s)" % (ident, qbytes))
for q in sorted(q1_values, key=lambda x: x.qstr):
print('QDEF1(MP_QSTR_%s, %d, %d, "%s")' % (q.ident, q.qhash, q.qlen, q.qdata))
def do_work(infiles):
qcfgs, qstrs = parse_input_headers(infiles)
print_qstr_data(qcfgs, qstrs)
# get config variables
Qstr.cfg_bytes_len = int(qcfgs["BYTES_IN_LEN"])
Qstr.cfg_bytes_hash = int(qcfgs["BYTES_IN_HASH"])
print_qstr_data(qstrs)
if __name__ == "__main__":

View File

@ -1396,8 +1396,8 @@ def freeze_mpy(base_qstrs, compiled_modules):
# don't add duplicates
if q is None or q.qstr_esc in base_qstrs or q.qstr_esc in new:
continue
new[q.qstr_esc] = (len(new), q.qstr_esc, q.str, bytes_cons(q.str, "utf8"))
new = sorted(new.values(), key=lambda x: x[2])
new[q.qstr_esc] = qstrutil.Qstr(len(new), q.qstr_esc, q.str)
new = sorted(new.values(), key=lambda x: x.qstr)
print('#include "py/mpconfig.h"')
print('#include "py/objint.h"')
@ -1438,9 +1438,9 @@ def freeze_mpy(base_qstrs, compiled_modules):
print("enum {")
for i in range(len(new)):
if i == 0:
print(" MP_QSTR_%s = MP_QSTRnumber_of," % new[i][1])
print(" MP_QSTR_%s = MP_QSTRnumber_of," % new[i].ident)
else:
print(" MP_QSTR_%s," % new[i][1])
print(" MP_QSTR_%s," % new[i].ident)
print("};")
# As in qstr.c, set so that the first dynamically allocated pool is twice this size; must be <= the len
@ -1460,18 +1460,17 @@ def freeze_mpy(base_qstrs, compiled_modules):
print()
print("const qstr_hash_t mp_qstr_frozen_const_hashes[] = {")
qstr_size = {"metadata": 0, "data": 0}
for _, _, _, qbytes in new:
qhash = qstrutil.compute_hash(qbytes, config.MICROPY_QSTR_BYTES_IN_HASH)
print(" %d," % qhash)
for q in new:
print(" %d," % q.qhash)
print("};")
print()
print("const qstr_len_t mp_qstr_frozen_const_lengths[] = {")
for _, _, _, qbytes in new:
print(" %d," % len(qbytes))
for q in new:
print(" %d," % len(q.qbytes))
qstr_size["metadata"] += (
config.MICROPY_QSTR_BYTES_IN_LEN + config.MICROPY_QSTR_BYTES_IN_HASH
)
qstr_size["data"] += len(qbytes)
qstr_size["data"] += len(q.qbytes)
print("};")
print()
print("extern const qstr_pool_t mp_qstr_const_pool;")
@ -1484,10 +1483,13 @@ def freeze_mpy(base_qstrs, compiled_modules):
print(" (qstr_len_t *)mp_qstr_frozen_const_lengths,")
print(" true, // entries are sorted")
print(" {")
for _, _, qstr, qbytes in new:
print(' "%s",' % qstrutil.escape_bytes(qstr, qbytes))
for q in new:
print(' "%s",' % q.qdata)
qstr_content += (
config.MICROPY_QSTR_BYTES_IN_LEN + config.MICROPY_QSTR_BYTES_IN_HASH + len(qbytes) + 1
config.MICROPY_QSTR_BYTES_IN_LEN
+ config.MICROPY_QSTR_BYTES_IN_HASH
+ len(q.qbytes)
+ 1
)
print(" },")
print("};")
@ -1782,6 +1784,8 @@ def main():
# Create initial list of global qstrs.
global_qstrs = GlobalQStrList()
qstrutil.Qstr.cfg_bytes_len = config.MICROPY_QSTR_BYTES_IN_LEN
qstrutil.Qstr.cfg_bytes_hash = config.MICROPY_QSTR_BYTES_IN_HASH
# Load all .mpy files.
try:
@ -1789,6 +1793,7 @@ def main():
except MPYReadError as er:
print(er, file=sys.stderr)
sys.exit(1)
base_qstrs = {}
if args.hexdump:
hexdump_mpy(compiled_modules)