diff --git a/py/makeqstrdata.py b/py/makeqstrdata.py
index e332ab94e..78e829e3e 100644
--- a/py/makeqstrdata.py
+++ b/py/makeqstrdata.py
@@ -347,12 +347,21 @@ def print_qstr_data(qcfgs, qstrs):
     print("")
 
     # add NULL qstr with no hash or data
-    print('QDEF(MP_QSTRnull, 0, 0, "")')
+    print('QDEF0(MP_QSTRnull, 0, 0, "")')
 
-    # go through each qstr and print it out
-    for order, ident, qstr in sorted(qstrs.values(), key=lambda x: x[0]):
+    # split qstr values into two pools. static consts first.
+    q0_values = [q for q in qstrs.values() if q[0] < 0]
+    q1_values = [q for q in qstrs.values() if q[0] >= 0]
+
+    # go through each qstr in pool 0 and print it out. pool0 has special sort.
+    for order, ident, qstr in sorted(q0_values, key=lambda x: x[0]):
         qbytes = make_bytes(cfg_bytes_len, cfg_bytes_hash, qstr)
-        print("QDEF(MP_QSTR_%s, %s)" % (ident, qbytes))
+        print("QDEF0(MP_QSTR_%s, %s)" % (ident, qbytes))
+
+    # go through each qstr in pool 1 and print it out. pool1 is regularly sorted.
+    for order, ident, qstr in sorted(q1_values, key=lambda x: x[2]):
+        qbytes = make_bytes(cfg_bytes_len, cfg_bytes_hash, qstr)
+        print("QDEF1(MP_QSTR_%s, %s)" % (ident, qbytes))
 
 
 def do_work(infiles):
diff --git a/py/qstr.c b/py/qstr.c
index f9ca10683..7a06d956d 100644
--- a/py/qstr.c
+++ b/py/qstr.c
@@ -74,34 +74,82 @@ mp_uint_t qstr_compute_hash(const byte *data, size_t len) {
     return hash;
 }
 
-const qstr_hash_t mp_qstr_const_hashes[] = {
+const qstr_hash_t mp_qstr_const_hashes0[] = {
     #ifndef NO_QSTR
-#define QDEF(id, hash, len, str) hash,
+#define QDEF0(id, hash, len, str) hash,
+#define QDEF1(id, hash, len, str)
     #include "genhdr/qstrdefs.generated.h"
-#undef QDEF
+#undef QDEF0
+#undef QDEF1
     #endif
 };
 
-const qstr_len_t mp_qstr_const_lengths[] = {
+const qstr_hash_t mp_qstr_const_hashes1[] = {
     #ifndef NO_QSTR
-#define QDEF(id, hash, len, str) len,
+#define QDEF0(id, hash, len, str)
+#define QDEF1(id, hash, len, str) hash,
     #include "genhdr/qstrdefs.generated.h"
-#undef QDEF
+#undef QDEF0
+#undef QDEF1
     #endif
 };
 
+const qstr_len_t mp_qstr_const_lengths0[] = {
+    #ifndef NO_QSTR
+#define QDEF0(id, hash, len, str) len,
+#define QDEF1(id, hash, len, str)
+    #include "genhdr/qstrdefs.generated.h"
+#undef QDEF0
+#undef QDEF1
+    #endif
+};
+
+const qstr_len_t mp_qstr_const_lengths1[] = {
+    #ifndef NO_QSTR
+#define QDEF0(id, hash, len, str)
+#define QDEF1(id, hash, len, str) len,
+    #include "genhdr/qstrdefs.generated.h"
+#undef QDEF0
+#undef QDEF1
+    #endif
+};
+
+const qstr_pool_t mp_qstr_special_const_pool = {
+    NULL,               // no previous pool
+    0,                  // no previous pool
+    MICROPY_ALLOC_QSTR_ENTRIES_INIT,
+    MP_QSTRspecial_const_number_of + 1, // corresponds to number of strings in array just below
+    (qstr_hash_t *)mp_qstr_const_hashes0,
+    (qstr_len_t *)mp_qstr_const_lengths0,
+    false, // special constant qstrs are not sorted
+    {
+        #ifndef NO_QSTR
+#define QDEF0(id, hash, len, str) str,
+#define QDEF1(id, hash, len, str)
+        #include "genhdr/qstrdefs.generated.h"
+#undef QDEF0
+#undef QDEF1
+        #endif
+        (const char *)"", // spacer for MP_QSTRspecial_const_number_of
+    },
+};
+
 const qstr_pool_t mp_qstr_const_pool = {
-    NULL,               // no previous pool
-    0,                  // no previous pool
+    (qstr_pool_t *)&mp_qstr_special_const_pool,
+    MP_QSTRspecial_const_number_of + 1,
     MICROPY_ALLOC_QSTR_ENTRIES_INIT,
-    MP_QSTRnumber_of,   // corresponds to number of strings in array just below
-    (qstr_hash_t *)mp_qstr_const_hashes,
-    (qstr_len_t *)mp_qstr_const_lengths,
+    MP_QSTRnumber_of -
+    (MP_QSTRspecial_const_number_of + 1), // corresponds to number of strings in array just below
+    (qstr_hash_t *)mp_qstr_const_hashes1,
+    (qstr_len_t *)mp_qstr_const_lengths1,
+    true, // constant qstrs are sorted
     {
         #ifndef NO_QSTR
-#define QDEF(id, hash, len, str) str,
+#define QDEF0(id, hash, len, str)
+#define QDEF1(id, hash, len, str) str,
         #include "genhdr/qstrdefs.generated.h"
-#undef QDEF
+#undef QDEF0
+#undef QDEF1
         #endif
     },
 };
@@ -164,6 +212,7 @@ STATIC qstr qstr_add(mp_uint_t hash, mp_uint_t len, const char *q_ptr) {
         pool->total_prev_len = MP_STATE_VM(last_pool)->total_prev_len + MP_STATE_VM(last_pool)->len;
         pool->alloc = new_alloc;
         pool->len = 0;
+        pool->sorted = false;
         MP_STATE_VM(last_pool) = pool;
         DEBUG_printf("QSTR: allocate new pool of size %d\n", MP_STATE_VM(last_pool)->alloc);
     }
@@ -179,13 +228,52 @@ STATIC qstr qstr_add(mp_uint_t hash, mp_uint_t len, const char *q_ptr) {
     return MP_STATE_VM(last_pool)->total_prev_len + at;
 }
 
+// bisection of a sorted pool stops once the search window is no wider than this
+#define MP_QSTR_SEARCH_THRESHOLD 10
+
 qstr qstr_find_strn(const char *str, size_t str_len) {
-    // work out hash of str
     mp_uint_t str_hash = qstr_compute_hash((const byte *)str, str_len);
 
     // search pools for the data
     for (const qstr_pool_t *pool = MP_STATE_VM(last_pool); pool != NULL; pool = pool->prev) {
-        for (mp_uint_t at = 0, top = pool->len; at < top; at++) {
+        // an empty pool cannot match, and "len - 1" below would wrap around
+        if (pool->len == 0) {
+            continue;
+        }
+
+        size_t low = 0;
+        size_t high = pool->len - 1;
+
+        // binary search inside the pool
+        if (pool->sorted) {
+            while (high - low > MP_QSTR_SEARCH_THRESHOLD) {
+                size_t mid = (low + high + 1) / 2;
+                // compare only the common prefix so that memcmp never reads
+                // past the end of the shorter string
+                size_t len = pool->lengths[mid];
+                if (len > str_len) {
+                    len = str_len;
+                }
+                int cmp = memcmp(pool->qstrs[mid], str, len);
+                if (cmp < 0) {
+                    low = mid;
+                } else if (cmp > 0) {
+                    high = mid;
+                } else {
+                    // equal prefixes: the shorter string sorts first
+                    if (pool->lengths[mid] < str_len) {
+                        low = mid;
+                    } else if (pool->lengths[mid] > str_len) {
+                        high = mid;
+                    } else {
+                        return pool->total_prev_len + mid;
+                    }
+                }
+            }
+        }
+
+        // sequential search for the remaining strings
+        for (mp_uint_t at = low; at < high + 1; at++) {
             if (pool->hashes[at] == str_hash && pool->lengths[at] == str_len
                 && memcmp(pool->qstrs[at], str, str_len) == 0) {
                 return pool->total_prev_len + at;
diff --git a/py/qstr.h b/py/qstr.h
index fa634f90b..6a2098217 100644
--- a/py/qstr.h
+++ b/py/qstr.h
@@ -38,9 +38,21 @@
 // first entry in enum will be MP_QSTRnull=0, which indicates invalid/no qstr
 enum {
     #ifndef NO_QSTR
-#define QDEF(id, hash, len, str) id,
+
+#define QDEF0(id, hash, len, str) id,
+#define QDEF1(id, hash, len, str)
     #include "genhdr/qstrdefs.generated.h"
-#undef QDEF
+#undef QDEF0
+#undef QDEF1
+
+    MP_QSTRspecial_const_number_of, // no underscore so it can't clash with any of the above
+
+#define QDEF0(id, hash, len, str)
+#define QDEF1(id, hash, len, str) id,
+    #include "genhdr/qstrdefs.generated.h"
+#undef QDEF0
+#undef QDEF1
+
     #endif
     MP_QSTRnumber_of, // no underscore so it can't clash with any of the above
 };
@@ -71,6 +83,7 @@ typedef struct _qstr_pool_t {
     size_t len;
     qstr_hash_t *hashes;
     qstr_len_t *lengths;
+    bool sorted;
     const char *qstrs[];
 } qstr_pool_t;
 
diff --git a/tools/makemanifest.py b/tools/makemanifest.py
index 8cdc3eb77..c3df17755 100644
--- a/tools/makemanifest.py
+++ b/tools/makemanifest.py
@@ -415,7 +415,7 @@ def main():
             b'#include "py/emitglue.h"\n'
             b"extern const qstr_pool_t mp_qstr_const_pool;\n"
             b"const qstr_pool_t mp_qstr_frozen_const_pool = {\n"
-            b"    (qstr_pool_t*)&mp_qstr_const_pool, MP_QSTRnumber_of, 0, 0\n"
+            b"    (qstr_pool_t*)&mp_qstr_const_pool, MP_QSTRnumber_of, 0, 0, NULL, NULL, false\n"
             b"};\n"
             b'const char mp_frozen_names[] = { MP_FROZEN_STR_NAMES "\\0"};\n'
             b"const mp_raw_code_t *const mp_frozen_mpy_content[] = {NULL};\n"
diff --git a/tools/mpy-tool.py b/tools/mpy-tool.py
index 31212fd5b..8a6baba6c 100755
--- a/tools/mpy-tool.py
+++ b/tools/mpy-tool.py
@@ -1397,7 +1397,7 @@ def freeze_mpy(base_qstrs, compiled_modules):
             if q is None or q.qstr_esc in base_qstrs or q.qstr_esc in new:
                 continue
             new[q.qstr_esc] = (len(new), q.qstr_esc, q.str, bytes_cons(q.str, "utf8"))
-    new = sorted(new.values(), key=lambda x: x[0])
+    new = sorted(new.values(), key=lambda x: x[2])
 
     print('#include "py/mpconfig.h"')
     print('#include "py/objint.h"')
@@ -1482,6 +1482,7 @@ def freeze_mpy(base_qstrs, compiled_modules):
     print("    %u, // used entries" % len(new))
     print("    (qstr_hash_t *)mp_qstr_frozen_const_hashes,")
     print("    (qstr_len_t *)mp_qstr_frozen_const_lengths,")
+    print("    true, // entries are sorted")
     print("    {")
     for _, _, qstr, qbytes in new:
         print('        "%s",' % qstrutil.escape_bytes(qstr, qbytes))