py/qstr: Sort qstrs by hash.

Instead of sorting by string, sort qstrs by (hash, len).
This allows a faster binary search in qstr_find_strn, since it's faster
to compare hashes than strings.
A few strings needed to be moved to the special string pool (QDEF0)
because their qstr values are assumed to be small (8 bits) in py/scope.c.
This commit is contained in:
Amir Gonnen 2022-02-23 22:23:43 +02:00
parent 497e519cbc
commit b7e692c8e4
3 changed files with 21 additions and 14 deletions

View File

@ -61,7 +61,12 @@ static_qstr_list = [
" ", " ",
"*", "*",
"/", "/",
"<dictcomp>",
"<genexpr>",
"<lambda>",
"<listcomp>",
"<module>", "<module>",
"<setcomp>",
"_", "_",
"__call__", "__call__",
"__class__", "__class__",
@ -373,7 +378,7 @@ def print_qstr_data(qstrs):
print('QDEF0(MP_QSTR_%s, %d, %d, "%s")' % (q.ident, q.qhash, q.qlen, q.qdata)) print('QDEF0(MP_QSTR_%s, %d, %d, "%s")' % (q.ident, q.qhash, q.qlen, q.qdata))
# go through each qstr in pool 1 and print it out. pool1 is regularly sorted. # go through each qstr in pool 1 and print it out. pool1 is regularly sorted.
for q in sorted(q1_values, key=lambda x: x.qstr): for q in sorted(q1_values, key=lambda x: (x.qhash, x.qlen)):
print('QDEF1(MP_QSTR_%s, %d, %d, "%s")' % (q.ident, q.qhash, q.qlen, q.qdata)) print('QDEF1(MP_QSTR_%s, %d, %d, "%s")' % (q.ident, q.qhash, q.qlen, q.qdata))

View File

@ -242,24 +242,26 @@ qstr qstr_find_strn(const char *str, size_t str_len) {
if (pool->sorted) { if (pool->sorted) {
while (high - low > MP_QSTR_SEARCH_THRESHOLD) { while (high - low > MP_QSTR_SEARCH_THRESHOLD) {
size_t mid = (low + high + 1) / 2; size_t mid = (low + high + 1) / 2;
size_t len = pool->lengths[mid]; int cmp = pool->hashes[mid] - str_hash;
if (len > str_len) { if (cmp == 0) cmp = pool->lengths[mid] - str_len;
len = str_len;
}
int cmp = memcmp(pool->qstrs[mid], str, str_len);
if (cmp < 0) { if (cmp < 0) {
low = mid; low = mid;
} else if (cmp > 0) { } else if (cmp > 0) {
high = mid; high = mid;
} else { } else {
if (pool->lengths[mid] < str_len) { // avoid a rare (hash,len) collisions
low = mid; while (MP_UNLIKELY(
} else if (pool->lengths[mid] > str_len) { pool->lengths[mid] != str_len ||
high = mid; memcmp(pool->qstrs[mid], str, str_len) != 0)) {
} else { mid++;
return pool->total_prev_len + mid; if (mid > high ||
pool->hashes[mid] != str_hash ||
pool->lengths[mid] != str_len) {
return 0;
} }
} }
return pool->total_prev_len + mid;
}
} }
} }

View File

@ -1397,7 +1397,7 @@ def freeze_mpy(base_qstrs, compiled_modules):
if q is None or q.qstr_esc in base_qstrs or q.qstr_esc in new: if q is None or q.qstr_esc in base_qstrs or q.qstr_esc in new:
continue continue
new[q.qstr_esc] = qstrutil.Qstr(len(new), q.qstr_esc, q.str) new[q.qstr_esc] = qstrutil.Qstr(len(new), q.qstr_esc, q.str)
new = sorted(new.values(), key=lambda x: x.qstr) new = sorted(new.values(), key=lambda x: (x.qhash, x.qlen))
print('#include "py/mpconfig.h"') print('#include "py/mpconfig.h"')
print('#include "py/objint.h"') print('#include "py/objint.h"')