From 9a91fe1640ddbe5b55e7454541059372b971f400 Mon Sep 17 00:00:00 2001 From: Roberto Ierusalimschy Date: Thu, 14 Nov 2024 11:48:25 -0300 Subject: Add extra size when resizing tables with deleted keys MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Without this extra space, sequences of insertions/deletions (and some other uses) can have unpexpected low performances. See the added tests for an example, and *Mathematical Models to Analyze Lua Hybrid Tables and Why They Need a Fix* (Martínez, Nicaud, Rotondo; arXiv:2208.13602v2) for detais. --- ltable.c | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) (limited to 'ltable.c') diff --git a/ltable.c b/ltable.c index 923f3eaa..0192d039 100644 --- a/ltable.c +++ b/ltable.c @@ -461,6 +461,7 @@ static int keyinarray (Table *t, lua_Integer key) { ** Structure to count the keys in a table. ** 'total' is the total number of keys in the table. ** 'na' is the number of *array indices* in the table (see 'arrayindex'). +** 'deleted' is true if there are deleted nodes in the hash part. ** 'nums' is a "count array" where 'nums[i]' is the number of integer ** keys between 2^(i - 1) + 1 and 2^i. Note that 'na' is the summation ** of 'nums'. @@ -468,6 +469,7 @@ static int keyinarray (Table *t, lua_Integer key) { typedef struct { unsigned total; unsigned na; + int deleted; unsigned nums[MAXABITS + 1]; } Counters; @@ -560,14 +562,21 @@ static void numusearray (const Table *t, Counters *ct) { /* -** Count keys in hash part of table 't'. +** Count keys in hash part of table 't'. As this only happens during +** a rehash, all nodes have been used. A node can have a nil value only +** if it was deleted after being created. */ static void numusehash (const Table *t, Counters *ct) { unsigned i = sizenode(t); unsigned total = 0; while (i--) { Node *n = &t->node[i]; - if (!isempty(gval(n))) { + if (isempty(gval(n))) { + /* entry was deleted; key cannot be nil */ + lua_assert(isdummy(t) || !keyisnil(n)); + ct->deleted = 1; + } + else { total++; if (keyisinteger(n)) countint(keyival(n), ct); @@ -799,10 +808,12 @@ static void rehash (lua_State *L, Table *t, const TValue *ek) { unsigned asize; /* optimal size for array part */ Counters ct; unsigned i; + unsigned nsize; /* size for the hash part */ setlimittosize(t); /* reset counts */ for (i = 0; i <= MAXABITS; i++) ct.nums[i] = 0; ct.na = 0; + ct.deleted = 0; ct.total = 1; /* count extra key */ if (ttisinteger(ek)) countint(ivalue(ek), &ct); /* extra key may go to array */ @@ -815,12 +826,17 @@ static void rehash (lua_State *L, Table *t, const TValue *ek) { numusearray(t, &ct); /* count keys in array part */ asize = computesizes(&ct); /* compute new size for array part */ } + /* all keys not in the array part go to the hash part */ + nsize = ct.total - ct.na; + if (ct.deleted) { /* table has deleted entries? */ + /* insertion-deletion-insertion: give hash some extra size to + avoid constant resizings */ + nsize += nsize >> 2; + } /* resize the table to new computed sizes */ - luaH_resize(L, t, asize, ct.total - ct.na); + luaH_resize(L, t, asize, nsize); } - - /* ** }============================================================= */ -- cgit v1.2.3-55-g6feb