aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRoberto Ierusalimschy <roberto@inf.puc-rio.br>2024-11-14 11:48:25 -0300
committerRoberto Ierusalimschy <roberto@inf.puc-rio.br>2024-11-14 11:48:25 -0300
commit9a91fe1640ddbe5b55e7454541059372b971f400 (patch)
tree902589105ff77ee8f3000e80f7e0459cd21eb068
parent2491b87c10db530eac2f3d81cd39f95875d16cd5 (diff)
downloadlua-9a91fe1640ddbe5b55e7454541059372b971f400.tar.gz
lua-9a91fe1640ddbe5b55e7454541059372b971f400.tar.bz2
lua-9a91fe1640ddbe5b55e7454541059372b971f400.zip
Add extra size when resizing tables with deleted keys
Without this extra space, sequences of insertions/deletions (and some other uses) can have unpexpected low performances. See the added tests for an example, and *Mathematical Models to Analyze Lua Hybrid Tables and Why They Need a Fix* (Martínez, Nicaud, Rotondo; arXiv:2208.13602v2) for detais.
-rw-r--r--ltable.c26
-rw-r--r--testes/nextvar.lua62
2 files changed, 82 insertions, 6 deletions
diff --git a/ltable.c b/ltable.c
index 923f3eaa..0192d039 100644
--- a/ltable.c
+++ b/ltable.c
@@ -461,6 +461,7 @@ static int keyinarray (Table *t, lua_Integer key) {
461** Structure to count the keys in a table. 461** Structure to count the keys in a table.
462** 'total' is the total number of keys in the table. 462** 'total' is the total number of keys in the table.
463** 'na' is the number of *array indices* in the table (see 'arrayindex'). 463** 'na' is the number of *array indices* in the table (see 'arrayindex').
464** 'deleted' is true if there are deleted nodes in the hash part.
464** 'nums' is a "count array" where 'nums[i]' is the number of integer 465** 'nums' is a "count array" where 'nums[i]' is the number of integer
465** keys between 2^(i - 1) + 1 and 2^i. Note that 'na' is the summation 466** keys between 2^(i - 1) + 1 and 2^i. Note that 'na' is the summation
466** of 'nums'. 467** of 'nums'.
@@ -468,6 +469,7 @@ static int keyinarray (Table *t, lua_Integer key) {
468typedef struct { 469typedef struct {
469 unsigned total; 470 unsigned total;
470 unsigned na; 471 unsigned na;
472 int deleted;
471 unsigned nums[MAXABITS + 1]; 473 unsigned nums[MAXABITS + 1];
472} Counters; 474} Counters;
473 475
@@ -560,14 +562,21 @@ static void numusearray (const Table *t, Counters *ct) {
560 562
561 563
562/* 564/*
563** Count keys in hash part of table 't'. 565** Count keys in hash part of table 't'. As this only happens during
566** a rehash, all nodes have been used. A node can have a nil value only
567** if it was deleted after being created.
564*/ 568*/
565static void numusehash (const Table *t, Counters *ct) { 569static void numusehash (const Table *t, Counters *ct) {
566 unsigned i = sizenode(t); 570 unsigned i = sizenode(t);
567 unsigned total = 0; 571 unsigned total = 0;
568 while (i--) { 572 while (i--) {
569 Node *n = &t->node[i]; 573 Node *n = &t->node[i];
570 if (!isempty(gval(n))) { 574 if (isempty(gval(n))) {
575 /* entry was deleted; key cannot be nil */
576 lua_assert(isdummy(t) || !keyisnil(n));
577 ct->deleted = 1;
578 }
579 else {
571 total++; 580 total++;
572 if (keyisinteger(n)) 581 if (keyisinteger(n))
573 countint(keyival(n), ct); 582 countint(keyival(n), ct);
@@ -799,10 +808,12 @@ static void rehash (lua_State *L, Table *t, const TValue *ek) {
799 unsigned asize; /* optimal size for array part */ 808 unsigned asize; /* optimal size for array part */
800 Counters ct; 809 Counters ct;
801 unsigned i; 810 unsigned i;
811 unsigned nsize; /* size for the hash part */
802 setlimittosize(t); 812 setlimittosize(t);
803 /* reset counts */ 813 /* reset counts */
804 for (i = 0; i <= MAXABITS; i++) ct.nums[i] = 0; 814 for (i = 0; i <= MAXABITS; i++) ct.nums[i] = 0;
805 ct.na = 0; 815 ct.na = 0;
816 ct.deleted = 0;
806 ct.total = 1; /* count extra key */ 817 ct.total = 1; /* count extra key */
807 if (ttisinteger(ek)) 818 if (ttisinteger(ek))
808 countint(ivalue(ek), &ct); /* extra key may go to array */ 819 countint(ivalue(ek), &ct); /* extra key may go to array */
@@ -815,12 +826,17 @@ static void rehash (lua_State *L, Table *t, const TValue *ek) {
815 numusearray(t, &ct); /* count keys in array part */ 826 numusearray(t, &ct); /* count keys in array part */
816 asize = computesizes(&ct); /* compute new size for array part */ 827 asize = computesizes(&ct); /* compute new size for array part */
817 } 828 }
829 /* all keys not in the array part go to the hash part */
830 nsize = ct.total - ct.na;
831 if (ct.deleted) { /* table has deleted entries? */
832 /* insertion-deletion-insertion: give hash some extra size to
833 avoid constant resizings */
834 nsize += nsize >> 2;
835 }
818 /* resize the table to new computed sizes */ 836 /* resize the table to new computed sizes */
819 luaH_resize(L, t, asize, ct.total - ct.na); 837 luaH_resize(L, t, asize, nsize);
820} 838}
821 839
822
823
824/* 840/*
825** }============================================================= 841** }=============================================================
826*/ 842*/
diff --git a/testes/nextvar.lua b/testes/nextvar.lua
index 85c9ff6b..00e509f8 100644
--- a/testes/nextvar.lua
+++ b/testes/nextvar.lua
@@ -23,6 +23,12 @@ local function printTable (t)
23 end 23 end
24end 24end
25---------------------------------------------------------------- 25----------------------------------------------------------------
26local function countentries (t)
27 local e = 0
28 for _ in pairs(t) do e = e + 1 end
29 return e
30end
31----------------------------------------------------------------
26 32
27 33
28local function check (t, na, nh) 34local function check (t, na, nh)
@@ -115,6 +121,24 @@ do -- overflow (must wrap-around)
115 assert(k == nil) 121 assert(k == nil)
116end 122end
117 123
124
125do
126 -- alternate insertions and deletions in an almost full hash.
127 -- In versions pre-5.5, that causes constant rehashings and
128 -- takes a long time to complete.
129 local a = {}
130 for i = 1, 2^11 - 1 do
131 a[i .. ""] = true
132 end
133
134 for i = 1, 1e5 do
135 local key = i .. "."
136 a[key] = true
137 a[key] = nil
138 end
139 assert(countentries(a) == 2^11 - 1)
140end
141
118if not T then 142if not T then
119 (Message or print) 143 (Message or print)
120 ('\n >>> testC not active: skipping tests for table sizes <<<\n') 144 ('\n >>> testC not active: skipping tests for table sizes <<<\n')
@@ -202,6 +226,23 @@ for i = 1,lim do
202 check(a, 0, mp2(i)) 226 check(a, 0, mp2(i))
203end 227end
204 228
229
230-- insert and delete elements until a rehash occurr. Caller must ensure
231-- that a rehash will change the shape of the table. Must repeat because
232-- the insertion may collide with the deleted element, and then there is
233-- no rehash.
234local function forcerehash (t)
235 local na, nh = T.querytab(t)
236 local i = 10000
237 repeat
238 i = i + 1
239 t[i] = true
240 t[i] = undef
241 local nna, nnh = T.querytab(t)
242 until nna ~= na or nnh ~= nh
243end
244
245
205do 246do
206 local a = {} 247 local a = {}
207 for i=1,16 do a[i] = i end 248 for i=1,16 do a[i] = i end
@@ -212,7 +253,7 @@ do
212 a[30] = undef 253 a[30] = undef
213 check(a, 0, 8) -- 5 elements in the hash part: [12]-[16] 254 check(a, 0, 8) -- 5 elements in the hash part: [12]-[16]
214 a[10] = 1 255 a[10] = 1
215 for i=30,50 do a[i] = true; a[i] = undef end -- force a rehash 256 forcerehash(a)
216 check(a, 16, 1) 257 check(a, 16, 1)
217 for i=1,14 do a[i] = true; a[i] = undef end 258 for i=1,14 do a[i] = true; a[i] = undef end
218 check(a, 16, 1) -- no rehash... 259 check(a, 16, 1) -- no rehash...
@@ -242,6 +283,25 @@ do -- "almost sparse" arrays
242end 283end
243 284
244 285
286do
287 -- alternate insertions and deletions should give some extra
288 -- space for the hash part. Otherwise, a mix of insertions/deletions
289 -- could cause too many rehashes. (See the other test for "alternate
290 -- insertions and deletions" in this file.)
291 local a = {}
292 for i = 1, 256 do
293 a[i .. ""] = true
294 end
295 check(a, 0, 256) -- hash part is full
296 a["256"] = nil -- delete a key
297 forcerehash(a)
298 -- table has only 255 elements, but it got some extra space;
299 -- otherwise, almost each delete-insert would rehash the table again.
300 assert(countentries(a) == 255)
301 check(a, 0, 512)
302end
303
304
245-- size tests for vararg 305-- size tests for vararg
246lim = 35 306lim = 35
247local function foo (n, ...) 307local function foo (n, ...)