diff options
author | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 2024-11-14 11:48:25 -0300 |
---|---|---|
committer | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 2024-11-14 11:48:25 -0300 |
commit | 9a91fe1640ddbe5b55e7454541059372b971f400 (patch) | |
tree | 902589105ff77ee8f3000e80f7e0459cd21eb068 | |
parent | 2491b87c10db530eac2f3d81cd39f95875d16cd5 (diff) | |
download | lua-9a91fe1640ddbe5b55e7454541059372b971f400.tar.gz lua-9a91fe1640ddbe5b55e7454541059372b971f400.tar.bz2 lua-9a91fe1640ddbe5b55e7454541059372b971f400.zip |
Add extra size when resizing tables with deleted keys
Without this extra space, sequences of insertions/deletions (and
some other uses) can have unpexpected low performances. See the
added tests for an example, and *Mathematical Models to Analyze Lua
Hybrid Tables and Why They Need a Fix* (MartÃnez, Nicaud, Rotondo;
arXiv:2208.13602v2) for detais.
-rw-r--r-- | ltable.c | 26 | ||||
-rw-r--r-- | testes/nextvar.lua | 62 |
2 files changed, 82 insertions, 6 deletions
@@ -461,6 +461,7 @@ static int keyinarray (Table *t, lua_Integer key) { | |||
461 | ** Structure to count the keys in a table. | 461 | ** Structure to count the keys in a table. |
462 | ** 'total' is the total number of keys in the table. | 462 | ** 'total' is the total number of keys in the table. |
463 | ** 'na' is the number of *array indices* in the table (see 'arrayindex'). | 463 | ** 'na' is the number of *array indices* in the table (see 'arrayindex'). |
464 | ** 'deleted' is true if there are deleted nodes in the hash part. | ||
464 | ** 'nums' is a "count array" where 'nums[i]' is the number of integer | 465 | ** 'nums' is a "count array" where 'nums[i]' is the number of integer |
465 | ** keys between 2^(i - 1) + 1 and 2^i. Note that 'na' is the summation | 466 | ** keys between 2^(i - 1) + 1 and 2^i. Note that 'na' is the summation |
466 | ** of 'nums'. | 467 | ** of 'nums'. |
@@ -468,6 +469,7 @@ static int keyinarray (Table *t, lua_Integer key) { | |||
468 | typedef struct { | 469 | typedef struct { |
469 | unsigned total; | 470 | unsigned total; |
470 | unsigned na; | 471 | unsigned na; |
472 | int deleted; | ||
471 | unsigned nums[MAXABITS + 1]; | 473 | unsigned nums[MAXABITS + 1]; |
472 | } Counters; | 474 | } Counters; |
473 | 475 | ||
@@ -560,14 +562,21 @@ static void numusearray (const Table *t, Counters *ct) { | |||
560 | 562 | ||
561 | 563 | ||
562 | /* | 564 | /* |
563 | ** Count keys in hash part of table 't'. | 565 | ** Count keys in hash part of table 't'. As this only happens during |
566 | ** a rehash, all nodes have been used. A node can have a nil value only | ||
567 | ** if it was deleted after being created. | ||
564 | */ | 568 | */ |
565 | static void numusehash (const Table *t, Counters *ct) { | 569 | static void numusehash (const Table *t, Counters *ct) { |
566 | unsigned i = sizenode(t); | 570 | unsigned i = sizenode(t); |
567 | unsigned total = 0; | 571 | unsigned total = 0; |
568 | while (i--) { | 572 | while (i--) { |
569 | Node *n = &t->node[i]; | 573 | Node *n = &t->node[i]; |
570 | if (!isempty(gval(n))) { | 574 | if (isempty(gval(n))) { |
575 | /* entry was deleted; key cannot be nil */ | ||
576 | lua_assert(isdummy(t) || !keyisnil(n)); | ||
577 | ct->deleted = 1; | ||
578 | } | ||
579 | else { | ||
571 | total++; | 580 | total++; |
572 | if (keyisinteger(n)) | 581 | if (keyisinteger(n)) |
573 | countint(keyival(n), ct); | 582 | countint(keyival(n), ct); |
@@ -799,10 +808,12 @@ static void rehash (lua_State *L, Table *t, const TValue *ek) { | |||
799 | unsigned asize; /* optimal size for array part */ | 808 | unsigned asize; /* optimal size for array part */ |
800 | Counters ct; | 809 | Counters ct; |
801 | unsigned i; | 810 | unsigned i; |
811 | unsigned nsize; /* size for the hash part */ | ||
802 | setlimittosize(t); | 812 | setlimittosize(t); |
803 | /* reset counts */ | 813 | /* reset counts */ |
804 | for (i = 0; i <= MAXABITS; i++) ct.nums[i] = 0; | 814 | for (i = 0; i <= MAXABITS; i++) ct.nums[i] = 0; |
805 | ct.na = 0; | 815 | ct.na = 0; |
816 | ct.deleted = 0; | ||
806 | ct.total = 1; /* count extra key */ | 817 | ct.total = 1; /* count extra key */ |
807 | if (ttisinteger(ek)) | 818 | if (ttisinteger(ek)) |
808 | countint(ivalue(ek), &ct); /* extra key may go to array */ | 819 | countint(ivalue(ek), &ct); /* extra key may go to array */ |
@@ -815,12 +826,17 @@ static void rehash (lua_State *L, Table *t, const TValue *ek) { | |||
815 | numusearray(t, &ct); /* count keys in array part */ | 826 | numusearray(t, &ct); /* count keys in array part */ |
816 | asize = computesizes(&ct); /* compute new size for array part */ | 827 | asize = computesizes(&ct); /* compute new size for array part */ |
817 | } | 828 | } |
829 | /* all keys not in the array part go to the hash part */ | ||
830 | nsize = ct.total - ct.na; | ||
831 | if (ct.deleted) { /* table has deleted entries? */ | ||
832 | /* insertion-deletion-insertion: give hash some extra size to | ||
833 | avoid constant resizings */ | ||
834 | nsize += nsize >> 2; | ||
835 | } | ||
818 | /* resize the table to new computed sizes */ | 836 | /* resize the table to new computed sizes */ |
819 | luaH_resize(L, t, asize, ct.total - ct.na); | 837 | luaH_resize(L, t, asize, nsize); |
820 | } | 838 | } |
821 | 839 | ||
822 | |||
823 | |||
824 | /* | 840 | /* |
825 | ** }============================================================= | 841 | ** }============================================================= |
826 | */ | 842 | */ |
diff --git a/testes/nextvar.lua b/testes/nextvar.lua index 85c9ff6b..00e509f8 100644 --- a/testes/nextvar.lua +++ b/testes/nextvar.lua | |||
@@ -23,6 +23,12 @@ local function printTable (t) | |||
23 | end | 23 | end |
24 | end | 24 | end |
25 | ---------------------------------------------------------------- | 25 | ---------------------------------------------------------------- |
26 | local function countentries (t) | ||
27 | local e = 0 | ||
28 | for _ in pairs(t) do e = e + 1 end | ||
29 | return e | ||
30 | end | ||
31 | ---------------------------------------------------------------- | ||
26 | 32 | ||
27 | 33 | ||
28 | local function check (t, na, nh) | 34 | local function check (t, na, nh) |
@@ -115,6 +121,24 @@ do -- overflow (must wrap-around) | |||
115 | assert(k == nil) | 121 | assert(k == nil) |
116 | end | 122 | end |
117 | 123 | ||
124 | |||
125 | do | ||
126 | -- alternate insertions and deletions in an almost full hash. | ||
127 | -- In versions pre-5.5, that causes constant rehashings and | ||
128 | -- takes a long time to complete. | ||
129 | local a = {} | ||
130 | for i = 1, 2^11 - 1 do | ||
131 | a[i .. ""] = true | ||
132 | end | ||
133 | |||
134 | for i = 1, 1e5 do | ||
135 | local key = i .. "." | ||
136 | a[key] = true | ||
137 | a[key] = nil | ||
138 | end | ||
139 | assert(countentries(a) == 2^11 - 1) | ||
140 | end | ||
141 | |||
118 | if not T then | 142 | if not T then |
119 | (Message or print) | 143 | (Message or print) |
120 | ('\n >>> testC not active: skipping tests for table sizes <<<\n') | 144 | ('\n >>> testC not active: skipping tests for table sizes <<<\n') |
@@ -202,6 +226,23 @@ for i = 1,lim do | |||
202 | check(a, 0, mp2(i)) | 226 | check(a, 0, mp2(i)) |
203 | end | 227 | end |
204 | 228 | ||
229 | |||
230 | -- insert and delete elements until a rehash occurr. Caller must ensure | ||
231 | -- that a rehash will change the shape of the table. Must repeat because | ||
232 | -- the insertion may collide with the deleted element, and then there is | ||
233 | -- no rehash. | ||
234 | local function forcerehash (t) | ||
235 | local na, nh = T.querytab(t) | ||
236 | local i = 10000 | ||
237 | repeat | ||
238 | i = i + 1 | ||
239 | t[i] = true | ||
240 | t[i] = undef | ||
241 | local nna, nnh = T.querytab(t) | ||
242 | until nna ~= na or nnh ~= nh | ||
243 | end | ||
244 | |||
245 | |||
205 | do | 246 | do |
206 | local a = {} | 247 | local a = {} |
207 | for i=1,16 do a[i] = i end | 248 | for i=1,16 do a[i] = i end |
@@ -212,7 +253,7 @@ do | |||
212 | a[30] = undef | 253 | a[30] = undef |
213 | check(a, 0, 8) -- 5 elements in the hash part: [12]-[16] | 254 | check(a, 0, 8) -- 5 elements in the hash part: [12]-[16] |
214 | a[10] = 1 | 255 | a[10] = 1 |
215 | for i=30,50 do a[i] = true; a[i] = undef end -- force a rehash | 256 | forcerehash(a) |
216 | check(a, 16, 1) | 257 | check(a, 16, 1) |
217 | for i=1,14 do a[i] = true; a[i] = undef end | 258 | for i=1,14 do a[i] = true; a[i] = undef end |
218 | check(a, 16, 1) -- no rehash... | 259 | check(a, 16, 1) -- no rehash... |
@@ -242,6 +283,25 @@ do -- "almost sparse" arrays | |||
242 | end | 283 | end |
243 | 284 | ||
244 | 285 | ||
286 | do | ||
287 | -- alternate insertions and deletions should give some extra | ||
288 | -- space for the hash part. Otherwise, a mix of insertions/deletions | ||
289 | -- could cause too many rehashes. (See the other test for "alternate | ||
290 | -- insertions and deletions" in this file.) | ||
291 | local a = {} | ||
292 | for i = 1, 256 do | ||
293 | a[i .. ""] = true | ||
294 | end | ||
295 | check(a, 0, 256) -- hash part is full | ||
296 | a["256"] = nil -- delete a key | ||
297 | forcerehash(a) | ||
298 | -- table has only 255 elements, but it got some extra space; | ||
299 | -- otherwise, almost each delete-insert would rehash the table again. | ||
300 | assert(countentries(a) == 255) | ||
301 | check(a, 0, 512) | ||
302 | end | ||
303 | |||
304 | |||
245 | -- size tests for vararg | 305 | -- size tests for vararg |
246 | lim = 35 | 306 | lim = 35 |
247 | local function foo (n, ...) | 307 | local function foo (n, ...) |