diff options
| author | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 2024-11-14 11:48:25 -0300 |
|---|---|---|
| committer | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 2024-11-14 11:48:25 -0300 |
| commit | 9a91fe1640ddbe5b55e7454541059372b971f400 (patch) | |
| tree | 902589105ff77ee8f3000e80f7e0459cd21eb068 | |
| parent | 2491b87c10db530eac2f3d81cd39f95875d16cd5 (diff) | |
| download | lua-9a91fe1640ddbe5b55e7454541059372b971f400.tar.gz lua-9a91fe1640ddbe5b55e7454541059372b971f400.tar.bz2 lua-9a91fe1640ddbe5b55e7454541059372b971f400.zip | |
Add extra size when resizing tables with deleted keys
Without this extra space, sequences of insertions/deletions (and
some other uses) can have unpexpected low performances. See the
added tests for an example, and *Mathematical Models to Analyze Lua
Hybrid Tables and Why They Need a Fix* (MartÃnez, Nicaud, Rotondo;
arXiv:2208.13602v2) for detais.
| -rw-r--r-- | ltable.c | 26 | ||||
| -rw-r--r-- | testes/nextvar.lua | 62 |
2 files changed, 82 insertions, 6 deletions
| @@ -461,6 +461,7 @@ static int keyinarray (Table *t, lua_Integer key) { | |||
| 461 | ** Structure to count the keys in a table. | 461 | ** Structure to count the keys in a table. |
| 462 | ** 'total' is the total number of keys in the table. | 462 | ** 'total' is the total number of keys in the table. |
| 463 | ** 'na' is the number of *array indices* in the table (see 'arrayindex'). | 463 | ** 'na' is the number of *array indices* in the table (see 'arrayindex'). |
| 464 | ** 'deleted' is true if there are deleted nodes in the hash part. | ||
| 464 | ** 'nums' is a "count array" where 'nums[i]' is the number of integer | 465 | ** 'nums' is a "count array" where 'nums[i]' is the number of integer |
| 465 | ** keys between 2^(i - 1) + 1 and 2^i. Note that 'na' is the summation | 466 | ** keys between 2^(i - 1) + 1 and 2^i. Note that 'na' is the summation |
| 466 | ** of 'nums'. | 467 | ** of 'nums'. |
| @@ -468,6 +469,7 @@ static int keyinarray (Table *t, lua_Integer key) { | |||
| 468 | typedef struct { | 469 | typedef struct { |
| 469 | unsigned total; | 470 | unsigned total; |
| 470 | unsigned na; | 471 | unsigned na; |
| 472 | int deleted; | ||
| 471 | unsigned nums[MAXABITS + 1]; | 473 | unsigned nums[MAXABITS + 1]; |
| 472 | } Counters; | 474 | } Counters; |
| 473 | 475 | ||
| @@ -560,14 +562,21 @@ static void numusearray (const Table *t, Counters *ct) { | |||
| 560 | 562 | ||
| 561 | 563 | ||
| 562 | /* | 564 | /* |
| 563 | ** Count keys in hash part of table 't'. | 565 | ** Count keys in hash part of table 't'. As this only happens during |
| 566 | ** a rehash, all nodes have been used. A node can have a nil value only | ||
| 567 | ** if it was deleted after being created. | ||
| 564 | */ | 568 | */ |
| 565 | static void numusehash (const Table *t, Counters *ct) { | 569 | static void numusehash (const Table *t, Counters *ct) { |
| 566 | unsigned i = sizenode(t); | 570 | unsigned i = sizenode(t); |
| 567 | unsigned total = 0; | 571 | unsigned total = 0; |
| 568 | while (i--) { | 572 | while (i--) { |
| 569 | Node *n = &t->node[i]; | 573 | Node *n = &t->node[i]; |
| 570 | if (!isempty(gval(n))) { | 574 | if (isempty(gval(n))) { |
| 575 | /* entry was deleted; key cannot be nil */ | ||
| 576 | lua_assert(isdummy(t) || !keyisnil(n)); | ||
| 577 | ct->deleted = 1; | ||
| 578 | } | ||
| 579 | else { | ||
| 571 | total++; | 580 | total++; |
| 572 | if (keyisinteger(n)) | 581 | if (keyisinteger(n)) |
| 573 | countint(keyival(n), ct); | 582 | countint(keyival(n), ct); |
| @@ -799,10 +808,12 @@ static void rehash (lua_State *L, Table *t, const TValue *ek) { | |||
| 799 | unsigned asize; /* optimal size for array part */ | 808 | unsigned asize; /* optimal size for array part */ |
| 800 | Counters ct; | 809 | Counters ct; |
| 801 | unsigned i; | 810 | unsigned i; |
| 811 | unsigned nsize; /* size for the hash part */ | ||
| 802 | setlimittosize(t); | 812 | setlimittosize(t); |
| 803 | /* reset counts */ | 813 | /* reset counts */ |
| 804 | for (i = 0; i <= MAXABITS; i++) ct.nums[i] = 0; | 814 | for (i = 0; i <= MAXABITS; i++) ct.nums[i] = 0; |
| 805 | ct.na = 0; | 815 | ct.na = 0; |
| 816 | ct.deleted = 0; | ||
| 806 | ct.total = 1; /* count extra key */ | 817 | ct.total = 1; /* count extra key */ |
| 807 | if (ttisinteger(ek)) | 818 | if (ttisinteger(ek)) |
| 808 | countint(ivalue(ek), &ct); /* extra key may go to array */ | 819 | countint(ivalue(ek), &ct); /* extra key may go to array */ |
| @@ -815,12 +826,17 @@ static void rehash (lua_State *L, Table *t, const TValue *ek) { | |||
| 815 | numusearray(t, &ct); /* count keys in array part */ | 826 | numusearray(t, &ct); /* count keys in array part */ |
| 816 | asize = computesizes(&ct); /* compute new size for array part */ | 827 | asize = computesizes(&ct); /* compute new size for array part */ |
| 817 | } | 828 | } |
| 829 | /* all keys not in the array part go to the hash part */ | ||
| 830 | nsize = ct.total - ct.na; | ||
| 831 | if (ct.deleted) { /* table has deleted entries? */ | ||
| 832 | /* insertion-deletion-insertion: give hash some extra size to | ||
| 833 | avoid constant resizings */ | ||
| 834 | nsize += nsize >> 2; | ||
| 835 | } | ||
| 818 | /* resize the table to new computed sizes */ | 836 | /* resize the table to new computed sizes */ |
| 819 | luaH_resize(L, t, asize, ct.total - ct.na); | 837 | luaH_resize(L, t, asize, nsize); |
| 820 | } | 838 | } |
| 821 | 839 | ||
| 822 | |||
| 823 | |||
| 824 | /* | 840 | /* |
| 825 | ** }============================================================= | 841 | ** }============================================================= |
| 826 | */ | 842 | */ |
diff --git a/testes/nextvar.lua b/testes/nextvar.lua index 85c9ff6b..00e509f8 100644 --- a/testes/nextvar.lua +++ b/testes/nextvar.lua | |||
| @@ -23,6 +23,12 @@ local function printTable (t) | |||
| 23 | end | 23 | end |
| 24 | end | 24 | end |
| 25 | ---------------------------------------------------------------- | 25 | ---------------------------------------------------------------- |
| 26 | local function countentries (t) | ||
| 27 | local e = 0 | ||
| 28 | for _ in pairs(t) do e = e + 1 end | ||
| 29 | return e | ||
| 30 | end | ||
| 31 | ---------------------------------------------------------------- | ||
| 26 | 32 | ||
| 27 | 33 | ||
| 28 | local function check (t, na, nh) | 34 | local function check (t, na, nh) |
| @@ -115,6 +121,24 @@ do -- overflow (must wrap-around) | |||
| 115 | assert(k == nil) | 121 | assert(k == nil) |
| 116 | end | 122 | end |
| 117 | 123 | ||
| 124 | |||
| 125 | do | ||
| 126 | -- alternate insertions and deletions in an almost full hash. | ||
| 127 | -- In versions pre-5.5, that causes constant rehashings and | ||
| 128 | -- takes a long time to complete. | ||
| 129 | local a = {} | ||
| 130 | for i = 1, 2^11 - 1 do | ||
| 131 | a[i .. ""] = true | ||
| 132 | end | ||
| 133 | |||
| 134 | for i = 1, 1e5 do | ||
| 135 | local key = i .. "." | ||
| 136 | a[key] = true | ||
| 137 | a[key] = nil | ||
| 138 | end | ||
| 139 | assert(countentries(a) == 2^11 - 1) | ||
| 140 | end | ||
| 141 | |||
| 118 | if not T then | 142 | if not T then |
| 119 | (Message or print) | 143 | (Message or print) |
| 120 | ('\n >>> testC not active: skipping tests for table sizes <<<\n') | 144 | ('\n >>> testC not active: skipping tests for table sizes <<<\n') |
| @@ -202,6 +226,23 @@ for i = 1,lim do | |||
| 202 | check(a, 0, mp2(i)) | 226 | check(a, 0, mp2(i)) |
| 203 | end | 227 | end |
| 204 | 228 | ||
| 229 | |||
| 230 | -- insert and delete elements until a rehash occurr. Caller must ensure | ||
| 231 | -- that a rehash will change the shape of the table. Must repeat because | ||
| 232 | -- the insertion may collide with the deleted element, and then there is | ||
| 233 | -- no rehash. | ||
| 234 | local function forcerehash (t) | ||
| 235 | local na, nh = T.querytab(t) | ||
| 236 | local i = 10000 | ||
| 237 | repeat | ||
| 238 | i = i + 1 | ||
| 239 | t[i] = true | ||
| 240 | t[i] = undef | ||
| 241 | local nna, nnh = T.querytab(t) | ||
| 242 | until nna ~= na or nnh ~= nh | ||
| 243 | end | ||
| 244 | |||
| 245 | |||
| 205 | do | 246 | do |
| 206 | local a = {} | 247 | local a = {} |
| 207 | for i=1,16 do a[i] = i end | 248 | for i=1,16 do a[i] = i end |
| @@ -212,7 +253,7 @@ do | |||
| 212 | a[30] = undef | 253 | a[30] = undef |
| 213 | check(a, 0, 8) -- 5 elements in the hash part: [12]-[16] | 254 | check(a, 0, 8) -- 5 elements in the hash part: [12]-[16] |
| 214 | a[10] = 1 | 255 | a[10] = 1 |
| 215 | for i=30,50 do a[i] = true; a[i] = undef end -- force a rehash | 256 | forcerehash(a) |
| 216 | check(a, 16, 1) | 257 | check(a, 16, 1) |
| 217 | for i=1,14 do a[i] = true; a[i] = undef end | 258 | for i=1,14 do a[i] = true; a[i] = undef end |
| 218 | check(a, 16, 1) -- no rehash... | 259 | check(a, 16, 1) -- no rehash... |
| @@ -242,6 +283,25 @@ do -- "almost sparse" arrays | |||
| 242 | end | 283 | end |
| 243 | 284 | ||
| 244 | 285 | ||
| 286 | do | ||
| 287 | -- alternate insertions and deletions should give some extra | ||
| 288 | -- space for the hash part. Otherwise, a mix of insertions/deletions | ||
| 289 | -- could cause too many rehashes. (See the other test for "alternate | ||
| 290 | -- insertions and deletions" in this file.) | ||
| 291 | local a = {} | ||
| 292 | for i = 1, 256 do | ||
| 293 | a[i .. ""] = true | ||
| 294 | end | ||
| 295 | check(a, 0, 256) -- hash part is full | ||
| 296 | a["256"] = nil -- delete a key | ||
| 297 | forcerehash(a) | ||
| 298 | -- table has only 255 elements, but it got some extra space; | ||
| 299 | -- otherwise, almost each delete-insert would rehash the table again. | ||
| 300 | assert(countentries(a) == 255) | ||
| 301 | check(a, 0, 512) | ||
| 302 | end | ||
| 303 | |||
| 304 | |||
| 245 | -- size tests for vararg | 305 | -- size tests for vararg |
| 246 | lim = 35 | 306 | lim = 35 |
| 247 | local function foo (n, ...) | 307 | local function foo (n, ...) |
