diff options
author | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 2024-11-14 11:48:25 -0300 |
---|---|---|
committer | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 2024-11-14 11:48:25 -0300 |
commit | 9a91fe1640ddbe5b55e7454541059372b971f400 (patch) | |
tree | 902589105ff77ee8f3000e80f7e0459cd21eb068 /ltable.c | |
parent | 2491b87c10db530eac2f3d81cd39f95875d16cd5 (diff) | |
download | lua-9a91fe1640ddbe5b55e7454541059372b971f400.tar.gz lua-9a91fe1640ddbe5b55e7454541059372b971f400.tar.bz2 lua-9a91fe1640ddbe5b55e7454541059372b971f400.zip |
Add extra size when resizing tables with deleted keys
Without this extra space, sequences of insertions/deletions (and
some other uses) can have unpexpected low performances. See the
added tests for an example, and *Mathematical Models to Analyze Lua
Hybrid Tables and Why They Need a Fix* (MartÃnez, Nicaud, Rotondo;
arXiv:2208.13602v2) for detais.
Diffstat (limited to 'ltable.c')
-rw-r--r-- | ltable.c | 26 |
1 files changed, 21 insertions, 5 deletions
@@ -461,6 +461,7 @@ static int keyinarray (Table *t, lua_Integer key) { | |||
461 | ** Structure to count the keys in a table. | 461 | ** Structure to count the keys in a table. |
462 | ** 'total' is the total number of keys in the table. | 462 | ** 'total' is the total number of keys in the table. |
463 | ** 'na' is the number of *array indices* in the table (see 'arrayindex'). | 463 | ** 'na' is the number of *array indices* in the table (see 'arrayindex'). |
464 | ** 'deleted' is true if there are deleted nodes in the hash part. | ||
464 | ** 'nums' is a "count array" where 'nums[i]' is the number of integer | 465 | ** 'nums' is a "count array" where 'nums[i]' is the number of integer |
465 | ** keys between 2^(i - 1) + 1 and 2^i. Note that 'na' is the summation | 466 | ** keys between 2^(i - 1) + 1 and 2^i. Note that 'na' is the summation |
466 | ** of 'nums'. | 467 | ** of 'nums'. |
@@ -468,6 +469,7 @@ static int keyinarray (Table *t, lua_Integer key) { | |||
468 | typedef struct { | 469 | typedef struct { |
469 | unsigned total; | 470 | unsigned total; |
470 | unsigned na; | 471 | unsigned na; |
472 | int deleted; | ||
471 | unsigned nums[MAXABITS + 1]; | 473 | unsigned nums[MAXABITS + 1]; |
472 | } Counters; | 474 | } Counters; |
473 | 475 | ||
@@ -560,14 +562,21 @@ static void numusearray (const Table *t, Counters *ct) { | |||
560 | 562 | ||
561 | 563 | ||
562 | /* | 564 | /* |
563 | ** Count keys in hash part of table 't'. | 565 | ** Count keys in hash part of table 't'. As this only happens during |
566 | ** a rehash, all nodes have been used. A node can have a nil value only | ||
567 | ** if it was deleted after being created. | ||
564 | */ | 568 | */ |
565 | static void numusehash (const Table *t, Counters *ct) { | 569 | static void numusehash (const Table *t, Counters *ct) { |
566 | unsigned i = sizenode(t); | 570 | unsigned i = sizenode(t); |
567 | unsigned total = 0; | 571 | unsigned total = 0; |
568 | while (i--) { | 572 | while (i--) { |
569 | Node *n = &t->node[i]; | 573 | Node *n = &t->node[i]; |
570 | if (!isempty(gval(n))) { | 574 | if (isempty(gval(n))) { |
575 | /* entry was deleted; key cannot be nil */ | ||
576 | lua_assert(isdummy(t) || !keyisnil(n)); | ||
577 | ct->deleted = 1; | ||
578 | } | ||
579 | else { | ||
571 | total++; | 580 | total++; |
572 | if (keyisinteger(n)) | 581 | if (keyisinteger(n)) |
573 | countint(keyival(n), ct); | 582 | countint(keyival(n), ct); |
@@ -799,10 +808,12 @@ static void rehash (lua_State *L, Table *t, const TValue *ek) { | |||
799 | unsigned asize; /* optimal size for array part */ | 808 | unsigned asize; /* optimal size for array part */ |
800 | Counters ct; | 809 | Counters ct; |
801 | unsigned i; | 810 | unsigned i; |
811 | unsigned nsize; /* size for the hash part */ | ||
802 | setlimittosize(t); | 812 | setlimittosize(t); |
803 | /* reset counts */ | 813 | /* reset counts */ |
804 | for (i = 0; i <= MAXABITS; i++) ct.nums[i] = 0; | 814 | for (i = 0; i <= MAXABITS; i++) ct.nums[i] = 0; |
805 | ct.na = 0; | 815 | ct.na = 0; |
816 | ct.deleted = 0; | ||
806 | ct.total = 1; /* count extra key */ | 817 | ct.total = 1; /* count extra key */ |
807 | if (ttisinteger(ek)) | 818 | if (ttisinteger(ek)) |
808 | countint(ivalue(ek), &ct); /* extra key may go to array */ | 819 | countint(ivalue(ek), &ct); /* extra key may go to array */ |
@@ -815,12 +826,17 @@ static void rehash (lua_State *L, Table *t, const TValue *ek) { | |||
815 | numusearray(t, &ct); /* count keys in array part */ | 826 | numusearray(t, &ct); /* count keys in array part */ |
816 | asize = computesizes(&ct); /* compute new size for array part */ | 827 | asize = computesizes(&ct); /* compute new size for array part */ |
817 | } | 828 | } |
829 | /* all keys not in the array part go to the hash part */ | ||
830 | nsize = ct.total - ct.na; | ||
831 | if (ct.deleted) { /* table has deleted entries? */ | ||
832 | /* insertion-deletion-insertion: give hash some extra size to | ||
833 | avoid constant resizings */ | ||
834 | nsize += nsize >> 2; | ||
835 | } | ||
818 | /* resize the table to new computed sizes */ | 836 | /* resize the table to new computed sizes */ |
819 | luaH_resize(L, t, asize, ct.total - ct.na); | 837 | luaH_resize(L, t, asize, nsize); |
820 | } | 838 | } |
821 | 839 | ||
822 | |||
823 | |||
824 | /* | 840 | /* |
825 | ** }============================================================= | 841 | ** }============================================================= |
826 | */ | 842 | */ |