aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRoberto Ierusalimschy <roberto@inf.puc-rio.br>2024-10-28 14:15:21 -0300
committerRoberto Ierusalimschy <roberto@inf.puc-rio.br>2024-10-28 14:15:21 -0300
commit0de81911525bc62bc2a8fc52a368102afed7022b (patch)
treebe93ddd67efeb8a283da6479fd928533bae4148f
parent853311e5b1c1d9fe9d006e3a4f322e9916764933 (diff)
downloadlua-0de81911525bc62bc2a8fc52a368102afed7022b.tar.gz
lua-0de81911525bc62bc2a8fc52a368102afed7022b.tar.bz2
lua-0de81911525bc62bc2a8fc52a368102afed7022b.zip
New structure to count keys in a table for rehashing
-rw-r--r--ltable.c115
1 files changed, 65 insertions, 50 deletions
diff --git a/ltable.c b/ltable.c
index 86ef1092..3451445c 100644
--- a/ltable.c
+++ b/ltable.c
@@ -456,15 +456,29 @@ static int keyinarray (Table *t, lua_Integer key) {
456** ============================================================== 456** ==============================================================
457*/ 457*/
458 458
459
459/* 460/*
460** Compute the optimal size for the array part of table 't'. 'nums' is a 461** Structure to count the keys in a table.
461** "count array" where 'nums[i]' is the number of integers in the table 462** 'total' is the total number of keys in the table.
462** between 2^(i - 1) + 1 and 2^i. 'pna' enters with the total number of 463** 'na' is the number of *array indices* in the table (see 'arrayindex').
463** integer keys in the table and leaves with the number of keys that 464** 'nums' is a "count array" where 'nums[i]' is the number of integer
464** will go to the array part; return the optimal size. (The condition 465** keys between 2^(i - 1) + 1 and 2^i. Note that 'na' is the summation
465** 'twotoi > 0' in the for loop stops the loop if 'twotoi' overflows.) 466** of 'nums'.
466*/ 467*/
467static unsigned computesizes (unsigned nums[], unsigned *pna) { 468typedef struct {
469 unsigned total;
470 unsigned na;
471 unsigned nums[MAXABITS + 1];
472} Counters;
473
474/*
475** Compute the optimal size for the array part of table 't'.
476** 'ct->na' enters with the total number of array indices in the table
477** and leaves with the number of keys that will go to the array part;
478** return the optimal size. (The condition 'twotoi > 0' in the for loop
479** stops the loop if 'twotoi' overflows.)
480*/
481static unsigned computesizes (Counters *ct) {
468 int i; 482 int i;
469 unsigned int twotoi; /* 2^i (candidate for optimal size) */ 483 unsigned int twotoi; /* 2^i (candidate for optimal size) */
470 unsigned int a = 0; /* number of elements smaller than 2^i */ 484 unsigned int a = 0; /* number of elements smaller than 2^i */
@@ -472,28 +486,26 @@ static unsigned computesizes (unsigned nums[], unsigned *pna) {
472 unsigned int optimal = 0; /* optimal size for array part */ 486 unsigned int optimal = 0; /* optimal size for array part */
473 /* loop while keys can fill more than half of total size */ 487 /* loop while keys can fill more than half of total size */
474 for (i = 0, twotoi = 1; 488 for (i = 0, twotoi = 1;
475 twotoi > 0 && *pna > twotoi / 2; 489 twotoi > 0 && ct->na > twotoi / 2;
476 i++, twotoi *= 2) { 490 i++, twotoi *= 2) {
477 a += nums[i]; 491 a += ct->nums[i];
478 if (a > twotoi/2) { /* more than half elements present? */ 492 if (a > twotoi/2) { /* more than half elements present? */
479 optimal = twotoi; /* optimal size (till now) */ 493 optimal = twotoi; /* optimal size (till now) */
480 na = a; /* all elements up to 'optimal' will go to array part */ 494 na = a; /* all elements up to 'optimal' will go to array part */
481 } 495 }
482 } 496 }
483 lua_assert((optimal == 0 || optimal / 2 < na) && na <= optimal); 497 lua_assert((optimal == 0 || optimal / 2 < na) && na <= optimal);
484 *pna = na; 498 ct->na = na;
485 return optimal; 499 return optimal;
486} 500}
487 501
488 502
489static unsigned countint (lua_Integer key, unsigned int *nums) { 503static void countint (lua_Integer key, Counters *ct) {
490 unsigned int k = arrayindex(key); 504 unsigned int k = arrayindex(key);
491 if (k != 0) { /* is 'key' an appropriate array index? */ 505 if (k != 0) { /* is 'key' an array index? */
492 nums[luaO_ceillog2(k)]++; /* count as such */ 506 ct->nums[luaO_ceillog2(k)]++; /* count as such */
493 return 1; 507 ct->na++;
494 } 508 }
495 else
496 return 0;
497} 509}
498 510
499 511
@@ -504,11 +516,9 @@ l_sinline int arraykeyisempty (const Table *t, lua_Unsigned key) {
504 516
505 517
506/* 518/*
507** Count keys in array part of table 't': Fill 'nums[i]' with 519** Count keys in array part of table 't'.
508** number of keys that will go into corresponding slice and return
509** total number of non-nil keys.
510*/ 520*/
511static unsigned numusearray (const Table *t, unsigned *nums) { 521static void numusearray (const Table *t, Counters *ct) {
512 int lg; 522 int lg;
513 unsigned int ttlg; /* 2^lg */ 523 unsigned int ttlg; /* 2^lg */
514 unsigned int ause = 0; /* summation of 'nums' */ 524 unsigned int ause = 0; /* summation of 'nums' */
@@ -528,27 +538,29 @@ static unsigned numusearray (const Table *t, unsigned *nums) {
528 if (!arraykeyisempty(t, i)) 538 if (!arraykeyisempty(t, i))
529 lc++; 539 lc++;
530 } 540 }
531 nums[lg] += lc; 541 ct->nums[lg] += lc;
532 ause += lc; 542 ause += lc;
533 } 543 }
534 return ause; 544 ct->total += ause;
545 ct->na += ause;
535} 546}
536 547
537 548
538static unsigned numusehash (const Table *t, unsigned *nums, unsigned *pna) { 549/*
539 unsigned totaluse = 0; /* total number of elements */ 550** Count keys in hash part of table 't'.
540 unsigned ause = 0; /* elements added to 'nums' (can go to array part) */ 551*/
552static void numusehash (const Table *t, Counters *ct) {
541 unsigned i = sizenode(t); 553 unsigned i = sizenode(t);
554 unsigned total = 0;
542 while (i--) { 555 while (i--) {
543 Node *n = &t->node[i]; 556 Node *n = &t->node[i];
544 if (!isempty(gval(n))) { 557 if (!isempty(gval(n))) {
558 total++;
545 if (keyisinteger(n)) 559 if (keyisinteger(n))
546 ause += countint(keyival(n), nums); 560 countint(keyival(n), ct);
547 totaluse++;
548 } 561 }
549 } 562 }
550 *pna += ause; 563 ct->total += total;
551 return totaluse;
552} 564}
553 565
554 566
@@ -566,12 +578,11 @@ static size_t concretesize (unsigned int size) {
566** do nothing. Else, if new size is zero, free the old array. (It must 578** do nothing. Else, if new size is zero, free the old array. (It must
567** be present, as the sizes are different.) Otherwise, allocate a new 579** be present, as the sizes are different.) Otherwise, allocate a new
568** array, move the common elements to new proper position, and then 580** array, move the common elements to new proper position, and then
569** frees old array. 581** frees the old array.
570** When array grows, we could reallocate it, but we still would need 582** We could reallocate the array, but we still would need to move the
571** to move the elements to their new position, so the copy implicit 583** elements to their new position, so the copy implicit in realloc is a
572** in realloc is a waste. When array shrinks, it always erases some 584** waste. Moreover, most allocators will move the array anyway when the
573** elements that should still be in the array, so we must reallocate in 585** new size is double the old one (the most common case).
574** two steps anyway. It is simpler to always reallocate in two steps.
575*/ 586*/
576static Value *resizearray (lua_State *L , Table *t, 587static Value *resizearray (lua_State *L , Table *t,
577 unsigned oldasize, 588 unsigned oldasize,
@@ -590,10 +601,10 @@ static Value *resizearray (lua_State *L , Table *t,
590 if (np == NULL) /* allocation error? */ 601 if (np == NULL) /* allocation error? */
591 return NULL; 602 return NULL;
592 if (oldasize > 0) { 603 if (oldasize > 0) {
604 /* move common elements to new position */
593 Value *op = t->array - oldasize; /* real original array */ 605 Value *op = t->array - oldasize; /* real original array */
594 unsigned tomove = (oldasize < newasize) ? oldasize : newasize; 606 unsigned tomove = (oldasize < newasize) ? oldasize : newasize;
595 lua_assert(tomove > 0); 607 lua_assert(tomove > 0);
596 /* move common elements to new position */
597 memcpy(np + newasize - tomove, 608 memcpy(np + newasize - tomove,
598 op + oldasize - tomove, 609 op + oldasize - tomove,
599 concretesize(tomove)); 610 concretesize(tomove));
@@ -723,6 +734,9 @@ static void clearNewSlice (Table *t, unsigned oldasize, unsigned newasize) {
723** into the table, initializes the new part of the array (if any) with 734** into the table, initializes the new part of the array (if any) with
724** nils and reinserts the elements of the old hash back into the new 735** nils and reinserts the elements of the old hash back into the new
725** parts of the table. 736** parts of the table.
737** Note that if the new size for the arry part ('newasize') is equal to
738** the old one ('oldasize'), this function will do nothing with that
739** part.
726*/ 740*/
727void luaH_resize (lua_State *L, Table *t, unsigned newasize, 741void luaH_resize (lua_State *L, Table *t, unsigned newasize,
728 unsigned nhsize) { 742 unsigned nhsize) {
@@ -762,33 +776,34 @@ void luaH_resizearray (lua_State *L, Table *t, unsigned int nasize) {
762 luaH_resize(L, t, nasize, nsize); 776 luaH_resize(L, t, nasize, nsize);
763} 777}
764 778
779
765/* 780/*
766** nums[i] = number of keys 'k' where 2^(i - 1) < k <= 2^i 781** Rehash a table. First, count its keys. If there are array indices
782** outside the array part, compute the new best size for that part.
783** Then, resize the table.
767*/ 784*/
768static void rehash (lua_State *L, Table *t, const TValue *ek) { 785static void rehash (lua_State *L, Table *t, const TValue *ek) {
769 unsigned asize; /* optimal size for array part */ 786 unsigned asize; /* optimal size for array part */
770 unsigned na = 0; /* number of keys candidate for the array part */ 787 Counters ct;
771 unsigned nums[MAXABITS + 1];
772 unsigned i; 788 unsigned i;
773 unsigned totaluse; /* total number of keys */
774 for (i = 0; i <= MAXABITS; i++) nums[i] = 0; /* reset counts */
775 setlimittosize(t); 789 setlimittosize(t);
776 totaluse = 1; /* count extra key */ 790 /* reset counts */
791 for (i = 0; i <= MAXABITS; i++) ct.nums[i] = 0;
792 ct.na = 0;
793 ct.total = 1; /* count extra key */
777 if (ttisinteger(ek)) 794 if (ttisinteger(ek))
778 na += countint(ivalue(ek), nums); /* extra key may go to array */ 795 countint(ivalue(ek), &ct); /* extra key may go to array */
779 totaluse += numusehash(t, nums, &na); /* count keys in hash part */ 796 numusehash(t, &ct); /* count keys in hash part */
780 if (na == 0) { 797 if (ct.na == 0) {
781 /* no new keys to enter array part; keep it with the same size */ 798 /* no new keys to enter array part; keep it with the same size */
782 asize = luaH_realasize(t); 799 asize = luaH_realasize(t);
783 } 800 }
784 else { /* compute best size for array part */ 801 else { /* compute best size for array part */
785 unsigned n = numusearray(t, nums); /* count keys in array part */ 802 numusearray(t, &ct); /* count keys in array part */
786 totaluse += n; /* all keys in array part are keys */ 803 asize = computesizes(&ct); /* compute new size for array part */
787 na += n; /* all keys in array part are candidates for new array part */
788 asize = computesizes(nums, &na); /* compute new size for array part */
789 } 804 }
790 /* resize the table to new computed sizes */ 805 /* resize the table to new computed sizes */
791 luaH_resize(L, t, asize, totaluse - na); 806 luaH_resize(L, t, asize, ct.total - ct.na);
792} 807}
793 808
794 809