diff options
| author | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 2024-10-28 14:15:21 -0300 |
|---|---|---|
| committer | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 2024-10-28 14:15:21 -0300 |
| commit | 0de81911525bc62bc2a8fc52a368102afed7022b (patch) | |
| tree | be93ddd67efeb8a283da6479fd928533bae4148f | |
| parent | 853311e5b1c1d9fe9d006e3a4f322e9916764933 (diff) | |
| download | lua-0de81911525bc62bc2a8fc52a368102afed7022b.tar.gz lua-0de81911525bc62bc2a8fc52a368102afed7022b.tar.bz2 lua-0de81911525bc62bc2a8fc52a368102afed7022b.zip | |
New structure to count keys in a table for rehashing
| -rw-r--r-- | ltable.c | 115 |
1 files changed, 65 insertions, 50 deletions
| @@ -456,15 +456,29 @@ static int keyinarray (Table *t, lua_Integer key) { | |||
| 456 | ** ============================================================== | 456 | ** ============================================================== |
| 457 | */ | 457 | */ |
| 458 | 458 | ||
| 459 | |||
| 459 | /* | 460 | /* |
| 460 | ** Compute the optimal size for the array part of table 't'. 'nums' is a | 461 | ** Structure to count the keys in a table. |
| 461 | ** "count array" where 'nums[i]' is the number of integers in the table | 462 | ** 'total' is the total number of keys in the table. |
| 462 | ** between 2^(i - 1) + 1 and 2^i. 'pna' enters with the total number of | 463 | ** 'na' is the number of *array indices* in the table (see 'arrayindex'). |
| 463 | ** integer keys in the table and leaves with the number of keys that | 464 | ** 'nums' is a "count array" where 'nums[i]' is the number of integer |
| 464 | ** will go to the array part; return the optimal size. (The condition | 465 | ** keys between 2^(i - 1) + 1 and 2^i. Note that 'na' is the summation |
| 465 | ** 'twotoi > 0' in the for loop stops the loop if 'twotoi' overflows.) | 466 | ** of 'nums'. |
| 466 | */ | 467 | */ |
| 467 | static unsigned computesizes (unsigned nums[], unsigned *pna) { | 468 | typedef struct { |
| 469 | unsigned total; | ||
| 470 | unsigned na; | ||
| 471 | unsigned nums[MAXABITS + 1]; | ||
| 472 | } Counters; | ||
| 473 | |||
| 474 | /* | ||
| 475 | ** Compute the optimal size for the array part of table 't'. | ||
| 476 | ** 'ct->na' enters with the total number of array indices in the table | ||
| 477 | ** and leaves with the number of keys that will go to the array part; | ||
| 478 | ** return the optimal size. (The condition 'twotoi > 0' in the for loop | ||
| 479 | ** stops the loop if 'twotoi' overflows.) | ||
| 480 | */ | ||
| 481 | static unsigned computesizes (Counters *ct) { | ||
| 468 | int i; | 482 | int i; |
| 469 | unsigned int twotoi; /* 2^i (candidate for optimal size) */ | 483 | unsigned int twotoi; /* 2^i (candidate for optimal size) */ |
| 470 | unsigned int a = 0; /* number of elements smaller than 2^i */ | 484 | unsigned int a = 0; /* number of elements smaller than 2^i */ |
| @@ -472,28 +486,26 @@ static unsigned computesizes (unsigned nums[], unsigned *pna) { | |||
| 472 | unsigned int optimal = 0; /* optimal size for array part */ | 486 | unsigned int optimal = 0; /* optimal size for array part */ |
| 473 | /* loop while keys can fill more than half of total size */ | 487 | /* loop while keys can fill more than half of total size */ |
| 474 | for (i = 0, twotoi = 1; | 488 | for (i = 0, twotoi = 1; |
| 475 | twotoi > 0 && *pna > twotoi / 2; | 489 | twotoi > 0 && ct->na > twotoi / 2; |
| 476 | i++, twotoi *= 2) { | 490 | i++, twotoi *= 2) { |
| 477 | a += nums[i]; | 491 | a += ct->nums[i]; |
| 478 | if (a > twotoi/2) { /* more than half elements present? */ | 492 | if (a > twotoi/2) { /* more than half elements present? */ |
| 479 | optimal = twotoi; /* optimal size (till now) */ | 493 | optimal = twotoi; /* optimal size (till now) */ |
| 480 | na = a; /* all elements up to 'optimal' will go to array part */ | 494 | na = a; /* all elements up to 'optimal' will go to array part */ |
| 481 | } | 495 | } |
| 482 | } | 496 | } |
| 483 | lua_assert((optimal == 0 || optimal / 2 < na) && na <= optimal); | 497 | lua_assert((optimal == 0 || optimal / 2 < na) && na <= optimal); |
| 484 | *pna = na; | 498 | ct->na = na; |
| 485 | return optimal; | 499 | return optimal; |
| 486 | } | 500 | } |
| 487 | 501 | ||
| 488 | 502 | ||
| 489 | static unsigned countint (lua_Integer key, unsigned int *nums) { | 503 | static void countint (lua_Integer key, Counters *ct) { |
| 490 | unsigned int k = arrayindex(key); | 504 | unsigned int k = arrayindex(key); |
| 491 | if (k != 0) { /* is 'key' an appropriate array index? */ | 505 | if (k != 0) { /* is 'key' an array index? */ |
| 492 | nums[luaO_ceillog2(k)]++; /* count as such */ | 506 | ct->nums[luaO_ceillog2(k)]++; /* count as such */ |
| 493 | return 1; | 507 | ct->na++; |
| 494 | } | 508 | } |
| 495 | else | ||
| 496 | return 0; | ||
| 497 | } | 509 | } |
| 498 | 510 | ||
| 499 | 511 | ||
| @@ -504,11 +516,9 @@ l_sinline int arraykeyisempty (const Table *t, lua_Unsigned key) { | |||
| 504 | 516 | ||
| 505 | 517 | ||
| 506 | /* | 518 | /* |
| 507 | ** Count keys in array part of table 't': Fill 'nums[i]' with | 519 | ** Count keys in array part of table 't'. |
| 508 | ** number of keys that will go into corresponding slice and return | ||
| 509 | ** total number of non-nil keys. | ||
| 510 | */ | 520 | */ |
| 511 | static unsigned numusearray (const Table *t, unsigned *nums) { | 521 | static void numusearray (const Table *t, Counters *ct) { |
| 512 | int lg; | 522 | int lg; |
| 513 | unsigned int ttlg; /* 2^lg */ | 523 | unsigned int ttlg; /* 2^lg */ |
| 514 | unsigned int ause = 0; /* summation of 'nums' */ | 524 | unsigned int ause = 0; /* summation of 'nums' */ |
| @@ -528,27 +538,29 @@ static unsigned numusearray (const Table *t, unsigned *nums) { | |||
| 528 | if (!arraykeyisempty(t, i)) | 538 | if (!arraykeyisempty(t, i)) |
| 529 | lc++; | 539 | lc++; |
| 530 | } | 540 | } |
| 531 | nums[lg] += lc; | 541 | ct->nums[lg] += lc; |
| 532 | ause += lc; | 542 | ause += lc; |
| 533 | } | 543 | } |
| 534 | return ause; | 544 | ct->total += ause; |
| 545 | ct->na += ause; | ||
| 535 | } | 546 | } |
| 536 | 547 | ||
| 537 | 548 | ||
| 538 | static unsigned numusehash (const Table *t, unsigned *nums, unsigned *pna) { | 549 | /* |
| 539 | unsigned totaluse = 0; /* total number of elements */ | 550 | ** Count keys in hash part of table 't'. |
| 540 | unsigned ause = 0; /* elements added to 'nums' (can go to array part) */ | 551 | */ |
| 552 | static void numusehash (const Table *t, Counters *ct) { | ||
| 541 | unsigned i = sizenode(t); | 553 | unsigned i = sizenode(t); |
| 554 | unsigned total = 0; | ||
| 542 | while (i--) { | 555 | while (i--) { |
| 543 | Node *n = &t->node[i]; | 556 | Node *n = &t->node[i]; |
| 544 | if (!isempty(gval(n))) { | 557 | if (!isempty(gval(n))) { |
| 558 | total++; | ||
| 545 | if (keyisinteger(n)) | 559 | if (keyisinteger(n)) |
| 546 | ause += countint(keyival(n), nums); | 560 | countint(keyival(n), ct); |
| 547 | totaluse++; | ||
| 548 | } | 561 | } |
| 549 | } | 562 | } |
| 550 | *pna += ause; | 563 | ct->total += total; |
| 551 | return totaluse; | ||
| 552 | } | 564 | } |
| 553 | 565 | ||
| 554 | 566 | ||
| @@ -566,12 +578,11 @@ static size_t concretesize (unsigned int size) { | |||
| 566 | ** do nothing. Else, if new size is zero, free the old array. (It must | 578 | ** do nothing. Else, if new size is zero, free the old array. (It must |
| 567 | ** be present, as the sizes are different.) Otherwise, allocate a new | 579 | ** be present, as the sizes are different.) Otherwise, allocate a new |
| 568 | ** array, move the common elements to new proper position, and then | 580 | ** array, move the common elements to new proper position, and then |
| 569 | ** frees old array. | 581 | ** frees the old array. |
| 570 | ** When array grows, we could reallocate it, but we still would need | 582 | ** We could reallocate the array, but we still would need to move the |
| 571 | ** to move the elements to their new position, so the copy implicit | 583 | ** elements to their new position, so the copy implicit in realloc is a |
| 572 | ** in realloc is a waste. When array shrinks, it always erases some | 584 | ** waste. Moreover, most allocators will move the array anyway when the |
| 573 | ** elements that should still be in the array, so we must reallocate in | 585 | ** new size is double the old one (the most common case). |
| 574 | ** two steps anyway. It is simpler to always reallocate in two steps. | ||
| 575 | */ | 586 | */ |
| 576 | static Value *resizearray (lua_State *L , Table *t, | 587 | static Value *resizearray (lua_State *L , Table *t, |
| 577 | unsigned oldasize, | 588 | unsigned oldasize, |
| @@ -590,10 +601,10 @@ static Value *resizearray (lua_State *L , Table *t, | |||
| 590 | if (np == NULL) /* allocation error? */ | 601 | if (np == NULL) /* allocation error? */ |
| 591 | return NULL; | 602 | return NULL; |
| 592 | if (oldasize > 0) { | 603 | if (oldasize > 0) { |
| 604 | /* move common elements to new position */ | ||
| 593 | Value *op = t->array - oldasize; /* real original array */ | 605 | Value *op = t->array - oldasize; /* real original array */ |
| 594 | unsigned tomove = (oldasize < newasize) ? oldasize : newasize; | 606 | unsigned tomove = (oldasize < newasize) ? oldasize : newasize; |
| 595 | lua_assert(tomove > 0); | 607 | lua_assert(tomove > 0); |
| 596 | /* move common elements to new position */ | ||
| 597 | memcpy(np + newasize - tomove, | 608 | memcpy(np + newasize - tomove, |
| 598 | op + oldasize - tomove, | 609 | op + oldasize - tomove, |
| 599 | concretesize(tomove)); | 610 | concretesize(tomove)); |
| @@ -723,6 +734,9 @@ static void clearNewSlice (Table *t, unsigned oldasize, unsigned newasize) { | |||
| 723 | ** into the table, initializes the new part of the array (if any) with | 734 | ** into the table, initializes the new part of the array (if any) with |
| 724 | ** nils and reinserts the elements of the old hash back into the new | 735 | ** nils and reinserts the elements of the old hash back into the new |
| 725 | ** parts of the table. | 736 | ** parts of the table. |
| 737 | ** Note that if the new size for the arry part ('newasize') is equal to | ||
| 738 | ** the old one ('oldasize'), this function will do nothing with that | ||
| 739 | ** part. | ||
| 726 | */ | 740 | */ |
| 727 | void luaH_resize (lua_State *L, Table *t, unsigned newasize, | 741 | void luaH_resize (lua_State *L, Table *t, unsigned newasize, |
| 728 | unsigned nhsize) { | 742 | unsigned nhsize) { |
| @@ -762,33 +776,34 @@ void luaH_resizearray (lua_State *L, Table *t, unsigned int nasize) { | |||
| 762 | luaH_resize(L, t, nasize, nsize); | 776 | luaH_resize(L, t, nasize, nsize); |
| 763 | } | 777 | } |
| 764 | 778 | ||
| 779 | |||
| 765 | /* | 780 | /* |
| 766 | ** nums[i] = number of keys 'k' where 2^(i - 1) < k <= 2^i | 781 | ** Rehash a table. First, count its keys. If there are array indices |
| 782 | ** outside the array part, compute the new best size for that part. | ||
| 783 | ** Then, resize the table. | ||
| 767 | */ | 784 | */ |
| 768 | static void rehash (lua_State *L, Table *t, const TValue *ek) { | 785 | static void rehash (lua_State *L, Table *t, const TValue *ek) { |
| 769 | unsigned asize; /* optimal size for array part */ | 786 | unsigned asize; /* optimal size for array part */ |
| 770 | unsigned na = 0; /* number of keys candidate for the array part */ | 787 | Counters ct; |
| 771 | unsigned nums[MAXABITS + 1]; | ||
| 772 | unsigned i; | 788 | unsigned i; |
| 773 | unsigned totaluse; /* total number of keys */ | ||
| 774 | for (i = 0; i <= MAXABITS; i++) nums[i] = 0; /* reset counts */ | ||
| 775 | setlimittosize(t); | 789 | setlimittosize(t); |
| 776 | totaluse = 1; /* count extra key */ | 790 | /* reset counts */ |
| 791 | for (i = 0; i <= MAXABITS; i++) ct.nums[i] = 0; | ||
| 792 | ct.na = 0; | ||
| 793 | ct.total = 1; /* count extra key */ | ||
| 777 | if (ttisinteger(ek)) | 794 | if (ttisinteger(ek)) |
| 778 | na += countint(ivalue(ek), nums); /* extra key may go to array */ | 795 | countint(ivalue(ek), &ct); /* extra key may go to array */ |
| 779 | totaluse += numusehash(t, nums, &na); /* count keys in hash part */ | 796 | numusehash(t, &ct); /* count keys in hash part */ |
| 780 | if (na == 0) { | 797 | if (ct.na == 0) { |
| 781 | /* no new keys to enter array part; keep it with the same size */ | 798 | /* no new keys to enter array part; keep it with the same size */ |
| 782 | asize = luaH_realasize(t); | 799 | asize = luaH_realasize(t); |
| 783 | } | 800 | } |
| 784 | else { /* compute best size for array part */ | 801 | else { /* compute best size for array part */ |
| 785 | unsigned n = numusearray(t, nums); /* count keys in array part */ | 802 | numusearray(t, &ct); /* count keys in array part */ |
| 786 | totaluse += n; /* all keys in array part are keys */ | 803 | asize = computesizes(&ct); /* compute new size for array part */ |
| 787 | na += n; /* all keys in array part are candidates for new array part */ | ||
| 788 | asize = computesizes(nums, &na); /* compute new size for array part */ | ||
| 789 | } | 804 | } |
| 790 | /* resize the table to new computed sizes */ | 805 | /* resize the table to new computed sizes */ |
| 791 | luaH_resize(L, t, asize, totaluse - na); | 806 | luaH_resize(L, t, asize, ct.total - ct.na); |
| 792 | } | 807 | } |
| 793 | 808 | ||
| 794 | 809 | ||
