aboutsummaryrefslogtreecommitdiff
path: root/blocksort.c
diff options
context:
space:
mode:
Diffstat (limited to 'blocksort.c')
-rw-r--r--blocksort.c335
1 files changed, 202 insertions, 133 deletions
diff --git a/blocksort.c b/blocksort.c
index 85a02de..ec42672 100644
--- a/blocksort.c
+++ b/blocksort.c
@@ -8,7 +8,7 @@
8 This file is a part of bzip2 and/or libbzip2, a program and 8 This file is a part of bzip2 and/or libbzip2, a program and
9 library for lossless, block-sorting data compression. 9 library for lossless, block-sorting data compression.
10 10
11 Copyright (C) 1996-1999 Julian R Seward. All rights reserved. 11 Copyright (C) 1996-2000 Julian R Seward. All rights reserved.
12 12
13 Redistribution and use in source and binary forms, with or without 13 Redistribution and use in source and binary forms, with or without
14 modification, are permitted provided that the following conditions 14 modification, are permitted provided that the following conditions
@@ -43,7 +43,7 @@
43 43
44 Julian Seward, Cambridge, UK. 44 Julian Seward, Cambridge, UK.
45 jseward@acm.org 45 jseward@acm.org
46 bzip2/libbzip2 version 0.9.5 of 24 May 1999 46 bzip2/libbzip2 version 1.0 of 21 March 2000
47 47
48 This program is based on (at least) the work of: 48 This program is based on (at least) the work of:
49 Mike Burrows 49 Mike Burrows
@@ -56,6 +56,13 @@
56 Jon L. Bentley 56 Jon L. Bentley
57 57
58 For more information on these sources, see the manual. 58 For more information on these sources, see the manual.
59
60 To get some idea how the block sorting algorithms in this file
61 work, read my paper
62 On the Performance of BWT Sorting Algorithms
63 in Proceedings of the IEEE Data Compression Conference 2000,
64 Snowbird, Utah, USA, 27-30 March 2000. The main sort in this
65 file implements the algorithm called cache in the paper.
59--*/ 66--*/
60 67
61 68
@@ -232,11 +239,11 @@ void fallbackQSort3 ( UInt32* fmap,
232/* Pre: 239/* Pre:
233 nblock > 0 240 nblock > 0
234 eclass exists for [0 .. nblock-1] 241 eclass exists for [0 .. nblock-1]
235 ((UInt16*)eclass) [0 .. nblock-1] [15:8] holds block 242 ((UChar*)eclass) [0 .. nblock-1] holds block
236 ptr exists for [0 .. nblock-1] 243 ptr exists for [0 .. nblock-1]
237 244
238 Post: 245 Post:
239 ((UInt16*)eclass) [0 .. nblock-1] [15:8] holds block 246 ((UChar*)eclass) [0 .. nblock-1] holds block
240 All other areas of eclass destroyed 247 All other areas of eclass destroyed
241 fmap [0 .. nblock-1] holds sorted order 248 fmap [0 .. nblock-1] holds sorted order
242 bhtab [ 0 .. 2+(nblock/32) ] destroyed 249 bhtab [ 0 .. 2+(nblock/32) ] destroyed
@@ -260,7 +267,7 @@ void fallbackSort ( UInt32* fmap,
260 Int32 H, i, j, k, l, r, cc, cc1; 267 Int32 H, i, j, k, l, r, cc, cc1;
261 Int32 nNotDone; 268 Int32 nNotDone;
262 Int32 nBhtab; 269 Int32 nBhtab;
263 UInt16* eclass16 = (UInt16*)eclass; 270 UChar* eclass8 = (UChar*)eclass;
264 271
265 /*-- 272 /*--
266 Initial 1-char radix sort to generate 273 Initial 1-char radix sort to generate
@@ -269,12 +276,12 @@ void fallbackSort ( UInt32* fmap,
269 if (verb >= 4) 276 if (verb >= 4)
270 VPrintf0 ( " bucket sorting ...\n" ); 277 VPrintf0 ( " bucket sorting ...\n" );
271 for (i = 0; i < 257; i++) ftab[i] = 0; 278 for (i = 0; i < 257; i++) ftab[i] = 0;
272 for (i = 0; i < nblock; i++) ftab[eclass16[i] >> 8]++; 279 for (i = 0; i < nblock; i++) ftab[eclass8[i]]++;
273 for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i]; 280 for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i];
274 for (i = 1; i < 257; i++) ftab[i] += ftab[i-1]; 281 for (i = 1; i < 257; i++) ftab[i] += ftab[i-1];
275 282
276 for (i = 0; i < nblock; i++) { 283 for (i = 0; i < nblock; i++) {
277 j = eclass16[i] >> 8; 284 j = eclass8[i];
278 k = ftab[j] - 1; 285 k = ftab[j] - 1;
279 ftab[j] = k; 286 ftab[j] = k;
280 fmap[k] = i; 287 fmap[k] = i;
@@ -354,7 +361,7 @@ void fallbackSort ( UInt32* fmap,
354 361
355 /*-- 362 /*--
356 Reconstruct the original block in 363 Reconstruct the original block in
357 eclass16 [0 .. nblock-1] [15:8], since the 364 eclass8 [0 .. nblock-1], since the
358 previous phase destroyed it. 365 previous phase destroyed it.
359 --*/ 366 --*/
360 if (verb >= 4) 367 if (verb >= 4)
@@ -363,7 +370,7 @@ void fallbackSort ( UInt32* fmap,
363 for (i = 0; i < nblock; i++) { 370 for (i = 0; i < nblock; i++) {
364 while (ftabCopy[j] == 0) j++; 371 while (ftabCopy[j] == 0) j++;
365 ftabCopy[j]--; 372 ftabCopy[j]--;
366 eclass16[fmap[i]] = j << 8; 373 eclass8[fmap[i]] = (UChar)j;
367 } 374 }
368 AssertH ( j < 256, 1005 ); 375 AssertH ( j < 256, 1005 );
369} 376}
@@ -386,67 +393,116 @@ static
386__inline__ 393__inline__
387Bool mainGtU ( UInt32 i1, 394Bool mainGtU ( UInt32 i1,
388 UInt32 i2, 395 UInt32 i2,
389 UInt16* block, 396 UChar* block,
390 UInt16* quadrant, 397 UInt16* quadrant,
391 UInt32 nblock, 398 UInt32 nblock,
392 Int32* budget ) 399 Int32* budget )
393{ 400{
394 Int32 k; 401 Int32 k;
402 UChar c1, c2;
395 UInt16 s1, s2; 403 UInt16 s1, s2;
396 404
397 AssertD ( i1 != i2, "mainGtU" ); 405 AssertD ( i1 != i2, "mainGtU" );
398 406 /* 1 */
399 s1 = block[i1]; s2 = block[i2]; 407 c1 = block[i1]; c2 = block[i2];
400 if (s1 != s2) return (s1 > s2); 408 if (c1 != c2) return (c1 > c2);
401 i1 += 2; i2 += 2; 409 i1++; i2++;
402 410 /* 2 */
403 s1 = block[i1]; s2 = block[i2]; 411 c1 = block[i1]; c2 = block[i2];
404 if (s1 != s2) return (s1 > s2); 412 if (c1 != c2) return (c1 > c2);
405 i1 += 2; i2 += 2; 413 i1++; i2++;
406 414 /* 3 */
407 s1 = block[i1]; s2 = block[i2]; 415 c1 = block[i1]; c2 = block[i2];
408 if (s1 != s2) return (s1 > s2); 416 if (c1 != c2) return (c1 > c2);
409 i1 += 2; i2 += 2; 417 i1++; i2++;
410 418 /* 4 */
411 s1 = block[i1]; s2 = block[i2]; 419 c1 = block[i1]; c2 = block[i2];
412 if (s1 != s2) return (s1 > s2); 420 if (c1 != c2) return (c1 > c2);
413 i1 += 2; i2 += 2; 421 i1++; i2++;
414 422 /* 5 */
415 s1 = block[i1]; s2 = block[i2]; 423 c1 = block[i1]; c2 = block[i2];
416 if (s1 != s2) return (s1 > s2); 424 if (c1 != c2) return (c1 > c2);
417 i1 += 2; i2 += 2; 425 i1++; i2++;
418 426 /* 6 */
419 s1 = block[i1]; s2 = block[i2]; 427 c1 = block[i1]; c2 = block[i2];
420 if (s1 != s2) return (s1 > s2); 428 if (c1 != c2) return (c1 > c2);
421 i1 += 2; i2 += 2; 429 i1++; i2++;
430 /* 7 */
431 c1 = block[i1]; c2 = block[i2];
432 if (c1 != c2) return (c1 > c2);
433 i1++; i2++;
434 /* 8 */
435 c1 = block[i1]; c2 = block[i2];
436 if (c1 != c2) return (c1 > c2);
437 i1++; i2++;
438 /* 9 */
439 c1 = block[i1]; c2 = block[i2];
440 if (c1 != c2) return (c1 > c2);
441 i1++; i2++;
442 /* 10 */
443 c1 = block[i1]; c2 = block[i2];
444 if (c1 != c2) return (c1 > c2);
445 i1++; i2++;
446 /* 11 */
447 c1 = block[i1]; c2 = block[i2];
448 if (c1 != c2) return (c1 > c2);
449 i1++; i2++;
450 /* 12 */
451 c1 = block[i1]; c2 = block[i2];
452 if (c1 != c2) return (c1 > c2);
453 i1++; i2++;
422 454
423 k = nblock + 8; 455 k = nblock + 8;
424 456
425 do { 457 do {
426 458 /* 1 */
427 s1 = block[i1]; s2 = block[i2]; 459 c1 = block[i1]; c2 = block[i2];
460 if (c1 != c2) return (c1 > c2);
461 s1 = quadrant[i1]; s2 = quadrant[i2];
428 if (s1 != s2) return (s1 > s2); 462 if (s1 != s2) return (s1 > s2);
463 i1++; i2++;
464 /* 2 */
465 c1 = block[i1]; c2 = block[i2];
466 if (c1 != c2) return (c1 > c2);
429 s1 = quadrant[i1]; s2 = quadrant[i2]; 467 s1 = quadrant[i1]; s2 = quadrant[i2];
430 if (s1 != s2) return (s1 > s2); 468 if (s1 != s2) return (s1 > s2);
431 i1 += 2; i2 += 2; 469 i1++; i2++;
432 470 /* 3 */
433 s1 = block[i1]; s2 = block[i2]; 471 c1 = block[i1]; c2 = block[i2];
472 if (c1 != c2) return (c1 > c2);
473 s1 = quadrant[i1]; s2 = quadrant[i2];
434 if (s1 != s2) return (s1 > s2); 474 if (s1 != s2) return (s1 > s2);
475 i1++; i2++;
476 /* 4 */
477 c1 = block[i1]; c2 = block[i2];
478 if (c1 != c2) return (c1 > c2);
435 s1 = quadrant[i1]; s2 = quadrant[i2]; 479 s1 = quadrant[i1]; s2 = quadrant[i2];
436 if (s1 != s2) return (s1 > s2); 480 if (s1 != s2) return (s1 > s2);
437 i1 += 2; i2 += 2; 481 i1++; i2++;
438 482 /* 5 */
439 s1 = block[i1]; s2 = block[i2]; 483 c1 = block[i1]; c2 = block[i2];
484 if (c1 != c2) return (c1 > c2);
485 s1 = quadrant[i1]; s2 = quadrant[i2];
440 if (s1 != s2) return (s1 > s2); 486 if (s1 != s2) return (s1 > s2);
487 i1++; i2++;
488 /* 6 */
489 c1 = block[i1]; c2 = block[i2];
490 if (c1 != c2) return (c1 > c2);
441 s1 = quadrant[i1]; s2 = quadrant[i2]; 491 s1 = quadrant[i1]; s2 = quadrant[i2];
442 if (s1 != s2) return (s1 > s2); 492 if (s1 != s2) return (s1 > s2);
443 i1 += 2; i2 += 2; 493 i1++; i2++;
444 494 /* 7 */
445 s1 = block[i1]; s2 = block[i2]; 495 c1 = block[i1]; c2 = block[i2];
496 if (c1 != c2) return (c1 > c2);
497 s1 = quadrant[i1]; s2 = quadrant[i2];
446 if (s1 != s2) return (s1 > s2); 498 if (s1 != s2) return (s1 > s2);
499 i1++; i2++;
500 /* 8 */
501 c1 = block[i1]; c2 = block[i2];
502 if (c1 != c2) return (c1 > c2);
447 s1 = quadrant[i1]; s2 = quadrant[i2]; 503 s1 = quadrant[i1]; s2 = quadrant[i2];
448 if (s1 != s2) return (s1 > s2); 504 if (s1 != s2) return (s1 > s2);
449 i1 += 2; i2 += 2; 505 i1++; i2++;
450 506
451 if (i1 >= nblock) i1 -= nblock; 507 if (i1 >= nblock) i1 -= nblock;
452 if (i2 >= nblock) i2 -= nblock; 508 if (i2 >= nblock) i2 -= nblock;
@@ -467,13 +523,14 @@ Bool mainGtU ( UInt32 i1,
467 because the number of elems to sort is 523 because the number of elems to sort is
468 usually small, typically <= 20. 524 usually small, typically <= 20.
469--*/ 525--*/
526static
470Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280, 527Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
471 9841, 29524, 88573, 265720, 528 9841, 29524, 88573, 265720,
472 797161, 2391484 }; 529 797161, 2391484 };
473 530
474static 531static
475void mainSimpleSort ( UInt32* ptr, 532void mainSimpleSort ( UInt32* ptr,
476 UInt16* block, 533 UChar* block,
477 UInt16* quadrant, 534 UInt16* quadrant,
478 Int32 nblock, 535 Int32 nblock,
479 Int32 lo, 536 Int32 lo,
@@ -568,19 +625,19 @@ void mainSimpleSort ( UInt32* ptr,
568 } \ 625 } \
569} 626}
570 627
571
572static 628static
573__inline__ 629__inline__
574UInt16 mmed3 ( UInt16 a, UInt16 b, UInt16 c ) 630UChar mmed3 ( UChar a, UChar b, UChar c )
575{ 631{
576 UInt16 t; 632 UChar t;
577 if (a > b) { t = a; a = b; b = t; }; 633 if (a > b) { t = a; a = b; b = t; };
578 if (b > c) { t = b; b = c; c = t; }; 634 if (b > c) {
579 if (a > b) b = a; 635 b = c;
636 if (a > b) b = a;
637 }
580 return b; 638 return b;
581} 639}
582 640
583
584#define mmin(a,b) ((a) < (b)) ? (a) : (b) 641#define mmin(a,b) ((a) < (b)) ? (a) : (b)
585 642
586#define mpush(lz,hz,dz) { stackLo[sp] = lz; \ 643#define mpush(lz,hz,dz) { stackLo[sp] = lz; \
@@ -609,7 +666,7 @@ UInt16 mmed3 ( UInt16 a, UInt16 b, UInt16 c )
609 666
610static 667static
611void mainQSort3 ( UInt32* ptr, 668void mainQSort3 ( UInt32* ptr,
612 UInt16* block, 669 UChar* block,
613 UInt16* quadrant, 670 UInt16* quadrant,
614 Int32 nblock, 671 Int32 nblock,
615 Int32 loSt, 672 Int32 loSt,
@@ -679,7 +736,7 @@ void mainQSort3 ( UInt32* ptr,
679 AssertD ( unHi == unLo-1, "mainQSort3(2)" ); 736 AssertD ( unHi == unLo-1, "mainQSort3(2)" );
680 737
681 if (gtHi < ltLo) { 738 if (gtHi < ltLo) {
682 mpush(lo, hi, d+2 ); 739 mpush(lo, hi, d+1 );
683 continue; 740 continue;
684 } 741 }
685 742
@@ -691,7 +748,7 @@ void mainQSort3 ( UInt32* ptr,
691 748
692 nextLo[0] = lo; nextHi[0] = n; nextD[0] = d; 749 nextLo[0] = lo; nextHi[0] = n; nextD[0] = d;
693 nextLo[1] = m; nextHi[1] = hi; nextD[1] = d; 750 nextLo[1] = m; nextHi[1] = hi; nextD[1] = d;
694 nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+2; 751 nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1;
695 752
696 if (mnextsize(0) < mnextsize(1)) mnextswap(0,1); 753 if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
697 if (mnextsize(1) < mnextsize(2)) mnextswap(1,2); 754 if (mnextsize(1) < mnextsize(2)) mnextswap(1,2);
@@ -722,11 +779,11 @@ void mainQSort3 ( UInt32* ptr,
722/* Pre: 779/* Pre:
723 nblock > N_OVERSHOOT 780 nblock > N_OVERSHOOT
724 block32 exists for [0 .. nblock-1 +N_OVERSHOOT] 781 block32 exists for [0 .. nblock-1 +N_OVERSHOOT]
725 ((UInt16*)block32) [0 .. nblock-1] [15:8] holds block 782 ((UChar*)block32) [0 .. nblock-1] holds block
726 ptr exists for [0 .. nblock-1] 783 ptr exists for [0 .. nblock-1]
727 784
728 Post: 785 Post:
729 ((UInt16*)block32) [0 .. nblock-1] [15:8] holds block 786 ((UChar*)block32) [0 .. nblock-1] holds block
730 All other areas of block32 destroyed 787 All other areas of block32 destroyed
731 ftab [0 .. 65536 ] destroyed 788 ftab [0 .. 65536 ] destroyed
732 ptr [0 .. nblock-1] holds sorted order 789 ptr [0 .. nblock-1] holds sorted order
@@ -739,40 +796,47 @@ void mainQSort3 ( UInt32* ptr,
739 796
740static 797static
741void mainSort ( UInt32* ptr, 798void mainSort ( UInt32* ptr,
742 UInt16* block, 799 UChar* block,
743 UInt16* quadrant, 800 UInt16* quadrant,
744 UInt32* ftab, 801 UInt32* ftab,
745 Int32 nblock, 802 Int32 nblock,
746 Int32 verb, 803 Int32 verb,
747 Int32* budget ) 804 Int32* budget )
748{ 805{
749 Int32 i, j, k, m, ss, sb; 806 Int32 i, j, k, ss, sb;
750 Int32 runningOrder[256]; 807 Int32 runningOrder[256];
751 Int32 copy[256];
752 Bool bigDone[256]; 808 Bool bigDone[256];
809 Int32 copyStart[256];
810 Int32 copyEnd [256];
753 UChar c1; 811 UChar c1;
754 Int32 numQSorted; 812 Int32 numQSorted;
755 Int32 biggestSoFar;
756 UInt16 s; 813 UInt16 s;
757
758 if (verb >= 4) VPrintf0 ( " main sort initialise ...\n" ); 814 if (verb >= 4) VPrintf0 ( " main sort initialise ...\n" );
759 815
760 /*-- Stripe the block data into 16 bits, and at the 816 /*-- set up the 2-byte frequency table --*/
761 same time set up the 2-byte frequency table
762 --*/
763 for (i = 65536; i >= 0; i--) ftab[i] = 0; 817 for (i = 65536; i >= 0; i--) ftab[i] = 0;
764 818
765 s = block[0]; 819 j = block[0] << 8;
766 for (i = 1; i < nblock; i++) { 820 i = nblock-1;
821 for (; i >= 3; i -= 4) {
822 quadrant[i] = 0;
823 j = (j >> 8) | ( ((UInt16)block[i]) << 8);
824 ftab[j]++;
825 quadrant[i-1] = 0;
826 j = (j >> 8) | ( ((UInt16)block[i-1]) << 8);
827 ftab[j]++;
828 quadrant[i-2] = 0;
829 j = (j >> 8) | ( ((UInt16)block[i-2]) << 8);
830 ftab[j]++;
831 quadrant[i-3] = 0;
832 j = (j >> 8) | ( ((UInt16)block[i-3]) << 8);
833 ftab[j]++;
834 }
835 for (; i >= 0; i--) {
767 quadrant[i] = 0; 836 quadrant[i] = 0;
768 s = (s << 8) | block[i]; 837 j = (j >> 8) | ( ((UInt16)block[i]) << 8);
769 block[i-1] = s; 838 ftab[j]++;
770 ftab[s]++;
771 } 839 }
772 quadrant[0] = 0;
773 s = (s << 8) | (block[0] >> 8);
774 block[nblock-1] = s;
775 ftab[s]++;
776 840
777 /*-- (emphasises close relationship of block & quadrant) --*/ 841 /*-- (emphasises close relationship of block & quadrant) --*/
778 for (i = 0; i < BZ_N_OVERSHOOT; i++) { 842 for (i = 0; i < BZ_N_OVERSHOOT; i++) {
@@ -785,9 +849,29 @@ void mainSort ( UInt32* ptr,
785 /*-- Complete the initial radix sort --*/ 849 /*-- Complete the initial radix sort --*/
786 for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1]; 850 for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1];
787 851
788 for (i = 0; i < nblock; i++) { 852 s = block[0] << 8;
789 s = block[i]; 853 i = nblock-1;
790 j = ftab[s] - 1; 854 for (; i >= 3; i -= 4) {
855 s = (s >> 8) | (block[i] << 8);
856 j = ftab[s] -1;
857 ftab[s] = j;
858 ptr[j] = i;
859 s = (s >> 8) | (block[i-1] << 8);
860 j = ftab[s] -1;
861 ftab[s] = j;
862 ptr[j] = i-1;
863 s = (s >> 8) | (block[i-2] << 8);
864 j = ftab[s] -1;
865 ftab[s] = j;
866 ptr[j] = i-2;
867 s = (s >> 8) | (block[i-3] << 8);
868 j = ftab[s] -1;
869 ftab[s] = j;
870 ptr[j] = i-3;
871 }
872 for (; i >= 0; i--) {
873 s = (s >> 8) | (block[i] << 8);
874 j = ftab[s] -1;
791 ftab[s] = j; 875 ftab[s] = j;
792 ptr[j] = i; 876 ptr[j] = i;
793 } 877 }
@@ -826,13 +910,13 @@ void mainSort ( UInt32* ptr,
826 The main sorting loop. 910 The main sorting loop.
827 --*/ 911 --*/
828 912
829 biggestSoFar = numQSorted = 0; 913 numQSorted = 0;
830 914
831 for (i = 0; i <= 255; i++) { 915 for (i = 0; i <= 255; i++) {
832 916
833 /*-- 917 /*--
834 Process big buckets, starting with the least full. 918 Process big buckets, starting with the least full.
835 Basically this is a 4-step process in which we call 919 Basically this is a 3-step process in which we call
836 mainQSort3 to sort the small buckets [ss, j], but 920 mainQSort3 to sort the small buckets [ss, j], but
837 also make a big effort to avoid the calls if we can. 921 also make a big effort to avoid the calls if we can.
838 --*/ 922 --*/
@@ -869,39 +953,38 @@ void mainSort ( UInt32* ptr,
869 } 953 }
870 } 954 }
871 955
956 AssertH ( !bigDone[ss], 1006 );
957
872 /*-- 958 /*--
873 Step 2: 959 Step 2:
874 Deal specially with case [ss, ss]. This establishes the 960 Now scan this big bucket [ss] so as to synthesise the
875 sorted order for [ss, ss] without any comparisons. 961 sorted order for small buckets [t, ss] for all t,
876 A clever trick, cryptically described as steps Q6b and Q6c 962 including, magically, the bucket [ss,ss] too.
877 in SRC-124 (aka BW94). Compared to bzip2, this makes it 963 This will avoid doing Real Work in subsequent Step 1's.
878 practical not to use a preliminary run-length coder.
879 --*/ 964 --*/
880 { 965 {
881 Int32 put0, get0, put1, get1; 966 for (j = 0; j <= 255; j++) {
882 Int32 sbn = (ss << 8) + ss; 967 copyStart[j] = ftab[(j << 8) + ss] & CLEARMASK;
883 Int32 lo = ftab[sbn] & CLEARMASK; 968 copyEnd [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1;
884 Int32 hi = (ftab[sbn+1] & CLEARMASK) - 1; 969 }
885 UChar ssc = (UChar)ss; 970 for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) {
886 put0 = lo; 971 k = ptr[j]-1; if (k < 0) k += nblock;
887 get0 = ftab[ss << 8] & CLEARMASK; 972 c1 = block[k];
888 put1 = hi; 973 if (!bigDone[c1])
889 get1 = (ftab[(ss+1) << 8] & CLEARMASK) - 1; 974 ptr[ copyStart[c1]++ ] = k;
890 while (get0 < put0) {
891 j = ptr[get0]-1; if (j < 0) j += nblock;
892 c1 = (UChar)(block[j] >> 8);
893 if (c1 == ssc) { ptr[put0] = j; put0++; };
894 get0++;
895 } 975 }
896 while (get1 > put1) { 976 for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) {
897 j = ptr[get1]-1; if (j < 0) j += nblock; 977 k = ptr[j]-1; if (k < 0) k += nblock;
898 c1 = (UChar)(block[j] >> 8); 978 c1 = block[k];
899 if (c1 == ssc) { ptr[put1] = j; put1--; }; 979 if (!bigDone[c1])
900 get1--; 980 ptr[ copyEnd[c1]-- ] = k;
901 } 981 }
902 ftab[sbn] |= SETMASK;
903 } 982 }
904 983
984 AssertH ( copyStart[ss]-1 == copyEnd[ss], 1007 );
985
986 for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK;
987
905 /*-- 988 /*--
906 Step 3: 989 Step 3:
907 The [ss] big bucket is now done. Record this fact, 990 The [ss] big bucket is now done. Record this fact,
@@ -950,7 +1033,7 @@ void mainSort ( UInt32* ptr,
950 1033
951 while ((bbSize >> shifts) > 65534) shifts++; 1034 while ((bbSize >> shifts) > 65534) shifts++;
952 1035
953 for (j = 0; j < bbSize; j++) { 1036 for (j = bbSize-1; j >= 0; j--) {
954 Int32 a2update = ptr[bbStart + j]; 1037 Int32 a2update = ptr[bbStart + j];
955 UInt16 qVal = (UInt16)(j >> shifts); 1038 UInt16 qVal = (UInt16)(j >> shifts);
956 quadrant[a2update] = qVal; 1039 quadrant[a2update] = qVal;
@@ -960,26 +1043,6 @@ void mainSort ( UInt32* ptr,
960 AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 ); 1043 AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 );
961 } 1044 }
962 1045
963 /*--
964 Step 4:
965 Now scan this big bucket [ss] so as to synthesise the
966 sorted order for small buckets [t, ss] for all t != ss.
967 This will avoid doing Real Work in subsequent Step 1's.
968 --*/
969 for (j = 0; j <= 255; j++)
970 copy[j] = ftab[(j << 8) + ss] & CLEARMASK;
971
972 m = ftab[(ss+1) << 8] & CLEARMASK;
973 for (j = ftab[ss << 8] & CLEARMASK; j < m; j++) {
974 k = ptr[j] - 1; if (k < 0) k += nblock;
975 c1 = (UChar)(block[k] >> 8);
976 if ( ! bigDone[c1] ) {
977 ptr[copy[c1]] = k;
978 copy[c1] ++;
979 }
980 }
981
982 for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK;
983 } 1046 }
984 1047
985 if (verb >= 4) 1048 if (verb >= 4)
@@ -996,19 +1059,19 @@ void mainSort ( UInt32* ptr,
996/* Pre: 1059/* Pre:
997 nblock > 0 1060 nblock > 0
998 arr2 exists for [0 .. nblock-1 +N_OVERSHOOT] 1061 arr2 exists for [0 .. nblock-1 +N_OVERSHOOT]
999 ((UInt16*)arr2) [0 .. nblock-1] [15:8] holds block 1062 ((UChar*)arr2) [0 .. nblock-1] holds block
1000 arr1 exists for [0 .. nblock-1] 1063 arr1 exists for [0 .. nblock-1]
1001 1064
1002 Post: 1065 Post:
1003 ((UInt16*)arr2) [0 .. nblock-1] [15:8] holds block 1066 ((UChar*)arr2) [0 .. nblock-1] holds block
1004 All other areas of block destroyed 1067 All other areas of block destroyed
1005 ftab [ 0 .. 65536 ] destroyed 1068 ftab [ 0 .. 65536 ] destroyed
1006 arr1 [0 .. nblock-1] holds sorted order 1069 arr1 [0 .. nblock-1] holds sorted order
1007*/ 1070*/
1008void blockSort ( EState* s ) 1071void BZ2_blockSort ( EState* s )
1009{ 1072{
1010 UInt32* ptr = s->ptr; 1073 UInt32* ptr = s->ptr;
1011 UInt16* block = s->block; 1074 UChar* block = s->block;
1012 UInt32* ftab = s->ftab; 1075 UInt32* ftab = s->ftab;
1013 Int32 nblock = s->nblock; 1076 Int32 nblock = s->nblock;
1014 Int32 verb = s->verbosity; 1077 Int32 verb = s->verbosity;
@@ -1019,10 +1082,16 @@ void blockSort ( EState* s )
1019 Int32 i; 1082 Int32 i;
1020 1083
1021 if (nblock < 10000) { 1084 if (nblock < 10000) {
1022 for (i = 0; i < nblock; i++) block[i] <<= 8;
1023 fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb ); 1085 fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
1024 } else { 1086 } else {
1025 quadrant = &(block[nblock+BZ_N_OVERSHOOT]); 1087 /* Calculate the location for quadrant, remembering to get
1088 the alignment right. Assumes that &(block[0]) is at least
1089 2-byte aligned -- this should be ok since block is really
1090 the first section of arr2.
1091 */
1092 i = nblock+BZ_N_OVERSHOOT;
1093 if (i & 1) i++;
1094 quadrant = (UInt16*)(&(block[i]));
1026 1095
1027 /* (wfact-1) / 3 puts the default-factor-30 1096 /* (wfact-1) / 3 puts the default-factor-30
1028 transition point at very roughly the same place as 1097 transition point at very roughly the same place as