diff options
Diffstat (limited to 'blocksort.c')
-rw-r--r-- | blocksort.c | 335 |
1 files changed, 202 insertions, 133 deletions
diff --git a/blocksort.c b/blocksort.c index 85a02de..ec42672 100644 --- a/blocksort.c +++ b/blocksort.c | |||
@@ -8,7 +8,7 @@ | |||
8 | This file is a part of bzip2 and/or libbzip2, a program and | 8 | This file is a part of bzip2 and/or libbzip2, a program and |
9 | library for lossless, block-sorting data compression. | 9 | library for lossless, block-sorting data compression. |
10 | 10 | ||
11 | Copyright (C) 1996-1999 Julian R Seward. All rights reserved. | 11 | Copyright (C) 1996-2000 Julian R Seward. All rights reserved. |
12 | 12 | ||
13 | Redistribution and use in source and binary forms, with or without | 13 | Redistribution and use in source and binary forms, with or without |
14 | modification, are permitted provided that the following conditions | 14 | modification, are permitted provided that the following conditions |
@@ -43,7 +43,7 @@ | |||
43 | 43 | ||
44 | Julian Seward, Cambridge, UK. | 44 | Julian Seward, Cambridge, UK. |
45 | jseward@acm.org | 45 | jseward@acm.org |
46 | bzip2/libbzip2 version 0.9.5 of 24 May 1999 | 46 | bzip2/libbzip2 version 1.0 of 21 March 2000 |
47 | 47 | ||
48 | This program is based on (at least) the work of: | 48 | This program is based on (at least) the work of: |
49 | Mike Burrows | 49 | Mike Burrows |
@@ -56,6 +56,13 @@ | |||
56 | Jon L. Bentley | 56 | Jon L. Bentley |
57 | 57 | ||
58 | For more information on these sources, see the manual. | 58 | For more information on these sources, see the manual. |
59 | |||
60 | To get some idea how the block sorting algorithms in this file | ||
61 | work, read my paper | ||
62 | On the Performance of BWT Sorting Algorithms | ||
63 | in Proceedings of the IEEE Data Compression Conference 2000, | ||
64 | Snowbird, Utah, USA, 27-30 March 2000. The main sort in this | ||
65 | file implements the algorithm called cache in the paper. | ||
59 | --*/ | 66 | --*/ |
60 | 67 | ||
61 | 68 | ||
@@ -232,11 +239,11 @@ void fallbackQSort3 ( UInt32* fmap, | |||
232 | /* Pre: | 239 | /* Pre: |
233 | nblock > 0 | 240 | nblock > 0 |
234 | eclass exists for [0 .. nblock-1] | 241 | eclass exists for [0 .. nblock-1] |
235 | ((UInt16*)eclass) [0 .. nblock-1] [15:8] holds block | 242 | ((UChar*)eclass) [0 .. nblock-1] holds block |
236 | ptr exists for [0 .. nblock-1] | 243 | ptr exists for [0 .. nblock-1] |
237 | 244 | ||
238 | Post: | 245 | Post: |
239 | ((UInt16*)eclass) [0 .. nblock-1] [15:8] holds block | 246 | ((UChar*)eclass) [0 .. nblock-1] holds block |
240 | All other areas of eclass destroyed | 247 | All other areas of eclass destroyed |
241 | fmap [0 .. nblock-1] holds sorted order | 248 | fmap [0 .. nblock-1] holds sorted order |
242 | bhtab [ 0 .. 2+(nblock/32) ] destroyed | 249 | bhtab [ 0 .. 2+(nblock/32) ] destroyed |
@@ -260,7 +267,7 @@ void fallbackSort ( UInt32* fmap, | |||
260 | Int32 H, i, j, k, l, r, cc, cc1; | 267 | Int32 H, i, j, k, l, r, cc, cc1; |
261 | Int32 nNotDone; | 268 | Int32 nNotDone; |
262 | Int32 nBhtab; | 269 | Int32 nBhtab; |
263 | UInt16* eclass16 = (UInt16*)eclass; | 270 | UChar* eclass8 = (UChar*)eclass; |
264 | 271 | ||
265 | /*-- | 272 | /*-- |
266 | Initial 1-char radix sort to generate | 273 | Initial 1-char radix sort to generate |
@@ -269,12 +276,12 @@ void fallbackSort ( UInt32* fmap, | |||
269 | if (verb >= 4) | 276 | if (verb >= 4) |
270 | VPrintf0 ( " bucket sorting ...\n" ); | 277 | VPrintf0 ( " bucket sorting ...\n" ); |
271 | for (i = 0; i < 257; i++) ftab[i] = 0; | 278 | for (i = 0; i < 257; i++) ftab[i] = 0; |
272 | for (i = 0; i < nblock; i++) ftab[eclass16[i] >> 8]++; | 279 | for (i = 0; i < nblock; i++) ftab[eclass8[i]]++; |
273 | for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i]; | 280 | for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i]; |
274 | for (i = 1; i < 257; i++) ftab[i] += ftab[i-1]; | 281 | for (i = 1; i < 257; i++) ftab[i] += ftab[i-1]; |
275 | 282 | ||
276 | for (i = 0; i < nblock; i++) { | 283 | for (i = 0; i < nblock; i++) { |
277 | j = eclass16[i] >> 8; | 284 | j = eclass8[i]; |
278 | k = ftab[j] - 1; | 285 | k = ftab[j] - 1; |
279 | ftab[j] = k; | 286 | ftab[j] = k; |
280 | fmap[k] = i; | 287 | fmap[k] = i; |
@@ -354,7 +361,7 @@ void fallbackSort ( UInt32* fmap, | |||
354 | 361 | ||
355 | /*-- | 362 | /*-- |
356 | Reconstruct the original block in | 363 | Reconstruct the original block in |
357 | eclass16 [0 .. nblock-1] [15:8], since the | 364 | eclass8 [0 .. nblock-1], since the |
358 | previous phase destroyed it. | 365 | previous phase destroyed it. |
359 | --*/ | 366 | --*/ |
360 | if (verb >= 4) | 367 | if (verb >= 4) |
@@ -363,7 +370,7 @@ void fallbackSort ( UInt32* fmap, | |||
363 | for (i = 0; i < nblock; i++) { | 370 | for (i = 0; i < nblock; i++) { |
364 | while (ftabCopy[j] == 0) j++; | 371 | while (ftabCopy[j] == 0) j++; |
365 | ftabCopy[j]--; | 372 | ftabCopy[j]--; |
366 | eclass16[fmap[i]] = j << 8; | 373 | eclass8[fmap[i]] = (UChar)j; |
367 | } | 374 | } |
368 | AssertH ( j < 256, 1005 ); | 375 | AssertH ( j < 256, 1005 ); |
369 | } | 376 | } |
@@ -386,67 +393,116 @@ static | |||
386 | __inline__ | 393 | __inline__ |
387 | Bool mainGtU ( UInt32 i1, | 394 | Bool mainGtU ( UInt32 i1, |
388 | UInt32 i2, | 395 | UInt32 i2, |
389 | UInt16* block, | 396 | UChar* block, |
390 | UInt16* quadrant, | 397 | UInt16* quadrant, |
391 | UInt32 nblock, | 398 | UInt32 nblock, |
392 | Int32* budget ) | 399 | Int32* budget ) |
393 | { | 400 | { |
394 | Int32 k; | 401 | Int32 k; |
402 | UChar c1, c2; | ||
395 | UInt16 s1, s2; | 403 | UInt16 s1, s2; |
396 | 404 | ||
397 | AssertD ( i1 != i2, "mainGtU" ); | 405 | AssertD ( i1 != i2, "mainGtU" ); |
398 | 406 | /* 1 */ | |
399 | s1 = block[i1]; s2 = block[i2]; | 407 | c1 = block[i1]; c2 = block[i2]; |
400 | if (s1 != s2) return (s1 > s2); | 408 | if (c1 != c2) return (c1 > c2); |
401 | i1 += 2; i2 += 2; | 409 | i1++; i2++; |
402 | 410 | /* 2 */ | |
403 | s1 = block[i1]; s2 = block[i2]; | 411 | c1 = block[i1]; c2 = block[i2]; |
404 | if (s1 != s2) return (s1 > s2); | 412 | if (c1 != c2) return (c1 > c2); |
405 | i1 += 2; i2 += 2; | 413 | i1++; i2++; |
406 | 414 | /* 3 */ | |
407 | s1 = block[i1]; s2 = block[i2]; | 415 | c1 = block[i1]; c2 = block[i2]; |
408 | if (s1 != s2) return (s1 > s2); | 416 | if (c1 != c2) return (c1 > c2); |
409 | i1 += 2; i2 += 2; | 417 | i1++; i2++; |
410 | 418 | /* 4 */ | |
411 | s1 = block[i1]; s2 = block[i2]; | 419 | c1 = block[i1]; c2 = block[i2]; |
412 | if (s1 != s2) return (s1 > s2); | 420 | if (c1 != c2) return (c1 > c2); |
413 | i1 += 2; i2 += 2; | 421 | i1++; i2++; |
414 | 422 | /* 5 */ | |
415 | s1 = block[i1]; s2 = block[i2]; | 423 | c1 = block[i1]; c2 = block[i2]; |
416 | if (s1 != s2) return (s1 > s2); | 424 | if (c1 != c2) return (c1 > c2); |
417 | i1 += 2; i2 += 2; | 425 | i1++; i2++; |
418 | 426 | /* 6 */ | |
419 | s1 = block[i1]; s2 = block[i2]; | 427 | c1 = block[i1]; c2 = block[i2]; |
420 | if (s1 != s2) return (s1 > s2); | 428 | if (c1 != c2) return (c1 > c2); |
421 | i1 += 2; i2 += 2; | 429 | i1++; i2++; |
430 | /* 7 */ | ||
431 | c1 = block[i1]; c2 = block[i2]; | ||
432 | if (c1 != c2) return (c1 > c2); | ||
433 | i1++; i2++; | ||
434 | /* 8 */ | ||
435 | c1 = block[i1]; c2 = block[i2]; | ||
436 | if (c1 != c2) return (c1 > c2); | ||
437 | i1++; i2++; | ||
438 | /* 9 */ | ||
439 | c1 = block[i1]; c2 = block[i2]; | ||
440 | if (c1 != c2) return (c1 > c2); | ||
441 | i1++; i2++; | ||
442 | /* 10 */ | ||
443 | c1 = block[i1]; c2 = block[i2]; | ||
444 | if (c1 != c2) return (c1 > c2); | ||
445 | i1++; i2++; | ||
446 | /* 11 */ | ||
447 | c1 = block[i1]; c2 = block[i2]; | ||
448 | if (c1 != c2) return (c1 > c2); | ||
449 | i1++; i2++; | ||
450 | /* 12 */ | ||
451 | c1 = block[i1]; c2 = block[i2]; | ||
452 | if (c1 != c2) return (c1 > c2); | ||
453 | i1++; i2++; | ||
422 | 454 | ||
423 | k = nblock + 8; | 455 | k = nblock + 8; |
424 | 456 | ||
425 | do { | 457 | do { |
426 | 458 | /* 1 */ | |
427 | s1 = block[i1]; s2 = block[i2]; | 459 | c1 = block[i1]; c2 = block[i2]; |
460 | if (c1 != c2) return (c1 > c2); | ||
461 | s1 = quadrant[i1]; s2 = quadrant[i2]; | ||
428 | if (s1 != s2) return (s1 > s2); | 462 | if (s1 != s2) return (s1 > s2); |
463 | i1++; i2++; | ||
464 | /* 2 */ | ||
465 | c1 = block[i1]; c2 = block[i2]; | ||
466 | if (c1 != c2) return (c1 > c2); | ||
429 | s1 = quadrant[i1]; s2 = quadrant[i2]; | 467 | s1 = quadrant[i1]; s2 = quadrant[i2]; |
430 | if (s1 != s2) return (s1 > s2); | 468 | if (s1 != s2) return (s1 > s2); |
431 | i1 += 2; i2 += 2; | 469 | i1++; i2++; |
432 | 470 | /* 3 */ | |
433 | s1 = block[i1]; s2 = block[i2]; | 471 | c1 = block[i1]; c2 = block[i2]; |
472 | if (c1 != c2) return (c1 > c2); | ||
473 | s1 = quadrant[i1]; s2 = quadrant[i2]; | ||
434 | if (s1 != s2) return (s1 > s2); | 474 | if (s1 != s2) return (s1 > s2); |
475 | i1++; i2++; | ||
476 | /* 4 */ | ||
477 | c1 = block[i1]; c2 = block[i2]; | ||
478 | if (c1 != c2) return (c1 > c2); | ||
435 | s1 = quadrant[i1]; s2 = quadrant[i2]; | 479 | s1 = quadrant[i1]; s2 = quadrant[i2]; |
436 | if (s1 != s2) return (s1 > s2); | 480 | if (s1 != s2) return (s1 > s2); |
437 | i1 += 2; i2 += 2; | 481 | i1++; i2++; |
438 | 482 | /* 5 */ | |
439 | s1 = block[i1]; s2 = block[i2]; | 483 | c1 = block[i1]; c2 = block[i2]; |
484 | if (c1 != c2) return (c1 > c2); | ||
485 | s1 = quadrant[i1]; s2 = quadrant[i2]; | ||
440 | if (s1 != s2) return (s1 > s2); | 486 | if (s1 != s2) return (s1 > s2); |
487 | i1++; i2++; | ||
488 | /* 6 */ | ||
489 | c1 = block[i1]; c2 = block[i2]; | ||
490 | if (c1 != c2) return (c1 > c2); | ||
441 | s1 = quadrant[i1]; s2 = quadrant[i2]; | 491 | s1 = quadrant[i1]; s2 = quadrant[i2]; |
442 | if (s1 != s2) return (s1 > s2); | 492 | if (s1 != s2) return (s1 > s2); |
443 | i1 += 2; i2 += 2; | 493 | i1++; i2++; |
444 | 494 | /* 7 */ | |
445 | s1 = block[i1]; s2 = block[i2]; | 495 | c1 = block[i1]; c2 = block[i2]; |
496 | if (c1 != c2) return (c1 > c2); | ||
497 | s1 = quadrant[i1]; s2 = quadrant[i2]; | ||
446 | if (s1 != s2) return (s1 > s2); | 498 | if (s1 != s2) return (s1 > s2); |
499 | i1++; i2++; | ||
500 | /* 8 */ | ||
501 | c1 = block[i1]; c2 = block[i2]; | ||
502 | if (c1 != c2) return (c1 > c2); | ||
447 | s1 = quadrant[i1]; s2 = quadrant[i2]; | 503 | s1 = quadrant[i1]; s2 = quadrant[i2]; |
448 | if (s1 != s2) return (s1 > s2); | 504 | if (s1 != s2) return (s1 > s2); |
449 | i1 += 2; i2 += 2; | 505 | i1++; i2++; |
450 | 506 | ||
451 | if (i1 >= nblock) i1 -= nblock; | 507 | if (i1 >= nblock) i1 -= nblock; |
452 | if (i2 >= nblock) i2 -= nblock; | 508 | if (i2 >= nblock) i2 -= nblock; |
@@ -467,13 +523,14 @@ Bool mainGtU ( UInt32 i1, | |||
467 | because the number of elems to sort is | 523 | because the number of elems to sort is |
468 | usually small, typically <= 20. | 524 | usually small, typically <= 20. |
469 | --*/ | 525 | --*/ |
526 | static | ||
470 | Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280, | 527 | Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280, |
471 | 9841, 29524, 88573, 265720, | 528 | 9841, 29524, 88573, 265720, |
472 | 797161, 2391484 }; | 529 | 797161, 2391484 }; |
473 | 530 | ||
474 | static | 531 | static |
475 | void mainSimpleSort ( UInt32* ptr, | 532 | void mainSimpleSort ( UInt32* ptr, |
476 | UInt16* block, | 533 | UChar* block, |
477 | UInt16* quadrant, | 534 | UInt16* quadrant, |
478 | Int32 nblock, | 535 | Int32 nblock, |
479 | Int32 lo, | 536 | Int32 lo, |
@@ -568,19 +625,19 @@ void mainSimpleSort ( UInt32* ptr, | |||
568 | } \ | 625 | } \ |
569 | } | 626 | } |
570 | 627 | ||
571 | |||
572 | static | 628 | static |
573 | __inline__ | 629 | __inline__ |
574 | UInt16 mmed3 ( UInt16 a, UInt16 b, UInt16 c ) | 630 | UChar mmed3 ( UChar a, UChar b, UChar c ) |
575 | { | 631 | { |
576 | UInt16 t; | 632 | UChar t; |
577 | if (a > b) { t = a; a = b; b = t; }; | 633 | if (a > b) { t = a; a = b; b = t; }; |
578 | if (b > c) { t = b; b = c; c = t; }; | 634 | if (b > c) { |
579 | if (a > b) b = a; | 635 | b = c; |
636 | if (a > b) b = a; | ||
637 | } | ||
580 | return b; | 638 | return b; |
581 | } | 639 | } |
582 | 640 | ||
583 | |||
584 | #define mmin(a,b) ((a) < (b)) ? (a) : (b) | 641 | #define mmin(a,b) ((a) < (b)) ? (a) : (b) |
585 | 642 | ||
586 | #define mpush(lz,hz,dz) { stackLo[sp] = lz; \ | 643 | #define mpush(lz,hz,dz) { stackLo[sp] = lz; \ |
@@ -609,7 +666,7 @@ UInt16 mmed3 ( UInt16 a, UInt16 b, UInt16 c ) | |||
609 | 666 | ||
610 | static | 667 | static |
611 | void mainQSort3 ( UInt32* ptr, | 668 | void mainQSort3 ( UInt32* ptr, |
612 | UInt16* block, | 669 | UChar* block, |
613 | UInt16* quadrant, | 670 | UInt16* quadrant, |
614 | Int32 nblock, | 671 | Int32 nblock, |
615 | Int32 loSt, | 672 | Int32 loSt, |
@@ -679,7 +736,7 @@ void mainQSort3 ( UInt32* ptr, | |||
679 | AssertD ( unHi == unLo-1, "mainQSort3(2)" ); | 736 | AssertD ( unHi == unLo-1, "mainQSort3(2)" ); |
680 | 737 | ||
681 | if (gtHi < ltLo) { | 738 | if (gtHi < ltLo) { |
682 | mpush(lo, hi, d+2 ); | 739 | mpush(lo, hi, d+1 ); |
683 | continue; | 740 | continue; |
684 | } | 741 | } |
685 | 742 | ||
@@ -691,7 +748,7 @@ void mainQSort3 ( UInt32* ptr, | |||
691 | 748 | ||
692 | nextLo[0] = lo; nextHi[0] = n; nextD[0] = d; | 749 | nextLo[0] = lo; nextHi[0] = n; nextD[0] = d; |
693 | nextLo[1] = m; nextHi[1] = hi; nextD[1] = d; | 750 | nextLo[1] = m; nextHi[1] = hi; nextD[1] = d; |
694 | nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+2; | 751 | nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1; |
695 | 752 | ||
696 | if (mnextsize(0) < mnextsize(1)) mnextswap(0,1); | 753 | if (mnextsize(0) < mnextsize(1)) mnextswap(0,1); |
697 | if (mnextsize(1) < mnextsize(2)) mnextswap(1,2); | 754 | if (mnextsize(1) < mnextsize(2)) mnextswap(1,2); |
@@ -722,11 +779,11 @@ void mainQSort3 ( UInt32* ptr, | |||
722 | /* Pre: | 779 | /* Pre: |
723 | nblock > N_OVERSHOOT | 780 | nblock > N_OVERSHOOT |
724 | block32 exists for [0 .. nblock-1 +N_OVERSHOOT] | 781 | block32 exists for [0 .. nblock-1 +N_OVERSHOOT] |
725 | ((UInt16*)block32) [0 .. nblock-1] [15:8] holds block | 782 | ((UChar*)block32) [0 .. nblock-1] holds block |
726 | ptr exists for [0 .. nblock-1] | 783 | ptr exists for [0 .. nblock-1] |
727 | 784 | ||
728 | Post: | 785 | Post: |
729 | ((UInt16*)block32) [0 .. nblock-1] [15:8] holds block | 786 | ((UChar*)block32) [0 .. nblock-1] holds block |
730 | All other areas of block32 destroyed | 787 | All other areas of block32 destroyed |
731 | ftab [0 .. 65536 ] destroyed | 788 | ftab [0 .. 65536 ] destroyed |
732 | ptr [0 .. nblock-1] holds sorted order | 789 | ptr [0 .. nblock-1] holds sorted order |
@@ -739,40 +796,47 @@ void mainQSort3 ( UInt32* ptr, | |||
739 | 796 | ||
740 | static | 797 | static |
741 | void mainSort ( UInt32* ptr, | 798 | void mainSort ( UInt32* ptr, |
742 | UInt16* block, | 799 | UChar* block, |
743 | UInt16* quadrant, | 800 | UInt16* quadrant, |
744 | UInt32* ftab, | 801 | UInt32* ftab, |
745 | Int32 nblock, | 802 | Int32 nblock, |
746 | Int32 verb, | 803 | Int32 verb, |
747 | Int32* budget ) | 804 | Int32* budget ) |
748 | { | 805 | { |
749 | Int32 i, j, k, m, ss, sb; | 806 | Int32 i, j, k, ss, sb; |
750 | Int32 runningOrder[256]; | 807 | Int32 runningOrder[256]; |
751 | Int32 copy[256]; | ||
752 | Bool bigDone[256]; | 808 | Bool bigDone[256]; |
809 | Int32 copyStart[256]; | ||
810 | Int32 copyEnd [256]; | ||
753 | UChar c1; | 811 | UChar c1; |
754 | Int32 numQSorted; | 812 | Int32 numQSorted; |
755 | Int32 biggestSoFar; | ||
756 | UInt16 s; | 813 | UInt16 s; |
757 | |||
758 | if (verb >= 4) VPrintf0 ( " main sort initialise ...\n" ); | 814 | if (verb >= 4) VPrintf0 ( " main sort initialise ...\n" ); |
759 | 815 | ||
760 | /*-- Stripe the block data into 16 bits, and at the | 816 | /*-- set up the 2-byte frequency table --*/ |
761 | same time set up the 2-byte frequency table | ||
762 | --*/ | ||
763 | for (i = 65536; i >= 0; i--) ftab[i] = 0; | 817 | for (i = 65536; i >= 0; i--) ftab[i] = 0; |
764 | 818 | ||
765 | s = block[0]; | 819 | j = block[0] << 8; |
766 | for (i = 1; i < nblock; i++) { | 820 | i = nblock-1; |
821 | for (; i >= 3; i -= 4) { | ||
822 | quadrant[i] = 0; | ||
823 | j = (j >> 8) | ( ((UInt16)block[i]) << 8); | ||
824 | ftab[j]++; | ||
825 | quadrant[i-1] = 0; | ||
826 | j = (j >> 8) | ( ((UInt16)block[i-1]) << 8); | ||
827 | ftab[j]++; | ||
828 | quadrant[i-2] = 0; | ||
829 | j = (j >> 8) | ( ((UInt16)block[i-2]) << 8); | ||
830 | ftab[j]++; | ||
831 | quadrant[i-3] = 0; | ||
832 | j = (j >> 8) | ( ((UInt16)block[i-3]) << 8); | ||
833 | ftab[j]++; | ||
834 | } | ||
835 | for (; i >= 0; i--) { | ||
767 | quadrant[i] = 0; | 836 | quadrant[i] = 0; |
768 | s = (s << 8) | block[i]; | 837 | j = (j >> 8) | ( ((UInt16)block[i]) << 8); |
769 | block[i-1] = s; | 838 | ftab[j]++; |
770 | ftab[s]++; | ||
771 | } | 839 | } |
772 | quadrant[0] = 0; | ||
773 | s = (s << 8) | (block[0] >> 8); | ||
774 | block[nblock-1] = s; | ||
775 | ftab[s]++; | ||
776 | 840 | ||
777 | /*-- (emphasises close relationship of block & quadrant) --*/ | 841 | /*-- (emphasises close relationship of block & quadrant) --*/ |
778 | for (i = 0; i < BZ_N_OVERSHOOT; i++) { | 842 | for (i = 0; i < BZ_N_OVERSHOOT; i++) { |
@@ -785,9 +849,29 @@ void mainSort ( UInt32* ptr, | |||
785 | /*-- Complete the initial radix sort --*/ | 849 | /*-- Complete the initial radix sort --*/ |
786 | for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1]; | 850 | for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1]; |
787 | 851 | ||
788 | for (i = 0; i < nblock; i++) { | 852 | s = block[0] << 8; |
789 | s = block[i]; | 853 | i = nblock-1; |
790 | j = ftab[s] - 1; | 854 | for (; i >= 3; i -= 4) { |
855 | s = (s >> 8) | (block[i] << 8); | ||
856 | j = ftab[s] -1; | ||
857 | ftab[s] = j; | ||
858 | ptr[j] = i; | ||
859 | s = (s >> 8) | (block[i-1] << 8); | ||
860 | j = ftab[s] -1; | ||
861 | ftab[s] = j; | ||
862 | ptr[j] = i-1; | ||
863 | s = (s >> 8) | (block[i-2] << 8); | ||
864 | j = ftab[s] -1; | ||
865 | ftab[s] = j; | ||
866 | ptr[j] = i-2; | ||
867 | s = (s >> 8) | (block[i-3] << 8); | ||
868 | j = ftab[s] -1; | ||
869 | ftab[s] = j; | ||
870 | ptr[j] = i-3; | ||
871 | } | ||
872 | for (; i >= 0; i--) { | ||
873 | s = (s >> 8) | (block[i] << 8); | ||
874 | j = ftab[s] -1; | ||
791 | ftab[s] = j; | 875 | ftab[s] = j; |
792 | ptr[j] = i; | 876 | ptr[j] = i; |
793 | } | 877 | } |
@@ -826,13 +910,13 @@ void mainSort ( UInt32* ptr, | |||
826 | The main sorting loop. | 910 | The main sorting loop. |
827 | --*/ | 911 | --*/ |
828 | 912 | ||
829 | biggestSoFar = numQSorted = 0; | 913 | numQSorted = 0; |
830 | 914 | ||
831 | for (i = 0; i <= 255; i++) { | 915 | for (i = 0; i <= 255; i++) { |
832 | 916 | ||
833 | /*-- | 917 | /*-- |
834 | Process big buckets, starting with the least full. | 918 | Process big buckets, starting with the least full. |
835 | Basically this is a 4-step process in which we call | 919 | Basically this is a 3-step process in which we call |
836 | mainQSort3 to sort the small buckets [ss, j], but | 920 | mainQSort3 to sort the small buckets [ss, j], but |
837 | also make a big effort to avoid the calls if we can. | 921 | also make a big effort to avoid the calls if we can. |
838 | --*/ | 922 | --*/ |
@@ -869,39 +953,38 @@ void mainSort ( UInt32* ptr, | |||
869 | } | 953 | } |
870 | } | 954 | } |
871 | 955 | ||
956 | AssertH ( !bigDone[ss], 1006 ); | ||
957 | |||
872 | /*-- | 958 | /*-- |
873 | Step 2: | 959 | Step 2: |
874 | Deal specially with case [ss, ss]. This establishes the | 960 | Now scan this big bucket [ss] so as to synthesise the |
875 | sorted order for [ss, ss] without any comparisons. | 961 | sorted order for small buckets [t, ss] for all t, |
876 | A clever trick, cryptically described as steps Q6b and Q6c | 962 | including, magically, the bucket [ss,ss] too. |
877 | in SRC-124 (aka BW94). Compared to bzip2, this makes it | 963 | This will avoid doing Real Work in subsequent Step 1's. |
878 | practical not to use a preliminary run-length coder. | ||
879 | --*/ | 964 | --*/ |
880 | { | 965 | { |
881 | Int32 put0, get0, put1, get1; | 966 | for (j = 0; j <= 255; j++) { |
882 | Int32 sbn = (ss << 8) + ss; | 967 | copyStart[j] = ftab[(j << 8) + ss] & CLEARMASK; |
883 | Int32 lo = ftab[sbn] & CLEARMASK; | 968 | copyEnd [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1; |
884 | Int32 hi = (ftab[sbn+1] & CLEARMASK) - 1; | 969 | } |
885 | UChar ssc = (UChar)ss; | 970 | for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) { |
886 | put0 = lo; | 971 | k = ptr[j]-1; if (k < 0) k += nblock; |
887 | get0 = ftab[ss << 8] & CLEARMASK; | 972 | c1 = block[k]; |
888 | put1 = hi; | 973 | if (!bigDone[c1]) |
889 | get1 = (ftab[(ss+1) << 8] & CLEARMASK) - 1; | 974 | ptr[ copyStart[c1]++ ] = k; |
890 | while (get0 < put0) { | ||
891 | j = ptr[get0]-1; if (j < 0) j += nblock; | ||
892 | c1 = (UChar)(block[j] >> 8); | ||
893 | if (c1 == ssc) { ptr[put0] = j; put0++; }; | ||
894 | get0++; | ||
895 | } | 975 | } |
896 | while (get1 > put1) { | 976 | for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) { |
897 | j = ptr[get1]-1; if (j < 0) j += nblock; | 977 | k = ptr[j]-1; if (k < 0) k += nblock; |
898 | c1 = (UChar)(block[j] >> 8); | 978 | c1 = block[k]; |
899 | if (c1 == ssc) { ptr[put1] = j; put1--; }; | 979 | if (!bigDone[c1]) |
900 | get1--; | 980 | ptr[ copyEnd[c1]-- ] = k; |
901 | } | 981 | } |
902 | ftab[sbn] |= SETMASK; | ||
903 | } | 982 | } |
904 | 983 | ||
984 | AssertH ( copyStart[ss]-1 == copyEnd[ss], 1007 ); | ||
985 | |||
986 | for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK; | ||
987 | |||
905 | /*-- | 988 | /*-- |
906 | Step 3: | 989 | Step 3: |
907 | The [ss] big bucket is now done. Record this fact, | 990 | The [ss] big bucket is now done. Record this fact, |
@@ -950,7 +1033,7 @@ void mainSort ( UInt32* ptr, | |||
950 | 1033 | ||
951 | while ((bbSize >> shifts) > 65534) shifts++; | 1034 | while ((bbSize >> shifts) > 65534) shifts++; |
952 | 1035 | ||
953 | for (j = 0; j < bbSize; j++) { | 1036 | for (j = bbSize-1; j >= 0; j--) { |
954 | Int32 a2update = ptr[bbStart + j]; | 1037 | Int32 a2update = ptr[bbStart + j]; |
955 | UInt16 qVal = (UInt16)(j >> shifts); | 1038 | UInt16 qVal = (UInt16)(j >> shifts); |
956 | quadrant[a2update] = qVal; | 1039 | quadrant[a2update] = qVal; |
@@ -960,26 +1043,6 @@ void mainSort ( UInt32* ptr, | |||
960 | AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 ); | 1043 | AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 ); |
961 | } | 1044 | } |
962 | 1045 | ||
963 | /*-- | ||
964 | Step 4: | ||
965 | Now scan this big bucket [ss] so as to synthesise the | ||
966 | sorted order for small buckets [t, ss] for all t != ss. | ||
967 | This will avoid doing Real Work in subsequent Step 1's. | ||
968 | --*/ | ||
969 | for (j = 0; j <= 255; j++) | ||
970 | copy[j] = ftab[(j << 8) + ss] & CLEARMASK; | ||
971 | |||
972 | m = ftab[(ss+1) << 8] & CLEARMASK; | ||
973 | for (j = ftab[ss << 8] & CLEARMASK; j < m; j++) { | ||
974 | k = ptr[j] - 1; if (k < 0) k += nblock; | ||
975 | c1 = (UChar)(block[k] >> 8); | ||
976 | if ( ! bigDone[c1] ) { | ||
977 | ptr[copy[c1]] = k; | ||
978 | copy[c1] ++; | ||
979 | } | ||
980 | } | ||
981 | |||
982 | for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK; | ||
983 | } | 1046 | } |
984 | 1047 | ||
985 | if (verb >= 4) | 1048 | if (verb >= 4) |
@@ -996,19 +1059,19 @@ void mainSort ( UInt32* ptr, | |||
996 | /* Pre: | 1059 | /* Pre: |
997 | nblock > 0 | 1060 | nblock > 0 |
998 | arr2 exists for [0 .. nblock-1 +N_OVERSHOOT] | 1061 | arr2 exists for [0 .. nblock-1 +N_OVERSHOOT] |
999 | ((UInt16*)arr2) [0 .. nblock-1] [15:8] holds block | 1062 | ((UChar*)arr2) [0 .. nblock-1] holds block |
1000 | arr1 exists for [0 .. nblock-1] | 1063 | arr1 exists for [0 .. nblock-1] |
1001 | 1064 | ||
1002 | Post: | 1065 | Post: |
1003 | ((UInt16*)arr2) [0 .. nblock-1] [15:8] holds block | 1066 | ((UChar*)arr2) [0 .. nblock-1] holds block |
1004 | All other areas of block destroyed | 1067 | All other areas of block destroyed |
1005 | ftab [ 0 .. 65536 ] destroyed | 1068 | ftab [ 0 .. 65536 ] destroyed |
1006 | arr1 [0 .. nblock-1] holds sorted order | 1069 | arr1 [0 .. nblock-1] holds sorted order |
1007 | */ | 1070 | */ |
1008 | void blockSort ( EState* s ) | 1071 | void BZ2_blockSort ( EState* s ) |
1009 | { | 1072 | { |
1010 | UInt32* ptr = s->ptr; | 1073 | UInt32* ptr = s->ptr; |
1011 | UInt16* block = s->block; | 1074 | UChar* block = s->block; |
1012 | UInt32* ftab = s->ftab; | 1075 | UInt32* ftab = s->ftab; |
1013 | Int32 nblock = s->nblock; | 1076 | Int32 nblock = s->nblock; |
1014 | Int32 verb = s->verbosity; | 1077 | Int32 verb = s->verbosity; |
@@ -1019,10 +1082,16 @@ void blockSort ( EState* s ) | |||
1019 | Int32 i; | 1082 | Int32 i; |
1020 | 1083 | ||
1021 | if (nblock < 10000) { | 1084 | if (nblock < 10000) { |
1022 | for (i = 0; i < nblock; i++) block[i] <<= 8; | ||
1023 | fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb ); | 1085 | fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb ); |
1024 | } else { | 1086 | } else { |
1025 | quadrant = &(block[nblock+BZ_N_OVERSHOOT]); | 1087 | /* Calculate the location for quadrant, remembering to get |
1088 | the alignment right. Assumes that &(block[0]) is at least | ||
1089 | 2-byte aligned -- this should be ok since block is really | ||
1090 | the first section of arr2. | ||
1091 | */ | ||
1092 | i = nblock+BZ_N_OVERSHOOT; | ||
1093 | if (i & 1) i++; | ||
1094 | quadrant = (UInt16*)(&(block[i])); | ||
1026 | 1095 | ||
1027 | /* (wfact-1) / 3 puts the default-factor-30 | 1096 | /* (wfact-1) / 3 puts the default-factor-30 |
1028 | transition point at very roughly the same place as | 1097 | transition point at very roughly the same place as |