aboutsummaryrefslogtreecommitdiff
path: root/blocksort.c
diff options
context:
space:
mode:
Diffstat (limited to 'blocksort.c')
-rw-r--r--blocksort.c709
1 files changed, 709 insertions, 0 deletions
diff --git a/blocksort.c b/blocksort.c
new file mode 100644
index 0000000..d8bb26a
--- /dev/null
+++ b/blocksort.c
@@ -0,0 +1,709 @@
1
2/*-------------------------------------------------------------*/
3/*--- Block sorting machinery ---*/
4/*--- blocksort.c ---*/
5/*-------------------------------------------------------------*/
6
7/*--
8 This file is a part of bzip2 and/or libbzip2, a program and
9 library for lossless, block-sorting data compression.
10
11 Copyright (C) 1996-1998 Julian R Seward. All rights reserved.
12
13 Redistribution and use in source and binary forms, with or without
14 modification, are permitted provided that the following conditions
15 are met:
16
17 1. Redistributions of source code must retain the above copyright
18 notice, this list of conditions and the following disclaimer.
19
20 2. The origin of this software must not be misrepresented; you must
21 not claim that you wrote the original software. If you use this
22 software in a product, an acknowledgment in the product
23 documentation would be appreciated but is not required.
24
25 3. Altered source versions must be plainly marked as such, and must
26 not be misrepresented as being the original software.
27
28 4. The name of the author may not be used to endorse or promote
29 products derived from this software without specific prior written
30 permission.
31
32 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
33 OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
34 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
35 ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
36 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
37 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
38 GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
39 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
40 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
41 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
42 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43
44 Julian Seward, Guildford, Surrey, UK.
45 jseward@acm.org
46 bzip2/libbzip2 version 0.9.0c of 18 October 1998
47
48 This program is based on (at least) the work of:
49 Mike Burrows
50 David Wheeler
51 Peter Fenwick
52 Alistair Moffat
53 Radford Neal
54 Ian H. Witten
55 Robert Sedgewick
56 Jon L. Bentley
57
58 For more information on these sources, see the manual.
59--*/
60
61
62#include "bzlib_private.h"
63
64/*---------------------------------------------*/
65/*--
66 Compare two strings in block. We assume (see
67 discussion above) that i1 and i2 have a max
68 offset of 10 on entry, and that the first
69 bytes of both block and quadrant have been
70 copied into the "overshoot area", ie
71 into the subscript range
72 [nblock .. nblock+NUM_OVERSHOOT_BYTES-1].
73--*/
74static __inline__ Bool fullGtU ( UChar* block,
75 UInt16* quadrant,
76 UInt32 nblock,
77 Int32* workDone,
78 Int32 i1,
79 Int32 i2
80 )
81{
82 Int32 k;
83 UChar c1, c2;
84 UInt16 s1, s2;
85
86 AssertD ( i1 != i2, "fullGtU(1)" );
87
88 c1 = block[i1];
89 c2 = block[i2];
90 if (c1 != c2) return (c1 > c2);
91 i1++; i2++;
92
93 c1 = block[i1];
94 c2 = block[i2];
95 if (c1 != c2) return (c1 > c2);
96 i1++; i2++;
97
98 c1 = block[i1];
99 c2 = block[i2];
100 if (c1 != c2) return (c1 > c2);
101 i1++; i2++;
102
103 c1 = block[i1];
104 c2 = block[i2];
105 if (c1 != c2) return (c1 > c2);
106 i1++; i2++;
107
108 c1 = block[i1];
109 c2 = block[i2];
110 if (c1 != c2) return (c1 > c2);
111 i1++; i2++;
112
113 c1 = block[i1];
114 c2 = block[i2];
115 if (c1 != c2) return (c1 > c2);
116 i1++; i2++;
117
118 k = nblock;
119
120 do {
121
122 c1 = block[i1];
123 c2 = block[i2];
124 if (c1 != c2) return (c1 > c2);
125 s1 = quadrant[i1];
126 s2 = quadrant[i2];
127 if (s1 != s2) return (s1 > s2);
128 i1++; i2++;
129
130 c1 = block[i1];
131 c2 = block[i2];
132 if (c1 != c2) return (c1 > c2);
133 s1 = quadrant[i1];
134 s2 = quadrant[i2];
135 if (s1 != s2) return (s1 > s2);
136 i1++; i2++;
137
138 c1 = block[i1];
139 c2 = block[i2];
140 if (c1 != c2) return (c1 > c2);
141 s1 = quadrant[i1];
142 s2 = quadrant[i2];
143 if (s1 != s2) return (s1 > s2);
144 i1++; i2++;
145
146 c1 = block[i1];
147 c2 = block[i2];
148 if (c1 != c2) return (c1 > c2);
149 s1 = quadrant[i1];
150 s2 = quadrant[i2];
151 if (s1 != s2) return (s1 > s2);
152 i1++; i2++;
153
154 if (i1 >= nblock) i1 -= nblock;
155 if (i2 >= nblock) i2 -= nblock;
156
157 k -= 4;
158 (*workDone)++;
159 }
160 while (k >= 0);
161
162 return False;
163}
164
165/*---------------------------------------------*/
166/*--
167 Knuth's increments seem to work better
168 than Incerpi-Sedgewick here. Possibly
169 because the number of elems to sort is
170 usually small, typically <= 20.
171--*/
172static Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
173 9841, 29524, 88573, 265720,
174 797161, 2391484 };
175
176static void simpleSort ( EState* s, Int32 lo, Int32 hi, Int32 d )
177{
178 Int32 i, j, h, bigN, hp;
179 Int32 v;
180
181 UChar* block = s->block;
182 UInt32* zptr = s->zptr;
183 UInt16* quadrant = s->quadrant;
184 Int32* workDone = &(s->workDone);
185 Int32 nblock = s->nblock;
186 Int32 workLimit = s->workLimit;
187 Bool firstAttempt = s->firstAttempt;
188
189 bigN = hi - lo + 1;
190 if (bigN < 2) return;
191
192 hp = 0;
193 while (incs[hp] < bigN) hp++;
194 hp--;
195
196 for (; hp >= 0; hp--) {
197 h = incs[hp];
198 i = lo + h;
199 while (True) {
200
201 /*-- copy 1 --*/
202 if (i > hi) break;
203 v = zptr[i];
204 j = i;
205 while ( fullGtU ( block, quadrant, nblock, workDone,
206 zptr[j-h]+d, v+d ) ) {
207 zptr[j] = zptr[j-h];
208 j = j - h;
209 if (j <= (lo + h - 1)) break;
210 }
211 zptr[j] = v;
212 i++;
213
214 /*-- copy 2 --*/
215 if (i > hi) break;
216 v = zptr[i];
217 j = i;
218 while ( fullGtU ( block, quadrant, nblock, workDone,
219 zptr[j-h]+d, v+d ) ) {
220 zptr[j] = zptr[j-h];
221 j = j - h;
222 if (j <= (lo + h - 1)) break;
223 }
224 zptr[j] = v;
225 i++;
226
227 /*-- copy 3 --*/
228 if (i > hi) break;
229 v = zptr[i];
230 j = i;
231 while ( fullGtU ( block, quadrant, nblock, workDone,
232 zptr[j-h]+d, v+d ) ) {
233 zptr[j] = zptr[j-h];
234 j = j - h;
235 if (j <= (lo + h - 1)) break;
236 }
237 zptr[j] = v;
238 i++;
239
240 if (*workDone > workLimit && firstAttempt) return;
241 }
242 }
243}
244
245
246/*---------------------------------------------*/
247/*--
248 The following is an implementation of
249 an elegant 3-way quicksort for strings,
250 described in a paper "Fast Algorithms for
251 Sorting and Searching Strings", by Robert
252 Sedgewick and Jon L. Bentley.
253--*/
254
255#define swap(lv1, lv2) \
256 { Int32 tmp = lv1; lv1 = lv2; lv2 = tmp; }
257
258static void vswap ( UInt32* zptr, Int32 p1, Int32 p2, Int32 n )
259{
260 while (n > 0) {
261 swap(zptr[p1], zptr[p2]);
262 p1++; p2++; n--;
263 }
264}
265
266static UChar med3 ( UChar a, UChar b, UChar c )
267{
268 UChar t;
269 if (a > b) { t = a; a = b; b = t; };
270 if (b > c) { t = b; b = c; c = t; };
271 if (a > b) b = a;
272 return b;
273}
274
275
276#define min(a,b) ((a) < (b)) ? (a) : (b)
277
278typedef
279 struct { Int32 ll; Int32 hh; Int32 dd; }
280 StackElem;
281
282#define push(lz,hz,dz) { stack[sp].ll = lz; \
283 stack[sp].hh = hz; \
284 stack[sp].dd = dz; \
285 sp++; }
286
287#define pop(lz,hz,dz) { sp--; \
288 lz = stack[sp].ll; \
289 hz = stack[sp].hh; \
290 dz = stack[sp].dd; }
291
292#define SMALL_THRESH 20
293#define DEPTH_THRESH 10
294
295/*--
296 If you are ever unlucky/improbable enough
297 to get a stack overflow whilst sorting,
298 increase the following constant and try
299 again. In practice I have never seen the
300 stack go above 27 elems, so the following
301 limit seems very generous.
302--*/
303#define QSORT_STACK_SIZE 1000
304
305
306static void qSort3 ( EState* s, Int32 loSt, Int32 hiSt, Int32 dSt )
307{
308 Int32 unLo, unHi, ltLo, gtHi, med, n, m;
309 Int32 sp, lo, hi, d;
310 StackElem stack[QSORT_STACK_SIZE];
311
312 UChar* block = s->block;
313 UInt32* zptr = s->zptr;
314 Int32* workDone = &(s->workDone);
315 Int32 workLimit = s->workLimit;
316 Bool firstAttempt = s->firstAttempt;
317
318 sp = 0;
319 push ( loSt, hiSt, dSt );
320
321 while (sp > 0) {
322
323 AssertH ( sp < QSORT_STACK_SIZE, 1001 );
324
325 pop ( lo, hi, d );
326
327 if (hi - lo < SMALL_THRESH || d > DEPTH_THRESH) {
328 simpleSort ( s, lo, hi, d );
329 if (*workDone > workLimit && firstAttempt) return;
330 continue;
331 }
332
333 med = med3 ( block[zptr[ lo ]+d],
334 block[zptr[ hi ]+d],
335 block[zptr[ (lo+hi)>>1 ]+d] );
336
337 unLo = ltLo = lo;
338 unHi = gtHi = hi;
339
340 while (True) {
341 while (True) {
342 if (unLo > unHi) break;
343 n = ((Int32)block[zptr[unLo]+d]) - med;
344 if (n == 0) { swap(zptr[unLo], zptr[ltLo]); ltLo++; unLo++; continue; };
345 if (n > 0) break;
346 unLo++;
347 }
348 while (True) {
349 if (unLo > unHi) break;
350 n = ((Int32)block[zptr[unHi]+d]) - med;
351 if (n == 0) { swap(zptr[unHi], zptr[gtHi]); gtHi--; unHi--; continue; };
352 if (n < 0) break;
353 unHi--;
354 }
355 if (unLo > unHi) break;
356 swap(zptr[unLo], zptr[unHi]); unLo++; unHi--;
357 }
358
359 AssertD ( unHi == unLo-1, "bad termination in qSort3" );
360
361 if (gtHi < ltLo) {
362 push(lo, hi, d+1 );
363 continue;
364 }
365
366 n = min(ltLo-lo, unLo-ltLo); vswap(zptr, lo, unLo-n, n);
367 m = min(hi-gtHi, gtHi-unHi); vswap(zptr, unLo, hi-m+1, m);
368
369 n = lo + unLo - ltLo - 1;
370 m = hi - (gtHi - unHi) + 1;
371
372 push ( lo, n, d );
373 push ( n+1, m-1, d+1 );
374 push ( m, hi, d );
375 }
376}
377
378
379/*---------------------------------------------*/
380
381#define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8])
382
383#define SETMASK (1 << 21)
384#define CLEARMASK (~(SETMASK))
385
386static void sortMain ( EState* s )
387{
388 Int32 i, j, k, ss, sb;
389 Int32 runningOrder[256];
390 Int32 copy[256];
391 Bool bigDone[256];
392 UChar c1, c2;
393 Int32 numQSorted;
394
395 UChar* block = s->block;
396 UInt32* zptr = s->zptr;
397 UInt16* quadrant = s->quadrant;
398 Int32* ftab = s->ftab;
399 Int32* workDone = &(s->workDone);
400 Int32 nblock = s->nblock;
401 Int32 workLimit = s->workLimit;
402 Bool firstAttempt = s->firstAttempt;
403
404 /*--
405 In the various block-sized structures, live data runs
406 from 0 to last+NUM_OVERSHOOT_BYTES inclusive. First,
407 set up the overshoot area for block.
408 --*/
409
410 if (s->verbosity >= 4)
411 VPrintf0( " sort initialise ...\n" );
412
413 for (i = 0; i < BZ_NUM_OVERSHOOT_BYTES; i++)
414 block[nblock+i] = block[i % nblock];
415 for (i = 0; i < nblock+BZ_NUM_OVERSHOOT_BYTES; i++)
416 quadrant[i] = 0;
417
418
419 if (nblock <= 4000) {
420
421 /*--
422 Use simpleSort(), since the full sorting mechanism
423 has quite a large constant overhead.
424 --*/
425 if (s->verbosity >= 4) VPrintf0( " simpleSort ...\n" );
426 for (i = 0; i < nblock; i++) zptr[i] = i;
427 firstAttempt = False;
428 *workDone = workLimit = 0;
429 simpleSort ( s, 0, nblock-1, 0 );
430 if (s->verbosity >= 4) VPrintf0( " simpleSort done.\n" );
431
432 } else {
433
434 numQSorted = 0;
435 for (i = 0; i <= 255; i++) bigDone[i] = False;
436
437 if (s->verbosity >= 4) VPrintf0( " bucket sorting ...\n" );
438
439 for (i = 0; i <= 65536; i++) ftab[i] = 0;
440
441 c1 = block[nblock-1];
442 for (i = 0; i < nblock; i++) {
443 c2 = block[i];
444 ftab[(c1 << 8) + c2]++;
445 c1 = c2;
446 }
447
448 for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1];
449
450 c1 = block[0];
451 for (i = 0; i < nblock-1; i++) {
452 c2 = block[i+1];
453 j = (c1 << 8) + c2;
454 c1 = c2;
455 ftab[j]--;
456 zptr[ftab[j]] = i;
457 }
458 j = (block[nblock-1] << 8) + block[0];
459 ftab[j]--;
460 zptr[ftab[j]] = nblock-1;
461
462 /*--
463 Now ftab contains the first loc of every small bucket.
464 Calculate the running order, from smallest to largest
465 big bucket.
466 --*/
467
468 for (i = 0; i <= 255; i++) runningOrder[i] = i;
469
470 {
471 Int32 vv;
472 Int32 h = 1;
473 do h = 3 * h + 1; while (h <= 256);
474 do {
475 h = h / 3;
476 for (i = h; i <= 255; i++) {
477 vv = runningOrder[i];
478 j = i;
479 while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) {
480 runningOrder[j] = runningOrder[j-h];
481 j = j - h;
482 if (j <= (h - 1)) goto zero;
483 }
484 zero:
485 runningOrder[j] = vv;
486 }
487 } while (h != 1);
488 }
489
490 /*--
491 The main sorting loop.
492 --*/
493
494 for (i = 0; i <= 255; i++) {
495
496 /*--
497 Process big buckets, starting with the least full.
498 Basically this is a 4-step process in which we call
499 qSort3 to sort the small buckets [ss, j], but
500 also make a big effort to avoid the calls if we can.
501 --*/
502 ss = runningOrder[i];
503
504 /*--
505 Step 1:
506 Complete the big bucket [ss] by quicksorting
507 any unsorted small buckets [ss, j], for j != ss.
508 Hopefully previous pointer-scanning phases have already
509 completed many of the small buckets [ss, j], so
510 we don't have to sort them at all.
511 --*/
512 for (j = 0; j <= 255; j++) {
513 if (j != ss) {
514 sb = (ss << 8) + j;
515 if ( ! (ftab[sb] & SETMASK) ) {
516 Int32 lo = ftab[sb] & CLEARMASK;
517 Int32 hi = (ftab[sb+1] & CLEARMASK) - 1;
518 if (hi > lo) {
519 if (s->verbosity >= 4)
520 VPrintf4( " qsort [0x%x, 0x%x] done %d this %d\n",
521 ss, j, numQSorted, hi - lo + 1 );
522 qSort3 ( s, lo, hi, 2 );
523 numQSorted += ( hi - lo + 1 );
524 if (*workDone > workLimit && firstAttempt) return;
525 }
526 }
527 ftab[sb] |= SETMASK;
528 }
529 }
530
531 /*--
532 Step 2:
533 Deal specially with case [ss, ss]. This establishes the
534 sorted order for [ss, ss] without any comparisons.
535 A clever trick, cryptically described as steps Q6b and Q6c
536 in SRC-124 (aka BW94). This makes it entirely practical to
537 not use a preliminary run-length coder, but unfortunately
538 we are now stuck with the .bz2 file format.
539 --*/
540 {
541 Int32 put0, get0, put1, get1;
542 Int32 sbn = (ss << 8) + ss;
543 Int32 lo = ftab[sbn] & CLEARMASK;
544 Int32 hi = (ftab[sbn+1] & CLEARMASK) - 1;
545 UChar ssc = (UChar)ss;
546 put0 = lo;
547 get0 = ftab[ss << 8] & CLEARMASK;
548 put1 = hi;
549 get1 = (ftab[(ss+1) << 8] & CLEARMASK) - 1;
550 while (get0 < put0) {
551 j = zptr[get0]-1; if (j < 0) j += nblock;
552 c1 = block[j];
553 if (c1 == ssc) { zptr[put0] = j; put0++; };
554 get0++;
555 }
556 while (get1 > put1) {
557 j = zptr[get1]-1; if (j < 0) j += nblock;
558 c1 = block[j];
559 if (c1 == ssc) { zptr[put1] = j; put1--; };
560 get1--;
561 }
562 ftab[sbn] |= SETMASK;
563 }
564
565 /*--
566 Step 3:
567 The [ss] big bucket is now done. Record this fact,
568 and update the quadrant descriptors. Remember to
569 update quadrants in the overshoot area too, if
570 necessary. The "if (i < 255)" test merely skips
571 this updating for the last bucket processed, since
572 updating for the last bucket is pointless.
573
574 The quadrant array provides a way to incrementally
575 cache sort orderings, as they appear, so as to
576 make subsequent comparisons in fullGtU() complete
577 faster. For repetitive blocks this makes a big
578 difference (but not big enough to be able to avoid
579 randomisation for very repetitive data.)
580
581 The precise meaning is: at all times:
582
583 for 0 <= i < nblock and 0 <= j <= nblock
584
585 if block[i] != block[j],
586
587 then the relative values of quadrant[i] and
588 quadrant[j] are meaningless.
589
590 else {
591 if quadrant[i] < quadrant[j]
592 then the string starting at i lexicographically
593 precedes the string starting at j
594
595 else if quadrant[i] > quadrant[j]
596 then the string starting at j lexicographically
597 precedes the string starting at i
598
599 else
600 the relative ordering of the strings starting
601 at i and j has not yet been determined.
602 }
603 --*/
604 bigDone[ss] = True;
605
606 if (i < 255) {
607 Int32 bbStart = ftab[ss << 8] & CLEARMASK;
608 Int32 bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart;
609 Int32 shifts = 0;
610
611 while ((bbSize >> shifts) > 65534) shifts++;
612
613 for (j = 0; j < bbSize; j++) {
614 Int32 a2update = zptr[bbStart + j];
615 UInt16 qVal = (UInt16)(j >> shifts);
616 quadrant[a2update] = qVal;
617 if (a2update < BZ_NUM_OVERSHOOT_BYTES)
618 quadrant[a2update + nblock] = qVal;
619 }
620
621 AssertH ( ( ((bbSize-1) >> shifts) <= 65535 ), 1002 );
622 }
623
624 /*--
625 Step 4:
626 Now scan this big bucket [ss] so as to synthesise the
627 sorted order for small buckets [t, ss] for all t != ss.
628 This will avoid doing Real Work in subsequent Step 1's.
629 --*/
630 for (j = 0; j <= 255; j++)
631 copy[j] = ftab[(j << 8) + ss] & CLEARMASK;
632
633 for (j = ftab[ss << 8] & CLEARMASK;
634 j < (ftab[(ss+1) << 8] & CLEARMASK);
635 j++) {
636 k = zptr[j]-1; if (k < 0) k += nblock;
637 c1 = block[k];
638 if ( ! bigDone[c1] ) {
639 zptr[copy[c1]] = k;
640 copy[c1] ++;
641 }
642 }
643
644 for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK;
645 }
646 if (s->verbosity >= 4)
647 VPrintf3( " %d pointers, %d sorted, %d scanned\n",
648 nblock, numQSorted, nblock - numQSorted );
649 }
650}
651
652
653/*---------------------------------------------*/
654static void randomiseBlock ( EState* s )
655{
656 Int32 i;
657 BZ_RAND_INIT_MASK;
658 for (i = 0; i < 256; i++) s->inUse[i] = False;
659
660 for (i = 0; i < s->nblock; i++) {
661 BZ_RAND_UPD_MASK;
662 s->block[i] ^= BZ_RAND_MASK;
663 s->inUse[s->block[i]] = True;
664 }
665}
666
667
668/*---------------------------------------------*/
669void blockSort ( EState* s )
670{
671 Int32 i;
672
673 s->workLimit = s->workFactor * (s->nblock - 1);
674 s->workDone = 0;
675 s->blockRandomised = False;
676 s->firstAttempt = True;
677
678 sortMain ( s );
679
680 if (s->verbosity >= 3)
681 VPrintf3( " %d work, %d block, ratio %5.2f\n",
682 s->workDone, s->nblock-1,
683 (float)(s->workDone) / (float)(s->nblock-1) );
684
685 if (s->workDone > s->workLimit && s->firstAttempt) {
686 if (s->verbosity >= 2)
687 VPrintf0( " sorting aborted; randomising block\n" );
688 randomiseBlock ( s );
689 s->workLimit = s->workDone = 0;
690 s->blockRandomised = True;
691 s->firstAttempt = False;
692 sortMain ( s );
693 if (s->verbosity >= 3)
694 VPrintf3( " %d work, %d block, ratio %f\n",
695 s->workDone, s->nblock-1,
696 (float)(s->workDone) / (float)(s->nblock-1) );
697 }
698
699 s->origPtr = -1;
700 for (i = 0; i < s->nblock; i++)
701 if (s->zptr[i] == 0)
702 { s->origPtr = i; break; };
703
704 AssertH( s->origPtr != -1, 1003 );
705}
706
707/*-------------------------------------------------------------*/
708/*--- end blocksort.c ---*/
709/*-------------------------------------------------------------*/