aboutsummaryrefslogtreecommitdiff
path: root/archival/bz/blocksort.c
diff options
context:
space:
mode:
authorDenis Vlasenko <vda.linux@googlemail.com>2007-10-13 03:36:03 +0000
committerDenis Vlasenko <vda.linux@googlemail.com>2007-10-13 03:36:03 +0000
commit77f1ec1b9bf100e6c10aa0856c7156e321511785 (patch)
treef20e5a9062ecad82a43bde81e3041a19c4292733 /archival/bz/blocksort.c
parent11c23d7b990eae27357e5a41a97d62b9a214f7db (diff)
downloadbusybox-w32-77f1ec1b9bf100e6c10aa0856c7156e321511785.tar.gz
busybox-w32-77f1ec1b9bf100e6c10aa0856c7156e321511785.tar.bz2
busybox-w32-77f1ec1b9bf100e6c10aa0856c7156e321511785.zip
bzip2: port bzip2 1.0.4 to busybox. note: bzip2 code resides
in separate directory (archival/bz/*) and is covered by BSD-style license. code size: 13k
Diffstat (limited to 'archival/bz/blocksort.c')
-rw-r--r--archival/bz/blocksort.c1128
1 files changed, 1128 insertions, 0 deletions
diff --git a/archival/bz/blocksort.c b/archival/bz/blocksort.c
new file mode 100644
index 000000000..7d2856ba7
--- /dev/null
+++ b/archival/bz/blocksort.c
@@ -0,0 +1,1128 @@
1/*
2 * bzip2 is written by Julian Seward <jseward@bzip.org>.
3 * Adapted for busybox by Denys Vlasenko <vda.linux@googlemail.com>.
4 * See README and LICENSE files in this directory for more information.
5 */
6
7/*-------------------------------------------------------------*/
8/*--- Block sorting machinery ---*/
9/*--- blocksort.c ---*/
10/*-------------------------------------------------------------*/
11
12/* ------------------------------------------------------------------
13This file is part of bzip2/libbzip2, a program and library for
14lossless, block-sorting data compression.
15
16bzip2/libbzip2 version 1.0.4 of 20 December 2006
17Copyright (C) 1996-2006 Julian Seward <jseward@bzip.org>
18
19Please read the WARNING, DISCLAIMER and PATENTS sections in the
20README file.
21
22This program is released under the terms of the license contained
23in the file LICENSE.
24------------------------------------------------------------------ */
25
26/* #include "bzlib_private.h" */
27
28/*---------------------------------------------*/
29/*--- Fallback O(N log(N)^2) sorting ---*/
30/*--- algorithm, for repetitive blocks ---*/
31/*---------------------------------------------*/
32
33/*---------------------------------------------*/
34static
35inline
36void fallbackSimpleSort(uint32_t* fmap,
37 uint32_t* eclass,
38 int32_t lo,
39 int32_t hi)
40{
41 int32_t i, j, tmp;
42 uint32_t ec_tmp;
43
44 if (lo == hi) return;
45
46 if (hi - lo > 3) {
47 for (i = hi-4; i >= lo; i--) {
48 tmp = fmap[i];
49 ec_tmp = eclass[tmp];
50 for (j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4)
51 fmap[j-4] = fmap[j];
52 fmap[j-4] = tmp;
53 }
54 }
55
56 for (i = hi-1; i >= lo; i--) {
57 tmp = fmap[i];
58 ec_tmp = eclass[tmp];
59 for (j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++)
60 fmap[j-1] = fmap[j];
61 fmap[j-1] = tmp;
62 }
63}
64
65
66/*---------------------------------------------*/
67#define fswap(zz1, zz2) \
68{ \
69 int32_t zztmp = zz1; \
70 zz1 = zz2; \
71 zz2 = zztmp; \
72}
73
74#define fvswap(zzp1, zzp2, zzn) \
75{ \
76 int32_t yyp1 = (zzp1); \
77 int32_t yyp2 = (zzp2); \
78 int32_t yyn = (zzn); \
79 while (yyn > 0) { \
80 fswap(fmap[yyp1], fmap[yyp2]); \
81 yyp1++; \
82 yyp2++; \
83 yyn--; \
84 } \
85}
86
87
88#define fmin(a,b) ((a) < (b)) ? (a) : (b)
89
90#define fpush(lz,hz) { \
91 stackLo[sp] = lz; \
92 stackHi[sp] = hz; \
93 sp++; \
94}
95
96#define fpop(lz,hz) { \
97 sp--; \
98 lz = stackLo[sp]; \
99 hz = stackHi[sp]; \
100}
101
102#define FALLBACK_QSORT_SMALL_THRESH 10
103#define FALLBACK_QSORT_STACK_SIZE 100
104
105
106static
107void fallbackQSort3(uint32_t* fmap,
108 uint32_t* eclass,
109 int32_t loSt,
110 int32_t hiSt)
111{
112 int32_t unLo, unHi, ltLo, gtHi, n, m;
113 int32_t sp, lo, hi;
114 uint32_t med, r, r3;
115 int32_t stackLo[FALLBACK_QSORT_STACK_SIZE];
116 int32_t stackHi[FALLBACK_QSORT_STACK_SIZE];
117
118 r = 0;
119
120 sp = 0;
121 fpush(loSt, hiSt);
122
123 while (sp > 0) {
124 AssertH(sp < FALLBACK_QSORT_STACK_SIZE - 1, 1004);
125
126 fpop(lo, hi);
127 if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) {
128 fallbackSimpleSort(fmap, eclass, lo, hi);
129 continue;
130 }
131
132 /* Random partitioning. Median of 3 sometimes fails to
133 * avoid bad cases. Median of 9 seems to help but
134 * looks rather expensive. This too seems to work but
135 * is cheaper. Guidance for the magic constants
136 * 7621 and 32768 is taken from Sedgewick's algorithms
137 * book, chapter 35.
138 */
139 r = ((r * 7621) + 1) % 32768;
140 r3 = r % 3;
141 if (r3 == 0)
142 med = eclass[fmap[lo]];
143 else if (r3 == 1)
144 med = eclass[fmap[(lo+hi)>>1]];
145 else
146 med = eclass[fmap[hi]];
147
148 unLo = ltLo = lo;
149 unHi = gtHi = hi;
150
151 while (1) {
152 while (1) {
153 if (unLo > unHi) break;
154 n = (int32_t)eclass[fmap[unLo]] - (int32_t)med;
155 if (n == 0) {
156 fswap(fmap[unLo], fmap[ltLo]);
157 ltLo++;
158 unLo++;
159 continue;
160 };
161 if (n > 0) break;
162 unLo++;
163 }
164 while (1) {
165 if (unLo > unHi) break;
166 n = (int32_t)eclass[fmap[unHi]] - (int32_t)med;
167 if (n == 0) {
168 fswap(fmap[unHi], fmap[gtHi]);
169 gtHi--; unHi--;
170 continue;
171 };
172 if (n < 0) break;
173 unHi--;
174 }
175 if (unLo > unHi) break;
176 fswap(fmap[unLo], fmap[unHi]); unLo++; unHi--;
177 }
178
179 AssertD(unHi == unLo-1, "fallbackQSort3(2)");
180
181 if (gtHi < ltLo) continue;
182
183 n = fmin(ltLo-lo, unLo-ltLo); fvswap(lo, unLo-n, n);
184 m = fmin(hi-gtHi, gtHi-unHi); fvswap(unLo, hi-m+1, m);
185
186 n = lo + unLo - ltLo - 1;
187 m = hi - (gtHi - unHi) + 1;
188
189 if (n - lo > hi - m) {
190 fpush(lo, n);
191 fpush(m, hi);
192 } else {
193 fpush(m, hi);
194 fpush(lo, n);
195 }
196 }
197}
198
199#undef fmin
200#undef fpush
201#undef fpop
202#undef fswap
203#undef fvswap
204#undef FALLBACK_QSORT_SMALL_THRESH
205#undef FALLBACK_QSORT_STACK_SIZE
206
207
208/*---------------------------------------------*/
209/* Pre:
210 * nblock > 0
211 * eclass exists for [0 .. nblock-1]
212 * ((UChar*)eclass) [0 .. nblock-1] holds block
213 * ptr exists for [0 .. nblock-1]
214 *
215 * Post:
216 * ((UChar*)eclass) [0 .. nblock-1] holds block
217 * All other areas of eclass destroyed
218 * fmap [0 .. nblock-1] holds sorted order
219 * bhtab[0 .. 2+(nblock/32)] destroyed
220*/
221
222#define SET_BH(zz) bhtab[(zz) >> 5] |= (1 << ((zz) & 31))
223#define CLEAR_BH(zz) bhtab[(zz) >> 5] &= ~(1 << ((zz) & 31))
224#define ISSET_BH(zz) (bhtab[(zz) >> 5] & (1 << ((zz) & 31)))
225#define WORD_BH(zz) bhtab[(zz) >> 5]
226#define UNALIGNED_BH(zz) ((zz) & 0x01f)
227
228static
229void fallbackSort(uint32_t* fmap,
230 uint32_t* eclass,
231 uint32_t* bhtab,
232 int32_t nblock)
233{
234 int32_t ftab[257];
235 int32_t ftabCopy[256];
236 int32_t H, i, j, k, l, r, cc, cc1;
237 int32_t nNotDone;
238 int32_t nBhtab;
239 UChar* eclass8 = (UChar*)eclass;
240
241 /*
242 * Initial 1-char radix sort to generate
243 * initial fmap and initial BH bits.
244 */
245 for (i = 0; i < 257; i++) ftab[i] = 0;
246 for (i = 0; i < nblock; i++) ftab[eclass8[i]]++;
247 for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i];
248 for (i = 1; i < 257; i++) ftab[i] += ftab[i-1];
249
250 for (i = 0; i < nblock; i++) {
251 j = eclass8[i];
252 k = ftab[j] - 1;
253 ftab[j] = k;
254 fmap[k] = i;
255 }
256
257 nBhtab = 2 + (nblock / 32);
258 for (i = 0; i < nBhtab; i++) bhtab[i] = 0;
259 for (i = 0; i < 256; i++) SET_BH(ftab[i]);
260
261 /*
262 * Inductively refine the buckets. Kind-of an
263 * "exponential radix sort" (!), inspired by the
264 * Manber-Myers suffix array construction algorithm.
265 */
266
267 /*-- set sentinel bits for block-end detection --*/
268 for (i = 0; i < 32; i++) {
269 SET_BH(nblock + 2*i);
270 CLEAR_BH(nblock + 2*i + 1);
271 }
272
273 /*-- the log(N) loop --*/
274 H = 1;
275 while (1) {
276 j = 0;
277 for (i = 0; i < nblock; i++) {
278 if (ISSET_BH(i))
279 j = i;
280 k = fmap[i] - H;
281 if (k < 0)
282 k += nblock;
283 eclass[k] = j;
284 }
285
286 nNotDone = 0;
287 r = -1;
288 while (1) {
289
290 /*-- find the next non-singleton bucket --*/
291 k = r + 1;
292 while (ISSET_BH(k) && UNALIGNED_BH(k))
293 k++;
294 if (ISSET_BH(k)) {
295 while (WORD_BH(k) == 0xffffffff) k += 32;
296 while (ISSET_BH(k)) k++;
297 }
298 l = k - 1;
299 if (l >= nblock)
300 break;
301 while (!ISSET_BH(k) && UNALIGNED_BH(k))
302 k++;
303 if (!ISSET_BH(k)) {
304 while (WORD_BH(k) == 0x00000000) k += 32;
305 while (!ISSET_BH(k)) k++;
306 }
307 r = k - 1;
308 if (r >= nblock)
309 break;
310
311 /*-- now [l, r] bracket current bucket --*/
312 if (r > l) {
313 nNotDone += (r - l + 1);
314 fallbackQSort3(fmap, eclass, l, r);
315
316 /*-- scan bucket and generate header bits-- */
317 cc = -1;
318 for (i = l; i <= r; i++) {
319 cc1 = eclass[fmap[i]];
320 if (cc != cc1) {
321 SET_BH(i);
322 cc = cc1;
323 };
324 }
325 }
326 }
327
328 H *= 2;
329 if (H > nblock || nNotDone == 0)
330 break;
331 }
332
333 /*
334 * Reconstruct the original block in
335 * eclass8 [0 .. nblock-1], since the
336 * previous phase destroyed it.
337 */
338 j = 0;
339 for (i = 0; i < nblock; i++) {
340 while (ftabCopy[j] == 0)
341 j++;
342 ftabCopy[j]--;
343 eclass8[fmap[i]] = (UChar)j;
344 }
345 AssertH(j < 256, 1005);
346}
347
348#undef SET_BH
349#undef CLEAR_BH
350#undef ISSET_BH
351#undef WORD_BH
352#undef UNALIGNED_BH
353
354
355/*---------------------------------------------*/
356/*--- The main, O(N^2 log(N)) sorting ---*/
357/*--- algorithm. Faster for "normal" ---*/
358/*--- non-repetitive blocks. ---*/
359/*---------------------------------------------*/
360
361/*---------------------------------------------*/
362static
363inline
364Bool mainGtU(
365 uint32_t i1,
366 uint32_t i2,
367 UChar* block,
368 uint16_t* quadrant,
369 uint32_t nblock,
370 int32_t* budget)
371{
372 int32_t k;
373 UChar c1, c2;
374 uint16_t s1, s2;
375
376///unrolling
377 AssertD(i1 != i2, "mainGtU");
378 /* 1 */
379 c1 = block[i1]; c2 = block[i2];
380 if (c1 != c2) return (c1 > c2);
381 i1++; i2++;
382 /* 2 */
383 c1 = block[i1]; c2 = block[i2];
384 if (c1 != c2) return (c1 > c2);
385 i1++; i2++;
386 /* 3 */
387 c1 = block[i1]; c2 = block[i2];
388 if (c1 != c2) return (c1 > c2);
389 i1++; i2++;
390 /* 4 */
391 c1 = block[i1]; c2 = block[i2];
392 if (c1 != c2) return (c1 > c2);
393 i1++; i2++;
394 /* 5 */
395 c1 = block[i1]; c2 = block[i2];
396 if (c1 != c2) return (c1 > c2);
397 i1++; i2++;
398 /* 6 */
399 c1 = block[i1]; c2 = block[i2];
400 if (c1 != c2) return (c1 > c2);
401 i1++; i2++;
402 /* 7 */
403 c1 = block[i1]; c2 = block[i2];
404 if (c1 != c2) return (c1 > c2);
405 i1++; i2++;
406 /* 8 */
407 c1 = block[i1]; c2 = block[i2];
408 if (c1 != c2) return (c1 > c2);
409 i1++; i2++;
410 /* 9 */
411 c1 = block[i1]; c2 = block[i2];
412 if (c1 != c2) return (c1 > c2);
413 i1++; i2++;
414 /* 10 */
415 c1 = block[i1]; c2 = block[i2];
416 if (c1 != c2) return (c1 > c2);
417 i1++; i2++;
418 /* 11 */
419 c1 = block[i1]; c2 = block[i2];
420 if (c1 != c2) return (c1 > c2);
421 i1++; i2++;
422 /* 12 */
423 c1 = block[i1]; c2 = block[i2];
424 if (c1 != c2) return (c1 > c2);
425 i1++; i2++;
426
427 k = nblock + 8;
428
429///unrolling
430 do {
431 /* 1 */
432 c1 = block[i1]; c2 = block[i2];
433 if (c1 != c2) return (c1 > c2);
434 s1 = quadrant[i1]; s2 = quadrant[i2];
435 if (s1 != s2) return (s1 > s2);
436 i1++; i2++;
437 /* 2 */
438 c1 = block[i1]; c2 = block[i2];
439 if (c1 != c2) return (c1 > c2);
440 s1 = quadrant[i1]; s2 = quadrant[i2];
441 if (s1 != s2) return (s1 > s2);
442 i1++; i2++;
443 /* 3 */
444 c1 = block[i1]; c2 = block[i2];
445 if (c1 != c2) return (c1 > c2);
446 s1 = quadrant[i1]; s2 = quadrant[i2];
447 if (s1 != s2) return (s1 > s2);
448 i1++; i2++;
449 /* 4 */
450 c1 = block[i1]; c2 = block[i2];
451 if (c1 != c2) return (c1 > c2);
452 s1 = quadrant[i1]; s2 = quadrant[i2];
453 if (s1 != s2) return (s1 > s2);
454 i1++; i2++;
455 /* 5 */
456 c1 = block[i1]; c2 = block[i2];
457 if (c1 != c2) return (c1 > c2);
458 s1 = quadrant[i1]; s2 = quadrant[i2];
459 if (s1 != s2) return (s1 > s2);
460 i1++; i2++;
461 /* 6 */
462 c1 = block[i1]; c2 = block[i2];
463 if (c1 != c2) return (c1 > c2);
464 s1 = quadrant[i1]; s2 = quadrant[i2];
465 if (s1 != s2) return (s1 > s2);
466 i1++; i2++;
467 /* 7 */
468 c1 = block[i1]; c2 = block[i2];
469 if (c1 != c2) return (c1 > c2);
470 s1 = quadrant[i1]; s2 = quadrant[i2];
471 if (s1 != s2) return (s1 > s2);
472 i1++; i2++;
473 /* 8 */
474 c1 = block[i1]; c2 = block[i2];
475 if (c1 != c2) return (c1 > c2);
476 s1 = quadrant[i1]; s2 = quadrant[i2];
477 if (s1 != s2) return (s1 > s2);
478 i1++; i2++;
479
480 if (i1 >= nblock) i1 -= nblock;
481 if (i2 >= nblock) i2 -= nblock;
482
483 k -= 8;
484 (*budget)--;
485 } while (k >= 0);
486
487 return False;
488}
489
490
491/*---------------------------------------------*/
492/*
493 * Knuth's increments seem to work better
494 * than Incerpi-Sedgewick here. Possibly
495 * because the number of elems to sort is
496 * usually small, typically <= 20.
497 */
498static
499const int32_t incs[14] = {
500 1, 4, 13, 40, 121, 364, 1093, 3280,
501 9841, 29524, 88573, 265720,
502 797161, 2391484
503};
504
505static
506void mainSimpleSort(uint32_t* ptr,
507 UChar* block,
508 uint16_t* quadrant,
509 int32_t nblock,
510 int32_t lo,
511 int32_t hi,
512 int32_t d,
513 int32_t* budget)
514{
515 int32_t i, j, h, bigN, hp;
516 uint32_t v;
517
518 bigN = hi - lo + 1;
519 if (bigN < 2) return;
520
521 hp = 0;
522 while (incs[hp] < bigN) hp++;
523 hp--;
524
525 for (; hp >= 0; hp--) {
526 h = incs[hp];
527
528 i = lo + h;
529 while (1) {
530
531///unrolling
532 /*-- copy 1 --*/
533 if (i > hi) break;
534 v = ptr[i];
535 j = i;
536 while (mainGtU(ptr[j-h]+d, v+d, block, quadrant, nblock, budget)) {
537 ptr[j] = ptr[j-h];
538 j = j - h;
539 if (j <= (lo + h - 1)) break;
540 }
541 ptr[j] = v;
542 i++;
543
544 /*-- copy 2 --*/
545 if (i > hi) break;
546 v = ptr[i];
547 j = i;
548 while (mainGtU(ptr[j-h]+d, v+d, block, quadrant, nblock, budget)) {
549 ptr[j] = ptr[j-h];
550 j = j - h;
551 if (j <= (lo + h - 1)) break;
552 }
553 ptr[j] = v;
554 i++;
555
556 /*-- copy 3 --*/
557 if (i > hi) break;
558 v = ptr[i];
559 j = i;
560 while (mainGtU (ptr[j-h]+d, v+d, block, quadrant, nblock, budget)) {
561 ptr[j] = ptr[j-h];
562 j = j - h;
563 if (j <= (lo + h - 1)) break;
564 }
565 ptr[j] = v;
566 i++;
567
568 if (*budget < 0) return;
569 }
570 }
571}
572
573
574/*---------------------------------------------*/
575/*
576 * The following is an implementation of
577 * an elegant 3-way quicksort for strings,
578 * described in a paper "Fast Algorithms for
579 * Sorting and Searching Strings", by Robert
580 * Sedgewick and Jon L. Bentley.
581 */
582
583#define mswap(zz1, zz2) \
584{ \
585 int32_t zztmp = zz1; \
586 zz1 = zz2; \
587 zz2 = zztmp; \
588}
589
590#define mvswap(zzp1, zzp2, zzn) \
591{ \
592 int32_t yyp1 = (zzp1); \
593 int32_t yyp2 = (zzp2); \
594 int32_t yyn = (zzn); \
595 while (yyn > 0) { \
596 mswap(ptr[yyp1], ptr[yyp2]); \
597 yyp1++; \
598 yyp2++; \
599 yyn--; \
600 } \
601}
602
603static
604inline
605UChar mmed3(UChar a, UChar b, UChar c)
606{
607 UChar t;
608 if (a > b) {
609 t = a;
610 a = b;
611 b = t;
612 };
613 if (b > c) {
614 b = c;
615 if (a > b)
616 b = a;
617 }
618 return b;
619}
620
621#define mmin(a,b) ((a) < (b)) ? (a) : (b)
622
623#define mpush(lz,hz,dz) \
624{ \
625 stackLo[sp] = lz; \
626 stackHi[sp] = hz; \
627 stackD [sp] = dz; \
628 sp++; \
629}
630
631#define mpop(lz,hz,dz) \
632{ \
633 sp--; \
634 lz = stackLo[sp]; \
635 hz = stackHi[sp]; \
636 dz = stackD [sp]; \
637}
638
639
640#define mnextsize(az) (nextHi[az]-nextLo[az])
641
642#define mnextswap(az,bz) \
643{ \
644 int32_t tz; \
645 tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \
646 tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \
647 tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; \
648}
649
650#define MAIN_QSORT_SMALL_THRESH 20
651#define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT)
652#define MAIN_QSORT_STACK_SIZE 100
653
654static
655void mainQSort3(uint32_t* ptr,
656 UChar* block,
657 uint16_t* quadrant,
658 int32_t nblock,
659 int32_t loSt,
660 int32_t hiSt,
661 int32_t dSt,
662 int32_t* budget)
663{
664 int32_t unLo, unHi, ltLo, gtHi, n, m, med;
665 int32_t sp, lo, hi, d;
666
667 int32_t stackLo[MAIN_QSORT_STACK_SIZE];
668 int32_t stackHi[MAIN_QSORT_STACK_SIZE];
669 int32_t stackD [MAIN_QSORT_STACK_SIZE];
670
671 int32_t nextLo[3];
672 int32_t nextHi[3];
673 int32_t nextD [3];
674
675 sp = 0;
676 mpush(loSt, hiSt, dSt);
677
678 while (sp > 0) {
679 AssertH(sp < MAIN_QSORT_STACK_SIZE - 2, 1001);
680
681 mpop(lo, hi, d);
682 if (hi - lo < MAIN_QSORT_SMALL_THRESH
683 || d > MAIN_QSORT_DEPTH_THRESH
684 ) {
685 mainSimpleSort(ptr, block, quadrant, nblock, lo, hi, d, budget);
686 if (*budget < 0)
687 return;
688 continue;
689 }
690
691 med = (int32_t) mmed3(block[ptr[lo ] + d],
692 block[ptr[hi ] + d],
693 block[ptr[(lo+hi) >> 1] + d]);
694
695 unLo = ltLo = lo;
696 unHi = gtHi = hi;
697
698 while (1) {
699 while (1) {
700 if (unLo > unHi)
701 break;
702 n = ((int32_t)block[ptr[unLo]+d]) - med;
703 if (n == 0) {
704 mswap(ptr[unLo], ptr[ltLo]);
705 ltLo++;
706 unLo++;
707 continue;
708 };
709 if (n > 0) break;
710 unLo++;
711 }
712 while (1) {
713 if (unLo > unHi)
714 break;
715 n = ((int32_t)block[ptr[unHi]+d]) - med;
716 if (n == 0) {
717 mswap(ptr[unHi], ptr[gtHi]);
718 gtHi--;
719 unHi--;
720 continue;
721 };
722 if (n < 0) break;
723 unHi--;
724 }
725 if (unLo > unHi)
726 break;
727 mswap(ptr[unLo], ptr[unHi]);
728 unLo++;
729 unHi--;
730 }
731
732 AssertD(unHi == unLo-1, "mainQSort3(2)");
733
734 if (gtHi < ltLo) {
735 mpush(lo, hi, d + 1);
736 continue;
737 }
738
739 n = mmin(ltLo-lo, unLo-ltLo); mvswap(lo, unLo-n, n);
740 m = mmin(hi-gtHi, gtHi-unHi); mvswap(unLo, hi-m+1, m);
741
742 n = lo + unLo - ltLo - 1;
743 m = hi - (gtHi - unHi) + 1;
744
745 nextLo[0] = lo; nextHi[0] = n; nextD[0] = d;
746 nextLo[1] = m; nextHi[1] = hi; nextD[1] = d;
747 nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1;
748
749 if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
750 if (mnextsize(1) < mnextsize(2)) mnextswap(1,2);
751 if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
752
753 AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)");
754 AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)");
755
756 mpush (nextLo[0], nextHi[0], nextD[0]);
757 mpush (nextLo[1], nextHi[1], nextD[1]);
758 mpush (nextLo[2], nextHi[2], nextD[2]);
759 }
760}
761
762#undef mswap
763#undef mvswap
764#undef mpush
765#undef mpop
766#undef mmin
767#undef mnextsize
768#undef mnextswap
769#undef MAIN_QSORT_SMALL_THRESH
770#undef MAIN_QSORT_DEPTH_THRESH
771#undef MAIN_QSORT_STACK_SIZE
772
773
774/*---------------------------------------------*/
775/* Pre:
776 * nblock > N_OVERSHOOT
777 * block32 exists for [0 .. nblock-1 +N_OVERSHOOT]
778 * ((UChar*)block32) [0 .. nblock-1] holds block
779 * ptr exists for [0 .. nblock-1]
780 *
781 * Post:
782 * ((UChar*)block32) [0 .. nblock-1] holds block
783 * All other areas of block32 destroyed
784 * ftab[0 .. 65536] destroyed
785 * ptr [0 .. nblock-1] holds sorted order
786 * if (*budget < 0), sorting was abandoned
787 */
788
789#define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8])
790#define SETMASK (1 << 21)
791#define CLEARMASK (~(SETMASK))
792
793static NOINLINE
794void mainSort(uint32_t* ptr,
795 UChar* block,
796 uint16_t* quadrant,
797 uint32_t* ftab,
798 int32_t nblock,
799 int32_t* budget)
800{
801 int32_t i, j, k, ss, sb;
802 int32_t runningOrder[256];
803 Bool bigDone[256];
804 int32_t copyStart[256];
805 int32_t copyEnd [256];
806 UChar c1;
807 int32_t numQSorted;
808 uint16_t s;
809
810 /*-- set up the 2-byte frequency table --*/
811 /* was: for (i = 65536; i >= 0; i--) ftab[i] = 0; */
812 memset(ftab, 0, 65537 * sizeof(ftab[0]));
813
814 j = block[0] << 8;
815 i = nblock-1;
816#if 0
817 for (; i >= 3; i -= 4) {
818 quadrant[i] = 0;
819 j = (j >> 8) |(((uint16_t)block[i]) << 8);
820 ftab[j]++;
821 quadrant[i-1] = 0;
822 j = (j >> 8) |(((uint16_t)block[i-1]) << 8);
823 ftab[j]++;
824 quadrant[i-2] = 0;
825 j = (j >> 8) |(((uint16_t)block[i-2]) << 8);
826 ftab[j]++;
827 quadrant[i-3] = 0;
828 j = (j >> 8) |(((uint16_t)block[i-3]) << 8);
829 ftab[j]++;
830 }
831#endif
832 for (; i >= 0; i--) {
833 quadrant[i] = 0;
834 j = (j >> 8) |(((uint16_t)block[i]) << 8);
835 ftab[j]++;
836 }
837
838 /*-- (emphasises close relationship of block & quadrant) --*/
839 for (i = 0; i < BZ_N_OVERSHOOT; i++) {
840 block [nblock+i] = block[i];
841 quadrant[nblock+i] = 0;
842 }
843
844 /*-- Complete the initial radix sort --*/
845 for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1];
846
847 s = block[0] << 8;
848 i = nblock-1;
849#if 0
850 for (; i >= 3; i -= 4) {
851 s = (s >> 8) | (block[i] << 8);
852 j = ftab[s] -1;
853 ftab[s] = j;
854 ptr[j] = i;
855 s = (s >> 8) | (block[i-1] << 8);
856 j = ftab[s] -1;
857 ftab[s] = j;
858 ptr[j] = i-1;
859 s = (s >> 8) | (block[i-2] << 8);
860 j = ftab[s] -1;
861 ftab[s] = j;
862 ptr[j] = i-2;
863 s = (s >> 8) | (block[i-3] << 8);
864 j = ftab[s] -1;
865 ftab[s] = j;
866 ptr[j] = i-3;
867 }
868#endif
869 for (; i >= 0; i--) {
870 s = (s >> 8) | (block[i] << 8);
871 j = ftab[s] -1;
872 ftab[s] = j;
873 ptr[j] = i;
874 }
875
876 /*
877 * Now ftab contains the first loc of every small bucket.
878 * Calculate the running order, from smallest to largest
879 * big bucket.
880 */
881 for (i = 0; i <= 255; i++) {
882 bigDone [i] = False;
883 runningOrder[i] = i;
884 }
885
886 {
887 int32_t vv;
888 /* was: int32_t h = 1; */
889 /* do h = 3 * h + 1; while (h <= 256); */
890 int32_t h = 364;
891
892 do {
893 h = h / 3;
894 for (i = h; i <= 255; i++) {
895 vv = runningOrder[i];
896 j = i;
897 while (BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv)) {
898 runningOrder[j] = runningOrder[j-h];
899 j = j - h;
900 if (j <= (h - 1)) goto zero;
901 }
902 zero:
903 runningOrder[j] = vv;
904 }
905 } while (h != 1);
906 }
907
908 /*
909 * The main sorting loop.
910 */
911
912 numQSorted = 0;
913
914 for (i = 0; i <= 255; i++) {
915
916 /*
917 * Process big buckets, starting with the least full.
918 * Basically this is a 3-step process in which we call
919 * mainQSort3 to sort the small buckets [ss, j], but
920 * also make a big effort to avoid the calls if we can.
921 */
922 ss = runningOrder[i];
923
924 /*
925 * Step 1:
926 * Complete the big bucket [ss] by quicksorting
927 * any unsorted small buckets [ss, j], for j != ss.
928 * Hopefully previous pointer-scanning phases have already
929 * completed many of the small buckets [ss, j], so
930 * we don't have to sort them at all.
931 */
932 for (j = 0; j <= 255; j++) {
933 if (j != ss) {
934 sb = (ss << 8) + j;
935 if (!(ftab[sb] & SETMASK)) {
936 int32_t lo = ftab[sb] & CLEARMASK;
937 int32_t hi = (ftab[sb+1] & CLEARMASK) - 1;
938 if (hi > lo) {
939 mainQSort3 (
940 ptr, block, quadrant, nblock,
941 lo, hi, BZ_N_RADIX, budget
942 );
943 numQSorted += (hi - lo + 1);
944 if (*budget < 0) return;
945 }
946 }
947 ftab[sb] |= SETMASK;
948 }
949 }
950
951 AssertH(!bigDone[ss], 1006);
952
953 /*
954 * Step 2:
955 * Now scan this big bucket [ss] so as to synthesise the
956 * sorted order for small buckets [t, ss] for all t,
957 * including, magically, the bucket [ss,ss] too.
958 * This will avoid doing Real Work in subsequent Step 1's.
959 */
960 {
961 for (j = 0; j <= 255; j++) {
962 copyStart[j] = ftab[(j << 8) + ss] & CLEARMASK;
963 copyEnd [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1;
964 }
965 for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) {
966 k = ptr[j] - 1;
967 if (k < 0)
968 k += nblock;
969 c1 = block[k];
970 if (!bigDone[c1])
971 ptr[copyStart[c1]++] = k;
972 }
973 for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) {
974 k = ptr[j]-1;
975 if (k < 0)
976 k += nblock;
977 c1 = block[k];
978 if (!bigDone[c1])
979 ptr[copyEnd[c1]--] = k;
980 }
981 }
982
983 AssertH((copyStart[ss]-1 == copyEnd[ss])
984 ||
985 /* Extremely rare case missing in bzip2-1.0.0 and 1.0.1.
986 * Necessity for this case is demonstrated by compressing
987 * a sequence of approximately 48.5 million of character
988 * 251; 1.0.0/1.0.1 will then die here. */
989 (copyStart[ss] == 0 && copyEnd[ss] == nblock-1),
990 1007)
991
992 for (j = 0; j <= 255; j++)
993 ftab[(j << 8) + ss] |= SETMASK;
994
995 /*
996 * Step 3:
997 * The [ss] big bucket is now done. Record this fact,
998 * and update the quadrant descriptors. Remember to
999 * update quadrants in the overshoot area too, if
1000 * necessary. The "if (i < 255)" test merely skips
1001 * this updating for the last bucket processed, since
1002 * updating for the last bucket is pointless.
1003 *
1004 * The quadrant array provides a way to incrementally
1005 * cache sort orderings, as they appear, so as to
1006 * make subsequent comparisons in fullGtU() complete
1007 * faster. For repetitive blocks this makes a big
1008 * difference (but not big enough to be able to avoid
1009 * the fallback sorting mechanism, exponential radix sort).
1010 *
1011 * The precise meaning is: at all times:
1012 *
1013 * for 0 <= i < nblock and 0 <= j <= nblock
1014 *
1015 * if block[i] != block[j],
1016 *
1017 * then the relative values of quadrant[i] and
1018 * quadrant[j] are meaningless.
1019 *
1020 * else {
1021 * if quadrant[i] < quadrant[j]
1022 * then the string starting at i lexicographically
1023 * precedes the string starting at j
1024 *
1025 * else if quadrant[i] > quadrant[j]
1026 * then the string starting at j lexicographically
1027 * precedes the string starting at i
1028 *
1029 * else
1030 * the relative ordering of the strings starting
1031 * at i and j has not yet been determined.
1032 * }
1033 */
1034 bigDone[ss] = True;
1035
1036 if (i < 255) {
1037 int32_t bbStart = ftab[ss << 8] & CLEARMASK;
1038 int32_t bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart;
1039 int32_t shifts = 0;
1040
1041 while ((bbSize >> shifts) > 65534) shifts++;
1042
1043 for (j = bbSize-1; j >= 0; j--) {
1044 int32_t a2update = ptr[bbStart + j];
1045 uint16_t qVal = (uint16_t)(j >> shifts);
1046 quadrant[a2update] = qVal;
1047 if (a2update < BZ_N_OVERSHOOT)
1048 quadrant[a2update + nblock] = qVal;
1049 }
1050 AssertH(((bbSize-1) >> shifts) <= 65535, 1002);
1051 }
1052
1053 }
1054}
1055
1056#undef BIGFREQ
1057#undef SETMASK
1058#undef CLEARMASK
1059
1060
1061/*---------------------------------------------*/
1062/* Pre:
1063 * nblock > 0
1064 * arr2 exists for [0 .. nblock-1 +N_OVERSHOOT]
1065 * ((UChar*)arr2)[0 .. nblock-1] holds block
1066 * arr1 exists for [0 .. nblock-1]
1067 *
1068 * Post:
1069 * ((UChar*)arr2) [0 .. nblock-1] holds block
1070 * All other areas of block destroyed
1071 * ftab[0 .. 65536] destroyed
1072 * arr1[0 .. nblock-1] holds sorted order
1073 */
1074static NOINLINE
1075void BZ2_blockSort(EState* s)
1076{
1077 /* In original bzip2 1.0.4, it's a parameter, but 30
1078 * should work ok. */
1079 enum { wfact = 30 };
1080
1081 uint32_t* ptr = s->ptr;
1082 UChar* block = s->block;
1083 uint32_t* ftab = s->ftab;
1084 int32_t nblock = s->nblock;
1085 uint16_t* quadrant;
1086 int32_t budget;
1087 int32_t i;
1088
1089 if (nblock < 10000) {
1090 fallbackSort(s->arr1, s->arr2, ftab, nblock);
1091 } else {
1092 /* Calculate the location for quadrant, remembering to get
1093 * the alignment right. Assumes that &(block[0]) is at least
1094 * 2-byte aligned -- this should be ok since block is really
1095 * the first section of arr2.
1096 */
1097 i = nblock + BZ_N_OVERSHOOT;
1098 if (i & 1) i++;
1099 quadrant = (uint16_t*)(&(block[i]));
1100
1101 /* (wfact-1) / 3 puts the default-factor-30
1102 * transition point at very roughly the same place as
1103 * with v0.1 and v0.9.0.
1104 * Not that it particularly matters any more, since the
1105 * resulting compressed stream is now the same regardless
1106 * of whether or not we use the main sort or fallback sort.
1107 */
1108 budget = nblock * ((wfact-1) / 3);
1109
1110 mainSort(ptr, block, quadrant, ftab, nblock, &budget);
1111 if (budget < 0) {
1112 fallbackSort(s->arr1, s->arr2, ftab, nblock);
1113 }
1114 }
1115
1116 s->origPtr = -1;
1117 for (i = 0; i < s->nblock; i++)
1118 if (ptr[i] == 0) {
1119 s->origPtr = i; break;
1120 };
1121
1122 AssertH(s->origPtr != -1, 1003);
1123}
1124
1125
1126/*-------------------------------------------------------------*/
1127/*--- end blocksort.c ---*/
1128/*-------------------------------------------------------------*/