aboutsummaryrefslogtreecommitdiff
path: root/blocksort.c
diff options
context:
space:
mode:
Diffstat (limited to 'blocksort.c')
-rw-r--r--blocksort.c1242
1 files changed, 799 insertions, 443 deletions
diff --git a/blocksort.c b/blocksort.c
index d8bb26a..85a02de 100644
--- a/blocksort.c
+++ b/blocksort.c
@@ -8,7 +8,7 @@
8 This file is a part of bzip2 and/or libbzip2, a program and 8 This file is a part of bzip2 and/or libbzip2, a program and
9 library for lossless, block-sorting data compression. 9 library for lossless, block-sorting data compression.
10 10
11 Copyright (C) 1996-1998 Julian R Seward. All rights reserved. 11 Copyright (C) 1996-1999 Julian R Seward. All rights reserved.
12 12
13 Redistribution and use in source and binary forms, with or without 13 Redistribution and use in source and binary forms, with or without
14 modification, are permitted provided that the following conditions 14 modification, are permitted provided that the following conditions
@@ -41,9 +41,9 @@
41 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 41 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
42 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 42 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43 43
44 Julian Seward, Guildford, Surrey, UK. 44 Julian Seward, Cambridge, UK.
45 jseward@acm.org 45 jseward@acm.org
46 bzip2/libbzip2 version 0.9.0c of 18 October 1998 46 bzip2/libbzip2 version 0.9.5 of 24 May 1999
47 47
48 This program is based on (at least) the work of: 48 This program is based on (at least) the work of:
49 Mike Burrows 49 Mike Burrows
@@ -62,106 +62,404 @@
62#include "bzlib_private.h" 62#include "bzlib_private.h"
63 63
64/*---------------------------------------------*/ 64/*---------------------------------------------*/
65/*-- 65/*--- Fallback O(N log(N)^2) sorting ---*/
66 Compare two strings in block. We assume (see 66/*--- algorithm, for repetitive blocks ---*/
67 discussion above) that i1 and i2 have a max 67/*---------------------------------------------*/
68 offset of 10 on entry, and that the first 68
69 bytes of both block and quadrant have been 69/*---------------------------------------------*/
70 copied into the "overshoot area", ie 70static
71 into the subscript range 71__inline__
72 [nblock .. nblock+NUM_OVERSHOOT_BYTES-1]. 72void fallbackSimpleSort ( UInt32* fmap,
73--*/ 73 UInt32* eclass,
74static __inline__ Bool fullGtU ( UChar* block, 74 Int32 lo,
75 UInt16* quadrant, 75 Int32 hi )
76 UInt32 nblock, 76{
77 Int32* workDone, 77 Int32 i, j, tmp;
78 Int32 i1, 78 UInt32 ec_tmp;
79 Int32 i2 79
80 ) 80 if (lo == hi) return;
81
82 if (hi - lo > 3) {
83 for ( i = hi-4; i >= lo; i-- ) {
84 tmp = fmap[i];
85 ec_tmp = eclass[tmp];
86 for ( j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4 )
87 fmap[j-4] = fmap[j];
88 fmap[j-4] = tmp;
89 }
90 }
91
92 for ( i = hi-1; i >= lo; i-- ) {
93 tmp = fmap[i];
94 ec_tmp = eclass[tmp];
95 for ( j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++ )
96 fmap[j-1] = fmap[j];
97 fmap[j-1] = tmp;
98 }
99}
100
101
102/*---------------------------------------------*/
103#define fswap(zz1, zz2) \
104 { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
105
106#define fvswap(zzp1, zzp2, zzn) \
107{ \
108 Int32 yyp1 = (zzp1); \
109 Int32 yyp2 = (zzp2); \
110 Int32 yyn = (zzn); \
111 while (yyn > 0) { \
112 fswap(fmap[yyp1], fmap[yyp2]); \
113 yyp1++; yyp2++; yyn--; \
114 } \
115}
116
117
118#define fmin(a,b) ((a) < (b)) ? (a) : (b)
119
120#define fpush(lz,hz) { stackLo[sp] = lz; \
121 stackHi[sp] = hz; \
122 sp++; }
123
124#define fpop(lz,hz) { sp--; \
125 lz = stackLo[sp]; \
126 hz = stackHi[sp]; }
127
128#define FALLBACK_QSORT_SMALL_THRESH 10
129#define FALLBACK_QSORT_STACK_SIZE 100
130
131
132static
133void fallbackQSort3 ( UInt32* fmap,
134 UInt32* eclass,
135 Int32 loSt,
136 Int32 hiSt )
137{
138 Int32 unLo, unHi, ltLo, gtHi, n, m;
139 Int32 sp, lo, hi;
140 UInt32 med, r, r3;
141 Int32 stackLo[FALLBACK_QSORT_STACK_SIZE];
142 Int32 stackHi[FALLBACK_QSORT_STACK_SIZE];
143
144 r = 0;
145
146 sp = 0;
147 fpush ( loSt, hiSt );
148
149 while (sp > 0) {
150
151 AssertH ( sp < FALLBACK_QSORT_STACK_SIZE, 1004 );
152
153 fpop ( lo, hi );
154 if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) {
155 fallbackSimpleSort ( fmap, eclass, lo, hi );
156 continue;
157 }
158
159 /* Random partitioning. Median of 3 sometimes fails to
160 avoid bad cases. Median of 9 seems to help but
161 looks rather expensive. This too seems to work but
162 is cheaper. Guidance for the magic constants
163 7621 and 32768 is taken from Sedgewick's algorithms
164 book, chapter 35.
165 */
166 r = ((r * 7621) + 1) % 32768;
167 r3 = r % 3;
168 if (r3 == 0) med = eclass[fmap[lo]]; else
169 if (r3 == 1) med = eclass[fmap[(lo+hi)>>1]]; else
170 med = eclass[fmap[hi]];
171
172 unLo = ltLo = lo;
173 unHi = gtHi = hi;
174
175 while (1) {
176 while (1) {
177 if (unLo > unHi) break;
178 n = (Int32)eclass[fmap[unLo]] - (Int32)med;
179 if (n == 0) {
180 fswap(fmap[unLo], fmap[ltLo]);
181 ltLo++; unLo++;
182 continue;
183 };
184 if (n > 0) break;
185 unLo++;
186 }
187 while (1) {
188 if (unLo > unHi) break;
189 n = (Int32)eclass[fmap[unHi]] - (Int32)med;
190 if (n == 0) {
191 fswap(fmap[unHi], fmap[gtHi]);
192 gtHi--; unHi--;
193 continue;
194 };
195 if (n < 0) break;
196 unHi--;
197 }
198 if (unLo > unHi) break;
199 fswap(fmap[unLo], fmap[unHi]); unLo++; unHi--;
200 }
201
202 AssertD ( unHi == unLo-1, "fallbackQSort3(2)" );
203
204 if (gtHi < ltLo) continue;
205
206 n = fmin(ltLo-lo, unLo-ltLo); fvswap(lo, unLo-n, n);
207 m = fmin(hi-gtHi, gtHi-unHi); fvswap(unLo, hi-m+1, m);
208
209 n = lo + unLo - ltLo - 1;
210 m = hi - (gtHi - unHi) + 1;
211
212 if (n - lo > hi - m) {
213 fpush ( lo, n );
214 fpush ( m, hi );
215 } else {
216 fpush ( m, hi );
217 fpush ( lo, n );
218 }
219 }
220}
221
222#undef fmin
223#undef fpush
224#undef fpop
225#undef fswap
226#undef fvswap
227#undef FALLBACK_QSORT_SMALL_THRESH
228#undef FALLBACK_QSORT_STACK_SIZE
229
230
231/*---------------------------------------------*/
232/* Pre:
233 nblock > 0
234 eclass exists for [0 .. nblock-1]
235 ((UInt16*)eclass) [0 .. nblock-1] [15:8] holds block
236 ptr exists for [0 .. nblock-1]
237
238 Post:
239 ((UInt16*)eclass) [0 .. nblock-1] [15:8] holds block
240 All other areas of eclass destroyed
241 fmap [0 .. nblock-1] holds sorted order
242 bhtab [ 0 .. 2+(nblock/32) ] destroyed
243*/
244
245#define SET_BH(zz) bhtab[(zz) >> 5] |= (1 << ((zz) & 31))
246#define CLEAR_BH(zz) bhtab[(zz) >> 5] &= ~(1 << ((zz) & 31))
247#define ISSET_BH(zz) (bhtab[(zz) >> 5] & (1 << ((zz) & 31)))
248#define WORD_BH(zz) bhtab[(zz) >> 5]
249#define UNALIGNED_BH(zz) ((zz) & 0x01f)
250
251static
252void fallbackSort ( UInt32* fmap,
253 UInt32* eclass,
254 UInt32* bhtab,
255 Int32 nblock,
256 Int32 verb )
257{
258 Int32 ftab[257];
259 Int32 ftabCopy[256];
260 Int32 H, i, j, k, l, r, cc, cc1;
261 Int32 nNotDone;
262 Int32 nBhtab;
263 UInt16* eclass16 = (UInt16*)eclass;
264
265 /*--
266 Initial 1-char radix sort to generate
267 initial fmap and initial BH bits.
268 --*/
269 if (verb >= 4)
270 VPrintf0 ( " bucket sorting ...\n" );
271 for (i = 0; i < 257; i++) ftab[i] = 0;
272 for (i = 0; i < nblock; i++) ftab[eclass16[i] >> 8]++;
273 for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i];
274 for (i = 1; i < 257; i++) ftab[i] += ftab[i-1];
275
276 for (i = 0; i < nblock; i++) {
277 j = eclass16[i] >> 8;
278 k = ftab[j] - 1;
279 ftab[j] = k;
280 fmap[k] = i;
281 }
282
283 nBhtab = 2 + (nblock / 32);
284 for (i = 0; i < nBhtab; i++) bhtab[i] = 0;
285 for (i = 0; i < 256; i++) SET_BH(ftab[i]);
286
287 /*--
288 Inductively refine the buckets. Kind-of an
289 "exponential radix sort" (!), inspired by the
290 Manber-Myers suffix array construction algorithm.
291 --*/
292
293 /*-- set sentinel bits for block-end detection --*/
294 for (i = 0; i < 32; i++) {
295 SET_BH(nblock + 2*i);
296 CLEAR_BH(nblock + 2*i + 1);
297 }
298
299 /*-- the log(N) loop --*/
300 H = 1;
301 while (1) {
302
303 if (verb >= 4)
304 VPrintf1 ( " depth %6d has ", H );
305
306 j = 0;
307 for (i = 0; i < nblock; i++) {
308 if (ISSET_BH(i)) j = i;
309 k = fmap[i] - H; if (k < 0) k += nblock;
310 eclass[k] = j;
311 }
312
313 nNotDone = 0;
314 r = -1;
315 while (1) {
316
317 /*-- find the next non-singleton bucket --*/
318 k = r + 1;
319 while (ISSET_BH(k) && UNALIGNED_BH(k)) k++;
320 if (ISSET_BH(k)) {
321 while (WORD_BH(k) == 0xffffffff) k += 32;
322 while (ISSET_BH(k)) k++;
323 }
324 l = k - 1;
325 if (l >= nblock) break;
326 while (!ISSET_BH(k) && UNALIGNED_BH(k)) k++;
327 if (!ISSET_BH(k)) {
328 while (WORD_BH(k) == 0x00000000) k += 32;
329 while (!ISSET_BH(k)) k++;
330 }
331 r = k - 1;
332 if (r >= nblock) break;
333
334 /*-- now [l, r] bracket current bucket --*/
335 if (r > l) {
336 nNotDone += (r - l + 1);
337 fallbackQSort3 ( fmap, eclass, l, r );
338
339 /*-- scan bucket and generate header bits-- */
340 cc = -1;
341 for (i = l; i <= r; i++) {
342 cc1 = eclass[fmap[i]];
343 if (cc != cc1) { SET_BH(i); cc = cc1; };
344 }
345 }
346 }
347
348 if (verb >= 4)
349 VPrintf1 ( "%6d unresolved strings\n", nNotDone );
350
351 H *= 2;
352 if (H > nblock || nNotDone == 0) break;
353 }
354
355 /*--
356 Reconstruct the original block in
357 eclass16 [0 .. nblock-1] [15:8], since the
358 previous phase destroyed it.
359 --*/
360 if (verb >= 4)
361 VPrintf0 ( " reconstructing block ...\n" );
362 j = 0;
363 for (i = 0; i < nblock; i++) {
364 while (ftabCopy[j] == 0) j++;
365 ftabCopy[j]--;
366 eclass16[fmap[i]] = j << 8;
367 }
368 AssertH ( j < 256, 1005 );
369}
370
371#undef SET_BH
372#undef CLEAR_BH
373#undef ISSET_BH
374#undef WORD_BH
375#undef UNALIGNED_BH
376
377
378/*---------------------------------------------*/
379/*--- The main, O(N^2 log(N)) sorting ---*/
380/*--- algorithm. Faster for "normal" ---*/
381/*--- non-repetitive blocks. ---*/
382/*---------------------------------------------*/
383
384/*---------------------------------------------*/
385static
386__inline__
387Bool mainGtU ( UInt32 i1,
388 UInt32 i2,
389 UInt16* block,
390 UInt16* quadrant,
391 UInt32 nblock,
392 Int32* budget )
81{ 393{
82 Int32 k; 394 Int32 k;
83 UChar c1, c2;
84 UInt16 s1, s2; 395 UInt16 s1, s2;
85 396
86 AssertD ( i1 != i2, "fullGtU(1)" ); 397 AssertD ( i1 != i2, "mainGtU" );
87 398
88 c1 = block[i1]; 399 s1 = block[i1]; s2 = block[i2];
89 c2 = block[i2]; 400 if (s1 != s2) return (s1 > s2);
90 if (c1 != c2) return (c1 > c2); 401 i1 += 2; i2 += 2;
91 i1++; i2++;
92 402
93 c1 = block[i1]; 403 s1 = block[i1]; s2 = block[i2];
94 c2 = block[i2]; 404 if (s1 != s2) return (s1 > s2);
95 if (c1 != c2) return (c1 > c2); 405 i1 += 2; i2 += 2;
96 i1++; i2++;
97 406
98 c1 = block[i1]; 407 s1 = block[i1]; s2 = block[i2];
99 c2 = block[i2]; 408 if (s1 != s2) return (s1 > s2);
100 if (c1 != c2) return (c1 > c2); 409 i1 += 2; i2 += 2;
101 i1++; i2++;
102 410
103 c1 = block[i1]; 411 s1 = block[i1]; s2 = block[i2];
104 c2 = block[i2]; 412 if (s1 != s2) return (s1 > s2);
105 if (c1 != c2) return (c1 > c2); 413 i1 += 2; i2 += 2;
106 i1++; i2++;
107 414
108 c1 = block[i1]; 415 s1 = block[i1]; s2 = block[i2];
109 c2 = block[i2]; 416 if (s1 != s2) return (s1 > s2);
110 if (c1 != c2) return (c1 > c2); 417 i1 += 2; i2 += 2;
111 i1++; i2++;
112 418
113 c1 = block[i1]; 419 s1 = block[i1]; s2 = block[i2];
114 c2 = block[i2]; 420 if (s1 != s2) return (s1 > s2);
115 if (c1 != c2) return (c1 > c2); 421 i1 += 2; i2 += 2;
116 i1++; i2++;
117 422
118 k = nblock; 423 k = nblock + 8;
119 424
120 do { 425 do {
121 426
122 c1 = block[i1]; 427 s1 = block[i1]; s2 = block[i2];
123 c2 = block[i2];
124 if (c1 != c2) return (c1 > c2);
125 s1 = quadrant[i1];
126 s2 = quadrant[i2];
127 if (s1 != s2) return (s1 > s2); 428 if (s1 != s2) return (s1 > s2);
128 i1++; i2++; 429 s1 = quadrant[i1]; s2 = quadrant[i2];
430 if (s1 != s2) return (s1 > s2);
431 i1 += 2; i2 += 2;
129 432
130 c1 = block[i1]; 433 s1 = block[i1]; s2 = block[i2];
131 c2 = block[i2]; 434 if (s1 != s2) return (s1 > s2);
132 if (c1 != c2) return (c1 > c2); 435 s1 = quadrant[i1]; s2 = quadrant[i2];
133 s1 = quadrant[i1];
134 s2 = quadrant[i2];
135 if (s1 != s2) return (s1 > s2); 436 if (s1 != s2) return (s1 > s2);
136 i1++; i2++; 437 i1 += 2; i2 += 2;
137 438
138 c1 = block[i1]; 439 s1 = block[i1]; s2 = block[i2];
139 c2 = block[i2];
140 if (c1 != c2) return (c1 > c2);
141 s1 = quadrant[i1];
142 s2 = quadrant[i2];
143 if (s1 != s2) return (s1 > s2); 440 if (s1 != s2) return (s1 > s2);
144 i1++; i2++; 441 s1 = quadrant[i1]; s2 = quadrant[i2];
442 if (s1 != s2) return (s1 > s2);
443 i1 += 2; i2 += 2;
145 444
146 c1 = block[i1]; 445 s1 = block[i1]; s2 = block[i2];
147 c2 = block[i2]; 446 if (s1 != s2) return (s1 > s2);
148 if (c1 != c2) return (c1 > c2); 447 s1 = quadrant[i1]; s2 = quadrant[i2];
149 s1 = quadrant[i1];
150 s2 = quadrant[i2];
151 if (s1 != s2) return (s1 > s2); 448 if (s1 != s2) return (s1 > s2);
152 i1++; i2++; 449 i1 += 2; i2 += 2;
153 450
154 if (i1 >= nblock) i1 -= nblock; 451 if (i1 >= nblock) i1 -= nblock;
155 if (i2 >= nblock) i2 -= nblock; 452 if (i2 >= nblock) i2 -= nblock;
156 453
157 k -= 4; 454 k -= 8;
158 (*workDone)++; 455 (*budget)--;
159 } 456 }
160 while (k >= 0); 457 while (k >= 0);
161 458
162 return False; 459 return False;
163} 460}
164 461
462
165/*---------------------------------------------*/ 463/*---------------------------------------------*/
166/*-- 464/*--
167 Knuth's increments seem to work better 465 Knuth's increments seem to work better
@@ -169,22 +467,22 @@ static __inline__ Bool fullGtU ( UChar* block,
169 because the number of elems to sort is 467 because the number of elems to sort is
170 usually small, typically <= 20. 468 usually small, typically <= 20.
171--*/ 469--*/
172static Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280, 470Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
173 9841, 29524, 88573, 265720, 471 9841, 29524, 88573, 265720,
174 797161, 2391484 }; 472 797161, 2391484 };
175 473
176static void simpleSort ( EState* s, Int32 lo, Int32 hi, Int32 d ) 474static
475void mainSimpleSort ( UInt32* ptr,
476 UInt16* block,
477 UInt16* quadrant,
478 Int32 nblock,
479 Int32 lo,
480 Int32 hi,
481 Int32 d,
482 Int32* budget )
177{ 483{
178 Int32 i, j, h, bigN, hp; 484 Int32 i, j, h, bigN, hp;
179 Int32 v; 485 UInt32 v;
180
181 UChar* block = s->block;
182 UInt32* zptr = s->zptr;
183 UInt16* quadrant = s->quadrant;
184 Int32* workDone = &(s->workDone);
185 Int32 nblock = s->nblock;
186 Int32 workLimit = s->workLimit;
187 Bool firstAttempt = s->firstAttempt;
188 486
189 bigN = hi - lo + 1; 487 bigN = hi - lo + 1;
190 if (bigN < 2) return; 488 if (bigN < 2) return;
@@ -195,49 +493,53 @@ static void simpleSort ( EState* s, Int32 lo, Int32 hi, Int32 d )
195 493
196 for (; hp >= 0; hp--) { 494 for (; hp >= 0; hp--) {
197 h = incs[hp]; 495 h = incs[hp];
496
198 i = lo + h; 497 i = lo + h;
199 while (True) { 498 while (True) {
200 499
201 /*-- copy 1 --*/ 500 /*-- copy 1 --*/
202 if (i > hi) break; 501 if (i > hi) break;
203 v = zptr[i]; 502 v = ptr[i];
204 j = i; 503 j = i;
205 while ( fullGtU ( block, quadrant, nblock, workDone, 504 while ( mainGtU (
206 zptr[j-h]+d, v+d ) ) { 505 ptr[j-h]+d, v+d, block, quadrant, nblock, budget
207 zptr[j] = zptr[j-h]; 506 ) ) {
507 ptr[j] = ptr[j-h];
208 j = j - h; 508 j = j - h;
209 if (j <= (lo + h - 1)) break; 509 if (j <= (lo + h - 1)) break;
210 } 510 }
211 zptr[j] = v; 511 ptr[j] = v;
212 i++; 512 i++;
213 513
214 /*-- copy 2 --*/ 514 /*-- copy 2 --*/
215 if (i > hi) break; 515 if (i > hi) break;
216 v = zptr[i]; 516 v = ptr[i];
217 j = i; 517 j = i;
218 while ( fullGtU ( block, quadrant, nblock, workDone, 518 while ( mainGtU (
219 zptr[j-h]+d, v+d ) ) { 519 ptr[j-h]+d, v+d, block, quadrant, nblock, budget
220 zptr[j] = zptr[j-h]; 520 ) ) {
521 ptr[j] = ptr[j-h];
221 j = j - h; 522 j = j - h;
222 if (j <= (lo + h - 1)) break; 523 if (j <= (lo + h - 1)) break;
223 } 524 }
224 zptr[j] = v; 525 ptr[j] = v;
225 i++; 526 i++;
226 527
227 /*-- copy 3 --*/ 528 /*-- copy 3 --*/
228 if (i > hi) break; 529 if (i > hi) break;
229 v = zptr[i]; 530 v = ptr[i];
230 j = i; 531 j = i;
231 while ( fullGtU ( block, quadrant, nblock, workDone, 532 while ( mainGtU (
232 zptr[j-h]+d, v+d ) ) { 533 ptr[j-h]+d, v+d, block, quadrant, nblock, budget
233 zptr[j] = zptr[j-h]; 534 ) ) {
535 ptr[j] = ptr[j-h];
234 j = j - h; 536 j = j - h;
235 if (j <= (lo + h - 1)) break; 537 if (j <= (lo + h - 1)) break;
236 } 538 }
237 zptr[j] = v; 539 ptr[j] = v;
238 i++; 540 i++;
239 541
240 if (*workDone > workLimit && firstAttempt) return; 542 if (*budget < 0) return;
241 } 543 }
242 } 544 }
243} 545}
@@ -252,20 +554,26 @@ static void simpleSort ( EState* s, Int32 lo, Int32 hi, Int32 d )
252 Sedgewick and Jon L. Bentley. 554 Sedgewick and Jon L. Bentley.
253--*/ 555--*/
254 556
255#define swap(lv1, lv2) \ 557#define mswap(zz1, zz2) \
256 { Int32 tmp = lv1; lv1 = lv2; lv2 = tmp; } 558 { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
257 559
258static void vswap ( UInt32* zptr, Int32 p1, Int32 p2, Int32 n ) 560#define mvswap(zzp1, zzp2, zzn) \
259{ 561{ \
260 while (n > 0) { 562 Int32 yyp1 = (zzp1); \
261 swap(zptr[p1], zptr[p2]); 563 Int32 yyp2 = (zzp2); \
262 p1++; p2++; n--; 564 Int32 yyn = (zzn); \
263 } 565 while (yyn > 0) { \
566 mswap(ptr[yyp1], ptr[yyp2]); \
567 yyp1++; yyp2++; yyn--; \
568 } \
264} 569}
265 570
266static UChar med3 ( UChar a, UChar b, UChar c ) 571
572static
573__inline__
574UInt16 mmed3 ( UInt16 a, UInt16 b, UInt16 c )
267{ 575{
268 UChar t; 576 UInt16 t;
269 if (a > b) { t = a; a = b; b = t; }; 577 if (a > b) { t = a; a = b; b = t; };
270 if (b > c) { t = b; b = c; c = t; }; 578 if (b > c) { t = b; b = c; c = t; };
271 if (a > b) b = a; 579 if (a > b) b = a;
@@ -273,66 +581,72 @@ static UChar med3 ( UChar a, UChar b, UChar c )
273} 581}
274 582
275 583
276#define min(a,b) ((a) < (b)) ? (a) : (b) 584#define mmin(a,b) ((a) < (b)) ? (a) : (b)
277 585
278typedef 586#define mpush(lz,hz,dz) { stackLo[sp] = lz; \
279 struct { Int32 ll; Int32 hh; Int32 dd; } 587 stackHi[sp] = hz; \
280 StackElem; 588 stackD [sp] = dz; \
589 sp++; }
281 590
282#define push(lz,hz,dz) { stack[sp].ll = lz; \ 591#define mpop(lz,hz,dz) { sp--; \
283 stack[sp].hh = hz; \ 592 lz = stackLo[sp]; \
284 stack[sp].dd = dz; \ 593 hz = stackHi[sp]; \
285 sp++; } 594 dz = stackD [sp]; }
286 595
287#define pop(lz,hz,dz) { sp--; \
288 lz = stack[sp].ll; \
289 hz = stack[sp].hh; \
290 dz = stack[sp].dd; }
291 596
292#define SMALL_THRESH 20 597#define mnextsize(az) (nextHi[az]-nextLo[az])
293#define DEPTH_THRESH 10 598
599#define mnextswap(az,bz) \
600 { Int32 tz; \
601 tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \
602 tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \
603 tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; }
294 604
295/*--
296 If you are ever unlucky/improbable enough
297 to get a stack overflow whilst sorting,
298 increase the following constant and try
299 again. In practice I have never seen the
300 stack go above 27 elems, so the following
301 limit seems very generous.
302--*/
303#define QSORT_STACK_SIZE 1000
304 605
606#define MAIN_QSORT_SMALL_THRESH 20
607#define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT)
608#define MAIN_QSORT_STACK_SIZE 100
305 609
306static void qSort3 ( EState* s, Int32 loSt, Int32 hiSt, Int32 dSt ) 610static
611void mainQSort3 ( UInt32* ptr,
612 UInt16* block,
613 UInt16* quadrant,
614 Int32 nblock,
615 Int32 loSt,
616 Int32 hiSt,
617 Int32 dSt,
618 Int32* budget )
307{ 619{
308 Int32 unLo, unHi, ltLo, gtHi, med, n, m; 620 Int32 unLo, unHi, ltLo, gtHi, n, m, med;
309 Int32 sp, lo, hi, d; 621 Int32 sp, lo, hi, d;
310 StackElem stack[QSORT_STACK_SIZE];
311 622
312 UChar* block = s->block; 623 Int32 stackLo[MAIN_QSORT_STACK_SIZE];
313 UInt32* zptr = s->zptr; 624 Int32 stackHi[MAIN_QSORT_STACK_SIZE];
314 Int32* workDone = &(s->workDone); 625 Int32 stackD [MAIN_QSORT_STACK_SIZE];
315 Int32 workLimit = s->workLimit; 626
316 Bool firstAttempt = s->firstAttempt; 627 Int32 nextLo[3];
628 Int32 nextHi[3];
629 Int32 nextD [3];
317 630
318 sp = 0; 631 sp = 0;
319 push ( loSt, hiSt, dSt ); 632 mpush ( loSt, hiSt, dSt );
320 633
321 while (sp > 0) { 634 while (sp > 0) {
322 635
323 AssertH ( sp < QSORT_STACK_SIZE, 1001 ); 636 AssertH ( sp < MAIN_QSORT_STACK_SIZE, 1001 );
324 637
325 pop ( lo, hi, d ); 638 mpop ( lo, hi, d );
326 639 if (hi - lo < MAIN_QSORT_SMALL_THRESH ||
327 if (hi - lo < SMALL_THRESH || d > DEPTH_THRESH) { 640 d > MAIN_QSORT_DEPTH_THRESH) {
328 simpleSort ( s, lo, hi, d ); 641 mainSimpleSort ( ptr, block, quadrant, nblock, lo, hi, d, budget );
329 if (*workDone > workLimit && firstAttempt) return; 642 if (*budget < 0) return;
330 continue; 643 continue;
331 } 644 }
332 645
333 med = med3 ( block[zptr[ lo ]+d], 646 med = (Int32)
334 block[zptr[ hi ]+d], 647 mmed3 ( block[ptr[ lo ]+d],
335 block[zptr[ (lo+hi)>>1 ]+d] ); 648 block[ptr[ hi ]+d],
649 block[ptr[ (lo+hi)>>1 ]+d] );
336 650
337 unLo = ltLo = lo; 651 unLo = ltLo = lo;
338 unHi = gtHi = hi; 652 unHi = gtHi = hi;
@@ -340,370 +654,412 @@ static void qSort3 ( EState* s, Int32 loSt, Int32 hiSt, Int32 dSt )
340 while (True) { 654 while (True) {
341 while (True) { 655 while (True) {
342 if (unLo > unHi) break; 656 if (unLo > unHi) break;
343 n = ((Int32)block[zptr[unLo]+d]) - med; 657 n = ((Int32)block[ptr[unLo]+d]) - med;
344 if (n == 0) { swap(zptr[unLo], zptr[ltLo]); ltLo++; unLo++; continue; }; 658 if (n == 0) {
659 mswap(ptr[unLo], ptr[ltLo]);
660 ltLo++; unLo++; continue;
661 };
345 if (n > 0) break; 662 if (n > 0) break;
346 unLo++; 663 unLo++;
347 } 664 }
348 while (True) { 665 while (True) {
349 if (unLo > unHi) break; 666 if (unLo > unHi) break;
350 n = ((Int32)block[zptr[unHi]+d]) - med; 667 n = ((Int32)block[ptr[unHi]+d]) - med;
351 if (n == 0) { swap(zptr[unHi], zptr[gtHi]); gtHi--; unHi--; continue; }; 668 if (n == 0) {
669 mswap(ptr[unHi], ptr[gtHi]);
670 gtHi--; unHi--; continue;
671 };
352 if (n < 0) break; 672 if (n < 0) break;
353 unHi--; 673 unHi--;
354 } 674 }
355 if (unLo > unHi) break; 675 if (unLo > unHi) break;
356 swap(zptr[unLo], zptr[unHi]); unLo++; unHi--; 676 mswap(ptr[unLo], ptr[unHi]); unLo++; unHi--;
357 } 677 }
358 678
359 AssertD ( unHi == unLo-1, "bad termination in qSort3" ); 679 AssertD ( unHi == unLo-1, "mainQSort3(2)" );
360 680
361 if (gtHi < ltLo) { 681 if (gtHi < ltLo) {
362 push(lo, hi, d+1 ); 682 mpush(lo, hi, d+2 );
363 continue; 683 continue;
364 } 684 }
365 685
366 n = min(ltLo-lo, unLo-ltLo); vswap(zptr, lo, unLo-n, n); 686 n = mmin(ltLo-lo, unLo-ltLo); mvswap(lo, unLo-n, n);
367 m = min(hi-gtHi, gtHi-unHi); vswap(zptr, unLo, hi-m+1, m); 687 m = mmin(hi-gtHi, gtHi-unHi); mvswap(unLo, hi-m+1, m);
368 688
369 n = lo + unLo - ltLo - 1; 689 n = lo + unLo - ltLo - 1;
370 m = hi - (gtHi - unHi) + 1; 690 m = hi - (gtHi - unHi) + 1;
371 691
372 push ( lo, n, d ); 692 nextLo[0] = lo; nextHi[0] = n; nextD[0] = d;
373 push ( n+1, m-1, d+1 ); 693 nextLo[1] = m; nextHi[1] = hi; nextD[1] = d;
374 push ( m, hi, d ); 694 nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+2;
695
696 if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
697 if (mnextsize(1) < mnextsize(2)) mnextswap(1,2);
698 if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
699
700 AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)" );
701 AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)" );
702
703 mpush (nextLo[0], nextHi[0], nextD[0]);
704 mpush (nextLo[1], nextHi[1], nextD[1]);
705 mpush (nextLo[2], nextHi[2], nextD[2]);
375 } 706 }
376} 707}
377 708
709#undef mswap
710#undef mvswap
711#undef mpush
712#undef mpop
713#undef mmin
714#undef mnextsize
715#undef mnextswap
716#undef MAIN_QSORT_SMALL_THRESH
717#undef MAIN_QSORT_DEPTH_THRESH
718#undef MAIN_QSORT_STACK_SIZE
719
378 720
379/*---------------------------------------------*/ 721/*---------------------------------------------*/
722/* Pre:
723 nblock > N_OVERSHOOT
724 block32 exists for [0 .. nblock-1 +N_OVERSHOOT]
725 ((UInt16*)block32) [0 .. nblock-1] [15:8] holds block
726 ptr exists for [0 .. nblock-1]
727
728 Post:
729 ((UInt16*)block32) [0 .. nblock-1] [15:8] holds block
730 All other areas of block32 destroyed
731 ftab [0 .. 65536 ] destroyed
732 ptr [0 .. nblock-1] holds sorted order
733 if (*budget < 0), sorting was abandoned
734*/
380 735
381#define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8]) 736#define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8])
382
383#define SETMASK (1 << 21) 737#define SETMASK (1 << 21)
384#define CLEARMASK (~(SETMASK)) 738#define CLEARMASK (~(SETMASK))
385 739
386static void sortMain ( EState* s ) 740static
741void mainSort ( UInt32* ptr,
742 UInt16* block,
743 UInt16* quadrant,
744 UInt32* ftab,
745 Int32 nblock,
746 Int32 verb,
747 Int32* budget )
387{ 748{
388 Int32 i, j, k, ss, sb; 749 Int32 i, j, k, m, ss, sb;
389 Int32 runningOrder[256]; 750 Int32 runningOrder[256];
390 Int32 copy[256]; 751 Int32 copy[256];
391 Bool bigDone[256]; 752 Bool bigDone[256];
392 UChar c1, c2; 753 UChar c1;
393 Int32 numQSorted; 754 Int32 numQSorted;
394 755 Int32 biggestSoFar;
395 UChar* block = s->block; 756 UInt16 s;
396 UInt32* zptr = s->zptr; 757
397 UInt16* quadrant = s->quadrant; 758 if (verb >= 4) VPrintf0 ( " main sort initialise ...\n" );
398 Int32* ftab = s->ftab; 759
399 Int32* workDone = &(s->workDone); 760 /*-- Stripe the block data into 16 bits, and at the
400 Int32 nblock = s->nblock; 761 same time set up the 2-byte frequency table
401 Int32 workLimit = s->workLimit;
402 Bool firstAttempt = s->firstAttempt;
403
404 /*--
405 In the various block-sized structures, live data runs
406 from 0 to last+NUM_OVERSHOOT_BYTES inclusive. First,
407 set up the overshoot area for block.
408 --*/ 762 --*/
763 for (i = 65536; i >= 0; i--) ftab[i] = 0;
764
765 s = block[0];
766 for (i = 1; i < nblock; i++) {
767 quadrant[i] = 0;
768 s = (s << 8) | block[i];
769 block[i-1] = s;
770 ftab[s]++;
771 }
772 quadrant[0] = 0;
773 s = (s << 8) | (block[0] >> 8);
774 block[nblock-1] = s;
775 ftab[s]++;
776
777 /*-- (emphasises close relationship of block & quadrant) --*/
778 for (i = 0; i < BZ_N_OVERSHOOT; i++) {
779 block [nblock+i] = block[i];
780 quadrant[nblock+i] = 0;
781 }
409 782
410 if (s->verbosity >= 4) 783 if (verb >= 4) VPrintf0 ( " bucket sorting ...\n" );
411 VPrintf0( " sort initialise ...\n" );
412
413 for (i = 0; i < BZ_NUM_OVERSHOOT_BYTES; i++)
414 block[nblock+i] = block[i % nblock];
415 for (i = 0; i < nblock+BZ_NUM_OVERSHOOT_BYTES; i++)
416 quadrant[i] = 0;
417
418
419 if (nblock <= 4000) {
420
421 /*--
422 Use simpleSort(), since the full sorting mechanism
423 has quite a large constant overhead.
424 --*/
425 if (s->verbosity >= 4) VPrintf0( " simpleSort ...\n" );
426 for (i = 0; i < nblock; i++) zptr[i] = i;
427 firstAttempt = False;
428 *workDone = workLimit = 0;
429 simpleSort ( s, 0, nblock-1, 0 );
430 if (s->verbosity >= 4) VPrintf0( " simpleSort done.\n" );
431 784
432 } else { 785 /*-- Complete the initial radix sort --*/
786 for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1];
433 787
434 numQSorted = 0; 788 for (i = 0; i < nblock; i++) {
435 for (i = 0; i <= 255; i++) bigDone[i] = False; 789 s = block[i];
790 j = ftab[s] - 1;
791 ftab[s] = j;
792 ptr[j] = i;
793 }
436 794
437 if (s->verbosity >= 4) VPrintf0( " bucket sorting ...\n" ); 795 /*--
796 Now ftab contains the first loc of every small bucket.
797 Calculate the running order, from smallest to largest
798 big bucket.
799 --*/
800 for (i = 0; i <= 255; i++) {
801 bigDone [i] = False;
802 runningOrder[i] = i;
803 }
438 804
439 for (i = 0; i <= 65536; i++) ftab[i] = 0; 805 {
806 Int32 vv;
807 Int32 h = 1;
808 do h = 3 * h + 1; while (h <= 256);
809 do {
810 h = h / 3;
811 for (i = h; i <= 255; i++) {
812 vv = runningOrder[i];
813 j = i;
814 while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) {
815 runningOrder[j] = runningOrder[j-h];
816 j = j - h;
817 if (j <= (h - 1)) goto zero;
818 }
819 zero:
820 runningOrder[j] = vv;
821 }
822 } while (h != 1);
823 }
440 824
441 c1 = block[nblock-1]; 825 /*--
442 for (i = 0; i < nblock; i++) { 826 The main sorting loop.
443 c2 = block[i]; 827 --*/
444 ftab[(c1 << 8) + c2]++;
445 c1 = c2;
446 }
447 828
448 for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1]; 829 biggestSoFar = numQSorted = 0;
449 830
450 c1 = block[0]; 831 for (i = 0; i <= 255; i++) {
451 for (i = 0; i < nblock-1; i++) {
452 c2 = block[i+1];
453 j = (c1 << 8) + c2;
454 c1 = c2;
455 ftab[j]--;
456 zptr[ftab[j]] = i;
457 }
458 j = (block[nblock-1] << 8) + block[0];
459 ftab[j]--;
460 zptr[ftab[j]] = nblock-1;
461 832
462 /*-- 833 /*--
463 Now ftab contains the first loc of every small bucket. 834 Process big buckets, starting with the least full.
464 Calculate the running order, from smallest to largest 835 Basically this is a 4-step process in which we call
465 big bucket. 836 mainQSort3 to sort the small buckets [ss, j], but
837 also make a big effort to avoid the calls if we can.
466 --*/ 838 --*/
839 ss = runningOrder[i];
467 840
468 for (i = 0; i <= 255; i++) runningOrder[i] = i; 841 /*--
469 842 Step 1:
470 { 843 Complete the big bucket [ss] by quicksorting
471 Int32 vv; 844 any unsorted small buckets [ss, j], for j != ss.
472 Int32 h = 1; 845 Hopefully previous pointer-scanning phases have already
473 do h = 3 * h + 1; while (h <= 256); 846 completed many of the small buckets [ss, j], so
474 do { 847 we don't have to sort them at all.
475 h = h / 3; 848 --*/
476 for (i = h; i <= 255; i++) { 849 for (j = 0; j <= 255; j++) {
477 vv = runningOrder[i]; 850 if (j != ss) {
478 j = i; 851 sb = (ss << 8) + j;
479 while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) { 852 if ( ! (ftab[sb] & SETMASK) ) {
480 runningOrder[j] = runningOrder[j-h]; 853 Int32 lo = ftab[sb] & CLEARMASK;
481 j = j - h; 854 Int32 hi = (ftab[sb+1] & CLEARMASK) - 1;
482 if (j <= (h - 1)) goto zero; 855 if (hi > lo) {
856 if (verb >= 4)
857 VPrintf4 ( " qsort [0x%x, 0x%x] "
858 "done %d this %d\n",
859 ss, j, numQSorted, hi - lo + 1 );
860 mainQSort3 (
861 ptr, block, quadrant, nblock,
862 lo, hi, BZ_N_RADIX, budget
863 );
864 numQSorted += (hi - lo + 1);
865 if (*budget < 0) return;
483 } 866 }
484 zero:
485 runningOrder[j] = vv;
486 } 867 }
487 } while (h != 1); 868 ftab[sb] |= SETMASK;
869 }
488 } 870 }
489 871
490 /*-- 872 /*--
491 The main sorting loop. 873 Step 2:
874 Deal specially with case [ss, ss]. This establishes the
875 sorted order for [ss, ss] without any comparisons.
876 A clever trick, cryptically described as steps Q6b and Q6c
877 in SRC-124 (aka BW94). Compared to bzip2, this makes it
878 practical not to use a preliminary run-length coder.
492 --*/ 879 --*/
880 {
881 Int32 put0, get0, put1, get1;
882 Int32 sbn = (ss << 8) + ss;
883 Int32 lo = ftab[sbn] & CLEARMASK;
884 Int32 hi = (ftab[sbn+1] & CLEARMASK) - 1;
885 UChar ssc = (UChar)ss;
886 put0 = lo;
887 get0 = ftab[ss << 8] & CLEARMASK;
888 put1 = hi;
889 get1 = (ftab[(ss+1) << 8] & CLEARMASK) - 1;
890 while (get0 < put0) {
891 j = ptr[get0]-1; if (j < 0) j += nblock;
892 c1 = (UChar)(block[j] >> 8);
893 if (c1 == ssc) { ptr[put0] = j; put0++; };
894 get0++;
895 }
896 while (get1 > put1) {
897 j = ptr[get1]-1; if (j < 0) j += nblock;
898 c1 = (UChar)(block[j] >> 8);
899 if (c1 == ssc) { ptr[put1] = j; put1--; };
900 get1--;
901 }
902 ftab[sbn] |= SETMASK;
903 }
493 904
494 for (i = 0; i <= 255; i++) { 905 /*--
495 906 Step 3:
496 /*-- 907 The [ss] big bucket is now done. Record this fact,
497 Process big buckets, starting with the least full. 908 and update the quadrant descriptors. Remember to
498 Basically this is a 4-step process in which we call 909 update quadrants in the overshoot area too, if
499 qSort3 to sort the small buckets [ss, j], but 910 necessary. The "if (i < 255)" test merely skips
500 also make a big effort to avoid the calls if we can. 911 this updating for the last bucket processed, since
501 --*/ 912 updating for the last bucket is pointless.
502 ss = runningOrder[i]; 913
503 914 The quadrant array provides a way to incrementally
504 /*-- 915 cache sort orderings, as they appear, so as to
505 Step 1: 916 make subsequent comparisons in fullGtU() complete
506 Complete the big bucket [ss] by quicksorting 917 faster. For repetitive blocks this makes a big
507 any unsorted small buckets [ss, j], for j != ss. 918 difference (but not big enough to be able to avoid
508 Hopefully previous pointer-scanning phases have already 919 the fallback sorting mechanism, exponential radix sort).
509 completed many of the small buckets [ss, j], so 920
510 we don't have to sort them at all. 921 The precise meaning is: at all times:
511 --*/ 922
512 for (j = 0; j <= 255; j++) { 923 for 0 <= i < nblock and 0 <= j <= nblock
513 if (j != ss) { 924
514 sb = (ss << 8) + j; 925 if block[i] != block[j],
515 if ( ! (ftab[sb] & SETMASK) ) { 926
516 Int32 lo = ftab[sb] & CLEARMASK; 927 then the relative values of quadrant[i] and
517 Int32 hi = (ftab[sb+1] & CLEARMASK) - 1; 928 quadrant[j] are meaningless.
518 if (hi > lo) { 929
519 if (s->verbosity >= 4) 930 else {
520 VPrintf4( " qsort [0x%x, 0x%x] done %d this %d\n", 931 if quadrant[i] < quadrant[j]
521 ss, j, numQSorted, hi - lo + 1 ); 932 then the string starting at i lexicographically
522 qSort3 ( s, lo, hi, 2 ); 933 precedes the string starting at j
523 numQSorted += ( hi - lo + 1 ); 934
524 if (*workDone > workLimit && firstAttempt) return; 935 else if quadrant[i] > quadrant[j]
525 } 936 then the string starting at j lexicographically
937 precedes the string starting at i
938
939 else
940 the relative ordering of the strings starting
941 at i and j has not yet been determined.
526 } 942 }
527 ftab[sb] |= SETMASK; 943 --*/
528 } 944 bigDone[ss] = True;
529 }
530 945
531 /*-- 946 if (i < 255) {
532 Step 2: 947 Int32 bbStart = ftab[ss << 8] & CLEARMASK;
533 Deal specially with case [ss, ss]. This establishes the 948 Int32 bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart;
534 sorted order for [ss, ss] without any comparisons. 949 Int32 shifts = 0;
535 A clever trick, cryptically described as steps Q6b and Q6c
536 in SRC-124 (aka BW94). This makes it entirely practical to
537 not use a preliminary run-length coder, but unfortunately
538 we are now stuck with the .bz2 file format.
539 --*/
540 {
541 Int32 put0, get0, put1, get1;
542 Int32 sbn = (ss << 8) + ss;
543 Int32 lo = ftab[sbn] & CLEARMASK;
544 Int32 hi = (ftab[sbn+1] & CLEARMASK) - 1;
545 UChar ssc = (UChar)ss;
546 put0 = lo;
547 get0 = ftab[ss << 8] & CLEARMASK;
548 put1 = hi;
549 get1 = (ftab[(ss+1) << 8] & CLEARMASK) - 1;
550 while (get0 < put0) {
551 j = zptr[get0]-1; if (j < 0) j += nblock;
552 c1 = block[j];
553 if (c1 == ssc) { zptr[put0] = j; put0++; };
554 get0++;
555 }
556 while (get1 > put1) {
557 j = zptr[get1]-1; if (j < 0) j += nblock;
558 c1 = block[j];
559 if (c1 == ssc) { zptr[put1] = j; put1--; };
560 get1--;
561 }
562 ftab[sbn] |= SETMASK;
563 }
564 950
565 /*-- 951 while ((bbSize >> shifts) > 65534) shifts++;
566 Step 3:
567 The [ss] big bucket is now done. Record this fact,
568 and update the quadrant descriptors. Remember to
569 update quadrants in the overshoot area too, if
570 necessary. The "if (i < 255)" test merely skips
571 this updating for the last bucket processed, since
572 updating for the last bucket is pointless.
573
574 The quadrant array provides a way to incrementally
575 cache sort orderings, as they appear, so as to
576 make subsequent comparisons in fullGtU() complete
577 faster. For repetitive blocks this makes a big
578 difference (but not big enough to be able to avoid
579 randomisation for very repetitive data.)
580
581 The precise meaning is: at all times:
582
583 for 0 <= i < nblock and 0 <= j <= nblock
584
585 if block[i] != block[j],
586
587 then the relative values of quadrant[i] and
588 quadrant[j] are meaningless.
589
590 else {
591 if quadrant[i] < quadrant[j]
592 then the string starting at i lexicographically
593 precedes the string starting at j
594
595 else if quadrant[i] > quadrant[j]
596 then the string starting at j lexicographically
597 precedes the string starting at i
598
599 else
600 the relative ordering of the strings starting
601 at i and j has not yet been determined.
602 }
603 --*/
604 bigDone[ss] = True;
605
606 if (i < 255) {
607 Int32 bbStart = ftab[ss << 8] & CLEARMASK;
608 Int32 bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart;
609 Int32 shifts = 0;
610
611 while ((bbSize >> shifts) > 65534) shifts++;
612
613 for (j = 0; j < bbSize; j++) {
614 Int32 a2update = zptr[bbStart + j];
615 UInt16 qVal = (UInt16)(j >> shifts);
616 quadrant[a2update] = qVal;
617 if (a2update < BZ_NUM_OVERSHOOT_BYTES)
618 quadrant[a2update + nblock] = qVal;
619 }
620 952
621 AssertH ( ( ((bbSize-1) >> shifts) <= 65535 ), 1002 ); 953 for (j = 0; j < bbSize; j++) {
954 Int32 a2update = ptr[bbStart + j];
955 UInt16 qVal = (UInt16)(j >> shifts);
956 quadrant[a2update] = qVal;
957 if (a2update < BZ_N_OVERSHOOT)
958 quadrant[a2update + nblock] = qVal;
622 } 959 }
960 AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 );
961 }
623 962
624 /*-- 963 /*--
625 Step 4: 964 Step 4:
626 Now scan this big bucket [ss] so as to synthesise the 965 Now scan this big bucket [ss] so as to synthesise the
627 sorted order for small buckets [t, ss] for all t != ss. 966 sorted order for small buckets [t, ss] for all t != ss.
628 This will avoid doing Real Work in subsequent Step 1's. 967 This will avoid doing Real Work in subsequent Step 1's.
629 --*/ 968 --*/
630 for (j = 0; j <= 255; j++) 969 for (j = 0; j <= 255; j++)
631 copy[j] = ftab[(j << 8) + ss] & CLEARMASK; 970 copy[j] = ftab[(j << 8) + ss] & CLEARMASK;
632 971
633 for (j = ftab[ss << 8] & CLEARMASK; 972 m = ftab[(ss+1) << 8] & CLEARMASK;
634 j < (ftab[(ss+1) << 8] & CLEARMASK); 973 for (j = ftab[ss << 8] & CLEARMASK; j < m; j++) {
635 j++) { 974 k = ptr[j] - 1; if (k < 0) k += nblock;
636 k = zptr[j]-1; if (k < 0) k += nblock; 975 c1 = (UChar)(block[k] >> 8);
637 c1 = block[k]; 976 if ( ! bigDone[c1] ) {
638 if ( ! bigDone[c1] ) { 977 ptr[copy[c1]] = k;
639 zptr[copy[c1]] = k; 978 copy[c1] ++;
640 copy[c1] ++;
641 }
642 } 979 }
643
644 for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK;
645 } 980 }
646 if (s->verbosity >= 4)
647 VPrintf3( " %d pointers, %d sorted, %d scanned\n",
648 nblock, numQSorted, nblock - numQSorted );
649 }
650}
651
652 981
653/*---------------------------------------------*/ 982 for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK;
654static void randomiseBlock ( EState* s )
655{
656 Int32 i;
657 BZ_RAND_INIT_MASK;
658 for (i = 0; i < 256; i++) s->inUse[i] = False;
659
660 for (i = 0; i < s->nblock; i++) {
661 BZ_RAND_UPD_MASK;
662 s->block[i] ^= BZ_RAND_MASK;
663 s->inUse[s->block[i]] = True;
664 } 983 }
984
985 if (verb >= 4)
986 VPrintf3 ( " %d pointers, %d sorted, %d scanned\n",
987 nblock, numQSorted, nblock - numQSorted );
665} 988}
666 989
990#undef BIGFREQ
991#undef SETMASK
992#undef CLEARMASK
993
667 994
668/*---------------------------------------------*/ 995/*---------------------------------------------*/
996/* Pre:
997 nblock > 0
998 arr2 exists for [0 .. nblock-1 +N_OVERSHOOT]
999 ((UInt16*)arr2) [0 .. nblock-1] [15:8] holds block
1000 arr1 exists for [0 .. nblock-1]
1001
1002 Post:
1003 ((UInt16*)arr2) [0 .. nblock-1] [15:8] holds block
1004 All other areas of block destroyed
1005 ftab [ 0 .. 65536 ] destroyed
1006 arr1 [0 .. nblock-1] holds sorted order
1007*/
669void blockSort ( EState* s ) 1008void blockSort ( EState* s )
670{ 1009{
671 Int32 i; 1010 UInt32* ptr = s->ptr;
672 1011 UInt16* block = s->block;
673 s->workLimit = s->workFactor * (s->nblock - 1); 1012 UInt32* ftab = s->ftab;
674 s->workDone = 0; 1013 Int32 nblock = s->nblock;
675 s->blockRandomised = False; 1014 Int32 verb = s->verbosity;
676 s->firstAttempt = True; 1015 Int32 wfact = s->workFactor;
677 1016 UInt16* quadrant;
678 sortMain ( s ); 1017 Int32 budget;
679 1018 Int32 budgetInit;
680 if (s->verbosity >= 3) 1019 Int32 i;
681 VPrintf3( " %d work, %d block, ratio %5.2f\n", 1020
682 s->workDone, s->nblock-1, 1021 if (nblock < 10000) {
683 (float)(s->workDone) / (float)(s->nblock-1) ); 1022 for (i = 0; i < nblock; i++) block[i] <<= 8;
684 1023 fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
685 if (s->workDone > s->workLimit && s->firstAttempt) { 1024 } else {
686 if (s->verbosity >= 2) 1025 quadrant = &(block[nblock+BZ_N_OVERSHOOT]);
687 VPrintf0( " sorting aborted; randomising block\n" ); 1026
688 randomiseBlock ( s ); 1027 /* (wfact-1) / 3 puts the default-factor-30
689 s->workLimit = s->workDone = 0; 1028 transition point at very roughly the same place as
690 s->blockRandomised = True; 1029 with v0.1 and v0.9.0.
691 s->firstAttempt = False; 1030 Not that it particularly matters any more, since the
692 sortMain ( s ); 1031 resulting compressed stream is now the same regardless
693 if (s->verbosity >= 3) 1032 of whether or not we use the main sort or fallback sort.
694 VPrintf3( " %d work, %d block, ratio %f\n", 1033 */
695 s->workDone, s->nblock-1, 1034 if (wfact < 1 ) wfact = 1;
696 (float)(s->workDone) / (float)(s->nblock-1) ); 1035 if (wfact > 100) wfact = 100;
1036 budgetInit = nblock * ((wfact-1) / 3);
1037 budget = budgetInit;
1038
1039 mainSort ( ptr, block, quadrant, ftab, nblock, verb, &budget );
1040 if (verb >= 3)
1041 VPrintf3 ( " %d work, %d block, ratio %5.2f\n",
1042 budgetInit - budget,
1043 nblock,
1044 (float)(budgetInit - budget) /
1045 (float)(nblock==0 ? 1 : nblock) );
1046 if (budget < 0) {
1047 if (verb >= 2)
1048 VPrintf0 ( " too repetitive; using fallback"
1049 " sorting algorithm\n" );
1050 fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
1051 }
697 } 1052 }
698 1053
699 s->origPtr = -1; 1054 s->origPtr = -1;
700 for (i = 0; i < s->nblock; i++) 1055 for (i = 0; i < s->nblock; i++)
701 if (s->zptr[i] == 0) 1056 if (ptr[i] == 0)
702 { s->origPtr = i; break; }; 1057 { s->origPtr = i; break; };
703 1058
704 AssertH( s->origPtr != -1, 1003 ); 1059 AssertH( s->origPtr != -1, 1003 );
705} 1060}
706 1061
1062
707/*-------------------------------------------------------------*/ 1063/*-------------------------------------------------------------*/
708/*--- end blocksort.c ---*/ 1064/*--- end blocksort.c ---*/
709/*-------------------------------------------------------------*/ 1065/*-------------------------------------------------------------*/