diff options
Diffstat (limited to 'blocksort.c')
-rw-r--r-- | blocksort.c | 709 |
1 files changed, 709 insertions, 0 deletions
diff --git a/blocksort.c b/blocksort.c new file mode 100644 index 0000000..d8bb26a --- /dev/null +++ b/blocksort.c | |||
@@ -0,0 +1,709 @@ | |||
1 | |||
2 | /*-------------------------------------------------------------*/ | ||
3 | /*--- Block sorting machinery ---*/ | ||
4 | /*--- blocksort.c ---*/ | ||
5 | /*-------------------------------------------------------------*/ | ||
6 | |||
7 | /*-- | ||
8 | This file is a part of bzip2 and/or libbzip2, a program and | ||
9 | library for lossless, block-sorting data compression. | ||
10 | |||
11 | Copyright (C) 1996-1998 Julian R Seward. All rights reserved. | ||
12 | |||
13 | Redistribution and use in source and binary forms, with or without | ||
14 | modification, are permitted provided that the following conditions | ||
15 | are met: | ||
16 | |||
17 | 1. Redistributions of source code must retain the above copyright | ||
18 | notice, this list of conditions and the following disclaimer. | ||
19 | |||
20 | 2. The origin of this software must not be misrepresented; you must | ||
21 | not claim that you wrote the original software. If you use this | ||
22 | software in a product, an acknowledgment in the product | ||
23 | documentation would be appreciated but is not required. | ||
24 | |||
25 | 3. Altered source versions must be plainly marked as such, and must | ||
26 | not be misrepresented as being the original software. | ||
27 | |||
28 | 4. The name of the author may not be used to endorse or promote | ||
29 | products derived from this software without specific prior written | ||
30 | permission. | ||
31 | |||
32 | THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS | ||
33 | OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | ||
34 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||
35 | ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY | ||
36 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | ||
37 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE | ||
38 | GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | ||
39 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, | ||
40 | WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | ||
41 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||
42 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
43 | |||
44 | Julian Seward, Guildford, Surrey, UK. | ||
45 | jseward@acm.org | ||
46 | bzip2/libbzip2 version 0.9.0c of 18 October 1998 | ||
47 | |||
48 | This program is based on (at least) the work of: | ||
49 | Mike Burrows | ||
50 | David Wheeler | ||
51 | Peter Fenwick | ||
52 | Alistair Moffat | ||
53 | Radford Neal | ||
54 | Ian H. Witten | ||
55 | Robert Sedgewick | ||
56 | Jon L. Bentley | ||
57 | |||
58 | For more information on these sources, see the manual. | ||
59 | --*/ | ||
60 | |||
61 | |||
62 | #include "bzlib_private.h" | ||
63 | |||
64 | /*---------------------------------------------*/ | ||
65 | /*-- | ||
66 | Compare two strings in block. We assume (see | ||
67 | discussion above) that i1 and i2 have a max | ||
68 | offset of 10 on entry, and that the first | ||
69 | bytes of both block and quadrant have been | ||
70 | copied into the "overshoot area", ie | ||
71 | into the subscript range | ||
72 | [nblock .. nblock+NUM_OVERSHOOT_BYTES-1]. | ||
73 | --*/ | ||
74 | static __inline__ Bool fullGtU ( UChar* block, | ||
75 | UInt16* quadrant, | ||
76 | UInt32 nblock, | ||
77 | Int32* workDone, | ||
78 | Int32 i1, | ||
79 | Int32 i2 | ||
80 | ) | ||
81 | { | ||
82 | Int32 k; | ||
83 | UChar c1, c2; | ||
84 | UInt16 s1, s2; | ||
85 | |||
86 | AssertD ( i1 != i2, "fullGtU(1)" ); | ||
87 | |||
88 | c1 = block[i1]; | ||
89 | c2 = block[i2]; | ||
90 | if (c1 != c2) return (c1 > c2); | ||
91 | i1++; i2++; | ||
92 | |||
93 | c1 = block[i1]; | ||
94 | c2 = block[i2]; | ||
95 | if (c1 != c2) return (c1 > c2); | ||
96 | i1++; i2++; | ||
97 | |||
98 | c1 = block[i1]; | ||
99 | c2 = block[i2]; | ||
100 | if (c1 != c2) return (c1 > c2); | ||
101 | i1++; i2++; | ||
102 | |||
103 | c1 = block[i1]; | ||
104 | c2 = block[i2]; | ||
105 | if (c1 != c2) return (c1 > c2); | ||
106 | i1++; i2++; | ||
107 | |||
108 | c1 = block[i1]; | ||
109 | c2 = block[i2]; | ||
110 | if (c1 != c2) return (c1 > c2); | ||
111 | i1++; i2++; | ||
112 | |||
113 | c1 = block[i1]; | ||
114 | c2 = block[i2]; | ||
115 | if (c1 != c2) return (c1 > c2); | ||
116 | i1++; i2++; | ||
117 | |||
118 | k = nblock; | ||
119 | |||
120 | do { | ||
121 | |||
122 | c1 = block[i1]; | ||
123 | c2 = block[i2]; | ||
124 | if (c1 != c2) return (c1 > c2); | ||
125 | s1 = quadrant[i1]; | ||
126 | s2 = quadrant[i2]; | ||
127 | if (s1 != s2) return (s1 > s2); | ||
128 | i1++; i2++; | ||
129 | |||
130 | c1 = block[i1]; | ||
131 | c2 = block[i2]; | ||
132 | if (c1 != c2) return (c1 > c2); | ||
133 | s1 = quadrant[i1]; | ||
134 | s2 = quadrant[i2]; | ||
135 | if (s1 != s2) return (s1 > s2); | ||
136 | i1++; i2++; | ||
137 | |||
138 | c1 = block[i1]; | ||
139 | c2 = block[i2]; | ||
140 | if (c1 != c2) return (c1 > c2); | ||
141 | s1 = quadrant[i1]; | ||
142 | s2 = quadrant[i2]; | ||
143 | if (s1 != s2) return (s1 > s2); | ||
144 | i1++; i2++; | ||
145 | |||
146 | c1 = block[i1]; | ||
147 | c2 = block[i2]; | ||
148 | if (c1 != c2) return (c1 > c2); | ||
149 | s1 = quadrant[i1]; | ||
150 | s2 = quadrant[i2]; | ||
151 | if (s1 != s2) return (s1 > s2); | ||
152 | i1++; i2++; | ||
153 | |||
154 | if (i1 >= nblock) i1 -= nblock; | ||
155 | if (i2 >= nblock) i2 -= nblock; | ||
156 | |||
157 | k -= 4; | ||
158 | (*workDone)++; | ||
159 | } | ||
160 | while (k >= 0); | ||
161 | |||
162 | return False; | ||
163 | } | ||
164 | |||
165 | /*---------------------------------------------*/ | ||
166 | /*-- | ||
167 | Knuth's increments seem to work better | ||
168 | than Incerpi-Sedgewick here. Possibly | ||
169 | because the number of elems to sort is | ||
170 | usually small, typically <= 20. | ||
171 | --*/ | ||
172 | static Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280, | ||
173 | 9841, 29524, 88573, 265720, | ||
174 | 797161, 2391484 }; | ||
175 | |||
176 | static void simpleSort ( EState* s, Int32 lo, Int32 hi, Int32 d ) | ||
177 | { | ||
178 | Int32 i, j, h, bigN, hp; | ||
179 | Int32 v; | ||
180 | |||
181 | UChar* block = s->block; | ||
182 | UInt32* zptr = s->zptr; | ||
183 | UInt16* quadrant = s->quadrant; | ||
184 | Int32* workDone = &(s->workDone); | ||
185 | Int32 nblock = s->nblock; | ||
186 | Int32 workLimit = s->workLimit; | ||
187 | Bool firstAttempt = s->firstAttempt; | ||
188 | |||
189 | bigN = hi - lo + 1; | ||
190 | if (bigN < 2) return; | ||
191 | |||
192 | hp = 0; | ||
193 | while (incs[hp] < bigN) hp++; | ||
194 | hp--; | ||
195 | |||
196 | for (; hp >= 0; hp--) { | ||
197 | h = incs[hp]; | ||
198 | i = lo + h; | ||
199 | while (True) { | ||
200 | |||
201 | /*-- copy 1 --*/ | ||
202 | if (i > hi) break; | ||
203 | v = zptr[i]; | ||
204 | j = i; | ||
205 | while ( fullGtU ( block, quadrant, nblock, workDone, | ||
206 | zptr[j-h]+d, v+d ) ) { | ||
207 | zptr[j] = zptr[j-h]; | ||
208 | j = j - h; | ||
209 | if (j <= (lo + h - 1)) break; | ||
210 | } | ||
211 | zptr[j] = v; | ||
212 | i++; | ||
213 | |||
214 | /*-- copy 2 --*/ | ||
215 | if (i > hi) break; | ||
216 | v = zptr[i]; | ||
217 | j = i; | ||
218 | while ( fullGtU ( block, quadrant, nblock, workDone, | ||
219 | zptr[j-h]+d, v+d ) ) { | ||
220 | zptr[j] = zptr[j-h]; | ||
221 | j = j - h; | ||
222 | if (j <= (lo + h - 1)) break; | ||
223 | } | ||
224 | zptr[j] = v; | ||
225 | i++; | ||
226 | |||
227 | /*-- copy 3 --*/ | ||
228 | if (i > hi) break; | ||
229 | v = zptr[i]; | ||
230 | j = i; | ||
231 | while ( fullGtU ( block, quadrant, nblock, workDone, | ||
232 | zptr[j-h]+d, v+d ) ) { | ||
233 | zptr[j] = zptr[j-h]; | ||
234 | j = j - h; | ||
235 | if (j <= (lo + h - 1)) break; | ||
236 | } | ||
237 | zptr[j] = v; | ||
238 | i++; | ||
239 | |||
240 | if (*workDone > workLimit && firstAttempt) return; | ||
241 | } | ||
242 | } | ||
243 | } | ||
244 | |||
245 | |||
246 | /*---------------------------------------------*/ | ||
247 | /*-- | ||
248 | The following is an implementation of | ||
249 | an elegant 3-way quicksort for strings, | ||
250 | described in a paper "Fast Algorithms for | ||
251 | Sorting and Searching Strings", by Robert | ||
252 | Sedgewick and Jon L. Bentley. | ||
253 | --*/ | ||
254 | |||
255 | #define swap(lv1, lv2) \ | ||
256 | { Int32 tmp = lv1; lv1 = lv2; lv2 = tmp; } | ||
257 | |||
258 | static void vswap ( UInt32* zptr, Int32 p1, Int32 p2, Int32 n ) | ||
259 | { | ||
260 | while (n > 0) { | ||
261 | swap(zptr[p1], zptr[p2]); | ||
262 | p1++; p2++; n--; | ||
263 | } | ||
264 | } | ||
265 | |||
266 | static UChar med3 ( UChar a, UChar b, UChar c ) | ||
267 | { | ||
268 | UChar t; | ||
269 | if (a > b) { t = a; a = b; b = t; }; | ||
270 | if (b > c) { t = b; b = c; c = t; }; | ||
271 | if (a > b) b = a; | ||
272 | return b; | ||
273 | } | ||
274 | |||
275 | |||
276 | #define min(a,b) ((a) < (b)) ? (a) : (b) | ||
277 | |||
278 | typedef | ||
279 | struct { Int32 ll; Int32 hh; Int32 dd; } | ||
280 | StackElem; | ||
281 | |||
282 | #define push(lz,hz,dz) { stack[sp].ll = lz; \ | ||
283 | stack[sp].hh = hz; \ | ||
284 | stack[sp].dd = dz; \ | ||
285 | sp++; } | ||
286 | |||
287 | #define pop(lz,hz,dz) { sp--; \ | ||
288 | lz = stack[sp].ll; \ | ||
289 | hz = stack[sp].hh; \ | ||
290 | dz = stack[sp].dd; } | ||
291 | |||
292 | #define SMALL_THRESH 20 | ||
293 | #define DEPTH_THRESH 10 | ||
294 | |||
295 | /*-- | ||
296 | If you are ever unlucky/improbable enough | ||
297 | to get a stack overflow whilst sorting, | ||
298 | increase the following constant and try | ||
299 | again. In practice I have never seen the | ||
300 | stack go above 27 elems, so the following | ||
301 | limit seems very generous. | ||
302 | --*/ | ||
303 | #define QSORT_STACK_SIZE 1000 | ||
304 | |||
305 | |||
306 | static void qSort3 ( EState* s, Int32 loSt, Int32 hiSt, Int32 dSt ) | ||
307 | { | ||
308 | Int32 unLo, unHi, ltLo, gtHi, med, n, m; | ||
309 | Int32 sp, lo, hi, d; | ||
310 | StackElem stack[QSORT_STACK_SIZE]; | ||
311 | |||
312 | UChar* block = s->block; | ||
313 | UInt32* zptr = s->zptr; | ||
314 | Int32* workDone = &(s->workDone); | ||
315 | Int32 workLimit = s->workLimit; | ||
316 | Bool firstAttempt = s->firstAttempt; | ||
317 | |||
318 | sp = 0; | ||
319 | push ( loSt, hiSt, dSt ); | ||
320 | |||
321 | while (sp > 0) { | ||
322 | |||
323 | AssertH ( sp < QSORT_STACK_SIZE, 1001 ); | ||
324 | |||
325 | pop ( lo, hi, d ); | ||
326 | |||
327 | if (hi - lo < SMALL_THRESH || d > DEPTH_THRESH) { | ||
328 | simpleSort ( s, lo, hi, d ); | ||
329 | if (*workDone > workLimit && firstAttempt) return; | ||
330 | continue; | ||
331 | } | ||
332 | |||
333 | med = med3 ( block[zptr[ lo ]+d], | ||
334 | block[zptr[ hi ]+d], | ||
335 | block[zptr[ (lo+hi)>>1 ]+d] ); | ||
336 | |||
337 | unLo = ltLo = lo; | ||
338 | unHi = gtHi = hi; | ||
339 | |||
340 | while (True) { | ||
341 | while (True) { | ||
342 | if (unLo > unHi) break; | ||
343 | n = ((Int32)block[zptr[unLo]+d]) - med; | ||
344 | if (n == 0) { swap(zptr[unLo], zptr[ltLo]); ltLo++; unLo++; continue; }; | ||
345 | if (n > 0) break; | ||
346 | unLo++; | ||
347 | } | ||
348 | while (True) { | ||
349 | if (unLo > unHi) break; | ||
350 | n = ((Int32)block[zptr[unHi]+d]) - med; | ||
351 | if (n == 0) { swap(zptr[unHi], zptr[gtHi]); gtHi--; unHi--; continue; }; | ||
352 | if (n < 0) break; | ||
353 | unHi--; | ||
354 | } | ||
355 | if (unLo > unHi) break; | ||
356 | swap(zptr[unLo], zptr[unHi]); unLo++; unHi--; | ||
357 | } | ||
358 | |||
359 | AssertD ( unHi == unLo-1, "bad termination in qSort3" ); | ||
360 | |||
361 | if (gtHi < ltLo) { | ||
362 | push(lo, hi, d+1 ); | ||
363 | continue; | ||
364 | } | ||
365 | |||
366 | n = min(ltLo-lo, unLo-ltLo); vswap(zptr, lo, unLo-n, n); | ||
367 | m = min(hi-gtHi, gtHi-unHi); vswap(zptr, unLo, hi-m+1, m); | ||
368 | |||
369 | n = lo + unLo - ltLo - 1; | ||
370 | m = hi - (gtHi - unHi) + 1; | ||
371 | |||
372 | push ( lo, n, d ); | ||
373 | push ( n+1, m-1, d+1 ); | ||
374 | push ( m, hi, d ); | ||
375 | } | ||
376 | } | ||
377 | |||
378 | |||
379 | /*---------------------------------------------*/ | ||
380 | |||
381 | #define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8]) | ||
382 | |||
383 | #define SETMASK (1 << 21) | ||
384 | #define CLEARMASK (~(SETMASK)) | ||
385 | |||
386 | static void sortMain ( EState* s ) | ||
387 | { | ||
388 | Int32 i, j, k, ss, sb; | ||
389 | Int32 runningOrder[256]; | ||
390 | Int32 copy[256]; | ||
391 | Bool bigDone[256]; | ||
392 | UChar c1, c2; | ||
393 | Int32 numQSorted; | ||
394 | |||
395 | UChar* block = s->block; | ||
396 | UInt32* zptr = s->zptr; | ||
397 | UInt16* quadrant = s->quadrant; | ||
398 | Int32* ftab = s->ftab; | ||
399 | Int32* workDone = &(s->workDone); | ||
400 | Int32 nblock = s->nblock; | ||
401 | Int32 workLimit = s->workLimit; | ||
402 | Bool firstAttempt = s->firstAttempt; | ||
403 | |||
404 | /*-- | ||
405 | In the various block-sized structures, live data runs | ||
406 | from 0 to last+NUM_OVERSHOOT_BYTES inclusive. First, | ||
407 | set up the overshoot area for block. | ||
408 | --*/ | ||
409 | |||
410 | if (s->verbosity >= 4) | ||
411 | VPrintf0( " sort initialise ...\n" ); | ||
412 | |||
413 | for (i = 0; i < BZ_NUM_OVERSHOOT_BYTES; i++) | ||
414 | block[nblock+i] = block[i % nblock]; | ||
415 | for (i = 0; i < nblock+BZ_NUM_OVERSHOOT_BYTES; i++) | ||
416 | quadrant[i] = 0; | ||
417 | |||
418 | |||
419 | if (nblock <= 4000) { | ||
420 | |||
421 | /*-- | ||
422 | Use simpleSort(), since the full sorting mechanism | ||
423 | has quite a large constant overhead. | ||
424 | --*/ | ||
425 | if (s->verbosity >= 4) VPrintf0( " simpleSort ...\n" ); | ||
426 | for (i = 0; i < nblock; i++) zptr[i] = i; | ||
427 | firstAttempt = False; | ||
428 | *workDone = workLimit = 0; | ||
429 | simpleSort ( s, 0, nblock-1, 0 ); | ||
430 | if (s->verbosity >= 4) VPrintf0( " simpleSort done.\n" ); | ||
431 | |||
432 | } else { | ||
433 | |||
434 | numQSorted = 0; | ||
435 | for (i = 0; i <= 255; i++) bigDone[i] = False; | ||
436 | |||
437 | if (s->verbosity >= 4) VPrintf0( " bucket sorting ...\n" ); | ||
438 | |||
439 | for (i = 0; i <= 65536; i++) ftab[i] = 0; | ||
440 | |||
441 | c1 = block[nblock-1]; | ||
442 | for (i = 0; i < nblock; i++) { | ||
443 | c2 = block[i]; | ||
444 | ftab[(c1 << 8) + c2]++; | ||
445 | c1 = c2; | ||
446 | } | ||
447 | |||
448 | for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1]; | ||
449 | |||
450 | c1 = block[0]; | ||
451 | for (i = 0; i < nblock-1; i++) { | ||
452 | c2 = block[i+1]; | ||
453 | j = (c1 << 8) + c2; | ||
454 | c1 = c2; | ||
455 | ftab[j]--; | ||
456 | zptr[ftab[j]] = i; | ||
457 | } | ||
458 | j = (block[nblock-1] << 8) + block[0]; | ||
459 | ftab[j]--; | ||
460 | zptr[ftab[j]] = nblock-1; | ||
461 | |||
462 | /*-- | ||
463 | Now ftab contains the first loc of every small bucket. | ||
464 | Calculate the running order, from smallest to largest | ||
465 | big bucket. | ||
466 | --*/ | ||
467 | |||
468 | for (i = 0; i <= 255; i++) runningOrder[i] = i; | ||
469 | |||
470 | { | ||
471 | Int32 vv; | ||
472 | Int32 h = 1; | ||
473 | do h = 3 * h + 1; while (h <= 256); | ||
474 | do { | ||
475 | h = h / 3; | ||
476 | for (i = h; i <= 255; i++) { | ||
477 | vv = runningOrder[i]; | ||
478 | j = i; | ||
479 | while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) { | ||
480 | runningOrder[j] = runningOrder[j-h]; | ||
481 | j = j - h; | ||
482 | if (j <= (h - 1)) goto zero; | ||
483 | } | ||
484 | zero: | ||
485 | runningOrder[j] = vv; | ||
486 | } | ||
487 | } while (h != 1); | ||
488 | } | ||
489 | |||
490 | /*-- | ||
491 | The main sorting loop. | ||
492 | --*/ | ||
493 | |||
494 | for (i = 0; i <= 255; i++) { | ||
495 | |||
496 | /*-- | ||
497 | Process big buckets, starting with the least full. | ||
498 | Basically this is a 4-step process in which we call | ||
499 | qSort3 to sort the small buckets [ss, j], but | ||
500 | also make a big effort to avoid the calls if we can. | ||
501 | --*/ | ||
502 | ss = runningOrder[i]; | ||
503 | |||
504 | /*-- | ||
505 | Step 1: | ||
506 | Complete the big bucket [ss] by quicksorting | ||
507 | any unsorted small buckets [ss, j], for j != ss. | ||
508 | Hopefully previous pointer-scanning phases have already | ||
509 | completed many of the small buckets [ss, j], so | ||
510 | we don't have to sort them at all. | ||
511 | --*/ | ||
512 | for (j = 0; j <= 255; j++) { | ||
513 | if (j != ss) { | ||
514 | sb = (ss << 8) + j; | ||
515 | if ( ! (ftab[sb] & SETMASK) ) { | ||
516 | Int32 lo = ftab[sb] & CLEARMASK; | ||
517 | Int32 hi = (ftab[sb+1] & CLEARMASK) - 1; | ||
518 | if (hi > lo) { | ||
519 | if (s->verbosity >= 4) | ||
520 | VPrintf4( " qsort [0x%x, 0x%x] done %d this %d\n", | ||
521 | ss, j, numQSorted, hi - lo + 1 ); | ||
522 | qSort3 ( s, lo, hi, 2 ); | ||
523 | numQSorted += ( hi - lo + 1 ); | ||
524 | if (*workDone > workLimit && firstAttempt) return; | ||
525 | } | ||
526 | } | ||
527 | ftab[sb] |= SETMASK; | ||
528 | } | ||
529 | } | ||
530 | |||
531 | /*-- | ||
532 | Step 2: | ||
533 | Deal specially with case [ss, ss]. This establishes the | ||
534 | sorted order for [ss, ss] without any comparisons. | ||
535 | A clever trick, cryptically described as steps Q6b and Q6c | ||
536 | in SRC-124 (aka BW94). This makes it entirely practical to | ||
537 | not use a preliminary run-length coder, but unfortunately | ||
538 | we are now stuck with the .bz2 file format. | ||
539 | --*/ | ||
540 | { | ||
541 | Int32 put0, get0, put1, get1; | ||
542 | Int32 sbn = (ss << 8) + ss; | ||
543 | Int32 lo = ftab[sbn] & CLEARMASK; | ||
544 | Int32 hi = (ftab[sbn+1] & CLEARMASK) - 1; | ||
545 | UChar ssc = (UChar)ss; | ||
546 | put0 = lo; | ||
547 | get0 = ftab[ss << 8] & CLEARMASK; | ||
548 | put1 = hi; | ||
549 | get1 = (ftab[(ss+1) << 8] & CLEARMASK) - 1; | ||
550 | while (get0 < put0) { | ||
551 | j = zptr[get0]-1; if (j < 0) j += nblock; | ||
552 | c1 = block[j]; | ||
553 | if (c1 == ssc) { zptr[put0] = j; put0++; }; | ||
554 | get0++; | ||
555 | } | ||
556 | while (get1 > put1) { | ||
557 | j = zptr[get1]-1; if (j < 0) j += nblock; | ||
558 | c1 = block[j]; | ||
559 | if (c1 == ssc) { zptr[put1] = j; put1--; }; | ||
560 | get1--; | ||
561 | } | ||
562 | ftab[sbn] |= SETMASK; | ||
563 | } | ||
564 | |||
565 | /*-- | ||
566 | Step 3: | ||
567 | The [ss] big bucket is now done. Record this fact, | ||
568 | and update the quadrant descriptors. Remember to | ||
569 | update quadrants in the overshoot area too, if | ||
570 | necessary. The "if (i < 255)" test merely skips | ||
571 | this updating for the last bucket processed, since | ||
572 | updating for the last bucket is pointless. | ||
573 | |||
574 | The quadrant array provides a way to incrementally | ||
575 | cache sort orderings, as they appear, so as to | ||
576 | make subsequent comparisons in fullGtU() complete | ||
577 | faster. For repetitive blocks this makes a big | ||
578 | difference (but not big enough to be able to avoid | ||
579 | randomisation for very repetitive data.) | ||
580 | |||
581 | The precise meaning is: at all times: | ||
582 | |||
583 | for 0 <= i < nblock and 0 <= j <= nblock | ||
584 | |||
585 | if block[i] != block[j], | ||
586 | |||
587 | then the relative values of quadrant[i] and | ||
588 | quadrant[j] are meaningless. | ||
589 | |||
590 | else { | ||
591 | if quadrant[i] < quadrant[j] | ||
592 | then the string starting at i lexicographically | ||
593 | precedes the string starting at j | ||
594 | |||
595 | else if quadrant[i] > quadrant[j] | ||
596 | then the string starting at j lexicographically | ||
597 | precedes the string starting at i | ||
598 | |||
599 | else | ||
600 | the relative ordering of the strings starting | ||
601 | at i and j has not yet been determined. | ||
602 | } | ||
603 | --*/ | ||
604 | bigDone[ss] = True; | ||
605 | |||
606 | if (i < 255) { | ||
607 | Int32 bbStart = ftab[ss << 8] & CLEARMASK; | ||
608 | Int32 bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart; | ||
609 | Int32 shifts = 0; | ||
610 | |||
611 | while ((bbSize >> shifts) > 65534) shifts++; | ||
612 | |||
613 | for (j = 0; j < bbSize; j++) { | ||
614 | Int32 a2update = zptr[bbStart + j]; | ||
615 | UInt16 qVal = (UInt16)(j >> shifts); | ||
616 | quadrant[a2update] = qVal; | ||
617 | if (a2update < BZ_NUM_OVERSHOOT_BYTES) | ||
618 | quadrant[a2update + nblock] = qVal; | ||
619 | } | ||
620 | |||
621 | AssertH ( ( ((bbSize-1) >> shifts) <= 65535 ), 1002 ); | ||
622 | } | ||
623 | |||
624 | /*-- | ||
625 | Step 4: | ||
626 | Now scan this big bucket [ss] so as to synthesise the | ||
627 | sorted order for small buckets [t, ss] for all t != ss. | ||
628 | This will avoid doing Real Work in subsequent Step 1's. | ||
629 | --*/ | ||
630 | for (j = 0; j <= 255; j++) | ||
631 | copy[j] = ftab[(j << 8) + ss] & CLEARMASK; | ||
632 | |||
633 | for (j = ftab[ss << 8] & CLEARMASK; | ||
634 | j < (ftab[(ss+1) << 8] & CLEARMASK); | ||
635 | j++) { | ||
636 | k = zptr[j]-1; if (k < 0) k += nblock; | ||
637 | c1 = block[k]; | ||
638 | if ( ! bigDone[c1] ) { | ||
639 | zptr[copy[c1]] = k; | ||
640 | copy[c1] ++; | ||
641 | } | ||
642 | } | ||
643 | |||
644 | for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK; | ||
645 | } | ||
646 | if (s->verbosity >= 4) | ||
647 | VPrintf3( " %d pointers, %d sorted, %d scanned\n", | ||
648 | nblock, numQSorted, nblock - numQSorted ); | ||
649 | } | ||
650 | } | ||
651 | |||
652 | |||
653 | /*---------------------------------------------*/ | ||
654 | static void randomiseBlock ( EState* s ) | ||
655 | { | ||
656 | Int32 i; | ||
657 | BZ_RAND_INIT_MASK; | ||
658 | for (i = 0; i < 256; i++) s->inUse[i] = False; | ||
659 | |||
660 | for (i = 0; i < s->nblock; i++) { | ||
661 | BZ_RAND_UPD_MASK; | ||
662 | s->block[i] ^= BZ_RAND_MASK; | ||
663 | s->inUse[s->block[i]] = True; | ||
664 | } | ||
665 | } | ||
666 | |||
667 | |||
668 | /*---------------------------------------------*/ | ||
669 | void blockSort ( EState* s ) | ||
670 | { | ||
671 | Int32 i; | ||
672 | |||
673 | s->workLimit = s->workFactor * (s->nblock - 1); | ||
674 | s->workDone = 0; | ||
675 | s->blockRandomised = False; | ||
676 | s->firstAttempt = True; | ||
677 | |||
678 | sortMain ( s ); | ||
679 | |||
680 | if (s->verbosity >= 3) | ||
681 | VPrintf3( " %d work, %d block, ratio %5.2f\n", | ||
682 | s->workDone, s->nblock-1, | ||
683 | (float)(s->workDone) / (float)(s->nblock-1) ); | ||
684 | |||
685 | if (s->workDone > s->workLimit && s->firstAttempt) { | ||
686 | if (s->verbosity >= 2) | ||
687 | VPrintf0( " sorting aborted; randomising block\n" ); | ||
688 | randomiseBlock ( s ); | ||
689 | s->workLimit = s->workDone = 0; | ||
690 | s->blockRandomised = True; | ||
691 | s->firstAttempt = False; | ||
692 | sortMain ( s ); | ||
693 | if (s->verbosity >= 3) | ||
694 | VPrintf3( " %d work, %d block, ratio %f\n", | ||
695 | s->workDone, s->nblock-1, | ||
696 | (float)(s->workDone) / (float)(s->nblock-1) ); | ||
697 | } | ||
698 | |||
699 | s->origPtr = -1; | ||
700 | for (i = 0; i < s->nblock; i++) | ||
701 | if (s->zptr[i] == 0) | ||
702 | { s->origPtr = i; break; }; | ||
703 | |||
704 | AssertH( s->origPtr != -1, 1003 ); | ||
705 | } | ||
706 | |||
707 | /*-------------------------------------------------------------*/ | ||
708 | /*--- end blocksort.c ---*/ | ||
709 | /*-------------------------------------------------------------*/ | ||