aboutsummaryrefslogtreecommitdiff
path: root/C/BwtSort.c
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--C/BwtSort.c468
1 files changed, 290 insertions, 178 deletions
diff --git a/C/BwtSort.c b/C/BwtSort.c
index 05ad6de..8f64f9d 100644
--- a/C/BwtSort.c
+++ b/C/BwtSort.c
@@ -1,5 +1,5 @@
1/* BwtSort.c -- BWT block sorting 1/* BwtSort.c -- BWT block sorting
22023-04-02 : Igor Pavlov : Public domain */ 2: Igor Pavlov : Public domain */
3 3
4#include "Precomp.h" 4#include "Precomp.h"
5 5
@@ -7,6 +7,44 @@
7#include "Sort.h" 7#include "Sort.h"
8 8
9/* #define BLOCK_SORT_USE_HEAP_SORT */ 9/* #define BLOCK_SORT_USE_HEAP_SORT */
10// #define BLOCK_SORT_USE_HEAP_SORT
11
12#ifdef BLOCK_SORT_USE_HEAP_SORT
13
14#define HeapSortRefDown(p, vals, n, size, temp) \
15 { size_t k = n; UInt32 val = vals[temp]; for (;;) { \
16 size_t s = k << 1; \
17 if (s > size) break; \
18 if (s < size && vals[p[s + 1]] > vals[p[s]]) s++; \
19 if (val >= vals[p[s]]) break; \
20 p[k] = p[s]; k = s; \
21 } p[k] = temp; }
22
23void HeapSortRef(UInt32 *p, UInt32 *vals, size_t size)
24{
25 if (size <= 1)
26 return;
27 p--;
28 {
29 size_t i = size / 2;
30 do
31 {
32 UInt32 temp = p[i];
33 HeapSortRefDown(p, vals, i, size, temp);
34 }
35 while (--i != 0);
36 }
37 do
38 {
39 UInt32 temp = p[size];
40 p[size--] = p[1];
41 HeapSortRefDown(p, vals, 1, size, temp);
42 }
43 while (size > 1);
44}
45
46#endif // BLOCK_SORT_USE_HEAP_SORT
47
10 48
11/* Don't change it !!! */ 49/* Don't change it !!! */
12#define kNumHashBytes 2 50#define kNumHashBytes 2
@@ -27,26 +65,27 @@
27 65
28#else 66#else
29 67
30#define kNumBitsMax 20 68#define kNumBitsMax 20
31#define kIndexMask ((1 << kNumBitsMax) - 1) 69#define kIndexMask (((UInt32)1 << kNumBitsMax) - 1)
32#define kNumExtraBits (32 - kNumBitsMax) 70#define kNumExtraBits (32 - kNumBitsMax)
33#define kNumExtra0Bits (kNumExtraBits - 2) 71#define kNumExtra0Bits (kNumExtraBits - 2)
34#define kNumExtra0Mask ((1 << kNumExtra0Bits) - 1) 72#define kNumExtra0Mask ((1 << kNumExtra0Bits) - 1)
35 73
36#define SetFinishedGroupSize(p, size) \ 74#define SetFinishedGroupSize(p, size) \
37 { *(p) |= ((((size) - 1) & kNumExtra0Mask) << kNumBitsMax); \ 75 { *(p) |= ((((UInt32)(size) - 1) & kNumExtra0Mask) << kNumBitsMax); \
38 if ((size) > (1 << kNumExtra0Bits)) { \ 76 if ((size) > (1 << kNumExtra0Bits)) { \
39 *(p) |= 0x40000000; *((p) + 1) |= ((((size) - 1)>> kNumExtra0Bits) << kNumBitsMax); } } \ 77 *(p) |= 0x40000000; \
78 *((p) + 1) |= (((UInt32)(size) - 1) >> kNumExtra0Bits) << kNumBitsMax; } } \
40 79
41static void SetGroupSize(UInt32 *p, UInt32 size) 80static void SetGroupSize(UInt32 *p, size_t size)
42{ 81{
43 if (--size == 0) 82 if (--size == 0)
44 return; 83 return;
45 *p |= 0x80000000 | ((size & kNumExtra0Mask) << kNumBitsMax); 84 *p |= 0x80000000 | (((UInt32)size & kNumExtra0Mask) << kNumBitsMax);
46 if (size >= (1 << kNumExtra0Bits)) 85 if (size >= (1 << kNumExtra0Bits))
47 { 86 {
48 *p |= 0x40000000; 87 *p |= 0x40000000;
49 p[1] |= ((size >> kNumExtra0Bits) << kNumBitsMax); 88 p[1] |= (((UInt32)size >> kNumExtra0Bits) << kNumBitsMax);
50 } 89 }
51} 90}
52 91
@@ -59,12 +98,14 @@ returns: 1 - if there are groups, 0 - no more groups
59*/ 98*/
60 99
61static 100static
62UInt32 101unsigned
63Z7_FASTCALL 102Z7_FASTCALL
64SortGroup(UInt32 BlockSize, UInt32 NumSortedBytes, UInt32 groupOffset, UInt32 groupSize, int NumRefBits, UInt32 *Indices 103SortGroup(size_t BlockSize, size_t NumSortedBytes,
65 #ifndef BLOCK_SORT_USE_HEAP_SORT 104 size_t groupOffset, size_t groupSize,
66 , UInt32 left, UInt32 range 105 unsigned NumRefBits, UInt32 *Indices
67 #endif 106#ifndef BLOCK_SORT_USE_HEAP_SORT
107 , size_t left, size_t range
108#endif
68 ) 109 )
69{ 110{
70 UInt32 *ind2 = Indices + groupOffset; 111 UInt32 *ind2 = Indices + groupOffset;
@@ -79,90 +120,93 @@ SortGroup(UInt32 BlockSize, UInt32 NumSortedBytes, UInt32 groupOffset, UInt32 gr
79 return 0; 120 return 0;
80 } 121 }
81 Groups = Indices + BlockSize + BS_TEMP_SIZE; 122 Groups = Indices + BlockSize + BS_TEMP_SIZE;
82 if (groupSize <= ((UInt32)1 << NumRefBits) 123 if (groupSize <= ((size_t)1 << NumRefBits)
83 #ifndef BLOCK_SORT_USE_HEAP_SORT 124#ifndef BLOCK_SORT_USE_HEAP_SORT
84 && groupSize <= range 125 && groupSize <= range
85 #endif 126#endif
86 ) 127 )
87 { 128 {
88 UInt32 *temp = Indices + BlockSize; 129 UInt32 *temp = Indices + BlockSize;
89 UInt32 j; 130 size_t j, group;
90 UInt32 mask, thereAreGroups, group, cg; 131 UInt32 mask, cg;
132 unsigned thereAreGroups;
91 { 133 {
92 UInt32 gPrev; 134 UInt32 gPrev;
93 UInt32 gRes = 0; 135 UInt32 gRes = 0;
94 { 136 {
95 UInt32 sp = ind2[0] + NumSortedBytes; 137 size_t sp = ind2[0] + NumSortedBytes;
96 if (sp >= BlockSize) sp -= BlockSize; 138 if (sp >= BlockSize)
139 sp -= BlockSize;
97 gPrev = Groups[sp]; 140 gPrev = Groups[sp];
98 temp[0] = (gPrev << NumRefBits); 141 temp[0] = gPrev << NumRefBits;
99 } 142 }
100 143
101 for (j = 1; j < groupSize; j++) 144 for (j = 1; j < groupSize; j++)
102 { 145 {
103 UInt32 sp = ind2[j] + NumSortedBytes; 146 size_t sp = ind2[j] + NumSortedBytes;
104 UInt32 g; 147 UInt32 g;
105 if (sp >= BlockSize) sp -= BlockSize; 148 if (sp >= BlockSize)
149 sp -= BlockSize;
106 g = Groups[sp]; 150 g = Groups[sp];
107 temp[j] = (g << NumRefBits) | j; 151 temp[j] = (g << NumRefBits) | (UInt32)j;
108 gRes |= (gPrev ^ g); 152 gRes |= (gPrev ^ g);
109 } 153 }
110 if (gRes == 0) 154 if (gRes == 0)
111 { 155 {
112 #ifndef BLOCK_SORT_EXTERNAL_FLAGS 156#ifndef BLOCK_SORT_EXTERNAL_FLAGS
113 SetGroupSize(ind2, groupSize); 157 SetGroupSize(ind2, groupSize);
114 #endif 158#endif
115 return 1; 159 return 1;
116 } 160 }
117 } 161 }
118 162
119 HeapSort(temp, groupSize); 163 HeapSort(temp, groupSize);
120 mask = (((UInt32)1 << NumRefBits) - 1); 164 mask = ((UInt32)1 << NumRefBits) - 1;
121 thereAreGroups = 0; 165 thereAreGroups = 0;
122 166
123 group = groupOffset; 167 group = groupOffset;
124 cg = (temp[0] >> NumRefBits); 168 cg = temp[0] >> NumRefBits;
125 temp[0] = ind2[temp[0] & mask]; 169 temp[0] = ind2[temp[0] & mask];
126 170
127 { 171 {
128 #ifdef BLOCK_SORT_EXTERNAL_FLAGS 172#ifdef BLOCK_SORT_EXTERNAL_FLAGS
129 UInt32 *Flags = Groups + BlockSize; 173 UInt32 *Flags = Groups + BlockSize;
130 #else 174#else
131 UInt32 prevGroupStart = 0; 175 size_t prevGroupStart = 0;
132 #endif 176#endif
133 177
134 for (j = 1; j < groupSize; j++) 178 for (j = 1; j < groupSize; j++)
135 { 179 {
136 UInt32 val = temp[j]; 180 const UInt32 val = temp[j];
137 UInt32 cgCur = (val >> NumRefBits); 181 const UInt32 cgCur = val >> NumRefBits;
138 182
139 if (cgCur != cg) 183 if (cgCur != cg)
140 { 184 {
141 cg = cgCur; 185 cg = cgCur;
142 group = groupOffset + j; 186 group = groupOffset + j;
143 187
144 #ifdef BLOCK_SORT_EXTERNAL_FLAGS 188#ifdef BLOCK_SORT_EXTERNAL_FLAGS
145 { 189 {
146 UInt32 t = group - 1; 190 const size_t t = group - 1;
147 Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask)); 191 Flags[t >> kNumFlagsBits] &= ~((UInt32)1 << (t & kFlagsMask));
148 } 192 }
149 #else 193#else
150 SetGroupSize(temp + prevGroupStart, j - prevGroupStart); 194 SetGroupSize(temp + prevGroupStart, j - prevGroupStart);
151 prevGroupStart = j; 195 prevGroupStart = j;
152 #endif 196#endif
153 } 197 }
154 else 198 else
155 thereAreGroups = 1; 199 thereAreGroups = 1;
156 { 200 {
157 UInt32 ind = ind2[val & mask]; 201 const UInt32 ind = ind2[val & mask];
158 temp[j] = ind; 202 temp[j] = ind;
159 Groups[ind] = group; 203 Groups[ind] = (UInt32)group;
160 } 204 }
161 } 205 }
162 206
163 #ifndef BLOCK_SORT_EXTERNAL_FLAGS 207#ifndef BLOCK_SORT_EXTERNAL_FLAGS
164 SetGroupSize(temp + prevGroupStart, j - prevGroupStart); 208 SetGroupSize(temp + prevGroupStart, j - prevGroupStart);
165 #endif 209#endif
166 } 210 }
167 211
168 for (j = 0; j < groupSize; j++) 212 for (j = 0; j < groupSize; j++)
@@ -172,37 +216,42 @@ SortGroup(UInt32 BlockSize, UInt32 NumSortedBytes, UInt32 groupOffset, UInt32 gr
172 216
173 /* Check that all strings are in one group (cannot sort) */ 217 /* Check that all strings are in one group (cannot sort) */
174 { 218 {
175 UInt32 group, j; 219 UInt32 group;
176 UInt32 sp = ind2[0] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize; 220 size_t j;
221 size_t sp = ind2[0] + NumSortedBytes;
222 if (sp >= BlockSize)
223 sp -= BlockSize;
177 group = Groups[sp]; 224 group = Groups[sp];
178 for (j = 1; j < groupSize; j++) 225 for (j = 1; j < groupSize; j++)
179 { 226 {
180 sp = ind2[j] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize; 227 sp = ind2[j] + NumSortedBytes;
228 if (sp >= BlockSize)
229 sp -= BlockSize;
181 if (Groups[sp] != group) 230 if (Groups[sp] != group)
182 break; 231 break;
183 } 232 }
184 if (j == groupSize) 233 if (j == groupSize)
185 { 234 {
186 #ifndef BLOCK_SORT_EXTERNAL_FLAGS 235#ifndef BLOCK_SORT_EXTERNAL_FLAGS
187 SetGroupSize(ind2, groupSize); 236 SetGroupSize(ind2, groupSize);
188 #endif 237#endif
189 return 1; 238 return 1;
190 } 239 }
191 } 240 }
192 241
193 #ifndef BLOCK_SORT_USE_HEAP_SORT 242#ifndef BLOCK_SORT_USE_HEAP_SORT
194 { 243 {
195 /* ---------- Range Sort ---------- */ 244 /* ---------- Range Sort ---------- */
196 UInt32 i; 245 size_t i;
197 UInt32 mid; 246 size_t mid;
198 for (;;) 247 for (;;)
199 { 248 {
200 UInt32 j; 249 size_t j;
201 if (range <= 1) 250 if (range <= 1)
202 { 251 {
203 #ifndef BLOCK_SORT_EXTERNAL_FLAGS 252#ifndef BLOCK_SORT_EXTERNAL_FLAGS
204 SetGroupSize(ind2, groupSize); 253 SetGroupSize(ind2, groupSize);
205 #endif 254#endif
206 return 1; 255 return 1;
207 } 256 }
208 mid = left + ((range + 1) >> 1); 257 mid = left + ((range + 1) >> 1);
@@ -210,7 +259,7 @@ SortGroup(UInt32 BlockSize, UInt32 NumSortedBytes, UInt32 groupOffset, UInt32 gr
210 i = 0; 259 i = 0;
211 do 260 do
212 { 261 {
213 UInt32 sp = ind2[i] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize; 262 size_t sp = ind2[i] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize;
214 if (Groups[sp] >= mid) 263 if (Groups[sp] >= mid)
215 { 264 {
216 for (j--; j > i; j--) 265 for (j--; j > i; j--)
@@ -238,51 +287,53 @@ SortGroup(UInt32 BlockSize, UInt32 NumSortedBytes, UInt32 groupOffset, UInt32 gr
238 break; 287 break;
239 } 288 }
240 289
241 #ifdef BLOCK_SORT_EXTERNAL_FLAGS 290#ifdef BLOCK_SORT_EXTERNAL_FLAGS
242 { 291 {
243 UInt32 t = (groupOffset + i - 1); 292 const size_t t = groupOffset + i - 1;
244 UInt32 *Flags = Groups + BlockSize; 293 UInt32 *Flags = Groups + BlockSize;
245 Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask)); 294 Flags[t >> kNumFlagsBits] &= ~((UInt32)1 << (t & kFlagsMask));
246 } 295 }
247 #endif 296#endif
248 297
249 { 298 {
250 UInt32 j; 299 size_t j;
251 for (j = i; j < groupSize; j++) 300 for (j = i; j < groupSize; j++)
252 Groups[ind2[j]] = groupOffset + i; 301 Groups[ind2[j]] = (UInt32)(groupOffset + i);
253 } 302 }
254 303
255 { 304 {
256 UInt32 res = SortGroup(BlockSize, NumSortedBytes, groupOffset, i, NumRefBits, Indices, left, mid - left); 305 unsigned res = SortGroup(BlockSize, NumSortedBytes, groupOffset, i, NumRefBits, Indices, left, mid - left);
257 return res | SortGroup(BlockSize, NumSortedBytes, groupOffset + i, groupSize - i, NumRefBits, Indices, mid, range - (mid - left)); 306 return res | SortGroup(BlockSize, NumSortedBytes, groupOffset + i, groupSize - i, NumRefBits, Indices, mid, range - (mid - left));
258 } 307 }
259 308
260 } 309 }
261 310
262 #else 311#else // BLOCK_SORT_USE_HEAP_SORT
263 312
264 /* ---------- Heap Sort ---------- */ 313 /* ---------- Heap Sort ---------- */
265 314
266 { 315 {
267 UInt32 j; 316 size_t j;
268 for (j = 0; j < groupSize; j++) 317 for (j = 0; j < groupSize; j++)
269 { 318 {
270 UInt32 sp = ind2[j] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize; 319 size_t sp = ind2[j] + NumSortedBytes;
271 ind2[j] = sp; 320 if (sp >= BlockSize)
321 sp -= BlockSize;
322 ind2[j] = (UInt32)sp;
272 } 323 }
273 324
274 HeapSortRef(ind2, Groups, groupSize); 325 HeapSortRef(ind2, Groups, groupSize);
275 326
276 /* Write Flags */ 327 /* Write Flags */
277 { 328 {
278 UInt32 sp = ind2[0]; 329 size_t sp = ind2[0];
279 UInt32 group = Groups[sp]; 330 UInt32 group = Groups[sp];
280 331
281 #ifdef BLOCK_SORT_EXTERNAL_FLAGS 332#ifdef BLOCK_SORT_EXTERNAL_FLAGS
282 UInt32 *Flags = Groups + BlockSize; 333 UInt32 *Flags = Groups + BlockSize;
283 #else 334#else
284 UInt32 prevGroupStart = 0; 335 size_t prevGroupStart = 0;
285 #endif 336#endif
286 337
287 for (j = 1; j < groupSize; j++) 338 for (j = 1; j < groupSize; j++)
288 { 339 {
@@ -290,149 +341,210 @@ SortGroup(UInt32 BlockSize, UInt32 NumSortedBytes, UInt32 groupOffset, UInt32 gr
290 if (Groups[sp] != group) 341 if (Groups[sp] != group)
291 { 342 {
292 group = Groups[sp]; 343 group = Groups[sp];
293 #ifdef BLOCK_SORT_EXTERNAL_FLAGS 344#ifdef BLOCK_SORT_EXTERNAL_FLAGS
294 { 345 {
295 UInt32 t = groupOffset + j - 1; 346 const size_t t = groupOffset + j - 1;
296 Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask)); 347 Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask));
297 } 348 }
298 #else 349#else
299 SetGroupSize(ind2 + prevGroupStart, j - prevGroupStart); 350 SetGroupSize(ind2 + prevGroupStart, j - prevGroupStart);
300 prevGroupStart = j; 351 prevGroupStart = j;
301 #endif 352#endif
302 } 353 }
303 } 354 }
304 355
305 #ifndef BLOCK_SORT_EXTERNAL_FLAGS 356#ifndef BLOCK_SORT_EXTERNAL_FLAGS
306 SetGroupSize(ind2 + prevGroupStart, j - prevGroupStart); 357 SetGroupSize(ind2 + prevGroupStart, j - prevGroupStart);
307 #endif 358#endif
308 } 359 }
309 { 360 {
310 /* Write new Groups values and Check that there are groups */ 361 /* Write new Groups values and Check that there are groups */
311 UInt32 thereAreGroups = 0; 362 unsigned thereAreGroups = 0;
312 for (j = 0; j < groupSize; j++) 363 for (j = 0; j < groupSize; j++)
313 { 364 {
314 UInt32 group = groupOffset + j; 365 size_t group = groupOffset + j;
315 #ifndef BLOCK_SORT_EXTERNAL_FLAGS 366#ifndef BLOCK_SORT_EXTERNAL_FLAGS
316 UInt32 subGroupSize = ((ind2[j] & ~0xC0000000) >> kNumBitsMax); 367 UInt32 subGroupSize = ((ind2[j] & ~0xC0000000) >> kNumBitsMax);
317 if ((ind2[j] & 0x40000000) != 0) 368 if (ind2[j] & 0x40000000)
318 subGroupSize += ((ind2[(size_t)j + 1] >> kNumBitsMax) << kNumExtra0Bits); 369 subGroupSize += ((ind2[(size_t)j + 1] >> kNumBitsMax) << kNumExtra0Bits);
319 subGroupSize++; 370 subGroupSize++;
320 for (;;) 371 for (;;)
321 { 372 {
322 UInt32 original = ind2[j]; 373 const UInt32 original = ind2[j];
323 UInt32 sp = original & kIndexMask; 374 size_t sp = original & kIndexMask;
324 if (sp < NumSortedBytes) sp += BlockSize; sp -= NumSortedBytes; 375 if (sp < NumSortedBytes)
325 ind2[j] = sp | (original & ~kIndexMask); 376 sp += BlockSize;
326 Groups[sp] = group; 377 sp -= NumSortedBytes;
378 ind2[j] = (UInt32)sp | (original & ~kIndexMask);
379 Groups[sp] = (UInt32)group;
327 if (--subGroupSize == 0) 380 if (--subGroupSize == 0)
328 break; 381 break;
329 j++; 382 j++;
330 thereAreGroups = 1; 383 thereAreGroups = 1;
331 } 384 }
332 #else 385#else
333 UInt32 *Flags = Groups + BlockSize; 386 UInt32 *Flags = Groups + BlockSize;
334 for (;;) 387 for (;;)
335 { 388 {
336 UInt32 sp = ind2[j]; if (sp < NumSortedBytes) sp += BlockSize; sp -= NumSortedBytes; 389 size_t sp = ind2[j];
337 ind2[j] = sp; 390 if (sp < NumSortedBytes)
338 Groups[sp] = group; 391 sp += BlockSize;
392 sp -= NumSortedBytes;
393 ind2[j] = (UInt32)sp;
394 Groups[sp] = (UInt32)group;
339 if ((Flags[(groupOffset + j) >> kNumFlagsBits] & (1 << ((groupOffset + j) & kFlagsMask))) == 0) 395 if ((Flags[(groupOffset + j) >> kNumFlagsBits] & (1 << ((groupOffset + j) & kFlagsMask))) == 0)
340 break; 396 break;
341 j++; 397 j++;
342 thereAreGroups = 1; 398 thereAreGroups = 1;
343 } 399 }
344 #endif 400#endif
345 } 401 }
346 return thereAreGroups; 402 return thereAreGroups;
347 } 403 }
348 } 404 }
349 #endif 405#endif // BLOCK_SORT_USE_HEAP_SORT
350} 406}
351 407
408
352/* conditions: blockSize > 0 */ 409/* conditions: blockSize > 0 */
353UInt32 BlockSort(UInt32 *Indices, const Byte *data, UInt32 blockSize) 410UInt32 BlockSort(UInt32 *Indices, const Byte *data, size_t blockSize)
354{ 411{
355 UInt32 *counters = Indices + blockSize; 412 UInt32 *counters = Indices + blockSize;
356 UInt32 i; 413 size_t i;
357 UInt32 *Groups; 414 UInt32 *Groups;
358 #ifdef BLOCK_SORT_EXTERNAL_FLAGS 415#ifdef BLOCK_SORT_EXTERNAL_FLAGS
359 UInt32 *Flags; 416 UInt32 *Flags;
360 #endif 417#endif
361 418
362 /* Radix-Sort for 2 bytes */ 419/* Radix-Sort for 2 bytes */
420// { UInt32 yyy; for (yyy = 0; yyy < 100; yyy++) {
363 for (i = 0; i < kNumHashValues; i++) 421 for (i = 0; i < kNumHashValues; i++)
364 counters[i] = 0; 422 counters[i] = 0;
365 for (i = 0; i < blockSize - 1; i++) 423 {
366 counters[((UInt32)data[i] << 8) | data[(size_t)i + 1]]++; 424 const Byte *data2 = data;
367 counters[((UInt32)data[i] << 8) | data[0]]++; 425 size_t a = data[(size_t)blockSize - 1];
426 const Byte *data_lim = data + blockSize;
427 if (blockSize >= 4)
428 {
429 data_lim -= 3;
430 do
431 {
432 size_t b;
433 b = data2[0]; counters[(a << 8) | b]++;
434 a = data2[1]; counters[(b << 8) | a]++;
435 b = data2[2]; counters[(a << 8) | b]++;
436 a = data2[3]; counters[(b << 8) | a]++;
437 data2 += 4;
438 }
439 while (data2 < data_lim);
440 data_lim += 3;
441 }
442 while (data2 != data_lim)
443 {
444 size_t b = *data2++;
445 counters[(a << 8) | b]++;
446 a = b;
447 }
448 }
449// }}
368 450
369 Groups = counters + BS_TEMP_SIZE; 451 Groups = counters + BS_TEMP_SIZE;
370 #ifdef BLOCK_SORT_EXTERNAL_FLAGS 452#ifdef BLOCK_SORT_EXTERNAL_FLAGS
371 Flags = Groups + blockSize; 453 Flags = Groups + blockSize;
372 { 454 {
373 UInt32 numWords = (blockSize + kFlagsMask) >> kNumFlagsBits; 455 const size_t numWords = (blockSize + kFlagsMask) >> kNumFlagsBits;
374 for (i = 0; i < numWords; i++) 456 for (i = 0; i < numWords; i++)
375 Flags[i] = kAllFlags; 457 Flags[i] = kAllFlags;
376 } 458 }
377 #endif 459#endif
378 460
379 { 461 {
380 UInt32 sum = 0; 462 UInt32 sum = 0;
381 for (i = 0; i < kNumHashValues; i++) 463 for (i = 0; i < kNumHashValues; i++)
382 { 464 {
383 UInt32 groupSize = counters[i]; 465 const UInt32 groupSize = counters[i];
384 if (groupSize > 0) 466 counters[i] = sum;
467 sum += groupSize;
468#ifdef BLOCK_SORT_EXTERNAL_FLAGS
469 if (groupSize)
385 { 470 {
386 #ifdef BLOCK_SORT_EXTERNAL_FLAGS 471 const UInt32 t = sum - 1;
387 UInt32 t = sum + groupSize - 1; 472 Flags[t >> kNumFlagsBits] &= ~((UInt32)1 << (t & kFlagsMask));
388 Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask));
389 #endif
390 sum += groupSize;
391 } 473 }
392 counters[i] = sum - groupSize; 474#endif
393 } 475 }
476 }
394 477
395 for (i = 0; i < blockSize - 1; i++) 478 for (i = 0; i < blockSize - 1; i++)
396 Groups[i] = counters[((UInt32)data[i] << 8) | data[(size_t)i + 1]]; 479 Groups[i] = counters[((unsigned)data[i] << 8) | data[(size_t)i + 1]];
397 Groups[i] = counters[((UInt32)data[i] << 8) | data[0]]; 480 Groups[i] = counters[((unsigned)data[i] << 8) | data[0]];
481
482 {
483#define SET_Indices(a, b, i) \
484 { UInt32 c; \
485 a = (a << 8) | (b); \
486 c = counters[a]; \
487 Indices[c] = (UInt32)i++; \
488 counters[a] = c + 1; \
489 }
398 490
399 for (i = 0; i < blockSize - 1; i++) 491 size_t a = data[0];
400 Indices[counters[((UInt32)data[i] << 8) | data[(size_t)i + 1]]++] = i; 492 const Byte *data_ptr = data + 1;
401 Indices[counters[((UInt32)data[i] << 8) | data[0]]++] = i; 493 i = 0;
402 494 if (blockSize >= 3)
403 #ifndef BLOCK_SORT_EXTERNAL_FLAGS 495 {
496 blockSize -= 2;
497 do
498 {
499 size_t b;
500 b = data_ptr[0]; SET_Indices(a, b, i)
501 a = data_ptr[1]; SET_Indices(b, a, i)
502 data_ptr += 2;
503 }
504 while (i < blockSize);
505 blockSize += 2;
506 }
507 if (i < blockSize - 1)
404 { 508 {
509 SET_Indices(a, data[(size_t)i + 1], i)
510 a = (Byte)a;
511 }
512 SET_Indices(a, data[0], i)
513 }
514
515#ifndef BLOCK_SORT_EXTERNAL_FLAGS
516 {
405 UInt32 prev = 0; 517 UInt32 prev = 0;
406 for (i = 0; i < kNumHashValues; i++) 518 for (i = 0; i < kNumHashValues; i++)
407 { 519 {
408 UInt32 prevGroupSize = counters[i] - prev; 520 const UInt32 prevGroupSize = counters[i] - prev;
409 if (prevGroupSize == 0) 521 if (prevGroupSize == 0)
410 continue; 522 continue;
411 SetGroupSize(Indices + prev, prevGroupSize); 523 SetGroupSize(Indices + prev, prevGroupSize);
412 prev = counters[i]; 524 prev = counters[i];
413 } 525 }
414 }
415 #endif
416 } 526 }
527#endif
417 528
418 { 529 {
419 int NumRefBits; 530 unsigned NumRefBits;
420 UInt32 NumSortedBytes; 531 size_t NumSortedBytes;
421 for (NumRefBits = 0; ((blockSize - 1) >> NumRefBits) != 0; NumRefBits++); 532 for (NumRefBits = 0; ((blockSize - 1) >> NumRefBits) != 0; NumRefBits++)
533 {}
422 NumRefBits = 32 - NumRefBits; 534 NumRefBits = 32 - NumRefBits;
423 if (NumRefBits > kNumRefBitsMax) 535 if (NumRefBits > kNumRefBitsMax)
424 NumRefBits = kNumRefBitsMax; 536 NumRefBits = kNumRefBitsMax;
425 537
426 for (NumSortedBytes = kNumHashBytes; ; NumSortedBytes <<= 1) 538 for (NumSortedBytes = kNumHashBytes; ; NumSortedBytes <<= 1)
427 { 539 {
428 #ifndef BLOCK_SORT_EXTERNAL_FLAGS 540#ifndef BLOCK_SORT_EXTERNAL_FLAGS
429 UInt32 finishedGroupSize = 0; 541 size_t finishedGroupSize = 0;
430 #endif 542#endif
431 UInt32 newLimit = 0; 543 size_t newLimit = 0;
432 for (i = 0; i < blockSize;) 544 for (i = 0; i < blockSize;)
433 { 545 {
434 UInt32 groupSize; 546 size_t groupSize;
435 #ifdef BLOCK_SORT_EXTERNAL_FLAGS 547#ifdef BLOCK_SORT_EXTERNAL_FLAGS
436 548
437 if ((Flags[i >> kNumFlagsBits] & (1 << (i & kFlagsMask))) == 0) 549 if ((Flags[i >> kNumFlagsBits] & (1 << (i & kFlagsMask))) == 0)
438 { 550 {
@@ -441,56 +553,56 @@ UInt32 BlockSort(UInt32 *Indices, const Byte *data, UInt32 blockSize)
441 } 553 }
442 for (groupSize = 1; 554 for (groupSize = 1;
443 (Flags[(i + groupSize) >> kNumFlagsBits] & (1 << ((i + groupSize) & kFlagsMask))) != 0; 555 (Flags[(i + groupSize) >> kNumFlagsBits] & (1 << ((i + groupSize) & kFlagsMask))) != 0;
444 groupSize++); 556 groupSize++)
445 557 {}
446 groupSize++; 558 groupSize++;
447 559
448 #else 560#else
449 561
450 groupSize = ((Indices[i] & ~0xC0000000) >> kNumBitsMax); 562 groupSize = (Indices[i] & ~0xC0000000) >> kNumBitsMax;
451 { 563 {
452 BoolInt finishedGroup = ((Indices[i] & 0x80000000) == 0); 564 const BoolInt finishedGroup = ((Indices[i] & 0x80000000) == 0);
453 if ((Indices[i] & 0x40000000) != 0) 565 if (Indices[i] & 0x40000000)
454 {
455 groupSize += ((Indices[(size_t)i + 1] >> kNumBitsMax) << kNumExtra0Bits);
456 Indices[(size_t)i + 1] &= kIndexMask;
457 }
458 Indices[i] &= kIndexMask;
459 groupSize++;
460 if (finishedGroup || groupSize == 1)
461 {
462 Indices[i - finishedGroupSize] &= kIndexMask;
463 if (finishedGroupSize > 1)
464 Indices[(size_t)(i - finishedGroupSize) + 1] &= kIndexMask;
465 { 566 {
466 UInt32 newGroupSize = groupSize + finishedGroupSize; 567 groupSize += ((Indices[(size_t)i + 1] >> kNumBitsMax) << kNumExtra0Bits);
467 SetFinishedGroupSize(Indices + i - finishedGroupSize, newGroupSize) 568 Indices[(size_t)i + 1] &= kIndexMask;
468 finishedGroupSize = newGroupSize;
469 } 569 }
470 i += groupSize; 570 Indices[i] &= kIndexMask;
471 continue; 571 groupSize++;
472 } 572 if (finishedGroup || groupSize == 1)
473 finishedGroupSize = 0; 573 {
574 Indices[i - finishedGroupSize] &= kIndexMask;
575 if (finishedGroupSize > 1)
576 Indices[(size_t)(i - finishedGroupSize) + 1] &= kIndexMask;
577 {
578 const size_t newGroupSize = groupSize + finishedGroupSize;
579 SetFinishedGroupSize(Indices + i - finishedGroupSize, newGroupSize)
580 finishedGroupSize = newGroupSize;
581 }
582 i += groupSize;
583 continue;
584 }
585 finishedGroupSize = 0;
474 } 586 }
475 587
476 #endif 588#endif
477 589
478 if (NumSortedBytes >= blockSize) 590 if (NumSortedBytes >= blockSize)
479 { 591 {
480 UInt32 j; 592 size_t j;
481 for (j = 0; j < groupSize; j++) 593 for (j = 0; j < groupSize; j++)
482 { 594 {
483 UInt32 t = (i + j); 595 size_t t = i + j;
484 /* Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask)); */ 596 /* Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask)); */
485 Groups[Indices[t]] = t; 597 Groups[Indices[t]] = (UInt32)t;
486 } 598 }
487 } 599 }
488 else 600 else
489 if (SortGroup(blockSize, NumSortedBytes, i, groupSize, NumRefBits, Indices 601 if (SortGroup(blockSize, NumSortedBytes, i, groupSize, NumRefBits, Indices
490 #ifndef BLOCK_SORT_USE_HEAP_SORT 602 #ifndef BLOCK_SORT_USE_HEAP_SORT
491 , 0, blockSize 603 , 0, blockSize
492 #endif 604 #endif
493 ) != 0) 605 ))
494 newLimit = i + groupSize; 606 newLimit = i + groupSize;
495 i += groupSize; 607 i += groupSize;
496 } 608 }
@@ -498,19 +610,19 @@ UInt32 BlockSort(UInt32 *Indices, const Byte *data, UInt32 blockSize)
498 break; 610 break;
499 } 611 }
500 } 612 }
501 #ifndef BLOCK_SORT_EXTERNAL_FLAGS 613#ifndef BLOCK_SORT_EXTERNAL_FLAGS
502 for (i = 0; i < blockSize;) 614 for (i = 0; i < blockSize;)
503 { 615 {
504 UInt32 groupSize = ((Indices[i] & ~0xC0000000) >> kNumBitsMax); 616 size_t groupSize = (Indices[i] & ~0xC0000000) >> kNumBitsMax;
505 if ((Indices[i] & 0x40000000) != 0) 617 if (Indices[i] & 0x40000000)
506 { 618 {
507 groupSize += ((Indices[(size_t)i + 1] >> kNumBitsMax) << kNumExtra0Bits); 619 groupSize += (Indices[(size_t)i + 1] >> kNumBitsMax) << kNumExtra0Bits;
508 Indices[(size_t)i + 1] &= kIndexMask; 620 Indices[(size_t)i + 1] &= kIndexMask;
509 } 621 }
510 Indices[i] &= kIndexMask; 622 Indices[i] &= kIndexMask;
511 groupSize++; 623 groupSize++;
512 i += groupSize; 624 i += groupSize;
513 } 625 }
514 #endif 626#endif
515 return Groups[0]; 627 return Groups[0];
516} 628}