diff options
Diffstat (limited to 'C/BwtSort.c')
-rw-r--r-- | C/BwtSort.c | 515 |
1 files changed, 515 insertions, 0 deletions
diff --git a/C/BwtSort.c b/C/BwtSort.c new file mode 100644 index 0000000..3eb57ef --- /dev/null +++ b/C/BwtSort.c | |||
@@ -0,0 +1,515 @@ | |||
1 | /* BwtSort.c -- BWT block sorting | ||
2 | 2021-04-01 : Igor Pavlov : Public domain */ | ||
3 | |||
4 | #include "Precomp.h" | ||
5 | |||
6 | #include "BwtSort.h" | ||
7 | #include "Sort.h" | ||
8 | |||
9 | /* #define BLOCK_SORT_USE_HEAP_SORT */ | ||
10 | |||
11 | #define NO_INLINE MY_FAST_CALL | ||
12 | |||
13 | /* Don't change it !!! */ | ||
14 | #define kNumHashBytes 2 | ||
15 | #define kNumHashValues (1 << (kNumHashBytes * 8)) | ||
16 | |||
17 | /* kNumRefBitsMax must be < (kNumHashBytes * 8) = 16 */ | ||
18 | #define kNumRefBitsMax 12 | ||
19 | |||
20 | #define BS_TEMP_SIZE kNumHashValues | ||
21 | |||
22 | #ifdef BLOCK_SORT_EXTERNAL_FLAGS | ||
23 | |||
24 | /* 32 Flags in UInt32 word */ | ||
25 | #define kNumFlagsBits 5 | ||
26 | #define kNumFlagsInWord (1 << kNumFlagsBits) | ||
27 | #define kFlagsMask (kNumFlagsInWord - 1) | ||
28 | #define kAllFlags 0xFFFFFFFF | ||
29 | |||
30 | #else | ||
31 | |||
32 | #define kNumBitsMax 20 | ||
33 | #define kIndexMask ((1 << kNumBitsMax) - 1) | ||
34 | #define kNumExtraBits (32 - kNumBitsMax) | ||
35 | #define kNumExtra0Bits (kNumExtraBits - 2) | ||
36 | #define kNumExtra0Mask ((1 << kNumExtra0Bits) - 1) | ||
37 | |||
38 | #define SetFinishedGroupSize(p, size) \ | ||
39 | { *(p) |= ((((size) - 1) & kNumExtra0Mask) << kNumBitsMax); \ | ||
40 | if ((size) > (1 << kNumExtra0Bits)) { \ | ||
41 | *(p) |= 0x40000000; *((p) + 1) |= ((((size) - 1)>> kNumExtra0Bits) << kNumBitsMax); } } \ | ||
42 | |||
43 | static void SetGroupSize(UInt32 *p, UInt32 size) | ||
44 | { | ||
45 | if (--size == 0) | ||
46 | return; | ||
47 | *p |= 0x80000000 | ((size & kNumExtra0Mask) << kNumBitsMax); | ||
48 | if (size >= (1 << kNumExtra0Bits)) | ||
49 | { | ||
50 | *p |= 0x40000000; | ||
51 | p[1] |= ((size >> kNumExtra0Bits) << kNumBitsMax); | ||
52 | } | ||
53 | } | ||
54 | |||
55 | #endif | ||
56 | |||
57 | /* | ||
58 | SortGroup - is recursive Range-Sort function with HeapSort optimization for small blocks | ||
59 | "range" is not real range. It's only for optimization. | ||
60 | returns: 1 - if there are groups, 0 - no more groups | ||
61 | */ | ||
62 | |||
63 | static UInt32 NO_INLINE SortGroup(UInt32 BlockSize, UInt32 NumSortedBytes, UInt32 groupOffset, UInt32 groupSize, int NumRefBits, UInt32 *Indices | ||
64 | #ifndef BLOCK_SORT_USE_HEAP_SORT | ||
65 | , UInt32 left, UInt32 range | ||
66 | #endif | ||
67 | ) | ||
68 | { | ||
69 | UInt32 *ind2 = Indices + groupOffset; | ||
70 | UInt32 *Groups; | ||
71 | if (groupSize <= 1) | ||
72 | { | ||
73 | /* | ||
74 | #ifndef BLOCK_SORT_EXTERNAL_FLAGS | ||
75 | SetFinishedGroupSize(ind2, 1); | ||
76 | #endif | ||
77 | */ | ||
78 | return 0; | ||
79 | } | ||
80 | Groups = Indices + BlockSize + BS_TEMP_SIZE; | ||
81 | if (groupSize <= ((UInt32)1 << NumRefBits) | ||
82 | #ifndef BLOCK_SORT_USE_HEAP_SORT | ||
83 | && groupSize <= range | ||
84 | #endif | ||
85 | ) | ||
86 | { | ||
87 | UInt32 *temp = Indices + BlockSize; | ||
88 | UInt32 j; | ||
89 | UInt32 mask, thereAreGroups, group, cg; | ||
90 | { | ||
91 | UInt32 gPrev; | ||
92 | UInt32 gRes = 0; | ||
93 | { | ||
94 | UInt32 sp = ind2[0] + NumSortedBytes; | ||
95 | if (sp >= BlockSize) sp -= BlockSize; | ||
96 | gPrev = Groups[sp]; | ||
97 | temp[0] = (gPrev << NumRefBits); | ||
98 | } | ||
99 | |||
100 | for (j = 1; j < groupSize; j++) | ||
101 | { | ||
102 | UInt32 sp = ind2[j] + NumSortedBytes; | ||
103 | UInt32 g; | ||
104 | if (sp >= BlockSize) sp -= BlockSize; | ||
105 | g = Groups[sp]; | ||
106 | temp[j] = (g << NumRefBits) | j; | ||
107 | gRes |= (gPrev ^ g); | ||
108 | } | ||
109 | if (gRes == 0) | ||
110 | { | ||
111 | #ifndef BLOCK_SORT_EXTERNAL_FLAGS | ||
112 | SetGroupSize(ind2, groupSize); | ||
113 | #endif | ||
114 | return 1; | ||
115 | } | ||
116 | } | ||
117 | |||
118 | HeapSort(temp, groupSize); | ||
119 | mask = (((UInt32)1 << NumRefBits) - 1); | ||
120 | thereAreGroups = 0; | ||
121 | |||
122 | group = groupOffset; | ||
123 | cg = (temp[0] >> NumRefBits); | ||
124 | temp[0] = ind2[temp[0] & mask]; | ||
125 | |||
126 | { | ||
127 | #ifdef BLOCK_SORT_EXTERNAL_FLAGS | ||
128 | UInt32 *Flags = Groups + BlockSize; | ||
129 | #else | ||
130 | UInt32 prevGroupStart = 0; | ||
131 | #endif | ||
132 | |||
133 | for (j = 1; j < groupSize; j++) | ||
134 | { | ||
135 | UInt32 val = temp[j]; | ||
136 | UInt32 cgCur = (val >> NumRefBits); | ||
137 | |||
138 | if (cgCur != cg) | ||
139 | { | ||
140 | cg = cgCur; | ||
141 | group = groupOffset + j; | ||
142 | |||
143 | #ifdef BLOCK_SORT_EXTERNAL_FLAGS | ||
144 | { | ||
145 | UInt32 t = group - 1; | ||
146 | Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask)); | ||
147 | } | ||
148 | #else | ||
149 | SetGroupSize(temp + prevGroupStart, j - prevGroupStart); | ||
150 | prevGroupStart = j; | ||
151 | #endif | ||
152 | } | ||
153 | else | ||
154 | thereAreGroups = 1; | ||
155 | { | ||
156 | UInt32 ind = ind2[val & mask]; | ||
157 | temp[j] = ind; | ||
158 | Groups[ind] = group; | ||
159 | } | ||
160 | } | ||
161 | |||
162 | #ifndef BLOCK_SORT_EXTERNAL_FLAGS | ||
163 | SetGroupSize(temp + prevGroupStart, j - prevGroupStart); | ||
164 | #endif | ||
165 | } | ||
166 | |||
167 | for (j = 0; j < groupSize; j++) | ||
168 | ind2[j] = temp[j]; | ||
169 | return thereAreGroups; | ||
170 | } | ||
171 | |||
172 | /* Check that all strings are in one group (cannot sort) */ | ||
173 | { | ||
174 | UInt32 group, j; | ||
175 | UInt32 sp = ind2[0] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize; | ||
176 | group = Groups[sp]; | ||
177 | for (j = 1; j < groupSize; j++) | ||
178 | { | ||
179 | sp = ind2[j] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize; | ||
180 | if (Groups[sp] != group) | ||
181 | break; | ||
182 | } | ||
183 | if (j == groupSize) | ||
184 | { | ||
185 | #ifndef BLOCK_SORT_EXTERNAL_FLAGS | ||
186 | SetGroupSize(ind2, groupSize); | ||
187 | #endif | ||
188 | return 1; | ||
189 | } | ||
190 | } | ||
191 | |||
192 | #ifndef BLOCK_SORT_USE_HEAP_SORT | ||
193 | { | ||
194 | /* ---------- Range Sort ---------- */ | ||
195 | UInt32 i; | ||
196 | UInt32 mid; | ||
197 | for (;;) | ||
198 | { | ||
199 | UInt32 j; | ||
200 | if (range <= 1) | ||
201 | { | ||
202 | #ifndef BLOCK_SORT_EXTERNAL_FLAGS | ||
203 | SetGroupSize(ind2, groupSize); | ||
204 | #endif | ||
205 | return 1; | ||
206 | } | ||
207 | mid = left + ((range + 1) >> 1); | ||
208 | j = groupSize; | ||
209 | i = 0; | ||
210 | do | ||
211 | { | ||
212 | UInt32 sp = ind2[i] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize; | ||
213 | if (Groups[sp] >= mid) | ||
214 | { | ||
215 | for (j--; j > i; j--) | ||
216 | { | ||
217 | sp = ind2[j] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize; | ||
218 | if (Groups[sp] < mid) | ||
219 | { | ||
220 | UInt32 temp = ind2[i]; ind2[i] = ind2[j]; ind2[j] = temp; | ||
221 | break; | ||
222 | } | ||
223 | } | ||
224 | if (i >= j) | ||
225 | break; | ||
226 | } | ||
227 | } | ||
228 | while (++i < j); | ||
229 | if (i == 0) | ||
230 | { | ||
231 | range = range - (mid - left); | ||
232 | left = mid; | ||
233 | } | ||
234 | else if (i == groupSize) | ||
235 | range = (mid - left); | ||
236 | else | ||
237 | break; | ||
238 | } | ||
239 | |||
240 | #ifdef BLOCK_SORT_EXTERNAL_FLAGS | ||
241 | { | ||
242 | UInt32 t = (groupOffset + i - 1); | ||
243 | UInt32 *Flags = Groups + BlockSize; | ||
244 | Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask)); | ||
245 | } | ||
246 | #endif | ||
247 | |||
248 | { | ||
249 | UInt32 j; | ||
250 | for (j = i; j < groupSize; j++) | ||
251 | Groups[ind2[j]] = groupOffset + i; | ||
252 | } | ||
253 | |||
254 | { | ||
255 | UInt32 res = SortGroup(BlockSize, NumSortedBytes, groupOffset, i, NumRefBits, Indices, left, mid - left); | ||
256 | return res | SortGroup(BlockSize, NumSortedBytes, groupOffset + i, groupSize - i, NumRefBits, Indices, mid, range - (mid - left)); | ||
257 | } | ||
258 | |||
259 | } | ||
260 | |||
261 | #else | ||
262 | |||
263 | /* ---------- Heap Sort ---------- */ | ||
264 | |||
265 | { | ||
266 | UInt32 j; | ||
267 | for (j = 0; j < groupSize; j++) | ||
268 | { | ||
269 | UInt32 sp = ind2[j] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize; | ||
270 | ind2[j] = sp; | ||
271 | } | ||
272 | |||
273 | HeapSortRef(ind2, Groups, groupSize); | ||
274 | |||
275 | /* Write Flags */ | ||
276 | { | ||
277 | UInt32 sp = ind2[0]; | ||
278 | UInt32 group = Groups[sp]; | ||
279 | |||
280 | #ifdef BLOCK_SORT_EXTERNAL_FLAGS | ||
281 | UInt32 *Flags = Groups + BlockSize; | ||
282 | #else | ||
283 | UInt32 prevGroupStart = 0; | ||
284 | #endif | ||
285 | |||
286 | for (j = 1; j < groupSize; j++) | ||
287 | { | ||
288 | sp = ind2[j]; | ||
289 | if (Groups[sp] != group) | ||
290 | { | ||
291 | group = Groups[sp]; | ||
292 | #ifdef BLOCK_SORT_EXTERNAL_FLAGS | ||
293 | { | ||
294 | UInt32 t = groupOffset + j - 1; | ||
295 | Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask)); | ||
296 | } | ||
297 | #else | ||
298 | SetGroupSize(ind2 + prevGroupStart, j - prevGroupStart); | ||
299 | prevGroupStart = j; | ||
300 | #endif | ||
301 | } | ||
302 | } | ||
303 | |||
304 | #ifndef BLOCK_SORT_EXTERNAL_FLAGS | ||
305 | SetGroupSize(ind2 + prevGroupStart, j - prevGroupStart); | ||
306 | #endif | ||
307 | } | ||
308 | { | ||
309 | /* Write new Groups values and Check that there are groups */ | ||
310 | UInt32 thereAreGroups = 0; | ||
311 | for (j = 0; j < groupSize; j++) | ||
312 | { | ||
313 | UInt32 group = groupOffset + j; | ||
314 | #ifndef BLOCK_SORT_EXTERNAL_FLAGS | ||
315 | UInt32 subGroupSize = ((ind2[j] & ~0xC0000000) >> kNumBitsMax); | ||
316 | if ((ind2[j] & 0x40000000) != 0) | ||
317 | subGroupSize += ((ind2[(size_t)j + 1] >> kNumBitsMax) << kNumExtra0Bits); | ||
318 | subGroupSize++; | ||
319 | for (;;) | ||
320 | { | ||
321 | UInt32 original = ind2[j]; | ||
322 | UInt32 sp = original & kIndexMask; | ||
323 | if (sp < NumSortedBytes) sp += BlockSize; sp -= NumSortedBytes; | ||
324 | ind2[j] = sp | (original & ~kIndexMask); | ||
325 | Groups[sp] = group; | ||
326 | if (--subGroupSize == 0) | ||
327 | break; | ||
328 | j++; | ||
329 | thereAreGroups = 1; | ||
330 | } | ||
331 | #else | ||
332 | UInt32 *Flags = Groups + BlockSize; | ||
333 | for (;;) | ||
334 | { | ||
335 | UInt32 sp = ind2[j]; if (sp < NumSortedBytes) sp += BlockSize; sp -= NumSortedBytes; | ||
336 | ind2[j] = sp; | ||
337 | Groups[sp] = group; | ||
338 | if ((Flags[(groupOffset + j) >> kNumFlagsBits] & (1 << ((groupOffset + j) & kFlagsMask))) == 0) | ||
339 | break; | ||
340 | j++; | ||
341 | thereAreGroups = 1; | ||
342 | } | ||
343 | #endif | ||
344 | } | ||
345 | return thereAreGroups; | ||
346 | } | ||
347 | } | ||
348 | #endif | ||
349 | } | ||
350 | |||
351 | /* conditions: blockSize > 0 */ | ||
352 | UInt32 BlockSort(UInt32 *Indices, const Byte *data, UInt32 blockSize) | ||
353 | { | ||
354 | UInt32 *counters = Indices + blockSize; | ||
355 | UInt32 i; | ||
356 | UInt32 *Groups; | ||
357 | #ifdef BLOCK_SORT_EXTERNAL_FLAGS | ||
358 | UInt32 *Flags; | ||
359 | #endif | ||
360 | |||
361 | /* Radix-Sort for 2 bytes */ | ||
362 | for (i = 0; i < kNumHashValues; i++) | ||
363 | counters[i] = 0; | ||
364 | for (i = 0; i < blockSize - 1; i++) | ||
365 | counters[((UInt32)data[i] << 8) | data[(size_t)i + 1]]++; | ||
366 | counters[((UInt32)data[i] << 8) | data[0]]++; | ||
367 | |||
368 | Groups = counters + BS_TEMP_SIZE; | ||
369 | #ifdef BLOCK_SORT_EXTERNAL_FLAGS | ||
370 | Flags = Groups + blockSize; | ||
371 | { | ||
372 | UInt32 numWords = (blockSize + kFlagsMask) >> kNumFlagsBits; | ||
373 | for (i = 0; i < numWords; i++) | ||
374 | Flags[i] = kAllFlags; | ||
375 | } | ||
376 | #endif | ||
377 | |||
378 | { | ||
379 | UInt32 sum = 0; | ||
380 | for (i = 0; i < kNumHashValues; i++) | ||
381 | { | ||
382 | UInt32 groupSize = counters[i]; | ||
383 | if (groupSize > 0) | ||
384 | { | ||
385 | #ifdef BLOCK_SORT_EXTERNAL_FLAGS | ||
386 | UInt32 t = sum + groupSize - 1; | ||
387 | Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask)); | ||
388 | #endif | ||
389 | sum += groupSize; | ||
390 | } | ||
391 | counters[i] = sum - groupSize; | ||
392 | } | ||
393 | |||
394 | for (i = 0; i < blockSize - 1; i++) | ||
395 | Groups[i] = counters[((UInt32)data[i] << 8) | data[(size_t)i + 1]]; | ||
396 | Groups[i] = counters[((UInt32)data[i] << 8) | data[0]]; | ||
397 | |||
398 | for (i = 0; i < blockSize - 1; i++) | ||
399 | Indices[counters[((UInt32)data[i] << 8) | data[(size_t)i + 1]]++] = i; | ||
400 | Indices[counters[((UInt32)data[i] << 8) | data[0]]++] = i; | ||
401 | |||
402 | #ifndef BLOCK_SORT_EXTERNAL_FLAGS | ||
403 | { | ||
404 | UInt32 prev = 0; | ||
405 | for (i = 0; i < kNumHashValues; i++) | ||
406 | { | ||
407 | UInt32 prevGroupSize = counters[i] - prev; | ||
408 | if (prevGroupSize == 0) | ||
409 | continue; | ||
410 | SetGroupSize(Indices + prev, prevGroupSize); | ||
411 | prev = counters[i]; | ||
412 | } | ||
413 | } | ||
414 | #endif | ||
415 | } | ||
416 | |||
417 | { | ||
418 | int NumRefBits; | ||
419 | UInt32 NumSortedBytes; | ||
420 | for (NumRefBits = 0; ((blockSize - 1) >> NumRefBits) != 0; NumRefBits++); | ||
421 | NumRefBits = 32 - NumRefBits; | ||
422 | if (NumRefBits > kNumRefBitsMax) | ||
423 | NumRefBits = kNumRefBitsMax; | ||
424 | |||
425 | for (NumSortedBytes = kNumHashBytes; ; NumSortedBytes <<= 1) | ||
426 | { | ||
427 | #ifndef BLOCK_SORT_EXTERNAL_FLAGS | ||
428 | UInt32 finishedGroupSize = 0; | ||
429 | #endif | ||
430 | UInt32 newLimit = 0; | ||
431 | for (i = 0; i < blockSize;) | ||
432 | { | ||
433 | UInt32 groupSize; | ||
434 | #ifdef BLOCK_SORT_EXTERNAL_FLAGS | ||
435 | |||
436 | if ((Flags[i >> kNumFlagsBits] & (1 << (i & kFlagsMask))) == 0) | ||
437 | { | ||
438 | i++; | ||
439 | continue; | ||
440 | } | ||
441 | for (groupSize = 1; | ||
442 | (Flags[(i + groupSize) >> kNumFlagsBits] & (1 << ((i + groupSize) & kFlagsMask))) != 0; | ||
443 | groupSize++); | ||
444 | |||
445 | groupSize++; | ||
446 | |||
447 | #else | ||
448 | |||
449 | groupSize = ((Indices[i] & ~0xC0000000) >> kNumBitsMax); | ||
450 | { | ||
451 | BoolInt finishedGroup = ((Indices[i] & 0x80000000) == 0); | ||
452 | if ((Indices[i] & 0x40000000) != 0) | ||
453 | { | ||
454 | groupSize += ((Indices[(size_t)i + 1] >> kNumBitsMax) << kNumExtra0Bits); | ||
455 | Indices[(size_t)i + 1] &= kIndexMask; | ||
456 | } | ||
457 | Indices[i] &= kIndexMask; | ||
458 | groupSize++; | ||
459 | if (finishedGroup || groupSize == 1) | ||
460 | { | ||
461 | Indices[i - finishedGroupSize] &= kIndexMask; | ||
462 | if (finishedGroupSize > 1) | ||
463 | Indices[(size_t)(i - finishedGroupSize) + 1] &= kIndexMask; | ||
464 | { | ||
465 | UInt32 newGroupSize = groupSize + finishedGroupSize; | ||
466 | SetFinishedGroupSize(Indices + i - finishedGroupSize, newGroupSize); | ||
467 | finishedGroupSize = newGroupSize; | ||
468 | } | ||
469 | i += groupSize; | ||
470 | continue; | ||
471 | } | ||
472 | finishedGroupSize = 0; | ||
473 | } | ||
474 | |||
475 | #endif | ||
476 | |||
477 | if (NumSortedBytes >= blockSize) | ||
478 | { | ||
479 | UInt32 j; | ||
480 | for (j = 0; j < groupSize; j++) | ||
481 | { | ||
482 | UInt32 t = (i + j); | ||
483 | /* Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask)); */ | ||
484 | Groups[Indices[t]] = t; | ||
485 | } | ||
486 | } | ||
487 | else | ||
488 | if (SortGroup(blockSize, NumSortedBytes, i, groupSize, NumRefBits, Indices | ||
489 | #ifndef BLOCK_SORT_USE_HEAP_SORT | ||
490 | , 0, blockSize | ||
491 | #endif | ||
492 | ) != 0) | ||
493 | newLimit = i + groupSize; | ||
494 | i += groupSize; | ||
495 | } | ||
496 | if (newLimit == 0) | ||
497 | break; | ||
498 | } | ||
499 | } | ||
500 | #ifndef BLOCK_SORT_EXTERNAL_FLAGS | ||
501 | for (i = 0; i < blockSize;) | ||
502 | { | ||
503 | UInt32 groupSize = ((Indices[i] & ~0xC0000000) >> kNumBitsMax); | ||
504 | if ((Indices[i] & 0x40000000) != 0) | ||
505 | { | ||
506 | groupSize += ((Indices[(size_t)i + 1] >> kNumBitsMax) << kNumExtra0Bits); | ||
507 | Indices[(size_t)i + 1] &= kIndexMask; | ||
508 | } | ||
509 | Indices[i] &= kIndexMask; | ||
510 | groupSize++; | ||
511 | i += groupSize; | ||
512 | } | ||
513 | #endif | ||
514 | return Groups[0]; | ||
515 | } | ||