diff options
Diffstat (limited to '')
-rw-r--r-- | C/BwtSort.c | 468 |
1 files changed, 290 insertions, 178 deletions
diff --git a/C/BwtSort.c b/C/BwtSort.c index 05ad6de..8f64f9d 100644 --- a/C/BwtSort.c +++ b/C/BwtSort.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* BwtSort.c -- BWT block sorting | 1 | /* BwtSort.c -- BWT block sorting |
2 | 2023-04-02 : Igor Pavlov : Public domain */ | 2 | : Igor Pavlov : Public domain */ |
3 | 3 | ||
4 | #include "Precomp.h" | 4 | #include "Precomp.h" |
5 | 5 | ||
@@ -7,6 +7,44 @@ | |||
7 | #include "Sort.h" | 7 | #include "Sort.h" |
8 | 8 | ||
9 | /* #define BLOCK_SORT_USE_HEAP_SORT */ | 9 | /* #define BLOCK_SORT_USE_HEAP_SORT */ |
10 | // #define BLOCK_SORT_USE_HEAP_SORT | ||
11 | |||
12 | #ifdef BLOCK_SORT_USE_HEAP_SORT | ||
13 | |||
14 | #define HeapSortRefDown(p, vals, n, size, temp) \ | ||
15 | { size_t k = n; UInt32 val = vals[temp]; for (;;) { \ | ||
16 | size_t s = k << 1; \ | ||
17 | if (s > size) break; \ | ||
18 | if (s < size && vals[p[s + 1]] > vals[p[s]]) s++; \ | ||
19 | if (val >= vals[p[s]]) break; \ | ||
20 | p[k] = p[s]; k = s; \ | ||
21 | } p[k] = temp; } | ||
22 | |||
23 | void HeapSortRef(UInt32 *p, UInt32 *vals, size_t size) | ||
24 | { | ||
25 | if (size <= 1) | ||
26 | return; | ||
27 | p--; | ||
28 | { | ||
29 | size_t i = size / 2; | ||
30 | do | ||
31 | { | ||
32 | UInt32 temp = p[i]; | ||
33 | HeapSortRefDown(p, vals, i, size, temp); | ||
34 | } | ||
35 | while (--i != 0); | ||
36 | } | ||
37 | do | ||
38 | { | ||
39 | UInt32 temp = p[size]; | ||
40 | p[size--] = p[1]; | ||
41 | HeapSortRefDown(p, vals, 1, size, temp); | ||
42 | } | ||
43 | while (size > 1); | ||
44 | } | ||
45 | |||
46 | #endif // BLOCK_SORT_USE_HEAP_SORT | ||
47 | |||
10 | 48 | ||
11 | /* Don't change it !!! */ | 49 | /* Don't change it !!! */ |
12 | #define kNumHashBytes 2 | 50 | #define kNumHashBytes 2 |
@@ -27,26 +65,27 @@ | |||
27 | 65 | ||
28 | #else | 66 | #else |
29 | 67 | ||
30 | #define kNumBitsMax 20 | 68 | #define kNumBitsMax 20 |
31 | #define kIndexMask ((1 << kNumBitsMax) - 1) | 69 | #define kIndexMask (((UInt32)1 << kNumBitsMax) - 1) |
32 | #define kNumExtraBits (32 - kNumBitsMax) | 70 | #define kNumExtraBits (32 - kNumBitsMax) |
33 | #define kNumExtra0Bits (kNumExtraBits - 2) | 71 | #define kNumExtra0Bits (kNumExtraBits - 2) |
34 | #define kNumExtra0Mask ((1 << kNumExtra0Bits) - 1) | 72 | #define kNumExtra0Mask ((1 << kNumExtra0Bits) - 1) |
35 | 73 | ||
36 | #define SetFinishedGroupSize(p, size) \ | 74 | #define SetFinishedGroupSize(p, size) \ |
37 | { *(p) |= ((((size) - 1) & kNumExtra0Mask) << kNumBitsMax); \ | 75 | { *(p) |= ((((UInt32)(size) - 1) & kNumExtra0Mask) << kNumBitsMax); \ |
38 | if ((size) > (1 << kNumExtra0Bits)) { \ | 76 | if ((size) > (1 << kNumExtra0Bits)) { \ |
39 | *(p) |= 0x40000000; *((p) + 1) |= ((((size) - 1)>> kNumExtra0Bits) << kNumBitsMax); } } \ | 77 | *(p) |= 0x40000000; \ |
78 | *((p) + 1) |= (((UInt32)(size) - 1) >> kNumExtra0Bits) << kNumBitsMax; } } \ | ||
40 | 79 | ||
41 | static void SetGroupSize(UInt32 *p, UInt32 size) | 80 | static void SetGroupSize(UInt32 *p, size_t size) |
42 | { | 81 | { |
43 | if (--size == 0) | 82 | if (--size == 0) |
44 | return; | 83 | return; |
45 | *p |= 0x80000000 | ((size & kNumExtra0Mask) << kNumBitsMax); | 84 | *p |= 0x80000000 | (((UInt32)size & kNumExtra0Mask) << kNumBitsMax); |
46 | if (size >= (1 << kNumExtra0Bits)) | 85 | if (size >= (1 << kNumExtra0Bits)) |
47 | { | 86 | { |
48 | *p |= 0x40000000; | 87 | *p |= 0x40000000; |
49 | p[1] |= ((size >> kNumExtra0Bits) << kNumBitsMax); | 88 | p[1] |= (((UInt32)size >> kNumExtra0Bits) << kNumBitsMax); |
50 | } | 89 | } |
51 | } | 90 | } |
52 | 91 | ||
@@ -59,12 +98,14 @@ returns: 1 - if there are groups, 0 - no more groups | |||
59 | */ | 98 | */ |
60 | 99 | ||
61 | static | 100 | static |
62 | UInt32 | 101 | unsigned |
63 | Z7_FASTCALL | 102 | Z7_FASTCALL |
64 | SortGroup(UInt32 BlockSize, UInt32 NumSortedBytes, UInt32 groupOffset, UInt32 groupSize, int NumRefBits, UInt32 *Indices | 103 | SortGroup(size_t BlockSize, size_t NumSortedBytes, |
65 | #ifndef BLOCK_SORT_USE_HEAP_SORT | 104 | size_t groupOffset, size_t groupSize, |
66 | , UInt32 left, UInt32 range | 105 | unsigned NumRefBits, UInt32 *Indices |
67 | #endif | 106 | #ifndef BLOCK_SORT_USE_HEAP_SORT |
107 | , size_t left, size_t range | ||
108 | #endif | ||
68 | ) | 109 | ) |
69 | { | 110 | { |
70 | UInt32 *ind2 = Indices + groupOffset; | 111 | UInt32 *ind2 = Indices + groupOffset; |
@@ -79,90 +120,93 @@ SortGroup(UInt32 BlockSize, UInt32 NumSortedBytes, UInt32 groupOffset, UInt32 gr | |||
79 | return 0; | 120 | return 0; |
80 | } | 121 | } |
81 | Groups = Indices + BlockSize + BS_TEMP_SIZE; | 122 | Groups = Indices + BlockSize + BS_TEMP_SIZE; |
82 | if (groupSize <= ((UInt32)1 << NumRefBits) | 123 | if (groupSize <= ((size_t)1 << NumRefBits) |
83 | #ifndef BLOCK_SORT_USE_HEAP_SORT | 124 | #ifndef BLOCK_SORT_USE_HEAP_SORT |
84 | && groupSize <= range | 125 | && groupSize <= range |
85 | #endif | 126 | #endif |
86 | ) | 127 | ) |
87 | { | 128 | { |
88 | UInt32 *temp = Indices + BlockSize; | 129 | UInt32 *temp = Indices + BlockSize; |
89 | UInt32 j; | 130 | size_t j, group; |
90 | UInt32 mask, thereAreGroups, group, cg; | 131 | UInt32 mask, cg; |
132 | unsigned thereAreGroups; | ||
91 | { | 133 | { |
92 | UInt32 gPrev; | 134 | UInt32 gPrev; |
93 | UInt32 gRes = 0; | 135 | UInt32 gRes = 0; |
94 | { | 136 | { |
95 | UInt32 sp = ind2[0] + NumSortedBytes; | 137 | size_t sp = ind2[0] + NumSortedBytes; |
96 | if (sp >= BlockSize) sp -= BlockSize; | 138 | if (sp >= BlockSize) |
139 | sp -= BlockSize; | ||
97 | gPrev = Groups[sp]; | 140 | gPrev = Groups[sp]; |
98 | temp[0] = (gPrev << NumRefBits); | 141 | temp[0] = gPrev << NumRefBits; |
99 | } | 142 | } |
100 | 143 | ||
101 | for (j = 1; j < groupSize; j++) | 144 | for (j = 1; j < groupSize; j++) |
102 | { | 145 | { |
103 | UInt32 sp = ind2[j] + NumSortedBytes; | 146 | size_t sp = ind2[j] + NumSortedBytes; |
104 | UInt32 g; | 147 | UInt32 g; |
105 | if (sp >= BlockSize) sp -= BlockSize; | 148 | if (sp >= BlockSize) |
149 | sp -= BlockSize; | ||
106 | g = Groups[sp]; | 150 | g = Groups[sp]; |
107 | temp[j] = (g << NumRefBits) | j; | 151 | temp[j] = (g << NumRefBits) | (UInt32)j; |
108 | gRes |= (gPrev ^ g); | 152 | gRes |= (gPrev ^ g); |
109 | } | 153 | } |
110 | if (gRes == 0) | 154 | if (gRes == 0) |
111 | { | 155 | { |
112 | #ifndef BLOCK_SORT_EXTERNAL_FLAGS | 156 | #ifndef BLOCK_SORT_EXTERNAL_FLAGS |
113 | SetGroupSize(ind2, groupSize); | 157 | SetGroupSize(ind2, groupSize); |
114 | #endif | 158 | #endif |
115 | return 1; | 159 | return 1; |
116 | } | 160 | } |
117 | } | 161 | } |
118 | 162 | ||
119 | HeapSort(temp, groupSize); | 163 | HeapSort(temp, groupSize); |
120 | mask = (((UInt32)1 << NumRefBits) - 1); | 164 | mask = ((UInt32)1 << NumRefBits) - 1; |
121 | thereAreGroups = 0; | 165 | thereAreGroups = 0; |
122 | 166 | ||
123 | group = groupOffset; | 167 | group = groupOffset; |
124 | cg = (temp[0] >> NumRefBits); | 168 | cg = temp[0] >> NumRefBits; |
125 | temp[0] = ind2[temp[0] & mask]; | 169 | temp[0] = ind2[temp[0] & mask]; |
126 | 170 | ||
127 | { | 171 | { |
128 | #ifdef BLOCK_SORT_EXTERNAL_FLAGS | 172 | #ifdef BLOCK_SORT_EXTERNAL_FLAGS |
129 | UInt32 *Flags = Groups + BlockSize; | 173 | UInt32 *Flags = Groups + BlockSize; |
130 | #else | 174 | #else |
131 | UInt32 prevGroupStart = 0; | 175 | size_t prevGroupStart = 0; |
132 | #endif | 176 | #endif |
133 | 177 | ||
134 | for (j = 1; j < groupSize; j++) | 178 | for (j = 1; j < groupSize; j++) |
135 | { | 179 | { |
136 | UInt32 val = temp[j]; | 180 | const UInt32 val = temp[j]; |
137 | UInt32 cgCur = (val >> NumRefBits); | 181 | const UInt32 cgCur = val >> NumRefBits; |
138 | 182 | ||
139 | if (cgCur != cg) | 183 | if (cgCur != cg) |
140 | { | 184 | { |
141 | cg = cgCur; | 185 | cg = cgCur; |
142 | group = groupOffset + j; | 186 | group = groupOffset + j; |
143 | 187 | ||
144 | #ifdef BLOCK_SORT_EXTERNAL_FLAGS | 188 | #ifdef BLOCK_SORT_EXTERNAL_FLAGS |
145 | { | 189 | { |
146 | UInt32 t = group - 1; | 190 | const size_t t = group - 1; |
147 | Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask)); | 191 | Flags[t >> kNumFlagsBits] &= ~((UInt32)1 << (t & kFlagsMask)); |
148 | } | 192 | } |
149 | #else | 193 | #else |
150 | SetGroupSize(temp + prevGroupStart, j - prevGroupStart); | 194 | SetGroupSize(temp + prevGroupStart, j - prevGroupStart); |
151 | prevGroupStart = j; | 195 | prevGroupStart = j; |
152 | #endif | 196 | #endif |
153 | } | 197 | } |
154 | else | 198 | else |
155 | thereAreGroups = 1; | 199 | thereAreGroups = 1; |
156 | { | 200 | { |
157 | UInt32 ind = ind2[val & mask]; | 201 | const UInt32 ind = ind2[val & mask]; |
158 | temp[j] = ind; | 202 | temp[j] = ind; |
159 | Groups[ind] = group; | 203 | Groups[ind] = (UInt32)group; |
160 | } | 204 | } |
161 | } | 205 | } |
162 | 206 | ||
163 | #ifndef BLOCK_SORT_EXTERNAL_FLAGS | 207 | #ifndef BLOCK_SORT_EXTERNAL_FLAGS |
164 | SetGroupSize(temp + prevGroupStart, j - prevGroupStart); | 208 | SetGroupSize(temp + prevGroupStart, j - prevGroupStart); |
165 | #endif | 209 | #endif |
166 | } | 210 | } |
167 | 211 | ||
168 | for (j = 0; j < groupSize; j++) | 212 | for (j = 0; j < groupSize; j++) |
@@ -172,37 +216,42 @@ SortGroup(UInt32 BlockSize, UInt32 NumSortedBytes, UInt32 groupOffset, UInt32 gr | |||
172 | 216 | ||
173 | /* Check that all strings are in one group (cannot sort) */ | 217 | /* Check that all strings are in one group (cannot sort) */ |
174 | { | 218 | { |
175 | UInt32 group, j; | 219 | UInt32 group; |
176 | UInt32 sp = ind2[0] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize; | 220 | size_t j; |
221 | size_t sp = ind2[0] + NumSortedBytes; | ||
222 | if (sp >= BlockSize) | ||
223 | sp -= BlockSize; | ||
177 | group = Groups[sp]; | 224 | group = Groups[sp]; |
178 | for (j = 1; j < groupSize; j++) | 225 | for (j = 1; j < groupSize; j++) |
179 | { | 226 | { |
180 | sp = ind2[j] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize; | 227 | sp = ind2[j] + NumSortedBytes; |
228 | if (sp >= BlockSize) | ||
229 | sp -= BlockSize; | ||
181 | if (Groups[sp] != group) | 230 | if (Groups[sp] != group) |
182 | break; | 231 | break; |
183 | } | 232 | } |
184 | if (j == groupSize) | 233 | if (j == groupSize) |
185 | { | 234 | { |
186 | #ifndef BLOCK_SORT_EXTERNAL_FLAGS | 235 | #ifndef BLOCK_SORT_EXTERNAL_FLAGS |
187 | SetGroupSize(ind2, groupSize); | 236 | SetGroupSize(ind2, groupSize); |
188 | #endif | 237 | #endif |
189 | return 1; | 238 | return 1; |
190 | } | 239 | } |
191 | } | 240 | } |
192 | 241 | ||
193 | #ifndef BLOCK_SORT_USE_HEAP_SORT | 242 | #ifndef BLOCK_SORT_USE_HEAP_SORT |
194 | { | 243 | { |
195 | /* ---------- Range Sort ---------- */ | 244 | /* ---------- Range Sort ---------- */ |
196 | UInt32 i; | 245 | size_t i; |
197 | UInt32 mid; | 246 | size_t mid; |
198 | for (;;) | 247 | for (;;) |
199 | { | 248 | { |
200 | UInt32 j; | 249 | size_t j; |
201 | if (range <= 1) | 250 | if (range <= 1) |
202 | { | 251 | { |
203 | #ifndef BLOCK_SORT_EXTERNAL_FLAGS | 252 | #ifndef BLOCK_SORT_EXTERNAL_FLAGS |
204 | SetGroupSize(ind2, groupSize); | 253 | SetGroupSize(ind2, groupSize); |
205 | #endif | 254 | #endif |
206 | return 1; | 255 | return 1; |
207 | } | 256 | } |
208 | mid = left + ((range + 1) >> 1); | 257 | mid = left + ((range + 1) >> 1); |
@@ -210,7 +259,7 @@ SortGroup(UInt32 BlockSize, UInt32 NumSortedBytes, UInt32 groupOffset, UInt32 gr | |||
210 | i = 0; | 259 | i = 0; |
211 | do | 260 | do |
212 | { | 261 | { |
213 | UInt32 sp = ind2[i] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize; | 262 | size_t sp = ind2[i] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize; |
214 | if (Groups[sp] >= mid) | 263 | if (Groups[sp] >= mid) |
215 | { | 264 | { |
216 | for (j--; j > i; j--) | 265 | for (j--; j > i; j--) |
@@ -238,51 +287,53 @@ SortGroup(UInt32 BlockSize, UInt32 NumSortedBytes, UInt32 groupOffset, UInt32 gr | |||
238 | break; | 287 | break; |
239 | } | 288 | } |
240 | 289 | ||
241 | #ifdef BLOCK_SORT_EXTERNAL_FLAGS | 290 | #ifdef BLOCK_SORT_EXTERNAL_FLAGS |
242 | { | 291 | { |
243 | UInt32 t = (groupOffset + i - 1); | 292 | const size_t t = groupOffset + i - 1; |
244 | UInt32 *Flags = Groups + BlockSize; | 293 | UInt32 *Flags = Groups + BlockSize; |
245 | Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask)); | 294 | Flags[t >> kNumFlagsBits] &= ~((UInt32)1 << (t & kFlagsMask)); |
246 | } | 295 | } |
247 | #endif | 296 | #endif |
248 | 297 | ||
249 | { | 298 | { |
250 | UInt32 j; | 299 | size_t j; |
251 | for (j = i; j < groupSize; j++) | 300 | for (j = i; j < groupSize; j++) |
252 | Groups[ind2[j]] = groupOffset + i; | 301 | Groups[ind2[j]] = (UInt32)(groupOffset + i); |
253 | } | 302 | } |
254 | 303 | ||
255 | { | 304 | { |
256 | UInt32 res = SortGroup(BlockSize, NumSortedBytes, groupOffset, i, NumRefBits, Indices, left, mid - left); | 305 | unsigned res = SortGroup(BlockSize, NumSortedBytes, groupOffset, i, NumRefBits, Indices, left, mid - left); |
257 | return res | SortGroup(BlockSize, NumSortedBytes, groupOffset + i, groupSize - i, NumRefBits, Indices, mid, range - (mid - left)); | 306 | return res | SortGroup(BlockSize, NumSortedBytes, groupOffset + i, groupSize - i, NumRefBits, Indices, mid, range - (mid - left)); |
258 | } | 307 | } |
259 | 308 | ||
260 | } | 309 | } |
261 | 310 | ||
262 | #else | 311 | #else // BLOCK_SORT_USE_HEAP_SORT |
263 | 312 | ||
264 | /* ---------- Heap Sort ---------- */ | 313 | /* ---------- Heap Sort ---------- */ |
265 | 314 | ||
266 | { | 315 | { |
267 | UInt32 j; | 316 | size_t j; |
268 | for (j = 0; j < groupSize; j++) | 317 | for (j = 0; j < groupSize; j++) |
269 | { | 318 | { |
270 | UInt32 sp = ind2[j] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize; | 319 | size_t sp = ind2[j] + NumSortedBytes; |
271 | ind2[j] = sp; | 320 | if (sp >= BlockSize) |
321 | sp -= BlockSize; | ||
322 | ind2[j] = (UInt32)sp; | ||
272 | } | 323 | } |
273 | 324 | ||
274 | HeapSortRef(ind2, Groups, groupSize); | 325 | HeapSortRef(ind2, Groups, groupSize); |
275 | 326 | ||
276 | /* Write Flags */ | 327 | /* Write Flags */ |
277 | { | 328 | { |
278 | UInt32 sp = ind2[0]; | 329 | size_t sp = ind2[0]; |
279 | UInt32 group = Groups[sp]; | 330 | UInt32 group = Groups[sp]; |
280 | 331 | ||
281 | #ifdef BLOCK_SORT_EXTERNAL_FLAGS | 332 | #ifdef BLOCK_SORT_EXTERNAL_FLAGS |
282 | UInt32 *Flags = Groups + BlockSize; | 333 | UInt32 *Flags = Groups + BlockSize; |
283 | #else | 334 | #else |
284 | UInt32 prevGroupStart = 0; | 335 | size_t prevGroupStart = 0; |
285 | #endif | 336 | #endif |
286 | 337 | ||
287 | for (j = 1; j < groupSize; j++) | 338 | for (j = 1; j < groupSize; j++) |
288 | { | 339 | { |
@@ -290,149 +341,210 @@ SortGroup(UInt32 BlockSize, UInt32 NumSortedBytes, UInt32 groupOffset, UInt32 gr | |||
290 | if (Groups[sp] != group) | 341 | if (Groups[sp] != group) |
291 | { | 342 | { |
292 | group = Groups[sp]; | 343 | group = Groups[sp]; |
293 | #ifdef BLOCK_SORT_EXTERNAL_FLAGS | 344 | #ifdef BLOCK_SORT_EXTERNAL_FLAGS |
294 | { | 345 | { |
295 | UInt32 t = groupOffset + j - 1; | 346 | const size_t t = groupOffset + j - 1; |
296 | Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask)); | 347 | Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask)); |
297 | } | 348 | } |
298 | #else | 349 | #else |
299 | SetGroupSize(ind2 + prevGroupStart, j - prevGroupStart); | 350 | SetGroupSize(ind2 + prevGroupStart, j - prevGroupStart); |
300 | prevGroupStart = j; | 351 | prevGroupStart = j; |
301 | #endif | 352 | #endif |
302 | } | 353 | } |
303 | } | 354 | } |
304 | 355 | ||
305 | #ifndef BLOCK_SORT_EXTERNAL_FLAGS | 356 | #ifndef BLOCK_SORT_EXTERNAL_FLAGS |
306 | SetGroupSize(ind2 + prevGroupStart, j - prevGroupStart); | 357 | SetGroupSize(ind2 + prevGroupStart, j - prevGroupStart); |
307 | #endif | 358 | #endif |
308 | } | 359 | } |
309 | { | 360 | { |
310 | /* Write new Groups values and Check that there are groups */ | 361 | /* Write new Groups values and Check that there are groups */ |
311 | UInt32 thereAreGroups = 0; | 362 | unsigned thereAreGroups = 0; |
312 | for (j = 0; j < groupSize; j++) | 363 | for (j = 0; j < groupSize; j++) |
313 | { | 364 | { |
314 | UInt32 group = groupOffset + j; | 365 | size_t group = groupOffset + j; |
315 | #ifndef BLOCK_SORT_EXTERNAL_FLAGS | 366 | #ifndef BLOCK_SORT_EXTERNAL_FLAGS |
316 | UInt32 subGroupSize = ((ind2[j] & ~0xC0000000) >> kNumBitsMax); | 367 | UInt32 subGroupSize = ((ind2[j] & ~0xC0000000) >> kNumBitsMax); |
317 | if ((ind2[j] & 0x40000000) != 0) | 368 | if (ind2[j] & 0x40000000) |
318 | subGroupSize += ((ind2[(size_t)j + 1] >> kNumBitsMax) << kNumExtra0Bits); | 369 | subGroupSize += ((ind2[(size_t)j + 1] >> kNumBitsMax) << kNumExtra0Bits); |
319 | subGroupSize++; | 370 | subGroupSize++; |
320 | for (;;) | 371 | for (;;) |
321 | { | 372 | { |
322 | UInt32 original = ind2[j]; | 373 | const UInt32 original = ind2[j]; |
323 | UInt32 sp = original & kIndexMask; | 374 | size_t sp = original & kIndexMask; |
324 | if (sp < NumSortedBytes) sp += BlockSize; sp -= NumSortedBytes; | 375 | if (sp < NumSortedBytes) |
325 | ind2[j] = sp | (original & ~kIndexMask); | 376 | sp += BlockSize; |
326 | Groups[sp] = group; | 377 | sp -= NumSortedBytes; |
378 | ind2[j] = (UInt32)sp | (original & ~kIndexMask); | ||
379 | Groups[sp] = (UInt32)group; | ||
327 | if (--subGroupSize == 0) | 380 | if (--subGroupSize == 0) |
328 | break; | 381 | break; |
329 | j++; | 382 | j++; |
330 | thereAreGroups = 1; | 383 | thereAreGroups = 1; |
331 | } | 384 | } |
332 | #else | 385 | #else |
333 | UInt32 *Flags = Groups + BlockSize; | 386 | UInt32 *Flags = Groups + BlockSize; |
334 | for (;;) | 387 | for (;;) |
335 | { | 388 | { |
336 | UInt32 sp = ind2[j]; if (sp < NumSortedBytes) sp += BlockSize; sp -= NumSortedBytes; | 389 | size_t sp = ind2[j]; |
337 | ind2[j] = sp; | 390 | if (sp < NumSortedBytes) |
338 | Groups[sp] = group; | 391 | sp += BlockSize; |
392 | sp -= NumSortedBytes; | ||
393 | ind2[j] = (UInt32)sp; | ||
394 | Groups[sp] = (UInt32)group; | ||
339 | if ((Flags[(groupOffset + j) >> kNumFlagsBits] & (1 << ((groupOffset + j) & kFlagsMask))) == 0) | 395 | if ((Flags[(groupOffset + j) >> kNumFlagsBits] & (1 << ((groupOffset + j) & kFlagsMask))) == 0) |
340 | break; | 396 | break; |
341 | j++; | 397 | j++; |
342 | thereAreGroups = 1; | 398 | thereAreGroups = 1; |
343 | } | 399 | } |
344 | #endif | 400 | #endif |
345 | } | 401 | } |
346 | return thereAreGroups; | 402 | return thereAreGroups; |
347 | } | 403 | } |
348 | } | 404 | } |
349 | #endif | 405 | #endif // BLOCK_SORT_USE_HEAP_SORT |
350 | } | 406 | } |
351 | 407 | ||
408 | |||
352 | /* conditions: blockSize > 0 */ | 409 | /* conditions: blockSize > 0 */ |
353 | UInt32 BlockSort(UInt32 *Indices, const Byte *data, UInt32 blockSize) | 410 | UInt32 BlockSort(UInt32 *Indices, const Byte *data, size_t blockSize) |
354 | { | 411 | { |
355 | UInt32 *counters = Indices + blockSize; | 412 | UInt32 *counters = Indices + blockSize; |
356 | UInt32 i; | 413 | size_t i; |
357 | UInt32 *Groups; | 414 | UInt32 *Groups; |
358 | #ifdef BLOCK_SORT_EXTERNAL_FLAGS | 415 | #ifdef BLOCK_SORT_EXTERNAL_FLAGS |
359 | UInt32 *Flags; | 416 | UInt32 *Flags; |
360 | #endif | 417 | #endif |
361 | 418 | ||
362 | /* Radix-Sort for 2 bytes */ | 419 | /* Radix-Sort for 2 bytes */ |
420 | // { UInt32 yyy; for (yyy = 0; yyy < 100; yyy++) { | ||
363 | for (i = 0; i < kNumHashValues; i++) | 421 | for (i = 0; i < kNumHashValues; i++) |
364 | counters[i] = 0; | 422 | counters[i] = 0; |
365 | for (i = 0; i < blockSize - 1; i++) | 423 | { |
366 | counters[((UInt32)data[i] << 8) | data[(size_t)i + 1]]++; | 424 | const Byte *data2 = data; |
367 | counters[((UInt32)data[i] << 8) | data[0]]++; | 425 | size_t a = data[(size_t)blockSize - 1]; |
426 | const Byte *data_lim = data + blockSize; | ||
427 | if (blockSize >= 4) | ||
428 | { | ||
429 | data_lim -= 3; | ||
430 | do | ||
431 | { | ||
432 | size_t b; | ||
433 | b = data2[0]; counters[(a << 8) | b]++; | ||
434 | a = data2[1]; counters[(b << 8) | a]++; | ||
435 | b = data2[2]; counters[(a << 8) | b]++; | ||
436 | a = data2[3]; counters[(b << 8) | a]++; | ||
437 | data2 += 4; | ||
438 | } | ||
439 | while (data2 < data_lim); | ||
440 | data_lim += 3; | ||
441 | } | ||
442 | while (data2 != data_lim) | ||
443 | { | ||
444 | size_t b = *data2++; | ||
445 | counters[(a << 8) | b]++; | ||
446 | a = b; | ||
447 | } | ||
448 | } | ||
449 | // }} | ||
368 | 450 | ||
369 | Groups = counters + BS_TEMP_SIZE; | 451 | Groups = counters + BS_TEMP_SIZE; |
370 | #ifdef BLOCK_SORT_EXTERNAL_FLAGS | 452 | #ifdef BLOCK_SORT_EXTERNAL_FLAGS |
371 | Flags = Groups + blockSize; | 453 | Flags = Groups + blockSize; |
372 | { | 454 | { |
373 | UInt32 numWords = (blockSize + kFlagsMask) >> kNumFlagsBits; | 455 | const size_t numWords = (blockSize + kFlagsMask) >> kNumFlagsBits; |
374 | for (i = 0; i < numWords; i++) | 456 | for (i = 0; i < numWords; i++) |
375 | Flags[i] = kAllFlags; | 457 | Flags[i] = kAllFlags; |
376 | } | 458 | } |
377 | #endif | 459 | #endif |
378 | 460 | ||
379 | { | 461 | { |
380 | UInt32 sum = 0; | 462 | UInt32 sum = 0; |
381 | for (i = 0; i < kNumHashValues; i++) | 463 | for (i = 0; i < kNumHashValues; i++) |
382 | { | 464 | { |
383 | UInt32 groupSize = counters[i]; | 465 | const UInt32 groupSize = counters[i]; |
384 | if (groupSize > 0) | 466 | counters[i] = sum; |
467 | sum += groupSize; | ||
468 | #ifdef BLOCK_SORT_EXTERNAL_FLAGS | ||
469 | if (groupSize) | ||
385 | { | 470 | { |
386 | #ifdef BLOCK_SORT_EXTERNAL_FLAGS | 471 | const UInt32 t = sum - 1; |
387 | UInt32 t = sum + groupSize - 1; | 472 | Flags[t >> kNumFlagsBits] &= ~((UInt32)1 << (t & kFlagsMask)); |
388 | Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask)); | ||
389 | #endif | ||
390 | sum += groupSize; | ||
391 | } | 473 | } |
392 | counters[i] = sum - groupSize; | 474 | #endif |
393 | } | 475 | } |
476 | } | ||
394 | 477 | ||
395 | for (i = 0; i < blockSize - 1; i++) | 478 | for (i = 0; i < blockSize - 1; i++) |
396 | Groups[i] = counters[((UInt32)data[i] << 8) | data[(size_t)i + 1]]; | 479 | Groups[i] = counters[((unsigned)data[i] << 8) | data[(size_t)i + 1]]; |
397 | Groups[i] = counters[((UInt32)data[i] << 8) | data[0]]; | 480 | Groups[i] = counters[((unsigned)data[i] << 8) | data[0]]; |
481 | |||
482 | { | ||
483 | #define SET_Indices(a, b, i) \ | ||
484 | { UInt32 c; \ | ||
485 | a = (a << 8) | (b); \ | ||
486 | c = counters[a]; \ | ||
487 | Indices[c] = (UInt32)i++; \ | ||
488 | counters[a] = c + 1; \ | ||
489 | } | ||
398 | 490 | ||
399 | for (i = 0; i < blockSize - 1; i++) | 491 | size_t a = data[0]; |
400 | Indices[counters[((UInt32)data[i] << 8) | data[(size_t)i + 1]]++] = i; | 492 | const Byte *data_ptr = data + 1; |
401 | Indices[counters[((UInt32)data[i] << 8) | data[0]]++] = i; | 493 | i = 0; |
402 | 494 | if (blockSize >= 3) | |
403 | #ifndef BLOCK_SORT_EXTERNAL_FLAGS | 495 | { |
496 | blockSize -= 2; | ||
497 | do | ||
498 | { | ||
499 | size_t b; | ||
500 | b = data_ptr[0]; SET_Indices(a, b, i) | ||
501 | a = data_ptr[1]; SET_Indices(b, a, i) | ||
502 | data_ptr += 2; | ||
503 | } | ||
504 | while (i < blockSize); | ||
505 | blockSize += 2; | ||
506 | } | ||
507 | if (i < blockSize - 1) | ||
404 | { | 508 | { |
509 | SET_Indices(a, data[(size_t)i + 1], i) | ||
510 | a = (Byte)a; | ||
511 | } | ||
512 | SET_Indices(a, data[0], i) | ||
513 | } | ||
514 | |||
515 | #ifndef BLOCK_SORT_EXTERNAL_FLAGS | ||
516 | { | ||
405 | UInt32 prev = 0; | 517 | UInt32 prev = 0; |
406 | for (i = 0; i < kNumHashValues; i++) | 518 | for (i = 0; i < kNumHashValues; i++) |
407 | { | 519 | { |
408 | UInt32 prevGroupSize = counters[i] - prev; | 520 | const UInt32 prevGroupSize = counters[i] - prev; |
409 | if (prevGroupSize == 0) | 521 | if (prevGroupSize == 0) |
410 | continue; | 522 | continue; |
411 | SetGroupSize(Indices + prev, prevGroupSize); | 523 | SetGroupSize(Indices + prev, prevGroupSize); |
412 | prev = counters[i]; | 524 | prev = counters[i]; |
413 | } | 525 | } |
414 | } | ||
415 | #endif | ||
416 | } | 526 | } |
527 | #endif | ||
417 | 528 | ||
418 | { | 529 | { |
419 | int NumRefBits; | 530 | unsigned NumRefBits; |
420 | UInt32 NumSortedBytes; | 531 | size_t NumSortedBytes; |
421 | for (NumRefBits = 0; ((blockSize - 1) >> NumRefBits) != 0; NumRefBits++); | 532 | for (NumRefBits = 0; ((blockSize - 1) >> NumRefBits) != 0; NumRefBits++) |
533 | {} | ||
422 | NumRefBits = 32 - NumRefBits; | 534 | NumRefBits = 32 - NumRefBits; |
423 | if (NumRefBits > kNumRefBitsMax) | 535 | if (NumRefBits > kNumRefBitsMax) |
424 | NumRefBits = kNumRefBitsMax; | 536 | NumRefBits = kNumRefBitsMax; |
425 | 537 | ||
426 | for (NumSortedBytes = kNumHashBytes; ; NumSortedBytes <<= 1) | 538 | for (NumSortedBytes = kNumHashBytes; ; NumSortedBytes <<= 1) |
427 | { | 539 | { |
428 | #ifndef BLOCK_SORT_EXTERNAL_FLAGS | 540 | #ifndef BLOCK_SORT_EXTERNAL_FLAGS |
429 | UInt32 finishedGroupSize = 0; | 541 | size_t finishedGroupSize = 0; |
430 | #endif | 542 | #endif |
431 | UInt32 newLimit = 0; | 543 | size_t newLimit = 0; |
432 | for (i = 0; i < blockSize;) | 544 | for (i = 0; i < blockSize;) |
433 | { | 545 | { |
434 | UInt32 groupSize; | 546 | size_t groupSize; |
435 | #ifdef BLOCK_SORT_EXTERNAL_FLAGS | 547 | #ifdef BLOCK_SORT_EXTERNAL_FLAGS |
436 | 548 | ||
437 | if ((Flags[i >> kNumFlagsBits] & (1 << (i & kFlagsMask))) == 0) | 549 | if ((Flags[i >> kNumFlagsBits] & (1 << (i & kFlagsMask))) == 0) |
438 | { | 550 | { |
@@ -441,56 +553,56 @@ UInt32 BlockSort(UInt32 *Indices, const Byte *data, UInt32 blockSize) | |||
441 | } | 553 | } |
442 | for (groupSize = 1; | 554 | for (groupSize = 1; |
443 | (Flags[(i + groupSize) >> kNumFlagsBits] & (1 << ((i + groupSize) & kFlagsMask))) != 0; | 555 | (Flags[(i + groupSize) >> kNumFlagsBits] & (1 << ((i + groupSize) & kFlagsMask))) != 0; |
444 | groupSize++); | 556 | groupSize++) |
445 | 557 | {} | |
446 | groupSize++; | 558 | groupSize++; |
447 | 559 | ||
448 | #else | 560 | #else |
449 | 561 | ||
450 | groupSize = ((Indices[i] & ~0xC0000000) >> kNumBitsMax); | 562 | groupSize = (Indices[i] & ~0xC0000000) >> kNumBitsMax; |
451 | { | 563 | { |
452 | BoolInt finishedGroup = ((Indices[i] & 0x80000000) == 0); | 564 | const BoolInt finishedGroup = ((Indices[i] & 0x80000000) == 0); |
453 | if ((Indices[i] & 0x40000000) != 0) | 565 | if (Indices[i] & 0x40000000) |
454 | { | ||
455 | groupSize += ((Indices[(size_t)i + 1] >> kNumBitsMax) << kNumExtra0Bits); | ||
456 | Indices[(size_t)i + 1] &= kIndexMask; | ||
457 | } | ||
458 | Indices[i] &= kIndexMask; | ||
459 | groupSize++; | ||
460 | if (finishedGroup || groupSize == 1) | ||
461 | { | ||
462 | Indices[i - finishedGroupSize] &= kIndexMask; | ||
463 | if (finishedGroupSize > 1) | ||
464 | Indices[(size_t)(i - finishedGroupSize) + 1] &= kIndexMask; | ||
465 | { | 566 | { |
466 | UInt32 newGroupSize = groupSize + finishedGroupSize; | 567 | groupSize += ((Indices[(size_t)i + 1] >> kNumBitsMax) << kNumExtra0Bits); |
467 | SetFinishedGroupSize(Indices + i - finishedGroupSize, newGroupSize) | 568 | Indices[(size_t)i + 1] &= kIndexMask; |
468 | finishedGroupSize = newGroupSize; | ||
469 | } | 569 | } |
470 | i += groupSize; | 570 | Indices[i] &= kIndexMask; |
471 | continue; | 571 | groupSize++; |
472 | } | 572 | if (finishedGroup || groupSize == 1) |
473 | finishedGroupSize = 0; | 573 | { |
574 | Indices[i - finishedGroupSize] &= kIndexMask; | ||
575 | if (finishedGroupSize > 1) | ||
576 | Indices[(size_t)(i - finishedGroupSize) + 1] &= kIndexMask; | ||
577 | { | ||
578 | const size_t newGroupSize = groupSize + finishedGroupSize; | ||
579 | SetFinishedGroupSize(Indices + i - finishedGroupSize, newGroupSize) | ||
580 | finishedGroupSize = newGroupSize; | ||
581 | } | ||
582 | i += groupSize; | ||
583 | continue; | ||
584 | } | ||
585 | finishedGroupSize = 0; | ||
474 | } | 586 | } |
475 | 587 | ||
476 | #endif | 588 | #endif |
477 | 589 | ||
478 | if (NumSortedBytes >= blockSize) | 590 | if (NumSortedBytes >= blockSize) |
479 | { | 591 | { |
480 | UInt32 j; | 592 | size_t j; |
481 | for (j = 0; j < groupSize; j++) | 593 | for (j = 0; j < groupSize; j++) |
482 | { | 594 | { |
483 | UInt32 t = (i + j); | 595 | size_t t = i + j; |
484 | /* Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask)); */ | 596 | /* Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask)); */ |
485 | Groups[Indices[t]] = t; | 597 | Groups[Indices[t]] = (UInt32)t; |
486 | } | 598 | } |
487 | } | 599 | } |
488 | else | 600 | else |
489 | if (SortGroup(blockSize, NumSortedBytes, i, groupSize, NumRefBits, Indices | 601 | if (SortGroup(blockSize, NumSortedBytes, i, groupSize, NumRefBits, Indices |
490 | #ifndef BLOCK_SORT_USE_HEAP_SORT | 602 | #ifndef BLOCK_SORT_USE_HEAP_SORT |
491 | , 0, blockSize | 603 | , 0, blockSize |
492 | #endif | 604 | #endif |
493 | ) != 0) | 605 | )) |
494 | newLimit = i + groupSize; | 606 | newLimit = i + groupSize; |
495 | i += groupSize; | 607 | i += groupSize; |
496 | } | 608 | } |
@@ -498,19 +610,19 @@ UInt32 BlockSort(UInt32 *Indices, const Byte *data, UInt32 blockSize) | |||
498 | break; | 610 | break; |
499 | } | 611 | } |
500 | } | 612 | } |
501 | #ifndef BLOCK_SORT_EXTERNAL_FLAGS | 613 | #ifndef BLOCK_SORT_EXTERNAL_FLAGS |
502 | for (i = 0; i < blockSize;) | 614 | for (i = 0; i < blockSize;) |
503 | { | 615 | { |
504 | UInt32 groupSize = ((Indices[i] & ~0xC0000000) >> kNumBitsMax); | 616 | size_t groupSize = (Indices[i] & ~0xC0000000) >> kNumBitsMax; |
505 | if ((Indices[i] & 0x40000000) != 0) | 617 | if (Indices[i] & 0x40000000) |
506 | { | 618 | { |
507 | groupSize += ((Indices[(size_t)i + 1] >> kNumBitsMax) << kNumExtra0Bits); | 619 | groupSize += (Indices[(size_t)i + 1] >> kNumBitsMax) << kNumExtra0Bits; |
508 | Indices[(size_t)i + 1] &= kIndexMask; | 620 | Indices[(size_t)i + 1] &= kIndexMask; |
509 | } | 621 | } |
510 | Indices[i] &= kIndexMask; | 622 | Indices[i] &= kIndexMask; |
511 | groupSize++; | 623 | groupSize++; |
512 | i += groupSize; | 624 | i += groupSize; |
513 | } | 625 | } |
514 | #endif | 626 | #endif |
515 | return Groups[0]; | 627 | return Groups[0]; |
516 | } | 628 | } |