diff options
Diffstat (limited to 'C/BwtSort.c')
| -rw-r--r-- | C/BwtSort.c | 468 |
1 files changed, 290 insertions, 178 deletions
diff --git a/C/BwtSort.c b/C/BwtSort.c index 05ad6de..8f64f9d 100644 --- a/C/BwtSort.c +++ b/C/BwtSort.c | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | /* BwtSort.c -- BWT block sorting | 1 | /* BwtSort.c -- BWT block sorting |
| 2 | 2023-04-02 : Igor Pavlov : Public domain */ | 2 | : Igor Pavlov : Public domain */ |
| 3 | 3 | ||
| 4 | #include "Precomp.h" | 4 | #include "Precomp.h" |
| 5 | 5 | ||
| @@ -7,6 +7,44 @@ | |||
| 7 | #include "Sort.h" | 7 | #include "Sort.h" |
| 8 | 8 | ||
| 9 | /* #define BLOCK_SORT_USE_HEAP_SORT */ | 9 | /* #define BLOCK_SORT_USE_HEAP_SORT */ |
| 10 | // #define BLOCK_SORT_USE_HEAP_SORT | ||
| 11 | |||
| 12 | #ifdef BLOCK_SORT_USE_HEAP_SORT | ||
| 13 | |||
| 14 | #define HeapSortRefDown(p, vals, n, size, temp) \ | ||
| 15 | { size_t k = n; UInt32 val = vals[temp]; for (;;) { \ | ||
| 16 | size_t s = k << 1; \ | ||
| 17 | if (s > size) break; \ | ||
| 18 | if (s < size && vals[p[s + 1]] > vals[p[s]]) s++; \ | ||
| 19 | if (val >= vals[p[s]]) break; \ | ||
| 20 | p[k] = p[s]; k = s; \ | ||
| 21 | } p[k] = temp; } | ||
| 22 | |||
| 23 | void HeapSortRef(UInt32 *p, UInt32 *vals, size_t size) | ||
| 24 | { | ||
| 25 | if (size <= 1) | ||
| 26 | return; | ||
| 27 | p--; | ||
| 28 | { | ||
| 29 | size_t i = size / 2; | ||
| 30 | do | ||
| 31 | { | ||
| 32 | UInt32 temp = p[i]; | ||
| 33 | HeapSortRefDown(p, vals, i, size, temp); | ||
| 34 | } | ||
| 35 | while (--i != 0); | ||
| 36 | } | ||
| 37 | do | ||
| 38 | { | ||
| 39 | UInt32 temp = p[size]; | ||
| 40 | p[size--] = p[1]; | ||
| 41 | HeapSortRefDown(p, vals, 1, size, temp); | ||
| 42 | } | ||
| 43 | while (size > 1); | ||
| 44 | } | ||
| 45 | |||
| 46 | #endif // BLOCK_SORT_USE_HEAP_SORT | ||
| 47 | |||
| 10 | 48 | ||
| 11 | /* Don't change it !!! */ | 49 | /* Don't change it !!! */ |
| 12 | #define kNumHashBytes 2 | 50 | #define kNumHashBytes 2 |
| @@ -27,26 +65,27 @@ | |||
| 27 | 65 | ||
| 28 | #else | 66 | #else |
| 29 | 67 | ||
| 30 | #define kNumBitsMax 20 | 68 | #define kNumBitsMax 20 |
| 31 | #define kIndexMask ((1 << kNumBitsMax) - 1) | 69 | #define kIndexMask (((UInt32)1 << kNumBitsMax) - 1) |
| 32 | #define kNumExtraBits (32 - kNumBitsMax) | 70 | #define kNumExtraBits (32 - kNumBitsMax) |
| 33 | #define kNumExtra0Bits (kNumExtraBits - 2) | 71 | #define kNumExtra0Bits (kNumExtraBits - 2) |
| 34 | #define kNumExtra0Mask ((1 << kNumExtra0Bits) - 1) | 72 | #define kNumExtra0Mask ((1 << kNumExtra0Bits) - 1) |
| 35 | 73 | ||
| 36 | #define SetFinishedGroupSize(p, size) \ | 74 | #define SetFinishedGroupSize(p, size) \ |
| 37 | { *(p) |= ((((size) - 1) & kNumExtra0Mask) << kNumBitsMax); \ | 75 | { *(p) |= ((((UInt32)(size) - 1) & kNumExtra0Mask) << kNumBitsMax); \ |
| 38 | if ((size) > (1 << kNumExtra0Bits)) { \ | 76 | if ((size) > (1 << kNumExtra0Bits)) { \ |
| 39 | *(p) |= 0x40000000; *((p) + 1) |= ((((size) - 1)>> kNumExtra0Bits) << kNumBitsMax); } } \ | 77 | *(p) |= 0x40000000; \ |
| 78 | *((p) + 1) |= (((UInt32)(size) - 1) >> kNumExtra0Bits) << kNumBitsMax; } } \ | ||
| 40 | 79 | ||
| 41 | static void SetGroupSize(UInt32 *p, UInt32 size) | 80 | static void SetGroupSize(UInt32 *p, size_t size) |
| 42 | { | 81 | { |
| 43 | if (--size == 0) | 82 | if (--size == 0) |
| 44 | return; | 83 | return; |
| 45 | *p |= 0x80000000 | ((size & kNumExtra0Mask) << kNumBitsMax); | 84 | *p |= 0x80000000 | (((UInt32)size & kNumExtra0Mask) << kNumBitsMax); |
| 46 | if (size >= (1 << kNumExtra0Bits)) | 85 | if (size >= (1 << kNumExtra0Bits)) |
| 47 | { | 86 | { |
| 48 | *p |= 0x40000000; | 87 | *p |= 0x40000000; |
| 49 | p[1] |= ((size >> kNumExtra0Bits) << kNumBitsMax); | 88 | p[1] |= (((UInt32)size >> kNumExtra0Bits) << kNumBitsMax); |
| 50 | } | 89 | } |
| 51 | } | 90 | } |
| 52 | 91 | ||
| @@ -59,12 +98,14 @@ returns: 1 - if there are groups, 0 - no more groups | |||
| 59 | */ | 98 | */ |
| 60 | 99 | ||
| 61 | static | 100 | static |
| 62 | UInt32 | 101 | unsigned |
| 63 | Z7_FASTCALL | 102 | Z7_FASTCALL |
| 64 | SortGroup(UInt32 BlockSize, UInt32 NumSortedBytes, UInt32 groupOffset, UInt32 groupSize, int NumRefBits, UInt32 *Indices | 103 | SortGroup(size_t BlockSize, size_t NumSortedBytes, |
| 65 | #ifndef BLOCK_SORT_USE_HEAP_SORT | 104 | size_t groupOffset, size_t groupSize, |
| 66 | , UInt32 left, UInt32 range | 105 | unsigned NumRefBits, UInt32 *Indices |
| 67 | #endif | 106 | #ifndef BLOCK_SORT_USE_HEAP_SORT |
| 107 | , size_t left, size_t range | ||
| 108 | #endif | ||
| 68 | ) | 109 | ) |
| 69 | { | 110 | { |
| 70 | UInt32 *ind2 = Indices + groupOffset; | 111 | UInt32 *ind2 = Indices + groupOffset; |
| @@ -79,90 +120,93 @@ SortGroup(UInt32 BlockSize, UInt32 NumSortedBytes, UInt32 groupOffset, UInt32 gr | |||
| 79 | return 0; | 120 | return 0; |
| 80 | } | 121 | } |
| 81 | Groups = Indices + BlockSize + BS_TEMP_SIZE; | 122 | Groups = Indices + BlockSize + BS_TEMP_SIZE; |
| 82 | if (groupSize <= ((UInt32)1 << NumRefBits) | 123 | if (groupSize <= ((size_t)1 << NumRefBits) |
| 83 | #ifndef BLOCK_SORT_USE_HEAP_SORT | 124 | #ifndef BLOCK_SORT_USE_HEAP_SORT |
| 84 | && groupSize <= range | 125 | && groupSize <= range |
| 85 | #endif | 126 | #endif |
| 86 | ) | 127 | ) |
| 87 | { | 128 | { |
| 88 | UInt32 *temp = Indices + BlockSize; | 129 | UInt32 *temp = Indices + BlockSize; |
| 89 | UInt32 j; | 130 | size_t j, group; |
| 90 | UInt32 mask, thereAreGroups, group, cg; | 131 | UInt32 mask, cg; |
| 132 | unsigned thereAreGroups; | ||
| 91 | { | 133 | { |
| 92 | UInt32 gPrev; | 134 | UInt32 gPrev; |
| 93 | UInt32 gRes = 0; | 135 | UInt32 gRes = 0; |
| 94 | { | 136 | { |
| 95 | UInt32 sp = ind2[0] + NumSortedBytes; | 137 | size_t sp = ind2[0] + NumSortedBytes; |
| 96 | if (sp >= BlockSize) sp -= BlockSize; | 138 | if (sp >= BlockSize) |
| 139 | sp -= BlockSize; | ||
| 97 | gPrev = Groups[sp]; | 140 | gPrev = Groups[sp]; |
| 98 | temp[0] = (gPrev << NumRefBits); | 141 | temp[0] = gPrev << NumRefBits; |
| 99 | } | 142 | } |
| 100 | 143 | ||
| 101 | for (j = 1; j < groupSize; j++) | 144 | for (j = 1; j < groupSize; j++) |
| 102 | { | 145 | { |
| 103 | UInt32 sp = ind2[j] + NumSortedBytes; | 146 | size_t sp = ind2[j] + NumSortedBytes; |
| 104 | UInt32 g; | 147 | UInt32 g; |
| 105 | if (sp >= BlockSize) sp -= BlockSize; | 148 | if (sp >= BlockSize) |
| 149 | sp -= BlockSize; | ||
| 106 | g = Groups[sp]; | 150 | g = Groups[sp]; |
| 107 | temp[j] = (g << NumRefBits) | j; | 151 | temp[j] = (g << NumRefBits) | (UInt32)j; |
| 108 | gRes |= (gPrev ^ g); | 152 | gRes |= (gPrev ^ g); |
| 109 | } | 153 | } |
| 110 | if (gRes == 0) | 154 | if (gRes == 0) |
| 111 | { | 155 | { |
| 112 | #ifndef BLOCK_SORT_EXTERNAL_FLAGS | 156 | #ifndef BLOCK_SORT_EXTERNAL_FLAGS |
| 113 | SetGroupSize(ind2, groupSize); | 157 | SetGroupSize(ind2, groupSize); |
| 114 | #endif | 158 | #endif |
| 115 | return 1; | 159 | return 1; |
| 116 | } | 160 | } |
| 117 | } | 161 | } |
| 118 | 162 | ||
| 119 | HeapSort(temp, groupSize); | 163 | HeapSort(temp, groupSize); |
| 120 | mask = (((UInt32)1 << NumRefBits) - 1); | 164 | mask = ((UInt32)1 << NumRefBits) - 1; |
| 121 | thereAreGroups = 0; | 165 | thereAreGroups = 0; |
| 122 | 166 | ||
| 123 | group = groupOffset; | 167 | group = groupOffset; |
| 124 | cg = (temp[0] >> NumRefBits); | 168 | cg = temp[0] >> NumRefBits; |
| 125 | temp[0] = ind2[temp[0] & mask]; | 169 | temp[0] = ind2[temp[0] & mask]; |
| 126 | 170 | ||
| 127 | { | 171 | { |
| 128 | #ifdef BLOCK_SORT_EXTERNAL_FLAGS | 172 | #ifdef BLOCK_SORT_EXTERNAL_FLAGS |
| 129 | UInt32 *Flags = Groups + BlockSize; | 173 | UInt32 *Flags = Groups + BlockSize; |
| 130 | #else | 174 | #else |
| 131 | UInt32 prevGroupStart = 0; | 175 | size_t prevGroupStart = 0; |
| 132 | #endif | 176 | #endif |
| 133 | 177 | ||
| 134 | for (j = 1; j < groupSize; j++) | 178 | for (j = 1; j < groupSize; j++) |
| 135 | { | 179 | { |
| 136 | UInt32 val = temp[j]; | 180 | const UInt32 val = temp[j]; |
| 137 | UInt32 cgCur = (val >> NumRefBits); | 181 | const UInt32 cgCur = val >> NumRefBits; |
| 138 | 182 | ||
| 139 | if (cgCur != cg) | 183 | if (cgCur != cg) |
| 140 | { | 184 | { |
| 141 | cg = cgCur; | 185 | cg = cgCur; |
| 142 | group = groupOffset + j; | 186 | group = groupOffset + j; |
| 143 | 187 | ||
| 144 | #ifdef BLOCK_SORT_EXTERNAL_FLAGS | 188 | #ifdef BLOCK_SORT_EXTERNAL_FLAGS |
| 145 | { | 189 | { |
| 146 | UInt32 t = group - 1; | 190 | const size_t t = group - 1; |
| 147 | Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask)); | 191 | Flags[t >> kNumFlagsBits] &= ~((UInt32)1 << (t & kFlagsMask)); |
| 148 | } | 192 | } |
| 149 | #else | 193 | #else |
| 150 | SetGroupSize(temp + prevGroupStart, j - prevGroupStart); | 194 | SetGroupSize(temp + prevGroupStart, j - prevGroupStart); |
| 151 | prevGroupStart = j; | 195 | prevGroupStart = j; |
| 152 | #endif | 196 | #endif |
| 153 | } | 197 | } |
| 154 | else | 198 | else |
| 155 | thereAreGroups = 1; | 199 | thereAreGroups = 1; |
| 156 | { | 200 | { |
| 157 | UInt32 ind = ind2[val & mask]; | 201 | const UInt32 ind = ind2[val & mask]; |
| 158 | temp[j] = ind; | 202 | temp[j] = ind; |
| 159 | Groups[ind] = group; | 203 | Groups[ind] = (UInt32)group; |
| 160 | } | 204 | } |
| 161 | } | 205 | } |
| 162 | 206 | ||
| 163 | #ifndef BLOCK_SORT_EXTERNAL_FLAGS | 207 | #ifndef BLOCK_SORT_EXTERNAL_FLAGS |
| 164 | SetGroupSize(temp + prevGroupStart, j - prevGroupStart); | 208 | SetGroupSize(temp + prevGroupStart, j - prevGroupStart); |
| 165 | #endif | 209 | #endif |
| 166 | } | 210 | } |
| 167 | 211 | ||
| 168 | for (j = 0; j < groupSize; j++) | 212 | for (j = 0; j < groupSize; j++) |
| @@ -172,37 +216,42 @@ SortGroup(UInt32 BlockSize, UInt32 NumSortedBytes, UInt32 groupOffset, UInt32 gr | |||
| 172 | 216 | ||
| 173 | /* Check that all strings are in one group (cannot sort) */ | 217 | /* Check that all strings are in one group (cannot sort) */ |
| 174 | { | 218 | { |
| 175 | UInt32 group, j; | 219 | UInt32 group; |
| 176 | UInt32 sp = ind2[0] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize; | 220 | size_t j; |
| 221 | size_t sp = ind2[0] + NumSortedBytes; | ||
| 222 | if (sp >= BlockSize) | ||
| 223 | sp -= BlockSize; | ||
| 177 | group = Groups[sp]; | 224 | group = Groups[sp]; |
| 178 | for (j = 1; j < groupSize; j++) | 225 | for (j = 1; j < groupSize; j++) |
| 179 | { | 226 | { |
| 180 | sp = ind2[j] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize; | 227 | sp = ind2[j] + NumSortedBytes; |
| 228 | if (sp >= BlockSize) | ||
| 229 | sp -= BlockSize; | ||
| 181 | if (Groups[sp] != group) | 230 | if (Groups[sp] != group) |
| 182 | break; | 231 | break; |
| 183 | } | 232 | } |
| 184 | if (j == groupSize) | 233 | if (j == groupSize) |
| 185 | { | 234 | { |
| 186 | #ifndef BLOCK_SORT_EXTERNAL_FLAGS | 235 | #ifndef BLOCK_SORT_EXTERNAL_FLAGS |
| 187 | SetGroupSize(ind2, groupSize); | 236 | SetGroupSize(ind2, groupSize); |
| 188 | #endif | 237 | #endif |
| 189 | return 1; | 238 | return 1; |
| 190 | } | 239 | } |
| 191 | } | 240 | } |
| 192 | 241 | ||
| 193 | #ifndef BLOCK_SORT_USE_HEAP_SORT | 242 | #ifndef BLOCK_SORT_USE_HEAP_SORT |
| 194 | { | 243 | { |
| 195 | /* ---------- Range Sort ---------- */ | 244 | /* ---------- Range Sort ---------- */ |
| 196 | UInt32 i; | 245 | size_t i; |
| 197 | UInt32 mid; | 246 | size_t mid; |
| 198 | for (;;) | 247 | for (;;) |
| 199 | { | 248 | { |
| 200 | UInt32 j; | 249 | size_t j; |
| 201 | if (range <= 1) | 250 | if (range <= 1) |
| 202 | { | 251 | { |
| 203 | #ifndef BLOCK_SORT_EXTERNAL_FLAGS | 252 | #ifndef BLOCK_SORT_EXTERNAL_FLAGS |
| 204 | SetGroupSize(ind2, groupSize); | 253 | SetGroupSize(ind2, groupSize); |
| 205 | #endif | 254 | #endif |
| 206 | return 1; | 255 | return 1; |
| 207 | } | 256 | } |
| 208 | mid = left + ((range + 1) >> 1); | 257 | mid = left + ((range + 1) >> 1); |
| @@ -210,7 +259,7 @@ SortGroup(UInt32 BlockSize, UInt32 NumSortedBytes, UInt32 groupOffset, UInt32 gr | |||
| 210 | i = 0; | 259 | i = 0; |
| 211 | do | 260 | do |
| 212 | { | 261 | { |
| 213 | UInt32 sp = ind2[i] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize; | 262 | size_t sp = ind2[i] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize; |
| 214 | if (Groups[sp] >= mid) | 263 | if (Groups[sp] >= mid) |
| 215 | { | 264 | { |
| 216 | for (j--; j > i; j--) | 265 | for (j--; j > i; j--) |
| @@ -238,51 +287,53 @@ SortGroup(UInt32 BlockSize, UInt32 NumSortedBytes, UInt32 groupOffset, UInt32 gr | |||
| 238 | break; | 287 | break; |
| 239 | } | 288 | } |
| 240 | 289 | ||
| 241 | #ifdef BLOCK_SORT_EXTERNAL_FLAGS | 290 | #ifdef BLOCK_SORT_EXTERNAL_FLAGS |
| 242 | { | 291 | { |
| 243 | UInt32 t = (groupOffset + i - 1); | 292 | const size_t t = groupOffset + i - 1; |
| 244 | UInt32 *Flags = Groups + BlockSize; | 293 | UInt32 *Flags = Groups + BlockSize; |
| 245 | Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask)); | 294 | Flags[t >> kNumFlagsBits] &= ~((UInt32)1 << (t & kFlagsMask)); |
| 246 | } | 295 | } |
| 247 | #endif | 296 | #endif |
| 248 | 297 | ||
| 249 | { | 298 | { |
| 250 | UInt32 j; | 299 | size_t j; |
| 251 | for (j = i; j < groupSize; j++) | 300 | for (j = i; j < groupSize; j++) |
| 252 | Groups[ind2[j]] = groupOffset + i; | 301 | Groups[ind2[j]] = (UInt32)(groupOffset + i); |
| 253 | } | 302 | } |
| 254 | 303 | ||
| 255 | { | 304 | { |
| 256 | UInt32 res = SortGroup(BlockSize, NumSortedBytes, groupOffset, i, NumRefBits, Indices, left, mid - left); | 305 | unsigned res = SortGroup(BlockSize, NumSortedBytes, groupOffset, i, NumRefBits, Indices, left, mid - left); |
| 257 | return res | SortGroup(BlockSize, NumSortedBytes, groupOffset + i, groupSize - i, NumRefBits, Indices, mid, range - (mid - left)); | 306 | return res | SortGroup(BlockSize, NumSortedBytes, groupOffset + i, groupSize - i, NumRefBits, Indices, mid, range - (mid - left)); |
| 258 | } | 307 | } |
| 259 | 308 | ||
| 260 | } | 309 | } |
| 261 | 310 | ||
| 262 | #else | 311 | #else // BLOCK_SORT_USE_HEAP_SORT |
| 263 | 312 | ||
| 264 | /* ---------- Heap Sort ---------- */ | 313 | /* ---------- Heap Sort ---------- */ |
| 265 | 314 | ||
| 266 | { | 315 | { |
| 267 | UInt32 j; | 316 | size_t j; |
| 268 | for (j = 0; j < groupSize; j++) | 317 | for (j = 0; j < groupSize; j++) |
| 269 | { | 318 | { |
| 270 | UInt32 sp = ind2[j] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize; | 319 | size_t sp = ind2[j] + NumSortedBytes; |
| 271 | ind2[j] = sp; | 320 | if (sp >= BlockSize) |
| 321 | sp -= BlockSize; | ||
| 322 | ind2[j] = (UInt32)sp; | ||
| 272 | } | 323 | } |
| 273 | 324 | ||
| 274 | HeapSortRef(ind2, Groups, groupSize); | 325 | HeapSortRef(ind2, Groups, groupSize); |
| 275 | 326 | ||
| 276 | /* Write Flags */ | 327 | /* Write Flags */ |
| 277 | { | 328 | { |
| 278 | UInt32 sp = ind2[0]; | 329 | size_t sp = ind2[0]; |
| 279 | UInt32 group = Groups[sp]; | 330 | UInt32 group = Groups[sp]; |
| 280 | 331 | ||
| 281 | #ifdef BLOCK_SORT_EXTERNAL_FLAGS | 332 | #ifdef BLOCK_SORT_EXTERNAL_FLAGS |
| 282 | UInt32 *Flags = Groups + BlockSize; | 333 | UInt32 *Flags = Groups + BlockSize; |
| 283 | #else | 334 | #else |
| 284 | UInt32 prevGroupStart = 0; | 335 | size_t prevGroupStart = 0; |
| 285 | #endif | 336 | #endif |
| 286 | 337 | ||
| 287 | for (j = 1; j < groupSize; j++) | 338 | for (j = 1; j < groupSize; j++) |
| 288 | { | 339 | { |
| @@ -290,149 +341,210 @@ SortGroup(UInt32 BlockSize, UInt32 NumSortedBytes, UInt32 groupOffset, UInt32 gr | |||
| 290 | if (Groups[sp] != group) | 341 | if (Groups[sp] != group) |
| 291 | { | 342 | { |
| 292 | group = Groups[sp]; | 343 | group = Groups[sp]; |
| 293 | #ifdef BLOCK_SORT_EXTERNAL_FLAGS | 344 | #ifdef BLOCK_SORT_EXTERNAL_FLAGS |
| 294 | { | 345 | { |
| 295 | UInt32 t = groupOffset + j - 1; | 346 | const size_t t = groupOffset + j - 1; |
| 296 | Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask)); | 347 | Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask)); |
| 297 | } | 348 | } |
| 298 | #else | 349 | #else |
| 299 | SetGroupSize(ind2 + prevGroupStart, j - prevGroupStart); | 350 | SetGroupSize(ind2 + prevGroupStart, j - prevGroupStart); |
| 300 | prevGroupStart = j; | 351 | prevGroupStart = j; |
| 301 | #endif | 352 | #endif |
| 302 | } | 353 | } |
| 303 | } | 354 | } |
| 304 | 355 | ||
| 305 | #ifndef BLOCK_SORT_EXTERNAL_FLAGS | 356 | #ifndef BLOCK_SORT_EXTERNAL_FLAGS |
| 306 | SetGroupSize(ind2 + prevGroupStart, j - prevGroupStart); | 357 | SetGroupSize(ind2 + prevGroupStart, j - prevGroupStart); |
| 307 | #endif | 358 | #endif |
| 308 | } | 359 | } |
| 309 | { | 360 | { |
| 310 | /* Write new Groups values and Check that there are groups */ | 361 | /* Write new Groups values and Check that there are groups */ |
| 311 | UInt32 thereAreGroups = 0; | 362 | unsigned thereAreGroups = 0; |
| 312 | for (j = 0; j < groupSize; j++) | 363 | for (j = 0; j < groupSize; j++) |
| 313 | { | 364 | { |
| 314 | UInt32 group = groupOffset + j; | 365 | size_t group = groupOffset + j; |
| 315 | #ifndef BLOCK_SORT_EXTERNAL_FLAGS | 366 | #ifndef BLOCK_SORT_EXTERNAL_FLAGS |
| 316 | UInt32 subGroupSize = ((ind2[j] & ~0xC0000000) >> kNumBitsMax); | 367 | UInt32 subGroupSize = ((ind2[j] & ~0xC0000000) >> kNumBitsMax); |
| 317 | if ((ind2[j] & 0x40000000) != 0) | 368 | if (ind2[j] & 0x40000000) |
| 318 | subGroupSize += ((ind2[(size_t)j + 1] >> kNumBitsMax) << kNumExtra0Bits); | 369 | subGroupSize += ((ind2[(size_t)j + 1] >> kNumBitsMax) << kNumExtra0Bits); |
| 319 | subGroupSize++; | 370 | subGroupSize++; |
| 320 | for (;;) | 371 | for (;;) |
| 321 | { | 372 | { |
| 322 | UInt32 original = ind2[j]; | 373 | const UInt32 original = ind2[j]; |
| 323 | UInt32 sp = original & kIndexMask; | 374 | size_t sp = original & kIndexMask; |
| 324 | if (sp < NumSortedBytes) sp += BlockSize; sp -= NumSortedBytes; | 375 | if (sp < NumSortedBytes) |
| 325 | ind2[j] = sp | (original & ~kIndexMask); | 376 | sp += BlockSize; |
| 326 | Groups[sp] = group; | 377 | sp -= NumSortedBytes; |
| 378 | ind2[j] = (UInt32)sp | (original & ~kIndexMask); | ||
| 379 | Groups[sp] = (UInt32)group; | ||
| 327 | if (--subGroupSize == 0) | 380 | if (--subGroupSize == 0) |
| 328 | break; | 381 | break; |
| 329 | j++; | 382 | j++; |
| 330 | thereAreGroups = 1; | 383 | thereAreGroups = 1; |
| 331 | } | 384 | } |
| 332 | #else | 385 | #else |
| 333 | UInt32 *Flags = Groups + BlockSize; | 386 | UInt32 *Flags = Groups + BlockSize; |
| 334 | for (;;) | 387 | for (;;) |
| 335 | { | 388 | { |
| 336 | UInt32 sp = ind2[j]; if (sp < NumSortedBytes) sp += BlockSize; sp -= NumSortedBytes; | 389 | size_t sp = ind2[j]; |
| 337 | ind2[j] = sp; | 390 | if (sp < NumSortedBytes) |
| 338 | Groups[sp] = group; | 391 | sp += BlockSize; |
| 392 | sp -= NumSortedBytes; | ||
| 393 | ind2[j] = (UInt32)sp; | ||
| 394 | Groups[sp] = (UInt32)group; | ||
| 339 | if ((Flags[(groupOffset + j) >> kNumFlagsBits] & (1 << ((groupOffset + j) & kFlagsMask))) == 0) | 395 | if ((Flags[(groupOffset + j) >> kNumFlagsBits] & (1 << ((groupOffset + j) & kFlagsMask))) == 0) |
| 340 | break; | 396 | break; |
| 341 | j++; | 397 | j++; |
| 342 | thereAreGroups = 1; | 398 | thereAreGroups = 1; |
| 343 | } | 399 | } |
| 344 | #endif | 400 | #endif |
| 345 | } | 401 | } |
| 346 | return thereAreGroups; | 402 | return thereAreGroups; |
| 347 | } | 403 | } |
| 348 | } | 404 | } |
| 349 | #endif | 405 | #endif // BLOCK_SORT_USE_HEAP_SORT |
| 350 | } | 406 | } |
| 351 | 407 | ||
| 408 | |||
| 352 | /* conditions: blockSize > 0 */ | 409 | /* conditions: blockSize > 0 */ |
| 353 | UInt32 BlockSort(UInt32 *Indices, const Byte *data, UInt32 blockSize) | 410 | UInt32 BlockSort(UInt32 *Indices, const Byte *data, size_t blockSize) |
| 354 | { | 411 | { |
| 355 | UInt32 *counters = Indices + blockSize; | 412 | UInt32 *counters = Indices + blockSize; |
| 356 | UInt32 i; | 413 | size_t i; |
| 357 | UInt32 *Groups; | 414 | UInt32 *Groups; |
| 358 | #ifdef BLOCK_SORT_EXTERNAL_FLAGS | 415 | #ifdef BLOCK_SORT_EXTERNAL_FLAGS |
| 359 | UInt32 *Flags; | 416 | UInt32 *Flags; |
| 360 | #endif | 417 | #endif |
| 361 | 418 | ||
| 362 | /* Radix-Sort for 2 bytes */ | 419 | /* Radix-Sort for 2 bytes */ |
| 420 | // { UInt32 yyy; for (yyy = 0; yyy < 100; yyy++) { | ||
| 363 | for (i = 0; i < kNumHashValues; i++) | 421 | for (i = 0; i < kNumHashValues; i++) |
| 364 | counters[i] = 0; | 422 | counters[i] = 0; |
| 365 | for (i = 0; i < blockSize - 1; i++) | 423 | { |
| 366 | counters[((UInt32)data[i] << 8) | data[(size_t)i + 1]]++; | 424 | const Byte *data2 = data; |
| 367 | counters[((UInt32)data[i] << 8) | data[0]]++; | 425 | size_t a = data[(size_t)blockSize - 1]; |
| 426 | const Byte *data_lim = data + blockSize; | ||
| 427 | if (blockSize >= 4) | ||
| 428 | { | ||
| 429 | data_lim -= 3; | ||
| 430 | do | ||
| 431 | { | ||
| 432 | size_t b; | ||
| 433 | b = data2[0]; counters[(a << 8) | b]++; | ||
| 434 | a = data2[1]; counters[(b << 8) | a]++; | ||
| 435 | b = data2[2]; counters[(a << 8) | b]++; | ||
| 436 | a = data2[3]; counters[(b << 8) | a]++; | ||
| 437 | data2 += 4; | ||
| 438 | } | ||
| 439 | while (data2 < data_lim); | ||
| 440 | data_lim += 3; | ||
| 441 | } | ||
| 442 | while (data2 != data_lim) | ||
| 443 | { | ||
| 444 | size_t b = *data2++; | ||
| 445 | counters[(a << 8) | b]++; | ||
| 446 | a = b; | ||
| 447 | } | ||
| 448 | } | ||
| 449 | // }} | ||
| 368 | 450 | ||
| 369 | Groups = counters + BS_TEMP_SIZE; | 451 | Groups = counters + BS_TEMP_SIZE; |
| 370 | #ifdef BLOCK_SORT_EXTERNAL_FLAGS | 452 | #ifdef BLOCK_SORT_EXTERNAL_FLAGS |
| 371 | Flags = Groups + blockSize; | 453 | Flags = Groups + blockSize; |
| 372 | { | 454 | { |
| 373 | UInt32 numWords = (blockSize + kFlagsMask) >> kNumFlagsBits; | 455 | const size_t numWords = (blockSize + kFlagsMask) >> kNumFlagsBits; |
| 374 | for (i = 0; i < numWords; i++) | 456 | for (i = 0; i < numWords; i++) |
| 375 | Flags[i] = kAllFlags; | 457 | Flags[i] = kAllFlags; |
| 376 | } | 458 | } |
| 377 | #endif | 459 | #endif |
| 378 | 460 | ||
| 379 | { | 461 | { |
| 380 | UInt32 sum = 0; | 462 | UInt32 sum = 0; |
| 381 | for (i = 0; i < kNumHashValues; i++) | 463 | for (i = 0; i < kNumHashValues; i++) |
| 382 | { | 464 | { |
| 383 | UInt32 groupSize = counters[i]; | 465 | const UInt32 groupSize = counters[i]; |
| 384 | if (groupSize > 0) | 466 | counters[i] = sum; |
| 467 | sum += groupSize; | ||
| 468 | #ifdef BLOCK_SORT_EXTERNAL_FLAGS | ||
| 469 | if (groupSize) | ||
| 385 | { | 470 | { |
| 386 | #ifdef BLOCK_SORT_EXTERNAL_FLAGS | 471 | const UInt32 t = sum - 1; |
| 387 | UInt32 t = sum + groupSize - 1; | 472 | Flags[t >> kNumFlagsBits] &= ~((UInt32)1 << (t & kFlagsMask)); |
| 388 | Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask)); | ||
| 389 | #endif | ||
| 390 | sum += groupSize; | ||
| 391 | } | 473 | } |
| 392 | counters[i] = sum - groupSize; | 474 | #endif |
| 393 | } | 475 | } |
| 476 | } | ||
| 394 | 477 | ||
| 395 | for (i = 0; i < blockSize - 1; i++) | 478 | for (i = 0; i < blockSize - 1; i++) |
| 396 | Groups[i] = counters[((UInt32)data[i] << 8) | data[(size_t)i + 1]]; | 479 | Groups[i] = counters[((unsigned)data[i] << 8) | data[(size_t)i + 1]]; |
| 397 | Groups[i] = counters[((UInt32)data[i] << 8) | data[0]]; | 480 | Groups[i] = counters[((unsigned)data[i] << 8) | data[0]]; |
| 481 | |||
| 482 | { | ||
| 483 | #define SET_Indices(a, b, i) \ | ||
| 484 | { UInt32 c; \ | ||
| 485 | a = (a << 8) | (b); \ | ||
| 486 | c = counters[a]; \ | ||
| 487 | Indices[c] = (UInt32)i++; \ | ||
| 488 | counters[a] = c + 1; \ | ||
| 489 | } | ||
| 398 | 490 | ||
| 399 | for (i = 0; i < blockSize - 1; i++) | 491 | size_t a = data[0]; |
| 400 | Indices[counters[((UInt32)data[i] << 8) | data[(size_t)i + 1]]++] = i; | 492 | const Byte *data_ptr = data + 1; |
| 401 | Indices[counters[((UInt32)data[i] << 8) | data[0]]++] = i; | 493 | i = 0; |
| 402 | 494 | if (blockSize >= 3) | |
| 403 | #ifndef BLOCK_SORT_EXTERNAL_FLAGS | 495 | { |
| 496 | blockSize -= 2; | ||
| 497 | do | ||
| 498 | { | ||
| 499 | size_t b; | ||
| 500 | b = data_ptr[0]; SET_Indices(a, b, i) | ||
| 501 | a = data_ptr[1]; SET_Indices(b, a, i) | ||
| 502 | data_ptr += 2; | ||
| 503 | } | ||
| 504 | while (i < blockSize); | ||
| 505 | blockSize += 2; | ||
| 506 | } | ||
| 507 | if (i < blockSize - 1) | ||
| 404 | { | 508 | { |
| 509 | SET_Indices(a, data[(size_t)i + 1], i) | ||
| 510 | a = (Byte)a; | ||
| 511 | } | ||
| 512 | SET_Indices(a, data[0], i) | ||
| 513 | } | ||
| 514 | |||
| 515 | #ifndef BLOCK_SORT_EXTERNAL_FLAGS | ||
| 516 | { | ||
| 405 | UInt32 prev = 0; | 517 | UInt32 prev = 0; |
| 406 | for (i = 0; i < kNumHashValues; i++) | 518 | for (i = 0; i < kNumHashValues; i++) |
| 407 | { | 519 | { |
| 408 | UInt32 prevGroupSize = counters[i] - prev; | 520 | const UInt32 prevGroupSize = counters[i] - prev; |
| 409 | if (prevGroupSize == 0) | 521 | if (prevGroupSize == 0) |
| 410 | continue; | 522 | continue; |
| 411 | SetGroupSize(Indices + prev, prevGroupSize); | 523 | SetGroupSize(Indices + prev, prevGroupSize); |
| 412 | prev = counters[i]; | 524 | prev = counters[i]; |
| 413 | } | 525 | } |
| 414 | } | ||
| 415 | #endif | ||
| 416 | } | 526 | } |
| 527 | #endif | ||
| 417 | 528 | ||
| 418 | { | 529 | { |
| 419 | int NumRefBits; | 530 | unsigned NumRefBits; |
| 420 | UInt32 NumSortedBytes; | 531 | size_t NumSortedBytes; |
| 421 | for (NumRefBits = 0; ((blockSize - 1) >> NumRefBits) != 0; NumRefBits++); | 532 | for (NumRefBits = 0; ((blockSize - 1) >> NumRefBits) != 0; NumRefBits++) |
| 533 | {} | ||
| 422 | NumRefBits = 32 - NumRefBits; | 534 | NumRefBits = 32 - NumRefBits; |
| 423 | if (NumRefBits > kNumRefBitsMax) | 535 | if (NumRefBits > kNumRefBitsMax) |
| 424 | NumRefBits = kNumRefBitsMax; | 536 | NumRefBits = kNumRefBitsMax; |
| 425 | 537 | ||
| 426 | for (NumSortedBytes = kNumHashBytes; ; NumSortedBytes <<= 1) | 538 | for (NumSortedBytes = kNumHashBytes; ; NumSortedBytes <<= 1) |
| 427 | { | 539 | { |
| 428 | #ifndef BLOCK_SORT_EXTERNAL_FLAGS | 540 | #ifndef BLOCK_SORT_EXTERNAL_FLAGS |
| 429 | UInt32 finishedGroupSize = 0; | 541 | size_t finishedGroupSize = 0; |
| 430 | #endif | 542 | #endif |
| 431 | UInt32 newLimit = 0; | 543 | size_t newLimit = 0; |
| 432 | for (i = 0; i < blockSize;) | 544 | for (i = 0; i < blockSize;) |
| 433 | { | 545 | { |
| 434 | UInt32 groupSize; | 546 | size_t groupSize; |
| 435 | #ifdef BLOCK_SORT_EXTERNAL_FLAGS | 547 | #ifdef BLOCK_SORT_EXTERNAL_FLAGS |
| 436 | 548 | ||
| 437 | if ((Flags[i >> kNumFlagsBits] & (1 << (i & kFlagsMask))) == 0) | 549 | if ((Flags[i >> kNumFlagsBits] & (1 << (i & kFlagsMask))) == 0) |
| 438 | { | 550 | { |
| @@ -441,56 +553,56 @@ UInt32 BlockSort(UInt32 *Indices, const Byte *data, UInt32 blockSize) | |||
| 441 | } | 553 | } |
| 442 | for (groupSize = 1; | 554 | for (groupSize = 1; |
| 443 | (Flags[(i + groupSize) >> kNumFlagsBits] & (1 << ((i + groupSize) & kFlagsMask))) != 0; | 555 | (Flags[(i + groupSize) >> kNumFlagsBits] & (1 << ((i + groupSize) & kFlagsMask))) != 0; |
| 444 | groupSize++); | 556 | groupSize++) |
| 445 | 557 | {} | |
| 446 | groupSize++; | 558 | groupSize++; |
| 447 | 559 | ||
| 448 | #else | 560 | #else |
| 449 | 561 | ||
| 450 | groupSize = ((Indices[i] & ~0xC0000000) >> kNumBitsMax); | 562 | groupSize = (Indices[i] & ~0xC0000000) >> kNumBitsMax; |
| 451 | { | 563 | { |
| 452 | BoolInt finishedGroup = ((Indices[i] & 0x80000000) == 0); | 564 | const BoolInt finishedGroup = ((Indices[i] & 0x80000000) == 0); |
| 453 | if ((Indices[i] & 0x40000000) != 0) | 565 | if (Indices[i] & 0x40000000) |
| 454 | { | ||
| 455 | groupSize += ((Indices[(size_t)i + 1] >> kNumBitsMax) << kNumExtra0Bits); | ||
| 456 | Indices[(size_t)i + 1] &= kIndexMask; | ||
| 457 | } | ||
| 458 | Indices[i] &= kIndexMask; | ||
| 459 | groupSize++; | ||
| 460 | if (finishedGroup || groupSize == 1) | ||
| 461 | { | ||
| 462 | Indices[i - finishedGroupSize] &= kIndexMask; | ||
| 463 | if (finishedGroupSize > 1) | ||
| 464 | Indices[(size_t)(i - finishedGroupSize) + 1] &= kIndexMask; | ||
| 465 | { | 566 | { |
| 466 | UInt32 newGroupSize = groupSize + finishedGroupSize; | 567 | groupSize += ((Indices[(size_t)i + 1] >> kNumBitsMax) << kNumExtra0Bits); |
| 467 | SetFinishedGroupSize(Indices + i - finishedGroupSize, newGroupSize) | 568 | Indices[(size_t)i + 1] &= kIndexMask; |
| 468 | finishedGroupSize = newGroupSize; | ||
| 469 | } | 569 | } |
| 470 | i += groupSize; | 570 | Indices[i] &= kIndexMask; |
| 471 | continue; | 571 | groupSize++; |
| 472 | } | 572 | if (finishedGroup || groupSize == 1) |
| 473 | finishedGroupSize = 0; | 573 | { |
| 574 | Indices[i - finishedGroupSize] &= kIndexMask; | ||
| 575 | if (finishedGroupSize > 1) | ||
| 576 | Indices[(size_t)(i - finishedGroupSize) + 1] &= kIndexMask; | ||
| 577 | { | ||
| 578 | const size_t newGroupSize = groupSize + finishedGroupSize; | ||
| 579 | SetFinishedGroupSize(Indices + i - finishedGroupSize, newGroupSize) | ||
| 580 | finishedGroupSize = newGroupSize; | ||
| 581 | } | ||
| 582 | i += groupSize; | ||
| 583 | continue; | ||
| 584 | } | ||
| 585 | finishedGroupSize = 0; | ||
| 474 | } | 586 | } |
| 475 | 587 | ||
| 476 | #endif | 588 | #endif |
| 477 | 589 | ||
| 478 | if (NumSortedBytes >= blockSize) | 590 | if (NumSortedBytes >= blockSize) |
| 479 | { | 591 | { |
| 480 | UInt32 j; | 592 | size_t j; |
| 481 | for (j = 0; j < groupSize; j++) | 593 | for (j = 0; j < groupSize; j++) |
| 482 | { | 594 | { |
| 483 | UInt32 t = (i + j); | 595 | size_t t = i + j; |
| 484 | /* Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask)); */ | 596 | /* Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask)); */ |
| 485 | Groups[Indices[t]] = t; | 597 | Groups[Indices[t]] = (UInt32)t; |
| 486 | } | 598 | } |
| 487 | } | 599 | } |
| 488 | else | 600 | else |
| 489 | if (SortGroup(blockSize, NumSortedBytes, i, groupSize, NumRefBits, Indices | 601 | if (SortGroup(blockSize, NumSortedBytes, i, groupSize, NumRefBits, Indices |
| 490 | #ifndef BLOCK_SORT_USE_HEAP_SORT | 602 | #ifndef BLOCK_SORT_USE_HEAP_SORT |
| 491 | , 0, blockSize | 603 | , 0, blockSize |
| 492 | #endif | 604 | #endif |
| 493 | ) != 0) | 605 | )) |
| 494 | newLimit = i + groupSize; | 606 | newLimit = i + groupSize; |
| 495 | i += groupSize; | 607 | i += groupSize; |
| 496 | } | 608 | } |
| @@ -498,19 +610,19 @@ UInt32 BlockSort(UInt32 *Indices, const Byte *data, UInt32 blockSize) | |||
| 498 | break; | 610 | break; |
| 499 | } | 611 | } |
| 500 | } | 612 | } |
| 501 | #ifndef BLOCK_SORT_EXTERNAL_FLAGS | 613 | #ifndef BLOCK_SORT_EXTERNAL_FLAGS |
| 502 | for (i = 0; i < blockSize;) | 614 | for (i = 0; i < blockSize;) |
| 503 | { | 615 | { |
| 504 | UInt32 groupSize = ((Indices[i] & ~0xC0000000) >> kNumBitsMax); | 616 | size_t groupSize = (Indices[i] & ~0xC0000000) >> kNumBitsMax; |
| 505 | if ((Indices[i] & 0x40000000) != 0) | 617 | if (Indices[i] & 0x40000000) |
| 506 | { | 618 | { |
| 507 | groupSize += ((Indices[(size_t)i + 1] >> kNumBitsMax) << kNumExtra0Bits); | 619 | groupSize += (Indices[(size_t)i + 1] >> kNumBitsMax) << kNumExtra0Bits; |
| 508 | Indices[(size_t)i + 1] &= kIndexMask; | 620 | Indices[(size_t)i + 1] &= kIndexMask; |
| 509 | } | 621 | } |
| 510 | Indices[i] &= kIndexMask; | 622 | Indices[i] &= kIndexMask; |
| 511 | groupSize++; | 623 | groupSize++; |
| 512 | i += groupSize; | 624 | i += groupSize; |
| 513 | } | 625 | } |
| 514 | #endif | 626 | #endif |
| 515 | return Groups[0]; | 627 | return Groups[0]; |
| 516 | } | 628 | } |
