diff options
Diffstat (limited to 'C/HuffEnc.c')
| -rw-r--r-- | C/HuffEnc.c | 384 |
1 files changed, 295 insertions, 89 deletions
diff --git a/C/HuffEnc.c b/C/HuffEnc.c index 996da30..cbf8c22 100644 --- a/C/HuffEnc.c +++ b/C/HuffEnc.c | |||
| @@ -1,60 +1,125 @@ | |||
| 1 | /* HuffEnc.c -- functions for Huffman encoding | 1 | /* HuffEnc.c -- functions for Huffman encoding |
| 2 | 2023-09-07 : Igor Pavlov : Public domain */ | 2 | Igor Pavlov : Public domain */ |
| 3 | 3 | ||
| 4 | #include "Precomp.h" | 4 | #include "Precomp.h" |
| 5 | 5 | ||
| 6 | #include <string.h> | ||
| 7 | |||
| 6 | #include "HuffEnc.h" | 8 | #include "HuffEnc.h" |
| 7 | #include "Sort.h" | 9 | #include "Sort.h" |
| 10 | #include "CpuArch.h" | ||
| 8 | 11 | ||
| 9 | #define kMaxLen 16 | 12 | #define kMaxLen Z7_HUFFMAN_LEN_MAX |
| 10 | #define NUM_BITS 10 | 13 | #define NUM_BITS 10 |
| 11 | #define MASK ((1u << NUM_BITS) - 1) | 14 | #define MASK ((1u << NUM_BITS) - 1) |
| 12 | 15 | #define FREQ_MASK (~(UInt32)MASK) | |
| 13 | #define NUM_COUNTERS 64 | 16 | #define NUM_COUNTERS (48 * 2) |
| 14 | 17 | ||
| 15 | #define HUFFMAN_SPEED_OPT | 18 | #if 1 && (defined(MY_CPU_LE) || defined(MY_CPU_BE)) |
| 19 | #if defined(MY_CPU_LE) | ||
| 20 | #define HI_HALF_OFFSET 1 | ||
| 21 | #else | ||
| 22 | #define HI_HALF_OFFSET 0 | ||
| 23 | #endif | ||
| 24 | #define LOAD_PARENT(p) ((unsigned)*((const UInt16 *)(p) + HI_HALF_OFFSET)) | ||
| 25 | #define STORE_PARENT(p, fb, val) *((UInt16 *)(p) + HI_HALF_OFFSET) = (UInt16)(val); | ||
| 26 | #define STORE_PARENT_DIRECT(p, fb, hi) STORE_PARENT(p, fb, hi) | ||
| 27 | #define UPDATE_E(eHi) eHi++; | ||
| 28 | #else | ||
| 29 | #define LOAD_PARENT(p) ((unsigned)(*(p) >> NUM_BITS)) | ||
| 30 | #define STORE_PARENT_DIRECT(p, fb, hi) *(p) = ((fb) & MASK) | (hi); // set parent field | ||
| 31 | #define STORE_PARENT(p, fb, val) STORE_PARENT_DIRECT(p, fb, ((UInt32)(val) << NUM_BITS)) | ||
| 32 | #define UPDATE_E(eHi) eHi += 1 << NUM_BITS; | ||
| 33 | #endif | ||
| 16 | 34 | ||
| 17 | void Huffman_Generate(const UInt32 *freqs, UInt32 *p, Byte *lens, UInt32 numSymbols, UInt32 maxLen) | 35 | void Huffman_Generate(const UInt32 *freqs, UInt32 *p, Byte *lens, unsigned numSymbols, unsigned maxLen) |
| 18 | { | 36 | { |
| 19 | UInt32 num = 0; | 37 | #if NUM_COUNTERS > 2 |
| 20 | /* if (maxLen > 10) maxLen = 10; */ | 38 | unsigned counters[NUM_COUNTERS]; |
| 39 | #endif | ||
| 40 | #if 1 && NUM_COUNTERS > (kMaxLen + 4) * 2 | ||
| 41 | #define lenCounters (counters) | ||
| 42 | #define codes (counters + kMaxLen + 4) | ||
| 43 | #else | ||
| 44 | unsigned lenCounters[kMaxLen + 1]; | ||
| 45 | UInt32 codes[kMaxLen + 1]; | ||
| 46 | #endif | ||
| 47 | |||
| 48 | unsigned num; | ||
| 21 | { | 49 | { |
| 22 | UInt32 i; | 50 | unsigned i; |
| 23 | 51 | // UInt32 sum = 0; | |
| 24 | #ifdef HUFFMAN_SPEED_OPT | 52 | |
| 53 | #if NUM_COUNTERS > 2 | ||
| 25 | 54 | ||
| 26 | UInt32 counters[NUM_COUNTERS]; | 55 | #define CTR_ITEM_FOR_FREQ(freq) \ |
| 56 | counters[(freq) >= NUM_COUNTERS - 1 ? NUM_COUNTERS - 1 : (unsigned)(freq)] | ||
| 57 | |||
| 27 | for (i = 0; i < NUM_COUNTERS; i++) | 58 | for (i = 0; i < NUM_COUNTERS; i++) |
| 28 | counters[i] = 0; | 59 | counters[i] = 0; |
| 29 | for (i = 0; i < numSymbols; i++) | 60 | memset(lens, 0, numSymbols); |
| 30 | { | 61 | { |
| 31 | UInt32 freq = freqs[i]; | 62 | const UInt32 *fp = freqs + numSymbols; |
| 32 | counters[(freq < NUM_COUNTERS - 1) ? freq : NUM_COUNTERS - 1]++; | 63 | #define NUM_UNROLLS 1 |
| 64 | #if NUM_UNROLLS > 1 // use 1 if odd (numSymbols) is possisble | ||
| 65 | if (numSymbols & 1) | ||
| 66 | { | ||
| 67 | UInt32 f; | ||
| 68 | f = *--fp; CTR_ITEM_FOR_FREQ(f)++; | ||
| 69 | // sum += f; | ||
| 70 | } | ||
| 71 | #endif | ||
| 72 | do | ||
| 73 | { | ||
| 74 | UInt32 f; | ||
| 75 | fp -= NUM_UNROLLS; | ||
| 76 | f = fp[0]; CTR_ITEM_FOR_FREQ(f)++; | ||
| 77 | // sum += f; | ||
| 78 | #if NUM_UNROLLS > 1 | ||
| 79 | f = fp[1]; CTR_ITEM_FOR_FREQ(f)++; | ||
| 80 | // sum += f; | ||
| 81 | #endif | ||
| 82 | } | ||
| 83 | while (fp != freqs); | ||
| 33 | } | 84 | } |
| 34 | 85 | #if 0 | |
| 35 | for (i = 1; i < NUM_COUNTERS; i++) | 86 | printf("\nsum=%8u numSymbols =%3u ctrs:", sum, numSymbols); |
| 36 | { | 87 | { |
| 37 | UInt32 temp = counters[i]; | 88 | unsigned k = 0; |
| 38 | counters[i] = num; | 89 | for (k = 0; k < NUM_COUNTERS; k++) |
| 39 | num += temp; | 90 | printf(" %u", counters[k]); |
| 40 | } | 91 | } |
| 41 | 92 | #endif | |
| 42 | for (i = 0; i < numSymbols; i++) | 93 | |
| 94 | num = counters[1]; | ||
| 95 | counters[1] = 0; | ||
| 96 | for (i = 2; i != NUM_COUNTERS; i += 2) | ||
| 43 | { | 97 | { |
| 44 | UInt32 freq = freqs[i]; | 98 | unsigned c; |
| 45 | if (freq == 0) | 99 | c = (counters )[i]; (counters )[i] = num; num += c; |
| 46 | lens[i] = 0; | 100 | c = (counters + 1)[i]; (counters + 1)[i] = num; num += c; |
| 47 | else | 101 | } |
| 48 | p[counters[((freq < NUM_COUNTERS - 1) ? freq : NUM_COUNTERS - 1)]++] = i | (freq << NUM_BITS); | 102 | counters[0] = num; // we want to write (freq==0) symbols to the end of (p) array |
| 103 | { | ||
| 104 | i = 0; | ||
| 105 | do | ||
| 106 | { | ||
| 107 | const UInt32 f = freqs[i]; | ||
| 108 | #if 0 | ||
| 109 | if (f == 0) lens[i] = 0; else | ||
| 110 | #endif | ||
| 111 | p[CTR_ITEM_FOR_FREQ(f)++] = i | (f << NUM_BITS); | ||
| 112 | } | ||
| 113 | while (++i != numSymbols); | ||
| 49 | } | 114 | } |
| 50 | counters[0] = 0; | ||
| 51 | HeapSort(p + counters[NUM_COUNTERS - 2], counters[NUM_COUNTERS - 1] - counters[NUM_COUNTERS - 2]); | 115 | HeapSort(p + counters[NUM_COUNTERS - 2], counters[NUM_COUNTERS - 1] - counters[NUM_COUNTERS - 2]); |
| 52 | 116 | ||
| 53 | #else | 117 | #else // NUM_COUNTERS <= 2 |
| 54 | 118 | ||
| 119 | num = 0; | ||
| 55 | for (i = 0; i < numSymbols; i++) | 120 | for (i = 0; i < numSymbols; i++) |
| 56 | { | 121 | { |
| 57 | UInt32 freq = freqs[i]; | 122 | const UInt32 freq = freqs[i]; |
| 58 | if (freq == 0) | 123 | if (freq == 0) |
| 59 | lens[i] = 0; | 124 | lens[i] = 0; |
| 60 | else | 125 | else |
| @@ -62,17 +127,27 @@ void Huffman_Generate(const UInt32 *freqs, UInt32 *p, Byte *lens, UInt32 numSymb | |||
| 62 | } | 127 | } |
| 63 | HeapSort(p, num); | 128 | HeapSort(p, num); |
| 64 | 129 | ||
| 65 | #endif | 130 | #endif |
| 66 | } | 131 | } |
| 67 | 132 | ||
| 68 | if (num < 2) | 133 | if (num <= 2) |
| 69 | { | 134 | { |
| 70 | unsigned minCode = 0; | 135 | unsigned minCode = 0; |
| 71 | unsigned maxCode = 1; | 136 | unsigned maxCode = 1; |
| 72 | if (num == 1) | 137 | if (num) |
| 73 | { | 138 | { |
| 74 | maxCode = (unsigned)p[0] & MASK; | 139 | maxCode = (unsigned)p[(size_t)num - 1] & MASK; |
| 75 | if (maxCode == 0) | 140 | if (num == 2) |
| 141 | { | ||
| 142 | minCode = (unsigned)p[0] & MASK; | ||
| 143 | if (minCode > maxCode) | ||
| 144 | { | ||
| 145 | const unsigned temp = minCode; | ||
| 146 | minCode = maxCode; | ||
| 147 | maxCode = temp; | ||
| 148 | } | ||
| 149 | } | ||
| 150 | else if (maxCode == 0) | ||
| 76 | maxCode++; | 151 | maxCode++; |
| 77 | } | 152 | } |
| 78 | p[minCode] = 0; | 153 | p[minCode] = 0; |
| @@ -80,69 +155,191 @@ void Huffman_Generate(const UInt32 *freqs, UInt32 *p, Byte *lens, UInt32 numSymb | |||
| 80 | lens[minCode] = lens[maxCode] = 1; | 155 | lens[minCode] = lens[maxCode] = 1; |
| 81 | return; | 156 | return; |
| 82 | } | 157 | } |
| 83 | |||
| 84 | { | 158 | { |
| 85 | UInt32 b, e, i; | 159 | unsigned i; |
| 86 | 160 | for (i = 0; i <= kMaxLen; i++) | |
| 87 | i = b = e = 0; | 161 | lenCounters[i] = 0; |
| 88 | do | 162 | lenCounters[1] = 2; // by default root node has 2 child leaves at level 1. |
| 163 | } | ||
| 164 | // if (num != 2) | ||
| 165 | { | ||
| 166 | // num > 2 | ||
| 167 | // the binary tree will contain (num - 1) internal nodes. | ||
| 168 | // p[num - 2] will be root node of binary tree. | ||
| 169 | UInt32 *b; | ||
| 170 | UInt32 *n; | ||
| 171 | // first node will have two leaf childs: p[0] and p[1]: | ||
| 172 | // p[0] += p[1] & FREQ_MASK; // set frequency sum of child leafs | ||
| 173 | // if (pi == n) exit(0); | ||
| 174 | // if (pi != n) | ||
| 89 | { | 175 | { |
| 90 | UInt32 n, m, freq; | 176 | UInt32 fb = (p[1] & FREQ_MASK) + p[0]; |
| 91 | n = (i != num && (b == e || (p[i] >> NUM_BITS) <= (p[b] >> NUM_BITS))) ? i++ : b++; | 177 | UInt32 f = p[2] & FREQ_MASK; |
| 92 | freq = (p[n] & ~MASK); | 178 | const UInt32 *pi = p + 2; |
| 93 | p[n] = (p[n] & MASK) | (e << NUM_BITS); | 179 | UInt32 *e = p; |
| 94 | m = (i != num && (b == e || (p[i] >> NUM_BITS) <= (p[b] >> NUM_BITS))) ? i++ : b++; | 180 | UInt32 eHi = 0; |
| 95 | freq += (p[m] & ~MASK); | 181 | n = p + num; |
| 96 | p[m] = (p[m] & MASK) | (e << NUM_BITS); | 182 | b = p; |
| 97 | p[e] = (p[e] & MASK) | freq; | 183 | // p[0] = fb; |
| 98 | e++; | 184 | for (;;) |
| 185 | { | ||
| 186 | // (b <= e) | ||
| 187 | UInt32 sum; | ||
| 188 | e++; | ||
| 189 | UPDATE_E(eHi) | ||
| 190 | |||
| 191 | // (b < e) | ||
| 192 | |||
| 193 | // p range : high bits | ||
| 194 | // [0, b) : parent : processed nodes that have parent and childs | ||
| 195 | // [b, e) : FREQ : non-processed nodes that have no parent but have childs | ||
| 196 | // [e, pi) : FREQ : processed leaves for which parent node was created | ||
| 197 | // [pi, n) : FREQ : non-processed leaves for which parent node was not created | ||
| 198 | |||
| 199 | // first child | ||
| 200 | // note : (*b < f) is same result as ((*b & FREQ_MASK) < f) | ||
| 201 | if (fb < f) | ||
| 202 | { | ||
| 203 | // node freq is smaller | ||
| 204 | sum = fb & FREQ_MASK; | ||
| 205 | STORE_PARENT_DIRECT (b, fb, eHi) | ||
| 206 | b++; | ||
| 207 | fb = *b; | ||
| 208 | if (b == e) | ||
| 209 | { | ||
| 210 | if (++pi == n) | ||
| 211 | break; | ||
| 212 | sum += f; | ||
| 213 | fb &= MASK; | ||
| 214 | fb |= sum; | ||
| 215 | *e = fb; | ||
| 216 | f = *pi & FREQ_MASK; | ||
| 217 | continue; | ||
| 218 | } | ||
| 219 | } | ||
| 220 | else if (++pi == n) | ||
| 221 | { | ||
| 222 | STORE_PARENT_DIRECT (b, fb, eHi) | ||
| 223 | b++; | ||
| 224 | break; | ||
| 225 | } | ||
| 226 | else | ||
| 227 | { | ||
| 228 | sum = f; | ||
| 229 | f = *pi & FREQ_MASK; | ||
| 230 | } | ||
| 231 | |||
| 232 | // (b < e) | ||
| 233 | |||
| 234 | // second child | ||
| 235 | if (fb < f) | ||
| 236 | { | ||
| 237 | sum += fb; | ||
| 238 | sum &= FREQ_MASK; | ||
| 239 | STORE_PARENT_DIRECT (b, fb, eHi) | ||
| 240 | b++; | ||
| 241 | *e = (*e & MASK) | sum; // set frequency sum | ||
| 242 | // (b <= e) is possible here | ||
| 243 | fb = *b; | ||
| 244 | } | ||
| 245 | else if (++pi == n) | ||
| 246 | break; | ||
| 247 | else | ||
| 248 | { | ||
| 249 | sum += f; | ||
| 250 | f = *pi & FREQ_MASK; | ||
| 251 | *e = (*e & MASK) | sum; // set frequency sum | ||
| 252 | } | ||
| 253 | } | ||
| 99 | } | 254 | } |
| 100 | while (num - e > 1); | ||
| 101 | 255 | ||
| 256 | // printf("\nnum-e=%3u, numSymbols=%3u, num=%3u, b=%3u", n - e, numSymbols, n - p, b - p); | ||
| 102 | { | 257 | { |
| 103 | UInt32 lenCounters[kMaxLen + 1]; | 258 | n -= 2; |
| 104 | for (i = 0; i <= kMaxLen; i++) | 259 | *n &= MASK; // root node : we clear high bits (zero bits mean level == 0) |
| 105 | lenCounters[i] = 0; | 260 | if (n != b) |
| 106 | |||
| 107 | p[--e] &= MASK; | ||
| 108 | lenCounters[1] = 2; | ||
| 109 | while (e != 0) | ||
| 110 | { | ||
| 111 | UInt32 len = (p[p[--e] >> NUM_BITS] >> NUM_BITS) + 1; | ||
| 112 | p[e] = (p[e] & MASK) | (len << NUM_BITS); | ||
| 113 | if (len >= maxLen) | ||
| 114 | for (len = maxLen - 1; lenCounters[len] == 0; len--); | ||
| 115 | lenCounters[len]--; | ||
| 116 | lenCounters[(size_t)len + 1] += 2; | ||
| 117 | } | ||
| 118 | |||
| 119 | { | 261 | { |
| 120 | UInt32 len; | 262 | // We go here, if we have some number of non-created nodes up to root. |
| 121 | i = 0; | 263 | // We process them in simplified code: |
| 122 | for (len = maxLen; len != 0; len--) | 264 | // position of parent for each pair of nodes is known. |
| 265 | // n[-2], n[-1] : current pair of child nodes | ||
| 266 | // (p1) : parent node for current pair. | ||
| 267 | UInt32 *p1 = n; | ||
| 268 | do | ||
| 123 | { | 269 | { |
| 124 | UInt32 k; | 270 | const unsigned len = LOAD_PARENT(p1) + 1; |
| 125 | for (k = lenCounters[len]; k != 0; k--) | 271 | p1--; |
| 126 | lens[p[i++] & MASK] = (Byte)len; | 272 | (lenCounters )[len] -= 2; // we remove 2 leaves from level (len) |
| 273 | (lenCounters + 1)[len] += 2 * 2; // we add 4 leaves at level (len + 1) | ||
| 274 | n -= 2; | ||
| 275 | STORE_PARENT (n , n[0], len) | ||
| 276 | STORE_PARENT (n + 1, n[1], len) | ||
| 127 | } | 277 | } |
| 278 | while (n != b); | ||
| 128 | } | 279 | } |
| 129 | 280 | } | |
| 281 | |||
| 282 | if (b != p) | ||
| 283 | { | ||
| 284 | // we detect level of each node (realtive to root), | ||
| 285 | // and update lenCounters[]. | ||
| 286 | // We process only intermediate nodes and we don't process leaves. | ||
| 287 | do | ||
| 130 | { | 288 | { |
| 131 | UInt32 nextCodes[kMaxLen + 1]; | 289 | // if (ii < b) : parent_bits_of (p[ii]) == index of parent node : ii < (p[ii]) |
| 132 | { | 290 | // if (ii >= b) : parent_bits_of (p[ii]) == level of this (ii) node in tree |
| 133 | UInt32 code = 0; | 291 | unsigned len; |
| 134 | UInt32 len; | 292 | b--; |
| 135 | for (len = 1; len <= kMaxLen; len++) | 293 | len = (unsigned)LOAD_PARENT(p + LOAD_PARENT(b)) + 1; |
| 136 | nextCodes[len] = code = (code + lenCounters[(size_t)len - 1]) << 1; | 294 | STORE_PARENT (b, *b, len) |
| 137 | } | 295 | if (len >= maxLen) |
| 138 | /* if (code + lenCounters[kMaxLen] - 1 != (1 << kMaxLen) - 1) throw 1; */ | ||
| 139 | |||
| 140 | { | 296 | { |
| 141 | UInt32 k; | 297 | // We are not allowed to create node at level (maxLen) and higher, |
| 142 | for (k = 0; k < numSymbols; k++) | 298 | // because all leaves must be placed to level (maxLen) or lower. |
| 143 | p[k] = nextCodes[lens[k]]++; | 299 | // We find nearest allowed leaf and place current node to level of that leaf: |
| 300 | for (len = maxLen - 1; lenCounters[len] == 0; len--) {} | ||
| 144 | } | 301 | } |
| 302 | lenCounters[len]--; // we remove 1 leaf from level (len) | ||
| 303 | (lenCounters + 1)[len] += 2; // we add 2 leaves at level (len + 1) | ||
| 304 | } | ||
| 305 | while (b != p); | ||
| 306 | } | ||
| 307 | } | ||
| 308 | { | ||
| 309 | { | ||
| 310 | unsigned len = maxLen; | ||
| 311 | const UInt32 *p2 = p; | ||
| 312 | do | ||
| 313 | { | ||
| 314 | unsigned k = lenCounters[len]; | ||
| 315 | if (k) | ||
| 316 | do | ||
| 317 | lens[(unsigned)*p2++ & MASK] = (Byte)len; | ||
| 318 | while (--k); | ||
| 319 | } | ||
| 320 | while (--len); | ||
| 321 | } | ||
| 322 | codes[0] = 0; // we don't want garbage values to be written to p[] array. | ||
| 323 | // codes[1] = 0; | ||
| 324 | { | ||
| 325 | UInt32 code = 0; | ||
| 326 | unsigned len; | ||
| 327 | for (len = 0; len < kMaxLen; len++) | ||
| 328 | (codes + 1)[len] = code = (code + lenCounters[len]) << 1; | ||
| 329 | } | ||
| 330 | /* if (code + lenCounters[kMaxLen] - 1 != (1 << kMaxLen) - 1) throw 1; */ | ||
| 331 | { | ||
| 332 | const Byte * const limit = lens + numSymbols; | ||
| 333 | do | ||
| 334 | { | ||
| 335 | unsigned len; | ||
| 336 | UInt32 c; | ||
| 337 | len = lens[0]; c = codes[len]; p[0] = c; codes[len] = c + 1; | ||
| 338 | // len = lens[1]; c = codes[len]; p[1] = c; codes[len] = c + 1; | ||
| 339 | p += 1; | ||
| 340 | lens += 1; | ||
| 145 | } | 341 | } |
| 342 | while (lens != limit); | ||
| 146 | } | 343 | } |
| 147 | } | 344 | } |
| 148 | } | 345 | } |
| @@ -150,5 +347,14 @@ void Huffman_Generate(const UInt32 *freqs, UInt32 *p, Byte *lens, UInt32 numSymb | |||
| 150 | #undef kMaxLen | 347 | #undef kMaxLen |
| 151 | #undef NUM_BITS | 348 | #undef NUM_BITS |
| 152 | #undef MASK | 349 | #undef MASK |
| 350 | #undef FREQ_MASK | ||
| 153 | #undef NUM_COUNTERS | 351 | #undef NUM_COUNTERS |
| 154 | #undef HUFFMAN_SPEED_OPT | 352 | #undef CTR_ITEM_FOR_FREQ |
| 353 | #undef LOAD_PARENT | ||
| 354 | #undef STORE_PARENT | ||
| 355 | #undef STORE_PARENT_DIRECT | ||
| 356 | #undef UPDATE_E | ||
| 357 | #undef HI_HALF_OFFSET | ||
| 358 | #undef NUM_UNROLLS | ||
| 359 | #undef lenCounters | ||
| 360 | #undef codes | ||
