diff options
Diffstat (limited to '')
-rw-r--r-- | C/HuffEnc.c | 384 |
1 files changed, 295 insertions, 89 deletions
diff --git a/C/HuffEnc.c b/C/HuffEnc.c index 996da30..cbf8c22 100644 --- a/C/HuffEnc.c +++ b/C/HuffEnc.c | |||
@@ -1,60 +1,125 @@ | |||
1 | /* HuffEnc.c -- functions for Huffman encoding | 1 | /* HuffEnc.c -- functions for Huffman encoding |
2 | 2023-09-07 : Igor Pavlov : Public domain */ | 2 | Igor Pavlov : Public domain */ |
3 | 3 | ||
4 | #include "Precomp.h" | 4 | #include "Precomp.h" |
5 | 5 | ||
6 | #include <string.h> | ||
7 | |||
6 | #include "HuffEnc.h" | 8 | #include "HuffEnc.h" |
7 | #include "Sort.h" | 9 | #include "Sort.h" |
10 | #include "CpuArch.h" | ||
8 | 11 | ||
9 | #define kMaxLen 16 | 12 | #define kMaxLen Z7_HUFFMAN_LEN_MAX |
10 | #define NUM_BITS 10 | 13 | #define NUM_BITS 10 |
11 | #define MASK ((1u << NUM_BITS) - 1) | 14 | #define MASK ((1u << NUM_BITS) - 1) |
12 | 15 | #define FREQ_MASK (~(UInt32)MASK) | |
13 | #define NUM_COUNTERS 64 | 16 | #define NUM_COUNTERS (48 * 2) |
14 | 17 | ||
15 | #define HUFFMAN_SPEED_OPT | 18 | #if 1 && (defined(MY_CPU_LE) || defined(MY_CPU_BE)) |
19 | #if defined(MY_CPU_LE) | ||
20 | #define HI_HALF_OFFSET 1 | ||
21 | #else | ||
22 | #define HI_HALF_OFFSET 0 | ||
23 | #endif | ||
24 | #define LOAD_PARENT(p) ((unsigned)*((const UInt16 *)(p) + HI_HALF_OFFSET)) | ||
25 | #define STORE_PARENT(p, fb, val) *((UInt16 *)(p) + HI_HALF_OFFSET) = (UInt16)(val); | ||
26 | #define STORE_PARENT_DIRECT(p, fb, hi) STORE_PARENT(p, fb, hi) | ||
27 | #define UPDATE_E(eHi) eHi++; | ||
28 | #else | ||
29 | #define LOAD_PARENT(p) ((unsigned)(*(p) >> NUM_BITS)) | ||
30 | #define STORE_PARENT_DIRECT(p, fb, hi) *(p) = ((fb) & MASK) | (hi); // set parent field | ||
31 | #define STORE_PARENT(p, fb, val) STORE_PARENT_DIRECT(p, fb, ((UInt32)(val) << NUM_BITS)) | ||
32 | #define UPDATE_E(eHi) eHi += 1 << NUM_BITS; | ||
33 | #endif | ||
16 | 34 | ||
17 | void Huffman_Generate(const UInt32 *freqs, UInt32 *p, Byte *lens, UInt32 numSymbols, UInt32 maxLen) | 35 | void Huffman_Generate(const UInt32 *freqs, UInt32 *p, Byte *lens, unsigned numSymbols, unsigned maxLen) |
18 | { | 36 | { |
19 | UInt32 num = 0; | 37 | #if NUM_COUNTERS > 2 |
20 | /* if (maxLen > 10) maxLen = 10; */ | 38 | unsigned counters[NUM_COUNTERS]; |
39 | #endif | ||
40 | #if 1 && NUM_COUNTERS > (kMaxLen + 4) * 2 | ||
41 | #define lenCounters (counters) | ||
42 | #define codes (counters + kMaxLen + 4) | ||
43 | #else | ||
44 | unsigned lenCounters[kMaxLen + 1]; | ||
45 | UInt32 codes[kMaxLen + 1]; | ||
46 | #endif | ||
47 | |||
48 | unsigned num; | ||
21 | { | 49 | { |
22 | UInt32 i; | 50 | unsigned i; |
23 | 51 | // UInt32 sum = 0; | |
24 | #ifdef HUFFMAN_SPEED_OPT | 52 | |
53 | #if NUM_COUNTERS > 2 | ||
25 | 54 | ||
26 | UInt32 counters[NUM_COUNTERS]; | 55 | #define CTR_ITEM_FOR_FREQ(freq) \ |
56 | counters[(freq) >= NUM_COUNTERS - 1 ? NUM_COUNTERS - 1 : (unsigned)(freq)] | ||
57 | |||
27 | for (i = 0; i < NUM_COUNTERS; i++) | 58 | for (i = 0; i < NUM_COUNTERS; i++) |
28 | counters[i] = 0; | 59 | counters[i] = 0; |
29 | for (i = 0; i < numSymbols; i++) | 60 | memset(lens, 0, numSymbols); |
30 | { | 61 | { |
31 | UInt32 freq = freqs[i]; | 62 | const UInt32 *fp = freqs + numSymbols; |
32 | counters[(freq < NUM_COUNTERS - 1) ? freq : NUM_COUNTERS - 1]++; | 63 | #define NUM_UNROLLS 1 |
64 | #if NUM_UNROLLS > 1 // use 1 if odd (numSymbols) is possisble | ||
65 | if (numSymbols & 1) | ||
66 | { | ||
67 | UInt32 f; | ||
68 | f = *--fp; CTR_ITEM_FOR_FREQ(f)++; | ||
69 | // sum += f; | ||
70 | } | ||
71 | #endif | ||
72 | do | ||
73 | { | ||
74 | UInt32 f; | ||
75 | fp -= NUM_UNROLLS; | ||
76 | f = fp[0]; CTR_ITEM_FOR_FREQ(f)++; | ||
77 | // sum += f; | ||
78 | #if NUM_UNROLLS > 1 | ||
79 | f = fp[1]; CTR_ITEM_FOR_FREQ(f)++; | ||
80 | // sum += f; | ||
81 | #endif | ||
82 | } | ||
83 | while (fp != freqs); | ||
33 | } | 84 | } |
34 | 85 | #if 0 | |
35 | for (i = 1; i < NUM_COUNTERS; i++) | 86 | printf("\nsum=%8u numSymbols =%3u ctrs:", sum, numSymbols); |
36 | { | 87 | { |
37 | UInt32 temp = counters[i]; | 88 | unsigned k = 0; |
38 | counters[i] = num; | 89 | for (k = 0; k < NUM_COUNTERS; k++) |
39 | num += temp; | 90 | printf(" %u", counters[k]); |
40 | } | 91 | } |
41 | 92 | #endif | |
42 | for (i = 0; i < numSymbols; i++) | 93 | |
94 | num = counters[1]; | ||
95 | counters[1] = 0; | ||
96 | for (i = 2; i != NUM_COUNTERS; i += 2) | ||
43 | { | 97 | { |
44 | UInt32 freq = freqs[i]; | 98 | unsigned c; |
45 | if (freq == 0) | 99 | c = (counters )[i]; (counters )[i] = num; num += c; |
46 | lens[i] = 0; | 100 | c = (counters + 1)[i]; (counters + 1)[i] = num; num += c; |
47 | else | 101 | } |
48 | p[counters[((freq < NUM_COUNTERS - 1) ? freq : NUM_COUNTERS - 1)]++] = i | (freq << NUM_BITS); | 102 | counters[0] = num; // we want to write (freq==0) symbols to the end of (p) array |
103 | { | ||
104 | i = 0; | ||
105 | do | ||
106 | { | ||
107 | const UInt32 f = freqs[i]; | ||
108 | #if 0 | ||
109 | if (f == 0) lens[i] = 0; else | ||
110 | #endif | ||
111 | p[CTR_ITEM_FOR_FREQ(f)++] = i | (f << NUM_BITS); | ||
112 | } | ||
113 | while (++i != numSymbols); | ||
49 | } | 114 | } |
50 | counters[0] = 0; | ||
51 | HeapSort(p + counters[NUM_COUNTERS - 2], counters[NUM_COUNTERS - 1] - counters[NUM_COUNTERS - 2]); | 115 | HeapSort(p + counters[NUM_COUNTERS - 2], counters[NUM_COUNTERS - 1] - counters[NUM_COUNTERS - 2]); |
52 | 116 | ||
53 | #else | 117 | #else // NUM_COUNTERS <= 2 |
54 | 118 | ||
119 | num = 0; | ||
55 | for (i = 0; i < numSymbols; i++) | 120 | for (i = 0; i < numSymbols; i++) |
56 | { | 121 | { |
57 | UInt32 freq = freqs[i]; | 122 | const UInt32 freq = freqs[i]; |
58 | if (freq == 0) | 123 | if (freq == 0) |
59 | lens[i] = 0; | 124 | lens[i] = 0; |
60 | else | 125 | else |
@@ -62,17 +127,27 @@ void Huffman_Generate(const UInt32 *freqs, UInt32 *p, Byte *lens, UInt32 numSymb | |||
62 | } | 127 | } |
63 | HeapSort(p, num); | 128 | HeapSort(p, num); |
64 | 129 | ||
65 | #endif | 130 | #endif |
66 | } | 131 | } |
67 | 132 | ||
68 | if (num < 2) | 133 | if (num <= 2) |
69 | { | 134 | { |
70 | unsigned minCode = 0; | 135 | unsigned minCode = 0; |
71 | unsigned maxCode = 1; | 136 | unsigned maxCode = 1; |
72 | if (num == 1) | 137 | if (num) |
73 | { | 138 | { |
74 | maxCode = (unsigned)p[0] & MASK; | 139 | maxCode = (unsigned)p[(size_t)num - 1] & MASK; |
75 | if (maxCode == 0) | 140 | if (num == 2) |
141 | { | ||
142 | minCode = (unsigned)p[0] & MASK; | ||
143 | if (minCode > maxCode) | ||
144 | { | ||
145 | const unsigned temp = minCode; | ||
146 | minCode = maxCode; | ||
147 | maxCode = temp; | ||
148 | } | ||
149 | } | ||
150 | else if (maxCode == 0) | ||
76 | maxCode++; | 151 | maxCode++; |
77 | } | 152 | } |
78 | p[minCode] = 0; | 153 | p[minCode] = 0; |
@@ -80,69 +155,191 @@ void Huffman_Generate(const UInt32 *freqs, UInt32 *p, Byte *lens, UInt32 numSymb | |||
80 | lens[minCode] = lens[maxCode] = 1; | 155 | lens[minCode] = lens[maxCode] = 1; |
81 | return; | 156 | return; |
82 | } | 157 | } |
83 | |||
84 | { | 158 | { |
85 | UInt32 b, e, i; | 159 | unsigned i; |
86 | 160 | for (i = 0; i <= kMaxLen; i++) | |
87 | i = b = e = 0; | 161 | lenCounters[i] = 0; |
88 | do | 162 | lenCounters[1] = 2; // by default root node has 2 child leaves at level 1. |
163 | } | ||
164 | // if (num != 2) | ||
165 | { | ||
166 | // num > 2 | ||
167 | // the binary tree will contain (num - 1) internal nodes. | ||
168 | // p[num - 2] will be root node of binary tree. | ||
169 | UInt32 *b; | ||
170 | UInt32 *n; | ||
171 | // first node will have two leaf childs: p[0] and p[1]: | ||
172 | // p[0] += p[1] & FREQ_MASK; // set frequency sum of child leafs | ||
173 | // if (pi == n) exit(0); | ||
174 | // if (pi != n) | ||
89 | { | 175 | { |
90 | UInt32 n, m, freq; | 176 | UInt32 fb = (p[1] & FREQ_MASK) + p[0]; |
91 | n = (i != num && (b == e || (p[i] >> NUM_BITS) <= (p[b] >> NUM_BITS))) ? i++ : b++; | 177 | UInt32 f = p[2] & FREQ_MASK; |
92 | freq = (p[n] & ~MASK); | 178 | const UInt32 *pi = p + 2; |
93 | p[n] = (p[n] & MASK) | (e << NUM_BITS); | 179 | UInt32 *e = p; |
94 | m = (i != num && (b == e || (p[i] >> NUM_BITS) <= (p[b] >> NUM_BITS))) ? i++ : b++; | 180 | UInt32 eHi = 0; |
95 | freq += (p[m] & ~MASK); | 181 | n = p + num; |
96 | p[m] = (p[m] & MASK) | (e << NUM_BITS); | 182 | b = p; |
97 | p[e] = (p[e] & MASK) | freq; | 183 | // p[0] = fb; |
98 | e++; | 184 | for (;;) |
185 | { | ||
186 | // (b <= e) | ||
187 | UInt32 sum; | ||
188 | e++; | ||
189 | UPDATE_E(eHi) | ||
190 | |||
191 | // (b < e) | ||
192 | |||
193 | // p range : high bits | ||
194 | // [0, b) : parent : processed nodes that have parent and childs | ||
195 | // [b, e) : FREQ : non-processed nodes that have no parent but have childs | ||
196 | // [e, pi) : FREQ : processed leaves for which parent node was created | ||
197 | // [pi, n) : FREQ : non-processed leaves for which parent node was not created | ||
198 | |||
199 | // first child | ||
200 | // note : (*b < f) is same result as ((*b & FREQ_MASK) < f) | ||
201 | if (fb < f) | ||
202 | { | ||
203 | // node freq is smaller | ||
204 | sum = fb & FREQ_MASK; | ||
205 | STORE_PARENT_DIRECT (b, fb, eHi) | ||
206 | b++; | ||
207 | fb = *b; | ||
208 | if (b == e) | ||
209 | { | ||
210 | if (++pi == n) | ||
211 | break; | ||
212 | sum += f; | ||
213 | fb &= MASK; | ||
214 | fb |= sum; | ||
215 | *e = fb; | ||
216 | f = *pi & FREQ_MASK; | ||
217 | continue; | ||
218 | } | ||
219 | } | ||
220 | else if (++pi == n) | ||
221 | { | ||
222 | STORE_PARENT_DIRECT (b, fb, eHi) | ||
223 | b++; | ||
224 | break; | ||
225 | } | ||
226 | else | ||
227 | { | ||
228 | sum = f; | ||
229 | f = *pi & FREQ_MASK; | ||
230 | } | ||
231 | |||
232 | // (b < e) | ||
233 | |||
234 | // second child | ||
235 | if (fb < f) | ||
236 | { | ||
237 | sum += fb; | ||
238 | sum &= FREQ_MASK; | ||
239 | STORE_PARENT_DIRECT (b, fb, eHi) | ||
240 | b++; | ||
241 | *e = (*e & MASK) | sum; // set frequency sum | ||
242 | // (b <= e) is possible here | ||
243 | fb = *b; | ||
244 | } | ||
245 | else if (++pi == n) | ||
246 | break; | ||
247 | else | ||
248 | { | ||
249 | sum += f; | ||
250 | f = *pi & FREQ_MASK; | ||
251 | *e = (*e & MASK) | sum; // set frequency sum | ||
252 | } | ||
253 | } | ||
99 | } | 254 | } |
100 | while (num - e > 1); | ||
101 | 255 | ||
256 | // printf("\nnum-e=%3u, numSymbols=%3u, num=%3u, b=%3u", n - e, numSymbols, n - p, b - p); | ||
102 | { | 257 | { |
103 | UInt32 lenCounters[kMaxLen + 1]; | 258 | n -= 2; |
104 | for (i = 0; i <= kMaxLen; i++) | 259 | *n &= MASK; // root node : we clear high bits (zero bits mean level == 0) |
105 | lenCounters[i] = 0; | 260 | if (n != b) |
106 | |||
107 | p[--e] &= MASK; | ||
108 | lenCounters[1] = 2; | ||
109 | while (e != 0) | ||
110 | { | ||
111 | UInt32 len = (p[p[--e] >> NUM_BITS] >> NUM_BITS) + 1; | ||
112 | p[e] = (p[e] & MASK) | (len << NUM_BITS); | ||
113 | if (len >= maxLen) | ||
114 | for (len = maxLen - 1; lenCounters[len] == 0; len--); | ||
115 | lenCounters[len]--; | ||
116 | lenCounters[(size_t)len + 1] += 2; | ||
117 | } | ||
118 | |||
119 | { | 261 | { |
120 | UInt32 len; | 262 | // We go here, if we have some number of non-created nodes up to root. |
121 | i = 0; | 263 | // We process them in simplified code: |
122 | for (len = maxLen; len != 0; len--) | 264 | // position of parent for each pair of nodes is known. |
265 | // n[-2], n[-1] : current pair of child nodes | ||
266 | // (p1) : parent node for current pair. | ||
267 | UInt32 *p1 = n; | ||
268 | do | ||
123 | { | 269 | { |
124 | UInt32 k; | 270 | const unsigned len = LOAD_PARENT(p1) + 1; |
125 | for (k = lenCounters[len]; k != 0; k--) | 271 | p1--; |
126 | lens[p[i++] & MASK] = (Byte)len; | 272 | (lenCounters )[len] -= 2; // we remove 2 leaves from level (len) |
273 | (lenCounters + 1)[len] += 2 * 2; // we add 4 leaves at level (len + 1) | ||
274 | n -= 2; | ||
275 | STORE_PARENT (n , n[0], len) | ||
276 | STORE_PARENT (n + 1, n[1], len) | ||
127 | } | 277 | } |
278 | while (n != b); | ||
128 | } | 279 | } |
129 | 280 | } | |
281 | |||
282 | if (b != p) | ||
283 | { | ||
284 | // we detect level of each node (realtive to root), | ||
285 | // and update lenCounters[]. | ||
286 | // We process only intermediate nodes and we don't process leaves. | ||
287 | do | ||
130 | { | 288 | { |
131 | UInt32 nextCodes[kMaxLen + 1]; | 289 | // if (ii < b) : parent_bits_of (p[ii]) == index of parent node : ii < (p[ii]) |
132 | { | 290 | // if (ii >= b) : parent_bits_of (p[ii]) == level of this (ii) node in tree |
133 | UInt32 code = 0; | 291 | unsigned len; |
134 | UInt32 len; | 292 | b--; |
135 | for (len = 1; len <= kMaxLen; len++) | 293 | len = (unsigned)LOAD_PARENT(p + LOAD_PARENT(b)) + 1; |
136 | nextCodes[len] = code = (code + lenCounters[(size_t)len - 1]) << 1; | 294 | STORE_PARENT (b, *b, len) |
137 | } | 295 | if (len >= maxLen) |
138 | /* if (code + lenCounters[kMaxLen] - 1 != (1 << kMaxLen) - 1) throw 1; */ | ||
139 | |||
140 | { | 296 | { |
141 | UInt32 k; | 297 | // We are not allowed to create node at level (maxLen) and higher, |
142 | for (k = 0; k < numSymbols; k++) | 298 | // because all leaves must be placed to level (maxLen) or lower. |
143 | p[k] = nextCodes[lens[k]]++; | 299 | // We find nearest allowed leaf and place current node to level of that leaf: |
300 | for (len = maxLen - 1; lenCounters[len] == 0; len--) {} | ||
144 | } | 301 | } |
302 | lenCounters[len]--; // we remove 1 leaf from level (len) | ||
303 | (lenCounters + 1)[len] += 2; // we add 2 leaves at level (len + 1) | ||
304 | } | ||
305 | while (b != p); | ||
306 | } | ||
307 | } | ||
308 | { | ||
309 | { | ||
310 | unsigned len = maxLen; | ||
311 | const UInt32 *p2 = p; | ||
312 | do | ||
313 | { | ||
314 | unsigned k = lenCounters[len]; | ||
315 | if (k) | ||
316 | do | ||
317 | lens[(unsigned)*p2++ & MASK] = (Byte)len; | ||
318 | while (--k); | ||
319 | } | ||
320 | while (--len); | ||
321 | } | ||
322 | codes[0] = 0; // we don't want garbage values to be written to p[] array. | ||
323 | // codes[1] = 0; | ||
324 | { | ||
325 | UInt32 code = 0; | ||
326 | unsigned len; | ||
327 | for (len = 0; len < kMaxLen; len++) | ||
328 | (codes + 1)[len] = code = (code + lenCounters[len]) << 1; | ||
329 | } | ||
330 | /* if (code + lenCounters[kMaxLen] - 1 != (1 << kMaxLen) - 1) throw 1; */ | ||
331 | { | ||
332 | const Byte * const limit = lens + numSymbols; | ||
333 | do | ||
334 | { | ||
335 | unsigned len; | ||
336 | UInt32 c; | ||
337 | len = lens[0]; c = codes[len]; p[0] = c; codes[len] = c + 1; | ||
338 | // len = lens[1]; c = codes[len]; p[1] = c; codes[len] = c + 1; | ||
339 | p += 1; | ||
340 | lens += 1; | ||
145 | } | 341 | } |
342 | while (lens != limit); | ||
146 | } | 343 | } |
147 | } | 344 | } |
148 | } | 345 | } |
@@ -150,5 +347,14 @@ void Huffman_Generate(const UInt32 *freqs, UInt32 *p, Byte *lens, UInt32 numSymb | |||
150 | #undef kMaxLen | 347 | #undef kMaxLen |
151 | #undef NUM_BITS | 348 | #undef NUM_BITS |
152 | #undef MASK | 349 | #undef MASK |
350 | #undef FREQ_MASK | ||
153 | #undef NUM_COUNTERS | 351 | #undef NUM_COUNTERS |
154 | #undef HUFFMAN_SPEED_OPT | 352 | #undef CTR_ITEM_FOR_FREQ |
353 | #undef LOAD_PARENT | ||
354 | #undef STORE_PARENT | ||
355 | #undef STORE_PARENT_DIRECT | ||
356 | #undef UPDATE_E | ||
357 | #undef HI_HALF_OFFSET | ||
358 | #undef NUM_UNROLLS | ||
359 | #undef lenCounters | ||
360 | #undef codes | ||