aboutsummaryrefslogtreecommitdiff
path: root/C/HuffEnc.c
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--C/HuffEnc.c384
1 files changed, 295 insertions, 89 deletions
diff --git a/C/HuffEnc.c b/C/HuffEnc.c
index 996da30..cbf8c22 100644
--- a/C/HuffEnc.c
+++ b/C/HuffEnc.c
@@ -1,60 +1,125 @@
1/* HuffEnc.c -- functions for Huffman encoding 1/* HuffEnc.c -- functions for Huffman encoding
22023-09-07 : Igor Pavlov : Public domain */ 2Igor Pavlov : Public domain */
3 3
4#include "Precomp.h" 4#include "Precomp.h"
5 5
6#include <string.h>
7
6#include "HuffEnc.h" 8#include "HuffEnc.h"
7#include "Sort.h" 9#include "Sort.h"
10#include "CpuArch.h"
8 11
9#define kMaxLen 16 12#define kMaxLen Z7_HUFFMAN_LEN_MAX
10#define NUM_BITS 10 13#define NUM_BITS 10
11#define MASK ((1u << NUM_BITS) - 1) 14#define MASK ((1u << NUM_BITS) - 1)
12 15#define FREQ_MASK (~(UInt32)MASK)
13#define NUM_COUNTERS 64 16#define NUM_COUNTERS (48 * 2)
14 17
15#define HUFFMAN_SPEED_OPT 18#if 1 && (defined(MY_CPU_LE) || defined(MY_CPU_BE))
19#if defined(MY_CPU_LE)
20 #define HI_HALF_OFFSET 1
21#else
22 #define HI_HALF_OFFSET 0
23#endif
24#define LOAD_PARENT(p) ((unsigned)*((const UInt16 *)(p) + HI_HALF_OFFSET))
25#define STORE_PARENT(p, fb, val) *((UInt16 *)(p) + HI_HALF_OFFSET) = (UInt16)(val);
26#define STORE_PARENT_DIRECT(p, fb, hi) STORE_PARENT(p, fb, hi)
27#define UPDATE_E(eHi) eHi++;
28#else
29#define LOAD_PARENT(p) ((unsigned)(*(p) >> NUM_BITS))
30#define STORE_PARENT_DIRECT(p, fb, hi) *(p) = ((fb) & MASK) | (hi); // set parent field
31#define STORE_PARENT(p, fb, val) STORE_PARENT_DIRECT(p, fb, ((UInt32)(val) << NUM_BITS))
32#define UPDATE_E(eHi) eHi += 1 << NUM_BITS;
33#endif
16 34
17void Huffman_Generate(const UInt32 *freqs, UInt32 *p, Byte *lens, UInt32 numSymbols, UInt32 maxLen) 35void Huffman_Generate(const UInt32 *freqs, UInt32 *p, Byte *lens, unsigned numSymbols, unsigned maxLen)
18{ 36{
19 UInt32 num = 0; 37#if NUM_COUNTERS > 2
20 /* if (maxLen > 10) maxLen = 10; */ 38 unsigned counters[NUM_COUNTERS];
39#endif
40#if 1 && NUM_COUNTERS > (kMaxLen + 4) * 2
41 #define lenCounters (counters)
42 #define codes (counters + kMaxLen + 4)
43#else
44 unsigned lenCounters[kMaxLen + 1];
45 UInt32 codes[kMaxLen + 1];
46#endif
47
48 unsigned num;
21 { 49 {
22 UInt32 i; 50 unsigned i;
23 51 // UInt32 sum = 0;
24 #ifdef HUFFMAN_SPEED_OPT 52
53#if NUM_COUNTERS > 2
25 54
26 UInt32 counters[NUM_COUNTERS]; 55#define CTR_ITEM_FOR_FREQ(freq) \
56 counters[(freq) >= NUM_COUNTERS - 1 ? NUM_COUNTERS - 1 : (unsigned)(freq)]
57
27 for (i = 0; i < NUM_COUNTERS; i++) 58 for (i = 0; i < NUM_COUNTERS; i++)
28 counters[i] = 0; 59 counters[i] = 0;
29 for (i = 0; i < numSymbols; i++) 60 memset(lens, 0, numSymbols);
30 { 61 {
31 UInt32 freq = freqs[i]; 62 const UInt32 *fp = freqs + numSymbols;
32 counters[(freq < NUM_COUNTERS - 1) ? freq : NUM_COUNTERS - 1]++; 63#define NUM_UNROLLS 1
64#if NUM_UNROLLS > 1 // use 1 if odd (numSymbols) is possisble
65 if (numSymbols & 1)
66 {
67 UInt32 f;
68 f = *--fp; CTR_ITEM_FOR_FREQ(f)++;
69 // sum += f;
70 }
71#endif
72 do
73 {
74 UInt32 f;
75 fp -= NUM_UNROLLS;
76 f = fp[0]; CTR_ITEM_FOR_FREQ(f)++;
77 // sum += f;
78#if NUM_UNROLLS > 1
79 f = fp[1]; CTR_ITEM_FOR_FREQ(f)++;
80 // sum += f;
81#endif
82 }
83 while (fp != freqs);
33 } 84 }
34 85#if 0
35 for (i = 1; i < NUM_COUNTERS; i++) 86 printf("\nsum=%8u numSymbols =%3u ctrs:", sum, numSymbols);
36 { 87 {
37 UInt32 temp = counters[i]; 88 unsigned k = 0;
38 counters[i] = num; 89 for (k = 0; k < NUM_COUNTERS; k++)
39 num += temp; 90 printf(" %u", counters[k]);
40 } 91 }
41 92#endif
42 for (i = 0; i < numSymbols; i++) 93
94 num = counters[1];
95 counters[1] = 0;
96 for (i = 2; i != NUM_COUNTERS; i += 2)
43 { 97 {
44 UInt32 freq = freqs[i]; 98 unsigned c;
45 if (freq == 0) 99 c = (counters )[i]; (counters )[i] = num; num += c;
46 lens[i] = 0; 100 c = (counters + 1)[i]; (counters + 1)[i] = num; num += c;
47 else 101 }
48 p[counters[((freq < NUM_COUNTERS - 1) ? freq : NUM_COUNTERS - 1)]++] = i | (freq << NUM_BITS); 102 counters[0] = num; // we want to write (freq==0) symbols to the end of (p) array
103 {
104 i = 0;
105 do
106 {
107 const UInt32 f = freqs[i];
108#if 0
109 if (f == 0) lens[i] = 0; else
110#endif
111 p[CTR_ITEM_FOR_FREQ(f)++] = i | (f << NUM_BITS);
112 }
113 while (++i != numSymbols);
49 } 114 }
50 counters[0] = 0;
51 HeapSort(p + counters[NUM_COUNTERS - 2], counters[NUM_COUNTERS - 1] - counters[NUM_COUNTERS - 2]); 115 HeapSort(p + counters[NUM_COUNTERS - 2], counters[NUM_COUNTERS - 1] - counters[NUM_COUNTERS - 2]);
52 116
53 #else 117#else // NUM_COUNTERS <= 2
54 118
119 num = 0;
55 for (i = 0; i < numSymbols; i++) 120 for (i = 0; i < numSymbols; i++)
56 { 121 {
57 UInt32 freq = freqs[i]; 122 const UInt32 freq = freqs[i];
58 if (freq == 0) 123 if (freq == 0)
59 lens[i] = 0; 124 lens[i] = 0;
60 else 125 else
@@ -62,17 +127,27 @@ void Huffman_Generate(const UInt32 *freqs, UInt32 *p, Byte *lens, UInt32 numSymb
62 } 127 }
63 HeapSort(p, num); 128 HeapSort(p, num);
64 129
65 #endif 130#endif
66 } 131 }
67 132
68 if (num < 2) 133 if (num <= 2)
69 { 134 {
70 unsigned minCode = 0; 135 unsigned minCode = 0;
71 unsigned maxCode = 1; 136 unsigned maxCode = 1;
72 if (num == 1) 137 if (num)
73 { 138 {
74 maxCode = (unsigned)p[0] & MASK; 139 maxCode = (unsigned)p[(size_t)num - 1] & MASK;
75 if (maxCode == 0) 140 if (num == 2)
141 {
142 minCode = (unsigned)p[0] & MASK;
143 if (minCode > maxCode)
144 {
145 const unsigned temp = minCode;
146 minCode = maxCode;
147 maxCode = temp;
148 }
149 }
150 else if (maxCode == 0)
76 maxCode++; 151 maxCode++;
77 } 152 }
78 p[minCode] = 0; 153 p[minCode] = 0;
@@ -80,69 +155,191 @@ void Huffman_Generate(const UInt32 *freqs, UInt32 *p, Byte *lens, UInt32 numSymb
80 lens[minCode] = lens[maxCode] = 1; 155 lens[minCode] = lens[maxCode] = 1;
81 return; 156 return;
82 } 157 }
83
84 { 158 {
85 UInt32 b, e, i; 159 unsigned i;
86 160 for (i = 0; i <= kMaxLen; i++)
87 i = b = e = 0; 161 lenCounters[i] = 0;
88 do 162 lenCounters[1] = 2; // by default root node has 2 child leaves at level 1.
163 }
164 // if (num != 2)
165 {
166 // num > 2
167 // the binary tree will contain (num - 1) internal nodes.
168 // p[num - 2] will be root node of binary tree.
169 UInt32 *b;
170 UInt32 *n;
171 // first node will have two leaf childs: p[0] and p[1]:
172 // p[0] += p[1] & FREQ_MASK; // set frequency sum of child leafs
173 // if (pi == n) exit(0);
174 // if (pi != n)
89 { 175 {
90 UInt32 n, m, freq; 176 UInt32 fb = (p[1] & FREQ_MASK) + p[0];
91 n = (i != num && (b == e || (p[i] >> NUM_BITS) <= (p[b] >> NUM_BITS))) ? i++ : b++; 177 UInt32 f = p[2] & FREQ_MASK;
92 freq = (p[n] & ~MASK); 178 const UInt32 *pi = p + 2;
93 p[n] = (p[n] & MASK) | (e << NUM_BITS); 179 UInt32 *e = p;
94 m = (i != num && (b == e || (p[i] >> NUM_BITS) <= (p[b] >> NUM_BITS))) ? i++ : b++; 180 UInt32 eHi = 0;
95 freq += (p[m] & ~MASK); 181 n = p + num;
96 p[m] = (p[m] & MASK) | (e << NUM_BITS); 182 b = p;
97 p[e] = (p[e] & MASK) | freq; 183 // p[0] = fb;
98 e++; 184 for (;;)
185 {
186 // (b <= e)
187 UInt32 sum;
188 e++;
189 UPDATE_E(eHi)
190
191 // (b < e)
192
193 // p range : high bits
194 // [0, b) : parent : processed nodes that have parent and childs
195 // [b, e) : FREQ : non-processed nodes that have no parent but have childs
196 // [e, pi) : FREQ : processed leaves for which parent node was created
197 // [pi, n) : FREQ : non-processed leaves for which parent node was not created
198
199 // first child
200 // note : (*b < f) is same result as ((*b & FREQ_MASK) < f)
201 if (fb < f)
202 {
203 // node freq is smaller
204 sum = fb & FREQ_MASK;
205 STORE_PARENT_DIRECT (b, fb, eHi)
206 b++;
207 fb = *b;
208 if (b == e)
209 {
210 if (++pi == n)
211 break;
212 sum += f;
213 fb &= MASK;
214 fb |= sum;
215 *e = fb;
216 f = *pi & FREQ_MASK;
217 continue;
218 }
219 }
220 else if (++pi == n)
221 {
222 STORE_PARENT_DIRECT (b, fb, eHi)
223 b++;
224 break;
225 }
226 else
227 {
228 sum = f;
229 f = *pi & FREQ_MASK;
230 }
231
232 // (b < e)
233
234 // second child
235 if (fb < f)
236 {
237 sum += fb;
238 sum &= FREQ_MASK;
239 STORE_PARENT_DIRECT (b, fb, eHi)
240 b++;
241 *e = (*e & MASK) | sum; // set frequency sum
242 // (b <= e) is possible here
243 fb = *b;
244 }
245 else if (++pi == n)
246 break;
247 else
248 {
249 sum += f;
250 f = *pi & FREQ_MASK;
251 *e = (*e & MASK) | sum; // set frequency sum
252 }
253 }
99 } 254 }
100 while (num - e > 1);
101 255
256 // printf("\nnum-e=%3u, numSymbols=%3u, num=%3u, b=%3u", n - e, numSymbols, n - p, b - p);
102 { 257 {
103 UInt32 lenCounters[kMaxLen + 1]; 258 n -= 2;
104 for (i = 0; i <= kMaxLen; i++) 259 *n &= MASK; // root node : we clear high bits (zero bits mean level == 0)
105 lenCounters[i] = 0; 260 if (n != b)
106
107 p[--e] &= MASK;
108 lenCounters[1] = 2;
109 while (e != 0)
110 {
111 UInt32 len = (p[p[--e] >> NUM_BITS] >> NUM_BITS) + 1;
112 p[e] = (p[e] & MASK) | (len << NUM_BITS);
113 if (len >= maxLen)
114 for (len = maxLen - 1; lenCounters[len] == 0; len--);
115 lenCounters[len]--;
116 lenCounters[(size_t)len + 1] += 2;
117 }
118
119 { 261 {
120 UInt32 len; 262 // We go here, if we have some number of non-created nodes up to root.
121 i = 0; 263 // We process them in simplified code:
122 for (len = maxLen; len != 0; len--) 264 // position of parent for each pair of nodes is known.
265 // n[-2], n[-1] : current pair of child nodes
266 // (p1) : parent node for current pair.
267 UInt32 *p1 = n;
268 do
123 { 269 {
124 UInt32 k; 270 const unsigned len = LOAD_PARENT(p1) + 1;
125 for (k = lenCounters[len]; k != 0; k--) 271 p1--;
126 lens[p[i++] & MASK] = (Byte)len; 272 (lenCounters )[len] -= 2; // we remove 2 leaves from level (len)
273 (lenCounters + 1)[len] += 2 * 2; // we add 4 leaves at level (len + 1)
274 n -= 2;
275 STORE_PARENT (n , n[0], len)
276 STORE_PARENT (n + 1, n[1], len)
127 } 277 }
278 while (n != b);
128 } 279 }
129 280 }
281
282 if (b != p)
283 {
284 // we detect level of each node (realtive to root),
285 // and update lenCounters[].
286 // We process only intermediate nodes and we don't process leaves.
287 do
130 { 288 {
131 UInt32 nextCodes[kMaxLen + 1]; 289 // if (ii < b) : parent_bits_of (p[ii]) == index of parent node : ii < (p[ii])
132 { 290 // if (ii >= b) : parent_bits_of (p[ii]) == level of this (ii) node in tree
133 UInt32 code = 0; 291 unsigned len;
134 UInt32 len; 292 b--;
135 for (len = 1; len <= kMaxLen; len++) 293 len = (unsigned)LOAD_PARENT(p + LOAD_PARENT(b)) + 1;
136 nextCodes[len] = code = (code + lenCounters[(size_t)len - 1]) << 1; 294 STORE_PARENT (b, *b, len)
137 } 295 if (len >= maxLen)
138 /* if (code + lenCounters[kMaxLen] - 1 != (1 << kMaxLen) - 1) throw 1; */
139
140 { 296 {
141 UInt32 k; 297 // We are not allowed to create node at level (maxLen) and higher,
142 for (k = 0; k < numSymbols; k++) 298 // because all leaves must be placed to level (maxLen) or lower.
143 p[k] = nextCodes[lens[k]]++; 299 // We find nearest allowed leaf and place current node to level of that leaf:
300 for (len = maxLen - 1; lenCounters[len] == 0; len--) {}
144 } 301 }
302 lenCounters[len]--; // we remove 1 leaf from level (len)
303 (lenCounters + 1)[len] += 2; // we add 2 leaves at level (len + 1)
304 }
305 while (b != p);
306 }
307 }
308 {
309 {
310 unsigned len = maxLen;
311 const UInt32 *p2 = p;
312 do
313 {
314 unsigned k = lenCounters[len];
315 if (k)
316 do
317 lens[(unsigned)*p2++ & MASK] = (Byte)len;
318 while (--k);
319 }
320 while (--len);
321 }
322 codes[0] = 0; // we don't want garbage values to be written to p[] array.
323 // codes[1] = 0;
324 {
325 UInt32 code = 0;
326 unsigned len;
327 for (len = 0; len < kMaxLen; len++)
328 (codes + 1)[len] = code = (code + lenCounters[len]) << 1;
329 }
330 /* if (code + lenCounters[kMaxLen] - 1 != (1 << kMaxLen) - 1) throw 1; */
331 {
332 const Byte * const limit = lens + numSymbols;
333 do
334 {
335 unsigned len;
336 UInt32 c;
337 len = lens[0]; c = codes[len]; p[0] = c; codes[len] = c + 1;
338 // len = lens[1]; c = codes[len]; p[1] = c; codes[len] = c + 1;
339 p += 1;
340 lens += 1;
145 } 341 }
342 while (lens != limit);
146 } 343 }
147 } 344 }
148} 345}
@@ -150,5 +347,14 @@ void Huffman_Generate(const UInt32 *freqs, UInt32 *p, Byte *lens, UInt32 numSymb
150#undef kMaxLen 347#undef kMaxLen
151#undef NUM_BITS 348#undef NUM_BITS
152#undef MASK 349#undef MASK
350#undef FREQ_MASK
153#undef NUM_COUNTERS 351#undef NUM_COUNTERS
154#undef HUFFMAN_SPEED_OPT 352#undef CTR_ITEM_FOR_FREQ
353#undef LOAD_PARENT
354#undef STORE_PARENT
355#undef STORE_PARENT_DIRECT
356#undef UPDATE_E
357#undef HI_HALF_OFFSET
358#undef NUM_UNROLLS
359#undef lenCounters
360#undef codes