aboutsummaryrefslogtreecommitdiff
path: root/examples
diff options
context:
space:
mode:
authorMark Adler <madler@alumni.caltech.edu>2018-08-01 01:49:45 -0700
committerMark Adler <madler@alumni.caltech.edu>2018-08-01 19:51:56 -0700
commit8ba2cdb6bdbee95703e1c6632fd8519a5e4a206e (patch)
tree4c7be828258ab03d4585ba46714c2b858563f7e4 /examples
parent4c14b51587b3bf484f7ca26a92dcfc0f8559201b (diff)
downloadzlib-8ba2cdb6bdbee95703e1c6632fd8519a5e4a206e.tar.gz
zlib-8ba2cdb6bdbee95703e1c6632fd8519a5e4a206e.tar.bz2
zlib-8ba2cdb6bdbee95703e1c6632fd8519a5e4a206e.zip
Clean up code style in enough.c, update version.
Diffstat (limited to 'examples')
-rw-r--r--examples/enough.c413
1 files changed, 202 insertions, 211 deletions
diff --git a/examples/enough.c b/examples/enough.c
index d6f8e25..56ad63e 100644
--- a/examples/enough.c
+++ b/examples/enough.c
@@ -1,7 +1,7 @@
1/* enough.c -- determine the maximum size of inflate's Huffman code tables over 1/* enough.c -- determine the maximum size of inflate's Huffman code tables over
2 * all possible valid and complete Huffman codes, subject to a length limit. 2 * all possible valid and complete Huffman codes, subject to a length limit.
3 * Copyright (C) 2007, 2008, 2012 Mark Adler 3 * Copyright (C) 2007, 2008, 2012, 2018 Mark Adler
4 * Version 1.4 18 August 2012 Mark Adler 4 * Version 1.5 1 August 2018 Mark Adler
5 */ 5 */
6 6
7/* Version history: 7/* Version history:
@@ -17,86 +17,87 @@
17 1.4 18 Aug 2012 Avoid shifts more than bits in type (caused endless loop!) 17 1.4 18 Aug 2012 Avoid shifts more than bits in type (caused endless loop!)
18 Clean up comparisons of different types 18 Clean up comparisons of different types
19 Clean up code indentation 19 Clean up code indentation
20 1.5 1 Aug 2018 Clean up code style and formatting
20 */ 21 */
21 22
22/* 23/*
23 Examine all possible Huffman codes for a given number of symbols and a 24 Examine all possible Huffman codes for a given number of symbols and a
24 maximum code length in bits to determine the maximum table size for zilb's 25 maximum code length in bits to determine the maximum table size for zlib's
25 inflate. Only complete Huffman codes are counted. 26 inflate. Only complete Huffman codes are counted.
26 27
27 Two codes are considered distinct if the vectors of the number of codes per 28 Two codes are considered distinct if the vectors of the number of codes per
28 length are not identical. So permutations of the symbol assignments result 29 length are not identical. So permutations of the symbol assignments result
29 in the same code for the counting, as do permutations of the assignments of 30 in the same code for the counting, as do permutations of the assignments of
30 the bit values to the codes (i.e. only canonical codes are counted). 31 the bit values to the codes (i.e. only canonical codes are counted).
31 32
32 We build a code from shorter to longer lengths, determining how many symbols 33 We build a code from shorter to longer lengths, determining how many symbols
33 are coded at each length. At each step, we have how many symbols remain to 34 are coded at each length. At each step, we have how many symbols remain to
34 be coded, what the last code length used was, and how many bit patterns of 35 be coded, what the last code length used was, and how many bit patterns of
35 that length remain unused. Then we add one to the code length and double the 36 that length remain unused. Then we add one to the code length and double the
36 number of unused patterns to graduate to the next code length. We then 37 number of unused patterns to graduate to the next code length. We then
37 assign all portions of the remaining symbols to that code length that 38 assign all portions of the remaining symbols to that code length that
38 preserve the properties of a correct and eventually complete code. Those 39 preserve the properties of a correct and eventually complete code. Those
39 properties are: we cannot use more bit patterns than are available; and when 40 properties are: we cannot use more bit patterns than are available; and when
40 all the symbols are used, there are exactly zero possible bit patterns 41 all the symbols are used, there are exactly zero possible bit patterns
41 remaining. 42 remaining.
42 43
43 The inflate Huffman decoding algorithm uses two-level lookup tables for 44 The inflate Huffman decoding algorithm uses two-level lookup tables for
44 speed. There is a single first-level table to decode codes up to root bits 45 speed. There is a single first-level table to decode codes up to root bits
45 in length (root == 9 in the current inflate implementation). The table 46 in length (root == 9 in the current inflate implementation). The table has 1
46 has 1 << root entries and is indexed by the next root bits of input. Codes 47 << root entries and is indexed by the next root bits of input. Codes shorter
47 shorter than root bits have replicated table entries, so that the correct 48 than root bits have replicated table entries, so that the correct entry is
48 entry is pointed to regardless of the bits that follow the short code. If 49 pointed to regardless of the bits that follow the short code. If the code is
49 the code is longer than root bits, then the table entry points to a second- 50 longer than root bits, then the table entry points to a second- level table.
50 level table. The size of that table is determined by the longest code with 51 The size of that table is determined by the longest code with that root-bit
51 that root-bit prefix. If that longest code has length len, then the table 52 prefix. If that longest code has length len, then the table has size 1 <<
52 has size 1 << (len - root), to index the remaining bits in that set of 53 (len - root), to index the remaining bits in that set of codes. Each
53 codes. Each subsequent root-bit prefix then has its own sub-table. The 54 subsequent root-bit prefix then has its own sub-table. The total number of
54 total number of table entries required by the code is calculated 55 table entries required by the code is calculated incrementally as the number
55 incrementally as the number of codes at each bit length is populated. When 56 of codes at each bit length is populated. When all of the codes are shorter
56 all of the codes are shorter than root bits, then root is reduced to the 57 than root bits, then root is reduced to the longest code length, resulting
57 longest code length, resulting in a single, smaller, one-level table. 58 in a single, smaller, one-level table.
58 59
59 The inflate algorithm also provides for small values of root (relative to 60 The inflate algorithm also provides for small values of root (relative to
60 the log2 of the number of symbols), where the shortest code has more bits 61 the log2 of the number of symbols), where the shortest code has more bits
61 than root. In that case, root is increased to the length of the shortest 62 than root. In that case, root is increased to the length of the shortest
62 code. This program, by design, does not handle that case, so it is verified 63 code. This program, by design, does not handle that case, so it is verified
63 that the number of symbols is less than 2^(root + 1). 64 that the number of symbols is less than 2^(root + 1).
64 65
65 In order to speed up the examination (by about ten orders of magnitude for 66 In order to speed up the examination (by about ten orders of magnitude for
66 the default arguments), the intermediate states in the build-up of a code 67 the default arguments), the intermediate states in the build-up of a code
67 are remembered and previously visited branches are pruned. The memory 68 are remembered and previously visited branches are pruned. The memory
68 required for this will increase rapidly with the total number of symbols and 69 required for this will increase rapidly with the total number of symbols and
69 the maximum code length in bits. However this is a very small price to pay 70 the maximum code length in bits. However this is a very small price to pay
70 for the vast speedup. 71 for the vast speedup.
71 72
72 First, all of the possible Huffman codes are counted, and reachable 73 First, all of the possible Huffman codes are counted, and reachable
73 intermediate states are noted by a non-zero count in a saved-results array. 74 intermediate states are noted by a non-zero count in a saved-results array.
74 Second, the intermediate states that lead to (root + 1) bit or longer codes 75 Second, the intermediate states that lead to (root + 1) bit or longer codes
75 are used to look at all sub-codes from those junctures for their inflate 76 are used to look at all sub-codes from those junctures for their inflate
76 memory usage. (The amount of memory used is not affected by the number of 77 memory usage. (The amount of memory used is not affected by the number of
77 codes of root bits or less in length.) Third, the visited states in the 78 codes of root bits or less in length.) Third, the visited states in the
78 construction of those sub-codes and the associated calculation of the table 79 construction of those sub-codes and the associated calculation of the table
79 size is recalled in order to avoid recalculating from the same juncture. 80 size is recalled in order to avoid recalculating from the same juncture.
80 Beginning the code examination at (root + 1) bit codes, which is enabled by 81 Beginning the code examination at (root + 1) bit codes, which is enabled by
81 identifying the reachable nodes, accounts for about six of the orders of 82 identifying the reachable nodes, accounts for about six of the orders of
82 magnitude of improvement for the default arguments. About another four 83 magnitude of improvement for the default arguments. About another four
83 orders of magnitude come from not revisiting previous states. Out of 84 orders of magnitude come from not revisiting previous states. Out of
84 approximately 2x10^16 possible Huffman codes, only about 2x10^6 sub-codes 85 approximately 2x10^16 possible Huffman codes, only about 2x10^6 sub-codes
85 need to be examined to cover all of the possible table memory usage cases 86 need to be examined to cover all of the possible table memory usage cases
86 for the default arguments of 286 symbols limited to 15-bit codes. 87 for the default arguments of 286 symbols limited to 15-bit codes.
87 88
88 Note that an unsigned long long type is used for counting. It is quite easy 89 Note that an unsigned long long type is used for counting. It is quite easy
89 to exceed the capacity of an eight-byte integer with a large number of 90 to exceed the capacity of an eight-byte integer with a large number of
90 symbols and a large maximum code length, so multiple-precision arithmetic 91 symbols and a large maximum code length, so multiple-precision arithmetic
91 would need to replace the unsigned long long arithmetic in that case. This 92 would need to replace the unsigned long long arithmetic in that case. This
92 program will abort if an overflow occurs. The big_t type identifies where 93 program will abort if an overflow occurs. The big_t type identifies where
93 the counting takes place. 94 the counting takes place.
94 95
95 An unsigned long long type is also used for calculating the number of 96 An unsigned long long type is also used for calculating the number of
96 possible codes remaining at the maximum length. This limits the maximum 97 possible codes remaining at the maximum length. This limits the maximum code
97 code length to the number of bits in a long long minus the number of bits 98 length to the number of bits in a long long minus the number of bits needed
98 needed to represent the symbols in a flat code. The code_t type identifies 99 to represent the symbols in a flat code. The code_t type identifies where
99 where the bit pattern counting takes place. 100 the bit pattern counting takes place.
100 */ 101 */
101 102
102#include <stdio.h> 103#include <stdio.h>
@@ -106,13 +107,13 @@
106 107
107#define local static 108#define local static
108 109
109/* special data types */ 110// Special data types.
110typedef unsigned long long big_t; /* type for code counting */ 111typedef unsigned long long big_t; // type for code counting
111#define PRIbig "llu" /* printf format for big_t */ 112#define PRIbig "llu" // printf format for big_t
112typedef unsigned long long code_t; /* type for bit pattern counting */ 113typedef unsigned long long code_t; // type for bit pattern counting
113struct tab { /* type for been here check */ 114struct tab { // type for been here check
114 size_t len; /* length of bit vector in char's */ 115 size_t len; // length of bit vector in char's
115 char *vec; /* allocated bit vector */ 116 char *vec; // allocated bit vector
116}; 117};
117 118
118/* The array for saving results, num[], is indexed with this triplet: 119/* The array for saving results, num[], is indexed with this triplet:
@@ -127,24 +128,23 @@ struct tab { /* type for been here check */
127 left: 2..syms - 1, but only the evens (so syms == 8 -> 2, 4, 6) 128 left: 2..syms - 1, but only the evens (so syms == 8 -> 2, 4, 6)
128 len: 1..max - 1 (max == maximum code length in bits) 129 len: 1..max - 1 (max == maximum code length in bits)
129 130
130 syms == 2 is not saved since that immediately leads to a single code. left 131 syms == 2 is not saved since that immediately leads to a single code. left
131 must be even, since it represents the number of available bit patterns at 132 must be even, since it represents the number of available bit patterns at
132 the current length, which is double the number at the previous length. 133 the current length, which is double the number at the previous length. left
133 left ends at syms-1 since left == syms immediately results in a single code. 134 ends at syms-1 since left == syms immediately results in a single code.
134 (left > sym is not allowed since that would result in an incomplete code.) 135 (left > sym is not allowed since that would result in an incomplete code.)
135 len is less than max, since the code completes immediately when len == max. 136 len is less than max, since the code completes immediately when len == max.
136 137
137 The offset into the array is calculated for the three indices with the 138 The offset into the array is calculated for the three indices with the first
138 first one (syms) being outermost, and the last one (len) being innermost. 139 one (syms) being outermost, and the last one (len) being innermost. We build
139 We build the array with length max-1 lists for the len index, with syms-3 140 the array with length max-1 lists for the len index, with syms-3 of those
140 of those for each symbol. There are totsym-2 of those, with each one 141 for each symbol. There are totsym-2 of those, with each one varying in
141 varying in length as a function of sym. See the calculation of index in 142 length as a function of sym. See the calculation of index in map() for the
142 count() for the index, and the calculation of size in main() for the size 143 index, and the calculation of size in main() for the size of the array.
143 of the array.
144 144
145 For the deflate example of 286 symbols limited to 15-bit codes, the array 145 For the deflate example of 286 symbols limited to 15-bit codes, the array
146 has 284,284 entries, taking up 2.17 MB for an 8-byte big_t. More than 146 has 284,284 entries, taking up 2.17 MB for an 8-byte big_t. More than half
147 half of the space allocated for saved results is actually used -- not all 147 of the space allocated for saved results is actually used -- not all
148 possible triplets are reached in the generation of valid Huffman codes. 148 possible triplets are reached in the generation of valid Huffman codes.
149 */ 149 */
150 150
@@ -152,14 +152,14 @@ struct tab { /* type for been here check */
152 to the num[] array as described above for the (syms, left, len) triplet. 152 to the num[] array as described above for the (syms, left, len) triplet.
153 Each element in the array is further indexed by the (mem, rem) doublet, 153 Each element in the array is further indexed by the (mem, rem) doublet,
154 where mem is the amount of inflate table space used so far, and rem is the 154 where mem is the amount of inflate table space used so far, and rem is the
155 remaining unused entries in the current inflate sub-table. Each indexed 155 remaining unused entries in the current inflate sub-table. Each indexed
156 element is simply one bit indicating whether the state has been visited or 156 element is simply one bit indicating whether the state has been visited or
157 not. Since the ranges for mem and rem are not known a priori, each bit 157 not. Since the ranges for mem and rem are not known a priori, each bit
158 vector is of a variable size, and grows as needed to accommodate the visited 158 vector is of a variable size, and grows as needed to accommodate the visited
159 states. mem and rem are used to calculate a single index in a triangular 159 states. mem and rem are used to calculate a single index in a triangular
160 array. Since the range of mem is expected in the default case to be about 160 array. Since the range of mem is expected in the default case to be about
161 ten times larger than the range of rem, the array is skewed to reduce the 161 ten times larger than the range of rem, the array is skewed to reduce the
162 memory usage, with eight times the range for mem than for rem. See the 162 memory usage, with eight times the range for mem than for rem. See the
163 calculations for offset and bit in beenhere() for the details. 163 calculations for offset and bit in beenhere() for the details.
164 164
165 For the deflate example of 286 symbols limited to 15-bit codes, the bit 165 For the deflate example of 286 symbols limited to 15-bit codes, the bit
@@ -167,23 +167,22 @@ struct tab { /* type for been here check */
167 array itself. 167 array itself.
168 */ 168 */
169 169
170/* Globals to avoid propagating constants or constant pointers recursively */ 170// Globals to avoid propagating constants or constant pointers recursively.
171struct { 171struct {
172 int max; /* maximum allowed bit length for the codes */ 172 int max; // maximum allowed bit length for the codes
173 int root; /* size of base code table in bits */ 173 int root; // size of base code table in bits
174 int large; /* largest code table so far */ 174 int large; // largest code table so far
175 size_t size; /* number of elements in num and done */ 175 size_t size; // number of elements in num and done
176 int *code; /* number of symbols assigned to each bit length */ 176 int *code; // number of symbols assigned to each bit length
177 big_t *num; /* saved results array for code counting */ 177 big_t *num; // saved results array for code counting
178 struct tab *done; /* states already evaluated array */ 178 struct tab *done; // states already evaluated array
179} g; 179} g;
180 180
181/* Index function for num[] and done[] */ 181// Index function for num[] and done[].
182#define INDEX(i,j,k) (((size_t)((i-1)>>1)*((i-2)>>1)+(j>>1)-1)*(g.max-1)+k-1) 182#define INDEX(i,j,k) (((size_t)((i-1)>>1)*((i-2)>>1)+(j>>1)-1)*(g.max-1)+k-1)
183 183
184/* Free allocated space. Uses globals code, num, and done. */ 184// Free allocated space. Uses globals code, num, and done.
185local void cleanup(void) 185local void cleanup(void) {
186{
187 size_t n; 186 size_t n;
188 187
189 if (g.done != NULL) { 188 if (g.done != NULL) {
@@ -198,91 +197,89 @@ local void cleanup(void)
198 free(g.code); 197 free(g.code);
199} 198}
200 199
201/* Return the number of possible Huffman codes using bit patterns of lengths 200// Return the number of possible Huffman codes using bit patterns of lengths
202 len through max inclusive, coding syms symbols, with left bit patterns of 201// len through max inclusive, coding syms symbols, with left bit patterns of
203 length len unused -- return -1 if there is an overflow in the counting. 202// length len unused -- return -1 if there is an overflow in the counting. Keep
204 Keep a record of previous results in num to prevent repeating the same 203// a record of previous results in num to prevent repeating the same
205 calculation. Uses the globals max and num. */ 204// calculation. Uses the globals max and num.
206local big_t count(int syms, int len, int left) 205local big_t count(int syms, int len, int left) {
207{ 206 big_t sum; // number of possible codes from this juncture
208 big_t sum; /* number of possible codes from this juncture */ 207 big_t got; // value returned from count()
209 big_t got; /* value returned from count() */ 208 int least; // least number of syms to use at this juncture
210 int least; /* least number of syms to use at this juncture */ 209 int most; // most number of syms to use at this juncture
211 int most; /* most number of syms to use at this juncture */ 210 int use; // number of bit patterns to use in next call
212 int use; /* number of bit patterns to use in next call */ 211 size_t index; // index of this case in *num
213 size_t index; /* index of this case in *num */ 212
214 213 // see if only one possible code
215 /* see if only one possible code */
216 if (syms == left) 214 if (syms == left)
217 return 1; 215 return 1;
218 216
219 /* note and verify the expected state */ 217 // note and verify the expected state
220 assert(syms > left && left > 0 && len < g.max); 218 assert(syms > left && left > 0 && len < g.max);
221 219
222 /* see if we've done this one already */ 220 // see if we've done this one already
223 index = INDEX(syms, left, len); 221 index = INDEX(syms, left, len);
224 got = g.num[index]; 222 got = g.num[index];
225 if (got) 223 if (got)
226 return got; /* we have -- return the saved result */ 224 return got; // we have -- return the saved result
227 225
228 /* we need to use at least this many bit patterns so that the code won't be 226 // we need to use at least this many bit patterns so that the code won't be
229 incomplete at the next length (more bit patterns than symbols) */ 227 // incomplete at the next length (more bit patterns than symbols)
230 least = (left << 1) - syms; 228 least = (left << 1) - syms;
231 if (least < 0) 229 if (least < 0)
232 least = 0; 230 least = 0;
233 231
234 /* we can use at most this many bit patterns, lest there not be enough 232 // we can use at most this many bit patterns, lest there not be enough
235 available for the remaining symbols at the maximum length (if there were 233 // available for the remaining symbols at the maximum length (if there were
236 no limit to the code length, this would become: most = left - 1) */ 234 // no limit to the code length, this would become: most = left - 1)
237 most = (((code_t)left << (g.max - len)) - syms) / 235 most = (((code_t)left << (g.max - len)) - syms) /
238 (((code_t)1 << (g.max - len)) - 1); 236 (((code_t)1 << (g.max - len)) - 1);
239 237
240 /* count all possible codes from this juncture and add them up */ 238 // count all possible codes from this juncture and add them up
241 sum = 0; 239 sum = 0;
242 for (use = least; use <= most; use++) { 240 for (use = least; use <= most; use++) {
243 got = count(syms - use, len + 1, (left - use) << 1); 241 got = count(syms - use, len + 1, (left - use) << 1);
244 sum += got; 242 sum += got;
245 if (got == (big_t)0 - 1 || sum < got) /* overflow */ 243 if (got == (big_t)0 - 1 || sum < got) // overflow
246 return (big_t)0 - 1; 244 return (big_t)0 - 1;
247 } 245 }
248 246
249 /* verify that all recursive calls are productive */ 247 // verify that all recursive calls are productive
250 assert(sum != 0); 248 assert(sum != 0);
251 249
252 /* save the result and return it */ 250 // save the result and return it
253 g.num[index] = sum; 251 g.num[index] = sum;
254 return sum; 252 return sum;
255} 253}
256 254
257/* Return true if we've been here before, set to true if not. Set a bit in a 255// Return true if we've been here before, set to true if not. Set a bit in a
258 bit vector to indicate visiting this state. Each (syms,len,left) state 256// bit vector to indicate visiting this state. Each (syms,len,left) state has a
259 has a variable size bit vector indexed by (mem,rem). The bit vector is 257// variable size bit vector indexed by (mem,rem). The bit vector is lengthened
260 lengthened if needed to allow setting the (mem,rem) bit. */ 258// if needed to allow setting the (mem,rem) bit.
261local int beenhere(int syms, int len, int left, int mem, int rem) 259local int beenhere(int syms, int len, int left, int mem, int rem) {
262{ 260 size_t index; // index for this state's bit vector
263 size_t index; /* index for this state's bit vector */ 261 size_t offset; // offset in this state's bit vector
264 size_t offset; /* offset in this state's bit vector */ 262 int bit; // mask for this state's bit
265 int bit; /* mask for this state's bit */ 263 size_t length; // length of the bit vector in bytes
266 size_t length; /* length of the bit vector in bytes */ 264 char *vector; // new or enlarged bit vector
267 char *vector; /* new or enlarged bit vector */ 265
268 266 // point to vector for (syms,left,len), bit in vector for (mem,rem)
269 /* point to vector for (syms,left,len), bit in vector for (mem,rem) */
270 index = INDEX(syms, left, len); 267 index = INDEX(syms, left, len);
271 mem -= 1 << g.root; 268 mem -= 1 << g.root;
272 offset = (mem >> 3) + rem; 269 offset = (mem >> 3) + rem;
273 offset = ((offset * (offset + 1)) >> 1) + rem; 270 offset = ((offset * (offset + 1)) >> 1) + rem;
274 bit = 1 << (mem & 7); 271 bit = 1 << (mem & 7);
275 272
276 /* see if we've been here */ 273 // see if we've been here
277 length = g.done[index].len; 274 length = g.done[index].len;
278 if (offset < length && (g.done[index].vec[offset] & bit) != 0) 275 if (offset < length && (g.done[index].vec[offset] & bit) != 0)
279 return 1; /* done this! */ 276 return 1; // done this!
280 277
281 /* we haven't been here before -- set the bit to show we have now */ 278 // we haven't been here before -- set the bit to show we have now
282 279
283 /* see if we need to lengthen the vector in order to set the bit */ 280 // see if we need to lengthen the vector in order to set the bit
284 if (length <= offset) { 281 if (length <= offset) {
285 /* if we have one already, enlarge it, zero out the appended space */ 282 // if we have one already, enlarge it, zero out the appended space
286 if (length) { 283 if (length) {
287 do { 284 do {
288 length <<= 1; 285 length <<= 1;
@@ -293,7 +290,7 @@ local int beenhere(int syms, int len, int left, int mem, int rem)
293 length - g.done[index].len); 290 length - g.done[index].len);
294 } 291 }
295 292
296 /* otherwise we need to make a new vector and zero it out */ 293 // otherwise we need to make a new vector and zero it out
297 else { 294 else {
298 length = 1 << (len - g.root); 295 length = 1 << (len - g.root);
299 while (length <= offset) 296 while (length <= offset)
@@ -301,40 +298,39 @@ local int beenhere(int syms, int len, int left, int mem, int rem)
301 vector = calloc(length, sizeof(char)); 298 vector = calloc(length, sizeof(char));
302 } 299 }
303 300
304 /* in either case, bail if we can't get the memory */ 301 // in either case, bail if we can't get the memory
305 if (vector == NULL) { 302 if (vector == NULL) {
306 fputs("abort: unable to allocate enough memory\n", stderr); 303 fputs("abort: unable to allocate enough memory\n", stderr);
307 cleanup(); 304 cleanup();
308 exit(1); 305 exit(1);
309 } 306 }
310 307
311 /* install the new vector */ 308 // install the new vector
312 g.done[index].len = length; 309 g.done[index].len = length;
313 g.done[index].vec = vector; 310 g.done[index].vec = vector;
314 } 311 }
315 312
316 /* set the bit */ 313 // set the bit
317 g.done[index].vec[offset] |= bit; 314 g.done[index].vec[offset] |= bit;
318 return 0; 315 return 0;
319} 316}
320 317
321/* Examine all possible codes from the given node (syms, len, left). Compute 318// Examine all possible codes from the given node (syms, len, left). Compute
322 the amount of memory required to build inflate's decoding tables, where the 319// the amount of memory required to build inflate's decoding tables, where the
323 number of code structures used so far is mem, and the number remaining in 320// number of code structures used so far is mem, and the number remaining in
324 the current sub-table is rem. Uses the globals max, code, root, large, and 321// the current sub-table is rem. Uses the globals max, code, root, large, and
325 done. */ 322// done.
326local void examine(int syms, int len, int left, int mem, int rem) 323local void examine(int syms, int len, int left, int mem, int rem) {
327{ 324 int least; // least number of syms to use at this juncture
328 int least; /* least number of syms to use at this juncture */ 325 int most; // most number of syms to use at this juncture
329 int most; /* most number of syms to use at this juncture */ 326 int use; // number of bit patterns to use in next call
330 int use; /* number of bit patterns to use in next call */ 327
331 328 // see if we have a complete code
332 /* see if we have a complete code */
333 if (syms == left) { 329 if (syms == left) {
334 /* set the last code entry */ 330 // set the last code entry
335 g.code[len] = left; 331 g.code[len] = left;
336 332
337 /* complete computation of memory used by this code */ 333 // complete computation of memory used by this code
338 while (rem < left) { 334 while (rem < left) {
339 left -= rem; 335 left -= rem;
340 rem = 1 << (len - g.root); 336 rem = 1 << (len - g.root);
@@ -342,7 +338,7 @@ local void examine(int syms, int len, int left, int mem, int rem)
342 } 338 }
343 assert(rem == left); 339 assert(rem == left);
344 340
345 /* if this is a new maximum, show the entries used and the sub-code */ 341 // if this is a new maximum, show the entries used and the sub-code
346 if (mem > g.large) { 342 if (mem > g.large) {
347 g.large = mem; 343 g.large = mem;
348 printf("max %d: ", mem); 344 printf("max %d: ", mem);
@@ -353,28 +349,28 @@ local void examine(int syms, int len, int left, int mem, int rem)
353 fflush(stdout); 349 fflush(stdout);
354 } 350 }
355 351
356 /* remove entries as we drop back down in the recursion */ 352 // remove entries as we drop back down in the recursion
357 g.code[len] = 0; 353 g.code[len] = 0;
358 return; 354 return;
359 } 355 }
360 356
361 /* prune the tree if we can */ 357 // prune the tree if we can
362 if (beenhere(syms, len, left, mem, rem)) 358 if (beenhere(syms, len, left, mem, rem))
363 return; 359 return;
364 360
365 /* we need to use at least this many bit patterns so that the code won't be 361 // we need to use at least this many bit patterns so that the code won't be
366 incomplete at the next length (more bit patterns than symbols) */ 362 // incomplete at the next length (more bit patterns than symbols)
367 least = (left << 1) - syms; 363 least = (left << 1) - syms;
368 if (least < 0) 364 if (least < 0)
369 least = 0; 365 least = 0;
370 366
371 /* we can use at most this many bit patterns, lest there not be enough 367 // we can use at most this many bit patterns, lest there not be enough
372 available for the remaining symbols at the maximum length (if there were 368 // available for the remaining symbols at the maximum length (if there were
373 no limit to the code length, this would become: most = left - 1) */ 369 // no limit to the code length, this would become: most = left - 1)
374 most = (((code_t)left << (g.max - len)) - syms) / 370 most = (((code_t)left << (g.max - len)) - syms) /
375 (((code_t)1 << (g.max - len)) - 1); 371 (((code_t)1 << (g.max - len)) - 1);
376 372
377 /* occupy least table spaces, creating new sub-tables as needed */ 373 // occupy least table spaces, creating new sub-tables as needed
378 use = least; 374 use = least;
379 while (rem < use) { 375 while (rem < use) {
380 use -= rem; 376 use -= rem;
@@ -383,7 +379,7 @@ local void examine(int syms, int len, int left, int mem, int rem)
383 } 379 }
384 rem -= use; 380 rem -= use;
385 381
386 /* examine codes from here, updating table space as we go */ 382 // examine codes from here, updating table space as we go
387 for (use = least; use <= most; use++) { 383 for (use = least; use <= most; use++) {
388 g.code[len] = use; 384 g.code[len] = use;
389 examine(syms - use, len + 1, (left - use) << 1, 385 examine(syms - use, len + 1, (left - use) << 1,
@@ -395,79 +391,74 @@ local void examine(int syms, int len, int left, int mem, int rem)
395 rem--; 391 rem--;
396 } 392 }
397 393
398 /* remove entries as we drop back down in the recursion */ 394 // remove entries as we drop back down in the recursion
399 g.code[len] = 0; 395 g.code[len] = 0;
400} 396}
401 397
402/* Look at all sub-codes starting with root + 1 bits. Look at only the valid 398// Look at all sub-codes starting with root + 1 bits. Look at only the valid
403 intermediate code states (syms, left, len). For each completed code, 399// intermediate code states (syms, left, len). For each completed code,
404 calculate the amount of memory required by inflate to build the decoding 400// calculate the amount of memory required by inflate to build the decoding
405 tables. Find the maximum amount of memory required and show the code that 401// tables. Find the maximum amount of memory required and show the code that
406 requires that maximum. Uses the globals max, root, and num. */ 402// requires that maximum. Uses the globals max, root, and num.
407local void enough(int syms) 403local void enough(int syms) {
408{ 404 int n; // number of remaing symbols for this node
409 int n; /* number of remaing symbols for this node */ 405 int left; // number of unused bit patterns at this length
410 int left; /* number of unused bit patterns at this length */ 406 size_t index; // index of this case in *num
411 size_t index; /* index of this case in *num */ 407
412 408 // clear code
413 /* clear code */
414 for (n = 0; n <= g.max; n++) 409 for (n = 0; n <= g.max; n++)
415 g.code[n] = 0; 410 g.code[n] = 0;
416 411
417 /* look at all (root + 1) bit and longer codes */ 412 // look at all (root + 1) bit and longer codes
418 g.large = 1 << g.root; /* base table */ 413 g.large = 1 << g.root; // base table
419 if (g.root < g.max) /* otherwise, there's only a base table */ 414 if (g.root < g.max) // otherwise, there's only a base table
420 for (n = 3; n <= syms; n++) 415 for (n = 3; n <= syms; n++)
421 for (left = 2; left < n; left += 2) 416 for (left = 2; left < n; left += 2) {
422 { 417 // look at all reachable (root + 1) bit nodes, and the
423 /* look at all reachable (root + 1) bit nodes, and the 418 // resulting codes (complete at root + 2 or more)
424 resulting codes (complete at root + 2 or more) */
425 index = INDEX(n, left, g.root + 1); 419 index = INDEX(n, left, g.root + 1);
426 if (g.root + 1 < g.max && g.num[index]) /* reachable node */ 420 if (g.root + 1 < g.max && g.num[index]) // reachable node
427 examine(n, g.root + 1, left, 1 << g.root, 0); 421 examine(n, g.root + 1, left, 1 << g.root, 0);
428 422
429 /* also look at root bit codes with completions at root + 1 423 // also look at root bit codes with completions at root + 1
430 bits (not saved in num, since complete), just in case */ 424 // bits (not saved in num, since complete), just in case
431 if (g.num[index - 1] && n <= left << 1) 425 if (g.num[index - 1] && n <= left << 1)
432 examine((n - left) << 1, g.root + 1, (n - left) << 1, 426 examine((n - left) << 1, g.root + 1, (n - left) << 1,
433 1 << g.root, 0); 427 1 << g.root, 0);
434 } 428 }
435 429
436 /* done */ 430 // done
437 printf("done: maximum of %d table entries\n", g.large); 431 printf("done: maximum of %d table entries\n", g.large);
438} 432}
439 433
440/* 434// Examine and show the total number of possible Huffman codes for a given
441 Examine and show the total number of possible Huffman codes for a given 435// maximum number of symbols, initial root table size, and maximum code length
442 maximum number of symbols, initial root table size, and maximum code length 436// in bits -- those are the command arguments in that order. The default values
443 in bits -- those are the command arguments in that order. The default 437// are 286, 9, and 15 respectively, for the deflate literal/length code. The
444 values are 286, 9, and 15 respectively, for the deflate literal/length code. 438// possible codes are counted for each number of coded symbols from two to the
445 The possible codes are counted for each number of coded symbols from two to 439// maximum. The counts for each of those and the total number of codes are
446 the maximum. The counts for each of those and the total number of codes are 440// shown. The maximum number of inflate table entires is then calculated across
447 shown. The maximum number of inflate table entires is then calculated 441// all possible codes. Each new maximum number of table entries and the
448 across all possible codes. Each new maximum number of table entries and the 442// associated sub-code (starting at root + 1 == 10 bits) is shown.
449 associated sub-code (starting at root + 1 == 10 bits) is shown. 443//
450 444// To count and examine Huffman codes that are not length-limited, provide a
451 To count and examine Huffman codes that are not length-limited, provide a 445// maximum length equal to the number of symbols minus one.
452 maximum length equal to the number of symbols minus one. 446//
453 447// For the deflate literal/length code, use "enough". For the deflate distance
454 For the deflate literal/length code, use "enough". For the deflate distance 448// code, use "enough 30 6".
455 code, use "enough 30 6". 449int main(int argc, char **argv) {
456 */ 450 int syms; // total number of symbols to code
457int main(int argc, char **argv) 451 int n; // number of symbols to code for this run
458{ 452 big_t got; // return value of count()
459 int syms; /* total number of symbols to code */ 453 big_t sum; // accumulated number of codes over n
460 int n; /* number of symbols to code for this run */ 454 code_t word; // for counting bits in code_t
461 big_t got; /* return value of count() */ 455
462 big_t sum; /* accumulated number of codes over n */ 456 // set up globals for cleanup()
463 code_t word; /* for counting bits in code_t */
464
465 /* set up globals for cleanup() */
466 g.code = NULL; 457 g.code = NULL;
467 g.num = NULL; 458 g.num = NULL;
468 g.done = NULL; 459 g.done = NULL;
469 460
470 /* get arguments -- default to the deflate literal/length code */ 461 // get arguments -- default to the deflate literal/length code
471 syms = 286; 462 syms = 286;
472 g.root = 9; 463 g.root = 9;
473 g.max = 15; 464 g.max = 15;
@@ -485,38 +476,38 @@ int main(int argc, char **argv)
485 return 1; 476 return 1;
486 } 477 }
487 478
488 /* if not restricting the code length, the longest is syms - 1 */ 479 // if not restricting the code length, the longest is syms - 1
489 if (g.max > syms - 1) 480 if (g.max > syms - 1)
490 g.max = syms - 1; 481 g.max = syms - 1;
491 482
492 /* determine the number of bits in a code_t */ 483 // determine the number of bits in a code_t
493 for (n = 0, word = 1; word; n++, word <<= 1) 484 for (n = 0, word = 1; word; n++, word <<= 1)
494 ; 485 ;
495 486
496 /* make sure that the calculation of most will not overflow */ 487 // make sure that the calculation of most will not overflow
497 if (g.max > n || (code_t)(syms - 2) >= (((code_t)0 - 1) >> (g.max - 1))) { 488 if (g.max > n || (code_t)(syms - 2) >= (((code_t)0 - 1) >> (g.max - 1))) {
498 fputs("abort: code length too long for internal types\n", stderr); 489 fputs("abort: code length too long for internal types\n", stderr);
499 return 1; 490 return 1;
500 } 491 }
501 492
502 /* reject impossible code requests */ 493 // reject impossible code requests
503 if ((code_t)(syms - 1) > ((code_t)1 << g.max) - 1) { 494 if ((code_t)(syms - 1) > ((code_t)1 << g.max) - 1) {
504 fprintf(stderr, "%d symbols cannot be coded in %d bits\n", 495 fprintf(stderr, "%d symbols cannot be coded in %d bits\n",
505 syms, g.max); 496 syms, g.max);
506 return 1; 497 return 1;
507 } 498 }
508 499
509 /* allocate code vector */ 500 // allocate code vector
510 g.code = calloc(g.max + 1, sizeof(int)); 501 g.code = calloc(g.max + 1, sizeof(int));
511 if (g.code == NULL) { 502 if (g.code == NULL) {
512 fputs("abort: unable to allocate enough memory\n", stderr); 503 fputs("abort: unable to allocate enough memory\n", stderr);
513 return 1; 504 return 1;
514 } 505 }
515 506
516 /* determine size of saved results array, checking for overflows, 507 // determine size of saved results array, checking for overflows,
517 allocate and clear the array (set all to zero with calloc()) */ 508 // allocate and clear the array (set all to zero with calloc())
518 if (syms == 2) /* iff max == 1 */ 509 if (syms == 2) // iff max == 1
519 g.num = NULL; /* won't be saving any results */ 510 g.num = NULL; // won't be saving any results
520 else { 511 else {
521 g.size = syms >> 1; 512 g.size = syms >> 1;
522 if (g.size > ((size_t)0 - 1) / (n = (syms - 1) >> 1) || 513 if (g.size > ((size_t)0 - 1) / (n = (syms - 1) >> 1) ||
@@ -529,12 +520,12 @@ int main(int argc, char **argv)
529 } 520 }
530 } 521 }
531 522
532 /* count possible codes for all numbers of symbols, add up counts */ 523 // count possible codes for all numbers of symbols, add up counts
533 sum = 0; 524 sum = 0;
534 for (n = 2; n <= syms; n++) { 525 for (n = 2; n <= syms; n++) {
535 got = count(n, 1, 2); 526 got = count(n, 1, 2);
536 sum += got; 527 sum += got;
537 if (got == (big_t)0 - 1 || sum < got) { /* overflow */ 528 if (got == (big_t)0 - 1 || sum < got) { // overflow
538 fputs("abort: can't count that high!\n", stderr); 529 fputs("abort: can't count that high!\n", stderr);
539 cleanup(); 530 cleanup();
540 return 1; 531 return 1;
@@ -547,7 +538,7 @@ int main(int argc, char **argv)
547 else 538 else
548 puts(" (no length limit)"); 539 puts(" (no length limit)");
549 540
550 /* allocate and clear done array for beenhere() */ 541 // allocate and clear done array for beenhere()
551 if (syms == 2) 542 if (syms == 2)
552 g.done = NULL; 543 g.done = NULL;
553 else if (g.size > ((size_t)0 - 1) / sizeof(struct tab) || 544 else if (g.size > ((size_t)0 - 1) / sizeof(struct tab) ||
@@ -557,15 +548,15 @@ int main(int argc, char **argv)
557 return 1; 548 return 1;
558 } 549 }
559 550
560 /* find and show maximum inflate table usage */ 551 // find and show maximum inflate table usage
561 if (g.root > g.max) /* reduce root to max length */ 552 if (g.root > g.max) // reduce root to max length
562 g.root = g.max; 553 g.root = g.max;
563 if ((code_t)syms < ((code_t)1 << (g.root + 1))) 554 if ((code_t)syms < ((code_t)1 << (g.root + 1)))
564 enough(syms); 555 enough(syms);
565 else 556 else
566 puts("cannot handle minimum code lengths > root"); 557 puts("cannot handle minimum code lengths > root");
567 558
568 /* done */ 559 // done
569 cleanup(); 560 cleanup();
570 return 0; 561 return 0;
571} 562}