diff options
author | Mark Adler <madler@alumni.caltech.edu> | 2011-09-09 23:26:40 -0700 |
---|---|---|
committer | Mark Adler <madler@alumni.caltech.edu> | 2011-09-09 23:26:40 -0700 |
commit | f6194ef39af5864f792412460c354cc339dde7d1 (patch) | |
tree | 5ea1e6849128e9b2194c66ee3d82afa36b4ac07c /examples/enough.c | |
parent | 639be997883d9016baaf46017a2802b2ce1698bd (diff) | |
download | zlib-1.2.3.4.tar.gz zlib-1.2.3.4.tar.bz2 zlib-1.2.3.4.zip |
zlib 1.2.3.4v1.2.3.4
Diffstat (limited to 'examples/enough.c')
-rw-r--r-- | examples/enough.c | 569 |
1 files changed, 569 insertions, 0 deletions
diff --git a/examples/enough.c b/examples/enough.c new file mode 100644 index 0000000..b570707 --- /dev/null +++ b/examples/enough.c | |||
@@ -0,0 +1,569 @@ | |||
1 | /* enough.c -- determine the maximum size of inflate's Huffman code tables over | ||
2 | * all possible valid and complete Huffman codes, subject to a length limit. | ||
3 | * Copyright (C) 2007, 2008 Mark Adler | ||
4 | * Version 1.3 17 February 2008 Mark Adler | ||
5 | */ | ||
6 | |||
7 | /* Version history: | ||
8 | 1.0 3 Jan 2007 First version (derived from codecount.c version 1.4) | ||
9 | 1.1 4 Jan 2007 Use faster incremental table usage computation | ||
10 | Prune examine() search on previously visited states | ||
11 | 1.2 5 Jan 2007 Comments clean up | ||
12 | As inflate does, decrease root for short codes | ||
13 | Refuse cases where inflate would increase root | ||
14 | 1.3 17 Feb 2008 Add argument for initial root table size | ||
15 | Fix bug for initial root table size == max - 1 | ||
16 | Use a macro to compute the history index | ||
17 | */ | ||
18 | |||
19 | /* | ||
20 | Examine all possible Huffman codes for a given number of symbols and a | ||
21 | maximum code length in bits to determine the maximum table size for zilb's | ||
22 | inflate. Only complete Huffman codes are counted. | ||
23 | |||
24 | Two codes are considered distinct if the vectors of the number of codes per | ||
25 | length are not identical. So permutations of the symbol assignments result | ||
26 | in the same code for the counting, as do permutations of the assignments of | ||
27 | the bit values to the codes (i.e. only canonical codes are counted). | ||
28 | |||
29 | We build a code from shorter to longer lengths, determining how many symbols | ||
30 | are coded at each length. At each step, we have how many symbols remain to | ||
31 | be coded, what the last code length used was, and how many bit patterns of | ||
32 | that length remain unused. Then we add one to the code length and double the | ||
33 | number of unused patterns to graduate to the next code length. We then | ||
34 | assign all portions of the remaining symbols to that code length that | ||
35 | preserve the properties of a correct and eventually complete code. Those | ||
36 | properties are: we cannot use more bit patterns than are available; and when | ||
37 | all the symbols are used, there are exactly zero possible bit patterns | ||
38 | remaining. | ||
39 | |||
40 | The inflate Huffman decoding algorithm uses two-level lookup tables for | ||
41 | speed. There is a single first-level table to decode codes up to root bits | ||
42 | in length (root == 9 in the current inflate implementation). The table | ||
43 | has 1 << root entries and is indexed by the next root bits of input. Codes | ||
44 | shorter than root bits have replicated table entries, so that the correct | ||
45 | entry is pointed to regardless of the bits that follow the short code. If | ||
46 | the code is longer than root bits, then the table entry points to a second- | ||
47 | level table. The size of that table is determined by the longest code with | ||
48 | that root-bit prefix. If that longest code has length len, then the table | ||
49 | has size 1 << (len - root), to index the remaining bits in that set of | ||
50 | codes. Each subsequent root-bit prefix then has its own sub-table. The | ||
51 | total number of table entries required by the code is calculated | ||
52 | incrementally as the number of codes at each bit length is populated. When | ||
53 | all of the codes are shorter than root bits, then root is reduced to the | ||
54 | longest code length, resulting in a single, smaller, one-level table. | ||
55 | |||
56 | The inflate algorithm also provides for small values of root (relative to | ||
57 | the log2 of the number of symbols), where the shortest code has more bits | ||
58 | than root. In that case, root is increased to the length of the shortest | ||
59 | code. This program, by design, does not handle that case, so it is verified | ||
60 | that the number of symbols is less than 2^(root + 1). | ||
61 | |||
62 | In order to speed up the examination (by about ten orders of magnitude for | ||
63 | the default arguments), the intermediate states in the build-up of a code | ||
64 | are remembered and previously visited branches are pruned. The memory | ||
65 | required for this will increase rapidly with the total number of symbols and | ||
66 | the maximum code length in bits. However this is a very small price to pay | ||
67 | for the vast speedup. | ||
68 | |||
69 | First, all of the possible Huffman codes are counted, and reachable | ||
70 | intermediate states are noted by a non-zero count in a saved-results array. | ||
71 | Second, the intermediate states that lead to (root + 1) bit or longer codes | ||
72 | are used to look at all sub-codes from those junctures for their inflate | ||
73 | memory usage. (The amount of memory used is not affected by the number of | ||
74 | codes of root bits or less in length.) Third, the visited states in the | ||
75 | construction of those sub-codes and the associated calculation of the table | ||
76 | size is recalled in order to avoid recalculating from the same juncture. | ||
77 | Beginning the code examination at (root + 1) bit codes, which is enabled by | ||
78 | identifying the reachable nodes, accounts for about six of the orders of | ||
79 | magnitude of improvement for the default arguments. About another four | ||
80 | orders of magnitude come from not revisiting previous states. Out of | ||
81 | approximately 2x10^16 possible Huffman codes, only about 2x10^6 sub-codes | ||
82 | need to be examined to cover all of the possible table memory usage cases | ||
83 | for the default arguments of 286 symbols limited to 15-bit codes. | ||
84 | |||
85 | Note that an unsigned long long type is used for counting. It is quite easy | ||
86 | to exceed the capacity of an eight-byte integer with a large number of | ||
87 | symbols and a large maximum code length, so multiple-precision arithmetic | ||
88 | would need to replace the unsigned long long arithmetic in that case. This | ||
89 | program will abort if an overflow occurs. The big_t type identifies where | ||
90 | the counting takes place. | ||
91 | |||
92 | An unsigned long long type is also used for calculating the number of | ||
93 | possible codes remaining at the maximum length. This limits the maximum | ||
94 | code length to the number of bits in a long long minus the number of bits | ||
95 | needed to represent the symbols in a flat code. The code_t type identifies | ||
96 | where the bit pattern counting takes place. | ||
97 | */ | ||
98 | |||
99 | #include <stdio.h> | ||
100 | #include <stdlib.h> | ||
101 | #include <string.h> | ||
102 | #include <assert.h> | ||
103 | |||
104 | #define local static | ||
105 | |||
106 | /* special data types */ | ||
107 | typedef unsigned long long big_t; /* type for code counting */ | ||
108 | typedef unsigned long long code_t; /* type for bit pattern counting */ | ||
109 | struct tab { /* type for been here check */ | ||
110 | size_t len; /* length of bit vector in char's */ | ||
111 | char *vec; /* allocated bit vector */ | ||
112 | }; | ||
113 | |||
114 | /* The array for saving results, num[], is indexed with this triplet: | ||
115 | |||
116 | syms: number of symbols remaining to code | ||
117 | left: number of available bit patterns at length len | ||
118 | len: number of bits in the codes currently being assigned | ||
119 | |||
120 | Those indices are constrained thusly when saving results: | ||
121 | |||
122 | syms: 3..totsym (totsym == total symbols to code) | ||
123 | left: 2..syms - 1, but only the evens (so syms == 8 -> 2, 4, 6) | ||
124 | len: 1..max - 1 (max == maximum code length in bits) | ||
125 | |||
126 | syms == 2 is not saved since that immediately leads to a single code. left | ||
127 | must be even, since it represents the number of available bit patterns at | ||
128 | the current length, which is double the number at the previous length. | ||
129 | left ends at syms-1 since left == syms immediately results in a single code. | ||
130 | (left > sym is not allowed since that would result in an incomplete code.) | ||
131 | len is less than max, since the code completes immediately when len == max. | ||
132 | |||
133 | The offset into the array is calculated for the three indices with the | ||
134 | first one (syms) being outermost, and the last one (len) being innermost. | ||
135 | We build the array with length max-1 lists for the len index, with syms-3 | ||
136 | of those for each symbol. There are totsym-2 of those, with each one | ||
137 | varying in length as a function of sym. See the calculation of index in | ||
138 | count() for the index, and the calculation of size in main() for the size | ||
139 | of the array. | ||
140 | |||
141 | For the deflate example of 286 symbols limited to 15-bit codes, the array | ||
142 | has 284,284 entries, taking up 2.17 MB for an 8-byte big_t. More than | ||
143 | half of the space allocated for saved results is actually used -- not all | ||
144 | possible triplets are reached in the generation of valid Huffman codes. | ||
145 | */ | ||
146 | |||
147 | /* The array for tracking visited states, done[], is itself indexed identically | ||
148 | to the num[] array as described above for the (syms, left, len) triplet. | ||
149 | Each element in the array is further indexed by the (mem, rem) doublet, | ||
150 | where mem is the amount of inflate table space used so far, and rem is the | ||
151 | remaining unused entries in the current inflate sub-table. Each indexed | ||
152 | element is simply one bit indicating whether the state has been visited or | ||
153 | not. Since the ranges for mem and rem are not known a priori, each bit | ||
154 | vector is of a variable size, and grows as needed to accommodate the visited | ||
155 | states. mem and rem are used to calculate a single index in a triangular | ||
156 | array. Since the range of mem is expected in the default case to be about | ||
157 | ten times larger than the range of rem, the array is skewed to reduce the | ||
158 | memory usage, with eight times the range for mem than for rem. See the | ||
159 | calculations for offset and bit in beenhere() for the details. | ||
160 | |||
161 | For the deflate example of 286 symbols limited to 15-bit codes, the bit | ||
162 | vectors grow to total approximately 21 MB, in addition to the 4.3 MB done[] | ||
163 | array itself. | ||
164 | */ | ||
165 | |||
166 | /* Globals to avoid propagating constants or constant pointers recursively */ | ||
167 | local int max; /* maximum allowed bit length for the codes */ | ||
168 | local int root; /* size of base code table in bits */ | ||
169 | local int large; /* largest code table so far */ | ||
170 | local size_t size; /* number of elements in num and done */ | ||
171 | local int *code; /* number of symbols assigned to each bit length */ | ||
172 | local big_t *num; /* saved results array for code counting */ | ||
173 | local struct tab *done; /* states already evaluated array */ | ||
174 | |||
175 | /* Index function for num[] and done[] */ | ||
176 | #define INDEX(i,j,k) (((size_t)((i-1)>>1)*((i-2)>>1)+(j>>1)-1)*(max-1)+k-1) | ||
177 | |||
178 | /* Free allocated space. Uses globals code, num, and done. */ | ||
179 | local void cleanup(void) | ||
180 | { | ||
181 | size_t n; | ||
182 | |||
183 | if (done != NULL) { | ||
184 | for (n = 0; n < size; n++) | ||
185 | if (done[n].len) | ||
186 | free(done[n].vec); | ||
187 | free(done); | ||
188 | } | ||
189 | if (num != NULL) | ||
190 | free(num); | ||
191 | if (code != NULL) | ||
192 | free(code); | ||
193 | } | ||
194 | |||
195 | /* Return the number of possible Huffman codes using bit patterns of lengths | ||
196 | len through max inclusive, coding syms symbols, with left bit patterns of | ||
197 | length len unused -- return -1 if there is an overflow in the counting. | ||
198 | Keep a record of previous results in num to prevent repeating the same | ||
199 | calculation. Uses the globals max and num. */ | ||
200 | local big_t count(int syms, int len, int left) | ||
201 | { | ||
202 | big_t sum; /* number of possible codes from this juncture */ | ||
203 | big_t got; /* value returned from count() */ | ||
204 | int least; /* least number of syms to use at this juncture */ | ||
205 | int most; /* most number of syms to use at this juncture */ | ||
206 | int use; /* number of bit patterns to use in next call */ | ||
207 | size_t index; /* index of this case in *num */ | ||
208 | |||
209 | /* see if only one possible code */ | ||
210 | if (syms == left) | ||
211 | return 1; | ||
212 | |||
213 | /* note and verify the expected state */ | ||
214 | assert(syms > left && left > 0 && len < max); | ||
215 | |||
216 | /* see if we've done this one already */ | ||
217 | index = INDEX(syms, left, len); | ||
218 | got = num[index]; | ||
219 | if (got) | ||
220 | return got; /* we have -- return the saved result */ | ||
221 | |||
222 | /* we need to use at least this many bit patterns so that the code won't be | ||
223 | incomplete at the next length (more bit patterns than symbols) */ | ||
224 | least = (left << 1) - syms; | ||
225 | if (least < 0) | ||
226 | least = 0; | ||
227 | |||
228 | /* we can use at most this many bit patterns, lest there not be enough | ||
229 | available for the remaining symbols at the maximum length (if there were | ||
230 | no limit to the code length, this would become: most = left - 1) */ | ||
231 | most = (((code_t)left << (max - len)) - syms) / | ||
232 | (((code_t)1 << (max - len)) - 1); | ||
233 | |||
234 | /* count all possible codes from this juncture and add them up */ | ||
235 | sum = 0; | ||
236 | for (use = least; use <= most; use++) { | ||
237 | got = count(syms - use, len + 1, (left - use) << 1); | ||
238 | sum += got; | ||
239 | if (got == -1 || sum < got) /* overflow */ | ||
240 | return -1; | ||
241 | } | ||
242 | |||
243 | /* verify that all recursive calls are productive */ | ||
244 | assert(sum != 0); | ||
245 | |||
246 | /* save the result and return it */ | ||
247 | num[index] = sum; | ||
248 | return sum; | ||
249 | } | ||
250 | |||
251 | /* Return true if we've been here before, set to true if not. Set a bit in a | ||
252 | bit vector to indicate visiting this state. Each (syms,len,left) state | ||
253 | has a variable size bit vector indexed by (mem,rem). The bit vector is | ||
254 | lengthened if needed to allow setting the (mem,rem) bit. */ | ||
255 | local int beenhere(int syms, int len, int left, int mem, int rem) | ||
256 | { | ||
257 | size_t index; /* index for this state's bit vector */ | ||
258 | size_t offset; /* offset in this state's bit vector */ | ||
259 | int bit; /* mask for this state's bit */ | ||
260 | size_t length; /* length of the bit vector in bytes */ | ||
261 | char *vector; /* new or enlarged bit vector */ | ||
262 | |||
263 | /* point to vector for (syms,left,len), bit in vector for (mem,rem) */ | ||
264 | index = INDEX(syms, left, len); | ||
265 | mem -= 1 << root; | ||
266 | offset = (mem >> 3) + rem; | ||
267 | offset = ((offset * (offset + 1)) >> 1) + rem; | ||
268 | bit = 1 << (mem & 7); | ||
269 | |||
270 | /* see if we've been here */ | ||
271 | length = done[index].len; | ||
272 | if (offset < length && (done[index].vec[offset] & bit) != 0) | ||
273 | return 1; /* done this! */ | ||
274 | |||
275 | /* we haven't been here before -- set the bit to show we have now */ | ||
276 | |||
277 | /* see if we need to lengthen the vector in order to set the bit */ | ||
278 | if (length <= offset) { | ||
279 | /* if we have one already, enlarge it, zero out the appended space */ | ||
280 | if (length) { | ||
281 | do { | ||
282 | length <<= 1; | ||
283 | } while (length <= offset); | ||
284 | vector = realloc(done[index].vec, length); | ||
285 | if (vector != NULL) | ||
286 | memset(vector + done[index].len, 0, length - done[index].len); | ||
287 | } | ||
288 | |||
289 | /* otherwise we need to make a new vector and zero it out */ | ||
290 | else { | ||
291 | length = 1 << (len - root); | ||
292 | while (length <= offset) | ||
293 | length <<= 1; | ||
294 | vector = calloc(length, sizeof(char)); | ||
295 | } | ||
296 | |||
297 | /* in either case, bail if we can't get the memory */ | ||
298 | if (vector == NULL) { | ||
299 | fputs("abort: unable to allocate enough memory\n", stderr); | ||
300 | cleanup(); | ||
301 | exit(1); | ||
302 | } | ||
303 | |||
304 | /* install the new vector */ | ||
305 | done[index].len = length; | ||
306 | done[index].vec = vector; | ||
307 | } | ||
308 | |||
309 | /* set the bit */ | ||
310 | done[index].vec[offset] |= bit; | ||
311 | return 0; | ||
312 | } | ||
313 | |||
314 | /* Examine all possible codes from the given node (syms, len, left). Compute | ||
315 | the amount of memory required to build inflate's decoding tables, where the | ||
316 | number of code structures used so far is mem, and the number remaining in | ||
317 | the current sub-table is rem. Uses the globals max, code, root, large, and | ||
318 | done. */ | ||
319 | local void examine(int syms, int len, int left, int mem, int rem) | ||
320 | { | ||
321 | int least; /* least number of syms to use at this juncture */ | ||
322 | int most; /* most number of syms to use at this juncture */ | ||
323 | int use; /* number of bit patterns to use in next call */ | ||
324 | |||
325 | /* see if we have a complete code */ | ||
326 | if (syms == left) { | ||
327 | /* set the last code entry */ | ||
328 | code[len] = left; | ||
329 | |||
330 | /* complete computation of memory used by this code */ | ||
331 | while (rem < left) { | ||
332 | left -= rem; | ||
333 | rem = 1 << (len - root); | ||
334 | mem += rem; | ||
335 | } | ||
336 | assert(rem == left); | ||
337 | |||
338 | /* if this is a new maximum, show the entries used and the sub-code */ | ||
339 | if (mem > large) { | ||
340 | large = mem; | ||
341 | printf("max %d: ", mem); | ||
342 | for (use = root + 1; use <= max; use++) | ||
343 | if (code[use]) | ||
344 | printf("%d[%d] ", code[use], use); | ||
345 | putchar('\n'); | ||
346 | fflush(stdout); | ||
347 | } | ||
348 | |||
349 | /* remove entries as we drop back down in the recursion */ | ||
350 | code[len] = 0; | ||
351 | return; | ||
352 | } | ||
353 | |||
354 | /* prune the tree if we can */ | ||
355 | if (beenhere(syms, len, left, mem, rem)) | ||
356 | return; | ||
357 | |||
358 | /* we need to use at least this many bit patterns so that the code won't be | ||
359 | incomplete at the next length (more bit patterns than symbols) */ | ||
360 | least = (left << 1) - syms; | ||
361 | if (least < 0) | ||
362 | least = 0; | ||
363 | |||
364 | /* we can use at most this many bit patterns, lest there not be enough | ||
365 | available for the remaining symbols at the maximum length (if there were | ||
366 | no limit to the code length, this would become: most = left - 1) */ | ||
367 | most = (((code_t)left << (max - len)) - syms) / | ||
368 | (((code_t)1 << (max - len)) - 1); | ||
369 | |||
370 | /* occupy least table spaces, creating new sub-tables as needed */ | ||
371 | use = least; | ||
372 | while (rem < use) { | ||
373 | use -= rem; | ||
374 | rem = 1 << (len - root); | ||
375 | mem += rem; | ||
376 | } | ||
377 | rem -= use; | ||
378 | |||
379 | /* examine codes from here, updating table space as we go */ | ||
380 | for (use = least; use <= most; use++) { | ||
381 | code[len] = use; | ||
382 | examine(syms - use, len + 1, (left - use) << 1, | ||
383 | mem + (rem ? 1 << (len - root) : 0), rem << 1); | ||
384 | if (rem == 0) { | ||
385 | rem = 1 << (len - root); | ||
386 | mem += rem; | ||
387 | } | ||
388 | rem--; | ||
389 | } | ||
390 | |||
391 | /* remove entries as we drop back down in the recursion */ | ||
392 | code[len] = 0; | ||
393 | } | ||
394 | |||
395 | /* Look at all sub-codes starting with root + 1 bits. Look at only the valid | ||
396 | intermediate code states (syms, left, len). For each completed code, | ||
397 | calculate the amount of memory required by inflate to build the decoding | ||
398 | tables. Find the maximum amount of memory required and show the code that | ||
399 | requires that maximum. Uses the globals max, root, and num. */ | ||
400 | local void enough(int syms) | ||
401 | { | ||
402 | int n; /* number of remaing symbols for this node */ | ||
403 | int left; /* number of unused bit patterns at this length */ | ||
404 | size_t index; /* index of this case in *num */ | ||
405 | |||
406 | /* clear code */ | ||
407 | for (n = 0; n <= max; n++) | ||
408 | code[n] = 0; | ||
409 | |||
410 | /* look at all (root + 1) bit and longer codes */ | ||
411 | large = 1 << root; /* base table */ | ||
412 | if (root < max) /* otherwise, there's only a base table */ | ||
413 | for (n = 3; n <= syms; n++) | ||
414 | for (left = 2; left < n; left += 2) | ||
415 | { | ||
416 | /* look at all reachable (root + 1) bit nodes, and the | ||
417 | resulting codes (complete at root + 2 or more) */ | ||
418 | index = INDEX(n, left, root + 1); | ||
419 | if (root + 1 < max && num[index]) /* reachable node */ | ||
420 | examine(n, root + 1, left, 1 << root, 0); | ||
421 | |||
422 | /* also look at root bit codes with completions at root + 1 | ||
423 | bits (not saved in num, since complete), just in case */ | ||
424 | if (num[index - 1] && n <= left << 1) | ||
425 | examine((n - left) << 1, root + 1, (n - left) << 1, | ||
426 | 1 << root, 0); | ||
427 | } | ||
428 | |||
429 | /* done */ | ||
430 | printf("done: maximum of %d table entries\n", large); | ||
431 | } | ||
432 | |||
433 | /* | ||
434 | Examine and show the total number of possible Huffman codes for a given | ||
435 | maximum number of symbols, initial root table size, and maximum code length | ||
436 | in bits -- those are the command arguments in that order. The default | ||
437 | values are 286, 9, and 15 respectively, for the deflate literal/length code. | ||
438 | The possible codes are counted for each number of coded symbols from two to | ||
439 | the maximum. The counts for each of those and the total number of codes are | ||
440 | shown. The maximum number of inflate table entires is then calculated | ||
441 | across all possible codes. Each new maximum number of table entries and the | ||
442 | associated sub-code (starting at root + 1 == 10 bits) is shown. | ||
443 | |||
444 | To count and examine Huffman codes that are not length-limited, provide a | ||
445 | maximum length equal to the number of symbols minus one. | ||
446 | |||
447 | For the deflate literal/length code, use "enough". For the deflate distance | ||
448 | code, use "enough 30 6". | ||
449 | |||
450 | This uses the %llu printf format to print big_t numbers, which assumes that | ||
451 | big_t is an unsigned long long. If the big_t type is changed (for example | ||
452 | to a multiple precision type), the method of printing will also need to be | ||
453 | updated. | ||
454 | */ | ||
455 | int main(int argc, char **argv) | ||
456 | { | ||
457 | int syms; /* total number of symbols to code */ | ||
458 | int n; /* number of symbols to code for this run */ | ||
459 | big_t got; /* return value of count() */ | ||
460 | big_t sum; /* accumulated number of codes over n */ | ||
461 | |||
462 | /* set up globals for cleanup() */ | ||
463 | code = NULL; | ||
464 | num = NULL; | ||
465 | done = NULL; | ||
466 | |||
467 | /* get arguments -- default to the deflate literal/length code */ | ||
468 | syms = 286; | ||
469 | root = 9; | ||
470 | max = 15; | ||
471 | if (argc > 1) { | ||
472 | syms = atoi(argv[1]); | ||
473 | if (argc > 2) { | ||
474 | root = atoi(argv[2]); | ||
475 | if (argc > 3) | ||
476 | max = atoi(argv[3]); | ||
477 | } | ||
478 | } | ||
479 | if (argc > 4 || syms < 2 || root < 1 || max < 1) { | ||
480 | fputs("invalid arguments, need: [sym >= 2 [root >= 1 [max >= 1]]]\n", | ||
481 | stderr); | ||
482 | return 1; | ||
483 | } | ||
484 | |||
485 | /* if not restricting the code length, the longest is syms - 1 */ | ||
486 | if (max > syms - 1) | ||
487 | max = syms - 1; | ||
488 | |||
489 | /* determine the number of bits in a code_t */ | ||
490 | n = 0; | ||
491 | while (((code_t)1 << n) != 0) | ||
492 | n++; | ||
493 | |||
494 | /* make sure that the calculation of most will not overflow */ | ||
495 | if (max > n || syms - 2 >= (((code_t)0 - 1) >> (max - 1))) { | ||
496 | fputs("abort: code length too long for internal types\n", stderr); | ||
497 | return 1; | ||
498 | } | ||
499 | |||
500 | /* reject impossible code requests */ | ||
501 | if (syms - 1 > ((code_t)1 << max) - 1) { | ||
502 | fprintf(stderr, "%d symbols cannot be coded in %d bits\n", | ||
503 | syms, max); | ||
504 | return 1; | ||
505 | } | ||
506 | |||
507 | /* allocate code vector */ | ||
508 | code = calloc(max + 1, sizeof(int)); | ||
509 | if (code == NULL) { | ||
510 | fputs("abort: unable to allocate enough memory\n", stderr); | ||
511 | return 1; | ||
512 | } | ||
513 | |||
514 | /* determine size of saved results array, checking for overflows, | ||
515 | allocate and clear the array (set all to zero with calloc()) */ | ||
516 | if (syms == 2) /* iff max == 1 */ | ||
517 | num = NULL; /* won't be saving any results */ | ||
518 | else { | ||
519 | size = syms >> 1; | ||
520 | if (size > ((size_t)0 - 1) / (n = (syms - 1) >> 1) || | ||
521 | (size *= n, size > ((size_t)0 - 1) / (n = max - 1)) || | ||
522 | (size *= n, size > ((size_t)0 - 1) / sizeof(big_t)) || | ||
523 | (num = calloc(size, sizeof(big_t))) == NULL) { | ||
524 | fputs("abort: unable to allocate enough memory\n", stderr); | ||
525 | cleanup(); | ||
526 | return 1; | ||
527 | } | ||
528 | } | ||
529 | |||
530 | /* count possible codes for all numbers of symbols, add up counts */ | ||
531 | sum = 0; | ||
532 | for (n = 2; n <= syms; n++) { | ||
533 | got = count(n, 1, 2); | ||
534 | sum += got; | ||
535 | if (got == -1 || sum < got) { /* overflow */ | ||
536 | fputs("abort: can't count that high!\n", stderr); | ||
537 | cleanup(); | ||
538 | return 1; | ||
539 | } | ||
540 | printf("%llu %d-codes\n", got, n); | ||
541 | } | ||
542 | printf("%llu total codes for 2 to %d symbols", sum, syms); | ||
543 | if (max < syms - 1) | ||
544 | printf(" (%d-bit length limit)\n", max); | ||
545 | else | ||
546 | puts(" (no length limit)"); | ||
547 | |||
548 | /* allocate and clear done array for beenhere() */ | ||
549 | if (syms == 2) | ||
550 | done = NULL; | ||
551 | else if (size > ((size_t)0 - 1) / sizeof(struct tab) || | ||
552 | (done = calloc(size, sizeof(struct tab))) == NULL) { | ||
553 | fputs("abort: unable to allocate enough memory\n", stderr); | ||
554 | cleanup(); | ||
555 | return 1; | ||
556 | } | ||
557 | |||
558 | /* find and show maximum inflate table usage */ | ||
559 | if (root > max) /* reduce root to max length */ | ||
560 | root = max; | ||
561 | if (syms < ((code_t)1 << (root + 1))) | ||
562 | enough(syms); | ||
563 | else | ||
564 | puts("cannot handle minimum code lengths > root"); | ||
565 | |||
566 | /* done */ | ||
567 | cleanup(); | ||
568 | return 0; | ||
569 | } | ||