diff options
Diffstat (limited to 'examples')
-rw-r--r-- | examples/enough.c | 351 |
1 files changed, 191 insertions, 160 deletions
diff --git a/examples/enough.c b/examples/enough.c index 5cfbfbf..19cf08c 100644 --- a/examples/enough.c +++ b/examples/enough.c | |||
@@ -1,7 +1,7 @@ | |||
1 | /* enough.c -- determine the maximum size of inflate's Huffman code tables over | 1 | /* enough.c -- determine the maximum size of inflate's Huffman code tables over |
2 | * all possible valid and complete prefix codes, subject to a length limit. | 2 | * all possible valid and complete prefix codes, subject to a length limit. |
3 | * Copyright (C) 2007, 2008, 2012, 2018 Mark Adler | 3 | * Copyright (C) 2007, 2008, 2012, 2018 Mark Adler |
4 | * Version 1.5 1 August 2018 Mark Adler | 4 | * Version 1.5 5 August 2018 Mark Adler |
5 | */ | 5 | */ |
6 | 6 | ||
7 | /* Version history: | 7 | /* Version history: |
@@ -17,8 +17,8 @@ | |||
17 | 1.4 18 Aug 2012 Avoid shifts more than bits in type (caused endless loop!) | 17 | 1.4 18 Aug 2012 Avoid shifts more than bits in type (caused endless loop!) |
18 | Clean up comparisons of different types | 18 | Clean up comparisons of different types |
19 | Clean up code indentation | 19 | Clean up code indentation |
20 | 1.5 1 Aug 2018 Clean up code style, formatting, and comments | 20 | 1.5 5 Aug 2018 Clean up code style, formatting, and comments |
21 | Use inline function instead of macro for index | 21 | Show all the codes for the maximum, and only the maximum |
22 | */ | 22 | */ |
23 | 23 | ||
24 | /* | 24 | /* |
@@ -39,16 +39,17 @@ | |||
39 | assign all portions of the remaining symbols to that code length that | 39 | assign all portions of the remaining symbols to that code length that |
40 | preserve the properties of a correct and eventually complete code. Those | 40 | preserve the properties of a correct and eventually complete code. Those |
41 | properties are: we cannot use more bit patterns than are available; and when | 41 | properties are: we cannot use more bit patterns than are available; and when |
42 | all the symbols are used, there are exactly zero possible bit patterns | 42 | all the symbols are used, there are exactly zero possible bit patterns left |
43 | remaining. | 43 | unused. |
44 | 44 | ||
45 | The inflate Huffman decoding algorithm uses two-level lookup tables for | 45 | The inflate Huffman decoding algorithm uses two-level lookup tables for |
46 | speed. There is a single first-level table to decode codes up to root bits | 46 | speed. There is a single first-level table to decode codes up to root bits |
47 | in length (root == 9 in the current inflate implementation). The table has 1 | 47 | in length (root == 9 for literal/length codes and root == 6 for distance |
48 | << root entries and is indexed by the next root bits of input. Codes shorter | 48 | codes, in the current inflate implementation). The base table has 1 << root |
49 | than root bits have replicated table entries, so that the correct entry is | 49 | entries and is indexed by the next root bits of input. Codes shorter than |
50 | root bits have replicated table entries, so that the correct entry is | ||
50 | pointed to regardless of the bits that follow the short code. If the code is | 51 | pointed to regardless of the bits that follow the short code. If the code is |
51 | longer than root bits, then the table entry points to a second- level table. | 52 | longer than root bits, then the table entry points to a second-level table. |
52 | The size of that table is determined by the longest code with that root-bit | 53 | The size of that table is determined by the longest code with that root-bit |
53 | prefix. If that longest code has length len, then the table has size 1 << | 54 | prefix. If that longest code has length len, then the table has size 1 << |
54 | (len - root), to index the remaining bits in that set of codes. Each | 55 | (len - root), to index the remaining bits in that set of codes. Each |
@@ -62,7 +63,7 @@ | |||
62 | the log2 of the number of symbols), where the shortest code has more bits | 63 | the log2 of the number of symbols), where the shortest code has more bits |
63 | than root. In that case, root is increased to the length of the shortest | 64 | than root. In that case, root is increased to the length of the shortest |
64 | code. This program, by design, does not handle that case, so it is verified | 65 | code. This program, by design, does not handle that case, so it is verified |
65 | that the number of symbols is less than 2^(root + 1). | 66 | that the number of symbols is less than 1 << (root + 1). |
66 | 67 | ||
67 | In order to speed up the examination (by about ten orders of magnitude for | 68 | In order to speed up the examination (by about ten orders of magnitude for |
68 | the default arguments), the intermediate states in the build-up of a code | 69 | the default arguments), the intermediate states in the build-up of a code |
@@ -87,34 +88,36 @@ | |||
87 | need to be examined to cover all of the possible table memory usage cases | 88 | need to be examined to cover all of the possible table memory usage cases |
88 | for the default arguments of 286 symbols limited to 15-bit codes. | 89 | for the default arguments of 286 symbols limited to 15-bit codes. |
89 | 90 | ||
90 | Note that an unsigned long long type is used for counting. It is quite easy | 91 | Note that the uintmax_t type is used for counting. It is quite easy to |
91 | to exceed the capacity of an eight-byte integer with a large number of | 92 | exceed the capacity of an eight-byte integer with a large number of symbols |
92 | symbols and a large maximum code length, so multiple-precision arithmetic | 93 | and a large maximum code length, so multiple-precision arithmetic would need |
93 | would need to replace the unsigned long long arithmetic in that case. This | 94 | to replace the integer arithmetic in that case. This program will abort if |
94 | program will abort if an overflow occurs. The big_t type identifies where | 95 | an overflow occurs. The big_t type identifies where the counting takes |
95 | the counting takes place. | 96 | place. |
96 | 97 | ||
97 | An unsigned long long type is also used for calculating the number of | 98 | The uintmax_t type is also used for calculating the number of possible codes |
98 | possible codes remaining at the maximum length. This limits the maximum code | 99 | remaining at the maximum length. This limits the maximum code length to the |
99 | length to the number of bits in a long long minus the number of bits needed | 100 | number of bits in a long long minus the number of bits needed to represent |
100 | to represent the symbols in a flat code. The code_t type identifies where | 101 | the symbols in a flat code. The code_t type identifies where the bit-pattern |
101 | the bit pattern counting takes place. | 102 | counting takes place. |
102 | */ | 103 | */ |
103 | 104 | ||
104 | #include <stdio.h> | 105 | #include <stdio.h> |
105 | #include <stdlib.h> | 106 | #include <stdlib.h> |
106 | #include <string.h> | 107 | #include <string.h> |
108 | #include <stdarg.h> | ||
109 | #include <stdint.h> | ||
107 | #include <assert.h> | 110 | #include <assert.h> |
108 | 111 | ||
109 | #define local static | 112 | #define local static |
110 | 113 | ||
111 | // Special data types. | 114 | // Special data types. |
112 | typedef unsigned long long big_t; // type for code counting | 115 | typedef uintmax_t big_t; // type for code counting |
113 | #define PRIbig "llu" // printf format for big_t | 116 | #define PRIbig "ju" // printf format for big_t |
114 | typedef unsigned long long code_t; // type for bit pattern counting | 117 | typedef uintmax_t code_t; // type for bit pattern counting |
115 | struct tab { // type for been here check | 118 | struct tab { // type for been-here check |
116 | size_t len; // length of bit vector in octets | 119 | size_t len; // allocated length of bit vector in octets |
117 | char *vec; // allocated bit vector | 120 | char *vec; // allocated bit vector |
118 | }; | 121 | }; |
119 | 122 | ||
120 | /* The array for saving results, num[], is indexed with this triplet: | 123 | /* The array for saving results, num[], is indexed with this triplet: |
@@ -161,59 +164,101 @@ struct tab { // type for been here check | |||
161 | array. Since the range of mem is expected in the default case to be about | 164 | array. Since the range of mem is expected in the default case to be about |
162 | ten times larger than the range of rem, the array is skewed to reduce the | 165 | ten times larger than the range of rem, the array is skewed to reduce the |
163 | memory usage, with eight times the range for mem than for rem. See the | 166 | memory usage, with eight times the range for mem than for rem. See the |
164 | calculations for offset and bit in beenhere() for the details. | 167 | calculations for offset and bit in been_here() for the details. |
165 | 168 | ||
166 | For the deflate example of 286 symbols limited to 15-bit codes, the bit | 169 | For the deflate example of 286 symbols limited to 15-bit codes, the bit |
167 | vectors grow to total approximately 21 MB, in addition to the 4.3 MB done[] | 170 | vectors grow to total 5.5 MB, in addition to the 4.3 MB done array itself. |
168 | array itself. | ||
169 | */ | 171 | */ |
170 | 172 | ||
173 | // Type for a variable-length, allocated string. | ||
174 | typedef struct { | ||
175 | char *str; // pointer to allocated string | ||
176 | size_t size; // size of allocation | ||
177 | size_t len; // length of string, not including terminating zero | ||
178 | } string_t; | ||
179 | |||
180 | // Clear a string_t. | ||
181 | local void string_clear(string_t *s) { | ||
182 | s->str[0] = 0; | ||
183 | s->len = 0; | ||
184 | } | ||
185 | |||
186 | // Initialize a string_t. | ||
187 | local void string_init(string_t *s) { | ||
188 | s->size = 16; | ||
189 | s->str = malloc(s->size); | ||
190 | assert(s->str != NULL && "out of memory"); | ||
191 | string_clear(s); | ||
192 | } | ||
193 | |||
194 | // Release the allocation of a string_t. | ||
195 | local void string_free(string_t *s) { | ||
196 | free(s->str); | ||
197 | s->str = NULL; | ||
198 | s->size = 0; | ||
199 | s->len = 0; | ||
200 | } | ||
201 | |||
202 | // Save the results of printf with fmt and the subsequent argument list to s. | ||
203 | // Each call appends to s. The allocated space for s is increased as needed. | ||
204 | local void string_printf(string_t *s, char *fmt, ...) { | ||
205 | va_list ap; | ||
206 | va_start(ap, fmt); | ||
207 | size_t len = s->len; | ||
208 | int ret = vsnprintf(s->str + len, s->size - len, fmt, ap); | ||
209 | assert(ret >= 0 && "out of memory"); | ||
210 | s->len += ret; | ||
211 | if (s->size < s->len + 1) { | ||
212 | do { | ||
213 | s->size <<= 1; | ||
214 | assert(s->size != 0 && "overflow"); | ||
215 | } while (s->size < s->len + 1); | ||
216 | s->str = realloc(s->str, s->size); | ||
217 | assert(s->str != NULL && "out of memory"); | ||
218 | vsnprintf(s->str + len, s->size - len, fmt, ap); | ||
219 | } | ||
220 | va_end(ap); | ||
221 | } | ||
222 | |||
171 | // Globals to avoid propagating constants or constant pointers recursively. | 223 | // Globals to avoid propagating constants or constant pointers recursively. |
172 | struct { | 224 | struct { |
173 | int max; // maximum allowed bit length for the codes | 225 | int max; // maximum allowed bit length for the codes |
174 | int root; // size of base code table in bits | 226 | int root; // size of base code table in bits |
175 | int large; // largest code table so far | 227 | int large; // largest code table so far |
176 | size_t size; // number of elements in num and done | 228 | size_t size; // number of elements in num and done |
229 | big_t tot; // total number of codes with maximum tables size | ||
230 | string_t out; // display of subcodes for maximum tables size | ||
177 | int *code; // number of symbols assigned to each bit length | 231 | int *code; // number of symbols assigned to each bit length |
178 | big_t *num; // saved results array for code counting | 232 | big_t *num; // saved results array for code counting |
179 | struct tab *done; // states already evaluated array | 233 | struct tab *done; // states already evaluated array |
180 | } g; | 234 | } g; |
181 | 235 | ||
182 | // Index function for num[] and done[]. | 236 | // Index function for num[] and done[]. |
183 | local inline size_t map(int i, int j, int k) { | 237 | local inline size_t map(int syms, int left, int len) { |
184 | return k - 1 + ((size_t)((i - 1) >> 1) * ((i - 2) >> 1) + (j >> 1) - 1) * | 238 | return ((size_t)((syms - 1) >> 1) * ((syms - 2) >> 1) + |
185 | (g.max - 1); | 239 | (left >> 1) - 1) * (g.max - 1) + |
240 | len - 1; | ||
186 | } | 241 | } |
187 | 242 | ||
188 | // Free allocated space. Uses globals code, num, and done. | 243 | // Free allocated space in globals. |
189 | local void cleanup(void) { | 244 | local void cleanup(void) { |
190 | size_t n; | ||
191 | |||
192 | if (g.done != NULL) { | 245 | if (g.done != NULL) { |
193 | for (n = 0; n < g.size; n++) | 246 | for (size_t n = 0; n < g.size; n++) |
194 | if (g.done[n].len) | 247 | if (g.done[n].len) |
195 | free(g.done[n].vec); | 248 | free(g.done[n].vec); |
196 | free(g.done); | 249 | g.size = 0; |
250 | free(g.done); g.done = NULL; | ||
197 | } | 251 | } |
198 | if (g.num != NULL) | 252 | free(g.num); g.num = NULL; |
199 | free(g.num); | 253 | free(g.code); g.code = NULL; |
200 | if (g.code != NULL) | 254 | string_free(&g.out); |
201 | free(g.code); | ||
202 | } | 255 | } |
203 | 256 | ||
204 | // Return the number of possible prefix codes using bit patterns of lengths len | 257 | // Return the number of possible prefix codes using bit patterns of lengths len |
205 | // through max inclusive, coding syms symbols, with left bit patterns of length | 258 | // through max inclusive, coding syms symbols, with left bit patterns of length |
206 | // len unused -- return -1 if there is an overflow in the counting. Keep a | 259 | // len unused -- return -1 if there is an overflow in the counting. Keep a |
207 | // record of previous results in num to prevent repeating the same calculation. | 260 | // record of previous results in num to prevent repeating the same calculation. |
208 | // Uses the globals max and num. | 261 | local big_t count(int syms, int left, int len) { |
209 | local big_t count(int syms, int len, int left) { | ||
210 | big_t sum; // number of possible codes from this juncture | ||
211 | big_t got; // value returned from count() | ||
212 | int least; // least number of syms to use at this juncture | ||
213 | int most; // most number of syms to use at this juncture | ||
214 | int use; // number of bit patterns to use in next call | ||
215 | size_t index; // index of this case in *num | ||
216 | |||
217 | // see if only one possible code | 262 | // see if only one possible code |
218 | if (syms == left) | 263 | if (syms == left) |
219 | return 1; | 264 | return 1; |
@@ -222,30 +267,30 @@ local big_t count(int syms, int len, int left) { | |||
222 | assert(syms > left && left > 0 && len < g.max); | 267 | assert(syms > left && left > 0 && len < g.max); |
223 | 268 | ||
224 | // see if we've done this one already | 269 | // see if we've done this one already |
225 | index = map(syms, left, len); | 270 | size_t index = map(syms, left, len); |
226 | got = g.num[index]; | 271 | big_t got = g.num[index]; |
227 | if (got) | 272 | if (got) |
228 | return got; // we have -- return the saved result | 273 | return got; // we have -- return the saved result |
229 | 274 | ||
230 | // we need to use at least this many bit patterns so that the code won't be | 275 | // we need to use at least this many bit patterns so that the code won't be |
231 | // incomplete at the next length (more bit patterns than symbols) | 276 | // incomplete at the next length (more bit patterns than symbols) |
232 | least = (left << 1) - syms; | 277 | int least = (left << 1) - syms; |
233 | if (least < 0) | 278 | if (least < 0) |
234 | least = 0; | 279 | least = 0; |
235 | 280 | ||
236 | // we can use at most this many bit patterns, lest there not be enough | 281 | // we can use at most this many bit patterns, lest there not be enough |
237 | // available for the remaining symbols at the maximum length (if there were | 282 | // available for the remaining symbols at the maximum length (if there were |
238 | // no limit to the code length, this would become: most = left - 1) | 283 | // no limit to the code length, this would become: most = left - 1) |
239 | most = (((code_t)left << (g.max - len)) - syms) / | 284 | int most = (((code_t)left << (g.max - len)) - syms) / |
240 | (((code_t)1 << (g.max - len)) - 1); | 285 | (((code_t)1 << (g.max - len)) - 1); |
241 | 286 | ||
242 | // count all possible codes from this juncture and add them up | 287 | // count all possible codes from this juncture and add them up |
243 | sum = 0; | 288 | big_t sum = 0; |
244 | for (use = least; use <= most; use++) { | 289 | for (int use = least; use <= most; use++) { |
245 | got = count(syms - use, len + 1, (left - use) << 1); | 290 | got = count(syms - use, (left - use) << 1, len + 1); |
246 | sum += got; | 291 | sum += got; |
247 | if (got == (big_t)0 - 1 || sum < got) // overflow | 292 | if (got == (big_t)-1 || sum < got) // overflow |
248 | return (big_t)0 - 1; | 293 | return (big_t)-1; |
249 | } | 294 | } |
250 | 295 | ||
251 | // verify that all recursive calls are productive | 296 | // verify that all recursive calls are productive |
@@ -259,23 +304,19 @@ local big_t count(int syms, int len, int left) { | |||
259 | // Return true if we've been here before, set to true if not. Set a bit in a | 304 | // Return true if we've been here before, set to true if not. Set a bit in a |
260 | // bit vector to indicate visiting this state. Each (syms,len,left) state has a | 305 | // bit vector to indicate visiting this state. Each (syms,len,left) state has a |
261 | // variable size bit vector indexed by (mem,rem). The bit vector is lengthened | 306 | // variable size bit vector indexed by (mem,rem). The bit vector is lengthened |
262 | // if needed to allow setting the (mem,rem) bit. | 307 | // as needed to allow setting the (mem,rem) bit. |
263 | local int beenhere(int syms, int len, int left, int mem, int rem) { | 308 | local int been_here(int syms, int left, int len, int mem, int rem) { |
264 | size_t index; // index for this state's bit vector | ||
265 | size_t offset; // offset in this state's bit vector | ||
266 | int bit; // mask for this state's bit | ||
267 | size_t length; // length of the bit vector in bytes | ||
268 | char *vector; // new or enlarged bit vector | ||
269 | |||
270 | // point to vector for (syms,left,len), bit in vector for (mem,rem) | 309 | // point to vector for (syms,left,len), bit in vector for (mem,rem) |
271 | index = map(syms, left, len); | 310 | size_t index = map(syms, left, len); |
272 | mem -= 1 << g.root; | 311 | mem -= 1 << g.root; // mem always includes the root table |
273 | offset = (mem >> 3) + rem; | 312 | mem >>= 1; // mem and rem are always even |
313 | rem >>= 1; | ||
314 | size_t offset = (mem >> 3) + rem; | ||
274 | offset = ((offset * (offset + 1)) >> 1) + rem; | 315 | offset = ((offset * (offset + 1)) >> 1) + rem; |
275 | bit = 1 << (mem & 7); | 316 | int bit = 1 << (mem & 7); |
276 | 317 | ||
277 | // see if we've been here | 318 | // see if we've been here |
278 | length = g.done[index].len; | 319 | size_t length = g.done[index].len; |
279 | if (offset < length && (g.done[index].vec[offset] & bit) != 0) | 320 | if (offset < length && (g.done[index].vec[offset] & bit) != 0) |
280 | return 1; // done this! | 321 | return 1; // done this! |
281 | 322 | ||
@@ -284,29 +325,23 @@ local int beenhere(int syms, int len, int left, int mem, int rem) { | |||
284 | // see if we need to lengthen the vector in order to set the bit | 325 | // see if we need to lengthen the vector in order to set the bit |
285 | if (length <= offset) { | 326 | if (length <= offset) { |
286 | // if we have one already, enlarge it, zero out the appended space | 327 | // if we have one already, enlarge it, zero out the appended space |
328 | char *vector; | ||
287 | if (length) { | 329 | if (length) { |
288 | do { | 330 | do { |
289 | length <<= 1; | 331 | length <<= 1; |
290 | } while (length <= offset); | 332 | } while (length <= offset); |
291 | vector = realloc(g.done[index].vec, length); | 333 | vector = realloc(g.done[index].vec, length); |
292 | if (vector != NULL) | 334 | assert(vector != NULL && "out of memory"); |
293 | memset(vector + g.done[index].len, 0, | 335 | memset(vector + g.done[index].len, 0, length - g.done[index].len); |
294 | length - g.done[index].len); | ||
295 | } | 336 | } |
296 | 337 | ||
297 | // otherwise we need to make a new vector and zero it out | 338 | // otherwise we need to make a new vector and zero it out |
298 | else { | 339 | else { |
299 | length = 1 << (len - g.root); | 340 | length = 16; |
300 | while (length <= offset) | 341 | while (length <= offset) |
301 | length <<= 1; | 342 | length <<= 1; |
302 | vector = calloc(length, sizeof(char)); | 343 | vector = calloc(length, 1); |
303 | } | 344 | assert(vector != NULL && "out of memory"); |
304 | |||
305 | // in either case, bail if we can't get the memory | ||
306 | if (vector == NULL) { | ||
307 | fputs("abort: unable to allocate enough memory\n", stderr); | ||
308 | cleanup(); | ||
309 | exit(1); | ||
310 | } | 345 | } |
311 | 346 | ||
312 | // install the new vector | 347 | // install the new vector |
@@ -322,13 +357,8 @@ local int beenhere(int syms, int len, int left, int mem, int rem) { | |||
322 | // Examine all possible codes from the given node (syms, len, left). Compute | 357 | // Examine all possible codes from the given node (syms, len, left). Compute |
323 | // the amount of memory required to build inflate's decoding tables, where the | 358 | // the amount of memory required to build inflate's decoding tables, where the |
324 | // number of code structures used so far is mem, and the number remaining in | 359 | // number of code structures used so far is mem, and the number remaining in |
325 | // the current sub-table is rem. Uses the globals max, code, root, large, and | 360 | // the current sub-table is rem. |
326 | // done. | 361 | local void examine(int syms, int left, int len, int mem, int rem) { |
327 | local void examine(int syms, int len, int left, int mem, int rem) { | ||
328 | int least; // least number of syms to use at this juncture | ||
329 | int most; // most number of syms to use at this juncture | ||
330 | int use; // number of bit patterns to use in next call | ||
331 | |||
332 | // see if we have a complete code | 362 | // see if we have a complete code |
333 | if (syms == left) { | 363 | if (syms == left) { |
334 | // set the last code entry | 364 | // set the last code entry |
@@ -342,15 +372,32 @@ local void examine(int syms, int len, int left, int mem, int rem) { | |||
342 | } | 372 | } |
343 | assert(rem == left); | 373 | assert(rem == left); |
344 | 374 | ||
345 | // if this is a new maximum, show the entries used and the sub-code | 375 | // if this is at the maximum, show the sub-code |
346 | if (mem > g.large) { | 376 | if (mem >= g.large) { |
347 | g.large = mem; | 377 | // if this is a new maximum, update the maximum and clear out the |
348 | printf("max %d: ", mem); | 378 | // printed sub-codes from the previous maximum |
349 | for (use = g.root + 1; use <= g.max; use++) | 379 | if (mem > g.large) { |
350 | if (g.code[use]) | 380 | g.large = mem; |
351 | printf("%d[%d] ", g.code[use], use); | 381 | string_clear(&g.out); |
352 | putchar('\n'); | 382 | } |
353 | fflush(stdout); | 383 | |
384 | // compute the starting state for this sub-code | ||
385 | syms = 0; | ||
386 | left = 1 << g.max; | ||
387 | for (int bits = g.max; bits > g.root; bits--) { | ||
388 | syms += g.code[bits]; | ||
389 | left -= g.code[bits]; | ||
390 | assert((left & 1) == 0); | ||
391 | left >>= 1; | ||
392 | } | ||
393 | |||
394 | // print the starting state and the resulting sub-code to g.out | ||
395 | string_printf(&g.out, "<%u, %u, %u>:", | ||
396 | syms, g.root + 1, ((1 << g.root) - left) << 1); | ||
397 | for (int bits = g.root + 1; bits <= g.max; bits++) | ||
398 | if (g.code[bits]) | ||
399 | string_printf(&g.out, " %d[%d]", g.code[bits], bits); | ||
400 | string_printf(&g.out, "\n"); | ||
354 | } | 401 | } |
355 | 402 | ||
356 | // remove entries as we drop back down in the recursion | 403 | // remove entries as we drop back down in the recursion |
@@ -359,23 +406,23 @@ local void examine(int syms, int len, int left, int mem, int rem) { | |||
359 | } | 406 | } |
360 | 407 | ||
361 | // prune the tree if we can | 408 | // prune the tree if we can |
362 | if (beenhere(syms, len, left, mem, rem)) | 409 | if (been_here(syms, left, len, mem, rem)) |
363 | return; | 410 | return; |
364 | 411 | ||
365 | // we need to use at least this many bit patterns so that the code won't be | 412 | // we need to use at least this many bit patterns so that the code won't be |
366 | // incomplete at the next length (more bit patterns than symbols) | 413 | // incomplete at the next length (more bit patterns than symbols) |
367 | least = (left << 1) - syms; | 414 | int least = (left << 1) - syms; |
368 | if (least < 0) | 415 | if (least < 0) |
369 | least = 0; | 416 | least = 0; |
370 | 417 | ||
371 | // we can use at most this many bit patterns, lest there not be enough | 418 | // we can use at most this many bit patterns, lest there not be enough |
372 | // available for the remaining symbols at the maximum length (if there were | 419 | // available for the remaining symbols at the maximum length (if there were |
373 | // no limit to the code length, this would become: most = left - 1) | 420 | // no limit to the code length, this would become: most = left - 1) |
374 | most = (((code_t)left << (g.max - len)) - syms) / | 421 | int most = (((code_t)left << (g.max - len)) - syms) / |
375 | (((code_t)1 << (g.max - len)) - 1); | 422 | (((code_t)1 << (g.max - len)) - 1); |
376 | 423 | ||
377 | // occupy least table spaces, creating new sub-tables as needed | 424 | // occupy least table spaces, creating new sub-tables as needed |
378 | use = least; | 425 | int use = least; |
379 | while (rem < use) { | 426 | while (rem < use) { |
380 | use -= rem; | 427 | use -= rem; |
381 | rem = 1 << (len - g.root); | 428 | rem = 1 << (len - g.root); |
@@ -386,7 +433,7 @@ local void examine(int syms, int len, int left, int mem, int rem) { | |||
386 | // examine codes from here, updating table space as we go | 433 | // examine codes from here, updating table space as we go |
387 | for (use = least; use <= most; use++) { | 434 | for (use = least; use <= most; use++) { |
388 | g.code[len] = use; | 435 | g.code[len] = use; |
389 | examine(syms - use, len + 1, (left - use) << 1, | 436 | examine(syms - use, (left - use) << 1, len + 1, |
390 | mem + (rem ? 1 << (len - g.root) : 0), rem << 1); | 437 | mem + (rem ? 1 << (len - g.root) : 0), rem << 1); |
391 | if (rem == 0) { | 438 | if (rem == 0) { |
392 | rem = 1 << (len - g.root); | 439 | rem = 1 << (len - g.root); |
@@ -402,37 +449,35 @@ local void examine(int syms, int len, int left, int mem, int rem) { | |||
402 | // Look at all sub-codes starting with root + 1 bits. Look at only the valid | 449 | // Look at all sub-codes starting with root + 1 bits. Look at only the valid |
403 | // intermediate code states (syms, left, len). For each completed code, | 450 | // intermediate code states (syms, left, len). For each completed code, |
404 | // calculate the amount of memory required by inflate to build the decoding | 451 | // calculate the amount of memory required by inflate to build the decoding |
405 | // tables. Find the maximum amount of memory required and show the code that | 452 | // tables. Find the maximum amount of memory required and show the codes that |
406 | // requires that maximum. Uses the globals max, root, and num. | 453 | // require that maximum. |
407 | local void enough(int syms) { | 454 | local void enough(int syms) { |
408 | int n; // number of remaing symbols for this node | ||
409 | int left; // number of unused bit patterns at this length | ||
410 | size_t index; // index of this case in *num | ||
411 | |||
412 | // clear code | 455 | // clear code |
413 | for (n = 0; n <= g.max; n++) | 456 | for (int n = 0; n <= g.max; n++) |
414 | g.code[n] = 0; | 457 | g.code[n] = 0; |
415 | 458 | ||
416 | // look at all (root + 1) bit and longer codes | 459 | // look at all (root + 1) bit and longer codes |
460 | string_clear(&g.out); // empty saved results | ||
417 | g.large = 1 << g.root; // base table | 461 | g.large = 1 << g.root; // base table |
418 | if (g.root < g.max) // otherwise, there's only a base table | 462 | if (g.root < g.max) // otherwise, there's only a base table |
419 | for (n = 3; n <= syms; n++) | 463 | for (int n = 3; n <= syms; n++) |
420 | for (left = 2; left < n; left += 2) { | 464 | for (int left = 2; left < n; left += 2) { |
421 | // look at all reachable (root + 1) bit nodes, and the | 465 | // look at all reachable (root + 1) bit nodes, and the |
422 | // resulting codes (complete at root + 2 or more) | 466 | // resulting codes (complete at root + 2 or more) |
423 | index = map(n, left, g.root + 1); | 467 | size_t index = map(n, left, g.root + 1); |
424 | if (g.root + 1 < g.max && g.num[index]) // reachable node | 468 | if (g.root + 1 < g.max && g.num[index]) // reachable node |
425 | examine(n, g.root + 1, left, 1 << g.root, 0); | 469 | examine(n, left, g.root + 1, 1 << g.root, 0); |
426 | 470 | ||
427 | // also look at root bit codes with completions at root + 1 | 471 | // also look at root bit codes with completions at root + 1 |
428 | // bits (not saved in num, since complete), just in case | 472 | // bits (not saved in num, since complete), just in case |
429 | if (g.num[index - 1] && n <= left << 1) | 473 | if (g.num[index - 1] && n <= left << 1) |
430 | examine((n - left) << 1, g.root + 1, (n - left) << 1, | 474 | examine((n - left) << 1, (n - left) << 1, g.root + 1, |
431 | 1 << g.root, 0); | 475 | 1 << g.root, 0); |
432 | } | 476 | } |
433 | 477 | ||
434 | // done | 478 | // done |
435 | printf("done: maximum of %d table entries\n", g.large); | 479 | printf("maximum of %d table entries for root = %d\n", g.large, g.root); |
480 | fputs(g.out.str, stdout); | ||
436 | } | 481 | } |
437 | 482 | ||
438 | // Examine and show the total number of possible prefix codes for a given | 483 | // Examine and show the total number of possible prefix codes for a given |
@@ -451,19 +496,14 @@ local void enough(int syms) { | |||
451 | // For the deflate literal/length code, use "enough". For the deflate distance | 496 | // For the deflate literal/length code, use "enough". For the deflate distance |
452 | // code, use "enough 30 6". | 497 | // code, use "enough 30 6". |
453 | int main(int argc, char **argv) { | 498 | int main(int argc, char **argv) { |
454 | int syms; // total number of symbols to code | ||
455 | int n; // number of symbols to code for this run | ||
456 | big_t got; // return value of count() | ||
457 | big_t sum; // accumulated number of codes over n | ||
458 | code_t word; // for counting bits in code_t | ||
459 | |||
460 | // set up globals for cleanup() | 499 | // set up globals for cleanup() |
461 | g.code = NULL; | 500 | g.code = NULL; |
462 | g.num = NULL; | 501 | g.num = NULL; |
463 | g.done = NULL; | 502 | g.done = NULL; |
503 | string_init(&g.out); | ||
464 | 504 | ||
465 | // get arguments -- default to the deflate literal/length code | 505 | // get arguments -- default to the deflate literal/length code |
466 | syms = 286; | 506 | int syms = 286; |
467 | g.root = 9; | 507 | g.root = 9; |
468 | g.max = 15; | 508 | g.max = 15; |
469 | if (argc > 1) { | 509 | if (argc > 1) { |
@@ -485,11 +525,12 @@ int main(int argc, char **argv) { | |||
485 | g.max = syms - 1; | 525 | g.max = syms - 1; |
486 | 526 | ||
487 | // determine the number of bits in a code_t | 527 | // determine the number of bits in a code_t |
488 | for (n = 0, word = 1; word; n++, word <<= 1) | 528 | int bits = 0; |
489 | ; | 529 | for (code_t word = 1; word; word <<= 1) |
530 | bits++; | ||
490 | 531 | ||
491 | // make sure that the calculation of most will not overflow | 532 | // make sure that the calculation of most will not overflow |
492 | if (g.max > n || (code_t)(syms - 2) >= (((code_t)0 - 1) >> (g.max - 1))) { | 533 | if (g.max > bits || (code_t)(syms - 2) >= ((code_t)-1 >> (g.max - 1))) { |
493 | fputs("abort: code length too long for internal types\n", stderr); | 534 | fputs("abort: code length too long for internal types\n", stderr); |
494 | return 1; | 535 | return 1; |
495 | } | 536 | } |
@@ -503,10 +544,7 @@ int main(int argc, char **argv) { | |||
503 | 544 | ||
504 | // allocate code vector | 545 | // allocate code vector |
505 | g.code = calloc(g.max + 1, sizeof(int)); | 546 | g.code = calloc(g.max + 1, sizeof(int)); |
506 | if (g.code == NULL) { | 547 | assert(g.code != NULL && "out of memory"); |
507 | fputs("abort: unable to allocate enough memory\n", stderr); | ||
508 | return 1; | ||
509 | } | ||
510 | 548 | ||
511 | // determine size of saved results array, checking for overflows, | 549 | // determine size of saved results array, checking for overflows, |
512 | // allocate and clear the array (set all to zero with calloc()) | 550 | // allocate and clear the array (set all to zero with calloc()) |
@@ -514,27 +552,22 @@ int main(int argc, char **argv) { | |||
514 | g.num = NULL; // won't be saving any results | 552 | g.num = NULL; // won't be saving any results |
515 | else { | 553 | else { |
516 | g.size = syms >> 1; | 554 | g.size = syms >> 1; |
517 | if (g.size > ((size_t)0 - 1) / (n = (syms - 1) >> 1) || | 555 | int n = (syms - 1) >> 1; |
518 | (g.size *= n, g.size > ((size_t)0 - 1) / (n = g.max - 1)) || | 556 | assert(g.size <= (size_t)-1 / n && "overflow"); |
519 | (g.size *= n, g.size > ((size_t)0 - 1) / sizeof(big_t)) || | 557 | g.size *= n; |
520 | (g.num = calloc(g.size, sizeof(big_t))) == NULL) { | 558 | n = g.max - 1; |
521 | fputs("abort: unable to allocate enough memory\n", stderr); | 559 | assert(g.size <= (size_t)-1 / n && "overflow"); |
522 | cleanup(); | 560 | g.size *= n; |
523 | return 1; | 561 | g.num = calloc(g.size, sizeof(big_t)); |
524 | } | 562 | assert(g.num != NULL && "out of memory"); |
525 | } | 563 | } |
526 | 564 | ||
527 | // count possible codes for all numbers of symbols, add up counts | 565 | // count possible codes for all numbers of symbols, add up counts |
528 | sum = 0; | 566 | big_t sum = 0; |
529 | for (n = 2; n <= syms; n++) { | 567 | for (int n = 2; n <= syms; n++) { |
530 | got = count(n, 1, 2); | 568 | big_t got = count(n, 2, 1); |
531 | sum += got; | 569 | sum += got; |
532 | if (got == (big_t)0 - 1 || sum < got) { // overflow | 570 | assert(got != (big_t)-1 && sum >= got && "overflow"); |
533 | fputs("abort: can't count that high!\n", stderr); | ||
534 | cleanup(); | ||
535 | return 1; | ||
536 | } | ||
537 | printf("%"PRIbig" %d-codes\n", got, n); | ||
538 | } | 571 | } |
539 | printf("%"PRIbig" total codes for 2 to %d symbols", sum, syms); | 572 | printf("%"PRIbig" total codes for 2 to %d symbols", sum, syms); |
540 | if (g.max < syms - 1) | 573 | if (g.max < syms - 1) |
@@ -542,14 +575,12 @@ int main(int argc, char **argv) { | |||
542 | else | 575 | else |
543 | puts(" (no length limit)"); | 576 | puts(" (no length limit)"); |
544 | 577 | ||
545 | // allocate and clear done array for beenhere() | 578 | // allocate and clear done array for been_here() |
546 | if (syms == 2) | 579 | if (syms == 2) |
547 | g.done = NULL; | 580 | g.done = NULL; |
548 | else if (g.size > ((size_t)0 - 1) / sizeof(struct tab) || | 581 | else { |
549 | (g.done = calloc(g.size, sizeof(struct tab))) == NULL) { | 582 | g.done = calloc(g.size, sizeof(struct tab)); |
550 | fputs("abort: unable to allocate enough memory\n", stderr); | 583 | assert(g.done != NULL && "out of memory"); |
551 | cleanup(); | ||
552 | return 1; | ||
553 | } | 584 | } |
554 | 585 | ||
555 | // find and show maximum inflate table usage | 586 | // find and show maximum inflate table usage |
@@ -558,7 +589,7 @@ int main(int argc, char **argv) { | |||
558 | if ((code_t)syms < ((code_t)1 << (g.root + 1))) | 589 | if ((code_t)syms < ((code_t)1 << (g.root + 1))) |
559 | enough(syms); | 590 | enough(syms); |
560 | else | 591 | else |
561 | puts("cannot handle minimum code lengths > root"); | 592 | fputs("cannot handle minimum code lengths > root", stderr); |
562 | 593 | ||
563 | // done | 594 | // done |
564 | cleanup(); | 595 | cleanup(); |