aboutsummaryrefslogtreecommitdiff
path: root/examples
diff options
context:
space:
mode:
authorMark Adler <madler@alumni.caltech.edu>2018-08-04 14:34:01 -0700
committerMark Adler <madler@alumni.caltech.edu>2018-08-05 23:08:33 -0700
commitcd16ff0b3a3665273e47141de8973bf1088cdf59 (patch)
tree615da5944afbd2a72156438eb3e247e29ae910d0 /examples
parentd00c147f531e757bfbd5cfab58d941caa8333ba8 (diff)
downloadzlib-cd16ff0b3a3665273e47141de8973bf1088cdf59.tar.gz
zlib-cd16ff0b3a3665273e47141de8973bf1088cdf59.tar.bz2
zlib-cd16ff0b3a3665273e47141de8973bf1088cdf59.zip
Show all the codes for the maximum tables size in enough.c.
Diffstat (limited to 'examples')
-rw-r--r--examples/enough.c351
1 files changed, 191 insertions, 160 deletions
diff --git a/examples/enough.c b/examples/enough.c
index 5cfbfbf..19cf08c 100644
--- a/examples/enough.c
+++ b/examples/enough.c
@@ -1,7 +1,7 @@
1/* enough.c -- determine the maximum size of inflate's Huffman code tables over 1/* enough.c -- determine the maximum size of inflate's Huffman code tables over
2 * all possible valid and complete prefix codes, subject to a length limit. 2 * all possible valid and complete prefix codes, subject to a length limit.
3 * Copyright (C) 2007, 2008, 2012, 2018 Mark Adler 3 * Copyright (C) 2007, 2008, 2012, 2018 Mark Adler
4 * Version 1.5 1 August 2018 Mark Adler 4 * Version 1.5 5 August 2018 Mark Adler
5 */ 5 */
6 6
7/* Version history: 7/* Version history:
@@ -17,8 +17,8 @@
17 1.4 18 Aug 2012 Avoid shifts more than bits in type (caused endless loop!) 17 1.4 18 Aug 2012 Avoid shifts more than bits in type (caused endless loop!)
18 Clean up comparisons of different types 18 Clean up comparisons of different types
19 Clean up code indentation 19 Clean up code indentation
20 1.5 1 Aug 2018 Clean up code style, formatting, and comments 20 1.5 5 Aug 2018 Clean up code style, formatting, and comments
21 Use inline function instead of macro for index 21 Show all the codes for the maximum, and only the maximum
22 */ 22 */
23 23
24/* 24/*
@@ -39,16 +39,17 @@
39 assign all portions of the remaining symbols to that code length that 39 assign all portions of the remaining symbols to that code length that
40 preserve the properties of a correct and eventually complete code. Those 40 preserve the properties of a correct and eventually complete code. Those
41 properties are: we cannot use more bit patterns than are available; and when 41 properties are: we cannot use more bit patterns than are available; and when
42 all the symbols are used, there are exactly zero possible bit patterns 42 all the symbols are used, there are exactly zero possible bit patterns left
43 remaining. 43 unused.
44 44
45 The inflate Huffman decoding algorithm uses two-level lookup tables for 45 The inflate Huffman decoding algorithm uses two-level lookup tables for
46 speed. There is a single first-level table to decode codes up to root bits 46 speed. There is a single first-level table to decode codes up to root bits
47 in length (root == 9 in the current inflate implementation). The table has 1 47 in length (root == 9 for literal/length codes and root == 6 for distance
48 << root entries and is indexed by the next root bits of input. Codes shorter 48 codes, in the current inflate implementation). The base table has 1 << root
49 than root bits have replicated table entries, so that the correct entry is 49 entries and is indexed by the next root bits of input. Codes shorter than
50 root bits have replicated table entries, so that the correct entry is
50 pointed to regardless of the bits that follow the short code. If the code is 51 pointed to regardless of the bits that follow the short code. If the code is
51 longer than root bits, then the table entry points to a second- level table. 52 longer than root bits, then the table entry points to a second-level table.
52 The size of that table is determined by the longest code with that root-bit 53 The size of that table is determined by the longest code with that root-bit
53 prefix. If that longest code has length len, then the table has size 1 << 54 prefix. If that longest code has length len, then the table has size 1 <<
54 (len - root), to index the remaining bits in that set of codes. Each 55 (len - root), to index the remaining bits in that set of codes. Each
@@ -62,7 +63,7 @@
62 the log2 of the number of symbols), where the shortest code has more bits 63 the log2 of the number of symbols), where the shortest code has more bits
63 than root. In that case, root is increased to the length of the shortest 64 than root. In that case, root is increased to the length of the shortest
64 code. This program, by design, does not handle that case, so it is verified 65 code. This program, by design, does not handle that case, so it is verified
65 that the number of symbols is less than 2^(root + 1). 66 that the number of symbols is less than 1 << (root + 1).
66 67
67 In order to speed up the examination (by about ten orders of magnitude for 68 In order to speed up the examination (by about ten orders of magnitude for
68 the default arguments), the intermediate states in the build-up of a code 69 the default arguments), the intermediate states in the build-up of a code
@@ -87,34 +88,36 @@
87 need to be examined to cover all of the possible table memory usage cases 88 need to be examined to cover all of the possible table memory usage cases
88 for the default arguments of 286 symbols limited to 15-bit codes. 89 for the default arguments of 286 symbols limited to 15-bit codes.
89 90
90 Note that an unsigned long long type is used for counting. It is quite easy 91 Note that the uintmax_t type is used for counting. It is quite easy to
91 to exceed the capacity of an eight-byte integer with a large number of 92 exceed the capacity of an eight-byte integer with a large number of symbols
92 symbols and a large maximum code length, so multiple-precision arithmetic 93 and a large maximum code length, so multiple-precision arithmetic would need
93 would need to replace the unsigned long long arithmetic in that case. This 94 to replace the integer arithmetic in that case. This program will abort if
94 program will abort if an overflow occurs. The big_t type identifies where 95 an overflow occurs. The big_t type identifies where the counting takes
95 the counting takes place. 96 place.
96 97
97 An unsigned long long type is also used for calculating the number of 98 The uintmax_t type is also used for calculating the number of possible codes
98 possible codes remaining at the maximum length. This limits the maximum code 99 remaining at the maximum length. This limits the maximum code length to the
99 length to the number of bits in a long long minus the number of bits needed 100 number of bits in a long long minus the number of bits needed to represent
100 to represent the symbols in a flat code. The code_t type identifies where 101 the symbols in a flat code. The code_t type identifies where the bit-pattern
101 the bit pattern counting takes place. 102 counting takes place.
102 */ 103 */
103 104
104#include <stdio.h> 105#include <stdio.h>
105#include <stdlib.h> 106#include <stdlib.h>
106#include <string.h> 107#include <string.h>
108#include <stdarg.h>
109#include <stdint.h>
107#include <assert.h> 110#include <assert.h>
108 111
109#define local static 112#define local static
110 113
111// Special data types. 114// Special data types.
112typedef unsigned long long big_t; // type for code counting 115typedef uintmax_t big_t; // type for code counting
113#define PRIbig "llu" // printf format for big_t 116#define PRIbig "ju" // printf format for big_t
114typedef unsigned long long code_t; // type for bit pattern counting 117typedef uintmax_t code_t; // type for bit pattern counting
115struct tab { // type for been here check 118struct tab { // type for been-here check
116 size_t len; // length of bit vector in octets 119 size_t len; // allocated length of bit vector in octets
117 char *vec; // allocated bit vector 120 char *vec; // allocated bit vector
118}; 121};
119 122
120/* The array for saving results, num[], is indexed with this triplet: 123/* The array for saving results, num[], is indexed with this triplet:
@@ -161,59 +164,101 @@ struct tab { // type for been here check
161 array. Since the range of mem is expected in the default case to be about 164 array. Since the range of mem is expected in the default case to be about
162 ten times larger than the range of rem, the array is skewed to reduce the 165 ten times larger than the range of rem, the array is skewed to reduce the
163 memory usage, with eight times the range for mem than for rem. See the 166 memory usage, with eight times the range for mem than for rem. See the
164 calculations for offset and bit in beenhere() for the details. 167 calculations for offset and bit in been_here() for the details.
165 168
166 For the deflate example of 286 symbols limited to 15-bit codes, the bit 169 For the deflate example of 286 symbols limited to 15-bit codes, the bit
167 vectors grow to total approximately 21 MB, in addition to the 4.3 MB done[] 170 vectors grow to total 5.5 MB, in addition to the 4.3 MB done array itself.
168 array itself.
169 */ 171 */
170 172
173// Type for a variable-length, allocated string.
174typedef struct {
175 char *str; // pointer to allocated string
176 size_t size; // size of allocation
177 size_t len; // length of string, not including terminating zero
178} string_t;
179
180// Clear a string_t.
181local void string_clear(string_t *s) {
182 s->str[0] = 0;
183 s->len = 0;
184}
185
186// Initialize a string_t.
187local void string_init(string_t *s) {
188 s->size = 16;
189 s->str = malloc(s->size);
190 assert(s->str != NULL && "out of memory");
191 string_clear(s);
192}
193
194// Release the allocation of a string_t.
195local void string_free(string_t *s) {
196 free(s->str);
197 s->str = NULL;
198 s->size = 0;
199 s->len = 0;
200}
201
202// Save the results of printf with fmt and the subsequent argument list to s.
203// Each call appends to s. The allocated space for s is increased as needed.
204local void string_printf(string_t *s, char *fmt, ...) {
205 va_list ap;
206 va_start(ap, fmt);
207 size_t len = s->len;
208 int ret = vsnprintf(s->str + len, s->size - len, fmt, ap);
209 assert(ret >= 0 && "out of memory");
210 s->len += ret;
211 if (s->size < s->len + 1) {
212 do {
213 s->size <<= 1;
214 assert(s->size != 0 && "overflow");
215 } while (s->size < s->len + 1);
216 s->str = realloc(s->str, s->size);
217 assert(s->str != NULL && "out of memory");
218 vsnprintf(s->str + len, s->size - len, fmt, ap);
219 }
220 va_end(ap);
221}
222
171// Globals to avoid propagating constants or constant pointers recursively. 223// Globals to avoid propagating constants or constant pointers recursively.
172struct { 224struct {
173 int max; // maximum allowed bit length for the codes 225 int max; // maximum allowed bit length for the codes
174 int root; // size of base code table in bits 226 int root; // size of base code table in bits
175 int large; // largest code table so far 227 int large; // largest code table so far
176 size_t size; // number of elements in num and done 228 size_t size; // number of elements in num and done
229 big_t tot; // total number of codes with maximum tables size
230 string_t out; // display of subcodes for maximum tables size
177 int *code; // number of symbols assigned to each bit length 231 int *code; // number of symbols assigned to each bit length
178 big_t *num; // saved results array for code counting 232 big_t *num; // saved results array for code counting
179 struct tab *done; // states already evaluated array 233 struct tab *done; // states already evaluated array
180} g; 234} g;
181 235
182// Index function for num[] and done[]. 236// Index function for num[] and done[].
183local inline size_t map(int i, int j, int k) { 237local inline size_t map(int syms, int left, int len) {
184 return k - 1 + ((size_t)((i - 1) >> 1) * ((i - 2) >> 1) + (j >> 1) - 1) * 238 return ((size_t)((syms - 1) >> 1) * ((syms - 2) >> 1) +
185 (g.max - 1); 239 (left >> 1) - 1) * (g.max - 1) +
240 len - 1;
186} 241}
187 242
188// Free allocated space. Uses globals code, num, and done. 243// Free allocated space in globals.
189local void cleanup(void) { 244local void cleanup(void) {
190 size_t n;
191
192 if (g.done != NULL) { 245 if (g.done != NULL) {
193 for (n = 0; n < g.size; n++) 246 for (size_t n = 0; n < g.size; n++)
194 if (g.done[n].len) 247 if (g.done[n].len)
195 free(g.done[n].vec); 248 free(g.done[n].vec);
196 free(g.done); 249 g.size = 0;
250 free(g.done); g.done = NULL;
197 } 251 }
198 if (g.num != NULL) 252 free(g.num); g.num = NULL;
199 free(g.num); 253 free(g.code); g.code = NULL;
200 if (g.code != NULL) 254 string_free(&g.out);
201 free(g.code);
202} 255}
203 256
204// Return the number of possible prefix codes using bit patterns of lengths len 257// Return the number of possible prefix codes using bit patterns of lengths len
205// through max inclusive, coding syms symbols, with left bit patterns of length 258// through max inclusive, coding syms symbols, with left bit patterns of length
206// len unused -- return -1 if there is an overflow in the counting. Keep a 259// len unused -- return -1 if there is an overflow in the counting. Keep a
207// record of previous results in num to prevent repeating the same calculation. 260// record of previous results in num to prevent repeating the same calculation.
208// Uses the globals max and num. 261local big_t count(int syms, int left, int len) {
209local big_t count(int syms, int len, int left) {
210 big_t sum; // number of possible codes from this juncture
211 big_t got; // value returned from count()
212 int least; // least number of syms to use at this juncture
213 int most; // most number of syms to use at this juncture
214 int use; // number of bit patterns to use in next call
215 size_t index; // index of this case in *num
216
217 // see if only one possible code 262 // see if only one possible code
218 if (syms == left) 263 if (syms == left)
219 return 1; 264 return 1;
@@ -222,30 +267,30 @@ local big_t count(int syms, int len, int left) {
222 assert(syms > left && left > 0 && len < g.max); 267 assert(syms > left && left > 0 && len < g.max);
223 268
224 // see if we've done this one already 269 // see if we've done this one already
225 index = map(syms, left, len); 270 size_t index = map(syms, left, len);
226 got = g.num[index]; 271 big_t got = g.num[index];
227 if (got) 272 if (got)
228 return got; // we have -- return the saved result 273 return got; // we have -- return the saved result
229 274
230 // we need to use at least this many bit patterns so that the code won't be 275 // we need to use at least this many bit patterns so that the code won't be
231 // incomplete at the next length (more bit patterns than symbols) 276 // incomplete at the next length (more bit patterns than symbols)
232 least = (left << 1) - syms; 277 int least = (left << 1) - syms;
233 if (least < 0) 278 if (least < 0)
234 least = 0; 279 least = 0;
235 280
236 // we can use at most this many bit patterns, lest there not be enough 281 // we can use at most this many bit patterns, lest there not be enough
237 // available for the remaining symbols at the maximum length (if there were 282 // available for the remaining symbols at the maximum length (if there were
238 // no limit to the code length, this would become: most = left - 1) 283 // no limit to the code length, this would become: most = left - 1)
239 most = (((code_t)left << (g.max - len)) - syms) / 284 int most = (((code_t)left << (g.max - len)) - syms) /
240 (((code_t)1 << (g.max - len)) - 1); 285 (((code_t)1 << (g.max - len)) - 1);
241 286
242 // count all possible codes from this juncture and add them up 287 // count all possible codes from this juncture and add them up
243 sum = 0; 288 big_t sum = 0;
244 for (use = least; use <= most; use++) { 289 for (int use = least; use <= most; use++) {
245 got = count(syms - use, len + 1, (left - use) << 1); 290 got = count(syms - use, (left - use) << 1, len + 1);
246 sum += got; 291 sum += got;
247 if (got == (big_t)0 - 1 || sum < got) // overflow 292 if (got == (big_t)-1 || sum < got) // overflow
248 return (big_t)0 - 1; 293 return (big_t)-1;
249 } 294 }
250 295
251 // verify that all recursive calls are productive 296 // verify that all recursive calls are productive
@@ -259,23 +304,19 @@ local big_t count(int syms, int len, int left) {
259// Return true if we've been here before, set to true if not. Set a bit in a 304// Return true if we've been here before, set to true if not. Set a bit in a
260// bit vector to indicate visiting this state. Each (syms,len,left) state has a 305// bit vector to indicate visiting this state. Each (syms,len,left) state has a
261// variable size bit vector indexed by (mem,rem). The bit vector is lengthened 306// variable size bit vector indexed by (mem,rem). The bit vector is lengthened
262// if needed to allow setting the (mem,rem) bit. 307// as needed to allow setting the (mem,rem) bit.
263local int beenhere(int syms, int len, int left, int mem, int rem) { 308local int been_here(int syms, int left, int len, int mem, int rem) {
264 size_t index; // index for this state's bit vector
265 size_t offset; // offset in this state's bit vector
266 int bit; // mask for this state's bit
267 size_t length; // length of the bit vector in bytes
268 char *vector; // new or enlarged bit vector
269
270 // point to vector for (syms,left,len), bit in vector for (mem,rem) 309 // point to vector for (syms,left,len), bit in vector for (mem,rem)
271 index = map(syms, left, len); 310 size_t index = map(syms, left, len);
272 mem -= 1 << g.root; 311 mem -= 1 << g.root; // mem always includes the root table
273 offset = (mem >> 3) + rem; 312 mem >>= 1; // mem and rem are always even
313 rem >>= 1;
314 size_t offset = (mem >> 3) + rem;
274 offset = ((offset * (offset + 1)) >> 1) + rem; 315 offset = ((offset * (offset + 1)) >> 1) + rem;
275 bit = 1 << (mem & 7); 316 int bit = 1 << (mem & 7);
276 317
277 // see if we've been here 318 // see if we've been here
278 length = g.done[index].len; 319 size_t length = g.done[index].len;
279 if (offset < length && (g.done[index].vec[offset] & bit) != 0) 320 if (offset < length && (g.done[index].vec[offset] & bit) != 0)
280 return 1; // done this! 321 return 1; // done this!
281 322
@@ -284,29 +325,23 @@ local int beenhere(int syms, int len, int left, int mem, int rem) {
284 // see if we need to lengthen the vector in order to set the bit 325 // see if we need to lengthen the vector in order to set the bit
285 if (length <= offset) { 326 if (length <= offset) {
286 // if we have one already, enlarge it, zero out the appended space 327 // if we have one already, enlarge it, zero out the appended space
328 char *vector;
287 if (length) { 329 if (length) {
288 do { 330 do {
289 length <<= 1; 331 length <<= 1;
290 } while (length <= offset); 332 } while (length <= offset);
291 vector = realloc(g.done[index].vec, length); 333 vector = realloc(g.done[index].vec, length);
292 if (vector != NULL) 334 assert(vector != NULL && "out of memory");
293 memset(vector + g.done[index].len, 0, 335 memset(vector + g.done[index].len, 0, length - g.done[index].len);
294 length - g.done[index].len);
295 } 336 }
296 337
297 // otherwise we need to make a new vector and zero it out 338 // otherwise we need to make a new vector and zero it out
298 else { 339 else {
299 length = 1 << (len - g.root); 340 length = 16;
300 while (length <= offset) 341 while (length <= offset)
301 length <<= 1; 342 length <<= 1;
302 vector = calloc(length, sizeof(char)); 343 vector = calloc(length, 1);
303 } 344 assert(vector != NULL && "out of memory");
304
305 // in either case, bail if we can't get the memory
306 if (vector == NULL) {
307 fputs("abort: unable to allocate enough memory\n", stderr);
308 cleanup();
309 exit(1);
310 } 345 }
311 346
312 // install the new vector 347 // install the new vector
@@ -322,13 +357,8 @@ local int beenhere(int syms, int len, int left, int mem, int rem) {
322// Examine all possible codes from the given node (syms, len, left). Compute 357// Examine all possible codes from the given node (syms, len, left). Compute
323// the amount of memory required to build inflate's decoding tables, where the 358// the amount of memory required to build inflate's decoding tables, where the
324// number of code structures used so far is mem, and the number remaining in 359// number of code structures used so far is mem, and the number remaining in
325// the current sub-table is rem. Uses the globals max, code, root, large, and 360// the current sub-table is rem.
326// done. 361local void examine(int syms, int left, int len, int mem, int rem) {
327local void examine(int syms, int len, int left, int mem, int rem) {
328 int least; // least number of syms to use at this juncture
329 int most; // most number of syms to use at this juncture
330 int use; // number of bit patterns to use in next call
331
332 // see if we have a complete code 362 // see if we have a complete code
333 if (syms == left) { 363 if (syms == left) {
334 // set the last code entry 364 // set the last code entry
@@ -342,15 +372,32 @@ local void examine(int syms, int len, int left, int mem, int rem) {
342 } 372 }
343 assert(rem == left); 373 assert(rem == left);
344 374
345 // if this is a new maximum, show the entries used and the sub-code 375 // if this is at the maximum, show the sub-code
346 if (mem > g.large) { 376 if (mem >= g.large) {
347 g.large = mem; 377 // if this is a new maximum, update the maximum and clear out the
348 printf("max %d: ", mem); 378 // printed sub-codes from the previous maximum
349 for (use = g.root + 1; use <= g.max; use++) 379 if (mem > g.large) {
350 if (g.code[use]) 380 g.large = mem;
351 printf("%d[%d] ", g.code[use], use); 381 string_clear(&g.out);
352 putchar('\n'); 382 }
353 fflush(stdout); 383
384 // compute the starting state for this sub-code
385 syms = 0;
386 left = 1 << g.max;
387 for (int bits = g.max; bits > g.root; bits--) {
388 syms += g.code[bits];
389 left -= g.code[bits];
390 assert((left & 1) == 0);
391 left >>= 1;
392 }
393
394 // print the starting state and the resulting sub-code to g.out
395 string_printf(&g.out, "<%u, %u, %u>:",
396 syms, g.root + 1, ((1 << g.root) - left) << 1);
397 for (int bits = g.root + 1; bits <= g.max; bits++)
398 if (g.code[bits])
399 string_printf(&g.out, " %d[%d]", g.code[bits], bits);
400 string_printf(&g.out, "\n");
354 } 401 }
355 402
356 // remove entries as we drop back down in the recursion 403 // remove entries as we drop back down in the recursion
@@ -359,23 +406,23 @@ local void examine(int syms, int len, int left, int mem, int rem) {
359 } 406 }
360 407
361 // prune the tree if we can 408 // prune the tree if we can
362 if (beenhere(syms, len, left, mem, rem)) 409 if (been_here(syms, left, len, mem, rem))
363 return; 410 return;
364 411
365 // we need to use at least this many bit patterns so that the code won't be 412 // we need to use at least this many bit patterns so that the code won't be
366 // incomplete at the next length (more bit patterns than symbols) 413 // incomplete at the next length (more bit patterns than symbols)
367 least = (left << 1) - syms; 414 int least = (left << 1) - syms;
368 if (least < 0) 415 if (least < 0)
369 least = 0; 416 least = 0;
370 417
371 // we can use at most this many bit patterns, lest there not be enough 418 // we can use at most this many bit patterns, lest there not be enough
372 // available for the remaining symbols at the maximum length (if there were 419 // available for the remaining symbols at the maximum length (if there were
373 // no limit to the code length, this would become: most = left - 1) 420 // no limit to the code length, this would become: most = left - 1)
374 most = (((code_t)left << (g.max - len)) - syms) / 421 int most = (((code_t)left << (g.max - len)) - syms) /
375 (((code_t)1 << (g.max - len)) - 1); 422 (((code_t)1 << (g.max - len)) - 1);
376 423
377 // occupy least table spaces, creating new sub-tables as needed 424 // occupy least table spaces, creating new sub-tables as needed
378 use = least; 425 int use = least;
379 while (rem < use) { 426 while (rem < use) {
380 use -= rem; 427 use -= rem;
381 rem = 1 << (len - g.root); 428 rem = 1 << (len - g.root);
@@ -386,7 +433,7 @@ local void examine(int syms, int len, int left, int mem, int rem) {
386 // examine codes from here, updating table space as we go 433 // examine codes from here, updating table space as we go
387 for (use = least; use <= most; use++) { 434 for (use = least; use <= most; use++) {
388 g.code[len] = use; 435 g.code[len] = use;
389 examine(syms - use, len + 1, (left - use) << 1, 436 examine(syms - use, (left - use) << 1, len + 1,
390 mem + (rem ? 1 << (len - g.root) : 0), rem << 1); 437 mem + (rem ? 1 << (len - g.root) : 0), rem << 1);
391 if (rem == 0) { 438 if (rem == 0) {
392 rem = 1 << (len - g.root); 439 rem = 1 << (len - g.root);
@@ -402,37 +449,35 @@ local void examine(int syms, int len, int left, int mem, int rem) {
402// Look at all sub-codes starting with root + 1 bits. Look at only the valid 449// Look at all sub-codes starting with root + 1 bits. Look at only the valid
403// intermediate code states (syms, left, len). For each completed code, 450// intermediate code states (syms, left, len). For each completed code,
404// calculate the amount of memory required by inflate to build the decoding 451// calculate the amount of memory required by inflate to build the decoding
405// tables. Find the maximum amount of memory required and show the code that 452// tables. Find the maximum amount of memory required and show the codes that
406// requires that maximum. Uses the globals max, root, and num. 453// require that maximum.
407local void enough(int syms) { 454local void enough(int syms) {
408 int n; // number of remaing symbols for this node
409 int left; // number of unused bit patterns at this length
410 size_t index; // index of this case in *num
411
412 // clear code 455 // clear code
413 for (n = 0; n <= g.max; n++) 456 for (int n = 0; n <= g.max; n++)
414 g.code[n] = 0; 457 g.code[n] = 0;
415 458
416 // look at all (root + 1) bit and longer codes 459 // look at all (root + 1) bit and longer codes
460 string_clear(&g.out); // empty saved results
417 g.large = 1 << g.root; // base table 461 g.large = 1 << g.root; // base table
418 if (g.root < g.max) // otherwise, there's only a base table 462 if (g.root < g.max) // otherwise, there's only a base table
419 for (n = 3; n <= syms; n++) 463 for (int n = 3; n <= syms; n++)
420 for (left = 2; left < n; left += 2) { 464 for (int left = 2; left < n; left += 2) {
421 // look at all reachable (root + 1) bit nodes, and the 465 // look at all reachable (root + 1) bit nodes, and the
422 // resulting codes (complete at root + 2 or more) 466 // resulting codes (complete at root + 2 or more)
423 index = map(n, left, g.root + 1); 467 size_t index = map(n, left, g.root + 1);
424 if (g.root + 1 < g.max && g.num[index]) // reachable node 468 if (g.root + 1 < g.max && g.num[index]) // reachable node
425 examine(n, g.root + 1, left, 1 << g.root, 0); 469 examine(n, left, g.root + 1, 1 << g.root, 0);
426 470
427 // also look at root bit codes with completions at root + 1 471 // also look at root bit codes with completions at root + 1
428 // bits (not saved in num, since complete), just in case 472 // bits (not saved in num, since complete), just in case
429 if (g.num[index - 1] && n <= left << 1) 473 if (g.num[index - 1] && n <= left << 1)
430 examine((n - left) << 1, g.root + 1, (n - left) << 1, 474 examine((n - left) << 1, (n - left) << 1, g.root + 1,
431 1 << g.root, 0); 475 1 << g.root, 0);
432 } 476 }
433 477
434 // done 478 // done
435 printf("done: maximum of %d table entries\n", g.large); 479 printf("maximum of %d table entries for root = %d\n", g.large, g.root);
480 fputs(g.out.str, stdout);
436} 481}
437 482
438// Examine and show the total number of possible prefix codes for a given 483// Examine and show the total number of possible prefix codes for a given
@@ -451,19 +496,14 @@ local void enough(int syms) {
451// For the deflate literal/length code, use "enough". For the deflate distance 496// For the deflate literal/length code, use "enough". For the deflate distance
452// code, use "enough 30 6". 497// code, use "enough 30 6".
453int main(int argc, char **argv) { 498int main(int argc, char **argv) {
454 int syms; // total number of symbols to code
455 int n; // number of symbols to code for this run
456 big_t got; // return value of count()
457 big_t sum; // accumulated number of codes over n
458 code_t word; // for counting bits in code_t
459
460 // set up globals for cleanup() 499 // set up globals for cleanup()
461 g.code = NULL; 500 g.code = NULL;
462 g.num = NULL; 501 g.num = NULL;
463 g.done = NULL; 502 g.done = NULL;
503 string_init(&g.out);
464 504
465 // get arguments -- default to the deflate literal/length code 505 // get arguments -- default to the deflate literal/length code
466 syms = 286; 506 int syms = 286;
467 g.root = 9; 507 g.root = 9;
468 g.max = 15; 508 g.max = 15;
469 if (argc > 1) { 509 if (argc > 1) {
@@ -485,11 +525,12 @@ int main(int argc, char **argv) {
485 g.max = syms - 1; 525 g.max = syms - 1;
486 526
487 // determine the number of bits in a code_t 527 // determine the number of bits in a code_t
488 for (n = 0, word = 1; word; n++, word <<= 1) 528 int bits = 0;
489 ; 529 for (code_t word = 1; word; word <<= 1)
530 bits++;
490 531
491 // make sure that the calculation of most will not overflow 532 // make sure that the calculation of most will not overflow
492 if (g.max > n || (code_t)(syms - 2) >= (((code_t)0 - 1) >> (g.max - 1))) { 533 if (g.max > bits || (code_t)(syms - 2) >= ((code_t)-1 >> (g.max - 1))) {
493 fputs("abort: code length too long for internal types\n", stderr); 534 fputs("abort: code length too long for internal types\n", stderr);
494 return 1; 535 return 1;
495 } 536 }
@@ -503,10 +544,7 @@ int main(int argc, char **argv) {
503 544
504 // allocate code vector 545 // allocate code vector
505 g.code = calloc(g.max + 1, sizeof(int)); 546 g.code = calloc(g.max + 1, sizeof(int));
506 if (g.code == NULL) { 547 assert(g.code != NULL && "out of memory");
507 fputs("abort: unable to allocate enough memory\n", stderr);
508 return 1;
509 }
510 548
511 // determine size of saved results array, checking for overflows, 549 // determine size of saved results array, checking for overflows,
512 // allocate and clear the array (set all to zero with calloc()) 550 // allocate and clear the array (set all to zero with calloc())
@@ -514,27 +552,22 @@ int main(int argc, char **argv) {
514 g.num = NULL; // won't be saving any results 552 g.num = NULL; // won't be saving any results
515 else { 553 else {
516 g.size = syms >> 1; 554 g.size = syms >> 1;
517 if (g.size > ((size_t)0 - 1) / (n = (syms - 1) >> 1) || 555 int n = (syms - 1) >> 1;
518 (g.size *= n, g.size > ((size_t)0 - 1) / (n = g.max - 1)) || 556 assert(g.size <= (size_t)-1 / n && "overflow");
519 (g.size *= n, g.size > ((size_t)0 - 1) / sizeof(big_t)) || 557 g.size *= n;
520 (g.num = calloc(g.size, sizeof(big_t))) == NULL) { 558 n = g.max - 1;
521 fputs("abort: unable to allocate enough memory\n", stderr); 559 assert(g.size <= (size_t)-1 / n && "overflow");
522 cleanup(); 560 g.size *= n;
523 return 1; 561 g.num = calloc(g.size, sizeof(big_t));
524 } 562 assert(g.num != NULL && "out of memory");
525 } 563 }
526 564
527 // count possible codes for all numbers of symbols, add up counts 565 // count possible codes for all numbers of symbols, add up counts
528 sum = 0; 566 big_t sum = 0;
529 for (n = 2; n <= syms; n++) { 567 for (int n = 2; n <= syms; n++) {
530 got = count(n, 1, 2); 568 big_t got = count(n, 2, 1);
531 sum += got; 569 sum += got;
532 if (got == (big_t)0 - 1 || sum < got) { // overflow 570 assert(got != (big_t)-1 && sum >= got && "overflow");
533 fputs("abort: can't count that high!\n", stderr);
534 cleanup();
535 return 1;
536 }
537 printf("%"PRIbig" %d-codes\n", got, n);
538 } 571 }
539 printf("%"PRIbig" total codes for 2 to %d symbols", sum, syms); 572 printf("%"PRIbig" total codes for 2 to %d symbols", sum, syms);
540 if (g.max < syms - 1) 573 if (g.max < syms - 1)
@@ -542,14 +575,12 @@ int main(int argc, char **argv) {
542 else 575 else
543 puts(" (no length limit)"); 576 puts(" (no length limit)");
544 577
545 // allocate and clear done array for beenhere() 578 // allocate and clear done array for been_here()
546 if (syms == 2) 579 if (syms == 2)
547 g.done = NULL; 580 g.done = NULL;
548 else if (g.size > ((size_t)0 - 1) / sizeof(struct tab) || 581 else {
549 (g.done = calloc(g.size, sizeof(struct tab))) == NULL) { 582 g.done = calloc(g.size, sizeof(struct tab));
550 fputs("abort: unable to allocate enough memory\n", stderr); 583 assert(g.done != NULL && "out of memory");
551 cleanup();
552 return 1;
553 } 584 }
554 585
555 // find and show maximum inflate table usage 586 // find and show maximum inflate table usage
@@ -558,7 +589,7 @@ int main(int argc, char **argv) {
558 if ((code_t)syms < ((code_t)1 << (g.root + 1))) 589 if ((code_t)syms < ((code_t)1 << (g.root + 1)))
559 enough(syms); 590 enough(syms);
560 else 591 else
561 puts("cannot handle minimum code lengths > root"); 592 fputs("cannot handle minimum code lengths > root", stderr);
562 593
563 // done 594 // done
564 cleanup(); 595 cleanup();