diff options
| author | Mark Adler <madler@alumni.caltech.edu> | 2018-08-04 14:34:01 -0700 |
|---|---|---|
| committer | Mark Adler <madler@alumni.caltech.edu> | 2018-08-05 23:08:33 -0700 |
| commit | cd16ff0b3a3665273e47141de8973bf1088cdf59 (patch) | |
| tree | 615da5944afbd2a72156438eb3e247e29ae910d0 /examples | |
| parent | d00c147f531e757bfbd5cfab58d941caa8333ba8 (diff) | |
| download | zlib-cd16ff0b3a3665273e47141de8973bf1088cdf59.tar.gz zlib-cd16ff0b3a3665273e47141de8973bf1088cdf59.tar.bz2 zlib-cd16ff0b3a3665273e47141de8973bf1088cdf59.zip | |
Show all the codes for the maximum tables size in enough.c.
Diffstat (limited to 'examples')
| -rw-r--r-- | examples/enough.c | 351 |
1 files changed, 191 insertions, 160 deletions
diff --git a/examples/enough.c b/examples/enough.c index 5cfbfbf..19cf08c 100644 --- a/examples/enough.c +++ b/examples/enough.c | |||
| @@ -1,7 +1,7 @@ | |||
| 1 | /* enough.c -- determine the maximum size of inflate's Huffman code tables over | 1 | /* enough.c -- determine the maximum size of inflate's Huffman code tables over |
| 2 | * all possible valid and complete prefix codes, subject to a length limit. | 2 | * all possible valid and complete prefix codes, subject to a length limit. |
| 3 | * Copyright (C) 2007, 2008, 2012, 2018 Mark Adler | 3 | * Copyright (C) 2007, 2008, 2012, 2018 Mark Adler |
| 4 | * Version 1.5 1 August 2018 Mark Adler | 4 | * Version 1.5 5 August 2018 Mark Adler |
| 5 | */ | 5 | */ |
| 6 | 6 | ||
| 7 | /* Version history: | 7 | /* Version history: |
| @@ -17,8 +17,8 @@ | |||
| 17 | 1.4 18 Aug 2012 Avoid shifts more than bits in type (caused endless loop!) | 17 | 1.4 18 Aug 2012 Avoid shifts more than bits in type (caused endless loop!) |
| 18 | Clean up comparisons of different types | 18 | Clean up comparisons of different types |
| 19 | Clean up code indentation | 19 | Clean up code indentation |
| 20 | 1.5 1 Aug 2018 Clean up code style, formatting, and comments | 20 | 1.5 5 Aug 2018 Clean up code style, formatting, and comments |
| 21 | Use inline function instead of macro for index | 21 | Show all the codes for the maximum, and only the maximum |
| 22 | */ | 22 | */ |
| 23 | 23 | ||
| 24 | /* | 24 | /* |
| @@ -39,16 +39,17 @@ | |||
| 39 | assign all portions of the remaining symbols to that code length that | 39 | assign all portions of the remaining symbols to that code length that |
| 40 | preserve the properties of a correct and eventually complete code. Those | 40 | preserve the properties of a correct and eventually complete code. Those |
| 41 | properties are: we cannot use more bit patterns than are available; and when | 41 | properties are: we cannot use more bit patterns than are available; and when |
| 42 | all the symbols are used, there are exactly zero possible bit patterns | 42 | all the symbols are used, there are exactly zero possible bit patterns left |
| 43 | remaining. | 43 | unused. |
| 44 | 44 | ||
| 45 | The inflate Huffman decoding algorithm uses two-level lookup tables for | 45 | The inflate Huffman decoding algorithm uses two-level lookup tables for |
| 46 | speed. There is a single first-level table to decode codes up to root bits | 46 | speed. There is a single first-level table to decode codes up to root bits |
| 47 | in length (root == 9 in the current inflate implementation). The table has 1 | 47 | in length (root == 9 for literal/length codes and root == 6 for distance |
| 48 | << root entries and is indexed by the next root bits of input. Codes shorter | 48 | codes, in the current inflate implementation). The base table has 1 << root |
| 49 | than root bits have replicated table entries, so that the correct entry is | 49 | entries and is indexed by the next root bits of input. Codes shorter than |
| 50 | root bits have replicated table entries, so that the correct entry is | ||
| 50 | pointed to regardless of the bits that follow the short code. If the code is | 51 | pointed to regardless of the bits that follow the short code. If the code is |
| 51 | longer than root bits, then the table entry points to a second- level table. | 52 | longer than root bits, then the table entry points to a second-level table. |
| 52 | The size of that table is determined by the longest code with that root-bit | 53 | The size of that table is determined by the longest code with that root-bit |
| 53 | prefix. If that longest code has length len, then the table has size 1 << | 54 | prefix. If that longest code has length len, then the table has size 1 << |
| 54 | (len - root), to index the remaining bits in that set of codes. Each | 55 | (len - root), to index the remaining bits in that set of codes. Each |
| @@ -62,7 +63,7 @@ | |||
| 62 | the log2 of the number of symbols), where the shortest code has more bits | 63 | the log2 of the number of symbols), where the shortest code has more bits |
| 63 | than root. In that case, root is increased to the length of the shortest | 64 | than root. In that case, root is increased to the length of the shortest |
| 64 | code. This program, by design, does not handle that case, so it is verified | 65 | code. This program, by design, does not handle that case, so it is verified |
| 65 | that the number of symbols is less than 2^(root + 1). | 66 | that the number of symbols is less than 1 << (root + 1). |
| 66 | 67 | ||
| 67 | In order to speed up the examination (by about ten orders of magnitude for | 68 | In order to speed up the examination (by about ten orders of magnitude for |
| 68 | the default arguments), the intermediate states in the build-up of a code | 69 | the default arguments), the intermediate states in the build-up of a code |
| @@ -87,34 +88,36 @@ | |||
| 87 | need to be examined to cover all of the possible table memory usage cases | 88 | need to be examined to cover all of the possible table memory usage cases |
| 88 | for the default arguments of 286 symbols limited to 15-bit codes. | 89 | for the default arguments of 286 symbols limited to 15-bit codes. |
| 89 | 90 | ||
| 90 | Note that an unsigned long long type is used for counting. It is quite easy | 91 | Note that the uintmax_t type is used for counting. It is quite easy to |
| 91 | to exceed the capacity of an eight-byte integer with a large number of | 92 | exceed the capacity of an eight-byte integer with a large number of symbols |
| 92 | symbols and a large maximum code length, so multiple-precision arithmetic | 93 | and a large maximum code length, so multiple-precision arithmetic would need |
| 93 | would need to replace the unsigned long long arithmetic in that case. This | 94 | to replace the integer arithmetic in that case. This program will abort if |
| 94 | program will abort if an overflow occurs. The big_t type identifies where | 95 | an overflow occurs. The big_t type identifies where the counting takes |
| 95 | the counting takes place. | 96 | place. |
| 96 | 97 | ||
| 97 | An unsigned long long type is also used for calculating the number of | 98 | The uintmax_t type is also used for calculating the number of possible codes |
| 98 | possible codes remaining at the maximum length. This limits the maximum code | 99 | remaining at the maximum length. This limits the maximum code length to the |
| 99 | length to the number of bits in a long long minus the number of bits needed | 100 | number of bits in a long long minus the number of bits needed to represent |
| 100 | to represent the symbols in a flat code. The code_t type identifies where | 101 | the symbols in a flat code. The code_t type identifies where the bit-pattern |
| 101 | the bit pattern counting takes place. | 102 | counting takes place. |
| 102 | */ | 103 | */ |
| 103 | 104 | ||
| 104 | #include <stdio.h> | 105 | #include <stdio.h> |
| 105 | #include <stdlib.h> | 106 | #include <stdlib.h> |
| 106 | #include <string.h> | 107 | #include <string.h> |
| 108 | #include <stdarg.h> | ||
| 109 | #include <stdint.h> | ||
| 107 | #include <assert.h> | 110 | #include <assert.h> |
| 108 | 111 | ||
| 109 | #define local static | 112 | #define local static |
| 110 | 113 | ||
| 111 | // Special data types. | 114 | // Special data types. |
| 112 | typedef unsigned long long big_t; // type for code counting | 115 | typedef uintmax_t big_t; // type for code counting |
| 113 | #define PRIbig "llu" // printf format for big_t | 116 | #define PRIbig "ju" // printf format for big_t |
| 114 | typedef unsigned long long code_t; // type for bit pattern counting | 117 | typedef uintmax_t code_t; // type for bit pattern counting |
| 115 | struct tab { // type for been here check | 118 | struct tab { // type for been-here check |
| 116 | size_t len; // length of bit vector in octets | 119 | size_t len; // allocated length of bit vector in octets |
| 117 | char *vec; // allocated bit vector | 120 | char *vec; // allocated bit vector |
| 118 | }; | 121 | }; |
| 119 | 122 | ||
| 120 | /* The array for saving results, num[], is indexed with this triplet: | 123 | /* The array for saving results, num[], is indexed with this triplet: |
| @@ -161,59 +164,101 @@ struct tab { // type for been here check | |||
| 161 | array. Since the range of mem is expected in the default case to be about | 164 | array. Since the range of mem is expected in the default case to be about |
| 162 | ten times larger than the range of rem, the array is skewed to reduce the | 165 | ten times larger than the range of rem, the array is skewed to reduce the |
| 163 | memory usage, with eight times the range for mem than for rem. See the | 166 | memory usage, with eight times the range for mem than for rem. See the |
| 164 | calculations for offset and bit in beenhere() for the details. | 167 | calculations for offset and bit in been_here() for the details. |
| 165 | 168 | ||
| 166 | For the deflate example of 286 symbols limited to 15-bit codes, the bit | 169 | For the deflate example of 286 symbols limited to 15-bit codes, the bit |
| 167 | vectors grow to total approximately 21 MB, in addition to the 4.3 MB done[] | 170 | vectors grow to total 5.5 MB, in addition to the 4.3 MB done array itself. |
| 168 | array itself. | ||
| 169 | */ | 171 | */ |
| 170 | 172 | ||
| 173 | // Type for a variable-length, allocated string. | ||
| 174 | typedef struct { | ||
| 175 | char *str; // pointer to allocated string | ||
| 176 | size_t size; // size of allocation | ||
| 177 | size_t len; // length of string, not including terminating zero | ||
| 178 | } string_t; | ||
| 179 | |||
| 180 | // Clear a string_t. | ||
| 181 | local void string_clear(string_t *s) { | ||
| 182 | s->str[0] = 0; | ||
| 183 | s->len = 0; | ||
| 184 | } | ||
| 185 | |||
| 186 | // Initialize a string_t. | ||
| 187 | local void string_init(string_t *s) { | ||
| 188 | s->size = 16; | ||
| 189 | s->str = malloc(s->size); | ||
| 190 | assert(s->str != NULL && "out of memory"); | ||
| 191 | string_clear(s); | ||
| 192 | } | ||
| 193 | |||
| 194 | // Release the allocation of a string_t. | ||
| 195 | local void string_free(string_t *s) { | ||
| 196 | free(s->str); | ||
| 197 | s->str = NULL; | ||
| 198 | s->size = 0; | ||
| 199 | s->len = 0; | ||
| 200 | } | ||
| 201 | |||
| 202 | // Save the results of printf with fmt and the subsequent argument list to s. | ||
| 203 | // Each call appends to s. The allocated space for s is increased as needed. | ||
| 204 | local void string_printf(string_t *s, char *fmt, ...) { | ||
| 205 | va_list ap; | ||
| 206 | va_start(ap, fmt); | ||
| 207 | size_t len = s->len; | ||
| 208 | int ret = vsnprintf(s->str + len, s->size - len, fmt, ap); | ||
| 209 | assert(ret >= 0 && "out of memory"); | ||
| 210 | s->len += ret; | ||
| 211 | if (s->size < s->len + 1) { | ||
| 212 | do { | ||
| 213 | s->size <<= 1; | ||
| 214 | assert(s->size != 0 && "overflow"); | ||
| 215 | } while (s->size < s->len + 1); | ||
| 216 | s->str = realloc(s->str, s->size); | ||
| 217 | assert(s->str != NULL && "out of memory"); | ||
| 218 | vsnprintf(s->str + len, s->size - len, fmt, ap); | ||
| 219 | } | ||
| 220 | va_end(ap); | ||
| 221 | } | ||
| 222 | |||
| 171 | // Globals to avoid propagating constants or constant pointers recursively. | 223 | // Globals to avoid propagating constants or constant pointers recursively. |
| 172 | struct { | 224 | struct { |
| 173 | int max; // maximum allowed bit length for the codes | 225 | int max; // maximum allowed bit length for the codes |
| 174 | int root; // size of base code table in bits | 226 | int root; // size of base code table in bits |
| 175 | int large; // largest code table so far | 227 | int large; // largest code table so far |
| 176 | size_t size; // number of elements in num and done | 228 | size_t size; // number of elements in num and done |
| 229 | big_t tot; // total number of codes with maximum tables size | ||
| 230 | string_t out; // display of subcodes for maximum tables size | ||
| 177 | int *code; // number of symbols assigned to each bit length | 231 | int *code; // number of symbols assigned to each bit length |
| 178 | big_t *num; // saved results array for code counting | 232 | big_t *num; // saved results array for code counting |
| 179 | struct tab *done; // states already evaluated array | 233 | struct tab *done; // states already evaluated array |
| 180 | } g; | 234 | } g; |
| 181 | 235 | ||
| 182 | // Index function for num[] and done[]. | 236 | // Index function for num[] and done[]. |
| 183 | local inline size_t map(int i, int j, int k) { | 237 | local inline size_t map(int syms, int left, int len) { |
| 184 | return k - 1 + ((size_t)((i - 1) >> 1) * ((i - 2) >> 1) + (j >> 1) - 1) * | 238 | return ((size_t)((syms - 1) >> 1) * ((syms - 2) >> 1) + |
| 185 | (g.max - 1); | 239 | (left >> 1) - 1) * (g.max - 1) + |
| 240 | len - 1; | ||
| 186 | } | 241 | } |
| 187 | 242 | ||
| 188 | // Free allocated space. Uses globals code, num, and done. | 243 | // Free allocated space in globals. |
| 189 | local void cleanup(void) { | 244 | local void cleanup(void) { |
| 190 | size_t n; | ||
| 191 | |||
| 192 | if (g.done != NULL) { | 245 | if (g.done != NULL) { |
| 193 | for (n = 0; n < g.size; n++) | 246 | for (size_t n = 0; n < g.size; n++) |
| 194 | if (g.done[n].len) | 247 | if (g.done[n].len) |
| 195 | free(g.done[n].vec); | 248 | free(g.done[n].vec); |
| 196 | free(g.done); | 249 | g.size = 0; |
| 250 | free(g.done); g.done = NULL; | ||
| 197 | } | 251 | } |
| 198 | if (g.num != NULL) | 252 | free(g.num); g.num = NULL; |
| 199 | free(g.num); | 253 | free(g.code); g.code = NULL; |
| 200 | if (g.code != NULL) | 254 | string_free(&g.out); |
| 201 | free(g.code); | ||
| 202 | } | 255 | } |
| 203 | 256 | ||
| 204 | // Return the number of possible prefix codes using bit patterns of lengths len | 257 | // Return the number of possible prefix codes using bit patterns of lengths len |
| 205 | // through max inclusive, coding syms symbols, with left bit patterns of length | 258 | // through max inclusive, coding syms symbols, with left bit patterns of length |
| 206 | // len unused -- return -1 if there is an overflow in the counting. Keep a | 259 | // len unused -- return -1 if there is an overflow in the counting. Keep a |
| 207 | // record of previous results in num to prevent repeating the same calculation. | 260 | // record of previous results in num to prevent repeating the same calculation. |
| 208 | // Uses the globals max and num. | 261 | local big_t count(int syms, int left, int len) { |
| 209 | local big_t count(int syms, int len, int left) { | ||
| 210 | big_t sum; // number of possible codes from this juncture | ||
| 211 | big_t got; // value returned from count() | ||
| 212 | int least; // least number of syms to use at this juncture | ||
| 213 | int most; // most number of syms to use at this juncture | ||
| 214 | int use; // number of bit patterns to use in next call | ||
| 215 | size_t index; // index of this case in *num | ||
| 216 | |||
| 217 | // see if only one possible code | 262 | // see if only one possible code |
| 218 | if (syms == left) | 263 | if (syms == left) |
| 219 | return 1; | 264 | return 1; |
| @@ -222,30 +267,30 @@ local big_t count(int syms, int len, int left) { | |||
| 222 | assert(syms > left && left > 0 && len < g.max); | 267 | assert(syms > left && left > 0 && len < g.max); |
| 223 | 268 | ||
| 224 | // see if we've done this one already | 269 | // see if we've done this one already |
| 225 | index = map(syms, left, len); | 270 | size_t index = map(syms, left, len); |
| 226 | got = g.num[index]; | 271 | big_t got = g.num[index]; |
| 227 | if (got) | 272 | if (got) |
| 228 | return got; // we have -- return the saved result | 273 | return got; // we have -- return the saved result |
| 229 | 274 | ||
| 230 | // we need to use at least this many bit patterns so that the code won't be | 275 | // we need to use at least this many bit patterns so that the code won't be |
| 231 | // incomplete at the next length (more bit patterns than symbols) | 276 | // incomplete at the next length (more bit patterns than symbols) |
| 232 | least = (left << 1) - syms; | 277 | int least = (left << 1) - syms; |
| 233 | if (least < 0) | 278 | if (least < 0) |
| 234 | least = 0; | 279 | least = 0; |
| 235 | 280 | ||
| 236 | // we can use at most this many bit patterns, lest there not be enough | 281 | // we can use at most this many bit patterns, lest there not be enough |
| 237 | // available for the remaining symbols at the maximum length (if there were | 282 | // available for the remaining symbols at the maximum length (if there were |
| 238 | // no limit to the code length, this would become: most = left - 1) | 283 | // no limit to the code length, this would become: most = left - 1) |
| 239 | most = (((code_t)left << (g.max - len)) - syms) / | 284 | int most = (((code_t)left << (g.max - len)) - syms) / |
| 240 | (((code_t)1 << (g.max - len)) - 1); | 285 | (((code_t)1 << (g.max - len)) - 1); |
| 241 | 286 | ||
| 242 | // count all possible codes from this juncture and add them up | 287 | // count all possible codes from this juncture and add them up |
| 243 | sum = 0; | 288 | big_t sum = 0; |
| 244 | for (use = least; use <= most; use++) { | 289 | for (int use = least; use <= most; use++) { |
| 245 | got = count(syms - use, len + 1, (left - use) << 1); | 290 | got = count(syms - use, (left - use) << 1, len + 1); |
| 246 | sum += got; | 291 | sum += got; |
| 247 | if (got == (big_t)0 - 1 || sum < got) // overflow | 292 | if (got == (big_t)-1 || sum < got) // overflow |
| 248 | return (big_t)0 - 1; | 293 | return (big_t)-1; |
| 249 | } | 294 | } |
| 250 | 295 | ||
| 251 | // verify that all recursive calls are productive | 296 | // verify that all recursive calls are productive |
| @@ -259,23 +304,19 @@ local big_t count(int syms, int len, int left) { | |||
| 259 | // Return true if we've been here before, set to true if not. Set a bit in a | 304 | // Return true if we've been here before, set to true if not. Set a bit in a |
| 260 | // bit vector to indicate visiting this state. Each (syms,len,left) state has a | 305 | // bit vector to indicate visiting this state. Each (syms,len,left) state has a |
| 261 | // variable size bit vector indexed by (mem,rem). The bit vector is lengthened | 306 | // variable size bit vector indexed by (mem,rem). The bit vector is lengthened |
| 262 | // if needed to allow setting the (mem,rem) bit. | 307 | // as needed to allow setting the (mem,rem) bit. |
| 263 | local int beenhere(int syms, int len, int left, int mem, int rem) { | 308 | local int been_here(int syms, int left, int len, int mem, int rem) { |
| 264 | size_t index; // index for this state's bit vector | ||
| 265 | size_t offset; // offset in this state's bit vector | ||
| 266 | int bit; // mask for this state's bit | ||
| 267 | size_t length; // length of the bit vector in bytes | ||
| 268 | char *vector; // new or enlarged bit vector | ||
| 269 | |||
| 270 | // point to vector for (syms,left,len), bit in vector for (mem,rem) | 309 | // point to vector for (syms,left,len), bit in vector for (mem,rem) |
| 271 | index = map(syms, left, len); | 310 | size_t index = map(syms, left, len); |
| 272 | mem -= 1 << g.root; | 311 | mem -= 1 << g.root; // mem always includes the root table |
| 273 | offset = (mem >> 3) + rem; | 312 | mem >>= 1; // mem and rem are always even |
| 313 | rem >>= 1; | ||
| 314 | size_t offset = (mem >> 3) + rem; | ||
| 274 | offset = ((offset * (offset + 1)) >> 1) + rem; | 315 | offset = ((offset * (offset + 1)) >> 1) + rem; |
| 275 | bit = 1 << (mem & 7); | 316 | int bit = 1 << (mem & 7); |
| 276 | 317 | ||
| 277 | // see if we've been here | 318 | // see if we've been here |
| 278 | length = g.done[index].len; | 319 | size_t length = g.done[index].len; |
| 279 | if (offset < length && (g.done[index].vec[offset] & bit) != 0) | 320 | if (offset < length && (g.done[index].vec[offset] & bit) != 0) |
| 280 | return 1; // done this! | 321 | return 1; // done this! |
| 281 | 322 | ||
| @@ -284,29 +325,23 @@ local int beenhere(int syms, int len, int left, int mem, int rem) { | |||
| 284 | // see if we need to lengthen the vector in order to set the bit | 325 | // see if we need to lengthen the vector in order to set the bit |
| 285 | if (length <= offset) { | 326 | if (length <= offset) { |
| 286 | // if we have one already, enlarge it, zero out the appended space | 327 | // if we have one already, enlarge it, zero out the appended space |
| 328 | char *vector; | ||
| 287 | if (length) { | 329 | if (length) { |
| 288 | do { | 330 | do { |
| 289 | length <<= 1; | 331 | length <<= 1; |
| 290 | } while (length <= offset); | 332 | } while (length <= offset); |
| 291 | vector = realloc(g.done[index].vec, length); | 333 | vector = realloc(g.done[index].vec, length); |
| 292 | if (vector != NULL) | 334 | assert(vector != NULL && "out of memory"); |
| 293 | memset(vector + g.done[index].len, 0, | 335 | memset(vector + g.done[index].len, 0, length - g.done[index].len); |
| 294 | length - g.done[index].len); | ||
| 295 | } | 336 | } |
| 296 | 337 | ||
| 297 | // otherwise we need to make a new vector and zero it out | 338 | // otherwise we need to make a new vector and zero it out |
| 298 | else { | 339 | else { |
| 299 | length = 1 << (len - g.root); | 340 | length = 16; |
| 300 | while (length <= offset) | 341 | while (length <= offset) |
| 301 | length <<= 1; | 342 | length <<= 1; |
| 302 | vector = calloc(length, sizeof(char)); | 343 | vector = calloc(length, 1); |
| 303 | } | 344 | assert(vector != NULL && "out of memory"); |
| 304 | |||
| 305 | // in either case, bail if we can't get the memory | ||
| 306 | if (vector == NULL) { | ||
| 307 | fputs("abort: unable to allocate enough memory\n", stderr); | ||
| 308 | cleanup(); | ||
| 309 | exit(1); | ||
| 310 | } | 345 | } |
| 311 | 346 | ||
| 312 | // install the new vector | 347 | // install the new vector |
| @@ -322,13 +357,8 @@ local int beenhere(int syms, int len, int left, int mem, int rem) { | |||
| 322 | // Examine all possible codes from the given node (syms, len, left). Compute | 357 | // Examine all possible codes from the given node (syms, len, left). Compute |
| 323 | // the amount of memory required to build inflate's decoding tables, where the | 358 | // the amount of memory required to build inflate's decoding tables, where the |
| 324 | // number of code structures used so far is mem, and the number remaining in | 359 | // number of code structures used so far is mem, and the number remaining in |
| 325 | // the current sub-table is rem. Uses the globals max, code, root, large, and | 360 | // the current sub-table is rem. |
| 326 | // done. | 361 | local void examine(int syms, int left, int len, int mem, int rem) { |
| 327 | local void examine(int syms, int len, int left, int mem, int rem) { | ||
| 328 | int least; // least number of syms to use at this juncture | ||
| 329 | int most; // most number of syms to use at this juncture | ||
| 330 | int use; // number of bit patterns to use in next call | ||
| 331 | |||
| 332 | // see if we have a complete code | 362 | // see if we have a complete code |
| 333 | if (syms == left) { | 363 | if (syms == left) { |
| 334 | // set the last code entry | 364 | // set the last code entry |
| @@ -342,15 +372,32 @@ local void examine(int syms, int len, int left, int mem, int rem) { | |||
| 342 | } | 372 | } |
| 343 | assert(rem == left); | 373 | assert(rem == left); |
| 344 | 374 | ||
| 345 | // if this is a new maximum, show the entries used and the sub-code | 375 | // if this is at the maximum, show the sub-code |
| 346 | if (mem > g.large) { | 376 | if (mem >= g.large) { |
| 347 | g.large = mem; | 377 | // if this is a new maximum, update the maximum and clear out the |
| 348 | printf("max %d: ", mem); | 378 | // printed sub-codes from the previous maximum |
| 349 | for (use = g.root + 1; use <= g.max; use++) | 379 | if (mem > g.large) { |
| 350 | if (g.code[use]) | 380 | g.large = mem; |
| 351 | printf("%d[%d] ", g.code[use], use); | 381 | string_clear(&g.out); |
| 352 | putchar('\n'); | 382 | } |
| 353 | fflush(stdout); | 383 | |
| 384 | // compute the starting state for this sub-code | ||
| 385 | syms = 0; | ||
| 386 | left = 1 << g.max; | ||
| 387 | for (int bits = g.max; bits > g.root; bits--) { | ||
| 388 | syms += g.code[bits]; | ||
| 389 | left -= g.code[bits]; | ||
| 390 | assert((left & 1) == 0); | ||
| 391 | left >>= 1; | ||
| 392 | } | ||
| 393 | |||
| 394 | // print the starting state and the resulting sub-code to g.out | ||
| 395 | string_printf(&g.out, "<%u, %u, %u>:", | ||
| 396 | syms, g.root + 1, ((1 << g.root) - left) << 1); | ||
| 397 | for (int bits = g.root + 1; bits <= g.max; bits++) | ||
| 398 | if (g.code[bits]) | ||
| 399 | string_printf(&g.out, " %d[%d]", g.code[bits], bits); | ||
| 400 | string_printf(&g.out, "\n"); | ||
| 354 | } | 401 | } |
| 355 | 402 | ||
| 356 | // remove entries as we drop back down in the recursion | 403 | // remove entries as we drop back down in the recursion |
| @@ -359,23 +406,23 @@ local void examine(int syms, int len, int left, int mem, int rem) { | |||
| 359 | } | 406 | } |
| 360 | 407 | ||
| 361 | // prune the tree if we can | 408 | // prune the tree if we can |
| 362 | if (beenhere(syms, len, left, mem, rem)) | 409 | if (been_here(syms, left, len, mem, rem)) |
| 363 | return; | 410 | return; |
| 364 | 411 | ||
| 365 | // we need to use at least this many bit patterns so that the code won't be | 412 | // we need to use at least this many bit patterns so that the code won't be |
| 366 | // incomplete at the next length (more bit patterns than symbols) | 413 | // incomplete at the next length (more bit patterns than symbols) |
| 367 | least = (left << 1) - syms; | 414 | int least = (left << 1) - syms; |
| 368 | if (least < 0) | 415 | if (least < 0) |
| 369 | least = 0; | 416 | least = 0; |
| 370 | 417 | ||
| 371 | // we can use at most this many bit patterns, lest there not be enough | 418 | // we can use at most this many bit patterns, lest there not be enough |
| 372 | // available for the remaining symbols at the maximum length (if there were | 419 | // available for the remaining symbols at the maximum length (if there were |
| 373 | // no limit to the code length, this would become: most = left - 1) | 420 | // no limit to the code length, this would become: most = left - 1) |
| 374 | most = (((code_t)left << (g.max - len)) - syms) / | 421 | int most = (((code_t)left << (g.max - len)) - syms) / |
| 375 | (((code_t)1 << (g.max - len)) - 1); | 422 | (((code_t)1 << (g.max - len)) - 1); |
| 376 | 423 | ||
| 377 | // occupy least table spaces, creating new sub-tables as needed | 424 | // occupy least table spaces, creating new sub-tables as needed |
| 378 | use = least; | 425 | int use = least; |
| 379 | while (rem < use) { | 426 | while (rem < use) { |
| 380 | use -= rem; | 427 | use -= rem; |
| 381 | rem = 1 << (len - g.root); | 428 | rem = 1 << (len - g.root); |
| @@ -386,7 +433,7 @@ local void examine(int syms, int len, int left, int mem, int rem) { | |||
| 386 | // examine codes from here, updating table space as we go | 433 | // examine codes from here, updating table space as we go |
| 387 | for (use = least; use <= most; use++) { | 434 | for (use = least; use <= most; use++) { |
| 388 | g.code[len] = use; | 435 | g.code[len] = use; |
| 389 | examine(syms - use, len + 1, (left - use) << 1, | 436 | examine(syms - use, (left - use) << 1, len + 1, |
| 390 | mem + (rem ? 1 << (len - g.root) : 0), rem << 1); | 437 | mem + (rem ? 1 << (len - g.root) : 0), rem << 1); |
| 391 | if (rem == 0) { | 438 | if (rem == 0) { |
| 392 | rem = 1 << (len - g.root); | 439 | rem = 1 << (len - g.root); |
| @@ -402,37 +449,35 @@ local void examine(int syms, int len, int left, int mem, int rem) { | |||
| 402 | // Look at all sub-codes starting with root + 1 bits. Look at only the valid | 449 | // Look at all sub-codes starting with root + 1 bits. Look at only the valid |
| 403 | // intermediate code states (syms, left, len). For each completed code, | 450 | // intermediate code states (syms, left, len). For each completed code, |
| 404 | // calculate the amount of memory required by inflate to build the decoding | 451 | // calculate the amount of memory required by inflate to build the decoding |
| 405 | // tables. Find the maximum amount of memory required and show the code that | 452 | // tables. Find the maximum amount of memory required and show the codes that |
| 406 | // requires that maximum. Uses the globals max, root, and num. | 453 | // require that maximum. |
| 407 | local void enough(int syms) { | 454 | local void enough(int syms) { |
| 408 | int n; // number of remaing symbols for this node | ||
| 409 | int left; // number of unused bit patterns at this length | ||
| 410 | size_t index; // index of this case in *num | ||
| 411 | |||
| 412 | // clear code | 455 | // clear code |
| 413 | for (n = 0; n <= g.max; n++) | 456 | for (int n = 0; n <= g.max; n++) |
| 414 | g.code[n] = 0; | 457 | g.code[n] = 0; |
| 415 | 458 | ||
| 416 | // look at all (root + 1) bit and longer codes | 459 | // look at all (root + 1) bit and longer codes |
| 460 | string_clear(&g.out); // empty saved results | ||
| 417 | g.large = 1 << g.root; // base table | 461 | g.large = 1 << g.root; // base table |
| 418 | if (g.root < g.max) // otherwise, there's only a base table | 462 | if (g.root < g.max) // otherwise, there's only a base table |
| 419 | for (n = 3; n <= syms; n++) | 463 | for (int n = 3; n <= syms; n++) |
| 420 | for (left = 2; left < n; left += 2) { | 464 | for (int left = 2; left < n; left += 2) { |
| 421 | // look at all reachable (root + 1) bit nodes, and the | 465 | // look at all reachable (root + 1) bit nodes, and the |
| 422 | // resulting codes (complete at root + 2 or more) | 466 | // resulting codes (complete at root + 2 or more) |
| 423 | index = map(n, left, g.root + 1); | 467 | size_t index = map(n, left, g.root + 1); |
| 424 | if (g.root + 1 < g.max && g.num[index]) // reachable node | 468 | if (g.root + 1 < g.max && g.num[index]) // reachable node |
| 425 | examine(n, g.root + 1, left, 1 << g.root, 0); | 469 | examine(n, left, g.root + 1, 1 << g.root, 0); |
| 426 | 470 | ||
| 427 | // also look at root bit codes with completions at root + 1 | 471 | // also look at root bit codes with completions at root + 1 |
| 428 | // bits (not saved in num, since complete), just in case | 472 | // bits (not saved in num, since complete), just in case |
| 429 | if (g.num[index - 1] && n <= left << 1) | 473 | if (g.num[index - 1] && n <= left << 1) |
| 430 | examine((n - left) << 1, g.root + 1, (n - left) << 1, | 474 | examine((n - left) << 1, (n - left) << 1, g.root + 1, |
| 431 | 1 << g.root, 0); | 475 | 1 << g.root, 0); |
| 432 | } | 476 | } |
| 433 | 477 | ||
| 434 | // done | 478 | // done |
| 435 | printf("done: maximum of %d table entries\n", g.large); | 479 | printf("maximum of %d table entries for root = %d\n", g.large, g.root); |
| 480 | fputs(g.out.str, stdout); | ||
| 436 | } | 481 | } |
| 437 | 482 | ||
| 438 | // Examine and show the total number of possible prefix codes for a given | 483 | // Examine and show the total number of possible prefix codes for a given |
| @@ -451,19 +496,14 @@ local void enough(int syms) { | |||
| 451 | // For the deflate literal/length code, use "enough". For the deflate distance | 496 | // For the deflate literal/length code, use "enough". For the deflate distance |
| 452 | // code, use "enough 30 6". | 497 | // code, use "enough 30 6". |
| 453 | int main(int argc, char **argv) { | 498 | int main(int argc, char **argv) { |
| 454 | int syms; // total number of symbols to code | ||
| 455 | int n; // number of symbols to code for this run | ||
| 456 | big_t got; // return value of count() | ||
| 457 | big_t sum; // accumulated number of codes over n | ||
| 458 | code_t word; // for counting bits in code_t | ||
| 459 | |||
| 460 | // set up globals for cleanup() | 499 | // set up globals for cleanup() |
| 461 | g.code = NULL; | 500 | g.code = NULL; |
| 462 | g.num = NULL; | 501 | g.num = NULL; |
| 463 | g.done = NULL; | 502 | g.done = NULL; |
| 503 | string_init(&g.out); | ||
| 464 | 504 | ||
| 465 | // get arguments -- default to the deflate literal/length code | 505 | // get arguments -- default to the deflate literal/length code |
| 466 | syms = 286; | 506 | int syms = 286; |
| 467 | g.root = 9; | 507 | g.root = 9; |
| 468 | g.max = 15; | 508 | g.max = 15; |
| 469 | if (argc > 1) { | 509 | if (argc > 1) { |
| @@ -485,11 +525,12 @@ int main(int argc, char **argv) { | |||
| 485 | g.max = syms - 1; | 525 | g.max = syms - 1; |
| 486 | 526 | ||
| 487 | // determine the number of bits in a code_t | 527 | // determine the number of bits in a code_t |
| 488 | for (n = 0, word = 1; word; n++, word <<= 1) | 528 | int bits = 0; |
| 489 | ; | 529 | for (code_t word = 1; word; word <<= 1) |
| 530 | bits++; | ||
| 490 | 531 | ||
| 491 | // make sure that the calculation of most will not overflow | 532 | // make sure that the calculation of most will not overflow |
| 492 | if (g.max > n || (code_t)(syms - 2) >= (((code_t)0 - 1) >> (g.max - 1))) { | 533 | if (g.max > bits || (code_t)(syms - 2) >= ((code_t)-1 >> (g.max - 1))) { |
| 493 | fputs("abort: code length too long for internal types\n", stderr); | 534 | fputs("abort: code length too long for internal types\n", stderr); |
| 494 | return 1; | 535 | return 1; |
| 495 | } | 536 | } |
| @@ -503,10 +544,7 @@ int main(int argc, char **argv) { | |||
| 503 | 544 | ||
| 504 | // allocate code vector | 545 | // allocate code vector |
| 505 | g.code = calloc(g.max + 1, sizeof(int)); | 546 | g.code = calloc(g.max + 1, sizeof(int)); |
| 506 | if (g.code == NULL) { | 547 | assert(g.code != NULL && "out of memory"); |
| 507 | fputs("abort: unable to allocate enough memory\n", stderr); | ||
| 508 | return 1; | ||
| 509 | } | ||
| 510 | 548 | ||
| 511 | // determine size of saved results array, checking for overflows, | 549 | // determine size of saved results array, checking for overflows, |
| 512 | // allocate and clear the array (set all to zero with calloc()) | 550 | // allocate and clear the array (set all to zero with calloc()) |
| @@ -514,27 +552,22 @@ int main(int argc, char **argv) { | |||
| 514 | g.num = NULL; // won't be saving any results | 552 | g.num = NULL; // won't be saving any results |
| 515 | else { | 553 | else { |
| 516 | g.size = syms >> 1; | 554 | g.size = syms >> 1; |
| 517 | if (g.size > ((size_t)0 - 1) / (n = (syms - 1) >> 1) || | 555 | int n = (syms - 1) >> 1; |
| 518 | (g.size *= n, g.size > ((size_t)0 - 1) / (n = g.max - 1)) || | 556 | assert(g.size <= (size_t)-1 / n && "overflow"); |
| 519 | (g.size *= n, g.size > ((size_t)0 - 1) / sizeof(big_t)) || | 557 | g.size *= n; |
| 520 | (g.num = calloc(g.size, sizeof(big_t))) == NULL) { | 558 | n = g.max - 1; |
| 521 | fputs("abort: unable to allocate enough memory\n", stderr); | 559 | assert(g.size <= (size_t)-1 / n && "overflow"); |
| 522 | cleanup(); | 560 | g.size *= n; |
| 523 | return 1; | 561 | g.num = calloc(g.size, sizeof(big_t)); |
| 524 | } | 562 | assert(g.num != NULL && "out of memory"); |
| 525 | } | 563 | } |
| 526 | 564 | ||
| 527 | // count possible codes for all numbers of symbols, add up counts | 565 | // count possible codes for all numbers of symbols, add up counts |
| 528 | sum = 0; | 566 | big_t sum = 0; |
| 529 | for (n = 2; n <= syms; n++) { | 567 | for (int n = 2; n <= syms; n++) { |
| 530 | got = count(n, 1, 2); | 568 | big_t got = count(n, 2, 1); |
| 531 | sum += got; | 569 | sum += got; |
| 532 | if (got == (big_t)0 - 1 || sum < got) { // overflow | 570 | assert(got != (big_t)-1 && sum >= got && "overflow"); |
| 533 | fputs("abort: can't count that high!\n", stderr); | ||
| 534 | cleanup(); | ||
| 535 | return 1; | ||
| 536 | } | ||
| 537 | printf("%"PRIbig" %d-codes\n", got, n); | ||
| 538 | } | 571 | } |
| 539 | printf("%"PRIbig" total codes for 2 to %d symbols", sum, syms); | 572 | printf("%"PRIbig" total codes for 2 to %d symbols", sum, syms); |
| 540 | if (g.max < syms - 1) | 573 | if (g.max < syms - 1) |
| @@ -542,14 +575,12 @@ int main(int argc, char **argv) { | |||
| 542 | else | 575 | else |
| 543 | puts(" (no length limit)"); | 576 | puts(" (no length limit)"); |
| 544 | 577 | ||
| 545 | // allocate and clear done array for beenhere() | 578 | // allocate and clear done array for been_here() |
| 546 | if (syms == 2) | 579 | if (syms == 2) |
| 547 | g.done = NULL; | 580 | g.done = NULL; |
| 548 | else if (g.size > ((size_t)0 - 1) / sizeof(struct tab) || | 581 | else { |
| 549 | (g.done = calloc(g.size, sizeof(struct tab))) == NULL) { | 582 | g.done = calloc(g.size, sizeof(struct tab)); |
| 550 | fputs("abort: unable to allocate enough memory\n", stderr); | 583 | assert(g.done != NULL && "out of memory"); |
| 551 | cleanup(); | ||
| 552 | return 1; | ||
| 553 | } | 584 | } |
| 554 | 585 | ||
| 555 | // find and show maximum inflate table usage | 586 | // find and show maximum inflate table usage |
| @@ -558,7 +589,7 @@ int main(int argc, char **argv) { | |||
| 558 | if ((code_t)syms < ((code_t)1 << (g.root + 1))) | 589 | if ((code_t)syms < ((code_t)1 << (g.root + 1))) |
| 559 | enough(syms); | 590 | enough(syms); |
| 560 | else | 591 | else |
| 561 | puts("cannot handle minimum code lengths > root"); | 592 | fputs("cannot handle minimum code lengths > root", stderr); |
| 562 | 593 | ||
| 563 | // done | 594 | // done |
| 564 | cleanup(); | 595 | cleanup(); |
