diff options
author | Mark Adler <madler@alumni.caltech.edu> | 2018-08-01 01:49:45 -0700 |
---|---|---|
committer | Mark Adler <madler@alumni.caltech.edu> | 2018-08-01 19:51:56 -0700 |
commit | 8ba2cdb6bdbee95703e1c6632fd8519a5e4a206e (patch) | |
tree | 4c7be828258ab03d4585ba46714c2b858563f7e4 /examples | |
parent | 4c14b51587b3bf484f7ca26a92dcfc0f8559201b (diff) | |
download | zlib-8ba2cdb6bdbee95703e1c6632fd8519a5e4a206e.tar.gz zlib-8ba2cdb6bdbee95703e1c6632fd8519a5e4a206e.tar.bz2 zlib-8ba2cdb6bdbee95703e1c6632fd8519a5e4a206e.zip |
Clean up code style in enough.c, update version.
Diffstat (limited to 'examples')
-rw-r--r-- | examples/enough.c | 413 |
1 files changed, 202 insertions, 211 deletions
diff --git a/examples/enough.c b/examples/enough.c index d6f8e25..56ad63e 100644 --- a/examples/enough.c +++ b/examples/enough.c | |||
@@ -1,7 +1,7 @@ | |||
1 | /* enough.c -- determine the maximum size of inflate's Huffman code tables over | 1 | /* enough.c -- determine the maximum size of inflate's Huffman code tables over |
2 | * all possible valid and complete Huffman codes, subject to a length limit. | 2 | * all possible valid and complete Huffman codes, subject to a length limit. |
3 | * Copyright (C) 2007, 2008, 2012 Mark Adler | 3 | * Copyright (C) 2007, 2008, 2012, 2018 Mark Adler |
4 | * Version 1.4 18 August 2012 Mark Adler | 4 | * Version 1.5 1 August 2018 Mark Adler |
5 | */ | 5 | */ |
6 | 6 | ||
7 | /* Version history: | 7 | /* Version history: |
@@ -17,86 +17,87 @@ | |||
17 | 1.4 18 Aug 2012 Avoid shifts more than bits in type (caused endless loop!) | 17 | 1.4 18 Aug 2012 Avoid shifts more than bits in type (caused endless loop!) |
18 | Clean up comparisons of different types | 18 | Clean up comparisons of different types |
19 | Clean up code indentation | 19 | Clean up code indentation |
20 | 1.5 1 Aug 2018 Clean up code style and formatting | ||
20 | */ | 21 | */ |
21 | 22 | ||
22 | /* | 23 | /* |
23 | Examine all possible Huffman codes for a given number of symbols and a | 24 | Examine all possible Huffman codes for a given number of symbols and a |
24 | maximum code length in bits to determine the maximum table size for zilb's | 25 | maximum code length in bits to determine the maximum table size for zlib's |
25 | inflate. Only complete Huffman codes are counted. | 26 | inflate. Only complete Huffman codes are counted. |
26 | 27 | ||
27 | Two codes are considered distinct if the vectors of the number of codes per | 28 | Two codes are considered distinct if the vectors of the number of codes per |
28 | length are not identical. So permutations of the symbol assignments result | 29 | length are not identical. So permutations of the symbol assignments result |
29 | in the same code for the counting, as do permutations of the assignments of | 30 | in the same code for the counting, as do permutations of the assignments of |
30 | the bit values to the codes (i.e. only canonical codes are counted). | 31 | the bit values to the codes (i.e. only canonical codes are counted). |
31 | 32 | ||
32 | We build a code from shorter to longer lengths, determining how many symbols | 33 | We build a code from shorter to longer lengths, determining how many symbols |
33 | are coded at each length. At each step, we have how many symbols remain to | 34 | are coded at each length. At each step, we have how many symbols remain to |
34 | be coded, what the last code length used was, and how many bit patterns of | 35 | be coded, what the last code length used was, and how many bit patterns of |
35 | that length remain unused. Then we add one to the code length and double the | 36 | that length remain unused. Then we add one to the code length and double the |
36 | number of unused patterns to graduate to the next code length. We then | 37 | number of unused patterns to graduate to the next code length. We then |
37 | assign all portions of the remaining symbols to that code length that | 38 | assign all portions of the remaining symbols to that code length that |
38 | preserve the properties of a correct and eventually complete code. Those | 39 | preserve the properties of a correct and eventually complete code. Those |
39 | properties are: we cannot use more bit patterns than are available; and when | 40 | properties are: we cannot use more bit patterns than are available; and when |
40 | all the symbols are used, there are exactly zero possible bit patterns | 41 | all the symbols are used, there are exactly zero possible bit patterns |
41 | remaining. | 42 | remaining. |
42 | 43 | ||
43 | The inflate Huffman decoding algorithm uses two-level lookup tables for | 44 | The inflate Huffman decoding algorithm uses two-level lookup tables for |
44 | speed. There is a single first-level table to decode codes up to root bits | 45 | speed. There is a single first-level table to decode codes up to root bits |
45 | in length (root == 9 in the current inflate implementation). The table | 46 | in length (root == 9 in the current inflate implementation). The table has 1 |
46 | has 1 << root entries and is indexed by the next root bits of input. Codes | 47 | << root entries and is indexed by the next root bits of input. Codes shorter |
47 | shorter than root bits have replicated table entries, so that the correct | 48 | than root bits have replicated table entries, so that the correct entry is |
48 | entry is pointed to regardless of the bits that follow the short code. If | 49 | pointed to regardless of the bits that follow the short code. If the code is |
49 | the code is longer than root bits, then the table entry points to a second- | 50 | longer than root bits, then the table entry points to a second- level table. |
50 | level table. The size of that table is determined by the longest code with | 51 | The size of that table is determined by the longest code with that root-bit |
51 | that root-bit prefix. If that longest code has length len, then the table | 52 | prefix. If that longest code has length len, then the table has size 1 << |
52 | has size 1 << (len - root), to index the remaining bits in that set of | 53 | (len - root), to index the remaining bits in that set of codes. Each |
53 | codes. Each subsequent root-bit prefix then has its own sub-table. The | 54 | subsequent root-bit prefix then has its own sub-table. The total number of |
54 | total number of table entries required by the code is calculated | 55 | table entries required by the code is calculated incrementally as the number |
55 | incrementally as the number of codes at each bit length is populated. When | 56 | of codes at each bit length is populated. When all of the codes are shorter |
56 | all of the codes are shorter than root bits, then root is reduced to the | 57 | than root bits, then root is reduced to the longest code length, resulting |
57 | longest code length, resulting in a single, smaller, one-level table. | 58 | in a single, smaller, one-level table. |
58 | 59 | ||
59 | The inflate algorithm also provides for small values of root (relative to | 60 | The inflate algorithm also provides for small values of root (relative to |
60 | the log2 of the number of symbols), where the shortest code has more bits | 61 | the log2 of the number of symbols), where the shortest code has more bits |
61 | than root. In that case, root is increased to the length of the shortest | 62 | than root. In that case, root is increased to the length of the shortest |
62 | code. This program, by design, does not handle that case, so it is verified | 63 | code. This program, by design, does not handle that case, so it is verified |
63 | that the number of symbols is less than 2^(root + 1). | 64 | that the number of symbols is less than 2^(root + 1). |
64 | 65 | ||
65 | In order to speed up the examination (by about ten orders of magnitude for | 66 | In order to speed up the examination (by about ten orders of magnitude for |
66 | the default arguments), the intermediate states in the build-up of a code | 67 | the default arguments), the intermediate states in the build-up of a code |
67 | are remembered and previously visited branches are pruned. The memory | 68 | are remembered and previously visited branches are pruned. The memory |
68 | required for this will increase rapidly with the total number of symbols and | 69 | required for this will increase rapidly with the total number of symbols and |
69 | the maximum code length in bits. However this is a very small price to pay | 70 | the maximum code length in bits. However this is a very small price to pay |
70 | for the vast speedup. | 71 | for the vast speedup. |
71 | 72 | ||
72 | First, all of the possible Huffman codes are counted, and reachable | 73 | First, all of the possible Huffman codes are counted, and reachable |
73 | intermediate states are noted by a non-zero count in a saved-results array. | 74 | intermediate states are noted by a non-zero count in a saved-results array. |
74 | Second, the intermediate states that lead to (root + 1) bit or longer codes | 75 | Second, the intermediate states that lead to (root + 1) bit or longer codes |
75 | are used to look at all sub-codes from those junctures for their inflate | 76 | are used to look at all sub-codes from those junctures for their inflate |
76 | memory usage. (The amount of memory used is not affected by the number of | 77 | memory usage. (The amount of memory used is not affected by the number of |
77 | codes of root bits or less in length.) Third, the visited states in the | 78 | codes of root bits or less in length.) Third, the visited states in the |
78 | construction of those sub-codes and the associated calculation of the table | 79 | construction of those sub-codes and the associated calculation of the table |
79 | size is recalled in order to avoid recalculating from the same juncture. | 80 | size is recalled in order to avoid recalculating from the same juncture. |
80 | Beginning the code examination at (root + 1) bit codes, which is enabled by | 81 | Beginning the code examination at (root + 1) bit codes, which is enabled by |
81 | identifying the reachable nodes, accounts for about six of the orders of | 82 | identifying the reachable nodes, accounts for about six of the orders of |
82 | magnitude of improvement for the default arguments. About another four | 83 | magnitude of improvement for the default arguments. About another four |
83 | orders of magnitude come from not revisiting previous states. Out of | 84 | orders of magnitude come from not revisiting previous states. Out of |
84 | approximately 2x10^16 possible Huffman codes, only about 2x10^6 sub-codes | 85 | approximately 2x10^16 possible Huffman codes, only about 2x10^6 sub-codes |
85 | need to be examined to cover all of the possible table memory usage cases | 86 | need to be examined to cover all of the possible table memory usage cases |
86 | for the default arguments of 286 symbols limited to 15-bit codes. | 87 | for the default arguments of 286 symbols limited to 15-bit codes. |
87 | 88 | ||
88 | Note that an unsigned long long type is used for counting. It is quite easy | 89 | Note that an unsigned long long type is used for counting. It is quite easy |
89 | to exceed the capacity of an eight-byte integer with a large number of | 90 | to exceed the capacity of an eight-byte integer with a large number of |
90 | symbols and a large maximum code length, so multiple-precision arithmetic | 91 | symbols and a large maximum code length, so multiple-precision arithmetic |
91 | would need to replace the unsigned long long arithmetic in that case. This | 92 | would need to replace the unsigned long long arithmetic in that case. This |
92 | program will abort if an overflow occurs. The big_t type identifies where | 93 | program will abort if an overflow occurs. The big_t type identifies where |
93 | the counting takes place. | 94 | the counting takes place. |
94 | 95 | ||
95 | An unsigned long long type is also used for calculating the number of | 96 | An unsigned long long type is also used for calculating the number of |
96 | possible codes remaining at the maximum length. This limits the maximum | 97 | possible codes remaining at the maximum length. This limits the maximum code |
97 | code length to the number of bits in a long long minus the number of bits | 98 | length to the number of bits in a long long minus the number of bits needed |
98 | needed to represent the symbols in a flat code. The code_t type identifies | 99 | to represent the symbols in a flat code. The code_t type identifies where |
99 | where the bit pattern counting takes place. | 100 | the bit pattern counting takes place. |
100 | */ | 101 | */ |
101 | 102 | ||
102 | #include <stdio.h> | 103 | #include <stdio.h> |
@@ -106,13 +107,13 @@ | |||
106 | 107 | ||
107 | #define local static | 108 | #define local static |
108 | 109 | ||
109 | /* special data types */ | 110 | // Special data types. |
110 | typedef unsigned long long big_t; /* type for code counting */ | 111 | typedef unsigned long long big_t; // type for code counting |
111 | #define PRIbig "llu" /* printf format for big_t */ | 112 | #define PRIbig "llu" // printf format for big_t |
112 | typedef unsigned long long code_t; /* type for bit pattern counting */ | 113 | typedef unsigned long long code_t; // type for bit pattern counting |
113 | struct tab { /* type for been here check */ | 114 | struct tab { // type for been here check |
114 | size_t len; /* length of bit vector in char's */ | 115 | size_t len; // length of bit vector in char's |
115 | char *vec; /* allocated bit vector */ | 116 | char *vec; // allocated bit vector |
116 | }; | 117 | }; |
117 | 118 | ||
118 | /* The array for saving results, num[], is indexed with this triplet: | 119 | /* The array for saving results, num[], is indexed with this triplet: |
@@ -127,24 +128,23 @@ struct tab { /* type for been here check */ | |||
127 | left: 2..syms - 1, but only the evens (so syms == 8 -> 2, 4, 6) | 128 | left: 2..syms - 1, but only the evens (so syms == 8 -> 2, 4, 6) |
128 | len: 1..max - 1 (max == maximum code length in bits) | 129 | len: 1..max - 1 (max == maximum code length in bits) |
129 | 130 | ||
130 | syms == 2 is not saved since that immediately leads to a single code. left | 131 | syms == 2 is not saved since that immediately leads to a single code. left |
131 | must be even, since it represents the number of available bit patterns at | 132 | must be even, since it represents the number of available bit patterns at |
132 | the current length, which is double the number at the previous length. | 133 | the current length, which is double the number at the previous length. left |
133 | left ends at syms-1 since left == syms immediately results in a single code. | 134 | ends at syms-1 since left == syms immediately results in a single code. |
134 | (left > sym is not allowed since that would result in an incomplete code.) | 135 | (left > sym is not allowed since that would result in an incomplete code.) |
135 | len is less than max, since the code completes immediately when len == max. | 136 | len is less than max, since the code completes immediately when len == max. |
136 | 137 | ||
137 | The offset into the array is calculated for the three indices with the | 138 | The offset into the array is calculated for the three indices with the first |
138 | first one (syms) being outermost, and the last one (len) being innermost. | 139 | one (syms) being outermost, and the last one (len) being innermost. We build |
139 | We build the array with length max-1 lists for the len index, with syms-3 | 140 | the array with length max-1 lists for the len index, with syms-3 of those |
140 | of those for each symbol. There are totsym-2 of those, with each one | 141 | for each symbol. There are totsym-2 of those, with each one varying in |
141 | varying in length as a function of sym. See the calculation of index in | 142 | length as a function of sym. See the calculation of index in map() for the |
142 | count() for the index, and the calculation of size in main() for the size | 143 | index, and the calculation of size in main() for the size of the array. |
143 | of the array. | ||
144 | 144 | ||
145 | For the deflate example of 286 symbols limited to 15-bit codes, the array | 145 | For the deflate example of 286 symbols limited to 15-bit codes, the array |
146 | has 284,284 entries, taking up 2.17 MB for an 8-byte big_t. More than | 146 | has 284,284 entries, taking up 2.17 MB for an 8-byte big_t. More than half |
147 | half of the space allocated for saved results is actually used -- not all | 147 | of the space allocated for saved results is actually used -- not all |
148 | possible triplets are reached in the generation of valid Huffman codes. | 148 | possible triplets are reached in the generation of valid Huffman codes. |
149 | */ | 149 | */ |
150 | 150 | ||
@@ -152,14 +152,14 @@ struct tab { /* type for been here check */ | |||
152 | to the num[] array as described above for the (syms, left, len) triplet. | 152 | to the num[] array as described above for the (syms, left, len) triplet. |
153 | Each element in the array is further indexed by the (mem, rem) doublet, | 153 | Each element in the array is further indexed by the (mem, rem) doublet, |
154 | where mem is the amount of inflate table space used so far, and rem is the | 154 | where mem is the amount of inflate table space used so far, and rem is the |
155 | remaining unused entries in the current inflate sub-table. Each indexed | 155 | remaining unused entries in the current inflate sub-table. Each indexed |
156 | element is simply one bit indicating whether the state has been visited or | 156 | element is simply one bit indicating whether the state has been visited or |
157 | not. Since the ranges for mem and rem are not known a priori, each bit | 157 | not. Since the ranges for mem and rem are not known a priori, each bit |
158 | vector is of a variable size, and grows as needed to accommodate the visited | 158 | vector is of a variable size, and grows as needed to accommodate the visited |
159 | states. mem and rem are used to calculate a single index in a triangular | 159 | states. mem and rem are used to calculate a single index in a triangular |
160 | array. Since the range of mem is expected in the default case to be about | 160 | array. Since the range of mem is expected in the default case to be about |
161 | ten times larger than the range of rem, the array is skewed to reduce the | 161 | ten times larger than the range of rem, the array is skewed to reduce the |
162 | memory usage, with eight times the range for mem than for rem. See the | 162 | memory usage, with eight times the range for mem than for rem. See the |
163 | calculations for offset and bit in beenhere() for the details. | 163 | calculations for offset and bit in beenhere() for the details. |
164 | 164 | ||
165 | For the deflate example of 286 symbols limited to 15-bit codes, the bit | 165 | For the deflate example of 286 symbols limited to 15-bit codes, the bit |
@@ -167,23 +167,22 @@ struct tab { /* type for been here check */ | |||
167 | array itself. | 167 | array itself. |
168 | */ | 168 | */ |
169 | 169 | ||
170 | /* Globals to avoid propagating constants or constant pointers recursively */ | 170 | // Globals to avoid propagating constants or constant pointers recursively. |
171 | struct { | 171 | struct { |
172 | int max; /* maximum allowed bit length for the codes */ | 172 | int max; // maximum allowed bit length for the codes |
173 | int root; /* size of base code table in bits */ | 173 | int root; // size of base code table in bits |
174 | int large; /* largest code table so far */ | 174 | int large; // largest code table so far |
175 | size_t size; /* number of elements in num and done */ | 175 | size_t size; // number of elements in num and done |
176 | int *code; /* number of symbols assigned to each bit length */ | 176 | int *code; // number of symbols assigned to each bit length |
177 | big_t *num; /* saved results array for code counting */ | 177 | big_t *num; // saved results array for code counting |
178 | struct tab *done; /* states already evaluated array */ | 178 | struct tab *done; // states already evaluated array |
179 | } g; | 179 | } g; |
180 | 180 | ||
181 | /* Index function for num[] and done[] */ | 181 | // Index function for num[] and done[]. |
182 | #define INDEX(i,j,k) (((size_t)((i-1)>>1)*((i-2)>>1)+(j>>1)-1)*(g.max-1)+k-1) | 182 | #define INDEX(i,j,k) (((size_t)((i-1)>>1)*((i-2)>>1)+(j>>1)-1)*(g.max-1)+k-1) |
183 | 183 | ||
184 | /* Free allocated space. Uses globals code, num, and done. */ | 184 | // Free allocated space. Uses globals code, num, and done. |
185 | local void cleanup(void) | 185 | local void cleanup(void) { |
186 | { | ||
187 | size_t n; | 186 | size_t n; |
188 | 187 | ||
189 | if (g.done != NULL) { | 188 | if (g.done != NULL) { |
@@ -198,91 +197,89 @@ local void cleanup(void) | |||
198 | free(g.code); | 197 | free(g.code); |
199 | } | 198 | } |
200 | 199 | ||
201 | /* Return the number of possible Huffman codes using bit patterns of lengths | 200 | // Return the number of possible Huffman codes using bit patterns of lengths |
202 | len through max inclusive, coding syms symbols, with left bit patterns of | 201 | // len through max inclusive, coding syms symbols, with left bit patterns of |
203 | length len unused -- return -1 if there is an overflow in the counting. | 202 | // length len unused -- return -1 if there is an overflow in the counting. Keep |
204 | Keep a record of previous results in num to prevent repeating the same | 203 | // a record of previous results in num to prevent repeating the same |
205 | calculation. Uses the globals max and num. */ | 204 | // calculation. Uses the globals max and num. |
206 | local big_t count(int syms, int len, int left) | 205 | local big_t count(int syms, int len, int left) { |
207 | { | 206 | big_t sum; // number of possible codes from this juncture |
208 | big_t sum; /* number of possible codes from this juncture */ | 207 | big_t got; // value returned from count() |
209 | big_t got; /* value returned from count() */ | 208 | int least; // least number of syms to use at this juncture |
210 | int least; /* least number of syms to use at this juncture */ | 209 | int most; // most number of syms to use at this juncture |
211 | int most; /* most number of syms to use at this juncture */ | 210 | int use; // number of bit patterns to use in next call |
212 | int use; /* number of bit patterns to use in next call */ | 211 | size_t index; // index of this case in *num |
213 | size_t index; /* index of this case in *num */ | 212 | |
214 | 213 | // see if only one possible code | |
215 | /* see if only one possible code */ | ||
216 | if (syms == left) | 214 | if (syms == left) |
217 | return 1; | 215 | return 1; |
218 | 216 | ||
219 | /* note and verify the expected state */ | 217 | // note and verify the expected state |
220 | assert(syms > left && left > 0 && len < g.max); | 218 | assert(syms > left && left > 0 && len < g.max); |
221 | 219 | ||
222 | /* see if we've done this one already */ | 220 | // see if we've done this one already |
223 | index = INDEX(syms, left, len); | 221 | index = INDEX(syms, left, len); |
224 | got = g.num[index]; | 222 | got = g.num[index]; |
225 | if (got) | 223 | if (got) |
226 | return got; /* we have -- return the saved result */ | 224 | return got; // we have -- return the saved result |
227 | 225 | ||
228 | /* we need to use at least this many bit patterns so that the code won't be | 226 | // we need to use at least this many bit patterns so that the code won't be |
229 | incomplete at the next length (more bit patterns than symbols) */ | 227 | // incomplete at the next length (more bit patterns than symbols) |
230 | least = (left << 1) - syms; | 228 | least = (left << 1) - syms; |
231 | if (least < 0) | 229 | if (least < 0) |
232 | least = 0; | 230 | least = 0; |
233 | 231 | ||
234 | /* we can use at most this many bit patterns, lest there not be enough | 232 | // we can use at most this many bit patterns, lest there not be enough |
235 | available for the remaining symbols at the maximum length (if there were | 233 | // available for the remaining symbols at the maximum length (if there were |
236 | no limit to the code length, this would become: most = left - 1) */ | 234 | // no limit to the code length, this would become: most = left - 1) |
237 | most = (((code_t)left << (g.max - len)) - syms) / | 235 | most = (((code_t)left << (g.max - len)) - syms) / |
238 | (((code_t)1 << (g.max - len)) - 1); | 236 | (((code_t)1 << (g.max - len)) - 1); |
239 | 237 | ||
240 | /* count all possible codes from this juncture and add them up */ | 238 | // count all possible codes from this juncture and add them up |
241 | sum = 0; | 239 | sum = 0; |
242 | for (use = least; use <= most; use++) { | 240 | for (use = least; use <= most; use++) { |
243 | got = count(syms - use, len + 1, (left - use) << 1); | 241 | got = count(syms - use, len + 1, (left - use) << 1); |
244 | sum += got; | 242 | sum += got; |
245 | if (got == (big_t)0 - 1 || sum < got) /* overflow */ | 243 | if (got == (big_t)0 - 1 || sum < got) // overflow |
246 | return (big_t)0 - 1; | 244 | return (big_t)0 - 1; |
247 | } | 245 | } |
248 | 246 | ||
249 | /* verify that all recursive calls are productive */ | 247 | // verify that all recursive calls are productive |
250 | assert(sum != 0); | 248 | assert(sum != 0); |
251 | 249 | ||
252 | /* save the result and return it */ | 250 | // save the result and return it |
253 | g.num[index] = sum; | 251 | g.num[index] = sum; |
254 | return sum; | 252 | return sum; |
255 | } | 253 | } |
256 | 254 | ||
257 | /* Return true if we've been here before, set to true if not. Set a bit in a | 255 | // Return true if we've been here before, set to true if not. Set a bit in a |
258 | bit vector to indicate visiting this state. Each (syms,len,left) state | 256 | // bit vector to indicate visiting this state. Each (syms,len,left) state has a |
259 | has a variable size bit vector indexed by (mem,rem). The bit vector is | 257 | // variable size bit vector indexed by (mem,rem). The bit vector is lengthened |
260 | lengthened if needed to allow setting the (mem,rem) bit. */ | 258 | // if needed to allow setting the (mem,rem) bit. |
261 | local int beenhere(int syms, int len, int left, int mem, int rem) | 259 | local int beenhere(int syms, int len, int left, int mem, int rem) { |
262 | { | 260 | size_t index; // index for this state's bit vector |
263 | size_t index; /* index for this state's bit vector */ | 261 | size_t offset; // offset in this state's bit vector |
264 | size_t offset; /* offset in this state's bit vector */ | 262 | int bit; // mask for this state's bit |
265 | int bit; /* mask for this state's bit */ | 263 | size_t length; // length of the bit vector in bytes |
266 | size_t length; /* length of the bit vector in bytes */ | 264 | char *vector; // new or enlarged bit vector |
267 | char *vector; /* new or enlarged bit vector */ | 265 | |
268 | 266 | // point to vector for (syms,left,len), bit in vector for (mem,rem) | |
269 | /* point to vector for (syms,left,len), bit in vector for (mem,rem) */ | ||
270 | index = INDEX(syms, left, len); | 267 | index = INDEX(syms, left, len); |
271 | mem -= 1 << g.root; | 268 | mem -= 1 << g.root; |
272 | offset = (mem >> 3) + rem; | 269 | offset = (mem >> 3) + rem; |
273 | offset = ((offset * (offset + 1)) >> 1) + rem; | 270 | offset = ((offset * (offset + 1)) >> 1) + rem; |
274 | bit = 1 << (mem & 7); | 271 | bit = 1 << (mem & 7); |
275 | 272 | ||
276 | /* see if we've been here */ | 273 | // see if we've been here |
277 | length = g.done[index].len; | 274 | length = g.done[index].len; |
278 | if (offset < length && (g.done[index].vec[offset] & bit) != 0) | 275 | if (offset < length && (g.done[index].vec[offset] & bit) != 0) |
279 | return 1; /* done this! */ | 276 | return 1; // done this! |
280 | 277 | ||
281 | /* we haven't been here before -- set the bit to show we have now */ | 278 | // we haven't been here before -- set the bit to show we have now |
282 | 279 | ||
283 | /* see if we need to lengthen the vector in order to set the bit */ | 280 | // see if we need to lengthen the vector in order to set the bit |
284 | if (length <= offset) { | 281 | if (length <= offset) { |
285 | /* if we have one already, enlarge it, zero out the appended space */ | 282 | // if we have one already, enlarge it, zero out the appended space |
286 | if (length) { | 283 | if (length) { |
287 | do { | 284 | do { |
288 | length <<= 1; | 285 | length <<= 1; |
@@ -293,7 +290,7 @@ local int beenhere(int syms, int len, int left, int mem, int rem) | |||
293 | length - g.done[index].len); | 290 | length - g.done[index].len); |
294 | } | 291 | } |
295 | 292 | ||
296 | /* otherwise we need to make a new vector and zero it out */ | 293 | // otherwise we need to make a new vector and zero it out |
297 | else { | 294 | else { |
298 | length = 1 << (len - g.root); | 295 | length = 1 << (len - g.root); |
299 | while (length <= offset) | 296 | while (length <= offset) |
@@ -301,40 +298,39 @@ local int beenhere(int syms, int len, int left, int mem, int rem) | |||
301 | vector = calloc(length, sizeof(char)); | 298 | vector = calloc(length, sizeof(char)); |
302 | } | 299 | } |
303 | 300 | ||
304 | /* in either case, bail if we can't get the memory */ | 301 | // in either case, bail if we can't get the memory |
305 | if (vector == NULL) { | 302 | if (vector == NULL) { |
306 | fputs("abort: unable to allocate enough memory\n", stderr); | 303 | fputs("abort: unable to allocate enough memory\n", stderr); |
307 | cleanup(); | 304 | cleanup(); |
308 | exit(1); | 305 | exit(1); |
309 | } | 306 | } |
310 | 307 | ||
311 | /* install the new vector */ | 308 | // install the new vector |
312 | g.done[index].len = length; | 309 | g.done[index].len = length; |
313 | g.done[index].vec = vector; | 310 | g.done[index].vec = vector; |
314 | } | 311 | } |
315 | 312 | ||
316 | /* set the bit */ | 313 | // set the bit |
317 | g.done[index].vec[offset] |= bit; | 314 | g.done[index].vec[offset] |= bit; |
318 | return 0; | 315 | return 0; |
319 | } | 316 | } |
320 | 317 | ||
321 | /* Examine all possible codes from the given node (syms, len, left). Compute | 318 | // Examine all possible codes from the given node (syms, len, left). Compute |
322 | the amount of memory required to build inflate's decoding tables, where the | 319 | // the amount of memory required to build inflate's decoding tables, where the |
323 | number of code structures used so far is mem, and the number remaining in | 320 | // number of code structures used so far is mem, and the number remaining in |
324 | the current sub-table is rem. Uses the globals max, code, root, large, and | 321 | // the current sub-table is rem. Uses the globals max, code, root, large, and |
325 | done. */ | 322 | // done. |
326 | local void examine(int syms, int len, int left, int mem, int rem) | 323 | local void examine(int syms, int len, int left, int mem, int rem) { |
327 | { | 324 | int least; // least number of syms to use at this juncture |
328 | int least; /* least number of syms to use at this juncture */ | 325 | int most; // most number of syms to use at this juncture |
329 | int most; /* most number of syms to use at this juncture */ | 326 | int use; // number of bit patterns to use in next call |
330 | int use; /* number of bit patterns to use in next call */ | 327 | |
331 | 328 | // see if we have a complete code | |
332 | /* see if we have a complete code */ | ||
333 | if (syms == left) { | 329 | if (syms == left) { |
334 | /* set the last code entry */ | 330 | // set the last code entry |
335 | g.code[len] = left; | 331 | g.code[len] = left; |
336 | 332 | ||
337 | /* complete computation of memory used by this code */ | 333 | // complete computation of memory used by this code |
338 | while (rem < left) { | 334 | while (rem < left) { |
339 | left -= rem; | 335 | left -= rem; |
340 | rem = 1 << (len - g.root); | 336 | rem = 1 << (len - g.root); |
@@ -342,7 +338,7 @@ local void examine(int syms, int len, int left, int mem, int rem) | |||
342 | } | 338 | } |
343 | assert(rem == left); | 339 | assert(rem == left); |
344 | 340 | ||
345 | /* if this is a new maximum, show the entries used and the sub-code */ | 341 | // if this is a new maximum, show the entries used and the sub-code |
346 | if (mem > g.large) { | 342 | if (mem > g.large) { |
347 | g.large = mem; | 343 | g.large = mem; |
348 | printf("max %d: ", mem); | 344 | printf("max %d: ", mem); |
@@ -353,28 +349,28 @@ local void examine(int syms, int len, int left, int mem, int rem) | |||
353 | fflush(stdout); | 349 | fflush(stdout); |
354 | } | 350 | } |
355 | 351 | ||
356 | /* remove entries as we drop back down in the recursion */ | 352 | // remove entries as we drop back down in the recursion |
357 | g.code[len] = 0; | 353 | g.code[len] = 0; |
358 | return; | 354 | return; |
359 | } | 355 | } |
360 | 356 | ||
361 | /* prune the tree if we can */ | 357 | // prune the tree if we can |
362 | if (beenhere(syms, len, left, mem, rem)) | 358 | if (beenhere(syms, len, left, mem, rem)) |
363 | return; | 359 | return; |
364 | 360 | ||
365 | /* we need to use at least this many bit patterns so that the code won't be | 361 | // we need to use at least this many bit patterns so that the code won't be |
366 | incomplete at the next length (more bit patterns than symbols) */ | 362 | // incomplete at the next length (more bit patterns than symbols) |
367 | least = (left << 1) - syms; | 363 | least = (left << 1) - syms; |
368 | if (least < 0) | 364 | if (least < 0) |
369 | least = 0; | 365 | least = 0; |
370 | 366 | ||
371 | /* we can use at most this many bit patterns, lest there not be enough | 367 | // we can use at most this many bit patterns, lest there not be enough |
372 | available for the remaining symbols at the maximum length (if there were | 368 | // available for the remaining symbols at the maximum length (if there were |
373 | no limit to the code length, this would become: most = left - 1) */ | 369 | // no limit to the code length, this would become: most = left - 1) |
374 | most = (((code_t)left << (g.max - len)) - syms) / | 370 | most = (((code_t)left << (g.max - len)) - syms) / |
375 | (((code_t)1 << (g.max - len)) - 1); | 371 | (((code_t)1 << (g.max - len)) - 1); |
376 | 372 | ||
377 | /* occupy least table spaces, creating new sub-tables as needed */ | 373 | // occupy least table spaces, creating new sub-tables as needed |
378 | use = least; | 374 | use = least; |
379 | while (rem < use) { | 375 | while (rem < use) { |
380 | use -= rem; | 376 | use -= rem; |
@@ -383,7 +379,7 @@ local void examine(int syms, int len, int left, int mem, int rem) | |||
383 | } | 379 | } |
384 | rem -= use; | 380 | rem -= use; |
385 | 381 | ||
386 | /* examine codes from here, updating table space as we go */ | 382 | // examine codes from here, updating table space as we go |
387 | for (use = least; use <= most; use++) { | 383 | for (use = least; use <= most; use++) { |
388 | g.code[len] = use; | 384 | g.code[len] = use; |
389 | examine(syms - use, len + 1, (left - use) << 1, | 385 | examine(syms - use, len + 1, (left - use) << 1, |
@@ -395,79 +391,74 @@ local void examine(int syms, int len, int left, int mem, int rem) | |||
395 | rem--; | 391 | rem--; |
396 | } | 392 | } |
397 | 393 | ||
398 | /* remove entries as we drop back down in the recursion */ | 394 | // remove entries as we drop back down in the recursion |
399 | g.code[len] = 0; | 395 | g.code[len] = 0; |
400 | } | 396 | } |
401 | 397 | ||
402 | /* Look at all sub-codes starting with root + 1 bits. Look at only the valid | 398 | // Look at all sub-codes starting with root + 1 bits. Look at only the valid |
403 | intermediate code states (syms, left, len). For each completed code, | 399 | // intermediate code states (syms, left, len). For each completed code, |
404 | calculate the amount of memory required by inflate to build the decoding | 400 | // calculate the amount of memory required by inflate to build the decoding |
405 | tables. Find the maximum amount of memory required and show the code that | 401 | // tables. Find the maximum amount of memory required and show the code that |
406 | requires that maximum. Uses the globals max, root, and num. */ | 402 | // requires that maximum. Uses the globals max, root, and num. |
407 | local void enough(int syms) | 403 | local void enough(int syms) { |
408 | { | 404 | int n; // number of remaing symbols for this node |
409 | int n; /* number of remaing symbols for this node */ | 405 | int left; // number of unused bit patterns at this length |
410 | int left; /* number of unused bit patterns at this length */ | 406 | size_t index; // index of this case in *num |
411 | size_t index; /* index of this case in *num */ | 407 | |
412 | 408 | // clear code | |
413 | /* clear code */ | ||
414 | for (n = 0; n <= g.max; n++) | 409 | for (n = 0; n <= g.max; n++) |
415 | g.code[n] = 0; | 410 | g.code[n] = 0; |
416 | 411 | ||
417 | /* look at all (root + 1) bit and longer codes */ | 412 | // look at all (root + 1) bit and longer codes |
418 | g.large = 1 << g.root; /* base table */ | 413 | g.large = 1 << g.root; // base table |
419 | if (g.root < g.max) /* otherwise, there's only a base table */ | 414 | if (g.root < g.max) // otherwise, there's only a base table |
420 | for (n = 3; n <= syms; n++) | 415 | for (n = 3; n <= syms; n++) |
421 | for (left = 2; left < n; left += 2) | 416 | for (left = 2; left < n; left += 2) { |
422 | { | 417 | // look at all reachable (root + 1) bit nodes, and the |
423 | /* look at all reachable (root + 1) bit nodes, and the | 418 | // resulting codes (complete at root + 2 or more) |
424 | resulting codes (complete at root + 2 or more) */ | ||
425 | index = INDEX(n, left, g.root + 1); | 419 | index = INDEX(n, left, g.root + 1); |
426 | if (g.root + 1 < g.max && g.num[index]) /* reachable node */ | 420 | if (g.root + 1 < g.max && g.num[index]) // reachable node |
427 | examine(n, g.root + 1, left, 1 << g.root, 0); | 421 | examine(n, g.root + 1, left, 1 << g.root, 0); |
428 | 422 | ||
429 | /* also look at root bit codes with completions at root + 1 | 423 | // also look at root bit codes with completions at root + 1 |
430 | bits (not saved in num, since complete), just in case */ | 424 | // bits (not saved in num, since complete), just in case |
431 | if (g.num[index - 1] && n <= left << 1) | 425 | if (g.num[index - 1] && n <= left << 1) |
432 | examine((n - left) << 1, g.root + 1, (n - left) << 1, | 426 | examine((n - left) << 1, g.root + 1, (n - left) << 1, |
433 | 1 << g.root, 0); | 427 | 1 << g.root, 0); |
434 | } | 428 | } |
435 | 429 | ||
436 | /* done */ | 430 | // done |
437 | printf("done: maximum of %d table entries\n", g.large); | 431 | printf("done: maximum of %d table entries\n", g.large); |
438 | } | 432 | } |
439 | 433 | ||
440 | /* | 434 | // Examine and show the total number of possible Huffman codes for a given |
441 | Examine and show the total number of possible Huffman codes for a given | 435 | // maximum number of symbols, initial root table size, and maximum code length |
442 | maximum number of symbols, initial root table size, and maximum code length | 436 | // in bits -- those are the command arguments in that order. The default values |
443 | in bits -- those are the command arguments in that order. The default | 437 | // are 286, 9, and 15 respectively, for the deflate literal/length code. The |
444 | values are 286, 9, and 15 respectively, for the deflate literal/length code. | 438 | // possible codes are counted for each number of coded symbols from two to the |
445 | The possible codes are counted for each number of coded symbols from two to | 439 | // maximum. The counts for each of those and the total number of codes are |
446 | the maximum. The counts for each of those and the total number of codes are | 440 | // shown. The maximum number of inflate table entires is then calculated across |
447 | shown. The maximum number of inflate table entires is then calculated | 441 | // all possible codes. Each new maximum number of table entries and the |
448 | across all possible codes. Each new maximum number of table entries and the | 442 | // associated sub-code (starting at root + 1 == 10 bits) is shown. |
449 | associated sub-code (starting at root + 1 == 10 bits) is shown. | 443 | // |
450 | 444 | // To count and examine Huffman codes that are not length-limited, provide a | |
451 | To count and examine Huffman codes that are not length-limited, provide a | 445 | // maximum length equal to the number of symbols minus one. |
452 | maximum length equal to the number of symbols minus one. | 446 | // |
453 | 447 | // For the deflate literal/length code, use "enough". For the deflate distance | |
454 | For the deflate literal/length code, use "enough". For the deflate distance | 448 | // code, use "enough 30 6". |
455 | code, use "enough 30 6". | 449 | int main(int argc, char **argv) { |
456 | */ | 450 | int syms; // total number of symbols to code |
457 | int main(int argc, char **argv) | 451 | int n; // number of symbols to code for this run |
458 | { | 452 | big_t got; // return value of count() |
459 | int syms; /* total number of symbols to code */ | 453 | big_t sum; // accumulated number of codes over n |
460 | int n; /* number of symbols to code for this run */ | 454 | code_t word; // for counting bits in code_t |
461 | big_t got; /* return value of count() */ | 455 | |
462 | big_t sum; /* accumulated number of codes over n */ | 456 | // set up globals for cleanup() |
463 | code_t word; /* for counting bits in code_t */ | ||
464 | |||
465 | /* set up globals for cleanup() */ | ||
466 | g.code = NULL; | 457 | g.code = NULL; |
467 | g.num = NULL; | 458 | g.num = NULL; |
468 | g.done = NULL; | 459 | g.done = NULL; |
469 | 460 | ||
470 | /* get arguments -- default to the deflate literal/length code */ | 461 | // get arguments -- default to the deflate literal/length code |
471 | syms = 286; | 462 | syms = 286; |
472 | g.root = 9; | 463 | g.root = 9; |
473 | g.max = 15; | 464 | g.max = 15; |
@@ -485,38 +476,38 @@ int main(int argc, char **argv) | |||
485 | return 1; | 476 | return 1; |
486 | } | 477 | } |
487 | 478 | ||
488 | /* if not restricting the code length, the longest is syms - 1 */ | 479 | // if not restricting the code length, the longest is syms - 1 |
489 | if (g.max > syms - 1) | 480 | if (g.max > syms - 1) |
490 | g.max = syms - 1; | 481 | g.max = syms - 1; |
491 | 482 | ||
492 | /* determine the number of bits in a code_t */ | 483 | // determine the number of bits in a code_t |
493 | for (n = 0, word = 1; word; n++, word <<= 1) | 484 | for (n = 0, word = 1; word; n++, word <<= 1) |
494 | ; | 485 | ; |
495 | 486 | ||
496 | /* make sure that the calculation of most will not overflow */ | 487 | // make sure that the calculation of most will not overflow |
497 | if (g.max > n || (code_t)(syms - 2) >= (((code_t)0 - 1) >> (g.max - 1))) { | 488 | if (g.max > n || (code_t)(syms - 2) >= (((code_t)0 - 1) >> (g.max - 1))) { |
498 | fputs("abort: code length too long for internal types\n", stderr); | 489 | fputs("abort: code length too long for internal types\n", stderr); |
499 | return 1; | 490 | return 1; |
500 | } | 491 | } |
501 | 492 | ||
502 | /* reject impossible code requests */ | 493 | // reject impossible code requests |
503 | if ((code_t)(syms - 1) > ((code_t)1 << g.max) - 1) { | 494 | if ((code_t)(syms - 1) > ((code_t)1 << g.max) - 1) { |
504 | fprintf(stderr, "%d symbols cannot be coded in %d bits\n", | 495 | fprintf(stderr, "%d symbols cannot be coded in %d bits\n", |
505 | syms, g.max); | 496 | syms, g.max); |
506 | return 1; | 497 | return 1; |
507 | } | 498 | } |
508 | 499 | ||
509 | /* allocate code vector */ | 500 | // allocate code vector |
510 | g.code = calloc(g.max + 1, sizeof(int)); | 501 | g.code = calloc(g.max + 1, sizeof(int)); |
511 | if (g.code == NULL) { | 502 | if (g.code == NULL) { |
512 | fputs("abort: unable to allocate enough memory\n", stderr); | 503 | fputs("abort: unable to allocate enough memory\n", stderr); |
513 | return 1; | 504 | return 1; |
514 | } | 505 | } |
515 | 506 | ||
516 | /* determine size of saved results array, checking for overflows, | 507 | // determine size of saved results array, checking for overflows, |
517 | allocate and clear the array (set all to zero with calloc()) */ | 508 | // allocate and clear the array (set all to zero with calloc()) |
518 | if (syms == 2) /* iff max == 1 */ | 509 | if (syms == 2) // iff max == 1 |
519 | g.num = NULL; /* won't be saving any results */ | 510 | g.num = NULL; // won't be saving any results |
520 | else { | 511 | else { |
521 | g.size = syms >> 1; | 512 | g.size = syms >> 1; |
522 | if (g.size > ((size_t)0 - 1) / (n = (syms - 1) >> 1) || | 513 | if (g.size > ((size_t)0 - 1) / (n = (syms - 1) >> 1) || |
@@ -529,12 +520,12 @@ int main(int argc, char **argv) | |||
529 | } | 520 | } |
530 | } | 521 | } |
531 | 522 | ||
532 | /* count possible codes for all numbers of symbols, add up counts */ | 523 | // count possible codes for all numbers of symbols, add up counts |
533 | sum = 0; | 524 | sum = 0; |
534 | for (n = 2; n <= syms; n++) { | 525 | for (n = 2; n <= syms; n++) { |
535 | got = count(n, 1, 2); | 526 | got = count(n, 1, 2); |
536 | sum += got; | 527 | sum += got; |
537 | if (got == (big_t)0 - 1 || sum < got) { /* overflow */ | 528 | if (got == (big_t)0 - 1 || sum < got) { // overflow |
538 | fputs("abort: can't count that high!\n", stderr); | 529 | fputs("abort: can't count that high!\n", stderr); |
539 | cleanup(); | 530 | cleanup(); |
540 | return 1; | 531 | return 1; |
@@ -547,7 +538,7 @@ int main(int argc, char **argv) | |||
547 | else | 538 | else |
548 | puts(" (no length limit)"); | 539 | puts(" (no length limit)"); |
549 | 540 | ||
550 | /* allocate and clear done array for beenhere() */ | 541 | // allocate and clear done array for beenhere() |
551 | if (syms == 2) | 542 | if (syms == 2) |
552 | g.done = NULL; | 543 | g.done = NULL; |
553 | else if (g.size > ((size_t)0 - 1) / sizeof(struct tab) || | 544 | else if (g.size > ((size_t)0 - 1) / sizeof(struct tab) || |
@@ -557,15 +548,15 @@ int main(int argc, char **argv) | |||
557 | return 1; | 548 | return 1; |
558 | } | 549 | } |
559 | 550 | ||
560 | /* find and show maximum inflate table usage */ | 551 | // find and show maximum inflate table usage |
561 | if (g.root > g.max) /* reduce root to max length */ | 552 | if (g.root > g.max) // reduce root to max length |
562 | g.root = g.max; | 553 | g.root = g.max; |
563 | if ((code_t)syms < ((code_t)1 << (g.root + 1))) | 554 | if ((code_t)syms < ((code_t)1 << (g.root + 1))) |
564 | enough(syms); | 555 | enough(syms); |
565 | else | 556 | else |
566 | puts("cannot handle minimum code lengths > root"); | 557 | puts("cannot handle minimum code lengths > root"); |
567 | 558 | ||
568 | /* done */ | 559 | // done |
569 | cleanup(); | 560 | cleanup(); |
570 | return 0; | 561 | return 0; |
571 | } | 562 | } |