summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMark Adler <madler@alumni.caltech.edu>2018-08-04 14:27:02 -0700
committerMark Adler <madler@alumni.caltech.edu>2018-08-05 23:08:25 -0700
commitd00c147f531e757bfbd5cfab58d941caa8333ba8 (patch)
tree513995a58c5a5c92986c337d06f5ce91f73a8448
parent5b1381006b715a93f226117f82b7b6dd81bfff5e (diff)
downloadzlib-d00c147f531e757bfbd5cfab58d941caa8333ba8.tar.gz
zlib-d00c147f531e757bfbd5cfab58d941caa8333ba8.tar.bz2
zlib-d00c147f531e757bfbd5cfab58d941caa8333ba8.zip
Clarify that prefix codes are counted in enough.c.
There is no assurance that all prefix codes are reachable as optimal Huffman codes for the numbers of symbols encountered in a deflate block. This code considers all possible prefix codes, which might be a larger set than all possible Huffman codes, depending on the constraints.
-rw-r--r--examples/enough.c30
1 files changed, 15 insertions, 15 deletions
diff --git a/examples/enough.c b/examples/enough.c
index 3f8c0d2..5cfbfbf 100644
--- a/examples/enough.c
+++ b/examples/enough.c
@@ -1,5 +1,5 @@
1/* enough.c -- determine the maximum size of inflate's Huffman code tables over 1/* enough.c -- determine the maximum size of inflate's Huffman code tables over
2 * all possible valid and complete Huffman codes, subject to a length limit. 2 * all possible valid and complete prefix codes, subject to a length limit.
3 * Copyright (C) 2007, 2008, 2012, 2018 Mark Adler 3 * Copyright (C) 2007, 2008, 2012, 2018 Mark Adler
4 * Version 1.5 1 August 2018 Mark Adler 4 * Version 1.5 1 August 2018 Mark Adler
5 */ 5 */
@@ -17,14 +17,14 @@
17 1.4 18 Aug 2012 Avoid shifts more than bits in type (caused endless loop!) 17 1.4 18 Aug 2012 Avoid shifts more than bits in type (caused endless loop!)
18 Clean up comparisons of different types 18 Clean up comparisons of different types
19 Clean up code indentation 19 Clean up code indentation
20 1.5 1 Aug 2018 Clean up code style and formatting 20 1.5 1 Aug 2018 Clean up code style, formatting, and comments
21 Use inline function instead of macro for index 21 Use inline function instead of macro for index
22 */ 22 */
23 23
24/* 24/*
25 Examine all possible Huffman codes for a given number of symbols and a 25 Examine all possible prefix codes for a given number of symbols and a
26 maximum code length in bits to determine the maximum table size for zlib's 26 maximum code length in bits to determine the maximum table size for zlib's
27 inflate. Only complete Huffman codes are counted. 27 inflate. Only complete prefix codes are counted.
28 28
29 Two codes are considered distinct if the vectors of the number of codes per 29 Two codes are considered distinct if the vectors of the number of codes per
30 length are not identical. So permutations of the symbol assignments result 30 length are not identical. So permutations of the symbol assignments result
@@ -71,7 +71,7 @@
71 the maximum code length in bits. However this is a very small price to pay 71 the maximum code length in bits. However this is a very small price to pay
72 for the vast speedup. 72 for the vast speedup.
73 73
74 First, all of the possible Huffman codes are counted, and reachable 74 First, all of the possible prefix codes are counted, and reachable
75 intermediate states are noted by a non-zero count in a saved-results array. 75 intermediate states are noted by a non-zero count in a saved-results array.
76 Second, the intermediate states that lead to (root + 1) bit or longer codes 76 Second, the intermediate states that lead to (root + 1) bit or longer codes
77 are used to look at all sub-codes from those junctures for their inflate 77 are used to look at all sub-codes from those junctures for their inflate
@@ -83,7 +83,7 @@
83 identifying the reachable nodes, accounts for about six of the orders of 83 identifying the reachable nodes, accounts for about six of the orders of
84 magnitude of improvement for the default arguments. About another four 84 magnitude of improvement for the default arguments. About another four
85 orders of magnitude come from not revisiting previous states. Out of 85 orders of magnitude come from not revisiting previous states. Out of
86 approximately 2x10^16 possible Huffman codes, only about 2x10^6 sub-codes 86 approximately 2x10^16 possible prefix codes, only about 2x10^6 sub-codes
87 need to be examined to cover all of the possible table memory usage cases 87 need to be examined to cover all of the possible table memory usage cases
88 for the default arguments of 286 symbols limited to 15-bit codes. 88 for the default arguments of 286 symbols limited to 15-bit codes.
89 89
@@ -113,7 +113,7 @@ typedef unsigned long long big_t; // type for code counting
113#define PRIbig "llu" // printf format for big_t 113#define PRIbig "llu" // printf format for big_t
114typedef unsigned long long code_t; // type for bit pattern counting 114typedef unsigned long long code_t; // type for bit pattern counting
115struct tab { // type for been here check 115struct tab { // type for been here check
116 size_t len; // length of bit vector in char's 116 size_t len; // length of bit vector in octets
117 char *vec; // allocated bit vector 117 char *vec; // allocated bit vector
118}; 118};
119 119
@@ -146,7 +146,7 @@ struct tab { // type for been here check
146 For the deflate example of 286 symbols limited to 15-bit codes, the array 146 For the deflate example of 286 symbols limited to 15-bit codes, the array
147 has 284,284 entries, taking up 2.17 MB for an 8-byte big_t. More than half 147 has 284,284 entries, taking up 2.17 MB for an 8-byte big_t. More than half
148 of the space allocated for saved results is actually used -- not all 148 of the space allocated for saved results is actually used -- not all
149 possible triplets are reached in the generation of valid Huffman codes. 149 possible triplets are reached in the generation of valid prefix codes.
150 */ 150 */
151 151
152/* The array for tracking visited states, done[], is itself indexed identically 152/* The array for tracking visited states, done[], is itself indexed identically
@@ -201,11 +201,11 @@ local void cleanup(void) {
201 free(g.code); 201 free(g.code);
202} 202}
203 203
204// Return the number of possible Huffman codes using bit patterns of lengths 204// Return the number of possible prefix codes using bit patterns of lengths len
205// len through max inclusive, coding syms symbols, with left bit patterns of 205// through max inclusive, coding syms symbols, with left bit patterns of length
206// length len unused -- return -1 if there is an overflow in the counting. Keep 206// len unused -- return -1 if there is an overflow in the counting. Keep a
207// a record of previous results in num to prevent repeating the same 207// record of previous results in num to prevent repeating the same calculation.
208// calculation. Uses the globals max and num. 208// Uses the globals max and num.
209local big_t count(int syms, int len, int left) { 209local big_t count(int syms, int len, int left) {
210 big_t sum; // number of possible codes from this juncture 210 big_t sum; // number of possible codes from this juncture
211 big_t got; // value returned from count() 211 big_t got; // value returned from count()
@@ -435,7 +435,7 @@ local void enough(int syms) {
435 printf("done: maximum of %d table entries\n", g.large); 435 printf("done: maximum of %d table entries\n", g.large);
436} 436}
437 437
438// Examine and show the total number of possible Huffman codes for a given 438// Examine and show the total number of possible prefix codes for a given
439// maximum number of symbols, initial root table size, and maximum code length 439// maximum number of symbols, initial root table size, and maximum code length
440// in bits -- those are the command arguments in that order. The default values 440// in bits -- those are the command arguments in that order. The default values
441// are 286, 9, and 15 respectively, for the deflate literal/length code. The 441// are 286, 9, and 15 respectively, for the deflate literal/length code. The
@@ -445,7 +445,7 @@ local void enough(int syms) {
445// all possible codes. Each new maximum number of table entries and the 445// all possible codes. Each new maximum number of table entries and the
446// associated sub-code (starting at root + 1 == 10 bits) is shown. 446// associated sub-code (starting at root + 1 == 10 bits) is shown.
447// 447//
448// To count and examine Huffman codes that are not length-limited, provide a 448// To count and examine prefix codes that are not length-limited, provide a
449// maximum length equal to the number of symbols minus one. 449// maximum length equal to the number of symbols minus one.
450// 450//
451// For the deflate literal/length code, use "enough". For the deflate distance 451// For the deflate literal/length code, use "enough". For the deflate distance