aboutsummaryrefslogtreecommitdiff
path: root/deflate.c
diff options
context:
space:
mode:
authorMark Adler <fork@madler.net>2022-10-03 20:05:58 -0700
committerMark Adler <fork@madler.net>2022-10-05 15:16:42 -0700
commit4078713e3bdd4a310fde906a1d6b74a6bfe77e5b (patch)
treeb6d301043bcdb38243a9c74b928ae194eae008e4 /deflate.c
parent7fabcb53576aca08b8e25174eb7b0df7c585e4e0 (diff)
downloadzlib-4078713e3bdd4a310fde906a1d6b74a6bfe77e5b.tar.gz
zlib-4078713e3bdd4a310fde906a1d6b74a6bfe77e5b.tar.bz2
zlib-4078713e3bdd4a310fde906a1d6b74a6bfe77e5b.zip
Tighten deflateBound bounds.
This improves the non-default expansion from 14% down to 4% in most cases, and 13% in the remainder.
Diffstat (limited to 'deflate.c')
-rw-r--r--deflate.c59
1 files changed, 37 insertions, 22 deletions
diff --git a/deflate.c b/deflate.c
index 7f421e4..2d638ca 100644
--- a/deflate.c
+++ b/deflate.c
@@ -674,36 +674,50 @@ int ZEXPORT deflateTune(strm, good_length, max_lazy, nice_length, max_chain)
674} 674}
675 675
676/* ========================================================================= 676/* =========================================================================
677 * For the default windowBits of 15 and memLevel of 8, this function returns 677 * For the default windowBits of 15 and memLevel of 8, this function returns a
678 * a close to exact, as well as small, upper bound on the compressed size. 678 * close to exact, as well as small, upper bound on the compressed size. This
679 * They are coded as constants here for a reason--if the #define's are 679 * is an expansion of ~0.03%, plus a small constant.
680 * changed, then this function needs to be changed as well. The return
681 * value for 15 and 8 only works for those exact settings.
682 * 680 *
683 * For any setting other than those defaults for windowBits and memLevel, 681 * For any setting other than those defaults for windowBits and memLevel, one
684 * the value returned is a conservative worst case for the maximum expansion 682 * of two worst case bounds is returned. This is at most an expansion of ~4% or
685 * resulting from using fixed blocks instead of stored blocks, which deflate 683 * ~13%, plus a small constant.
686 * can emit on compressed data for some combinations of the parameters.
687 * 684 *
688 * This function could be more sophisticated to provide closer upper bounds for 685 * Both the 0.03% and 4% derive from the overhead of stored blocks. The first
689 * every combination of windowBits and memLevel. But even the conservative 686 * one is for stored blocks of 16383 bytes (memLevel == 8), whereas the second
690 * upper bound of about 14% expansion does not seem onerous for output buffer 687 * is for stored blocks of 127 bytes (the worst case memLevel == 1). The
691 * allocation. 688 * expansion results from five bytes of header for each stored block.
689 *
690 * The larger expansion of 13% results from a window size less than or equal to
691 * the symbols buffer size (windowBits <= memLevel + 7). In that case some of
692 * the data being compressed may have slid out of the sliding window, impeding
693 * a stored block from being emitted. Then the only choice is a fixed or
694 * dynamic block, where a fixed block limits the maximum expansion to 9 bits
695 * per 8-bit byte, plus 10 bits for every block. The smallest block size for
696 * which this can occur is 255 (memLevel == 2).
697 *
698 * Shifts are used to approximate divisions, for speed.
692 */ 699 */
693uLong ZEXPORT deflateBound(strm, sourceLen) 700uLong ZEXPORT deflateBound(strm, sourceLen)
694 z_streamp strm; 701 z_streamp strm;
695 uLong sourceLen; 702 uLong sourceLen;
696{ 703{
697 deflate_state *s; 704 deflate_state *s;
698 uLong complen, wraplen; 705 uLong fixedlen, storelen, wraplen;
706
707 /* upper bound for fixed blocks with 9-bit literals and length 255
708 (memLevel == 2, which is the lowest that may not use stored blocks) --
709 ~13% overhead plus a small constant */
710 fixedlen = sourceLen + (sourceLen >> 3) + (sourceLen >> 8) +
711 (sourceLen >> 9) + 4;
699 712
700 /* conservative upper bound for compressed data */ 713 /* upper bound for stored blocks with length 127 (memLevel == 1) --
701 complen = sourceLen + 714 ~4% overhead plus a small constant */
702 ((sourceLen + 7) >> 3) + ((sourceLen + 63) >> 6) + 5; 715 storelen = sourceLen + (sourceLen >> 5) + (sourceLen >> 7) +
716 (sourceLen >> 11) + 7;
703 717
704 /* if can't get parameters, return conservative bound plus zlib wrapper */ 718 /* if can't get parameters, return larger bound plus a zlib wrapper */
705 if (deflateStateCheck(strm)) 719 if (deflateStateCheck(strm))
706 return complen + 6; 720 return (fixedlen > storelen ? fixedlen : storelen) + 6;
707 721
708 /* compute wrapper length */ 722 /* compute wrapper length */
709 s = strm->state; 723 s = strm->state;
@@ -740,11 +754,12 @@ uLong ZEXPORT deflateBound(strm, sourceLen)
740 wraplen = 6; 754 wraplen = 6;
741 } 755 }
742 756
743 /* if not default parameters, return conservative bound */ 757 /* if not default parameters, return one of the conservative bounds */
744 if (s->w_bits != 15 || s->hash_bits != 8 + 7) 758 if (s->w_bits != 15 || s->hash_bits != 8 + 7)
745 return complen + wraplen; 759 return (s->w_bits <= s->hash_bits ? fixedlen : storelen) + wraplen;
746 760
747 /* default settings: return tight bound for that case */ 761 /* default settings: return tight bound for that case -- ~0.03% overhead
762 plus a small constant */
748 return sourceLen + (sourceLen >> 12) + (sourceLen >> 14) + 763 return sourceLen + (sourceLen >> 12) + (sourceLen >> 14) +
749 (sourceLen >> 25) + 13 - 6 + wraplen; 764 (sourceLen >> 25) + 13 - 6 + wraplen;
750} 765}