summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMark Adler <madler@alumni.caltech.edu>2023-09-18 21:17:00 -0700
committerMark Adler <madler@alumni.caltech.edu>2023-09-21 00:14:56 -0700
commitac8f12c97d1afd9bafa9c710f827d40a407d3266 (patch)
treecc8841032d996c2f5c41d75c7edf65558d2d90ce
parentbd9c329c1055a9265812352655ed2eec93f36e92 (diff)
downloadzlib-ac8f12c97d1afd9bafa9c710f827d40a407d3266.tar.gz
zlib-ac8f12c97d1afd9bafa9c710f827d40a407d3266.tar.bz2
zlib-ac8f12c97d1afd9bafa9c710f827d40a407d3266.zip
Add LIT_MEM define to use more memory for a small deflate speedup.
A bug fix in zlib 1.2.12 resulted in a slight slowdown (1-2%) of deflate. This commit provides the option to #define LIT_MEM, which uses more memory to reverse most of that slowdown. The memory for the pending buffer and symbol buffers is increased by 25%, which increases the total memory usage with the default parameters by about 6%.
-rw-r--r--deflate.c21
-rw-r--r--deflate.h31
-rw-r--r--trees.c18
3 files changed, 67 insertions, 3 deletions
diff --git a/deflate.c b/deflate.c
index 5e72cd9..263bbc8 100644
--- a/deflate.c
+++ b/deflate.c
@@ -493,7 +493,11 @@ int ZEXPORT deflateInit2_(z_streamp strm, int level, int method,
493 * symbols from which it is being constructed. 493 * symbols from which it is being constructed.
494 */ 494 */
495 495
496#ifdef LIT_MEM
497 s->pending_buf = (uchf *) ZALLOC(strm, s->lit_bufsize, 5);
498#else
496 s->pending_buf = (uchf *) ZALLOC(strm, s->lit_bufsize, 4); 499 s->pending_buf = (uchf *) ZALLOC(strm, s->lit_bufsize, 4);
500#endif
497 s->pending_buf_size = (ulg)s->lit_bufsize * 4; 501 s->pending_buf_size = (ulg)s->lit_bufsize * 4;
498 502
499 if (s->window == Z_NULL || s->prev == Z_NULL || s->head == Z_NULL || 503 if (s->window == Z_NULL || s->prev == Z_NULL || s->head == Z_NULL ||
@@ -503,8 +507,14 @@ int ZEXPORT deflateInit2_(z_streamp strm, int level, int method,
503 deflateEnd (strm); 507 deflateEnd (strm);
504 return Z_MEM_ERROR; 508 return Z_MEM_ERROR;
505 } 509 }
510#ifdef LIT_MEM
511 s->d_buf = (ushf *)(s->pending_buf + (s->lit_bufsize << 1));
512 s->l_buf = s->pending_buf + (s->lit_bufsize << 2);
513 s->sym_end = s->lit_bufsize - 1;
514#else
506 s->sym_buf = s->pending_buf + s->lit_bufsize; 515 s->sym_buf = s->pending_buf + s->lit_bufsize;
507 s->sym_end = (s->lit_bufsize - 1) * 3; 516 s->sym_end = (s->lit_bufsize - 1) * 3;
517#endif
508 /* We avoid equality with lit_bufsize*3 because of wraparound at 64K 518 /* We avoid equality with lit_bufsize*3 because of wraparound at 64K
509 * on 16 bit machines and because stored blocks are restricted to 519 * on 16 bit machines and because stored blocks are restricted to
510 * 64K-1 bytes. 520 * 64K-1 bytes.
@@ -720,9 +730,15 @@ int ZEXPORT deflatePrime(z_streamp strm, int bits, int value) {
720 730
721 if (deflateStateCheck(strm)) return Z_STREAM_ERROR; 731 if (deflateStateCheck(strm)) return Z_STREAM_ERROR;
722 s = strm->state; 732 s = strm->state;
733#ifdef LIT_MEM
734 if (bits < 0 || bits > 16 ||
735 (uchf *)s->d_buf < s->pending_out + ((Buf_size + 7) >> 3))
736 return Z_BUF_ERROR;
737#else
723 if (bits < 0 || bits > 16 || 738 if (bits < 0 || bits > 16 ||
724 s->sym_buf < s->pending_out + ((Buf_size + 7) >> 3)) 739 s->sym_buf < s->pending_out + ((Buf_size + 7) >> 3))
725 return Z_BUF_ERROR; 740 return Z_BUF_ERROR;
741#endif
726 do { 742 do {
727 put = Buf_size - s->bi_valid; 743 put = Buf_size - s->bi_valid;
728 if (put > bits) 744 if (put > bits)
@@ -1308,7 +1324,12 @@ int ZEXPORT deflateCopy(z_streamp dest, z_streamp source) {
1308 zmemcpy(ds->pending_buf, ss->pending_buf, (uInt)ds->pending_buf_size); 1324 zmemcpy(ds->pending_buf, ss->pending_buf, (uInt)ds->pending_buf_size);
1309 1325
1310 ds->pending_out = ds->pending_buf + (ss->pending_out - ss->pending_buf); 1326 ds->pending_out = ds->pending_buf + (ss->pending_out - ss->pending_buf);
1327#ifdef LIT_MEM
1328 ds->d_buf = (ushf *)(ds->pending_buf + (ds->lit_bufsize << 1));
1329 ds->l_buf = ds->pending_buf + (ds->lit_bufsize << 2);
1330#else
1311 ds->sym_buf = ds->pending_buf + ds->lit_bufsize; 1331 ds->sym_buf = ds->pending_buf + ds->lit_bufsize;
1332#endif
1312 1333
1313 ds->l_desc.dyn_tree = ds->dyn_ltree; 1334 ds->l_desc.dyn_tree = ds->dyn_ltree;
1314 ds->d_desc.dyn_tree = ds->dyn_dtree; 1335 ds->d_desc.dyn_tree = ds->dyn_dtree;
diff --git a/deflate.h b/deflate.h
index 8696791..69fe3a9 100644
--- a/deflate.h
+++ b/deflate.h
@@ -23,6 +23,10 @@
23# define GZIP 23# define GZIP
24#endif 24#endif
25 25
26/* define LIT_MEM to slightly increase the speed of deflate (order 1% to 2%) at
27 the cost of a larger memory footprint */
28/* #define LIT_MEM */
29
26/* =========================================================================== 30/* ===========================================================================
27 * Internal compression state. 31 * Internal compression state.
28 */ 32 */
@@ -217,7 +221,12 @@ typedef struct internal_state {
217 /* Depth of each subtree used as tie breaker for trees of equal frequency 221 /* Depth of each subtree used as tie breaker for trees of equal frequency
218 */ 222 */
219 223
224#ifdef LIT_MEM
225 ushf *d_buf; /* buffer for distances */
226 uchf *l_buf; /* buffer for literals/lengths */
227#else
220 uchf *sym_buf; /* buffer for distances and literals/lengths */ 228 uchf *sym_buf; /* buffer for distances and literals/lengths */
229#endif
221 230
222 uInt lit_bufsize; 231 uInt lit_bufsize;
223 /* Size of match buffer for literals/lengths. There are 4 reasons for 232 /* Size of match buffer for literals/lengths. There are 4 reasons for
@@ -239,7 +248,7 @@ typedef struct internal_state {
239 * - I can't count above 4 248 * - I can't count above 4
240 */ 249 */
241 250
242 uInt sym_next; /* running index in sym_buf */ 251 uInt sym_next; /* running index in symbol buffer */
243 uInt sym_end; /* symbol table full when sym_next reaches this */ 252 uInt sym_end; /* symbol table full when sym_next reaches this */
244 253
245 ulg opt_len; /* bit length of current block with optimal trees */ 254 ulg opt_len; /* bit length of current block with optimal trees */
@@ -318,6 +327,25 @@ void ZLIB_INTERNAL _tr_stored_block(deflate_state *s, charf *buf,
318 extern const uch ZLIB_INTERNAL _dist_code[]; 327 extern const uch ZLIB_INTERNAL _dist_code[];
319#endif 328#endif
320 329
330#ifdef LIT_MEM
331# define _tr_tally_lit(s, c, flush) \
332 { uch cc = (c); \
333 s->d_buf[s->sym_next] = 0; \
334 s->l_buf[s->sym_next++] = cc; \
335 s->dyn_ltree[cc].Freq++; \
336 flush = (s->sym_next == s->sym_end); \
337 }
338# define _tr_tally_dist(s, distance, length, flush) \
339 { uch len = (uch)(length); \
340 ush dist = (ush)(distance); \
341 s->d_buf[s->sym_next] = dist; \
342 s->l_buf[s->sym_next++] = len; \
343 dist--; \
344 s->dyn_ltree[_length_code[len]+LITERALS+1].Freq++; \
345 s->dyn_dtree[d_code(dist)].Freq++; \
346 flush = (s->sym_next == s->sym_end); \
347 }
348#else
321# define _tr_tally_lit(s, c, flush) \ 349# define _tr_tally_lit(s, c, flush) \
322 { uch cc = (c); \ 350 { uch cc = (c); \
323 s->sym_buf[s->sym_next++] = 0; \ 351 s->sym_buf[s->sym_next++] = 0; \
@@ -337,6 +365,7 @@ void ZLIB_INTERNAL _tr_stored_block(deflate_state *s, charf *buf,
337 s->dyn_dtree[d_code(dist)].Freq++; \ 365 s->dyn_dtree[d_code(dist)].Freq++; \
338 flush = (s->sym_next == s->sym_end); \ 366 flush = (s->sym_next == s->sym_end); \
339 } 367 }
368#endif
340#else 369#else
341# define _tr_tally_lit(s, c, flush) flush = _tr_tally(s, 0, c) 370# define _tr_tally_lit(s, c, flush) flush = _tr_tally(s, 0, c)
342# define _tr_tally_dist(s, distance, length, flush) \ 371# define _tr_tally_dist(s, distance, length, flush) \
diff --git a/trees.c b/trees.c
index 8dbdc40..5ca23e9 100644
--- a/trees.c
+++ b/trees.c
@@ -899,14 +899,19 @@ local void compress_block(deflate_state *s, const ct_data *ltree,
899 const ct_data *dtree) { 899 const ct_data *dtree) {
900 unsigned dist; /* distance of matched string */ 900 unsigned dist; /* distance of matched string */
901 int lc; /* match length or unmatched char (if dist == 0) */ 901 int lc; /* match length or unmatched char (if dist == 0) */
902 unsigned sx = 0; /* running index in sym_buf */ 902 unsigned sx = 0; /* running index in symbol buffers */
903 unsigned code; /* the code to send */ 903 unsigned code; /* the code to send */
904 int extra; /* number of extra bits to send */ 904 int extra; /* number of extra bits to send */
905 905
906 if (s->sym_next != 0) do { 906 if (s->sym_next != 0) do {
907#ifdef LIT_MEM
908 dist = s->d_buf[sx];
909 lc = s->l_buf[sx++];
910#else
907 dist = s->sym_buf[sx++] & 0xff; 911 dist = s->sym_buf[sx++] & 0xff;
908 dist += (unsigned)(s->sym_buf[sx++] & 0xff) << 8; 912 dist += (unsigned)(s->sym_buf[sx++] & 0xff) << 8;
909 lc = s->sym_buf[sx++]; 913 lc = s->sym_buf[sx++];
914#endif
910 if (dist == 0) { 915 if (dist == 0) {
911 send_code(s, lc, ltree); /* send a literal byte */ 916 send_code(s, lc, ltree); /* send a literal byte */
912 Tracecv(isgraph(lc), (stderr," '%c' ", lc)); 917 Tracecv(isgraph(lc), (stderr," '%c' ", lc));
@@ -931,8 +936,12 @@ local void compress_block(deflate_state *s, const ct_data *ltree,
931 } 936 }
932 } /* literal or match pair ? */ 937 } /* literal or match pair ? */
933 938
934 /* Check that the overlay between pending_buf and sym_buf is ok: */ 939 /* Check for no overlay of pending_buf on needed symbols */
940#ifdef LIT_MEM
941 Assert(s->pending < (s->lit_bufsize << 1) + sx, "pendingBuf overflow");
942#else
935 Assert(s->pending < s->lit_bufsize + sx, "pendingBuf overflow"); 943 Assert(s->pending < s->lit_bufsize + sx, "pendingBuf overflow");
944#endif
936 945
937 } while (sx < s->sym_next); 946 } while (sx < s->sym_next);
938 947
@@ -1082,9 +1091,14 @@ void ZLIB_INTERNAL _tr_flush_block(deflate_state *s, charf *buf,
1082 * the current block must be flushed. 1091 * the current block must be flushed.
1083 */ 1092 */
1084int ZLIB_INTERNAL _tr_tally(deflate_state *s, unsigned dist, unsigned lc) { 1093int ZLIB_INTERNAL _tr_tally(deflate_state *s, unsigned dist, unsigned lc) {
1094#ifdef LIT_MEM
1095 s->d_buf[s->sym_next] = (ush)dist;
1096 s->l_buf[s->sym_next++] = (uch)lc;
1097#else
1085 s->sym_buf[s->sym_next++] = (uch)dist; 1098 s->sym_buf[s->sym_next++] = (uch)dist;
1086 s->sym_buf[s->sym_next++] = (uch)(dist >> 8); 1099 s->sym_buf[s->sym_next++] = (uch)(dist >> 8);
1087 s->sym_buf[s->sym_next++] = (uch)lc; 1100 s->sym_buf[s->sym_next++] = (uch)lc;
1101#endif
1088 if (dist == 0) { 1102 if (dist == 0) {
1089 /* lc is the unmatched char */ 1103 /* lc is the unmatched char */
1090 s->dyn_ltree[lc].Freq++; 1104 s->dyn_ltree[lc].Freq++;