diff options
author | Mark Adler <madler@alumni.caltech.edu> | 2023-09-18 21:17:00 -0700 |
---|---|---|
committer | Mark Adler <madler@alumni.caltech.edu> | 2023-09-21 00:14:56 -0700 |
commit | ac8f12c97d1afd9bafa9c710f827d40a407d3266 (patch) | |
tree | cc8841032d996c2f5c41d75c7edf65558d2d90ce | |
parent | bd9c329c1055a9265812352655ed2eec93f36e92 (diff) | |
download | zlib-ac8f12c97d1afd9bafa9c710f827d40a407d3266.tar.gz zlib-ac8f12c97d1afd9bafa9c710f827d40a407d3266.tar.bz2 zlib-ac8f12c97d1afd9bafa9c710f827d40a407d3266.zip |
Add LIT_MEM define to use more memory for a small deflate speedup.
A bug fix in zlib 1.2.12 resulted in a slight slowdown (1-2%) of
deflate. This commit provides the option to #define LIT_MEM, which
uses more memory to reverse most of that slowdown. The memory for
the pending buffer and symbol buffers is increased by 25%, which
increases the total memory usage with the default parameters by
about 6%.
-rw-r--r-- | deflate.c | 21 | ||||
-rw-r--r-- | deflate.h | 31 | ||||
-rw-r--r-- | trees.c | 18 |
3 files changed, 67 insertions, 3 deletions
@@ -493,7 +493,11 @@ int ZEXPORT deflateInit2_(z_streamp strm, int level, int method, | |||
493 | * symbols from which it is being constructed. | 493 | * symbols from which it is being constructed. |
494 | */ | 494 | */ |
495 | 495 | ||
496 | #ifdef LIT_MEM | ||
497 | s->pending_buf = (uchf *) ZALLOC(strm, s->lit_bufsize, 5); | ||
498 | #else | ||
496 | s->pending_buf = (uchf *) ZALLOC(strm, s->lit_bufsize, 4); | 499 | s->pending_buf = (uchf *) ZALLOC(strm, s->lit_bufsize, 4); |
500 | #endif | ||
497 | s->pending_buf_size = (ulg)s->lit_bufsize * 4; | 501 | s->pending_buf_size = (ulg)s->lit_bufsize * 4; |
498 | 502 | ||
499 | if (s->window == Z_NULL || s->prev == Z_NULL || s->head == Z_NULL || | 503 | if (s->window == Z_NULL || s->prev == Z_NULL || s->head == Z_NULL || |
@@ -503,8 +507,14 @@ int ZEXPORT deflateInit2_(z_streamp strm, int level, int method, | |||
503 | deflateEnd (strm); | 507 | deflateEnd (strm); |
504 | return Z_MEM_ERROR; | 508 | return Z_MEM_ERROR; |
505 | } | 509 | } |
510 | #ifdef LIT_MEM | ||
511 | s->d_buf = (ushf *)(s->pending_buf + (s->lit_bufsize << 1)); | ||
512 | s->l_buf = s->pending_buf + (s->lit_bufsize << 2); | ||
513 | s->sym_end = s->lit_bufsize - 1; | ||
514 | #else | ||
506 | s->sym_buf = s->pending_buf + s->lit_bufsize; | 515 | s->sym_buf = s->pending_buf + s->lit_bufsize; |
507 | s->sym_end = (s->lit_bufsize - 1) * 3; | 516 | s->sym_end = (s->lit_bufsize - 1) * 3; |
517 | #endif | ||
508 | /* We avoid equality with lit_bufsize*3 because of wraparound at 64K | 518 | /* We avoid equality with lit_bufsize*3 because of wraparound at 64K |
509 | * on 16 bit machines and because stored blocks are restricted to | 519 | * on 16 bit machines and because stored blocks are restricted to |
510 | * 64K-1 bytes. | 520 | * 64K-1 bytes. |
@@ -720,9 +730,15 @@ int ZEXPORT deflatePrime(z_streamp strm, int bits, int value) { | |||
720 | 730 | ||
721 | if (deflateStateCheck(strm)) return Z_STREAM_ERROR; | 731 | if (deflateStateCheck(strm)) return Z_STREAM_ERROR; |
722 | s = strm->state; | 732 | s = strm->state; |
733 | #ifdef LIT_MEM | ||
734 | if (bits < 0 || bits > 16 || | ||
735 | (uchf *)s->d_buf < s->pending_out + ((Buf_size + 7) >> 3)) | ||
736 | return Z_BUF_ERROR; | ||
737 | #else | ||
723 | if (bits < 0 || bits > 16 || | 738 | if (bits < 0 || bits > 16 || |
724 | s->sym_buf < s->pending_out + ((Buf_size + 7) >> 3)) | 739 | s->sym_buf < s->pending_out + ((Buf_size + 7) >> 3)) |
725 | return Z_BUF_ERROR; | 740 | return Z_BUF_ERROR; |
741 | #endif | ||
726 | do { | 742 | do { |
727 | put = Buf_size - s->bi_valid; | 743 | put = Buf_size - s->bi_valid; |
728 | if (put > bits) | 744 | if (put > bits) |
@@ -1308,7 +1324,12 @@ int ZEXPORT deflateCopy(z_streamp dest, z_streamp source) { | |||
1308 | zmemcpy(ds->pending_buf, ss->pending_buf, (uInt)ds->pending_buf_size); | 1324 | zmemcpy(ds->pending_buf, ss->pending_buf, (uInt)ds->pending_buf_size); |
1309 | 1325 | ||
1310 | ds->pending_out = ds->pending_buf + (ss->pending_out - ss->pending_buf); | 1326 | ds->pending_out = ds->pending_buf + (ss->pending_out - ss->pending_buf); |
1327 | #ifdef LIT_MEM | ||
1328 | ds->d_buf = (ushf *)(ds->pending_buf + (ds->lit_bufsize << 1)); | ||
1329 | ds->l_buf = ds->pending_buf + (ds->lit_bufsize << 2); | ||
1330 | #else | ||
1311 | ds->sym_buf = ds->pending_buf + ds->lit_bufsize; | 1331 | ds->sym_buf = ds->pending_buf + ds->lit_bufsize; |
1332 | #endif | ||
1312 | 1333 | ||
1313 | ds->l_desc.dyn_tree = ds->dyn_ltree; | 1334 | ds->l_desc.dyn_tree = ds->dyn_ltree; |
1314 | ds->d_desc.dyn_tree = ds->dyn_dtree; | 1335 | ds->d_desc.dyn_tree = ds->dyn_dtree; |
@@ -23,6 +23,10 @@ | |||
23 | # define GZIP | 23 | # define GZIP |
24 | #endif | 24 | #endif |
25 | 25 | ||
26 | /* define LIT_MEM to slightly increase the speed of deflate (order 1% to 2%) at | ||
27 | the cost of a larger memory footprint */ | ||
28 | /* #define LIT_MEM */ | ||
29 | |||
26 | /* =========================================================================== | 30 | /* =========================================================================== |
27 | * Internal compression state. | 31 | * Internal compression state. |
28 | */ | 32 | */ |
@@ -217,7 +221,12 @@ typedef struct internal_state { | |||
217 | /* Depth of each subtree used as tie breaker for trees of equal frequency | 221 | /* Depth of each subtree used as tie breaker for trees of equal frequency |
218 | */ | 222 | */ |
219 | 223 | ||
224 | #ifdef LIT_MEM | ||
225 | ushf *d_buf; /* buffer for distances */ | ||
226 | uchf *l_buf; /* buffer for literals/lengths */ | ||
227 | #else | ||
220 | uchf *sym_buf; /* buffer for distances and literals/lengths */ | 228 | uchf *sym_buf; /* buffer for distances and literals/lengths */ |
229 | #endif | ||
221 | 230 | ||
222 | uInt lit_bufsize; | 231 | uInt lit_bufsize; |
223 | /* Size of match buffer for literals/lengths. There are 4 reasons for | 232 | /* Size of match buffer for literals/lengths. There are 4 reasons for |
@@ -239,7 +248,7 @@ typedef struct internal_state { | |||
239 | * - I can't count above 4 | 248 | * - I can't count above 4 |
240 | */ | 249 | */ |
241 | 250 | ||
242 | uInt sym_next; /* running index in sym_buf */ | 251 | uInt sym_next; /* running index in symbol buffer */ |
243 | uInt sym_end; /* symbol table full when sym_next reaches this */ | 252 | uInt sym_end; /* symbol table full when sym_next reaches this */ |
244 | 253 | ||
245 | ulg opt_len; /* bit length of current block with optimal trees */ | 254 | ulg opt_len; /* bit length of current block with optimal trees */ |
@@ -318,6 +327,25 @@ void ZLIB_INTERNAL _tr_stored_block(deflate_state *s, charf *buf, | |||
318 | extern const uch ZLIB_INTERNAL _dist_code[]; | 327 | extern const uch ZLIB_INTERNAL _dist_code[]; |
319 | #endif | 328 | #endif |
320 | 329 | ||
330 | #ifdef LIT_MEM | ||
331 | # define _tr_tally_lit(s, c, flush) \ | ||
332 | { uch cc = (c); \ | ||
333 | s->d_buf[s->sym_next] = 0; \ | ||
334 | s->l_buf[s->sym_next++] = cc; \ | ||
335 | s->dyn_ltree[cc].Freq++; \ | ||
336 | flush = (s->sym_next == s->sym_end); \ | ||
337 | } | ||
338 | # define _tr_tally_dist(s, distance, length, flush) \ | ||
339 | { uch len = (uch)(length); \ | ||
340 | ush dist = (ush)(distance); \ | ||
341 | s->d_buf[s->sym_next] = dist; \ | ||
342 | s->l_buf[s->sym_next++] = len; \ | ||
343 | dist--; \ | ||
344 | s->dyn_ltree[_length_code[len]+LITERALS+1].Freq++; \ | ||
345 | s->dyn_dtree[d_code(dist)].Freq++; \ | ||
346 | flush = (s->sym_next == s->sym_end); \ | ||
347 | } | ||
348 | #else | ||
321 | # define _tr_tally_lit(s, c, flush) \ | 349 | # define _tr_tally_lit(s, c, flush) \ |
322 | { uch cc = (c); \ | 350 | { uch cc = (c); \ |
323 | s->sym_buf[s->sym_next++] = 0; \ | 351 | s->sym_buf[s->sym_next++] = 0; \ |
@@ -337,6 +365,7 @@ void ZLIB_INTERNAL _tr_stored_block(deflate_state *s, charf *buf, | |||
337 | s->dyn_dtree[d_code(dist)].Freq++; \ | 365 | s->dyn_dtree[d_code(dist)].Freq++; \ |
338 | flush = (s->sym_next == s->sym_end); \ | 366 | flush = (s->sym_next == s->sym_end); \ |
339 | } | 367 | } |
368 | #endif | ||
340 | #else | 369 | #else |
341 | # define _tr_tally_lit(s, c, flush) flush = _tr_tally(s, 0, c) | 370 | # define _tr_tally_lit(s, c, flush) flush = _tr_tally(s, 0, c) |
342 | # define _tr_tally_dist(s, distance, length, flush) \ | 371 | # define _tr_tally_dist(s, distance, length, flush) \ |
@@ -899,14 +899,19 @@ local void compress_block(deflate_state *s, const ct_data *ltree, | |||
899 | const ct_data *dtree) { | 899 | const ct_data *dtree) { |
900 | unsigned dist; /* distance of matched string */ | 900 | unsigned dist; /* distance of matched string */ |
901 | int lc; /* match length or unmatched char (if dist == 0) */ | 901 | int lc; /* match length or unmatched char (if dist == 0) */ |
902 | unsigned sx = 0; /* running index in sym_buf */ | 902 | unsigned sx = 0; /* running index in symbol buffers */ |
903 | unsigned code; /* the code to send */ | 903 | unsigned code; /* the code to send */ |
904 | int extra; /* number of extra bits to send */ | 904 | int extra; /* number of extra bits to send */ |
905 | 905 | ||
906 | if (s->sym_next != 0) do { | 906 | if (s->sym_next != 0) do { |
907 | #ifdef LIT_MEM | ||
908 | dist = s->d_buf[sx]; | ||
909 | lc = s->l_buf[sx++]; | ||
910 | #else | ||
907 | dist = s->sym_buf[sx++] & 0xff; | 911 | dist = s->sym_buf[sx++] & 0xff; |
908 | dist += (unsigned)(s->sym_buf[sx++] & 0xff) << 8; | 912 | dist += (unsigned)(s->sym_buf[sx++] & 0xff) << 8; |
909 | lc = s->sym_buf[sx++]; | 913 | lc = s->sym_buf[sx++]; |
914 | #endif | ||
910 | if (dist == 0) { | 915 | if (dist == 0) { |
911 | send_code(s, lc, ltree); /* send a literal byte */ | 916 | send_code(s, lc, ltree); /* send a literal byte */ |
912 | Tracecv(isgraph(lc), (stderr," '%c' ", lc)); | 917 | Tracecv(isgraph(lc), (stderr," '%c' ", lc)); |
@@ -931,8 +936,12 @@ local void compress_block(deflate_state *s, const ct_data *ltree, | |||
931 | } | 936 | } |
932 | } /* literal or match pair ? */ | 937 | } /* literal or match pair ? */ |
933 | 938 | ||
934 | /* Check that the overlay between pending_buf and sym_buf is ok: */ | 939 | /* Check for no overlay of pending_buf on needed symbols */ |
940 | #ifdef LIT_MEM | ||
941 | Assert(s->pending < (s->lit_bufsize << 1) + sx, "pendingBuf overflow"); | ||
942 | #else | ||
935 | Assert(s->pending < s->lit_bufsize + sx, "pendingBuf overflow"); | 943 | Assert(s->pending < s->lit_bufsize + sx, "pendingBuf overflow"); |
944 | #endif | ||
936 | 945 | ||
937 | } while (sx < s->sym_next); | 946 | } while (sx < s->sym_next); |
938 | 947 | ||
@@ -1082,9 +1091,14 @@ void ZLIB_INTERNAL _tr_flush_block(deflate_state *s, charf *buf, | |||
1082 | * the current block must be flushed. | 1091 | * the current block must be flushed. |
1083 | */ | 1092 | */ |
1084 | int ZLIB_INTERNAL _tr_tally(deflate_state *s, unsigned dist, unsigned lc) { | 1093 | int ZLIB_INTERNAL _tr_tally(deflate_state *s, unsigned dist, unsigned lc) { |
1094 | #ifdef LIT_MEM | ||
1095 | s->d_buf[s->sym_next] = (ush)dist; | ||
1096 | s->l_buf[s->sym_next++] = (uch)lc; | ||
1097 | #else | ||
1085 | s->sym_buf[s->sym_next++] = (uch)dist; | 1098 | s->sym_buf[s->sym_next++] = (uch)dist; |
1086 | s->sym_buf[s->sym_next++] = (uch)(dist >> 8); | 1099 | s->sym_buf[s->sym_next++] = (uch)(dist >> 8); |
1087 | s->sym_buf[s->sym_next++] = (uch)lc; | 1100 | s->sym_buf[s->sym_next++] = (uch)lc; |
1101 | #endif | ||
1088 | if (dist == 0) { | 1102 | if (dist == 0) { |
1089 | /* lc is the unmatched char */ | 1103 | /* lc is the unmatched char */ |
1090 | s->dyn_ltree[lc].Freq++; | 1104 | s->dyn_ltree[lc].Freq++; |