diff options
| author | Mark Adler <madler@alumni.caltech.edu> | 2023-09-18 21:17:00 -0700 |
|---|---|---|
| committer | Mark Adler <madler@alumni.caltech.edu> | 2023-09-21 00:14:56 -0700 |
| commit | ac8f12c97d1afd9bafa9c710f827d40a407d3266 (patch) | |
| tree | cc8841032d996c2f5c41d75c7edf65558d2d90ce | |
| parent | bd9c329c1055a9265812352655ed2eec93f36e92 (diff) | |
| download | zlib-ac8f12c97d1afd9bafa9c710f827d40a407d3266.tar.gz zlib-ac8f12c97d1afd9bafa9c710f827d40a407d3266.tar.bz2 zlib-ac8f12c97d1afd9bafa9c710f827d40a407d3266.zip | |
Add LIT_MEM define to use more memory for a small deflate speedup.
A bug fix in zlib 1.2.12 resulted in a slight slowdown (1-2%) of
deflate. This commit provides the option to #define LIT_MEM, which
uses more memory to reverse most of that slowdown. The memory for
the pending buffer and symbol buffers is increased by 25%, which
increases the total memory usage with the default parameters by
about 6%.
| -rw-r--r-- | deflate.c | 21 | ||||
| -rw-r--r-- | deflate.h | 31 | ||||
| -rw-r--r-- | trees.c | 18 |
3 files changed, 67 insertions, 3 deletions
| @@ -493,7 +493,11 @@ int ZEXPORT deflateInit2_(z_streamp strm, int level, int method, | |||
| 493 | * symbols from which it is being constructed. | 493 | * symbols from which it is being constructed. |
| 494 | */ | 494 | */ |
| 495 | 495 | ||
| 496 | #ifdef LIT_MEM | ||
| 497 | s->pending_buf = (uchf *) ZALLOC(strm, s->lit_bufsize, 5); | ||
| 498 | #else | ||
| 496 | s->pending_buf = (uchf *) ZALLOC(strm, s->lit_bufsize, 4); | 499 | s->pending_buf = (uchf *) ZALLOC(strm, s->lit_bufsize, 4); |
| 500 | #endif | ||
| 497 | s->pending_buf_size = (ulg)s->lit_bufsize * 4; | 501 | s->pending_buf_size = (ulg)s->lit_bufsize * 4; |
| 498 | 502 | ||
| 499 | if (s->window == Z_NULL || s->prev == Z_NULL || s->head == Z_NULL || | 503 | if (s->window == Z_NULL || s->prev == Z_NULL || s->head == Z_NULL || |
| @@ -503,8 +507,14 @@ int ZEXPORT deflateInit2_(z_streamp strm, int level, int method, | |||
| 503 | deflateEnd (strm); | 507 | deflateEnd (strm); |
| 504 | return Z_MEM_ERROR; | 508 | return Z_MEM_ERROR; |
| 505 | } | 509 | } |
| 510 | #ifdef LIT_MEM | ||
| 511 | s->d_buf = (ushf *)(s->pending_buf + (s->lit_bufsize << 1)); | ||
| 512 | s->l_buf = s->pending_buf + (s->lit_bufsize << 2); | ||
| 513 | s->sym_end = s->lit_bufsize - 1; | ||
| 514 | #else | ||
| 506 | s->sym_buf = s->pending_buf + s->lit_bufsize; | 515 | s->sym_buf = s->pending_buf + s->lit_bufsize; |
| 507 | s->sym_end = (s->lit_bufsize - 1) * 3; | 516 | s->sym_end = (s->lit_bufsize - 1) * 3; |
| 517 | #endif | ||
| 508 | /* We avoid equality with lit_bufsize*3 because of wraparound at 64K | 518 | /* We avoid equality with lit_bufsize*3 because of wraparound at 64K |
| 509 | * on 16 bit machines and because stored blocks are restricted to | 519 | * on 16 bit machines and because stored blocks are restricted to |
| 510 | * 64K-1 bytes. | 520 | * 64K-1 bytes. |
| @@ -720,9 +730,15 @@ int ZEXPORT deflatePrime(z_streamp strm, int bits, int value) { | |||
| 720 | 730 | ||
| 721 | if (deflateStateCheck(strm)) return Z_STREAM_ERROR; | 731 | if (deflateStateCheck(strm)) return Z_STREAM_ERROR; |
| 722 | s = strm->state; | 732 | s = strm->state; |
| 733 | #ifdef LIT_MEM | ||
| 734 | if (bits < 0 || bits > 16 || | ||
| 735 | (uchf *)s->d_buf < s->pending_out + ((Buf_size + 7) >> 3)) | ||
| 736 | return Z_BUF_ERROR; | ||
| 737 | #else | ||
| 723 | if (bits < 0 || bits > 16 || | 738 | if (bits < 0 || bits > 16 || |
| 724 | s->sym_buf < s->pending_out + ((Buf_size + 7) >> 3)) | 739 | s->sym_buf < s->pending_out + ((Buf_size + 7) >> 3)) |
| 725 | return Z_BUF_ERROR; | 740 | return Z_BUF_ERROR; |
| 741 | #endif | ||
| 726 | do { | 742 | do { |
| 727 | put = Buf_size - s->bi_valid; | 743 | put = Buf_size - s->bi_valid; |
| 728 | if (put > bits) | 744 | if (put > bits) |
| @@ -1308,7 +1324,12 @@ int ZEXPORT deflateCopy(z_streamp dest, z_streamp source) { | |||
| 1308 | zmemcpy(ds->pending_buf, ss->pending_buf, (uInt)ds->pending_buf_size); | 1324 | zmemcpy(ds->pending_buf, ss->pending_buf, (uInt)ds->pending_buf_size); |
| 1309 | 1325 | ||
| 1310 | ds->pending_out = ds->pending_buf + (ss->pending_out - ss->pending_buf); | 1326 | ds->pending_out = ds->pending_buf + (ss->pending_out - ss->pending_buf); |
| 1327 | #ifdef LIT_MEM | ||
| 1328 | ds->d_buf = (ushf *)(ds->pending_buf + (ds->lit_bufsize << 1)); | ||
| 1329 | ds->l_buf = ds->pending_buf + (ds->lit_bufsize << 2); | ||
| 1330 | #else | ||
| 1311 | ds->sym_buf = ds->pending_buf + ds->lit_bufsize; | 1331 | ds->sym_buf = ds->pending_buf + ds->lit_bufsize; |
| 1332 | #endif | ||
| 1312 | 1333 | ||
| 1313 | ds->l_desc.dyn_tree = ds->dyn_ltree; | 1334 | ds->l_desc.dyn_tree = ds->dyn_ltree; |
| 1314 | ds->d_desc.dyn_tree = ds->dyn_dtree; | 1335 | ds->d_desc.dyn_tree = ds->dyn_dtree; |
| @@ -23,6 +23,10 @@ | |||
| 23 | # define GZIP | 23 | # define GZIP |
| 24 | #endif | 24 | #endif |
| 25 | 25 | ||
| 26 | /* define LIT_MEM to slightly increase the speed of deflate (order 1% to 2%) at | ||
| 27 | the cost of a larger memory footprint */ | ||
| 28 | /* #define LIT_MEM */ | ||
| 29 | |||
| 26 | /* =========================================================================== | 30 | /* =========================================================================== |
| 27 | * Internal compression state. | 31 | * Internal compression state. |
| 28 | */ | 32 | */ |
| @@ -217,7 +221,12 @@ typedef struct internal_state { | |||
| 217 | /* Depth of each subtree used as tie breaker for trees of equal frequency | 221 | /* Depth of each subtree used as tie breaker for trees of equal frequency |
| 218 | */ | 222 | */ |
| 219 | 223 | ||
| 224 | #ifdef LIT_MEM | ||
| 225 | ushf *d_buf; /* buffer for distances */ | ||
| 226 | uchf *l_buf; /* buffer for literals/lengths */ | ||
| 227 | #else | ||
| 220 | uchf *sym_buf; /* buffer for distances and literals/lengths */ | 228 | uchf *sym_buf; /* buffer for distances and literals/lengths */ |
| 229 | #endif | ||
| 221 | 230 | ||
| 222 | uInt lit_bufsize; | 231 | uInt lit_bufsize; |
| 223 | /* Size of match buffer for literals/lengths. There are 4 reasons for | 232 | /* Size of match buffer for literals/lengths. There are 4 reasons for |
| @@ -239,7 +248,7 @@ typedef struct internal_state { | |||
| 239 | * - I can't count above 4 | 248 | * - I can't count above 4 |
| 240 | */ | 249 | */ |
| 241 | 250 | ||
| 242 | uInt sym_next; /* running index in sym_buf */ | 251 | uInt sym_next; /* running index in symbol buffer */ |
| 243 | uInt sym_end; /* symbol table full when sym_next reaches this */ | 252 | uInt sym_end; /* symbol table full when sym_next reaches this */ |
| 244 | 253 | ||
| 245 | ulg opt_len; /* bit length of current block with optimal trees */ | 254 | ulg opt_len; /* bit length of current block with optimal trees */ |
| @@ -318,6 +327,25 @@ void ZLIB_INTERNAL _tr_stored_block(deflate_state *s, charf *buf, | |||
| 318 | extern const uch ZLIB_INTERNAL _dist_code[]; | 327 | extern const uch ZLIB_INTERNAL _dist_code[]; |
| 319 | #endif | 328 | #endif |
| 320 | 329 | ||
| 330 | #ifdef LIT_MEM | ||
| 331 | # define _tr_tally_lit(s, c, flush) \ | ||
| 332 | { uch cc = (c); \ | ||
| 333 | s->d_buf[s->sym_next] = 0; \ | ||
| 334 | s->l_buf[s->sym_next++] = cc; \ | ||
| 335 | s->dyn_ltree[cc].Freq++; \ | ||
| 336 | flush = (s->sym_next == s->sym_end); \ | ||
| 337 | } | ||
| 338 | # define _tr_tally_dist(s, distance, length, flush) \ | ||
| 339 | { uch len = (uch)(length); \ | ||
| 340 | ush dist = (ush)(distance); \ | ||
| 341 | s->d_buf[s->sym_next] = dist; \ | ||
| 342 | s->l_buf[s->sym_next++] = len; \ | ||
| 343 | dist--; \ | ||
| 344 | s->dyn_ltree[_length_code[len]+LITERALS+1].Freq++; \ | ||
| 345 | s->dyn_dtree[d_code(dist)].Freq++; \ | ||
| 346 | flush = (s->sym_next == s->sym_end); \ | ||
| 347 | } | ||
| 348 | #else | ||
| 321 | # define _tr_tally_lit(s, c, flush) \ | 349 | # define _tr_tally_lit(s, c, flush) \ |
| 322 | { uch cc = (c); \ | 350 | { uch cc = (c); \ |
| 323 | s->sym_buf[s->sym_next++] = 0; \ | 351 | s->sym_buf[s->sym_next++] = 0; \ |
| @@ -337,6 +365,7 @@ void ZLIB_INTERNAL _tr_stored_block(deflate_state *s, charf *buf, | |||
| 337 | s->dyn_dtree[d_code(dist)].Freq++; \ | 365 | s->dyn_dtree[d_code(dist)].Freq++; \ |
| 338 | flush = (s->sym_next == s->sym_end); \ | 366 | flush = (s->sym_next == s->sym_end); \ |
| 339 | } | 367 | } |
| 368 | #endif | ||
| 340 | #else | 369 | #else |
| 341 | # define _tr_tally_lit(s, c, flush) flush = _tr_tally(s, 0, c) | 370 | # define _tr_tally_lit(s, c, flush) flush = _tr_tally(s, 0, c) |
| 342 | # define _tr_tally_dist(s, distance, length, flush) \ | 371 | # define _tr_tally_dist(s, distance, length, flush) \ |
| @@ -899,14 +899,19 @@ local void compress_block(deflate_state *s, const ct_data *ltree, | |||
| 899 | const ct_data *dtree) { | 899 | const ct_data *dtree) { |
| 900 | unsigned dist; /* distance of matched string */ | 900 | unsigned dist; /* distance of matched string */ |
| 901 | int lc; /* match length or unmatched char (if dist == 0) */ | 901 | int lc; /* match length or unmatched char (if dist == 0) */ |
| 902 | unsigned sx = 0; /* running index in sym_buf */ | 902 | unsigned sx = 0; /* running index in symbol buffers */ |
| 903 | unsigned code; /* the code to send */ | 903 | unsigned code; /* the code to send */ |
| 904 | int extra; /* number of extra bits to send */ | 904 | int extra; /* number of extra bits to send */ |
| 905 | 905 | ||
| 906 | if (s->sym_next != 0) do { | 906 | if (s->sym_next != 0) do { |
| 907 | #ifdef LIT_MEM | ||
| 908 | dist = s->d_buf[sx]; | ||
| 909 | lc = s->l_buf[sx++]; | ||
| 910 | #else | ||
| 907 | dist = s->sym_buf[sx++] & 0xff; | 911 | dist = s->sym_buf[sx++] & 0xff; |
| 908 | dist += (unsigned)(s->sym_buf[sx++] & 0xff) << 8; | 912 | dist += (unsigned)(s->sym_buf[sx++] & 0xff) << 8; |
| 909 | lc = s->sym_buf[sx++]; | 913 | lc = s->sym_buf[sx++]; |
| 914 | #endif | ||
| 910 | if (dist == 0) { | 915 | if (dist == 0) { |
| 911 | send_code(s, lc, ltree); /* send a literal byte */ | 916 | send_code(s, lc, ltree); /* send a literal byte */ |
| 912 | Tracecv(isgraph(lc), (stderr," '%c' ", lc)); | 917 | Tracecv(isgraph(lc), (stderr," '%c' ", lc)); |
| @@ -931,8 +936,12 @@ local void compress_block(deflate_state *s, const ct_data *ltree, | |||
| 931 | } | 936 | } |
| 932 | } /* literal or match pair ? */ | 937 | } /* literal or match pair ? */ |
| 933 | 938 | ||
| 934 | /* Check that the overlay between pending_buf and sym_buf is ok: */ | 939 | /* Check for no overlay of pending_buf on needed symbols */ |
| 940 | #ifdef LIT_MEM | ||
| 941 | Assert(s->pending < (s->lit_bufsize << 1) + sx, "pendingBuf overflow"); | ||
| 942 | #else | ||
| 935 | Assert(s->pending < s->lit_bufsize + sx, "pendingBuf overflow"); | 943 | Assert(s->pending < s->lit_bufsize + sx, "pendingBuf overflow"); |
| 944 | #endif | ||
| 936 | 945 | ||
| 937 | } while (sx < s->sym_next); | 946 | } while (sx < s->sym_next); |
| 938 | 947 | ||
| @@ -1082,9 +1091,14 @@ void ZLIB_INTERNAL _tr_flush_block(deflate_state *s, charf *buf, | |||
| 1082 | * the current block must be flushed. | 1091 | * the current block must be flushed. |
| 1083 | */ | 1092 | */ |
| 1084 | int ZLIB_INTERNAL _tr_tally(deflate_state *s, unsigned dist, unsigned lc) { | 1093 | int ZLIB_INTERNAL _tr_tally(deflate_state *s, unsigned dist, unsigned lc) { |
| 1094 | #ifdef LIT_MEM | ||
| 1095 | s->d_buf[s->sym_next] = (ush)dist; | ||
| 1096 | s->l_buf[s->sym_next++] = (uch)lc; | ||
| 1097 | #else | ||
| 1085 | s->sym_buf[s->sym_next++] = (uch)dist; | 1098 | s->sym_buf[s->sym_next++] = (uch)dist; |
| 1086 | s->sym_buf[s->sym_next++] = (uch)(dist >> 8); | 1099 | s->sym_buf[s->sym_next++] = (uch)(dist >> 8); |
| 1087 | s->sym_buf[s->sym_next++] = (uch)lc; | 1100 | s->sym_buf[s->sym_next++] = (uch)lc; |
| 1101 | #endif | ||
| 1088 | if (dist == 0) { | 1102 | if (dist == 0) { |
| 1089 | /* lc is the unmatched char */ | 1103 | /* lc is the unmatched char */ |
| 1090 | s->dyn_ltree[lc].Freq++; | 1104 | s->dyn_ltree[lc].Freq++; |
