author    Mark Adler <madler@alumni.caltech.edu>    2018-04-17 22:09:22 -0700
committer Mark Adler <madler@alumni.caltech.edu>    2018-04-19 19:47:11 -0700
commit    5c44459c3b28a9bd3283aaceab7c615f8020c531
tree      7a5d9e7a3fd8c0438f592de2f3ee89a91d2cd06a
parent    e99813dbfe9a09e33d42e8da9e550a0c4b7ff734
Fix a bug that can crash deflate on some input when using Z_FIXED.
This bug was reported by Danilo Ramos of Eideticom, Inc. It has lain in wait 13 years before being found! The bug was introduced in zlib 1.2.2.2, with the addition of the Z_FIXED option. That option forces the use of fixed Huffman codes. For rare inputs with a large number of distant matches, the pending buffer into which the compressed data is written can overwrite the distance symbol table which it overlays. That results in corrupted output due to invalid distances, and can result in out-of-bound accesses, crashing the application.

The fix here combines the distance buffer and literal/length buffers into a single symbol buffer. Now three bytes of pending buffer space are opened up for each literal or length/distance pair consumed, instead of the previous two bytes. This assures that the pending buffer cannot overwrite the symbol table, since the maximum fixed code compressed length/distance is 31 bits, and since there are four bytes of pending space for every three bytes of symbol space.
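To make the accounting in that last paragraph concrete, here is a back-of-the-envelope check (an illustration only, not part of the commit; worst_case_gap is a hypothetical helper). It tracks the smallest gap, in bits, between the compressed output written at the front of pending_buf and the next unread symbol, assuming every symbol is a worst-case 31-bit fixed-code length/distance pair and that the symbol area starts lit_bufsize bytes into the buffer. With the old layout, consuming a symbol frees only the two d_buf bytes ahead of the writer and the gap can go negative; with the new three-bytes-per-symbol layout it stays positive.

/* Illustration only, not zlib code: worst-case writer/reader gap in the
 * overlain buffer, following the accounting in the new deflate.c comment. */
#include <stdio.h>

static long worst_case_gap(long lit_bufsize, long bytes_per_symbol) {
    long n = lit_bufsize;
    long k, gap, min_gap = 8 * n;     /* writer starts lit_bufsize bytes behind */
    for (k = 1; k <= n - 2; k++) {    /* n-1 symbols fill the buffer; the gap is
                                         tightest just before the last is coded */
        gap = 8 * n + 8 * bytes_per_symbol * k   /* bits freed by consuming k symbols */
              - (3 + 31 * k);                    /* block header plus k worst-case codes */
        if (gap < min_gap) min_gap = gap;
    }
    return min_gap;
}

int main(void) {
    /* Old layout: only the 2-byte d_buf entries sit ahead of the writer. */
    printf("old, lit_bufsize=16384: %ld bits\n", worst_case_gap(16384, 2));
    /* New layout: one 3-byte sym_buf entry per symbol. */
    printf("new, lit_bufsize=16384: %ld bits\n", worst_case_gap(16384, 3));
    printf("new, lit_bufsize=128:   %ld bits\n", worst_case_gap(128, 3));
    return 0;
}

For the minimum lit_bufsize of 128 the new layout reports 139 bits, matching the figure in the comment added to deflate.c below, while the old two-byte layout reports a large negative gap, which is the overflow.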
-rw-r--r--  deflate.c  74
-rw-r--r--  deflate.h  25
-rw-r--r--  trees.c    50
3 files changed, 79 insertions, 70 deletions
diff --git a/deflate.c b/deflate.c
index 425babc..19cba87 100644
--- a/deflate.c
+++ b/deflate.c
@@ -255,11 +255,6 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy,
     int wrap = 1;
     static const char my_version[] = ZLIB_VERSION;
 
-    ushf *overlay;
-    /* We overlay pending_buf and d_buf+l_buf. This works since the average
-     * output size for (length,distance) codes is <= 24 bits.
-     */
-
     if (version == Z_NULL || version[0] != my_version[0] ||
         stream_size != sizeof(z_stream)) {
         return Z_VERSION_ERROR;
@@ -329,9 +324,47 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy,
 
     s->lit_bufsize = 1 << (memLevel + 6); /* 16K elements by default */
 
-    overlay = (ushf *) ZALLOC(strm, s->lit_bufsize, sizeof(ush)+2);
-    s->pending_buf = (uchf *) overlay;
-    s->pending_buf_size = (ulg)s->lit_bufsize * (sizeof(ush)+2L);
+    /* We overlay pending_buf and sym_buf. This works since the average size
+     * for length/distance pairs over any compressed block is assured to be 31
+     * bits or less.
+     *
+     * Analysis: The longest fixed codes are a length code of 8 bits plus 5
+     * extra bits, for lengths 131 to 257. The longest fixed distance codes are
+     * 5 bits plus 13 extra bits, for distances 16385 to 32768. The longest
+     * possible fixed-codes length/distance pair is then 31 bits total.
+     *
+     * sym_buf starts one-fourth of the way into pending_buf. So there are
+     * three bytes in sym_buf for every four bytes in pending_buf. Each symbol
+     * in sym_buf is three bytes -- two for the distance and one for the
+     * literal/length. As each symbol is consumed, the pointer to the next
+     * sym_buf value to read moves forward three bytes. From that symbol, up to
+     * 31 bits are written to pending_buf. The closest the written pending_buf
+     * bits gets to the next sym_buf symbol to read is just before the last
+     * code is written. At that time, 31*(n-2) bits have been written, just
+     * after 24*(n-2) bits have been consumed from sym_buf. sym_buf starts at
+     * 8*n bits into pending_buf. (Note that the symbol buffer fills when n-1
+     * symbols are written.) The closest the writing gets to what is unread is
+     * then n+14 bits. Here n is lit_bufsize, which is 16384 by default, and
+     * can range from 128 to 32768.
+     *
+     * Therefore, at a minimum, there are 142 bits of space between what is
+     * written and what is read in the overlain buffers, so the symbols cannot
+     * be overwritten by the compressed data. That space is actually 139 bits,
+     * due to the three-bit fixed-code block header.
+     *
+     * That covers the case where either Z_FIXED is specified, forcing fixed
+     * codes, or when the use of fixed codes is chosen, because that choice
+     * results in a smaller compressed block than dynamic codes. That latter
+     * condition then assures that the above analysis also covers all dynamic
+     * blocks. A dynamic-code block will only be chosen to be emitted if it has
+     * fewer bits than a fixed-code block would for the same set of symbols.
+     * Therefore its average symbol length is assured to be less than 31. So
+     * the compressed data for a dynamic block also cannot overwrite the
+     * symbols from which it is being constructed.
+     */
+
+    s->pending_buf = (uchf *) ZALLOC(strm, s->lit_bufsize, 4);
+    s->pending_buf_size = (ulg)s->lit_bufsize * 4;
 
     if (s->window == Z_NULL || s->prev == Z_NULL || s->head == Z_NULL ||
         s->pending_buf == Z_NULL) {
@@ -340,8 +373,12 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy,
         deflateEnd (strm);
         return Z_MEM_ERROR;
     }
-    s->d_buf = overlay + s->lit_bufsize/sizeof(ush);
-    s->l_buf = s->pending_buf + (1+sizeof(ush))*s->lit_bufsize;
+    s->sym_buf = s->pending_buf + s->lit_bufsize;
+    s->sym_end = (s->lit_bufsize - 1) * 3;
+    /* We avoid equality with lit_bufsize*3 because of wraparound at 64K
+     * on 16 bit machines and because stored blocks are restricted to
+     * 64K-1 bytes.
+     */
 
     s->level = level;
     s->strategy = strategy;
@@ -552,7 +589,7 @@ int ZEXPORT deflatePrime (strm, bits, value)
 
     if (deflateStateCheck(strm)) return Z_STREAM_ERROR;
     s = strm->state;
-    if ((Bytef *)(s->d_buf) < s->pending_out + ((Buf_size + 7) >> 3))
+    if (s->sym_buf < s->pending_out + ((Buf_size + 7) >> 3))
         return Z_BUF_ERROR;
     do {
         put = Buf_size - s->bi_valid;
@@ -1113,7 +1150,6 @@ int ZEXPORT deflateCopy (dest, source)
 #else
     deflate_state *ds;
     deflate_state *ss;
-    ushf *overlay;
 
 
     if (deflateStateCheck(source) || dest == Z_NULL) {
@@ -1133,8 +1169,7 @@ int ZEXPORT deflateCopy (dest, source)
     ds->window = (Bytef *) ZALLOC(dest, ds->w_size, 2*sizeof(Byte));
     ds->prev   = (Posf *)  ZALLOC(dest, ds->w_size, sizeof(Pos));
     ds->head   = (Posf *)  ZALLOC(dest, ds->hash_size, sizeof(Pos));
-    overlay = (ushf *) ZALLOC(dest, ds->lit_bufsize, sizeof(ush)+2);
-    ds->pending_buf = (uchf *) overlay;
+    ds->pending_buf = (uchf *) ZALLOC(dest, ds->lit_bufsize, 4);
 
     if (ds->window == Z_NULL || ds->prev == Z_NULL || ds->head == Z_NULL ||
         ds->pending_buf == Z_NULL) {
@@ -1148,8 +1183,7 @@ int ZEXPORT deflateCopy (dest, source)
     zmemcpy(ds->pending_buf, ss->pending_buf, (uInt)ds->pending_buf_size);
 
     ds->pending_out = ds->pending_buf + (ss->pending_out - ss->pending_buf);
-    ds->d_buf = overlay + ds->lit_bufsize/sizeof(ush);
-    ds->l_buf = ds->pending_buf + (1+sizeof(ush))*ds->lit_bufsize;
+    ds->sym_buf = ds->pending_buf + ds->lit_bufsize;
 
     ds->l_desc.dyn_tree = ds->dyn_ltree;
     ds->d_desc.dyn_tree = ds->dyn_dtree;
@@ -1925,7 +1959,7 @@ local block_state deflate_fast(s, flush)
         FLUSH_BLOCK(s, 1);
         return finish_done;
     }
-    if (s->last_lit)
+    if (s->sym_next)
         FLUSH_BLOCK(s, 0);
     return block_done;
 }
@@ -2056,7 +2090,7 @@ local block_state deflate_slow(s, flush)
         FLUSH_BLOCK(s, 1);
         return finish_done;
     }
-    if (s->last_lit)
+    if (s->sym_next)
         FLUSH_BLOCK(s, 0);
     return block_done;
 }
@@ -2131,7 +2165,7 @@ local block_state deflate_rle(s, flush)
         FLUSH_BLOCK(s, 1);
         return finish_done;
     }
-    if (s->last_lit)
+    if (s->sym_next)
         FLUSH_BLOCK(s, 0);
     return block_done;
 }
@@ -2170,7 +2204,7 @@ local block_state deflate_huff(s, flush)
         FLUSH_BLOCK(s, 1);
         return finish_done;
     }
-    if (s->last_lit)
+    if (s->sym_next)
         FLUSH_BLOCK(s, 0);
     return block_done;
 }
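Pulled out of the diff context, the new allocation in deflateInit2_ (mirrored in deflateCopy above) amounts to the layout sketched below. This is an illustration only with hypothetical names (overlay_layout, overlay_init), not code from the patch: one block of 4*lit_bufsize bytes serves both the pending output, written from the front, and the 3-byte symbols, stored in the upper three quarters.

/* Illustration only: the overlain layout deflateInit2_ now sets up. */
#include <stdlib.h>

struct overlay_layout {
    unsigned char *pending_buf;      /* compressed bits are written from here */
    unsigned long  pending_buf_size;
    unsigned char *sym_buf;          /* 3-byte symbols, lit_bufsize bytes in */
    unsigned int   sym_end;          /* sym_next stops one symbol short of 3*n */
};

static int overlay_init(struct overlay_layout *o, unsigned int lit_bufsize) {
    o->pending_buf = (unsigned char *)malloc((size_t)lit_bufsize * 4);
    if (o->pending_buf == NULL) return -1;
    o->pending_buf_size = (unsigned long)lit_bufsize * 4;
    o->sym_buf = o->pending_buf + lit_bufsize;
    o->sym_end = (lit_bufsize - 1) * 3;  /* not 3*lit_bufsize: avoids 64K
                                            wraparound and the 64K-1 limit on
                                            stored blocks, per the comment */
    return 0;
}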
diff --git a/deflate.h b/deflate.h
index 23ecdd3..d4cf1a9 100644
--- a/deflate.h
+++ b/deflate.h
@@ -217,7 +217,7 @@ typedef struct internal_state {
     /* Depth of each subtree used as tie breaker for trees of equal frequency
      */
 
-    uchf *l_buf;          /* buffer for literals or lengths */
+    uchf *sym_buf;        /* buffer for distances and literals/lengths */
 
     uInt  lit_bufsize;
     /* Size of match buffer for literals/lengths. There are 4 reasons for
@@ -239,13 +239,8 @@ typedef struct internal_state {
      *   - I can't count above 4
      */
 
-    uInt last_lit;      /* running index in l_buf */
-
-    ushf *d_buf;
-    /* Buffer for distances. To simplify the code, d_buf and l_buf have
-     * the same number of elements. To use different lengths, an extra flag
-     * array would be necessary.
-     */
+    uInt sym_next;      /* running index in sym_buf */
+    uInt sym_end;       /* symbol table full when sym_next reaches this */
 
     ulg opt_len;        /* bit length of current block with optimal trees */
     ulg static_len;     /* bit length of current block with static trees */
@@ -325,20 +320,22 @@ void ZLIB_INTERNAL _tr_stored_block OF((deflate_state *s, charf *buf,
 
 # define _tr_tally_lit(s, c, flush) \
   { uch cc = (c); \
-    s->d_buf[s->last_lit] = 0; \
-    s->l_buf[s->last_lit++] = cc; \
+    s->sym_buf[s->sym_next++] = 0; \
+    s->sym_buf[s->sym_next++] = 0; \
+    s->sym_buf[s->sym_next++] = cc; \
     s->dyn_ltree[cc].Freq++; \
-    flush = (s->last_lit == s->lit_bufsize-1); \
+    flush = (s->sym_next == s->sym_end); \
   }
 # define _tr_tally_dist(s, distance, length, flush) \
   { uch len = (uch)(length); \
     ush dist = (ush)(distance); \
-    s->d_buf[s->last_lit] = dist; \
-    s->l_buf[s->last_lit++] = len; \
+    s->sym_buf[s->sym_next++] = dist; \
+    s->sym_buf[s->sym_next++] = dist >> 8; \
+    s->sym_buf[s->sym_next++] = len; \
     dist--; \
     s->dyn_ltree[_length_code[len]+LITERALS+1].Freq++; \
     s->dyn_dtree[d_code(dist)].Freq++; \
-    flush = (s->last_lit == s->lit_bufsize-1); \
+    flush = (s->sym_next == s->sym_end); \
   }
 #else
 # define _tr_tally_lit(s, c, flush) flush = _tr_tally(s, 0, c)
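The rewritten _tr_tally_lit and _tr_tally_dist macros spend three sym_buf bytes per symbol, and compress_block() in trees.c below reads them back in the same order. A self-contained sketch of that 3-byte encoding follows (sym_put and sym_get are hypothetical names, not part of the patch; in the real code the third byte is the literal, or the match length minus MIN_MATCH):

/* Illustration only: a literal is stored as {0, 0, literal}; a match as
 * {dist low byte, dist high byte, length code}. */
#include <assert.h>

static void sym_put(unsigned char *sym_buf, unsigned *sym_next,
                    unsigned dist, unsigned char lc) {
    sym_buf[(*sym_next)++] = (unsigned char)dist;         /* distance, low byte  */
    sym_buf[(*sym_next)++] = (unsigned char)(dist >> 8);  /* distance, high byte */
    sym_buf[(*sym_next)++] = lc;                          /* literal or length   */
}

static unsigned sym_get(const unsigned char *sym_buf, unsigned *sx,
                        unsigned char *lc) {
    unsigned dist = sym_buf[(*sx)++] & 0xff;
    dist += (unsigned)(sym_buf[(*sx)++] & 0xff) << 8;
    *lc = sym_buf[(*sx)++];
    return dist;                       /* 0 means "literal", as in _tr_tally() */
}

int main(void) {
    unsigned char buf[6], lc;
    unsigned next = 0, sx = 0, dist;

    sym_put(buf, &next, 0, 'A');       /* literal 'A' */
    sym_put(buf, &next, 16385, 42);    /* distance 16385, length code 42 */

    dist = sym_get(buf, &sx, &lc);
    assert(dist == 0 && lc == 'A');
    dist = sym_get(buf, &sx, &lc);
    assert(dist == 16385 && lc == 42); /* round-trips through the 3-byte form */
    return 0;
}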
diff --git a/trees.c b/trees.c
index 4f4a650..decaeb7 100644
--- a/trees.c
+++ b/trees.c
@@ -416,7 +416,7 @@ local void init_block(s)
 
     s->dyn_ltree[END_BLOCK].Freq = 1;
     s->opt_len = s->static_len = 0L;
-    s->last_lit = s->matches = 0;
+    s->sym_next = s->matches = 0;
 }
 
 #define SMALLEST 1
@@ -948,7 +948,7 @@ void ZLIB_INTERNAL _tr_flush_block(s, buf, stored_len, last)
 
     Tracev((stderr, "\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u ",
             opt_lenb, s->opt_len, static_lenb, s->static_len, stored_len,
-            s->last_lit));
+            s->sym_next / 3));
 
     if (static_lenb <= opt_lenb) opt_lenb = static_lenb;
 
@@ -1017,8 +1017,9 @@ int ZLIB_INTERNAL _tr_tally (s, dist, lc)
     unsigned dist;  /* distance of matched string */
     unsigned lc;    /* match length-MIN_MATCH or unmatched char (if dist==0) */
 {
-    s->d_buf[s->last_lit] = (ush)dist;
-    s->l_buf[s->last_lit++] = (uch)lc;
+    s->sym_buf[s->sym_next++] = dist;
+    s->sym_buf[s->sym_next++] = dist >> 8;
+    s->sym_buf[s->sym_next++] = lc;
     if (dist == 0) {
         /* lc is the unmatched char */
         s->dyn_ltree[lc].Freq++;
@@ -1033,30 +1034,7 @@ int ZLIB_INTERNAL _tr_tally (s, dist, lc)
         s->dyn_ltree[_length_code[lc]+LITERALS+1].Freq++;
         s->dyn_dtree[d_code(dist)].Freq++;
     }
-
-#ifdef TRUNCATE_BLOCK
-    /* Try to guess if it is profitable to stop the current block here */
-    if ((s->last_lit & 0x1fff) == 0 && s->level > 2) {
-        /* Compute an upper bound for the compressed length */
-        ulg out_length = (ulg)s->last_lit*8L;
-        ulg in_length = (ulg)((long)s->strstart - s->block_start);
-        int dcode;
-        for (dcode = 0; dcode < D_CODES; dcode++) {
-            out_length += (ulg)s->dyn_dtree[dcode].Freq *
-                (5L+extra_dbits[dcode]);
-        }
-        out_length >>= 3;
-        Tracev((stderr,"\nlast_lit %u, in %ld, out ~%ld(%ld%%) ",
-               s->last_lit, in_length, out_length,
-               100L - out_length*100L/in_length));
-        if (s->matches < s->last_lit/2 && out_length < in_length/2) return 1;
-    }
-#endif
-    return (s->last_lit == s->lit_bufsize-1);
-    /* We avoid equality with lit_bufsize because of wraparound at 64K
-     * on 16 bit machines and because stored blocks are restricted to
-     * 64K-1 bytes.
-     */
+    return (s->sym_next == s->sym_end);
 }
 
 /* ===========================================================================
@@ -1069,13 +1047,14 @@ local void compress_block(s, ltree, dtree)
 {
     unsigned dist;      /* distance of matched string */
     int lc;             /* match length or unmatched char (if dist == 0) */
-    unsigned lx = 0;    /* running index in l_buf */
+    unsigned sx = 0;    /* running index in sym_buf */
     unsigned code;      /* the code to send */
     int extra;          /* number of extra bits to send */
 
-    if (s->last_lit != 0) do {
-        dist = s->d_buf[lx];
-        lc = s->l_buf[lx++];
+    if (s->sym_next != 0) do {
+        dist = s->sym_buf[sx++] & 0xff;
+        dist += (unsigned)(s->sym_buf[sx++] & 0xff) << 8;
+        lc = s->sym_buf[sx++];
         if (dist == 0) {
             send_code(s, lc, ltree); /* send a literal byte */
             Tracecv(isgraph(lc), (stderr," '%c' ", lc));
@@ -1100,11 +1079,10 @@ local void compress_block(s, ltree, dtree)
             }
         } /* literal or match pair ? */
 
-        /* Check that the overlay between pending_buf and d_buf+l_buf is ok: */
-        Assert((uInt)(s->pending) < s->lit_bufsize + 2*lx,
-               "pendingBuf overflow");
+        /* Check that the overlay between pending_buf and sym_buf is ok: */
+        Assert(s->pending < s->lit_bufsize + sx, "pendingBuf overflow");
 
-    } while (lx < s->last_lit);
+    } while (sx < s->sym_next);
 
     send_code(s, END_BLOCK, ltree);
 }
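For reference, the code path this commit hardens is taken whenever fixed Huffman codes are used, most directly when an application requests the Z_FIXED strategy. The following is only a usage sketch built on the public zlib API, not a reproducer; triggering the old overflow required rare inputs with many distant matches.

/* Minimal use of the Z_FIXED strategy through the public API.
 * Build with: cc example.c -lz   (file name is illustrative) */
#include <stdio.h>
#include <string.h>
#include <zlib.h>

int main(void) {
    unsigned char in[4096], out[8192];
    z_stream strm;
    int ret;

    memset(in, 'a', sizeof(in));      /* any input; not crafted to overflow */
    memset(&strm, 0, sizeof(strm));
    /* Z_FIXED forces fixed Huffman codes, the option added in zlib 1.2.2.2. */
    ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION, Z_DEFLATED,
                       15, 8, Z_FIXED);
    if (ret != Z_OK) return 1;

    strm.next_in = in;
    strm.avail_in = sizeof(in);
    strm.next_out = out;
    strm.avail_out = sizeof(out);
    ret = deflate(&strm, Z_FINISH);   /* one-shot compress */
    printf("deflate returned %d, produced %lu bytes\n",
           ret, (unsigned long)strm.total_out);
    deflateEnd(&strm);
    return ret == Z_STREAM_END ? 0 : 1;
}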