diff options
| author | Mark Adler <madler@alumni.caltech.edu> | 2025-04-09 23:35:16 -0700 |
|---|---|---|
| committer | Mark Adler <git@madler.net> | 2025-12-06 17:39:44 -0800 |
| commit | 95278e4ef9de87294dea0c94184bd8fe4316c3a9 (patch) | |
| tree | cea18c4601b460ef7ed5e63910e9cd2ae7c518ce | |
| parent | 1ab1026a20282383d9cd2282f81461655bea4028 (diff) | |
| download | zlib-95278e4ef9de87294dea0c94184bd8fe4316c3a9.tar.gz zlib-95278e4ef9de87294dea0c94184bd8fe4316c3a9.tar.bz2 zlib-95278e4ef9de87294dea0c94184bd8fe4316c3a9.zip | |
Improve the discrimination between trailing garbage and bad gzip.

This proceeds to try to decode whatever follows the last gzip
member, and concludes that it is acceptable trailing garbage only
if it results in a data error without decompressing any data. This
commit also reduces the probability of a false-positive gzip header
detection.

| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | gzguts.h | 1 |
| -rw-r--r-- | gzlib.c | 1 |
| -rw-r--r-- | gzread.c | 65 |

3 files changed, 37 insertions, 30 deletions

| @@ -183,6 +183,7 @@ typedef struct { | |||
| 183 | unsigned char *out; /* output buffer (double-sized when reading) */ | 183 | unsigned char *out; /* output buffer (double-sized when reading) */ |
| 184 | int direct; /* 0 if processing gzip, 1 if transparent */ | 184 | int direct; /* 0 if processing gzip, 1 if transparent */ |
| 185 | /* just for reading */ | 185 | /* just for reading */ |
| 186 | int junk; /* -1 = start, 1 = junk candidate, 0 = in gzip */ | ||
| 186 | int how; /* 0: get header, 1: copy, 2: decompress */ | 187 | int how; /* 0: get header, 1: copy, 2: decompress */ |
| 187 | z_off64_t start; /* where the gzip data started, for rewinding */ | 188 | z_off64_t start; /* where the gzip data started, for rewinding */ |
| 188 | int eof; /* true if end of input file reached */ | 189 | int eof; /* true if end of input file reached */ |
| @@ -72,6 +72,7 @@ local void gz_reset(gz_statep state) { | |||
| 72 | state->eof = 0; /* not at end of file */ | 72 | state->eof = 0; /* not at end of file */ |
| 73 | state->past = 0; /* have not read past end yet */ | 73 | state->past = 0; /* have not read past end yet */ |
| 74 | state->how = LOOK; /* look for gzip header */ | 74 | state->how = LOOK; /* look for gzip header */ |
| 75 | state->junk = -1; /* mark first member */ | ||
| 75 | } | 76 | } |
| 76 | else /* for writing ... */ | 77 | else /* for writing ... */ |
| 77 | state->reset = 0; /* no deflateReset pending */ | 78 | state->reset = 0; /* no deflateReset pending */ |
| @@ -106,47 +106,42 @@ local int gz_look(gz_statep state) { | |||
| 106 | } | 106 | } |
| 107 | } | 107 | } |
| 108 | 108 | ||
| 109 | /* if transparent reading is disabled, simply read as gzip */ | 109 | /* if transparent reading is disabled, which would only be at the start, or |
| 110 | if (state->direct == -1) { | 110 | if we're looking for a gzip member after the first one, which is not at |
| 111 | the start, then proceed directly to look for a gzip member next */ | ||
| 112 | if (state->direct == -1 || state->junk == 0) { | ||
| 111 | inflateReset(strm); | 113 | inflateReset(strm); |
| 112 | state->how = GZIP; | 114 | state->how = GZIP; |
| 115 | state->junk = state->junk != -1; | ||
| 113 | state->direct = 0; | 116 | state->direct = 0; |
| 114 | return 0; | 117 | return 0; |
| 115 | } | 118 | } |
| 116 | 119 | ||
| 117 | /* get at least the magic bytes in the input buffer */ | 120 | /* otherwise we're at the start with auto-detect -- we check to see if the |
| 118 | if (strm->avail_in < 2) { | 121 | first four bytes could be gzip header in order to decide whether or not |
| 119 | if (gz_avail(state) == -1) | 122 | this will be a transparent read */ |
| 120 | return -1; | 123 | |
| 121 | if (strm->avail_in == 0) | 124 | /* load any header bytes into the input buffer -- if the input is empty, |
| 122 | return 0; | 125 | then it's not an error as this is a transparent read of zero bytes */ |
| 123 | } | 126 | if (gz_avail(state) == -1) |
| 127 | return -1; | ||
| 128 | if (strm->avail_in == 0) | ||
| 129 | return 0; | ||
| 124 | 130 | ||
| 125 | /* look for gzip magic bytes -- if there, do gzip decoding (note: there is | 131 | /* see if this is (likely) gzip input -- if the first four bytes are |
| 126 | a logical dilemma here when considering the case of a partially written | 132 | consistent with a gzip header, then go look for the first gzip member, |
| 127 | gzip file, to wit, if a single 31 byte is written, then we cannot tell | 133 | otherwise proceed to copy the input transparently */ |
| 128 | whether this is a single-byte file, or just a partially written gzip | 134 | if (strm->avail_in > 3 && |
| 129 | file -- for here we assume that if a gzip file is being written, then | 135 | strm->next_in[0] == 31 && strm->next_in[1] == 139 && |
| 130 | the header will be written in a single operation, so that reading a | 136 | strm->next_in[2] == 8 && strm->next_in[3] < 32) { |
| 131 | single byte is sufficient indication that it is not a gzip file) */ | ||
| 132 | if (strm->avail_in > 1 && | ||
| 133 | strm->next_in[0] == 31 && strm->next_in[1] == 139) { | ||
| 134 | inflateReset(strm); | 137 | inflateReset(strm); |
| 135 | state->how = GZIP; | 138 | state->how = GZIP; |
| 139 | state->junk = 1; | ||
| 136 | state->direct = 0; | 140 | state->direct = 0; |
| 137 | return 0; | 141 | return 0; |
| 138 | } | 142 | } |
| 139 | 143 | ||
| 140 | /* no gzip header -- if we were decoding gzip before, then this is trailing | 144 | /* doing raw i/o: copy any leftover input to output -- this assumes that |
| 141 | garbage. Ignore the trailing garbage and finish. */ | ||
| 142 | if (state->direct == 0) { | ||
| 143 | strm->avail_in = 0; | ||
| 144 | state->eof = 1; | ||
| 145 | state->x.have = 0; | ||
| 146 | return 0; | ||
| 147 | } | ||
| 148 | |||
| 149 | /* doing raw i/o, copy any leftover input to output -- this assumes that | ||
| 150 | the output buffer is larger than the input buffer, which also assures | 145 | the output buffer is larger than the input buffer, which also assures |
| 151 | space for gzungetc() */ | 146 | space for gzungetc() */ |
| 152 | state->x.next = state->out; | 147 | state->x.next = state->out; |
| @@ -154,7 +149,6 @@ local int gz_look(gz_statep state) { | |||
| 154 | state->x.have = strm->avail_in; | 149 | state->x.have = strm->avail_in; |
| 155 | strm->avail_in = 0; | 150 | strm->avail_in = 0; |
| 156 | state->how = COPY; | 151 | state->how = COPY; |
| 157 | state->direct = 1; | ||
| 158 | return 0; | 152 | return 0; |
| 159 | } | 153 | } |
| 160 | 154 | ||
| @@ -181,6 +175,9 @@ local int gz_decomp(gz_statep state) { | |||
| 181 | 175 | ||
| 182 | /* decompress and handle errors */ | 176 | /* decompress and handle errors */ |
| 183 | ret = inflate(strm, Z_NO_FLUSH); | 177 | ret = inflate(strm, Z_NO_FLUSH); |
| 178 | if (strm->avail_out < had) | ||
| 179 | /* any decompressed data marks this as a real gzip stream */ | ||
| 180 | state->junk = 0; | ||
| 184 | if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT) { | 181 | if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT) { |
| 185 | gz_error(state, Z_STREAM_ERROR, | 182 | gz_error(state, Z_STREAM_ERROR, |
| 186 | "internal error: inflate stream corrupt"); | 183 | "internal error: inflate stream corrupt"); |
| @@ -191,6 +188,12 @@ local int gz_decomp(gz_statep state) { | |||
| 191 | return -1; | 188 | return -1; |
| 192 | } | 189 | } |
| 193 | if (ret == Z_DATA_ERROR) { /* deflate stream invalid */ | 190 | if (ret == Z_DATA_ERROR) { /* deflate stream invalid */ |
| 191 | if (state->junk == 1) { /* trailing garbage is ok */ | ||
| 192 | strm->avail_in = 0; | ||
| 193 | state->eof = 1; | ||
| 194 | state->how = LOOK; | ||
| 195 | break; | ||
| 196 | } | ||
| 194 | gz_error(state, Z_DATA_ERROR, | 197 | gz_error(state, Z_DATA_ERROR, |
| 195 | strm->msg == NULL ? "compressed data error" : strm->msg); | 198 | strm->msg == NULL ? "compressed data error" : strm->msg); |
| 196 | return -1; | 199 | return -1; |
| @@ -202,8 +205,10 @@ local int gz_decomp(gz_statep state) { | |||
| 202 | state->x.next = strm->next_out - state->x.have; | 205 | state->x.next = strm->next_out - state->x.have; |
| 203 | 206 | ||
| 204 | /* if the gzip stream completed successfully, look for another */ | 207 | /* if the gzip stream completed successfully, look for another */ |
| 205 | if (ret == Z_STREAM_END) | 208 | if (ret == Z_STREAM_END) { |
| 209 | state->junk = 0; | ||
| 206 | state->how = LOOK; | 210 | state->how = LOOK; |
| 211 | } | ||
| 207 | 212 | ||
| 208 | /* good decompression */ | 213 | /* good decompression */ |
| 209 | return 0; | 214 | return 0; |
