about | summary | refs | log | tree | commit | diff
path: root/gzread.c
diff options
context:
space:
mode:
author Mark Adler <madler@alumni.caltech.edu> 2025-04-09 23:35:16 -0700
committer Mark Adler <git@madler.net> 2025-12-06 17:39:44 -0800
commit 95278e4ef9de87294dea0c94184bd8fe4316c3a9 (patch)
tree cea18c4601b460ef7ed5e63910e9cd2ae7c518ce /gzread.c
parent 1ab1026a20282383d9cd2282f81461655bea4028 (diff)
download zlib-95278e4ef9de87294dea0c94184bd8fe4316c3a9.tar.gz
zlib-95278e4ef9de87294dea0c94184bd8fe4316c3a9.tar.bz2
zlib-95278e4ef9de87294dea0c94184bd8fe4316c3a9.zip
Improve the discrimination between trailing garbage and bad gzip.
This proceeds to try to decode whatever follows the last gzip member, and concludes that it is acceptable trailing garbage only if it results in a data error without decompressing any data. This commit also reduces the probability of a false-positive gzip header detection.
Diffstat (limited to 'gzread.c')
-rw-r--r-- gzread.c 65
1 files changed, 35 insertions, 30 deletions
diff --git a/gzread.c b/gzread.c
index 6fefe89d..ac8be772 100644
--- a/gzread.c
+++ b/gzread.c
@@ -106,47 +106,42 @@ local int gz_look(gz_statep state) {
106 } 106 }
107 } 107 }
108 108
109 /* if transparent reading is disabled, simply read as gzip */ 109 /* if transparent reading is disabled, which would only be at the start, or
110 if (state->direct == -1) { 110 if we're looking for a gzip member after the first one, which is not at
111 the start, then proceed directly to look for a gzip member next */
112 if (state->direct == -1 || state->junk == 0) {
111 inflateReset(strm); 113 inflateReset(strm);
112 state->how = GZIP; 114 state->how = GZIP;
115 state->junk = state->junk != -1;
113 state->direct = 0; 116 state->direct = 0;
114 return 0; 117 return 0;
115 } 118 }
116 119
117 /* get at least the magic bytes in the input buffer */ 120 /* otherwise we're at the start with auto-detect -- we check to see if the
118 if (strm->avail_in < 2) { 121 first four bytes could be gzip header in order to decide whether or not
119 if (gz_avail(state) == -1) 122 this will be a transparent read */
120 return -1; 123
121 if (strm->avail_in == 0) 124 /* load any header bytes into the input buffer -- if the input is empty,
122 return 0; 125 then it's not an error as this is a transparent read of zero bytes */
123 } 126 if (gz_avail(state) == -1)
127 return -1;
128 if (strm->avail_in == 0)
129 return 0;
124 130
125 /* look for gzip magic bytes -- if there, do gzip decoding (note: there is 131 /* see if this is (likely) gzip input -- if the first four bytes are
126 a logical dilemma here when considering the case of a partially written 132 consistent with a gzip header, then go look for the first gzip member,
127 gzip file, to wit, if a single 31 byte is written, then we cannot tell 133 otherwise proceed to copy the input transparently */
128 whether this is a single-byte file, or just a partially written gzip 134 if (strm->avail_in > 3 &&
129 file -- for here we assume that if a gzip file is being written, then 135 strm->next_in[0] == 31 && strm->next_in[1] == 139 &&
130 the header will be written in a single operation, so that reading a 136 strm->next_in[2] == 8 && strm->next_in[3] < 32) {
131 single byte is sufficient indication that it is not a gzip file) */
132 if (strm->avail_in > 1 &&
133 strm->next_in[0] == 31 && strm->next_in[1] == 139) {
134 inflateReset(strm); 137 inflateReset(strm);
135 state->how = GZIP; 138 state->how = GZIP;
139 state->junk = 1;
136 state->direct = 0; 140 state->direct = 0;
137 return 0; 141 return 0;
138 } 142 }
139 143
140 /* no gzip header -- if we were decoding gzip before, then this is trailing 144 /* doing raw i/o: copy any leftover input to output -- this assumes that
141 garbage. Ignore the trailing garbage and finish. */
142 if (state->direct == 0) {
143 strm->avail_in = 0;
144 state->eof = 1;
145 state->x.have = 0;
146 return 0;
147 }
148
149 /* doing raw i/o, copy any leftover input to output -- this assumes that
150 the output buffer is larger than the input buffer, which also assures 145 the output buffer is larger than the input buffer, which also assures
151 space for gzungetc() */ 146 space for gzungetc() */
152 state->x.next = state->out; 147 state->x.next = state->out;
@@ -154,7 +149,6 @@ local int gz_look(gz_statep state) {
154 state->x.have = strm->avail_in; 149 state->x.have = strm->avail_in;
155 strm->avail_in = 0; 150 strm->avail_in = 0;
156 state->how = COPY; 151 state->how = COPY;
157 state->direct = 1;
158 return 0; 152 return 0;
159} 153}
160 154
@@ -181,6 +175,9 @@ local int gz_decomp(gz_statep state) {
181 175
182 /* decompress and handle errors */ 176 /* decompress and handle errors */
183 ret = inflate(strm, Z_NO_FLUSH); 177 ret = inflate(strm, Z_NO_FLUSH);
178 if (strm->avail_out < had)
179 /* any decompressed data marks this as a real gzip stream */
180 state->junk = 0;
184 if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT) { 181 if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT) {
185 gz_error(state, Z_STREAM_ERROR, 182 gz_error(state, Z_STREAM_ERROR,
186 "internal error: inflate stream corrupt"); 183 "internal error: inflate stream corrupt");
@@ -191,6 +188,12 @@ local int gz_decomp(gz_statep state) {
191 return -1; 188 return -1;
192 } 189 }
193 if (ret == Z_DATA_ERROR) { /* deflate stream invalid */ 190 if (ret == Z_DATA_ERROR) { /* deflate stream invalid */
191 if (state->junk == 1) { /* trailing garbage is ok */
192 strm->avail_in = 0;
193 state->eof = 1;
194 state->how = LOOK;
195 break;
196 }
194 gz_error(state, Z_DATA_ERROR, 197 gz_error(state, Z_DATA_ERROR,
195 strm->msg == NULL ? "compressed data error" : strm->msg); 198 strm->msg == NULL ? "compressed data error" : strm->msg);
196 return -1; 199 return -1;
@@ -202,8 +205,10 @@ local int gz_decomp(gz_statep state) {
202 state->x.next = strm->next_out - state->x.have; 205 state->x.next = strm->next_out - state->x.have;
203 206
204 /* if the gzip stream completed successfully, look for another */ 207 /* if the gzip stream completed successfully, look for another */
205 if (ret == Z_STREAM_END) 208 if (ret == Z_STREAM_END) {
209 state->junk = 0;
206 state->how = LOOK; 210 state->how = LOOK;
211 }
207 212
208 /* good decompression */ 213 /* good decompression */
209 return 0; 214 return 0;