aboutsummaryrefslogtreecommitdiff
path: root/gzread.c
diff options
context:
space:
mode:
author Mark Adler <madler@alumni.caltech.edu> 2011-09-26 00:57:26 -0700
committer Mark Adler <madler@alumni.caltech.edu> 2011-09-26 00:57:26 -0700
commit 5ad116abdad08c49b9ab20d748550eecd2c1d96d (patch)
tree 2aace8e541c2332df89f6161d98912258f23a637 /gzread.c
parent a9ae24b6538a8c75b13826ef8a2547344fd2e08c (diff)
download zlib-5ad116abdad08c49b9ab20d748550eecd2c1d96d.tar.gz
zlib-5ad116abdad08c49b9ab20d748550eecd2c1d96d.tar.bz2
zlib-5ad116abdad08c49b9ab20d748550eecd2c1d96d.zip
Allow gzread() and related functions to continue after gzclearerr().
Before this fix, gzread() would lose data if a premature end of file was encountered. This prevented gzread() from being used on a file that was being written concurrently. Now gzread() returns all of the data it has available before indicating a premature end of file. This also changes the error returned on a premature end of file from Z_DATA_ERROR to Z_BUF_ERROR. This allows the user to determine if the error is recoverable, which it is if Z_BUF_ERROR is returned. If a Z_DATA_ERROR is returned, then the error is not recoverable. This patch replaces the functionality of a previous patch that fixed reading through an empty gzip stream in a concatenation of gzip streams. To implement this fix, a noticeable rewrite of gzread.c was needed. The patch has the added advantage of using inflate's gzip processing instead of replicating the functionality in gzread.c. This makes the gz code a little simpler.
Diffstat (limited to 'gzread.c')
-rw-r--r-- gzread.c 268
1 file changed, 91 insertions, 177 deletions
diff --git a/gzread.c b/gzread.c
index 213ad8c..960bf12 100644
--- a/gzread.c
+++ b/gzread.c
@@ -8,10 +8,9 @@
8/* Local functions */ 8/* Local functions */
9local int gz_load OF((gz_statep, unsigned char *, unsigned, unsigned *)); 9local int gz_load OF((gz_statep, unsigned char *, unsigned, unsigned *));
10local int gz_avail OF((gz_statep)); 10local int gz_avail OF((gz_statep));
11local int gz_next4 OF((gz_statep, unsigned long *)); 11local int gz_look OF((gz_statep));
12local int gz_head OF((gz_statep));
13local int gz_decomp OF((gz_statep)); 12local int gz_decomp OF((gz_statep));
14local int gz_make OF((gz_statep)); 13local int gz_fetch OF((gz_statep));
15local int gz_skip OF((gz_statep, z_off64_t)); 14local int gz_skip OF((gz_statep, z_off64_t));
16 15
17/* Use read() to load a buffer -- return -1 on error, otherwise 0. Read from 16/* Use read() to load a buffer -- return -1 on error, otherwise 0. Read from
@@ -46,67 +45,42 @@ local int gz_load(state, buf, len, have)
46 error, 0 otherwise. Note that the eof flag is set when the end of the input 45 error, 0 otherwise. Note that the eof flag is set when the end of the input
47 file is reached, even though there may be unused data in the buffer. Once 46 file is reached, even though there may be unused data in the buffer. Once
48 that data has been used, no more attempts will be made to read the file. 47 that data has been used, no more attempts will be made to read the file.
49 gz_avail() assumes that strm->avail_in == 0. */ 48 If strm->avail_in != 0, then the current data is moved to the beginning of
49 the input buffer, and then the remainder of the buffer is loaded with the
50 available data from the input file. */
50local int gz_avail(state) 51local int gz_avail(state)
51 gz_statep state; 52 gz_statep state;
52{ 53{
54 unsigned got;
53 z_streamp strm = &(state->strm); 55 z_streamp strm = &(state->strm);
54 56
55 if (state->err != Z_OK) 57 if (state->err != Z_OK && state->err != Z_BUF_ERROR)
56 return -1; 58 return -1;
57 if (state->eof == 0) { 59 if (state->eof == 0) {
58 if (gz_load(state, state->in, state->size, 60 if (strm->avail_in)
59 (unsigned *)&(strm->avail_in)) == -1) 61 memmove(state->in, strm->next_in, strm->avail_in);
62 if (gz_load(state, state->in + strm->avail_in,
63 state->size - strm->avail_in, &got) == -1)
60 return -1; 64 return -1;
65 strm->avail_in += got;
61 strm->next_in = state->in; 66 strm->next_in = state->in;
62 } 67 }
63 return 0; 68 return 0;
64} 69}
65 70
66/* Get next byte from input, or -1 if end or error. */
67#define NEXT() ((strm->avail_in == 0 && gz_avail(state) == -1) ? -1 : \
68 (strm->avail_in == 0 ? -1 : \
69 (strm->avail_in--, *(strm->next_in)++)))
70
71/* Get a four-byte little-endian integer and return 0 on success and the value
72 in *ret. Otherwise -1 is returned and *ret is not modified. */
73local int gz_next4(state, ret)
74 gz_statep state;
75 unsigned long *ret;
76{
77 int ch;
78 unsigned long val;
79 z_streamp strm = &(state->strm);
80
81 val = NEXT();
82 val += (unsigned)NEXT() << 8;
83 val += (unsigned long)NEXT() << 16;
84 ch = NEXT();
85 if (ch == -1)
86 return -1;
87 val += (unsigned long)ch << 24;
88 *ret = val;
89 return 0;
90}
91
92/* Look for gzip header, set up for inflate or copy. state->have must be zero. 71/* Look for gzip header, set up for inflate or copy. state->have must be zero.
93 If this is the first time in, allocate required memory. state->how will be 72 If this is the first time in, allocate required memory. state->how will be
94 left unchanged if there is no more input data available, will be set to COPY 73 left unchanged if there is no more input data available, will be set to COPY
95 if there is no gzip header and direct copying will be performed, or it will 74 if there is no gzip header and direct copying will be performed, or it will
96 be set to GZIP for decompression, and the gzip header will be skipped so 75 be set to GZIP for decompression. If direct copying, then leftover input
97 that the next available input data is the raw deflate stream. If direct 76 data from the input buffer will be copied to the output buffer. In that
98 copying, then leftover input data from the input buffer will be copied to 77 case, all further file reads will be directly to either the output buffer or
99 the output buffer. In that case, all further file reads will be directly to 78 a user buffer. If decompressing, the inflate state will be initialized.
100 either the output buffer or a user buffer. If decompressing, the inflate 79 gz_look() will return 0 on success or -1 on failure. */
101 state and the check value will be initialized. gz_head() will return 0 on 80local int gz_look(state)
102 success or -1 on failure. Failures may include read errors or gzip header
103 errors. */
104local int gz_head(state)
105 gz_statep state; 81 gz_statep state;
106{ 82{
107 z_streamp strm = &(state->strm); 83 z_streamp strm = &(state->strm);
108 int flags;
109 unsigned len;
110 84
111 /* allocate read buffers and inflate memory */ 85 /* allocate read buffers and inflate memory */
112 if (state->size == 0) { 86 if (state->size == 0) {
@@ -129,7 +103,7 @@ local int gz_head(state)
129 state->strm.opaque = Z_NULL; 103 state->strm.opaque = Z_NULL;
130 state->strm.avail_in = 0; 104 state->strm.avail_in = 0;
131 state->strm.next_in = Z_NULL; 105 state->strm.next_in = Z_NULL;
132 if (inflateInit2(&(state->strm), -15) != Z_OK) { /* raw inflate */ 106 if (inflateInit2(&(state->strm), 15 + 16) != Z_OK) { /* gunzip */
133 free(state->out); 107 free(state->out);
134 free(state->in); 108 free(state->in);
135 state->size = 0; 109 state->size = 0;
@@ -138,73 +112,27 @@ local int gz_head(state)
138 } 112 }
139 } 113 }
140 114
141 /* get some data in the input buffer */ 115 /* get at least the magic bytes in the input buffer */
142 if (strm->avail_in == 0) { 116 if (strm->avail_in < 2) {
143 if (gz_avail(state) == -1) 117 if (gz_avail(state) == -1)
144 return -1; 118 return -1;
145 if (strm->avail_in == 0) 119 if (strm->avail_in == 0)
146 return 0; 120 return 0;
147 } 121 }
148 122
149 /* look for the gzip magic header bytes 31 and 139 */ 123 /* look for gzip magic bytes -- if there, do gzip decoding (note: there is
150 if (strm->next_in[0] == 31) { 124 a logical dilemma here when considering the case of a partially written
151 strm->avail_in--; 125 gzip file, to wit, if a single 31 byte is written, then we cannot tell
152 strm->next_in++; 126 whether this is a single-byte file, or just a partially written gzip
153 if (strm->avail_in == 0 && gz_avail(state) == -1) 127 file -- for here we assume that if a gzip file is being written, then
154 return -1; 128 the header will be written in a single operation, so that reading a
155 if (strm->avail_in && strm->next_in[0] == 139) { 129 single byte is sufficient indication that it is not a gzip file) */
156 /* we have a gzip header, woo hoo! */ 130 if (strm->avail_in > 1 &&
157 strm->avail_in--; 131 strm->next_in[0] == 31 && strm->next_in[1] == 139) {
158 strm->next_in++; 132 inflateReset(strm);
159 133 state->how = GZIP;
160 /* skip rest of header */ 134 state->direct = 0;
161 if (NEXT() != 8) { /* compression method */ 135 return 0;
162 gz_error(state, Z_DATA_ERROR, "unknown compression method");
163 return -1;
164 }
165 flags = NEXT();
166 if (flags & 0xe0) { /* reserved flag bits */
167 gz_error(state, Z_DATA_ERROR, "unknown header flags set");
168 return -1;
169 }
170 NEXT(); /* modification time */
171 NEXT();
172 NEXT();
173 NEXT();
174 NEXT(); /* extra flags */
175 NEXT(); /* operating system */
176 if (flags & 4) { /* extra field */
177 len = (unsigned)NEXT();
178 len += (unsigned)NEXT() << 8;
179 while (len--)
180 if (NEXT() < 0)
181 break;
182 }
183 if (flags & 8) /* file name */
184 while (NEXT() > 0)
185 ;
186 if (flags & 16) /* comment */
187 while (NEXT() > 0)
188 ;
189 if (flags & 2) { /* header crc */
190 NEXT();
191 NEXT();
192 }
193 /* an unexpected end of file is not checked for here -- it will be
194 noticed on the first request for uncompressed data */
195
196 /* set up for decompression */
197 inflateReset(strm);
198 strm->adler = crc32(0L, Z_NULL, 0);
199 state->how = GZIP;
200 state->direct = 0;
201 return 0;
202 }
203 else {
204 /* not a gzip file -- save first byte (31) and fall to raw i/o */
205 state->out[0] = 31;
206 state->have = 1;
207 }
208 } 136 }
209 137
210 /* no gzip header -- if we were decoding gzip before, then this is trailing 138 /* no gzip header -- if we were decoding gzip before, then this is trailing
@@ -222,8 +150,8 @@ local int gz_head(state)
222 state->raw = state->pos; 150 state->raw = state->pos;
223 state->next = state->out; 151 state->next = state->out;
224 if (strm->avail_in) { 152 if (strm->avail_in) {
225 memcpy(state->next + state->have, strm->next_in, strm->avail_in); 153 memcpy(state->next, strm->next_in, strm->avail_in);
226 state->have += strm->avail_in; 154 state->have = strm->avail_in;
227 strm->avail_in = 0; 155 strm->avail_in = 0;
228 } 156 }
229 state->how = COPY; 157 state->how = COPY;
@@ -232,19 +160,15 @@ local int gz_head(state)
232} 160}
233 161
234/* Decompress from input to the provided next_out and avail_out in the state. 162/* Decompress from input to the provided next_out and avail_out in the state.
235 If the end of the compressed data is reached, then verify the gzip trailer 163 state->have and state->next are set to point to the just decompressed data,
236 check value and length (modulo 2^32). state->have and state->next are set 164 If the gzip stream completes, state->how is reset to LOOK to look for the
237 to point to the just decompressed data, and the crc is updated. If the 165 next gzip stream or raw data, once state->have is depleted. Returns 0 on
238 trailer is verified, state->how is reset to LOOK to look for the next gzip 166 success, -1 on failure. */
239 stream or raw data, once state->have is depleted. Returns 0 on success, -1
240 on failure. Failures may include invalid compressed data or a failed gzip
241 trailer verification. */
242local int gz_decomp(state) 167local int gz_decomp(state)
243 gz_statep state; 168 gz_statep state;
244{ 169{
245 int ret; 170 int ret = Z_OK;
246 unsigned had; 171 unsigned had;
247 unsigned long crc, len;
248 z_streamp strm = &(state->strm); 172 z_streamp strm = &(state->strm);
249 173
250 /* fill output buffer up to end of deflate stream */ 174 /* fill output buffer up to end of deflate stream */
@@ -254,15 +178,15 @@ local int gz_decomp(state)
254 if (strm->avail_in == 0 && gz_avail(state) == -1) 178 if (strm->avail_in == 0 && gz_avail(state) == -1)
255 return -1; 179 return -1;
256 if (strm->avail_in == 0) { 180 if (strm->avail_in == 0) {
257 gz_error(state, Z_DATA_ERROR, "unexpected end of file"); 181 gz_error(state, Z_BUF_ERROR, "unexpected end of file");
258 return -1; 182 break;
259 } 183 }
260 184
261 /* decompress and handle errors */ 185 /* decompress and handle errors */
262 ret = inflate(strm, Z_NO_FLUSH); 186 ret = inflate(strm, Z_NO_FLUSH);
263 if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT) { 187 if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT) {
264 gz_error(state, Z_STREAM_ERROR, 188 gz_error(state, Z_STREAM_ERROR,
265 "internal error: inflate stream corrupt"); 189 "internal error: inflate stream corrupt");
266 return -1; 190 return -1;
267 } 191 }
268 if (ret == Z_MEM_ERROR) { 192 if (ret == Z_MEM_ERROR) {
@@ -271,67 +195,55 @@ local int gz_decomp(state)
271 } 195 }
272 if (ret == Z_DATA_ERROR) { /* deflate stream invalid */ 196 if (ret == Z_DATA_ERROR) { /* deflate stream invalid */
273 gz_error(state, Z_DATA_ERROR, 197 gz_error(state, Z_DATA_ERROR,
274 strm->msg == NULL ? "compressed data error" : strm->msg); 198 strm->msg == NULL ? "compressed data error" : strm->msg);
275 return -1; 199 return -1;
276 } 200 }
277 } while (strm->avail_out && ret != Z_STREAM_END); 201 } while (strm->avail_out && ret != Z_STREAM_END);
278 202
279 /* update available output and crc check value */ 203 /* update available output */
280 state->have = had - strm->avail_out; 204 state->have = had - strm->avail_out;
281 state->next = strm->next_out - state->have; 205 state->next = strm->next_out - state->have;
282 strm->adler = crc32(strm->adler, state->next, state->have);
283 206
284 /* check gzip trailer if at end of deflate stream */ 207 /* if the gzip stream completed successfully, look for another */
285 if (ret == Z_STREAM_END) { 208 if (ret == Z_STREAM_END)
286 if (gz_next4(state, &crc) == -1 || gz_next4(state, &len) == -1) { 209 state->how = LOOK;
287 gz_error(state, Z_DATA_ERROR, "unexpected end of file");
288 return -1;
289 }
290 if (crc != strm->adler) {
291 gz_error(state, Z_DATA_ERROR, "incorrect data check");
292 return -1;
293 }
294 if (len != (strm->total_out & 0xffffffffL)) {
295 gz_error(state, Z_DATA_ERROR, "incorrect length check");
296 return -1;
297 }
298 state->how = LOOK; /* ready for next stream, once have is 0 (leave
299 state->direct unchanged to remember how) */
300 }
301 210
302 /* good decompression */ 211 /* good decompression */
303 return 0; 212 return 0;
304} 213}
305 214
306/* Make data and put in the output buffer. Assumes that state->have == 0. 215/* Fetch data and put it in the output buffer. Assumes that state->have == 0.
307 Data is either copied from the input file or decompressed from the input 216 Data is either copied from the input file or decompressed from the input
308 file depending on state->how. If state->how is LOOK, then a gzip header is 217 file depending on state->how. If state->how is LOOK, then a gzip header is
309 looked for (and skipped if found) to discern whether to copy or decompress. 218 looked for to determine whether to copy or decompress. Returns -1 on error,
310 Returns -1 on error, otherwise 0. gz_make() will leave state->how as COPY 219 otherwise 0. gz_fetch() will leave state->how as COPY or GZIP unless the
311 or GZIP unless the end of the input file has been reached and all data has 220 end of the input file has been reached and all data has been processed. */
312 been processed. */ 221local int gz_fetch(state)
313local int gz_make(state)
314 gz_statep state; 222 gz_statep state;
315{ 223{
316 z_streamp strm = &(state->strm); 224 z_streamp strm = &(state->strm);
317 225
318 if (state->how == LOOK) { /* look for gzip header */ 226 do {
319 if (gz_head(state) == -1) 227 switch(state->how) {
320 return -1; 228 case LOOK: /* -> LOOK, COPY (only if never GZIP), or GZIP */
321 if (state->have) /* got some data from gz_head() */ 229 if (gz_look(state) == -1)
230 return -1;
231 if (state->how == LOOK)
232 return 0;
233 break;
234 case COPY: /* -> COPY */
235 if (gz_load(state, state->out, state->size << 1, &(state->have))
236 == -1)
237 return -1;
238 state->next = state->out;
322 return 0; 239 return 0;
323 } 240 case GZIP: /* -> GZIP or LOOK (if end of gzip stream) */
324 if (state->how == COPY) { /* straight copy */ 241 strm->avail_out = state->size << 1;
325 if (gz_load(state, state->out, state->size << 1, &(state->have)) == -1) 242 strm->next_out = state->out;
326 return -1; 243 if (gz_decomp(state) == -1)
327 state->next = state->out; 244 return -1;
328 } 245 }
329 else if (state->how == GZIP) { /* decompress */ 246 } while (state->have == 0);
330 strm->avail_out = state->size << 1;
331 strm->next_out = state->out;
332 if (gz_decomp(state) == -1)
333 return -1;
334 }
335 return 0; 247 return 0;
336} 248}
337 249
@@ -361,7 +273,7 @@ local int gz_skip(state, len)
361 /* need more data to skip -- load up output buffer */ 273 /* need more data to skip -- load up output buffer */
362 else { 274 else {
363 /* get more output, looking for header if required */ 275 /* get more output, looking for header if required */
364 if (gz_make(state) == -1) 276 if (gz_fetch(state) == -1)
365 return -1; 277 return -1;
366 } 278 }
367 return 0; 279 return 0;
@@ -383,8 +295,9 @@ int ZEXPORT gzread(file, buf, len)
383 state = (gz_statep)file; 295 state = (gz_statep)file;
384 strm = &(state->strm); 296 strm = &(state->strm);
385 297
386 /* check that we're reading and that there's no error */ 298 /* check that we're reading and that there's no (serious) error */
387 if (state->mode != GZ_READ || state->err != Z_OK) 299 if (state->mode != GZ_READ ||
300 (state->err != Z_OK && state->err != Z_BUF_ERROR))
388 return -1; 301 return -1;
389 302
390 /* since an int is returned, make sure len fits in one, otherwise return 303 /* since an int is returned, make sure len fits in one, otherwise return
@@ -424,7 +337,7 @@ int ZEXPORT gzread(file, buf, len)
424 buffer */ 337 buffer */
425 else if (state->how == LOOK || len < (state->size << 1)) { 338 else if (state->how == LOOK || len < (state->size << 1)) {
426 /* get more output, looking for header if required */ 339 /* get more output, looking for header if required */
427 if (gz_make(state) == -1) 340 if (gz_fetch(state) == -1)
428 return -1; 341 return -1;
429 continue; /* no progress yet -- go back to memcpy() above */ 342 continue; /* no progress yet -- go back to memcpy() above */
430 /* the copy above assures that we will leave with space in the 343 /* the copy above assures that we will leave with space in the
@@ -471,8 +384,9 @@ int ZEXPORT gzgetc(file)
471 return -1; 384 return -1;
472 state = (gz_statep)file; 385 state = (gz_statep)file;
473 386
474 /* check that we're reading and that there's no error */ 387 /* check that we're reading and that there's no (serious) error */
475 if (state->mode != GZ_READ || state->err != Z_OK) 388 if (state->mode != GZ_READ ||
389 (state->err != Z_OK && state->err != Z_BUF_ERROR))
476 return -1; 390 return -1;
477 391
478 /* try output buffer (no need to check for skip request) */ 392 /* try output buffer (no need to check for skip request) */
@@ -499,8 +413,9 @@ int ZEXPORT gzungetc(c, file)
499 return -1; 413 return -1;
500 state = (gz_statep)file; 414 state = (gz_statep)file;
501 415
502 /* check that we're reading and that there's no error */ 416 /* check that we're reading and that there's no (serious) error */
503 if (state->mode != GZ_READ || state->err != Z_OK) 417 if (state->mode != GZ_READ ||
418 (state->err != Z_OK && state->err != Z_BUF_ERROR))
504 return -1; 419 return -1;
505 420
506 /* process a skip request */ 421 /* process a skip request */
@@ -560,8 +475,9 @@ char * ZEXPORT gzgets(file, buf, len)
560 return NULL; 475 return NULL;
561 state = (gz_statep)file; 476 state = (gz_statep)file;
562 477
563 /* check that we're reading and that there's no error */ 478 /* check that we're reading and that there's no (serious) error */
564 if (state->mode != GZ_READ || state->err != Z_OK) 479 if (state->mode != GZ_READ ||
480 (state->err != Z_OK && state->err != Z_BUF_ERROR))
565 return NULL; 481 return NULL;
566 482
567 /* process a skip request */ 483 /* process a skip request */
@@ -578,10 +494,8 @@ char * ZEXPORT gzgets(file, buf, len)
578 left = (unsigned)len - 1; 494 left = (unsigned)len - 1;
579 if (left) do { 495 if (left) do {
580 /* assure that something is in the output buffer */ 496 /* assure that something is in the output buffer */
581 while (state->have == 0 && (state->strm.avail_in || !state->eof)) { 497 if (state->have == 0 && gz_fetch(state) == -1)
582 if (gz_make(state) == -1) 498 return NULL; /* error */
583 return NULL; /* error */
584 }
585 if (state->have == 0) { /* end of file */ 499 if (state->have == 0) { /* end of file */
586 if (buf == str) /* got bupkus */ 500 if (buf == str) /* got bupkus */
587 return NULL; 501 return NULL;
@@ -626,7 +540,7 @@ int ZEXPORT gzdirect(file)
626 /* if the state is not known, but we can find out, then do so (this is 540 /* if the state is not known, but we can find out, then do so (this is
627 mainly for right after a gzopen() or gzdopen()) */ 541 mainly for right after a gzopen() or gzdopen()) */
628 if (state->how == LOOK && state->have == 0) 542 if (state->how == LOOK && state->have == 0)
629 (void)gz_head(state); 543 (void)gz_look(state);
630 544
631 /* return 1 if reading direct, 0 if decompressing a gzip stream */ 545 /* return 1 if reading direct, 0 if decompressing a gzip stream */
632 return state->direct; 546 return state->direct;