diff options
author | Mark Adler <madler@alumni.caltech.edu> | 2024-02-04 11:33:11 -0800 |
---|---|---|
committer | Mark Adler <madler@alumni.caltech.edu> | 2024-02-04 18:49:40 -0800 |
commit | 6378d33478ea2d068960aa4e68cf9f11a5ffcfbe (patch) | |
tree | b114ca444ecc7bfdbfec639d5d9561514c2ae501 /examples | |
parent | bb054d95d0336f8d36c85c5f1d8a98f935a3c131 (diff) | |
download | zlib-6378d33478ea2d068960aa4e68cf9f11a5ffcfbe.tar.gz zlib-6378d33478ea2d068960aa4e68cf9f11a5ffcfbe.tar.bz2 zlib-6378d33478ea2d068960aa4e68cf9f11a5ffcfbe.zip |
Provide a reusable inflate engine in the index in examples/zran.c.
Avoids the overhead of creating a new inflate engine for each
random access extraction.
Diffstat (limited to 'examples')
-rw-r--r-- | examples/zran.c | 153 | ||||
-rw-r--r-- | examples/zran.h | 1 |
2 files changed, 78 insertions, 76 deletions
diff --git a/examples/zran.c b/examples/zran.c index d3f5a36..f79e518 100644 --- a/examples/zran.c +++ b/examples/zran.c | |||
@@ -68,6 +68,7 @@ | |||
68 | void deflate_index_free(struct deflate_index *index) { | 68 | void deflate_index_free(struct deflate_index *index) { |
69 | if (index != NULL) { | 69 | if (index != NULL) { |
70 | free(index->list); | 70 | free(index->list); |
71 | inflateEnd(&index->strm); | ||
71 | free(index); | 72 | free(index); |
72 | } | 73 | } |
73 | } | 74 | } |
@@ -79,23 +80,9 @@ void deflate_index_free(struct deflate_index *index) { | |||
79 | static struct deflate_index *add_point(struct deflate_index *index, int bits, | 80 | static struct deflate_index *add_point(struct deflate_index *index, int bits, |
80 | off_t in, off_t out, unsigned left, | 81 | off_t in, off_t out, unsigned left, |
81 | unsigned char *window) { | 82 | unsigned char *window) { |
82 | if (index == NULL) { | 83 | if (index->have == index->mode) { |
83 | // The list is empty. Create it, starting with eight access points. | ||
84 | index = malloc(sizeof(struct deflate_index)); | ||
85 | if (index == NULL) | ||
86 | return NULL; | ||
87 | index->have = 0; | ||
88 | index->mode = 8; | ||
89 | index->list = malloc(sizeof(point_t) * index->mode); | ||
90 | if (index->list == NULL) { | ||
91 | free(index); | ||
92 | return NULL; | ||
93 | } | ||
94 | } | ||
95 | |||
96 | else if (index->have == index->mode) { | ||
97 | // The list is full. Make it bigger. | 84 | // The list is full. Make it bigger. |
98 | index->mode <<= 1; | 85 | index->mode = index->mode ? index->mode << 1 : 8; |
99 | point_t *next = realloc(index->list, sizeof(point_t) * index->mode); | 86 | point_t *next = realloc(index->list, sizeof(point_t) * index->mode); |
100 | if (next == NULL) { | 87 | if (next == NULL) { |
101 | deflate_index_free(index); | 88 | deflate_index_free(index); |
@@ -134,8 +121,18 @@ int deflate_index_build(FILE *in, off_t span, struct deflate_index **built) { | |||
134 | // return an error. | 121 | // return an error. |
135 | *built = NULL; | 122 | *built = NULL; |
136 | 123 | ||
137 | // Set up inflation state. | 124 | // Create and initialize the index list. |
138 | z_stream strm = {0}; // inflate engine (gets fired up later) | 125 | struct deflate_index *index = malloc(sizeof(struct deflate_index)); |
126 | if (index == NULL) | ||
127 | return Z_MEM_ERROR; | ||
128 | index->have = 0; | ||
129 | index->mode = 0; // entries in index->list allocation | ||
130 | index->list = NULL; | ||
131 | index->strm.state = Z_NULL; // so inflateEnd() can work | ||
132 | |||
133 | // Set up the inflation state. | ||
134 | index->strm.avail_in = 0; | ||
135 | index->strm.avail_out = 0; | ||
139 | unsigned char buf[CHUNK]; // input buffer | 136 | unsigned char buf[CHUNK]; // input buffer |
140 | unsigned char win[WINSIZE] = {0}; // output sliding window | 137 | unsigned char win[WINSIZE] = {0}; // output sliding window |
141 | off_t totin = 0; // total bytes read from input | 138 | off_t totin = 0; // total bytes read from input |
@@ -145,14 +142,13 @@ int deflate_index_build(FILE *in, off_t span, struct deflate_index **built) { | |||
145 | // Decompress from in, generating access points along the way. | 142 | // Decompress from in, generating access points along the way. |
146 | int ret; // the return value from zlib, or Z_ERRNO | 143 | int ret; // the return value from zlib, or Z_ERRNO |
147 | off_t last; // last access point uncompressed offset | 144 | off_t last; // last access point uncompressed offset |
148 | struct deflate_index *index = NULL; // list of access points | ||
149 | do { | 145 | do { |
150 | // Assure available input, at least until reaching EOF. | 146 | // Assure available input, at least until reaching EOF. |
151 | if (strm.avail_in == 0) { | 147 | if (index->strm.avail_in == 0) { |
152 | strm.avail_in = fread(buf, 1, sizeof(buf), in); | 148 | index->strm.avail_in = fread(buf, 1, sizeof(buf), in); |
153 | totin += strm.avail_in; | 149 | totin += index->strm.avail_in; |
154 | strm.next_in = buf; | 150 | index->strm.next_in = buf; |
155 | if (strm.avail_in < sizeof(buf) && ferror(in)) { | 151 | if (index->strm.avail_in < sizeof(buf) && ferror(in)) { |
156 | ret = Z_ERRNO; | 152 | ret = Z_ERRNO; |
157 | break; | 153 | break; |
158 | } | 154 | } |
@@ -163,11 +159,14 @@ int deflate_index_build(FILE *in, off_t span, struct deflate_index **built) { | |||
163 | // in a false positive for zlib, but in practice the fill bits | 159 | // in a false positive for zlib, but in practice the fill bits |
164 | // after a stored block are always zeros, so a raw stream won't | 160 | // after a stored block are always zeros, so a raw stream won't |
165 | // start with an 8 in the low nybble. | 161 | // start with an 8 in the low nybble. |
166 | mode = strm.avail_in == 0 ? RAW : // empty -- will fail | 162 | mode = index->strm.avail_in == 0 ? RAW : // will fail |
167 | (strm.next_in[0] & 0xf) == 8 ? ZLIB : | 163 | (index->strm.next_in[0] & 0xf) == 8 ? ZLIB : |
168 | strm.next_in[0] == 0x1f ? GZIP : | 164 | index->strm.next_in[0] == 0x1f ? GZIP : |
169 | /* else */ RAW; | 165 | /* else */ RAW; |
170 | ret = inflateInit2(&strm, mode); | 166 | index->strm.zalloc = Z_NULL; |
167 | index->strm.zfree = Z_NULL; | ||
168 | index->strm.opaque = Z_NULL; | ||
169 | ret = inflateInit2(&index->strm, mode); | ||
171 | if (ret != Z_OK) | 170 | if (ret != Z_OK) |
172 | break; | 171 | break; |
173 | } | 172 | } |
@@ -175,32 +174,33 @@ int deflate_index_build(FILE *in, off_t span, struct deflate_index **built) { | |||
175 | 174 | ||
176 | // Assure available output. This rotates the output through, for use as | 175 | // Assure available output. This rotates the output through, for use as |
177 | // a sliding window on the uncompressed data. | 176 | // a sliding window on the uncompressed data. |
178 | if (strm.avail_out == 0) { | 177 | if (index->strm.avail_out == 0) { |
179 | strm.avail_out = sizeof(win); | 178 | index->strm.avail_out = sizeof(win); |
180 | strm.next_out = win; | 179 | index->strm.next_out = win; |
181 | } | 180 | } |
182 | 181 | ||
183 | if (mode == RAW && index == NULL) | 182 | if (mode == RAW && index->have == 0) |
184 | // We skip the inflate() call at the start of raw deflate data in | 183 | // We skip the inflate() call at the start of raw deflate data in |
185 | // order to generate an access point there. Set data_type to imitate | 184 | // order to generate an access point there. Set data_type to imitate |
186 | // the end of a header. | 185 | // the end of a header. |
187 | strm.data_type = 0x80; | 186 | index->strm.data_type = 0x80; |
188 | else { | 187 | else { |
189 | // Inflate and update the number of uncompressed bytes. | 188 | // Inflate and update the number of uncompressed bytes. |
190 | unsigned before = strm.avail_out; | 189 | unsigned before = index->strm.avail_out; |
191 | ret = inflate(&strm, Z_BLOCK); | 190 | ret = inflate(&index->strm, Z_BLOCK); |
192 | totout += before - strm.avail_out; | 191 | totout += before - index->strm.avail_out; |
193 | } | 192 | } |
194 | 193 | ||
195 | if ((strm.data_type & 0xc0) == 0x80 && | 194 | if ((index->strm.data_type & 0xc0) == 0x80 && |
196 | (index == NULL || totout - last >= span)) { | 195 | (index->have == 0 || totout - last >= span)) { |
197 | // We are at the end of a header or a non-last deflate block, so we | 196 | // We are at the end of a header or a non-last deflate block, so we |
198 | // can add an access point here. Furthermore, we are either at the | 197 | // can add an access point here. Furthermore, we are either at the |
199 | // very start for the first access point, or there has been span or | 198 | // very start for the first access point, or there has been span or |
200 | // more uncompressed bytes since the last access point, so we want | 199 | // more uncompressed bytes since the last access point, so we want |
201 | // to add an access point here. | 200 | // to add an access point here. |
202 | index = add_point(index, strm.data_type & 7, totin - strm.avail_in, | 201 | index = add_point(index, index->strm.data_type & 7, |
203 | totout, strm.avail_out, win); | 202 | totin - index->strm.avail_in, |
203 | totout, index->strm.avail_out, win); | ||
204 | if (index == NULL) { | 204 | if (index == NULL) { |
205 | ret = Z_MEM_ERROR; | 205 | ret = Z_MEM_ERROR; |
206 | break; | 206 | break; |
@@ -209,16 +209,15 @@ int deflate_index_build(FILE *in, off_t span, struct deflate_index **built) { | |||
209 | } | 209 | } |
210 | 210 | ||
211 | if (ret == Z_STREAM_END && mode == GZIP && | 211 | if (ret == Z_STREAM_END && mode == GZIP && |
212 | (strm.avail_in || ungetc(getc(in), in) != EOF)) | 212 | (index->strm.avail_in || ungetc(getc(in), in) != EOF)) |
213 | // There is more input after the end of a gzip member. Reset the | 213 | // There is more input after the end of a gzip member. Reset the |
214 | // inflate state to read another gzip member. On success, this will | 214 | // inflate state to read another gzip member. On success, this will |
215 | // set ret to Z_OK to continue decompressing. | 215 | // set ret to Z_OK to continue decompressing. |
216 | ret = inflateReset2(&strm, GZIP); | 216 | ret = inflateReset2(&index->strm, GZIP); |
217 | 217 | ||
218 | // Keep going until Z_STREAM_END or error. If the compressed data ends | 218 | // Keep going until Z_STREAM_END or error. If the compressed data ends |
219 | // prematurely without a file read error, Z_BUF_ERROR is returned. | 219 | // prematurely without a file read error, Z_BUF_ERROR is returned. |
220 | } while (ret == Z_OK); | 220 | } while (ret == Z_OK); |
221 | inflateEnd(&strm); | ||
222 | 221 | ||
223 | if (ret != Z_STREAM_END) { | 222 | if (ret != Z_STREAM_END) { |
224 | // An error was encountered. Discard the index and return a negative | 223 | // An error was encountered. Discard the index and return a negative |
@@ -334,7 +333,8 @@ static int inflatePreface(z_stream *strm, int bits, int value) { | |||
334 | ptrdiff_t deflate_index_extract(FILE *in, struct deflate_index *index, | 333 | ptrdiff_t deflate_index_extract(FILE *in, struct deflate_index *index, |
335 | off_t offset, unsigned char *buf, size_t len) { | 334 | off_t offset, unsigned char *buf, size_t len) { |
336 | // Do a quick sanity check on the index. | 335 | // Do a quick sanity check on the index. |
337 | if (index == NULL || index->have < 1 || index->list[0].out != 0) | 336 | if (index == NULL || index->have < 1 || index->list[0].out != 0 || |
337 | index->strm.state == Z_NULL) | ||
338 | return Z_STREAM_ERROR; | 338 | return Z_STREAM_ERROR; |
339 | 339 | ||
340 | // If nothing to extract, return zero bytes extracted. | 340 | // If nothing to extract, return zero bytes extracted. |
@@ -360,13 +360,13 @@ ptrdiff_t deflate_index_extract(FILE *in, struct deflate_index *index, | |||
360 | int ch = 0; | 360 | int ch = 0; |
361 | if (point->bits && (ch = getc(in)) == EOF) | 361 | if (point->bits && (ch = getc(in)) == EOF) |
362 | return ferror(in) ? Z_ERRNO : Z_BUF_ERROR; | 362 | return ferror(in) ? Z_ERRNO : Z_BUF_ERROR; |
363 | z_stream strm = {0}; | 363 | index->strm.avail_in = 0; |
364 | ret = inflateInit2(&strm, RAW); | 364 | ret = inflateReset2(&index->strm, RAW); |
365 | if (ret != Z_OK) | 365 | if (ret != Z_OK) |
366 | return ret; | 366 | return ret; |
367 | if (point->bits) | 367 | if (point->bits) |
368 | INFLATEPRIME(&strm, point->bits, ch >> (8 - point->bits)); | 368 | INFLATEPRIME(&index->strm, point->bits, ch >> (8 - point->bits)); |
369 | inflateSetDictionary(&strm, point->window, WINSIZE); | 369 | inflateSetDictionary(&index->strm, point->window, WINSIZE); |
370 | 370 | ||
371 | // Skip uncompressed bytes until offset reached, then satisfy request. | 371 | // Skip uncompressed bytes until offset reached, then satisfy request. |
372 | unsigned char input[CHUNK]; | 372 | unsigned char input[CHUNK]; |
@@ -376,28 +376,30 @@ ptrdiff_t deflate_index_extract(FILE *in, struct deflate_index *index, | |||
376 | do { | 376 | do { |
377 | if (offset) { | 377 | if (offset) { |
378 | // Discard up to offset uncompressed bytes. | 378 | // Discard up to offset uncompressed bytes. |
379 | strm.avail_out = offset < WINSIZE ? (unsigned)offset : WINSIZE; | 379 | index->strm.avail_out = offset < WINSIZE ? (unsigned)offset : |
380 | strm.next_out = discard; | 380 | WINSIZE; |
381 | index->strm.next_out = discard; | ||
381 | } | 382 | } |
382 | else { | 383 | else { |
383 | // Uncompress up to left bytes into buf. | 384 | // Uncompress up to left bytes into buf. |
384 | strm.avail_out = left < UINT_MAX ? (unsigned)left : UINT_MAX; | 385 | index->strm.avail_out = left < UINT_MAX ? (unsigned)left : |
385 | strm.next_out = buf + len - left; | 386 | UINT_MAX; |
387 | index->strm.next_out = buf + len - left; | ||
386 | } | 388 | } |
387 | 389 | ||
388 | // Uncompress, setting got to the number of bytes uncompressed. | 390 | // Uncompress, setting got to the number of bytes uncompressed. |
389 | if (strm.avail_in == 0) { | 391 | if (index->strm.avail_in == 0) { |
390 | // Assure available input. | 392 | // Assure available input. |
391 | strm.avail_in = fread(input, 1, CHUNK, in); | 393 | index->strm.avail_in = fread(input, 1, CHUNK, in); |
392 | if (strm.avail_in < CHUNK && ferror(in)) { | 394 | if (index->strm.avail_in < CHUNK && ferror(in)) { |
393 | ret = Z_ERRNO; | 395 | ret = Z_ERRNO; |
394 | break; | 396 | break; |
395 | } | 397 | } |
396 | strm.next_in = input; | 398 | index->strm.next_in = input; |
397 | } | 399 | } |
398 | unsigned got = strm.avail_out; | 400 | unsigned got = index->strm.avail_out; |
399 | ret = inflate(&strm, Z_NO_FLUSH); | 401 | ret = inflate(&index->strm, Z_NO_FLUSH); |
400 | got -= strm.avail_out; | 402 | got -= index->strm.avail_out; |
401 | 403 | ||
402 | // Update the appropriate count. | 404 | // Update the appropriate count. |
403 | if (offset) | 405 | if (offset) |
@@ -414,14 +416,14 @@ ptrdiff_t deflate_index_extract(FILE *in, struct deflate_index *index, | |||
414 | if (ret == Z_STREAM_END && index->mode == GZIP) { | 416 | if (ret == Z_STREAM_END && index->mode == GZIP) { |
415 | // Discard the gzip trailer. | 417 | // Discard the gzip trailer. |
416 | unsigned drop = 8; // length of gzip trailer | 418 | unsigned drop = 8; // length of gzip trailer |
417 | if (strm.avail_in >= drop) { | 419 | if (index->strm.avail_in >= drop) { |
418 | strm.avail_in -= drop; | 420 | index->strm.avail_in -= drop; |
419 | strm.next_in += drop; | 421 | index->strm.next_in += drop; |
420 | } | 422 | } |
421 | else { | 423 | else { |
422 | // Read and discard the remainder of the gzip trailer. | 424 | // Read and discard the remainder of the gzip trailer. |
423 | drop -= strm.avail_in; | 425 | drop -= index->strm.avail_in; |
424 | strm.avail_in = 0; | 426 | index->strm.avail_in = 0; |
425 | do { | 427 | do { |
426 | if (getc(in) == EOF) | 428 | if (getc(in) == EOF) |
427 | // The input does not have a complete trailer. | 429 | // The input does not have a complete trailer. |
@@ -429,33 +431,32 @@ ptrdiff_t deflate_index_extract(FILE *in, struct deflate_index *index, | |||
429 | } while (--drop); | 431 | } while (--drop); |
430 | } | 432 | } |
431 | 433 | ||
432 | if (strm.avail_in || ungetc(getc(in), in) != EOF) { | 434 | if (index->strm.avail_in || ungetc(getc(in), in) != EOF) { |
433 | // There's more after the gzip trailer. Use inflate to skip the | 435 | // There's more after the gzip trailer. Use inflate to skip the |
434 | // gzip header and resume the raw inflate there. | 436 | // gzip header and resume the raw inflate there. |
435 | inflateReset2(&strm, GZIP); | 437 | inflateReset2(&index->strm, GZIP); |
436 | do { | 438 | do { |
437 | if (strm.avail_in == 0) { | 439 | if (index->strm.avail_in == 0) { |
438 | strm.avail_in = fread(input, 1, CHUNK, in); | 440 | index->strm.avail_in = fread(input, 1, CHUNK, in); |
439 | if (strm.avail_in < CHUNK && ferror(in)) { | 441 | if (index->strm.avail_in < CHUNK && ferror(in)) { |
440 | ret = Z_ERRNO; | 442 | ret = Z_ERRNO; |
441 | break; | 443 | break; |
442 | } | 444 | } |
443 | strm.next_in = input; | 445 | index->strm.next_in = input; |
444 | } | 446 | } |
445 | strm.avail_out = WINSIZE; | 447 | index->strm.avail_out = WINSIZE; |
446 | strm.next_out = discard; | 448 | index->strm.next_out = discard; |
447 | ret = inflate(&strm, Z_BLOCK); // stop at end of header | 449 | ret = inflate(&index->strm, Z_BLOCK); // stop after header |
448 | } while (ret == Z_OK && (strm.data_type & 0x80) == 0); | 450 | } while (ret == Z_OK && (index->strm.data_type & 0x80) == 0); |
449 | if (ret != Z_OK) | 451 | if (ret != Z_OK) |
450 | break; | 452 | break; |
451 | inflateReset2(&strm, RAW); | 453 | inflateReset2(&index->strm, RAW); |
452 | } | 454 | } |
453 | } | 455 | } |
454 | 456 | ||
455 | // Continue until we have the requested data, the deflate data has | 457 | // Continue until we have the requested data, the deflate data has |
456 | // ended, or an error is encountered. | 458 | // ended, or an error is encountered. |
457 | } while (ret == Z_OK); | 459 | } while (ret == Z_OK); |
458 | inflateEnd(&strm); | ||
459 | 460 | ||
460 | // Return the number of uncompressed bytes read into buf, or the error. | 461 | // Return the number of uncompressed bytes read into buf, or the error. |
461 | return ret == Z_OK || ret == Z_STREAM_END ? len - left : ret; | 462 | return ret == Z_OK || ret == Z_STREAM_END ? len - left : ret; |
diff --git a/examples/zran.h b/examples/zran.h index 8a332d6..23fbd1f 100644 --- a/examples/zran.h +++ b/examples/zran.h | |||
@@ -20,6 +20,7 @@ struct deflate_index { | |||
20 | int mode; // -15 for raw, 15 for zlib, or 31 for gzip | 20 | int mode; // -15 for raw, 15 for zlib, or 31 for gzip |
21 | off_t length; // total length of uncompressed data | 21 | off_t length; // total length of uncompressed data |
22 | point_t *list; // allocated list of access points | 22 | point_t *list; // allocated list of access points |
23 | z_stream strm; // re-usable inflate engine for extraction | ||
23 | }; | 24 | }; |
24 | 25 | ||
25 | // Make one pass through a zlib, gzip, or raw deflate compressed stream and | 26 | // Make one pass through a zlib, gzip, or raw deflate compressed stream and |