aboutsummaryrefslogtreecommitdiff
path: root/examples
diff options
context:
space:
mode:
Diffstat (limited to 'examples')
-rw-r--r--examples/README.examples29
-rw-r--r--examples/fitblk.c235
-rw-r--r--examples/gzappend.c500
-rw-r--r--examples/gzjoin.c447
-rw-r--r--examples/gzlog.c413
-rw-r--r--examples/gzlog.h58
-rw-r--r--examples/zlib_how.html522
-rw-r--r--examples/zpipe.c191
8 files changed, 2395 insertions, 0 deletions
diff --git a/examples/README.examples b/examples/README.examples
new file mode 100644
index 0000000..1084525
--- /dev/null
+++ b/examples/README.examples
@@ -0,0 +1,29 @@
1This directory contains examples of the use of zlib.
2
3fitblk.c
4 compress just enough input to nearly fill a requested output size
5 - zlib isn't designed to do this, but fitblk does it anyway
6
7gzappend.c
8 append to a gzip file
9 - illustrates the use of the Z_BLOCK flush parameter for inflate()
10 - illustrates the use of deflatePrime() to start at any bit
11
12gzjoin.c
13 join gzip files without recalculating the crc or recompressing
14 - illustrates the use of the Z_BLOCK flush parameter for inflate()
15 - illustrates the use of crc32_combine()
16
17gzlog.c
18gzlog.h
19 efficiently maintain a message log file in gzip format
20 - illustrates use of raw deflate and Z_SYNC_FLUSH
21 - illustrates use of gzip header extra field
22
23zlib_how.html
24 painfully comprehensive description of zpipe.c (see below)
25 - describes in excruciating detail the use of deflate() and inflate()
26
27zpipe.c
28 reads and writes zlib streams from stdin to stdout
29 - illustrates the proper use of deflate() and inflate()
diff --git a/examples/fitblk.c b/examples/fitblk.c
new file mode 100644
index 0000000..5f83114
--- /dev/null
+++ b/examples/fitblk.c
@@ -0,0 +1,235 @@
1/* fitblk.c: example of fitting compressed output to a specified size
2 Not copyrighted -- provided to the public domain
3 Version 1.1 25 November 2004 Mark Adler */
4
5/* Version history:
6 1.0 24 Nov 2004 First version
7 1.1 25 Nov 2004 Change deflateInit2() to deflateInit()
8 Use fixed-size, stack-allocated raw buffers
9 Simplify code moving compression to subroutines
10 Use assert() for internal errors
11 Add detailed description of approach
12 */
13
14/* Approach to just fitting a requested compressed size:
15
16 fitblk performs three compression passes on a portion of the input
17 data in order to determine how much of that input will compress to
18 nearly the requested output block size. The first pass generates
19 enough deflate blocks to produce output to fill the requested
20 output size plus a specified excess amount (see the EXCESS define
21 below). The last deflate block may go quite a bit past that, but
22 is discarded. The second pass decompresses and recompresses just
23 the compressed data that fit in the requested plus excess sized
24 buffer. The deflate process is terminated after that amount of
25 input, which is less than the amount consumed on the first pass.
26 The last deflate block of the result will be of a comparable size
27 to the final product, so that the header for that deflate block and
28 the compression ratio for that block will be about the same as in
29 the final product. The third compression pass decompresses the
30 result of the second step, but only the compressed data up to the
31 requested size minus an amount to allow the compressed stream to
32 complete (see the MARGIN define below). That will result in a
33 final compressed stream whose length is less than or equal to the
34 requested size. Assuming sufficient input and a requested size
35 greater than a few hundred bytes, the shortfall will typically be
36 less than ten bytes.
37
38 If the input is short enough that the first compression completes
39 before filling the requested output size, then that compressed
40 stream is returned with no recompression.
41
42 EXCESS is chosen to be just greater than the shortfall seen in a
43 two pass approach similar to the above. That shortfall is due to
44 the last deflate block compressing more efficiently with a smaller
45 header on the second pass. EXCESS is set to be large enough so
46 that there is enough uncompressed data for the second pass to fill
47 out the requested size, and small enough so that the final deflate
48 block of the second pass will be close in size to the final deflate
49 block of the third and final pass. MARGIN is chosen to be just
50 large enough to assure that the final compression has enough room
51 to complete in all cases.
52 */
53
54#include <stdio.h>
55#include <stdlib.h>
56#include <assert.h>
57#include "zlib.h"
58
59#define local static
60
/* report a fatal problem on stderr, prefixed with the program name, and
   terminate the process with exit status 1 -- never returns */
static void quit(char *why)
{
    fputs("fitblk abort: ", stderr);
    fputs(why, stderr);
    fputc('\n', stderr);
    exit(1);
}
67
68#define RAWLEN 4096 /* intermediate uncompressed buffer size */
69
70/* compress from file to def until provided buffer is full or end of
71 input reached; return last deflate() return value, or Z_ERRNO if
72 there was read error on the file */
73local int partcompress(FILE *in, z_streamp def)
74{
75 int ret, flush;
76 char raw[RAWLEN];
77
78 flush = Z_NO_FLUSH;
79 do {
80 def->avail_in = fread(raw, 1, RAWLEN, in);
81 if (ferror(in))
82 return Z_ERRNO;
83 def->next_in = raw;
84 if (feof(in))
85 flush = Z_FINISH;
86 ret = deflate(def, flush);
87 assert(ret != Z_STREAM_ERROR);
88 } while (def->avail_out != 0 && flush == Z_NO_FLUSH);
89 return ret;
90}
91
92/* recompress from inf's input to def's output; the input for inf and
93 the output for def are set in those structures before calling;
94 return last deflate() return value, or Z_MEM_ERROR if inflate()
95 was not able to allocate enough memory when it needed to */
96local int recompress(z_streamp inf, z_streamp def)
97{
98 int ret, flush;
99 char raw[RAWLEN];
100
101 flush = Z_NO_FLUSH;
102 do {
103 /* decompress */
104 inf->avail_out = RAWLEN;
105 inf->next_out = raw;
106 ret = inflate(inf, Z_NO_FLUSH);
107 assert(ret != Z_STREAM_ERROR && ret != Z_DATA_ERROR &&
108 ret != Z_NEED_DICT);
109 if (ret == Z_MEM_ERROR)
110 return ret;
111
112 /* compress what was decompresed until done or no room */
113 def->avail_in = RAWLEN - inf->avail_out;
114 def->next_in = raw;
115 if (inf->avail_out != 0)
116 flush = Z_FINISH;
117 ret = deflate(def, flush);
118 assert(ret != Z_STREAM_ERROR);
119 } while (ret != Z_STREAM_END && def->avail_out != 0);
120 return ret;
121}
122
123#define EXCESS 256 /* empirically determined stream overage */
124#define MARGIN 8 /* amount to back off for completion */
125
126/* compress from stdin to fixed-size block on stdout */
127int main(int argc, char **argv)
128{
129 int ret; /* return code */
130 unsigned size; /* requested fixed output block size */
131 unsigned have; /* bytes written by deflate() call */
132 char *blk; /* intermediate and final stream */
133 char *tmp; /* close to desired size stream */
134 z_stream def, inf; /* zlib deflate and inflate states */
135
136 /* get requested output size */
137 if (argc != 2)
138 quit("need one argument: size of output block");
139 ret = strtol(argv[1], argv + 1, 10);
140 if (argv[1][0] != 0)
141 quit("argument must be a number");
142 if (ret < 8) /* 8 is minimum zlib stream size */
143 quit("need positive size of 8 or greater");
144 size = (unsigned)ret;
145
146 /* allocate memory for buffers and compression engine */
147 blk = malloc(size + EXCESS);
148 def.zalloc = Z_NULL;
149 def.zfree = Z_NULL;
150 def.opaque = Z_NULL;
151 ret = deflateInit(&def, Z_DEFAULT_COMPRESSION);
152 if (ret != Z_OK || blk == NULL)
153 quit("out of memory");
154
155 /* compress from stdin until output full, or no more input */
156 def.avail_out = size + EXCESS;
157 def.next_out = blk;
158 ret = partcompress(stdin, &def);
159 if (ret == Z_ERRNO)
160 quit("error reading input");
161
162 /* if it all fit, then size was undersubscribed -- done! */
163 if (ret == Z_STREAM_END && def.avail_out >= EXCESS) {
164 /* write block to stdout */
165 have = size + EXCESS - def.avail_out;
166 ret = fwrite(blk, 1, have, stdout);
167 if (ret != have || ferror(stdout))
168 quit("error writing output");
169
170 /* clean up and print results to stderr */
171 ret = deflateEnd(&def);
172 assert(ret != Z_STREAM_ERROR);
173 free(blk);
174 fprintf(stderr,
175 "%u bytes unused out of %u requested (all input)\n",
176 size - have, size);
177 return 0;
178 }
179
180 /* it didn't all fit -- set up for recompression */
181 inf.zalloc = Z_NULL;
182 inf.zfree = Z_NULL;
183 inf.opaque = Z_NULL;
184 inf.avail_in = 0;
185 inf.next_in = Z_NULL;
186 ret = inflateInit(&inf);
187 tmp = malloc(size + EXCESS);
188 if (ret != Z_OK || tmp == NULL)
189 quit("out of memory");
190 ret = deflateReset(&def);
191 assert(ret != Z_STREAM_ERROR);
192
193 /* do first recompression close to the right amount */
194 inf.avail_in = size + EXCESS;
195 inf.next_in = blk;
196 def.avail_out = size + EXCESS;
197 def.next_out = tmp;
198 ret = recompress(&inf, &def);
199 if (ret == Z_MEM_ERROR)
200 quit("out of memory");
201
202 /* set up for next reocmpression */
203 ret = inflateReset(&inf);
204 assert(ret != Z_STREAM_ERROR);
205 ret = deflateReset(&def);
206 assert(ret != Z_STREAM_ERROR);
207
208 /* do second and final recompression (third compression) */
209 inf.avail_in = size - MARGIN; /* assure stream will complete */
210 inf.next_in = tmp;
211 def.avail_out = size;
212 def.next_out = blk;
213 ret = recompress(&inf, &def);
214 if (ret == Z_MEM_ERROR)
215 quit("out of memory");
216 assert(ret == Z_STREAM_END); /* otherwise MARGIN too small */
217
218 /* done -- write block to stdout */
219 have = size - def.avail_out;
220 ret = fwrite(blk, 1, have, stdout);
221 if (ret != have || ferror(stdout))
222 quit("error writing output");
223
224 /* clean up and print results to stderr */
225 free(tmp);
226 ret = inflateEnd(&inf);
227 assert(ret != Z_STREAM_ERROR);
228 ret = deflateEnd(&def);
229 assert(ret != Z_STREAM_ERROR);
230 free(blk);
231 fprintf(stderr,
232 "%u bytes unused out of %u requested (%lu input)\n",
233 size - have, size, def.total_in);
234 return 0;
235}
diff --git a/examples/gzappend.c b/examples/gzappend.c
new file mode 100644
index 0000000..e9e878e
--- /dev/null
+++ b/examples/gzappend.c
@@ -0,0 +1,500 @@
1/* gzappend -- command to append to a gzip file
2
3 Copyright (C) 2003 Mark Adler, all rights reserved
4 version 1.1, 4 Nov 2003
5
6 This software is provided 'as-is', without any express or implied
7 warranty. In no event will the author be held liable for any damages
8 arising from the use of this software.
9
10 Permission is granted to anyone to use this software for any purpose,
11 including commercial applications, and to alter it and redistribute it
12 freely, subject to the following restrictions:
13
14 1. The origin of this software must not be misrepresented; you must not
15 claim that you wrote the original software. If you use this software
16 in a product, an acknowledgment in the product documentation would be
17 appreciated but is not required.
18 2. Altered source versions must be plainly marked as such, and must not be
19 misrepresented as being the original software.
20 3. This notice may not be removed or altered from any source distribution.
21
22 Mark Adler madler@alumni.caltech.edu
23 */
24
25/*
26 * Change history:
27 *
28 * 1.0 19 Oct 2003 - First version
29 * 1.1 4 Nov 2003 - Expand and clarify some comments and notes
30 * - Add version and copyright to help
31 * - Send help to stdout instead of stderr
32 * - Add some preemptive typecasts
33 * - Add L to constants in lseek() calls
34 * - Remove some debugging information in error messages
35 * - Use new data_type definition for zlib 1.2.1
36 * - Simplify and unify file operations
37 * - Finish off gzip file in gztack()
38 * - Use deflatePrime() instead of adding empty blocks
39 * - Keep gzip file clean on appended file read errors
40 * - Use in-place rotate instead of auxiliary buffer
41 * (Why you ask? Because it was fun to write!)
42 */
43
44/*
45 gzappend takes a gzip file and appends to it, compressing files from the
46 command line or data from stdin. The gzip file is written to directly, to
47 avoid copying that file, in case it's large. Note that this results in the
48 unfriendly behavior that if gzappend fails, the gzip file is corrupted.
49
50 This program was written to illustrate the use of the new Z_BLOCK option of
51 zlib 1.2.x's inflate() function. This option returns from inflate() at each
52 block boundary to facilitate locating and modifying the last block bit at
53 the start of the final deflate block. Also whether using Z_BLOCK or not,
54 another required feature of zlib 1.2.x is that inflate() now provides the
55 number of unused bits in the last input byte used. gzappend will not work
56 with versions of zlib earlier than 1.2.1.
57
58 gzappend first decompresses the gzip file internally, discarding all but
59 the last 32K of uncompressed data, and noting the location of the last block
60 bit and the number of unused bits in the last byte of the compressed data.
61 The gzip trailer containing the CRC-32 and length of the uncompressed data
62 is verified. This trailer will be later overwritten.
63
64 Then the last block bit is cleared by seeking back in the file and rewriting
65 the byte that contains it. Seeking forward, the last byte of the compressed
66 data is saved along with the number of unused bits to initialize deflate.
67
68 A deflate process is initialized, using the last 32K of the uncompressed
69 data from the gzip file to initialize the dictionary. If the total
70 uncompressed data was less than 32K, then all of it is used to initialize
71 the dictionary. The deflate output bit buffer is also initialized with the
72 last bits from the original deflate stream. From here on, the data to
73 append is simply compressed using deflate, and written to the gzip file.
74 When that is complete, the new CRC-32 and uncompressed length are written
75 as the trailer of the gzip file.
76 */
77
78#include <stdio.h>
79#include <stdlib.h>
80#include <string.h>
81#include <fcntl.h>
82#include <unistd.h>
83#include "zlib.h"
84
85#define local static
86#define LGCHUNK 14
87#define CHUNK (1U << LGCHUNK)
88#define DSIZE 32768U
89
/* write the two message parts, back to back, to stderr after the program's
   error prefix, then terminate the process with exit status 1 */
static void bye(char *msg1, char *msg2)
{
    fputs("gzappend error: ", stderr);
    fputs(msg1, stderr);
    fputs(msg2, stderr);
    fputc('\n', stderr);
    exit(1);
}
96
/* return the greatest common divisor of a and b using Euclid's algorithm,
   modified to be fast when one argument is much greater than the other, and
   coded to avoid unnecessary swapping -- instead of subtracting the smaller
   value once per step, the largest power-of-two multiple of it that still
   fits is subtracted; gcd(x, 0) and gcd(0, x) are x */
static unsigned gcd(unsigned a, unsigned b)
{
    unsigned c;

    while (a && b) {
        if (a > b) {
            c = b;
            while (a - c >= c)      /* double c while 2*c still fits in a */
                c <<= 1;
            a -= c;
        }
        else {
            c = a;
            while (b - c >= c)      /* double c while 2*c still fits in b */
                c <<= 1;
            b -= c;
        }
    }
    return a + b;                   /* one of them is zero here */
}

/* rotate list[0..len-1] left by rot positions, in place -- rot may be any
   value and is reduced modulo len; lists shorter than two entries and
   rotations that reduce to zero leave the list untouched */
static void rotate(unsigned char *list, unsigned len, unsigned rot)
{
    unsigned char tmp;
    unsigned cycles;
    unsigned start, from, to;

    /* normalize rot and handle degenerate cases */
    if (len < 2) return;
    if (rot >= len) rot %= len;
    if (rot == 0) return;

    /* do simple left shift by one -- source and destination overlap, so
       memmove() is required here (memcpy() would be undefined behavior) */
    if (rot == 1) {
        tmp = list[0];
        memmove(list, list + 1, len - 1);
        list[len - 1] = tmp;
        return;
    }

    /* do simple right shift by one */
    if (rot == len - 1) {
        tmp = list[len - 1];
        memmove(list + 1, list, len - 1);
        list[0] = tmp;
        return;
    }

    /* otherwise do rotate as a set of cycles in place -- indices are used
       instead of pointers so that no out-of-bounds pointer is ever formed */
    cycles = gcd(len, rot);             /* number of cycles */
    do {
        start = from = cycles;          /* start index is arbitrary */
        tmp = list[from];               /* save entry to be overwritten */
        for (;;) {
            to = from;                  /* next step in cycle */

            /* go right rot positions modulo len, without overflowing */
            if (len - from > rot)
                from += rot;
            else
                from -= len - rot;
            if (from == start) break;   /* all but one shifted */
            list[to] = list[from];      /* shift left */
        }
        list[to] = tmp;                 /* complete the circle */
    } while (--cycles);
}
166
/* state of a buffered reader over a file descriptor, used to scan the gzip
   file -- buf holds 1 << size bytes, of which the "left" bytes starting at
   "next" have been read from fd but not yet consumed */
typedef struct file_s {
    int fd;                 /* descriptor the data is read from */
    int size;               /* log base 2 of the number of bytes in buf */
    unsigned left;          /* unconsumed bytes available at next */
    unsigned char *buf;     /* start of the read buffer */
    unsigned char *next;    /* next unconsumed byte in the buffer */
    char *name;             /* file name for error messages */
} file;
176
177/* reload buffer */
178local int readin(file *in)
179{
180 int len;
181
182 len = read(in->fd, in->buf, 1 << in->size);
183 if (len == -1) bye("error reading ", in->name);
184 in->left = (unsigned)len;
185 in->next = in->buf;
186 return len;
187}
188
189/* read from file in, exit if end-of-file */
190local int readmore(file *in)
191{
192 if (readin(in) == 0) bye("unexpected end of ", in->name);
193 return 0;
194}
195
/* consume and return the next byte of "in", refilling the buffer first when
   it is empty (the program exits if the file has ended) -- note that the
   argument is evaluated more than once, so it must not have side effects */
#define read1(in) ((in)->left == 0 ? readmore(in) : 0, \
                   (in)->left--, *((in)->next)++)
198
199/* skip over n bytes of in */
200local void skip(file *in, unsigned n)
201{
202 unsigned bypass;
203
204 if (n > in->left) {
205 n -= in->left;
206 bypass = n & ~((1U << in->size) - 1);
207 if (bypass) {
208 if (lseek(in->fd, (off_t)bypass, SEEK_CUR) == -1)
209 bye("seeking ", in->name);
210 n -= bypass;
211 }
212 readmore(in);
213 if (n > in->left)
214 bye("unexpected end of ", in->name);
215 }
216 in->left -= n;
217 in->next += n;
218}
219
220/* read a four-byte unsigned integer, little-endian, from in */
221unsigned long read4(file *in)
222{
223 unsigned long val;
224
225 val = read1(in);
226 val += (unsigned)read1(in) << 8;
227 val += (unsigned long)read1(in) << 16;
228 val += (unsigned long)read1(in) << 24;
229 return val;
230}
231
232/* skip over gzip header */
233local void gzheader(file *in)
234{
235 int flags;
236 unsigned n;
237
238 if (read1(in) != 31 || read1(in) != 139) bye(in->name, " not a gzip file");
239 if (read1(in) != 8) bye("unknown compression method in", in->name);
240 flags = read1(in);
241 if (flags & 0xe0) bye("unknown header flags set in", in->name);
242 skip(in, 6);
243 if (flags & 4) {
244 n = read1(in);
245 n += (unsigned)(read1(in)) << 8;
246 skip(in, n);
247 }
248 if (flags & 8) while (read1(in) != 0) ;
249 if (flags & 16) while (read1(in) != 0) ;
250 if (flags & 2) skip(in, 2);
251}
252
253/* decompress gzip file "name", return strm with a deflate stream ready to
254 continue compression of the data in the gzip file, and return a file
255 descriptor pointing to where to write the compressed data -- the deflate
256 stream is initialized to compress using level "level" */
257local int gzscan(char *name, z_stream *strm, int level)
258{
259 int ret, lastbit, left, full;
260 unsigned have;
261 unsigned long crc, tot;
262 unsigned char *window;
263 off_t lastoff, end;
264 file gz;
265
266 /* open gzip file */
267 gz.name = name;
268 gz.fd = open(name, O_RDWR, 0);
269 if (gz.fd == -1) bye("cannot open ", name);
270 gz.buf = malloc(CHUNK);
271 if (gz.buf == NULL) bye("out of memory", "");
272 gz.size = LGCHUNK;
273 gz.left = 0;
274
275 /* skip gzip header */
276 gzheader(&gz);
277
278 /* prepare to decompress */
279 window = malloc(DSIZE);
280 if (window == NULL) bye("out of memory", "");
281 strm->zalloc = Z_NULL;
282 strm->zfree = Z_NULL;
283 strm->opaque = Z_NULL;
284 ret = inflateInit2(strm, -15);
285 if (ret != Z_OK) bye("out of memory", " or library mismatch");
286
287 /* decompress the deflate stream, saving append information */
288 lastbit = 0;
289 lastoff = lseek(gz.fd, 0L, SEEK_CUR) - gz.left;
290 left = 0;
291 strm->avail_in = gz.left;
292 strm->next_in = gz.next;
293 crc = crc32(0L, Z_NULL, 0);
294 have = full = 0;
295 do {
296 /* if needed, get more input */
297 if (strm->avail_in == 0) {
298 readmore(&gz);
299 strm->avail_in = gz.left;
300 strm->next_in = gz.next;
301 }
302
303 /* set up output to next available section of sliding window */
304 strm->avail_out = DSIZE - have;
305 strm->next_out = window + have;
306
307 /* inflate and check for errors */
308 ret = inflate(strm, Z_BLOCK);
309 if (ret == Z_STREAM_ERROR) bye("internal stream error!", "");
310 if (ret == Z_MEM_ERROR) bye("out of memory", "");
311 if (ret == Z_DATA_ERROR)
312 bye("invalid compressed data--format violated in", name);
313
314 /* update crc and sliding window pointer */
315 crc = crc32(crc, window + have, DSIZE - have - strm->avail_out);
316 if (strm->avail_out)
317 have = DSIZE - strm->avail_out;
318 else {
319 have = 0;
320 full = 1;
321 }
322
323 /* process end of block */
324 if (strm->data_type & 128) {
325 if (strm->data_type & 64)
326 left = strm->data_type & 0x1f;
327 else {
328 lastbit = strm->data_type & 0x1f;
329 lastoff = lseek(gz.fd, 0L, SEEK_CUR) - strm->avail_in;
330 }
331 }
332 } while (ret != Z_STREAM_END);
333 inflateEnd(strm);
334 gz.left = strm->avail_in;
335 gz.next = strm->next_in;
336
337 /* save the location of the end of the compressed data */
338 end = lseek(gz.fd, 0L, SEEK_CUR) - gz.left;
339
340 /* check gzip trailer and save total for deflate */
341 if (crc != read4(&gz))
342 bye("invalid compressed data--crc mismatch in ", name);
343 tot = strm->total_out;
344 if ((tot & 0xffffffffUL) != read4(&gz))
345 bye("invalid compressed data--length mismatch in", name);
346
347 /* if not at end of file, warn */
348 if (gz.left || readin(&gz))
349 fprintf(stderr,
350 "gzappend warning: junk at end of gzip file overwritten\n");
351
352 /* clear last block bit */
353 lseek(gz.fd, lastoff - (lastbit != 0), SEEK_SET);
354 if (read(gz.fd, gz.buf, 1) != 1) bye("reading after seek on ", name);
355 *gz.buf = (unsigned char)(*gz.buf ^ (1 << ((8 - lastbit) & 7)));
356 lseek(gz.fd, -1L, SEEK_CUR);
357 if (write(gz.fd, gz.buf, 1) != 1) bye("writing after seek to ", name);
358
359 /* if window wrapped, build dictionary from window by rotating */
360 if (full) {
361 rotate(window, DSIZE, have);
362 have = DSIZE;
363 }
364
365 /* set up deflate stream with window, crc, total_in, and leftover bits */
366 ret = deflateInit2(strm, level, Z_DEFLATED, -15, 8, Z_DEFAULT_STRATEGY);
367 if (ret != Z_OK) bye("out of memory", "");
368 deflateSetDictionary(strm, window, have);
369 strm->adler = crc;
370 strm->total_in = tot;
371 if (left) {
372 lseek(gz.fd, --end, SEEK_SET);
373 if (read(gz.fd, gz.buf, 1) != 1) bye("reading after seek on ", name);
374 deflatePrime(strm, 8 - left, *gz.buf);
375 }
376 lseek(gz.fd, end, SEEK_SET);
377
378 /* clean up and return */
379 free(window);
380 free(gz.buf);
381 return gz.fd;
382}
383
384/* append file "name" to gzip file gd using deflate stream strm -- if last
385 is true, then finish off the deflate stream at the end */
386local void gztack(char *name, int gd, z_stream *strm, int last)
387{
388 int fd, len, ret;
389 unsigned left;
390 unsigned char *in, *out;
391
392 /* open file to compress and append */
393 fd = 0;
394 if (name != NULL) {
395 fd = open(name, O_RDONLY, 0);
396 if (fd == -1)
397 fprintf(stderr, "gzappend warning: %s not found, skipping ...\n",
398 name);
399 }
400
401 /* allocate buffers */
402 in = fd == -1 ? NULL : malloc(CHUNK);
403 out = malloc(CHUNK);
404 if (out == NULL) bye("out of memory", "");
405
406 /* compress input file and append to gzip file */
407 do {
408 /* get more input */
409 len = fd == -1 ? 0 : read(fd, in, CHUNK);
410 if (len == -1) {
411 fprintf(stderr,
412 "gzappend warning: error reading %s, skipping rest ...\n",
413 name);
414 len = 0;
415 }
416 strm->avail_in = (unsigned)len;
417 strm->next_in = in;
418 if (len) strm->adler = crc32(strm->adler, in, (unsigned)len);
419
420 /* compress and write all available output */
421 do {
422 strm->avail_out = CHUNK;
423 strm->next_out = out;
424 ret = deflate(strm, last && len == 0 ? Z_FINISH : Z_NO_FLUSH);
425 left = CHUNK - strm->avail_out;
426 while (left) {
427 len = write(gd, out + CHUNK - strm->avail_out - left, left);
428 if (len == -1) bye("writing gzip file", "");
429 left -= (unsigned)len;
430 }
431 } while (strm->avail_out == 0 && ret != Z_STREAM_END);
432 } while (len != 0);
433
434 /* write trailer after last entry */
435 if (last) {
436 deflateEnd(strm);
437 out[0] = (unsigned char)(strm->adler);
438 out[1] = (unsigned char)(strm->adler >> 8);
439 out[2] = (unsigned char)(strm->adler >> 16);
440 out[3] = (unsigned char)(strm->adler >> 24);
441 out[4] = (unsigned char)(strm->total_in);
442 out[5] = (unsigned char)(strm->total_in >> 8);
443 out[6] = (unsigned char)(strm->total_in >> 16);
444 out[7] = (unsigned char)(strm->total_in >> 24);
445 len = 8;
446 do {
447 ret = write(gd, out + 8 - len, len);
448 if (ret == -1) bye("writing gzip file", "");
449 len -= ret;
450 } while (len);
451 close(gd);
452 }
453
454 /* clean up and return */
455 free(out);
456 if (in != NULL) free(in);
457 if (fd > 0) close(fd);
458}
459
460/* process the compression level option if present, scan the gzip file, and
461 append the specified files, or append the data from stdin if no other file
462 names are provided on the command line -- the gzip file must be writable
463 and seekable */
464int main(int argc, char **argv)
465{
466 int gd, level;
467 z_stream strm;
468
469 /* ignore command name */
470 argv++;
471
472 /* provide usage if no arguments */
473 if (*argv == NULL) {
474 printf("gzappend 1.1 (4 Nov 2003) Copyright (C) 2003 Mark Adler\n");
475 printf(
476 "usage: gzappend [-level] file.gz [ addthis [ andthis ... ]]\n");
477 return 0;
478 }
479
480 /* set compression level */
481 level = Z_DEFAULT_COMPRESSION;
482 if (argv[0][0] == '-') {
483 if (argv[0][1] < '0' || argv[0][1] > '9' || argv[0][2] != 0)
484 bye("invalid compression level", "");
485 level = argv[0][1] - '0';
486 if (*++argv == NULL) bye("no gzip file name after options", "");
487 }
488
489 /* prepare to append to gzip file */
490 gd = gzscan(*argv++, &strm, level);
491
492 /* append files on command line, or from stdin if none */
493 if (*argv == NULL)
494 gztack(NULL, gd, &strm, 1);
495 else
496 do {
497 gztack(*argv, gd, &strm, argv[1] == NULL);
498 } while (*++argv != NULL);
499 return 0;
500}
diff --git a/examples/gzjoin.c b/examples/gzjoin.c
new file mode 100644
index 0000000..7434c5b
--- /dev/null
+++ b/examples/gzjoin.c
@@ -0,0 +1,447 @@
1/* gzjoin -- command to join gzip files into one gzip file
2
3 Copyright (C) 2004 Mark Adler, all rights reserved
4 version 1.0, 11 Dec 2004
5
6 This software is provided 'as-is', without any express or implied
7 warranty. In no event will the author be held liable for any damages
8 arising from the use of this software.
9
10 Permission is granted to anyone to use this software for any purpose,
11 including commercial applications, and to alter it and redistribute it
12 freely, subject to the following restrictions:
13
14 1. The origin of this software must not be misrepresented; you must not
15 claim that you wrote the original software. If you use this software
16 in a product, an acknowledgment in the product documentation would be
17 appreciated but is not required.
18 2. Altered source versions must be plainly marked as such, and must not be
19 misrepresented as being the original software.
20 3. This notice may not be removed or altered from any source distribution.
21
22 Mark Adler madler@alumni.caltech.edu
23 */
24
25/*
26 * Change history:
27 *
28 * 1.0 11 Dec 2004 - First version
29 */
30
31/*
32 gzjoin takes one or more gzip files on the command line and writes out a
33 single gzip file that will uncompress to the concatenation of the
34 uncompressed data from the individual gzip files. gzjoin does this without
35 having to recompress any of the data and without having to calculate a new
36 crc32 for the concatenated uncompressed data. gzjoin does however have to
37 decompress all of the input data in order to find the bits in the compressed
38 data that need to be modified to concatenate the streams.
39
40 gzjoin does not do an integrity check on the input gzip files other than
41 checking the gzip header and decompressing the compressed data. They are
42 otherwise assumed to be complete and correct.
43
44 Each joint between gzip files removes at least 18 bytes of previous trailer
45 and subsequent header, and inserts an average of about three bytes to the
46 compressed data in order to connect the streams. The output gzip file
47 has a minimal ten-byte gzip header with no file name or modification time.
48
49 This program was written to illustrate the use of the Z_BLOCK option of
50 inflate() and the crc32_combine() function. gzjoin will not compile with
51 versions of zlib earlier than 1.2.3.
52 */
53
54#include <stdio.h> /* fputs(), fprintf(), fwrite(), putc() */
55#include <stdlib.h> /* exit(), malloc(), free() */
56#include <fcntl.h> /* open() */
57#include <unistd.h> /* close(), read(), lseek() */
58#include "zlib.h"
59 /* crc32(), crc32_combine(), inflateInit2(), inflate(), inflateEnd() */
60
61#define local static
62
63/* exit with an error (return a value to allow use in an expression) */
64local int bail(char *why1, char *why2)
65{
66 fprintf(stderr, "gzjoin error: %s%s, output incomplete\n", why1, why2);
67 exit(1);
68 return 0;
69}
70
/* -- simple buffered file input with access to the buffer -- */

#define CHUNK 32768         /* must be a power of two and fit in unsigned */

/* state of one buffered input file */
typedef struct {
    char *name;             /* file name, kept for error messages */
    int fd;                 /* descriptor the buffer is filled from */
    unsigned left;          /* number of unread bytes starting at next */
    unsigned char *next;    /* first unread byte inside buf */
    unsigned char *buf;     /* allocated input buffer, CHUNK bytes long */
} bin;
83
84/* close a buffered file and free allocated memory */
85local void bclose(bin *in)
86{
87 if (in != NULL) {
88 if (in->fd != -1)
89 close(in->fd);
90 if (in->buf != NULL)
91 free(in->buf);
92 free(in);
93 }
94}
95
96/* open a buffered file for input, return a pointer to type bin, or NULL on
97 failure */
98local bin *bopen(char *name)
99{
100 bin *in;
101
102 in = malloc(sizeof(bin));
103 if (in == NULL)
104 return NULL;
105 in->buf = malloc(CHUNK);
106 in->fd = open(name, O_RDONLY, 0);
107 if (in->buf == NULL || in->fd == -1) {
108 bclose(in);
109 return NULL;
110 }
111 in->left = 0;
112 in->next = in->buf;
113 in->name = name;
114 return in;
115}
116
117/* load buffer from file, return -1 on read error, 0 or 1 on success, with
118 1 indicating that end-of-file was reached */
119local int bload(bin *in)
120{
121 ssize_t len;
122
123 if (in == NULL)
124 return -1;
125 if (in->left != 0)
126 return 0;
127 in->next = in->buf;
128 do {
129 len = read(in->fd, in->buf + in->left, CHUNK - in->left);
130 if (len < 0)
131 return -1;
132 in->left += (unsigned)len;
133 } while (len != 0 && in->left < CHUNK);
134 return len == 0 ? 1 : 0;
135}
136
/* get a byte from the file, bail if end of file -- an empty buffer is first
   refilled with bload(); the value of the expression is the byte as an int.
   The argument is parenthesized wherever it is used, but it is evaluated more
   than once, so it must not have side effects. */
#define bget(in) ((in)->left ? 0 : bload(in), \
                  (in)->left ? ((in)->left--, *((in)->next)++) : \
                      bail("unexpected end of file on ", (in)->name))
141
142/* get a four-byte little-endian unsigned integer from file */
143local unsigned long bget4(bin *in)
144{
145 unsigned long val;
146
147 val = bget(in);
148 val += (unsigned long)(bget(in)) << 8;
149 val += (unsigned long)(bget(in)) << 16;
150 val += (unsigned long)(bget(in)) << 24;
151 return val;
152}
153
154/* skip bytes in file */
155local void bskip(bin *in, unsigned skip)
156{
157 /* check pointer */
158 if (in == NULL)
159 return;
160
161 /* easy case -- skip bytes in buffer */
162 if (skip <= in->left) {
163 in->left -= skip;
164 in->next += skip;
165 return;
166 }
167
168 /* skip what's in buffer, discard buffer contents */
169 skip -= in->left;
170 in->left = 0;
171
172 /* seek past multiples of CHUNK bytes */
173 if (skip > CHUNK) {
174 unsigned left;
175
176 left = skip & (CHUNK - 1);
177 if (left == 0) {
178 /* exact number of chunks: seek all the way minus one byte to check
179 for end-of-file with a read */
180 lseek(in->fd, skip - 1, SEEK_CUR);
181 if (read(in->fd, in->buf, 1) != 1)
182 bail("unexpected end of file on ", in->name);
183 return;
184 }
185
186 /* skip the integral chunks, update skip with remainder */
187 lseek(in->fd, skip - left, SEEK_CUR);
188 skip = left;
189 }
190
191 /* read more input and skip remainder */
192 bload(in);
193 if (skip > in->left)
194 bail("unexpected end of file on ", in->name);
195 in->left -= skip;
196 in->next += skip;
197}
198
199/* -- end of buffered input functions -- */
200
201/* skip the gzip header from file in */
202local void gzhead(bin *in)
203{
204 int flags;
205
206 /* verify gzip magic header and compression method */
207 if (bget(in) != 0x1f || bget(in) != 0x8b || bget(in) != 8)
208 bail(in->name, " is not a valid gzip file");
209
210 /* get and verify flags */
211 flags = bget(in);
212 if ((flags & 0xe0) != 0)
213 bail("unknown reserved bits set in ", in->name);
214
215 /* skip modification time, extra flags, and os */
216 bskip(in, 6);
217
218 /* skip extra field if present */
219 if (flags & 4) {
220 unsigned len;
221
222 len = bget(in);
223 len += (unsigned)(bget(in)) << 8;
224 bskip(in, len);
225 }
226
227 /* skip file name if present */
228 if (flags & 8)
229 while (bget(in) != 0)
230 ;
231
232 /* skip comment if present */
233 if (flags & 16)
234 while (bget(in) != 0)
235 ;
236
237 /* skip header crc if present */
238 if (flags & 2)
239 bskip(in, 2);
240}
241
/* write the low 32 bits of val to out as four bytes, least significant byte
   first */
local void put4(unsigned long val, FILE *out)
{
    int n;

    for (n = 0; n < 4; n++) {
        putc(val & 0xff, out);
        val >>= 8;
    }
}
250
251/* Load up zlib stream from buffered input, bail if end of file */
252local void zpull(z_streamp strm, bin *in)
253{
254 if (in->left == 0)
255 bload(in);
256 if (in->left == 0)
257 bail("unexpected end of file on ", in->name);
258 strm->avail_in = in->left;
259 strm->next_in = in->next;
260}
261
262/* Write header for gzip file to out and initialize trailer. */
263local void gzinit(unsigned long *crc, unsigned long *tot, FILE *out)
264{
265 fwrite("\x1f\x8b\x08\0\0\0\0\0\0\xff", 1, 10, out);
266 *crc = crc32(0L, Z_NULL, 0);
267 *tot = 0;
268}
269
270/* Copy the compressed data from name, zeroing the last block bit of the last
271 block if clr is true, and adding empty blocks as needed to get to a byte
272 boundary. If clr is false, then the last block becomes the last block of
273 the output, and the gzip trailer is written. crc and tot maintains the
274 crc and length (modulo 2^32) of the output for the trailer. The resulting
275 gzip file is written to out. gzinit() must be called before the first call
276 of gzcopy() to write the gzip header and to initialize crc and tot. */
277local void gzcopy(char *name, int clr, unsigned long *crc, unsigned long *tot,
278 FILE *out)
279{
280 int ret; /* return value from zlib functions */
281 int pos; /* where the "last block" bit is in byte */
282 int last; /* true if processing the last block */
283 bin *in; /* buffered input file */
284 unsigned char *start; /* start of compressed data in buffer */
285 unsigned char *junk; /* buffer for uncompressed data -- discarded */
286 z_off_t len; /* length of uncompressed data (support > 4 GB) */
287 z_stream strm; /* zlib inflate stream */
288
289 /* open gzip file and skip header */
290 in = bopen(name);
291 if (in == NULL)
292 bail("could not open ", name);
293 gzhead(in);
294
295 /* allocate buffer for uncompressed data and initialize raw inflate
296 stream */
297 junk = malloc(CHUNK);
298 strm.zalloc = Z_NULL;
299 strm.zfree = Z_NULL;
300 strm.opaque = Z_NULL;
301 strm.avail_in = 0;
302 strm.next_in = Z_NULL;
303 ret = inflateInit2(&strm, -15);
304 if (junk == NULL || ret != Z_OK)
305 bail("out of memory", "");
306
307 /* inflate and copy compressed data, clear last-block bit if requested */
308 len = 0;
309 zpull(&strm, in);
310 start = strm.next_in;
311 last = start[0] & 1;
312 if (last && clr)
313 start[0] &= ~1;
314 strm.avail_out = 0;
315 for (;;) {
316 /* if input used and output done, write used input and get more */
317 if (strm.avail_in == 0 && strm.avail_out != 0) {
318 fwrite(start, 1, strm.next_in - start, out);
319 start = in->buf;
320 in->left = 0;
321 zpull(&strm, in);
322 }
323
324 /* decompress -- return early when end-of-block reached */
325 strm.avail_out = CHUNK;
326 strm.next_out = junk;
327 ret = inflate(&strm, Z_BLOCK);
328 switch (ret) {
329 case Z_MEM_ERROR:
330 bail("out of memory", "");
331 case Z_DATA_ERROR:
332 bail("invalid compressed data in ", in->name);
333 }
334
335 /* update length of uncompressed data */
336 len += CHUNK - strm.avail_out;
337
338 /* check for block boundary (only get this when block copied out) */
339 if (strm.data_type & 128) {
340 /* if that was the last block, then done */
341 if (last)
342 break;
343
344 /* number of unused bits in last byte */
345 pos = strm.data_type & 7;
346
347 /* find the next last-block bit */
348 if (pos != 0) {
349 /* next last-block bit is in last used byte */
350 pos = 0x100 >> pos;
351 last = strm.next_in[-1] & pos;
352 if (last && clr)
353 strm.next_in[-1] &= ~pos;
354 }
355 else {
356 /* next last-block bit is in next unused byte */
357 if (strm.avail_in == 0) {
358 /* don't have that byte yet -- get it */
359 fwrite(start, 1, strm.next_in - start, out);
360 start = in->buf;
361 in->left = 0;
362 zpull(&strm, in);
363 }
364 last = strm.next_in[0] & 1;
365 if (last && clr)
366 strm.next_in[0] &= ~1;
367 }
368 }
369 }
370
371 /* update buffer with unused input */
372 in->left = strm.avail_in;
373 in->next = strm.next_in;
374
375 /* copy used input, write empty blocks to get to byte boundary */
376 pos = strm.data_type & 7;
377 fwrite(start, 1, in->next - start - 1, out);
378 last = in->next[-1];
379 if (pos == 0 || !clr)
380 /* already at byte boundary, or last file: write last byte */
381 putc(last, out);
382 else {
383 /* append empty blocks to last byte */
384 last &= ((0x100 >> pos) - 1); /* assure unused bits are zero */
385 if (pos & 1) {
386 /* odd -- append an empty stored block */
387 putc(last, out);
388 if (pos == 1)
389 putc(0, out); /* two more bits in block header */
390 fwrite("\0\0\xff\xff", 1, 4, out);
391 }
392 else {
393 /* even -- append 1, 2, or 3 empty fixed blocks */
394 switch (pos) {
395 case 6:
396 putc(last | 8, out);
397 last = 0;
398 case 4:
399 putc(last | 0x20, out);
400 last = 0;
401 case 2:
402 putc(last | 0x80, out);
403 putc(0, out);
404 }
405 }
406 }
407
408 /* update crc and tot */
409 *crc = crc32_combine(*crc, bget4(in), len);
410 *tot += (unsigned long)len;
411
412 /* clean up */
413 inflateEnd(&strm);
414 free(junk);
415 bclose(in);
416
417 /* write trailer if this is the last gzip file */
418 if (!clr) {
419 put4(*crc, out);
420 put4(*tot, out);
421 }
422}
423
/* join the gzip files on the command line, write result to stdout -- with no
   arguments a usage message goes to stderr and 0 is returned.  Returns 0 when
   all files have been joined and the output was written; a failure to write
   stdout ends the program through bail(). */
int main(int argc, char **argv)
{
    unsigned long crc, tot;     /* running crc and total uncompressed length */

    /* skip command name */
    argc--;
    argv++;

    /* show usage if no arguments */
    if (argc == 0) {
        fputs("gzjoin usage: gzjoin f1.gz [f2.gz [f3.gz ...]] > fjoin.gz\n",
              stderr);
        return 0;
    }

    /* join gzip files on command line and write to stdout -- argc counts the
       files still to come, so it is zero (clr false) for the last one */
    gzinit(&crc, &tot, stdout);
    while (argc--)
        gzcopy(*argv++, argc, &crc, &tot, stdout);

    /* gzinit() and gzcopy() do not check their writes, so push out what is
       still buffered and look at the stream's error indicator here rather
       than report success for truncated output */
    if (fflush(stdout) != 0 || ferror(stdout))
        bail("could not write to ", "stdout");

    /* done */
    return 0;
}
diff --git a/examples/gzlog.c b/examples/gzlog.c
new file mode 100644
index 0000000..f71f817
--- /dev/null
+++ b/examples/gzlog.c
@@ -0,0 +1,413 @@
1/*
2 * gzlog.c
3 * Copyright (C) 2004 Mark Adler
4 * For conditions of distribution and use, see copyright notice in gzlog.h
5 * version 1.0, 26 Nov 2004
6 *
7 */
8
9#include <string.h> /* memcmp() */
10#include <stdlib.h> /* malloc(), free(), NULL */
11#include <sys/types.h> /* size_t, off_t */
12#include <unistd.h> /* read(), close(), sleep(), ftruncate(), */
13 /* lseek() */
14#include <fcntl.h> /* open() */
15#include <sys/file.h> /* flock() */
16#include "zlib.h" /* deflateInit2(), deflate(), deflateEnd() */
17
18#include "gzlog.h" /* interface */
19#define local static
20
/* state of an open log file */
typedef struct {
    int id;                 /* set to GZLOGID to mark a valid object */
    int fd;                 /* descriptor of the open, locked log file */
    off_t extra;            /* file offset of the "ap" subfield contents */
    off_t mark_off;         /* first offset read from the "ap" subfield */
    off_t last_off;         /* offset of the last (empty) stored block */
    unsigned long crc;      /* crc of the uncompressed data */
    unsigned long len;      /* uncompressed length (modulo 2^32) */
    unsigned stored;        /* bytes written to the current stored block */
} gz_log;

#define GZLOGID 19334       /* gz_log object identifier */

#define LOCK_RETRY 1        /* seconds to sleep between lock attempts */
#define LOCK_PATIENCE 1200  /* give up after about twenty minutes */
37
38/* acquire a lock on a file */
39local int lock(int fd)
40{
41 int patience;
42
43 /* try to lock every LOCK_RETRY seconds for LOCK_PATIENCE seconds */
44 patience = LOCK_PATIENCE;
45 do {
46 if (flock(fd, LOCK_EX + LOCK_NB) == 0)
47 return 0;
48 (void)sleep(LOCK_RETRY);
49 patience -= LOCK_RETRY;
50 } while (patience > 0);
51
52 /* we've run out of patience -- give up */
53 return -1;
54}
55
56/* release lock */
57local void unlock(int fd)
58{
59 (void)flock(fd, LOCK_UN);
60}
61
62/* release a log object */
63local void log_clean(gz_log *log)
64{
65 unlock(log->fd);
66 (void)close(log->fd);
67 free(log);
68}
69
/* assemble the four bytes at buf, least significant byte first, into an
   unsigned long */
local unsigned long make_ulg(unsigned char *buf)
{
    unsigned long val;
    int k;

    val = 0;
    for (k = 3; k >= 0; k--)
        val = (val << 8) | buf[k];
    return val;
}
81
/* assemble the eight bytes at buf, least significant byte first, into an
   off_t.  The value is built in an unsigned 64-bit accumulator and converted
   at the end, since shifting a signed off_t into or past its sign bit (or by
   its full width, where off_t is narrower than 64 bits) is undefined. */
local off_t make_off(unsigned char *buf)
{
    int n;
    unsigned long long val;

    val = 0;
    for (n = 7; n >= 0; n--)
        val = (val << 8) | buf[n];
    return (off_t)val;
}
93
/* store the low 32 bits of val at buf as four bytes, least significant byte
   first */
local void dice_ulg(unsigned long val, unsigned char *buf)
{
    buf[0] = (unsigned char)(val & 0xff);
    buf[1] = (unsigned char)((val >> 8) & 0xff);
    buf[2] = (unsigned char)((val >> 16) & 0xff);
    buf[3] = (unsigned char)((val >> 24) & 0xff);
}
104
/* store val at buf as eight bytes, least significant byte first */
local void dice_off(off_t val, unsigned char *buf)
{
    unsigned char *end;

    for (end = buf + 8; buf != end; buf++) {
        *buf = val & 0xff;
        val >>= 8;
    }
}
115
/* initial, empty gzip file for appending -- the bytes are unsigned because
   0x8b and 0xff do not fit in a char where char is signed, and const because
   the table is only ever written out, never modified */
local const unsigned char empty_gz[] = {
    0x1f, 0x8b,                 /* magic gzip id */
    8,                          /* compression method is deflate */
    4,                          /* there is an extra field */
    0, 0, 0, 0,                 /* no modification time provided */
    0, 0xff,                    /* no extra flags, no OS */
    20, 0, 'a', 'p', 16, 0,     /* extra field with "ap" subfield */
    32, 0, 0, 0, 0, 0, 0, 0,    /* offset of uncompressed data */
    32, 0, 0, 0, 0, 0, 0, 0,    /* offset of last block */
    1, 0, 0, 0xff, 0xff,        /* empty stored block (last) */
    0, 0, 0, 0,                 /* crc */
    0, 0, 0, 0                  /* uncompressed length */
};
130
131/* initialize a log object with locking */
132void *gzlog_open(char *path)
133{
134 unsigned xlen;
135 unsigned char temp[20];
136 unsigned sub_len;
137 int good;
138 gz_log *log;
139
140 /* allocate log structure */
141 log = malloc(sizeof(gz_log));
142 if (log == NULL)
143 return NULL;
144 log->id = GZLOGID;
145
146 /* open file, creating it if necessary, and locking it */
147 log->fd = open(path, O_RDWR | O_CREAT, 0600);
148 if (log->fd < 0) {
149 free(log);
150 return NULL;
151 }
152 if (lock(log->fd)) {
153 close(log->fd);
154 free(log);
155 return NULL;
156 }
157
158 /* if file is empty, write new gzip stream */
159 if (lseek(log->fd, 0, SEEK_END) == 0) {
160 if (write(log->fd, empty_gz, sizeof(empty_gz)) != sizeof(empty_gz)) {
161 log_clean(log);
162 return NULL;
163 }
164 }
165
166 /* check gzip header */
167 (void)lseek(log->fd, 0, SEEK_SET);
168 if (read(log->fd, temp, 12) != 12 || temp[0] != 0x1f ||
169 temp[1] != 0x8b || temp[2] != 8 || (temp[3] & 4) == 0) {
170 log_clean(log);
171 return NULL;
172 }
173
174 /* process extra field to find "ap" sub-field */
175 xlen = temp[10] + (temp[11] << 8);
176 good = 0;
177 while (xlen) {
178 if (xlen < 4 || read(log->fd, temp, 4) != 4)
179 break;
180 sub_len = temp[2];
181 sub_len += temp[3] << 8;
182 xlen -= 4;
183 if (memcmp(temp, "ap", 2) == 0 && sub_len == 16) {
184 good = 1;
185 break;
186 }
187 if (xlen < sub_len)
188 break;
189 (void)lseek(log->fd, sub_len, SEEK_CUR);
190 xlen -= sub_len;
191 }
192 if (!good) {
193 log_clean(log);
194 return NULL;
195 }
196
197 /* read in "ap" sub-field */
198 log->extra = lseek(log->fd, 0, SEEK_CUR);
199 if (read(log->fd, temp, 16) != 16) {
200 log_clean(log);
201 return NULL;
202 }
203 log->mark_off = make_off(temp);
204 log->last_off = make_off(temp + 8);
205
206 /* get crc, length of gzip file */
207 (void)lseek(log->fd, log->last_off, SEEK_SET);
208 if (read(log->fd, temp, 13) != 13 ||
209 memcmp(temp, "\001\000\000\377\377", 5) != 0) {
210 log_clean(log);
211 return NULL;
212 }
213 log->crc = make_ulg(temp + 5);
214 log->len = make_ulg(temp + 9);
215
216 /* set up to write over empty last block */
217 (void)lseek(log->fd, log->last_off + 5, SEEK_SET);
218 log->stored = 0;
219 return (void *)log;
220}
221
222/* maximum amount to put in a stored block before starting a new one */
223#define MAX_BLOCK 16384
224
225/* write a block to a log object */
226int gzlog_write(void *obj, char *data, size_t len)
227{
228 size_t some;
229 unsigned char temp[5];
230 gz_log *log;
231
232 /* check object */
233 log = (gz_log *)obj;
234 if (log == NULL || log->id != GZLOGID)
235 return 1;
236
237 /* write stored blocks until all of the input is written */
238 do {
239 some = MAX_BLOCK - log->stored;
240 if (some > len)
241 some = len;
242 if (write(log->fd, data, some) != some)
243 return 1;
244 log->crc = crc32(log->crc, data, some);
245 log->len += some;
246 len -= some;
247 data += some;
248 log->stored += some;
249
250 /* if the stored block is full, end it and start another */
251 if (log->stored == MAX_BLOCK) {
252 (void)lseek(log->fd, log->last_off, SEEK_SET);
253 temp[0] = 0;
254 dice_ulg(log->stored + ((unsigned long)(~log->stored) << 16),
255 temp + 1);
256 if (write(log->fd, temp, 5) != 5)
257 return 1;
258 log->last_off = lseek(log->fd, log->stored, SEEK_CUR);
259 (void)lseek(log->fd, 5, SEEK_CUR);
260 log->stored = 0;
261 }
262 } while (len);
263 return 0;
264}
265
266/* recompress the remaining stored deflate data in place */
267local int recomp(gz_log *log)
268{
269 z_stream strm;
270 size_t len, max;
271 unsigned char *in;
272 unsigned char *out;
273 unsigned char temp[16];
274
275 /* allocate space and read it all in (it's around 1 MB) */
276 len = log->last_off - log->mark_off;
277 max = len + (len >> 12) + (len >> 14) + 11;
278 out = malloc(max);
279 if (out == NULL)
280 return 1;
281 in = malloc(len);
282 if (in == NULL) {
283 free(out);
284 return 1;
285 }
286 (void)lseek(log->fd, log->mark_off, SEEK_SET);
287 if (read(log->fd, in, len) != len) {
288 free(in);
289 free(out);
290 return 1;
291 }
292
293 /* recompress in memory, decoding stored data as we go */
294 /* note: this assumes that unsigned is four bytes or more */
295 /* consider not making that assumption */
296 strm.zalloc = Z_NULL;
297 strm.zfree = Z_NULL;
298 strm.opaque = Z_NULL;
299 if (deflateInit2(&strm, Z_BEST_COMPRESSION, Z_DEFLATED, -15, 8,
300 Z_DEFAULT_STRATEGY) != Z_OK) {
301 free(in);
302 free(out);
303 return 1;
304 }
305 strm.next_in = in;
306 strm.avail_out = max;
307 strm.next_out = out;
308 while (len >= 5) {
309 if (strm.next_in[0] != 0)
310 break;
311 strm.avail_in = strm.next_in[1] + (strm.next_in[2] << 8);
312 strm.next_in += 5;
313 len -= 5;
314 if (strm.avail_in != 0) {
315 if (len < strm.avail_in)
316 break;
317 len -= strm.avail_in;
318 (void)deflate(&strm, Z_NO_FLUSH);
319 if (strm.avail_in != 0 || strm.avail_out == 0)
320 break;
321 }
322 }
323 (void)deflate(&strm, Z_SYNC_FLUSH);
324 (void)deflateEnd(&strm);
325 free(in);
326 if (len != 0 || strm.avail_out == 0) {
327 free(out);
328 return 1;
329 }
330
331 /* overwrite stored data with compressed data */
332 (void)lseek(log->fd, log->mark_off, SEEK_SET);
333 len = max - strm.avail_out;
334 if (write(log->fd, out, len) != len) {
335 free(out);
336 return 1;
337 }
338 free(out);
339
340 /* write last empty block, crc, and length */
341 log->mark_off = log->last_off = lseek(log->fd, 0, SEEK_CUR);
342 temp[0] = 1;
343 dice_ulg(0xffffL << 16, temp + 1);
344 dice_ulg(log->crc, temp + 5);
345 dice_ulg(log->len, temp + 9);
346 if (write(log->fd, temp, 13) != 13)
347 return 1;
348
349 /* truncate file to discard remaining stored data and old trailer */
350 ftruncate(log->fd, lseek(log->fd, 0, SEEK_CUR));
351
352 /* update extra field to point to new last empty block */
353 (void)lseek(log->fd, log->extra, SEEK_SET);
354 dice_off(log->mark_off, temp);
355 dice_off(log->last_off, temp + 8);
356 if (write(log->fd, temp, 16) != 16)
357 return 1;
358 return 0;
359}
360
361/* maximum accumulation of stored blocks before compressing */
362#define MAX_STORED 1048576
363
364/* close log object */
365int gzlog_close(void *obj)
366{
367 unsigned char temp[8];
368 gz_log *log;
369
370 /* check object */
371 log = (gz_log *)obj;
372 if (log == NULL || log->id != GZLOGID)
373 return 1;
374
375 /* go to start of most recent block being written */
376 (void)lseek(log->fd, log->last_off, SEEK_SET);
377
378 /* if some stuff was put there, update block */
379 if (log->stored) {
380 temp[0] = 0;
381 dice_ulg(log->stored + ((unsigned long)(~log->stored) << 16),
382 temp + 1);
383 if (write(log->fd, temp, 5) != 5)
384 return 1;
385 log->last_off = lseek(log->fd, log->stored, SEEK_CUR);
386 }
387
388 /* write last block (empty) */
389 if (write(log->fd, "\001\000\000\377\377", 5) != 5)
390 return 1;
391
392 /* write updated crc and uncompressed length */
393 dice_ulg(log->crc, temp);
394 dice_ulg(log->len, temp + 4);
395 if (write(log->fd, temp, 8) != 8)
396 return 1;
397
398 /* put offset of that last block in gzip extra block */
399 (void)lseek(log->fd, log->extra + 8, SEEK_SET);
400 dice_off(log->last_off, temp);
401 if (write(log->fd, temp, 8) != 8)
402 return 1;
403
404 /* if more than 1 MB stored, then time to compress it */
405 if (log->last_off - log->mark_off > MAX_STORED) {
406 if (recomp(log))
407 return 1;
408 }
409
410 /* unlock and close file */
411 log_clean(log);
412 return 0;
413}
diff --git a/examples/gzlog.h b/examples/gzlog.h
new file mode 100644
index 0000000..a800bd5
--- /dev/null
+++ b/examples/gzlog.h
@@ -0,0 +1,58 @@
1/* gzlog.h
2 Copyright (C) 2004 Mark Adler, all rights reserved
3 version 1.0, 26 Nov 2004
4
5 This software is provided 'as-is', without any express or implied
6 warranty. In no event will the author be held liable for any damages
7 arising from the use of this software.
8
9 Permission is granted to anyone to use this software for any purpose,
10 including commercial applications, and to alter it and redistribute it
11 freely, subject to the following restrictions:
12
13 1. The origin of this software must not be misrepresented; you must not
14 claim that you wrote the original software. If you use this software
15 in a product, an acknowledgment in the product documentation would be
16 appreciated but is not required.
17 2. Altered source versions must be plainly marked as such, and must not be
18 misrepresented as being the original software.
19 3. This notice may not be removed or altered from any source distribution.
20
21 Mark Adler madler@alumni.caltech.edu
22 */
23
24/*
25 The gzlog object allows writing short messages to a gzipped log file,
26 opening the log file locked for small bursts, and then closing it. The log
27 object works by appending stored data to the gzip file until 1 MB has been
28 accumulated. At that time, the stored data is compressed, and replaces the
29 uncompressed data in the file. The log file is truncated to its new size at
30 that time. After closing, the log file is always a valid gzip file that can
31 be decompressed to recover what was written.
32
33 A gzip header "extra" field contains two file offsets for appending. The
34 first points to just after the last compressed data. The second points to
35 the last stored block in the deflate stream, which is empty. All of the
36 data between those pointers is uncompressed.
37 */
38
39/* Open a gzlog object, creating the log file if it does not exist. Return
40 NULL on error. Note that gzlog_open() could take a long time to return if
41 there is difficulty in locking the file. */
42void *gzlog_open(char *path);
43
44/* Write to a gzlog object. Return non-zero on error. This function will
45 simply write data to the file uncompressed. Compression of the data
46 will not occur until gzlog_close() is called. It is expected that
47 gzlog_write() is used for a short message, and then gzlog_close() is
48 called. If a large amount of data is to be written, then the application
49 should write no more than 1 MB at a time with gzlog_write() before
50 calling gzlog_close() and then gzlog_open() again. */
51int gzlog_write(void *log, char *data, size_t len);
52
53/* Close a gzlog object. Return non-zero on error. The log file is locked
54 until this function is called. This function will compress stored data
55 at the end of the gzip file if at least 1 MB has been accumulated. Note
56 that the file will not be a valid gzip file until this function completes.
57 */
58int gzlog_close(void *log);
diff --git a/examples/zlib_how.html b/examples/zlib_how.html
new file mode 100644
index 0000000..b2bda6b
--- /dev/null
+++ b/examples/zlib_how.html
@@ -0,0 +1,522 @@
1<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"
2 "http://www.w3.org/TR/REC-html40/loose.dtd">
3<html>
4<head>
5<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
6<title>zlib Usage Example</title>
7<!-- Copyright (c) 2004 Mark Adler. -->
8</head>
9<body bgcolor="#FFFFFF" text="#000000" link="#0000FF" vlink="#00A000">
10<h2 align="center"> zlib Usage Example </h2>
11We often get questions about how the <tt>deflate()</tt> and <tt>inflate()</tt> functions should be used.
12Users wonder when they should provide more input, when they should use more output,
13what to do with a <tt>Z_BUF_ERROR</tt>, how to make sure the process terminates properly, and
14so on. So for those who have read <tt>zlib.h</tt> (a few times), and
15would like further edification, below is an annotated example in C of simple routines to compress and decompress
16from an input file to an output file using <tt>deflate()</tt> and <tt>inflate()</tt> respectively. The
17annotations are interspersed between lines of the code. So please read between the lines.
18We hope this helps explain some of the intricacies of <em>zlib</em>.
19<p>
20Without further ado, here is the program <a href="zpipe.c"><tt>zpipe.c</tt></a>:
21<pre><b>
22/* zpipe.c: example of proper use of zlib's inflate() and deflate()
23 Not copyrighted -- provided to the public domain
24 Version 1.2 9 November 2004 Mark Adler */
25
26/* Version history:
27 1.0 30 Oct 2004 First version
28 1.1 8 Nov 2004 Add void casting for unused return values
29 Use switch statement for inflate() return values
30 1.2 9 Nov 2004 Add assertions to document zlib guarantees
31 */
32</b></pre><!-- -->
33We now include the header files for the required definitions. From
34<tt>stdio.h</tt> we use <tt>fopen()</tt>, <tt>fread()</tt>, <tt>fwrite()</tt>,
35<tt>feof()</tt>, <tt>ferror()</tt>, and <tt>fclose()</tt> for file i/o, and
36<tt>fputs()</tt> for error messages. From <tt>string.h</tt> we use
37<tt>strcmp()</tt> for command line argument processing.
38From <tt>assert.h</tt> we use the <tt>assert()</tt> macro.
39From <tt>zlib.h</tt>
40we use the basic compression functions <tt>deflateInit()</tt>,
41<tt>deflate()</tt>, and <tt>deflateEnd()</tt>, and the basic decompression
42functions <tt>inflateInit()</tt>, <tt>inflate()</tt>, and
43<tt>inflateEnd()</tt>.
44<pre><b>
45#include &lt;stdio.h&gt;
46#include &lt;string.h&gt;
47#include &lt;assert.h&gt;
48#include "zlib.h"
49</b></pre><!-- -->
50<tt>CHUNK</tt> is simply the buffer size for feeding data to and pulling data
51from the <em>zlib</em> routines. Larger buffer sizes would be more efficient,
52especially for <tt>inflate()</tt>. If the memory is available, buffer sizes
53on the order of 128K or 256K bytes should be used.
54<pre><b>
55#define CHUNK 16384
56</b></pre><!-- -->
57The <tt>def()</tt> routine compresses data from an input file to an output file. The output data
58will be in the <em>zlib</em> format, which is different from the <em>gzip</em> or <em>zip</em>
59formats. The <em>zlib</em> format has a very small header of only two bytes to identify it as
60a <em>zlib</em> stream and to provide decoding information, and a four-byte trailer with a fast
61check value to verify the integrity of the uncompressed data after decoding.
62<pre><b>
63/* Compress from file source to file dest until EOF on source.
64 def() returns Z_OK on success, Z_MEM_ERROR if memory could not be
65 allocated for processing, Z_STREAM_ERROR if an invalid compression
66 level is supplied, Z_VERSION_ERROR if the version of zlib.h and the
67 version of the library linked do not match, or Z_ERRNO if there is
68 an error reading or writing the files. */
69int def(FILE *source, FILE *dest, int level)
70{
71</b></pre>
72Here are the local variables for <tt>def()</tt>. <tt>ret</tt> will be used for <em>zlib</em>
73return codes. <tt>flush</tt> will keep track of the current flushing state for <tt>deflate()</tt>,
74which is either no flushing, or flush to completion after the end of the input file is reached.
75<tt>have</tt> is the amount of data returned from <tt>deflate()</tt>. The <tt>strm</tt> structure
76is used to pass information to and from the <em>zlib</em> routines, and to maintain the
77<tt>deflate()</tt> state. <tt>in</tt> and <tt>out</tt> are the input and output buffers for
78<tt>deflate()</tt>.
79<pre><b>
80 int ret, flush;
81 unsigned have;
82 z_stream strm;
83 char in[CHUNK];
84 char out[CHUNK];
85</b></pre><!-- -->
86The first thing we do is to initialize the <em>zlib</em> state for compression using
87<tt>deflateInit()</tt>. This must be done before the first use of <tt>deflate()</tt>.
88The <tt>zalloc</tt>, <tt>zfree</tt>, and <tt>opaque</tt> fields in the <tt>strm</tt>
89structure must be initialized before calling <tt>deflateInit()</tt>. Here they are
90set to the <em>zlib</em> constant <tt>Z_NULL</tt> to request that <em>zlib</em> use
91the default memory allocation routines. An application may also choose to provide
92custom memory allocation routines here. <tt>deflateInit()</tt> will allocate on the
93order of 256K bytes for the internal state.
94(See <a href="zlib_tech.html"><em>zlib Technical Details</em></a>.)
95<p>
96<tt>deflateInit()</tt> is called with a pointer to the structure to be initialized and
97the compression level, which is an integer in the range of -1 to 9. Lower compression
98levels result in faster execution, but less compression. Higher levels result in
99greater compression, but slower execution. The <em>zlib</em> constant Z_DEFAULT_COMPRESSION,
100equal to -1,
101provides a good compromise between compression and speed and is equivalent to level 6.
102Level 0 actually does no compression at all, and in fact expands the data slightly to produce
103the <em>zlib</em> format (it is not a byte-for-byte copy of the input).
104More advanced applications of <em>zlib</em>
105may use <tt>deflateInit2()</tt> here instead. Such an application may want to reduce how
106much memory will be used, at some price in compression. Or it may need to request a
107<em>gzip</em> header and trailer instead of a <em>zlib</em> header and trailer, or raw
108encoding with no header or trailer at all.
109<p>
110We must check the return value of <tt>deflateInit()</tt> against the <em>zlib</em> constant
111<tt>Z_OK</tt> to make sure that it was able to
112allocate memory for the internal state, and that the provided arguments were valid.
113<tt>deflateInit()</tt> will also check that the version of <em>zlib</em> that the <tt>zlib.h</tt>
114file came from matches the version of <em>zlib</em> actually linked with the program. This
115is especially important for environments in which <em>zlib</em> is a shared library.
116<p>
117Note that an application can initialize multiple, independent <em>zlib</em> streams, which can
118operate in parallel. The state information maintained in the structure allows the <em>zlib</em>
119routines to be reentrant.
120<pre><b>
121 /* allocate deflate state */
122 strm.zalloc = Z_NULL;
123 strm.zfree = Z_NULL;
124 strm.opaque = Z_NULL;
125 ret = deflateInit(&amp;strm, level);
126 if (ret != Z_OK)
127 return ret;
128</b></pre><!-- -->
129With the pleasantries out of the way, now we can get down to business. The outer <tt>do</tt>-loop
130reads all of the input file and exits at the bottom of the loop once end-of-file is reached.
131This loop contains the only call of <tt>deflate()</tt>. So we must make sure that all of the
132input data has been processed and that all of the output data has been generated and consumed
133before we fall out of the loop at the bottom.
134<pre><b>
135 /* compress until end of file */
136 do {
137</b></pre>
138We start off by reading data from the input file. The number of bytes read is put directly
139into <tt>avail_in</tt>, and a pointer to those bytes is put into <tt>next_in</tt>. We also
140check to see if end-of-file on the input has been reached. If we are at the end of file, then <tt>flush</tt> is set to the
141<em>zlib</em> constant <tt>Z_FINISH</tt>, which is later passed to <tt>deflate()</tt> to
142indicate that this is the last chunk of input data to compress. We need to use <tt>feof()</tt>
143to check for end-of-file as opposed to seeing if fewer than <tt>CHUNK</tt> bytes have been read. The
144reason is that if the input file length is an exact multiple of <tt>CHUNK</tt>, we will miss
145the fact that we got to the end-of-file, and not know to tell <tt>deflate()</tt> to finish
146up the compressed stream. If we are not yet at the end of the input, then the <em>zlib</em>
147constant <tt>Z_NO_FLUSH</tt> will be passed to <tt>deflate</tt> to indicate that we are still
148in the middle of the uncompressed data.
149<p>
150If there is an error in reading from the input file, the process is aborted with
151<tt>deflateEnd()</tt> being called to free the allocated <em>zlib</em> state before returning
152the error. We wouldn't want a memory leak, now would we? <tt>deflateEnd()</tt> can be called
153at any time after the state has been initialized. Once that's done, <tt>deflateInit()</tt> (or
154<tt>deflateInit2()</tt>) would have to be called to start a new compression process. There is
155no point here in checking the <tt>deflateEnd()</tt> return code. The deallocation can't fail.
156<pre><b>
157 strm.avail_in = fread(in, 1, CHUNK, source);
158 if (ferror(source)) {
159 (void)deflateEnd(&amp;strm);
160 return Z_ERRNO;
161 }
162 flush = feof(source) ? Z_FINISH : Z_NO_FLUSH;
163 strm.next_in = in;
164</b></pre><!-- -->
165The inner <tt>do</tt>-loop passes our chunk of input data to <tt>deflate()</tt>, and then
166keeps calling <tt>deflate()</tt> until it is done producing output. Once there is no more
167new output, <tt>deflate()</tt> is guaranteed to have consumed all of the input, i.e.,
168<tt>avail_in</tt> will be zero.
169<pre><b>
170 /* run deflate() on input until output buffer not full, finish
171 compression if all of source has been read in */
172 do {
173</b></pre>
174Output space is provided to <tt>deflate()</tt> by setting <tt>avail_out</tt> to the number
175of available output bytes and <tt>next_out</tt> to a pointer to that space.
176<pre><b>
177 strm.avail_out = CHUNK;
178 strm.next_out = out;
179</b></pre>
180Now we call the compression engine itself, <tt>deflate()</tt>. It takes as many of the
181<tt>avail_in</tt> bytes at <tt>next_in</tt> as it can process, and writes as many as
182<tt>avail_out</tt> bytes to <tt>next_out</tt>. Those counters and pointers are then
183updated past the input data consumed and the output data written. It is the amount of
184output space available that may limit how much input is consumed.
185Hence the inner loop to make sure that
186all of the input is consumed by providing more output space each time. Since <tt>avail_in</tt>
187and <tt>next_in</tt> are updated by <tt>deflate()</tt>, we don't have to mess with those
188between <tt>deflate()</tt> calls until it's all used up.
189<p>
190The parameters to <tt>deflate()</tt> are a pointer to the <tt>strm</tt> structure containing
191the input and output information and the internal compression engine state, and a parameter
192indicating whether and how to flush data to the output. Normally <tt>deflate</tt> will consume
193several K bytes of input data before producing any output (except for the header), in order
194to accumulate statistics on the data for optimum compression. It will then put out a burst of
195compressed data, and proceed to consume more input before the next burst. Eventually,
196<tt>deflate()</tt>
197must be told to terminate the stream, complete the compression with provided input data, and
198write out the trailer check value. <tt>deflate()</tt> will continue to compress normally as long
199as the flush parameter is <tt>Z_NO_FLUSH</tt>. Once the <tt>Z_FINISH</tt> parameter is provided,
200<tt>deflate()</tt> will begin to complete the compressed output stream. However depending on how
201much output space is provided, <tt>deflate()</tt> may have to be called several times until it
202has provided the complete compressed stream, even after it has consumed all of the input. The flush
203parameter must continue to be <tt>Z_FINISH</tt> for those subsequent calls.
204<p>
205There are other values of the flush parameter that are used in more advanced applications. You can
206force <tt>deflate()</tt> to produce a burst of output that encodes all of the input data provided
207so far, even if it wouldn't have otherwise, for example to control data latency on a link with
208compressed data. You can also ask that <tt>deflate()</tt> do that as well as erase any history up to
209that point so that what follows can be decompressed independently, for example for random access
210applications. Both requests will degrade compression by an amount depending on how often such
211requests are made.
212<p>
213<tt>deflate()</tt> has a return value that can indicate errors, yet we do not check it here. Why
214not? Well, it turns out that <tt>deflate()</tt> can do no wrong here. Let's go through
215<tt>deflate()</tt>'s return values and dispense with them one by one. The possible values are
216<tt>Z_OK</tt>, <tt>Z_STREAM_END</tt>, <tt>Z_STREAM_ERROR</tt>, or <tt>Z_BUF_ERROR</tt>. <tt>Z_OK</tt>
217is, well, ok. <tt>Z_STREAM_END</tt> is also ok and will be returned for the last call of
218<tt>deflate()</tt>. This is already guaranteed by calling <tt>deflate()</tt> with <tt>Z_FINISH</tt>
219until it has no more output. <tt>Z_STREAM_ERROR</tt> is only possible if the stream is not
220initialized properly, but we did initialize it properly. There is no harm in checking for
221<tt>Z_STREAM_ERROR</tt> here, for example to check for the possibility that some
222other part of the application inadvertently clobbered the memory containing the <em>zlib</em> state.
223<tt>Z_BUF_ERROR</tt> will be explained further below, but
224suffice it to say that this is simply an indication that <tt>deflate()</tt> could not consume
225more input or produce more output. <tt>deflate()</tt> can be called again with more output space
226or more available input, which it will be in this code.
227<pre><b>
228 ret = deflate(&amp;strm, flush); /* no bad return value */
229 assert(ret != Z_STREAM_ERROR); /* state not clobbered */
230</b></pre>
231Now we compute how much output <tt>deflate()</tt> provided on the last call, which is the
232difference between how much space was provided before the call, and how much output space
233is still available after the call. Then that data, if any, is written to the output file.
234We can then reuse the output buffer for the next call of <tt>deflate()</tt>. Again if there
235is a file i/o error, we call <tt>deflateEnd()</tt> before returning to avoid a memory leak.
236<pre><b>
237 have = CHUNK - strm.avail_out;
238 if (fwrite(out, 1, have, dest) != have || ferror(dest)) {
239 (void)deflateEnd(&amp;strm);
240 return Z_ERRNO;
241 }
242</b></pre>
243The inner <tt>do</tt>-loop is repeated until the last <tt>deflate()</tt> call fails to fill the
244provided output buffer. Then we know that <tt>deflate()</tt> has done as much as it can with
245the provided input, and that all of that input has been consumed. We can then fall out of this
246loop and reuse the input buffer.
247<p>
248The way we tell that <tt>deflate()</tt> has no more output is by seeing that it did not fill
249the output buffer, leaving <tt>avail_out</tt> greater than zero. However suppose that
250<tt>deflate()</tt> has no more output, but just so happened to exactly fill the output buffer!
251<tt>avail_out</tt> is zero, and we can't tell that <tt>deflate()</tt> has done all it can.
252As far as we know, <tt>deflate()</tt>
253has more output for us. So we call it again. But now <tt>deflate()</tt> produces no output
254at all, and <tt>avail_out</tt> remains unchanged as <tt>CHUNK</tt>. That <tt>deflate()</tt> call
255wasn't able to do anything, either consume input or produce output, and so it returns
256<tt>Z_BUF_ERROR</tt>. (See, I told you I'd cover this later.) However this is not a problem at
257all. Now we finally have the desired indication that <tt>deflate()</tt> is really done,
258and so we drop out of the inner loop to provide more input to <tt>deflate()</tt>.
259<p>
260With <tt>flush</tt> set to <tt>Z_FINISH</tt>, this final set of <tt>deflate()</tt> calls will
261complete the output stream. Once that is done, subsequent calls of <tt>deflate()</tt> would return
262<tt>Z_STREAM_ERROR</tt> if the flush parameter is not <tt>Z_FINISH</tt>, and do no more processing
263until the state is reinitialized.
264<p>
265Some applications of <em>zlib</em> have two loops that call <tt>deflate()</tt>
266instead of the single inner loop we have here. The first loop would call
267without flushing and feed all of the data to <tt>deflate()</tt>. The second loop would call
268<tt>deflate()</tt> with no more
269data and the <tt>Z_FINISH</tt> parameter to complete the process. As you can see from this
270example, that can be avoided by simply keeping track of the current flush state.
271<pre><b>
272 } while (strm.avail_out == 0);
273 assert(strm.avail_in == 0); /* all input will be used */
274</b></pre><!-- -->
275Now we check to see if we have already processed all of the input file. That information was
276saved in the <tt>flush</tt> variable, so we see if that was set to <tt>Z_FINISH</tt>. If so,
277then we're done and we fall out of the outer loop. We're guaranteed to get <tt>Z_STREAM_END</tt>
278from the last <tt>deflate()</tt> call, since we ran it until the last chunk of input was
279consumed and all of the output was generated.
280<pre><b>
281 /* done when last data in file processed */
282 } while (flush != Z_FINISH);
283 assert(ret == Z_STREAM_END); /* stream will be complete */
284</b></pre><!-- -->
285The process is complete, but we still need to deallocate the state to avoid a memory leak
286(or rather more like a memory hemorrhage if you didn't do this). Then
287finally we can return with a happy return value.
288<pre><b>
289 /* clean up and return */
290 (void)deflateEnd(&amp;strm);
291 return Z_OK;
292}
293</b></pre><!-- -->
294Now we do the same thing for decompression in the <tt>inf()</tt> routine. <tt>inf()</tt>
295decompresses what is hopefully a valid <em>zlib</em> stream from the input file and writes the
296uncompressed data to the output file. Much of the discussion above for <tt>def()</tt>
297applies to <tt>inf()</tt> as well, so the discussion here will focus on the differences between
298the two.
299<pre><b>
300/* Decompress from file source to file dest until stream ends or EOF.
301 inf() returns Z_OK on success, Z_MEM_ERROR if memory could not be
302 allocated for processing, Z_DATA_ERROR if the deflate data is
303 invalid or incomplete, Z_VERSION_ERROR if the version of zlib.h and
304 the version of the library linked do not match, or Z_ERRNO if there
305 is an error reading or writing the files. */
306int inf(FILE *source, FILE *dest)
307{
308</b></pre>
309The local variables have the same functionality as they do for <tt>def()</tt>. The
310only difference is that there is no <tt>flush</tt> variable, since <tt>inflate()</tt>
311can tell from the <em>zlib</em> stream itself when the stream is complete.
312<pre><b>
313 int ret;
314 unsigned have;
315 z_stream strm;
316 char in[CHUNK];
317 char out[CHUNK];
318</b></pre><!-- -->
319The initialization of the state is the same, except that there is no compression level,
320of course, and two more elements of the structure are initialized. <tt>avail_in</tt>
321and <tt>next_in</tt> must be initialized before calling <tt>inflateInit()</tt>. This
322is because the application has the option to provide the start of the zlib stream in
323order for <tt>inflateInit()</tt> to have access to information about the compression
324method to aid in memory allocation. In the current implementation of <em>zlib</em>
325(up through versions 1.2.x), the method-dependent memory allocations are deferred to the first call of
326<tt>inflate()</tt> anyway. However those fields must be initialized since later versions
327of <em>zlib</em> that provide more compression methods may take advantage of this interface.
328In any case, no decompression is performed by <tt>inflateInit()</tt>, so the
329<tt>avail_out</tt> and <tt>next_out</tt> fields do not need to be initialized before calling.
330<p>
331Here <tt>avail_in</tt> is set to zero and <tt>next_in</tt> is set to <tt>Z_NULL</tt> to
332indicate that no input data is being provided.
333<pre><b>
334 /* allocate inflate state */
335 strm.zalloc = Z_NULL;
336 strm.zfree = Z_NULL;
337 strm.opaque = Z_NULL;
338 strm.avail_in = 0;
339 strm.next_in = Z_NULL;
340 ret = inflateInit(&amp;strm);
341 if (ret != Z_OK)
342 return ret;
343</b></pre><!-- -->
344The outer <tt>do</tt>-loop decompresses input until <tt>inflate()</tt> indicates
345that it has reached the end of the compressed data and has produced all of the uncompressed
346output. This is in contrast to <tt>def()</tt> which processes all of the input file.
347If end-of-file is reached before the compressed data self-terminates, then the compressed
348data is incomplete and an error is returned.
349<pre><b>
350 /* decompress until deflate stream ends or end of file */
351 do {
352</b></pre>
353We read input data and set the <tt>strm</tt> structure accordingly. If we've reached the
354end of the input file, then we leave the outer loop and report an error, since the
355compressed data is incomplete. Note that we may read more data than is eventually consumed
356by <tt>inflate()</tt>, if the input file continues past the <em>zlib</em> stream.
357For applications where <em>zlib</em> streams are embedded in other data, this routine would
358need to be modified to return the unused data, or at least indicate how much of the input
359data was not used, so the application would know where to pick up after the <em>zlib</em> stream.
360<pre><b>
361 strm.avail_in = fread(in, 1, CHUNK, source);
362 if (ferror(source)) {
363 (void)inflateEnd(&amp;strm);
364 return Z_ERRNO;
365 }
366 if (strm.avail_in == 0)
367 break;
368 strm.next_in = in;
369</b></pre><!-- -->
370The inner <tt>do</tt>-loop has the same function it did in <tt>def()</tt>, which is to
371keep calling <tt>inflate()</tt> until it has generated all of the output it can with the
372provided input.
373<pre><b>
374 /* run inflate() on input until output buffer not full */
375 do {
376</b></pre>
377Just like in <tt>def()</tt>, the same output space is provided for each call of <tt>inflate()</tt>.
378<pre><b>
379 strm.avail_out = CHUNK;
380 strm.next_out = out;
381</b></pre>
382Now we run the decompression engine itself. There is no need to adjust the flush parameter, since
383the <em>zlib</em> format is self-terminating. The main difference here is that there are
384return values that we need to pay attention to. <tt>Z_DATA_ERROR</tt>
385indicates that <tt>inflate()</tt> detected an error in the <em>zlib</em> compressed data format,
386which means that either the data is not a <em>zlib</em> stream to begin with, or that the data was
387corrupted somewhere along the way since it was compressed. The other error to be processed is
388<tt>Z_MEM_ERROR</tt>, which can occur since memory allocation is deferred until <tt>inflate()</tt>
389needs it, unlike <tt>deflate()</tt>, whose memory is allocated at the start by <tt>deflateInit()</tt>.
390<p>
391Advanced applications may use
392<tt>deflateSetDictionary()</tt> to prime <tt>deflate()</tt> with a set of likely data to improve the
393first 32K or so of compression. This is noted in the <em>zlib</em> header, so <tt>inflate()</tt>
394requests that that dictionary be provided before it can start to decompress. Without the dictionary,
395correct decompression is not possible. For this routine, we have no idea what the dictionary is,
396so the <tt>Z_NEED_DICT</tt> indication is converted to a <tt>Z_DATA_ERROR</tt>.
397<p>
398<tt>inflate()</tt> can also return <tt>Z_STREAM_ERROR</tt>, which should not be possible here,
399but could be checked for as noted above for <tt>def()</tt>. <tt>Z_BUF_ERROR</tt> does not need to be
400checked for here, for the same reasons noted for <tt>def()</tt>. <tt>Z_STREAM_END</tt> will be
401checked for later.
402<pre><b>
403 ret = inflate(&amp;strm, Z_NO_FLUSH);
404 assert(ret != Z_STREAM_ERROR); /* state not clobbered */
405 switch (ret) {
406 case Z_NEED_DICT:
407 ret = Z_DATA_ERROR; /* and fall through */
408 case Z_DATA_ERROR:
409 case Z_MEM_ERROR:
410 (void)inflateEnd(&amp;strm);
411 return ret;
412 }
413</b></pre>
414The output of <tt>inflate()</tt> is handled identically to that of <tt>deflate()</tt>.
415<pre><b>
416 have = CHUNK - strm.avail_out;
417 if (fwrite(out, 1, have, dest) != have || ferror(dest)) {
418 (void)inflateEnd(&amp;strm);
419 return Z_ERRNO;
420 }
421</b></pre>
422The inner <tt>do</tt>-loop ends when <tt>inflate()</tt> has no more output as indicated
423by not filling the output buffer, just as for <tt>deflate()</tt>.
424<pre><b>
425 } while (strm.avail_out == 0);
426 assert(strm.avail_in == 0); /* all input will be used */
427</b></pre><!-- -->
428The outer <tt>do</tt>-loop ends when <tt>inflate()</tt> reports that it has reached the
429end of the input <em>zlib</em> stream, has completed the decompression and integrity
430check, and has provided all of the output. This is indicated by the <tt>inflate()</tt>
431return value <tt>Z_STREAM_END</tt>. The inner loop is guaranteed to leave <tt>ret</tt>
432equal to <tt>Z_STREAM_END</tt> if the last chunk of the input file read contained the end
433of the <em>zlib</em> stream. So if the return value is not <tt>Z_STREAM_END</tt>, the
434loop continues to read more input.
435<pre><b>
436 /* done when inflate() says it's done */
437 } while (ret != Z_STREAM_END);
438</b></pre><!-- -->
439At this point, decompression successfully completed, or we broke out of the loop due to no
440more data being available from the input file. If the last <tt>inflate()</tt> return value
441is not <tt>Z_STREAM_END</tt>, then the <em>zlib</em> stream was incomplete and a data error
442is returned. Otherwise, we return with a happy return value. Of course, <tt>inflateEnd()</tt>
443is called first to avoid a memory leak.
444<pre><b>
445 /* clean up and return */
446 (void)inflateEnd(&amp;strm);
447 return ret == Z_STREAM_END ? Z_OK : Z_DATA_ERROR;
448}
449</b></pre><!-- -->
450That ends the routines that directly use <em>zlib</em>. The following routines make this
451a command-line program by running data through the above routines from <tt>stdin</tt> to
452<tt>stdout</tt>, and handling any errors reported by <tt>def()</tt> or <tt>inf()</tt>.
453<p>
454<tt>zerr()</tt> is used to interpret the possible error codes from <tt>def()</tt>
455and <tt>inf()</tt>, as detailed in their comments above, and print out an error message.
456Note that these are only a subset of the possible return values from <tt>deflate()</tt>
457and <tt>inflate()</tt>.
458<pre><b>
459/* report a zlib or i/o error */
460void zerr(int ret)
461{
462 fputs("zpipe: ", stderr);
463 switch (ret) {
464 case Z_ERRNO:
465 if (ferror(stdin))
466 fputs("error reading stdin\n", stderr);
467 if (ferror(stdout))
468 fputs("error writing stdout\n", stderr);
469 break;
470 case Z_STREAM_ERROR:
471 fputs("invalid compression level\n", stderr);
472 break;
473 case Z_DATA_ERROR:
474 fputs("invalid or incomplete deflate data\n", stderr);
475 break;
476 case Z_MEM_ERROR:
477 fputs("out of memory\n", stderr);
478 break;
479 case Z_VERSION_ERROR:
480 fputs("zlib version mismatch!\n", stderr);
481 }
482}
483</b></pre><!-- -->
484Here is the <tt>main()</tt> routine used to test <tt>def()</tt> and <tt>inf()</tt>. The
485<tt>zpipe</tt> command is simply a compression pipe from <tt>stdin</tt> to <tt>stdout</tt>, if
486no arguments are given, or it is a decompression pipe if <tt>zpipe -d</tt> is used. If any other
487arguments are provided, no compression or decompression is performed. Instead a usage
488message is displayed. Examples are <tt>zpipe &lt; foo.txt &gt; foo.txt.z</tt> to compress, and
489<tt>zpipe -d &lt; foo.txt.z &gt; foo.txt</tt> to decompress.
490<pre><b>
491/* compress or decompress from stdin to stdout */
492int main(int argc, char **argv)
493{
494 int ret;
495
496 /* do compression if no arguments */
497 if (argc == 1) {
498 ret = def(stdin, stdout, Z_DEFAULT_COMPRESSION);
499 if (ret != Z_OK)
500 zerr(ret);
501 return ret;
502 }
503
504 /* do decompression if -d specified */
505 else if (argc == 2 &amp;&amp; strcmp(argv[1], "-d") == 0) {
506 ret = inf(stdin, stdout);
507 if (ret != Z_OK)
508 zerr(ret);
509 return ret;
510 }
511
512 /* otherwise, report usage */
513 else {
514 fputs("zpipe usage: zpipe [-d] &lt; source &gt; dest\n", stderr);
515 return 1;
516 }
517}
518</b></pre>
519<hr>
520<i>Copyright (c) 2004 by Mark Adler<br>Last modified 13 November 2004</i>
521</body>
522</html>
diff --git a/examples/zpipe.c b/examples/zpipe.c
new file mode 100644
index 0000000..a602d59
--- /dev/null
+++ b/examples/zpipe.c
@@ -0,0 +1,191 @@
1/* zpipe.c: example of proper use of zlib's inflate() and deflate()
2 Not copyrighted -- provided to the public domain
3 Version 1.2 9 November 2004 Mark Adler */
4
5/* Version history:
6 1.0 30 Oct 2004 First version
7 1.1 8 Nov 2004 Add void casting for unused return values
8 Use switch statement for inflate() return values
9 1.2 9 Nov 2004 Add assertions to document zlib guarantees
10 */
11
12#include <stdio.h>
13#include <string.h>
14#include <assert.h>
15#include "zlib.h"
16
17#define CHUNK 16384
18
19/* Compress from file source to file dest until EOF on source.
20 def() returns Z_OK on success, Z_MEM_ERROR if memory could not be
21 allocated for processing, Z_STREAM_ERROR if an invalid compression
22 level is supplied, Z_VERSION_ERROR if the version of zlib.h and the
23 version of the library linked do not match, or Z_ERRNO if there is
24 an error reading or writing the files. */
25int def(FILE *source, FILE *dest, int level)
26{
27 int ret, flush;
28 unsigned have;
29 z_stream strm;
30 char in[CHUNK];
31 char out[CHUNK];
32
33 /* allocate deflate state */
34 strm.zalloc = Z_NULL;
35 strm.zfree = Z_NULL;
36 strm.opaque = Z_NULL;
37 ret = deflateInit(&strm, level);
38 if (ret != Z_OK)
39 return ret;
40
41 /* compress until end of file */
42 do {
43 strm.avail_in = fread(in, 1, CHUNK, source);
44 if (ferror(source)) {
45 (void)deflateEnd(&strm);
46 return Z_ERRNO;
47 }
48 flush = feof(source) ? Z_FINISH : Z_NO_FLUSH;
49 strm.next_in = in;
50
51 /* run deflate() on input until output buffer not full, finish
52 compression if all of source has been read in */
53 do {
54 strm.avail_out = CHUNK;
55 strm.next_out = out;
56 ret = deflate(&strm, flush); /* no bad return value */
57 assert(ret != Z_STREAM_ERROR); /* state not clobbered */
58 have = CHUNK - strm.avail_out;
59 if (fwrite(out, 1, have, dest) != have || ferror(dest)) {
60 (void)deflateEnd(&strm);
61 return Z_ERRNO;
62 }
63 } while (strm.avail_out == 0);
64 assert(strm.avail_in == 0); /* all input will be used */
65
66 /* done when last data in file processed */
67 } while (flush != Z_FINISH);
68 assert(ret == Z_STREAM_END); /* stream will be complete */
69
70 /* clean up and return */
71 (void)deflateEnd(&strm);
72 return Z_OK;
73}
74
75/* Decompress from file source to file dest until stream ends or EOF.
76 inf() returns Z_OK on success, Z_MEM_ERROR if memory could not be
77 allocated for processing, Z_DATA_ERROR if the deflate data is
78 invalid or incomplete, Z_VERSION_ERROR if the version of zlib.h and
79 the version of the library linked do not match, or Z_ERRNO if there
80 is an error reading or writing the files. */
81int inf(FILE *source, FILE *dest)
82{
83 int ret;
84 unsigned have;
85 z_stream strm;
86 char in[CHUNK];
87 char out[CHUNK];
88
89 /* allocate inflate state */
90 strm.zalloc = Z_NULL;
91 strm.zfree = Z_NULL;
92 strm.opaque = Z_NULL;
93 strm.avail_in = 0;
94 strm.next_in = Z_NULL;
95 ret = inflateInit(&strm);
96 if (ret != Z_OK)
97 return ret;
98
99 /* decompress until deflate stream ends or end of file */
100 do {
101 strm.avail_in = fread(in, 1, CHUNK, source);
102 if (ferror(source)) {
103 (void)inflateEnd(&strm);
104 return Z_ERRNO;
105 }
106 if (strm.avail_in == 0)
107 break;
108 strm.next_in = in;
109
110 /* run inflate() on input until output buffer not full */
111 do {
112 strm.avail_out = CHUNK;
113 strm.next_out = out;
114 ret = inflate(&strm, Z_NO_FLUSH);
115 assert(ret != Z_STREAM_ERROR); /* state not clobbered */
116 switch (ret) {
117 case Z_NEED_DICT:
118 ret = Z_DATA_ERROR; /* and fall through */
119 case Z_DATA_ERROR:
120 case Z_MEM_ERROR:
121 (void)inflateEnd(&strm);
122 return ret;
123 }
124 have = CHUNK - strm.avail_out;
125 if (fwrite(out, 1, have, dest) != have || ferror(dest)) {
126 (void)inflateEnd(&strm);
127 return Z_ERRNO;
128 }
129 } while (strm.avail_out == 0);
130 assert(strm.avail_in == 0); /* all input will be used */
131
132 /* done when inflate() says it's done */
133 } while (ret != Z_STREAM_END);
134
135 /* clean up and return */
136 (void)inflateEnd(&strm);
137 return ret == Z_STREAM_END ? Z_OK : Z_DATA_ERROR;
138}
139
140/* report a zlib or i/o error */
141void zerr(int ret)
142{
143 fputs("zpipe: ", stderr);
144 switch (ret) {
145 case Z_ERRNO:
146 if (ferror(stdin))
147 fputs("error reading stdin\n", stderr);
148 if (ferror(stdout))
149 fputs("error writing stdout\n", stderr);
150 break;
151 case Z_STREAM_ERROR:
152 fputs("invalid compression level\n", stderr);
153 break;
154 case Z_DATA_ERROR:
155 fputs("invalid or incomplete deflate data\n", stderr);
156 break;
157 case Z_MEM_ERROR:
158 fputs("out of memory\n", stderr);
159 break;
160 case Z_VERSION_ERROR:
161 fputs("zlib version mismatch!\n", stderr);
162 }
163}
164
165/* compress or decompress from stdin to stdout */
166int main(int argc, char **argv)
167{
168 int ret;
169
170 /* do compression if no arguments */
171 if (argc == 1) {
172 ret = def(stdin, stdout, Z_DEFAULT_COMPRESSION);
173 if (ret != Z_OK)
174 zerr(ret);
175 return ret;
176 }
177
178 /* do decompression if -d specified */
179 else if (argc == 2 && strcmp(argv[1], "-d") == 0) {
180 ret = inf(stdin, stdout);
181 if (ret != Z_OK)
182 zerr(ret);
183 return ret;
184 }
185
186 /* otherwise, report usage */
187 else {
188 fputs("zpipe usage: zpipe [-d] < source > dest\n", stderr);
189 return 1;
190 }
191}