summaryrefslogtreecommitdiff
path: root/contrib
diff options
context:
space:
mode:
Diffstat (limited to 'contrib')
-rw-r--r--contrib/README.contrib10
-rw-r--r--contrib/blast/Makefile8
-rw-r--r--contrib/blast/README4
-rw-r--r--contrib/blast/blast.c444
-rw-r--r--contrib/blast/blast.h71
-rw-r--r--contrib/blast/test.pkbin0 -> 8 bytes
-rw-r--r--contrib/blast/test.txt1
-rw-r--r--contrib/inflate86/inffast.S1095
-rw-r--r--contrib/puff/Makefile8
-rw-r--r--contrib/puff/README63
-rw-r--r--contrib/puff/puff.c833
-rw-r--r--contrib/puff/puff.h31
-rw-r--r--contrib/puff/zeros.rawbin0 -> 1213 bytes
13 files changed, 2566 insertions, 2 deletions
diff --git a/contrib/README.contrib b/contrib/README.contrib
index 7ad191c..fcee020 100644
--- a/contrib/README.contrib
+++ b/contrib/README.contrib
@@ -11,12 +11,18 @@ asm586/ and asm686/ by Brian Raiter <breadbox@muppetlabs.com>
11 asm code for Pentium and Pentium Pro 11 asm code for Pentium and Pentium Pro
12 See http://www.muppetlabs.com/~breadbox/software/assembly.html 12 See http://www.muppetlabs.com/~breadbox/software/assembly.html
13 13
14delphi/ by Bob Dellaca <bobdl@xtra.co.nz> 14blast/ by Mark Adler <madler@alumni.caltech.edu>
15 Decompressor for output of PKWare Data Compression Library
16
17delphi/ by Bob Dellaca <bobdl@xtra.co.nz>
15 Support for Delphi 18 Support for Delphi
16 19
17delphi2/ by Davide Moretti <dave@rimini.com> 20delphi2/ by Davide Moretti <dave@rimini.com>
18 Another support for C++Builder and Delphi 21 Another support for C++Builder and Delphi
19 22
23inflate86/ by Chris Anderson <christop@charm.net>
24 Tuned x86 gcc asm code to replace inflate_fast()
25
20minizip/ by Gilles Vollant <info@winimage.com> 26minizip/ by Gilles Vollant <info@winimage.com>
21 Mini zip and unzip based on zlib 27 Mini zip and unzip based on zlib
22 See http://www.winimage.com/zLibDll/unzip.html 28 See http://www.winimage.com/zLibDll/unzip.html
diff --git a/contrib/blast/Makefile b/contrib/blast/Makefile
new file mode 100644
index 0000000..9be80ba
--- /dev/null
+++ b/contrib/blast/Makefile
@@ -0,0 +1,8 @@
1blast: blast.c blast.h
2 cc -DTEST -o blast blast.c
3
4test: blast
5 blast < test.pk | cmp - test.txt
6
7clean:
8 rm -f blast blast.o
diff --git a/contrib/blast/README b/contrib/blast/README
new file mode 100644
index 0000000..e3a60b3
--- /dev/null
+++ b/contrib/blast/README
@@ -0,0 +1,4 @@
1Read blast.h for purpose and usage.
2
3Mark Adler
4madler@alumni.caltech.edu
diff --git a/contrib/blast/blast.c b/contrib/blast/blast.c
new file mode 100644
index 0000000..67dab4e
--- /dev/null
+++ b/contrib/blast/blast.c
@@ -0,0 +1,444 @@
1/* blast.c
2 * Copyright (C) 2003 Mark Adler
3 * For conditions of distribution and use, see copyright notice in blast.h
4 * version 1.1, 16 Feb 2003
5 *
6 * blast.c decompresses data compressed by the PKWare Compression Library.
7 * This function provides functionality similar to the explode() function of
8 * the PKWare library, hence the name "blast".
9 *
10 * This decompressor is based on the excellent format description provided by
11 * Ben Rudiak-Gould in comp.compression on August 13, 2001. Interestingly, the
12 * example Ben provided in the post is incorrect. The distance 110001 should
13 * instead be 111000. When corrected, the example byte stream becomes:
14 *
15 * 00 04 82 24 25 8f 80 7f
16 *
17 * which decompresses to "AIAIAIAIAIAIA" (without the quotes).
18 */
19
20/*
21 * Change history:
22 *
23 * 1.0 12 Feb 2003 - First version
24 * 1.1 16 Feb 2003 - Fixed distance check for > 4 GB uncompressed data
25 */
26
27#include <setjmp.h> /* for setjmp(), longjmp(), and jmp_buf */
28#include "blast.h" /* prototype for blast() */
29
30#define local static /* for local function definitions */
31#define MAXBITS 13 /* maximum code length */
32#define MAXWIN 4096 /* maximum window size */
33
34/* input and output state */
35struct state {
36 /* input state */
37 blast_in infun; /* input function provided by user */
38 void *inhow; /* opaque information passed to infun() */
39 unsigned char *in; /* next input location */
40 unsigned left; /* available input at in */
41 int bitbuf; /* bit buffer */
42 int bitcnt; /* number of bits in bit buffer */
43
44 /* input limit error return state for bits() and decode() */
45 jmp_buf env;
46
47 /* output state */
48 blast_out outfun; /* output function provided by user */
49 void *outhow; /* opaque information passed to outfun() */
50 unsigned next; /* index of next write location in out[] */
51 int first; /* true to check distances (for first 4K) */
52 unsigned char out[MAXWIN]; /* output buffer and sliding window */
53};
54
55/*
56 * Return need bits from the input stream. This always leaves less than
57 * eight bits in the buffer. bits() works properly for need == 0.
58 *
59 * Format notes:
60 *
61 * - Bits are stored in bytes from the least significant bit to the most
62 * significant bit. Therefore bits are dropped from the bottom of the bit
63 * buffer, using shift right, and new bytes are appended to the top of the
64 * bit buffer, using shift left.
65 */
66local int bits(struct state *s, int need)
67{
68 int val; /* bit accumulator */
69
70 /* load at least need bits into val */
71 val = s->bitbuf;
72 while (s->bitcnt < need) {
73 if (s->left == 0) {
74 s->left = s->infun(s->inhow, &(s->in));
75 if (s->left == 0) longjmp(s->env, 1); /* out of input */
76 }
77 val |= (int)(*(s->in)++) << s->bitcnt; /* load eight bits */
78 s->left--;
79 s->bitcnt += 8;
80 }
81
82 /* drop need bits and update buffer, always zero to seven bits left */
83 s->bitbuf = val >> need;
84 s->bitcnt -= need;
85
86 /* return need bits, zeroing the bits above that */
87 return val & ((1 << need) - 1);
88}
89
90/*
91 * Huffman code decoding tables. count[1..MAXBITS] is the number of symbols of
92 * each length, which for a canonical code are stepped through in order.
93 * symbol[] are the symbol values in canonical order, where the number of
94 * entries is the sum of the counts in count[]. The decoding process can be
95 * seen in the function decode() below.
96 */
97struct huffman {
98 short *count; /* number of symbols of each length */
99 short *symbol; /* canonically ordered symbols */
100};
101
102/*
103 * Decode a code from the stream s using huffman table h. Return the symbol or
104 * a negative value if there is an error. If all of the lengths are zero, i.e.
105 * an empty code, or if the code is incomplete and an invalid code is received,
106 * then -9 is returned after reading MAXBITS bits.
107 *
108 * Format notes:
109 *
110 * - The codes as stored in the compressed data are bit-reversed relative to
111 * a simple integer ordering of codes of the same lengths. Hence below the
112 * bits are pulled from the compressed data one at a time and used to
113 * build the code value reversed from what is in the stream in order to
114 * permit simple integer comparisons for decoding.
115 *
116 * - The first code for the shortest length is all ones. Subsequent codes of
117 * the same length are simply integer decrements of the previous code. When
118 * moving up a length, a one bit is appended to the code. For a complete
119 * code, the last code of the longest length will be all zeros. To support
120 * this ordering, the bits pulled during decoding are inverted to apply the
121 * more "natural" ordering starting with all zeros and incrementing.
122 */
123local int decode(struct state *s, struct huffman *h)
124{
125 int len; /* current number of bits in code */
126 int code; /* len bits being decoded */
127 int first; /* first code of length len */
128 int count; /* number of codes of length len */
129 int index; /* index of first code of length len in symbol table */
130 int bitbuf; /* bits from stream */
131 int left; /* bits left in next or left to process */
132 short *next; /* next number of codes */
133
134 bitbuf = s->bitbuf;
135 left = s->bitcnt;
136 code = first = index = 0;
137 len = 1;
138 next = h->count + 1;
139 while (1) {
140 while (left--) {
141 code |= (bitbuf & 1) ^ 1; /* invert code */
142 bitbuf >>= 1;
143 count = *next++;
144 if (code < first + count) { /* if length len, return symbol */
145 s->bitbuf = bitbuf;
146 s->bitcnt = (s->bitcnt - len) & 7;
147 return h->symbol[index + (code - first)];
148 }
149 index += count; /* else update for next length */
150 first += count;
151 first <<= 1;
152 code <<= 1;
153 len++;
154 }
155 left = (MAXBITS+1) - len;
156 if (left == 0) break;
157 if (s->left == 0) {
158 s->left = s->infun(s->inhow, &(s->in));
159 if (s->left == 0) longjmp(s->env, 1); /* out of input */
160 }
161 bitbuf = *(s->in)++;
162 s->left--;
163 if (left > 8) left = 8;
164 }
165 return -9; /* ran out of codes */
166}
167
168/*
169 * Given a list of repeated code lengths rep[0..n-1], where each byte is a
170 * count (high four bits + 1) and a code length (low four bits), generate the
171 * list of code lengths. This compaction reduces the size of the object code.
172 * Then given the list of code lengths length[0..n-1] representing a canonical
173 * Huffman code for n symbols, construct the tables required to decode those
174 * codes. Those tables are the number of codes of each length, and the symbols
175 * sorted by length, retaining their original order within each length. The
176 * return value is zero for a complete code set, negative for an over-
177 * subscribed code set, and positive for an incomplete code set. The tables
178 * can be used if the return value is zero or positive, but they cannot be used
179 * if the return value is negative. If the return value is zero, it is not
180 * possible for decode() using that table to return an error--any stream of
181 * enough bits will resolve to a symbol. If the return value is positive, then
182 * it is possible for decode() using that table to return an error for received
183 * codes past the end of the incomplete lengths.
184 */
185local int construct(struct huffman *h, const unsigned char *rep, int n)
186{
187 int symbol; /* current symbol when stepping through length[] */
188 int len; /* current length when stepping through h->count[] */
189 int left; /* number of possible codes left of current length */
190 short offs[MAXBITS+1]; /* offsets in symbol table for each length */
191 short length[256]; /* code lengths */
192
193 /* convert compact repeat counts into symbol bit length list */
194 symbol = 0;
195 do {
196 len = *rep++;
197 left = (len >> 4) + 1;
198 len &= 15;
199 do {
200 length[symbol++] = len;
201 } while (--left);
202 } while (--n);
203 n = symbol;
204
205 /* count number of codes of each length */
206 for (len = 0; len <= MAXBITS; len++)
207 h->count[len] = 0;
208 for (symbol = 0; symbol < n; symbol++)
209 (h->count[length[symbol]])++; /* assumes lengths are within bounds */
210 if (h->count[0] == n) /* no codes! */
211 return 0; /* complete, but decode() will fail */
212
213 /* check for an over-subscribed or incomplete set of lengths */
214 left = 1; /* one possible code of zero length */
215 for (len = 1; len <= MAXBITS; len++) {
216 left <<= 1; /* one more bit, double codes left */
217 left -= h->count[len]; /* deduct count from possible codes */
218 if (left < 0) return left; /* over-subscribed--return negative */
219 } /* left > 0 means incomplete */
220
221 /* generate offsets into symbol table for each length for sorting */
222 offs[1] = 0;
223 for (len = 1; len < MAXBITS; len++)
224 offs[len + 1] = offs[len] + h->count[len];
225
226 /*
227 * put symbols in table sorted by length, by symbol order within each
228 * length
229 */
230 for (symbol = 0; symbol < n; symbol++)
231 if (length[symbol] != 0)
232 h->symbol[offs[length[symbol]]++] = symbol;
233
234 /* return zero for complete set, positive for incomplete set */
235 return left;
236}
237
238/*
239 * Decode PKWare Compression Library stream.
240 *
241 * Format notes:
242 *
243 * - First byte is 0 if literals are uncoded or 1 if they are coded. Second
244 * byte is 4, 5, or 6 for the number of extra bits in the distance code.
245 * This is the base-2 logarithm of the dictionary size minus six.
246 *
247 * - Compressed data is a combination of literals and length/distance pairs
248 * terminated by an end code. Literals are either Huffman coded or
249 * uncoded bytes. A length/distance pair is a coded length followed by a
250 * coded distance to represent a string that occurs earlier in the
251 * uncompressed data that occurs again at the current location.
252 *
253 * - A bit preceding a literal or length/distance pair indicates which comes
254 * next, 0 for literals, 1 for length/distance.
255 *
256 * - If literals are uncoded, then the next eight bits are the literal, in the
257 * normal bit order in th stream, i.e. no bit-reversal is needed. Similarly,
258 * no bit reversal is needed for either the length extra bits or the distance
259 * extra bits.
260 *
261 * - Literal bytes are simply written to the output. A length/distance pair is
262 * an instruction to copy previously uncompressed bytes to the output. The
263 * copy is from distance bytes back in the output stream, copying for length
264 * bytes.
265 *
266 * - Distances pointing before the beginning of the output data are not
267 * permitted.
268 *
269 * - Overlapped copies, where the length is greater than the distance, are
270 * allowed and common. For example, a distance of one and a length of 518
271 * simply copies the last byte 518 times. A distance of four and a length of
272 * twelve copies the last four bytes three times. A simple forward copy
273 * ignoring whether the length is greater than the distance or not implements
274 * this correctly.
275 */
276local int decomp(struct state *s)
277{
278 int lit; /* true if literals are coded */
279 int dict; /* log2(dictionary size) - 6 */
280 int symbol; /* decoded symbol, extra bits for distance */
281 int len; /* length for copy */
282 int dist; /* distance for copy */
283 int copy; /* copy counter */
284 unsigned char *from, *to; /* copy pointers */
285 static int virgin = 1; /* build tables once */
286 static short litcnt[MAXBITS+1], litsym[256]; /* litcode memory */
287 static short lencnt[MAXBITS+1], lensym[16]; /* lencode memory */
288 static short distcnt[MAXBITS+1], distsym[64]; /* distcode memory */
289 static struct huffman litcode = {litcnt, litsym}; /* length code */
290 static struct huffman lencode = {lencnt, lensym}; /* length code */
291 static struct huffman distcode = {distcnt, distsym};/* distance code */
292 /* bit lengths of literal codes */
293 static const unsigned char litlen[] = {
294 11, 124, 8, 7, 28, 7, 188, 13, 76, 4, 10, 8, 12, 10, 12, 10, 8, 23, 8,
295 9, 7, 6, 7, 8, 7, 6, 55, 8, 23, 24, 12, 11, 7, 9, 11, 12, 6, 7, 22, 5,
296 7, 24, 6, 11, 9, 6, 7, 22, 7, 11, 38, 7, 9, 8, 25, 11, 8, 11, 9, 12,
297 8, 12, 5, 38, 5, 38, 5, 11, 7, 5, 6, 21, 6, 10, 53, 8, 7, 24, 10, 27,
298 44, 253, 253, 253, 252, 252, 252, 13, 12, 45, 12, 45, 12, 61, 12, 45,
299 44, 173};
300 /* bit lengths of length codes 0..15 */
301 static const unsigned char lenlen[] = {2, 35, 36, 53, 38, 23};
302 /* bit lengths of distance codes 0..63 */
303 static const unsigned char distlen[] = {2, 20, 53, 230, 247, 151, 248};
304 static const short base[16] = { /* base for length codes */
305 3, 2, 4, 5, 6, 7, 8, 9, 10, 12, 16, 24, 40, 72, 136, 264};
306 static const char extra[16] = { /* extra bits for length codes */
307 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8};
308
309 /* set up decoding tables (once--might not be thread-safe) */
310 if (virgin) {
311 construct(&litcode, litlen, sizeof(litlen));
312 construct(&lencode, lenlen, sizeof(lenlen));
313 construct(&distcode, distlen, sizeof(distlen));
314 virgin = 0;
315 }
316
317 /* read header */
318 lit = bits(s, 8);
319 if (lit > 1) return -1;
320 dict = bits(s, 8);
321 if (dict < 4 || dict > 6) return -2;
322
323 /* decode literals and length/distance pairs */
324 do {
325 if (bits(s, 1)) {
326 /* get length */
327 symbol = decode(s, &lencode);
328 len = base[symbol] + bits(s, extra[symbol]);
329 if (len == 519) break; /* end code */
330
331 /* get distance */
332 symbol = len == 2 ? 2 : dict;
333 dist = decode(s, &distcode) << symbol;
334 dist += bits(s, symbol);
335 dist++;
336 if (s->first && dist > s->next)
337 return -3; /* distance too far back */
338
339 /* copy length bytes from distance bytes back */
340 do {
341 to = s->out + s->next;
342 from = to - dist;
343 copy = MAXWIN;
344 if (s->next < dist) {
345 from += copy;
346 copy = dist;
347 }
348 copy -= s->next;
349 if (copy > len) copy = len;
350 len -= copy;
351 s->next += copy;
352 do {
353 *to++ = *from++;
354 } while (--copy);
355 if (s->next == MAXWIN) {
356 if (s->outfun(s->outhow, s->out, s->next)) return 1;
357 s->next = 0;
358 s->first = 0;
359 }
360 } while (len != 0);
361 }
362 else {
363 /* get literal and write it */
364 symbol = lit ? decode(s, &litcode) : bits(s, 8);
365 s->out[s->next++] = symbol;
366 if (s->next == MAXWIN) {
367 if (s->outfun(s->outhow, s->out, s->next)) return 1;
368 s->next = 0;
369 s->first = 0;
370 }
371 }
372 } while (1);
373 return 0;
374}
375
376/* See comments in blast.h */
377int blast(blast_in infun, void *inhow, blast_out outfun, void *outhow)
378{
379 struct state s; /* input/output state */
380 int err; /* return value */
381
382 /* initialize input state */
383 s.infun = infun;
384 s.inhow = inhow;
385 s.left = 0;
386 s.bitbuf = 0;
387 s.bitcnt = 0;
388
389 /* initialize output state */
390 s.outfun = outfun;
391 s.outhow = outhow;
392 s.next = 0;
393 s.first = 1;
394
395 /* return if bits() or decode() tries to read past available input */
396 if (setjmp(s.env) != 0) /* if came back here via longjmp(), */
397 err = 2; /* then skip decomp(), return error */
398 else
399 err = decomp(&s); /* decompress */
400
401 /* write any leftover output and update the error code if needed */
402 if (err != 1 && s.next && s.outfun(s.outhow, s.out, s.next) && err == 0)
403 err = 1;
404 return err;
405}
406
407#ifdef TEST
408/* Example of how to use blast() */
409#include <stdio.h>
410#include <stdlib.h>
411
412#define CHUNK 16384
413
414local unsigned inf(void *how, unsigned char **buf)
415{
416 static unsigned char hold[CHUNK];
417
418 *buf = hold;
419 return fread(hold, 1, CHUNK, (FILE *)how);
420}
421
422local int outf(void *how, unsigned char *buf, unsigned len)
423{
424 return fwrite(buf, 1, len, (FILE *)how) != len;
425}
426
427/* Decompress a PKWare Compression Library stream from stdin to stdout */
428int main(void)
429{
430 int ret, n;
431
432 /* decompress to stdout */
433 ret = blast(inf, stdin, outf, stdout);
434 if (ret != 0) fprintf(stderr, "blast error: %d\n", ret);
435
436 /* see if there are any leftover bytes */
437 n = 0;
438 while (getchar() != EOF) n++;
439 if (n) fprintf(stderr, "blast warning: %d unused bytes of input\n", n);
440
441 /* return blast() error code */
442 return ret;
443}
444#endif
diff --git a/contrib/blast/blast.h b/contrib/blast/blast.h
new file mode 100644
index 0000000..2417837
--- /dev/null
+++ b/contrib/blast/blast.h
@@ -0,0 +1,71 @@
1/* blast.h -- interface for blast.c
2 Copyright (C) 2003 Mark Adler
3 version 1.1, 16 Feb 2003
4
5 This software is provided 'as-is', without any express or implied
6 warranty. In no event will the author be held liable for any damages
7 arising from the use of this software.
8
9 Permission is granted to anyone to use this software for any purpose,
10 including commercial applications, and to alter it and redistribute it
11 freely, subject to the following restrictions:
12
13 1. The origin of this software must not be misrepresented; you must not
14 claim that you wrote the original software. If you use this software
15 in a product, an acknowledgment in the product documentation would be
16 appreciated but is not required.
17 2. Altered source versions must be plainly marked as such, and must not be
18 misrepresented as being the original software.
19 3. This notice may not be removed or altered from any source distribution.
20
21 Mark Adler madler@alumni.caltech.edu
22 */
23
24
25/*
26 * blast() decompresses the PKWare Data Compression Library (DCL) compressed
27 * format. It provides the same functionality as the explode() function in
28 * that library. (Note: PKWare overused the "implode" verb, and the format
29 * used by their library implode() function is completely different and
30 * incompatible with the implode compression method supported by PKZIP.)
31 */
32
33
34typedef unsigned (*blast_in)(void *how, unsigned char **buf);
35typedef int (*blast_out)(void *how, unsigned char *buf, unsigned len);
36/* Definitions for input/output functions passed to blast(). See below for
37 * what the provided functions need to do.
38 */
39
40
41int blast(blast_in infun, void *inhow, blast_out outfun, void *outhow);
42/* Decompress input to output using the provided infun() and outfun() calls.
43 * On success, the return value of blast() is zero. If there is an error in
44 * the source data, i.e. it is not in the proper format, then a negative value
45 * is returned. If there is not enough input available or there is not enough
46 * output space, then a positive error is returned.
47 *
48 * The input function is invoked: len = infun(how, &buf), where buf is set by
49 * infun() to point to the input buffer, and infun() returns the number of
50 * available bytes there. If infun() returns zero, then blast() returns with
51 * an input error. (blast() only asks for input if it needs it.) inhow is for
52 * use by the application to pass an input descriptor to infun(), if desired.
53 *
54 * The output function is invoked: err = outfun(how, buf, len), where the bytes
55 * to be written are buf[0..len-1]. If err is not zero, then blast() returns
56 * with an output error. outfun() is always called with len <= 4096. outhow
57 * is for use by the application to pass an output descriptor to outfun(), if
58 * desired.
59 *
60 * The return codes are:
61 *
62 * 2: ran out of input before completing decompression
63 * 1: output error before completing decompression
64 * 0: successful decompression
65 * -1: literal flag not zero or one
66 * -2: dictionary size not in 4..6
67 * -3: distance is too far back
68 *
69 * At the bottom of blast.c is an example program that uses blast() that can be
70 * compiled to produce a command-line decompression filter by defining TEST.
71 */
diff --git a/contrib/blast/test.pk b/contrib/blast/test.pk
new file mode 100644
index 0000000..be10b2b
--- /dev/null
+++ b/contrib/blast/test.pk
Binary files differ
diff --git a/contrib/blast/test.txt b/contrib/blast/test.txt
new file mode 100644
index 0000000..bfdf1c5
--- /dev/null
+++ b/contrib/blast/test.txt
@@ -0,0 +1 @@
AIAIAIAIAIAIA \ No newline at end of file
diff --git a/contrib/inflate86/inffast.S b/contrib/inflate86/inffast.S
new file mode 100644
index 0000000..d1e80ef
--- /dev/null
+++ b/contrib/inflate86/inffast.S
@@ -0,0 +1,1095 @@
1/*
2 * inffast.S is a hand tuned assembler version of:
3 *
4 * inffast.c -- fast decoding
5 * Copyright (C) 1995-2003 Mark Adler
6 * For conditions of distribution and use, see copyright notice in zlib.h
7 *
8 * Copyright (C) 2003 Chris Anderson <christop@charm.net>
9 * Please use the copyright conditions above.
10 *
11 * This version (Jan-23-2003) of inflate_fast was coded and tested under
12 * GNU/Linux on a pentium 3, using the gcc-3.2 compiler distribution. On that
13 * machine, I found that gzip style archives decompressed about 20% faster than
14 * the gcc-3.2 -O3 -fomit-frame-pointer compiled version. Your results will
15 * depend on how large of a buffer is used for z_stream.next_in & next_out
16 * (8K-32K worked best for my 256K cpu cache) and how much overhead there is in
17 * stream processing I/O and crc32/addler32. In my case, this routine used
18 * 70% of the cpu time and crc32 used 20%.
19 *
20 * I am confident that this version will work in the general case, but I have
21 * not tested a wide variety of datasets or a wide variety of platforms.
22 *
23 * Jan-24-2003 -- Added -DUSE_MMX define for slightly faster inflating.
24 * It should be a runtime flag instead of compile time flag...
25 */
26
27.file "inffast.S"
28
29.globl inflate_fast
30
31.text
32.align 4,0
33.L_invalid_literal_length_code_msg:
34.string "invalid literal/length code"
35
36.align 4,0
37.L_invalid_distance_code_msg:
38.string "invalid distance code"
39
40.align 4,0
41.L_invalid_distance_too_far_msg:
42.string "invalid distance too far back"
43
44#if defined( USE_MMX )
45.align 4,0
46.L_mask: /* mask[N] = ( 1 << N ) - 1 */
47.long 0
48.long 1
49.long 3
50.long 7
51.long 15
52.long 31
53.long 63
54.long 127
55.long 255
56.long 511
57.long 1023
58.long 2047
59.long 4095
60.long 8191
61.long 16383
62.long 32767
63.long 65535
64.long 131071
65.long 262143
66.long 524287
67.long 1048575
68.long 2097151
69.long 4194303
70.long 8388607
71.long 16777215
72.long 33554431
73.long 67108863
74.long 134217727
75.long 268435455
76.long 536870911
77.long 1073741823
78.long 2147483647
79.long 4294967295
80#endif
81
82.text
83
84/*
85 * struct z_stream offsets, in zlib.h
86 */
87#define next_in_strm 0 /* strm->next_in */
88#define avail_in_strm 4 /* strm->avail_in */
89#define next_out_strm 12 /* strm->next_out */
90#define avail_out_strm 16 /* strm->avail_out */
91#define msg_strm 24 /* strm->msg */
92#define state_strm 28 /* strm->state */
93
94/*
95 * struct inflate_state offsets, in inflate.h
96 */
97#define mode_state 0 /* state->mode */
98#define wsize_state 32 /* state->wsize */
99#define write_state 36 /* state->write */
100#define window_state 40 /* state->window */
101#define hold_state 44 /* state->hold */
102#define bits_state 48 /* state->bits */
103#define lencode_state 64 /* state->lencode */
104#define distcode_state 68 /* state->distcode */
105#define lenbits_state 72 /* state->lenbits */
106#define distbits_state 76 /* state->distbits */
107
108/*
109 * inflate_fast's activation record
110 */
111#define local_var_size 56 /* how much local space for vars */
112#define strm_sp 80 /* first arg: z_stream * (local_var_size + 24) */
113#define start_sp 84 /* second arg: unsigned int (local_var_size + 28) */
114
115/*
116 * offsets for local vars on stack
117 */
118#define out 52 /* unsigned char* */
119#define window 48 /* unsigned char* */
120#define wsize 44 /* unsigned int */
121#define write 40 /* unsigned int */
122#define in 36 /* unsigned char* */
123#define beg 32 /* unsigned char* */
124#define dist 28 /* unsigned int */
125#define len 24 /* unsigned int */
126#define last 20 /* unsigned char* */
127#define end 16 /* unsigned char* */
128#define dcode 12 /* code* */
129#define lcode 8 /* code* */
130#define dmask 4 /* unsigned int */
131#define lmask 0 /* unsigned int */
132
133/*
134 * typedef enum inflate_mode consts, in inflate.h
135 */
136#ifndef NO_GUNZIP
137#define GUNZIP
138#endif
139
140#ifdef GUNZIP
141#define INFLATE_MODE_TYPE 11 /* state->mode flags enum-ed in inflate.h */
142#define INFLATE_MODE_BAD 26
143#else
144#define INFLATE_MODE_TYPE 3
145#define INFLATE_MODE_BAD 17
146#endif
147
148
149.align 16,0x90
150inflate_fast:
151 pushl %edi
152 pushl %esi
153 pushl %ebp
154 pushl %ebx
155 pushf /* save eflags (strm_sp, state_sp assumes this is 32 bits) */
156 subl $local_var_size, %esp
157 cld
158#if defined( USE_MMX )
159 emms
160#endif
161
162#define strm_r %esi
163#define state_r %edi
164
165 movl strm_sp(%esp), strm_r
166 movl state_strm(strm_r), state_r
167
168 /* in = strm->next_in;
169 * out = strm->next_out;
170 * last = in + strm->avail_in - 5;
171 * beg = out - (start - strm->avail_out);
172 * end = out + (strm->avail_out - 257);
173 */
174 movl next_in_strm(strm_r), %eax
175 movl next_out_strm(strm_r), %ebx
176 movl avail_in_strm(strm_r), %edx
177 movl avail_out_strm(strm_r), %ecx
178 movl start_sp(%esp), %ebp
179
180 addl %eax, %edx /* avail_in += next_in */
181 subl $5, %edx /* avail_in -= 5 */
182
183 subl %ecx, %ebp /* start -= avail_out */
184 negl %ebp /* start = -start */
185 addl %ebx, %ebp /* start += next_out */
186
187 subl $257, %ecx /* avail_out -= 257 */
188 addl %ebx, %ecx /* avail_out += out */
189
190 movl %eax, in(%esp)
191 movl %ebx, out(%esp)
192 movl %edx, last(%esp)
193 movl %ebp, beg(%esp)
194 movl %ecx, end(%esp)
195
196 /* wsize = state->wsize;
197 * write = state->write;
198 * window = state->window;
199 * hold = state->hold;
200 * bits = state->bits;
201 * lcode = state->lencode;
202 * dcode = state->distcode;
203 * lmask = ( 1 << state->lenbits ) - 1;
204 * dmask = ( 1 << state->distbits ) - 1;
205 */
206
207 movl lencode_state(state_r), %eax
208 movl distcode_state(state_r), %ecx
209
210 movl %eax, lcode(%esp)
211 movl %ecx, dcode(%esp)
212
213 movl $1, %eax
214 movl lenbits_state(state_r), %ecx
215 shll %cl, %eax
216 decl %eax
217 movl %eax, lmask(%esp)
218
219 movl $1, %eax
220 movl distbits_state(state_r), %ecx
221 shll %cl, %eax
222 decl %eax
223 movl %eax, dmask(%esp)
224
225 movl wsize_state(state_r), %eax
226 movl write_state(state_r), %ecx
227 movl window_state(state_r), %edx
228
229 movl %eax, wsize(%esp)
230 movl %ecx, write(%esp)
231 movl %edx, window(%esp)
232
233#if ! defined( USE_MMX )
234
235#define hold_r %ebp
236#define bits_r %bl
237#define bitslong_r %ebx
238
239 movl hold_state(state_r), hold_r
240 movl bits_state(state_r), bitslong_r
241
242#else /* USE_MMX */
243
244#define hold_mm %mm0
245#define bits_r %ebp
246#define bitslong_r %ebp
247
248 movl hold_state(state_r), %ebx
249 movl bits_state(state_r), bitslong_r
250
251#endif
252
253#undef strm_r
254#undef state_r
255#define in_r %esi
256#define from_r %esi
257#define out_r %edi
258
259 movl in(%esp), in_r
260
261#if ! defined ( USE_MMX )
262
263 /* align in_r on word boundary */
264 testl $1, in_r
265 jz .L_is_aligned
266 xorl %eax, %eax
267 movb (in_r), %al
268 incl in_r
269 movb bits_r, %cl
270 addb $8, bits_r
271 shll %cl, %eax
272 orl %eax, hold_r
273
274#else
275 /* align in_r on long boundary */
276.L_align_long:
277 testl $3, in_r
278 jz .L_is_aligned
279 xorl %eax, %eax
280 movb (in_r), %al
281 incl in_r
282 movl bits_r, %ecx
283 addl $8, bits_r
284 shll %cl, %eax
285 orl %eax, %ebx
286 jmp .L_align_long
287
288#endif
289
290.L_is_aligned:
291 movl out(%esp), out_r
292
293#if defined ( USE_MMX )
294
295#define used_mm %mm1
296#define dmask2_mm %mm2
297#define lmask2_mm %mm3
298#define lmask_mm %mm4
299#define dmask_mm %mm5
300#define tmp_mm %mm6
301
302 movl out(%esp), out_r
303 movd lmask(%esp), lmask_mm
304 movq lmask_mm, lmask2_mm
305 movd dmask(%esp), dmask_mm
306 movq dmask_mm, dmask2_mm
307 movd %ebx, hold_mm
308 pxor used_mm, used_mm
309 movl lcode(%esp), %ebx /* ebx = lcode */
310#endif
311
312 jmp .L_do_loop
313
314.align 16,0x90
315
316#if ! defined ( USE_MMX )
317
318.L_do_loop:
319 /* regs: %esi = in, %ebp = hold, %bl = bits, %edi = out
320 *
321 * do {
322 * if (bits < 15) {
323 * hold |= *((unsigned short *)in)++ << bits;
324 * bits += 16
325 * }
326 * this = lcode[hold & lmask]
327 */
328 cmpb $15, bits_r
329 ja .L_get_length_code /* if (15 < bits) */
330
331 xorl %eax, %eax
332 lodsw /* al = *(ushort *)in++ */
333 movb bits_r, %cl /* cl = bits, needs it for shifting */
334 addb $16, bits_r /* bits += 16 */
335 shll %cl, %eax
336 orl %eax, hold_r /* hold |= *((ushort *)in)++ << bits */
337
338.L_get_length_code:
339 movl lmask(%esp), %edx /* edx = lmask */
340 movl lcode(%esp), %ecx /* ecx = lcode */
341 andl hold_r, %edx /* edx &= hold */
342 movl (%ecx,%edx,4), %eax /* eax = lcode[hold & lmask] */
343
344#else /* USE_MMX */
345
346.L_do_loop:
347 psrlq used_mm, hold_mm /* hold_mm >>= last bit length */
348
349 cmpl $32, bits_r
350 ja .L_get_length_code /* if (32 < bits) */
351
352 movd bits_r, tmp_mm
353 movd (in_r), %mm7
354 addl $4, in_r
355 psllq tmp_mm, %mm7
356 addl $32, bits_r
357 por %mm7, hold_mm /* hold_mm |= *((uint *)in)++ << bits */
358
359.L_get_length_code:
360 pand hold_mm, lmask_mm
361 movd lmask_mm, %eax
362 movq lmask2_mm, lmask_mm
363 movl (%ebx,%eax,4), %eax /* eax = lcode[hold & lmask] */
364
365#endif
366
367#if ! defined( USE_MMX )
368
369.L_dolen:
370 /* regs: %esi = in, %ebp = hold, %bl = bits, %edi = out
371 *
372 * dolen:
373 * bits -= this.bits;
374 * hold >>= this.bits
375 */
376 movb %ah, %cl /* cl = this.bits */
377 subb %ah, bits_r /* bits -= this.bits */
378 shrl %cl, hold_r /* hold >>= this.bits */
379
380 /* check if op is a literal
381 * if (op == 0) {
382 * PUP(out) = this.val;
383 * }
384 */
385 testb %al, %al
386 jnz .L_test_for_length_base /* if (op != 0) 45.7% */
387
388 shrl $16, %eax /* output this.val char */
389 stosb
390
391#else /* USE_MMX */
392
393#define len_r %edx
394
395.L_dolen:
396 movzbl %ah, %ecx /* ecx = this.bits */
397 movl %eax, len_r /* len = this */
398 shrl $16, len_r /* len = this.val */
399 movd %ecx, used_mm
400 subl %ecx, bits_r /* bits -= this.bits */
401
402 testb %al, %al
403 jnz .L_test_for_length_base /* if (op != 0) 45.7% */
404
405 movb %dl, (out_r)
406 incl out_r
407
408#endif
409
410.L_while_test:
411 /* while (in < last && out < end)
412 */
413 cmpl out_r, end(%esp)
414 jbe .L_break_loop /* if (out >= end) */
415
416 cmpl in_r, last(%esp)
417 ja .L_do_loop /* if (in < last) */
418 jmp .L_break_loop
419
420#if ! defined( USE_MMX )
421
422.L_test_for_length_base:
423 /* regs: %esi = in, %ebp = hold, %bl = bits, %edi = out, %edx = len
424 *
425 * else if (op & 16) {
426 * len = this.val
427 * op &= 15
428 * if (op) {
429 * if (op > bits) {
430 * hold |= *((unsigned short *)in)++ << bits;
431 * bits += 16
432 * }
433 * len += hold & mask[op];
434 * bits -= op;
435 * hold >>= op;
436 * }
437 */
438#define len_r %edx
439 movl %eax, len_r /* len = this */
440 shrl $16, len_r /* len = this.val */
441 movb %al, %cl
442
443 testb $16, %al
444 jz .L_test_for_second_level_length /* if ((op & 16) == 0) 8% */
445 andb $15, %cl /* op &= 15 */
446 jz .L_save_len /* if (!op) */
447 cmpb %cl, bits_r
448 jae .L_add_bits_to_len /* if (op <= bits) */
449
450 movb %cl, %ch /* stash op in ch, freeing cl */
451 xorl %eax, %eax
452 lodsw /* al = *(ushort *)in++ */
453 movb bits_r, %cl /* cl = bits, needs it for shifting */
454 addb $16, bits_r /* bits += 16 */
455 shll %cl, %eax
456 orl %eax, hold_r /* hold |= *((ushort *)in)++ << bits */
457 movb %ch, %cl /* move op back to ecx */
458
459.L_add_bits_to_len:
460 movl $1, %eax
461 shll %cl, %eax
462 decl %eax
463 subb %cl, bits_r
464 andl hold_r, %eax /* eax &= hold */
465 shrl %cl, hold_r
466 addl %eax, len_r /* len += hold & mask[op] */
467
468.L_save_len:
469 movl len_r, len(%esp) /* save len */
470#undef len_r
471
472.L_decode_distance:
473 /* regs: %esi = in, %ebp = hold, %bl = bits, %edi = out, %edx = dist
474 *
475 * if (bits < 15) {
476 * hold |= *((unsigned short *)in)++ << bits;
477 * bits += 16
478 * }
479 * this = dcode[hold & dmask];
480 * dodist:
481 * bits -= this.bits;
482 * hold >>= this.bits;
483 * op = this.op;
484 */
485
486 cmpb $15, bits_r
487 ja .L_get_distance_code /* if (15 < bits) */
488
489 xorl %eax, %eax
490 lodsw /* al = *(ushort *)in++ */
491 movb bits_r, %cl /* cl = bits, needs it for shifting */
492 addb $16, bits_r /* bits += 16 */
493 shll %cl, %eax
494 orl %eax, hold_r /* hold |= *((ushort *)in)++ << bits */
495
496.L_get_distance_code:
497 movl dmask(%esp), %edx /* edx = dmask */
498 movl dcode(%esp), %ecx /* ecx = dcode */
499 andl hold_r, %edx /* edx &= hold */
500 movl (%ecx,%edx,4), %eax /* eax = dcode[hold & dmask] */
501
502#else /* USE_MMX */
503
504.L_test_for_length_base:
505 testb $16, %al
506 jz .L_test_for_second_level_length /* if ((op & 16) == 0) 8% */
507 andl $15, %eax /* op &= 15 */
508 jz .L_decode_distance /* if (!op) */
509
510 psrlq used_mm, hold_mm /* hold_mm >>= last bit length */
511 movd %eax, used_mm
512 movd hold_mm, %ecx
513 subl %eax, bits_r
514 andl .L_mask(,%eax,4), %ecx
515 addl %ecx, len_r /* len += hold & mask[op] */
516
517.L_decode_distance:
518
519 psrlq used_mm, hold_mm /* hold_mm >>= last bit length */
520
521 cmpl $32, bits_r
522 ja .L_get_dist_code /* if (32 < bits) */
523
524 movd bits_r, tmp_mm
525 movd (in_r), %mm7
526 addl $4, in_r
527 psllq tmp_mm, %mm7
528 addl $32, bits_r
529 por %mm7, hold_mm /* hold_mm |= *((uint *)in)++ << bits */
530
531.L_get_dist_code:
532 movl dcode(%esp), %ebx /* ebx = dcode */
533 pand hold_mm, dmask_mm
534 movd dmask_mm, %eax
535 movq dmask2_mm, dmask_mm
536 movl (%ebx,%eax,4), %eax /* eax = dcode[hold & lmask] */
537
538#endif
539
540#if ! defined( USE_MMX )
541
542#define dist_r %edx
543.L_dodist:
544 movl %eax, dist_r /* dist = this */
545 shrl $16, dist_r /* dist = this.val */
546 movb %ah, %cl
547 subb %ah, bits_r /* bits -= this.bits */
548 shrl %cl, hold_r /* hold >>= this.bits */
549
550 /* if (op & 16) {
551 * dist = this.val
552 * op &= 15
553 * if (op > bits) {
554 * hold |= *((unsigned short *)in)++ << bits;
555 * bits += 16
556 * }
557 * dist += hold & mask[op];
558 * bits -= op;
559 * hold >>= op;
560 */
561 movb %al, %cl /* cl = this.op */
562
563 testb $16, %al /* if ((op & 16) == 0) */
564 jz .L_test_for_second_level_dist
565 andb $15, %cl /* op &= 15 */
566 jz .L_check_dist_one
567 cmpb %cl, bits_r
568 jae .L_add_bits_to_dist /* if (op <= bits) 97.6% */
569
570 movb %cl, %ch /* stash op in ch, freeing cl */
571 xorl %eax, %eax
572 lodsw /* al = *(ushort *)in++ */
573 movb bits_r, %cl /* cl = bits, needs it for shifting */
574 addb $16, bits_r /* bits += 16 */
575 shll %cl, %eax
576 orl %eax, hold_r /* hold |= *((ushort *)in)++ << bits */
577 movb %ch, %cl /* move op back to ecx */
578
579.L_add_bits_to_dist:
580 movl $1, %eax
581 shll %cl, %eax
582 decl %eax /* (1 << op) - 1 */
583 subb %cl, bits_r
584 andl hold_r, %eax /* eax &= hold */
585 shrl %cl, hold_r
586 addl %eax, dist_r /* dist += hold & ((1 << op) - 1) */
587 jmp .L_check_window
588
589#else /* USE_MMX */
590
591#define dist_r %ebx
592.L_dodist:
593 movzbl %ah, %ecx /* ecx = this.bits */
594 movl %eax, dist_r
595 shrl $16, dist_r /* dist = this.val */
596 subl %ecx, bits_r /* bits -= this.bits */
597 movd %ecx, used_mm
598
599 testb $16, %al /* if ((op & 16) == 0) */
600 jz .L_test_for_second_level_dist
601 andl $15, %eax /* op &= 15 */
602 jz .L_check_dist_one
603
604.L_add_bits_to_dist:
605 psrlq used_mm, hold_mm /* hold_mm >>= last bit length */
606 movd %eax, used_mm /* save bit length of current op */
607 movd hold_mm, %ecx /* get the next bits on input stream */
608 subl %eax, bits_r /* bits -= op bits */
609 andl .L_mask(,%eax,4), %ecx /* ecx = hold & mask[op] */
610 addl %ecx, dist_r /* dist += hold & mask[op] */
611 jmp .L_check_window
612
613#endif
614
615.align 16,0x90
616
617.L_check_dist_one:
618 cmpl $1, dist_r
619 jne .L_check_window
620 cmpl out_r, beg(%esp)
621 je .L_check_window
622
623 decl out_r
624#if ! defined( USE_MMX )
625 movl len(%esp), %ecx
626#else
627 movl len_r, %ecx
628#endif
629 movb (out_r), %al
630 subl $3, %ecx
631
632 movb %al, 1(out_r)
633 movb %al, 2(out_r)
634 movb %al, 3(out_r)
635 addl $4, out_r
636 rep stosb
637
638#if defined( USE_MMX )
639 movl lcode(%esp), %ebx /* move lcode back to %ebx, toss dist */
640#endif
641 jmp .L_while_test
642
643.align 16,0x90
644
645.L_check_window:
646 /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist
647 * %ecx = nbytes
648 *
649 * nbytes = out - beg;
650 * if (dist <= nbytes) {
651 * from = out - dist;
652 * do {
653 * PUP(out) = PUP(from);
654 * } while (--len > 0) {
655 * }
656 */
657
658 movl in_r, in(%esp) /* save in so from can use it's reg */
659 movl out_r, %eax
660 subl beg(%esp), %eax /* nbytes = out - beg */
661
662 cmpl dist_r, %eax
663 jb .L_clip_window /* if (dist > nbytes) 4.2% */
664
665#if ! defined( USE_MMX )
666 movl len(%esp), %ecx
667#else
668 movl len_r, %ecx
669#endif
670 movl out_r, from_r
671 subl dist_r, from_r /* from = out - dist */
672
673 subl $3, %ecx
674 movb (from_r), %al
675 movb %al, (out_r)
676 movb 1(from_r), %al
677 movb 2(from_r), %dl
678 addl $3, from_r
679 movb %al, 1(out_r)
680 movb %dl, 2(out_r)
681 addl $3, out_r
682 rep movsb
683
684 movl in(%esp), in_r /* move in back to %esi, toss from */
685#if defined( USE_MMX )
686 movl lcode(%esp), %ebx /* move lcode back to %ebx, toss dist */
687#endif
688 jmp .L_while_test
689
690.align 16,0x90
691
692#if ! defined( USE_MMX )
693
694.L_test_for_second_level_length:
695 /* else if ((op & 64) == 0) {
696 * this = lcode[this.val + (hold & mask[op])];
697 * }
698 */
699 testb $64, %al
700 jnz .L_test_for_end_of_block /* if ((op & 64) != 0) */
701
702 movl $1, %eax
703 shll %cl, %eax
704 decl %eax
705 andl hold_r, %eax /* eax &= hold */
706 addl %edx, %eax /* eax += this.val */
707 movl lcode(%esp), %edx /* edx = lcode */
708 movl (%edx,%eax,4), %eax /* eax = lcode[val + (hold&mask[op])] */
709 jmp .L_dolen
710
711#else /* USE_MMX */
712
713.L_test_for_second_level_length:
714 testb $64, %al
715 jnz .L_test_for_end_of_block /* if ((op & 64) != 0) */
716
717 andl $15, %eax
718 psrlq used_mm, hold_mm /* hold_mm >>= last bit length */
719 movd hold_mm, %ecx
720 andl .L_mask(,%eax,4), %ecx
721 addl len_r, %ecx
722 movl (%ebx,%ecx,4), %eax /* eax = lcode[hold & lmask] */
723 jmp .L_dolen
724
725#endif
726
727.align 16,0x90
728
729#if ! defined( USE_MMX )
730
731.L_test_for_second_level_dist:
732 /* else if ((op & 64) == 0) {
733 * this = dcode[this.val + (hold & mask[op])];
734 * }
735 */
736 testb $64, %al
737 jnz .L_invalid_distance_code /* if ((op & 64) != 0) */
738
739 movl $1, %eax
740 shll %cl, %eax
741 decl %eax
742 andl hold_r, %eax /* eax &= hold */
743 addl %edx, %eax /* eax += this.val */
744 movl dcode(%esp), %edx /* edx = dcode */
745 movl (%edx,%eax,4), %eax /* eax = dcode[val + (hold&mask[op])] */
746 jmp .L_dodist
747
748#else /* USE_MMX */
749
750.L_test_for_second_level_dist:
751 testb $64, %al
752 jnz .L_invalid_distance_code /* if ((op & 64) != 0) */
753
754 andl $15, %eax
755 psrlq used_mm, hold_mm /* hold_mm >>= last bit length */
756 movd hold_mm, %ecx
757 andl .L_mask(,%eax,4), %ecx
758 movl dcode(%esp), %eax /* ecx = dcode */
759 addl dist_r, %ecx
760 movl (%eax,%ecx,4), %eax /* eax = lcode[hold & lmask] */
761 jmp .L_dodist
762
763#endif
764
765.align 16,0x90
766.L_clip_window:
767 /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist
768 * %ecx = nbytes
769 *
770 * else {
771 * if (dist > wsize) {
772 * invalid distance
773 * }
774 * from = window;
775 * nbytes = dist - nbytes;
776 * if (write == 0) {
777 * from += wsize - nbytes;
778 */
779#define nbytes_r %ecx
780
781 movl %eax, nbytes_r
782 movl wsize(%esp), %eax /* prepare for dist compare */
783 negl nbytes_r /* nbytes = -nbytes */
784 movl window(%esp), from_r /* from = window */
785
786 cmpl dist_r, %eax
787 jb .L_invalid_distance_too_far /* if (dist > wsize) */
788
789 addl dist_r, nbytes_r /* nbytes = dist - nbytes */
790 cmpl $0, write(%esp)
791 jne .L_wrap_around_window /* if (write != 0) */
792
793 subl nbytes_r, %eax
794 addl %eax, from_r /* from += wsize - nbytes */
795
796 /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist
797 * %ecx = nbytes, %eax = len
798 *
799 * if (nbytes < len) {
800 * len -= nbytes;
801 * do {
802 * PUP(out) = PUP(from);
803 * } while (--nbytes);
804 * from = out - dist;
805 * }
806 * }
807 */
808
809#if ! defined( USE_MMX )
810#define len_r %eax
811 movl len(%esp), len_r
812#endif
813 cmpl nbytes_r, len_r
814 jbe .L_do_copy1 /* if (nbytes >= len) */
815
816 subl nbytes_r, len_r /* len -= nbytes */
817 rep movsb
818 movl out_r, from_r
819 subl dist_r, from_r /* from = out - dist */
820 jmp .L_do_copy1
821
822 cmpl nbytes_r, len_r
823 jbe .L_do_copy1 /* if (nbytes >= len) */
824
825 subl nbytes_r, len_r /* len -= nbytes */
826 rep movsb
827 movl out_r, from_r
828 subl dist_r, from_r /* from = out - dist */
829 jmp .L_do_copy1
830
831.L_wrap_around_window:
832 /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist
833 * %ecx = nbytes, %eax = write, %eax = len
834 *
835 * else if (write < nbytes) {
836 * from += wsize + write - nbytes;
837 * nbytes -= write;
838 * if (nbytes < len) {
839 * len -= nbytes;
840 * do {
841 * PUP(out) = PUP(from);
842 * } while (--nbytes);
843 * from = window;
844 * nbytes = write;
845 * if (nbytes < len) {
846 * len -= nbytes;
847 * do {
848 * PUP(out) = PUP(from);
849 * } while(--nbytes);
850 * from = out - dist;
851 * }
852 * }
853 * }
854 */
855#define write_r %eax
856
857 movl write(%esp), write_r
858 cmpl write_r, nbytes_r
859 jbe .L_contiguous_in_window /* if (write >= nbytes) */
860
861 addl wsize(%esp), from_r
862 addl write_r, from_r
863 subl nbytes_r, from_r /* from += wsize + write - nbytes */
864 subl write_r, nbytes_r /* nbytes -= write */
865#undef write_r
866
867#if ! defined( USE_MMX )
868 movl len(%esp), len_r
869#endif
870 cmpl nbytes_r, len_r
871 jbe .L_do_copy1 /* if (nbytes >= len) */
872
873 subl nbytes_r, len_r /* len -= nbytes */
874 rep movsb
875 movl window(%esp), from_r /* from = window */
876 movl write(%esp), nbytes_r /* nbytes = write */
877 cmpl nbytes_r, len_r
878 jbe .L_do_copy1 /* if (nbytes >= len) */
879
880 subl nbytes_r, len_r /* len -= nbytes */
881 rep movsb
882 movl out_r, from_r
883 subl dist_r, from_r /* from = out - dist */
884 jmp .L_do_copy1
885
886.L_contiguous_in_window:
887 /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist
888 * %ecx = nbytes, %eax = write, %eax = len
889 *
890 * else {
891 * from += write - nbytes;
892 * if (nbytes < len) {
893 * len -= nbytes;
894 * do {
895 * PUP(out) = PUP(from);
896 * } while (--nbytes);
897 * from = out - dist;
898 * }
899 * }
900 */
901#define write_r %eax
902
903 addl write_r, from_r
904 subl nbytes_r, from_r /* from += write - nbytes */
905#undef write_r
906
907#if ! defined( USE_MMX )
908 movl len(%esp), len_r
909#endif
910 cmpl nbytes_r, len_r
911 jbe .L_do_copy1 /* if (nbytes >= len) */
912
913 subl nbytes_r, len_r /* len -= nbytes */
914 rep movsb
915 movl out_r, from_r
916 subl dist_r, from_r /* from = out - dist */
917
918.L_do_copy1:
919 /* regs: %esi = from, %esi = in, %ebp = hold, %bl = bits, %edi = out
920 * %eax = len
921 *
922 * while (len > 0) {
923 * PUP(out) = PUP(from);
924 * len--;
925 * }
926 * }
927 * } while (in < last && out < end);
928 */
929#undef nbytes_r
930#define in_r %esi
931
932 movl len_r, %ecx
933 rep movsb
934
935 movl in(%esp), in_r /* move in back to %esi, toss from */
936#if defined( USE_MMX )
937 movl lcode(%esp), %ebx /* move lcode back to %ebx, toss dist */
938#endif
939 jmp .L_while_test
940
941#undef len_r
942#undef from_r
943#undef dist_r
944
945.L_invalid_distance_code:
946 /* else {
947 * strm->msg = "invalid distance code";
948 * state->mode = BAD;
949 * }
950 */
951 movl $.L_invalid_distance_code_msg, %ecx
952 movl $INFLATE_MODE_BAD, %edx
953 jmp .L_update_stream_state
954
955.L_test_for_end_of_block:
956 /* else if (op & 32) {
957 * state->mode = TYPE;
958 * break;
959 * }
960 */
961 testb $32, %al
962 jz .L_invalid_literal_length_code /* if ((op & 32) == 0) */
963
964 movl $0, %ecx
965 movl $INFLATE_MODE_TYPE, %edx
966 jmp .L_update_stream_state
967
968.L_invalid_literal_length_code:
969 /* else {
970 * strm->msg = "invalid literal/length code";
971 * state->mode = BAD;
972 * }
973 */
974 movl $.L_invalid_literal_length_code_msg, %ecx
975 movl $INFLATE_MODE_BAD, %edx
976 jmp .L_update_stream_state
977
978.L_invalid_distance_too_far:
979 /* strm->msg = "invalid distance too far back";
980 * state->mode = BAD;
981 */
982 movl in(%esp), in_r /* from_r has in's reg, put in back */
983 movl $.L_invalid_distance_too_far_msg, %ecx
984 movl $INFLATE_MODE_BAD, %edx
985 jmp .L_update_stream_state
986
987.L_update_stream_state:
988 /* set strm->msg = %ecx, strm->state->mode = %edx */
989 movl strm_sp(%esp), %eax
990 testl %ecx, %ecx /* if (msg != NULL) */
991 jz .L_skip_msg
992 movl %ecx, msg_strm(%eax) /* strm->msg = msg */
993.L_skip_msg:
994 movl state_strm(%eax), %eax /* state = strm->state */
995 movl %edx, mode_state(%eax) /* state->mode = edx (BAD | TYPE) */
996
997.L_break_loop:
998
999#define strm_r %eax
1000#define state_r %edx
1001
1002 /* len = bits >> 3;
1003 * in -= len;
1004 * bits -= len << 3;
1005 * hold &= (1U << bits) - 1;
1006 * state->hold = hold;
1007 * state->bits = bits;
1008 * strm->next_in = in;
1009 * strm->next_out = out;
1010 */
1011 movl strm_sp(%esp), strm_r
1012 movl bitslong_r, %ecx
1013 movl state_strm(strm_r), state_r
1014 shrl $3, %ecx
1015 subl %ecx, in_r
1016 shll $3, %ecx
1017 subl %ecx, bitslong_r
1018 movl out_r, next_out_strm(strm_r)
1019 movl in_r, next_in_strm(strm_r)
1020 movl bitslong_r, bits_state(state_r)
1021
1022 movl bitslong_r, %ecx
1023 movl $1, %ebx /* overwrites bitslong_r, %bl */
1024 shll %cl, %ebx
1025 decl %ebx
1026
1027#undef bits_r
1028#undef bitslong_r
1029
1030#if ! defined( USE_MMX )
1031
1032 andl %ebx, hold_r
1033 movl hold_r, hold_state(state_r)
1034
1035#else /* USE_MMX */
1036
1037 psrlq used_mm, hold_mm /* hold_mm >>= last bit length */
1038 movd hold_mm, %ecx
1039 andl %ebx, %ecx
1040 movl %ecx, hold_state(state_r)
1041
1042#endif
1043
1044#define last_r %ebx
1045
1046 /* strm->avail_in = in < last ? 5 + (last - in) : 5 - (in - last) */
1047 movl last(%esp), last_r
1048 cmpl in_r, last_r
1049 jbe .L_last_is_smaller /* if (in >= last) */
1050
1051 subl in_r, last_r /* last -= in */
1052 addl $5, last_r /* last += 5 */
1053 movl last_r, avail_in_strm(strm_r)
1054 jmp .L_fixup_out
1055.L_last_is_smaller:
1056 subl last_r, in_r /* in -= last */
1057 negl in_r /* in = -in */
1058 addl $5, in_r /* in += 5 */
1059 movl in_r, avail_in_strm(strm_r)
1060
1061#undef last_r
1062#define end_r %ebx
1063
1064.L_fixup_out:
1065 /* strm->avail_out = out < end ? 257 + (end - out) : 257 - (out - end)*/
1066 movl end(%esp), end_r
1067 cmpl out_r, end_r
1068 jbe .L_end_is_smaller /* if (out >= end) */
1069
1070 subl out_r, end_r /* end -= out */
1071 addl $257, end_r /* end += 257 */
1072 movl end_r, avail_out_strm(strm_r)
1073 jmp .L_done
1074.L_end_is_smaller:
1075 subl end_r, out_r /* out -= end */
1076 negl out_r /* out = -out */
1077 addl $257, out_r /* out += 257 */
1078 movl out_r, avail_out_strm(strm_r)
1079
1080#undef end_r
1081
1082.L_done:
1083#if defined( USE_MMX )
1084 emms
1085#endif
1086 addl $local_var_size, %esp
1087 popf
1088 popl %ebx
1089 popl %ebp
1090 popl %esi
1091 popl %edi
1092 ret
1093
1094.type inflate_fast,@function
1095.size inflate_fast,.-inflate_fast
diff --git a/contrib/puff/Makefile b/contrib/puff/Makefile
new file mode 100644
index 0000000..b6b6940
--- /dev/null
+++ b/contrib/puff/Makefile
@@ -0,0 +1,8 @@
1puff: puff.c puff.h
2 cc -DTEST -o puff puff.c
3
4test: puff
5 puff zeros.raw
6
7clean:
8 rm -f puff puff.o
diff --git a/contrib/puff/README b/contrib/puff/README
new file mode 100644
index 0000000..59b3533
--- /dev/null
+++ b/contrib/puff/README
@@ -0,0 +1,63 @@
1Puff -- A Simple Inflate
23 Mar 2003
3Mark Adler
4madler@alumni.caltech.edu
5
6What this is --
7
8puff.c provides the routine puff() to decompress the deflate data format. It
9does so more slowly than zlib, but the code is about one-fifth the size of the
10inflate code in zlib, and written to be very easy to read.
11
12Why I wrote this --
13
14puff.c was written to document the deflate format unambiguously, by virtue of
15being working C code. It is meant to supplement RFC 1951, which formally
16describes the deflate format. I have received many questions on details of the
17deflate format, and I hope that reading this code will answer those questions.
18puff.c is heavily commented with details of the deflate format, especially
19those little nooks and cranies of the format that might not be obvious from a
20specification.
21
22puff.c may also be useful in applications where code size or memory usage is a
23very limited resource, and speed is not as important.
24
25How to use it --
26
27Well, most likely you should just be reading puff.c and using zlib for actual
28applications, but if you must ...
29
30Include puff.h in your code, which provides this prototype:
31
32int puff(unsigned char *dest, /* pointer to destination pointer */
33 unsigned long *destlen, /* amount of output space */
34 unsigned char *source, /* pointer to source data pointer */
35 unsigned long *sourcelen); /* amount of input available */
36
37Then you can call puff() to decompress a deflate stream that is in memory in
38its entirety at source, to a sufficiently sized block of memory for the
39decompressed data at dest. puff() is the only external symbol in puff.c The
40only C library functions that puff.c needs are setjmp() and longjmp(), which
41are used to simplify error checking in the code to improve readabilty. puff.c
42does no memory allocation, and uses less than 2K bytes off of the stack.
43
44If destlen is not enough space for the uncompressed data, then inflate will
45return an error without writing more than destlen bytes. Note that this means
46that in order to decompress the deflate data successfully, you need to know
47the size of the uncompressed data ahead of time.
48
49If needed, puff() can determine the size of the uncompressed data with no
50output space. This is done by passing dest equal to (unsigned char *)0. Then
51the initial value of *destlen is ignored and *destlen is set to the length of
52the uncompressed data. So if the size of the uncompressed data is not known,
53then two passes of puff() can be used--first to determine the size, and second
54to do the actual inflation after allocating the appropriate memory. Not
55pretty, but it works. (This is one of the reasons you should be using zlib.)
56
57The deflate format is self-terminating. If the deflate stream does not end
58in *sourcelen bytes, puff() will return an error without reading at or past
59endsource.
60
61On return, *sourcelen is updated to the amount of input data consumed, and
62*destlen is updated to the size of the uncompressed data. See the comments
63in puff.c for the possible return codes for puff().
diff --git a/contrib/puff/puff.c b/contrib/puff/puff.c
new file mode 100644
index 0000000..b6039dd
--- /dev/null
+++ b/contrib/puff/puff.c
@@ -0,0 +1,833 @@
1/*
2 * puff.c
3 * Copyright (C) 2002, 2003 Mark Adler
4 * For conditions of distribution and use, see copyright notice in puff.h
5 * version 1.7, 3 Mar 2003
6 *
7 * puff.c is a simple inflate written to be an unambiguous way to specify the
8 * deflate format. It is not written for speed but rather simplicity. As a
9 * side benefit, this code might actually be useful when small code is more
10 * important than speed, such as bootstrap applications. For typical deflate
11 * data, zlib's inflate() is about four times as fast as puff(). zlib's
12 * inflate compiles to around 20K on my machine, whereas puff.c compiles to
13 * around 4K on my machine (a PowerPC using GNU cc). If the faster decode()
14 * function here is used, then puff() is only twice as slow as zlib's
15 * inflate().
16 *
17 * All dynamically allocated memory comes from the stack. The stack required
18 * is less than 2K bytes. This code is compatible with 16-bit int's and
19 * assumes that long's are at least 32 bits. puff.c uses the short data type,
20 * assumed to be 16 bits, for arrays in order to to conserve memory. The code
21 * works whether integers are stored big endian or little endian.
22 *
23 * In the comments below are "Format notes" that describe the inflate process
24 * and document some of the less obvious aspects of the format. This source
25 * code is meant to supplement RFC 1951, which formally describes the deflate
26 * format:
27 *
28 * http://www.zlib.org/rfc-deflate.html
29 */
30
31/*
32 * Change history:
33 *
34 * 1.0 10 Feb 2002 - First version
35 * 1.1 17 Feb 2002 - Clarifications of some comments and notes
36 * - Update puff() dest and source pointers on negative
37 * errors to facilitate debugging deflators
38 * - Remove longest from struct huffman -- not needed
39 * - Simplify offs[] index in construct()
40 * - Add input size and checking, using longjmp() to
41 * maintain easy readability
42 * - Use short data type for large arrays
43 * - Use pointers instead of long to specify source and
44 * destination sizes to avoid arbitrary 4 GB limits
45 * 1.2 17 Mar 2002 - Add faster version of decode(), doubles speed (!),
46 * but leave simple version for readabilty
47 * - Make sure invalid distances detected if pointers
48 * are 16 bits
49 * - Fix fixed codes table error
50 * - Provide a scanning mode for determining size of
51 * uncompressed data
52 * 1.3 20 Mar 2002 - Go back to lengths for puff() parameters [Jean-loup]
53 * - Add a puff.h file for the interface
54 * - Add braces in puff() for else do [Jean-loup]
55 * - Use indexes instead of pointers for readability
56 * 1.4 31 Mar 2002 - Simplify construct() code set check
57 * - Fix some comments
58 * - Add FIXLCODES #define
59 * 1.5 6 Apr 2002 - Minor comment fixes
60 * 1.6 7 Aug 2002 - Minor format changes
61 * 1.7 3 Mar 2002 - Added test code for distribution
62 * - Added zlib-like license
63 */
64
65#include <setjmp.h> /* for setjmp(), longjmp(), and jmp_buf */
66#include "puff.h" /* prototype for puff() */
67
68#define local static /* for local function definitions */
69#define NIL ((unsigned char *)0) /* for no output option */
70
71/*
72 * Maximums for allocations and loops. It is not useful to change these --
73 * they are fixed by the deflate format.
74 */
75#define MAXBITS 15 /* maximum bits in a code */
76#define MAXLCODES 286 /* maximum number of literal/length codes */
77#define MAXDCODES 30 /* maximum number of distance codes */
78#define MAXCODES (MAXLCODES+MAXDCODES) /* maximum codes lengths to read */
79#define FIXLCODES 288 /* number of fixed literal/length codes */
80
81/* input and output state */
82struct state {
83 /* output state */
84 unsigned char *out; /* output buffer */
85 unsigned long outlen; /* available space at out */
86 unsigned long outcnt; /* bytes written to out so far */
87
88 /* input state */
89 unsigned char *in; /* input buffer */
90 unsigned long inlen; /* available input at in */
91 unsigned long incnt; /* bytes read so far */
92 int bitbuf; /* bit buffer */
93 int bitcnt; /* number of bits in bit buffer */
94
95 /* input limit error return state for bits() and decode() */
96 jmp_buf env;
97};
98
99/*
100 * Return need bits from the input stream. This always leaves less than
101 * eight bits in the buffer. bits() works properly for need == 0.
102 *
103 * Format notes:
104 *
105 * - Bits are stored in bytes from the least significant bit to the most
106 * significant bit. Therefore bits are dropped from the bottom of the bit
107 * buffer, using shift right, and new bytes are appended to the top of the
108 * bit buffer, using shift left.
109 */
110local int bits(struct state *s, int need)
111{
112 long val; /* bit accumulator (can use up to 20 bits) */
113
114 /* load at least need bits into val */
115 val = s->bitbuf;
116 while (s->bitcnt < need) {
117 if (s->incnt == s->inlen) longjmp(s->env, 1); /* out of input */
118 val |= (long)(s->in[s->incnt++]) << s->bitcnt; /* load eight bits */
119 s->bitcnt += 8;
120 }
121
122 /* drop need bits and update buffer, always zero to seven bits left */
123 s->bitbuf = (int)(val >> need);
124 s->bitcnt -= need;
125
126 /* return need bits, zeroing the bits above that */
127 return (int)(val & ((1L << need) - 1));
128}
129
130/*
131 * Process a stored block.
132 *
133 * Format notes:
134 *
135 * - After the two-bit stored block type (00), the stored block length and
136 * stored bytes are byte-aligned for fast copying. Therefore any leftover
137 * bits in the byte that has the last bit of the type, as many as seven, are
138 * discarded. The value of the discarded bits are not defined and should not
139 * be checked against any expectation.
140 *
141 * - The second inverted copy of the stored block length does not have to be
142 * checked, but it's probably a good idea to do so anyway.
143 *
144 * - A stored block can have zero length. This is sometimes used to byte-align
145 * subsets of the compressed data for random access or partial recovery.
146 */
147local int stored(struct state *s)
148{
149 unsigned len; /* length of stored block */
150
151 /* discard leftover bits from current byte (assumes s->bitcnt < 8) */
152 s->bitbuf = 0;
153 s->bitcnt = 0;
154
155 /* get length and check against its one's complement */
156 if (s->incnt + 4 > s->inlen) return 2; /* not enough input */
157 len = s->in[s->incnt++];
158 len |= s->in[s->incnt++] << 8;
159 if (s->in[s->incnt++] != (~len & 0xff) ||
160 s->in[s->incnt++] != ((~len >> 8) & 0xff))
161 return -2; /* didn't match complement! */
162
163 /* copy len bytes from in to out */
164 if (s->incnt + len > s->inlen) return 2; /* not enough input */
165 if (s->out != NIL) {
166 if (s->outcnt + len > s->outlen)
167 return 1; /* not enough output space */
168 while (len--)
169 s->out[s->outcnt++] = s->in[s->incnt++];
170 }
171 else { /* just scanning */
172 s->outcnt += len;
173 s->incnt += len;
174 }
175
176 /* done with a valid stored block */
177 return 0;
178}
179
180/*
181 * Huffman code decoding tables. count[1..MAXBITS] is the number of symbols of
182 * each length, which for a canonical code are stepped through in order.
183 * symbol[] are the symbol values in canonical order, where the number of
184 * entries is the sum of the counts in count[]. The decoding process can be
185 * seen in the function decode() below.
186 */
187struct huffman {
188 short *count; /* number of symbols of each length */
189 short *symbol; /* canonically ordered symbols */
190};
191
192/*
193 * Decode a code from the stream s using huffman table h. Return the symbol or
194 * a negative value if there is an error. If all of the lengths are zero, i.e.
195 * an empty code, or if the code is incomplete and an invalid code is received,
196 * then -9 is returned after reading MAXBITS bits.
197 *
198 * Format notes:
199 *
200 * - The codes as stored in the compressed data are bit-reversed relative to
201 * a simple integer ordering of codes of the same lengths. Hence below the
202 * bits are pulled from the compressed data one at a time and used to
203 * build the code value reversed from what is in the stream in order to
204 * permit simple integer comparisons for decoding. A table-based decoding
205 * scheme (as used in zlib) does not need to do this reversal.
206 *
207 * - The first code for the shortest length is all zeros. Subsequent codes of
208 * the same length are simply integer increments of the previous code. When
209 * moving up a length, a zero bit is appended to the code. For a complete
210 * code, the last code of the longest length will be all ones.
211 *
212 * - Incomplete codes are handled by this decoder, since they are permitted
213 * in the deflate format. See the format notes for fixed() and dynamic().
214 */
215#ifdef SLOW
216local int decode(struct state *s, struct huffman *h)
217{
218 int len; /* current number of bits in code */
219 int code; /* len bits being decoded */
220 int first; /* first code of length len */
221 int count; /* number of codes of length len */
222 int index; /* index of first code of length len in symbol table */
223
224 code = first = index = 0;
225 for (len = 1; len <= MAXBITS; len++) {
226 code |= bits(s, 1); /* get next bit */
227 count = h->count[len];
228 if (code < first + count) /* if length len, return symbol */
229 return h->symbol[index + (code - first)];
230 index += count; /* else update for next length */
231 first += count;
232 first <<= 1;
233 code <<= 1;
234 }
235 return -9; /* ran out of codes */
236}
237
238/*
239 * A faster version of decode() for real applications of this code. It's not
240 * as readable, but it makes puff() twice as fast. And it only makes the code
241 * a few percent larger.
242 */
243#else /* !SLOW */
244local int decode(struct state *s, struct huffman *h)
245{
246 int len; /* current number of bits in code */
247 int code; /* len bits being decoded */
248 int first; /* first code of length len */
249 int count; /* number of codes of length len */
250 int index; /* index of first code of length len in symbol table */
251 int bitbuf; /* bits from stream */
252 int left; /* bits left in next or left to process */
253 short *next; /* next number of codes */
254
255 bitbuf = s->bitbuf;
256 left = s->bitcnt;
257 code = first = index = 0;
258 len = 1;
259 next = h->count + 1;
260 while (1) {
261 while (left--) {
262 code |= bitbuf & 1;
263 bitbuf >>= 1;
264 count = *next++;
265 if (code < first + count) { /* if length len, return symbol */
266 s->bitbuf = bitbuf;
267 s->bitcnt = (s->bitcnt - len) & 7;
268 return h->symbol[index + (code - first)];
269 }
270 index += count; /* else update for next length */
271 first += count;
272 first <<= 1;
273 code <<= 1;
274 len++;
275 }
276 left = (MAXBITS+1) - len;
277 if (left == 0) break;
278 if (s->incnt == s->inlen) longjmp(s->env, 1); /* out of input */
279 bitbuf = s->in[s->incnt++];
280 if (left > 8) left = 8;
281 }
282 return -9; /* ran out of codes */
283}
284#endif /* SLOW */
285
286/*
287 * Given the list of code lengths length[0..n-1] representing a canonical
288 * Huffman code for n symbols, construct the tables required to decode those
289 * codes. Those tables are the number of codes of each length, and the symbols
290 * sorted by length, retaining their original order within each length. The
291 * return value is zero for a complete code set, negative for an over-
292 * subscribed code set, and positive for an incomplete code set. The tables
293 * can be used if the return value is zero or positive, but they cannot be used
294 * if the return value is negative. If the return value is zero, it is not
295 * possible for decode() using that table to return an error--any stream of
296 * enough bits will resolve to a symbol. If the return value is positive, then
297 * it is possible for decode() using that table to return an error for received
298 * codes past the end of the incomplete lengths.
299 *
300 * Not used by decode(), but used for error checking, h->count[0] is the number
301 * of the n symbols not in the code. So n - h->count[0] is the number of
302 * codes. This is useful for checking for incomplete codes that have more than
303 * one symbol, which is an error in a dynamic block.
304 *
305 * Assumption: for all i in 0..n-1, 0 <= length[i] <= MAXBITS
306 * This is assured by the construction of the length arrays in dynamic() and
307 * fixed() and is not verified by construct().
308 *
309 * Format notes:
310 *
311 * - Permitted and expected examples of incomplete codes are one of the fixed
312 * codes and any code with a single symbol which in deflate is coded as one
313 * bit instead of zero bits. See the format notes for fixed() and dynamic().
314 *
315 * - Within a given code length, the symbols are kept in ascending order for
316 * the code bits definition.
317 */
318local int construct(struct huffman *h, short *length, int n)
319{
320 int symbol; /* current symbol when stepping through length[] */
321 int len; /* current length when stepping through h->count[] */
322 int left; /* number of possible codes left of current length */
323 short offs[MAXBITS+1]; /* offsets in symbol table for each length */
324
325 /* count number of codes of each length */
326 for (len = 0; len <= MAXBITS; len++)
327 h->count[len] = 0;
328 for (symbol = 0; symbol < n; symbol++)
329 (h->count[length[symbol]])++; /* assumes lengths are within bounds */
330 if (h->count[0] == n) /* no codes! */
331 return 0; /* complete, but decode() will fail */
332
333 /* check for an over-subscribed or incomplete set of lengths */
334 left = 1; /* one possible code of zero length */
335 for (len = 1; len <= MAXBITS; len++) {
336 left <<= 1; /* one more bit, double codes left */
337 left -= h->count[len]; /* deduct count from possible codes */
338 if (left < 0) return left; /* over-subscribed--return negative */
339 } /* left > 0 means incomplete */
340
341 /* generate offsets into symbol table for each length for sorting */
342 offs[1] = 0;
343 for (len = 1; len < MAXBITS; len++)
344 offs[len + 1] = offs[len] + h->count[len];
345
346 /*
347 * put symbols in table sorted by length, by symbol order within each
348 * length
349 */
350 for (symbol = 0; symbol < n; symbol++)
351 if (length[symbol] != 0)
352 h->symbol[offs[length[symbol]]++] = symbol;
353
354 /* return zero for complete set, positive for incomplete set */
355 return left;
356}
357
358/*
359 * Decode literal/length and distance codes until an end-of-block code.
360 *
361 * Format notes:
362 *
363 * - Compressed data that is after the block type if fixed or after the code
364 * description if dynamic is a combination of literals and length/distance
365 * pairs terminated by and end-of-block code. Literals are simply Huffman
366 * coded bytes. A length/distance pair is a coded length followed by a
367 * coded distance to represent a string that occurs earlier in the
368 * uncompressed data that occurs again at the current location.
369 *
370 * - Literals, lengths, and the end-of-block code are combined into a single
371 * code of up to 286 symbols. They are 256 literals (0..255), 29 length
372 * symbols (257..285), and the end-of-block symbol (256).
373 *
374 * - There are 256 possible lengths (3..258), and so 29 symbols are not enough
375 * to represent all of those. Lengths 3..10 and 258 are in fact represented
376 * by just a length symbol. Lengths 11..257 are represented as a symbol and
377 * some number of extra bits that are added as an integer to the base length
378 * of the length symbol. The number of extra bits is determined by the base
379 * length symbol. These are in the static arrays below, lens[] for the base
380 * lengths and lext[] for the corresponding number of extra bits.
381 *
382 * - The reason that 258 gets its own symbol is that the longest length is used
383 * often in highly redundant files. Note that 258 can also be coded as the
384 * base value 227 plus the maximum extra value of 31. While a good deflate
385 * should never do this, it is not an error, and should be decoded properly.
386 *
387 * - If a length is decoded, including its extra bits if any, then it is
388 * followed a distance code. There are up to 30 distance symbols. Again
389 * there are many more possible distances (1..32768), so extra bits are added
390 * to a base value represented by the symbol. The distances 1..4 get their
391 * own symbol, but the rest require extra bits. The base distances and
392 * corresponding number of extra bits are below in the static arrays dist[]
393 * and dext[].
394 *
395 * - Literal bytes are simply written to the output. A length/distance pair is
396 * an instruction to copy previously uncompressed bytes to the output. The
397 * copy is from distance bytes back in the output stream, copying for length
398 * bytes.
399 *
400 * - Distances pointing before the beginning of the output data are not
401 * permitted.
402 *
403 * - Overlapped copies, where the length is greater than the distance, are
404 * allowed and common. For example, a distance of one and a length of 258
405 * simply copies the last byte 258 times. A distance of four and a length of
406 * twelve copies the last four bytes three times. A simple forward copy
407 * ignoring whether the length is greater than the distance or not implements
408 * this correctly. You should not use memcpy() since its behavior is not
409 * defined for overlapped arrays. You should not use memmove() or bcopy()
410 * since though their behavior -is- defined for overlapping arrays, it is
411 * defined to do the wrong thing in this case.
412 */
413local int codes(struct state *s,
414 struct huffman *lencode,
415 struct huffman *distcode)
416{
417 int symbol; /* decoded symbol */
418 int len; /* length for copy */
419 unsigned dist; /* distance for copy */
420 static const short lens[29] = { /* Size base for length codes 257..285 */
421 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
422 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258};
423 static const short lext[29] = { /* Extra bits for length codes 257..285 */
424 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
425 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0};
426 static const short dists[30] = { /* Offset base for distance codes 0..29 */
427 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
428 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
429 8193, 12289, 16385, 24577};
430 static const short dext[30] = { /* Extra bits for distance codes 0..29 */
431 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,
432 7, 7, 8, 8, 9, 9, 10, 10, 11, 11,
433 12, 12, 13, 13};
434
435 /* decode literals and length/distance pairs */
436 do {
437 symbol = decode(s, lencode);
438 if (symbol < 0) return symbol; /* invalid symbol */
439 if (symbol < 256) { /* literal: symbol is the byte */
440 /* write out the literal */
441 if (s->out != NIL) {
442 if (s->outcnt == s->outlen) return 1;
443 s->out[s->outcnt] = symbol;
444 }
445 s->outcnt++;
446 }
447 else if (symbol > 256) { /* length */
448 /* get and compute length */
449 symbol -= 257;
450 if (symbol >= 29) return -9; /* invalid fixed code */
451 len = lens[symbol] + bits(s, lext[symbol]);
452
453 /* get and check distance */
454 symbol = decode(s, distcode);
455 if (symbol < 0) return symbol; /* invalid symbol */
456 dist = dists[symbol] + bits(s, dext[symbol]);
457 if (dist > s->outcnt)
458 return -10; /* distance too far back */
459
460 /* copy length bytes from distance bytes back */
461 if (s->out != NIL) {
462 if (s->outcnt + len > s->outlen) return 1;
463 while (len--) {
464 s->out[s->outcnt] = s->out[s->outcnt - dist];
465 s->outcnt++;
466 }
467 }
468 else
469 s->outcnt += len;
470 }
471 } while (symbol != 256); /* end of block symbol */
472
473 /* done with a valid fixed or dynamic block */
474 return 0;
475}
476
477/*
478 * Process a fixed codes block.
479 *
480 * Format notes:
481 *
482 * - This block type can be useful for compressing small amounts of data for
483 * which the size of the code descriptions in a dynamic block exceeds the
484 * benefit of custom codes for that block. For fixed codes, no bits are
485 * spent on code descriptions. Instead the code lengths for literal/length
486 * codes and distance codes are fixed. The specific lengths for each symbol
487 * can be seen in the "for" loops below.
488 *
489 * - The literal/length code is complete, but has two symbols that are invalid
490 * and should result in an error if received. This cannot be implemented
491 * simply as an incomplete code since those two symbols are in the "middle"
492 * of the code. They are eight bits long and the longest literal/length\
493 * code is nine bits. Therefore the code must be constructed with those
494 * symbols, and the invalid symbols must be detected after decoding.
495 *
496 * - The fixed distance codes also have two invalid symbols that should result
497 * in an error if received. Since all of the distance codes are the same
498 * length, this can be implemented as an incomplete code. Then the invalid
499 * codes are detected while decoding.
500 */
501local int fixed(struct state *s)
502{
503 static int virgin = 1;
504 static short lencnt[MAXBITS+1], lensym[FIXLCODES];
505 static short distcnt[MAXBITS+1], distsym[MAXDCODES];
506 static struct huffman lencode = {lencnt, lensym};
507 static struct huffman distcode = {distcnt, distsym};
508
509 /* build fixed huffman tables if first call (may not be thread safe) */
510 if (virgin) {
511 int symbol;
512 short lengths[FIXLCODES];
513
514 /* literal/length table */
515 for (symbol = 0; symbol < 144; symbol++)
516 lengths[symbol] = 8;
517 for (; symbol < 256; symbol++)
518 lengths[symbol] = 9;
519 for (; symbol < 280; symbol++)
520 lengths[symbol] = 7;
521 for (; symbol < FIXLCODES; symbol++)
522 lengths[symbol] = 8;
523 construct(&lencode, lengths, FIXLCODES);
524
525 /* distance table */
526 for (symbol = 0; symbol < MAXDCODES; symbol++)
527 lengths[symbol] = 5;
528 construct(&distcode, lengths, MAXDCODES);
529
530 /* do this just once */
531 virgin = 0;
532 }
533
534 /* decode data until end-of-block code */
535 return codes(s, &lencode, &distcode);
536}
537
538/*
539 * Process a dynamic codes block.
540 *
541 * Format notes:
542 *
543 * - A dynamic block starts with a description of the literal/length and
544 * distance codes for that block. New dynamic blocks allow the compressor to
545 * rapidly adapt to changing data with new codes optimized for that data.
546 *
547 * - The codes used by the deflate format are "canonical", which means that
548 * the actual bits of the codes are generated in an unambiguous way simply
549 * from the number of bits in each code. Therefore the code descriptions
550 * are simply a list of code lengths for each symbol.
551 *
552 * - The code lengths are stored in order for the symbols, so lengths are
553 * provided for each of the literal/length symbols, and for each of the
554 * distance symbols.
555 *
556 * - If a symbol is not used in the block, this is represented by a zero as
557 * as the code length. This does not mean a zero-length code, but rather
558 * that no code should be created for this symbol. There is no way in the
559 * deflate format to represent a zero-length code.
560 *
561 * - The maximum number of bits in a code is 15, so the possible lengths for
562 * any code are 1..15.
563 *
564 * - The fact that a length of zero is not permitted for a code has an
565 * interesting consequence. Normally if only one symbol is used for a given
566 * code, then in fact that code could be represented with zero bits. However
567 * in deflate, that code has to be at least one bit. So for example, if
568 * only a single distance base symbol appears in a block, then it will be
569 * represented by a single code of length one, in particular one 0 bit. This
570 * is an incomplete code, since if a 1 bit is received, it has no meaning,
571 * and should result in an error. So incomplete distance codes of one symbol
572 * should be permitted, and the receipt of invalid codes should be handled.
573 *
574 * - It is also possible to have a single literal/length code, but that code
575 * must be the end-of-block code, since every dynamic block has one. This
576 * is not the most efficient way to create an empty block (an empty fixed
577 * block is fewer bits), but it is allowed by the format. So incomplete
578 * literal/length codes of one symbol should also be permitted.
579 *
580 * - The list of up to 286 length/literal lengths and up to 30 distance lengths
581 * are themselves compressed using Huffman codes and run-length encoding. In
582 * the list of code lengths, a 0 symbol means no code, a 1..15 symbol means
583 * that length, and the symbols 16, 17, and 18 are run-length instructions.
584 * Each of 16, 17, and 18 are follwed by extra bits to define the length of
585 * the run. 16 copies the last length 3 to 6 times. 17 represents 3 to 10
586 * zero lengths, and 18 represents 11 to 138 zero lengths. Unused symbols
587 * are common, hence the special coding for zero lengths.
588 *
589 * - The symbols for 0..18 are Huffman coded, and so that code must be
590 * described first. This is simply a sequence of up to 19 three-bit values
591 * representing no code (0) or the code length for that symbol (1..7).
592 *
593 * - A dynamic block starts with three fixed-size counts from which is computed
594 * the number of literal/length code lengths, the number of distance code
595 * lengths, and the number of code length code lengths (ok, you come up with
596 * a better name!) in the code descriptions. For the literal/length and
597 * distance codes, lengths after those provided are considered zero, i.e. no
598 * code. The code length code lengths are received in a permuted order (see
599 * the order[] array below) to make a short code length code length list more
600 * likely. As it turns out, very short and very long codes are less likely
601 * to be seen in a dynamic code description, hence what may appear initially
602 * to be a peculiar ordering.
603 *
604 * - Given the number of literal/length code lengths (nlen) and distance code
605 * lengths (ndist), then they are treated as one long list of nlen + ndist
606 * code lengths. Therefore run-length coding can and often does cross the
607 * boundary between the two sets of lengths.
608 *
609 * - So to summarize, the code description at the start of a dynamic block is
610 * three counts for the number of code lengths for the literal/length codes,
611 * the distance codes, and the code length codes. This is followed by the
612 * code length code lengths, three bits each. This is used to construct the
613 * code length code which is used to read the remainder of the lengths. Then
614 * the literal/length code lengths and distance lengths are read as a single
615 * set of lengths using the code length codes. Codes are constructed from
616 * the resulting two sets of lengths, and then finally you can start
617 * decoding actual compressed data in the block.
618 *
619 * - For reference, a "typical" size for the code description in a dynamic
620 * block is around 80 bytes.
621 */
622local int dynamic(struct state *s)
623{
624 int nlen, ndist, ncode; /* number of lengths in descriptor */
625 int index; /* index of lengths[] */
626 int err; /* construct() return value */
627 short lengths[MAXCODES]; /* descriptor code lengths */
628 short lencnt[MAXBITS+1], lensym[MAXLCODES]; /* lencode memory */
629 short distcnt[MAXBITS+1], distsym[MAXDCODES]; /* distcode memory */
630 struct huffman lencode = {lencnt, lensym}; /* length code */
631 struct huffman distcode = {distcnt, distsym}; /* distance code */
632 static const short order[19] = /* permutation of code length codes */
633 {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
634
635 /* get number of lengths in each table, check lengths */
636 nlen = bits(s, 5) + 257;
637 ndist = bits(s, 5) + 1;
638 ncode = bits(s, 4) + 4;
639 if (nlen > MAXLCODES || ndist > MAXDCODES)
640 return -3; /* bad counts */
641
642 /* read code length code lengths (really), missing lengths are zero */
643 for (index = 0; index < ncode; index++)
644 lengths[order[index]] = bits(s, 3);
645 for (; index < 19; index++)
646 lengths[order[index]] = 0;
647
648 /* build huffman table for code lengths codes (use lencode temporarily) */
649 err = construct(&lencode, lengths, 19);
650 if (err != 0) return -4; /* require complete code set here */
651
652 /* read length/literal and distance code length tables */
653 index = 0;
654 while (index < nlen + ndist) {
655 int symbol; /* decoded value */
656 int len; /* last length to repeat */
657
658 symbol = decode(s, &lencode);
659 if (symbol < 16) /* length in 0..15 */
660 lengths[index++] = symbol;
661 else { /* repeat instruction */
662 len = 0; /* assume repeating zeros */
663 if (symbol == 16) { /* repeat last length 3..6 times */
664 if (index == 0) return -5; /* no last length! */
665 len = lengths[index - 1]; /* last length */
666 symbol = 3 + bits(s, 2);
667 }
668 else if (symbol == 17) /* repeat zero 3..10 times */
669 symbol = 3 + bits(s, 3);
670 else /* == 18, repeat zero 11..138 times */
671 symbol = 11 + bits(s, 7);
672 if (index + symbol > nlen + ndist)
673 return -6; /* too many lengths! */
674 while (symbol--) /* repeat last or zero symbol times */
675 lengths[index++] = len;
676 }
677 }
678
679 /* build huffman table for literal/length codes */
680 err = construct(&lencode, lengths, nlen);
681 if (err < 0 || (err > 0 && nlen - lencode.count[0] != 1))
682 return -7; /* only allow incomplete codes if just one code */
683
684 /* build huffman table for distance codes */
685 err = construct(&distcode, lengths + nlen, ndist);
686 if (err < 0 || (err > 0 && ndist - distcode.count[0] != 1))
687 return -8; /* only allow incomplete codes if just one code */
688
689 /* decode data until end-of-block code */
690 return codes(s, &lencode, &distcode);
691}
692
693/*
694 * Inflate source to dest. On return, destlen and sourcelen are updated to the
695 * size of the uncompressed data and the size of the deflate data respectively.
696 * On success, the return value of puff() is zero. If there is an error in the
697 * source data, i.e. it is not in the deflate format, then a negative value is
698 * returned. If there is not enough input available or there is not enough
699 * output space, then a positive error is returned. In that case, destlen and
700 * sourcelen are not updated to facilitate retrying from the beginning with the
701 * provision of more input data or more output space. In the case of invalid
702 * inflate data (a negative error), the dest and source pointers are updated to
703 * facilitate the debugging of deflators.
704 *
705 * puff() also has a mode to determine the size of the uncompressed output with
706 * no output written. For this dest must be (unsigned char *)0. In this case,
707 * the input value of *destlen is ignored, and on return *destlen is set to the
708 * size of the uncompressed output.
709 *
710 * The return codes are:
711 *
712 * 2: available inflate data did not terminate
713 * 1: output space exhausted before completing inflate
714 * 0: successful inflate
715 * -1: invalid block type (type == 3)
716 * -2: stored block length did not match one's complement
717 * -3: dynamic block code description: too many length or distance codes
718 * -4: dynamic block code description: code lengths codes incomplete
719 * -5: dynamic block code description: repeat lengths with no first length
720 * -6: dynamic block code description: repeat more than specified lengths
721 * -7: dynamic block code description: invalid literal/length code lengths
722 * -8: dynamic block code description: invalid distance code lengths
723 * -9: invalid literal/length or distance code in fixed or dynamic block
724 * -10: distance is too far back in fixed or dynamic block
725 *
726 * Format notes:
727 *
728 * - Three bits are read for each block to determine the kind of block and
729 * whether or not it is the last block. Then the block is decoded and the
730 * process repeated if it was not the last block.
731 *
732 * - The leftover bits in the last byte of the deflate data after the last
733 * block (if it was a fixed or dynamic block) are undefined and have no
734 * expected values to check.
735 */
736int puff(unsigned char *dest, /* pointer to destination pointer */
737 unsigned long *destlen, /* amount of output space */
738 unsigned char *source, /* pointer to source data pointer */
739 unsigned long *sourcelen) /* amount of input available */
740{
741 struct state s; /* input/output state */
742 int last, type; /* block information */
743 int err; /* return value */
744
745 /* initialize output state */
746 s.out = dest;
747 s.outlen = *destlen; /* ignored if dest is NIL */
748 s.outcnt = 0;
749
750 /* initialize input state */
751 s.in = source;
752 s.inlen = *sourcelen;
753 s.incnt = 0;
754 s.bitbuf = 0;
755 s.bitcnt = 0;
756
757 /* return if bits() or decode() tries to read past available input */
758 if (setjmp(s.env) != 0) /* if came back here via longjmp() */
759 err = 2; /* then skip do-loop, return error */
760 else {
761 /* process blocks until last block or error */
762 do {
763 last = bits(&s, 1); /* one if last block */
764 type = bits(&s, 2); /* block type 0..3 */
765 err = type == 0 ? stored(&s) :
766 (type == 1 ? fixed(&s) :
767 (type == 2 ? dynamic(&s) :
768 -1)); /* type == 3, invalid */
769 if (err != 0) break; /* return with error */
770 } while (!last);
771 }
772
773 /* update the lengths and return */
774 if (err <= 0) {
775 *destlen = s.outcnt;
776 *sourcelen = s.incnt;
777 }
778 return err;
779}
780
781#ifdef TEST
782/* Example of how to use puff() */
783#include <stdio.h>
784#include <stdlib.h>
785#include <sys/types.h>
786#include <sys/stat.h>
787
788local unsigned char *yank(char *name, unsigned long *len)
789{
790 unsigned long size;
791 unsigned char *buf;
792 FILE *in;
793 struct stat s;
794
795 *len = 0;
796 if (stat(name, &s)) return NULL;
797 if ((s.st_mode & S_IFMT) != S_IFREG) return NULL;
798 size = (unsigned long)(s.st_size);
799 if (size == 0 || (off_t)size != s.st_size) return NULL;
800 in = fopen(name, "r");
801 if (in == NULL) return NULL;
802 buf = malloc(size);
803 if (buf != NULL && fread(buf, 1, size, in) != size) {
804 free(buf);
805 buf = NULL;
806 }
807 fclose(in);
808 *len = size;
809 return buf;
810}
811
812int main(int argc, char **argv)
813{
814 int ret;
815 unsigned char *source;
816 unsigned long len, sourcelen, destlen;
817
818 if (argc < 2) return 2;
819 source = yank(argv[1], &len);
820 if (source == NULL) return 2;
821 sourcelen = len;
822 ret = puff(NIL, &destlen, source, &sourcelen);
823 if (ret)
824 printf("puff() failed with return code %d\n", ret);
825 else {
826 printf("puff() succeeded uncompressing %lu bytes\n", destlen);
827 if (sourcelen < len) printf("%lu compressed bytes unused\n",
828 len - sourcelen);
829 }
830 free(source);
831 return ret;
832}
833#endif
diff --git a/contrib/puff/puff.h b/contrib/puff/puff.h
new file mode 100644
index 0000000..41ea7e1
--- /dev/null
+++ b/contrib/puff/puff.h
@@ -0,0 +1,31 @@
1/* puff.h
2 Copyright (C) 2002, 2003 Mark Adler, all rights reserved
3 version 1.7, 3 Mar 2002
4
5 This software is provided 'as-is', without any express or implied
6 warranty. In no event will the author be held liable for any damages
7 arising from the use of this software.
8
9 Permission is granted to anyone to use this software for any purpose,
10 including commercial applications, and to alter it and redistribute it
11 freely, subject to the following restrictions:
12
13 1. The origin of this software must not be misrepresented; you must not
14 claim that you wrote the original software. If you use this software
15 in a product, an acknowledgment in the product documentation would be
16 appreciated but is not required.
17 2. Altered source versions must be plainly marked as such, and must not be
18 misrepresented as being the original software.
19 3. This notice may not be removed or altered from any source distribution.
20
21 Mark Adler madler@alumni.caltech.edu
22 */
23
24
25/*
26 * See puff.c for purpose and usage.
27 */
28int puff(unsigned char *dest, /* pointer to destination pointer */
29 unsigned long *destlen, /* amount of output space */
30 unsigned char *source, /* pointer to source data pointer */
31 unsigned long *sourcelen); /* amount of input available */
diff --git a/contrib/puff/zeros.raw b/contrib/puff/zeros.raw
new file mode 100644
index 0000000..637b7be
--- /dev/null
+++ b/contrib/puff/zeros.raw
Binary files differ