aboutsummaryrefslogtreecommitdiff
path: root/inffast.c
diff options
context:
space:
mode:
authorMark Adler <madler@alumni.caltech.edu>2011-09-09 23:21:47 -0700
committerMark Adler <madler@alumni.caltech.edu>2011-09-09 23:21:47 -0700
commit7c2a874e50b871d04fbd19501f7b42cff55e5abc (patch)
tree1879cd29182ababb17cde77cee5ce74505db4006 /inffast.c
parenta383133c4e7b93113cee912f213cf9502d785fa7 (diff)
downloadzlib-7c2a874e50b871d04fbd19501f7b42cff55e5abc.tar.gz
zlib-7c2a874e50b871d04fbd19501f7b42cff55e5abc.tar.bz2
zlib-7c2a874e50b871d04fbd19501f7b42cff55e5abc.zip
zlib 1.2.0v1.2.0
Diffstat (limited to 'inffast.c')
-rw-r--r--inffast.c439
1 files changed, 277 insertions, 162 deletions
diff --git a/inffast.c b/inffast.c
index aa7f1d4..8d145c2 100644
--- a/inffast.c
+++ b/inffast.c
@@ -1,183 +1,298 @@
1/* inffast.c -- process literals and length/distance pairs fast 1/* inffast.c -- fast decoding
2 * Copyright (C) 1995-2002 Mark Adler 2 * Copyright (C) 1995-2003 Mark Adler
3 * For conditions of distribution and use, see copyright notice in zlib.h 3 * For conditions of distribution and use, see copyright notice in zlib.h
4 */ 4 */
5 5
6#include "zutil.h" 6#include "zutil.h"
7#include "inftrees.h" 7#include "inftrees.h"
8#include "infblock.h" 8#include "inflate.h"
9#include "infcodes.h"
10#include "infutil.h"
11#include "inffast.h" 9#include "inffast.h"
12 10
13struct inflate_codes_state {int dummy;}; /* for buggy compilers */ 11/* Allow machine dependent optimization for post-increment or pre-increment.
12 Based on testing to date,
13 Pre-increment preferred for:
14 - PowerPC G3 (Adler)
15 - MIPS R5000 (Randers-Pehrson)
16 Post-increment preferred for:
17 - none
18 No measurable difference:
19 - Pentium III (Anderson)
20 */
21#ifdef POSTINC
22# define OFF 0
23# define PUP(a) *(a)++
24#else
25# define OFF 1
26# define PUP(a) *++(a)
27#endif
14 28
15/* simplify the use of the inflate_huft type with some defines */ 29/*
16#define exop word.what.Exop 30 Decode literal, length, and distance codes and write out the resulting
17#define bits word.what.Bits 31 literal and match bytes until either not enough input or output is
32 available, an end-of-block is encountered, or a data error is encountered.
33 When large enough input and output buffers are supplied to inflate(), for
34 example, a 16K input buffer and a 64K output buffer, more than 95% of the
35 inflate execution time is spent in this routine.
18 36
19/* macros for bit input with no checking and for returning unused bytes */ 37 Entry assumptions:
20#define GRABBITS(j) {while(k<(j)){b|=((uLong)NEXTBYTE)<<k;k+=8;}}
21#define UNGRAB {c=z->avail_in-n;c=(k>>3)<c?k>>3:c;n+=c;p-=c;k-=c<<3;}
22 38
23/* Called with number of bytes left to write in window at least 258 39 state->mode == LEN
24 (the maximum string length) and number of input bytes available 40 strm->avail_in >= 6
25 at least ten. The ten bytes are six bytes for the longest length/ 41 strm->avail_out >= 258
26 distance pair plus four bytes for overloading the bit buffer. */ 42 start >= strm->avail_out
43 state->bits < 8
27 44
28int inflate_fast(bl, bd, tl, td, s, z) 45 On return, state->mode is one of:
29uInt bl, bd;
30inflate_huft *tl;
31inflate_huft *td; /* need separate declaration for Borland C++ */
32inflate_blocks_statef *s;
33z_streamp z;
34{
35 inflate_huft *t; /* temporary pointer */
36 uInt e; /* extra bits or operation */
37 uLong b; /* bit buffer */
38 uInt k; /* bits in bit buffer */
39 Bytef *p; /* input data pointer */
40 uInt n; /* bytes available there */
41 Bytef *q; /* output window write pointer */
42 uInt m; /* bytes to end of window or read pointer */
43 uInt ml; /* mask for literal/length tree */
44 uInt md; /* mask for distance tree */
45 uInt c; /* bytes to copy */
46 uInt d; /* distance back to copy from */
47 Bytef *r; /* copy source pointer */
48 46
49 /* load input, output, bit values */ 47 LEN -- ran out of enough output space or enough available input
50 LOAD 48 TYPE -- reached end of block code, inflate() to interpret next block
49 BAD -- error in block data
51 50
52 /* initialize masks */ 51 Notes:
53 ml = inflate_mask[bl];
54 md = inflate_mask[bd];
55 52
56 /* do until not enough input or output space for fast loop */ 53 - The maximum input bits used by a length/distance pair is 15 bits for the
57 do { /* assume called with m >= 258 && n >= 10 */ 54 length code, 5 bits for the length extra, 15 bits for the distance code,
58 /* get literal/length code */ 55 and 13 bits for the distance extra. This totals 48 bits, or six bytes.
59 GRABBITS(20) /* max bits for literal/length code */ 56 Therefore if strm->avail_in >= 6, then there is enough input to avoid
60 if ((e = (t = tl + ((uInt)b & ml))->exop) == 0) 57 checking for available input while decoding.
61 { 58
62 DUMPBITS(t->bits) 59 - The maximum bytes that a single length/distance pair can output is 258
63 Tracevv((stderr, t->base >= 0x20 && t->base < 0x7f ? 60 bytes, which is the maximum length that can be coded. inflate_fast()
64 "inflate: * literal '%c'\n" : 61 requires strm->avail_out >= 258 for each loop to avoid checking for
65 "inflate: * literal 0x%02x\n", t->base)); 62 output space.
66 *q++ = (Byte)t->base; 63 */
67 m--; 64void inflate_fast(strm, start)
68 continue; 65z_streamp strm;
69 } 66unsigned start; /* inflate()'s starting value for strm->avail_out */
70 do { 67{
71 DUMPBITS(t->bits) 68 struct inflate_state FAR *state;
72 if (e & 16) 69 unsigned char FAR *in; /* local strm->next_in */
73 { 70 unsigned char FAR *last; /* while in < last, enough input available */
74 /* get extra bits for length */ 71 unsigned char FAR *out; /* local strm->next_out */
75 e &= 15; 72 unsigned char FAR *beg; /* inflate()'s initial strm->next_out */
76 c = t->base + ((uInt)b & inflate_mask[e]); 73 unsigned char FAR *end; /* while out < end, enough space available */
77 DUMPBITS(e) 74 unsigned wsize; /* window size or zero if not using window */
78 Tracevv((stderr, "inflate: * length %u\n", c)); 75 unsigned write; /* window write index */
76 unsigned char FAR *window; /* allocated sliding window, if wsize != 0 */
77 unsigned long hold; /* local strm->hold */
78 unsigned bits; /* local strm->bits */
79 code const FAR *lcode; /* local strm->lencode */
80 code const FAR *dcode; /* local strm->distcode */
81 unsigned lmask; /* mask for first level of length codes */
82 unsigned dmask; /* mask for first level of distance codes */
83 code this; /* retrieved table entry */
84 unsigned op; /* code bits, operation, extra bits, or */
85 /* window position, window bytes to copy */
86 unsigned len; /* match length, unused bytes */
87 unsigned dist; /* match distance */
88 unsigned char FAR *from; /* where to copy match from */
79 89
80 /* decode distance base of block to copy */ 90 /* copy state to local variables */
81 GRABBITS(15); /* max bits for distance code */ 91 state = (struct inflate_state FAR *)strm->state;
82 e = (t = td + ((uInt)b & md))->exop; 92 in = strm->next_in - OFF;
83 do { 93 last = in + (strm->avail_in - 5);
84 DUMPBITS(t->bits) 94 out = strm->next_out - OFF;
85 if (e & 16) 95 beg = out - (start - strm->avail_out);
86 { 96 end = out + (strm->avail_out - 257);
87 /* get extra bits to add to distance base */ 97 wsize = state->wsize;
88 e &= 15; 98 write = state->write;
89 GRABBITS(e) /* get extra bits (up to 13) */ 99 window = state->window;
90 d = t->base + ((uInt)b & inflate_mask[e]); 100 hold = state->hold;
91 DUMPBITS(e) 101 bits = state->bits;
92 Tracevv((stderr, "inflate: * distance %u\n", d)); 102 lcode = state->lencode;
103 dcode = state->distcode;
104 lmask = (1U << state->lenbits) - 1;
105 dmask = (1U << state->distbits) - 1;
93 106
94 /* do the copy */ 107 /* decode literals and length/distances until end-of-block or not enough
95 m -= c; 108 input data or output space */
96 r = q - d; 109 do {
97 if (r < s->window) /* wrap if needed */ 110 if (bits < 15) {
98 { 111 hold += (unsigned long)(PUP(in)) << bits;
99 do { 112 bits += 8;
100 r += s->end - s->window; /* force pointer in window */ 113 hold += (unsigned long)(PUP(in)) << bits;
101 } while (r < s->window); /* covers invalid distances */ 114 bits += 8;
102 e = s->end - r; 115 }
103 if (c > e) 116 this = lcode[hold & lmask];
104 { 117 dolen:
105 c -= e; /* wrapped copy */ 118 op = (unsigned)(this.bits);
106 do { 119 hold >>= op;
107 *q++ = *r++; 120 bits -= op;
108 } while (--e); 121 op = (unsigned)(this.op);
109 r = s->window; 122 if (op == 0) { /* literal */
110 do { 123 Tracevv((stderr, this.val >= 0x20 && this.val < 0x7f ?
111 *q++ = *r++; 124 "inflate: literal '%c'\n" :
112 } while (--c); 125 "inflate: literal 0x%02x\n", this.val));
113 } 126 PUP(out) = (unsigned char)(this.val);
114 else /* normal copy */ 127 }
115 { 128 else if (op & 16) { /* length base */
116 *q++ = *r++; c--; 129 len = (unsigned)(this.val);
117 *q++ = *r++; c--; 130 op &= 15; /* number of extra bits */
118 do { 131 if (op) {
119 *q++ = *r++; 132 if (bits < op) {
120 } while (--c); 133 hold += (unsigned long)(PUP(in)) << bits;
121 } 134 bits += 8;
135 }
136 len += hold & ((1U << op) - 1);
137 hold >>= op;
138 bits -= op;
139 }
140 Tracevv((stderr, "inflate: length %u\n", len));
141 if (bits < 15) {
142 hold += (unsigned long)(PUP(in)) << bits;
143 bits += 8;
144 hold += (unsigned long)(PUP(in)) << bits;
145 bits += 8;
122 } 146 }
123 else /* normal copy */ 147 this = dcode[hold & dmask];
124 { 148 dodist:
125 *q++ = *r++; c--; 149 op = (unsigned)(this.bits);
126 *q++ = *r++; c--; 150 hold >>= op;
127 do { 151 bits -= op;
128 *q++ = *r++; 152 op = (unsigned)(this.op);
129 } while (--c); 153 if (op & 16) { /* distance base */
154 dist = (unsigned)(this.val);
155 op &= 15; /* number of extra bits */
156 if (bits < op) {
157 hold += (unsigned long)(PUP(in)) << bits;
158 bits += 8;
159 if (bits < op) {
160 hold += (unsigned long)(PUP(in)) << bits;
161 bits += 8;
162 }
163 }
164 dist += hold & ((1U << op) - 1);
165 hold >>= op;
166 bits -= op;
167 Tracevv((stderr, "inflate: distance %u\n", dist));
168 op = (unsigned)(out - beg); /* max distance in output */
169 if (dist > op) { /* see if copy from window */
170 if (dist > wsize) {
171 strm->msg = (char *)"invalid distance too far back";
172 state->mode = BAD;
173 break;
174 }
175 from = window - OFF;
176 op = dist - op; /* distance back in window */
177 if (write == 0) { /* very common case */
178 from += wsize - op;
179 if (op < len) { /* some from window */
180 len -= op;
181 do {
182 PUP(out) = PUP(from);
183 } while (--op);
184 from = out - dist; /* rest from output */
185 }
186 }
187 else if (write < op) { /* wrap around window */
188 from += wsize + write - op;
189 op -= write;
190 if (op < len) { /* some from end of window */
191 len -= op;
192 do {
193 PUP(out) = PUP(from);
194 } while (--op);
195 from = window - OFF;
196 if (write < len) { /* some from start of window */
197 op = write;
198 len -= op;
199 do {
200 PUP(out) = PUP(from);
201 } while (--op);
202 from = out - dist; /* rest from output */
203 }
204 }
205 }
206 else { /* contiguous in window */
207 from += write - op;
208 if (op < len) { /* some from window */
209 len -= op;
210 do {
211 PUP(out) = PUP(from);
212 } while (--op);
213 from = out - dist; /* rest from output */
214 }
215 }
216 while (len > 2) {
217 PUP(out) = PUP(from);
218 PUP(out) = PUP(from);
219 PUP(out) = PUP(from);
220 len -= 3;
221 }
222 if (len) {
223 PUP(out) = PUP(from);
224 if (len > 1)
225 PUP(out) = PUP(from);
226 }
227 }
228 else {
229 from = out - dist; /* copy direct from output */
230 do { /* minimum length is three */
231 PUP(out) = PUP(from);
232 PUP(out) = PUP(from);
233 PUP(out) = PUP(from);
234 len -= 3;
235 } while (len > 2);
236 if (len) {
237 PUP(out) = PUP(from);
238 if (len > 1)
239 PUP(out) = PUP(from);
240 }
241 }
130 } 242 }
243 else if ((op & 64) == 0) { /* 2nd level distance code */
244 this = dcode[this.val + (hold & ((1U << op) - 1))];
245 goto dodist;
246 }
247 else {
248 strm->msg = (char *)"invalid distance code";
249 state->mode = BAD;
250 break;
251 }
252 }
253 else if ((op & 64) == 0) { /* 2nd level length code */
254 this = lcode[this.val + (hold & ((1U << op) - 1))];
255 goto dolen;
256 }
257 else if (op & 32) { /* end-of-block */
258 Tracevv((stderr, "inflate: end of block\n"));
259 state->mode = TYPE;
131 break; 260 break;
132 }
133 else if ((e & 64) == 0)
134 {
135 t += t->base;
136 e = (t += ((uInt)b & inflate_mask[e]))->exop;
137 }
138 else
139 {
140 z->msg = (char*)"invalid distance code";
141 UNGRAB
142 UPDATE
143 return Z_DATA_ERROR;
144 }
145 } while (1);
146 break;
147 }
148 if ((e & 64) == 0)
149 {
150 t += t->base;
151 if ((e = (t += ((uInt)b & inflate_mask[e]))->exop) == 0)
152 {
153 DUMPBITS(t->bits)
154 Tracevv((stderr, t->base >= 0x20 && t->base < 0x7f ?
155 "inflate: * literal '%c'\n" :
156 "inflate: * literal 0x%02x\n", t->base));
157 *q++ = (Byte)t->base;
158 m--;
159 break;
160 } 261 }
161 } 262 else {
162 else if (e & 32) 263 strm->msg = (char *)"invalid literal/length code";
163 { 264 state->mode = BAD;
164 Tracevv((stderr, "inflate: * end of block\n")); 265 break;
165 UNGRAB 266 }
166 UPDATE 267 } while (in < last && out < end);
167 return Z_STREAM_END; 268
168 } 269 /* return unused bytes (on entry, bits < 8, so in won't go too far back) */
169 else 270 len = bits >> 3;
170 { 271 in -= len;
171 z->msg = (char*)"invalid literal/length code"; 272 bits -= len << 3;
172 UNGRAB 273 hold &= (1U << bits) - 1;
173 UPDATE
174 return Z_DATA_ERROR;
175 }
176 } while (1);
177 } while (m >= 258 && n >= 10);
178 274
179 /* not enough input or output--restore pointers and return */ 275 /* update state and return */
180 UNGRAB 276 strm->next_in = in + OFF;
181 UPDATE 277 strm->next_out = out + OFF;
182 return Z_OK; 278 strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last));
279 strm->avail_out = (unsigned)(out < end ?
280 257 + (end - out) : 257 - (out - end));
281 state->hold = hold;
282 state->bits = bits;
283 return;
183} 284}
285
286/*
287 inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe):
288 - Using bit fields for code structure
289 - Different op definition to avoid & for extra bits (do & for table bits)
290 - Three separate decoding do-loops for direct, window, and write == 0
291 - Special case for distance > 1 copies to do overlapped load and store copy
292 - Explicit branch predictions (based on measured branch probabilities)
293 - Deferring match copy and interspersed it with decoding subsequent codes
294 - Swapping literal/length else
295 - Swapping window/direct else
296 - Larger unrolled copy loops (three is about right)
297 - Moving len -= 3 statement into middle of loop
298 */