summaryrefslogtreecommitdiff
path: root/contrib/inflate86
diff options
context:
space:
mode:
authorMark Adler <madler@alumni.caltech.edu>2011-09-09 23:21:57 -0700
committerMark Adler <madler@alumni.caltech.edu>2011-09-09 23:21:57 -0700
commit13a294f044ef0a89b2dcbfbb5d4d4c792673348e (patch)
treed9b377b4d8c00633c3da4e96659bfba9b08287f2 /contrib/inflate86
parent7c2a874e50b871d04fbd19501f7b42cff55e5abc (diff)
downloadzlib-1.2.0.1.tar.gz
zlib-1.2.0.1.tar.bz2
zlib-1.2.0.1.zip
zlib 1.2.0.1v1.2.0.1
Diffstat (limited to 'contrib/inflate86')
-rw-r--r--contrib/inflate86/inffas86.c783
-rw-r--r--contrib/inflate86/inffast.S2472
2 files changed, 2160 insertions, 1095 deletions
diff --git a/contrib/inflate86/inffas86.c b/contrib/inflate86/inffas86.c
new file mode 100644
index 0000000..4534693
--- /dev/null
+++ b/contrib/inflate86/inffas86.c
@@ -0,0 +1,783 @@
1/* inffas86.c is a hand tuned assembler version of
2 *
3 * inffast.c -- fast decoding
4 * Copyright (C) 1995-2003 Mark Adler
5 * For conditions of distribution and use, see copyright notice in zlib.h
6 *
7 * Copyright (C) 2003 Chris Anderson <christop@charm.net>
8 * Please use the copyright conditions above.
9 *
10 * Mar-13-2003 -- Most of this is derived from inffast.S which is derived from
11 * the gcc -S output of zlib-1.2.0/inffast.c. Zlib-1.2.0 is in beta release at
12 * the moment. I have successfully compiled and tested this code with gcc2.96,
13 * gcc3.2, icc5.0, msvc6.0. It is very close to the speed of inffast.S
14 * compiled with gcc -DNO_MMX, but inffast.S is still faster on the P3 with MMX
15 * enabled. I will attempt to merge the MMX code into this version. Newer
16 * versions of this and inffast.S can be found at
17 * http://www.eetbeetee.com/zlib/ and http://www.charm.net/~christop/zlib/
18 */
19
20#include "zutil.h"
21#include "inftrees.h"
22#include "inflate.h"
23#include "inffast.h"
24
25/* Mark Adler's comments from inffast.c: */
26
27/*
28 Decode literal, length, and distance codes and write out the resulting
29 literal and match bytes until either not enough input or output is
30 available, an end-of-block is encountered, or a data error is encountered.
31 When large enough input and output buffers are supplied to inflate(), for
32 example, a 16K input buffer and a 64K output buffer, more than 95% of the
33 inflate execution time is spent in this routine.
34
35 Entry assumptions:
36
37 state->mode == LEN
38 strm->avail_in >= 6
39 strm->avail_out >= 258
40 start >= strm->avail_out
41 state->bits < 8
42
43 On return, state->mode is one of:
44
45 LEN -- ran out of enough output space or enough available input
46 TYPE -- reached end of block code, inflate() to interpret next block
47 BAD -- error in block data
48
49 Notes:
50
51 - The maximum input bits used by a length/distance pair is 15 bits for the
52 length code, 5 bits for the length extra, 15 bits for the distance code,
53 and 13 bits for the distance extra. This totals 48 bits, or six bytes.
54 Therefore if strm->avail_in >= 6, then there is enough input to avoid
55 checking for available input while decoding.
56
57 - The maximum bytes that a single length/distance pair can output is 258
58 bytes, which is the maximum length that can be coded. inflate_fast()
59 requires strm->avail_out >= 258 for each loop to avoid checking for
60 output space.
61 */
62void inflate_fast(strm, start)
63z_streamp strm; /* stream to decode; its buffer pointers/counts and state are read here and written back before returning */
64unsigned start; /* inflate()'s starting value for strm->avail_out */
65{ /* 32-bit x86 inline-assembly decode loop (GCC/ICC or MSVC syntax); all working values live in the local record ar */
66 struct inflate_state FAR *state; /* strm->state cast to the inflate state type */
67 struct inffast_ar { /* the asm points esp at this record and addresses fields by the byte offsets noted below (4-byte fields); do not reorder or resize without updating every N(%esp) / [esp+N] */
68 void *esp; /* +0: caller's esp, saved while esp points at ar */
69 unsigned char FAR *in; /* +4: local strm->next_in */
70 unsigned char FAR *last; /* +8: while in < last, enough input available */
71 unsigned char FAR *out; /* +12: local strm->next_out */
72 unsigned char FAR *beg; /* +16: inflate()'s initial strm->next_out */
73 unsigned char FAR *end; /* +20: while out < end, enough space available */
74 unsigned wsize; /* +24: window size or zero if not using window */
75 unsigned write; /* +28: window write index */
76 unsigned char FAR *window; /* +32: allocated sliding window, if wsize != 0 */
77 unsigned long hold; /* +36: local state->hold (bit accumulator, kept in edx) */
78 unsigned bits; /* +40: local state->bits (bit count, kept in ebx/bl) */
79 code const FAR *lcode; /* +44: local state->lencode */
80 code const FAR *dcode; /* +48: local state->distcode */
81 unsigned lmask; /* +52: mask for first level of length codes */
82 unsigned dmask; /* +56: mask for first level of distance codes */
83 unsigned len; /* +60: match length, unused bytes */
84 unsigned dist; /* +64: match distance (not referenced by the asm, which keeps dist in ebp) */
85 unsigned status; /* +68: exit code stored by the asm: 0 = input/output limit reached, 1 = end of block, 2 = invalid literal/length code, 3 = invalid distance code, 4 = distance too far back */
86 } ar;
87
88 /* copy state to local variables */
89 state = (struct inflate_state FAR *)strm->state;
90 ar.in = strm->next_in;
91 ar.last = ar.in + (strm->avail_in - 5); /* biased by 5; the bias is undone when avail_in is recomputed at the end */
92 ar.out = strm->next_out;
93 ar.beg = ar.out - (start - strm->avail_out);
94 ar.end = ar.out + (strm->avail_out - 257); /* biased by 257; undone when avail_out is recomputed at the end */
95 ar.wsize = state->wsize;
96 ar.write = state->write;
97 ar.window = state->window;
98 ar.hold = state->hold;
99 ar.bits = state->bits;
100 ar.lcode = state->lencode;
101 ar.dcode = state->distcode;
102 ar.lmask = (1U << state->lenbits) - 1;
103 ar.dmask = (1U << state->distbits) - 1;
104
105 /* decode literals and length/distances until end-of-block or not enough
106 input data or output space */
107
108 /* align in on 2 byte boundary */
109 if (((unsigned long)(void *)ar.in & 0x1) != 0) {
110 ar.hold += (unsigned long)*ar.in++ << ar.bits; /* odd address: take one byte now so the asm's 16-bit loads start on an even address */
111 ar.bits += 8;
112 }
113
114#if defined( __GNUC__ ) || defined( __ICC )
115 __asm__ __volatile__ (
116" leal %0, %%eax\n" /* eax = &ar */
117" pushf\n" /* save flags; cld below changes the direction flag */
118" pushl %%ebp\n" /* ebp is reused below for lcode and dist */
119" movl %%esp, (%%eax)\n" /* ar.esp = caller's esp */
120" movl %%eax, %%esp\n" /* esp = &ar; fields are addressed as N(%%esp) until esp is restored */
121" movl 4(%%esp), %%esi\n" /* esi = in */
122" movl 12(%%esp), %%edi\n" /* edi = out */
123" movl 36(%%esp), %%edx\n" /* edx = hold */
124" movl 40(%%esp), %%ebx\n" /* ebx = bits */
125" movl 44(%%esp), %%ebp\n" /* ebp = lcode */
126 /* NOTE(review): while esp aliases ar, anything that pushes onto the stack (e.g. an asynchronous signal frame) would write to memory just below ar -- confirm this is acceptable on the target platforms */
127" cld\n" /* string instructions (lodsw/stosb/movsb) advance upward */
128" jmp .L_do_loop\n"
129
130".L_while_test:\n"
131" cmpl %%edi, 20(%%esp)\n"
132" jbe .L_break_loop\n" /* if (end <= out) */
133" cmpl %%esi, 8(%%esp)\n"
134" jbe .L_break_loop\n" /* if (last <= in) */
135
136".L_do_loop:\n"
137" cmpb $15, %%bl\n"
138" ja .L_get_length_code\n" /* if (15 < bits) */
139
140" xorl %%eax, %%eax\n"
141" lodsw\n" /* ax = *(ushort *)in, in += 2 */
142" movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
143" addb $16, %%bl\n" /* bits += 16 */
144" shll %%cl, %%eax\n"
145" orl %%eax, %%edx\n" /* hold |= *((ushort *)in)++ << bits */
146
147".L_get_length_code:\n"
148" movl 52(%%esp), %%eax\n" /* eax = lmask */
149" andl %%edx, %%eax\n" /* eax &= hold */
150" movl (%%ebp,%%eax,4), %%eax\n" /* eax = lcode[hold & lmask] */
151
152".L_dolen:\n" /* eax = table entry "this": al = op, ah = bits, upper 16 bits = val */
153" movb %%ah, %%cl\n" /* cl = this.bits */
154" subb %%ah, %%bl\n" /* bits -= this.bits */
155" shrl %%cl, %%edx\n" /* hold >>= this.bits */
156
157" testb %%al, %%al\n"
158" jnz .L_test_for_length_base\n" /* if (op != 0) 45.7% */
159
160" shrl $16, %%eax\n" /* output this.val char */
161" stosb\n" /* *out++ = low byte of this.val */
162" jmp .L_while_test\n"
163
164".L_test_for_length_base:\n"
165" movl %%eax, %%ecx\n" /* ecx = this */
166" shrl $16, %%ecx\n" /* ecx = this.val */
167" movl %%ecx, 60(%%esp)\n" /* ar.len = this.val */
168" movb %%al, %%cl\n" /* cl = this.op */
169
170" testb $16, %%al\n"
171" jz .L_test_for_second_level_length\n" /* if ((op & 16) == 0) 8% */
172" andb $15, %%cl\n" /* op &= 15 */
173" jz .L_decode_distance\n" /* if (!op) -- no extra length bits */
174" cmpb %%cl, %%bl\n"
175" jae .L_add_bits_to_len\n" /* if (op <= bits) */
176
177" movb %%cl, %%ch\n" /* stash op in ch, freeing cl */
178" xorl %%eax, %%eax\n"
179" lodsw\n" /* ax = *(ushort *)in, in += 2 */
180" movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
181" addb $16, %%bl\n" /* bits += 16 */
182" shll %%cl, %%eax\n"
183" orl %%eax, %%edx\n" /* hold |= *((ushort *)in)++ << bits */
184" movb %%ch, %%cl\n" /* move op back to ecx */
185
186".L_add_bits_to_len:\n"
187" movl $1, %%eax\n"
188" shll %%cl, %%eax\n"
189" decl %%eax\n" /* (1 << op) - 1 */
190" subb %%cl, %%bl\n" /* bits -= op */
191" andl %%edx, %%eax\n" /* eax &= hold */
192" shrl %%cl, %%edx\n" /* hold >>= op */
193" addl %%eax, 60(%%esp)\n" /* len += hold & mask[op] */
194
195".L_decode_distance:\n"
196" cmpb $15, %%bl\n"
197" ja .L_get_distance_code\n" /* if (15 < bits) */
198
199" xorl %%eax, %%eax\n"
200" lodsw\n" /* ax = *(ushort *)in, in += 2 */
201" movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
202" addb $16, %%bl\n" /* bits += 16 */
203" shll %%cl, %%eax\n"
204" orl %%eax, %%edx\n" /* hold |= *((ushort *)in)++ << bits */
205
206".L_get_distance_code:\n"
207" movl 56(%%esp), %%eax\n" /* eax = dmask */
208" movl 48(%%esp), %%ecx\n" /* ecx = dcode */
209" andl %%edx, %%eax\n" /* eax &= hold */
210" movl (%%ecx,%%eax,4), %%eax\n"/* eax = dcode[hold & dmask] */
211
212".L_dodist:\n"
213" movl %%eax, %%ebp\n" /* dist = this */
214" shrl $16, %%ebp\n" /* dist = this.val */
215" movb %%ah, %%cl\n" /* cl = this.bits */
216" subb %%ah, %%bl\n" /* bits -= this.bits */
217" shrl %%cl, %%edx\n" /* hold >>= this.bits */
218" movb %%al, %%cl\n" /* cl = this.op */
219
220" testb $16, %%al\n" /* if ((op & 16) == 0) */
221" jz .L_test_for_second_level_dist\n"
222" andb $15, %%cl\n" /* op &= 15 */
223" jz .L_check_dist_one\n" /* no extra distance bits */
224" cmpb %%cl, %%bl\n"
225" jae .L_add_bits_to_dist\n" /* if (op <= bits) 97.6% */
226
227" movb %%cl, %%ch\n" /* stash op in ch, freeing cl */
228" xorl %%eax, %%eax\n"
229" lodsw\n" /* ax = *(ushort *)in, in += 2 */
230" movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
231" addb $16, %%bl\n" /* bits += 16 */
232" shll %%cl, %%eax\n"
233" orl %%eax, %%edx\n" /* hold |= *((ushort *)in)++ << bits */
234" movb %%ch, %%cl\n" /* move op back to ecx */
235
236".L_add_bits_to_dist:\n"
237" movl $1, %%eax\n"
238" shll %%cl, %%eax\n"
239" decl %%eax\n" /* (1 << op) - 1 */
240" subb %%cl, %%bl\n"
241" andl %%edx, %%eax\n" /* eax &= hold */
242" shrl %%cl, %%edx\n"
243" addl %%eax, %%ebp\n" /* dist += hold & ((1 << op) - 1) */
244
245".L_check_window:\n"
246" movl %%esi, 4(%%esp)\n" /* save in so from can use its reg (esi) */
247" movl %%edi, %%eax\n"
248" subl 16(%%esp), %%eax\n" /* nbytes = out - beg */
249
250" cmpl %%ebp, %%eax\n"
251" jb .L_clip_window\n" /* if (dist > nbytes) 4.2% */
252
253" movl 60(%%esp), %%ecx\n" /* ecx = len */
254" movl %%edi, %%esi\n"
255" subl %%ebp, %%esi\n" /* from = out - dist */
256
257" subl $3, %%ecx\n" /* copy from to out: first 3 bytes by hand, remaining len - 3 by rep movsb; assumes len >= 3 (deflate's minimum match length) -- TODO confirm the tables guarantee it */
258" movb (%%esi), %%al\n"
259" movb %%al, (%%edi)\n"
260" movb 1(%%esi), %%al\n"
261" movb 2(%%esi), %%ah\n"
262" addl $3, %%esi\n"
263" movb %%al, 1(%%edi)\n"
264" movb %%ah, 2(%%edi)\n"
265" addl $3, %%edi\n"
266" rep movsb\n"
267
268" movl 4(%%esp), %%esi\n" /* move in back to %esi, toss from */
269" movl 44(%%esp), %%ebp\n" /* ebp = lcode */
270" jmp .L_while_test\n"
271
272".L_check_dist_one:\n"
273" cmpl $1, %%ebp\n" /* if dist 1, is a memset */
274" jne .L_check_window\n"
275" cmpl %%edi, 16(%%esp)\n"
276" je .L_check_window\n" /* out == beg: no earlier output byte to replicate, use the general path */
277
278" decl %%edi\n" /* edi = out - 1 */
279" movl 60(%%esp), %%ecx\n" /* ecx = len */
280" movb (%%edi), %%al\n" /* al = out[-1] */
281" subl $3, %%ecx\n" /* 3 bytes stored by hand, len - 3 by rep stosb */
282
283" movb %%al, 1(%%edi)\n" /* fill len bytes at out with out[-1] */
284" movb %%al, 2(%%edi)\n"
285" movb %%al, 3(%%edi)\n"
286" addl $4, %%edi\n"
287" rep stosb\n"
288" movl 44(%%esp), %%ebp\n" /* ebp = lcode */
289" jmp .L_while_test\n"
290
291".L_test_for_second_level_length:\n"
292" testb $64, %%al\n"
293" jnz .L_test_for_end_of_block\n" /* if ((op & 64) != 0) */
294
295" movl $1, %%eax\n"
296" shll %%cl, %%eax\n"
297" decl %%eax\n" /* (1 << op) - 1 */
298" andl %%edx, %%eax\n" /* eax &= hold */
299" addl 60(%%esp), %%eax\n" /* eax += this.val */
300" movl (%%ebp,%%eax,4), %%eax\n" /* eax = lcode[val+(hold&mask[op])]*/
301" jmp .L_dolen\n"
302
303".L_test_for_second_level_dist:\n"
304" testb $64, %%al\n"
305" jnz .L_invalid_distance_code\n" /* if ((op & 64) != 0) */
306
307" movl $1, %%eax\n"
308" shll %%cl, %%eax\n"
309" decl %%eax\n" /* (1 << op) - 1 */
310" andl %%edx, %%eax\n" /* eax &= hold */
311" addl %%ebp, %%eax\n" /* eax += this.val */
312" movl 48(%%esp), %%ecx\n" /* ecx = dcode */
313" movl (%%ecx,%%eax,4), %%eax\n" /* eax = dcode[val+(hold&mask[op])]*/
314" jmp .L_dodist\n"
315
316".L_clip_window:\n"
317" movl %%eax, %%ecx\n"
318" movl 24(%%esp), %%eax\n" /* prepare for dist compare */
319" negl %%ecx\n" /* nbytes = -nbytes */
320" movl 32(%%esp), %%esi\n" /* from = window */
321
322" cmpl %%ebp, %%eax\n"
323" jb .L_invalid_distance_too_far\n" /* if (dist > wsize) */
324
325" addl %%ebp, %%ecx\n" /* nbytes = dist - nbytes */
326" cmpl $0, 28(%%esp)\n"
327" jne .L_wrap_around_window\n" /* if (write != 0) */
328
329" subl %%ecx, %%eax\n"
330" addl %%eax, %%esi\n" /* from += wsize - nbytes */
331
332" movl 60(%%esp), %%eax\n"
333" cmpl %%ecx, %%eax\n"
334" jbe .L_do_copy1\n" /* if (nbytes >= len) */
335
336" subl %%ecx, %%eax\n" /* len -= nbytes */
337" rep movsb\n"
338" movl %%edi, %%esi\n"
339" subl %%ebp, %%esi\n" /* from = out - dist */
340" jmp .L_do_copy1\n"
341
342" cmpl %%ecx, %%eax\n" /* NOTE: unreachable from here to the next jmp -- follows an unconditional jmp, has no label, and duplicates the sequence just above */
343" jbe .L_do_copy1\n" /* if (nbytes >= len) */
344
345" subl %%ecx, %%eax\n" /* len -= nbytes */
346" rep movsb\n"
347" movl %%edi, %%esi\n"
348" subl %%ebp, %%esi\n" /* from = out - dist */
349" jmp .L_do_copy1\n"
350
351".L_wrap_around_window:\n"
352" movl 28(%%esp), %%eax\n" /* eax = write */
353" cmpl %%eax, %%ecx\n"
354" jbe .L_contiguous_in_window\n" /* if (write >= nbytes) */
355
356" addl 24(%%esp), %%esi\n"
357" addl %%eax, %%esi\n"
358" subl %%ecx, %%esi\n" /* from += wsize + write - nbytes */
359" subl %%eax, %%ecx\n" /* nbytes -= write */
360
361" movl 60(%%esp), %%eax\n"
362" cmpl %%ecx, %%eax\n"
363" jbe .L_do_copy1\n" /* if (nbytes >= len) */
364
365" subl %%ecx, %%eax\n" /* len -= nbytes */
366" rep movsb\n"
367" movl 32(%%esp), %%esi\n" /* from = window */
368" movl 28(%%esp), %%ecx\n" /* nbytes = write */
369" cmpl %%ecx, %%eax\n"
370" jbe .L_do_copy1\n" /* if (nbytes >= len) */
371
372" subl %%ecx, %%eax\n" /* len -= nbytes */
373" rep movsb\n"
374" movl %%edi, %%esi\n"
375" subl %%ebp, %%esi\n" /* from = out - dist */
376" jmp .L_do_copy1\n"
377
378".L_contiguous_in_window:\n"
379" addl %%eax, %%esi\n"
380" subl %%ecx, %%esi\n" /* from += write - nbytes */
381
382" movl 60(%%esp), %%eax\n"
383" cmpl %%ecx, %%eax\n"
384" jbe .L_do_copy1\n" /* if (nbytes >= len) */
385
386" subl %%ecx, %%eax\n" /* len -= nbytes */
387" rep movsb\n"
388" movl %%edi, %%esi\n"
389" subl %%ebp, %%esi\n" /* from = out - dist */
390
391".L_do_copy1:\n"
392" movl %%eax, %%ecx\n" /* ecx = bytes still to copy */
393" rep movsb\n"
394
395" movl 4(%%esp), %%esi\n" /* move in back to %esi, toss from */
396" movl 44(%%esp), %%ebp\n" /* ebp = lcode */
397" jmp .L_while_test\n"
398
399".L_test_for_end_of_block:\n"
400" testb $32, %%al\n"
401" jz .L_invalid_literal_length_code\n"
402" movl $1, 68(%%esp)\n" /* status = 1: end of block */
403" jmp .L_break_loop_with_status\n"
404
405".L_invalid_literal_length_code:\n"
406" movl $2, 68(%%esp)\n" /* status = 2 */
407" jmp .L_break_loop_with_status\n"
408
409".L_invalid_distance_code:\n"
410" movl $3, 68(%%esp)\n" /* status = 3 */
411" jmp .L_break_loop_with_status\n"
412
413".L_invalid_distance_too_far:\n"
414" movl 4(%%esp), %%esi\n" /* esi held window here; reload in before it is stored back below */
415" movl $4, 68(%%esp)\n" /* status = 4 */
416" jmp .L_break_loop_with_status\n"
417
418".L_break_loop:\n"
419" movl $0, 68(%%esp)\n" /* status = 0: ran out of input or output room */
420
421".L_break_loop_with_status:\n"
422/* put in, out, bits, and hold back into ar and pop esp */
423" movl %%esi, 4(%%esp)\n"
424" movl %%edi, 12(%%esp)\n"
425" movl %%ebx, 40(%%esp)\n"
426" movl %%edx, 36(%%esp)\n"
427" movl (%%esp), %%esp\n" /* esp = ar.esp (caller's stack) */
428" popl %%ebp\n"
429" popf\n"
430 :
431 : "m" (ar) /* ar is also written by the asm; it is only listed as an input, so the "memory" clobber below is what tells the compiler it changed */
432 : "memory", "%eax", "%ebx", "%ecx", "%edx", "%esi", "%edi" /* ebp is not listed: it is saved and restored by hand above */
433 );
434#elif defined( _MSC_VER )
435 __asm {
436 lea eax, ar
437 pushfd /* save flags; cld below changes the direction flag */
438 push ebp /* ebp is reused below for lcode and dist */
439 mov [eax], esp /* ar.esp = caller's esp */
440 mov esp, eax /* esp = &ar; fields are addressed as [esp+N] until esp is restored */
441 mov esi, [esp+4] /* esi = in */
442 mov edi, [esp+12] /* edi = out */
443 mov edx, [esp+36] /* edx = hold */
444 mov ebx, [esp+40] /* ebx = bits */
445 mov ebp, [esp+44] /* ebp = lcode */
446
447 cld
448 jmp L_do_loop
449
450L_while_test:
451 cmp [esp+20], edi
452 jbe L_break_loop /* if (end <= out) */
453 cmp [esp+8], esi
454 jbe L_break_loop /* if (last <= in) */
455
456L_do_loop:
457 cmp bl, 15
458 ja L_get_length_code /* if (15 < bits) */
459
460 xor eax, eax
461 lodsw /* ax = *(ushort *)in, in += 2 */
462 mov cl, bl /* cl = bits, needs it for shifting */
463 add bl, 16 /* bits += 16 */
464 shl eax, cl
465 or edx, eax /* hold |= *((ushort *)in)++ << bits */
466
467L_get_length_code:
468 mov eax, [esp+52] /* eax = lmask */
469 and eax, edx /* eax &= hold */
470 mov eax, [ebp+eax*4] /* eax = lcode[hold & lmask] */
471
472L_dolen:
473 mov cl, ah /* cl = this.bits */
474 sub bl, ah /* bits -= this.bits */
475 shr edx, cl /* hold >>= this.bits */
476
477 test al, al
478 jnz L_test_for_length_base /* if (op != 0) 45.7% */
479
480 shr eax, 16 /* output this.val char */
481 stosb
482 jmp L_while_test
483
484L_test_for_length_base:
485 mov ecx, eax /* ecx = this */
486 shr ecx, 16 /* ecx = this.val */
487 mov [esp+60], ecx /* ar.len = this.val */
488 mov cl, al /* cl = this.op */
489
490 test al, 16
491 jz L_test_for_second_level_length /* if ((op & 16) == 0) 8% */
492 and cl, 15 /* op &= 15 */
493 jz L_decode_distance /* if (!op) */
494 cmp bl, cl
495 jae L_add_bits_to_len /* if (op <= bits) */
496
497 mov ch, cl /* stash op in ch, freeing cl */
498 xor eax, eax
499 lodsw /* ax = *(ushort *)in, in += 2 */
500 mov cl, bl /* cl = bits, needs it for shifting */
501 add bl, 16 /* bits += 16 */
502 shl eax, cl
503 or edx, eax /* hold |= *((ushort *)in)++ << bits */
504 mov cl, ch /* move op back to ecx */
505
506L_add_bits_to_len:
507 mov eax, 1
508 shl eax, cl
509 dec eax
510 sub bl, cl
511 and eax, edx /* eax &= hold */
512 shr edx, cl
513 add [esp+60], eax /* len += hold & mask[op] */
514
515L_decode_distance:
516 cmp bl, 15
517 ja L_get_distance_code /* if (15 < bits) */
518
519 xor eax, eax
520 lodsw /* ax = *(ushort *)in, in += 2 */
521 mov cl, bl /* cl = bits, needs it for shifting */
522 add bl, 16 /* bits += 16 */
523 shl eax, cl
524 or edx, eax /* hold |= *((ushort *)in)++ << bits */
525
526L_get_distance_code:
527 mov eax, [esp+56] /* eax = dmask */
528 mov ecx, [esp+48] /* ecx = dcode */
529 and eax, edx /* eax &= hold */
530 mov eax, [ecx+eax*4]/* eax = dcode[hold & dmask] */
531
532L_dodist:
533 mov ebp, eax /* dist = this */
534 shr ebp, 16 /* dist = this.val */
535 mov cl, ah
536 sub bl, ah /* bits -= this.bits */
537 shr edx, cl /* hold >>= this.bits */
538 mov cl, al /* cl = this.op */
539
540 test al, 16 /* if ((op & 16) == 0) */
541 jz L_test_for_second_level_dist
542 and cl, 15 /* op &= 15 */
543 jz L_check_dist_one
544 cmp bl, cl
545 jae L_add_bits_to_dist /* if (op <= bits) 97.6% */
546
547 mov ch, cl /* stash op in ch, freeing cl */
548 xor eax, eax
549 lodsw /* ax = *(ushort *)in, in += 2 */
550 mov cl, bl /* cl = bits, needs it for shifting */
551 add bl, 16 /* bits += 16 */
552 shl eax, cl
553 or edx, eax /* hold |= *((ushort *)in)++ << bits */
554 mov cl, ch /* move op back to ecx */
555
556L_add_bits_to_dist:
557 mov eax, 1
558 shl eax, cl
559 dec eax /* (1 << op) - 1 */
560 sub bl, cl
561 and eax, edx /* eax &= hold */
562 shr edx, cl
563 add ebp, eax /* dist += hold & ((1 << op) - 1) */
564
565L_check_window:
566 mov [esp+4], esi /* save in so from can use its reg (esi) */
567 mov eax, edi
568 sub eax, [esp+16] /* nbytes = out - beg */
569
570 cmp eax, ebp
571 jb L_clip_window /* if (dist > nbytes) 4.2% */
572
573 mov ecx, [esp+60]
574 mov esi, edi
575 sub esi, ebp /* from = out - dist */
576
577 sub ecx, 3 /* copy from to out: first 3 bytes by hand, remaining len - 3 by rep movsb */
578 mov al, [esi]
579 mov [edi], al
580 mov al, [esi+1]
581 mov ah, [esi+2]
582 add esi, 3
583 mov [edi+1], al
584 mov [edi+2], ah
585 add edi, 3
586 rep movsb
587
588 mov esi, [esp+4] /* move in back to %esi, toss from */
589 mov ebp, [esp+44] /* ebp = lcode */
590 jmp L_while_test
591
592L_check_dist_one:
593 cmp ebp, 1 /* if dist 1, is a memset */
594 jne L_check_window
595 cmp [esp+16], edi
596 je L_check_window /* out == beg: no earlier output byte to replicate */
597
598 dec edi
599 mov ecx, [esp+60]
600 mov al, [edi] /* al = out[-1] */
601 sub ecx, 3
602
603 mov [edi+1], al /* fill len bytes at out with out[-1] */
604 mov [edi+2], al
605 mov [edi+3], al
606 add edi, 4
607 rep stosb
608 mov ebp, [esp+44] /* ebp = lcode */
609 jmp L_while_test
610
611L_test_for_second_level_length:
612 test al, 64
613 jnz L_test_for_end_of_block /* if ((op & 64) != 0) */
614
615 mov eax, 1
616 shl eax, cl
617 dec eax
618 and eax, edx /* eax &= hold */
619 add eax, [esp+60] /* eax += this.val */
620 mov eax, [ebp+eax*4] /* eax = lcode[val+(hold&mask[op])]*/
621 jmp L_dolen
622
623L_test_for_second_level_dist:
624 test al, 64
625 jnz L_invalid_distance_code /* if ((op & 64) != 0) */
626
627 mov eax, 1
628 shl eax, cl
629 dec eax
630 and eax, edx /* eax &= hold */
631 add eax, ebp /* eax += this.val */
632 mov ecx, [esp+48] /* ecx = dcode */
633 mov eax, [ecx+eax*4] /* eax = dcode[val+(hold&mask[op])]*/
634 jmp L_dodist
635
636L_clip_window:
637 mov ecx, eax
638 mov eax, [esp+24] /* prepare for dist compare */
639 neg ecx /* nbytes = -nbytes */
640 mov esi, [esp+32] /* from = window */
641
642 cmp eax, ebp
643 jb L_invalid_distance_too_far /* if (dist > wsize) */
644
645 add ecx, ebp /* nbytes = dist - nbytes */
646 cmp dword ptr [esp+28], 0
647 jne L_wrap_around_window /* if (write != 0) */
648
649 sub eax, ecx
650 add esi, eax /* from += wsize - nbytes */
651
652 mov eax, [esp+60]
653 cmp eax, ecx
654 jbe L_do_copy1 /* if (nbytes >= len) */
655
656 sub eax, ecx /* len -= nbytes */
657 rep movsb
658 mov esi, edi
659 sub esi, ebp /* from = out - dist */
660 jmp L_do_copy1
661
662 cmp eax, ecx /* NOTE: unreachable from here to the next jmp -- follows an unconditional jmp, has no label, and duplicates the sequence just above */
663 jbe L_do_copy1 /* if (nbytes >= len) */
664
665 sub eax, ecx /* len -= nbytes */
666 rep movsb
667 mov esi, edi
668 sub esi, ebp /* from = out - dist */
669 jmp L_do_copy1
670
671L_wrap_around_window:
672 mov eax, [esp+28]
673 cmp ecx, eax
674 jbe L_contiguous_in_window /* if (write >= nbytes) */
675
676 add esi, [esp+24]
677 add esi, eax
678 sub esi, ecx /* from += wsize + write - nbytes */
679 sub ecx, eax /* nbytes -= write */
680
681 mov eax, [esp+60]
682 cmp eax, ecx
683 jbe L_do_copy1 /* if (nbytes >= len) */
684
685 sub eax, ecx /* len -= nbytes */
686 rep movsb
687 mov esi, [esp+32] /* from = window */
688 mov ecx, [esp+28] /* nbytes = write */
689 cmp eax, ecx
690 jbe L_do_copy1 /* if (nbytes >= len) */
691
692 sub eax, ecx /* len -= nbytes */
693 rep movsb
694 mov esi, edi
695 sub esi, ebp /* from = out - dist */
696 jmp L_do_copy1
697
698L_contiguous_in_window:
699 add esi, eax
700 sub esi, ecx /* from += write - nbytes */
701
702 mov eax, [esp+60]
703 cmp eax, ecx
704 jbe L_do_copy1 /* if (nbytes >= len) */
705
706 sub eax, ecx /* len -= nbytes */
707 rep movsb
708 mov esi, edi
709 sub esi, ebp /* from = out - dist */
710
711L_do_copy1:
712 mov ecx, eax
713 rep movsb
714
715 mov esi, [esp+4] /* move in back to %esi, toss from */
716 mov ebp, [esp+44] /* ebp = lcode */
717 jmp L_while_test
718
719L_test_for_end_of_block:
720 test al, 32
721 jz L_invalid_literal_length_code
722 mov dword ptr [esp+68], 1 /* status = 1: end of block */
723 jmp L_break_loop_with_status
724
725L_invalid_literal_length_code:
726 mov dword ptr [esp+68], 2 /* status = 2 */
727 jmp L_break_loop_with_status
728
729L_invalid_distance_code:
730 mov dword ptr [esp+68], 3 /* status = 3 */
731 jmp L_break_loop_with_status
732
733L_invalid_distance_too_far:
734 mov esi, [esp+4] /* esi held window here; reload in before it is stored back below */
735 mov dword ptr [esp+68], 4 /* status = 4 */
736 jmp L_break_loop_with_status
737
738L_break_loop:
739 mov dword ptr [esp+68], 0 /* status = 0: ran out of input or output room */
740
741L_break_loop_with_status:
742/* put in, out, bits, and hold back into ar and pop esp */
743 mov [esp+4], esi
744 mov [esp+12], edi
745 mov [esp+40], ebx
746 mov [esp+36], edx
747 mov esp, [esp] /* esp = ar.esp (caller's stack) */
748 pop ebp
749 popfd
750 }
751#endif
752 /* NOTE(review): there is no #else/#error above; with any other compiler neither asm block is built and ar.status is read below without ever being set */
753 if (ar.status > 1) { /* 2..4: data error, report message and enter BAD */
754 if (ar.status == 2)
755 strm->msg = "invalid literal/length code";
756 else if (ar.status == 3)
757 strm->msg = "invalid distance code";
758 else
759 strm->msg = "invalid distance too far back";
760 state->mode = BAD;
761 }
762 else if ( ar.status == 1 ) { /* end-of-block code seen */
763 state->mode = TYPE;
764 }
765
766 /* return unused bytes (on entry, bits < 8, so in won't go too far back) */
767 ar.len = ar.bits >> 3; /* whole bytes still sitting in hold */
768 ar.in -= ar.len;
769 ar.bits -= ar.len << 3;
770 ar.hold &= (1U << ar.bits) - 1; /* keep only the remaining (< 8) bits */
771
772 /* update state and return */
773 strm->next_in = ar.in;
774 strm->next_out = ar.out;
775 strm->avail_in = (unsigned)(ar.in < ar.last ? 5 + (ar.last - ar.in) :
776 5 - (ar.in - ar.last)); /* adds back the 5 subtracted when ar.last was set */
777 strm->avail_out = (unsigned)(ar.out < ar.end ? 257 + (ar.end - ar.out) :
778 257 - (ar.out - ar.end)); /* adds back the 257 subtracted when ar.end was set */
779 state->hold = ar.hold;
780 state->bits = ar.bits;
781 return;
782}
783
diff --git a/contrib/inflate86/inffast.S b/contrib/inflate86/inffast.S
index d1e80ef..3602907 100644
--- a/contrib/inflate86/inffast.S
+++ b/contrib/inflate86/inffast.S
@@ -1,1095 +1,1377 @@
1/* 1/*
2 * inffast.S is a hand tuned assembler version of: 2 * inffast.S is a hand tuned assembler version of:
3 * 3 *
4 * inffast.c -- fast decoding 4 * inffast.c -- fast decoding
5 * Copyright (C) 1995-2003 Mark Adler 5 * Copyright (C) 1995-2003 Mark Adler
6 * For conditions of distribution and use, see copyright notice in zlib.h 6 * For conditions of distribution and use, see copyright notice in zlib.h
7 * 7 *
8 * Copyright (C) 2003 Chris Anderson <christop@charm.net> 8 * Copyright (C) 2003 Chris Anderson <christop@charm.net>
9 * Please use the copyright conditions above. 9 * Please use the copyright conditions above.
10 * 10 *
11 * This version (Jan-23-2003) of inflate_fast was coded and tested under 11 * This version (Jan-23-2003) of inflate_fast was coded and tested under
12 * GNU/Linux on a pentium 3, using the gcc-3.2 compiler distribution. On that 12 * GNU/Linux on a pentium 3, using the gcc-3.2 compiler distribution. On that
13 * machine, I found that gzip style archives decompressed about 20% faster than 13 * machine, I found that gzip style archives decompressed about 20% faster than
14 * the gcc-3.2 -O3 -fomit-frame-pointer compiled version. Your results will 14 * the gcc-3.2 -O3 -fomit-frame-pointer compiled version. Your results will
15 * depend on how large of a buffer is used for z_stream.next_in & next_out 15 * depend on how large of a buffer is used for z_stream.next_in & next_out
16 * (8K-32K worked best for my 256K cpu cache) and how much overhead there is in 16 * (8K-32K worked best for my 256K cpu cache) and how much overhead there is in
17 * stream processing I/O and crc32/adler32. In my case, this routine used 17 * stream processing I/O and crc32/adler32. In my case, this routine used
18 * 70% of the cpu time and crc32 used 20%. 18 * 70% of the cpu time and crc32 used 20%.
19 * 19 *
20 * I am confident that this version will work in the general case, but I have 20 * I am confident that this version will work in the general case, but I have
21 * not tested a wide variety of datasets or a wide variety of platforms. 21 * not tested a wide variety of datasets or a wide variety of platforms.
22 * 22 *
23 * Jan-24-2003 -- Added -DUSE_MMX define for slightly faster inflating. 23 * Jan-24-2003 -- Added -DUSE_MMX define for slightly faster inflating.
24 * It should be a runtime flag instead of compile time flag... 24 * It should be a runtime flag instead of compile time flag...
25 */ 25 *
26 26 * Jan-26-2003 -- Added runtime check for MMX support with cpuid instruction.
27.file "inffast.S" 27 * With -DUSE_MMX, only MMX code is compiled. With -DNO_MMX, only non-MMX code
28 28 * is compiled. Without either option, runtime detection is enabled. Runtime
29.globl inflate_fast 29 * detection should work on all modern cpus and the recommended algorithm (flip
30 30 * ID bit on eflags and then use the cpuid instruction) is used in many
31.text 31 * multimedia applications. Tested under win2k with gcc-2.95 and gas-2.12
32.align 4,0 32 * distributed with cygwin3. Compiling with gcc-2.95 -c inffast.S -o
33.L_invalid_literal_length_code_msg: 33 * inffast.obj generates a COFF object which can then be linked with MSVC++
34.string "invalid literal/length code" 34 * compiled code. Tested under FreeBSD 4.7 with gcc-2.95.
35 35 *
36.align 4,0 36 * Jan-28-2003 -- Tested Athlon XP... MMX mode is slower than no MMX (and
37.L_invalid_distance_code_msg: 37 * slower than compiler generated code). Adjusted cpuid check to use the MMX
38.string "invalid distance code" 38 * code only for Pentiums < P4 until I have more data on the P4. Speed
39 39 * improvement is only about 15% on the Athlon when compared with code generated
40.align 4,0 40 * with MSVC++. Not sure yet, but I think the P4 will also be slower using the
41.L_invalid_distance_too_far_msg: 41 * MMX mode because many of its x86 ALU instructions execute in .5 cycles and
42.string "invalid distance too far back" 42 * have less latency than MMX ops. Added code to buffer the last 11 bytes of
43 43 * the input stream since the MMX code grabs bits in chunks of 32, which
44#if defined( USE_MMX ) 44 * differs from the inffast.c algorithm. I don't think there would have been
45.align 4,0 45 * read overruns where a page boundary was crossed (a segfault), but there
46.L_mask: /* mask[N] = ( 1 << N ) - 1 */ 46 * could have been overruns when next_in ends on unaligned memory (uninitialized
47.long 0 47 * memory read).
48.long 1 48 *
49.long 3 49 * Mar-13-2003 -- P4 MMX is slightly slower than P4 NO_MMX. I created a C
50.long 7 50 * version of the non-MMX code so that it doesn't depend on zstrm and zstate
51.long 15 51 * structure offsets which are hard coded in this file. This was last tested
52.long 31 52 * with zlib-1.2.0 which is currently in beta testing, newer versions of this
53.long 63 53 * and inffas86.c can be found at http://www.eetbeetee.com/zlib/ and
54.long 127 54 * http://www.charm.net/~christop/zlib/
55.long 255 55 */
56.long 511 56
57.long 1023 57
58.long 2047 58/*
59.long 4095 59 * if you have underscore linking problems (_inflate_fast undefined), try
60.long 8191 60 * using -DGAS_COFF
61.long 16383 61 */
62.long 32767 62#if ! defined( GAS_COFF ) && ! defined( GAS_ELF )
63.long 65535 63
64.long 131071 64#if defined( __CYGWIN__ )
65.long 262143 65#define GAS_COFF /* windows object format */
66.long 524287 66#else
67.long 1048575 67#define GAS_ELF
68.long 2097151 68#endif
69.long 4194303 69
70.long 8388607 70#endif /* ! GAS_COFF && ! GAS_ELF */
71.long 16777215 71
72.long 33554431 72
73.long 67108863 73#if defined( GAS_COFF )
74.long 134217727 74
75.long 268435455 75/* coff externals have underscores */
76.long 536870911 76#define inflate_fast _inflate_fast
77.long 1073741823 77#define inflate_fast_use_mmx _inflate_fast_use_mmx
78.long 2147483647 78
79.long 4294967295 79#endif /* GAS_COFF */
80#endif 80
81 81
82.text 82.file "inffast.S"
83 83
84/* 84.globl inflate_fast
85 * struct z_stream offsets, in zlib.h 85
86 */ 86.text
87#define next_in_strm 0 /* strm->next_in */ 87.align 4,0
88#define avail_in_strm 4 /* strm->avail_in */ 88.L_invalid_literal_length_code_msg:
89#define next_out_strm 12 /* strm->next_out */ 89.string "invalid literal/length code"
90#define avail_out_strm 16 /* strm->avail_out */ 90
91#define msg_strm 24 /* strm->msg */ 91.align 4,0
92#define state_strm 28 /* strm->state */ 92.L_invalid_distance_code_msg:
93 93.string "invalid distance code"
94/* 94
95 * struct inflate_state offsets, in inflate.h 95.align 4,0
96 */ 96.L_invalid_distance_too_far_msg:
97#define mode_state 0 /* state->mode */ 97.string "invalid distance too far back"
98#define wsize_state 32 /* state->wsize */ 98
99#define write_state 36 /* state->write */ 99#if ! defined( NO_MMX )
100#define window_state 40 /* state->window */ 100.align 4,0
101#define hold_state 44 /* state->hold */ 101.L_mask: /* mask[N] = ( 1 << N ) - 1 */
102#define bits_state 48 /* state->bits */ 102.long 0
103#define lencode_state 64 /* state->lencode */ 103.long 1
104#define distcode_state 68 /* state->distcode */ 104.long 3
105#define lenbits_state 72 /* state->lenbits */ 105.long 7
106#define distbits_state 76 /* state->distbits */ 106.long 15
107 107.long 31
108/* 108.long 63
109 * inflate_fast's activation record 109.long 127
110 */ 110.long 255
111#define local_var_size 56 /* how much local space for vars */ 111.long 511
112#define strm_sp 80 /* first arg: z_stream * (local_var_size + 24) */ 112.long 1023
113#define start_sp 84 /* second arg: unsigned int (local_var_size + 28) */ 113.long 2047
114 114.long 4095
115/* 115.long 8191
116 * offsets for local vars on stack 116.long 16383
117 */ 117.long 32767
118#define out 52 /* unsigned char* */ 118.long 65535
119#define window 48 /* unsigned char* */ 119.long 131071
120#define wsize 44 /* unsigned int */ 120.long 262143
121#define write 40 /* unsigned int */ 121.long 524287
122#define in 36 /* unsigned char* */ 122.long 1048575
123#define beg 32 /* unsigned char* */ 123.long 2097151
124#define dist 28 /* unsigned int */ 124.long 4194303
125#define len 24 /* unsigned int */ 125.long 8388607
126#define last 20 /* unsigned char* */ 126.long 16777215
127#define end 16 /* unsigned char* */ 127.long 33554431
128#define dcode 12 /* code* */ 128.long 67108863
129#define lcode 8 /* code* */ 129.long 134217727
130#define dmask 4 /* unsigned int */ 130.long 268435455
131#define lmask 0 /* unsigned int */ 131.long 536870911
132 132.long 1073741823
133/* 133.long 2147483647
134 * typedef enum inflate_mode consts, in inflate.h 134.long 4294967295
135 */ 135#endif /* NO_MMX */
136#ifndef NO_GUNZIP 136
137#define GUNZIP 137.text
138#endif 138
139 139/*
140#ifdef GUNZIP 140 * struct z_stream offsets, in zlib.h
141#define INFLATE_MODE_TYPE 11 /* state->mode flags enum-ed in inflate.h */ 141 */
142#define INFLATE_MODE_BAD 26 142#define next_in_strm 0 /* strm->next_in */
143#else 143#define avail_in_strm 4 /* strm->avail_in */
144#define INFLATE_MODE_TYPE 3 144#define next_out_strm 12 /* strm->next_out */
145#define INFLATE_MODE_BAD 17 145#define avail_out_strm 16 /* strm->avail_out */
146#endif 146#define msg_strm 24 /* strm->msg */
147 147#define state_strm 28 /* strm->state */
148 148
149.align 16,0x90 149/*
150inflate_fast: 150 * struct inflate_state offsets, in inflate.h
151 pushl %edi 151 */
152 pushl %esi 152#define mode_state 0 /* state->mode */
153 pushl %ebp 153#define wsize_state 32 /* state->wsize */
154 pushl %ebx 154#define write_state 36 /* state->write */
155 pushf /* save eflags (strm_sp, state_sp assumes this is 32 bits) */ 155#define window_state 40 /* state->window */
156 subl $local_var_size, %esp 156#define hold_state 44 /* state->hold */
157 cld 157#define bits_state 48 /* state->bits */
158#if defined( USE_MMX ) 158#define lencode_state 64 /* state->lencode */
159 emms 159#define distcode_state 68 /* state->distcode */
160#endif 160#define lenbits_state 72 /* state->lenbits */
161 161#define distbits_state 76 /* state->distbits */
162#define strm_r %esi 162
163#define state_r %edi 163/*
164 164 * inflate_fast's activation record
165 movl strm_sp(%esp), strm_r 165 */
166 movl state_strm(strm_r), state_r 166#define local_var_size 64 /* how much local space for vars */
167 167#define strm_sp 88 /* first arg: z_stream * (local_var_size + 24) */
168 /* in = strm->next_in; 168#define start_sp 92 /* second arg: unsigned int (local_var_size + 28) */
169 * out = strm->next_out; 169
170 * last = in + strm->avail_in - 5; 170/*
171 * beg = out - (start - strm->avail_out); 171 * offsets for local vars on stack
172 * end = out + (strm->avail_out - 257); 172 */
173 */ 173#define out 60 /* unsigned char* */
174 movl next_in_strm(strm_r), %eax 174#define window 56 /* unsigned char* */
175 movl next_out_strm(strm_r), %ebx 175#define wsize 52 /* unsigned int */
176 movl avail_in_strm(strm_r), %edx 176#define write 48 /* unsigned int */
177 movl avail_out_strm(strm_r), %ecx 177#define in 44 /* unsigned char* */
178 movl start_sp(%esp), %ebp 178#define beg 40 /* unsigned char* */
179 179#define buf 28 /* char[ 12 ] */
180 addl %eax, %edx /* avail_in += next_in */ 180#define len 24 /* unsigned int */
181 subl $5, %edx /* avail_in -= 5 */ 181#define last 20 /* unsigned char* */
182 182#define end 16 /* unsigned char* */
183 subl %ecx, %ebp /* start -= avail_out */ 183#define dcode 12 /* code* */
184 negl %ebp /* start = -start */ 184#define lcode 8 /* code* */
185 addl %ebx, %ebp /* start += next_out */ 185#define dmask 4 /* unsigned int */
186 186#define lmask 0 /* unsigned int */
187 subl $257, %ecx /* avail_out -= 257 */ 187
188 addl %ebx, %ecx /* avail_out += out */ 188/*
189 189 * typedef enum inflate_mode consts, in inflate.h
190 movl %eax, in(%esp) 190 */
191 movl %ebx, out(%esp) 191#ifndef NO_GUNZIP
192 movl %edx, last(%esp) 192#define GUNZIP
193 movl %ebp, beg(%esp) 193#endif
194 movl %ecx, end(%esp) 194
195 195#ifdef GUNZIP
196 /* wsize = state->wsize; 196#define INFLATE_MODE_TYPE 11 /* state->mode flags enum-ed in inflate.h */
197 * write = state->write; 197#define INFLATE_MODE_BAD 26
198 * window = state->window; 198#else
199 * hold = state->hold; 199#define INFLATE_MODE_TYPE 3
200 * bits = state->bits; 200#define INFLATE_MODE_BAD 17
201 * lcode = state->lencode; 201#endif
202 * dcode = state->distcode; 202
203 * lmask = ( 1 << state->lenbits ) - 1; 203
204 * dmask = ( 1 << state->distbits ) - 1; 204#if ! defined( USE_MMX ) && ! defined( NO_MMX )
205 */ 205
206 206#define RUN_TIME_MMX
207 movl lencode_state(state_r), %eax 207
208 movl distcode_state(state_r), %ecx 208#define CHECK_MMX 1
209 209#define DO_USE_MMX 2
210 movl %eax, lcode(%esp) 210#define DONT_USE_MMX 3
211 movl %ecx, dcode(%esp) 211
212 212.globl inflate_fast_use_mmx
213 movl $1, %eax 213
214 movl lenbits_state(state_r), %ecx 214.data
215 shll %cl, %eax 215
216 decl %eax 216.align 4,0
217 movl %eax, lmask(%esp) 217inflate_fast_use_mmx: /* integer flag for run time control 1=check,2=mmx,3=no */
218 218.long CHECK_MMX
219 movl $1, %eax 219
220 movl distbits_state(state_r), %ecx 220#if defined( GAS_ELF )
221 shll %cl, %eax 221/* elf info */
222 decl %eax 222.type inflate_fast_use_mmx,@object
223 movl %eax, dmask(%esp) 223.size inflate_fast_use_mmx,4
224 224#endif
225 movl wsize_state(state_r), %eax 225
226 movl write_state(state_r), %ecx 226#endif /* RUN_TIME_MMX */
227 movl window_state(state_r), %edx 227
228 228#if defined( GAS_COFF )
229 movl %eax, wsize(%esp) 229/* coff info: scl 2 = extern, type 32 = function */
230 movl %ecx, write(%esp) 230.def inflate_fast; .scl 2; .type 32; .endef
231 movl %edx, window(%esp) 231#endif
232 232
233#if ! defined( USE_MMX ) 233.text
234 234
235#define hold_r %ebp 235.align 32,0x90
236#define bits_r %bl 236inflate_fast:
237#define bitslong_r %ebx 237 pushl %edi
238 238 pushl %esi
239 movl hold_state(state_r), hold_r 239 pushl %ebp
240 movl bits_state(state_r), bitslong_r 240 pushl %ebx
241 241 pushf /* save eflags (strm_sp, state_sp assumes this is 32 bits) */
242#else /* USE_MMX */ 242 subl $local_var_size, %esp
243 243 cld
244#define hold_mm %mm0 244
245#define bits_r %ebp 245#define strm_r %esi
246#define bitslong_r %ebp 246#define state_r %edi
247 247
248 movl hold_state(state_r), %ebx 248 movl strm_sp(%esp), strm_r
249 movl bits_state(state_r), bitslong_r 249 movl state_strm(strm_r), state_r
250 250
251#endif 251 /* in = strm->next_in;
252 252 * out = strm->next_out;
253#undef strm_r 253 * last = in + strm->avail_in - 11;
254#undef state_r 254 * beg = out - (start - strm->avail_out);
255#define in_r %esi 255 * end = out + (strm->avail_out - 257);
256#define from_r %esi 256 */
257#define out_r %edi 257 movl avail_in_strm(strm_r), %edx
258 258 movl next_in_strm(strm_r), %eax
259 movl in(%esp), in_r 259
260 260 addl %eax, %edx /* avail_in += next_in */
261#if ! defined ( USE_MMX ) 261 subl $11, %edx /* avail_in -= 11 */
262 262
263 /* align in_r on word boundary */ 263 movl %eax, in(%esp)
264 testl $1, in_r 264 movl %edx, last(%esp)
265 jz .L_is_aligned 265
266 xorl %eax, %eax 266 movl start_sp(%esp), %ebp
267 movb (in_r), %al 267 movl avail_out_strm(strm_r), %ecx
268 incl in_r 268 movl next_out_strm(strm_r), %ebx
269 movb bits_r, %cl 269
270 addb $8, bits_r 270 subl %ecx, %ebp /* start -= avail_out */
271 shll %cl, %eax 271 negl %ebp /* start = -start */
272 orl %eax, hold_r 272 addl %ebx, %ebp /* start += next_out */
273 273
274#else 274 subl $257, %ecx /* avail_out -= 257 */
275 /* align in_r on long boundary */ 275 addl %ebx, %ecx /* avail_out += out */
276.L_align_long: 276
277 testl $3, in_r 277 movl %ebx, out(%esp)
278 jz .L_is_aligned 278 movl %ebp, beg(%esp)
279 xorl %eax, %eax 279 movl %ecx, end(%esp)
280 movb (in_r), %al 280
281 incl in_r 281 /* wsize = state->wsize;
282 movl bits_r, %ecx 282 * write = state->write;
283 addl $8, bits_r 283 * window = state->window;
284 shll %cl, %eax 284 * hold = state->hold;
285 orl %eax, %ebx 285 * bits = state->bits;
286 jmp .L_align_long 286 * lcode = state->lencode;
287 287 * dcode = state->distcode;
288#endif 288 * lmask = ( 1 << state->lenbits ) - 1;
289 289 * dmask = ( 1 << state->distbits ) - 1;
290.L_is_aligned: 290 */
291 movl out(%esp), out_r 291
292 292 movl lencode_state(state_r), %eax
293#if defined ( USE_MMX ) 293 movl distcode_state(state_r), %ecx
294 294
295#define used_mm %mm1 295 movl %eax, lcode(%esp)
296#define dmask2_mm %mm2 296 movl %ecx, dcode(%esp)
297#define lmask2_mm %mm3 297
298#define lmask_mm %mm4 298 movl $1, %eax
299#define dmask_mm %mm5 299 movl lenbits_state(state_r), %ecx
300#define tmp_mm %mm6 300 shll %cl, %eax
301 301 decl %eax
302 movl out(%esp), out_r 302 movl %eax, lmask(%esp)
303 movd lmask(%esp), lmask_mm 303
304 movq lmask_mm, lmask2_mm 304 movl $1, %eax
305 movd dmask(%esp), dmask_mm 305 movl distbits_state(state_r), %ecx
306 movq dmask_mm, dmask2_mm 306 shll %cl, %eax
307 movd %ebx, hold_mm 307 decl %eax
308 pxor used_mm, used_mm 308 movl %eax, dmask(%esp)
309 movl lcode(%esp), %ebx /* ebx = lcode */ 309
310#endif 310 movl wsize_state(state_r), %eax
311 311 movl write_state(state_r), %ecx
312 jmp .L_do_loop 312 movl window_state(state_r), %edx
313 313
314.align 16,0x90 314 movl %eax, wsize(%esp)
315 315 movl %ecx, write(%esp)
316#if ! defined ( USE_MMX ) 316 movl %edx, window(%esp)
317 317
318.L_do_loop: 318 movl hold_state(state_r), %ebp
319 /* regs: %esi = in, %ebp = hold, %bl = bits, %edi = out 319 movl bits_state(state_r), %ebx
320 * 320
321 * do { 321#undef strm_r
322 * if (bits < 15) { 322#undef state_r
323 * hold |= *((unsigned short *)in)++ << bits; 323
324 * bits += 16 324#define in_r %esi
325 * } 325#define from_r %esi
326 * this = lcode[hold & lmask] 326#define out_r %edi
327 */ 327
328 cmpb $15, bits_r 328 movl in(%esp), in_r
329 ja .L_get_length_code /* if (15 < bits) */ 329 movl last(%esp), %ecx
330 330 cmpl in_r, %ecx
331 xorl %eax, %eax 331 ja .L_align_long /* if in < last */
332 lodsw /* al = *(ushort *)in++ */ 332
333 movb bits_r, %cl /* cl = bits, needs it for shifting */ 333 addl $11, %ecx /* ecx = &in[ avail_in ] */
334 addb $16, bits_r /* bits += 16 */ 334 subl in_r, %ecx /* ecx = avail_in */
335 shll %cl, %eax 335 movl $12, %eax
336 orl %eax, hold_r /* hold |= *((ushort *)in)++ << bits */ 336 subl %ecx, %eax /* eax = 12 - avail_in */
337 337 leal buf(%esp), %edi
338.L_get_length_code: 338 rep movsb /* memcpy( buf, in, avail_in ) */
339 movl lmask(%esp), %edx /* edx = lmask */ 339 movl %eax, %ecx
340 movl lcode(%esp), %ecx /* ecx = lcode */ 340 xorl %eax, %eax
341 andl hold_r, %edx /* edx &= hold */ 341 rep stosb /* memset( &buf[ avail_in ], 0, 12 - avail_in ) */
342 movl (%ecx,%edx,4), %eax /* eax = lcode[hold & lmask] */ 342 leal buf(%esp), in_r /* in = buf */
343 343 movl in_r, last(%esp) /* last = in, do just one iteration */
344#else /* USE_MMX */ 344 jmp .L_is_aligned
345 345
346.L_do_loop: 346 /* align in_r on long boundary */
347 psrlq used_mm, hold_mm /* hold_mm >>= last bit length */ 347.L_align_long:
348 348 testl $3, in_r
349 cmpl $32, bits_r 349 jz .L_is_aligned
350 ja .L_get_length_code /* if (32 < bits) */ 350 xorl %eax, %eax
351 351 movb (in_r), %al
352 movd bits_r, tmp_mm 352 incl in_r
353 movd (in_r), %mm7 353 movl %ebx, %ecx
354 addl $4, in_r 354 addl $8, %ebx
355 psllq tmp_mm, %mm7 355 shll %cl, %eax
356 addl $32, bits_r 356 orl %eax, %ebp
357 por %mm7, hold_mm /* hold_mm |= *((uint *)in)++ << bits */ 357 jmp .L_align_long
358 358
359.L_get_length_code: 359.L_is_aligned:
360 pand hold_mm, lmask_mm 360 movl out(%esp), out_r
361 movd lmask_mm, %eax 361
362 movq lmask2_mm, lmask_mm 362#if defined( NO_MMX )
363 movl (%ebx,%eax,4), %eax /* eax = lcode[hold & lmask] */ 363 jmp .L_do_loop
364 364#endif
365#endif 365
366 366#if defined( USE_MMX )
367#if ! defined( USE_MMX ) 367 jmp .L_init_mmx
368 368#endif
369.L_dolen: 369
370 /* regs: %esi = in, %ebp = hold, %bl = bits, %edi = out 370/*** Runtime MMX check ***/
371 * 371
372 * dolen: 372#if defined( RUN_TIME_MMX )
373 * bits -= this.bits; 373.L_check_mmx:
374 * hold >>= this.bits 374 cmpl $DO_USE_MMX, inflate_fast_use_mmx
375 */ 375 je .L_init_mmx
376 movb %ah, %cl /* cl = this.bits */ 376 ja .L_do_loop /* > 2 */
377 subb %ah, bits_r /* bits -= this.bits */ 377
378 shrl %cl, hold_r /* hold >>= this.bits */ 378 pushl %eax
379 379 pushl %ebx
380 /* check if op is a literal 380 pushl %ecx
381 * if (op == 0) { 381 pushl %edx
382 * PUP(out) = this.val; 382 pushf
383 * } 383 movl (%esp), %eax /* copy eflags to eax */
384 */ 384 xorl $0x200000, (%esp) /* try toggling ID bit of eflags (bit 21)
385 testb %al, %al 385 * to see if cpu supports cpuid...
386 jnz .L_test_for_length_base /* if (op != 0) 45.7% */ 386 * ID bit method not supported by NexGen but
387 387 * bios may load a cpuid instruction and
388 shrl $16, %eax /* output this.val char */ 388 * cpuid may be disabled on Cyrix 5-6x86 */
389 stosb 389 popf
390 390 pushf
391#else /* USE_MMX */ 391 popl %edx /* copy new eflags to edx */
392 392 xorl %eax, %edx /* test if ID bit is flipped */
393#define len_r %edx 393 jz .L_dont_use_mmx /* not flipped if zero */
394 394 xorl %eax, %eax
395.L_dolen: 395 cpuid
396 movzbl %ah, %ecx /* ecx = this.bits */ 396 cmpl $0x756e6547, %ebx /* check for GenuineIntel in ebx,ecx,edx */
397 movl %eax, len_r /* len = this */ 397 jne .L_dont_use_mmx
398 shrl $16, len_r /* len = this.val */ 398 cmpl $0x6c65746e, %ecx
399 movd %ecx, used_mm 399 jne .L_dont_use_mmx
400 subl %ecx, bits_r /* bits -= this.bits */ 400 cmpl $0x49656e69, %edx
401 401 jne .L_dont_use_mmx
402 testb %al, %al 402 movl $1, %eax
403 jnz .L_test_for_length_base /* if (op != 0) 45.7% */ 403 cpuid /* get cpu features */
404 404 shrl $8, %eax
405 movb %dl, (out_r) 405 andl $15, %eax
406 incl out_r 406 cmpl $6, %eax /* check for Pentium family, is 0xf for P4 */
407 407 jne .L_dont_use_mmx
408#endif 408 testl $0x800000, %edx /* test if MMX feature is set (bit 23) */
409 409 jnz .L_use_mmx
410.L_while_test: 410 jmp .L_dont_use_mmx
411 /* while (in < last && out < end) 411.L_use_mmx:
412 */ 412 movl $DO_USE_MMX, inflate_fast_use_mmx
413 cmpl out_r, end(%esp) 413 jmp .L_check_mmx_pop
414 jbe .L_break_loop /* if (out >= end) */ 414.L_dont_use_mmx:
415 415 movl $DONT_USE_MMX, inflate_fast_use_mmx
416 cmpl in_r, last(%esp) 416.L_check_mmx_pop:
417 ja .L_do_loop /* if (in < last) */ 417 popl %edx
418 jmp .L_break_loop 418 popl %ecx
419 419 popl %ebx
420#if ! defined( USE_MMX ) 420 popl %eax
421 421 jmp .L_check_mmx
422.L_test_for_length_base: 422#endif
423 /* regs: %esi = in, %ebp = hold, %bl = bits, %edi = out, %edx = len 423
424 * 424
425 * else if (op & 16) { 425/*** Non-MMX code ***/
426 * len = this.val 426
427 * op &= 15 427#if defined ( NO_MMX ) || defined( RUN_TIME_MMX )
428 * if (op) { 428
429 * if (op > bits) { 429#define hold_r %ebp
430 * hold |= *((unsigned short *)in)++ << bits; 430#define bits_r %bl
431 * bits += 16 431#define bitslong_r %ebx
432 * } 432
433 * len += hold & mask[op]; 433.align 32,0x90
434 * bits -= op; 434.L_while_test:
435 * hold >>= op; 435 /* while (in < last && out < end)
436 * } 436 */
437 */ 437 cmpl out_r, end(%esp)
438#define len_r %edx 438 jbe .L_break_loop /* if (out >= end) */
439 movl %eax, len_r /* len = this */ 439
440 shrl $16, len_r /* len = this.val */ 440 cmpl in_r, last(%esp)
441 movb %al, %cl 441 jbe .L_break_loop
442 442
443 testb $16, %al 443.L_do_loop:
444 jz .L_test_for_second_level_length /* if ((op & 16) == 0) 8% */ 444 /* regs: %esi = in, %ebp = hold, %bl = bits, %edi = out
445 andb $15, %cl /* op &= 15 */ 445 *
446 jz .L_save_len /* if (!op) */ 446 * do {
447 cmpb %cl, bits_r 447 * if (bits < 15) {
448 jae .L_add_bits_to_len /* if (op <= bits) */ 448 * hold |= *((unsigned short *)in)++ << bits;
449 449 * bits += 16
450 movb %cl, %ch /* stash op in ch, freeing cl */ 450 * }
451 xorl %eax, %eax 451 * this = lcode[hold & lmask]
452 lodsw /* al = *(ushort *)in++ */ 452 */
453 movb bits_r, %cl /* cl = bits, needs it for shifting */ 453 cmpb $15, bits_r
454 addb $16, bits_r /* bits += 16 */ 454 ja .L_get_length_code /* if (15 < bits) */
455 shll %cl, %eax 455
456 orl %eax, hold_r /* hold |= *((ushort *)in)++ << bits */ 456 xorl %eax, %eax
457 movb %ch, %cl /* move op back to ecx */ 457 lodsw /* al = *(ushort *)in++ */
458 458 movb bits_r, %cl /* cl = bits, needs it for shifting */
459.L_add_bits_to_len: 459 addb $16, bits_r /* bits += 16 */
460 movl $1, %eax 460 shll %cl, %eax
461 shll %cl, %eax 461 orl %eax, hold_r /* hold |= *((ushort *)in)++ << bits */
462 decl %eax 462
463 subb %cl, bits_r 463.L_get_length_code:
464 andl hold_r, %eax /* eax &= hold */ 464 movl lmask(%esp), %edx /* edx = lmask */
465 shrl %cl, hold_r 465 movl lcode(%esp), %ecx /* ecx = lcode */
466 addl %eax, len_r /* len += hold & mask[op] */ 466 andl hold_r, %edx /* edx &= hold */
467 467 movl (%ecx,%edx,4), %eax /* eax = lcode[hold & lmask] */
468.L_save_len: 468
469 movl len_r, len(%esp) /* save len */ 469.L_dolen:
470#undef len_r 470 /* regs: %esi = in, %ebp = hold, %bl = bits, %edi = out
471 471 *
472.L_decode_distance: 472 * dolen:
473 /* regs: %esi = in, %ebp = hold, %bl = bits, %edi = out, %edx = dist 473 * bits -= this.bits;
474 * 474 * hold >>= this.bits
475 * if (bits < 15) { 475 */
476 * hold |= *((unsigned short *)in)++ << bits; 476 movb %ah, %cl /* cl = this.bits */
477 * bits += 16 477 subb %ah, bits_r /* bits -= this.bits */
478 * } 478 shrl %cl, hold_r /* hold >>= this.bits */
479 * this = dcode[hold & dmask]; 479
480 * dodist: 480 /* check if op is a literal
481 * bits -= this.bits; 481 * if (op == 0) {
482 * hold >>= this.bits; 482 * PUP(out) = this.val;
483 * op = this.op; 483 * }
484 */ 484 */
485 485 testb %al, %al
486 cmpb $15, bits_r 486 jnz .L_test_for_length_base /* if (op != 0) 45.7% */
487 ja .L_get_distance_code /* if (15 < bits) */ 487
488 488 shrl $16, %eax /* output this.val char */
489 xorl %eax, %eax 489 stosb
490 lodsw /* al = *(ushort *)in++ */ 490 jmp .L_while_test
491 movb bits_r, %cl /* cl = bits, needs it for shifting */ 491
492 addb $16, bits_r /* bits += 16 */ 492.L_test_for_length_base:
493 shll %cl, %eax 493 /* regs: %esi = in, %ebp = hold, %bl = bits, %edi = out, %edx = len
494 orl %eax, hold_r /* hold |= *((ushort *)in)++ << bits */ 494 *
495 495 * else if (op & 16) {
496.L_get_distance_code: 496 * len = this.val
497 movl dmask(%esp), %edx /* edx = dmask */ 497 * op &= 15
498 movl dcode(%esp), %ecx /* ecx = dcode */ 498 * if (op) {
499 andl hold_r, %edx /* edx &= hold */ 499 * if (op > bits) {
500 movl (%ecx,%edx,4), %eax /* eax = dcode[hold & dmask] */ 500 * hold |= *((unsigned short *)in)++ << bits;
501 501 * bits += 16
502#else /* USE_MMX */ 502 * }
503 503 * len += hold & mask[op];
504.L_test_for_length_base: 504 * bits -= op;
505 testb $16, %al 505 * hold >>= op;
506 jz .L_test_for_second_level_length /* if ((op & 16) == 0) 8% */ 506 * }
507 andl $15, %eax /* op &= 15 */ 507 */
508 jz .L_decode_distance /* if (!op) */ 508#define len_r %edx
509 509 movl %eax, len_r /* len = this */
510 psrlq used_mm, hold_mm /* hold_mm >>= last bit length */ 510 shrl $16, len_r /* len = this.val */
511 movd %eax, used_mm 511 movb %al, %cl
512 movd hold_mm, %ecx 512
513 subl %eax, bits_r 513 testb $16, %al
514 andl .L_mask(,%eax,4), %ecx 514 jz .L_test_for_second_level_length /* if ((op & 16) == 0) 8% */
515 addl %ecx, len_r /* len += hold & mask[op] */ 515 andb $15, %cl /* op &= 15 */
516 516 jz .L_save_len /* if (!op) */
517.L_decode_distance: 517 cmpb %cl, bits_r
518 518 jae .L_add_bits_to_len /* if (op <= bits) */
519 psrlq used_mm, hold_mm /* hold_mm >>= last bit length */ 519
520 520 movb %cl, %ch /* stash op in ch, freeing cl */
521 cmpl $32, bits_r 521 xorl %eax, %eax
522 ja .L_get_dist_code /* if (32 < bits) */ 522 lodsw /* al = *(ushort *)in++ */
523 523 movb bits_r, %cl /* cl = bits, needs it for shifting */
524 movd bits_r, tmp_mm 524 addb $16, bits_r /* bits += 16 */
525 movd (in_r), %mm7 525 shll %cl, %eax
526 addl $4, in_r 526 orl %eax, hold_r /* hold |= *((ushort *)in)++ << bits */
527 psllq tmp_mm, %mm7 527 movb %ch, %cl /* move op back to ecx */
528 addl $32, bits_r 528
529 por %mm7, hold_mm /* hold_mm |= *((uint *)in)++ << bits */ 529.L_add_bits_to_len:
530 530 movl $1, %eax
531.L_get_dist_code: 531 shll %cl, %eax
532 movl dcode(%esp), %ebx /* ebx = dcode */ 532 decl %eax
533 pand hold_mm, dmask_mm 533 subb %cl, bits_r
534 movd dmask_mm, %eax 534 andl hold_r, %eax /* eax &= hold */
535 movq dmask2_mm, dmask_mm 535 shrl %cl, hold_r
536 movl (%ebx,%eax,4), %eax /* eax = dcode[hold & lmask] */ 536 addl %eax, len_r /* len += hold & mask[op] */
537 537
538#endif 538.L_save_len:
539 539 movl len_r, len(%esp) /* save len */
540#if ! defined( USE_MMX ) 540#undef len_r
541 541
542#define dist_r %edx 542.L_decode_distance:
543.L_dodist: 543 /* regs: %esi = in, %ebp = hold, %bl = bits, %edi = out, %edx = dist
544 movl %eax, dist_r /* dist = this */ 544 *
545 shrl $16, dist_r /* dist = this.val */ 545 * if (bits < 15) {
546 movb %ah, %cl 546 * hold |= *((unsigned short *)in)++ << bits;
547 subb %ah, bits_r /* bits -= this.bits */ 547 * bits += 16
548 shrl %cl, hold_r /* hold >>= this.bits */ 548 * }
549 549 * this = dcode[hold & dmask];
550 /* if (op & 16) { 550 * dodist:
551 * dist = this.val 551 * bits -= this.bits;
552 * op &= 15 552 * hold >>= this.bits;
553 * if (op > bits) { 553 * op = this.op;
554 * hold |= *((unsigned short *)in)++ << bits; 554 */
555 * bits += 16 555
556 * } 556 cmpb $15, bits_r
557 * dist += hold & mask[op]; 557 ja .L_get_distance_code /* if (15 < bits) */
558 * bits -= op; 558
559 * hold >>= op; 559 xorl %eax, %eax
560 */ 560 lodsw /* al = *(ushort *)in++ */
561 movb %al, %cl /* cl = this.op */ 561 movb bits_r, %cl /* cl = bits, needs it for shifting */
562 562 addb $16, bits_r /* bits += 16 */
563 testb $16, %al /* if ((op & 16) == 0) */ 563 shll %cl, %eax
564 jz .L_test_for_second_level_dist 564 orl %eax, hold_r /* hold |= *((ushort *)in)++ << bits */
565 andb $15, %cl /* op &= 15 */ 565
566 jz .L_check_dist_one 566.L_get_distance_code:
567 cmpb %cl, bits_r 567 movl dmask(%esp), %edx /* edx = dmask */
568 jae .L_add_bits_to_dist /* if (op <= bits) 97.6% */ 568 movl dcode(%esp), %ecx /* ecx = dcode */
569 569 andl hold_r, %edx /* edx &= hold */
570 movb %cl, %ch /* stash op in ch, freeing cl */ 570 movl (%ecx,%edx,4), %eax /* eax = dcode[hold & dmask] */
571 xorl %eax, %eax 571
572 lodsw /* al = *(ushort *)in++ */ 572#define dist_r %edx
573 movb bits_r, %cl /* cl = bits, needs it for shifting */ 573.L_dodist:
574 addb $16, bits_r /* bits += 16 */ 574 movl %eax, dist_r /* dist = this */
575 shll %cl, %eax 575 shrl $16, dist_r /* dist = this.val */
576 orl %eax, hold_r /* hold |= *((ushort *)in)++ << bits */ 576 movb %ah, %cl
577 movb %ch, %cl /* move op back to ecx */ 577 subb %ah, bits_r /* bits -= this.bits */
578 578 shrl %cl, hold_r /* hold >>= this.bits */
579.L_add_bits_to_dist: 579
580 movl $1, %eax 580 /* if (op & 16) {
581 shll %cl, %eax 581 * dist = this.val
582 decl %eax /* (1 << op) - 1 */ 582 * op &= 15
583 subb %cl, bits_r 583 * if (op > bits) {
584 andl hold_r, %eax /* eax &= hold */ 584 * hold |= *((unsigned short *)in)++ << bits;
585 shrl %cl, hold_r 585 * bits += 16
586 addl %eax, dist_r /* dist += hold & ((1 << op) - 1) */ 586 * }
587 jmp .L_check_window 587 * dist += hold & mask[op];
588 588 * bits -= op;
589#else /* USE_MMX */ 589 * hold >>= op;
590 590 */
591#define dist_r %ebx 591 movb %al, %cl /* cl = this.op */
592.L_dodist: 592
593 movzbl %ah, %ecx /* ecx = this.bits */ 593 testb $16, %al /* if ((op & 16) == 0) */
594 movl %eax, dist_r 594 jz .L_test_for_second_level_dist
595 shrl $16, dist_r /* dist = this.val */ 595 andb $15, %cl /* op &= 15 */
596 subl %ecx, bits_r /* bits -= this.bits */ 596 jz .L_check_dist_one
597 movd %ecx, used_mm /* used_mm = this.bits */ 597 cmpb %cl, bits_r
598 598 jae .L_add_bits_to_dist /* if (op <= bits) 97.6% */
599 testb $16, %al /* if ((op & 16) == 0) */ 599
600 jz .L_test_for_second_level_dist 600 movb %cl, %ch /* stash op in ch, freeing cl */
601 andl $15, %eax /* op &= 15 */ 601 xorl %eax, %eax
602 jz .L_check_dist_one 602 lodsw /* ax = *(ushort *)in++ */
603 603 movb bits_r, %cl /* cl = bits, needs it for shifting */
604.L_add_bits_to_dist: 604 addb $16, bits_r /* bits += 16 */
605 psrlq used_mm, hold_mm /* hold_mm >>= last bit length */ 605 shll %cl, %eax
606 movd %eax, used_mm /* save bit length of current op */ 606 orl %eax, hold_r /* hold |= *((ushort *)in)++ << bits */
607 movd hold_mm, %ecx /* get the next bits on input stream */ 607 movb %ch, %cl /* move op back to cl */
608 subl %eax, bits_r /* bits -= op bits */ 608
609 andl .L_mask(,%eax,4), %ecx /* ecx = hold & mask[op] */ 609.L_add_bits_to_dist:
610 addl %ecx, dist_r /* dist += hold & mask[op] */ 610 movl $1, %eax
611 jmp .L_check_window 611 shll %cl, %eax
612 612 decl %eax /* (1 << op) - 1 */
613#endif 613 subb %cl, bits_r
614 614 andl hold_r, %eax /* eax &= hold */
615.align 16,0x90 615 shrl %cl, hold_r
616 616 addl %eax, dist_r /* dist += hold & ((1 << op) - 1) */
617.L_check_dist_one: 617 jmp .L_check_window
618 cmpl $1, dist_r /* dist == 1: the match repeats the single previous output byte */ 618
619 jne .L_check_window 619.L_check_window:
620 cmpl out_r, beg(%esp) 620 /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist
621 je .L_check_window 621 * %ecx = nbytes
622 622 *
623 decl out_r 623 * nbytes = out - beg;
624#if ! defined( USE_MMX ) 624 * if (dist <= nbytes) {
625 movl len(%esp), %ecx 625 * from = out - dist;
626#else 626 * do {
627 movl len_r, %ecx 627 * PUP(out) = PUP(from);
628#endif 628 * } while (--len > 0);
629 movb (out_r), %al 629 * }
630 subl $3, %ecx 630 */
631 631
632 movb %al, 1(out_r) 632 movl in_r, in(%esp) /* save in so from can use its reg */
633 movb %al, 2(out_r) 633 movl out_r, %eax
634 movb %al, 3(out_r) 634 subl beg(%esp), %eax /* nbytes = out - beg */
635 addl $4, out_r 635
636 rep stosb 636 cmpl dist_r, %eax
637 637 jb .L_clip_window /* if (dist > nbytes) 4.2% */
638#if defined( USE_MMX ) 638
639 movl lcode(%esp), %ebx /* move lcode back to %ebx, toss dist */ 639 movl len(%esp), %ecx
640#endif 640 movl out_r, from_r
641 jmp .L_while_test 641 subl dist_r, from_r /* from = out - dist */
642 642
643.align 16,0x90 643 subl $3, %ecx
644 644 movb (from_r), %al
645.L_check_window: 645 movb %al, (out_r)
646 /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist 646 movb 1(from_r), %al
647 * %ecx = nbytes 647 movb 2(from_r), %dl
648 * 648 addl $3, from_r
649 * nbytes = out - beg; 649 movb %al, 1(out_r)
650 * if (dist <= nbytes) { 650 movb %dl, 2(out_r)
651 * from = out - dist; 651 addl $3, out_r
652 * do { 652 rep movsb
653 * PUP(out) = PUP(from); 653
654 * } while (--len > 0); 654 movl in(%esp), in_r /* move in back to %esi, toss from */
655 * } 655 jmp .L_while_test
656 */ 656
657 657.align 16,0x90
658 movl in_r, in(%esp) /* save in so from can use its reg */ 658.L_check_dist_one:
659 movl out_r, %eax 659 cmpl $1, dist_r
660 subl beg(%esp), %eax /* nbytes = out - beg */ 660 jne .L_check_window
661 661 cmpl out_r, beg(%esp)
662 cmpl dist_r, %eax 662 je .L_check_window
663 jb .L_clip_window /* if (dist > nbytes) 4.2% */ 663
664 664 decl out_r
665#if ! defined( USE_MMX ) 665 movl len(%esp), %ecx
666 movl len(%esp), %ecx 666 movb (out_r), %al
667#else 667 subl $3, %ecx
668 movl len_r, %ecx 668
669#endif 669 movb %al, 1(out_r)
670 movl out_r, from_r 670 movb %al, 2(out_r)
671 subl dist_r, from_r /* from = out - dist */ 671 movb %al, 3(out_r)
672 672 addl $4, out_r
673 subl $3, %ecx /* three bytes are copied by hand before rep movsb */ 673 rep stosb
674 movb (from_r), %al 674
675 movb %al, (out_r) 675 jmp .L_while_test
676 movb 1(from_r), %al 676
677 movb 2(from_r), %dl 677.align 16,0x90
678 addl $3, from_r 678.L_test_for_second_level_length:
679 movb %al, 1(out_r) 679 /* else if ((op & 64) == 0) {
680 movb %dl, 2(out_r) 680 * this = lcode[this.val + (hold & mask[op])];
681 addl $3, out_r 681 * }
682 rep movsb 682 */
683 683 testb $64, %al
684 movl in(%esp), in_r /* move in back to %esi, toss from */ 684 jnz .L_test_for_end_of_block /* if ((op & 64) != 0) */
685#if defined( USE_MMX ) 685
686 movl lcode(%esp), %ebx /* move lcode back to %ebx, toss dist */ 686 movl $1, %eax
687#endif 687 shll %cl, %eax
688 jmp .L_while_test 688 decl %eax
689 689 andl hold_r, %eax /* eax &= hold */
690.align 16,0x90 690 addl %edx, %eax /* eax += this.val */
691 691 movl lcode(%esp), %edx /* edx = lcode */
692#if ! defined( USE_MMX ) 692 movl (%edx,%eax,4), %eax /* eax = lcode[val + (hold&mask[op])] */
693 693 jmp .L_dolen
694.L_test_for_second_level_length: 694
695 /* else if ((op & 64) == 0) { 695.align 16,0x90
696 * this = lcode[this.val + (hold & mask[op])]; 696.L_test_for_second_level_dist:
697 * } 697 /* else if ((op & 64) == 0) {
698 */ 698 * this = dcode[this.val + (hold & mask[op])];
699 testb $64, %al 699 * }
700 jnz .L_test_for_end_of_block /* if ((op & 64) != 0) */ 700 */
701 701 testb $64, %al
702 movl $1, %eax 702 jnz .L_invalid_distance_code /* if ((op & 64) != 0) */
703 shll %cl, %eax 703
704 decl %eax 704 movl $1, %eax
705 andl hold_r, %eax /* eax &= hold */ 705 shll %cl, %eax
706 addl %edx, %eax /* eax += this.val */ 706 decl %eax
707 movl lcode(%esp), %edx /* edx = lcode */ 707 andl hold_r, %eax /* eax &= hold */
708 movl (%edx,%eax,4), %eax /* eax = lcode[val + (hold&mask[op])] */ 708 addl %edx, %eax /* eax += this.val */
709 jmp .L_dolen 709 movl dcode(%esp), %edx /* edx = dcode */
710 710 movl (%edx,%eax,4), %eax /* eax = dcode[val + (hold&mask[op])] */
711#else /* USE_MMX */ 711 jmp .L_dodist
712 712
713.L_test_for_second_level_length: 713.align 16,0x90
714 testb $64, %al 714.L_clip_window:
715 jnz .L_test_for_end_of_block /* if ((op & 64) != 0) */ 715 /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist
716 716 * %ecx = nbytes
717 andl $15, %eax 717 *
718 psrlq used_mm, hold_mm /* hold_mm >>= last bit length */ 718 * else {
719 movd hold_mm, %ecx 719 * if (dist > wsize) {
720 andl .L_mask(,%eax,4), %ecx 720 * invalid distance
721 addl len_r, %ecx 721 * }
722 movl (%ebx,%ecx,4), %eax /* eax = lcode[val + (hold & mask[op])] */ 722 * from = window;
723 jmp .L_dolen 723 * nbytes = dist - nbytes;
724 724 * if (write == 0) {
725#endif 725 * from += wsize - nbytes;
726 726 */
727.align 16,0x90 727#define nbytes_r %ecx
728 728 movl %eax, nbytes_r
729#if ! defined( USE_MMX ) 729 movl wsize(%esp), %eax /* prepare for dist compare */
730 730 negl nbytes_r /* nbytes = -nbytes */
731.L_test_for_second_level_dist: 731 movl window(%esp), from_r /* from = window */
732 /* else if ((op & 64) == 0) { 732
733 * this = dcode[this.val + (hold & mask[op])]; 733 cmpl dist_r, %eax
734 * } 734 jb .L_invalid_distance_too_far /* if (dist > wsize) */
735 */ 735
736 testb $64, %al 736 addl dist_r, nbytes_r /* nbytes = dist - nbytes */
737 jnz .L_invalid_distance_code /* if ((op & 64) != 0) */ 737 cmpl $0, write(%esp)
738 738 jne .L_wrap_around_window /* if (write != 0) */
739 movl $1, %eax 739
740 shll %cl, %eax 740 subl nbytes_r, %eax
741 decl %eax 741 addl %eax, from_r /* from += wsize - nbytes */
742 andl hold_r, %eax /* eax &= hold */ 742
743 addl %edx, %eax /* eax += this.val */ 743 /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist
744 movl dcode(%esp), %edx /* edx = dcode */ 744 * %ecx = nbytes, %eax = len
745 movl (%edx,%eax,4), %eax /* eax = dcode[val + (hold&mask[op])] */ 745 *
746 jmp .L_dodist 746 * if (nbytes < len) {
747 747 * len -= nbytes;
748#else /* USE_MMX */ 748 * do {
749 749 * PUP(out) = PUP(from);
750.L_test_for_second_level_dist: 750 * } while (--nbytes);
751 testb $64, %al 751 * from = out - dist;
752 jnz .L_invalid_distance_code /* if ((op & 64) != 0) */ 752 * }
753 753 * }
754 andl $15, %eax 754 */
755 psrlq used_mm, hold_mm /* hold_mm >>= last bit length */ 755#define len_r %eax
756 movd hold_mm, %ecx 756 movl len(%esp), len_r
757 andl .L_mask(,%eax,4), %ecx 757 cmpl nbytes_r, len_r
758 movl dcode(%esp), %eax /* eax = dcode */ 758 jbe .L_do_copy1 /* if (nbytes >= len) */
759 addl dist_r, %ecx 759
760 movl (%eax,%ecx,4), %eax /* eax = dcode[val + (hold & mask[op])] */ 760 subl nbytes_r, len_r /* len -= nbytes */
761 jmp .L_dodist 761 rep movsb
762 762 movl out_r, from_r
763#endif 763 subl dist_r, from_r /* from = out - dist */
764 764 jmp .L_do_copy1
765.align 16,0x90 765
766.L_clip_window: 766 cmpl nbytes_r, len_r /* NOTE(review): unreachable - no label follows the unconditional jmp above; this repeats the preceding compare/copy sequence */
767 /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist 767 jbe .L_do_copy1 /* if (nbytes >= len) */
768 * %ecx = nbytes 768
769 * 769 subl nbytes_r, len_r /* len -= nbytes */
770 * else { 770 rep movsb
771 * if (dist > wsize) { 771 movl out_r, from_r
772 * invalid distance 772 subl dist_r, from_r /* from = out - dist */
773 * } 773 jmp .L_do_copy1
774 * from = window; 774
775 * nbytes = dist - nbytes; 775.L_wrap_around_window:
776 * if (write == 0) { 776 /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist
777 * from += wsize - nbytes; 777 * %ecx = nbytes, %eax = write, %eax = len
778 */ 778 *
779#define nbytes_r %ecx 779 * else if (write < nbytes) {
780 780 * from += wsize + write - nbytes;
781 movl %eax, nbytes_r 781 * nbytes -= write;
782 movl wsize(%esp), %eax /* prepare for dist compare */ 782 * if (nbytes < len) {
783 negl nbytes_r /* nbytes = -nbytes */ 783 * len -= nbytes;
784 movl window(%esp), from_r /* from = window */ 784 * do {
785 785 * PUP(out) = PUP(from);
786 cmpl dist_r, %eax 786 * } while (--nbytes);
787 jb .L_invalid_distance_too_far /* if (dist > wsize) */ 787 * from = window;
788 788 * nbytes = write;
789 addl dist_r, nbytes_r /* nbytes = dist - nbytes */ 789 * if (nbytes < len) {
790 cmpl $0, write(%esp) 790 * len -= nbytes;
791 jne .L_wrap_around_window /* if (write != 0) */ 791 * do {
792 792 * PUP(out) = PUP(from);
793 subl nbytes_r, %eax 793 * } while(--nbytes);
794 addl %eax, from_r /* from += wsize - nbytes */ 794 * from = out - dist;
795 795 * }
796 /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist 796 * }
797 * %ecx = nbytes, %eax = len 797 * }
798 * 798 */
799 * if (nbytes < len) { 799#define write_r %eax
800 * len -= nbytes; 800 movl write(%esp), write_r
801 * do { 801 cmpl write_r, nbytes_r
802 * PUP(out) = PUP(from); 802 jbe .L_contiguous_in_window /* if (write >= nbytes) */
803 * } while (--nbytes); 803
804 * from = out - dist; 804 addl wsize(%esp), from_r
805 * } 805 addl write_r, from_r
806 * } 806 subl nbytes_r, from_r /* from += wsize + write - nbytes */
807 */ 807 subl write_r, nbytes_r /* nbytes -= write */
808 808#undef write_r
809#if ! defined( USE_MMX ) 809
810#define len_r %eax 810 movl len(%esp), len_r
811 movl len(%esp), len_r 811 cmpl nbytes_r, len_r
812#endif 812 jbe .L_do_copy1 /* if (nbytes >= len) */
813 cmpl nbytes_r, len_r 813
814 jbe .L_do_copy1 /* if (nbytes >= len) */ 814 subl nbytes_r, len_r /* len -= nbytes */
815 815 rep movsb
816 subl nbytes_r, len_r /* len -= nbytes */ 816 movl window(%esp), from_r /* from = window */
817 rep movsb 817 movl write(%esp), nbytes_r /* nbytes = write */
818 movl out_r, from_r 818 cmpl nbytes_r, len_r
819 subl dist_r, from_r /* from = out - dist */ 819 jbe .L_do_copy1 /* if (nbytes >= len) */
820 jmp .L_do_copy1 820
821 821 subl nbytes_r, len_r /* len -= nbytes */
822 cmpl nbytes_r, len_r /* NOTE(review): unreachable - no label follows the unconditional jmp above; this repeats the preceding compare/copy sequence */ 822 rep movsb
823 jbe .L_do_copy1 /* if (nbytes >= len) */ 823 movl out_r, from_r
824 824 subl dist_r, from_r /* from = out - dist */
825 subl nbytes_r, len_r /* len -= nbytes */ 825 jmp .L_do_copy1
826 rep movsb 826
827 movl out_r, from_r 827.L_contiguous_in_window:
828 subl dist_r, from_r /* from = out - dist */ 828 /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist
829 jmp .L_do_copy1 829 * %ecx = nbytes, %eax = write, %eax = len
830 830 *
831.L_wrap_around_window: 831 * else {
832 /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist 832 * from += write - nbytes;
833 * %ecx = nbytes, %eax = write, %eax = len 833 * if (nbytes < len) {
834 * 834 * len -= nbytes;
835 * else if (write < nbytes) { 835 * do {
836 * from += wsize + write - nbytes; 836 * PUP(out) = PUP(from);
837 * nbytes -= write; 837 * } while (--nbytes);
838 * if (nbytes < len) { 838 * from = out - dist;
839 * len -= nbytes; 839 * }
840 * do { 840 * }
841 * PUP(out) = PUP(from); 841 */
842 * } while (--nbytes); 842#define write_r %eax
843 * from = window; 843 addl write_r, from_r
844 * nbytes = write; 844 subl nbytes_r, from_r /* from += write - nbytes */
845 * if (nbytes < len) { 845#undef write_r
846 * len -= nbytes; 846
847 * do { 847 movl len(%esp), len_r
848 * PUP(out) = PUP(from); 848 cmpl nbytes_r, len_r
849 * } while(--nbytes); 849 jbe .L_do_copy1 /* if (nbytes >= len) */
850 * from = out - dist; 850
851 * } 851 subl nbytes_r, len_r /* len -= nbytes */
852 * } 852 rep movsb
853 * } 853 movl out_r, from_r
854 */ 854 subl dist_r, from_r /* from = out - dist */
855#define write_r %eax 855
856 856.L_do_copy1:
857 movl write(%esp), write_r 857 /* regs: %esi = from, %esi = in, %ebp = hold, %bl = bits, %edi = out
858 cmpl write_r, nbytes_r 858 * %eax = len
859 jbe .L_contiguous_in_window /* if (write >= nbytes) */ 859 *
860 860 * while (len > 0) {
861 addl wsize(%esp), from_r 861 * PUP(out) = PUP(from);
862 addl write_r, from_r 862 * len--;
863 subl nbytes_r, from_r /* from += wsize + write - nbytes */ 863 * }
864 subl write_r, nbytes_r /* nbytes -= write */ 864 * }
865#undef write_r 865 * } while (in < last && out < end);
866 866 */
867#if ! defined( USE_MMX ) 867#undef nbytes_r
868 movl len(%esp), len_r 868#define in_r %esi
869#endif 869 movl len_r, %ecx
870 cmpl nbytes_r, len_r 870 rep movsb /* copy the remaining len bytes (count in %ecx) from from to out */
871 jbe .L_do_copy1 /* if (nbytes >= len) */ 871
872 872 movl in(%esp), in_r /* move in back to %esi, toss from */
873 subl nbytes_r, len_r /* len -= nbytes */ 873 jmp .L_while_test
874 rep movsb 874
875 movl window(%esp), from_r /* from = window */ 875#undef len_r
876 movl write(%esp), nbytes_r /* nbytes = write */ 876#undef dist_r
877 cmpl nbytes_r, len_r 877
878 jbe .L_do_copy1 /* if (nbytes >= len) */ 878#endif /* NO_MMX || RUN_TIME_MMX */
879 879
880 subl nbytes_r, len_r /* len -= nbytes */ 880
881 rep movsb 881/*** MMX code ***/
882 movl out_r, from_r 882
883 subl dist_r, from_r /* from = out - dist */ 883#if defined( USE_MMX ) || defined( RUN_TIME_MMX )
884 jmp .L_do_copy1 884
885 885.align 32,0x90
886.L_contiguous_in_window: 886.L_init_mmx:
887 /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist 887 emms
888 * %ecx = nbytes, %eax = write, %eax = len 888
889 * 889#undef bits_r
890 * else { 890#undef bitslong_r
891 * from += write - nbytes; 891#define bitslong_r %ebp
892 * if (nbytes < len) { 892#define hold_mm %mm0
893 * len -= nbytes; 893 movd %ebp, hold_mm
894 * do { 894 movl %ebx, bitslong_r
895 * PUP(out) = PUP(from); 895
896 * } while (--nbytes); 896#define used_mm %mm1
897 * from = out - dist; 897#define dmask2_mm %mm2
898 * } 898#define lmask2_mm %mm3
899 * } 899#define lmask_mm %mm4
900 */ 900#define dmask_mm %mm5
901#define write_r %eax 901#define tmp_mm %mm6
902 902
903 addl write_r, from_r 903 movd lmask(%esp), lmask_mm /* lmask_mm = lmask */
904 subl nbytes_r, from_r /* from += write - nbytes */ 904 movq lmask_mm, lmask2_mm /* keep a copy so lmask_mm can be restored after each pand */
905#undef write_r 905 movd dmask(%esp), dmask_mm /* dmask_mm = dmask */
906 906 movq dmask_mm, dmask2_mm /* keep a copy so dmask_mm can be restored after each pand */
907#if ! defined( USE_MMX ) 907 pxor used_mm, used_mm /* used_mm = 0 */
908 movl len(%esp), len_r 908 movl lcode(%esp), %ebx /* ebx = lcode */
909#endif 909 jmp .L_do_loop_mmx
910 cmpl nbytes_r, len_r 910
911 jbe .L_do_copy1 /* if (nbytes >= len) */ 911.align 32,0x90
912 912.L_while_test_mmx:
913 subl nbytes_r, len_r /* len -= nbytes */ 913 /* while (in < last && out < end)
914 rep movsb 914 */
915 movl out_r, from_r 915 cmpl out_r, end(%esp)
916 subl dist_r, from_r /* from = out - dist */ 916 jbe .L_break_loop /* if (out >= end) */
917 917
918.L_do_copy1: 918 cmpl in_r, last(%esp)
919 /* regs: %esi = from, %esi = in, %ebp = hold, %bl = bits, %edi = out 919 jbe .L_break_loop
920 * %eax = len 920
921 * 921.L_do_loop_mmx:
922 * while (len > 0) { 922 psrlq used_mm, hold_mm /* hold_mm >>= last bit length */
923 * PUP(out) = PUP(from); 923
924 * len--; 924 cmpl $32, bitslong_r
925 * } 925 ja .L_get_length_code_mmx /* if (32 < bits) */
926 * } 926
927 * } while (in < last && out < end); 927 movd bitslong_r, tmp_mm
928 */ 928 movd (in_r), %mm7
929#undef nbytes_r 929 addl $4, in_r
930#define in_r %esi 930 psllq tmp_mm, %mm7
931 931 addl $32, bitslong_r
932 movl len_r, %ecx 932 por %mm7, hold_mm /* hold_mm |= *((uint *)in)++ << bits */
933 rep movsb /* copy the remaining len bytes (count in %ecx) */ 933
934 934.L_get_length_code_mmx:
935 movl in(%esp), in_r /* move in back to %esi, toss from */ 935 pand hold_mm, lmask_mm
936#if defined( USE_MMX ) 936 movd lmask_mm, %eax
937 movl lcode(%esp), %ebx /* move lcode back to %ebx, toss dist */ 937 movq lmask2_mm, lmask_mm /* restore lmask_mm, which the pand above overwrote */
938#endif 938 movl (%ebx,%eax,4), %eax /* eax = lcode[hold & lmask] */
939 jmp .L_while_test 939
940 940.L_dolen_mmx:
941#undef len_r 941 movzbl %ah, %ecx /* ecx = this.bits */
942#undef from_r 942 movd %ecx, used_mm
943#undef dist_r 943 subl %ecx, bitslong_r /* bits -= this.bits */
944 944
945.L_invalid_distance_code: 945 testb %al, %al
946 /* else { 946 jnz .L_test_for_length_base_mmx /* if (op != 0) 45.7% */
947 * strm->msg = "invalid distance code"; 947
948 * state->mode = BAD; 948 shrl $16, %eax /* output this.val char */
949 * } 949 stosb
950 */ 950 jmp .L_while_test_mmx
951 movl $.L_invalid_distance_code_msg, %ecx 951
952 movl $INFLATE_MODE_BAD, %edx 952.L_test_for_length_base_mmx:
953 jmp .L_update_stream_state 953#define len_r %edx
954 954 movl %eax, len_r /* len = this */
955.L_test_for_end_of_block: 955 shrl $16, len_r /* len = this.val */
956 /* else if (op & 32) { 956
957 * state->mode = TYPE; 957 testb $16, %al
958 * break; 958 jz .L_test_for_second_level_length_mmx /* if ((op & 16) == 0) 8% */
959 * } 959 andl $15, %eax /* op &= 15 */
960 */ 960 jz .L_decode_distance_mmx /* if (!op) */
961 testb $32, %al 961
962 jz .L_invalid_literal_length_code /* if ((op & 32) == 0) */ 962 psrlq used_mm, hold_mm /* hold_mm >>= last bit length */
963 963 movd %eax, used_mm
964 movl $0, %ecx 964 movd hold_mm, %ecx
965 movl $INFLATE_MODE_TYPE, %edx 965 subl %eax, bitslong_r
966 jmp .L_update_stream_state 966 andl .L_mask(,%eax,4), %ecx
967 967 addl %ecx, len_r /* len += hold & mask[op] */
968.L_invalid_literal_length_code: 968
969 /* else { 969.L_decode_distance_mmx:
970 * strm->msg = "invalid literal/length code"; 970 psrlq used_mm, hold_mm /* hold_mm >>= last bit length */
971 * state->mode = BAD; 971
972 * } 972 cmpl $32, bitslong_r
973 */ 973 ja .L_get_dist_code_mmx /* if (32 < bits) */
974 movl $.L_invalid_literal_length_code_msg, %ecx 974
975 movl $INFLATE_MODE_BAD, %edx 975 movd bitslong_r, tmp_mm
976 jmp .L_update_stream_state 976 movd (in_r), %mm7
977 977 addl $4, in_r
978.L_invalid_distance_too_far: 978 psllq tmp_mm, %mm7
979 /* strm->msg = "invalid distance too far back"; 979 addl $32, bitslong_r
980 * state->mode = BAD; 980 por %mm7, hold_mm /* hold_mm |= *((uint *)in)++ << bits */
981 */ 981
982 movl in(%esp), in_r /* from_r has in's reg, put in back */ 982.L_get_dist_code_mmx:
983 movl $.L_invalid_distance_too_far_msg, %ecx 983 movl dcode(%esp), %ebx /* ebx = dcode */
984 movl $INFLATE_MODE_BAD, %edx 984 pand hold_mm, dmask_mm
985 jmp .L_update_stream_state 985 movd dmask_mm, %eax
986 986 movq dmask2_mm, dmask_mm
987.L_update_stream_state: 987 movl (%ebx,%eax,4), %eax /* eax = dcode[hold & dmask] */
988 /* set strm->msg = %ecx, strm->state->mode = %edx */ 988
989 movl strm_sp(%esp), %eax 989.L_dodist_mmx:
990 testl %ecx, %ecx /* if (msg != NULL) */ 990#define dist_r %ebx
991 jz .L_skip_msg 991 movzbl %ah, %ecx /* ecx = this.bits */
992 movl %ecx, msg_strm(%eax) /* strm->msg = msg */ 992 movl %eax, dist_r
993.L_skip_msg: 993 shrl $16, dist_r /* dist = this.val */
994 movl state_strm(%eax), %eax /* state = strm->state */ 994 subl %ecx, bitslong_r /* bits -= this.bits */
995 movl %edx, mode_state(%eax) /* state->mode = edx (BAD | TYPE) */ 995 movd %ecx, used_mm
996 996
997.L_break_loop: 997 testb $16, %al /* if ((op & 16) == 0) */
998 998 jz .L_test_for_second_level_dist_mmx
999#define strm_r %eax 999 andl $15, %eax /* op &= 15 */
1000#define state_r %edx 1000 jz .L_check_dist_one_mmx
1001 1001
1002 /* len = bits >> 3; 1002.L_add_bits_to_dist_mmx:
1003 * in -= len; 1003 psrlq used_mm, hold_mm /* hold_mm >>= last bit length */
1004 * bits -= len << 3; 1004 movd %eax, used_mm /* save bit length of current op */
1005 * hold &= (1U << bits) - 1; 1005 movd hold_mm, %ecx /* get the next bits on input stream */
1006 * state->hold = hold; 1006 subl %eax, bitslong_r /* bits -= op bits */
1007 * state->bits = bits; 1007 andl .L_mask(,%eax,4), %ecx /* ecx = hold & mask[op] */
1008 * strm->next_in = in; 1008 addl %ecx, dist_r /* dist += hold & mask[op] */
1009 * strm->next_out = out; 1009
1010 */ 1010.L_check_window_mmx:
1011 movl strm_sp(%esp), strm_r 1011 movl in_r, in(%esp) /* save in so from can use its reg */
1012 movl bitslong_r, %ecx 1012 movl out_r, %eax
1013 movl state_strm(strm_r), state_r 1013 subl beg(%esp), %eax /* nbytes = out - beg */
1014 shrl $3, %ecx 1014
1015 subl %ecx, in_r 1015 cmpl dist_r, %eax
1016 shll $3, %ecx 1016 jb .L_clip_window_mmx /* if (dist > nbytes) 4.2% */
1017 subl %ecx, bitslong_r 1017
1018 movl out_r, next_out_strm(strm_r) 1018 movl len_r, %ecx
1019 movl in_r, next_in_strm(strm_r) 1019 movl out_r, from_r
1020 movl bitslong_r, bits_state(state_r) 1020 subl dist_r, from_r /* from = out - dist */
1021 1021
1022 movl bitslong_r, %ecx 1022 subl $3, %ecx
1023 movl $1, %ebx /* overwrites bitslong_r, %bl */ 1023 movb (from_r), %al
1024 shll %cl, %ebx 1024 movb %al, (out_r)
1025 decl %ebx 1025 movb 1(from_r), %al
1026 1026 movb 2(from_r), %dl
1027#undef bits_r 1027 addl $3, from_r
1028#undef bitslong_r 1028 movb %al, 1(out_r)
1029 1029 movb %dl, 2(out_r)
1030#if ! defined( USE_MMX ) 1030 addl $3, out_r
1031 1031 rep movsb
1032 andl %ebx, hold_r 1032
1033 movl hold_r, hold_state(state_r) 1033 movl in(%esp), in_r /* move in back to %esi, toss from */
1034 1034 movl lcode(%esp), %ebx /* move lcode back to %ebx, toss dist */
1035#else /* USE_MMX */ 1035 jmp .L_while_test_mmx
1036 1036
1037 psrlq used_mm, hold_mm /* hold_mm >>= last bit length */ 1037.align 16,0x90
1038 movd hold_mm, %ecx 1038.L_check_dist_one_mmx:
1039 andl %ebx, %ecx 1039 cmpl $1, dist_r
1040 movl %ecx, hold_state(state_r) 1040 jne .L_check_window_mmx
1041 1041 cmpl out_r, beg(%esp)
1042#endif 1042 je .L_check_window_mmx
1043 1043
1044#define last_r %ebx 1044 decl out_r
1045 1045 movl len_r, %ecx
1046 /* strm->avail_in = in < last ? 5 + (last - in) : 5 - (in - last) */ 1046 movb (out_r), %al
1047 movl last(%esp), last_r 1047 subl $3, %ecx
1048 cmpl in_r, last_r 1048
1049 jbe .L_last_is_smaller /* if (in >= last) */ 1049 movb %al, 1(out_r)
1050 1050 movb %al, 2(out_r)
1051 subl in_r, last_r /* last -= in */ 1051 movb %al, 3(out_r)
1052 addl $5, last_r /* last += 5 */ 1052 addl $4, out_r
1053 movl last_r, avail_in_strm(strm_r) 1053 rep stosb /* store the remaining len - 3 copies of %al */
1054 jmp .L_fixup_out 1054
1055.L_last_is_smaller: 1055 movl lcode(%esp), %ebx /* move lcode back to %ebx, toss dist */
1056 subl last_r, in_r /* in -= last */ 1056 jmp .L_while_test_mmx
1057 negl in_r /* in = -in */ 1057
1058 addl $5, in_r /* in += 5 */ 1058.align 16,0x90
1059 movl in_r, avail_in_strm(strm_r) 1059.L_test_for_second_level_length_mmx:
1060 1060 testb $64, %al
1061#undef last_r 1061 jnz .L_test_for_end_of_block /* if ((op & 64) != 0) */
1062#define end_r %ebx 1062
1063 1063 andl $15, %eax
1064.L_fixup_out: 1064 psrlq used_mm, hold_mm /* hold_mm >>= last bit length */
1065 /* strm->avail_out = out < end ? 257 + (end - out) : 257 - (out - end)*/ 1065 movd hold_mm, %ecx
1066 movl end(%esp), end_r 1066 andl .L_mask(,%eax,4), %ecx
1067 cmpl out_r, end_r 1067 addl len_r, %ecx
1068 jbe .L_end_is_smaller /* if (out >= end) */ 1068 movl (%ebx,%ecx,4), %eax /* eax = lcode[val + (hold & mask[op])] */
1069 1069 jmp .L_dolen_mmx
1070 subl out_r, end_r /* end -= out */ 1070
1071 addl $257, end_r /* end += 257 */ 1071.align 16,0x90
1072 movl end_r, avail_out_strm(strm_r) 1072.L_test_for_second_level_dist_mmx:
1073 jmp .L_done 1073 testb $64, %al
1074.L_end_is_smaller: 1074 jnz .L_invalid_distance_code /* if ((op & 64) != 0) */
1075 subl end_r, out_r /* out -= end */ 1075
1076 negl out_r /* out = -out */ 1076 andl $15, %eax
1077 addl $257, out_r /* out += 257 */ 1077 psrlq used_mm, hold_mm /* hold_mm >>= last bit length */
1078 movl out_r, avail_out_strm(strm_r) 1078 movd hold_mm, %ecx
1079 1079 andl .L_mask(,%eax,4), %ecx
1080#undef end_r 1080 movl dcode(%esp), %eax /* eax = dcode */
1081 1081 addl dist_r, %ecx
1082.L_done: 1082 movl (%eax,%ecx,4), %eax /* eax = dcode[val + (hold & mask[op])] */
1083#if defined( USE_MMX ) 1083 jmp .L_dodist_mmx
1084 emms 1084
1085#endif 1085.align 16,0x90
1086 addl $local_var_size, %esp 1086.L_clip_window_mmx:
1087 popf 1087#define nbytes_r %ecx
1088 popl %ebx 1088 movl %eax, nbytes_r
1089 popl %ebp 1089 movl wsize(%esp), %eax /* prepare for dist compare */
1090 popl %esi 1090 negl nbytes_r /* nbytes = -nbytes */
1091 popl %edi 1091 movl window(%esp), from_r /* from = window */
1092 ret 1092
1093 1093 cmpl dist_r, %eax
1094.type inflate_fast,@function 1094 jb .L_invalid_distance_too_far /* if (dist > wsize) */
1095.size inflate_fast,.-inflate_fast 1095
1096 addl dist_r, nbytes_r /* nbytes = dist - nbytes */
1097 cmpl $0, write(%esp)
1098 jne .L_wrap_around_window_mmx /* if (write != 0) */
1099 /* write == 0: %eax still holds wsize from the dist > wsize check */
1100 subl nbytes_r, %eax /* eax = wsize - nbytes */
1101 addl %eax, from_r /* from += wsize - nbytes */
1102
1103 cmpl nbytes_r, len_r
1104 jbe .L_do_copy1_mmx /* if (nbytes >= len) */
1105
1106 subl nbytes_r, len_r /* len -= nbytes */
1107 rep movsb /* copy nbytes (%ecx) bytes from the window to out */
1108 movl out_r, from_r
1109 subl dist_r, from_r /* from = out - dist */
1110 jmp .L_do_copy1_mmx
1111
1112 cmpl nbytes_r, len_r /* NOTE(review): 1112-1119 are unreachable - no label follows the unconditional jmp at 1110; they repeat 1103-1110 */
1113 jbe .L_do_copy1_mmx /* if (nbytes >= len) */
1114
1115 subl nbytes_r, len_r /* len -= nbytes */
1116 rep movsb
1117 movl out_r, from_r
1118 subl dist_r, from_r /* from = out - dist */
1119 jmp .L_do_copy1_mmx
1120
1121.L_wrap_around_window_mmx: /* write != 0; if write < nbytes, copy from the tail of the window first, then from its start */
1122#define write_r %eax
1123 movl write(%esp), write_r
1124 cmpl write_r, nbytes_r
1125 jbe .L_contiguous_in_window_mmx /* if (write >= nbytes) */
1126
1127 addl wsize(%esp), from_r
1128 addl write_r, from_r
1129 subl nbytes_r, from_r /* from += wsize + write - nbytes */
1130 subl write_r, nbytes_r /* nbytes -= write */
1131#undef write_r
1132
1133 cmpl nbytes_r, len_r
1134 jbe .L_do_copy1_mmx /* if (nbytes >= len) */
1135
1136 subl nbytes_r, len_r /* len -= nbytes */
1137 rep movsb /* copy nbytes (%ecx) bytes from the tail of the window */
1138 movl window(%esp), from_r /* from = window */
1139 movl write(%esp), nbytes_r /* nbytes = write */
1140 cmpl nbytes_r, len_r
1141 jbe .L_do_copy1_mmx /* if (nbytes >= len) */
1142
1143 subl nbytes_r, len_r /* len -= nbytes */
1144 rep movsb /* copy the write bytes at the start of the window */
1145 movl out_r, from_r
1146 subl dist_r, from_r /* from = out - dist */
1147 jmp .L_do_copy1_mmx
1148
1149.L_contiguous_in_window_mmx: /* write >= nbytes: the nbytes wanted lie in one piece ending at window + write */
1150#define write_r %eax
1151 addl write_r, from_r /* %eax still holds write, loaded before the jump here */
1152 subl nbytes_r, from_r /* from += write - nbytes */
1153#undef write_r
1154
1155 cmpl nbytes_r, len_r
1156 jbe .L_do_copy1_mmx /* if (nbytes >= len) */
1157
1158 subl nbytes_r, len_r /* len -= nbytes */
1159 rep movsb /* copy nbytes (%ecx) bytes from the window to out */
1160 movl out_r, from_r
1161 subl dist_r, from_r /* from = out - dist */
1162
1163.L_do_copy1_mmx: /* copy whatever is left of len from from to out, then resume the main loop */
1164#undef nbytes_r
1165#define in_r %esi
1166 movl len_r, %ecx /* ecx = remaining len, the count for rep movsb */
1167 rep movsb /* while (len > 0) { PUP(out) = PUP(from); len--; } */
1168
1169 movl in(%esp), in_r /* move in back to %esi, toss from */
1170 movl lcode(%esp), %ebx /* move lcode back to %ebx, toss dist */
1171 jmp .L_while_test_mmx
1172
1173#undef hold_r
1174#undef bitslong_r
1175
1176#endif /* USE_MMX || RUN_TIME_MMX */
1177
1178
1179/*** USE_MMX, NO_MMX, and RUNTIME_MMX from here on ***/
1180
1181.L_invalid_distance_code:
1182 /* else {
1183 * strm->msg = "invalid distance code";
1184 * state->mode = BAD;
1185 * }
1186 */
1187 movl $.L_invalid_distance_code_msg, %ecx /* ecx = msg, stored by .L_update_stream_state */
1188 movl $INFLATE_MODE_BAD, %edx /* edx = new state->mode */
1189 jmp .L_update_stream_state
1190
1191.L_test_for_end_of_block:
1192 /* else if (op & 32) {
1193 * state->mode = TYPE;
1194 * break;
1195 * }
1196 */
1197 testb $32, %al
1198 jz .L_invalid_literal_length_code /* if ((op & 32) == 0) */
1199
1200 movl $0, %ecx /* ecx = NULL: .L_update_stream_state leaves strm->msg untouched */
1201 movl $INFLATE_MODE_TYPE, %edx /* edx = new state->mode */
1202 jmp .L_update_stream_state
1203
1204.L_invalid_literal_length_code:
1205 /* else {
1206 * strm->msg = "invalid literal/length code";
1207 * state->mode = BAD;
1208 * }
1209 */
1210 movl $.L_invalid_literal_length_code_msg, %ecx /* ecx = msg, stored by .L_update_stream_state */
1211 movl $INFLATE_MODE_BAD, %edx /* edx = new state->mode */
1212 jmp .L_update_stream_state
1213
1214.L_invalid_distance_too_far:
1215 /* strm->msg = "invalid distance too far back";
1216 * state->mode = BAD;
1217 */
1218 movl in(%esp), in_r /* from_r has in's reg, put in back */
1219 movl $.L_invalid_distance_too_far_msg, %ecx /* ecx = msg, stored by .L_update_stream_state */
1220 movl $INFLATE_MODE_BAD, %edx /* edx = new state->mode */
1221 jmp .L_update_stream_state
1222
1223.L_update_stream_state:
1224 /* set strm->msg = %ecx (only when non-NULL), strm->state->mode = %edx */
1225 movl strm_sp(%esp), %eax /* eax = strm */
1226 testl %ecx, %ecx /* if (msg != NULL) */
1227 jz .L_skip_msg
1228 movl %ecx, msg_strm(%eax) /* strm->msg = msg */
1229.L_skip_msg:
1230 movl state_strm(%eax), %eax /* state = strm->state */
1231 movl %edx, mode_state(%eax) /* state->mode = edx (BAD | TYPE) */
1232 jmp .L_break_loop /* the target is the next label; the jmp skips the .align padding */
1233
1234.align 32,0x90
1235.L_break_loop:
1236
1237/*
1238 * Common exit path for the MMX and non-MMX loops.
1239 * Regs on entry:
1240 * bits = %ebp when mmx, and in %ebx when non-mmx
1241 * hold = hold_mm (%mm0) when mmx, and in %ebp when non-mmx
1242 * in = %esi
1243 * out = %edi
1244 */
1245
1246#if defined( USE_MMX ) || defined( RUN_TIME_MMX )
1247
1248#if defined( RUN_TIME_MMX )
1249
1250 cmpl $DO_USE_MMX, inflate_fast_use_mmx
1251 jne .L_update_next_in /* non-mmx loop ran: bits is already in %ebx */
1252
1253#endif /* RUN_TIME_MMX */
1254
1255 movl %ebp, %ebx /* mmx: move bits into %ebx, where the code below expects it */
1256
1257.L_update_next_in:
1258
1259#endif
1260
1261#define strm_r %eax
1262#define state_r %edx
1263
1264 /* len = bits >> 3;
1265 * in -= len;
1266 * bits -= len << 3;
1267 * hold &= (1U << bits) - 1;
1268 * state->hold = hold;
1269 * state->bits = bits;
1270 * strm->next_in = in;
1271 * strm->next_out = out;
1272 */
1273 movl strm_sp(%esp), strm_r
1274 movl %ebx, %ecx /* ecx = bits */
1275 movl state_strm(strm_r), state_r /* state = strm->state */
1276 shrl $3, %ecx /* len = bits >> 3 */
1277 subl %ecx, in_r /* in -= len */
1278 shll $3, %ecx
1279 subl %ecx, %ebx /* bits -= len << 3 */
1280 movl out_r, next_out_strm(strm_r) /* strm->next_out = out */
1281 movl %ebx, bits_state(state_r) /* state->bits = bits */
1282 movl %ebx, %ecx /* keep bits in %cl for the mask shift below; %ebx is reused */
1283
1284 leal buf(%esp), %ebx /* ebx = address of the stack-local buf */
1285 cmpl %ebx, last(%esp)
1286 jne .L_buf_not_used /* if buf != last */
1287 /* last == buf: rebase in from buf onto strm->next_in - presumably the input was staged in buf earlier (not shown here) */
1288 subl %ebx, in_r /* in -= buf */
1289 movl next_in_strm(strm_r), %ebx
1290 movl %ebx, last(%esp) /* last = strm->next_in */
1291 addl %ebx, in_r /* in += strm->next_in */
1292 movl avail_in_strm(strm_r), %ebx
1293 subl $11, %ebx
1294 addl %ebx, last(%esp) /* last = &strm->next_in[ avail_in - 11 ] */
1295
1296.L_buf_not_used:
1297 movl in_r, next_in_strm(strm_r) /* strm->next_in = in */
1298
1299 movl $1, %ebx
1300 shll %cl, %ebx
1301 decl %ebx /* ebx = (1U << bits) - 1 */
1302
1303#if defined( USE_MMX ) || defined( RUN_TIME_MMX )
1304
1305#if defined( RUN_TIME_MMX )
1306
1307 cmpl $DO_USE_MMX, inflate_fast_use_mmx
1308 jne .L_update_hold /* non-mmx loop ran: hold is already in %ebp */
1309
1310#endif /* RUN_TIME_MMX */
1311
1312 psrlq used_mm, hold_mm /* hold_mm >>= last bit length */
1313 movd hold_mm, %ebp /* ebp = low 32 bits of hold, matching the non-mmx register layout */
1314
1315 emms /* empty the MMX state now that the mm registers are no longer needed */
1316
1317.L_update_hold:
1318
1319#endif /* USE_MMX || RUN_TIME_MMX */
1320
1321 andl %ebx, %ebp /* hold &= (1U << bits) - 1 */
1322 movl %ebp, hold_state(state_r) /* state->hold = hold */
1323
1324#define last_r %ebx
1325
1326 /* strm->avail_in = in < last ? 11 + (last - in) : 11 - (in - last) */
1327 movl last(%esp), last_r
1328 cmpl in_r, last_r
1329 jbe .L_last_is_smaller /* if (in >= last) */
1330
1331 subl in_r, last_r /* last -= in */
1332 addl $11, last_r /* last += 11 */
1333 movl last_r, avail_in_strm(strm_r) /* strm->avail_in = 11 + (last - in) */
1334 jmp .L_fixup_out
1335.L_last_is_smaller:
1336 subl last_r, in_r /* in -= last */
1337 negl in_r /* in = -in */
1338 addl $11, in_r /* in += 11 */
1339 movl in_r, avail_in_strm(strm_r) /* strm->avail_in = 11 - (in - last) */
1340
1341#undef last_r
1342#define end_r %ebx
1343
1344.L_fixup_out:
1345 /* strm->avail_out = out < end ? 257 + (end - out) : 257 - (out - end)*/
1346 movl end(%esp), end_r
1347 cmpl out_r, end_r
1348 jbe .L_end_is_smaller /* if (out >= end) */
1349
1350 subl out_r, end_r /* end -= out */
1351 addl $257, end_r /* end += 257 */
1352 movl end_r, avail_out_strm(strm_r) /* strm->avail_out = 257 + (end - out) */
1353 jmp .L_done
1354.L_end_is_smaller:
1355 subl end_r, out_r /* out -= end */
1356 negl out_r /* out = -out */
1357 addl $257, out_r /* out += 257 */
1358 movl out_r, avail_out_strm(strm_r) /* strm->avail_out = 257 - (out - end) */
1359
1360#undef end_r
1361#undef strm_r
1362#undef state_r
1363
1364.L_done: /* function epilogue */
1365 addl $local_var_size, %esp /* release the local variable area */
1366 popf /* restore flags and registers - presumably pushed by the prologue (not shown here) */
1367 popl %ebx
1368 popl %ebp
1369 popl %esi
1370 popl %edi
1371 ret
1372
1373#if defined( GAS_ELF )
1374/* ELF symbol type and size for inflate_fast */
1375.type inflate_fast,@function
1376.size inflate_fast,.-inflate_fast
1377#endif