summary | refs | log | tree | commit | diff
path: root/contrib/inflate86/inffas86.c
diff options
context:
space:
mode:
author: Mark Adler <madler@alumni.caltech.edu> 2011-09-09 23:24:24 -0700
committer: Mark Adler <madler@alumni.caltech.edu> 2011-09-09 23:24:24 -0700
commit: 9811b53dd9e8f67015c7199fff12b5bfc6965330 (patch)
tree: bfa72ee22967fb56833203dfcd31c473c86b1bf1 /contrib/inflate86/inffas86.c
parent: 79fbcdc939b5d515218187a0d5f2526fb632075a (diff)
download: zlib-1.2.2.1.tar.gz
zlib-1.2.2.1.tar.bz2
zlib-1.2.2.1.zip
zlib 1.2.2.1 v1.2.2.1
Diffstat (limited to 'contrib/inflate86/inffas86.c')
-rw-r--r-- contrib/inflate86/inffas86.c 798
1 files changed, 586 insertions, 212 deletions
diff --git a/contrib/inflate86/inffas86.c b/contrib/inflate86/inffas86.c
index 4534693..6da7635 100644
--- a/contrib/inflate86/inffas86.c
+++ b/contrib/inflate86/inffas86.c
@@ -7,6 +7,15 @@
7 * Copyright (C) 2003 Chris Anderson <christop@charm.net> 7 * Copyright (C) 2003 Chris Anderson <christop@charm.net>
8 * Please use the copyright conditions above. 8 * Please use the copyright conditions above.
9 * 9 *
10 * Dec-29-2003 -- I added AMD64 inflate asm support. This version is also
11 * slightly quicker on x86 systems because, instead of using rep movsb to copy
12 * data, it uses rep movsw, which moves data in 2-byte chunks instead of single
13 * bytes. I've tested the AMD64 code on a Fedora Core 1 + the x86_64 updates
14 * from http://fedora.linux.duke.edu/fc1_x86_64
15 * which is running on an Athlon 64 3000+ / Gigabyte GA-K8VT800M system with
16 * 1GB ram. The 64-bit version is about 4% faster than the 32-bit version,
17 * when decompressing mozilla-source-1.3.tar.gz.
18 *
10 * Mar-13-2003 -- Most of this is derived from inffast.S which is derived from 19 * Mar-13-2003 -- Most of this is derived from inffast.S which is derived from
11 * the gcc -S output of zlib-1.2.0/inffast.c. Zlib-1.2.0 is in beta release at 20 * the gcc -S output of zlib-1.2.0/inffast.c. Zlib-1.2.0 is in beta release at
12 * the moment. I have successfully compiled and tested this code with gcc2.96, 21 * the moment. I have successfully compiled and tested this code with gcc2.96,
@@ -65,33 +74,44 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */
65{ 74{
66 struct inflate_state FAR *state; 75 struct inflate_state FAR *state;
67 struct inffast_ar { 76 struct inffast_ar {
68 void *esp; /* esp save */ 77/* 64 32 x86 x86_64 */
69 unsigned char FAR *in; /* local strm->next_in */ 78/* ar offset register */
70 unsigned char FAR *last; /* while in < last, enough input available */ 79/* 0 0 */ void *esp; /* esp save */
71 unsigned char FAR *out; /* local strm->next_out */ 80/* 8 4 */ void *ebp; /* ebp save */
72 unsigned char FAR *beg; /* inflate()'s initial strm->next_out */ 81/* 16 8 */ unsigned char FAR *in; /* esi rsi local strm->next_in */
73 unsigned char FAR *end; /* while out < end, enough space available */ 82/* 24 12 */ unsigned char FAR *last; /* r9 while in < last */
74 unsigned wsize; /* window size or zero if not using window */ 83/* 32 16 */ unsigned char FAR *out; /* edi rdi local strm->next_out */
75 unsigned write; /* window write index */ 84/* 40 20 */ unsigned char FAR *beg; /* inflate()'s init next_out */
76 unsigned char FAR *window; /* allocated sliding window, if wsize != 0 */ 85/* 48 24 */ unsigned char FAR *end; /* r10 while out < end */
77 unsigned long hold; /* local strm->hold */ 86/* 56 28 */ unsigned char FAR *window;/* sliding window, wsize!=0 */
78 unsigned bits; /* local strm->bits */ 87/* 64 32 */ code const FAR *lcode; /* ebp rbp local strm->lencode */
79 code const FAR *lcode; /* local strm->lencode */ 88/* 72 36 */ code const FAR *dcode; /* r11 local strm->distcode */
80 code const FAR *dcode; /* local strm->distcode */ 89/* 80 40 */ unsigned long hold; /* edx rdx local strm->hold */
81 unsigned lmask; /* mask for first level of length codes */ 90/* 88 44 */ unsigned bits; /* ebx rbx local strm->bits */
82 unsigned dmask; /* mask for first level of distance codes */ 91/* 92 48 */ unsigned wsize; /* window size */
83 unsigned len; /* match length, unused bytes */ 92/* 96 52 */ unsigned write; /* window write index */
84 unsigned dist; /* match distance */ 93/*100 56 */ unsigned lmask; /* r12 mask for lcode */
85 unsigned status; /* this is set when state changes */ 94/*104 60 */ unsigned dmask; /* r13 mask for dcode */
95/*108 64 */ unsigned len; /* r14 match length */
96/*112 68 */ unsigned dist; /* r15 match distance */
97/*116 72 */ unsigned status; /* set when state chng*/
86 } ar; 98 } ar;
87 99
100#if defined( __GNUC__ ) && defined( __amd64__ ) && ! defined( __i386 )
101#define PAD_AVAIL_IN 6
102#define PAD_AVAIL_OUT 258
103#else
104#define PAD_AVAIL_IN 5
105#define PAD_AVAIL_OUT 257
106#endif
107
88 /* copy state to local variables */ 108 /* copy state to local variables */
89 state = (struct inflate_state FAR *)strm->state; 109 state = (struct inflate_state FAR *)strm->state;
90 ar.in = strm->next_in; 110 ar.in = strm->next_in;
91 ar.last = ar.in + (strm->avail_in - 5); 111 ar.last = ar.in + (strm->avail_in - PAD_AVAIL_IN);
92 ar.out = strm->next_out; 112 ar.out = strm->next_out;
93 ar.beg = ar.out - (start - strm->avail_out); 113 ar.beg = ar.out - (start - strm->avail_out);
94 ar.end = ar.out + (strm->avail_out - 257); 114 ar.end = ar.out + (strm->avail_out - PAD_AVAIL_OUT);
95 ar.wsize = state->wsize; 115 ar.wsize = state->wsize;
96 ar.write = state->write; 116 ar.write = state->write;
97 ar.window = state->window; 117 ar.window = state->window;
@@ -105,32 +125,368 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */
105 /* decode literals and length/distances until end-of-block or not enough 125 /* decode literals and length/distances until end-of-block or not enough
106 input data or output space */ 126 input data or output space */
107 127
108 /* align in on 2 byte boundary */ 128 /* align in on 1/2 hold size boundary */
109 if (((unsigned long)(void *)ar.in & 0x1) != 0) { 129 while (((unsigned long)(void *)ar.in & (sizeof(ar.hold) / 2 - 1)) != 0) {
110 ar.hold += (unsigned long)*ar.in++ << ar.bits; 130 ar.hold += (unsigned long)*ar.in++ << ar.bits;
111 ar.bits += 8; 131 ar.bits += 8;
112 } 132 }
113 133
114#if defined( __GNUC__ ) || defined( __ICC ) 134#if defined( __GNUC__ ) && defined( __amd64__ ) && ! defined( __i386 )
135 __asm__ __volatile__ (
136" leaq %0, %%rax\n"
137" movq %%rbp, 8(%%rax)\n" /* save regs rbp and rsp */
138" movq %%rsp, (%%rax)\n"
139" movq %%rax, %%rsp\n" /* make rsp point to &ar */
140" movq 16(%%rsp), %%rsi\n" /* rsi = in */
141" movq 32(%%rsp), %%rdi\n" /* rdi = out */
142" movq 24(%%rsp), %%r9\n" /* r9 = last */
143" movq 48(%%rsp), %%r10\n" /* r10 = end */
144" movq 64(%%rsp), %%rbp\n" /* rbp = lcode */
145" movq 72(%%rsp), %%r11\n" /* r11 = dcode */
146" movq 80(%%rsp), %%rdx\n" /* rdx = hold */
147" movl 88(%%rsp), %%ebx\n" /* ebx = bits */
148" movl 100(%%rsp), %%r12d\n" /* r12d = lmask */
149" movl 104(%%rsp), %%r13d\n" /* r13d = dmask */
150 /* r14d = len */
151 /* r15d = dist */
152" cld\n"
153" cmpq %%rdi, %%r10\n"
154" je .L_one_time\n" /* if only one decode left */
155" cmpq %%rsi, %%r9\n"
156" je .L_one_time\n"
157" jmp .L_do_loop\n"
158
159".L_one_time:\n"
160" movq %%r12, %%r8\n" /* r8 = lmask */
161" cmpb $32, %%bl\n"
162" ja .L_get_length_code_one_time\n"
163
164" lodsl\n" /* eax = *(uint *)in++ */
165" movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
166" addb $32, %%bl\n" /* bits += 32 */
167" shlq %%cl, %%rax\n"
168" orq %%rax, %%rdx\n" /* hold |= *((uint *)in)++ << bits */
169" jmp .L_get_length_code_one_time\n"
170
171".align 32,0x90\n"
172".L_while_test:\n"
173" cmpq %%rdi, %%r10\n"
174" jbe .L_break_loop\n"
175" cmpq %%rsi, %%r9\n"
176" jbe .L_break_loop\n"
177
178".L_do_loop:\n"
179" movq %%r12, %%r8\n" /* r8 = lmask */
180" cmpb $32, %%bl\n"
181" ja .L_get_length_code\n" /* if (32 < bits) */
182
183" lodsl\n" /* eax = *(uint *)in++ */
184" movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
185" addb $32, %%bl\n" /* bits += 32 */
186" shlq %%cl, %%rax\n"
187" orq %%rax, %%rdx\n" /* hold |= *((uint *)in)++ << bits */
188
189".L_get_length_code:\n"
190" andq %%rdx, %%r8\n" /* r8 &= hold */
191" movl (%%rbp,%%r8,4), %%eax\n" /* eax = lcode[hold & lmask] */
192
193" movb %%ah, %%cl\n" /* cl = this.bits */
194" subb %%ah, %%bl\n" /* bits -= this.bits */
195" shrq %%cl, %%rdx\n" /* hold >>= this.bits */
196
197" testb %%al, %%al\n"
198" jnz .L_test_for_length_base\n" /* if (op != 0) 45.7% */
199
200" movq %%r12, %%r8\n" /* r8 = lmask */
201" shrl $16, %%eax\n" /* output this.val char */
202" stosb\n"
203
204".L_get_length_code_one_time:\n"
205" andq %%rdx, %%r8\n" /* r8 &= hold */
206" movl (%%rbp,%%r8,4), %%eax\n" /* eax = lcode[hold & lmask] */
207
208".L_dolen:\n"
209" movb %%ah, %%cl\n" /* cl = this.bits */
210" subb %%ah, %%bl\n" /* bits -= this.bits */
211" shrq %%cl, %%rdx\n" /* hold >>= this.bits */
212
213" testb %%al, %%al\n"
214" jnz .L_test_for_length_base\n" /* if (op != 0) 45.7% */
215
216" shrl $16, %%eax\n" /* output this.val char */
217" stosb\n"
218" jmp .L_while_test\n"
219
220".align 32,0x90\n"
221".L_test_for_length_base:\n"
222" movl %%eax, %%r14d\n" /* len = this */
223" shrl $16, %%r14d\n" /* len = this.val */
224" movb %%al, %%cl\n"
225
226" testb $16, %%al\n"
227" jz .L_test_for_second_level_length\n" /* if ((op & 16) == 0) 8% */
228" andb $15, %%cl\n" /* op &= 15 */
229" jz .L_decode_distance\n" /* if (!op) */
230
231".L_add_bits_to_len:\n"
232" subb %%cl, %%bl\n"
233" xorl %%eax, %%eax\n"
234" incl %%eax\n"
235" shll %%cl, %%eax\n"
236" decl %%eax\n"
237" andl %%edx, %%eax\n" /* eax &= hold */
238" shrq %%cl, %%rdx\n"
239" addl %%eax, %%r14d\n" /* len += hold & mask[op] */
240
241".L_decode_distance:\n"
242" movq %%r13, %%r8\n" /* r8 = dmask */
243" cmpb $32, %%bl\n"
244" ja .L_get_distance_code\n" /* if (32 < bits) */
245
246" lodsl\n" /* eax = *(uint *)in++ */
247" movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
248" addb $32, %%bl\n" /* bits += 32 */
249" shlq %%cl, %%rax\n"
250" orq %%rax, %%rdx\n" /* hold |= *((uint *)in)++ << bits */
251
252".L_get_distance_code:\n"
253" andq %%rdx, %%r8\n" /* r8 &= hold */
254" movl (%%r11,%%r8,4), %%eax\n" /* eax = dcode[hold & dmask] */
255
256".L_dodist:\n"
257" movl %%eax, %%r15d\n" /* dist = this */
258" shrl $16, %%r15d\n" /* dist = this.val */
259" movb %%ah, %%cl\n"
260" subb %%ah, %%bl\n" /* bits -= this.bits */
261" shrq %%cl, %%rdx\n" /* hold >>= this.bits */
262" movb %%al, %%cl\n" /* cl = this.op */
263
264" testb $16, %%al\n" /* if ((op & 16) == 0) */
265" jz .L_test_for_second_level_dist\n"
266" andb $15, %%cl\n" /* op &= 15 */
267" jz .L_check_dist_one\n"
268
269".L_add_bits_to_dist:\n"
270" subb %%cl, %%bl\n"
271" xorl %%eax, %%eax\n"
272" incl %%eax\n"
273" shll %%cl, %%eax\n"
274" decl %%eax\n" /* (1 << op) - 1 */
275" andl %%edx, %%eax\n" /* eax &= hold */
276" shrq %%cl, %%rdx\n"
277" addl %%eax, %%r15d\n" /* dist += hold & ((1 << op) - 1) */
278
279".L_check_window:\n"
280" movq %%rsi, %%r8\n" /* save in so from can use its reg */
281" movq %%rdi, %%rax\n"
282" subq 40(%%rsp), %%rax\n" /* nbytes = out - beg */
283
284" cmpl %%r15d, %%eax\n"
285" jb .L_clip_window\n" /* if (dist > nbytes) 4.2% */
286
287" movl %%r14d, %%ecx\n" /* ecx = len */
288" movq %%rdi, %%rsi\n"
289" subq %%r15, %%rsi\n" /* from = out - dist */
290
291" sarl %%ecx\n"
292" jnc .L_copy_two\n" /* if len % 2 == 0 */
293
294" rep movsw\n"
295" movb (%%rsi), %%al\n"
296" movb %%al, (%%rdi)\n"
297" incq %%rdi\n"
298
299" movq %%r8, %%rsi\n" /* move in back to %rsi, toss from */
300" jmp .L_while_test\n"
301
302".L_copy_two:\n"
303" rep movsw\n"
304" movq %%r8, %%rsi\n" /* move in back to %rsi, toss from */
305" jmp .L_while_test\n"
306
307".align 32,0x90\n"
308".L_check_dist_one:\n"
309" cmpl $1, %%r15d\n" /* if dist 1, is a memset */
310" jne .L_check_window\n"
311" cmpq %%rdi, 40(%%rsp)\n" /* if out == beg, outside window */
312" je .L_check_window\n"
313
314" movl %%r14d, %%ecx\n" /* ecx = len */
315" movb -1(%%rdi), %%al\n"
316" movb %%al, %%ah\n"
317
318" sarl %%ecx\n"
319" jnc .L_set_two\n"
320" movb %%al, (%%rdi)\n"
321" incq %%rdi\n"
322
323".L_set_two:\n"
324" rep stosw\n"
325" jmp .L_while_test\n"
326
327".align 32,0x90\n"
328".L_test_for_second_level_length:\n"
329" testb $64, %%al\n"
330" jnz .L_test_for_end_of_block\n" /* if ((op & 64) != 0) */
331
332" xorl %%eax, %%eax\n"
333" incl %%eax\n"
334" shll %%cl, %%eax\n"
335" decl %%eax\n"
336" andl %%edx, %%eax\n" /* eax &= hold */
337" addl %%r14d, %%eax\n" /* eax += len */
338" movl (%%rbp,%%rax,4), %%eax\n" /* eax = lcode[val+(hold&mask[op])]*/
339" jmp .L_dolen\n"
340
341".align 32,0x90\n"
342".L_test_for_second_level_dist:\n"
343" testb $64, %%al\n"
344" jnz .L_invalid_distance_code\n" /* if ((op & 64) != 0) */
345
346" xorl %%eax, %%eax\n"
347" incl %%eax\n"
348" shll %%cl, %%eax\n"
349" decl %%eax\n"
350" andl %%edx, %%eax\n" /* eax &= hold */
351" addl %%r15d, %%eax\n" /* eax += dist */
352" movl (%%r11,%%rax,4), %%eax\n" /* eax = dcode[val+(hold&mask[op])]*/
353" jmp .L_dodist\n"
354
355".align 32,0x90\n"
356".L_clip_window:\n"
357" movl %%eax, %%ecx\n" /* ecx = nbytes */
358" movl 92(%%rsp), %%eax\n" /* eax = wsize, prepare for dist cmp */
359" negl %%ecx\n" /* nbytes = -nbytes */
360
361" cmpl %%r15d, %%eax\n"
362" jb .L_invalid_distance_too_far\n" /* if (dist > wsize) */
363
364" addl %%r15d, %%ecx\n" /* nbytes = dist - nbytes */
365" cmpl $0, 96(%%rsp)\n"
366" jne .L_wrap_around_window\n" /* if (write != 0) */
367
368" movq 56(%%rsp), %%rsi\n" /* from = window */
369" subl %%ecx, %%eax\n" /* eax -= nbytes */
370" addq %%rax, %%rsi\n" /* from += wsize - nbytes */
371
372" movl %%r14d, %%eax\n" /* eax = len */
373" cmpl %%ecx, %%r14d\n"
374" jbe .L_do_copy\n" /* if (nbytes >= len) */
375
376" subl %%ecx, %%eax\n" /* eax -= nbytes */
377" rep movsb\n"
378" movq %%rdi, %%rsi\n"
379" subq %%r15, %%rsi\n" /* from = &out[ -dist ] */
380" jmp .L_do_copy\n"
381
382".align 32,0x90\n"
383".L_wrap_around_window:\n"
384" movl 96(%%rsp), %%eax\n" /* eax = write */
385" cmpl %%eax, %%ecx\n"
386" jbe .L_contiguous_in_window\n" /* if (write >= nbytes) */
387
388" movl 92(%%rsp), %%esi\n" /* from = wsize */
389" addq 56(%%rsp), %%rsi\n" /* from += window */
390" addq %%rax, %%rsi\n" /* from += write */
391" subq %%rcx, %%rsi\n" /* from -= nbytes */
392" subl %%eax, %%ecx\n" /* nbytes -= write */
393
394" movl %%r14d, %%eax\n" /* eax = len */
395" cmpl %%ecx, %%eax\n"
396" jbe .L_do_copy\n" /* if (nbytes >= len) */
397
398" subl %%ecx, %%eax\n" /* len -= nbytes */
399" rep movsb\n"
400" movq 56(%%rsp), %%rsi\n" /* from = window */
401" movl 96(%%rsp), %%ecx\n" /* nbytes = write */
402" cmpl %%ecx, %%eax\n"
403" jbe .L_do_copy\n" /* if (nbytes >= len) */
404
405" subl %%ecx, %%eax\n" /* len -= nbytes */
406" rep movsb\n"
407" movq %%rdi, %%rsi\n"
408" subq %%r15, %%rsi\n" /* from = out - dist */
409" jmp .L_do_copy\n"
410
411".align 32,0x90\n"
412".L_contiguous_in_window:\n"
413" movq 56(%%rsp), %%rsi\n" /* rsi = window */
414" addq %%rax, %%rsi\n"
415" subq %%rcx, %%rsi\n" /* from += write - nbytes */
416
417" movl %%r14d, %%eax\n" /* eax = len */
418" cmpl %%ecx, %%eax\n"
419" jbe .L_do_copy\n" /* if (nbytes >= len) */
420
421" subl %%ecx, %%eax\n" /* len -= nbytes */
422" rep movsb\n"
423" movq %%rdi, %%rsi\n"
424" subq %%r15, %%rsi\n" /* from = out - dist */
425" jmp .L_do_copy\n" /* copy the remaining len bytes */
426
427".align 32,0x90\n"
428".L_do_copy:\n"
429" movl %%eax, %%ecx\n" /* ecx = len */
430" rep movsb\n"
431
432" movq %%r8, %%rsi\n" /* move in back to %esi, toss from */
433" jmp .L_while_test\n"
434
435".L_test_for_end_of_block:\n"
436" testb $32, %%al\n"
437" jz .L_invalid_literal_length_code\n"
438" movl $1, 116(%%rsp)\n"
439" jmp .L_break_loop_with_status\n"
440
441".L_invalid_literal_length_code:\n"
442" movl $2, 116(%%rsp)\n"
443" jmp .L_break_loop_with_status\n"
444
445".L_invalid_distance_code:\n"
446" movl $3, 116(%%rsp)\n"
447" jmp .L_break_loop_with_status\n"
448
449".L_invalid_distance_too_far:\n"
450" movl $4, 116(%%rsp)\n"
451" jmp .L_break_loop_with_status\n"
452
453".L_break_loop:\n"
454" movl $0, 116(%%rsp)\n"
455
456".L_break_loop_with_status:\n"
457/* put in, out, bits, and hold back into ar and pop esp */
458" movq %%rsi, 16(%%rsp)\n" /* in */
459" movq %%rdi, 32(%%rsp)\n" /* out */
460" movl %%ebx, 88(%%rsp)\n" /* bits */
461" movq %%rdx, 80(%%rsp)\n" /* hold */
462" movq (%%rsp), %%rax\n" /* restore rbp and rsp */
463" movq 8(%%rsp), %%rbp\n"
464" movq %%rax, %%rsp\n"
465 :
466 : "m" (ar)
467 : "memory", "%rax", "%rbx", "%rcx", "%rdx", "%rsi", "%rdi",
468 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15"
469 );
470#elif ( defined( __GNUC__ ) || defined( __ICC ) ) && defined( __i386 )
115 __asm__ __volatile__ ( 471 __asm__ __volatile__ (
116" leal %0, %%eax\n" 472" leal %0, %%eax\n"
117" pushf\n" 473" movl %%esp, (%%eax)\n" /* save esp, ebp */
118" pushl %%ebp\n" 474" movl %%ebp, 4(%%eax)\n"
119" movl %%esp, (%%eax)\n"
120" movl %%eax, %%esp\n" 475" movl %%eax, %%esp\n"
121" movl 4(%%esp), %%esi\n" /* esi = in */ 476" movl 8(%%esp), %%esi\n" /* esi = in */
122" movl 12(%%esp), %%edi\n" /* edi = out */ 477" movl 16(%%esp), %%edi\n" /* edi = out */
123" movl 36(%%esp), %%edx\n" /* edx = hold */ 478" movl 40(%%esp), %%edx\n" /* edx = hold */
124" movl 40(%%esp), %%ebx\n" /* ebx = bits */ 479" movl 44(%%esp), %%ebx\n" /* ebx = bits */
125" movl 44(%%esp), %%ebp\n" /* ebp = lcode */ 480" movl 32(%%esp), %%ebp\n" /* ebp = lcode */
126 481
127" cld\n" 482" cld\n"
128" jmp .L_do_loop\n" 483" jmp .L_do_loop\n"
129 484
485".align 32,0x90\n"
130".L_while_test:\n" 486".L_while_test:\n"
131" cmpl %%edi, 20(%%esp)\n" 487" cmpl %%edi, 24(%%esp)\n" /* out < end */
132" jbe .L_break_loop\n" 488" jbe .L_break_loop\n"
133" cmpl %%esi, 8(%%esp)\n" 489" cmpl %%esi, 12(%%esp)\n" /* in < last */
134" jbe .L_break_loop\n" 490" jbe .L_break_loop\n"
135 491
136".L_do_loop:\n" 492".L_do_loop:\n"
@@ -145,7 +501,7 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */
145" orl %%eax, %%edx\n" /* hold |= *((ushort *)in)++ << bits */ 501" orl %%eax, %%edx\n" /* hold |= *((ushort *)in)++ << bits */
146 502
147".L_get_length_code:\n" 503".L_get_length_code:\n"
148" movl 52(%%esp), %%eax\n" /* eax = lmask */ 504" movl 56(%%esp), %%eax\n" /* eax = lmask */
149" andl %%edx, %%eax\n" /* eax &= hold */ 505" andl %%edx, %%eax\n" /* eax &= hold */
150" movl (%%ebp,%%eax,4), %%eax\n" /* eax = lcode[hold & lmask] */ 506" movl (%%ebp,%%eax,4), %%eax\n" /* eax = lcode[hold & lmask] */
151 507
@@ -161,10 +517,11 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */
161" stosb\n" 517" stosb\n"
162" jmp .L_while_test\n" 518" jmp .L_while_test\n"
163 519
520".align 32,0x90\n"
164".L_test_for_length_base:\n" 521".L_test_for_length_base:\n"
165" movl %%eax, %%ecx\n" /* len = this */ 522" movl %%eax, %%ecx\n" /* len = this */
166" shrl $16, %%ecx\n" /* len = this.val */ 523" shrl $16, %%ecx\n" /* len = this.val */
167" movl %%ecx, 60(%%esp)\n" /* len = this */ 524" movl %%ecx, 64(%%esp)\n" /* save len */
168" movb %%al, %%cl\n" 525" movb %%al, %%cl\n"
169 526
170" testb $16, %%al\n" 527" testb $16, %%al\n"
@@ -184,13 +541,14 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */
184" movb %%ch, %%cl\n" /* move op back to ecx */ 541" movb %%ch, %%cl\n" /* move op back to ecx */
185 542
186".L_add_bits_to_len:\n" 543".L_add_bits_to_len:\n"
187" movl $1, %%eax\n" 544" subb %%cl, %%bl\n"
545" xorl %%eax, %%eax\n"
546" incl %%eax\n"
188" shll %%cl, %%eax\n" 547" shll %%cl, %%eax\n"
189" decl %%eax\n" 548" decl %%eax\n"
190" subb %%cl, %%bl\n"
191" andl %%edx, %%eax\n" /* eax &= hold */ 549" andl %%edx, %%eax\n" /* eax &= hold */
192" shrl %%cl, %%edx\n" 550" shrl %%cl, %%edx\n"
193" addl %%eax, 60(%%esp)\n" /* len += hold & mask[op] */ 551" addl %%eax, 64(%%esp)\n" /* len += hold & mask[op] */
194 552
195".L_decode_distance:\n" 553".L_decode_distance:\n"
196" cmpb $15, %%bl\n" 554" cmpb $15, %%bl\n"
@@ -204,8 +562,8 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */
204" orl %%eax, %%edx\n" /* hold |= *((ushort *)in)++ << bits */ 562" orl %%eax, %%edx\n" /* hold |= *((ushort *)in)++ << bits */
205 563
206".L_get_distance_code:\n" 564".L_get_distance_code:\n"
207" movl 56(%%esp), %%eax\n" /* eax = dmask */ 565" movl 60(%%esp), %%eax\n" /* eax = dmask */
208" movl 48(%%esp), %%ecx\n" /* ecx = dcode */ 566" movl 36(%%esp), %%ecx\n" /* ecx = dcode */
209" andl %%edx, %%eax\n" /* eax &= hold */ 567" andl %%edx, %%eax\n" /* eax &= hold */
210" movl (%%ecx,%%eax,4), %%eax\n"/* eax = dcode[hold & dmask] */ 568" movl (%%ecx,%%eax,4), %%eax\n"/* eax = dcode[hold & dmask] */
211 569
@@ -234,223 +592,228 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */
234" movb %%ch, %%cl\n" /* move op back to ecx */ 592" movb %%ch, %%cl\n" /* move op back to ecx */
235 593
236".L_add_bits_to_dist:\n" 594".L_add_bits_to_dist:\n"
237" movl $1, %%eax\n" 595" subb %%cl, %%bl\n"
596" xorl %%eax, %%eax\n"
597" incl %%eax\n"
238" shll %%cl, %%eax\n" 598" shll %%cl, %%eax\n"
239" decl %%eax\n" /* (1 << op) - 1 */ 599" decl %%eax\n" /* (1 << op) - 1 */
240" subb %%cl, %%bl\n"
241" andl %%edx, %%eax\n" /* eax &= hold */ 600" andl %%edx, %%eax\n" /* eax &= hold */
242" shrl %%cl, %%edx\n" 601" shrl %%cl, %%edx\n"
243" addl %%eax, %%ebp\n" /* dist += hold & ((1 << op) - 1) */ 602" addl %%eax, %%ebp\n" /* dist += hold & ((1 << op) - 1) */
244 603
245".L_check_window:\n" 604".L_check_window:\n"
246" movl %%esi, 4(%%esp)\n" /* save in so from can use its reg */ 605" movl %%esi, 8(%%esp)\n" /* save in so from can use its reg */
247" movl %%edi, %%eax\n" 606" movl %%edi, %%eax\n"
248" subl 16(%%esp), %%eax\n" /* nbytes = out - beg */ 607" subl 20(%%esp), %%eax\n" /* nbytes = out - beg */
249 608
250" cmpl %%ebp, %%eax\n" 609" cmpl %%ebp, %%eax\n"
251" jb .L_clip_window\n" /* if (dist > nbytes) 4.2% */ 610" jb .L_clip_window\n" /* if (dist > nbytes) 4.2% */
252 611
253" movl 60(%%esp), %%ecx\n" 612" movl 64(%%esp), %%ecx\n" /* ecx = len */
254" movl %%edi, %%esi\n" 613" movl %%edi, %%esi\n"
255" subl %%ebp, %%esi\n" /* from = out - dist */ 614" subl %%ebp, %%esi\n" /* from = out - dist */
256 615
257" subl $3, %%ecx\n" /* copy from to out */ 616" sarl %%ecx\n"
617" jnc .L_copy_two\n" /* if len % 2 == 0 */
618
619" rep movsw\n"
258" movb (%%esi), %%al\n" 620" movb (%%esi), %%al\n"
259" movb %%al, (%%edi)\n" 621" movb %%al, (%%edi)\n"
260" movb 1(%%esi), %%al\n" 622" incl %%edi\n"
261" movb 2(%%esi), %%ah\n"
262" addl $3, %%esi\n"
263" movb %%al, 1(%%edi)\n"
264" movb %%ah, 2(%%edi)\n"
265" addl $3, %%edi\n"
266" rep movsb\n"
267 623
268" movl 4(%%esp), %%esi\n" /* move in back to %esi, toss from */ 624" movl 8(%%esp), %%esi\n" /* move in back to %esi, toss from */
269" movl 44(%%esp), %%ebp\n" /* ebp = lcode */ 625" movl 32(%%esp), %%ebp\n" /* ebp = lcode */
270" jmp .L_while_test\n" 626" jmp .L_while_test\n"
271 627
628".L_copy_two:\n"
629" rep movsw\n"
630" movl 8(%%esp), %%esi\n" /* move in back to %esi, toss from */
631" movl 32(%%esp), %%ebp\n" /* ebp = lcode */
632" jmp .L_while_test\n"
633
634".align 32,0x90\n"
272".L_check_dist_one:\n" 635".L_check_dist_one:\n"
273" cmpl $1, %%ebp\n" /* if dist 1, is a memset */ 636" cmpl $1, %%ebp\n" /* if dist 1, is a memset */
274" jne .L_check_window\n" 637" jne .L_check_window\n"
275" cmpl %%edi, 16(%%esp)\n" 638" cmpl %%edi, 20(%%esp)\n"
276" je .L_check_window\n" 639" je .L_check_window\n" /* out == beg, if outside window */
640
641" movl 64(%%esp), %%ecx\n" /* ecx = len */
642" movb -1(%%edi), %%al\n"
643" movb %%al, %%ah\n"
644
645" sarl %%ecx\n"
646" jnc .L_set_two\n"
647" movb %%al, (%%edi)\n"
648" incl %%edi\n"
277 649
278" decl %%edi\n" 650".L_set_two:\n"
279" movl 60(%%esp), %%ecx\n" 651" rep stosw\n"
280" movb (%%edi), %%al\n" 652" movl 32(%%esp), %%ebp\n" /* ebp = lcode */
281" subl $3, %%ecx\n"
282
283" movb %%al, 1(%%edi)\n" /* memset out with from[-1] */
284" movb %%al, 2(%%edi)\n"
285" movb %%al, 3(%%edi)\n"
286" addl $4, %%edi\n"
287" rep stosb\n"
288" movl 44(%%esp), %%ebp\n" /* ebp = lcode */
289" jmp .L_while_test\n" 653" jmp .L_while_test\n"
290 654
655".align 32,0x90\n"
291".L_test_for_second_level_length:\n" 656".L_test_for_second_level_length:\n"
292" testb $64, %%al\n" 657" testb $64, %%al\n"
293" jnz .L_test_for_end_of_block\n" /* if ((op & 64) != 0) */ 658" jnz .L_test_for_end_of_block\n" /* if ((op & 64) != 0) */
294 659
295" movl $1, %%eax\n" 660" xorl %%eax, %%eax\n"
661" incl %%eax\n"
296" shll %%cl, %%eax\n" 662" shll %%cl, %%eax\n"
297" decl %%eax\n" 663" decl %%eax\n"
298" andl %%edx, %%eax\n" /* eax &= hold */ 664" andl %%edx, %%eax\n" /* eax &= hold */
299" addl 60(%%esp), %%eax\n" /* eax += this.val */ 665" addl 64(%%esp), %%eax\n" /* eax += len */
300" movl (%%ebp,%%eax,4), %%eax\n" /* eax = lcode[val+(hold&mask[op])]*/ 666" movl (%%ebp,%%eax,4), %%eax\n" /* eax = lcode[val+(hold&mask[op])]*/
301" jmp .L_dolen\n" 667" jmp .L_dolen\n"
302 668
669".align 32,0x90\n"
303".L_test_for_second_level_dist:\n" 670".L_test_for_second_level_dist:\n"
304" testb $64, %%al\n" 671" testb $64, %%al\n"
305" jnz .L_invalid_distance_code\n" /* if ((op & 64) != 0) */ 672" jnz .L_invalid_distance_code\n" /* if ((op & 64) != 0) */
306 673
307" movl $1, %%eax\n" 674" xorl %%eax, %%eax\n"
675" incl %%eax\n"
308" shll %%cl, %%eax\n" 676" shll %%cl, %%eax\n"
309" decl %%eax\n" 677" decl %%eax\n"
310" andl %%edx, %%eax\n" /* eax &= hold */ 678" andl %%edx, %%eax\n" /* eax &= hold */
311" addl %%ebp, %%eax\n" /* eax += this.val */ 679" addl %%ebp, %%eax\n" /* eax += dist */
312" movl 48(%%esp), %%ecx\n" /* ecx = dcode */ 680" movl 36(%%esp), %%ecx\n" /* ecx = dcode */
313" movl (%%ecx,%%eax,4), %%eax\n" /* eax = dcode[val+(hold&mask[op])]*/ 681" movl (%%ecx,%%eax,4), %%eax\n" /* eax = dcode[val+(hold&mask[op])]*/
314" jmp .L_dodist\n" 682" jmp .L_dodist\n"
315 683
684".align 32,0x90\n"
316".L_clip_window:\n" 685".L_clip_window:\n"
317" movl %%eax, %%ecx\n" 686" movl %%eax, %%ecx\n"
318" movl 24(%%esp), %%eax\n" /* prepare for dist compare */ 687" movl 48(%%esp), %%eax\n" /* eax = wsize */
319" negl %%ecx\n" /* nbytes = -nbytes */ 688" negl %%ecx\n" /* nbytes = -nbytes */
320" movl 32(%%esp), %%esi\n" /* from = window */ 689" movl 28(%%esp), %%esi\n" /* from = window */
321 690
322" cmpl %%ebp, %%eax\n" 691" cmpl %%ebp, %%eax\n"
323" jb .L_invalid_distance_too_far\n" /* if (dist > wsize) */ 692" jb .L_invalid_distance_too_far\n" /* if (dist > wsize) */
324 693
325" addl %%ebp, %%ecx\n" /* nbytes = dist - nbytes */ 694" addl %%ebp, %%ecx\n" /* nbytes = dist - nbytes */
326" cmpl $0, 28(%%esp)\n" 695" cmpl $0, 52(%%esp)\n"
327" jne .L_wrap_around_window\n" /* if (write != 0) */ 696" jne .L_wrap_around_window\n" /* if (write != 0) */
328 697
329" subl %%ecx, %%eax\n" 698" subl %%ecx, %%eax\n"
330" addl %%eax, %%esi\n" /* from += wsize - nbytes */ 699" addl %%eax, %%esi\n" /* from += wsize - nbytes */
331 700
332" movl 60(%%esp), %%eax\n" 701" movl 64(%%esp), %%eax\n" /* eax = len */
333" cmpl %%ecx, %%eax\n"
334" jbe .L_do_copy1\n" /* if (nbytes >= len) */
335
336" subl %%ecx, %%eax\n" /* len -= nbytes */
337" rep movsb\n"
338" movl %%edi, %%esi\n"
339" subl %%ebp, %%esi\n" /* from = out - dist */
340" jmp .L_do_copy1\n"
341
342" cmpl %%ecx, %%eax\n" 702" cmpl %%ecx, %%eax\n"
343" jbe .L_do_copy1\n" /* if (nbytes >= len) */ 703" jbe .L_do_copy\n" /* if (nbytes >= len) */
344 704
345" subl %%ecx, %%eax\n" /* len -= nbytes */ 705" subl %%ecx, %%eax\n" /* len -= nbytes */
346" rep movsb\n" 706" rep movsb\n"
347" movl %%edi, %%esi\n" 707" movl %%edi, %%esi\n"
348" subl %%ebp, %%esi\n" /* from = out - dist */ 708" subl %%ebp, %%esi\n" /* from = out - dist */
349" jmp .L_do_copy1\n" 709" jmp .L_do_copy\n"
350 710
711".align 32,0x90\n"
351".L_wrap_around_window:\n" 712".L_wrap_around_window:\n"
352" movl 28(%%esp), %%eax\n" 713" movl 52(%%esp), %%eax\n" /* eax = write */
353" cmpl %%eax, %%ecx\n" 714" cmpl %%eax, %%ecx\n"
354" jbe .L_contiguous_in_window\n" /* if (write >= nbytes) */ 715" jbe .L_contiguous_in_window\n" /* if (write >= nbytes) */
355 716
356" addl 24(%%esp), %%esi\n" 717" addl 48(%%esp), %%esi\n" /* from += wsize */
357" addl %%eax, %%esi\n" 718" addl %%eax, %%esi\n" /* from += write */
358" subl %%ecx, %%esi\n" /* from += wsize + write - nbytes */ 719" subl %%ecx, %%esi\n" /* from -= nbytes */
359" subl %%eax, %%ecx\n" /* nbytes -= write */ 720" subl %%eax, %%ecx\n" /* nbytes -= write */
360 721
361" movl 60(%%esp), %%eax\n" 722" movl 64(%%esp), %%eax\n" /* eax = len */
362" cmpl %%ecx, %%eax\n" 723" cmpl %%ecx, %%eax\n"
363" jbe .L_do_copy1\n" /* if (nbytes >= len) */ 724" jbe .L_do_copy\n" /* if (nbytes >= len) */
364 725
365" subl %%ecx, %%eax\n" /* len -= nbytes */ 726" subl %%ecx, %%eax\n" /* len -= nbytes */
366" rep movsb\n" 727" rep movsb\n"
367" movl 32(%%esp), %%esi\n" /* from = window */ 728" movl 28(%%esp), %%esi\n" /* from = window */
368" movl 28(%%esp), %%ecx\n" /* nbytes = write */ 729" movl 52(%%esp), %%ecx\n" /* nbytes = write */
369" cmpl %%ecx, %%eax\n" 730" cmpl %%ecx, %%eax\n"
370" jbe .L_do_copy1\n" /* if (nbytes >= len) */ 731" jbe .L_do_copy\n" /* if (nbytes >= len) */
371 732
372" subl %%ecx, %%eax\n" /* len -= nbytes */ 733" subl %%ecx, %%eax\n" /* len -= nbytes */
373" rep movsb\n" 734" rep movsb\n"
374" movl %%edi, %%esi\n" 735" movl %%edi, %%esi\n"
375" subl %%ebp, %%esi\n" /* from = out - dist */ 736" subl %%ebp, %%esi\n" /* from = out - dist */
376" jmp .L_do_copy1\n" 737" jmp .L_do_copy\n"
377 738
739".align 32,0x90\n"
378".L_contiguous_in_window:\n" 740".L_contiguous_in_window:\n"
379" addl %%eax, %%esi\n" 741" addl %%eax, %%esi\n"
380" subl %%ecx, %%esi\n" /* from += write - nbytes */ 742" subl %%ecx, %%esi\n" /* from += write - nbytes */
381 743
382" movl 60(%%esp), %%eax\n" 744" movl 64(%%esp), %%eax\n" /* eax = len */
383" cmpl %%ecx, %%eax\n" 745" cmpl %%ecx, %%eax\n"
384" jbe .L_do_copy1\n" /* if (nbytes >= len) */ 746" jbe .L_do_copy\n" /* if (nbytes >= len) */
385 747
386" subl %%ecx, %%eax\n" /* len -= nbytes */ 748" subl %%ecx, %%eax\n" /* len -= nbytes */
387" rep movsb\n" 749" rep movsb\n"
388" movl %%edi, %%esi\n" 750" movl %%edi, %%esi\n"
389" subl %%ebp, %%esi\n" /* from = out - dist */ 751" subl %%ebp, %%esi\n" /* from = out - dist */
752" jmp .L_do_copy\n" /* copy the remaining len bytes */
390 753
391".L_do_copy1:\n" 754".align 32,0x90\n"
755".L_do_copy:\n"
392" movl %%eax, %%ecx\n" 756" movl %%eax, %%ecx\n"
393" rep movsb\n" 757" rep movsb\n"
394 758
395" movl 4(%%esp), %%esi\n" /* move in back to %esi, toss from */ 759" movl 8(%%esp), %%esi\n" /* move in back to %esi, toss from */
396" movl 44(%%esp), %%ebp\n" /* ebp = lcode */ 760" movl 32(%%esp), %%ebp\n" /* ebp = lcode */
397" jmp .L_while_test\n" 761" jmp .L_while_test\n"
398 762
399".L_test_for_end_of_block:\n" 763".L_test_for_end_of_block:\n"
400" testb $32, %%al\n" 764" testb $32, %%al\n"
401" jz .L_invalid_literal_length_code\n" 765" jz .L_invalid_literal_length_code\n"
402" movl $1, 68(%%esp)\n" 766" movl $1, 72(%%esp)\n"
403" jmp .L_break_loop_with_status\n" 767" jmp .L_break_loop_with_status\n"
404 768
405".L_invalid_literal_length_code:\n" 769".L_invalid_literal_length_code:\n"
406" movl $2, 68(%%esp)\n" 770" movl $2, 72(%%esp)\n"
407" jmp .L_break_loop_with_status\n" 771" jmp .L_break_loop_with_status\n"
408 772
409".L_invalid_distance_code:\n" 773".L_invalid_distance_code:\n"
410" movl $3, 68(%%esp)\n" 774" movl $3, 72(%%esp)\n"
411" jmp .L_break_loop_with_status\n" 775" jmp .L_break_loop_with_status\n"
412 776
413".L_invalid_distance_too_far:\n" 777".L_invalid_distance_too_far:\n"
414" movl 4(%%esp), %%esi\n" 778" movl 8(%%esp), %%esi\n"
415" movl $4, 68(%%esp)\n" 779" movl $4, 72(%%esp)\n"
416" jmp .L_break_loop_with_status\n" 780" jmp .L_break_loop_with_status\n"
417 781
418".L_break_loop:\n" 782".L_break_loop:\n"
419" movl $0, 68(%%esp)\n" 783" movl $0, 72(%%esp)\n"
420 784
421".L_break_loop_with_status:\n" 785".L_break_loop_with_status:\n"
422/* put in, out, bits, and hold back into ar and pop esp */ 786/* put in, out, bits, and hold back into ar and pop esp */
423" movl %%esi, 4(%%esp)\n" 787" movl %%esi, 8(%%esp)\n" /* save in */
424" movl %%edi, 12(%%esp)\n" 788" movl %%edi, 16(%%esp)\n" /* save out */
425" movl %%ebx, 40(%%esp)\n" 789" movl %%ebx, 44(%%esp)\n" /* save bits */
426" movl %%edx, 36(%%esp)\n" 790" movl %%edx, 40(%%esp)\n" /* save hold */
791" movl 4(%%esp), %%ebp\n" /* restore esp, ebp */
427" movl (%%esp), %%esp\n" 792" movl (%%esp), %%esp\n"
428" popl %%ebp\n"
429" popf\n"
430 : 793 :
431 : "m" (ar) 794 : "m" (ar)
432 : "memory", "%eax", "%ebx", "%ecx", "%edx", "%esi", "%edi" 795 : "memory", "%eax", "%ebx", "%ecx", "%edx", "%esi", "%edi"
433 ); 796 );
434#elif defined( _MSC_VER ) 797#elif defined( _MSC_VER ) && ! defined( _M_AMD64 )
435 __asm { 798 __asm {
436 lea eax, ar 799 lea eax, ar
437 pushfd 800 mov [eax], esp /* save esp, ebp */
438 push ebp 801 mov [eax+4], ebp
439 mov [eax], esp
440 mov esp, eax 802 mov esp, eax
441 mov esi, [esp+4] /* esi = in */ 803 mov esi, [esp+8] /* esi = in */
442 mov edi, [esp+12] /* edi = out */ 804 mov edi, [esp+16] /* edi = out */
443 mov edx, [esp+36] /* edx = hold */ 805 mov edx, [esp+40] /* edx = hold */
444 mov ebx, [esp+40] /* ebx = bits */ 806 mov ebx, [esp+44] /* ebx = bits */
445 mov ebp, [esp+44] /* ebp = lcode */ 807 mov ebp, [esp+32] /* ebp = lcode */
446 808
447 cld 809 cld
448 jmp L_do_loop 810 jmp L_do_loop
449 811
812ALIGN 4
450L_while_test: 813L_while_test:
451 cmp [esp+20], edi 814 cmp [esp+24], edi
452 jbe L_break_loop 815 jbe L_break_loop
453 cmp [esp+8], esi 816 cmp [esp+12], esi
454 jbe L_break_loop 817 jbe L_break_loop
455 818
456L_do_loop: 819L_do_loop:
@@ -465,7 +828,7 @@ L_do_loop:
465 or edx, eax /* hold |= *((ushort *)in)++ << bits */ 828 or edx, eax /* hold |= *((ushort *)in)++ << bits */
466 829
467L_get_length_code: 830L_get_length_code:
468 mov eax, [esp+52] /* eax = lmask */ 831 mov eax, [esp+56] /* eax = lmask */
469 and eax, edx /* eax &= hold */ 832 and eax, edx /* eax &= hold */
470 mov eax, [ebp+eax*4] /* eax = lcode[hold & lmask] */ 833 mov eax, [ebp+eax*4] /* eax = lcode[hold & lmask] */
471 834
@@ -481,10 +844,11 @@ L_dolen:
481 stosb 844 stosb
482 jmp L_while_test 845 jmp L_while_test
483 846
847ALIGN 4
484L_test_for_length_base: 848L_test_for_length_base:
485 mov ecx, eax /* len = this */ 849 mov ecx, eax /* len = this */
486 shr ecx, 16 /* len = this.val */ 850 shr ecx, 16 /* len = this.val */
487 mov [esp+60], ecx /* len = this */ 851 mov [esp+64], ecx /* save len */
488 mov cl, al 852 mov cl, al
489 853
490 test al, 16 854 test al, 16
@@ -504,13 +868,14 @@ L_test_for_length_base:
504 mov cl, ch /* move op back to ecx */ 868 mov cl, ch /* move op back to ecx */
505 869
506L_add_bits_to_len: 870L_add_bits_to_len:
507 mov eax, 1 871 sub bl, cl
872 xor eax, eax
873 inc eax
508 shl eax, cl 874 shl eax, cl
509 dec eax 875 dec eax
510 sub bl, cl
511 and eax, edx /* eax &= hold */ 876 and eax, edx /* eax &= hold */
512 shr edx, cl 877 shr edx, cl
513 add [esp+60], eax /* len += hold & mask[op] */ 878 add [esp+64], eax /* len += hold & mask[op] */
514 879
515L_decode_distance: 880L_decode_distance:
516 cmp bl, 15 881 cmp bl, 15
@@ -524,8 +889,8 @@ L_decode_distance:
524 or edx, eax /* hold |= *((ushort *)in)++ << bits */ 889 or edx, eax /* hold |= *((ushort *)in)++ << bits */
525 890
526L_get_distance_code: 891L_get_distance_code:
527 mov eax, [esp+56] /* eax = dmask */ 892 mov eax, [esp+60] /* eax = dmask */
528 mov ecx, [esp+48] /* ecx = dcode */ 893 mov ecx, [esp+36] /* ecx = dcode */
529 and eax, edx /* eax &= hold */ 894 and eax, edx /* eax &= hold */
530 mov eax, [ecx+eax*4]/* eax = dcode[hold & dmask] */ 895 mov eax, [ecx+eax*4]/* eax = dcode[hold & dmask] */
531 896
@@ -554,200 +919,207 @@ L_dodist:
554 mov cl, ch /* move op back to ecx */ 919 mov cl, ch /* move op back to ecx */
555 920
556L_add_bits_to_dist: 921L_add_bits_to_dist:
557 mov eax, 1 922 sub bl, cl
923 xor eax, eax
924 inc eax
558 shl eax, cl 925 shl eax, cl
559 dec eax /* (1 << op) - 1 */ 926 dec eax /* (1 << op) - 1 */
560 sub bl, cl
561 and eax, edx /* eax &= hold */ 927 and eax, edx /* eax &= hold */
562 shr edx, cl 928 shr edx, cl
563 add ebp, eax /* dist += hold & ((1 << op) - 1) */ 929 add ebp, eax /* dist += hold & ((1 << op) - 1) */
564 930
565L_check_window: 931L_check_window:
566 mov [esp+4], esi /* save in so from can use its reg */ 932 mov [esp+8], esi /* save in so from can use its reg */
567 mov eax, edi 933 mov eax, edi
568 sub eax, [esp+16] /* nbytes = out - beg */ 934 sub eax, [esp+20] /* nbytes = out - beg */
569 935
570 cmp eax, ebp 936 cmp eax, ebp
571 jb L_clip_window /* if (dist > nbytes) 4.2% */ 937 jb L_clip_window /* if (dist > nbytes) 4.2% */
572 938
573 mov ecx, [esp+60] 939 mov ecx, [esp+64] /* ecx = len */
574 mov esi, edi 940 mov esi, edi
575 sub esi, ebp /* from = out - dist */ 941 sub esi, ebp /* from = out - dist */
576 942
577 sub ecx, 3 /* copy from to out */ 943 sar ecx, 1
944 jnc L_copy_two
945
946 rep movsw
578 mov al, [esi] 947 mov al, [esi]
579 mov [edi], al 948 mov [edi], al
580 mov al, [esi+1] 949 inc edi
581 mov ah, [esi+2] 950
582 add esi, 3 951 mov esi, [esp+8] /* move in back to %esi, toss from */
583 mov [edi+1], al 952 mov ebp, [esp+32] /* ebp = lcode */
584 mov [edi+2], ah 953 jmp L_while_test
585 add edi, 3
586 rep movsb
587 954
588 mov esi, [esp+4] /* move in back to %esi, toss from */ 955L_copy_two:
589 mov ebp, [esp+44] /* ebp = lcode */ 956 rep movsw
957 mov esi, [esp+8] /* move in back to %esi, toss from */
958 mov ebp, [esp+32] /* ebp = lcode */
590 jmp L_while_test 959 jmp L_while_test
591 960
961ALIGN 4
592L_check_dist_one: 962L_check_dist_one:
593 cmp ebp, 1 /* if dist 1, is a memset */ 963 cmp ebp, 1 /* if dist 1, is a memset */
594 jne L_check_window 964 jne L_check_window
595 cmp [esp+16], edi 965 cmp [esp+20], edi
596 je L_check_window 966 je L_check_window /* out == beg, if outside window */
597 967
598 dec edi 968 mov ecx, [esp+64] /* ecx = len */
599 mov ecx, [esp+60] 969 mov al, [edi-1]
600 mov al, [edi] 970 mov ah, al
601 sub ecx, 3 971
602 972 sar ecx, 1
603 mov [edi+1], al /* memset out with from[-1] */ 973 jnc L_set_two
604 mov [edi+2], al 974 mov [edi], al /* memset out with from[-1] */
605 mov [edi+3], al 975 inc edi
606 add edi, 4 976
607 rep stosb 977L_set_two:
608 mov ebp, [esp+44] /* ebp = lcode */ 978 rep stosw
979 mov ebp, [esp+32] /* ebp = lcode */
609 jmp L_while_test 980 jmp L_while_test
610 981
982ALIGN 4
611L_test_for_second_level_length: 983L_test_for_second_level_length:
612 test al, 64 984 test al, 64
613 jnz L_test_for_end_of_block /* if ((op & 64) != 0) */ 985 jnz L_test_for_end_of_block /* if ((op & 64) != 0) */
614 986
615 mov eax, 1 987 xor eax, eax
988 inc eax
616 shl eax, cl 989 shl eax, cl
617 dec eax 990 dec eax
618 and eax, edx /* eax &= hold */ 991 and eax, edx /* eax &= hold */
619 add eax, [esp+60] /* eax += this.val */ 992 add eax, [esp+64] /* eax += len */
620 mov eax, [ebp+eax*4] /* eax = lcode[val+(hold&mask[op])]*/ 993 mov eax, [ebp+eax*4] /* eax = lcode[val+(hold&mask[op])]*/
621 jmp L_dolen 994 jmp L_dolen
622 995
996ALIGN 4
623L_test_for_second_level_dist: 997L_test_for_second_level_dist:
624 test al, 64 998 test al, 64
625 jnz L_invalid_distance_code /* if ((op & 64) != 0) */ 999 jnz L_invalid_distance_code /* if ((op & 64) != 0) */
626 1000
627 mov eax, 1 1001 xor eax, eax
1002 inc eax
628 shl eax, cl 1003 shl eax, cl
629 dec eax 1004 dec eax
630 and eax, edx /* eax &= hold */ 1005 and eax, edx /* eax &= hold */
631 add eax, ebp /* eax += this.val */ 1006 add eax, ebp /* eax += dist */
632 mov ecx, [esp+48] /* ecx = dcode */ 1007 mov ecx, [esp+36] /* ecx = dcode */
633 mov eax, [ecx+eax*4] /* eax = dcode[val+(hold&mask[op])]*/ 1008 mov eax, [ecx+eax*4] /* eax = dcode[val+(hold&mask[op])]*/
634 jmp L_dodist 1009 jmp L_dodist
635 1010
1011ALIGN 4
636L_clip_window: 1012L_clip_window:
637 mov ecx, eax 1013 mov ecx, eax
638 mov eax, [esp+24] /* prepare for dist compare */ 1014 mov eax, [esp+48] /* eax = wsize */
639 neg ecx /* nbytes = -nbytes */ 1015 neg ecx /* nbytes = -nbytes */
640 mov esi, [esp+32] /* from = window */ 1016 mov esi, [esp+28] /* from = window */
641 1017
642 cmp eax, ebp 1018 cmp eax, ebp
643 jb L_invalid_distance_too_far /* if (dist > wsize) */ 1019 jb L_invalid_distance_too_far /* if (dist > wsize) */
644 1020
645 add ecx, ebp /* nbytes = dist - nbytes */ 1021 add ecx, ebp /* nbytes = dist - nbytes */
646 cmp dword ptr [esp+28], 0 1022 cmp dword ptr [esp+52], 0
647 jne L_wrap_around_window /* if (write != 0) */ 1023 jne L_wrap_around_window /* if (write != 0) */
648 1024
649 sub eax, ecx 1025 sub eax, ecx
650 add esi, eax /* from += wsize - nbytes */ 1026 add esi, eax /* from += wsize - nbytes */
651 1027
652 mov eax, [esp+60] 1028 mov eax, [esp+64] /* eax = len */
653 cmp eax, ecx
654 jbe L_do_copy1 /* if (nbytes >= len) */
655
656 sub eax, ecx /* len -= nbytes */
657 rep movsb
658 mov esi, edi
659 sub esi, ebp /* from = out - dist */
660 jmp L_do_copy1
661
662 cmp eax, ecx 1029 cmp eax, ecx
663 jbe L_do_copy1 /* if (nbytes >= len) */ 1030 jbe L_do_copy /* if (nbytes >= len) */
664 1031
665 sub eax, ecx /* len -= nbytes */ 1032 sub eax, ecx /* len -= nbytes */
666 rep movsb 1033 rep movsb
667 mov esi, edi 1034 mov esi, edi
668 sub esi, ebp /* from = out - dist */ 1035 sub esi, ebp /* from = out - dist */
669 jmp L_do_copy1 1036 jmp L_do_copy
670 1037
1038ALIGN 4
671L_wrap_around_window: 1039L_wrap_around_window:
672 mov eax, [esp+28] 1040 mov eax, [esp+52] /* eax = write */
673 cmp ecx, eax 1041 cmp ecx, eax
674 jbe L_contiguous_in_window /* if (write >= nbytes) */ 1042 jbe L_contiguous_in_window /* if (write >= nbytes) */
675 1043
676 add esi, [esp+24] 1044 add esi, [esp+48] /* from += wsize */
677 add esi, eax 1045 add esi, eax /* from += write */
678 sub esi, ecx /* from += wsize + write - nbytes */ 1046 sub esi, ecx /* from -= nbytes */
679 sub ecx, eax /* nbytes -= write */ 1047 sub ecx, eax /* nbytes -= write */
680 1048
681 mov eax, [esp+60] 1049 mov eax, [esp+64] /* eax = len */
682 cmp eax, ecx 1050 cmp eax, ecx
683 jbe L_do_copy1 /* if (nbytes >= len) */ 1051 jbe L_do_copy /* if (nbytes >= len) */
684 1052
685 sub eax, ecx /* len -= nbytes */ 1053 sub eax, ecx /* len -= nbytes */
686 rep movsb 1054 rep movsb
687 mov esi, [esp+32] /* from = window */ 1055 mov esi, [esp+28] /* from = window */
688 mov ecx, [esp+28] /* nbytes = write */ 1056 mov ecx, [esp+52] /* nbytes = write */
689 cmp eax, ecx 1057 cmp eax, ecx
690 jbe L_do_copy1 /* if (nbytes >= len) */ 1058 jbe L_do_copy /* if (nbytes >= len) */
691 1059
692 sub eax, ecx /* len -= nbytes */ 1060 sub eax, ecx /* len -= nbytes */
693 rep movsb 1061 rep movsb
694 mov esi, edi 1062 mov esi, edi
695 sub esi, ebp /* from = out - dist */ 1063 sub esi, ebp /* from = out - dist */
696 jmp L_do_copy1 1064 jmp L_do_copy
697 1065
1066ALIGN 4
698L_contiguous_in_window: 1067L_contiguous_in_window:
699 add esi, eax 1068 add esi, eax
700 sub esi, ecx /* from += write - nbytes */ 1069 sub esi, ecx /* from += write - nbytes */
701 1070
702 mov eax, [esp+60] 1071 mov eax, [esp+64] /* eax = len */
703 cmp eax, ecx 1072 cmp eax, ecx
704 jbe L_do_copy1 /* if (nbytes >= len) */ 1073 jbe L_do_copy /* if (nbytes >= len) */
705 1074
706 sub eax, ecx /* len -= nbytes */ 1075 sub eax, ecx /* len -= nbytes */
707 rep movsb 1076 rep movsb
708 mov esi, edi 1077 mov esi, edi
709 sub esi, ebp /* from = out - dist */ 1078 sub esi, ebp /* from = out - dist */
1079 jmp L_do_copy
710 1080
711L_do_copy1: 1081ALIGN 4
1082L_do_copy:
712 mov ecx, eax 1083 mov ecx, eax
713 rep movsb 1084 rep movsb
714 1085
715 mov esi, [esp+4] /* move in back to %esi, toss from */ 1086 mov esi, [esp+8] /* move in back to %esi, toss from */
716 mov ebp, [esp+44] /* ebp = lcode */ 1087 mov ebp, [esp+32] /* ebp = lcode */
717 jmp L_while_test 1088 jmp L_while_test
718 1089
719L_test_for_end_of_block: 1090L_test_for_end_of_block:
720 test al, 32 1091 test al, 32
721 jz L_invalid_literal_length_code 1092 jz L_invalid_literal_length_code
722 mov dword ptr [esp+68], 1 1093 mov dword ptr [esp+72], 1
723 jmp L_break_loop_with_status 1094 jmp L_break_loop_with_status
724 1095
725L_invalid_literal_length_code: 1096L_invalid_literal_length_code:
726 mov dword ptr [esp+68], 2 1097 mov dword ptr [esp+72], 2
727 jmp L_break_loop_with_status 1098 jmp L_break_loop_with_status
728 1099
729L_invalid_distance_code: 1100L_invalid_distance_code:
730 mov dword ptr [esp+68], 3 1101 mov dword ptr [esp+72], 3
731 jmp L_break_loop_with_status 1102 jmp L_break_loop_with_status
732 1103
733L_invalid_distance_too_far: 1104L_invalid_distance_too_far:
734 mov esi, [esp+4] 1105 mov esi, [esp+4]
735 mov dword ptr [esp+68], 4 1106 mov dword ptr [esp+72], 4
736 jmp L_break_loop_with_status 1107 jmp L_break_loop_with_status
737 1108
738L_break_loop: 1109L_break_loop:
739 mov dword ptr [esp+68], 0 1110 mov dword ptr [esp+72], 0
740 1111
741L_break_loop_with_status: 1112L_break_loop_with_status:
742/* put in, out, bits, and hold back into ar and pop esp */ 1113/* put in, out, bits, and hold back into ar and pop esp */
743 mov [esp+4], esi 1114 mov [esp+8], esi /* save in */
744 mov [esp+12], edi 1115 mov [esp+16], edi /* save out */
745 mov [esp+40], ebx 1116 mov [esp+44], ebx /* save bits */
746 mov [esp+36], edx 1117 mov [esp+40], edx /* save hold */
1118 mov ebp, [esp+4] /* restore esp, ebp */
747 mov esp, [esp] 1119 mov esp, [esp]
748 pop ebp
749 popfd
750 } 1120 }
1121#else
1122#error "x86 architecture not defined"
751#endif 1123#endif
752 1124
753 if (ar.status > 1) { 1125 if (ar.status > 1) {
@@ -772,10 +1144,12 @@ L_break_loop_with_status:
772 /* update state and return */ 1144 /* update state and return */
773 strm->next_in = ar.in; 1145 strm->next_in = ar.in;
774 strm->next_out = ar.out; 1146 strm->next_out = ar.out;
775 strm->avail_in = (unsigned)(ar.in < ar.last ? 5 + (ar.last - ar.in) : 1147 strm->avail_in = (unsigned)(ar.in < ar.last ?
776 5 - (ar.in - ar.last)); 1148 PAD_AVAIL_IN + (ar.last - ar.in) :
777 strm->avail_out = (unsigned)(ar.out < ar.end ? 257 + (ar.end - ar.out) : 1149 PAD_AVAIL_IN - (ar.in - ar.last));
778 257 - (ar.out - ar.end)); 1150 strm->avail_out = (unsigned)(ar.out < ar.end ?
1151 PAD_AVAIL_OUT + (ar.end - ar.out) :
1152 PAD_AVAIL_OUT - (ar.out - ar.end));
779 state->hold = ar.hold; 1153 state->hold = ar.hold;
780 state->bits = ar.bits; 1154 state->bits = ar.bits;
781 return; 1155 return;