summaryrefslogtreecommitdiff
path: root/contrib/inflate86
diff options
context:
space:
mode:
authorMark Adler <madler@alumni.caltech.edu>2011-09-09 23:21:47 -0700
committerMark Adler <madler@alumni.caltech.edu>2011-09-09 23:21:47 -0700
commit7c2a874e50b871d04fbd19501f7b42cff55e5abc (patch)
tree1879cd29182ababb17cde77cee5ce74505db4006 /contrib/inflate86
parenta383133c4e7b93113cee912f213cf9502d785fa7 (diff)
downloadzlib-1.2.0.tar.gz
zlib-1.2.0.tar.bz2
zlib-1.2.0.zip
zlib 1.2.0v1.2.0
Diffstat (limited to 'contrib/inflate86')
-rw-r--r--contrib/inflate86/inffast.S1095
1 files changed, 1095 insertions, 0 deletions
diff --git a/contrib/inflate86/inffast.S b/contrib/inflate86/inffast.S
new file mode 100644
index 0000000..d1e80ef
--- /dev/null
+++ b/contrib/inflate86/inffast.S
@@ -0,0 +1,1095 @@
1/*
2 * inffast.S is a hand tuned assembler version of:
3 *
4 * inffast.c -- fast decoding
5 * Copyright (C) 1995-2003 Mark Adler
6 * For conditions of distribution and use, see copyright notice in zlib.h
7 *
8 * Copyright (C) 2003 Chris Anderson <christop@charm.net>
9 * Please use the copyright conditions above.
10 *
11 * This version (Jan-23-2003) of inflate_fast was coded and tested under
12 * GNU/Linux on a pentium 3, using the gcc-3.2 compiler distribution. On that
13 * machine, I found that gzip style archives decompressed about 20% faster than
14 * the gcc-3.2 -O3 -fomit-frame-pointer compiled version. Your results will
15 * depend on how large of a buffer is used for z_stream.next_in & next_out
16 * (8K-32K worked best for my 256K cpu cache) and how much overhead there is in
17 * stream processing I/O and crc32/adler32. In my case, this routine used
18 * 70% of the cpu time and crc32 used 20%.
19 *
20 * I am confident that this version will work in the general case, but I have
21 * not tested a wide variety of datasets or a wide variety of platforms.
22 *
23 * Jan-24-2003 -- Added -DUSE_MMX define for slightly faster inflating.
24 * It should be a runtime flag instead of compile time flag...
25 */
26
/*
 * Constant data used by inflate_fast: the three messages that the error
 * paths load into %ecx and store into strm->msg, followed (USE_MMX builds
 * only) by the bit-mask lookup table.  Everything here is placed in .text.
 */
27.file "inffast.S"
28
29.globl inflate_fast
30
31.text
32.align 4,0
33.L_invalid_literal_length_code_msg:
34.string "invalid literal/length code"
35
36.align 4,0
37.L_invalid_distance_code_msg:
38.string "invalid distance code"
39
40.align 4,0
41.L_invalid_distance_too_far_msg:
42.string "invalid distance too far back"
43
44#if defined( USE_MMX )
45.align 4,0
/*
 * 33 entries, N = 0..32.  The MMX code reads it as .L_mask(,%eax,4) with
 * %eax holding an extra-bit count that was masked with 15 beforehand.
 */
46.L_mask: /* mask[N] = ( 1 << N ) - 1 */
47.long 0
48.long 1
49.long 3
50.long 7
51.long 15
52.long 31
53.long 63
54.long 127
55.long 255
56.long 511
57.long 1023
58.long 2047
59.long 4095
60.long 8191
61.long 16383
62.long 32767
63.long 65535
64.long 131071
65.long 262143
66.long 524287
67.long 1048575
68.long 2097151
69.long 4194303
70.long 8388607
71.long 16777215
72.long 33554431
73.long 67108863
74.long 134217727
75.long 268435455
76.long 536870911
77.long 1073741823
78.long 2147483647
79.long 4294967295
80#endif
81
82.text
83
84/*
85 * struct z_stream offsets, in zlib.h
 *
 * NOTE(review): these byte offsets (and the inflate_state offsets below) are
 * hard-coded; they must match the layout the C compiler gives the structs
 * for the target -- re-verify whenever zlib.h or inflate.h changes.
86 */
87#define next_in_strm 0 /* strm->next_in */
88#define avail_in_strm 4 /* strm->avail_in */
89#define next_out_strm 12 /* strm->next_out */
90#define avail_out_strm 16 /* strm->avail_out */
91#define msg_strm 24 /* strm->msg */
92#define state_strm 28 /* strm->state */
93
94/*
95 * struct inflate_state offsets, in inflate.h
96 */
97#define mode_state 0 /* state->mode */
98#define wsize_state 32 /* state->wsize */
99#define write_state 36 /* state->write */
100#define window_state 40 /* state->window */
101#define hold_state 44 /* state->hold */
102#define bits_state 48 /* state->bits */
103#define lencode_state 64 /* state->lencode */
104#define distcode_state 68 /* state->distcode */
105#define lenbits_state 72 /* state->lenbits */
106#define distbits_state 76 /* state->distbits */
107
108/*
109 * inflate_fast's activation record
 *
 * The prologue pushes %edi, %esi, %ebp, %ebx and eflags (5 x 4 bytes) and
 * then subtracts local_var_size from %esp.  Together with the 4-byte return
 * address that puts the first argument at local_var_size + 24 from %esp.
110 */
111#define local_var_size 56 /* how much local space for vars */
112#define strm_sp 80 /* first arg: z_stream * (local_var_size + 24) */
113#define start_sp 84 /* second arg: unsigned int (local_var_size + 28) */
114
115/*
116 * offsets for local vars on stack
 *
 * All are used as NAME(%esp); 14 four-byte slots fill local_var_size (56).
117 */
118#define out 52 /* unsigned char* */
119#define window 48 /* unsigned char* */
120#define wsize 44 /* unsigned int */
121#define write 40 /* unsigned int */
122#define in 36 /* unsigned char* */
123#define beg 32 /* unsigned char* */
124#define dist 28 /* unsigned int */
125#define len 24 /* unsigned int */
126#define last 20 /* unsigned char* */
127#define end 16 /* unsigned char* */
128#define dcode 12 /* code* */
129#define lcode 8 /* code* */
130#define dmask 4 /* unsigned int */
131#define lmask 0 /* unsigned int */
132
133/*
134 * typedef enum inflate_mode consts, in inflate.h
 *
 * GUNZIP is defined unless NO_GUNZIP is; the numeric values of TYPE and BAD
 * differ between the two configurations, so both sets are listed.
135 */
136#ifndef NO_GUNZIP
137#define GUNZIP
138#endif
139
140#ifdef GUNZIP
141#define INFLATE_MODE_TYPE 11 /* state->mode flags enum-ed in inflate.h */
142#define INFLATE_MODE_BAD 26
143#else
144#define INFLATE_MODE_TYPE 3
145#define INFLATE_MODE_BAD 17
146#endif
147
148
/*
 * inflate_fast(strm, start) -- entry point and setup.
 *
 * Both arguments are read from the stack (strm_sp / start_sp).  The prologue
 * saves %edi, %esi, %ebp, %ebx and eflags, reserves local_var_size bytes of
 * locals and clears the direction flag so the string instructions used below
 * (lodsw, stosb, movsb) advance upward.
 *
 * Setup then:
 *   - derives in/out/last/beg/end from the stream and stores them as locals;
 *   - caches lencode/distcode, the two bit masks and the window fields;
 *   - loads hold and bits into registers (non-MMX: %ebp / %bl;
 *     MMX: %mm0 / %ebp, with hold passing through %ebx first);
 *   - consumes input one byte at a time until in is 2-byte aligned (non-MMX,
 *     at most one byte) or 4-byte aligned (MMX, loop);
 *   - loads out into %edi and jumps to .L_do_loop.
 */
149.align 16,0x90
150inflate_fast:
151 pushl %edi
152 pushl %esi
153 pushl %ebp
154 pushl %ebx
155 pushf /* save eflags (strm_sp, state_sp assumes this is 32 bits) */
156 subl $local_var_size, %esp
157 cld
158#if defined( USE_MMX )
159 emms
160#endif
161
162#define strm_r %esi
163#define state_r %edi
164
165 movl strm_sp(%esp), strm_r
166 movl state_strm(strm_r), state_r
167
168 /* in = strm->next_in;
169 * out = strm->next_out;
170 * last = in + strm->avail_in - 5;
171 * beg = out - (start - strm->avail_out);
172 * end = out + (strm->avail_out - 257);
173 */
174 movl next_in_strm(strm_r), %eax
175 movl next_out_strm(strm_r), %ebx
176 movl avail_in_strm(strm_r), %edx
177 movl avail_out_strm(strm_r), %ecx
178 movl start_sp(%esp), %ebp
179
180 addl %eax, %edx /* avail_in += next_in */
181 subl $5, %edx /* avail_in -= 5 */
182
183 subl %ecx, %ebp /* start -= avail_out */
184 negl %ebp /* start = -start */
185 addl %ebx, %ebp /* start += next_out */
186
187 subl $257, %ecx /* avail_out -= 257 */
188 addl %ebx, %ecx /* avail_out += out */
189
190 movl %eax, in(%esp)
191 movl %ebx, out(%esp)
192 movl %edx, last(%esp)
193 movl %ebp, beg(%esp)
194 movl %ecx, end(%esp)
195
196 /* wsize = state->wsize;
197 * write = state->write;
198 * window = state->window;
199 * hold = state->hold;
200 * bits = state->bits;
201 * lcode = state->lencode;
202 * dcode = state->distcode;
203 * lmask = ( 1 << state->lenbits ) - 1;
204 * dmask = ( 1 << state->distbits ) - 1;
205 */
206
207 movl lencode_state(state_r), %eax
208 movl distcode_state(state_r), %ecx
209
210 movl %eax, lcode(%esp)
211 movl %ecx, dcode(%esp)
212
213 movl $1, %eax
214 movl lenbits_state(state_r), %ecx
215 shll %cl, %eax
216 decl %eax
217 movl %eax, lmask(%esp)
218
219 movl $1, %eax
220 movl distbits_state(state_r), %ecx
221 shll %cl, %eax
222 decl %eax
223 movl %eax, dmask(%esp)
224
225 movl wsize_state(state_r), %eax
226 movl write_state(state_r), %ecx
227 movl window_state(state_r), %edx
228
229 movl %eax, wsize(%esp)
230 movl %ecx, write(%esp)
231 movl %edx, window(%esp)
232
233#if ! defined( USE_MMX )
234
235#define hold_r %ebp
236#define bits_r %bl
237#define bitslong_r %ebx
238
239 movl hold_state(state_r), hold_r
240 movl bits_state(state_r), bitslong_r
241
242#else /* USE_MMX */
243
244#define hold_mm %mm0
245#define bits_r %ebp
246#define bitslong_r %ebp
247
 /* hold is staged in %ebx; it moves into hold_mm after input alignment */
248 movl hold_state(state_r), %ebx
249 movl bits_state(state_r), bitslong_r
250
251#endif
252
253#undef strm_r
254#undef state_r
255#define in_r %esi
256#define from_r %esi
257#define out_r %edi
258
259 movl in(%esp), in_r
260
261#if ! defined ( USE_MMX )
262
263 /* align in_r on word boundary */
264 testl $1, in_r
265 jz .L_is_aligned
266 xorl %eax, %eax
267 movb (in_r), %al
268 incl in_r
269 movb bits_r, %cl
270 addb $8, bits_r
271 shll %cl, %eax
272 orl %eax, hold_r
273
274#else
275 /* align in_r on long boundary */
276.L_align_long:
277 testl $3, in_r
278 jz .L_is_aligned
279 xorl %eax, %eax
280 movb (in_r), %al
281 incl in_r
282 movl bits_r, %ecx
283 addl $8, bits_r
284 shll %cl, %eax
285 orl %eax, %ebx
286 jmp .L_align_long
287
288#endif
289
290.L_is_aligned:
291 movl out(%esp), out_r
292
293#if defined ( USE_MMX )
294
295#define used_mm %mm1
296#define dmask2_mm %mm2
297#define lmask2_mm %mm3
298#define lmask_mm %mm4
299#define dmask_mm %mm5
300#define tmp_mm %mm6
301
 /* out_r was already loaded at .L_is_aligned, so it is not reloaded here.
  * lmask2_mm/dmask2_mm keep pristine copies of the masks because the
  * lookups apply pand in place to lmask_mm/dmask_mm and then restore them.
  */
303 movd lmask(%esp), lmask_mm
304 movq lmask_mm, lmask2_mm
305 movd dmask(%esp), dmask_mm
306 movq dmask_mm, dmask2_mm
307 movd %ebx, hold_mm
308 pxor used_mm, used_mm
309 movl lcode(%esp), %ebx /* ebx = lcode */
310#endif
311
312 jmp .L_do_loop
313
/*
 * Top of the main decode loop: top up the bit buffer if it is low, then
 * fetch the first-level literal/length table entry into %eax.
 *
 * Non-MMX: hold is %ebp, bits is %bl; 16 bits are added when bits <= 15.
 * MMX:     hold is %mm0, bits is %ebp, %ebx holds lcode; 32 bits are added
 *          when bits <= 32.  The right shift for the previously consumed
 *          code is deferred in used_mm and applied here first.
 */
314.align 16,0x90
315
316#if ! defined ( USE_MMX )
317
318.L_do_loop:
319 /* regs: %esi = in, %ebp = hold, %bl = bits, %edi = out
320 *
321 * do {
322 * if (bits < 15) {
323 * hold |= *((unsigned short *)in)++ << bits;
324 * bits += 16
325 * }
326 * this = lcode[hold & lmask]
327 */
328 cmpb $15, bits_r
329 ja .L_get_length_code /* if (15 < bits) */
330
331 xorl %eax, %eax
332 lodsw /* ax = *(ushort *)in++ */
333 movb bits_r, %cl /* cl = bits, needs it for shifting */
334 addb $16, bits_r /* bits += 16 */
335 shll %cl, %eax
336 orl %eax, hold_r /* hold |= *((ushort *)in)++ << bits */
337
338.L_get_length_code:
339 movl lmask(%esp), %edx /* edx = lmask */
340 movl lcode(%esp), %ecx /* ecx = lcode */
341 andl hold_r, %edx /* edx &= hold */
342 movl (%ecx,%edx,4), %eax /* eax = lcode[hold & lmask] */
343
344#else /* USE_MMX */
345
346.L_do_loop:
347 psrlq used_mm, hold_mm /* hold_mm >>= last bit length */
348
349 cmpl $32, bits_r
350 ja .L_get_length_code /* if (32 < bits) */
351
352 movd bits_r, tmp_mm
353 movd (in_r), %mm7
354 addl $4, in_r
355 psllq tmp_mm, %mm7
356 addl $32, bits_r
357 por %mm7, hold_mm /* hold_mm |= *((uint *)in)++ << bits */
358
359.L_get_length_code:
 /* mask in place, extract the index, then restore lmask_mm from its copy */
360 pand hold_mm, lmask_mm
361 movd lmask_mm, %eax
362 movq lmask2_mm, lmask_mm
363 movl (%ebx,%eax,4), %eax /* eax = lcode[hold & lmask] */
364
365#endif
366
/*
 * .L_dolen: consume the bits of the table entry in %eax and emit it if it is
 * a literal.  As used here the entry is laid out as
 *   %al = op, %ah = bits, upper 16 bits of %eax = val.
 * .L_while_test: loop condition shared by every path that finishes a symbol.
 */
367#if ! defined( USE_MMX )
368
369.L_dolen:
370 /* regs: %esi = in, %ebp = hold, %bl = bits, %edi = out
371 *
372 * dolen:
373 * bits -= this.bits;
374 * hold >>= this.bits
375 */
376 movb %ah, %cl /* cl = this.bits */
377 subb %ah, bits_r /* bits -= this.bits */
378 shrl %cl, hold_r /* hold >>= this.bits */
379
380 /* check if op is a literal
381 * if (op == 0) {
382 * PUP(out) = this.val;
383 * }
384 */
385 testb %al, %al
386 jnz .L_test_for_length_base /* if (op != 0) 45.7% */
387
388 shrl $16, %eax /* output this.val char */
389 stosb
390
391#else /* USE_MMX */
392
393#define len_r %edx
394
395.L_dolen:
396 movzbl %ah, %ecx /* ecx = this.bits */
397 movl %eax, len_r /* len = this */
398 shrl $16, len_r /* len = this.val */
 /* the shift of hold_mm is deferred: used_mm is applied by the next psrlq */
399 movd %ecx, used_mm
400 subl %ecx, bits_r /* bits -= this.bits */
401
402 testb %al, %al
403 jnz .L_test_for_length_base /* if (op != 0) 45.7% */
404
405 movb %dl, (out_r)
406 incl out_r
407
408#endif
409
410.L_while_test:
411 /* while (in < last && out < end)
412 */
413 cmpl out_r, end(%esp)
414 jbe .L_break_loop /* if (out >= end) */
415
416 cmpl in_r, last(%esp)
417 ja .L_do_loop /* if (in < last) */
418 jmp .L_break_loop
419
/*
 * Length handling followed by the first-level distance lookup.
 *
 * .L_test_for_length_base: if op has bit 16 set the entry is a length base;
 *     add (op & 15) extra bits from hold to it.  Otherwise branch to the
 *     second-level length lookup.
 * .L_decode_distance: top up the bit buffer and fetch dcode[hold & dmask]
 *     into %eax for .L_dodist.
 *
 * Non-MMX keeps len in %edx only until .L_save_len stores it to len(%esp);
 * after that %edx is reused for dist.  MMX keeps len in %edx throughout.
 */
420#if ! defined( USE_MMX )
421
422.L_test_for_length_base:
423 /* regs: %esi = in, %ebp = hold, %bl = bits, %edi = out, %edx = len
424 *
425 * else if (op & 16) {
426 * len = this.val
427 * op &= 15
428 * if (op) {
429 * if (op > bits) {
430 * hold |= *((unsigned short *)in)++ << bits;
431 * bits += 16
432 * }
433 * len += hold & mask[op];
434 * bits -= op;
435 * hold >>= op;
436 * }
437 */
438#define len_r %edx
439 movl %eax, len_r /* len = this */
440 shrl $16, len_r /* len = this.val */
441 movb %al, %cl
442
443 testb $16, %al
444 jz .L_test_for_second_level_length /* if ((op & 16) == 0) 8% */
445 andb $15, %cl /* op &= 15 */
446 jz .L_save_len /* if (!op) */
447 cmpb %cl, bits_r
448 jae .L_add_bits_to_len /* if (op <= bits) */
449
450 movb %cl, %ch /* stash op in ch, freeing cl */
451 xorl %eax, %eax
452 lodsw /* ax = *(ushort *)in++ */
453 movb bits_r, %cl /* cl = bits, needs it for shifting */
454 addb $16, bits_r /* bits += 16 */
455 shll %cl, %eax
456 orl %eax, hold_r /* hold |= *((ushort *)in)++ << bits */
457 movb %ch, %cl /* move op back to cl */
458
459.L_add_bits_to_len:
460 movl $1, %eax
461 shll %cl, %eax
462 decl %eax
463 subb %cl, bits_r
464 andl hold_r, %eax /* eax &= hold */
465 shrl %cl, hold_r
466 addl %eax, len_r /* len += hold & mask[op] */
467
468.L_save_len:
469 movl len_r, len(%esp) /* save len */
470#undef len_r
471
472.L_decode_distance:
473 /* regs: %esi = in, %ebp = hold, %bl = bits, %edi = out, %edx = dist
474 *
475 * if (bits < 15) {
476 * hold |= *((unsigned short *)in)++ << bits;
477 * bits += 16
478 * }
479 * this = dcode[hold & dmask];
480 * dodist:
481 * bits -= this.bits;
482 * hold >>= this.bits;
483 * op = this.op;
484 */
485
486 cmpb $15, bits_r
487 ja .L_get_distance_code /* if (15 < bits) */
488
489 xorl %eax, %eax
490 lodsw /* ax = *(ushort *)in++ */
491 movb bits_r, %cl /* cl = bits, needs it for shifting */
492 addb $16, bits_r /* bits += 16 */
493 shll %cl, %eax
494 orl %eax, hold_r /* hold |= *((ushort *)in)++ << bits */
495
496.L_get_distance_code:
497 movl dmask(%esp), %edx /* edx = dmask */
498 movl dcode(%esp), %ecx /* ecx = dcode */
499 andl hold_r, %edx /* edx &= hold */
500 movl (%ecx,%edx,4), %eax /* eax = dcode[hold & dmask] */
501
502#else /* USE_MMX */
503
504.L_test_for_length_base:
505 testb $16, %al
506 jz .L_test_for_second_level_length /* if ((op & 16) == 0) 8% */
507 andl $15, %eax /* op &= 15 */
508 jz .L_decode_distance /* if (!op) */
509
510 psrlq used_mm, hold_mm /* hold_mm >>= last bit length */
511 movd %eax, used_mm
512 movd hold_mm, %ecx
513 subl %eax, bits_r
514 andl .L_mask(,%eax,4), %ecx
515 addl %ecx, len_r /* len += hold & mask[op] */
516
517.L_decode_distance:
518
519 psrlq used_mm, hold_mm /* hold_mm >>= last bit length */
520
521 cmpl $32, bits_r
522 ja .L_get_dist_code /* if (32 < bits) */
523
524 movd bits_r, tmp_mm
525 movd (in_r), %mm7
526 addl $4, in_r
527 psllq tmp_mm, %mm7
528 addl $32, bits_r
529 por %mm7, hold_mm /* hold_mm |= *((uint *)in)++ << bits */
530
531.L_get_dist_code:
 /* %ebx (lcode) is reused for dcode/dist; every path back to the loop reloads it */
532 movl dcode(%esp), %ebx /* ebx = dcode */
533 pand hold_mm, dmask_mm
534 movd dmask_mm, %eax
535 movq dmask2_mm, dmask_mm
536 movl (%ebx,%eax,4), %eax /* eax = dcode[hold & dmask] */
537
538#endif
539
/*
 * .L_dodist: consume the bits of the distance table entry in %eax and, when
 * op has bit 16 set, add (op & 15) extra bits from hold to the distance base.
 * An entry without bit 16 goes to the second-level distance lookup; a base
 * with zero extra bits goes to .L_check_dist_one; everything else continues
 * at .L_check_window.  dist lives in %edx (non-MMX) or %ebx (MMX).
 */
540#if ! defined( USE_MMX )
541
542#define dist_r %edx
543.L_dodist:
544 movl %eax, dist_r /* dist = this */
545 shrl $16, dist_r /* dist = this.val */
546 movb %ah, %cl
547 subb %ah, bits_r /* bits -= this.bits */
548 shrl %cl, hold_r /* hold >>= this.bits */
549
550 /* if (op & 16) {
551 * dist = this.val
552 * op &= 15
553 * if (op > bits) {
554 * hold |= *((unsigned short *)in)++ << bits;
555 * bits += 16
556 * }
557 * dist += hold & mask[op];
558 * bits -= op;
559 * hold >>= op;
560 */
561 movb %al, %cl /* cl = this.op */
562
563 testb $16, %al /* if ((op & 16) == 0) */
564 jz .L_test_for_second_level_dist
565 andb $15, %cl /* op &= 15 */
566 jz .L_check_dist_one
567 cmpb %cl, bits_r
568 jae .L_add_bits_to_dist /* if (op <= bits) 97.6% */
569
570 movb %cl, %ch /* stash op in ch, freeing cl */
571 xorl %eax, %eax
572 lodsw /* ax = *(ushort *)in++ */
573 movb bits_r, %cl /* cl = bits, needs it for shifting */
574 addb $16, bits_r /* bits += 16 */
575 shll %cl, %eax
576 orl %eax, hold_r /* hold |= *((ushort *)in)++ << bits */
577 movb %ch, %cl /* move op back to cl */
578
579.L_add_bits_to_dist:
580 movl $1, %eax
581 shll %cl, %eax
582 decl %eax /* (1 << op) - 1 */
583 subb %cl, bits_r
584 andl hold_r, %eax /* eax &= hold */
585 shrl %cl, hold_r
586 addl %eax, dist_r /* dist += hold & ((1 << op) - 1) */
587 jmp .L_check_window
588
589#else /* USE_MMX */
590
591#define dist_r %ebx
592.L_dodist:
593 movzbl %ah, %ecx /* ecx = this.bits */
594 movl %eax, dist_r
595 shrl $16, dist_r /* dist = this.val */
596 subl %ecx, bits_r /* bits -= this.bits */
597 movd %ecx, used_mm
598
599 testb $16, %al /* if ((op & 16) == 0) */
600 jz .L_test_for_second_level_dist
601 andl $15, %eax /* op &= 15 */
602 jz .L_check_dist_one
603
604.L_add_bits_to_dist:
605 psrlq used_mm, hold_mm /* hold_mm >>= last bit length */
606 movd %eax, used_mm /* save bit length of current op */
607 movd hold_mm, %ecx /* get the next bits on input stream */
608 subl %eax, bits_r /* bits -= op bits */
609 andl .L_mask(,%eax,4), %ecx /* ecx = hold & mask[op] */
610 addl %ecx, dist_r /* dist += hold & mask[op] */
611 jmp .L_check_window
612
613#endif
614
/*
 * .L_check_dist_one: reached only when the distance entry has zero extra
 * bits.  If dist == 1 and out != beg, the match is a run of the byte just
 * before out: that byte is stored three times explicitly and the remaining
 * len - 3 copies are written with rep stosb.  Any other case falls back to
 * the general copy at .L_check_window.
 */
615.align 16,0x90
616
617.L_check_dist_one:
618 cmpl $1, dist_r
619 jne .L_check_window
620 cmpl out_r, beg(%esp)
621 je .L_check_window
622
623 decl out_r
624#if ! defined( USE_MMX )
625 movl len(%esp), %ecx
626#else
627 movl len_r, %ecx
628#endif
629 movb (out_r), %al
630 subl $3, %ecx
631
632 movb %al, 1(out_r)
633 movb %al, 2(out_r)
634 movb %al, 3(out_r)
635 addl $4, out_r
636 rep stosb
637
638#if defined( USE_MMX )
639 movl lcode(%esp), %ebx /* move lcode back to %ebx, toss dist */
640#endif
641 jmp .L_while_test
642
/*
 * .L_check_window: general match copy.  in is spilled to in(%esp) because
 * from shares %esi with it.  When dist <= out - beg the source lies entirely
 * in the output already written by this call: the first three bytes are
 * copied by hand and the remaining len - 3 with rep movsb.  Otherwise control
 * goes to .L_clip_window with nbytes (out - beg) in %eax.
 */
643.align 16,0x90
644
645.L_check_window:
646 /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist
647 * %ecx = nbytes
648 *
649 * nbytes = out - beg;
650 * if (dist <= nbytes) {
651 * from = out - dist;
652 * do {
653 * PUP(out) = PUP(from);
654 * } while (--len > 0);
655 * }
656 */
657
658 movl in_r, in(%esp) /* save in so from can use its reg */
659 movl out_r, %eax
660 subl beg(%esp), %eax /* nbytes = out - beg */
661
662 cmpl dist_r, %eax
663 jb .L_clip_window /* if (dist > nbytes) 4.2% */
664
665#if ! defined( USE_MMX )
666 movl len(%esp), %ecx
667#else
668 movl len_r, %ecx
669#endif
670 movl out_r, from_r
671 subl dist_r, from_r /* from = out - dist */
672
673 subl $3, %ecx
674 movb (from_r), %al
675 movb %al, (out_r)
676 movb 1(from_r), %al
 /* %dl is free as scratch: %edx held dist (non-MMX, already consumed above)
  * or len (MMX, already copied to %ecx) */
677 movb 2(from_r), %dl
678 addl $3, from_r
679 movb %al, 1(out_r)
680 movb %dl, 2(out_r)
681 addl $3, out_r
682 rep movsb
683
684 movl in(%esp), in_r /* move in back to %esi, toss from */
685#if defined( USE_MMX )
686 movl lcode(%esp), %ebx /* move lcode back to %ebx, toss dist */
687#endif
688 jmp .L_while_test
689
/*
 * .L_test_for_second_level_length: the length entry had op bit 16 clear.
 * If bit 64 is also clear the entry links to a sub-table: index lcode with
 * val + (hold & mask[op]) and re-enter .L_dolen with the new entry.  With
 * bit 64 set it is either end-of-block or invalid, decided at
 * .L_test_for_end_of_block.
 */
690.align 16,0x90
691
692#if ! defined( USE_MMX )
693
694.L_test_for_second_level_length:
695 /* else if ((op & 64) == 0) {
696 * this = lcode[this.val + (hold & mask[op])];
697 * }
698 */
699 testb $64, %al
700 jnz .L_test_for_end_of_block /* if ((op & 64) != 0) */
701
 /* %cl still holds op (set from %al in .L_test_for_length_base), %edx holds val */
702 movl $1, %eax
703 shll %cl, %eax
704 decl %eax
705 andl hold_r, %eax /* eax &= hold */
706 addl %edx, %eax /* eax += this.val */
707 movl lcode(%esp), %edx /* edx = lcode */
708 movl (%edx,%eax,4), %eax /* eax = lcode[val + (hold&mask[op])] */
709 jmp .L_dolen
710
711#else /* USE_MMX */
712
713.L_test_for_second_level_length:
714 testb $64, %al
715 jnz .L_test_for_end_of_block /* if ((op & 64) != 0) */
716
717 andl $15, %eax
718 psrlq used_mm, hold_mm /* hold_mm >>= last bit length */
719 movd hold_mm, %ecx
720 andl .L_mask(,%eax,4), %ecx
721 addl len_r, %ecx
722 movl (%ebx,%ecx,4), %eax /* eax = lcode[val + (hold & mask[op])] */
723 jmp .L_dolen
724
725#endif
726
/*
 * .L_test_for_second_level_dist: the distance entry had op bit 16 clear.
 * If bit 64 is also clear the entry links to a sub-table: index dcode with
 * val + (hold & mask[op]) and re-enter .L_dodist.  With bit 64 set the code
 * is invalid.
 */
727.align 16,0x90
728
729#if ! defined( USE_MMX )
730
731.L_test_for_second_level_dist:
732 /* else if ((op & 64) == 0) {
733 * this = dcode[this.val + (hold & mask[op])];
734 * }
735 */
736 testb $64, %al
737 jnz .L_invalid_distance_code /* if ((op & 64) != 0) */
738
 /* %cl still holds op (set from %al in .L_dodist), %edx holds val */
739 movl $1, %eax
740 shll %cl, %eax
741 decl %eax
742 andl hold_r, %eax /* eax &= hold */
743 addl %edx, %eax /* eax += this.val */
744 movl dcode(%esp), %edx /* edx = dcode */
745 movl (%edx,%eax,4), %eax /* eax = dcode[val + (hold&mask[op])] */
746 jmp .L_dodist
747
748#else /* USE_MMX */
749
750.L_test_for_second_level_dist:
751 testb $64, %al
752 jnz .L_invalid_distance_code /* if ((op & 64) != 0) */
753
754 andl $15, %eax
755 psrlq used_mm, hold_mm /* hold_mm >>= last bit length */
756 movd hold_mm, %ecx
757 andl .L_mask(,%eax,4), %ecx
758 movl dcode(%esp), %eax /* eax = dcode */
759 addl dist_r, %ecx
760 movl (%eax,%ecx,4), %eax /* eax = dcode[val + (hold & mask[op])] */
761 jmp .L_dodist
762
763#endif
764
/*
 * .L_clip_window: the match starts before beg, so part (or all) of it must
 * come from the sliding window.  Entered from .L_check_window with
 * %eax = out - beg and in already saved in in(%esp).
 *
 * Rejects dist > wsize, computes nbytes = dist - (out - beg), i.e. how many
 * bytes have to come from the window, and handles the write == 0 case here;
 * write != 0 continues at .L_wrap_around_window.  All paths finish at
 * .L_do_copy1, which copies whatever is left of len from from_r.
 */
765.align 16,0x90
766.L_clip_window:
767 /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist
768 * %ecx = nbytes
769 *
770 * else {
771 * if (dist > wsize) {
772 * invalid distance
773 * }
774 * from = window;
775 * nbytes = dist - nbytes;
776 * if (write == 0) {
777 * from += wsize - nbytes;
778 */
779#define nbytes_r %ecx
780
781 movl %eax, nbytes_r
782 movl wsize(%esp), %eax /* prepare for dist compare */
783 negl nbytes_r /* nbytes = -nbytes */
784 movl window(%esp), from_r /* from = window */
785
786 cmpl dist_r, %eax
787 jb .L_invalid_distance_too_far /* if (dist > wsize) */
788
789 addl dist_r, nbytes_r /* nbytes = dist - nbytes */
790 cmpl $0, write(%esp)
791 jne .L_wrap_around_window /* if (write != 0) */
792
793 subl nbytes_r, %eax
794 addl %eax, from_r /* from += wsize - nbytes */
795
796 /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist
797 * %ecx = nbytes, %eax = len
798 *
799 * if (nbytes < len) {
800 * len -= nbytes;
801 * do {
802 * PUP(out) = PUP(from);
803 * } while (--nbytes);
804 * from = out - dist;
805 * }
806 * }
807 */
808
809#if ! defined( USE_MMX )
810#define len_r %eax
811 movl len(%esp), len_r
812#endif
813 cmpl nbytes_r, len_r
814 jbe .L_do_copy1 /* if (nbytes >= len) */
815
816 subl nbytes_r, len_r /* len -= nbytes */
817 rep movsb
818 movl out_r, from_r
819 subl dist_r, from_r /* from = out - dist */
820 jmp .L_do_copy1
830
/*
 * .L_wrap_around_window: window copy when write != 0.  If nbytes <= write
 * the source is one contiguous run (handled at .L_contiguous_in_window).
 * Otherwise the source starts near the end of the window and wraps: copy the
 * tail piece, then up to write bytes from the window start, then leave any
 * remainder to be copied from the output buffer by .L_do_copy1.
 * %eax is used as write first and is then reloaded as len (non-MMX only; in
 * MMX builds len_r is %edx).
 */
831.L_wrap_around_window:
832 /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist
833 * %ecx = nbytes, %eax = write, %eax = len
834 *
835 * else if (write < nbytes) {
836 * from += wsize + write - nbytes;
837 * nbytes -= write;
838 * if (nbytes < len) {
839 * len -= nbytes;
840 * do {
841 * PUP(out) = PUP(from);
842 * } while (--nbytes);
843 * from = window;
844 * nbytes = write;
845 * if (nbytes < len) {
846 * len -= nbytes;
847 * do {
848 * PUP(out) = PUP(from);
849 * } while(--nbytes);
850 * from = out - dist;
851 * }
852 * }
853 * }
854 */
855#define write_r %eax
856
857 movl write(%esp), write_r
858 cmpl write_r, nbytes_r
859 jbe .L_contiguous_in_window /* if (write >= nbytes) */
860
861 addl wsize(%esp), from_r
862 addl write_r, from_r
863 subl nbytes_r, from_r /* from += wsize + write - nbytes */
864 subl write_r, nbytes_r /* nbytes -= write */
865#undef write_r
866
867#if ! defined( USE_MMX )
868 movl len(%esp), len_r
869#endif
870 cmpl nbytes_r, len_r
871 jbe .L_do_copy1 /* if (nbytes >= len) */
872
873 subl nbytes_r, len_r /* len -= nbytes */
874 rep movsb
875 movl window(%esp), from_r /* from = window */
876 movl write(%esp), nbytes_r /* nbytes = write */
877 cmpl nbytes_r, len_r
878 jbe .L_do_copy1 /* if (nbytes >= len) */
879
880 subl nbytes_r, len_r /* len -= nbytes */
881 rep movsb
882 movl out_r, from_r
883 subl dist_r, from_r /* from = out - dist */
884 jmp .L_do_copy1
885
/*
 * .L_contiguous_in_window: write >= nbytes, so the window part of the match
 * is a single run ending at window + write.  Entered with %eax = write and
 * from_r = window.  Copies nbytes from the window when len is larger than
 * that, then falls through to .L_do_copy1 for the rest.
 */
886.L_contiguous_in_window:
887 /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist
888 * %ecx = nbytes, %eax = write, %eax = len
889 *
890 * else {
891 * from += write - nbytes;
892 * if (nbytes < len) {
893 * len -= nbytes;
894 * do {
895 * PUP(out) = PUP(from);
896 * } while (--nbytes);
897 * from = out - dist;
898 * }
899 * }
900 */
901#define write_r %eax
902
903 addl write_r, from_r
904 subl nbytes_r, from_r /* from += write - nbytes */
905#undef write_r
906
907#if ! defined( USE_MMX )
908 movl len(%esp), len_r
909#endif
910 cmpl nbytes_r, len_r
911 jbe .L_do_copy1 /* if (nbytes >= len) */
912
913 subl nbytes_r, len_r /* len -= nbytes */
914 rep movsb
915 movl out_r, from_r
916 subl dist_r, from_r /* from = out - dist */
917
/*
 * .L_do_copy1: common tail of all window-copy paths.  Copies the remaining
 * len bytes from from_r to out_r with rep movsb, restores in from its stack
 * slot (from shared %esi with it) and returns to the loop test.
 */
918.L_do_copy1:
919 /* regs: %esi = from, %esi = in, %ebp = hold, %bl = bits, %edi = out
920 * %eax = len
921 *
922 * while (len > 0) {
923 * PUP(out) = PUP(from);
924 * len--;
925 * }
926 * }
927 * } while (in < last && out < end);
928 */
929#undef nbytes_r
930#define in_r %esi
931
932 movl len_r, %ecx
933 rep movsb
934
935 movl in(%esp), in_r /* move in back to %esi, toss from */
936#if defined( USE_MMX )
937 movl lcode(%esp), %ebx /* move lcode back to %ebx, toss dist */
938#endif
939 jmp .L_while_test
940
941#undef len_r
942#undef from_r
943#undef dist_r
944
/*
 * Loop exits that change state->mode.  Each stub loads %ecx with the message
 * pointer (0 for end-of-block) and %edx with the new mode, then joins
 * .L_update_stream_state, which stores them and falls through to
 * .L_break_loop.
 */
945.L_invalid_distance_code:
946 /* else {
947 * strm->msg = "invalid distance code";
948 * state->mode = BAD;
949 * }
950 */
951 movl $.L_invalid_distance_code_msg, %ecx
952 movl $INFLATE_MODE_BAD, %edx
953 jmp .L_update_stream_state
954
955.L_test_for_end_of_block:
956 /* else if (op & 32) {
957 * state->mode = TYPE;
958 * break;
959 * }
960 */
961 testb $32, %al
962 jz .L_invalid_literal_length_code /* if ((op & 32) == 0) */
963
964 movl $0, %ecx
965 movl $INFLATE_MODE_TYPE, %edx
966 jmp .L_update_stream_state
967
968.L_invalid_literal_length_code:
969 /* else {
970 * strm->msg = "invalid literal/length code";
971 * state->mode = BAD;
972 * }
973 */
974 movl $.L_invalid_literal_length_code_msg, %ecx
975 movl $INFLATE_MODE_BAD, %edx
976 jmp .L_update_stream_state
977
978.L_invalid_distance_too_far:
979 /* strm->msg = "invalid distance too far back";
980 * state->mode = BAD;
981 */
982 movl in(%esp), in_r /* from_r has in's reg, put in back */
983 movl $.L_invalid_distance_too_far_msg, %ecx
984 movl $INFLATE_MODE_BAD, %edx
985 jmp .L_update_stream_state
986
987.L_update_stream_state:
988 /* set strm->msg = %ecx, strm->state->mode = %edx */
989 movl strm_sp(%esp), %eax
990 testl %ecx, %ecx /* if (msg != NULL) */
991 jz .L_skip_msg
992 movl %ecx, msg_strm(%eax) /* strm->msg = msg */
993.L_skip_msg:
994 movl state_strm(%eax), %eax /* state = strm->state */
995 movl %edx, mode_state(%eax) /* state->mode = edx (BAD | TYPE) */
996
/*
 * .L_break_loop: common exit.  Returns whole unused bytes from the bit
 * buffer to the input (in -= bits >> 3), masks hold down to the bits that
 * remain, writes hold/bits/next_in/next_out back, recomputes avail_in and
 * avail_out from last and end, then restores the saved registers and eflags
 * and returns.  No value is loaded into %eax as a result.
 */
997.L_break_loop:
998
999#define strm_r %eax
1000#define state_r %edx
1001
1002 /* len = bits >> 3;
1003 * in -= len;
1004 * bits -= len << 3;
1005 * hold &= (1U << bits) - 1;
1006 * state->hold = hold;
1007 * state->bits = bits;
1008 * strm->next_in = in;
1009 * strm->next_out = out;
1010 */
1011 movl strm_sp(%esp), strm_r
1012 movl bitslong_r, %ecx
1013 movl state_strm(strm_r), state_r
1014 shrl $3, %ecx
1015 subl %ecx, in_r
1016 shll $3, %ecx
1017 subl %ecx, bitslong_r
1018 movl out_r, next_out_strm(strm_r)
1019 movl in_r, next_in_strm(strm_r)
1020 movl bitslong_r, bits_state(state_r)
1021
1022 movl bitslong_r, %ecx
1023 movl $1, %ebx /* non-MMX: overwrites bitslong_r, %bl (bits already stored) */
1024 shll %cl, %ebx
1025 decl %ebx
1026
1027#undef bits_r
1028#undef bitslong_r
1029
1030#if ! defined( USE_MMX )
1031
1032 andl %ebx, hold_r
1033 movl hold_r, hold_state(state_r)
1034
1035#else /* USE_MMX */
1036
1037 psrlq used_mm, hold_mm /* hold_mm >>= last bit length */
1038 movd hold_mm, %ecx
1039 andl %ebx, %ecx
1040 movl %ecx, hold_state(state_r)
1041
1042#endif
1043
1044#define last_r %ebx
1045
1046 /* strm->avail_in = in < last ? 5 + (last - in) : 5 - (in - last) */
1047 movl last(%esp), last_r
1048 cmpl in_r, last_r
1049 jbe .L_last_is_smaller /* if (in >= last) */
1050
1051 subl in_r, last_r /* last -= in */
1052 addl $5, last_r /* last += 5 */
1053 movl last_r, avail_in_strm(strm_r)
1054 jmp .L_fixup_out
1055.L_last_is_smaller:
1056 subl last_r, in_r /* in -= last */
1057 negl in_r /* in = -in */
1058 addl $5, in_r /* in += 5 */
1059 movl in_r, avail_in_strm(strm_r)
1060
1061#undef last_r
1062#define end_r %ebx
1063
1064.L_fixup_out:
1065 /* strm->avail_out = out < end ? 257 + (end - out) : 257 - (out - end)*/
1066 movl end(%esp), end_r
1067 cmpl out_r, end_r
1068 jbe .L_end_is_smaller /* if (out >= end) */
1069
1070 subl out_r, end_r /* end -= out */
1071 addl $257, end_r /* end += 257 */
1072 movl end_r, avail_out_strm(strm_r)
1073 jmp .L_done
1074.L_end_is_smaller:
1075 subl end_r, out_r /* out -= end */
1076 negl out_r /* out = -out */
1077 addl $257, out_r /* out += 257 */
1078 movl out_r, avail_out_strm(strm_r)
1079
1080#undef end_r
1081
1082.L_done:
1083#if defined( USE_MMX )
1084 emms
1085#endif
 /* unwind in the reverse order of the prologue */
1086 addl $local_var_size, %esp
1087 popf
1088 popl %ebx
1089 popl %ebp
1090 popl %esi
1091 popl %edi
1092 ret
1093
1094.type inflate_fast,@function
1095.size inflate_fast,.-inflate_fast