aboutsummaryrefslogtreecommitdiff
path: root/contrib/masmx86
diff options
context:
space:
mode:
authorMark Adler <zlib@madler.net>2017-10-12 20:08:53 -0700
committerMark Adler <zlib@madler.net>2017-10-12 20:27:14 -0700
commit288f1080317b954b6bdca33708631c011549c008 (patch)
tree9629f01104722ba8e490f04a0790c56513ba989a /contrib/masmx86
parenta5773513942b1c57d0eff51fcb2ebac72796ed95 (diff)
downloadzlib-288f1080317b954b6bdca33708631c011549c008.tar.gz
zlib-288f1080317b954b6bdca33708631c011549c008.tar.bz2
zlib-288f1080317b954b6bdca33708631c011549c008.zip
Remove old assembler code in which bugs have manifested.
In addition, there is not sufficient gain from the inflate assembler code to warrant its inclusion.
Diffstat (limited to 'contrib/masmx86')
-rw-r--r--contrib/masmx86/bld_ml32.bat2
-rw-r--r--contrib/masmx86/inffas32.asm1080
-rw-r--r--contrib/masmx86/match686.asm479
-rw-r--r--contrib/masmx86/readme.txt27
4 files changed, 0 insertions, 1588 deletions
diff --git a/contrib/masmx86/bld_ml32.bat b/contrib/masmx86/bld_ml32.bat
deleted file mode 100644
index e1b86bf..0000000
--- a/contrib/masmx86/bld_ml32.bat
+++ /dev/null
@@ -1,2 +0,0 @@
1ml /coff /Zi /c /Flmatch686.lst match686.asm
2ml /coff /Zi /c /Flinffas32.lst inffas32.asm
diff --git a/contrib/masmx86/inffas32.asm b/contrib/masmx86/inffas32.asm
deleted file mode 100644
index 03d20f8..0000000
--- a/contrib/masmx86/inffas32.asm
+++ /dev/null
@@ -1,1080 +0,0 @@
1;/* inffas32.asm is a hand tuned assembler version of inffast.c -- fast decoding
2; *
3; * inffas32.asm is derived from inffas86.c, with translation of assembly code
4; *
5; * Copyright (C) 1995-2003 Mark Adler
6; * For conditions of distribution and use, see copyright notice in zlib.h
7; *
8; * Copyright (C) 2003 Chris Anderson <christop@charm.net>
9; * Please use the copyright conditions above.
10; *
11; * Mar-13-2003 -- Most of this is derived from inffast.S which is derived from
12; * the gcc -S output of zlib-1.2.0/inffast.c. Zlib-1.2.0 is in beta release at
13; * the moment. I have successfully compiled and tested this code with gcc2.96,
14; * gcc3.2, icc5.0, msvc6.0. It is very close to the speed of inffast.S
15; * compiled with gcc -DNO_MMX, but inffast.S is still faster on the P3 with MMX
16; * enabled. I will attempt to merge the MMX code into this version. Newer
17; * versions of this and inffast.S can be found at
18; * http://www.eetbeetee.com/zlib/ and http://www.charm.net/~christop/zlib/
19; *
20; * 2005 : modification by Gilles Vollant
21; */
22; For Visual C++ 4.x and higher and ML 6.x and higher
23; ml.exe is in directory \MASM611C of Win95 DDK
24; ml.exe is also distributed in http://www.masm32.com/masmdl.htm
25; and in VC++2003 toolkit at http://msdn.microsoft.com/visualc/vctoolkit2003/
26;
27;
28; compile with command line option
29; ml /coff /Zi /c /Flinffas32.lst inffas32.asm
30
31; if you define NO_GZIP (see inflate.h), compile with
32; ml /coff /Zi /c /Flinffas32.lst /DNO_GUNZIP inffas32.asm
33
34
35; zlib1222sup is 0 for zlib 1.2.2.1 and lower
36; zlib1222sup is 8 for zlib 1.2.2.2 and later (with the addition of dmax and head
37; in inflate_state in inflate.h)
38zlib1222sup equ 8 ; extra bytes added to every inflate_state offset below (see the note above for which zlib versions need 0 or 8)
39
40
41IFDEF GUNZIP ; GUNZIP defined: use the gzip-enabled mode numbering
42 INFLATE_MODE_TYPE equ 11 ; value written to state->mode at end of block
43 INFLATE_MODE_BAD equ 26 ; value written to state->mode on a decoding error
44ELSE
45 IFNDEF NO_GUNZIP ; neither symbol defined: same values as the GUNZIP case
46 INFLATE_MODE_TYPE equ 11
47 INFLATE_MODE_BAD equ 26
48 ELSE ; only NO_GUNZIP defined (ml /DNO_GUNZIP): smaller mode numbering
49 INFLATE_MODE_TYPE equ 3
50 INFLATE_MODE_BAD equ 17
51 ENDIF
52ENDIF
53
54
55; 75 "inffast.S"
56;FILE "inffast.S"
57
58;;;GLOBAL _inflate_fast
59
60;;;SECTION .text
61
62
63
64 .586p
65 .mmx
66
67 name inflate_fast_x86
68 .MODEL FLAT
69
70_DATA segment
71inflate_fast_use_mmx:
72 dd 1
73
74
75_TEXT segment
76
77
78
79ALIGN 4
80 db 'Fast decoding Code from Chris Anderson'
81 db 0
82
83ALIGN 4
84invalid_literal_length_code_msg:
85 db 'invalid literal/length code'
86 db 0
87
88ALIGN 4
89invalid_distance_code_msg:
90 db 'invalid distance code'
91 db 0
92
93ALIGN 4
94invalid_distance_too_far_msg:
95 db 'invalid distance too far back'
96 db 0
97
98
99ALIGN 4
100inflate_fast_mask:
101dd 0
102dd 1
103dd 3
104dd 7
105dd 15
106dd 31
107dd 63
108dd 127
109dd 255
110dd 511
111dd 1023
112dd 2047
113dd 4095
114dd 8191
115dd 16383
116dd 32767
117dd 65535
118dd 131071
119dd 262143
120dd 524287
121dd 1048575
122dd 2097151
123dd 4194303
124dd 8388607
125dd 16777215
126dd 33554431
127dd 67108863
128dd 134217727
129dd 268435455
130dd 536870911
131dd 1073741823
132dd 2147483647
133dd 4294967295
134
135
; Byte offsets of the inflate_state fields read and written by _inflate_fast.
; NOTE(review): these are hard-coded for one structure layout (shifted by zlib1222sup);
; confirm them against inflate.h of the zlib version actually being built.
136mode_state equ 0 ;/* state->mode */
137wsize_state equ (32+zlib1222sup) ;/* state->wsize */
138write_state equ (36+4+zlib1222sup) ;/* state->write */
139window_state equ (40+4+zlib1222sup) ;/* state->window */
140hold_state equ (44+4+zlib1222sup) ;/* state->hold */
141bits_state equ (48+4+zlib1222sup) ;/* state->bits */
142lencode_state equ (64+4+zlib1222sup) ;/* state->lencode */
143distcode_state equ (68+4+zlib1222sup) ;/* state->distcode */
144lenbits_state equ (72+4+zlib1222sup) ;/* state->lenbits */
145distbits_state equ (76+4+zlib1222sup) ;/* state->distbits */
146
147
148;;SECTION .text
149; 205 "inffast.S"
150;GLOBAL inflate_fast_use_mmx
151
152;SECTION .data
153
154
155; GLOBAL inflate_fast_use_mmx:object
156;.size inflate_fast_use_mmx, 4
157; 226 "inffast.S"
158;SECTION .text
159
160ALIGN 4
; _inflate_fast -- entry, frame setup and input alignment.
; Both arguments are read from the stack and the routine ends with a plain `ret`,
; so the caller removes them. After the prologue (4 pushes + pushfd + 64 bytes of locals)
; the frame is, relative to esp:
;   +0  length-code mask          +4  distance-code mask
;   +8  lencode table pointer     +12 distcode table pointer
;   +16 output limit              +20 input limit
;   +24 current match length      +28..+39 12-byte scratch input buffer
;   +40 output start ("beg")      +44 saved input pointer
;   +48 state->write              +52 state->wsize
;   +56 state->window             +60 output pointer
;   +88 first argument (stream)   +92 second argument
; Register roles in the integer loop: esi = input ptr, edi = output ptr,
; ebp = bit accumulator (hold), ebx/bl = number of valid bits in it.
; NOTE(review): stream offsets 0/4/12/16/24/28 presumably are next_in, avail_in,
; next_out, avail_out, msg and state of z_stream -- confirm against zlib.h.
161_inflate_fast proc near
162.FPO (16, 4, 0, 0, 1, 0)
163 push edi
164 push esi
165 push ebp
166 push ebx
167 pushfd ; saved so the direction flag changed by cld is restored by popfd on exit
168 sub esp,64 ; reserve the local frame described above
169 cld ; string instructions (lodsw/stosb/movsb) must move upwards
170
171
172
173
174 mov esi, [esp+88] ; esi = first argument (stream pointer)
175 mov edi, [esi+28] ; edi = state pointer (its fields are read below)
176
177
178
179
180
181
182
183 mov edx, [esi+4] ; input byte count
184 mov eax, [esi+0] ; input pointer
185
186 add edx,eax
187 sub edx,11 ; edx = input pointer + count - 11
188
189 mov [esp+44],eax ; remember the input pointer
190 mov [esp+20],edx ; input limit
191
192 mov ebp, [esp+92] ; second argument
193 mov ecx, [esi+16] ; output byte count
194 mov ebx, [esi+12] ; output pointer
195
196 sub ebp,ecx
197 neg ebp
198 add ebp,ebx ; ebp = output pointer - (second argument - output count)
199
200 sub ecx,257
201 add ecx,ebx ; ecx = output pointer + output count - 257
202
203 mov [esp+60],ebx ; output pointer
204 mov [esp+40],ebp ; output start
205 mov [esp+16],ecx ; output limit
206; 285 "inffast.S"
207 mov eax, [edi+lencode_state]
208 mov ecx, [edi+distcode_state]
209
210 mov [esp+8],eax
211 mov [esp+12],ecx
212
213 mov eax,1
214 mov ecx, [edi+lenbits_state]
215 shl eax,cl
216 dec eax ; (1 << lenbits) - 1
217 mov [esp+0],eax ; length-code mask
218
219 mov eax,1
220 mov ecx, [edi+distbits_state]
221 shl eax,cl
222 dec eax ; (1 << distbits) - 1
223 mov [esp+4],eax ; distance-code mask
224
225 mov eax, [edi+wsize_state]
226 mov ecx, [edi+write_state]
227 mov edx, [edi+window_state]
228
229 mov [esp+52],eax
230 mov [esp+48],ecx
231 mov [esp+56],edx
232
233 mov ebp, [edi+hold_state] ; ebp = bit accumulator
234 mov ebx, [edi+bits_state] ; ebx = number of bits in the accumulator
235; 321 "inffast.S"
236 mov esi, [esp+44]
237 mov ecx, [esp+20]
238 cmp ecx,esi
239 ja L_align_long ; input limit above input pointer: work directly on the caller's buffer
240; otherwise copy the remaining input into the scratch buffer and zero-pad it to 12 bytes
241 add ecx,11
242 sub ecx,esi ; ecx = number of input bytes left
243 mov eax,12
244 sub eax,ecx ; eax = padding bytes needed
245 lea edi, [esp+28]
246 rep movsb ; copy the input bytes into the scratch buffer
247 mov ecx,eax
248 xor eax,eax
249 rep stosb ; zero-fill the rest
250 lea esi, [esp+28] ; continue reading from the scratch buffer
251 mov [esp+20],esi ; input limit = start of scratch buffer (also marks "buffer in use" for the exit code)
252 jmp L_is_aligned
253
254
255L_align_long: ; feed single bytes into the accumulator until esi is 4-byte aligned
256 test esi,3
257 jz L_is_aligned
258 xor eax,eax
259 mov al, [esi]
260 inc esi
261 mov ecx,ebx
262 add ebx,8 ; eight more bits available
263 shl eax,cl ; place the byte above the bits already held
264 or ebp,eax
265 jmp L_align_long
266
267L_is_aligned:
268 mov edi, [esp+60] ; edi = output pointer for the decode loops
269; 366 "inffast.S"
; L_check_mmx -- select the decode loop from inflate_fast_use_mmx:
;   2 -> MMX loop, above 2 -> integer loop, otherwise run the detection below once,
;   store 2 or 3 in the variable and re-test.
270L_check_mmx:
271 cmp dword ptr [inflate_fast_use_mmx],2
272 je L_init_mmx
273 ja L_do_loop
274
275 push eax
276 push ebx
277 push ecx
278 push edx
279 pushfd
280 mov eax, [esp] ; eax = current EFLAGS image
281 xor dword ptr [esp],0200000h ; flip bit 21 (the ID flag) in the saved image
282
283
284
285
286 popfd ; try to load the modified flags
287 pushfd
288 pop edx ; read them back
289 xor edx,eax
290 jz L_dont_use_mmx ; bit did not change: the CPUID instruction is not usable
291 xor eax,eax
292 cpuid ; leaf 0: vendor string in ebx/edx/ecx
293 cmp ebx,0756e6547h ; "Genu"
294 jne L_dont_use_mmx
295 cmp ecx,06c65746eh ; "ntel"
296 jne L_dont_use_mmx
297 cmp edx,049656e69h ; "ineI"
298 jne L_dont_use_mmx
299 mov eax,1
300 cpuid ; leaf 1: signature in eax, feature bits in edx
301 shr eax,8
302 and eax,15 ; bits 8..11 of the signature (family field)
303 cmp eax,6
304 jne L_dont_use_mmx ; only this one family value enables the MMX loop
305 test edx,0800000h ; feature bit 23 (MMX)
306 jnz L_use_mmx
307 jmp L_dont_use_mmx
308L_use_mmx:
309 mov dword ptr [inflate_fast_use_mmx],2
310 jmp L_check_mmx_pop
311L_dont_use_mmx:
312 mov dword ptr [inflate_fast_use_mmx],3
313L_check_mmx_pop:
314 pop edx
315 pop ecx
316 pop ebx
317 pop eax
318 jmp L_check_mmx ; dispatch again with the decision now stored
319; 426 "inffast.S"
; L_do_loop -- integer decode loop: fetch one literal/length table entry and act on it.
; A table entry is a dword used as: al = operation flags, ah = bits to consume,
; upper 16 bits = value (literal byte, base length, or second-level table offset).
320ALIGN 4
321L_do_loop:
322; 437 "inffast.S"
323 cmp bl,15
324 ja L_get_length_code ; more than 15 bits held: no refill needed
325
326 xor eax,eax
327 lodsw ; read 16 input bits, esi += 2
328 mov cl,bl
329 add bl,16
330 shl eax,cl ; place them above the bits already held
331 or ebp,eax
332
333L_get_length_code:
334 mov edx, [esp+0] ; length-code mask
335 mov ecx, [esp+8] ; lencode table
336 and edx,ebp ; index = low bits of the accumulator
337 mov eax, [ecx+edx*4]
338
339L_dolen:
340
341
342
343
344
345
346 mov cl,ah
347 sub bl,ah
348 shr ebp,cl ; drop the bits used by this code
349
350
351
352
353
354
355 test al,al
356 jnz L_test_for_length_base ; non-zero flags: not a literal
357
358 shr eax,16
359 stosb ; literal: write the value byte to the output
360
361L_while_test: ; keep looping while output ptr < output limit and input ptr < input limit
362
363
364 cmp [esp+16],edi
365 jbe L_break_loop
366
367 cmp [esp+20],esi
368 ja L_do_loop
369 jmp L_break_loop
370
371L_test_for_length_base:
372; 502 "inffast.S"
373 mov edx,eax
374 shr edx,16 ; edx = value field (base length when flag 16 is set)
375 mov cl,al
376
377 test al,16
378 jz L_test_for_second_level_length ; flag 16 clear: second-level entry, end of block or invalid
379 and cl,15 ; cl = number of extra length bits
380 jz L_save_len
381 cmp bl,cl
382 jae L_add_bits_to_len ; enough bits already held
383
384 mov ch,cl ; keep the extra-bit count across the refill
385 xor eax,eax
386 lodsw
387 mov cl,bl
388 add bl,16
389 shl eax,cl
390 or ebp,eax
391 mov cl,ch
392
393L_add_bits_to_len:
394 mov eax,1
395 shl eax,cl
396 dec eax ; mask of cl bits
397 sub bl,cl
398 and eax,ebp ; extra bits
399 shr ebp,cl
400 add edx,eax ; length = base + extra
401
402L_save_len:
403 mov [esp+24],edx ; store the match length, then fall into distance decoding
404
405
; L_decode_distance -- integer path: decode the distance for the length stored at [esp+24],
; then copy the match when the source lies entirely inside the output written so far.
406L_decode_distance:
407; 549 "inffast.S"
408 cmp bl,15
409 ja L_get_distance_code
410
411 xor eax,eax
412 lodsw ; refill 16 bits
413 mov cl,bl
414 add bl,16
415 shl eax,cl
416 or ebp,eax
417
418L_get_distance_code:
419 mov edx, [esp+4] ; distance-code mask
420 mov ecx, [esp+12] ; distcode table
421 and edx,ebp
422 mov eax, [ecx+edx*4]
423
424
425L_dodist:
426 mov edx,eax
427 shr edx,16 ; edx = value field (base distance when flag 16 is set)
428 mov cl,ah
429 sub bl,ah
430 shr ebp,cl ; drop the bits used by this code
431; 584 "inffast.S"
432 mov cl,al
433
434 test al,16
435 jz L_test_for_second_level_dist
436 and cl,15 ; cl = number of extra distance bits
437 jz L_check_dist_one
438 cmp bl,cl
439 jae L_add_bits_to_dist
440
441 mov ch,cl ; keep the extra-bit count across the refill
442 xor eax,eax
443 lodsw
444 mov cl,bl
445 add bl,16
446 shl eax,cl
447 or ebp,eax
448 mov cl,ch
449
450L_add_bits_to_dist:
451 mov eax,1
452 shl eax,cl
453 dec eax
454 sub bl,cl
455 and eax,ebp
456 shr ebp,cl
457 add edx,eax ; distance = base + extra
458 jmp L_check_window ; NOTE(review): jumps to the very next instruction (redundant)
459
460L_check_window:
461; 625 "inffast.S"
462 mov [esp+44],esi ; save the input pointer; esi is reused as copy source
463 mov eax,edi
464 sub eax, [esp+40] ; eax = output pointer - output start
465
466 cmp eax,edx
467 jb L_clip_window ; distance reaches back before output start: use the window
468
469 mov ecx, [esp+24] ; ecx = match length
470 mov esi,edi
471 sub esi,edx ; source = output pointer - distance
472
473 sub ecx,3 ; first three bytes are copied by hand below (assumes length >= 3)
474 mov al, [esi]
475 mov [edi],al
476 mov al, [esi+1]
477 mov dl, [esi+2]
478 add esi,3
479 mov [edi+1],al
480 mov [edi+2],dl
481 add edi,3
482 rep movsb ; remaining length - 3 bytes
483
484 mov esi, [esp+44] ; restore the input pointer
485 jmp L_while_test
486
487ALIGN 4
488L_check_dist_one: ; reached when the distance code has no extra bits
489 cmp edx,1
490 jne L_check_window
491 cmp [esp+40],edi
492 je L_check_window ; nothing written yet: take the general path
493; distance 1 with output available: repeat the previous output byte
494 dec edi
495 mov ecx, [esp+24]
496 mov al, [edi] ; al = last byte written
497 sub ecx,3
498
499 mov [edi+1],al
500 mov [edi+2],al
501 mov [edi+3],al
502 add edi,4 ; net advance of 3 bytes past the old output pointer
503 rep stosb ; remaining length - 3 copies
504
505 jmp L_while_test
506
; Integer path: second-level table lookups, and match copies whose source starts in the
; sliding window (distance larger than the output produced so far).
; On entry to L_clip_window: eax = output ptr - output start, edx = distance,
; [esp+24] = length, edi = output ptr.
507ALIGN 4
508L_test_for_second_level_length:
509
510
511
512
513 test al,64
514 jnz L_test_for_end_of_block
515
516 mov eax,1
517 shl eax,cl ; cl still holds the entry's flag byte, used as a bit count
518 dec eax
519 and eax,ebp
520 add eax,edx ; index = value field + masked accumulator bits
521 mov edx, [esp+8]
522 mov eax, [edx+eax*4] ; fetch the second-level length entry
523 jmp L_dolen
524
525ALIGN 4
526L_test_for_second_level_dist:
527
528
529
530
531 test al,64
532 jnz L_invalid_distance_code
533
534 mov eax,1
535 shl eax,cl
536 dec eax
537 and eax,ebp
538 add eax,edx
539 mov edx, [esp+12]
540 mov eax, [edx+eax*4] ; fetch the second-level distance entry
541 jmp L_dodist
542
543ALIGN 4
544L_clip_window:
545; 721 "inffast.S"
546 mov ecx,eax
547 mov eax, [esp+52] ; eax = wsize
548 neg ecx
549 mov esi, [esp+56] ; esi = window base
550
551 cmp eax,edx
552 jb L_invalid_distance_too_far ; distance larger than wsize
553
554 add ecx,edx ; ecx = distance - (output ptr - output start) = bytes to take from the window
555 cmp dword ptr [esp+48],0
556 jne L_wrap_around_window ; write position non-zero
557
558 sub eax,ecx
559 add esi,eax ; source = window + wsize - ecx
560; 749 "inffast.S"
561 mov eax, [esp+24]
562 cmp eax,ecx
563 jbe L_do_copy1 ; whole match comes from the window
564
565 sub eax,ecx ; eax = bytes left after the window part
566 rep movsb
567 mov esi,edi
568 sub esi,edx ; continue from output ptr - distance
569 jmp L_do_copy1
570; NOTE(review): lines 571-578 repeat the sequence above and cannot be reached (they follow an unconditional jmp and carry no label)
571 cmp eax,ecx
572 jbe L_do_copy1
573
574 sub eax,ecx
575 rep movsb
576 mov esi,edi
577 sub esi,edx
578 jmp L_do_copy1
579
580L_wrap_around_window:
581; 793 "inffast.S"
582 mov eax, [esp+48] ; eax = write position
583 cmp ecx,eax
584 jbe L_contiguous_in_window
585
586 add esi, [esp+52]
587 add esi,eax
588 sub esi,ecx ; source = window + wsize + write - ecx
589 sub ecx,eax ; bytes available up to the end of the window
590
591
592 mov eax, [esp+24]
593 cmp eax,ecx
594 jbe L_do_copy1
595
596 sub eax,ecx
597 rep movsb ; copy the part at the end of the window
598 mov esi, [esp+56] ; then continue from the window start
599 mov ecx, [esp+48]
600 cmp eax,ecx
601 jbe L_do_copy1
602
603 sub eax,ecx
604 rep movsb ; copy `write` bytes from the window start
605 mov esi,edi
606 sub esi,edx ; the rest comes from the output itself
607 jmp L_do_copy1
608
609L_contiguous_in_window:
610; 836 "inffast.S"
611 add esi,eax
612 sub esi,ecx ; source = window + write - ecx
613
614
615 mov eax, [esp+24]
616 cmp eax,ecx
617 jbe L_do_copy1
618
619 sub eax,ecx
620 rep movsb
621 mov esi,edi
622 sub esi,edx
623
624L_do_copy1: ; copy the final eax bytes from esi
625; 862 "inffast.S"
626 mov ecx,eax
627 rep movsb
628
629 mov esi, [esp+44] ; restore the input pointer
630 jmp L_while_test
631; 878 "inffast.S"
; MMX decode path. Same algorithm as the integer loop with these roles:
;   mm0 = bit accumulator, ebp = number of bits held,
;   mm1 = shift still owed to mm0 (applied by the next `psrlq mm0,mm1`),
;   mm4/mm3 = length-code mask (working copy / saved copy),
;   mm5/mm2 = distance-code mask (working copy / saved copy),
;   ebx = lencode table between symbols, distance while a match is processed,
;   edx = match length, esi = input ptr, edi = output ptr.
632ALIGN 4
633L_init_mmx:
634 emms
635
636
637
638
639
640 movd mm0,ebp ; accumulator moves into mm0
641 mov ebp,ebx ; ebp now counts the bits held
642; 896 "inffast.S"
643 movd mm4,dword ptr [esp+0]
644 movq mm3,mm4
645 movd mm5,dword ptr [esp+4]
646 movq mm2,mm5
647 pxor mm1,mm1 ; no shift owed yet
648 mov ebx, [esp+8]
649 jmp L_do_loop_mmx
650
651ALIGN 4
652L_do_loop_mmx:
653 psrlq mm0,mm1 ; apply the shift owed from the previous symbol
654
655 cmp ebp,32
656 ja L_get_length_code_mmx ; more than 32 bits held: no refill
657
658 movd mm6,ebp
659 movd mm7,dword ptr [esi] ; read 32 input bits
660 add esi,4
661 psllq mm7,mm6 ; place them above the bits already held
662 add ebp,32
663 por mm0,mm7
664
665L_get_length_code_mmx:
666 pand mm4,mm0
667 movd eax,mm4 ; index = accumulator & length mask
668 movq mm4,mm3 ; restore the working mask
669 mov eax, [ebx+eax*4]
670
671L_dolen_mmx:
672 movzx ecx,ah ; bits used by this code
673 movd mm1,ecx ; owed shift
674 sub ebp,ecx
675
676 test al,al
677 jnz L_test_for_length_base_mmx
678
679 shr eax,16
680 stosb ; literal byte
681
682L_while_test_mmx: ; same limits as L_while_test
683
684
685 cmp [esp+16],edi
686 jbe L_break_loop
687
688 cmp [esp+20],esi
689 ja L_do_loop_mmx
690 jmp L_break_loop
691
692L_test_for_length_base_mmx:
693
694 mov edx,eax
695 shr edx,16 ; edx = value field (base length)
696
697 test al,16
698 jz L_test_for_second_level_length_mmx
699 and eax,15 ; number of extra length bits
700 jz L_decode_distance_mmx
701
702 psrlq mm0,mm1 ; drop the code bits
703 movd mm1,eax ; the extra bits become the owed shift
704 movd ecx,mm0
705 sub ebp,eax
706 and ecx, [inflate_fast_mask+eax*4] ; keep the low eax bits
707 add edx,ecx ; length = base + extra
708
709L_decode_distance_mmx:
710 psrlq mm0,mm1
711
712 cmp ebp,32
713 ja L_get_dist_code_mmx
714
715 movd mm6,ebp
716 movd mm7,dword ptr [esi]
717 add esi,4
718 psllq mm7,mm6
719 add ebp,32
720 por mm0,mm7
721
722L_get_dist_code_mmx:
723 mov ebx, [esp+12] ; distcode table
724 pand mm5,mm0
725 movd eax,mm5
726 movq mm5,mm2
727 mov eax, [ebx+eax*4]
728
729L_dodist_mmx:
730
731 movzx ecx,ah
732 mov ebx,eax
733 shr ebx,16 ; ebx = value field (base distance)
734 sub ebp,ecx
735 movd mm1,ecx
736
737 test al,16
738 jz L_test_for_second_level_dist_mmx
739 and eax,15 ; number of extra distance bits
740 jz L_check_dist_one_mmx
741
742L_add_bits_to_dist_mmx:
743 psrlq mm0,mm1
744 movd mm1,eax
745 movd ecx,mm0
746 sub ebp,eax
747 and ecx, [inflate_fast_mask+eax*4]
748 add ebx,ecx ; distance = base + extra
749
750L_check_window_mmx:
751 mov [esp+44],esi ; save the input pointer
752 mov eax,edi
753 sub eax, [esp+40] ; output ptr - output start
754
755 cmp eax,ebx
756 jb L_clip_window_mmx
757
758 mov ecx,edx
759 mov esi,edi
760 sub esi,ebx ; source = output ptr - distance
761
762 sub ecx,3 ; first three bytes copied by hand (assumes length >= 3)
763 mov al, [esi]
764 mov [edi],al
765 mov al, [esi+1]
766 mov dl, [esi+2]
767 add esi,3
768 mov [edi+1],al
769 mov [edi+2],dl
770 add edi,3
771 rep movsb
772
773 mov esi, [esp+44]
774 mov ebx, [esp+8] ; ebx = lencode table again
775 jmp L_while_test_mmx
776
777ALIGN 4
778L_check_dist_one_mmx:
779 cmp ebx,1
780 jne L_check_window_mmx
781 cmp [esp+40],edi
782 je L_check_window_mmx
783
784 dec edi
785 mov ecx,edx
786 mov al, [edi] ; last byte written
787 sub ecx,3
788
789 mov [edi+1],al
790 mov [edi+2],al
791 mov [edi+3],al
792 add edi,4
793 rep stosb
794
795 mov ebx, [esp+8]
796 jmp L_while_test_mmx
797
798ALIGN 4
799L_test_for_second_level_length_mmx:
800 test al,64
801 jnz L_test_for_end_of_block
802
803 and eax,15
804 psrlq mm0,mm1
805 movd ecx,mm0
806 and ecx, [inflate_fast_mask+eax*4]
807 add ecx,edx ; index = value field + masked bits
808 mov eax, [ebx+ecx*4]
809 jmp L_dolen_mmx
810
811ALIGN 4
812L_test_for_second_level_dist_mmx:
813 test al,64
814 jnz L_invalid_distance_code
815
816 and eax,15
817 psrlq mm0,mm1
818 movd ecx,mm0
819 and ecx, [inflate_fast_mask+eax*4]
820 mov eax, [esp+12]
821 add ecx,ebx
822 mov eax, [eax+ecx*4]
823 jmp L_dodist_mmx
824
825ALIGN 4
826L_clip_window_mmx: ; as L_clip_window, with ebx = distance and edx = length
827
828 mov ecx,eax
829 mov eax, [esp+52] ; wsize
830 neg ecx
831 mov esi, [esp+56] ; window base
832
833 cmp eax,ebx
834 jb L_invalid_distance_too_far
835
836 add ecx,ebx ; bytes to take from the window
837 cmp dword ptr [esp+48],0
838 jne L_wrap_around_window_mmx
839
840 sub eax,ecx
841 add esi,eax ; source = window + wsize - ecx
842
843 cmp edx,ecx
844 jbe L_do_copy1_mmx
845
846 sub edx,ecx
847 rep movsb
848 mov esi,edi
849 sub esi,ebx
850 jmp L_do_copy1_mmx
851; NOTE(review): lines 852-859 repeat the sequence above and cannot be reached (they follow an unconditional jmp and carry no label)
852 cmp edx,ecx
853 jbe L_do_copy1_mmx
854
855 sub edx,ecx
856 rep movsb
857 mov esi,edi
858 sub esi,ebx
859 jmp L_do_copy1_mmx
860
861L_wrap_around_window_mmx:
862
863 mov eax, [esp+48] ; write position
864 cmp ecx,eax
865 jbe L_contiguous_in_window_mmx
866
867 add esi, [esp+52]
868 add esi,eax
869 sub esi,ecx ; source = window + wsize + write - ecx
870 sub ecx,eax
871
872
873 cmp edx,ecx
874 jbe L_do_copy1_mmx
875
876 sub edx,ecx
877 rep movsb
878 mov esi, [esp+56]
879 mov ecx, [esp+48]
880 cmp edx,ecx
881 jbe L_do_copy1_mmx
882
883 sub edx,ecx
884 rep movsb
885 mov esi,edi
886 sub esi,ebx
887 jmp L_do_copy1_mmx
888
889L_contiguous_in_window_mmx:
890
891 add esi,eax
892 sub esi,ecx ; source = window + write - ecx
893
894
895 cmp edx,ecx
896 jbe L_do_copy1_mmx
897
898 sub edx,ecx
899 rep movsb
900 mov esi,edi
901 sub esi,ebx
902
903L_do_copy1_mmx: ; copy the final edx bytes from esi
904
905
906 mov ecx,edx
907 rep movsb
908
909 mov esi, [esp+44]
910 mov ebx, [esp+8]
911 jmp L_while_test_mmx
912; 1174 "inffast.S"
; Exit paths of _inflate_fast: error / end-of-block handlers set ecx = message (or 0) and
; edx = new state->mode, then the common tail writes pointers, counts, hold and bits back.
913L_invalid_distance_code:
914
915
916
917
918
919 mov ecx, invalid_distance_code_msg
920 mov edx,INFLATE_MODE_BAD
921 jmp L_update_stream_state
922
923L_test_for_end_of_block:
924
925
926
927
928
929 test al,32
930 jz L_invalid_literal_length_code ; flag 32 clear: the entry is invalid
931
932 mov ecx,0 ; no message
933 mov edx,INFLATE_MODE_TYPE
934 jmp L_update_stream_state
935
936L_invalid_literal_length_code:
937
938
939
940
941
942 mov ecx, invalid_literal_length_code_msg
943 mov edx,INFLATE_MODE_BAD
944 jmp L_update_stream_state
945
946L_invalid_distance_too_far:
947
948
949
950 mov esi, [esp+44] ; esi held the window pointer here; reload the saved input pointer
951 mov ecx, invalid_distance_too_far_msg
952 mov edx,INFLATE_MODE_BAD
953 jmp L_update_stream_state
954
955L_update_stream_state:
956
957 mov eax, [esp+88] ; stream pointer (first argument)
958 test ecx,ecx
959 jz L_skip_msg
960 mov [eax+24],ecx ; store the message pointer in the stream
961L_skip_msg:
962 mov eax, [eax+28] ; state pointer
963 mov [eax+mode_state],edx
964 jmp L_break_loop
965
966ALIGN 4
967L_break_loop:
968; 1243 "inffast.S"
969 cmp dword ptr [inflate_fast_use_mmx],2
970 jne L_update_next_in
971
972
973
974 mov ebx,ebp ; MMX path kept the bit count in ebp
975
976L_update_next_in:
977; 1266 "inffast.S"
978 mov eax, [esp+88]
979 mov ecx,ebx
980 mov edx, [eax+28] ; edx = state pointer
981 shr ecx,3 ; whole unused bytes in the accumulator
982 sub esi,ecx ; give them back to the input
983 shl ecx,3
984 sub ebx,ecx ; bits left = bits mod 8
985 mov [eax+12],edi ; store the output pointer
986 mov [edx+bits_state],ebx
987 mov ecx,ebx
988
989 lea ebx, [esp+28]
990 cmp [esp+20],ebx
991 jne L_buf_not_used ; input limit is not the scratch buffer: esi already points into the caller's input
992; scratch buffer was used: translate esi and the limit back to the caller's input
993 sub esi,ebx ; offset consumed inside the scratch buffer
994 mov ebx, [eax+0]
995 mov [esp+20],ebx
996 add esi,ebx ; esi = original input pointer + offset
997 mov ebx, [eax+4]
998 sub ebx,11
999 add [esp+20],ebx ; limit = input pointer + count - 11
1000
1001L_buf_not_used:
1002 mov [eax+0],esi ; store the input pointer
1003
1004 mov ebx,1
1005 shl ebx,cl
1006 dec ebx ; mask of the remaining bit count
1007
1008
1009
1010
1011
1012 cmp dword ptr [inflate_fast_use_mmx],2
1013 jne L_update_hold
1014
1015
1016
1017 psrlq mm0,mm1 ; apply the shift still owed
1018 movd ebp,mm0 ; accumulator back into ebp
1019
1020 emms ; leave MMX state
1021
1022L_update_hold:
1023
1024
1025
1026 and ebp,ebx ; keep only the valid bits
1027 mov [edx+hold_state],ebp
1028
1029
1030
1031
1032 mov ebx, [esp+20]
1033 cmp ebx,esi
1034 jbe L_last_is_smaller
1035
1036 sub ebx,esi
1037 add ebx,11 ; input count = 11 + (limit - input ptr)
1038 mov [eax+4],ebx
1039 jmp L_fixup_out
1040L_last_is_smaller:
1041 sub esi,ebx
1042 neg esi
1043 add esi,11 ; input count = 11 - (input ptr - limit)
1044 mov [eax+4],esi
1045
1046
1047
1048
1049L_fixup_out:
1050
1051 mov ebx, [esp+16]
1052 cmp ebx,edi
1053 jbe L_end_is_smaller
1054
1055 sub ebx,edi
1056 add ebx,257 ; output count = 257 + (limit - output ptr)
1057 mov [eax+16],ebx
1058 jmp L_done
1059L_end_is_smaller:
1060 sub edi,ebx
1061 neg edi
1062 add edi,257 ; output count = 257 - (output ptr - limit)
1063 mov [eax+16],edi
1064
1065
1066
1067
1068
1069L_done:
1070 add esp,64 ; release the local frame
1071 popfd ; restore the caller's flags (including the direction flag)
1072 pop ebx
1073 pop ebp
1074 pop esi
1075 pop edi
1076 ret
1077_inflate_fast endp
1078
1079_TEXT ends
1080end
diff --git a/contrib/masmx86/match686.asm b/contrib/masmx86/match686.asm
deleted file mode 100644
index 3b09212..0000000
--- a/contrib/masmx86/match686.asm
+++ /dev/null
@@ -1,479 +0,0 @@
1; match686.asm -- Asm portion of the optimized longest_match for 32 bits x86
2; Copyright (C) 1995-1996 Jean-loup Gailly, Brian Raiter and Gilles Vollant.
3; File written by Gilles Vollant, by converting match686.S from Brian Raiter
4; for MASM. This is an assembly version of longest_match
5; from Jean-loup Gailly in deflate.c
6;
7; http://www.zlib.net
8; http://www.winimage.com/zLibDll
9; http://www.muppetlabs.com/~breadbox/software/assembly.html
10;
11; For Visual C++ 4.x and higher and ML 6.x and higher
12; ml.exe is distributed in
13; http://www.microsoft.com/downloads/details.aspx?FamilyID=7a1c9da0-0510-44a2-b042-7ef370530c64
14;
15; this file contain two implementation of longest_match
16;
17; this longest_match was written by Brian raiter (1998), optimized for Pentium Pro
18; (and the faster known version of match_init on modern Core 2 Duo and AMD Phenom)
19;
20; for using an assembly version of longest_match, you need define ASMV in project
21;
22; compile the asm file running
23; ml /coff /Zi /c /Flmatch686.lst match686.asm
24; and include match686.obj in your project
25;
26; note: contrib of zLib 1.2.3 and earlier contained both a deprecated version for
27; Pentium (prior Pentium Pro) and this version for Pentium Pro and modern processor
28; with autoselect (with cpu detection code)
29; if you want to support the old Pentium optimization, you can still use that version
30;
31; this file is not optimized for the old Pentium, but it is compatible with all 32-bit x86
32; processor (starting 80386)
33;
34;
35; see below : zlib1222add must be adjusted if you use a zlib version < 1.2.2.2
36
37;uInt longest_match(s, cur_match)
38; deflate_state *s;
39; IPos cur_match; /* current match */
40
41 NbStack equ 76
42 cur_match equ dword ptr[esp+NbStack-0]
43 str_s equ dword ptr[esp+NbStack-4]
44; 5 dword on top (ret,ebp,esi,edi,ebx)
45 adrret equ dword ptr[esp+NbStack-8]
46 pushebp equ dword ptr[esp+NbStack-12]
47 pushedi equ dword ptr[esp+NbStack-16]
48 pushesi equ dword ptr[esp+NbStack-20]
49 pushebx equ dword ptr[esp+NbStack-24]
50
51 chain_length equ dword ptr [esp+NbStack-28]
52 limit equ dword ptr [esp+NbStack-32]
53 best_len equ dword ptr [esp+NbStack-36]
54 window equ dword ptr [esp+NbStack-40]
55 prev equ dword ptr [esp+NbStack-44]
56 scan_start equ word ptr [esp+NbStack-48]
57 wmask equ dword ptr [esp+NbStack-52]
58 match_start_ptr equ dword ptr [esp+NbStack-56]
59 nice_match equ dword ptr [esp+NbStack-60]
60 scan equ dword ptr [esp+NbStack-64]
61
62 windowlen equ dword ptr [esp+NbStack-68]
63 match_start equ dword ptr [esp+NbStack-72]
64 strend equ dword ptr [esp+NbStack-76]
65 NbStackAdd equ (NbStack-24)
66
67 .386p
68
69 name gvmatch
70 .MODEL FLAT
71
72
73
74; all the +zlib1222add offsets are due to the addition of fields
75; in zlib in the deflate_state structure since the asm code was first written
76; (if you compile with zlib 1.0.4 or older, use "zlib1222add equ (-4)").
77; (if you compile with zlib between 1.0.5 and 1.2.2.1, use "zlib1222add equ 0").
78; if you compile with zlib 1.2.2.2 or later , use "zlib1222add equ 8").
79
80 zlib1222add equ 8
81
82; Note : these values are valid for a structure packed on an 8-byte boundary
83 dep_chain_length equ 74h+zlib1222add
84 dep_window equ 30h+zlib1222add
85 dep_strstart equ 64h+zlib1222add
86 dep_prev_length equ 70h+zlib1222add
87 dep_nice_match equ 88h+zlib1222add
88 dep_w_size equ 24h+zlib1222add
89 dep_prev equ 38h+zlib1222add
90 dep_w_mask equ 2ch+zlib1222add
91 dep_good_match equ 84h+zlib1222add
92 dep_match_start equ 68h+zlib1222add
93 dep_lookahead equ 6ch+zlib1222add
94
95
96_TEXT segment
97
98IFDEF NOUNDERLINE
99 public longest_match
100 public match_init
101ELSE
102 public _longest_match
103 public _match_init
104ENDIF
105
106 MAX_MATCH equ 258
107 MIN_MATCH equ 3
108 MIN_LOOKAHEAD equ (MAX_MATCH+MIN_MATCH+1)
109
110
111
112MAX_MATCH equ 258 ; longest match length
113MIN_MATCH equ 3 ; shortest match length
114MIN_LOOKAHEAD equ (MAX_MATCH + MIN_MATCH + 1)
115MAX_MATCH_8_ equ ((MAX_MATCH + 7) AND 0FFF8h) ; MAX_MATCH rounded up to a multiple of 8 = 264 = 0108h, the value hard-coded in the compare loop setup (mask 0FFF0h would give 256)
116
117
118;;; stack frame offsets
119
120chainlenwmask equ esp + 0 ; high word: current chain len
121 ; low word: s->wmask
122window equ esp + 4 ; local copy of s->window
123windowbestlen equ esp + 8 ; s->window + bestlen
124scanstart equ esp + 16 ; first two bytes of string
125scanend equ esp + 12 ; last two bytes of string
126scanalign equ esp + 20 ; dword-misalignment of string
127nicematch equ esp + 24 ; a good enough match size
128bestlen equ esp + 28 ; size of best match so far
129scan equ esp + 32 ; ptr to string wanting match
130
131LocalVarsSize equ 36
132; saved ebx byte esp + 36
133; saved edi byte esp + 40
134; saved esi byte esp + 44
135; saved ebp byte esp + 48
136; return address byte esp + 52
137deflatestate equ esp + 56 ; the function arguments
138curmatch equ esp + 60
139
140;;; Offsets for fields in the deflate_state structure. These numbers
141;;; are calculated from the definition of deflate_state, with the
142;;; assumption that the compiler will dword-align the fields. (Thus,
143;;; changing the definition of deflate_state could easily cause this
144;;; program to crash horribly, without so much as a warning at
145;;; compile time. Sigh.)
146
147dsWSize equ 36+zlib1222add
148dsWMask equ 44+zlib1222add
149dsWindow equ 48+zlib1222add
150dsPrev equ 56+zlib1222add
151dsMatchLen equ 88+zlib1222add
152dsPrevMatch equ 92+zlib1222add
153dsStrStart equ 100+zlib1222add
154dsMatchStart equ 104+zlib1222add
155dsLookahead equ 108+zlib1222add
156dsPrevLen equ 112+zlib1222add
157dsMaxChainLen equ 116+zlib1222add
158dsGoodMatch equ 132+zlib1222add
159dsNiceMatch equ 136+zlib1222add
160
161
162;;; match686.asm -- Pentium-Pro-optimized version of longest_match()
163;;; Written for zlib 1.1.2
164;;; Copyright (C) 1998 Brian Raiter <breadbox@muppetlabs.com>
165;;; You can look at http://www.muppetlabs.com/~breadbox/software/assembly.html
166;;;
167;;
168;; This software is provided 'as-is', without any express or implied
169;; warranty. In no event will the authors be held liable for any damages
170;; arising from the use of this software.
171;;
172;; Permission is granted to anyone to use this software for any purpose,
173;; including commercial applications, and to alter it and redistribute it
174;; freely, subject to the following restrictions:
175;;
176;; 1. The origin of this software must not be misrepresented; you must not
177;; claim that you wrote the original software. If you use this software
178;; in a product, an acknowledgment in the product documentation would be
179;; appreciated but is not required.
180;; 2. Altered source versions must be plainly marked as such, and must not be
181;; misrepresented as being the original software
182;; 3. This notice may not be removed or altered from any source distribution.
183;;
184
185;GLOBAL _longest_match, _match_init
186
187
188;SECTION .text
189
190;;; uInt longest_match(deflate_state *deflatestate, IPos curmatch)
191
192;_longest_match:
; longest_match / _longest_match -- entry and setup.
; Arguments are read from the stack through the `deflatestate` and `curmatch` equates;
; the match length is returned in eax. ebp, edi, esi and ebx are saved and restored.
; The exported name has a leading underscore unless NOUNDERLINE is defined.
193 IFDEF NOUNDERLINE
194 longest_match proc near
195 ELSE
196 _longest_match proc near
197 ENDIF
198.FPO (9, 4, 0, 0, 1, 0)
199
200;;; Save registers that the compiler may be using, and adjust esp to
201;;; make room for our stack frame.
202
203 push ebp
204 push edi
205 push esi
206 push ebx
207 sub esp, LocalVarsSize
208
209;;; Retrieve the function arguments. ecx will hold cur_match
210;;; throughout the entire function. edx will hold the pointer to the
211;;; deflate_state structure during the function's setup (before
212;;; entering the main loop).
213
214 mov edx, [deflatestate]
215 mov ecx, [curmatch]
216
217;;; uInt wmask = s->w_mask;
218;;; unsigned chain_length = s->max_chain_length;
219;;; if (s->prev_length >= s->good_match) {
220;;; chain_length >>= 2;
221;;; }
222
223 mov eax, [edx + dsPrevLen]
224 mov ebx, [edx + dsGoodMatch]
225 cmp eax, ebx
226 mov eax, [edx + dsWMask] ; mov does not change the flags set by cmp
227 mov ebx, [edx + dsMaxChainLen]
228 jl LastMatchGood ; signed compare: prev_length < good_match keeps the full chain length
229 shr ebx, 2
230LastMatchGood:
231
232;;; chainlen is decremented once beforehand so that the function can
233;;; use the sign flag instead of the zero flag for the exit test.
234;;; It is then shifted into the high word, to make room for the wmask
235;;; value, which it will always accompany.
236
237 dec ebx
238 shl ebx, 16
239 or ebx, eax ; NOTE(review): assumes wmask fits in the low 16 bits -- confirm
240 mov [chainlenwmask], ebx
241
242;;; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead;
243
244 mov eax, [edx + dsNiceMatch]
245 mov ebx, [edx + dsLookahead]
246 cmp ebx, eax
247 jl LookaheadLess ; lookahead < nice_match (signed): keep lookahead in ebx
248 mov ebx, eax
249LookaheadLess: mov [nicematch], ebx
250
251;;; register Bytef *scan = s->window + s->strstart;
252
253 mov esi, [edx + dsWindow]
254 mov [window], esi
255 mov ebp, [edx + dsStrStart]
256 lea edi, [esi + ebp]
257 mov [scan], edi
258
259;;; Determine how many bytes the scan ptr is off from being
260;;; dword-aligned.
261
262 mov eax, edi
263 neg eax
264 and eax, 3 ; (-scan) & 3 = bytes up to the next dword boundary
265 mov [scanalign], eax
266
267;;; IPos limit = s->strstart > (IPos)MAX_DIST(s) ?
268;;; s->strstart - (IPos)MAX_DIST(s) : NIL;
269
270 mov eax, [edx + dsWSize]
271 sub eax, MIN_LOOKAHEAD
272 sub ebp, eax ; ebp = strstart - (w_size - MIN_LOOKAHEAD)
273 jg LimitPositive
274 xor ebp, ebp ; not positive: limit = 0
275LimitPositive:
276
277;;; int best_len = s->prev_length;
278
279 mov eax, [edx + dsPrevLen]
280 mov [bestlen], eax
281
282;;; Keep s->window + best_len in esi and also store it in windowbestlen.
283
284 add esi, eax
285 mov [windowbestlen], esi
286
287;;; register ush scan_start = *(ushf*)scan;
288;;; register ush scan_end = *(ushf*)(scan+best_len-1);
289;;; Posf *prev = s->prev;
290
291 movzx ebx, word ptr [edi]
292 mov [scanstart], ebx
293 movzx ebx, word ptr [edi + eax - 1]
294 mov [scanend], ebx
295 mov edi, [edx + dsPrev]
296
297;;; Jump into the main loop.
298
299 mov edx, [chainlenwmask]
300 jmp short LoopEntry
301
; Hash-chain walk (LookupLoop) and string comparison (LoopCmps) of longest_match.
302align 4
303
304;;; do {
305;;; match = s->window + cur_match;
306;;; if (*(ushf*)(match+best_len-1) != scan_end ||
307;;; *(ushf*)match != scan_start) continue;
308;;; [...]
309;;; } while ((cur_match = prev[cur_match & wmask]) > limit
310;;; && --chain_length != 0);
311;;;
312;;; Here is the inner loop of the function. The function will spend the
313;;; majority of its time in this loop, and majority of that time will
314;;; be spent in the first ten instructions.
315;;;
316;;; Within this loop:
317;;; ebx = scanend
318;;; ecx = curmatch
319;;; edx = chainlenwmask - i.e., ((chainlen << 16) | wmask)
320;;; esi = windowbestlen - i.e., (window + bestlen)
321;;; edi = prev
322;;; ebp = limit
323
324LookupLoop:
325 and ecx, edx ; ecx holds a zero-extended 16-bit value here, so only the wmask half of edx takes effect
326 movzx ecx, word ptr [edi + ecx*2] ; cur_match = prev[cur_match & wmask]
327 cmp ecx, ebp
328 jbe LeaveNow ; cur_match <= limit (unsigned)
329 sub edx, 00010000h ; decrement the chain length kept in the high word
330 js LeaveNow ; chain exhausted
331LoopEntry: movzx eax, word ptr [esi + ecx - 1]
332 cmp eax, ebx
333 jnz LookupLoop ; bytes at best_len-1 differ from scan_end
334 mov eax, [window]
335 movzx eax, word ptr [eax + ecx]
336 cmp eax, [scanstart]
337 jnz LookupLoop ; first two bytes differ from scan_start
338
339;;; Store the current value of chainlen.
340
341 mov [chainlenwmask], edx
342
343;;; Point edi to the string under scrutiny, and esi to the string we
344;;; are hoping to match it up with. In actuality, esi and edi are
345;;; both pointed (MAX_MATCH_8 - scanalign) bytes ahead, and edx is
346;;; initialized to -(MAX_MATCH_8 - scanalign).
347
348 mov esi, [window]
349 mov edi, [scan]
350 add esi, ecx
351 mov eax, [scanalign]
352 mov edx, 0fffffef8h; -(MAX_MATCH_8)
353 lea edi, [edi + eax + 0108h] ;MAX_MATCH_8]
354 lea esi, [esi + eax + 0108h] ;MAX_MATCH_8]
355
356;;; Test the strings for equality, 8 bytes at a time. At the end,
357;;; adjust edx so that it is offset to the exact byte that mismatched.
358;;;
359;;; We already know at this point that the first three bytes of the
360;;; strings match each other, and they can be safely passed over before
361;;; starting the compare loop. So what this code does is skip over 0-3
362;;; bytes, as much as necessary in order to dword-align the edi
363;;; pointer. (esi will still be misaligned three times out of four.)
364;;;
365;;; It should be confessed that this loop usually does not represent
366;;; much of the total running time. Replacing it with a more
367;;; straightforward "rep cmpsb" would not drastically degrade
368;;; performance.
369
370LoopCmps:
371 mov eax, [esi + edx]
372 xor eax, [edi + edx] ; non-zero when the two dwords differ
373 jnz LeaveLoopCmps
374 mov eax, [esi + edx + 4]
375 xor eax, [edi + edx + 4]
376 jnz LeaveLoopCmps4
377 add edx, 8
378 jnz LoopCmps ; edx counts up from -MAX_MATCH_8 to zero
379 jmp short LenMaximum
380LeaveLoopCmps4: add edx, 4
381LeaveLoopCmps: test eax, 0000FFFFh
382 jnz LenLower ; difference lies in the low two bytes
383 add edx, 2
384 shr eax, 16
385LenLower: sub al, 1 ; carry is set only when al was 0, i.e. this byte matched
386 adc edx, 0 ; so step over it
387
388;;; Calculate the length of the match. If it is longer than MAX_MATCH,
389;;; then automatically accept it as the best possible match and leave.
390
; longest_match, final part: turn the mismatch offset into a length, update the best
; match, and return min(best_len, s->lookahead) in eax.
391 lea eax, [edi + edx] ; address of the first mismatching byte in the scan string
392 mov edi, [scan]
393 sub eax, edi ; eax = match length
394 cmp eax, MAX_MATCH
395 jge LenMaximum
396
397;;; If the length of the match is not longer than the best match we
398;;; have so far, then forget it and return to the lookup loop.
399
400 mov edx, [deflatestate]
401 mov ebx, [bestlen]
402 cmp eax, ebx
403 jg LongerMatch
404 mov esi, [windowbestlen] ; reload the registers LookupLoop expects
405 mov edi, [edx + dsPrev]
406 mov ebx, [scanend]
407 mov edx, [chainlenwmask]
408 jmp LookupLoop
409
410;;; s->match_start = cur_match;
411;;; best_len = len;
412;;; if (len >= nice_match) break;
413;;; scan_end = *(ushf*)(scan+best_len-1);
414
415LongerMatch: mov ebx, [nicematch]
416 mov [bestlen], eax
417 mov [edx + dsMatchStart], ecx
418 cmp eax, ebx
419 jge LeaveNow
420 mov esi, [window]
421 add esi, eax
422 mov [windowbestlen], esi
423 movzx ebx, word ptr [edi + eax - 1] ; edi = scan here
424 mov edi, [edx + dsPrev]
425 mov [scanend], ebx
426 mov edx, [chainlenwmask]
427 jmp LookupLoop
428
429;;; Accept the current string, with the maximum possible length.
430
431LenMaximum: mov edx, [deflatestate]
432 mov dword ptr [bestlen], MAX_MATCH
433 mov [edx + dsMatchStart], ecx
434
435;;; if ((uInt)best_len <= s->lookahead) return (uInt)best_len;
436;;; return s->lookahead;
437
438LeaveNow:
439 mov edx, [deflatestate]
440 mov ebx, [bestlen]
441 mov eax, [edx + dsLookahead]
442 cmp ebx, eax
443 jg LookaheadRet ; best_len > lookahead: return lookahead
444 mov eax, ebx
445LookaheadRet:
446
447;;; Restore the stack and return from whence we came.
448
449 add esp, LocalVarsSize
450 pop ebx
451 pop esi
452 pop edi
453 pop ebp
454
455 ret
456; please don't remove this string !
457; You can freely use match686 in any free or commercial app if you don't remove the string in the binary!
458 db 0dh,0ah,"asm686 with masm, optimised assembly code from Brian Raiter, written 1998",0dh,0ah
459
460
461 IFDEF NOUNDERLINE
462 longest_match endp
463 ELSE
464 _longest_match endp
465 ENDIF
466
; match_init / _match_init -- empty routine that returns immediately; exported under the
; name without the underscore when NOUNDERLINE is defined.
467 IFDEF NOUNDERLINE
468 match_init proc near
469 ret
470 match_init endp
471 ELSE
472 _match_init proc near
473 ret
474 _match_init endp
475 ENDIF
476
477
478_TEXT ends
479end
diff --git a/contrib/masmx86/readme.txt b/contrib/masmx86/readme.txt
deleted file mode 100644
index 3271f72..0000000
--- a/contrib/masmx86/readme.txt
+++ /dev/null
@@ -1,27 +0,0 @@
1
2Summary
3-------
4This directory contains ASM implementations of the functions
5longest_match() and inflate_fast().
6
7
8Use instructions
9----------------
10Assemble using MASM, and copy the object files into the zlib source
11directory, then run the appropriate makefile, as suggested below. You can
12download MASM from here:
13
14 http://www.microsoft.com/downloads/details.aspx?displaylang=en&FamilyID=7a1c9da0-0510-44a2-b042-7ef370530c64
15
16You can also get object files here:
17
18 http://www.winimage.com/zLibDll/zlib124_masm_obj.zip
19
20Build instructions
21------------------
22* With Microsoft C and MASM:
23nmake -f win32/Makefile.msc LOC="-DASMV -DASMINF" OBJA="match686.obj inffas32.obj"
24
25* With Borland C and TASM:
26make -f win32/Makefile.bor LOCAL_ZLIB="-DASMV -DASMINF" OBJA="match686.obj inffas32.obj" OBJPA="+match686c.obj+match686.obj+inffas32.obj"
27