summaryrefslogtreecommitdiff
path: root/contrib/masmx86
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/masmx86')
-rw-r--r--contrib/masmx86/gvmat32.asm905
-rw-r--r--contrib/masmx86/gvmat32c.c206
-rw-r--r--contrib/masmx86/inffas32.asm1033
-rwxr-xr-xcontrib/masmx86/mkasm.bat3
-rw-r--r--contrib/masmx86/readme.txt21
5 files changed, 2168 insertions, 0 deletions
diff --git a/contrib/masmx86/gvmat32.asm b/contrib/masmx86/gvmat32.asm
new file mode 100644
index 0000000..ec360e6
--- /dev/null
+++ b/contrib/masmx86/gvmat32.asm
@@ -0,0 +1,905 @@
1;
2; gvmat32.asm -- Asm portion of the optimized longest_match for 32 bits x86
3; Copyright (C) 1995-1996 Jean-loup Gailly and Gilles Vollant.
4; File written by Gilles Vollant, by modifying the longest_match
5; from Jean-loup Gailly in deflate.c
6; It needs wmask == 0x7fff
7; (assembly code is faster with a fixed wmask)
8;
9; For Visual C++ 4.2 and ML 6.11c (version in directory \MASM611C of Win95 DDK)
10; I compile with : "ml /coff /Zi /c gvmat32.asm"
11;
12
13;uInt longest_match_7fff(s, cur_match)
14; deflate_state *s;
15; IPos cur_match; /* current match */
16
; Stack-frame map for longest_match_7fff. The offsets are valid once the
; prologue has executed its four register pushes and "sub esp,NbStackAdd".
; Every name below expands to an esp-relative memory operand, so esp must
; not move while any of them is in use.
17 NbStack equ 76
; the two stack arguments, located above the return address
18 cur_match equ dword ptr[esp+NbStack-0]
19 str_s equ dword ptr[esp+NbStack-4]
20; 5 dwords sit between the arguments and the locals (ret,ebp,edi,esi,ebx, in push order)
21 adrret equ dword ptr[esp+NbStack-8]
22 pushebp equ dword ptr[esp+NbStack-12]
23 pushedi equ dword ptr[esp+NbStack-16]
24 pushesi equ dword ptr[esp+NbStack-20]
25 pushebx equ dword ptr[esp+NbStack-24]
26
; local variables (13 dword slots, esp+0 .. esp+48)
27 chain_length equ dword ptr [esp+NbStack-28]
28 limit equ dword ptr [esp+NbStack-32]
29 best_len equ dword ptr [esp+NbStack-36]
30 window equ dword ptr [esp+NbStack-40]
31 prev equ dword ptr [esp+NbStack-44]
; scan_start is accessed as a 16-bit value (first two bytes of the scanned string)
32 scan_start equ word ptr [esp+NbStack-48]
33 wmask equ dword ptr [esp+NbStack-52]
34 match_start_ptr equ dword ptr [esp+NbStack-56]
35 nice_match equ dword ptr [esp+NbStack-60]
36 scan equ dword ptr [esp+NbStack-64]
37
38 windowlen equ dword ptr [esp+NbStack-68]
39 match_start equ dword ptr [esp+NbStack-72]
40 strend equ dword ptr [esp+NbStack-76]
; number of bytes the prologue subtracts from esp to reserve the locals above
41 NbStackAdd equ (NbStack-24)
42
43 .386p
44
45 name gvmatch
46 .MODEL FLAT
47
48
49
; Byte offsets of the deflate_state fields used by longest_match_7fff.
50; All the +4 offsets are due to the addition of pending_buf_size
51; to the deflate_state structure after the asm code was first written
52; (if you compile with zlib 1.0.4 or older, remove the +4).
53; Note: these values are correct for a structure packed on an 8-byte boundary.
54 dep_chain_length equ 70h+4 ; s->max_chain_length
55 dep_window equ 2ch+4 ; s->window
56 dep_strstart equ 60h+4 ; s->strstart
57 dep_prev_length equ 6ch+4 ; s->prev_length
58 dep_nice_match equ 84h+4 ; s->nice_match
59 dep_w_size equ 20h+4 ; presumably s->w_size (used to derive MAX_DIST) - confirm against deflate.h
60 dep_prev equ 34h+4 ; s->prev
61 dep_w_mask equ 28h+4 ; presumably s->w_mask; not referenced by the code in this file
62 dep_good_match equ 80h+4 ; s->good_match
63 dep_match_start equ 64h+4 ; s->match_start
64 dep_lookahead equ 68h+4 ; s->lookahead
65
66
67_TEXT segment
68
; Symbols exported to the C side (gvmat32c.c).
; Define NOUNDERLINE when the C toolchain does not prepend an underscore to
; external names. cpudetect32 is listed explicitly so that its export does
; not depend on the assembler's default PROC visibility.
IFDEF NOUNDERLINE
            public  longest_match_7fff
            public  longest_match_686
            public  cpudetect32
;           public  match_init
ELSE
            public  _longest_match_7fff
            public  _longest_match_686
            public  _cpudetect32
;           public  _match_init
ENDIF

; Match-length limits shared with the C implementation.
    MAX_MATCH       equ     258
    MIN_MATCH       equ     3
    MIN_LOOKAHEAD   equ     (MAX_MATCH+MIN_MATCH+1)


; match_init is provided by the C file; the assembly stubs stay disabled.
IFDEF NOUNDERLINE
;match_init      proc near
;                ret
;match_init      endp
ELSE
;_match_init     proc near
;                ret
;_match_init     endp
ENDIF
94
95
;-----------------------------------------------------------------------
; uInt longest_match_7fff(deflate_state *s, IPos cur_match)
;
; Walks the hash chain starting at cur_match and looks for the longest
; string in the window that matches the string at s->strstart.
; The window mask is hard-coded (and eax,7fffh), so the caller must only
; use this routine when s->w_mask == 0x7fff.
;
; In:    [esp+4] = s, [esp+8] = cur_match (stack arguments, caller cleans up)
; Out:   eax = min(best_len, s->lookahead); s->match_start is written on exit
; Saves: ebx, esi, edi, ebp (pushed in the prologue, popped before ret)
; Uses:  ecx, edx and the flags as scratch
; Stack: NbStackAdd bytes of locals, addressed through the esp-relative
;        equates defined above (esp must stay fixed inside the body)
;-----------------------------------------------------------------------
96IFDEF NOUNDERLINE
97longest_match_7fff proc near
98ELSE
99_longest_match_7fff proc near
100ENDIF
101
; edx = s (first argument), fetched before the pushes move esp
102 mov edx,[esp+4]
103
104
105
106 push ebp
107 push edi
108 push esi
109 push ebx
110
111 sub esp,NbStackAdd
112
113; initialize or check the variables used in match.asm.
114 mov ebp,edx
115
116; chain_length = s->max_chain_length
117; if (prev_length>=good_match) chain_length >>= 2
118 mov edx,[ebp+dep_chain_length]
119 mov ebx,[ebp+dep_prev_length]
120 cmp [ebp+dep_good_match],ebx
121 ja noshr
122 shr edx,2
123noshr:
124; we increment chain_length because in the asm, the --chain_length is at the beginning of the loop
125 inc edx
126 mov edi,[ebp+dep_nice_match]
127 mov chain_length,edx
128 mov eax,[ebp+dep_lookahead]
129 cmp eax,edi
130; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead;
131 jae nolookaheadnicematch
132 mov edi,eax
133nolookaheadnicematch:
134; best_len = s->prev_length
135 mov best_len,ebx
136
137; window = s->window
138 mov esi,[ebp+dep_window]
139 mov ecx,[ebp+dep_strstart]
140 mov window,esi
141
142 mov nice_match,edi
143; scan = window + strstart
144 add esi,ecx
145 mov scan,esi
146; dx = *(ushf*)scan
147 mov dx,word ptr [esi]
148; bx = *(ushf*)(scan+best_len-1)
149 mov bx,word ptr [esi+ebx-1]
150 add esi,MAX_MATCH-1
151; scan_start = *scan
152 mov scan_start,dx
153; strend = scan + MAX_MATCH-1
154 mov strend,esi
155; bx = scan_end = *(ushf*)(scan+best_len-1)
156
157; IPos limit = s->strstart > (IPos)MAX_DIST(s) ?
158; s->strstart - (IPos)MAX_DIST(s) : NIL;
159
160 mov esi,[ebp+dep_w_size]
161 sub esi,MIN_LOOKAHEAD
162; here esi = MAX_DIST(s)
163 sub ecx,esi
164 ja nodist
165 xor ecx,ecx
166nodist:
167 mov limit,ecx
168
169; prev = s->prev
170 mov edx,[ebp+dep_prev]
171 mov prev,edx
172
173; match_start = s->match_start; from here on bp holds scan_start, so ebp
; no longer points to s (it is reloaded from str_s at exitloop)
174 mov edx,dword ptr [ebp+dep_match_start]
175 mov bp,scan_start
176 mov eax,cur_match
177 mov match_start,edx
178
179 mov edx,window
180 mov edi,edx
181 add edi,best_len
182 mov esi,prev
183 dec edi
184; windowlen = window + best_len -1
185 mov windowlen,edi
186
187 jmp beginloop2
188 align 4
189
190; here, in the loop
191; eax = ax = cur_match
192; ecx = limit
193; bx = scan_end
194; bp = scan_start
195; edi = windowlen (window + best_len -1)
196; esi = prev
; edx = window
197
198
199; here, chain_length <= 16: the one-candidate-per-iteration loop is used
200normalbeg0add16:
; undo the 16 subtracted in advance for the unrolled loop; zero means the chain budget is used up
201 add chain_length,16
202 jz exitloop
203normalbeg0:
204 cmp word ptr[edi+eax],bx
205 je normalbeg2noroll
206rcontlabnoroll:
207; cur_match = prev[cur_match & wmask]
208 and eax,7fffh
209 mov ax,word ptr[esi+eax*2]
210; if cur_match <= limit, go to exitloop
211 cmp ecx,eax
212 jnb exitloop
213; if --chain_length != 0, try the next candidate, else go to exitloop
214 dec chain_length
215 jnz normalbeg0
216 jmp exitloop
217
218normalbeg2noroll:
219; if (scan_start==*(cur_match+window)) goto normalbeg2
220 cmp bp,word ptr[edx+eax]
221 jne rcontlabnoroll
222 jmp normalbeg2
223
224contloop3:
225 mov edi,windowlen
226
227; cur_match = prev[cur_match & wmask]
228 and eax,7fffh
229 mov ax,word ptr[esi+eax*2]
230; if cur_match <= limit, go to exitloop
231 cmp ecx,eax
232jnbexitloopshort1:
233 jnb exitloop
234; otherwise fall through: chain_length is adjusted at beginloop2
235
236
237; begin the main loop
238beginloop2:
239 sub chain_length,16+1
240; if chain_length <=16, don't use the unrolled loop
241 jna normalbeg0add16
242
243do16:
244 cmp word ptr[edi+eax],bx
245 je normalbeg2dc0
246
; maccn: one step of the unrolled loop. Follows the chain to the next
; candidate, leaves through exitloop when it is <= limit, and jumps to
; "lab" when the word at candidate+best_len-1 equals scan_end.
247maccn MACRO lab
248 and eax,7fffh
249 mov ax,word ptr[esi+eax*2]
250 cmp ecx,eax
251 jnb exitloop
252 cmp word ptr[edi+eax],bx
253 je lab
254 ENDM
255
256rcontloop0:
257 maccn normalbeg2dc1
258
259rcontloop1:
260 maccn normalbeg2dc2
261
262rcontloop2:
263 maccn normalbeg2dc3
264
265rcontloop3:
266 maccn normalbeg2dc4
267
268rcontloop4:
269 maccn normalbeg2dc5
270
271rcontloop5:
272 maccn normalbeg2dc6
273
274rcontloop6:
275 maccn normalbeg2dc7
276
277rcontloop7:
278 maccn normalbeg2dc8
279
280rcontloop8:
281 maccn normalbeg2dc9
282
283rcontloop9:
284 maccn normalbeg2dc10
285
286rcontloop10:
287 maccn short normalbeg2dc11
288
289rcontloop11:
290 maccn short normalbeg2dc12
291
292rcontloop12:
293 maccn short normalbeg2dc13
294
295rcontloop13:
296 maccn short normalbeg2dc14
297
298rcontloop14:
299 maccn short normalbeg2dc15
300
301rcontloop15:
302 and eax,7fffh
303 mov ax,word ptr[esi+eax*2]
304 cmp ecx,eax
305 jnb exitloop
306
; 16 candidates consumed: run another unrolled pass only if more than 16 remain
307 sub chain_length,16
308 ja do16
309 jmp normalbeg0add16
310
311;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
312
; normbeg: landing pad for a scan_end hit at unrolled step "valsub".
; Checks the first word of the candidate; on mismatch resumes the unrolled
; loop at rcontlab, otherwise corrects chain_length for the steps of this
; pass that were not executed and goes to the full comparison at iseq.
313normbeg MACRO rcontlab,valsub
314; if we are here, we know that *(match+best_len-1) == scan_end
315 cmp bp,word ptr[edx+eax]
316; if (*(ushf*)match != scan_start) goto rcontlab
317 jne rcontlab
318; calculate the good chain_length, and we'll compare scan and match string
319 add chain_length,16-valsub
320 jmp iseq
321 ENDM
322
323
324normalbeg2dc11:
325 normbeg rcontloop11,11
326
327normalbeg2dc12:
328 normbeg short rcontloop12,12
329
330normalbeg2dc13:
331 normbeg short rcontloop13,13
332
333normalbeg2dc14:
334 normbeg short rcontloop14,14
335
336normalbeg2dc15:
337 normbeg short rcontloop15,15
338
339normalbeg2dc10:
340 normbeg rcontloop10,10
341
342normalbeg2dc9:
343 normbeg rcontloop9,9
344
345normalbeg2dc8:
346 normbeg rcontloop8,8
347
348normalbeg2dc7:
349 normbeg rcontloop7,7
350
351normalbeg2dc6:
352 normbeg rcontloop6,6
353
354normalbeg2dc5:
355 normbeg rcontloop5,5
356
357normalbeg2dc4:
358 normbeg rcontloop4,4
359
360normalbeg2dc3:
361 normbeg rcontloop3,3
362
363normalbeg2dc2:
364 normbeg rcontloop2,2
365
366normalbeg2dc1:
367 normbeg rcontloop1,1
368
369normalbeg2dc0:
370 normbeg rcontloop0,0
371
372
373; we go in normalbeg2 because *(ushf*)(match+best_len-1) == scan_end
374
375normalbeg2:
376 mov edi,window
377
378 cmp bp,word ptr[edi+eax]
379 jne contloop3 ; if *(ushf*)match != scan_start, continue
380
381iseq:
382; if we are here, we know that *(match+best_len-1) == scan_end
383; and (match == scan_start)
384
385 mov edi,edx
386 mov esi,scan ; esi = scan
387 add edi,eax ; edi = window + cur_match = match
388
389 mov edx,[esi+3] ; compare manually dword at match+3
390 xor edx,[edi+3] ; and scan +3
391
392 jz begincompare ; if equal, go to long compare
393
394; find the first mismatching byte of that dword and set len (in esi) accordingly
395 or dl,dl
396 je eq1rr
397 mov esi,3
398 jmp trfinval
399eq1rr:
400 or dx,dx
401 je eq1
402
403 mov esi,4
404 jmp trfinval
405eq1:
406 and edx,0ffffffh
407 jz eq11
408 mov esi,5
409 jmp trfinval
410eq11:
411 mov esi,6
412 jmp trfinval
413
414begincompare:
415 ; here we know that scan and match begin with the same bytes
416 add edi,6
417 add esi,6
418 mov ecx,(MAX_MATCH-(2+4))/4 ; scan for at most MAX_MATCH bytes
419 repe cmpsd ; loop until mismatch
420
421 je trfin ; every dword matched: esi-scan is the full length
422; otherwise locate the first mismatching byte inside the last dword compared
423 sub esi,4
424 mov edx,[edi-4]
425 xor edx,[esi]
426
427 or dl,dl
428 jnz trfin
429 inc esi
430
431 or dx,dx
432 jnz trfin
433 inc esi
434
435 and edx,0ffffffh
436 jnz trfin
437 inc esi
438
439trfin:
440 sub esi,scan ; esi = len
441trfinval:
442; here the comparison is finished, and esi contains the length of the equal string
443 cmp esi,best_len ; if len > best_len, go newbestlen
444 ja short newbestlen
445; now we restore edx, ecx and esi, for the big loop
446 mov esi,prev
447 mov ecx,limit
448 mov edx,window
449 jmp contloop3
450
451newbestlen:
452 mov best_len,esi ; len become best_len
453
454 mov match_start,eax ; save new position as match_start
455 cmp esi,nice_match ; if best_len >= nice_match, exit
456 jae exitloop
457 mov ecx,scan
458 mov edx,window ; restore edx=window
459 add ecx,esi
460 add esi,edx
461
462 dec esi
463 mov windowlen,esi ; windowlen = window + best_len-1
464 mov bx,[ecx-1] ; bx = *(scan+best_len-1) = scan_end
465
466; now we restore ecx and esi, for the big loop :
467 mov esi,prev
468 mov ecx,limit
469 jmp contloop3
470
471exitloop:
472; exit : s->match_start=match_start
473 mov ebx,match_start
474 mov ebp,str_s
475 mov ecx,best_len
476 mov dword ptr [ebp+dep_match_start],ebx
477 mov eax,dword ptr [ebp+dep_lookahead]
478 cmp ecx,eax
479 ja minexlo
480 mov eax,ecx
481minexlo:
482; return min(best_len,s->lookahead)
483
484; restore stack and register ebx,esi,edi,ebp
485 add esp,NbStackAdd
486
487 pop ebx
488 pop esi
489 pop edi
490 pop ebp
491 ret
492InfoAuthor:
493; please don't remove this string !
494; You are free to use gvmat32 in any free or commercial app if you don't remove the string from the binary!
495 db 0dh,0ah,"GVMat32 optimised assembly code written 1996-98 by Gilles Vollant",0dh,0ah
496
497
498
499IFDEF NOUNDERLINE
500longest_match_7fff endp
501ELSE
502_longest_match_7fff endp
503ENDIF
504
505
; EFLAGS bits that cpudetect32 tries to flip in order to classify the CPU.
EFLAGS_AC_BIT   equ     00040000h       ; cannot be changed on an 80386
EFLAGS_ID_BIT   equ     00200000h       ; cannot be changed on an old 486 (no CPUID)

;-----------------------------------------------------------------------
; unsigned long cpudetect32(void)
;
; Classifies the processor by testing which EFLAGS bits are writable.
; Out:   eax = 0300h  when the AC bit cannot be toggled (80386)
;        eax = 0400h  when the ID bit cannot be toggled (old 486)
;        eax = value left in eax by CPUID function 1 otherwise
;              (the C caller reads bits 8..11 of it)
; Saves: ebx (CPUID overwrites it)
; Uses:  ecx, edx, flags
;-----------------------------------------------------------------------
IFDEF NOUNDERLINE
cpudetect32     proc near
ELSE
_cpudetect32    proc near
ENDIF

        push    ebx

        ; ---- probe 1: is EFLAGS.AC writable? ----
        pushfd
        pop     eax                     ; eax = EFLAGS on entry
        mov     ecx, eax                ; ecx = reference copy
        xor     eax, EFLAGS_AC_BIT      ; request the opposite AC value
        push    eax
        popfd                           ; try to install it
        pushfd
        pop     eax                     ; read back what the CPU accepted
        xor     eax, ecx                ; zero => AC did not change
        jz      cpu_is_386
        push    ecx
        popfd                           ; put the entry EFLAGS back

        ; ---- probe 2: is EFLAGS.ID writable? ----
        pushfd                          ; spare copy, consumed by the popfd below
        pushfd
        pop     ecx                     ; ecx = reference copy

        mov     eax, ecx
        xor     eax, EFLAGS_ID_BIT      ; request the opposite ID value
        push    eax
        popfd                           ; try to install it
        pushfd
        pop     eax                     ; read back what the CPU accepted
        popfd                           ; restore EFLAGS from the spare copy
        xor     eax, ecx                ; zero => ID did not change
        je      cpu_is_old_486

        ; ---- CPUID is available: query function 1 ----
        mov     eax, 1
        db      0fh,0a2h                ; CPUID opcode, emitted as raw bytes

cpu_detect_done:
        pop     ebx
        ret

cpu_is_386:
        mov     eax, 0300h
        jmp     cpu_detect_done

cpu_is_old_486:
        mov     eax, 0400h
        jmp     cpu_detect_done

IFDEF NOUNDERLINE
cpudetect32     endp
ELSE
_cpudetect32    endp
ENDIF
561
562
563
564
;;; Match-length limits used by longest_match_686.
MAX_MATCH       equ     258
MIN_MATCH       equ     3
MIN_LOOKAHEAD   equ     (MAX_MATCH + MIN_MATCH + 1)
;;; MAX_MATCH rounded up to the next multiple of 8 (264 = 0108h). This is
;;; the span that longest_match_686 hard-codes in its 8-byte compare loop,
;;; so the mask must clear exactly the three low bits.
MAX_MATCH_8_    equ     ((MAX_MATCH + 7) AND 0FFF8h)


;;; stack frame offsets (relative to esp after the prologue of
;;; longest_match_686: four pushes followed by "sub esp, LocalVarsSize")

chainlenwmask   equ     esp + 0         ; high word: current chain len
                                        ; low word: s->wmask
window          equ     esp + 4         ; local copy of s->window
windowbestlen   equ     esp + 8         ; s->window + bestlen
scanstart       equ     esp + 16        ; first two bytes of string
scanend         equ     esp + 12        ; last two bytes of string
scanalign       equ     esp + 20        ; dword-misalignment of string
nicematch       equ     esp + 24        ; a good enough match size
bestlen         equ     esp + 28        ; size of best match so far
scan            equ     esp + 32        ; ptr to string wanting match

LocalVarsSize   equ     36
;       saved ebx       at esp + 36
;       saved edi       at esp + 40
;       saved esi       at esp + 44
;       saved ebp       at esp + 48
;       return address  at esp + 52
deflatestate    equ     esp + 56        ; the function arguments
curmatch        equ     esp + 60

;;; Offsets for fields in the deflate_state structure. These numbers
;;; are calculated from the definition of deflate_state, with the
;;; assumption that the compiler will dword-align the fields. (Thus,
;;; changing the definition of deflate_state could easily cause this
;;; program to crash horribly, without so much as a warning at
;;; compile time. Sigh.)

dsWSize         equ     36
dsWMask         equ     44
dsWindow        equ     48
dsPrev          equ     56
dsMatchLen      equ     88
dsPrevMatch     equ     92
dsStrStart      equ     100
dsMatchStart    equ     104
dsLookahead     equ     108
dsPrevLen       equ     112
dsMaxChainLen   equ     116
dsGoodMatch     equ     132
dsNiceMatch     equ     136
613
614
615;;; match.asm -- Pentium-Pro-optimized version of longest_match()
616;;; Written for zlib 1.1.2
617;;; Copyright (C) 1998 Brian Raiter <breadbox@muppetlabs.com>
618;;; You can look at http://www.muppetlabs.com/~breadbox/software/assembly.html
619;;;
620;;; This is free software; you can redistribute it and/or modify it
621;;; under the terms of the GNU General Public License.
622
623;GLOBAL _longest_match, _match_init
624
625
626;SECTION .text
627
628;;; uInt longest_match(deflate_state *deflatestate, IPos curmatch)
629
630;_longest_match:
;;;---------------------------------------------------------------------
;;; uInt longest_match_686(deflate_state *deflatestate, IPos curmatch)
;;;
;;; Follows the hash chain starting at curmatch and looks for the longest
;;; string in the window that matches the string at s->strstart.
;;;
;;; In:    [esp+4] = deflatestate, [esp+8] = curmatch (stack arguments)
;;; Out:   eax = min(best_len, s->lookahead); s->match_start is updated
;;;        each time a longer match is found
;;; Saves: ebx, esi, edi, ebp
;;; Uses:  ecx, edx, flags
;;; Stack: LocalVarsSize bytes of locals, addressed through the
;;;        esp-relative equates defined above
;;;
;;; Comparisons of quantities that the C code treats as unsigned
;;; (prev_length/good_match, nice_match/lookahead, strstart/MAX_DIST,
;;; best_len/lookahead) use unsigned condition codes. Comparisons that
;;; the C code performs on ints (len/best_len/nice_match) stay signed.
;;;---------------------------------------------------------------------
IFDEF NOUNDERLINE
longest_match_686   proc near
ELSE
_longest_match_686  proc near
ENDIF


;;; Save registers that the compiler may be using, and adjust esp to
;;; make room for our stack frame.

        push    ebp
        push    edi
        push    esi
        push    ebx
        sub     esp, LocalVarsSize

;;; Retrieve the function arguments. ecx will hold cur_match
;;; throughout the entire function. edx will hold the pointer to the
;;; deflate_state structure during the function's setup (before
;;; entering the main loop).

        mov     edx, [deflatestate]
        mov     ecx, [curmatch]

;;; uInt wmask = s->w_mask;
;;; unsigned chain_length = s->max_chain_length;
;;; if (s->prev_length >= s->good_match) {
;;;     chain_length >>= 2;
;;; }

        mov     eax, [edx + dsPrevLen]
        mov     ebx, [edx + dsGoodMatch]
        cmp     eax, ebx                ; flags survive the two loads below
        mov     eax, [edx + dsWMask]
        mov     ebx, [edx + dsMaxChainLen]
        jb      LastMatchGood           ; prev_length < good_match (unsigned)
        shr     ebx, 2
LastMatchGood:

;;; chainlen is decremented once beforehand so that the function can
;;; use the sign flag instead of the zero flag for the exit test.
;;; It is then shifted into the high word, to make room for the wmask
;;; value, which it will always accompany.

        dec     ebx
        shl     ebx, 16
        or      ebx, eax
        mov     [chainlenwmask], ebx

;;; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead;

        mov     eax, [edx + dsNiceMatch]
        mov     ebx, [edx + dsLookahead]
        cmp     ebx, eax
        jb      LookaheadLess           ; lookahead < nice_match (unsigned)
        mov     ebx, eax
LookaheadLess:
        mov     [nicematch], ebx

;;; register Bytef *scan = s->window + s->strstart;

        mov     esi, [edx + dsWindow]
        mov     [window], esi
        mov     ebp, [edx + dsStrStart]
        lea     edi, [esi + ebp]
        mov     [scan], edi

;;; Determine how many bytes the scan ptr is off from being
;;; dword-aligned.

        mov     eax, edi
        neg     eax
        and     eax, 3
        mov     [scanalign], eax

;;; IPos limit = s->strstart > (IPos)MAX_DIST(s) ?
;;;     s->strstart - (IPos)MAX_DIST(s) : NIL;

        mov     eax, [edx + dsWSize]
        sub     eax, MIN_LOOKAHEAD      ; eax = MAX_DIST(s)
        sub     ebp, eax
        ja      LimitPositive           ; strstart > MAX_DIST (unsigned)
        xor     ebp, ebp
LimitPositive:

;;; int best_len = s->prev_length;

        mov     eax, [edx + dsPrevLen]
        mov     [bestlen], eax

;;; Keep s->window + best_len in esi, and store a copy in the
;;; windowbestlen stack slot.

        add     esi, eax
        mov     [windowbestlen], esi

;;; register ush scan_start = *(ushf*)scan;
;;; register ush scan_end   = *(ushf*)(scan+best_len-1);
;;; Posf *prev = s->prev;

        movzx   ebx, word ptr [edi]
        mov     [scanstart], ebx
        movzx   ebx, word ptr [edi + eax - 1]
        mov     [scanend], ebx
        mov     edi, [edx + dsPrev]

;;; Jump into the main loop.

        mov     edx, [chainlenwmask]
        jmp     short LoopEntry

align 4

;;; do {
;;;     match = s->window + cur_match;
;;;     if (*(ushf*)(match+best_len-1) != scan_end ||
;;;         *(ushf*)match != scan_start) continue;
;;;     [...]
;;; } while ((cur_match = prev[cur_match & wmask]) > limit
;;;          && --chain_length != 0);
;;;
;;; Here is the inner loop of the function. The function will spend the
;;; majority of its time in this loop, and majority of that time will
;;; be spent in the first ten instructions.
;;;
;;; Within this loop:
;;; ebx = scanend
;;; ecx = curmatch
;;; edx = chainlenwmask - i.e., ((chainlen << 16) | wmask)
;;; esi = windowbestlen - i.e., (window + bestlen)
;;; edi = prev
;;; ebp = limit

LookupLoop:
        and     ecx, edx
        movzx   ecx, word ptr [edi + ecx*2]
        cmp     ecx, ebp
        jbe     LeaveNow
        sub     edx, 00010000h          ; --chainlen (kept in the high word)
        js      LeaveNow
LoopEntry:
        movzx   eax, word ptr [esi + ecx - 1]
        cmp     eax, ebx
        jnz     LookupLoop
        mov     eax, [window]
        movzx   eax, word ptr [eax + ecx]
        cmp     eax, [scanstart]
        jnz     LookupLoop

;;; Store the current value of chainlen.

        mov     [chainlenwmask], edx

;;; Point edi to the string under scrutiny, and esi to the string we
;;; are hoping to match it up with. In actuality, esi and edi are
;;; both pointed (MAX_MATCH_8 - scanalign) bytes ahead, and edx is
;;; initialized to -(MAX_MATCH_8 - scanalign).
;;; MAX_MATCH_8 is MAX_MATCH rounded up to a multiple of 8 (0108h); the
;;; literal is spelled out here instead of being taken from an equate.

        mov     esi, [window]
        mov     edi, [scan]
        add     esi, ecx
        mov     eax, [scanalign]
        mov     edx, 0fffffef8h         ; -(MAX_MATCH_8)
        lea     edi, [edi + eax + 0108h] ; MAX_MATCH_8
        lea     esi, [esi + eax + 0108h] ; MAX_MATCH_8

;;; Test the strings for equality, 8 bytes at a time. At the end,
;;; adjust edx so that it is offset to the exact byte that mismatched.
;;;
;;; We already know at this point that the first three bytes of the
;;; strings match each other, and they can be safely passed over before
;;; starting the compare loop. So what this code does is skip over 0-3
;;; bytes, as much as necessary in order to dword-align the edi
;;; pointer. (esi will still be misaligned three times out of four.)
;;;
;;; It should be confessed that this loop usually does not represent
;;; much of the total running time. Replacing it with a more
;;; straightforward "rep cmpsb" would not drastically degrade
;;; performance.

LoopCmps:
        mov     eax, [esi + edx]
        xor     eax, [edi + edx]
        jnz     LeaveLoopCmps
        mov     eax, [esi + edx + 4]
        xor     eax, [edi + edx + 4]
        jnz     LeaveLoopCmps4
        add     edx, 8
        jnz     LoopCmps
        jmp     short LenMaximum
LeaveLoopCmps4:
        add     edx, 4
LeaveLoopCmps:
        test    eax, 0000FFFFh          ; mismatch in the low two bytes?
        jnz     LenLower
        add     edx, 2
        shr     eax, 16
LenLower:
        sub     al, 1                   ; carry is set only when al was 0,
        adc     edx, 0                  ; i.e. the first byte of the pair matched

;;; Calculate the length of the match. If it is longer than MAX_MATCH,
;;; then automatically accept it as the best possible match and leave.

        lea     eax, [edi + edx]
        mov     edi, [scan]
        sub     eax, edi
        cmp     eax, MAX_MATCH
        jge     LenMaximum

;;; If the length of the match is not longer than the best match we
;;; have so far, then forget it and return to the lookup loop.

        mov     edx, [deflatestate]
        mov     ebx, [bestlen]
        cmp     eax, ebx
        jg      LongerMatch
        mov     esi, [windowbestlen]
        mov     edi, [edx + dsPrev]
        mov     ebx, [scanend]
        mov     edx, [chainlenwmask]
        jmp     LookupLoop

;;; s->match_start = cur_match;
;;; best_len = len;
;;; if (len >= nice_match) break;
;;; scan_end = *(ushf*)(scan+best_len-1);

LongerMatch:
        mov     ebx, [nicematch]
        mov     [bestlen], eax
        mov     [edx + dsMatchStart], ecx
        cmp     eax, ebx
        jge     LeaveNow
        mov     esi, [window]
        add     esi, eax
        mov     [windowbestlen], esi
        movzx   ebx, word ptr [edi + eax - 1]
        mov     edi, [edx + dsPrev]
        mov     [scanend], ebx
        mov     edx, [chainlenwmask]
        jmp     LookupLoop

;;; Accept the current string, with the maximum possible length.

LenMaximum:
        mov     edx, [deflatestate]
        mov     dword ptr [bestlen], MAX_MATCH
        mov     [edx + dsMatchStart], ecx

;;; if ((uInt)best_len <= s->lookahead) return (uInt)best_len;
;;; return s->lookahead;

LeaveNow:
        mov     edx, [deflatestate]
        mov     ebx, [bestlen]
        mov     eax, [edx + dsLookahead]
        cmp     ebx, eax
        ja      LookaheadRet            ; best_len > lookahead (unsigned)
        mov     eax, ebx
LookaheadRet:

;;; Restore the stack and return from whence we came.

        add     esp, LocalVarsSize
        pop     ebx
        pop     esi
        pop     edi
        pop     ebp

        ret
; please don't remove this string !
; You can freely use gvmat32 in any free or commercial app if you don't remove the string from the binary!
        db      0dh,0ah,"asm686 with masm, optimised assembly code from Brian Raiter, written 1998",0dh,0ah

IFDEF NOUNDERLINE
longest_match_686   endp
ELSE
_longest_match_686  endp
ENDIF
903
904_TEXT ends
905end
diff --git a/contrib/masmx86/gvmat32c.c b/contrib/masmx86/gvmat32c.c
new file mode 100644
index 0000000..9ed25f3
--- /dev/null
+++ b/contrib/masmx86/gvmat32c.c
@@ -0,0 +1,206 @@
1/* gvmat32.c -- C portion of the optimized longest_match for 32 bits x86
2 * Copyright (C) 1995-1996 Jean-loup Gailly and Gilles Vollant.
3 * File written by Gilles Vollant, by modifying the longest_match
4 * from Jean-loup Gailly in deflate.c
5 * it prepares all parameters and calls the assembly longest_match_gvasm
6 * longest_match executes standard C code if wmask != 0x7fff
7 * (assembly code is faster with a fixed wmask)
8 *
9 */
10
11#include "deflate.h"
12
13#ifdef ASMV
14#define NIL 0
15
16#define UNALIGNED_OK
17
18
19/* if your C compiler don't add underline before function name,
20 define ADD_UNDERLINE_ASMFUNC */
21#ifdef ADD_UNDERLINE_ASMFUNC
22#define longest_match_7fff _longest_match_7fff
23#define longest_match_686 _longest_match_686
24#define cpudetect32 _cpudetect32
25#endif
26
27
28
/* Initialization hook for the assembly matcher. The routines in this
 * directory need no set-up, so the function is intentionally empty.
 */
void match_init(void)
{
}
32
/* Implemented in gvmat32.asm. Returns 0x300 when the CPU cannot toggle the
 * EFLAGS AC bit, 0x400 when it cannot toggle the ID bit, and otherwise the
 * value CPUID function 1 leaves in eax. */
33unsigned long cpudetect32();
34
/* Portable C implementation, defined later in this file. */
35uInt longest_match_c(
36 deflate_state *s,
37 IPos cur_match); /* current match */
38
39
/* Assembly implementation with a hard-coded window mask of 0x7fff. */
40uInt longest_match_7fff(
41 deflate_state *s,
42 IPos cur_match); /* current match */
43
/* Assembly implementation selected when the detected CPU family is >= 6. */
44uInt longest_match_686(
45 deflate_state *s,
46 IPos cur_match); /* current match */
47
48uInt longest_match(
49 deflate_state *s,
50 IPos cur_match) /* current match */
51{
52 static uInt iIsPPro=2;
53
54 if ((s->w_mask == 0x7fff) && (iIsPPro==0))
55 return longest_match_7fff(s,cur_match);
56
57 if (iIsPPro==1)
58 return longest_match_686(s,cur_match);
59
60 if (iIsPPro==2)
61 iIsPPro = (((cpudetect32()/0x100)&0xf)>=6) ? 1 : 0;
62
63 return longest_match_c(s,cur_match);
64}
65
66
67
68uInt longest_match_c(s, cur_match)
69 deflate_state *s;
70 IPos cur_match; /* current match */
71{
72 unsigned chain_length = s->max_chain_length;/* max hash chain length */
73 register Bytef *scan = s->window + s->strstart; /* current string */
74 register Bytef *match; /* matched string */
75 register int len; /* length of current match */
76 int best_len = s->prev_length; /* best match length so far */
77 int nice_match = s->nice_match; /* stop if match long enough */
78 IPos limit = s->strstart > (IPos)MAX_DIST(s) ?
79 s->strstart - (IPos)MAX_DIST(s) : NIL;
80 /* Stop when cur_match becomes <= limit. To simplify the code,
81 * we prevent matches with the string of window index 0.
82 */
83 Posf *prev = s->prev;
84 uInt wmask = s->w_mask;
85
86#ifdef UNALIGNED_OK
87 /* Compare two bytes at a time. Note: this is not always beneficial.
88 * Try with and without -DUNALIGNED_OK to check.
89 */
90 register Bytef *strend = s->window + s->strstart + MAX_MATCH - 1;
91 register ush scan_start = *(ushf*)scan;
92 register ush scan_end = *(ushf*)(scan+best_len-1);
93#else
94 register Bytef *strend = s->window + s->strstart + MAX_MATCH;
95 register Byte scan_end1 = scan[best_len-1];
96 register Byte scan_end = scan[best_len];
97#endif
98
99 /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16.
100 * It is easy to get rid of this optimization if necessary.
101 */
102 Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever");
103
104 /* Do not waste too much time if we already have a good match: */
105 if (s->prev_length >= s->good_match) {
106 chain_length >>= 2;
107 }
108 /* Do not look for matches beyond the end of the input. This is necessary
109 * to make deflate deterministic.
110 */
111 if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead;
112
113 Assert((ulg)s->strstart <= s->window_size-MIN_LOOKAHEAD, "need lookahead");
114
115 do {
116 Assert(cur_match < s->strstart, "no future");
117 match = s->window + cur_match;
118
119 /* Skip to next match if the match length cannot increase
120 * or if the match length is less than 2:
121 */
122#if (defined(UNALIGNED_OK) && MAX_MATCH == 258)
123 /* This code assumes sizeof(unsigned short) == 2. Do not use
124 * UNALIGNED_OK if your compiler uses a different size.
125 */
126 if (*(ushf*)(match+best_len-1) != scan_end ||
127 *(ushf*)match != scan_start) continue;
128
129 /* It is not necessary to compare scan[2] and match[2] since they are
130 * always equal when the other bytes match, given that the hash keys
131 * are equal and that HASH_BITS >= 8. Compare 2 bytes at a time at
132 * strstart+3, +5, ... up to strstart+257. We check for insufficient
133 * lookahead only every 4th comparison; the 128th check will be made
134 * at strstart+257. If MAX_MATCH-2 is not a multiple of 8, it is
135 * necessary to put more guard bytes at the end of the window, or
136 * to check more often for insufficient lookahead.
137 */
138 Assert(scan[2] == match[2], "scan[2]?");
139 scan++, match++;
140 do {
141 } while (*(ushf*)(scan+=2) == *(ushf*)(match+=2) &&
142 *(ushf*)(scan+=2) == *(ushf*)(match+=2) &&
143 *(ushf*)(scan+=2) == *(ushf*)(match+=2) &&
144 *(ushf*)(scan+=2) == *(ushf*)(match+=2) &&
145 scan < strend);
146 /* The funny "do {}" generates better code on most compilers */
147
148 /* Here, scan <= window+strstart+257 */
149 Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan");
150 if (*scan == *match) scan++;
151
152 len = (MAX_MATCH - 1) - (int)(strend-scan);
153 scan = strend - (MAX_MATCH-1);
154
155#else /* UNALIGNED_OK */
156
157 if (match[best_len] != scan_end ||
158 match[best_len-1] != scan_end1 ||
159 *match != *scan ||
160 *++match != scan[1]) continue;
161
162 /* The check at best_len-1 can be removed because it will be made
163 * again later. (This heuristic is not always a win.)
164 * It is not necessary to compare scan[2] and match[2] since they
165 * are always equal when the other bytes match, given that
166 * the hash keys are equal and that HASH_BITS >= 8.
167 */
168 scan += 2, match++;
169 Assert(*scan == *match, "match[2]?");
170
171 /* We check for insufficient lookahead only every 8th comparison;
172 * the 256th check will be made at strstart+258.
173 */
174 do {
175 } while (*++scan == *++match && *++scan == *++match &&
176 *++scan == *++match && *++scan == *++match &&
177 *++scan == *++match && *++scan == *++match &&
178 *++scan == *++match && *++scan == *++match &&
179 scan < strend);
180
181 Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan");
182
183 len = MAX_MATCH - (int)(strend - scan);
184 scan = strend - MAX_MATCH;
185
186#endif /* UNALIGNED_OK */
187
188 if (len > best_len) {
189 s->match_start = cur_match;
190 best_len = len;
191 if (len >= nice_match) break;
192#ifdef UNALIGNED_OK
193 scan_end = *(ushf*)(scan+best_len-1);
194#else
195 scan_end1 = scan[best_len-1];
196 scan_end = scan[best_len];
197#endif
198 }
199 } while ((cur_match = prev[cur_match & wmask]) > limit
200 && --chain_length != 0);
201
202 if ((uInt)best_len <= s->lookahead) return (uInt)best_len;
203 return s->lookahead;
204}
205
206#endif /* ASMV */
diff --git a/contrib/masmx86/inffas32.asm b/contrib/masmx86/inffas32.asm
new file mode 100644
index 0000000..7d76e1c
--- /dev/null
+++ b/contrib/masmx86/inffas32.asm
@@ -0,0 +1,1033 @@
1; 75 "inffast.S"
2;FILE "inffast.S"
3
4;;;GLOBAL _inflate_fast
5
6;;;SECTION .text
7
8; MASM (Intel syntax) source; numbered comment lines such as the first one are
9; line markers left over from the file named in them.
10 .586p
11 .mmx
12
13 name inflate_fast_x86
14 .MODEL FLAT
15
16_DATA segment ; NOTE(review): no matching "_DATA ends" is visible before _TEXT opens - confirm the assembler accepts this
17inflate_fast_use_mmx: ; dispatch flag tested at L_check_mmx: 2 = MMX loop, 3 = integer loop, below 2 = run CPU detection
18 dd 1 ; initial value 1, so the first call performs the detection
19
20
21_TEXT segment
22PUBLIC _inflate_fast
23
24ALIGN 4
25_inflate_fast: ; exported symbol; only jumps over the constant data that follows
26 jmp inflate_fast_entry
27
28
29
30ALIGN 4
31 db 'Fast decoding Code from Chris Anderson' ; attribution string embedded in the code segment, not referenced by code in this file
32 db 0
33
34ALIGN 4
35invalid_literal_length_code_msg: ; NUL-terminated message stored by L_invalid_literal_length_code
36 db 'invalid literal/length code'
37 db 0
38
39ALIGN 4
40invalid_distance_code_msg: ; NUL-terminated message stored by L_invalid_distance_code
41 db 'invalid distance code'
42 db 0
43
44ALIGN 4
45invalid_distance_too_far_msg: ; NUL-terminated message stored by L_invalid_distance_too_far
46 db 'invalid distance too far back'
47 db 0
48
49
50ALIGN 4
51inflate_fast_mask: ; table of 33 dwords: entry n is (1 shl n) - 1; the MMX path indexes it by a bit count to keep the low n bits
52dd 0
53dd 1
54dd 3
55dd 7
56dd 15
57dd 31
58dd 63
59dd 127
60dd 255
61dd 511
62dd 1023
63dd 2047
64dd 4095
65dd 8191
66dd 16383
67dd 32767
68dd 65535
69dd 131071
70dd 262143
71dd 524287
72dd 1048575
73dd 2097151
74dd 4194303
75dd 8388607
76dd 16777215
77dd 33554431
78dd 67108863
79dd 134217727
80dd 268435455
81dd 536870911
82dd 1073741823
83dd 2147483647
84dd 4294967295 ; entry 32: all bits set
85
86
87; Byte offsets into the state structure whose address is loaded from [strm+28].
88mode_state equ 0 ;/* state->mode */
89wsize_state equ 32 ;/* state->wsize */
90write_state equ (36+4) ;/* state->write */ NOTE(review): the (N+4) forms look like a 4-byte shift of every later field - verify against the struct layout this is built with
91window_state equ (40+4) ;/* state->window */
92hold_state equ (44+4) ;/* state->hold */
93bits_state equ (48+4) ;/* state->bits */
94lencode_state equ (64+4) ;/* state->lencode */
95distcode_state equ (68+4) ;/* state->distcode */
96lenbits_state equ (72+4) ;/* state->lenbits */
97distbits_state equ (76+4) ;/* state->distbits */
98
99
100;;SECTION .text
101; 205 "inffast.S"
102;GLOBAL inflate_fast_use_mmx
103
104;SECTION .data
105
106
107; GLOBAL inflate_fast_use_mmx:object
108;.size inflate_fast_use_mmx, 4
109; 226 "inffast.S"
110;SECTION .text
111
112ALIGN 4
113inflate_fast_entry: ; real entry point, reached through the jmp at _inflate_fast
114 push edi ; save every register the routine modifies
115 push esi
116 push ebp
117 push ebx
118 pushfd ; save EFLAGS (restored by popfd at L_done)
119 sub esp,64 ; 64 bytes of locals; with the 5 pushes the return address sits at [esp+84]
120 cld ; string instructions below must advance esi/edi upward
121; Local frame: [esp+0] length index mask, [esp+4] distance index mask,
122; [esp+8] lencode ptr, [esp+12] distcode ptr, [esp+16] output limit, [esp+20] input limit,
123; [esp+24] match length, [esp+28..39] 12-byte scratch input buffer, [esp+40] lowest output address,
124; [esp+44] saved input ptr, [esp+48] write, [esp+52] wsize, [esp+56] window, [esp+60] initial output ptr.
125 mov esi, [esp+88] ; esi = first stack argument (stream pointer)
126 mov edi, [esi+28] ; edi = state pointer (the *_state offsets are applied to it)
127
128; Stream offsets 0/4/12/16 are used below as input pointer, input byte count,
129; output pointer and output byte count - presumably next_in, avail_in, next_out
130; and avail_out of zlib's z_stream; confirm against the header this is built with.
131
132
133
134 mov edx, [esi+4] ; edx = input byte count
135 mov eax, [esi+0] ; eax = input pointer
136
137 add edx,eax
138 sub edx,11 ; edx = input pointer + count - 11
139
140 mov [esp+44],eax ; saved input pointer
141 mov [esp+20],edx ; input limit: the loops continue only while this is above esi
142
143 mov ebp, [esp+92] ; ebp = second stack argument
144 mov ecx, [esi+16] ; ecx = output byte count
145 mov ebx, [esi+12] ; ebx = output pointer
146
147 sub ebp,ecx
148 neg ebp
149 add ebp,ebx ; ebp = output pointer - (second argument - output count)
150
151 sub ecx,257
152 add ecx,ebx ; ecx = output pointer + output count - 257
153
154 mov [esp+60],ebx ; initial output pointer
155 mov [esp+40],ebp ; lowest output address a match may copy from directly
156 mov [esp+16],ecx ; output limit: the loops stop once edi reaches it
157; 285 "inffast.S"
158 mov eax, [edi+lencode_state]
159 mov ecx, [edi+distcode_state]
160
161 mov [esp+8],eax ; length/literal table base
162 mov [esp+12],ecx ; distance table base
163
164 mov eax,1
165 mov ecx, [edi+lenbits_state]
166 shl eax,cl
167 dec eax
168 mov [esp+0],eax ; (1 shl lenbits) - 1: index mask for the length table
169
170 mov eax,1
171 mov ecx, [edi+distbits_state]
172 shl eax,cl
173 dec eax
174 mov [esp+4],eax ; (1 shl distbits) - 1: index mask for the distance table
175
176 mov eax, [edi+wsize_state]
177 mov ecx, [edi+write_state]
178 mov edx, [edi+window_state]
179
180 mov [esp+52],eax ; wsize
181 mov [esp+48],ecx ; write
182 mov [esp+56],edx ; window
183
184 mov ebp, [edi+hold_state] ; ebp = bit accumulator
185 mov ebx, [edi+bits_state] ; ebx = number of bits held in the accumulator
186; 321 "inffast.S"
187 mov esi, [esp+44]
188 mov ecx, [esp+20]
189 cmp ecx,esi
190 ja L_align_long ; input limit above input pointer: read the caller's buffer directly
191
192 add ecx,11 ; otherwise copy the remaining input into the scratch buffer
193 sub ecx,esi ; ecx = input byte count again (limit + 11 - pointer)
194 mov eax,12
195 sub eax,ecx ; eax = bytes of the 12-byte buffer left to zero
196 lea edi, [esp+28]
197 rep movsb ; copy the input bytes to [esp+28]
198 mov ecx,eax
199 xor eax,eax
200 rep stosb ; zero-fill the rest of the buffer
201 lea esi, [esp+28] ; read input from the scratch buffer from here on
202 mov [esp+20],esi ; input limit = buffer address; L_update_next_in tests for this value
203 jmp L_is_aligned
204
205
206L_align_long: ; feed single bytes into the accumulator until esi is 4-byte aligned
207 test esi,3
208 jz L_is_aligned
209 xor eax,eax
210 mov al, [esi]
211 inc esi
212 mov ecx,ebx
213 add ebx,8 ; 8 more bits held
214 shl eax,cl ; place the new byte above the bits already held
215 or ebp,eax
216 jmp L_align_long
217
218L_is_aligned:
219 mov edi, [esp+60] ; edi = output pointer for the decode loops
220; 366 "inffast.S"
221L_check_mmx: ; choose the decode loop from inflate_fast_use_mmx, detecting the CPU when the flag is below 2
222 cmp dword ptr [inflate_fast_use_mmx],2
223 je L_init_mmx ; flag == 2: MMX loop
224 ja L_do_loop ; flag > 2: integer loop
225
226 push eax ; cpuid overwrites eax, ebx, ecx and edx
227 push ebx
228 push ecx
229 push edx
230 pushfd
231 mov eax, [esp] ; eax = current EFLAGS
232 xor dword ptr [esp],0200000h ; flip bit 21 (the ID flag) in the stacked copy
233
234
235
236
237 popfd ; try to load the modified flags
238 pushfd
239 pop edx ; edx = EFLAGS as the CPU actually kept them
240 xor edx,eax
241 jz L_dont_use_mmx ; bit did not change: cpuid is unavailable
242 xor eax,eax
243 cpuid ; leaf 0: vendor string in ebx, edx, ecx
244 cmp ebx,0756e6547h ; "Genu"
245 jne L_dont_use_mmx
246 cmp ecx,06c65746eh ; "ntel"
247 jne L_dont_use_mmx
248 cmp edx,049656e69h ; "ineI"
249 jne L_dont_use_mmx
250 mov eax,1
251 cpuid ; leaf 1: signature in eax, feature bits in edx
252 shr eax,8
253 and eax,15 ; eax = family field
254 cmp eax,6
255 jne L_dont_use_mmx ; only family 6 selects the MMX loop
256 test edx,0800000h ; feature bit 23 = MMX
257 jnz L_use_mmx
258 jmp L_dont_use_mmx
259L_use_mmx:
260 mov dword ptr [inflate_fast_use_mmx],2
261 jmp L_check_mmx_pop
262L_dont_use_mmx:
263 mov dword ptr [inflate_fast_use_mmx],3
264L_check_mmx_pop:
265 pop edx
266 pop ecx
267 pop ebx
268 pop eax
269 jmp L_check_mmx ; re-dispatch now that the flag is 2 or 3
270; 426 "inffast.S"
271ALIGN 4
272L_do_loop: ; integer loop: ebp = bit accumulator, bl = bits held, esi = input, edi = output
273; 437 "inffast.S"
274 cmp bl,15
275 ja L_get_length_code ; more than 15 bits held: no refill
276
277 xor eax,eax
278 lodsw ; read 16 input bits, esi += 2
279 mov cl,bl
280 add bl,16
281 shl eax,cl ; place them above the bits already held
282 or ebp,eax
283
284L_get_length_code:
285 mov edx, [esp+0] ; length index mask
286 mov ecx, [esp+8] ; length table base
287 and edx,ebp
288 mov eax, [ecx+edx*4] ; eax = 4-byte table entry selected by the low accumulator bits
289
290L_dolen: ; eax = table entry: al = operation byte, ah = bits to consume, upper 16 bits = value
291
292
293
294
295
296
297 mov cl,ah
298 sub bl,ah
299 shr ebp,cl ; drop the bits this entry consumed
300
301
302
303
304
305
306 test al,al
307 jnz L_test_for_length_base ; non-zero operation byte: not a literal
308
309 shr eax,16
310 stosb ; operation byte 0: the value is a literal, emit it
311
312L_while_test: ; continue only while both output and input limits are ahead
313
314
315 cmp [esp+16],edi
316 jbe L_break_loop ; output limit reached
317
318 cmp [esp+20],esi
319 ja L_do_loop ; input limit still above esi
320 jmp L_break_loop
321
322L_test_for_length_base: ; entry with non-zero operation byte in al
323; 502 "inffast.S"
324 mov edx,eax
325 shr edx,16 ; edx = entry value (base length when bit 4 of al is set)
326 mov cl,al
327
328 test al,16
329 jz L_test_for_second_level_length ; bit 4 clear: second-level entry, end of block, or invalid
330 and cl,15 ; cl = number of extra length bits
331 jz L_save_len ; none: the base is the length
332 cmp bl,cl
333 jae L_add_bits_to_len ; enough bits already held
334
335 mov ch,cl ; keep the extra-bit count while cl is used as shift count
336 xor eax,eax
337 lodsw ; refill with 16 input bits
338 mov cl,bl
339 add bl,16
340 shl eax,cl
341 or ebp,eax
342 mov cl,ch
343
344L_add_bits_to_len:
345 mov eax,1
346 shl eax,cl
347 dec eax ; eax = mask of cl low bits
348 sub bl,cl
349 and eax,ebp ; extract the extra bits
350 shr ebp,cl ; and consume them
351 add edx,eax ; length = base + extra
352
353L_save_len:
354 mov [esp+24],edx ; match length kept in the frame while the distance is decoded
355
356
357L_decode_distance: ; same scheme as the length decode, using the distance table and mask
358; 549 "inffast.S"
359 cmp bl,15
360 ja L_get_distance_code
361
362 xor eax,eax
363 lodsw ; refill with 16 input bits
364 mov cl,bl
365 add bl,16
366 shl eax,cl
367 or ebp,eax
368
369L_get_distance_code:
370 mov edx, [esp+4] ; distance index mask
371 mov ecx, [esp+12] ; distance table base
372 and edx,ebp
373 mov eax, [ecx+edx*4]
374
375
376L_dodist: ; eax = distance table entry
377 mov edx,eax
378 shr edx,16 ; edx = entry value (base distance when bit 4 of al is set)
379 mov cl,ah
380 sub bl,ah
381 shr ebp,cl ; consume the bits of this entry
382; 584 "inffast.S"
383 mov cl,al
384
385 test al,16
386 jz L_test_for_second_level_dist
387 and cl,15 ; cl = number of extra distance bits
388 jz L_check_dist_one ; none: go to the distance == 1 shortcut test
389 cmp bl,cl
390 jae L_add_bits_to_dist
391
392 mov ch,cl
393 xor eax,eax
394 lodsw ; refill with 16 input bits
395 mov cl,bl
396 add bl,16
397 shl eax,cl
398 or ebp,eax
399 mov cl,ch
400
401L_add_bits_to_dist:
402 mov eax,1
403 shl eax,cl
404 dec eax ; mask of cl low bits
405 sub bl,cl
406 and eax,ebp
407 shr ebp,cl
408 add edx,eax ; distance = base + extra
409 jmp L_check_window ; target is the label that immediately follows in this listing
410
411L_check_window: ; edx = distance, [esp+24] = length; copy the match to the output
412; 625 "inffast.S"
413 mov [esp+44],esi ; save the input pointer; esi becomes the copy source
414 mov eax,edi
415 sub eax, [esp+40] ; eax = bytes between the lowest output address and edi
416
417 cmp eax,edx
418 jb L_clip_window ; distance reaches below that address: source is in the window
419
420 mov ecx, [esp+24]
421 mov esi,edi
422 sub esi,edx ; source = output pointer - distance
423
424 sub ecx,3 ; first three bytes are copied by hand; NOTE(review): relies on length >= 3 - confirm
425 mov al, [esi]
426 mov [edi],al
427 mov al, [esi+1]
428 mov dl, [esi+2]
429 add esi,3
430 mov [edi+1],al
431 mov [edi+2],dl
432 add edi,3
433 rep movsb ; remaining length - 3 bytes, byte by byte
434
435 mov esi, [esp+44] ; restore the input pointer
436 jmp L_while_test
437
438ALIGN 4
439L_check_dist_one: ; reached when the distance entry has no extra bits
440 cmp edx,1
441 jne L_check_window
442 cmp [esp+40],edi
443 je L_check_window ; edi is at the lowest output address: use the general path
444
445 dec edi
446 mov ecx, [esp+24]
447 mov al, [edi] ; al = previous output byte, to be repeated
448 sub ecx,3
449
450 mov [edi+1],al
451 mov [edi+2],al
452 mov [edi+3],al
453 add edi,4 ; three bytes written past the original edi
454 rep stosb ; fill the remaining length - 3 bytes with the same value
455
456 jmp L_while_test
457
458ALIGN 4
459L_test_for_second_level_length: ; length entry with bit 4 clear; cl = al, edx = entry value
460
461
462
463
464 test al,64
465 jnz L_test_for_end_of_block ; bit 6 set: end of block or invalid code
466
467 mov eax,1
468 shl eax,cl
469 dec eax ; mask built from the count in the entry's low byte
470 and eax,ebp
471 add eax,edx ; index = entry value + masked accumulator bits
472 mov edx, [esp+8]
473 mov eax, [edx+eax*4] ; fetch the second-level length entry
474 jmp L_dolen
475
476ALIGN 4
477L_test_for_second_level_dist: ; distance entry with bit 4 clear; cl = al, edx = entry value
478
479
480
481
482 test al,64
483 jnz L_invalid_distance_code ; bit 6 set: invalid distance code
484
485 mov eax,1
486 shl eax,cl
487 dec eax
488 and eax,ebp
489 add eax,edx ; index = entry value + masked accumulator bits
490 mov edx, [esp+12]
491 mov eax, [edx+eax*4] ; fetch the second-level distance entry
492 jmp L_dodist
493
494ALIGN 4
495L_clip_window: ; eax = edi - lowest output address, edx = distance (eax < edx here)
496; 721 "inffast.S"
497 mov ecx,eax
498 mov eax, [esp+52] ; eax = wsize
499 neg ecx
500 mov esi, [esp+56] ; esi = window base
501
502 cmp eax,edx
503 jb L_invalid_distance_too_far ; distance larger than wsize
504
505 add ecx,edx ; ecx = distance - eax: bytes that must come from the window
506 cmp dword ptr [esp+48],0
507 jne L_wrap_around_window ; write != 0
508
509 sub eax,ecx
510 add esi,eax ; write == 0: source = window + wsize - ecx
511; 749 "inffast.S"
512 mov eax, [esp+24] ; eax = match length
513 cmp eax,ecx
514 jbe L_do_copy1 ; whole match comes from the window
515
516 sub eax,ecx
517 rep movsb ; ecx bytes from the window
518 mov esi,edi
519 sub esi,edx ; rest comes from the output, distance bytes back
520 jmp L_do_copy1
521; NOTE(review): the next eight instructions follow an unconditional jmp and carry no label, so they cannot be reached; they repeat the sequence above.
522 cmp eax,ecx
523 jbe L_do_copy1
524
525 sub eax,ecx
526 rep movsb
527 mov esi,edi
528 sub esi,edx
529 jmp L_do_copy1
530
531L_wrap_around_window: ; write != 0; ecx = bytes needed from the window
532; 793 "inffast.S"
533 mov eax, [esp+48] ; eax = write
534 cmp ecx,eax
535 jbe L_contiguous_in_window ; needed bytes all lie below the write position
536
537 add esi, [esp+52]
538 add esi,eax
539 sub esi,ecx ; source = window + wsize + write - ecx
540 sub ecx,eax ; ecx = bytes available from there to the end of the window
541
542
543 mov eax, [esp+24]
544 cmp eax,ecx
545 jbe L_do_copy1
546
547 sub eax,ecx
548 rep movsb ; first piece: up to the end of the window
549 mov esi, [esp+56] ; second piece starts at the window base
550 mov ecx, [esp+48] ; and holds write bytes
551 cmp eax,ecx
552 jbe L_do_copy1
553
554 sub eax,ecx
555 rep movsb
556 mov esi,edi
557 sub esi,edx ; rest comes from the output, distance bytes back
558 jmp L_do_copy1
559
560L_contiguous_in_window:
561; 836 "inffast.S"
562 add esi,eax
563 sub esi,ecx ; source = window + write - ecx
564
565
566 mov eax, [esp+24]
567 cmp eax,ecx
568 jbe L_do_copy1
569
570 sub eax,ecx
571 rep movsb
572 mov esi,edi
573 sub esi,edx ; rest comes from the output, distance bytes back
574
575L_do_copy1: ; eax = bytes still to copy from esi
576; 862 "inffast.S"
577 mov ecx,eax
578 rep movsb
579
580 mov esi, [esp+44] ; restore the input pointer
581 jmp L_while_test
582; 878 "inffast.S"
583ALIGN 4
584L_init_mmx: ; MMX loop setup: accumulator moves to mm0, bit count to ebp
585 emms
586
587; Register roles in the MMX loop: mm0 = bit accumulator, mm1 = shift still to be applied to mm0,
588; mm3/mm4 = length index mask (master/working), mm2/mm5 = distance index mask (master/working),
589; ebp = bits held, ebx = length table base (reloaded after every match), esi/edi = input/output.
590
591 movd mm0,ebp ; accumulator
592 mov ebp,ebx ; bit count
593; 896 "inffast.S"
594 movd mm4,[esp+0]
595 movq mm3,mm4
596 movd mm5,[esp+4]
597 movq mm2,mm5
598 pxor mm1,mm1 ; nothing pending to shift out yet
599 mov ebx, [esp+8]
600 jmp L_do_loop_mmx
601
602ALIGN 4
603L_do_loop_mmx:
604 psrlq mm0,mm1 ; drop the bits consumed by the previous entry
605
606 cmp ebp,32
607 ja L_get_length_code_mmx ; more than 32 bits held: no refill
608
609 movd mm6,ebp
610 movd mm7,[esi] ; read 32 input bits
611 add esi,4
612 psllq mm7,mm6 ; place them above the bits already held
613 add ebp,32
614 por mm0,mm7
615
616L_get_length_code_mmx:
617 pand mm4,mm0
618 movd eax,mm4 ; eax = table index
619 movq mm4,mm3 ; restore the working mask
620 mov eax, [ebx+eax*4] ; eax = length table entry
621
622L_dolen_mmx: ; eax = table entry: al = operation byte, ah = bits to consume, upper 16 bits = value
623 movzx ecx,ah
624 movd mm1,ecx ; shift is deferred until the next psrlq mm0,mm1
625 sub ebp,ecx
626
627 test al,al
628 jnz L_test_for_length_base_mmx
629
630 shr eax,16
631 stosb ; operation byte 0: emit the literal
632
633L_while_test_mmx: ; same limit checks as L_while_test
634
635
636 cmp [esp+16],edi
637 jbe L_break_loop
638
639 cmp [esp+20],esi
640 ja L_do_loop_mmx
641 jmp L_break_loop
642
643L_test_for_length_base_mmx: ; entry with non-zero operation byte in al
644
645 mov edx,eax
646 shr edx,16 ; edx = entry value; the match length stays in edx on this path
647
648 test al,16
649 jz L_test_for_second_level_length_mmx
650 and eax,15 ; eax = number of extra length bits
651 jz L_decode_distance_mmx
652
653 psrlq mm0,mm1 ; apply the pending shift before reading the extra bits
654 movd mm1,eax ; the extra bits become the new pending shift
655 movd ecx,mm0
656 sub ebp,eax
657 and ecx, [inflate_fast_mask+eax*4] ; keep the low eax bits
658 add edx,ecx ; length = base + extra
659
660L_decode_distance_mmx:
661 psrlq mm0,mm1
662
663 cmp ebp,32
664 ja L_get_dist_code_mmx
665
666 movd mm6,ebp
667 movd mm7,[esi] ; refill with 32 input bits
668 add esi,4
669 psllq mm7,mm6
670 add ebp,32
671 por mm0,mm7
672
673L_get_dist_code_mmx:
674 mov ebx, [esp+12] ; ebx = distance table base
675 pand mm5,mm0
676 movd eax,mm5
677 movq mm5,mm2 ; restore the working mask
678 mov eax, [ebx+eax*4]
679
680L_dodist_mmx: ; eax = distance table entry
681
682 movzx ecx,ah
683 mov ebx,eax
684 shr ebx,16 ; ebx = entry value; the distance stays in ebx on this path
685 sub ebp,ecx
686 movd mm1,ecx ; defer the shift for this entry
687
688 test al,16
689 jz L_test_for_second_level_dist_mmx
690 and eax,15 ; eax = number of extra distance bits
691 jz L_check_dist_one_mmx
692
693L_add_bits_to_dist_mmx:
694 psrlq mm0,mm1
695 movd mm1,eax
696 movd ecx,mm0
697 sub ebp,eax
698 and ecx, [inflate_fast_mask+eax*4]
699 add ebx,ecx ; distance = base + extra
700
701L_check_window_mmx: ; edx = length, ebx = distance; copy the match to the output
702 mov [esp+44],esi ; save the input pointer; esi becomes the copy source
703 mov eax,edi
704 sub eax, [esp+40] ; eax = bytes between the lowest output address and edi
705
706 cmp eax,ebx
707 jb L_clip_window_mmx
708
709 mov ecx,edx
710 mov esi,edi
711 sub esi,ebx ; source = output pointer - distance
712
713 sub ecx,3 ; first three bytes are copied by hand; NOTE(review): relies on length >= 3 - confirm
714 mov al, [esi]
715 mov [edi],al
716 mov al, [esi+1]
717 mov dl, [esi+2] ; edx is free here: the length was already moved to ecx
718 add esi,3
719 mov [edi+1],al
720 mov [edi+2],dl
721 add edi,3
722 rep movsb
723
724 mov esi, [esp+44] ; restore the input pointer
725 mov ebx, [esp+8] ; restore the length table base
726 jmp L_while_test_mmx
727
728ALIGN 4
729L_check_dist_one_mmx: ; reached when the distance entry has no extra bits
730 cmp ebx,1
731 jne L_check_window_mmx
732 cmp [esp+40],edi
733 je L_check_window_mmx ; edi is at the lowest output address: use the general path
734
735 dec edi
736 mov ecx,edx
737 mov al, [edi] ; al = previous output byte, to be repeated
738 sub ecx,3
739
740 mov [edi+1],al
741 mov [edi+2],al
742 mov [edi+3],al
743 add edi,4
744 rep stosb ; fill the remaining length - 3 bytes
745
746 mov ebx, [esp+8] ; restore the length table base
747 jmp L_while_test_mmx
748
749ALIGN 4
750L_test_for_second_level_length_mmx: ; length entry with bit 4 clear; edx = entry value, ebx = length table
751 test al,64
752 jnz L_test_for_end_of_block ; bit 6 set: end of block or invalid code
753
754 and eax,15 ; bit count from the low nibble of the operation byte
755 psrlq mm0,mm1 ; apply the pending shift first
756 movd ecx,mm0
757 and ecx, [inflate_fast_mask+eax*4]
758 add ecx,edx ; index = entry value + masked accumulator bits
759 mov eax, [ebx+ecx*4]
760 jmp L_dolen_mmx ; L_dolen_mmx overwrites mm1 with the new entry's bit count
761
762ALIGN 4
763L_test_for_second_level_dist_mmx: ; distance entry with bit 4 clear; ebx = entry value
764 test al,64
765 jnz L_invalid_distance_code
766
767 and eax,15
768 psrlq mm0,mm1
769 movd ecx,mm0
770 and ecx, [inflate_fast_mask+eax*4]
771 mov eax, [esp+12] ; distance table base (ebx holds the entry value here)
772 add ecx,ebx
773 mov eax, [eax+ecx*4]
774 jmp L_dodist_mmx
775
776ALIGN 4
777L_clip_window_mmx: ; eax = edi - lowest output address, ebx = distance, edx = length
778
779 mov ecx,eax
780 mov eax, [esp+52] ; eax = wsize
781 neg ecx
782 mov esi, [esp+56] ; esi = window base
783
784 cmp eax,ebx
785 jb L_invalid_distance_too_far ; distance larger than wsize
786
787 add ecx,ebx ; ecx = bytes that must come from the window
788 cmp dword ptr [esp+48],0
789 jne L_wrap_around_window_mmx ; write != 0
790
791 sub eax,ecx
792 add esi,eax ; write == 0: source = window + wsize - ecx
793
794 cmp edx,ecx
795 jbe L_do_copy1_mmx ; whole match comes from the window
796
797 sub edx,ecx
798 rep movsb ; ecx bytes from the window
799 mov esi,edi
800 sub esi,ebx ; rest comes from the output, distance bytes back
801 jmp L_do_copy1_mmx
802; NOTE(review): the next eight instructions follow an unconditional jmp and carry no label, so they cannot be reached; they repeat the sequence above.
803 cmp edx,ecx
804 jbe L_do_copy1_mmx
805
806 sub edx,ecx
807 rep movsb
808 mov esi,edi
809 sub esi,ebx
810 jmp L_do_copy1_mmx
811
812L_wrap_around_window_mmx: ; write != 0; ecx = bytes needed from the window
813
814 mov eax, [esp+48] ; eax = write
815 cmp ecx,eax
816 jbe L_contiguous_in_window_mmx
817
818 add esi, [esp+52]
819 add esi,eax
820 sub esi,ecx ; source = window + wsize + write - ecx
821 sub ecx,eax ; ecx = bytes available from there to the end of the window
822
823
824 cmp edx,ecx
825 jbe L_do_copy1_mmx
826
827 sub edx,ecx
828 rep movsb ; first piece: up to the end of the window
829 mov esi, [esp+56] ; second piece starts at the window base
830 mov ecx, [esp+48] ; and holds write bytes
831 cmp edx,ecx
832 jbe L_do_copy1_mmx
833
834 sub edx,ecx
835 rep movsb
836 mov esi,edi
837 sub esi,ebx ; rest comes from the output, distance bytes back
838 jmp L_do_copy1_mmx
839
840L_contiguous_in_window_mmx:
841
842 add esi,eax
843 sub esi,ecx ; source = window + write - ecx
844
845
846 cmp edx,ecx
847 jbe L_do_copy1_mmx
848
849 sub edx,ecx
850 rep movsb
851 mov esi,edi
852 sub esi,ebx ; rest comes from the output, distance bytes back
853
854L_do_copy1_mmx: ; edx = bytes still to copy from esi
855
856
857 mov ecx,edx
858 rep movsb
859
860 mov esi, [esp+44] ; restore the input pointer
861 mov ebx, [esp+8] ; restore the length table base
862 jmp L_while_test_mmx
863; 1174 "inffast.S"
864L_invalid_distance_code: ; exit stubs: ecx = message address (0 for none), edx = new mode value
865
866; NOTE(review): mode values 26 and 11 are hard-coded below; they presumably match
867; the "bad data" and "next block" modes of the inflate state machine this file
868; was built against - confirm against that enum before changing either side.
869
870 mov ecx, invalid_distance_code_msg
871 mov edx,26
872 jmp L_update_stream_state
873
874L_test_for_end_of_block: ; length entry with bit 6 set
875
876
877
878
879
880 test al,32
881 jz L_invalid_literal_length_code ; bit 5 clear: not an end-of-block entry
882
883 mov ecx,0 ; no message
884 mov edx,11
885 jmp L_update_stream_state
886
887L_invalid_literal_length_code:
888
889
890
891
892
893 mov ecx, invalid_literal_length_code_msg
894 mov edx,26
895 jmp L_update_stream_state
896
897L_invalid_distance_too_far:
898
899
900
901 mov esi, [esp+44] ; the clip-window code replaced esi with the window base; restore the input pointer
902 mov ecx, invalid_distance_too_far_msg
903 mov edx,26
904 jmp L_update_stream_state
905
906L_update_stream_state:
907
908 mov eax, [esp+88] ; eax = stream pointer (first argument)
909 test ecx,ecx
910 jz L_skip_msg
911 mov [eax+24],ecx ; store the message address at stream offset 24
912L_skip_msg:
913 mov eax, [eax+28] ; eax = state pointer
914 mov [eax+mode_state],edx
915 jmp L_break_loop
916
917ALIGN 4
918L_break_loop: ; common exit: write the decoder position back to the stream and state
919; 1243 "inffast.S"
920 cmp dword ptr [inflate_fast_use_mmx],2
921 jne L_update_next_in
922
923
924
925 mov ebx,ebp ; MMX loop keeps the bit count in ebp; move it to ebx for the shared code
926
927L_update_next_in:
928; 1266 "inffast.S"
929 mov eax, [esp+88] ; eax = stream pointer
930 mov ecx,ebx
931 mov edx, [eax+28] ; edx = state pointer
932 shr ecx,3 ; whole bytes still held in the accumulator
933 sub esi,ecx ; hand those bytes back to the input
934 shl ecx,3
935 sub ebx,ecx ; keep only the remaining 0..7 bits
936 mov [eax+12],edi ; store the output pointer
937 mov [edx+bits_state],ebx
938 mov ecx,ebx ; cl = remaining bit count, used for the mask below
939
940 lea ebx, [esp+28]
941 cmp [esp+20],ebx
942 jne L_buf_not_used ; input limit is not the scratch buffer: esi already points into caller input
943
944 sub esi,ebx ; esi = bytes consumed from the scratch buffer
945 mov ebx, [eax+0]
946 mov [esp+20],ebx
947 add esi,ebx ; translate back into the caller's input buffer
948 mov ebx, [eax+4]
949 sub ebx,11
950 add [esp+20],ebx ; rebuild the input limit: pointer + count - 11
951
952L_buf_not_used:
953 mov [eax+0],esi ; store the input pointer
954
955 mov ebx,1
956 shl ebx,cl
957 dec ebx ; ebx = mask of the remaining bits
958
959
960
961
962
963 cmp dword ptr [inflate_fast_use_mmx],2
964 jne L_update_hold
965
966
967
968 psrlq mm0,mm1 ; apply the last pending shift
969 movd ebp,mm0 ; accumulator back into ebp
970
971 emms ; leave MMX state before returning
972
973L_update_hold:
974
975
976
977 and ebp,ebx ; discard bits belonging to the bytes handed back
978 mov [edx+hold_state],ebp
979
980; Both branches below store input limit - input pointer + 11 as the input
981; byte count; they differ only in which operand is subtracted first.
982
983 mov ebx, [esp+20]
984 cmp ebx,esi
985 jbe L_last_is_smaller
986
987 sub ebx,esi
988 add ebx,11
989 mov [eax+4],ebx
990 jmp L_fixup_out
991L_last_is_smaller:
992 sub esi,ebx
993 neg esi
994 add esi,11
995 mov [eax+4],esi
996
997
998
999
1000L_fixup_out: ; same scheme for the output byte count: output limit - output pointer + 257
1001
1002 mov ebx, [esp+16]
1003 cmp ebx,edi
1004 jbe L_end_is_smaller
1005
1006 sub ebx,edi
1007 add ebx,257
1008 mov [eax+16],ebx
1009 jmp L_done
1010L_end_is_smaller:
1011 sub edi,ebx
1012 neg edi
1013 add edi,257
1014 mov [eax+16],edi
1015
1016
1017
1018
1019
1020L_done: ; undo the prologue in reverse order
1021 add esp,64
1022 popfd
1023 pop ebx
1024 pop ebp
1025 pop esi
1026 pop edi
1027 ret
1028
1029
1030
1031
1032_TEXT ends
1033end
diff --git a/contrib/masmx86/mkasm.bat b/contrib/masmx86/mkasm.bat
new file mode 100755
index 0000000..f3fa0a0
--- /dev/null
+++ b/contrib/masmx86/mkasm.bat
@@ -0,0 +1,3 @@
1cl /I..\.. /O2 /c gvmat32c.c
2ml /coff /Zi /c /Flgvmat32.lst gvmat32.asm
3ml /coff /Zi /c /Flinffas32.lst inffas32.asm
diff --git a/contrib/masmx86/readme.txt b/contrib/masmx86/readme.txt
new file mode 100644
index 0000000..7b57167
--- /dev/null
+++ b/contrib/masmx86/readme.txt
@@ -0,0 +1,21 @@
1
2Summary
3-------
4This directory contains ASM implementations of the functions
5longest_match() and inflate_fast().
6
7
8Usage instructions
9------------------
10Copy these files into the zlib source directory, then run the
11appropriate makefile, as suggested below.
12
13
14Build instructions
15------------------
16* With Microsoft C and MASM:
17nmake -f win32/Makefile.msc LOC="-DASMV -DASMINF" OBJA="gvmat32c.obj gvmat32.obj inffas32.obj"
18
19* With Borland C and TASM:
20make -f win32/Makefile.bor LOCAL_ZLIB="-DASMV -DASMINF" OBJA="gvmat32c.obj gvmat32.obj inffas32.obj" OBJPA="+gvmat32c.obj+gvmat32.obj+inffas32.obj"
21