summaryrefslogtreecommitdiff
path: root/contrib/masmx86
diff options
context:
space:
mode:
authorMark Adler <madler@alumni.caltech.edu>2011-09-09 23:32:36 -0700
committerMark Adler <madler@alumni.caltech.edu>2011-09-09 23:32:36 -0700
commit67cc20d0041a32bee12bd9eb20ae218f91b73f77 (patch)
treed7e1b94bd15c30efd57cf9036f5fe89306b6bba0 /contrib/masmx86
parent7751bd4c715ea8478113e34b49b5a794a4642e8e (diff)
downloadzlib-1.2.4-pre1.tar.gz
zlib-1.2.4-pre1.tar.bz2
zlib-1.2.4-pre1.zip
zlib 1.2.4-pre1v1.2.4-pre1
Diffstat (limited to 'contrib/masmx86')
-rw-r--r--contrib/masmx86/bld_ml32.bat2
-rw-r--r--contrib/masmx86/gvmat32.asm972
-rw-r--r--contrib/masmx86/gvmat32c.c62
-rw-r--r--contrib/masmx86/match686.asm478
-rw-r--r--contrib/masmx86/match686.objbin0 -> 3507 bytes
-rwxr-xr-xcontrib/masmx86/mkasm.bat3
-rw-r--r--contrib/masmx86/readme.txt4
7 files changed, 481 insertions, 1040 deletions
diff --git a/contrib/masmx86/bld_ml32.bat b/contrib/masmx86/bld_ml32.bat
index 36962e1..e1b86bf 100644
--- a/contrib/masmx86/bld_ml32.bat
+++ b/contrib/masmx86/bld_ml32.bat
@@ -1,2 +1,2 @@
1ml /coff /Zi /c /Flgvmat32.lst gvmat32.asm 1ml /coff /Zi /c /Flmatch686.lst match686.asm
2ml /coff /Zi /c /Flinffas32.lst inffas32.asm 2ml /coff /Zi /c /Flinffas32.lst inffas32.asm
diff --git a/contrib/masmx86/gvmat32.asm b/contrib/masmx86/gvmat32.asm
deleted file mode 100644
index 8111616..0000000
--- a/contrib/masmx86/gvmat32.asm
+++ /dev/null
@@ -1,972 +0,0 @@
1; gvmat32.asm -- Asm portion of the optimized longest_match for 32 bits x86
2; Copyright (C) 1995-1996 Jean-loup Gailly and Gilles Vollant.
3; File written by Gilles Vollant, by modifying the longest_match
4; from Jean-loup Gailly in deflate.c
5;
6; http://www.zlib.net
7; http://www.winimage.com/zLibDll
8; http://www.muppetlabs.com/~breadbox/software/assembly.html
9;
10; For Visual C++ 4.x and higher and ML 6.x and higher
11; ml.exe is in directory \MASM611C of Win95 DDK
12; ml.exe is also distributed in http://www.masm32.com/masmdl.htm
13; and in VC++2003 toolkit at http://msdn.microsoft.com/visualc/vctoolkit2003/
14;
15; this file contains two implementations of longest_match
16;
17; longest_match_7fff : written 1996 by Gilles Vollant optimized for
18; the first Pentium. Assumes s->w_mask == 0x7fff
19; longest_match_686 : written by Brian Raiter (1998), optimized for Pentium Pro
20;
21; to use an assembly version of longest_match, you need to define ASMV in the project
22; There are two ways to use gvmat32.asm
23;
24; A) Suggested method
25; if you want include both longest_match_7fff and longest_match_686
26; compile the asm file running
27; ml /coff /Zi /Flgvmat32.lst /c gvmat32.asm
28; and include gvmat32c.c in your project
29; if you have an old cpu (386,486 or first Pentium) and s->w_mask==0x7fff,
30; longest_match_7fff will be used
31; if you have a more modern CPU (Pentium Pro, II and higher)
32; longest_match_686 will be used
33; on an old cpu with s->w_mask!=0x7fff, longest_match_686 will be used,
34; but this is not a situation you'll find often
35;
36; B) Alternative
37; if you are not interested in old cpu performance and want the smallest
38; binaries possible
39;
40; compile the asm file running
41; ml /coff /Zi /c /Flgvmat32.lst /DNOOLDPENTIUMCODE gvmat32.asm
42; and do not include gvmat32c.c in your project (or also define
43; NOOLDPENTIUMCODE when compiling it)
44;
45; note : as far as I know, longest_match_686 is much faster than longest_match_7fff
46; on Pentium Pro/II/III, faster (but by less) on P4, but it seems
47; longest_match_7fff can be (very slightly) faster on AMD Athlon64/K8
48;
49; see below : zlib1222add must be adjusted if you use a zlib version < 1.2.2.2
50
51;uInt longest_match_7fff(s, cur_match)
52; deflate_state *s;
53; IPos cur_match; /* current match */
54
; Stack frame layout for longest_match_7fff. Every offset below is relative
; to esp as it stands after the prologue has pushed ebp, edi, esi and ebx
; and executed "sub esp,NbStackAdd".
55 NbStack equ 76
; the two stack arguments of the function
56 cur_match equ dword ptr[esp+NbStack-0]
57 str_s equ dword ptr[esp+NbStack-4]
58; 5 dwords on top (ret, ebp, edi, esi, ebx), in the order they are pushed
59 adrret equ dword ptr[esp+NbStack-8]
60 pushebp equ dword ptr[esp+NbStack-12]
61 pushedi equ dword ptr[esp+NbStack-16]
62 pushesi equ dword ptr[esp+NbStack-20]
63 pushebx equ dword ptr[esp+NbStack-24]
64
; local variables (the area reserved by "sub esp,NbStackAdd")
65 chain_length equ dword ptr [esp+NbStack-28]
66 limit equ dword ptr [esp+NbStack-32]
67 best_len equ dword ptr [esp+NbStack-36]
68 window equ dword ptr [esp+NbStack-40]
69 prev equ dword ptr [esp+NbStack-44]
; scan_start is a 16-bit slot: it receives the first two bytes at scan
70 scan_start equ word ptr [esp+NbStack-48]
71 wmask equ dword ptr [esp+NbStack-52]
72 match_start_ptr equ dword ptr [esp+NbStack-56]
73 nice_match equ dword ptr [esp+NbStack-60]
74 scan equ dword ptr [esp+NbStack-64]
75
76 windowlen equ dword ptr [esp+NbStack-68]
77 match_start equ dword ptr [esp+NbStack-72]
78 strend equ dword ptr [esp+NbStack-76]
; size of the local area: NbStack minus the 24 bytes taken by the two
; arguments, the return address... no: minus the return address, the four
; saved registers and the str_s slot offset base (24 = offset of pushebx)
79 NbStackAdd equ (NbStack-24)
80
81 .386p
82
83 name gvmatch
84 .MODEL FLAT
85
86
87
88; all the +zlib1222add offsets are due to the addition of fields
89; in zlib in the deflate_state structure since the asm code was first written
90; (if you compile with zlib 1.0.4 or older, use "zlib1222add equ (-4)").
91; (if you compile with zlib between 1.0.5 and 1.2.2.1, use "zlib1222add equ 0").
92; (if you compile with zlib 1.2.2.2 or later, use "zlib1222add equ 8").
93
94 zlib1222add equ 8
95
96; Note : these values are correct when the structure is packed on an 8-byte boundary
; Byte offsets of the deflate_state fields read or written by longest_match_7fff.
97 dep_chain_length equ 74h+zlib1222add
98 dep_window equ 30h+zlib1222add
99 dep_strstart equ 64h+zlib1222add
100 dep_prev_length equ 70h+zlib1222add
101 dep_nice_match equ 88h+zlib1222add
102 dep_w_size equ 24h+zlib1222add
103 dep_prev equ 38h+zlib1222add
104 dep_w_mask equ 2ch+zlib1222add
105 dep_good_match equ 84h+zlib1222add
106 dep_match_start equ 68h+zlib1222add
107 dep_lookahead equ 6ch+zlib1222add
108
109
110_TEXT segment
111
; Exported symbols.
;   NOUNDERLINE      : export the names without a leading underscore
;   NOOLDPENTIUMCODE : export only longest_match / match_init; otherwise
;                      export longest_match_7fff, cpudetect32 and
;                      longest_match_686
112IFDEF NOUNDERLINE
113 IFDEF NOOLDPENTIUMCODE
114 public longest_match
115 public match_init
116 ELSE
117 public longest_match_7fff
118 public cpudetect32
119 public longest_match_686
120 ENDIF
121ELSE
122 IFDEF NOOLDPENTIUMCODE
123 public _longest_match
124 public _match_init
125 ELSE
126 public _longest_match_7fff
127 public _cpudetect32
128 public _longest_match_686
129 ENDIF
130ENDIF
131
; Match-length constants used by the code below
132 MAX_MATCH equ 258
133 MIN_MATCH equ 3
134 MIN_LOOKAHEAD equ (MAX_MATCH+MIN_MATCH+1)
135
136
137
138IFNDEF NOOLDPENTIUMCODE
;-----------------------------------------------------------------------
; uInt longest_match_7fff(deflate_state *s, IPos cur_match)
;
; Walks the hash chain starting at cur_match and keeps the longest match
; found for the string at s->window + s->strstart.
; The chain mask is hard-coded as 7fffh (s->w_mask is never read here),
; and the chain walk is unrolled 16 times.
;
; In:    stack arguments: [esp+4] = s, [esp+8] = cur_match (at entry)
; Out:   eax = min(best_len, s->lookahead); s->match_start is written back
; Saves: ebp, edi, esi, ebx (pushed in the prologue, popped before ret)
; Stack: NbStackAdd bytes of locals, addressed through the esp-relative
;        equates (chain_length, limit, best_len, ...)
;-----------------------------------------------------------------------
139IFDEF NOUNDERLINE
140longest_match_7fff proc near
141ELSE
142_longest_match_7fff proc near
143ENDIF
144
145 mov edx,[esp+4] ; edx = s (first argument), read before the pushes
146
147
148
149 push ebp
150 push edi
151 push esi
152 push ebx
153
154 sub esp,NbStackAdd
155
156; initialize or check the variables used in match.asm.
157 mov ebp,edx
158
159; chain_length = s->max_chain_length
160; if (prev_length>=good_match) chain_length >>= 2
161 mov edx,[ebp+dep_chain_length]
162 mov ebx,[ebp+dep_prev_length]
163 cmp [ebp+dep_good_match],ebx
164 ja noshr
165 shr edx,2
166noshr:
167; we increment chain_length because in the asm, the --chain_length is at the beginning of the loop
168 inc edx
169 mov edi,[ebp+dep_nice_match]
170 mov chain_length,edx
171 mov eax,[ebp+dep_lookahead]
172 cmp eax,edi
173; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead;
174 jae nolookaheadnicematch
175 mov edi,eax
176nolookaheadnicematch:
177; best_len = s->prev_length
178 mov best_len,ebx
179
180; window = s->window
181 mov esi,[ebp+dep_window]
182 mov ecx,[ebp+dep_strstart]
183 mov window,esi
184
185 mov nice_match,edi
186; scan = window + strstart
187 add esi,ecx
188 mov scan,esi
189; dx = *(ushf*)scan
190 mov dx,word ptr [esi]
191; bx = *(ushf*)(scan+best_len-1)   (ebx still holds best_len here)
192 mov bx,word ptr [esi+ebx-1]
193 add esi,MAX_MATCH-1
194; scan_start = *scan
195 mov scan_start,dx
196; strend = scan + MAX_MATCH-1
197 mov strend,esi
198; bx = scan_end = *(ushf*)(scan+best_len-1)
199
200; IPos limit = s->strstart > (IPos)MAX_DIST(s) ?
201; s->strstart - (IPos)MAX_DIST(s) : NIL;
202
203 mov esi,[ebp+dep_w_size]
204 sub esi,MIN_LOOKAHEAD
205; here esi = MAX_DIST(s)
206 sub ecx,esi
207 ja nodist
208 xor ecx,ecx
209nodist:
210 mov limit,ecx
211
212; prev = s->prev
213 mov edx,[ebp+dep_prev]
214 mov prev,edx
215
216; match_start = s->match_start; then load the loop registers
217 mov edx,dword ptr [ebp+dep_match_start]
218 mov bp,scan_start ; bp = scan_start from here on; s is reloaded from str_s at exitloop
219 mov eax,cur_match
220 mov match_start,edx
221
222 mov edx,window
223 mov edi,edx
224 add edi,best_len
225 mov esi,prev
226 dec edi
227; windowlen = window + best_len -1
228 mov windowlen,edi
229
230 jmp beginloop2
231 align 4
232
233; here, in the loop
234; eax = ax = cur_match
235; ecx = limit
236; bx = scan_end
237; bp = scan_start
238; edx = window
239; edi = windowlen (window + best_len -1)
240; esi = prev
241
242;// here; chain_length <=16
243normalbeg0add16:
244 add chain_length,16
245 jz exitloop
246normalbeg0:
247 cmp word ptr[edi+eax],bx
248 je normalbeg2noroll
249rcontlabnoroll:
250; cur_match = prev[cur_match & wmask]
251 and eax,7fffh
252 mov ax,word ptr[esi+eax*2]
253; if cur_match <= limit, go to exitloop
254 cmp ecx,eax
255 jnb exitloop
256; if --chain_length != 0, loop again at normalbeg0; otherwise exit
257 dec chain_length
258 jnz normalbeg0
259 jmp exitloop
260
261normalbeg2noroll:
262; if (scan_start==*(cur_match+window)) goto normalbeg2
263 cmp bp,word ptr[edx+eax]
264 jne rcontlabnoroll
265 jmp normalbeg2
266
267contloop3:
268 mov edi,windowlen
269
270; cur_match = prev[cur_match & wmask]
271 and eax,7fffh
272 mov ax,word ptr[esi+eax*2]
273; if cur_match <= limit, go to exitloop
274 cmp ecx,eax
275jnbexitloopshort1:
276 jnb exitloop
277; otherwise fall through into the main loop, where chain_length is updated
278
279
280; begin the main loop
281beginloop2:
282 sub chain_length,16+1
283; if chain_length <=16, don't use the unrolled loop
284 jna normalbeg0add16
285
286do16:
287 cmp word ptr[edi+eax],bx
288 je normalbeg2dc0
289
; maccn: one unrolled chain step. Follows prev[] to the next candidate,
; exits if it is not above limit, and jumps to "lab" when the word at
; candidate+best_len-1 equals scan_end.
290maccn MACRO lab
291 and eax,7fffh
292 mov ax,word ptr[esi+eax*2]
293 cmp ecx,eax
294 jnb exitloop
295 cmp word ptr[edi+eax],bx
296 je lab
297 ENDM
298
299rcontloop0:
300 maccn normalbeg2dc1
301
302rcontloop1:
303 maccn normalbeg2dc2
304
305rcontloop2:
306 maccn normalbeg2dc3
307
308rcontloop3:
309 maccn normalbeg2dc4
310
311rcontloop4:
312 maccn normalbeg2dc5
313
314rcontloop5:
315 maccn normalbeg2dc6
316
317rcontloop6:
318 maccn normalbeg2dc7
319
320rcontloop7:
321 maccn normalbeg2dc8
322
323rcontloop8:
324 maccn normalbeg2dc9
325
326rcontloop9:
327 maccn normalbeg2dc10
328
329rcontloop10:
330 maccn short normalbeg2dc11
331
332rcontloop11:
333 maccn short normalbeg2dc12
334
335rcontloop12:
336 maccn short normalbeg2dc13
337
338rcontloop13:
339 maccn short normalbeg2dc14
340
341rcontloop14:
342 maccn short normalbeg2dc15
343
344rcontloop15:
345 and eax,7fffh
346 mov ax,word ptr[esi+eax*2]
347 cmp ecx,eax
348 jnb exitloop
349
350 sub chain_length,16
351 ja do16
352 jmp normalbeg0add16
353
354;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
355
; normbeg: landing pad for a scan_end hit at unrolled step "valsub".
; If the first word of the candidate differs from scan_start, resume the
; unrolled loop at rcontlab; otherwise re-credit chain_length with the
; 16-valsub steps not taken in this round and go compare the strings.
356normbeg MACRO rcontlab,valsub
357; if we are here, we know that *(match+best_len-1) == scan_end
358 cmp bp,word ptr[edx+eax]
359; if (match != scan_start) goto rcontlab
360 jne rcontlab
361; restore the correct chain_length, then compare the scan and match strings
362 add chain_length,16-valsub
363 jmp iseq
364 ENDM
365
366
367normalbeg2dc11:
368 normbeg rcontloop11,11
369
370normalbeg2dc12:
371 normbeg short rcontloop12,12
372
373normalbeg2dc13:
374 normbeg short rcontloop13,13
375
376normalbeg2dc14:
377 normbeg short rcontloop14,14
378
379normalbeg2dc15:
380 normbeg short rcontloop15,15
381
382normalbeg2dc10:
383 normbeg rcontloop10,10
384
385normalbeg2dc9:
386 normbeg rcontloop9,9
387
388normalbeg2dc8:
389 normbeg rcontloop8,8
390
391normalbeg2dc7:
392 normbeg rcontloop7,7
393
394normalbeg2dc6:
395 normbeg rcontloop6,6
396
397normalbeg2dc5:
398 normbeg rcontloop5,5
399
400normalbeg2dc4:
401 normbeg rcontloop4,4
402
403normalbeg2dc3:
404 normbeg rcontloop3,3
405
406normalbeg2dc2:
407 normbeg rcontloop2,2
408
409normalbeg2dc1:
410 normbeg rcontloop1,1
411
412normalbeg2dc0:
413 normbeg rcontloop0,0
414
415
416; we go in normalbeg2 because *(ushf*)(match+best_len-1) == scan_end
417
418normalbeg2:
419 mov edi,window
420
421 cmp bp,word ptr[edi+eax]
422 jne contloop3 ; if *(ushf*)match != scan_start, continue
423
424iseq:
425; if we are here, we know that *(match+best_len-1) == scan_end
426; and (match == scan_start)
427
428 mov edi,edx
429 mov esi,scan ; esi = scan
430 add edi,eax ; edi = window + cur_match = match
431
432 mov edx,[esi+3] ; compare manually dword at match+3
433 xor edx,[edi+3] ; and scan +3
434
435 jz begincompare ; if equal, go to long compare
436
437; find the first mismatching byte of that dword and compute len (in esi)
438 or dl,dl
439 je eq1rr
440 mov esi,3
441 jmp trfinval
442eq1rr:
443 or dx,dx
444 je eq1
445
446 mov esi,4
447 jmp trfinval
448eq1:
449 and edx,0ffffffh
450 jz eq11
451 mov esi,5
452 jmp trfinval
453eq11:
454 mov esi,6
455 jmp trfinval
456
457begincompare:
458 ; here the bytes compared so far are equal; continue from offset 6
459 add edi,6
460 add esi,6
461 mov ecx,(MAX_MATCH-(2+4))/4 ; scan for at most MAX_MATCH bytes
462 repe cmpsd ; loop until mismatch
463
464 je trfin ; go to trfin if no mismatch was found
465; locate the first mismatching byte inside the last dword compared
466 sub esi,4
467 mov edx,[edi-4]
468 xor edx,[esi]
469
470 or dl,dl
471 jnz trfin
472 inc esi
473
474 or dx,dx
475 jnz trfin
476 inc esi
477
478 and edx,0ffffffh
479 jnz trfin
480 inc esi
481
482trfin:
483 sub esi,scan ; esi = len
484trfinval:
485; here the compare has finished, and esi contains the length of the equal string
486 cmp esi,best_len ; if len > best_len, go newbestlen
487 ja short newbestlen
488; now we restore edx, ecx and esi, for the big loop
489 mov esi,prev
490 mov ecx,limit
491 mov edx,window
492 jmp contloop3
493
494newbestlen:
495 mov best_len,esi ; len becomes best_len
496
497 mov match_start,eax ; save new position as match_start
498 cmp esi,nice_match ; if best_len >= nice_match, exit
499 jae exitloop
500 mov ecx,scan
501 mov edx,window ; restore edx=window
502 add ecx,esi
503 add esi,edx
504
505 dec esi
506 mov windowlen,esi ; windowlen = window + best_len-1
507 mov bx,[ecx-1] ; bx = *(scan+best_len-1) = scan_end
508
509; now we restore ecx and esi, for the big loop :
510 mov esi,prev
511 mov ecx,limit
512 jmp contloop3
513
514exitloop:
515; exit : s->match_start=match_start
516 mov ebx,match_start
517 mov ebp,str_s
518 mov ecx,best_len
519 mov dword ptr [ebp+dep_match_start],ebx
520 mov eax,dword ptr [ebp+dep_lookahead]
521 cmp ecx,eax
522 ja minexlo ; best_len > lookahead: keep eax = lookahead
523 mov eax,ecx
524minexlo:
525; return min(best_len,s->lookahead)
526
527; restore stack and register ebx,esi,edi,ebp
528 add esp,NbStackAdd
529
530 pop ebx
531 pop esi
532 pop edi
533 pop ebp
534 ret
535InfoAuthor:
536; please don't remove this string !
537; You are free to use gvmat32 in any free or commercial app if you don't remove the string from the binary!
538 db 0dh,0ah,"GVMat32 optimised assembly code written 1996-98 by Gilles Vollant",0dh,0ah
539
540
541
542IFDEF NOUNDERLINE
543longest_match_7fff endp
544ELSE
545_longest_match_7fff endp
546ENDIF
547
548
;-----------------------------------------------------------------------
; unsigned long cpudetect32(void)
;
; Out:   eax = 0300h if the AC flag (EFLAGS bit 40000h) cannot be toggled,
;              0400h if AC can be toggled but the ID flag (200000h) cannot,
;              otherwise the eax result of CPUID executed with eax = 1
; Saves: ebx (pushed on entry, popped at exitcpudetect)
; Uses:  ecx as scratch for the saved EFLAGS image
;-----------------------------------------------------------------------
549IFDEF NOUNDERLINE
550cpudetect32 proc near
551ELSE
552_cpudetect32 proc near
553ENDIF
554
555 push ebx
556
557 pushfd ; push original EFLAGS
558 pop eax ; get original EFLAGS
559 mov ecx, eax ; save original EFLAGS
560 xor eax, 40000h ; flip AC bit in EFLAGS
561 push eax ; save new EFLAGS value on stack
562 popfd ; replace current EFLAGS value
563 pushfd ; get new EFLAGS
564 pop eax ; store new EFLAGS in EAX
565 xor eax, ecx ; can't toggle AC bit, processor=80386
566 jz end_cpu_is_386 ; jump if 80386 processor
567 push ecx
568 popfd ; restore AC bit in EFLAGS first
569
; EFLAGS is pushed twice: one copy is popped into ecx just below, the
; other stays on the stack until the "restore original EFLAGS" popfd.
570 pushfd
571 pushfd
572 pop ecx
573
574 mov eax, ecx ; get original EFLAGS
575 xor eax, 200000h ; flip ID bit in EFLAGS
576 push eax ; save new EFLAGS value on stack
577 popfd ; replace current EFLAGS value
578 pushfd ; get new EFLAGS
579 pop eax ; store new EFLAGS in EAX
580 popfd ; restore original EFLAGS
581 xor eax, ecx ; can't toggle ID bit,
582 je is_old_486 ; processor=old
583
584 mov eax,1
585 db 0fh,0a2h ;CPUID (emitted as raw opcode bytes)
586
587exitcpudetect:
588 pop ebx
589 ret
590
591end_cpu_is_386:
592 mov eax,0300h
593 jmp exitcpudetect
594
595is_old_486:
596 mov eax,0400h
597 jmp exitcpudetect
598
599IFDEF NOUNDERLINE
600cpudetect32 endp
601ELSE
602_cpudetect32 endp
603ENDIF
604ENDIF
605
;;; Match-length constants used by the Pentium Pro routine.
606MAX_MATCH equ 258
607MIN_MATCH equ 3
608MIN_LOOKAHEAD equ (MAX_MATCH + MIN_MATCH + 1)
;;; MAX_MATCH rounded up to a multiple of 8 (= 0108h). The compare loop
;;; hard-codes this value as 0108h and its negation as 0fffffef8h, so the
;;; mask must clear the low three bits only (0FFF0h would yield 0100h).
609MAX_MATCH_8_ equ ((MAX_MATCH + 7) AND 0FFF8h)
610
611
612;;; stack frame offsets (relative to esp after the prologue has pushed
;;; ebp, edi, esi, ebx and subtracted LocalVarsSize)
613
614chainlenwmask equ esp + 0 ; high word: current chain len
615 ; low word: s->wmask
616window equ esp + 4 ; local copy of s->window
617windowbestlen equ esp + 8 ; s->window + bestlen
618scanstart equ esp + 16 ; first two bytes of string
619scanend equ esp + 12 ; last two bytes of string
620scanalign equ esp + 20 ; dword-misalignment of string
621nicematch equ esp + 24 ; a good enough match size
622bestlen equ esp + 28 ; size of best match so far
623scan equ esp + 32 ; ptr to string wanting match
624
625LocalVarsSize equ 36
626; saved ebx byte esp + 36
627; saved edi byte esp + 40
628; saved esi byte esp + 44
629; saved ebp byte esp + 48
630; return address byte esp + 52
631deflatestate equ esp + 56 ; the function arguments
632curmatch equ esp + 60
633
634;;; Offsets for fields in the deflate_state structure. These numbers
635;;; are calculated from the definition of deflate_state, with the
636;;; assumption that the compiler will dword-align the fields. (Thus,
637;;; changing the definition of deflate_state could easily cause this
638;;; program to crash horribly, without so much as a warning at
639;;; compile time. Sigh.)
640
641dsWSize equ 36+zlib1222add
642dsWMask equ 44+zlib1222add
643dsWindow equ 48+zlib1222add
644dsPrev equ 56+zlib1222add
645dsMatchLen equ 88+zlib1222add
646dsPrevMatch equ 92+zlib1222add
647dsStrStart equ 100+zlib1222add
648dsMatchStart equ 104+zlib1222add
649dsLookahead equ 108+zlib1222add
650dsPrevLen equ 112+zlib1222add
651dsMaxChainLen equ 116+zlib1222add
652dsGoodMatch equ 132+zlib1222add
653dsNiceMatch equ 136+zlib1222add
654
655
656;;; match.asm -- Pentium-Pro-optimized version of longest_match()
657;;; Written for zlib 1.1.2
658;;; Copyright (C) 1998 Brian Raiter <breadbox@muppetlabs.com>
659;;; You can look at http://www.muppetlabs.com/~breadbox/software/assembly.html
660;;;
661;;; This is free software; you can redistribute it and/or modify it
662;;; under the terms of the GNU General Public License.
663
664;GLOBAL _longest_match, _match_init
665
666
667;SECTION .text
668
669;;; uInt longest_match(deflate_state *deflatestate, IPos curmatch)
670
671;_longest_match:
;;;---------------------------------------------------------------------
;;; uInt longest_match(deflate_state *deflatestate, IPos curmatch)
;;; Assembled as longest_match_686 unless NOOLDPENTIUMCODE is defined.
;;;
;;; In:    stack arguments, read through the deflatestate / curmatch equates
;;; Out:   eax = min(bestlen, s->lookahead); s->match_start is updated
;;;        whenever a longer match is found
;;; Saves: ebp, edi, esi, ebx (pushed on entry, popped before ret)
;;; Stack: LocalVarsSize bytes of locals
;;; NOTE(review): several tests use signed condition codes (jl/jg/jge) on
;;; values the C pseudo-code treats as unsigned; presumably harmless because
;;; the operands stay far below 2^31 -- confirm.
;;;---------------------------------------------------------------------
672IFDEF NOOLDPENTIUMCODE
673 IFDEF NOUNDERLINE
674 longest_match proc near
675 ELSE
676 _longest_match proc near
677 ENDIF
678ELSE
679 IFDEF NOUNDERLINE
680 longest_match_686 proc near
681 ELSE
682 _longest_match_686 proc near
683 ENDIF
684ENDIF
685
686;;; Save registers that the compiler may be using, and adjust esp to
687;;; make room for our stack frame.
688
689 push ebp
690 push edi
691 push esi
692 push ebx
693 sub esp, LocalVarsSize
694
695;;; Retrieve the function arguments. ecx will hold cur_match
696;;; throughout the entire function. edx will hold the pointer to the
697;;; deflate_state structure during the function's setup (before
698;;; entering the main loop).
699
700 mov edx, [deflatestate]
701 mov ecx, [curmatch]
702
703;;; uInt wmask = s->w_mask;
704;;; unsigned chain_length = s->max_chain_length;
705;;; if (s->prev_length >= s->good_match) {
706;;; chain_length >>= 2;
707;;; }
708
709 mov eax, [edx + dsPrevLen]
710 mov ebx, [edx + dsGoodMatch]
711 cmp eax, ebx
712 mov eax, [edx + dsWMask]
713 mov ebx, [edx + dsMaxChainLen]
714 jl LastMatchGood ; flags still come from the cmp above (mov leaves them intact)
715 shr ebx, 2
716LastMatchGood:
717
718;;; chainlen is decremented once beforehand so that the function can
719;;; use the sign flag instead of the zero flag for the exit test.
720;;; It is then shifted into the high word, to make room for the wmask
721;;; value, which it will always accompany.
722
723 dec ebx
724 shl ebx, 16
725 or ebx, eax
726 mov [chainlenwmask], ebx
727
728;;; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead;
729
730 mov eax, [edx + dsNiceMatch]
731 mov ebx, [edx + dsLookahead]
732 cmp ebx, eax
733 jl LookaheadLess
734 mov ebx, eax
735LookaheadLess: mov [nicematch], ebx
736
737;;; register Bytef *scan = s->window + s->strstart;
738
739 mov esi, [edx + dsWindow]
740 mov [window], esi
741 mov ebp, [edx + dsStrStart]
742 lea edi, [esi + ebp]
743 mov [scan], edi
744
745;;; Determine how many bytes the scan ptr is off from being
746;;; dword-aligned.
747
748 mov eax, edi
749 neg eax
750 and eax, 3
751 mov [scanalign], eax
752
753;;; IPos limit = s->strstart > (IPos)MAX_DIST(s) ?
754;;; s->strstart - (IPos)MAX_DIST(s) : NIL;
755
756 mov eax, [edx + dsWSize]
757 sub eax, MIN_LOOKAHEAD
758 sub ebp, eax
759 jg LimitPositive
760 xor ebp, ebp
761LimitPositive:
762
763;;; int best_len = s->prev_length;
764
765 mov eax, [edx + dsPrevLen]
766 mov [bestlen], eax
767
768;;; Store the sum of s->window + best_len in esi, and locally in windowbestlen.
769
770 add esi, eax
771 mov [windowbestlen], esi
772
773;;; register ush scan_start = *(ushf*)scan;
774;;; register ush scan_end = *(ushf*)(scan+best_len-1);
775;;; Posf *prev = s->prev;
776
777 movzx ebx, word ptr [edi]
778 mov [scanstart], ebx
779 movzx ebx, word ptr [edi + eax - 1]
780 mov [scanend], ebx
781 mov edi, [edx + dsPrev]
782
783;;; Jump into the main loop.
784
785 mov edx, [chainlenwmask]
786 jmp short LoopEntry
787
788align 4
789
790;;; do {
791;;; match = s->window + cur_match;
792;;; if (*(ushf*)(match+best_len-1) != scan_end ||
793;;; *(ushf*)match != scan_start) continue;
794;;; [...]
795;;; } while ((cur_match = prev[cur_match & wmask]) > limit
796;;; && --chain_length != 0);
797;;;
798;;; Here is the inner loop of the function. The function will spend the
799;;; majority of its time in this loop, and majority of that time will
800;;; be spent in the first ten instructions.
801;;;
802;;; Within this loop:
803;;; ebx = scanend
804;;; ecx = curmatch
805;;; edx = chainlenwmask - i.e., ((chainlen << 16) | wmask)
806;;; esi = windowbestlen - i.e., (window + bestlen)
807;;; edi = prev
808;;; ebp = limit
809
810LookupLoop:
811 and ecx, edx
812 movzx ecx, word ptr [edi + ecx*2]
813 cmp ecx, ebp
814 jbe LeaveNow
815 sub edx, 00010000h
816 js LeaveNow
817LoopEntry: movzx eax, word ptr [esi + ecx - 1]
818 cmp eax, ebx
819 jnz LookupLoop
820 mov eax, [window]
821 movzx eax, word ptr [eax + ecx]
822 cmp eax, [scanstart]
823 jnz LookupLoop
824
825;;; Store the current value of chainlen.
826
827 mov [chainlenwmask], edx
828
829;;; Point edi to the string under scrutiny, and esi to the string we
830;;; are hoping to match it up with. In actuality, esi and edi are
831;;; both pointed (MAX_MATCH_8 + scanalign) bytes ahead, and edx is
832;;; initialized to -MAX_MATCH_8, so [edi + edx] starts at scan + scanalign.
833
834 mov esi, [window]
835 mov edi, [scan]
836 add esi, ecx
837 mov eax, [scanalign]
838 mov edx, 0fffffef8h; -(MAX_MATCH_8)
839 lea edi, [edi + eax + 0108h] ;MAX_MATCH_8]
840 lea esi, [esi + eax + 0108h] ;MAX_MATCH_8]
841
842;;; Test the strings for equality, 8 bytes at a time. At the end,
843;;; adjust edx so that it is offset to the exact byte that mismatched.
844;;;
845;;; We already know at this point that the first three bytes of the
846;;; strings match each other, and they can be safely passed over before
847;;; starting the compare loop. So what this code does is skip over 0-3
848;;; bytes, as much as necessary in order to dword-align the edi
849;;; pointer. (esi will still be misaligned three times out of four.)
850;;;
851;;; It should be confessed that this loop usually does not represent
852;;; much of the total running time. Replacing it with a more
853;;; straightforward "rep cmpsb" would not drastically degrade
854;;; performance.
855
856LoopCmps:
857 mov eax, [esi + edx]
858 xor eax, [edi + edx]
859 jnz LeaveLoopCmps
860 mov eax, [esi + edx + 4]
861 xor eax, [edi + edx + 4]
862 jnz LeaveLoopCmps4
863 add edx, 8
864 jnz LoopCmps
865 jmp short LenMaximum
;;; eax holds the xor of the two mismatching dwords: advance edx by 2 if
;;; the low word is equal, then by 1 more if the next byte is equal
;;; ("sub al, 1" sets carry only when al is zero).
866LeaveLoopCmps4: add edx, 4
867LeaveLoopCmps: test eax, 0000FFFFh
868 jnz LenLower
869 add edx, 2
870 shr eax, 16
871LenLower: sub al, 1
872 adc edx, 0
873
874;;; Calculate the length of the match. If it is MAX_MATCH or longer,
875;;; then automatically accept it as the best possible match and leave.
876
877 lea eax, [edi + edx]
878 mov edi, [scan]
879 sub eax, edi
880 cmp eax, MAX_MATCH
881 jge LenMaximum
882
883;;; If the length of the match is not longer than the best match we
884;;; have so far, then forget it and return to the lookup loop.
885
886 mov edx, [deflatestate]
887 mov ebx, [bestlen]
888 cmp eax, ebx
889 jg LongerMatch
890 mov esi, [windowbestlen]
891 mov edi, [edx + dsPrev]
892 mov ebx, [scanend]
893 mov edx, [chainlenwmask]
894 jmp LookupLoop
895
896;;; s->match_start = cur_match;
897;;; best_len = len;
898;;; if (len >= nice_match) break;
899;;; scan_end = *(ushf*)(scan+best_len-1);
900
901LongerMatch: mov ebx, [nicematch]
902 mov [bestlen], eax
903 mov [edx + dsMatchStart], ecx
904 cmp eax, ebx
905 jge LeaveNow
906 mov esi, [window]
907 add esi, eax
908 mov [windowbestlen], esi
909 movzx ebx, word ptr [edi + eax - 1]
910 mov edi, [edx + dsPrev]
911 mov [scanend], ebx
912 mov edx, [chainlenwmask]
913 jmp LookupLoop
914
915;;; Accept the current string, with the maximum possible length.
916
917LenMaximum: mov edx, [deflatestate]
918 mov dword ptr [bestlen], MAX_MATCH
919 mov [edx + dsMatchStart], ecx
920
921;;; if ((uInt)best_len <= s->lookahead) return (uInt)best_len;
922;;; return s->lookahead;
923
924LeaveNow:
925 mov edx, [deflatestate]
926 mov ebx, [bestlen]
927 mov eax, [edx + dsLookahead]
928 cmp ebx, eax
929 jg LookaheadRet
930 mov eax, ebx
931LookaheadRet:
932
933;;; Restore the stack and return from whence we came.
934
935 add esp, LocalVarsSize
936 pop ebx
937 pop esi
938 pop edi
939 pop ebp
940
941 ret
942; please don't remove this string !
943; You can freely use gvmat32 in any free or commercial app if you don't remove the string from the binary!
944 db 0dh,0ah,"asm686 with masm, optimised assembly code from Brian Raiter, written 1998",0dh,0ah
945
946
; With NOOLDPENTIUMCODE defined, match_init is also provided here as an
; empty routine (a single ret).
947IFDEF NOOLDPENTIUMCODE
948 IFDEF NOUNDERLINE
949 longest_match endp
950 ELSE
951 _longest_match endp
952 ENDIF
953
954 IFDEF NOUNDERLINE
955 match_init proc near
956 ret
957 match_init endp
958 ELSE
959 _match_init proc near
960 ret
961 _match_init endp
962 ENDIF
963ELSE
964 IFDEF NOUNDERLINE
965 longest_match_686 endp
966 ELSE
967 _longest_match_686 endp
968 ENDIF
969ENDIF
970
971_TEXT ends
972end
diff --git a/contrib/masmx86/gvmat32c.c b/contrib/masmx86/gvmat32c.c
deleted file mode 100644
index 7ad2b27..0000000
--- a/contrib/masmx86/gvmat32c.c
+++ /dev/null
@@ -1,62 +0,0 @@
1/* gvmat32.c -- C portion of the optimized longest_match for 32 bits x86
2 * Copyright (C) 1995-1996 Jean-loup Gailly and Gilles Vollant.
3 * File written by Gilles Vollant, by modifying the longest_match
4 * from Jean-loup Gailly in deflate.c
5 * It selects between the two assembly routines at run time:
6 * longest_match calls longest_match_7fff only when match_init() found a CPU
7 * family below 6 and w_mask == 0x7fff, and longest_match_686 in every other case
8 *
9 * Read comment at beginning of gvmat32.asm for more information
10 */
11
12#if defined(ASMV) && (!defined(NOOLDPENTIUMCODE))
13#include "deflate.h"
14
15/* if your C compiler doesn't add an underscore before function names,
16 define ADD_UNDERLINE_ASMFUNC */
17#ifdef ADD_UNDERLINE_ASMFUNC
18#define longest_match_7fff _longest_match_7fff
19#define longest_match_686 _longest_match_686
20#define cpudetect32 _cpudetect32
21#endif
22
23
24unsigned long cpudetect32();
25
26uInt longest_match_c(
27 deflate_state *s,
28 IPos cur_match); /* current match */
29
30
31uInt longest_match_7fff(
32 deflate_state *s,
33 IPos cur_match); /* current match */
34
35uInt longest_match_686(
36 deflate_state *s,
37 IPos cur_match); /* current match */
38
39
/* Nonzero makes longest_match() use longest_match_686. The initial value 2
   is nonzero, so the 686 routine is used until match_init() has run. */
40static uInt iIsPPro=2;
41
/* Sets iIsPPro to 1 when bits 8-11 of the cpudetect32() result are >= 6,
   and to 0 otherwise (cpudetect32 returns 0x300 / 0x400 for the CPUs it
   identifies without CPUID, giving 3 / 4 here). */
42void match_init ()
43{
44 iIsPPro = (((cpudetect32()/0x100)&0xf)>=6) ? 1 : 0;
45}
46
/* Dispatches to one of the two assembly implementations and returns its
   result: longest_match_7fff is chosen only when iIsPPro is 0 and
   s->w_mask is 0x7fff; longest_match_686 is used in every other case. */
47uInt longest_match(
48 deflate_state *s,
49 IPos cur_match) /* current match */
50{
51 if (iIsPPro!=0)
52 return longest_match_686(s,cur_match);
53
54 if (s->w_mask != 0x7fff)
55 return longest_match_686(s,cur_match);
56
57 /* now ((s->w_mask == 0x7fff) && (iIsPPro==0)) */
58 return longest_match_7fff(s,cur_match);
59}
60
61
62#endif /* defined(ASMV) && (!defined(NOOLDPENTIUMCODE)) */
diff --git a/contrib/masmx86/match686.asm b/contrib/masmx86/match686.asm
new file mode 100644
index 0000000..1eaf555
--- /dev/null
+++ b/contrib/masmx86/match686.asm
@@ -0,0 +1,478 @@
1; match686.asm -- Asm portion of the optimized longest_match for 32 bits x86
2; Copyright (C) 1995-1996 Jean-loup Gailly, Brian Raiter and Gilles Vollant.
3; File written by Gilles Vollant, by converting match686.S from Brian Raiter
4; for MASM. This is an assembly version of longest_match
5; from Jean-loup Gailly in deflate.c
6;
7; http://www.zlib.net
8; http://www.winimage.com/zLibDll
9; http://www.muppetlabs.com/~breadbox/software/assembly.html
10;
11; For Visual C++ 4.x and higher and ML 6.x and higher
12; ml.exe is distributed in
13; http://www.microsoft.com/downloads/details.aspx?FamilyID=7a1c9da0-0510-44a2-b042-7ef370530c64
14;
15; this file contains a single implementation of longest_match
16;
17; this longest_match was written by Brian Raiter (1998), optimized for Pentium Pro
18; (and the fastest known version of longest_match on modern Core 2 Duo and AMD Phenom)
19;
20; for using an assembly version of longest_match, you need define ASMV in project
21;
22; compile the asm file by running
23; ml /coff /Zi /c /Flmatch686.lst match686.asm
24; and include match686.obj in your project
25;
26; note: contrib of zLib 1.2.3 and earlier contained both a deprecated version for
27; Pentium (prior to Pentium Pro) and this version for Pentium Pro and modern processors
28; with autoselect (with cpu detection code)
29; if you want to support the old Pentium optimization, you can still use that version
30;
31; this file is not optimized for old Pentium, but it is compatible with all 32-bit x86
32; processors (starting with the 80386)
33;
34;
35; see below : zlib1222add must be adjusted if you use a zlib version < 1.2.2.2
36
37;uInt longest_match(s, cur_match)
38; deflate_state *s;
39; IPos cur_match; /* current match */
40
41 NbStack equ 76 ; every name in this block is an [esp+NbStack-k] slot
42 cur_match equ dword ptr[esp+NbStack-0]
43 str_s equ dword ptr[esp+NbStack-4]
44; 5 dword on top (ret,ebp,esi,edi,ebx)
45 adrret equ dword ptr[esp+NbStack-8]
46 pushebp equ dword ptr[esp+NbStack-12]
47 pushedi equ dword ptr[esp+NbStack-16]
48 pushesi equ dword ptr[esp+NbStack-20]
49 pushebx equ dword ptr[esp+NbStack-24]
50; NOTE(review): no instruction in this file uses the names of this block; "window" and "scan" are re-equated under ";;; stack frame offsets". Presumably leftovers from the removed gvmat32.asm -- confirm before relying on them.
51 chain_length equ dword ptr [esp+NbStack-28]
52 limit equ dword ptr [esp+NbStack-32]
53 best_len equ dword ptr [esp+NbStack-36]
54 window equ dword ptr [esp+NbStack-40]
55 prev equ dword ptr [esp+NbStack-44]
56 scan_start equ word ptr [esp+NbStack-48]
57 wmask equ dword ptr [esp+NbStack-52]
58 match_start_ptr equ dword ptr [esp+NbStack-56]
59 nice_match equ dword ptr [esp+NbStack-60]
60 scan equ dword ptr [esp+NbStack-64]
61
62 windowlen equ dword ptr [esp+NbStack-68]
63 match_start equ dword ptr [esp+NbStack-72]
64 strend equ dword ptr [esp+NbStack-76]
65 NbStackAdd equ (NbStack-24)
66
67 .386p
68
69 name gvmatch
70 .MODEL FLAT
71
72
73
74; all the +zlib1222add offsets are due to the addition of fields
75; in zlib in the deflate_state structure since the asm code was first written
76; (if you compile with zlib 1.0.4 or older, use "zlib1222add equ (-4)").
77; (if you compile with zlib between 1.0.5 and 1.2.2.1, use "zlib1222add equ 0").
78; (if you compile with zlib 1.2.2.2 or later, use "zlib1222add equ 8").
79
80 zlib1222add equ 8
81; The dep_* names below are hexadecimal spellings of the same base offsets as the ds* equates defined further down (e.g. 74h = 116, 24h = 36); the instructions in this file use only the ds* names.
82; Note : these values are valid when the structure is packed on an 8-byte boundary
83 dep_chain_length equ 74h+zlib1222add
84 dep_window equ 30h+zlib1222add
85 dep_strstart equ 64h+zlib1222add
86 dep_prev_length equ 70h+zlib1222add
87 dep_nice_match equ 88h+zlib1222add
88 dep_w_size equ 24h+zlib1222add
89 dep_prev equ 38h+zlib1222add
90 dep_w_mask equ 2ch+zlib1222add
91 dep_good_match equ 84h+zlib1222add
92 dep_match_start equ 68h+zlib1222add
93 dep_lookahead equ 6ch+zlib1222add
94
95
96_TEXT segment
97
98IFDEF NOUNDERLINE
99 public longest_match
100 public match_init
101ELSE
102 public _longest_match
103 public _match_init
104ENDIF
105
106 MAX_MATCH equ 258
107 MIN_MATCH equ 3
108 MIN_LOOKAHEAD equ (MAX_MATCH+MIN_MATCH+1)
109
110; The next three equates repeat the definitions above with identical values.
111
112MAX_MATCH equ 258
113MIN_MATCH equ 3
114MIN_LOOKAHEAD equ (MAX_MATCH + MIN_MATCH + 1)
115MAX_MATCH_8_ equ ((MAX_MATCH + 7) AND 0FFF8h) ; MAX_MATCH rounded up to a multiple of 8 = 0108h, the literal used by the compare loop in longest_match (mask 0FFF0h would give 0100h)
116
117
118;;; stack frame offsets
119;;; (relative to esp after the prologue: push ebp, edi, esi, ebx, then sub esp, LocalVarsSize)
120chainlenwmask equ esp + 0 ; high word: current chain len
121 ; low word: s->w_mask
122window equ esp + 4 ; local copy of s->window
123windowbestlen equ esp + 8 ; s->window + bestlen
124scanstart equ esp + 16 ; first two bytes of string
125scanend equ esp + 12 ; the two bytes at scan + bestlen - 1
126scanalign equ esp + 20 ; dword-misalignment of string
127nicematch equ esp + 24 ; a good enough match size
128bestlen equ esp + 28 ; size of best match so far
129scan equ esp + 32 ; ptr to string wanting match
130
131LocalVarsSize equ 36 ; bytes of locals reserved by "sub esp" in the prologue
132; saved ebx at esp + 36
133; saved esi at esp + 40
134; saved edi at esp + 44
135; saved ebp at esp + 48
136; return address at esp + 52
137deflatestate equ esp + 56 ; the function arguments
138curmatch equ esp + 60
139
140;;; Offsets for fields in the deflate_state structure. These numbers
141;;; are calculated from the definition of deflate_state, with the
142;;; assumption that the compiler will dword-align the fields. (Thus,
143;;; changing the definition of deflate_state could easily cause this
144;;; program to crash horribly, without so much as a warning at
145;;; compile time. Sigh.)
146;;; Each value is a base offset plus zlib1222add; the notes name the C field the code reads through it.
147dsWSize equ 36+zlib1222add ; s->w_size (used when computing limit)
148dsWMask equ 44+zlib1222add ; s->w_mask
149dsWindow equ 48+zlib1222add ; s->window
150dsPrev equ 56+zlib1222add ; s->prev
151dsMatchLen equ 88+zlib1222add ; not referenced by the instructions in this file
152dsPrevMatch equ 92+zlib1222add ; not referenced by the instructions in this file
153dsStrStart equ 100+zlib1222add ; s->strstart
154dsMatchStart equ 104+zlib1222add ; s->match_start
155dsLookahead equ 108+zlib1222add ; s->lookahead
156dsPrevLen equ 112+zlib1222add ; s->prev_length
157dsMaxChainLen equ 116+zlib1222add ; s->max_chain_length
158dsGoodMatch equ 132+zlib1222add ; s->good_match
159dsNiceMatch equ 136+zlib1222add ; s->nice_match
160
161
162;;; match686.asm -- Pentium-Pro-optimized version of longest_match()
163;;; Written for zlib 1.1.2
164;;; Copyright (C) 1998 Brian Raiter <breadbox@muppetlabs.com>
165;;; You can look at http://www.muppetlabs.com/~breadbox/software/assembly.html
166;;;
167;;
168;; This software is provided 'as-is', without any express or implied
169;; warranty. In no event will the authors be held liable for any damages
170;; arising from the use of this software.
171;;
172;; Permission is granted to anyone to use this software for any purpose,
173;; including commercial applications, and to alter it and redistribute it
174;; freely, subject to the following restrictions:
175;;
176;; 1. The origin of this software must not be misrepresented; you must not
177;; claim that you wrote the original software. If you use this software
178;; in a product, an acknowledgment in the product documentation would be
179;; appreciated but is not required.
180;; 2. Altered source versions must be plainly marked as such, and must not be
181;; misrepresented as being the original software
182;; 3. This notice may not be removed or altered from any source distribution.
183;;
184
185;GLOBAL _longest_match, _match_init
186
187
188;SECTION .text
189
190;;; uInt longest_match(deflate_state *deflatestate, IPos curmatch)
191;;; Args are read from the stack; result is returned in eax; ebp, edi, esi, ebx are saved and restored.
192;_longest_match:
193 IFDEF NOUNDERLINE
194 longest_match proc near
195 ELSE
196 _longest_match proc near
197 ENDIF
198
199;;; Save registers that the compiler may be using, and adjust esp to
200;;; make room for our stack frame.
201
202 push ebp
203 push edi
204 push esi
205 push ebx
206 sub esp, LocalVarsSize
207
208;;; Retrieve the function arguments. ecx will hold cur_match
209;;; throughout the entire function. edx will hold the pointer to the
210;;; deflate_state structure during the function's setup (before
211;;; entering the main loop).
212
213 mov edx, [deflatestate]
214 mov ecx, [curmatch]
215
216;;; uInt wmask = s->w_mask;
217;;; unsigned chain_length = s->max_chain_length;
218;;; if (s->prev_length >= s->good_match) {
219;;; chain_length >>= 2;
220;;; }
221
222 mov eax, [edx + dsPrevLen]
223 mov ebx, [edx + dsGoodMatch]
224 cmp eax, ebx
225 mov eax, [edx + dsWMask] ; mov leaves the flags from cmp intact
226 mov ebx, [edx + dsMaxChainLen]
227 jl LastMatchGood ; prev_length < good_match: keep the full chain length
228 shr ebx, 2
229LastMatchGood:
230
231;;; chainlen is decremented once beforehand so that the function can
232;;; use the sign flag instead of the zero flag for the exit test.
233;;; It is then shifted into the high word, to make room for the wmask
234;;; value, which it will always accompany.
235
236 dec ebx
237 shl ebx, 16
238 or ebx, eax
239 mov [chainlenwmask], ebx
240
241;;; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead;
242
243 mov eax, [edx + dsNiceMatch]
244 mov ebx, [edx + dsLookahead]
245 cmp ebx, eax
246 jl LookaheadLess ; lookahead < nice_match: keep lookahead in ebx
247 mov ebx, eax
248LookaheadLess: mov [nicematch], ebx
249
250;;; register Bytef *scan = s->window + s->strstart;
251
252 mov esi, [edx + dsWindow]
253 mov [window], esi
254 mov ebp, [edx + dsStrStart]
255 lea edi, [esi + ebp]
256 mov [scan], edi
257
258;;; Determine how many bytes the scan ptr is off from being
259;;; dword-aligned.
260
261 mov eax, edi
262 neg eax
263 and eax, 3 ; eax = (-scan) & 3
264 mov [scanalign], eax
265
266;;; IPos limit = s->strstart > (IPos)MAX_DIST(s) ?
267;;; s->strstart - (IPos)MAX_DIST(s) : NIL;
268
269 mov eax, [edx + dsWSize]
270 sub eax, MIN_LOOKAHEAD
271 sub ebp, eax ; ebp = strstart - (w_size - MIN_LOOKAHEAD)
272 jg LimitPositive
273 xor ebp, ebp ; difference was <= 0: limit = 0
274LimitPositive:
275
276;;; int best_len = s->prev_length;
277
278 mov eax, [edx + dsPrevLen]
279 mov [bestlen], eax
280
281;;; Store the sum of s->window + best_len in windowbestlen locally, and in esi.
282
283 add esi, eax
284 mov [windowbestlen], esi
285
286;;; register ush scan_start = *(ushf*)scan;
287;;; register ush scan_end = *(ushf*)(scan+best_len-1);
288;;; Posf *prev = s->prev;
289
290 movzx ebx, word ptr [edi]
291 mov [scanstart], ebx
292 movzx ebx, word ptr [edi + eax - 1]
293 mov [scanend], ebx
294 mov edi, [edx + dsPrev]
295
296;;; Jump into the main loop.
297
298 mov edx, [chainlenwmask]
299 jmp short LoopEntry
300
301align 4
302
303;;; do {
304;;; match = s->window + cur_match;
305;;; if (*(ushf*)(match+best_len-1) != scan_end ||
306;;; *(ushf*)match != scan_start) continue;
307;;; [...]
308;;; } while ((cur_match = prev[cur_match & wmask]) > limit
309;;; && --chain_length != 0);
310;;;
311;;; Here is the inner loop of the function. The function will spend the
312;;; majority of its time in this loop, and majority of that time will
313;;; be spent in the first ten instructions.
314;;;
315;;; Within this loop:
316;;; ebx = scanend
317;;; ecx = curmatch
318;;; edx = chainlenwmask - i.e., ((chainlen << 16) | wmask)
319;;; esi = windowbestlen - i.e., (window + bestlen)
320;;; edi = prev
321;;; ebp = limit
322
323LookupLoop:
324 and ecx, edx ; low word of edx is wmask
325 movzx ecx, word ptr [edi + ecx*2]
326 cmp ecx, ebp
327 jbe LeaveNow
328 sub edx, 00010000h ; one fewer chain step left (count lives in the high word)
329 js LeaveNow ; count went negative: chain budget used up
330LoopEntry: movzx eax, word ptr [esi + ecx - 1]
331 cmp eax, ebx
332 jnz LookupLoop
333 mov eax, [window]
334 movzx eax, word ptr [eax + ecx]
335 cmp eax, [scanstart]
336 jnz LookupLoop
337
338;;; Store the current value of chainlen.
339
340 mov [chainlenwmask], edx
341
342;;; Point edi to the string under scrutiny, and esi to the string we
343;;; are hoping to match it up with. In actuality, esi and edi are
344;;; both pointed (MAX_MATCH_8 + scanalign) bytes ahead, and edx is
345;;; initialized to -MAX_MATCH_8.
346
347 mov esi, [window]
348 mov edi, [scan]
349 add esi, ecx
350 mov eax, [scanalign]
351 mov edx, 0fffffef8h; -(MAX_MATCH_8)
352 lea edi, [edi + eax + 0108h] ;MAX_MATCH_8]
353 lea esi, [esi + eax + 0108h] ;MAX_MATCH_8]
354
355;;; Test the strings for equality, 8 bytes at a time. At the end,
356;;; adjust edx so that it is offset to the exact byte that mismatched.
357;;;
358;;; We already know at this point that the first three bytes of the
359;;; strings match each other, and they can be safely passed over before
360;;; starting the compare loop. So what this code does is skip over 0-3
361;;; bytes, as much as necessary in order to dword-align the edi
362;;; pointer. (esi will still be misaligned three times out of four.)
363;;;
364;;; It should be confessed that this loop usually does not represent
365;;; much of the total running time. Replacing it with a more
366;;; straightforward "rep cmpsb" would not drastically degrade
367;;; performance.
368
369LoopCmps:
370 mov eax, [esi + edx]
371 xor eax, [edi + edx]
372 jnz LeaveLoopCmps
373 mov eax, [esi + edx + 4]
374 xor eax, [edi + edx + 4]
375 jnz LeaveLoopCmps4
376 add edx, 8
377 jnz LoopCmps
378 jmp short LenMaximum
379LeaveLoopCmps4: add edx, 4
380LeaveLoopCmps: test eax, 0000FFFFh ; eax = XOR of the two differing dwords
381 jnz LenLower ; the difference is within the low two bytes
382 add edx, 2
383 shr eax, 16
384LenLower: sub al, 1 ; carry is set only when al was 0 (that byte matched)
385 adc edx, 0 ; ...in which case step one byte further
386
387;;; Calculate the length of the match. If it is MAX_MATCH or longer,
388;;; then automatically accept it as the best possible match and leave.
389
390 lea eax, [edi + edx]
391 mov edi, [scan]
392 sub eax, edi
393 cmp eax, MAX_MATCH
394 jge LenMaximum
395
396;;; If the length of the match is not longer than the best match we
397;;; have so far, then forget it and return to the lookup loop.
398
399 mov edx, [deflatestate]
400 mov ebx, [bestlen]
401 cmp eax, ebx
402 jg LongerMatch
403 mov esi, [windowbestlen]
404 mov edi, [edx + dsPrev]
405 mov ebx, [scanend]
406 mov edx, [chainlenwmask]
407 jmp LookupLoop
408
409;;; s->match_start = cur_match;
410;;; best_len = len;
411;;; if (len >= nice_match) break;
412;;; scan_end = *(ushf*)(scan+best_len-1);
413
414LongerMatch: mov ebx, [nicematch]
415 mov [bestlen], eax
416 mov [edx + dsMatchStart], ecx
417 cmp eax, ebx
418 jge LeaveNow
419 mov esi, [window]
420 add esi, eax
421 mov [windowbestlen], esi
422 movzx ebx, word ptr [edi + eax - 1]
423 mov edi, [edx + dsPrev]
424 mov [scanend], ebx
425 mov edx, [chainlenwmask]
426 jmp LookupLoop
427
428;;; Accept the current string, with the maximum possible length.
429
430LenMaximum: mov edx, [deflatestate]
431 mov dword ptr [bestlen], MAX_MATCH
432 mov [edx + dsMatchStart], ecx
433
434;;; if ((uInt)best_len <= s->lookahead) return (uInt)best_len;
435;;; return s->lookahead;
436
437LeaveNow:
438 mov edx, [deflatestate]
439 mov ebx, [bestlen]
440 mov eax, [edx + dsLookahead]
441 cmp ebx, eax
442 jg LookaheadRet ; bestlen > lookahead: return lookahead (already in eax)
443 mov eax, ebx
444LookaheadRet:
445
446;;; Restore the stack and return from whence we came.
447
448 add esp, LocalVarsSize
449 pop ebx
450 pop esi
451 pop edi
452 pop ebp
453
454 ret
455; please don't remove this string !
456; You can freely use match686 in any free or commercial app if you don't remove the string in the binary!
457 db 0dh,0ah,"asm686 with masm, optimised assembly code from Brian Raiter, written 1998",0dh,0ah
458
459
460 IFDEF NOUNDERLINE
461 longest_match endp
462 ELSE
463 _longest_match endp
464 ENDIF
465
466 IFNDEF NOUNDERLINE ; default build: export the underscore-prefixed name
467 _match_init proc near
468 ret ; nothing to initialise: return immediately
469 _match_init endp
470 ELSE
471 match_init proc near
472 ret ; nothing to initialise: return immediately
473 match_init endp
474 ENDIF
475
476
477_TEXT ends
478end
diff --git a/contrib/masmx86/match686.obj b/contrib/masmx86/match686.obj
new file mode 100644
index 0000000..2e4631f
--- /dev/null
+++ b/contrib/masmx86/match686.obj
Binary files differ
diff --git a/contrib/masmx86/mkasm.bat b/contrib/masmx86/mkasm.bat
deleted file mode 100755
index 70a51f8..0000000
--- a/contrib/masmx86/mkasm.bat
+++ /dev/null
@@ -1,3 +0,0 @@
1cl /DASMV /I..\.. /O2 /c gvmat32c.c
2ml /coff /Zi /c /Flgvmat32.lst gvmat32.asm
3ml /coff /Zi /c /Flinffas32.lst inffas32.asm
diff --git a/contrib/masmx86/readme.txt b/contrib/masmx86/readme.txt
index 7b57167..413580e 100644
--- a/contrib/masmx86/readme.txt
+++ b/contrib/masmx86/readme.txt
@@ -14,8 +14,8 @@ appropriate makefile, as suggested below.
14Build instructions 14Build instructions
15------------------ 15------------------
16* With Microsoft C and MASM: 16* With Microsoft C and MASM:
17nmake -f win32/Makefile.msc LOC="-DASMV -DASMINF" OBJA="gvmat32c.obj gvmat32.obj inffas32.obj" 17nmake -f win32/Makefile.msc LOC="-DASMV -DASMINF" OBJA="match686.obj inffas32.obj"
18 18
19* With Borland C and TASM: 19* With Borland C and TASM:
20make -f win32/Makefile.bor LOCAL_ZLIB="-DASMV -DASMINF" OBJA="gvmat32c.obj gvmat32.obj inffas32.obj" OBJPA="+gvmat32c.obj+gvmat32.obj+inffas32.obj" 20make -f win32/Makefile.bor LOCAL_ZLIB="-DASMV -DASMINF" OBJA="match686.obj inffas32.obj" OBJPA="+match686.obj+inffas32.obj"
21 21