summaryrefslogtreecommitdiff
path: root/contrib/asm386/gvmat32.asm
diff options
context:
space:
mode:
authorMark Adler <madler@alumni.caltech.edu>2011-09-09 23:17:33 -0700
committerMark Adler <madler@alumni.caltech.edu>2011-09-09 23:17:33 -0700
commit7850e4e406dce1f7a819297eeb151d1ca18e7cd9 (patch)
treed4befddacae46b06c4924193904de533099610b4 /contrib/asm386/gvmat32.asm
parentebd3c2c0e734fc99a1360014ea52ed04fe6aade4 (diff)
downloadzlib-1.0.7.tar.gz
zlib-1.0.7.tar.bz2
zlib-1.0.7.zip
zlib 1.0.7v1.0.7
Diffstat (limited to 'contrib/asm386/gvmat32.asm')
-rw-r--r--contrib/asm386/gvmat32.asm464
1 files changed, 464 insertions, 0 deletions
diff --git a/contrib/asm386/gvmat32.asm b/contrib/asm386/gvmat32.asm
new file mode 100644
index 0000000..b175871
--- /dev/null
+++ b/contrib/asm386/gvmat32.asm
@@ -0,0 +1,464 @@
;
; gvmat32.asm -- Asm portion of the optimized longest_match for 32 bits x86
; Copyright (C) 1995-1996 Jean-loup Gailly and Gilles Vollant.
; File written by Gilles Vollant, by modifying the longest_match
;  from Jean-loup Gailly in deflate.c
;  It needs wmask == 0x7fff
;     (assembly code is faster with a fixed wmask)
;
; For Visual C++ 4.2 and ML 6.11c (version in directory \MASM611C of Win95 DDK)
;   I compile with : "ml /coff /Zi /c gvmat32.asm"
;
; uInt longest_match_gvasm(IPos cur_match,int* match_start_ptr,uInt scan_end,
;                          uInt scan_start,ush* prev,uch* window,int best_len,
;                          IPos limit,uInt chain_length,uch* scanrp,
;                          uInt nice_match);

;uInt longest_match(s, cur_match)
;    deflate_state *s;
;    IPos cur_match;                             /* current match */
20
; Stack frame of _longest_match_asm7fff.  Every name below is an esp-relative
; operand that is valid once the prologue has pushed ebp, edi, esi, ebx and
; executed "sub esp,NbStackAdd"; esp must not move while they are in use.
NbStack         equ     76                      ; esp offset of the last argument

; ---- incoming arguments --------------------------------------------------
cur_match       equ     dword ptr [esp+NbStack-0]
str_s           equ     dword ptr [esp+NbStack-4]

; ---- 5 dwords on top (ret, ebp, edi, esi, ebx) ---------------------------
adrret          equ     dword ptr [esp+NbStack-8]
pushebp         equ     dword ptr [esp+NbStack-12]
pushedi         equ     dword ptr [esp+NbStack-16]
pushesi         equ     dword ptr [esp+NbStack-20]
pushebx         equ     dword ptr [esp+NbStack-24]

; ---- locals ---------------------------------------------------------------
chain_length    equ     dword ptr [esp+NbStack-28]
limit           equ     dword ptr [esp+NbStack-32]
best_len        equ     dword ptr [esp+NbStack-36]
window          equ     dword ptr [esp+NbStack-40]
prev            equ     dword ptr [esp+NbStack-44]
scan_start      equ      word ptr [esp+NbStack-48]      ; 16-bit view of the slot
scan_end        equ      word ptr [esp+NbStack-52]      ; 16-bit view of the slot
match_start_ptr equ     dword ptr [esp+NbStack-56]
nice_match      equ     dword ptr [esp+NbStack-60]
scanrp          equ     dword ptr [esp+NbStack-64]

windowlen       equ     dword ptr [esp+NbStack-68]
match_start     equ     dword ptr [esp+NbStack-72]
strend          equ     dword ptr [esp+NbStack-76]

; Bytes reserved for the locals: everything below the saved ebx slot.
NbStackAdd      equ     (NbStack-24)
46
; Assembler setup: 80386 instruction set (privileged forms enabled by the
; "p" suffix) and the flat 32-bit memory model.
        .386p

        name    gvmatch
        .MODEL  FLAT


; External C routine taking a pointer and a signed dword.  The assembly entry
; point jumps to it when the window mask is not 7fffh.
@lmtype         TYPEDEF PROTO C :PTR, :SDWORD
longest_match_c PROTO   @lmtype
55
; Byte offsets applied to the structure pointer received as the first
; argument (the deflate_state *s of the C prototype), listed by ascending
; offset.
; NOTE(review): the values are hard-coded and must agree with the structure
; layout of the C code this file is linked with -- not verifiable from here.
dep_w_size              equ     020h    ; used to derive MAX_DIST(s)
dep_w_mask              equ     028h    ; compared against 7fffh on entry
dep_window              equ     02ch    ; base pointer of the window
dep_prev                equ     034h    ; base pointer of the 16-bit chain table
dep_strstart            equ     060h    ; index of the string being matched
dep_match_start         equ     064h    ; read on entry, written on exit
dep_lookahead           equ     068h    ; upper bound applied to nice_match
dep_prev_length         equ     06ch    ; initial best_len
dep_max_chain_length    equ     070h    ; initial chain_length
dep_good_match          equ     080h    ; threshold for quartering chain_length
dep_nice_match          equ     084h    ; length at which the search stops
67
68
_TEXT   segment
        public  _longest_match_asm7fff

MAX_MATCH       equ     258
MIN_MATCH       equ     3
MIN_LOOKAHEAD   equ     (MAX_MATCH+MIN_MATCH+1)

; initialize or check the variables used in match.asm.


; -----------------------------------------------------------------------
; uInt longest_match_asm7fff(deflate_state *s, IPos cur_match)
;
; Set match_start to the longest match starting at the given string and
; return its length. Matches shorter or equal to prev_length are discarded,
; in which case the result is equal to prev_length and match_start is
; garbage.  The returned length never exceeds s->lookahead.
;
; IN assertions: cur_match is the head of the hash chain for the current
; string (strstart) and its distance is <= MAX_DIST, and prev_length >= 1
;
; Calling convention: both arguments are on the stack ([esp+4] = s,
;   [esp+8] = cur_match on entry) and are left there for the caller
;   to remove (plain "ret").
; Out:      eax = match length; the dword at s+dep_match_start is rewritten.
; Saved:    ebp, edi, esi, ebx.
; Clobbers: ecx, edx, flags.
; Fallback: when the 16-bit value at s+dep_w_mask is not 7fffh the routine
;   jumps to longest_match_c with the stack and all callee-saved registers
;   exactly as received.
; NOTE(review): "repe cmpsd" relies on the direction flag being clear; it
;   is not set or cleared here -- confirm callers guarantee it.
; -----------------------------------------------------------------------

_longest_match_asm7fff proc near

        ; The mask is tested directly in memory: ebx is callee-saved and
        ; has not been pushed yet, so it must not be used as a scratch
        ; register on the path that hands over to longest_match_c.
        mov     eax,[esp+4]                     ; eax = s
        cmp     word ptr [eax+dep_w_mask],7fffh
        jnz     longest_match_c

        push    ebp
        push    edi
        push    esi
        push    ebx

        sub     esp,NbStackAdd                  ; reserve the locals

        ;//mov ebp,str_s
        mov     ebp,eax                         ; ebp = s

        ; chain_length = max_chain_length, divided by 4 when
        ; prev_length >= good_match.
        mov     eax,[ebp+dep_max_chain_length]
        mov     ebx,[ebp+dep_prev_length]       ; ebx = prev_length
        cmp     [ebp+dep_good_match],ebx
        ja      noshr                           ; good_match > prev_length
        shr     eax,2
noshr:
        mov     edi,[ebp+dep_nice_match]
        mov     chain_length,eax
        mov     edx,[ebp+dep_lookahead]
        cmp     edx,edi
;if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead;
        jae     nolookaheadnicematch
        mov     edi,edx
nolookaheadnicematch:
        mov     best_len,ebx                    ; best_len starts at prev_length


        mov     esi,[ebp+dep_window]
        mov     ecx,[ebp+dep_strstart]
        mov     window,esi

        mov     nice_match,edi
        add     esi,ecx                         ; esi = scan = window + strstart
        mov     scanrp,esi
        mov     ax,word ptr [esi]               ; word at scan
        mov     bx,word ptr [esi+ebx-1]         ; word at scan + best_len - 1
        add     esi,MAX_MATCH-1
        mov     scan_start,ax
        mov     strend,esi                      ; stored only; never read below
        mov     scan_end,bx

;    IPos limit = s->strstart > (IPos)MAX_DIST(s) ?
;        s->strstart - (IPos)MAX_DIST(s) : NIL;

        mov     esi,[ebp+dep_w_size]
        sub     esi,MIN_LOOKAHEAD
        ; here esi = MAX_DIST(s)
        sub     ecx,esi
        ja      nodist                          ; strstart > MAX_DIST: keep the difference
        xor     ecx,ecx                         ; otherwise limit = 0
nodist:
        mov     limit,ecx


        mov     eax,[ebp+dep_prev]
        mov     prev,eax

        ; Load the loop registers.  Only the low half of ebp and ebx is
        ; replaced; the loop compares bp and bx exclusively.
        mov     ebx,dword ptr [ebp+dep_match_start]
        mov     bp,scan_start
        mov     edx,cur_match
        mov     match_start,ebx

        mov     bx,scan_end
        mov     eax,window
        mov     edi,eax
        add     edi,best_len
        mov     esi,prev
        dec     edi                             ; edi = window + best_len - 1
        mov     windowlen,edi

        jmp     beginloop2
        align   4

; here, in the loop
;       eax = window
;       edx = dx = cur_match
;       ecx = limit
;        bx = scan_end
;        bp = scan_start
;       edi = windowlen (window + best_len - 1)
;       esi = prev
; chain_length lives on the stack.


; Slow path, one candidate per iteration.  Entered after 16+1 was taken off
; chain_length without leaving a positive count: give 16 back and stop if
; nothing remains.
normalbeg0add16:
        add     chain_length,16
        jz      exitloop
normalbeg0:
        cmp     word ptr[edi+edx],bx            ; word at match+best_len-1 == scan_end ?
        je      normalbeg2
        and     edx,7fffh                       ; cur_match = prev[cur_match & wmask]
        mov     dx,word ptr[esi+edx*2]
        cmp     ecx,edx
        jnb     exitloop                        ; stop when cur_match <= limit
        dec     chain_length
        jnz     normalbeg0
;jnbexitloopshort1:
        jmp     exitloop

; Resume the search after a candidate has been examined.
contloop3:
        mov     edi,windowlen

; cur_match = prev[cur_match & wmask]
        and     edx,7fffh
        mov     dx,word ptr[esi+edx*2]
; if cur_match <= limit, go to exitloop
        cmp     ecx,edx
jnbexitloopshort1:
        jnb     exitloop

; Take 16+1 off chain_length; when that does not leave a positive count,
; fall back to the one-at-a-time loop above.
beginloop2:
        sub     chain_length,16+1
        jna     normalbeg0add16

; Unrolled run of 16 candidates.
do16:
        cmp     word ptr[edi+edx],bx
        je      normalbeg2dc0

; maccn lab: follow one chain link, leave if cur_match <= limit, then test
; the new candidate against scan_end and branch to lab on a hit.
maccn   MACRO   lab
        and     edx,7fffh
        mov     dx,word ptr[esi+edx*2]
        cmp     ecx,edx
        jnb     exitloop
        cmp     word ptr[edi+edx-0],bx
        je      lab
        ENDM

rcontloop0:
        maccn normalbeg2dc1

rcontloop1:
        maccn normalbeg2dc2

rcontloop2:
        maccn normalbeg2dc3

rcontloop3:
        maccn normalbeg2dc4

rcontloop4:
        maccn normalbeg2dc5

rcontloop5:
        maccn normalbeg2dc6

rcontloop6:
        maccn normalbeg2dc7

rcontloop7:
        maccn normalbeg2dc8

rcontloop8:
        maccn normalbeg2dc9

rcontloop9:
        maccn normalbeg2dc10

rcontloop10:
        maccn normalbeg2dc11

rcontloop11:
        maccn short normalbeg2dc12

rcontloop12:
        maccn short normalbeg2dc13

rcontloop13:
        maccn short normalbeg2dc14

rcontloop14:
        maccn short normalbeg2dc15

rcontloop15:
        and     edx,7fffh
        mov     dx,word ptr[esi+edx*2]
        cmp     ecx,edx
        jnb     short exitloopshort

        sub     chain_length,16                 ; budget for the next run of 16
        ja      do16
        jmp     normalbeg0add16

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; Relay so that the short conditional jump above can reach exitloop.
exitloopshort:
        jmp     exitloop

; normbeg rcontlab,valsub: candidate number valsub of the unrolled run
; matched scan_end.  If its first word differs from scan_start, resume the
; run at rcontlab; otherwise add 16-valsub back to chain_length and go
; compare the strings.  (eax = window here.)
; The order of the blocks below is kept as is: the first four are reached
; by short jumps.
normbeg MACRO   rcontlab,valsub
        cmp     bp,word ptr[eax+edx]
        jne     rcontlab
        add     chain_length,16-valsub
        jmp     iseq
        ENDM

normalbeg2dc12:
        normbeg rcontloop12,12

normalbeg2dc13:
        normbeg rcontloop13,13

normalbeg2dc14:
        normbeg rcontloop14,14

normalbeg2dc15:
        normbeg rcontloop15,15

normalbeg2dc11:
        normbeg rcontloop11,11

normalbeg2dc10:
        normbeg rcontloop10,10


normalbeg2dc9:
        normbeg rcontloop9,9

normalbeg2dc8:
        normbeg rcontloop8,8

normalbeg2dc7:
        normbeg rcontloop7,7

normalbeg2dc5:
        normbeg rcontloop5,5


normalbeg2dc6:
        normbeg rcontloop6,6

normalbeg2dc4:
        normbeg rcontloop4,4

normalbeg2dc3:
        normbeg rcontloop3,3

normalbeg2dc2:
        normbeg rcontloop2,2

normalbeg2dc1:
        normbeg rcontloop1,1

normalbeg2dc0:
        normbeg rcontloop0,0


; we go in normalbeg2 because *(ushf*)(match+best_len-1) == scan_end

normalbeg2:

        ; 10 nop here take 10% time
        mov     edi,window
        ;mov chain_length,eax        ; now, we need eax...

        cmp     bp,word ptr[edi+edx]
        jne     contloop3                       ; if *(ushf*)match != scan_start, continue

; Both the first word and the word at best_len-1 agree: measure the match.
iseq:

        mov     edi,eax                         ; eax = window
        mov     esi,scanrp                      ; esi = scan
        add     edi,edx                         ; edi = window + cur_match = match


        mov     eax,[esi+3]                     ; compare manually dword at match+3
        xor     eax,[edi+3]                     ; and scan +3

        jz      begincompare                    ; if equal, go to long compare

        ; we will determine the unmatch byte and calculate len (in esi)
        ; eax = XOR of bytes 3..6; the lowest non-zero byte is the first
        ; mismatch.
        or      al,al
        je      eq1rr
        mov     esi,3                           ; byte 3 differs
        jmp     trfinval
eq1rr:
        or      ax,ax
        je      eq1

        mov     esi,4                           ; byte 4 differs
        jmp     trfinval
eq1:
        shl     eax,8                           ; drop byte 6; what is left is byte 5
        jz      eq11
        mov     esi,5                           ; byte 5 differs
        jmp     trfinval
eq11:
        mov     esi,6                           ; only byte 6 differs
        jmp     trfinval

begincompare:
        ; here we now scan and match begin same
        add     edi,6
        add     esi,6
        mov     ecx,(MAX_MATCH-(2+4))/4         ;//; scan for at most MAX_MATCH bytes
        repe    cmpsd                           ;//; loop until mismatch

        je      trfin                           ; every dword equal: esi = scan + MAX_MATCH
        ; we determine the unmatch byte
        ; Step back to the dword that differed and advance esi to the
        ; first differing byte inside it.
        sub     esi,4
        mov     eax,[edi-4]
        xor     eax,[esi]
        or      al,al

        jnz     trfin
        inc     esi

        or      ax,ax
        jnz     trfin
        inc     esi

        shl     eax,8
        jnz     trfin
        inc     esi

trfin:
        sub     esi,scanrp                      ; esi = len
trfinval:
        cmp     esi,best_len                    ; if len <= best_len, go contloop2
        jbe     contloop2

        mov     best_len,esi                    ; len become best_len

        mov     match_start,edx
        cmp     esi,nice_match                  ;//; if esi >= nice_match, exit
        mov     ecx,scanrp                      ; mov leaves the flags of the cmp intact
        jae     exitloop
        add     esi,window
        add     ecx,best_len
        dec     esi
        mov     windowlen,esi                   ; windowlen = window + best_len - 1
        mov     bx,[ecx-1]                      ; bx = word at scan + best_len - 1


; now we restore eax, ecx and esi, for the big loop :
contloop2:
        mov     esi,prev
        mov     ecx,limit
        ;mov eax,chain_length
        mov     eax,window
        jmp     contloop3

; Common exit: publish match_start and return the length.
exitloop:
        mov     ebx,match_start
        mov     ebp,str_s
        mov     dword ptr [ebp+dep_match_start],ebx
        mov     eax,best_len
        ; Never report more bytes than are available:
        ; return min(best_len, s->lookahead).
        mov     ecx,dword ptr [ebp+dep_lookahead]
        cmp     eax,ecx
        jbe     retlen
        mov     eax,ecx
retlen:
        add     esp,NbStackAdd                  ; release the locals


        pop     ebx
        pop     esi
        pop     edi
        pop     ebp
        ret

_longest_match_asm7fff endp

_TEXT   ends
end
464 \ No newline at end of file