diff options
Diffstat (limited to 'contrib/asm386/gvmat32.asm')
-rw-r--r-- | contrib/asm386/gvmat32.asm | 464 |
1 files changed, 464 insertions, 0 deletions
diff --git a/contrib/asm386/gvmat32.asm b/contrib/asm386/gvmat32.asm new file mode 100644 index 0000000..b175871 --- /dev/null +++ b/contrib/asm386/gvmat32.asm | |||
@@ -0,0 +1,464 @@ | |||
1 | ; | ||
2 | ; gvmat32.asm -- Asm portion of the optimized longest_match for 32 bits x86 | ||
3 | ; Copyright (C) 1995-1996 Jean-loup Gailly and Gilles Vollant. | ||
4 | ; File written by Gilles Vollant, by modifying the longest_match | ||
5 | ; from Jean-loup Gailly in deflate.c | ||
6 | ; It needs wmask == 0x7fff | ||
7 | ; (assembly code is faster with a fixed wmask) | ||
8 | ; | ||
9 | ; For Visual C++ 4.2 and ML 6.11c (version in directory \MASM611C of Win95 DDK) | ||
10 | ; I compile with : "ml /coff /Zi /c gvmat32.asm" | ||
11 | ; | ||
12 | ; uInt longest_match_gvasm(IPos cur_match,int* match_start_ptr,uInt scan_end, | ||
13 | ; uInt scan_start,ush* prev,uch* window,int best_len, | ||
14 | ; IPos limit,uInt chain_length,uch* scanrp, | ||
15 | ; uInt nice_match); | ||
16 | |||
17 | ;uInt longest_match(s, cur_match) | ||
18 | ; deflate_state *s; | ||
19 | ; IPos cur_match; /* current match */ | ||
20 | |||
; -----------------------------------------------------------------------
; Stack frame layout.
;
; All names below are ESP-relative text equates.  They are valid only
; once the routine has pushed its four saved registers and subtracted
; NbStackAdd from ESP; from then on ESP must not move until the matching
; "add esp,NbStackAdd".
;
;   NbStack     offset from ESP of the highest slot used (cur_match)
;   NbStackAdd  size in bytes of the local-variable area
; -----------------------------------------------------------------------

NbStack         equ     76

; --- incoming arguments -------------------------------------------------
cur_match       equ     dword ptr[esp+NbStack-0]
str_s           equ     dword ptr[esp+NbStack-4]

; --- return address and the four saved registers -------------------------
; Slot order, from high to low address: ret, ebp, edi, esi, ebx.
adrret          equ     dword ptr[esp+NbStack-8]
pushebp         equ     dword ptr[esp+NbStack-12]
pushedi         equ     dword ptr[esp+NbStack-16]
pushesi         equ     dword ptr[esp+NbStack-20]
pushebx         equ     dword ptr[esp+NbStack-24]

; --- local variables ------------------------------------------------------
chain_length    equ     dword ptr [esp+NbStack-28]
limit           equ     dword ptr [esp+NbStack-32]
best_len        equ     dword ptr [esp+NbStack-36]
window          equ     dword ptr [esp+NbStack-40]
prev            equ     dword ptr [esp+NbStack-44]
scan_start      equ     word ptr [esp+NbStack-48]   ; 16-bit value in a dword slot
scan_end        equ     word ptr [esp+NbStack-52]   ; 16-bit value in a dword slot
match_start_ptr equ     dword ptr [esp+NbStack-56]
nice_match      equ     dword ptr [esp+NbStack-60]
scanrp          equ     dword ptr [esp+NbStack-64]

windowlen       equ     dword ptr [esp+NbStack-68]
match_start     equ     dword ptr [esp+NbStack-72]
strend          equ     dword ptr [esp+NbStack-76]  ; lowest slot, at [esp+0]

; Size of the local area: everything below the pushebx slot.  The 24 bytes
; excluded are the four saved registers, the return address and str_s.
; Derived from NbStack so the two values cannot drift apart.
NbStackAdd      equ     (NbStack-24)
46 | |||
47 | .386p | ||
48 | |||
49 | name gvmatch | ||
50 | .MODEL FLAT | ||
51 | |||
52 | |||
53 | @lmtype TYPEDEF PROTO C :PTR , :SDWORD | ||
54 | longest_match_c PROTO @lmtype | ||
55 | |||
56 | dep_max_chain_length equ 70h | ||
57 | dep_window equ 2ch | ||
58 | dep_strstart equ 60h | ||
59 | dep_prev_length equ 6ch | ||
60 | dep_nice_match equ 84h | ||
61 | dep_w_size equ 20h | ||
62 | dep_prev equ 34h | ||
63 | dep_w_mask equ 28h | ||
64 | dep_good_match equ 80h | ||
65 | dep_match_start equ 64h | ||
66 | dep_lookahead equ 68h | ||
67 | |||
68 | |||
_TEXT   segment
        public  _longest_match_asm7fff

MAX_MATCH       equ     258
MIN_MATCH       equ     3
MIN_LOOKAHEAD   equ     (MAX_MATCH+MIN_MATCH+1)

; -----------------------------------------------------------------------
; maccn lab
;   One unrolled step of the hash-chain walk:
;       cur_match = prev[cur_match & 7fffh]
;       if (limit >= cur_match) goto exitloop
;       if (word at windowlen+cur_match == scan_end) goto lab
;   In:     edx = cur_match, esi = prev, ecx = limit,
;           edi = windowlen, bx = scan_end
;   Out:    edx = next cur_match (upper 16 bits zero)
;   Clobb:  flags
; -----------------------------------------------------------------------
maccn   MACRO   lab
        and     edx,7fffh
        mov     dx,word ptr[esi+edx*2]
        cmp     ecx,edx
        jnb     exitloop
        cmp     word ptr[edi+edx-0],bx
        je      lab
        ENDM

; -----------------------------------------------------------------------
; normbeg rcontlab,valsub
;   Reached when the scan_end word matched at unrolled position valsub.
;   Checks the first word of the candidate against scan_start:
;     - mismatch: resume the unrolled walk at rcontlab
;     - match:    add (16 - valsub) back to chain_length, which was
;                 pre-charged for a whole group of 16, and go to iseq
;   In:     bp = scan_start, eax = window, edx = cur_match
;   Clobb:  flags
; -----------------------------------------------------------------------
normbeg MACRO   rcontlab,valsub
        cmp     bp,word ptr[eax+edx]
        jne     rcontlab
        add     chain_length,16-valsub
        jmp     iseq
        ENDM

; -----------------------------------------------------------------------
; uInt longest_match(deflate_state *s, IPos cur_match)
;
; Set match_start to the longest match starting at the given string and
; return its length.  Matches shorter than or equal to prev_length are
; discarded, in which case the result is equal to prev_length and
; match_start is left as it was on entry.
;
; ABI:    32-bit C calling convention, arguments on the stack
;           [esp+4] = s, [esp+8] = cur_match   (at entry)
; Out:    eax = min(best_len, s->lookahead)
;         s->match_start is written
; Saved:  ebp, edi, esi, ebx (pushed/popped here)
; Clobb:  eax, ecx, edx, flags
;
; The fast path is taken only when s->w_mask == 7fffh (the mask is a
; hard-coded immediate in the loop).  Otherwise the routine tail-jumps to
; longest_match_c with the stack and every register except eax untouched.
;
; IN assertions (from the original header): cur_match is the head of the
; hash chain for the current string (strstart), its distance is
; <= MAX_DIST, and prev_length >= 1.
; -----------------------------------------------------------------------
_longest_match_asm7fff proc near

        mov     eax,[esp+4]                     ; eax = s

        ; Compare in memory instead of loading w_mask into bx: ebx is a
        ; callee-saved register and has not been pushed yet, so it must
        ; not be modified on either path out of this test.
        cmp     dword ptr [eax+dep_w_mask],7fffh
        jnz     longest_match_c                 ; other window size: C version

        push    ebp
        push    edi
        push    esi
        push    ebx

        sub     esp,NbStackAdd                  ; ESP-relative equates valid from here

        mov     ebp,eax                         ; ebp = s

        ; chain_length = max_chain_length,
        ; shifted right by 2 if prev_length >= good_match
        mov     eax,[ebp+dep_max_chain_length]
        mov     ebx,[ebp+dep_prev_length]       ; ebx = prev_length
        cmp     [ebp+dep_good_match],ebx
        ja      noshr                           ; good_match > prev_length: keep
        shr     eax,2
noshr:
        mov     edi,[ebp+dep_nice_match]
        mov     chain_length,eax

        ; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead;
        mov     edx,[ebp+dep_lookahead]
        cmp     edx,edi
        jae     nolookaheadnicematch
        mov     edi,edx
nolookaheadnicematch:
        mov     best_len,ebx                    ; best_len = prev_length

        mov     esi,[ebp+dep_window]
        mov     ecx,[ebp+dep_strstart]          ; ecx = strstart
        mov     window,esi

        mov     nice_match,edi
        add     esi,ecx                         ; esi = scan = window + strstart
        mov     scanrp,esi
        mov     ax,word ptr [esi]               ; first two bytes of scan
        mov     bx,word ptr [esi+ebx-1]         ; two bytes at scan+best_len-1
        add     esi,MAX_MATCH-1
        mov     scan_start,ax
        mov     strend,esi                      ; stored; not read again in this routine
        mov     scan_end,bx

        ; IPos limit = s->strstart > (IPos)MAX_DIST(s) ?
        ;     s->strstart - (IPos)MAX_DIST(s) : NIL;
        mov     esi,[ebp+dep_w_size]
        sub     esi,MIN_LOOKAHEAD               ; esi = MAX_DIST(s)
        sub     ecx,esi
        ja      nodist                          ; strstart > MAX_DIST: keep difference
        xor     ecx,ecx                         ; else limit = 0
nodist:
        mov     limit,ecx

        mov     eax,[ebp+dep_prev]
        mov     prev,eax

        mov     ebx,dword ptr [ebp+dep_match_start]
        mov     bp,scan_start                   ; ebp no longer holds s from here on
        mov     edx,cur_match
        mov     match_start,ebx

        mov     bx,scan_end
        mov     eax,window
        mov     edi,eax
        add     edi,best_len
        mov     esi,prev
        dec     edi                             ; edi = window + best_len - 1
        mov     windowlen,edi

        jmp     beginloop2
        align   4

; -----------------------------------------------------------------------
; Register roles inside the search loop:
;   eax = window
;   edx = cur_match (16-bit value, upper half zero after each chain step)
;   ecx = limit
;   bx  = scan_end   (word at scan + best_len - 1)
;   bp  = scan_start (word at scan)
;   edi = windowlen  (window + best_len - 1)
;   esi = prev
; chain_length lives in memory.
; -----------------------------------------------------------------------

; Slow path: fewer than a full group of 16 steps is left.  Adding 16 undoes
; all but one unit of the 17 subtracted at beginloop2; a zero result means
; the chain budget is exhausted.
; NOTE(review): on the very first pass this spends one unit of budget
; before any candidate has been examined -- confirm that is intended.
normalbeg0add16:
        add     chain_length,16
        jz      exitloop
normalbeg0:
        cmp     word ptr[edi+edx-0],bx          ; candidate tail == scan_end ?
        je      normalbeg2
        and     edx,7fffh
        mov     dx,word ptr[esi+edx*2]          ; cur_match = prev[cur_match & wmask]
        cmp     ecx,edx
        jnb     exitloop                        ; limit >= cur_match: stop
        dec     chain_length
        jnz     normalbeg0
        jmp     exitloop

; Re-entry after a candidate has been rejected or recorded.
contloop3:
        mov     edi,windowlen

        ; cur_match = prev[cur_match & wmask]
        and     edx,7fffh
        mov     dx,word ptr[esi+edx*2]
        ; if cur_match <= limit, go to exitloop
        cmp     ecx,edx
        jnb     exitloop

        ; Charge 17 steps up front (this step plus a group of 16).  If 17
        ; or fewer were left, fall back to the one-at-a-time loop.
beginloop2:
        sub     chain_length,16+1
        jna     normalbeg0add16

; Unrolled group of 16 candidate checks; chain_length is not touched
; between the individual checks.
do16:
        cmp     word ptr[edi+edx],bx
        je      normalbeg2dc0

rcontloop0:
        maccn   normalbeg2dc1

rcontloop1:
        maccn   normalbeg2dc2

rcontloop2:
        maccn   normalbeg2dc3

rcontloop3:
        maccn   normalbeg2dc4

rcontloop4:
        maccn   normalbeg2dc5

rcontloop5:
        maccn   normalbeg2dc6

rcontloop6:
        maccn   normalbeg2dc7

rcontloop7:
        maccn   normalbeg2dc8

rcontloop8:
        maccn   normalbeg2dc9

rcontloop9:
        maccn   normalbeg2dc10

rcontloop10:
        maccn   normalbeg2dc11

rcontloop11:
        maccn   short normalbeg2dc12

rcontloop12:
        maccn   short normalbeg2dc13

rcontloop13:
        maccn   short normalbeg2dc14

rcontloop14:
        maccn   short normalbeg2dc15

rcontloop15:
        and     edx,7fffh
        mov     dx,word ptr[esi+edx*2]
        cmp     ecx,edx
        jnb     short exitloopshort

        sub     chain_length,16                 ; charge the next group of 16
        ja      do16
        jmp     normalbeg0add16

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; Trampoline so the branch above can use the short encoding.
exitloopshort:
        jmp     exitloop

; Landing pads for the unrolled checks.  The order below is kept exactly
; as laid out originally: the "short" branches above depend on distances.
normalbeg2dc12:
        normbeg rcontloop12,12

normalbeg2dc13:
        normbeg rcontloop13,13

normalbeg2dc14:
        normbeg rcontloop14,14

normalbeg2dc15:
        normbeg rcontloop15,15

normalbeg2dc11:
        normbeg rcontloop11,11

normalbeg2dc10:
        normbeg rcontloop10,10

normalbeg2dc9:
        normbeg rcontloop9,9

normalbeg2dc8:
        normbeg rcontloop8,8

normalbeg2dc7:
        normbeg rcontloop7,7

normalbeg2dc5:
        normbeg rcontloop5,5

normalbeg2dc6:
        normbeg rcontloop6,6

normalbeg2dc4:
        normbeg rcontloop4,4

normalbeg2dc3:
        normbeg rcontloop3,3

normalbeg2dc2:
        normbeg rcontloop2,2

normalbeg2dc1:
        normbeg rcontloop1,1

normalbeg2dc0:
        normbeg rcontloop0,0


; We get here because *(ushf*)(match+best_len-1) == scan_end
normalbeg2:
        ; (original note: 10 nop here take 10% time)
        mov     edi,window

        cmp     bp,word ptr[edi+edx]
        jne     contloop3                       ; *(ushf*)match != scan_start: next

; Candidate agrees with scan on its first word and on the word at
; best_len-1.  Byte 2 is not compared here.
; NOTE(review): presumably byte 2 is guaranteed equal by the hash that
; produced the chain -- confirm against the C implementation.
iseq:
        mov     edi,eax
        mov     esi,scanrp                      ; esi = scan
        add     edi,edx                         ; edi = window + cur_match = match

        mov     eax,[esi+3]                     ; bytes 3..6 of scan
        xor     eax,[edi+3]                     ; eax = difference mask vs. match

        jz      begincompare                    ; all four equal: long compare

        ; Locate the first differing byte among 3..6; len ends up in esi.
        or      al,al
        je      eq1rr
        mov     esi,3                           ; byte 3 differs
        jmp     trfinval
eq1rr:
        or      ax,ax
        je      eq1
        mov     esi,4                           ; byte 4 differs
        jmp     trfinval
eq1:
        shl     eax,8                           ; discard byte 6, keep byte 5
        jz      eq11
        mov     esi,5                           ; byte 5 differs
        jmp     trfinval
eq11:
        mov     esi,6                           ; only byte 6 differs
        jmp     trfinval

begincompare:
        ; Bytes up to offset 6 are known equal.  Continue dword by dword
        ; from offset 6 so the compare ends exactly at MAX_MATCH.
        add     edi,6
        add     esi,6
        mov     ecx,(MAX_MATCH-(2+4))/4         ; scan for at most MAX_MATCH bytes
        repe    cmpsd                           ; loop until mismatch

        je      trfin                           ; no mismatch: esi = scan + MAX_MATCH
        ; esi/edi point just past the differing dword: step back and find
        ; the first differing byte inside it.
        sub     esi,4
        mov     eax,[edi-4]
        xor     eax,[esi]
        or      al,al

        jnz     trfin
        inc     esi

        or      ax,ax
        jnz     trfin
        inc     esi

        shl     eax,8
        jnz     trfin
        inc     esi

trfin:
        sub     esi,scanrp                      ; esi = len
trfinval:
        cmp     esi,best_len                    ; if len <= best_len, go contloop2
        jbe     contloop2

        mov     best_len,esi                    ; len becomes best_len

        mov     match_start,edx
        cmp     esi,nice_match                  ; flags survive the mov below
        mov     ecx,scanrp
        jae     exitloop                        ; len >= nice_match: good enough
        add     esi,window
        add     ecx,best_len
        dec     esi
        mov     windowlen,esi                   ; windowlen = window + best_len - 1
        mov     bx,[ecx-1]                      ; scan_end = word at scan+best_len-1

; Restore the loop registers destroyed by the compare above.
contloop2:
        mov     esi,prev
        mov     ecx,limit
        mov     eax,window
        jmp     contloop3

exitloop:
        mov     ebx,match_start
        mov     ebp,str_s                       ; ebp = s again
        mov     ecx,best_len
        mov     dword ptr [ebp+dep_match_start],ebx

        ; return min(best_len, s->lookahead).  The compare above always
        ; examines up to MAX_MATCH bytes, so best_len can run past the
        ; bytes that are actually available.
        mov     eax,dword ptr [ebp+dep_lookahead]
        cmp     ecx,eax
        ja      minexlo                         ; best_len > lookahead: return lookahead
        mov     eax,ecx                         ; else return best_len
minexlo:
        add     esp,NbStackAdd

        pop     ebx
        pop     esi
        pop     edi
        pop     ebp
        ret

_longest_match_asm7fff endp

_TEXT   ends
463 | end | ||
464 | \ No newline at end of file | ||