diff options
Diffstat (limited to 'contrib/asm386/gvmat32.asm')
-rw-r--r-- | contrib/asm386/gvmat32.asm | 559 |
1 files changed, 0 insertions, 559 deletions
diff --git a/contrib/asm386/gvmat32.asm b/contrib/asm386/gvmat32.asm deleted file mode 100644 index 28d527f..0000000 --- a/contrib/asm386/gvmat32.asm +++ /dev/null | |||
@@ -1,559 +0,0 @@ | |||
1 | ; | ||
2 | ; gvmat32.asm -- Asm portion of the optimized longest_match for 32 bits x86 | ||
3 | ; Copyright (C) 1995-1996 Jean-loup Gailly and Gilles Vollant. | ||
4 | ; File written by Gilles Vollant, by modifying the longest_match | ||
5 | ; from Jean-loup Gailly in deflate.c | ||
6 | ; It needs wmask == 0x7fff | ||
7 | ; (assembly code is faster with a fixed wmask) | ||
8 | ; | ||
9 | ; For Visual C++ 4.2 and ML 6.11c (version in directory \MASM611C of Win95 DDK) | ||
10 | ; I compile with : "ml /coff /Zi /c gvmat32.asm" | ||
11 | ; | ||
12 | |||
13 | ;uInt longest_match_7fff(s, cur_match) | ||
14 | ; deflate_state *s; | ||
15 | ; IPos cur_match; /* current match */ | ||
16 | |||
; Stack frame layout of longest_match_7fff. Every offset below is relative to
; esp as it stands after the prologue (four pushes, then "sub esp,NbStackAdd"),
; so esp+NbStack addresses the second argument (cur_match).
17 | NbStack equ 76 | ||
18 | cur_match equ dword ptr[esp+NbStack-0] | ||
19 | str_s equ dword ptr[esp+NbStack-4] | ||
20 | ; 5 dwords on top (ret,ebp,edi,esi,ebx), listed in the order they are pushed | ||
21 | adrret equ dword ptr[esp+NbStack-8] | ||
22 | pushebp equ dword ptr[esp+NbStack-12] | ||
23 | pushedi equ dword ptr[esp+NbStack-16] | ||
24 | pushesi equ dword ptr[esp+NbStack-20] | ||
25 | pushebx equ dword ptr[esp+NbStack-24] | ||
26 | |||
; Local variables (13 dword slots; scan_start only uses the low word of its slot).
27 | chain_length equ dword ptr [esp+NbStack-28] | ||
28 | limit equ dword ptr [esp+NbStack-32] | ||
29 | best_len equ dword ptr [esp+NbStack-36] | ||
30 | window equ dword ptr [esp+NbStack-40] | ||
31 | prev equ dword ptr [esp+NbStack-44] | ||
32 | scan_start equ word ptr [esp+NbStack-48] | ||
33 | wmask equ dword ptr [esp+NbStack-52] | ||
34 | match_start_ptr equ dword ptr [esp+NbStack-56] | ||
35 | nice_match equ dword ptr [esp+NbStack-60] | ||
36 | scan equ dword ptr [esp+NbStack-64] | ||
37 | |||
38 | windowlen equ dword ptr [esp+NbStack-68] | ||
39 | match_start equ dword ptr [esp+NbStack-72] | ||
40 | strend equ dword ptr [esp+NbStack-76] | ||
; Size in bytes of the local-variable area, i.e. everything below the saved ebx.
41 | NbStackAdd equ (NbStack-24) | ||
42 | |||
; Assemble for the 80386 instruction set (the "p" suffix also allows privileged instructions).
43 | .386p | ||
44 | |||
; Module name, followed by the 32-bit flat memory model.
45 | name gvmatch | ||
46 | .MODEL FLAT | ||
47 | |||
48 | |||
49 | |||
50 | ; All the +4 offsets are due to the addition of pending_buf_size to zlib's | ||
51 | ; deflate_state structure after this asm code was first written | ||
52 | ; (if you compile with zlib 1.0.4 or older, remove the +4). | ||
53 | ; Note: these values are valid when the structure is packed on an 8-byte boundary. | ||
; Each dep_xxx constant is the byte offset of field xxx inside deflate_state.
54 | dep_chain_length equ 70h+4 | ||
55 | dep_window equ 2ch+4 | ||
56 | dep_strstart equ 60h+4 | ||
57 | dep_prev_length equ 6ch+4 | ||
58 | dep_nice_match equ 84h+4 | ||
59 | dep_w_size equ 20h+4 | ||
60 | dep_prev equ 34h+4 | ||
61 | dep_w_mask equ 28h+4 | ||
62 | dep_good_match equ 80h+4 | ||
63 | dep_match_start equ 64h+4 | ||
64 | dep_lookahead equ 68h+4 | ||
65 | |||
66 | |||
67 | _TEXT segment | ||
68 | |||
; Export the entry point. Define NOUNDERLINE to export the name without the
; leading underscore.
69 | IFDEF NOUNDERLINE | ||
70 | public longest_match_7fff | ||
71 | ; public match_init | ||
72 | ELSE | ||
73 | public _longest_match_7fff | ||
74 | ; public _match_init | ||
75 | ENDIF | ||
76 | |||
; Match-length limits. MAX_MATCH bounds the string compare; MIN_LOOKAHEAD is
; subtracted from s->w_size to obtain MAX_DIST(s).
77 | MAX_MATCH equ 258 | ||
78 | MIN_MATCH equ 3 | ||
79 | MIN_LOOKAHEAD equ (MAX_MATCH+MIN_MATCH+1) | ||
80 | |||
81 | |||
82 | |||
83 | IFDEF NOUNDERLINE | ||
84 | ;match_init proc near | ||
85 | ; ret | ||
86 | ;match_init endp | ||
87 | ELSE | ||
88 | ;_match_init proc near | ||
89 | ; ret | ||
90 | ;_match_init endp | ||
91 | ENDIF | ||
92 | |||
93 | |||
; longest_match_7fff -- follow the prev[] chain starting at cur_match and look
; for the longest string that matches the data at s->window + s->strstart.
; C prototype (from the file header): uInt longest_match_7fff(s, cur_match)
; In:    on entry [esp+4] = s (deflate_state *), [esp+8] = cur_match
; Out:   eax = min(best_len, s->lookahead); s->match_start is written on exit
; Saves: ebp, edi, esi, ebx (pushed/popped here); ecx and edx are not restored
; Note:  the window mask is hard-coded as 7fffh; s->w_mask is never read.
94 | IFDEF NOUNDERLINE | ||
95 | longest_match_7fff proc near | ||
96 | ELSE | ||
97 | _longest_match_7fff proc near | ||
98 | ENDIF | ||
99 | |||
100 | mov edx,[esp+4] | ||
101 | |||
102 | |||
103 | |||
104 | push ebp | ||
105 | push edi | ||
106 | push esi | ||
107 | push ebx | ||
108 | |||
109 | sub esp,NbStackAdd | ||
110 | |||
111 | ; initialize or check the variables used in match.asm. | ||
112 | mov ebp,edx | ||
113 | |||
114 | ; chain_length = s->max_chain_length | ||
115 | ; if (prev_length>=good_match) chain_length >>= 2 | ||
116 | mov edx,[ebp+dep_chain_length] | ||
117 | mov ebx,[ebp+dep_prev_length] | ||
118 | cmp [ebp+dep_good_match],ebx | ||
119 | ja noshr | ||
120 | shr edx,2 | ||
121 | noshr: | ||
122 | ; we increment chain_length because in the asm, the --chain_length is at the beginning of the loop | ||
123 | inc edx | ||
124 | mov edi,[ebp+dep_nice_match] | ||
125 | mov chain_length,edx | ||
126 | mov eax,[ebp+dep_lookahead] | ||
127 | cmp eax,edi | ||
128 | ; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead; | ||
129 | jae nolookaheadnicematch | ||
130 | mov edi,eax | ||
131 | nolookaheadnicematch: | ||
132 | ; best_len = s->prev_length | ||
133 | mov best_len,ebx | ||
134 | |||
135 | ; window = s->window | ||
136 | mov esi,[ebp+dep_window] | ||
137 | mov ecx,[ebp+dep_strstart] | ||
138 | mov window,esi | ||
139 | |||
140 | mov nice_match,edi | ||
141 | ; scan = window + strstart | ||
142 | add esi,ecx | ||
143 | mov scan,esi | ||
144 | ; dx = *(ushf*)scan | ||
145 | mov dx,word ptr [esi] | ||
146 | ; bx = *(ushf*)(scan+best_len-1) | ||
147 | mov bx,word ptr [esi+ebx-1] | ||
148 | add esi,MAX_MATCH-1 | ||
149 | ; scan_start = *scan | ||
150 | mov scan_start,dx | ||
151 | ; strend = scan + MAX_MATCH-1 | ||
152 | mov strend,esi | ||
153 | ; bx = scan_end = *(ushf*)(scan+best_len-1) | ||
154 | |||
155 | ; IPos limit = s->strstart > (IPos)MAX_DIST(s) ? | ||
156 | ; s->strstart - (IPos)MAX_DIST(s) : NIL; | ||
157 | |||
158 | mov esi,[ebp+dep_w_size] | ||
159 | sub esi,MIN_LOOKAHEAD | ||
160 | ; here esi = MAX_DIST(s) | ||
161 | sub ecx,esi | ||
162 | ja nodist | ||
163 | xor ecx,ecx | ||
164 | nodist: | ||
165 | mov limit,ecx | ||
166 | |||
167 | ; prev = s->prev | ||
168 | mov edx,[ebp+dep_prev] | ||
169 | mov prev,edx | ||
170 | |||
171 | ; match_start = s->match_start; from here on bp holds scan_start, so ebp no longer points to s | ||
172 | mov edx,dword ptr [ebp+dep_match_start] | ||
173 | mov bp,scan_start | ||
174 | mov eax,cur_match | ||
175 | mov match_start,edx | ||
176 | |||
177 | mov edx,window | ||
178 | mov edi,edx | ||
179 | add edi,best_len | ||
180 | mov esi,prev | ||
181 | dec edi | ||
182 | ; windowlen = window + best_len -1 | ||
183 | mov windowlen,edi | ||
184 | |||
185 | jmp beginloop2 | ||
186 | align 4 | ||
187 | |||
188 | ; here, in the loop | ||
189 | ; eax = ax = cur_match | ||
190 | ; ecx = limit | ||
191 | ; bx = scan_end | ||
192 | ; bp = scan_start | ||
193 | ; edi = windowlen (window + best_len -1) | ||
194 | ; esi = prev, edx = window | ||
195 | |||
196 | |||
197 | ;// here; chain_length <=16 | ||
198 | normalbeg0add16: | ||
199 | add chain_length,16 | ||
200 | jz exitloop | ||
201 | normalbeg0: | ||
202 | cmp word ptr[edi+eax],bx | ||
203 | je normalbeg2noroll | ||
204 | rcontlabnoroll: | ||
205 | ; cur_match = prev[cur_match & wmask] | ||
206 | and eax,7fffh | ||
207 | mov ax,word ptr[esi+eax*2] | ||
208 | ; if cur_match <= limit, go to exitloop | ||
209 | cmp ecx,eax | ||
210 | jnb exitloop | ||
211 | ; if --chain_length != 0, go to normalbeg0 | ||
212 | dec chain_length | ||
213 | jnz normalbeg0 | ||
214 | jmp exitloop | ||
215 | |||
216 | normalbeg2noroll: | ||
217 | ; if (scan_start==*(cur_match+window)) goto normalbeg2 | ||
218 | cmp bp,word ptr[edx+eax] | ||
219 | jne rcontlabnoroll | ||
220 | jmp normalbeg2 | ||
221 | |||
222 | contloop3: | ||
223 | mov edi,windowlen | ||
224 | |||
225 | ; cur_match = prev[cur_match & wmask] | ||
226 | and eax,7fffh | ||
227 | mov ax,word ptr[esi+eax*2] | ||
228 | ; if cur_match <= limit, go to exitloop | ||
229 | cmp ecx,eax | ||
230 | jnbexitloopshort1: | ||
231 | jnb exitloop | ||
232 | ; the --chain_length is folded into the "sub chain_length,16+1" just below | ||
233 | |||
234 | |||
235 | ; begin the main loop | ||
236 | beginloop2: | ||
237 | sub chain_length,16+1 | ||
238 | ; if chain_length <=16, don't use the unrolled loop | ||
239 | jna normalbeg0add16 | ||
240 | |||
; Unrolled loop: 16 candidates per pass. While in here, chain_length holds the
; remaining count minus 16; normbeg adds 16-valsub back when a candidate hits.
241 | do16: | ||
242 | cmp word ptr[edi+eax],bx | ||
243 | je normalbeg2dc0 | ||
244 | |||
; maccn: step to the next chain entry, leave through exitloop when
; cur_match <= limit, and jump to lab when the word at match+best_len-1
; equals scan_end.
245 | maccn MACRO lab | ||
246 | and eax,7fffh | ||
247 | mov ax,word ptr[esi+eax*2] | ||
248 | cmp ecx,eax | ||
249 | jnb exitloop | ||
250 | cmp word ptr[edi+eax],bx | ||
251 | je lab | ||
252 | ENDM | ||
253 | |||
254 | rcontloop0: | ||
255 | maccn normalbeg2dc1 | ||
256 | |||
257 | rcontloop1: | ||
258 | maccn normalbeg2dc2 | ||
259 | |||
260 | rcontloop2: | ||
261 | maccn normalbeg2dc3 | ||
262 | |||
263 | rcontloop3: | ||
264 | maccn normalbeg2dc4 | ||
265 | |||
266 | rcontloop4: | ||
267 | maccn normalbeg2dc5 | ||
268 | |||
269 | rcontloop5: | ||
270 | maccn normalbeg2dc6 | ||
271 | |||
272 | rcontloop6: | ||
273 | maccn normalbeg2dc7 | ||
274 | |||
275 | rcontloop7: | ||
276 | maccn normalbeg2dc8 | ||
277 | |||
278 | rcontloop8: | ||
279 | maccn normalbeg2dc9 | ||
280 | |||
281 | rcontloop9: | ||
282 | maccn normalbeg2dc10 | ||
283 | |||
284 | rcontloop10: | ||
285 | maccn short normalbeg2dc11 | ||
286 | |||
287 | rcontloop11: | ||
288 | maccn short normalbeg2dc12 | ||
289 | |||
290 | rcontloop12: | ||
291 | maccn short normalbeg2dc13 | ||
292 | |||
293 | rcontloop13: | ||
294 | maccn short normalbeg2dc14 | ||
295 | |||
296 | rcontloop14: | ||
297 | maccn short normalbeg2dc15 | ||
298 | |||
299 | rcontloop15: | ||
300 | and eax,7fffh | ||
301 | mov ax,word ptr[esi+eax*2] | ||
302 | cmp ecx,eax | ||
303 | jnb exitloop | ||
304 | |||
305 | sub chain_length,16 | ||
306 | ja do16 | ||
307 | jmp normalbeg0add16 | ||
308 | |||
309 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | ||
310 | |||
311 | normbeg MACRO rcontlab,valsub | ||
312 | ; if we are here, we know that *(match+best_len-1) == scan_end | ||
313 | cmp bp,word ptr[edx+eax] | ||
314 | ; if (*(ushf*)match != scan_start) goto rcontlab | ||
315 | jne rcontlab | ||
316 | ; restore the real chain_length, then compare the scan and match strings | ||
317 | add chain_length,16-valsub | ||
318 | jmp iseq | ||
319 | ENDM | ||
320 | |||
321 | |||
322 | normalbeg2dc11: | ||
323 | normbeg rcontloop11,11 | ||
324 | |||
325 | normalbeg2dc12: | ||
326 | normbeg short rcontloop12,12 | ||
327 | |||
328 | normalbeg2dc13: | ||
329 | normbeg short rcontloop13,13 | ||
330 | |||
331 | normalbeg2dc14: | ||
332 | normbeg short rcontloop14,14 | ||
333 | |||
334 | normalbeg2dc15: | ||
335 | normbeg short rcontloop15,15 | ||
336 | |||
337 | normalbeg2dc10: | ||
338 | normbeg rcontloop10,10 | ||
339 | |||
340 | normalbeg2dc9: | ||
341 | normbeg rcontloop9,9 | ||
342 | |||
343 | normalbeg2dc8: | ||
344 | normbeg rcontloop8,8 | ||
345 | |||
346 | normalbeg2dc7: | ||
347 | normbeg rcontloop7,7 | ||
348 | |||
349 | normalbeg2dc6: | ||
350 | normbeg rcontloop6,6 | ||
351 | |||
352 | normalbeg2dc5: | ||
353 | normbeg rcontloop5,5 | ||
354 | |||
355 | normalbeg2dc4: | ||
356 | normbeg rcontloop4,4 | ||
357 | |||
358 | normalbeg2dc3: | ||
359 | normbeg rcontloop3,3 | ||
360 | |||
361 | normalbeg2dc2: | ||
362 | normbeg rcontloop2,2 | ||
363 | |||
364 | normalbeg2dc1: | ||
365 | normbeg rcontloop1,1 | ||
366 | |||
367 | normalbeg2dc0: | ||
368 | normbeg rcontloop0,0 | ||
369 | |||
370 | |||
371 | ; we go in normalbeg2 because *(ushf*)(match+best_len-1) == scan_end | ||
372 | |||
373 | normalbeg2: | ||
374 | mov edi,window | ||
375 | |||
376 | cmp bp,word ptr[edi+eax] | ||
377 | jne contloop3 ; if *(ushf*)match != scan_start, continue | ||
378 | |||
379 | iseq: | ||
380 | ; if we are here, we know that *(match+best_len-1) == scan_end | ||
381 | ; and (*(ushf*)match == scan_start) | ||
382 | |||
383 | mov edi,edx | ||
384 | mov esi,scan ; esi = scan | ||
385 | add edi,eax ; edi = window + cur_match = match | ||
386 | |||
387 | mov edx,[esi+3] ; compare manually dword at match+3 | ||
388 | xor edx,[edi+3] ; and scan +3 | ||
389 | |||
390 | jz begincompare ; if equal, go to long compare | ||
391 | |||
392 | ; we determine the first mismatching byte and calculate len (in esi) | ||
393 | or dl,dl | ||
394 | je eq1rr | ||
395 | mov esi,3 | ||
396 | jmp trfinval | ||
397 | eq1rr: | ||
398 | or dx,dx | ||
399 | je eq1 | ||
400 | |||
401 | mov esi,4 | ||
402 | jmp trfinval | ||
403 | eq1: | ||
404 | and edx,0ffffffh | ||
405 | jz eq11 | ||
406 | mov esi,5 | ||
407 | jmp trfinval | ||
408 | eq11: | ||
409 | mov esi,6 | ||
410 | jmp trfinval | ||
411 | |||
412 | begincompare: | ||
413 | ; here we know that scan and match begin the same | ||
414 | add edi,6 | ||
415 | add esi,6 | ||
416 | mov ecx,(MAX_MATCH-(2+4))/4 ; scan for at most MAX_MATCH bytes | ||
417 | repe cmpsd ; loop until mismatch | ||
418 | |||
419 | je trfin ; go to trfin if there was no mismatch | ||
420 | ; we determine the first mismatching byte | ||
421 | sub esi,4 | ||
422 | mov edx,[edi-4] | ||
423 | xor edx,[esi] | ||
424 | |||
425 | or dl,dl | ||
426 | jnz trfin | ||
427 | inc esi | ||
428 | |||
429 | or dx,dx | ||
430 | jnz trfin | ||
431 | inc esi | ||
432 | |||
433 | and edx,0ffffffh | ||
434 | jnz trfin | ||
435 | inc esi | ||
436 | |||
437 | trfin: | ||
438 | sub esi,scan ; esi = len | ||
439 | trfinval: | ||
440 | ; here we have finished the compare, and esi contains the length of the equal string | ||
441 | cmp esi,best_len ; if len > best_len, go newbestlen | ||
442 | ja short newbestlen | ||
443 | ; now we restore edx, ecx and esi, for the big loop | ||
444 | mov esi,prev | ||
445 | mov ecx,limit | ||
446 | mov edx,window | ||
447 | jmp contloop3 | ||
448 | |||
449 | newbestlen: | ||
450 | mov best_len,esi ; len becomes best_len | ||
451 | |||
452 | mov match_start,eax ; save new position as match_start | ||
453 | cmp esi,nice_match ; if best_len >= nice_match, exit | ||
454 | jae exitloop | ||
455 | mov ecx,scan | ||
456 | mov edx,window ; restore edx=window | ||
457 | add ecx,esi | ||
458 | add esi,edx | ||
459 | |||
460 | dec esi | ||
461 | mov windowlen,esi ; windowlen = window + best_len-1 | ||
462 | mov bx,[ecx-1] ; bx = *(scan+best_len-1) = scan_end | ||
463 | |||
464 | ; now we restore ecx and esi, for the big loop : | ||
465 | mov esi,prev | ||
466 | mov ecx,limit | ||
467 | jmp contloop3 | ||
468 | |||
469 | exitloop: | ||
470 | ; exit : s->match_start=match_start | ||
471 | mov ebx,match_start | ||
472 | mov ebp,str_s | ||
473 | mov ecx,best_len | ||
474 | mov dword ptr [ebp+dep_match_start],ebx | ||
475 | mov eax,dword ptr [ebp+dep_lookahead] | ||
476 | cmp ecx,eax | ||
477 | ja minexlo | ||
478 | mov eax,ecx | ||
479 | minexlo: | ||
480 | ; return min(best_len,s->lookahead) | ||
481 | |||
482 | ; restore stack and register ebx,esi,edi,ebp | ||
483 | add esp,NbStackAdd | ||
484 | |||
485 | pop ebx | ||
486 | pop esi | ||
487 | pop edi | ||
488 | pop ebp | ||
489 | ret | ||
490 | InfoAuthor: | ||
491 | ; please don't remove this string! | ||
492 | ; You are free to use gvmat32 in any free or commercial apps if you don't remove the string in the binary! | ||
493 | db 0dh,0ah,"GVMat32 optimised assembly code written 1996-98 by Gilles Vollant",0dh,0ah | ||
494 | |||
495 | |||
496 | |||
497 | IFDEF NOUNDERLINE | ||
498 | longest_match_7fff endp | ||
499 | ELSE | ||
500 | _longest_match_7fff endp | ||
501 | ENDIF | ||
502 | |||
503 | |||
;-----------------------------------------------------------------------
; cpudetect32 -- identify the processor generation.
; Takes no arguments; presumably called from C as "int cpudetect32(void)"
; (no caller is visible in this file -- confirm against the C side).
;
; Out:   eax = 0300h  if the AC bit of EFLAGS cannot be toggled (80386)
;        eax = 0400h  if AC toggles but the ID bit cannot (486 without CPUID)
;        eax = value CPUID leaves in EAX for leaf 1 otherwise
; Saves: ebx (CPUID overwrites it and the 32-bit C calling conventions
;        require the callee to preserve it)
; Clobb: ecx, edx, flags
;-----------------------------------------------------------------------
IFDEF NOUNDERLINE
cpudetect32     proc near
ELSE
_cpudetect32    proc near
ENDIF

        push    ebx                     ; CPUID below overwrites ebx; keep the caller's value

        ; --- step 1: can the AC bit (bit 18) of EFLAGS be toggled? ---
        pushfd                          ; push original EFLAGS
        pop     eax                     ; get original EFLAGS
        mov     ecx, eax                ; save original EFLAGS
        xor     eax, 40000h             ; flip AC bit in EFLAGS
        push    eax                     ; save new EFLAGS value on stack
        popfd                           ; replace current EFLAGS value
        pushfd                          ; get new EFLAGS
        pop     eax                     ; store new EFLAGS in EAX
        xor     eax, ecx                ; can't toggle AC bit, processor=80386
        jz      end_cpu_is_386          ; jump if 80386 processor
        push    ecx
        popfd                           ; restore AC bit in EFLAGS first

        ; --- step 2: can the ID bit (bit 21) of EFLAGS be toggled? ---
        pushfd                          ; copy of original EFLAGS, popped by the popfd below
        pushfd
        pop     ecx

        mov     eax, ecx                ; get original EFLAGS
        xor     eax, 200000h            ; flip ID bit in EFLAGS
        push    eax                     ; save new EFLAGS value on stack
        popfd                           ; replace current EFLAGS value
        pushfd                          ; get new EFLAGS
        pop     eax                     ; store new EFLAGS in EAX
        popfd                           ; restore original EFLAGS
        xor     eax, ecx                ; can't toggle ID bit,
        je      is_old_486              ; processor=old

        ; --- step 3: CPUID is available, query leaf 1 ---
        mov     eax, 1
        db      0fh, 0a2h               ; CPUID (emitted as bytes; writes eax, ebx, ecx, edx)

exitcpudetect:
        pop     ebx                     ; every path leaves through here with a balanced stack
        ret

end_cpu_is_386:
        mov     eax, 0300h
        jmp     exitcpudetect

is_old_486:
        mov     eax, 0400h
        jmp     exitcpudetect

IFDEF NOUNDERLINE
cpudetect32     endp
ELSE
_cpudetect32    endp
ENDIF
557 | |||
558 | _TEXT ends | ||
559 | end | ||