diff options
author | Mark Adler <madler@alumni.caltech.edu> | 2011-09-09 23:19:21 -0700 |
---|---|---|
committer | Mark Adler <madler@alumni.caltech.edu> | 2011-09-09 23:19:21 -0700 |
commit | b8c9ecb0765fc39423c07613d909c5193378bdfd (patch) | |
tree | c2a2964d00dcfee6c7426ffcdf8e8060727bc2bf /contrib/asm386/gvmat32.asm | |
parent | 6759211ad8a5006689216a86c3267bb503bfccc1 (diff) | |
download | zlib-1.0.9.tar.gz zlib-1.0.9.tar.bz2 zlib-1.0.9.zip |
zlib 1.0.9 v1.0.9
Diffstat (limited to '')
-rw-r--r-- | contrib/asm386/gvmat32.asm | 696 |
1 files changed, 395 insertions, 301 deletions
diff --git a/contrib/asm386/gvmat32.asm b/contrib/asm386/gvmat32.asm index 2918a5d..28d527f 100644 --- a/contrib/asm386/gvmat32.asm +++ b/contrib/asm386/gvmat32.asm | |||
@@ -9,40 +9,36 @@ | |||
9 | ; For Visual C++ 4.2 and ML 6.11c (version in directory \MASM611C of Win95 DDK) | 9 | ; For Visual C++ 4.2 and ML 6.11c (version in directory \MASM611C of Win95 DDK) |
10 | ; I compile with : "ml /coff /Zi /c gvmat32.asm" | 10 | ; I compile with : "ml /coff /Zi /c gvmat32.asm" |
11 | ; | 11 | ; |
12 | ; uInt longest_match_gvasm(IPos cur_match,int* match_start_ptr,uInt scan_end, | ||
13 | ; uInt scan_start,ush* prev,uch* window,int best_len, | ||
14 | ; IPos limit,uInt chain_length,uch* scanrp, | ||
15 | ; uInt nice_match); | ||
16 | 12 | ||
17 | ;uInt longest_match(s, cur_match) | 13 | ;uInt longest_match_7fff(s, cur_match) |
18 | ; deflate_state *s; | 14 | ; deflate_state *s; |
19 | ; IPos cur_match; /* current match */ | 15 | ; IPos cur_match; /* current match */ |
20 | 16 | ||
21 | NbStack equ 76 | 17 | NbStack equ 76 |
22 | cur_match equ dword ptr[esp+NbStack-0] | 18 | cur_match equ dword ptr[esp+NbStack-0] |
23 | str_s equ dword ptr[esp+NbStack-4] | 19 | str_s equ dword ptr[esp+NbStack-4] |
24 | ; 5 dword on top (ret,ebp,esi,edi,ebx) | 20 | ; 5 dword on top (ret,ebp,esi,edi,ebx) |
25 | adrret equ dword ptr[esp+NbStack-8] | 21 | adrret equ dword ptr[esp+NbStack-8] |
26 | pushebp equ dword ptr[esp+NbStack-12] | 22 | pushebp equ dword ptr[esp+NbStack-12] |
27 | pushedi equ dword ptr[esp+NbStack-16] | 23 | pushedi equ dword ptr[esp+NbStack-16] |
28 | pushesi equ dword ptr[esp+NbStack-20] | 24 | pushesi equ dword ptr[esp+NbStack-20] |
29 | pushebx equ dword ptr[esp+NbStack-24] | 25 | pushebx equ dword ptr[esp+NbStack-24] |
30 | 26 | ||
31 | chain_length equ dword ptr [esp+NbStack-28] | 27 | chain_length equ dword ptr [esp+NbStack-28] |
32 | limit equ dword ptr [esp+NbStack-32] | 28 | limit equ dword ptr [esp+NbStack-32] |
33 | best_len equ dword ptr [esp+NbStack-36] | 29 | best_len equ dword ptr [esp+NbStack-36] |
34 | window equ dword ptr [esp+NbStack-40] | 30 | window equ dword ptr [esp+NbStack-40] |
35 | prev equ dword ptr [esp+NbStack-44] | 31 | prev equ dword ptr [esp+NbStack-44] |
36 | scan_start equ word ptr [esp+NbStack-48] | 32 | scan_start equ word ptr [esp+NbStack-48] |
37 | scan_end equ word ptr [esp+NbStack-52] | 33 | wmask equ dword ptr [esp+NbStack-52] |
38 | match_start_ptr equ dword ptr [esp+NbStack-56] | 34 | match_start_ptr equ dword ptr [esp+NbStack-56] |
39 | nice_match equ dword ptr [esp+NbStack-60] | 35 | nice_match equ dword ptr [esp+NbStack-60] |
40 | scanrp equ dword ptr [esp+NbStack-64] | 36 | scan equ dword ptr [esp+NbStack-64] |
41 | 37 | ||
42 | windowlen equ dword ptr [esp+NbStack-68] | 38 | windowlen equ dword ptr [esp+NbStack-68] |
43 | match_start equ dword ptr [esp+NbStack-72] | 39 | match_start equ dword ptr [esp+NbStack-72] |
44 | strend equ dword ptr [esp+NbStack-76] | 40 | strend equ dword ptr [esp+NbStack-76] |
45 | NbStackAdd equ (76-24) | 41 | NbStackAdd equ (NbStack-24) |
46 | 42 | ||
47 | .386p | 43 | .386p |
48 | 44 | ||
@@ -50,11 +46,11 @@ | |||
50 | .MODEL FLAT | 46 | .MODEL FLAT |
51 | 47 | ||
52 | 48 | ||
53 | @lmtype TYPEDEF PROTO C :PTR , :SDWORD | ||
54 | longest_match_c PROTO @lmtype | ||
55 | 49 | ||
56 | ; all the +4 offsets are due to the addition of pending_buf_size | 50 | ; all the +4 offsets are due to the addition of pending_buf_size (in zlib |
57 | ; in the deflate_state structure since the asm code was first written | 51 | ; in the deflate_state structure since the asm code was first written |
52 | ; (if you compile with zlib 1.0.4 or older, remove the +4). | ||
53 | ; Note : these values are valid when the structure is packed on an 8-byte boundary | ||
58 | dep_chain_length equ 70h+4 | 54 | dep_chain_length equ 70h+4 |
59 | dep_window equ 2ch+4 | 55 | dep_window equ 2ch+4 |
60 | dep_strstart equ 60h+4 | 56 | dep_strstart equ 60h+4 |
@@ -68,398 +64,496 @@ longest_match_c PROTO @lmtype | |||
68 | dep_lookahead equ 68h+4 | 64 | dep_lookahead equ 68h+4 |
69 | 65 | ||
70 | 66 | ||
71 | _TEXT segment | 67 | _TEXT segment |
72 | public _longest_match_asm7fff | ||
73 | 68 | ||
74 | MAX_MATCH equ 258 | 69 | IFDEF NOUNDERLINE |
75 | MIN_MATCH equ 3 | 70 | public longest_match_7fff |
76 | MIN_LOOKAHEAD equ (MAX_MATCH+MIN_MATCH+1) | 71 | ; public match_init |
77 | 72 | ELSE | |
78 | ; initialize or check the variables used in match.asm. | 73 | public _longest_match_7fff |
74 | ; public _match_init | ||
75 | ENDIF | ||
79 | 76 | ||
77 | MAX_MATCH equ 258 | ||
78 | MIN_MATCH equ 3 | ||
79 | MIN_LOOKAHEAD equ (MAX_MATCH+MIN_MATCH+1) | ||
80 | 80 | ||
81 | ; ----------------------------------------------------------------------- | ||
82 | ; Set match_start to the longest match starting at the given string and | ||
83 | ; return its length. Matches shorter or equal to prev_length are discarded, | ||
84 | ; in which case the result is equal to prev_length and match_start is | ||
85 | ; garbage. | ||
86 | ; IN assertions: cur_match is the head of the hash chain for the current | ||
87 | ; string (strstart) and its distance is <= MAX_DIST, and prev_length >= 1 | ||
88 | 81 | ||
89 | ; int longest_match(cur_match) | ||
90 | 82 | ||
91 | _longest_match_asm7fff proc near | 83 | IFDEF NOUNDERLINE |
84 | ;match_init proc near | ||
85 | ; ret | ||
86 | ;match_init endp | ||
87 | ELSE | ||
88 | ;_match_init proc near | ||
89 | ; ret | ||
90 | ;_match_init endp | ||
91 | ENDIF | ||
92 | 92 | ||
93 | 93 | ||
94 | IFDEF NOUNDERLINE | ||
95 | longest_match_7fff proc near | ||
96 | ELSE | ||
97 | _longest_match_7fff proc near | ||
98 | ENDIF | ||
94 | 99 | ||
95 | ; return address | 100 | mov edx,[esp+4] |
96 | 101 | ||
97 | mov eax,[esp+4] | ||
98 | mov bx,[eax+dep_w_mask] | ||
99 | cmp bx,7fffh | ||
100 | jnz longest_match_c | ||
101 | 102 | ||
102 | push ebp | ||
103 | push edi | ||
104 | push esi | ||
105 | push ebx | ||
106 | 103 | ||
107 | sub esp,NbStackAdd | 104 | push ebp |
105 | push edi | ||
106 | push esi | ||
107 | push ebx | ||
108 | 108 | ||
109 | ;//mov ebp,str_s | 109 | sub esp,NbStackAdd |
110 | mov ebp,eax | ||
111 | 110 | ||
112 | mov eax,[ebp+dep_max_chain_length] | 111 | ; initialize or check the variables used in match.asm. |
113 | mov ebx,[ebp+dep_prev_length] | 112 | mov ebp,edx |
114 | cmp [ebp+dep_good_match],ebx ; if prev_length>=good_match chain_length >>= 2 | 113 | |
115 | ja noshr | 114 | ; chain_length = s->max_chain_length |
116 | shr eax,2 | 115 | ; if (prev_length>=good_match) chain_length >>= 2 |
116 | mov edx,[ebp+dep_chain_length] | ||
117 | mov ebx,[ebp+dep_prev_length] | ||
118 | cmp [ebp+dep_good_match],ebx | ||
119 | ja noshr | ||
120 | shr edx,2 | ||
117 | noshr: | 121 | noshr: |
118 | mov edi,[ebp+dep_nice_match] | 122 | ; we increment chain_length because in the asm, the --chain_length is at the beginning of the loop |
119 | mov chain_length,eax | 123 | inc edx |
120 | mov edx,[ebp+dep_lookahead] | 124 | mov edi,[ebp+dep_nice_match] |
121 | cmp edx,edi | 125 | mov chain_length,edx |
122 | ;if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead; | 126 | mov eax,[ebp+dep_lookahead] |
123 | jae nolookaheadnicematch | 127 | cmp eax,edi |
124 | mov edi,edx | 128 | ; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead; |
129 | jae nolookaheadnicematch | ||
130 | mov edi,eax | ||
125 | nolookaheadnicematch: | 131 | nolookaheadnicematch: |
126 | mov best_len,ebx | 132 | ; best_len = s->prev_length |
127 | 133 | mov best_len,ebx | |
128 | 134 | ||
129 | mov esi,[ebp+dep_window] | 135 | ; window = s->window |
130 | mov ecx,[ebp+dep_strstart] | 136 | mov esi,[ebp+dep_window] |
131 | mov window,esi | 137 | mov ecx,[ebp+dep_strstart] |
132 | 138 | mov window,esi | |
133 | mov nice_match,edi | 139 | |
134 | add esi,ecx | 140 | mov nice_match,edi |
135 | mov scanrp,esi | 141 | ; scan = window + strstart |
136 | mov ax,word ptr [esi] | 144 | ; dx = *scan |
137 | mov bx,word ptr [esi+ebx-1] | 143 | mov scan,esi |
138 | add esi,MAX_MATCH-1 | 144 | ; dx = *window |
139 | mov scan_start,ax | 145 | mov dx,word ptr [esi] |
140 | mov strend,esi | 146 | ; bx = *(scan+best_len-1) |
141 | mov scan_end,bx | 147 | mov bx,word ptr [esi+ebx-1] |
148 | add esi,MAX_MATCH-1 | ||
149 | ; scan_start = *scan | ||
150 | mov scan_start,dx | ||
151 | ; strend = scan + MAX_MATCH-1 | ||
152 | mov strend,esi | ||
153 | ; bx = scan_end = *(scan+best_len-1) | ||
142 | 154 | ||
143 | ; IPos limit = s->strstart > (IPos)MAX_DIST(s) ? | 155 | ; IPos limit = s->strstart > (IPos)MAX_DIST(s) ? |
144 | ; s->strstart - (IPos)MAX_DIST(s) : NIL; | 156 | ; s->strstart - (IPos)MAX_DIST(s) : NIL; |
145 | 157 | ||
146 | mov esi,[ebp+dep_w_size] | 158 | mov esi,[ebp+dep_w_size] |
147 | sub esi,MIN_LOOKAHEAD | 159 | sub esi,MIN_LOOKAHEAD |
148 | ; here esi = MAX_DIST(s) | 160 | ; here esi = MAX_DIST(s) |
149 | sub ecx,esi | 161 | sub ecx,esi |
150 | ja nodist | 162 | ja nodist |
151 | xor ecx,ecx | 163 | xor ecx,ecx |
152 | nodist: | 164 | nodist: |
153 | mov limit,ecx | 165 | mov limit,ecx |
154 | |||
155 | |||
156 | |||
157 | |||
158 | mov eax,[ebp+dep_prev] | ||
159 | mov prev,eax | ||
160 | 166 | ||
161 | mov ebx,dword ptr [ebp+dep_match_start] | 167 | ; prev = s->prev |
162 | mov bp,scan_start | 168 | mov edx,[ebp+dep_prev] |
163 | mov edx,cur_match | 169 | mov prev,edx |
164 | mov match_start,ebx | ||
165 | 170 | ||
166 | mov bx,scan_end | 171 | ; |
167 | mov eax,window | 172 | mov edx,dword ptr [ebp+dep_match_start] |
168 | mov edi,eax | 173 | mov bp,scan_start |
169 | add edi,best_len | 174 | mov eax,cur_match |
170 | mov esi,prev | 175 | mov match_start,edx |
171 | dec edi | 176 | |
172 | mov windowlen,edi | 177 | mov edx,window |
173 | 178 | mov edi,edx | |
174 | jmp beginloop2 | 179 | add edi,best_len |
175 | align 4 | 180 | mov esi,prev |
181 | dec edi | ||
182 | ; windowlen = window + best_len -1 | ||
183 | mov windowlen,edi | ||
184 | |||
185 | jmp beginloop2 | ||
186 | align 4 | ||
176 | 187 | ||
177 | ; here, in the loop | 188 | ; here, in the loop |
178 | ;;;; eax = chain_length | 189 | ; eax = ax = cur_match |
179 | ; edx = dx = cur_match | ||
180 | ; ecx = limit | 190 | ; ecx = limit |
181 | ; bx = scan_end | 191 | ; bx = scan_end |
182 | ; bp = scan_start | 192 | ; bp = scan_start |
183 | ; edi = windowlen (window + best_len) | 193 | ; edi = windowlen (window + best_len -1) |
184 | ; esi = prev | 194 | ; esi = prev |
185 | 195 | ||
186 | 196 | ||
187 | ;// here; eax <=16 | 197 | ;// here; chain_length <=16 |
188 | normalbeg0add16: | 198 | normalbeg0add16: |
189 | add chain_length,16 | 199 | add chain_length,16 |
190 | jz exitloop | 200 | jz exitloop |
191 | normalbeg0: | 201 | normalbeg0: |
192 | cmp word ptr[edi+edx-0],bx | 202 | cmp word ptr[edi+eax],bx |
193 | je normalbeg2 | 203 | je normalbeg2noroll |
194 | and edx,7fffh | 204 | rcontlabnoroll: |
195 | mov dx,word ptr[esi+edx*2] | 205 | ; cur_match = prev[cur_match & wmask] |
196 | cmp ecx,edx | 206 | and eax,7fffh |
197 | jnb exitloop | 207 | mov ax,word ptr[esi+eax*2] |
198 | dec chain_length | 208 | ; if cur_match <= limit, go to exitloop |
199 | jnz normalbeg0 | 209 | cmp ecx,eax |
200 | ;jnbexitloopshort1: | 210 | jnb exitloop |
201 | jmp exitloop | 211 | ; if --chain_length != 0, go to normalbeg0 |
212 | dec chain_length | ||
213 | jnz normalbeg0 | ||
214 | jmp exitloop | ||
215 | |||
216 | normalbeg2noroll: | ||
217 | ; if (scan_start==*(cur_match+window)) goto normalbeg2 | ||
218 | cmp bp,word ptr[edx+eax] | ||
219 | jne rcontlabnoroll | ||
220 | jmp normalbeg2 | ||
202 | 221 | ||
203 | contloop3: | 222 | contloop3: |
204 | mov edi,windowlen | 223 | mov edi,windowlen |
205 | 224 | ||
206 | ; cur_match = prev[cur_match & wmask] | 225 | ; cur_match = prev[cur_match & wmask] |
207 | and edx,7fffh | 226 | and eax,7fffh |
208 | mov dx,word ptr[esi+edx*2] | 227 | mov ax,word ptr[esi+eax*2] |
209 | ; if cur_match > limit, go to exitloop | 228 | ; if cur_match > limit, go to exitloop |
210 | cmp ecx,edx | 229 | cmp ecx,eax |
211 | jnbexitloopshort1: | 230 | jnbexitloopshort1: |
212 | jnb exitloop | 231 | jnb exitloop |
213 | ; if --chain_length != 0, go to exitloop | 232 | ; if --chain_length != 0, go to exitloop |
214 | 233 | ||
234 | |||
235 | ; begin the main loop | ||
215 | beginloop2: | 236 | beginloop2: |
216 | sub chain_length,16+1 | 237 | sub chain_length,16+1 |
217 | jna normalbeg0add16 | 238 | ; if chain_length <=16, don't use the unrolled loop |
239 | jna normalbeg0add16 | ||
218 | 240 | ||
219 | do16: | 241 | do16: |
220 | cmp word ptr[edi+edx],bx | 242 | cmp word ptr[edi+eax],bx |
221 | je normalbeg2dc0 | 243 | je normalbeg2dc0 |
222 | 244 | ||
223 | maccn MACRO lab | 245 | maccn MACRO lab |
224 | and edx,7fffh | 246 | and eax,7fffh |
225 | mov dx,word ptr[esi+edx*2] | 247 | mov ax,word ptr[esi+eax*2] |
226 | cmp ecx,edx | 248 | cmp ecx,eax |
227 | jnb exitloop | 249 | jnb exitloop |
228 | cmp word ptr[edi+edx-0],bx | 250 | cmp word ptr[edi+eax],bx |
229 | je lab | 251 | je lab |
230 | ENDM | 252 | ENDM |
231 | 253 | ||
232 | rcontloop0: | 254 | rcontloop0: |
233 | maccn normalbeg2dc1 | 255 | maccn normalbeg2dc1 |
234 | 256 | ||
235 | rcontloop1: | 257 | rcontloop1: |
236 | maccn normalbeg2dc2 | 258 | maccn normalbeg2dc2 |
237 | 259 | ||
238 | rcontloop2: | 260 | rcontloop2: |
239 | maccn normalbeg2dc3 | 261 | maccn normalbeg2dc3 |
240 | 262 | ||
241 | rcontloop3: | 263 | rcontloop3: |
242 | maccn normalbeg2dc4 | 264 | maccn normalbeg2dc4 |
243 | 265 | ||
244 | rcontloop4: | 266 | rcontloop4: |
245 | maccn normalbeg2dc5 | 267 | maccn normalbeg2dc5 |
246 | 268 | ||
247 | rcontloop5: | 269 | rcontloop5: |
248 | maccn normalbeg2dc6 | 270 | maccn normalbeg2dc6 |
249 | 271 | ||
250 | rcontloop6: | 272 | rcontloop6: |
251 | maccn normalbeg2dc7 | 273 | maccn normalbeg2dc7 |
252 | 274 | ||
253 | rcontloop7: | 275 | rcontloop7: |
254 | maccn normalbeg2dc8 | 276 | maccn normalbeg2dc8 |
255 | 277 | ||
256 | rcontloop8: | 278 | rcontloop8: |
257 | maccn normalbeg2dc9 | 279 | maccn normalbeg2dc9 |
258 | 280 | ||
259 | rcontloop9: | 281 | rcontloop9: |
260 | maccn normalbeg2dc10 | 282 | maccn normalbeg2dc10 |
261 | 283 | ||
262 | rcontloop10: | 284 | rcontloop10: |
263 | maccn normalbeg2dc11 | 285 | maccn short normalbeg2dc11 |
264 | 286 | ||
265 | rcontloop11: | 287 | rcontloop11: |
266 | maccn short normalbeg2dc12 | 288 | maccn short normalbeg2dc12 |
267 | 289 | ||
268 | rcontloop12: | 290 | rcontloop12: |
269 | maccn short normalbeg2dc13 | 291 | maccn short normalbeg2dc13 |
270 | 292 | ||
271 | rcontloop13: | 293 | rcontloop13: |
272 | maccn short normalbeg2dc14 | 294 | maccn short normalbeg2dc14 |
273 | 295 | ||
274 | rcontloop14: | 296 | rcontloop14: |
275 | maccn short normalbeg2dc15 | 297 | maccn short normalbeg2dc15 |
276 | 298 | ||
277 | rcontloop15: | 299 | rcontloop15: |
278 | and edx,7fffh | 300 | and eax,7fffh |
279 | mov dx,word ptr[esi+edx*2] | 301 | mov ax,word ptr[esi+eax*2] |
280 | cmp ecx,edx | 302 | cmp ecx,eax |
281 | jnb short exitloopshort | 303 | jnb exitloop |
282 | 304 | ||
283 | sub chain_length,16 | 305 | sub chain_length,16 |
284 | ja do16 | 306 | ja do16 |
285 | jmp normalbeg0add16 | 307 | jmp normalbeg0add16 |
286 | 308 | ||
287 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | 309 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
288 | 310 | ||
289 | exitloopshort: | 311 | normbeg MACRO rcontlab,valsub |
290 | jmp exitloop | 312 | ; if we are here, we know that *(match+best_len-1) == scan_end |
313 | cmp bp,word ptr[edx+eax] | ||
314 | ; if (match != scan_start) goto rcontlab | ||
315 | jne rcontlab | ||
316 | ; calculate the good chain_length, and we'll compare scan and match string | ||
317 | add chain_length,16-valsub | ||
318 | jmp iseq | ||
319 | ENDM | ||
291 | 320 | ||
292 | normbeg MACRO rcontlab,valsub | 321 | |
293 | cmp bp,word ptr[eax+edx] | 322 | normalbeg2dc11: |
294 | jne rcontlab | 323 | normbeg rcontloop11,11 |
295 | add chain_length,16-valsub | ||
296 | jmp iseq | ||
297 | ENDM | ||
298 | 324 | ||
299 | normalbeg2dc12: | 325 | normalbeg2dc12: |
300 | normbeg rcontloop12,12 | 326 | normbeg short rcontloop12,12 |
301 | 327 | ||
302 | normalbeg2dc13: | 328 | normalbeg2dc13: |
303 | normbeg rcontloop13,13 | 329 | normbeg short rcontloop13,13 |
304 | 330 | ||
305 | normalbeg2dc14: | 331 | normalbeg2dc14: |
306 | normbeg rcontloop14,14 | 332 | normbeg short rcontloop14,14 |
307 | 333 | ||
308 | normalbeg2dc15: | 334 | normalbeg2dc15: |
309 | normbeg rcontloop15,15 | 335 | normbeg short rcontloop15,15 |
310 | |||
311 | normalbeg2dc11: | ||
312 | normbeg rcontloop11,11 | ||
313 | 336 | ||
314 | normalbeg2dc10: | 337 | normalbeg2dc10: |
315 | normbeg rcontloop10,10 | 338 | normbeg rcontloop10,10 |
316 | |||
317 | 339 | ||
318 | normalbeg2dc9: | 340 | normalbeg2dc9: |
319 | normbeg rcontloop9,9 | 341 | normbeg rcontloop9,9 |
320 | 342 | ||
321 | normalbeg2dc8: | 343 | normalbeg2dc8: |
322 | normbeg rcontloop8,8 | 344 | normbeg rcontloop8,8 |
323 | 345 | ||
324 | normalbeg2dc7: | 346 | normalbeg2dc7: |
325 | normbeg rcontloop7,7 | 347 | normbeg rcontloop7,7 |
326 | |||
327 | normalbeg2dc5: | ||
328 | normbeg rcontloop5,5 | ||
329 | |||
330 | |||
331 | |||
332 | |||
333 | 348 | ||
334 | normalbeg2dc6: | 349 | normalbeg2dc6: |
335 | normbeg rcontloop6,6 | 350 | normbeg rcontloop6,6 |
351 | |||
352 | normalbeg2dc5: | ||
353 | normbeg rcontloop5,5 | ||
336 | 354 | ||
337 | normalbeg2dc4: | 355 | normalbeg2dc4: |
338 | normbeg rcontloop4,4 | 356 | normbeg rcontloop4,4 |
339 | 357 | ||
340 | normalbeg2dc3: | 358 | normalbeg2dc3: |
341 | normbeg rcontloop3,3 | 359 | normbeg rcontloop3,3 |
342 | 360 | ||
343 | normalbeg2dc2: | 361 | normalbeg2dc2: |
344 | normbeg rcontloop2,2 | 362 | normbeg rcontloop2,2 |
345 | 363 | ||
346 | normalbeg2dc1: | 364 | normalbeg2dc1: |
347 | normbeg rcontloop1,1 | 365 | normbeg rcontloop1,1 |
348 | 366 | ||
349 | normalbeg2dc0: | 367 | normalbeg2dc0: |
350 | normbeg rcontloop0,0 | 368 | normbeg rcontloop0,0 |
351 | 369 | ||
352 | 370 | ||
353 | ; we go in normalbeg2 because *(ushf*)(match+best_len-1) == scan_end | 371 | ; we go in normalbeg2 because *(ushf*)(match+best_len-1) == scan_end |
354 | 372 | ||
355 | normalbeg2: | 373 | normalbeg2: |
374 | mov edi,window | ||
356 | 375 | ||
357 | ; 10 nop here take 10% time | 376 | cmp bp,word ptr[edi+eax] |
358 | mov edi,window | 377 | jne contloop3 ; if *(ushf*)match != scan_start, continue |
359 | ;mov chain_length,eax ; now, we need eax... | ||
360 | |||
361 | cmp bp,word ptr[edi+edx] | ||
362 | jne contloop3 ; if *(ushf*)match != scan_start, continue | ||
363 | 378 | ||
364 | iseq: | 379 | iseq: |
380 | ; if we are here, we know that *(match+best_len-1) == scan_end | ||
381 | ; and (match == scan_start) | ||
365 | 382 | ||
366 | mov edi,eax | 383 | mov edi,edx |
367 | mov esi,scanrp ; esi = scan | 384 | mov esi,scan ; esi = scan |
368 | add edi,edx ; edi = window + cur_match = match | 385 | add edi,eax ; edi = window + cur_match = match |
369 | 386 | ||
370 | 387 | mov edx,[esi+3] ; compare manually dword at match+3 | |
371 | mov eax,[esi+3] ; compare manually dword at match+3 | 388 | xor edx,[edi+3] ; and scan +3 |
372 | xor eax,[edi+3] ; and scan +3 | ||
373 | 389 | ||
374 | jz begincompare ; if equal, go to long compare | 390 | jz begincompare ; if equal, go to long compare |
375 | 391 | ||
376 | ; we will determine the unmatch byte and calculate len (in esi) | 392 | ; we will determine the unmatch byte and calculate len (in esi) |
377 | or al,al | 393 | or dl,dl |
378 | je eq1rr | 394 | je eq1rr |
379 | mov esi,3 | 395 | mov esi,3 |
380 | jmp trfinval | 396 | jmp trfinval |
381 | eq1rr: | 397 | eq1rr: |
382 | or ax,ax | 398 | or dx,dx |
383 | je eq1 | 399 | je eq1 |
384 | 400 | ||
385 | mov esi,4 | 401 | mov esi,4 |
386 | jmp trfinval | 402 | jmp trfinval |
387 | eq1: | 403 | eq1: |
388 | shl eax,8 | 404 | and edx,0ffffffh |
389 | jz eq11 | 405 | jz eq11 |
390 | mov esi,5 | 406 | mov esi,5 |
391 | jmp trfinval | 407 | jmp trfinval |
392 | eq11: | 408 | eq11: |
393 | mov esi,6 | 409 | mov esi,6 |
394 | jmp trfinval | 410 | jmp trfinval |
395 | 411 | ||
396 | begincompare: | 412 | begincompare: |
397 | ; here we now scan and match begin same | 413 | ; here we now scan and match begin same |
398 | add edi,6 | 414 | add edi,6 |
399 | add esi,6 | 415 | add esi,6 |
400 | mov ecx,(MAX_MATCH-(2+4))/4 ;//; scan for at most MAX_MATCH bytes | 416 | mov ecx,(MAX_MATCH-(2+4))/4 ; scan for at most MAX_MATCH bytes |
401 | repe cmpsd ;//; loop until mismatch | 417 | repe cmpsd ; loop until mismatch |
402 | 418 | ||
403 | je trfin ; go to trfin if not unmatch | 419 | je trfin ; go to trfin if not unmatch |
404 | ; we determine the unmatch byte | 420 | ; we determine the unmatch byte |
405 | sub esi,4 | 421 | sub esi,4 |
406 | mov eax,[edi-4] | 422 | mov edx,[edi-4] |
407 | xor eax,[esi] | 423 | xor edx,[esi] |
408 | or al,al | 424 | |
409 | 425 | or dl,dl | |
410 | jnz trfin | 426 | jnz trfin |
411 | inc esi | 427 | inc esi |
412 | 428 | ||
413 | or ax,ax | 429 | or dx,dx |
414 | jnz trfin | 430 | jnz trfin |
415 | inc esi | 431 | inc esi |
416 | 432 | ||
417 | shl eax,8 | 433 | and edx,0ffffffh |
418 | jnz trfin | 434 | jnz trfin |
419 | inc esi | 435 | inc esi |
420 | 436 | ||
421 | trfin: | 437 | trfin: |
422 | sub esi,scanrp ; esi = len | 438 | sub esi,scan ; esi = len |
423 | trfinval: | 439 | trfinval: |
424 | cmp esi,best_len ; if len <= best_len, go contloop2 | 440 | ; here we have finished the compare, and esi contains the length of the equal string |
425 | jbe contloop2 | 441 | cmp esi,best_len ; if len > best_len, go newbestlen |
426 | 442 | ja short newbestlen | |
427 | mov best_len,esi ; len become best_len | 443 | ; now we restore edx, ecx and esi, for the big loop |
428 | 444 | mov esi,prev | |
429 | mov match_start,edx | 445 | mov ecx,limit |
430 | cmp esi,nice_match ;//; if esi >= nice_match, exit | 446 | mov edx,window |
431 | mov ecx,scanrp | 447 | jmp contloop3 |
432 | jae exitloop | 448 | |
433 | add esi,window | 449 | newbestlen: |
434 | add ecx,best_len | 450 | mov best_len,esi ; len become best_len |
435 | dec esi | 451 | |
436 | mov windowlen,esi | 452 | mov match_start,eax ; save new position as match_start |
437 | mov bx,[ecx-1] | 453 | cmp esi,nice_match ; if best_len >= nice_match, exit |
438 | 454 | jae exitloop | |
439 | 455 | mov ecx,scan | |
440 | ; now we restore eax, ecx and esi, for the big loop : | 456 | mov edx,window ; restore edx=window |
441 | contloop2: | 457 | add ecx,esi |
442 | mov esi,prev | 458 | add esi,edx |
443 | mov ecx,limit | 459 | |
444 | ;mov eax,chain_length | 460 | dec esi |
445 | mov eax,window | 461 | mov windowlen,esi ; windowlen = window + best_len-1 |
446 | jmp contloop3 | 462 | mov bx,[ecx-1] ; bx = *(scan+best_len-1) = scan_end |
447 | 463 | ||
448 | exitloop: | 464 | ; now we restore ecx and esi, for the big loop : |
449 | mov ebx,match_start | 465 | mov esi,prev |
450 | mov ebp,str_s | 466 | mov ecx,limit |
451 | mov dword ptr [ebp+dep_match_start],ebx | 467 | jmp contloop3 |
452 | mov eax,best_len | 468 | |
453 | add esp,NbStackAdd | 469 | exitloop: |
454 | 470 | ; exit : s->match_start=match_start | |
455 | 471 | mov ebx,match_start | |
456 | pop ebx | 472 | mov ebp,str_s |
457 | pop esi | 473 | mov ecx,best_len |
458 | pop edi | 474 | mov dword ptr [ebp+dep_match_start],ebx |
459 | pop ebp | 475 | mov eax,dword ptr [ebp+dep_lookahead] |
460 | ret | 476 | cmp ecx,eax |
461 | 477 | ja minexlo | |
462 | _longest_match_asm7fff endp | 478 | mov eax,ecx |
479 | minexlo: | ||
480 | ; return min(best_len,s->lookahead) | ||
481 | |||
482 | ; restore stack and register ebx,esi,edi,ebp | ||
483 | add esp,NbStackAdd | ||
484 | |||
485 | pop ebx | ||
486 | pop esi | ||
487 | pop edi | ||
488 | pop ebp | ||
489 | ret | ||
490 | InfoAuthor: | ||
491 | ; please don't remove this string ! | ||
492 | ; You are free to use gvmat32 in any free or commercial apps if you don't remove the string in the binary! | ||
493 | db 0dh,0ah,"GVMat32 optimised assembly code written 1996-98 by Gilles Vollant",0dh,0ah | ||
494 | |||
495 | |||
496 | |||
497 | IFDEF NOUNDERLINE | ||
498 | longest_match_7fff endp | ||
499 | ELSE | ||
500 | _longest_match_7fff endp | ||
501 | ENDIF | ||
502 | |||
503 | |||
504 | IFDEF NOUNDERLINE | ||
505 | cpudetect32 proc near | ||
506 | ELSE | ||
507 | _cpudetect32 proc near | ||
508 | ENDIF | ||
509 | |||
510 | |||
511 | pushfd ; push original EFLAGS | ||
512 | pop eax ; get original EFLAGS | ||
513 | mov ecx, eax ; save original EFLAGS | ||
514 | xor eax, 40000h ; flip AC bit in EFLAGS | ||
515 | push eax ; save new EFLAGS value on stack | ||
516 | popfd ; replace current EFLAGS value | ||
517 | pushfd ; get new EFLAGS | ||
518 | pop eax ; store new EFLAGS in EAX | ||
519 | xor eax, ecx ; can’t toggle AC bit, processor=80386 | ||
520 | jz end_cpu_is_386 ; jump if 80386 processor | ||
521 | push ecx | ||
522 | popfd ; restore AC bit in EFLAGS first | ||
523 | |||
524 | pushfd | ||
525 | pushfd | ||
526 | pop ecx | ||
527 | |||
528 | mov eax, ecx ; get original EFLAGS | ||
529 | xor eax, 200000h ; flip ID bit in EFLAGS | ||
530 | push eax ; save new EFLAGS value on stack | ||
531 | popfd ; replace current EFLAGS value | ||
532 | pushfd ; get new EFLAGS | ||
533 | pop eax ; store new EFLAGS in EAX | ||
534 | popfd ; restore original EFLAGS | ||
535 | xor eax, ecx ; can’t toggle ID bit, | ||
536 | je is_old_486 ; processor=old | ||
537 | |||
538 | mov eax,1 | ||
539 | db 0fh,0a2h ;CPUID | ||
540 | |||
541 | exitcpudetect: | ||
542 | ret | ||
543 | |||
544 | end_cpu_is_386: | ||
545 | mov eax,0300h | ||
546 | jmp exitcpudetect | ||
547 | |||
548 | is_old_486: | ||
549 | mov eax,0400h | ||
550 | jmp exitcpudetect | ||
551 | |||
552 | IFDEF NOUNDERLINE | ||
553 | cpudetect32 endp | ||
554 | ELSE | ||
555 | _cpudetect32 endp | ||
556 | ENDIF | ||
463 | 557 | ||
464 | _TEXT ends | 558 | _TEXT ends |
465 | end | 559 | end |