diff options
author | Mark Adler <madler@alumni.caltech.edu> | 2011-09-09 23:22:37 -0700 |
---|---|---|
committer | Mark Adler <madler@alumni.caltech.edu> | 2011-09-09 23:22:37 -0700 |
commit | 4b5a43a219d51066c01ff2ab86af18b967f2d0dd (patch) | |
tree | 4dcaf0cd18751d04cf638a9a6ec521990d4f2e90 /contrib/masmx86/gvmat32.asm | |
parent | 086e982175da84b3db958191031380794315f95f (diff) | |
download | zlib-1.2.0.5.tar.gz zlib-1.2.0.5.tar.bz2 zlib-1.2.0.5.zip |
zlib 1.2.0.5v1.2.0.5
Diffstat (limited to '')
-rw-r--r-- | contrib/masmx86/gvmat32.asm (renamed from contrib/vstudio/vc70_32/gvmat32.asm) | 1810 |
1 files changed, 905 insertions, 905 deletions
diff --git a/contrib/vstudio/vc70_32/gvmat32.asm b/contrib/masmx86/gvmat32.asm index 320348f..ec360e6 100644 --- a/contrib/vstudio/vc70_32/gvmat32.asm +++ b/contrib/masmx86/gvmat32.asm | |||
@@ -1,905 +1,905 @@ | |||
1 | ; | 1 | ; |
2 | ; gvmat32.asm -- Asm portion of the optimized longest_match for 32 bits x86 | 2 | ; gvmat32.asm -- Asm portion of the optimized longest_match for 32 bits x86 |
3 | ; Copyright (C) 1995-1996 Jean-loup Gailly and Gilles Vollant. | 3 | ; Copyright (C) 1995-1996 Jean-loup Gailly and Gilles Vollant. |
4 | ; File written by Gilles Vollant, by modifying the longest_match | 4 | ; File written by Gilles Vollant, by modifying the longest_match |
5 | ; from Jean-loup Gailly in deflate.c | 5 | ; from Jean-loup Gailly in deflate.c |
6 | ; It needs wmask == 0x7fff | 6 | ; It needs wmask == 0x7fff |
7 | ; (assembly code is faster with a fixed wmask) | 7 | ; (assembly code is faster with a fixed wmask) |
8 | ; | 8 | ; |
9 | ; For Visual C++ 4.2 and ML 6.11c (version in directory \MASM611C of Win95 DDK) | 9 | ; For Visual C++ 4.2 and ML 6.11c (version in directory \MASM611C of Win95 DDK) |
10 | ; I compile with : "ml /coff /Zi /c gvmat32.asm" | 10 | ; I compile with : "ml /coff /Zi /c gvmat32.asm" |
11 | ; | 11 | ; |
12 | 12 | ||
13 | ;uInt longest_match_7fff(s, cur_match) | 13 | ;uInt longest_match_7fff(s, cur_match) |
14 | ; deflate_state *s; | 14 | ; deflate_state *s; |
15 | ; IPos cur_match; /* current match */ | 15 | ; IPos cur_match; /* current match */ |
16 | 16 | ||
;-----------------------------------------------------------------------
; Stack frame of longest_match_7fff, addressed relative to ESP *after*
; the prologue (four register pushes followed by "sub esp,NbStackAdd").
; NbStack is the distance from ESP to the second argument (cur_match).
;-----------------------------------------------------------------------
NbStack         equ     76

; --- caller-supplied arguments ---------------------------------------
cur_match       equ     dword ptr [esp+NbStack-0]      ; 2nd argument
str_s           equ     dword ptr [esp+NbStack-4]      ; 1st argument (deflate_state *s)

; --- five dwords between the arguments and the locals ----------------
;     (return address, then the registers pushed by the prologue)
adrret          equ     dword ptr [esp+NbStack-8]
pushebp         equ     dword ptr [esp+NbStack-12]
pushedi         equ     dword ptr [esp+NbStack-16]
pushesi         equ     dword ptr [esp+NbStack-20]
pushebx         equ     dword ptr [esp+NbStack-24]

; --- local variables --------------------------------------------------
chain_length    equ     dword ptr [esp+NbStack-28]
limit           equ     dword ptr [esp+NbStack-32]
best_len        equ     dword ptr [esp+NbStack-36]
window          equ     dword ptr [esp+NbStack-40]
prev            equ     dword ptr [esp+NbStack-44]
scan_start      equ      word ptr [esp+NbStack-48]     ; only 16 bits are used
wmask           equ     dword ptr [esp+NbStack-52]
match_start_ptr equ     dword ptr [esp+NbStack-56]
nice_match      equ     dword ptr [esp+NbStack-60]
scan            equ     dword ptr [esp+NbStack-64]

windowlen       equ     dword ptr [esp+NbStack-68]
match_start     equ     dword ptr [esp+NbStack-72]
strend          equ     dword ptr [esp+NbStack-76]

; Bytes reserved for the locals: the frame minus the 24 bytes taken by
; the two arguments, the return address ... wait for the prologue pushes.
; (NbStack - 24 = 52, the amount subtracted from ESP in the prologue.)
NbStackAdd      equ     (NbStack-24)
42 | 42 | ||
43 | .386p | 43 | .386p |
44 | 44 | ||
45 | name gvmatch | 45 | name gvmatch |
46 | .MODEL FLAT | 46 | .MODEL FLAT |
47 | 47 | ||
48 | 48 | ||
49 | 49 | ||
50 | ; all the +4 offsets are due to the addition of pending_buf_size (in zlib | 50 | ; all the +4 offsets are due to the addition of pending_buf_size (in zlib |
51 | ; in the deflate_state structure since the asm code was first written | 51 | ; in the deflate_state structure since the asm code was first written |
52 | ; (if you compile with zlib 1.0.4 or older, remove the +4). | 52 | ; (if you compile with zlib 1.0.4 or older, remove the +4). |
53 | ; Note : these value are good with a 8 bytes boundary pack structure | 53 | ; Note : these value are good with a 8 bytes boundary pack structure |
54 | dep_chain_length equ 70h+4 | 54 | dep_chain_length equ 70h+4 |
55 | dep_window equ 2ch+4 | 55 | dep_window equ 2ch+4 |
56 | dep_strstart equ 60h+4 | 56 | dep_strstart equ 60h+4 |
57 | dep_prev_length equ 6ch+4 | 57 | dep_prev_length equ 6ch+4 |
58 | dep_nice_match equ 84h+4 | 58 | dep_nice_match equ 84h+4 |
59 | dep_w_size equ 20h+4 | 59 | dep_w_size equ 20h+4 |
60 | dep_prev equ 34h+4 | 60 | dep_prev equ 34h+4 |
61 | dep_w_mask equ 28h+4 | 61 | dep_w_mask equ 28h+4 |
62 | dep_good_match equ 80h+4 | 62 | dep_good_match equ 80h+4 |
63 | dep_match_start equ 64h+4 | 63 | dep_match_start equ 64h+4 |
64 | dep_lookahead equ 68h+4 | 64 | dep_lookahead equ 68h+4 |
65 | 65 | ||
66 | 66 | ||
_TEXT   segment

; Exported entry points.  Define NOUNDERLINE to export the names without
; the leading underscore.
IFDEF NOUNDERLINE
        public  longest_match_7fff
        public  longest_match_686
;       public  match_init
ELSE
        public  _longest_match_7fff
        public  _longest_match_686
;       public  _match_init
ENDIF

; Match-length limits; MIN_LOOKAHEAD evaluates to 262.
MAX_MATCH       equ     258
MIN_MATCH       equ     3
MIN_LOOKAHEAD   equ     (MAX_MATCH+MIN_MATCH+1)



; match_init is kept only as commented-out text; no code is emitted for it.
IFDEF NOUNDERLINE
;match_init     proc near
;               ret
;match_init     endp
ELSE
;_match_init    proc near
;               ret
;_match_init    endp
ENDIF
94 | 94 | ||
95 | 95 | ||
;-----------------------------------------------------------------------
; uInt longest_match_7fff(deflate_state *s, IPos cur_match)
;
; Hash-chain search for the longest match of the string at
; s->window + s->strstart, with the window mask hard-coded to 7fffh.
;
; In:    [esp+4] = s, [esp+8] = cur_match (arguments on the stack;
;        the routine returns with a plain "ret").
; Out:   eax = min(best_len, s->lookahead);
;        s->match_start is written with the recorded match position.
; Saved: ebp, edi, esi, ebx are pushed on entry and popped on exit.
; Uses:  ecx, edx as scratch; NbStackAdd bytes of stack locals.
;
; Register roles inside the search loop:
;        eax = cur_match          ecx = limit
;        bx  = scan_end           bp  = scan_start
;        edx = window             esi = prev
;        edi = windowlen (window + best_len - 1)
;-----------------------------------------------------------------------
IFDEF NOUNDERLINE
longest_match_7fff      proc near
ELSE
_longest_match_7fff     proc near
ENDIF

        mov     edx,[esp+4]             ; edx = s (read before the pushes move esp)



        push    ebp
        push    edi
        push    esi
        push    ebx

        sub     esp,NbStackAdd          ; make room for the locals

        mov     ebp,edx                 ; ebp = s while the locals are set up

; chain_length = s->max_chain_length
; if (s->prev_length >= s->good_match) chain_length >>= 2
        mov     edx,[ebp+dep_chain_length]
        mov     ebx,[ebp+dep_prev_length]
        cmp     [ebp+dep_good_match],ebx
        ja      noshr                   ; good_match > prev_length: keep full length
        shr     edx,2
noshr:
; one extra step is added because the loop decrements chain_length
; before it tests a candidate
        inc     edx
        mov     edi,[ebp+dep_nice_match]
        mov     chain_length,edx
        mov     eax,[ebp+dep_lookahead]
        cmp     eax,edi
; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead
        jae     nolookaheadnicematch
        mov     edi,eax
nolookaheadnicematch:
; best_len = s->prev_length
        mov     best_len,ebx

; window = s->window
        mov     esi,[ebp+dep_window]
        mov     ecx,[ebp+dep_strstart]
        mov     window,esi

        mov     nice_match,edi
; scan = window + strstart
        add     esi,ecx
        mov     scan,esi
; dx = first two bytes of scan
        mov     dx,word ptr [esi]
; bx = scan_end = the two bytes at scan + best_len - 1
        mov     bx,word ptr [esi+ebx-1]
        add     esi,MAX_MATCH-1
; scan_start = *(word *)scan
        mov     scan_start,dx
; strend = scan + MAX_MATCH - 1
        mov     strend,esi

; limit = s->strstart > MAX_DIST(s) ? s->strstart - MAX_DIST(s) : 0
; with MAX_DIST(s) = s->w_size - MIN_LOOKAHEAD
        mov     esi,[ebp+dep_w_size]
        sub     esi,MIN_LOOKAHEAD       ; esi = MAX_DIST(s)
        sub     ecx,esi
        ja      nodist                  ; strstart > MAX_DIST (unsigned)
        xor     ecx,ecx
nodist:
        mov     limit,ecx

; prev = s->prev
        mov     edx,[ebp+dep_prev]
        mov     prev,edx

; match_start local = s->match_start.  Loading bp with scan_start
; overwrites the low half of ebp, so s is no longer available in ebp
; after this point; it is reloaded from str_s at exitloop.
        mov     edx,dword ptr [ebp+dep_match_start]
        mov     bp,scan_start
        mov     eax,cur_match
        mov     match_start,edx

        mov     edx,window
        mov     edi,edx
        add     edi,best_len
        mov     esi,prev
        dec     edi
; windowlen = window + best_len - 1
        mov     windowlen,edi

        jmp     beginloop2
        align   4

;-----------------------------------------------------------------------
; Non-unrolled loop, entered when 16 or fewer chain steps remain.
; The 16 previously subtracted from chain_length is added back first.
;-----------------------------------------------------------------------
normalbeg0add16:
        add     chain_length,16
        jz      exitloop                ; nothing left to try
normalbeg0:
        cmp     word ptr[edi+eax],bx    ; candidate word at best_len-1 == scan_end ?
        je      normalbeg2noroll
rcontlabnoroll:
; cur_match = prev[cur_match & 7fffh]
        and     eax,7fffh
        mov     ax,word ptr[esi+eax*2]
; leave the loop when limit >= cur_match (unsigned)
        cmp     ecx,eax
        jae     exitloop
; --chain_length; keep looping while it is non-zero
        dec     chain_length
        jnz     normalbeg0
        jmp     exitloop

normalbeg2noroll:
; the end of the candidate matches; now check its first two bytes
        cmp     bp,word ptr[edx+eax]
        jne     rcontlabnoroll
        jmp     normalbeg2

;-----------------------------------------------------------------------
; Re-entry point after a full string comparison: reload windowlen and
; advance to the next candidate in the chain.
;-----------------------------------------------------------------------
contloop3:
        mov     edi,windowlen

; cur_match = prev[cur_match & 7fffh]
        and     eax,7fffh
        mov     ax,word ptr[esi+eax*2]
; leave the loop when limit >= cur_match (unsigned)
        cmp     ecx,eax
jnbexitloopshort1:
        jae     exitloop
; falls through into beginloop2, where chain_length is adjusted


; begin the main loop
beginloop2:
        sub     chain_length,16+1
; if the result is zero or borrowed, use the non-unrolled loop
        jbe     normalbeg0add16

;-----------------------------------------------------------------------
; Unrolled loop: 16 chain steps per pass.
;-----------------------------------------------------------------------
do16:
        cmp     word ptr[edi+eax],bx
        je      normalbeg2dc0

; maccn lab: follow the chain by one link, exit when limit >= cur_match,
; then jump to "lab" when the candidate's word at best_len-1 equals scan_end.
maccn   MACRO   lab
        and     eax,7fffh
        mov     ax,word ptr[esi+eax*2]
        cmp     ecx,eax
        jae     exitloop
        cmp     word ptr[edi+eax],bx
        je      lab
        ENDM

rcontloop0:
        maccn   normalbeg2dc1

rcontloop1:
        maccn   normalbeg2dc2

rcontloop2:
        maccn   normalbeg2dc3

rcontloop3:
        maccn   normalbeg2dc4

rcontloop4:
        maccn   normalbeg2dc5

rcontloop5:
        maccn   normalbeg2dc6

rcontloop6:
        maccn   normalbeg2dc7

rcontloop7:
        maccn   normalbeg2dc8

rcontloop8:
        maccn   normalbeg2dc9

rcontloop9:
        maccn   normalbeg2dc10

rcontloop10:
        maccn   short normalbeg2dc11

rcontloop11:
        maccn   short normalbeg2dc12

rcontloop12:
        maccn   short normalbeg2dc13

rcontloop13:
        maccn   short normalbeg2dc14

rcontloop14:
        maccn   short normalbeg2dc15

rcontloop15:
; sixteenth link of the pass: no end-word test here, do16 performs it
        and     eax,7fffh
        mov     ax,word ptr[esi+eax*2]
        cmp     ecx,eax
        jae     exitloop

        sub     chain_length,16
        ja      do16                    ; more than 16 steps remained: another pass
        jmp     normalbeg0add16

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; normbeg rcontlab,valsub: landing pad for unrolled step number valsub.
; On entry the candidate's word at best_len-1 is known to equal scan_end.
normbeg MACRO   rcontlab,valsub
; first two bytes differ from scan_start: resume the unrolled loop
        cmp     bp,word ptr[edx+eax]
        jne     rcontlab
; add back the part of the 16 pre-subtracted steps that this pass did
; not consume, then compare the two strings in full
        add     chain_length,16-valsub
        jmp     iseq
        ENDM


normalbeg2dc11:
        normbeg rcontloop11,11

normalbeg2dc12:
        normbeg short rcontloop12,12

normalbeg2dc13:
        normbeg short rcontloop13,13

normalbeg2dc14:
        normbeg short rcontloop14,14

normalbeg2dc15:
        normbeg short rcontloop15,15

normalbeg2dc10:
        normbeg rcontloop10,10

normalbeg2dc9:
        normbeg rcontloop9,9

normalbeg2dc8:
        normbeg rcontloop8,8

normalbeg2dc7:
        normbeg rcontloop7,7

normalbeg2dc6:
        normbeg rcontloop6,6

normalbeg2dc5:
        normbeg rcontloop5,5

normalbeg2dc4:
        normbeg rcontloop4,4

normalbeg2dc3:
        normbeg rcontloop3,3

normalbeg2dc2:
        normbeg rcontloop2,2

normalbeg2dc1:
        normbeg rcontloop1,1

normalbeg2dc0:
        normbeg rcontloop0,0


; reached from the non-unrolled loop once the end word equals scan_end

normalbeg2:
        mov     edi,window

        cmp     bp,word ptr[edi+eax]
        jne     contloop3               ; first two bytes differ: next candidate

;-----------------------------------------------------------------------
; Full comparison.  Here the candidate's first two bytes equal
; scan_start and its word at best_len-1 equals scan_end.
; The byte at offset 2 is not compared by this code.
;-----------------------------------------------------------------------
iseq:
        mov     edi,edx
        mov     esi,scan                ; esi = scan
        add     edi,eax                 ; edi = window + cur_match = match

        mov     edx,[esi+3]             ; dword at scan+3 ...
        xor     edx,[edi+3]             ; ... xor dword at match+3

        jz      begincompare            ; bytes 3..6 equal: go to the long compare

; locate the first differing byte among offsets 3..6; esi receives len
        or      dl,dl
        jz      eq1rr
        mov     esi,3                   ; byte 3 differs
        jmp     trfinval
eq1rr:
        or      dx,dx
        jz      eq1

        mov     esi,4                   ; byte 4 differs
        jmp     trfinval
eq1:
        and     edx,0ffffffh
        jz      eq11
        mov     esi,5                   ; byte 5 differs
        jmp     trfinval
eq11:
        mov     esi,6                   ; only byte 6 differs
        jmp     trfinval

begincompare:
; continue from offset 6, one dword at a time
        add     edi,6
        add     esi,6
        mov     ecx,(MAX_MATCH-(2+4))/4 ; dword count so that at most MAX_MATCH bytes are scanned
        repe    cmpsd                   ; stop at the first differing dword

        je      trfin                   ; every dword matched
; step back to the differing dword and find the first differing byte
        sub     esi,4
        mov     edx,[edi-4]
        xor     edx,[esi]

        or      dl,dl
        jnz     trfin
        inc     esi

        or      dx,dx
        jnz     trfin
        inc     esi

        and     edx,0ffffffh
        jnz     trfin
        inc     esi

trfin:
        sub     esi,scan                ; esi = len
trfinval:
; esi holds the length of the equal prefix
        cmp     esi,best_len            ; len > best_len ?
        ja      short newbestlen
; not better: restore esi, ecx and edx for the search loop
        mov     esi,prev
        mov     ecx,limit
        mov     edx,window
        jmp     contloop3

newbestlen:
        mov     best_len,esi            ; best_len = len

        mov     match_start,eax         ; remember this position as match_start
        cmp     esi,nice_match          ; best_len >= nice_match: good enough, stop
        jae     exitloop
        mov     ecx,scan
        mov     edx,window              ; restore edx = window
        add     ecx,esi
        add     esi,edx

        dec     esi
        mov     windowlen,esi           ; windowlen = window + best_len - 1
        mov     bx,[ecx-1]              ; bx = scan_end = word at scan + best_len - 1

; restore esi and ecx for the search loop
        mov     esi,prev
        mov     ecx,limit
        jmp     contloop3

exitloop:
; s->match_start = match_start
        mov     ebx,match_start
        mov     ebp,str_s               ; reload s (ebp was overwritten through bp)
        mov     ecx,best_len
        mov     dword ptr [ebp+dep_match_start],ebx
        mov     eax,dword ptr [ebp+dep_lookahead]
        cmp     ecx,eax
        ja      minexlo                 ; best_len > lookahead: return lookahead
        mov     eax,ecx
minexlo:
; eax = min(best_len, s->lookahead)

; release the locals and restore ebx, esi, edi, ebp
        add     esp,NbStackAdd

        pop     ebx
        pop     esi
        pop     edi
        pop     ebp
        ret
InfoAuthor:
; Please do not remove this string!
; You are free to use gvmat32 in any free or commercial application as
; long as the string is not removed from the binary.
        db      0dh,0ah,"GVMat32 optimised assembly code written 1996-98 by Gilles Vollant",0dh,0ah



IFDEF NOUNDERLINE
longest_match_7fff      endp
ELSE
_longest_match_7fff     endp
ENDIF
504 | 504 | ||
505 | 505 | ||
;-----------------------------------------------------------------------
; cpudetect32 -- classify the processor.
;
; In:    nothing
; Out:   eax = 0300h  when the AC bit (40000h) of EFLAGS cannot be toggled
;        eax = 0400h  when AC toggles but the ID bit (200000h) cannot
;        eax = value left in EAX by CPUID function 1 otherwise
; Saved: ebx (pushed on entry, popped on exit)
; Uses:  ecx; on the CPUID path CPUID also overwrites ecx and edx
;-----------------------------------------------------------------------
IFDEF NOUNDERLINE
cpudetect32     proc near
ELSE
_cpudetect32    proc near
ENDIF

        push    ebx

; --- try to flip EFLAGS.AC ---------------------------------------------
        pushfd                          ; push original EFLAGS
        pop     eax                     ; eax = original EFLAGS
        mov     ecx, eax                ; ecx = copy of the original value
        xor     eax, 40000h             ; flip the AC bit
        push    eax
        popfd                           ; try to load the modified value
        pushfd
        pop     eax                     ; eax = EFLAGS as actually stored
        xor     eax, ecx                ; zero when the AC bit did not change
        je      end_cpu_is_386          ; AC cannot be toggled: report 0300h
        push    ecx
        popfd                           ; put the original AC bit back first

; --- try to flip EFLAGS.ID ---------------------------------------------
        pushfd                          ; this copy stays on the stack for the popfd below
        pushfd
        pop     ecx                     ; ecx = original EFLAGS

        mov     eax, ecx
        xor     eax, 200000h            ; flip the ID bit
        push    eax
        popfd                           ; try to load the modified value
        pushfd
        pop     eax                     ; eax = EFLAGS as actually stored
        popfd                           ; restore the original EFLAGS
        xor     eax, ecx                ; zero when the ID bit did not change
        jz      is_old_486              ; ID cannot be toggled: report 0400h

        mov     eax,1
        db      0fh,0a2h                ; CPUID, emitted as raw opcode bytes

exitcpudetect:
        pop     ebx
        ret

end_cpu_is_386:
        mov     eax,0300h
        jmp     exitcpudetect

is_old_486:
        mov     eax,0400h
        jmp     exitcpudetect

IFDEF NOUNDERLINE
cpudetect32     endp
ELSE
_cpudetect32    endp
ENDIF
561 | 561 | ||
562 | 562 | ||
563 | 563 | ||
564 | 564 | ||
565 | MAX_MATCH equ 258 | 565 | MAX_MATCH equ 258 |
566 | MIN_MATCH equ 3 | 566 | MIN_MATCH equ 3 |
567 | MIN_LOOKAHEAD equ (MAX_MATCH + MIN_MATCH + 1) | 567 | MIN_LOOKAHEAD equ (MAX_MATCH + MIN_MATCH + 1) |
568 | MAX_MATCH_8_ equ ((MAX_MATCH + 7) AND 0FFF0h) | 568 | MAX_MATCH_8_ equ ((MAX_MATCH + 7) AND 0FFF0h) |
569 | 569 | ||
570 | 570 | ||
571 | ;;; stack frame offsets | 571 | ;;; stack frame offsets |
572 | 572 | ||
573 | chainlenwmask equ esp + 0 ; high word: current chain len | 573 | chainlenwmask equ esp + 0 ; high word: current chain len |
574 | ; low word: s->wmask | 574 | ; low word: s->wmask |
575 | window equ esp + 4 ; local copy of s->window | 575 | window equ esp + 4 ; local copy of s->window |
576 | windowbestlen equ esp + 8 ; s->window + bestlen | 576 | windowbestlen equ esp + 8 ; s->window + bestlen |
577 | scanstart equ esp + 16 ; first two bytes of string | 577 | scanstart equ esp + 16 ; first two bytes of string |
578 | scanend equ esp + 12 ; last two bytes of string | 578 | scanend equ esp + 12 ; last two bytes of string |
579 | scanalign equ esp + 20 ; dword-misalignment of string | 579 | scanalign equ esp + 20 ; dword-misalignment of string |
580 | nicematch equ esp + 24 ; a good enough match size | 580 | nicematch equ esp + 24 ; a good enough match size |
581 | bestlen equ esp + 28 ; size of best match so far | 581 | bestlen equ esp + 28 ; size of best match so far |
582 | scan equ esp + 32 ; ptr to string wanting match | 582 | scan equ esp + 32 ; ptr to string wanting match |
583 | 583 | ||
584 | LocalVarsSize equ 36 | 584 | LocalVarsSize equ 36 |
585 | ; saved ebx byte esp + 36 | 585 | ; saved ebx byte esp + 36 |
586 | ; saved esi byte esp + 40 | 586 | ; saved esi byte esp + 40 |
587 | ; saved edi byte esp + 44 | 587 | ; saved edi byte esp + 44 |
588 | ; saved ebp byte esp + 48 | 588 | ; saved ebp byte esp + 48 |
589 | ; return address byte esp + 52 | 589 | ; return address byte esp + 52 |
590 | deflatestate equ esp + 56 ; the function arguments | 590 | deflatestate equ esp + 56 ; the function arguments |
591 | curmatch equ esp + 60 | 591 | curmatch equ esp + 60 |
592 | 592 | ||
593 | ;;; Offsets for fields in the deflate_state structure. These numbers | 593 | ;;; Offsets for fields in the deflate_state structure. These numbers |
594 | ;;; are calculated from the definition of deflate_state, with the | 594 | ;;; are calculated from the definition of deflate_state, with the |
595 | ;;; assumption that the compiler will dword-align the fields. (Thus, | 595 | ;;; assumption that the compiler will dword-align the fields. (Thus, |
596 | ;;; changing the definition of deflate_state could easily cause this | 596 | ;;; changing the definition of deflate_state could easily cause this |
597 | ;;; program to crash horribly, without so much as a warning at | 597 | ;;; program to crash horribly, without so much as a warning at |
598 | ;;; compile time. Sigh.) | 598 | ;;; compile time. Sigh.) |
599 | 599 | ||
600 | dsWSize equ 36 | 600 | dsWSize equ 36 |
601 | dsWMask equ 44 | 601 | dsWMask equ 44 |
602 | dsWindow equ 48 | 602 | dsWindow equ 48 |
603 | dsPrev equ 56 | 603 | dsPrev equ 56 |
604 | dsMatchLen equ 88 | 604 | dsMatchLen equ 88 |
605 | dsPrevMatch equ 92 | 605 | dsPrevMatch equ 92 |
606 | dsStrStart equ 100 | 606 | dsStrStart equ 100 |
607 | dsMatchStart equ 104 | 607 | dsMatchStart equ 104 |
608 | dsLookahead equ 108 | 608 | dsLookahead equ 108 |
609 | dsPrevLen equ 112 | 609 | dsPrevLen equ 112 |
610 | dsMaxChainLen equ 116 | 610 | dsMaxChainLen equ 116 |
611 | dsGoodMatch equ 132 | 611 | dsGoodMatch equ 132 |
612 | dsNiceMatch equ 136 | 612 | dsNiceMatch equ 136 |
613 | 613 | ||
614 | 614 | ||
615 | ;;; match.asm -- Pentium-Pro-optimized version of longest_match() | 615 | ;;; match.asm -- Pentium-Pro-optimized version of longest_match() |
616 | ;;; Written for zlib 1.1.2 | 616 | ;;; Written for zlib 1.1.2 |
617 | ;;; Copyright (C) 1998 Brian Raiter <breadbox@muppetlabs.com> | 617 | ;;; Copyright (C) 1998 Brian Raiter <breadbox@muppetlabs.com> |
618 | ;;; You can look at http://www.muppetlabs.com/~breadbox/software/assembly.html | 618 | ;;; You can look at http://www.muppetlabs.com/~breadbox/software/assembly.html |
619 | ;;; | 619 | ;;; |
620 | ;;; This is free software; you can redistribute it and/or modify it | 620 | ;;; This is free software; you can redistribute it and/or modify it |
621 | ;;; under the terms of the GNU General Public License. | 621 | ;;; under the terms of the GNU General Public License. |
622 | 622 | ||
623 | ;GLOBAL _longest_match, _match_init | 623 | ;GLOBAL _longest_match, _match_init |
624 | 624 | ||
625 | 625 | ||
626 | ;SECTION .text | 626 | ;SECTION .text |
627 | 627 | ||
628 | ;;; uInt longest_match(deflate_state *deflatestate, IPos curmatch) | 628 | ;;; uInt longest_match(deflate_state *deflatestate, IPos curmatch) |
629 | 629 | ||
630 | ;_longest_match: | 630 | ;_longest_match: |
631 | IFDEF NOUNDERLINE | 631 | IFDEF NOUNDERLINE |
632 | longest_match_686 proc near | 632 | longest_match_686 proc near |
633 | ELSE | 633 | ELSE |
634 | _longest_match_686 proc near | 634 | _longest_match_686 proc near |
635 | ENDIF | 635 | ENDIF |
636 | 636 | ||
637 | 637 | ||
638 | ;;; Save registers that the compiler may be using, and adjust esp to | 638 | ;;; Save registers that the compiler may be using, and adjust esp to |
639 | ;;; make room for our stack frame. | 639 | ;;; make room for our stack frame. |
640 | 640 | ||
641 | push ebp | 641 | push ebp |
642 | push edi | 642 | push edi |
643 | push esi | 643 | push esi |
644 | push ebx | 644 | push ebx |
645 | sub esp, LocalVarsSize | 645 | sub esp, LocalVarsSize |
646 | 646 | ||
647 | ;;; Retrieve the function arguments. ecx will hold cur_match | 647 | ;;; Retrieve the function arguments. ecx will hold cur_match |
648 | ;;; throughout the entire function. edx will hold the pointer to the | 648 | ;;; throughout the entire function. edx will hold the pointer to the |
649 | ;;; deflate_state structure during the function's setup (before | 649 | ;;; deflate_state structure during the function's setup (before |
650 | ;;; entering the main loop). | 650 | ;;; entering the main loop). |
651 | 651 | ||
652 | mov edx, [deflatestate] | 652 | mov edx, [deflatestate] |
653 | mov ecx, [curmatch] | 653 | mov ecx, [curmatch] |
654 | 654 | ||
655 | ;;; uInt wmask = s->w_mask; | 655 | ;;; uInt wmask = s->w_mask; |
656 | ;;; unsigned chain_length = s->max_chain_length; | 656 | ;;; unsigned chain_length = s->max_chain_length; |
657 | ;;; if (s->prev_length >= s->good_match) { | 657 | ;;; if (s->prev_length >= s->good_match) { |
658 | ;;; chain_length >>= 2; | 658 | ;;; chain_length >>= 2; |
659 | ;;; } | 659 | ;;; } |
660 | 660 | ||
661 | mov eax, [edx + dsPrevLen] | 661 | mov eax, [edx + dsPrevLen] |
662 | mov ebx, [edx + dsGoodMatch] | 662 | mov ebx, [edx + dsGoodMatch] |
663 | cmp eax, ebx | 663 | cmp eax, ebx |
664 | mov eax, [edx + dsWMask] | 664 | mov eax, [edx + dsWMask] |
665 | mov ebx, [edx + dsMaxChainLen] | 665 | mov ebx, [edx + dsMaxChainLen] |
666 | jl LastMatchGood | 666 | jl LastMatchGood |
667 | shr ebx, 2 | 667 | shr ebx, 2 |
668 | LastMatchGood: | 668 | LastMatchGood: |
669 | 669 | ||
670 | ;;; chainlen is decremented once beforehand so that the function can | 670 | ;;; chainlen is decremented once beforehand so that the function can |
671 | ;;; use the sign flag instead of the zero flag for the exit test. | 671 | ;;; use the sign flag instead of the zero flag for the exit test. |
672 | ;;; It is then shifted into the high word, to make room for the wmask | 672 | ;;; It is then shifted into the high word, to make room for the wmask |
673 | ;;; value, which it will always accompany. | 673 | ;;; value, which it will always accompany. |
674 | 674 | ||
675 | dec ebx | 675 | dec ebx |
676 | shl ebx, 16 | 676 | shl ebx, 16 |
677 | or ebx, eax | 677 | or ebx, eax |
678 | mov [chainlenwmask], ebx | 678 | mov [chainlenwmask], ebx |
679 | 679 | ||
680 | ;;; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead; | 680 | ;;; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead; |
681 | 681 | ||
682 | mov eax, [edx + dsNiceMatch] | 682 | mov eax, [edx + dsNiceMatch] |
683 | mov ebx, [edx + dsLookahead] | 683 | mov ebx, [edx + dsLookahead] |
684 | cmp ebx, eax | 684 | cmp ebx, eax |
685 | jl LookaheadLess | 685 | jl LookaheadLess |
686 | mov ebx, eax | 686 | mov ebx, eax |
687 | LookaheadLess: mov [nicematch], ebx | 687 | LookaheadLess: mov [nicematch], ebx |
688 | 688 | ||
689 | ;;; register Bytef *scan = s->window + s->strstart; | 689 | ;;; register Bytef *scan = s->window + s->strstart; |
690 | 690 | ||
691 | mov esi, [edx + dsWindow] | 691 | mov esi, [edx + dsWindow] |
692 | mov [window], esi | 692 | mov [window], esi |
693 | mov ebp, [edx + dsStrStart] | 693 | mov ebp, [edx + dsStrStart] |
694 | lea edi, [esi + ebp] | 694 | lea edi, [esi + ebp] |
695 | mov [scan], edi | 695 | mov [scan], edi |
696 | 696 | ||
697 | ;;; Determine how many bytes the scan ptr is off from being | 697 | ;;; Determine how many bytes the scan ptr is off from being |
698 | ;;; dword-aligned. | 698 | ;;; dword-aligned. |
699 | 699 | ||
700 | mov eax, edi | 700 | mov eax, edi |
701 | neg eax | 701 | neg eax |
702 | and eax, 3 | 702 | and eax, 3 |
703 | mov [scanalign], eax | 703 | mov [scanalign], eax |
704 | 704 | ||
705 | ;;; IPos limit = s->strstart > (IPos)MAX_DIST(s) ? | 705 | ;;; IPos limit = s->strstart > (IPos)MAX_DIST(s) ? |
706 | ;;; s->strstart - (IPos)MAX_DIST(s) : NIL; | 706 | ;;; s->strstart - (IPos)MAX_DIST(s) : NIL; |
707 | 707 | ||
708 | mov eax, [edx + dsWSize] | 708 | mov eax, [edx + dsWSize] |
709 | sub eax, MIN_LOOKAHEAD | 709 | sub eax, MIN_LOOKAHEAD |
710 | sub ebp, eax | 710 | sub ebp, eax |
711 | jg LimitPositive | 711 | jg LimitPositive |
712 | xor ebp, ebp | 712 | xor ebp, ebp |
713 | LimitPositive: | 713 | LimitPositive: |
714 | 714 | ||
715 | ;;; int best_len = s->prev_length; | 715 | ;;; int best_len = s->prev_length; |
716 | 716 | ||
717 | mov eax, [edx + dsPrevLen] | 717 | mov eax, [edx + dsPrevLen] |
718 | mov [bestlen], eax | 718 | mov [bestlen], eax |
719 | 719 | ||
720 | ;;; Store the sum of s->window + best_len in esi, and locally in windowbestlen. | 720 | ;;; Store the sum of s->window + best_len in esi, and locally in windowbestlen. |
721 | 721 | ||
722 | add esi, eax | 722 | add esi, eax |
723 | mov [windowbestlen], esi | 723 | mov [windowbestlen], esi |
724 | 724 | ||
725 | ;;; register ush scan_start = *(ushf*)scan; | 725 | ;;; register ush scan_start = *(ushf*)scan; |
726 | ;;; register ush scan_end = *(ushf*)(scan+best_len-1); | 726 | ;;; register ush scan_end = *(ushf*)(scan+best_len-1); |
727 | ;;; Posf *prev = s->prev; | 727 | ;;; Posf *prev = s->prev; |
728 | 728 | ||
729 | movzx ebx, word ptr [edi] | 729 | movzx ebx, word ptr [edi] |
730 | mov [scanstart], ebx | 730 | mov [scanstart], ebx |
731 | movzx ebx, word ptr [edi + eax - 1] | 731 | movzx ebx, word ptr [edi + eax - 1] |
732 | mov [scanend], ebx | 732 | mov [scanend], ebx |
733 | mov edi, [edx + dsPrev] | 733 | mov edi, [edx + dsPrev] |
734 | 734 | ||
735 | ;;; Jump into the main loop. | 735 | ;;; Jump into the main loop. |
736 | 736 | ||
737 | mov edx, [chainlenwmask] | 737 | mov edx, [chainlenwmask] |
738 | jmp short LoopEntry | 738 | jmp short LoopEntry |
739 | 739 | ||
740 | align 4 | 740 | align 4 |
741 | 741 | ||
742 | ;;; do { | 742 | ;;; do { |
743 | ;;; match = s->window + cur_match; | 743 | ;;; match = s->window + cur_match; |
744 | ;;; if (*(ushf*)(match+best_len-1) != scan_end || | 744 | ;;; if (*(ushf*)(match+best_len-1) != scan_end || |
745 | ;;; *(ushf*)match != scan_start) continue; | 745 | ;;; *(ushf*)match != scan_start) continue; |
746 | ;;; [...] | 746 | ;;; [...] |
747 | ;;; } while ((cur_match = prev[cur_match & wmask]) > limit | 747 | ;;; } while ((cur_match = prev[cur_match & wmask]) > limit |
748 | ;;; && --chain_length != 0); | 748 | ;;; && --chain_length != 0); |
749 | ;;; | 749 | ;;; |
750 | ;;; Here is the inner loop of the function. The function will spend the | 750 | ;;; Here is the inner loop of the function. The function will spend the |
751 | ;;; majority of its time in this loop, and majority of that time will | 751 | ;;; majority of its time in this loop, and majority of that time will |
752 | ;;; be spent in the first ten instructions. | 752 | ;;; be spent in the first ten instructions. |
753 | ;;; | 753 | ;;; |
754 | ;;; Within this loop: | 754 | ;;; Within this loop: |
755 | ;;; ebx = scanend | 755 | ;;; ebx = scanend |
756 | ;;; ecx = curmatch | 756 | ;;; ecx = curmatch |
757 | ;;; edx = chainlenwmask - i.e., ((chainlen << 16) | wmask) | 757 | ;;; edx = chainlenwmask - i.e., ((chainlen << 16) | wmask) |
758 | ;;; esi = windowbestlen - i.e., (window + bestlen) | 758 | ;;; esi = windowbestlen - i.e., (window + bestlen) |
759 | ;;; edi = prev | 759 | ;;; edi = prev |
760 | ;;; ebp = limit | 760 | ;;; ebp = limit |
761 | 761 | ||
762 | LookupLoop: | 762 | LookupLoop: |
763 | and ecx, edx | 763 | and ecx, edx |
764 | movzx ecx, word ptr [edi + ecx*2] | 764 | movzx ecx, word ptr [edi + ecx*2] |
765 | cmp ecx, ebp | 765 | cmp ecx, ebp |
766 | jbe LeaveNow | 766 | jbe LeaveNow |
767 | sub edx, 00010000h | 767 | sub edx, 00010000h |
768 | js LeaveNow | 768 | js LeaveNow |
769 | LoopEntry: movzx eax, word ptr [esi + ecx - 1] | 769 | LoopEntry: movzx eax, word ptr [esi + ecx - 1] |
770 | cmp eax, ebx | 770 | cmp eax, ebx |
771 | jnz LookupLoop | 771 | jnz LookupLoop |
772 | mov eax, [window] | 772 | mov eax, [window] |
773 | movzx eax, word ptr [eax + ecx] | 773 | movzx eax, word ptr [eax + ecx] |
774 | cmp eax, [scanstart] | 774 | cmp eax, [scanstart] |
775 | jnz LookupLoop | 775 | jnz LookupLoop |
776 | 776 | ||
777 | ;;; Store the current value of chainlen. | 777 | ;;; Store the current value of chainlen. |
778 | 778 | ||
779 | mov [chainlenwmask], edx | 779 | mov [chainlenwmask], edx |
780 | 780 | ||
781 | ;;; Point edi to the string under scrutiny, and esi to the string we | 781 | ;;; Point edi to the string under scrutiny, and esi to the string we |
782 | ;;; are hoping to match it up with. In actuality, esi and edi are | 782 | ;;; are hoping to match it up with. In actuality, esi and edi are |
783 | ;;; both pointed (MAX_MATCH_8 + scanalign) bytes ahead, and edx is | 783 | ;;; both pointed (MAX_MATCH_8 + scanalign) bytes ahead, and edx is |
784 | ;;; initialized to -MAX_MATCH_8. | 784 | ;;; initialized to -MAX_MATCH_8. |
785 | 785 | ||
786 | mov esi, [window] | 786 | mov esi, [window] |
787 | mov edi, [scan] | 787 | mov edi, [scan] |
788 | add esi, ecx | 788 | add esi, ecx |
789 | mov eax, [scanalign] | 789 | mov eax, [scanalign] |
790 | mov edx, 0fffffef8h; -(MAX_MATCH_8) | 790 | mov edx, 0fffffef8h; -(MAX_MATCH_8) |
791 | lea edi, [edi + eax + 0108h] ;MAX_MATCH_8] | 791 | lea edi, [edi + eax + 0108h] ;MAX_MATCH_8] |
792 | lea esi, [esi + eax + 0108h] ;MAX_MATCH_8] | 792 | lea esi, [esi + eax + 0108h] ;MAX_MATCH_8] |
793 | 793 | ||
794 | ;;; Test the strings for equality, 8 bytes at a time. At the end, | 794 | ;;; Test the strings for equality, 8 bytes at a time. At the end, |
795 | ;;; adjust edx so that it is offset to the exact byte that mismatched. | 795 | ;;; adjust edx so that it is offset to the exact byte that mismatched. |
796 | ;;; | 796 | ;;; |
797 | ;;; We already know at this point that the first three bytes of the | 797 | ;;; We already know at this point that the first three bytes of the |
798 | ;;; strings match each other, and they can be safely passed over before | 798 | ;;; strings match each other, and they can be safely passed over before |
799 | ;;; starting the compare loop. So what this code does is skip over 0-3 | 799 | ;;; starting the compare loop. So what this code does is skip over 0-3 |
800 | ;;; bytes, as much as necessary in order to dword-align the edi | 800 | ;;; bytes, as much as necessary in order to dword-align the edi |
801 | ;;; pointer. (esi will still be misaligned three times out of four.) | 801 | ;;; pointer. (esi will still be misaligned three times out of four.) |
802 | ;;; | 802 | ;;; |
803 | ;;; It should be confessed that this loop usually does not represent | 803 | ;;; It should be confessed that this loop usually does not represent |
804 | ;;; much of the total running time. Replacing it with a more | 804 | ;;; much of the total running time. Replacing it with a more |
805 | ;;; straightforward "rep cmpsb" would not drastically degrade | 805 | ;;; straightforward "rep cmpsb" would not drastically degrade |
806 | ;;; performance. | 806 | ;;; performance. |
807 | 807 | ||
808 | LoopCmps: | 808 | LoopCmps: |
809 | mov eax, [esi + edx] | 809 | mov eax, [esi + edx] |
810 | xor eax, [edi + edx] | 810 | xor eax, [edi + edx] |
811 | jnz LeaveLoopCmps | 811 | jnz LeaveLoopCmps |
812 | mov eax, [esi + edx + 4] | 812 | mov eax, [esi + edx + 4] |
813 | xor eax, [edi + edx + 4] | 813 | xor eax, [edi + edx + 4] |
814 | jnz LeaveLoopCmps4 | 814 | jnz LeaveLoopCmps4 |
815 | add edx, 8 | 815 | add edx, 8 |
816 | jnz LoopCmps | 816 | jnz LoopCmps |
817 | jmp short LenMaximum | 817 | jmp short LenMaximum |
818 | LeaveLoopCmps4: add edx, 4 | 818 | LeaveLoopCmps4: add edx, 4 |
819 | LeaveLoopCmps: test eax, 0000FFFFh | 819 | LeaveLoopCmps: test eax, 0000FFFFh |
820 | jnz LenLower | 820 | jnz LenLower |
821 | add edx, 2 | 821 | add edx, 2 |
822 | shr eax, 16 | 822 | shr eax, 16 |
823 | LenLower: sub al, 1 | 823 | LenLower: sub al, 1 |
824 | adc edx, 0 | 824 | adc edx, 0 |
825 | 825 | ||
826 | ;;; Calculate the length of the match. If it is at least MAX_MATCH, | 826 | ;;; Calculate the length of the match. If it is at least MAX_MATCH, |
827 | ;;; then automatically accept it as the best possible match and leave. | 827 | ;;; then automatically accept it as the best possible match and leave. |
828 | 828 | ||
829 | lea eax, [edi + edx] | 829 | lea eax, [edi + edx] |
830 | mov edi, [scan] | 830 | mov edi, [scan] |
831 | sub eax, edi | 831 | sub eax, edi |
832 | cmp eax, MAX_MATCH | 832 | cmp eax, MAX_MATCH |
833 | jge LenMaximum | 833 | jge LenMaximum |
834 | 834 | ||
835 | ;;; If the length of the match is not longer than the best match we | 835 | ;;; If the length of the match is not longer than the best match we |
836 | ;;; have so far, then forget it and return to the lookup loop. | 836 | ;;; have so far, then forget it and return to the lookup loop. |
837 | 837 | ||
838 | mov edx, [deflatestate] | 838 | mov edx, [deflatestate] |
839 | mov ebx, [bestlen] | 839 | mov ebx, [bestlen] |
840 | cmp eax, ebx | 840 | cmp eax, ebx |
841 | jg LongerMatch | 841 | jg LongerMatch |
842 | mov esi, [windowbestlen] | 842 | mov esi, [windowbestlen] |
843 | mov edi, [edx + dsPrev] | 843 | mov edi, [edx + dsPrev] |
844 | mov ebx, [scanend] | 844 | mov ebx, [scanend] |
845 | mov edx, [chainlenwmask] | 845 | mov edx, [chainlenwmask] |
846 | jmp LookupLoop | 846 | jmp LookupLoop |
847 | 847 | ||
848 | ;;; s->match_start = cur_match; | 848 | ;;; s->match_start = cur_match; |
849 | ;;; best_len = len; | 849 | ;;; best_len = len; |
850 | ;;; if (len >= nice_match) break; | 850 | ;;; if (len >= nice_match) break; |
851 | ;;; scan_end = *(ushf*)(scan+best_len-1); | 851 | ;;; scan_end = *(ushf*)(scan+best_len-1); |
852 | 852 | ||
853 | LongerMatch: mov ebx, [nicematch] | 853 | LongerMatch: mov ebx, [nicematch] |
854 | mov [bestlen], eax | 854 | mov [bestlen], eax |
855 | mov [edx + dsMatchStart], ecx | 855 | mov [edx + dsMatchStart], ecx |
856 | cmp eax, ebx | 856 | cmp eax, ebx |
857 | jge LeaveNow | 857 | jge LeaveNow |
858 | mov esi, [window] | 858 | mov esi, [window] |
859 | add esi, eax | 859 | add esi, eax |
860 | mov [windowbestlen], esi | 860 | mov [windowbestlen], esi |
861 | movzx ebx, word ptr [edi + eax - 1] | 861 | movzx ebx, word ptr [edi + eax - 1] |
862 | mov edi, [edx + dsPrev] | 862 | mov edi, [edx + dsPrev] |
863 | mov [scanend], ebx | 863 | mov [scanend], ebx |
864 | mov edx, [chainlenwmask] | 864 | mov edx, [chainlenwmask] |
865 | jmp LookupLoop | 865 | jmp LookupLoop |
866 | 866 | ||
867 | ;;; Accept the current string, with the maximum possible length. | 867 | ;;; Accept the current string, with the maximum possible length. |
868 | 868 | ||
869 | LenMaximum: mov edx, [deflatestate] | 869 | LenMaximum: mov edx, [deflatestate] |
870 | mov dword ptr [bestlen], MAX_MATCH | 870 | mov dword ptr [bestlen], MAX_MATCH |
871 | mov [edx + dsMatchStart], ecx | 871 | mov [edx + dsMatchStart], ecx |
872 | 872 | ||
873 | ;;; if ((uInt)best_len <= s->lookahead) return (uInt)best_len; | 873 | ;;; if ((uInt)best_len <= s->lookahead) return (uInt)best_len; |
874 | ;;; return s->lookahead; | 874 | ;;; return s->lookahead; |
875 | 875 | ||
876 | LeaveNow: | 876 | LeaveNow: |
877 | mov edx, [deflatestate] | 877 | mov edx, [deflatestate] |
878 | mov ebx, [bestlen] | 878 | mov ebx, [bestlen] |
879 | mov eax, [edx + dsLookahead] | 879 | mov eax, [edx + dsLookahead] |
880 | cmp ebx, eax | 880 | cmp ebx, eax |
881 | jg LookaheadRet | 881 | jg LookaheadRet |
882 | mov eax, ebx | 882 | mov eax, ebx |
883 | LookaheadRet: | 883 | LookaheadRet: |
884 | 884 | ||
885 | ;;; Restore the stack and return from whence we came. | 885 | ;;; Restore the stack and return from whence we came. |
886 | 886 | ||
887 | add esp, LocalVarsSize | 887 | add esp, LocalVarsSize |
888 | pop ebx | 888 | pop ebx |
889 | pop esi | 889 | pop esi |
890 | pop edi | 890 | pop edi |
891 | pop ebp | 891 | pop ebp |
892 | 892 | ||
893 | ret | 893 | ret |
894 | ; please don't remove this string ! | 894 | ; please don't remove this string ! |
895 | ; Your are free use gvmat32 in any fre or commercial apps if you don't remove the string in the binary! | 895 | ; You can freely use gvmat32 in any free or commercial app if you don't remove the string in the binary! |
896 | db 0dh,0ah,"asm686 with masm, code optimised assembly code from Brian Raiter, written 1998",0dh,0ah | 896 | db 0dh,0ah,"asm686 with masm, optimised assembly code from Brian Raiter, written 1998",0dh,0ah |
897 | 897 | ||
898 | IFDEF NOUNDERLINE | 898 | IFDEF NOUNDERLINE |
899 | longest_match_686 endp | 899 | longest_match_686 endp |
900 | ELSE | 900 | ELSE |
901 | _longest_match_686 endp | 901 | _longest_match_686 endp |
902 | ENDIF | 902 | ENDIF |
903 | 903 | ||
904 | _TEXT ends | 904 | _TEXT ends |
905 | end | 905 | end |