diff options
Diffstat (limited to 'contrib/masm686/match.asm')
-rw-r--r-- | contrib/masm686/match.asm | 413 |
1 files changed, 0 insertions, 413 deletions
diff --git a/contrib/masm686/match.asm b/contrib/masm686/match.asm deleted file mode 100644 index 4b03a71..0000000 --- a/contrib/masm686/match.asm +++ /dev/null | |||
@@ -1,413 +0,0 @@ | |||
1 | |||
2 | ; match.asm -- Pentium-Pro optimized version of longest_match() | ||
3 | ; | ||
4 | ; Updated for zlib 1.1.3 and converted to MASM 6.1x | ||
5 | ; Copyright (C) 2000 Dan Higdon <hdan@kinesoft.com> | ||
6 | ; and Chuck Walbourn <chuckw@kinesoft.com> | ||
7 | ; Corrections by Cosmin Truta <cosmint@cs.ubbcluj.ro> | ||
8 | ; | ||
9 | ; This is free software; you can redistribute it and/or modify it | ||
10 | ; under the terms of the GNU General Public License. | ||
11 | |||
12 | ; Based on match.S | ||
13 | ; Written for zlib 1.1.2 | ||
14 | ; Copyright (C) 1998 Brian Raiter <breadbox@muppetlabs.com> | ||
15 | ; | ||
16 | ; Modified by Gilles Vollant (2005) for add gzhead and gzindex | ||
17 | |||
18 | .686P | ||
19 | .MODEL FLAT | ||
20 | |||
21 | ;=========================================================================== | ||
22 | ; EQUATES | ||
23 | ;=========================================================================== | ||
24 | |||
25 | MAX_MATCH EQU 258 | ||
26 | MIN_MATCH EQU 3 | ||
27 | MIN_LOOKAHEAD EQU (MAX_MATCH + MIN_MATCH + 1) | ||
28 | MAX_MATCH_8 EQU ((MAX_MATCH + 7) AND (NOT 7)) | ||
29 | |||
30 | ;=========================================================================== | ||
31 | ; STRUCTURES | ||
32 | ;=========================================================================== | ||
33 | |||
34 | ; This STRUCT assumes a 4-byte alignment | ||
35 | |||
36 | DEFLATE_STATE STRUCT | ||
37 | ds_strm dd ? | ||
38 | ds_status dd ? | ||
39 | ds_pending_buf dd ? | ||
40 | ds_pending_buf_size dd ? | ||
41 | ds_pending_out dd ? | ||
42 | ds_pending dd ? | ||
43 | ds_wrap dd ? | ||
44 | ; gzhead and gzindex are added in zlib 1.2.2.2 (see deflate.h) | ||
45 | ds_gzhead dd ? | ||
46 | ds_gzindex dd ? | ||
47 | ds_data_type db ? | ||
48 | ds_method db ? | ||
49 | db ? ; padding | ||
50 | db ? ; padding | ||
51 | ds_last_flush dd ? | ||
52 | ds_w_size dd ? ; used | ||
53 | ds_w_bits dd ? | ||
54 | ds_w_mask dd ? ; used | ||
55 | ds_window dd ? ; used | ||
56 | ds_window_size dd ? | ||
57 | ds_prev dd ? ; used | ||
58 | ds_head dd ? | ||
59 | ds_ins_h dd ? | ||
60 | ds_hash_size dd ? | ||
61 | ds_hash_bits dd ? | ||
62 | ds_hash_mask dd ? | ||
63 | ds_hash_shift dd ? | ||
64 | ds_block_start dd ? | ||
65 | ds_match_length dd ? ; used | ||
66 | ds_prev_match dd ? ; used | ||
67 | ds_match_available dd ? | ||
68 | ds_strstart dd ? ; used | ||
69 | ds_match_start dd ? ; used | ||
70 | ds_lookahead dd ? ; used | ||
71 | ds_prev_length dd ? ; used | ||
72 | ds_max_chain_length dd ? ; used | ||
73 | ds_max_laxy_match dd ? | ||
74 | ds_level dd ? | ||
75 | ds_strategy dd ? | ||
76 | ds_good_match dd ? ; used | ||
77 | ds_nice_match dd ? ; used | ||
78 | |||
79 | ; Don't need anymore of the struct for match | ||
80 | DEFLATE_STATE ENDS | ||
81 | |||
82 | ;=========================================================================== | ||
83 | ; CODE | ||
84 | ;=========================================================================== | ||
85 | _TEXT SEGMENT | ||
86 | |||
87 | ;--------------------------------------------------------------------------- | ||
88 | ; match_init | ||
89 | ;--------------------------------------------------------------------------- | ||
90 | ALIGN 4 | ||
91 | PUBLIC _match_init | ||
92 | _match_init PROC | ||
93 | ; no initialization needed | ||
94 | ret | ||
95 | _match_init ENDP | ||
96 | |||
97 | ;--------------------------------------------------------------------------- | ||
98 | ; uInt longest_match(deflate_state *deflatestate, IPos curmatch) | ||
99 | ;--------------------------------------------------------------------------- | ||
100 | ALIGN 4 | ||
101 | |||
102 | PUBLIC _longest_match | ||
103 | _longest_match PROC | ||
104 | |||
105 | ; Since this code uses EBP for a scratch register, the stack frame must | ||
106 | ; be manually constructed and referenced relative to the ESP register. | ||
107 | |||
108 | ; Stack image | ||
109 | ; Variables | ||
110 | chainlenwmask = 0 ; high word: current chain len | ||
111 | ; low word: s->wmask | ||
112 | window = 4 ; local copy of s->window | ||
113 | windowbestlen = 8 ; s->window + bestlen | ||
114 | scanend = 12 ; last two bytes of string | ||
115 | scanstart = 16 ; first two bytes of string | ||
116 | scanalign = 20 ; dword-misalignment of string | ||
117 | nicematch = 24 ; a good enough match size | ||
118 | bestlen = 28 ; size of best match so far | ||
119 | scan = 32 ; ptr to string wanting match | ||
120 | varsize = 36 ; number of bytes (also offset to last saved register) | ||
121 | |||
122 | ; Saved Registers (actually pushed into place) | ||
123 | ebx_save = 36 | ||
124 | edi_save = 40 | ||
125 | esi_save = 44 | ||
126 | ebp_save = 48 | ||
127 | |||
128 | ; Parameters | ||
129 | retaddr = 52 | ||
130 | deflatestate = 56 | ||
131 | curmatch = 60 | ||
132 | |||
133 | ; Save registers that the compiler may be using | ||
134 | push ebp | ||
135 | push edi | ||
136 | push esi | ||
137 | push ebx | ||
138 | |||
139 | ; Allocate local variable space | ||
140 | sub esp,varsize | ||
141 | |||
142 | ; Retrieve the function arguments. ecx will hold cur_match | ||
143 | ; throughout the entire function. edx will hold the pointer to the | ||
144 | ; deflate_state structure during the function's setup (before | ||
145 | ; entering the main loop). | ||
146 | |||
147 | mov edx, [esp+deflatestate] | ||
148 | ASSUME edx:PTR DEFLATE_STATE | ||
149 | |||
150 | mov ecx, [esp+curmatch] | ||
151 | |||
152 | ; uInt wmask = s->w_mask; | ||
153 | ; unsigned chain_length = s->max_chain_length; | ||
154 | ; if (s->prev_length >= s->good_match) { | ||
155 | ; chain_length >>= 2; | ||
156 | ; } | ||
157 | |||
158 | mov eax, [edx].ds_prev_length | ||
159 | mov ebx, [edx].ds_good_match | ||
160 | cmp eax, ebx | ||
161 | mov eax, [edx].ds_w_mask | ||
162 | mov ebx, [edx].ds_max_chain_length | ||
163 | jl SHORT LastMatchGood | ||
164 | shr ebx, 2 | ||
165 | LastMatchGood: | ||
166 | |||
167 | ; chainlen is decremented once beforehand so that the function can | ||
168 | ; use the sign flag instead of the zero flag for the exit test. | ||
169 | ; It is then shifted into the high word, to make room for the wmask | ||
170 | ; value, which it will always accompany. | ||
171 | |||
172 | dec ebx | ||
173 | shl ebx, 16 | ||
174 | or ebx, eax | ||
175 | mov [esp+chainlenwmask], ebx | ||
176 | |||
177 | ; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead; | ||
178 | |||
179 | mov eax, [edx].ds_nice_match | ||
180 | mov ebx, [edx].ds_lookahead | ||
181 | cmp ebx, eax | ||
182 | jl SHORT LookaheadLess | ||
183 | mov ebx, eax | ||
184 | LookaheadLess: | ||
185 | mov [esp+nicematch], ebx | ||
186 | |||
187 | ;/* register Bytef *scan = s->window + s->strstart; */ | ||
188 | |||
189 | mov esi, [edx].ds_window | ||
190 | mov [esp+window], esi | ||
191 | mov ebp, [edx].ds_strstart | ||
192 | lea edi, [esi+ebp] | ||
193 | mov [esp+scan],edi | ||
194 | |||
195 | ;/* Determine how many bytes the scan ptr is off from being */ | ||
196 | ;/* dword-aligned. */ | ||
197 | |||
198 | mov eax, edi | ||
199 | neg eax | ||
200 | and eax, 3 | ||
201 | mov [esp+scanalign], eax | ||
202 | |||
203 | ;/* IPos limit = s->strstart > (IPos)MAX_DIST(s) ? */ | ||
204 | ;/* s->strstart - (IPos)MAX_DIST(s) : NIL; */ | ||
205 | |||
206 | mov eax, [edx].ds_w_size | ||
207 | sub eax, MIN_LOOKAHEAD | ||
208 | sub ebp, eax | ||
209 | jg SHORT LimitPositive | ||
210 | xor ebp, ebp | ||
211 | LimitPositive: | ||
212 | |||
213 | ;/* int best_len = s->prev_length; */ | ||
214 | |||
215 | mov eax, [edx].ds_prev_length | ||
216 | mov [esp+bestlen], eax | ||
217 | |||
218 | ;/* Store the sum of s->window + best_len in %esi locally, and in %esi. */ | ||
219 | |||
220 | add esi, eax | ||
221 | mov [esp+windowbestlen], esi | ||
222 | |||
223 | ;/* register ush scan_start = *(ushf*)scan; */ | ||
224 | ;/* register ush scan_end = *(ushf*)(scan+best_len-1); */ | ||
225 | ;/* Posf *prev = s->prev; */ | ||
226 | |||
227 | movzx ebx, WORD PTR[edi] | ||
228 | mov [esp+scanstart], ebx | ||
229 | movzx ebx, WORD PTR[eax+edi-1] | ||
230 | mov [esp+scanend], ebx | ||
231 | mov edi, [edx].ds_prev | ||
232 | |||
233 | ;/* Jump into the main loop. */ | ||
234 | |||
235 | mov edx, [esp+chainlenwmask] | ||
236 | jmp SHORT LoopEntry | ||
237 | |||
238 | ;/* do { | ||
239 | ; * match = s->window + cur_match; | ||
240 | ; * if (*(ushf*)(match+best_len-1) != scan_end || | ||
241 | ; * *(ushf*)match != scan_start) continue; | ||
242 | ; * [...] | ||
243 | ; * } while ((cur_match = prev[cur_match & wmask]) > limit | ||
244 | ; * && --chain_length != 0); | ||
245 | ; * | ||
246 | ; * Here is the inner loop of the function. The function will spend the | ||
247 | ; * majority of its time in this loop, and majority of that time will | ||
248 | ; * be spent in the first ten instructions. | ||
249 | ; * | ||
250 | ; * Within this loop: | ||
251 | ; * %ebx = scanend | ||
252 | ; * %ecx = curmatch | ||
253 | ; * %edx = chainlenwmask - i.e., ((chainlen << 16) | wmask) | ||
254 | ; * %esi = windowbestlen - i.e., (window + bestlen) | ||
255 | ; * %edi = prev | ||
256 | ; * %ebp = limit | ||
257 | ; */ | ||
258 | |||
259 | ALIGN 4 | ||
260 | LookupLoop: | ||
261 | and ecx, edx | ||
262 | movzx ecx, WORD PTR[edi+ecx*2] | ||
263 | cmp ecx, ebp | ||
264 | jbe LeaveNow | ||
265 | sub edx, 000010000H | ||
266 | js LeaveNow | ||
267 | |||
268 | LoopEntry: | ||
269 | movzx eax, WORD PTR[esi+ecx-1] | ||
270 | cmp eax, ebx | ||
271 | jnz SHORT LookupLoop | ||
272 | |||
273 | mov eax, [esp+window] | ||
274 | movzx eax, WORD PTR[eax+ecx] | ||
275 | cmp eax, [esp+scanstart] | ||
276 | jnz SHORT LookupLoop | ||
277 | |||
278 | ;/* Store the current value of chainlen. */ | ||
279 | |||
280 | mov [esp+chainlenwmask], edx | ||
281 | |||
282 | ;/* Point %edi to the string under scrutiny, and %esi to the string we */ | ||
283 | ;/* are hoping to match it up with. In actuality, %esi and %edi are */ | ||
284 | ;/* both pointed (MAX_MATCH_8 - scanalign) bytes ahead, and %edx is */ | ||
285 | ;/* initialized to -(MAX_MATCH_8 - scanalign). */ | ||
286 | |||
287 | mov esi, [esp+window] | ||
288 | mov edi, [esp+scan] | ||
289 | add esi, ecx | ||
290 | mov eax, [esp+scanalign] | ||
291 | mov edx, -MAX_MATCH_8 | ||
292 | lea edi, [edi+eax+MAX_MATCH_8] | ||
293 | lea esi, [esi+eax+MAX_MATCH_8] | ||
294 | |||
295 | ;/* Test the strings for equality, 8 bytes at a time. At the end, | ||
296 | ; * adjust %edx so that it is offset to the exact byte that mismatched. | ||
297 | ; * | ||
298 | ; * We already know at this point that the first three bytes of the | ||
299 | ; * strings match each other, and they can be safely passed over before | ||
300 | ; * starting the compare loop. So what this code does is skip over 0-3 | ||
301 | ; * bytes, as much as necessary in order to dword-align the %edi | ||
302 | ; * pointer. (%esi will still be misaligned three times out of four.) | ||
303 | ; * | ||
304 | ; * It should be confessed that this loop usually does not represent | ||
305 | ; * much of the total running time. Replacing it with a more | ||
306 | ; * straightforward "rep cmpsb" would not drastically degrade | ||
307 | ; * performance. | ||
308 | ; */ | ||
309 | |||
310 | LoopCmps: | ||
311 | mov eax, DWORD PTR[esi+edx] | ||
312 | xor eax, DWORD PTR[edi+edx] | ||
313 | jnz SHORT LeaveLoopCmps | ||
314 | |||
315 | mov eax, DWORD PTR[esi+edx+4] | ||
316 | xor eax, DWORD PTR[edi+edx+4] | ||
317 | jnz SHORT LeaveLoopCmps4 | ||
318 | |||
319 | add edx, 8 | ||
320 | jnz SHORT LoopCmps | ||
321 | jmp LenMaximum | ||
322 | ALIGN 4 | ||
323 | |||
324 | LeaveLoopCmps4: | ||
325 | add edx, 4 | ||
326 | |||
327 | LeaveLoopCmps: | ||
328 | test eax, 00000FFFFH | ||
329 | jnz SHORT LenLower | ||
330 | |||
331 | add edx, 2 | ||
332 | shr eax, 16 | ||
333 | |||
334 | LenLower: | ||
335 | sub al, 1 | ||
336 | adc edx, 0 | ||
337 | |||
338 | ;/* Calculate the length of the match. If it is longer than MAX_MATCH, */ | ||
339 | ;/* then automatically accept it as the best possible match and leave. */ | ||
340 | |||
341 | lea eax, [edi+edx] | ||
342 | mov edi, [esp+scan] | ||
343 | sub eax, edi | ||
344 | cmp eax, MAX_MATCH | ||
345 | jge SHORT LenMaximum | ||
346 | |||
347 | ;/* If the length of the match is not longer than the best match we */ | ||
348 | ;/* have so far, then forget it and return to the lookup loop. */ | ||
349 | |||
350 | mov edx, [esp+deflatestate] | ||
351 | mov ebx, [esp+bestlen] | ||
352 | cmp eax, ebx | ||
353 | jg SHORT LongerMatch | ||
354 | mov esi, [esp+windowbestlen] | ||
355 | mov edi, [edx].ds_prev | ||
356 | mov ebx, [esp+scanend] | ||
357 | mov edx, [esp+chainlenwmask] | ||
358 | jmp LookupLoop | ||
359 | ALIGN 4 | ||
360 | |||
361 | ;/* s->match_start = cur_match; */ | ||
362 | ;/* best_len = len; */ | ||
363 | ;/* if (len >= nice_match) break; */ | ||
364 | ;/* scan_end = *(ushf*)(scan+best_len-1); */ | ||
365 | |||
366 | LongerMatch: | ||
367 | mov ebx, [esp+nicematch] | ||
368 | mov [esp+bestlen], eax | ||
369 | mov [edx].ds_match_start, ecx | ||
370 | cmp eax, ebx | ||
371 | jge SHORT LeaveNow | ||
372 | mov esi, [esp+window] | ||
373 | add esi, eax | ||
374 | mov [esp+windowbestlen], esi | ||
375 | movzx ebx, WORD PTR[edi+eax-1] | ||
376 | mov edi, [edx].ds_prev | ||
377 | mov [esp+scanend], ebx | ||
378 | mov edx, [esp+chainlenwmask] | ||
379 | jmp LookupLoop | ||
380 | ALIGN 4 | ||
381 | |||
382 | ;/* Accept the current string, with the maximum possible length. */ | ||
383 | |||
384 | LenMaximum: | ||
385 | mov edx, [esp+deflatestate] | ||
386 | mov DWORD PTR[esp+bestlen], MAX_MATCH | ||
387 | mov [edx].ds_match_start, ecx | ||
388 | |||
389 | ;/* if ((uInt)best_len <= s->lookahead) return (uInt)best_len; */ | ||
390 | ;/* return s->lookahead; */ | ||
391 | |||
392 | LeaveNow: | ||
393 | mov edx, [esp+deflatestate] | ||
394 | mov ebx, [esp+bestlen] | ||
395 | mov eax, [edx].ds_lookahead | ||
396 | cmp ebx, eax | ||
397 | jg SHORT LookaheadRet | ||
398 | mov eax, ebx | ||
399 | LookaheadRet: | ||
400 | |||
401 | ; Restore the stack and return from whence we came. | ||
402 | |||
403 | add esp, varsize | ||
404 | pop ebx | ||
405 | pop esi | ||
406 | pop edi | ||
407 | pop ebp | ||
408 | ret | ||
409 | |||
410 | _longest_match ENDP | ||
411 | |||
412 | _TEXT ENDS | ||
413 | END | ||