diff options
Diffstat (limited to 'contrib/masm686')
-rw-r--r-- | contrib/masm686/match.asm | 821 |
1 files changed, 413 insertions, 408 deletions
diff --git a/contrib/masm686/match.asm b/contrib/masm686/match.asm index 2287804..4b03a71 100644 --- a/contrib/masm686/match.asm +++ b/contrib/masm686/match.asm | |||
@@ -1,408 +1,413 @@ | |||
1 | 1 | ||
2 | ; match.asm -- Pentium-Pro optimized version of longest_match() | 2 | ; match.asm -- Pentium-Pro optimized version of longest_match() |
3 | ; | 3 | ; |
4 | ; Updated for zlib 1.1.3 and converted to MASM 6.1x | 4 | ; Updated for zlib 1.1.3 and converted to MASM 6.1x |
5 | ; Copyright (C) 2000 Dan Higdon <hdan@kinesoft.com> | 5 | ; Copyright (C) 2000 Dan Higdon <hdan@kinesoft.com> |
6 | ; and Chuck Walbourn <chuckw@kinesoft.com> | 6 | ; and Chuck Walbourn <chuckw@kinesoft.com> |
7 | ; Corrections by Cosmin Truta <cosmint@cs.ubbcluj.ro> | 7 | ; Corrections by Cosmin Truta <cosmint@cs.ubbcluj.ro> |
8 | ; | 8 | ; |
9 | ; This is free software; you can redistribute it and/or modify it | 9 | ; This is free software; you can redistribute it and/or modify it |
10 | ; under the terms of the GNU General Public License. | 10 | ; under the terms of the GNU General Public License. |
11 | 11 | ||
12 | ; Based on match.S | 12 | ; Based on match.S |
13 | ; Written for zlib 1.1.2 | 13 | ; Written for zlib 1.1.2 |
14 | ; Copyright (C) 1998 Brian Raiter <breadbox@muppetlabs.com> | 14 | ; Copyright (C) 1998 Brian Raiter <breadbox@muppetlabs.com> |
15 | 15 | ; | |
16 | .686P | 16 | ; Modified by Gilles Vollant (2005) to add gzhead and gzindex |
17 | .MODEL FLAT | 17 | |
18 | 18 | .686P | |
19 | ;=========================================================================== | 19 | .MODEL FLAT |
20 | ; EQUATES | 20 | |
21 | ;=========================================================================== | 21 | ;=========================================================================== |
22 | 22 | ; EQUATES | |
23 | MAX_MATCH EQU 258 | 23 | ;=========================================================================== |
24 | MIN_MATCH EQU 3 | 24 | |
25 | MIN_LOOKAHEAD EQU (MAX_MATCH + MIN_MATCH + 1) | 25 | MAX_MATCH EQU 258 |
26 | MAX_MATCH_8 EQU ((MAX_MATCH + 7) AND (NOT 7)) | 26 | MIN_MATCH EQU 3 |
27 | 27 | MIN_LOOKAHEAD EQU (MAX_MATCH + MIN_MATCH + 1) | |
28 | ;=========================================================================== | 28 | MAX_MATCH_8 EQU ((MAX_MATCH + 7) AND (NOT 7)) |
29 | ; STRUCTURES | 29 | |
30 | ;=========================================================================== | 30 | ;=========================================================================== |
31 | 31 | ; STRUCTURES | |
32 | ; This STRUCT assumes a 4-byte alignment | 32 | ;=========================================================================== |
33 | 33 | ||
34 | DEFLATE_STATE STRUCT | 34 | ; This STRUCT assumes a 4-byte alignment |
35 | ds_strm dd ? | 35 | |
36 | ds_status dd ? | 36 | DEFLATE_STATE STRUCT |
37 | ds_pending_buf dd ? | 37 | ds_strm dd ? |
38 | ds_pending_buf_size dd ? | 38 | ds_status dd ? |
39 | ds_pending_out dd ? | 39 | ds_pending_buf dd ? |
40 | ds_pending dd ? | 40 | ds_pending_buf_size dd ? |
41 | ds_wrap dd ? | 41 | ds_pending_out dd ? |
42 | ds_data_type db ? | 42 | ds_pending dd ? |
43 | ds_method db ? | 43 | ds_wrap dd ? |
44 | db ? ; padding | 44 | ; gzhead and gzindex are added in zlib 1.2.2.2 (see deflate.h) |
45 | db ? ; padding | 45 | ds_gzhead dd ? |
46 | ds_last_flush dd ? | 46 | ds_gzindex dd ? |
47 | ds_w_size dd ? ; used | 47 | ds_data_type db ? |
48 | ds_w_bits dd ? | 48 | ds_method db ? |
49 | ds_w_mask dd ? ; used | 49 | db ? ; padding |
50 | ds_window dd ? ; used | 50 | db ? ; padding |
51 | ds_window_size dd ? | 51 | ds_last_flush dd ? |
52 | ds_prev dd ? ; used | 52 | ds_w_size dd ? ; used |
53 | ds_head dd ? | 53 | ds_w_bits dd ? |
54 | ds_ins_h dd ? | 54 | ds_w_mask dd ? ; used |
55 | ds_hash_size dd ? | 55 | ds_window dd ? ; used |
56 | ds_hash_bits dd ? | 56 | ds_window_size dd ? |
57 | ds_hash_mask dd ? | 57 | ds_prev dd ? ; used |
58 | ds_hash_shift dd ? | 58 | ds_head dd ? |
59 | ds_block_start dd ? | 59 | ds_ins_h dd ? |
60 | ds_match_length dd ? ; used | 60 | ds_hash_size dd ? |
61 | ds_prev_match dd ? ; used | 61 | ds_hash_bits dd ? |
62 | ds_match_available dd ? | 62 | ds_hash_mask dd ? |
63 | ds_strstart dd ? ; used | 63 | ds_hash_shift dd ? |
64 | ds_match_start dd ? ; used | 64 | ds_block_start dd ? |
65 | ds_lookahead dd ? ; used | 65 | ds_match_length dd ? ; used |
66 | ds_prev_length dd ? ; used | 66 | ds_prev_match dd ? ; used |
67 | ds_max_chain_length dd ? ; used | 67 | ds_match_available dd ? |
68 | ds_max_laxy_match dd ? | 68 | ds_strstart dd ? ; used |
69 | ds_level dd ? | 69 | ds_match_start dd ? ; used |
70 | ds_strategy dd ? | 70 | ds_lookahead dd ? ; used |
71 | ds_good_match dd ? ; used | 71 | ds_prev_length dd ? ; used |
72 | ds_nice_match dd ? ; used | 72 | ds_max_chain_length dd ? ; used |
73 | 73 | ds_max_laxy_match dd ? | |
74 | ; Don't need any more of the struct for match | 74 | ds_level dd ? |
75 | DEFLATE_STATE ENDS | 75 | ds_strategy dd ? |
76 | 76 | ds_good_match dd ? ; used | |
77 | ;=========================================================================== | 77 | ds_nice_match dd ? ; used |
78 | ; CODE | 78 | |
79 | ;=========================================================================== | 79 | ; Don't need any more of the struct for match |
80 | _TEXT SEGMENT | 80 | DEFLATE_STATE ENDS |
81 | 81 | ||
82 | ;--------------------------------------------------------------------------- | 82 | ;=========================================================================== |
83 | ; match_init | 83 | ; CODE |
84 | ;--------------------------------------------------------------------------- | 84 | ;=========================================================================== |
85 | ALIGN 4 | 85 | _TEXT SEGMENT |
86 | PUBLIC _match_init | 86 | |
87 | _match_init PROC | 87 | ;--------------------------------------------------------------------------- |
88 | ; no initialization needed | 88 | ; match_init |
89 | ret | 89 | ;--------------------------------------------------------------------------- |
90 | _match_init ENDP | 90 | ALIGN 4 |
91 | 91 | PUBLIC _match_init | |
92 | ;--------------------------------------------------------------------------- | 92 | _match_init PROC |
93 | ; uInt longest_match(deflate_state *deflatestate, IPos curmatch) | 93 | ; no initialization needed |
94 | ;--------------------------------------------------------------------------- | 94 | ret |
95 | ALIGN 4 | 95 | _match_init ENDP |
96 | 96 | ||
97 | PUBLIC _longest_match | 97 | ;--------------------------------------------------------------------------- |
98 | _longest_match PROC | 98 | ; uInt longest_match(deflate_state *deflatestate, IPos curmatch) |
99 | 99 | ;--------------------------------------------------------------------------- | |
100 | ; Since this code uses EBP for a scratch register, the stack frame must | 100 | ALIGN 4 |
101 | ; be manually constructed and referenced relative to the ESP register. | 101 | |
102 | 102 | PUBLIC _longest_match | |
103 | ; Stack image | 103 | _longest_match PROC |
104 | ; Variables | 104 | |
105 | chainlenwmask = 0 ; high word: current chain len | 105 | ; Since this code uses EBP for a scratch register, the stack frame must |
106 | ; low word: s->wmask | 106 | ; be manually constructed and referenced relative to the ESP register. |
107 | window = 4 ; local copy of s->window | 107 | |
108 | windowbestlen = 8 ; s->window + bestlen | 108 | ; Stack image |
109 | scanend = 12 ; last two bytes of string | 109 | ; Variables |
110 | scanstart = 16 ; first two bytes of string | 110 | chainlenwmask = 0 ; high word: current chain len |
111 | scanalign = 20 ; dword-misalignment of string | 111 | ; low word: s->wmask |
112 | nicematch = 24 ; a good enough match size | 112 | window = 4 ; local copy of s->window |
113 | bestlen = 28 ; size of best match so far | 113 | windowbestlen = 8 ; s->window + bestlen |
114 | scan = 32 ; ptr to string wanting match | 114 | scanend = 12 ; last two bytes of string |
115 | varsize = 36 ; number of bytes (also offset to last saved register) | 115 | scanstart = 16 ; first two bytes of string |
116 | 116 | scanalign = 20 ; dword-misalignment of string | |
117 | ; Saved Registers (actually pushed into place) | 117 | nicematch = 24 ; a good enough match size |
118 | ebx_save = 36 | 118 | bestlen = 28 ; size of best match so far |
119 | edi_save = 40 | 119 | scan = 32 ; ptr to string wanting match |
120 | esi_save = 44 | 120 | varsize = 36 ; number of bytes (also offset to last saved register) |
121 | ebp_save = 48 | 121 | |
122 | 122 | ; Saved Registers (actually pushed into place) | |
123 | ; Parameters | 123 | ebx_save = 36 |
124 | retaddr = 52 | 124 | edi_save = 40 |
125 | deflatestate = 56 | 125 | esi_save = 44 |
126 | curmatch = 60 | 126 | ebp_save = 48 |
127 | 127 | ||
128 | ; Save registers that the compiler may be using | 128 | ; Parameters |
129 | push ebp | 129 | retaddr = 52 |
130 | push edi | 130 | deflatestate = 56 |
131 | push esi | 131 | curmatch = 60 |
132 | push ebx | 132 | |
133 | 133 | ; Save registers that the compiler may be using | |
134 | ; Allocate local variable space | 134 | push ebp |
135 | sub esp,varsize | 135 | push edi |
136 | 136 | push esi | |
137 | ; Retrieve the function arguments. ecx will hold cur_match | 137 | push ebx |
138 | ; throughout the entire function. edx will hold the pointer to the | 138 | |
139 | ; deflate_state structure during the function's setup (before | 139 | ; Allocate local variable space |
140 | ; entering the main loop). | 140 | sub esp,varsize |
141 | 141 | ||
142 | mov edx, [esp+deflatestate] | 142 | ; Retrieve the function arguments. ecx will hold cur_match |
143 | ASSUME edx:PTR DEFLATE_STATE | 143 | ; throughout the entire function. edx will hold the pointer to the |
144 | 144 | ; deflate_state structure during the function's setup (before | |
145 | mov ecx, [esp+curmatch] | 145 | ; entering the main loop). |
146 | 146 | ||
147 | ; uInt wmask = s->w_mask; | 147 | mov edx, [esp+deflatestate] |
148 | ; unsigned chain_length = s->max_chain_length; | 148 | ASSUME edx:PTR DEFLATE_STATE |
149 | ; if (s->prev_length >= s->good_match) { | 149 | |
150 | ; chain_length >>= 2; | 150 | mov ecx, [esp+curmatch] |
151 | ; } | 151 | |
152 | 152 | ; uInt wmask = s->w_mask; | |
153 | mov eax, [edx].ds_prev_length | 153 | ; unsigned chain_length = s->max_chain_length; |
154 | mov ebx, [edx].ds_good_match | 154 | ; if (s->prev_length >= s->good_match) { |
155 | cmp eax, ebx | 155 | ; chain_length >>= 2; |
156 | mov eax, [edx].ds_w_mask | 156 | ; } |
157 | mov ebx, [edx].ds_max_chain_length | 157 | |
158 | jl SHORT LastMatchGood | 158 | mov eax, [edx].ds_prev_length |
159 | shr ebx, 2 | 159 | mov ebx, [edx].ds_good_match |
160 | LastMatchGood: | 160 | cmp eax, ebx |
161 | 161 | mov eax, [edx].ds_w_mask | |
162 | ; chainlen is decremented once beforehand so that the function can | 162 | mov ebx, [edx].ds_max_chain_length |
163 | ; use the sign flag instead of the zero flag for the exit test. | 163 | jl SHORT LastMatchGood |
164 | ; It is then shifted into the high word, to make room for the wmask | 164 | shr ebx, 2 |
165 | ; value, which it will always accompany. | 165 | LastMatchGood: |
166 | 166 | ||
167 | dec ebx | 167 | ; chainlen is decremented once beforehand so that the function can |
168 | shl ebx, 16 | 168 | ; use the sign flag instead of the zero flag for the exit test. |
169 | or ebx, eax | 169 | ; It is then shifted into the high word, to make room for the wmask |
170 | mov [esp+chainlenwmask], ebx | 170 | ; value, which it will always accompany. |
171 | 171 | ||
172 | ; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead; | 172 | dec ebx |
173 | 173 | shl ebx, 16 | |
174 | mov eax, [edx].ds_nice_match | 174 | or ebx, eax |
175 | mov ebx, [edx].ds_lookahead | 175 | mov [esp+chainlenwmask], ebx |
176 | cmp ebx, eax | 176 | |
177 | jl SHORT LookaheadLess | 177 | ; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead; |
178 | mov ebx, eax | 178 | |
179 | LookaheadLess: | 179 | mov eax, [edx].ds_nice_match |
180 | mov [esp+nicematch], ebx | 180 | mov ebx, [edx].ds_lookahead |
181 | 181 | cmp ebx, eax | |
182 | ;/* register Bytef *scan = s->window + s->strstart; */ | 182 | jl SHORT LookaheadLess |
183 | 183 | mov ebx, eax | |
184 | mov esi, [edx].ds_window | 184 | LookaheadLess: |
185 | mov [esp+window], esi | 185 | mov [esp+nicematch], ebx |
186 | mov ebp, [edx].ds_strstart | 186 | |
187 | lea edi, [esi+ebp] | 187 | ;/* register Bytef *scan = s->window + s->strstart; */ |
188 | mov [esp+scan],edi | 188 | |
189 | 189 | mov esi, [edx].ds_window | |
190 | ;/* Determine how many bytes the scan ptr is off from being */ | 190 | mov [esp+window], esi |
191 | ;/* dword-aligned. */ | 191 | mov ebp, [edx].ds_strstart |
192 | 192 | lea edi, [esi+ebp] | |
193 | mov eax, edi | 193 | mov [esp+scan],edi |
194 | neg eax | 194 | |
195 | and eax, 3 | 195 | ;/* Determine how many bytes the scan ptr is off from being */ |
196 | mov [esp+scanalign], eax | 196 | ;/* dword-aligned. */ |
197 | 197 | ||
198 | ;/* IPos limit = s->strstart > (IPos)MAX_DIST(s) ? */ | 198 | mov eax, edi |
199 | ;/* s->strstart - (IPos)MAX_DIST(s) : NIL; */ | 199 | neg eax |
200 | 200 | and eax, 3 | |
201 | mov eax, [edx].ds_w_size | 201 | mov [esp+scanalign], eax |
202 | sub eax, MIN_LOOKAHEAD | 202 | |
203 | sub ebp, eax | 203 | ;/* IPos limit = s->strstart > (IPos)MAX_DIST(s) ? */ |
204 | jg SHORT LimitPositive | 204 | ;/* s->strstart - (IPos)MAX_DIST(s) : NIL; */ |
205 | xor ebp, ebp | 205 | |
206 | LimitPositive: | 206 | mov eax, [edx].ds_w_size |
207 | 207 | sub eax, MIN_LOOKAHEAD | |
208 | ;/* int best_len = s->prev_length; */ | 208 | sub ebp, eax |
209 | 209 | jg SHORT LimitPositive | |
210 | mov eax, [edx].ds_prev_length | 210 | xor ebp, ebp |
211 | mov [esp+bestlen], eax | 211 | LimitPositive: |
212 | 212 | ||
213 | ;/* Store the sum of s->window + best_len in the windowbestlen local, and in %esi. */ | 213 | ;/* int best_len = s->prev_length; */ |
214 | 214 | ||
215 | add esi, eax | 215 | mov eax, [edx].ds_prev_length |
216 | mov [esp+windowbestlen], esi | 216 | mov [esp+bestlen], eax |
217 | 217 | ||
218 | ;/* register ush scan_start = *(ushf*)scan; */ | 218 | ;/* Store the sum of s->window + best_len in the windowbestlen local, and in %esi. */ |
219 | ;/* register ush scan_end = *(ushf*)(scan+best_len-1); */ | 219 | |
220 | ;/* Posf *prev = s->prev; */ | 220 | add esi, eax |
221 | 221 | mov [esp+windowbestlen], esi | |
222 | movzx ebx, WORD PTR[edi] | 222 | |
223 | mov [esp+scanstart], ebx | 223 | ;/* register ush scan_start = *(ushf*)scan; */ |
224 | movzx ebx, WORD PTR[eax+edi-1] | 224 | ;/* register ush scan_end = *(ushf*)(scan+best_len-1); */ |
225 | mov [esp+scanend], ebx | 225 | ;/* Posf *prev = s->prev; */ |
226 | mov edi, [edx].ds_prev | 226 | |
227 | 227 | movzx ebx, WORD PTR[edi] | |
228 | ;/* Jump into the main loop. */ | 228 | mov [esp+scanstart], ebx |
229 | 229 | movzx ebx, WORD PTR[eax+edi-1] | |
230 | mov edx, [esp+chainlenwmask] | 230 | mov [esp+scanend], ebx |
231 | jmp SHORT LoopEntry | 231 | mov edi, [edx].ds_prev |
232 | 232 | ||
233 | ;/* do { | 233 | ;/* Jump into the main loop. */ |
234 | ; * match = s->window + cur_match; | 234 | |
235 | ; * if (*(ushf*)(match+best_len-1) != scan_end || | 235 | mov edx, [esp+chainlenwmask] |
236 | ; * *(ushf*)match != scan_start) continue; | 236 | jmp SHORT LoopEntry |
237 | ; * [...] | 237 | |
238 | ; * } while ((cur_match = prev[cur_match & wmask]) > limit | 238 | ;/* do { |
239 | ; * && --chain_length != 0); | 239 | ; * match = s->window + cur_match; |
240 | ; * | 240 | ; * if (*(ushf*)(match+best_len-1) != scan_end || |
241 | ; * Here is the inner loop of the function. The function will spend the | 241 | ; * *(ushf*)match != scan_start) continue; |
242 | ; * majority of its time in this loop, and majority of that time will | 242 | ; * [...] |
243 | ; * be spent in the first ten instructions. | 243 | ; * } while ((cur_match = prev[cur_match & wmask]) > limit |
244 | ; * | 244 | ; * && --chain_length != 0); |
245 | ; * Within this loop: | 245 | ; * |
246 | ; * %ebx = scanend | 246 | ; * Here is the inner loop of the function. The function will spend the |
247 | ; * %ecx = curmatch | 247 | ; * majority of its time in this loop, and majority of that time will |
248 | ; * %edx = chainlenwmask - i.e., ((chainlen << 16) | wmask) | 248 | ; * be spent in the first ten instructions. |
249 | ; * %esi = windowbestlen - i.e., (window + bestlen) | 249 | ; * |
250 | ; * %edi = prev | 250 | ; * Within this loop: |
251 | ; * %ebp = limit | 251 | ; * %ebx = scanend |
252 | ; */ | 252 | ; * %ecx = curmatch |
253 | 253 | ; * %edx = chainlenwmask - i.e., ((chainlen << 16) | wmask) | |
254 | ALIGN 4 | 254 | ; * %esi = windowbestlen - i.e., (window + bestlen) |
255 | LookupLoop: | 255 | ; * %edi = prev |
256 | and ecx, edx | 256 | ; * %ebp = limit |
257 | movzx ecx, WORD PTR[edi+ecx*2] | 257 | ; */ |
258 | cmp ecx, ebp | 258 | |
259 | jbe LeaveNow | 259 | ALIGN 4 |
260 | sub edx, 000010000H | 260 | LookupLoop: |
261 | js LeaveNow | 261 | and ecx, edx |
262 | 262 | movzx ecx, WORD PTR[edi+ecx*2] | |
263 | LoopEntry: | 263 | cmp ecx, ebp |
264 | movzx eax, WORD PTR[esi+ecx-1] | 264 | jbe LeaveNow |
265 | cmp eax, ebx | 265 | sub edx, 000010000H |
266 | jnz SHORT LookupLoop | 266 | js LeaveNow |
267 | 267 | ||
268 | mov eax, [esp+window] | 268 | LoopEntry: |
269 | movzx eax, WORD PTR[eax+ecx] | 269 | movzx eax, WORD PTR[esi+ecx-1] |
270 | cmp eax, [esp+scanstart] | 270 | cmp eax, ebx |
271 | jnz SHORT LookupLoop | 271 | jnz SHORT LookupLoop |
272 | 272 | ||
273 | ;/* Store the current value of chainlen. */ | 273 | mov eax, [esp+window] |
274 | 274 | movzx eax, WORD PTR[eax+ecx] | |
275 | mov [esp+chainlenwmask], edx | 275 | cmp eax, [esp+scanstart] |
276 | 276 | jnz SHORT LookupLoop | |
277 | ;/* Point %edi to the string under scrutiny, and %esi to the string we */ | 277 | |
278 | ;/* are hoping to match it up with. In actuality, %esi and %edi are */ | 278 | ;/* Store the current value of chainlen. */ |
279 | ;/* both pointed (MAX_MATCH_8 - scanalign) bytes ahead, and %edx is */ | 279 | |
280 | ;/* initialized to -(MAX_MATCH_8 - scanalign). */ | 280 | mov [esp+chainlenwmask], edx |
281 | 281 | ||
282 | mov esi, [esp+window] | 282 | ;/* Point %edi to the string under scrutiny, and %esi to the string we */ |
283 | mov edi, [esp+scan] | 283 | ;/* are hoping to match it up with. In actuality, %esi and %edi are */ |
284 | add esi, ecx | 284 | ;/* both pointed (MAX_MATCH_8 - scanalign) bytes ahead, and %edx is */ |
285 | mov eax, [esp+scanalign] | 285 | ;/* initialized to -(MAX_MATCH_8 - scanalign). */ |
286 | mov edx, -MAX_MATCH_8 | 286 | |
287 | lea edi, [edi+eax+MAX_MATCH_8] | 287 | mov esi, [esp+window] |
288 | lea esi, [esi+eax+MAX_MATCH_8] | 288 | mov edi, [esp+scan] |
289 | 289 | add esi, ecx | |
290 | ;/* Test the strings for equality, 8 bytes at a time. At the end, | 290 | mov eax, [esp+scanalign] |
291 | ; * adjust %edx so that it is offset to the exact byte that mismatched. | 291 | mov edx, -MAX_MATCH_8 |
292 | ; * | 292 | lea edi, [edi+eax+MAX_MATCH_8] |
293 | ; * We already know at this point that the first three bytes of the | 293 | lea esi, [esi+eax+MAX_MATCH_8] |
294 | ; * strings match each other, and they can be safely passed over before | 294 | |
295 | ; * starting the compare loop. So what this code does is skip over 0-3 | 295 | ;/* Test the strings for equality, 8 bytes at a time. At the end, |
296 | ; * bytes, as much as necessary in order to dword-align the %edi | 296 | ; * adjust %edx so that it is offset to the exact byte that mismatched. |
297 | ; * pointer. (%esi will still be misaligned three times out of four.) | 297 | ; * |
298 | ; * | 298 | ; * We already know at this point that the first three bytes of the |
299 | ; * It should be confessed that this loop usually does not represent | 299 | ; * strings match each other, and they can be safely passed over before |
300 | ; * much of the total running time. Replacing it with a more | 300 | ; * starting the compare loop. So what this code does is skip over 0-3 |
301 | ; * straightforward "rep cmpsb" would not drastically degrade | 301 | ; * bytes, as much as necessary in order to dword-align the %edi |
302 | ; * performance. | 302 | ; * pointer. (%esi will still be misaligned three times out of four.) |
303 | ; */ | 303 | ; * |
304 | 304 | ; * It should be confessed that this loop usually does not represent | |
305 | LoopCmps: | 305 | ; * much of the total running time. Replacing it with a more |
306 | mov eax, DWORD PTR[esi+edx] | 306 | ; * straightforward "rep cmpsb" would not drastically degrade |
307 | xor eax, DWORD PTR[edi+edx] | 307 | ; * performance. |
308 | jnz SHORT LeaveLoopCmps | 308 | ; */ |
309 | 309 | ||
310 | mov eax, DWORD PTR[esi+edx+4] | 310 | LoopCmps: |
311 | xor eax, DWORD PTR[edi+edx+4] | 311 | mov eax, DWORD PTR[esi+edx] |
312 | jnz SHORT LeaveLoopCmps4 | 312 | xor eax, DWORD PTR[edi+edx] |
313 | 313 | jnz SHORT LeaveLoopCmps | |
314 | add edx, 8 | 314 | |
315 | jnz SHORT LoopCmps | 315 | mov eax, DWORD PTR[esi+edx+4] |
316 | jmp LenMaximum | 316 | xor eax, DWORD PTR[edi+edx+4] |
317 | ALIGN 4 | 317 | jnz SHORT LeaveLoopCmps4 |
318 | 318 | ||
319 | LeaveLoopCmps4: | 319 | add edx, 8 |
320 | add edx, 4 | 320 | jnz SHORT LoopCmps |
321 | 321 | jmp LenMaximum | |
322 | LeaveLoopCmps: | 322 | ALIGN 4 |
323 | test eax, 00000FFFFH | 323 | |
324 | jnz SHORT LenLower | 324 | LeaveLoopCmps4: |
325 | 325 | add edx, 4 | |
326 | add edx, 2 | 326 | |
327 | shr eax, 16 | 327 | LeaveLoopCmps: |
328 | 328 | test eax, 00000FFFFH | |
329 | LenLower: | 329 | jnz SHORT LenLower |
330 | sub al, 1 | 330 | |
331 | adc edx, 0 | 331 | add edx, 2 |
332 | 332 | shr eax, 16 | |
333 | ;/* Calculate the length of the match. If it is at least MAX_MATCH, */ | 333 | |
334 | ;/* then automatically accept it as the best possible match and leave. */ | 334 | LenLower: |
335 | 335 | sub al, 1 | |
336 | lea eax, [edi+edx] | 336 | adc edx, 0 |
337 | mov edi, [esp+scan] | 337 | |
338 | sub eax, edi | 338 | ;/* Calculate the length of the match. If it is at least MAX_MATCH, */ |
339 | cmp eax, MAX_MATCH | 339 | ;/* then automatically accept it as the best possible match and leave. */ |
340 | jge SHORT LenMaximum | 340 | |
341 | 341 | lea eax, [edi+edx] | |
342 | ;/* If the length of the match is not longer than the best match we */ | 342 | mov edi, [esp+scan] |
343 | ;/* have so far, then forget it and return to the lookup loop. */ | 343 | sub eax, edi |
344 | 344 | cmp eax, MAX_MATCH | |
345 | mov edx, [esp+deflatestate] | 345 | jge SHORT LenMaximum |
346 | mov ebx, [esp+bestlen] | 346 | |
347 | cmp eax, ebx | 347 | ;/* If the length of the match is not longer than the best match we */ |
348 | jg SHORT LongerMatch | 348 | ;/* have so far, then forget it and return to the lookup loop. */ |
349 | mov esi, [esp+windowbestlen] | 349 | |
350 | mov edi, [edx].ds_prev | 350 | mov edx, [esp+deflatestate] |
351 | mov ebx, [esp+scanend] | 351 | mov ebx, [esp+bestlen] |
352 | mov edx, [esp+chainlenwmask] | 352 | cmp eax, ebx |
353 | jmp LookupLoop | 353 | jg SHORT LongerMatch |
354 | ALIGN 4 | 354 | mov esi, [esp+windowbestlen] |
355 | 355 | mov edi, [edx].ds_prev | |
356 | ;/* s->match_start = cur_match; */ | 356 | mov ebx, [esp+scanend] |
357 | ;/* best_len = len; */ | 357 | mov edx, [esp+chainlenwmask] |
358 | ;/* if (len >= nice_match) break; */ | 358 | jmp LookupLoop |
359 | ;/* scan_end = *(ushf*)(scan+best_len-1); */ | 359 | ALIGN 4 |
360 | 360 | ||
361 | LongerMatch: | 361 | ;/* s->match_start = cur_match; */ |
362 | mov ebx, [esp+nicematch] | 362 | ;/* best_len = len; */ |
363 | mov [esp+bestlen], eax | 363 | ;/* if (len >= nice_match) break; */ |
364 | mov [edx].ds_match_start, ecx | 364 | ;/* scan_end = *(ushf*)(scan+best_len-1); */ |
365 | cmp eax, ebx | 365 | |
366 | jge SHORT LeaveNow | 366 | LongerMatch: |
367 | mov esi, [esp+window] | 367 | mov ebx, [esp+nicematch] |
368 | add esi, eax | 368 | mov [esp+bestlen], eax |
369 | mov [esp+windowbestlen], esi | 369 | mov [edx].ds_match_start, ecx |
370 | movzx ebx, WORD PTR[edi+eax-1] | 370 | cmp eax, ebx |
371 | mov edi, [edx].ds_prev | 371 | jge SHORT LeaveNow |
372 | mov [esp+scanend], ebx | 372 | mov esi, [esp+window] |
373 | mov edx, [esp+chainlenwmask] | 373 | add esi, eax |
374 | jmp LookupLoop | 374 | mov [esp+windowbestlen], esi |
375 | ALIGN 4 | 375 | movzx ebx, WORD PTR[edi+eax-1] |
376 | 376 | mov edi, [edx].ds_prev | |
377 | ;/* Accept the current string, with the maximum possible length. */ | 377 | mov [esp+scanend], ebx |
378 | 378 | mov edx, [esp+chainlenwmask] | |
379 | LenMaximum: | 379 | jmp LookupLoop |
380 | mov edx, [esp+deflatestate] | 380 | ALIGN 4 |
381 | mov DWORD PTR[esp+bestlen], MAX_MATCH | 381 | |
382 | mov [edx].ds_match_start, ecx | 382 | ;/* Accept the current string, with the maximum possible length. */ |
383 | 383 | ||
384 | ;/* if ((uInt)best_len <= s->lookahead) return (uInt)best_len; */ | 384 | LenMaximum: |
385 | ;/* return s->lookahead; */ | 385 | mov edx, [esp+deflatestate] |
386 | 386 | mov DWORD PTR[esp+bestlen], MAX_MATCH | |
387 | LeaveNow: | 387 | mov [edx].ds_match_start, ecx |
388 | mov edx, [esp+deflatestate] | 388 | |
389 | mov ebx, [esp+bestlen] | 389 | ;/* if ((uInt)best_len <= s->lookahead) return (uInt)best_len; */ |
390 | mov eax, [edx].ds_lookahead | 390 | ;/* return s->lookahead; */ |
391 | cmp ebx, eax | 391 | |
392 | jg SHORT LookaheadRet | 392 | LeaveNow: |
393 | mov eax, ebx | 393 | mov edx, [esp+deflatestate] |
394 | LookaheadRet: | 394 | mov ebx, [esp+bestlen] |
395 | 395 | mov eax, [edx].ds_lookahead | |
396 | ; Restore the stack and return from whence we came. | 396 | cmp ebx, eax |
397 | 397 | jg SHORT LookaheadRet | |
398 | add esp, varsize | 398 | mov eax, ebx |
399 | pop ebx | 399 | LookaheadRet: |
400 | pop esi | 400 | |
401 | pop edi | 401 | ; Restore the stack and return from whence we came. |
402 | pop ebp | 402 | |
403 | ret | 403 | add esp, varsize |
404 | 404 | pop ebx | |
405 | _longest_match ENDP | 405 | pop esi |
406 | 406 | pop edi | |
407 | _TEXT ENDS | 407 | pop ebp |
408 | END | 408 | ret |
409 | |||
410 | _longest_match ENDP | ||
411 | |||
412 | _TEXT ENDS | ||
413 | END | ||