diff options
Diffstat (limited to 'contrib/masm686')
| -rw-r--r-- | contrib/masm686/match.asm | 821 |
1 files changed, 413 insertions, 408 deletions
diff --git a/contrib/masm686/match.asm b/contrib/masm686/match.asm index 2287804d..4b03a71a 100644 --- a/contrib/masm686/match.asm +++ b/contrib/masm686/match.asm | |||
| @@ -1,408 +1,413 @@ | |||
| 1 | 1 | ||
| 2 | ; match.asm -- Pentium-Pro optimized version of longest_match() | 2 | ; match.asm -- Pentium-Pro optimized version of longest_match() |
| 3 | ; | 3 | ; |
| 4 | ; Updated for zlib 1.1.3 and converted to MASM 6.1x | 4 | ; Updated for zlib 1.1.3 and converted to MASM 6.1x |
| 5 | ; Copyright (C) 2000 Dan Higdon <hdan@kinesoft.com> | 5 | ; Copyright (C) 2000 Dan Higdon <hdan@kinesoft.com> |
| 6 | ; and Chuck Walbourn <chuckw@kinesoft.com> | 6 | ; and Chuck Walbourn <chuckw@kinesoft.com> |
| 7 | ; Corrections by Cosmin Truta <cosmint@cs.ubbcluj.ro> | 7 | ; Corrections by Cosmin Truta <cosmint@cs.ubbcluj.ro> |
| 8 | ; | 8 | ; |
| 9 | ; This is free software; you can redistribute it and/or modify it | 9 | ; This is free software; you can redistribute it and/or modify it |
| 10 | ; under the terms of the GNU General Public License. | 10 | ; under the terms of the GNU General Public License. |
| 11 | 11 | ||
| 12 | ; Based on match.S | 12 | ; Based on match.S |
| 13 | ; Written for zlib 1.1.2 | 13 | ; Written for zlib 1.1.2 |
| 14 | ; Copyright (C) 1998 Brian Raiter <breadbox@muppetlabs.com> | 14 | ; Copyright (C) 1998 Brian Raiter <breadbox@muppetlabs.com> |
| 15 | 15 | ; | |
| 16 | .686P | 16 | ; Modified by Gilles Vollant (2005) to add gzhead and gzindex |
| 17 | .MODEL FLAT | 17 | |
| 18 | 18 | .686P | |
| 19 | ;=========================================================================== | 19 | .MODEL FLAT |
| 20 | ; EQUATES | 20 | |
| 21 | ;=========================================================================== | 21 | ;=========================================================================== |
| 22 | 22 | ; EQUATES | |
| 23 | MAX_MATCH EQU 258 | 23 | ;=========================================================================== |
| 24 | MIN_MATCH EQU 3 | 24 | |
| 25 | MIN_LOOKAHEAD EQU (MAX_MATCH + MIN_MATCH + 1) | 25 | MAX_MATCH EQU 258 |
| 26 | MAX_MATCH_8 EQU ((MAX_MATCH + 7) AND (NOT 7)) | 26 | MIN_MATCH EQU 3 |
| 27 | 27 | MIN_LOOKAHEAD EQU (MAX_MATCH + MIN_MATCH + 1) | |
| 28 | ;=========================================================================== | 28 | MAX_MATCH_8 EQU ((MAX_MATCH + 7) AND (NOT 7)) |
| 29 | ; STRUCTURES | 29 | |
| 30 | ;=========================================================================== | 30 | ;=========================================================================== |
| 31 | 31 | ; STRUCTURES | |
| 32 | ; This STRUCT assumes a 4-byte alignment | 32 | ;=========================================================================== |
| 33 | 33 | ||
| 34 | DEFLATE_STATE STRUCT | 34 | ; This STRUCT assumes a 4-byte alignment |
| 35 | ds_strm dd ? | 35 | |
| 36 | ds_status dd ? | 36 | DEFLATE_STATE STRUCT |
| 37 | ds_pending_buf dd ? | 37 | ds_strm dd ? |
| 38 | ds_pending_buf_size dd ? | 38 | ds_status dd ? |
| 39 | ds_pending_out dd ? | 39 | ds_pending_buf dd ? |
| 40 | ds_pending dd ? | 40 | ds_pending_buf_size dd ? |
| 41 | ds_wrap dd ? | 41 | ds_pending_out dd ? |
| 42 | ds_data_type db ? | 42 | ds_pending dd ? |
| 43 | ds_method db ? | 43 | ds_wrap dd ? |
| 44 | db ? ; padding | 44 | ; gzhead and gzindex are added in zlib 1.2.2.2 (see deflate.h) |
| 45 | db ? ; padding | 45 | ds_gzhead dd ? |
| 46 | ds_last_flush dd ? | 46 | ds_gzindex dd ? |
| 47 | ds_w_size dd ? ; used | 47 | ds_data_type db ? |
| 48 | ds_w_bits dd ? | 48 | ds_method db ? |
| 49 | ds_w_mask dd ? ; used | 49 | db ? ; padding |
| 50 | ds_window dd ? ; used | 50 | db ? ; padding |
| 51 | ds_window_size dd ? | 51 | ds_last_flush dd ? |
| 52 | ds_prev dd ? ; used | 52 | ds_w_size dd ? ; used |
| 53 | ds_head dd ? | 53 | ds_w_bits dd ? |
| 54 | ds_ins_h dd ? | 54 | ds_w_mask dd ? ; used |
| 55 | ds_hash_size dd ? | 55 | ds_window dd ? ; used |
| 56 | ds_hash_bits dd ? | 56 | ds_window_size dd ? |
| 57 | ds_hash_mask dd ? | 57 | ds_prev dd ? ; used |
| 58 | ds_hash_shift dd ? | 58 | ds_head dd ? |
| 59 | ds_block_start dd ? | 59 | ds_ins_h dd ? |
| 60 | ds_match_length dd ? ; used | 60 | ds_hash_size dd ? |
| 61 | ds_prev_match dd ? ; used | 61 | ds_hash_bits dd ? |
| 62 | ds_match_available dd ? | 62 | ds_hash_mask dd ? |
| 63 | ds_strstart dd ? ; used | 63 | ds_hash_shift dd ? |
| 64 | ds_match_start dd ? ; used | 64 | ds_block_start dd ? |
| 65 | ds_lookahead dd ? ; used | 65 | ds_match_length dd ? ; used |
| 66 | ds_prev_length dd ? ; used | 66 | ds_prev_match dd ? ; used |
| 67 | ds_max_chain_length dd ? ; used | 67 | ds_match_available dd ? |
| 68 | ds_max_laxy_match dd ? | 68 | ds_strstart dd ? ; used |
| 69 | ds_level dd ? | 69 | ds_match_start dd ? ; used |
| 70 | ds_strategy dd ? | 70 | ds_lookahead dd ? ; used |
| 71 | ds_good_match dd ? ; used | 71 | ds_prev_length dd ? ; used |
| 72 | ds_nice_match dd ? ; used | 72 | ds_max_chain_length dd ? ; used |
| 73 | 73 | ds_max_laxy_match dd ? | |
| 74 | ; Don't need any more of the struct for match | 74 | ds_level dd ? |
| 75 | DEFLATE_STATE ENDS | 75 | ds_strategy dd ? |
| 76 | 76 | ds_good_match dd ? ; used | |
| 77 | ;=========================================================================== | 77 | ds_nice_match dd ? ; used |
| 78 | ; CODE | 78 | |
| 79 | ;=========================================================================== | 79 | ; Don't need any more of the struct for match |
| 80 | _TEXT SEGMENT | 80 | DEFLATE_STATE ENDS |
| 81 | 81 | ||
| 82 | ;--------------------------------------------------------------------------- | 82 | ;=========================================================================== |
| 83 | ; match_init | 83 | ; CODE |
| 84 | ;--------------------------------------------------------------------------- | 84 | ;=========================================================================== |
| 85 | ALIGN 4 | 85 | _TEXT SEGMENT |
| 86 | PUBLIC _match_init | 86 | |
| 87 | _match_init PROC | 87 | ;--------------------------------------------------------------------------- |
| 88 | ; no initialization needed | 88 | ; match_init |
| 89 | ret | 89 | ;--------------------------------------------------------------------------- |
| 90 | _match_init ENDP | 90 | ALIGN 4 |
| 91 | 91 | PUBLIC _match_init | |
| 92 | ;--------------------------------------------------------------------------- | 92 | _match_init PROC |
| 93 | ; uInt longest_match(deflate_state *deflatestate, IPos curmatch) | 93 | ; no initialization needed |
| 94 | ;--------------------------------------------------------------------------- | 94 | ret |
| 95 | ALIGN 4 | 95 | _match_init ENDP |
| 96 | 96 | ||
| 97 | PUBLIC _longest_match | 97 | ;--------------------------------------------------------------------------- |
| 98 | _longest_match PROC | 98 | ; uInt longest_match(deflate_state *deflatestate, IPos curmatch) |
| 99 | 99 | ;--------------------------------------------------------------------------- | |
| 100 | ; Since this code uses EBP for a scratch register, the stack frame must | 100 | ALIGN 4 |
| 101 | ; be manually constructed and referenced relative to the ESP register. | 101 | |
| 102 | 102 | PUBLIC _longest_match | |
| 103 | ; Stack image | 103 | _longest_match PROC |
| 104 | ; Variables | 104 | |
| 105 | chainlenwmask = 0 ; high word: current chain len | 105 | ; Since this code uses EBP for a scratch register, the stack frame must |
| 106 | ; low word: s->wmask | 106 | ; be manually constructed and referenced relative to the ESP register. |
| 107 | window = 4 ; local copy of s->window | 107 | |
| 108 | windowbestlen = 8 ; s->window + bestlen | 108 | ; Stack image |
| 109 | scanend = 12 ; last two bytes of string | 109 | ; Variables |
| 110 | scanstart = 16 ; first two bytes of string | 110 | chainlenwmask = 0 ; high word: current chain len |
| 111 | scanalign = 20 ; dword-misalignment of string | 111 | ; low word: s->wmask |
| 112 | nicematch = 24 ; a good enough match size | 112 | window = 4 ; local copy of s->window |
| 113 | bestlen = 28 ; size of best match so far | 113 | windowbestlen = 8 ; s->window + bestlen |
| 114 | scan = 32 ; ptr to string wanting match | 114 | scanend = 12 ; last two bytes of string |
| 115 | varsize = 36 ; number of bytes (also offset to last saved register) | 115 | scanstart = 16 ; first two bytes of string |
| 116 | 116 | scanalign = 20 ; dword-misalignment of string | |
| 117 | ; Saved Registers (actually pushed into place) | 117 | nicematch = 24 ; a good enough match size |
| 118 | ebx_save = 36 | 118 | bestlen = 28 ; size of best match so far |
| 119 | edi_save = 40 | 119 | scan = 32 ; ptr to string wanting match |
| 120 | esi_save = 44 | 120 | varsize = 36 ; number of bytes (also offset to last saved register) |
| 121 | ebp_save = 48 | 121 | |
| 122 | 122 | ; Saved Registers (actually pushed into place) | |
| 123 | ; Parameters | 123 | ebx_save = 36 |
| 124 | retaddr = 52 | 124 | edi_save = 40 |
| 125 | deflatestate = 56 | 125 | esi_save = 44 |
| 126 | curmatch = 60 | 126 | ebp_save = 48 |
| 127 | 127 | ||
| 128 | ; Save registers that the compiler may be using | 128 | ; Parameters |
| 129 | push ebp | 129 | retaddr = 52 |
| 130 | push edi | 130 | deflatestate = 56 |
| 131 | push esi | 131 | curmatch = 60 |
| 132 | push ebx | 132 | |
| 133 | 133 | ; Save registers that the compiler may be using | |
| 134 | ; Allocate local variable space | 134 | push ebp |
| 135 | sub esp,varsize | 135 | push edi |
| 136 | 136 | push esi | |
| 137 | ; Retrieve the function arguments. ecx will hold cur_match | 137 | push ebx |
| 138 | ; throughout the entire function. edx will hold the pointer to the | 138 | |
| 139 | ; deflate_state structure during the function's setup (before | 139 | ; Allocate local variable space |
| 140 | ; entering the main loop). | 140 | sub esp,varsize |
| 141 | 141 | ||
| 142 | mov edx, [esp+deflatestate] | 142 | ; Retrieve the function arguments. ecx will hold cur_match |
| 143 | ASSUME edx:PTR DEFLATE_STATE | 143 | ; throughout the entire function. edx will hold the pointer to the |
| 144 | 144 | ; deflate_state structure during the function's setup (before | |
| 145 | mov ecx, [esp+curmatch] | 145 | ; entering the main loop). |
| 146 | 146 | ||
| 147 | ; uInt wmask = s->w_mask; | 147 | mov edx, [esp+deflatestate] |
| 148 | ; unsigned chain_length = s->max_chain_length; | 148 | ASSUME edx:PTR DEFLATE_STATE |
| 149 | ; if (s->prev_length >= s->good_match) { | 149 | |
| 150 | ; chain_length >>= 2; | 150 | mov ecx, [esp+curmatch] |
| 151 | ; } | 151 | |
| 152 | 152 | ; uInt wmask = s->w_mask; | |
| 153 | mov eax, [edx].ds_prev_length | 153 | ; unsigned chain_length = s->max_chain_length; |
| 154 | mov ebx, [edx].ds_good_match | 154 | ; if (s->prev_length >= s->good_match) { |
| 155 | cmp eax, ebx | 155 | ; chain_length >>= 2; |
| 156 | mov eax, [edx].ds_w_mask | 156 | ; } |
| 157 | mov ebx, [edx].ds_max_chain_length | 157 | |
| 158 | jl SHORT LastMatchGood | 158 | mov eax, [edx].ds_prev_length |
| 159 | shr ebx, 2 | 159 | mov ebx, [edx].ds_good_match |
| 160 | LastMatchGood: | 160 | cmp eax, ebx |
| 161 | 161 | mov eax, [edx].ds_w_mask | |
| 162 | ; chainlen is decremented once beforehand so that the function can | 162 | mov ebx, [edx].ds_max_chain_length |
| 163 | ; use the sign flag instead of the zero flag for the exit test. | 163 | jl SHORT LastMatchGood |
| 164 | ; It is then shifted into the high word, to make room for the wmask | 164 | shr ebx, 2 |
| 165 | ; value, which it will always accompany. | 165 | LastMatchGood: |
| 166 | 166 | ||
| 167 | dec ebx | 167 | ; chainlen is decremented once beforehand so that the function can |
| 168 | shl ebx, 16 | 168 | ; use the sign flag instead of the zero flag for the exit test. |
| 169 | or ebx, eax | 169 | ; It is then shifted into the high word, to make room for the wmask |
| 170 | mov [esp+chainlenwmask], ebx | 170 | ; value, which it will always accompany. |
| 171 | 171 | ||
| 172 | ; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead; | 172 | dec ebx |
| 173 | 173 | shl ebx, 16 | |
| 174 | mov eax, [edx].ds_nice_match | 174 | or ebx, eax |
| 175 | mov ebx, [edx].ds_lookahead | 175 | mov [esp+chainlenwmask], ebx |
| 176 | cmp ebx, eax | 176 | |
| 177 | jl SHORT LookaheadLess | 177 | ; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead; |
| 178 | mov ebx, eax | 178 | |
| 179 | LookaheadLess: | 179 | mov eax, [edx].ds_nice_match |
| 180 | mov [esp+nicematch], ebx | 180 | mov ebx, [edx].ds_lookahead |
| 181 | 181 | cmp ebx, eax | |
| 182 | ;/* register Bytef *scan = s->window + s->strstart; */ | 182 | jl SHORT LookaheadLess |
| 183 | 183 | mov ebx, eax | |
| 184 | mov esi, [edx].ds_window | 184 | LookaheadLess: |
| 185 | mov [esp+window], esi | 185 | mov [esp+nicematch], ebx |
| 186 | mov ebp, [edx].ds_strstart | 186 | |
| 187 | lea edi, [esi+ebp] | 187 | ;/* register Bytef *scan = s->window + s->strstart; */ |
| 188 | mov [esp+scan],edi | 188 | |
| 189 | 189 | mov esi, [edx].ds_window | |
| 190 | ;/* Determine how many bytes the scan ptr is off from being */ | 190 | mov [esp+window], esi |
| 191 | ;/* dword-aligned. */ | 191 | mov ebp, [edx].ds_strstart |
| 192 | 192 | lea edi, [esi+ebp] | |
| 193 | mov eax, edi | 193 | mov [esp+scan],edi |
| 194 | neg eax | 194 | |
| 195 | and eax, 3 | 195 | ;/* Determine how many bytes the scan ptr is off from being */ |
| 196 | mov [esp+scanalign], eax | 196 | ;/* dword-aligned. */ |
| 197 | 197 | ||
| 198 | ;/* IPos limit = s->strstart > (IPos)MAX_DIST(s) ? */ | 198 | mov eax, edi |
| 199 | ;/* s->strstart - (IPos)MAX_DIST(s) : NIL; */ | 199 | neg eax |
| 200 | 200 | and eax, 3 | |
| 201 | mov eax, [edx].ds_w_size | 201 | mov [esp+scanalign], eax |
| 202 | sub eax, MIN_LOOKAHEAD | 202 | |
| 203 | sub ebp, eax | 203 | ;/* IPos limit = s->strstart > (IPos)MAX_DIST(s) ? */ |
| 204 | jg SHORT LimitPositive | 204 | ;/* s->strstart - (IPos)MAX_DIST(s) : NIL; */ |
| 205 | xor ebp, ebp | 205 | |
| 206 | LimitPositive: | 206 | mov eax, [edx].ds_w_size |
| 207 | 207 | sub eax, MIN_LOOKAHEAD | |
| 208 | ;/* int best_len = s->prev_length; */ | 208 | sub ebp, eax |
| 209 | 209 | jg SHORT LimitPositive | |
| 210 | mov eax, [edx].ds_prev_length | 210 | xor ebp, ebp |
| 211 | mov [esp+bestlen], eax | 211 | LimitPositive: |
| 212 | 212 | ||
| 213 | ;/* Store the sum of s->window + best_len in windowbestlen locally, and in %esi. */ | 213 | ;/* int best_len = s->prev_length; */ |
| 214 | 214 | ||
| 215 | add esi, eax | 215 | mov eax, [edx].ds_prev_length |
| 216 | mov [esp+windowbestlen], esi | 216 | mov [esp+bestlen], eax |
| 217 | 217 | ||
| 218 | ;/* register ush scan_start = *(ushf*)scan; */ | 218 | ;/* Store the sum of s->window + best_len in windowbestlen locally, and in %esi. */ |
| 219 | ;/* register ush scan_end = *(ushf*)(scan+best_len-1); */ | 219 | |
| 220 | ;/* Posf *prev = s->prev; */ | 220 | add esi, eax |
| 221 | 221 | mov [esp+windowbestlen], esi | |
| 222 | movzx ebx, WORD PTR[edi] | 222 | |
| 223 | mov [esp+scanstart], ebx | 223 | ;/* register ush scan_start = *(ushf*)scan; */ |
| 224 | movzx ebx, WORD PTR[eax+edi-1] | 224 | ;/* register ush scan_end = *(ushf*)(scan+best_len-1); */ |
| 225 | mov [esp+scanend], ebx | 225 | ;/* Posf *prev = s->prev; */ |
| 226 | mov edi, [edx].ds_prev | 226 | |
| 227 | 227 | movzx ebx, WORD PTR[edi] | |
| 228 | ;/* Jump into the main loop. */ | 228 | mov [esp+scanstart], ebx |
| 229 | 229 | movzx ebx, WORD PTR[eax+edi-1] | |
| 230 | mov edx, [esp+chainlenwmask] | 230 | mov [esp+scanend], ebx |
| 231 | jmp SHORT LoopEntry | 231 | mov edi, [edx].ds_prev |
| 232 | 232 | ||
| 233 | ;/* do { | 233 | ;/* Jump into the main loop. */ |
| 234 | ; * match = s->window + cur_match; | 234 | |
| 235 | ; * if (*(ushf*)(match+best_len-1) != scan_end || | 235 | mov edx, [esp+chainlenwmask] |
| 236 | ; * *(ushf*)match != scan_start) continue; | 236 | jmp SHORT LoopEntry |
| 237 | ; * [...] | 237 | |
| 238 | ; * } while ((cur_match = prev[cur_match & wmask]) > limit | 238 | ;/* do { |
| 239 | ; * && --chain_length != 0); | 239 | ; * match = s->window + cur_match; |
| 240 | ; * | 240 | ; * if (*(ushf*)(match+best_len-1) != scan_end || |
| 241 | ; * Here is the inner loop of the function. The function will spend the | 241 | ; * *(ushf*)match != scan_start) continue; |
| 242 | ; * majority of its time in this loop, and majority of that time will | 242 | ; * [...] |
| 243 | ; * be spent in the first ten instructions. | 243 | ; * } while ((cur_match = prev[cur_match & wmask]) > limit |
| 244 | ; * | 244 | ; * && --chain_length != 0); |
| 245 | ; * Within this loop: | 245 | ; * |
| 246 | ; * %ebx = scanend | 246 | ; * Here is the inner loop of the function. The function will spend the |
| 247 | ; * %ecx = curmatch | 247 | ; * majority of its time in this loop, and majority of that time will |
| 248 | ; * %edx = chainlenwmask - i.e., ((chainlen << 16) | wmask) | 248 | ; * be spent in the first ten instructions. |
| 249 | ; * %esi = windowbestlen - i.e., (window + bestlen) | 249 | ; * |
| 250 | ; * %edi = prev | 250 | ; * Within this loop: |
| 251 | ; * %ebp = limit | 251 | ; * %ebx = scanend |
| 252 | ; */ | 252 | ; * %ecx = curmatch |
| 253 | 253 | ; * %edx = chainlenwmask - i.e., ((chainlen << 16) | wmask) | |
| 254 | ALIGN 4 | 254 | ; * %esi = windowbestlen - i.e., (window + bestlen) |
| 255 | LookupLoop: | 255 | ; * %edi = prev |
| 256 | and ecx, edx | 256 | ; * %ebp = limit |
| 257 | movzx ecx, WORD PTR[edi+ecx*2] | 257 | ; */ |
| 258 | cmp ecx, ebp | 258 | |
| 259 | jbe LeaveNow | 259 | ALIGN 4 |
| 260 | sub edx, 000010000H | 260 | LookupLoop: |
| 261 | js LeaveNow | 261 | and ecx, edx |
| 262 | 262 | movzx ecx, WORD PTR[edi+ecx*2] | |
| 263 | LoopEntry: | 263 | cmp ecx, ebp |
| 264 | movzx eax, WORD PTR[esi+ecx-1] | 264 | jbe LeaveNow |
| 265 | cmp eax, ebx | 265 | sub edx, 000010000H |
| 266 | jnz SHORT LookupLoop | 266 | js LeaveNow |
| 267 | 267 | ||
| 268 | mov eax, [esp+window] | 268 | LoopEntry: |
| 269 | movzx eax, WORD PTR[eax+ecx] | 269 | movzx eax, WORD PTR[esi+ecx-1] |
| 270 | cmp eax, [esp+scanstart] | 270 | cmp eax, ebx |
| 271 | jnz SHORT LookupLoop | 271 | jnz SHORT LookupLoop |
| 272 | 272 | ||
| 273 | ;/* Store the current value of chainlen. */ | 273 | mov eax, [esp+window] |
| 274 | 274 | movzx eax, WORD PTR[eax+ecx] | |
| 275 | mov [esp+chainlenwmask], edx | 275 | cmp eax, [esp+scanstart] |
| 276 | 276 | jnz SHORT LookupLoop | |
| 277 | ;/* Point %edi to the string under scrutiny, and %esi to the string we */ | 277 | |
| 278 | ;/* are hoping to match it up with. In actuality, %esi and %edi are */ | 278 | ;/* Store the current value of chainlen. */ |
| 279 | ;/* both pointed (MAX_MATCH_8 - scanalign) bytes ahead, and %edx is */ | 279 | |
| 280 | ;/* initialized to -(MAX_MATCH_8 - scanalign). */ | 280 | mov [esp+chainlenwmask], edx |
| 281 | 281 | ||
| 282 | mov esi, [esp+window] | 282 | ;/* Point %edi to the string under scrutiny, and %esi to the string we */ |
| 283 | mov edi, [esp+scan] | 283 | ;/* are hoping to match it up with. In actuality, %esi and %edi are */ |
| 284 | add esi, ecx | 284 | ;/* both pointed (MAX_MATCH_8 - scanalign) bytes ahead, and %edx is */ |
| 285 | mov eax, [esp+scanalign] | 285 | ;/* initialized to -(MAX_MATCH_8 - scanalign). */ |
| 286 | mov edx, -MAX_MATCH_8 | 286 | |
| 287 | lea edi, [edi+eax+MAX_MATCH_8] | 287 | mov esi, [esp+window] |
| 288 | lea esi, [esi+eax+MAX_MATCH_8] | 288 | mov edi, [esp+scan] |
| 289 | 289 | add esi, ecx | |
| 290 | ;/* Test the strings for equality, 8 bytes at a time. At the end, | 290 | mov eax, [esp+scanalign] |
| 291 | ; * adjust %edx so that it is offset to the exact byte that mismatched. | 291 | mov edx, -MAX_MATCH_8 |
| 292 | ; * | 292 | lea edi, [edi+eax+MAX_MATCH_8] |
| 293 | ; * We already know at this point that the first three bytes of the | 293 | lea esi, [esi+eax+MAX_MATCH_8] |
| 294 | ; * strings match each other, and they can be safely passed over before | 294 | |
| 295 | ; * starting the compare loop. So what this code does is skip over 0-3 | 295 | ;/* Test the strings for equality, 8 bytes at a time. At the end, |
| 296 | ; * bytes, as much as necessary in order to dword-align the %edi | 296 | ; * adjust %edx so that it is offset to the exact byte that mismatched. |
| 297 | ; * pointer. (%esi will still be misaligned three times out of four.) | 297 | ; * |
| 298 | ; * | 298 | ; * We already know at this point that the first three bytes of the |
| 299 | ; * It should be confessed that this loop usually does not represent | 299 | ; * strings match each other, and they can be safely passed over before |
| 300 | ; * much of the total running time. Replacing it with a more | 300 | ; * starting the compare loop. So what this code does is skip over 0-3 |
| 301 | ; * straightforward "rep cmpsb" would not drastically degrade | 301 | ; * bytes, as much as necessary in order to dword-align the %edi |
| 302 | ; * performance. | 302 | ; * pointer. (%esi will still be misaligned three times out of four.) |
| 303 | ; */ | 303 | ; * |
| 304 | 304 | ; * It should be confessed that this loop usually does not represent | |
| 305 | LoopCmps: | 305 | ; * much of the total running time. Replacing it with a more |
| 306 | mov eax, DWORD PTR[esi+edx] | 306 | ; * straightforward "rep cmpsb" would not drastically degrade |
| 307 | xor eax, DWORD PTR[edi+edx] | 307 | ; * performance. |
| 308 | jnz SHORT LeaveLoopCmps | 308 | ; */ |
| 309 | 309 | ||
| 310 | mov eax, DWORD PTR[esi+edx+4] | 310 | LoopCmps: |
| 311 | xor eax, DWORD PTR[edi+edx+4] | 311 | mov eax, DWORD PTR[esi+edx] |
| 312 | jnz SHORT LeaveLoopCmps4 | 312 | xor eax, DWORD PTR[edi+edx] |
| 313 | 313 | jnz SHORT LeaveLoopCmps | |
| 314 | add edx, 8 | 314 | |
| 315 | jnz SHORT LoopCmps | 315 | mov eax, DWORD PTR[esi+edx+4] |
| 316 | jmp LenMaximum | 316 | xor eax, DWORD PTR[edi+edx+4] |
| 317 | ALIGN 4 | 317 | jnz SHORT LeaveLoopCmps4 |
| 318 | 318 | ||
| 319 | LeaveLoopCmps4: | 319 | add edx, 8 |
| 320 | add edx, 4 | 320 | jnz SHORT LoopCmps |
| 321 | 321 | jmp LenMaximum | |
| 322 | LeaveLoopCmps: | 322 | ALIGN 4 |
| 323 | test eax, 00000FFFFH | 323 | |
| 324 | jnz SHORT LenLower | 324 | LeaveLoopCmps4: |
| 325 | 325 | add edx, 4 | |
| 326 | add edx, 2 | 326 | |
| 327 | shr eax, 16 | 327 | LeaveLoopCmps: |
| 328 | 328 | test eax, 00000FFFFH | |
| 329 | LenLower: | 329 | jnz SHORT LenLower |
| 330 | sub al, 1 | 330 | |
| 331 | adc edx, 0 | 331 | add edx, 2 |
| 332 | 332 | shr eax, 16 | |
| 333 | ;/* Calculate the length of the match. If it is at least MAX_MATCH, */ | 333 | |
| 334 | ;/* then automatically accept it as the best possible match and leave. */ | 334 | LenLower: |
| 335 | 335 | sub al, 1 | |
| 336 | lea eax, [edi+edx] | 336 | adc edx, 0 |
| 337 | mov edi, [esp+scan] | 337 | |
| 338 | sub eax, edi | 338 | ;/* Calculate the length of the match. If it is at least MAX_MATCH, */ |
| 339 | cmp eax, MAX_MATCH | 339 | ;/* then automatically accept it as the best possible match and leave. */ |
| 340 | jge SHORT LenMaximum | 340 | |
| 341 | 341 | lea eax, [edi+edx] | |
| 342 | ;/* If the length of the match is not longer than the best match we */ | 342 | mov edi, [esp+scan] |
| 343 | ;/* have so far, then forget it and return to the lookup loop. */ | 343 | sub eax, edi |
| 344 | 344 | cmp eax, MAX_MATCH | |
| 345 | mov edx, [esp+deflatestate] | 345 | jge SHORT LenMaximum |
| 346 | mov ebx, [esp+bestlen] | 346 | |
| 347 | cmp eax, ebx | 347 | ;/* If the length of the match is not longer than the best match we */ |
| 348 | jg SHORT LongerMatch | 348 | ;/* have so far, then forget it and return to the lookup loop. */ |
| 349 | mov esi, [esp+windowbestlen] | 349 | |
| 350 | mov edi, [edx].ds_prev | 350 | mov edx, [esp+deflatestate] |
| 351 | mov ebx, [esp+scanend] | 351 | mov ebx, [esp+bestlen] |
| 352 | mov edx, [esp+chainlenwmask] | 352 | cmp eax, ebx |
| 353 | jmp LookupLoop | 353 | jg SHORT LongerMatch |
| 354 | ALIGN 4 | 354 | mov esi, [esp+windowbestlen] |
| 355 | 355 | mov edi, [edx].ds_prev | |
| 356 | ;/* s->match_start = cur_match; */ | 356 | mov ebx, [esp+scanend] |
| 357 | ;/* best_len = len; */ | 357 | mov edx, [esp+chainlenwmask] |
| 358 | ;/* if (len >= nice_match) break; */ | 358 | jmp LookupLoop |
| 359 | ;/* scan_end = *(ushf*)(scan+best_len-1); */ | 359 | ALIGN 4 |
| 360 | 360 | ||
| 361 | LongerMatch: | 361 | ;/* s->match_start = cur_match; */ |
| 362 | mov ebx, [esp+nicematch] | 362 | ;/* best_len = len; */ |
| 363 | mov [esp+bestlen], eax | 363 | ;/* if (len >= nice_match) break; */ |
| 364 | mov [edx].ds_match_start, ecx | 364 | ;/* scan_end = *(ushf*)(scan+best_len-1); */ |
| 365 | cmp eax, ebx | 365 | |
| 366 | jge SHORT LeaveNow | 366 | LongerMatch: |
| 367 | mov esi, [esp+window] | 367 | mov ebx, [esp+nicematch] |
| 368 | add esi, eax | 368 | mov [esp+bestlen], eax |
| 369 | mov [esp+windowbestlen], esi | 369 | mov [edx].ds_match_start, ecx |
| 370 | movzx ebx, WORD PTR[edi+eax-1] | 370 | cmp eax, ebx |
| 371 | mov edi, [edx].ds_prev | 371 | jge SHORT LeaveNow |
| 372 | mov [esp+scanend], ebx | 372 | mov esi, [esp+window] |
| 373 | mov edx, [esp+chainlenwmask] | 373 | add esi, eax |
| 374 | jmp LookupLoop | 374 | mov [esp+windowbestlen], esi |
| 375 | ALIGN 4 | 375 | movzx ebx, WORD PTR[edi+eax-1] |
| 376 | 376 | mov edi, [edx].ds_prev | |
| 377 | ;/* Accept the current string, with the maximum possible length. */ | 377 | mov [esp+scanend], ebx |
| 378 | 378 | mov edx, [esp+chainlenwmask] | |
| 379 | LenMaximum: | 379 | jmp LookupLoop |
| 380 | mov edx, [esp+deflatestate] | 380 | ALIGN 4 |
| 381 | mov DWORD PTR[esp+bestlen], MAX_MATCH | 381 | |
| 382 | mov [edx].ds_match_start, ecx | 382 | ;/* Accept the current string, with the maximum possible length. */ |
| 383 | 383 | ||
| 384 | ;/* if ((uInt)best_len <= s->lookahead) return (uInt)best_len; */ | 384 | LenMaximum: |
| 385 | ;/* return s->lookahead; */ | 385 | mov edx, [esp+deflatestate] |
| 386 | 386 | mov DWORD PTR[esp+bestlen], MAX_MATCH | |
| 387 | LeaveNow: | 387 | mov [edx].ds_match_start, ecx |
| 388 | mov edx, [esp+deflatestate] | 388 | |
| 389 | mov ebx, [esp+bestlen] | 389 | ;/* if ((uInt)best_len <= s->lookahead) return (uInt)best_len; */ |
| 390 | mov eax, [edx].ds_lookahead | 390 | ;/* return s->lookahead; */ |
| 391 | cmp ebx, eax | 391 | |
| 392 | jg SHORT LookaheadRet | 392 | LeaveNow: |
| 393 | mov eax, ebx | 393 | mov edx, [esp+deflatestate] |
| 394 | LookaheadRet: | 394 | mov ebx, [esp+bestlen] |
| 395 | 395 | mov eax, [edx].ds_lookahead | |
| 396 | ; Restore the stack and return from whence we came. | 396 | cmp ebx, eax |
| 397 | 397 | jg SHORT LookaheadRet | |
| 398 | add esp, varsize | 398 | mov eax, ebx |
| 399 | pop ebx | 399 | LookaheadRet: |
| 400 | pop esi | 400 | |
| 401 | pop edi | 401 | ; Restore the stack and return from whence we came. |
| 402 | pop ebp | 402 | |
| 403 | ret | 403 | add esp, varsize |
| 404 | 404 | pop ebx | |
| 405 | _longest_match ENDP | 405 | pop esi |
| 406 | 406 | pop edi | |
| 407 | _TEXT ENDS | 407 | pop ebp |
| 408 | END | 408 | ret |
| 409 | |||
| 410 | _longest_match ENDP | ||
| 411 | |||
| 412 | _TEXT ENDS | ||
| 413 | END | ||
