; path: root/contrib/asm386/gvmat32.asm
; blob: b175871d92719e10e69d31d15b0627cf0728ae01 (plain)
;
; gvmat32.asm -- Asm portion of the optimized longest_match for 32 bits x86
; Copyright (C) 1995-1996 Jean-loup Gailly and Gilles Vollant.
; File written by Gilles Vollant, by modifying the longest_match
;  from Jean-loup Gailly in deflate.c
; It needs wmask == 0x7fff
;     (assembly code is faster with a fixed wmask)
;
; For Visual C++ 4.2 and ML 6.11c (version in directory \MASM611C of Win95 DDK)
;   I compile with : "ml /coff /Zi /c gvmat32.asm"
;
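; NOTE: the prototype below does not describe the entry point exported by
; this file: _longest_match_asm7fff receives a deflate_state pointer and
; cur_match on the stack. It is kept here for reference only.
; uInt longest_match_gvasm(IPos cur_match,int* match_start_ptr,uInt scan_end,
;                          uInt scan_start,ush* prev,uch* window,int best_len,
;                          IPos limit,uInt chain_length,uch* scanrp,
;                          uInt nice_match);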

;uInt longest_match(s, cur_match)
;    deflate_state *s;
;    IPos cur_match;                             /* current match */

	; Stack frame of _longest_match_asm7fff. Every equate below is a text
	; macro addressed relative to esp AFTER the prologue, i.e. after the four
	; register pushes and "sub esp,NbStackAdd". NbStack is the distance in
	; bytes from esp to the second stack argument (cur_match).
	NbStack		equ	76

	; incoming arguments (pushed by the caller)
	cur_match	equ	dword ptr[esp+NbStack-0]
	str_s		equ	dword ptr[esp+NbStack-4]

	; 5 dwords on top, from higher to lower address:
	; return address, then ebp, edi, esi, ebx in the order they are pushed
	adrret		equ	dword ptr[esp+NbStack-8]
	pushebp		equ	dword ptr[esp+NbStack-12]
	pushedi		equ	dword ptr[esp+NbStack-16]
	pushesi		equ	dword ptr[esp+NbStack-20]
	pushebx		equ	dword ptr[esp+NbStack-24]

	; local variables (the area reserved by "sub esp,NbStackAdd")
	chain_length    equ dword ptr [esp+NbStack-28]
	limit           equ dword ptr [esp+NbStack-32]
	best_len        equ dword ptr [esp+NbStack-36]
	window          equ dword ptr [esp+NbStack-40]
	prev            equ dword ptr [esp+NbStack-44]
	scan_start      equ  word ptr [esp+NbStack-48]
	scan_end        equ  word ptr [esp+NbStack-52]
	match_start_ptr equ dword ptr [esp+NbStack-56]
	nice_match      equ dword ptr [esp+NbStack-60]
	scanrp          equ dword ptr [esp+NbStack-64]

	windowlen       equ dword ptr [esp+NbStack-68]
	match_start     equ dword ptr [esp+NbStack-72]
	strend			equ dword ptr [esp+NbStack-76]

	; Size of the local area: the whole frame minus the 24 bytes that are
	; already on the stack above it (2 arguments slots counted from
	; cur_match down to the return address, plus the 4 saved registers).
	; Derived from NbStack so the two values cannot drift apart.
	NbStackAdd		equ	(NbStack-24)

    ; Target: 32-bit x86 instruction set, flat memory model.
    .386p

    name    gvmatch
    .MODEL  FLAT


; Prototype of the external fallback routine longest_match_c, declared with
; the C calling convention and two arguments (a pointer and a signed dword).
; The assembly entry point jumps to it when w_mask is not 7fffh.
@lmtype				TYPEDEF         PROTO C :PTR , :SDWORD
longest_match_c		PROTO           @lmtype

	; Byte offsets of the fields read or written through the structure
	; pointer passed as first argument (the deflate_state "s" named in the
	; header comment). These are hard-coded, so they presumably have to be
	; kept in sync with the C declaration of that structure -- TODO confirm
	; against the deflate.h this file is built with.
	dep_max_chain_length	equ	70h
	dep_window				equ	2ch
	dep_strstart			equ	60h
	dep_prev_length			equ 6ch
	dep_nice_match			equ 84h
	dep_w_size				equ	20h
	dep_prev				equ	34h
	dep_w_mask				equ	28h
	dep_good_match			equ 80h
	dep_match_start			equ	64h
	dep_lookahead			equ	68h


; Code segment; it is closed by the "_TEXT ends" line at the end of the file.
_TEXT   segment
    ; exported entry point (the leading underscore is written explicitly)
    public  _longest_match_asm7fff

    ; MAX_MATCH bounds the string comparison done for each candidate.
    ; MIN_LOOKAHEAD is subtracted from w_size to obtain MAX_DIST, which is
    ; used to compute the chain "limit".
    MAX_MATCH		equ 258
	MIN_MATCH		equ 3
	MIN_LOOKAHEAD equ (MAX_MATCH+MIN_MATCH+1)

; initialize or check the variables used in match.asm.


; -----------------------------------------------------------------------
; Set match_start to the longest match starting at the given string and
; return its length. Matches shorter or equal to prev_length are discarded,
; in which case the result is equal to prev_length and match_start is
; garbage (this routine writes s->match_start back unchanged in that case).
; The returned length is never larger than s->lookahead.
; IN assertions: cur_match is the head of the hash chain for the current
;   string (strstart) and its distance is <= MAX_DIST, and prev_length >= 1
;
; C view:  uInt longest_match_asm7fff(deflate_state *s, IPos cur_match)
; ABI:     32-bit, arguments on the stack ([esp+4] = s, [esp+8] = cur_match
;          at entry), result in eax.
; Saved:   ebp, edi, esi, ebx are pushed on entry and popped on exit.
; Clobb:   eax, ecx, edx, flags.
; Writes:  the dword at s+dep_match_start.
; Note:    when the w_mask field is not 7fffh the routine does nothing
;          itself and tail-jumps to longest_match_c with the stack and all
;          callee-saved registers exactly as received.
;
; int longest_match(cur_match)

_longest_match_asm7fff proc near



	    ; return address

		mov		eax,[esp+4]					; eax = s; nothing has been saved yet

		; Test w_mask directly in memory: ebx is callee-saved and has not
		; been pushed yet, so it must not be used as scratch here (neither
		; on the fast path nor before the jump to longest_match_c).
		; The field is compared as a dword, like the other dep_ fields.
		cmp		dword ptr [eax+dep_w_mask],7fffh
		jnz		longest_match_c				; other masks: let the C version do the work

	    push    ebp
	    push    edi
	    push    esi
	    push    ebx

	    sub     esp,NbStackAdd				; reserve the local variables

		;//mov		ebp,str_s
		mov		ebp,eax						; ebp = s

		mov		eax,[ebp+dep_max_chain_length]
		mov		ebx,[ebp+dep_prev_length]
		cmp		[ebp+dep_good_match],ebx	; if prev_length>=good_match chain_length >>= 2
		ja		noshr
		shr		eax,2
noshr:
		mov		edi,[ebp+dep_nice_match]
		mov		chain_length,eax
		mov		edx,[ebp+dep_lookahead]
		cmp		edx,edi
;if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead;
		jae		nolookaheadnicematch
		mov		edi,edx
nolookaheadnicematch:
		mov		best_len,ebx				; best_len = prev_length


		mov		esi,[ebp+dep_window]
		mov		ecx,[ebp+dep_strstart]
		mov		window,esi

		mov		nice_match,edi
		add		esi,ecx						; esi = scan = window + strstart
		mov		scanrp,esi
		mov		ax,word ptr [esi]			; first two bytes of scan
		mov		bx,word ptr [esi+ebx-1]		; the two bytes at scan+best_len-1
		add		esi,MAX_MATCH-1
		mov		scan_start,ax
		mov		strend,esi
		mov		scan_end,bx

;    IPos limit = s->strstart > (IPos)MAX_DIST(s) ?
;        s->strstart - (IPos)MAX_DIST(s) : NIL;

		mov		esi,[ebp+dep_w_size]
		sub		esi,MIN_LOOKAHEAD
		; here esi = MAX_DIST(s)
		sub		ecx,esi
		ja		nodist						; strstart > MAX_DIST: keep the difference
		xor		ecx,ecx						; otherwise limit = 0 (NIL)
nodist:
		mov		limit,ecx




		mov		eax,[ebp+dep_prev]
		mov		prev,eax

	    mov     ebx,dword ptr [ebp+dep_match_start]
	    mov     bp,scan_start				; from here on only bp is meaningful in ebp
	    mov     edx,cur_match
	    mov     match_start,ebx

	    mov     bx,scan_end
	    mov     eax,window
		mov		edi,eax
	    add     edi,best_len
	    mov     esi,prev
		dec     edi
	    mov     windowlen,edi				; windowlen = window + best_len - 1

	    jmp     beginloop2
	    align   4

; here, in the loop
;       eax = window
;       edx = dx = cur_match
;       ecx = limit
;        bx = scan_end
;        bp = scan_start
;       edi = windowlen (window + best_len - 1)
;       esi = prev
;   chain_length is kept in its stack slot, not in a register


; Slow path, entered when the budget is too small for a full unrolled batch
; of 16: give back the 16 that were subtracted in advance and follow the
; chain one link at a time.
normalbeg0add16:
		add		chain_length,16
		jz		exitloop
normalbeg0:
	    cmp     word ptr[edi+edx-0],bx		; bytes at match+best_len-1 == scan_end ?
	    je      normalbeg2
	    and     edx,7fffh					; cur_match = prev[cur_match & wmask]
	    mov     dx,word ptr[esi+edx*2]
	    cmp     ecx,edx
	    jnb     exitloop					; limit >= cur_match: end of chain
	    dec     chain_length
		jnz		normalbeg0
;jnbexitloopshort1:
		jmp     exitloop

contloop3:
	    mov     edi,windowlen

; cur_match = prev[cur_match & wmask]
	    and		edx,7fffh
	    mov     dx,word ptr[esi+edx*2]
; if cur_match <= limit, go to exitloop
	    cmp     ecx,edx
jnbexitloopshort1:
	    jnb     exitloop

; Take 16+1 out of the chain budget in advance; if that does not leave a
; strictly positive count, use the one-at-a-time loop instead of the
; unrolled batch below.
beginloop2:
		sub		chain_length,16+1
		jna     normalbeg0add16

do16:
	    cmp     word ptr[edi+edx],bx
	    je      normalbeg2dc0

; One unrolled chain step: follow prev[], stop at the limit, then test the
; candidate's bytes at best_len-1 against scan_end and branch to "lab" on a hit.
maccn	MACRO	lab
	    and     edx,7fffh
	    mov     dx,word ptr[esi+edx*2]
	    cmp     ecx,edx
	    jnb     exitloop
	    cmp     word ptr[edi+edx-0],bx
	    je      lab
		ENDM

rcontloop0:
		maccn normalbeg2dc1

rcontloop1:
		maccn normalbeg2dc2

rcontloop2:
		maccn normalbeg2dc3

rcontloop3:
		maccn normalbeg2dc4

rcontloop4:
		maccn normalbeg2dc5

rcontloop5:
		maccn normalbeg2dc6

rcontloop6:
		maccn normalbeg2dc7

rcontloop7:
		maccn normalbeg2dc8

rcontloop8:
		maccn normalbeg2dc9

rcontloop9:
		maccn normalbeg2dc10

rcontloop10:
		maccn normalbeg2dc11

rcontloop11:
		maccn short normalbeg2dc12

rcontloop12:
		maccn short normalbeg2dc13

rcontloop13:
		maccn short normalbeg2dc14

rcontloop14:
		maccn short normalbeg2dc15

rcontloop15:
	    and     edx,7fffh
	    mov     dx,word ptr[esi+edx*2]
	    cmp     ecx,edx
	    jnb     short exitloopshort

		sub		chain_length,16				; pay in advance for the next batch of 16
		ja		do16
		jmp		normalbeg0add16

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

exitloopshort:
		jmp		exitloop

; Second test for a candidate found at step "valsub" of the unrolled batch:
; if its first two bytes differ from scan_start, resume the batch at
; rcontlab; otherwise give back the unused part of the advance payment and
; go to the full comparison.
normbeg	MACRO	rcontlab,valsub
	    cmp     bp,word ptr[eax+edx]
		jne		rcontlab
		add     chain_length,16-valsub
		jmp		iseq
		ENDM

normalbeg2dc12:
		normbeg	rcontloop12,12

normalbeg2dc13:
		normbeg	rcontloop13,13

normalbeg2dc14:
		normbeg	rcontloop14,14

normalbeg2dc15:
		normbeg	rcontloop15,15

normalbeg2dc11:
		normbeg	rcontloop11,11

normalbeg2dc10:
		normbeg	rcontloop10,10


normalbeg2dc9:
		normbeg	rcontloop9,9

normalbeg2dc8:
		normbeg	rcontloop8,8

normalbeg2dc7:
		normbeg	rcontloop7,7

normalbeg2dc5:
		normbeg	rcontloop5,5





normalbeg2dc6:
		normbeg	rcontloop6,6

normalbeg2dc4:
		normbeg	rcontloop4,4

normalbeg2dc3:
		normbeg	rcontloop3,3

normalbeg2dc2:
		normbeg	rcontloop2,2

normalbeg2dc1:
		normbeg	rcontloop1,1

normalbeg2dc0:
		normbeg	rcontloop0,0


; we go in normalbeg2 because *(ushf*)(match+best_len-1) == scan_end

normalbeg2:

			 ; 10 nop here take 10% time
		mov		edi,window
		  ;mov     chain_length,eax    ; now, we need eax...

	    cmp     bp,word ptr[edi+edx]
	    jne     contloop3                   ; if *(ushf*)match != scan_start, continue

; Full comparison of the candidate. Bytes 0 and 1 are known to be equal;
; byte 2 is not compared here, the comparison starts at offset 3.
iseq:

		mov		edi,eax
	    mov     esi,scanrp                  ; esi = scan
	    add     edi,edx				; edi = window + cur_match = match


	    mov     eax,[esi+3]					; compare manually dword at match+3
	    xor     eax,[edi+3]                 ;       and scan +3

	    jz      begincompare                ; if equal, go to long compare

			; we will determine the unmatch byte and calculate len (in esi)
		or		al,al
		je		eq1rr
	    mov     esi,3						; byte 3 differs
	    jmp     trfinval
eq1rr:
	    or      ax,ax
	    je      eq1

	    mov     esi,4						; byte 4 differs
	    jmp     trfinval
eq1:
	    shl     eax,8						; drop byte 6, leaving only byte 5 to test
	    jz      eq11
	    mov     esi,5						; byte 5 differs
	    jmp     trfinval
eq11:
	    mov     esi,6						; only byte 6 differs
	    jmp     trfinval

begincompare:
		; here we now scan and match begin same
	    add     edi,6
	    add     esi,6
	    mov     ecx,(MAX_MATCH-(2+4))/4     ;//; scan for at most MAX_MATCH bytes
	    repe    cmpsd                                               ;//; loop until mismatch

	    je      trfin                       ; every dword matched: len = MAX_MATCH
			; we determine the unmatch byte
	    sub     esi,4						; back to the dword that differed
	    mov     eax,[edi-4]
	    xor     eax,[esi]
	    or      al,al

	    jnz     trfin
	    inc     esi

	    or      ax,ax
	    jnz     trfin
	    inc     esi

	    shl     eax,8
	    jnz     trfin
	    inc     esi

trfin:
	    sub     esi,scanrp          ; esi = len
trfinval:
	    cmp     esi,best_len        ; if len <= best_len, go contloop2
	    jbe     contloop2

	    mov     best_len,esi        ; len become best_len

	    mov     match_start,edx
	    cmp     esi,nice_match ;//; if esi >= nice_match, exit
	    mov     ecx,scanrp			; (mov leaves the flags of the cmp intact)
	    jae     exitloop
	    add     esi,window
	    add     ecx,best_len
			dec             esi
	    mov     windowlen,esi		; windowlen = window + best_len - 1
	    mov     bx,[ecx-1]			; new scan_end = word at scan + best_len - 1


; now we restore eax, ecx and esi, for the big loop :
contloop2:
	    mov     esi,prev
	    mov     ecx,limit
	    ;mov     eax,chain_length
		mov		eax,window
	    jmp     contloop3

; Common exit: store match_start back into the structure and return
; min(best_len, s->lookahead). The comparison above always looks at up to
; MAX_MATCH bytes, so best_len can run past the available lookahead; the
; result is clamped here so that callers never get a length covering bytes
; that are not part of the input yet.
exitloop:
	    mov     ebx,match_start
		mov		ebp,str_s					; ebp = s again (bp held scan_start)
		mov		ecx,best_len
	    mov     dword ptr [ebp+dep_match_start],ebx
		mov		eax,dword ptr [ebp+dep_lookahead]
		cmp		ecx,eax
		ja		minexlo						; best_len > lookahead: return lookahead
		mov		eax,ecx						; otherwise return best_len
minexlo:
	    add     esp,NbStackAdd


	    pop     ebx
	    pop     esi
	    pop     edi
	    pop     ebp
	    ret

_longest_match_asm7fff endp

; close the code segment opened before the procedure, then end the module
_TEXT   ends
end