diff options
Diffstat (limited to 'src/lib/libcrypto/bn/asm/bn-586.pl')
-rw-r--r-- | src/lib/libcrypto/bn/asm/bn-586.pl | 675 |
1 files changed, 0 insertions, 675 deletions
diff --git a/src/lib/libcrypto/bn/asm/bn-586.pl b/src/lib/libcrypto/bn/asm/bn-586.pl deleted file mode 100644 index 26c2685a72..0000000000 --- a/src/lib/libcrypto/bn/asm/bn-586.pl +++ /dev/null | |||
@@ -1,675 +0,0 @@ | |||
#!/usr/local/bin/perl
#
# Driver script: emits the x86 word-level bignum routines through the
# perlasm helpers loaded from x86asm.pl.

push(@INC, "perlasm", "../../perlasm");
require "x86asm.pl";

&asm_init($ARGV[0], $0);

# The SSE2 variant of bn_mul_add_words is generated only when
# -DOPENSSL_IA32_SSE2 appears somewhere on the command line.
$sse2 = (grep { /-DOPENSSL_IA32_SSE2/ } @ARGV) ? 1 : 0;

&external_label("OPENSSL_ia32cap_P") if ($sse2);

# Every generator is handed the symbol name it should emit, which here is
# the same as the generator's own name.  The list order is the emission
# order.
foreach my $routine (qw(
    bn_mul_add_words
    bn_mul_words
    bn_sqr_words
    bn_div_words
    bn_add_words
    bn_sub_words
    bn_sub_part_words
)) {
    &$routine($routine);
}

&asm_finish();
22 | |||
# bn_mul_add_words(NAME)
#
# Emits the routine NAME(r, a, num, w): for each of the num words,
# r[i] = low32(a[i]*w + r[i] + carry), with the high half becoming the next
# carry.  The final carry is returned in eax.  Words are processed eight at
# a time, followed by an unrolled tail for the remaining num % 8 words.
#
# When $sse2 is set, the eight-word loop also gets an MMX/SSE2 variant that
# is selected at run time by testing bit 26 of OPENSSL_ia32cap_P.
#
# Calls keep the & sigil throughout: several op names (and, xor, sub, push,
# pop) are Perl built-ins and would not reach the perlasm helpers otherwise.
sub bn_mul_add_words {
    local($name) = @_;

    &function_begin($name, $sse2 ? "EXTRN\t_OPENSSL_ia32cap_P:DWORD" : "");

    &comment("");
    $Low  = "eax";    # low half of a mul result (the body spells it out)
    $High = "edx";    # high half of a mul result (ditto)
    $a    = "ebx";    # source pointer
    $w    = "ebp";    # multiplier word
    $r    = "edi";    # destination pointer
    $c    = "esi";    # running carry

    # Memory operands for the word at a byte offset from a / r.
    my $a_at = sub { &DWP($_[0], $a, "", 0) };
    my $r_at = sub { &DWP($_[0], $r, "", 0) };

    # One integer multiply-accumulate step at byte offset $off.  With
    # $count_down set, ecx is decremented before the stores; the stores are
    # plain movs, so the zero flag from dec is still valid afterwards.
    my $mac_word = sub {
        my ($off, $count_down) = @_;

        &mov("eax", $a_at->($off));     # eax = a[off]
        &mul($w);                       # edx:eax = a[off] * w
        &add("eax", $c);                # low += carry in
        &mov($c, $r_at->($off));        # reuse c to fetch r[off]
        &adc("edx", 0);                 # propagate into the high half
        &add("eax", $c);                # low += r[off]
        &adc("edx", 0);                 # propagate into the high half
        &dec("ecx") if ($count_down);
        &mov($r_at->($off), "eax");     # r[off] = low
        &mov($c, "edx");                # carry out = high
    };

    &xor($c, $c);                       # carry starts at zero
    &mov($r, &wparam(0));

    &mov("ecx", &wparam(2));
    &mov($a, &wparam(1));

    &and("ecx", 0xfffffff8);            # num rounded down to a multiple of 8
    &mov($w, &wparam(3));

    &push("ecx");                       # makes room on the stack for a temporary

    # Still acting on the flags of the and above (mov/push leave them alone).
    &jz(&label("maw_finish"));

    if ($sse2) {
        &picmeup("eax", "OPENSSL_ia32cap_P");
        &bt(&DWP(0, "eax"), 26);
        &jnc(&label("maw_loop"));       # bit clear: use the integer loop

        &movd("mm0", $w);               # mm0 = w
        &pxor("mm1", "mm1");            # mm1 = carry_in

        # The instruction order below is a hand-made schedule; keep it as is.
        &set_label("maw_sse2_loop", 0);
        &movd("mm3", $r_at->(0));       # mm3 = r[0]
        &paddq("mm1", "mm3");           # mm1 = carry_in + r[0]
        &movd("mm2", $a_at->(0));       # mm2 = a[0]
        &pmuludq("mm2", "mm0");         # mm2 = w*a[0]
        &movd("mm4", $a_at->(4));       # mm4 = a[1]
        &pmuludq("mm4", "mm0");         # mm4 = w*a[1]
        &movd("mm6", $a_at->(8));       # mm6 = a[2]
        &pmuludq("mm6", "mm0");         # mm6 = w*a[2]
        &movd("mm7", $a_at->(12));      # mm7 = a[3]
        &pmuludq("mm7", "mm0");         # mm7 = w*a[3]
        &paddq("mm1", "mm2");           # mm1 = carry_in + r[0] + w*a[0]
        &movd("mm3", $r_at->(4));       # mm3 = r[1]
        &paddq("mm3", "mm4");           # mm3 = r[1] + w*a[1]
        &movd("mm5", $r_at->(8));       # mm5 = r[2]
        &paddq("mm5", "mm6");           # mm5 = r[2] + w*a[2]
        &movd("mm4", $r_at->(12));      # mm4 = r[3]
        &paddq("mm7", "mm4");           # mm7 = r[3] + w*a[3]
        &movd($r_at->(0), "mm1");       # store r[0]
        &movd("mm2", $a_at->(16));      # mm2 = a[4]
        &pmuludq("mm2", "mm0");         # mm2 = w*a[4]
        &psrlq("mm1", 32);              # mm1 = carry0
        &movd("mm4", $a_at->(20));      # mm4 = a[5]
        &pmuludq("mm4", "mm0");         # mm4 = w*a[5]
        &paddq("mm1", "mm3");           # mm1 = carry0 + r[1] + w*a[1]
        &movd("mm6", $a_at->(24));      # mm6 = a[6]
        &pmuludq("mm6", "mm0");         # mm6 = w*a[6]
        &movd($r_at->(4), "mm1");       # store r[1]
        &psrlq("mm1", 32);              # mm1 = carry1
        &movd("mm3", $a_at->(28));      # mm3 = a[7]
        &add($a, 32);                   # a advances to the next group
        &pmuludq("mm3", "mm0");         # mm3 = w*a[7]
        &paddq("mm1", "mm5");           # mm1 = carry1 + r[2] + w*a[2]
        &movd("mm5", $r_at->(16));      # mm5 = r[4]
        &paddq("mm2", "mm5");           # mm2 = r[4] + w*a[4]
        &movd($r_at->(8), "mm1");       # store r[2]
        &psrlq("mm1", 32);              # mm1 = carry2
        &paddq("mm1", "mm7");           # mm1 = carry2 + r[3] + w*a[3]
        &movd("mm5", $r_at->(20));      # mm5 = r[5]
        &paddq("mm4", "mm5");           # mm4 = r[5] + w*a[5]
        &movd($r_at->(12), "mm1");      # store r[3]
        &psrlq("mm1", 32);              # mm1 = carry3
        &paddq("mm1", "mm2");           # mm1 = carry3 + r[4] + w*a[4]
        &movd("mm5", $r_at->(24));      # mm5 = r[6]
        &paddq("mm6", "mm5");           # mm6 = r[6] + w*a[6]
        &movd($r_at->(16), "mm1");      # store r[4]
        &psrlq("mm1", 32);              # mm1 = carry4
        &paddq("mm1", "mm4");           # mm1 = carry4 + r[5] + w*a[5]
        &movd("mm5", $r_at->(28));      # mm5 = r[7]
        &paddq("mm3", "mm5");           # mm3 = r[7] + w*a[7]
        &movd($r_at->(20), "mm1");      # store r[5]
        &psrlq("mm1", 32);              # mm1 = carry5
        &paddq("mm1", "mm6");           # mm1 = carry5 + r[6] + w*a[6]
        &movd($r_at->(24), "mm1");      # store r[6]
        &psrlq("mm1", 32);              # mm1 = carry6
        &paddq("mm1", "mm3");           # mm1 = carry6 + r[7] + w*a[7]
        &movd($r_at->(28), "mm1");      # store r[7]
        &add($r, 32);                   # r advances to the next group
        &psrlq("mm1", 32);              # mm1 = carry_out

        &sub("ecx", 8);
        &jnz(&label("maw_sse2_loop"));

        &movd($c, "mm1");               # c = carry_out
        &emms();

        &jmp(&label("maw_finish"));
    }

    &set_label("maw_loop", 0);

    &mov(&swtmp(0), "ecx");             # park the counter in the temporary

    foreach my $off (map { 4 * $_ } 0 .. 7) {
        &comment("Round $off");
        $mac_word->($off, 0);
    }

    &comment("");
    &mov("ecx", &swtmp(0));             # counter back from the temporary
    &add($a, 32);
    &add($r, 32);
    &sub("ecx", 8);
    &jnz(&label("maw_loop"));

    &set_label("maw_finish", 0);
    &mov("ecx", &wparam(2));            # reload num
    &and("ecx", 7);                     # words left over after the groups of 8
    &jnz(&label("maw_finish2"));        # helps branch prediction
    &jmp(&label("maw_end"));

    &set_label("maw_finish2", 1);
    foreach my $i (0 .. 6) {
        my $more = ($i != 6);           # the seventh word needs no count check

        &comment("Tail Round $i");
        $mac_word->(4 * $i, $more);
        &jz(&label("maw_end")) if ($more);
    }
    &set_label("maw_end", 0);
    &mov("eax", $c);                    # return the final carry

    &pop("ecx");                        # drop the temporary pushed at entry

    &function_end($name);
}
182 | |||
# bn_mul_words(NAME)
#
# Emits the routine NAME(r, a, num, w): for each of the num words,
# r[i] = low32(a[i]*w + carry), with the high half becoming the next carry.
# The final carry is returned in eax.  Eight words per loop iteration, then
# an unrolled tail for the remaining num % 8 words.
sub bn_mul_words {
    local($name) = @_;

    &function_begin($name, "");

    &comment("");
    $Low  = "eax";    # low half of a mul result (the body spells it out)
    $High = "edx";    # high half of a mul result (ditto)
    $a    = "ebx";    # source pointer
    $w    = "ecx";    # multiplier word
    $r    = "edi";    # destination pointer
    $c    = "esi";    # running carry
    $num  = "ebp";    # word counter

    # One multiply step on the word at byte offset $off.
    my $mul_word = sub {
        my ($off) = @_;

        &mov("eax", &DWP($off, $a, "", 0));     # eax = a[off]
        &mul($w);                               # edx:eax = a[off] * w
        &add("eax", $c);                        # low += carry in
        &adc("edx", 0);                         # propagate into the high half
        &mov(&DWP($off, $r, "", 0), "eax");     # r[off] = low
        &mov($c, "edx");                        # carry out = high
    };

    &xor($c, $c);                       # carry starts at zero
    &mov($r, &wparam(0));
    &mov($a, &wparam(1));
    &mov($num, &wparam(2));
    &mov($w, &wparam(3));

    &and($num, 0xfffffff8);             # num rounded down to a multiple of 8
    &jz(&label("mw_finish"));

    &set_label("mw_loop", 0);
    foreach my $off (map { 4 * $_ } 0 .. 7) {
        &comment("Round $off");
        $mul_word->($off);
    }

    &comment("");
    &add($a, 32);
    &add($r, 32);
    &sub($num, 8);
    &jz(&label("mw_finish"));
    &jmp(&label("mw_loop"));

    &set_label("mw_finish", 0);
    &mov($num, &wparam(2));             # reload num
    &and($num, 7);                      # words left over after the groups of 8
    &jnz(&label("mw_finish2"));
    &jmp(&label("mw_end"));

    &set_label("mw_finish2", 1);
    foreach my $i (0 .. 6) {
        &comment("Tail Round $i");
        $mul_word->(4 * $i);

        # The seventh word is always the last one, so no count check there.
        if ($i != 6) {
            &dec($num);
            &jz(&label("mw_end"));
        }
    }
    &set_label("mw_end", 0);
    &mov("eax", $c);                    # return the final carry

    &function_end($name);
}
255 | |||
# bn_sqr_words(NAME)
#
# Emits the routine NAME(r, a, num): each input word a[i] is squared and the
# 64-bit result stored as the word pair r[2i] (low), r[2i+1] (high).  Eight
# input words per loop iteration, then an unrolled tail for the remaining
# num % 8 words.
sub bn_sqr_words {
    local($name) = @_;

    &function_begin($name, "");

    &comment("");
    $r   = "esi";     # destination pointer (two words per input word)
    $a   = "edi";     # source pointer
    $num = "ebx";     # word counter

    # Square the word at byte offset $off of a and store it at byte offset
    # 2*$off of r.  With $count_down set, the counter is decremented between
    # the two stores; the second store is a plain mov, so the zero flag from
    # dec is still valid afterwards.
    my $sqr_word = sub {
        my ($off, $count_down) = @_;

        &mov("eax", &DWP($off, $a, "", 0));             # eax = a[off]
        &mul("eax");                                    # edx:eax = eax * eax
        &mov(&DWP($off * 2, $r, "", 0), "eax");         # low half
        &dec($num) if ($count_down);
        &mov(&DWP($off * 2 + 4, $r, "", 0), "edx");     # high half
    };

    &mov($r, &wparam(0));
    &mov($a, &wparam(1));
    &mov($num, &wparam(2));

    &and($num, 0xfffffff8);             # num rounded down to a multiple of 8
    &jz(&label("sw_finish"));

    &set_label("sw_loop", 0);
    foreach my $off (map { 4 * $_ } 0 .. 7) {
        &comment("Round $off");
        $sqr_word->($off, 0);
    }

    &comment("");
    &add($a, 32);
    &add($r, 64);                       # r moves twice as fast as a
    &sub($num, 8);
    &jnz(&label("sw_loop"));

    &set_label("sw_finish", 0);
    &mov($num, &wparam(2));             # reload num
    &and($num, 7);                      # words left over after the groups of 8
    &jz(&label("sw_end"));

    foreach my $i (0 .. 6) {
        my $more = ($i != 6);           # the seventh word needs no count check

        &comment("Tail Round $i");
        $sqr_word->(4 * $i, $more);
        &jz(&label("sw_end")) if ($more);
    }
    &set_label("sw_end", 0);

    &function_end($name);
}
311 | |||
# bn_div_words(NAME)
#
# Emits the routine NAME(h, l, d): loads the first argument into edx, the
# second into eax and the third into ebx, then issues a single div, which
# divides edx:eax by ebx and leaves the quotient in eax.
sub bn_div_words {
    local($name) = @_;

    &function_begin($name, "");

    &mov("edx", &wparam(0));    # high word of the dividend
    &mov("eax", &wparam(1));    # low word of the dividend
    &mov("ebx", &wparam(2));    # divisor
    &div("ebx");

    &function_end($name);
}
323 | |||
# bn_add_words(NAME)
#
# Emits the routine NAME(r, a, b, num): r[i] = a[i] + b[i] + carry for num
# words.  The carry is kept as a 0/1 value in eax between words, so the
# final carry is already in the return register when the routine ends.
# Eight words per loop iteration, then an unrolled tail for num % 8 words.
sub bn_add_words {
    local($name) = @_;

    &function_begin($name, "");

    &comment("");
    $a    = "esi";    # first source pointer
    $b    = "edi";    # second source pointer
    $c    = "eax";    # carry (0 or 1); doubles as the return value
    $r    = "ebx";    # destination pointer
    $tmp1 = "ecx";    # a[i], then the sum
    $tmp2 = "edx";    # b[i]
    $num  = "ebp";    # word counter

    # One add-with-carry step on the word at byte offset $off.  With
    # $count_down set, the counter is decremented just before the store; the
    # store is a plain mov, so the zero flag from dec is still valid after.
    my $add_word = sub {
        my ($off, $count_down) = @_;

        &mov($tmp1, &DWP($off, $a, "", 0));     # tmp1 = a[off]
        &mov($tmp2, &DWP($off, $b, "", 0));     # tmp2 = b[off]
        &add($tmp1, $c);                        # add the incoming carry
        &mov($c, 0);                            # mov leaves CF untouched
        &adc($c, $c);                           # c = carry of that add
        &add($tmp1, $tmp2);                     # add b[off]
        &adc($c, 0);                            # c += carry of that add
        &dec($num) if ($count_down);
        &mov(&DWP($off, $r, "", 0), $tmp1);     # r[off] = sum
    };

    &mov($r, &wparam(0));               # get r
    &mov($a, &wparam(1));               # get a
    &mov($b, &wparam(2));               # get b
    &mov($num, &wparam(3));             # get num
    &xor($c, $c);                       # carry starts at zero
    &and($num, 0xfffffff8);             # num rounded down to a multiple of 8

    &jz(&label("aw_finish"));

    &set_label("aw_loop", 0);
    foreach my $i (0 .. 7) {
        &comment("Round $i");
        $add_word->(4 * $i, 0);
    }

    &comment("");
    &add($a, 32);
    &add($b, 32);
    &add($r, 32);
    &sub($num, 8);
    &jnz(&label("aw_loop"));

    &set_label("aw_finish", 0);
    &mov($num, &wparam(3));             # reload num
    &and($num, 7);                      # words left over after the groups of 8
    &jz(&label("aw_end"));

    foreach my $i (0 .. 6) {
        my $more = ($i != 6);           # the seventh word needs no count check

        &comment("Tail Round $i");
        $add_word->(4 * $i, $more);
        &jz(&label("aw_end")) if ($more);
    }
    &set_label("aw_end", 0);

    # No move into eax is needed here: $c already is eax.

    &function_end($name);
}
395 | |||
# bn_sub_words(NAME)
#
# Emits the routine NAME(r, a, b, num): r[i] = a[i] - b[i] - borrow for num
# words.  The borrow is kept as a 0/1 value in eax between words, so the
# final borrow is already in the return register when the routine ends.
# Eight words per loop iteration, then an unrolled tail for num % 8 words.
#
# The label names carry the same "aw_" prefix as bn_add_words; they are
# reproduced unchanged.
sub bn_sub_words {
    local($name) = @_;

    &function_begin($name, "");

    &comment("");
    $a    = "esi";    # minuend pointer
    $b    = "edi";    # subtrahend pointer
    $c    = "eax";    # borrow (0 or 1); doubles as the return value
    $r    = "ebx";    # destination pointer
    $tmp1 = "ecx";    # a[i], then the difference
    $tmp2 = "edx";    # b[i]
    $num  = "ebp";    # word counter

    # One subtract-with-borrow step on the word at byte offset $off.  With
    # $count_down set, the counter is decremented just before the store; the
    # store is a plain mov, so the zero flag from dec is still valid after.
    my $sub_word = sub {
        my ($off, $count_down) = @_;

        &mov($tmp1, &DWP($off, $a, "", 0));     # tmp1 = a[off]
        &mov($tmp2, &DWP($off, $b, "", 0));     # tmp2 = b[off]
        &sub($tmp1, $c);                        # take off the incoming borrow
        &mov($c, 0);                            # mov leaves CF untouched
        &adc($c, $c);                           # c = borrow of that sub
        &sub($tmp1, $tmp2);                     # take off b[off]
        &adc($c, 0);                            # c += borrow of that sub
        &dec($num) if ($count_down);
        &mov(&DWP($off, $r, "", 0), $tmp1);     # r[off] = difference
    };

    &mov($r, &wparam(0));               # get r
    &mov($a, &wparam(1));               # get a
    &mov($b, &wparam(2));               # get b
    &mov($num, &wparam(3));             # get num
    &xor($c, $c);                       # borrow starts at zero
    &and($num, 0xfffffff8);             # num rounded down to a multiple of 8

    &jz(&label("aw_finish"));

    &set_label("aw_loop", 0);
    foreach my $i (0 .. 7) {
        &comment("Round $i");
        $sub_word->(4 * $i, 0);
    }

    &comment("");
    &add($a, 32);
    &add($b, 32);
    &add($r, 32);
    &sub($num, 8);
    &jnz(&label("aw_loop"));

    &set_label("aw_finish", 0);
    &mov($num, &wparam(3));             # reload num
    &and($num, 7);                      # words left over after the groups of 8
    &jz(&label("aw_end"));

    foreach my $i (0 .. 6) {
        my $more = ($i != 6);           # the seventh word needs no count check

        &comment("Tail Round $i");
        $sub_word->(4 * $i, $more);
        &jz(&label("aw_end")) if ($more);
    }
    &set_label("aw_end", 0);

    # No move into eax is needed here: $c already is eax.

    &function_end($name);
}
467 | |||
# bn_sub_part_words(NAME)
#
# Emits the routine NAME(r, a, b, num, dl).  It runs in two phases:
#
#   1. r[i] = a[i] - b[i] - borrow over the first num words, exactly like
#      bn_sub_words, except that the tail advances a, b and r word by word
#      so that all three pointers end up just past the common part.
#   2. Depending on the sign of the fifth argument dl:
#        dl == 0  nothing more is done;
#        dl <  0  r[i] = 0 - b[i] - borrow for a further -dl words;
#        dl >  0  r[i] = a[i] - borrow for a further dl words.  As soon as
#                 a word no longer borrows, control moves to a plain copy
#                 path ("pw_nc*") that ends with the borrow cleared.
#
# The borrow lives in eax as a 0/1 value throughout and is therefore the
# return value.
sub bn_sub_part_words {
    local($name) = @_;

    &function_begin($name, "");

    &comment("");
    $a    = "esi";    # minuend pointer
    $b    = "edi";    # subtrahend pointer
    $c    = "eax";    # borrow (0 or 1); doubles as the return value
    $r    = "ebx";    # destination pointer
    $tmp1 = "ecx";    # working word
    $tmp2 = "edx";    # b[i] / scratch
    $num  = "ebp";    # word counter

    # Memory operands for the word at a byte offset from a / b / r.
    my $a_at = sub { &DWP($_[0], $a, "", 0) };
    my $b_at = sub { &DWP($_[0], $b, "", 0) };
    my $r_at = sub { &DWP($_[0], $r, "", 0) };

    # Phase 1 step: r[off] = a[off] - b[off] - borrow.
    my $sub_word = sub {
        my ($off) = @_;

        &mov($tmp1, $a_at->($off));     # tmp1 = a[off]
        &mov($tmp2, $b_at->($off));     # tmp2 = b[off]
        &sub($tmp1, $c);                # take off the incoming borrow
        &mov($c, 0);                    # mov leaves CF untouched
        &adc($c, $c);                   # c = borrow of that sub
        &sub($tmp1, $tmp2);             # take off b[off]
        &adc($c, 0);                    # c += borrow of that sub
        &mov($r_at->($off), $tmp1);     # r[off] = difference
    };

    # dl < 0 step: r[off] = 0 - b[off] - borrow.  With $count_down set, the
    # counter is decremented just before the store (a plain mov, so the zero
    # flag from dec is still valid afterwards).
    my $neg_word = sub {
        my ($off, $count_down) = @_;

        &mov($tmp1, 0);                 # the missing a word counts as zero
        &mov($tmp2, $b_at->($off));     # tmp2 = b[off]
        &sub($tmp1, $c);
        &mov($c, 0);
        &adc($c, $c);
        &sub($tmp1, $tmp2);
        &adc($c, 0);
        &dec($num) if ($count_down);
        &mov($r_at->($off), $tmp1);     # r[off] = difference
    };

    &mov($r, &wparam(0));               # get r
    &mov($a, &wparam(1));               # get a
    &mov($b, &wparam(2));               # get b
    &mov($num, &wparam(3));             # get num
    &xor($c, $c);                       # borrow starts at zero
    &and($num, 0xfffffff8);             # num rounded down to a multiple of 8

    &jz(&label("aw_finish"));

    &set_label("aw_loop", 0);
    foreach my $i (0 .. 7) {
        &comment("Round $i");
        $sub_word->(4 * $i);
    }

    &comment("");
    &add($a, 32);
    &add($b, 32);
    &add($r, 32);
    &sub($num, 8);
    &jnz(&label("aw_loop"));

    &set_label("aw_finish", 0);
    &mov($num, &wparam(3));             # reload num
    &and($num, 7);                      # words left over after the groups of 8
    &jz(&label("aw_end"));

    # Unlike the other routines, this tail always addresses offset 0 and
    # bumps the pointers, because phase 2 continues from where it stops.
    foreach my $i (0 .. 6) {
        &comment("Tail Round $i");
        $sub_word->(0);
        &add($a, 4);
        &add($b, 4);
        &add($r, 4);

        # The seventh word is always the last one, so no count check there.
        if ($i != 6) {
            &dec($num);
            &jz(&label("aw_end"));
        }
    }
    &set_label("aw_end", 0);

    # Phase 2.  dl is tested twice, once in memory and once in a register;
    # both tests are reproduced from the original.
    &cmp(&wparam(4), 0);
    &je(&label("pw_end"));

    &mov($num, &wparam(4));             # get dl
    &cmp($num, 0);
    &je(&label("pw_end"));
    &jge(&label("pw_pos"));

    &comment("pw_neg");
    &mov($tmp2, 0);
    &sub($tmp2, $num);                  # tmp2 = -dl
    &mov($num, $tmp2);
    &and($num, 0xfffffff8);             # -dl rounded down to a multiple of 8
    &jz(&label("pw_neg_finish"));

    &set_label("pw_neg_loop", 0);
    foreach my $i (0 .. 7) {
        &comment("dl<0 Round $i");
        $neg_word->(4 * $i, 0);
    }

    &comment("");
    &add($b, 32);
    &add($r, 32);
    &sub($num, 8);
    &jnz(&label("pw_neg_loop"));

    &set_label("pw_neg_finish", 0);
    &mov($tmp2, &wparam(4));            # get dl
    &mov($num, 0);
    &sub($num, $tmp2);                  # num = -dl
    &and($num, 7);                      # words left over after the groups of 8
    &jz(&label("pw_end"));

    foreach my $i (0 .. 6) {
        my $more = ($i != 6);           # the seventh word needs no count check

        &comment("dl<0 Tail Round $i");
        $neg_word->(4 * $i, $more);
        &jz(&label("pw_end")) if ($more);
    }

    &jmp(&label("pw_end"));

    &set_label("pw_pos", 0);

    &and($num, 0xfffffff8);             # dl rounded down to a multiple of 8
    &jz(&label("pw_pos_finish"));

    &set_label("pw_pos_loop", 0);

    foreach my $i (0 .. 7) {
        &comment("dl>0 Round $i");

        &mov($tmp1, $a_at->(4 * $i));   # tmp1 = a[i]
        &sub($tmp1, $c);                # take off the borrow
        &mov($r_at->(4 * $i), $tmp1);   # r[i] = result
        # No borrow out: continue in the copy loop right after word $i.
        &jnc(&label("pw_nc" . $i));
    }

    &comment("");
    &add($a, 32);
    &add($r, 32);
    &sub($num, 8);
    &jnz(&label("pw_pos_loop"));

    &set_label("pw_pos_finish", 0);
    &mov($num, &wparam(4));             # get dl
    &and($num, 7);                      # words left over after the groups of 8
    &jz(&label("pw_end"));

    foreach my $i (0 .. 6) {
        &comment("dl>0 Tail Round $i");
        &mov($tmp1, $a_at->(4 * $i));   # tmp1 = a[i]
        &sub($tmp1, $c);                # take off the borrow
        &mov($r_at->(4 * $i), $tmp1);   # r[i] = result
        # No borrow out: continue in the copy tail right after word $i.
        &jnc(&label("pw_tail_nc" . $i));

        # The seventh word is always the last one, so no count check there.
        if ($i != 6) {
            &dec($num);
            &jz(&label("pw_end"));
        }
    }
    &mov($c, 1);                        # every tail word borrowed
    &jmp(&label("pw_end"));

    # Copy path: the borrow has been absorbed, the remaining words of a are
    # copied to r unchanged.  Each pw_nc<i> label sits after the copy of
    # word i, which the borrowing loop above has already stored.
    &set_label("pw_nc_loop", 0);
    foreach my $i (0 .. 7) {
        &mov($tmp1, $a_at->(4 * $i));   # tmp1 = a[i]
        &mov($r_at->(4 * $i), $tmp1);   # r[i] = a[i]
        &set_label("pw_nc" . $i, 0);
    }

    &comment("");
    &add($a, 32);
    &add($r, 32);
    &sub($num, 8);
    &jnz(&label("pw_nc_loop"));

    &mov($num, &wparam(4));             # get dl
    &and($num, 7);                      # words left over after the groups of 8
    &jz(&label("pw_nc_end"));

    foreach my $i (0 .. 6) {
        &mov($tmp1, $a_at->(4 * $i));   # tmp1 = a[i]
        &mov($r_at->(4 * $i), $tmp1);   # r[i] = a[i]
        &set_label("pw_tail_nc" . $i, 0);

        # The seventh word is always the last one, so no count check there.
        if ($i != 6) {
            &dec($num);
            &jz(&label("pw_nc_end"));
        }
    }

    &set_label("pw_nc_end", 0);
    &mov($c, 0);                        # the copy path ends without a borrow

    &set_label("pw_end", 0);

    # No move into eax is needed here: $c already is eax.

    &function_end($name);
}
675 | |||