diff options
Diffstat (limited to 'src/lib/libcrypto/bn/asm/bn-586.pl')
-rw-r--r-- | src/lib/libcrypto/bn/asm/bn-586.pl | 567 |
1 files changed, 0 insertions, 567 deletions
diff --git a/src/lib/libcrypto/bn/asm/bn-586.pl b/src/lib/libcrypto/bn/asm/bn-586.pl deleted file mode 100644 index 71b775af8d..0000000000 --- a/src/lib/libcrypto/bn/asm/bn-586.pl +++ /dev/null | |||
@@ -1,567 +0,0 @@ | |||
#!/usr/local/bin/perl
#
# Generator for the 32-bit x86 BIGNUM word primitives.  Each bn_*_words sub
# defined in this file emits one assembler routine through the perlasm
# helpers loaded from x86asm.pl.

# Directory part of the script path, including the trailing separator.
# Left undefined when $0 carries no directory component.
($dir) = $0 =~ m/(.*[\/\\])[^\/\\]+$/;
push @INC, "${dir}", "${dir}../../perlasm";
require "x86asm.pl";

&asm_init($ARGV[0], $0);

# The SSE2 code paths are generated only when -DOPENSSL_IA32_SSE2 appears
# somewhere on the command line.
$sse2 = (grep { /-DOPENSSL_IA32_SSE2/ } @ARGV) ? 1 : 0;

&external_label("OPENSSL_ia32cap_P") if $sse2;

# Emit the routines; every generator sub is named after the symbol it emits
# and is invoked by name, in this order.
for my $routine (qw(
    bn_mul_add_words
    bn_mul_words
    bn_sqr_words
    bn_div_words
    bn_add_words
    bn_sub_words
)) {
    &$routine($routine);
}

&asm_finish();
22 | |||
# bn_mul_add_words(NAME)
#
# Emit the x86 routine NAME, which computes r[i] += a[i] * w for the num
# words of a and returns the final carry word in eax.  The emitted routine
# reads four stack arguments via wparam(0..3): r, a, num, w.
#
# When the generator runs with SSE2 enabled ($sse2), the routine first tests
# the IA32CAP_BIT0_SSE2 bit of OPENSSL_ia32cap_P.  If the bit is set it uses
# the pmuludq/paddq path (8-way unrolled plus a one-word loop); otherwise it
# branches to the integer mul/adc path that follows.
#
# Both paths return a zero carry without touching memory when num == 0.
sub bn_mul_add_words
{
    local($name)=@_;

    &function_begin_B($name,"");

    $r="eax";
    $a="edx";
    $c="ecx";

    if ($sse2) {
        &picsetup("eax");
        &picsymbol("eax", "OPENSSL_ia32cap_P", "eax");
        &bt(&DWP(0,"eax"),"\$IA32CAP_BIT0_SSE2");
        &jnc(&label("maw_non_sse2"));

        &mov($r,&wparam(0));
        &mov($a,&wparam(1));
        &mov($c,&wparam(2));
        &movd("mm0",&wparam(3));            # mm0 = w
        &pxor("mm1","mm1");                 # mm1 = carry_in

        # Guard against num == 0: without it the one-word loop below would
        # be entered with a zero counter, wrap it on the first "sub", and
        # run far past the end of a[] and r[].  mm1 is already zero, so the
        # common exit returns a zero carry, matching the integer path.
        &test($c,$c);
        &jz(&label("maw_sse2_exit"));

        &jmp(&label("maw_sse2_entry"));

    &set_label("maw_sse2_unrolled",16);
        # Eight words per iteration.  The statement order is the emitted
        # instruction schedule; keep it as is.
        &movd("mm3",&DWP(0,$r,"",0));       # mm3 = r[0]
        &paddq("mm1","mm3");                # mm1 = carry_in + r[0]
        &movd("mm2",&DWP(0,$a,"",0));       # mm2 = a[0]
        &pmuludq("mm2","mm0");              # mm2 = w*a[0]
        &movd("mm4",&DWP(4,$a,"",0));       # mm4 = a[1]
        &pmuludq("mm4","mm0");              # mm4 = w*a[1]
        &movd("mm6",&DWP(8,$a,"",0));       # mm6 = a[2]
        &pmuludq("mm6","mm0");              # mm6 = w*a[2]
        &movd("mm7",&DWP(12,$a,"",0));      # mm7 = a[3]
        &pmuludq("mm7","mm0");              # mm7 = w*a[3]
        &paddq("mm1","mm2");                # mm1 = carry_in + r[0] + w*a[0]
        &movd("mm3",&DWP(4,$r,"",0));       # mm3 = r[1]
        &paddq("mm3","mm4");                # mm3 = r[1] + w*a[1]
        &movd("mm5",&DWP(8,$r,"",0));       # mm5 = r[2]
        &paddq("mm5","mm6");                # mm5 = r[2] + w*a[2]
        &movd("mm4",&DWP(12,$r,"",0));      # mm4 = r[3]
        &paddq("mm7","mm4");                # mm7 = r[3] + w*a[3]
        &movd(&DWP(0,$r,"",0),"mm1");
        &movd("mm2",&DWP(16,$a,"",0));      # mm2 = a[4]
        &pmuludq("mm2","mm0");              # mm2 = w*a[4]
        &psrlq("mm1",32);                   # mm1 = carry0
        &movd("mm4",&DWP(20,$a,"",0));      # mm4 = a[5]
        &pmuludq("mm4","mm0");              # mm4 = w*a[5]
        &paddq("mm1","mm3");                # mm1 = carry0 + r[1] + w*a[1]
        &movd("mm6",&DWP(24,$a,"",0));      # mm6 = a[6]
        &pmuludq("mm6","mm0");              # mm6 = w*a[6]
        &movd(&DWP(4,$r,"",0),"mm1");
        &psrlq("mm1",32);                   # mm1 = carry1
        &movd("mm3",&DWP(28,$a,"",0));      # mm3 = a[7]
        &add($a,32);
        &pmuludq("mm3","mm0");              # mm3 = w*a[7]
        &paddq("mm1","mm5");                # mm1 = carry1 + r[2] + w*a[2]
        &movd("mm5",&DWP(16,$r,"",0));      # mm5 = r[4]
        &paddq("mm2","mm5");                # mm2 = r[4] + w*a[4]
        &movd(&DWP(8,$r,"",0),"mm1");
        &psrlq("mm1",32);                   # mm1 = carry2
        &paddq("mm1","mm7");                # mm1 = carry2 + r[3] + w*a[3]
        &movd("mm5",&DWP(20,$r,"",0));      # mm5 = r[5]
        &paddq("mm4","mm5");                # mm4 = r[5] + w*a[5]
        &movd(&DWP(12,$r,"",0),"mm1");
        &psrlq("mm1",32);                   # mm1 = carry3
        &paddq("mm1","mm2");                # mm1 = carry3 + r[4] + w*a[4]
        &movd("mm5",&DWP(24,$r,"",0));      # mm5 = r[6]
        &paddq("mm6","mm5");                # mm6 = r[6] + w*a[6]
        &movd(&DWP(16,$r,"",0),"mm1");
        &psrlq("mm1",32);                   # mm1 = carry4
        &paddq("mm1","mm4");                # mm1 = carry4 + r[5] + w*a[5]
        &movd("mm5",&DWP(28,$r,"",0));      # mm5 = r[7]
        &paddq("mm3","mm5");                # mm3 = r[7] + w*a[7]
        &movd(&DWP(20,$r,"",0),"mm1");
        &psrlq("mm1",32);                   # mm1 = carry5
        &paddq("mm1","mm6");                # mm1 = carry5 + r[6] + w*a[6]
        &movd(&DWP(24,$r,"",0),"mm1");
        &psrlq("mm1",32);                   # mm1 = carry6
        &paddq("mm1","mm3");                # mm1 = carry6 + r[7] + w*a[7]
        &movd(&DWP(28,$r,"",0),"mm1");
        &lea($r,&DWP(32,$r));
        &psrlq("mm1",32);                   # mm1 = carry_out

        &sub($c,8);
        &jz(&label("maw_sse2_exit"));
    &set_label("maw_sse2_entry");
        &test($c,0xfffffff8);               # at least 8 words left?
        &jnz(&label("maw_sse2_unrolled"));

    &set_label("maw_sse2_loop",4);
        # One word per iteration for the remaining 1..7 words.
        &movd("mm2",&DWP(0,$a));            # mm2 = a[i]
        &movd("mm3",&DWP(0,$r));            # mm3 = r[i]
        &pmuludq("mm2","mm0");              # a[i] *= w
        &lea($a,&DWP(4,$a));
        &paddq("mm1","mm3");                # carry += r[i]
        &paddq("mm1","mm2");                # carry += a[i]*w
        &movd(&DWP(0,$r),"mm1");            # r[i] = carry_low
        &sub($c,1);
        &psrlq("mm1",32);                   # carry = carry_high
        &lea($r,&DWP(4,$r));
        &jnz(&label("maw_sse2_loop"));      # flags still from "sub" above
    &set_label("maw_sse2_exit");
        &movd("eax","mm1");                 # c = carry_out
        &emms();
        &ret();

    &set_label("maw_non_sse2",16);
    }

    # function_begin prologue
    &push("ebp");
    &push("ebx");
    &push("esi");
    &push("edi");

    &comment("");
    $a="ebx";
    $w="ebp";
    $r="edi";
    $c="esi";

    &xor($c,$c);                # clear carry
    &mov($r,&wparam(0));        #

    &mov("ecx",&wparam(2));     #
    &mov($a,&wparam(1));        #

    &and("ecx",0xfffffff8);     # num rounded down to a multiple of 8
    &mov($w,&wparam(3));        #

    # Up the stack for a tmp variable.  NOTE(review): the wparam() calls
    # after this push are assumed to account for it (x86asm.pl is not
    # visible here) -- confirm before reordering.
    &push("ecx");

    &jz(&label("maw_finish"));  # flags still from the "and" above

    &set_label("maw_loop",16);

    for ($i=0; $i<32; $i+=4)
    {
        &comment("Round $i");

        &mov("eax",&DWP($i,$a));    # *a
        &mul($w);                   # *a * w
        &add("eax",$c);             # L(t)+= c
        &adc("edx",0);              # H(t)+=carry
        &add("eax",&DWP($i,$r));    # L(t)+= *r
        &adc("edx",0);              # H(t)+=carry
        &mov(&DWP($i,$r),"eax");    # *r= L(t);
        &mov($c,"edx");             # c= H(t);
    }

    &comment("");
    &sub("ecx",8);
    &lea($a,&DWP(32,$a));
    &lea($r,&DWP(32,$r));
    &jnz(&label("maw_loop"));       # flags still from "sub" above

    &set_label("maw_finish",0);
    &mov("ecx",&wparam(2));         # get num
    &and("ecx",7);
    &jnz(&label("maw_finish2"));    # helps branch prediction
    &jmp(&label("maw_end"));

    &set_label("maw_finish2",1);
    # Up to seven tail words; every round but the last counts ecx down and
    # leaves as soon as it reaches zero.
    for ($i=0; $i<7; $i++)
    {
        &comment("Tail Round $i");
        &mov("eax",&DWP($i*4,$a));  # *a
        &mul($w);                   # *a * w
        &add("eax",$c);             # L(t)+=c
        &adc("edx",0);              # H(t)+=carry
        &add("eax",&DWP($i*4,$r));  # L(t)+= *r
        &adc("edx",0);              # H(t)+=carry
        &dec("ecx") if ($i != 7-1);
        &mov(&DWP($i*4,$r),"eax");  # *r= L(t);
        &mov($c,"edx");             # c= H(t);
        &jz(&label("maw_end")) if ($i != 7-1);
    }
    &set_label("maw_end",0);
    &mov("eax",$c);                 # return the carry

    &pop("ecx");                    # discard the tmp variable pushed above

    &function_end($name);
}
209 | |||
# bn_mul_words(NAME)
#
# Emit the x86 routine NAME, which computes r[i] = a[i] * w (plus the
# running carry) for the num words of a and returns the final carry word in
# eax.  The emitted routine reads four stack arguments via wparam(0..3):
# r, a, num, w.
#
# When the generator runs with SSE2 enabled ($sse2), the routine tests the
# IA32CAP_BIT0_SSE2 bit of OPENSSL_ia32cap_P and uses a pmuludq/paddq loop
# if it is set; otherwise it branches to the integer mul/adc path.
#
# Both paths return a zero carry without touching memory when num == 0.
sub bn_mul_words
{
    local($name)=@_;

    &function_begin_B($name,"");

    $r="eax";
    $a="edx";
    $c="ecx";

    if ($sse2) {
        &picsetup("eax");
        &picsymbol("eax", "OPENSSL_ia32cap_P", "eax");
        &bt(&DWP(0,"eax"),"\$IA32CAP_BIT0_SSE2");
        &jnc(&label("mw_non_sse2"));

        &mov($r,&wparam(0));
        &mov($a,&wparam(1));
        &mov($c,&wparam(2));
        &movd("mm0",&wparam(3));            # mm0 = w
        &pxor("mm1","mm1");                 # mm1 = carry = 0

        # Guard against num == 0: the loop below decrements before testing,
        # so a zero counter would wrap and run far past the end of a[] and
        # r[].  mm1 is already zero, so the exit returns a zero carry,
        # matching the integer path.
        &test($c,$c);
        &jz(&label("mw_sse2_exit"));

    &set_label("mw_sse2_loop",16);
        &movd("mm2",&DWP(0,$a));            # mm2 = a[i]
        &pmuludq("mm2","mm0");              # a[i] *= w
        &lea($a,&DWP(4,$a));
        &paddq("mm1","mm2");                # carry += a[i]*w
        &movd(&DWP(0,$r),"mm1");            # r[i] = carry_low
        &sub($c,1);
        &psrlq("mm1",32);                   # carry = carry_high
        &lea($r,&DWP(4,$r));
        &jnz(&label("mw_sse2_loop"));       # flags still from "sub" above

    &set_label("mw_sse2_exit");
        &movd("eax","mm1");                 # return carry
        &emms();
        &ret();
    &set_label("mw_non_sse2",16);
    }

    # function_begin prologue
    &push("ebp");
    &push("ebx");
    &push("esi");
    &push("edi");

    &comment("");
    $a="ebx";
    $w="ecx";
    $r="edi";
    $c="esi";
    $num="ebp";

    &xor($c,$c);                # clear carry
    &mov($r,&wparam(0));        #
    &mov($a,&wparam(1));        #
    &mov($num,&wparam(2));      #
    &mov($w,&wparam(3));        #

    &and($num,0xfffffff8);      # num rounded down to a multiple of 8
    &jz(&label("mw_finish"));

    &set_label("mw_loop",0);
    for ($i=0; $i<32; $i+=4)
    {
        &comment("Round $i");

        &mov("eax",&DWP($i,$a,"",0));   # *a
        &mul($w);                       # *a * w
        &add("eax",$c);                 # L(t)+=c
        &adc("edx",0);                  # H(t)+=carry
        &mov(&DWP($i,$r,"",0),"eax");   # *r= L(t);
        &mov($c,"edx");                 # c= H(t);
    }

    &comment("");
    &add($a,32);
    &add($r,32);
    &sub($num,8);
    &jz(&label("mw_finish"));
    &jmp(&label("mw_loop"));

    &set_label("mw_finish",0);
    &mov($num,&wparam(2));      # get num
    &and($num,7);
    &jnz(&label("mw_finish2"));
    &jmp(&label("mw_end"));

    &set_label("mw_finish2",1);
    # Up to seven tail words; every round but the last counts $num down and
    # leaves as soon as it reaches zero.
    for ($i=0; $i<7; $i++)
    {
        &comment("Tail Round $i");
        &mov("eax",&DWP($i*4,$a,"",0)); # *a
        &mul($w);                       # *a * w
        &add("eax",$c);                 # L(t)+=c
        &adc("edx",0);                  # H(t)+=carry
        &mov(&DWP($i*4,$r,"",0),"eax"); # *r= L(t);
        &mov($c,"edx");                 # c= H(t);
        &dec($num) if ($i != 7-1);
        &jz(&label("mw_end")) if ($i != 7-1);
    }
    &set_label("mw_end",0);
    &mov("eax",$c);             # return the carry

    &function_end($name);
}
321 | |||
# bn_sqr_words(NAME)
#
# Emit the x86 routine NAME, which stores the double-width square of each
# of the num words of a into r: the low half at r[2*i], the high half at
# r[2*i+1].  The emitted routine reads three stack arguments via
# wparam(0..2): r, a, num.
#
# When the generator runs with SSE2 enabled ($sse2), the routine tests the
# IA32CAP_BIT0_SSE2 bit of OPENSSL_ia32cap_P and uses a pmuludq loop if it
# is set; otherwise it branches to the integer mul path.
#
# Both paths leave memory untouched when num == 0.
sub bn_sqr_words
{
    local($name)=@_;

    &function_begin_B($name,"");

    $r="eax";
    $a="edx";
    $c="ecx";

    if ($sse2) {
        &picsetup("eax");
        &picsymbol("eax", "OPENSSL_ia32cap_P", "eax");
        &bt(&DWP(0,"eax"),"\$IA32CAP_BIT0_SSE2");
        &jnc(&label("sqr_non_sse2"));

        &mov($r,&wparam(0));
        &mov($a,&wparam(1));
        &mov($c,&wparam(2));

        # Guard against num == 0: the loop below decrements before testing,
        # so a zero counter would wrap and run far past the end of a[] and
        # r[].  No MMX register has been touched yet, so the exit label
        # sits after emms.
        &test($c,$c);
        &jz(&label("sqr_sse2_exit"));

    &set_label("sqr_sse2_loop",16);
        &movd("mm0",&DWP(0,$a));            # mm0 = a[i]
        &pmuludq("mm0","mm0");              # a[i] *= a[i]
        &lea($a,&DWP(4,$a));                # a++
        &movq(&QWP(0,$r),"mm0");            # r[i] = a[i]*a[i]
        &sub($c,1);
        &lea($r,&DWP(8,$r));                # r += 2
        &jnz(&label("sqr_sse2_loop"));      # flags still from "sub" above

        &emms();
    &set_label("sqr_sse2_exit");
        &ret();
    &set_label("sqr_non_sse2",16);
    }

    # function_begin prologue
    &push("ebp");
    &push("ebx");
    &push("esi");
    &push("edi");

    &comment("");
    $r="esi";
    $a="edi";
    $num="ebx";

    &mov($r,&wparam(0));        #
    &mov($a,&wparam(1));        #
    &mov($num,&wparam(2));      #

    &and($num,0xfffffff8);      # num rounded down to a multiple of 8
    &jz(&label("sw_finish"));

    &set_label("sw_loop",0);
    for ($i=0; $i<32; $i+=4)
    {
        &comment("Round $i");
        &mov("eax",&DWP($i,$a,"",0));       # *a
        &mul("eax");                        # *a * *a
        &mov(&DWP($i*2,$r,"",0),"eax");     # low half
        &mov(&DWP($i*2+4,$r,"",0),"edx");   # high half
    }

    &comment("");
    &add($a,32);
    &add($r,64);
    &sub($num,8);
    &jnz(&label("sw_loop"));

    &set_label("sw_finish",0);
    &mov($num,&wparam(2));      # get num
    &and($num,7);
    &jz(&label("sw_end"));

    # Up to seven tail words; every round but the last counts $num down and
    # leaves as soon as it reaches zero.
    for ($i=0; $i<7; $i++)
    {
        &comment("Tail Round $i");
        &mov("eax",&DWP($i*4,$a,"",0));     # *a
        &mul("eax");                        # *a * *a
        &mov(&DWP($i*8,$r,"",0),"eax");     # low half
        &dec($num) if ($i != 7-1);
        &mov(&DWP($i*8+4,$r,"",0),"edx");   # high half; flags kept for jz
        &jz(&label("sw_end")) if ($i != 7-1);
    }
    &set_label("sw_end",0);

    &function_end($name);
}
411 | |||
# bn_div_words(NAME)
#
# Emit the x86 routine NAME: load the three stack arguments into edx, eax
# and ecx (in that order), execute "div ecx", and return.
sub bn_div_words
{
    my $name = shift;

    &function_begin_B($name, "");

    # Argument n goes into the n-th register of this list.
    my @arg_regs = ("edx", "eax", "ecx");
    for my $n (0 .. $#arg_regs) {
        &mov($arg_regs[$n], &wparam($n));
    }

    &div("ecx");
    &ret();

    &function_end_B($name);
}
424 | |||
# bn_add_words(NAME)
#
# Emit the x86 routine NAME, which computes r[i] = a[i] + b[i] with carry
# propagation over num words.  The emitted routine reads four stack
# arguments via wparam(0..3): r, a, b, num.  The running carry lives in
# eax, so it is also what the routine leaves in eax on return.
sub bn_add_words
{
    my ($name) = @_;

    # Register assignment.
    my ($dst, $src_a, $src_b, $count) = ("ebx", "esi", "edi", "ebp");
    my ($carry, $acc, $addend)        = ("eax", "ecx", "edx");

    # Emit one word of the addition at byte offset $off.  When $count_down
    # is true the word counter is decremented and the routine leaves for
    # "aw_end" once it reaches zero; the store in between does not disturb
    # the flags set by "dec".
    my $emit_word = sub {
        my ($off, $count_down) = @_;

        &mov($acc,    &DWP($off, $src_a, "", 0));   # *a
        &mov($addend, &DWP($off, $src_b, "", 0));   # *b
        &add($acc, $carry);                         # add carry in
        &mov($carry, 0);
        &adc($carry, $carry);                       # carry = CF
        &add($acc, $addend);
        &adc($carry, 0);                            # carry += CF
        &dec($count) if $count_down;
        &mov(&DWP($off, $dst, "", 0), $acc);        # *r
        &jz(&label("aw_end")) if $count_down;
    };

    &function_begin($name, "");

    &comment("");

    # Fetch r, a, b and num from the stack, in that order.
    my @arg_regs = ($dst, $src_a, $src_b, $count);
    for my $n (0 .. $#arg_regs) {
        &mov($arg_regs[$n], &wparam($n));
    }
    &xor($carry, $carry);               # clear carry
    &and($count, 0xfffffff8);           # num rounded down to a multiple of 8

    &jz(&label("aw_finish"));

    # Main loop: eight words per iteration.
    &set_label("aw_loop", 0);
    for my $word (0 .. 7) {
        &comment("Round $word");
        $emit_word->($word * 4, 0);
    }

    &comment("");
    for my $ptr ($src_a, $src_b, $dst) {
        &add($ptr, 32);
    }
    &sub($count, 8);
    &jnz(&label("aw_loop"));

    # Tail: the remaining num % 8 words.
    &set_label("aw_finish", 0);
    &mov($count, &wparam(3));           # get num
    &and($count, 7);
    &jz(&label("aw_end"));

    for my $word (0 .. 6) {
        &comment("Tail Round $word");
        # The seventh tail word is always the last, so it needs no counter.
        $emit_word->($word * 4, $word != 6);
    }
    &set_label("aw_end", 0);

    # The carry is already in eax; no final move is needed.

    &function_end($name);
}
496 | |||
# bn_sub_words(NAME)
#
# Emit the x86 routine NAME, which computes r[i] = a[i] - b[i] with borrow
# propagation over num words.  The emitted routine reads four stack
# arguments via wparam(0..3): r, a, b, num.  The running borrow lives in
# eax, so it is also what the routine leaves in eax on return.
#
# The label names keep the "aw_" prefix also used by bn_add_words.
sub bn_sub_words
{
    my ($name) = @_;

    # Register assignment.
    my ($dst, $src_a, $src_b, $count) = ("ebx", "esi", "edi", "ebp");
    my ($borrow, $acc, $subtrahend)   = ("eax", "ecx", "edx");

    # Emit one word of the subtraction at byte offset $off.  When
    # $count_down is true the word counter is decremented and the routine
    # leaves for "aw_end" once it reaches zero; the store in between does
    # not disturb the flags set by "dec".
    my $emit_word = sub {
        my ($off, $count_down) = @_;

        &mov($acc,        &DWP($off, $src_a, "", 0));   # *a
        &mov($subtrahend, &DWP($off, $src_b, "", 0));   # *b
        &sub($acc, $borrow);                            # subtract borrow in
        &mov($borrow, 0);
        &adc($borrow, $borrow);                         # borrow = CF
        &sub($acc, $subtrahend);
        &adc($borrow, 0);                               # borrow += CF
        &dec($count) if $count_down;
        &mov(&DWP($off, $dst, "", 0), $acc);            # *r
        &jz(&label("aw_end")) if $count_down;
    };

    &function_begin($name, "");

    &comment("");

    # Fetch r, a, b and num from the stack, in that order.
    my @arg_regs = ($dst, $src_a, $src_b, $count);
    for my $n (0 .. $#arg_regs) {
        &mov($arg_regs[$n], &wparam($n));
    }
    &xor($borrow, $borrow);             # clear borrow
    &and($count, 0xfffffff8);           # num rounded down to a multiple of 8

    &jz(&label("aw_finish"));

    # Main loop: eight words per iteration.
    &set_label("aw_loop", 0);
    for my $word (0 .. 7) {
        &comment("Round $word");
        $emit_word->($word * 4, 0);
    }

    &comment("");
    for my $ptr ($src_a, $src_b, $dst) {
        &add($ptr, 32);
    }
    &sub($count, 8);
    &jnz(&label("aw_loop"));

    # Tail: the remaining num % 8 words.
    &set_label("aw_finish", 0);
    &mov($count, &wparam(3));           # get num
    &and($count, 7);
    &jz(&label("aw_end"));

    for my $word (0 .. 6) {
        &comment("Tail Round $word");
        # The seventh tail word is always the last, so it needs no counter.
        $emit_word->($word * 4, $word != 6);
    }
    &set_label("aw_end", 0);

    # The borrow is already in eax; no final move is needed.

    &function_end($name);
}