summary | refs | log | tree | commit | diff
path: root/src/lib/libcrypto/bn/asm/bn-586.pl
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib/libcrypto/bn/asm/bn-586.pl')
-rw-r--r-- src/lib/libcrypto/bn/asm/bn-586.pl 567
1 files changed, 0 insertions, 567 deletions
diff --git a/src/lib/libcrypto/bn/asm/bn-586.pl b/src/lib/libcrypto/bn/asm/bn-586.pl
deleted file mode 100644
index 71b775af8d..0000000000
--- a/src/lib/libcrypto/bn/asm/bn-586.pl
+++ /dev/null
@@ -1,567 +0,0 @@
1#!/usr/local/bin/perl
2
# Driver script: loads the perlasm x86 backend, then emits one assembly
# routine per bignum word primitive.
#
# The directory part of $0 (up to and including the last '/' or '\') is
# captured into $dir and pushed onto @INC, together with
# "${dir}../../perlasm", so that x86asm.pl can be found by `require`.
# NOTE(review): if $0 contains no path separator the match fails and $1 is
# not set by this statement, so $dir is presumably empty/undef and the
# search paths become "" and "../../perlasm" - confirm this is intended.
3$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
4push(@INC,"${dir}","${dir}../../perlasm");
5require "x86asm.pl";
6
# asm_init receives the first command-line argument and the script name;
# its behaviour is defined in x86asm.pl (not visible here).
7&asm_init($ARGV[0],$0);
8
# $sse2 is a generation-time switch: it becomes 1 when any command-line
# argument contains -DOPENSSL_IA32_SSE2. When set, the multiply/square
# generators below additionally emit an MMX/SSE2 code path.
9$sse2=0;
10for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
11
# The SSE2 code paths read OPENSSL_ia32cap_P, so declare it as external.
12&external_label("OPENSSL_ia32cap_P") if ($sse2);
13
# Each generator takes the symbol name of the function it emits.
14&bn_mul_add_words("bn_mul_add_words");
15&bn_mul_words("bn_mul_words");
16&bn_sqr_words("bn_sqr_words");
17&bn_div_words("bn_div_words");
18&bn_add_words("bn_add_words");
19&bn_sub_words("bn_sub_words");
20
21&asm_finish();
22
# bn_mul_add_words($name)
#
# Emits the assembly function $name. The emitted function reads four stack
# parameters: wparam(0) = r (destination words), wparam(1) = a (source
# words), wparam(2) = word count, wparam(3) = w (multiplier word).
# For each word it computes r[i] = low32(r[i] + a[i]*w + carry) and carries
# the high 32 bits into the next word; the final carry is returned in eax.
#
# When $sse2 is set at generation time, a second implementation using MMX
# registers with pmuludq/paddq is emitted in front of the integer one. The
# emitted code chooses between them at run time by testing the
# IA32CAP_BIT0_SSE2 bit of OPENSSL_ia32cap_P.
#
# NOTE(review): the SSE2 path has no guard for a word count of 0. With a
# count of 0 the entry test falls through into maw_sse2_loop, which
# processes a word before decrementing the count. Callers presumably must
# pass a count > 0 - confirm.
23sub bn_mul_add_words
24 {
25 local($name)=@_;
26
    # function_begin_B is used (not function_begin) so that the SSE2 path
    # can return without the callee-saved register pushes done further down.
27 &function_begin_B($name,"");
28
    # Register assignment for the SSE2 path only; reassigned below for the
    # integer path.
29 $r="eax";
30 $a="edx";
31 $c="ecx";
32
33 if ($sse2) {
    # Run-time dispatch: jump to the integer code when the SSE2 bit is clear.
34 &picsetup("eax");
35 &picsymbol("eax", "OPENSSL_ia32cap_P", "eax");
36 &bt(&DWP(0,"eax"),"\$IA32CAP_BIT0_SSE2");
37 &jnc(&label("maw_non_sse2"));
38
39 &mov($r,&wparam(0));
40 &mov($a,&wparam(1));
41 &mov($c,&wparam(2));
42 &movd("mm0",&wparam(3)); # mm0 = w
43 &pxor("mm1","mm1"); # mm1 = carry_in
    # Skip straight to the count test; the unrolled body is only entered
    # when at least 8 words remain.
44 &jmp(&label("maw_sse2_entry"));
45
    # 8-words-per-iteration body. Multiplies for later words are issued
    # early and interleaved with the carry chain held in mm1.
46 &set_label("maw_sse2_unrolled",16);
47 &movd("mm3",&DWP(0,$r,"",0)); # mm3 = r[0]
48 &paddq("mm1","mm3"); # mm1 = carry_in + r[0]
49 &movd("mm2",&DWP(0,$a,"",0)); # mm2 = a[0]
50 &pmuludq("mm2","mm0"); # mm2 = w*a[0]
51 &movd("mm4",&DWP(4,$a,"",0)); # mm4 = a[1]
52 &pmuludq("mm4","mm0"); # mm4 = w*a[1]
53 &movd("mm6",&DWP(8,$a,"",0)); # mm6 = a[2]
54 &pmuludq("mm6","mm0"); # mm6 = w*a[2]
55 &movd("mm7",&DWP(12,$a,"",0)); # mm7 = a[3]
56 &pmuludq("mm7","mm0"); # mm7 = w*a[3]
57 &paddq("mm1","mm2"); # mm1 = carry_in + r[0] + w*a[0]
58 &movd("mm3",&DWP(4,$r,"",0)); # mm3 = r[1]
59 &paddq("mm3","mm4"); # mm3 = r[1] + w*a[1]
60 &movd("mm5",&DWP(8,$r,"",0)); # mm5 = r[2]
61 &paddq("mm5","mm6"); # mm5 = r[2] + w*a[2]
62 &movd("mm4",&DWP(12,$r,"",0)); # mm4 = r[3]
63 &paddq("mm7","mm4"); # mm7 = r[3] + w*a[3]
64 &movd(&DWP(0,$r,"",0),"mm1");
65 &movd("mm2",&DWP(16,$a,"",0)); # mm2 = a[4]
66 &pmuludq("mm2","mm0"); # mm2 = w*a[4]
67 &psrlq("mm1",32); # mm1 = carry0
68 &movd("mm4",&DWP(20,$a,"",0)); # mm4 = a[5]
69 &pmuludq("mm4","mm0"); # mm4 = w*a[5]
70 &paddq("mm1","mm3"); # mm1 = carry0 + r[1] + w*a[1]
71 &movd("mm6",&DWP(24,$a,"",0)); # mm6 = a[6]
72 &pmuludq("mm6","mm0"); # mm6 = w*a[6]
73 &movd(&DWP(4,$r,"",0),"mm1");
74 &psrlq("mm1",32); # mm1 = carry1
75 &movd("mm3",&DWP(28,$a,"",0)); # mm3 = a[7]
76 &add($a,32);
77 &pmuludq("mm3","mm0"); # mm3 = w*a[7]
78 &paddq("mm1","mm5"); # mm1 = carry1 + r[2] + w*a[2]
79 &movd("mm5",&DWP(16,$r,"",0)); # mm5 = r[4]
80 &paddq("mm2","mm5"); # mm2 = r[4] + w*a[4]
81 &movd(&DWP(8,$r,"",0),"mm1");
82 &psrlq("mm1",32); # mm1 = carry2
83 &paddq("mm1","mm7"); # mm1 = carry2 + r[3] + w*a[3]
84 &movd("mm5",&DWP(20,$r,"",0)); # mm5 = r[5]
85 &paddq("mm4","mm5"); # mm4 = r[5] + w*a[5]
86 &movd(&DWP(12,$r,"",0),"mm1");
87 &psrlq("mm1",32); # mm1 = carry3
88 &paddq("mm1","mm2"); # mm1 = carry3 + r[4] + w*a[4]
89 &movd("mm5",&DWP(24,$r,"",0)); # mm5 = r[6]
90 &paddq("mm6","mm5"); # mm6 = r[6] + w*a[6]
91 &movd(&DWP(16,$r,"",0),"mm1");
92 &psrlq("mm1",32); # mm1 = carry4
93 &paddq("mm1","mm4"); # mm1 = carry4 + r[5] + w*a[5]
94 &movd("mm5",&DWP(28,$r,"",0)); # mm5 = r[7]
95 &paddq("mm3","mm5"); # mm3 = r[7] + w*a[7]
96 &movd(&DWP(20,$r,"",0),"mm1");
97 &psrlq("mm1",32); # mm1 = carry5
98 &paddq("mm1","mm6"); # mm1 = carry5 + r[6] + w*a[6]
99 &movd(&DWP(24,$r,"",0),"mm1");
100 &psrlq("mm1",32); # mm1 = carry6
101 &paddq("mm1","mm3"); # mm1 = carry6 + r[7] + w*a[7]
102 &movd(&DWP(28,$r,"",0),"mm1");
103 &lea($r,&DWP(32,$r));
104 &psrlq("mm1",32); # mm1 = carry_out
105
    # Eight words consumed. Exit if none remain, otherwise re-test whether
    # another full group of 8 is available.
106 &sub($c,8);
107 &jz(&label("maw_sse2_exit"));
108 &set_label("maw_sse2_entry");
109 &test($c,0xfffffff8);
110 &jnz(&label("maw_sse2_unrolled"));
111
    # One-word-per-iteration loop for the remaining (fewer than 8) words.
112 &set_label("maw_sse2_loop",4);
113 &movd("mm2",&DWP(0,$a)); # mm2 = a[i]
114 &movd("mm3",&DWP(0,$r)); # mm3 = r[i]
115 &pmuludq("mm2","mm0"); # a[i] *= w
116 &lea($a,&DWP(4,$a));
117 &paddq("mm1","mm3"); # carry += r[i]
118 &paddq("mm1","mm2"); # carry += a[i]*w
119 &movd(&DWP(0,$r),"mm1"); # r[i] = carry_low
120 &sub($c,1);
121 &psrlq("mm1",32); # carry = carry_high
122 &lea($r,&DWP(4,$r));
123 &jnz(&label("maw_sse2_loop"));
124 &set_label("maw_sse2_exit");
125 &movd("eax","mm1"); # c = carry_out
126 &emms();
127 &ret();
128
129 &set_label("maw_non_sse2",16);
130 }
131
    # Integer path. The callee-saved registers are pushed by hand here
    # because function_begin_B was used above.
132 # function_begin prologue
133 &push("ebp");
134 &push("ebx");
135 &push("esi");
136 &push("edi");
137
138 &comment("");
    # Register assignment for the integer path. $Low/$High name the mul
    # result registers; the code below uses "eax"/"edx" literally.
139 $Low="eax";
140 $High="edx";
141 $a="ebx";
142 $w="ebp";
143 $r="edi";
144 $c="esi";
145
146 &xor($c,$c); # clear carry
147 &mov($r,&wparam(0)); #
148
149 &mov("ecx",&wparam(2)); #
150 &mov($a,&wparam(1)); #
151
152 &and("ecx",0xfffffff8); # num & ~7: words handled by the 8-way unrolled loop
153 &mov($w,&wparam(3)); #
154
    # The pushed value is not read back anywhere in this sub; it is removed
    # again by the pop at maw_end. mov and push leave the flags from the
    # `and` above intact, so the jz below still tests that result.
155 &push("ecx"); # Up the stack for a tmp variable
156
157 &jz(&label("maw_finish"));
158
159 &set_label("maw_loop",16);
160
    # Unrolled body: $i is a byte offset (0,4,...,28), i.e. 8 words.
161 for ($i=0; $i<32; $i+=4)
162 {
163 &comment("Round $i");
164
165 &mov("eax",&DWP($i,$a)); # *a
166 &mul($w); # *a * w
167 &add("eax",$c); # L(t)+= c
168 &adc("edx",0); # H(t)+=carry
169 &add("eax",&DWP($i,$r)); # L(t)+= *r
170 &adc("edx",0); # H(t)+=carry
171 &mov(&DWP($i,$r),"eax"); # *r= L(t);
172 &mov($c,"edx"); # c= H(t);
173 }
174
175 &comment("");
    # lea is used to advance the pointers so the flags set by sub survive
    # until the jnz.
176 &sub("ecx",8);
177 &lea($a,&DWP(32,$a));
178 &lea($r,&DWP(32,$r));
179 &jnz(&label("maw_loop"));
180
181 &set_label("maw_finish",0);
182 &mov("ecx",&wparam(2)); # get num
183 &and("ecx",7);
184 &jnz(&label("maw_finish2")); # helps branch prediction
185 &jmp(&label("maw_end"));
186
    # Tail: up to 7 remaining words, with $i as a word index. After every
    # round except the last, ecx is decremented and the code exits when it
    # reaches zero. The dec is emitted before the two movs and the jz after
    # them; this relies on mov not modifying the flags.
187 &set_label("maw_finish2",1);
188 for ($i=0; $i<7; $i++)
189 {
190 &comment("Tail Round $i");
191 &mov("eax",&DWP($i*4,$a)); # *a
192 &mul($w); # *a * w
193 &add("eax",$c); # L(t)+=c
194 &adc("edx",0); # H(t)+=carry
195 &add("eax",&DWP($i*4,$r)); # L(t)+= *r
196 &adc("edx",0); # H(t)+=carry
197 &dec("ecx") if ($i != 7-1);
198 &mov(&DWP($i*4,$r),"eax"); # *r= L(t);
199 &mov($c,"edx"); # c= H(t);
200 &jz(&label("maw_end")) if ($i != 7-1);
201 }
202 &set_label("maw_end",0);
    # Return value: the final carry word.
203 &mov("eax",$c);
204
205 &pop("ecx"); # remove the tmp variable pushed above from the stack
206
    # NOTE(review): function_end presumably emits the pops matching the
    # four manual pushes plus the return - confirm in x86asm.pl.
207 &function_end($name);
208 }
209
# bn_mul_words($name)
#
# Emits the assembly function $name. The emitted function reads four stack
# parameters: wparam(0) = r (destination words), wparam(1) = a (source
# words), wparam(2) = word count, wparam(3) = w (multiplier word).
# For each word it stores r[i] = low32(a[i]*w + carry) and carries the high
# 32 bits forward; the final carry is returned in eax.
#
# When $sse2 is set at generation time, an MMX/pmuludq loop is emitted in
# front of the integer code and selected at run time through the
# IA32CAP_BIT0_SSE2 bit of OPENSSL_ia32cap_P.
#
# NOTE(review): the SSE2 loop processes a word before it decrements and
# tests the count, so it has no guard for a word count of 0. Callers
# presumably must pass a count > 0 - confirm.
210sub bn_mul_words
211 {
212 local($name)=@_;
213
214 &function_begin_B($name,"");
215
    # Register assignment for the SSE2 path only; reassigned below.
216 $r="eax";
217 $a="edx";
218 $c="ecx";
219
220 if ($sse2) {
    # Run-time dispatch: jump to the integer code when the SSE2 bit is clear.
221 &picsetup("eax");
222 &picsymbol("eax", "OPENSSL_ia32cap_P", "eax");
223 &bt(&DWP(0,"eax"),"\$IA32CAP_BIT0_SSE2");
224 &jnc(&label("mw_non_sse2"));
225
226 &mov($r,&wparam(0));
227 &mov($a,&wparam(1));
228 &mov($c,&wparam(2));
229 &movd("mm0",&wparam(3)); # mm0 = w
230 &pxor("mm1","mm1"); # mm1 = carry = 0
231
    # One word per iteration; mm1 holds the running 64-bit sum whose upper
    # half becomes the next carry.
232 &set_label("mw_sse2_loop",16);
233 &movd("mm2",&DWP(0,$a)); # mm2 = a[i]
234 &pmuludq("mm2","mm0"); # a[i] *= w
235 &lea($a,&DWP(4,$a));
236 &paddq("mm1","mm2"); # carry += a[i]*w
237 &movd(&DWP(0,$r),"mm1"); # r[i] = carry_low
238 &sub($c,1);
239 &psrlq("mm1",32); # carry = carry_high
240 &lea($r,&DWP(4,$r));
241 &jnz(&label("mw_sse2_loop"));
242
243 &movd("eax","mm1"); # return carry
244 &emms();
245 &ret();
246 &set_label("mw_non_sse2",16);
247 }
248
    # Integer path. Callee-saved registers are pushed by hand because
    # function_begin_B was used above.
249 # function_begin prologue
250 &push("ebp");
251 &push("ebx");
252 &push("esi");
253 &push("edi");
254
255 &comment("");
    # Register assignment for the integer path. $Low/$High name the mul
    # result registers; the code below uses "eax"/"edx" literally.
256 $Low="eax";
257 $High="edx";
258 $a="ebx";
259 $w="ecx";
260 $r="edi";
261 $c="esi";
262 $num="ebp";
263
264 &xor($c,$c); # clear carry
265 &mov($r,&wparam(0)); #
266 &mov($a,&wparam(1)); #
267 &mov($num,&wparam(2)); #
268 &mov($w,&wparam(3)); #
269
270 &and($num,0xfffffff8); # num & ~7: words handled by the 8-way unrolled loop
271 &jz(&label("mw_finish"));
272
273 &set_label("mw_loop",0);
    # Unrolled body: $i is a byte offset (0,4,...,28), i.e. 8 words.
274 for ($i=0; $i<32; $i+=4)
275 {
276 &comment("Round $i");
277
278 &mov("eax",&DWP($i,$a,"",0)); # *a
279 &mul($w); # *a * w
280 &add("eax",$c); # L(t)+=c
281 # XXX
282
283 &adc("edx",0); # H(t)+=carry
284 &mov(&DWP($i,$r,"",0),"eax"); # *r= L(t);
285
286 &mov($c,"edx"); # c= H(t);
287 }
288
289 &comment("");
    # The count is adjusted last so that its flags feed the jz; the two
    # pointer adds come first.
290 &add($a,32);
291 &add($r,32);
292 &sub($num,8);
293 &jz(&label("mw_finish"));
294 &jmp(&label("mw_loop"));
295
296 &set_label("mw_finish",0);
297 &mov($num,&wparam(2)); # get num
298 &and($num,7);
299 &jnz(&label("mw_finish2"));
300 &jmp(&label("mw_end"));
301
    # Tail: up to 7 remaining words, with $i as a word index. After every
    # round except the last, $num is decremented and the code exits when it
    # reaches zero.
302 &set_label("mw_finish2",1);
303 for ($i=0; $i<7; $i++)
304 {
305 &comment("Tail Round $i");
306 &mov("eax",&DWP($i*4,$a,"",0));# *a
307 &mul($w); # *a * w
308 &add("eax",$c); # L(t)+=c
309 # XXX
310 &adc("edx",0); # H(t)+=carry
311 &mov(&DWP($i*4,$r,"",0),"eax");# *r= L(t);
312 &mov($c,"edx"); # c= H(t);
313 &dec($num) if ($i != 7-1);
314 &jz(&label("mw_end")) if ($i != 7-1);
315 }
316 &set_label("mw_end",0);
    # Return value: the final carry word.
317 &mov("eax",$c);
318
    # NOTE(review): function_end presumably emits the pops matching the
    # four manual pushes plus the return - confirm in x86asm.pl.
319 &function_end($name);
320 }
321
# bn_sqr_words($name)
#
# Emits the assembly function $name. The emitted function reads three stack
# parameters: wparam(0) = r (destination), wparam(1) = a (source words),
# wparam(2) = word count. For each source word a[i] it stores the 64-bit
# square as two 32-bit words, low half at r[2*i] and high half at r[2*i+1].
# No return value is loaded into eax on the integer path.
#
# When $sse2 is set at generation time, an MMX/pmuludq loop is emitted in
# front of the integer code and selected at run time through the
# IA32CAP_BIT0_SSE2 bit of OPENSSL_ia32cap_P.
#
# NOTE(review): the SSE2 loop squares a word before it decrements and tests
# the count, so it has no guard for a word count of 0. Callers presumably
# must pass a count > 0 - confirm.
322sub bn_sqr_words
323 {
324 local($name)=@_;
325
326 &function_begin_B($name,"");
327
    # Register assignment for the SSE2 path only; reassigned below.
328 $r="eax";
329 $a="edx";
330 $c="ecx";
331
332 if ($sse2) {
    # Run-time dispatch: jump to the integer code when the SSE2 bit is clear.
333 &picsetup("eax");
334 &picsymbol("eax", "OPENSSL_ia32cap_P", "eax");
335 &bt(&DWP(0,"eax"),"\$IA32CAP_BIT0_SSE2");
336 &jnc(&label("sqr_non_sse2"));
337
338 &mov($r,&wparam(0));
339 &mov($a,&wparam(1));
340 &mov($c,&wparam(2));
341
    # One source word per iteration; the source advances 4 bytes and the
    # destination 8 bytes.
342 &set_label("sqr_sse2_loop",16);
343 &movd("mm0",&DWP(0,$a)); # mm0 = a[i]
344 &pmuludq("mm0","mm0"); # a[i] *= a[i]
345 &lea($a,&DWP(4,$a)); # a++
346 &movq(&QWP(0,$r),"mm0"); # r[i] = a[i]*a[i]
347 &sub($c,1);
348 &lea($r,&DWP(8,$r)); # r += 2
349 &jnz(&label("sqr_sse2_loop"));
350
351 &emms();
352 &ret();
353 &set_label("sqr_non_sse2",16);
354 }
355
    # Integer path. Callee-saved registers are pushed by hand because
    # function_begin_B was used above.
356 # function_begin prologue
357 &push("ebp");
358 &push("ebx");
359 &push("esi");
360 &push("edi");
361
362 &comment("");
363 $r="esi";
364 $a="edi";
365 $num="ebx";
366
367 &mov($r,&wparam(0)); #
368 &mov($a,&wparam(1)); #
369 &mov($num,&wparam(2)); #
370
371 &and($num,0xfffffff8); # num & ~7: words handled by the 8-way unrolled loop
372 &jz(&label("sw_finish"));
373
374 &set_label("sw_loop",0);
    # Unrolled body: $i is the source byte offset (0,4,...,28); the
    # destination offset is twice that because each square is two words.
375 for ($i=0; $i<32; $i+=4)
376 {
377 &comment("Round $i");
378 &mov("eax",&DWP($i,$a,"",0)); # *a
379 # XXX
380 &mul("eax"); # *a * *a
381 &mov(&DWP($i*2,$r,"",0),"eax"); #
382 &mov(&DWP($i*2+4,$r,"",0),"edx");#
383 }
384
385 &comment("");
    # 8 source words = 32 bytes; 16 result words = 64 bytes.
386 &add($a,32);
387 &add($r,64);
388 &sub($num,8);
389 &jnz(&label("sw_loop"));
390
391 &set_label("sw_finish",0);
392 &mov($num,&wparam(2)); # get num
393 &and($num,7);
394 &jz(&label("sw_end"));
395
    # Tail: up to 7 remaining words, with $i as a word index. The dec is
    # emitted before the store of edx and the jz after it; this relies on
    # mov not modifying the flags.
396 for ($i=0; $i<7; $i++)
397 {
398 &comment("Tail Round $i");
399 &mov("eax",&DWP($i*4,$a,"",0)); # *a
400 # XXX
401 &mul("eax"); # *a * *a
402 &mov(&DWP($i*8,$r,"",0),"eax"); #
403 &dec($num) if ($i != 7-1);
404 &mov(&DWP($i*8+4,$r,"",0),"edx");
405 &jz(&label("sw_end")) if ($i != 7-1);
406 }
407 &set_label("sw_end",0);
408
    # NOTE(review): function_end presumably emits the pops matching the
    # four manual pushes plus the return - confirm in x86asm.pl.
409 &function_end($name);
410 }
411
# bn_div_words($name)
#
# Emits the assembly function $name. The emitted function loads wparam(0)
# into edx, wparam(1) into eax and wparam(2) into ecx, then executes
# `div ecx`, which divides the 64-bit value edx:eax by ecx and leaves the
# quotient in eax and the remainder in edx. It then returns, so the
# quotient in eax is the return value.
#
# No registers are saved and no operand checks are emitted. The x86 div
# instruction raises a divide error when the divisor is zero or the
# quotient does not fit in 32 bits, so callers are presumably expected to
# guarantee wparam(0) < wparam(2) - confirm against the C callers.
412sub bn_div_words
413 {
414 local($name)=@_;
415
416 &function_begin_B($name,"");
417 &mov("edx",&wparam(0)); #
418 &mov("eax",&wparam(1)); #
419 &mov("ecx",&wparam(2)); #
420 &div("ecx");
    # Explicit ret: the _B begin/end variants are used here, so the return
    # is emitted by hand.
421 &ret();
422 &function_end_B($name);
423 }
424
# bn_add_words($name)
#
# Emits the assembly function $name. The emitted function reads four stack
# parameters: wparam(0) = r (destination words), wparam(1) = a,
# wparam(2) = b, wparam(3) = word count. For each word it stores
# r[i] = a[i] + b[i] + carry (mod 2^32) and propagates the carry to the
# next word. The carry is kept in eax, so the final carry (0 or 1) is the
# return value without any extra move.
425sub bn_add_words
426 {
427 local($name)=@_;
428
    # function_begin (not function_begin_B) is used here, so no manual
    # register pushes appear in this sub.
429 &function_begin($name,"");
430
431 &comment("");
432 $a="esi";
433 $b="edi";
434 $c="eax";
435 $r="ebx";
436 $tmp1="ecx";
437 $tmp2="edx";
438 $num="ebp";
439
440 &mov($r,&wparam(0)); # get r
441 &mov($a,&wparam(1)); # get a
442 &mov($b,&wparam(2)); # get b
443 &mov($num,&wparam(3)); # get num
444 &xor($c,$c); # clear carry
445 &and($num,0xfffffff8); # num & ~7: words handled by the 8-way unrolled loop
446
447 &jz(&label("aw_finish"));
448
449 &set_label("aw_loop",0);
    # Unrolled body for 8 words, with $i as a word index. Carry handling per
    # word: add the incoming carry to a[i], then rebuild $c from the carry
    # flag. `mov $c,0` leaves the flags alone, so `adc $c,$c` yields
    # 0+0+CF. After adding b[i], `adc $c,0` folds in the second carry-out.
450 for ($i=0; $i<8; $i++)
451 {
452 &comment("Round $i");
453
454 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
455 &mov($tmp2,&DWP($i*4,$b,"",0)); # *b
456 &add($tmp1,$c);
457 &mov($c,0);
458 &adc($c,$c);
459 &add($tmp1,$tmp2);
460 &adc($c,0);
461 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
462 }
463
464 &comment("");
465 &add($a,32);
466 &add($b,32);
467 &add($r,32);
468 &sub($num,8);
469 &jnz(&label("aw_loop"));
470
471 &set_label("aw_finish",0);
472 &mov($num,&wparam(3)); # get num
473 &and($num,7);
474 &jz(&label("aw_end"));
475
    # Tail: up to 7 remaining words. After every round except the last,
    # $num is decremented and the code exits when it reaches zero. The
    # store between dec and jz is a mov, which does not modify the flags.
476 for ($i=0; $i<7; $i++)
477 {
478 &comment("Tail Round $i");
479 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
480 &mov($tmp2,&DWP($i*4,$b,"",0));# *b
481 &add($tmp1,$c);
482 &mov($c,0);
483 &adc($c,$c);
484 &add($tmp1,$tmp2);
485 &adc($c,0);
486 &dec($num) if ($i != 6);
487 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
488 &jz(&label("aw_end")) if ($i != 6);
489 }
490 &set_label("aw_end",0);
491
    # The move below is left disabled because the carry already lives in eax.
492# &mov("eax",$c); # $c is "eax"
493
494 &function_end($name);
495 }
496
# bn_sub_words($name)
#
# Emits the assembly function $name. The emitted function reads four stack
# parameters: wparam(0) = r (destination words), wparam(1) = a,
# wparam(2) = b, wparam(3) = word count. For each word it stores
# r[i] = a[i] - b[i] - borrow (mod 2^32) and propagates the borrow to the
# next word. The borrow is kept in eax, so the final borrow (0 or 1) is the
# return value without any extra move.
#
# The label names here reuse the aw_ prefix also used in bn_add_words.
# NOTE(review): presumably x86asm.pl scopes labels per function so the two
# sets do not collide - confirm.
497sub bn_sub_words
498 {
499 local($name)=@_;
500
    # function_begin (not function_begin_B) is used here, so no manual
    # register pushes appear in this sub.
501 &function_begin($name,"");
502
503 &comment("");
504 $a="esi";
505 $b="edi";
506 $c="eax";
507 $r="ebx";
508 $tmp1="ecx";
509 $tmp2="edx";
510 $num="ebp";
511
512 &mov($r,&wparam(0)); # get r
513 &mov($a,&wparam(1)); # get a
514 &mov($b,&wparam(2)); # get b
515 &mov($num,&wparam(3)); # get num
516 &xor($c,$c); # clear borrow
517 &and($num,0xfffffff8); # num & ~7: words handled by the 8-way unrolled loop
518
519 &jz(&label("aw_finish"));
520
521 &set_label("aw_loop",0);
    # Unrolled body for 8 words, with $i as a word index. Borrow handling
    # per word: subtract the incoming borrow from a[i], then rebuild $c from
    # the carry flag, which sub sets on borrow. `mov $c,0` leaves the flags
    # alone, so `adc $c,$c` yields 0+0+CF. After subtracting b[i],
    # `adc $c,0` folds in the second borrow-out.
522 for ($i=0; $i<8; $i++)
523 {
524 &comment("Round $i");
525
526 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
527 &mov($tmp2,&DWP($i*4,$b,"",0)); # *b
528 &sub($tmp1,$c);
529 &mov($c,0);
530 &adc($c,$c);
531 &sub($tmp1,$tmp2);
532 &adc($c,0);
533 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
534 }
535
536 &comment("");
537 &add($a,32);
538 &add($b,32);
539 &add($r,32);
540 &sub($num,8);
541 &jnz(&label("aw_loop"));
542
543 &set_label("aw_finish",0);
544 &mov($num,&wparam(3)); # get num
545 &and($num,7);
546 &jz(&label("aw_end"));
547
    # Tail: up to 7 remaining words. After every round except the last,
    # $num is decremented and the code exits when it reaches zero. The
    # store between dec and jz is a mov, which does not modify the flags.
548 for ($i=0; $i<7; $i++)
549 {
550 &comment("Tail Round $i");
551 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
552 &mov($tmp2,&DWP($i*4,$b,"",0));# *b
553 &sub($tmp1,$c);
554 &mov($c,0);
555 &adc($c,$c);
556 &sub($tmp1,$tmp2);
557 &adc($c,0);
558 &dec($num) if ($i != 6);
559 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
560 &jz(&label("aw_end")) if ($i != 6);
561 }
562 &set_label("aw_end",0);
563
    # The move below is left disabled because the borrow already lives in eax.
564# &mov("eax",$c); # $c is "eax"
565
566 &function_end($name);
567 }