summaryrefslogtreecommitdiff
path: root/src/lib/libcrypto/bn/asm
diff options
context:
space:
mode:
authorbeck <>1999-09-29 05:53:45 +0000
committerbeck <>1999-09-29 05:53:45 +0000
commit648e4f0876a3773381cbfff3192dd84dd1c8c925 (patch)
treebd9d01e3969ffa5aac92128af3e515520c88fc0e /src/lib/libcrypto/bn/asm
parent756086c41b0487beefc3d5b3400f80095d0e4157 (diff)
downloadopenbsd-648e4f0876a3773381cbfff3192dd84dd1c8c925.tar.gz
openbsd-648e4f0876a3773381cbfff3192dd84dd1c8c925.tar.bz2
openbsd-648e4f0876a3773381cbfff3192dd84dd1c8c925.zip
new files for OpenSSL 0.9.4
Diffstat (limited to 'src/lib/libcrypto/bn/asm')
-rw-r--r--src/lib/libcrypto/bn/asm/alpha.works/add.pl119
-rw-r--r--src/lib/libcrypto/bn/asm/alpha.works/div.pl144
-rw-r--r--src/lib/libcrypto/bn/asm/alpha.works/mul.pl116
-rw-r--r--src/lib/libcrypto/bn/asm/alpha.works/mul_add.pl120
-rw-r--r--src/lib/libcrypto/bn/asm/alpha.works/mul_c4.pl213
-rw-r--r--src/lib/libcrypto/bn/asm/alpha.works/mul_c4.works.pl98
-rw-r--r--src/lib/libcrypto/bn/asm/alpha.works/mul_c8.pl177
-rw-r--r--src/lib/libcrypto/bn/asm/alpha.works/sqr.pl113
-rw-r--r--src/lib/libcrypto/bn/asm/alpha.works/sqr_c4.pl109
-rw-r--r--src/lib/libcrypto/bn/asm/alpha.works/sqr_c8.pl132
-rw-r--r--src/lib/libcrypto/bn/asm/alpha.works/sub.pl108
-rw-r--r--src/lib/libcrypto/bn/asm/alpha/add.pl118
-rw-r--r--src/lib/libcrypto/bn/asm/alpha/div.pl144
-rw-r--r--src/lib/libcrypto/bn/asm/alpha/mul.pl104
-rw-r--r--src/lib/libcrypto/bn/asm/alpha/mul_add.pl123
-rw-r--r--src/lib/libcrypto/bn/asm/alpha/mul_c4.pl215
-rw-r--r--src/lib/libcrypto/bn/asm/alpha/mul_c4.works.pl98
-rw-r--r--src/lib/libcrypto/bn/asm/alpha/mul_c8.pl177
-rw-r--r--src/lib/libcrypto/bn/asm/alpha/sqr.pl113
-rw-r--r--src/lib/libcrypto/bn/asm/alpha/sqr_c4.pl109
-rw-r--r--src/lib/libcrypto/bn/asm/alpha/sqr_c8.pl132
-rw-r--r--src/lib/libcrypto/bn/asm/alpha/sub.pl108
-rw-r--r--src/lib/libcrypto/bn/asm/x86/add.pl76
-rw-r--r--src/lib/libcrypto/bn/asm/x86/comba.pl277
-rw-r--r--src/lib/libcrypto/bn/asm/x86/div.pl15
-rw-r--r--src/lib/libcrypto/bn/asm/x86/f3
-rw-r--r--src/lib/libcrypto/bn/asm/x86/mul.pl77
-rw-r--r--src/lib/libcrypto/bn/asm/x86/mul_add.pl87
-rw-r--r--src/lib/libcrypto/bn/asm/x86/sqr.pl60
-rw-r--r--src/lib/libcrypto/bn/asm/x86/sub.pl76
30 files changed, 3561 insertions, 0 deletions
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/add.pl b/src/lib/libcrypto/bn/asm/alpha.works/add.pl
new file mode 100644
index 0000000000..4dc76e6b69
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/alpha.works/add.pl
@@ -0,0 +1,119 @@
1#!/usr/local/bin/perl
2# alpha assember
3
4sub bn_add_words
5 {
6 local($name)=@_;
7 local($cc,$a,$b,$r);
8
9 &init_pool(4);
10 ($cc)=GR("r0");
11
12 $rp=&wparam(0);
13 $ap=&wparam(1);
14 $bp=&wparam(2);
15 $count=&wparam(3);
16
17 &function_begin($name,"");
18
19 &comment("");
20 &sub($count,4,$count);
21 &mov("zero",$cc);
22 &br(&label("finish"));
23 &blt($count,&label("finish"));
24
25 ($a0,$b0)=&NR(2);
26 &ld($a0,&QWPw(0,$ap));
27 &ld($b0,&QWPw(0,$bp));
28
29##########################################################
30 &set_label("loop");
31
32 ($a1)=&NR(1); &ld($a1,&QWPw(1,$ap));
33 ($b1)=&NR(1); &ld($b1,&QWPw(1,$bp));
34 ($a2)=&NR(1); &ld($a2,&QWPw(2,$ap));
35 ($b2)=&NR(1); &ld($b2,&QWPw(2,$bp));
36 ($a3)=&NR(1); &ld($a3,&QWPw(3,$ap));
37 ($b3)=&NR(1); &ld($b3,&QWPw(3,$bp));
38
39 ($o0,$t0)=&NR(2);
40 &add($a0,$b0,$o0);
41 &cmpult($o0,$b0,$t0);
42 &add($o0,$cc,$o0);
43 &cmpult($o0,$cc,$cc);
44 &add($cc,$t0,$cc); &FR($t0);
45
46 ($t1,$o1)=&NR(2);
47
48 &add($a1,$b1,$o1); &FR($a1);
49 &cmpult($o1,$b1,$t1); &FR($b1);
50 &add($o1,$cc,$o1);
51 &cmpult($o1,$cc,$cc);
52 &add($cc,$t1,$cc); &FR($t1);
53
54 ($t2,$o2)=&NR(2);
55
56 &add($a2,$b2,$o2); &FR($a2);
57 &cmpult($o2,$b2,$t2); &FR($b2);
58 &add($o2,$cc,$o2);
59 &cmpult($o2,$cc,$cc);
60 &add($cc,$t2,$cc); &FR($t2);
61
62 ($t3,$o3)=&NR(2);
63
64 &add($a3,$b3,$o3); &FR($a3);
65 &cmpult($o3,$b3,$t3); &FR($b3);
66 &add($o3,$cc,$o3);
67 &cmpult($o3,$cc,$cc);
68 &add($cc,$t3,$cc); &FR($t3);
69
70 &st($o0,&QWPw(0,$rp)); &FR($o0);
71 &st($o1,&QWPw(0,$rp)); &FR($o1);
72 &st($o2,&QWPw(0,$rp)); &FR($o2);
73 &st($o3,&QWPw(0,$rp)); &FR($o3);
74
75 &sub($count,4,$count); # count-=4
76 &add($ap,4*$QWS,$ap); # count+=4
77 &add($bp,4*$QWS,$bp); # count+=4
78 &add($rp,4*$QWS,$rp); # count+=4
79
80 &blt($count,&label("finish"));
81 &ld($a0,&QWPw(0,$ap));
82 &ld($b0,&QWPw(0,$bp));
83 &br(&label("loop"));
84##################################################
85 # Do the last 0..3 words
86
87 ($t0,$o0)=&NR(2);
88 &set_label("last_loop");
89
90 &ld($a0,&QWPw(0,$ap)); # get a
91 &ld($b0,&QWPw(0,$bp)); # get b
92
93 &add($a0,$b0,$o0);
94 &cmpult($o0,$b0,$t0); # will we borrow?
95 &add($o0,$cc,$o0); # will we borrow?
96 &cmpult($o0,$cc,$cc); # will we borrow?
97 &add($cc,$t0,$cc); # add the borrows
98 &st($o0,&QWPw(0,$rp)); # save
99
100 &add($ap,$QWS,$ap);
101 &add($bp,$QWS,$bp);
102 &add($rp,$QWS,$rp);
103 &sub($count,1,$count);
104 &bgt($count,&label("last_loop"));
105 &function_end_A($name);
106
107######################################################
108 &set_label("finish");
109 &add($count,4,$count);
110 &bgt($count,&label("last_loop"));
111
112 &FR($o0,$t0,$a0,$b0);
113 &set_label("end");
114 &function_end($name);
115
116 &fin_pool;
117 }
118
1191;
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/div.pl b/src/lib/libcrypto/bn/asm/alpha.works/div.pl
new file mode 100644
index 0000000000..7ec144377f
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/alpha.works/div.pl
@@ -0,0 +1,144 @@
1#!/usr/local/bin/perl
2
3sub bn_div64
4 {
5 local($data)=<<'EOF';
6 #
7 # What follows was taken directly from the C compiler with a few
8 # hacks to redo the lables.
9 #
10.text
11 .set noreorder
12 .set volatile
13 .align 3
14 .globl bn_div64
15 .ent bn_div64
16bn_div64:
17 ldgp $29,0($27)
18bn_div64..ng:
19 lda $30,-48($30)
20 .frame $30,48,$26,0
21 stq $26,0($30)
22 stq $9,8($30)
23 stq $10,16($30)
24 stq $11,24($30)
25 stq $12,32($30)
26 stq $13,40($30)
27 .mask 0x4003e00,-48
28 .prologue 1
29 bis $16,$16,$9
30 bis $17,$17,$10
31 bis $18,$18,$11
32 bis $31,$31,$13
33 bis $31,2,$12
34 bne $11,$9119
35 lda $0,-1
36 br $31,$9136
37 .align 4
38$9119:
39 bis $11,$11,$16
40 jsr $26,BN_num_bits_word
41 ldgp $29,0($26)
42 subq $0,64,$1
43 beq $1,$9120
44 bis $31,1,$1
45 sll $1,$0,$1
46 cmpule $9,$1,$1
47 bne $1,$9120
48 # lda $16,_IO_stderr_
49 # lda $17,$C32
50 # bis $0,$0,$18
51 # jsr $26,fprintf
52 # ldgp $29,0($26)
53 jsr $26,abort
54 ldgp $29,0($26)
55 .align 4
56$9120:
57 bis $31,64,$3
58 cmpult $9,$11,$2
59 subq $3,$0,$1
60 addl $1,$31,$0
61 subq $9,$11,$1
62 cmoveq $2,$1,$9
63 beq $0,$9122
64 zapnot $0,15,$2
65 subq $3,$0,$1
66 sll $11,$2,$11
67 sll $9,$2,$3
68 srl $10,$1,$1
69 sll $10,$2,$10
70 bis $3,$1,$9
71$9122:
72 srl $11,32,$5
73 zapnot $11,15,$6
74 lda $7,-1
75 .align 5
76$9123:
77 srl $9,32,$1
78 subq $1,$5,$1
79 bne $1,$9126
80 zapnot $7,15,$27
81 br $31,$9127
82 .align 4
83$9126:
84 bis $9,$9,$24
85 bis $5,$5,$25
86 divqu $24,$25,$27
87$9127:
88 srl $10,32,$4
89 .align 5
90$9128:
91 mulq $27,$5,$1
92 subq $9,$1,$3
93 zapnot $3,240,$1
94 bne $1,$9129
95 mulq $6,$27,$2
96 sll $3,32,$1
97 addq $1,$4,$1
98 cmpule $2,$1,$2
99 bne $2,$9129
100 subq $27,1,$27
101 br $31,$9128
102 .align 4
103$9129:
104 mulq $27,$6,$1
105 mulq $27,$5,$4
106 srl $1,32,$3
107 sll $1,32,$1
108 addq $4,$3,$4
109 cmpult $10,$1,$2
110 subq $10,$1,$10
111 addq $2,$4,$2
112 cmpult $9,$2,$1
113 bis $2,$2,$4
114 beq $1,$9134
115 addq $9,$11,$9
116 subq $27,1,$27
117$9134:
118 subl $12,1,$12
119 subq $9,$4,$9
120 beq $12,$9124
121 sll $27,32,$13
122 sll $9,32,$2
123 srl $10,32,$1
124 sll $10,32,$10
125 bis $2,$1,$9
126 br $31,$9123
127 .align 4
128$9124:
129 bis $13,$27,$0
130$9136:
131 ldq $26,0($30)
132 ldq $9,8($30)
133 ldq $10,16($30)
134 ldq $11,24($30)
135 ldq $12,32($30)
136 ldq $13,40($30)
137 addq $30,48,$30
138 ret $31,($26),1
139 .end bn_div64
140EOF
141 &asm_add($data);
142 }
143
1441;
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/mul.pl b/src/lib/libcrypto/bn/asm/alpha.works/mul.pl
new file mode 100644
index 0000000000..b182bae452
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/alpha.works/mul.pl
@@ -0,0 +1,116 @@
1#!/usr/local/bin/perl
2# alpha assember
3
4sub bn_mul_words
5 {
6 local($name)=@_;
7 local($cc,$a,$b,$r,$couny);
8
9 &init_pool(4);
10 ($cc)=GR("r0");
11
12 $rp=&wparam(0);
13 $ap=&wparam(1);
14 $count=&wparam(2);
15 $word=&wparam(3);
16
17 &function_begin($name,"");
18
19 &comment("");
20 &sub($count,4,$count);
21 &mov("zero",$cc);
22 &br(&label("finish"));
23 &blt($count,&label("finish"));
24
25 ($a0,$r0)=&NR(2);
26 &ld($a0,&QWPw(0,$ap));
27 &ld($r0,&QWPw(0,$rp));
28
29$a=<<'EOF';
30##########################################################
31 &set_label("loop");
32
33 ($a1)=&NR(1); &ld($a1,&QWPw(1,$ap));
34 ($b1)=&NR(1); &ld($b1,&QWPw(1,$bp));
35 ($a2)=&NR(1); &ld($a2,&QWPw(2,$ap));
36 ($b2)=&NR(1); &ld($b2,&QWPw(2,$bp));
37 ($a3)=&NR(1); &ld($a3,&QWPw(3,$ap));
38 ($b3)=&NR(1); &ld($b3,&QWPw(3,$bp));
39
40 ($o0,$t0)=&NR(2);
41 &add($a0,$b0,$o0);
42 &cmpult($o0,$b0,$t0);
43 &add($o0,$cc,$o0);
44 &cmpult($o0,$cc,$cc);
45 &add($cc,$t0,$cc); &FR($t0);
46
47 ($t1,$o1)=&NR(2);
48
49 &add($a1,$b1,$o1); &FR($a1);
50 &cmpult($o1,$b1,$t1); &FR($b1);
51 &add($o1,$cc,$o1);
52 &cmpult($o1,$cc,$cc);
53 &add($cc,$t1,$cc); &FR($t1);
54
55 ($t2,$o2)=&NR(2);
56
57 &add($a2,$b2,$o2); &FR($a2);
58 &cmpult($o2,$b2,$t2); &FR($b2);
59 &add($o2,$cc,$o2);
60 &cmpult($o2,$cc,$cc);
61 &add($cc,$t2,$cc); &FR($t2);
62
63 ($t3,$o3)=&NR(2);
64
65 &add($a3,$b3,$o3); &FR($a3);
66 &cmpult($o3,$b3,$t3); &FR($b3);
67 &add($o3,$cc,$o3);
68 &cmpult($o3,$cc,$cc);
69 &add($cc,$t3,$cc); &FR($t3);
70
71 &st($o0,&QWPw(0,$rp)); &FR($o0);
72 &st($o1,&QWPw(0,$rp)); &FR($o1);
73 &st($o2,&QWPw(0,$rp)); &FR($o2);
74 &st($o3,&QWPw(0,$rp)); &FR($o3);
75
76 &sub($count,4,$count); # count-=4
77 &add($ap,4*$QWS,$ap); # count+=4
78 &add($bp,4*$QWS,$bp); # count+=4
79 &add($rp,4*$QWS,$rp); # count+=4
80
81 &blt($count,&label("finish"));
82 &ld($a0,&QWPw(0,$ap));
83 &ld($b0,&QWPw(0,$bp));
84 &br(&label("loop"));
85EOF
86##################################################
87 # Do the last 0..3 words
88
89 &set_label("last_loop");
90
91 &ld(($a0)=&NR(1),&QWPw(0,$ap)); # get a
92 &mul($a0,$word,($l0)=&NR(1));
93 &add($ap,$QWS,$ap);
94 &muh($a0,$word,($h0)=&NR(1)); &FR($a0);
95 &add($l0,$cc,$l0);
96 &add($rp,$QWS,$rp);
97 &sub($count,1,$count);
98 &cmpult($l0,$cc,$cc);
99 &st($l0,&QWPw(-1,$rp)); &FR($l0);
100 &add($h0,$cc,$cc); &FR($h0);
101
102 &bgt($count,&label("last_loop"));
103 &function_end_A($name);
104
105######################################################
106 &set_label("finish");
107 &add($count,4,$count);
108 &bgt($count,&label("last_loop"));
109
110 &set_label("end");
111 &function_end($name);
112
113 &fin_pool;
114 }
115
1161;
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/mul_add.pl b/src/lib/libcrypto/bn/asm/alpha.works/mul_add.pl
new file mode 100644
index 0000000000..e37f6315fb
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/alpha.works/mul_add.pl
@@ -0,0 +1,120 @@
1#!/usr/local/bin/perl
2# alpha assember
3
4sub bn_mul_add_words
5 {
6 local($name)=@_;
7 local($cc,$a,$b,$r,$couny);
8
9 &init_pool(4);
10 ($cc)=GR("r0");
11
12 $rp=&wparam(0);
13 $ap=&wparam(1);
14 $count=&wparam(2);
15 $word=&wparam(3);
16
17 &function_begin($name,"");
18
19 &comment("");
20 &sub($count,4,$count);
21 &mov("zero",$cc);
22 &br(&label("finish"));
23 &blt($count,&label("finish"));
24
25 ($a0,$r0)=&NR(2);
26 &ld($a0,&QWPw(0,$ap));
27 &ld($r0,&QWPw(0,$rp));
28
29$a=<<'EOF';
30##########################################################
31 &set_label("loop");
32
33 ($a1)=&NR(1); &ld($a1,&QWPw(1,$ap));
34 ($b1)=&NR(1); &ld($b1,&QWPw(1,$bp));
35 ($a2)=&NR(1); &ld($a2,&QWPw(2,$ap));
36 ($b2)=&NR(1); &ld($b2,&QWPw(2,$bp));
37 ($a3)=&NR(1); &ld($a3,&QWPw(3,$ap));
38 ($b3)=&NR(1); &ld($b3,&QWPw(3,$bp));
39
40 ($o0,$t0)=&NR(2);
41 &add($a0,$b0,$o0);
42 &cmpult($o0,$b0,$t0);
43 &add($o0,$cc,$o0);
44 &cmpult($o0,$cc,$cc);
45 &add($cc,$t0,$cc); &FR($t0);
46
47 ($t1,$o1)=&NR(2);
48
49 &add($a1,$b1,$o1); &FR($a1);
50 &cmpult($o1,$b1,$t1); &FR($b1);
51 &add($o1,$cc,$o1);
52 &cmpult($o1,$cc,$cc);
53 &add($cc,$t1,$cc); &FR($t1);
54
55 ($t2,$o2)=&NR(2);
56
57 &add($a2,$b2,$o2); &FR($a2);
58 &cmpult($o2,$b2,$t2); &FR($b2);
59 &add($o2,$cc,$o2);
60 &cmpult($o2,$cc,$cc);
61 &add($cc,$t2,$cc); &FR($t2);
62
63 ($t3,$o3)=&NR(2);
64
65 &add($a3,$b3,$o3); &FR($a3);
66 &cmpult($o3,$b3,$t3); &FR($b3);
67 &add($o3,$cc,$o3);
68 &cmpult($o3,$cc,$cc);
69 &add($cc,$t3,$cc); &FR($t3);
70
71 &st($o0,&QWPw(0,$rp)); &FR($o0);
72 &st($o1,&QWPw(0,$rp)); &FR($o1);
73 &st($o2,&QWPw(0,$rp)); &FR($o2);
74 &st($o3,&QWPw(0,$rp)); &FR($o3);
75
76 &sub($count,4,$count); # count-=4
77 &add($ap,4*$QWS,$ap); # count+=4
78 &add($bp,4*$QWS,$bp); # count+=4
79 &add($rp,4*$QWS,$rp); # count+=4
80
81 &blt($count,&label("finish"));
82 &ld($a0,&QWPw(0,$ap));
83 &ld($b0,&QWPw(0,$bp));
84 &br(&label("loop"));
85EOF
86##################################################
87 # Do the last 0..3 words
88
89 &set_label("last_loop");
90
91 &ld(($a0)=&NR(1),&QWPw(0,$ap)); # get a
92 &ld(($r0)=&NR(1),&QWPw(0,$rp)); # get b
93 &mul($a0,$word,($l0)=&NR(1));
94 &sub($count,1,$count);
95 &add($ap,$QWS,$ap);
96 &muh($a0,$word,($h0)=&NR(1)); &FR($a0);
97 &add($r0,$l0,$r0);
98 &add($rp,$QWS,$rp);
99 &cmpult($r0,$l0,($t0)=&NR(1)); &FR($l0);
100 &add($r0,$cc,$r0);
101 &add($h0,$t0,$h0); &FR($t0);
102 &cmpult($r0,$cc,$cc);
103 &st($r0,&QWPw(-1,$rp)); &FR($r0);
104 &add($h0,$cc,$cc); &FR($h0);
105
106 &bgt($count,&label("last_loop"));
107 &function_end_A($name);
108
109######################################################
110 &set_label("finish");
111 &add($count,4,$count);
112 &bgt($count,&label("last_loop"));
113
114 &set_label("end");
115 &function_end($name);
116
117 &fin_pool;
118 }
119
1201;
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/mul_c4.pl b/src/lib/libcrypto/bn/asm/alpha.works/mul_c4.pl
new file mode 100644
index 0000000000..5efd201281
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/alpha.works/mul_c4.pl
@@ -0,0 +1,213 @@
1#!/usr/local/bin/perl
2# alpha assember
3
4sub mul_add_c
5 {
6 local($a,$b,$c0,$c1,$c2)=@_;
7 local($l1,$h1,$t1,$t2);
8
9 &mul($a,$b,($l1)=&NR(1));
10 &muh($a,$b,($h1)=&NR(1));
11 &add($c0,$l1,$c0);
12 &cmpult($c0,$l1,($t1)=&NR(1)); &FR($l1);
13 &add($t1,$h1,$h1); &FR($t1);
14 &add($c1,$h1,$c1);
15 &cmpult($c1,$h1,($t2)=&NR(1)); &FR($h1);
16 &add($c2,$t2,$c2); &FR($t2);
17 }
18
19sub bn_mul_comba4
20 {
21 local($name)=@_;
22 local(@a,@b,$r,$c0,$c1,$c2);
23
24 $cnt=1;
25 &init_pool(3);
26
27 $rp=&wparam(0);
28 $ap=&wparam(1);
29 $bp=&wparam(2);
30
31 &function_begin($name,"");
32
33 &comment("");
34
35 &ld(($a[0])=&NR(1),&QWPw(0,$ap));
36 &ld(($b[0])=&NR(1),&QWPw(0,$bp));
37 &ld(($a[1])=&NR(1),&QWPw(1,$ap));
38 &ld(($b[1])=&NR(1),&QWPw(1,$bp));
39 &mul($a[0],$b[0],($r00)=&NR(1));
40 &ld(($a[2])=&NR(1),&QWPw(2,$ap));
41 &ld(($b[2])=&NR(1),&QWPw(2,$bp));
42 &muh($a[0],$b[0],($r01)=&NR(1));
43 &FR($ap); &ld(($a[3])=&NR(1),&QWPw(3,$ap));
44 &FR($bp); &ld(($b[3])=&NR(1),&QWPw(3,$bp));
45 &mul($a[0],$b[1],($r02)=&NR(1));
46
47 ($R,$H1,$H2)=&NR(3);
48
49 &st($r00,&QWPw(0,$rp)); &FR($r00);
50
51 &mov("zero",$R);
52 &mul($a[1],$b[0],($r03)=&NR(1));
53
54 &mov("zero",$H1);
55 &mov("zero",$H0);
56 &add($R,$r01,$R);
57 &muh($a[0],$b[1],($r04)=&NR(1));
58 &cmpult($R,$r01,($t01)=&NR(1)); &FR($r01);
59 &add($R,$r02,$R);
60 &add($H1,$t01,$H1) &FR($t01);
61 &muh($a[1],$b[0],($r05)=&NR(1));
62 &cmpult($R,$r02,($t02)=&NR(1)); &FR($r02);
63 &add($R,$r03,$R);
64 &add($H2,$t02,$H2) &FR($t02);
65 &mul($a[0],$b[2],($r06)=&NR(1));
66 &cmpult($R,$r03,($t03)=&NR(1)); &FR($r03);
67 &add($H1,$t03,$H1) &FR($t03);
68 &st($R,&QWPw(1,$rp));
69 &add($H1,$H2,$R);
70
71 &mov("zero",$H1);
72 &add($R,$r04,$R);
73 &mov("zero",$H2);
74 &mul($a[1],$b[1],($r07)=&NR(1));
75 &cmpult($R,$r04,($t04)=&NR(1)); &FR($r04);
76 &add($R,$r05,$R);
77 &add($H1,$t04,$H1) &FR($t04);
78 &mul($a[2],$b[0],($r08)=&NR(1));
79 &cmpult($R,$r05,($t05)=&NR(1)); &FR($r05);
80 &add($R,$r01,$R);
81 &add($H2,$t05,$H2) &FR($t05);
82 &muh($a[0],$b[2],($r09)=&NR(1));
83 &cmpult($R,$r06,($t06)=&NR(1)); &FR($r06);
84 &add($R,$r07,$R);
85 &add($H1,$t06,$H1) &FR($t06);
86 &muh($a[1],$b[1],($r10)=&NR(1));
87 &cmpult($R,$r07,($t07)=&NR(1)); &FR($r07);
88 &add($R,$r08,$R);
89 &add($H2,$t07,$H2) &FR($t07);
90 &muh($a[2],$b[0],($r11)=&NR(1));
91 &cmpult($R,$r08,($t08)=&NR(1)); &FR($r08);
92 &add($H1,$t08,$H1) &FR($t08);
93 &st($R,&QWPw(2,$rp));
94 &add($H1,$H2,$R);
95
96 &mov("zero",$H1);
97 &add($R,$r09,$R);
98 &mov("zero",$H2);
99 &mul($a[0],$b[3],($r12)=&NR(1));
100 &cmpult($R,$r09,($t09)=&NR(1)); &FR($r09);
101 &add($R,$r10,$R);
102 &add($H1,$t09,$H1) &FR($t09);
103 &mul($a[1],$b[2],($r13)=&NR(1));
104 &cmpult($R,$r10,($t10)=&NR(1)); &FR($r10);
105 &add($R,$r11,$R);
106 &add($H1,$t10,$H1) &FR($t10);
107 &mul($a[2],$b[1],($r14)=&NR(1));
108 &cmpult($R,$r11,($t11)=&NR(1)); &FR($r11);
109 &add($R,$r12,$R);
110 &add($H1,$t11,$H1) &FR($t11);
111 &mul($a[3],$b[0],($r15)=&NR(1));
112 &cmpult($R,$r12,($t12)=&NR(1)); &FR($r12);
113 &add($R,$r13,$R);
114 &add($H1,$t12,$H1) &FR($t12);
115 &muh($a[0],$b[3],($r16)=&NR(1));
116 &cmpult($R,$r13,($t13)=&NR(1)); &FR($r13);
117 &add($R,$r14,$R);
118 &add($H1,$t13,$H1) &FR($t13);
119 &muh($a[1],$b[2],($r17)=&NR(1));
120 &cmpult($R,$r14,($t14)=&NR(1)); &FR($r14);
121 &add($R,$r15,$R);
122 &add($H1,$t14,$H1) &FR($t14);
123 &muh($a[2],$b[1],($r18)=&NR(1));
124 &cmpult($R,$r15,($t15)=&NR(1)); &FR($r15);
125 &add($H1,$t15,$H1) &FR($t15);
126 &st($R,&QWPw(3,$rp));
127 &add($H1,$H2,$R);
128
129 &mov("zero",$H1);
130 &add($R,$r16,$R);
131 &mov("zero",$H2);
132 &muh($a[3],$b[0],($r19)=&NR(1));
133 &cmpult($R,$r16,($t16)=&NR(1)); &FR($r16);
134 &add($R,$r17,$R);
135 &add($H1,$t16,$H1) &FR($t16);
136 &mul($a[1],$b[3],($r20)=&NR(1));
137 &cmpult($R,$r17,($t17)=&NR(1)); &FR($r17);
138 &add($R,$r18,$R);
139 &add($H1,$t17,$H1) &FR($t17);
140 &mul($a[2],$b[2],($r21)=&NR(1));
141 &cmpult($R,$r18,($t18)=&NR(1)); &FR($r18);
142 &add($R,$r19,$R);
143 &add($H1,$t18,$H1) &FR($t18);
144 &mul($a[3],$b[1],($r22)=&NR(1));
145 &cmpult($R,$r19,($t19)=&NR(1)); &FR($r19);
146 &add($R,$r20,$R);
147 &add($H1,$t19,$H1) &FR($t19);
148 &muh($a[1],$b[3],($r23)=&NR(1));
149 &cmpult($R,$r20,($t20)=&NR(1)); &FR($r20);
150 &add($R,$r21,$R);
151 &add($H1,$t20,$H1) &FR($t20);
152 &muh($a[2],$b[2],($r24)=&NR(1));
153 &cmpult($R,$r21,($t21)=&NR(1)); &FR($r21);
154 &add($R,$r22,$R);
155 &add($H1,$t21,$H1) &FR($t21);
156 &muh($a[3],$b[1],($r25)=&NR(1));
157 &cmpult($R,$r22,($t22)=&NR(1)); &FR($r22);
158 &add($H1,$t22,$H1) &FR($t22);
159 &st($R,&QWPw(4,$rp));
160 &add($H1,$H2,$R);
161
162 &mov("zero",$H1);
163 &add($R,$r23,$R);
164 &mov("zero",$H2);
165 &mul($a[2],$b[3],($r26)=&NR(1));
166 &cmpult($R,$r23,($t23)=&NR(1)); &FR($r23);
167 &add($R,$r24,$R);
168 &add($H1,$t23,$H1) &FR($t23);
169 &mul($a[3],$b[2],($r27)=&NR(1));
170 &cmpult($R,$r24,($t24)=&NR(1)); &FR($r24);
171 &add($R,$r25,$R);
172 &add($H1,$t24,$H1) &FR($t24);
173 &muh($a[2],$b[3],($r28)=&NR(1));
174 &cmpult($R,$r25,($t25)=&NR(1)); &FR($r25);
175 &add($R,$r26,$R);
176 &add($H1,$t25,$H1) &FR($t25);
177 &muh($a[3],$b[2],($r29)=&NR(1));
178 &cmpult($R,$r26,($t26)=&NR(1)); &FR($r26);
179 &add($R,$r27,$R);
180 &add($H1,$t26,$H1) &FR($t26);
181 &mul($a[3],$b[3],($r30)=&NR(1));
182 &cmpult($R,$r27,($t27)=&NR(1)); &FR($r27);
183 &add($H1,$t27,$H1) &FR($t27);
184 &st($R,&QWPw(5,$rp));
185 &add($H1,$H2,$R);
186
187 &mov("zero",$H1);
188 &add($R,$r28,$R);
189 &mov("zero",$H2);
190 &muh($a[3],$b[3],($r31)=&NR(1));
191 &cmpult($R,$r28,($t28)=&NR(1)); &FR($r28);
192 &add($R,$r29,$R);
193 &add($H1,$t28,$H1) &FR($t28);
194 ############
195 &cmpult($R,$r29,($t29)=&NR(1)); &FR($r29);
196 &add($R,$r30,$R);
197 &add($H1,$t29,$H1) &FR($t29);
198 ############
199 &cmpult($R,$r30,($t30)=&NR(1)); &FR($r30);
200 &add($H1,$t30,$H1) &FR($t30);
201 &st($R,&QWPw(6,$rp));
202 &add($H1,$H2,$R);
203
204 &add($R,$r31,$R); &FR($r31);
205 &st($R,&QWPw(7,$rp));
206
207 &FR($R,$H1,$H2);
208 &function_end($name);
209
210 &fin_pool;
211 }
212
2131;
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/mul_c4.works.pl b/src/lib/libcrypto/bn/asm/alpha.works/mul_c4.works.pl
new file mode 100644
index 0000000000..79d86dd25c
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/alpha.works/mul_c4.works.pl
@@ -0,0 +1,98 @@
1#!/usr/local/bin/perl
2# alpha assember
3
4sub mul_add_c
5 {
6 local($a,$b,$c0,$c1,$c2)=@_;
7 local($l1,$h1,$t1,$t2);
8
9print STDERR "count=$cnt\n"; $cnt++;
10 &mul($a,$b,($l1)=&NR(1));
11 &muh($a,$b,($h1)=&NR(1));
12 &add($c0,$l1,$c0);
13 &cmpult($c0,$l1,($t1)=&NR(1)); &FR($l1);
14 &add($t1,$h1,$h1); &FR($t1);
15 &add($c1,$h1,$c1);
16 &cmpult($c1,$h1,($t2)=&NR(1)); &FR($h1);
17 &add($c2,$t2,$c2); &FR($t2);
18 }
19
20sub bn_mul_comba4
21 {
22 local($name)=@_;
23 local(@a,@b,$r,$c0,$c1,$c2);
24
25 $cnt=1;
26 &init_pool(3);
27
28 $rp=&wparam(0);
29 $ap=&wparam(1);
30 $bp=&wparam(2);
31
32 &function_begin($name,"");
33
34 &comment("");
35
36 &ld(($a[0])=&NR(1),&QWPw(0,$ap));
37 &ld(($b[0])=&NR(1),&QWPw(0,$bp));
38 &ld(($a[1])=&NR(1),&QWPw(1,$ap));
39 &ld(($b[1])=&NR(1),&QWPw(1,$bp));
40 &ld(($a[2])=&NR(1),&QWPw(2,$ap));
41 &ld(($b[2])=&NR(1),&QWPw(2,$bp));
42 &ld(($a[3])=&NR(1),&QWPw(3,$ap)); &FR($ap);
43 &ld(($b[3])=&NR(1),&QWPw(3,$bp)); &FR($bp);
44
45 ($c0,$c1,$c2)=&NR(3);
46 &mov("zero",$c2);
47 &mul($a[0],$b[0],$c0);
48 &muh($a[0],$b[0],$c1);
49 &st($c0,&QWPw(0,$rp)); &FR($c0); ($c0)=&NR($c0);
50 ($c0,$c1,$c2)=($c1,$c2,$c0);
51 &mov("zero",$c2);
52
53 &mul_add_c($a[0],$b[1],$c0,$c1,$c2);
54 &mul_add_c($a[1],$b[0],$c0,$c1,$c2);
55 &st($c0,&QWPw(1,$rp)); &FR($c0); ($c0)=&NR($c0);
56 ($c0,$c1,$c2)=($c1,$c2,$c0);
57 &mov("zero",$c2);
58
59 &mul_add_c($a[1],$b[1],$c0,$c1,$c2);
60 &mul_add_c($a[0],$b[2],$c0,$c1,$c2);
61 &mul_add_c($a[2],$b[0],$c0,$c1,$c2);
62 &st($c0,&QWPw(2,$rp)); &FR($c0); ($c0)=&NR($c0);
63 ($c0,$c1,$c2)=($c1,$c2,$c0);
64 &mov("zero",$c2);
65
66 &mul_add_c($a[0],$b[3],$c0,$c1,$c2); &FR($a[0]);
67 &mul_add_c($a[1],$b[2],$c0,$c1,$c2);
68 &mul_add_c($a[2],$b[1],$c0,$c1,$c2);
69 &mul_add_c($a[3],$b[0],$c0,$c1,$c2); &FR($b[0]);
70 &st($c0,&QWPw(3,$rp)); &FR($c0); ($c0)=&NR($c0);
71 ($c0,$c1,$c2)=($c1,$c2,$c0);
72 &mov("zero",$c2);
73
74 &mul_add_c($a[1],$b[3],$c0,$c1,$c2); &FR($a[1]);
75 &mul_add_c($a[2],$b[2],$c0,$c1,$c2);
76 &mul_add_c($a[3],$b[1],$c0,$c1,$c2); &FR($b[1]);
77 &st($c0,&QWPw(4,$rp)); &FR($c0); ($c0)=&NR($c0);
78 ($c0,$c1,$c2)=($c1,$c2,$c0);
79 &mov("zero",$c2);
80
81 &mul_add_c($a[2],$b[3],$c0,$c1,$c2); &FR($a[2]);
82 &mul_add_c($a[3],$b[2],$c0,$c1,$c2); &FR($b[2]);
83 &st($c0,&QWPw(5,$rp)); &FR($c0); ($c0)=&NR($c0);
84 ($c0,$c1,$c2)=($c1,$c2,$c0);
85 &mov("zero",$c2);
86
87 &mul_add_c($a[3],$b[3],$c0,$c1,$c2); &FR($a[3],$b[3]);
88 &st($c0,&QWPw(6,$rp));
89 &st($c1,&QWPw(7,$rp));
90
91 &FR($c0,$c1,$c2);
92
93 &function_end($name);
94
95 &fin_pool;
96 }
97
981;
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/mul_c8.pl b/src/lib/libcrypto/bn/asm/alpha.works/mul_c8.pl
new file mode 100644
index 0000000000..525ca7494b
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/alpha.works/mul_c8.pl
@@ -0,0 +1,177 @@
1#!/usr/local/bin/perl
2# alpha assember
3
4sub bn_mul_comba8
5 {
6 local($name)=@_;
7 local(@a,@b,$r,$c0,$c1,$c2);
8
9 $cnt=1;
10 &init_pool(3);
11
12 $rp=&wparam(0);
13 $ap=&wparam(1);
14 $bp=&wparam(2);
15
16 &function_begin($name,"");
17
18 &comment("");
19
20 &stack_push(2);
21 &ld(($a[0])=&NR(1),&QWPw(0,$ap));
22 &ld(($b[0])=&NR(1),&QWPw(0,$bp));
23 &st($reg_s0,&swtmp(0)); &FR($reg_s0);
24 &st($reg_s1,&swtmp(1)); &FR($reg_s1);
25 &ld(($a[1])=&NR(1),&QWPw(1,$ap));
26 &ld(($b[1])=&NR(1),&QWPw(1,$bp));
27 &ld(($a[2])=&NR(1),&QWPw(2,$ap));
28 &ld(($b[2])=&NR(1),&QWPw(2,$bp));
29 &ld(($a[3])=&NR(1),&QWPw(3,$ap));
30 &ld(($b[3])=&NR(1),&QWPw(3,$bp));
31 &ld(($a[4])=&NR(1),&QWPw(1,$ap));
32 &ld(($b[4])=&NR(1),&QWPw(1,$bp));
33 &ld(($a[5])=&NR(1),&QWPw(1,$ap));
34 &ld(($b[5])=&NR(1),&QWPw(1,$bp));
35 &ld(($a[6])=&NR(1),&QWPw(1,$ap));
36 &ld(($b[6])=&NR(1),&QWPw(1,$bp));
37 &ld(($a[7])=&NR(1),&QWPw(1,$ap)); &FR($ap);
38 &ld(($b[7])=&NR(1),&QWPw(1,$bp)); &FR($bp);
39
40 ($c0,$c1,$c2)=&NR(3);
41 &mov("zero",$c2);
42 &mul($a[0],$b[0],$c0);
43 &muh($a[0],$b[0],$c1);
44 &st($c0,&QWPw(0,$rp)); &FR($c0); ($c0)=&NR(1);
45 ($c0,$c1,$c2)=($c1,$c2,$c0);
46 &mov("zero",$c2);
47
48 &mul_add_c($a[0],$b[1],$c0,$c1,$c2);
49 &mul_add_c($a[1],$b[0],$c0,$c1,$c2);
50 &st($c0,&QWPw(1,$rp)); &FR($c0); ($c0)=&NR(1);
51 ($c0,$c1,$c2)=($c1,$c2,$c0);
52 &mov("zero",$c2);
53
54 &mul_add_c($a[0],$b[2],$c0,$c1,$c2);
55 &mul_add_c($a[1],$b[1],$c0,$c1,$c2);
56 &mul_add_c($a[2],$b[0],$c0,$c1,$c2);
57 &st($c0,&QWPw(2,$rp)); &FR($c0); ($c0)=&NR(1);
58 ($c0,$c1,$c2)=($c1,$c2,$c0);
59 &mov("zero",$c2);
60
61 &mul_add_c($a[0],$b[3],$c0,$c1,$c2);
62 &mul_add_c($a[1],$b[2],$c0,$c1,$c2);
63 &mul_add_c($a[2],$b[1],$c0,$c1,$c2);
64 &mul_add_c($a[3],$b[0],$c0,$c1,$c2);
65 &st($c0,&QWPw(3,$rp)); &FR($c0); ($c0)=&NR(1);
66 ($c0,$c1,$c2)=($c1,$c2,$c0);
67 &mov("zero",$c2);
68
69 &mul_add_c($a[0],$b[4],$c0,$c1,$c2);
70 &mul_add_c($a[1],$b[3],$c0,$c1,$c2);
71 &mul_add_c($a[2],$b[2],$c0,$c1,$c2);
72 &mul_add_c($a[3],$b[1],$c0,$c1,$c2);
73 &mul_add_c($a[4],$b[0],$c0,$c1,$c2);
74 &st($c0,&QWPw(4,$rp)); &FR($c0); ($c0)=&NR(1);
75 ($c0,$c1,$c2)=($c1,$c2,$c0);
76 &mov("zero",$c2);
77
78 &mul_add_c($a[0],$b[5],$c0,$c1,$c2);
79 &mul_add_c($a[1],$b[4],$c0,$c1,$c2);
80 &mul_add_c($a[2],$b[3],$c0,$c1,$c2);
81 &mul_add_c($a[3],$b[2],$c0,$c1,$c2);
82 &mul_add_c($a[4],$b[1],$c0,$c1,$c2);
83 &mul_add_c($a[5],$b[0],$c0,$c1,$c2);
84 &st($c0,&QWPw(5,$rp)); &FR($c0); ($c0)=&NR(1);
85 ($c0,$c1,$c2)=($c1,$c2,$c0);
86 &mov("zero",$c2);
87
88 &mul_add_c($a[0],$b[6],$c0,$c1,$c2);
89 &mul_add_c($a[1],$b[5],$c0,$c1,$c2);
90 &mul_add_c($a[2],$b[4],$c0,$c1,$c2);
91 &mul_add_c($a[3],$b[3],$c0,$c1,$c2);
92 &mul_add_c($a[4],$b[2],$c0,$c1,$c2);
93 &mul_add_c($a[5],$b[1],$c0,$c1,$c2);
94 &mul_add_c($a[6],$b[0],$c0,$c1,$c2);
95 &st($c0,&QWPw(6,$rp)); &FR($c0); ($c0)=&NR(1);
96 ($c0,$c1,$c2)=($c1,$c2,$c0);
97 &mov("zero",$c2);
98
99 &mul_add_c($a[0],$b[7],$c0,$c1,$c2); &FR($a[0]);
100 &mul_add_c($a[1],$b[6],$c0,$c1,$c2);
101 &mul_add_c($a[2],$b[5],$c0,$c1,$c2);
102 &mul_add_c($a[3],$b[4],$c0,$c1,$c2);
103 &mul_add_c($a[4],$b[3],$c0,$c1,$c2);
104 &mul_add_c($a[5],$b[2],$c0,$c1,$c2);
105 &mul_add_c($a[6],$b[1],$c0,$c1,$c2);
106 &mul_add_c($a[7],$b[0],$c0,$c1,$c2); &FR($b[0]);
107 &st($c0,&QWPw(7,$rp)); &FR($c0); ($c0)=&NR(1);
108 ($c0,$c1,$c2)=($c1,$c2,$c0);
109 &mov("zero",$c2);
110
111 &mul_add_c($a[1],$b[7],$c0,$c1,$c2); &FR($a[1]);
112 &mul_add_c($a[2],$b[6],$c0,$c1,$c2);
113 &mul_add_c($a[3],$b[5],$c0,$c1,$c2);
114 &mul_add_c($a[4],$b[4],$c0,$c1,$c2);
115 &mul_add_c($a[5],$b[3],$c0,$c1,$c2);
116 &mul_add_c($a[6],$b[2],$c0,$c1,$c2);
117 &mul_add_c($a[7],$b[1],$c0,$c1,$c2); &FR($b[1]);
118 &st($c0,&QWPw(8,$rp)); &FR($c0); ($c0)=&NR(1);
119 ($c0,$c1,$c2)=($c1,$c2,$c0);
120 &mov("zero",$c2);
121
122 &mul_add_c($a[2],$b[7],$c0,$c1,$c2); &FR($a[2]);
123 &mul_add_c($a[3],$b[6],$c0,$c1,$c2);
124 &mul_add_c($a[4],$b[5],$c0,$c1,$c2);
125 &mul_add_c($a[5],$b[4],$c0,$c1,$c2);
126 &mul_add_c($a[6],$b[3],$c0,$c1,$c2);
127 &mul_add_c($a[7],$b[2],$c0,$c1,$c2); &FR($b[2]);
128 &st($c0,&QWPw(9,$rp)); &FR($c0); ($c0)=&NR(1);
129 ($c0,$c1,$c2)=($c1,$c2,$c0);
130 &mov("zero",$c2);
131
132 &mul_add_c($a[3],$b[7],$c0,$c1,$c2); &FR($a[3]);
133 &mul_add_c($a[4],$b[6],$c0,$c1,$c2);
134 &mul_add_c($a[5],$b[5],$c0,$c1,$c2);
135 &mul_add_c($a[6],$b[4],$c0,$c1,$c2);
136 &mul_add_c($a[7],$b[3],$c0,$c1,$c2); &FR($b[3]);
137 &st($c0,&QWPw(10,$rp)); &FR($c0); ($c0)=&NR(1);
138 ($c0,$c1,$c2)=($c1,$c2,$c0);
139 &mov("zero",$c2);
140
141 &mul_add_c($a[4],$b[7],$c0,$c1,$c2); &FR($a[4]);
142 &mul_add_c($a[5],$b[6],$c0,$c1,$c2);
143 &mul_add_c($a[6],$b[5],$c0,$c1,$c2);
144 &mul_add_c($a[7],$b[4],$c0,$c1,$c2); &FR($b[4]);
145 &st($c0,&QWPw(11,$rp)); &FR($c0); ($c0)=&NR(1);
146 ($c0,$c1,$c2)=($c1,$c2,$c0);
147 &mov("zero",$c2);
148
149 &mul_add_c($a[5],$b[7],$c0,$c1,$c2); &FR($a[5]);
150 &mul_add_c($a[6],$b[6],$c0,$c1,$c2);
151 &mul_add_c($a[7],$b[5],$c0,$c1,$c2); &FR($b[5]);
152 &st($c0,&QWPw(12,$rp)); &FR($c0); ($c0)=&NR(1);
153 ($c0,$c1,$c2)=($c1,$c2,$c0);
154 &mov("zero",$c2);
155
156 &mul_add_c($a[6],$b[7],$c0,$c1,$c2); &FR($a[6]);
157 &mul_add_c($a[7],$b[6],$c0,$c1,$c2); &FR($b[6]);
158 &st($c0,&QWPw(13,$rp)); &FR($c0); ($c0)=&NR(1);
159 ($c0,$c1,$c2)=($c1,$c2,$c0);
160 &mov("zero",$c2);
161
162 &mul_add_c($a[7],$b[7],$c0,$c1,$c2); &FR($a[7],$b[7]);
163 &st($c0,&QWPw(14,$rp));
164 &st($c1,&QWPw(15,$rp));
165
166 &FR($c0,$c1,$c2);
167
168 &ld($reg_s0,&swtmp(0));
169 &ld($reg_s1,&swtmp(1));
170 &stack_pop(2);
171
172 &function_end($name);
173
174 &fin_pool;
175 }
176
1771;
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/sqr.pl b/src/lib/libcrypto/bn/asm/alpha.works/sqr.pl
new file mode 100644
index 0000000000..a55b696906
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/alpha.works/sqr.pl
@@ -0,0 +1,113 @@
1#!/usr/local/bin/perl
2# alpha assember
3
4sub bn_sqr_words
5 {
6 local($name)=@_;
7 local($cc,$a,$b,$r,$couny);
8
9 &init_pool(3);
10 ($cc)=GR("r0");
11
12 $rp=&wparam(0);
13 $ap=&wparam(1);
14 $count=&wparam(2);
15
16 &function_begin($name,"");
17
18 &comment("");
19 &sub($count,4,$count);
20 &mov("zero",$cc);
21 &br(&label("finish"));
22 &blt($count,&label("finish"));
23
24 ($a0,$r0)=&NR(2);
25 &ld($a0,&QWPw(0,$ap));
26 &ld($r0,&QWPw(0,$rp));
27
28$a=<<'EOF';
29##########################################################
30 &set_label("loop");
31
32 ($a1)=&NR(1); &ld($a1,&QWPw(1,$ap));
33 ($b1)=&NR(1); &ld($b1,&QWPw(1,$bp));
34 ($a2)=&NR(1); &ld($a2,&QWPw(2,$ap));
35 ($b2)=&NR(1); &ld($b2,&QWPw(2,$bp));
36 ($a3)=&NR(1); &ld($a3,&QWPw(3,$ap));
37 ($b3)=&NR(1); &ld($b3,&QWPw(3,$bp));
38
39 ($o0,$t0)=&NR(2);
40 &add($a0,$b0,$o0);
41 &cmpult($o0,$b0,$t0);
42 &add($o0,$cc,$o0);
43 &cmpult($o0,$cc,$cc);
44 &add($cc,$t0,$cc); &FR($t0);
45
46 ($t1,$o1)=&NR(2);
47
48 &add($a1,$b1,$o1); &FR($a1);
49 &cmpult($o1,$b1,$t1); &FR($b1);
50 &add($o1,$cc,$o1);
51 &cmpult($o1,$cc,$cc);
52 &add($cc,$t1,$cc); &FR($t1);
53
54 ($t2,$o2)=&NR(2);
55
56 &add($a2,$b2,$o2); &FR($a2);
57 &cmpult($o2,$b2,$t2); &FR($b2);
58 &add($o2,$cc,$o2);
59 &cmpult($o2,$cc,$cc);
60 &add($cc,$t2,$cc); &FR($t2);
61
62 ($t3,$o3)=&NR(2);
63
64 &add($a3,$b3,$o3); &FR($a3);
65 &cmpult($o3,$b3,$t3); &FR($b3);
66 &add($o3,$cc,$o3);
67 &cmpult($o3,$cc,$cc);
68 &add($cc,$t3,$cc); &FR($t3);
69
70 &st($o0,&QWPw(0,$rp)); &FR($o0);
71 &st($o1,&QWPw(0,$rp)); &FR($o1);
72 &st($o2,&QWPw(0,$rp)); &FR($o2);
73 &st($o3,&QWPw(0,$rp)); &FR($o3);
74
75 &sub($count,4,$count); # count-=4
76 &add($ap,4*$QWS,$ap); # count+=4
77 &add($bp,4*$QWS,$bp); # count+=4
78 &add($rp,4*$QWS,$rp); # count+=4
79
80 &blt($count,&label("finish"));
81 &ld($a0,&QWPw(0,$ap));
82 &ld($b0,&QWPw(0,$bp));
83 &br(&label("loop"));
84EOF
85##################################################
86 # Do the last 0..3 words
87
88 &set_label("last_loop");
89
90 &ld(($a0)=&NR(1),&QWPw(0,$ap)); # get a
91 &mul($a0,$a0,($l0)=&NR(1));
92 &add($ap,$QWS,$ap);
93 &add($rp,2*$QWS,$rp);
94 &sub($count,1,$count);
95 &muh($a0,$a0,($h0)=&NR(1)); &FR($a0);
96 &st($l0,&QWPw(-2,$rp)); &FR($l0);
97 &st($h0,&QWPw(-1,$rp)); &FR($h0);
98
99 &bgt($count,&label("last_loop"));
100 &function_end_A($name);
101
102######################################################
103 &set_label("finish");
104 &add($count,4,$count);
105 &bgt($count,&label("last_loop"));
106
107 &set_label("end");
108 &function_end($name);
109
110 &fin_pool;
111 }
112
1131;
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/sqr_c4.pl b/src/lib/libcrypto/bn/asm/alpha.works/sqr_c4.pl
new file mode 100644
index 0000000000..bf33f5b503
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/alpha.works/sqr_c4.pl
@@ -0,0 +1,109 @@
1#!/usr/local/bin/perl
2# alpha assember
3
4sub sqr_add_c
5 {
6 local($a,$c0,$c1,$c2)=@_;
7 local($l1,$h1,$t1,$t2);
8
9 &mul($a,$a,($l1)=&NR(1));
10 &muh($a,$a,($h1)=&NR(1));
11 &add($c0,$l1,$c0);
12 &add($c1,$h1,$c1);
13 &cmpult($c0,$l1,($t1)=&NR(1)); &FR($l1);
14 &cmpult($c1,$h1,($t2)=&NR(1)); &FR($h1);
15 &add($c1,$t1,$c1); &FR($t1);
16 &add($c2,$t2,$c2); &FR($t2);
17 }
18
19sub sqr_add_c2
20 {
21 local($a,$b,$c0,$c1,$c2)=@_;
22 local($l1,$h1,$t1,$t2);
23
24 &mul($a,$b,($l1)=&NR(1));
25 &muh($a,$b,($h1)=&NR(1));
26 &cmplt($l1,"zero",($lc1)=&NR(1));
27 &cmplt($h1,"zero",($hc1)=&NR(1));
28 &add($l1,$l1,$l1);
29 &add($h1,$h1,$h1);
30 &add($h1,$lc1,$h1); &FR($lc1);
31 &add($c2,$hc1,$c2); &FR($hc1);
32
33 &add($c0,$l1,$c0);
34 &add($c1,$h1,$c1);
35 &cmpult($c0,$l1,($lc1)=&NR(1)); &FR($l1);
36 &cmpult($c1,$h1,($hc1)=&NR(1)); &FR($h1);
37
38 &add($c1,$lc1,$c1); &FR($lc1);
39 &add($c2,$hc1,$c2); &FR($hc1);
40 }
41
42
43sub bn_sqr_comba4
44 {
45 local($name)=@_;
46 local(@a,@b,$r,$c0,$c1,$c2);
47
48 $cnt=1;
49 &init_pool(2);
50
51 $rp=&wparam(0);
52 $ap=&wparam(1);
53
54 &function_begin($name,"");
55
56 &comment("");
57
58 &ld(($a[0])=&NR(1),&QWPw(0,$ap));
59 &ld(($a[1])=&NR(1),&QWPw(1,$ap));
60 &ld(($a[2])=&NR(1),&QWPw(2,$ap));
61 &ld(($a[3])=&NR(1),&QWPw(3,$ap)); &FR($ap);
62
63 ($c0,$c1,$c2)=&NR(3);
64
65 &mov("zero",$c2);
66 &mul($a[0],$a[0],$c0);
67 &muh($a[0],$a[0],$c1);
68 &st($c0,&QWPw(0,$rp));
69 ($c0,$c1,$c2)=($c1,$c2,$c0);
70 &mov("zero",$c2);
71
72 &sqr_add_c2($a[0],$a[1],$c0,$c1,$c2);
73 &st($c0,&QWPw(1,$rp));
74 ($c0,$c1,$c2)=($c1,$c2,$c0);
75 &mov("zero",$c2);
76
77 &sqr_add_c($a[1],$c0,$c1,$c2);
78 &sqr_add_c2($a[2],$a[0],$c0,$c1,$c2);
79 &st($c0,&QWPw(2,$rp));
80 ($c0,$c1,$c2)=($c1,$c2,$c0);
81 &mov("zero",$c2);
82
83 &sqr_add_c2($a[3],$a[0],$c0,$c1,$c2);
84 &sqr_add_c2($a[2],$a[1],$c0,$c1,$c2);
85 &st($c0,&QWPw(3,$rp));
86 ($c0,$c1,$c2)=($c1,$c2,$c0);
87 &mov("zero",$c2);
88
89 &sqr_add_c($a[2],$c0,$c1,$c2);
90 &sqr_add_c2($a[3],$a[1],$c0,$c1,$c2);
91 &st($c0,&QWPw(4,$rp));
92 ($c0,$c1,$c2)=($c1,$c2,$c0);
93 &mov("zero",$c2);
94
95 &sqr_add_c2($a[3],$a[2],$c0,$c1,$c2);
96 &st($c0,&QWPw(5,$rp));
97 ($c0,$c1,$c2)=($c1,$c2,$c0);
98 &mov("zero",$c2);
99
100 &sqr_add_c($a[3],$c0,$c1,$c2);
101 &st($c0,&QWPw(6,$rp));
102 &st($c1,&QWPw(7,$rp));
103
104 &function_end($name);
105
106 &fin_pool;
107 }
108
1091;
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/sqr_c8.pl b/src/lib/libcrypto/bn/asm/alpha.works/sqr_c8.pl
new file mode 100644
index 0000000000..b4afe085f1
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/alpha.works/sqr_c8.pl
@@ -0,0 +1,132 @@
1#!/usr/local/bin/perl
2# alpha assember
3
4sub bn_sqr_comba8
5 {
6 local($name)=@_;
7 local(@a,@b,$r,$c0,$c1,$c2);
8
9 $cnt=1;
10 &init_pool(2);
11
12 $rp=&wparam(0);
13 $ap=&wparam(1);
14
15 &function_begin($name,"");
16
17 &comment("");
18
19 &ld(($a[0])=&NR(1),&QWPw(0,$ap));
20 &ld(($a[1])=&NR(1),&QWPw(1,$ap));
21 &ld(($a[2])=&NR(1),&QWPw(2,$ap));
22 &ld(($a[3])=&NR(1),&QWPw(3,$ap));
23 &ld(($a[4])=&NR(1),&QWPw(4,$ap));
24 &ld(($a[5])=&NR(1),&QWPw(5,$ap));
25 &ld(($a[6])=&NR(1),&QWPw(6,$ap));
26 &ld(($a[7])=&NR(1),&QWPw(7,$ap)); &FR($ap);
27
28 ($c0,$c1,$c2)=&NR(3);
29
30 &mov("zero",$c2);
31 &mul($a[0],$a[0],$c0);
32 &muh($a[0],$a[0],$c1);
33 &st($c0,&QWPw(0,$rp));
34 ($c0,$c1,$c2)=($c1,$c2,$c0);
35 &mov("zero",$c2);
36
37 &sqr_add_c2($a[1],$a[0],$c0,$c1,$c2);
38 &st($c0,&QWPw(1,$rp));
39 ($c0,$c1,$c2)=($c1,$c2,$c0);
40 &mov("zero",$c2);
41
42 &sqr_add_c($a[1],$c0,$c1,$c2);
43 &sqr_add_c2($a[2],$a[0],$c0,$c1,$c2);
44 &st($c0,&QWPw(2,$rp));
45 ($c0,$c1,$c2)=($c1,$c2,$c0);
46 &mov("zero",$c2);
47
48 &sqr_add_c2($a[2],$a[1],$c0,$c1,$c2);
49 &sqr_add_c2($a[3],$a[0],$c0,$c1,$c2);
50 &st($c0,&QWPw(3,$rp));
51 ($c0,$c1,$c2)=($c1,$c2,$c0);
52 &mov("zero",$c2);
53
54 &sqr_add_c($a[2],$c0,$c1,$c2);
55 &sqr_add_c2($a[3],$a[1],$c0,$c1,$c2);
56 &sqr_add_c2($a[4],$a[0],$c0,$c1,$c2);
57 &st($c0,&QWPw(4,$rp));
58 ($c0,$c1,$c2)=($c1,$c2,$c0);
59 &mov("zero",$c2);
60
61 &sqr_add_c2($a[3],$a[2],$c0,$c1,$c2);
62 &sqr_add_c2($a[4],$a[1],$c0,$c1,$c2);
63 &sqr_add_c2($a[5],$a[0],$c0,$c1,$c2);
64 &st($c0,&QWPw(5,$rp));
65 ($c0,$c1,$c2)=($c1,$c2,$c0);
66 &mov("zero",$c2);
67
68 &sqr_add_c($a[3],$c0,$c1,$c2);
69 &sqr_add_c2($a[4],$a[2],$c0,$c1,$c2);
70 &sqr_add_c2($a[5],$a[1],$c0,$c1,$c2);
71 &sqr_add_c2($a[6],$a[0],$c0,$c1,$c2);
72 &st($c0,&QWPw(6,$rp));
73 ($c0,$c1,$c2)=($c1,$c2,$c0);
74 &mov("zero",$c2);
75
76 &sqr_add_c2($a[4],$a[3],$c0,$c1,$c2);
77 &sqr_add_c2($a[5],$a[2],$c0,$c1,$c2);
78 &sqr_add_c2($a[6],$a[1],$c0,$c1,$c2);
79 &sqr_add_c2($a[7],$a[0],$c0,$c1,$c2);
80 &st($c0,&QWPw(7,$rp));
81 ($c0,$c1,$c2)=($c1,$c2,$c0);
82 &mov("zero",$c2);
83
84 &sqr_add_c($a[4],$c0,$c1,$c2);
85 &sqr_add_c2($a[5],$a[3],$c0,$c1,$c2);
86 &sqr_add_c2($a[6],$a[2],$c0,$c1,$c2);
87 &sqr_add_c2($a[7],$a[1],$c0,$c1,$c2);
88 &st($c0,&QWPw(8,$rp));
89 ($c0,$c1,$c2)=($c1,$c2,$c0);
90 &mov("zero",$c2);
91
92 &sqr_add_c2($a[5],$a[4],$c0,$c1,$c2);
93 &sqr_add_c2($a[6],$a[3],$c0,$c1,$c2);
94 &sqr_add_c2($a[7],$a[2],$c0,$c1,$c2);
95 &st($c0,&QWPw(9,$rp));
96 ($c0,$c1,$c2)=($c1,$c2,$c0);
97 &mov("zero",$c2);
98
99 &sqr_add_c($a[5],$c0,$c1,$c2);
100 &sqr_add_c2($a[6],$a[4],$c0,$c1,$c2);
101 &sqr_add_c2($a[7],$a[3],$c0,$c1,$c2);
102 &st($c0,&QWPw(10,$rp));
103 ($c0,$c1,$c2)=($c1,$c2,$c0);
104 &mov("zero",$c2);
105
106 &sqr_add_c2($a[6],$a[5],$c0,$c1,$c2);
107 &sqr_add_c2($a[7],$a[4],$c0,$c1,$c2);
108 &st($c0,&QWPw(11,$rp));
109 ($c0,$c1,$c2)=($c1,$c2,$c0);
110 &mov("zero",$c2);
111
112 &sqr_add_c($a[6],$c0,$c1,$c2);
113 &sqr_add_c2($a[7],$a[5],$c0,$c1,$c2);
114 &st($c0,&QWPw(12,$rp));
115 ($c0,$c1,$c2)=($c1,$c2,$c0);
116 &mov("zero",$c2);
117
118 &sqr_add_c2($a[7],$a[6],$c0,$c1,$c2);
119 &st($c0,&QWPw(13,$rp));
120 ($c0,$c1,$c2)=($c1,$c2,$c0);
121 &mov("zero",$c2);
122
123 &sqr_add_c($a[7],$c0,$c1,$c2);
124 &st($c0,&QWPw(14,$rp));
125 &st($c1,&QWPw(15,$rp));
126
127 &function_end($name);
128
129 &fin_pool;
130 }
131
1321;
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/sub.pl b/src/lib/libcrypto/bn/asm/alpha.works/sub.pl
new file mode 100644
index 0000000000..d998da5c21
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/alpha.works/sub.pl
@@ -0,0 +1,108 @@
1#!/usr/local/bin/perl
2# alpha assember
3
4sub bn_sub_words
5 {
6 local($name)=@_;
7 local($cc,$a,$b,$r);
8
9 &init_pool(4);
10 ($cc)=GR("r0");
11
12 $rp=&wparam(0);
13 $ap=&wparam(1);
14 $bp=&wparam(2);
15 $count=&wparam(3);
16
17 &function_begin($name,"");
18
19 &comment("");
20 &sub($count,4,$count);
21 &mov("zero",$cc);
22 &blt($count,&label("finish"));
23
24 ($a0,$b0)=&NR(2);
25 &ld($a0,&QWPw(0,$ap));
26 &ld($b0,&QWPw(0,$bp));
27
28##########################################################
29 &set_label("loop");
30
31 ($a1,$tmp,$b1,$a2,$b2,$a3,$b3,$o0)=&NR(8);
32 &ld($a1,&QWPw(1,$ap));
33 &cmpult($a0,$b0,$tmp); # will we borrow?
34 &ld($b1,&QWPw(1,$bp));
35 &sub($a0,$b0,$a0); # do the subtract
36 &ld($a2,&QWPw(2,$ap));
37 &cmpult($a0,$cc,$b0); # will we borrow?
38 &ld($b2,&QWPw(2,$bp));
39 &sub($a0,$cc,$o0); # will we borrow?
40 &ld($a3,&QWPw(3,$ap));
41 &add($b0,$tmp,$cc); ($t1,$o1)=&NR(2); &FR($tmp);
42
43 &cmpult($a1,$b1,$t1); # will we borrow?
44 &sub($a1,$b1,$a1); # do the subtract
45 &ld($b3,&QWPw(3,$bp));
46 &cmpult($a1,$cc,$b1); # will we borrow?
47 &sub($a1,$cc,$o1); # will we borrow?
48 &add($b1,$t1,$cc); ($tmp,$o2)=&NR(2); &FR($t1,$a1,$b1);
49
50 &cmpult($a2,$b2,$tmp); # will we borrow?
51 &sub($a2,$b2,$a2); # do the subtract
52 &st($o0,&QWPw(0,$rp)); &FR($o0); # save
53 &cmpult($a2,$cc,$b2); # will we borrow?
54 &sub($a2,$cc,$o2); # will we borrow?
55 &add($b2,$tmp,$cc); ($t3,$o3)=&NR(2); &FR($tmp,$a2,$b2);
56
57 &cmpult($a3,$b3,$t3); # will we borrow?
58 &sub($a3,$b3,$a3); # do the subtract
59 &st($o1,&QWPw(1,$rp)); &FR($o1);
60 &cmpult($a3,$cc,$b3); # will we borrow?
61 &sub($a3,$cc,$o3); # will we borrow?
62 &add($b3,$t3,$cc); &FR($t3,$a3,$b3);
63
64 &st($o2,&QWPw(2,$rp)); &FR($o2);
65 &sub($count,4,$count); # count-=4
66 &st($o3,&QWPw(3,$rp)); &FR($o3);
67 &add($ap,4*$QWS,$ap); # count+=4
68 &add($bp,4*$QWS,$bp); # count+=4
69 &add($rp,4*$QWS,$rp); # count+=4
70
71 &blt($count,&label("finish"));
72 &ld($a0,&QWPw(0,$ap));
73 &ld($b0,&QWPw(0,$bp));
74 &br(&label("loop"));
75##################################################
76 # Do the last 0..3 words
77
78 &set_label("last_loop");
79
80 &ld($a0,&QWPw(0,$ap)); # get a
81 &ld($b0,&QWPw(0,$bp)); # get b
82 &cmpult($a0,$b0,$tmp); # will we borrow?
83 &sub($a0,$b0,$a0); # do the subtract
84 &cmpult($a0,$cc,$b0); # will we borrow?
85 &sub($a0,$cc,$a0); # will we borrow?
86 &st($a0,&QWPw(0,$rp)); # save
87 &add($b0,$tmp,$cc); # add the borrows
88
89 &add($ap,$QWS,$ap);
90 &add($bp,$QWS,$bp);
91 &add($rp,$QWS,$rp);
92 &sub($count,1,$count);
93 &bgt($count,&label("last_loop"));
94 &function_end_A($name);
95
96######################################################
97 &set_label("finish");
98 &add($count,4,$count);
99 &bgt($count,&label("last_loop"));
100
101 &FR($a0,$b0);
102 &set_label("end");
103 &function_end($name);
104
105 &fin_pool;
106 }
107
1081;
diff --git a/src/lib/libcrypto/bn/asm/alpha/add.pl b/src/lib/libcrypto/bn/asm/alpha/add.pl
new file mode 100644
index 0000000000..13bf516428
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/alpha/add.pl
@@ -0,0 +1,118 @@
1#!/usr/local/bin/perl
2# alpha assember
3
4sub bn_add_words
5 {
6 local($name)=@_;
7 local($cc,$a,$b,$r);
8
9 &init_pool(4);
10 ($cc)=GR("r0");
11
12 $rp=&wparam(0);
13 $ap=&wparam(1);
14 $bp=&wparam(2);
15 $count=&wparam(3);
16
17 &function_begin($name,"");
18
19 &comment("");
20 &sub($count,4,$count);
21 &mov("zero",$cc);
22 &blt($count,&label("finish"));
23
24 ($a0,$b0)=&NR(2);
25
26##########################################################
27 &set_label("loop");
28
29 &ld(($a0)=&NR(1),&QWPw(0,$ap));
30 &ld(($b0)=&NR(1),&QWPw(0,$bp));
31 &ld(($a1)=&NR(1),&QWPw(1,$ap));
32 &ld(($b1)=&NR(1),&QWPw(1,$bp));
33
34 ($o0,$t0)=&NR(2);
35 &add($a0,$b0,$o0);
36 &ld(($a2)=&NR(1),&QWPw(2,$ap));
37 &cmpult($o0,$b0,$t0);
38 &add($o0,$cc,$o0);
39 &cmpult($o0,$cc,$cc);
40 &ld(($b2)=&NR(1),&QWPw(2,$bp));
41 &add($cc,$t0,$cc); &FR($t0);
42
43 ($t1,$o1)=&NR(2);
44
45 &add($a1,$b1,$o1); &FR($a1);
46 &cmpult($o1,$b1,$t1); &FR($b1);
47 &add($o1,$cc,$o1);
48 &cmpult($o1,$cc,$cc);
49 &ld(($a3)=&NR(1),&QWPw(3,$ap));
50 &add($cc,$t1,$cc); &FR($t1);
51
52 ($t2,$o2)=&NR(2);
53
54 &add($a2,$b2,$o2); &FR($a2);
55 &cmpult($o2,$b2,$t2); &FR($b2);
56 &add($o2,$cc,$o2);
57 &cmpult($o2,$cc,$cc);
58 &ld(($b3)=&NR(1),&QWPw(3,$bp));
59 &st($o0,&QWPw(0,$rp)); &FR($o0);
60 &add($cc,$t2,$cc); &FR($t2);
61
62 ($t3,$o3)=&NR(2);
63
64 &st($o1,&QWPw(0,$rp)); &FR($o1);
65 &add($a3,$b3,$o3); &FR($a3);
66 &cmpult($o3,$b3,$t3); &FR($b3);
67 &add($o3,$cc,$o3);
68 &st($o2,&QWPw(0,$rp)); &FR($o2);
69 &cmpult($o3,$cc,$cc);
70 &st($o3,&QWPw(0,$rp)); &FR($o3);
71 &add($cc,$t3,$cc); &FR($t3);
72
73
74 &sub($count,4,$count); # count-=4
75 &add($ap,4*$QWS,$ap); # count+=4
76 &add($bp,4*$QWS,$bp); # count+=4
77 &add($rp,4*$QWS,$rp); # count+=4
78
79 ###
80 &bge($count,&label("loop"));
81 ###
82 &br(&label("finish"));
83##################################################
84 # Do the last 0..3 words
85
86 ($t0,$o0)=&NR(2);
87 &set_label("last_loop");
88
89 &ld($a0,&QWPw(0,$ap)); # get a
90 &ld($b0,&QWPw(0,$bp)); # get b
91 &add($ap,$QWS,$ap);
92 &add($bp,$QWS,$bp);
93 &add($a0,$b0,$o0);
94 &sub($count,1,$count);
95 &cmpult($o0,$b0,$t0); # will we borrow?
96 &add($o0,$cc,$o0); # will we borrow?
97 &cmpult($o0,$cc,$cc); # will we borrow?
98 &add($rp,$QWS,$rp);
99 &st($o0,&QWPw(-1,$rp)); # save
100 &add($cc,$t0,$cc); # add the borrows
101
102 ###
103 &bgt($count,&label("last_loop"));
104 &function_end_A($name);
105
106######################################################
107 &set_label("finish");
108 &add($count,4,$count);
109 &bgt($count,&label("last_loop"));
110
111 &FR($o0,$t0,$a0,$b0);
112 &set_label("end");
113 &function_end($name);
114
115 &fin_pool;
116 }
117
1181;
diff --git a/src/lib/libcrypto/bn/asm/alpha/div.pl b/src/lib/libcrypto/bn/asm/alpha/div.pl
new file mode 100644
index 0000000000..e9e680897a
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/alpha/div.pl
@@ -0,0 +1,144 @@
1#!/usr/local/bin/perl
2
3sub bn_div_words
4 {
5 local($data)=<<'EOF';
6 #
7 # What follows was taken directly from the C compiler with a few
8 # hacks to redo the lables.
9 #
10.text
11 .set noreorder
12 .set volatile
13 .align 3
14 .globl bn_div_words
15 .ent bn_div_words
16bn_div_words
17 ldgp $29,0($27)
18bn_div_words.ng:
19 lda $30,-48($30)
20 .frame $30,48,$26,0
21 stq $26,0($30)
22 stq $9,8($30)
23 stq $10,16($30)
24 stq $11,24($30)
25 stq $12,32($30)
26 stq $13,40($30)
27 .mask 0x4003e00,-48
28 .prologue 1
29 bis $16,$16,$9
30 bis $17,$17,$10
31 bis $18,$18,$11
32 bis $31,$31,$13
33 bis $31,2,$12
34 bne $11,$9119
35 lda $0,-1
36 br $31,$9136
37 .align 4
38$9119:
39 bis $11,$11,$16
40 jsr $26,BN_num_bits_word
41 ldgp $29,0($26)
42 subq $0,64,$1
43 beq $1,$9120
44 bis $31,1,$1
45 sll $1,$0,$1
46 cmpule $9,$1,$1
47 bne $1,$9120
48 # lda $16,_IO_stderr_
49 # lda $17,$C32
50 # bis $0,$0,$18
51 # jsr $26,fprintf
52 # ldgp $29,0($26)
53 jsr $26,abort
54 ldgp $29,0($26)
55 .align 4
56$9120:
57 bis $31,64,$3
58 cmpult $9,$11,$2
59 subq $3,$0,$1
60 addl $1,$31,$0
61 subq $9,$11,$1
62 cmoveq $2,$1,$9
63 beq $0,$9122
64 zapnot $0,15,$2
65 subq $3,$0,$1
66 sll $11,$2,$11
67 sll $9,$2,$3
68 srl $10,$1,$1
69 sll $10,$2,$10
70 bis $3,$1,$9
71$9122:
72 srl $11,32,$5
73 zapnot $11,15,$6
74 lda $7,-1
75 .align 5
76$9123:
77 srl $9,32,$1
78 subq $1,$5,$1
79 bne $1,$9126
80 zapnot $7,15,$27
81 br $31,$9127
82 .align 4
83$9126:
84 bis $9,$9,$24
85 bis $5,$5,$25
86 divqu $24,$25,$27
87$9127:
88 srl $10,32,$4
89 .align 5
90$9128:
91 mulq $27,$5,$1
92 subq $9,$1,$3
93 zapnot $3,240,$1
94 bne $1,$9129
95 mulq $6,$27,$2
96 sll $3,32,$1
97 addq $1,$4,$1
98 cmpule $2,$1,$2
99 bne $2,$9129
100 subq $27,1,$27
101 br $31,$9128
102 .align 4
103$9129:
104 mulq $27,$6,$1
105 mulq $27,$5,$4
106 srl $1,32,$3
107 sll $1,32,$1
108 addq $4,$3,$4
109 cmpult $10,$1,$2
110 subq $10,$1,$10
111 addq $2,$4,$2
112 cmpult $9,$2,$1
113 bis $2,$2,$4
114 beq $1,$9134
115 addq $9,$11,$9
116 subq $27,1,$27
117$9134:
118 subl $12,1,$12
119 subq $9,$4,$9
120 beq $12,$9124
121 sll $27,32,$13
122 sll $9,32,$2
123 srl $10,32,$1
124 sll $10,32,$10
125 bis $2,$1,$9
126 br $31,$9123
127 .align 4
128$9124:
129 bis $13,$27,$0
130$9136:
131 ldq $26,0($30)
132 ldq $9,8($30)
133 ldq $10,16($30)
134 ldq $11,24($30)
135 ldq $12,32($30)
136 ldq $13,40($30)
137 addq $30,48,$30
138 ret $31,($26),1
139 .end bn_div_words
140EOF
141 &asm_add($data);
142 }
143
1441;
diff --git a/src/lib/libcrypto/bn/asm/alpha/mul.pl b/src/lib/libcrypto/bn/asm/alpha/mul.pl
new file mode 100644
index 0000000000..76c926566c
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/alpha/mul.pl
@@ -0,0 +1,104 @@
1#!/usr/local/bin/perl
2# alpha assember
3
4sub bn_mul_words
5 {
6 local($name)=@_;
7 local($cc,$a,$b,$r,$couny);
8
9 &init_pool(4);
10 ($cc)=GR("r0");
11
12 $rp=&wparam(0);
13 $ap=&wparam(1);
14 $count=&wparam(2);
15 $word=&wparam(3);
16
17 &function_begin($name,"");
18
19 &comment("");
20 &sub($count,4,$count);
21 &mov("zero",$cc);
22 ###
23 &blt($count,&label("finish"));
24
25 ($a0)=&NR(1); &ld($a0,&QWPw(0,$ap));
26
27 &set_label("loop");
28
29 ($a1)=&NR(1); &ld($a1,&QWPw(1,$ap));
30 ($a2)=&NR(1); &ld($a2,&QWPw(2,$ap));
31
32 &muh($a0,$word,($h0)=&NR(1)); &FR($a0);
33 ($a3)=&NR(1); &ld($a3,&QWPw(3,$ap));
34 ### wait 8
35 &mul($a0,$word,($l0)=&NR(1)); &FR($a0);
36 ### wait 8
37 &muh($a1,$word,($h1)=&NR(1)); &FR($a1);
38 &add($l0,$cc,$l0); ### wait 8
39 &mul($a1,$word,($l1)=&NR(1)); &FR($a1);
40 &cmpult($l0,$cc,$cc); ### wait 8
41 &muh($a2,$word,($h2)=&NR(1)); &FR($a2);
42 &add($h0,$cc,$cc); &FR($h0); ### wait 8
43 &mul($a2,$word,($l2)=&NR(1)); &FR($a2);
44 &add($l1,$cc,$l1); ### wait 8
45 &st($l0,&QWPw(0,$rp)); &FR($l0);
46 &cmpult($l1,$cc,$cc); ### wait 8
47 &muh($a3,$word,($h3)=&NR(1)); &FR($a3);
48 &add($h1,$cc,$cc); &FR($h1);
49 &mul($a3,$word,($l3)=&NR(1)); &FR($a3);
50 &add($l2,$cc,$l2);
51 &st($l1,&QWPw(1,$rp)); &FR($l1);
52 &cmpult($l2,$cc,$cc);
53 &add($h2,$cc,$cc); &FR($h2);
54 &sub($count,4,$count); # count-=4
55 &st($l2,&QWPw(2,$rp)); &FR($l2);
56 &add($l3,$cc,$l3);
57 &cmpult($l3,$cc,$cc);
58 &add($bp,4*$QWS,$bp); # count+=4
59 &add($h3,$cc,$cc); &FR($h3);
60 &add($ap,4*$QWS,$ap); # count+=4
61 &st($l3,&QWPw(3,$rp)); &FR($l3);
62 &add($rp,4*$QWS,$rp); # count+=4
63 ###
64 &blt($count,&label("finish"));
65 ($a0)=&NR(1); &ld($a0,&QWPw(0,$ap));
66 &br(&label("finish"));
67##################################################
68
69##################################################
70 # Do the last 0..3 words
71
72 &set_label("last_loop");
73
74 &ld(($a0)=&NR(1),&QWPw(0,$ap)); # get a
75 ###
76 ###
77 ###
78 &muh($a0,$word,($h0)=&NR(1));
79 ### Wait 8 for next mul issue
80 &mul($a0,$word,($l0)=&NR(1)); &FR($a0)
81 &add($ap,$QWS,$ap);
82 ### Loose 12 until result is available
83 &add($rp,$QWS,$rp);
84 &sub($count,1,$count);
85 &add($l0,$cc,$l0);
86 ###
87 &st($l0,&QWPw(-1,$rp)); &FR($l0);
88 &cmpult($l0,$cc,$cc);
89 &add($h0,$cc,$cc); &FR($h0);
90 &bgt($count,&label("last_loop"));
91 &function_end_A($name);
92
93######################################################
94 &set_label("finish");
95 &add($count,4,$count);
96 &bgt($count,&label("last_loop"));
97
98 &set_label("end");
99 &function_end($name);
100
101 &fin_pool;
102 }
103
1041;
diff --git a/src/lib/libcrypto/bn/asm/alpha/mul_add.pl b/src/lib/libcrypto/bn/asm/alpha/mul_add.pl
new file mode 100644
index 0000000000..0d6df69bc4
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/alpha/mul_add.pl
@@ -0,0 +1,123 @@
1#!/usr/local/bin/perl
2# alpha assember
3
4sub bn_mul_add_words
5 {
6 local($name)=@_;
7 local($cc,$a,$b,$r,$couny);
8
9 &init_pool(4);
10 ($cc)=GR("r0");
11
12 $rp=&wparam(0);
13 $ap=&wparam(1);
14 $count=&wparam(2);
15 $word=&wparam(3);
16
17 &function_begin($name,"");
18
19 &comment("");
20 &sub($count,4,$count);
21 &mov("zero",$cc);
22 ###
23 &blt($count,&label("finish"));
24
25 &ld(($a0)=&NR(1),&QWPw(0,$ap));
26
27$a=<<'EOF';
28##########################################################
29 &set_label("loop");
30
31 &ld(($r0)=&NR(1),&QWPw(0,$rp));
32 &ld(($a1)=&NR(1),&QWPw(1,$ap));
33 &muh($a0,$word,($h0)=&NR(1));
34 &ld(($r1)=&NR(1),&QWPw(1,$rp));
35 &ld(($a2)=&NR(1),&QWPw(2,$ap));
36 ###
37 &mul($a0,$word,($l0)=&NR(1)); &FR($a0);
38 &ld(($r2)=&NR(1),&QWPw(2,$rp));
39 &muh($a1,$word,($h1)=&NR(1));
40 &ld(($a3)=&NR(1),&QWPw(3,$ap));
41 &mul($a1,$word,($l1)=&NR(1)); &FR($a1);
42 &ld(($r3)=&NR(1),&QWPw(3,$rp));
43 &add($r0,$l0,$r0);
44 &add($r1,$l1,$r1);
45 &cmpult($r0,$l0,($t0)=&NR(1)); &FR($l0);
46 &cmpult($r1,$l1,($t1)=&NR(1)); &FR($l1);
47 &muh($a2,$word,($h2)=&NR(1));
48 &add($r0,$cc,$r0);
49 &add($h0,$t0,$h0); &FR($t0);
50 &cmpult($r0,$cc,$cc);
51 &add($h1,$t1,$h1); &FR($t1);
52 &add($h0,$cc,$cc); &FR($h0);
53 &mul($a2,$word,($l2)=&NR(1)); &FR($a2);
54 &add($r1,$cc,$r1);
55 &cmpult($r1,$cc,$cc);
56 &add($r2,$l2,$r2);
57 &add($h1,$cc,$cc); &FR($h1);
58 &cmpult($r2,$l2,($t2)=&NR(1)); &FR($l2);
59 &muh($a3,$word,($h3)=&NR(1));
60 &add($r2,$cc,$r2);
61 &st($r0,&QWPw(0,$rp)); &FR($r0);
62 &add($h2,$t2,$h2); &FR($t2);
63 &st($r1,&QWPw(1,$rp)); &FR($r1);
64 &cmpult($r2,$cc,$cc);
65 &mul($a3,$word,($l3)=&NR(1)); &FR($a3);
66 &add($h2,$cc,$cc); &FR($h2);
67 &st($r2,&QWPw(2,$rp)); &FR($r2);
68 &sub($count,4,$count); # count-=4
69 &add($rp,4*$QWS,$rp); # count+=4
70 &add($r3,$l3,$r3);
71 &add($ap,4*$QWS,$ap); # count+=4
72 &cmpult($r3,$l3,($t3)=&NR(1)); &FR($l3);
73 &add($r3,$cc,$r3);
74 &add($h3,$t3,$h3); &FR($t3);
75 &cmpult($r3,$cc,$cc);
76 &st($r3,&QWPw(-1,$rp)); &FR($r3);
77 &add($h3,$cc,$cc); &FR($h3);
78
79 ###
80 &blt($count,&label("finish"));
81 &ld(($a0)=&NR(1),&QWPw(0,$ap));
82 &br(&label("loop"));
83EOF
84##################################################
85 # Do the last 0..3 words
86
87 &set_label("last_loop");
88
89 &ld(($a0)=&NR(1),&QWPw(0,$ap)); # get a
90 &ld(($r0)=&NR(1),&QWPw(0,$rp)); # get b
91 ###
92 ###
93 &muh($a0,$word,($h0)=&NR(1)); &FR($a0);
94 ### wait 8
95 &mul($a0,$word,($l0)=&NR(1)); &FR($a0);
96 &add($rp,$QWS,$rp);
97 &add($ap,$QWS,$ap);
98 &sub($count,1,$count);
99 ### wait 3 until l0 is available
100 &add($r0,$l0,$r0);
101 ###
102 &cmpult($r0,$l0,($t0)=&NR(1)); &FR($l0);
103 &add($r0,$cc,$r0);
104 &add($h0,$t0,$h0); &FR($t0);
105 &cmpult($r0,$cc,$cc);
106 &add($h0,$cc,$cc); &FR($h0);
107
108 &st($r0,&QWPw(-1,$rp)); &FR($r0);
109 &bgt($count,&label("last_loop"));
110 &function_end_A($name);
111
112######################################################
113 &set_label("finish");
114 &add($count,4,$count);
115 &bgt($count,&label("last_loop"));
116
117 &set_label("end");
118 &function_end($name);
119
120 &fin_pool;
121 }
122
1231;
diff --git a/src/lib/libcrypto/bn/asm/alpha/mul_c4.pl b/src/lib/libcrypto/bn/asm/alpha/mul_c4.pl
new file mode 100644
index 0000000000..9cc876ded4
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/alpha/mul_c4.pl
@@ -0,0 +1,215 @@
1#!/usr/local/bin/perl
2# alpha assember
3
4# upto
5
6sub mul_add_c
7 {
8 local($a,$b,$c0,$c1,$c2)=@_;
9 local($l1,$h1,$t1,$t2);
10
11 &mul($a,$b,($l1)=&NR(1));
12 &muh($a,$b,($h1)=&NR(1));
13 &add($c0,$l1,$c0);
14 &cmpult($c0,$l1,($t1)=&NR(1)); &FR($l1);
15 &add($t1,$h1,$h1); &FR($t1);
16 &add($c1,$h1,$c1);
17 &cmpult($c1,$h1,($t2)=&NR(1)); &FR($h1);
18 &add($c2,$t2,$c2); &FR($t2);
19 }
20
21sub bn_mul_comba4
22 {
23 local($name)=@_;
24 local(@a,@b,$r,$c0,$c1,$c2);
25
26 $cnt=1;
27 &init_pool(3);
28
29 $rp=&wparam(0);
30 $ap=&wparam(1);
31 $bp=&wparam(2);
32
33 &function_begin($name,"");
34
35 &comment("");
36
37 &ld(($a[0])=&NR(1),&QWPw(0,$ap));
38 &ld(($b[0])=&NR(1),&QWPw(0,$bp));
39 &ld(($a[1])=&NR(1),&QWPw(1,$ap));
40 &ld(($b[1])=&NR(1),&QWPw(1,$bp));
41 &mul($a[0],$b[0],($r00)=&NR(1));
42 &ld(($a[2])=&NR(1),&QWPw(2,$ap));
43 &ld(($b[2])=&NR(1),&QWPw(2,$bp));
44 &muh($a[0],$b[0],($r01)=&NR(1));
45 &FR($ap); &ld(($a[3])=&NR(1),&QWPw(3,$ap));
46 &FR($bp); &ld(($b[3])=&NR(1),&QWPw(3,$bp));
47 &mul($a[0],$b[1],($r02)=&NR(1));
48
49 ($R,$H1,$H2)=&NR(3);
50
51 &st($r00,&QWPw(0,$rp)); &FR($r00);
52
53 &mov("zero",$R);
54 &mul($a[1],$b[0],($r03)=&NR(1));
55
56 &mov("zero",$H1);
57 &mov("zero",$H0);
58 &add($R,$r01,$R);
59 &muh($a[0],$b[1],($r04)=&NR(1));
60 &cmpult($R,$r01,($t01)=&NR(1)); &FR($r01);
61 &add($R,$r02,$R);
62 &add($H1,$t01,$H1) &FR($t01);
63 &muh($a[1],$b[0],($r05)=&NR(1));
64 &cmpult($R,$r02,($t02)=&NR(1)); &FR($r02);
65 &add($R,$r03,$R);
66 &add($H2,$t02,$H2) &FR($t02);
67 &mul($a[0],$b[2],($r06)=&NR(1));
68 &cmpult($R,$r03,($t03)=&NR(1)); &FR($r03);
69 &add($H1,$t03,$H1) &FR($t03);
70 &st($R,&QWPw(1,$rp));
71 &add($H1,$H2,$R);
72
73 &mov("zero",$H1);
74 &add($R,$r04,$R);
75 &mov("zero",$H2);
76 &mul($a[1],$b[1],($r07)=&NR(1));
77 &cmpult($R,$r04,($t04)=&NR(1)); &FR($r04);
78 &add($R,$r05,$R);
79 &add($H1,$t04,$H1) &FR($t04);
80 &mul($a[2],$b[0],($r08)=&NR(1));
81 &cmpult($R,$r05,($t05)=&NR(1)); &FR($r05);
82 &add($R,$r01,$R);
83 &add($H2,$t05,$H2) &FR($t05);
84 &muh($a[0],$b[2],($r09)=&NR(1));
85 &cmpult($R,$r06,($t06)=&NR(1)); &FR($r06);
86 &add($R,$r07,$R);
87 &add($H1,$t06,$H1) &FR($t06);
88 &muh($a[1],$b[1],($r10)=&NR(1));
89 &cmpult($R,$r07,($t07)=&NR(1)); &FR($r07);
90 &add($R,$r08,$R);
91 &add($H2,$t07,$H2) &FR($t07);
92 &muh($a[2],$b[0],($r11)=&NR(1));
93 &cmpult($R,$r08,($t08)=&NR(1)); &FR($r08);
94 &add($H1,$t08,$H1) &FR($t08);
95 &st($R,&QWPw(2,$rp));
96 &add($H1,$H2,$R);
97
98 &mov("zero",$H1);
99 &add($R,$r09,$R);
100 &mov("zero",$H2);
101 &mul($a[0],$b[3],($r12)=&NR(1));
102 &cmpult($R,$r09,($t09)=&NR(1)); &FR($r09);
103 &add($R,$r10,$R);
104 &add($H1,$t09,$H1) &FR($t09);
105 &mul($a[1],$b[2],($r13)=&NR(1));
106 &cmpult($R,$r10,($t10)=&NR(1)); &FR($r10);
107 &add($R,$r11,$R);
108 &add($H1,$t10,$H1) &FR($t10);
109 &mul($a[2],$b[1],($r14)=&NR(1));
110 &cmpult($R,$r11,($t11)=&NR(1)); &FR($r11);
111 &add($R,$r12,$R);
112 &add($H1,$t11,$H1) &FR($t11);
113 &mul($a[3],$b[0],($r15)=&NR(1));
114 &cmpult($R,$r12,($t12)=&NR(1)); &FR($r12);
115 &add($R,$r13,$R);
116 &add($H1,$t12,$H1) &FR($t12);
117 &muh($a[0],$b[3],($r16)=&NR(1));
118 &cmpult($R,$r13,($t13)=&NR(1)); &FR($r13);
119 &add($R,$r14,$R);
120 &add($H1,$t13,$H1) &FR($t13);
121 &muh($a[1],$b[2],($r17)=&NR(1));
122 &cmpult($R,$r14,($t14)=&NR(1)); &FR($r14);
123 &add($R,$r15,$R);
124 &add($H1,$t14,$H1) &FR($t14);
125 &muh($a[2],$b[1],($r18)=&NR(1));
126 &cmpult($R,$r15,($t15)=&NR(1)); &FR($r15);
127 &add($H1,$t15,$H1) &FR($t15);
128 &st($R,&QWPw(3,$rp));
129 &add($H1,$H2,$R);
130
131 &mov("zero",$H1);
132 &add($R,$r16,$R);
133 &mov("zero",$H2);
134 &muh($a[3],$b[0],($r19)=&NR(1));
135 &cmpult($R,$r16,($t16)=&NR(1)); &FR($r16);
136 &add($R,$r17,$R);
137 &add($H1,$t16,$H1) &FR($t16);
138 &mul($a[1],$b[3],($r20)=&NR(1));
139 &cmpult($R,$r17,($t17)=&NR(1)); &FR($r17);
140 &add($R,$r18,$R);
141 &add($H1,$t17,$H1) &FR($t17);
142 &mul($a[2],$b[2],($r21)=&NR(1));
143 &cmpult($R,$r18,($t18)=&NR(1)); &FR($r18);
144 &add($R,$r19,$R);
145 &add($H1,$t18,$H1) &FR($t18);
146 &mul($a[3],$b[1],($r22)=&NR(1));
147 &cmpult($R,$r19,($t19)=&NR(1)); &FR($r19);
148 &add($R,$r20,$R);
149 &add($H1,$t19,$H1) &FR($t19);
150 &muh($a[1],$b[3],($r23)=&NR(1));
151 &cmpult($R,$r20,($t20)=&NR(1)); &FR($r20);
152 &add($R,$r21,$R);
153 &add($H1,$t20,$H1) &FR($t20);
154 &muh($a[2],$b[2],($r24)=&NR(1));
155 &cmpult($R,$r21,($t21)=&NR(1)); &FR($r21);
156 &add($R,$r22,$R);
157 &add($H1,$t21,$H1) &FR($t21);
158 &muh($a[3],$b[1],($r25)=&NR(1));
159 &cmpult($R,$r22,($t22)=&NR(1)); &FR($r22);
160 &add($H1,$t22,$H1) &FR($t22);
161 &st($R,&QWPw(4,$rp));
162 &add($H1,$H2,$R);
163
164 &mov("zero",$H1);
165 &add($R,$r23,$R);
166 &mov("zero",$H2);
167 &mul($a[2],$b[3],($r26)=&NR(1));
168 &cmpult($R,$r23,($t23)=&NR(1)); &FR($r23);
169 &add($R,$r24,$R);
170 &add($H1,$t23,$H1) &FR($t23);
171 &mul($a[3],$b[2],($r27)=&NR(1));
172 &cmpult($R,$r24,($t24)=&NR(1)); &FR($r24);
173 &add($R,$r25,$R);
174 &add($H1,$t24,$H1) &FR($t24);
175 &muh($a[2],$b[3],($r28)=&NR(1));
176 &cmpult($R,$r25,($t25)=&NR(1)); &FR($r25);
177 &add($R,$r26,$R);
178 &add($H1,$t25,$H1) &FR($t25);
179 &muh($a[3],$b[2],($r29)=&NR(1));
180 &cmpult($R,$r26,($t26)=&NR(1)); &FR($r26);
181 &add($R,$r27,$R);
182 &add($H1,$t26,$H1) &FR($t26);
183 &mul($a[3],$b[3],($r30)=&NR(1));
184 &cmpult($R,$r27,($t27)=&NR(1)); &FR($r27);
185 &add($H1,$t27,$H1) &FR($t27);
186 &st($R,&QWPw(5,$rp));
187 &add($H1,$H2,$R);
188
189 &mov("zero",$H1);
190 &add($R,$r28,$R);
191 &mov("zero",$H2);
192 &muh($a[3],$b[3],($r31)=&NR(1));
193 &cmpult($R,$r28,($t28)=&NR(1)); &FR($r28);
194 &add($R,$r29,$R);
195 &add($H1,$t28,$H1) &FR($t28);
196 ############
197 &cmpult($R,$r29,($t29)=&NR(1)); &FR($r29);
198 &add($R,$r30,$R);
199 &add($H1,$t29,$H1) &FR($t29);
200 ############
201 &cmpult($R,$r30,($t30)=&NR(1)); &FR($r30);
202 &add($H1,$t30,$H1) &FR($t30);
203 &st($R,&QWPw(6,$rp));
204 &add($H1,$H2,$R);
205
206 &add($R,$r31,$R); &FR($r31);
207 &st($R,&QWPw(7,$rp));
208
209 &FR($R,$H1,$H2);
210 &function_end($name);
211
212 &fin_pool;
213 }
214
2151;
diff --git a/src/lib/libcrypto/bn/asm/alpha/mul_c4.works.pl b/src/lib/libcrypto/bn/asm/alpha/mul_c4.works.pl
new file mode 100644
index 0000000000..79d86dd25c
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/alpha/mul_c4.works.pl
@@ -0,0 +1,98 @@
1#!/usr/local/bin/perl
2# alpha assember
3
4sub mul_add_c
5 {
6 local($a,$b,$c0,$c1,$c2)=@_;
7 local($l1,$h1,$t1,$t2);
8
9print STDERR "count=$cnt\n"; $cnt++;
10 &mul($a,$b,($l1)=&NR(1));
11 &muh($a,$b,($h1)=&NR(1));
12 &add($c0,$l1,$c0);
13 &cmpult($c0,$l1,($t1)=&NR(1)); &FR($l1);
14 &add($t1,$h1,$h1); &FR($t1);
15 &add($c1,$h1,$c1);
16 &cmpult($c1,$h1,($t2)=&NR(1)); &FR($h1);
17 &add($c2,$t2,$c2); &FR($t2);
18 }
19
20sub bn_mul_comba4
21 {
22 local($name)=@_;
23 local(@a,@b,$r,$c0,$c1,$c2);
24
25 $cnt=1;
26 &init_pool(3);
27
28 $rp=&wparam(0);
29 $ap=&wparam(1);
30 $bp=&wparam(2);
31
32 &function_begin($name,"");
33
34 &comment("");
35
36 &ld(($a[0])=&NR(1),&QWPw(0,$ap));
37 &ld(($b[0])=&NR(1),&QWPw(0,$bp));
38 &ld(($a[1])=&NR(1),&QWPw(1,$ap));
39 &ld(($b[1])=&NR(1),&QWPw(1,$bp));
40 &ld(($a[2])=&NR(1),&QWPw(2,$ap));
41 &ld(($b[2])=&NR(1),&QWPw(2,$bp));
42 &ld(($a[3])=&NR(1),&QWPw(3,$ap)); &FR($ap);
43 &ld(($b[3])=&NR(1),&QWPw(3,$bp)); &FR($bp);
44
45 ($c0,$c1,$c2)=&NR(3);
46 &mov("zero",$c2);
47 &mul($a[0],$b[0],$c0);
48 &muh($a[0],$b[0],$c1);
49 &st($c0,&QWPw(0,$rp)); &FR($c0); ($c0)=&NR($c0);
50 ($c0,$c1,$c2)=($c1,$c2,$c0);
51 &mov("zero",$c2);
52
53 &mul_add_c($a[0],$b[1],$c0,$c1,$c2);
54 &mul_add_c($a[1],$b[0],$c0,$c1,$c2);
55 &st($c0,&QWPw(1,$rp)); &FR($c0); ($c0)=&NR($c0);
56 ($c0,$c1,$c2)=($c1,$c2,$c0);
57 &mov("zero",$c2);
58
59 &mul_add_c($a[1],$b[1],$c0,$c1,$c2);
60 &mul_add_c($a[0],$b[2],$c0,$c1,$c2);
61 &mul_add_c($a[2],$b[0],$c0,$c1,$c2);
62 &st($c0,&QWPw(2,$rp)); &FR($c0); ($c0)=&NR($c0);
63 ($c0,$c1,$c2)=($c1,$c2,$c0);
64 &mov("zero",$c2);
65
66 &mul_add_c($a[0],$b[3],$c0,$c1,$c2); &FR($a[0]);
67 &mul_add_c($a[1],$b[2],$c0,$c1,$c2);
68 &mul_add_c($a[2],$b[1],$c0,$c1,$c2);
69 &mul_add_c($a[3],$b[0],$c0,$c1,$c2); &FR($b[0]);
70 &st($c0,&QWPw(3,$rp)); &FR($c0); ($c0)=&NR($c0);
71 ($c0,$c1,$c2)=($c1,$c2,$c0);
72 &mov("zero",$c2);
73
74 &mul_add_c($a[1],$b[3],$c0,$c1,$c2); &FR($a[1]);
75 &mul_add_c($a[2],$b[2],$c0,$c1,$c2);
76 &mul_add_c($a[3],$b[1],$c0,$c1,$c2); &FR($b[1]);
77 &st($c0,&QWPw(4,$rp)); &FR($c0); ($c0)=&NR($c0);
78 ($c0,$c1,$c2)=($c1,$c2,$c0);
79 &mov("zero",$c2);
80
81 &mul_add_c($a[2],$b[3],$c0,$c1,$c2); &FR($a[2]);
82 &mul_add_c($a[3],$b[2],$c0,$c1,$c2); &FR($b[2]);
83 &st($c0,&QWPw(5,$rp)); &FR($c0); ($c0)=&NR($c0);
84 ($c0,$c1,$c2)=($c1,$c2,$c0);
85 &mov("zero",$c2);
86
87 &mul_add_c($a[3],$b[3],$c0,$c1,$c2); &FR($a[3],$b[3]);
88 &st($c0,&QWPw(6,$rp));
89 &st($c1,&QWPw(7,$rp));
90
91 &FR($c0,$c1,$c2);
92
93 &function_end($name);
94
95 &fin_pool;
96 }
97
981;
diff --git a/src/lib/libcrypto/bn/asm/alpha/mul_c8.pl b/src/lib/libcrypto/bn/asm/alpha/mul_c8.pl
new file mode 100644
index 0000000000..525ca7494b
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/alpha/mul_c8.pl
@@ -0,0 +1,177 @@
1#!/usr/local/bin/perl
2# alpha assember
3
4sub bn_mul_comba8
5 {
6 local($name)=@_;
7 local(@a,@b,$r,$c0,$c1,$c2);
8
9 $cnt=1;
10 &init_pool(3);
11
12 $rp=&wparam(0);
13 $ap=&wparam(1);
14 $bp=&wparam(2);
15
16 &function_begin($name,"");
17
18 &comment("");
19
20 &stack_push(2);
21 &ld(($a[0])=&NR(1),&QWPw(0,$ap));
22 &ld(($b[0])=&NR(1),&QWPw(0,$bp));
23 &st($reg_s0,&swtmp(0)); &FR($reg_s0);
24 &st($reg_s1,&swtmp(1)); &FR($reg_s1);
25 &ld(($a[1])=&NR(1),&QWPw(1,$ap));
26 &ld(($b[1])=&NR(1),&QWPw(1,$bp));
27 &ld(($a[2])=&NR(1),&QWPw(2,$ap));
28 &ld(($b[2])=&NR(1),&QWPw(2,$bp));
29 &ld(($a[3])=&NR(1),&QWPw(3,$ap));
30 &ld(($b[3])=&NR(1),&QWPw(3,$bp));
31 &ld(($a[4])=&NR(1),&QWPw(1,$ap));
32 &ld(($b[4])=&NR(1),&QWPw(1,$bp));
33 &ld(($a[5])=&NR(1),&QWPw(1,$ap));
34 &ld(($b[5])=&NR(1),&QWPw(1,$bp));
35 &ld(($a[6])=&NR(1),&QWPw(1,$ap));
36 &ld(($b[6])=&NR(1),&QWPw(1,$bp));
37 &ld(($a[7])=&NR(1),&QWPw(1,$ap)); &FR($ap);
38 &ld(($b[7])=&NR(1),&QWPw(1,$bp)); &FR($bp);
39
40 ($c0,$c1,$c2)=&NR(3);
41 &mov("zero",$c2);
42 &mul($a[0],$b[0],$c0);
43 &muh($a[0],$b[0],$c1);
44 &st($c0,&QWPw(0,$rp)); &FR($c0); ($c0)=&NR(1);
45 ($c0,$c1,$c2)=($c1,$c2,$c0);
46 &mov("zero",$c2);
47
48 &mul_add_c($a[0],$b[1],$c0,$c1,$c2);
49 &mul_add_c($a[1],$b[0],$c0,$c1,$c2);
50 &st($c0,&QWPw(1,$rp)); &FR($c0); ($c0)=&NR(1);
51 ($c0,$c1,$c2)=($c1,$c2,$c0);
52 &mov("zero",$c2);
53
54 &mul_add_c($a[0],$b[2],$c0,$c1,$c2);
55 &mul_add_c($a[1],$b[1],$c0,$c1,$c2);
56 &mul_add_c($a[2],$b[0],$c0,$c1,$c2);
57 &st($c0,&QWPw(2,$rp)); &FR($c0); ($c0)=&NR(1);
58 ($c0,$c1,$c2)=($c1,$c2,$c0);
59 &mov("zero",$c2);
60
61 &mul_add_c($a[0],$b[3],$c0,$c1,$c2);
62 &mul_add_c($a[1],$b[2],$c0,$c1,$c2);
63 &mul_add_c($a[2],$b[1],$c0,$c1,$c2);
64 &mul_add_c($a[3],$b[0],$c0,$c1,$c2);
65 &st($c0,&QWPw(3,$rp)); &FR($c0); ($c0)=&NR(1);
66 ($c0,$c1,$c2)=($c1,$c2,$c0);
67 &mov("zero",$c2);
68
69 &mul_add_c($a[0],$b[4],$c0,$c1,$c2);
70 &mul_add_c($a[1],$b[3],$c0,$c1,$c2);
71 &mul_add_c($a[2],$b[2],$c0,$c1,$c2);
72 &mul_add_c($a[3],$b[1],$c0,$c1,$c2);
73 &mul_add_c($a[4],$b[0],$c0,$c1,$c2);
74 &st($c0,&QWPw(4,$rp)); &FR($c0); ($c0)=&NR(1);
75 ($c0,$c1,$c2)=($c1,$c2,$c0);
76 &mov("zero",$c2);
77
78 &mul_add_c($a[0],$b[5],$c0,$c1,$c2);
79 &mul_add_c($a[1],$b[4],$c0,$c1,$c2);
80 &mul_add_c($a[2],$b[3],$c0,$c1,$c2);
81 &mul_add_c($a[3],$b[2],$c0,$c1,$c2);
82 &mul_add_c($a[4],$b[1],$c0,$c1,$c2);
83 &mul_add_c($a[5],$b[0],$c0,$c1,$c2);
84 &st($c0,&QWPw(5,$rp)); &FR($c0); ($c0)=&NR(1);
85 ($c0,$c1,$c2)=($c1,$c2,$c0);
86 &mov("zero",$c2);
87
88 &mul_add_c($a[0],$b[6],$c0,$c1,$c2);
89 &mul_add_c($a[1],$b[5],$c0,$c1,$c2);
90 &mul_add_c($a[2],$b[4],$c0,$c1,$c2);
91 &mul_add_c($a[3],$b[3],$c0,$c1,$c2);
92 &mul_add_c($a[4],$b[2],$c0,$c1,$c2);
93 &mul_add_c($a[5],$b[1],$c0,$c1,$c2);
94 &mul_add_c($a[6],$b[0],$c0,$c1,$c2);
95 &st($c0,&QWPw(6,$rp)); &FR($c0); ($c0)=&NR(1);
96 ($c0,$c1,$c2)=($c1,$c2,$c0);
97 &mov("zero",$c2);
98
99 &mul_add_c($a[0],$b[7],$c0,$c1,$c2); &FR($a[0]);
100 &mul_add_c($a[1],$b[6],$c0,$c1,$c2);
101 &mul_add_c($a[2],$b[5],$c0,$c1,$c2);
102 &mul_add_c($a[3],$b[4],$c0,$c1,$c2);
103 &mul_add_c($a[4],$b[3],$c0,$c1,$c2);
104 &mul_add_c($a[5],$b[2],$c0,$c1,$c2);
105 &mul_add_c($a[6],$b[1],$c0,$c1,$c2);
106 &mul_add_c($a[7],$b[0],$c0,$c1,$c2); &FR($b[0]);
107 &st($c0,&QWPw(7,$rp)); &FR($c0); ($c0)=&NR(1);
108 ($c0,$c1,$c2)=($c1,$c2,$c0);
109 &mov("zero",$c2);
110
111 &mul_add_c($a[1],$b[7],$c0,$c1,$c2); &FR($a[1]);
112 &mul_add_c($a[2],$b[6],$c0,$c1,$c2);
113 &mul_add_c($a[3],$b[5],$c0,$c1,$c2);
114 &mul_add_c($a[4],$b[4],$c0,$c1,$c2);
115 &mul_add_c($a[5],$b[3],$c0,$c1,$c2);
116 &mul_add_c($a[6],$b[2],$c0,$c1,$c2);
117 &mul_add_c($a[7],$b[1],$c0,$c1,$c2); &FR($b[1]);
118 &st($c0,&QWPw(8,$rp)); &FR($c0); ($c0)=&NR(1);
119 ($c0,$c1,$c2)=($c1,$c2,$c0);
120 &mov("zero",$c2);
121
122 &mul_add_c($a[2],$b[7],$c0,$c1,$c2); &FR($a[2]);
123 &mul_add_c($a[3],$b[6],$c0,$c1,$c2);
124 &mul_add_c($a[4],$b[5],$c0,$c1,$c2);
125 &mul_add_c($a[5],$b[4],$c0,$c1,$c2);
126 &mul_add_c($a[6],$b[3],$c0,$c1,$c2);
127 &mul_add_c($a[7],$b[2],$c0,$c1,$c2); &FR($b[2]);
128 &st($c0,&QWPw(9,$rp)); &FR($c0); ($c0)=&NR(1);
129 ($c0,$c1,$c2)=($c1,$c2,$c0);
130 &mov("zero",$c2);
131
132 &mul_add_c($a[3],$b[7],$c0,$c1,$c2); &FR($a[3]);
133 &mul_add_c($a[4],$b[6],$c0,$c1,$c2);
134 &mul_add_c($a[5],$b[5],$c0,$c1,$c2);
135 &mul_add_c($a[6],$b[4],$c0,$c1,$c2);
136 &mul_add_c($a[7],$b[3],$c0,$c1,$c2); &FR($b[3]);
137 &st($c0,&QWPw(10,$rp)); &FR($c0); ($c0)=&NR(1);
138 ($c0,$c1,$c2)=($c1,$c2,$c0);
139 &mov("zero",$c2);
140
141 &mul_add_c($a[4],$b[7],$c0,$c1,$c2); &FR($a[4]);
142 &mul_add_c($a[5],$b[6],$c0,$c1,$c2);
143 &mul_add_c($a[6],$b[5],$c0,$c1,$c2);
144 &mul_add_c($a[7],$b[4],$c0,$c1,$c2); &FR($b[4]);
145 &st($c0,&QWPw(11,$rp)); &FR($c0); ($c0)=&NR(1);
146 ($c0,$c1,$c2)=($c1,$c2,$c0);
147 &mov("zero",$c2);
148
149 &mul_add_c($a[5],$b[7],$c0,$c1,$c2); &FR($a[5]);
150 &mul_add_c($a[6],$b[6],$c0,$c1,$c2);
151 &mul_add_c($a[7],$b[5],$c0,$c1,$c2); &FR($b[5]);
152 &st($c0,&QWPw(12,$rp)); &FR($c0); ($c0)=&NR(1);
153 ($c0,$c1,$c2)=($c1,$c2,$c0);
154 &mov("zero",$c2);
155
156 &mul_add_c($a[6],$b[7],$c0,$c1,$c2); &FR($a[6]);
157 &mul_add_c($a[7],$b[6],$c0,$c1,$c2); &FR($b[6]);
158 &st($c0,&QWPw(13,$rp)); &FR($c0); ($c0)=&NR(1);
159 ($c0,$c1,$c2)=($c1,$c2,$c0);
160 &mov("zero",$c2);
161
162 &mul_add_c($a[7],$b[7],$c0,$c1,$c2); &FR($a[7],$b[7]);
163 &st($c0,&QWPw(14,$rp));
164 &st($c1,&QWPw(15,$rp));
165
166 &FR($c0,$c1,$c2);
167
168 &ld($reg_s0,&swtmp(0));
169 &ld($reg_s1,&swtmp(1));
170 &stack_pop(2);
171
172 &function_end($name);
173
174 &fin_pool;
175 }
176
1771;
diff --git a/src/lib/libcrypto/bn/asm/alpha/sqr.pl b/src/lib/libcrypto/bn/asm/alpha/sqr.pl
new file mode 100644
index 0000000000..a55b696906
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/alpha/sqr.pl
@@ -0,0 +1,113 @@
1#!/usr/local/bin/perl
2# alpha assember
3
4sub bn_sqr_words
5 {
6 local($name)=@_;
7 local($cc,$a,$b,$r,$couny);
8
9 &init_pool(3);
10 ($cc)=GR("r0");
11
12 $rp=&wparam(0);
13 $ap=&wparam(1);
14 $count=&wparam(2);
15
16 &function_begin($name,"");
17
18 &comment("");
19 &sub($count,4,$count);
20 &mov("zero",$cc);
21 &br(&label("finish"));
22 &blt($count,&label("finish"));
23
24 ($a0,$r0)=&NR(2);
25 &ld($a0,&QWPw(0,$ap));
26 &ld($r0,&QWPw(0,$rp));
27
28$a=<<'EOF';
29##########################################################
30 &set_label("loop");
31
32 ($a1)=&NR(1); &ld($a1,&QWPw(1,$ap));
33 ($b1)=&NR(1); &ld($b1,&QWPw(1,$bp));
34 ($a2)=&NR(1); &ld($a2,&QWPw(2,$ap));
35 ($b2)=&NR(1); &ld($b2,&QWPw(2,$bp));
36 ($a3)=&NR(1); &ld($a3,&QWPw(3,$ap));
37 ($b3)=&NR(1); &ld($b3,&QWPw(3,$bp));
38
39 ($o0,$t0)=&NR(2);
40 &add($a0,$b0,$o0);
41 &cmpult($o0,$b0,$t0);
42 &add($o0,$cc,$o0);
43 &cmpult($o0,$cc,$cc);
44 &add($cc,$t0,$cc); &FR($t0);
45
46 ($t1,$o1)=&NR(2);
47
48 &add($a1,$b1,$o1); &FR($a1);
49 &cmpult($o1,$b1,$t1); &FR($b1);
50 &add($o1,$cc,$o1);
51 &cmpult($o1,$cc,$cc);
52 &add($cc,$t1,$cc); &FR($t1);
53
54 ($t2,$o2)=&NR(2);
55
56 &add($a2,$b2,$o2); &FR($a2);
57 &cmpult($o2,$b2,$t2); &FR($b2);
58 &add($o2,$cc,$o2);
59 &cmpult($o2,$cc,$cc);
60 &add($cc,$t2,$cc); &FR($t2);
61
62 ($t3,$o3)=&NR(2);
63
64 &add($a3,$b3,$o3); &FR($a3);
65 &cmpult($o3,$b3,$t3); &FR($b3);
66 &add($o3,$cc,$o3);
67 &cmpult($o3,$cc,$cc);
68 &add($cc,$t3,$cc); &FR($t3);
69
70 &st($o0,&QWPw(0,$rp)); &FR($o0);
71 &st($o1,&QWPw(0,$rp)); &FR($o1);
72 &st($o2,&QWPw(0,$rp)); &FR($o2);
73 &st($o3,&QWPw(0,$rp)); &FR($o3);
74
75 &sub($count,4,$count); # count-=4
76 &add($ap,4*$QWS,$ap); # count+=4
77 &add($bp,4*$QWS,$bp); # count+=4
78 &add($rp,4*$QWS,$rp); # count+=4
79
80 &blt($count,&label("finish"));
81 &ld($a0,&QWPw(0,$ap));
82 &ld($b0,&QWPw(0,$bp));
83 &br(&label("loop"));
84EOF
85##################################################
86 # Do the last 0..3 words
87
88 &set_label("last_loop");
89
90 &ld(($a0)=&NR(1),&QWPw(0,$ap)); # get a
91 &mul($a0,$a0,($l0)=&NR(1));
92 &add($ap,$QWS,$ap);
93 &add($rp,2*$QWS,$rp);
94 &sub($count,1,$count);
95 &muh($a0,$a0,($h0)=&NR(1)); &FR($a0);
96 &st($l0,&QWPw(-2,$rp)); &FR($l0);
97 &st($h0,&QWPw(-1,$rp)); &FR($h0);
98
99 &bgt($count,&label("last_loop"));
100 &function_end_A($name);
101
102######################################################
103 &set_label("finish");
104 &add($count,4,$count);
105 &bgt($count,&label("last_loop"));
106
107 &set_label("end");
108 &function_end($name);
109
110 &fin_pool;
111 }
112
1131;
diff --git a/src/lib/libcrypto/bn/asm/alpha/sqr_c4.pl b/src/lib/libcrypto/bn/asm/alpha/sqr_c4.pl
new file mode 100644
index 0000000000..bf33f5b503
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/alpha/sqr_c4.pl
@@ -0,0 +1,109 @@
1#!/usr/local/bin/perl
2# alpha assember
3
4sub sqr_add_c
5 {
6 local($a,$c0,$c1,$c2)=@_;
7 local($l1,$h1,$t1,$t2);
8
9 &mul($a,$a,($l1)=&NR(1));
10 &muh($a,$a,($h1)=&NR(1));
11 &add($c0,$l1,$c0);
12 &add($c1,$h1,$c1);
13 &cmpult($c0,$l1,($t1)=&NR(1)); &FR($l1);
14 &cmpult($c1,$h1,($t2)=&NR(1)); &FR($h1);
15 &add($c1,$t1,$c1); &FR($t1);
16 &add($c2,$t2,$c2); &FR($t2);
17 }
18
19sub sqr_add_c2
20 {
21 local($a,$b,$c0,$c1,$c2)=@_;
22 local($l1,$h1,$t1,$t2);
23
24 &mul($a,$b,($l1)=&NR(1));
25 &muh($a,$b,($h1)=&NR(1));
26 &cmplt($l1,"zero",($lc1)=&NR(1));
27 &cmplt($h1,"zero",($hc1)=&NR(1));
28 &add($l1,$l1,$l1);
29 &add($h1,$h1,$h1);
30 &add($h1,$lc1,$h1); &FR($lc1);
31 &add($c2,$hc1,$c2); &FR($hc1);
32
33 &add($c0,$l1,$c0);
34 &add($c1,$h1,$c1);
35 &cmpult($c0,$l1,($lc1)=&NR(1)); &FR($l1);
36 &cmpult($c1,$h1,($hc1)=&NR(1)); &FR($h1);
37
38 &add($c1,$lc1,$c1); &FR($lc1);
39 &add($c2,$hc1,$c2); &FR($hc1);
40 }
41
42
43sub bn_sqr_comba4
44 {
45 local($name)=@_;
46 local(@a,@b,$r,$c0,$c1,$c2);
47
48 $cnt=1;
49 &init_pool(2);
50
51 $rp=&wparam(0);
52 $ap=&wparam(1);
53
54 &function_begin($name,"");
55
56 &comment("");
57
58 &ld(($a[0])=&NR(1),&QWPw(0,$ap));
59 &ld(($a[1])=&NR(1),&QWPw(1,$ap));
60 &ld(($a[2])=&NR(1),&QWPw(2,$ap));
61 &ld(($a[3])=&NR(1),&QWPw(3,$ap)); &FR($ap);
62
63 ($c0,$c1,$c2)=&NR(3);
64
65 &mov("zero",$c2);
66 &mul($a[0],$a[0],$c0);
67 &muh($a[0],$a[0],$c1);
68 &st($c0,&QWPw(0,$rp));
69 ($c0,$c1,$c2)=($c1,$c2,$c0);
70 &mov("zero",$c2);
71
72 &sqr_add_c2($a[0],$a[1],$c0,$c1,$c2);
73 &st($c0,&QWPw(1,$rp));
74 ($c0,$c1,$c2)=($c1,$c2,$c0);
75 &mov("zero",$c2);
76
77 &sqr_add_c($a[1],$c0,$c1,$c2);
78 &sqr_add_c2($a[2],$a[0],$c0,$c1,$c2);
79 &st($c0,&QWPw(2,$rp));
80 ($c0,$c1,$c2)=($c1,$c2,$c0);
81 &mov("zero",$c2);
82
83 &sqr_add_c2($a[3],$a[0],$c0,$c1,$c2);
84 &sqr_add_c2($a[2],$a[1],$c0,$c1,$c2);
85 &st($c0,&QWPw(3,$rp));
86 ($c0,$c1,$c2)=($c1,$c2,$c0);
87 &mov("zero",$c2);
88
89 &sqr_add_c($a[2],$c0,$c1,$c2);
90 &sqr_add_c2($a[3],$a[1],$c0,$c1,$c2);
91 &st($c0,&QWPw(4,$rp));
92 ($c0,$c1,$c2)=($c1,$c2,$c0);
93 &mov("zero",$c2);
94
95 &sqr_add_c2($a[3],$a[2],$c0,$c1,$c2);
96 &st($c0,&QWPw(5,$rp));
97 ($c0,$c1,$c2)=($c1,$c2,$c0);
98 &mov("zero",$c2);
99
100 &sqr_add_c($a[3],$c0,$c1,$c2);
101 &st($c0,&QWPw(6,$rp));
102 &st($c1,&QWPw(7,$rp));
103
104 &function_end($name);
105
106 &fin_pool;
107 }
108
1091;
diff --git a/src/lib/libcrypto/bn/asm/alpha/sqr_c8.pl b/src/lib/libcrypto/bn/asm/alpha/sqr_c8.pl
new file mode 100644
index 0000000000..b4afe085f1
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/alpha/sqr_c8.pl
@@ -0,0 +1,132 @@
1#!/usr/local/bin/perl
2# alpha assember
3
4sub bn_sqr_comba8
5 {
6 local($name)=@_;
7 local(@a,@b,$r,$c0,$c1,$c2);
8
9 $cnt=1;
10 &init_pool(2);
11
12 $rp=&wparam(0);
13 $ap=&wparam(1);
14
15 &function_begin($name,"");
16
17 &comment("");
18
19 &ld(($a[0])=&NR(1),&QWPw(0,$ap));
20 &ld(($a[1])=&NR(1),&QWPw(1,$ap));
21 &ld(($a[2])=&NR(1),&QWPw(2,$ap));
22 &ld(($a[3])=&NR(1),&QWPw(3,$ap));
23 &ld(($a[4])=&NR(1),&QWPw(4,$ap));
24 &ld(($a[5])=&NR(1),&QWPw(5,$ap));
25 &ld(($a[6])=&NR(1),&QWPw(6,$ap));
26 &ld(($a[7])=&NR(1),&QWPw(7,$ap)); &FR($ap);
27
28 ($c0,$c1,$c2)=&NR(3);
29
30 &mov("zero",$c2);
31 &mul($a[0],$a[0],$c0);
32 &muh($a[0],$a[0],$c1);
33 &st($c0,&QWPw(0,$rp));
34 ($c0,$c1,$c2)=($c1,$c2,$c0);
35 &mov("zero",$c2);
36
37 &sqr_add_c2($a[1],$a[0],$c0,$c1,$c2);
38 &st($c0,&QWPw(1,$rp));
39 ($c0,$c1,$c2)=($c1,$c2,$c0);
40 &mov("zero",$c2);
41
42 &sqr_add_c($a[1],$c0,$c1,$c2);
43 &sqr_add_c2($a[2],$a[0],$c0,$c1,$c2);
44 &st($c0,&QWPw(2,$rp));
45 ($c0,$c1,$c2)=($c1,$c2,$c0);
46 &mov("zero",$c2);
47
48 &sqr_add_c2($a[2],$a[1],$c0,$c1,$c2);
49 &sqr_add_c2($a[3],$a[0],$c0,$c1,$c2);
50 &st($c0,&QWPw(3,$rp));
51 ($c0,$c1,$c2)=($c1,$c2,$c0);
52 &mov("zero",$c2);
53
54 &sqr_add_c($a[2],$c0,$c1,$c2);
55 &sqr_add_c2($a[3],$a[1],$c0,$c1,$c2);
56 &sqr_add_c2($a[4],$a[0],$c0,$c1,$c2);
57 &st($c0,&QWPw(4,$rp));
58 ($c0,$c1,$c2)=($c1,$c2,$c0);
59 &mov("zero",$c2);
60
61 &sqr_add_c2($a[3],$a[2],$c0,$c1,$c2);
62 &sqr_add_c2($a[4],$a[1],$c0,$c1,$c2);
63 &sqr_add_c2($a[5],$a[0],$c0,$c1,$c2);
64 &st($c0,&QWPw(5,$rp));
65 ($c0,$c1,$c2)=($c1,$c2,$c0);
66 &mov("zero",$c2);
67
68 &sqr_add_c($a[3],$c0,$c1,$c2);
69 &sqr_add_c2($a[4],$a[2],$c0,$c1,$c2);
70 &sqr_add_c2($a[5],$a[1],$c0,$c1,$c2);
71 &sqr_add_c2($a[6],$a[0],$c0,$c1,$c2);
72 &st($c0,&QWPw(6,$rp));
73 ($c0,$c1,$c2)=($c1,$c2,$c0);
74 &mov("zero",$c2);
75
76 &sqr_add_c2($a[4],$a[3],$c0,$c1,$c2);
77 &sqr_add_c2($a[5],$a[2],$c0,$c1,$c2);
78 &sqr_add_c2($a[6],$a[1],$c0,$c1,$c2);
79 &sqr_add_c2($a[7],$a[0],$c0,$c1,$c2);
80 &st($c0,&QWPw(7,$rp));
81 ($c0,$c1,$c2)=($c1,$c2,$c0);
82 &mov("zero",$c2);
83
84 &sqr_add_c($a[4],$c0,$c1,$c2);
85 &sqr_add_c2($a[5],$a[3],$c0,$c1,$c2);
86 &sqr_add_c2($a[6],$a[2],$c0,$c1,$c2);
87 &sqr_add_c2($a[7],$a[1],$c0,$c1,$c2);
88 &st($c0,&QWPw(8,$rp));
89 ($c0,$c1,$c2)=($c1,$c2,$c0);
90 &mov("zero",$c2);
91
92 &sqr_add_c2($a[5],$a[4],$c0,$c1,$c2);
93 &sqr_add_c2($a[6],$a[3],$c0,$c1,$c2);
94 &sqr_add_c2($a[7],$a[2],$c0,$c1,$c2);
95 &st($c0,&QWPw(9,$rp));
96 ($c0,$c1,$c2)=($c1,$c2,$c0);
97 &mov("zero",$c2);
98
99 &sqr_add_c($a[5],$c0,$c1,$c2);
100 &sqr_add_c2($a[6],$a[4],$c0,$c1,$c2);
101 &sqr_add_c2($a[7],$a[3],$c0,$c1,$c2);
102 &st($c0,&QWPw(10,$rp));
103 ($c0,$c1,$c2)=($c1,$c2,$c0);
104 &mov("zero",$c2);
105
106 &sqr_add_c2($a[6],$a[5],$c0,$c1,$c2);
107 &sqr_add_c2($a[7],$a[4],$c0,$c1,$c2);
108 &st($c0,&QWPw(11,$rp));
109 ($c0,$c1,$c2)=($c1,$c2,$c0);
110 &mov("zero",$c2);
111
112 &sqr_add_c($a[6],$c0,$c1,$c2);
113 &sqr_add_c2($a[7],$a[5],$c0,$c1,$c2);
114 &st($c0,&QWPw(12,$rp));
115 ($c0,$c1,$c2)=($c1,$c2,$c0);
116 &mov("zero",$c2);
117
118 &sqr_add_c2($a[7],$a[6],$c0,$c1,$c2);
119 &st($c0,&QWPw(13,$rp));
120 ($c0,$c1,$c2)=($c1,$c2,$c0);
121 &mov("zero",$c2);
122
123 &sqr_add_c($a[7],$c0,$c1,$c2);
124 &st($c0,&QWPw(14,$rp));
125 &st($c1,&QWPw(15,$rp));
126
127 &function_end($name);
128
129 &fin_pool;
130 }
131
1321;
diff --git a/src/lib/libcrypto/bn/asm/alpha/sub.pl b/src/lib/libcrypto/bn/asm/alpha/sub.pl
new file mode 100644
index 0000000000..d998da5c21
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/alpha/sub.pl
@@ -0,0 +1,108 @@
1#!/usr/local/bin/perl
2# alpha assember
3
4sub bn_sub_words
5 {
6 local($name)=@_;
7 local($cc,$a,$b,$r);
8
9 &init_pool(4);
10 ($cc)=GR("r0");
11
12 $rp=&wparam(0);
13 $ap=&wparam(1);
14 $bp=&wparam(2);
15 $count=&wparam(3);
16
17 &function_begin($name,"");
18
19 &comment("");
20 &sub($count,4,$count);
21 &mov("zero",$cc);
22 &blt($count,&label("finish"));
23
24 ($a0,$b0)=&NR(2);
25 &ld($a0,&QWPw(0,$ap));
26 &ld($b0,&QWPw(0,$bp));
27
28##########################################################
29 &set_label("loop");
30
31 ($a1,$tmp,$b1,$a2,$b2,$a3,$b3,$o0)=&NR(8);
32 &ld($a1,&QWPw(1,$ap));
33 &cmpult($a0,$b0,$tmp); # will we borrow?
34 &ld($b1,&QWPw(1,$bp));
35 &sub($a0,$b0,$a0); # do the subtract
36 &ld($a2,&QWPw(2,$ap));
37 &cmpult($a0,$cc,$b0); # will we borrow?
38 &ld($b2,&QWPw(2,$bp));
39 &sub($a0,$cc,$o0); # will we borrow?
40 &ld($a3,&QWPw(3,$ap));
41 &add($b0,$tmp,$cc); ($t1,$o1)=&NR(2); &FR($tmp);
42
43 &cmpult($a1,$b1,$t1); # will we borrow?
44 &sub($a1,$b1,$a1); # do the subtract
45 &ld($b3,&QWPw(3,$bp));
46 &cmpult($a1,$cc,$b1); # will we borrow?
47 &sub($a1,$cc,$o1); # will we borrow?
48 &add($b1,$t1,$cc); ($tmp,$o2)=&NR(2); &FR($t1,$a1,$b1);
49
50 &cmpult($a2,$b2,$tmp); # will we borrow?
51 &sub($a2,$b2,$a2); # do the subtract
52 &st($o0,&QWPw(0,$rp)); &FR($o0); # save
53 &cmpult($a2,$cc,$b2); # will we borrow?
54 &sub($a2,$cc,$o2); # will we borrow?
55 &add($b2,$tmp,$cc); ($t3,$o3)=&NR(2); &FR($tmp,$a2,$b2);
56
57 &cmpult($a3,$b3,$t3); # will we borrow?
58 &sub($a3,$b3,$a3); # do the subtract
59 &st($o1,&QWPw(1,$rp)); &FR($o1);
60 &cmpult($a3,$cc,$b3); # will we borrow?
61 &sub($a3,$cc,$o3); # will we borrow?
62 &add($b3,$t3,$cc); &FR($t3,$a3,$b3);
63
64 &st($o2,&QWPw(2,$rp)); &FR($o2);
65 &sub($count,4,$count); # count-=4
66 &st($o3,&QWPw(3,$rp)); &FR($o3);
67 &add($ap,4*$QWS,$ap); # count+=4
68 &add($bp,4*$QWS,$bp); # count+=4
69 &add($rp,4*$QWS,$rp); # count+=4
70
71 &blt($count,&label("finish"));
72 &ld($a0,&QWPw(0,$ap));
73 &ld($b0,&QWPw(0,$bp));
74 &br(&label("loop"));
75##################################################
76 # Do the last 0..3 words
77
78 &set_label("last_loop");
79
80 &ld($a0,&QWPw(0,$ap)); # get a
81 &ld($b0,&QWPw(0,$bp)); # get b
82 &cmpult($a0,$b0,$tmp); # will we borrow?
83 &sub($a0,$b0,$a0); # do the subtract
84 &cmpult($a0,$cc,$b0); # will we borrow?
85 &sub($a0,$cc,$a0); # will we borrow?
86 &st($a0,&QWPw(0,$rp)); # save
87 &add($b0,$tmp,$cc); # add the borrows
88
89 &add($ap,$QWS,$ap);
90 &add($bp,$QWS,$bp);
91 &add($rp,$QWS,$rp);
92 &sub($count,1,$count);
93 &bgt($count,&label("last_loop"));
94 &function_end_A($name);
95
96######################################################
97 &set_label("finish");
98 &add($count,4,$count);
99 &bgt($count,&label("last_loop"));
100
101 &FR($a0,$b0);
102 &set_label("end");
103 &function_end($name);
104
105 &fin_pool;
106 }
107
1081;
diff --git a/src/lib/libcrypto/bn/asm/x86/add.pl b/src/lib/libcrypto/bn/asm/x86/add.pl
new file mode 100644
index 0000000000..0b5cf583e3
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/x86/add.pl
@@ -0,0 +1,76 @@
1#!/usr/local/bin/perl
2# x86 assember
3
4sub bn_add_words
5 {
6 local($name)=@_;
7
8 &function_begin($name,"");
9
10 &comment("");
11 $a="esi";
12 $b="edi";
13 $c="eax";
14 $r="ebx";
15 $tmp1="ecx";
16 $tmp2="edx";
17 $num="ebp";
18
19 &mov($r,&wparam(0)); # get r
20 &mov($a,&wparam(1)); # get a
21 &mov($b,&wparam(2)); # get b
22 &mov($num,&wparam(3)); # get num
23 &xor($c,$c); # clear carry
24 &and($num,0xfffffff8); # num / 8
25
26 &jz(&label("aw_finish"));
27
28 &set_label("aw_loop",0);
29 for ($i=0; $i<8; $i++)
30 {
31 &comment("Round $i");
32
33 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
34 &mov($tmp2,&DWP($i*4,$b,"",0)); # *b
35 &add($tmp1,$c);
36 &mov($c,0);
37 &adc($c,$c);
38 &add($tmp1,$tmp2);
39 &adc($c,0);
40 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
41 }
42
43 &comment("");
44 &add($a,32);
45 &add($b,32);
46 &add($r,32);
47 &sub($num,8);
48 &jnz(&label("aw_loop"));
49
50 &set_label("aw_finish",0);
51 &mov($num,&wparam(3)); # get num
52 &and($num,7);
53 &jz(&label("aw_end"));
54
55 for ($i=0; $i<7; $i++)
56 {
57 &comment("Tail Round $i");
58 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
59 &mov($tmp2,&DWP($i*4,$b,"",0));# *b
60 &add($tmp1,$c);
61 &mov($c,0);
62 &adc($c,$c);
63 &add($tmp1,$tmp2);
64 &adc($c,0);
65 &dec($num) if ($i != 6);
66 &mov(&DWP($i*4,$r,"",0),$tmp1); # *a
67 &jz(&label("aw_end")) if ($i != 6);
68 }
69 &set_label("aw_end",0);
70
71# &mov("eax",$c); # $c is "eax"
72
73 &function_end($name);
74 }
75
761;
diff --git a/src/lib/libcrypto/bn/asm/x86/comba.pl b/src/lib/libcrypto/bn/asm/x86/comba.pl
new file mode 100644
index 0000000000..2291253629
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/x86/comba.pl
@@ -0,0 +1,277 @@
1#!/usr/local/bin/perl
2# x86 assember
3
4sub mul_add_c
5 {
6 local($a,$ai,$b,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;
7
8 # pos == -1 if eax and edx are pre-loaded, 0 to load from next
9 # words, and 1 if load return value
10
11 &comment("mul a[$ai]*b[$bi]");
12
13 # "eax" and "edx" will always be pre-loaded.
14 # &mov("eax",&DWP($ai*4,$a,"",0)) ;
15 # &mov("edx",&DWP($bi*4,$b,"",0));
16
17 &mul("edx");
18 &add($c0,"eax");
19 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0; # laod next a
20 &mov("eax",&wparam(0)) if $pos > 0; # load r[]
21 ###
22 &adc($c1,"edx");
23 &mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 0; # laod next b
24 &mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 1; # laod next b
25 ###
26 &adc($c2,0);
27 # is pos > 1, it means it is the last loop
28 &mov(&DWP($i*4,"eax","",0),$c0) if $pos > 0; # save r[];
29 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # laod next a
30 }
31
32sub sqr_add_c
33 {
34 local($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;
35
36 # pos == -1 if eax and edx are pre-loaded, 0 to load from next
37 # words, and 1 if load return value
38
39 &comment("sqr a[$ai]*a[$bi]");
40
41 # "eax" and "edx" will always be pre-loaded.
42 # &mov("eax",&DWP($ai*4,$a,"",0)) ;
43 # &mov("edx",&DWP($bi*4,$b,"",0));
44
45 if ($ai == $bi)
46 { &mul("eax");}
47 else
48 { &mul("edx");}
49 &add($c0,"eax");
50 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0; # load next a
51 ###
52 &adc($c1,"edx");
53 &mov("edx",&DWP(($nb)*4,$a,"",0)) if ($pos == 1) && ($na != $nb);
54 ###
55 &adc($c2,0);
56 # is pos > 1, it means it is the last loop
57 &mov(&DWP($i*4,$r,"",0),$c0) if $pos > 0; # save r[];
58 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # load next b
59 }
60
61sub sqr_add_c2
62 {
63 local($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;
64
65 # pos == -1 if eax and edx are pre-loaded, 0 to load from next
66 # words, and 1 if load return value
67
68 &comment("sqr a[$ai]*a[$bi]");
69
70 # "eax" and "edx" will always be pre-loaded.
71 # &mov("eax",&DWP($ai*4,$a,"",0)) ;
72 # &mov("edx",&DWP($bi*4,$a,"",0));
73
74 if ($ai == $bi)
75 { &mul("eax");}
76 else
77 { &mul("edx");}
78 &add("eax","eax");
79 ###
80 &adc("edx","edx");
81 ###
82 &adc($c2,0);
83 &add($c0,"eax");
84 &adc($c1,"edx");
85 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0; # load next a
86 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # load next b
87 &adc($c2,0);
88 &mov(&DWP($i*4,$r,"",0),$c0) if $pos > 0; # save r[];
89 &mov("edx",&DWP(($nb)*4,$a,"",0)) if ($pos <= 1) && ($na != $nb);
90 ###
91 }
92
93sub bn_mul_comba
94 {
95 local($name,$num)=@_;
96 local($a,$b,$c0,$c1,$c2);
97 local($i,$as,$ae,$bs,$be,$ai,$bi);
98 local($tot,$end);
99
100 &function_begin_B($name,"");
101
102 $c0="ebx";
103 $c1="ecx";
104 $c2="ebp";
105 $a="esi";
106 $b="edi";
107
108 $as=0;
109 $ae=0;
110 $bs=0;
111 $be=0;
112 $tot=$num+$num-1;
113
114 &push("esi");
115 &mov($a,&wparam(1));
116 &push("edi");
117 &mov($b,&wparam(2));
118 &push("ebp");
119 &push("ebx");
120
121 &xor($c0,$c0);
122 &mov("eax",&DWP(0,$a,"",0)); # load the first word
123 &xor($c1,$c1);
124 &mov("edx",&DWP(0,$b,"",0)); # load the first second
125
126 for ($i=0; $i<$tot; $i++)
127 {
128 $ai=$as;
129 $bi=$bs;
130 $end=$be+1;
131
132 &comment("################## Calculate word $i");
133
134 for ($j=$bs; $j<$end; $j++)
135 {
136 &xor($c2,$c2) if ($j == $bs);
137 if (($j+1) == $end)
138 {
139 $v=1;
140 $v=2 if (($i+1) == $tot);
141 }
142 else
143 { $v=0; }
144 if (($j+1) != $end)
145 {
146 $na=($ai-1);
147 $nb=($bi+1);
148 }
149 else
150 {
151 $na=$as+($i < ($num-1));
152 $nb=$bs+($i >= ($num-1));
153 }
154#printf STDERR "[$ai,$bi] -> [$na,$nb]\n";
155 &mul_add_c($a,$ai,$b,$bi,$c0,$c1,$c2,$v,$i,$na,$nb);
156 if ($v)
157 {
158 &comment("saved r[$i]");
159 # &mov("eax",&wparam(0));
160 # &mov(&DWP($i*4,"eax","",0),$c0);
161 ($c0,$c1,$c2)=($c1,$c2,$c0);
162 }
163 $ai--;
164 $bi++;
165 }
166 $as++ if ($i < ($num-1));
167 $ae++ if ($i >= ($num-1));
168
169 $bs++ if ($i >= ($num-1));
170 $be++ if ($i < ($num-1));
171 }
172 &comment("save r[$i]");
173 # &mov("eax",&wparam(0));
174 &mov(&DWP($i*4,"eax","",0),$c0);
175
176 &pop("ebx");
177 &pop("ebp");
178 &pop("edi");
179 &pop("esi");
180 &ret();
181 &function_end_B($name);
182 }
183
184sub bn_sqr_comba
185 {
186 local($name,$num)=@_;
187 local($r,$a,$c0,$c1,$c2)=@_;
188 local($i,$as,$ae,$bs,$be,$ai,$bi);
189 local($b,$tot,$end,$half);
190
191 &function_begin_B($name,"");
192
193 $c0="ebx";
194 $c1="ecx";
195 $c2="ebp";
196 $a="esi";
197 $r="edi";
198
199 &push("esi");
200 &push("edi");
201 &push("ebp");
202 &push("ebx");
203 &mov($r,&wparam(0));
204 &mov($a,&wparam(1));
205 &xor($c0,$c0);
206 &xor($c1,$c1);
207 &mov("eax",&DWP(0,$a,"",0)); # load the first word
208
209 $as=0;
210 $ae=0;
211 $bs=0;
212 $be=0;
213 $tot=$num+$num-1;
214
215 for ($i=0; $i<$tot; $i++)
216 {
217 $ai=$as;
218 $bi=$bs;
219 $end=$be+1;
220
221 &comment("############### Calculate word $i");
222 for ($j=$bs; $j<$end; $j++)
223 {
224 &xor($c2,$c2) if ($j == $bs);
225 if (($ai-1) < ($bi+1))
226 {
227 $v=1;
228 $v=2 if ($i+1) == $tot;
229 }
230 else
231 { $v=0; }
232 if (!$v)
233 {
234 $na=$ai-1;
235 $nb=$bi+1;
236 }
237 else
238 {
239 $na=$as+($i < ($num-1));
240 $nb=$bs+($i >= ($num-1));
241 }
242 if ($ai == $bi)
243 {
244 &sqr_add_c($r,$a,$ai,$bi,
245 $c0,$c1,$c2,$v,$i,$na,$nb);
246 }
247 else
248 {
249 &sqr_add_c2($r,$a,$ai,$bi,
250 $c0,$c1,$c2,$v,$i,$na,$nb);
251 }
252 if ($v)
253 {
254 &comment("saved r[$i]");
255 #&mov(&DWP($i*4,$r,"",0),$c0);
256 ($c0,$c1,$c2)=($c1,$c2,$c0);
257 last;
258 }
259 $ai--;
260 $bi++;
261 }
262 $as++ if ($i < ($num-1));
263 $ae++ if ($i >= ($num-1));
264
265 $bs++ if ($i >= ($num-1));
266 $be++ if ($i < ($num-1));
267 }
268 &mov(&DWP($i*4,$r,"",0),$c0);
269 &pop("ebx");
270 &pop("ebp");
271 &pop("edi");
272 &pop("esi");
273 &ret();
274 &function_end_B($name);
275 }
276
2771;
diff --git a/src/lib/libcrypto/bn/asm/x86/div.pl b/src/lib/libcrypto/bn/asm/x86/div.pl
new file mode 100644
index 0000000000..0e90152caa
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/x86/div.pl
@@ -0,0 +1,15 @@
1#!/usr/local/bin/perl
2# x86 assember
3
4sub bn_div_words
5 {
6 local($name)=@_;
7
8 &function_begin($name,"");
9 &mov("edx",&wparam(0)); #
10 &mov("eax",&wparam(1)); #
11 &mov("ebx",&wparam(2)); #
12 &div("ebx");
13 &function_end($name);
14 }
151;
diff --git a/src/lib/libcrypto/bn/asm/x86/f b/src/lib/libcrypto/bn/asm/x86/f
new file mode 100644
index 0000000000..22e4112224
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/x86/f
@@ -0,0 +1,3 @@
1#!/usr/local/bin/perl
2# x86 assember
3
diff --git a/src/lib/libcrypto/bn/asm/x86/mul.pl b/src/lib/libcrypto/bn/asm/x86/mul.pl
new file mode 100644
index 0000000000..674cb9b055
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/x86/mul.pl
@@ -0,0 +1,77 @@
1#!/usr/local/bin/perl
2# x86 assember
3
4sub bn_mul_words
5 {
6 local($name)=@_;
7
8 &function_begin($name,"");
9
10 &comment("");
11 $Low="eax";
12 $High="edx";
13 $a="ebx";
14 $w="ecx";
15 $r="edi";
16 $c="esi";
17 $num="ebp";
18
19 &xor($c,$c); # clear carry
20 &mov($r,&wparam(0)); #
21 &mov($a,&wparam(1)); #
22 &mov($num,&wparam(2)); #
23 &mov($w,&wparam(3)); #
24
25 &and($num,0xfffffff8); # num / 8
26 &jz(&label("mw_finish"));
27
28 &set_label("mw_loop",0);
29 for ($i=0; $i<32; $i+=4)
30 {
31 &comment("Round $i");
32
33 &mov("eax",&DWP($i,$a,"",0)); # *a
34 &mul($w); # *a * w
35 &add("eax",$c); # L(t)+=c
36 # XXX
37
38 &adc("edx",0); # H(t)+=carry
39 &mov(&DWP($i,$r,"",0),"eax"); # *r= L(t);
40
41 &mov($c,"edx"); # c= H(t);
42 }
43
44 &comment("");
45 &add($a,32);
46 &add($r,32);
47 &sub($num,8);
48 &jz(&label("mw_finish"));
49 &jmp(&label("mw_loop"));
50
51 &set_label("mw_finish",0);
52 &mov($num,&wparam(2)); # get num
53 &and($num,7);
54 &jnz(&label("mw_finish2"));
55 &jmp(&label("mw_end"));
56
57 &set_label("mw_finish2",1);
58 for ($i=0; $i<7; $i++)
59 {
60 &comment("Tail Round $i");
61 &mov("eax",&DWP($i*4,$a,"",0));# *a
62 &mul($w); # *a * w
63 &add("eax",$c); # L(t)+=c
64 # XXX
65 &adc("edx",0); # H(t)+=carry
66 &mov(&DWP($i*4,$r,"",0),"eax");# *r= L(t);
67 &mov($c,"edx"); # c= H(t);
68 &dec($num) if ($i != 7-1);
69 &jz(&label("mw_end")) if ($i != 7-1);
70 }
71 &set_label("mw_end",0);
72 &mov("eax",$c);
73
74 &function_end($name);
75 }
76
771;
diff --git a/src/lib/libcrypto/bn/asm/x86/mul_add.pl b/src/lib/libcrypto/bn/asm/x86/mul_add.pl
new file mode 100644
index 0000000000..61830d3a90
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/x86/mul_add.pl
@@ -0,0 +1,87 @@
1#!/usr/local/bin/perl
2# x86 assember
3
4sub bn_mul_add_words
5 {
6 local($name)=@_;
7
8 &function_begin($name,"");
9
10 &comment("");
11 $Low="eax";
12 $High="edx";
13 $a="ebx";
14 $w="ebp";
15 $r="edi";
16 $c="esi";
17
18 &xor($c,$c); # clear carry
19 &mov($r,&wparam(0)); #
20
21 &mov("ecx",&wparam(2)); #
22 &mov($a,&wparam(1)); #
23
24 &and("ecx",0xfffffff8); # num / 8
25 &mov($w,&wparam(3)); #
26
27 &push("ecx"); # Up the stack for a tmp variable
28
29 &jz(&label("maw_finish"));
30
31 &set_label("maw_loop",0);
32
33 &mov(&swtmp(0),"ecx"); #
34
35 for ($i=0; $i<32; $i+=4)
36 {
37 &comment("Round $i");
38
39 &mov("eax",&DWP($i,$a,"",0)); # *a
40 &mul($w); # *a * w
41 &add("eax",$c); # L(t)+= *r
42 &mov($c,&DWP($i,$r,"",0)); # L(t)+= *r
43 &adc("edx",0); # H(t)+=carry
44 &add("eax",$c); # L(t)+=c
45 &adc("edx",0); # H(t)+=carry
46 &mov(&DWP($i,$r,"",0),"eax"); # *r= L(t);
47 &mov($c,"edx"); # c= H(t);
48 }
49
50 &comment("");
51 &mov("ecx",&swtmp(0)); #
52 &add($a,32);
53 &add($r,32);
54 &sub("ecx",8);
55 &jnz(&label("maw_loop"));
56
57 &set_label("maw_finish",0);
58 &mov("ecx",&wparam(2)); # get num
59 &and("ecx",7);
60 &jnz(&label("maw_finish2")); # helps branch prediction
61 &jmp(&label("maw_end"));
62
63 &set_label("maw_finish2",1);
64 for ($i=0; $i<7; $i++)
65 {
66 &comment("Tail Round $i");
67 &mov("eax",&DWP($i*4,$a,"",0));# *a
68 &mul($w); # *a * w
69 &add("eax",$c); # L(t)+=c
70 &mov($c,&DWP($i*4,$r,"",0)); # L(t)+= *r
71 &adc("edx",0); # H(t)+=carry
72 &add("eax",$c);
73 &adc("edx",0); # H(t)+=carry
74 &dec("ecx") if ($i != 7-1);
75 &mov(&DWP($i*4,$r,"",0),"eax"); # *r= L(t);
76 &mov($c,"edx"); # c= H(t);
77 &jz(&label("maw_end")) if ($i != 7-1);
78 }
79 &set_label("maw_end",0);
80 &mov("eax",$c);
81
82 &pop("ecx"); # clear variable from
83
84 &function_end($name);
85 }
86
871;
diff --git a/src/lib/libcrypto/bn/asm/x86/sqr.pl b/src/lib/libcrypto/bn/asm/x86/sqr.pl
new file mode 100644
index 0000000000..1f90993cf6
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/x86/sqr.pl
@@ -0,0 +1,60 @@
1#!/usr/local/bin/perl
2# x86 assember
3
4sub bn_sqr_words
5 {
6 local($name)=@_;
7
8 &function_begin($name,"");
9
10 &comment("");
11 $r="esi";
12 $a="edi";
13 $num="ebx";
14
15 &mov($r,&wparam(0)); #
16 &mov($a,&wparam(1)); #
17 &mov($num,&wparam(2)); #
18
19 &and($num,0xfffffff8); # num / 8
20 &jz(&label("sw_finish"));
21
22 &set_label("sw_loop",0);
23 for ($i=0; $i<32; $i+=4)
24 {
25 &comment("Round $i");
26 &mov("eax",&DWP($i,$a,"",0)); # *a
27 # XXX
28 &mul("eax"); # *a * *a
29 &mov(&DWP($i*2,$r,"",0),"eax"); #
30 &mov(&DWP($i*2+4,$r,"",0),"edx");#
31 }
32
33 &comment("");
34 &add($a,32);
35 &add($r,64);
36 &sub($num,8);
37 &jnz(&label("sw_loop"));
38
39 &set_label("sw_finish",0);
40 &mov($num,&wparam(2)); # get num
41 &and($num,7);
42 &jz(&label("sw_end"));
43
44 for ($i=0; $i<7; $i++)
45 {
46 &comment("Tail Round $i");
47 &mov("eax",&DWP($i*4,$a,"",0)); # *a
48 # XXX
49 &mul("eax"); # *a * *a
50 &mov(&DWP($i*8,$r,"",0),"eax"); #
51 &dec($num) if ($i != 7-1);
52 &mov(&DWP($i*8+4,$r,"",0),"edx");
53 &jz(&label("sw_end")) if ($i != 7-1);
54 }
55 &set_label("sw_end",0);
56
57 &function_end($name);
58 }
59
601;
diff --git a/src/lib/libcrypto/bn/asm/x86/sub.pl b/src/lib/libcrypto/bn/asm/x86/sub.pl
new file mode 100644
index 0000000000..837b0e1b07
--- /dev/null
+++ b/src/lib/libcrypto/bn/asm/x86/sub.pl
@@ -0,0 +1,76 @@
1#!/usr/local/bin/perl
2# x86 assember
3
4sub bn_sub_words
5 {
6 local($name)=@_;
7
8 &function_begin($name,"");
9
10 &comment("");
11 $a="esi";
12 $b="edi";
13 $c="eax";
14 $r="ebx";
15 $tmp1="ecx";
16 $tmp2="edx";
17 $num="ebp";
18
19 &mov($r,&wparam(0)); # get r
20 &mov($a,&wparam(1)); # get a
21 &mov($b,&wparam(2)); # get b
22 &mov($num,&wparam(3)); # get num
23 &xor($c,$c); # clear carry
24 &and($num,0xfffffff8); # num / 8
25
26 &jz(&label("aw_finish"));
27
28 &set_label("aw_loop",0);
29 for ($i=0; $i<8; $i++)
30 {
31 &comment("Round $i");
32
33 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
34 &mov($tmp2,&DWP($i*4,$b,"",0)); # *b
35 &sub($tmp1,$c);
36 &mov($c,0);
37 &adc($c,$c);
38 &sub($tmp1,$tmp2);
39 &adc($c,0);
40 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
41 }
42
43 &comment("");
44 &add($a,32);
45 &add($b,32);
46 &add($r,32);
47 &sub($num,8);
48 &jnz(&label("aw_loop"));
49
50 &set_label("aw_finish",0);
51 &mov($num,&wparam(3)); # get num
52 &and($num,7);
53 &jz(&label("aw_end"));
54
55 for ($i=0; $i<7; $i++)
56 {
57 &comment("Tail Round $i");
58 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
59 &mov($tmp2,&DWP($i*4,$b,"",0));# *b
60 &sub($tmp1,$c);
61 &mov($c,0);
62 &adc($c,$c);
63 &sub($tmp1,$tmp2);
64 &adc($c,0);
65 &dec($num) if ($i != 6);
66 &mov(&DWP($i*4,$r,"",0),$tmp1); # *a
67 &jz(&label("aw_end")) if ($i != 6);
68 }
69 &set_label("aw_end",0);
70
71# &mov("eax",$c); # $c is "eax"
72
73 &function_end($name);
74 }
75
761;