diff options
author | beck <> | 1999-09-29 05:53:45 +0000 |
---|---|---|
committer | beck <> | 1999-09-29 05:53:45 +0000 |
commit | 648e4f0876a3773381cbfff3192dd84dd1c8c925 (patch) | |
tree | bd9d01e3969ffa5aac92128af3e515520c88fc0e /src/lib/libcrypto/bn/asm | |
parent | 756086c41b0487beefc3d5b3400f80095d0e4157 (diff) | |
download | openbsd-648e4f0876a3773381cbfff3192dd84dd1c8c925.tar.gz openbsd-648e4f0876a3773381cbfff3192dd84dd1c8c925.tar.bz2 openbsd-648e4f0876a3773381cbfff3192dd84dd1c8c925.zip |
new files for OpenSSL 0.9.4
Diffstat (limited to 'src/lib/libcrypto/bn/asm')
30 files changed, 3561 insertions, 0 deletions
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/add.pl b/src/lib/libcrypto/bn/asm/alpha.works/add.pl new file mode 100644 index 0000000000..4dc76e6b69 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha.works/add.pl | |||
@@ -0,0 +1,119 @@ | |||
1 | #!/usr/local/bin/perl | ||
2 | # alpha assembler | ||
3 | |||
# bn_add_words($name): emit an Alpha routine called $name that adds two
# arrays of words and propagates the carry from word to word.
#
# Arguments of the generated routine, as bound through wparam() below:
#   0: rp    - array the sums are stored to
#   1: ap    - first array that is loaded
#   2: bp    - second array that is loaded
#   3: count - number of words to process
# The running carry is kept in the register obtained with GR("r0").
sub bn_add_words
	{
	local($name)=@_;
	local($cc,$a,$b,$r);

	&init_pool(4);
	($cc)=GR("r0");

	$rp=&wparam(0);
	$ap=&wparam(1);
	$bp=&wparam(2);
	$count=&wparam(3);

	&function_begin($name,"");

	&comment("");
	&sub($count,4,$count);		# count-=4
	&mov("zero",$cc);		# carry=0
	# NOTE(review): this unconditional branch sends every call to "finish",
	# so the blt below and the 4-way unrolled loop are never executed and
	# all words go through "last_loop".  It looks like a deliberate bypass
	# of the unrolled path; it is kept so the emitted live code is unchanged.
	&br(&label("finish"));
	&blt($count,&label("finish"));

	($a0,$b0)=&NR(2);
	&ld($a0,&QWPw(0,$ap));
	&ld($b0,&QWPw(0,$bp));

	##########################################################
	# 4-way unrolled loop: a[0]/b[0] are already loaded on entry.
	&set_label("loop");

	($a1)=&NR(1); &ld($a1,&QWPw(1,$ap));
	($b1)=&NR(1); &ld($b1,&QWPw(1,$bp));
	($a2)=&NR(1); &ld($a2,&QWPw(2,$ap));
	($b2)=&NR(1); &ld($b2,&QWPw(2,$bp));
	($a3)=&NR(1); &ld($a3,&QWPw(3,$ap));
	($b3)=&NR(1); &ld($b3,&QWPw(3,$bp));

	# word 0: o0=a0+b0+cc, cc=carry out
	($o0,$t0)=&NR(2);
	&add($a0,$b0,$o0);
	&cmpult($o0,$b0,$t0);		# t0=carry from a0+b0
	&add($o0,$cc,$o0);
	&cmpult($o0,$cc,$cc);		# cc=carry from adding the old carry
	&add($cc,$t0,$cc);	&FR($t0);

	# word 1
	($t1,$o1)=&NR(2);

	&add($a1,$b1,$o1);	&FR($a1);
	&cmpult($o1,$b1,$t1);	&FR($b1);
	&add($o1,$cc,$o1);
	&cmpult($o1,$cc,$cc);
	&add($cc,$t1,$cc);	&FR($t1);

	# word 2
	($t2,$o2)=&NR(2);

	&add($a2,$b2,$o2);	&FR($a2);
	&cmpult($o2,$b2,$t2);	&FR($b2);
	&add($o2,$cc,$o2);
	&cmpult($o2,$cc,$cc);
	&add($cc,$t2,$cc);	&FR($t2);

	# word 3
	($t3,$o3)=&NR(2);

	&add($a3,$b3,$o3);	&FR($a3);
	&cmpult($o3,$b3,$t3);	&FR($b3);
	&add($o3,$cc,$o3);
	&cmpult($o3,$cc,$cc);
	&add($cc,$t3,$cc);	&FR($t3);

	# Each sum goes to its own result word.  (All four used to be stored
	# at offset 0, which left r[1..3] unwritten.)
	&st($o0,&QWPw(0,$rp)); &FR($o0);
	&st($o1,&QWPw(1,$rp)); &FR($o1);
	&st($o2,&QWPw(2,$rp)); &FR($o2);
	&st($o3,&QWPw(3,$rp)); &FR($o3);

	&sub($count,4,$count);	# count-=4
	&add($ap,4*$QWS,$ap);	# ap+=4 words
	&add($bp,4*$QWS,$bp);	# bp+=4 words
	&add($rp,4*$QWS,$rp);	# rp+=4 words

	&blt($count,&label("finish"));
	&ld($a0,&QWPw(0,$ap));
	&ld($b0,&QWPw(0,$bp));
	&br(&label("loop"));
	##################################################
	# Do the last 0..3 words

	($t0,$o0)=&NR(2);
	&set_label("last_loop");

	&ld($a0,&QWPw(0,$ap));	# get a
	&ld($b0,&QWPw(0,$bp));	# get b

	&add($a0,$b0,$o0);
	&cmpult($o0,$b0,$t0);	# t0=carry from a+b
	&add($o0,$cc,$o0);	# add the incoming carry
	&cmpult($o0,$cc,$cc);	# cc=carry from adding the incoming carry
	&add($cc,$t0,$cc);	# combine the two carries
	&st($o0,&QWPw(0,$rp));	# save

	&add($ap,$QWS,$ap);
	&add($bp,$QWS,$bp);
	&add($rp,$QWS,$rp);
	&sub($count,1,$count);
	&bgt($count,&label("last_loop"));
	&function_end_A($name);

	######################################################
	# Undo the initial count-=4 and run the tail loop if words remain.
	&set_label("finish");
	&add($count,4,$count);
	&bgt($count,&label("last_loop"));

	&FR($o0,$t0,$a0,$b0);
	&set_label("end");
	&function_end($name);

	&fin_pool;
	}
118 | |||
119 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/div.pl b/src/lib/libcrypto/bn/asm/alpha.works/div.pl new file mode 100644 index 0000000000..7ec144377f --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha.works/div.pl | |||
@@ -0,0 +1,144 @@ | |||
1 | #!/usr/local/bin/perl | ||
2 | |||
# bn_div64: hands a fixed block of Alpha assembly to asm_add().
# The text is a single-quoted here-document, so Perl does not interpolate
# the $N register names or the $9119-style labels inside it.
# What the assembly itself shows:
#   - it defines the global symbol bn_div64 with a 48-byte frame that
#     saves $26 and $9..$13;
#   - incoming $16, $17 and $18 are copied to $9, $10 and $11;
#   - if $11 is zero, -1 is placed in $0 and control goes to the epilogue;
#   - otherwise BN_num_bits_word is called on $11, and abort is called when
#     that bit count is not 64 and $9 is above (1 << count);
#   - the $9123 loop runs twice ($12 starts at 2) and the value returned
#     in $0 is $13 | $27.
# NOTE(review): presumably $16:$17 is a two-word dividend and $18 the
# divisor -- confirm against the C prototype.
3 | sub bn_div64 | ||
4 | { | ||
5 | local($data)=<<'EOF'; | ||
6 | # | ||
7 | # What follows was taken directly from the C compiler with a few | ||
8 | # hacks to redo the lables. | ||
9 | # | ||
10 | .text | ||
11 | .set noreorder | ||
12 | .set volatile | ||
13 | .align 3 | ||
14 | .globl bn_div64 | ||
15 | .ent bn_div64 | ||
16 | bn_div64: | ||
17 | ldgp $29,0($27) | ||
18 | bn_div64..ng: | ||
19 | lda $30,-48($30) | ||
20 | .frame $30,48,$26,0 | ||
21 | stq $26,0($30) | ||
22 | stq $9,8($30) | ||
23 | stq $10,16($30) | ||
24 | stq $11,24($30) | ||
25 | stq $12,32($30) | ||
26 | stq $13,40($30) | ||
27 | .mask 0x4003e00,-48 | ||
28 | .prologue 1 | ||
29 | bis $16,$16,$9 | ||
30 | bis $17,$17,$10 | ||
31 | bis $18,$18,$11 | ||
32 | bis $31,$31,$13 | ||
33 | bis $31,2,$12 | ||
34 | bne $11,$9119 | ||
35 | lda $0,-1 | ||
36 | br $31,$9136 | ||
37 | .align 4 | ||
38 | $9119: | ||
39 | bis $11,$11,$16 | ||
40 | jsr $26,BN_num_bits_word | ||
41 | ldgp $29,0($26) | ||
42 | subq $0,64,$1 | ||
43 | beq $1,$9120 | ||
44 | bis $31,1,$1 | ||
45 | sll $1,$0,$1 | ||
46 | cmpule $9,$1,$1 | ||
47 | bne $1,$9120 | ||
48 | # lda $16,_IO_stderr_ | ||
49 | # lda $17,$C32 | ||
50 | # bis $0,$0,$18 | ||
51 | # jsr $26,fprintf | ||
52 | # ldgp $29,0($26) | ||
53 | jsr $26,abort | ||
54 | ldgp $29,0($26) | ||
55 | .align 4 | ||
56 | $9120: | ||
57 | bis $31,64,$3 | ||
58 | cmpult $9,$11,$2 | ||
59 | subq $3,$0,$1 | ||
60 | addl $1,$31,$0 | ||
61 | subq $9,$11,$1 | ||
62 | cmoveq $2,$1,$9 | ||
63 | beq $0,$9122 | ||
64 | zapnot $0,15,$2 | ||
65 | subq $3,$0,$1 | ||
66 | sll $11,$2,$11 | ||
67 | sll $9,$2,$3 | ||
68 | srl $10,$1,$1 | ||
69 | sll $10,$2,$10 | ||
70 | bis $3,$1,$9 | ||
71 | $9122: | ||
72 | srl $11,32,$5 | ||
73 | zapnot $11,15,$6 | ||
74 | lda $7,-1 | ||
75 | .align 5 | ||
76 | $9123: | ||
77 | srl $9,32,$1 | ||
78 | subq $1,$5,$1 | ||
79 | bne $1,$9126 | ||
80 | zapnot $7,15,$27 | ||
81 | br $31,$9127 | ||
82 | .align 4 | ||
83 | $9126: | ||
84 | bis $9,$9,$24 | ||
85 | bis $5,$5,$25 | ||
86 | divqu $24,$25,$27 | ||
87 | $9127: | ||
88 | srl $10,32,$4 | ||
89 | .align 5 | ||
90 | $9128: | ||
91 | mulq $27,$5,$1 | ||
92 | subq $9,$1,$3 | ||
93 | zapnot $3,240,$1 | ||
94 | bne $1,$9129 | ||
95 | mulq $6,$27,$2 | ||
96 | sll $3,32,$1 | ||
97 | addq $1,$4,$1 | ||
98 | cmpule $2,$1,$2 | ||
99 | bne $2,$9129 | ||
100 | subq $27,1,$27 | ||
101 | br $31,$9128 | ||
102 | .align 4 | ||
103 | $9129: | ||
104 | mulq $27,$6,$1 | ||
105 | mulq $27,$5,$4 | ||
106 | srl $1,32,$3 | ||
107 | sll $1,32,$1 | ||
108 | addq $4,$3,$4 | ||
109 | cmpult $10,$1,$2 | ||
110 | subq $10,$1,$10 | ||
111 | addq $2,$4,$2 | ||
112 | cmpult $9,$2,$1 | ||
113 | bis $2,$2,$4 | ||
114 | beq $1,$9134 | ||
115 | addq $9,$11,$9 | ||
116 | subq $27,1,$27 | ||
117 | $9134: | ||
118 | subl $12,1,$12 | ||
119 | subq $9,$4,$9 | ||
120 | beq $12,$9124 | ||
121 | sll $27,32,$13 | ||
122 | sll $9,32,$2 | ||
123 | srl $10,32,$1 | ||
124 | sll $10,32,$10 | ||
125 | bis $2,$1,$9 | ||
126 | br $31,$9123 | ||
127 | .align 4 | ||
128 | $9124: | ||
129 | bis $13,$27,$0 | ||
130 | $9136: | ||
131 | ldq $26,0($30) | ||
132 | ldq $9,8($30) | ||
133 | ldq $10,16($30) | ||
134 | ldq $11,24($30) | ||
135 | ldq $12,32($30) | ||
136 | ldq $13,40($30) | ||
137 | addq $30,48,$30 | ||
138 | ret $31,($26),1 | ||
139 | .end bn_div64 | ||
140 | EOF | ||
# Pass the assembly text collected above to asm_add().
141 | &asm_add($data); | ||
142 | } | ||
143 | |||
144 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/mul.pl b/src/lib/libcrypto/bn/asm/alpha.works/mul.pl new file mode 100644 index 0000000000..b182bae452 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha.works/mul.pl | |||
@@ -0,0 +1,116 @@ | |||
1 | #!/usr/local/bin/perl | ||
2 | # alpha assembler | ||
3 | |||
# bn_mul_words($name): emit an Alpha routine called $name that walks an
# array of words, combines each word with a single multiplier word through
# the mul/muh helpers, and stores one result word per input word.
# Arguments of the generated routine, as bound through wparam() below:
#   0: rp (stored to), 1: ap (loaded from), 2: count, 3: word (multiplier).
# The running carry is kept in the register obtained with GR("r0").
# NOTE(review): $couny in the local() list is never used; it looks like a
# typo for $count, which is assigned below as a global.
4 | sub bn_mul_words | ||
5 | { | ||
6 | local($name)=@_; | ||
7 | local($cc,$a,$b,$r,$couny); | ||
8 | |||
9 | &init_pool(4); | ||
10 | ($cc)=GR("r0"); | ||
11 | |||
12 | $rp=&wparam(0); | ||
13 | $ap=&wparam(1); | ||
14 | $count=&wparam(2); | ||
15 | $word=&wparam(3); | ||
16 | |||
17 | &function_begin($name,""); | ||
18 | |||
19 | &comment(""); | ||
20 | &sub($count,4,$count); | ||
21 | &mov("zero",$cc); | ||
# NOTE(review): the unconditional branch below always jumps to "finish",
# so the blt and the two loads that follow it are emitted but never run.
22 | &br(&label("finish")); | ||
23 | &blt($count,&label("finish")); | ||
24 | |||
# NOTE(review): $a0 is re-assigned from NR(1) at "last_loop" without the
# two registers taken here being handed back with FR -- confirm whether
# fin_pool tolerates that.
25 | ($a0,$r0)=&NR(2); | ||
26 | &ld($a0,&QWPw(0,$ap)); | ||
27 | &ld($r0,&QWPw(0,$rp)); | ||
28 | |||
# Everything up to the EOF marker is only assigned to $a as a string; it
# is never evaluated in this sub, so none of it emits instructions.  The
# text refers to $bp/$b0, which this sub never sets.
29 | $a=<<'EOF'; | ||
30 | ########################################################## | ||
31 | &set_label("loop"); | ||
32 | |||
33 | ($a1)=&NR(1); &ld($a1,&QWPw(1,$ap)); | ||
34 | ($b1)=&NR(1); &ld($b1,&QWPw(1,$bp)); | ||
35 | ($a2)=&NR(1); &ld($a2,&QWPw(2,$ap)); | ||
36 | ($b2)=&NR(1); &ld($b2,&QWPw(2,$bp)); | ||
37 | ($a3)=&NR(1); &ld($a3,&QWPw(3,$ap)); | ||
38 | ($b3)=&NR(1); &ld($b3,&QWPw(3,$bp)); | ||
39 | |||
40 | ($o0,$t0)=&NR(2); | ||
41 | &add($a0,$b0,$o0); | ||
42 | &cmpult($o0,$b0,$t0); | ||
43 | &add($o0,$cc,$o0); | ||
44 | &cmpult($o0,$cc,$cc); | ||
45 | &add($cc,$t0,$cc); &FR($t0); | ||
46 | |||
47 | ($t1,$o1)=&NR(2); | ||
48 | |||
49 | &add($a1,$b1,$o1); &FR($a1); | ||
50 | &cmpult($o1,$b1,$t1); &FR($b1); | ||
51 | &add($o1,$cc,$o1); | ||
52 | &cmpult($o1,$cc,$cc); | ||
53 | &add($cc,$t1,$cc); &FR($t1); | ||
54 | |||
55 | ($t2,$o2)=&NR(2); | ||
56 | |||
57 | &add($a2,$b2,$o2); &FR($a2); | ||
58 | &cmpult($o2,$b2,$t2); &FR($b2); | ||
59 | &add($o2,$cc,$o2); | ||
60 | &cmpult($o2,$cc,$cc); | ||
61 | &add($cc,$t2,$cc); &FR($t2); | ||
62 | |||
63 | ($t3,$o3)=&NR(2); | ||
64 | |||
65 | &add($a3,$b3,$o3); &FR($a3); | ||
66 | &cmpult($o3,$b3,$t3); &FR($b3); | ||
67 | &add($o3,$cc,$o3); | ||
68 | &cmpult($o3,$cc,$cc); | ||
69 | &add($cc,$t3,$cc); &FR($t3); | ||
70 | |||
71 | &st($o0,&QWPw(0,$rp)); &FR($o0); | ||
72 | &st($o1,&QWPw(0,$rp)); &FR($o1); | ||
73 | &st($o2,&QWPw(0,$rp)); &FR($o2); | ||
74 | &st($o3,&QWPw(0,$rp)); &FR($o3); | ||
75 | |||
76 | &sub($count,4,$count); # count-=4 | ||
77 | &add($ap,4*$QWS,$ap); # count+=4 | ||
78 | &add($bp,4*$QWS,$bp); # count+=4 | ||
79 | &add($rp,4*$QWS,$rp); # count+=4 | ||
80 | |||
81 | &blt($count,&label("finish")); | ||
82 | &ld($a0,&QWPw(0,$ap)); | ||
83 | &ld($b0,&QWPw(0,$bp)); | ||
84 | &br(&label("loop")); | ||
85 | EOF | ||
86 | ################################################## | ||
87 | # Do the last 0..3 words | ||
88 | |||
# Because of the branch above, this loop actually handles every word.
# Per iteration (mul/muh presumably give the low/high halves of the
# product -- confirm in the helper definitions):
#   l0 = mul(a0,word); h0 = muh(a0,word);
#   l0 += cc; cc = (l0 < cc); store l0; cc = h0 + cc.
89 | &set_label("last_loop"); | ||
90 | |||
91 | &ld(($a0)=&NR(1),&QWPw(0,$ap)); # get a | ||
92 | &mul($a0,$word,($l0)=&NR(1)); | ||
93 | &add($ap,$QWS,$ap); | ||
94 | &muh($a0,$word,($h0)=&NR(1)); &FR($a0); | ||
95 | &add($l0,$cc,$l0); | ||
96 | &add($rp,$QWS,$rp); | ||
97 | &sub($count,1,$count); | ||
98 | &cmpult($l0,$cc,$cc); | ||
# rp was already advanced above, hence the -1 word offset for the store.
99 | &st($l0,&QWPw(-1,$rp)); &FR($l0); | ||
100 | &add($h0,$cc,$cc); &FR($h0); | ||
101 | |||
102 | &bgt($count,&label("last_loop")); | ||
103 | &function_end_A($name); | ||
104 | |||
105 | ###################################################### | ||
# "finish": undo the initial count-=4 and enter last_loop if count > 0.
106 | &set_label("finish"); | ||
107 | &add($count,4,$count); | ||
108 | &bgt($count,&label("last_loop")); | ||
109 | |||
110 | &set_label("end"); | ||
111 | &function_end($name); | ||
112 | |||
113 | &fin_pool; | ||
114 | } | ||
115 | |||
116 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/mul_add.pl b/src/lib/libcrypto/bn/asm/alpha.works/mul_add.pl new file mode 100644 index 0000000000..e37f6315fb --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha.works/mul_add.pl | |||
@@ -0,0 +1,120 @@ | |||
1 | #!/usr/local/bin/perl | ||
2 | # alpha assembler | ||
3 | |||
# bn_mul_add_words($name): emit an Alpha routine called $name that, for
# each word, loads a word from ap and a word from rp, combines the ap word
# with a multiplier word through the mul/muh helpers, adds the result and
# the running carry to the rp word, and stores it back to rp.
# Arguments of the generated routine, as bound through wparam() below:
#   0: rp (loaded and stored), 1: ap (loaded), 2: count, 3: word.
# The running carry is kept in the register obtained with GR("r0").
# NOTE(review): $couny in the local() list is never used; it looks like a
# typo for $count, which is assigned below as a global.
4 | sub bn_mul_add_words | ||
5 | { | ||
6 | local($name)=@_; | ||
7 | local($cc,$a,$b,$r,$couny); | ||
8 | |||
9 | &init_pool(4); | ||
10 | ($cc)=GR("r0"); | ||
11 | |||
12 | $rp=&wparam(0); | ||
13 | $ap=&wparam(1); | ||
14 | $count=&wparam(2); | ||
15 | $word=&wparam(3); | ||
16 | |||
17 | &function_begin($name,""); | ||
18 | |||
19 | &comment(""); | ||
20 | &sub($count,4,$count); | ||
21 | &mov("zero",$cc); | ||
# NOTE(review): the unconditional branch below always jumps to "finish",
# so the blt and the two loads that follow it are emitted but never run.
22 | &br(&label("finish")); | ||
23 | &blt($count,&label("finish")); | ||
24 | |||
# NOTE(review): $a0 and $r0 are re-assigned from NR(1) at "last_loop"
# without the two registers taken here being handed back with FR --
# confirm whether fin_pool tolerates that.
25 | ($a0,$r0)=&NR(2); | ||
26 | &ld($a0,&QWPw(0,$ap)); | ||
27 | &ld($r0,&QWPw(0,$rp)); | ||
28 | |||
# Everything up to the EOF marker is only assigned to $a as a string; it
# is never evaluated in this sub, so none of it emits instructions.  The
# text refers to $bp/$b0, which this sub never sets.
29 | $a=<<'EOF'; | ||
30 | ########################################################## | ||
31 | &set_label("loop"); | ||
32 | |||
33 | ($a1)=&NR(1); &ld($a1,&QWPw(1,$ap)); | ||
34 | ($b1)=&NR(1); &ld($b1,&QWPw(1,$bp)); | ||
35 | ($a2)=&NR(1); &ld($a2,&QWPw(2,$ap)); | ||
36 | ($b2)=&NR(1); &ld($b2,&QWPw(2,$bp)); | ||
37 | ($a3)=&NR(1); &ld($a3,&QWPw(3,$ap)); | ||
38 | ($b3)=&NR(1); &ld($b3,&QWPw(3,$bp)); | ||
39 | |||
40 | ($o0,$t0)=&NR(2); | ||
41 | &add($a0,$b0,$o0); | ||
42 | &cmpult($o0,$b0,$t0); | ||
43 | &add($o0,$cc,$o0); | ||
44 | &cmpult($o0,$cc,$cc); | ||
45 | &add($cc,$t0,$cc); &FR($t0); | ||
46 | |||
47 | ($t1,$o1)=&NR(2); | ||
48 | |||
49 | &add($a1,$b1,$o1); &FR($a1); | ||
50 | &cmpult($o1,$b1,$t1); &FR($b1); | ||
51 | &add($o1,$cc,$o1); | ||
52 | &cmpult($o1,$cc,$cc); | ||
53 | &add($cc,$t1,$cc); &FR($t1); | ||
54 | |||
55 | ($t2,$o2)=&NR(2); | ||
56 | |||
57 | &add($a2,$b2,$o2); &FR($a2); | ||
58 | &cmpult($o2,$b2,$t2); &FR($b2); | ||
59 | &add($o2,$cc,$o2); | ||
60 | &cmpult($o2,$cc,$cc); | ||
61 | &add($cc,$t2,$cc); &FR($t2); | ||
62 | |||
63 | ($t3,$o3)=&NR(2); | ||
64 | |||
65 | &add($a3,$b3,$o3); &FR($a3); | ||
66 | &cmpult($o3,$b3,$t3); &FR($b3); | ||
67 | &add($o3,$cc,$o3); | ||
68 | &cmpult($o3,$cc,$cc); | ||
69 | &add($cc,$t3,$cc); &FR($t3); | ||
70 | |||
71 | &st($o0,&QWPw(0,$rp)); &FR($o0); | ||
72 | &st($o1,&QWPw(0,$rp)); &FR($o1); | ||
73 | &st($o2,&QWPw(0,$rp)); &FR($o2); | ||
74 | &st($o3,&QWPw(0,$rp)); &FR($o3); | ||
75 | |||
76 | &sub($count,4,$count); # count-=4 | ||
77 | &add($ap,4*$QWS,$ap); # count+=4 | ||
78 | &add($bp,4*$QWS,$bp); # count+=4 | ||
79 | &add($rp,4*$QWS,$rp); # count+=4 | ||
80 | |||
81 | &blt($count,&label("finish")); | ||
82 | &ld($a0,&QWPw(0,$ap)); | ||
83 | &ld($b0,&QWPw(0,$bp)); | ||
84 | &br(&label("loop")); | ||
85 | EOF | ||
86 | ################################################## | ||
87 | # Do the last 0..3 words | ||
88 | |||
# Because of the branch above, this loop actually handles every word.
# Per iteration (mul/muh presumably give the low/high halves of the
# product -- confirm in the helper definitions):
#   l0 = mul(a0,word); h0 = muh(a0,word);
#   r0 += l0; t0 = (r0 < l0); r0 += cc; h0 += t0;
#   cc = (r0 < cc); store r0; cc = h0 + cc.
89 | &set_label("last_loop"); | ||
90 | |||
91 | &ld(($a0)=&NR(1),&QWPw(0,$ap)); # get a | ||
92 | &ld(($r0)=&NR(1),&QWPw(0,$rp)); # get b | ||
93 | &mul($a0,$word,($l0)=&NR(1)); | ||
94 | &sub($count,1,$count); | ||
95 | &add($ap,$QWS,$ap); | ||
96 | &muh($a0,$word,($h0)=&NR(1)); &FR($a0); | ||
97 | &add($r0,$l0,$r0); | ||
98 | &add($rp,$QWS,$rp); | ||
99 | &cmpult($r0,$l0,($t0)=&NR(1)); &FR($l0); | ||
100 | &add($r0,$cc,$r0); | ||
101 | &add($h0,$t0,$h0); &FR($t0); | ||
102 | &cmpult($r0,$cc,$cc); | ||
# rp was already advanced above, hence the -1 word offset for the store.
103 | &st($r0,&QWPw(-1,$rp)); &FR($r0); | ||
104 | &add($h0,$cc,$cc); &FR($h0); | ||
105 | |||
106 | &bgt($count,&label("last_loop")); | ||
107 | &function_end_A($name); | ||
108 | |||
109 | ###################################################### | ||
# "finish": undo the initial count-=4 and enter last_loop if count > 0.
110 | &set_label("finish"); | ||
111 | &add($count,4,$count); | ||
112 | &bgt($count,&label("last_loop")); | ||
113 | |||
114 | &set_label("end"); | ||
115 | &function_end($name); | ||
116 | |||
117 | &fin_pool; | ||
118 | } | ||
119 | |||
120 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/mul_c4.pl b/src/lib/libcrypto/bn/asm/alpha.works/mul_c4.pl new file mode 100644 index 0000000000..5efd201281 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha.works/mul_c4.pl | |||
@@ -0,0 +1,213 @@ | |||
1 | #!/usr/local/bin/perl | ||
2 | # alpha assembler | ||
3 | |||
# mul_add_c($a,$b,$c0,$c1,$c2): emit code that folds the product of
# registers $a and $b into the three accumulator registers $c0,$c1,$c2.
# Emitted sequence (mul/muh presumably yield the low/high halves of the
# product -- confirm in the helper definitions):
#   l1 = mul(a,b); h1 = muh(a,b);
#   c0 += l1; t1 = (c0 < l1); h1 += t1;
#   c1 += h1; t2 = (c1 < h1); c2 += t2.
# The four scratch registers come from NR and are all returned with FR.
4 | sub mul_add_c | ||
5 | { | ||
6 | local($a,$b,$c0,$c1,$c2)=@_; | ||
7 | local($l1,$h1,$t1,$t2); | ||
8 | |||
9 | &mul($a,$b,($l1)=&NR(1)); | ||
10 | &muh($a,$b,($h1)=&NR(1)); | ||
11 | &add($c0,$l1,$c0); | ||
12 | &cmpult($c0,$l1,($t1)=&NR(1)); &FR($l1); | ||
13 | &add($t1,$h1,$h1); &FR($t1); | ||
14 | &add($c1,$h1,$c1); | ||
15 | &cmpult($c1,$h1,($t2)=&NR(1)); &FR($h1); | ||
16 | &add($c2,$t2,$c2); &FR($t2); | ||
17 | } | ||
18 | |||
# bn_mul_comba4($name): emit an Alpha routine called $name that multiplies
# two 4-word arrays into an 8-word result, one result word (column) at a
# time, with the multiplies interleaved between the additions.
#
# Arguments of the generated routine, as bound through wparam() below:
#   0: rp - result, words 0..7 are stored
#   1: ap - first operand, words 0..3 are loaded
#   2: bp - second operand, words 0..3 are loaded
#
# Register roles:
#   $R       - running sum of the current column
#   $H1,$H2  - count of carries out of $R; their sum seeds the next column
#   $rNN     - individual partial-product words (mul/muh are presumably
#              the low/high halves of a product -- confirm in the helpers)
#   $tNN     - one-bit carry produced by cmpult after each addition
#
# Fixes relative to the previous revision:
#   - $H2 is now zeroed before the first column (an undefined $H0 was
#     zeroed instead, leaving $H2 uninitialised);
#   - column 2 adds $r06 (it added the already-released $r01 and then
#     compared against $r06);
#   - every "&add(...) &FR(...)" pair is now two statements; without the
#     semicolon Perl parsed the pair as a bitwise AND of the two calls.
sub bn_mul_comba4
	{
	local($name)=@_;
	local(@a,@b,$r,$c0,$c1,$c2);

	$cnt=1;
	&init_pool(3);

	$rp=&wparam(0);
	$ap=&wparam(1);
	$bp=&wparam(2);

	&function_begin($name,"");

	&comment("");

	&ld(($a[0])=&NR(1),&QWPw(0,$ap));
	&ld(($b[0])=&NR(1),&QWPw(0,$bp));
	&ld(($a[1])=&NR(1),&QWPw(1,$ap));
	&ld(($b[1])=&NR(1),&QWPw(1,$bp));
	&mul($a[0],$b[0],($r00)=&NR(1));
	&ld(($a[2])=&NR(1),&QWPw(2,$ap));
	&ld(($b[2])=&NR(1),&QWPw(2,$bp));
	&muh($a[0],$b[0],($r01)=&NR(1));
	# ap/bp are released just before their last use, so the final loads
	# may land in the pointer registers themselves.
	&FR($ap); &ld(($a[3])=&NR(1),&QWPw(3,$ap));
	&FR($bp); &ld(($b[3])=&NR(1),&QWPw(3,$bp));
	&mul($a[0],$b[1],($r02)=&NR(1));

	($R,$H1,$H2)=&NR(3);

	# column 0: r[0] = mul(a0,b0)
	&st($r00,&QWPw(0,$rp));	&FR($r00);

	# column 1: muh(a0,b0) + mul(a0,b1) + mul(a1,b0)
	&mov("zero",$R);
	&mul($a[1],$b[0],($r03)=&NR(1));

	&mov("zero",$H1);
	&mov("zero",$H2);
	&add($R,$r01,$R);
	&muh($a[0],$b[1],($r04)=&NR(1));
	&cmpult($R,$r01,($t01)=&NR(1));	&FR($r01);
	&add($R,$r02,$R);
	&add($H1,$t01,$H1);		&FR($t01);
	&muh($a[1],$b[0],($r05)=&NR(1));
	&cmpult($R,$r02,($t02)=&NR(1));	&FR($r02);
	&add($R,$r03,$R);
	&add($H2,$t02,$H2);		&FR($t02);
	&mul($a[0],$b[2],($r06)=&NR(1));
	&cmpult($R,$r03,($t03)=&NR(1));	&FR($r03);
	&add($H1,$t03,$H1);		&FR($t03);
	&st($R,&QWPw(1,$rp));
	&add($H1,$H2,$R);

	# column 2: muh(a0,b1) + muh(a1,b0) + mul(a0,b2) + mul(a1,b1) + mul(a2,b0)
	&mov("zero",$H1);
	&add($R,$r04,$R);
	&mov("zero",$H2);
	&mul($a[1],$b[1],($r07)=&NR(1));
	&cmpult($R,$r04,($t04)=&NR(1));	&FR($r04);
	&add($R,$r05,$R);
	&add($H1,$t04,$H1);		&FR($t04);
	&mul($a[2],$b[0],($r08)=&NR(1));
	&cmpult($R,$r05,($t05)=&NR(1));	&FR($r05);
	&add($R,$r06,$R);
	&add($H2,$t05,$H2);		&FR($t05);
	&muh($a[0],$b[2],($r09)=&NR(1));
	&cmpult($R,$r06,($t06)=&NR(1));	&FR($r06);
	&add($R,$r07,$R);
	&add($H1,$t06,$H1);		&FR($t06);
	&muh($a[1],$b[1],($r10)=&NR(1));
	&cmpult($R,$r07,($t07)=&NR(1));	&FR($r07);
	&add($R,$r08,$R);
	&add($H2,$t07,$H2);		&FR($t07);
	&muh($a[2],$b[0],($r11)=&NR(1));
	&cmpult($R,$r08,($t08)=&NR(1));	&FR($r08);
	&add($H1,$t08,$H1);		&FR($t08);
	&st($R,&QWPw(2,$rp));
	&add($H1,$H2,$R);

	# column 3: muh(a0,b2) + muh(a1,b1) + muh(a2,b0)
	#         + mul(a0,b3) + mul(a1,b2) + mul(a2,b1) + mul(a3,b0)
	&mov("zero",$H1);
	&add($R,$r09,$R);
	&mov("zero",$H2);
	&mul($a[0],$b[3],($r12)=&NR(1));
	&cmpult($R,$r09,($t09)=&NR(1));	&FR($r09);
	&add($R,$r10,$R);
	&add($H1,$t09,$H1);		&FR($t09);
	&mul($a[1],$b[2],($r13)=&NR(1));
	&cmpult($R,$r10,($t10)=&NR(1));	&FR($r10);
	&add($R,$r11,$R);
	&add($H1,$t10,$H1);		&FR($t10);
	&mul($a[2],$b[1],($r14)=&NR(1));
	&cmpult($R,$r11,($t11)=&NR(1));	&FR($r11);
	&add($R,$r12,$R);
	&add($H1,$t11,$H1);		&FR($t11);
	&mul($a[3],$b[0],($r15)=&NR(1));
	&cmpult($R,$r12,($t12)=&NR(1));	&FR($r12);
	&add($R,$r13,$R);
	&add($H1,$t12,$H1);		&FR($t12);
	&muh($a[0],$b[3],($r16)=&NR(1));
	&cmpult($R,$r13,($t13)=&NR(1));	&FR($r13);
	&add($R,$r14,$R);
	&add($H1,$t13,$H1);		&FR($t13);
	&muh($a[1],$b[2],($r17)=&NR(1));
	&cmpult($R,$r14,($t14)=&NR(1));	&FR($r14);
	&add($R,$r15,$R);
	&add($H1,$t14,$H1);		&FR($t14);
	&muh($a[2],$b[1],($r18)=&NR(1));
	&cmpult($R,$r15,($t15)=&NR(1));	&FR($r15);
	&add($H1,$t15,$H1);		&FR($t15);
	&st($R,&QWPw(3,$rp));
	&add($H1,$H2,$R);

	# column 4: muh(a0,b3) + muh(a1,b2) + muh(a2,b1) + muh(a3,b0)
	#         + mul(a1,b3) + mul(a2,b2) + mul(a3,b1)
	&mov("zero",$H1);
	&add($R,$r16,$R);
	&mov("zero",$H2);
	&muh($a[3],$b[0],($r19)=&NR(1));
	&cmpult($R,$r16,($t16)=&NR(1));	&FR($r16);
	&add($R,$r17,$R);
	&add($H1,$t16,$H1);		&FR($t16);
	&mul($a[1],$b[3],($r20)=&NR(1));
	&cmpult($R,$r17,($t17)=&NR(1));	&FR($r17);
	&add($R,$r18,$R);
	&add($H1,$t17,$H1);		&FR($t17);
	&mul($a[2],$b[2],($r21)=&NR(1));
	&cmpult($R,$r18,($t18)=&NR(1));	&FR($r18);
	&add($R,$r19,$R);
	&add($H1,$t18,$H1);		&FR($t18);
	&mul($a[3],$b[1],($r22)=&NR(1));
	&cmpult($R,$r19,($t19)=&NR(1));	&FR($r19);
	&add($R,$r20,$R);
	&add($H1,$t19,$H1);		&FR($t19);
	&muh($a[1],$b[3],($r23)=&NR(1));
	&cmpult($R,$r20,($t20)=&NR(1));	&FR($r20);
	&add($R,$r21,$R);
	&add($H1,$t20,$H1);		&FR($t20);
	&muh($a[2],$b[2],($r24)=&NR(1));
	&cmpult($R,$r21,($t21)=&NR(1));	&FR($r21);
	&add($R,$r22,$R);
	&add($H1,$t21,$H1);		&FR($t21);
	&muh($a[3],$b[1],($r25)=&NR(1));
	&cmpult($R,$r22,($t22)=&NR(1));	&FR($r22);
	&add($H1,$t22,$H1);		&FR($t22);
	&st($R,&QWPw(4,$rp));
	&add($H1,$H2,$R);

	# column 5: muh(a1,b3) + muh(a2,b2) + muh(a3,b1) + mul(a2,b3) + mul(a3,b2)
	&mov("zero",$H1);
	&add($R,$r23,$R);
	&mov("zero",$H2);
	&mul($a[2],$b[3],($r26)=&NR(1));
	&cmpult($R,$r23,($t23)=&NR(1));	&FR($r23);
	&add($R,$r24,$R);
	&add($H1,$t23,$H1);		&FR($t23);
	&mul($a[3],$b[2],($r27)=&NR(1));
	&cmpult($R,$r24,($t24)=&NR(1));	&FR($r24);
	&add($R,$r25,$R);
	&add($H1,$t24,$H1);		&FR($t24);
	&muh($a[2],$b[3],($r28)=&NR(1));
	&cmpult($R,$r25,($t25)=&NR(1));	&FR($r25);
	&add($R,$r26,$R);
	&add($H1,$t25,$H1);		&FR($t25);
	&muh($a[3],$b[2],($r29)=&NR(1));
	&cmpult($R,$r26,($t26)=&NR(1));	&FR($r26);
	&add($R,$r27,$R);
	&add($H1,$t26,$H1);		&FR($t26);
	&mul($a[3],$b[3],($r30)=&NR(1));
	&cmpult($R,$r27,($t27)=&NR(1));	&FR($r27);
	&add($H1,$t27,$H1);		&FR($t27);
	&st($R,&QWPw(5,$rp));
	&add($H1,$H2,$R);

	# column 6: muh(a2,b3) + muh(a3,b2) + mul(a3,b3)
	&mov("zero",$H1);
	&add($R,$r28,$R);
	&mov("zero",$H2);
	&muh($a[3],$b[3],($r31)=&NR(1));
	&cmpult($R,$r28,($t28)=&NR(1));	&FR($r28);
	&add($R,$r29,$R);
	&add($H1,$t28,$H1);		&FR($t28);
	############
	&cmpult($R,$r29,($t29)=&NR(1));	&FR($r29);
	&add($R,$r30,$R);
	&add($H1,$t29,$H1);		&FR($t29);
	############
	&cmpult($R,$r30,($t30)=&NR(1));	&FR($r30);
	&add($H1,$t30,$H1);		&FR($t30);
	&st($R,&QWPw(6,$rp));
	&add($H1,$H2,$R);

	# column 7: carries of column 6 + muh(a3,b3)
	&add($R,$r31,$R);		&FR($r31);
	&st($R,&QWPw(7,$rp));

	&FR($R,$H1,$H2);
	&function_end($name);

	&fin_pool;
	}
212 | |||
213 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/mul_c4.works.pl b/src/lib/libcrypto/bn/asm/alpha.works/mul_c4.works.pl new file mode 100644 index 0000000000..79d86dd25c --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha.works/mul_c4.works.pl | |||
@@ -0,0 +1,98 @@ | |||
1 | #!/usr/local/bin/perl | ||
2 | # alpha assembler | ||
3 | |||
# mul_add_c($a,$b,$c0,$c1,$c2): emit code that folds the product of
# registers $a and $b into the three accumulator registers $c0,$c1,$c2.
# Emitted sequence (mul/muh presumably yield the low/high halves of the
# product -- confirm in the helper definitions):
#   l1 = mul(a,b); h1 = muh(a,b);
#   c0 += l1; t1 = (c0 < l1); h1 += t1;
#   c1 += h1; t2 = (c1 < h1); c2 += t2.
# The four scratch registers come from NR and are all returned with FR.
4 | sub mul_add_c | ||
5 | { | ||
6 | local($a,$b,$c0,$c1,$c2)=@_; | ||
7 | local($l1,$h1,$t1,$t2); | ||
8 | |||
# Progress trace: prints and bumps the global $cnt on every call, at
# generation time (it writes to STDERR, not into the emitted assembly).
9 | print STDERR "count=$cnt\n"; $cnt++; | ||
10 | &mul($a,$b,($l1)=&NR(1)); | ||
11 | &muh($a,$b,($h1)=&NR(1)); | ||
12 | &add($c0,$l1,$c0); | ||
13 | &cmpult($c0,$l1,($t1)=&NR(1)); &FR($l1); | ||
14 | &add($t1,$h1,$h1); &FR($t1); | ||
15 | &add($c1,$h1,$c1); | ||
16 | &cmpult($c1,$h1,($t2)=&NR(1)); &FR($h1); | ||
17 | &add($c2,$t2,$c2); &FR($t2); | ||
18 | } | ||
19 | |||
# bn_mul_comba4($name): emit an Alpha routine called $name that multiplies
# two 4-word arrays into an 8-word result, one result word (column) at a
# time, using mul_add_c for every partial product after the first.
# Arguments of the generated routine, as bound through wparam() below:
#   0: rp (words 0..7 stored), 1: ap and 2: bp (words 0..3 loaded).
# ($c0,$c1,$c2) is a rotating three-register accumulator: after each
# column $c0 is stored, and the names are rotated so the old $c1/$c2
# become the new $c0/$c1 and a zeroed register becomes the new $c2.
20 | sub bn_mul_comba4 | ||
21 | { | ||
22 | local($name)=@_; | ||
23 | local(@a,@b,$r,$c0,$c1,$c2); | ||
24 | |||
# $cnt is the call counter that mul_add_c prints and increments.
25 | $cnt=1; | ||
26 | &init_pool(3); | ||
27 | |||
28 | $rp=&wparam(0); | ||
29 | $ap=&wparam(1); | ||
30 | $bp=&wparam(2); | ||
31 | |||
32 | &function_begin($name,""); | ||
33 | |||
34 | &comment(""); | ||
35 | |||
# Load all eight operand words; ap and bp are released after their last use.
36 | &ld(($a[0])=&NR(1),&QWPw(0,$ap)); | ||
37 | &ld(($b[0])=&NR(1),&QWPw(0,$bp)); | ||
38 | &ld(($a[1])=&NR(1),&QWPw(1,$ap)); | ||
39 | &ld(($b[1])=&NR(1),&QWPw(1,$bp)); | ||
40 | &ld(($a[2])=&NR(1),&QWPw(2,$ap)); | ||
41 | &ld(($b[2])=&NR(1),&QWPw(2,$bp)); | ||
42 | &ld(($a[3])=&NR(1),&QWPw(3,$ap)); &FR($ap); | ||
43 | &ld(($b[3])=&NR(1),&QWPw(3,$bp)); &FR($bp); | ||
44 | |||
# column 0: a0*b0 is written straight into c0/c1.
45 | ($c0,$c1,$c2)=&NR(3); | ||
46 | &mov("zero",$c2); | ||
47 | &mul($a[0],$b[0],$c0); | ||
48 | &muh($a[0],$b[0],$c1); | ||
# NOTE(review): NR is given the old register value here rather than a
# count (compare NR(3) above) -- confirm how NR treats a non-numeric
# argument; the intent appears to be "take one register".
49 | &st($c0,&QWPw(0,$rp)); &FR($c0); ($c0)=&NR($c0); | ||
50 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
51 | &mov("zero",$c2); | ||
52 | |||
# column 1
53 | &mul_add_c($a[0],$b[1],$c0,$c1,$c2); | ||
54 | &mul_add_c($a[1],$b[0],$c0,$c1,$c2); | ||
55 | &st($c0,&QWPw(1,$rp)); &FR($c0); ($c0)=&NR($c0); | ||
56 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
57 | &mov("zero",$c2); | ||
58 | |||
# column 2
59 | &mul_add_c($a[1],$b[1],$c0,$c1,$c2); | ||
60 | &mul_add_c($a[0],$b[2],$c0,$c1,$c2); | ||
61 | &mul_add_c($a[2],$b[0],$c0,$c1,$c2); | ||
62 | &st($c0,&QWPw(2,$rp)); &FR($c0); ($c0)=&NR($c0); | ||
63 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
64 | &mov("zero",$c2); | ||
65 | |||
# column 3; operand registers are released after their last product.
66 | &mul_add_c($a[0],$b[3],$c0,$c1,$c2); &FR($a[0]); | ||
67 | &mul_add_c($a[1],$b[2],$c0,$c1,$c2); | ||
68 | &mul_add_c($a[2],$b[1],$c0,$c1,$c2); | ||
69 | &mul_add_c($a[3],$b[0],$c0,$c1,$c2); &FR($b[0]); | ||
70 | &st($c0,&QWPw(3,$rp)); &FR($c0); ($c0)=&NR($c0); | ||
71 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
72 | &mov("zero",$c2); | ||
73 | |||
# column 4
74 | &mul_add_c($a[1],$b[3],$c0,$c1,$c2); &FR($a[1]); | ||
75 | &mul_add_c($a[2],$b[2],$c0,$c1,$c2); | ||
76 | &mul_add_c($a[3],$b[1],$c0,$c1,$c2); &FR($b[1]); | ||
77 | &st($c0,&QWPw(4,$rp)); &FR($c0); ($c0)=&NR($c0); | ||
78 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
79 | &mov("zero",$c2); | ||
80 | |||
# column 5
81 | &mul_add_c($a[2],$b[3],$c0,$c1,$c2); &FR($a[2]); | ||
82 | &mul_add_c($a[3],$b[2],$c0,$c1,$c2); &FR($b[2]); | ||
83 | &st($c0,&QWPw(5,$rp)); &FR($c0); ($c0)=&NR($c0); | ||
84 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
85 | &mov("zero",$c2); | ||
86 | |||
# columns 6 and 7: the last product supplies both remaining result words.
87 | &mul_add_c($a[3],$b[3],$c0,$c1,$c2); &FR($a[3],$b[3]); | ||
88 | &st($c0,&QWPw(6,$rp)); | ||
89 | &st($c1,&QWPw(7,$rp)); | ||
90 | |||
91 | &FR($c0,$c1,$c2); | ||
92 | |||
93 | &function_end($name); | ||
94 | |||
95 | &fin_pool; | ||
96 | } | ||
97 | |||
98 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/mul_c8.pl b/src/lib/libcrypto/bn/asm/alpha.works/mul_c8.pl new file mode 100644 index 0000000000..525ca7494b --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha.works/mul_c8.pl | |||
@@ -0,0 +1,177 @@ | |||
1 | #!/usr/local/bin/perl | ||
2 | # alpha assembler | ||
3 | |||
# bn_mul_comba8($name): emit an Alpha routine called $name that multiplies
# two 8-word arrays into a 16-word result, one result word (column) at a
# time, using mul_add_c for every partial product after the first.
#
# Arguments of the generated routine, as bound through wparam() below:
#   0: rp - result, words 0..15 are stored
#   1: ap - first operand, words 0..7 are loaded
#   2: bp - second operand, words 0..7 are loaded
#
# ($c0,$c1,$c2) is a rotating three-register accumulator: after each
# column $c0 is stored, and the names are rotated so the old $c1/$c2
# become the new $c0/$c1 and a zeroed register becomes the new $c2.
#
# $reg_s0/$reg_s1 are spilled to two stack temporaries and put into the
# register pool for the duration of the routine, then reloaded at the end.
#
# Fix relative to the previous revision: a[4..7] and b[4..7] are loaded
# from word offsets 4..7 (they were all loaded from offset 1, so the upper
# half of both operands was a copy of word 1).
sub bn_mul_comba8
	{
	local($name)=@_;
	local(@a,@b,$r,$c0,$c1,$c2);

	$cnt=1;
	&init_pool(3);

	$rp=&wparam(0);
	$ap=&wparam(1);
	$bp=&wparam(2);

	&function_begin($name,"");

	&comment("");

	&stack_push(2);
	&ld(($a[0])=&NR(1),&QWPw(0,$ap));
	&ld(($b[0])=&NR(1),&QWPw(0,$bp));
	&st($reg_s0,&swtmp(0)); &FR($reg_s0);
	&st($reg_s1,&swtmp(1)); &FR($reg_s1);
	&ld(($a[1])=&NR(1),&QWPw(1,$ap));
	&ld(($b[1])=&NR(1),&QWPw(1,$bp));
	&ld(($a[2])=&NR(1),&QWPw(2,$ap));
	&ld(($b[2])=&NR(1),&QWPw(2,$bp));
	&ld(($a[3])=&NR(1),&QWPw(3,$ap));
	&ld(($b[3])=&NR(1),&QWPw(3,$bp));
	&ld(($a[4])=&NR(1),&QWPw(4,$ap));
	&ld(($b[4])=&NR(1),&QWPw(4,$bp));
	&ld(($a[5])=&NR(1),&QWPw(5,$ap));
	&ld(($b[5])=&NR(1),&QWPw(5,$bp));
	&ld(($a[6])=&NR(1),&QWPw(6,$ap));
	&ld(($b[6])=&NR(1),&QWPw(6,$bp));
	&ld(($a[7])=&NR(1),&QWPw(7,$ap)); &FR($ap);
	&ld(($b[7])=&NR(1),&QWPw(7,$bp)); &FR($bp);

	# column 0: a0*b0 is written straight into c0/c1.
	($c0,$c1,$c2)=&NR(3);
	&mov("zero",$c2);
	&mul($a[0],$b[0],$c0);
	&muh($a[0],$b[0],$c1);
	&st($c0,&QWPw(0,$rp)); &FR($c0); ($c0)=&NR(1);
	($c0,$c1,$c2)=($c1,$c2,$c0);
	&mov("zero",$c2);

	# column 1
	&mul_add_c($a[0],$b[1],$c0,$c1,$c2);
	&mul_add_c($a[1],$b[0],$c0,$c1,$c2);
	&st($c0,&QWPw(1,$rp)); &FR($c0); ($c0)=&NR(1);
	($c0,$c1,$c2)=($c1,$c2,$c0);
	&mov("zero",$c2);

	# column 2
	&mul_add_c($a[0],$b[2],$c0,$c1,$c2);
	&mul_add_c($a[1],$b[1],$c0,$c1,$c2);
	&mul_add_c($a[2],$b[0],$c0,$c1,$c2);
	&st($c0,&QWPw(2,$rp)); &FR($c0); ($c0)=&NR(1);
	($c0,$c1,$c2)=($c1,$c2,$c0);
	&mov("zero",$c2);

	# column 3
	&mul_add_c($a[0],$b[3],$c0,$c1,$c2);
	&mul_add_c($a[1],$b[2],$c0,$c1,$c2);
	&mul_add_c($a[2],$b[1],$c0,$c1,$c2);
	&mul_add_c($a[3],$b[0],$c0,$c1,$c2);
	&st($c0,&QWPw(3,$rp)); &FR($c0); ($c0)=&NR(1);
	($c0,$c1,$c2)=($c1,$c2,$c0);
	&mov("zero",$c2);

	# column 4
	&mul_add_c($a[0],$b[4],$c0,$c1,$c2);
	&mul_add_c($a[1],$b[3],$c0,$c1,$c2);
	&mul_add_c($a[2],$b[2],$c0,$c1,$c2);
	&mul_add_c($a[3],$b[1],$c0,$c1,$c2);
	&mul_add_c($a[4],$b[0],$c0,$c1,$c2);
	&st($c0,&QWPw(4,$rp)); &FR($c0); ($c0)=&NR(1);
	($c0,$c1,$c2)=($c1,$c2,$c0);
	&mov("zero",$c2);

	# column 5
	&mul_add_c($a[0],$b[5],$c0,$c1,$c2);
	&mul_add_c($a[1],$b[4],$c0,$c1,$c2);
	&mul_add_c($a[2],$b[3],$c0,$c1,$c2);
	&mul_add_c($a[3],$b[2],$c0,$c1,$c2);
	&mul_add_c($a[4],$b[1],$c0,$c1,$c2);
	&mul_add_c($a[5],$b[0],$c0,$c1,$c2);
	&st($c0,&QWPw(5,$rp)); &FR($c0); ($c0)=&NR(1);
	($c0,$c1,$c2)=($c1,$c2,$c0);
	&mov("zero",$c2);

	# column 6
	&mul_add_c($a[0],$b[6],$c0,$c1,$c2);
	&mul_add_c($a[1],$b[5],$c0,$c1,$c2);
	&mul_add_c($a[2],$b[4],$c0,$c1,$c2);
	&mul_add_c($a[3],$b[3],$c0,$c1,$c2);
	&mul_add_c($a[4],$b[2],$c0,$c1,$c2);
	&mul_add_c($a[5],$b[1],$c0,$c1,$c2);
	&mul_add_c($a[6],$b[0],$c0,$c1,$c2);
	&st($c0,&QWPw(6,$rp)); &FR($c0); ($c0)=&NR(1);
	($c0,$c1,$c2)=($c1,$c2,$c0);
	&mov("zero",$c2);

	# column 7; from here on operand registers are released after their
	# last product.
	&mul_add_c($a[0],$b[7],$c0,$c1,$c2);	&FR($a[0]);
	&mul_add_c($a[1],$b[6],$c0,$c1,$c2);
	&mul_add_c($a[2],$b[5],$c0,$c1,$c2);
	&mul_add_c($a[3],$b[4],$c0,$c1,$c2);
	&mul_add_c($a[4],$b[3],$c0,$c1,$c2);
	&mul_add_c($a[5],$b[2],$c0,$c1,$c2);
	&mul_add_c($a[6],$b[1],$c0,$c1,$c2);
	&mul_add_c($a[7],$b[0],$c0,$c1,$c2);	&FR($b[0]);
	&st($c0,&QWPw(7,$rp)); &FR($c0); ($c0)=&NR(1);
	($c0,$c1,$c2)=($c1,$c2,$c0);
	&mov("zero",$c2);

	# column 8
	&mul_add_c($a[1],$b[7],$c0,$c1,$c2);	&FR($a[1]);
	&mul_add_c($a[2],$b[6],$c0,$c1,$c2);
	&mul_add_c($a[3],$b[5],$c0,$c1,$c2);
	&mul_add_c($a[4],$b[4],$c0,$c1,$c2);
	&mul_add_c($a[5],$b[3],$c0,$c1,$c2);
	&mul_add_c($a[6],$b[2],$c0,$c1,$c2);
	&mul_add_c($a[7],$b[1],$c0,$c1,$c2);	&FR($b[1]);
	&st($c0,&QWPw(8,$rp)); &FR($c0); ($c0)=&NR(1);
	($c0,$c1,$c2)=($c1,$c2,$c0);
	&mov("zero",$c2);

	# column 9
	&mul_add_c($a[2],$b[7],$c0,$c1,$c2);	&FR($a[2]);
	&mul_add_c($a[3],$b[6],$c0,$c1,$c2);
	&mul_add_c($a[4],$b[5],$c0,$c1,$c2);
	&mul_add_c($a[5],$b[4],$c0,$c1,$c2);
	&mul_add_c($a[6],$b[3],$c0,$c1,$c2);
	&mul_add_c($a[7],$b[2],$c0,$c1,$c2);	&FR($b[2]);
	&st($c0,&QWPw(9,$rp)); &FR($c0); ($c0)=&NR(1);
	($c0,$c1,$c2)=($c1,$c2,$c0);
	&mov("zero",$c2);

	# column 10
	&mul_add_c($a[3],$b[7],$c0,$c1,$c2);	&FR($a[3]);
	&mul_add_c($a[4],$b[6],$c0,$c1,$c2);
	&mul_add_c($a[5],$b[5],$c0,$c1,$c2);
	&mul_add_c($a[6],$b[4],$c0,$c1,$c2);
	&mul_add_c($a[7],$b[3],$c0,$c1,$c2);	&FR($b[3]);
	&st($c0,&QWPw(10,$rp)); &FR($c0); ($c0)=&NR(1);
	($c0,$c1,$c2)=($c1,$c2,$c0);
	&mov("zero",$c2);

	# column 11
	&mul_add_c($a[4],$b[7],$c0,$c1,$c2);	&FR($a[4]);
	&mul_add_c($a[5],$b[6],$c0,$c1,$c2);
	&mul_add_c($a[6],$b[5],$c0,$c1,$c2);
	&mul_add_c($a[7],$b[4],$c0,$c1,$c2);	&FR($b[4]);
	&st($c0,&QWPw(11,$rp)); &FR($c0); ($c0)=&NR(1);
	($c0,$c1,$c2)=($c1,$c2,$c0);
	&mov("zero",$c2);

	# column 12
	&mul_add_c($a[5],$b[7],$c0,$c1,$c2);	&FR($a[5]);
	&mul_add_c($a[6],$b[6],$c0,$c1,$c2);
	&mul_add_c($a[7],$b[5],$c0,$c1,$c2);	&FR($b[5]);
	&st($c0,&QWPw(12,$rp)); &FR($c0); ($c0)=&NR(1);
	($c0,$c1,$c2)=($c1,$c2,$c0);
	&mov("zero",$c2);

	# column 13
	&mul_add_c($a[6],$b[7],$c0,$c1,$c2);	&FR($a[6]);
	&mul_add_c($a[7],$b[6],$c0,$c1,$c2);	&FR($b[6]);
	&st($c0,&QWPw(13,$rp)); &FR($c0); ($c0)=&NR(1);
	($c0,$c1,$c2)=($c1,$c2,$c0);
	&mov("zero",$c2);

	# columns 14 and 15: the last product supplies both remaining words.
	&mul_add_c($a[7],$b[7],$c0,$c1,$c2);	&FR($a[7],$b[7]);
	&st($c0,&QWPw(14,$rp));
	&st($c1,&QWPw(15,$rp));

	&FR($c0,$c1,$c2);

	# Reload the two registers spilled at the top.
	&ld($reg_s0,&swtmp(0));
	&ld($reg_s1,&swtmp(1));
	&stack_pop(2);

	&function_end($name);

	&fin_pool;
	}
176 | |||
177 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/sqr.pl b/src/lib/libcrypto/bn/asm/alpha.works/sqr.pl new file mode 100644 index 0000000000..a55b696906 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha.works/sqr.pl | |||
@@ -0,0 +1,113 @@ | |||
1 | #!/usr/local/bin/perl | ||
2 | # alpha assembler | ||
3 | |||
# bn_sqr_words($name): emits the routine $name taking (rp, ap, count) as
# word parameters 0..2.  For every input word it stores the &mul result and
# the &muh result of a[i]*a[i] to two consecutive result words.
# r0 ($cc) is zeroed on entry and not written again by the emitted code.
# NOTE(review): the unconditional &br to "finish" makes the &blt and the two
# &ld's that follow it unreachable, and everything between <<'EOF' and EOF is
# only a string assigned to $a (never emitted), so all words are handled by
# "last_loop", one per iteration.
# NOTE(review): "$couny" in the local() list looks like a typo for "$count";
# as written $count is not localized.
4 | sub bn_sqr_words | ||
5 | { | ||
6 | local($name)=@_; | ||
7 | local($cc,$a,$b,$r,$couny); | ||
8 | |||
9 | &init_pool(3); | ||
10 | ($cc)=GR("r0"); | ||
11 | |||
12 | $rp=&wparam(0); | ||
13 | $ap=&wparam(1); | ||
14 | $count=&wparam(2); | ||
15 | |||
16 | &function_begin($name,""); | ||
17 | |||
18 | &comment(""); | ||
19 | &sub($count,4,$count); | ||
20 | &mov("zero",$cc); | ||
21 | &br(&label("finish")); | ||
22 | &blt($count,&label("finish")); | ||
23 | |||
24 | ($a0,$r0)=&NR(2); | ||
25 | &ld($a0,&QWPw(0,$ap)); | ||
26 | &ld($r0,&QWPw(0,$rp)); | ||
27 | |||
28 | $a=<<'EOF'; | ||
29 | ########################################################## | ||
30 | &set_label("loop"); | ||
31 | |||
32 | ($a1)=&NR(1); &ld($a1,&QWPw(1,$ap)); | ||
33 | ($b1)=&NR(1); &ld($b1,&QWPw(1,$bp)); | ||
34 | ($a2)=&NR(1); &ld($a2,&QWPw(2,$ap)); | ||
35 | ($b2)=&NR(1); &ld($b2,&QWPw(2,$bp)); | ||
36 | ($a3)=&NR(1); &ld($a3,&QWPw(3,$ap)); | ||
37 | ($b3)=&NR(1); &ld($b3,&QWPw(3,$bp)); | ||
38 | |||
39 | ($o0,$t0)=&NR(2); | ||
40 | &add($a0,$b0,$o0); | ||
41 | &cmpult($o0,$b0,$t0); | ||
42 | &add($o0,$cc,$o0); | ||
43 | &cmpult($o0,$cc,$cc); | ||
44 | &add($cc,$t0,$cc); &FR($t0); | ||
45 | |||
46 | ($t1,$o1)=&NR(2); | ||
47 | |||
48 | &add($a1,$b1,$o1); &FR($a1); | ||
49 | &cmpult($o1,$b1,$t1); &FR($b1); | ||
50 | &add($o1,$cc,$o1); | ||
51 | &cmpult($o1,$cc,$cc); | ||
52 | &add($cc,$t1,$cc); &FR($t1); | ||
53 | |||
54 | ($t2,$o2)=&NR(2); | ||
55 | |||
56 | &add($a2,$b2,$o2); &FR($a2); | ||
57 | &cmpult($o2,$b2,$t2); &FR($b2); | ||
58 | &add($o2,$cc,$o2); | ||
59 | &cmpult($o2,$cc,$cc); | ||
60 | &add($cc,$t2,$cc); &FR($t2); | ||
61 | |||
62 | ($t3,$o3)=&NR(2); | ||
63 | |||
64 | &add($a3,$b3,$o3); &FR($a3); | ||
65 | &cmpult($o3,$b3,$t3); &FR($b3); | ||
66 | &add($o3,$cc,$o3); | ||
67 | &cmpult($o3,$cc,$cc); | ||
68 | &add($cc,$t3,$cc); &FR($t3); | ||
69 | |||
70 | &st($o0,&QWPw(0,$rp)); &FR($o0); | ||
71 | &st($o1,&QWPw(0,$rp)); &FR($o1); | ||
72 | &st($o2,&QWPw(0,$rp)); &FR($o2); | ||
73 | &st($o3,&QWPw(0,$rp)); &FR($o3); | ||
74 | |||
75 | &sub($count,4,$count); # count-=4 | ||
76 | &add($ap,4*$QWS,$ap); # count+=4 | ||
77 | &add($bp,4*$QWS,$bp); # count+=4 | ||
78 | &add($rp,4*$QWS,$rp); # count+=4 | ||
79 | |||
80 | &blt($count,&label("finish")); | ||
81 | &ld($a0,&QWPw(0,$ap)); | ||
82 | &ld($b0,&QWPw(0,$bp)); | ||
83 | &br(&label("loop")); | ||
84 | EOF | ||
85 | ################################################## | ||
86 | # One word per iteration (every word comes through here, see header) | ||
87 | |||
88 | &set_label("last_loop"); | ||
89 | |||
90 | &ld(($a0)=&NR(1),&QWPw(0,$ap)); # get a | ||
91 | &mul($a0,$a0,($l0)=&NR(1)); | ||
92 | &add($ap,$QWS,$ap); | ||
93 | &add($rp,2*$QWS,$rp); | ||
94 | &sub($count,1,$count); | ||
95 | &muh($a0,$a0,($h0)=&NR(1)); &FR($a0); | ||
# rp was already advanced by two words above, hence the negative offsets
96 | &st($l0,&QWPw(-2,$rp)); &FR($l0); | ||
97 | &st($h0,&QWPw(-1,$rp)); &FR($h0); | ||
98 | |||
99 | &bgt($count,&label("last_loop")); | ||
100 | &function_end_A($name); | ||
101 | |||
102 | ###################################################### | ||
# "finish" undoes the count-=4 done at entry and enters last_loop if any
# words remain
103 | &set_label("finish"); | ||
104 | &add($count,4,$count); | ||
105 | &bgt($count,&label("last_loop")); | ||
106 | |||
107 | &set_label("end"); | ||
108 | &function_end($name); | ||
109 | |||
110 | &fin_pool; | ||
111 | } | ||
112 | |||
113 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/sqr_c4.pl b/src/lib/libcrypto/bn/asm/alpha.works/sqr_c4.pl new file mode 100644 index 0000000000..bf33f5b503 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha.works/sqr_c4.pl | |||
@@ -0,0 +1,109 @@ | |||
1 | #!/usr/local/bin/perl | ||
2 | # alpha assembler | ||
3 | |||
# sqr_add_c($a,$c0,$c1,$c2): emit code that adds the double-word square
# $a*$a into the three-word accumulator ($c2:$c1:$c0).
# &mul/&muh are taken to yield the low/high product words.
# The carry out of the low-word add is folded into the high product word
# BEFORE that word is added to $c1.  Adding the carry to $c1 after the $c1
# carry has already been computed would lose the carry into $c2 whenever
# $c1 wraps.  The fold itself cannot wrap: the high word of a square of a
# 64-bit value is at most 2^64-2.
4 | sub sqr_add_c | ||
5 | { | ||
6 | local($a,$c0,$c1,$c2)=@_; | ||
7 | local($l1,$h1,$t1,$t2); | ||
8 | |||
9 | &mul($a,$a,($l1)=&NR(1)); | ||
10 | &muh($a,$a,($h1)=&NR(1)); | ||
11 | &add($c0,$l1,$c0); | ||
12 | &cmpult($c0,$l1,($t1)=&NR(1)); &FR($l1); # t1 = carry out of c0 | ||
13 | &add($t1,$h1,$h1); &FR($t1); # fold it into the high word | ||
14 | &add($c1,$h1,$c1); | ||
15 | &cmpult($c1,$h1,($t2)=&NR(1)); &FR($h1); # t2 = carry out of c1 | ||
16 | &add($c2,$t2,$c2); &FR($t2); | ||
17 | } | ||
18 | |||
# sqr_add_c2($a,$b,$c0,$c1,$c2): emit code that adds 2*$a*$b into the
# three-word accumulator ($c2:$c1:$c0).
# The product is doubled in place: the top bit of each product word is
# captured with cmplt-against-zero before the word is added to itself; the
# low word's top bit becomes bit 0 of the doubled high word and the high
# word's top bit goes straight into $c2.
# The carry out of the $c0 add cannot be folded into the doubled high word
# (that word may be all ones), so it is added to $c1 separately and the
# carry out of THAT add is propagated to $c2 as well.  Without the second
# compare a wrap of $c1 at this point would be lost.
19 | sub sqr_add_c2 | ||
20 | { | ||
21 | local($a,$b,$c0,$c1,$c2)=@_; | ||
22 | local($l1,$h1,$lc1,$hc1); | ||
23 | |||
24 | &mul($a,$b,($l1)=&NR(1)); | ||
25 | &muh($a,$b,($h1)=&NR(1)); | ||
26 | &cmplt($l1,"zero",($lc1)=&NR(1)); # top bit of low word | ||
27 | &cmplt($h1,"zero",($hc1)=&NR(1)); # top bit of high word | ||
28 | &add($l1,$l1,$l1); | ||
29 | &add($h1,$h1,$h1); | ||
30 | &add($h1,$lc1,$h1); &FR($lc1); | ||
31 | &add($c2,$hc1,$c2); &FR($hc1); | ||
32 | &add($c0,$l1,$c0); | ||
33 | &add($c1,$h1,$c1); | ||
34 | &cmpult($c0,$l1,($lc1)=&NR(1)); &FR($l1); # carry out of c0 | ||
35 | &cmpult($c1,$h1,($hc1)=&NR(1)); &FR($h1); # carry out of c1+h1 | ||
36 | &add($c2,$hc1,$c2); | ||
37 | &add($c1,$lc1,$c1); | ||
38 | &cmpult($c1,$lc1,$hc1); &FR($lc1); # carry out of c1+lc1 | ||
39 | &add($c2,$hc1,$c2); &FR($hc1); | ||
40 | } | ||
41 | |||
42 | |||
# bn_sqr_comba4($name): emits the routine $name(rp, ap) that writes result
# words rp[0..7] from the four words loaded from ap[0..3].
# Each result word is one column: its square term (&sqr_add_c) and/or cross
# terms (&sqr_add_c2) are added into the accumulator ($c0,$c1,$c2), $c0 is
# stored, then the three names are rotated so the old $c1 becomes $c0 and
# the register that held $c0 is zeroed and reused as the new $c2.
# The statement order also fixes the order of &NR/&FR register allocation.
# NOTE(review): $cnt is set to 1 here but not read anywhere in this routine.
43 | sub bn_sqr_comba4 | ||
44 | { | ||
45 | local($name)=@_; | ||
46 | local(@a,@b,$r,$c0,$c1,$c2); | ||
47 | |||
48 | $cnt=1; | ||
49 | &init_pool(2); | ||
50 | |||
51 | $rp=&wparam(0); | ||
52 | $ap=&wparam(1); | ||
53 | |||
54 | &function_begin($name,""); | ||
55 | |||
56 | &comment(""); | ||
57 | |||
58 | &ld(($a[0])=&NR(1),&QWPw(0,$ap)); | ||
59 | &ld(($a[1])=&NR(1),&QWPw(1,$ap)); | ||
60 | &ld(($a[2])=&NR(1),&QWPw(2,$ap)); | ||
61 | &ld(($a[3])=&NR(1),&QWPw(3,$ap)); &FR($ap); | ||
62 | |||
63 | ($c0,$c1,$c2)=&NR(3); | ||
64 | |||
# column 0: a0*a0 written directly into c0/c1
65 | &mov("zero",$c2); | ||
66 | &mul($a[0],$a[0],$c0); | ||
67 | &muh($a[0],$a[0],$c1); | ||
68 | &st($c0,&QWPw(0,$rp)); | ||
69 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
70 | &mov("zero",$c2); | ||
71 | |||
72 | &sqr_add_c2($a[0],$a[1],$c0,$c1,$c2); | ||
73 | &st($c0,&QWPw(1,$rp)); | ||
74 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
75 | &mov("zero",$c2); | ||
76 | |||
77 | &sqr_add_c($a[1],$c0,$c1,$c2); | ||
78 | &sqr_add_c2($a[2],$a[0],$c0,$c1,$c2); | ||
79 | &st($c0,&QWPw(2,$rp)); | ||
80 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
81 | &mov("zero",$c2); | ||
82 | |||
83 | &sqr_add_c2($a[3],$a[0],$c0,$c1,$c2); | ||
84 | &sqr_add_c2($a[2],$a[1],$c0,$c1,$c2); | ||
85 | &st($c0,&QWPw(3,$rp)); | ||
86 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
87 | &mov("zero",$c2); | ||
88 | |||
89 | &sqr_add_c($a[2],$c0,$c1,$c2); | ||
90 | &sqr_add_c2($a[3],$a[1],$c0,$c1,$c2); | ||
91 | &st($c0,&QWPw(4,$rp)); | ||
92 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
93 | &mov("zero",$c2); | ||
94 | |||
95 | &sqr_add_c2($a[3],$a[2],$c0,$c1,$c2); | ||
96 | &st($c0,&QWPw(5,$rp)); | ||
97 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
98 | &mov("zero",$c2); | ||
99 | |||
# last column: both remaining accumulator words are stored
100 | &sqr_add_c($a[3],$c0,$c1,$c2); | ||
101 | &st($c0,&QWPw(6,$rp)); | ||
102 | &st($c1,&QWPw(7,$rp)); | ||
103 | |||
104 | &function_end($name); | ||
105 | |||
106 | &fin_pool; | ||
107 | } | ||
108 | |||
109 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/sqr_c8.pl b/src/lib/libcrypto/bn/asm/alpha.works/sqr_c8.pl new file mode 100644 index 0000000000..b4afe085f1 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha.works/sqr_c8.pl | |||
@@ -0,0 +1,132 @@ | |||
1 | #!/usr/local/bin/perl | ||
2 | # alpha assembler | ||
3 | |||
# bn_sqr_comba8($name): emits the routine $name(rp, ap) that writes result
# words rp[0..15] from the eight words loaded from ap[0..7].
# Each result word is one column k: &sqr_add_c adds a[k/2]^2 when k is even
# and &sqr_add_c2 adds every cross term a[i]*a[j] with i+j == k, i > j, into
# the accumulator ($c0,$c1,$c2).  After each column $c0 is stored and the
# three names are rotated, the freed register being zeroed as the new $c2.
# The statement order also fixes the order of &NR/&FR register allocation.
# NOTE(review): $cnt is set to 1 here but not read anywhere in this routine.
4 | sub bn_sqr_comba8 | ||
5 | { | ||
6 | local($name)=@_; | ||
7 | local(@a,@b,$r,$c0,$c1,$c2); | ||
8 | |||
9 | $cnt=1; | ||
10 | &init_pool(2); | ||
11 | |||
12 | $rp=&wparam(0); | ||
13 | $ap=&wparam(1); | ||
14 | |||
15 | &function_begin($name,""); | ||
16 | |||
17 | &comment(""); | ||
18 | |||
19 | &ld(($a[0])=&NR(1),&QWPw(0,$ap)); | ||
20 | &ld(($a[1])=&NR(1),&QWPw(1,$ap)); | ||
21 | &ld(($a[2])=&NR(1),&QWPw(2,$ap)); | ||
22 | &ld(($a[3])=&NR(1),&QWPw(3,$ap)); | ||
23 | &ld(($a[4])=&NR(1),&QWPw(4,$ap)); | ||
24 | &ld(($a[5])=&NR(1),&QWPw(5,$ap)); | ||
25 | &ld(($a[6])=&NR(1),&QWPw(6,$ap)); | ||
26 | &ld(($a[7])=&NR(1),&QWPw(7,$ap)); &FR($ap); | ||
27 | |||
28 | ($c0,$c1,$c2)=&NR(3); | ||
29 | |||
# column 0: a0*a0 written directly into c0/c1
30 | &mov("zero",$c2); | ||
31 | &mul($a[0],$a[0],$c0); | ||
32 | &muh($a[0],$a[0],$c1); | ||
33 | &st($c0,&QWPw(0,$rp)); | ||
34 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
35 | &mov("zero",$c2); | ||
36 | |||
37 | &sqr_add_c2($a[1],$a[0],$c0,$c1,$c2); | ||
38 | &st($c0,&QWPw(1,$rp)); | ||
39 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
40 | &mov("zero",$c2); | ||
41 | |||
42 | &sqr_add_c($a[1],$c0,$c1,$c2); | ||
43 | &sqr_add_c2($a[2],$a[0],$c0,$c1,$c2); | ||
44 | &st($c0,&QWPw(2,$rp)); | ||
45 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
46 | &mov("zero",$c2); | ||
47 | |||
48 | &sqr_add_c2($a[2],$a[1],$c0,$c1,$c2); | ||
49 | &sqr_add_c2($a[3],$a[0],$c0,$c1,$c2); | ||
50 | &st($c0,&QWPw(3,$rp)); | ||
51 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
52 | &mov("zero",$c2); | ||
53 | |||
54 | &sqr_add_c($a[2],$c0,$c1,$c2); | ||
55 | &sqr_add_c2($a[3],$a[1],$c0,$c1,$c2); | ||
56 | &sqr_add_c2($a[4],$a[0],$c0,$c1,$c2); | ||
57 | &st($c0,&QWPw(4,$rp)); | ||
58 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
59 | &mov("zero",$c2); | ||
60 | |||
61 | &sqr_add_c2($a[3],$a[2],$c0,$c1,$c2); | ||
62 | &sqr_add_c2($a[4],$a[1],$c0,$c1,$c2); | ||
63 | &sqr_add_c2($a[5],$a[0],$c0,$c1,$c2); | ||
64 | &st($c0,&QWPw(5,$rp)); | ||
65 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
66 | &mov("zero",$c2); | ||
67 | |||
68 | &sqr_add_c($a[3],$c0,$c1,$c2); | ||
69 | &sqr_add_c2($a[4],$a[2],$c0,$c1,$c2); | ||
70 | &sqr_add_c2($a[5],$a[1],$c0,$c1,$c2); | ||
71 | &sqr_add_c2($a[6],$a[0],$c0,$c1,$c2); | ||
72 | &st($c0,&QWPw(6,$rp)); | ||
73 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
74 | &mov("zero",$c2); | ||
75 | |||
76 | &sqr_add_c2($a[4],$a[3],$c0,$c1,$c2); | ||
77 | &sqr_add_c2($a[5],$a[2],$c0,$c1,$c2); | ||
78 | &sqr_add_c2($a[6],$a[1],$c0,$c1,$c2); | ||
79 | &sqr_add_c2($a[7],$a[0],$c0,$c1,$c2); | ||
80 | &st($c0,&QWPw(7,$rp)); | ||
81 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
82 | &mov("zero",$c2); | ||
83 | |||
84 | &sqr_add_c($a[4],$c0,$c1,$c2); | ||
85 | &sqr_add_c2($a[5],$a[3],$c0,$c1,$c2); | ||
86 | &sqr_add_c2($a[6],$a[2],$c0,$c1,$c2); | ||
87 | &sqr_add_c2($a[7],$a[1],$c0,$c1,$c2); | ||
88 | &st($c0,&QWPw(8,$rp)); | ||
89 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
90 | &mov("zero",$c2); | ||
91 | |||
92 | &sqr_add_c2($a[5],$a[4],$c0,$c1,$c2); | ||
93 | &sqr_add_c2($a[6],$a[3],$c0,$c1,$c2); | ||
94 | &sqr_add_c2($a[7],$a[2],$c0,$c1,$c2); | ||
95 | &st($c0,&QWPw(9,$rp)); | ||
96 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
97 | &mov("zero",$c2); | ||
98 | |||
99 | &sqr_add_c($a[5],$c0,$c1,$c2); | ||
100 | &sqr_add_c2($a[6],$a[4],$c0,$c1,$c2); | ||
101 | &sqr_add_c2($a[7],$a[3],$c0,$c1,$c2); | ||
102 | &st($c0,&QWPw(10,$rp)); | ||
103 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
104 | &mov("zero",$c2); | ||
105 | |||
106 | &sqr_add_c2($a[6],$a[5],$c0,$c1,$c2); | ||
107 | &sqr_add_c2($a[7],$a[4],$c0,$c1,$c2); | ||
108 | &st($c0,&QWPw(11,$rp)); | ||
109 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
110 | &mov("zero",$c2); | ||
111 | |||
112 | &sqr_add_c($a[6],$c0,$c1,$c2); | ||
113 | &sqr_add_c2($a[7],$a[5],$c0,$c1,$c2); | ||
114 | &st($c0,&QWPw(12,$rp)); | ||
115 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
116 | &mov("zero",$c2); | ||
117 | |||
118 | &sqr_add_c2($a[7],$a[6],$c0,$c1,$c2); | ||
119 | &st($c0,&QWPw(13,$rp)); | ||
120 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
121 | &mov("zero",$c2); | ||
122 | |||
# last column: both remaining accumulator words are stored
123 | &sqr_add_c($a[7],$c0,$c1,$c2); | ||
124 | &st($c0,&QWPw(14,$rp)); | ||
125 | &st($c1,&QWPw(15,$rp)); | ||
126 | |||
127 | &function_end($name); | ||
128 | |||
129 | &fin_pool; | ||
130 | } | ||
131 | |||
132 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/sub.pl b/src/lib/libcrypto/bn/asm/alpha.works/sub.pl new file mode 100644 index 0000000000..d998da5c21 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha.works/sub.pl | |||
@@ -0,0 +1,108 @@ | |||
1 | #!/usr/local/bin/perl | ||
2 | # alpha assembler | ||
3 | |||
# bn_sub_words($name): emits the routine $name(rp, ap, bp, count) computing
# rp[i] = ap[i] - bp[i] - borrow word by word.  The running borrow is kept
# in r0 ($cc), which starts at zero.  "loop" handles four words per pass
# while at least four remain; "last_loop" handles the rest one at a time.
# Per word: one cmpult detects the borrow of a-b, a second detects the
# borrow of subtracting the incoming borrow, and their sum is the new $cc.
# NOTE(review): "last_loop" uses $tmp after it was released with &FR in the
# unrolled body; no &NR call follows that release in this routine.
4 | sub bn_sub_words | ||
5 | { | ||
6 | local($name)=@_; | ||
7 | local($cc,$a,$b,$r); | ||
8 | |||
9 | &init_pool(4); | ||
10 | ($cc)=GR("r0"); | ||
11 | |||
12 | $rp=&wparam(0); | ||
13 | $ap=&wparam(1); | ||
14 | $bp=&wparam(2); | ||
15 | $count=&wparam(3); | ||
16 | |||
17 | &function_begin($name,""); | ||
18 | |||
19 | &comment(""); | ||
20 | &sub($count,4,$count); | ||
21 | &mov("zero",$cc); | ||
22 | &blt($count,&label("finish")); | ||
23 | |||
24 | ($a0,$b0)=&NR(2); | ||
25 | &ld($a0,&QWPw(0,$ap)); | ||
26 | &ld($b0,&QWPw(0,$bp)); | ||
27 | |||
28 | ########################################################## | ||
29 | &set_label("loop"); | ||
30 | |||
31 | ($a1,$tmp,$b1,$a2,$b2,$a3,$b3,$o0)=&NR(8); | ||
32 | &ld($a1,&QWPw(1,$ap)); | ||
33 | &cmpult($a0,$b0,$tmp); # will we borrow? | ||
34 | &ld($b1,&QWPw(1,$bp)); | ||
35 | &sub($a0,$b0,$a0); # do the subtract | ||
36 | &ld($a2,&QWPw(2,$ap)); | ||
37 | &cmpult($a0,$cc,$b0); # will the incoming borrow cause a borrow? | ||
38 | &ld($b2,&QWPw(2,$bp)); | ||
39 | &sub($a0,$cc,$o0); # subtract the incoming borrow | ||
40 | &ld($a3,&QWPw(3,$ap)); | ||
41 | &add($b0,$tmp,$cc); ($t1,$o1)=&NR(2); &FR($tmp); | ||
42 | |||
43 | &cmpult($a1,$b1,$t1); # will we borrow? | ||
44 | &sub($a1,$b1,$a1); # do the subtract | ||
45 | &ld($b3,&QWPw(3,$bp)); | ||
46 | &cmpult($a1,$cc,$b1); # will the incoming borrow cause a borrow? | ||
47 | &sub($a1,$cc,$o1); # subtract the incoming borrow | ||
48 | &add($b1,$t1,$cc); ($tmp,$o2)=&NR(2); &FR($t1,$a1,$b1); | ||
49 | |||
50 | &cmpult($a2,$b2,$tmp); # will we borrow? | ||
51 | &sub($a2,$b2,$a2); # do the subtract | ||
52 | &st($o0,&QWPw(0,$rp)); &FR($o0); # save | ||
53 | &cmpult($a2,$cc,$b2); # will the incoming borrow cause a borrow? | ||
54 | &sub($a2,$cc,$o2); # subtract the incoming borrow | ||
55 | &add($b2,$tmp,$cc); ($t3,$o3)=&NR(2); &FR($tmp,$a2,$b2); | ||
56 | |||
57 | &cmpult($a3,$b3,$t3); # will we borrow? | ||
58 | &sub($a3,$b3,$a3); # do the subtract | ||
59 | &st($o1,&QWPw(1,$rp)); &FR($o1); | ||
60 | &cmpult($a3,$cc,$b3); # will the incoming borrow cause a borrow? | ||
61 | &sub($a3,$cc,$o3); # subtract the incoming borrow | ||
62 | &add($b3,$t3,$cc); &FR($t3,$a3,$b3); | ||
63 | |||
64 | &st($o2,&QWPw(2,$rp)); &FR($o2); | ||
65 | &sub($count,4,$count); # count-=4 | ||
66 | &st($o3,&QWPw(3,$rp)); &FR($o3); | ||
67 | &add($ap,4*$QWS,$ap); # ap += 4 words | ||
68 | &add($bp,4*$QWS,$bp); # bp += 4 words | ||
69 | &add($rp,4*$QWS,$rp); # rp += 4 words | ||
70 | |||
71 | &blt($count,&label("finish")); | ||
72 | &ld($a0,&QWPw(0,$ap)); | ||
73 | &ld($b0,&QWPw(0,$bp)); | ||
74 | &br(&label("loop")); | ||
75 | ################################################## | ||
76 | # Do the last 0..3 words | ||
77 | |||
78 | &set_label("last_loop"); | ||
79 | |||
80 | &ld($a0,&QWPw(0,$ap)); # get a | ||
81 | &ld($b0,&QWPw(0,$bp)); # get b | ||
82 | &cmpult($a0,$b0,$tmp); # will we borrow? | ||
83 | &sub($a0,$b0,$a0); # do the subtract | ||
84 | &cmpult($a0,$cc,$b0); # will the incoming borrow cause a borrow? | ||
85 | &sub($a0,$cc,$a0); # subtract the incoming borrow | ||
86 | &st($a0,&QWPw(0,$rp)); # save | ||
87 | &add($b0,$tmp,$cc); # add the borrows | ||
88 | |||
89 | &add($ap,$QWS,$ap); | ||
90 | &add($bp,$QWS,$bp); | ||
91 | &add($rp,$QWS,$rp); | ||
92 | &sub($count,1,$count); | ||
93 | &bgt($count,&label("last_loop")); | ||
94 | &function_end_A($name); | ||
95 | |||
96 | ###################################################### | ||
# "finish" undoes the count-=4 bias and enters last_loop if words remain
97 | &set_label("finish"); | ||
98 | &add($count,4,$count); | ||
99 | &bgt($count,&label("last_loop")); | ||
100 | |||
101 | &FR($a0,$b0); | ||
102 | &set_label("end"); | ||
103 | &function_end($name); | ||
104 | |||
105 | &fin_pool; | ||
106 | } | ||
107 | |||
108 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha/add.pl b/src/lib/libcrypto/bn/asm/alpha/add.pl new file mode 100644 index 0000000000..13bf516428 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha/add.pl | |||
@@ -0,0 +1,118 @@ | |||
1 | #!/usr/local/bin/perl | ||
2 | # alpha assembler | ||
3 | |||
# bn_add_words($name): emits the routine $name(rp, ap, bp, count) computing
# rp[i] = ap[i] + bp[i] + carry word by word.  The running carry is kept in
# r0 ($cc), which starts at zero.  "loop" handles four words per pass while
# at least four remain; "last_loop" handles the rest one at a time.
# Per word: the first cmpult catches the carry out of a+b, the second the
# carry out of adding the incoming carry; their sum is the new $cc.
# The four unrolled results go to rp[0..3] (word offsets 0,1,2,3); $rp is
# only advanced after all four stores.
# $a0/$b0 are allocated once, before "loop", and reused by both loops.
4 | sub bn_add_words | ||
5 | { | ||
6 | local($name)=@_; | ||
7 | local($cc,$a,$b,$r); | ||
8 | |||
9 | &init_pool(4); | ||
10 | ($cc)=GR("r0"); | ||
11 | |||
12 | $rp=&wparam(0); | ||
13 | $ap=&wparam(1); | ||
14 | $bp=&wparam(2); | ||
15 | $count=&wparam(3); | ||
16 | |||
17 | &function_begin($name,""); | ||
18 | |||
19 | &comment(""); | ||
20 | &sub($count,4,$count); | ||
21 | &mov("zero",$cc); | ||
22 | &blt($count,&label("finish")); | ||
23 | |||
24 | ($a0,$b0)=&NR(2); | ||
25 | |||
26 | ########################################################## | ||
27 | &set_label("loop"); | ||
28 | |||
29 | &ld($a0,&QWPw(0,$ap)); | ||
30 | &ld($b0,&QWPw(0,$bp)); | ||
31 | &ld(($a1)=&NR(1),&QWPw(1,$ap)); | ||
32 | &ld(($b1)=&NR(1),&QWPw(1,$bp)); | ||
33 | |||
34 | ($o0,$t0)=&NR(2); | ||
35 | &add($a0,$b0,$o0); | ||
36 | &ld(($a2)=&NR(1),&QWPw(2,$ap)); | ||
37 | &cmpult($o0,$b0,$t0); | ||
38 | &add($o0,$cc,$o0); | ||
39 | &cmpult($o0,$cc,$cc); | ||
40 | &ld(($b2)=&NR(1),&QWPw(2,$bp)); | ||
41 | &add($cc,$t0,$cc); &FR($t0); | ||
42 | |||
43 | ($t1,$o1)=&NR(2); | ||
44 | |||
45 | &add($a1,$b1,$o1); &FR($a1); | ||
46 | &cmpult($o1,$b1,$t1); &FR($b1); | ||
47 | &add($o1,$cc,$o1); | ||
48 | &cmpult($o1,$cc,$cc); | ||
49 | &ld(($a3)=&NR(1),&QWPw(3,$ap)); | ||
50 | &add($cc,$t1,$cc); &FR($t1); | ||
51 | |||
52 | ($t2,$o2)=&NR(2); | ||
53 | |||
54 | &add($a2,$b2,$o2); &FR($a2); | ||
55 | &cmpult($o2,$b2,$t2); &FR($b2); | ||
56 | &add($o2,$cc,$o2); | ||
57 | &cmpult($o2,$cc,$cc); | ||
58 | &ld(($b3)=&NR(1),&QWPw(3,$bp)); | ||
59 | &st($o0,&QWPw(0,$rp)); &FR($o0); | ||
60 | &add($cc,$t2,$cc); &FR($t2); | ||
61 | |||
62 | ($t3,$o3)=&NR(2); | ||
63 | |||
64 | &st($o1,&QWPw(1,$rp)); &FR($o1); | ||
65 | &add($a3,$b3,$o3); &FR($a3); | ||
66 | &cmpult($o3,$b3,$t3); &FR($b3); | ||
67 | &add($o3,$cc,$o3); | ||
68 | &st($o2,&QWPw(2,$rp)); &FR($o2); | ||
69 | &cmpult($o3,$cc,$cc); | ||
70 | &st($o3,&QWPw(3,$rp)); &FR($o3); | ||
71 | &add($cc,$t3,$cc); &FR($t3); | ||
72 | |||
73 | |||
74 | &sub($count,4,$count); # count-=4 | ||
75 | &add($ap,4*$QWS,$ap); # ap += 4 words | ||
76 | &add($bp,4*$QWS,$bp); # bp += 4 words | ||
77 | &add($rp,4*$QWS,$rp); # rp += 4 words | ||
78 | |||
79 | ### | ||
80 | &bge($count,&label("loop")); | ||
81 | ### | ||
82 | &br(&label("finish")); | ||
83 | ################################################## | ||
84 | # Do the last 0..3 words | ||
85 | |||
86 | ($t0,$o0)=&NR(2); | ||
87 | &set_label("last_loop"); | ||
88 | |||
89 | &ld($a0,&QWPw(0,$ap)); # get a | ||
90 | &ld($b0,&QWPw(0,$bp)); # get b | ||
91 | &add($ap,$QWS,$ap); | ||
92 | &add($bp,$QWS,$bp); | ||
93 | &add($a0,$b0,$o0); | ||
94 | &sub($count,1,$count); | ||
95 | &cmpult($o0,$b0,$t0); # carry out of a+b | ||
96 | &add($o0,$cc,$o0); # add the incoming carry | ||
97 | &cmpult($o0,$cc,$cc); # carry out of adding the carry | ||
98 | &add($rp,$QWS,$rp); | ||
99 | &st($o0,&QWPw(-1,$rp)); # save (rp already advanced) | ||
100 | &add($cc,$t0,$cc); # add the carries | ||
101 | |||
102 | ### | ||
103 | &bgt($count,&label("last_loop")); | ||
104 | &function_end_A($name); | ||
105 | |||
106 | ###################################################### | ||
107 | &set_label("finish"); | ||
108 | &add($count,4,$count); | ||
109 | &bgt($count,&label("last_loop")); | ||
110 | |||
111 | &FR($o0,$t0,$a0,$b0); | ||
112 | &set_label("end"); | ||
113 | &function_end($name); | ||
114 | |||
115 | &fin_pool; | ||
116 | } | ||
117 | |||
118 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha/div.pl b/src/lib/libcrypto/bn/asm/alpha/div.pl new file mode 100644 index 0000000000..e9e680897a --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha/div.pl | |||
@@ -0,0 +1,144 @@ | |||
1 | #!/usr/local/bin/perl | ||
2 | |||
# bn_div_words: appends a fixed block of Alpha assembly (described by its own
# comment as compiler output with renamed labels) to the output via &asm_add.
# The emitted routine copies $16/$17/$18 into $9/$10/$11, returns -1 in $0
# when $18 (the divisor copy in $11) is zero, and otherwise calls
# BN_num_bits_word on it, calling abort when the range check that follows
# fails.
# The entry label carries its terminating colon so the assembler sees a
# label definition rather than an unknown statement.
3 | sub bn_div_words | ||
4 | { | ||
5 | local($data)=<<'EOF'; | ||
6 | # | ||
7 | # What follows was taken directly from the C compiler with a few | ||
8 | # hacks to redo the lables. | ||
9 | # | ||
10 | .text | ||
11 | .set noreorder | ||
12 | .set volatile | ||
13 | .align 3 | ||
14 | .globl bn_div_words | ||
15 | .ent bn_div_words | ||
16 | bn_div_words: | ||
17 | ldgp $29,0($27) | ||
18 | bn_div_words.ng: | ||
19 | lda $30,-48($30) | ||
20 | .frame $30,48,$26,0 | ||
21 | stq $26,0($30) | ||
22 | stq $9,8($30) | ||
23 | stq $10,16($30) | ||
24 | stq $11,24($30) | ||
25 | stq $12,32($30) | ||
26 | stq $13,40($30) | ||
27 | .mask 0x4003e00,-48 | ||
28 | .prologue 1 | ||
29 | bis $16,$16,$9 | ||
30 | bis $17,$17,$10 | ||
31 | bis $18,$18,$11 | ||
32 | bis $31,$31,$13 | ||
33 | bis $31,2,$12 | ||
34 | bne $11,$9119 | ||
35 | lda $0,-1 | ||
36 | br $31,$9136 | ||
37 | .align 4 | ||
38 | $9119: | ||
39 | bis $11,$11,$16 | ||
40 | jsr $26,BN_num_bits_word | ||
41 | ldgp $29,0($26) | ||
42 | subq $0,64,$1 | ||
43 | beq $1,$9120 | ||
44 | bis $31,1,$1 | ||
45 | sll $1,$0,$1 | ||
46 | cmpule $9,$1,$1 | ||
47 | bne $1,$9120 | ||
48 | # lda $16,_IO_stderr_ | ||
49 | # lda $17,$C32 | ||
50 | # bis $0,$0,$18 | ||
51 | # jsr $26,fprintf | ||
52 | # ldgp $29,0($26) | ||
53 | jsr $26,abort | ||
54 | ldgp $29,0($26) | ||
55 | .align 4 | ||
56 | $9120: | ||
57 | bis $31,64,$3 | ||
58 | cmpult $9,$11,$2 | ||
59 | subq $3,$0,$1 | ||
60 | addl $1,$31,$0 | ||
61 | subq $9,$11,$1 | ||
62 | cmoveq $2,$1,$9 | ||
63 | beq $0,$9122 | ||
64 | zapnot $0,15,$2 | ||
65 | subq $3,$0,$1 | ||
66 | sll $11,$2,$11 | ||
67 | sll $9,$2,$3 | ||
68 | srl $10,$1,$1 | ||
69 | sll $10,$2,$10 | ||
70 | bis $3,$1,$9 | ||
71 | $9122: | ||
72 | srl $11,32,$5 | ||
73 | zapnot $11,15,$6 | ||
74 | lda $7,-1 | ||
75 | .align 5 | ||
76 | $9123: | ||
77 | srl $9,32,$1 | ||
78 | subq $1,$5,$1 | ||
79 | bne $1,$9126 | ||
80 | zapnot $7,15,$27 | ||
81 | br $31,$9127 | ||
82 | .align 4 | ||
83 | $9126: | ||
84 | bis $9,$9,$24 | ||
85 | bis $5,$5,$25 | ||
86 | divqu $24,$25,$27 | ||
87 | $9127: | ||
88 | srl $10,32,$4 | ||
89 | .align 5 | ||
90 | $9128: | ||
91 | mulq $27,$5,$1 | ||
92 | subq $9,$1,$3 | ||
93 | zapnot $3,240,$1 | ||
94 | bne $1,$9129 | ||
95 | mulq $6,$27,$2 | ||
96 | sll $3,32,$1 | ||
97 | addq $1,$4,$1 | ||
98 | cmpule $2,$1,$2 | ||
99 | bne $2,$9129 | ||
100 | subq $27,1,$27 | ||
101 | br $31,$9128 | ||
102 | .align 4 | ||
103 | $9129: | ||
104 | mulq $27,$6,$1 | ||
105 | mulq $27,$5,$4 | ||
106 | srl $1,32,$3 | ||
107 | sll $1,32,$1 | ||
108 | addq $4,$3,$4 | ||
109 | cmpult $10,$1,$2 | ||
110 | subq $10,$1,$10 | ||
111 | addq $2,$4,$2 | ||
112 | cmpult $9,$2,$1 | ||
113 | bis $2,$2,$4 | ||
114 | beq $1,$9134 | ||
115 | addq $9,$11,$9 | ||
116 | subq $27,1,$27 | ||
117 | $9134: | ||
118 | subl $12,1,$12 | ||
119 | subq $9,$4,$9 | ||
120 | beq $12,$9124 | ||
121 | sll $27,32,$13 | ||
122 | sll $9,32,$2 | ||
123 | srl $10,32,$1 | ||
124 | sll $10,32,$10 | ||
125 | bis $2,$1,$9 | ||
126 | br $31,$9123 | ||
127 | .align 4 | ||
128 | $9124: | ||
129 | bis $13,$27,$0 | ||
130 | $9136: | ||
131 | ldq $26,0($30) | ||
132 | ldq $9,8($30) | ||
133 | ldq $10,16($30) | ||
134 | ldq $11,24($30) | ||
135 | ldq $12,32($30) | ||
136 | ldq $13,40($30) | ||
137 | addq $30,48,$30 | ||
138 | ret $31,($26),1 | ||
139 | .end bn_div_words | ||
140 | EOF | ||
141 | &asm_add($data); | ||
142 | } | ||
143 | |||
144 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha/mul.pl b/src/lib/libcrypto/bn/asm/alpha/mul.pl new file mode 100644 index 0000000000..76c926566c --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha/mul.pl | |||
@@ -0,0 +1,104 @@ | |||
1 | #!/usr/local/bin/perl | ||
2 | # alpha assembler | ||
3 | |||
# bn_mul_words($name): emits the routine $name(rp, ap, count, word).
# For each input word the &mul result of ap[i]*word plus the running carry is
# stored to rp[i]; the new carry is the &muh result plus the carry out of
# that add.  The carry is kept in r0 ($cc), which starts at zero.
# An input register is released with &FR only after the LAST instruction
# that reads it (the &mul), never between the &muh and the &mul: a released
# register may be handed out again by the next &NR.
# This routine has no b operand, so only $ap and $rp are advanced.
# NOTE(review): the unrolled body ends with &br to "finish", not "loop", so
# it runs at most once and the remaining words go through "last_loop"; the
# &ld of $a0 just before that branch is then unused.  Left as found -
# confirm whether branching back to "loop" was intended.
# NOTE(review): "$couny" in the local() list looks like a typo for "$count".
4 | sub bn_mul_words | ||
5 | { | ||
6 | local($name)=@_; | ||
7 | local($cc,$a,$b,$r,$couny); | ||
8 | |||
9 | &init_pool(4); | ||
10 | ($cc)=GR("r0"); | ||
11 | |||
12 | $rp=&wparam(0); | ||
13 | $ap=&wparam(1); | ||
14 | $count=&wparam(2); | ||
15 | $word=&wparam(3); | ||
16 | |||
17 | &function_begin($name,""); | ||
18 | |||
19 | &comment(""); | ||
20 | &sub($count,4,$count); | ||
21 | &mov("zero",$cc); | ||
22 | ### | ||
23 | &blt($count,&label("finish")); | ||
24 | |||
25 | ($a0)=&NR(1); &ld($a0,&QWPw(0,$ap)); | ||
26 | |||
27 | &set_label("loop"); | ||
28 | |||
29 | ($a1)=&NR(1); &ld($a1,&QWPw(1,$ap)); | ||
30 | ($a2)=&NR(1); &ld($a2,&QWPw(2,$ap)); | ||
31 | |||
32 | &muh($a0,$word,($h0)=&NR(1)); | ||
33 | ($a3)=&NR(1); &ld($a3,&QWPw(3,$ap)); | ||
34 | ### wait 8 | ||
35 | &mul($a0,$word,($l0)=&NR(1)); &FR($a0); | ||
36 | ### wait 8 | ||
37 | &muh($a1,$word,($h1)=&NR(1)); | ||
38 | &add($l0,$cc,$l0); ### wait 8 | ||
39 | &mul($a1,$word,($l1)=&NR(1)); &FR($a1); | ||
40 | &cmpult($l0,$cc,$cc); ### wait 8 | ||
41 | &muh($a2,$word,($h2)=&NR(1)); | ||
42 | &add($h0,$cc,$cc); &FR($h0); ### wait 8 | ||
43 | &mul($a2,$word,($l2)=&NR(1)); &FR($a2); | ||
44 | &add($l1,$cc,$l1); ### wait 8 | ||
45 | &st($l0,&QWPw(0,$rp)); &FR($l0); | ||
46 | &cmpult($l1,$cc,$cc); ### wait 8 | ||
47 | &muh($a3,$word,($h3)=&NR(1)); | ||
48 | &add($h1,$cc,$cc); &FR($h1); | ||
49 | &mul($a3,$word,($l3)=&NR(1)); &FR($a3); | ||
50 | &add($l2,$cc,$l2); | ||
51 | &st($l1,&QWPw(1,$rp)); &FR($l1); | ||
52 | &cmpult($l2,$cc,$cc); | ||
53 | &add($h2,$cc,$cc); &FR($h2); | ||
54 | &sub($count,4,$count); # count-=4 | ||
55 | &st($l2,&QWPw(2,$rp)); &FR($l2); | ||
56 | &add($l3,$cc,$l3); | ||
57 | &cmpult($l3,$cc,$cc); | ||
58 | ### (no bp update: this routine has no b operand) | ||
59 | &add($h3,$cc,$cc); &FR($h3); | ||
60 | &add($ap,4*$QWS,$ap); # ap += 4 words | ||
61 | &st($l3,&QWPw(3,$rp)); &FR($l3); | ||
62 | &add($rp,4*$QWS,$rp); # rp += 4 words | ||
63 | ### | ||
64 | &blt($count,&label("finish")); | ||
65 | ($a0)=&NR(1); &ld($a0,&QWPw(0,$ap)); | ||
66 | &br(&label("finish")); | ||
67 | ################################################## | ||
68 | |||
69 | ################################################## | ||
70 | # Do the last 0..3 words | ||
71 | |||
72 | &set_label("last_loop"); | ||
73 | |||
74 | &ld(($a0)=&NR(1),&QWPw(0,$ap)); # get a | ||
75 | ### | ||
76 | ### | ||
77 | ### | ||
78 | &muh($a0,$word,($h0)=&NR(1)); | ||
79 | ### Wait 8 for next mul issue | ||
80 | &mul($a0,$word,($l0)=&NR(1)); &FR($a0); | ||
81 | &add($ap,$QWS,$ap); | ||
82 | ### Loose 12 until result is available | ||
83 | &add($rp,$QWS,$rp); | ||
84 | &sub($count,1,$count); | ||
85 | &add($l0,$cc,$l0); | ||
86 | ### | ||
87 | &st($l0,&QWPw(-1,$rp)); &FR($l0); | ||
88 | &cmpult($l0,$cc,$cc); | ||
89 | &add($h0,$cc,$cc); &FR($h0); | ||
90 | &bgt($count,&label("last_loop")); | ||
91 | &function_end_A($name); | ||
92 | |||
93 | ###################################################### | ||
94 | &set_label("finish"); | ||
95 | &add($count,4,$count); | ||
96 | &bgt($count,&label("last_loop")); | ||
97 | |||
98 | &set_label("end"); | ||
99 | &function_end($name); | ||
100 | |||
101 | &fin_pool; | ||
102 | } | ||
103 | |||
104 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha/mul_add.pl b/src/lib/libcrypto/bn/asm/alpha/mul_add.pl new file mode 100644 index 0000000000..0d6df69bc4 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha/mul_add.pl | |||
@@ -0,0 +1,123 @@ | |||
1 | #!/usr/local/bin/perl | ||
2 | # alpha assembler | ||
3 | |||
# bn_mul_add_words($name): emits the routine $name(rp, ap, count, word).
# For each word: rp[i] = rp[i] + (&mul result of ap[i]*word) + carry; the new
# carry is the &muh result plus the carries out of the two adds.  The carry
# is kept in r0 ($cc), which starts at zero.
# The four-way unrolled loop is disabled: everything between <<'EOF' and EOF
# is only a string assigned to $a and is never emitted.  Entry therefore
# branches unconditionally to "finish", which undoes the count-=4 bias and
# runs "last_loop" once per word.  Falling through into "last_loop" with the
# biased count would process too few words whenever count >= 4.
# In "last_loop" $a0 is released with &FR only after the &mul, its last
# reader.
# NOTE(review): "$couny" in the local() list looks like a typo for "$count".
4 | sub bn_mul_add_words | ||
5 | { | ||
6 | local($name)=@_; | ||
7 | local($cc,$a,$b,$r,$couny); | ||
8 | |||
9 | &init_pool(4); | ||
10 | ($cc)=GR("r0"); | ||
11 | |||
12 | $rp=&wparam(0); | ||
13 | $ap=&wparam(1); | ||
14 | $count=&wparam(2); | ||
15 | $word=&wparam(3); | ||
16 | |||
17 | &function_begin($name,""); | ||
18 | |||
19 | &comment(""); | ||
20 | &sub($count,4,$count); | ||
21 | &mov("zero",$cc); | ||
22 | ### | ||
23 | &br(&label("finish")); | ||
24 | |||
25 | ### (no preload of a0: the unrolled loop below is not emitted) | ||
26 | |||
27 | $a=<<'EOF'; | ||
28 | ########################################################## | ||
29 | &set_label("loop"); | ||
30 | |||
31 | &ld(($r0)=&NR(1),&QWPw(0,$rp)); | ||
32 | &ld(($a1)=&NR(1),&QWPw(1,$ap)); | ||
33 | &muh($a0,$word,($h0)=&NR(1)); | ||
34 | &ld(($r1)=&NR(1),&QWPw(1,$rp)); | ||
35 | &ld(($a2)=&NR(1),&QWPw(2,$ap)); | ||
36 | ### | ||
37 | &mul($a0,$word,($l0)=&NR(1)); &FR($a0); | ||
38 | &ld(($r2)=&NR(1),&QWPw(2,$rp)); | ||
39 | &muh($a1,$word,($h1)=&NR(1)); | ||
40 | &ld(($a3)=&NR(1),&QWPw(3,$ap)); | ||
41 | &mul($a1,$word,($l1)=&NR(1)); &FR($a1); | ||
42 | &ld(($r3)=&NR(1),&QWPw(3,$rp)); | ||
43 | &add($r0,$l0,$r0); | ||
44 | &add($r1,$l1,$r1); | ||
45 | &cmpult($r0,$l0,($t0)=&NR(1)); &FR($l0); | ||
46 | &cmpult($r1,$l1,($t1)=&NR(1)); &FR($l1); | ||
47 | &muh($a2,$word,($h2)=&NR(1)); | ||
48 | &add($r0,$cc,$r0); | ||
49 | &add($h0,$t0,$h0); &FR($t0); | ||
50 | &cmpult($r0,$cc,$cc); | ||
51 | &add($h1,$t1,$h1); &FR($t1); | ||
52 | &add($h0,$cc,$cc); &FR($h0); | ||
53 | &mul($a2,$word,($l2)=&NR(1)); &FR($a2); | ||
54 | &add($r1,$cc,$r1); | ||
55 | &cmpult($r1,$cc,$cc); | ||
56 | &add($r2,$l2,$r2); | ||
57 | &add($h1,$cc,$cc); &FR($h1); | ||
58 | &cmpult($r2,$l2,($t2)=&NR(1)); &FR($l2); | ||
59 | &muh($a3,$word,($h3)=&NR(1)); | ||
60 | &add($r2,$cc,$r2); | ||
61 | &st($r0,&QWPw(0,$rp)); &FR($r0); | ||
62 | &add($h2,$t2,$h2); &FR($t2); | ||
63 | &st($r1,&QWPw(1,$rp)); &FR($r1); | ||
64 | &cmpult($r2,$cc,$cc); | ||
65 | &mul($a3,$word,($l3)=&NR(1)); &FR($a3); | ||
66 | &add($h2,$cc,$cc); &FR($h2); | ||
67 | &st($r2,&QWPw(2,$rp)); &FR($r2); | ||
68 | &sub($count,4,$count); # count-=4 | ||
69 | &add($rp,4*$QWS,$rp); # count+=4 | ||
70 | &add($r3,$l3,$r3); | ||
71 | &add($ap,4*$QWS,$ap); # count+=4 | ||
72 | &cmpult($r3,$l3,($t3)=&NR(1)); &FR($l3); | ||
73 | &add($r3,$cc,$r3); | ||
74 | &add($h3,$t3,$h3); &FR($t3); | ||
75 | &cmpult($r3,$cc,$cc); | ||
76 | &st($r3,&QWPw(-1,$rp)); &FR($r3); | ||
77 | &add($h3,$cc,$cc); &FR($h3); | ||
78 | |||
79 | ### | ||
80 | &blt($count,&label("finish")); | ||
81 | &ld(($a0)=&NR(1),&QWPw(0,$ap)); | ||
82 | &br(&label("loop")); | ||
83 | EOF | ||
84 | ################################################## | ||
85 | # One word per iteration (every word comes through here, see header) | ||
86 | |||
87 | &set_label("last_loop"); | ||
88 | |||
89 | &ld(($a0)=&NR(1),&QWPw(0,$ap)); # get a | ||
90 | &ld(($r0)=&NR(1),&QWPw(0,$rp)); # get r | ||
91 | ### | ||
92 | ### | ||
93 | &muh($a0,$word,($h0)=&NR(1)); | ||
94 | ### wait 8 | ||
95 | &mul($a0,$word,($l0)=&NR(1)); &FR($a0); | ||
96 | &add($rp,$QWS,$rp); | ||
97 | &add($ap,$QWS,$ap); | ||
98 | &sub($count,1,$count); | ||
99 | ### wait 3 until l0 is available | ||
100 | &add($r0,$l0,$r0); | ||
101 | ### | ||
102 | &cmpult($r0,$l0,($t0)=&NR(1)); &FR($l0); | ||
103 | &add($r0,$cc,$r0); | ||
104 | &add($h0,$t0,$h0); &FR($t0); | ||
105 | &cmpult($r0,$cc,$cc); | ||
106 | &add($h0,$cc,$cc); &FR($h0); | ||
107 | |||
108 | &st($r0,&QWPw(-1,$rp)); &FR($r0); | ||
109 | &bgt($count,&label("last_loop")); | ||
110 | &function_end_A($name); | ||
111 | |||
112 | ###################################################### | ||
113 | &set_label("finish"); | ||
114 | &add($count,4,$count); | ||
115 | &bgt($count,&label("last_loop")); | ||
116 | |||
117 | &set_label("end"); | ||
118 | &function_end($name); | ||
119 | |||
120 | &fin_pool; | ||
121 | } | ||
122 | |||
123 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha/mul_c4.pl b/src/lib/libcrypto/bn/asm/alpha/mul_c4.pl new file mode 100644 index 0000000000..9cc876ded4 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha/mul_c4.pl | |||
@@ -0,0 +1,215 @@ | |||
1 | #!/usr/local/bin/perl | ||
2 | # alpha assembler | ||
3 | |||
4 | # upto | ||
5 | |||
6 | sub mul_add_c # emit code adding the product a*b into the three-word accumulator (c0,c1,c2) | ||
7 | { | ||
8 | local($a,$b,$c0,$c1,$c2)=@_; | ||
9 | local($l1,$h1,$t1,$t2); | ||
10 | # $l1/$h1 take the mul/muh results (presumably low/high product halves); $t1/$t2 take cmpult carry flags | ||
11 | &mul($a,$b,($l1)=&NR(1)); | ||
12 | &muh($a,$b,($h1)=&NR(1)); | ||
13 | &add($c0,$l1,$c0); | ||
14 | &cmpult($c0,$l1,($t1)=&NR(1)); &FR($l1); # t1 = (c0 < l1) after the add: carry out of c0 | ||
15 | &add($t1,$h1,$h1); &FR($t1); # fold that carry into h1 before h1 is added to c1 | ||
16 | &add($c1,$h1,$c1); | ||
17 | &cmpult($c1,$h1,($t2)=&NR(1)); &FR($h1); # t2 = carry out of c1 | ||
18 | &add($c2,$t2,$c2); &FR($t2); | ||
19 | } | ||
20 | |||
21 | sub bn_mul_comba4 | ||
22 | { | ||
23 | local($name)=@_; | ||
24 | local(@a,@b,$r,$c0,$c1,$c2); | ||
25 | # Emits a 4x4-word comba multiply: all 16 products of a[0..3] and b[0..3] are summed into r[0..7]. | ||
26 | $cnt=1; | ||
27 | &init_pool(3); | ||
28 | # wparam(0..2) are the r, a and b arguments of the generated function. | ||
29 | $rp=&wparam(0); | ||
30 | $ap=&wparam(1); | ||
31 | $bp=&wparam(2); | ||
32 | |||
33 | &function_begin($name,""); | ||
34 | |||
35 | &comment(""); | ||
36 | # Operand loads are interleaved with the first multiplies. | ||
37 | &ld(($a[0])=&NR(1),&QWPw(0,$ap)); | ||
38 | &ld(($b[0])=&NR(1),&QWPw(0,$bp)); | ||
39 | &ld(($a[1])=&NR(1),&QWPw(1,$ap)); | ||
40 | &ld(($b[1])=&NR(1),&QWPw(1,$bp)); | ||
41 | &mul($a[0],$b[0],($r00)=&NR(1)); | ||
42 | &ld(($a[2])=&NR(1),&QWPw(2,$ap)); | ||
43 | &ld(($b[2])=&NR(1),&QWPw(2,$bp)); | ||
44 | &muh($a[0],$b[0],($r01)=&NR(1)); | ||
45 | &FR($ap); &ld(($a[3])=&NR(1),&QWPw(3,$ap)); | ||
46 | &FR($bp); &ld(($b[3])=&NR(1),&QWPw(3,$bp)); | ||
47 | &mul($a[0],$b[1],($r02)=&NR(1)); | ||
48 | # $R accumulates the current result word; $H1 and $H2 collect its carries for the next word. | ||
49 | ($R,$H1,$H2)=&NR(3); | ||
50 | |||
51 | &st($r00,&QWPw(0,$rp)); &FR($r00); | ||
52 | # r[1] = muh(a0,b0) + mul(a0,b1) + mul(a1,b0) | ||
53 | &mov("zero",$R); | ||
54 | &mul($a[1],$b[0],($r03)=&NR(1)); | ||
55 | |||
56 | &mov("zero",$H1); | ||
57 | &mov("zero",$H2); # both carry counters must start at zero | ||
58 | &add($R,$r01,$R); | ||
59 | &muh($a[0],$b[1],($r04)=&NR(1)); | ||
60 | &cmpult($R,$r01,($t01)=&NR(1)); &FR($r01); | ||
61 | &add($R,$r02,$R); | ||
62 | &add($H1,$t01,$H1); &FR($t01); | ||
63 | &muh($a[1],$b[0],($r05)=&NR(1)); | ||
64 | &cmpult($R,$r02,($t02)=&NR(1)); &FR($r02); | ||
65 | &add($R,$r03,$R); | ||
66 | &add($H2,$t02,$H2); &FR($t02); | ||
67 | &mul($a[0],$b[2],($r06)=&NR(1)); | ||
68 | &cmpult($R,$r03,($t03)=&NR(1)); &FR($r03); | ||
69 | &add($H1,$t03,$H1); &FR($t03); | ||
70 | &st($R,&QWPw(1,$rp)); | ||
71 | &add($H1,$H2,$R); | ||
72 | # r[2] = carries + muh(a0,b1) + muh(a1,b0) + mul(a0,b2) + mul(a1,b1) + mul(a2,b0) | ||
73 | &mov("zero",$H1); | ||
74 | &add($R,$r04,$R); | ||
75 | &mov("zero",$H2); | ||
76 | &mul($a[1],$b[1],($r07)=&NR(1)); | ||
77 | &cmpult($R,$r04,($t04)=&NR(1)); &FR($r04); | ||
78 | &add($R,$r05,$R); | ||
79 | &add($H1,$t04,$H1); &FR($t04); | ||
80 | &mul($a[2],$b[0],($r08)=&NR(1)); | ||
81 | &cmpult($R,$r05,($t05)=&NR(1)); &FR($r05); | ||
82 | &add($R,$r06,$R); # the term whose carry is tested three rows below | ||
83 | &add($H2,$t05,$H2); &FR($t05); | ||
84 | &muh($a[0],$b[2],($r09)=&NR(1)); | ||
85 | &cmpult($R,$r06,($t06)=&NR(1)); &FR($r06); | ||
86 | &add($R,$r07,$R); | ||
87 | &add($H1,$t06,$H1); &FR($t06); | ||
88 | &muh($a[1],$b[1],($r10)=&NR(1)); | ||
89 | &cmpult($R,$r07,($t07)=&NR(1)); &FR($r07); | ||
90 | &add($R,$r08,$R); | ||
91 | &add($H2,$t07,$H2); &FR($t07); | ||
92 | &muh($a[2],$b[0],($r11)=&NR(1)); | ||
93 | &cmpult($R,$r08,($t08)=&NR(1)); &FR($r08); | ||
94 | &add($H1,$t08,$H1); &FR($t08); | ||
95 | &st($R,&QWPw(2,$rp)); | ||
96 | &add($H1,$H2,$R); | ||
97 | # r[3] = carries + the three muh terms of column 2 + mul(a0,b3) + mul(a1,b2) + mul(a2,b1) + mul(a3,b0) | ||
98 | &mov("zero",$H1); | ||
99 | &add($R,$r09,$R); | ||
100 | &mov("zero",$H2); | ||
101 | &mul($a[0],$b[3],($r12)=&NR(1)); | ||
102 | &cmpult($R,$r09,($t09)=&NR(1)); &FR($r09); | ||
103 | &add($R,$r10,$R); | ||
104 | &add($H1,$t09,$H1); &FR($t09); | ||
105 | &mul($a[1],$b[2],($r13)=&NR(1)); | ||
106 | &cmpult($R,$r10,($t10)=&NR(1)); &FR($r10); | ||
107 | &add($R,$r11,$R); | ||
108 | &add($H1,$t10,$H1); &FR($t10); | ||
109 | &mul($a[2],$b[1],($r14)=&NR(1)); | ||
110 | &cmpult($R,$r11,($t11)=&NR(1)); &FR($r11); | ||
111 | &add($R,$r12,$R); | ||
112 | &add($H1,$t11,$H1); &FR($t11); | ||
113 | &mul($a[3],$b[0],($r15)=&NR(1)); | ||
114 | &cmpult($R,$r12,($t12)=&NR(1)); &FR($r12); | ||
115 | &add($R,$r13,$R); | ||
116 | &add($H1,$t12,$H1); &FR($t12); | ||
117 | &muh($a[0],$b[3],($r16)=&NR(1)); | ||
118 | &cmpult($R,$r13,($t13)=&NR(1)); &FR($r13); | ||
119 | &add($R,$r14,$R); | ||
120 | &add($H1,$t13,$H1); &FR($t13); | ||
121 | &muh($a[1],$b[2],($r17)=&NR(1)); | ||
122 | &cmpult($R,$r14,($t14)=&NR(1)); &FR($r14); | ||
123 | &add($R,$r15,$R); | ||
124 | &add($H1,$t14,$H1); &FR($t14); | ||
125 | &muh($a[2],$b[1],($r18)=&NR(1)); | ||
126 | &cmpult($R,$r15,($t15)=&NR(1)); &FR($r15); | ||
127 | &add($H1,$t15,$H1); &FR($t15); | ||
128 | &st($R,&QWPw(3,$rp)); | ||
129 | &add($H1,$H2,$R); | ||
130 | # r[4] = carries + the four muh terms of column 3 + mul(a1,b3) + mul(a2,b2) + mul(a3,b1) | ||
131 | &mov("zero",$H1); | ||
132 | &add($R,$r16,$R); | ||
133 | &mov("zero",$H2); | ||
134 | &muh($a[3],$b[0],($r19)=&NR(1)); | ||
135 | &cmpult($R,$r16,($t16)=&NR(1)); &FR($r16); | ||
136 | &add($R,$r17,$R); | ||
137 | &add($H1,$t16,$H1); &FR($t16); | ||
138 | &mul($a[1],$b[3],($r20)=&NR(1)); | ||
139 | &cmpult($R,$r17,($t17)=&NR(1)); &FR($r17); | ||
140 | &add($R,$r18,$R); | ||
141 | &add($H1,$t17,$H1); &FR($t17); | ||
142 | &mul($a[2],$b[2],($r21)=&NR(1)); | ||
143 | &cmpult($R,$r18,($t18)=&NR(1)); &FR($r18); | ||
144 | &add($R,$r19,$R); | ||
145 | &add($H1,$t18,$H1); &FR($t18); | ||
146 | &mul($a[3],$b[1],($r22)=&NR(1)); | ||
147 | &cmpult($R,$r19,($t19)=&NR(1)); &FR($r19); | ||
148 | &add($R,$r20,$R); | ||
149 | &add($H1,$t19,$H1); &FR($t19); | ||
150 | &muh($a[1],$b[3],($r23)=&NR(1)); | ||
151 | &cmpult($R,$r20,($t20)=&NR(1)); &FR($r20); | ||
152 | &add($R,$r21,$R); | ||
153 | &add($H1,$t20,$H1); &FR($t20); | ||
154 | &muh($a[2],$b[2],($r24)=&NR(1)); | ||
155 | &cmpult($R,$r21,($t21)=&NR(1)); &FR($r21); | ||
156 | &add($R,$r22,$R); | ||
157 | &add($H1,$t21,$H1); &FR($t21); | ||
158 | &muh($a[3],$b[1],($r25)=&NR(1)); | ||
159 | &cmpult($R,$r22,($t22)=&NR(1)); &FR($r22); | ||
160 | &add($H1,$t22,$H1); &FR($t22); | ||
161 | &st($R,&QWPw(4,$rp)); | ||
162 | &add($H1,$H2,$R); | ||
163 | # r[5] = carries + the three muh terms of column 4 + mul(a2,b3) + mul(a3,b2) | ||
164 | &mov("zero",$H1); | ||
165 | &add($R,$r23,$R); | ||
166 | &mov("zero",$H2); | ||
167 | &mul($a[2],$b[3],($r26)=&NR(1)); | ||
168 | &cmpult($R,$r23,($t23)=&NR(1)); &FR($r23); | ||
169 | &add($R,$r24,$R); | ||
170 | &add($H1,$t23,$H1); &FR($t23); | ||
171 | &mul($a[3],$b[2],($r27)=&NR(1)); | ||
172 | &cmpult($R,$r24,($t24)=&NR(1)); &FR($r24); | ||
173 | &add($R,$r25,$R); | ||
174 | &add($H1,$t24,$H1); &FR($t24); | ||
175 | &muh($a[2],$b[3],($r28)=&NR(1)); | ||
176 | &cmpult($R,$r25,($t25)=&NR(1)); &FR($r25); | ||
177 | &add($R,$r26,$R); | ||
178 | &add($H1,$t25,$H1); &FR($t25); | ||
179 | &muh($a[3],$b[2],($r29)=&NR(1)); | ||
180 | &cmpult($R,$r26,($t26)=&NR(1)); &FR($r26); | ||
181 | &add($R,$r27,$R); | ||
182 | &add($H1,$t26,$H1); &FR($t26); | ||
183 | &mul($a[3],$b[3],($r30)=&NR(1)); | ||
184 | &cmpult($R,$r27,($t27)=&NR(1)); &FR($r27); | ||
185 | &add($H1,$t27,$H1); &FR($t27); | ||
186 | &st($R,&QWPw(5,$rp)); | ||
187 | &add($H1,$H2,$R); | ||
188 | # r[6] = carries + muh(a2,b3) + muh(a3,b2) + mul(a3,b3) | ||
189 | &mov("zero",$H1); | ||
190 | &add($R,$r28,$R); | ||
191 | &mov("zero",$H2); | ||
192 | &muh($a[3],$b[3],($r31)=&NR(1)); | ||
193 | &cmpult($R,$r28,($t28)=&NR(1)); &FR($r28); | ||
194 | &add($R,$r29,$R); | ||
195 | &add($H1,$t28,$H1); &FR($t28); | ||
196 | ############ | ||
197 | &cmpult($R,$r29,($t29)=&NR(1)); &FR($r29); | ||
198 | &add($R,$r30,$R); | ||
199 | &add($H1,$t29,$H1); &FR($t29); | ||
200 | ############ | ||
201 | &cmpult($R,$r30,($t30)=&NR(1)); &FR($r30); | ||
202 | &add($H1,$t30,$H1); &FR($t30); | ||
203 | &st($R,&QWPw(6,$rp)); | ||
204 | &add($H1,$H2,$R); | ||
205 | # r[7] = carries + muh(a3,b3); no carry check is emitted for this last word | ||
206 | &add($R,$r31,$R); &FR($r31); | ||
207 | &st($R,&QWPw(7,$rp)); | ||
208 | |||
209 | &FR($R,$H1,$H2); | ||
210 | &function_end($name); | ||
211 | |||
212 | &fin_pool; | ||
213 | } | ||
214 | |||
215 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha/mul_c4.works.pl b/src/lib/libcrypto/bn/asm/alpha/mul_c4.works.pl new file mode 100644 index 0000000000..79d86dd25c --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha/mul_c4.works.pl | |||
@@ -0,0 +1,98 @@ | |||
1 | #!/usr/local/bin/perl | ||
2 | # alpha assembler | ||
3 | |||
4 | sub mul_add_c # emit code adding the product a*b into the three-word accumulator (c0,c1,c2) | ||
5 | { | ||
6 | local($a,$b,$c0,$c1,$c2)=@_; | ||
7 | local($l1,$h1,$t1,$t2); | ||
8 | # The print below writes a running call counter ($cnt, a global) to STDERR each time this sub runs. | ||
9 | print STDERR "count=$cnt\n"; $cnt++; | ||
10 | &mul($a,$b,($l1)=&NR(1)); | ||
11 | &muh($a,$b,($h1)=&NR(1)); | ||
12 | &add($c0,$l1,$c0); | ||
13 | &cmpult($c0,$l1,($t1)=&NR(1)); &FR($l1); # t1 = (c0 < l1) after the add: carry out of c0 | ||
14 | &add($t1,$h1,$h1); &FR($t1); # fold that carry into h1 before h1 is added to c1 | ||
15 | &add($c1,$h1,$c1); | ||
16 | &cmpult($c1,$h1,($t2)=&NR(1)); &FR($h1); # t2 = carry out of c1 | ||
17 | &add($c2,$t2,$c2); &FR($t2); | ||
18 | } | ||
19 | |||
20 | sub bn_mul_comba4 | ||
21 | { | ||
22 | local($name)=@_; | ||
23 | local(@a,@b,$r,$c0,$c1,$c2); | ||
24 | # Emits a 4x4-word comba multiply, one result word (column) at a time, via mul_add_c. | ||
25 | $cnt=1; | ||
26 | &init_pool(3); | ||
27 | # wparam(0..2) are the r, a and b arguments of the generated function. | ||
28 | $rp=&wparam(0); | ||
29 | $ap=&wparam(1); | ||
30 | $bp=&wparam(2); | ||
31 | |||
32 | &function_begin($name,""); | ||
33 | |||
34 | &comment(""); | ||
35 | # Load all eight input words up front; $ap and $bp go back to the pool after their last use. | ||
36 | &ld(($a[0])=&NR(1),&QWPw(0,$ap)); | ||
37 | &ld(($b[0])=&NR(1),&QWPw(0,$bp)); | ||
38 | &ld(($a[1])=&NR(1),&QWPw(1,$ap)); | ||
39 | &ld(($b[1])=&NR(1),&QWPw(1,$bp)); | ||
40 | &ld(($a[2])=&NR(1),&QWPw(2,$ap)); | ||
41 | &ld(($b[2])=&NR(1),&QWPw(2,$bp)); | ||
42 | &ld(($a[3])=&NR(1),&QWPw(3,$ap)); &FR($ap); | ||
43 | &ld(($b[3])=&NR(1),&QWPw(3,$bp)); &FR($bp); | ||
44 | # Column 0: r[0] is the mul result of a0*b0; its muh result seeds the next column. | ||
45 | ($c0,$c1,$c2)=&NR(3); | ||
46 | &mov("zero",$c2); | ||
47 | &mul($a[0],$b[0],$c0); | ||
48 | &muh($a[0],$b[0],$c1); | ||
49 | &st($c0,&QWPw(0,$rp)); &FR($c0); ($c0)=&NR($c0); # NOTE(review): NR gets a register name here; mul_c8.pl passes 1 at this point - confirm | ||
50 | ($c0,$c1,$c2)=($c1,$c2,$c0); # rotate accumulator roles for the next column | ||
51 | &mov("zero",$c2); | ||
52 | # Column 1 | ||
53 | &mul_add_c($a[0],$b[1],$c0,$c1,$c2); | ||
54 | &mul_add_c($a[1],$b[0],$c0,$c1,$c2); | ||
55 | &st($c0,&QWPw(1,$rp)); &FR($c0); ($c0)=&NR($c0); | ||
56 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
57 | &mov("zero",$c2); | ||
58 | # Column 2 | ||
59 | &mul_add_c($a[1],$b[1],$c0,$c1,$c2); | ||
60 | &mul_add_c($a[0],$b[2],$c0,$c1,$c2); | ||
61 | &mul_add_c($a[2],$b[0],$c0,$c1,$c2); | ||
62 | &st($c0,&QWPw(2,$rp)); &FR($c0); ($c0)=&NR($c0); | ||
63 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
64 | &mov("zero",$c2); | ||
65 | # Column 3; from here on each input register is freed after the last product that uses it. | ||
66 | &mul_add_c($a[0],$b[3],$c0,$c1,$c2); &FR($a[0]); | ||
67 | &mul_add_c($a[1],$b[2],$c0,$c1,$c2); | ||
68 | &mul_add_c($a[2],$b[1],$c0,$c1,$c2); | ||
69 | &mul_add_c($a[3],$b[0],$c0,$c1,$c2); &FR($b[0]); | ||
70 | &st($c0,&QWPw(3,$rp)); &FR($c0); ($c0)=&NR($c0); | ||
71 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
72 | &mov("zero",$c2); | ||
73 | # Column 4 | ||
74 | &mul_add_c($a[1],$b[3],$c0,$c1,$c2); &FR($a[1]); | ||
75 | &mul_add_c($a[2],$b[2],$c0,$c1,$c2); | ||
76 | &mul_add_c($a[3],$b[1],$c0,$c1,$c2); &FR($b[1]); | ||
77 | &st($c0,&QWPw(4,$rp)); &FR($c0); ($c0)=&NR($c0); | ||
78 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
79 | &mov("zero",$c2); | ||
80 | # Column 5 | ||
81 | &mul_add_c($a[2],$b[3],$c0,$c1,$c2); &FR($a[2]); | ||
82 | &mul_add_c($a[3],$b[2],$c0,$c1,$c2); &FR($b[2]); | ||
83 | &st($c0,&QWPw(5,$rp)); &FR($c0); ($c0)=&NR($c0); | ||
84 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
85 | &mov("zero",$c2); | ||
86 | # Column 6, then $c1 is stored directly as r[7]. | ||
87 | &mul_add_c($a[3],$b[3],$c0,$c1,$c2); &FR($a[3],$b[3]); | ||
88 | &st($c0,&QWPw(6,$rp)); | ||
89 | &st($c1,&QWPw(7,$rp)); | ||
90 | |||
91 | &FR($c0,$c1,$c2); | ||
92 | |||
93 | &function_end($name); | ||
94 | |||
95 | &fin_pool; | ||
96 | } | ||
97 | |||
98 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha/mul_c8.pl b/src/lib/libcrypto/bn/asm/alpha/mul_c8.pl new file mode 100644 index 0000000000..525ca7494b --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha/mul_c8.pl | |||
@@ -0,0 +1,177 @@ | |||
1 | #!/usr/local/bin/perl | ||
2 | # alpha assembler | ||
3 | |||
4 | sub bn_mul_comba8 | ||
5 | { | ||
6 | local($name)=@_; | ||
7 | local(@a,@b,$r,$c0,$c1,$c2); | ||
8 | # Emits an 8x8-word comba multiply, one result word (column) at a time, via mul_add_c. | ||
9 | $cnt=1; | ||
10 | &init_pool(3); | ||
11 | # wparam(0..2) are the r, a and b arguments of the generated function. | ||
12 | $rp=&wparam(0); | ||
13 | $ap=&wparam(1); | ||
14 | $bp=&wparam(2); | ||
15 | |||
16 | &function_begin($name,""); | ||
17 | |||
18 | &comment(""); | ||
19 | # $reg_s0/$reg_s1 are spilled to two stack slots and handed to the register pool. | ||
20 | &stack_push(2); | ||
21 | &ld(($a[0])=&NR(1),&QWPw(0,$ap)); | ||
22 | &ld(($b[0])=&NR(1),&QWPw(0,$bp)); | ||
23 | &st($reg_s0,&swtmp(0)); &FR($reg_s0); | ||
24 | &st($reg_s1,&swtmp(1)); &FR($reg_s1); | ||
25 | &ld(($a[1])=&NR(1),&QWPw(1,$ap)); | ||
26 | &ld(($b[1])=&NR(1),&QWPw(1,$bp)); | ||
27 | &ld(($a[2])=&NR(1),&QWPw(2,$ap)); | ||
28 | &ld(($b[2])=&NR(1),&QWPw(2,$bp)); | ||
29 | &ld(($a[3])=&NR(1),&QWPw(3,$ap)); | ||
30 | &ld(($b[3])=&NR(1),&QWPw(3,$bp)); | ||
31 | &ld(($a[4])=&NR(1),&QWPw(4,$ap)); # each a[i]/b[i] must come from word index i | ||
32 | &ld(($b[4])=&NR(1),&QWPw(4,$bp)); | ||
33 | &ld(($a[5])=&NR(1),&QWPw(5,$ap)); | ||
34 | &ld(($b[5])=&NR(1),&QWPw(5,$bp)); | ||
35 | &ld(($a[6])=&NR(1),&QWPw(6,$ap)); | ||
36 | &ld(($b[6])=&NR(1),&QWPw(6,$bp)); | ||
37 | &ld(($a[7])=&NR(1),&QWPw(7,$ap)); &FR($ap); | ||
38 | &ld(($b[7])=&NR(1),&QWPw(7,$bp)); &FR($bp); | ||
39 | # Column 0: r[0] is the mul result of a0*b0; its muh result seeds the next column. | ||
40 | ($c0,$c1,$c2)=&NR(3); | ||
41 | &mov("zero",$c2); | ||
42 | &mul($a[0],$b[0],$c0); | ||
43 | &muh($a[0],$b[0],$c1); | ||
44 | &st($c0,&QWPw(0,$rp)); &FR($c0); ($c0)=&NR(1); | ||
45 | ($c0,$c1,$c2)=($c1,$c2,$c0); # rotate accumulator roles for the next column | ||
46 | &mov("zero",$c2); | ||
47 | # Column 1 | ||
48 | &mul_add_c($a[0],$b[1],$c0,$c1,$c2); | ||
49 | &mul_add_c($a[1],$b[0],$c0,$c1,$c2); | ||
50 | &st($c0,&QWPw(1,$rp)); &FR($c0); ($c0)=&NR(1); | ||
51 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
52 | &mov("zero",$c2); | ||
53 | # Column 2 | ||
54 | &mul_add_c($a[0],$b[2],$c0,$c1,$c2); | ||
55 | &mul_add_c($a[1],$b[1],$c0,$c1,$c2); | ||
56 | &mul_add_c($a[2],$b[0],$c0,$c1,$c2); | ||
57 | &st($c0,&QWPw(2,$rp)); &FR($c0); ($c0)=&NR(1); | ||
58 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
59 | &mov("zero",$c2); | ||
60 | # Column 3 | ||
61 | &mul_add_c($a[0],$b[3],$c0,$c1,$c2); | ||
62 | &mul_add_c($a[1],$b[2],$c0,$c1,$c2); | ||
63 | &mul_add_c($a[2],$b[1],$c0,$c1,$c2); | ||
64 | &mul_add_c($a[3],$b[0],$c0,$c1,$c2); | ||
65 | &st($c0,&QWPw(3,$rp)); &FR($c0); ($c0)=&NR(1); | ||
66 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
67 | &mov("zero",$c2); | ||
68 | # Column 4 | ||
69 | &mul_add_c($a[0],$b[4],$c0,$c1,$c2); | ||
70 | &mul_add_c($a[1],$b[3],$c0,$c1,$c2); | ||
71 | &mul_add_c($a[2],$b[2],$c0,$c1,$c2); | ||
72 | &mul_add_c($a[3],$b[1],$c0,$c1,$c2); | ||
73 | &mul_add_c($a[4],$b[0],$c0,$c1,$c2); | ||
74 | &st($c0,&QWPw(4,$rp)); &FR($c0); ($c0)=&NR(1); | ||
75 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
76 | &mov("zero",$c2); | ||
77 | # Column 5 | ||
78 | &mul_add_c($a[0],$b[5],$c0,$c1,$c2); | ||
79 | &mul_add_c($a[1],$b[4],$c0,$c1,$c2); | ||
80 | &mul_add_c($a[2],$b[3],$c0,$c1,$c2); | ||
81 | &mul_add_c($a[3],$b[2],$c0,$c1,$c2); | ||
82 | &mul_add_c($a[4],$b[1],$c0,$c1,$c2); | ||
83 | &mul_add_c($a[5],$b[0],$c0,$c1,$c2); | ||
84 | &st($c0,&QWPw(5,$rp)); &FR($c0); ($c0)=&NR(1); | ||
85 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
86 | &mov("zero",$c2); | ||
87 | # Column 6 | ||
88 | &mul_add_c($a[0],$b[6],$c0,$c1,$c2); | ||
89 | &mul_add_c($a[1],$b[5],$c0,$c1,$c2); | ||
90 | &mul_add_c($a[2],$b[4],$c0,$c1,$c2); | ||
91 | &mul_add_c($a[3],$b[3],$c0,$c1,$c2); | ||
92 | &mul_add_c($a[4],$b[2],$c0,$c1,$c2); | ||
93 | &mul_add_c($a[5],$b[1],$c0,$c1,$c2); | ||
94 | &mul_add_c($a[6],$b[0],$c0,$c1,$c2); | ||
95 | &st($c0,&QWPw(6,$rp)); &FR($c0); ($c0)=&NR(1); | ||
96 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
97 | &mov("zero",$c2); | ||
98 | # Column 7; from here on each input register is freed after the last product that uses it. | ||
99 | &mul_add_c($a[0],$b[7],$c0,$c1,$c2); &FR($a[0]); | ||
100 | &mul_add_c($a[1],$b[6],$c0,$c1,$c2); | ||
101 | &mul_add_c($a[2],$b[5],$c0,$c1,$c2); | ||
102 | &mul_add_c($a[3],$b[4],$c0,$c1,$c2); | ||
103 | &mul_add_c($a[4],$b[3],$c0,$c1,$c2); | ||
104 | &mul_add_c($a[5],$b[2],$c0,$c1,$c2); | ||
105 | &mul_add_c($a[6],$b[1],$c0,$c1,$c2); | ||
106 | &mul_add_c($a[7],$b[0],$c0,$c1,$c2); &FR($b[0]); | ||
107 | &st($c0,&QWPw(7,$rp)); &FR($c0); ($c0)=&NR(1); | ||
108 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
109 | &mov("zero",$c2); | ||
110 | # Column 8 | ||
111 | &mul_add_c($a[1],$b[7],$c0,$c1,$c2); &FR($a[1]); | ||
112 | &mul_add_c($a[2],$b[6],$c0,$c1,$c2); | ||
113 | &mul_add_c($a[3],$b[5],$c0,$c1,$c2); | ||
114 | &mul_add_c($a[4],$b[4],$c0,$c1,$c2); | ||
115 | &mul_add_c($a[5],$b[3],$c0,$c1,$c2); | ||
116 | &mul_add_c($a[6],$b[2],$c0,$c1,$c2); | ||
117 | &mul_add_c($a[7],$b[1],$c0,$c1,$c2); &FR($b[1]); | ||
118 | &st($c0,&QWPw(8,$rp)); &FR($c0); ($c0)=&NR(1); | ||
119 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
120 | &mov("zero",$c2); | ||
121 | # Column 9 | ||
122 | &mul_add_c($a[2],$b[7],$c0,$c1,$c2); &FR($a[2]); | ||
123 | &mul_add_c($a[3],$b[6],$c0,$c1,$c2); | ||
124 | &mul_add_c($a[4],$b[5],$c0,$c1,$c2); | ||
125 | &mul_add_c($a[5],$b[4],$c0,$c1,$c2); | ||
126 | &mul_add_c($a[6],$b[3],$c0,$c1,$c2); | ||
127 | &mul_add_c($a[7],$b[2],$c0,$c1,$c2); &FR($b[2]); | ||
128 | &st($c0,&QWPw(9,$rp)); &FR($c0); ($c0)=&NR(1); | ||
129 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
130 | &mov("zero",$c2); | ||
131 | # Column 10 | ||
132 | &mul_add_c($a[3],$b[7],$c0,$c1,$c2); &FR($a[3]); | ||
133 | &mul_add_c($a[4],$b[6],$c0,$c1,$c2); | ||
134 | &mul_add_c($a[5],$b[5],$c0,$c1,$c2); | ||
135 | &mul_add_c($a[6],$b[4],$c0,$c1,$c2); | ||
136 | &mul_add_c($a[7],$b[3],$c0,$c1,$c2); &FR($b[3]); | ||
137 | &st($c0,&QWPw(10,$rp)); &FR($c0); ($c0)=&NR(1); | ||
138 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
139 | &mov("zero",$c2); | ||
140 | # Column 11 | ||
141 | &mul_add_c($a[4],$b[7],$c0,$c1,$c2); &FR($a[4]); | ||
142 | &mul_add_c($a[5],$b[6],$c0,$c1,$c2); | ||
143 | &mul_add_c($a[6],$b[5],$c0,$c1,$c2); | ||
144 | &mul_add_c($a[7],$b[4],$c0,$c1,$c2); &FR($b[4]); | ||
145 | &st($c0,&QWPw(11,$rp)); &FR($c0); ($c0)=&NR(1); | ||
146 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
147 | &mov("zero",$c2); | ||
148 | # Column 12 | ||
149 | &mul_add_c($a[5],$b[7],$c0,$c1,$c2); &FR($a[5]); | ||
150 | &mul_add_c($a[6],$b[6],$c0,$c1,$c2); | ||
151 | &mul_add_c($a[7],$b[5],$c0,$c1,$c2); &FR($b[5]); | ||
152 | &st($c0,&QWPw(12,$rp)); &FR($c0); ($c0)=&NR(1); | ||
153 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
154 | &mov("zero",$c2); | ||
155 | # Column 13 | ||
156 | &mul_add_c($a[6],$b[7],$c0,$c1,$c2); &FR($a[6]); | ||
157 | &mul_add_c($a[7],$b[6],$c0,$c1,$c2); &FR($b[6]); | ||
158 | &st($c0,&QWPw(13,$rp)); &FR($c0); ($c0)=&NR(1); | ||
159 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
160 | &mov("zero",$c2); | ||
161 | # Column 14, then $c1 is stored directly as r[15]. | ||
162 | &mul_add_c($a[7],$b[7],$c0,$c1,$c2); &FR($a[7],$b[7]); | ||
163 | &st($c0,&QWPw(14,$rp)); | ||
164 | &st($c1,&QWPw(15,$rp)); | ||
165 | |||
166 | &FR($c0,$c1,$c2); | ||
167 | # Reload the two registers spilled at entry and release the stack slots. | ||
168 | &ld($reg_s0,&swtmp(0)); | ||
169 | &ld($reg_s1,&swtmp(1)); | ||
170 | &stack_pop(2); | ||
171 | |||
172 | &function_end($name); | ||
173 | |||
174 | &fin_pool; | ||
175 | } | ||
176 | |||
177 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha/sqr.pl b/src/lib/libcrypto/bn/asm/alpha/sqr.pl new file mode 100644 index 0000000000..a55b696906 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha/sqr.pl | |||
@@ -0,0 +1,113 @@ | |||
1 | #!/usr/local/bin/perl | ||
2 | # alpha assembler | ||
3 | |||
4 | sub bn_sqr_words # emit a loop storing the mul and muh results of a[i]*a[i] to r[2i] and r[2i+1] | ||
5 | { | ||
6 | local($name)=@_; | ||
7 | local($cc,$a,$b,$r,$couny); # NOTE(review): $couny looks like a typo for $count, which stays global - confirm | ||
8 | |||
9 | &init_pool(3); | ||
10 | ($cc)=GR("r0"); | ||
11 | # wparam(0..2) are the r, a and count arguments of the generated function. | ||
12 | $rp=&wparam(0); | ||
13 | $ap=&wparam(1); | ||
14 | $count=&wparam(2); | ||
15 | |||
16 | &function_begin($name,""); | ||
17 | |||
18 | &comment(""); | ||
19 | &sub($count,4,$count); | ||
20 | &mov("zero",$cc); | ||
21 | &br(&label("finish")); # presumably an unconditional branch, so the code emitted up to "last_loop" is skipped | ||
22 | &blt($count,&label("finish")); | ||
23 | |||
24 | ($a0,$r0)=&NR(2); | ||
25 | &ld($a0,&QWPw(0,$ap)); | ||
26 | &ld($r0,&QWPw(0,$rp)); | ||
27 | # The heredoc below only assigns text to $a; nothing in this sub evaluates it, so no unrolled loop is emitted. | ||
28 | $a=<<'EOF'; | ||
29 | ########################################################## | ||
30 | &set_label("loop"); | ||
31 | |||
32 | ($a1)=&NR(1); &ld($a1,&QWPw(1,$ap)); | ||
33 | ($b1)=&NR(1); &ld($b1,&QWPw(1,$bp)); | ||
34 | ($a2)=&NR(1); &ld($a2,&QWPw(2,$ap)); | ||
35 | ($b2)=&NR(1); &ld($b2,&QWPw(2,$bp)); | ||
36 | ($a3)=&NR(1); &ld($a3,&QWPw(3,$ap)); | ||
37 | ($b3)=&NR(1); &ld($b3,&QWPw(3,$bp)); | ||
38 | |||
39 | ($o0,$t0)=&NR(2); | ||
40 | &add($a0,$b0,$o0); | ||
41 | &cmpult($o0,$b0,$t0); | ||
42 | &add($o0,$cc,$o0); | ||
43 | &cmpult($o0,$cc,$cc); | ||
44 | &add($cc,$t0,$cc); &FR($t0); | ||
45 | |||
46 | ($t1,$o1)=&NR(2); | ||
47 | |||
48 | &add($a1,$b1,$o1); &FR($a1); | ||
49 | &cmpult($o1,$b1,$t1); &FR($b1); | ||
50 | &add($o1,$cc,$o1); | ||
51 | &cmpult($o1,$cc,$cc); | ||
52 | &add($cc,$t1,$cc); &FR($t1); | ||
53 | |||
54 | ($t2,$o2)=&NR(2); | ||
55 | |||
56 | &add($a2,$b2,$o2); &FR($a2); | ||
57 | &cmpult($o2,$b2,$t2); &FR($b2); | ||
58 | &add($o2,$cc,$o2); | ||
59 | &cmpult($o2,$cc,$cc); | ||
60 | &add($cc,$t2,$cc); &FR($t2); | ||
61 | |||
62 | ($t3,$o3)=&NR(2); | ||
63 | |||
64 | &add($a3,$b3,$o3); &FR($a3); | ||
65 | &cmpult($o3,$b3,$t3); &FR($b3); | ||
66 | &add($o3,$cc,$o3); | ||
67 | &cmpult($o3,$cc,$cc); | ||
68 | &add($cc,$t3,$cc); &FR($t3); | ||
69 | |||
70 | &st($o0,&QWPw(0,$rp)); &FR($o0); | ||
71 | &st($o1,&QWPw(0,$rp)); &FR($o1); | ||
72 | &st($o2,&QWPw(0,$rp)); &FR($o2); | ||
73 | &st($o3,&QWPw(0,$rp)); &FR($o3); | ||
74 | |||
75 | &sub($count,4,$count); # count-=4 | ||
76 | &add($ap,4*$QWS,$ap); # count+=4 | ||
77 | &add($bp,4*$QWS,$bp); # count+=4 | ||
78 | &add($rp,4*$QWS,$rp); # count+=4 | ||
79 | |||
80 | &blt($count,&label("finish")); | ||
81 | &ld($a0,&QWPw(0,$ap)); | ||
82 | &ld($b0,&QWPw(0,$bp)); | ||
83 | &br(&label("loop")); | ||
84 | EOF | ||
85 | ################################################## | ||
86 | # One-word-per-iteration loop; with the unrolled code above disabled it processes every word. | ||
87 | |||
88 | &set_label("last_loop"); | ||
89 | |||
90 | &ld(($a0)=&NR(1),&QWPw(0,$ap)); # get a | ||
91 | &mul($a0,$a0,($l0)=&NR(1)); | ||
92 | &add($ap,$QWS,$ap); # advance a by one word | ||
93 | &add($rp,2*$QWS,$rp); # advance r by two words; the stores below use offsets -2 and -1 | ||
94 | &sub($count,1,$count); | ||
95 | &muh($a0,$a0,($h0)=&NR(1)); &FR($a0); | ||
96 | &st($l0,&QWPw(-2,$rp)); &FR($l0); | ||
97 | &st($h0,&QWPw(-1,$rp)); &FR($h0); | ||
98 | |||
99 | &bgt($count,&label("last_loop")); | ||
100 | &function_end_A($name); | ||
101 | |||
102 | ###################################################### | ||
103 | &set_label("finish"); | ||
104 | &add($count,4,$count); # undo the count-=4 done at entry | ||
105 | &bgt($count,&label("last_loop")); | ||
106 | |||
107 | &set_label("end"); | ||
108 | &function_end($name); | ||
109 | |||
110 | &fin_pool; | ||
111 | } | ||
112 | |||
113 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha/sqr_c4.pl b/src/lib/libcrypto/bn/asm/alpha/sqr_c4.pl new file mode 100644 index 0000000000..bf33f5b503 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha/sqr_c4.pl | |||
@@ -0,0 +1,109 @@ | |||
1 | #!/usr/local/bin/perl | ||
2 | # alpha assembler | ||
3 | |||
4 | sub sqr_add_c # emit code adding the square a*a into the three-word accumulator (c0,c1,c2) | ||
5 | { | ||
6 | local($a,$c0,$c1,$c2)=@_; | ||
7 | local($l1,$h1,$t1,$t2); | ||
8 | # The carry out of c0 is folded into h1 before h1 reaches c1, so one carry check on c1 covers both. | ||
9 | &mul($a,$a,($l1)=&NR(1)); | ||
10 | &muh($a,$a,($h1)=&NR(1)); | ||
11 | &add($c0,$l1,$c0); | ||
12 | &cmpult($c0,$l1,($t1)=&NR(1)); &FR($l1); # t1 = carry out of c0 | ||
13 | &add($t1,$h1,$h1); &FR($t1); # cannot wrap: the high word of a 64x64 product is at most 2^64-2 | ||
14 | &add($c1,$h1,$c1); | ||
15 | &cmpult($c1,$h1,($t2)=&NR(1)); &FR($h1); # t2 = carry out of c1 | ||
16 | &add($c2,$t2,$c2); &FR($t2); | ||
17 | } | ||
18 | |||
19 | sub sqr_add_c2 # emit code adding 2*a*b into the three-word accumulator (c0,c1,c2) | ||
20 | { | ||
21 | local($a,$b,$c0,$c1,$c2)=@_; | ||
22 | local($l1,$h1,$t1,$t2); | ||
23 | # Double the 128-bit product h1:l1; the bit shifted out of each word is captured with cmplt first. | ||
24 | &mul($a,$b,($l1)=&NR(1)); | ||
25 | &muh($a,$b,($h1)=&NR(1)); | ||
26 | &cmplt($l1,"zero",($lc1)=&NR(1)); | ||
27 | &cmplt($h1,"zero",($hc1)=&NR(1)); | ||
28 | &add($l1,$l1,$l1); | ||
29 | &add($h1,$h1,$h1); | ||
30 | &add($h1,$lc1,$h1); &FR($lc1); | ||
31 | &add($c2,$hc1,$c2); &FR($hc1); | ||
32 | # Accumulate; every add into c1 gets its own carry check because the doubled h1 may be all ones. | ||
33 | &add($c0,$l1,$c0); | ||
34 | &cmpult($c0,$l1,($lc1)=&NR(1)); &FR($l1); # lc1 = carry out of c0 | ||
35 | &add($c1,$h1,$c1); | ||
36 | &cmpult($c1,$h1,($hc1)=&NR(1)); &FR($h1); # hc1 = carry out of c1 += h1 | ||
37 | &add($c2,$hc1,$c2); | ||
38 | &add($c1,$lc1,$c1); | ||
39 | &cmpult($c1,$lc1,$hc1); &FR($lc1); &add($c2,$hc1,$c2); &FR($hc1); # carry out of c1 += lc1 | ||
40 | } | ||
41 | |||
42 | |||
43 | sub bn_sqr_comba4 | ||
44 | { | ||
45 | local($name)=@_; | ||
46 | local(@a,@b,$r,$c0,$c1,$c2); | ||
47 | # Emits a 4-word comba squaring: r[0..7] is built column by column with sqr_add_c / sqr_add_c2. | ||
48 | $cnt=1; | ||
49 | &init_pool(2); | ||
50 | # wparam(0..1) are the r and a arguments of the generated function. | ||
51 | $rp=&wparam(0); | ||
52 | $ap=&wparam(1); | ||
53 | |||
54 | &function_begin($name,""); | ||
55 | |||
56 | &comment(""); | ||
57 | |||
58 | &ld(($a[0])=&NR(1),&QWPw(0,$ap)); | ||
59 | &ld(($a[1])=&NR(1),&QWPw(1,$ap)); | ||
60 | &ld(($a[2])=&NR(1),&QWPw(2,$ap)); | ||
61 | &ld(($a[3])=&NR(1),&QWPw(3,$ap)); &FR($ap); | ||
62 | |||
63 | ($c0,$c1,$c2)=&NR(3); | ||
64 | # Column 0: a0*a0 | ||
65 | &mov("zero",$c2); | ||
66 | &mul($a[0],$a[0],$c0); | ||
67 | &muh($a[0],$a[0],$c1); | ||
68 | &st($c0,&QWPw(0,$rp)); | ||
69 | ($c0,$c1,$c2)=($c1,$c2,$c0); # rotate accumulator roles; the stored register is reused as the new top word | ||
70 | &mov("zero",$c2); | ||
71 | # Column 1: 2*a0*a1 | ||
72 | &sqr_add_c2($a[0],$a[1],$c0,$c1,$c2); | ||
73 | &st($c0,&QWPw(1,$rp)); | ||
74 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
75 | &mov("zero",$c2); | ||
76 | # Column 2: a1*a1 + 2*a2*a0 | ||
77 | &sqr_add_c($a[1],$c0,$c1,$c2); | ||
78 | &sqr_add_c2($a[2],$a[0],$c0,$c1,$c2); | ||
79 | &st($c0,&QWPw(2,$rp)); | ||
80 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
81 | &mov("zero",$c2); | ||
82 | # Column 3: 2*a3*a0 + 2*a2*a1 | ||
83 | &sqr_add_c2($a[3],$a[0],$c0,$c1,$c2); | ||
84 | &sqr_add_c2($a[2],$a[1],$c0,$c1,$c2); | ||
85 | &st($c0,&QWPw(3,$rp)); | ||
86 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
87 | &mov("zero",$c2); | ||
88 | # Column 4: a2*a2 + 2*a3*a1 | ||
89 | &sqr_add_c($a[2],$c0,$c1,$c2); | ||
90 | &sqr_add_c2($a[3],$a[1],$c0,$c1,$c2); | ||
91 | &st($c0,&QWPw(4,$rp)); | ||
92 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
93 | &mov("zero",$c2); | ||
94 | # Column 5: 2*a3*a2 | ||
95 | &sqr_add_c2($a[3],$a[2],$c0,$c1,$c2); | ||
96 | &st($c0,&QWPw(5,$rp)); | ||
97 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
98 | &mov("zero",$c2); | ||
99 | # Column 6: a3*a3, then $c1 is stored directly as r[7] | ||
100 | &sqr_add_c($a[3],$c0,$c1,$c2); | ||
101 | &st($c0,&QWPw(6,$rp)); | ||
102 | &st($c1,&QWPw(7,$rp)); | ||
103 | |||
104 | &function_end($name); | ||
105 | |||
106 | &fin_pool; | ||
107 | } | ||
108 | |||
109 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha/sqr_c8.pl b/src/lib/libcrypto/bn/asm/alpha/sqr_c8.pl new file mode 100644 index 0000000000..b4afe085f1 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha/sqr_c8.pl | |||
@@ -0,0 +1,132 @@ | |||
1 | #!/usr/local/bin/perl | ||
2 | # alpha assembler | ||
3 | |||
4 | sub bn_sqr_comba8 | ||
5 | { | ||
6 | local($name)=@_; | ||
7 | local(@a,@b,$r,$c0,$c1,$c2); | ||
8 | # Emits an 8-word comba squaring: r[0..15] is built column by column with sqr_add_c / sqr_add_c2. | ||
9 | $cnt=1; | ||
10 | &init_pool(2); | ||
11 | # wparam(0..1) are the r and a arguments of the generated function. | ||
12 | $rp=&wparam(0); | ||
13 | $ap=&wparam(1); | ||
14 | |||
15 | &function_begin($name,""); | ||
16 | |||
17 | &comment(""); | ||
18 | |||
19 | &ld(($a[0])=&NR(1),&QWPw(0,$ap)); | ||
20 | &ld(($a[1])=&NR(1),&QWPw(1,$ap)); | ||
21 | &ld(($a[2])=&NR(1),&QWPw(2,$ap)); | ||
22 | &ld(($a[3])=&NR(1),&QWPw(3,$ap)); | ||
23 | &ld(($a[4])=&NR(1),&QWPw(4,$ap)); | ||
24 | &ld(($a[5])=&NR(1),&QWPw(5,$ap)); | ||
25 | &ld(($a[6])=&NR(1),&QWPw(6,$ap)); | ||
26 | &ld(($a[7])=&NR(1),&QWPw(7,$ap)); &FR($ap); | ||
27 | |||
28 | ($c0,$c1,$c2)=&NR(3); | ||
29 | # Column 0: a0*a0 | ||
30 | &mov("zero",$c2); | ||
31 | &mul($a[0],$a[0],$c0); | ||
32 | &muh($a[0],$a[0],$c1); | ||
33 | &st($c0,&QWPw(0,$rp)); | ||
34 | ($c0,$c1,$c2)=($c1,$c2,$c0); # rotate accumulator roles; the stored register is reused as the new top word | ||
35 | &mov("zero",$c2); | ||
36 | # Column 1 | ||
37 | &sqr_add_c2($a[1],$a[0],$c0,$c1,$c2); | ||
38 | &st($c0,&QWPw(1,$rp)); | ||
39 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
40 | &mov("zero",$c2); | ||
41 | # Column 2 | ||
42 | &sqr_add_c($a[1],$c0,$c1,$c2); | ||
43 | &sqr_add_c2($a[2],$a[0],$c0,$c1,$c2); | ||
44 | &st($c0,&QWPw(2,$rp)); | ||
45 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
46 | &mov("zero",$c2); | ||
47 | # Column 3 | ||
48 | &sqr_add_c2($a[2],$a[1],$c0,$c1,$c2); | ||
49 | &sqr_add_c2($a[3],$a[0],$c0,$c1,$c2); | ||
50 | &st($c0,&QWPw(3,$rp)); | ||
51 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
52 | &mov("zero",$c2); | ||
53 | # Column 4 | ||
54 | &sqr_add_c($a[2],$c0,$c1,$c2); | ||
55 | &sqr_add_c2($a[3],$a[1],$c0,$c1,$c2); | ||
56 | &sqr_add_c2($a[4],$a[0],$c0,$c1,$c2); | ||
57 | &st($c0,&QWPw(4,$rp)); | ||
58 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
59 | &mov("zero",$c2); | ||
60 | # Column 5 | ||
61 | &sqr_add_c2($a[3],$a[2],$c0,$c1,$c2); | ||
62 | &sqr_add_c2($a[4],$a[1],$c0,$c1,$c2); | ||
63 | &sqr_add_c2($a[5],$a[0],$c0,$c1,$c2); | ||
64 | &st($c0,&QWPw(5,$rp)); | ||
65 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
66 | &mov("zero",$c2); | ||
67 | # Column 6 | ||
68 | &sqr_add_c($a[3],$c0,$c1,$c2); | ||
69 | &sqr_add_c2($a[4],$a[2],$c0,$c1,$c2); | ||
70 | &sqr_add_c2($a[5],$a[1],$c0,$c1,$c2); | ||
71 | &sqr_add_c2($a[6],$a[0],$c0,$c1,$c2); | ||
72 | &st($c0,&QWPw(6,$rp)); | ||
73 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
74 | &mov("zero",$c2); | ||
75 | # Column 7 | ||
76 | &sqr_add_c2($a[4],$a[3],$c0,$c1,$c2); | ||
77 | &sqr_add_c2($a[5],$a[2],$c0,$c1,$c2); | ||
78 | &sqr_add_c2($a[6],$a[1],$c0,$c1,$c2); | ||
79 | &sqr_add_c2($a[7],$a[0],$c0,$c1,$c2); | ||
80 | &st($c0,&QWPw(7,$rp)); | ||
81 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
82 | &mov("zero",$c2); | ||
83 | # Column 8 | ||
84 | &sqr_add_c($a[4],$c0,$c1,$c2); | ||
85 | &sqr_add_c2($a[5],$a[3],$c0,$c1,$c2); | ||
86 | &sqr_add_c2($a[6],$a[2],$c0,$c1,$c2); | ||
87 | &sqr_add_c2($a[7],$a[1],$c0,$c1,$c2); | ||
88 | &st($c0,&QWPw(8,$rp)); | ||
89 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
90 | &mov("zero",$c2); | ||
91 | # Column 9 | ||
92 | &sqr_add_c2($a[5],$a[4],$c0,$c1,$c2); | ||
93 | &sqr_add_c2($a[6],$a[3],$c0,$c1,$c2); | ||
94 | &sqr_add_c2($a[7],$a[2],$c0,$c1,$c2); | ||
95 | &st($c0,&QWPw(9,$rp)); | ||
96 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
97 | &mov("zero",$c2); | ||
98 | # Column 10 | ||
99 | &sqr_add_c($a[5],$c0,$c1,$c2); | ||
100 | &sqr_add_c2($a[6],$a[4],$c0,$c1,$c2); | ||
101 | &sqr_add_c2($a[7],$a[3],$c0,$c1,$c2); | ||
102 | &st($c0,&QWPw(10,$rp)); | ||
103 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
104 | &mov("zero",$c2); | ||
105 | # Column 11 | ||
106 | &sqr_add_c2($a[6],$a[5],$c0,$c1,$c2); | ||
107 | &sqr_add_c2($a[7],$a[4],$c0,$c1,$c2); | ||
108 | &st($c0,&QWPw(11,$rp)); | ||
109 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
110 | &mov("zero",$c2); | ||
111 | # Column 12 | ||
112 | &sqr_add_c($a[6],$c0,$c1,$c2); | ||
113 | &sqr_add_c2($a[7],$a[5],$c0,$c1,$c2); | ||
114 | &st($c0,&QWPw(12,$rp)); | ||
115 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
116 | &mov("zero",$c2); | ||
117 | # Column 13 | ||
118 | &sqr_add_c2($a[7],$a[6],$c0,$c1,$c2); | ||
119 | &st($c0,&QWPw(13,$rp)); | ||
120 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
121 | &mov("zero",$c2); | ||
122 | # Column 14: a7*a7, then $c1 is stored directly as r[15] | ||
123 | &sqr_add_c($a[7],$c0,$c1,$c2); | ||
124 | &st($c0,&QWPw(14,$rp)); | ||
125 | &st($c1,&QWPw(15,$rp)); | ||
126 | |||
127 | &function_end($name); | ||
128 | |||
129 | &fin_pool; | ||
130 | } | ||
131 | |||
132 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha/sub.pl b/src/lib/libcrypto/bn/asm/alpha/sub.pl new file mode 100644 index 0000000000..d998da5c21 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha/sub.pl | |||
@@ -0,0 +1,108 @@ | |||
1 | #!/usr/local/bin/perl | ||
2 | # alpha assembler | ||
3 | |||
4 | sub bn_sub_words # emit r[i] = a[i] - b[i] - borrow over count words; the running borrow is kept in $cc (r0) | ||
5 | { | ||
6 | local($name)=@_; | ||
7 | local($cc,$a,$b,$r); | ||
8 | |||
9 | &init_pool(4); | ||
10 | ($cc)=GR("r0"); | ||
11 | # wparam(0..3) are the r, a, b and count arguments of the generated function. | ||
12 | $rp=&wparam(0); | ||
13 | $ap=&wparam(1); | ||
14 | $bp=&wparam(2); | ||
15 | $count=&wparam(3); | ||
16 | |||
17 | &function_begin($name,""); | ||
18 | |||
19 | &comment(""); | ||
20 | &sub($count,4,$count); | ||
21 | &mov("zero",$cc); # no borrow yet | ||
22 | &blt($count,&label("finish")); # fewer than 4 words: skip the unrolled loop | ||
23 | |||
24 | ($a0,$b0)=&NR(2); | ||
25 | &ld($a0,&QWPw(0,$ap)); | ||
26 | &ld($b0,&QWPw(0,$bp)); | ||
27 | |||
28 | ########################################################## | ||
29 | &set_label("loop"); | ||
30 | # Four words per iteration; loads for later words are interleaved with arithmetic on earlier ones. | ||
31 | ($a1,$tmp,$b1,$a2,$b2,$a3,$b3,$o0)=&NR(8); | ||
32 | &ld($a1,&QWPw(1,$ap)); | ||
33 | &cmpult($a0,$b0,$tmp); # will we borrow? | ||
34 | &ld($b1,&QWPw(1,$bp)); | ||
35 | &sub($a0,$b0,$a0); # do the subtract | ||
36 | &ld($a2,&QWPw(2,$ap)); | ||
37 | &cmpult($a0,$cc,$b0); # will subtracting the incoming borrow borrow again? | ||
38 | &ld($b2,&QWPw(2,$bp)); | ||
39 | &sub($a0,$cc,$o0); # subtract the incoming borrow | ||
40 | &ld($a3,&QWPw(3,$ap)); | ||
41 | &add($b0,$tmp,$cc); ($t1,$o1)=&NR(2); &FR($tmp); # new borrow = sum of the two borrow flags | ||
42 | |||
43 | &cmpult($a1,$b1,$t1); # will we borrow? | ||
44 | &sub($a1,$b1,$a1); # do the subtract | ||
45 | &ld($b3,&QWPw(3,$bp)); | ||
46 | &cmpult($a1,$cc,$b1); # will subtracting the incoming borrow borrow again? | ||
47 | &sub($a1,$cc,$o1); # subtract the incoming borrow | ||
48 | &add($b1,$t1,$cc); ($tmp,$o2)=&NR(2); &FR($t1,$a1,$b1); | ||
49 | |||
50 | &cmpult($a2,$b2,$tmp); # will we borrow? | ||
51 | &sub($a2,$b2,$a2); # do the subtract | ||
52 | &st($o0,&QWPw(0,$rp)); &FR($o0); # save | ||
53 | &cmpult($a2,$cc,$b2); # will subtracting the incoming borrow borrow again? | ||
54 | &sub($a2,$cc,$o2); # subtract the incoming borrow | ||
55 | &add($b2,$tmp,$cc); ($t3,$o3)=&NR(2); &FR($tmp,$a2,$b2); | ||
56 | |||
57 | &cmpult($a3,$b3,$t3); # will we borrow? | ||
58 | &sub($a3,$b3,$a3); # do the subtract | ||
59 | &st($o1,&QWPw(1,$rp)); &FR($o1); | ||
60 | &cmpult($a3,$cc,$b3); # will subtracting the incoming borrow borrow again? | ||
61 | &sub($a3,$cc,$o3); # subtract the incoming borrow | ||
62 | &add($b3,$t3,$cc); &FR($t3,$a3,$b3); | ||
63 | |||
64 | &st($o2,&QWPw(2,$rp)); &FR($o2); | ||
65 | &sub($count,4,$count); # count-=4 | ||
66 | &st($o3,&QWPw(3,$rp)); &FR($o3); | ||
67 | &add($ap,4*$QWS,$ap); # advance a past the four words just processed | ||
68 | &add($bp,4*$QWS,$bp); # advance b likewise | ||
69 | &add($rp,4*$QWS,$rp); # advance r likewise | ||
70 | |||
71 | &blt($count,&label("finish")); | ||
72 | &ld($a0,&QWPw(0,$ap)); | ||
73 | &ld($b0,&QWPw(0,$bp)); | ||
74 | &br(&label("loop")); | ||
75 | ################################################## | ||
76 | # Do the last 0..3 words | ||
77 | |||
78 | &set_label("last_loop"); | ||
79 | |||
80 | &ld($a0,&QWPw(0,$ap)); # get a | ||
81 | &ld($b0,&QWPw(0,$bp)); # get b | ||
82 | &cmpult($a0,$b0,$tmp); # will we borrow? | ||
83 | &sub($a0,$b0,$a0); # do the subtract | ||
84 | &cmpult($a0,$cc,$b0); # will subtracting the incoming borrow borrow again? | ||
85 | &sub($a0,$cc,$a0); # subtract the incoming borrow | ||
86 | &st($a0,&QWPw(0,$rp)); # save | ||
87 | &add($b0,$tmp,$cc); # add the borrows | ||
88 | |||
89 | &add($ap,$QWS,$ap); | ||
90 | &add($bp,$QWS,$bp); | ||
91 | &add($rp,$QWS,$rp); | ||
92 | &sub($count,1,$count); | ||
93 | &bgt($count,&label("last_loop")); | ||
94 | &function_end_A($name); | ||
95 | |||
96 | ###################################################### | ||
97 | &set_label("finish"); | ||
98 | &add($count,4,$count); # undo the count-=4 bias so count is the number of words left | ||
99 | &bgt($count,&label("last_loop")); | ||
100 | |||
101 | &FR($a0,$b0); | ||
102 | &set_label("end"); | ||
103 | &function_end($name); | ||
104 | |||
105 | &fin_pool; | ||
106 | } | ||
107 | |||
108 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/x86/add.pl b/src/lib/libcrypto/bn/asm/x86/add.pl new file mode 100644 index 0000000000..0b5cf583e3 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/x86/add.pl | |||
@@ -0,0 +1,76 @@ | |||
1 | #!/usr/local/bin/perl | ||
2 | # x86 assembler | ||
3 | |||
# bn_add_words($name)
#
# Generates, through the perlasm helper calls, an x86 routine named $name.
# The emitted code loads r, a, b and num from wparam(0..3) and, for each of
# the num 32-bit words, stores a[i] + b[i] + carry into r[i].  The running
# carry lives in eax and is not moved anywhere else before function_end.
sub bn_add_words
	{
	local($name)=@_;

	&function_begin($name,"");

	&comment("");
	# Register allocation.  These stay plain globals, exactly as before.
	($a,$b,$r)=("esi","edi","ebx");		# a[], b[], r[] pointers
	($c,$num)=("eax","ebp");		# carry, word counter
	($tmp1,$tmp2)=("ecx","edx");		# scratch for *a and *b

	&mov($r,&wparam(0));		# r
	&mov($a,&wparam(1));		# a
	&mov($b,&wparam(2));		# b
	&mov($num,&wparam(3));		# num
	&xor($c,$c);			# carry = 0
	&and($num,0xfffffff8);		# num rounded down to a multiple of 8

	# Fewer than 8 words: skip the unrolled loop entirely.
	&jz(&label("aw_finish"));

	# Main loop, unrolled to eight words per pass.
	&set_label("aw_loop",0);
	$i=0;
	while ($i < 8)
		{
		&comment("Round $i");

		&mov($tmp1,&DWP($i*4,$a,"",0));	# tmp1 = a[i]
		&mov($tmp2,&DWP($i*4,$b,"",0));	# tmp2 = b[i]
		&add($tmp1,$c);			# add the incoming carry
		&mov($c,0);			# mov does not touch CF ...
		&adc($c,$c);			# ... so c = carry out of that add
		&add($tmp1,$tmp2);		# add b[i]
		&adc($c,0);			# fold in the second carry
		&mov(&DWP($i*4,$r,"",0),$tmp1);	# r[i] = sum
		$i++;
		}

	&comment("");
	&add($a,32);			# advance all three pointers by 8 words
	&add($b,32);
	&add($r,32);
	&sub($num,8);
	&jnz(&label("aw_loop"));

	# Tail: the remaining num & 7 words (at most seven).
	&set_label("aw_finish",0);
	&mov($num,&wparam(3));		# reload num
	&and($num,7);
	&jz(&label("aw_end"));

	$i=0;
	while ($i < 7)
		{
		&comment("Tail Round $i");
		&mov($tmp1,&DWP($i*4,$a,"",0));	# tmp1 = a[i]
		&mov($tmp2,&DWP($i*4,$b,"",0));	# tmp2 = b[i]
		&add($tmp1,$c);
		&mov($c,0);
		&adc($c,$c);
		&add($tmp1,$tmp2);
		&adc($c,0);
		# The last round needs no count/branch: control simply falls
		# through to aw_end.  The store between dec and jz is a mov, so
		# the zero flag set by dec is still intact at the branch.
		if ($i < 6)
			{ &dec($num); }
		&mov(&DWP($i*4,$r,"",0),$tmp1);	# r[i] = sum
		if ($i < 6)
			{ &jz(&label("aw_end")); }
		$i++;
		}
	&set_label("aw_end",0);

	# No explicit move of the result: $c already is "eax".

	&function_end($name);
	}
75 | |||
76 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/x86/comba.pl b/src/lib/libcrypto/bn/asm/x86/comba.pl new file mode 100644 index 0000000000..2291253629 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/x86/comba.pl | |||
@@ -0,0 +1,277 @@ | |||
1 | #!/usr/local/bin/perl | ||
2 | # x86 assembler | ||
3 | |||
# mul_add_c($a,$ai,$b,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)
#
# Emits one multiply-accumulate step of a comba column: eax*edx is added
# into the three-word accumulator ($c2:$c1:$c0).  The operands a[$ai] and
# b[$bi] are expected to be in eax/edx already; $ai and $bi are only used
# for the generated comment.
#
# $pos selects what is interleaved with the accumulate:
#   $pos == 0  load a[$na] into eax and b[$nb] into edx for the next step
#   $pos == 1  store $c0 to r[$i] (r is fetched from wparam(0) into eax),
#              then load b[$nb] / a[$na] for the next column
#   $pos >  1  store $c0 to r[$i] only; eax is left holding r
sub mul_add_c
	{
	local($a,$ai,$b,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;

	&comment("mul a[$ai]*b[$bi]");

	&mul("edx");			# edx:eax = eax * edx
	&add($c0,"eax");

	# eax is free once the low word has been added.
	if ($pos == 0)
		{ &mov("eax",&DWP(($na)*4,$a,"",0)); }	# next a
	elsif ($pos > 0)
		{ &mov("eax",&wparam(0)); }		# r[] pointer

	&adc($c1,"edx");

	# edx is free once the high word has been added.
	if ($pos == 0 || $pos == 1)
		{ &mov("edx",&DWP(($nb)*4,$b,"",0)); }	# next b

	&adc($c2,0);

	if ($pos > 0)
		{
		&mov(&DWP($i*4,"eax","",0),$c0);	# r[i] = c0
		if ($pos == 1)
			{ &mov("eax",&DWP(($na)*4,$a,"",0)); }	# next a
		}
	}
31 | |||
# sqr_add_c($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)
#
# Emits one accumulate step of a comba squaring column where the product is
# added once: ($c2:$c1:$c0) += eax*eax when $ai == $bi, otherwise eax*edx.
# The operands are expected to be pre-loaded.
#
# $pos == 0 loads a[$na] into eax for the next step.  $pos > 0 stores $c0
# to r[$i]; $pos == 1 additionally loads a[$na] into eax and, when $na and
# $nb differ, a[$nb] into edx.
sub sqr_add_c
	{
	local($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;

	&comment("sqr a[$ai]*a[$bi]");

	# A true square only needs eax; a cross product multiplies by edx.
	&mul(($ai == $bi) ? "eax" : "edx");
	&add($c0,"eax");
	if ($pos == 0)
		{ &mov("eax",&DWP(($na)*4,$a,"",0)); }	# next a
	&adc($c1,"edx");
	if (($pos == 1) && ($na != $nb))
		{ &mov("edx",&DWP(($nb)*4,$a,"",0)); }	# second operand of next step
	&adc($c2,0);
	if ($pos > 0)
		{
		&mov(&DWP($i*4,$r,"",0),$c0);		# r[i] = c0
		if ($pos == 1)
			{ &mov("eax",&DWP(($na)*4,$a,"",0)); }	# first operand of next step
		}
	}
60 | |||
# sqr_add_c2($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)
#
# Emits one accumulate step of a comba squaring column where the product is
# added twice: the 64-bit product in edx:eax is doubled in place and then
# added into ($c2:$c1:$c0).  The operands are expected to be pre-loaded.
#
# For $pos 0 or 1 the next a[$na] is loaded into eax; for $pos > 0 $c0 is
# stored to r[$i]; for $pos <= 1 with $na != $nb, a[$nb] is loaded into edx.
sub sqr_add_c2
	{
	local($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;

	&comment("sqr a[$ai]*a[$bi]");

	&mul(($ai == $bi) ? "eax" : "edx");

	# Double the product; the bit shifted out of edx goes into c2.
	&add("eax","eax");
	&adc("edx","edx");
	&adc($c2,0);

	# Accumulate the doubled product.
	&add($c0,"eax");
	&adc($c1,"edx");
	if ($pos == 0 || $pos == 1)
		{ &mov("eax",&DWP(($na)*4,$a,"",0)); }	# next operand
	&adc($c2,0);
	if ($pos > 0)
		{ &mov(&DWP($i*4,$r,"",0),$c0); }	# r[i] = c0
	if (($pos <= 1) && ($na != $nb))
		{ &mov("edx",&DWP(($nb)*4,$a,"",0)); }	# second operand of next step
	}
92 | |||
# bn_mul_comba($name,$num)
#
# Generates a fully unrolled x86 routine named $name that multiplies two
# $num-word operands column by column.  For result word i the generator
# emits one mul_add_c per pair (ai,bi) with ai + bi == i, accumulating into
# three registers that are rotated after each column.  2*$num-1 columns are
# produced and the leftover accumulator word is stored as the final word.
#
# In the emitted code a is read from wparam(1), b from wparam(2), and the
# result pointer is fetched from wparam(0) by mul_add_c when it stores.
sub bn_mul_comba
	{
	local($name,$num)=@_;
	local($a,$b,$c0,$c1,$c2);
	local($i,$as,$ae,$bs,$be,$ai,$bi);
	local($tot,$end);

	&function_begin_B($name,"");

	($c0,$c1,$c2)=("ebx","ecx","ebp");	# accumulator, low to high
	($a,$b)=("esi","edi");			# operand pointers

	# $as/$bs: first a/b index of the current column; $be: last b index.
	$as=$ae=$bs=$be=0;
	$tot=$num+$num-1;			# number of columns

	# NOTE(review): the push/mov interleaving is kept exactly as it was;
	# wparam() offsets may depend on the pushes already emitted -- confirm
	# against the perlasm helpers before reordering.
	&push("esi");
	&mov($a,&wparam(1));
	&push("edi");
	&mov($b,&wparam(2));
	&push("ebp");
	&push("ebx");

	&xor($c0,$c0);
	&mov("eax",&DWP(0,$a,"",0));	# pre-load a[0]
	&xor($c1,$c1);
	&mov("edx",&DWP(0,$b,"",0));	# pre-load b[0]

	for ($i=0; $i<$tot; $i++)
		{
		$ai=$as;
		$bi=$bs;
		$end=$be+1;

		&comment("################## Calculate word $i");

		for ($j=$bs; $j<$end; $j++)
			{
			# The top accumulator word starts each column at zero.
			if ($j == $bs)
				{ &xor($c2,$c2); }

			if (($j+1) == $end)
				{
				# Last product of the column: store r[i] (1), and
				# on the very last column do nothing else (2).
				# The next operands are the start of column i+1.
				$v=(($i+1) == $tot) ? 2 : 1;
				$na=$as+($i < ($num-1));
				$nb=$bs+($i >= ($num-1));
				}
			else
				{
				# More products follow in this column.
				$v=0;
				$na=($ai-1);
				$nb=($bi+1);
				}

			&mul_add_c($a,$ai,$b,$bi,$c0,$c1,$c2,$v,$i,$na,$nb);
			if ($v)
				{
				&comment("saved r[$i]");
				# Rotate: the stored low word becomes the new top word.
				($c0,$c1,$c2)=($c1,$c2,$c0);
				}
			$ai--;
			$bi++;
			}

		# First half: columns grow from the a side; second half: they
		# shrink from the b side.
		if ($i < ($num-1))
			{ $as++; $be++; }
		else
			{ $ae++; $bs++; }
		}

	# $i == $tot here; eax still holds r from the final mul_add_c.
	&comment("save r[$i]");
	&mov(&DWP($i*4,"eax","",0),$c0);

	&pop("ebx");
	&pop("ebp");
	&pop("edi");
	&pop("esi");
	&ret();
	&function_end_B($name);
	}
183 | |||
# bn_sqr_comba($name,$num)
#
# Generates a fully unrolled x86 routine named $name that squares a
# $num-word operand column by column.  For result word i the generator
# walks the pairs (ai,bi) with ai + bi == i from the outside in and stops
# once the indices meet: a pair with ai == bi is added once (sqr_add_c),
# every other pair is added doubled (sqr_add_c2).  The three accumulator
# registers are rotated after each column; 2*$num-1 columns are produced
# and the leftover accumulator word is stored as the final word.
#
# In the emitted code r is read from wparam(0) and a from wparam(1).
sub bn_sqr_comba
	{
	local($name,$num)=@_;
	# Register-name holders.  They used to be initialised from @_, which
	# left $r holding the function name and $a the word count until the
	# assignments below; they now simply start out undefined.
	local($r,$a,$c0,$c1,$c2);
	local($i,$as,$ae,$bs,$be,$ai,$bi);
	local($b,$tot,$end,$half);

	&function_begin_B($name,"");

	$c0="ebx";	# accumulator, low word
	$c1="ecx";	# accumulator, middle word
	$c2="ebp";	# accumulator, high word
	$a="esi";	# source pointer
	$r="edi";	# result pointer

	&push("esi");
	&push("edi");
	&push("ebp");
	&push("ebx");
	&mov($r,&wparam(0));
	&mov($a,&wparam(1));
	&xor($c0,$c0);
	&xor($c1,$c1);
	&mov("eax",&DWP(0,$a,"",0));	# pre-load a[0]

	# $as/$bs: first index pair of the current column; $be: last b index.
	$as=0;
	$ae=0;
	$bs=0;
	$be=0;
	$tot=$num+$num-1;		# number of columns

	for ($i=0; $i<$tot; $i++)
		{
		$ai=$as;
		$bi=$bs;
		$end=$be+1;

		&comment("############### Calculate word $i");
		for ($j=$bs; $j<$end; $j++)
			{
			# The top accumulator word starts each column at zero.
			&xor($c2,$c2) if ($j == $bs);

			if (($ai-1) < ($bi+1))
				{
				# The indices would cross on the next step, so this
				# is the last product of the column: store r[i]
				# (1), and on the final column nothing else (2).
				# The next operands are the start of column i+1.
				$v=1;
				$v=2 if ($i+1) == $tot;
				$na=$as+($i < ($num-1));
				$nb=$bs+($i >= ($num-1));
				}
			else
				{
				# More products follow in this column.
				$v=0;
				$na=$ai-1;
				$nb=$bi+1;
				}

			if ($ai == $bi)
				{
				&sqr_add_c($r,$a,$ai,$bi,
					$c0,$c1,$c2,$v,$i,$na,$nb);
				}
			else
				{
				&sqr_add_c2($r,$a,$ai,$bi,
					$c0,$c1,$c2,$v,$i,$na,$nb);
				}

			if ($v)
				{
				&comment("saved r[$i]");
				# Rotate: the stored low word becomes the new top word.
				($c0,$c1,$c2)=($c1,$c2,$c0);
				last;
				}
			$ai--;
			$bi++;
			}
		$as++ if ($i < ($num-1));
		$ae++ if ($i >= ($num-1));

		$bs++ if ($i >= ($num-1));
		$be++ if ($i < ($num-1));
		}

	# $i == $tot here: store the last accumulator word.
	&mov(&DWP($i*4,$r,"",0),$c0);
	&pop("ebx");
	&pop("ebp");
	&pop("edi");
	&pop("esi");
	&ret();
	&function_end_B($name);
	}
276 | |||
277 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/x86/div.pl b/src/lib/libcrypto/bn/asm/x86/div.pl new file mode 100644 index 0000000000..0e90152caa --- /dev/null +++ b/src/lib/libcrypto/bn/asm/x86/div.pl | |||
@@ -0,0 +1,15 @@ | |||
1 | #!/usr/local/bin/perl | ||
2 | # x86 assembler | ||
3 | |||
# bn_div_words($name)
#
# Generates an x86 routine named $name that performs a single unsigned
# 64-by-32 bit division: wparam(0) is loaded as the high word and wparam(1)
# as the low word of the dividend, wparam(2) as the divisor.  The x86 div
# instruction leaves the quotient in eax and the remainder in edx.
# The emitted code makes no check for a zero divisor.
sub bn_div_words
	{
	local($name)=@_;

	&function_begin($name,"");

	# Dividend in edx:eax.
	&mov("edx",&wparam(0));		# high word
	&mov("eax",&wparam(1));		# low word

	# Divisor in ebx.
	&mov("ebx",&wparam(2));
	&div("ebx");

	&function_end($name);
	}
15 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/x86/f b/src/lib/libcrypto/bn/asm/x86/f new file mode 100644 index 0000000000..22e4112224 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/x86/f | |||
@@ -0,0 +1,3 @@ | |||
1 | #!/usr/local/bin/perl | ||
2 | # x86 assembler | ||
3 | |||
diff --git a/src/lib/libcrypto/bn/asm/x86/mul.pl b/src/lib/libcrypto/bn/asm/x86/mul.pl new file mode 100644 index 0000000000..674cb9b055 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/x86/mul.pl | |||
@@ -0,0 +1,77 @@ | |||
1 | #!/usr/local/bin/perl | ||
2 | # x86 assembler | ||
3 | |||
# bn_mul_words($name)
#
# Generates an x86 routine named $name.  The emitted code loads r, a, num
# and w from wparam(0..3) and, for each of the num 32-bit words, stores the
# low word of a[i]*w + carry into r[i], carrying the high word into the next
# step.  The final carry word is returned in eax.
sub bn_mul_words
	{
	local($name)=@_;

	&function_begin($name,"");

	&comment("");
	# Register allocation.  These stay plain globals, exactly as before
	# ($Low and $High are set but not referenced below).
	($Low,$High)=("eax","edx");	# product, as produced by mul
	($a,$r)=("ebx","edi");		# source and result pointers
	($w,$c,$num)=("ecx","esi","ebp");	# multiplier, carry word, counter

	&xor($c,$c);			# carry = 0
	&mov($r,&wparam(0));		# r
	&mov($a,&wparam(1));		# a
	&mov($num,&wparam(2));		# num
	&mov($w,&wparam(3));		# w

	&and($num,0xfffffff8);		# num rounded down to a multiple of 8
	&jz(&label("mw_finish"));

	# Main loop, unrolled to eight words per pass; $i is a byte offset.
	&set_label("mw_loop",0);
	$i=0;
	while ($i < 32)
		{
		&comment("Round $i");

		&mov("eax",&DWP($i,$a,"",0));	# eax = a[i]
		&mul($w);			# edx:eax = a[i] * w
		&add("eax",$c);			# low += carry word
		&adc("edx",0);			# propagate into the high word
		&mov(&DWP($i,$r,"",0),"eax");	# r[i] = low
		&mov($c,"edx");			# carry word = high

		$i+=4;
		}

	&comment("");
	&add($a,32);			# advance both pointers by 8 words
	&add($r,32);
	&sub($num,8);
	&jz(&label("mw_finish"));
	&jmp(&label("mw_loop"));

	# Tail: the remaining num & 7 words (at most seven).
	&set_label("mw_finish",0);
	&mov($num,&wparam(2));		# reload num
	&and($num,7);
	&jnz(&label("mw_finish2"));
	&jmp(&label("mw_end"));

	&set_label("mw_finish2",1);
	$i=0;
	while ($i < 7)
		{
		&comment("Tail Round $i");
		&mov("eax",&DWP($i*4,$a,"",0));	# eax = a[i]
		&mul($w);			# edx:eax = a[i] * w
		&add("eax",$c);			# low += carry word
		&adc("edx",0);			# propagate into the high word
		&mov(&DWP($i*4,$r,"",0),"eax");	# r[i] = low
		&mov($c,"edx");			# carry word = high
		# The last round just falls through to mw_end.
		if ($i < 7-1)
			{
			&dec($num);
			&jz(&label("mw_end"));
			}
		$i++;
		}
	&set_label("mw_end",0);
	&mov("eax",$c);			# return the carry word

	&function_end($name);
	}
76 | |||
77 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/x86/mul_add.pl b/src/lib/libcrypto/bn/asm/x86/mul_add.pl new file mode 100644 index 0000000000..61830d3a90 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/x86/mul_add.pl | |||
@@ -0,0 +1,87 @@ | |||
1 | #!/usr/local/bin/perl | ||
2 | # x86 assembler | ||
3 | |||
# bn_mul_add_words($name)
#
# Generates an x86 routine named $name.  The emitted code loads r, a, num
# and w from wparam(0..3) and, for each of the num 32-bit words, replaces
# r[i] with the low word of a[i]*w + r[i] + carry, carrying the high word
# into the next step.  The final carry word is returned in eax.
#
# ecx serves as the loop counter; one stack slot is pushed up front and the
# counter is parked there (swtmp(0)) during each pass of the main loop.
sub bn_mul_add_words
	{
	local($name)=@_;

	&function_begin($name,"");

	&comment("");
	# Register allocation.  These stay plain globals, exactly as before
	# ($Low and $High are set but not referenced below).
	($Low,$High)=("eax","edx");	# product, as produced by mul
	($a,$r)=("ebx","edi");		# source and result pointers
	($w,$c)=("ebp","esi");		# multiplier, carry word

	&xor($c,$c);			# carry = 0
	&mov($r,&wparam(0));		# r

	&mov("ecx",&wparam(2));		# num
	&mov($a,&wparam(1));		# a

	&and("ecx",0xfffffff8);		# num rounded down to a multiple of 8
	&mov($w,&wparam(3));		# w

	&push("ecx");			# reserve a stack slot for the counter

	# Still testing the flags from the and: mov and push leave them alone.
	&jz(&label("maw_finish"));

	# Main loop, unrolled to eight words per pass; $i is a byte offset.
	&set_label("maw_loop",0);

	&mov(&swtmp(0),"ecx");		# park the counter

	$i=0;
	while ($i < 32)
		{
		&comment("Round $i");

		&mov("eax",&DWP($i,$a,"",0));	# eax = a[i]
		&mul($w);			# edx:eax = a[i] * w
		&add("eax",$c);			# low += carry word
		&mov($c,&DWP($i,$r,"",0));	# reuse c to fetch r[i]
		&adc("edx",0);			# propagate into the high word
		&add("eax",$c);			# low += r[i]
		&adc("edx",0);			# propagate into the high word
		&mov(&DWP($i,$r,"",0),"eax");	# r[i] = low
		&mov($c,"edx");			# carry word = high

		$i+=4;
		}

	&comment("");
	&mov("ecx",&swtmp(0));		# fetch the counter back
	&add($a,32);			# advance both pointers by 8 words
	&add($r,32);
	&sub("ecx",8);
	&jnz(&label("maw_loop"));

	# Tail: the remaining num & 7 words (at most seven).
	&set_label("maw_finish",0);
	&mov("ecx",&wparam(2));		# reload num
	&and("ecx",7);
	&jnz(&label("maw_finish2"));	# helps branch prediction
	&jmp(&label("maw_end"));

	&set_label("maw_finish2",1);
	$i=0;
	while ($i < 7)
		{
		&comment("Tail Round $i");
		&mov("eax",&DWP($i*4,$a,"",0));	# eax = a[i]
		&mul($w);			# edx:eax = a[i] * w
		&add("eax",$c);			# low += carry word
		&mov($c,&DWP($i*4,$r,"",0));	# reuse c to fetch r[i]
		&adc("edx",0);			# propagate into the high word
		&add("eax",$c);			# low += r[i]
		&adc("edx",0);			# propagate into the high word
		# The last round just falls through to maw_end.  The two moves
		# between dec and jz do not disturb the zero flag.
		if ($i < 7-1)
			{ &dec("ecx"); }
		&mov(&DWP($i*4,$r,"",0),"eax");	# r[i] = low
		&mov($c,"edx");			# carry word = high
		if ($i < 7-1)
			{ &jz(&label("maw_end")); }
		$i++;
		}
	&set_label("maw_end",0);
	&mov("eax",$c);			# return the carry word

	&pop("ecx");			# release the counter slot

	&function_end($name);
	}
86 | |||
87 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/x86/sqr.pl b/src/lib/libcrypto/bn/asm/x86/sqr.pl new file mode 100644 index 0000000000..1f90993cf6 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/x86/sqr.pl | |||
@@ -0,0 +1,60 @@ | |||
1 | #!/usr/local/bin/perl | ||
2 | # x86 assembler | ||
3 | |||
# bn_sqr_words($name)
#
# Generates an x86 routine named $name.  The emitted code loads r, a and num
# from wparam(0..2) and, for each of the num 32-bit words of a, writes the
# 64-bit square a[i]*a[i] to r[2*i] (low word) and r[2*i+1] (high word).
sub bn_sqr_words
	{
	local($name)=@_;

	&function_begin($name,"");

	&comment("");
	# Register allocation.  These stay plain globals, exactly as before.
	($r,$a,$num)=("esi","edi","ebx");	# result, source, word counter

	&mov($r,&wparam(0));		# r
	&mov($a,&wparam(1));		# a
	&mov($num,&wparam(2));		# num

	&and($num,0xfffffff8);		# num rounded down to a multiple of 8
	&jz(&label("sw_finish"));

	# Main loop, unrolled to eight words per pass.  $i is the byte offset
	# into a; the matching offset into r is twice that.
	&set_label("sw_loop",0);
	$i=0;
	while ($i < 32)
		{
		&comment("Round $i");
		&mov("eax",&DWP($i,$a,"",0));		# eax = a[i]
		&mul("eax");				# edx:eax = a[i] * a[i]
		&mov(&DWP($i*2,$r,"",0),"eax");		# low word
		&mov(&DWP($i*2+4,$r,"",0),"edx");	# high word
		$i+=4;
		}

	&comment("");
	&add($a,32);			# 8 words of input consumed
	&add($r,64);			# 16 words of output produced
	&sub($num,8);
	&jnz(&label("sw_loop"));

	# Tail: the remaining num & 7 words (at most seven).
	&set_label("sw_finish",0);
	&mov($num,&wparam(2));		# reload num
	&and($num,7);
	&jz(&label("sw_end"));

	$i=0;
	while ($i < 7)
		{
		&comment("Tail Round $i");
		&mov("eax",&DWP($i*4,$a,"",0));		# eax = a[i]
		&mul("eax");				# edx:eax = a[i] * a[i]
		&mov(&DWP($i*8,$r,"",0),"eax");		# low word
		# The last round just falls through to sw_end.  The store
		# between dec and jz does not disturb the zero flag.
		if ($i < 7-1)
			{ &dec($num); }
		&mov(&DWP($i*8+4,$r,"",0),"edx");	# high word
		if ($i < 7-1)
			{ &jz(&label("sw_end")); }
		$i++;
		}
	&set_label("sw_end",0);

	&function_end($name);
	}
59 | |||
60 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/x86/sub.pl b/src/lib/libcrypto/bn/asm/x86/sub.pl new file mode 100644 index 0000000000..837b0e1b07 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/x86/sub.pl | |||
@@ -0,0 +1,76 @@ | |||
1 | #!/usr/local/bin/perl | ||
2 | # x86 assembler | ||
3 | |||
# bn_sub_words($name)
#
# Generates, through the perlasm helper calls, an x86 routine named $name.
# The emitted code loads r, a, b and num from wparam(0..3) and, for each of
# the num 32-bit words, stores a[i] - b[i] - borrow into r[i].  The running
# borrow lives in eax and is not moved anywhere else before function_end.
#
# The label names carry the "aw_" prefix also used by the add generator;
# they are reproduced unchanged.
sub bn_sub_words
	{
	local($name)=@_;

	&function_begin($name,"");

	&comment("");
	# Register allocation.  These stay plain globals, exactly as before.
	($a,$b,$r)=("esi","edi","ebx");		# a[], b[], r[] pointers
	($c,$num)=("eax","ebp");		# borrow, word counter
	($tmp1,$tmp2)=("ecx","edx");		# scratch for *a and *b

	&mov($r,&wparam(0));		# r
	&mov($a,&wparam(1));		# a
	&mov($b,&wparam(2));		# b
	&mov($num,&wparam(3));		# num
	&xor($c,$c);			# borrow = 0
	&and($num,0xfffffff8);		# num rounded down to a multiple of 8

	# Fewer than 8 words: skip the unrolled loop entirely.
	&jz(&label("aw_finish"));

	# Main loop, unrolled to eight words per pass.
	&set_label("aw_loop",0);
	$i=0;
	while ($i < 8)
		{
		&comment("Round $i");

		&mov($tmp1,&DWP($i*4,$a,"",0));	# tmp1 = a[i]
		&mov($tmp2,&DWP($i*4,$b,"",0));	# tmp2 = b[i]
		&sub($tmp1,$c);			# take off the incoming borrow
		&mov($c,0);			# mov does not touch CF ...
		&adc($c,$c);			# ... so c = borrow out of that sub
		&sub($tmp1,$tmp2);		# subtract b[i]
		&adc($c,0);			# fold in the second borrow
		&mov(&DWP($i*4,$r,"",0),$tmp1);	# r[i] = difference
		$i++;
		}

	&comment("");
	&add($a,32);			# advance all three pointers by 8 words
	&add($b,32);
	&add($r,32);
	&sub($num,8);
	&jnz(&label("aw_loop"));

	# Tail: the remaining num & 7 words (at most seven).
	&set_label("aw_finish",0);
	&mov($num,&wparam(3));		# reload num
	&and($num,7);
	&jz(&label("aw_end"));

	$i=0;
	while ($i < 7)
		{
		&comment("Tail Round $i");
		&mov($tmp1,&DWP($i*4,$a,"",0));	# tmp1 = a[i]
		&mov($tmp2,&DWP($i*4,$b,"",0));	# tmp2 = b[i]
		&sub($tmp1,$c);
		&mov($c,0);
		&adc($c,$c);
		&sub($tmp1,$tmp2);
		&adc($c,0);
		# The last round needs no count/branch: control simply falls
		# through to aw_end.  The store between dec and jz is a mov, so
		# the zero flag set by dec is still intact at the branch.
		if ($i < 6)
			{ &dec($num); }
		&mov(&DWP($i*4,$r,"",0),$tmp1);	# r[i] = difference
		if ($i < 6)
			{ &jz(&label("aw_end")); }
		$i++;
		}
	&set_label("aw_end",0);

	# No explicit move of the result: $c already is "eax".

	&function_end($name);
	}
75 | |||
76 | 1; | ||