diff options
author | beck <> | 1999-09-29 05:53:45 +0000 |
---|---|---|
committer | beck <> | 1999-09-29 05:53:45 +0000 |
commit | 648e4f0876a3773381cbfff3192dd84dd1c8c925 (patch) | |
tree | bd9d01e3969ffa5aac92128af3e515520c88fc0e /src/lib/libcrypto/bn | |
parent | 756086c41b0487beefc3d5b3400f80095d0e4157 (diff) | |
download | openbsd-648e4f0876a3773381cbfff3192dd84dd1c8c925.tar.gz openbsd-648e4f0876a3773381cbfff3192dd84dd1c8c925.tar.bz2 openbsd-648e4f0876a3773381cbfff3192dd84dd1c8c925.zip |
new files for OpenSSL 0.9.4
Diffstat (limited to 'src/lib/libcrypto/bn')
41 files changed, 5507 insertions, 0 deletions
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/add.pl b/src/lib/libcrypto/bn/asm/alpha.works/add.pl new file mode 100644 index 0000000000..4dc76e6b69 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha.works/add.pl | |||
@@ -0,0 +1,119 @@ | |||
1 | #!/usr/local/bin/perl | ||
2 | # alpha assembler | ||
3 | |||
4 | sub bn_add_words # Emits Alpha code computing rp[i] = ap[i] + bp[i] for `count` words, with the carry kept in $cc | ||
5 | { | ||
6 | local($name)=@_; # $name is handed to function_begin/function_end_A/function_end | ||
7 | local($cc,$a,$b,$r); | ||
8 | # Operands arrive through wparam(0..3): result pointer, a pointer, b pointer, word count. | ||
9 | &init_pool(4); | ||
10 | ($cc)=GR("r0"); # carry accumulator; presumably also the return-value register -- TODO confirm against GR() | ||
11 | |||
12 | $rp=&wparam(0); | ||
13 | $ap=&wparam(1); | ||
14 | $bp=&wparam(2); | ||
15 | $count=&wparam(3); | ||
16 | |||
17 | &function_begin($name,""); | ||
18 | |||
19 | &comment(""); | ||
20 | &sub($count,4,$count); # bias count by -4: negative means fewer than 4 words remain | ||
21 | &mov("zero",$cc); # carry starts at 0 | ||
22 | &br(&label("finish")); # NOTE(review): unconditional, so the blt below and the unrolled loop are emitted but never reached; looks like a deliberate bypass -- confirm before removing | ||
23 | &blt($count,&label("finish")); | ||
24 | |||
25 | ($a0,$b0)=&NR(2); | ||
26 | &ld($a0,&QWPw(0,$ap)); | ||
27 | &ld($b0,&QWPw(0,$bp)); | ||
28 | |||
29 | ########################################################## | ||
30 | &set_label("loop"); | ||
31 | # Word 0 is loaded before the loop and reloaded at its bottom; fetch words 1..3 here. | ||
32 | ($a1)=&NR(1); &ld($a1,&QWPw(1,$ap)); | ||
33 | ($b1)=&NR(1); &ld($b1,&QWPw(1,$bp)); | ||
34 | ($a2)=&NR(1); &ld($a2,&QWPw(2,$ap)); | ||
35 | ($b2)=&NR(1); &ld($b2,&QWPw(2,$bp)); | ||
36 | ($a3)=&NR(1); &ld($a3,&QWPw(3,$ap)); | ||
37 | ($b3)=&NR(1); &ld($b3,&QWPw(3,$bp)); | ||
38 | # Per word: o=a+b; t=(o<b) is the carry out of a+b; o+=cc; cc=(o<cc) is the carry out of adding cc; cc+=t. | ||
39 | ($o0,$t0)=&NR(2); | ||
40 | &add($a0,$b0,$o0); | ||
41 | &cmpult($o0,$b0,$t0); | ||
42 | &add($o0,$cc,$o0); | ||
43 | &cmpult($o0,$cc,$cc); | ||
44 | &add($cc,$t0,$cc); &FR($t0); | ||
45 | |||
46 | ($t1,$o1)=&NR(2); | ||
47 | |||
48 | &add($a1,$b1,$o1); &FR($a1); | ||
49 | &cmpult($o1,$b1,$t1); &FR($b1); | ||
50 | &add($o1,$cc,$o1); | ||
51 | &cmpult($o1,$cc,$cc); | ||
52 | &add($cc,$t1,$cc); &FR($t1); | ||
53 | |||
54 | ($t2,$o2)=&NR(2); | ||
55 | |||
56 | &add($a2,$b2,$o2); &FR($a2); | ||
57 | &cmpult($o2,$b2,$t2); &FR($b2); | ||
58 | &add($o2,$cc,$o2); | ||
59 | &cmpult($o2,$cc,$cc); | ||
60 | &add($cc,$t2,$cc); &FR($t2); | ||
61 | |||
62 | ($t3,$o3)=&NR(2); | ||
63 | |||
64 | &add($a3,$b3,$o3); &FR($a3); | ||
65 | &cmpult($o3,$b3,$t3); &FR($b3); | ||
66 | &add($o3,$cc,$o3); | ||
67 | &cmpult($o3,$cc,$cc); | ||
68 | &add($cc,$t3,$cc); &FR($t3); | ||
69 | # Store the four sums to consecutive result words 0..3. | ||
70 | &st($o0,&QWPw(0,$rp)); &FR($o0); | ||
71 | &st($o1,&QWPw(1,$rp)); &FR($o1); | ||
72 | &st($o2,&QWPw(2,$rp)); &FR($o2); | ||
73 | &st($o3,&QWPw(3,$rp)); &FR($o3); | ||
74 | |||
75 | &sub($count,4,$count); # count-=4 | ||
76 | &add($ap,4*$QWS,$ap); # advance ap by 4*$QWS | ||
77 | &add($bp,4*$QWS,$bp); # advance bp by 4*$QWS | ||
78 | &add($rp,4*$QWS,$rp); # advance rp by 4*$QWS | ||
79 | |||
80 | &blt($count,&label("finish")); | ||
81 | &ld($a0,&QWPw(0,$ap)); | ||
82 | &ld($b0,&QWPw(0,$bp)); | ||
83 | &br(&label("loop")); | ||
84 | ################################################## | ||
85 | # Do the last 0..3 words | ||
86 | |||
87 | ($t0,$o0)=&NR(2); | ||
88 | &set_label("last_loop"); | ||
89 | |||
90 | &ld($a0,&QWPw(0,$ap)); # get a | ||
91 | &ld($b0,&QWPw(0,$bp)); # get b | ||
92 | |||
93 | &add($a0,$b0,$o0); | ||
94 | &cmpult($o0,$b0,$t0); # carry out of a+b? | ||
95 | &add($o0,$cc,$o0); # add the incoming carry | ||
96 | &cmpult($o0,$cc,$cc); # carry out of adding it? | ||
97 | &add($cc,$t0,$cc); # combine the two carries | ||
98 | &st($o0,&QWPw(0,$rp)); # save | ||
99 | |||
100 | &add($ap,$QWS,$ap); | ||
101 | &add($bp,$QWS,$bp); | ||
102 | &add($rp,$QWS,$rp); | ||
103 | &sub($count,1,$count); | ||
104 | &bgt($count,&label("last_loop")); | ||
105 | &function_end_A($name); | ||
106 | |||
107 | ###################################################### | ||
108 | &set_label("finish"); | ||
109 | &add($count,4,$count); # undo the -4 bias: count is now the number of words still to do | ||
110 | &bgt($count,&label("last_loop")); | ||
111 | |||
112 | &FR($o0,$t0,$a0,$b0); | ||
113 | &set_label("end"); | ||
114 | &function_end($name); | ||
115 | |||
116 | &fin_pool; | ||
117 | } | ||
118 | |||
119 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/div.pl b/src/lib/libcrypto/bn/asm/alpha.works/div.pl new file mode 100644 index 0000000000..7ec144377f --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha.works/div.pl | |||
@@ -0,0 +1,144 @@ | |||
1 | #!/usr/local/bin/perl | ||
2 | |||
3 | sub bn_div64 # Passes a fixed Alpha assembly listing for the symbol bn_div64 to asm_add(); reads no arguments | ||
4 | { # The heredoc is single-quoted, so $NN register names and $9119-style labels are not interpolated by Perl; the listing calls BN_num_bits_word and abort | ||
5 | local($data)=<<'EOF'; | ||
6 | # | ||
7 | # What follows was taken directly from the C compiler with a few | ||
8 | # hacks to redo the lables. | ||
9 | # | ||
10 | .text | ||
11 | .set noreorder | ||
12 | .set volatile | ||
13 | .align 3 | ||
14 | .globl bn_div64 | ||
15 | .ent bn_div64 | ||
16 | bn_div64: | ||
17 | ldgp $29,0($27) | ||
18 | bn_div64..ng: | ||
19 | lda $30,-48($30) | ||
20 | .frame $30,48,$26,0 | ||
21 | stq $26,0($30) | ||
22 | stq $9,8($30) | ||
23 | stq $10,16($30) | ||
24 | stq $11,24($30) | ||
25 | stq $12,32($30) | ||
26 | stq $13,40($30) | ||
27 | .mask 0x4003e00,-48 | ||
28 | .prologue 1 | ||
29 | bis $16,$16,$9 | ||
30 | bis $17,$17,$10 | ||
31 | bis $18,$18,$11 | ||
32 | bis $31,$31,$13 | ||
33 | bis $31,2,$12 | ||
34 | bne $11,$9119 | ||
35 | lda $0,-1 | ||
36 | br $31,$9136 | ||
37 | .align 4 | ||
38 | $9119: | ||
39 | bis $11,$11,$16 | ||
40 | jsr $26,BN_num_bits_word | ||
41 | ldgp $29,0($26) | ||
42 | subq $0,64,$1 | ||
43 | beq $1,$9120 | ||
44 | bis $31,1,$1 | ||
45 | sll $1,$0,$1 | ||
46 | cmpule $9,$1,$1 | ||
47 | bne $1,$9120 | ||
48 | # lda $16,_IO_stderr_ | ||
49 | # lda $17,$C32 | ||
50 | # bis $0,$0,$18 | ||
51 | # jsr $26,fprintf | ||
52 | # ldgp $29,0($26) | ||
53 | jsr $26,abort | ||
54 | ldgp $29,0($26) | ||
55 | .align 4 | ||
56 | $9120: | ||
57 | bis $31,64,$3 | ||
58 | cmpult $9,$11,$2 | ||
59 | subq $3,$0,$1 | ||
60 | addl $1,$31,$0 | ||
61 | subq $9,$11,$1 | ||
62 | cmoveq $2,$1,$9 | ||
63 | beq $0,$9122 | ||
64 | zapnot $0,15,$2 | ||
65 | subq $3,$0,$1 | ||
66 | sll $11,$2,$11 | ||
67 | sll $9,$2,$3 | ||
68 | srl $10,$1,$1 | ||
69 | sll $10,$2,$10 | ||
70 | bis $3,$1,$9 | ||
71 | $9122: | ||
72 | srl $11,32,$5 | ||
73 | zapnot $11,15,$6 | ||
74 | lda $7,-1 | ||
75 | .align 5 | ||
76 | $9123: | ||
77 | srl $9,32,$1 | ||
78 | subq $1,$5,$1 | ||
79 | bne $1,$9126 | ||
80 | zapnot $7,15,$27 | ||
81 | br $31,$9127 | ||
82 | .align 4 | ||
83 | $9126: | ||
84 | bis $9,$9,$24 | ||
85 | bis $5,$5,$25 | ||
86 | divqu $24,$25,$27 | ||
87 | $9127: | ||
88 | srl $10,32,$4 | ||
89 | .align 5 | ||
90 | $9128: | ||
91 | mulq $27,$5,$1 | ||
92 | subq $9,$1,$3 | ||
93 | zapnot $3,240,$1 | ||
94 | bne $1,$9129 | ||
95 | mulq $6,$27,$2 | ||
96 | sll $3,32,$1 | ||
97 | addq $1,$4,$1 | ||
98 | cmpule $2,$1,$2 | ||
99 | bne $2,$9129 | ||
100 | subq $27,1,$27 | ||
101 | br $31,$9128 | ||
102 | .align 4 | ||
103 | $9129: | ||
104 | mulq $27,$6,$1 | ||
105 | mulq $27,$5,$4 | ||
106 | srl $1,32,$3 | ||
107 | sll $1,32,$1 | ||
108 | addq $4,$3,$4 | ||
109 | cmpult $10,$1,$2 | ||
110 | subq $10,$1,$10 | ||
111 | addq $2,$4,$2 | ||
112 | cmpult $9,$2,$1 | ||
113 | bis $2,$2,$4 | ||
114 | beq $1,$9134 | ||
115 | addq $9,$11,$9 | ||
116 | subq $27,1,$27 | ||
117 | $9134: | ||
118 | subl $12,1,$12 | ||
119 | subq $9,$4,$9 | ||
120 | beq $12,$9124 | ||
121 | sll $27,32,$13 | ||
122 | sll $9,32,$2 | ||
123 | srl $10,32,$1 | ||
124 | sll $10,32,$10 | ||
125 | bis $2,$1,$9 | ||
126 | br $31,$9123 | ||
127 | .align 4 | ||
128 | $9124: | ||
129 | bis $13,$27,$0 | ||
130 | $9136: | ||
131 | ldq $26,0($30) | ||
132 | ldq $9,8($30) | ||
133 | ldq $10,16($30) | ||
134 | ldq $11,24($30) | ||
135 | ldq $12,32($30) | ||
136 | ldq $13,40($30) | ||
137 | addq $30,48,$30 | ||
138 | ret $31,($26),1 | ||
139 | .end bn_div64 | ||
140 | EOF | ||
141 | &asm_add($data); # hand the whole listing over as one string; presumably asm_add appends it to the generated output -- confirm in the perlasm helpers | ||
142 | } | ||
143 | |||
144 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/mul.pl b/src/lib/libcrypto/bn/asm/alpha.works/mul.pl new file mode 100644 index 0000000000..b182bae452 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha.works/mul.pl | |||
@@ -0,0 +1,116 @@ | |||
1 | #!/usr/local/bin/perl | ||
2 | # alpha assembler | ||
3 | |||
4 | sub bn_mul_words # Emits Alpha code computing rp[i] = ap[i]*word + carry for `count` words, with the carry kept in $cc | ||
5 | { | ||
6 | local($name)=@_; # $name is handed to function_begin/function_end_A/function_end | ||
7 | local($cc,$a,$b,$r,$count); | ||
8 | # Operands arrive through wparam(0..3): result pointer, a pointer, word count, multiplier word. | ||
9 | &init_pool(4); | ||
10 | ($cc)=GR("r0"); # carry accumulator; presumably also the return-value register -- TODO confirm against GR() | ||
11 | |||
12 | $rp=&wparam(0); | ||
13 | $ap=&wparam(1); | ||
14 | $count=&wparam(2); | ||
15 | $word=&wparam(3); | ||
16 | |||
17 | &function_begin($name,""); | ||
18 | |||
19 | &comment(""); | ||
20 | &sub($count,4,$count); # bias count by -4; undone again at "finish" | ||
21 | &mov("zero",$cc); # carry starts at 0 | ||
22 | &br(&label("finish")); # NOTE(review): unconditional, so the blt and the two loads below are emitted but never reached; looks like a deliberate bypass -- confirm | ||
23 | &blt($count,&label("finish")); | ||
24 | |||
25 | ($a0,$r0)=&NR(2); | ||
26 | &ld($a0,&QWPw(0,$ap)); | ||
27 | &ld($r0,&QWPw(0,$rp)); | ||
28 | |||
29 | $a=<<'EOF'; # single-quoted heredoc: the text up to EOF is only stored in $a, nothing in this sub evaluates it (it references $bp/$b0, which are never set here) | ||
30 | ########################################################## | ||
31 | &set_label("loop"); | ||
32 | |||
33 | ($a1)=&NR(1); &ld($a1,&QWPw(1,$ap)); | ||
34 | ($b1)=&NR(1); &ld($b1,&QWPw(1,$bp)); | ||
35 | ($a2)=&NR(1); &ld($a2,&QWPw(2,$ap)); | ||
36 | ($b2)=&NR(1); &ld($b2,&QWPw(2,$bp)); | ||
37 | ($a3)=&NR(1); &ld($a3,&QWPw(3,$ap)); | ||
38 | ($b3)=&NR(1); &ld($b3,&QWPw(3,$bp)); | ||
39 | |||
40 | ($o0,$t0)=&NR(2); | ||
41 | &add($a0,$b0,$o0); | ||
42 | &cmpult($o0,$b0,$t0); | ||
43 | &add($o0,$cc,$o0); | ||
44 | &cmpult($o0,$cc,$cc); | ||
45 | &add($cc,$t0,$cc); &FR($t0); | ||
46 | |||
47 | ($t1,$o1)=&NR(2); | ||
48 | |||
49 | &add($a1,$b1,$o1); &FR($a1); | ||
50 | &cmpult($o1,$b1,$t1); &FR($b1); | ||
51 | &add($o1,$cc,$o1); | ||
52 | &cmpult($o1,$cc,$cc); | ||
53 | &add($cc,$t1,$cc); &FR($t1); | ||
54 | |||
55 | ($t2,$o2)=&NR(2); | ||
56 | |||
57 | &add($a2,$b2,$o2); &FR($a2); | ||
58 | &cmpult($o2,$b2,$t2); &FR($b2); | ||
59 | &add($o2,$cc,$o2); | ||
60 | &cmpult($o2,$cc,$cc); | ||
61 | &add($cc,$t2,$cc); &FR($t2); | ||
62 | |||
63 | ($t3,$o3)=&NR(2); | ||
64 | |||
65 | &add($a3,$b3,$o3); &FR($a3); | ||
66 | &cmpult($o3,$b3,$t3); &FR($b3); | ||
67 | &add($o3,$cc,$o3); | ||
68 | &cmpult($o3,$cc,$cc); | ||
69 | &add($cc,$t3,$cc); &FR($t3); | ||
70 | |||
71 | &st($o0,&QWPw(0,$rp)); &FR($o0); | ||
72 | &st($o1,&QWPw(0,$rp)); &FR($o1); | ||
73 | &st($o2,&QWPw(0,$rp)); &FR($o2); | ||
74 | &st($o3,&QWPw(0,$rp)); &FR($o3); | ||
75 | |||
76 | &sub($count,4,$count); # count-=4 | ||
77 | &add($ap,4*$QWS,$ap); # count+=4 | ||
78 | &add($bp,4*$QWS,$bp); # count+=4 | ||
79 | &add($rp,4*$QWS,$rp); # count+=4 | ||
80 | |||
81 | &blt($count,&label("finish")); | ||
82 | &ld($a0,&QWPw(0,$ap)); | ||
83 | &ld($b0,&QWPw(0,$bp)); | ||
84 | &br(&label("loop")); | ||
85 | EOF | ||
86 | ################################################## | ||
87 | # Do the last 0..3 words | ||
88 | |||
89 | &set_label("last_loop"); | ||
90 | |||
91 | &ld(($a0)=&NR(1),&QWPw(0,$ap)); # get a | ||
92 | &mul($a0,$word,($l0)=&NR(1)); # $l0: presumably the low half of a*word -- confirm against mul() | ||
93 | &add($ap,$QWS,$ap); | ||
94 | &muh($a0,$word,($h0)=&NR(1)); &FR($a0); # $h0: presumably the high half of a*word -- confirm against muh() | ||
95 | &add($l0,$cc,$l0); # add the incoming carry to the low half | ||
96 | &add($rp,$QWS,$rp); # rp is advanced before the store, hence the -1 offset below | ||
97 | &sub($count,1,$count); | ||
98 | &cmpult($l0,$cc,$cc); # carry out of that addition | ||
99 | &st($l0,&QWPw(-1,$rp)); &FR($l0); | ||
100 | &add($h0,$cc,$cc); &FR($h0); # next carry = high half + carry out | ||
101 | |||
102 | &bgt($count,&label("last_loop")); | ||
103 | &function_end_A($name); | ||
104 | |||
105 | ###################################################### | ||
106 | &set_label("finish"); | ||
107 | &add($count,4,$count); # undo the -4 bias: count is now the number of words still to do | ||
108 | &bgt($count,&label("last_loop")); | ||
109 | |||
110 | &set_label("end"); | ||
111 | &function_end($name); | ||
112 | |||
113 | &fin_pool; | ||
114 | } | ||
115 | |||
116 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/mul_add.pl b/src/lib/libcrypto/bn/asm/alpha.works/mul_add.pl new file mode 100644 index 0000000000..e37f6315fb --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha.works/mul_add.pl | |||
@@ -0,0 +1,120 @@ | |||
1 | #!/usr/local/bin/perl | ||
2 | # alpha assembler | ||
3 | |||
4 | sub bn_mul_add_words # Emits Alpha code computing rp[i] += ap[i]*word + carry for `count` words, with the carry kept in $cc | ||
5 | { | ||
6 | local($name)=@_; # $name is handed to function_begin/function_end_A/function_end | ||
7 | local($cc,$a,$b,$r,$count); | ||
8 | # Operands arrive through wparam(0..3): result pointer, a pointer, word count, multiplier word. | ||
9 | &init_pool(4); | ||
10 | ($cc)=GR("r0"); # carry accumulator; presumably also the return-value register -- TODO confirm against GR() | ||
11 | |||
12 | $rp=&wparam(0); | ||
13 | $ap=&wparam(1); | ||
14 | $count=&wparam(2); | ||
15 | $word=&wparam(3); | ||
16 | |||
17 | &function_begin($name,""); | ||
18 | |||
19 | &comment(""); | ||
20 | &sub($count,4,$count); # bias count by -4; undone again at "finish" | ||
21 | &mov("zero",$cc); # carry starts at 0 | ||
22 | &br(&label("finish")); # NOTE(review): unconditional, so the blt and the two loads below are emitted but never reached; looks like a deliberate bypass -- confirm | ||
23 | &blt($count,&label("finish")); | ||
24 | |||
25 | ($a0,$r0)=&NR(2); | ||
26 | &ld($a0,&QWPw(0,$ap)); | ||
27 | &ld($r0,&QWPw(0,$rp)); | ||
28 | |||
29 | $a=<<'EOF'; # single-quoted heredoc: the text up to EOF is only stored in $a, nothing in this sub evaluates it (it references $bp/$b0, which are never set here) | ||
30 | ########################################################## | ||
31 | &set_label("loop"); | ||
32 | |||
33 | ($a1)=&NR(1); &ld($a1,&QWPw(1,$ap)); | ||
34 | ($b1)=&NR(1); &ld($b1,&QWPw(1,$bp)); | ||
35 | ($a2)=&NR(1); &ld($a2,&QWPw(2,$ap)); | ||
36 | ($b2)=&NR(1); &ld($b2,&QWPw(2,$bp)); | ||
37 | ($a3)=&NR(1); &ld($a3,&QWPw(3,$ap)); | ||
38 | ($b3)=&NR(1); &ld($b3,&QWPw(3,$bp)); | ||
39 | |||
40 | ($o0,$t0)=&NR(2); | ||
41 | &add($a0,$b0,$o0); | ||
42 | &cmpult($o0,$b0,$t0); | ||
43 | &add($o0,$cc,$o0); | ||
44 | &cmpult($o0,$cc,$cc); | ||
45 | &add($cc,$t0,$cc); &FR($t0); | ||
46 | |||
47 | ($t1,$o1)=&NR(2); | ||
48 | |||
49 | &add($a1,$b1,$o1); &FR($a1); | ||
50 | &cmpult($o1,$b1,$t1); &FR($b1); | ||
51 | &add($o1,$cc,$o1); | ||
52 | &cmpult($o1,$cc,$cc); | ||
53 | &add($cc,$t1,$cc); &FR($t1); | ||
54 | |||
55 | ($t2,$o2)=&NR(2); | ||
56 | |||
57 | &add($a2,$b2,$o2); &FR($a2); | ||
58 | &cmpult($o2,$b2,$t2); &FR($b2); | ||
59 | &add($o2,$cc,$o2); | ||
60 | &cmpult($o2,$cc,$cc); | ||
61 | &add($cc,$t2,$cc); &FR($t2); | ||
62 | |||
63 | ($t3,$o3)=&NR(2); | ||
64 | |||
65 | &add($a3,$b3,$o3); &FR($a3); | ||
66 | &cmpult($o3,$b3,$t3); &FR($b3); | ||
67 | &add($o3,$cc,$o3); | ||
68 | &cmpult($o3,$cc,$cc); | ||
69 | &add($cc,$t3,$cc); &FR($t3); | ||
70 | |||
71 | &st($o0,&QWPw(0,$rp)); &FR($o0); | ||
72 | &st($o1,&QWPw(0,$rp)); &FR($o1); | ||
73 | &st($o2,&QWPw(0,$rp)); &FR($o2); | ||
74 | &st($o3,&QWPw(0,$rp)); &FR($o3); | ||
75 | |||
76 | &sub($count,4,$count); # count-=4 | ||
77 | &add($ap,4*$QWS,$ap); # count+=4 | ||
78 | &add($bp,4*$QWS,$bp); # count+=4 | ||
79 | &add($rp,4*$QWS,$rp); # count+=4 | ||
80 | |||
81 | &blt($count,&label("finish")); | ||
82 | &ld($a0,&QWPw(0,$ap)); | ||
83 | &ld($b0,&QWPw(0,$bp)); | ||
84 | &br(&label("loop")); | ||
85 | EOF | ||
86 | ################################################## | ||
87 | # Do the last 0..3 words | ||
88 | |||
89 | &set_label("last_loop"); | ||
90 | |||
91 | &ld(($a0)=&NR(1),&QWPw(0,$ap)); # get a | ||
92 | &ld(($r0)=&NR(1),&QWPw(0,$rp)); # get r (the current result word) | ||
93 | &mul($a0,$word,($l0)=&NR(1)); # $l0: presumably the low half of a*word -- confirm against mul() | ||
94 | &sub($count,1,$count); | ||
95 | &add($ap,$QWS,$ap); | ||
96 | &muh($a0,$word,($h0)=&NR(1)); &FR($a0); # $h0: presumably the high half of a*word -- confirm against muh() | ||
97 | &add($r0,$l0,$r0); # r += low half | ||
98 | &add($rp,$QWS,$rp); # rp is advanced before the store, hence the -1 offset below | ||
99 | &cmpult($r0,$l0,($t0)=&NR(1)); &FR($l0); # carry out of r + low half | ||
100 | &add($r0,$cc,$r0); # add the incoming carry | ||
101 | &add($h0,$t0,$h0); &FR($t0); # fold the first carry into the high half | ||
102 | &cmpult($r0,$cc,$cc); # carry out of adding the incoming carry | ||
103 | &st($r0,&QWPw(-1,$rp)); &FR($r0); | ||
104 | &add($h0,$cc,$cc); &FR($h0); # next carry = high half + carries | ||
105 | |||
106 | &bgt($count,&label("last_loop")); | ||
107 | &function_end_A($name); | ||
108 | |||
109 | ###################################################### | ||
110 | &set_label("finish"); | ||
111 | &add($count,4,$count); # undo the -4 bias: count is now the number of words still to do | ||
112 | &bgt($count,&label("last_loop")); | ||
113 | |||
114 | &set_label("end"); | ||
115 | &function_end($name); | ||
116 | |||
117 | &fin_pool; | ||
118 | } | ||
119 | |||
120 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/mul_c4.pl b/src/lib/libcrypto/bn/asm/alpha.works/mul_c4.pl new file mode 100644 index 0000000000..5efd201281 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha.works/mul_c4.pl | |||
@@ -0,0 +1,213 @@ | |||
1 | #!/usr/local/bin/perl | ||
2 | # alpha assembler | ||
3 | |||
4 | sub mul_add_c # Emits the accumulate step (c2,c1,c0) += a*b on three accumulator registers | ||
5 | { | ||
6 | local($a,$b,$c0,$c1,$c2)=@_; | ||
7 | local($lo,$hi,$carry_lo,$carry_hi); | ||
8 | # Each add is followed by a cmpult that recovers its carry; scratch registers are released as soon as they are dead. | ||
9 | ($lo)=&NR(1); &mul($a,$b,$lo); | ||
10 | ($hi)=&NR(1); &muh($a,$b,$hi); | ||
11 | &add($c0,$lo,$c0); | ||
12 | ($carry_lo)=&NR(1); &cmpult($c0,$lo,$carry_lo); &FR($lo); | ||
13 | &add($carry_lo,$hi,$hi); &FR($carry_lo); | ||
14 | &add($c1,$hi,$c1); | ||
15 | ($carry_hi)=&NR(1); &cmpult($c1,$hi,$carry_hi); &FR($hi); | ||
16 | &add($c2,$carry_hi,$c2); &FR($carry_hi); | ||
17 | } | ||
18 | |||
19 | sub bn_mul_comba4 # Emits Alpha code for the 4x4-word product: rp[0..7] = ap[0..3] * bp[0..3] | ||
20 | { | ||
21 | local($name)=@_; # $name is handed to function_begin/function_end | ||
22 | local(@a,@b,$r,$c0,$c1,$c2); | ||
23 | # Operands arrive through wparam(0..2): result pointer, a pointer, b pointer. | ||
24 | $cnt=1; | ||
25 | &init_pool(3); | ||
26 | |||
27 | $rp=&wparam(0); | ||
28 | $ap=&wparam(1); | ||
29 | $bp=&wparam(2); | ||
30 | |||
31 | &function_begin($name,""); | ||
32 | |||
33 | &comment(""); | ||
34 | # $rNN hold partial products (mul/muh: presumably low/high halves -- confirm); $tNN hold the carry of each addition. | ||
35 | &ld(($a[0])=&NR(1),&QWPw(0,$ap)); | ||
36 | &ld(($b[0])=&NR(1),&QWPw(0,$bp)); | ||
37 | &ld(($a[1])=&NR(1),&QWPw(1,$ap)); | ||
38 | &ld(($b[1])=&NR(1),&QWPw(1,$bp)); | ||
39 | &mul($a[0],$b[0],($r00)=&NR(1)); | ||
40 | &ld(($a[2])=&NR(1),&QWPw(2,$ap)); | ||
41 | &ld(($b[2])=&NR(1),&QWPw(2,$bp)); | ||
42 | &muh($a[0],$b[0],($r01)=&NR(1)); | ||
43 | &FR($ap); &ld(($a[3])=&NR(1),&QWPw(3,$ap)); # $ap is released before its last use; NR may hand the same register back as the load target | ||
44 | &FR($bp); &ld(($b[3])=&NR(1),&QWPw(3,$bp)); # same for $bp | ||
45 | &mul($a[0],$b[1],($r02)=&NR(1)); | ||
46 | # $R accumulates the current result word; $H1/$H2 count the carries out of it and are summed to seed the next word. | ||
47 | ($R,$H1,$H2)=&NR(3); | ||
48 | |||
49 | &st($r00,&QWPw(0,$rp)); &FR($r00); | ||
50 | |||
51 | &mov("zero",$R); | ||
52 | &mul($a[1],$b[0],($r03)=&NR(1)); | ||
53 | |||
54 | &mov("zero",$H1); | ||
55 | &mov("zero",$H2); | ||
56 | &add($R,$r01,$R); | ||
57 | &muh($a[0],$b[1],($r04)=&NR(1)); | ||
58 | &cmpult($R,$r01,($t01)=&NR(1)); &FR($r01); | ||
59 | &add($R,$r02,$R); | ||
60 | &add($H1,$t01,$H1); &FR($t01); | ||
61 | &muh($a[1],$b[0],($r05)=&NR(1)); | ||
62 | &cmpult($R,$r02,($t02)=&NR(1)); &FR($r02); | ||
63 | &add($R,$r03,$R); | ||
64 | &add($H2,$t02,$H2); &FR($t02); | ||
65 | &mul($a[0],$b[2],($r06)=&NR(1)); | ||
66 | &cmpult($R,$r03,($t03)=&NR(1)); &FR($r03); | ||
67 | &add($H1,$t03,$H1); &FR($t03); | ||
68 | &st($R,&QWPw(1,$rp)); | ||
69 | &add($H1,$H2,$R); | ||
70 | |||
71 | &mov("zero",$H1); | ||
72 | &add($R,$r04,$R); | ||
73 | &mov("zero",$H2); | ||
74 | &mul($a[1],$b[1],($r07)=&NR(1)); | ||
75 | &cmpult($R,$r04,($t04)=&NR(1)); &FR($r04); | ||
76 | &add($R,$r05,$R); | ||
77 | &add($H1,$t04,$H1); &FR($t04); | ||
78 | &mul($a[2],$b[0],($r08)=&NR(1)); | ||
79 | &cmpult($R,$r05,($t05)=&NR(1)); &FR($r05); | ||
80 | &add($R,$r06,$R); | ||
81 | &add($H2,$t05,$H2); &FR($t05); | ||
82 | &muh($a[0],$b[2],($r09)=&NR(1)); | ||
83 | &cmpult($R,$r06,($t06)=&NR(1)); &FR($r06); | ||
84 | &add($R,$r07,$R); | ||
85 | &add($H1,$t06,$H1); &FR($t06); | ||
86 | &muh($a[1],$b[1],($r10)=&NR(1)); | ||
87 | &cmpult($R,$r07,($t07)=&NR(1)); &FR($r07); | ||
88 | &add($R,$r08,$R); | ||
89 | &add($H2,$t07,$H2); &FR($t07); | ||
90 | &muh($a[2],$b[0],($r11)=&NR(1)); | ||
91 | &cmpult($R,$r08,($t08)=&NR(1)); &FR($r08); | ||
92 | &add($H1,$t08,$H1); &FR($t08); | ||
93 | &st($R,&QWPw(2,$rp)); | ||
94 | &add($H1,$H2,$R); | ||
95 | |||
96 | &mov("zero",$H1); | ||
97 | &add($R,$r09,$R); | ||
98 | &mov("zero",$H2); | ||
99 | &mul($a[0],$b[3],($r12)=&NR(1)); | ||
100 | &cmpult($R,$r09,($t09)=&NR(1)); &FR($r09); | ||
101 | &add($R,$r10,$R); | ||
102 | &add($H1,$t09,$H1); &FR($t09); | ||
103 | &mul($a[1],$b[2],($r13)=&NR(1)); | ||
104 | &cmpult($R,$r10,($t10)=&NR(1)); &FR($r10); | ||
105 | &add($R,$r11,$R); | ||
106 | &add($H1,$t10,$H1); &FR($t10); | ||
107 | &mul($a[2],$b[1],($r14)=&NR(1)); | ||
108 | &cmpult($R,$r11,($t11)=&NR(1)); &FR($r11); | ||
109 | &add($R,$r12,$R); | ||
110 | &add($H1,$t11,$H1); &FR($t11); | ||
111 | &mul($a[3],$b[0],($r15)=&NR(1)); | ||
112 | &cmpult($R,$r12,($t12)=&NR(1)); &FR($r12); | ||
113 | &add($R,$r13,$R); | ||
114 | &add($H1,$t12,$H1); &FR($t12); | ||
115 | &muh($a[0],$b[3],($r16)=&NR(1)); | ||
116 | &cmpult($R,$r13,($t13)=&NR(1)); &FR($r13); | ||
117 | &add($R,$r14,$R); | ||
118 | &add($H1,$t13,$H1); &FR($t13); | ||
119 | &muh($a[1],$b[2],($r17)=&NR(1)); | ||
120 | &cmpult($R,$r14,($t14)=&NR(1)); &FR($r14); | ||
121 | &add($R,$r15,$R); | ||
122 | &add($H1,$t14,$H1); &FR($t14); | ||
123 | &muh($a[2],$b[1],($r18)=&NR(1)); | ||
124 | &cmpult($R,$r15,($t15)=&NR(1)); &FR($r15); | ||
125 | &add($H1,$t15,$H1); &FR($t15); | ||
126 | &st($R,&QWPw(3,$rp)); | ||
127 | &add($H1,$H2,$R); | ||
128 | |||
129 | &mov("zero",$H1); | ||
130 | &add($R,$r16,$R); | ||
131 | &mov("zero",$H2); | ||
132 | &muh($a[3],$b[0],($r19)=&NR(1)); | ||
133 | &cmpult($R,$r16,($t16)=&NR(1)); &FR($r16); | ||
134 | &add($R,$r17,$R); | ||
135 | &add($H1,$t16,$H1); &FR($t16); | ||
136 | &mul($a[1],$b[3],($r20)=&NR(1)); | ||
137 | &cmpult($R,$r17,($t17)=&NR(1)); &FR($r17); | ||
138 | &add($R,$r18,$R); | ||
139 | &add($H1,$t17,$H1); &FR($t17); | ||
140 | &mul($a[2],$b[2],($r21)=&NR(1)); | ||
141 | &cmpult($R,$r18,($t18)=&NR(1)); &FR($r18); | ||
142 | &add($R,$r19,$R); | ||
143 | &add($H1,$t18,$H1); &FR($t18); | ||
144 | &mul($a[3],$b[1],($r22)=&NR(1)); | ||
145 | &cmpult($R,$r19,($t19)=&NR(1)); &FR($r19); | ||
146 | &add($R,$r20,$R); | ||
147 | &add($H1,$t19,$H1); &FR($t19); | ||
148 | &muh($a[1],$b[3],($r23)=&NR(1)); | ||
149 | &cmpult($R,$r20,($t20)=&NR(1)); &FR($r20); | ||
150 | &add($R,$r21,$R); | ||
151 | &add($H1,$t20,$H1); &FR($t20); | ||
152 | &muh($a[2],$b[2],($r24)=&NR(1)); | ||
153 | &cmpult($R,$r21,($t21)=&NR(1)); &FR($r21); | ||
154 | &add($R,$r22,$R); | ||
155 | &add($H1,$t21,$H1); &FR($t21); | ||
156 | &muh($a[3],$b[1],($r25)=&NR(1)); | ||
157 | &cmpult($R,$r22,($t22)=&NR(1)); &FR($r22); | ||
158 | &add($H1,$t22,$H1); &FR($t22); | ||
159 | &st($R,&QWPw(4,$rp)); | ||
160 | &add($H1,$H2,$R); | ||
161 | |||
162 | &mov("zero",$H1); | ||
163 | &add($R,$r23,$R); | ||
164 | &mov("zero",$H2); | ||
165 | &mul($a[2],$b[3],($r26)=&NR(1)); | ||
166 | &cmpult($R,$r23,($t23)=&NR(1)); &FR($r23); | ||
167 | &add($R,$r24,$R); | ||
168 | &add($H1,$t23,$H1); &FR($t23); | ||
169 | &mul($a[3],$b[2],($r27)=&NR(1)); | ||
170 | &cmpult($R,$r24,($t24)=&NR(1)); &FR($r24); | ||
171 | &add($R,$r25,$R); | ||
172 | &add($H1,$t24,$H1); &FR($t24); | ||
173 | &muh($a[2],$b[3],($r28)=&NR(1)); | ||
174 | &cmpult($R,$r25,($t25)=&NR(1)); &FR($r25); | ||
175 | &add($R,$r26,$R); | ||
176 | &add($H1,$t25,$H1); &FR($t25); | ||
177 | &muh($a[3],$b[2],($r29)=&NR(1)); | ||
178 | &cmpult($R,$r26,($t26)=&NR(1)); &FR($r26); | ||
179 | &add($R,$r27,$R); | ||
180 | &add($H1,$t26,$H1); &FR($t26); | ||
181 | &mul($a[3],$b[3],($r30)=&NR(1)); | ||
182 | &cmpult($R,$r27,($t27)=&NR(1)); &FR($r27); | ||
183 | &add($H1,$t27,$H1); &FR($t27); | ||
184 | &st($R,&QWPw(5,$rp)); | ||
185 | &add($H1,$H2,$R); | ||
186 | |||
187 | &mov("zero",$H1); | ||
188 | &add($R,$r28,$R); | ||
189 | &mov("zero",$H2); | ||
190 | &muh($a[3],$b[3],($r31)=&NR(1)); | ||
191 | &cmpult($R,$r28,($t28)=&NR(1)); &FR($r28); | ||
192 | &add($R,$r29,$R); | ||
193 | &add($H1,$t28,$H1); &FR($t28); | ||
194 | ############ | ||
195 | &cmpult($R,$r29,($t29)=&NR(1)); &FR($r29); | ||
196 | &add($R,$r30,$R); | ||
197 | &add($H1,$t29,$H1); &FR($t29); | ||
198 | ############ | ||
199 | &cmpult($R,$r30,($t30)=&NR(1)); &FR($r30); | ||
200 | &add($H1,$t30,$H1); &FR($t30); | ||
201 | &st($R,&QWPw(6,$rp)); | ||
202 | &add($H1,$H2,$R); | ||
203 | |||
204 | &add($R,$r31,$R); &FR($r31); | ||
205 | &st($R,&QWPw(7,$rp)); | ||
206 | |||
207 | &FR($R,$H1,$H2); | ||
208 | &function_end($name); | ||
209 | |||
210 | &fin_pool; | ||
211 | } | ||
212 | |||
213 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/mul_c4.works.pl b/src/lib/libcrypto/bn/asm/alpha.works/mul_c4.works.pl new file mode 100644 index 0000000000..79d86dd25c --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha.works/mul_c4.works.pl | |||
@@ -0,0 +1,98 @@ | |||
1 | #!/usr/local/bin/perl | ||
2 | # alpha assembler | ||
3 | |||
4 | sub mul_add_c # Emits the accumulate step (c2,c1,c0) += a*b on three accumulator registers | ||
5 | { | ||
6 | local($a,$b,$c0,$c1,$c2)=@_; | ||
7 | local($lo,$hi,$carry_lo,$carry_hi); | ||
8 | # Progress trace on STDERR: one line per call, numbered by the global $cnt. | ||
9 | print STDERR "count=$cnt\n"; $cnt++; | ||
10 | ($lo)=&NR(1); &mul($a,$b,$lo); | ||
11 | ($hi)=&NR(1); &muh($a,$b,$hi); | ||
12 | &add($c0,$lo,$c0); | ||
13 | ($carry_lo)=&NR(1); &cmpult($c0,$lo,$carry_lo); &FR($lo); | ||
14 | &add($carry_lo,$hi,$hi); &FR($carry_lo); | ||
15 | &add($c1,$hi,$c1); | ||
16 | ($carry_hi)=&NR(1); &cmpult($c1,$hi,$carry_hi); &FR($hi); | ||
17 | &add($c2,$carry_hi,$c2); &FR($carry_hi); | ||
18 | } | ||
19 | |||
20 | sub bn_mul_comba4 # Emits Alpha code for the 4x4-word product rp[0..7] = ap[0..3] * bp[0..3], one result column at a time | ||
21 | { | ||
22 | local($name)=@_; # $name is handed to function_begin/function_end | ||
23 | local(@a,@b,$r,$c0,$c1,$c2); | ||
24 | # Operands arrive through wparam(0..2): result pointer, a pointer, b pointer. $cnt numbers the mul_add_c trace lines. | ||
25 | $cnt=1; | ||
26 | &init_pool(3); | ||
27 | |||
28 | $rp=&wparam(0); | ||
29 | $ap=&wparam(1); | ||
30 | $bp=&wparam(2); | ||
31 | |||
32 | &function_begin($name,""); | ||
33 | |||
34 | &comment(""); | ||
35 | # Load all eight input words up front; the pointer registers are released after their last use. | ||
36 | &ld(($a[0])=&NR(1),&QWPw(0,$ap)); | ||
37 | &ld(($b[0])=&NR(1),&QWPw(0,$bp)); | ||
38 | &ld(($a[1])=&NR(1),&QWPw(1,$ap)); | ||
39 | &ld(($b[1])=&NR(1),&QWPw(1,$bp)); | ||
40 | &ld(($a[2])=&NR(1),&QWPw(2,$ap)); | ||
41 | &ld(($b[2])=&NR(1),&QWPw(2,$bp)); | ||
42 | &ld(($a[3])=&NR(1),&QWPw(3,$ap)); &FR($ap); | ||
43 | &ld(($b[3])=&NR(1),&QWPw(3,$bp)); &FR($bp); | ||
44 | # Column k: add every a[i]*b[j] with i+j==k into (c0,c1,c2), store c0 to rp[k], swap c0 for a fresh register, rotate, clear the new top word. | ||
45 | ($c0,$c1,$c2)=&NR(3); | ||
46 | &mov("zero",$c2); | ||
47 | &mul($a[0],$b[0],$c0); | ||
48 | &muh($a[0],$b[0],$c1); | ||
49 | &st($c0,&QWPw(0,$rp)); &FR($c0); ($c0)=&NR(1); | ||
50 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
51 | &mov("zero",$c2); | ||
52 | |||
53 | &mul_add_c($a[0],$b[1],$c0,$c1,$c2); | ||
54 | &mul_add_c($a[1],$b[0],$c0,$c1,$c2); | ||
55 | &st($c0,&QWPw(1,$rp)); &FR($c0); ($c0)=&NR(1); | ||
56 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
57 | &mov("zero",$c2); | ||
58 | |||
59 | &mul_add_c($a[1],$b[1],$c0,$c1,$c2); | ||
60 | &mul_add_c($a[0],$b[2],$c0,$c1,$c2); | ||
61 | &mul_add_c($a[2],$b[0],$c0,$c1,$c2); | ||
62 | &st($c0,&QWPw(2,$rp)); &FR($c0); ($c0)=&NR(1); | ||
63 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
64 | &mov("zero",$c2); | ||
65 | |||
66 | &mul_add_c($a[0],$b[3],$c0,$c1,$c2); &FR($a[0]); | ||
67 | &mul_add_c($a[1],$b[2],$c0,$c1,$c2); | ||
68 | &mul_add_c($a[2],$b[1],$c0,$c1,$c2); | ||
69 | &mul_add_c($a[3],$b[0],$c0,$c1,$c2); &FR($b[0]); | ||
70 | &st($c0,&QWPw(3,$rp)); &FR($c0); ($c0)=&NR(1); | ||
71 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
72 | &mov("zero",$c2); | ||
73 | |||
74 | &mul_add_c($a[1],$b[3],$c0,$c1,$c2); &FR($a[1]); | ||
75 | &mul_add_c($a[2],$b[2],$c0,$c1,$c2); | ||
76 | &mul_add_c($a[3],$b[1],$c0,$c1,$c2); &FR($b[1]); | ||
77 | &st($c0,&QWPw(4,$rp)); &FR($c0); ($c0)=&NR(1); | ||
78 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
79 | &mov("zero",$c2); | ||
80 | |||
81 | &mul_add_c($a[2],$b[3],$c0,$c1,$c2); &FR($a[2]); | ||
82 | &mul_add_c($a[3],$b[2],$c0,$c1,$c2); &FR($b[2]); | ||
83 | &st($c0,&QWPw(5,$rp)); &FR($c0); ($c0)=&NR(1); | ||
84 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
85 | &mov("zero",$c2); | ||
86 | |||
87 | &mul_add_c($a[3],$b[3],$c0,$c1,$c2); &FR($a[3],$b[3]); | ||
88 | &st($c0,&QWPw(6,$rp)); | ||
89 | &st($c1,&QWPw(7,$rp)); | ||
90 | |||
91 | &FR($c0,$c1,$c2); | ||
92 | |||
93 | &function_end($name); | ||
94 | |||
95 | &fin_pool; | ||
96 | } | ||
97 | |||
98 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/mul_c8.pl b/src/lib/libcrypto/bn/asm/alpha.works/mul_c8.pl new file mode 100644 index 0000000000..525ca7494b --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha.works/mul_c8.pl | |||
@@ -0,0 +1,177 @@ | |||
1 | #!/usr/local/bin/perl | ||
2 | # alpha assembler | ||
3 | |||
4 | sub bn_mul_comba8 # Emits Alpha code for the 8x8-word product rp[0..15] = ap[0..7] * bp[0..7], one result column at a time | ||
5 | { | ||
6 | local($name)=@_; # $name is handed to function_begin/function_end | ||
7 | local(@a,@b,$r,$c0,$c1,$c2); | ||
8 | # Operands arrive through wparam(0..2): result pointer, a pointer, b pointer. mul_add_c is expected from another perlasm file. | ||
9 | $cnt=1; | ||
10 | &init_pool(3); | ||
11 | |||
12 | $rp=&wparam(0); | ||
13 | $ap=&wparam(1); | ||
14 | $bp=&wparam(2); | ||
15 | |||
16 | &function_begin($name,""); | ||
17 | |||
18 | &comment(""); | ||
19 | # Save $reg_s0/$reg_s1 in two stack slots and FR() them (presumably so the pool can use them as scratch); they are reloaded before function_end. | ||
20 | &stack_push(2); | ||
21 | &ld(($a[0])=&NR(1),&QWPw(0,$ap)); | ||
22 | &ld(($b[0])=&NR(1),&QWPw(0,$bp)); | ||
23 | &st($reg_s0,&swtmp(0)); &FR($reg_s0); | ||
24 | &st($reg_s1,&swtmp(1)); &FR($reg_s1); | ||
25 | &ld(($a[1])=&NR(1),&QWPw(1,$ap)); | ||
26 | &ld(($b[1])=&NR(1),&QWPw(1,$bp)); | ||
27 | &ld(($a[2])=&NR(1),&QWPw(2,$ap)); | ||
28 | &ld(($b[2])=&NR(1),&QWPw(2,$bp)); | ||
29 | &ld(($a[3])=&NR(1),&QWPw(3,$ap)); | ||
30 | &ld(($b[3])=&NR(1),&QWPw(3,$bp)); | ||
31 | &ld(($a[4])=&NR(1),&QWPw(4,$ap)); | ||
32 | &ld(($b[4])=&NR(1),&QWPw(4,$bp)); | ||
33 | &ld(($a[5])=&NR(1),&QWPw(5,$ap)); | ||
34 | &ld(($b[5])=&NR(1),&QWPw(5,$bp)); | ||
35 | &ld(($a[6])=&NR(1),&QWPw(6,$ap)); | ||
36 | &ld(($b[6])=&NR(1),&QWPw(6,$bp)); | ||
37 | &ld(($a[7])=&NR(1),&QWPw(7,$ap)); &FR($ap); | ||
38 | &ld(($b[7])=&NR(1),&QWPw(7,$bp)); &FR($bp); | ||
39 | # Column k: add every a[i]*b[j] with i+j==k into (c0,c1,c2), store c0 to rp[k], swap c0 for a fresh register, rotate, clear the new top word. | ||
40 | ($c0,$c1,$c2)=&NR(3); | ||
41 | &mov("zero",$c2); | ||
42 | &mul($a[0],$b[0],$c0); | ||
43 | &muh($a[0],$b[0],$c1); | ||
44 | &st($c0,&QWPw(0,$rp)); &FR($c0); ($c0)=&NR(1); | ||
45 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
46 | &mov("zero",$c2); | ||
47 | |||
48 | &mul_add_c($a[0],$b[1],$c0,$c1,$c2); | ||
49 | &mul_add_c($a[1],$b[0],$c0,$c1,$c2); | ||
50 | &st($c0,&QWPw(1,$rp)); &FR($c0); ($c0)=&NR(1); | ||
51 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
52 | &mov("zero",$c2); | ||
53 | |||
54 | &mul_add_c($a[0],$b[2],$c0,$c1,$c2); | ||
55 | &mul_add_c($a[1],$b[1],$c0,$c1,$c2); | ||
56 | &mul_add_c($a[2],$b[0],$c0,$c1,$c2); | ||
57 | &st($c0,&QWPw(2,$rp)); &FR($c0); ($c0)=&NR(1); | ||
58 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
59 | &mov("zero",$c2); | ||
60 | |||
61 | &mul_add_c($a[0],$b[3],$c0,$c1,$c2); | ||
62 | &mul_add_c($a[1],$b[2],$c0,$c1,$c2); | ||
63 | &mul_add_c($a[2],$b[1],$c0,$c1,$c2); | ||
64 | &mul_add_c($a[3],$b[0],$c0,$c1,$c2); | ||
65 | &st($c0,&QWPw(3,$rp)); &FR($c0); ($c0)=&NR(1); | ||
66 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
67 | &mov("zero",$c2); | ||
68 | |||
69 | &mul_add_c($a[0],$b[4],$c0,$c1,$c2); | ||
70 | &mul_add_c($a[1],$b[3],$c0,$c1,$c2); | ||
71 | &mul_add_c($a[2],$b[2],$c0,$c1,$c2); | ||
72 | &mul_add_c($a[3],$b[1],$c0,$c1,$c2); | ||
73 | &mul_add_c($a[4],$b[0],$c0,$c1,$c2); | ||
74 | &st($c0,&QWPw(4,$rp)); &FR($c0); ($c0)=&NR(1); | ||
75 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
76 | &mov("zero",$c2); | ||
77 | |||
78 | &mul_add_c($a[0],$b[5],$c0,$c1,$c2); | ||
79 | &mul_add_c($a[1],$b[4],$c0,$c1,$c2); | ||
80 | &mul_add_c($a[2],$b[3],$c0,$c1,$c2); | ||
81 | &mul_add_c($a[3],$b[2],$c0,$c1,$c2); | ||
82 | &mul_add_c($a[4],$b[1],$c0,$c1,$c2); | ||
83 | &mul_add_c($a[5],$b[0],$c0,$c1,$c2); | ||
84 | &st($c0,&QWPw(5,$rp)); &FR($c0); ($c0)=&NR(1); | ||
85 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
86 | &mov("zero",$c2); | ||
87 | |||
88 | &mul_add_c($a[0],$b[6],$c0,$c1,$c2); | ||
89 | &mul_add_c($a[1],$b[5],$c0,$c1,$c2); | ||
90 | &mul_add_c($a[2],$b[4],$c0,$c1,$c2); | ||
91 | &mul_add_c($a[3],$b[3],$c0,$c1,$c2); | ||
92 | &mul_add_c($a[4],$b[2],$c0,$c1,$c2); | ||
93 | &mul_add_c($a[5],$b[1],$c0,$c1,$c2); | ||
94 | &mul_add_c($a[6],$b[0],$c0,$c1,$c2); | ||
95 | &st($c0,&QWPw(6,$rp)); &FR($c0); ($c0)=&NR(1); | ||
96 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
97 | &mov("zero",$c2); | ||
98 | |||
99 | &mul_add_c($a[0],$b[7],$c0,$c1,$c2); &FR($a[0]); | ||
100 | &mul_add_c($a[1],$b[6],$c0,$c1,$c2); | ||
101 | &mul_add_c($a[2],$b[5],$c0,$c1,$c2); | ||
102 | &mul_add_c($a[3],$b[4],$c0,$c1,$c2); | ||
103 | &mul_add_c($a[4],$b[3],$c0,$c1,$c2); | ||
104 | &mul_add_c($a[5],$b[2],$c0,$c1,$c2); | ||
105 | &mul_add_c($a[6],$b[1],$c0,$c1,$c2); | ||
106 | &mul_add_c($a[7],$b[0],$c0,$c1,$c2); &FR($b[0]); | ||
107 | &st($c0,&QWPw(7,$rp)); &FR($c0); ($c0)=&NR(1); | ||
108 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
109 | &mov("zero",$c2); | ||
110 | |||
111 | &mul_add_c($a[1],$b[7],$c0,$c1,$c2); &FR($a[1]); | ||
112 | &mul_add_c($a[2],$b[6],$c0,$c1,$c2); | ||
113 | &mul_add_c($a[3],$b[5],$c0,$c1,$c2); | ||
114 | &mul_add_c($a[4],$b[4],$c0,$c1,$c2); | ||
115 | &mul_add_c($a[5],$b[3],$c0,$c1,$c2); | ||
116 | &mul_add_c($a[6],$b[2],$c0,$c1,$c2); | ||
117 | &mul_add_c($a[7],$b[1],$c0,$c1,$c2); &FR($b[1]); | ||
118 | &st($c0,&QWPw(8,$rp)); &FR($c0); ($c0)=&NR(1); | ||
119 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
120 | &mov("zero",$c2); | ||
121 | |||
122 | &mul_add_c($a[2],$b[7],$c0,$c1,$c2); &FR($a[2]); | ||
123 | &mul_add_c($a[3],$b[6],$c0,$c1,$c2); | ||
124 | &mul_add_c($a[4],$b[5],$c0,$c1,$c2); | ||
125 | &mul_add_c($a[5],$b[4],$c0,$c1,$c2); | ||
126 | &mul_add_c($a[6],$b[3],$c0,$c1,$c2); | ||
127 | &mul_add_c($a[7],$b[2],$c0,$c1,$c2); &FR($b[2]); | ||
128 | &st($c0,&QWPw(9,$rp)); &FR($c0); ($c0)=&NR(1); | ||
129 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
130 | &mov("zero",$c2); | ||
131 | |||
132 | &mul_add_c($a[3],$b[7],$c0,$c1,$c2); &FR($a[3]); | ||
133 | &mul_add_c($a[4],$b[6],$c0,$c1,$c2); | ||
134 | &mul_add_c($a[5],$b[5],$c0,$c1,$c2); | ||
135 | &mul_add_c($a[6],$b[4],$c0,$c1,$c2); | ||
136 | &mul_add_c($a[7],$b[3],$c0,$c1,$c2); &FR($b[3]); | ||
137 | &st($c0,&QWPw(10,$rp)); &FR($c0); ($c0)=&NR(1); | ||
138 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
139 | &mov("zero",$c2); | ||
140 | |||
141 | &mul_add_c($a[4],$b[7],$c0,$c1,$c2); &FR($a[4]); | ||
142 | &mul_add_c($a[5],$b[6],$c0,$c1,$c2); | ||
143 | &mul_add_c($a[6],$b[5],$c0,$c1,$c2); | ||
144 | &mul_add_c($a[7],$b[4],$c0,$c1,$c2); &FR($b[4]); | ||
145 | &st($c0,&QWPw(11,$rp)); &FR($c0); ($c0)=&NR(1); | ||
146 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
147 | &mov("zero",$c2); | ||
148 | |||
149 | &mul_add_c($a[5],$b[7],$c0,$c1,$c2); &FR($a[5]); | ||
150 | &mul_add_c($a[6],$b[6],$c0,$c1,$c2); | ||
151 | &mul_add_c($a[7],$b[5],$c0,$c1,$c2); &FR($b[5]); | ||
152 | &st($c0,&QWPw(12,$rp)); &FR($c0); ($c0)=&NR(1); | ||
153 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
154 | &mov("zero",$c2); | ||
155 | |||
156 | &mul_add_c($a[6],$b[7],$c0,$c1,$c2); &FR($a[6]); | ||
157 | &mul_add_c($a[7],$b[6],$c0,$c1,$c2); &FR($b[6]); | ||
158 | &st($c0,&QWPw(13,$rp)); &FR($c0); ($c0)=&NR(1); | ||
159 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
160 | &mov("zero",$c2); | ||
161 | |||
162 | &mul_add_c($a[7],$b[7],$c0,$c1,$c2); &FR($a[7],$b[7]); | ||
163 | &st($c0,&QWPw(14,$rp)); | ||
164 | &st($c1,&QWPw(15,$rp)); | ||
165 | |||
166 | &FR($c0,$c1,$c2); | ||
167 | |||
168 | &ld($reg_s0,&swtmp(0)); | ||
169 | &ld($reg_s1,&swtmp(1)); | ||
170 | &stack_pop(2); | ||
171 | |||
172 | &function_end($name); | ||
173 | |||
174 | &fin_pool; | ||
175 | } | ||
176 | |||
177 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/sqr.pl b/src/lib/libcrypto/bn/asm/alpha.works/sqr.pl new file mode 100644 index 0000000000..a55b696906 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha.works/sqr.pl | |||
@@ -0,0 +1,113 @@ | |||
1 | #!/usr/local/bin/perl | ||
2 | # alpha assembler | ||
3 | |||
4 | sub bn_sqr_words # emit routine $name: for each of count words at ap, store the two-word square at rp | ||
5 | { | ||
6 | local($name)=@_; | ||
7 | local($cc,$a,$b,$r,$couny); # NOTE(review): $couny looks like a typo for $count; $b, $r and $couny are never used below | ||
8 | |||
9 | &init_pool(3); | ||
10 | ($cc)=GR("r0"); | ||
11 | |||
12 | $rp=&wparam(0); # result pointer | ||
13 | $ap=&wparam(1); # input pointer | ||
14 | $count=&wparam(2); # number of input words | ||
15 | |||
16 | &function_begin($name,""); | ||
17 | |||
18 | &comment(""); | ||
19 | &sub($count,4,$count); | ||
20 | &mov("zero",$cc); | ||
21 | &br(&label("finish")); # always taken: the 4-way loop below is parked in the $a heredoc, so "finish" restores count and the tail loop handles every word | ||
22 | &blt($count,&label("finish")); # emitted directly after the unconditional br above | ||
23 | |||
24 | ($a0,$r0)=&NR(2); | ||
25 | &ld($a0,&QWPw(0,$ap)); | ||
26 | &ld($r0,&QWPw(0,$rp)); | ||
27 | |||
28 | $a=<<'EOF'; # everything up to the EOF line is only stored in $a; this sub never evaluates it | ||
29 | ########################################################## | ||
30 | &set_label("loop"); | ||
31 | |||
32 | ($a1)=&NR(1); &ld($a1,&QWPw(1,$ap)); | ||
33 | ($b1)=&NR(1); &ld($b1,&QWPw(1,$bp)); | ||
34 | ($a2)=&NR(1); &ld($a2,&QWPw(2,$ap)); | ||
35 | ($b2)=&NR(1); &ld($b2,&QWPw(2,$bp)); | ||
36 | ($a3)=&NR(1); &ld($a3,&QWPw(3,$ap)); | ||
37 | ($b3)=&NR(1); &ld($b3,&QWPw(3,$bp)); | ||
38 | |||
39 | ($o0,$t0)=&NR(2); | ||
40 | &add($a0,$b0,$o0); | ||
41 | &cmpult($o0,$b0,$t0); | ||
42 | &add($o0,$cc,$o0); | ||
43 | &cmpult($o0,$cc,$cc); | ||
44 | &add($cc,$t0,$cc); &FR($t0); | ||
45 | |||
46 | ($t1,$o1)=&NR(2); | ||
47 | |||
48 | &add($a1,$b1,$o1); &FR($a1); | ||
49 | &cmpult($o1,$b1,$t1); &FR($b1); | ||
50 | &add($o1,$cc,$o1); | ||
51 | &cmpult($o1,$cc,$cc); | ||
52 | &add($cc,$t1,$cc); &FR($t1); | ||
53 | |||
54 | ($t2,$o2)=&NR(2); | ||
55 | |||
56 | &add($a2,$b2,$o2); &FR($a2); | ||
57 | &cmpult($o2,$b2,$t2); &FR($b2); | ||
58 | &add($o2,$cc,$o2); | ||
59 | &cmpult($o2,$cc,$cc); | ||
60 | &add($cc,$t2,$cc); &FR($t2); | ||
61 | |||
62 | ($t3,$o3)=&NR(2); | ||
63 | |||
64 | &add($a3,$b3,$o3); &FR($a3); | ||
65 | &cmpult($o3,$b3,$t3); &FR($b3); | ||
66 | &add($o3,$cc,$o3); | ||
67 | &cmpult($o3,$cc,$cc); | ||
68 | &add($cc,$t3,$cc); &FR($t3); | ||
69 | |||
70 | &st($o0,&QWPw(0,$rp)); &FR($o0); | ||
71 | &st($o1,&QWPw(0,$rp)); &FR($o1); | ||
72 | &st($o2,&QWPw(0,$rp)); &FR($o2); | ||
73 | &st($o3,&QWPw(0,$rp)); &FR($o3); | ||
74 | |||
75 | &sub($count,4,$count); # count-=4 | ||
76 | &add($ap,4*$QWS,$ap); # count+=4 | ||
77 | &add($bp,4*$QWS,$bp); # count+=4 | ||
78 | &add($rp,4*$QWS,$rp); # count+=4 | ||
79 | |||
80 | &blt($count,&label("finish")); | ||
81 | &ld($a0,&QWPw(0,$ap)); | ||
82 | &ld($b0,&QWPw(0,$bp)); | ||
83 | &br(&label("loop")); | ||
84 | EOF | ||
85 | ################################################## | ||
86 | # Tail loop: one word per pass (handles every word while the unrolled loop above stays disabled) | ||
87 | |||
88 | &set_label("last_loop"); | ||
89 | |||
90 | &ld(($a0)=&NR(1),&QWPw(0,$ap)); # get a | ||
91 | &mul($a0,$a0,($l0)=&NR(1)); # presumably the low word of a*a - confirm mul in the helper file | ||
92 | &add($ap,$QWS,$ap); # ap advances by one $QWS | ||
93 | &add($rp,2*$QWS,$rp); # rp advances by two $QWS: two result words per input word | ||
94 | &sub($count,1,$count); | ||
95 | &muh($a0,$a0,($h0)=&NR(1)); &FR($a0); # presumably the high word of a*a | ||
96 | &st($l0,&QWPw(-2,$rp)); &FR($l0); # rp was already advanced, hence the negative offsets | ||
97 | &st($h0,&QWPw(-1,$rp)); &FR($h0); | ||
98 | |||
99 | &bgt($count,&label("last_loop")); | ||
100 | &function_end_A($name); | ||
101 | |||
102 | ###################################################### | ||
103 | &set_label("finish"); | ||
104 | &add($count,4,$count); # undo the count-=4 done at entry | ||
105 | &bgt($count,&label("last_loop")); | ||
106 | |||
107 | &set_label("end"); | ||
108 | &function_end($name); | ||
109 | |||
110 | &fin_pool; | ||
111 | } | ||
112 | |||
113 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/sqr_c4.pl b/src/lib/libcrypto/bn/asm/alpha.works/sqr_c4.pl new file mode 100644 index 0000000000..bf33f5b503 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha.works/sqr_c4.pl | |||
@@ -0,0 +1,109 @@ | |||
1 | #!/usr/local/bin/perl | ||
2 | # alpha assembler | ||
3 | |||
4 | sub sqr_add_c # emit code for (c2,c1,c0) += a*a; assumes mul/muh give the low/high 64 bits of the unsigned product | ||
5 | { | ||
6 | local($a,$c0,$c1,$c2)=@_; | ||
7 | local($l1,$h1,$t1,$t2); | ||
8 | |||
9 | &mul($a,$a,($l1)=&NR(1)); | ||
10 | &muh($a,$a,($h1)=&NR(1)); | ||
11 | &add($c0,$l1,$c0); | ||
12 | &cmpult($c0,$l1,($t1)=&NR(1)); &FR($l1); # t1 = carry out of c0 | ||
13 | &add($h1,$t1,$h1); &FR($t1); # fold that carry into h1 first: the high word of a square is at most 2^64-2, so this cannot wrap | ||
14 | &add($c1,$h1,$c1); | ||
15 | &cmpult($c1,$h1,($t2)=&NR(1)); &FR($h1); # t2 = carry out of c1, now covering the folded-in c0 carry as well | ||
16 | &add($c2,$t2,$c2); &FR($t2); | ||
17 | } | ||
18 | |||
19 | sub sqr_add_c2 # emit code for (c2,c1,c0) += 2*a*b; assumes mul/muh give the low/high 64 bits of the unsigned product | ||
20 | { | ||
21 | local($a,$b,$c0,$c1,$c2)=@_; | ||
22 | local($l1,$h1,$lc1,$hc1); # the carry temporaries are now scoped to this sub | ||
23 | |||
24 | &mul($a,$b,($l1)=&NR(1)); | ||
25 | &muh($a,$b,($h1)=&NR(1)); | ||
26 | &cmplt($l1,"zero",($lc1)=&NR(1)); # sign bit of l1 = the bit shifted out when l1 is doubled (cmplt presumed signed) | ||
27 | &cmplt($h1,"zero",($hc1)=&NR(1)); # likewise for h1 | ||
28 | &add($l1,$l1,$l1); | ||
29 | &add($h1,$h1,$h1); | ||
30 | &add($h1,$lc1,$h1); &FR($lc1); # h1 is even after doubling, so adding a 0/1 bit cannot wrap | ||
31 | &add($c2,$hc1,$c2); &FR($hc1); | ||
32 | |||
33 | &add($c0,$l1,$c0); | ||
34 | &add($c1,$h1,$c1); | ||
35 | &cmpult($c0,$l1,($lc1)=&NR(1)); &FR($l1); # lc1 = carry out of c0 | ||
36 | &cmpult($c1,$h1,($hc1)=&NR(1)); &FR($h1); # hc1 = carry out of c1+h1 | ||
37 | &add($c2,$hc1,$c2); | ||
38 | &add($c1,$lc1,$c1); | ||
39 | &cmpult($c1,$lc1,$hc1); &FR($lc1); &add($c2,$hc1,$c2); &FR($hc1); # adding the c0 carry can wrap c1 too; propagate that carry into c2 | ||
40 | } | ||
41 | |||
42 | |||
43 | sub bn_sqr_comba4 # emit routine $name: column-by-column square of the 4 words at ap, written to the 8 words at rp | ||
44 | { | ||
45 | local($name)=@_; | ||
46 | local(@a,@b,$r,$c0,$c1,$c2); # @b and $r are not used in this sub | ||
47 | |||
48 | $cnt=1; | ||
49 | &init_pool(2); | ||
50 | |||
51 | $rp=&wparam(0); # result pointer | ||
52 | $ap=&wparam(1); # input pointer | ||
53 | |||
54 | &function_begin($name,""); | ||
55 | |||
56 | &comment(""); | ||
57 | |||
58 | &ld(($a[0])=&NR(1),&QWPw(0,$ap)); | ||
59 | &ld(($a[1])=&NR(1),&QWPw(1,$ap)); | ||
60 | &ld(($a[2])=&NR(1),&QWPw(2,$ap)); | ||
61 | &ld(($a[3])=&NR(1),&QWPw(3,$ap)); &FR($ap); # ap is not read again after this load | ||
62 | |||
63 | ($c0,$c1,$c2)=&NR(3); # three-word running accumulator | ||
64 | |||
65 | &mov("zero",$c2); | ||
66 | &mul($a[0],$a[0],$c0); # column 0: a0*a0 written straight into c0/c1 | ||
67 | &muh($a[0],$a[0],$c1); | ||
68 | &st($c0,&QWPw(0,$rp)); | ||
69 | ($c0,$c1,$c2)=($c1,$c2,$c0); # rotate the names: the register just stored becomes the new top word | ||
70 | &mov("zero",$c2); | ||
71 | |||
72 | &sqr_add_c2($a[0],$a[1],$c0,$c1,$c2); # column 1 | ||
73 | &st($c0,&QWPw(1,$rp)); | ||
74 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
75 | &mov("zero",$c2); | ||
76 | |||
77 | &sqr_add_c($a[1],$c0,$c1,$c2); # column 2 | ||
78 | &sqr_add_c2($a[2],$a[0],$c0,$c1,$c2); | ||
79 | &st($c0,&QWPw(2,$rp)); | ||
80 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
81 | &mov("zero",$c2); | ||
82 | |||
83 | &sqr_add_c2($a[3],$a[0],$c0,$c1,$c2); # column 3 | ||
84 | &sqr_add_c2($a[2],$a[1],$c0,$c1,$c2); | ||
85 | &st($c0,&QWPw(3,$rp)); | ||
86 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
87 | &mov("zero",$c2); | ||
88 | |||
89 | &sqr_add_c($a[2],$c0,$c1,$c2); # column 4 | ||
90 | &sqr_add_c2($a[3],$a[1],$c0,$c1,$c2); | ||
91 | &st($c0,&QWPw(4,$rp)); | ||
92 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
93 | &mov("zero",$c2); | ||
94 | |||
95 | &sqr_add_c2($a[3],$a[2],$c0,$c1,$c2); # column 5 | ||
96 | &st($c0,&QWPw(5,$rp)); | ||
97 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
98 | &mov("zero",$c2); | ||
99 | |||
100 | &sqr_add_c($a[3],$c0,$c1,$c2); # column 6; its upper word is result word 7 | ||
101 | &st($c0,&QWPw(6,$rp)); | ||
102 | &st($c1,&QWPw(7,$rp)); | ||
103 | |||
104 | &function_end($name); | ||
105 | |||
106 | &fin_pool; | ||
107 | } | ||
108 | |||
109 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/sqr_c8.pl b/src/lib/libcrypto/bn/asm/alpha.works/sqr_c8.pl new file mode 100644 index 0000000000..b4afe085f1 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha.works/sqr_c8.pl | |||
@@ -0,0 +1,132 @@ | |||
1 | #!/usr/local/bin/perl | ||
2 | # alpha assembler | ||
3 | |||
4 | sub bn_sqr_comba8 # emit routine $name: column-by-column square of the 8 words at ap, written to the 16 words at rp | ||
5 | { | ||
6 | local($name)=@_; | ||
7 | local(@a,@b,$r,$c0,$c1,$c2); # @b and $r are not used in this sub | ||
8 | |||
9 | $cnt=1; | ||
10 | &init_pool(2); | ||
11 | |||
12 | $rp=&wparam(0); # result pointer | ||
13 | $ap=&wparam(1); # input pointer | ||
14 | |||
15 | &function_begin($name,""); | ||
16 | |||
17 | &comment(""); | ||
18 | |||
19 | &ld(($a[0])=&NR(1),&QWPw(0,$ap)); | ||
20 | &ld(($a[1])=&NR(1),&QWPw(1,$ap)); | ||
21 | &ld(($a[2])=&NR(1),&QWPw(2,$ap)); | ||
22 | &ld(($a[3])=&NR(1),&QWPw(3,$ap)); | ||
23 | &ld(($a[4])=&NR(1),&QWPw(4,$ap)); | ||
24 | &ld(($a[5])=&NR(1),&QWPw(5,$ap)); | ||
25 | &ld(($a[6])=&NR(1),&QWPw(6,$ap)); | ||
26 | &ld(($a[7])=&NR(1),&QWPw(7,$ap)); &FR($ap); # ap is not read again after this load | ||
27 | |||
28 | ($c0,$c1,$c2)=&NR(3); # three-word running accumulator | ||
29 | |||
30 | &mov("zero",$c2); | ||
31 | &mul($a[0],$a[0],$c0); # column 0: a0*a0 written straight into c0/c1 | ||
32 | &muh($a[0],$a[0],$c1); | ||
33 | &st($c0,&QWPw(0,$rp)); | ||
34 | ($c0,$c1,$c2)=($c1,$c2,$c0); # rotate the names: the register just stored becomes the new top word | ||
35 | &mov("zero",$c2); | ||
36 | |||
37 | &sqr_add_c2($a[1],$a[0],$c0,$c1,$c2); # column 1 | ||
38 | &st($c0,&QWPw(1,$rp)); | ||
39 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
40 | &mov("zero",$c2); | ||
41 | |||
42 | &sqr_add_c($a[1],$c0,$c1,$c2); # column 2 | ||
43 | &sqr_add_c2($a[2],$a[0],$c0,$c1,$c2); | ||
44 | &st($c0,&QWPw(2,$rp)); | ||
45 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
46 | &mov("zero",$c2); | ||
47 | |||
48 | &sqr_add_c2($a[2],$a[1],$c0,$c1,$c2); # column 3 | ||
49 | &sqr_add_c2($a[3],$a[0],$c0,$c1,$c2); | ||
50 | &st($c0,&QWPw(3,$rp)); | ||
51 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
52 | &mov("zero",$c2); | ||
53 | |||
54 | &sqr_add_c($a[2],$c0,$c1,$c2); # column 4 | ||
55 | &sqr_add_c2($a[3],$a[1],$c0,$c1,$c2); | ||
56 | &sqr_add_c2($a[4],$a[0],$c0,$c1,$c2); | ||
57 | &st($c0,&QWPw(4,$rp)); | ||
58 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
59 | &mov("zero",$c2); | ||
60 | |||
61 | &sqr_add_c2($a[3],$a[2],$c0,$c1,$c2); # column 5 | ||
62 | &sqr_add_c2($a[4],$a[1],$c0,$c1,$c2); | ||
63 | &sqr_add_c2($a[5],$a[0],$c0,$c1,$c2); | ||
64 | &st($c0,&QWPw(5,$rp)); | ||
65 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
66 | &mov("zero",$c2); | ||
67 | |||
68 | &sqr_add_c($a[3],$c0,$c1,$c2); # column 6 | ||
69 | &sqr_add_c2($a[4],$a[2],$c0,$c1,$c2); | ||
70 | &sqr_add_c2($a[5],$a[1],$c0,$c1,$c2); | ||
71 | &sqr_add_c2($a[6],$a[0],$c0,$c1,$c2); | ||
72 | &st($c0,&QWPw(6,$rp)); | ||
73 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
74 | &mov("zero",$c2); | ||
75 | |||
76 | &sqr_add_c2($a[4],$a[3],$c0,$c1,$c2); # column 7 | ||
77 | &sqr_add_c2($a[5],$a[2],$c0,$c1,$c2); | ||
78 | &sqr_add_c2($a[6],$a[1],$c0,$c1,$c2); | ||
79 | &sqr_add_c2($a[7],$a[0],$c0,$c1,$c2); | ||
80 | &st($c0,&QWPw(7,$rp)); | ||
81 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
82 | &mov("zero",$c2); | ||
83 | |||
84 | &sqr_add_c($a[4],$c0,$c1,$c2); # column 8 | ||
85 | &sqr_add_c2($a[5],$a[3],$c0,$c1,$c2); | ||
86 | &sqr_add_c2($a[6],$a[2],$c0,$c1,$c2); | ||
87 | &sqr_add_c2($a[7],$a[1],$c0,$c1,$c2); | ||
88 | &st($c0,&QWPw(8,$rp)); | ||
89 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
90 | &mov("zero",$c2); | ||
91 | |||
92 | &sqr_add_c2($a[5],$a[4],$c0,$c1,$c2); # column 9 | ||
93 | &sqr_add_c2($a[6],$a[3],$c0,$c1,$c2); | ||
94 | &sqr_add_c2($a[7],$a[2],$c0,$c1,$c2); | ||
95 | &st($c0,&QWPw(9,$rp)); | ||
96 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
97 | &mov("zero",$c2); | ||
98 | |||
99 | &sqr_add_c($a[5],$c0,$c1,$c2); # column 10 | ||
100 | &sqr_add_c2($a[6],$a[4],$c0,$c1,$c2); | ||
101 | &sqr_add_c2($a[7],$a[3],$c0,$c1,$c2); | ||
102 | &st($c0,&QWPw(10,$rp)); | ||
103 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
104 | &mov("zero",$c2); | ||
105 | |||
106 | &sqr_add_c2($a[6],$a[5],$c0,$c1,$c2); # column 11 | ||
107 | &sqr_add_c2($a[7],$a[4],$c0,$c1,$c2); | ||
108 | &st($c0,&QWPw(11,$rp)); | ||
109 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
110 | &mov("zero",$c2); | ||
111 | |||
112 | &sqr_add_c($a[6],$c0,$c1,$c2); # column 12 | ||
113 | &sqr_add_c2($a[7],$a[5],$c0,$c1,$c2); | ||
114 | &st($c0,&QWPw(12,$rp)); | ||
115 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
116 | &mov("zero",$c2); | ||
117 | |||
118 | &sqr_add_c2($a[7],$a[6],$c0,$c1,$c2); # column 13 | ||
119 | &st($c0,&QWPw(13,$rp)); | ||
120 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
121 | &mov("zero",$c2); | ||
122 | |||
123 | &sqr_add_c($a[7],$c0,$c1,$c2); # column 14; its upper word is result word 15 | ||
124 | &st($c0,&QWPw(14,$rp)); | ||
125 | &st($c1,&QWPw(15,$rp)); | ||
126 | |||
127 | &function_end($name); | ||
128 | |||
129 | &fin_pool; | ||
130 | } | ||
131 | |||
132 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha.works/sub.pl b/src/lib/libcrypto/bn/asm/alpha.works/sub.pl new file mode 100644 index 0000000000..d998da5c21 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha.works/sub.pl | |||
@@ -0,0 +1,108 @@ | |||
1 | #!/usr/local/bin/perl | ||
2 | # alpha assembler | ||
3 | |||
4 | sub bn_sub_words # emit routine $name: rp[i] = ap[i] - bp[i] - borrow for count words; the running borrow is kept in $cc | ||
5 | { | ||
6 | local($name)=@_; | ||
7 | local($cc,$a,$b,$r); | ||
8 | |||
9 | &init_pool(4); | ||
10 | ($cc)=GR("r0"); | ||
11 | |||
12 | $rp=&wparam(0); # result pointer | ||
13 | $ap=&wparam(1); # first operand pointer | ||
14 | $bp=&wparam(2); # second operand pointer | ||
15 | $count=&wparam(3); # number of words | ||
16 | |||
17 | &function_begin($name,""); | ||
18 | |||
19 | &comment(""); | ||
20 | &sub($count,4,$count); | ||
21 | &mov("zero",$cc); # no borrow on entry | ||
22 | &blt($count,&label("finish")); # fewer than 4 words: only the tail loop runs | ||
23 | |||
24 | ($a0,$b0)=&NR(2); | ||
25 | &ld($a0,&QWPw(0,$ap)); | ||
26 | &ld($b0,&QWPw(0,$bp)); | ||
27 | |||
28 | ########################################################## | ||
29 | &set_label("loop"); # 4 words per pass; loads and stores are interleaved with the arithmetic | ||
30 | |||
31 | ($a1,$tmp,$b1,$a2,$b2,$a3,$b3,$o0)=&NR(8); | ||
32 | &ld($a1,&QWPw(1,$ap)); | ||
33 | &cmpult($a0,$b0,$tmp); # will we borrow? | ||
34 | &ld($b1,&QWPw(1,$bp)); | ||
35 | &sub($a0,$b0,$a0); # do the subtract | ||
36 | &ld($a2,&QWPw(2,$ap)); | ||
37 | &cmpult($a0,$cc,$b0); # will taking off the incoming borrow underflow? | ||
38 | &ld($b2,&QWPw(2,$bp)); | ||
39 | &sub($a0,$cc,$o0); # take off the incoming borrow | ||
40 | &ld($a3,&QWPw(3,$ap)); | ||
41 | &add($b0,$tmp,$cc); ($t1,$o1)=&NR(2); &FR($tmp); # new borrow = sum of the two flags | ||
42 | |||
43 | &cmpult($a1,$b1,$t1); # will we borrow? | ||
44 | &sub($a1,$b1,$a1); # do the subtract | ||
45 | &ld($b3,&QWPw(3,$bp)); | ||
46 | &cmpult($a1,$cc,$b1); # will taking off the incoming borrow underflow? | ||
47 | &sub($a1,$cc,$o1); # take off the incoming borrow | ||
48 | &add($b1,$t1,$cc); ($tmp,$o2)=&NR(2); &FR($t1,$a1,$b1); | ||
49 | |||
50 | &cmpult($a2,$b2,$tmp); # will we borrow? | ||
51 | &sub($a2,$b2,$a2); # do the subtract | ||
52 | &st($o0,&QWPw(0,$rp)); &FR($o0); # save | ||
53 | &cmpult($a2,$cc,$b2); # will taking off the incoming borrow underflow? | ||
54 | &sub($a2,$cc,$o2); # take off the incoming borrow | ||
55 | &add($b2,$tmp,$cc); ($t3,$o3)=&NR(2); &FR($tmp,$a2,$b2); | ||
56 | |||
57 | &cmpult($a3,$b3,$t3); # will we borrow? | ||
58 | &sub($a3,$b3,$a3); # do the subtract | ||
59 | &st($o1,&QWPw(1,$rp)); &FR($o1); | ||
60 | &cmpult($a3,$cc,$b3); # will taking off the incoming borrow underflow? | ||
61 | &sub($a3,$cc,$o3); # take off the incoming borrow | ||
62 | &add($b3,$t3,$cc); &FR($t3,$a3,$b3); | ||
63 | |||
64 | &st($o2,&QWPw(2,$rp)); &FR($o2); | ||
65 | &sub($count,4,$count); # count-=4 | ||
66 | &st($o3,&QWPw(3,$rp)); &FR($o3); | ||
67 | &add($ap,4*$QWS,$ap); # ap advances by 4*$QWS | ||
68 | &add($bp,4*$QWS,$bp); # bp advances by 4*$QWS | ||
69 | &add($rp,4*$QWS,$rp); # rp advances by 4*$QWS | ||
70 | |||
71 | &blt($count,&label("finish")); | ||
72 | &ld($a0,&QWPw(0,$ap)); | ||
73 | &ld($b0,&QWPw(0,$bp)); | ||
74 | &br(&label("loop")); | ||
75 | ################################################## | ||
76 | # Do the last 0..3 words | ||
77 | |||
78 | &set_label("last_loop"); | ||
79 | |||
80 | &ld($a0,&QWPw(0,$ap)); # get a | ||
81 | &ld($b0,&QWPw(0,$bp)); # get b | ||
82 | &cmpult($a0,$b0,$tmp); # will we borrow? | ||
83 | &sub($a0,$b0,$a0); # do the subtract | ||
84 | &cmpult($a0,$cc,$b0); # will taking off the incoming borrow underflow? | ||
85 | &sub($a0,$cc,$a0); # take off the incoming borrow | ||
86 | &st($a0,&QWPw(0,$rp)); # save | ||
87 | &add($b0,$tmp,$cc); # add the borrows | ||
88 | |||
89 | &add($ap,$QWS,$ap); | ||
90 | &add($bp,$QWS,$bp); | ||
91 | &add($rp,$QWS,$rp); | ||
92 | &sub($count,1,$count); | ||
93 | &bgt($count,&label("last_loop")); | ||
94 | &function_end_A($name); | ||
95 | |||
96 | ###################################################### | ||
97 | &set_label("finish"); | ||
98 | &add($count,4,$count); # undo the count-=4 bias; what is left is the number of tail words | ||
99 | &bgt($count,&label("last_loop")); | ||
100 | |||
101 | &FR($a0,$b0); | ||
102 | &set_label("end"); | ||
103 | &function_end($name); | ||
104 | |||
105 | &fin_pool; | ||
106 | } | ||
107 | |||
108 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha/add.pl b/src/lib/libcrypto/bn/asm/alpha/add.pl new file mode 100644 index 0000000000..13bf516428 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha/add.pl | |||
@@ -0,0 +1,118 @@ | |||
1 | #!/usr/local/bin/perl | ||
2 | # alpha assembler | ||
3 | |||
4 | sub bn_add_words # emit routine $name: rp[i] = ap[i] + bp[i] + carry for count words; the running carry is kept in $cc | ||
5 | { | ||
6 | local($name)=@_; | ||
7 | local($cc,$a,$b,$r); | ||
8 | |||
9 | &init_pool(4); | ||
10 | ($cc)=GR("r0"); | ||
11 | |||
12 | $rp=&wparam(0); # result pointer | ||
13 | $ap=&wparam(1); # first operand pointer | ||
14 | $bp=&wparam(2); # second operand pointer | ||
15 | $count=&wparam(3); # number of words | ||
16 | |||
17 | &function_begin($name,""); | ||
18 | |||
19 | &comment(""); | ||
20 | &sub($count,4,$count); | ||
21 | &mov("zero",$cc); # no carry on entry | ||
22 | &blt($count,&label("finish")); # fewer than 4 words: only the tail loop runs | ||
23 | |||
24 | ($a0,$b0)=&NR(2); # NOTE(review): both are re-allocated by the NR(1) calls inside the loop; these two registers are never handed back | ||
25 | |||
26 | ########################################################## | ||
27 | &set_label("loop"); # 4 words per pass; loads and stores are interleaved with the arithmetic | ||
28 | |||
29 | &ld(($a0)=&NR(1),&QWPw(0,$ap)); | ||
30 | &ld(($b0)=&NR(1),&QWPw(0,$bp)); | ||
31 | &ld(($a1)=&NR(1),&QWPw(1,$ap)); | ||
32 | &ld(($b1)=&NR(1),&QWPw(1,$bp)); | ||
33 | |||
34 | ($o0,$t0)=&NR(2); | ||
35 | &add($a0,$b0,$o0); | ||
36 | &ld(($a2)=&NR(1),&QWPw(2,$ap)); | ||
37 | &cmpult($o0,$b0,$t0); # t0 = carry out of a0+b0 | ||
38 | &add($o0,$cc,$o0); | ||
39 | &cmpult($o0,$cc,$cc); # carry out of adding the incoming carry | ||
40 | &ld(($b2)=&NR(1),&QWPw(2,$bp)); | ||
41 | &add($cc,$t0,$cc); &FR($t0); # new carry = sum of the two flags | ||
42 | |||
43 | ($t1,$o1)=&NR(2); | ||
44 | |||
45 | &add($a1,$b1,$o1); &FR($a1); | ||
46 | &cmpult($o1,$b1,$t1); &FR($b1); | ||
47 | &add($o1,$cc,$o1); | ||
48 | &cmpult($o1,$cc,$cc); | ||
49 | &ld(($a3)=&NR(1),&QWPw(3,$ap)); | ||
50 | &add($cc,$t1,$cc); &FR($t1); | ||
51 | |||
52 | ($t2,$o2)=&NR(2); | ||
53 | |||
54 | &add($a2,$b2,$o2); &FR($a2); | ||
55 | &cmpult($o2,$b2,$t2); &FR($b2); | ||
56 | &add($o2,$cc,$o2); | ||
57 | &cmpult($o2,$cc,$cc); | ||
58 | &ld(($b3)=&NR(1),&QWPw(3,$bp)); | ||
59 | &st($o0,&QWPw(0,$rp)); &FR($o0); | ||
60 | &add($cc,$t2,$cc); &FR($t2); | ||
61 | |||
62 | ($t3,$o3)=&NR(2); | ||
63 | |||
64 | &st($o1,&QWPw(1,$rp)); &FR($o1); # word 1 of this group (was stored at offset 0, overwriting word 0) | ||
65 | &add($a3,$b3,$o3); &FR($a3); | ||
66 | &cmpult($o3,$b3,$t3); &FR($b3); | ||
67 | &add($o3,$cc,$o3); | ||
68 | &st($o2,&QWPw(2,$rp)); &FR($o2); # word 2 of this group | ||
69 | &cmpult($o3,$cc,$cc); | ||
70 | &st($o3,&QWPw(3,$rp)); &FR($o3); # word 3 of this group | ||
71 | &add($cc,$t3,$cc); &FR($t3); | ||
72 | |||
73 | |||
74 | &sub($count,4,$count); # count-=4 | ||
75 | &add($ap,4*$QWS,$ap); # ap advances by 4*$QWS | ||
76 | &add($bp,4*$QWS,$bp); # bp advances by 4*$QWS | ||
77 | &add($rp,4*$QWS,$rp); # rp advances by 4*$QWS | ||
78 | |||
79 | ### | ||
80 | &bge($count,&label("loop")); | ||
81 | ### | ||
82 | &br(&label("finish")); | ||
83 | ################################################## | ||
84 | # Do the last 0..3 words | ||
85 | |||
86 | ($t0,$o0)=&NR(2); | ||
87 | &set_label("last_loop"); | ||
88 | |||
89 | &ld($a0,&QWPw(0,$ap)); # get a | ||
90 | &ld($b0,&QWPw(0,$bp)); # get b | ||
91 | &add($ap,$QWS,$ap); | ||
92 | &add($bp,$QWS,$bp); | ||
93 | &add($a0,$b0,$o0); | ||
94 | &sub($count,1,$count); | ||
95 | &cmpult($o0,$b0,$t0); # carry out of a+b | ||
96 | &add($o0,$cc,$o0); # add the incoming carry | ||
97 | &cmpult($o0,$cc,$cc); # carry out of that addition | ||
98 | &add($rp,$QWS,$rp); | ||
99 | &st($o0,&QWPw(-1,$rp)); # save (rp was already advanced) | ||
100 | &add($cc,$t0,$cc); # add the carries | ||
101 | |||
102 | ### | ||
103 | &bgt($count,&label("last_loop")); | ||
104 | &function_end_A($name); | ||
105 | |||
106 | ###################################################### | ||
107 | &set_label("finish"); | ||
108 | &add($count,4,$count); # undo the count-=4 bias; what is left is the number of tail words | ||
109 | &bgt($count,&label("last_loop")); | ||
110 | |||
111 | &FR($o0,$t0,$a0,$b0); | ||
112 | &set_label("end"); | ||
113 | &function_end($name); | ||
114 | |||
115 | &fin_pool; | ||
116 | } | ||
117 | |||
118 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha/div.pl b/src/lib/libcrypto/bn/asm/alpha/div.pl new file mode 100644 index 0000000000..e9e680897a --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha/div.pl | |||
@@ -0,0 +1,144 @@ | |||
1 | #!/usr/local/bin/perl | ||
2 | |||
3 | sub bn_div_words # appends a fixed, compiler-generated assembly listing for bn_div_words via asm_add | ||
4 | { | ||
5 | local($data)=<<'EOF'; | ||
6 | # | ||
7 | # What follows was taken directly from the C compiler with a few | ||
8 | # hacks to redo the lables. | ||
9 | # | ||
10 | .text | ||
11 | .set noreorder | ||
12 | .set volatile | ||
13 | .align 3 | ||
14 | .globl bn_div_words | ||
15 | .ent bn_div_words | ||
16 | bn_div_words: | ||
17 | ldgp $29,0($27) | ||
18 | bn_div_words.ng: | ||
19 | lda $30,-48($30) | ||
20 | .frame $30,48,$26,0 | ||
21 | stq $26,0($30) | ||
22 | stq $9,8($30) | ||
23 | stq $10,16($30) | ||
24 | stq $11,24($30) | ||
25 | stq $12,32($30) | ||
26 | stq $13,40($30) | ||
27 | .mask 0x4003e00,-48 | ||
28 | .prologue 1 | ||
29 | bis $16,$16,$9 | ||
30 | bis $17,$17,$10 | ||
31 | bis $18,$18,$11 | ||
32 | bis $31,$31,$13 | ||
33 | bis $31,2,$12 | ||
34 | bne $11,$9119 | ||
35 | lda $0,-1 | ||
36 | br $31,$9136 | ||
37 | .align 4 | ||
38 | $9119: | ||
39 | bis $11,$11,$16 | ||
40 | jsr $26,BN_num_bits_word | ||
41 | ldgp $29,0($26) | ||
42 | subq $0,64,$1 | ||
43 | beq $1,$9120 | ||
44 | bis $31,1,$1 | ||
45 | sll $1,$0,$1 | ||
46 | cmpule $9,$1,$1 | ||
47 | bne $1,$9120 | ||
48 | # lda $16,_IO_stderr_ | ||
49 | # lda $17,$C32 | ||
50 | # bis $0,$0,$18 | ||
51 | # jsr $26,fprintf | ||
52 | # ldgp $29,0($26) | ||
53 | jsr $26,abort | ||
54 | ldgp $29,0($26) | ||
55 | .align 4 | ||
56 | $9120: | ||
57 | bis $31,64,$3 | ||
58 | cmpult $9,$11,$2 | ||
59 | subq $3,$0,$1 | ||
60 | addl $1,$31,$0 | ||
61 | subq $9,$11,$1 | ||
62 | cmoveq $2,$1,$9 | ||
63 | beq $0,$9122 | ||
64 | zapnot $0,15,$2 | ||
65 | subq $3,$0,$1 | ||
66 | sll $11,$2,$11 | ||
67 | sll $9,$2,$3 | ||
68 | srl $10,$1,$1 | ||
69 | sll $10,$2,$10 | ||
70 | bis $3,$1,$9 | ||
71 | $9122: | ||
72 | srl $11,32,$5 | ||
73 | zapnot $11,15,$6 | ||
74 | lda $7,-1 | ||
75 | .align 5 | ||
76 | $9123: | ||
77 | srl $9,32,$1 | ||
78 | subq $1,$5,$1 | ||
79 | bne $1,$9126 | ||
80 | zapnot $7,15,$27 | ||
81 | br $31,$9127 | ||
82 | .align 4 | ||
83 | $9126: | ||
84 | bis $9,$9,$24 | ||
85 | bis $5,$5,$25 | ||
86 | divqu $24,$25,$27 | ||
87 | $9127: | ||
88 | srl $10,32,$4 | ||
89 | .align 5 | ||
90 | $9128: | ||
91 | mulq $27,$5,$1 | ||
92 | subq $9,$1,$3 | ||
93 | zapnot $3,240,$1 | ||
94 | bne $1,$9129 | ||
95 | mulq $6,$27,$2 | ||
96 | sll $3,32,$1 | ||
97 | addq $1,$4,$1 | ||
98 | cmpule $2,$1,$2 | ||
99 | bne $2,$9129 | ||
100 | subq $27,1,$27 | ||
101 | br $31,$9128 | ||
102 | .align 4 | ||
103 | $9129: | ||
104 | mulq $27,$6,$1 | ||
105 | mulq $27,$5,$4 | ||
106 | srl $1,32,$3 | ||
107 | sll $1,32,$1 | ||
108 | addq $4,$3,$4 | ||
109 | cmpult $10,$1,$2 | ||
110 | subq $10,$1,$10 | ||
111 | addq $2,$4,$2 | ||
112 | cmpult $9,$2,$1 | ||
113 | bis $2,$2,$4 | ||
114 | beq $1,$9134 | ||
115 | addq $9,$11,$9 | ||
116 | subq $27,1,$27 | ||
117 | $9134: | ||
118 | subl $12,1,$12 | ||
119 | subq $9,$4,$9 | ||
120 | beq $12,$9124 | ||
121 | sll $27,32,$13 | ||
122 | sll $9,32,$2 | ||
123 | srl $10,32,$1 | ||
124 | sll $10,32,$10 | ||
125 | bis $2,$1,$9 | ||
126 | br $31,$9123 | ||
127 | .align 4 | ||
128 | $9124: | ||
129 | bis $13,$27,$0 | ||
130 | $9136: | ||
131 | ldq $26,0($30) | ||
132 | ldq $9,8($30) | ||
133 | ldq $10,16($30) | ||
134 | ldq $11,24($30) | ||
135 | ldq $12,32($30) | ||
136 | ldq $13,40($30) | ||
137 | addq $30,48,$30 | ||
138 | ret $31,($26),1 | ||
139 | .end bn_div_words | ||
140 | EOF | ||
141 | &asm_add($data); # hand the listing to the generator's output; the entry label above now carries the colon it was missing | ||
142 | } | ||
143 | |||
144 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha/mul.pl b/src/lib/libcrypto/bn/asm/alpha/mul.pl new file mode 100644 index 0000000000..76c926566c --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha/mul.pl | |||
@@ -0,0 +1,104 @@ | |||
1 | #!/usr/local/bin/perl | ||
2 | # alpha assembler | ||
3 | |||
4 | sub bn_mul_words # emit routine $name: rp[i] = low(ap[i]*word) + carry for count words; the running carry word is kept in $cc | ||
5 | { | ||
6 | local($name)=@_; | ||
7 | local($cc,$a,$b,$r,$couny); # NOTE(review): $couny looks like a typo for $count; none of $a, $b, $r, $couny is used below | ||
8 | |||
9 | &init_pool(4); | ||
10 | ($cc)=GR("r0"); | ||
11 | |||
12 | $rp=&wparam(0); # result pointer | ||
13 | $ap=&wparam(1); # input pointer | ||
14 | $count=&wparam(2); # number of words | ||
15 | $word=&wparam(3); # multiplier word | ||
16 | |||
17 | &function_begin($name,""); | ||
18 | |||
19 | &comment(""); | ||
20 | &sub($count,4,$count); | ||
21 | &mov("zero",$cc); | ||
22 | ### | ||
23 | &blt($count,&label("finish")); # fewer than 4 words: only the tail loop runs | ||
24 | |||
25 | ($a0)=&NR(1); &ld($a0,&QWPw(0,$ap)); | ||
26 | |||
27 | &set_label("loop"); | ||
28 | |||
29 | ($a1)=&NR(1); &ld($a1,&QWPw(1,$ap)); | ||
30 | ($a2)=&NR(1); &ld($a2,&QWPw(2,$ap)); | ||
31 | |||
32 | &muh($a0,$word,($h0)=&NR(1)); # a0 is still needed by the mul below, so it is freed there and only once | ||
33 | ($a3)=&NR(1); &ld($a3,&QWPw(3,$ap)); | ||
34 | ### wait 8 | ||
35 | &mul($a0,$word,($l0)=&NR(1)); &FR($a0); | ||
36 | ### wait 8 | ||
37 | &muh($a1,$word,($h1)=&NR(1)); | ||
38 | &add($l0,$cc,$l0); ### wait 8 | ||
39 | &mul($a1,$word,($l1)=&NR(1)); &FR($a1); | ||
40 | &cmpult($l0,$cc,$cc); ### wait 8 | ||
41 | &muh($a2,$word,($h2)=&NR(1)); | ||
42 | &add($h0,$cc,$cc); &FR($h0); ### wait 8 | ||
43 | &mul($a2,$word,($l2)=&NR(1)); &FR($a2); | ||
44 | &add($l1,$cc,$l1); ### wait 8 | ||
45 | &st($l0,&QWPw(0,$rp)); &FR($l0); | ||
46 | &cmpult($l1,$cc,$cc); ### wait 8 | ||
47 | &muh($a3,$word,($h3)=&NR(1)); | ||
48 | &add($h1,$cc,$cc); &FR($h1); | ||
49 | &mul($a3,$word,($l3)=&NR(1)); &FR($a3); | ||
50 | &add($l2,$cc,$l2); | ||
51 | &st($l1,&QWPw(1,$rp)); &FR($l1); | ||
52 | &cmpult($l2,$cc,$cc); | ||
53 | &add($h2,$cc,$cc); &FR($h2); | ||
54 | &sub($count,4,$count); # count-=4 | ||
55 | &st($l2,&QWPw(2,$rp)); &FR($l2); | ||
56 | &add($l3,$cc,$l3); | ||
57 | &cmpult($l3,$cc,$cc); | ||
58 | # (a stray add on $bp was dropped here: this routine takes no bp operand) | ||
59 | &add($h3,$cc,$cc); &FR($h3); | ||
60 | &add($ap,4*$QWS,$ap); # ap advances by 4*$QWS | ||
61 | &st($l3,&QWPw(3,$rp)); &FR($l3); | ||
62 | &add($rp,4*$QWS,$rp); # rp advances by 4*$QWS | ||
63 | ### | ||
64 | &blt($count,&label("finish")); | ||
65 | ($a0)=&NR(1); &ld($a0,&QWPw(0,$ap)); | ||
66 | &br(&label("finish")); # NOTE(review): goes to "finish", not "loop", so the 4-way body runs at most once and the tail loop does the rest | ||
67 | ################################################## | ||
68 | |||
69 | ################################################## | ||
70 | # Do the last 0..3 words | ||
71 | |||
72 | &set_label("last_loop"); | ||
73 | |||
74 | &ld(($a0)=&NR(1),&QWPw(0,$ap)); # get a | ||
75 | ### | ||
76 | ### | ||
77 | ### | ||
78 | &muh($a0,$word,($h0)=&NR(1)); | ||
79 | ### Wait 8 for next mul issue | ||
80 | &mul($a0,$word,($l0)=&NR(1)); &FR($a0); | ||
81 | &add($ap,$QWS,$ap); | ||
82 | ### Loose 12 until result is available | ||
83 | &add($rp,$QWS,$rp); | ||
84 | &sub($count,1,$count); | ||
85 | &add($l0,$cc,$l0); | ||
86 | ### | ||
87 | &st($l0,&QWPw(-1,$rp)); &FR($l0); | ||
88 | &cmpult($l0,$cc,$cc); | ||
89 | &add($h0,$cc,$cc); &FR($h0); | ||
90 | &bgt($count,&label("last_loop")); | ||
91 | &function_end_A($name); | ||
92 | |||
93 | ###################################################### | ||
94 | &set_label("finish"); | ||
95 | &add($count,4,$count); # undo the count-=4 bias; what is left is the number of tail words | ||
96 | &bgt($count,&label("last_loop")); | ||
97 | |||
98 | &set_label("end"); | ||
99 | &function_end($name); | ||
100 | |||
101 | &fin_pool; | ||
102 | } | ||
103 | |||
104 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha/mul_add.pl b/src/lib/libcrypto/bn/asm/alpha/mul_add.pl new file mode 100644 index 0000000000..0d6df69bc4 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha/mul_add.pl | |||
@@ -0,0 +1,123 @@ | |||
1 | #!/usr/local/bin/perl | ||
2 | # alpha assembler | ||
3 | |||
4 | sub bn_mul_add_words # emit routine $name: rp[i] += ap[i]*word + carry for count words; the running carry word is kept in $cc | ||
5 | { | ||
6 | local($name)=@_; | ||
7 | local($cc,$a,$b,$r,$couny); # NOTE(review): $couny looks like a typo for $count; $b, $r and $couny are not used below | ||
8 | |||
9 | &init_pool(4); | ||
10 | ($cc)=GR("r0"); | ||
11 | |||
12 | $rp=&wparam(0); # result pointer (read and written) | ||
13 | $ap=&wparam(1); # input pointer | ||
14 | $count=&wparam(2); # number of words | ||
15 | $word=&wparam(3); # multiplier word | ||
16 | |||
17 | &function_begin($name,""); | ||
18 | |||
19 | &comment(""); | ||
20 | &sub($count,4,$count); | ||
21 | &mov("zero",$cc); | ||
22 | ### | ||
23 | &blt($count,&label("finish")); | ||
24 | |||
25 | &br(&label("finish")); # the 4-way loop below is parked in $a, so count>=4 must also go through "finish" to restore count (it used to fall into the tail loop 4 words short); the a[0] preload belongs with the parked loop | ||
26 | |||
27 | $a=<<'EOF'; | ||
28 | ########################################################## | ||
29 | &set_label("loop"); | ||
30 | |||
31 | &ld(($r0)=&NR(1),&QWPw(0,$rp)); | ||
32 | &ld(($a1)=&NR(1),&QWPw(1,$ap)); | ||
33 | &muh($a0,$word,($h0)=&NR(1)); | ||
34 | &ld(($r1)=&NR(1),&QWPw(1,$rp)); | ||
35 | &ld(($a2)=&NR(1),&QWPw(2,$ap)); | ||
36 | ### | ||
37 | &mul($a0,$word,($l0)=&NR(1)); &FR($a0); | ||
38 | &ld(($r2)=&NR(1),&QWPw(2,$rp)); | ||
39 | &muh($a1,$word,($h1)=&NR(1)); | ||
40 | &ld(($a3)=&NR(1),&QWPw(3,$ap)); | ||
41 | &mul($a1,$word,($l1)=&NR(1)); &FR($a1); | ||
42 | &ld(($r3)=&NR(1),&QWPw(3,$rp)); | ||
43 | &add($r0,$l0,$r0); | ||
44 | &add($r1,$l1,$r1); | ||
45 | &cmpult($r0,$l0,($t0)=&NR(1)); &FR($l0); | ||
46 | &cmpult($r1,$l1,($t1)=&NR(1)); &FR($l1); | ||
47 | &muh($a2,$word,($h2)=&NR(1)); | ||
48 | &add($r0,$cc,$r0); | ||
49 | &add($h0,$t0,$h0); &FR($t0); | ||
50 | &cmpult($r0,$cc,$cc); | ||
51 | &add($h1,$t1,$h1); &FR($t1); | ||
52 | &add($h0,$cc,$cc); &FR($h0); | ||
53 | &mul($a2,$word,($l2)=&NR(1)); &FR($a2); | ||
54 | &add($r1,$cc,$r1); | ||
55 | &cmpult($r1,$cc,$cc); | ||
56 | &add($r2,$l2,$r2); | ||
57 | &add($h1,$cc,$cc); &FR($h1); | ||
58 | &cmpult($r2,$l2,($t2)=&NR(1)); &FR($l2); | ||
59 | &muh($a3,$word,($h3)=&NR(1)); | ||
60 | &add($r2,$cc,$r2); | ||
61 | &st($r0,&QWPw(0,$rp)); &FR($r0); | ||
62 | &add($h2,$t2,$h2); &FR($t2); | ||
63 | &st($r1,&QWPw(1,$rp)); &FR($r1); | ||
64 | &cmpult($r2,$cc,$cc); | ||
65 | &mul($a3,$word,($l3)=&NR(1)); &FR($a3); | ||
66 | &add($h2,$cc,$cc); &FR($h2); | ||
67 | &st($r2,&QWPw(2,$rp)); &FR($r2); | ||
68 | &sub($count,4,$count); # count-=4 | ||
69 | &add($rp,4*$QWS,$rp); # count+=4 | ||
70 | &add($r3,$l3,$r3); | ||
71 | &add($ap,4*$QWS,$ap); # count+=4 | ||
72 | &cmpult($r3,$l3,($t3)=&NR(1)); &FR($l3); | ||
73 | &add($r3,$cc,$r3); | ||
74 | &add($h3,$t3,$h3); &FR($t3); | ||
75 | &cmpult($r3,$cc,$cc); | ||
76 | &st($r3,&QWPw(-1,$rp)); &FR($r3); | ||
77 | &add($h3,$cc,$cc); &FR($h3); | ||
78 | |||
79 | ### | ||
80 | &blt($count,&label("finish")); | ||
81 | &ld(($a0)=&NR(1),&QWPw(0,$ap)); | ||
82 | &br(&label("loop")); | ||
83 | EOF | ||
84 | ################################################## | ||
85 | # Tail loop: one word per pass (handles every word while the unrolled loop above stays disabled) | ||
86 | |||
87 | &set_label("last_loop"); | ||
88 | |||
89 | &ld(($a0)=&NR(1),&QWPw(0,$ap)); # get a | ||
90 | &ld(($r0)=&NR(1),&QWPw(0,$rp)); # get r | ||
91 | ### | ||
92 | ### | ||
93 | &muh($a0,$word,($h0)=&NR(1)); # a0 is still needed by the mul below, so it is freed there and only once | ||
94 | ### wait 8 | ||
95 | &mul($a0,$word,($l0)=&NR(1)); &FR($a0); | ||
96 | &add($rp,$QWS,$rp); | ||
97 | &add($ap,$QWS,$ap); | ||
98 | &sub($count,1,$count); | ||
99 | ### wait 3 until l0 is available | ||
100 | &add($r0,$l0,$r0); | ||
101 | ### | ||
102 | &cmpult($r0,$l0,($t0)=&NR(1)); &FR($l0); # t0 = carry out of r+l0 | ||
103 | &add($r0,$cc,$r0); | ||
104 | &add($h0,$t0,$h0); &FR($t0); | ||
105 | &cmpult($r0,$cc,$cc); # carry out of adding the incoming carry | ||
106 | &add($h0,$cc,$cc); &FR($h0); # next carry word = h0 plus both carries | ||
107 | |||
108 | &st($r0,&QWPw(-1,$rp)); &FR($r0); # rp was already advanced | ||
109 | &bgt($count,&label("last_loop")); | ||
110 | &function_end_A($name); | ||
111 | |||
112 | ###################################################### | ||
113 | &set_label("finish"); | ||
114 | &add($count,4,$count); # undo the count-=4 bias done at entry | ||
115 | &bgt($count,&label("last_loop")); | ||
116 | |||
117 | &set_label("end"); | ||
118 | &function_end($name); | ||
119 | |||
120 | &fin_pool; | ||
121 | } | ||
122 | |||
123 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha/mul_c4.pl b/src/lib/libcrypto/bn/asm/alpha/mul_c4.pl new file mode 100644 index 0000000000..9cc876ded4 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha/mul_c4.pl | |||
@@ -0,0 +1,215 @@ | |||
1 | #!/usr/local/bin/perl | ||
2 | # alpha assembler | ||
3 | |||
4 | # upto | ||
5 | |||
6 | sub mul_add_c # emit code that adds the product $a*$b into the accumulator registers ($c2,$c1,$c0) | ||
7 | { | ||
8 | local($a,$b,$c0,$c1,$c2)=@_; | ||
9 | local($l1,$h1,$t1,$t2); | ||
10 | # $l1/$h1 receive the &mul/&muh results (presumably the low/high product words; the helpers are defined elsewhere), $t1/$t2 the carries | ||
11 | &mul($a,$b,($l1)=&NR(1)); | ||
12 | &muh($a,$b,($h1)=&NR(1)); | ||
13 | &add($c0,$l1,$c0); # c0 += l1 | ||
14 | &cmpult($c0,$l1,($t1)=&NR(1)); &FR($l1); # t1 = (c0 < l1): the carry out of c0 | ||
15 | &add($t1,$h1,$h1); &FR($t1); # fold that carry into h1 before h1 is added to c1 | ||
16 | &add($c1,$h1,$c1); # c1 += h1 | ||
17 | &cmpult($c1,$h1,($t2)=&NR(1)); &FR($h1); # t2 = (c1 < h1): the carry out of c1 | ||
18 | &add($c2,$t2,$c2); &FR($t2); # c2 += t2 | ||
19 | } | ||
20 | |||
21 | sub bn_mul_comba4 # emit the 4x4-word multiply r[0..7] = a[0..3]*b[0..3], with products and carry additions interleaved by hand | ||
22 | { | ||
23 | local($name)=@_; | ||
24 | local(@a,@b,$r,$c0,$c1,$c2); | ||
25 | # $name goes to &function_begin/&function_end; $rp,$ap,$bp come from &wparam(0..2); "lo"/"hi" below mean the &mul/&muh result of a product | ||
26 | $cnt=1; | ||
27 | &init_pool(3); | ||
28 | |||
29 | $rp=&wparam(0); | ||
30 | $ap=&wparam(1); | ||
31 | $bp=&wparam(2); | ||
32 | |||
33 | &function_begin($name,""); | ||
34 | |||
35 | &comment(""); | ||
36 | |||
37 | &ld(($a[0])=&NR(1),&QWPw(0,$ap)); | ||
38 | &ld(($b[0])=&NR(1),&QWPw(0,$bp)); | ||
39 | &ld(($a[1])=&NR(1),&QWPw(1,$ap)); | ||
40 | &ld(($b[1])=&NR(1),&QWPw(1,$bp)); | ||
41 | &mul($a[0],$b[0],($r00)=&NR(1)); | ||
42 | &ld(($a[2])=&NR(1),&QWPw(2,$ap)); | ||
43 | &ld(($b[2])=&NR(1),&QWPw(2,$bp)); | ||
44 | &muh($a[0],$b[0],($r01)=&NR(1)); | ||
45 | &FR($ap); &ld(($a[3])=&NR(1),&QWPw(3,$ap)); # $ap is released first, so a[3] may be given the same register | ||
46 | &FR($bp); &ld(($b[3])=&NR(1),&QWPw(3,$bp)); # likewise for $bp and b[3] | ||
47 | &mul($a[0],$b[1],($r02)=&NR(1)); | ||
48 | # $R accumulates the current result word; $H1 and $H2 collect the carries out of it | ||
49 | ($R,$H1,$H2)=&NR(3); | ||
50 | |||
51 | &st($r00,&QWPw(0,$rp)); &FR($r00); | ||
52 | # r[1] = hi(a0*b0) + lo(a0*b1) + lo(a1*b0) | ||
53 | &mov("zero",$R); | ||
54 | &mul($a[1],$b[0],($r03)=&NR(1)); | ||
55 | |||
56 | &mov("zero",$H1); | ||
57 | &mov("zero",$H2); # $H2 has to start at zero: a carry is added into it below | ||
58 | &add($R,$r01,$R); | ||
59 | &muh($a[0],$b[1],($r04)=&NR(1)); | ||
60 | &cmpult($R,$r01,($t01)=&NR(1)); &FR($r01); | ||
61 | &add($R,$r02,$R); | ||
62 | &add($H1,$t01,$H1); &FR($t01); | ||
63 | &muh($a[1],$b[0],($r05)=&NR(1)); | ||
64 | &cmpult($R,$r02,($t02)=&NR(1)); &FR($r02); | ||
65 | &add($R,$r03,$R); | ||
66 | &add($H2,$t02,$H2); &FR($t02); | ||
67 | &mul($a[0],$b[2],($r06)=&NR(1)); | ||
68 | &cmpult($R,$r03,($t03)=&NR(1)); &FR($r03); | ||
69 | &add($H1,$t03,$H1); &FR($t03); | ||
70 | &st($R,&QWPw(1,$rp)); | ||
71 | &add($H1,$H2,$R); | ||
72 | # r[2] = carries + hi(a0*b1) + hi(a1*b0) + lo(a0*b2) + lo(a1*b1) + lo(a2*b0) | ||
73 | &mov("zero",$H1); | ||
74 | &add($R,$r04,$R); | ||
75 | &mov("zero",$H2); | ||
76 | &mul($a[1],$b[1],($r07)=&NR(1)); | ||
77 | &cmpult($R,$r04,($t04)=&NR(1)); &FR($r04); | ||
78 | &add($R,$r05,$R); | ||
79 | &add($H1,$t04,$H1); &FR($t04); | ||
80 | &mul($a[2],$b[0],($r08)=&NR(1)); | ||
81 | &cmpult($R,$r05,($t05)=&NR(1)); &FR($r05); | ||
82 | &add($R,$r06,$R); # add lo(a0*b2); the carry test three lines down compares against this same operand | ||
83 | &add($H2,$t05,$H2); &FR($t05); | ||
84 | &muh($a[0],$b[2],($r09)=&NR(1)); | ||
85 | &cmpult($R,$r06,($t06)=&NR(1)); &FR($r06); | ||
86 | &add($R,$r07,$R); | ||
87 | &add($H1,$t06,$H1); &FR($t06); | ||
88 | &muh($a[1],$b[1],($r10)=&NR(1)); | ||
89 | &cmpult($R,$r07,($t07)=&NR(1)); &FR($r07); | ||
90 | &add($R,$r08,$R); | ||
91 | &add($H2,$t07,$H2); &FR($t07); | ||
92 | &muh($a[2],$b[0],($r11)=&NR(1)); | ||
93 | &cmpult($R,$r08,($t08)=&NR(1)); &FR($r08); | ||
94 | &add($H1,$t08,$H1); &FR($t08); | ||
95 | &st($R,&QWPw(2,$rp)); | ||
96 | &add($H1,$H2,$R); | ||
97 | # r[3] = carries + hi(a0*b2) + hi(a1*b1) + hi(a2*b0) + lo(a0*b3) + lo(a1*b2) + lo(a2*b1) + lo(a3*b0) | ||
98 | &mov("zero",$H1); | ||
99 | &add($R,$r09,$R); | ||
100 | &mov("zero",$H2); | ||
101 | &mul($a[0],$b[3],($r12)=&NR(1)); | ||
102 | &cmpult($R,$r09,($t09)=&NR(1)); &FR($r09); | ||
103 | &add($R,$r10,$R); | ||
104 | &add($H1,$t09,$H1); &FR($t09); | ||
105 | &mul($a[1],$b[2],($r13)=&NR(1)); | ||
106 | &cmpult($R,$r10,($t10)=&NR(1)); &FR($r10); | ||
107 | &add($R,$r11,$R); | ||
108 | &add($H1,$t10,$H1); &FR($t10); | ||
109 | &mul($a[2],$b[1],($r14)=&NR(1)); | ||
110 | &cmpult($R,$r11,($t11)=&NR(1)); &FR($r11); | ||
111 | &add($R,$r12,$R); | ||
112 | &add($H1,$t11,$H1); &FR($t11); | ||
113 | &mul($a[3],$b[0],($r15)=&NR(1)); | ||
114 | &cmpult($R,$r12,($t12)=&NR(1)); &FR($r12); | ||
115 | &add($R,$r13,$R); | ||
116 | &add($H1,$t12,$H1); &FR($t12); | ||
117 | &muh($a[0],$b[3],($r16)=&NR(1)); | ||
118 | &cmpult($R,$r13,($t13)=&NR(1)); &FR($r13); | ||
119 | &add($R,$r14,$R); | ||
120 | &add($H1,$t13,$H1); &FR($t13); | ||
121 | &muh($a[1],$b[2],($r17)=&NR(1)); | ||
122 | &cmpult($R,$r14,($t14)=&NR(1)); &FR($r14); | ||
123 | &add($R,$r15,$R); | ||
124 | &add($H1,$t14,$H1); &FR($t14); | ||
125 | &muh($a[2],$b[1],($r18)=&NR(1)); | ||
126 | &cmpult($R,$r15,($t15)=&NR(1)); &FR($r15); | ||
127 | &add($H1,$t15,$H1); &FR($t15); | ||
128 | &st($R,&QWPw(3,$rp)); | ||
129 | &add($H1,$H2,$R); | ||
130 | # r[4] = carries + hi(a0*b3) + hi(a1*b2) + hi(a2*b1) + hi(a3*b0) + lo(a1*b3) + lo(a2*b2) + lo(a3*b1) | ||
131 | &mov("zero",$H1); | ||
132 | &add($R,$r16,$R); | ||
133 | &mov("zero",$H2); | ||
134 | &muh($a[3],$b[0],($r19)=&NR(1)); | ||
135 | &cmpult($R,$r16,($t16)=&NR(1)); &FR($r16); | ||
136 | &add($R,$r17,$R); | ||
137 | &add($H1,$t16,$H1); &FR($t16); | ||
138 | &mul($a[1],$b[3],($r20)=&NR(1)); | ||
139 | &cmpult($R,$r17,($t17)=&NR(1)); &FR($r17); | ||
140 | &add($R,$r18,$R); | ||
141 | &add($H1,$t17,$H1); &FR($t17); | ||
142 | &mul($a[2],$b[2],($r21)=&NR(1)); | ||
143 | &cmpult($R,$r18,($t18)=&NR(1)); &FR($r18); | ||
144 | &add($R,$r19,$R); | ||
145 | &add($H1,$t18,$H1); &FR($t18); | ||
146 | &mul($a[3],$b[1],($r22)=&NR(1)); | ||
147 | &cmpult($R,$r19,($t19)=&NR(1)); &FR($r19); | ||
148 | &add($R,$r20,$R); | ||
149 | &add($H1,$t19,$H1); &FR($t19); | ||
150 | &muh($a[1],$b[3],($r23)=&NR(1)); | ||
151 | &cmpult($R,$r20,($t20)=&NR(1)); &FR($r20); | ||
152 | &add($R,$r21,$R); | ||
153 | &add($H1,$t20,$H1); &FR($t20); | ||
154 | &muh($a[2],$b[2],($r24)=&NR(1)); | ||
155 | &cmpult($R,$r21,($t21)=&NR(1)); &FR($r21); | ||
156 | &add($R,$r22,$R); | ||
157 | &add($H1,$t21,$H1); &FR($t21); | ||
158 | &muh($a[3],$b[1],($r25)=&NR(1)); | ||
159 | &cmpult($R,$r22,($t22)=&NR(1)); &FR($r22); | ||
160 | &add($H1,$t22,$H1); &FR($t22); | ||
161 | &st($R,&QWPw(4,$rp)); | ||
162 | &add($H1,$H2,$R); | ||
163 | # r[5] = carries + hi(a1*b3) + hi(a2*b2) + hi(a3*b1) + lo(a2*b3) + lo(a3*b2) | ||
164 | &mov("zero",$H1); | ||
165 | &add($R,$r23,$R); | ||
166 | &mov("zero",$H2); | ||
167 | &mul($a[2],$b[3],($r26)=&NR(1)); | ||
168 | &cmpult($R,$r23,($t23)=&NR(1)); &FR($r23); | ||
169 | &add($R,$r24,$R); | ||
170 | &add($H1,$t23,$H1); &FR($t23); | ||
171 | &mul($a[3],$b[2],($r27)=&NR(1)); | ||
172 | &cmpult($R,$r24,($t24)=&NR(1)); &FR($r24); | ||
173 | &add($R,$r25,$R); | ||
174 | &add($H1,$t24,$H1); &FR($t24); | ||
175 | &muh($a[2],$b[3],($r28)=&NR(1)); | ||
176 | &cmpult($R,$r25,($t25)=&NR(1)); &FR($r25); | ||
177 | &add($R,$r26,$R); | ||
178 | &add($H1,$t25,$H1); &FR($t25); | ||
179 | &muh($a[3],$b[2],($r29)=&NR(1)); | ||
180 | &cmpult($R,$r26,($t26)=&NR(1)); &FR($r26); | ||
181 | &add($R,$r27,$R); | ||
182 | &add($H1,$t26,$H1); &FR($t26); | ||
183 | &mul($a[3],$b[3],($r30)=&NR(1)); | ||
184 | &cmpult($R,$r27,($t27)=&NR(1)); &FR($r27); | ||
185 | &add($H1,$t27,$H1); &FR($t27); | ||
186 | &st($R,&QWPw(5,$rp)); | ||
187 | &add($H1,$H2,$R); | ||
188 | # r[6] = carries + hi(a2*b3) + hi(a3*b2) + lo(a3*b3) | ||
189 | &mov("zero",$H1); | ||
190 | &add($R,$r28,$R); | ||
191 | &mov("zero",$H2); | ||
192 | &muh($a[3],$b[3],($r31)=&NR(1)); | ||
193 | &cmpult($R,$r28,($t28)=&NR(1)); &FR($r28); | ||
194 | &add($R,$r29,$R); | ||
195 | &add($H1,$t28,$H1); &FR($t28); | ||
196 | ############ | ||
197 | &cmpult($R,$r29,($t29)=&NR(1)); &FR($r29); | ||
198 | &add($R,$r30,$R); | ||
199 | &add($H1,$t29,$H1); &FR($t29); | ||
200 | ############ | ||
201 | &cmpult($R,$r30,($t30)=&NR(1)); &FR($r30); | ||
202 | &add($H1,$t30,$H1); &FR($t30); | ||
203 | &st($R,&QWPw(6,$rp)); | ||
204 | &add($H1,$H2,$R); | ||
205 | # r[7] = carries + hi(a3*b3) | ||
206 | &add($R,$r31,$R); &FR($r31); | ||
207 | &st($R,&QWPw(7,$rp)); | ||
208 | |||
209 | &FR($R,$H1,$H2); | ||
210 | &function_end($name); | ||
211 | |||
212 | &fin_pool; | ||
213 | } | ||
214 | |||
215 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha/mul_c4.works.pl b/src/lib/libcrypto/bn/asm/alpha/mul_c4.works.pl new file mode 100644 index 0000000000..79d86dd25c --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha/mul_c4.works.pl | |||
@@ -0,0 +1,98 @@ | |||
1 | #!/usr/local/bin/perl | ||
2 | # alpha assembler | ||
3 | |||
4 | sub mul_add_c # emit code that adds the product $a*$b into the accumulator registers ($c2,$c1,$c0) | ||
5 | { | ||
6 | local($a,$b,$c0,$c1,$c2)=@_; | ||
7 | local($l1,$h1,$t1,$t2); | ||
8 | # each call first reports the running call number $cnt on STDERR; $l1/$h1 take the &mul/&muh results, $t1/$t2 the carries | ||
9 | print STDERR "count=$cnt\n"; $cnt++; | ||
10 | &mul($a,$b,($l1)=&NR(1)); | ||
11 | &muh($a,$b,($h1)=&NR(1)); | ||
12 | &add($c0,$l1,$c0); # c0 += l1 | ||
13 | &cmpult($c0,$l1,($t1)=&NR(1)); &FR($l1); # t1 = (c0 < l1): the carry out of c0 | ||
14 | &add($t1,$h1,$h1); &FR($t1); # fold that carry into h1 before h1 is added to c1 | ||
15 | &add($c1,$h1,$c1); # c1 += h1 | ||
16 | &cmpult($c1,$h1,($t2)=&NR(1)); &FR($h1); # t2 = (c1 < h1): the carry out of c1 | ||
17 | &add($c2,$t2,$c2); &FR($t2); # c2 += t2 | ||
18 | } | ||
19 | |||
20 | sub bn_mul_comba4 # emit the 4x4-word multiply r[0..7] = a[0..3]*b[0..3], one result word at a time through &mul_add_c | ||
21 | { | ||
22 | local($name)=@_; | ||
23 | local(@a,@b,$r,$c0,$c1,$c2); | ||
24 | # $cnt is the call counter that mul_add_c prints; $rp,$ap,$bp come from &wparam(0..2) | ||
25 | $cnt=1; | ||
26 | &init_pool(3); | ||
27 | |||
28 | $rp=&wparam(0); | ||
29 | $ap=&wparam(1); | ||
30 | $bp=&wparam(2); | ||
31 | |||
32 | &function_begin($name,""); | ||
33 | |||
34 | &comment(""); | ||
35 | # load all eight input words; $ap and $bp are released right after their last load | ||
36 | &ld(($a[0])=&NR(1),&QWPw(0,$ap)); | ||
37 | &ld(($b[0])=&NR(1),&QWPw(0,$bp)); | ||
38 | &ld(($a[1])=&NR(1),&QWPw(1,$ap)); | ||
39 | &ld(($b[1])=&NR(1),&QWPw(1,$bp)); | ||
40 | &ld(($a[2])=&NR(1),&QWPw(2,$ap)); | ||
41 | &ld(($b[2])=&NR(1),&QWPw(2,$bp)); | ||
42 | &ld(($a[3])=&NR(1),&QWPw(3,$ap)); &FR($ap); | ||
43 | &ld(($b[3])=&NR(1),&QWPw(3,$bp)); &FR($bp); | ||
44 | # r[0]: c0/c1 take the &mul/&muh results of a0*b0 directly | ||
45 | ($c0,$c1,$c2)=&NR(3); | ||
46 | &mov("zero",$c2); | ||
47 | &mul($a[0],$b[0],$c0); | ||
48 | &muh($a[0],$b[0],$c1); | ||
49 | &st($c0,&QWPw(0,$rp)); &FR($c0); ($c0)=&NR($c0); # NOTE(review): &NR is given a count everywhere else; passing $c0 here looks unintended - confirm | ||
50 | ($c0,$c1,$c2)=($c1,$c2,$c0); # rotate the three names (no code emitted): old c1 becomes c0, old c2 becomes c1 | ||
51 | &mov("zero",$c2); | ||
52 | # r[1]: products a[i]*b[j] with i+j=1 | ||
53 | &mul_add_c($a[0],$b[1],$c0,$c1,$c2); | ||
54 | &mul_add_c($a[1],$b[0],$c0,$c1,$c2); | ||
55 | &st($c0,&QWPw(1,$rp)); &FR($c0); ($c0)=&NR($c0); | ||
56 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
57 | &mov("zero",$c2); | ||
58 | # r[2]: products with i+j=2 | ||
59 | &mul_add_c($a[1],$b[1],$c0,$c1,$c2); | ||
60 | &mul_add_c($a[0],$b[2],$c0,$c1,$c2); | ||
61 | &mul_add_c($a[2],$b[0],$c0,$c1,$c2); | ||
62 | &st($c0,&QWPw(2,$rp)); &FR($c0); ($c0)=&NR($c0); | ||
63 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
64 | &mov("zero",$c2); | ||
65 | # r[3]: products with i+j=3; from here on each input register is released after its last use | ||
66 | &mul_add_c($a[0],$b[3],$c0,$c1,$c2); &FR($a[0]); | ||
67 | &mul_add_c($a[1],$b[2],$c0,$c1,$c2); | ||
68 | &mul_add_c($a[2],$b[1],$c0,$c1,$c2); | ||
69 | &mul_add_c($a[3],$b[0],$c0,$c1,$c2); &FR($b[0]); | ||
70 | &st($c0,&QWPw(3,$rp)); &FR($c0); ($c0)=&NR($c0); | ||
71 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
72 | &mov("zero",$c2); | ||
73 | # r[4]: products with i+j=4 | ||
74 | &mul_add_c($a[1],$b[3],$c0,$c1,$c2); &FR($a[1]); | ||
75 | &mul_add_c($a[2],$b[2],$c0,$c1,$c2); | ||
76 | &mul_add_c($a[3],$b[1],$c0,$c1,$c2); &FR($b[1]); | ||
77 | &st($c0,&QWPw(4,$rp)); &FR($c0); ($c0)=&NR($c0); | ||
78 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
79 | &mov("zero",$c2); | ||
80 | # r[5]: products with i+j=5 | ||
81 | &mul_add_c($a[2],$b[3],$c0,$c1,$c2); &FR($a[2]); | ||
82 | &mul_add_c($a[3],$b[2],$c0,$c1,$c2); &FR($b[2]); | ||
83 | &st($c0,&QWPw(5,$rp)); &FR($c0); ($c0)=&NR($c0); | ||
84 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
85 | &mov("zero",$c2); | ||
86 | # r[6] and r[7]: the last product a3*b3; c0 and c1 are both stored | ||
87 | &mul_add_c($a[3],$b[3],$c0,$c1,$c2); &FR($a[3],$b[3]); | ||
88 | &st($c0,&QWPw(6,$rp)); | ||
89 | &st($c1,&QWPw(7,$rp)); | ||
90 | |||
91 | &FR($c0,$c1,$c2); | ||
92 | |||
93 | &function_end($name); | ||
94 | |||
95 | &fin_pool; | ||
96 | } | ||
97 | |||
98 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha/mul_c8.pl b/src/lib/libcrypto/bn/asm/alpha/mul_c8.pl new file mode 100644 index 0000000000..525ca7494b --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha/mul_c8.pl | |||
@@ -0,0 +1,177 @@ | |||
1 | #!/usr/local/bin/perl | ||
2 | # alpha assembler | ||
3 | |||
4 | sub bn_mul_comba8 # emit the 8x8-word multiply r[0..15] = a[0..7]*b[0..7], one result word at a time through &mul_add_c | ||
5 | { | ||
6 | local($name)=@_; | ||
7 | local(@a,@b,$r,$c0,$c1,$c2); | ||
8 | # &mul_add_c is not defined in this file; $rp,$ap,$bp come from &wparam(0..2) | ||
9 | $cnt=1; | ||
10 | &init_pool(3); | ||
11 | |||
12 | $rp=&wparam(0); | ||
13 | $ap=&wparam(1); | ||
14 | $bp=&wparam(2); | ||
15 | |||
16 | &function_begin($name,""); | ||
17 | |||
18 | &comment(""); | ||
19 | # $reg_s0/$reg_s1 are stored in two temporary slots, released to the pool, and reloaded before &function_end | ||
20 | &stack_push(2); | ||
21 | &ld(($a[0])=&NR(1),&QWPw(0,$ap)); | ||
22 | &ld(($b[0])=&NR(1),&QWPw(0,$bp)); | ||
23 | &st($reg_s0,&swtmp(0)); &FR($reg_s0); | ||
24 | &st($reg_s1,&swtmp(1)); &FR($reg_s1); | ||
25 | &ld(($a[1])=&NR(1),&QWPw(1,$ap)); | ||
26 | &ld(($b[1])=&NR(1),&QWPw(1,$bp)); | ||
27 | &ld(($a[2])=&NR(1),&QWPw(2,$ap)); | ||
28 | &ld(($b[2])=&NR(1),&QWPw(2,$bp)); | ||
29 | &ld(($a[3])=&NR(1),&QWPw(3,$ap)); | ||
30 | &ld(($b[3])=&NR(1),&QWPw(3,$bp)); | ||
31 | &ld(($a[4])=&NR(1),&QWPw(4,$ap)); # words 4..7 are read from their own offsets, matching the index of the register they fill | ||
32 | &ld(($b[4])=&NR(1),&QWPw(4,$bp)); | ||
33 | &ld(($a[5])=&NR(1),&QWPw(5,$ap)); | ||
34 | &ld(($b[5])=&NR(1),&QWPw(5,$bp)); | ||
35 | &ld(($a[6])=&NR(1),&QWPw(6,$ap)); | ||
36 | &ld(($b[6])=&NR(1),&QWPw(6,$bp)); | ||
37 | &ld(($a[7])=&NR(1),&QWPw(7,$ap)); &FR($ap); | ||
38 | &ld(($b[7])=&NR(1),&QWPw(7,$bp)); &FR($bp); | ||
39 | # r[0]: c0/c1 take the &mul/&muh results of a0*b0 directly | ||
40 | ($c0,$c1,$c2)=&NR(3); | ||
41 | &mov("zero",$c2); | ||
42 | &mul($a[0],$b[0],$c0); | ||
43 | &muh($a[0],$b[0],$c1); | ||
44 | &st($c0,&QWPw(0,$rp)); &FR($c0); ($c0)=&NR(1); | ||
45 | ($c0,$c1,$c2)=($c1,$c2,$c0); # rotate the three names (no code emitted): old c1 becomes c0, old c2 becomes c1 | ||
46 | &mov("zero",$c2); | ||
47 | # r[1]: products a[i]*b[j] with i+j=1 | ||
48 | &mul_add_c($a[0],$b[1],$c0,$c1,$c2); | ||
49 | &mul_add_c($a[1],$b[0],$c0,$c1,$c2); | ||
50 | &st($c0,&QWPw(1,$rp)); &FR($c0); ($c0)=&NR(1); | ||
51 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
52 | &mov("zero",$c2); | ||
53 | # r[2]: products with i+j=2 | ||
54 | &mul_add_c($a[0],$b[2],$c0,$c1,$c2); | ||
55 | &mul_add_c($a[1],$b[1],$c0,$c1,$c2); | ||
56 | &mul_add_c($a[2],$b[0],$c0,$c1,$c2); | ||
57 | &st($c0,&QWPw(2,$rp)); &FR($c0); ($c0)=&NR(1); | ||
58 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
59 | &mov("zero",$c2); | ||
60 | # r[3]: products with i+j=3 | ||
61 | &mul_add_c($a[0],$b[3],$c0,$c1,$c2); | ||
62 | &mul_add_c($a[1],$b[2],$c0,$c1,$c2); | ||
63 | &mul_add_c($a[2],$b[1],$c0,$c1,$c2); | ||
64 | &mul_add_c($a[3],$b[0],$c0,$c1,$c2); | ||
65 | &st($c0,&QWPw(3,$rp)); &FR($c0); ($c0)=&NR(1); | ||
66 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
67 | &mov("zero",$c2); | ||
68 | # r[4]: products with i+j=4 | ||
69 | &mul_add_c($a[0],$b[4],$c0,$c1,$c2); | ||
70 | &mul_add_c($a[1],$b[3],$c0,$c1,$c2); | ||
71 | &mul_add_c($a[2],$b[2],$c0,$c1,$c2); | ||
72 | &mul_add_c($a[3],$b[1],$c0,$c1,$c2); | ||
73 | &mul_add_c($a[4],$b[0],$c0,$c1,$c2); | ||
74 | &st($c0,&QWPw(4,$rp)); &FR($c0); ($c0)=&NR(1); | ||
75 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
76 | &mov("zero",$c2); | ||
77 | # r[5]: products with i+j=5 | ||
78 | &mul_add_c($a[0],$b[5],$c0,$c1,$c2); | ||
79 | &mul_add_c($a[1],$b[4],$c0,$c1,$c2); | ||
80 | &mul_add_c($a[2],$b[3],$c0,$c1,$c2); | ||
81 | &mul_add_c($a[3],$b[2],$c0,$c1,$c2); | ||
82 | &mul_add_c($a[4],$b[1],$c0,$c1,$c2); | ||
83 | &mul_add_c($a[5],$b[0],$c0,$c1,$c2); | ||
84 | &st($c0,&QWPw(5,$rp)); &FR($c0); ($c0)=&NR(1); | ||
85 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
86 | &mov("zero",$c2); | ||
87 | # r[6]: products with i+j=6 | ||
88 | &mul_add_c($a[0],$b[6],$c0,$c1,$c2); | ||
89 | &mul_add_c($a[1],$b[5],$c0,$c1,$c2); | ||
90 | &mul_add_c($a[2],$b[4],$c0,$c1,$c2); | ||
91 | &mul_add_c($a[3],$b[3],$c0,$c1,$c2); | ||
92 | &mul_add_c($a[4],$b[2],$c0,$c1,$c2); | ||
93 | &mul_add_c($a[5],$b[1],$c0,$c1,$c2); | ||
94 | &mul_add_c($a[6],$b[0],$c0,$c1,$c2); | ||
95 | &st($c0,&QWPw(6,$rp)); &FR($c0); ($c0)=&NR(1); | ||
96 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
97 | &mov("zero",$c2); | ||
98 | # r[7]: products with i+j=7; from here on each input register is released after its last use | ||
99 | &mul_add_c($a[0],$b[7],$c0,$c1,$c2); &FR($a[0]); | ||
100 | &mul_add_c($a[1],$b[6],$c0,$c1,$c2); | ||
101 | &mul_add_c($a[2],$b[5],$c0,$c1,$c2); | ||
102 | &mul_add_c($a[3],$b[4],$c0,$c1,$c2); | ||
103 | &mul_add_c($a[4],$b[3],$c0,$c1,$c2); | ||
104 | &mul_add_c($a[5],$b[2],$c0,$c1,$c2); | ||
105 | &mul_add_c($a[6],$b[1],$c0,$c1,$c2); | ||
106 | &mul_add_c($a[7],$b[0],$c0,$c1,$c2); &FR($b[0]); | ||
107 | &st($c0,&QWPw(7,$rp)); &FR($c0); ($c0)=&NR(1); | ||
108 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
109 | &mov("zero",$c2); | ||
110 | # r[8]: products with i+j=8 | ||
111 | &mul_add_c($a[1],$b[7],$c0,$c1,$c2); &FR($a[1]); | ||
112 | &mul_add_c($a[2],$b[6],$c0,$c1,$c2); | ||
113 | &mul_add_c($a[3],$b[5],$c0,$c1,$c2); | ||
114 | &mul_add_c($a[4],$b[4],$c0,$c1,$c2); | ||
115 | &mul_add_c($a[5],$b[3],$c0,$c1,$c2); | ||
116 | &mul_add_c($a[6],$b[2],$c0,$c1,$c2); | ||
117 | &mul_add_c($a[7],$b[1],$c0,$c1,$c2); &FR($b[1]); | ||
118 | &st($c0,&QWPw(8,$rp)); &FR($c0); ($c0)=&NR(1); | ||
119 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
120 | &mov("zero",$c2); | ||
121 | # r[9]: products with i+j=9 | ||
122 | &mul_add_c($a[2],$b[7],$c0,$c1,$c2); &FR($a[2]); | ||
123 | &mul_add_c($a[3],$b[6],$c0,$c1,$c2); | ||
124 | &mul_add_c($a[4],$b[5],$c0,$c1,$c2); | ||
125 | &mul_add_c($a[5],$b[4],$c0,$c1,$c2); | ||
126 | &mul_add_c($a[6],$b[3],$c0,$c1,$c2); | ||
127 | &mul_add_c($a[7],$b[2],$c0,$c1,$c2); &FR($b[2]); | ||
128 | &st($c0,&QWPw(9,$rp)); &FR($c0); ($c0)=&NR(1); | ||
129 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
130 | &mov("zero",$c2); | ||
131 | # r[10]: products with i+j=10 | ||
132 | &mul_add_c($a[3],$b[7],$c0,$c1,$c2); &FR($a[3]); | ||
133 | &mul_add_c($a[4],$b[6],$c0,$c1,$c2); | ||
134 | &mul_add_c($a[5],$b[5],$c0,$c1,$c2); | ||
135 | &mul_add_c($a[6],$b[4],$c0,$c1,$c2); | ||
136 | &mul_add_c($a[7],$b[3],$c0,$c1,$c2); &FR($b[3]); | ||
137 | &st($c0,&QWPw(10,$rp)); &FR($c0); ($c0)=&NR(1); | ||
138 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
139 | &mov("zero",$c2); | ||
140 | # r[11]: products with i+j=11 | ||
141 | &mul_add_c($a[4],$b[7],$c0,$c1,$c2); &FR($a[4]); | ||
142 | &mul_add_c($a[5],$b[6],$c0,$c1,$c2); | ||
143 | &mul_add_c($a[6],$b[5],$c0,$c1,$c2); | ||
144 | &mul_add_c($a[7],$b[4],$c0,$c1,$c2); &FR($b[4]); | ||
145 | &st($c0,&QWPw(11,$rp)); &FR($c0); ($c0)=&NR(1); | ||
146 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
147 | &mov("zero",$c2); | ||
148 | # r[12]: products with i+j=12 | ||
149 | &mul_add_c($a[5],$b[7],$c0,$c1,$c2); &FR($a[5]); | ||
150 | &mul_add_c($a[6],$b[6],$c0,$c1,$c2); | ||
151 | &mul_add_c($a[7],$b[5],$c0,$c1,$c2); &FR($b[5]); | ||
152 | &st($c0,&QWPw(12,$rp)); &FR($c0); ($c0)=&NR(1); | ||
153 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
154 | &mov("zero",$c2); | ||
155 | # r[13]: products with i+j=13 | ||
156 | &mul_add_c($a[6],$b[7],$c0,$c1,$c2); &FR($a[6]); | ||
157 | &mul_add_c($a[7],$b[6],$c0,$c1,$c2); &FR($b[6]); | ||
158 | &st($c0,&QWPw(13,$rp)); &FR($c0); ($c0)=&NR(1); | ||
159 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
160 | &mov("zero",$c2); | ||
161 | # r[14] and r[15]: the last product a7*b7; c0 and c1 are both stored | ||
162 | &mul_add_c($a[7],$b[7],$c0,$c1,$c2); &FR($a[7],$b[7]); | ||
163 | &st($c0,&QWPw(14,$rp)); | ||
164 | &st($c1,&QWPw(15,$rp)); | ||
165 | |||
166 | &FR($c0,$c1,$c2); | ||
167 | |||
168 | &ld($reg_s0,&swtmp(0)); | ||
169 | &ld($reg_s1,&swtmp(1)); | ||
170 | &stack_pop(2); | ||
171 | |||
172 | &function_end($name); | ||
173 | |||
174 | &fin_pool; | ||
175 | } | ||
176 | |||
177 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha/sqr.pl b/src/lib/libcrypto/bn/asm/alpha/sqr.pl new file mode 100644 index 0000000000..a55b696906 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha/sqr.pl | |||
@@ -0,0 +1,113 @@ | |||
1 | #!/usr/local/bin/perl | ||
2 | # alpha assembler | ||
3 | |||
4 | sub bn_sqr_words # emit a loop that, for each of $count input words, stores the &mul and &muh results of a[i]*a[i] to r[2i] and r[2i+1] | ||
5 | { | ||
6 | local($name)=@_; | ||
7 | local($cc,$a,$b,$r,$couny); # NOTE(review): $couny is never used below and looks like a typo for $count - confirm | ||
8 | # $rp,$ap,$count come from &wparam(0..2) | ||
9 | &init_pool(3); | ||
10 | ($cc)=GR("r0"); | ||
11 | |||
12 | $rp=&wparam(0); | ||
13 | $ap=&wparam(1); | ||
14 | $count=&wparam(2); | ||
15 | |||
16 | &function_begin($name,""); | ||
17 | |||
18 | &comment(""); | ||
19 | &sub($count,4,$count); | ||
20 | &mov("zero",$cc); | ||
21 | &br(&label("finish")); # presumably an unconditional branch: the blt and the two loads after it would then never run, and every word goes through last_loop | ||
22 | &blt($count,&label("finish")); | ||
23 | |||
24 | ($a0,$r0)=&NR(2); | ||
25 | &ld($a0,&QWPw(0,$ap)); | ||
26 | &ld($r0,&QWPw(0,$rp)); | ||
27 | # the 4-way unrolled loop below is only quoted into $a, so it emits nothing; its text still refers to $bp and $b0, which this sub never sets | ||
28 | $a=<<'EOF'; | ||
29 | ########################################################## | ||
30 | &set_label("loop"); | ||
31 | |||
32 | ($a1)=&NR(1); &ld($a1,&QWPw(1,$ap)); | ||
33 | ($b1)=&NR(1); &ld($b1,&QWPw(1,$bp)); | ||
34 | ($a2)=&NR(1); &ld($a2,&QWPw(2,$ap)); | ||
35 | ($b2)=&NR(1); &ld($b2,&QWPw(2,$bp)); | ||
36 | ($a3)=&NR(1); &ld($a3,&QWPw(3,$ap)); | ||
37 | ($b3)=&NR(1); &ld($b3,&QWPw(3,$bp)); | ||
38 | |||
39 | ($o0,$t0)=&NR(2); | ||
40 | &add($a0,$b0,$o0); | ||
41 | &cmpult($o0,$b0,$t0); | ||
42 | &add($o0,$cc,$o0); | ||
43 | &cmpult($o0,$cc,$cc); | ||
44 | &add($cc,$t0,$cc); &FR($t0); | ||
45 | |||
46 | ($t1,$o1)=&NR(2); | ||
47 | |||
48 | &add($a1,$b1,$o1); &FR($a1); | ||
49 | &cmpult($o1,$b1,$t1); &FR($b1); | ||
50 | &add($o1,$cc,$o1); | ||
51 | &cmpult($o1,$cc,$cc); | ||
52 | &add($cc,$t1,$cc); &FR($t1); | ||
53 | |||
54 | ($t2,$o2)=&NR(2); | ||
55 | |||
56 | &add($a2,$b2,$o2); &FR($a2); | ||
57 | &cmpult($o2,$b2,$t2); &FR($b2); | ||
58 | &add($o2,$cc,$o2); | ||
59 | &cmpult($o2,$cc,$cc); | ||
60 | &add($cc,$t2,$cc); &FR($t2); | ||
61 | |||
62 | ($t3,$o3)=&NR(2); | ||
63 | |||
64 | &add($a3,$b3,$o3); &FR($a3); | ||
65 | &cmpult($o3,$b3,$t3); &FR($b3); | ||
66 | &add($o3,$cc,$o3); | ||
67 | &cmpult($o3,$cc,$cc); | ||
68 | &add($cc,$t3,$cc); &FR($t3); | ||
69 | |||
70 | &st($o0,&QWPw(0,$rp)); &FR($o0); | ||
71 | &st($o1,&QWPw(0,$rp)); &FR($o1); | ||
72 | &st($o2,&QWPw(0,$rp)); &FR($o2); | ||
73 | &st($o3,&QWPw(0,$rp)); &FR($o3); | ||
74 | |||
75 | &sub($count,4,$count); # count-=4 | ||
76 | &add($ap,4*$QWS,$ap); # count+=4 | ||
77 | &add($bp,4*$QWS,$bp); # count+=4 | ||
78 | &add($rp,4*$QWS,$rp); # count+=4 | ||
79 | |||
80 | &blt($count,&label("finish")); | ||
81 | &ld($a0,&QWPw(0,$ap)); | ||
82 | &ld($b0,&QWPw(0,$bp)); | ||
83 | &br(&label("loop")); | ||
84 | EOF | ||
85 | ################################################## | ||
86 | # One word per pass: load a[i], store the two results at r[2i] and r[2i+1] | ||
87 | |||
88 | &set_label("last_loop"); | ||
89 | |||
90 | &ld(($a0)=&NR(1),&QWPw(0,$ap)); # get a | ||
91 | &mul($a0,$a0,($l0)=&NR(1)); | ||
92 | &add($ap,$QWS,$ap); # advance ap by one word | ||
93 | &add($rp,2*$QWS,$rp); # advance rp by two words | ||
94 | &sub($count,1,$count); | ||
95 | &muh($a0,$a0,($h0)=&NR(1)); &FR($a0); | ||
96 | &st($l0,&QWPw(-2,$rp)); &FR($l0); # rp was already advanced, hence the offsets -2 and -1 | ||
97 | &st($h0,&QWPw(-1,$rp)); &FR($h0); | ||
98 | |||
99 | &bgt($count,&label("last_loop")); | ||
100 | &function_end_A($name); | ||
101 | |||
102 | ###################################################### | ||
103 | &set_label("finish"); | ||
104 | &add($count,4,$count); # undo the count-=4 done at entry | ||
105 | &bgt($count,&label("last_loop")); | ||
106 | |||
107 | &set_label("end"); | ||
108 | &function_end($name); | ||
109 | |||
110 | &fin_pool; | ||
111 | } | ||
112 | |||
113 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha/sqr_c4.pl b/src/lib/libcrypto/bn/asm/alpha/sqr_c4.pl new file mode 100644 index 0000000000..bf33f5b503 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha/sqr_c4.pl | |||
@@ -0,0 +1,109 @@ | |||
1 | #!/usr/local/bin/perl | ||
2 | # alpha assembler | ||
3 | |||
4 | sub sqr_add_c # emit code that adds the square $a*$a into the accumulator registers ($c2,$c1,$c0) | ||
5 | { | ||
6 | local($a,$c0,$c1,$c2)=@_; | ||
7 | local($l1,$h1,$t1,$t2); | ||
8 | # the carry out of c0 is folded into h1 before h1 is added to c1, so a single carry test on c1 covers both; assuming &muh yields the high product word, h1+1 cannot wrap | ||
9 | &mul($a,$a,($l1)=&NR(1)); | ||
10 | &muh($a,$a,($h1)=&NR(1)); | ||
11 | &add($c0,$l1,$c0); # c0 += l1 | ||
12 | &cmpult($c0,$l1,($t1)=&NR(1)); &FR($l1); # t1 = (c0 < l1): the carry out of c0 | ||
13 | &add($t1,$h1,$h1); &FR($t1); # h1 += t1 | ||
14 | &add($c1,$h1,$c1); # c1 += h1 | ||
15 | &cmpult($c1,$h1,($t2)=&NR(1)); &FR($h1); # t2 = (c1 < h1): the carry out of c1 | ||
16 | &add($c2,$t2,$c2); &FR($t2); # c2 += t2 | ||
17 | } | ||
18 | |||
19 | sub sqr_add_c2 # emit code that adds twice the product $a*$b into the accumulator registers ($c2,$c1,$c0) | ||
20 | { | ||
21 | local($a,$b,$c0,$c1,$c2)=@_; | ||
22 | local($l1,$h1,$t1,$t2); | ||
23 | # first double the pair (h1,l1): the sign tests capture the top bit of each word before the word is added to itself | ||
24 | &mul($a,$b,($l1)=&NR(1)); | ||
25 | &muh($a,$b,($h1)=&NR(1)); | ||
26 | &cmplt($l1,"zero",($lc1)=&NR(1)); | ||
27 | &cmplt($h1,"zero",($hc1)=&NR(1)); | ||
28 | &add($l1,$l1,$l1); | ||
29 | &add($h1,$h1,$h1); | ||
30 | &add($h1,$lc1,$h1); &FR($lc1); # the bit shifted out of l1 becomes the low bit of h1 | ||
31 | &add($c2,$hc1,$c2); &FR($hc1); # the bit shifted out of h1 goes to c2 | ||
32 | &add($c0,$l1,$c0); # c0 += l1 | ||
33 | &cmpult($c0,$l1,($lc1)=&NR(1)); &FR($l1); # lc1 = (c0 < l1): the carry out of c0 | ||
34 | &add($c1,$h1,$c1); # c1 += h1 | ||
35 | &cmpult($c1,$h1,($hc1)=&NR(1)); &FR($h1); # hc1 = (c1 < h1): the carry out of c1 | ||
36 | &add($c2,$hc1,$c2); | ||
37 | &add($c1,$lc1,$c1); # c1 += lc1; the doubled h1 may be all ones, so this addition gets its own carry test | ||
38 | &cmpult($c1,$lc1,$hc1); &FR($lc1); # hc1 = (c1 < lc1): set only when adding lc1 wrapped c1 to zero | ||
39 | &add($c2,$hc1,$c2); &FR($hc1); | ||
40 | } | ||
41 | |||
42 | |||
43 | sub bn_sqr_comba4 # emit the 4-word squaring r[0..7] = a[0..3]^2, one result word at a time | ||
44 | { | ||
45 | local($name)=@_; | ||
46 | local(@a,@b,$r,$c0,$c1,$c2); | ||
47 | # &sqr_add_c handles a[i]*a[i]; &sqr_add_c2 handles a[i]*a[j] with i != j (the product is doubled there); $rp,$ap come from &wparam(0..1) | ||
48 | $cnt=1; | ||
49 | &init_pool(2); | ||
50 | |||
51 | $rp=&wparam(0); | ||
52 | $ap=&wparam(1); | ||
53 | |||
54 | &function_begin($name,""); | ||
55 | |||
56 | &comment(""); | ||
57 | |||
58 | &ld(($a[0])=&NR(1),&QWPw(0,$ap)); | ||
59 | &ld(($a[1])=&NR(1),&QWPw(1,$ap)); | ||
60 | &ld(($a[2])=&NR(1),&QWPw(2,$ap)); | ||
61 | &ld(($a[3])=&NR(1),&QWPw(3,$ap)); &FR($ap); | ||
62 | |||
63 | ($c0,$c1,$c2)=&NR(3); | ||
64 | # r[0]: c0/c1 take the &mul/&muh results of a0*a0 directly | ||
65 | &mov("zero",$c2); | ||
66 | &mul($a[0],$a[0],$c0); | ||
67 | &muh($a[0],$a[0],$c1); | ||
68 | &st($c0,&QWPw(0,$rp)); | ||
69 | ($c0,$c1,$c2)=($c1,$c2,$c0); # rotate the three names (no code emitted): the stored register is reused as the new c2 | ||
70 | &mov("zero",$c2); | ||
71 | # r[1]: a0*a1 | ||
72 | &sqr_add_c2($a[0],$a[1],$c0,$c1,$c2); | ||
73 | &st($c0,&QWPw(1,$rp)); | ||
74 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
75 | &mov("zero",$c2); | ||
76 | # r[2]: a1*a1 and a2*a0 | ||
77 | &sqr_add_c($a[1],$c0,$c1,$c2); | ||
78 | &sqr_add_c2($a[2],$a[0],$c0,$c1,$c2); | ||
79 | &st($c0,&QWPw(2,$rp)); | ||
80 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
81 | &mov("zero",$c2); | ||
82 | # r[3]: a3*a0 and a2*a1 | ||
83 | &sqr_add_c2($a[3],$a[0],$c0,$c1,$c2); | ||
84 | &sqr_add_c2($a[2],$a[1],$c0,$c1,$c2); | ||
85 | &st($c0,&QWPw(3,$rp)); | ||
86 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
87 | &mov("zero",$c2); | ||
88 | # r[4]: a2*a2 and a3*a1 | ||
89 | &sqr_add_c($a[2],$c0,$c1,$c2); | ||
90 | &sqr_add_c2($a[3],$a[1],$c0,$c1,$c2); | ||
91 | &st($c0,&QWPw(4,$rp)); | ||
92 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
93 | &mov("zero",$c2); | ||
94 | # r[5]: a3*a2 | ||
95 | &sqr_add_c2($a[3],$a[2],$c0,$c1,$c2); | ||
96 | &st($c0,&QWPw(5,$rp)); | ||
97 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
98 | &mov("zero",$c2); | ||
99 | # r[6] and r[7]: a3*a3; c0 and c1 are both stored | ||
100 | &sqr_add_c($a[3],$c0,$c1,$c2); | ||
101 | &st($c0,&QWPw(6,$rp)); | ||
102 | &st($c1,&QWPw(7,$rp)); | ||
103 | |||
104 | &function_end($name); | ||
105 | |||
106 | &fin_pool; | ||
107 | } | ||
108 | |||
109 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha/sqr_c8.pl b/src/lib/libcrypto/bn/asm/alpha/sqr_c8.pl new file mode 100644 index 0000000000..b4afe085f1 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha/sqr_c8.pl | |||
@@ -0,0 +1,132 @@ | |||
1 | #!/usr/local/bin/perl | ||
2 | # alpha assembler | ||
3 | |||
4 | sub bn_sqr_comba8 # emit the 8-word squaring r[0..15] = a[0..7]^2, one result word at a time | ||
5 | { | ||
6 | local($name)=@_; | ||
7 | local(@a,@b,$r,$c0,$c1,$c2); | ||
8 | # &sqr_add_c and &sqr_add_c2 are not defined in this file; $rp,$ap come from &wparam(0..1) | ||
9 | $cnt=1; | ||
10 | &init_pool(2); | ||
11 | |||
12 | $rp=&wparam(0); | ||
13 | $ap=&wparam(1); | ||
14 | |||
15 | &function_begin($name,""); | ||
16 | |||
17 | &comment(""); | ||
18 | |||
19 | &ld(($a[0])=&NR(1),&QWPw(0,$ap)); | ||
20 | &ld(($a[1])=&NR(1),&QWPw(1,$ap)); | ||
21 | &ld(($a[2])=&NR(1),&QWPw(2,$ap)); | ||
22 | &ld(($a[3])=&NR(1),&QWPw(3,$ap)); | ||
23 | &ld(($a[4])=&NR(1),&QWPw(4,$ap)); | ||
24 | &ld(($a[5])=&NR(1),&QWPw(5,$ap)); | ||
25 | &ld(($a[6])=&NR(1),&QWPw(6,$ap)); | ||
26 | &ld(($a[7])=&NR(1),&QWPw(7,$ap)); &FR($ap); | ||
27 | |||
28 | ($c0,$c1,$c2)=&NR(3); | ||
29 | # r[0]: c0/c1 take the &mul/&muh results of a0*a0 directly | ||
30 | &mov("zero",$c2); | ||
31 | &mul($a[0],$a[0],$c0); | ||
32 | &muh($a[0],$a[0],$c1); | ||
33 | &st($c0,&QWPw(0,$rp)); | ||
34 | ($c0,$c1,$c2)=($c1,$c2,$c0); # rotate the three names (no code emitted): the stored register is reused as the new c2 | ||
35 | &mov("zero",$c2); | ||
36 | # r[1]: index pairs summing to 1 | ||
37 | &sqr_add_c2($a[1],$a[0],$c0,$c1,$c2); | ||
38 | &st($c0,&QWPw(1,$rp)); | ||
39 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
40 | &mov("zero",$c2); | ||
41 | # r[2]: index pairs summing to 2 | ||
42 | &sqr_add_c($a[1],$c0,$c1,$c2); | ||
43 | &sqr_add_c2($a[2],$a[0],$c0,$c1,$c2); | ||
44 | &st($c0,&QWPw(2,$rp)); | ||
45 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
46 | &mov("zero",$c2); | ||
47 | # r[3]: index pairs summing to 3 | ||
48 | &sqr_add_c2($a[2],$a[1],$c0,$c1,$c2); | ||
49 | &sqr_add_c2($a[3],$a[0],$c0,$c1,$c2); | ||
50 | &st($c0,&QWPw(3,$rp)); | ||
51 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
52 | &mov("zero",$c2); | ||
53 | # r[4]: index pairs summing to 4 | ||
54 | &sqr_add_c($a[2],$c0,$c1,$c2); | ||
55 | &sqr_add_c2($a[3],$a[1],$c0,$c1,$c2); | ||
56 | &sqr_add_c2($a[4],$a[0],$c0,$c1,$c2); | ||
57 | &st($c0,&QWPw(4,$rp)); | ||
58 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
59 | &mov("zero",$c2); | ||
60 | # r[5]: index pairs summing to 5 | ||
61 | &sqr_add_c2($a[3],$a[2],$c0,$c1,$c2); | ||
62 | &sqr_add_c2($a[4],$a[1],$c0,$c1,$c2); | ||
63 | &sqr_add_c2($a[5],$a[0],$c0,$c1,$c2); | ||
64 | &st($c0,&QWPw(5,$rp)); | ||
65 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
66 | &mov("zero",$c2); | ||
67 | # r[6]: index pairs summing to 6 | ||
68 | &sqr_add_c($a[3],$c0,$c1,$c2); | ||
69 | &sqr_add_c2($a[4],$a[2],$c0,$c1,$c2); | ||
70 | &sqr_add_c2($a[5],$a[1],$c0,$c1,$c2); | ||
71 | &sqr_add_c2($a[6],$a[0],$c0,$c1,$c2); | ||
72 | &st($c0,&QWPw(6,$rp)); | ||
73 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
74 | &mov("zero",$c2); | ||
75 | # r[7]: index pairs summing to 7 | ||
76 | &sqr_add_c2($a[4],$a[3],$c0,$c1,$c2); | ||
77 | &sqr_add_c2($a[5],$a[2],$c0,$c1,$c2); | ||
78 | &sqr_add_c2($a[6],$a[1],$c0,$c1,$c2); | ||
79 | &sqr_add_c2($a[7],$a[0],$c0,$c1,$c2); | ||
80 | &st($c0,&QWPw(7,$rp)); | ||
81 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
82 | &mov("zero",$c2); | ||
83 | # r[8]: index pairs summing to 8 | ||
84 | &sqr_add_c($a[4],$c0,$c1,$c2); | ||
85 | &sqr_add_c2($a[5],$a[3],$c0,$c1,$c2); | ||
86 | &sqr_add_c2($a[6],$a[2],$c0,$c1,$c2); | ||
87 | &sqr_add_c2($a[7],$a[1],$c0,$c1,$c2); | ||
88 | &st($c0,&QWPw(8,$rp)); | ||
89 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
90 | &mov("zero",$c2); | ||
91 | # r[9]: index pairs summing to 9 | ||
92 | &sqr_add_c2($a[5],$a[4],$c0,$c1,$c2); | ||
93 | &sqr_add_c2($a[6],$a[3],$c0,$c1,$c2); | ||
94 | &sqr_add_c2($a[7],$a[2],$c0,$c1,$c2); | ||
95 | &st($c0,&QWPw(9,$rp)); | ||
96 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
97 | &mov("zero",$c2); | ||
98 | # r[10]: index pairs summing to 10 | ||
99 | &sqr_add_c($a[5],$c0,$c1,$c2); | ||
100 | &sqr_add_c2($a[6],$a[4],$c0,$c1,$c2); | ||
101 | &sqr_add_c2($a[7],$a[3],$c0,$c1,$c2); | ||
102 | &st($c0,&QWPw(10,$rp)); | ||
103 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
104 | &mov("zero",$c2); | ||
105 | # r[11]: index pairs summing to 11 | ||
106 | &sqr_add_c2($a[6],$a[5],$c0,$c1,$c2); | ||
107 | &sqr_add_c2($a[7],$a[4],$c0,$c1,$c2); | ||
108 | &st($c0,&QWPw(11,$rp)); | ||
109 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
110 | &mov("zero",$c2); | ||
111 | # r[12]: index pairs summing to 12 | ||
112 | &sqr_add_c($a[6],$c0,$c1,$c2); | ||
113 | &sqr_add_c2($a[7],$a[5],$c0,$c1,$c2); | ||
114 | &st($c0,&QWPw(12,$rp)); | ||
115 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
116 | &mov("zero",$c2); | ||
117 | # r[13]: index pairs summing to 13 | ||
118 | &sqr_add_c2($a[7],$a[6],$c0,$c1,$c2); | ||
119 | &st($c0,&QWPw(13,$rp)); | ||
120 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
121 | &mov("zero",$c2); | ||
122 | # r[14] and r[15]: a7*a7; c0 and c1 are both stored | ||
123 | &sqr_add_c($a[7],$c0,$c1,$c2); | ||
124 | &st($c0,&QWPw(14,$rp)); | ||
125 | &st($c1,&QWPw(15,$rp)); | ||
126 | |||
127 | &function_end($name); | ||
128 | |||
129 | &fin_pool; | ||
130 | } | ||
131 | |||
132 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/alpha/sub.pl b/src/lib/libcrypto/bn/asm/alpha/sub.pl new file mode 100644 index 0000000000..d998da5c21 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha/sub.pl | |||
@@ -0,0 +1,108 @@ | |||
1 | #!/usr/local/bin/perl | ||
2 | # alpha assembler | ||
3 | |||
4 | sub bn_sub_words # emit r[i] = a[i] - b[i] - borrow over $count words, four words per pass of the main loop, with the running borrow kept in $cc | ||
5 | { | ||
6 | local($name)=@_; | ||
7 | local($cc,$a,$b,$r); | ||
8 | # $rp,$ap,$bp,$count come from &wparam(0..3); $cc is the register obtained from GR("r0") | ||
9 | &init_pool(4); | ||
10 | ($cc)=GR("r0"); | ||
11 | |||
12 | $rp=&wparam(0); | ||
13 | $ap=&wparam(1); | ||
14 | $bp=&wparam(2); | ||
15 | $count=&wparam(3); | ||
16 | |||
17 | &function_begin($name,""); | ||
18 | |||
19 | &comment(""); | ||
20 | &sub($count,4,$count); # count-=4; a negative result means fewer than 4 words | ||
21 | &mov("zero",$cc); # no borrow to start with | ||
22 | &blt($count,&label("finish")); | ||
23 | |||
24 | ($a0,$b0)=&NR(2); | ||
25 | &ld($a0,&QWPw(0,$ap)); | ||
26 | &ld($b0,&QWPw(0,$bp)); | ||
27 | |||
28 | ########################################################## | ||
29 | &set_label("loop"); | ||
30 | # word 0; the loads for words 1..3 are interleaved with the arithmetic | ||
31 | ($a1,$tmp,$b1,$a2,$b2,$a3,$b3,$o0)=&NR(8); | ||
32 | &ld($a1,&QWPw(1,$ap)); | ||
33 | &cmpult($a0,$b0,$tmp); # will a-b borrow? | ||
34 | &ld($b1,&QWPw(1,$bp)); | ||
35 | &sub($a0,$b0,$a0); # do the subtract | ||
36 | &ld($a2,&QWPw(2,$ap)); | ||
37 | &cmpult($a0,$cc,$b0); # will taking off the incoming borrow borrow again? | ||
38 | &ld($b2,&QWPw(2,$bp)); | ||
39 | &sub($a0,$cc,$o0); # take off the incoming borrow | ||
40 | &ld($a3,&QWPw(3,$ap)); | ||
41 | &add($b0,$tmp,$cc); ($t1,$o1)=&NR(2); &FR($tmp); # cc = sum of the two borrow flags | ||
42 | # word 1 | ||
43 | &cmpult($a1,$b1,$t1); # will a-b borrow? | ||
44 | &sub($a1,$b1,$a1); # do the subtract | ||
45 | &ld($b3,&QWPw(3,$bp)); | ||
46 | &cmpult($a1,$cc,$b1); # will taking off the incoming borrow borrow again? | ||
47 | &sub($a1,$cc,$o1); # take off the incoming borrow | ||
48 | &add($b1,$t1,$cc); ($tmp,$o2)=&NR(2); &FR($t1,$a1,$b1); | ||
49 | # word 2 | ||
50 | &cmpult($a2,$b2,$tmp); # will a-b borrow? | ||
51 | &sub($a2,$b2,$a2); # do the subtract | ||
52 | &st($o0,&QWPw(0,$rp)); &FR($o0); # save | ||
53 | &cmpult($a2,$cc,$b2); # will taking off the incoming borrow borrow again? | ||
54 | &sub($a2,$cc,$o2); # take off the incoming borrow | ||
55 | &add($b2,$tmp,$cc); ($t3,$o3)=&NR(2); &FR($tmp,$a2,$b2); | ||
56 | # word 3 | ||
57 | &cmpult($a3,$b3,$t3); # will a-b borrow? | ||
58 | &sub($a3,$b3,$a3); # do the subtract | ||
59 | &st($o1,&QWPw(1,$rp)); &FR($o1); | ||
60 | &cmpult($a3,$cc,$b3); # will taking off the incoming borrow borrow again? | ||
61 | &sub($a3,$cc,$o3); # take off the incoming borrow | ||
62 | &add($b3,$t3,$cc); &FR($t3,$a3,$b3); | ||
63 | |||
64 | &st($o2,&QWPw(2,$rp)); &FR($o2); | ||
65 | &sub($count,4,$count); # count-=4 | ||
66 | &st($o3,&QWPw(3,$rp)); &FR($o3); | ||
67 | &add($ap,4*$QWS,$ap); # advance ap by 4*$QWS | ||
68 | &add($bp,4*$QWS,$bp); # advance bp by 4*$QWS | ||
69 | &add($rp,4*$QWS,$rp); # advance rp by 4*$QWS | ||
70 | |||
71 | &blt($count,&label("finish")); | ||
72 | &ld($a0,&QWPw(0,$ap)); | ||
73 | &ld($b0,&QWPw(0,$bp)); | ||
74 | &br(&label("loop")); | ||
75 | ################################################## | ||
76 | # Do the last 0..3 words | ||
77 | |||
78 | &set_label("last_loop"); | ||
79 | |||
80 | &ld($a0,&QWPw(0,$ap)); # get a | ||
81 | &ld($b0,&QWPw(0,$bp)); # get b | ||
82 | &cmpult($a0,$b0,$tmp); # will a-b borrow? | ||
83 | &sub($a0,$b0,$a0); # do the subtract | ||
84 | &cmpult($a0,$cc,$b0); # will taking off the incoming borrow borrow again? | ||
85 | &sub($a0,$cc,$a0); # take off the incoming borrow | ||
86 | &st($a0,&QWPw(0,$rp)); # save | ||
87 | &add($b0,$tmp,$cc); # add the borrows | ||
88 | |||
89 | &add($ap,$QWS,$ap); | ||
90 | &add($bp,$QWS,$bp); | ||
91 | &add($rp,$QWS,$rp); | ||
92 | &sub($count,1,$count); | ||
93 | &bgt($count,&label("last_loop")); | ||
94 | &function_end_A($name); | ||
95 | |||
96 | ###################################################### | ||
97 | &set_label("finish"); | ||
98 | &add($count,4,$count); # undo the last count-=4; what is left is the 0..3 remaining words | ||
99 | &bgt($count,&label("last_loop")); | ||
100 | |||
101 | &FR($a0,$b0); | ||
102 | &set_label("end"); | ||
103 | &function_end($name); | ||
104 | |||
105 | &fin_pool; | ||
106 | } | ||
107 | |||
108 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/x86/add.pl b/src/lib/libcrypto/bn/asm/x86/add.pl new file mode 100644 index 0000000000..0b5cf583e3 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/x86/add.pl | |||
@@ -0,0 +1,76 @@ | |||
1 | #!/usr/local/bin/perl | ||
2 | # x86 assembler | ||
3 | |||
4 | sub bn_add_words | ||
5 | { | ||
6 | local($name)=@_; | ||
7 | |||
8 | &function_begin($name,""); | ||
9 | |||
10 | &comment(""); | ||
11 | $a="esi"; | ||
12 | $b="edi"; | ||
13 | $c="eax"; | ||
14 | $r="ebx"; | ||
15 | $tmp1="ecx"; | ||
16 | $tmp2="edx"; | ||
17 | $num="ebp"; | ||
18 | |||
19 | &mov($r,&wparam(0)); # get r (words are stored here) | ||
20 | &mov($a,&wparam(1)); # get a | ||
21 | &mov($b,&wparam(2)); # get b | ||
22 | &mov($num,&wparam(3)); # get num | ||
23 | &xor($c,$c); # clear carry | ||
24 | &and($num,0xfffffff8); # round num down to a multiple of 8; zero skips the unrolled loop | ||
25 | |||
26 | &jz(&label("aw_finish")); | ||
27 | |||
28 | &set_label("aw_loop",0); | ||
29 | for ($i=0; $i<8; $i++) | ||
30 | { | ||
31 | &comment("Round $i"); | ||
32 | |||
33 | &mov($tmp1,&DWP($i*4,$a,"",0)); # *a | ||
34 | &mov($tmp2,&DWP($i*4,$b,"",0)); # *b | ||
35 | &add($tmp1,$c); | ||
36 | &mov($c,0); | ||
37 | &adc($c,$c); | ||
38 | &add($tmp1,$tmp2); | ||
39 | &adc($c,0); | ||
40 | &mov(&DWP($i*4,$r,"",0),$tmp1); # *r | ||
41 | } | ||
42 | |||
43 | &comment(""); | ||
44 | &add($a,32); | ||
45 | &add($b,32); | ||
46 | &add($r,32); | ||
47 | &sub($num,8); | ||
48 | &jnz(&label("aw_loop")); | ||
49 | |||
50 | &set_label("aw_finish",0); | ||
51 | &mov($num,&wparam(3)); # reload num; its low 3 bits count the leftover words | ||
52 | &and($num,7); | ||
53 | &jz(&label("aw_end")); | ||
54 | |||
55 | for ($i=0; $i<7; $i++) | ||
56 | { | ||
57 | &comment("Tail Round $i"); | ||
58 | &mov($tmp1,&DWP($i*4,$a,"",0)); # *a | ||
59 | &mov($tmp2,&DWP($i*4,$b,"",0));# *b | ||
60 | &add($tmp1,$c); | ||
61 | &mov($c,0); | ||
62 | &adc($c,$c); | ||
63 | &add($tmp1,$tmp2); | ||
64 | &adc($c,0); | ||
65 | &dec($num) if ($i != 6); | ||
66 | &mov(&DWP($i*4,$r,"",0),$tmp1); # *r (mov keeps the flags set by dec for the jz below) | ||
67 | &jz(&label("aw_end")) if ($i != 6); | ||
68 | } | ||
69 | &set_label("aw_end",0); | ||
70 | |||
71 | # &mov("eax",$c); # $c is "eax" | ||
72 | |||
73 | &function_end($name); | ||
74 | } | ||
75 | |||
76 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/x86/comba.pl b/src/lib/libcrypto/bn/asm/x86/comba.pl new file mode 100644 index 0000000000..2291253629 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/x86/comba.pl | |||
@@ -0,0 +1,277 @@ | |||
1 | #!/usr/local/bin/perl | ||
2 | # x86 assembler | ||
3 | |||
4 | sub mul_add_c | ||
5 | { | ||
6 | local($a,$ai,$b,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_; | ||
7 | |||
8 | # pos < 0: load nothing; pos == 0: load a[$na] into eax and b[$nb] into edx; | ||
9 | # pos > 0: store $c0 to r[$i], and when pos == 1 also load the next a and b words | ||
10 | |||
11 | &comment("mul a[$ai]*b[$bi]"); | ||
12 | |||
13 | # "eax" and "edx" will always be pre-loaded. | ||
14 | # &mov("eax",&DWP($ai*4,$a,"",0)) ; | ||
15 | # &mov("edx",&DWP($bi*4,$b,"",0)); | ||
16 | |||
17 | &mul("edx"); | ||
18 | &add($c0,"eax"); | ||
19 | &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0; # load next a | ||
20 | &mov("eax",&wparam(0)) if $pos > 0; # load r[] | ||
21 | ### | ||
22 | &adc($c1,"edx"); | ||
23 | &mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 0; # load next b | ||
24 | &mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 1; # load next b | ||
25 | ### | ||
26 | &adc($c2,0); | ||
27 | # if pos > 1, it means it is the last loop | ||
28 | &mov(&DWP($i*4,"eax","",0),$c0) if $pos > 0; # save r[]; | ||
29 | &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # load next a | ||
30 | } | ||
31 | |||
32 | sub sqr_add_c | ||
33 | { | ||
34 | local($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_; | ||
35 | |||
36 | # pos < 0: load nothing; pos == 0: load a[$na] into eax; | ||
37 | # pos > 0: store $c0 to r[$i], and when pos == 1 also load the next words | ||
38 | |||
39 | &comment("sqr a[$ai]*a[$bi]"); | ||
40 | |||
41 | # "eax" and "edx" will always be pre-loaded. | ||
42 | # &mov("eax",&DWP($ai*4,$a,"",0)) ; | ||
43 | # &mov("edx",&DWP($bi*4,$b,"",0)); | ||
44 | |||
45 | if ($ai == $bi) | ||
46 | { &mul("eax");} | ||
47 | else | ||
48 | { &mul("edx");} | ||
49 | &add($c0,"eax"); | ||
50 | &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0; # load next a | ||
51 | ### | ||
52 | &adc($c1,"edx"); | ||
53 | &mov("edx",&DWP(($nb)*4,$a,"",0)) if ($pos == 1) && ($na != $nb); | ||
54 | ### | ||
55 | &adc($c2,0); | ||
56 | # if pos > 1, it means it is the last loop | ||
57 | &mov(&DWP($i*4,$r,"",0),$c0) if $pos > 0; # save r[]; | ||
58 | &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # load a[$na] into eax | ||
59 | } | ||
60 | |||
61 | sub sqr_add_c2 | ||
62 | { | ||
63 | local($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_; | ||
64 | |||
65 | # pos == 0 or 1: load a[$na] into eax; pos <= 1 and $na != $nb: load a[$nb] | ||
66 | # into edx; pos > 0: store $c0 to r[$i] | ||
67 | |||
68 | &comment("sqr a[$ai]*a[$bi]"); | ||
69 | |||
70 | # "eax" and "edx" will always be pre-loaded. | ||
71 | # &mov("eax",&DWP($ai*4,$a,"",0)) ; | ||
72 | # &mov("edx",&DWP($bi*4,$a,"",0)); | ||
73 | |||
74 | if ($ai == $bi) | ||
75 | { &mul("eax");} | ||
76 | else | ||
77 | { &mul("edx");} | ||
78 | &add("eax","eax"); | ||
79 | ### | ||
80 | &adc("edx","edx"); | ||
81 | ### | ||
82 | &adc($c2,0); | ||
83 | &add($c0,"eax"); | ||
84 | &adc($c1,"edx"); | ||
85 | &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0; # load next a | ||
86 | &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # load a[$na] into eax | ||
87 | &adc($c2,0); | ||
88 | &mov(&DWP($i*4,$r,"",0),$c0) if $pos > 0; # save r[]; | ||
89 | &mov("edx",&DWP(($nb)*4,$a,"",0)) if ($pos <= 1) && ($na != $nb); | ||
90 | ### | ||
91 | } | ||
92 | |||
93 | sub bn_mul_comba | ||
94 | { | ||
95 | local($name,$num)=@_; | ||
96 | local($a,$b,$c0,$c1,$c2); | ||
97 | local($i,$as,$ae,$bs,$be,$ai,$bi); | ||
98 | local($tot,$end); | ||
99 | |||
100 | &function_begin_B($name,""); | ||
101 | |||
102 | $c0="ebx"; | ||
103 | $c1="ecx"; | ||
104 | $c2="ebp"; | ||
105 | $a="esi"; | ||
106 | $b="edi"; | ||
107 | |||
108 | $as=0; | ||
109 | $ae=0; | ||
110 | $bs=0; | ||
111 | $be=0; | ||
112 | $tot=$num+$num-1; | ||
113 | |||
114 | &push("esi"); | ||
115 | &mov($a,&wparam(1)); | ||
116 | &push("edi"); | ||
117 | &mov($b,&wparam(2)); | ||
118 | &push("ebp"); | ||
119 | &push("ebx"); | ||
120 | |||
121 | &xor($c0,$c0); | ||
122 | &mov("eax",&DWP(0,$a,"",0)); # load the first word of a | ||
123 | &xor($c1,$c1); | ||
124 | &mov("edx",&DWP(0,$b,"",0)); # load the first word of b | ||
125 | |||
126 | for ($i=0; $i<$tot; $i++) | ||
127 | { | ||
128 | $ai=$as; | ||
129 | $bi=$bs; | ||
130 | $end=$be+1; | ||
131 | |||
132 | &comment("################## Calculate word $i"); | ||
133 | |||
134 | for ($j=$bs; $j<$end; $j++) | ||
135 | { | ||
136 | &xor($c2,$c2) if ($j == $bs); | ||
137 | if (($j+1) == $end) | ||
138 | { | ||
139 | $v=1; | ||
140 | $v=2 if (($i+1) == $tot); | ||
141 | } | ||
142 | else | ||
143 | { $v=0; } | ||
144 | if (($j+1) != $end) | ||
145 | { | ||
146 | $na=($ai-1); | ||
147 | $nb=($bi+1); | ||
148 | } | ||
149 | else | ||
150 | { | ||
151 | $na=$as+($i < ($num-1)); | ||
152 | $nb=$bs+($i >= ($num-1)); | ||
153 | } | ||
154 | #printf STDERR "[$ai,$bi] -> [$na,$nb]\n"; | ||
155 | &mul_add_c($a,$ai,$b,$bi,$c0,$c1,$c2,$v,$i,$na,$nb); | ||
156 | if ($v) | ||
157 | { | ||
158 | &comment("saved r[$i]"); | ||
159 | # &mov("eax",&wparam(0)); | ||
160 | # &mov(&DWP($i*4,"eax","",0),$c0); | ||
161 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
162 | } | ||
163 | $ai--; | ||
164 | $bi++; | ||
165 | } | ||
166 | $as++ if ($i < ($num-1)); | ||
167 | $ae++ if ($i >= ($num-1)); | ||
168 | |||
169 | $bs++ if ($i >= ($num-1)); | ||
170 | $be++ if ($i < ($num-1)); | ||
171 | } | ||
172 | &comment("save r[$i]"); | ||
173 | # &mov("eax",&wparam(0)); | ||
174 | &mov(&DWP($i*4,"eax","",0),$c0); | ||
175 | |||
176 | &pop("ebx"); | ||
177 | &pop("ebp"); | ||
178 | &pop("edi"); | ||
179 | &pop("esi"); | ||
180 | &ret(); | ||
181 | &function_end_B($name); | ||
182 | } | ||
183 | |||
184 | sub bn_sqr_comba | ||
185 | { | ||
186 | local($name,$num)=@_; | ||
187 | local($r,$a,$c0,$c1,$c2); | ||
188 | local($i,$as,$ae,$bs,$be,$ai,$bi); | ||
189 | local($b,$tot,$end,$half); | ||
190 | |||
191 | &function_begin_B($name,""); | ||
192 | |||
193 | $c0="ebx"; | ||
194 | $c1="ecx"; | ||
195 | $c2="ebp"; | ||
196 | $a="esi"; | ||
197 | $r="edi"; | ||
198 | |||
199 | &push("esi"); | ||
200 | &push("edi"); | ||
201 | &push("ebp"); | ||
202 | &push("ebx"); | ||
203 | &mov($r,&wparam(0)); | ||
204 | &mov($a,&wparam(1)); | ||
205 | &xor($c0,$c0); | ||
206 | &xor($c1,$c1); | ||
207 | &mov("eax",&DWP(0,$a,"",0)); # load the first word of a | ||
208 | |||
209 | $as=0; | ||
210 | $ae=0; | ||
211 | $bs=0; | ||
212 | $be=0; | ||
213 | $tot=$num+$num-1; | ||
214 | |||
215 | for ($i=0; $i<$tot; $i++) | ||
216 | { | ||
217 | $ai=$as; | ||
218 | $bi=$bs; | ||
219 | $end=$be+1; | ||
220 | |||
221 | &comment("############### Calculate word $i"); | ||
222 | for ($j=$bs; $j<$end; $j++) | ||
223 | { | ||
224 | &xor($c2,$c2) if ($j == $bs); | ||
225 | if (($ai-1) < ($bi+1)) | ||
226 | { | ||
227 | $v=1; | ||
228 | $v=2 if ($i+1) == $tot; | ||
229 | } | ||
230 | else | ||
231 | { $v=0; } | ||
232 | if (!$v) | ||
233 | { | ||
234 | $na=$ai-1; | ||
235 | $nb=$bi+1; | ||
236 | } | ||
237 | else | ||
238 | { | ||
239 | $na=$as+($i < ($num-1)); | ||
240 | $nb=$bs+($i >= ($num-1)); | ||
241 | } | ||
242 | if ($ai == $bi) | ||
243 | { | ||
244 | &sqr_add_c($r,$a,$ai,$bi, | ||
245 | $c0,$c1,$c2,$v,$i,$na,$nb); | ||
246 | } | ||
247 | else | ||
248 | { | ||
249 | &sqr_add_c2($r,$a,$ai,$bi, | ||
250 | $c0,$c1,$c2,$v,$i,$na,$nb); | ||
251 | } | ||
252 | if ($v) | ||
253 | { | ||
254 | &comment("saved r[$i]"); | ||
255 | #&mov(&DWP($i*4,$r,"",0),$c0); | ||
256 | ($c0,$c1,$c2)=($c1,$c2,$c0); | ||
257 | last; | ||
258 | } | ||
259 | $ai--; | ||
260 | $bi++; | ||
261 | } | ||
262 | $as++ if ($i < ($num-1)); | ||
263 | $ae++ if ($i >= ($num-1)); | ||
264 | |||
265 | $bs++ if ($i >= ($num-1)); | ||
266 | $be++ if ($i < ($num-1)); | ||
267 | } | ||
268 | &mov(&DWP($i*4,$r,"",0),$c0); | ||
269 | &pop("ebx"); | ||
270 | &pop("ebp"); | ||
271 | &pop("edi"); | ||
272 | &pop("esi"); | ||
273 | &ret(); | ||
274 | &function_end_B($name); | ||
275 | } | ||
276 | |||
277 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/x86/div.pl b/src/lib/libcrypto/bn/asm/x86/div.pl new file mode 100644 index 0000000000..0e90152caa --- /dev/null +++ b/src/lib/libcrypto/bn/asm/x86/div.pl | |||
@@ -0,0 +1,15 @@ | |||
1 | #!/usr/local/bin/perl | ||
2 | # x86 assembler | ||
3 | |||
4 | sub bn_div_words | ||
5 | { | ||
6 | local($name)=@_; | ||
7 | |||
8 | &function_begin($name,""); | ||
9 | &mov("edx",&wparam(0)); # edx = wparam(0): high word of the edx:eax dividend | ||
10 | &mov("eax",&wparam(1)); # eax = wparam(1): low word of the edx:eax dividend | ||
11 | &mov("ebx",&wparam(2)); # ebx = wparam(2): divisor | ||
12 | &div("ebx"); | ||
13 | &function_end($name); | ||
14 | } | ||
15 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/x86/f b/src/lib/libcrypto/bn/asm/x86/f new file mode 100644 index 0000000000..22e4112224 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/x86/f | |||
@@ -0,0 +1,3 @@ | |||
1 | #!/usr/local/bin/perl | ||
2 | # x86 assembler | ||
3 | |||
diff --git a/src/lib/libcrypto/bn/asm/x86/mul.pl b/src/lib/libcrypto/bn/asm/x86/mul.pl new file mode 100644 index 0000000000..674cb9b055 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/x86/mul.pl | |||
@@ -0,0 +1,77 @@ | |||
1 | #!/usr/local/bin/perl | ||
2 | # x86 assembler | ||
3 | |||
4 | sub bn_mul_words | ||
5 | { | ||
6 | local($name)=@_; | ||
7 | |||
8 | &function_begin($name,""); | ||
9 | |||
10 | &comment(""); | ||
11 | $Low="eax"; | ||
12 | $High="edx"; | ||
13 | $a="ebx"; | ||
14 | $w="ecx"; | ||
15 | $r="edi"; | ||
16 | $c="esi"; | ||
17 | $num="ebp"; | ||
18 | |||
19 | &xor($c,$c); # clear carry | ||
20 | &mov($r,&wparam(0)); # get r | ||
21 | &mov($a,&wparam(1)); # get a | ||
22 | &mov($num,&wparam(2)); # get num | ||
23 | &mov($w,&wparam(3)); # get w | ||
24 | |||
25 | &and($num,0xfffffff8); # round num down to a multiple of 8; zero skips the unrolled loop | ||
26 | &jz(&label("mw_finish")); | ||
27 | |||
28 | &set_label("mw_loop",0); | ||
29 | for ($i=0; $i<32; $i+=4) | ||
30 | { | ||
31 | &comment("Round $i"); | ||
32 | |||
33 | &mov("eax",&DWP($i,$a,"",0)); # *a | ||
34 | &mul($w); # *a * w | ||
35 | &add("eax",$c); # L(t)+=c | ||
36 | # XXX | ||
37 | |||
38 | &adc("edx",0); # H(t)+=carry | ||
39 | &mov(&DWP($i,$r,"",0),"eax"); # *r= L(t); | ||
40 | |||
41 | &mov($c,"edx"); # c= H(t); | ||
42 | } | ||
43 | |||
44 | &comment(""); | ||
45 | &add($a,32); | ||
46 | &add($r,32); | ||
47 | &sub($num,8); | ||
48 | &jz(&label("mw_finish")); | ||
49 | &jmp(&label("mw_loop")); | ||
50 | |||
51 | &set_label("mw_finish",0); | ||
52 | &mov($num,&wparam(2)); # reload num; its low 3 bits count the leftover words | ||
53 | &and($num,7); | ||
54 | &jnz(&label("mw_finish2")); | ||
55 | &jmp(&label("mw_end")); | ||
56 | |||
57 | &set_label("mw_finish2",1); | ||
58 | for ($i=0; $i<7; $i++) | ||
59 | { | ||
60 | &comment("Tail Round $i"); | ||
61 | &mov("eax",&DWP($i*4,$a,"",0));# *a | ||
62 | &mul($w); # *a * w | ||
63 | &add("eax",$c); # L(t)+=c | ||
64 | # XXX | ||
65 | &adc("edx",0); # H(t)+=carry | ||
66 | &mov(&DWP($i*4,$r,"",0),"eax");# *r= L(t); | ||
67 | &mov($c,"edx"); # c= H(t); | ||
68 | &dec($num) if ($i != 7-1); | ||
69 | &jz(&label("mw_end")) if ($i != 7-1); | ||
70 | } | ||
71 | &set_label("mw_end",0); | ||
72 | &mov("eax",$c); | ||
73 | |||
74 | &function_end($name); | ||
75 | } | ||
76 | |||
77 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/x86/mul_add.pl b/src/lib/libcrypto/bn/asm/x86/mul_add.pl new file mode 100644 index 0000000000..61830d3a90 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/x86/mul_add.pl | |||
@@ -0,0 +1,87 @@ | |||
1 | #!/usr/local/bin/perl | ||
2 | # x86 assembler | ||
3 | |||
4 | sub bn_mul_add_words | ||
5 | { | ||
6 | local($name)=@_; | ||
7 | |||
8 | &function_begin($name,""); | ||
9 | |||
10 | &comment(""); | ||
11 | $Low="eax"; | ||
12 | $High="edx"; | ||
13 | $a="ebx"; | ||
14 | $w="ebp"; | ||
15 | $r="edi"; | ||
16 | $c="esi"; | ||
17 | |||
18 | &xor($c,$c); # clear carry | ||
19 | &mov($r,&wparam(0)); # get r | ||
20 | |||
21 | &mov("ecx",&wparam(2)); # get num | ||
22 | &mov($a,&wparam(1)); # get a | ||
23 | |||
24 | &and("ecx",0xfffffff8); # round num down to a multiple of 8 | ||
25 | &mov($w,&wparam(3)); # get w (mov leaves the flags from the and untouched) | ||
26 | |||
27 | &push("ecx"); # Up the stack for a tmp variable | ||
28 | |||
29 | &jz(&label("maw_finish")); | ||
30 | |||
31 | &set_label("maw_loop",0); | ||
32 | |||
33 | &mov(&swtmp(0),"ecx"); # save the remaining count in the stack tmp | ||
34 | |||
35 | for ($i=0; $i<32; $i+=4) | ||
36 | { | ||
37 | &comment("Round $i"); | ||
38 | |||
39 | &mov("eax",&DWP($i,$a,"",0)); # *a | ||
40 | &mul($w); # *a * w | ||
41 | &add("eax",$c); # L(t)+=c | ||
42 | &mov($c,&DWP($i,$r,"",0)); # c= *r | ||
43 | &adc("edx",0); # H(t)+=carry | ||
44 | &add("eax",$c); # L(t)+= *r | ||
45 | &adc("edx",0); # H(t)+=carry | ||
46 | &mov(&DWP($i,$r,"",0),"eax"); # *r= L(t); | ||
47 | &mov($c,"edx"); # c= H(t); | ||
48 | } | ||
49 | |||
50 | &comment(""); | ||
51 | &mov("ecx",&swtmp(0)); # reload the remaining count | ||
52 | &add($a,32); | ||
53 | &add($r,32); | ||
54 | &sub("ecx",8); | ||
55 | &jnz(&label("maw_loop")); | ||
56 | |||
57 | &set_label("maw_finish",0); | ||
58 | &mov("ecx",&wparam(2)); # get num | ||
59 | &and("ecx",7); | ||
60 | &jnz(&label("maw_finish2")); # helps branch prediction | ||
61 | &jmp(&label("maw_end")); | ||
62 | |||
63 | &set_label("maw_finish2",1); | ||
64 | for ($i=0; $i<7; $i++) | ||
65 | { | ||
66 | &comment("Tail Round $i"); | ||
67 | &mov("eax",&DWP($i*4,$a,"",0));# *a | ||
68 | &mul($w); # *a * w | ||
69 | &add("eax",$c); # L(t)+=c | ||
70 | &mov($c,&DWP($i*4,$r,"",0)); # c= *r | ||
71 | &adc("edx",0); # H(t)+=carry | ||
72 | &add("eax",$c); | ||
73 | &adc("edx",0); # H(t)+=carry | ||
74 | &dec("ecx") if ($i != 7-1); | ||
75 | &mov(&DWP($i*4,$r,"",0),"eax"); # *r= L(t); | ||
76 | &mov($c,"edx"); # c= H(t); | ||
77 | &jz(&label("maw_end")) if ($i != 7-1); | ||
78 | } | ||
79 | &set_label("maw_end",0); | ||
80 | &mov("eax",$c); | ||
81 | |||
82 | &pop("ecx"); # remove the tmp variable from the stack | ||
83 | |||
84 | &function_end($name); | ||
85 | } | ||
86 | |||
87 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/x86/sqr.pl b/src/lib/libcrypto/bn/asm/x86/sqr.pl new file mode 100644 index 0000000000..1f90993cf6 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/x86/sqr.pl | |||
@@ -0,0 +1,60 @@ | |||
1 | #!/usr/local/bin/perl | ||
2 | # x86 assembler | ||
3 | |||
4 | sub bn_sqr_words | ||
5 | { | ||
6 | local($name)=@_; | ||
7 | |||
8 | &function_begin($name,""); | ||
9 | |||
10 | &comment(""); | ||
11 | $r="esi"; | ||
12 | $a="edi"; | ||
13 | $num="ebx"; | ||
14 | |||
15 | &mov($r,&wparam(0)); # get r | ||
16 | &mov($a,&wparam(1)); # get a | ||
17 | &mov($num,&wparam(2)); # get num | ||
18 | |||
19 | &and($num,0xfffffff8); # round num down to a multiple of 8; zero skips the unrolled loop | ||
20 | &jz(&label("sw_finish")); | ||
21 | |||
22 | &set_label("sw_loop",0); | ||
23 | for ($i=0; $i<32; $i+=4) | ||
24 | { | ||
25 | &comment("Round $i"); | ||
26 | &mov("eax",&DWP($i,$a,"",0)); # *a | ||
27 | # XXX | ||
28 | &mul("eax"); # *a * *a | ||
29 | &mov(&DWP($i*2,$r,"",0),"eax"); # store the low word of the square | ||
30 | &mov(&DWP($i*2+4,$r,"",0),"edx");# store the high word of the square | ||
31 | } | ||
32 | |||
33 | &comment(""); | ||
34 | &add($a,32); | ||
35 | &add($r,64); | ||
36 | &sub($num,8); | ||
37 | &jnz(&label("sw_loop")); | ||
38 | |||
39 | &set_label("sw_finish",0); | ||
40 | &mov($num,&wparam(2)); # reload num; its low 3 bits count the leftover words | ||
41 | &and($num,7); | ||
42 | &jz(&label("sw_end")); | ||
43 | |||
44 | for ($i=0; $i<7; $i++) | ||
45 | { | ||
46 | &comment("Tail Round $i"); | ||
47 | &mov("eax",&DWP($i*4,$a,"",0)); # *a | ||
48 | # XXX | ||
49 | &mul("eax"); # *a * *a | ||
50 | &mov(&DWP($i*8,$r,"",0),"eax"); # store the low word of the square | ||
51 | &dec($num) if ($i != 7-1); | ||
52 | &mov(&DWP($i*8+4,$r,"",0),"edx"); | ||
53 | &jz(&label("sw_end")) if ($i != 7-1); | ||
54 | } | ||
55 | &set_label("sw_end",0); | ||
56 | |||
57 | &function_end($name); | ||
58 | } | ||
59 | |||
60 | 1; | ||
diff --git a/src/lib/libcrypto/bn/asm/x86/sub.pl b/src/lib/libcrypto/bn/asm/x86/sub.pl new file mode 100644 index 0000000000..837b0e1b07 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/x86/sub.pl | |||
@@ -0,0 +1,76 @@ | |||
1 | #!/usr/local/bin/perl | ||
2 | # x86 assembler | ||
3 | |||
4 | sub bn_sub_words | ||
5 | { | ||
6 | local($name)=@_; | ||
7 | |||
8 | &function_begin($name,""); | ||
9 | |||
10 | &comment(""); | ||
11 | $a="esi"; | ||
12 | $b="edi"; | ||
13 | $c="eax"; | ||
14 | $r="ebx"; | ||
15 | $tmp1="ecx"; | ||
16 | $tmp2="edx"; | ||
17 | $num="ebp"; | ||
18 | |||
19 | &mov($r,&wparam(0)); # get r (words are stored here) | ||
20 | &mov($a,&wparam(1)); # get a | ||
21 | &mov($b,&wparam(2)); # get b | ||
22 | &mov($num,&wparam(3)); # get num | ||
23 | &xor($c,$c); # clear borrow | ||
24 | &and($num,0xfffffff8); # round num down to a multiple of 8; zero skips the unrolled loop | ||
25 | |||
26 | &jz(&label("aw_finish")); | ||
27 | |||
28 | &set_label("aw_loop",0); | ||
29 | for ($i=0; $i<8; $i++) | ||
30 | { | ||
31 | &comment("Round $i"); | ||
32 | |||
33 | &mov($tmp1,&DWP($i*4,$a,"",0)); # *a | ||
34 | &mov($tmp2,&DWP($i*4,$b,"",0)); # *b | ||
35 | &sub($tmp1,$c); | ||
36 | &mov($c,0); | ||
37 | &adc($c,$c); | ||
38 | &sub($tmp1,$tmp2); | ||
39 | &adc($c,0); | ||
40 | &mov(&DWP($i*4,$r,"",0),$tmp1); # *r | ||
41 | } | ||
42 | |||
43 | &comment(""); | ||
44 | &add($a,32); | ||
45 | &add($b,32); | ||
46 | &add($r,32); | ||
47 | &sub($num,8); | ||
48 | &jnz(&label("aw_loop")); | ||
49 | |||
50 | &set_label("aw_finish",0); | ||
51 | &mov($num,&wparam(3)); # reload num; its low 3 bits count the leftover words | ||
52 | &and($num,7); | ||
53 | &jz(&label("aw_end")); | ||
54 | |||
55 | for ($i=0; $i<7; $i++) | ||
56 | { | ||
57 | &comment("Tail Round $i"); | ||
58 | &mov($tmp1,&DWP($i*4,$a,"",0)); # *a | ||
59 | &mov($tmp2,&DWP($i*4,$b,"",0));# *b | ||
60 | &sub($tmp1,$c); | ||
61 | &mov($c,0); | ||
62 | &adc($c,$c); | ||
63 | &sub($tmp1,$tmp2); | ||
64 | &adc($c,0); | ||
65 | &dec($num) if ($i != 6); | ||
66 | &mov(&DWP($i*4,$r,"",0),$tmp1); # *r (mov keeps the flags set by dec for the jz below) | ||
67 | &jz(&label("aw_end")) if ($i != 6); | ||
68 | } | ||
69 | &set_label("aw_end",0); | ||
70 | |||
71 | # &mov("eax",$c); # $c is "eax" | ||
72 | |||
73 | &function_end($name); | ||
74 | } | ||
75 | |||
76 | 1; | ||
diff --git a/src/lib/libcrypto/bn/old/b_sqr.c b/src/lib/libcrypto/bn/old/b_sqr.c new file mode 100644 index 0000000000..715cb1c8ab --- /dev/null +++ b/src/lib/libcrypto/bn/old/b_sqr.c | |||
@@ -0,0 +1,199 @@ | |||
1 | /* crypto/bn/bn_mul.c */ | ||
2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) | ||
3 | * All rights reserved. | ||
4 | * | ||
5 | * This package is an SSL implementation written | ||
6 | * by Eric Young (eay@cryptsoft.com). | ||
7 | * The implementation was written so as to conform with Netscapes SSL. | ||
8 | * | ||
9 | * This library is free for commercial and non-commercial use as long as | ||
10 | * the following conditions are aheared to. The following conditions | ||
11 | * apply to all code found in this distribution, be it the RC4, RSA, | ||
12 | * lhash, DES, etc., code; not just the SSL code. The SSL documentation | ||
13 | * included with this distribution is covered by the same copyright terms | ||
14 | * except that the holder is Tim Hudson (tjh@cryptsoft.com). | ||
15 | * | ||
16 | * Copyright remains Eric Young's, and as such any Copyright notices in | ||
17 | * the code are not to be removed. | ||
18 | * If this package is used in a product, Eric Young should be given attribution | ||
19 | * as the author of the parts of the library used. | ||
20 | * This can be in the form of a textual message at program startup or | ||
21 | * in documentation (online or textual) provided with the package. | ||
22 | * | ||
23 | * Redistribution and use in source and binary forms, with or without | ||
24 | * modification, are permitted provided that the following conditions | ||
25 | * are met: | ||
26 | * 1. Redistributions of source code must retain the copyright | ||
27 | * notice, this list of conditions and the following disclaimer. | ||
28 | * 2. Redistributions in binary form must reproduce the above copyright | ||
29 | * notice, this list of conditions and the following disclaimer in the | ||
30 | * documentation and/or other materials provided with the distribution. | ||
31 | * 3. All advertising materials mentioning features or use of this software | ||
32 | * must display the following acknowledgement: | ||
33 | * "This product includes cryptographic software written by | ||
34 | * Eric Young (eay@cryptsoft.com)" | ||
35 | * The word 'cryptographic' can be left out if the rouines from the library | ||
36 | * being used are not cryptographic related :-). | ||
37 | * 4. If you include any Windows specific code (or a derivative thereof) from | ||
38 | * the apps directory (application code) you must include an acknowledgement: | ||
39 | * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" | ||
40 | * | ||
41 | * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND | ||
42 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
43 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||
44 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE | ||
45 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | ||
46 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | ||
47 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | ||
48 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | ||
49 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | ||
50 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | ||
51 | * SUCH DAMAGE. | ||
52 | * | ||
53 | * The licence and distribution terms for any publically available version or | ||
54 | * derivative of this code cannot be changed. i.e. this code cannot simply be | ||
55 | * copied and put under another distribution licence | ||
56 | * [including the GNU Public Licence.] | ||
57 | */ | ||
58 | |||
59 | #include <stdio.h> | ||
60 | #include "cryptlib.h" | ||
61 | #include "bn_lcl.h" | ||
62 | |||
63 | static int bn_mm(BIGNUM *m,BIGNUM *A,BIGNUM *B, BIGNUM *sk,BN_CTX *ctx); | ||
64 | |||
65 | /* r must be different to a and b; returns 1 on success and 0 on failure */ | ||
66 | /* int BN_mmul(r, a, b) */ | ||
67 | int BN_mul(BIGNUM *r, BIGNUM *a, BIGNUM *b) | ||
68 | { | ||
69 | BN_ULONG *ap,*bp,*rp; | ||
70 | BIGNUM *sk; | ||
71 | int i,n,ret; | ||
72 | int max,al,bl; | ||
73 | BN_CTX ctx; | ||
74 | |||
75 | bn_check_top(a); | ||
76 | bn_check_top(b); | ||
77 | |||
78 | al=a->top; | ||
79 | bl=b->top; | ||
80 | if ((al == 0) || (bl == 0)) | ||
81 | { | ||
82 | r->top=0; | ||
83 | return(1); | ||
84 | } | ||
85 | #ifdef BN_MUL_DEBUG | ||
86 | printf("BN_mul(%d,%d)\n",a->top,b->top); | ||
87 | #endif | ||
88 | |||
89 | if ( (bn_limit_bits > 0) && | ||
90 | (bl > bn_limit_num) && (al > bn_limit_num)) | ||
91 | { | ||
92 | n=(BN_num_bits_word(al|bl)-bn_limit_bits); | ||
93 | n*=2; | ||
94 | if ((sk=(BIGNUM *)Malloc(sizeof(BIGNUM)*n)) == NULL) return(0); | ||
95 | memset(sk,0,sizeof(BIGNUM)*n); | ||
96 | memset(&ctx,0,sizeof(ctx)); | ||
97 | |||
98 | ret=bn_mm(r,a,b,&(sk[0]),&ctx); | ||
99 | for (i=0; i<n; i+=2) | ||
100 | { | ||
101 | BN_clear_free(&sk[i]); | ||
102 | BN_clear_free(&sk[i+1]); | ||
103 | } | ||
104 | Free(sk); | ||
105 | return(ret); | ||
106 | } | ||
107 | |||
108 | max=(al+bl); | ||
109 | if (bn_wexpand(r,max) == NULL) return(0); | ||
110 | r->top=max; | ||
111 | r->neg=a->neg^b->neg; | ||
112 | ap=a->d; | ||
113 | bp=b->d; | ||
114 | rp=r->d; | ||
115 | |||
116 | rp[al]=bn_mul_words(rp,ap,al,*(bp++)); | ||
117 | rp++; | ||
118 | for (i=1; i<bl; i++) | ||
119 | { | ||
120 | rp[al]=bn_mul_add_words(rp,ap,al,*(bp++)); | ||
121 | rp++; | ||
122 | } | ||
123 | if ((max > 0) && (r->d[max-1] == 0)) r->top--; | ||
124 | return(1); | ||
125 | } | ||
126 | |||
127 | |||
128 | #define ahal (sk[0]) | ||
129 | #define blbh (sk[1]) | ||
130 | |||
131 | /* m (the result) must be different to A and B; always returns 1 */ | ||
132 | int bn_mm(BIGNUM *m, BIGNUM *A, BIGNUM *B, BIGNUM *sk, BN_CTX *ctx) | ||
133 | { | ||
134 | int n,num,sqr=0; | ||
135 | int an,bn; | ||
136 | BIGNUM ah,al,bh,bl; | ||
137 | |||
138 | an=A->top; | ||
139 | bn=B->top; | ||
140 | #ifdef BN_MUL_DEBUG | ||
141 | printf("bn_mm(%d,%d)\n",A->top,B->top); | ||
142 | #endif | ||
143 | |||
144 | if (A == B) sqr=1; | ||
145 | num=(an>bn)?an:bn; | ||
146 | n=(num+1)/2; | ||
147 | /* Now split A and B into low and high parts at 'n' words (bn_set_low/bn_set_high). */ | ||
148 | |||
149 | BN_init(&ah); | ||
150 | BN_init(&al); | ||
151 | BN_init(&bh); | ||
152 | BN_init(&bl); | ||
153 | |||
154 | bn_set_low (&al,A,n); | ||
155 | bn_set_high(&ah,A,n); | ||
156 | bn_set_low (&bl,B,n); | ||
157 | bn_set_high(&bh,B,n); | ||
158 | |||
159 | BN_sub(&ahal,&ah,&al); | ||
160 | BN_sub(&blbh,&bl,&bh); | ||
161 | |||
162 | if (num <= (bn_limit_num+bn_limit_num)) | ||
163 | { | ||
164 | BN_mul(m,&ahal,&blbh); | ||
165 | if (sqr) | ||
166 | { | ||
167 | BN_sqr(&ahal,&al,ctx); | ||
168 | BN_sqr(&blbh,&ah,ctx); | ||
169 | } | ||
170 | else | ||
171 | { | ||
172 | BN_mul(&ahal,&al,&bl); | ||
173 | BN_mul(&blbh,&ah,&bh); | ||
174 | } | ||
175 | } | ||
176 | else | ||
177 | { | ||
178 | bn_mm(m,&ahal,&blbh,&(sk[2]),ctx); | ||
179 | bn_mm(&ahal,&al,&bl,&(sk[2]),ctx); | ||
180 | bn_mm(&blbh,&ah,&bh,&(sk[2]),ctx); | ||
181 | } | ||
182 | |||
183 | BN_add(m,m,&ahal); | ||
184 | BN_add(m,m,&blbh); | ||
185 | |||
186 | BN_lshift(m,m,n*BN_BITS2); | ||
187 | BN_lshift(&blbh,&blbh,n*BN_BITS2*2); | ||
188 | |||
189 | BN_add(m,m,&ahal); | ||
190 | BN_add(m,m,&blbh); | ||
191 | |||
192 | m->neg=A->neg^B->neg; | ||
193 | return(1); | ||
194 | } | ||
195 | #undef ahal | ||
196 | #undef blbh | ||
197 | |||
198 | #include "bn_low.c" | ||
199 | #include "bn_high.c" | ||
diff --git a/src/lib/libcrypto/bn/old/bn_com.c b/src/lib/libcrypto/bn/old/bn_com.c new file mode 100644 index 0000000000..7666b2304c --- /dev/null +++ b/src/lib/libcrypto/bn/old/bn_com.c | |||
@@ -0,0 +1,90 @@ | |||
1 | /* crypto/bn/bn_mulw.c */ | ||
2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) | ||
3 | * All rights reserved. | ||
4 | * | ||
5 | * This package is an SSL implementation written | ||
6 | * by Eric Young (eay@cryptsoft.com). | ||
7 | * The implementation was written so as to conform with Netscapes SSL. | ||
8 | * | ||
9 | * This library is free for commercial and non-commercial use as long as | ||
10 | * the following conditions are aheared to. The following conditions | ||
11 | * apply to all code found in this distribution, be it the RC4, RSA, | ||
12 | * lhash, DES, etc., code; not just the SSL code. The SSL documentation | ||
13 | * included with this distribution is covered by the same copyright terms | ||
14 | * except that the holder is Tim Hudson (tjh@cryptsoft.com). | ||
15 | * | ||
16 | * Copyright remains Eric Young's, and as such any Copyright notices in | ||
17 | * the code are not to be removed. | ||
18 | * If this package is used in a product, Eric Young should be given attribution | ||
19 | * as the author of the parts of the library used. | ||
20 | * This can be in the form of a textual message at program startup or | ||
21 | * in documentation (online or textual) provided with the package. | ||
22 | * | ||
23 | * Redistribution and use in source and binary forms, with or without | ||
24 | * modification, are permitted provided that the following conditions | ||
25 | * are met: | ||
26 | * 1. Redistributions of source code must retain the copyright | ||
27 | * notice, this list of conditions and the following disclaimer. | ||
28 | * 2. Redistributions in binary form must reproduce the above copyright | ||
29 | * notice, this list of conditions and the following disclaimer in the | ||
30 | * documentation and/or other materials provided with the distribution. | ||
31 | * 3. All advertising materials mentioning features or use of this software | ||
32 | * must display the following acknowledgement: | ||
33 | * "This product includes cryptographic software written by | ||
34 | * Eric Young (eay@cryptsoft.com)" | ||
35 | * The word 'cryptographic' can be left out if the rouines from the library | ||
36 | * being used are not cryptographic related :-). | ||
37 | * 4. If you include any Windows specific code (or a derivative thereof) from | ||
38 | * the apps directory (application code) you must include an acknowledgement: | ||
39 | * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" | ||
40 | * | ||
41 | * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND | ||
42 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
43 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||
44 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE | ||
45 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | ||
46 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | ||
47 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | ||
48 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | ||
49 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | ||
50 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | ||
51 | * SUCH DAMAGE. | ||
52 | * | ||
53 | * The licence and distribution terms for any publically available version or | ||
54 | * derivative of this code cannot be changed. i.e. this code cannot simply be | ||
55 | * copied and put under another distribution licence | ||
56 | * [including the GNU Public Licence.] | ||
57 | */ | ||
58 | |||
59 | #include <stdio.h> | ||
60 | #include "cryptlib.h" | ||
61 | #include "bn_lcl.h" | ||
62 | |||
63 | #ifdef BN_LLONG | ||
64 | |||
65 | ab | ||
66 | 12 | ||
67 | a2 b2 | ||
68 | a1 b1 | ||
69 | |||
70 | abc | ||
71 | 123 | ||
72 | a3 b3 c3 | ||
73 | a2 b2 c2 | ||
74 | a1 b1 c1 | ||
75 | |||
76 | abcd | ||
77 | 1234 | ||
78 | a4 b4 c4 d4 | ||
79 | a3 b3 c3 d3 | ||
80 | a2 b2 c2 d2 | ||
81 | a1 b1 c1 d1 | ||
82 | |||
83 | abcde | ||
84 | 01234 | ||
85 | a5 b5 c5 d5 e5 | ||
86 | a4 b4 c4 d4 e4 | ||
87 | a3 b3 c3 d3 e3 | ||
88 | a2 b2 c2 d2 e2 | ||
89 | a1 b1 c1 d1 e1 | ||
90 | a0 b0 c0 d0 e0 | ||
diff --git a/src/lib/libcrypto/bn/old/bn_high.c b/src/lib/libcrypto/bn/old/bn_high.c new file mode 100644 index 0000000000..763bcb605b --- /dev/null +++ b/src/lib/libcrypto/bn/old/bn_high.c | |||
@@ -0,0 +1,135 @@ | |||
1 | #include <stdio.h> | ||
2 | #include "cryptlib.h" | ||
3 | #include "bn_lcl.h" | ||
4 | |||
5 | #undef BN_MUL_HIGH_DEBUG | ||
6 | |||
7 | #ifdef BN_MUL_HIGH_DEBUG | ||
8 | #define debug_BN_print(a,b,c) BN_print_fp(a,b); printf(c); | ||
9 | #else | ||
10 | #define debug_BN_print(a,b,c) | ||
11 | #endif | ||
12 | |||
13 | int BN_mul_high(BIGNUM *r,BIGNUM *a,BIGNUM *b,BIGNUM *low, int words); | ||
14 | |||
15 | #undef t1 | ||
16 | #undef t2 | ||
17 | |||
18 | int BN_mul_high(BIGNUM *r, BIGNUM *a, BIGNUM *b, BIGNUM *low, int words) | ||
19 | { /* Sets r to (a*b) shifted right by words*BN_BITS2 bits, i.e. the product with its low `words` words dropped. */ | ||
20 | int w2,borrow=0,full=0; | ||
21 | BIGNUM t1,t2,t3,h,ah,al,bh,bl,m,s0,s1; | ||
22 | BN_ULONG ul1,ul2; | ||
23 | /* Returns 1 on success and 0 if the multiply or the shift fails. `low` is only referenced by the disabled code further down. */ | ||
24 | if (!BN_mul(r,a,b)) return(0); | ||
25 | if (!BN_rshift(r,r,words*BN_BITS2)) return(0); | ||
26 | return(1); /* NOTE: everything below this return is unreachable; it is the unfinished split-operand version kept for reference */ | ||
27 | |||
28 | w2=(words+1)/2; | ||
29 | |||
30 | #ifdef BN_MUL_HIGH_DEBUG | ||
31 | fprintf(stdout,"words=%d w2=%d\n",words,w2); | ||
32 | #endif | ||
33 | debug_BN_print(stdout,a," a\n"); | ||
34 | debug_BN_print(stdout,b," b\n"); | ||
35 | debug_BN_print(stdout,low," low\n"); | ||
36 | BN_init(&al); BN_init(&ah); | ||
37 | BN_init(&bl); BN_init(&bh); | ||
38 | BN_init(&t1); BN_init(&t2); BN_init(&t3); | ||
39 | BN_init(&s0); BN_init(&s1); | ||
40 | BN_init(&h); BN_init(&m); | ||
41 | |||
42 | bn_set_low (&al,a,w2); | ||
43 | bn_set_high(&ah,a,w2); | ||
44 | bn_set_low (&bl,b,w2); | ||
45 | bn_set_high(&bh,b,w2); | ||
46 | |||
47 | bn_set_low(&s0,low,w2); | ||
48 | bn_set_high(&s1,low,w2); | ||
49 | |||
50 | debug_BN_print(stdout,&al," al\n"); | ||
51 | debug_BN_print(stdout,&ah," ah\n"); | ||
52 | debug_BN_print(stdout,&bl," bl\n"); | ||
53 | debug_BN_print(stdout,&bh," bh\n"); | ||
54 | debug_BN_print(stdout,&s0," s0\n"); | ||
55 | debug_BN_print(stdout,&s1," s1\n"); | ||
56 | |||
57 | /* Calculate (al-ah)*(bh-bl) */ | ||
58 | BN_sub(&t1,&al,&ah); | ||
59 | BN_sub(&t2,&bh,&bl); | ||
60 | BN_mul(&m,&t1,&t2); | ||
61 | |||
62 | /* Calculate ah*bh */ | ||
63 | BN_mul(&h,&ah,&bh); | ||
64 | |||
65 | /* s0 == low(al*bl) | ||
66 | * s1 == low(ah*bh)+low((al-ah)*(bh-bl))+low(al*bl)+high(al*bl) | ||
67 | * We know s0 and s1 so the only unknown is high(al*bl) | ||
68 | * high(al*bl) == s1 - low(ah*bh+(al-ah)*(bh-bl)+s0) | ||
69 | */ | ||
70 | BN_add(&m,&m,&h); | ||
71 | BN_add(&t2,&m,&s0); | ||
72 | |||
73 | debug_BN_print(stdout,&t2," middle value\n"); | ||
74 | |||
75 | /* Quick and dirty mask off of high words */ | ||
76 | if (w2 < t2.top) t2.top=w2; | ||
77 | #if 0 | ||
78 | bn_set_low(&t3,&t2,w2); | ||
79 | #endif | ||
80 | |||
81 | debug_BN_print(stdout,&t2," low middle value\n"); | ||
82 | BN_sub(&t1,&s1,&t2); | ||
83 | |||
84 | if (t1.neg) | ||
85 | { | ||
86 | debug_BN_print(stdout,&t1," before\n"); | ||
87 | BN_zero(&t2); | ||
88 | BN_set_bit(&t2,w2*BN_BITS2); | ||
89 | BN_add(&t1,&t2,&t1); | ||
90 | /* BN_mask_bits(&t1,w2*BN_BITS2); */ | ||
91 | /* if (words < t1.top) t1.top=words; */ | ||
92 | debug_BN_print(stdout,&t1," after\n"); | ||
93 | borrow=1; | ||
94 | } | ||
95 | |||
96 | /* XXXXX SPEED THIS UP */ | ||
97 | /* al*bl == high(al*bl)<<words+s0 */ | ||
98 | BN_lshift(&t1,&t1,w2*BN_BITS2); | ||
99 | BN_add(&t1,&t1,&s0); | ||
100 | if (w2*2 < t1.top) t1.top=w2*2; /* This should not happen? */ | ||
101 | |||
102 | /* We now have | ||
103 | * al*bl - t1 | ||
104 | * (al-ah)*(bh-bl)+ah*bh - m | ||
105 | * ah*bh - h | ||
106 | */ | ||
107 | #if 0 | ||
108 | BN_add(&m,&m,&t1); | ||
109 | debug_BN_print(stdout,&t1," s10\n"); | ||
110 | debug_BN_print(stdout,&m," s21\n"); | ||
111 | debug_BN_print(stdout,&h," s32\n"); | ||
112 | BN_lshift(&m,&m,w2*BN_BITS2); | ||
113 | BN_lshift(&h,&h,w2*2*BN_BITS2); | ||
114 | BN_add(r,&m,&t1); | ||
115 | BN_add(r,r,&h); | ||
116 | BN_rshift(r,r,w2*2*BN_BITS2); | ||
117 | #else | ||
118 | BN_add(&m,&m,&t1); /* Do a cmp then +1 if needed? */ | ||
119 | bn_set_high(&t3,&t1,w2); | ||
120 | BN_add(&m,&m,&t3); | ||
121 | bn_set_high(&t3,&m,w2); | ||
122 | BN_add(r,&h,&t3); | ||
123 | #endif | ||
124 | |||
125 | #ifdef BN_MUL_HIGH_DEBUG | ||
126 | printf("carry=%d\n",borrow); | ||
127 | #endif | ||
128 | debug_BN_print(stdout,r," ret\n"); | ||
129 | BN_free(&t1); BN_free(&t2); /* NOTE(review): only t1, t2, m and h are released here; t3, s0, s1 and the al/ah/bl/bh halves are not */ | ||
130 | BN_free(&m); BN_free(&h); | ||
131 | return(1); | ||
132 | } | ||
133 | |||
134 | |||
135 | |||
diff --git a/src/lib/libcrypto/bn/old/bn_ka.c b/src/lib/libcrypto/bn/old/bn_ka.c new file mode 100644 index 0000000000..378c94dc5a --- /dev/null +++ b/src/lib/libcrypto/bn/old/bn_ka.c | |||
@@ -0,0 +1,567 @@ | |||
1 | #include <stdio.h> | ||
2 | #include <stdlib.h> | ||
3 | #include <strings.h> | ||
4 | #include "bn_lcl.h" | ||
5 | |||
6 | /* Karatsuba multiplication: r = a * b. | ||
7 | * r is 2*n2 words in size; a and b are both n2 words in size. | ||
8 | * n2 must be a power of 2. | ||
9 | * t is scratch space: t[0..2*n2) is used directly by this call and | ||
10 | * &t[2*n2] is handed to the recursive calls as their own scratch area. | ||
11 | * We calculate | ||
12 | * a[0]*b[0] | ||
13 | * a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0]) | ||
14 | * a[1]*b[1] | ||
15 | * where a[0]/b[0] are the low n2/2 words and a[1]/b[1] the high n2/2 words. */ | ||
16 | void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, | ||
17 | BN_ULONG *t) | ||
18 | { | ||
19 | int n=n2/2; /* words per half */ | ||
20 | int neg,zero,c1,c2; | ||
21 | BN_ULONG ln,lo,*p; | ||
22 | |||
23 | #ifdef BN_COUNT | ||
24 | printf(" bn_mul_recursive %d * %d\n",n2,n2); | ||
25 | #endif | ||
26 | if (n2 <= 8) /* small operands: no splitting */ | ||
27 | { | ||
28 | if (n2 == 8) | ||
29 | bn_mul_comba8(r,a,b); | ||
30 | else | ||
31 | bn_mul_normal(r,a,n2,b,n2); | ||
32 | return; | ||
33 | } | ||
34 | |||
35 | if (n2 < BN_MUL_RECURSIVE_SIZE_NORMAL) | ||
36 | { | ||
37 | /* This should not happen */ | ||
38 | /*abort(); */ | ||
39 | bn_mul_normal(r,a,n2,b,n2); | ||
40 | return; | ||
41 | } | ||
42 | /* t[0..n) = |a[0]-a[1]|, t[n..n2) = |b[1]-b[0]|; neg records the sign of the product, zero that it vanishes */ | ||
43 | c1=bn_cmp_words(a,&(a[n]),n); | ||
44 | c2=bn_cmp_words(&(b[n]),b,n); | ||
45 | zero=neg=0; | ||
46 | switch (c1*3+c2) /* the case labels assume bn_cmp_words returns -1, 0 or 1 */ | ||
47 | { | ||
48 | case -4: | ||
49 | bn_sub_words(t, &(a[n]),a, n); /* - */ | ||
50 | bn_sub_words(&(t[n]),b, &(b[n]),n); /* - */ | ||
51 | break; | ||
52 | case -3: | ||
53 | zero=1; | ||
54 | break; | ||
55 | case -2: | ||
56 | bn_sub_words(t, &(a[n]),a, n); /* - */ | ||
57 | bn_sub_words(&(t[n]),&(b[n]),b, n); /* + */ | ||
58 | neg=1; | ||
59 | break; | ||
60 | case -1: | ||
61 | case 0: | ||
62 | case 1: | ||
63 | zero=1; | ||
64 | break; | ||
65 | case 2: | ||
66 | bn_sub_words(t, a, &(a[n]),n); /* + */ | ||
67 | bn_sub_words(&(t[n]),b, &(b[n]),n); /* - */ | ||
68 | neg=1; | ||
69 | break; | ||
70 | case 3: | ||
71 | zero=1; | ||
72 | break; | ||
73 | case 4: | ||
74 | bn_sub_words(t, a, &(a[n]),n); | ||
75 | bn_sub_words(&(t[n]),&(b[n]),b, n); | ||
76 | break; | ||
77 | } | ||
78 | |||
79 | if (n == 8) | ||
80 | { | ||
81 | if (!zero) | ||
82 | bn_mul_comba8(&(t[n2]),t,&(t[n])); | ||
83 | else | ||
84 | memset(&(t[n2]),0,16*sizeof(BN_ULONG)); /* clear all 16 (= n2) words: the code below reads n2 words starting at t[n2] */ | ||
85 | |||
86 | bn_mul_comba8(r,a,b); | ||
87 | bn_mul_comba8(&(r[n2]),&(a[n]),&(b[n])); | ||
88 | } | ||
89 | else | ||
90 | { | ||
91 | p= &(t[n2*2]); /* scratch for the recursive calls */ | ||
92 | if (!zero) | ||
93 | bn_mul_recursive(&(t[n2]),t,&(t[n]),n,p); | ||
94 | else | ||
95 | memset(&(t[n2]),0,n2*sizeof(BN_ULONG)); /* clear the whole n2-word product slot, not just its low half */ | ||
96 | bn_mul_recursive(r,a,b,n,p); | ||
97 | bn_mul_recursive(&(r[n2]),&(a[n]),&(b[n]),n,p); | ||
98 | } | ||
99 | |||
100 | /* t[32] holds |(a[0]-a[1])*(b[1]-b[0])|, neg is the sign | ||
101 | * r[10] holds (a[0]*b[0]) | ||
102 | * r[32] holds (a[1]*b[1]) | ||
103 | */ | ||
104 | |||
105 | c1=bn_add_words(t,r,&(r[n2]),n2); /* t[10] = a[0]*b[0] + a[1]*b[1], carry in c1 */ | ||
106 | |||
107 | if (neg) /* if t[32] is negative */ | ||
108 | { | ||
109 | c1-=bn_sub_words(&(t[n2]),t,&(t[n2]),n2); | ||
110 | } | ||
111 | else | ||
112 | { | ||
113 | /* Might have a carry */ | ||
114 | c1+=bn_add_words(&(t[n2]),&(t[n2]),t,n2); | ||
115 | } | ||
116 | |||
117 | /* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1]) | ||
118 | * r[10] holds (a[0]*b[0]) | ||
119 | * r[32] holds (a[1]*b[1]) | ||
120 | * c1 holds the carry bits | ||
121 | */ | ||
122 | c1+=bn_add_words(&(r[n]),&(r[n]),&(t[n2]),n2); /* add the middle term into r at word offset n */ | ||
123 | if (c1) | ||
124 | { | ||
125 | p= &(r[n+n2]); /* first word above the middle term */ | ||
126 | lo= *p; | ||
127 | ln=(lo+c1)&BN_MASK2; | ||
128 | *p=ln; | ||
129 | |||
130 | /* The overflow will stop before we over write | ||
131 | * words we should not overwrite */ | ||
132 | if (ln < c1) | ||
133 | { | ||
134 | do { | ||
135 | p++; | ||
136 | lo= *p; | ||
137 | ln=(lo+1)&BN_MASK2; | ||
138 | *p=ln; | ||
139 | } while (ln == 0); | ||
140 | } | ||
141 | } | ||
142 | } | ||
143 | |||
144 | /* Multiplies operands of n+tn words: the low parts are n words, the high parts a[n..], b[n..] are multiplied as tn-word values. | ||
145 | * t needs to be n*4 in size, as does r */ | ||
146 | void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int tn, | ||
147 | int n, BN_ULONG *t) | ||
148 | { | ||
149 | int n2=n*2,i,j; | ||
150 | int c1; | ||
151 | BN_ULONG ln,lo,*p; | ||
152 | |||
153 | #ifdef BN_COUNT | ||
154 | printf(" bn_mul_part_recursive %d * %d\n",tn+n,tn+n); | ||
155 | #endif | ||
156 | if (n < 8) /* small operands: plain multiply of the full n+tn words */ | ||
157 | { | ||
158 | i=tn+n; | ||
159 | bn_mul_normal(r,a,i,b,i); | ||
160 | return; | ||
161 | } | ||
162 | |||
163 | /* t[0..n) = a[0]-a[1], t[n..n2) = b[0]-b[1]. NOTE(review): unlike bn_mul_recursive no bn_cmp_words is done first, so these may wrap -- confirm this is intended */ | ||
164 | bn_sub_words(t, a, &(a[n]),n); /* + */ | ||
165 | bn_sub_words(&(t[n]),b, &(b[n]),n); /* - */ | ||
166 | |||
167 | if (n == 8) | ||
168 | { | ||
169 | bn_mul_comba8(&(t[n2]),t,&(t[n])); | ||
170 | bn_mul_comba8(r,a,b); | ||
171 | bn_mul_normal(&(r[n2]),&(a[n]),tn,&(b[n]),tn); | ||
172 | memset(&(r[n2+tn*2]),0,sizeof(BN_ULONG)*(n2-tn*2)); /* clear the part of r[32] the tn*tn product did not write */ | ||
173 | } | ||
174 | else | ||
175 | { | ||
176 | p= &(t[n2*2]); /* scratch for the recursive calls */ | ||
177 | bn_mul_recursive(&(t[n2]),t,&(t[n]),n,p); | ||
178 | bn_mul_recursive(r,a,b,n,p); | ||
179 | i=n/2; | ||
180 | /* If there is only a bottom half to the number, | ||
181 | * just do it */ | ||
182 | j=tn-i; | ||
183 | if (j == 0) | ||
184 | { | ||
185 | bn_mul_recursive(&(r[n2]),&(a[n]),&(b[n]),i,p); | ||
186 | memset(&(r[n2+i*2]),0,sizeof(BN_ULONG)*(n2-i*2)); | ||
187 | } | ||
188 | else if (j > 0) /* eg, n == 16, i == 8 and tn == 11 */ | ||
189 | { | ||
190 | bn_mul_part_recursive(&(r[n2]),&(a[n]),&(b[n]), | ||
191 | j,i,p); | ||
192 | memset(&(r[n2+tn*2]),0, | ||
193 | sizeof(BN_ULONG)*(n2-tn*2)); | ||
194 | } | ||
195 | else /* (j < 0) eg, n == 16, i == 8 and tn == 5 */ | ||
196 | { | ||
197 | memset(&(r[n2]),0,sizeof(BN_ULONG)*n2); /* clear all n2 words up front: the product below fills only the low part, and n2 words are read from r[n2] afterwards */ | ||
198 | for (;;) | ||
199 | { | ||
200 | i/=2; | ||
201 | if (i < tn) | ||
202 | { | ||
203 | bn_mul_part_recursive(&(r[n2]), | ||
204 | &(a[n]),&(b[n]), | ||
205 | tn-i,i,p); | ||
206 | break; | ||
207 | } | ||
208 | else if (i == tn) | ||
209 | { | ||
210 | bn_mul_recursive(&(r[n2]), | ||
211 | &(a[n]),&(b[n]), | ||
212 | i,p); | ||
213 | break; | ||
214 | } | ||
215 | } | ||
216 | } | ||
217 | } | ||
218 | |||
219 | /* t[32] holds (a[0]-a[1])*(b[0]-b[1]), which is subtracted below | ||
220 | * r[10] holds (a[0]*b[0]) | ||
221 | * r[32] holds (a[1]*b[1]) | ||
222 | */ | ||
223 | |||
224 | c1=bn_add_words(t,r,&(r[n2]),n2); /* t[10] = a[0]*b[0] + a[1]*b[1], carry in c1 */ | ||
225 | c1-=bn_sub_words(&(t[n2]),t,&(t[n2]),n2); | ||
226 | |||
227 | /* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1]) | ||
228 | * r[10] holds (a[0]*b[0]) | ||
229 | * r[32] holds (a[1]*b[1]) | ||
230 | * c1 holds the carry bits | ||
231 | */ | ||
232 | c1+=bn_add_words(&(r[n]),&(r[n]),&(t[n2]),n2); /* add the middle term into r at word offset n */ | ||
233 | if (c1) | ||
234 | { | ||
235 | p= &(r[n+n2]); | ||
236 | lo= *p; | ||
237 | ln=(lo+c1)&BN_MASK2; | ||
238 | *p=ln; | ||
239 | |||
240 | /* The overflow will stop before we over write | ||
241 | * words we should not overwrite */ | ||
242 | if (ln < c1) | ||
243 | { | ||
244 | do { | ||
245 | p++; | ||
246 | lo= *p; | ||
247 | ln=(lo+1)&BN_MASK2; | ||
248 | *p=ln; | ||
249 | } while (ln == 0); | ||
250 | } | ||
251 | } | ||
252 | } | ||
253 | |||
254 | /* Karatsuba squaring: r = a * a. | ||
255 | * r is 2*n2 words in size, a is n2 words in size. | ||
256 | * n2 must be a power of 2. | ||
257 | * t is scratch space: t[0..2*n2) is used directly by this call and | ||
258 | * &t[2*n2] is handed to the recursive calls as their own scratch area. | ||
259 | * We calculate | ||
260 | * a[0]*a[0] | ||
261 | * a[0]*a[0]+a[1]*a[1]+(a[0]-a[1])*(a[1]-a[0]) | ||
262 | * a[1]*a[1] | ||
263 | */ | ||
264 | void bn_sqr_recursive(BN_ULONG *r, BN_ULONG *a, int n2, BN_ULONG *t) | ||
265 | { | ||
266 | int n=n2/2; /* words per half */ | ||
267 | int zero,c1; | ||
268 | BN_ULONG ln,lo,*p; | ||
269 | |||
270 | #ifdef BN_COUNT | ||
271 | printf(" bn_sqr_recursive %d * %d\n",n2,n2); | ||
272 | #endif | ||
273 | if (n2 == 4) | ||
274 | { | ||
275 | bn_sqr_comba4(r,a); | ||
276 | return; | ||
277 | } | ||
278 | else if (n2 == 8) | ||
279 | { | ||
280 | bn_sqr_comba8(r,a); | ||
281 | return; | ||
282 | } | ||
283 | if (n2 < BN_SQR_RECURSIVE_SIZE_NORMAL) | ||
284 | { | ||
285 | bn_sqr_normal(r,a,n2,t); | ||
286 | return; | ||
287 | abort(); /* not reached: follows the return above */ | ||
288 | } | ||
289 | /* t[0..n) = |a[0]-a[1]|; zero is set when the halves are equal */ | ||
290 | c1=bn_cmp_words(a,&(a[n]),n); | ||
291 | zero=0; | ||
292 | if (c1 > 0) | ||
293 | bn_sub_words(t,a,&(a[n]),n); | ||
294 | else if (c1 < 0) | ||
295 | bn_sub_words(t,&(a[n]),a,n); | ||
296 | else | ||
297 | zero=1; | ||
298 | |||
299 | /* The result will always be negative unless it is zero */ | ||
300 | |||
301 | if (n == 8) | ||
302 | { | ||
303 | if (!zero) | ||
304 | bn_sqr_comba8(&(t[n2]),t); | ||
305 | else | ||
306 | memset(&(t[n2]),0,16*sizeof(BN_ULONG)); /* clear all 16 (= n2) words: the code below reads n2 words starting at t[n2] */ | ||
307 | |||
308 | bn_sqr_comba8(r,a); | ||
309 | bn_sqr_comba8(&(r[n2]),&(a[n])); | ||
310 | } | ||
311 | else | ||
312 | { | ||
313 | p= &(t[n2*2]); /* scratch for the recursive calls */ | ||
314 | if (!zero) | ||
315 | bn_sqr_recursive(&(t[n2]),t,n,p); | ||
316 | else | ||
317 | memset(&(t[n2]),0,n2*sizeof(BN_ULONG)); /* clear the whole n2-word product slot, not just its low half */ | ||
318 | bn_sqr_recursive(r,a,n,p); | ||
319 | bn_sqr_recursive(&(r[n2]),&(a[n]),n,p); | ||
320 | } | ||
321 | |||
322 | /* t[32] holds (a[0]-a[1])*(a[1]-a[0]), it is negative or zero | ||
323 | * r[10] holds (a[0]*a[0]) | ||
324 | * r[32] holds (a[1]*a[1]) | ||
325 | */ | ||
326 | |||
327 | c1=bn_add_words(t,r,&(r[n2]),n2); /* t[10] = a[0]*a[0] + a[1]*a[1], carry in c1 */ | ||
328 | |||
329 | /* t[32] is negative */ | ||
330 | c1-=bn_sub_words(&(t[n2]),t,&(t[n2]),n2); | ||
331 | |||
332 | /* t[32] holds (a[0]-a[1])*(a[1]-a[0])+(a[0]*a[0])+(a[1]*a[1]) | ||
333 | * r[10] holds (a[0]*a[0]) | ||
334 | * r[32] holds (a[1]*a[1]) | ||
335 | * c1 holds the carry bits | ||
336 | */ | ||
337 | c1+=bn_add_words(&(r[n]),&(r[n]),&(t[n2]),n2); /* add the middle term into r at word offset n */ | ||
338 | if (c1) | ||
339 | { | ||
340 | p= &(r[n+n2]); | ||
341 | lo= *p; | ||
342 | ln=(lo+c1)&BN_MASK2; | ||
343 | *p=ln; | ||
344 | |||
345 | /* The overflow will stop before we over write | ||
346 | * words we should not overwrite */ | ||
347 | if (ln < c1) | ||
348 | { | ||
349 | do { | ||
350 | p++; | ||
351 | lo= *p; | ||
352 | ln=(lo+1)&BN_MASK2; | ||
353 | *p=ln; | ||
354 | } while (ln == 0); | ||
355 | } | ||
356 | } | ||
357 | } | ||
358 | |||
359 | #if 1 | ||
360 | /* Computes the low n2 words of a*b into r. a and b must be the same size, which is n2. | ||
361 | * r needs to be n2 words and t (scratch, reused between the steps below) needs to be n2*2 | ||
362 | */ | ||
363 | void bn_mul_low_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, | ||
364 | BN_ULONG *t) | ||
365 | { | ||
366 | int n=n2/2; /* words per half */ | ||
367 | |||
368 | #ifdef BN_COUNT | ||
369 | printf(" bn_mul_low_recursive %d * %d\n",n2,n2); | ||
370 | #endif | ||
371 | |||
372 | bn_mul_recursive(r,a,b,n,&(t[0])); /* full product of the two low halves */ | ||
373 | if (n > BN_MUL_LOW_RECURSIVE_SIZE_NORMAL) | ||
374 | { | ||
375 | bn_mul_low_recursive(&(t[0]),&(a[0]),&(b[n]),n,&(t[n2])); /* low n words of a_low*b_high */ | ||
376 | bn_add_words(&(r[n]),&(r[n]),&(t[0]),n); /* added into the upper half of r; carry out is discarded */ | ||
377 | bn_mul_low_recursive(&(t[0]),&(a[n]),&(b[0]),n,&(t[n2])); /* low n words of a_high*b_low, reusing t[0..n) */ | ||
378 | bn_add_words(&(r[n]),&(r[n]),&(t[0]),n); | ||
379 | } | ||
380 | else | ||
381 | { | ||
382 | bn_mul_low_normal(&(t[0]),&(a[0]),&(b[n]),n); /* same two cross terms, computed non-recursively into t[0..n) and t[n..2n) */ | ||
383 | bn_mul_low_normal(&(t[n]),&(a[n]),&(b[0]),n); | ||
384 | bn_add_words(&(r[n]),&(r[n]),&(t[0]),n); | ||
385 | bn_add_words(&(r[n]),&(r[n]),&(t[n]),n); | ||
386 | } | ||
387 | } | ||
388 | |||
389 | /* Intended to produce the high words of a*b in r (see the R[2]/R[3] notes below). | ||
390 | * a and b must be the same size, which is n2. r needs to be n2 words. | ||
391 | * l is the low words of the output; it may be NULL, in which case the code below uses zero in its place. | ||
392 | * The original notes give the size of t as both n2*2 and n2*3; the code indexes t up to t[n2+2*n) and passes &t[n2] as scratch to bn_mul_recursive -- TODO confirm the required size | ||
393 | */ | ||
394 | void bn_mul_high(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, BN_ULONG *l, int n2, | ||
395 | BN_ULONG *t) | ||
396 | { | ||
397 | int j,i,n,c1,c2; | ||
398 | int neg,oneg,zero; | ||
399 | BN_ULONG ll,lc,*lp,*mp; | ||
400 | |||
401 | #ifdef BN_COUNT | ||
402 | printf(" bn_mul_high %d * %d\n",n2,n2); | ||
403 | #endif | ||
404 | n=(n2+1)/2; /* half size, rounded up */ | ||
405 | |||
406 | /* Calculate (al-ah)*(bh-bl) */ | ||
407 | neg=zero=0; | ||
408 | c1=bn_cmp_words(&(a[0]),&(a[n]),n); | ||
409 | c2=bn_cmp_words(&(b[n]),&(b[0]),n); | ||
410 | switch (c1*3+c2) /* the case labels assume bn_cmp_words returns -1, 0 or 1 */ | ||
411 | { | ||
412 | case -4: | ||
413 | bn_sub_words(&(r[0]),&(a[n]),&(a[0]),n); | ||
414 | bn_sub_words(&(r[n]),&(b[0]),&(b[n]),n); | ||
415 | break; | ||
416 | case -3: | ||
417 | zero=1; | ||
418 | break; | ||
419 | case -2: | ||
420 | bn_sub_words(&(r[0]),&(a[n]),&(a[0]),n); | ||
421 | bn_sub_words(&(r[n]),&(b[n]),&(b[0]),n); | ||
422 | neg=1; | ||
423 | break; | ||
424 | case -1: | ||
425 | case 0: | ||
426 | case 1: | ||
427 | zero=1; | ||
428 | break; | ||
429 | case 2: | ||
430 | bn_sub_words(&(r[0]),&(a[0]),&(a[n]),n); | ||
431 | bn_sub_words(&(r[n]),&(b[0]),&(b[n]),n); | ||
432 | neg=1; | ||
433 | break; | ||
434 | case 3: | ||
435 | zero=1; | ||
436 | break; | ||
437 | case 4: | ||
438 | bn_sub_words(&(r[0]),&(a[0]),&(a[n]),n); | ||
439 | bn_sub_words(&(r[n]),&(b[n]),&(b[0]),n); | ||
440 | break; | ||
441 | } | ||
442 | |||
443 | oneg=neg; /* remember the sign of the middle product; neg is reused below */ | ||
444 | /* t[10] = (a[0]-a[1])*(b[1]-b[0]) */ | ||
445 | bn_mul_recursive(&(t[0]),&(r[0]),&(r[n]),n,&(t[n2])); /* NOTE(review): zero is set above but not tested before this multiply */ | ||
446 | /* r[10] = (a[1]*b[1]) */ | ||
447 | bn_mul_recursive(r,&(a[n]),&(b[n]),n,&(t[n2])); | ||
448 | |||
449 | /* s0 == low(al*bl) | ||
450 | * s1 == low(ah*bh)+low((al-ah)*(bh-bl))+low(al*bl)+high(al*bl) | ||
451 | * We know s0 and s1 so the only unknown is high(al*bl) | ||
452 | * high(al*bl) == s1 - low(ah*bh+s0+(al-ah)*(bh-bl)) | ||
453 | * high(al*bl) == s1 - (r[0]+l[0]+t[0]) | ||
454 | */ | ||
455 | if (l != NULL) | ||
456 | { | ||
457 | lp= &(t[n2+n]); | ||
458 | c1=bn_add_words(lp,&(r[0]),&(l[0]),n); /* NOTE(review): this carry is overwritten before it is read */ | ||
459 | } | ||
460 | else | ||
461 | { | ||
462 | c1=0; | ||
463 | lp= &(r[0]); | ||
464 | } | ||
465 | |||
466 | if (neg) | ||
467 | neg=bn_sub_words(&(t[n2]),lp,&(t[0]),n); | ||
468 | else | ||
469 | { | ||
470 | bn_add_words(&(t[n2]),lp,&(t[0]),n); | ||
471 | neg=0; | ||
472 | } | ||
473 | |||
474 | if (l != NULL) | ||
475 | { | ||
476 | bn_sub_words(&(t[n2+n]),&(l[n]),&(t[n2]),n); | ||
477 | } | ||
478 | else | ||
479 | { | ||
480 | lp= &(t[n2+n]); | ||
481 | mp= &(t[n2]); | ||
482 | for (i=0; i<n; i++) | ||
483 | lp[i]=((~mp[i])+1)&BN_MASK2; /* per-word two's complement of mp[i]; no borrow is carried between words */ | ||
484 | } | ||
485 | |||
486 | /* s[0] = low(al*bl) | ||
487 | * t[3] = high(al*bl) | ||
488 | * t[10] = (a[0]-a[1])*(b[1]-b[0]) neg is the sign | ||
489 | * r[10] = (a[1]*b[1]) | ||
490 | */ | ||
491 | /* R[10] = al*bl | ||
492 | * R[21] = al*bl + ah*bh + (a[0]-a[1])*(b[1]-b[0]) | ||
493 | * R[32] = ah*bh | ||
494 | */ | ||
495 | /* R[1]=t[3]+l[0]+r[0](+-)t[0] (have carry/borrow) | ||
496 | * R[2]=r[0]+t[3]+r[1](+-)t[1] (have carry/borrow) | ||
497 | * R[3]=r[1]+(carry/borrow) | ||
498 | */ | ||
499 | if (l != NULL) | ||
500 | { | ||
501 | lp= &(t[n2]); | ||
502 | c1= bn_add_words(lp,&(t[n2+n]),&(l[0]),n); | ||
503 | } | ||
504 | else | ||
505 | { | ||
506 | lp= &(t[n2+n]); | ||
507 | c1=0; | ||
508 | } | ||
509 | c1+=bn_add_words(&(t[n2]),lp, &(r[0]),n); /* c1 accumulates the signed carry out of R[1] */ | ||
510 | if (oneg) | ||
511 | c1-=bn_sub_words(&(t[n2]),&(t[n2]),&(t[0]),n); | ||
512 | else | ||
513 | c1+=bn_add_words(&(t[n2]),&(t[n2]),&(t[0]),n); | ||
514 | |||
515 | c2 =bn_add_words(&(r[0]),&(r[0]),&(t[n2+n]),n); /* c2 accumulates the signed carry out of R[2], built in r[0..n) */ | ||
516 | c2+=bn_add_words(&(r[0]),&(r[0]),&(r[n]),n); | ||
517 | if (oneg) | ||
518 | c2-=bn_sub_words(&(r[0]),&(r[0]),&(t[n]),n); | ||
519 | else | ||
520 | c2+=bn_add_words(&(r[0]),&(r[0]),&(t[n]),n); | ||
521 | |||
522 | if (c1 != 0) /* Add starting at r[0], could be +ve or -ve */ | ||
523 | { | ||
524 | i=0; | ||
525 | if (c1 > 0) | ||
526 | { | ||
527 | lc=c1; | ||
528 | do { | ||
529 | ll=(r[i]+lc)&BN_MASK2; | ||
530 | r[i++]=ll; | ||
531 | lc=(lc > ll); /* carry out when the sum wrapped below the addend */ | ||
532 | } while (lc); | ||
533 | } | ||
534 | else | ||
535 | { | ||
536 | lc= -c1; | ||
537 | do { | ||
538 | ll=r[i]; | ||
539 | r[i++]=(ll-lc)&BN_MASK2; | ||
540 | lc=(lc > ll); /* borrow out when the subtrahend exceeded the word */ | ||
541 | } while (lc); | ||
542 | } | ||
543 | } | ||
544 | if (c2 != 0) /* Add starting at r[1] */ | ||
545 | { | ||
546 | i=n; | ||
547 | if (c2 > 0) | ||
548 | { | ||
549 | lc=c2; | ||
550 | do { | ||
551 | ll=(r[i]+lc)&BN_MASK2; | ||
552 | r[i++]=ll; | ||
553 | lc=(lc > ll); | ||
554 | } while (lc); | ||
555 | } | ||
556 | else | ||
557 | { | ||
558 | lc= -c2; | ||
559 | do { | ||
560 | ll=r[i]; | ||
561 | r[i++]=(ll-lc)&BN_MASK2; | ||
562 | lc=(lc > ll); | ||
563 | } while (lc); | ||
564 | } | ||
565 | } | ||
566 | } | ||
567 | #endif | ||
diff --git a/src/lib/libcrypto/bn/old/bn_low.c b/src/lib/libcrypto/bn/old/bn_low.c new file mode 100644 index 0000000000..cbc406751c --- /dev/null +++ b/src/lib/libcrypto/bn/old/bn_low.c | |||
@@ -0,0 +1,194 @@ | |||
1 | /* crypto/bn/bn_mul.c */ | ||
2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) | ||
3 | * All rights reserved. | ||
4 | * | ||
5 | * This package is an SSL implementation written | ||
6 | * by Eric Young (eay@cryptsoft.com). | ||
7 | * The implementation was written so as to conform with Netscapes SSL. | ||
8 | * | ||
9 | * This library is free for commercial and non-commercial use as long as | ||
10 | * the following conditions are aheared to. The following conditions | ||
11 | * apply to all code found in this distribution, be it the RC4, RSA, | ||
12 | * lhash, DES, etc., code; not just the SSL code. The SSL documentation | ||
13 | * included with this distribution is covered by the same copyright terms | ||
14 | * except that the holder is Tim Hudson (tjh@cryptsoft.com). | ||
15 | * | ||
16 | * Copyright remains Eric Young's, and as such any Copyright notices in | ||
17 | * the code are not to be removed. | ||
18 | * If this package is used in a product, Eric Young should be given attribution | ||
19 | * as the author of the parts of the library used. | ||
20 | * This can be in the form of a textual message at program startup or | ||
21 | * in documentation (online or textual) provided with the package. | ||
22 | * | ||
23 | * Redistribution and use in source and binary forms, with or without | ||
24 | * modification, are permitted provided that the following conditions | ||
25 | * are met: | ||
26 | * 1. Redistributions of source code must retain the copyright | ||
27 | * notice, this list of conditions and the following disclaimer. | ||
28 | * 2. Redistributions in binary form must reproduce the above copyright | ||
29 | * notice, this list of conditions and the following disclaimer in the | ||
30 | * documentation and/or other materials provided with the distribution. | ||
31 | * 3. All advertising materials mentioning features or use of this software | ||
32 | * must display the following acknowledgement: | ||
33 | * "This product includes cryptographic software written by | ||
34 | * Eric Young (eay@cryptsoft.com)" | ||
35 | * The word 'cryptographic' can be left out if the rouines from the library | ||
36 | * being used are not cryptographic related :-). | ||
37 | * 4. If you include any Windows specific code (or a derivative thereof) from | ||
38 | * the apps directory (application code) you must include an acknowledgement: | ||
39 | * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" | ||
40 | * | ||
41 | * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND | ||
42 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
43 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||
44 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE | ||
45 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | ||
46 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | ||
47 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | ||
48 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | ||
49 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | ||
50 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | ||
51 | * SUCH DAMAGE. | ||
52 | * | ||
53 | * The licence and distribution terms for any publically available version or | ||
54 | * derivative of this code cannot be changed. i.e. this code cannot simply be | ||
55 | * copied and put under another distribution licence | ||
56 | * [including the GNU Public Licence.] | ||
57 | */ | ||
58 | |||
59 | #include <stdio.h> | ||
60 | #include "cryptlib.h" | ||
61 | #include "bn_lcl.h" | ||
62 | |||
63 | static int bn_mm_low(BIGNUM *m,BIGNUM *A,BIGNUM *B, int num, | ||
64 | BIGNUM *sk,BN_CTX *ctx); | ||
65 | int BN_mul_low(BIGNUM *r, BIGNUM *a, BIGNUM *b,int words); | ||
66 | |||
67 | /* Multiplies a by b, keeping at most num result words in r. r must be different to a and b. Returns 1 on success, 0 on allocation failure. */ | ||
68 | int BN_mul_low(BIGNUM *r, BIGNUM *a, BIGNUM *b, int num) | ||
69 | { | ||
70 | BN_ULONG *ap,*bp,*rp; | ||
71 | BIGNUM *sk; | ||
72 | int j,i,n,ret; | ||
73 | int max,al,bl; | ||
74 | BN_CTX ctx; | ||
75 | |||
76 | bn_check_top(a); | ||
77 | bn_check_top(b); | ||
78 | |||
79 | #ifdef BN_MUL_DEBUG | ||
80 | printf("BN_mul_low(%d,%d,%d)\n",a->top,b->top,num); | ||
81 | #endif | ||
82 | |||
83 | al=a->top; | ||
84 | bl=b->top; | ||
85 | if ((al == 0) || (bl == 0)) /* either operand is zero: result is zero */ | ||
86 | { | ||
87 | r->top=0; | ||
88 | return(1); | ||
89 | } | ||
90 | |||
91 | if ((bn_limit_bits_low > 0) && (num > bn_limit_num_low)) /* large operands: use the recursive splitter */ | ||
92 | { | ||
93 | n=BN_num_bits_word(num*2)-bn_limit_bits_low; | ||
94 | n*=2; | ||
95 | sk=(BIGNUM *)Malloc(sizeof(BIGNUM)*n); /* array of temporaries handed to bn_mm_low */ | ||
96 | if (sk == NULL) return(0); /* do not memset/use a failed allocation */ | ||
97 | memset(sk,0,sizeof(BIGNUM)*n); | ||
98 | memset(&ctx,0,sizeof(ctx)); | ||
99 | ret=bn_mm_low(r,a,b,num,&(sk[0]),&ctx); | ||
100 | for (i=0; i<n; i+=2) | ||
101 | { | ||
102 | BN_clear_free(&sk[i]); | ||
103 | BN_clear_free(&sk[i+1]); | ||
104 | } | ||
105 | Free(sk); | ||
106 | return(ret); | ||
107 | } | ||
108 | |||
109 | max=(al+bl); | ||
110 | if (bn_wexpand(r,max) == NULL) return(0); | ||
111 | r->neg=a->neg^b->neg; | ||
112 | ap=a->d; | ||
113 | bp=b->d; | ||
114 | rp=r->d; | ||
115 | r->top=(max > num)?num:max; /* result is capped at num words */ | ||
116 | |||
117 | rp[al]=bn_mul_words(rp,ap,al,*(bp++)); | ||
118 | rp++; | ||
119 | j=bl; | ||
120 | for (i=1; i<j; i++) | ||
121 | { | ||
122 | if (al >= num--) /* shorten the row once it would reach past the words being kept */ | ||
123 | { | ||
124 | al--; | ||
125 | if (al <= 0) break; | ||
126 | } | ||
127 | rp[al]=bn_mul_add_words(rp,ap,al,*(bp++)); | ||
128 | rp++; | ||
129 | } | ||
130 | |||
131 | while ((r->top > 0) && (r->d[r->top-1] == 0)) /* strip leading zero words */ | ||
132 | r->top--; | ||
133 | return(1); | ||
134 | } | ||
135 | |||
136 | |||
137 | #define t1 (sk[0]) | ||
138 | #define t2 (sk[1]) | ||
139 | |||
140 | /* Recursive helper for BN_mul_low: m = low num words of A*B. m must be different to A and B. Returns 1 on success, 0 if any checked step fails. */ | ||
141 | int bn_mm_low(BIGNUM *m, BIGNUM *A, BIGNUM *B, int num, BIGNUM *sk, | ||
142 | BN_CTX *ctx) | ||
143 | { | ||
144 | int n; /* ,sqr=0; */ | ||
145 | int an,bn; | ||
146 | BIGNUM ah,al,bh,bl; | ||
147 | |||
148 | if (bn_wexpand(m,num+3) == NULL) return(0); /* same check BN_mul_low applies to its own bn_wexpand call */ | ||
149 | an=A->top; | ||
150 | bn=B->top; | ||
151 | |||
152 | #ifdef BN_MUL_DEBUG | ||
153 | printf("bn_mm_low(%d,%d,%d)\n",A->top,B->top,num); | ||
154 | #endif | ||
155 | |||
156 | n=(num+1)/2; /* half size, rounded up */ | ||
157 | |||
158 | BN_init(&ah); BN_init(&al); BN_init(&bh); BN_init(&bl); | ||
159 | |||
160 | bn_set_low( &al,A,n); | ||
161 | bn_set_high(&ah,A,n); | ||
162 | bn_set_low( &bl,B,n); | ||
163 | bn_set_high(&bh,B,n); | ||
164 | |||
165 | if (num <= (bn_limit_num_low+bn_limit_num_low)) | ||
166 | { | ||
167 | if (!BN_mul(m,&al,&bl)) return(0); | ||
168 | if (!BN_mul_low(&t1,&al,&bh,n)) return(0); | ||
169 | if (!BN_mul_low(&t2,&ah,&bl,n)) return(0); | ||
170 | } | ||
171 | else | ||
172 | { | ||
173 | bn_mm(m ,&al,&bl,&(sk[2]),ctx); /* bn_mm's return type is not visible here, so its result is left unchecked */ | ||
174 | if (!bn_mm_low(&t1,&al,&bh,n,&(sk[2]),ctx)) return(0); | ||
175 | if (!bn_mm_low(&t2,&ah,&bl,n,&(sk[2]),ctx)) return(0); | ||
176 | } | ||
177 | |||
178 | if (!BN_add(&t1,&t1,&t2)) return(0); /* t1 = sum of the two cross terms */ | ||
179 | |||
180 | /* We will now do an evil hack instead of | ||
181 | * BN_lshift(&t1,&t1,n*BN_BITS2); | ||
182 | * BN_add(m,m,&t1); | ||
183 | * BN_mask_bits(m,num*BN_BITS2); | ||
184 | */ | ||
185 | bn_set_high(&ah,m,n); ah.max=num+2; /* presumably makes ah a view of m's upper words so the add lands in m -- confirm against bn_set_high */ | ||
186 | if (!BN_add(&ah,&ah,&t1)) return(0); | ||
187 | m->top=num; | ||
188 | |||
189 | m->neg=A->neg^B->neg; | ||
190 | return(1); | ||
191 | } | ||
192 | |||
193 | #undef t1 /* #undef takes only the macro name */ | ||
194 | #undef t2 | ||
diff --git a/src/lib/libcrypto/bn/old/bn_m.c b/src/lib/libcrypto/bn/old/bn_m.c new file mode 100644 index 0000000000..522beb02bc --- /dev/null +++ b/src/lib/libcrypto/bn/old/bn_m.c | |||
@@ -0,0 +1,139 @@ | |||
1 | /* crypto/bn/bn_m.c */ | ||
2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) | ||
3 | * All rights reserved. | ||
4 | * | ||
5 | * This package is an SSL implementation written | ||
6 | * by Eric Young (eay@cryptsoft.com). | ||
7 | * The implementation was written so as to conform with Netscapes SSL. | ||
8 | * | ||
9 | * This library is free for commercial and non-commercial use as long as | ||
10 | * the following conditions are aheared to. The following conditions | ||
11 | * apply to all code found in this distribution, be it the RC4, RSA, | ||
12 | * lhash, DES, etc., code; not just the SSL code. The SSL documentation | ||
13 | * included with this distribution is covered by the same copyright terms | ||
14 | * except that the holder is Tim Hudson (tjh@cryptsoft.com). | ||
15 | * | ||
16 | * Copyright remains Eric Young's, and as such any Copyright notices in | ||
17 | * the code are not to be removed. | ||
18 | * If this package is used in a product, Eric Young should be given attribution | ||
19 | * as the author of the parts of the library used. | ||
20 | * This can be in the form of a textual message at program startup or | ||
21 | * in documentation (online or textual) provided with the package. | ||
22 | * | ||
23 | * Redistribution and use in source and binary forms, with or without | ||
24 | * modification, are permitted provided that the following conditions | ||
25 | * are met: | ||
26 | * 1. Redistributions of source code must retain the copyright | ||
27 | * notice, this list of conditions and the following disclaimer. | ||
28 | * 2. Redistributions in binary form must reproduce the above copyright | ||
29 | * notice, this list of conditions and the following disclaimer in the | ||
30 | * documentation and/or other materials provided with the distribution. | ||
31 | * 3. All advertising materials mentioning features or use of this software | ||
32 | * must display the following acknowledgement: | ||
33 | * "This product includes cryptographic software written by | ||
34 | * Eric Young (eay@cryptsoft.com)" | ||
35 | * The word 'cryptographic' can be left out if the rouines from the library | ||
36 | * being used are not cryptographic related :-). | ||
37 | * 4. If you include any Windows specific code (or a derivative thereof) from | ||
38 | * the apps directory (application code) you must include an acknowledgement: | ||
39 | * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" | ||
40 | * | ||
41 | * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND | ||
42 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
43 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||
44 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE | ||
45 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | ||
46 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | ||
47 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | ||
48 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | ||
49 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | ||
50 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | ||
51 | * SUCH DAMAGE. | ||
52 | * | ||
53 | * The licence and distribution terms for any publically available version or | ||
54 | * derivative of this code cannot be changed. i.e. this code cannot simply be | ||
55 | * copied and put under another distribution licence | ||
56 | * [including the GNU Public Licence.] | ||
57 | */ | ||
58 | |||
#include <stdio.h>
#include <stdlib.h>
/*#include "cryptlib.h"*/
#include "bn_lcl.h"
62 | |||
63 | #define limit_bits 5 /* 2^5, or 32 words */ | ||
64 | #define limit_num (1<<limit_bits) | ||
65 | |||
66 | int BN_m(BIGNUM *r, BIGNUM *a, BIGNUM *b) | ||
67 | { | ||
68 | BIGNUM *sk; | ||
69 | int i,n; | ||
70 | |||
71 | n=(BN_num_bits_word(a->top|b->top)-limit_bits); | ||
72 | n*=2; | ||
73 | sk=(BIGNUM *)malloc(sizeof(BIGNUM)*n); | ||
74 | for (i=0; i<n; i++) | ||
75 | BN_init(&(sk[i])); | ||
76 | |||
77 | return(BN_mm(r,a,b,&(sk[0]))); | ||
78 | } | ||
79 | |||
80 | #define ahal (sk[0]) | ||
81 | #define blbh (sk[1]) | ||
82 | |||
83 | /* r must be different to a and b */ | ||
84 | int BN_mm(BIGNUM *m, BIGNUM *A, BIGNUM *B, BIGNUM *sk) | ||
85 | { | ||
86 | int i,num,anum,bnum; | ||
87 | int an,bn; | ||
88 | BIGNUM ah,al,bh,bl; | ||
89 | |||
90 | an=A->top; | ||
91 | bn=B->top; | ||
92 | if ((an <= limit_num) || (bn <= limit_num)) | ||
93 | { | ||
94 | return(BN_mul(m,A,B)); | ||
95 | } | ||
96 | |||
97 | anum=(an>bn)?an:bn; | ||
98 | num=(anum)/2; | ||
99 | |||
100 | /* Are going to now chop things into 'num' word chunks. */ | ||
101 | bnum=num*BN_BITS2; | ||
102 | |||
103 | BN_init(&ahal); | ||
104 | BN_init(&blbh); | ||
105 | BN_init(&ah); | ||
106 | BN_init(&al); | ||
107 | BN_init(&bh); | ||
108 | BN_init(&bl); | ||
109 | |||
110 | al.top=num; | ||
111 | al.d=A->d; | ||
112 | ah.top=A->top-num; | ||
113 | ah.d= &(A->d[num]); | ||
114 | |||
115 | bl.top=num; | ||
116 | bl.d=B->d; | ||
117 | bh.top=B->top-num; | ||
118 | bh.d= &(B->d[num]); | ||
119 | |||
120 | BN_sub(&ahal,&ah,&al); | ||
121 | BN_sub(&blbh,&bl,&bh); | ||
122 | |||
123 | BN_mm(m,&ahal,&blbh,&(sk[2])); | ||
124 | BN_mm(&ahal,&al,&bl,&(sk[2])); | ||
125 | BN_mm(&blbh,&ah,&bh,&(sk[2])); | ||
126 | |||
127 | BN_add(m,m,&ahal); | ||
128 | BN_add(m,m,&blbh); | ||
129 | |||
130 | BN_lshift(m,m,bnum); | ||
131 | BN_add(m,m,&ahal); | ||
132 | |||
133 | BN_lshift(&blbh,&blbh,bnum*2); | ||
134 | BN_add(m,m,&blbh); | ||
135 | |||
136 | m->neg=A->neg^B->neg; | ||
137 | return(1); | ||
138 | } | ||
139 | |||
diff --git a/src/lib/libcrypto/bn/old/bn_mul.c.works b/src/lib/libcrypto/bn/old/bn_mul.c.works new file mode 100644 index 0000000000..6d565d44a2 --- /dev/null +++ b/src/lib/libcrypto/bn/old/bn_mul.c.works | |||
@@ -0,0 +1,219 @@ | |||
1 | /* crypto/bn/bn_mul.c */ | ||
2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) | ||
3 | * All rights reserved. | ||
4 | * | ||
5 | * This package is an SSL implementation written | ||
6 | * by Eric Young (eay@cryptsoft.com). | ||
7 | * The implementation was written so as to conform with Netscapes SSL. | ||
8 | * | ||
9 | * This library is free for commercial and non-commercial use as long as | ||
10 | * the following conditions are aheared to. The following conditions | ||
11 | * apply to all code found in this distribution, be it the RC4, RSA, | ||
12 | * lhash, DES, etc., code; not just the SSL code. The SSL documentation | ||
13 | * included with this distribution is covered by the same copyright terms | ||
14 | * except that the holder is Tim Hudson (tjh@cryptsoft.com). | ||
15 | * | ||
16 | * Copyright remains Eric Young's, and as such any Copyright notices in | ||
17 | * the code are not to be removed. | ||
18 | * If this package is used in a product, Eric Young should be given attribution | ||
19 | * as the author of the parts of the library used. | ||
20 | * This can be in the form of a textual message at program startup or | ||
21 | * in documentation (online or textual) provided with the package. | ||
22 | * | ||
23 | * Redistribution and use in source and binary forms, with or without | ||
24 | * modification, are permitted provided that the following conditions | ||
25 | * are met: | ||
26 | * 1. Redistributions of source code must retain the copyright | ||
27 | * notice, this list of conditions and the following disclaimer. | ||
28 | * 2. Redistributions in binary form must reproduce the above copyright | ||
29 | * notice, this list of conditions and the following disclaimer in the | ||
30 | * documentation and/or other materials provided with the distribution. | ||
31 | * 3. All advertising materials mentioning features or use of this software | ||
32 | * must display the following acknowledgement: | ||
33 | * "This product includes cryptographic software written by | ||
34 | * Eric Young (eay@cryptsoft.com)" | ||
35 | * The word 'cryptographic' can be left out if the rouines from the library | ||
36 | * being used are not cryptographic related :-). | ||
37 | * 4. If you include any Windows specific code (or a derivative thereof) from | ||
38 | * the apps directory (application code) you must include an acknowledgement: | ||
39 | * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" | ||
40 | * | ||
41 | * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND | ||
42 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
43 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||
44 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE | ||
45 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | ||
46 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | ||
47 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | ||
48 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | ||
49 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | ||
50 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | ||
51 | * SUCH DAMAGE. | ||
52 | * | ||
53 | * The licence and distribution terms for any publically available version or | ||
54 | * derivative of this code cannot be changed. i.e. this code cannot simply be | ||
55 | * copied and put under another distribution licence | ||
56 | * [including the GNU Public Licence.] | ||
57 | */ | ||
58 | |||
59 | #include <stdio.h> | ||
60 | #include "cryptlib.h" | ||
61 | #include "bn_lcl.h" | ||
62 | |||
63 | int bn_mm(BIGNUM *m,BIGNUM *A,BIGNUM *B, BIGNUM *sk,BN_CTX *ctx); | ||
64 | |||
65 | /* r must be different to a and b */ | ||
66 | int BN_mul(r, a, b) | ||
67 | BIGNUM *r; | ||
68 | BIGNUM *a; | ||
69 | BIGNUM *b; | ||
70 | { | ||
71 | BN_ULONG *ap,*bp,*rp; | ||
72 | BIGNUM *sk; | ||
73 | int i,n,ret; | ||
74 | int max,al,bl; | ||
75 | BN_CTX ctx; | ||
76 | |||
77 | bn_check_top(a); | ||
78 | bn_check_top(b); | ||
79 | |||
80 | al=a->top; | ||
81 | bl=b->top; | ||
82 | if ((al == 0) || (bl == 0)) | ||
83 | { | ||
84 | r->top=0; | ||
85 | return(1); | ||
86 | } | ||
87 | #ifdef BN_MUL_DEBUG | ||
88 | printf("BN_mul(%d,%d)\n",a->top,b->top); | ||
89 | #endif | ||
90 | |||
91 | #ifdef BN_RECURSION | ||
92 | if ( (bn_limit_bits > 0) && | ||
93 | (bl > bn_limit_num) && (al > bn_limit_num)) | ||
94 | { | ||
95 | n=(BN_num_bits_word(al|bl)-bn_limit_bits); | ||
96 | n*=2; | ||
97 | sk=(BIGNUM *)Malloc(sizeof(BIGNUM)*n); | ||
98 | memset(sk,0,sizeof(BIGNUM)*n); | ||
99 | memset(&ctx,0,sizeof(ctx)); | ||
100 | |||
101 | ret=bn_mm(r,a,b,&(sk[0]),&ctx); | ||
102 | for (i=0; i<n; i+=2) | ||
103 | { | ||
104 | BN_clear_free(&sk[i]); | ||
105 | BN_clear_free(&sk[i+1]); | ||
106 | } | ||
107 | Free(sk); | ||
108 | return(ret); | ||
109 | } | ||
110 | #endif | ||
111 | |||
112 | max=(al+bl); | ||
113 | if (bn_wexpand(r,max) == NULL) return(0); | ||
114 | r->top=max; | ||
115 | r->neg=a->neg^b->neg; | ||
116 | ap=a->d; | ||
117 | bp=b->d; | ||
118 | rp=r->d; | ||
119 | |||
120 | #ifdef BN_RECURSION | ||
121 | if ((al == bl) && (al == 8)) | ||
122 | { | ||
123 | bn_mul_comba8(rp,ap,bp); | ||
124 | } | ||
125 | else | ||
126 | #endif | ||
127 | { | ||
128 | rp[al]=bn_mul_words(rp,ap,al,*(bp++)); | ||
129 | rp++; | ||
130 | for (i=1; i<bl; i++) | ||
131 | { | ||
132 | rp[al]=bn_mul_add_words(rp,ap,al,*(bp++)); | ||
133 | rp++; | ||
134 | } | ||
135 | } | ||
136 | if ((max > 0) && (r->d[max-1] == 0)) r->top--; | ||
137 | return(1); | ||
138 | } | ||
139 | |||
#ifdef BN_RECURSION

#define ahal (sk[0])
#define blbh (sk[1])

/*
 * bn_mm() - recursive three-multiplication product: m = A * B.
 *
 * Both operands are cut at word n = (max(A->top,B->top)+1)/2 into al/ah
 * and bl/bh with bn_set_low()/bn_set_high(); the product is assembled from
 * (ah-al)*(bl-bh), al*bl and ah*bh.  Once the longer operand is no more
 * than 2*bn_limit_num words the three products use BN_mul() (or BN_sqr()
 * when A and B are the same object); above that bn_mm() recurses.
 *
 * sk is scratch: sk[0] (ahal) and sk[1] (blbh) are used at this level and
 * the recursive calls get &sk[2].  The slots are not initialised here, so
 * the caller has to supply them initialised (BN_mul() zeroes them).
 * m must be different to A and B.
 *
 * Returns 1 on success, 0 as soon as any sub-step reports failure.
 *
 * NOTE(review): bn_set_low()/bn_set_high() are defined elsewhere; whether
 * they borrow or copy the operand words cannot be seen from here.
 */
int bn_mm(BIGNUM *m, BIGNUM *A, BIGNUM *B, BIGNUM *sk, BN_CTX *ctx)
{
    int n, num, sqr;
    int an, bn;
    BIGNUM ah, al, bh, bl;

    an = A->top;
    bn = B->top;
#ifdef BN_MUL_DEBUG
    printf("bn_mm(%d,%d)\n",A->top,B->top);
#endif

    sqr = (A == B) ? 1 : 0;
    num = (an > bn) ? an : bn;
    n = (num + 1) / 2;
    /* Are going to now chop things into n word chunks. */

    BN_init(&ah);
    BN_init(&al);
    BN_init(&bh);
    BN_init(&bl);

    bn_set_low (&al, A, n);
    bn_set_high(&ah, A, n);
    bn_set_low (&bl, B, n);
    bn_set_high(&bh, B, n);

    if (!BN_sub(&ahal, &ah, &al))
        return(0);
    if (!BN_sub(&blbh, &bl, &bh))
        return(0);

    if (num <= (bn_limit_num + bn_limit_num))
    {
        if (!BN_mul(m, &ahal, &blbh))
            return(0);
        if (sqr)
        {
            if (!BN_sqr(&ahal, &al, ctx))
                return(0);
            if (!BN_sqr(&blbh, &ah, ctx))
                return(0);
        }
        else
        {
            if (!BN_mul(&ahal, &al, &bl))
                return(0);
            if (!BN_mul(&blbh, &ah, &bh))
                return(0);
        }
    }
    else
    {
        if (!bn_mm(m, &ahal, &blbh, &(sk[2]), ctx))
            return(0);
        if (!bn_mm(&ahal, &al, &bl, &(sk[2]), ctx))
            return(0);
        if (!bn_mm(&blbh, &ah, &bh, &(sk[2]), ctx))
            return(0);
    }

    /* m = ah*bl + al*bh, the middle term */
    if (!BN_add(m, m, &ahal))
        return(0);
    if (!BN_add(m, m, &blbh))
        return(0);

    if (!BN_lshift(m, m, n * BN_BITS2))
        return(0);
    if (!BN_lshift(&blbh, &blbh, n * BN_BITS2 * 2))
        return(0);

    if (!BN_add(m, m, &ahal))
        return(0);
    if (!BN_add(m, m, &blbh))
        return(0);

    m->neg = A->neg ^ B->neg;
    return(1);
}
#undef ahal
#undef blbh

#include "bn_low.c"
#include "bn_high.c"
#include "f.c"

#endif
diff --git a/src/lib/libcrypto/bn/old/bn_wmul.c b/src/lib/libcrypto/bn/old/bn_wmul.c new file mode 100644 index 0000000000..a467b2f17a --- /dev/null +++ b/src/lib/libcrypto/bn/old/bn_wmul.c | |||
@@ -0,0 +1,173 @@ | |||
1 | #include <stdio.h> | ||
2 | #include "bn_lcl.h" | ||
3 | |||
4 | #if 1 | ||
5 | |||
6 | int bn_mull(BIGNUM *r,BIGNUM *a,BIGNUM *b, BN_CTX *ctx); | ||
7 | |||
8 | int bn_mull(BIGNUM *r, BIGNUM *a, BIGNUM *b, BN_CTX *ctx) | ||
9 | { | ||
10 | int top,i,j,k,al,bl; | ||
11 | BIGNUM *t; | ||
12 | |||
13 | #ifdef BN_COUNT | ||
14 | printf("bn_mull %d * %d\n",a->top,b->top); | ||
15 | #endif | ||
16 | |||
17 | bn_check_top(a); | ||
18 | bn_check_top(b); | ||
19 | bn_check_top(r); | ||
20 | |||
21 | al=a->top; | ||
22 | bl=b->top; | ||
23 | r->neg=a->neg^b->neg; | ||
24 | |||
25 | top=al+bl; | ||
26 | if ((al < 4) || (bl < 4)) | ||
27 | { | ||
28 | if (bn_wexpand(r,top) == NULL) return(0); | ||
29 | r->top=top; | ||
30 | bn_mul_normal(r->d,a->d,al,b->d,bl); | ||
31 | goto end; | ||
32 | } | ||
33 | else if (al == bl) /* A good start, they are the same size */ | ||
34 | goto symetric; | ||
35 | else | ||
36 | { | ||
37 | i=(al-bl); | ||
38 | if ((i == 1) && !BN_get_flags(b,BN_FLG_STATIC_DATA)) | ||
39 | { | ||
40 | bn_wexpand(b,al); | ||
41 | b->d[bl]=0; | ||
42 | bl++; | ||
43 | goto symetric; | ||
44 | } | ||
45 | else if ((i == -1) && !BN_get_flags(a,BN_FLG_STATIC_DATA)) | ||
46 | { | ||
47 | bn_wexpand(a,bl); | ||
48 | a->d[al]=0; | ||
49 | al++; | ||
50 | goto symetric; | ||
51 | } | ||
52 | } | ||
53 | |||
54 | /* asymetric and >= 4 */ | ||
55 | if (bn_wexpand(r,top) == NULL) return(0); | ||
56 | r->top=top; | ||
57 | bn_mul_normal(r->d,a->d,al,b->d,bl); | ||
58 | |||
59 | if (0) | ||
60 | { | ||
61 | /* symetric and > 4 */ | ||
62 | symetric: | ||
63 | if (al == 4) | ||
64 | { | ||
65 | if (bn_wexpand(r,al*2) == NULL) return(0); | ||
66 | r->top=top; | ||
67 | bn_mul_comba4(r->d,a->d,b->d); | ||
68 | goto end; | ||
69 | } | ||
70 | if (al == 8) | ||
71 | { | ||
72 | if (bn_wexpand(r,al*2) == NULL) return(0); | ||
73 | r->top=top; | ||
74 | bn_mul_comba8(r->d,a->d,b->d); | ||
75 | goto end; | ||
76 | } | ||
77 | if (al <= BN_MULL_NORMAL_SIZE) | ||
78 | { | ||
79 | if (bn_wexpand(r,al*2) == NULL) return(0); | ||
80 | r->top=top; | ||
81 | bn_mul_normal(r->d,a->d,al,b->d,bl); | ||
82 | goto end; | ||
83 | } | ||
84 | /* 16 or larger */ | ||
85 | j=BN_num_bits_word((BN_ULONG)al); | ||
86 | j=1<<(j-1); | ||
87 | k=j+j; | ||
88 | t= &(ctx->bn[ctx->tos]); | ||
89 | if (al == j) /* exact multiple */ | ||
90 | { | ||
91 | bn_wexpand(t,k*2); | ||
92 | bn_wexpand(r,k*2); | ||
93 | bn_mul_recursive(r->d,a->d,b->d,al,t->d); | ||
94 | } | ||
95 | else | ||
96 | { | ||
97 | bn_wexpand(a,k); | ||
98 | bn_wexpand(b,k); | ||
99 | bn_wexpand(t,k*4); | ||
100 | bn_wexpand(r,k*4); | ||
101 | for (i=a->top; i<k; i++) | ||
102 | a->d[i]=0; | ||
103 | for (i=b->top; i<k; i++) | ||
104 | b->d[i]=0; | ||
105 | bn_mul_part_recursive(r->d,a->d,b->d,al-j,j,t->d); | ||
106 | } | ||
107 | r->top=top; | ||
108 | } | ||
109 | end: | ||
110 | bn_fix_top(r); | ||
111 | return(1); | ||
112 | } | ||
113 | #endif | ||
114 | |||
115 | void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb) | ||
116 | { | ||
117 | BN_ULONG *rr; | ||
118 | |||
119 | #ifdef BN_COUNT | ||
120 | printf(" bn_mul_normal %d * %d\n",na,nb); | ||
121 | #endif | ||
122 | |||
123 | if (na < nb) | ||
124 | { | ||
125 | int itmp; | ||
126 | BN_ULONG *ltmp; | ||
127 | |||
128 | itmp=na; na=nb; nb=itmp; | ||
129 | ltmp=a; a=b; b=ltmp; | ||
130 | |||
131 | } | ||
132 | rr= &(r[na]); | ||
133 | rr[0]=bn_mul_words(r,a,na,b[0]); | ||
134 | |||
135 | for (;;) | ||
136 | { | ||
137 | if (--nb <= 0) return; | ||
138 | rr[1]=bn_mul_add_words(&(r[1]),a,na,b[1]); | ||
139 | if (--nb <= 0) return; | ||
140 | rr[2]=bn_mul_add_words(&(r[2]),a,na,b[2]); | ||
141 | if (--nb <= 0) return; | ||
142 | rr[3]=bn_mul_add_words(&(r[3]),a,na,b[3]); | ||
143 | if (--nb <= 0) return; | ||
144 | rr[4]=bn_mul_add_words(&(r[4]),a,na,b[4]); | ||
145 | rr+=4; | ||
146 | r+=4; | ||
147 | b+=4; | ||
148 | } | ||
149 | } | ||
150 | |||
151 | #if 1 | ||
152 | void bn_mul_low_normal(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) | ||
153 | { | ||
154 | #ifdef BN_COUNT | ||
155 | printf(" bn_mul_low_normal %d * %d\n",n,n); | ||
156 | #endif | ||
157 | bn_mul_words(r,a,n,b[0]); | ||
158 | |||
159 | for (;;) | ||
160 | { | ||
161 | if (--n <= 0) return; | ||
162 | bn_mul_add_words(&(r[1]),a,n,b[1]); | ||
163 | if (--n <= 0) return; | ||
164 | bn_mul_add_words(&(r[2]),a,n,b[2]); | ||
165 | if (--n <= 0) return; | ||
166 | bn_mul_add_words(&(r[3]),a,n,b[3]); | ||
167 | if (--n <= 0) return; | ||
168 | bn_mul_add_words(&(r[4]),a,n,b[4]); | ||
169 | r+=4; | ||
170 | b+=4; | ||
171 | } | ||
172 | } | ||
173 | #endif | ||
diff --git a/src/lib/libcrypto/bn/old/build b/src/lib/libcrypto/bn/old/build new file mode 100644 index 0000000000..8cd99e5f17 --- /dev/null +++ b/src/lib/libcrypto/bn/old/build | |||
@@ -0,0 +1,3 @@ | |||
#!/bin/sh -x
# Compile test.c with debug info (-g) against the headers in ../../include
# and the libcrypto found in ../.. ; no -o is given, so gcc writes its
# default output file.  -x on the shebang echoes the command as it runs.

gcc -g -I../../include test.c -L../.. -lcrypto
diff --git a/src/lib/libcrypto/bn/old/info b/src/lib/libcrypto/bn/old/info new file mode 100644 index 0000000000..5ac99c3b23 --- /dev/null +++ b/src/lib/libcrypto/bn/old/info | |||
@@ -0,0 +1,22 @@ | |||
1 | Given A1A0 * B1B0 == S3S2S1S0 | ||
2 | |||
3 | S0= low(A0*B0) | ||
4 | S1= low( (A1-A0)*(B0-B1)) +low( A1*B1) +high(A0*B0) | ||
5 | S2= high((A1-A0)*(B0-B1)) +high(A1*B1) +low( A1*B1) | ||
6 | S3= high(A1*B1); | ||
7 | |||
8 | Assume we know S1 and S0, and can calculate A1*B1 and high((A1-A0)*(B0-B1)) | ||
9 | |||
10 | k0= S0 == low(A0*B0) | ||
11 | k1= S1 | ||
12 | k2= low( A1*B1) | ||
13 | k3= high(A1*B1) | ||
14 | k4= high((A1-A0)*(B0-B1)) | ||
15 | |||
16 | k1= low((A1-A0)*(B0-B1)) +k2 +high(A0*B0) | ||
17 | S2= k4 +k3 +k2 | ||
18 | S3= k3 | ||
19 | |||
20 | S1-k2= low((A1-A0)*(B0-B1)) +high(A0*B0) | ||
21 | |||
22 | We potentially have a carry or a borrow from S1 | ||
diff --git a/src/lib/libcrypto/bn/old/test.works b/src/lib/libcrypto/bn/old/test.works new file mode 100644 index 0000000000..127c7b415d --- /dev/null +++ b/src/lib/libcrypto/bn/old/test.works | |||
@@ -0,0 +1,205 @@ | |||
1 | #include <stdio.h> | ||
2 | #include "cryptlib.h" | ||
3 | #include "bn_lcl.h" | ||
4 | |||
/* Bit length handed to BN_generate_prime() and BN_rand() in main(). */
#define SIZE 128

/* The functions defined further down under these three names are compiled
 * as bn_mcs / bn_fm / bn_mmm, and every call in this file is redirected to
 * them - presumably so the experimental versions do not clash with the
 * ones in libcrypto. */
#define BN_MONT_CTX_set bn_mcs
#define BN_from_montgomery bn_fm
#define BN_mod_mul_montgomery bn_mmm
/* Converting into Montgomery form is a Montgomery multiply by mont->RR. */
#undef BN_to_montgomery
#define BN_to_montgomery(r,a,mont,ctx) bn_mmm(\
    r,a,(mont)->RR,(mont),ctx)
13 | |||
14 | main() | ||
15 | { | ||
16 | BIGNUM prime,a,b,r,A,B,R; | ||
17 | BN_MONT_CTX *mont; | ||
18 | BN_CTX *ctx; | ||
19 | int i; | ||
20 | |||
21 | ctx=BN_CTX_new(); | ||
22 | BN_init(&prime); | ||
23 | BN_init(&a); BN_init(&b); BN_init(&r); | ||
24 | BN_init(&A); BN_init(&B); BN_init(&R); | ||
25 | |||
26 | BN_generate_prime(&prime,SIZE,0,NULL,NULL,NULL,NULL); | ||
27 | BN_rand(&A,SIZE,1,0); | ||
28 | BN_rand(&B,SIZE,1,0); | ||
29 | BN_mod(&A,&A,&prime,ctx); | ||
30 | BN_mod(&B,&B,&prime,ctx); | ||
31 | |||
32 | mont=BN_MONT_CTX_new(); | ||
33 | BN_MONT_CTX_set(mont,&prime,ctx); | ||
34 | |||
35 | BN_to_montgomery(&a,&A,mont,ctx); | ||
36 | BN_to_montgomery(&b,&B,mont,ctx); | ||
37 | |||
38 | BN_mul(&r,&a,&b); | ||
39 | BN_print_fp(stdout,&r); printf("\n"); | ||
40 | BN_from_montgomery(&r,&r,mont,ctx); | ||
41 | BN_print_fp(stdout,&r); printf("\n"); | ||
42 | BN_from_montgomery(&r,&r,mont,ctx); | ||
43 | BN_print_fp(stdout,&r); printf("\n"); | ||
44 | |||
45 | BN_mod_mul(&R,&A,&B,&prime,ctx); | ||
46 | |||
47 | BN_print_fp(stdout,&a); printf("\n"); | ||
48 | BN_print_fp(stdout,&b); printf("\n"); | ||
49 | BN_print_fp(stdout,&prime); printf("\n"); | ||
50 | BN_print_fp(stdout,&r); printf("\n\n"); | ||
51 | |||
52 | BN_print_fp(stdout,&A); printf("\n"); | ||
53 | BN_print_fp(stdout,&B); printf("\n"); | ||
54 | BN_print_fp(stdout,&prime); printf("\n"); | ||
55 | BN_print_fp(stdout,&R); printf("\n\n"); | ||
56 | |||
57 | BN_mul(&r,&a,&b); | ||
58 | BN_print_fp(stdout,&r); printf(" <- BA*DC\n"); | ||
59 | BN_copy(&A,&r); | ||
60 | i=SIZE/2; | ||
61 | BN_mask_bits(&A,i*2); | ||
62 | // BN_print_fp(stdout,&A); printf(" <- low(BA*DC)\n"); | ||
63 | bn_do_lower(&r,&a,&b,&A,i); | ||
64 | // BN_print_fp(stdout,&r); printf(" <- low(BA*DC)\n"); | ||
65 | } | ||
66 | |||
67 | int bn_mul_low(r,a,b,low,i) | ||
68 | BIGNUM *r,*a,*b,*low; | ||
69 | int i; | ||
70 | { | ||
71 | int w; | ||
72 | BIGNUM Kh,Km,t1,t2,h,ah,al,bh,bl,l,m,s0,s1; | ||
73 | |||
74 | BN_init(&Kh); BN_init(&Km); BN_init(&t1); BN_init(&t2); BN_init(&l); | ||
75 | BN_init(&ah); BN_init(&al); BN_init(&bh); BN_init(&bl); BN_init(&h); | ||
76 | BN_init(&m); BN_init(&s0); BN_init(&s1); | ||
77 | |||
78 | BN_copy(&al,a); BN_mask_bits(&al,i); BN_rshift(&ah,a,i); | ||
79 | BN_copy(&bl,b); BN_mask_bits(&bl,i); BN_rshift(&bh,b,i); | ||
80 | |||
81 | |||
82 | BN_sub(&t1,&al,&ah); | ||
83 | BN_sub(&t2,&bh,&bl); | ||
84 | BN_mul(&m,&t1,&t2); | ||
85 | BN_mul(&h,&ah,&bh); | ||
86 | |||
87 | BN_copy(&s0,low); BN_mask_bits(&s0,i); | ||
88 | BN_rshift(&s1,low,i); | ||
89 | |||
90 | BN_add(&t1,&h,&m); | ||
91 | BN_add(&t1,&t1,&s0); | ||
92 | |||
93 | BN_copy(&t2,&t1); BN_mask_bits(&t2,i); | ||
94 | BN_sub(&t1,&s1,&t2); | ||
95 | BN_lshift(&t1,&t1,i); | ||
96 | BN_add(&t1,&t1,&s0); | ||
97 | if (t1.neg) | ||
98 | { | ||
99 | BN_lshift(&t2,BN_value_one(),i*2); | ||
100 | BN_add(&t1,&t2,&t1); | ||
101 | BN_mask_bits(&t1,i*2); | ||
102 | } | ||
103 | |||
104 | BN_free(&Kh); BN_free(&Km); BN_free(&t1); BN_free(&t2); | ||
105 | BN_free(&ah); BN_free(&al); BN_free(&bh); BN_free(&bl); | ||
106 | } | ||
107 | |||
108 | int BN_mod_mul_montgomery(r,a,b,mont,ctx) | ||
109 | BIGNUM *r,*a,*b; | ||
110 | BN_MONT_CTX *mont; | ||
111 | BN_CTX *ctx; | ||
112 | { | ||
113 | BIGNUM *tmp; | ||
114 | |||
115 | tmp= &(ctx->bn[ctx->tos++]); | ||
116 | |||
117 | if (a == b) | ||
118 | { | ||
119 | if (!BN_sqr(tmp,a,ctx)) goto err; | ||
120 | } | ||
121 | else | ||
122 | { | ||
123 | if (!BN_mul(tmp,a,b)) goto err; | ||
124 | } | ||
125 | /* reduce from aRR to aR */ | ||
126 | if (!BN_from_montgomery(r,tmp,mont,ctx)) goto err; | ||
127 | ctx->tos--; | ||
128 | return(1); | ||
129 | err: | ||
130 | return(0); | ||
131 | } | ||
132 | |||
133 | int BN_from_montgomery(r,a,mont,ctx) | ||
134 | BIGNUM *r; | ||
135 | BIGNUM *a; | ||
136 | BN_MONT_CTX *mont; | ||
137 | BN_CTX *ctx; | ||
138 | { | ||
139 | BIGNUM z1; | ||
140 | BIGNUM *t1,*t2; | ||
141 | BN_ULONG *ap,*bp,*rp; | ||
142 | int j,i,bl,al; | ||
143 | |||
144 | BN_init(&z1); | ||
145 | t1= &(ctx->bn[ctx->tos]); | ||
146 | t2= &(ctx->bn[ctx->tos+1]); | ||
147 | |||
148 | if (!BN_copy(t1,a)) goto err; | ||
149 | /* can cheat */ | ||
150 | BN_mask_bits(t1,mont->ri); | ||
151 | if (!BN_mul(t2,t1,mont->Ni)) goto err; | ||
152 | BN_mask_bits(t2,mont->ri); | ||
153 | |||
154 | if (!BN_mul(t1,t2,mont->N)) goto err; | ||
155 | if (!BN_add(t2,t1,a)) goto err; | ||
156 | |||
157 | /* At this point, t2 has the bottom ri bits set to zero. | ||
158 | * This means that the bottom ri bits == the 1^ri minus the bottom | ||
159 | * ri bits of a. | ||
160 | * This means that only the bits above 'ri' in a need to be added, | ||
161 | * and XXXXXXXXXXXXXXXXXXXXXXXX | ||
162 | */ | ||
163 | BN_print_fp(stdout,t2); printf("\n"); | ||
164 | BN_rshift(r,t2,mont->ri); | ||
165 | |||
166 | if (BN_ucmp(r,mont->N) >= 0) | ||
167 | bn_qsub(r,r,mont->N); | ||
168 | |||
169 | return(1); | ||
170 | err: | ||
171 | return(0); | ||
172 | } | ||
173 | |||
174 | int BN_MONT_CTX_set(mont,mod,ctx) | ||
175 | BN_MONT_CTX *mont; | ||
176 | BIGNUM *mod; | ||
177 | BN_CTX *ctx; | ||
178 | { | ||
179 | BIGNUM *Ri=NULL,*R=NULL; | ||
180 | |||
181 | if (mont->RR == NULL) mont->RR=BN_new(); | ||
182 | if (mont->N == NULL) mont->N=BN_new(); | ||
183 | |||
184 | R=mont->RR; /* grab RR as a temp */ | ||
185 | BN_copy(mont->N,mod); /* Set N */ | ||
186 | |||
187 | mont->ri=(BN_num_bits(mod)+(BN_BITS2-1))/BN_BITS2*BN_BITS2; | ||
188 | BN_lshift(R,BN_value_one(),mont->ri); /* R */ | ||
189 | if ((Ri=BN_mod_inverse(NULL,R,mod,ctx)) == NULL) goto err;/* Ri */ | ||
190 | BN_lshift(Ri,Ri,mont->ri); /* R*Ri */ | ||
191 | bn_qsub(Ri,Ri,BN_value_one()); /* R*Ri - 1 */ | ||
192 | BN_div(Ri,NULL,Ri,mod,ctx); | ||
193 | if (mont->Ni != NULL) BN_free(mont->Ni); | ||
194 | mont->Ni=Ri; /* Ni=(R*Ri-1)/N */ | ||
195 | |||
196 | /* setup RR for conversions */ | ||
197 | BN_lshift(mont->RR,BN_value_one(),mont->ri*2); | ||
198 | BN_mod(mont->RR,mont->RR,mont->N,ctx); | ||
199 | |||
200 | return(1); | ||
201 | err: | ||
202 | return(0); | ||
203 | } | ||
204 | |||
205 | |||