summaryrefslogtreecommitdiff
path: root/src/lib/libcrypto/bn/asm
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib/libcrypto/bn/asm')
-rw-r--r--src/lib/libcrypto/bn/asm/bn-586.pl384
-rw-r--r--src/lib/libcrypto/bn/asm/co-586.pl286
-rw-r--r--src/lib/libcrypto/bn/asm/pa-risc2.s416
-rw-r--r--src/lib/libcrypto/bn/asm/sparcv8.S1458
-rw-r--r--src/lib/libcrypto/bn/asm/sparcv8plus.S1535
-rw-r--r--src/lib/libcrypto/bn/asm/x86.pl28
-rw-r--r--src/lib/libcrypto/bn/asm/x86/add.pl76
-rw-r--r--src/lib/libcrypto/bn/asm/x86/comba.pl277
-rw-r--r--src/lib/libcrypto/bn/asm/x86/div.pl15
-rw-r--r--src/lib/libcrypto/bn/asm/x86/mul.pl77
-rw-r--r--src/lib/libcrypto/bn/asm/x86/mul_add.pl87
-rw-r--r--src/lib/libcrypto/bn/asm/x86/sqr.pl60
-rw-r--r--src/lib/libcrypto/bn/asm/x86/sub.pl76
13 files changed, 0 insertions, 4775 deletions
diff --git a/src/lib/libcrypto/bn/asm/bn-586.pl b/src/lib/libcrypto/bn/asm/bn-586.pl
deleted file mode 100644
index 5191bed273..0000000000
--- a/src/lib/libcrypto/bn/asm/bn-586.pl
+++ /dev/null
@@ -1,384 +0,0 @@
1#!/usr/local/bin/perl
2
3push(@INC,"perlasm","../../perlasm");
4require "x86asm.pl";
5
6&asm_init($ARGV[0],$0);
7
8&bn_mul_add_words("bn_mul_add_words");
9&bn_mul_words("bn_mul_words");
10&bn_sqr_words("bn_sqr_words");
11&bn_div_words("bn_div_words");
12&bn_add_words("bn_add_words");
13&bn_sub_words("bn_sub_words");
14
15&asm_finish();
16
17sub bn_mul_add_words
18 {
19 local($name)=@_;
20
21 &function_begin($name,"");
22
23 &comment("");
24 $Low="eax";
25 $High="edx";
26 $a="ebx";
27 $w="ebp";
28 $r="edi";
29 $c="esi";
30
31 &xor($c,$c); # clear carry
32 &mov($r,&wparam(0)); #
33
34 &mov("ecx",&wparam(2)); #
35 &mov($a,&wparam(1)); #
36
37 &and("ecx",0xfffffff8); # num / 8
38 &mov($w,&wparam(3)); #
39
40 &push("ecx"); # Up the stack for a tmp variable
41
42 &jz(&label("maw_finish"));
43
44 &set_label("maw_loop",0);
45
46 &mov(&swtmp(0),"ecx"); #
47
48 for ($i=0; $i<32; $i+=4)
49 {
50 &comment("Round $i");
51
52 &mov("eax",&DWP($i,$a,"",0)); # *a
53 &mul($w); # *a * w
54 &add("eax",$c); # L(t)+= *r
55 &mov($c,&DWP($i,$r,"",0)); # L(t)+= *r
56 &adc("edx",0); # H(t)+=carry
57 &add("eax",$c); # L(t)+=c
58 &adc("edx",0); # H(t)+=carry
59 &mov(&DWP($i,$r,"",0),"eax"); # *r= L(t);
60 &mov($c,"edx"); # c= H(t);
61 }
62
63 &comment("");
64 &mov("ecx",&swtmp(0)); #
65 &add($a,32);
66 &add($r,32);
67 &sub("ecx",8);
68 &jnz(&label("maw_loop"));
69
70 &set_label("maw_finish",0);
71 &mov("ecx",&wparam(2)); # get num
72 &and("ecx",7);
73 &jnz(&label("maw_finish2")); # helps branch prediction
74 &jmp(&label("maw_end"));
75
76 &set_label("maw_finish2",1);
77 for ($i=0; $i<7; $i++)
78 {
79 &comment("Tail Round $i");
80 &mov("eax",&DWP($i*4,$a,"",0));# *a
81 &mul($w); # *a * w
82 &add("eax",$c); # L(t)+=c
83 &mov($c,&DWP($i*4,$r,"",0)); # L(t)+= *r
84 &adc("edx",0); # H(t)+=carry
85 &add("eax",$c);
86 &adc("edx",0); # H(t)+=carry
87 &dec("ecx") if ($i != 7-1);
88 &mov(&DWP($i*4,$r,"",0),"eax"); # *r= L(t);
89 &mov($c,"edx"); # c= H(t);
90 &jz(&label("maw_end")) if ($i != 7-1);
91 }
92 &set_label("maw_end",0);
93 &mov("eax",$c);
94
95 &pop("ecx"); # clear variable from
96
97 &function_end($name);
98 }
99
100sub bn_mul_words
101 {
102 local($name)=@_;
103
104 &function_begin($name,"");
105
106 &comment("");
107 $Low="eax";
108 $High="edx";
109 $a="ebx";
110 $w="ecx";
111 $r="edi";
112 $c="esi";
113 $num="ebp";
114
115 &xor($c,$c); # clear carry
116 &mov($r,&wparam(0)); #
117 &mov($a,&wparam(1)); #
118 &mov($num,&wparam(2)); #
119 &mov($w,&wparam(3)); #
120
121 &and($num,0xfffffff8); # num / 8
122 &jz(&label("mw_finish"));
123
124 &set_label("mw_loop",0);
125 for ($i=0; $i<32; $i+=4)
126 {
127 &comment("Round $i");
128
129 &mov("eax",&DWP($i,$a,"",0)); # *a
130 &mul($w); # *a * w
131 &add("eax",$c); # L(t)+=c
132 # XXX
133
134 &adc("edx",0); # H(t)+=carry
135 &mov(&DWP($i,$r,"",0),"eax"); # *r= L(t);
136
137 &mov($c,"edx"); # c= H(t);
138 }
139
140 &comment("");
141 &add($a,32);
142 &add($r,32);
143 &sub($num,8);
144 &jz(&label("mw_finish"));
145 &jmp(&label("mw_loop"));
146
147 &set_label("mw_finish",0);
148 &mov($num,&wparam(2)); # get num
149 &and($num,7);
150 &jnz(&label("mw_finish2"));
151 &jmp(&label("mw_end"));
152
153 &set_label("mw_finish2",1);
154 for ($i=0; $i<7; $i++)
155 {
156 &comment("Tail Round $i");
157 &mov("eax",&DWP($i*4,$a,"",0));# *a
158 &mul($w); # *a * w
159 &add("eax",$c); # L(t)+=c
160 # XXX
161 &adc("edx",0); # H(t)+=carry
162 &mov(&DWP($i*4,$r,"",0),"eax");# *r= L(t);
163 &mov($c,"edx"); # c= H(t);
164 &dec($num) if ($i != 7-1);
165 &jz(&label("mw_end")) if ($i != 7-1);
166 }
167 &set_label("mw_end",0);
168 &mov("eax",$c);
169
170 &function_end($name);
171 }
172
173sub bn_sqr_words
174 {
175 local($name)=@_;
176
177 &function_begin($name,"");
178
179 &comment("");
180 $r="esi";
181 $a="edi";
182 $num="ebx";
183
184 &mov($r,&wparam(0)); #
185 &mov($a,&wparam(1)); #
186 &mov($num,&wparam(2)); #
187
188 &and($num,0xfffffff8); # num / 8
189 &jz(&label("sw_finish"));
190
191 &set_label("sw_loop",0);
192 for ($i=0; $i<32; $i+=4)
193 {
194 &comment("Round $i");
195 &mov("eax",&DWP($i,$a,"",0)); # *a
196 # XXX
197 &mul("eax"); # *a * *a
198 &mov(&DWP($i*2,$r,"",0),"eax"); #
199 &mov(&DWP($i*2+4,$r,"",0),"edx");#
200 }
201
202 &comment("");
203 &add($a,32);
204 &add($r,64);
205 &sub($num,8);
206 &jnz(&label("sw_loop"));
207
208 &set_label("sw_finish",0);
209 &mov($num,&wparam(2)); # get num
210 &and($num,7);
211 &jz(&label("sw_end"));
212
213 for ($i=0; $i<7; $i++)
214 {
215 &comment("Tail Round $i");
216 &mov("eax",&DWP($i*4,$a,"",0)); # *a
217 # XXX
218 &mul("eax"); # *a * *a
219 &mov(&DWP($i*8,$r,"",0),"eax"); #
220 &dec($num) if ($i != 7-1);
221 &mov(&DWP($i*8+4,$r,"",0),"edx");
222 &jz(&label("sw_end")) if ($i != 7-1);
223 }
224 &set_label("sw_end",0);
225
226 &function_end($name);
227 }
228
229sub bn_div_words
230 {
231 local($name)=@_;
232
233 &function_begin($name,"");
234 &mov("edx",&wparam(0)); #
235 &mov("eax",&wparam(1)); #
236 &mov("ebx",&wparam(2)); #
237 &div("ebx");
238 &function_end($name);
239 }
240
241sub bn_add_words
242 {
243 local($name)=@_;
244
245 &function_begin($name,"");
246
247 &comment("");
248 $a="esi";
249 $b="edi";
250 $c="eax";
251 $r="ebx";
252 $tmp1="ecx";
253 $tmp2="edx";
254 $num="ebp";
255
256 &mov($r,&wparam(0)); # get r
257 &mov($a,&wparam(1)); # get a
258 &mov($b,&wparam(2)); # get b
259 &mov($num,&wparam(3)); # get num
260 &xor($c,$c); # clear carry
261 &and($num,0xfffffff8); # num / 8
262
263 &jz(&label("aw_finish"));
264
265 &set_label("aw_loop",0);
266 for ($i=0; $i<8; $i++)
267 {
268 &comment("Round $i");
269
270 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
271 &mov($tmp2,&DWP($i*4,$b,"",0)); # *b
272 &add($tmp1,$c);
273 &mov($c,0);
274 &adc($c,$c);
275 &add($tmp1,$tmp2);
276 &adc($c,0);
277 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
278 }
279
280 &comment("");
281 &add($a,32);
282 &add($b,32);
283 &add($r,32);
284 &sub($num,8);
285 &jnz(&label("aw_loop"));
286
287 &set_label("aw_finish",0);
288 &mov($num,&wparam(3)); # get num
289 &and($num,7);
290 &jz(&label("aw_end"));
291
292 for ($i=0; $i<7; $i++)
293 {
294 &comment("Tail Round $i");
295 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
296 &mov($tmp2,&DWP($i*4,$b,"",0));# *b
297 &add($tmp1,$c);
298 &mov($c,0);
299 &adc($c,$c);
300 &add($tmp1,$tmp2);
301 &adc($c,0);
302 &dec($num) if ($i != 6);
303 &mov(&DWP($i*4,$r,"",0),$tmp1); # *a
304 &jz(&label("aw_end")) if ($i != 6);
305 }
306 &set_label("aw_end",0);
307
308# &mov("eax",$c); # $c is "eax"
309
310 &function_end($name);
311 }
312
313sub bn_sub_words
314 {
315 local($name)=@_;
316
317 &function_begin($name,"");
318
319 &comment("");
320 $a="esi";
321 $b="edi";
322 $c="eax";
323 $r="ebx";
324 $tmp1="ecx";
325 $tmp2="edx";
326 $num="ebp";
327
328 &mov($r,&wparam(0)); # get r
329 &mov($a,&wparam(1)); # get a
330 &mov($b,&wparam(2)); # get b
331 &mov($num,&wparam(3)); # get num
332 &xor($c,$c); # clear carry
333 &and($num,0xfffffff8); # num / 8
334
335 &jz(&label("aw_finish"));
336
337 &set_label("aw_loop",0);
338 for ($i=0; $i<8; $i++)
339 {
340 &comment("Round $i");
341
342 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
343 &mov($tmp2,&DWP($i*4,$b,"",0)); # *b
344 &sub($tmp1,$c);
345 &mov($c,0);
346 &adc($c,$c);
347 &sub($tmp1,$tmp2);
348 &adc($c,0);
349 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
350 }
351
352 &comment("");
353 &add($a,32);
354 &add($b,32);
355 &add($r,32);
356 &sub($num,8);
357 &jnz(&label("aw_loop"));
358
359 &set_label("aw_finish",0);
360 &mov($num,&wparam(3)); # get num
361 &and($num,7);
362 &jz(&label("aw_end"));
363
364 for ($i=0; $i<7; $i++)
365 {
366 &comment("Tail Round $i");
367 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
368 &mov($tmp2,&DWP($i*4,$b,"",0));# *b
369 &sub($tmp1,$c);
370 &mov($c,0);
371 &adc($c,$c);
372 &sub($tmp1,$tmp2);
373 &adc($c,0);
374 &dec($num) if ($i != 6);
375 &mov(&DWP($i*4,$r,"",0),$tmp1); # *a
376 &jz(&label("aw_end")) if ($i != 6);
377 }
378 &set_label("aw_end",0);
379
380# &mov("eax",$c); # $c is "eax"
381
382 &function_end($name);
383 }
384
diff --git a/src/lib/libcrypto/bn/asm/co-586.pl b/src/lib/libcrypto/bn/asm/co-586.pl
deleted file mode 100644
index 5d962cb957..0000000000
--- a/src/lib/libcrypto/bn/asm/co-586.pl
+++ /dev/null
@@ -1,286 +0,0 @@
1#!/usr/local/bin/perl
2
3push(@INC,"perlasm","../../perlasm");
4require "x86asm.pl";
5
6&asm_init($ARGV[0],$0);
7
8&bn_mul_comba("bn_mul_comba8",8);
9&bn_mul_comba("bn_mul_comba4",4);
10&bn_sqr_comba("bn_sqr_comba8",8);
11&bn_sqr_comba("bn_sqr_comba4",4);
12
13&asm_finish();
14
15sub mul_add_c
16 {
17 local($a,$ai,$b,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;
18
19 # pos == -1 if eax and edx are pre-loaded, 0 to load from next
20 # words, and 1 if load return value
21
22 &comment("mul a[$ai]*b[$bi]");
23
24 # "eax" and "edx" will always be pre-loaded.
25 # &mov("eax",&DWP($ai*4,$a,"",0)) ;
26 # &mov("edx",&DWP($bi*4,$b,"",0));
27
28 &mul("edx");
29 &add($c0,"eax");
30 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0; # laod next a
31 &mov("eax",&wparam(0)) if $pos > 0; # load r[]
32 ###
33 &adc($c1,"edx");
34 &mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 0; # laod next b
35 &mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 1; # laod next b
36 ###
37 &adc($c2,0);
38 # is pos > 1, it means it is the last loop
39 &mov(&DWP($i*4,"eax","",0),$c0) if $pos > 0; # save r[];
40 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # laod next a
41 }
42
43sub sqr_add_c
44 {
45 local($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;
46
47 # pos == -1 if eax and edx are pre-loaded, 0 to load from next
48 # words, and 1 if load return value
49
50 &comment("sqr a[$ai]*a[$bi]");
51
52 # "eax" and "edx" will always be pre-loaded.
53 # &mov("eax",&DWP($ai*4,$a,"",0)) ;
54 # &mov("edx",&DWP($bi*4,$b,"",0));
55
56 if ($ai == $bi)
57 { &mul("eax");}
58 else
59 { &mul("edx");}
60 &add($c0,"eax");
61 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0; # load next a
62 ###
63 &adc($c1,"edx");
64 &mov("edx",&DWP(($nb)*4,$a,"",0)) if ($pos == 1) && ($na != $nb);
65 ###
66 &adc($c2,0);
67 # is pos > 1, it means it is the last loop
68 &mov(&DWP($i*4,$r,"",0),$c0) if $pos > 0; # save r[];
69 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # load next b
70 }
71
72sub sqr_add_c2
73 {
74 local($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;
75
76 # pos == -1 if eax and edx are pre-loaded, 0 to load from next
77 # words, and 1 if load return value
78
79 &comment("sqr a[$ai]*a[$bi]");
80
81 # "eax" and "edx" will always be pre-loaded.
82 # &mov("eax",&DWP($ai*4,$a,"",0)) ;
83 # &mov("edx",&DWP($bi*4,$a,"",0));
84
85 if ($ai == $bi)
86 { &mul("eax");}
87 else
88 { &mul("edx");}
89 &add("eax","eax");
90 ###
91 &adc("edx","edx");
92 ###
93 &adc($c2,0);
94 &add($c0,"eax");
95 &adc($c1,"edx");
96 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0; # load next a
97 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # load next b
98 &adc($c2,0);
99 &mov(&DWP($i*4,$r,"",0),$c0) if $pos > 0; # save r[];
100 &mov("edx",&DWP(($nb)*4,$a,"",0)) if ($pos <= 1) && ($na != $nb);
101 ###
102 }
103
104sub bn_mul_comba
105 {
106 local($name,$num)=@_;
107 local($a,$b,$c0,$c1,$c2);
108 local($i,$as,$ae,$bs,$be,$ai,$bi);
109 local($tot,$end);
110
111 &function_begin_B($name,"");
112
113 $c0="ebx";
114 $c1="ecx";
115 $c2="ebp";
116 $a="esi";
117 $b="edi";
118
119 $as=0;
120 $ae=0;
121 $bs=0;
122 $be=0;
123 $tot=$num+$num-1;
124
125 &push("esi");
126 &mov($a,&wparam(1));
127 &push("edi");
128 &mov($b,&wparam(2));
129 &push("ebp");
130 &push("ebx");
131
132 &xor($c0,$c0);
133 &mov("eax",&DWP(0,$a,"",0)); # load the first word
134 &xor($c1,$c1);
135 &mov("edx",&DWP(0,$b,"",0)); # load the first second
136
137 for ($i=0; $i<$tot; $i++)
138 {
139 $ai=$as;
140 $bi=$bs;
141 $end=$be+1;
142
143 &comment("################## Calculate word $i");
144
145 for ($j=$bs; $j<$end; $j++)
146 {
147 &xor($c2,$c2) if ($j == $bs);
148 if (($j+1) == $end)
149 {
150 $v=1;
151 $v=2 if (($i+1) == $tot);
152 }
153 else
154 { $v=0; }
155 if (($j+1) != $end)
156 {
157 $na=($ai-1);
158 $nb=($bi+1);
159 }
160 else
161 {
162 $na=$as+($i < ($num-1));
163 $nb=$bs+($i >= ($num-1));
164 }
165#printf STDERR "[$ai,$bi] -> [$na,$nb]\n";
166 &mul_add_c($a,$ai,$b,$bi,$c0,$c1,$c2,$v,$i,$na,$nb);
167 if ($v)
168 {
169 &comment("saved r[$i]");
170 # &mov("eax",&wparam(0));
171 # &mov(&DWP($i*4,"eax","",0),$c0);
172 ($c0,$c1,$c2)=($c1,$c2,$c0);
173 }
174 $ai--;
175 $bi++;
176 }
177 $as++ if ($i < ($num-1));
178 $ae++ if ($i >= ($num-1));
179
180 $bs++ if ($i >= ($num-1));
181 $be++ if ($i < ($num-1));
182 }
183 &comment("save r[$i]");
184 # &mov("eax",&wparam(0));
185 &mov(&DWP($i*4,"eax","",0),$c0);
186
187 &pop("ebx");
188 &pop("ebp");
189 &pop("edi");
190 &pop("esi");
191 &ret();
192 &function_end_B($name);
193 }
194
195sub bn_sqr_comba
196 {
197 local($name,$num)=@_;
198 local($r,$a,$c0,$c1,$c2)=@_;
199 local($i,$as,$ae,$bs,$be,$ai,$bi);
200 local($b,$tot,$end,$half);
201
202 &function_begin_B($name,"");
203
204 $c0="ebx";
205 $c1="ecx";
206 $c2="ebp";
207 $a="esi";
208 $r="edi";
209
210 &push("esi");
211 &push("edi");
212 &push("ebp");
213 &push("ebx");
214 &mov($r,&wparam(0));
215 &mov($a,&wparam(1));
216 &xor($c0,$c0);
217 &xor($c1,$c1);
218 &mov("eax",&DWP(0,$a,"",0)); # load the first word
219
220 $as=0;
221 $ae=0;
222 $bs=0;
223 $be=0;
224 $tot=$num+$num-1;
225
226 for ($i=0; $i<$tot; $i++)
227 {
228 $ai=$as;
229 $bi=$bs;
230 $end=$be+1;
231
232 &comment("############### Calculate word $i");
233 for ($j=$bs; $j<$end; $j++)
234 {
235 &xor($c2,$c2) if ($j == $bs);
236 if (($ai-1) < ($bi+1))
237 {
238 $v=1;
239 $v=2 if ($i+1) == $tot;
240 }
241 else
242 { $v=0; }
243 if (!$v)
244 {
245 $na=$ai-1;
246 $nb=$bi+1;
247 }
248 else
249 {
250 $na=$as+($i < ($num-1));
251 $nb=$bs+($i >= ($num-1));
252 }
253 if ($ai == $bi)
254 {
255 &sqr_add_c($r,$a,$ai,$bi,
256 $c0,$c1,$c2,$v,$i,$na,$nb);
257 }
258 else
259 {
260 &sqr_add_c2($r,$a,$ai,$bi,
261 $c0,$c1,$c2,$v,$i,$na,$nb);
262 }
263 if ($v)
264 {
265 &comment("saved r[$i]");
266 #&mov(&DWP($i*4,$r,"",0),$c0);
267 ($c0,$c1,$c2)=($c1,$c2,$c0);
268 last;
269 }
270 $ai--;
271 $bi++;
272 }
273 $as++ if ($i < ($num-1));
274 $ae++ if ($i >= ($num-1));
275
276 $bs++ if ($i >= ($num-1));
277 $be++ if ($i < ($num-1));
278 }
279 &mov(&DWP($i*4,$r,"",0),$c0);
280 &pop("ebx");
281 &pop("ebp");
282 &pop("edi");
283 &pop("esi");
284 &ret();
285 &function_end_B($name);
286 }
diff --git a/src/lib/libcrypto/bn/asm/pa-risc2.s b/src/lib/libcrypto/bn/asm/pa-risc2.s
deleted file mode 100644
index c2725996a4..0000000000
--- a/src/lib/libcrypto/bn/asm/pa-risc2.s
+++ /dev/null
@@ -1,416 +0,0 @@
1 .SPACE $PRIVATE$
2 .SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31
3 .SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82
4 .SPACE $TEXT$
5 .SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44
6 .SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY
7 .IMPORT $global$,DATA
8 .IMPORT $$dyncall,MILLICODE
9; gcc_compiled.:
10 .SPACE $TEXT$
11 .SUBSPA $CODE$
12
13 .align 4
14 .EXPORT bn_mul_add_words,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,RTNVAL=GR
15bn_mul_add_words
16 .PROC
17 .CALLINFO FRAME=64,CALLS,SAVE_RP,ENTRY_GR=4
18 .ENTRY
19 stw %r2,-20(0,%r30)
20 stwm %r4,64(0,%r30)
21 copy %r24,%r31
22 stw %r3,-60(0,%r30)
23 ldi 0,%r20
24 ldo 12(%r26),%r2
25 stw %r23,-16(0,%r30)
26 copy %r25,%r3
27 ldo 12(%r3),%r1
28 fldws -16(0,%r30),%fr8L
29L$0010
30 copy %r20,%r25
31 ldi 0,%r24
32 fldws 0(0,%r3),%fr9L
33 ldw 0(0,%r26),%r19
34 xmpyu %fr8L,%fr9L,%fr9
35 fstds %fr9,-16(0,%r30)
36 copy %r19,%r23
37 ldw -16(0,%r30),%r28
38 ldw -12(0,%r30),%r29
39 ldi 0,%r22
40 add %r23,%r29,%r29
41 addc %r22,%r28,%r28
42 add %r25,%r29,%r29
43 addc %r24,%r28,%r28
44 copy %r28,%r21
45 ldi 0,%r20
46 copy %r21,%r20
47 addib,= -1,%r31,L$0011
48 stw %r29,0(0,%r26)
49 copy %r20,%r25
50 ldi 0,%r24
51 fldws -8(0,%r1),%fr9L
52 ldw -8(0,%r2),%r19
53 xmpyu %fr8L,%fr9L,%fr9
54 fstds %fr9,-16(0,%r30)
55 copy %r19,%r23
56 ldw -16(0,%r30),%r28
57 ldw -12(0,%r30),%r29
58 ldi 0,%r22
59 add %r23,%r29,%r29
60 addc %r22,%r28,%r28
61 add %r25,%r29,%r29
62 addc %r24,%r28,%r28
63 copy %r28,%r21
64 ldi 0,%r20
65 copy %r21,%r20
66 addib,= -1,%r31,L$0011
67 stw %r29,-8(0,%r2)
68 copy %r20,%r25
69 ldi 0,%r24
70 fldws -4(0,%r1),%fr9L
71 ldw -4(0,%r2),%r19
72 xmpyu %fr8L,%fr9L,%fr9
73 fstds %fr9,-16(0,%r30)
74 copy %r19,%r23
75 ldw -16(0,%r30),%r28
76 ldw -12(0,%r30),%r29
77 ldi 0,%r22
78 add %r23,%r29,%r29
79 addc %r22,%r28,%r28
80 add %r25,%r29,%r29
81 addc %r24,%r28,%r28
82 copy %r28,%r21
83 ldi 0,%r20
84 copy %r21,%r20
85 addib,= -1,%r31,L$0011
86 stw %r29,-4(0,%r2)
87 copy %r20,%r25
88 ldi 0,%r24
89 fldws 0(0,%r1),%fr9L
90 ldw 0(0,%r2),%r19
91 xmpyu %fr8L,%fr9L,%fr9
92 fstds %fr9,-16(0,%r30)
93 copy %r19,%r23
94 ldw -16(0,%r30),%r28
95 ldw -12(0,%r30),%r29
96 ldi 0,%r22
97 add %r23,%r29,%r29
98 addc %r22,%r28,%r28
99 add %r25,%r29,%r29
100 addc %r24,%r28,%r28
101 copy %r28,%r21
102 ldi 0,%r20
103 copy %r21,%r20
104 addib,= -1,%r31,L$0011
105 stw %r29,0(0,%r2)
106 ldo 16(%r1),%r1
107 ldo 16(%r3),%r3
108 ldo 16(%r2),%r2
109 bl L$0010,0
110 ldo 16(%r26),%r26
111L$0011
112 copy %r20,%r28
113 ldw -84(0,%r30),%r2
114 ldw -60(0,%r30),%r3
115 bv 0(%r2)
116 ldwm -64(0,%r30),%r4
117 .EXIT
118 .PROCEND
119 .align 4
120 .EXPORT bn_mul_words,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,RTNVAL=GR
121bn_mul_words
122 .PROC
123 .CALLINFO FRAME=64,CALLS,SAVE_RP,ENTRY_GR=3
124 .ENTRY
125 stw %r2,-20(0,%r30)
126 copy %r25,%r2
127 stwm %r4,64(0,%r30)
128 copy %r24,%r19
129 ldi 0,%r28
130 stw %r23,-16(0,%r30)
131 ldo 12(%r26),%r31
132 ldo 12(%r2),%r29
133 fldws -16(0,%r30),%fr8L
134L$0026
135 fldws 0(0,%r2),%fr9L
136 xmpyu %fr8L,%fr9L,%fr9
137 fstds %fr9,-16(0,%r30)
138 copy %r28,%r21
139 ldi 0,%r20
140 ldw -16(0,%r30),%r24
141 ldw -12(0,%r30),%r25
142 add %r21,%r25,%r25
143 addc %r20,%r24,%r24
144 copy %r24,%r23
145 ldi 0,%r22
146 copy %r23,%r28
147 addib,= -1,%r19,L$0027
148 stw %r25,0(0,%r26)
149 fldws -8(0,%r29),%fr9L
150 xmpyu %fr8L,%fr9L,%fr9
151 fstds %fr9,-16(0,%r30)
152 copy %r28,%r21
153 ldi 0,%r20
154 ldw -16(0,%r30),%r24
155 ldw -12(0,%r30),%r25
156 add %r21,%r25,%r25
157 addc %r20,%r24,%r24
158 copy %r24,%r23
159 ldi 0,%r22
160 copy %r23,%r28
161 addib,= -1,%r19,L$0027
162 stw %r25,-8(0,%r31)
163 fldws -4(0,%r29),%fr9L
164 xmpyu %fr8L,%fr9L,%fr9
165 fstds %fr9,-16(0,%r30)
166 copy %r28,%r21
167 ldi 0,%r20
168 ldw -16(0,%r30),%r24
169 ldw -12(0,%r30),%r25
170 add %r21,%r25,%r25
171 addc %r20,%r24,%r24
172 copy %r24,%r23
173 ldi 0,%r22
174 copy %r23,%r28
175 addib,= -1,%r19,L$0027
176 stw %r25,-4(0,%r31)
177 fldws 0(0,%r29),%fr9L
178 xmpyu %fr8L,%fr9L,%fr9
179 fstds %fr9,-16(0,%r30)
180 copy %r28,%r21
181 ldi 0,%r20
182 ldw -16(0,%r30),%r24
183 ldw -12(0,%r30),%r25
184 add %r21,%r25,%r25
185 addc %r20,%r24,%r24
186 copy %r24,%r23
187 ldi 0,%r22
188 copy %r23,%r28
189 addib,= -1,%r19,L$0027
190 stw %r25,0(0,%r31)
191 ldo 16(%r29),%r29
192 ldo 16(%r2),%r2
193 ldo 16(%r31),%r31
194 bl L$0026,0
195 ldo 16(%r26),%r26
196L$0027
197 ldw -84(0,%r30),%r2
198 bv 0(%r2)
199 ldwm -64(0,%r30),%r4
200 .EXIT
201 .PROCEND
202 .align 4
203 .EXPORT bn_sqr_words,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR
204bn_sqr_words
205 .PROC
206 .CALLINFO FRAME=0,NO_CALLS
207 .ENTRY
208 ldo 28(%r26),%r19
209 ldo 12(%r25),%r28
210L$0042
211 fldws 0(0,%r25),%fr8L
212 fldws 0(0,%r25),%fr8R
213 xmpyu %fr8L,%fr8R,%fr8
214 fstds %fr8,-16(0,%r30)
215 ldw -16(0,%r30),%r22
216 ldw -12(0,%r30),%r23
217 stw %r23,0(0,%r26)
218 copy %r22,%r21
219 ldi 0,%r20
220 addib,= -1,%r24,L$0049
221 stw %r21,-24(0,%r19)
222 fldws -8(0,%r28),%fr8L
223 fldws -8(0,%r28),%fr8R
224 xmpyu %fr8L,%fr8R,%fr8
225 fstds %fr8,-16(0,%r30)
226 ldw -16(0,%r30),%r22
227 ldw -12(0,%r30),%r23
228 stw %r23,-20(0,%r19)
229 copy %r22,%r21
230 ldi 0,%r20
231 addib,= -1,%r24,L$0049
232 stw %r21,-16(0,%r19)
233 fldws -4(0,%r28),%fr8L
234 fldws -4(0,%r28),%fr8R
235 xmpyu %fr8L,%fr8R,%fr8
236 fstds %fr8,-16(0,%r30)
237 ldw -16(0,%r30),%r22
238 ldw -12(0,%r30),%r23
239 stw %r23,-12(0,%r19)
240 copy %r22,%r21
241 ldi 0,%r20
242 addib,= -1,%r24,L$0049
243 stw %r21,-8(0,%r19)
244 fldws 0(0,%r28),%fr8L
245 fldws 0(0,%r28),%fr8R
246 xmpyu %fr8L,%fr8R,%fr8
247 fstds %fr8,-16(0,%r30)
248 ldw -16(0,%r30),%r22
249 ldw -12(0,%r30),%r23
250 stw %r23,-4(0,%r19)
251 copy %r22,%r21
252 ldi 0,%r20
253 addib,= -1,%r24,L$0049
254 stw %r21,0(0,%r19)
255 ldo 16(%r28),%r28
256 ldo 16(%r25),%r25
257 ldo 32(%r19),%r19
258 bl L$0042,0
259 ldo 32(%r26),%r26
260L$0049
261 bv,n 0(%r2)
262 .EXIT
263 .PROCEND
264 .IMPORT BN_num_bits_word,CODE
265 .IMPORT fprintf,CODE
266 .IMPORT __iob,DATA
267 .SPACE $TEXT$
268 .SUBSPA $LIT$
269
270 .align 4
271L$C0000
272 .STRING "Division would overflow (%d)\x0a\x00"
273 .IMPORT abort,CODE
274 .SPACE $TEXT$
275 .SUBSPA $CODE$
276
277 .align 4
278 .EXPORT bn_div64,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,RTNVAL=GR
279bn_div64
280 .PROC
281 .CALLINFO FRAME=128,CALLS,SAVE_RP,ENTRY_GR=8
282 .ENTRY
283 stw %r2,-20(0,%r30)
284 stwm %r8,128(0,%r30)
285 stw %r7,-124(0,%r30)
286 stw %r4,-112(0,%r30)
287 stw %r3,-108(0,%r30)
288 copy %r26,%r3
289 copy %r25,%r4
290 stw %r6,-120(0,%r30)
291 ldi 0,%r7
292 stw %r5,-116(0,%r30)
293 movb,<> %r24,%r5,L$0051
294 ldi 2,%r6
295 bl L$0068,0
296 ldi -1,%r28
297L$0051
298 .CALL ARGW0=GR
299 bl BN_num_bits_word,%r2
300 copy %r5,%r26
301 copy %r28,%r24
302 ldi 32,%r19
303 comb,= %r19,%r24,L$0052
304 subi 31,%r24,%r19
305 mtsar %r19
306 zvdepi 1,32,%r19
307 comb,>>= %r19,%r3,L$0052
308 addil LR'__iob-$global$+32,%r27
309 ldo RR'__iob-$global$+32(%r1),%r26
310 ldil LR'L$C0000,%r25
311 .CALL ARGW0=GR,ARGW1=GR,ARGW2=GR
312 bl fprintf,%r2
313 ldo RR'L$C0000(%r25),%r25
314 .CALL
315 bl abort,%r2
316 nop
317L$0052
318 comb,>> %r5,%r3,L$0053
319 subi 32,%r24,%r24
320 sub %r3,%r5,%r3
321L$0053
322 comib,= 0,%r24,L$0054
323 subi 31,%r24,%r19
324 mtsar %r19
325 zvdep %r5,32,%r5
326 zvdep %r3,32,%r21
327 subi 32,%r24,%r20
328 mtsar %r20
329 vshd 0,%r4,%r20
330 or %r21,%r20,%r3
331 mtsar %r19
332 zvdep %r4,32,%r4
333L$0054
334 extru %r5,15,16,%r23
335 extru %r5,31,16,%r28
336L$0055
337 extru %r3,15,16,%r19
338 comb,<> %r23,%r19,L$0058
339 copy %r3,%r26
340 bl L$0059,0
341 zdepi -1,31,16,%r29
342L$0058
343 .IMPORT $$divU,MILLICODE
344 bl $$divU,%r31
345 copy %r23,%r25
346L$0059
347 stw %r29,-16(0,%r30)
348 fldws -16(0,%r30),%fr10L
349 stw %r28,-16(0,%r30)
350 fldws -16(0,%r30),%fr10R
351 stw %r23,-16(0,%r30)
352 xmpyu %fr10L,%fr10R,%fr8
353 fldws -16(0,%r30),%fr10R
354 fstws %fr8R,-16(0,%r30)
355 xmpyu %fr10L,%fr10R,%fr9
356 ldw -16(0,%r30),%r8
357 fstws %fr9R,-16(0,%r30)
358 copy %r8,%r22
359 ldw -16(0,%r30),%r8
360 extru %r4,15,16,%r24
361 copy %r8,%r21
362L$0060
363 sub %r3,%r21,%r20
364 copy %r20,%r19
365 depi 0,31,16,%r19
366 comib,<> 0,%r19,L$0061
367 zdep %r20,15,16,%r19
368 addl %r19,%r24,%r19
369 comb,>>= %r19,%r22,L$0061
370 sub %r22,%r28,%r22
371 sub %r21,%r23,%r21
372 bl L$0060,0
373 ldo -1(%r29),%r29
374L$0061
375 stw %r29,-16(0,%r30)
376 fldws -16(0,%r30),%fr10L
377 stw %r28,-16(0,%r30)
378 fldws -16(0,%r30),%fr10R
379 xmpyu %fr10L,%fr10R,%fr8
380 fstws %fr8R,-16(0,%r30)
381 ldw -16(0,%r30),%r8
382 stw %r23,-16(0,%r30)
383 fldws -16(0,%r30),%fr10R
384 copy %r8,%r19
385 xmpyu %fr10L,%fr10R,%fr8
386 fstws %fr8R,-16(0,%r30)
387 extru %r19,15,16,%r20
388 ldw -16(0,%r30),%r8
389 zdep %r19,15,16,%r19
390 addl %r8,%r20,%r20
391 comclr,<<= %r19,%r4,0
392 addi 1,%r20,%r20
393 comb,<<= %r20,%r3,L$0066
394 sub %r4,%r19,%r4
395 addl %r3,%r5,%r3
396 ldo -1(%r29),%r29
397L$0066
398 addib,= -1,%r6,L$0056
399 sub %r3,%r20,%r3
400 zdep %r29,15,16,%r7
401 shd %r3,%r4,16,%r3
402 bl L$0055,0
403 zdep %r4,15,16,%r4
404L$0056
405 or %r7,%r29,%r28
406L$0068
407 ldw -148(0,%r30),%r2
408 ldw -124(0,%r30),%r7
409 ldw -120(0,%r30),%r6
410 ldw -116(0,%r30),%r5
411 ldw -112(0,%r30),%r4
412 ldw -108(0,%r30),%r3
413 bv 0(%r2)
414 ldwm -128(0,%r30),%r8
415 .EXIT
416 .PROCEND
diff --git a/src/lib/libcrypto/bn/asm/sparcv8.S b/src/lib/libcrypto/bn/asm/sparcv8.S
deleted file mode 100644
index 88c5dc480a..0000000000
--- a/src/lib/libcrypto/bn/asm/sparcv8.S
+++ /dev/null
@@ -1,1458 +0,0 @@
1.ident "sparcv8.s, Version 1.4"
2.ident "SPARC v8 ISA artwork by Andy Polyakov <appro@fy.chalmers.se>"
3
4/*
5 * ====================================================================
6 * Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
7 * project.
8 *
9 * Rights for redistribution and usage in source and binary forms are
10 * granted according to the OpenSSL license. Warranty of any kind is
11 * disclaimed.
12 * ====================================================================
13 */
14
15/*
16 * This is my modest contributon to OpenSSL project (see
17 * http://www.openssl.org/ for more information about it) and is
18 * a drop-in SuperSPARC ISA replacement for crypto/bn/bn_asm.c
19 * module. For updates see http://fy.chalmers.se/~appro/hpe/.
20 *
21 * See bn_asm.sparc.v8plus.S for more details.
22 */
23
24/*
25 * Revision history.
26 *
27 * 1.1 - new loop unrolling model(*);
28 * 1.2 - made gas friendly;
29 * 1.3 - fixed problem with /usr/ccs/lib/cpp;
30 * 1.4 - some retunes;
31 *
32 * (*) see bn_asm.sparc.v8plus.S for details
33 */
34
35.section ".text",#alloc,#execinstr
36.file "bn_asm.sparc.v8.S"
37
38.align 32
39
40.global bn_mul_add_words
41/*
42 * BN_ULONG bn_mul_add_words(rp,ap,num,w)
43 * BN_ULONG *rp,*ap;
44 * int num;
45 * BN_ULONG w;
46 */
47bn_mul_add_words:
48 cmp %o2,0
49 bg,a .L_bn_mul_add_words_proceed
50 ld [%o1],%g2
51 retl
52 clr %o0
53
54.L_bn_mul_add_words_proceed:
55 andcc %o2,-4,%g0
56 bz .L_bn_mul_add_words_tail
57 clr %o5
58
59.L_bn_mul_add_words_loop:
60 ld [%o0],%o4
61 ld [%o1+4],%g3
62 umul %o3,%g2,%g2
63 rd %y,%g1
64 addcc %o4,%o5,%o4
65 addx %g1,0,%g1
66 addcc %o4,%g2,%o4
67 st %o4,[%o0]
68 addx %g1,0,%o5
69
70 ld [%o0+4],%o4
71 ld [%o1+8],%g2
72 umul %o3,%g3,%g3
73 dec 4,%o2
74 rd %y,%g1
75 addcc %o4,%o5,%o4
76 addx %g1,0,%g1
77 addcc %o4,%g3,%o4
78 st %o4,[%o0+4]
79 addx %g1,0,%o5
80
81 ld [%o0+8],%o4
82 ld [%o1+12],%g3
83 umul %o3,%g2,%g2
84 inc 16,%o1
85 rd %y,%g1
86 addcc %o4,%o5,%o4
87 addx %g1,0,%g1
88 addcc %o4,%g2,%o4
89 st %o4,[%o0+8]
90 addx %g1,0,%o5
91
92 ld [%o0+12],%o4
93 umul %o3,%g3,%g3
94 inc 16,%o0
95 rd %y,%g1
96 addcc %o4,%o5,%o4
97 addx %g1,0,%g1
98 addcc %o4,%g3,%o4
99 st %o4,[%o0-4]
100 addx %g1,0,%o5
101 andcc %o2,-4,%g0
102 bnz,a .L_bn_mul_add_words_loop
103 ld [%o1],%g2
104
105 tst %o2
106 bnz,a .L_bn_mul_add_words_tail
107 ld [%o1],%g2
108.L_bn_mul_add_words_return:
109 retl
110 mov %o5,%o0
111 nop
112
113.L_bn_mul_add_words_tail:
114 ld [%o0],%o4
115 umul %o3,%g2,%g2
116 addcc %o4,%o5,%o4
117 rd %y,%g1
118 addx %g1,0,%g1
119 addcc %o4,%g2,%o4
120 addx %g1,0,%o5
121 deccc %o2
122 bz .L_bn_mul_add_words_return
123 st %o4,[%o0]
124
125 ld [%o1+4],%g2
126 ld [%o0+4],%o4
127 umul %o3,%g2,%g2
128 rd %y,%g1
129 addcc %o4,%o5,%o4
130 addx %g1,0,%g1
131 addcc %o4,%g2,%o4
132 addx %g1,0,%o5
133 deccc %o2
134 bz .L_bn_mul_add_words_return
135 st %o4,[%o0+4]
136
137 ld [%o1+8],%g2
138 ld [%o0+8],%o4
139 umul %o3,%g2,%g2
140 rd %y,%g1
141 addcc %o4,%o5,%o4
142 addx %g1,0,%g1
143 addcc %o4,%g2,%o4
144 st %o4,[%o0+8]
145 retl
146 addx %g1,0,%o0
147
148.type bn_mul_add_words,#function
149.size bn_mul_add_words,(.-bn_mul_add_words)
150
151.align 32
152
153.global bn_mul_words
154/*
155 * BN_ULONG bn_mul_words(rp,ap,num,w)
156 * BN_ULONG *rp,*ap;
157 * int num;
158 * BN_ULONG w;
159 */
160bn_mul_words:
161 cmp %o2,0
162 bg,a .L_bn_mul_words_proceeed
163 ld [%o1],%g2
164 retl
165 clr %o0
166
167.L_bn_mul_words_proceeed:
168 andcc %o2,-4,%g0
169 bz .L_bn_mul_words_tail
170 clr %o5
171
172.L_bn_mul_words_loop:
173 ld [%o1+4],%g3
174 umul %o3,%g2,%g2
175 addcc %g2,%o5,%g2
176 rd %y,%g1
177 addx %g1,0,%o5
178 st %g2,[%o0]
179
180 ld [%o1+8],%g2
181 umul %o3,%g3,%g3
182 addcc %g3,%o5,%g3
183 rd %y,%g1
184 dec 4,%o2
185 addx %g1,0,%o5
186 st %g3,[%o0+4]
187
188 ld [%o1+12],%g3
189 umul %o3,%g2,%g2
190 addcc %g2,%o5,%g2
191 rd %y,%g1
192 inc 16,%o1
193 st %g2,[%o0+8]
194 addx %g1,0,%o5
195
196 umul %o3,%g3,%g3
197 addcc %g3,%o5,%g3
198 rd %y,%g1
199 inc 16,%o0
200 addx %g1,0,%o5
201 st %g3,[%o0-4]
202 andcc %o2,-4,%g0
203 nop
204 bnz,a .L_bn_mul_words_loop
205 ld [%o1],%g2
206
207 tst %o2
208 bnz,a .L_bn_mul_words_tail
209 ld [%o1],%g2
210.L_bn_mul_words_return:
211 retl
212 mov %o5,%o0
213 nop
214
215.L_bn_mul_words_tail:
216 umul %o3,%g2,%g2
217 addcc %g2,%o5,%g2
218 rd %y,%g1
219 addx %g1,0,%o5
220 deccc %o2
221 bz .L_bn_mul_words_return
222 st %g2,[%o0]
223 nop
224
225 ld [%o1+4],%g2
226 umul %o3,%g2,%g2
227 addcc %g2,%o5,%g2
228 rd %y,%g1
229 addx %g1,0,%o5
230 deccc %o2
231 bz .L_bn_mul_words_return
232 st %g2,[%o0+4]
233
234 ld [%o1+8],%g2
235 umul %o3,%g2,%g2
236 addcc %g2,%o5,%g2
237 rd %y,%g1
238 st %g2,[%o0+8]
239 retl
240 addx %g1,0,%o0
241
242.type bn_mul_words,#function
243.size bn_mul_words,(.-bn_mul_words)
244
245.align 32
246.global bn_sqr_words
247/*
248 * void bn_sqr_words(r,a,n)
249 * BN_ULONG *r,*a;
250 * int n;
251 */
252bn_sqr_words:
253 cmp %o2,0
254 bg,a .L_bn_sqr_words_proceeed
255 ld [%o1],%g2
256 retl
257 clr %o0
258
259.L_bn_sqr_words_proceeed:
260 andcc %o2,-4,%g0
261 bz .L_bn_sqr_words_tail
262 clr %o5
263
264.L_bn_sqr_words_loop:
265 ld [%o1+4],%g3
266 umul %g2,%g2,%o4
267 st %o4,[%o0]
268 rd %y,%o5
269 st %o5,[%o0+4]
270
271 ld [%o1+8],%g2
272 umul %g3,%g3,%o4
273 dec 4,%o2
274 st %o4,[%o0+8]
275 rd %y,%o5
276 st %o5,[%o0+12]
277 nop
278
279 ld [%o1+12],%g3
280 umul %g2,%g2,%o4
281 st %o4,[%o0+16]
282 rd %y,%o5
283 inc 16,%o1
284 st %o5,[%o0+20]
285
286 umul %g3,%g3,%o4
287 inc 32,%o0
288 st %o4,[%o0-8]
289 rd %y,%o5
290 st %o5,[%o0-4]
291 andcc %o2,-4,%g2
292 bnz,a .L_bn_sqr_words_loop
293 ld [%o1],%g2
294
295 tst %o2
296 nop
297 bnz,a .L_bn_sqr_words_tail
298 ld [%o1],%g2
299.L_bn_sqr_words_return:
300 retl
301 clr %o0
302
303.L_bn_sqr_words_tail:
304 umul %g2,%g2,%o4
305 st %o4,[%o0]
306 deccc %o2
307 rd %y,%o5
308 bz .L_bn_sqr_words_return
309 st %o5,[%o0+4]
310
311 ld [%o1+4],%g2
312 umul %g2,%g2,%o4
313 st %o4,[%o0+8]
314 deccc %o2
315 rd %y,%o5
316 nop
317 bz .L_bn_sqr_words_return
318 st %o5,[%o0+12]
319
320 ld [%o1+8],%g2
321 umul %g2,%g2,%o4
322 st %o4,[%o0+16]
323 rd %y,%o5
324 st %o5,[%o0+20]
325 retl
326 clr %o0
327
328.type bn_sqr_words,#function
329.size bn_sqr_words,(.-bn_sqr_words)
330
331.align 32
332
333.global bn_div_words
334/*
335 * BN_ULONG bn_div_words(h,l,d)
336 * BN_ULONG h,l,d;
337 */
338bn_div_words:
339 wr %o0,%y
340 udiv %o1,%o2,%o0
341 retl
342 nop
343
344.type bn_div_words,#function
345.size bn_div_words,(.-bn_div_words)
346
347.align 32
348
349.global bn_add_words
350/*
351 * BN_ULONG bn_add_words(rp,ap,bp,n)
352 * BN_ULONG *rp,*ap,*bp;
353 * int n;
354 */
355bn_add_words:
356 cmp %o3,0
357 bg,a .L_bn_add_words_proceed
358 ld [%o1],%o4
359 retl
360 clr %o0
361
362.L_bn_add_words_proceed:
363 andcc %o3,-4,%g0
364 bz .L_bn_add_words_tail
365 clr %g1
366 ba .L_bn_add_words_warn_loop
367 addcc %g0,0,%g0 ! clear carry flag
368
369.L_bn_add_words_loop:
370 ld [%o1],%o4
371.L_bn_add_words_warn_loop:
372 ld [%o2],%o5
373 ld [%o1+4],%g3
374 ld [%o2+4],%g4
375 dec 4,%o3
376 addxcc %o5,%o4,%o5
377 st %o5,[%o0]
378
379 ld [%o1+8],%o4
380 ld [%o2+8],%o5
381 inc 16,%o1
382 addxcc %g3,%g4,%g3
383 st %g3,[%o0+4]
384
385 ld [%o1-4],%g3
386 ld [%o2+12],%g4
387 inc 16,%o2
388 addxcc %o5,%o4,%o5
389 st %o5,[%o0+8]
390
391 inc 16,%o0
392 addxcc %g3,%g4,%g3
393 st %g3,[%o0-4]
394 addx %g0,0,%g1
395 andcc %o3,-4,%g0
396 bnz,a .L_bn_add_words_loop
397 addcc %g1,-1,%g0
398
399 tst %o3
400 bnz,a .L_bn_add_words_tail
401 ld [%o1],%o4
402.L_bn_add_words_return:
403 retl
404 mov %g1,%o0
405
406.L_bn_add_words_tail:
407 addcc %g1,-1,%g0
408 ld [%o2],%o5
409 addxcc %o5,%o4,%o5
410 addx %g0,0,%g1
411 deccc %o3
412 bz .L_bn_add_words_return
413 st %o5,[%o0]
414
415 ld [%o1+4],%o4
416 addcc %g1,-1,%g0
417 ld [%o2+4],%o5
418 addxcc %o5,%o4,%o5
419 addx %g0,0,%g1
420 deccc %o3
421 bz .L_bn_add_words_return
422 st %o5,[%o0+4]
423
424 ld [%o1+8],%o4
425 addcc %g1,-1,%g0
426 ld [%o2+8],%o5
427 addxcc %o5,%o4,%o5
428 st %o5,[%o0+8]
429 retl
430 addx %g0,0,%o0
431
432.type bn_add_words,#function
433.size bn_add_words,(.-bn_add_words)
434
435.align 32
436
437.global bn_sub_words
438/*
439 * BN_ULONG bn_sub_words(rp,ap,bp,n)
440 * BN_ULONG *rp,*ap,*bp;
441 * int n;
442 */
443bn_sub_words:
444 cmp %o3,0
445 bg,a .L_bn_sub_words_proceed
446 ld [%o1],%o4
447 retl
448 clr %o0
449
450.L_bn_sub_words_proceed:
451 andcc %o3,-4,%g0
452 bz .L_bn_sub_words_tail
453 clr %g1
454 ba .L_bn_sub_words_warm_loop
455 addcc %g0,0,%g0 ! clear carry flag
456
457.L_bn_sub_words_loop:
458 ld [%o1],%o4
459.L_bn_sub_words_warm_loop:
460 ld [%o2],%o5
461 ld [%o1+4],%g3
462 ld [%o2+4],%g4
463 dec 4,%o3
464 subxcc %o4,%o5,%o5
465 st %o5,[%o0]
466
467 ld [%o1+8],%o4
468 ld [%o2+8],%o5
469 inc 16,%o1
470 subxcc %g3,%g4,%g4
471 st %g4,[%o0+4]
472
473 ld [%o1-4],%g3
474 ld [%o2+12],%g4
475 inc 16,%o2
476 subxcc %o4,%o5,%o5
477 st %o5,[%o0+8]
478
479 inc 16,%o0
480 subxcc %g3,%g4,%g4
481 st %g4,[%o0-4]
482 addx %g0,0,%g1
483 andcc %o3,-4,%g0
484 bnz,a .L_bn_sub_words_loop
485 addcc %g1,-1,%g0
486
487 tst %o3
488 nop
489 bnz,a .L_bn_sub_words_tail
490 ld [%o1],%o4
491.L_bn_sub_words_return:
492 retl
493 mov %g1,%o0
494
495.L_bn_sub_words_tail:
496 addcc %g1,-1,%g0
497 ld [%o2],%o5
498 subxcc %o4,%o5,%o5
499 addx %g0,0,%g1
500 deccc %o3
501 bz .L_bn_sub_words_return
502 st %o5,[%o0]
503 nop
504
505 ld [%o1+4],%o4
506 addcc %g1,-1,%g0
507 ld [%o2+4],%o5
508 subxcc %o4,%o5,%o5
509 addx %g0,0,%g1
510 deccc %o3
511 bz .L_bn_sub_words_return
512 st %o5,[%o0+4]
513
514 ld [%o1+8],%o4
515 addcc %g1,-1,%g0
516 ld [%o2+8],%o5
517 subxcc %o4,%o5,%o5
518 st %o5,[%o0+8]
519 retl
520 addx %g0,0,%o0
521
522.type bn_sub_words,#function
523.size bn_sub_words,(.-bn_sub_words)
524
525#define FRAME_SIZE -96
526
527/*
528 * Here is register usage map for *all* routines below.
529 */
530#define t_1 %o0
531#define t_2 %o1
532#define c_1 %o2
533#define c_2 %o3
534#define c_3 %o4
535
536#define ap(I) [%i1+4*I]
537#define bp(I) [%i2+4*I]
538#define rp(I) [%i0+4*I]
539
540#define a_0 %l0
541#define a_1 %l1
542#define a_2 %l2
543#define a_3 %l3
544#define a_4 %l4
545#define a_5 %l5
546#define a_6 %l6
547#define a_7 %l7
548
549#define b_0 %i3
550#define b_1 %i4
551#define b_2 %i5
552#define b_3 %o5
553#define b_4 %g1
554#define b_5 %g2
555#define b_6 %g3
556#define b_7 %g4
557
558.align 32
559.global bn_mul_comba8
560/*
561 * void bn_mul_comba8(r,a,b)
562 * BN_ULONG *r,*a,*b;
563 */
564bn_mul_comba8:
565 save %sp,FRAME_SIZE,%sp
566 ld ap(0),a_0
567 ld bp(0),b_0
568 umul a_0,b_0,c_1 !=!mul_add_c(a[0],b[0],c1,c2,c3);
569 ld bp(1),b_1
570 rd %y,c_2
571 st c_1,rp(0) !r[0]=c1;
572
573 umul a_0,b_1,t_1 !=!mul_add_c(a[0],b[1],c2,c3,c1);
574 ld ap(1),a_1
575 addcc c_2,t_1,c_2
576 rd %y,t_2
577 addxcc %g0,t_2,c_3 !=
578 addx %g0,%g0,c_1
579 ld ap(2),a_2
580 umul a_1,b_0,t_1 !mul_add_c(a[1],b[0],c2,c3,c1);
581 addcc c_2,t_1,c_2 !=
582 rd %y,t_2
583 addxcc c_3,t_2,c_3
584 st c_2,rp(1) !r[1]=c2;
585 addx c_1,%g0,c_1 !=
586
587 umul a_2,b_0,t_1 !mul_add_c(a[2],b[0],c3,c1,c2);
588 addcc c_3,t_1,c_3
589 rd %y,t_2
590 addxcc c_1,t_2,c_1 !=
591 addx %g0,%g0,c_2
592 ld bp(2),b_2
593 umul a_1,b_1,t_1 !mul_add_c(a[1],b[1],c3,c1,c2);
594 addcc c_3,t_1,c_3 !=
595 rd %y,t_2
596 addxcc c_1,t_2,c_1
597 ld bp(3),b_3
598 addx c_2,%g0,c_2 !=
599 umul a_0,b_2,t_1 !mul_add_c(a[0],b[2],c3,c1,c2);
600 addcc c_3,t_1,c_3
601 rd %y,t_2
602 addxcc c_1,t_2,c_1 !=
603 addx c_2,%g0,c_2
604 st c_3,rp(2) !r[2]=c3;
605
606 umul a_0,b_3,t_1 !mul_add_c(a[0],b[3],c1,c2,c3);
607 addcc c_1,t_1,c_1 !=
608 rd %y,t_2
609 addxcc c_2,t_2,c_2
610 addx %g0,%g0,c_3
611 umul a_1,b_2,t_1 !=!mul_add_c(a[1],b[2],c1,c2,c3);
612 addcc c_1,t_1,c_1
613 rd %y,t_2
614 addxcc c_2,t_2,c_2
615 addx c_3,%g0,c_3 !=
616 ld ap(3),a_3
617 umul a_2,b_1,t_1 !mul_add_c(a[2],b[1],c1,c2,c3);
618 addcc c_1,t_1,c_1
619 rd %y,t_2 !=
620 addxcc c_2,t_2,c_2
621 addx c_3,%g0,c_3
622 ld ap(4),a_4
623 umul a_3,b_0,t_1 !mul_add_c(a[3],b[0],c1,c2,c3);!=
624 addcc c_1,t_1,c_1
625 rd %y,t_2
626 addxcc c_2,t_2,c_2
627 addx c_3,%g0,c_3 !=
628 st c_1,rp(3) !r[3]=c1;
629
630 umul a_4,b_0,t_1 !mul_add_c(a[4],b[0],c2,c3,c1);
631 addcc c_2,t_1,c_2
632 rd %y,t_2 !=
633 addxcc c_3,t_2,c_3
634 addx %g0,%g0,c_1
635 umul a_3,b_1,t_1 !mul_add_c(a[3],b[1],c2,c3,c1);
636 addcc c_2,t_1,c_2 !=
637 rd %y,t_2
638 addxcc c_3,t_2,c_3
639 addx c_1,%g0,c_1
640 umul a_2,b_2,t_1 !=!mul_add_c(a[2],b[2],c2,c3,c1);
641 addcc c_2,t_1,c_2
642 rd %y,t_2
643 addxcc c_3,t_2,c_3
644 addx c_1,%g0,c_1 !=
645 ld bp(4),b_4
646 umul a_1,b_3,t_1 !mul_add_c(a[1],b[3],c2,c3,c1);
647 addcc c_2,t_1,c_2
648 rd %y,t_2 !=
649 addxcc c_3,t_2,c_3
650 addx c_1,%g0,c_1
651 ld bp(5),b_5
652 umul a_0,b_4,t_1 !=!mul_add_c(a[0],b[4],c2,c3,c1);
653 addcc c_2,t_1,c_2
654 rd %y,t_2
655 addxcc c_3,t_2,c_3
656 addx c_1,%g0,c_1 !=
657 st c_2,rp(4) !r[4]=c2;
658
659 umul a_0,b_5,t_1 !mul_add_c(a[0],b[5],c3,c1,c2);
660 addcc c_3,t_1,c_3
661 rd %y,t_2 !=
662 addxcc c_1,t_2,c_1
663 addx %g0,%g0,c_2
664 umul a_1,b_4,t_1 !mul_add_c(a[1],b[4],c3,c1,c2);
665 addcc c_3,t_1,c_3 !=
666 rd %y,t_2
667 addxcc c_1,t_2,c_1
668 addx c_2,%g0,c_2
669 umul a_2,b_3,t_1 !=!mul_add_c(a[2],b[3],c3,c1,c2);
670 addcc c_3,t_1,c_3
671 rd %y,t_2
672 addxcc c_1,t_2,c_1
673 addx c_2,%g0,c_2 !=
674 umul a_3,b_2,t_1 !mul_add_c(a[3],b[2],c3,c1,c2);
675 addcc c_3,t_1,c_3
676 rd %y,t_2
677 addxcc c_1,t_2,c_1 !=
678 addx c_2,%g0,c_2
679 ld ap(5),a_5
680 umul a_4,b_1,t_1 !mul_add_c(a[4],b[1],c3,c1,c2);
681 addcc c_3,t_1,c_3 !=
682 rd %y,t_2
683 addxcc c_1,t_2,c_1
684 ld ap(6),a_6
685 addx c_2,%g0,c_2 !=
686 umul a_5,b_0,t_1 !mul_add_c(a[5],b[0],c3,c1,c2);
687 addcc c_3,t_1,c_3
688 rd %y,t_2
689 addxcc c_1,t_2,c_1 !=
690 addx c_2,%g0,c_2
691 st c_3,rp(5) !r[5]=c3;
692
693 umul a_6,b_0,t_1 !mul_add_c(a[6],b[0],c1,c2,c3);
694 addcc c_1,t_1,c_1 !=
695 rd %y,t_2
696 addxcc c_2,t_2,c_2
697 addx %g0,%g0,c_3
698 umul a_5,b_1,t_1 !=!mul_add_c(a[5],b[1],c1,c2,c3);
699 addcc c_1,t_1,c_1
700 rd %y,t_2
701 addxcc c_2,t_2,c_2
702 addx c_3,%g0,c_3 !=
703 umul a_4,b_2,t_1 !mul_add_c(a[4],b[2],c1,c2,c3);
704 addcc c_1,t_1,c_1
705 rd %y,t_2
706 addxcc c_2,t_2,c_2 !=
707 addx c_3,%g0,c_3
708 umul a_3,b_3,t_1 !mul_add_c(a[3],b[3],c1,c2,c3);
709 addcc c_1,t_1,c_1
710 rd %y,t_2 !=
711 addxcc c_2,t_2,c_2
712 addx c_3,%g0,c_3
713 umul a_2,b_4,t_1 !mul_add_c(a[2],b[4],c1,c2,c3);
714 addcc c_1,t_1,c_1 !=
715 rd %y,t_2
716 addxcc c_2,t_2,c_2
717 ld bp(6),b_6
718 addx c_3,%g0,c_3 !=
719 umul a_1,b_5,t_1 !mul_add_c(a[1],b[5],c1,c2,c3);
720 addcc c_1,t_1,c_1
721 rd %y,t_2
722 addxcc c_2,t_2,c_2 !=
723 addx c_3,%g0,c_3
724 ld bp(7),b_7
725 umul a_0,b_6,t_1 !mul_add_c(a[0],b[6],c1,c2,c3);
726 addcc c_1,t_1,c_1 !=
727 rd %y,t_2
728 addxcc c_2,t_2,c_2
729 st c_1,rp(6) !r[6]=c1;
730 addx c_3,%g0,c_3 !=
731
732 umul a_0,b_7,t_1 !mul_add_c(a[0],b[7],c2,c3,c1);
733 addcc c_2,t_1,c_2
734 rd %y,t_2
735 addxcc c_3,t_2,c_3 !=
736 addx %g0,%g0,c_1
737 umul a_1,b_6,t_1 !mul_add_c(a[1],b[6],c2,c3,c1);
738 addcc c_2,t_1,c_2
739 rd %y,t_2 !=
740 addxcc c_3,t_2,c_3
741 addx c_1,%g0,c_1
742 umul a_2,b_5,t_1 !mul_add_c(a[2],b[5],c2,c3,c1);
743 addcc c_2,t_1,c_2 !=
744 rd %y,t_2
745 addxcc c_3,t_2,c_3
746 addx c_1,%g0,c_1
747 umul a_3,b_4,t_1 !=!mul_add_c(a[3],b[4],c2,c3,c1);
748 addcc c_2,t_1,c_2
749 rd %y,t_2
750 addxcc c_3,t_2,c_3
751 addx c_1,%g0,c_1 !=
752 umul a_4,b_3,t_1 !mul_add_c(a[4],b[3],c2,c3,c1);
753 addcc c_2,t_1,c_2
754 rd %y,t_2
755 addxcc c_3,t_2,c_3 !=
756 addx c_1,%g0,c_1
757 umul a_5,b_2,t_1 !mul_add_c(a[5],b[2],c2,c3,c1);
758 addcc c_2,t_1,c_2
759 rd %y,t_2 !=
760 addxcc c_3,t_2,c_3
761 addx c_1,%g0,c_1
762 ld ap(7),a_7
763 umul a_6,b_1,t_1 !=!mul_add_c(a[6],b[1],c2,c3,c1);
764 addcc c_2,t_1,c_2
765 rd %y,t_2
766 addxcc c_3,t_2,c_3
767 addx c_1,%g0,c_1 !=
768 umul a_7,b_0,t_1 !mul_add_c(a[7],b[0],c2,c3,c1);
769 addcc c_2,t_1,c_2
770 rd %y,t_2
771 addxcc c_3,t_2,c_3 !=
772 addx c_1,%g0,c_1
773 st c_2,rp(7) !r[7]=c2;
774
775 umul a_7,b_1,t_1 !mul_add_c(a[7],b[1],c3,c1,c2);
776 addcc c_3,t_1,c_3 !=
777 rd %y,t_2
778 addxcc c_1,t_2,c_1
779 addx %g0,%g0,c_2
780 umul a_6,b_2,t_1 !=!mul_add_c(a[6],b[2],c3,c1,c2);
781 addcc c_3,t_1,c_3
782 rd %y,t_2
783 addxcc c_1,t_2,c_1
784 addx c_2,%g0,c_2 !=
785 umul a_5,b_3,t_1 !mul_add_c(a[5],b[3],c3,c1,c2);
786 addcc c_3,t_1,c_3
787 rd %y,t_2
788 addxcc c_1,t_2,c_1 !=
789 addx c_2,%g0,c_2
790 umul a_4,b_4,t_1 !mul_add_c(a[4],b[4],c3,c1,c2);
791 addcc c_3,t_1,c_3
792 rd %y,t_2 !=
793 addxcc c_1,t_2,c_1
794 addx c_2,%g0,c_2
795 umul a_3,b_5,t_1 !mul_add_c(a[3],b[5],c3,c1,c2);
796 addcc c_3,t_1,c_3 !=
797 rd %y,t_2
798 addxcc c_1,t_2,c_1
799 addx c_2,%g0,c_2
800 umul a_2,b_6,t_1 !=!mul_add_c(a[2],b[6],c3,c1,c2);
801 addcc c_3,t_1,c_3
802 rd %y,t_2
803 addxcc c_1,t_2,c_1
804 addx c_2,%g0,c_2 !=
805 umul a_1,b_7,t_1 !mul_add_c(a[1],b[7],c3,c1,c2);
806 addcc c_3,t_1,c_3
807 rd %y,t_2
808 addxcc c_1,t_2,c_1 !
809 addx c_2,%g0,c_2
810 st c_3,rp(8) !r[8]=c3;
811
812 umul a_2,b_7,t_1 !mul_add_c(a[2],b[7],c1,c2,c3);
813 addcc c_1,t_1,c_1 !=
814 rd %y,t_2
815 addxcc c_2,t_2,c_2
816 addx %g0,%g0,c_3
817 umul a_3,b_6,t_1 !=!mul_add_c(a[3],b[6],c1,c2,c3);
818 addcc c_1,t_1,c_1
819 rd %y,t_2
820 addxcc c_2,t_2,c_2
821 addx c_3,%g0,c_3 !=
822 umul a_4,b_5,t_1 !mul_add_c(a[4],b[5],c1,c2,c3);
823 addcc c_1,t_1,c_1
824 rd %y,t_2
825 addxcc c_2,t_2,c_2 !=
826 addx c_3,%g0,c_3
827 umul a_5,b_4,t_1 !mul_add_c(a[5],b[4],c1,c2,c3);
828 addcc c_1,t_1,c_1
829 rd %y,t_2 !=
830 addxcc c_2,t_2,c_2
831 addx c_3,%g0,c_3
832 umul a_6,b_3,t_1 !mul_add_c(a[6],b[3],c1,c2,c3);
833 addcc c_1,t_1,c_1 !=
834 rd %y,t_2
835 addxcc c_2,t_2,c_2
836 addx c_3,%g0,c_3
837 umul a_7,b_2,t_1 !=!mul_add_c(a[7],b[2],c1,c2,c3);
838 addcc c_1,t_1,c_1
839 rd %y,t_2
840 addxcc c_2,t_2,c_2
841 addx c_3,%g0,c_3 !=
842 st c_1,rp(9) !r[9]=c1;
843
844 umul a_7,b_3,t_1 !mul_add_c(a[7],b[3],c2,c3,c1);
845 addcc c_2,t_1,c_2
846 rd %y,t_2 !=
847 addxcc c_3,t_2,c_3
848 addx %g0,%g0,c_1
849 umul a_6,b_4,t_1 !mul_add_c(a[6],b[4],c2,c3,c1);
850 addcc c_2,t_1,c_2 !=
851 rd %y,t_2
852 addxcc c_3,t_2,c_3
853 addx c_1,%g0,c_1
854 umul a_5,b_5,t_1 !=!mul_add_c(a[5],b[5],c2,c3,c1);
855 addcc c_2,t_1,c_2
856 rd %y,t_2
857 addxcc c_3,t_2,c_3
858 addx c_1,%g0,c_1 !=
859 umul a_4,b_6,t_1 !mul_add_c(a[4],b[6],c2,c3,c1);
860 addcc c_2,t_1,c_2
861 rd %y,t_2
862 addxcc c_3,t_2,c_3 !=
863 addx c_1,%g0,c_1
864 umul a_3,b_7,t_1 !mul_add_c(a[3],b[7],c2,c3,c1);
865 addcc c_2,t_1,c_2
866 rd %y,t_2 !=
867 addxcc c_3,t_2,c_3
868 addx c_1,%g0,c_1
869 st c_2,rp(10) !r[10]=c2;
870
871 umul a_4,b_7,t_1 !=!mul_add_c(a[4],b[7],c3,c1,c2);
872 addcc c_3,t_1,c_3
873 rd %y,t_2
874 addxcc c_1,t_2,c_1
875 addx %g0,%g0,c_2 !=
876 umul a_5,b_6,t_1 !mul_add_c(a[5],b[6],c3,c1,c2);
877 addcc c_3,t_1,c_3
878 rd %y,t_2
879 addxcc c_1,t_2,c_1 !=
880 addx c_2,%g0,c_2
881 umul a_6,b_5,t_1 !mul_add_c(a[6],b[5],c3,c1,c2);
882 addcc c_3,t_1,c_3
883 rd %y,t_2 !=
884 addxcc c_1,t_2,c_1
885 addx c_2,%g0,c_2
886 umul a_7,b_4,t_1 !mul_add_c(a[7],b[4],c3,c1,c2);
887 addcc c_3,t_1,c_3 !=
888 rd %y,t_2
889 addxcc c_1,t_2,c_1
890 st c_3,rp(11) !r[11]=c3;
891 addx c_2,%g0,c_2 !=
892
893 umul a_7,b_5,t_1 !mul_add_c(a[7],b[5],c1,c2,c3);
894 addcc c_1,t_1,c_1
895 rd %y,t_2
896 addxcc c_2,t_2,c_2 !=
897 addx %g0,%g0,c_3
898 umul a_6,b_6,t_1 !mul_add_c(a[6],b[6],c1,c2,c3);
899 addcc c_1,t_1,c_1
900 rd %y,t_2 !=
901 addxcc c_2,t_2,c_2
902 addx c_3,%g0,c_3
903 umul a_5,b_7,t_1 !mul_add_c(a[5],b[7],c1,c2,c3);
904 addcc c_1,t_1,c_1 !=
905 rd %y,t_2
906 addxcc c_2,t_2,c_2
907 st c_1,rp(12) !r[12]=c1;
908 addx c_3,%g0,c_3 !=
909
910 umul a_6,b_7,t_1 !mul_add_c(a[6],b[7],c2,c3,c1);
911 addcc c_2,t_1,c_2
912 rd %y,t_2
913 addxcc c_3,t_2,c_3 !=
914 addx %g0,%g0,c_1
915 umul a_7,b_6,t_1 !mul_add_c(a[7],b[6],c2,c3,c1);
916 addcc c_2,t_1,c_2
917 rd %y,t_2 !=
918 addxcc c_3,t_2,c_3
919 addx c_1,%g0,c_1
920 st c_2,rp(13) !r[13]=c2;
921
922 umul a_7,b_7,t_1 !=!mul_add_c(a[7],b[7],c3,c1,c2);
923 addcc c_3,t_1,c_3
924 rd %y,t_2
925 addxcc c_1,t_2,c_1
926 nop !=
927 st c_3,rp(14) !r[14]=c3;
928 st c_1,rp(15) !r[15]=c1;
929
930 ret
931 restore %g0,%g0,%o0
932
933.type bn_mul_comba8,#function
934.size bn_mul_comba8,(.-bn_mul_comba8)
935
936.align 32
937
938.global bn_mul_comba4
939/*
940 * void bn_mul_comba4(r,a,b)
941 * BN_ULONG *r,*a,*b;
942 */
943bn_mul_comba4:
944 save %sp,FRAME_SIZE,%sp
945 ld ap(0),a_0
946 ld bp(0),b_0
947 umul a_0,b_0,c_1 !=!mul_add_c(a[0],b[0],c1,c2,c3);
948 ld bp(1),b_1
949 rd %y,c_2
950 st c_1,rp(0) !r[0]=c1;
951
952 umul a_0,b_1,t_1 !=!mul_add_c(a[0],b[1],c2,c3,c1);
953 ld ap(1),a_1
954 addcc c_2,t_1,c_2
955 rd %y,t_2 !=
956 addxcc %g0,t_2,c_3
957 addx %g0,%g0,c_1
958 ld ap(2),a_2
959 umul a_1,b_0,t_1 !=!mul_add_c(a[1],b[0],c2,c3,c1);
960 addcc c_2,t_1,c_2
961 rd %y,t_2
962 addxcc c_3,t_2,c_3
963 addx c_1,%g0,c_1 !=
964 st c_2,rp(1) !r[1]=c2;
965
966 umul a_2,b_0,t_1 !mul_add_c(a[2],b[0],c3,c1,c2);
967 addcc c_3,t_1,c_3
968 rd %y,t_2 !=
969 addxcc c_1,t_2,c_1
970 addx %g0,%g0,c_2
971 ld bp(2),b_2
972 umul a_1,b_1,t_1 !=!mul_add_c(a[1],b[1],c3,c1,c2);
973 addcc c_3,t_1,c_3
974 rd %y,t_2
975 addxcc c_1,t_2,c_1
976 addx c_2,%g0,c_2 !=
977 ld bp(3),b_3
978 umul a_0,b_2,t_1 !mul_add_c(a[0],b[2],c3,c1,c2);
979 addcc c_3,t_1,c_3
980 rd %y,t_2 !=
981 addxcc c_1,t_2,c_1
982 addx c_2,%g0,c_2
983 st c_3,rp(2) !r[2]=c3;
984
985 umul a_0,b_3,t_1 !=!mul_add_c(a[0],b[3],c1,c2,c3);
986 addcc c_1,t_1,c_1
987 rd %y,t_2
988 addxcc c_2,t_2,c_2
989 addx %g0,%g0,c_3 !=
990 umul a_1,b_2,t_1 !mul_add_c(a[1],b[2],c1,c2,c3);
991 addcc c_1,t_1,c_1
992 rd %y,t_2
993 addxcc c_2,t_2,c_2 !=
994 addx c_3,%g0,c_3
995 ld ap(3),a_3
996 umul a_2,b_1,t_1 !mul_add_c(a[2],b[1],c1,c2,c3);
997 addcc c_1,t_1,c_1 !=
998 rd %y,t_2
999 addxcc c_2,t_2,c_2
1000 addx c_3,%g0,c_3
1001 umul a_3,b_0,t_1 !=!mul_add_c(a[3],b[0],c1,c2,c3);
1002 addcc c_1,t_1,c_1
1003 rd %y,t_2
1004 addxcc c_2,t_2,c_2
1005 addx c_3,%g0,c_3 !=
1006 st c_1,rp(3) !r[3]=c1;
1007
1008 umul a_3,b_1,t_1 !mul_add_c(a[3],b[1],c2,c3,c1);
1009 addcc c_2,t_1,c_2
1010 rd %y,t_2 !=
1011 addxcc c_3,t_2,c_3
1012 addx %g0,%g0,c_1
1013 umul a_2,b_2,t_1 !mul_add_c(a[2],b[2],c2,c3,c1);
1014 addcc c_2,t_1,c_2 !=
1015 rd %y,t_2
1016 addxcc c_3,t_2,c_3
1017 addx c_1,%g0,c_1
1018 umul a_1,b_3,t_1 !=!mul_add_c(a[1],b[3],c2,c3,c1);
1019 addcc c_2,t_1,c_2
1020 rd %y,t_2
1021 addxcc c_3,t_2,c_3
1022 addx c_1,%g0,c_1 !=
1023 st c_2,rp(4) !r[4]=c2;
1024
1025 umul a_2,b_3,t_1 !mul_add_c(a[2],b[3],c3,c1,c2);
1026 addcc c_3,t_1,c_3
1027 rd %y,t_2 !=
1028 addxcc c_1,t_2,c_1
1029 addx %g0,%g0,c_2
1030 umul a_3,b_2,t_1 !mul_add_c(a[3],b[2],c3,c1,c2);
1031 addcc c_3,t_1,c_3 !=
1032 rd %y,t_2
1033 addxcc c_1,t_2,c_1
1034 st c_3,rp(5) !r[5]=c3;
1035 addx c_2,%g0,c_2 !=
1036
1037 umul a_3,b_3,t_1 !mul_add_c(a[3],b[3],c1,c2,c3);
1038 addcc c_1,t_1,c_1
1039 rd %y,t_2
1040 addxcc c_2,t_2,c_2 !=
1041 st c_1,rp(6) !r[6]=c1;
1042 st c_2,rp(7) !r[7]=c2;
1043
1044 ret
1045 restore %g0,%g0,%o0
1046
1047.type bn_mul_comba4,#function
1048.size bn_mul_comba4,(.-bn_mul_comba4)
1049
1050.align 32
1051
1052.global bn_sqr_comba8
1053bn_sqr_comba8:
1054 save %sp,FRAME_SIZE,%sp
1055 ld ap(0),a_0
1056 ld ap(1),a_1
1057 umul a_0,a_0,c_1 !=!sqr_add_c(a,0,c1,c2,c3);
1058 rd %y,c_2
1059 st c_1,rp(0) !r[0]=c1;
1060
1061 ld ap(2),a_2
1062 umul a_0,a_1,t_1 !=!sqr_add_c2(a,1,0,c2,c3,c1);
1063 addcc c_2,t_1,c_2
1064 rd %y,t_2
1065 addxcc %g0,t_2,c_3
1066 addx %g0,%g0,c_1 !=
1067 addcc c_2,t_1,c_2
1068 addxcc c_3,t_2,c_3
1069 st c_2,rp(1) !r[1]=c2;
1070 addx c_1,%g0,c_1 !=
1071
1072 umul a_2,a_0,t_1 !sqr_add_c2(a,2,0,c3,c1,c2);
1073 addcc c_3,t_1,c_3
1074 rd %y,t_2
1075 addxcc c_1,t_2,c_1 !=
1076 addx %g0,%g0,c_2
1077 addcc c_3,t_1,c_3
1078 addxcc c_1,t_2,c_1
1079 addx c_2,%g0,c_2 !=
1080 ld ap(3),a_3
1081 umul a_1,a_1,t_1 !sqr_add_c(a,1,c3,c1,c2);
1082 addcc c_3,t_1,c_3
1083 rd %y,t_2 !=
1084 addxcc c_1,t_2,c_1
1085 addx c_2,%g0,c_2
1086 st c_3,rp(2) !r[2]=c3;
1087
1088 umul a_0,a_3,t_1 !=!sqr_add_c2(a,3,0,c1,c2,c3);
1089 addcc c_1,t_1,c_1
1090 rd %y,t_2
1091 addxcc c_2,t_2,c_2
1092 addx %g0,%g0,c_3 !=
1093 addcc c_1,t_1,c_1
1094 addxcc c_2,t_2,c_2
1095 ld ap(4),a_4
1096 addx c_3,%g0,c_3 !=
1097 umul a_1,a_2,t_1 !sqr_add_c2(a,2,1,c1,c2,c3);
1098 addcc c_1,t_1,c_1
1099 rd %y,t_2
1100 addxcc c_2,t_2,c_2 !=
1101 addx c_3,%g0,c_3
1102 addcc c_1,t_1,c_1
1103 addxcc c_2,t_2,c_2
1104 addx c_3,%g0,c_3 !=
1105 st c_1,rp(3) !r[3]=c1;
1106
1107 umul a_4,a_0,t_1 !sqr_add_c2(a,4,0,c2,c3,c1);
1108 addcc c_2,t_1,c_2
1109 rd %y,t_2 !=
1110 addxcc c_3,t_2,c_3
1111 addx %g0,%g0,c_1
1112 addcc c_2,t_1,c_2
1113 addxcc c_3,t_2,c_3 !=
1114 addx c_1,%g0,c_1
1115 umul a_3,a_1,t_1 !sqr_add_c2(a,3,1,c2,c3,c1);
1116 addcc c_2,t_1,c_2
1117 rd %y,t_2 !=
1118 addxcc c_3,t_2,c_3
1119 addx c_1,%g0,c_1
1120 addcc c_2,t_1,c_2
1121 addxcc c_3,t_2,c_3 !=
1122 addx c_1,%g0,c_1
1123 ld ap(5),a_5
1124 umul a_2,a_2,t_1 !sqr_add_c(a,2,c2,c3,c1);
1125 addcc c_2,t_1,c_2 !=
1126 rd %y,t_2
1127 addxcc c_3,t_2,c_3
1128 st c_2,rp(4) !r[4]=c2;
1129 addx c_1,%g0,c_1 !=
1130
1131 umul a_0,a_5,t_1 !sqr_add_c2(a,5,0,c3,c1,c2);
1132 addcc c_3,t_1,c_3
1133 rd %y,t_2
1134 addxcc c_1,t_2,c_1 !=
1135 addx %g0,%g0,c_2
1136 addcc c_3,t_1,c_3
1137 addxcc c_1,t_2,c_1
1138 addx c_2,%g0,c_2 !=
1139 umul a_1,a_4,t_1 !sqr_add_c2(a,4,1,c3,c1,c2);
1140 addcc c_3,t_1,c_3
1141 rd %y,t_2
1142 addxcc c_1,t_2,c_1 !=
1143 addx c_2,%g0,c_2
1144 addcc c_3,t_1,c_3
1145 addxcc c_1,t_2,c_1
1146 addx c_2,%g0,c_2 !=
1147 ld ap(6),a_6
1148 umul a_2,a_3,t_1 !sqr_add_c2(a,3,2,c3,c1,c2);
1149 addcc c_3,t_1,c_3
1150 rd %y,t_2 !=
1151 addxcc c_1,t_2,c_1
1152 addx c_2,%g0,c_2
1153 addcc c_3,t_1,c_3
1154 addxcc c_1,t_2,c_1 !=
1155 addx c_2,%g0,c_2
1156 st c_3,rp(5) !r[5]=c3;
1157
1158 umul a_6,a_0,t_1 !sqr_add_c2(a,6,0,c1,c2,c3);
1159 addcc c_1,t_1,c_1 !=
1160 rd %y,t_2
1161 addxcc c_2,t_2,c_2
1162 addx %g0,%g0,c_3
1163 addcc c_1,t_1,c_1 !=
1164 addxcc c_2,t_2,c_2
1165 addx c_3,%g0,c_3
1166 umul a_5,a_1,t_1 !sqr_add_c2(a,5,1,c1,c2,c3);
1167 addcc c_1,t_1,c_1 !=
1168 rd %y,t_2
1169 addxcc c_2,t_2,c_2
1170 addx c_3,%g0,c_3
1171 addcc c_1,t_1,c_1 !=
1172 addxcc c_2,t_2,c_2
1173 addx c_3,%g0,c_3
1174 umul a_4,a_2,t_1 !sqr_add_c2(a,4,2,c1,c2,c3);
1175 addcc c_1,t_1,c_1 !=
1176 rd %y,t_2
1177 addxcc c_2,t_2,c_2
1178 addx c_3,%g0,c_3
1179 addcc c_1,t_1,c_1 !=
1180 addxcc c_2,t_2,c_2
1181 addx c_3,%g0,c_3
1182 ld ap(7),a_7
1183 umul a_3,a_3,t_1 !=!sqr_add_c(a,3,c1,c2,c3);
1184 addcc c_1,t_1,c_1
1185 rd %y,t_2
1186 addxcc c_2,t_2,c_2
1187 addx c_3,%g0,c_3 !=
1188 st c_1,rp(6) !r[6]=c1;
1189
1190 umul a_0,a_7,t_1 !sqr_add_c2(a,7,0,c2,c3,c1);
1191 addcc c_2,t_1,c_2
1192 rd %y,t_2 !=
1193 addxcc c_3,t_2,c_3
1194 addx %g0,%g0,c_1
1195 addcc c_2,t_1,c_2
1196 addxcc c_3,t_2,c_3 !=
1197 addx c_1,%g0,c_1
1198 umul a_1,a_6,t_1 !sqr_add_c2(a,6,1,c2,c3,c1);
1199 addcc c_2,t_1,c_2
1200 rd %y,t_2 !=
1201 addxcc c_3,t_2,c_3
1202 addx c_1,%g0,c_1
1203 addcc c_2,t_1,c_2
1204 addxcc c_3,t_2,c_3 !=
1205 addx c_1,%g0,c_1
1206 umul a_2,a_5,t_1 !sqr_add_c2(a,5,2,c2,c3,c1);
1207 addcc c_2,t_1,c_2
1208 rd %y,t_2 !=
1209 addxcc c_3,t_2,c_3
1210 addx c_1,%g0,c_1
1211 addcc c_2,t_1,c_2
1212 addxcc c_3,t_2,c_3 !=
1213 addx c_1,%g0,c_1
1214 umul a_3,a_4,t_1 !sqr_add_c2(a,4,3,c2,c3,c1);
1215 addcc c_2,t_1,c_2
1216 rd %y,t_2 !=
1217 addxcc c_3,t_2,c_3
1218 addx c_1,%g0,c_1
1219 addcc c_2,t_1,c_2
1220 addxcc c_3,t_2,c_3 !=
1221 addx c_1,%g0,c_1
1222 st c_2,rp(7) !r[7]=c2;
1223
1224 umul a_7,a_1,t_1 !sqr_add_c2(a,7,1,c3,c1,c2);
1225 addcc c_3,t_1,c_3 !=
1226 rd %y,t_2
1227 addxcc c_1,t_2,c_1
1228 addx %g0,%g0,c_2
1229 addcc c_3,t_1,c_3 !=
1230 addxcc c_1,t_2,c_1
1231 addx c_2,%g0,c_2
1232 umul a_6,a_2,t_1 !sqr_add_c2(a,6,2,c3,c1,c2);
1233 addcc c_3,t_1,c_3 !=
1234 rd %y,t_2
1235 addxcc c_1,t_2,c_1
1236 addx c_2,%g0,c_2
1237 addcc c_3,t_1,c_3 !=
1238 addxcc c_1,t_2,c_1
1239 addx c_2,%g0,c_2
1240 umul a_5,a_3,t_1 !sqr_add_c2(a,5,3,c3,c1,c2);
1241 addcc c_3,t_1,c_3 !=
1242 rd %y,t_2
1243 addxcc c_1,t_2,c_1
1244 addx c_2,%g0,c_2
1245 addcc c_3,t_1,c_3 !=
1246 addxcc c_1,t_2,c_1
1247 addx c_2,%g0,c_2
1248 umul a_4,a_4,t_1 !sqr_add_c(a,4,c3,c1,c2);
1249 addcc c_3,t_1,c_3 !=
1250 rd %y,t_2
1251 addxcc c_1,t_2,c_1
1252 st c_3,rp(8) !r[8]=c3;
1253 addx c_2,%g0,c_2 !=
1254
1255 umul a_2,a_7,t_1 !sqr_add_c2(a,7,2,c1,c2,c3);
1256 addcc c_1,t_1,c_1
1257 rd %y,t_2
1258 addxcc c_2,t_2,c_2 !=
1259 addx %g0,%g0,c_3
1260 addcc c_1,t_1,c_1
1261 addxcc c_2,t_2,c_2
1262 addx c_3,%g0,c_3 !=
1263 umul a_3,a_6,t_1 !sqr_add_c2(a,6,3,c1,c2,c3);
1264 addcc c_1,t_1,c_1
1265 rd %y,t_2
1266 addxcc c_2,t_2,c_2 !=
1267 addx c_3,%g0,c_3
1268 addcc c_1,t_1,c_1
1269 addxcc c_2,t_2,c_2
1270 addx c_3,%g0,c_3 !=
1271 umul a_4,a_5,t_1 !sqr_add_c2(a,5,4,c1,c2,c3);
1272 addcc c_1,t_1,c_1
1273 rd %y,t_2
1274 addxcc c_2,t_2,c_2 !=
1275 addx c_3,%g0,c_3
1276 addcc c_1,t_1,c_1
1277 addxcc c_2,t_2,c_2
1278 addx c_3,%g0,c_3 !=
1279 st c_1,rp(9) !r[9]=c1;
1280
1281 umul a_7,a_3,t_1 !sqr_add_c2(a,7,3,c2,c3,c1);
1282 addcc c_2,t_1,c_2
1283 rd %y,t_2 !=
1284 addxcc c_3,t_2,c_3
1285 addx %g0,%g0,c_1
1286 addcc c_2,t_1,c_2
1287 addxcc c_3,t_2,c_3 !=
1288 addx c_1,%g0,c_1
1289 umul a_6,a_4,t_1 !sqr_add_c2(a,6,4,c2,c3,c1);
1290 addcc c_2,t_1,c_2
1291 rd %y,t_2 !=
1292 addxcc c_3,t_2,c_3
1293 addx c_1,%g0,c_1
1294 addcc c_2,t_1,c_2
1295 addxcc c_3,t_2,c_3 !=
1296 addx c_1,%g0,c_1
1297 umul a_5,a_5,t_1 !sqr_add_c(a,5,c2,c3,c1);
1298 addcc c_2,t_1,c_2
1299 rd %y,t_2 !=
1300 addxcc c_3,t_2,c_3
1301 addx c_1,%g0,c_1
1302 st c_2,rp(10) !r[10]=c2;
1303
1304 umul a_4,a_7,t_1 !=!sqr_add_c2(a,7,4,c3,c1,c2);
1305 addcc c_3,t_1,c_3
1306 rd %y,t_2
1307 addxcc c_1,t_2,c_1
1308 addx %g0,%g0,c_2 !=
1309 addcc c_3,t_1,c_3
1310 addxcc c_1,t_2,c_1
1311 addx c_2,%g0,c_2
1312 umul a_5,a_6,t_1 !=!sqr_add_c2(a,6,5,c3,c1,c2);
1313 addcc c_3,t_1,c_3
1314 rd %y,t_2
1315 addxcc c_1,t_2,c_1
1316 addx c_2,%g0,c_2 !=
1317 addcc c_3,t_1,c_3
1318 addxcc c_1,t_2,c_1
1319 st c_3,rp(11) !r[11]=c3;
1320 addx c_2,%g0,c_2 !=
1321
1322 umul a_7,a_5,t_1 !sqr_add_c2(a,7,5,c1,c2,c3);
1323 addcc c_1,t_1,c_1
1324 rd %y,t_2
1325 addxcc c_2,t_2,c_2 !=
1326 addx %g0,%g0,c_3
1327 addcc c_1,t_1,c_1
1328 addxcc c_2,t_2,c_2
1329 addx c_3,%g0,c_3 !=
1330 umul a_6,a_6,t_1 !sqr_add_c(a,6,c1,c2,c3);
1331 addcc c_1,t_1,c_1
1332 rd %y,t_2
1333 addxcc c_2,t_2,c_2 !=
1334 addx c_3,%g0,c_3
1335 st c_1,rp(12) !r[12]=c1;
1336
1337 umul a_6,a_7,t_1 !sqr_add_c2(a,7,6,c2,c3,c1);
1338 addcc c_2,t_1,c_2 !=
1339 rd %y,t_2
1340 addxcc c_3,t_2,c_3
1341 addx %g0,%g0,c_1
1342 addcc c_2,t_1,c_2 !=
1343 addxcc c_3,t_2,c_3
1344 st c_2,rp(13) !r[13]=c2;
1345 addx c_1,%g0,c_1 !=
1346
1347 umul a_7,a_7,t_1 !sqr_add_c(a,7,c3,c1,c2);
1348 addcc c_3,t_1,c_3
1349 rd %y,t_2
1350 addxcc c_1,t_2,c_1 !=
1351 st c_3,rp(14) !r[14]=c3;
1352 st c_1,rp(15) !r[15]=c1;
1353
1354 ret
1355 restore %g0,%g0,%o0
1356
1357.type bn_sqr_comba8,#function
1358.size bn_sqr_comba8,(.-bn_sqr_comba8)
1359
1360.align 32
1361
1362.global bn_sqr_comba4
1363/*
1364 * void bn_sqr_comba4(r,a)
1365 * BN_ULONG *r,*a;
1366 */
1367bn_sqr_comba4:
1368 save %sp,FRAME_SIZE,%sp
1369 ld ap(0),a_0
1370 umul a_0,a_0,c_1 !sqr_add_c(a,0,c1,c2,c3);
1371 ld ap(1),a_1 !=
1372 rd %y,c_2
1373 st c_1,rp(0) !r[0]=c1;
1374
1375 ld ap(2),a_2
1376 umul a_0,a_1,t_1 !=!sqr_add_c2(a,1,0,c2,c3,c1);
1377 addcc c_2,t_1,c_2
1378 rd %y,t_2
1379 addxcc %g0,t_2,c_3
1380 addx %g0,%g0,c_1 !=
1381 addcc c_2,t_1,c_2
1382 addxcc c_3,t_2,c_3
1383 addx c_1,%g0,c_1 !=
1384 st c_2,rp(1) !r[1]=c2;
1385
1386 umul a_2,a_0,t_1 !sqr_add_c2(a,2,0,c3,c1,c2);
1387 addcc c_3,t_1,c_3
1388 rd %y,t_2 !=
1389 addxcc c_1,t_2,c_1
1390 addx %g0,%g0,c_2
1391 addcc c_3,t_1,c_3
1392 addxcc c_1,t_2,c_1 !=
1393 addx c_2,%g0,c_2
1394 ld ap(3),a_3
1395 umul a_1,a_1,t_1 !sqr_add_c(a,1,c3,c1,c2);
1396 addcc c_3,t_1,c_3 !=
1397 rd %y,t_2
1398 addxcc c_1,t_2,c_1
1399 st c_3,rp(2) !r[2]=c3;
1400 addx c_2,%g0,c_2 !=
1401
1402 umul a_0,a_3,t_1 !sqr_add_c2(a,3,0,c1,c2,c3);
1403 addcc c_1,t_1,c_1
1404 rd %y,t_2
1405 addxcc c_2,t_2,c_2 !=
1406 addx %g0,%g0,c_3
1407 addcc c_1,t_1,c_1
1408 addxcc c_2,t_2,c_2
1409 addx c_3,%g0,c_3 !=
1410 umul a_1,a_2,t_1 !sqr_add_c2(a,2,1,c1,c2,c3);
1411 addcc c_1,t_1,c_1
1412 rd %y,t_2
1413 addxcc c_2,t_2,c_2 !=
1414 addx c_3,%g0,c_3
1415 addcc c_1,t_1,c_1
1416 addxcc c_2,t_2,c_2
1417 addx c_3,%g0,c_3 !=
1418 st c_1,rp(3) !r[3]=c1;
1419
1420 umul a_3,a_1,t_1 !sqr_add_c2(a,3,1,c2,c3,c1);
1421 addcc c_2,t_1,c_2
1422 rd %y,t_2 !=
1423 addxcc c_3,t_2,c_3
1424 addx %g0,%g0,c_1
1425 addcc c_2,t_1,c_2
1426 addxcc c_3,t_2,c_3 !=
1427 addx c_1,%g0,c_1
1428 umul a_2,a_2,t_1 !sqr_add_c(a,2,c2,c3,c1);
1429 addcc c_2,t_1,c_2
1430 rd %y,t_2 !=
1431 addxcc c_3,t_2,c_3
1432 addx c_1,%g0,c_1
1433 st c_2,rp(4) !r[4]=c2;
1434
1435 umul a_2,a_3,t_1 !=!sqr_add_c2(a,3,2,c3,c1,c2);
1436 addcc c_3,t_1,c_3
1437 rd %y,t_2
1438 addxcc c_1,t_2,c_1
1439 addx %g0,%g0,c_2 !=
1440 addcc c_3,t_1,c_3
1441 addxcc c_1,t_2,c_1
1442 st c_3,rp(5) !r[5]=c3;
1443 addx c_2,%g0,c_2 !=
1444
1445 umul a_3,a_3,t_1 !sqr_add_c(a,3,c1,c2,c3);
1446 addcc c_1,t_1,c_1
1447 rd %y,t_2
1448 addxcc c_2,t_2,c_2 !=
1449 st c_1,rp(6) !r[6]=c1;
1450 st c_2,rp(7) !r[7]=c2;
1451
1452 ret
1453 restore %g0,%g0,%o0
1454
1455.type bn_sqr_comba4,#function
1456.size bn_sqr_comba4,(.-bn_sqr_comba4)
1457
1458.align 32
diff --git a/src/lib/libcrypto/bn/asm/sparcv8plus.S b/src/lib/libcrypto/bn/asm/sparcv8plus.S
deleted file mode 100644
index 0074dfdb75..0000000000
--- a/src/lib/libcrypto/bn/asm/sparcv8plus.S
+++ /dev/null
@@ -1,1535 +0,0 @@
1.ident "sparcv8plus.s, Version 1.4"
2.ident "SPARC v9 ISA artwork by Andy Polyakov <appro@fy.chalmers.se>"
3
4/*
5 * ====================================================================
6 * Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
7 * project.
8 *
9 * Rights for redistribution and usage in source and binary forms are
10 * granted according to the OpenSSL license. Warranty of any kind is
11 * disclaimed.
12 * ====================================================================
13 */
14
15/*
16 * This is my modest contributon to OpenSSL project (see
17 * http://www.openssl.org/ for more information about it) and is
18 * a drop-in UltraSPARC ISA replacement for crypto/bn/bn_asm.c
19 * module. For updates see http://fy.chalmers.se/~appro/hpe/.
20 *
21 * Questions-n-answers.
22 *
23 * Q. How to compile?
24 * A. With SC4.x/SC5.x:
25 *
26 * cc -xarch=v8plus -c bn_asm.sparc.v8plus.S -o bn_asm.o
27 *
28 * and with gcc:
29 *
30 * gcc -mcpu=ultrasparc -c bn_asm.sparc.v8plus.S -o bn_asm.o
31 *
32 * or if above fails (it does if you have gas installed):
33 *
34 * gcc -E bn_asm.sparc.v8plus.S | as -xarch=v8plus /dev/fd/0 -o bn_asm.o
35 *
36 * Quick-n-dirty way to fuse the module into the library.
37 * Provided that the library is already configured and built
38 * (in 0.9.2 case with no-asm option):
39 *
40 * # cd crypto/bn
41 * # cp /some/place/bn_asm.sparc.v8plus.S .
42 * # cc -xarch=v8plus -c bn_asm.sparc.v8plus.S -o bn_asm.o
43 * # make
44 * # cd ../..
45 * # make; make test
46 *
47 * Quick-n-dirty way to get rid of it:
48 *
49 * # cd crypto/bn
50 * # touch bn_asm.c
51 * # make
52 * # cd ../..
53 * # make; make test
54 *
55 * Q. V8plus achitecture? What kind of beast is that?
56 * A. Well, it's rather a programming model than an architecture...
57 * It's actually v9-compliant, i.e. *any* UltraSPARC, CPU under
58 * special conditions, namely when kernel doesn't preserve upper
59 * 32 bits of otherwise 64-bit registers during a context switch.
60 *
61 * Q. Why just UltraSPARC? What about SuperSPARC?
62 * A. Original release did target UltraSPARC only. Now SuperSPARC
63 * version is provided along. Both version share bn_*comba[48]
64 * implementations (see comment later in code for explanation).
65 * But what's so special about this UltraSPARC implementation?
66 * Why didn't I let compiler do the job? Trouble is that most of
67 * available compilers (well, SC5.0 is the only exception) don't
68 * attempt to take advantage of UltraSPARC's 64-bitness under
69 * 32-bit kernels even though it's perfectly possible (see next
70 * question).
71 *
72 * Q. 64-bit registers under 32-bit kernels? Didn't you just say it
73 * doesn't work?
74 * A. You can't adress *all* registers as 64-bit wide:-( The catch is
75 * that you actually may rely upon %o0-%o5 and %g1-%g4 being fully
76 * preserved if you're in a leaf function, i.e. such never calling
77 * any other functions. All functions in this module are leaf and
78 * 10 registers is a handful. And as a matter of fact none-"comba"
79 * routines don't require even that much and I could even afford to
80 * not allocate own stack frame for 'em:-)
81 *
82 * Q. What about 64-bit kernels?
83 * A. What about 'em? Just kidding:-) Pure 64-bit version is currently
84 * under evaluation and development...
85 *
86 * Q. What about shared libraries?
87 * A. What about 'em? Kidding again:-) Code does *not* contain any
88 * code position dependencies and it's safe to include it into
89 * shared library as is.
90 *
91 * Q. How much faster does it go?
92 * A. Do you have a good benchmark? In either case below is what I
93 * experience with crypto/bn/expspeed.c test program:
94 *
95 * v8plus module on U10/300MHz against bn_asm.c compiled with:
96 *
97 * cc-5.0 -xarch=v8plus -xO5 -xdepend +7-12%
98 * cc-4.2 -xarch=v8plus -xO5 -xdepend +25-35%
99 * egcs-1.1.2 -mcpu=ultrasparc -O3 +35-45%
100 *
101 * v8 module on SS10/60MHz against bn_asm.c compiled with:
102 *
103 * cc-5.0 -xarch=v8 -xO5 -xdepend +7-10%
104 * cc-4.2 -xarch=v8 -xO5 -xdepend +10%
105 * egcs-1.1.2 -mv8 -O3 +35-45%
106 *
107 * As you can see it's damn hard to beat the new Sun C compiler
108 * and it's in first place GNU C users who will appreciate this
109 * assembler implementation:-)
110 */
111
112/*
113 * Revision history.
114 *
115 * 1.0 - initial release;
116 * 1.1 - new loop unrolling model(*);
117 * - some more fine tuning;
118 * 1.2 - made gas friendly;
119 * - updates to documentation concerning v9;
120 * - new performance comparison matrix;
121 * 1.3 - fixed problem with /usr/ccs/lib/cpp;
122 * 1.4 - native V9 bn_*_comba[48] implementation (15% more efficient)
123 * resulting in slight overall performance kick;
124 * - some retunes;
125 * - support for GNU as added;
126 *
127 * (*) Originally unrolled loop looked like this:
128 * for (;;) {
129 * op(p+0); if (--n==0) break;
130 * op(p+1); if (--n==0) break;
131 * op(p+2); if (--n==0) break;
132 * op(p+3); if (--n==0) break;
133 * p+=4;
134 * }
135 * I unroll according to following:
136 * while (n&~3) {
137 * op(p+0); op(p+1); op(p+2); op(p+3);
138 * p+=4; n=-4;
139 * }
140 * if (n) {
141 * op(p+0); if (--n==0) return;
142 * op(p+2); if (--n==0) return;
143 * op(p+3); return;
144 * }
145 */
146
147/*
148 * GNU assembler can't stand stuw:-(
149 */
150#define stuw st
151
152.section ".text",#alloc,#execinstr
153.file "bn_asm.sparc.v8plus.S"
154
155.align 32
156
157.global bn_mul_add_words
158/*
159 * BN_ULONG bn_mul_add_words(rp,ap,num,w)
160 * BN_ULONG *rp,*ap;
161 * int num;
162 * BN_ULONG w;
163 */
164bn_mul_add_words:
165 brgz,a %o2,.L_bn_mul_add_words_proceed
166 lduw [%o1],%g2
167 retl
168 clr %o0
169
170.L_bn_mul_add_words_proceed:
171 srl %o3,%g0,%o3 ! clruw %o3
172 andcc %o2,-4,%g0
173 bz,pn %icc,.L_bn_mul_add_words_tail
174 clr %o5
175
176.L_bn_mul_add_words_loop: ! wow! 32 aligned!
177 lduw [%o0],%g1
178 lduw [%o1+4],%g3
179 mulx %o3,%g2,%g2
180 add %g1,%o5,%o4
181 nop
182 add %o4,%g2,%o4
183 stuw %o4,[%o0]
184 srlx %o4,32,%o5
185
186 lduw [%o0+4],%g1
187 lduw [%o1+8],%g2
188 mulx %o3,%g3,%g3
189 add %g1,%o5,%o4
190 dec 4,%o2
191 add %o4,%g3,%o4
192 stuw %o4,[%o0+4]
193 srlx %o4,32,%o5
194
195 lduw [%o0+8],%g1
196 lduw [%o1+12],%g3
197 mulx %o3,%g2,%g2
198 add %g1,%o5,%o4
199 inc 16,%o1
200 add %o4,%g2,%o4
201 stuw %o4,[%o0+8]
202 srlx %o4,32,%o5
203
204 lduw [%o0+12],%g1
205 mulx %o3,%g3,%g3
206 add %g1,%o5,%o4
207 inc 16,%o0
208 add %o4,%g3,%o4
209 andcc %o2,-4,%g0
210 stuw %o4,[%o0-4]
211 srlx %o4,32,%o5
212 bnz,a,pt %icc,.L_bn_mul_add_words_loop
213 lduw [%o1],%g2
214
215 brnz,a,pn %o2,.L_bn_mul_add_words_tail
216 lduw [%o1],%g2
217.L_bn_mul_add_words_return:
218 retl
219 mov %o5,%o0
220
221.L_bn_mul_add_words_tail:
222 lduw [%o0],%g1
223 mulx %o3,%g2,%g2
224 add %g1,%o5,%o4
225 dec %o2
226 add %o4,%g2,%o4
227 srlx %o4,32,%o5
228 brz,pt %o2,.L_bn_mul_add_words_return
229 stuw %o4,[%o0]
230
231 lduw [%o1+4],%g2
232 lduw [%o0+4],%g1
233 mulx %o3,%g2,%g2
234 add %g1,%o5,%o4
235 dec %o2
236 add %o4,%g2,%o4
237 srlx %o4,32,%o5
238 brz,pt %o2,.L_bn_mul_add_words_return
239 stuw %o4,[%o0+4]
240
241 lduw [%o1+8],%g2
242 lduw [%o0+8],%g1
243 mulx %o3,%g2,%g2
244 add %g1,%o5,%o4
245 add %o4,%g2,%o4
246 stuw %o4,[%o0+8]
247 retl
248 srlx %o4,32,%o0
249
250.type bn_mul_add_words,#function
251.size bn_mul_add_words,(.-bn_mul_add_words)
252
253.align 32
254
255.global bn_mul_words
256/*
257 * BN_ULONG bn_mul_words(rp,ap,num,w)
258 * BN_ULONG *rp,*ap;
259 * int num;
260 * BN_ULONG w;
261 */
262bn_mul_words:
263 brgz,a %o2,.L_bn_mul_words_proceeed
264 lduw [%o1],%g2
265 retl
266 clr %o0
267
268.L_bn_mul_words_proceeed:
269 srl %o3,%g0,%o3 ! clruw %o3
270 andcc %o2,-4,%g0
271 bz,pn %icc,.L_bn_mul_words_tail
272 clr %o5
273
274.L_bn_mul_words_loop: ! wow! 32 aligned!
275 lduw [%o1+4],%g3
276 mulx %o3,%g2,%g2
277 add %g2,%o5,%o4
278 nop
279 stuw %o4,[%o0]
280 srlx %o4,32,%o5
281
282 lduw [%o1+8],%g2
283 mulx %o3,%g3,%g3
284 add %g3,%o5,%o4
285 dec 4,%o2
286 stuw %o4,[%o0+4]
287 srlx %o4,32,%o5
288
289 lduw [%o1+12],%g3
290 mulx %o3,%g2,%g2
291 add %g2,%o5,%o4
292 inc 16,%o1
293 stuw %o4,[%o0+8]
294 srlx %o4,32,%o5
295
296 mulx %o3,%g3,%g3
297 add %g3,%o5,%o4
298 inc 16,%o0
299 stuw %o4,[%o0-4]
300 srlx %o4,32,%o5
301 andcc %o2,-4,%g0
302 bnz,a,pt %icc,.L_bn_mul_words_loop
303 lduw [%o1],%g2
304 nop
305 nop
306
307 brnz,a,pn %o2,.L_bn_mul_words_tail
308 lduw [%o1],%g2
309.L_bn_mul_words_return:
310 retl
311 mov %o5,%o0
312
313.L_bn_mul_words_tail:
314 mulx %o3,%g2,%g2
315 add %g2,%o5,%o4
316 dec %o2
317 srlx %o4,32,%o5
318 brz,pt %o2,.L_bn_mul_words_return
319 stuw %o4,[%o0]
320
321 lduw [%o1+4],%g2
322 mulx %o3,%g2,%g2
323 add %g2,%o5,%o4
324 dec %o2
325 srlx %o4,32,%o5
326 brz,pt %o2,.L_bn_mul_words_return
327 stuw %o4,[%o0+4]
328
329 lduw [%o1+8],%g2
330 mulx %o3,%g2,%g2
331 add %g2,%o5,%o4
332 stuw %o4,[%o0+8]
333 retl
334 srlx %o4,32,%o0
335
336.type bn_mul_words,#function
337.size bn_mul_words,(.-bn_mul_words)
338
339.align 32
340.global bn_sqr_words
341/*
342 * void bn_sqr_words(r,a,n)
343 * BN_ULONG *r,*a;
344 * int n;
345 */
346bn_sqr_words:
347 brgz,a %o2,.L_bn_sqr_words_proceeed
348 lduw [%o1],%g2
349 retl
350 clr %o0
351
352.L_bn_sqr_words_proceeed:
353 andcc %o2,-4,%g0
354 nop
355 bz,pn %icc,.L_bn_sqr_words_tail
356 nop
357
358.L_bn_sqr_words_loop: ! wow! 32 aligned!
359 lduw [%o1+4],%g3
360 mulx %g2,%g2,%o4
361 stuw %o4,[%o0]
362 srlx %o4,32,%o5
363 stuw %o5,[%o0+4]
364 nop
365
366 lduw [%o1+8],%g2
367 mulx %g3,%g3,%o4
368 dec 4,%o2
369 stuw %o4,[%o0+8]
370 srlx %o4,32,%o5
371 stuw %o5,[%o0+12]
372
373 lduw [%o1+12],%g3
374 mulx %g2,%g2,%o4
375 srlx %o4,32,%o5
376 stuw %o4,[%o0+16]
377 inc 16,%o1
378 stuw %o5,[%o0+20]
379
380 mulx %g3,%g3,%o4
381 inc 32,%o0
382 stuw %o4,[%o0-8]
383 srlx %o4,32,%o5
384 andcc %o2,-4,%g2
385 stuw %o5,[%o0-4]
386 bnz,a,pt %icc,.L_bn_sqr_words_loop
387 lduw [%o1],%g2
388 nop
389
390 brnz,a,pn %o2,.L_bn_sqr_words_tail
391 lduw [%o1],%g2
392.L_bn_sqr_words_return:
393 retl
394 clr %o0
395
396.L_bn_sqr_words_tail:
397 mulx %g2,%g2,%o4
398 dec %o2
399 stuw %o4,[%o0]
400 srlx %o4,32,%o5
401 brz,pt %o2,.L_bn_sqr_words_return
402 stuw %o5,[%o0+4]
403
404 lduw [%o1+4],%g2
405 mulx %g2,%g2,%o4
406 dec %o2
407 stuw %o4,[%o0+8]
408 srlx %o4,32,%o5
409 brz,pt %o2,.L_bn_sqr_words_return
410 stuw %o5,[%o0+12]
411
412 lduw [%o1+8],%g2
413 mulx %g2,%g2,%o4
414 srlx %o4,32,%o5
415 stuw %o4,[%o0+16]
416 stuw %o5,[%o0+20]
417 retl
418 clr %o0
419
420.type bn_sqr_words,#function
421.size bn_sqr_words,(.-bn_sqr_words)
422
423.align 32
424.global bn_div_words
425/*
426 * BN_ULONG bn_div_words(h,l,d)
427 * BN_ULONG h,l,d;
428 */
429bn_div_words:
430 sllx %o0,32,%o0
431 or %o0,%o1,%o0
432 udivx %o0,%o2,%o0
433 retl
434 srl %o0,%g0,%o0 ! clruw %o0
435
436.type bn_div_words,#function
437.size bn_div_words,(.-bn_div_words)
438
439.align 32
440
441.global bn_add_words
442/*
443 * BN_ULONG bn_add_words(rp,ap,bp,n)
444 * BN_ULONG *rp,*ap,*bp;
445 * int n;
446 */
447bn_add_words:
448 brgz,a %o3,.L_bn_add_words_proceed
449 lduw [%o1],%o4
450 retl
451 clr %o0
452
453.L_bn_add_words_proceed:
454 andcc %o3,-4,%g0
455 bz,pn %icc,.L_bn_add_words_tail
456 addcc %g0,0,%g0 ! clear carry flag
457 nop
458
459.L_bn_add_words_loop: ! wow! 32 aligned!
460 dec 4,%o3
461 lduw [%o2],%o5
462 lduw [%o1+4],%g1
463 lduw [%o2+4],%g2
464 lduw [%o1+8],%g3
465 lduw [%o2+8],%g4
466 addccc %o5,%o4,%o5
467 stuw %o5,[%o0]
468
469 lduw [%o1+12],%o4
470 lduw [%o2+12],%o5
471 inc 16,%o1
472 addccc %g1,%g2,%g1
473 stuw %g1,[%o0+4]
474
475 inc 16,%o2
476 addccc %g3,%g4,%g3
477 stuw %g3,[%o0+8]
478
479 inc 16,%o0
480 addccc %o5,%o4,%o5
481 stuw %o5,[%o0-4]
482 and %o3,-4,%g1
483 brnz,a,pt %g1,.L_bn_add_words_loop
484 lduw [%o1],%o4
485
486 brnz,a,pn %o3,.L_bn_add_words_tail
487 lduw [%o1],%o4
488.L_bn_add_words_return:
489 clr %o0
490 retl
491 movcs %icc,1,%o0
492 nop
493
494.L_bn_add_words_tail:
495 lduw [%o2],%o5
496 dec %o3
497 addccc %o5,%o4,%o5
498 brz,pt %o3,.L_bn_add_words_return
499 stuw %o5,[%o0]
500
501 lduw [%o1+4],%o4
502 lduw [%o2+4],%o5
503 dec %o3
504 addccc %o5,%o4,%o5
505 brz,pt %o3,.L_bn_add_words_return
506 stuw %o5,[%o0+4]
507
508 lduw [%o1+8],%o4
509 lduw [%o2+8],%o5
510 addccc %o5,%o4,%o5
511 stuw %o5,[%o0+8]
512 clr %o0
513 retl
514 movcs %icc,1,%o0
515
516.type bn_add_words,#function
517.size bn_add_words,(.-bn_add_words)
518
519.global bn_sub_words
520/*
521 * BN_ULONG bn_sub_words(rp,ap,bp,n)
522 * BN_ULONG *rp,*ap,*bp;
523 * int n;
524 */
525bn_sub_words:
526 brgz,a %o3,.L_bn_sub_words_proceed
527 lduw [%o1],%o4
528 retl
529 clr %o0
530
531.L_bn_sub_words_proceed:
532 andcc %o3,-4,%g0
533 bz,pn %icc,.L_bn_sub_words_tail
534 addcc %g0,0,%g0 ! clear carry flag
535 nop
536
537.L_bn_sub_words_loop: ! wow! 32 aligned!
538 dec 4,%o3
539 lduw [%o2],%o5
540 lduw [%o1+4],%g1
541 lduw [%o2+4],%g2
542 lduw [%o1+8],%g3
543 lduw [%o2+8],%g4
544 subccc %o4,%o5,%o5
545 stuw %o5,[%o0]
546
547 lduw [%o1+12],%o4
548 lduw [%o2+12],%o5
549 inc 16,%o1
550 subccc %g1,%g2,%g2
551 stuw %g2,[%o0+4]
552
553 inc 16,%o2
554 subccc %g3,%g4,%g4
555 stuw %g4,[%o0+8]
556
557 inc 16,%o0
558 subccc %o4,%o5,%o5
559 stuw %o5,[%o0-4]
560 and %o3,-4,%g1
561 brnz,a,pt %g1,.L_bn_sub_words_loop
562 lduw [%o1],%o4
563
564 brnz,a,pn %o3,.L_bn_sub_words_tail
565 lduw [%o1],%o4
566.L_bn_sub_words_return:
567 clr %o0
568 retl
569 movcs %icc,1,%o0
570 nop
571
572.L_bn_sub_words_tail: ! wow! 32 aligned!
573 lduw [%o2],%o5
574 dec %o3
575 subccc %o4,%o5,%o5
576 brz,pt %o3,.L_bn_sub_words_return
577 stuw %o5,[%o0]
578
579 lduw [%o1+4],%o4
580 lduw [%o2+4],%o5
581 dec %o3
582 subccc %o4,%o5,%o5
583 brz,pt %o3,.L_bn_sub_words_return
584 stuw %o5,[%o0+4]
585
586 lduw [%o1+8],%o4
587 lduw [%o2+8],%o5
588 subccc %o4,%o5,%o5
589 stuw %o5,[%o0+8]
590 clr %o0
591 retl
592 movcs %icc,1,%o0
593
594.type bn_sub_words,#function
595.size bn_sub_words,(.-bn_sub_words)
596
597/*
598 * Code below depends on the fact that upper parts of the %l0-%l7
599 * and %i0-%i7 are zeroed by kernel after context switch. In
600 * previous versions this comment stated that "the trouble is that
601 * it's not feasible to implement the mumbo-jumbo in less V9
602 * instructions:-(" which apparently isn't true thanks to
603 * 'bcs,a %xcc,.+8; inc %rd' pair. But the performance improvement
604 * results not from the shorter code, but from elimination of
605 * multicycle none-pairable 'rd %y,%rd' instructions.
606 *
607 * Andy.
608 */
609
610#define FRAME_SIZE -96
611
612/*
613 * Here is register usage map for *all* routines below.
614 */
615#define t_1 %o0
616#define t_2 %o1
617#define c_12 %o2
618#define c_3 %o3
619
620#define ap(I) [%i1+4*I]
621#define bp(I) [%i2+4*I]
622#define rp(I) [%i0+4*I]
623
624#define a_0 %l0
625#define a_1 %l1
626#define a_2 %l2
627#define a_3 %l3
628#define a_4 %l4
629#define a_5 %l5
630#define a_6 %l6
631#define a_7 %l7
632
633#define b_0 %i3
634#define b_1 %i4
635#define b_2 %i5
636#define b_3 %o4
637#define b_4 %o5
638#define b_5 %o7
639#define b_6 %g1
640#define b_7 %g4
641
642.align 32
643.global bn_mul_comba8
644/*
645 * void bn_mul_comba8(r,a,b)
646 * BN_ULONG *r,*a,*b;
647 */
648bn_mul_comba8:
649 save %sp,FRAME_SIZE,%sp
650 mov 1,t_2
651 lduw ap(0),a_0
652 sllx t_2,32,t_2
653 lduw bp(0),b_0 !=
654 lduw bp(1),b_1
655 mulx a_0,b_0,t_1 !mul_add_c(a[0],b[0],c1,c2,c3);
656 srlx t_1,32,c_12
657 stuw t_1,rp(0) !=!r[0]=c1;
658
659 lduw ap(1),a_1
660 mulx a_0,b_1,t_1 !mul_add_c(a[0],b[1],c2,c3,c1);
661 addcc c_12,t_1,c_12
662 clr c_3 !=
663 bcs,a %xcc,.+8
664 add c_3,t_2,c_3
665 lduw ap(2),a_2
666 mulx a_1,b_0,t_1 !=!mul_add_c(a[1],b[0],c2,c3,c1);
667 addcc c_12,t_1,t_1
668 bcs,a %xcc,.+8
669 add c_3,t_2,c_3
670 srlx t_1,32,c_12 !=
671 stuw t_1,rp(1) !r[1]=c2;
672 or c_12,c_3,c_12
673
674 mulx a_2,b_0,t_1 !mul_add_c(a[2],b[0],c3,c1,c2);
675 addcc c_12,t_1,c_12 !=
676 clr c_3
677 bcs,a %xcc,.+8
678 add c_3,t_2,c_3
679 lduw bp(2),b_2 !=
680 mulx a_1,b_1,t_1 !mul_add_c(a[1],b[1],c3,c1,c2);
681 addcc c_12,t_1,c_12
682 bcs,a %xcc,.+8
683 add c_3,t_2,c_3 !=
684 lduw bp(3),b_3
685 mulx a_0,b_2,t_1 !mul_add_c(a[0],b[2],c3,c1,c2);
686 addcc c_12,t_1,t_1
687 bcs,a %xcc,.+8 !=
688 add c_3,t_2,c_3
689 srlx t_1,32,c_12
690 stuw t_1,rp(2) !r[2]=c3;
691 or c_12,c_3,c_12 !=
692
693 mulx a_0,b_3,t_1 !mul_add_c(a[0],b[3],c1,c2,c3);
694 addcc c_12,t_1,c_12
695 clr c_3
696 bcs,a %xcc,.+8 !=
697 add c_3,t_2,c_3
698 mulx a_1,b_2,t_1 !=!mul_add_c(a[1],b[2],c1,c2,c3);
699 addcc c_12,t_1,c_12
700 bcs,a %xcc,.+8 !=
701 add c_3,t_2,c_3
702 lduw ap(3),a_3
703 mulx a_2,b_1,t_1 !mul_add_c(a[2],b[1],c1,c2,c3);
704 addcc c_12,t_1,c_12 !=
705 bcs,a %xcc,.+8
706 add c_3,t_2,c_3
707 lduw ap(4),a_4
708 mulx a_3,b_0,t_1 !=!mul_add_c(a[3],b[0],c1,c2,c3);!=
709 addcc c_12,t_1,t_1
710 bcs,a %xcc,.+8
711 add c_3,t_2,c_3
712 srlx t_1,32,c_12 !=
713 stuw t_1,rp(3) !r[3]=c1;
714 or c_12,c_3,c_12
715
716 mulx a_4,b_0,t_1 !mul_add_c(a[4],b[0],c2,c3,c1);
717 addcc c_12,t_1,c_12 !=
718 clr c_3
719 bcs,a %xcc,.+8
720 add c_3,t_2,c_3
721 mulx a_3,b_1,t_1 !=!mul_add_c(a[3],b[1],c2,c3,c1);
722 addcc c_12,t_1,c_12
723 bcs,a %xcc,.+8
724 add c_3,t_2,c_3
725 mulx a_2,b_2,t_1 !=!mul_add_c(a[2],b[2],c2,c3,c1);
726 addcc c_12,t_1,c_12
727 bcs,a %xcc,.+8
728 add c_3,t_2,c_3
729 lduw bp(4),b_4 !=
730 mulx a_1,b_3,t_1 !mul_add_c(a[1],b[3],c2,c3,c1);
731 addcc c_12,t_1,c_12
732 bcs,a %xcc,.+8
733 add c_3,t_2,c_3 !=
734 lduw bp(5),b_5
735 mulx a_0,b_4,t_1 !mul_add_c(a[0],b[4],c2,c3,c1);
736 addcc c_12,t_1,t_1
737 bcs,a %xcc,.+8 !=
738 add c_3,t_2,c_3
739 srlx t_1,32,c_12
740 stuw t_1,rp(4) !r[4]=c2;
741 or c_12,c_3,c_12 !=
742
743 mulx a_0,b_5,t_1 !mul_add_c(a[0],b[5],c3,c1,c2);
744 addcc c_12,t_1,c_12
745 clr c_3
746 bcs,a %xcc,.+8 !=
747 add c_3,t_2,c_3
748 mulx a_1,b_4,t_1 !mul_add_c(a[1],b[4],c3,c1,c2);
749 addcc c_12,t_1,c_12
750 bcs,a %xcc,.+8 !=
751 add c_3,t_2,c_3
752 mulx a_2,b_3,t_1 !mul_add_c(a[2],b[3],c3,c1,c2);
753 addcc c_12,t_1,c_12
754 bcs,a %xcc,.+8 !=
755 add c_3,t_2,c_3
756 mulx a_3,b_2,t_1 !mul_add_c(a[3],b[2],c3,c1,c2);
757 addcc c_12,t_1,c_12
758 bcs,a %xcc,.+8 !=
759 add c_3,t_2,c_3
760 lduw ap(5),a_5
761 mulx a_4,b_1,t_1 !mul_add_c(a[4],b[1],c3,c1,c2);
762 addcc c_12,t_1,c_12 !=
763 bcs,a %xcc,.+8
764 add c_3,t_2,c_3
765 lduw ap(6),a_6
766 mulx a_5,b_0,t_1 !=!mul_add_c(a[5],b[0],c3,c1,c2);
767 addcc c_12,t_1,t_1
768 bcs,a %xcc,.+8
769 add c_3,t_2,c_3
770 srlx t_1,32,c_12 !=
771 stuw t_1,rp(5) !r[5]=c3;
772 or c_12,c_3,c_12
773
774 mulx a_6,b_0,t_1 !mul_add_c(a[6],b[0],c1,c2,c3);
775 addcc c_12,t_1,c_12 !=
776 clr c_3
777 bcs,a %xcc,.+8
778 add c_3,t_2,c_3
779 mulx a_5,b_1,t_1 !=!mul_add_c(a[5],b[1],c1,c2,c3);
780 addcc c_12,t_1,c_12
781 bcs,a %xcc,.+8
782 add c_3,t_2,c_3
783 mulx a_4,b_2,t_1 !=!mul_add_c(a[4],b[2],c1,c2,c3);
784 addcc c_12,t_1,c_12
785 bcs,a %xcc,.+8
786 add c_3,t_2,c_3
787 mulx a_3,b_3,t_1 !=!mul_add_c(a[3],b[3],c1,c2,c3);
788 addcc c_12,t_1,c_12
789 bcs,a %xcc,.+8
790 add c_3,t_2,c_3
791 mulx a_2,b_4,t_1 !=!mul_add_c(a[2],b[4],c1,c2,c3);
792 addcc c_12,t_1,c_12
793 bcs,a %xcc,.+8
794 add c_3,t_2,c_3
795 lduw bp(6),b_6 !=
796 mulx a_1,b_5,t_1 !mul_add_c(a[1],b[5],c1,c2,c3);
797 addcc c_12,t_1,c_12
798 bcs,a %xcc,.+8
799 add c_3,t_2,c_3 !=
800 lduw bp(7),b_7
801 mulx a_0,b_6,t_1 !mul_add_c(a[0],b[6],c1,c2,c3);
802 addcc c_12,t_1,t_1
803 bcs,a %xcc,.+8 !=
804 add c_3,t_2,c_3
805 srlx t_1,32,c_12
806 stuw t_1,rp(6) !r[6]=c1;
807 or c_12,c_3,c_12 !=
808
809 mulx a_0,b_7,t_1 !mul_add_c(a[0],b[7],c2,c3,c1);
810 addcc c_12,t_1,c_12
811 clr c_3
812 bcs,a %xcc,.+8 !=
813 add c_3,t_2,c_3
814 mulx a_1,b_6,t_1 !mul_add_c(a[1],b[6],c2,c3,c1);
815 addcc c_12,t_1,c_12
816 bcs,a %xcc,.+8 !=
817 add c_3,t_2,c_3
818 mulx a_2,b_5,t_1 !mul_add_c(a[2],b[5],c2,c3,c1);
819 addcc c_12,t_1,c_12
820 bcs,a %xcc,.+8 !=
821 add c_3,t_2,c_3
822 mulx a_3,b_4,t_1 !mul_add_c(a[3],b[4],c2,c3,c1);
823 addcc c_12,t_1,c_12
824 bcs,a %xcc,.+8 !=
825 add c_3,t_2,c_3
826 mulx a_4,b_3,t_1 !mul_add_c(a[4],b[3],c2,c3,c1);
827 addcc c_12,t_1,c_12
828 bcs,a %xcc,.+8 !=
829 add c_3,t_2,c_3
830 mulx a_5,b_2,t_1 !mul_add_c(a[5],b[2],c2,c3,c1);
831 addcc c_12,t_1,c_12
832 bcs,a %xcc,.+8 !=
833 add c_3,t_2,c_3
834 lduw ap(7),a_7
835 mulx a_6,b_1,t_1 !=!mul_add_c(a[6],b[1],c2,c3,c1);
836 addcc c_12,t_1,c_12
837 bcs,a %xcc,.+8
838 add c_3,t_2,c_3
839 mulx a_7,b_0,t_1 !=!mul_add_c(a[7],b[0],c2,c3,c1);
840 addcc c_12,t_1,t_1
841 bcs,a %xcc,.+8
842 add c_3,t_2,c_3
843 srlx t_1,32,c_12 !=
844 stuw t_1,rp(7) !r[7]=c2;
845 or c_12,c_3,c_12
846
847 mulx a_7,b_1,t_1 !=!mul_add_c(a[7],b[1],c3,c1,c2);
848 addcc c_12,t_1,c_12
849 clr c_3
850 bcs,a %xcc,.+8
851 add c_3,t_2,c_3 !=
852 mulx a_6,b_2,t_1 !mul_add_c(a[6],b[2],c3,c1,c2);
853 addcc c_12,t_1,c_12
854 bcs,a %xcc,.+8
855 add c_3,t_2,c_3 !=
856 mulx a_5,b_3,t_1 !mul_add_c(a[5],b[3],c3,c1,c2);
857 addcc c_12,t_1,c_12
858 bcs,a %xcc,.+8
859 add c_3,t_2,c_3 !=
860 mulx a_4,b_4,t_1 !mul_add_c(a[4],b[4],c3,c1,c2);
861 addcc c_12,t_1,c_12
862 bcs,a %xcc,.+8
863 add c_3,t_2,c_3 !=
864 mulx a_3,b_5,t_1 !mul_add_c(a[3],b[5],c3,c1,c2);
865 addcc c_12,t_1,c_12
866 bcs,a %xcc,.+8
867 add c_3,t_2,c_3 !=
868 mulx a_2,b_6,t_1 !mul_add_c(a[2],b[6],c3,c1,c2);
869 addcc c_12,t_1,c_12
870 bcs,a %xcc,.+8
871 add c_3,t_2,c_3 !=
872 mulx a_1,b_7,t_1 !mul_add_c(a[1],b[7],c3,c1,c2);
873 addcc c_12,t_1,t_1
874 bcs,a %xcc,.+8
875 add c_3,t_2,c_3 !=
876 srlx t_1,32,c_12
877 stuw t_1,rp(8) !r[8]=c3;
878 or c_12,c_3,c_12
879
880 mulx a_2,b_7,t_1 !=!mul_add_c(a[2],b[7],c1,c2,c3);
881 addcc c_12,t_1,c_12
882 clr c_3
883 bcs,a %xcc,.+8
884 add c_3,t_2,c_3 !=
885 mulx a_3,b_6,t_1 !mul_add_c(a[3],b[6],c1,c2,c3);
886 addcc c_12,t_1,c_12
887 bcs,a %xcc,.+8 !=
888 add c_3,t_2,c_3
889 mulx a_4,b_5,t_1 !mul_add_c(a[4],b[5],c1,c2,c3);
890 addcc c_12,t_1,c_12
891 bcs,a %xcc,.+8 !=
892 add c_3,t_2,c_3
893 mulx a_5,b_4,t_1 !mul_add_c(a[5],b[4],c1,c2,c3);
894 addcc c_12,t_1,c_12
895 bcs,a %xcc,.+8 !=
896 add c_3,t_2,c_3
897 mulx a_6,b_3,t_1 !mul_add_c(a[6],b[3],c1,c2,c3);
898 addcc c_12,t_1,c_12
899 bcs,a %xcc,.+8 !=
900 add c_3,t_2,c_3
901 mulx a_7,b_2,t_1 !mul_add_c(a[7],b[2],c1,c2,c3);
902 addcc c_12,t_1,t_1
903 bcs,a %xcc,.+8 !=
904 add c_3,t_2,c_3
905 srlx t_1,32,c_12
906 stuw t_1,rp(9) !r[9]=c1;
907 or c_12,c_3,c_12 !=
908
909 mulx a_7,b_3,t_1 !mul_add_c(a[7],b[3],c2,c3,c1);
910 addcc c_12,t_1,c_12
911 clr c_3
912 bcs,a %xcc,.+8 !=
913 add c_3,t_2,c_3
914 mulx a_6,b_4,t_1 !mul_add_c(a[6],b[4],c2,c3,c1);
915 addcc c_12,t_1,c_12
916 bcs,a %xcc,.+8 !=
917 add c_3,t_2,c_3
918 mulx a_5,b_5,t_1 !mul_add_c(a[5],b[5],c2,c3,c1);
919 addcc c_12,t_1,c_12
920 bcs,a %xcc,.+8 !=
921 add c_3,t_2,c_3
922 mulx a_4,b_6,t_1 !mul_add_c(a[4],b[6],c2,c3,c1);
923 addcc c_12,t_1,c_12
924 bcs,a %xcc,.+8 !=
925 add c_3,t_2,c_3
926 mulx a_3,b_7,t_1 !mul_add_c(a[3],b[7],c2,c3,c1);
927 addcc c_12,t_1,t_1
928 bcs,a %xcc,.+8 !=
929 add c_3,t_2,c_3
930 srlx t_1,32,c_12
931 stuw t_1,rp(10) !r[10]=c2;
932 or c_12,c_3,c_12 !=
933
934 mulx a_4,b_7,t_1 !mul_add_c(a[4],b[7],c3,c1,c2);
935 addcc c_12,t_1,c_12
936 clr c_3
937 bcs,a %xcc,.+8 !=
938 add c_3,t_2,c_3
939 mulx a_5,b_6,t_1 !mul_add_c(a[5],b[6],c3,c1,c2);
940 addcc c_12,t_1,c_12
941 bcs,a %xcc,.+8 !=
942 add c_3,t_2,c_3
943 mulx a_6,b_5,t_1 !mul_add_c(a[6],b[5],c3,c1,c2);
944 addcc c_12,t_1,c_12
945 bcs,a %xcc,.+8 !=
946 add c_3,t_2,c_3
947 mulx a_7,b_4,t_1 !mul_add_c(a[7],b[4],c3,c1,c2);
948 addcc c_12,t_1,t_1
949 bcs,a %xcc,.+8 !=
950 add c_3,t_2,c_3
951 srlx t_1,32,c_12
952 stuw t_1,rp(11) !r[11]=c3;
953 or c_12,c_3,c_12 !=
954
955 mulx a_7,b_5,t_1 !mul_add_c(a[7],b[5],c1,c2,c3);
956 addcc c_12,t_1,c_12
957 clr c_3
958 bcs,a %xcc,.+8 !=
959 add c_3,t_2,c_3
960 mulx a_6,b_6,t_1 !mul_add_c(a[6],b[6],c1,c2,c3);
961 addcc c_12,t_1,c_12
962 bcs,a %xcc,.+8 !=
963 add c_3,t_2,c_3
964 mulx a_5,b_7,t_1 !mul_add_c(a[5],b[7],c1,c2,c3);
965 addcc c_12,t_1,t_1
966 bcs,a %xcc,.+8 !=
967 add c_3,t_2,c_3
968 srlx t_1,32,c_12
969 stuw t_1,rp(12) !r[12]=c1;
970 or c_12,c_3,c_12 !=
971
972 mulx a_6,b_7,t_1 !mul_add_c(a[6],b[7],c2,c3,c1);
973 addcc c_12,t_1,c_12
974 clr c_3
975 bcs,a %xcc,.+8 !=
976 add c_3,t_2,c_3
977 mulx a_7,b_6,t_1 !mul_add_c(a[7],b[6],c2,c3,c1);
978 addcc c_12,t_1,t_1
979 bcs,a %xcc,.+8 !=
980 add c_3,t_2,c_3
981 srlx t_1,32,c_12
982 st t_1,rp(13) !r[13]=c2;
983 or c_12,c_3,c_12 !=
984
985 mulx a_7,b_7,t_1 !mul_add_c(a[7],b[7],c3,c1,c2);
986 addcc c_12,t_1,t_1
987 srlx t_1,32,c_12 !=
988 stuw t_1,rp(14) !r[14]=c3;
989 stuw c_12,rp(15) !r[15]=c1;
990
991 ret
992 restore %g0,%g0,%o0 !=
993
994.type bn_mul_comba8,#function
995.size bn_mul_comba8,(.-bn_mul_comba8)
996
997.align 32
998
999.global bn_mul_comba4
1000/*
1001 * void bn_mul_comba4(r,a,b)
1002 * BN_ULONG *r,*a,*b;
1003 */
1004bn_mul_comba4:
1005 save %sp,FRAME_SIZE,%sp
1006 lduw ap(0),a_0
1007 mov 1,t_2
1008 lduw bp(0),b_0
1009 sllx t_2,32,t_2 !=
1010 lduw bp(1),b_1
1011 mulx a_0,b_0,t_1 !mul_add_c(a[0],b[0],c1,c2,c3);
1012 srlx t_1,32,c_12
1013 stuw t_1,rp(0) !=!r[0]=c1;
1014
1015 lduw ap(1),a_1
1016 mulx a_0,b_1,t_1 !mul_add_c(a[0],b[1],c2,c3,c1);
1017 addcc c_12,t_1,c_12
1018 clr c_3 !=
1019 bcs,a %xcc,.+8
1020 add c_3,t_2,c_3
1021 lduw ap(2),a_2
1022 mulx a_1,b_0,t_1 !=!mul_add_c(a[1],b[0],c2,c3,c1);
1023 addcc c_12,t_1,t_1
1024 bcs,a %xcc,.+8
1025 add c_3,t_2,c_3
1026 srlx t_1,32,c_12 !=
1027 stuw t_1,rp(1) !r[1]=c2;
1028 or c_12,c_3,c_12
1029
1030 mulx a_2,b_0,t_1 !mul_add_c(a[2],b[0],c3,c1,c2);
1031 addcc c_12,t_1,c_12 !=
1032 clr c_3
1033 bcs,a %xcc,.+8
1034 add c_3,t_2,c_3
1035 lduw bp(2),b_2 !=
1036 mulx a_1,b_1,t_1 !mul_add_c(a[1],b[1],c3,c1,c2);
1037 addcc c_12,t_1,c_12
1038 bcs,a %xcc,.+8
1039 add c_3,t_2,c_3 !=
1040 lduw bp(3),b_3
1041 mulx a_0,b_2,t_1 !mul_add_c(a[0],b[2],c3,c1,c2);
1042 addcc c_12,t_1,t_1
1043 bcs,a %xcc,.+8 !=
1044 add c_3,t_2,c_3
1045 srlx t_1,32,c_12
1046 stuw t_1,rp(2) !r[2]=c3;
1047 or c_12,c_3,c_12 !=
1048
1049 mulx a_0,b_3,t_1 !mul_add_c(a[0],b[3],c1,c2,c3);
1050 addcc c_12,t_1,c_12
1051 clr c_3
1052 bcs,a %xcc,.+8 !=
1053 add c_3,t_2,c_3
1054 mulx a_1,b_2,t_1 !mul_add_c(a[1],b[2],c1,c2,c3);
1055 addcc c_12,t_1,c_12
1056 bcs,a %xcc,.+8 !=
1057 add c_3,t_2,c_3
1058 lduw ap(3),a_3
1059 mulx a_2,b_1,t_1 !mul_add_c(a[2],b[1],c1,c2,c3);
1060 addcc c_12,t_1,c_12 !=
1061 bcs,a %xcc,.+8
1062 add c_3,t_2,c_3
1063 mulx a_3,b_0,t_1 !mul_add_c(a[3],b[0],c1,c2,c3);!=
1064 addcc c_12,t_1,t_1 !=
1065 bcs,a %xcc,.+8
1066 add c_3,t_2,c_3
1067 srlx t_1,32,c_12
1068 stuw t_1,rp(3) !=!r[3]=c1;
1069 or c_12,c_3,c_12
1070
1071 mulx a_3,b_1,t_1 !mul_add_c(a[3],b[1],c2,c3,c1);
1072 addcc c_12,t_1,c_12
1073 clr c_3 !=
1074 bcs,a %xcc,.+8
1075 add c_3,t_2,c_3
1076 mulx a_2,b_2,t_1 !mul_add_c(a[2],b[2],c2,c3,c1);
1077 addcc c_12,t_1,c_12 !=
1078 bcs,a %xcc,.+8
1079 add c_3,t_2,c_3
1080 mulx a_1,b_3,t_1 !mul_add_c(a[1],b[3],c2,c3,c1);
1081 addcc c_12,t_1,t_1 !=
1082 bcs,a %xcc,.+8
1083 add c_3,t_2,c_3
1084 srlx t_1,32,c_12
1085 stuw t_1,rp(4) !=!r[4]=c2;
1086 or c_12,c_3,c_12
1087
1088 mulx a_2,b_3,t_1 !mul_add_c(a[2],b[3],c3,c1,c2);
1089 addcc c_12,t_1,c_12
1090 clr c_3 !=
1091 bcs,a %xcc,.+8
1092 add c_3,t_2,c_3
1093 mulx a_3,b_2,t_1 !mul_add_c(a[3],b[2],c3,c1,c2);
1094 addcc c_12,t_1,t_1 !=
1095 bcs,a %xcc,.+8
1096 add c_3,t_2,c_3
1097 srlx t_1,32,c_12
1098 stuw t_1,rp(5) !=!r[5]=c3;
1099 or c_12,c_3,c_12
1100
1101 mulx a_3,b_3,t_1 !mul_add_c(a[3],b[3],c1,c2,c3);
1102 addcc c_12,t_1,t_1
1103 srlx t_1,32,c_12 !=
1104 stuw t_1,rp(6) !r[6]=c1;
1105 stuw c_12,rp(7) !r[7]=c2;
1106
1107 ret
1108 restore %g0,%g0,%o0
1109
1110.type bn_mul_comba4,#function
1111.size bn_mul_comba4,(.-bn_mul_comba4)
1112
1113.align 32
1114
1115.global bn_sqr_comba8
1116bn_sqr_comba8:
1117 save %sp,FRAME_SIZE,%sp
1118 mov 1,t_2
1119 lduw ap(0),a_0
1120 sllx t_2,32,t_2
1121 lduw ap(1),a_1
1122 mulx a_0,a_0,t_1 !sqr_add_c(a,0,c1,c2,c3);
1123 srlx t_1,32,c_12
1124 stuw t_1,rp(0) !r[0]=c1;
1125
1126 lduw ap(2),a_2
1127 mulx a_0,a_1,t_1 !=!sqr_add_c2(a,1,0,c2,c3,c1);
1128 addcc c_12,t_1,c_12
1129 clr c_3
1130 bcs,a %xcc,.+8
1131 add c_3,t_2,c_3
1132 addcc c_12,t_1,t_1
1133 bcs,a %xcc,.+8
1134 add c_3,t_2,c_3
1135 srlx t_1,32,c_12
1136 stuw t_1,rp(1) !r[1]=c2;
1137 or c_12,c_3,c_12
1138
1139 mulx a_2,a_0,t_1 !sqr_add_c2(a,2,0,c3,c1,c2);
1140 addcc c_12,t_1,c_12
1141 clr c_3
1142 bcs,a %xcc,.+8
1143 add c_3,t_2,c_3
1144 addcc c_12,t_1,c_12
1145 bcs,a %xcc,.+8
1146 add c_3,t_2,c_3
1147 lduw ap(3),a_3
1148 mulx a_1,a_1,t_1 !sqr_add_c(a,1,c3,c1,c2);
1149 addcc c_12,t_1,t_1
1150 bcs,a %xcc,.+8
1151 add c_3,t_2,c_3
1152 srlx t_1,32,c_12
1153 stuw t_1,rp(2) !r[2]=c3;
1154 or c_12,c_3,c_12
1155
1156 mulx a_0,a_3,t_1 !sqr_add_c2(a,3,0,c1,c2,c3);
1157 addcc c_12,t_1,c_12
1158 clr c_3
1159 bcs,a %xcc,.+8
1160 add c_3,t_2,c_3
1161 addcc c_12,t_1,c_12
1162 bcs,a %xcc,.+8
1163 add c_3,t_2,c_3
1164 lduw ap(4),a_4
1165 mulx a_1,a_2,t_1 !sqr_add_c2(a,2,1,c1,c2,c3);
1166 addcc c_12,t_1,c_12
1167 bcs,a %xcc,.+8
1168 add c_3,t_2,c_3
1169 addcc c_12,t_1,t_1
1170 bcs,a %xcc,.+8
1171 add c_3,t_2,c_3
1172 srlx t_1,32,c_12
1173 st t_1,rp(3) !r[3]=c1;
1174 or c_12,c_3,c_12
1175
1176 mulx a_4,a_0,t_1 !sqr_add_c2(a,4,0,c2,c3,c1);
1177 addcc c_12,t_1,c_12
1178 clr c_3
1179 bcs,a %xcc,.+8
1180 add c_3,t_2,c_3
1181 addcc c_12,t_1,c_12
1182 bcs,a %xcc,.+8
1183 add c_3,t_2,c_3
1184 mulx a_3,a_1,t_1 !sqr_add_c2(a,3,1,c2,c3,c1);
1185 addcc c_12,t_1,c_12
1186 bcs,a %xcc,.+8
1187 add c_3,t_2,c_3
1188 addcc c_12,t_1,c_12
1189 bcs,a %xcc,.+8
1190 add c_3,t_2,c_3
1191 lduw ap(5),a_5
1192 mulx a_2,a_2,t_1 !sqr_add_c(a,2,c2,c3,c1);
1193 addcc c_12,t_1,t_1
1194 bcs,a %xcc,.+8
1195 add c_3,t_2,c_3
1196 srlx t_1,32,c_12
1197 stuw t_1,rp(4) !r[4]=c2;
1198 or c_12,c_3,c_12
1199
1200 mulx a_0,a_5,t_1 !sqr_add_c2(a,5,0,c3,c1,c2);
1201 addcc c_12,t_1,c_12
1202 clr c_3
1203 bcs,a %xcc,.+8
1204 add c_3,t_2,c_3
1205 addcc c_12,t_1,c_12
1206 bcs,a %xcc,.+8
1207 add c_3,t_2,c_3
1208 mulx a_1,a_4,t_1 !sqr_add_c2(a,4,1,c3,c1,c2);
1209 addcc c_12,t_1,c_12
1210 bcs,a %xcc,.+8
1211 add c_3,t_2,c_3
1212 addcc c_12,t_1,c_12
1213 bcs,a %xcc,.+8
1214 add c_3,t_2,c_3
1215 lduw ap(6),a_6
1216 mulx a_2,a_3,t_1 !sqr_add_c2(a,3,2,c3,c1,c2);
1217 addcc c_12,t_1,c_12
1218 bcs,a %xcc,.+8
1219 add c_3,t_2,c_3
1220 addcc c_12,t_1,t_1
1221 bcs,a %xcc,.+8
1222 add c_3,t_2,c_3
1223 srlx t_1,32,c_12
1224 stuw t_1,rp(5) !r[5]=c3;
1225 or c_12,c_3,c_12
1226
1227 mulx a_6,a_0,t_1 !sqr_add_c2(a,6,0,c1,c2,c3);
1228 addcc c_12,t_1,c_12
1229 clr c_3
1230 bcs,a %xcc,.+8
1231 add c_3,t_2,c_3
1232 addcc c_12,t_1,c_12
1233 bcs,a %xcc,.+8
1234 add c_3,t_2,c_3
1235 mulx a_5,a_1,t_1 !sqr_add_c2(a,5,1,c1,c2,c3);
1236 addcc c_12,t_1,c_12
1237 bcs,a %xcc,.+8
1238 add c_3,t_2,c_3
1239 addcc c_12,t_1,c_12
1240 bcs,a %xcc,.+8
1241 add c_3,t_2,c_3
1242 mulx a_4,a_2,t_1 !sqr_add_c2(a,4,2,c1,c2,c3);
1243 addcc c_12,t_1,c_12
1244 bcs,a %xcc,.+8
1245 add c_3,t_2,c_3
1246 addcc c_12,t_1,c_12
1247 bcs,a %xcc,.+8
1248 add c_3,t_2,c_3
1249 lduw ap(7),a_7
1250 mulx a_3,a_3,t_1 !=!sqr_add_c(a,3,c1,c2,c3);
1251 addcc c_12,t_1,t_1
1252 bcs,a %xcc,.+8
1253 add c_3,t_2,c_3
1254 srlx t_1,32,c_12
1255 stuw t_1,rp(6) !r[6]=c1;
1256 or c_12,c_3,c_12
1257
1258 mulx a_0,a_7,t_1 !sqr_add_c2(a,7,0,c2,c3,c1);
1259 addcc c_12,t_1,c_12
1260 clr c_3
1261 bcs,a %xcc,.+8
1262 add c_3,t_2,c_3
1263 addcc c_12,t_1,c_12
1264 bcs,a %xcc,.+8
1265 add c_3,t_2,c_3
1266 mulx a_1,a_6,t_1 !sqr_add_c2(a,6,1,c2,c3,c1);
1267 addcc c_12,t_1,c_12
1268 bcs,a %xcc,.+8
1269 add c_3,t_2,c_3
1270 addcc c_12,t_1,c_12
1271 bcs,a %xcc,.+8
1272 add c_3,t_2,c_3
1273 mulx a_2,a_5,t_1 !sqr_add_c2(a,5,2,c2,c3,c1);
1274 addcc c_12,t_1,c_12
1275 bcs,a %xcc,.+8
1276 add c_3,t_2,c_3
1277 addcc c_12,t_1,c_12
1278 bcs,a %xcc,.+8
1279 add c_3,t_2,c_3
1280 mulx a_3,a_4,t_1 !sqr_add_c2(a,4,3,c2,c3,c1);
1281 addcc c_12,t_1,c_12
1282 bcs,a %xcc,.+8
1283 add c_3,t_2,c_3
1284 addcc c_12,t_1,t_1
1285 bcs,a %xcc,.+8
1286 add c_3,t_2,c_3
1287 srlx t_1,32,c_12
1288 stuw t_1,rp(7) !r[7]=c2;
1289 or c_12,c_3,c_12
1290
1291 mulx a_7,a_1,t_1 !sqr_add_c2(a,7,1,c3,c1,c2);
1292 addcc c_12,t_1,c_12
1293 clr c_3
1294 bcs,a %xcc,.+8
1295 add c_3,t_2,c_3
1296 addcc c_12,t_1,c_12
1297 bcs,a %xcc,.+8
1298 add c_3,t_2,c_3
1299 mulx a_6,a_2,t_1 !sqr_add_c2(a,6,2,c3,c1,c2);
1300 addcc c_12,t_1,c_12
1301 bcs,a %xcc,.+8
1302 add c_3,t_2,c_3
1303 addcc c_12,t_1,c_12
1304 bcs,a %xcc,.+8
1305 add c_3,t_2,c_3
1306 mulx a_5,a_3,t_1 !sqr_add_c2(a,5,3,c3,c1,c2);
1307 addcc c_12,t_1,c_12
1308 bcs,a %xcc,.+8
1309 add c_3,t_2,c_3
1310 addcc c_12,t_1,c_12
1311 bcs,a %xcc,.+8
1312 add c_3,t_2,c_3
1313 mulx a_4,a_4,t_1 !sqr_add_c(a,4,c3,c1,c2);
1314 addcc c_12,t_1,t_1
1315 bcs,a %xcc,.+8
1316 add c_3,t_2,c_3
1317 srlx t_1,32,c_12
1318 stuw t_1,rp(8) !r[8]=c3;
1319 or c_12,c_3,c_12
1320
1321 mulx a_2,a_7,t_1 !sqr_add_c2(a,7,2,c1,c2,c3);
1322 addcc c_12,t_1,c_12
1323 clr c_3
1324 bcs,a %xcc,.+8
1325 add c_3,t_2,c_3
1326 addcc c_12,t_1,c_12
1327 bcs,a %xcc,.+8
1328 add c_3,t_2,c_3
1329 mulx a_3,a_6,t_1 !sqr_add_c2(a,6,3,c1,c2,c3);
1330 addcc c_12,t_1,c_12
1331 bcs,a %xcc,.+8
1332 add c_3,t_2,c_3
1333 addcc c_12,t_1,c_12
1334 bcs,a %xcc,.+8
1335 add c_3,t_2,c_3
1336 mulx a_4,a_5,t_1 !sqr_add_c2(a,5,4,c1,c2,c3);
1337 addcc c_12,t_1,c_12
1338 bcs,a %xcc,.+8
1339 add c_3,t_2,c_3
1340 addcc c_12,t_1,t_1
1341 bcs,a %xcc,.+8
1342 add c_3,t_2,c_3
1343 srlx t_1,32,c_12
1344 stuw t_1,rp(9) !r[9]=c1;
1345 or c_12,c_3,c_12
1346
1347 mulx a_7,a_3,t_1 !sqr_add_c2(a,7,3,c2,c3,c1);
1348 addcc c_12,t_1,c_12
1349 clr c_3
1350 bcs,a %xcc,.+8
1351 add c_3,t_2,c_3
1352 addcc c_12,t_1,c_12
1353 bcs,a %xcc,.+8
1354 add c_3,t_2,c_3
1355 mulx a_6,a_4,t_1 !sqr_add_c2(a,6,4,c2,c3,c1);
1356 addcc c_12,t_1,c_12
1357 bcs,a %xcc,.+8
1358 add c_3,t_2,c_3
1359 addcc c_12,t_1,c_12
1360 bcs,a %xcc,.+8
1361 add c_3,t_2,c_3
1362 mulx a_5,a_5,t_1 !sqr_add_c(a,5,c2,c3,c1);
1363 addcc c_12,t_1,t_1
1364 bcs,a %xcc,.+8
1365 add c_3,t_2,c_3
1366 srlx t_1,32,c_12
1367 stuw t_1,rp(10) !r[10]=c2;
1368 or c_12,c_3,c_12
1369
1370 mulx a_4,a_7,t_1 !sqr_add_c2(a,7,4,c3,c1,c2);
1371 addcc c_12,t_1,c_12
1372 clr c_3
1373 bcs,a %xcc,.+8
1374 add c_3,t_2,c_3
1375 addcc c_12,t_1,c_12
1376 bcs,a %xcc,.+8
1377 add c_3,t_2,c_3
1378 mulx a_5,a_6,t_1 !sqr_add_c2(a,6,5,c3,c1,c2);
1379 addcc c_12,t_1,c_12
1380 bcs,a %xcc,.+8
1381 add c_3,t_2,c_3
1382 addcc c_12,t_1,t_1
1383 bcs,a %xcc,.+8
1384 add c_3,t_2,c_3
1385 srlx t_1,32,c_12
1386 stuw t_1,rp(11) !r[11]=c3;
1387 or c_12,c_3,c_12
1388
1389 mulx a_7,a_5,t_1 !sqr_add_c2(a,7,5,c1,c2,c3);
1390 addcc c_12,t_1,c_12
1391 clr c_3
1392 bcs,a %xcc,.+8
1393 add c_3,t_2,c_3
1394 addcc c_12,t_1,c_12
1395 bcs,a %xcc,.+8
1396 add c_3,t_2,c_3
1397 mulx a_6,a_6,t_1 !sqr_add_c(a,6,c1,c2,c3);
1398 addcc c_12,t_1,t_1
1399 bcs,a %xcc,.+8
1400 add c_3,t_2,c_3
1401 srlx t_1,32,c_12
1402 stuw t_1,rp(12) !r[12]=c1;
1403 or c_12,c_3,c_12
1404
1405 mulx a_6,a_7,t_1 !sqr_add_c2(a,7,6,c2,c3,c1);
1406 addcc c_12,t_1,c_12
1407 clr c_3
1408 bcs,a %xcc,.+8
1409 add c_3,t_2,c_3
1410 addcc c_12,t_1,t_1
1411 bcs,a %xcc,.+8
1412 add c_3,t_2,c_3
1413 srlx t_1,32,c_12
1414 stuw t_1,rp(13) !r[13]=c2;
1415 or c_12,c_3,c_12
1416
1417 mulx a_7,a_7,t_1 !sqr_add_c(a,7,c3,c1,c2);
1418 addcc c_12,t_1,t_1
1419 srlx t_1,32,c_12
1420 stuw t_1,rp(14) !r[14]=c3;
1421 stuw c_12,rp(15) !r[15]=c1;
1422
1423 ret
1424 restore %g0,%g0,%o0
1425
1426.type bn_sqr_comba8,#function
1427.size bn_sqr_comba8,(.-bn_sqr_comba8)
1428
1429.align 32
1430
1431.global bn_sqr_comba4
1432/*
1433 * void bn_sqr_comba4(r,a)
1434 * BN_ULONG *r,*a;
1435 */
1436bn_sqr_comba4:
1437 save %sp,FRAME_SIZE,%sp
1438 mov 1,t_2
1439 lduw ap(0),a_0
1440 sllx t_2,32,t_2
1441 lduw ap(1),a_1
1442 mulx a_0,a_0,t_1 !sqr_add_c(a,0,c1,c2,c3);
1443 srlx t_1,32,c_12
1444 stuw t_1,rp(0) !r[0]=c1;
1445
1446 lduw ap(2),a_2
1447 mulx a_0,a_1,t_1 !sqr_add_c2(a,1,0,c2,c3,c1);
1448 addcc c_12,t_1,c_12
1449 clr c_3
1450 bcs,a %xcc,.+8
1451 add c_3,t_2,c_3
1452 addcc c_12,t_1,t_1
1453 bcs,a %xcc,.+8
1454 add c_3,t_2,c_3
1455 srlx t_1,32,c_12
1456 stuw t_1,rp(1) !r[1]=c2;
1457 or c_12,c_3,c_12
1458
1459 mulx a_2,a_0,t_1 !sqr_add_c2(a,2,0,c3,c1,c2);
1460 addcc c_12,t_1,c_12
1461 clr c_3
1462 bcs,a %xcc,.+8
1463 add c_3,t_2,c_3
1464 addcc c_12,t_1,c_12
1465 bcs,a %xcc,.+8
1466 add c_3,t_2,c_3
1467 lduw ap(3),a_3
1468 mulx a_1,a_1,t_1 !sqr_add_c(a,1,c3,c1,c2);
1469 addcc c_12,t_1,t_1
1470 bcs,a %xcc,.+8
1471 add c_3,t_2,c_3
1472 srlx t_1,32,c_12
1473 stuw t_1,rp(2) !r[2]=c3;
1474 or c_12,c_3,c_12
1475
1476 mulx a_0,a_3,t_1 !sqr_add_c2(a,3,0,c1,c2,c3);
1477 addcc c_12,t_1,c_12
1478 clr c_3
1479 bcs,a %xcc,.+8
1480 add c_3,t_2,c_3
1481 addcc c_12,t_1,c_12
1482 bcs,a %xcc,.+8
1483 add c_3,t_2,c_3
1484 mulx a_1,a_2,t_1 !sqr_add_c2(a,2,1,c1,c2,c3);
1485 addcc c_12,t_1,c_12
1486 bcs,a %xcc,.+8
1487 add c_3,t_2,c_3
1488 addcc c_12,t_1,t_1
1489 bcs,a %xcc,.+8
1490 add c_3,t_2,c_3
1491 srlx t_1,32,c_12
1492 stuw t_1,rp(3) !r[3]=c1;
1493 or c_12,c_3,c_12
1494
1495 mulx a_3,a_1,t_1 !sqr_add_c2(a,3,1,c2,c3,c1);
1496 addcc c_12,t_1,c_12
1497 clr c_3
1498 bcs,a %xcc,.+8
1499 add c_3,t_2,c_3
1500 addcc c_12,t_1,c_12
1501 bcs,a %xcc,.+8
1502 add c_3,t_2,c_3
1503 mulx a_2,a_2,t_1 !sqr_add_c(a,2,c2,c3,c1);
1504 addcc c_12,t_1,t_1
1505 bcs,a %xcc,.+8
1506 add c_3,t_2,c_3
1507 srlx t_1,32,c_12
1508 stuw t_1,rp(4) !r[4]=c2;
1509 or c_12,c_3,c_12
1510
1511 mulx a_2,a_3,t_1 !sqr_add_c2(a,3,2,c3,c1,c2);
1512 addcc c_12,t_1,c_12
1513 clr c_3
1514 bcs,a %xcc,.+8
1515 add c_3,t_2,c_3
1516 addcc c_12,t_1,t_1
1517 bcs,a %xcc,.+8
1518 add c_3,t_2,c_3
1519 srlx t_1,32,c_12
1520 stuw t_1,rp(5) !r[5]=c3;
1521 or c_12,c_3,c_12
1522
1523 mulx a_3,a_3,t_1 !sqr_add_c(a,3,c1,c2,c3);
1524 addcc c_12,t_1,t_1
1525 srlx t_1,32,c_12
1526 stuw t_1,rp(6) !r[6]=c1;
1527 stuw c_12,rp(7) !r[7]=c2;
1528
1529 ret
1530 restore %g0,%g0,%o0
1531
1532.type bn_sqr_comba4,#function
1533.size bn_sqr_comba4,(.-bn_sqr_comba4)
1534
1535.align 32
diff --git a/src/lib/libcrypto/bn/asm/x86.pl b/src/lib/libcrypto/bn/asm/x86.pl
deleted file mode 100644
index 1bc4f1bb27..0000000000
--- a/src/lib/libcrypto/bn/asm/x86.pl
+++ /dev/null
@@ -1,28 +0,0 @@
1#!/usr/local/bin/perl
2
3push(@INC,"perlasm","../../perlasm");
4require "x86asm.pl";
5
6require("x86/mul_add.pl");
7require("x86/mul.pl");
8require("x86/sqr.pl");
9require("x86/div.pl");
10require("x86/add.pl");
11require("x86/sub.pl");
12require("x86/comba.pl");
13
14&asm_init($ARGV[0],$0);
15
16&bn_mul_add_words("bn_mul_add_words");
17&bn_mul_words("bn_mul_words");
18&bn_sqr_words("bn_sqr_words");
19&bn_div_words("bn_div_words");
20&bn_add_words("bn_add_words");
21&bn_sub_words("bn_sub_words");
22&bn_mul_comba("bn_mul_comba8",8);
23&bn_mul_comba("bn_mul_comba4",4);
24&bn_sqr_comba("bn_sqr_comba8",8);
25&bn_sqr_comba("bn_sqr_comba4",4);
26
27&asm_finish();
28
diff --git a/src/lib/libcrypto/bn/asm/x86/add.pl b/src/lib/libcrypto/bn/asm/x86/add.pl
deleted file mode 100644
index 0b5cf583e3..0000000000
--- a/src/lib/libcrypto/bn/asm/x86/add.pl
+++ /dev/null
@@ -1,76 +0,0 @@
1#!/usr/local/bin/perl
2# x86 assember
3
4sub bn_add_words
5 {
6 local($name)=@_;
7
8 &function_begin($name,"");
9
10 &comment("");
11 $a="esi";
12 $b="edi";
13 $c="eax";
14 $r="ebx";
15 $tmp1="ecx";
16 $tmp2="edx";
17 $num="ebp";
18
19 &mov($r,&wparam(0)); # get r
20 &mov($a,&wparam(1)); # get a
21 &mov($b,&wparam(2)); # get b
22 &mov($num,&wparam(3)); # get num
23 &xor($c,$c); # clear carry
24 &and($num,0xfffffff8); # num / 8
25
26 &jz(&label("aw_finish"));
27
28 &set_label("aw_loop",0);
29 for ($i=0; $i<8; $i++)
30 {
31 &comment("Round $i");
32
33 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
34 &mov($tmp2,&DWP($i*4,$b,"",0)); # *b
35 &add($tmp1,$c);
36 &mov($c,0);
37 &adc($c,$c);
38 &add($tmp1,$tmp2);
39 &adc($c,0);
40 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
41 }
42
43 &comment("");
44 &add($a,32);
45 &add($b,32);
46 &add($r,32);
47 &sub($num,8);
48 &jnz(&label("aw_loop"));
49
50 &set_label("aw_finish",0);
51 &mov($num,&wparam(3)); # get num
52 &and($num,7);
53 &jz(&label("aw_end"));
54
55 for ($i=0; $i<7; $i++)
56 {
57 &comment("Tail Round $i");
58 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
59 &mov($tmp2,&DWP($i*4,$b,"",0));# *b
60 &add($tmp1,$c);
61 &mov($c,0);
62 &adc($c,$c);
63 &add($tmp1,$tmp2);
64 &adc($c,0);
65 &dec($num) if ($i != 6);
66 &mov(&DWP($i*4,$r,"",0),$tmp1); # *a
67 &jz(&label("aw_end")) if ($i != 6);
68 }
69 &set_label("aw_end",0);
70
71# &mov("eax",$c); # $c is "eax"
72
73 &function_end($name);
74 }
75
761;
diff --git a/src/lib/libcrypto/bn/asm/x86/comba.pl b/src/lib/libcrypto/bn/asm/x86/comba.pl
deleted file mode 100644
index 2291253629..0000000000
--- a/src/lib/libcrypto/bn/asm/x86/comba.pl
+++ /dev/null
@@ -1,277 +0,0 @@
1#!/usr/local/bin/perl
2# x86 assember
3
4sub mul_add_c
5 {
6 local($a,$ai,$b,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;
7
8 # pos == -1 if eax and edx are pre-loaded, 0 to load from next
9 # words, and 1 if load return value
10
11 &comment("mul a[$ai]*b[$bi]");
12
13 # "eax" and "edx" will always be pre-loaded.
14 # &mov("eax",&DWP($ai*4,$a,"",0)) ;
15 # &mov("edx",&DWP($bi*4,$b,"",0));
16
17 &mul("edx");
18 &add($c0,"eax");
19 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0; # laod next a
20 &mov("eax",&wparam(0)) if $pos > 0; # load r[]
21 ###
22 &adc($c1,"edx");
23 &mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 0; # laod next b
24 &mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 1; # laod next b
25 ###
26 &adc($c2,0);
27 # is pos > 1, it means it is the last loop
28 &mov(&DWP($i*4,"eax","",0),$c0) if $pos > 0; # save r[];
29 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # laod next a
30 }
31
32sub sqr_add_c
33 {
34 local($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;
35
36 # pos == -1 if eax and edx are pre-loaded, 0 to load from next
37 # words, and 1 if load return value
38
39 &comment("sqr a[$ai]*a[$bi]");
40
41 # "eax" and "edx" will always be pre-loaded.
42 # &mov("eax",&DWP($ai*4,$a,"",0)) ;
43 # &mov("edx",&DWP($bi*4,$b,"",0));
44
45 if ($ai == $bi)
46 { &mul("eax");}
47 else
48 { &mul("edx");}
49 &add($c0,"eax");
50 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0; # load next a
51 ###
52 &adc($c1,"edx");
53 &mov("edx",&DWP(($nb)*4,$a,"",0)) if ($pos == 1) && ($na != $nb);
54 ###
55 &adc($c2,0);
56 # is pos > 1, it means it is the last loop
57 &mov(&DWP($i*4,$r,"",0),$c0) if $pos > 0; # save r[];
58 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # load next b
59 }
60
61sub sqr_add_c2
62 {
63 local($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;
64
65 # pos == -1 if eax and edx are pre-loaded, 0 to load from next
66 # words, and 1 if load return value
67
68 &comment("sqr a[$ai]*a[$bi]");
69
70 # "eax" and "edx" will always be pre-loaded.
71 # &mov("eax",&DWP($ai*4,$a,"",0)) ;
72 # &mov("edx",&DWP($bi*4,$a,"",0));
73
74 if ($ai == $bi)
75 { &mul("eax");}
76 else
77 { &mul("edx");}
78 &add("eax","eax");
79 ###
80 &adc("edx","edx");
81 ###
82 &adc($c2,0);
83 &add($c0,"eax");
84 &adc($c1,"edx");
85 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0; # load next a
86 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # load next b
87 &adc($c2,0);
88 &mov(&DWP($i*4,$r,"",0),$c0) if $pos > 0; # save r[];
89 &mov("edx",&DWP(($nb)*4,$a,"",0)) if ($pos <= 1) && ($na != $nb);
90 ###
91 }
92
93sub bn_mul_comba
94 {
95 local($name,$num)=@_;
96 local($a,$b,$c0,$c1,$c2);
97 local($i,$as,$ae,$bs,$be,$ai,$bi);
98 local($tot,$end);
99
100 &function_begin_B($name,"");
101
102 $c0="ebx";
103 $c1="ecx";
104 $c2="ebp";
105 $a="esi";
106 $b="edi";
107
108 $as=0;
109 $ae=0;
110 $bs=0;
111 $be=0;
112 $tot=$num+$num-1;
113
114 &push("esi");
115 &mov($a,&wparam(1));
116 &push("edi");
117 &mov($b,&wparam(2));
118 &push("ebp");
119 &push("ebx");
120
121 &xor($c0,$c0);
122 &mov("eax",&DWP(0,$a,"",0)); # load the first word
123 &xor($c1,$c1);
124 &mov("edx",&DWP(0,$b,"",0)); # load the first second
125
126 for ($i=0; $i<$tot; $i++)
127 {
128 $ai=$as;
129 $bi=$bs;
130 $end=$be+1;
131
132 &comment("################## Calculate word $i");
133
134 for ($j=$bs; $j<$end; $j++)
135 {
136 &xor($c2,$c2) if ($j == $bs);
137 if (($j+1) == $end)
138 {
139 $v=1;
140 $v=2 if (($i+1) == $tot);
141 }
142 else
143 { $v=0; }
144 if (($j+1) != $end)
145 {
146 $na=($ai-1);
147 $nb=($bi+1);
148 }
149 else
150 {
151 $na=$as+($i < ($num-1));
152 $nb=$bs+($i >= ($num-1));
153 }
154#printf STDERR "[$ai,$bi] -> [$na,$nb]\n";
155 &mul_add_c($a,$ai,$b,$bi,$c0,$c1,$c2,$v,$i,$na,$nb);
156 if ($v)
157 {
158 &comment("saved r[$i]");
159 # &mov("eax",&wparam(0));
160 # &mov(&DWP($i*4,"eax","",0),$c0);
161 ($c0,$c1,$c2)=($c1,$c2,$c0);
162 }
163 $ai--;
164 $bi++;
165 }
166 $as++ if ($i < ($num-1));
167 $ae++ if ($i >= ($num-1));
168
169 $bs++ if ($i >= ($num-1));
170 $be++ if ($i < ($num-1));
171 }
172 &comment("save r[$i]");
173 # &mov("eax",&wparam(0));
174 &mov(&DWP($i*4,"eax","",0),$c0);
175
176 &pop("ebx");
177 &pop("ebp");
178 &pop("edi");
179 &pop("esi");
180 &ret();
181 &function_end_B($name);
182 }
183
184sub bn_sqr_comba
185 {
186 local($name,$num)=@_;
187 local($r,$a,$c0,$c1,$c2)=@_;
188 local($i,$as,$ae,$bs,$be,$ai,$bi);
189 local($b,$tot,$end,$half);
190
191 &function_begin_B($name,"");
192
193 $c0="ebx";
194 $c1="ecx";
195 $c2="ebp";
196 $a="esi";
197 $r="edi";
198
199 &push("esi");
200 &push("edi");
201 &push("ebp");
202 &push("ebx");
203 &mov($r,&wparam(0));
204 &mov($a,&wparam(1));
205 &xor($c0,$c0);
206 &xor($c1,$c1);
207 &mov("eax",&DWP(0,$a,"",0)); # load the first word
208
209 $as=0;
210 $ae=0;
211 $bs=0;
212 $be=0;
213 $tot=$num+$num-1;
214
215 for ($i=0; $i<$tot; $i++)
216 {
217 $ai=$as;
218 $bi=$bs;
219 $end=$be+1;
220
221 &comment("############### Calculate word $i");
222 for ($j=$bs; $j<$end; $j++)
223 {
224 &xor($c2,$c2) if ($j == $bs);
225 if (($ai-1) < ($bi+1))
226 {
227 $v=1;
228 $v=2 if ($i+1) == $tot;
229 }
230 else
231 { $v=0; }
232 if (!$v)
233 {
234 $na=$ai-1;
235 $nb=$bi+1;
236 }
237 else
238 {
239 $na=$as+($i < ($num-1));
240 $nb=$bs+($i >= ($num-1));
241 }
242 if ($ai == $bi)
243 {
244 &sqr_add_c($r,$a,$ai,$bi,
245 $c0,$c1,$c2,$v,$i,$na,$nb);
246 }
247 else
248 {
249 &sqr_add_c2($r,$a,$ai,$bi,
250 $c0,$c1,$c2,$v,$i,$na,$nb);
251 }
252 if ($v)
253 {
254 &comment("saved r[$i]");
255 #&mov(&DWP($i*4,$r,"",0),$c0);
256 ($c0,$c1,$c2)=($c1,$c2,$c0);
257 last;
258 }
259 $ai--;
260 $bi++;
261 }
262 $as++ if ($i < ($num-1));
263 $ae++ if ($i >= ($num-1));
264
265 $bs++ if ($i >= ($num-1));
266 $be++ if ($i < ($num-1));
267 }
268 &mov(&DWP($i*4,$r,"",0),$c0);
269 &pop("ebx");
270 &pop("ebp");
271 &pop("edi");
272 &pop("esi");
273 &ret();
274 &function_end_B($name);
275 }
276
2771;
diff --git a/src/lib/libcrypto/bn/asm/x86/div.pl b/src/lib/libcrypto/bn/asm/x86/div.pl
deleted file mode 100644
index 0e90152caa..0000000000
--- a/src/lib/libcrypto/bn/asm/x86/div.pl
+++ /dev/null
@@ -1,15 +0,0 @@
1#!/usr/local/bin/perl
2# x86 assember
3
4sub bn_div_words
5 {
6 local($name)=@_;
7
8 &function_begin($name,"");
9 &mov("edx",&wparam(0)); #
10 &mov("eax",&wparam(1)); #
11 &mov("ebx",&wparam(2)); #
12 &div("ebx");
13 &function_end($name);
14 }
151;
diff --git a/src/lib/libcrypto/bn/asm/x86/mul.pl b/src/lib/libcrypto/bn/asm/x86/mul.pl
deleted file mode 100644
index 674cb9b055..0000000000
--- a/src/lib/libcrypto/bn/asm/x86/mul.pl
+++ /dev/null
@@ -1,77 +0,0 @@
1#!/usr/local/bin/perl
2# x86 assember
3
4sub bn_mul_words
5 {
6 local($name)=@_;
7
8 &function_begin($name,"");
9
10 &comment("");
11 $Low="eax";
12 $High="edx";
13 $a="ebx";
14 $w="ecx";
15 $r="edi";
16 $c="esi";
17 $num="ebp";
18
19 &xor($c,$c); # clear carry
20 &mov($r,&wparam(0)); #
21 &mov($a,&wparam(1)); #
22 &mov($num,&wparam(2)); #
23 &mov($w,&wparam(3)); #
24
25 &and($num,0xfffffff8); # num / 8
26 &jz(&label("mw_finish"));
27
28 &set_label("mw_loop",0);
29 for ($i=0; $i<32; $i+=4)
30 {
31 &comment("Round $i");
32
33 &mov("eax",&DWP($i,$a,"",0)); # *a
34 &mul($w); # *a * w
35 &add("eax",$c); # L(t)+=c
36 # XXX
37
38 &adc("edx",0); # H(t)+=carry
39 &mov(&DWP($i,$r,"",0),"eax"); # *r= L(t);
40
41 &mov($c,"edx"); # c= H(t);
42 }
43
44 &comment("");
45 &add($a,32);
46 &add($r,32);
47 &sub($num,8);
48 &jz(&label("mw_finish"));
49 &jmp(&label("mw_loop"));
50
51 &set_label("mw_finish",0);
52 &mov($num,&wparam(2)); # get num
53 &and($num,7);
54 &jnz(&label("mw_finish2"));
55 &jmp(&label("mw_end"));
56
57 &set_label("mw_finish2",1);
58 for ($i=0; $i<7; $i++)
59 {
60 &comment("Tail Round $i");
61 &mov("eax",&DWP($i*4,$a,"",0));# *a
62 &mul($w); # *a * w
63 &add("eax",$c); # L(t)+=c
64 # XXX
65 &adc("edx",0); # H(t)+=carry
66 &mov(&DWP($i*4,$r,"",0),"eax");# *r= L(t);
67 &mov($c,"edx"); # c= H(t);
68 &dec($num) if ($i != 7-1);
69 &jz(&label("mw_end")) if ($i != 7-1);
70 }
71 &set_label("mw_end",0);
72 &mov("eax",$c);
73
74 &function_end($name);
75 }
76
771;
diff --git a/src/lib/libcrypto/bn/asm/x86/mul_add.pl b/src/lib/libcrypto/bn/asm/x86/mul_add.pl
deleted file mode 100644
index 61830d3a90..0000000000
--- a/src/lib/libcrypto/bn/asm/x86/mul_add.pl
+++ /dev/null
@@ -1,87 +0,0 @@
1#!/usr/local/bin/perl
2# x86 assember
3
4sub bn_mul_add_words
5 {
6 local($name)=@_;
7
8 &function_begin($name,"");
9
10 &comment("");
11 $Low="eax";
12 $High="edx";
13 $a="ebx";
14 $w="ebp";
15 $r="edi";
16 $c="esi";
17
18 &xor($c,$c); # clear carry
19 &mov($r,&wparam(0)); #
20
21 &mov("ecx",&wparam(2)); #
22 &mov($a,&wparam(1)); #
23
24 &and("ecx",0xfffffff8); # num / 8
25 &mov($w,&wparam(3)); #
26
27 &push("ecx"); # Up the stack for a tmp variable
28
29 &jz(&label("maw_finish"));
30
31 &set_label("maw_loop",0);
32
33 &mov(&swtmp(0),"ecx"); #
34
35 for ($i=0; $i<32; $i+=4)
36 {
37 &comment("Round $i");
38
39 &mov("eax",&DWP($i,$a,"",0)); # *a
40 &mul($w); # *a * w
41 &add("eax",$c); # L(t)+= *r
42 &mov($c,&DWP($i,$r,"",0)); # L(t)+= *r
43 &adc("edx",0); # H(t)+=carry
44 &add("eax",$c); # L(t)+=c
45 &adc("edx",0); # H(t)+=carry
46 &mov(&DWP($i,$r,"",0),"eax"); # *r= L(t);
47 &mov($c,"edx"); # c= H(t);
48 }
49
50 &comment("");
51 &mov("ecx",&swtmp(0)); #
52 &add($a,32);
53 &add($r,32);
54 &sub("ecx",8);
55 &jnz(&label("maw_loop"));
56
57 &set_label("maw_finish",0);
58 &mov("ecx",&wparam(2)); # get num
59 &and("ecx",7);
60 &jnz(&label("maw_finish2")); # helps branch prediction
61 &jmp(&label("maw_end"));
62
63 &set_label("maw_finish2",1);
64 for ($i=0; $i<7; $i++)
65 {
66 &comment("Tail Round $i");
67 &mov("eax",&DWP($i*4,$a,"",0));# *a
68 &mul($w); # *a * w
69 &add("eax",$c); # L(t)+=c
70 &mov($c,&DWP($i*4,$r,"",0)); # L(t)+= *r
71 &adc("edx",0); # H(t)+=carry
72 &add("eax",$c);
73 &adc("edx",0); # H(t)+=carry
74 &dec("ecx") if ($i != 7-1);
75 &mov(&DWP($i*4,$r,"",0),"eax"); # *r= L(t);
76 &mov($c,"edx"); # c= H(t);
77 &jz(&label("maw_end")) if ($i != 7-1);
78 }
79 &set_label("maw_end",0);
80 &mov("eax",$c);
81
82 &pop("ecx"); # clear variable from
83
84 &function_end($name);
85 }
86
871;
diff --git a/src/lib/libcrypto/bn/asm/x86/sqr.pl b/src/lib/libcrypto/bn/asm/x86/sqr.pl
deleted file mode 100644
index 1f90993cf6..0000000000
--- a/src/lib/libcrypto/bn/asm/x86/sqr.pl
+++ /dev/null
@@ -1,60 +0,0 @@
1#!/usr/local/bin/perl
2# x86 assember
3
4sub bn_sqr_words
5 {
6 local($name)=@_;
7
8 &function_begin($name,"");
9
10 &comment("");
11 $r="esi";
12 $a="edi";
13 $num="ebx";
14
15 &mov($r,&wparam(0)); #
16 &mov($a,&wparam(1)); #
17 &mov($num,&wparam(2)); #
18
19 &and($num,0xfffffff8); # num / 8
20 &jz(&label("sw_finish"));
21
22 &set_label("sw_loop",0);
23 for ($i=0; $i<32; $i+=4)
24 {
25 &comment("Round $i");
26 &mov("eax",&DWP($i,$a,"",0)); # *a
27 # XXX
28 &mul("eax"); # *a * *a
29 &mov(&DWP($i*2,$r,"",0),"eax"); #
30 &mov(&DWP($i*2+4,$r,"",0),"edx");#
31 }
32
33 &comment("");
34 &add($a,32);
35 &add($r,64);
36 &sub($num,8);
37 &jnz(&label("sw_loop"));
38
39 &set_label("sw_finish",0);
40 &mov($num,&wparam(2)); # get num
41 &and($num,7);
42 &jz(&label("sw_end"));
43
44 for ($i=0; $i<7; $i++)
45 {
46 &comment("Tail Round $i");
47 &mov("eax",&DWP($i*4,$a,"",0)); # *a
48 # XXX
49 &mul("eax"); # *a * *a
50 &mov(&DWP($i*8,$r,"",0),"eax"); #
51 &dec($num) if ($i != 7-1);
52 &mov(&DWP($i*8+4,$r,"",0),"edx");
53 &jz(&label("sw_end")) if ($i != 7-1);
54 }
55 &set_label("sw_end",0);
56
57 &function_end($name);
58 }
59
601;
diff --git a/src/lib/libcrypto/bn/asm/x86/sub.pl b/src/lib/libcrypto/bn/asm/x86/sub.pl
deleted file mode 100644
index 837b0e1b07..0000000000
--- a/src/lib/libcrypto/bn/asm/x86/sub.pl
+++ /dev/null
@@ -1,76 +0,0 @@
1#!/usr/local/bin/perl
2# x86 assember
3
4sub bn_sub_words
5 {
6 local($name)=@_;
7
8 &function_begin($name,"");
9
10 &comment("");
11 $a="esi";
12 $b="edi";
13 $c="eax";
14 $r="ebx";
15 $tmp1="ecx";
16 $tmp2="edx";
17 $num="ebp";
18
19 &mov($r,&wparam(0)); # get r
20 &mov($a,&wparam(1)); # get a
21 &mov($b,&wparam(2)); # get b
22 &mov($num,&wparam(3)); # get num
23 &xor($c,$c); # clear carry
24 &and($num,0xfffffff8); # num / 8
25
26 &jz(&label("aw_finish"));
27
28 &set_label("aw_loop",0);
29 for ($i=0; $i<8; $i++)
30 {
31 &comment("Round $i");
32
33 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
34 &mov($tmp2,&DWP($i*4,$b,"",0)); # *b
35 &sub($tmp1,$c);
36 &mov($c,0);
37 &adc($c,$c);
38 &sub($tmp1,$tmp2);
39 &adc($c,0);
40 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
41 }
42
43 &comment("");
44 &add($a,32);
45 &add($b,32);
46 &add($r,32);
47 &sub($num,8);
48 &jnz(&label("aw_loop"));
49
50 &set_label("aw_finish",0);
51 &mov($num,&wparam(3)); # get num
52 &and($num,7);
53 &jz(&label("aw_end"));
54
55 for ($i=0; $i<7; $i++)
56 {
57 &comment("Tail Round $i");
58 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
59 &mov($tmp2,&DWP($i*4,$b,"",0));# *b
60 &sub($tmp1,$c);
61 &mov($c,0);
62 &adc($c,$c);
63 &sub($tmp1,$tmp2);
64 &adc($c,0);
65 &dec($num) if ($i != 6);
66 &mov(&DWP($i*4,$r,"",0),$tmp1); # *a
67 &jz(&label("aw_end")) if ($i != 6);
68 }
69 &set_label("aw_end",0);
70
71# &mov("eax",$c); # $c is "eax"
72
73 &function_end($name);
74 }
75
761;