summaryrefslogtreecommitdiff
path: root/src/lib/libcrypto/bn
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib/libcrypto/bn')
-rw-r--r--src/lib/libcrypto/bn/asm/bn-586.pl593
-rw-r--r--src/lib/libcrypto/bn/asm/co-586.pl286
-rw-r--r--src/lib/libcrypto/bn/asm/ia64.S1605
-rw-r--r--src/lib/libcrypto/bn/asm/pa-risc2.s1618
-rw-r--r--src/lib/libcrypto/bn/asm/pa-risc2W.s1605
-rw-r--r--src/lib/libcrypto/bn/asm/sparcv8.S1458
-rw-r--r--src/lib/libcrypto/bn/asm/sparcv8plus.S1535
-rw-r--r--src/lib/libcrypto/bn/asm/x86.pl28
-rw-r--r--src/lib/libcrypto/bn/asm/x86/add.pl76
-rw-r--r--src/lib/libcrypto/bn/asm/x86/comba.pl277
-rw-r--r--src/lib/libcrypto/bn/asm/x86/div.pl15
-rw-r--r--src/lib/libcrypto/bn/asm/x86/mul.pl77
-rw-r--r--src/lib/libcrypto/bn/asm/x86/mul_add.pl87
-rw-r--r--src/lib/libcrypto/bn/asm/x86/sqr.pl60
-rw-r--r--src/lib/libcrypto/bn/asm/x86/sub.pl76
-rw-r--r--src/lib/libcrypto/bn/asm/x86_64-gcc.c575
-rw-r--r--src/lib/libcrypto/bn/bn.h549
-rw-r--r--src/lib/libcrypto/bn/bn_add.c309
-rw-r--r--src/lib/libcrypto/bn/bn_asm.c832
-rw-r--r--src/lib/libcrypto/bn/bn_blind.c144
-rw-r--r--src/lib/libcrypto/bn/bn_ctx.c155
-rw-r--r--src/lib/libcrypto/bn/bn_div.c387
-rw-r--r--src/lib/libcrypto/bn/bn_err.c131
-rw-r--r--src/lib/libcrypto/bn/bn_exp.c747
-rw-r--r--src/lib/libcrypto/bn/bn_exp2.c313
-rw-r--r--src/lib/libcrypto/bn/bn_gcd.c490
-rw-r--r--src/lib/libcrypto/bn/bn_kron.c182
-rw-r--r--src/lib/libcrypto/bn/bn_lcl.h453
-rw-r--r--src/lib/libcrypto/bn/bn_lib.c824
-rw-r--r--src/lib/libcrypto/bn/bn_mod.c296
-rw-r--r--src/lib/libcrypto/bn/bn_mont.c349
-rw-r--r--src/lib/libcrypto/bn/bn_mpi.c129
-rw-r--r--src/lib/libcrypto/bn/bn_mul.c802
-rw-r--r--src/lib/libcrypto/bn/bn_prime.c466
-rw-r--r--src/lib/libcrypto/bn/bn_prime.h325
-rw-r--r--src/lib/libcrypto/bn/bn_prime.pl117
-rw-r--r--src/lib/libcrypto/bn/bn_print.c333
-rw-r--r--src/lib/libcrypto/bn/bn_rand.c291
-rw-r--r--src/lib/libcrypto/bn/bn_recp.c230
-rw-r--r--src/lib/libcrypto/bn/bn_shift.c205
-rw-r--r--src/lib/libcrypto/bn/bn_sqr.c288
-rw-r--r--src/lib/libcrypto/bn/bn_sqrt.c387
-rw-r--r--src/lib/libcrypto/bn/bn_word.c208
43 files changed, 0 insertions, 19913 deletions
diff --git a/src/lib/libcrypto/bn/asm/bn-586.pl b/src/lib/libcrypto/bn/asm/bn-586.pl
deleted file mode 100644
index c4de4a2bee..0000000000
--- a/src/lib/libcrypto/bn/asm/bn-586.pl
+++ /dev/null
@@ -1,593 +0,0 @@
1#!/usr/local/bin/perl
2
3push(@INC,"perlasm","../../perlasm");
4require "x86asm.pl";
5
6&asm_init($ARGV[0],$0);
7
8&bn_mul_add_words("bn_mul_add_words");
9&bn_mul_words("bn_mul_words");
10&bn_sqr_words("bn_sqr_words");
11&bn_div_words("bn_div_words");
12&bn_add_words("bn_add_words");
13&bn_sub_words("bn_sub_words");
14#&bn_sub_part_words("bn_sub_part_words");
15
16&asm_finish();
17
18sub bn_mul_add_words
19 {
20 local($name)=@_;
21
22 &function_begin($name,"");
23
24 &comment("");
25 $Low="eax";
26 $High="edx";
27 $a="ebx";
28 $w="ebp";
29 $r="edi";
30 $c="esi";
31
32 &xor($c,$c); # clear carry
33 &mov($r,&wparam(0)); #
34
35 &mov("ecx",&wparam(2)); #
36 &mov($a,&wparam(1)); #
37
38 &and("ecx",0xfffffff8); # num / 8
39 &mov($w,&wparam(3)); #
40
41 &push("ecx"); # Up the stack for a tmp variable
42
43 &jz(&label("maw_finish"));
44
45 &set_label("maw_loop",0);
46
47 &mov(&swtmp(0),"ecx"); #
48
49 for ($i=0; $i<32; $i+=4)
50 {
51 &comment("Round $i");
52
53 &mov("eax",&DWP($i,$a,"",0)); # *a
54 &mul($w); # *a * w
55 &add("eax",$c); # L(t)+= *r
56 &mov($c,&DWP($i,$r,"",0)); # L(t)+= *r
57 &adc("edx",0); # H(t)+=carry
58 &add("eax",$c); # L(t)+=c
59 &adc("edx",0); # H(t)+=carry
60 &mov(&DWP($i,$r,"",0),"eax"); # *r= L(t);
61 &mov($c,"edx"); # c= H(t);
62 }
63
64 &comment("");
65 &mov("ecx",&swtmp(0)); #
66 &add($a,32);
67 &add($r,32);
68 &sub("ecx",8);
69 &jnz(&label("maw_loop"));
70
71 &set_label("maw_finish",0);
72 &mov("ecx",&wparam(2)); # get num
73 &and("ecx",7);
74 &jnz(&label("maw_finish2")); # helps branch prediction
75 &jmp(&label("maw_end"));
76
77 &set_label("maw_finish2",1);
78 for ($i=0; $i<7; $i++)
79 {
80 &comment("Tail Round $i");
81 &mov("eax",&DWP($i*4,$a,"",0));# *a
82 &mul($w); # *a * w
83 &add("eax",$c); # L(t)+=c
84 &mov($c,&DWP($i*4,$r,"",0)); # L(t)+= *r
85 &adc("edx",0); # H(t)+=carry
86 &add("eax",$c);
87 &adc("edx",0); # H(t)+=carry
88 &dec("ecx") if ($i != 7-1);
89 &mov(&DWP($i*4,$r,"",0),"eax"); # *r= L(t);
90 &mov($c,"edx"); # c= H(t);
91 &jz(&label("maw_end")) if ($i != 7-1);
92 }
93 &set_label("maw_end",0);
94 &mov("eax",$c);
95
96 &pop("ecx"); # clear variable from
97
98 &function_end($name);
99 }
100
101sub bn_mul_words
102 {
103 local($name)=@_;
104
105 &function_begin($name,"");
106
107 &comment("");
108 $Low="eax";
109 $High="edx";
110 $a="ebx";
111 $w="ecx";
112 $r="edi";
113 $c="esi";
114 $num="ebp";
115
116 &xor($c,$c); # clear carry
117 &mov($r,&wparam(0)); #
118 &mov($a,&wparam(1)); #
119 &mov($num,&wparam(2)); #
120 &mov($w,&wparam(3)); #
121
122 &and($num,0xfffffff8); # num / 8
123 &jz(&label("mw_finish"));
124
125 &set_label("mw_loop",0);
126 for ($i=0; $i<32; $i+=4)
127 {
128 &comment("Round $i");
129
130 &mov("eax",&DWP($i,$a,"",0)); # *a
131 &mul($w); # *a * w
132 &add("eax",$c); # L(t)+=c
133 # XXX
134
135 &adc("edx",0); # H(t)+=carry
136 &mov(&DWP($i,$r,"",0),"eax"); # *r= L(t);
137
138 &mov($c,"edx"); # c= H(t);
139 }
140
141 &comment("");
142 &add($a,32);
143 &add($r,32);
144 &sub($num,8);
145 &jz(&label("mw_finish"));
146 &jmp(&label("mw_loop"));
147
148 &set_label("mw_finish",0);
149 &mov($num,&wparam(2)); # get num
150 &and($num,7);
151 &jnz(&label("mw_finish2"));
152 &jmp(&label("mw_end"));
153
154 &set_label("mw_finish2",1);
155 for ($i=0; $i<7; $i++)
156 {
157 &comment("Tail Round $i");
158 &mov("eax",&DWP($i*4,$a,"",0));# *a
159 &mul($w); # *a * w
160 &add("eax",$c); # L(t)+=c
161 # XXX
162 &adc("edx",0); # H(t)+=carry
163 &mov(&DWP($i*4,$r,"",0),"eax");# *r= L(t);
164 &mov($c,"edx"); # c= H(t);
165 &dec($num) if ($i != 7-1);
166 &jz(&label("mw_end")) if ($i != 7-1);
167 }
168 &set_label("mw_end",0);
169 &mov("eax",$c);
170
171 &function_end($name);
172 }
173
174sub bn_sqr_words
175 {
176 local($name)=@_;
177
178 &function_begin($name,"");
179
180 &comment("");
181 $r="esi";
182 $a="edi";
183 $num="ebx";
184
185 &mov($r,&wparam(0)); #
186 &mov($a,&wparam(1)); #
187 &mov($num,&wparam(2)); #
188
189 &and($num,0xfffffff8); # num / 8
190 &jz(&label("sw_finish"));
191
192 &set_label("sw_loop",0);
193 for ($i=0; $i<32; $i+=4)
194 {
195 &comment("Round $i");
196 &mov("eax",&DWP($i,$a,"",0)); # *a
197 # XXX
198 &mul("eax"); # *a * *a
199 &mov(&DWP($i*2,$r,"",0),"eax"); #
200 &mov(&DWP($i*2+4,$r,"",0),"edx");#
201 }
202
203 &comment("");
204 &add($a,32);
205 &add($r,64);
206 &sub($num,8);
207 &jnz(&label("sw_loop"));
208
209 &set_label("sw_finish",0);
210 &mov($num,&wparam(2)); # get num
211 &and($num,7);
212 &jz(&label("sw_end"));
213
214 for ($i=0; $i<7; $i++)
215 {
216 &comment("Tail Round $i");
217 &mov("eax",&DWP($i*4,$a,"",0)); # *a
218 # XXX
219 &mul("eax"); # *a * *a
220 &mov(&DWP($i*8,$r,"",0),"eax"); #
221 &dec($num) if ($i != 7-1);
222 &mov(&DWP($i*8+4,$r,"",0),"edx");
223 &jz(&label("sw_end")) if ($i != 7-1);
224 }
225 &set_label("sw_end",0);
226
227 &function_end($name);
228 }
229
230sub bn_div_words
231 {
232 local($name)=@_;
233
234 &function_begin($name,"");
235 &mov("edx",&wparam(0)); #
236 &mov("eax",&wparam(1)); #
237 &mov("ebx",&wparam(2)); #
238 &div("ebx");
239 &function_end($name);
240 }
241
242sub bn_add_words
243 {
244 local($name)=@_;
245
246 &function_begin($name,"");
247
248 &comment("");
249 $a="esi";
250 $b="edi";
251 $c="eax";
252 $r="ebx";
253 $tmp1="ecx";
254 $tmp2="edx";
255 $num="ebp";
256
257 &mov($r,&wparam(0)); # get r
258 &mov($a,&wparam(1)); # get a
259 &mov($b,&wparam(2)); # get b
260 &mov($num,&wparam(3)); # get num
261 &xor($c,$c); # clear carry
262 &and($num,0xfffffff8); # num / 8
263
264 &jz(&label("aw_finish"));
265
266 &set_label("aw_loop",0);
267 for ($i=0; $i<8; $i++)
268 {
269 &comment("Round $i");
270
271 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
272 &mov($tmp2,&DWP($i*4,$b,"",0)); # *b
273 &add($tmp1,$c);
274 &mov($c,0);
275 &adc($c,$c);
276 &add($tmp1,$tmp2);
277 &adc($c,0);
278 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
279 }
280
281 &comment("");
282 &add($a,32);
283 &add($b,32);
284 &add($r,32);
285 &sub($num,8);
286 &jnz(&label("aw_loop"));
287
288 &set_label("aw_finish",0);
289 &mov($num,&wparam(3)); # get num
290 &and($num,7);
291 &jz(&label("aw_end"));
292
293 for ($i=0; $i<7; $i++)
294 {
295 &comment("Tail Round $i");
296 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
297 &mov($tmp2,&DWP($i*4,$b,"",0));# *b
298 &add($tmp1,$c);
299 &mov($c,0);
300 &adc($c,$c);
301 &add($tmp1,$tmp2);
302 &adc($c,0);
303 &dec($num) if ($i != 6);
304 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
305 &jz(&label("aw_end")) if ($i != 6);
306 }
307 &set_label("aw_end",0);
308
309# &mov("eax",$c); # $c is "eax"
310
311 &function_end($name);
312 }
313
314sub bn_sub_words
315 {
316 local($name)=@_;
317
318 &function_begin($name,"");
319
320 &comment("");
321 $a="esi";
322 $b="edi";
323 $c="eax";
324 $r="ebx";
325 $tmp1="ecx";
326 $tmp2="edx";
327 $num="ebp";
328
329 &mov($r,&wparam(0)); # get r
330 &mov($a,&wparam(1)); # get a
331 &mov($b,&wparam(2)); # get b
332 &mov($num,&wparam(3)); # get num
333 &xor($c,$c); # clear carry
334 &and($num,0xfffffff8); # num / 8
335
336 &jz(&label("aw_finish"));
337
338 &set_label("aw_loop",0);
339 for ($i=0; $i<8; $i++)
340 {
341 &comment("Round $i");
342
343 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
344 &mov($tmp2,&DWP($i*4,$b,"",0)); # *b
345 &sub($tmp1,$c);
346 &mov($c,0);
347 &adc($c,$c);
348 &sub($tmp1,$tmp2);
349 &adc($c,0);
350 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
351 }
352
353 &comment("");
354 &add($a,32);
355 &add($b,32);
356 &add($r,32);
357 &sub($num,8);
358 &jnz(&label("aw_loop"));
359
360 &set_label("aw_finish",0);
361 &mov($num,&wparam(3)); # get num
362 &and($num,7);
363 &jz(&label("aw_end"));
364
365 for ($i=0; $i<7; $i++)
366 {
367 &comment("Tail Round $i");
368 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
369 &mov($tmp2,&DWP($i*4,$b,"",0));# *b
370 &sub($tmp1,$c);
371 &mov($c,0);
372 &adc($c,$c);
373 &sub($tmp1,$tmp2);
374 &adc($c,0);
375 &dec($num) if ($i != 6);
376 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
377 &jz(&label("aw_end")) if ($i != 6);
378 }
379 &set_label("aw_end",0);
380
381# &mov("eax",$c); # $c is "eax"
382
383 &function_end($name);
384 }
385
386sub bn_sub_part_words
387 {
388 local($name)=@_;
389
390 &function_begin($name,"");
391
392 &comment("");
393 $a="esi";
394 $b="edi";
395 $c="eax";
396 $r="ebx";
397 $tmp1="ecx";
398 $tmp2="edx";
399 $num="ebp";
400
401 &mov($r,&wparam(0)); # get r
402 &mov($a,&wparam(1)); # get a
403 &mov($b,&wparam(2)); # get b
404 &mov($num,&wparam(3)); # get num
405 &xor($c,$c); # clear carry
406 &and($num,0xfffffff8); # num / 8
407
408 &jz(&label("aw_finish"));
409
410 &set_label("aw_loop",0);
411 for ($i=0; $i<8; $i++)
412 {
413 &comment("Round $i");
414
415 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
416 &mov($tmp2,&DWP($i*4,$b,"",0)); # *b
417 &sub($tmp1,$c);
418 &mov($c,0);
419 &adc($c,$c);
420 &sub($tmp1,$tmp2);
421 &adc($c,0);
422 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
423 }
424
425 &comment("");
426 &add($a,32);
427 &add($b,32);
428 &add($r,32);
429 &sub($num,8);
430 &jnz(&label("aw_loop"));
431
432 &set_label("aw_finish",0);
433 &mov($num,&wparam(3)); # get num
434 &and($num,7);
435 &jz(&label("aw_end"));
436
437 for ($i=0; $i<7; $i++)
438 {
439 &comment("Tail Round $i");
440 &mov($tmp1,&DWP(0,$a,"",0)); # *a
441 &mov($tmp2,&DWP(0,$b,"",0));# *b
442 &sub($tmp1,$c);
443 &mov($c,0);
444 &adc($c,$c);
445 &sub($tmp1,$tmp2);
446 &adc($c,0);
447 &mov(&DWP(0,$r,"",0),$tmp1); # *r
448 &add($a, 4);
449 &add($b, 4);
450 &add($r, 4);
451 &dec($num) if ($i != 6);
452 &jz(&label("aw_end")) if ($i != 6);
453 }
454 &set_label("aw_end",0);
455
456 &cmp(&wparam(4),0);
457 &je(&label("pw_end"));
458
459 &mov($num,&wparam(4)); # get dl
460 &cmp($num,0);
461 &je(&label("pw_end"));
462 &jge(&label("pw_pos"));
463
464 &comment("pw_neg");
465 &mov($tmp2,0);
466 &sub($tmp2,$num);
467 &mov($num,$tmp2);
468 &and($num,0xfffffff8); # num / 8
469 &jz(&label("pw_neg_finish"));
470
471 &set_label("pw_neg_loop",0);
472 for ($i=0; $i<8; $i++)
473 {
474 &comment("dl<0 Round $i");
475
476 &mov($tmp1,0);
477 &mov($tmp2,&DWP($i*4,$b,"",0)); # *b
478 &sub($tmp1,$c);
479 &mov($c,0);
480 &adc($c,$c);
481 &sub($tmp1,$tmp2);
482 &adc($c,0);
483 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
484 }
485
486 &comment("");
487 &add($b,32);
488 &add($r,32);
489 &sub($num,8);
490 &jnz(&label("pw_neg_loop"));
491
492 &set_label("pw_neg_finish",0);
493 &mov($tmp2,&wparam(4)); # get dl
494 &mov($num,0);
495 &sub($num,$tmp2);
496 &and($num,7);
497 &jz(&label("pw_end"));
498
499 for ($i=0; $i<7; $i++)
500 {
501 &comment("dl<0 Tail Round $i");
502 &mov($tmp1,0);
503 &mov($tmp2,&DWP($i*4,$b,"",0));# *b
504 &sub($tmp1,$c);
505 &mov($c,0);
506 &adc($c,$c);
507 &sub($tmp1,$tmp2);
508 &adc($c,0);
509 &dec($num) if ($i != 6);
510 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
511 &jz(&label("pw_end")) if ($i != 6);
512 }
513
514 &jmp(&label("pw_end"));
515
516 &set_label("pw_pos",0);
517
518 &and($num,0xfffffff8); # num / 8
519 &jz(&label("pw_pos_finish"));
520
521 &set_label("pw_pos_loop",0);
522
523 for ($i=0; $i<8; $i++)
524 {
525 &comment("dl>0 Round $i");
526
527 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
528 &sub($tmp1,$c);
529 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
530 &jnc(&label("pw_nc".$i));
531 }
532
533 &comment("");
534 &add($a,32);
535 &add($r,32);
536 &sub($num,8);
537 &jnz(&label("pw_pos_loop"));
538
539 &set_label("pw_pos_finish",0);
540 &mov($num,&wparam(4)); # get dl
541 &and($num,7);
542 &jz(&label("pw_end"));
543
544 for ($i=0; $i<7; $i++)
545 {
546 &comment("dl>0 Tail Round $i");
547 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
548 &sub($tmp1,$c);
549 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
550 &jnc(&label("pw_tail_nc".$i));
551 &dec($num) if ($i != 6);
552 &jz(&label("pw_end")) if ($i != 6);
553 }
554 &mov($c,1);
555 &jmp(&label("pw_end"));
556
557 &set_label("pw_nc_loop",0);
558 for ($i=0; $i<8; $i++)
559 {
560 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
561 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
562 &set_label("pw_nc".$i,0);
563 }
564
565 &comment("");
566 &add($a,32);
567 &add($r,32);
568 &sub($num,8);
569 &jnz(&label("pw_nc_loop"));
570
571 &mov($num,&wparam(4)); # get dl
572 &and($num,7);
573 &jz(&label("pw_nc_end"));
574
575 for ($i=0; $i<7; $i++)
576 {
577 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
578 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
579 &set_label("pw_tail_nc".$i,0);
580 &dec($num) if ($i != 6);
581 &jz(&label("pw_nc_end")) if ($i != 6);
582 }
583
584 &set_label("pw_nc_end",0);
585 &mov($c,0);
586
587 &set_label("pw_end",0);
588
589# &mov("eax",$c); # $c is "eax"
590
591 &function_end($name);
592 }
593
diff --git a/src/lib/libcrypto/bn/asm/co-586.pl b/src/lib/libcrypto/bn/asm/co-586.pl
deleted file mode 100644
index 5d962cb957..0000000000
--- a/src/lib/libcrypto/bn/asm/co-586.pl
+++ /dev/null
@@ -1,286 +0,0 @@
1#!/usr/local/bin/perl
2
3push(@INC,"perlasm","../../perlasm");
4require "x86asm.pl";
5
6&asm_init($ARGV[0],$0);
7
8&bn_mul_comba("bn_mul_comba8",8);
9&bn_mul_comba("bn_mul_comba4",4);
10&bn_sqr_comba("bn_sqr_comba8",8);
11&bn_sqr_comba("bn_sqr_comba4",4);
12
13&asm_finish();
14
15sub mul_add_c
16 {
17 local($a,$ai,$b,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;
18
19 # pos == -1 if eax and edx are pre-loaded, 0 to load from next
20 # words, and 1 if load return value
21
22 &comment("mul a[$ai]*b[$bi]");
23
24 # "eax" and "edx" will always be pre-loaded.
25 # &mov("eax",&DWP($ai*4,$a,"",0)) ;
26 # &mov("edx",&DWP($bi*4,$b,"",0));
27
28 &mul("edx");
29 &add($c0,"eax");
30 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0; # laod next a
31 &mov("eax",&wparam(0)) if $pos > 0; # load r[]
32 ###
33 &adc($c1,"edx");
34 &mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 0; # laod next b
35 &mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 1; # laod next b
36 ###
37 &adc($c2,0);
38 # is pos > 1, it means it is the last loop
39 &mov(&DWP($i*4,"eax","",0),$c0) if $pos > 0; # save r[];
40 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # laod next a
41 }
42
43sub sqr_add_c
44 {
45 local($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;
46
47 # pos == -1 if eax and edx are pre-loaded, 0 to load from next
48 # words, and 1 if load return value
49
50 &comment("sqr a[$ai]*a[$bi]");
51
52 # "eax" and "edx" will always be pre-loaded.
53 # &mov("eax",&DWP($ai*4,$a,"",0)) ;
54 # &mov("edx",&DWP($bi*4,$b,"",0));
55
56 if ($ai == $bi)
57 { &mul("eax");}
58 else
59 { &mul("edx");}
60 &add($c0,"eax");
61 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0; # load next a
62 ###
63 &adc($c1,"edx");
64 &mov("edx",&DWP(($nb)*4,$a,"",0)) if ($pos == 1) && ($na != $nb);
65 ###
66 &adc($c2,0);
67 # is pos > 1, it means it is the last loop
68 &mov(&DWP($i*4,$r,"",0),$c0) if $pos > 0; # save r[];
69 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # load next b
70 }
71
72sub sqr_add_c2
73 {
74 local($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;
75
76 # pos == -1 if eax and edx are pre-loaded, 0 to load from next
77 # words, and 1 if load return value
78
79 &comment("sqr a[$ai]*a[$bi]");
80
81 # "eax" and "edx" will always be pre-loaded.
82 # &mov("eax",&DWP($ai*4,$a,"",0)) ;
83 # &mov("edx",&DWP($bi*4,$a,"",0));
84
85 if ($ai == $bi)
86 { &mul("eax");}
87 else
88 { &mul("edx");}
89 &add("eax","eax");
90 ###
91 &adc("edx","edx");
92 ###
93 &adc($c2,0);
94 &add($c0,"eax");
95 &adc($c1,"edx");
96 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0; # load next a
97 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # load next b
98 &adc($c2,0);
99 &mov(&DWP($i*4,$r,"",0),$c0) if $pos > 0; # save r[];
100 &mov("edx",&DWP(($nb)*4,$a,"",0)) if ($pos <= 1) && ($na != $nb);
101 ###
102 }
103
104sub bn_mul_comba
105 {
106 local($name,$num)=@_;
107 local($a,$b,$c0,$c1,$c2);
108 local($i,$as,$ae,$bs,$be,$ai,$bi);
109 local($tot,$end);
110
111 &function_begin_B($name,"");
112
113 $c0="ebx";
114 $c1="ecx";
115 $c2="ebp";
116 $a="esi";
117 $b="edi";
118
119 $as=0;
120 $ae=0;
121 $bs=0;
122 $be=0;
123 $tot=$num+$num-1;
124
125 &push("esi");
126 &mov($a,&wparam(1));
127 &push("edi");
128 &mov($b,&wparam(2));
129 &push("ebp");
130 &push("ebx");
131
132 &xor($c0,$c0);
133 &mov("eax",&DWP(0,$a,"",0)); # load the first word
134 &xor($c1,$c1);
135 &mov("edx",&DWP(0,$b,"",0)); # load the first second
136
137 for ($i=0; $i<$tot; $i++)
138 {
139 $ai=$as;
140 $bi=$bs;
141 $end=$be+1;
142
143 &comment("################## Calculate word $i");
144
145 for ($j=$bs; $j<$end; $j++)
146 {
147 &xor($c2,$c2) if ($j == $bs);
148 if (($j+1) == $end)
149 {
150 $v=1;
151 $v=2 if (($i+1) == $tot);
152 }
153 else
154 { $v=0; }
155 if (($j+1) != $end)
156 {
157 $na=($ai-1);
158 $nb=($bi+1);
159 }
160 else
161 {
162 $na=$as+($i < ($num-1));
163 $nb=$bs+($i >= ($num-1));
164 }
165#printf STDERR "[$ai,$bi] -> [$na,$nb]\n";
166 &mul_add_c($a,$ai,$b,$bi,$c0,$c1,$c2,$v,$i,$na,$nb);
167 if ($v)
168 {
169 &comment("saved r[$i]");
170 # &mov("eax",&wparam(0));
171 # &mov(&DWP($i*4,"eax","",0),$c0);
172 ($c0,$c1,$c2)=($c1,$c2,$c0);
173 }
174 $ai--;
175 $bi++;
176 }
177 $as++ if ($i < ($num-1));
178 $ae++ if ($i >= ($num-1));
179
180 $bs++ if ($i >= ($num-1));
181 $be++ if ($i < ($num-1));
182 }
183 &comment("save r[$i]");
184 # &mov("eax",&wparam(0));
185 &mov(&DWP($i*4,"eax","",0),$c0);
186
187 &pop("ebx");
188 &pop("ebp");
189 &pop("edi");
190 &pop("esi");
191 &ret();
192 &function_end_B($name);
193 }
194
195sub bn_sqr_comba
196 {
197 local($name,$num)=@_;
198 local($r,$a,$c0,$c1,$c2)=@_;
199 local($i,$as,$ae,$bs,$be,$ai,$bi);
200 local($b,$tot,$end,$half);
201
202 &function_begin_B($name,"");
203
204 $c0="ebx";
205 $c1="ecx";
206 $c2="ebp";
207 $a="esi";
208 $r="edi";
209
210 &push("esi");
211 &push("edi");
212 &push("ebp");
213 &push("ebx");
214 &mov($r,&wparam(0));
215 &mov($a,&wparam(1));
216 &xor($c0,$c0);
217 &xor($c1,$c1);
218 &mov("eax",&DWP(0,$a,"",0)); # load the first word
219
220 $as=0;
221 $ae=0;
222 $bs=0;
223 $be=0;
224 $tot=$num+$num-1;
225
226 for ($i=0; $i<$tot; $i++)
227 {
228 $ai=$as;
229 $bi=$bs;
230 $end=$be+1;
231
232 &comment("############### Calculate word $i");
233 for ($j=$bs; $j<$end; $j++)
234 {
235 &xor($c2,$c2) if ($j == $bs);
236 if (($ai-1) < ($bi+1))
237 {
238 $v=1;
239 $v=2 if ($i+1) == $tot;
240 }
241 else
242 { $v=0; }
243 if (!$v)
244 {
245 $na=$ai-1;
246 $nb=$bi+1;
247 }
248 else
249 {
250 $na=$as+($i < ($num-1));
251 $nb=$bs+($i >= ($num-1));
252 }
253 if ($ai == $bi)
254 {
255 &sqr_add_c($r,$a,$ai,$bi,
256 $c0,$c1,$c2,$v,$i,$na,$nb);
257 }
258 else
259 {
260 &sqr_add_c2($r,$a,$ai,$bi,
261 $c0,$c1,$c2,$v,$i,$na,$nb);
262 }
263 if ($v)
264 {
265 &comment("saved r[$i]");
266 #&mov(&DWP($i*4,$r,"",0),$c0);
267 ($c0,$c1,$c2)=($c1,$c2,$c0);
268 last;
269 }
270 $ai--;
271 $bi++;
272 }
273 $as++ if ($i < ($num-1));
274 $ae++ if ($i >= ($num-1));
275
276 $bs++ if ($i >= ($num-1));
277 $be++ if ($i < ($num-1));
278 }
279 &mov(&DWP($i*4,$r,"",0),$c0);
280 &pop("ebx");
281 &pop("ebp");
282 &pop("edi");
283 &pop("esi");
284 &ret();
285 &function_end_B($name);
286 }
diff --git a/src/lib/libcrypto/bn/asm/ia64.S b/src/lib/libcrypto/bn/asm/ia64.S
deleted file mode 100644
index 7dfda85566..0000000000
--- a/src/lib/libcrypto/bn/asm/ia64.S
+++ /dev/null
@@ -1,1605 +0,0 @@
1.explicit
2.text
3.ident "ia64.S, Version 2.0"
4.ident "IA-64 ISA artwork by Andy Polyakov <appro@fy.chalmers.se>"
5
6//
7// ====================================================================
8// Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
9// project.
10//
11// Rights for redistribution and usage in source and binary forms are
12// granted according to the OpenSSL license. Warranty of any kind is
13// disclaimed.
14// ====================================================================
15//
16// Version 2.x is Itanium2 re-tune. Few words about how Itanum2 is
17// different from Itanium to this module viewpoint. Most notably, is it
18// "wider" than Itanium? Can you experience loop scalability as
19// discussed in commentary sections? Not really:-( Itanium2 has 6
20// integer ALU ports, i.e. it's 2 ports wider, but it's not enough to
21// spin twice as fast, as I need 8 IALU ports. Amount of floating point
22// ports is the same, i.e. 2, while I need 4. In other words, to this
23// module Itanium2 remains effectively as "wide" as Itanium. Yet it's
24// essentially different in respect to this module, and a re-tune was
25// required. Well, because some intruction latencies has changed. Most
26// noticeably those intensively used:
27//
28// Itanium Itanium2
29// ldf8 9 6 L2 hit
30// ld8 2 1 L1 hit
31// getf 2 5
32// xma[->getf] 7[+1] 4[+0]
33// add[->st8] 1[+1] 1[+0]
34//
35// What does it mean? You might ratiocinate that the original code
36// should run just faster... Because sum of latencies is smaller...
37// Wrong! Note that getf latency increased. This means that if a loop is
38// scheduled for lower latency (and they are), then it will suffer from
39// stall condition and the code will therefore turn anti-scalable, e.g.
40// original bn_mul_words spun at 5*n or 2.5 times slower than expected
41// on Itanium2! What to do? Reschedule loops for Itanium2? But then
42// Itanium would exhibit anti-scalability. So I've chosen to reschedule
43// for worst latency for every instruction aiming for best *all-round*
44// performance.
45
46// Q. How much faster does it get?
47// A. Here is the output from 'openssl speed rsa dsa' for vanilla
48// 0.9.6a compiled with gcc version 2.96 20000731 (Red Hat
49// Linux 7.1 2.96-81):
50//
51// sign verify sign/s verify/s
52// rsa 512 bits 0.0036s 0.0003s 275.3 2999.2
53// rsa 1024 bits 0.0203s 0.0011s 49.3 894.1
54// rsa 2048 bits 0.1331s 0.0040s 7.5 250.9
55// rsa 4096 bits 0.9270s 0.0147s 1.1 68.1
56// sign verify sign/s verify/s
57// dsa 512 bits 0.0035s 0.0043s 288.3 234.8
58// dsa 1024 bits 0.0111s 0.0135s 90.0 74.2
59//
60// And here is similar output but for this assembler
61// implementation:-)
62//
63// sign verify sign/s verify/s
64// rsa 512 bits 0.0021s 0.0001s 549.4 9638.5
65// rsa 1024 bits 0.0055s 0.0002s 183.8 4481.1
66// rsa 2048 bits 0.0244s 0.0006s 41.4 1726.3
67// rsa 4096 bits 0.1295s 0.0018s 7.7 561.5
68// sign verify sign/s verify/s
69// dsa 512 bits 0.0012s 0.0013s 891.9 756.6
70// dsa 1024 bits 0.0023s 0.0028s 440.4 376.2
71//
72// Yes, you may argue that it's not fair comparison as it's
73// possible to craft the C implementation with BN_UMULT_HIGH
74// inline assembler macro. But of course! Here is the output
75// with the macro:
76//
77// sign verify sign/s verify/s
78// rsa 512 bits 0.0020s 0.0002s 495.0 6561.0
79// rsa 1024 bits 0.0086s 0.0004s 116.2 2235.7
80// rsa 2048 bits 0.0519s 0.0015s 19.3 667.3
81// rsa 4096 bits 0.3464s 0.0053s 2.9 187.7
82// sign verify sign/s verify/s
83// dsa 512 bits 0.0016s 0.0020s 613.1 510.5
84// dsa 1024 bits 0.0045s 0.0054s 221.0 183.9
85//
86// My code is still way faster, huh:-) And I believe that even
87// higher performance can be achieved. Note that as keys get
88// longer, performance gain is larger. Why? According to the
89// profiler there is another player in the field, namely
90// BN_from_montgomery consuming larger and larger portion of CPU
91// time as keysize decreases. I therefore consider putting effort
92// to assembler implementation of the following routine:
93//
94// void bn_mul_add_mont (BN_ULONG *rp,BN_ULONG *np,int nl,BN_ULONG n0)
95// {
96// int i,j;
97// BN_ULONG v;
98//
99// for (i=0; i<nl; i++)
100// {
101// v=bn_mul_add_words(rp,np,nl,(rp[0]*n0)&BN_MASK2);
102// nrp++;
103// rp++;
104// if (((nrp[-1]+=v)&BN_MASK2) < v)
105// for (j=0; ((++nrp[j])&BN_MASK2) == 0; j++) ;
106// }
107// }
108//
109// It might as well be beneficial to implement even combaX
110// variants, as it appears as it can literally unleash the
111// performance (see comment section to bn_mul_comba8 below).
112//
113// And finally for your reference the output for 0.9.6a compiled
114// with SGIcc version 0.01.0-12 (keep in mind that for the moment
115// of this writing it's not possible to convince SGIcc to use
116// BN_UMULT_HIGH inline assembler macro, yet the code is fast,
117// i.e. for a compiler generated one:-):
118//
119// sign verify sign/s verify/s
120// rsa 512 bits 0.0022s 0.0002s 452.7 5894.3
121// rsa 1024 bits 0.0097s 0.0005s 102.7 2002.9
122// rsa 2048 bits 0.0578s 0.0017s 17.3 600.2
123// rsa 4096 bits 0.3838s 0.0061s 2.6 164.5
124// sign verify sign/s verify/s
125// dsa 512 bits 0.0018s 0.0022s 547.3 459.6
126// dsa 1024 bits 0.0051s 0.0062s 196.6 161.3
127//
128// Oh! Benchmarks were performed on 733MHz Lion-class Itanium
129// system running Redhat Linux 7.1 (very special thanks to Ray
130// McCaffity of Williams Communications for providing an account).
131//
132// Q. What's the heck with 'rum 1<<5' at the end of every function?
133// A. Well, by clearing the "upper FP registers written" bit of the
134// User Mask I want to excuse the kernel from preserving upper
135// (f32-f128) FP register bank over process context switch, thus
136// minimizing bus bandwidth consumption during the switch (i.e.
137// after PKI opration completes and the program is off doing
138// something else like bulk symmetric encryption). Having said
139// this, I also want to point out that it might be good idea
140// to compile the whole toolkit (as well as majority of the
141// programs for that matter) with -mfixed-range=f32-f127 command
142// line option. No, it doesn't prevent the compiler from writing
143// to upper bank, but at least discourages to do so. If you don't
144// like the idea you have the option to compile the module with
145// -Drum=nop.m in command line.
146//
147
148#if 1
149//
150// bn_[add|sub]_words routines.
151//
152// Loops are spinning in 2*(n+5) ticks on Itanuim (provided that the
153// data reside in L1 cache, i.e. 2 ticks away). It's possible to
154// compress the epilogue and get down to 2*n+6, but at the cost of
155// scalability (the neat feature of this implementation is that it
156// shall automagically spin in n+5 on "wider" IA-64 implementations:-)
157// I consider that the epilogue is short enough as it is to trade tiny
158// performance loss on Itanium for scalability.
159//
160// BN_ULONG bn_add_words(BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp,int num)
161//
162.global bn_add_words#
163.proc bn_add_words#
164.align 64
165.skip 32 // makes the loop body aligned at 64-byte boundary
166bn_add_words:
167 .prologue
168 .fframe 0
169 .save ar.pfs,r2
170{ .mii; alloc r2=ar.pfs,4,12,0,16
171 cmp4.le p6,p0=r35,r0 };;
172{ .mfb; mov r8=r0 // return value
173(p6) br.ret.spnt.many b0 };;
174
175 .save ar.lc,r3
176{ .mib; sub r10=r35,r0,1
177 mov r3=ar.lc
178 brp.loop.imp .L_bn_add_words_ctop,.L_bn_add_words_cend-16
179 }
180 .body
181{ .mib;
182#if defined(_HPUX_SOURCE) && defined(_ILP32)
183 addp4 r14=0,r32 // rp
184#else
185 mov r14=r32 // rp
186#endif
187 mov r9=pr };;
188{ .mii;
189#if defined(_HPUX_SOURCE) && defined(_ILP32)
190 addp4 r15=0,r33 // ap
191#else
192 mov r15=r33 // ap
193#endif
194 mov ar.lc=r10
195 mov ar.ec=6 }
196{ .mib;
197#if defined(_HPUX_SOURCE) && defined(_ILP32)
198 addp4 r16=0,r34 // bp
199#else
200 mov r16=r34 // bp
201#endif
202 mov pr.rot=1<<16 };;
203
204.L_bn_add_words_ctop:
205{ .mii; (p16) ld8 r32=[r16],8 // b=*(bp++)
206 (p18) add r39=r37,r34
207 (p19) cmp.ltu.unc p56,p0=r40,r38 }
208{ .mfb; (p0) nop.m 0x0
209 (p0) nop.f 0x0
210 (p0) nop.b 0x0 }
211{ .mii; (p16) ld8 r35=[r15],8 // a=*(ap++)
212 (p58) cmp.eq.or p57,p0=-1,r41 // (p20)
213 (p58) add r41=1,r41 } // (p20)
214{ .mfb; (p21) st8 [r14]=r42,8 // *(rp++)=r
215 (p0) nop.f 0x0
216 br.ctop.sptk .L_bn_add_words_ctop };;
217.L_bn_add_words_cend:
218
219{ .mii;
220(p59) add r8=1,r8 // return value
221 mov pr=r9,0x1ffff
222 mov ar.lc=r3 }
223{ .mbb; nop.b 0x0
224 br.ret.sptk.many b0 };;
225.endp bn_add_words#
226
227//
228// BN_ULONG bn_sub_words(BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp,int num)
229//
230.global bn_sub_words#
231.proc bn_sub_words#
232.align 64
233.skip 32 // makes the loop body aligned at 64-byte boundary
234bn_sub_words:
235 .prologue
236 .fframe 0
237 .save ar.pfs,r2
238{ .mii; alloc r2=ar.pfs,4,12,0,16
239 cmp4.le p6,p0=r35,r0 };;
240{ .mfb; mov r8=r0 // return value
241(p6) br.ret.spnt.many b0 };;
242
243 .save ar.lc,r3
244{ .mib; sub r10=r35,r0,1
245 mov r3=ar.lc
246 brp.loop.imp .L_bn_sub_words_ctop,.L_bn_sub_words_cend-16
247 }
248 .body
249{ .mib;
250#if defined(_HPUX_SOURCE) && defined(_ILP32)
251 addp4 r14=0,r32 // rp
252#else
253 mov r14=r32 // rp
254#endif
255 mov r9=pr };;
256{ .mii;
257#if defined(_HPUX_SOURCE) && defined(_ILP32)
258 addp4 r15=0,r33 // ap
259#else
260 mov r15=r33 // ap
261#endif
262 mov ar.lc=r10
263 mov ar.ec=6 }
264{ .mib;
265#if defined(_HPUX_SOURCE) && defined(_ILP32)
266 addp4 r16=0,r34 // bp
267#else
268 mov r16=r34 // bp
269#endif
270 mov pr.rot=1<<16 };;
271
272.L_bn_sub_words_ctop:
273{ .mii; (p16) ld8 r32=[r16],8 // b=*(bp++)
274 (p18) sub r39=r37,r34
275 (p19) cmp.gtu.unc p56,p0=r40,r38 }
276{ .mfb; (p0) nop.m 0x0
277 (p0) nop.f 0x0
278 (p0) nop.b 0x0 }
279{ .mii; (p16) ld8 r35=[r15],8 // a=*(ap++)
280 (p58) cmp.eq.or p57,p0=0,r41 // (p20)
281 (p58) add r41=-1,r41 } // (p20)
282{ .mbb; (p21) st8 [r14]=r42,8 // *(rp++)=r
283 (p0) nop.b 0x0
284 br.ctop.sptk .L_bn_sub_words_ctop };;
285.L_bn_sub_words_cend:
286
287{ .mii;
288(p59) add r8=1,r8 // return value
289 mov pr=r9,0x1ffff
290 mov ar.lc=r3 }
291{ .mbb; nop.b 0x0
292 br.ret.sptk.many b0 };;
293.endp bn_sub_words#
294#endif
295
296#if 0
297#define XMA_TEMPTATION
298#endif
299
300#if 1
301//
302// BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
303//
304.global bn_mul_words#
305.proc bn_mul_words#
306.align 64
307.skip 32 // makes the loop body aligned at 64-byte boundary
308bn_mul_words:
309 .prologue
310 .fframe 0
311 .save ar.pfs,r2
312#ifdef XMA_TEMPTATION
313{ .mfi; alloc r2=ar.pfs,4,0,0,0 };;
314#else
315{ .mfi; alloc r2=ar.pfs,4,12,0,16 };;
316#endif
317{ .mib; mov r8=r0 // return value
318 cmp4.le p6,p0=r34,r0
319(p6) br.ret.spnt.many b0 };;
320
321 .save ar.lc,r3
322{ .mii; sub r10=r34,r0,1
323 mov r3=ar.lc
324 mov r9=pr };;
325
326 .body
327{ .mib; setf.sig f8=r35 // w
328 mov pr.rot=0x800001<<16
329 // ------^----- serves as (p50) at first (p27)
330 brp.loop.imp .L_bn_mul_words_ctop,.L_bn_mul_words_cend-16
331 }
332
333#ifndef XMA_TEMPTATION
334
335{ .mii;
336#if defined(_HPUX_SOURCE) && defined(_ILP32)
337 addp4 r14=0,r32 // rp
338 addp4 r15=0,r33 // ap
339#else
340 mov r14=r32 // rp
341 mov r15=r33 // ap
342#endif
343 mov ar.lc=r10 }
344{ .mii; mov r40=0 // serves as r35 at first (p27)
345 mov ar.ec=13 };;
346
347// This loop spins in 2*(n+12) ticks. It's scheduled for data in Itanium
348// L2 cache (i.e. 9 ticks away) as floating point load/store instructions
349// bypass L1 cache and L2 latency is actually best-case scenario for
350// ldf8. The loop is not scalable and shall run in 2*(n+12) even on
351// "wider" IA-64 implementations. It's a trade-off here. n+24 loop
352// would give us ~5% in *overall* performance improvement on "wider"
353// IA-64, but would hurt Itanium for about same because of longer
354// epilogue. As it's a matter of few percents in either case I've
355// chosen to trade the scalability for development time (you can see
356// this very instruction sequence in bn_mul_add_words loop which in
357// turn is scalable).
358.L_bn_mul_words_ctop:
359{ .mfi; (p25) getf.sig r36=f52 // low
360 (p21) xmpy.lu f48=f37,f8
361 (p28) cmp.ltu p54,p50=r41,r39 }
362{ .mfi; (p16) ldf8 f32=[r15],8
363 (p21) xmpy.hu f40=f37,f8
364 (p0) nop.i 0x0 };;
365{ .mii; (p25) getf.sig r32=f44 // high
366 .pred.rel "mutex",p50,p54
367 (p50) add r40=r38,r35 // (p27)
368 (p54) add r40=r38,r35,1 } // (p27)
369{ .mfb; (p28) st8 [r14]=r41,8
370 (p0) nop.f 0x0
371 br.ctop.sptk .L_bn_mul_words_ctop };;
372.L_bn_mul_words_cend:
373
374{ .mii; nop.m 0x0
375.pred.rel "mutex",p51,p55
376(p51) add r8=r36,r0
377(p55) add r8=r36,r0,1 }
378{ .mfb; nop.m 0x0
379 nop.f 0x0
380 nop.b 0x0 }
381
382#else // XMA_TEMPTATION
383
384 setf.sig f37=r0 // serves as carry at (p18) tick
385 mov ar.lc=r10
386 mov ar.ec=5;;
387
388// Most of you examining this code very likely wonder why in the name
389// of Intel the following loop is commented out? Indeed, it looks so
390// neat that you find it hard to believe that it's something wrong
391// with it, right? The catch is that every iteration depends on the
392// result from previous one and the latter isn't available instantly.
393// The loop therefore spins at the latency of xma minus 1, or in other
394// words at 6*(n+4) ticks:-( Compare to the "production" loop above
395// that runs in 2*(n+11) where the low latency problem is worked around
396// by moving the dependency to one-tick latent interger ALU. Note that
397// "distance" between ldf8 and xma is not latency of ldf8, but the
398// *difference* between xma and ldf8 latencies.
399.L_bn_mul_words_ctop:
400{ .mfi; (p16) ldf8 f32=[r33],8
401 (p18) xma.hu f38=f34,f8,f39 }
402{ .mfb; (p20) stf8 [r32]=f37,8
403 (p18) xma.lu f35=f34,f8,f39
404 br.ctop.sptk .L_bn_mul_words_ctop };;
405.L_bn_mul_words_cend:
406
407 getf.sig r8=f41 // the return value
408
409#endif // XMA_TEMPTATION
410
411{ .mii; nop.m 0x0
412 mov pr=r9,0x1ffff
413 mov ar.lc=r3 }
414{ .mfb; rum 1<<5 // clear um.mfh
415 nop.f 0x0
416 br.ret.sptk.many b0 };;
417.endp bn_mul_words#
418#endif
419
420#if 1
421//
422// BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
423//
424.global bn_mul_add_words#
425.proc bn_mul_add_words#
426.align 64
427//.skip 0 // makes the loop split at 64-byte boundary
428bn_mul_add_words:
429 .prologue
430 .fframe 0
431 .save ar.pfs,r2
432{ .mii; alloc r2=ar.pfs,4,12,0,16
433 cmp4.le p6,p0=r34,r0 };;
434{ .mfb; mov r8=r0 // return value
435(p6) br.ret.spnt.many b0 };;
436
437 .save ar.lc,r3
438{ .mii; sub r10=r34,r0,1
439 mov r3=ar.lc
440 mov r9=pr };;
441
442 .body
443{ .mib; setf.sig f8=r35 // w
444 mov pr.rot=0x800001<<16
445 // ------^----- serves as (p50) at first (p27)
446 brp.loop.imp .L_bn_mul_add_words_ctop,.L_bn_mul_add_words_cend-16
447 }
448{ .mii;
449#if defined(_HPUX_SOURCE) && defined(_ILP32)
450 addp4 r14=0,r32 // rp
451 addp4 r15=0,r33 // ap
452#else
453 mov r14=r32 // rp
454 mov r15=r33 // ap
455#endif
456 mov ar.lc=r10 }
457{ .mii; mov r40=0 // serves as r35 at first (p27)
458#if defined(_HPUX_SOURCE) && defined(_ILP32)
459 addp4 r18=0,r32 // rp copy
460#else
461 mov r18=r32 // rp copy
462#endif
463 mov ar.ec=15 };;
464
465// This loop spins in 3*(n+14) ticks on Itanium and should spin in
466// 2*(n+14) on "wider" IA-64 implementations (to be verified with new
467// µ-architecture manuals as they become available). As usual it's
468// possible to compress the epilogue, down to 10 in this case, at the
469// cost of scalability. Compressed (and therefore non-scalable) loop
470// running at 3*(n+11) would buy you ~10% on Itanium but take ~35%
471// from "wider" IA-64 so let it be scalable! Special attention was
472// paid for having the loop body split at 64-byte boundary. ld8 is
473// scheduled for L1 cache as the data is more than likely there.
474// Indeed, bn_mul_words has put it there a moment ago:-)
475.L_bn_mul_add_words_ctop:
476{ .mfi; (p25) getf.sig r36=f52 // low
477 (p21) xmpy.lu f48=f37,f8
478 (p28) cmp.ltu p54,p50=r41,r39 }
479{ .mfi; (p16) ldf8 f32=[r15],8
480 (p21) xmpy.hu f40=f37,f8
481 (p28) add r45=r45,r41 };;
482{ .mii; (p25) getf.sig r32=f44 // high
483 .pred.rel "mutex",p50,p54
484 (p50) add r40=r38,r35 // (p27)
485 (p54) add r40=r38,r35,1 } // (p27)
486{ .mfb; (p28) cmp.ltu.unc p60,p0=r45,r41
487 (p0) nop.f 0x0
488 (p0) nop.b 0x0 }
489{ .mii; (p27) ld8 r44=[r18],8
490 (p62) cmp.eq.or p61,p0=-1,r46
491 (p62) add r46=1,r46 }
492{ .mfb; (p30) st8 [r14]=r47,8
493 (p0) nop.f 0x0
494 br.ctop.sptk .L_bn_mul_add_words_ctop};;
495.L_bn_mul_add_words_cend:
496
497{ .mii; nop.m 0x0
498.pred.rel "mutex",p53,p57
499(p53) add r8=r38,r0
500(p57) add r8=r38,r0,1 }
501{ .mfb; nop.m 0x0
502 nop.f 0x0
503 nop.b 0x0 };;
504{ .mii;
505(p63) add r8=1,r8
506 mov pr=r9,0x1ffff
507 mov ar.lc=r3 }
508{ .mfb; rum 1<<5 // clear um.mfh
509 nop.f 0x0
510 br.ret.sptk.many b0 };;
511.endp bn_mul_add_words#
512#endif
513
514#if 1
515//
516// void bn_sqr_words(BN_ULONG *rp, BN_ULONG *ap, int num)
517//
518.global bn_sqr_words#
519.proc bn_sqr_words#
520.align 64
521.skip 32 // makes the loop body aligned at 64-byte boundary
522bn_sqr_words:
523 .prologue
524 .fframe 0
525 .save ar.pfs,r2
526{ .mii; alloc r2=ar.pfs,3,0,0,0
527 sxt4 r34=r34 };;
528{ .mii; cmp.le p6,p0=r34,r0
529 mov r8=r0 } // return value
530{ .mfb; nop.f 0x0
531(p6) br.ret.spnt.many b0 };;
532
533 .save ar.lc,r3
534{ .mii; sub r10=r34,r0,1
535 mov r3=ar.lc
536 mov r9=pr };;
537
538 .body
539#if defined(_HPUX_SOURCE) && defined(_ILP32)
540{ .mii; addp4 r32=0,r32
541 addp4 r33=0,r33 };;
542#endif
543{ .mib;
544 mov pr.rot=1<<16
545 brp.loop.imp .L_bn_sqr_words_ctop,.L_bn_sqr_words_cend-16
546 }
547{ .mii; add r34=8,r32
548 mov ar.lc=r10
549 mov ar.ec=18 };;
550
551// 2*(n+17) on Itanium, (n+17) on "wider" IA-64 implementations. It's
552// possible to compress the epilogue (I'm getting tired to write this
553// comment over and over) and get down to 2*n+16 at the cost of
554// scalability. The decision will very likely be reconsidered after the
555// benchmark program is profiled. I.e. if perfomance gain on Itanium
556// will appear larger than loss on "wider" IA-64, then the loop should
557// be explicitely split and the epilogue compressed.
558.L_bn_sqr_words_ctop:
559{ .mfi; (p16) ldf8 f32=[r33],8
560 (p25) xmpy.lu f42=f41,f41
561 (p0) nop.i 0x0 }
562{ .mib; (p33) stf8 [r32]=f50,16
563 (p0) nop.i 0x0
564 (p0) nop.b 0x0 }
565{ .mfi; (p0) nop.m 0x0
566 (p25) xmpy.hu f52=f41,f41
567 (p0) nop.i 0x0 }
568{ .mib; (p33) stf8 [r34]=f60,16
569 (p0) nop.i 0x0
570 br.ctop.sptk .L_bn_sqr_words_ctop };;
571.L_bn_sqr_words_cend:
572
573{ .mii; nop.m 0x0
574 mov pr=r9,0x1ffff
575 mov ar.lc=r3 }
576{ .mfb; rum 1<<5 // clear um.mfh
577 nop.f 0x0
578 br.ret.sptk.many b0 };;
579.endp bn_sqr_words#
580#endif
581
582#if 1
583// Apparently we win nothing by implementing special bn_sqr_comba8.
584// Yes, it is possible to reduce the number of multiplications by
585// almost factor of two, but then the amount of additions would
586// increase by factor of two (as we would have to perform those
587// otherwise performed by xma ourselves). Normally we would trade
588// anyway as multiplications are way more expensive, but not this
589// time... Multiplication kernel is fully pipelined and as we drain
590// one 128-bit multiplication result per clock cycle multiplications
591// are effectively as inexpensive as additions. Special implementation
592// might become of interest for "wider" IA-64 implementation as you'll
593// be able to get through the multiplication phase faster (there won't
594// be any stall issues as discussed in the commentary section below and
595// you therefore will be able to employ all 4 FP units)... But these
596// Itanium days it's simply too hard to justify the effort so I just
597// drop down to bn_mul_comba8 code:-)
598//
599// void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a)
600//
601.global bn_sqr_comba8#
602.proc bn_sqr_comba8#
603.align 64
604bn_sqr_comba8:
605 .prologue
606 .fframe 0
607 .save ar.pfs,r2
608#if defined(_HPUX_SOURCE) && defined(_ILP32)
609{ .mii; alloc r2=ar.pfs,2,1,0,0
610 addp4 r33=0,r33
611 addp4 r32=0,r32 };;
612{ .mii;
613#else
614{ .mii; alloc r2=ar.pfs,2,1,0,0
615#endif
616 mov r34=r33
617 add r14=8,r33 };;
618 .body
619{ .mii; add r17=8,r34
620 add r15=16,r33
621 add r18=16,r34 }
622{ .mfb; add r16=24,r33
623 br .L_cheat_entry_point8 };;
624.endp bn_sqr_comba8#
625#endif
626
627#if 1
628// I've estimated this routine to run in ~120 ticks, but in reality
629// (i.e. according to ar.itc) it takes ~160 ticks. Are those extra
630// cycles consumed for instructions fetch? Or did I misinterpret some
631// clause in Itanium µ-architecture manual? Comments are welcomed and
632// highly appreciated.
633//
634// However! It should be noted that even 160 ticks is darn good result
635// as it's over 10 (yes, ten, spelled as t-e-n) times faster than the
636// C version (compiled with gcc with inline assembler). I really
637// kicked compiler's butt here, didn't I? Yeah! This brings us to the
638// following statement. It's damn shame that this routine isn't called
639// very often nowadays! According to the profiler most CPU time is
640// consumed by bn_mul_add_words called from BN_from_montgomery. In
641// order to estimate what we're missing, I've compared the performance
642// of this routine against "traditional" implementation, i.e. against
643// following routine:
644//
645// void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
646// { r[ 8]=bn_mul_words( &(r[0]),a,8,b[0]);
647// r[ 9]=bn_mul_add_words(&(r[1]),a,8,b[1]);
648// r[10]=bn_mul_add_words(&(r[2]),a,8,b[2]);
649// r[11]=bn_mul_add_words(&(r[3]),a,8,b[3]);
650// r[12]=bn_mul_add_words(&(r[4]),a,8,b[4]);
651// r[13]=bn_mul_add_words(&(r[5]),a,8,b[5]);
652// r[14]=bn_mul_add_words(&(r[6]),a,8,b[6]);
653// r[15]=bn_mul_add_words(&(r[7]),a,8,b[7]);
654// }
655//
656// The one below is over 8 times faster than the one above:-( Even
657// more reasons to "combafy" bn_mul_add_mont...
658//
659// And yes, this routine really made me wish there were an optimizing
660// assembler! It also feels like it deserves a dedication.
661//
662// To my wife for being there and to my kids...
663//
664// void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
665//
666#define carry1 r14
667#define carry2 r15
668#define carry3 r34
669.global bn_mul_comba8#
670.proc bn_mul_comba8#
671.align 64
672bn_mul_comba8:
673 .prologue
674 .fframe 0
675 .save ar.pfs,r2
676#if defined(_HPUX_SOURCE) && defined(_ILP32)
677{ .mii; alloc r2=ar.pfs,3,0,0,0
678 addp4 r33=0,r33
679 addp4 r34=0,r34 };;
680{ .mii; addp4 r32=0,r32
681#else
682{ .mii; alloc r2=ar.pfs,3,0,0,0
683#endif
684 add r14=8,r33
685 add r17=8,r34 }
686 .body
687{ .mii; add r15=16,r33
688 add r18=16,r34
689 add r16=24,r33 }
690.L_cheat_entry_point8:
691{ .mmi; add r19=24,r34
692
693 ldf8 f32=[r33],32 };;
694
695{ .mmi; ldf8 f120=[r34],32
696 ldf8 f121=[r17],32 }
697{ .mmi; ldf8 f122=[r18],32
698 ldf8 f123=[r19],32 };;
699{ .mmi; ldf8 f124=[r34]
700 ldf8 f125=[r17] }
701{ .mmi; ldf8 f126=[r18]
702 ldf8 f127=[r19] }
703
704{ .mmi; ldf8 f33=[r14],32
705 ldf8 f34=[r15],32 }
706{ .mmi; ldf8 f35=[r16],32;;
707 ldf8 f36=[r33] }
708{ .mmi; ldf8 f37=[r14]
709 ldf8 f38=[r15] }
710{ .mfi; ldf8 f39=[r16]
711// -------\ Entering multiplier's heaven /-------
712// ------------\ /------------
713// -----------------\ /-----------------
714// ----------------------\/----------------------
715 xma.hu f41=f32,f120,f0 }
716{ .mfi; xma.lu f40=f32,f120,f0 };; // (*)
717{ .mfi; xma.hu f51=f32,f121,f0 }
718{ .mfi; xma.lu f50=f32,f121,f0 };;
719{ .mfi; xma.hu f61=f32,f122,f0 }
720{ .mfi; xma.lu f60=f32,f122,f0 };;
721{ .mfi; xma.hu f71=f32,f123,f0 }
722{ .mfi; xma.lu f70=f32,f123,f0 };;
723{ .mfi; xma.hu f81=f32,f124,f0 }
724{ .mfi; xma.lu f80=f32,f124,f0 };;
725{ .mfi; xma.hu f91=f32,f125,f0 }
726{ .mfi; xma.lu f90=f32,f125,f0 };;
727{ .mfi; xma.hu f101=f32,f126,f0 }
728{ .mfi; xma.lu f100=f32,f126,f0 };;
729{ .mfi; xma.hu f111=f32,f127,f0 }
730{ .mfi; xma.lu f110=f32,f127,f0 };;//
731// (*) You can argue that splitting at every second bundle would
732// prevent "wider" IA-64 implementations from achieving the peak
733// performance. Well, not really... The catch is that if you
734// intend to keep 4 FP units busy by splitting at every fourth
735// bundle and thus perform these 16 multiplications in 4 ticks,
736// the first bundle *below* would stall because the result from
737// the first xma bundle *above* won't be available for another 3
738// ticks (if not more, being an optimist, I assume that "wider"
739// implementation will have same latency:-). This stall will hold
740// you back and the performance would be as if every second bundle
741// were split *anyway*...
742{ .mfi; getf.sig r16=f40
743 xma.hu f42=f33,f120,f41
744 add r33=8,r32 }
745{ .mfi; xma.lu f41=f33,f120,f41 };;
746{ .mfi; getf.sig r24=f50
747 xma.hu f52=f33,f121,f51 }
748{ .mfi; xma.lu f51=f33,f121,f51 };;
749{ .mfi; st8 [r32]=r16,16
750 xma.hu f62=f33,f122,f61 }
751{ .mfi; xma.lu f61=f33,f122,f61 };;
752{ .mfi; xma.hu f72=f33,f123,f71 }
753{ .mfi; xma.lu f71=f33,f123,f71 };;
754{ .mfi; xma.hu f82=f33,f124,f81 }
755{ .mfi; xma.lu f81=f33,f124,f81 };;
756{ .mfi; xma.hu f92=f33,f125,f91 }
757{ .mfi; xma.lu f91=f33,f125,f91 };;
758{ .mfi; xma.hu f102=f33,f126,f101 }
759{ .mfi; xma.lu f101=f33,f126,f101 };;
760{ .mfi; xma.hu f112=f33,f127,f111 }
761{ .mfi; xma.lu f111=f33,f127,f111 };;//
762//-------------------------------------------------//
763{ .mfi; getf.sig r25=f41
764 xma.hu f43=f34,f120,f42 }
765{ .mfi; xma.lu f42=f34,f120,f42 };;
766{ .mfi; getf.sig r16=f60
767 xma.hu f53=f34,f121,f52 }
768{ .mfi; xma.lu f52=f34,f121,f52 };;
769{ .mfi; getf.sig r17=f51
770 xma.hu f63=f34,f122,f62
771 add r25=r25,r24 }
772{ .mfi; xma.lu f62=f34,f122,f62
773 mov carry1=0 };;
774{ .mfi; cmp.ltu p6,p0=r25,r24
775 xma.hu f73=f34,f123,f72 }
776{ .mfi; xma.lu f72=f34,f123,f72 };;
777{ .mfi; st8 [r33]=r25,16
778 xma.hu f83=f34,f124,f82
779(p6) add carry1=1,carry1 }
780{ .mfi; xma.lu f82=f34,f124,f82 };;
781{ .mfi; xma.hu f93=f34,f125,f92 }
782{ .mfi; xma.lu f92=f34,f125,f92 };;
783{ .mfi; xma.hu f103=f34,f126,f102 }
784{ .mfi; xma.lu f102=f34,f126,f102 };;
785{ .mfi; xma.hu f113=f34,f127,f112 }
786{ .mfi; xma.lu f112=f34,f127,f112 };;//
787//-------------------------------------------------//
788{ .mfi; getf.sig r18=f42
789 xma.hu f44=f35,f120,f43
790 add r17=r17,r16 }
791{ .mfi; xma.lu f43=f35,f120,f43 };;
792{ .mfi; getf.sig r24=f70
793 xma.hu f54=f35,f121,f53 }
794{ .mfi; mov carry2=0
795 xma.lu f53=f35,f121,f53 };;
796{ .mfi; getf.sig r25=f61
797 xma.hu f64=f35,f122,f63
798 cmp.ltu p7,p0=r17,r16 }
799{ .mfi; add r18=r18,r17
800 xma.lu f63=f35,f122,f63 };;
801{ .mfi; getf.sig r26=f52
802 xma.hu f74=f35,f123,f73
803(p7) add carry2=1,carry2 }
804{ .mfi; cmp.ltu p7,p0=r18,r17
805 xma.lu f73=f35,f123,f73
806 add r18=r18,carry1 };;
807{ .mfi;
808 xma.hu f84=f35,f124,f83
809(p7) add carry2=1,carry2 }
810{ .mfi; cmp.ltu p7,p0=r18,carry1
811 xma.lu f83=f35,f124,f83 };;
812{ .mfi; st8 [r32]=r18,16
813 xma.hu f94=f35,f125,f93
814(p7) add carry2=1,carry2 }
815{ .mfi; xma.lu f93=f35,f125,f93 };;
816{ .mfi; xma.hu f104=f35,f126,f103 }
817{ .mfi; xma.lu f103=f35,f126,f103 };;
818{ .mfi; xma.hu f114=f35,f127,f113 }
819{ .mfi; mov carry1=0
820 xma.lu f113=f35,f127,f113
821 add r25=r25,r24 };;//
822//-------------------------------------------------//
823{ .mfi; getf.sig r27=f43
824 xma.hu f45=f36,f120,f44
825 cmp.ltu p6,p0=r25,r24 }
826{ .mfi; xma.lu f44=f36,f120,f44
827 add r26=r26,r25 };;
828{ .mfi; getf.sig r16=f80
829 xma.hu f55=f36,f121,f54
830(p6) add carry1=1,carry1 }
831{ .mfi; xma.lu f54=f36,f121,f54 };;
832{ .mfi; getf.sig r17=f71
833 xma.hu f65=f36,f122,f64
834 cmp.ltu p6,p0=r26,r25 }
835{ .mfi; xma.lu f64=f36,f122,f64
836 add r27=r27,r26 };;
837{ .mfi; getf.sig r18=f62
838 xma.hu f75=f36,f123,f74
839(p6) add carry1=1,carry1 }
840{ .mfi; cmp.ltu p6,p0=r27,r26
841 xma.lu f74=f36,f123,f74
842 add r27=r27,carry2 };;
843{ .mfi; getf.sig r19=f53
844 xma.hu f85=f36,f124,f84
845(p6) add carry1=1,carry1 }
846{ .mfi; xma.lu f84=f36,f124,f84
847 cmp.ltu p6,p0=r27,carry2 };;
848{ .mfi; st8 [r33]=r27,16
849 xma.hu f95=f36,f125,f94
850(p6) add carry1=1,carry1 }
851{ .mfi; xma.lu f94=f36,f125,f94 };;
852{ .mfi; xma.hu f105=f36,f126,f104 }
853{ .mfi; mov carry2=0
854 xma.lu f104=f36,f126,f104
855 add r17=r17,r16 };;
856{ .mfi; xma.hu f115=f36,f127,f114
857 cmp.ltu p7,p0=r17,r16 }
858{ .mfi; xma.lu f114=f36,f127,f114
859 add r18=r18,r17 };;//
860//-------------------------------------------------//
861{ .mfi; getf.sig r20=f44
862 xma.hu f46=f37,f120,f45
863(p7) add carry2=1,carry2 }
864{ .mfi; cmp.ltu p7,p0=r18,r17
865 xma.lu f45=f37,f120,f45
866 add r19=r19,r18 };;
867{ .mfi; getf.sig r24=f90
868 xma.hu f56=f37,f121,f55 }
869{ .mfi; xma.lu f55=f37,f121,f55 };;
870{ .mfi; getf.sig r25=f81
871 xma.hu f66=f37,f122,f65
872(p7) add carry2=1,carry2 }
873{ .mfi; cmp.ltu p7,p0=r19,r18
874 xma.lu f65=f37,f122,f65
875 add r20=r20,r19 };;
876{ .mfi; getf.sig r26=f72
877 xma.hu f76=f37,f123,f75
878(p7) add carry2=1,carry2 }
879{ .mfi; cmp.ltu p7,p0=r20,r19
880 xma.lu f75=f37,f123,f75
881 add r20=r20,carry1 };;
882{ .mfi; getf.sig r27=f63
883 xma.hu f86=f37,f124,f85
884(p7) add carry2=1,carry2 }
885{ .mfi; xma.lu f85=f37,f124,f85
886 cmp.ltu p7,p0=r20,carry1 };;
887{ .mfi; getf.sig r28=f54
888 xma.hu f96=f37,f125,f95
889(p7) add carry2=1,carry2 }
890{ .mfi; st8 [r32]=r20,16
891 xma.lu f95=f37,f125,f95 };;
892{ .mfi; xma.hu f106=f37,f126,f105 }
893{ .mfi; mov carry1=0
894 xma.lu f105=f37,f126,f105
895 add r25=r25,r24 };;
896{ .mfi; xma.hu f116=f37,f127,f115
897 cmp.ltu p6,p0=r25,r24 }
898{ .mfi; xma.lu f115=f37,f127,f115
899 add r26=r26,r25 };;//
900//-------------------------------------------------//
901{ .mfi; getf.sig r29=f45
902 xma.hu f47=f38,f120,f46
903(p6) add carry1=1,carry1 }
904{ .mfi; cmp.ltu p6,p0=r26,r25
905 xma.lu f46=f38,f120,f46
906 add r27=r27,r26 };;
907{ .mfi; getf.sig r16=f100
908 xma.hu f57=f38,f121,f56
909(p6) add carry1=1,carry1 }
910{ .mfi; cmp.ltu p6,p0=r27,r26
911 xma.lu f56=f38,f121,f56
912 add r28=r28,r27 };;
913{ .mfi; getf.sig r17=f91
914 xma.hu f67=f38,f122,f66
915(p6) add carry1=1,carry1 }
916{ .mfi; cmp.ltu p6,p0=r28,r27
917 xma.lu f66=f38,f122,f66
918 add r29=r29,r28 };;
919{ .mfi; getf.sig r18=f82
920 xma.hu f77=f38,f123,f76
921(p6) add carry1=1,carry1 }
922{ .mfi; cmp.ltu p6,p0=r29,r28
923 xma.lu f76=f38,f123,f76
924 add r29=r29,carry2 };;
925{ .mfi; getf.sig r19=f73
926 xma.hu f87=f38,f124,f86
927(p6) add carry1=1,carry1 }
928{ .mfi; xma.lu f86=f38,f124,f86
929 cmp.ltu p6,p0=r29,carry2 };;
930{ .mfi; getf.sig r20=f64
931 xma.hu f97=f38,f125,f96
932(p6) add carry1=1,carry1 }
933{ .mfi; st8 [r33]=r29,16
934 xma.lu f96=f38,f125,f96 };;
935{ .mfi; getf.sig r21=f55
936 xma.hu f107=f38,f126,f106 }
937{ .mfi; mov carry2=0
938 xma.lu f106=f38,f126,f106
939 add r17=r17,r16 };;
940{ .mfi; xma.hu f117=f38,f127,f116
941 cmp.ltu p7,p0=r17,r16 }
942{ .mfi; xma.lu f116=f38,f127,f116
943 add r18=r18,r17 };;//
944//-------------------------------------------------//
945{ .mfi; getf.sig r22=f46
946 xma.hu f48=f39,f120,f47
947(p7) add carry2=1,carry2 }
948{ .mfi; cmp.ltu p7,p0=r18,r17
949 xma.lu f47=f39,f120,f47
950 add r19=r19,r18 };;
951{ .mfi; getf.sig r24=f110
952 xma.hu f58=f39,f121,f57
953(p7) add carry2=1,carry2 }
954{ .mfi; cmp.ltu p7,p0=r19,r18
955 xma.lu f57=f39,f121,f57
956 add r20=r20,r19 };;
957{ .mfi; getf.sig r25=f101
958 xma.hu f68=f39,f122,f67
959(p7) add carry2=1,carry2 }
960{ .mfi; cmp.ltu p7,p0=r20,r19
961 xma.lu f67=f39,f122,f67
962 add r21=r21,r20 };;
963{ .mfi; getf.sig r26=f92
964 xma.hu f78=f39,f123,f77
965(p7) add carry2=1,carry2 }
966{ .mfi; cmp.ltu p7,p0=r21,r20
967 xma.lu f77=f39,f123,f77
968 add r22=r22,r21 };;
969{ .mfi; getf.sig r27=f83
970 xma.hu f88=f39,f124,f87
971(p7) add carry2=1,carry2 }
972{ .mfi; cmp.ltu p7,p0=r22,r21
973 xma.lu f87=f39,f124,f87
974 add r22=r22,carry1 };;
975{ .mfi; getf.sig r28=f74
976 xma.hu f98=f39,f125,f97
977(p7) add carry2=1,carry2 }
978{ .mfi; xma.lu f97=f39,f125,f97
979 cmp.ltu p7,p0=r22,carry1 };;
980{ .mfi; getf.sig r29=f65
981 xma.hu f108=f39,f126,f107
982(p7) add carry2=1,carry2 }
983{ .mfi; st8 [r32]=r22,16
984 xma.lu f107=f39,f126,f107 };;
985{ .mfi; getf.sig r30=f56
986 xma.hu f118=f39,f127,f117 }
987{ .mfi; xma.lu f117=f39,f127,f117 };;//
988//-------------------------------------------------//
989// Leaving muliplier's heaven... Quite a ride, huh?
990
991{ .mii; getf.sig r31=f47
992 add r25=r25,r24
993 mov carry1=0 };;
994{ .mii; getf.sig r16=f111
995 cmp.ltu p6,p0=r25,r24
996 add r26=r26,r25 };;
997{ .mfb; getf.sig r17=f102 }
998{ .mii;
999(p6) add carry1=1,carry1
1000 cmp.ltu p6,p0=r26,r25
1001 add r27=r27,r26 };;
1002{ .mfb; nop.m 0x0 }
1003{ .mii;
1004(p6) add carry1=1,carry1
1005 cmp.ltu p6,p0=r27,r26
1006 add r28=r28,r27 };;
1007{ .mii; getf.sig r18=f93
1008 add r17=r17,r16
1009 mov carry3=0 }
1010{ .mii;
1011(p6) add carry1=1,carry1
1012 cmp.ltu p6,p0=r28,r27
1013 add r29=r29,r28 };;
1014{ .mii; getf.sig r19=f84
1015 cmp.ltu p7,p0=r17,r16 }
1016{ .mii;
1017(p6) add carry1=1,carry1
1018 cmp.ltu p6,p0=r29,r28
1019 add r30=r30,r29 };;
1020{ .mii; getf.sig r20=f75
1021 add r18=r18,r17 }
1022{ .mii;
1023(p6) add carry1=1,carry1
1024 cmp.ltu p6,p0=r30,r29
1025 add r31=r31,r30 };;
1026{ .mfb; getf.sig r21=f66 }
1027{ .mii; (p7) add carry3=1,carry3
1028 cmp.ltu p7,p0=r18,r17
1029 add r19=r19,r18 }
1030{ .mfb; nop.m 0x0 }
1031{ .mii;
1032(p6) add carry1=1,carry1
1033 cmp.ltu p6,p0=r31,r30
1034 add r31=r31,carry2 };;
1035{ .mfb; getf.sig r22=f57 }
1036{ .mii; (p7) add carry3=1,carry3
1037 cmp.ltu p7,p0=r19,r18
1038 add r20=r20,r19 }
1039{ .mfb; nop.m 0x0 }
1040{ .mii;
1041(p6) add carry1=1,carry1
1042 cmp.ltu p6,p0=r31,carry2 };;
1043{ .mfb; getf.sig r23=f48 }
1044{ .mii; (p7) add carry3=1,carry3
1045 cmp.ltu p7,p0=r20,r19
1046 add r21=r21,r20 }
1047{ .mii;
1048(p6) add carry1=1,carry1 }
1049{ .mfb; st8 [r33]=r31,16 };;
1050
1051{ .mfb; getf.sig r24=f112 }
1052{ .mii; (p7) add carry3=1,carry3
1053 cmp.ltu p7,p0=r21,r20
1054 add r22=r22,r21 };;
1055{ .mfb; getf.sig r25=f103 }
1056{ .mii; (p7) add carry3=1,carry3
1057 cmp.ltu p7,p0=r22,r21
1058 add r23=r23,r22 };;
1059{ .mfb; getf.sig r26=f94 }
1060{ .mii; (p7) add carry3=1,carry3
1061 cmp.ltu p7,p0=r23,r22
1062 add r23=r23,carry1 };;
1063{ .mfb; getf.sig r27=f85 }
1064{ .mii; (p7) add carry3=1,carry3
1065 cmp.ltu p7,p8=r23,carry1};;
1066{ .mii; getf.sig r28=f76
1067 add r25=r25,r24
1068 mov carry1=0 }
1069{ .mii; st8 [r32]=r23,16
1070 (p7) add carry2=1,carry3
1071 (p8) add carry2=0,carry3 };;
1072
1073{ .mfb; nop.m 0x0 }
1074{ .mii; getf.sig r29=f67
1075 cmp.ltu p6,p0=r25,r24
1076 add r26=r26,r25 };;
1077{ .mfb; getf.sig r30=f58 }
1078{ .mii;
1079(p6) add carry1=1,carry1
1080 cmp.ltu p6,p0=r26,r25
1081 add r27=r27,r26 };;
1082{ .mfb; getf.sig r16=f113 }
1083{ .mii;
1084(p6) add carry1=1,carry1
1085 cmp.ltu p6,p0=r27,r26
1086 add r28=r28,r27 };;
1087{ .mfb; getf.sig r17=f104 }
1088{ .mii;
1089(p6) add carry1=1,carry1
1090 cmp.ltu p6,p0=r28,r27
1091 add r29=r29,r28 };;
1092{ .mfb; getf.sig r18=f95 }
1093{ .mii;
1094(p6) add carry1=1,carry1
1095 cmp.ltu p6,p0=r29,r28
1096 add r30=r30,r29 };;
1097{ .mii; getf.sig r19=f86
1098 add r17=r17,r16
1099 mov carry3=0 }
1100{ .mii;
1101(p6) add carry1=1,carry1
1102 cmp.ltu p6,p0=r30,r29
1103 add r30=r30,carry2 };;
1104{ .mii; getf.sig r20=f77
1105 cmp.ltu p7,p0=r17,r16
1106 add r18=r18,r17 }
1107{ .mii;
1108(p6) add carry1=1,carry1
1109 cmp.ltu p6,p0=r30,carry2 };;
1110{ .mfb; getf.sig r21=f68 }
1111{ .mii; st8 [r33]=r30,16
1112(p6) add carry1=1,carry1 };;
1113
1114{ .mfb; getf.sig r24=f114 }
1115{ .mii; (p7) add carry3=1,carry3
1116 cmp.ltu p7,p0=r18,r17
1117 add r19=r19,r18 };;
1118{ .mfb; getf.sig r25=f105 }
1119{ .mii; (p7) add carry3=1,carry3
1120 cmp.ltu p7,p0=r19,r18
1121 add r20=r20,r19 };;
1122{ .mfb; getf.sig r26=f96 }
1123{ .mii; (p7) add carry3=1,carry3
1124 cmp.ltu p7,p0=r20,r19
1125 add r21=r21,r20 };;
1126{ .mfb; getf.sig r27=f87 }
1127{ .mii; (p7) add carry3=1,carry3
1128 cmp.ltu p7,p0=r21,r20
1129 add r21=r21,carry1 };;
1130{ .mib; getf.sig r28=f78
1131 add r25=r25,r24 }
1132{ .mib; (p7) add carry3=1,carry3
1133 cmp.ltu p7,p8=r21,carry1};;
1134{ .mii; st8 [r32]=r21,16
1135 (p7) add carry2=1,carry3
1136 (p8) add carry2=0,carry3 }
1137
1138{ .mii; mov carry1=0
1139 cmp.ltu p6,p0=r25,r24
1140 add r26=r26,r25 };;
1141{ .mfb; getf.sig r16=f115 }
1142{ .mii;
1143(p6) add carry1=1,carry1
1144 cmp.ltu p6,p0=r26,r25
1145 add r27=r27,r26 };;
1146{ .mfb; getf.sig r17=f106 }
1147{ .mii;
1148(p6) add carry1=1,carry1
1149 cmp.ltu p6,p0=r27,r26
1150 add r28=r28,r27 };;
1151{ .mfb; getf.sig r18=f97 }
1152{ .mii;
1153(p6) add carry1=1,carry1
1154 cmp.ltu p6,p0=r28,r27
1155 add r28=r28,carry2 };;
1156{ .mib; getf.sig r19=f88
1157 add r17=r17,r16 }
1158{ .mib;
1159(p6) add carry1=1,carry1
1160 cmp.ltu p6,p0=r28,carry2 };;
1161{ .mii; st8 [r33]=r28,16
1162(p6) add carry1=1,carry1 }
1163
1164{ .mii; mov carry2=0
1165 cmp.ltu p7,p0=r17,r16
1166 add r18=r18,r17 };;
1167{ .mfb; getf.sig r24=f116 }
1168{ .mii; (p7) add carry2=1,carry2
1169 cmp.ltu p7,p0=r18,r17
1170 add r19=r19,r18 };;
1171{ .mfb; getf.sig r25=f107 }
1172{ .mii; (p7) add carry2=1,carry2
1173 cmp.ltu p7,p0=r19,r18
1174 add r19=r19,carry1 };;
1175{ .mfb; getf.sig r26=f98 }
1176{ .mii; (p7) add carry2=1,carry2
1177 cmp.ltu p7,p0=r19,carry1};;
1178{ .mii; st8 [r32]=r19,16
1179 (p7) add carry2=1,carry2 }
1180
1181{ .mfb; add r25=r25,r24 };;
1182
1183{ .mfb; getf.sig r16=f117 }
1184{ .mii; mov carry1=0
1185 cmp.ltu p6,p0=r25,r24
1186 add r26=r26,r25 };;
1187{ .mfb; getf.sig r17=f108 }
1188{ .mii;
1189(p6) add carry1=1,carry1
1190 cmp.ltu p6,p0=r26,r25
1191 add r26=r26,carry2 };;
1192{ .mfb; nop.m 0x0 }
1193{ .mii;
1194(p6) add carry1=1,carry1
1195 cmp.ltu p6,p0=r26,carry2 };;
1196{ .mii; st8 [r33]=r26,16
1197(p6) add carry1=1,carry1 }
1198
1199{ .mfb; add r17=r17,r16 };;
1200{ .mfb; getf.sig r24=f118 }
1201{ .mii; mov carry2=0
1202 cmp.ltu p7,p0=r17,r16
1203 add r17=r17,carry1 };;
1204{ .mii; (p7) add carry2=1,carry2
1205 cmp.ltu p7,p0=r17,carry1};;
1206{ .mii; st8 [r32]=r17
1207 (p7) add carry2=1,carry2 };;
1208{ .mfb; add r24=r24,carry2 };;
1209{ .mib; st8 [r33]=r24 }
1210
1211{ .mib; rum 1<<5 // clear um.mfh
1212 br.ret.sptk.many b0 };;
1213.endp bn_mul_comba8#
1214#undef carry3
1215#undef carry2
1216#undef carry1
1217#endif
1218
1219#if 1
1220// It's possible to make it faster (see comment to bn_sqr_comba8), but
1221// I reckon it doesn't worth the effort. Basically because the routine
1222// (actually both of them) practically never called... So I just play
1223// same trick as with bn_sqr_comba8.
1224//
1225// void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a)
1226//
1227.global bn_sqr_comba4#
1228.proc bn_sqr_comba4#
1229.align 64
1230bn_sqr_comba4:
1231 .prologue
1232 .fframe 0
1233 .save ar.pfs,r2
1234#if defined(_HPUX_SOURCE) && defined(_ILP32)
1235{ .mii; alloc r2=ar.pfs,2,1,0,0
1236 addp4 r32=0,r32
1237 addp4 r33=0,r33 };;
1238{ .mii;
1239#else
1240{ .mii; alloc r2=ar.pfs,2,1,0,0
1241#endif
1242 mov r34=r33
1243 add r14=8,r33 };;
1244 .body
1245{ .mii; add r17=8,r34
1246 add r15=16,r33
1247 add r18=16,r34 }
1248{ .mfb; add r16=24,r33
1249 br .L_cheat_entry_point4 };;
1250.endp bn_sqr_comba4#
1251#endif
1252
1253#if 1
1254// Runs in ~115 cycles and ~4.5 times faster than C. Well, whatever...
1255//
1256// void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
1257//
1258#define carry1 r14
1259#define carry2 r15
1260.global bn_mul_comba4#
1261.proc bn_mul_comba4#
1262.align 64
1263bn_mul_comba4:
1264 .prologue
1265 .fframe 0
1266 .save ar.pfs,r2
1267#if defined(_HPUX_SOURCE) && defined(_ILP32)
1268{ .mii; alloc r2=ar.pfs,3,0,0,0
1269 addp4 r33=0,r33
1270 addp4 r34=0,r34 };;
1271{ .mii; addp4 r32=0,r32
1272#else
1273{ .mii; alloc r2=ar.pfs,3,0,0,0
1274#endif
1275 add r14=8,r33
1276 add r17=8,r34 }
1277 .body
1278{ .mii; add r15=16,r33
1279 add r18=16,r34
1280 add r16=24,r33 };;
1281.L_cheat_entry_point4:
1282{ .mmi; add r19=24,r34
1283
1284 ldf8 f32=[r33] }
1285
1286{ .mmi; ldf8 f120=[r34]
1287 ldf8 f121=[r17] };;
1288{ .mmi; ldf8 f122=[r18]
1289 ldf8 f123=[r19] }
1290
1291{ .mmi; ldf8 f33=[r14]
1292 ldf8 f34=[r15] }
1293{ .mfi; ldf8 f35=[r16]
1294
1295 xma.hu f41=f32,f120,f0 }
1296{ .mfi; xma.lu f40=f32,f120,f0 };;
1297{ .mfi; xma.hu f51=f32,f121,f0 }
1298{ .mfi; xma.lu f50=f32,f121,f0 };;
1299{ .mfi; xma.hu f61=f32,f122,f0 }
1300{ .mfi; xma.lu f60=f32,f122,f0 };;
1301{ .mfi; xma.hu f71=f32,f123,f0 }
1302{ .mfi; xma.lu f70=f32,f123,f0 };;//
1303// Major stall takes place here, and 3 more places below. Result from
1304// first xma is not available for another 3 ticks.
1305{ .mfi; getf.sig r16=f40
1306 xma.hu f42=f33,f120,f41
1307 add r33=8,r32 }
1308{ .mfi; xma.lu f41=f33,f120,f41 };;
1309{ .mfi; getf.sig r24=f50
1310 xma.hu f52=f33,f121,f51 }
1311{ .mfi; xma.lu f51=f33,f121,f51 };;
1312{ .mfi; st8 [r32]=r16,16
1313 xma.hu f62=f33,f122,f61 }
1314{ .mfi; xma.lu f61=f33,f122,f61 };;
1315{ .mfi; xma.hu f72=f33,f123,f71 }
1316{ .mfi; xma.lu f71=f33,f123,f71 };;//
1317//-------------------------------------------------//
1318{ .mfi; getf.sig r25=f41
1319 xma.hu f43=f34,f120,f42 }
1320{ .mfi; xma.lu f42=f34,f120,f42 };;
1321{ .mfi; getf.sig r16=f60
1322 xma.hu f53=f34,f121,f52 }
1323{ .mfi; xma.lu f52=f34,f121,f52 };;
1324{ .mfi; getf.sig r17=f51
1325 xma.hu f63=f34,f122,f62
1326 add r25=r25,r24 }
1327{ .mfi; mov carry1=0
1328 xma.lu f62=f34,f122,f62 };;
1329{ .mfi; st8 [r33]=r25,16
1330 xma.hu f73=f34,f123,f72
1331 cmp.ltu p6,p0=r25,r24 }
1332{ .mfi; xma.lu f72=f34,f123,f72 };;//
1333//-------------------------------------------------//
1334{ .mfi; getf.sig r18=f42
1335 xma.hu f44=f35,f120,f43
1336(p6) add carry1=1,carry1 }
1337{ .mfi; add r17=r17,r16
1338 xma.lu f43=f35,f120,f43
1339 mov carry2=0 };;
1340{ .mfi; getf.sig r24=f70
1341 xma.hu f54=f35,f121,f53
1342 cmp.ltu p7,p0=r17,r16 }
1343{ .mfi; xma.lu f53=f35,f121,f53 };;
1344{ .mfi; getf.sig r25=f61
1345 xma.hu f64=f35,f122,f63
1346 add r18=r18,r17 }
1347{ .mfi; xma.lu f63=f35,f122,f63
1348(p7) add carry2=1,carry2 };;
1349{ .mfi; getf.sig r26=f52
1350 xma.hu f74=f35,f123,f73
1351 cmp.ltu p7,p0=r18,r17 }
1352{ .mfi; xma.lu f73=f35,f123,f73
1353 add r18=r18,carry1 };;
1354//-------------------------------------------------//
1355{ .mii; st8 [r32]=r18,16
1356(p7) add carry2=1,carry2
1357 cmp.ltu p7,p0=r18,carry1 };;
1358
1359{ .mfi; getf.sig r27=f43 // last major stall
1360(p7) add carry2=1,carry2 };;
1361{ .mii; getf.sig r16=f71
1362 add r25=r25,r24
1363 mov carry1=0 };;
1364{ .mii; getf.sig r17=f62
1365 cmp.ltu p6,p0=r25,r24
1366 add r26=r26,r25 };;
1367{ .mii;
1368(p6) add carry1=1,carry1
1369 cmp.ltu p6,p0=r26,r25
1370 add r27=r27,r26 };;
1371{ .mii;
1372(p6) add carry1=1,carry1
1373 cmp.ltu p6,p0=r27,r26
1374 add r27=r27,carry2 };;
1375{ .mii; getf.sig r18=f53
1376(p6) add carry1=1,carry1
1377 cmp.ltu p6,p0=r27,carry2 };;
1378{ .mfi; st8 [r33]=r27,16
1379(p6) add carry1=1,carry1 }
1380
1381{ .mii; getf.sig r19=f44
1382 add r17=r17,r16
1383 mov carry2=0 };;
1384{ .mii; getf.sig r24=f72
1385 cmp.ltu p7,p0=r17,r16
1386 add r18=r18,r17 };;
1387{ .mii; (p7) add carry2=1,carry2
1388 cmp.ltu p7,p0=r18,r17
1389 add r19=r19,r18 };;
1390{ .mii; (p7) add carry2=1,carry2
1391 cmp.ltu p7,p0=r19,r18
1392 add r19=r19,carry1 };;
1393{ .mii; getf.sig r25=f63
1394 (p7) add carry2=1,carry2
1395 cmp.ltu p7,p0=r19,carry1};;
1396{ .mii; st8 [r32]=r19,16
1397 (p7) add carry2=1,carry2 }
1398
1399{ .mii; getf.sig r26=f54
1400 add r25=r25,r24
1401 mov carry1=0 };;
1402{ .mii; getf.sig r16=f73
1403 cmp.ltu p6,p0=r25,r24
1404 add r26=r26,r25 };;
1405{ .mii;
1406(p6) add carry1=1,carry1
1407 cmp.ltu p6,p0=r26,r25
1408 add r26=r26,carry2 };;
1409{ .mii; getf.sig r17=f64
1410(p6) add carry1=1,carry1
1411 cmp.ltu p6,p0=r26,carry2 };;
1412{ .mii; st8 [r33]=r26,16
1413(p6) add carry1=1,carry1 }
1414
1415{ .mii; getf.sig r24=f74
1416 add r17=r17,r16
1417 mov carry2=0 };;
1418{ .mii; cmp.ltu p7,p0=r17,r16
1419 add r17=r17,carry1 };;
1420
1421{ .mii; (p7) add carry2=1,carry2
1422 cmp.ltu p7,p0=r17,carry1};;
1423{ .mii; st8 [r32]=r17,16
1424 (p7) add carry2=1,carry2 };;
1425
1426{ .mii; add r24=r24,carry2 };;
1427{ .mii; st8 [r33]=r24 }
1428
1429{ .mib; rum 1<<5 // clear um.mfh
1430 br.ret.sptk.many b0 };;
1431.endp bn_mul_comba4#
1432#undef carry2
1433#undef carry1
1434#endif
1435
1436#if 1
1437//
1438// BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
1439//
1440// In the nutshell it's a port of my MIPS III/IV implementation.
1441//
1442#define AT r14
1443#define H r16
1444#define HH r20
1445#define L r17
1446#define D r18
1447#define DH r22
1448#define I r21
1449
1450#if 0
1451// Some preprocessors (most notably HP-UX) apper to be allergic to
1452// macros enclosed to parenthesis as these three will be.
1453#define cont p16
1454#define break p0 // p20
1455#define equ p24
1456#else
1457cont=p16
1458break=p0
1459equ=p24
1460#endif
1461
1462.global abort#
1463.global bn_div_words#
1464.proc bn_div_words#
1465.align 64
1466bn_div_words:
1467 .prologue
1468 .fframe 0
1469 .save ar.pfs,r2
1470 .save b0,r3
1471{ .mii; alloc r2=ar.pfs,3,5,0,8
1472 mov r3=b0
1473 mov r10=pr };;
1474{ .mmb; cmp.eq p6,p0=r34,r0
1475 mov r8=-1
1476(p6) br.ret.spnt.many b0 };;
1477
1478 .body
1479{ .mii; mov H=r32 // save h
1480 mov ar.ec=0 // don't rotate at exit
1481 mov pr.rot=0 }
1482{ .mii; mov L=r33 // save l
1483 mov r36=r0 };;
1484
1485.L_divw_shift: // -vv- note signed comparison
1486{ .mfi; (p0) cmp.lt p16,p0=r0,r34 // d
1487 (p0) shladd r33=r34,1,r0 }
1488{ .mfb; (p0) add r35=1,r36
1489 (p0) nop.f 0x0
1490(p16) br.wtop.dpnt .L_divw_shift };;
1491
1492{ .mii; mov D=r34
1493 shr.u DH=r34,32
1494 sub r35=64,r36 };;
1495{ .mii; setf.sig f7=DH
1496 shr.u AT=H,r35
1497 mov I=r36 };;
1498{ .mib; cmp.ne p6,p0=r0,AT
1499 shl H=H,r36
1500(p6) br.call.spnt.clr b0=abort };; // overflow, die...
1501
1502{ .mfi; fcvt.xuf.s1 f7=f7
1503 shr.u AT=L,r35 };;
1504{ .mii; shl L=L,r36
1505 or H=H,AT };;
1506
1507{ .mii; nop.m 0x0
1508 cmp.leu p6,p0=D,H;;
1509(p6) sub H=H,D }
1510
1511{ .mlx; setf.sig f14=D
1512 movl AT=0xffffffff };;
1513///////////////////////////////////////////////////////////
1514{ .mii; setf.sig f6=H
1515 shr.u HH=H,32;;
1516 cmp.eq p6,p7=HH,DH };;
1517{ .mfb;
1518(p6) setf.sig f8=AT
1519(p7) fcvt.xuf.s1 f6=f6
1520(p7) br.call.sptk b6=.L_udiv64_32_b6 };;
1521
1522{ .mfi; getf.sig r33=f8 // q
1523 xmpy.lu f9=f8,f14 }
1524{ .mfi; xmpy.hu f10=f8,f14
1525 shrp H=H,L,32 };;
1526
1527{ .mmi; getf.sig r35=f9 // tl
1528 getf.sig r31=f10 };; // th
1529
1530.L_divw_1st_iter:
1531{ .mii; (p0) add r32=-1,r33
1532 (p0) cmp.eq equ,cont=HH,r31 };;
1533{ .mii; (p0) cmp.ltu p8,p0=r35,D
1534 (p0) sub r34=r35,D
1535 (equ) cmp.leu break,cont=r35,H };;
1536{ .mib; (cont) cmp.leu cont,break=HH,r31
1537 (p8) add r31=-1,r31
1538(cont) br.wtop.spnt .L_divw_1st_iter };;
1539///////////////////////////////////////////////////////////
1540{ .mii; sub H=H,r35
1541 shl r8=r33,32
1542 shl L=L,32 };;
1543///////////////////////////////////////////////////////////
1544{ .mii; setf.sig f6=H
1545 shr.u HH=H,32;;
1546 cmp.eq p6,p7=HH,DH };;
1547{ .mfb;
1548(p6) setf.sig f8=AT
1549(p7) fcvt.xuf.s1 f6=f6
1550(p7) br.call.sptk b6=.L_udiv64_32_b6 };;
1551
1552{ .mfi; getf.sig r33=f8 // q
1553 xmpy.lu f9=f8,f14 }
1554{ .mfi; xmpy.hu f10=f8,f14
1555 shrp H=H,L,32 };;
1556
1557{ .mmi; getf.sig r35=f9 // tl
1558 getf.sig r31=f10 };; // th
1559
1560.L_divw_2nd_iter:
1561{ .mii; (p0) add r32=-1,r33
1562 (p0) cmp.eq equ,cont=HH,r31 };;
1563{ .mii; (p0) cmp.ltu p8,p0=r35,D
1564 (p0) sub r34=r35,D
1565 (equ) cmp.leu break,cont=r35,H };;
1566{ .mib; (cont) cmp.leu cont,break=HH,r31
1567 (p8) add r31=-1,r31
1568(cont) br.wtop.spnt .L_divw_2nd_iter };;
1569///////////////////////////////////////////////////////////
1570{ .mii; sub H=H,r35
1571 or r8=r8,r33
1572 mov ar.pfs=r2 };;
1573{ .mii; shr.u r9=H,I // remainder if anybody wants it
1574 mov pr=r10,0x1ffff }
1575{ .mfb; br.ret.sptk.many b0 };;
1576
1577// Unsigned 64 by 32 (well, by 64 for the moment) bit integer division
1578// procedure.
1579//
1580// inputs: f6 = (double)a, f7 = (double)b
1581// output: f8 = (int)(a/b)
1582// clobbered: f8,f9,f10,f11,pred
1583pred=p15
1584// This procedure is essentially Intel code and therefore is
1585// copyrighted to Intel Corporation (I suppose...). It's sligtly
1586// modified for specific needs.
1587.align 32
1588.skip 16
1589.L_udiv64_32_b6:
1590 frcpa.s1 f8,pred=f6,f7;; // [0] y0 = 1 / b
1591
1592(pred) fnma.s1 f9=f7,f8,f1 // [5] e0 = 1 - b * y0
1593(pred) fmpy.s1 f10=f6,f8;; // [5] q0 = a * y0
1594(pred) fmpy.s1 f11=f9,f9 // [10] e1 = e0 * e0
1595(pred) fma.s1 f10=f9,f10,f10;; // [10] q1 = q0 + e0 * q0
1596(pred) fma.s1 f8=f9,f8,f8 //;; // [15] y1 = y0 + e0 * y0
1597(pred) fma.s1 f9=f11,f10,f10;; // [15] q2 = q1 + e1 * q1
1598(pred) fma.s1 f8=f11,f8,f8 //;; // [20] y2 = y1 + e1 * y1
1599(pred) fnma.s1 f10=f7,f9,f6;; // [20] r2 = a - b * q2
1600(pred) fma.s1 f8=f10,f8,f9;; // [25] q3 = q2 + r2 * y2
1601
1602 fcvt.fxu.trunc.s1 f8=f8 // [30] q = trunc(q3)
1603 br.ret.sptk.many b6;;
1604.endp bn_div_words#
1605#endif
diff --git a/src/lib/libcrypto/bn/asm/pa-risc2.s b/src/lib/libcrypto/bn/asm/pa-risc2.s
deleted file mode 100644
index f3b16290eb..0000000000
--- a/src/lib/libcrypto/bn/asm/pa-risc2.s
+++ /dev/null
@@ -1,1618 +0,0 @@
1;
2; PA-RISC 2.0 implementation of bn_asm code, based on the
3; 64-bit version of the code. This code is effectively the
4; same as the 64-bit version except the register model is
5; slightly different given all values must be 32-bit between
6; function calls. Thus the 64-bit return values are returned
7; in %ret0 and %ret1 vs just %ret0 as is done in 64-bit
8;
9;
10; This code is approximately 2x faster than the C version
11; for RSA/DSA.
12;
13; See http://devresource.hp.com/ for more details on the PA-RISC
14; architecture. Also see the book "PA-RISC 2.0 Architecture"
15; by Gerry Kane for information on the instruction set architecture.
16;
17; Code written by Chris Ruemmler (with some help from the HP C
18; compiler).
19;
20; The code compiles with HP's assembler
21;
22
23 .level 2.0N
24 .space $TEXT$
25 .subspa $CODE$,QUAD=0,ALIGN=8,ACCESS=0x2c,CODE_ONLY
26
27;
28; Global Register definitions used for the routines.
29;
30; Some information about HP's runtime architecture for 32-bits.
31;
32; "Caller save" means the calling function must save the register
33; if it wants the register to be preserved.
34; "Callee save" means if a function uses the register, it must save
35; the value before using it.
36;
37; For the floating point registers
38;
39; "caller save" registers: fr4-fr11, fr22-fr31
40; "callee save" registers: fr12-fr21
41; "special" registers: fr0-fr3 (status and exception registers)
42;
43; For the integer registers
44; value zero : r0
45; "caller save" registers: r1,r19-r26
46; "callee save" registers: r3-r18
47; return register : r2 (rp)
48; return values ; r28,r29 (ret0,ret1)
49; Stack pointer ; r30 (sp)
50; millicode return ptr ; r31 (also a caller save register)
51
52
53;
54; Arguments to the routines
55;
56r_ptr .reg %r26
57a_ptr .reg %r25
58b_ptr .reg %r24
59num .reg %r24
60n .reg %r23
61
62;
63; Note that the "w" argument for bn_mul_add_words and bn_mul_words
64; is passed on the stack at a delta of -56 from the top of stack
65; as the routine is entered.
66;
67
68;
69; Globals used in some routines
70;
71
72top_overflow .reg %r23
73high_mask .reg %r22 ; value 0xffffffff80000000L
74
75
76;------------------------------------------------------------------------------
77;
78; bn_mul_add_words
79;
80;BN_ULONG bn_mul_add_words(BN_ULONG *r_ptr, BN_ULONG *a_ptr,
81; int num, BN_ULONG w)
82;
83; arg0 = r_ptr
84; arg1 = a_ptr
85; arg3 = num
86; -56(sp) = w
87;
88; Local register definitions
89;
90
91fm1 .reg %fr22
92fm .reg %fr23
93ht_temp .reg %fr24
94ht_temp_1 .reg %fr25
95lt_temp .reg %fr26
96lt_temp_1 .reg %fr27
97fm1_1 .reg %fr28
98fm_1 .reg %fr29
99
100fw_h .reg %fr7L
101fw_l .reg %fr7R
102fw .reg %fr7
103
104fht_0 .reg %fr8L
105flt_0 .reg %fr8R
106t_float_0 .reg %fr8
107
108fht_1 .reg %fr9L
109flt_1 .reg %fr9R
110t_float_1 .reg %fr9
111
112tmp_0 .reg %r31
113tmp_1 .reg %r21
114m_0 .reg %r20
115m_1 .reg %r19
116ht_0 .reg %r1
117ht_1 .reg %r3
118lt_0 .reg %r4
119lt_1 .reg %r5
120m1_0 .reg %r6
121m1_1 .reg %r7
122rp_val .reg %r8
123rp_val_1 .reg %r9
124
125bn_mul_add_words
126 .export bn_mul_add_words,entry,NO_RELOCATION,LONG_RETURN
127 .proc
128 .callinfo frame=128
129 .entry
130 .align 64
131
132 STD %r3,0(%sp) ; save r3
133 STD %r4,8(%sp) ; save r4
134 NOP ; Needed to make the loop 16-byte aligned
135 NOP ; needed to make the loop 16-byte aligned
136
137 STD %r5,16(%sp) ; save r5
138 NOP
139 STD %r6,24(%sp) ; save r6
140 STD %r7,32(%sp) ; save r7
141
142 STD %r8,40(%sp) ; save r8
143 STD %r9,48(%sp) ; save r9
144 COPY %r0,%ret1 ; return 0 by default
145 DEPDI,Z 1,31,1,top_overflow ; top_overflow = 1 << 32
146
147 CMPIB,>= 0,num,bn_mul_add_words_exit ; if (num <= 0) then exit
148 LDO 128(%sp),%sp ; bump stack
149
150 ;
151 ; The loop is unrolled twice, so if there is only 1 number
152 ; then go straight to the cleanup code.
153 ;
154 CMPIB,= 1,num,bn_mul_add_words_single_top
155 FLDD -184(%sp),fw ; (-56-128) load up w into fw (fw_h/fw_l)
156
157 ;
158 ; This loop is unrolled 2 times (64-byte aligned as well)
159 ;
160 ; PA-RISC 2.0 chips have two fully pipelined multipliers, thus
161 ; two 32-bit mutiplies can be issued per cycle.
162 ;
163bn_mul_add_words_unroll2
164
165 FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R)
166 FLDD 8(a_ptr),t_float_1 ; load up 64-bit value (fr8L) ht(L)/lt(R)
167 LDD 0(r_ptr),rp_val ; rp[0]
168 LDD 8(r_ptr),rp_val_1 ; rp[1]
169
170 XMPYU fht_0,fw_l,fm1 ; m1[0] = fht_0*fw_l
171 XMPYU fht_1,fw_l,fm1_1 ; m1[1] = fht_1*fw_l
172 FSTD fm1,-16(%sp) ; -16(sp) = m1[0]
173 FSTD fm1_1,-48(%sp) ; -48(sp) = m1[1]
174
175 XMPYU flt_0,fw_h,fm ; m[0] = flt_0*fw_h
176 XMPYU flt_1,fw_h,fm_1 ; m[1] = flt_1*fw_h
177 FSTD fm,-8(%sp) ; -8(sp) = m[0]
178 FSTD fm_1,-40(%sp) ; -40(sp) = m[1]
179
180 XMPYU fht_0,fw_h,ht_temp ; ht_temp = fht_0*fw_h
181 XMPYU fht_1,fw_h,ht_temp_1 ; ht_temp_1 = fht_1*fw_h
182 FSTD ht_temp,-24(%sp) ; -24(sp) = ht_temp
183 FSTD ht_temp_1,-56(%sp) ; -56(sp) = ht_temp_1
184
185 XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l
186 XMPYU flt_1,fw_l,lt_temp_1 ; lt_temp = lt*fw_l
187 FSTD lt_temp,-32(%sp) ; -32(sp) = lt_temp
188 FSTD lt_temp_1,-64(%sp) ; -64(sp) = lt_temp_1
189
190 LDD -8(%sp),m_0 ; m[0]
191 LDD -40(%sp),m_1 ; m[1]
192 LDD -16(%sp),m1_0 ; m1[0]
193 LDD -48(%sp),m1_1 ; m1[1]
194
195 LDD -24(%sp),ht_0 ; ht[0]
196 LDD -56(%sp),ht_1 ; ht[1]
197 ADD,L m1_0,m_0,tmp_0 ; tmp_0 = m[0] + m1[0];
198 ADD,L m1_1,m_1,tmp_1 ; tmp_1 = m[1] + m1[1];
199
200 LDD -32(%sp),lt_0
201 LDD -64(%sp),lt_1
202 CMPCLR,*>>= tmp_0,m1_0, %r0 ; if (m[0] < m1[0])
203 ADD,L ht_0,top_overflow,ht_0 ; ht[0] += (1<<32)
204
205 CMPCLR,*>>= tmp_1,m1_1,%r0 ; if (m[1] < m1[1])
206 ADD,L ht_1,top_overflow,ht_1 ; ht[1] += (1<<32)
207 EXTRD,U tmp_0,31,32,m_0 ; m[0]>>32
208 DEPD,Z tmp_0,31,32,m1_0 ; m1[0] = m[0]<<32
209
210 EXTRD,U tmp_1,31,32,m_1 ; m[1]>>32
211 DEPD,Z tmp_1,31,32,m1_1 ; m1[1] = m[1]<<32
212 ADD,L ht_0,m_0,ht_0 ; ht[0]+= (m[0]>>32)
213 ADD,L ht_1,m_1,ht_1 ; ht[1]+= (m[1]>>32)
214
215 ADD lt_0,m1_0,lt_0 ; lt[0] = lt[0]+m1[0];
216 ADD,DC ht_0,%r0,ht_0 ; ht[0]++
217 ADD lt_1,m1_1,lt_1 ; lt[1] = lt[1]+m1[1];
218 ADD,DC ht_1,%r0,ht_1 ; ht[1]++
219
220 ADD %ret1,lt_0,lt_0 ; lt[0] = lt[0] + c;
221 ADD,DC ht_0,%r0,ht_0 ; ht[0]++
222 ADD lt_0,rp_val,lt_0 ; lt[0] = lt[0]+rp[0]
223 ADD,DC ht_0,%r0,ht_0 ; ht[0]++
224
225 LDO -2(num),num ; num = num - 2;
226 ADD ht_0,lt_1,lt_1 ; lt[1] = lt[1] + ht_0 (c);
227 ADD,DC ht_1,%r0,ht_1 ; ht[1]++
228 STD lt_0,0(r_ptr) ; rp[0] = lt[0]
229
230 ADD lt_1,rp_val_1,lt_1 ; lt[1] = lt[1]+rp[1]
231 ADD,DC ht_1,%r0,%ret1 ; ht[1]++
232 LDO 16(a_ptr),a_ptr ; a_ptr += 2
233
234 STD lt_1,8(r_ptr) ; rp[1] = lt[1]
235 CMPIB,<= 2,num,bn_mul_add_words_unroll2 ; go again if more to do
236 LDO 16(r_ptr),r_ptr ; r_ptr += 2
237
238 CMPIB,=,N 0,num,bn_mul_add_words_exit ; are we done, or cleanup last one
239
240 ;
241 ; Top of loop aligned on 64-byte boundary
242 ;
243bn_mul_add_words_single_top
244 FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R)
245 LDD 0(r_ptr),rp_val ; rp[0]
246 LDO 8(a_ptr),a_ptr ; a_ptr++
247 XMPYU fht_0,fw_l,fm1 ; m1 = ht*fw_l
248 FSTD fm1,-16(%sp) ; -16(sp) = m1
249 XMPYU flt_0,fw_h,fm ; m = lt*fw_h
250 FSTD fm,-8(%sp) ; -8(sp) = m
251 XMPYU fht_0,fw_h,ht_temp ; ht_temp = ht*fw_h
252 FSTD ht_temp,-24(%sp) ; -24(sp) = ht
253 XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l
254 FSTD lt_temp,-32(%sp) ; -32(sp) = lt
255
256 LDD -8(%sp),m_0
257 LDD -16(%sp),m1_0 ; m1 = temp1
258 ADD,L m_0,m1_0,tmp_0 ; tmp_0 = m + m1;
259 LDD -24(%sp),ht_0
260 LDD -32(%sp),lt_0
261
262 CMPCLR,*>>= tmp_0,m1_0,%r0 ; if (m < m1)
263 ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32)
264
265 EXTRD,U tmp_0,31,32,m_0 ; m>>32
266 DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32
267
268 ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32)
269 ADD lt_0,m1_0,tmp_0 ; tmp_0 = lt+m1;
270 ADD,DC ht_0,%r0,ht_0 ; ht++
271 ADD %ret1,tmp_0,lt_0 ; lt = lt + c;
272 ADD,DC ht_0,%r0,ht_0 ; ht++
273 ADD lt_0,rp_val,lt_0 ; lt = lt+rp[0]
274 ADD,DC ht_0,%r0,%ret1 ; ht++
275 STD lt_0,0(r_ptr) ; rp[0] = lt
276
277bn_mul_add_words_exit
278 .EXIT
279
280 EXTRD,U %ret1,31,32,%ret0 ; for 32-bit, return in ret0/ret1
281 LDD -80(%sp),%r9 ; restore r9
282 LDD -88(%sp),%r8 ; restore r8
283 LDD -96(%sp),%r7 ; restore r7
284 LDD -104(%sp),%r6 ; restore r6
285 LDD -112(%sp),%r5 ; restore r5
286 LDD -120(%sp),%r4 ; restore r4
287 BVE (%rp)
288 LDD,MB -128(%sp),%r3 ; restore r3
289 .PROCEND ;in=23,24,25,26,29;out=28;
290
291;----------------------------------------------------------------------------
292;
293;BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
294;
295; arg0 = rp
296; arg1 = ap
297; arg3 = num
298; w on stack at -56(sp)
299
300bn_mul_words
301 .proc
302 .callinfo frame=128
303 .entry
304 .EXPORT bn_mul_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
305 .align 64
306
307 STD %r3,0(%sp) ; save r3
308 STD %r4,8(%sp) ; save r4
309 NOP
310 STD %r5,16(%sp) ; save r5
311
312 STD %r6,24(%sp) ; save r6
313 STD %r7,32(%sp) ; save r7
314 COPY %r0,%ret1 ; return 0 by default
315 DEPDI,Z 1,31,1,top_overflow ; top_overflow = 1 << 32
316
317 CMPIB,>= 0,num,bn_mul_words_exit
318 LDO 128(%sp),%sp ; bump stack
319
320 ;
321 ; See if only 1 word to do, thus just do cleanup
322 ;
323 CMPIB,= 1,num,bn_mul_words_single_top
324 FLDD -184(%sp),fw ; (-56-128) load up w into fw (fw_h/fw_l)
325
326 ;
327 ; This loop is unrolled 2 times (64-byte aligned as well)
328 ;
329 ; PA-RISC 2.0 chips have two fully pipelined multipliers, thus
330 ; two 32-bit mutiplies can be issued per cycle.
331 ;
332bn_mul_words_unroll2
333
334 FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R)
335 FLDD 8(a_ptr),t_float_1 ; load up 64-bit value (fr8L) ht(L)/lt(R)
336 XMPYU fht_0,fw_l,fm1 ; m1[0] = fht_0*fw_l
337 XMPYU fht_1,fw_l,fm1_1 ; m1[1] = ht*fw_l
338
339 FSTD fm1,-16(%sp) ; -16(sp) = m1
340 FSTD fm1_1,-48(%sp) ; -48(sp) = m1
341 XMPYU flt_0,fw_h,fm ; m = lt*fw_h
342 XMPYU flt_1,fw_h,fm_1 ; m = lt*fw_h
343
344 FSTD fm,-8(%sp) ; -8(sp) = m
345 FSTD fm_1,-40(%sp) ; -40(sp) = m
346 XMPYU fht_0,fw_h,ht_temp ; ht_temp = fht_0*fw_h
347 XMPYU fht_1,fw_h,ht_temp_1 ; ht_temp = ht*fw_h
348
349 FSTD ht_temp,-24(%sp) ; -24(sp) = ht
350 FSTD ht_temp_1,-56(%sp) ; -56(sp) = ht
351 XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l
352 XMPYU flt_1,fw_l,lt_temp_1 ; lt_temp = lt*fw_l
353
354 FSTD lt_temp,-32(%sp) ; -32(sp) = lt
355 FSTD lt_temp_1,-64(%sp) ; -64(sp) = lt
356 LDD -8(%sp),m_0
357 LDD -40(%sp),m_1
358
359 LDD -16(%sp),m1_0
360 LDD -48(%sp),m1_1
361 LDD -24(%sp),ht_0
362 LDD -56(%sp),ht_1
363
364 ADD,L m1_0,m_0,tmp_0 ; tmp_0 = m + m1;
365 ADD,L m1_1,m_1,tmp_1 ; tmp_1 = m + m1;
366 LDD -32(%sp),lt_0
367 LDD -64(%sp),lt_1
368
369 CMPCLR,*>>= tmp_0,m1_0, %r0 ; if (m < m1)
370 ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32)
371 CMPCLR,*>>= tmp_1,m1_1,%r0 ; if (m < m1)
372 ADD,L ht_1,top_overflow,ht_1 ; ht += (1<<32)
373
374 EXTRD,U tmp_0,31,32,m_0 ; m>>32
375 DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32
376 EXTRD,U tmp_1,31,32,m_1 ; m>>32
377 DEPD,Z tmp_1,31,32,m1_1 ; m1 = m<<32
378
379 ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32)
380 ADD,L ht_1,m_1,ht_1 ; ht+= (m>>32)
381 ADD lt_0,m1_0,lt_0 ; lt = lt+m1;
382 ADD,DC ht_0,%r0,ht_0 ; ht++
383
384 ADD lt_1,m1_1,lt_1 ; lt = lt+m1;
385 ADD,DC ht_1,%r0,ht_1 ; ht++
386 ADD %ret1,lt_0,lt_0 ; lt = lt + c (ret1);
387 ADD,DC ht_0,%r0,ht_0 ; ht++
388
389 ADD ht_0,lt_1,lt_1 ; lt = lt + c (ht_0)
390 ADD,DC ht_1,%r0,ht_1 ; ht++
391 STD lt_0,0(r_ptr) ; rp[0] = lt
392 STD lt_1,8(r_ptr) ; rp[1] = lt
393
394 COPY ht_1,%ret1 ; carry = ht
395 LDO -2(num),num ; num = num - 2;
396 LDO 16(a_ptr),a_ptr ; ap += 2
397 CMPIB,<= 2,num,bn_mul_words_unroll2
398 LDO 16(r_ptr),r_ptr ; rp++
399
400 CMPIB,=,N 0,num,bn_mul_words_exit ; are we done?
401
402 ;
403 ; Top of loop aligned on 64-byte boundary
404 ;
405bn_mul_words_single_top
406 FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R)
407
408 XMPYU fht_0,fw_l,fm1 ; m1 = ht*fw_l
409 FSTD fm1,-16(%sp) ; -16(sp) = m1
410 XMPYU flt_0,fw_h,fm ; m = lt*fw_h
411 FSTD fm,-8(%sp) ; -8(sp) = m
412 XMPYU fht_0,fw_h,ht_temp ; ht_temp = ht*fw_h
413 FSTD ht_temp,-24(%sp) ; -24(sp) = ht
414 XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l
415 FSTD lt_temp,-32(%sp) ; -32(sp) = lt
416
417 LDD -8(%sp),m_0
418 LDD -16(%sp),m1_0
419 ADD,L m_0,m1_0,tmp_0 ; tmp_0 = m + m1;
420 LDD -24(%sp),ht_0
421 LDD -32(%sp),lt_0
422
423 CMPCLR,*>>= tmp_0,m1_0,%r0 ; if (m < m1)
424 ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32)
425
426 EXTRD,U tmp_0,31,32,m_0 ; m>>32
427 DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32
428
429 ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32)
430 ADD lt_0,m1_0,lt_0 ; lt= lt+m1;
431 ADD,DC ht_0,%r0,ht_0 ; ht++
432
433 ADD %ret1,lt_0,lt_0 ; lt = lt + c;
434 ADD,DC ht_0,%r0,ht_0 ; ht++
435
436 COPY ht_0,%ret1 ; copy carry
437 STD lt_0,0(r_ptr) ; rp[0] = lt
438
439bn_mul_words_exit
440 .EXIT
441 EXTRD,U %ret1,31,32,%ret0 ; for 32-bit, return in ret0/ret1
442 LDD -96(%sp),%r7 ; restore r7
443 LDD -104(%sp),%r6 ; restore r6
444 LDD -112(%sp),%r5 ; restore r5
445 LDD -120(%sp),%r4 ; restore r4
446 BVE (%rp)
447 LDD,MB -128(%sp),%r3 ; restore r3
448 .PROCEND
449
450;----------------------------------------------------------------------------
451;
452;void bn_sqr_words(BN_ULONG *rp, BN_ULONG *ap, int num)
453;
454; arg0 = rp
455; arg1 = ap
456; arg2 = num
457;
458
459bn_sqr_words
460 .proc
461 .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
462 .EXPORT bn_sqr_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
463 .entry
464 .align 64
465
466 STD %r3,0(%sp) ; save r3
467 STD %r4,8(%sp) ; save r4
468 NOP
469 STD %r5,16(%sp) ; save r5
470
471 CMPIB,>= 0,num,bn_sqr_words_exit
472 LDO 128(%sp),%sp ; bump stack
473
474 ;
475 ; If only 1, the goto straight to cleanup
476 ;
477 CMPIB,= 1,num,bn_sqr_words_single_top
478 DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L
479
480 ;
481 ; This loop is unrolled 2 times (64-byte aligned as well)
482 ;
483
484bn_sqr_words_unroll2
485 FLDD 0(a_ptr),t_float_0 ; a[0]
486 FLDD 8(a_ptr),t_float_1 ; a[1]
487 XMPYU fht_0,flt_0,fm ; m[0]
488 XMPYU fht_1,flt_1,fm_1 ; m[1]
489
490 FSTD fm,-24(%sp) ; store m[0]
491 FSTD fm_1,-56(%sp) ; store m[1]
492 XMPYU flt_0,flt_0,lt_temp ; lt[0]
493 XMPYU flt_1,flt_1,lt_temp_1 ; lt[1]
494
495 FSTD lt_temp,-16(%sp) ; store lt[0]
496 FSTD lt_temp_1,-48(%sp) ; store lt[1]
497 XMPYU fht_0,fht_0,ht_temp ; ht[0]
498 XMPYU fht_1,fht_1,ht_temp_1 ; ht[1]
499
500 FSTD ht_temp,-8(%sp) ; store ht[0]
501 FSTD ht_temp_1,-40(%sp) ; store ht[1]
502 LDD -24(%sp),m_0
503 LDD -56(%sp),m_1
504
505 AND m_0,high_mask,tmp_0 ; m[0] & Mask
506 AND m_1,high_mask,tmp_1 ; m[1] & Mask
507 DEPD,Z m_0,30,31,m_0 ; m[0] << 32+1
508 DEPD,Z m_1,30,31,m_1 ; m[1] << 32+1
509
510 LDD -16(%sp),lt_0
511 LDD -48(%sp),lt_1
512 EXTRD,U tmp_0,32,33,tmp_0 ; tmp_0 = m[0]&Mask >> 32-1
513 EXTRD,U tmp_1,32,33,tmp_1 ; tmp_1 = m[1]&Mask >> 32-1
514
515 LDD -8(%sp),ht_0
516 LDD -40(%sp),ht_1
517 ADD,L ht_0,tmp_0,ht_0 ; ht[0] += tmp_0
518 ADD,L ht_1,tmp_1,ht_1 ; ht[1] += tmp_1
519
520 ADD lt_0,m_0,lt_0 ; lt = lt+m
521 ADD,DC ht_0,%r0,ht_0 ; ht[0]++
522 STD lt_0,0(r_ptr) ; rp[0] = lt[0]
523 STD ht_0,8(r_ptr) ; rp[1] = ht[1]
524
525 ADD lt_1,m_1,lt_1 ; lt = lt+m
526 ADD,DC ht_1,%r0,ht_1 ; ht[1]++
527 STD lt_1,16(r_ptr) ; rp[2] = lt[1]
528 STD ht_1,24(r_ptr) ; rp[3] = ht[1]
529
530 LDO -2(num),num ; num = num - 2;
531 LDO 16(a_ptr),a_ptr ; ap += 2
532 CMPIB,<= 2,num,bn_sqr_words_unroll2
533 LDO 32(r_ptr),r_ptr ; rp += 4
534
535 CMPIB,=,N 0,num,bn_sqr_words_exit ; are we done?
536
537 ;
538 ; Top of loop aligned on 64-byte boundary
539 ;
540bn_sqr_words_single_top
541 FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R)
542
543 XMPYU fht_0,flt_0,fm ; m
544 FSTD fm,-24(%sp) ; store m
545
546 XMPYU flt_0,flt_0,lt_temp ; lt
547 FSTD lt_temp,-16(%sp) ; store lt
548
549 XMPYU fht_0,fht_0,ht_temp ; ht
550 FSTD ht_temp,-8(%sp) ; store ht
551
552 LDD -24(%sp),m_0 ; load m
553 AND m_0,high_mask,tmp_0 ; m & Mask
554 DEPD,Z m_0,30,31,m_0 ; m << 32+1
555 LDD -16(%sp),lt_0 ; lt
556
557 LDD -8(%sp),ht_0 ; ht
558 EXTRD,U tmp_0,32,33,tmp_0 ; tmp_0 = m&Mask >> 32-1
559 ADD m_0,lt_0,lt_0 ; lt = lt+m
560 ADD,L ht_0,tmp_0,ht_0 ; ht += tmp_0
561 ADD,DC ht_0,%r0,ht_0 ; ht++
562
563 STD lt_0,0(r_ptr) ; rp[0] = lt
564 STD ht_0,8(r_ptr) ; rp[1] = ht
565
566bn_sqr_words_exit
567 .EXIT
568 LDD -112(%sp),%r5 ; restore r5
569 LDD -120(%sp),%r4 ; restore r4
570 BVE (%rp)
571 LDD,MB -128(%sp),%r3
572 .PROCEND ;in=23,24,25,26,29;out=28;
573
574
575;----------------------------------------------------------------------------
576;
577;BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
578;
579; arg0 = rp
580; arg1 = ap
581; arg2 = bp
582; arg3 = n
583
584t .reg %r22
585b .reg %r21
586l .reg %r20
587
588bn_add_words
589 .proc
590 .entry
591 .callinfo
592 .EXPORT bn_add_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
593 .align 64
594
595 CMPIB,>= 0,n,bn_add_words_exit
596 COPY %r0,%ret1 ; return 0 by default
597
598 ;
599 ; If 2 or more numbers do the loop
600 ;
601 CMPIB,= 1,n,bn_add_words_single_top
602 NOP
603
604 ;
605 ; This loop is unrolled 2 times (64-byte aligned as well)
606 ;
607bn_add_words_unroll2
608 LDD 0(a_ptr),t
609 LDD 0(b_ptr),b
610 ADD t,%ret1,t ; t = t+c;
611 ADD,DC %r0,%r0,%ret1 ; set c to carry
612 ADD t,b,l ; l = t + b[0]
613 ADD,DC %ret1,%r0,%ret1 ; c+= carry
614 STD l,0(r_ptr)
615
616 LDD 8(a_ptr),t
617 LDD 8(b_ptr),b
618 ADD t,%ret1,t ; t = t+c;
619 ADD,DC %r0,%r0,%ret1 ; set c to carry
620 ADD t,b,l ; l = t + b[0]
621 ADD,DC %ret1,%r0,%ret1 ; c+= carry
622 STD l,8(r_ptr)
623
624 LDO -2(n),n
625 LDO 16(a_ptr),a_ptr
626 LDO 16(b_ptr),b_ptr
627
628 CMPIB,<= 2,n,bn_add_words_unroll2
629 LDO 16(r_ptr),r_ptr
630
631 CMPIB,=,N 0,n,bn_add_words_exit ; are we done?
632
633bn_add_words_single_top
634 LDD 0(a_ptr),t
635 LDD 0(b_ptr),b
636
637 ADD t,%ret1,t ; t = t+c;
638 ADD,DC %r0,%r0,%ret1 ; set c to carry (could use CMPCLR??)
639 ADD t,b,l ; l = t + b[0]
640 ADD,DC %ret1,%r0,%ret1 ; c+= carry
641 STD l,0(r_ptr)
642
643bn_add_words_exit
644 .EXIT
645 BVE (%rp)
646 EXTRD,U %ret1,31,32,%ret0 ; for 32-bit, return in ret0/ret1
647 .PROCEND ;in=23,24,25,26,29;out=28;
648
649;----------------------------------------------------------------------------
650;
651;BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
652;
653; arg0 = rp
654; arg1 = ap
655; arg2 = bp
656; arg3 = n
657
658t1 .reg %r22
659t2 .reg %r21
660sub_tmp1 .reg %r20
661sub_tmp2 .reg %r19
662
663
664bn_sub_words
665 .proc
666 .callinfo
667 .EXPORT bn_sub_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
668 .entry
669 .align 64
670
671 CMPIB,>= 0,n,bn_sub_words_exit
672 COPY %r0,%ret1 ; return 0 by default
673
674 ;
675 ; If 2 or more numbers do the loop
676 ;
677 CMPIB,= 1,n,bn_sub_words_single_top
678 NOP
679
680 ;
681 ; This loop is unrolled 2 times (64-byte aligned as well)
682 ;
683bn_sub_words_unroll2
684 LDD 0(a_ptr),t1
685 LDD 0(b_ptr),t2
686 SUB t1,t2,sub_tmp1 ; t3 = t1-t2;
687 SUB sub_tmp1,%ret1,sub_tmp1 ; t3 = t3- c;
688
689 CMPCLR,*>> t1,t2,sub_tmp2 ; clear if t1 > t2
690 LDO 1(%r0),sub_tmp2
691
692 CMPCLR,*= t1,t2,%r0
693 COPY sub_tmp2,%ret1
694 STD sub_tmp1,0(r_ptr)
695
696 LDD 8(a_ptr),t1
697 LDD 8(b_ptr),t2
698 SUB t1,t2,sub_tmp1 ; t3 = t1-t2;
699 SUB sub_tmp1,%ret1,sub_tmp1 ; t3 = t3- c;
700 CMPCLR,*>> t1,t2,sub_tmp2 ; clear if t1 > t2
701 LDO 1(%r0),sub_tmp2
702
703 CMPCLR,*= t1,t2,%r0
704 COPY sub_tmp2,%ret1
705 STD sub_tmp1,8(r_ptr)
706
707 LDO -2(n),n
708 LDO 16(a_ptr),a_ptr
709 LDO 16(b_ptr),b_ptr
710
711 CMPIB,<= 2,n,bn_sub_words_unroll2
712 LDO 16(r_ptr),r_ptr
713
714 CMPIB,=,N 0,n,bn_sub_words_exit ; are we done?
715
716bn_sub_words_single_top
717 LDD 0(a_ptr),t1
718 LDD 0(b_ptr),t2
719 SUB t1,t2,sub_tmp1 ; t3 = t1-t2;
720 SUB sub_tmp1,%ret1,sub_tmp1 ; t3 = t3- c;
721 CMPCLR,*>> t1,t2,sub_tmp2 ; clear if t1 > t2
722 LDO 1(%r0),sub_tmp2
723
724 CMPCLR,*= t1,t2,%r0
725 COPY sub_tmp2,%ret1
726
727 STD sub_tmp1,0(r_ptr)
728
729bn_sub_words_exit
730 .EXIT
731 BVE (%rp)
732 EXTRD,U %ret1,31,32,%ret0 ; for 32-bit, return in ret0/ret1
733 .PROCEND ;in=23,24,25,26,29;out=28;
734
735;------------------------------------------------------------------------------
736;
737; unsigned long bn_div_words(unsigned long h, unsigned long l, unsigned long d)
738;
739; arg0 = h
740; arg1 = l
741; arg2 = d
742;
743; This is mainly just output from the HP C compiler.
744;
745;------------------------------------------------------------------------------
746bn_div_words
747 .PROC
748 .EXPORT bn_div_words,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,RTNVAL=GR,LONG_RETURN
749 .IMPORT BN_num_bits_word,CODE
750 ;--- not PIC .IMPORT __iob,DATA
751 ;--- not PIC .IMPORT fprintf,CODE
752 .IMPORT abort,CODE
753 .IMPORT $$div2U,MILLICODE
754 .CALLINFO CALLER,FRAME=144,ENTRY_GR=%r9,SAVE_RP,ARGS_SAVED,ORDERING_AWARE
755 .ENTRY
756 STW %r2,-20(%r30) ;offset 0x8ec
757 STW,MA %r3,192(%r30) ;offset 0x8f0
758 STW %r4,-188(%r30) ;offset 0x8f4
759 DEPD %r5,31,32,%r6 ;offset 0x8f8
760 STD %r6,-184(%r30) ;offset 0x8fc
761 DEPD %r7,31,32,%r8 ;offset 0x900
762 STD %r8,-176(%r30) ;offset 0x904
763 STW %r9,-168(%r30) ;offset 0x908
764 LDD -248(%r30),%r3 ;offset 0x90c
765 COPY %r26,%r4 ;offset 0x910
766 COPY %r24,%r5 ;offset 0x914
767 DEPD %r25,31,32,%r4 ;offset 0x918
768 CMPB,*<> %r3,%r0,$0006000C ;offset 0x91c
769 DEPD %r23,31,32,%r5 ;offset 0x920
770 MOVIB,TR -1,%r29,$00060002 ;offset 0x924
771 EXTRD,U %r29,31,32,%r28 ;offset 0x928
772$0006002A
773 LDO -1(%r29),%r29 ;offset 0x92c
774 SUB %r23,%r7,%r23 ;offset 0x930
775$00060024
776 SUB %r4,%r31,%r25 ;offset 0x934
777 AND %r25,%r19,%r26 ;offset 0x938
778 CMPB,*<>,N %r0,%r26,$00060046 ;offset 0x93c
779 DEPD,Z %r25,31,32,%r20 ;offset 0x940
780 OR %r20,%r24,%r21 ;offset 0x944
781 CMPB,*<<,N %r21,%r23,$0006002A ;offset 0x948
782 SUB %r31,%r2,%r31 ;offset 0x94c
783$00060046
784$0006002E
785 DEPD,Z %r23,31,32,%r25 ;offset 0x950
786 EXTRD,U %r23,31,32,%r26 ;offset 0x954
787 AND %r25,%r19,%r24 ;offset 0x958
788 ADD,L %r31,%r26,%r31 ;offset 0x95c
789 CMPCLR,*>>= %r5,%r24,%r0 ;offset 0x960
790 LDO 1(%r31),%r31 ;offset 0x964
791$00060032
792 CMPB,*<<=,N %r31,%r4,$00060036 ;offset 0x968
793 LDO -1(%r29),%r29 ;offset 0x96c
794 ADD,L %r4,%r3,%r4 ;offset 0x970
795$00060036
796 ADDIB,=,N -1,%r8,$D0 ;offset 0x974
797 SUB %r5,%r24,%r28 ;offset 0x978
798$0006003A
799 SUB %r4,%r31,%r24 ;offset 0x97c
800 SHRPD %r24,%r28,32,%r4 ;offset 0x980
801 DEPD,Z %r29,31,32,%r9 ;offset 0x984
802 DEPD,Z %r28,31,32,%r5 ;offset 0x988
803$0006001C
804 EXTRD,U %r4,31,32,%r31 ;offset 0x98c
805 CMPB,*<>,N %r31,%r2,$00060020 ;offset 0x990
806 MOVB,TR %r6,%r29,$D1 ;offset 0x994
807 STD %r29,-152(%r30) ;offset 0x998
808$0006000C
809 EXTRD,U %r3,31,32,%r25 ;offset 0x99c
810 COPY %r3,%r26 ;offset 0x9a0
811 EXTRD,U %r3,31,32,%r9 ;offset 0x9a4
812 EXTRD,U %r4,31,32,%r8 ;offset 0x9a8
813 .CALL ARGW0=GR,ARGW1=GR,RTNVAL=GR ;in=25,26;out=28;
814 B,L BN_num_bits_word,%r2 ;offset 0x9ac
815 EXTRD,U %r5,31,32,%r7 ;offset 0x9b0
816 LDI 64,%r20 ;offset 0x9b4
817 DEPD %r7,31,32,%r5 ;offset 0x9b8
818 DEPD %r8,31,32,%r4 ;offset 0x9bc
819 DEPD %r9,31,32,%r3 ;offset 0x9c0
820 CMPB,= %r28,%r20,$00060012 ;offset 0x9c4
821 COPY %r28,%r24 ;offset 0x9c8
822 MTSARCM %r24 ;offset 0x9cc
823 DEPDI,Z -1,%sar,1,%r19 ;offset 0x9d0
824 CMPB,*>>,N %r4,%r19,$D2 ;offset 0x9d4
825$00060012
826 SUBI 64,%r24,%r31 ;offset 0x9d8
827 CMPCLR,*<< %r4,%r3,%r0 ;offset 0x9dc
828 SUB %r4,%r3,%r4 ;offset 0x9e0
829$00060016
830 CMPB,= %r31,%r0,$0006001A ;offset 0x9e4
831 COPY %r0,%r9 ;offset 0x9e8
832 MTSARCM %r31 ;offset 0x9ec
833 DEPD,Z %r3,%sar,64,%r3 ;offset 0x9f0
834 SUBI 64,%r31,%r26 ;offset 0x9f4
835 MTSAR %r26 ;offset 0x9f8
836 SHRPD %r4,%r5,%sar,%r4 ;offset 0x9fc
837 MTSARCM %r31 ;offset 0xa00
838 DEPD,Z %r5,%sar,64,%r5 ;offset 0xa04
839$0006001A
840 DEPDI,Z -1,31,32,%r19 ;offset 0xa08
841 AND %r3,%r19,%r29 ;offset 0xa0c
842 EXTRD,U %r29,31,32,%r2 ;offset 0xa10
843 DEPDI,Z -1,63,32,%r6 ;offset 0xa14
844 MOVIB,TR 2,%r8,$0006001C ;offset 0xa18
845 EXTRD,U %r3,63,32,%r7 ;offset 0xa1c
846$D2
847 ;--- not PIC ADDIL LR'__iob-$global$,%r27,%r1 ;offset 0xa20
848 ;--- not PIC LDIL LR'C$7,%r21 ;offset 0xa24
849 ;--- not PIC LDO RR'__iob-$global$+32(%r1),%r26 ;offset 0xa28
850 ;--- not PIC .CALL ARGW0=GR,ARGW1=GR,ARGW2=GR,RTNVAL=GR ;in=24,25,26;out=28;
851 ;--- not PIC B,L fprintf,%r2 ;offset 0xa2c
852 ;--- not PIC LDO RR'C$7(%r21),%r25 ;offset 0xa30
853 .CALL ;
854 B,L abort,%r2 ;offset 0xa34
855 NOP ;offset 0xa38
856 B $D3 ;offset 0xa3c
857 LDW -212(%r30),%r2 ;offset 0xa40
858$00060020
859 COPY %r4,%r26 ;offset 0xa44
860 EXTRD,U %r4,31,32,%r25 ;offset 0xa48
861 COPY %r2,%r24 ;offset 0xa4c
862 .CALL ;in=23,24,25,26;out=20,21,22,28,29; (MILLICALL)
863 B,L $$div2U,%r31 ;offset 0xa50
864 EXTRD,U %r2,31,32,%r23 ;offset 0xa54
865 DEPD %r28,31,32,%r29 ;offset 0xa58
866$00060022
867 STD %r29,-152(%r30) ;offset 0xa5c
868$D1
869 AND %r5,%r19,%r24 ;offset 0xa60
870 EXTRD,U %r24,31,32,%r24 ;offset 0xa64
871 STW %r2,-160(%r30) ;offset 0xa68
872 STW %r7,-128(%r30) ;offset 0xa6c
873 FLDD -152(%r30),%fr4 ;offset 0xa70
874 FLDD -152(%r30),%fr7 ;offset 0xa74
875 FLDW -160(%r30),%fr8L ;offset 0xa78
876 FLDW -128(%r30),%fr5L ;offset 0xa7c
877 XMPYU %fr8L,%fr7L,%fr10 ;offset 0xa80
878 FSTD %fr10,-136(%r30) ;offset 0xa84
879 XMPYU %fr8L,%fr7R,%fr22 ;offset 0xa88
880 FSTD %fr22,-144(%r30) ;offset 0xa8c
881 XMPYU %fr5L,%fr4L,%fr11 ;offset 0xa90
882 XMPYU %fr5L,%fr4R,%fr23 ;offset 0xa94
883 FSTD %fr11,-112(%r30) ;offset 0xa98
884 FSTD %fr23,-120(%r30) ;offset 0xa9c
885 LDD -136(%r30),%r28 ;offset 0xaa0
886 DEPD,Z %r28,31,32,%r31 ;offset 0xaa4
887 LDD -144(%r30),%r20 ;offset 0xaa8
888 ADD,L %r20,%r31,%r31 ;offset 0xaac
889 LDD -112(%r30),%r22 ;offset 0xab0
890 DEPD,Z %r22,31,32,%r22 ;offset 0xab4
891 LDD -120(%r30),%r21 ;offset 0xab8
892 B $00060024 ;offset 0xabc
893 ADD,L %r21,%r22,%r23 ;offset 0xac0
894$D0
895 OR %r9,%r29,%r29 ;offset 0xac4
896$00060040
897 EXTRD,U %r29,31,32,%r28 ;offset 0xac8
898$00060002
899$L2
900 LDW -212(%r30),%r2 ;offset 0xacc
901$D3
902 LDW -168(%r30),%r9 ;offset 0xad0
903 LDD -176(%r30),%r8 ;offset 0xad4
904 EXTRD,U %r8,31,32,%r7 ;offset 0xad8
905 LDD -184(%r30),%r6 ;offset 0xadc
906 EXTRD,U %r6,31,32,%r5 ;offset 0xae0
907 LDW -188(%r30),%r4 ;offset 0xae4
908 BVE (%r2) ;offset 0xae8
909 .EXIT
910 LDW,MB -192(%r30),%r3 ;offset 0xaec
911 .PROCEND ;in=23,25;out=28,29;fpin=105,107;
912
913
914
915
916;----------------------------------------------------------------------------
917;
918; Registers to hold 64-bit values to manipulate. The "L" part
919; of the register corresponds to the upper 32-bits, while the "R"
920; part corresponds to the lower 32-bits
921;
922; Note, that when using b6 and b7, the code must save these before
923; using them because they are callee save registers
924;
925;
926; Floating point registers to use to save values that
927; are manipulated. These don't collide with ftemp1-6 and
928; are all caller save registers
929;
930a0 .reg %fr22
931a0L .reg %fr22L
932a0R .reg %fr22R
933
934a1 .reg %fr23
935a1L .reg %fr23L
936a1R .reg %fr23R
937
938a2 .reg %fr24
939a2L .reg %fr24L
940a2R .reg %fr24R
941
942a3 .reg %fr25
943a3L .reg %fr25L
944a3R .reg %fr25R
945
946a4 .reg %fr26
947a4L .reg %fr26L
948a4R .reg %fr26R
949
950a5 .reg %fr27
951a5L .reg %fr27L
952a5R .reg %fr27R
953
954a6 .reg %fr28
955a6L .reg %fr28L
956a6R .reg %fr28R
957
958a7 .reg %fr29
959a7L .reg %fr29L
960a7R .reg %fr29R
961
962b0 .reg %fr30
963b0L .reg %fr30L
964b0R .reg %fr30R
965
966b1 .reg %fr31
967b1L .reg %fr31L
968b1R .reg %fr31R
969
970;
971; Temporary floating point variables, these are all caller save
972; registers
973;
974ftemp1 .reg %fr4
975ftemp2 .reg %fr5
976ftemp3 .reg %fr6
977ftemp4 .reg %fr7
978
979;
980; The B set of registers when used.
981;
982
983b2 .reg %fr8
984b2L .reg %fr8L
985b2R .reg %fr8R
986
987b3 .reg %fr9
988b3L .reg %fr9L
989b3R .reg %fr9R
990
991b4 .reg %fr10
992b4L .reg %fr10L
993b4R .reg %fr10R
994
995b5 .reg %fr11
996b5L .reg %fr11L
997b5R .reg %fr11R
998
999b6 .reg %fr12
1000b6L .reg %fr12L
1001b6R .reg %fr12R
1002
1003b7 .reg %fr13
1004b7L .reg %fr13L
1005b7R .reg %fr13R
1006
1007c1 .reg %r21 ; only reg
1008temp1 .reg %r20 ; only reg
1009temp2 .reg %r19 ; only reg
1010temp3 .reg %r31 ; only reg
1011
1012m1 .reg %r28
1013c2 .reg %r23
1014high_one .reg %r1
1015ht .reg %r6
1016lt .reg %r5
1017m .reg %r4
1018c3 .reg %r3
1019
1020SQR_ADD_C .macro A0L,A0R,C1,C2,C3
1021 XMPYU A0L,A0R,ftemp1 ; m
1022 FSTD ftemp1,-24(%sp) ; store m
1023
1024 XMPYU A0R,A0R,ftemp2 ; lt
1025 FSTD ftemp2,-16(%sp) ; store lt
1026
1027 XMPYU A0L,A0L,ftemp3 ; ht
1028 FSTD ftemp3,-8(%sp) ; store ht
1029
1030 LDD -24(%sp),m ; load m
1031 AND m,high_mask,temp2 ; m & Mask
1032 DEPD,Z m,30,31,temp3 ; m << 32+1
1033 LDD -16(%sp),lt ; lt
1034
1035 LDD -8(%sp),ht ; ht
1036 EXTRD,U temp2,32,33,temp1 ; temp1 = m&Mask >> 32-1
1037 ADD temp3,lt,lt ; lt = lt+m
1038 ADD,L ht,temp1,ht ; ht += temp1
1039 ADD,DC ht,%r0,ht ; ht++
1040
1041 ADD C1,lt,C1 ; c1=c1+lt
1042 ADD,DC ht,%r0,ht ; ht++
1043
1044 ADD C2,ht,C2 ; c2=c2+ht
1045 ADD,DC C3,%r0,C3 ; c3++
1046.endm
1047
1048SQR_ADD_C2 .macro A0L,A0R,A1L,A1R,C1,C2,C3
1049 XMPYU A0L,A1R,ftemp1 ; m1 = bl*ht
1050 FSTD ftemp1,-16(%sp) ;
1051 XMPYU A0R,A1L,ftemp2 ; m = bh*lt
1052 FSTD ftemp2,-8(%sp) ;
1053 XMPYU A0R,A1R,ftemp3 ; lt = bl*lt
1054 FSTD ftemp3,-32(%sp)
1055 XMPYU A0L,A1L,ftemp4 ; ht = bh*ht
1056 FSTD ftemp4,-24(%sp) ;
1057
1058 LDD -8(%sp),m ; r21 = m
1059 LDD -16(%sp),m1 ; r19 = m1
1060 ADD,L m,m1,m ; m+m1
1061
1062 DEPD,Z m,31,32,temp3 ; (m+m1<<32)
1063 LDD -24(%sp),ht ; r24 = ht
1064
1065 CMPCLR,*>>= m,m1,%r0 ; if (m < m1)
1066 ADD,L ht,high_one,ht ; ht+=high_one
1067
1068 EXTRD,U m,31,32,temp1 ; m >> 32
1069 LDD -32(%sp),lt ; lt
1070 ADD,L ht,temp1,ht ; ht+= m>>32
1071 ADD lt,temp3,lt ; lt = lt+m1
1072 ADD,DC ht,%r0,ht ; ht++
1073
1074 ADD ht,ht,ht ; ht=ht+ht;
1075 ADD,DC C3,%r0,C3 ; add in carry (c3++)
1076
1077 ADD lt,lt,lt ; lt=lt+lt;
1078 ADD,DC ht,%r0,ht ; add in carry (ht++)
1079
1080 ADD C1,lt,C1 ; c1=c1+lt
1081 ADD,DC,*NUV ht,%r0,ht ; add in carry (ht++)
1082 LDO 1(C3),C3 ; bump c3 if overflow,nullify otherwise
1083
1084 ADD C2,ht,C2 ; c2 = c2 + ht
1085 ADD,DC C3,%r0,C3 ; add in carry (c3++)
1086.endm
1087
1088;
1089;void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a)
1090; arg0 = r_ptr
1091; arg1 = a_ptr
1092;
1093
1094bn_sqr_comba8
1095 .PROC
1096 .CALLINFO FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
1097 .EXPORT bn_sqr_comba8,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
1098 .ENTRY
1099 .align 64
1100
1101 STD %r3,0(%sp) ; save r3
1102 STD %r4,8(%sp) ; save r4
1103 STD %r5,16(%sp) ; save r5
1104 STD %r6,24(%sp) ; save r6
1105
1106 ;
1107 ; Zero out carries
1108 ;
1109 COPY %r0,c1
1110 COPY %r0,c2
1111 COPY %r0,c3
1112
1113 LDO 128(%sp),%sp ; bump stack
1114 DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L
1115 DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32
1116
1117 ;
1118 ; Load up all of the values we are going to use
1119 ;
1120 FLDD 0(a_ptr),a0
1121 FLDD 8(a_ptr),a1
1122 FLDD 16(a_ptr),a2
1123 FLDD 24(a_ptr),a3
1124 FLDD 32(a_ptr),a4
1125 FLDD 40(a_ptr),a5
1126 FLDD 48(a_ptr),a6
1127 FLDD 56(a_ptr),a7
1128
1129 SQR_ADD_C a0L,a0R,c1,c2,c3
1130 STD c1,0(r_ptr) ; r[0] = c1;
1131 COPY %r0,c1
1132
1133 SQR_ADD_C2 a1L,a1R,a0L,a0R,c2,c3,c1
1134 STD c2,8(r_ptr) ; r[1] = c2;
1135 COPY %r0,c2
1136
1137 SQR_ADD_C a1L,a1R,c3,c1,c2
1138 SQR_ADD_C2 a2L,a2R,a0L,a0R,c3,c1,c2
1139 STD c3,16(r_ptr) ; r[2] = c3;
1140 COPY %r0,c3
1141
1142 SQR_ADD_C2 a3L,a3R,a0L,a0R,c1,c2,c3
1143 SQR_ADD_C2 a2L,a2R,a1L,a1R,c1,c2,c3
1144 STD c1,24(r_ptr) ; r[3] = c1;
1145 COPY %r0,c1
1146
1147 SQR_ADD_C a2L,a2R,c2,c3,c1
1148 SQR_ADD_C2 a3L,a3R,a1L,a1R,c2,c3,c1
1149 SQR_ADD_C2 a4L,a4R,a0L,a0R,c2,c3,c1
1150 STD c2,32(r_ptr) ; r[4] = c2;
1151 COPY %r0,c2
1152
1153 SQR_ADD_C2 a5L,a5R,a0L,a0R,c3,c1,c2
1154 SQR_ADD_C2 a4L,a4R,a1L,a1R,c3,c1,c2
1155 SQR_ADD_C2 a3L,a3R,a2L,a2R,c3,c1,c2
1156 STD c3,40(r_ptr) ; r[5] = c3;
1157 COPY %r0,c3
1158
1159 SQR_ADD_C a3L,a3R,c1,c2,c3
1160 SQR_ADD_C2 a4L,a4R,a2L,a2R,c1,c2,c3
1161 SQR_ADD_C2 a5L,a5R,a1L,a1R,c1,c2,c3
1162 SQR_ADD_C2 a6L,a6R,a0L,a0R,c1,c2,c3
1163 STD c1,48(r_ptr) ; r[6] = c1;
1164 COPY %r0,c1
1165
1166 SQR_ADD_C2 a7L,a7R,a0L,a0R,c2,c3,c1
1167 SQR_ADD_C2 a6L,a6R,a1L,a1R,c2,c3,c1
1168 SQR_ADD_C2 a5L,a5R,a2L,a2R,c2,c3,c1
1169 SQR_ADD_C2 a4L,a4R,a3L,a3R,c2,c3,c1
1170 STD c2,56(r_ptr) ; r[7] = c2;
1171 COPY %r0,c2
1172
1173 SQR_ADD_C a4L,a4R,c3,c1,c2
1174 SQR_ADD_C2 a5L,a5R,a3L,a3R,c3,c1,c2
1175 SQR_ADD_C2 a6L,a6R,a2L,a2R,c3,c1,c2
1176 SQR_ADD_C2 a7L,a7R,a1L,a1R,c3,c1,c2
1177 STD c3,64(r_ptr) ; r[8] = c3;
1178 COPY %r0,c3
1179
1180 SQR_ADD_C2 a7L,a7R,a2L,a2R,c1,c2,c3
1181 SQR_ADD_C2 a6L,a6R,a3L,a3R,c1,c2,c3
1182 SQR_ADD_C2 a5L,a5R,a4L,a4R,c1,c2,c3
1183 STD c1,72(r_ptr) ; r[9] = c1;
1184 COPY %r0,c1
1185
1186 SQR_ADD_C a5L,a5R,c2,c3,c1
1187 SQR_ADD_C2 a6L,a6R,a4L,a4R,c2,c3,c1
1188 SQR_ADD_C2 a7L,a7R,a3L,a3R,c2,c3,c1
1189 STD c2,80(r_ptr) ; r[10] = c2;
1190 COPY %r0,c2
1191
1192 SQR_ADD_C2 a7L,a7R,a4L,a4R,c3,c1,c2
1193 SQR_ADD_C2 a6L,a6R,a5L,a5R,c3,c1,c2
1194 STD c3,88(r_ptr) ; r[11] = c3;
1195 COPY %r0,c3
1196
1197 SQR_ADD_C a6L,a6R,c1,c2,c3
1198 SQR_ADD_C2 a7L,a7R,a5L,a5R,c1,c2,c3
1199 STD c1,96(r_ptr) ; r[12] = c1;
1200 COPY %r0,c1
1201
1202 SQR_ADD_C2 a7L,a7R,a6L,a6R,c2,c3,c1
1203 STD c2,104(r_ptr) ; r[13] = c2;
1204 COPY %r0,c2
1205
1206 SQR_ADD_C a7L,a7R,c3,c1,c2
1207 STD c3, 112(r_ptr) ; r[14] = c3
1208 STD c1, 120(r_ptr) ; r[15] = c1
1209
1210 .EXIT
1211 LDD -104(%sp),%r6 ; restore r6
1212 LDD -112(%sp),%r5 ; restore r5
1213 LDD -120(%sp),%r4 ; restore r4
1214 BVE (%rp)
1215 LDD,MB -128(%sp),%r3
1216
1217 .PROCEND
1218
1219;-----------------------------------------------------------------------------
1220;
1221;void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a)
1222; arg0 = r_ptr
1223; arg1 = a_ptr
1224;
1225
1226bn_sqr_comba4
1227 .proc
1228 .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
1229 .EXPORT bn_sqr_comba4,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
1230 .entry
1231 .align 64
1232 STD %r3,0(%sp) ; save r3
1233 STD %r4,8(%sp) ; save r4
1234 STD %r5,16(%sp) ; save r5
1235 STD %r6,24(%sp) ; save r6
1236
1237 ;
1238 ; Zero out carries
1239 ;
1240 COPY %r0,c1
1241 COPY %r0,c2
1242 COPY %r0,c3
1243
1244 LDO 128(%sp),%sp ; bump stack
1245 DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L
1246 DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32
1247
1248 ;
1249 ; Load up all of the values we are going to use
1250 ;
1251 FLDD 0(a_ptr),a0
1252 FLDD 8(a_ptr),a1
1253 FLDD 16(a_ptr),a2
1254 FLDD 24(a_ptr),a3
1255 FLDD 32(a_ptr),a4
1256 FLDD 40(a_ptr),a5
1257 FLDD 48(a_ptr),a6
1258 FLDD 56(a_ptr),a7
1259
1260 SQR_ADD_C a0L,a0R,c1,c2,c3
1261
1262 STD c1,0(r_ptr) ; r[0] = c1;
1263 COPY %r0,c1
1264
1265 SQR_ADD_C2 a1L,a1R,a0L,a0R,c2,c3,c1
1266
1267 STD c2,8(r_ptr) ; r[1] = c2;
1268 COPY %r0,c2
1269
1270 SQR_ADD_C a1L,a1R,c3,c1,c2
1271 SQR_ADD_C2 a2L,a2R,a0L,a0R,c3,c1,c2
1272
1273 STD c3,16(r_ptr) ; r[2] = c3;
1274 COPY %r0,c3
1275
1276 SQR_ADD_C2 a3L,a3R,a0L,a0R,c1,c2,c3
1277 SQR_ADD_C2 a2L,a2R,a1L,a1R,c1,c2,c3
1278
1279 STD c1,24(r_ptr) ; r[3] = c1;
1280 COPY %r0,c1
1281
1282 SQR_ADD_C a2L,a2R,c2,c3,c1
1283 SQR_ADD_C2 a3L,a3R,a1L,a1R,c2,c3,c1
1284
1285 STD c2,32(r_ptr) ; r[4] = c2;
1286 COPY %r0,c2
1287
1288 SQR_ADD_C2 a3L,a3R,a2L,a2R,c3,c1,c2
1289 STD c3,40(r_ptr) ; r[5] = c3;
1290 COPY %r0,c3
1291
1292 SQR_ADD_C a3L,a3R,c1,c2,c3
1293 STD c1,48(r_ptr) ; r[6] = c1;
1294 STD c2,56(r_ptr) ; r[7] = c2;
1295
1296 .EXIT
1297 LDD -104(%sp),%r6 ; restore r6
1298 LDD -112(%sp),%r5 ; restore r5
1299 LDD -120(%sp),%r4 ; restore r4
1300 BVE (%rp)
1301 LDD,MB -128(%sp),%r3
1302
1303 .PROCEND
1304
1305
1306;---------------------------------------------------------------------------
1307
1308MUL_ADD_C .macro A0L,A0R,B0L,B0R,C1,C2,C3
1309 XMPYU A0L,B0R,ftemp1 ; m1 = bl*ht
1310 FSTD ftemp1,-16(%sp) ;
1311 XMPYU A0R,B0L,ftemp2 ; m = bh*lt
1312 FSTD ftemp2,-8(%sp) ;
1313 XMPYU A0R,B0R,ftemp3 ; lt = bl*lt
1314 FSTD ftemp3,-32(%sp)
1315 XMPYU A0L,B0L,ftemp4 ; ht = bh*ht
1316 FSTD ftemp4,-24(%sp) ;
1317
1318 LDD -8(%sp),m ; r21 = m
1319 LDD -16(%sp),m1 ; r19 = m1
1320 ADD,L m,m1,m ; m+m1
1321
1322 DEPD,Z m,31,32,temp3 ; (m+m1<<32)
1323 LDD -24(%sp),ht ; r24 = ht
1324
1325 CMPCLR,*>>= m,m1,%r0 ; if (m < m1)
1326 ADD,L ht,high_one,ht ; ht+=high_one
1327
1328 EXTRD,U m,31,32,temp1 ; m >> 32
1329 LDD -32(%sp),lt ; lt
1330 ADD,L ht,temp1,ht ; ht+= m>>32
1331 ADD lt,temp3,lt ; lt = lt+m1
1332 ADD,DC ht,%r0,ht ; ht++
1333
1334 ADD C1,lt,C1 ; c1=c1+lt
1335 ADD,DC ht,%r0,ht ; bump c3 if overflow,nullify otherwise
1336
1337 ADD C2,ht,C2 ; c2 = c2 + ht
1338 ADD,DC C3,%r0,C3 ; add in carry (c3++)
1339.endm
1340
1341
1342;
1343;void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
1344; arg0 = r_ptr
1345; arg1 = a_ptr
1346; arg2 = b_ptr
1347;
1348
1349bn_mul_comba8
1350 .proc
1351 .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
1352 .EXPORT bn_mul_comba8,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
1353 .entry
1354 .align 64
1355
1356 STD %r3,0(%sp) ; save r3
1357 STD %r4,8(%sp) ; save r4
1358 STD %r5,16(%sp) ; save r5
1359 STD %r6,24(%sp) ; save r6
1360 FSTD %fr12,32(%sp) ; save r6
1361 FSTD %fr13,40(%sp) ; save r7
1362
1363 ;
1364 ; Zero out carries
1365 ;
1366 COPY %r0,c1
1367 COPY %r0,c2
1368 COPY %r0,c3
1369
1370 LDO 128(%sp),%sp ; bump stack
1371 DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32
1372
1373 ;
1374 ; Load up all of the values we are going to use
1375 ;
1376 FLDD 0(a_ptr),a0
1377 FLDD 8(a_ptr),a1
1378 FLDD 16(a_ptr),a2
1379 FLDD 24(a_ptr),a3
1380 FLDD 32(a_ptr),a4
1381 FLDD 40(a_ptr),a5
1382 FLDD 48(a_ptr),a6
1383 FLDD 56(a_ptr),a7
1384
1385 FLDD 0(b_ptr),b0
1386 FLDD 8(b_ptr),b1
1387 FLDD 16(b_ptr),b2
1388 FLDD 24(b_ptr),b3
1389 FLDD 32(b_ptr),b4
1390 FLDD 40(b_ptr),b5
1391 FLDD 48(b_ptr),b6
1392 FLDD 56(b_ptr),b7
1393
1394 MUL_ADD_C a0L,a0R,b0L,b0R,c1,c2,c3
1395 STD c1,0(r_ptr)
1396 COPY %r0,c1
1397
1398 MUL_ADD_C a0L,a0R,b1L,b1R,c2,c3,c1
1399 MUL_ADD_C a1L,a1R,b0L,b0R,c2,c3,c1
1400 STD c2,8(r_ptr)
1401 COPY %r0,c2
1402
1403 MUL_ADD_C a2L,a2R,b0L,b0R,c3,c1,c2
1404 MUL_ADD_C a1L,a1R,b1L,b1R,c3,c1,c2
1405 MUL_ADD_C a0L,a0R,b2L,b2R,c3,c1,c2
1406 STD c3,16(r_ptr)
1407 COPY %r0,c3
1408
1409 MUL_ADD_C a0L,a0R,b3L,b3R,c1,c2,c3
1410 MUL_ADD_C a1L,a1R,b2L,b2R,c1,c2,c3
1411 MUL_ADD_C a2L,a2R,b1L,b1R,c1,c2,c3
1412 MUL_ADD_C a3L,a3R,b0L,b0R,c1,c2,c3
1413 STD c1,24(r_ptr)
1414 COPY %r0,c1
1415
1416 MUL_ADD_C a4L,a4R,b0L,b0R,c2,c3,c1
1417 MUL_ADD_C a3L,a3R,b1L,b1R,c2,c3,c1
1418 MUL_ADD_C a2L,a2R,b2L,b2R,c2,c3,c1
1419 MUL_ADD_C a1L,a1R,b3L,b3R,c2,c3,c1
1420 MUL_ADD_C a0L,a0R,b4L,b4R,c2,c3,c1
1421 STD c2,32(r_ptr)
1422 COPY %r0,c2
1423
1424 MUL_ADD_C a0L,a0R,b5L,b5R,c3,c1,c2
1425 MUL_ADD_C a1L,a1R,b4L,b4R,c3,c1,c2
1426 MUL_ADD_C a2L,a2R,b3L,b3R,c3,c1,c2
1427 MUL_ADD_C a3L,a3R,b2L,b2R,c3,c1,c2
1428 MUL_ADD_C a4L,a4R,b1L,b1R,c3,c1,c2
1429 MUL_ADD_C a5L,a5R,b0L,b0R,c3,c1,c2
1430 STD c3,40(r_ptr)
1431 COPY %r0,c3
1432
1433 MUL_ADD_C a6L,a6R,b0L,b0R,c1,c2,c3
1434 MUL_ADD_C a5L,a5R,b1L,b1R,c1,c2,c3
1435 MUL_ADD_C a4L,a4R,b2L,b2R,c1,c2,c3
1436 MUL_ADD_C a3L,a3R,b3L,b3R,c1,c2,c3
1437 MUL_ADD_C a2L,a2R,b4L,b4R,c1,c2,c3
1438 MUL_ADD_C a1L,a1R,b5L,b5R,c1,c2,c3
1439 MUL_ADD_C a0L,a0R,b6L,b6R,c1,c2,c3
1440 STD c1,48(r_ptr)
1441 COPY %r0,c1
1442
1443 MUL_ADD_C a0L,a0R,b7L,b7R,c2,c3,c1
1444 MUL_ADD_C a1L,a1R,b6L,b6R,c2,c3,c1
1445 MUL_ADD_C a2L,a2R,b5L,b5R,c2,c3,c1
1446 MUL_ADD_C a3L,a3R,b4L,b4R,c2,c3,c1
1447 MUL_ADD_C a4L,a4R,b3L,b3R,c2,c3,c1
1448 MUL_ADD_C a5L,a5R,b2L,b2R,c2,c3,c1
1449 MUL_ADD_C a6L,a6R,b1L,b1R,c2,c3,c1
1450 MUL_ADD_C a7L,a7R,b0L,b0R,c2,c3,c1
1451 STD c2,56(r_ptr)
1452 COPY %r0,c2
1453
1454 MUL_ADD_C a7L,a7R,b1L,b1R,c3,c1,c2
1455 MUL_ADD_C a6L,a6R,b2L,b2R,c3,c1,c2
1456 MUL_ADD_C a5L,a5R,b3L,b3R,c3,c1,c2
1457 MUL_ADD_C a4L,a4R,b4L,b4R,c3,c1,c2
1458 MUL_ADD_C a3L,a3R,b5L,b5R,c3,c1,c2
1459 MUL_ADD_C a2L,a2R,b6L,b6R,c3,c1,c2
1460 MUL_ADD_C a1L,a1R,b7L,b7R,c3,c1,c2
1461 STD c3,64(r_ptr)
1462 COPY %r0,c3
1463
1464 MUL_ADD_C a2L,a2R,b7L,b7R,c1,c2,c3
1465 MUL_ADD_C a3L,a3R,b6L,b6R,c1,c2,c3
1466 MUL_ADD_C a4L,a4R,b5L,b5R,c1,c2,c3
1467 MUL_ADD_C a5L,a5R,b4L,b4R,c1,c2,c3
1468 MUL_ADD_C a6L,a6R,b3L,b3R,c1,c2,c3
1469 MUL_ADD_C a7L,a7R,b2L,b2R,c1,c2,c3
1470 STD c1,72(r_ptr)
1471 COPY %r0,c1
1472
1473 MUL_ADD_C a7L,a7R,b3L,b3R,c2,c3,c1
1474 MUL_ADD_C a6L,a6R,b4L,b4R,c2,c3,c1
1475 MUL_ADD_C a5L,a5R,b5L,b5R,c2,c3,c1
1476 MUL_ADD_C a4L,a4R,b6L,b6R,c2,c3,c1
1477 MUL_ADD_C a3L,a3R,b7L,b7R,c2,c3,c1
1478 STD c2,80(r_ptr)
1479 COPY %r0,c2
1480
1481 MUL_ADD_C a4L,a4R,b7L,b7R,c3,c1,c2
1482 MUL_ADD_C a5L,a5R,b6L,b6R,c3,c1,c2
1483 MUL_ADD_C a6L,a6R,b5L,b5R,c3,c1,c2
1484 MUL_ADD_C a7L,a7R,b4L,b4R,c3,c1,c2
1485 STD c3,88(r_ptr)
1486 COPY %r0,c3
1487
1488 MUL_ADD_C a7L,a7R,b5L,b5R,c1,c2,c3
1489 MUL_ADD_C a6L,a6R,b6L,b6R,c1,c2,c3
1490 MUL_ADD_C a5L,a5R,b7L,b7R,c1,c2,c3
1491 STD c1,96(r_ptr)
1492 COPY %r0,c1
1493
1494 MUL_ADD_C a6L,a6R,b7L,b7R,c2,c3,c1
1495 MUL_ADD_C a7L,a7R,b6L,b6R,c2,c3,c1
1496 STD c2,104(r_ptr)
1497 COPY %r0,c2
1498
1499 MUL_ADD_C a7L,a7R,b7L,b7R,c3,c1,c2
1500 STD c3,112(r_ptr)
1501 STD c1,120(r_ptr)
1502
1503 .EXIT
1504 FLDD -88(%sp),%fr13
1505 FLDD -96(%sp),%fr12
1506 LDD -104(%sp),%r6 ; restore r6
1507 LDD -112(%sp),%r5 ; restore r5
1508 LDD -120(%sp),%r4 ; restore r4
1509 BVE (%rp)
1510 LDD,MB -128(%sp),%r3
1511
1512 .PROCEND
1513
1514;-----------------------------------------------------------------------------
1515;
1516;void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
1517; arg0 = r_ptr
1518; arg1 = a_ptr
1519; arg2 = b_ptr
1520;
1521
1522bn_mul_comba4
1523 .proc
1524 .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
1525 .EXPORT bn_mul_comba4,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
1526 .entry
1527 .align 64
1528
1529 STD %r3,0(%sp) ; save r3
1530 STD %r4,8(%sp) ; save r4
1531 STD %r5,16(%sp) ; save r5
1532 STD %r6,24(%sp) ; save r6
1533 FSTD %fr12,32(%sp) ; save r6
1534 FSTD %fr13,40(%sp) ; save r7
1535
1536 ;
1537 ; Zero out carries
1538 ;
1539 COPY %r0,c1
1540 COPY %r0,c2
1541 COPY %r0,c3
1542
1543 LDO 128(%sp),%sp ; bump stack
1544 DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32
1545
1546 ;
1547 ; Load up all of the values we are going to use
1548 ;
1549 FLDD 0(a_ptr),a0
1550 FLDD 8(a_ptr),a1
1551 FLDD 16(a_ptr),a2
1552 FLDD 24(a_ptr),a3
1553
1554 FLDD 0(b_ptr),b0
1555 FLDD 8(b_ptr),b1
1556 FLDD 16(b_ptr),b2
1557 FLDD 24(b_ptr),b3
1558
1559 MUL_ADD_C a0L,a0R,b0L,b0R,c1,c2,c3
1560 STD c1,0(r_ptr)
1561 COPY %r0,c1
1562
1563 MUL_ADD_C a0L,a0R,b1L,b1R,c2,c3,c1
1564 MUL_ADD_C a1L,a1R,b0L,b0R,c2,c3,c1
1565 STD c2,8(r_ptr)
1566 COPY %r0,c2
1567
1568 MUL_ADD_C a2L,a2R,b0L,b0R,c3,c1,c2
1569 MUL_ADD_C a1L,a1R,b1L,b1R,c3,c1,c2
1570 MUL_ADD_C a0L,a0R,b2L,b2R,c3,c1,c2
1571 STD c3,16(r_ptr)
1572 COPY %r0,c3
1573
1574 MUL_ADD_C a0L,a0R,b3L,b3R,c1,c2,c3
1575 MUL_ADD_C a1L,a1R,b2L,b2R,c1,c2,c3
1576 MUL_ADD_C a2L,a2R,b1L,b1R,c1,c2,c3
1577 MUL_ADD_C a3L,a3R,b0L,b0R,c1,c2,c3
1578 STD c1,24(r_ptr)
1579 COPY %r0,c1
1580
1581 MUL_ADD_C a3L,a3R,b1L,b1R,c2,c3,c1
1582 MUL_ADD_C a2L,a2R,b2L,b2R,c2,c3,c1
1583 MUL_ADD_C a1L,a1R,b3L,b3R,c2,c3,c1
1584 STD c2,32(r_ptr)
1585 COPY %r0,c2
1586
1587 MUL_ADD_C a2L,a2R,b3L,b3R,c3,c1,c2
1588 MUL_ADD_C a3L,a3R,b2L,b2R,c3,c1,c2
1589 STD c3,40(r_ptr)
1590 COPY %r0,c3
1591
1592 MUL_ADD_C a3L,a3R,b3L,b3R,c1,c2,c3
1593 STD c1,48(r_ptr)
1594 STD c2,56(r_ptr)
1595
1596 .EXIT
1597 FLDD -88(%sp),%fr13
1598 FLDD -96(%sp),%fr12
1599 LDD -104(%sp),%r6 ; restore r6
1600 LDD -112(%sp),%r5 ; restore r5
1601 LDD -120(%sp),%r4 ; restore r4
1602 BVE (%rp)
1603 LDD,MB -128(%sp),%r3
1604
1605 .PROCEND
1606
1607
1608;--- not PIC .SPACE $TEXT$
1609;--- not PIC .SUBSPA $CODE$
1610;--- not PIC .SPACE $PRIVATE$,SORT=16
1611;--- not PIC .IMPORT $global$,DATA
1612;--- not PIC .SPACE $TEXT$
1613;--- not PIC .SUBSPA $CODE$
1614;--- not PIC .SUBSPA $LIT$,ACCESS=0x2c
1615;--- not PIC C$7
1616;--- not PIC .ALIGN 8
1617;--- not PIC .STRINGZ "Division would overflow (%d)\n"
1618 .END
diff --git a/src/lib/libcrypto/bn/asm/pa-risc2W.s b/src/lib/libcrypto/bn/asm/pa-risc2W.s
deleted file mode 100644
index a99545754d..0000000000
--- a/src/lib/libcrypto/bn/asm/pa-risc2W.s
+++ /dev/null
@@ -1,1605 +0,0 @@
1;
2; PA-RISC 64-bit implementation of bn_asm code
3;
4; This code is approximately 2x faster than the C version
5; for RSA/DSA.
6;
7; See http://devresource.hp.com/ for more details on the PA-RISC
8; architecture. Also see the book "PA-RISC 2.0 Architecture"
9; by Gerry Kane for information on the instruction set architecture.
10;
11; Code written by Chris Ruemmler (with some help from the HP C
12; compiler).
13;
14; The code compiles with HP's assembler
15;
16
17 .level 2.0W
18 .space $TEXT$
19 .subspa $CODE$,QUAD=0,ALIGN=8,ACCESS=0x2c,CODE_ONLY
20
21;
22; Global Register definitions used for the routines.
23;
24; Some information about HP's runtime architecture for 64-bits.
25;
26; "Caller save" means the calling function must save the register
27; if it wants the register to be preserved.
28; "Callee save" means if a function uses the register, it must save
29; the value before using it.
30;
31; For the floating point registers
32;
33; "caller save" registers: fr4-fr11, fr22-fr31
34; "callee save" registers: fr12-fr21
35; "special" registers: fr0-fr3 (status and exception registers)
36;
37; For the integer registers
38; value zero : r0
39; "caller save" registers: r1,r19-r26
40; "callee save" registers: r3-r18
41; return register : r2 (rp)
42; return values ; r28 (ret0,ret1)
43; Stack pointer ; r30 (sp)
44; global data pointer ; r27 (dp)
45; argument pointer ; r29 (ap)
46; millicode return ptr ; r31 (also a caller save register)
47
48
49;
50; Arguments to the routines
51;
52r_ptr .reg %r26
53a_ptr .reg %r25
54b_ptr .reg %r24
55num .reg %r24
56w .reg %r23
57n .reg %r23
58
59
60;
61; Globals used in some routines
62;
63
64top_overflow .reg %r29
65high_mask .reg %r22 ; value 0xffffffff80000000L
66
67
68;------------------------------------------------------------------------------
69;
70; bn_mul_add_words
71;
72;BN_ULONG bn_mul_add_words(BN_ULONG *r_ptr, BN_ULONG *a_ptr,
73; int num, BN_ULONG w)
74;
75; arg0 = r_ptr
76; arg1 = a_ptr
77; arg2 = num
78; arg3 = w
79;
80; Local register definitions
81;
82
83fm1 .reg %fr22
84fm .reg %fr23
85ht_temp .reg %fr24
86ht_temp_1 .reg %fr25
87lt_temp .reg %fr26
88lt_temp_1 .reg %fr27
89fm1_1 .reg %fr28
90fm_1 .reg %fr29
91
92fw_h .reg %fr7L
93fw_l .reg %fr7R
94fw .reg %fr7
95
96fht_0 .reg %fr8L
97flt_0 .reg %fr8R
98t_float_0 .reg %fr8
99
100fht_1 .reg %fr9L
101flt_1 .reg %fr9R
102t_float_1 .reg %fr9
103
104tmp_0 .reg %r31
105tmp_1 .reg %r21
106m_0 .reg %r20
107m_1 .reg %r19
108ht_0 .reg %r1
109ht_1 .reg %r3
110lt_0 .reg %r4
111lt_1 .reg %r5
112m1_0 .reg %r6
113m1_1 .reg %r7
114rp_val .reg %r8
115rp_val_1 .reg %r9
116
117bn_mul_add_words
118 .export bn_mul_add_words,entry,NO_RELOCATION,LONG_RETURN
119 .proc
120 .callinfo frame=128
121 .entry
122 .align 64
123
124 STD %r3,0(%sp) ; save r3
125 STD %r4,8(%sp) ; save r4
126 NOP ; Needed to make the loop 16-byte aligned
127 NOP ; Needed to make the loop 16-byte aligned
128
129 STD %r5,16(%sp) ; save r5
130 STD %r6,24(%sp) ; save r6
131 STD %r7,32(%sp) ; save r7
132 STD %r8,40(%sp) ; save r8
133
134 STD %r9,48(%sp) ; save r9
135 COPY %r0,%ret0 ; return 0 by default
136 DEPDI,Z 1,31,1,top_overflow ; top_overflow = 1 << 32
137 STD w,56(%sp) ; store w on stack
138
139 CMPIB,>= 0,num,bn_mul_add_words_exit ; if (num <= 0) then exit
140 LDO 128(%sp),%sp ; bump stack
141
142 ;
143 ; The loop is unrolled twice, so if there is only 1 number
144 ; then go straight to the cleanup code.
145 ;
146 CMPIB,= 1,num,bn_mul_add_words_single_top
147 FLDD -72(%sp),fw ; load up w into fp register fw (fw_h/fw_l)
148
149 ;
150 ; This loop is unrolled 2 times (64-byte aligned as well)
151 ;
152 ; PA-RISC 2.0 chips have two fully pipelined multipliers, thus
153 ; two 32-bit mutiplies can be issued per cycle.
154 ;
155bn_mul_add_words_unroll2
156
157 FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R)
158 FLDD 8(a_ptr),t_float_1 ; load up 64-bit value (fr8L) ht(L)/lt(R)
159 LDD 0(r_ptr),rp_val ; rp[0]
160 LDD 8(r_ptr),rp_val_1 ; rp[1]
161
162 XMPYU fht_0,fw_l,fm1 ; m1[0] = fht_0*fw_l
163 XMPYU fht_1,fw_l,fm1_1 ; m1[1] = fht_1*fw_l
164 FSTD fm1,-16(%sp) ; -16(sp) = m1[0]
165 FSTD fm1_1,-48(%sp) ; -48(sp) = m1[1]
166
167 XMPYU flt_0,fw_h,fm ; m[0] = flt_0*fw_h
168 XMPYU flt_1,fw_h,fm_1 ; m[1] = flt_1*fw_h
169 FSTD fm,-8(%sp) ; -8(sp) = m[0]
170 FSTD fm_1,-40(%sp) ; -40(sp) = m[1]
171
172 XMPYU fht_0,fw_h,ht_temp ; ht_temp = fht_0*fw_h
173 XMPYU fht_1,fw_h,ht_temp_1 ; ht_temp_1 = fht_1*fw_h
174 FSTD ht_temp,-24(%sp) ; -24(sp) = ht_temp
175 FSTD ht_temp_1,-56(%sp) ; -56(sp) = ht_temp_1
176
177 XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l
178 XMPYU flt_1,fw_l,lt_temp_1 ; lt_temp = lt*fw_l
179 FSTD lt_temp,-32(%sp) ; -32(sp) = lt_temp
180 FSTD lt_temp_1,-64(%sp) ; -64(sp) = lt_temp_1
181
182 LDD -8(%sp),m_0 ; m[0]
183 LDD -40(%sp),m_1 ; m[1]
184 LDD -16(%sp),m1_0 ; m1[0]
185 LDD -48(%sp),m1_1 ; m1[1]
186
187 LDD -24(%sp),ht_0 ; ht[0]
188 LDD -56(%sp),ht_1 ; ht[1]
189 ADD,L m1_0,m_0,tmp_0 ; tmp_0 = m[0] + m1[0];
190 ADD,L m1_1,m_1,tmp_1 ; tmp_1 = m[1] + m1[1];
191
192 LDD -32(%sp),lt_0
193 LDD -64(%sp),lt_1
194 CMPCLR,*>>= tmp_0,m1_0, %r0 ; if (m[0] < m1[0])
195 ADD,L ht_0,top_overflow,ht_0 ; ht[0] += (1<<32)
196
197 CMPCLR,*>>= tmp_1,m1_1,%r0 ; if (m[1] < m1[1])
198 ADD,L ht_1,top_overflow,ht_1 ; ht[1] += (1<<32)
199 EXTRD,U tmp_0,31,32,m_0 ; m[0]>>32
200 DEPD,Z tmp_0,31,32,m1_0 ; m1[0] = m[0]<<32
201
202 EXTRD,U tmp_1,31,32,m_1 ; m[1]>>32
203 DEPD,Z tmp_1,31,32,m1_1 ; m1[1] = m[1]<<32
204 ADD,L ht_0,m_0,ht_0 ; ht[0]+= (m[0]>>32)
205 ADD,L ht_1,m_1,ht_1 ; ht[1]+= (m[1]>>32)
206
207 ADD lt_0,m1_0,lt_0 ; lt[0] = lt[0]+m1[0];
208 ADD,DC ht_0,%r0,ht_0 ; ht[0]++
209 ADD lt_1,m1_1,lt_1 ; lt[1] = lt[1]+m1[1];
210 ADD,DC ht_1,%r0,ht_1 ; ht[1]++
211
212 ADD %ret0,lt_0,lt_0 ; lt[0] = lt[0] + c;
213 ADD,DC ht_0,%r0,ht_0 ; ht[0]++
214 ADD lt_0,rp_val,lt_0 ; lt[0] = lt[0]+rp[0]
215 ADD,DC ht_0,%r0,ht_0 ; ht[0]++
216
217 LDO -2(num),num ; num = num - 2;
218 ADD ht_0,lt_1,lt_1 ; lt[1] = lt[1] + ht_0 (c);
219 ADD,DC ht_1,%r0,ht_1 ; ht[1]++
220 STD lt_0,0(r_ptr) ; rp[0] = lt[0]
221
222 ADD lt_1,rp_val_1,lt_1 ; lt[1] = lt[1]+rp[1]
223 ADD,DC ht_1,%r0,%ret0 ; ht[1]++
224 LDO 16(a_ptr),a_ptr ; a_ptr += 2
225
226 STD lt_1,8(r_ptr) ; rp[1] = lt[1]
227 CMPIB,<= 2,num,bn_mul_add_words_unroll2 ; go again if more to do
228 LDO 16(r_ptr),r_ptr ; r_ptr += 2
229
230 CMPIB,=,N 0,num,bn_mul_add_words_exit ; are we done, or cleanup last one
231
232 ;
233 ; Top of loop aligned on 64-byte boundary
234 ;
235bn_mul_add_words_single_top
236 FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R)
237 LDD 0(r_ptr),rp_val ; rp[0]
238 LDO 8(a_ptr),a_ptr ; a_ptr++
239 XMPYU fht_0,fw_l,fm1 ; m1 = ht*fw_l
240 FSTD fm1,-16(%sp) ; -16(sp) = m1
241 XMPYU flt_0,fw_h,fm ; m = lt*fw_h
242 FSTD fm,-8(%sp) ; -8(sp) = m
243 XMPYU fht_0,fw_h,ht_temp ; ht_temp = ht*fw_h
244 FSTD ht_temp,-24(%sp) ; -24(sp) = ht
245 XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l
246 FSTD lt_temp,-32(%sp) ; -32(sp) = lt
247
248 LDD -8(%sp),m_0
249 LDD -16(%sp),m1_0 ; m1 = temp1
250 ADD,L m_0,m1_0,tmp_0 ; tmp_0 = m + m1;
251 LDD -24(%sp),ht_0
252 LDD -32(%sp),lt_0
253
254 CMPCLR,*>>= tmp_0,m1_0,%r0 ; if (m < m1)
255 ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32)
256
257 EXTRD,U tmp_0,31,32,m_0 ; m>>32
258 DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32
259
260 ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32)
261 ADD lt_0,m1_0,tmp_0 ; tmp_0 = lt+m1;
262 ADD,DC ht_0,%r0,ht_0 ; ht++
263 ADD %ret0,tmp_0,lt_0 ; lt = lt + c;
264 ADD,DC ht_0,%r0,ht_0 ; ht++
265 ADD lt_0,rp_val,lt_0 ; lt = lt+rp[0]
266 ADD,DC ht_0,%r0,%ret0 ; ht++
267 STD lt_0,0(r_ptr) ; rp[0] = lt
268
269bn_mul_add_words_exit
270 .EXIT
271 LDD -80(%sp),%r9 ; restore r9
272 LDD -88(%sp),%r8 ; restore r8
273 LDD -96(%sp),%r7 ; restore r7
274 LDD -104(%sp),%r6 ; restore r6
275 LDD -112(%sp),%r5 ; restore r5
276 LDD -120(%sp),%r4 ; restore r4
277 BVE (%rp)
278 LDD,MB -128(%sp),%r3 ; restore r3
279 .PROCEND ;in=23,24,25,26,29;out=28;
280
281;----------------------------------------------------------------------------
282;
283;BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
284;
285; arg0 = rp
286; arg1 = ap
287; arg2 = num
288; arg3 = w
289
290bn_mul_words
291 .proc
292 .callinfo frame=128
293 .entry
294 .EXPORT bn_mul_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
295 .align 64
296
297 STD %r3,0(%sp) ; save r3
298 STD %r4,8(%sp) ; save r4
299 STD %r5,16(%sp) ; save r5
300 STD %r6,24(%sp) ; save r6
301
302 STD %r7,32(%sp) ; save r7
303 COPY %r0,%ret0 ; return 0 by default
304 DEPDI,Z 1,31,1,top_overflow ; top_overflow = 1 << 32
305 STD w,56(%sp) ; w on stack
306
307 CMPIB,>= 0,num,bn_mul_words_exit
308 LDO 128(%sp),%sp ; bump stack
309
310 ;
311 ; See if only 1 word to do, thus just do cleanup
312 ;
313 CMPIB,= 1,num,bn_mul_words_single_top
314 FLDD -72(%sp),fw ; load up w into fp register fw (fw_h/fw_l)
315
316 ;
317 ; This loop is unrolled 2 times (64-byte aligned as well)
318 ;
319 ; PA-RISC 2.0 chips have two fully pipelined multipliers, thus
320 ; two 32-bit mutiplies can be issued per cycle.
321 ;
322bn_mul_words_unroll2
323
324 FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R)
325 FLDD 8(a_ptr),t_float_1 ; load up 64-bit value (fr8L) ht(L)/lt(R)
326 XMPYU fht_0,fw_l,fm1 ; m1[0] = fht_0*fw_l
327 XMPYU fht_1,fw_l,fm1_1 ; m1[1] = ht*fw_l
328
329 FSTD fm1,-16(%sp) ; -16(sp) = m1
330 FSTD fm1_1,-48(%sp) ; -48(sp) = m1
331 XMPYU flt_0,fw_h,fm ; m = lt*fw_h
332 XMPYU flt_1,fw_h,fm_1 ; m = lt*fw_h
333
334 FSTD fm,-8(%sp) ; -8(sp) = m
335 FSTD fm_1,-40(%sp) ; -40(sp) = m
336 XMPYU fht_0,fw_h,ht_temp ; ht_temp = fht_0*fw_h
337 XMPYU fht_1,fw_h,ht_temp_1 ; ht_temp = ht*fw_h
338
339 FSTD ht_temp,-24(%sp) ; -24(sp) = ht
340 FSTD ht_temp_1,-56(%sp) ; -56(sp) = ht
341 XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l
342 XMPYU flt_1,fw_l,lt_temp_1 ; lt_temp = lt*fw_l
343
344 FSTD lt_temp,-32(%sp) ; -32(sp) = lt
345 FSTD lt_temp_1,-64(%sp) ; -64(sp) = lt
346 LDD -8(%sp),m_0
347 LDD -40(%sp),m_1
348
349 LDD -16(%sp),m1_0
350 LDD -48(%sp),m1_1
351 LDD -24(%sp),ht_0
352 LDD -56(%sp),ht_1
353
354 ADD,L m1_0,m_0,tmp_0 ; tmp_0 = m + m1;
355 ADD,L m1_1,m_1,tmp_1 ; tmp_1 = m + m1;
356 LDD -32(%sp),lt_0
357 LDD -64(%sp),lt_1
358
359 CMPCLR,*>>= tmp_0,m1_0, %r0 ; if (m < m1)
360 ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32)
361 CMPCLR,*>>= tmp_1,m1_1,%r0 ; if (m < m1)
362 ADD,L ht_1,top_overflow,ht_1 ; ht += (1<<32)
363
364 EXTRD,U tmp_0,31,32,m_0 ; m>>32
365 DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32
366 EXTRD,U tmp_1,31,32,m_1 ; m>>32
367 DEPD,Z tmp_1,31,32,m1_1 ; m1 = m<<32
368
369 ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32)
370 ADD,L ht_1,m_1,ht_1 ; ht+= (m>>32)
371 ADD lt_0,m1_0,lt_0 ; lt = lt+m1;
372 ADD,DC ht_0,%r0,ht_0 ; ht++
373
374 ADD lt_1,m1_1,lt_1 ; lt = lt+m1;
375 ADD,DC ht_1,%r0,ht_1 ; ht++
376 ADD %ret0,lt_0,lt_0 ; lt = lt + c (ret0);
377 ADD,DC ht_0,%r0,ht_0 ; ht++
378
379 ADD ht_0,lt_1,lt_1 ; lt = lt + c (ht_0)
380 ADD,DC ht_1,%r0,ht_1 ; ht++
381 STD lt_0,0(r_ptr) ; rp[0] = lt
382 STD lt_1,8(r_ptr) ; rp[1] = lt
383
384 COPY ht_1,%ret0 ; carry = ht
385 LDO -2(num),num ; num = num - 2;
386 LDO 16(a_ptr),a_ptr ; ap += 2
387 CMPIB,<= 2,num,bn_mul_words_unroll2
388 LDO 16(r_ptr),r_ptr ; rp++
389
390 CMPIB,=,N 0,num,bn_mul_words_exit ; are we done?
391
392 ;
393 ; Top of loop aligned on 64-byte boundary
394 ;
395bn_mul_words_single_top
396 FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R)
397
398 XMPYU fht_0,fw_l,fm1 ; m1 = ht*fw_l
399 FSTD fm1,-16(%sp) ; -16(sp) = m1
400 XMPYU flt_0,fw_h,fm ; m = lt*fw_h
401 FSTD fm,-8(%sp) ; -8(sp) = m
402 XMPYU fht_0,fw_h,ht_temp ; ht_temp = ht*fw_h
403 FSTD ht_temp,-24(%sp) ; -24(sp) = ht
404 XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l
405 FSTD lt_temp,-32(%sp) ; -32(sp) = lt
406
407 LDD -8(%sp),m_0
408 LDD -16(%sp),m1_0
409 ADD,L m_0,m1_0,tmp_0 ; tmp_0 = m + m1;
410 LDD -24(%sp),ht_0
411 LDD -32(%sp),lt_0
412
413 CMPCLR,*>>= tmp_0,m1_0,%r0 ; if (m < m1)
414 ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32)
415
416 EXTRD,U tmp_0,31,32,m_0 ; m>>32
417 DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32
418
419 ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32)
420 ADD lt_0,m1_0,lt_0 ; lt= lt+m1;
421 ADD,DC ht_0,%r0,ht_0 ; ht++
422
423 ADD %ret0,lt_0,lt_0 ; lt = lt + c;
424 ADD,DC ht_0,%r0,ht_0 ; ht++
425
426 COPY ht_0,%ret0 ; copy carry
427 STD lt_0,0(r_ptr) ; rp[0] = lt
428
429bn_mul_words_exit
430 .EXIT
431 LDD -96(%sp),%r7 ; restore r7
432 LDD -104(%sp),%r6 ; restore r6
433 LDD -112(%sp),%r5 ; restore r5
434 LDD -120(%sp),%r4 ; restore r4
435 BVE (%rp)
436 LDD,MB -128(%sp),%r3 ; restore r3
437 .PROCEND ;in=23,24,25,26,29;out=28;
438
439;----------------------------------------------------------------------------
440;
441;void bn_sqr_words(BN_ULONG *rp, BN_ULONG *ap, int num)
442;
443; arg0 = rp
444; arg1 = ap
445; arg2 = num
446;
447
448bn_sqr_words
449 .proc
450 .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
451 .EXPORT bn_sqr_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
452 .entry
453 .align 64
454
455 STD %r3,0(%sp) ; save r3
456 STD %r4,8(%sp) ; save r4
457 NOP
458 STD %r5,16(%sp) ; save r5
459
460 CMPIB,>= 0,num,bn_sqr_words_exit
461 LDO 128(%sp),%sp ; bump stack
462
463 ;
464 ; If only 1, the goto straight to cleanup
465 ;
466 CMPIB,= 1,num,bn_sqr_words_single_top
467 DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L
468
469 ;
470 ; This loop is unrolled 2 times (64-byte aligned as well)
471 ;
472
473bn_sqr_words_unroll2
474 FLDD 0(a_ptr),t_float_0 ; a[0]
475 FLDD 8(a_ptr),t_float_1 ; a[1]
476 XMPYU fht_0,flt_0,fm ; m[0]
477 XMPYU fht_1,flt_1,fm_1 ; m[1]
478
479 FSTD fm,-24(%sp) ; store m[0]
480 FSTD fm_1,-56(%sp) ; store m[1]
481 XMPYU flt_0,flt_0,lt_temp ; lt[0]
482 XMPYU flt_1,flt_1,lt_temp_1 ; lt[1]
483
484 FSTD lt_temp,-16(%sp) ; store lt[0]
485 FSTD lt_temp_1,-48(%sp) ; store lt[1]
486 XMPYU fht_0,fht_0,ht_temp ; ht[0]
487 XMPYU fht_1,fht_1,ht_temp_1 ; ht[1]
488
489 FSTD ht_temp,-8(%sp) ; store ht[0]
490 FSTD ht_temp_1,-40(%sp) ; store ht[1]
491 LDD -24(%sp),m_0
492 LDD -56(%sp),m_1
493
494 AND m_0,high_mask,tmp_0 ; m[0] & Mask
495 AND m_1,high_mask,tmp_1 ; m[1] & Mask
496 DEPD,Z m_0,30,31,m_0 ; m[0] << 32+1
497 DEPD,Z m_1,30,31,m_1 ; m[1] << 32+1
498
499 LDD -16(%sp),lt_0
500 LDD -48(%sp),lt_1
501 EXTRD,U tmp_0,32,33,tmp_0 ; tmp_0 = m[0]&Mask >> 32-1
502 EXTRD,U tmp_1,32,33,tmp_1 ; tmp_1 = m[1]&Mask >> 32-1
503
504 LDD -8(%sp),ht_0
505 LDD -40(%sp),ht_1
506 ADD,L ht_0,tmp_0,ht_0 ; ht[0] += tmp_0
507 ADD,L ht_1,tmp_1,ht_1 ; ht[1] += tmp_1
508
509 ADD lt_0,m_0,lt_0 ; lt = lt+m
510 ADD,DC ht_0,%r0,ht_0 ; ht[0]++
511 STD lt_0,0(r_ptr) ; rp[0] = lt[0]
512 STD ht_0,8(r_ptr) ; rp[1] = ht[1]
513
514 ADD lt_1,m_1,lt_1 ; lt = lt+m
515 ADD,DC ht_1,%r0,ht_1 ; ht[1]++
516 STD lt_1,16(r_ptr) ; rp[2] = lt[1]
517 STD ht_1,24(r_ptr) ; rp[3] = ht[1]
518
519 LDO -2(num),num ; num = num - 2;
520 LDO 16(a_ptr),a_ptr ; ap += 2
521 CMPIB,<= 2,num,bn_sqr_words_unroll2
522 LDO 32(r_ptr),r_ptr ; rp += 4
523
524 CMPIB,=,N 0,num,bn_sqr_words_exit ; are we done?
525
526 ;
527 ; Top of loop aligned on 64-byte boundary
528 ;
529bn_sqr_words_single_top
530 FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R)
531
532 XMPYU fht_0,flt_0,fm ; m
533 FSTD fm,-24(%sp) ; store m
534
535 XMPYU flt_0,flt_0,lt_temp ; lt
536 FSTD lt_temp,-16(%sp) ; store lt
537
538 XMPYU fht_0,fht_0,ht_temp ; ht
539 FSTD ht_temp,-8(%sp) ; store ht
540
541 LDD -24(%sp),m_0 ; load m
542 AND m_0,high_mask,tmp_0 ; m & Mask
543 DEPD,Z m_0,30,31,m_0 ; m << 32+1
544 LDD -16(%sp),lt_0 ; lt
545
546 LDD -8(%sp),ht_0 ; ht
547 EXTRD,U tmp_0,32,33,tmp_0 ; tmp_0 = m&Mask >> 32-1
548 ADD m_0,lt_0,lt_0 ; lt = lt+m
549 ADD,L ht_0,tmp_0,ht_0 ; ht += tmp_0
550 ADD,DC ht_0,%r0,ht_0 ; ht++
551
552 STD lt_0,0(r_ptr) ; rp[0] = lt
553 STD ht_0,8(r_ptr) ; rp[1] = ht
554
555bn_sqr_words_exit
556 .EXIT
557 LDD -112(%sp),%r5 ; restore r5
558 LDD -120(%sp),%r4 ; restore r4
559 BVE (%rp)
560 LDD,MB -128(%sp),%r3
561 .PROCEND ;in=23,24,25,26,29;out=28;
562
563
564;----------------------------------------------------------------------------
565;
566;BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
567;
568; arg0 = rp
569; arg1 = ap
570; arg2 = bp
571; arg3 = n
572
573t .reg %r22
574b .reg %r21
575l .reg %r20
576
577bn_add_words
578 .proc
579 .entry
580 .callinfo
581 .EXPORT bn_add_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
582 .align 64
583
584 CMPIB,>= 0,n,bn_add_words_exit
585 COPY %r0,%ret0 ; return 0 by default
586
587 ;
588 ; If 2 or more numbers do the loop
589 ;
590 CMPIB,= 1,n,bn_add_words_single_top
591 NOP
592
593 ;
594 ; This loop is unrolled 2 times (64-byte aligned as well)
595 ;
596bn_add_words_unroll2
597 LDD 0(a_ptr),t
598 LDD 0(b_ptr),b
599 ADD t,%ret0,t ; t = t+c;
600 ADD,DC %r0,%r0,%ret0 ; set c to carry
601 ADD t,b,l ; l = t + b[0]
602 ADD,DC %ret0,%r0,%ret0 ; c+= carry
603 STD l,0(r_ptr)
604
605 LDD 8(a_ptr),t
606 LDD 8(b_ptr),b
607 ADD t,%ret0,t ; t = t+c;
608 ADD,DC %r0,%r0,%ret0 ; set c to carry
609 ADD t,b,l ; l = t + b[0]
610 ADD,DC %ret0,%r0,%ret0 ; c+= carry
611 STD l,8(r_ptr)
612
613 LDO -2(n),n
614 LDO 16(a_ptr),a_ptr
615 LDO 16(b_ptr),b_ptr
616
617 CMPIB,<= 2,n,bn_add_words_unroll2
618 LDO 16(r_ptr),r_ptr
619
620 CMPIB,=,N 0,n,bn_add_words_exit ; are we done?
621
622bn_add_words_single_top
623 LDD 0(a_ptr),t
624 LDD 0(b_ptr),b
625
626 ADD t,%ret0,t ; t = t+c;
627 ADD,DC %r0,%r0,%ret0 ; set c to carry (could use CMPCLR??)
628 ADD t,b,l ; l = t + b[0]
629 ADD,DC %ret0,%r0,%ret0 ; c+= carry
630 STD l,0(r_ptr)
631
632bn_add_words_exit
633 .EXIT
634 BVE (%rp)
635 NOP
636 .PROCEND ;in=23,24,25,26,29;out=28;
637
638;----------------------------------------------------------------------------
639;
640;BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
641;
642; arg0 = rp
643; arg1 = ap
644; arg2 = bp
645; arg3 = n
646
647t1 .reg %r22
648t2 .reg %r21
649sub_tmp1 .reg %r20
650sub_tmp2 .reg %r19
651
652
653bn_sub_words
654 .proc
655 .callinfo
656 .EXPORT bn_sub_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
657 .entry
658 .align 64
659
660 CMPIB,>= 0,n,bn_sub_words_exit
661 COPY %r0,%ret0 ; return 0 by default
662
663 ;
664 ; If 2 or more numbers do the loop
665 ;
666 CMPIB,= 1,n,bn_sub_words_single_top
667 NOP
668
669 ;
670 ; This loop is unrolled 2 times (64-byte aligned as well)
671 ;
672bn_sub_words_unroll2
673 LDD 0(a_ptr),t1
674 LDD 0(b_ptr),t2
675 SUB t1,t2,sub_tmp1 ; t3 = t1-t2;
676 SUB sub_tmp1,%ret0,sub_tmp1 ; t3 = t3- c;
677
678 CMPCLR,*>> t1,t2,sub_tmp2 ; clear if t1 > t2
679 LDO 1(%r0),sub_tmp2
680
681 CMPCLR,*= t1,t2,%r0
682 COPY sub_tmp2,%ret0
683 STD sub_tmp1,0(r_ptr)
684
685 LDD 8(a_ptr),t1
686 LDD 8(b_ptr),t2
687 SUB t1,t2,sub_tmp1 ; t3 = t1-t2;
688 SUB sub_tmp1,%ret0,sub_tmp1 ; t3 = t3- c;
689 CMPCLR,*>> t1,t2,sub_tmp2 ; clear if t1 > t2
690 LDO 1(%r0),sub_tmp2
691
692 CMPCLR,*= t1,t2,%r0
693 COPY sub_tmp2,%ret0
694 STD sub_tmp1,8(r_ptr)
695
696 LDO -2(n),n
697 LDO 16(a_ptr),a_ptr
698 LDO 16(b_ptr),b_ptr
699
700 CMPIB,<= 2,n,bn_sub_words_unroll2
701 LDO 16(r_ptr),r_ptr
702
703 CMPIB,=,N 0,n,bn_sub_words_exit ; are we done?
704
705bn_sub_words_single_top
706 LDD 0(a_ptr),t1
707 LDD 0(b_ptr),t2
708 SUB t1,t2,sub_tmp1 ; t3 = t1-t2;
709 SUB sub_tmp1,%ret0,sub_tmp1 ; t3 = t3- c;
710 CMPCLR,*>> t1,t2,sub_tmp2 ; clear if t1 > t2
711 LDO 1(%r0),sub_tmp2
712
713 CMPCLR,*= t1,t2,%r0
714 COPY sub_tmp2,%ret0
715
716 STD sub_tmp1,0(r_ptr)
717
718bn_sub_words_exit
719 .EXIT
720 BVE (%rp)
721 NOP
722 .PROCEND ;in=23,24,25,26,29;out=28;
723
724;------------------------------------------------------------------------------
725;
726; unsigned long bn_div_words(unsigned long h, unsigned long l, unsigned long d)
727;
728; arg0 = h
729; arg1 = l
730; arg2 = d
731;
732; This is mainly just modified assembly from the compiler, thus the
733; lack of variable names.
734;
735;------------------------------------------------------------------------------
736bn_div_words
737 .proc
738 .callinfo CALLER,FRAME=272,ENTRY_GR=%r10,SAVE_RP,ARGS_SAVED,ORDERING_AWARE
739 .EXPORT bn_div_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
740 .IMPORT BN_num_bits_word,CODE,NO_RELOCATION
741 .IMPORT __iob,DATA
742 .IMPORT fprintf,CODE,NO_RELOCATION
743 .IMPORT abort,CODE,NO_RELOCATION
744 .IMPORT $$div2U,MILLICODE
745 .entry
746 STD %r2,-16(%r30)
747 STD,MA %r3,352(%r30)
748 STD %r4,-344(%r30)
749 STD %r5,-336(%r30)
750 STD %r6,-328(%r30)
751 STD %r7,-320(%r30)
752 STD %r8,-312(%r30)
753 STD %r9,-304(%r30)
754 STD %r10,-296(%r30)
755
756 STD %r27,-288(%r30) ; save gp
757
758 COPY %r24,%r3 ; save d
759 COPY %r26,%r4 ; save h (high 64-bits)
760 LDO -1(%r0),%ret0 ; return -1 by default
761
762 CMPB,*= %r0,%arg2,$D3 ; if (d == 0)
763 COPY %r25,%r5 ; save l (low 64-bits)
764
765 LDO -48(%r30),%r29 ; create ap
766 .CALL ;in=26,29;out=28;
767 B,L BN_num_bits_word,%r2
768 COPY %r3,%r26
769 LDD -288(%r30),%r27 ; restore gp
770 LDI 64,%r21
771
772 CMPB,= %r21,%ret0,$00000012 ;if (i == 64) (forward)
773 COPY %ret0,%r24 ; i
774 MTSARCM %r24
775 DEPDI,Z -1,%sar,1,%r29
776 CMPB,*<<,N %r29,%r4,bn_div_err_case ; if (h > 1<<i) (forward)
777
778$00000012
779 SUBI 64,%r24,%r31 ; i = 64 - i;
780 CMPCLR,*<< %r4,%r3,%r0 ; if (h >= d)
781 SUB %r4,%r3,%r4 ; h -= d
782 CMPB,= %r31,%r0,$0000001A ; if (i)
783 COPY %r0,%r10 ; ret = 0
784 MTSARCM %r31 ; i to shift
785 DEPD,Z %r3,%sar,64,%r3 ; d <<= i;
786 SUBI 64,%r31,%r19 ; 64 - i; redundent
787 MTSAR %r19 ; (64 -i) to shift
788 SHRPD %r4,%r5,%sar,%r4 ; l>> (64-i)
789 MTSARCM %r31 ; i to shift
790 DEPD,Z %r5,%sar,64,%r5 ; l <<= i;
791
792$0000001A
793 DEPDI,Z -1,31,32,%r19
794 EXTRD,U %r3,31,32,%r6 ; dh=(d&0xfff)>>32
795 EXTRD,U %r3,63,32,%r8 ; dl = d&0xffffff
796 LDO 2(%r0),%r9
797 STD %r3,-280(%r30) ; "d" to stack
798
799$0000001C
800 DEPDI,Z -1,63,32,%r29 ;
801 EXTRD,U %r4,31,32,%r31 ; h >> 32
802 CMPB,*=,N %r31,%r6,$D2 ; if ((h>>32) != dh)(forward) div
803 COPY %r4,%r26
804 EXTRD,U %r4,31,32,%r25
805 COPY %r6,%r24
806 .CALL ;in=23,24,25,26;out=20,21,22,28,29; (MILLICALL)
807 B,L $$div2U,%r2
808 EXTRD,U %r6,31,32,%r23
809 DEPD %r28,31,32,%r29
810$D2
811 STD %r29,-272(%r30) ; q
812 AND %r5,%r19,%r24 ; t & 0xffffffff00000000;
813 EXTRD,U %r24,31,32,%r24 ; ???
814 FLDD -272(%r30),%fr7 ; q
815 FLDD -280(%r30),%fr8 ; d
816 XMPYU %fr8L,%fr7L,%fr10
817 FSTD %fr10,-256(%r30)
818 XMPYU %fr8L,%fr7R,%fr22
819 FSTD %fr22,-264(%r30)
820 XMPYU %fr8R,%fr7L,%fr11
821 XMPYU %fr8R,%fr7R,%fr23
822 FSTD %fr11,-232(%r30)
823 FSTD %fr23,-240(%r30)
824 LDD -256(%r30),%r28
825 DEPD,Z %r28,31,32,%r2
826 LDD -264(%r30),%r20
827 ADD,L %r20,%r2,%r31
828 LDD -232(%r30),%r22
829 DEPD,Z %r22,31,32,%r22
830 LDD -240(%r30),%r21
831 B $00000024 ; enter loop
832 ADD,L %r21,%r22,%r23
833
834$0000002A
835 LDO -1(%r29),%r29
836 SUB %r23,%r8,%r23
837$00000024
838 SUB %r4,%r31,%r25
839 AND %r25,%r19,%r26
840 CMPB,*<>,N %r0,%r26,$00000046 ; (forward)
841 DEPD,Z %r25,31,32,%r20
842 OR %r20,%r24,%r21
843 CMPB,*<<,N %r21,%r23,$0000002A ;(backward)
844 SUB %r31,%r6,%r31
845;-------------Break path---------------------
846
847$00000046
848 DEPD,Z %r23,31,32,%r25 ;tl
849 EXTRD,U %r23,31,32,%r26 ;t
850 AND %r25,%r19,%r24 ;tl = (tl<<32)&0xfffffff0000000L
851 ADD,L %r31,%r26,%r31 ;th += t;
852 CMPCLR,*>>= %r5,%r24,%r0 ;if (l<tl)
853 LDO 1(%r31),%r31 ; th++;
854 CMPB,*<<=,N %r31,%r4,$00000036 ;if (n < th) (forward)
855 LDO -1(%r29),%r29 ;q--;
856 ADD,L %r4,%r3,%r4 ;h += d;
857$00000036
858 ADDIB,=,N -1,%r9,$D1 ;if (--count == 0) break (forward)
859 SUB %r5,%r24,%r28 ; l -= tl;
860 SUB %r4,%r31,%r24 ; h -= th;
861 SHRPD %r24,%r28,32,%r4 ; h = ((h<<32)|(l>>32));
862 DEPD,Z %r29,31,32,%r10 ; ret = q<<32
863 b $0000001C
864 DEPD,Z %r28,31,32,%r5 ; l = l << 32
865
866$D1
867 OR %r10,%r29,%r28 ; ret |= q
868$D3
869 LDD -368(%r30),%r2
870$D0
871 LDD -296(%r30),%r10
872 LDD -304(%r30),%r9
873 LDD -312(%r30),%r8
874 LDD -320(%r30),%r7
875 LDD -328(%r30),%r6
876 LDD -336(%r30),%r5
877 LDD -344(%r30),%r4
878 BVE (%r2)
879 .EXIT
880 LDD,MB -352(%r30),%r3
881
882bn_div_err_case
883 MFIA %r6
884 ADDIL L'bn_div_words-bn_div_err_case,%r6,%r1
885 LDO R'bn_div_words-bn_div_err_case(%r1),%r6
886 ADDIL LT'__iob,%r27,%r1
887 LDD RT'__iob(%r1),%r26
888 ADDIL L'C$4-bn_div_words,%r6,%r1
889 LDO R'C$4-bn_div_words(%r1),%r25
890 LDO 64(%r26),%r26
891 .CALL ;in=24,25,26,29;out=28;
892 B,L fprintf,%r2
893 LDO -48(%r30),%r29
894 LDD -288(%r30),%r27
895 .CALL ;in=29;
896 B,L abort,%r2
897 LDO -48(%r30),%r29
898 LDD -288(%r30),%r27
899 B $D0
900 LDD -368(%r30),%r2
901 .PROCEND ;in=24,25,26,29;out=28;
902
903;----------------------------------------------------------------------------
904;
905; Registers to hold 64-bit values to manipulate. The "L" part
906; of the register corresponds to the upper 32-bits, while the "R"
907; part corresponds to the lower 32-bits
908;
909; Note, that when using b6 and b7, the code must save these before
910; using them because they are callee save registers
911;
912;
913; Floating point registers to use to save values that
914; are manipulated. These don't collide with ftemp1-6 and
915; are all caller save registers
916;
917a0 .reg %fr22
918a0L .reg %fr22L
919a0R .reg %fr22R
920
921a1 .reg %fr23
922a1L .reg %fr23L
923a1R .reg %fr23R
924
925a2 .reg %fr24
926a2L .reg %fr24L
927a2R .reg %fr24R
928
929a3 .reg %fr25
930a3L .reg %fr25L
931a3R .reg %fr25R
932
933a4 .reg %fr26
934a4L .reg %fr26L
935a4R .reg %fr26R
936
937a5 .reg %fr27
938a5L .reg %fr27L
939a5R .reg %fr27R
940
941a6 .reg %fr28
942a6L .reg %fr28L
943a6R .reg %fr28R
944
945a7 .reg %fr29
946a7L .reg %fr29L
947a7R .reg %fr29R
948
949b0 .reg %fr30
950b0L .reg %fr30L
951b0R .reg %fr30R
952
953b1 .reg %fr31
954b1L .reg %fr31L
955b1R .reg %fr31R
956
957;
958; Temporary floating point variables, these are all caller save
959; registers
960;
961ftemp1 .reg %fr4
962ftemp2 .reg %fr5
963ftemp3 .reg %fr6
964ftemp4 .reg %fr7
965
966;
967; The B set of registers when used.
968;
969
970b2 .reg %fr8
971b2L .reg %fr8L
972b2R .reg %fr8R
973
974b3 .reg %fr9
975b3L .reg %fr9L
976b3R .reg %fr9R
977
978b4 .reg %fr10
979b4L .reg %fr10L
980b4R .reg %fr10R
981
982b5 .reg %fr11
983b5L .reg %fr11L
984b5R .reg %fr11R
985
986b6 .reg %fr12
987b6L .reg %fr12L
988b6R .reg %fr12R
989
990b7 .reg %fr13
991b7L .reg %fr13L
992b7R .reg %fr13R
993
994c1 .reg %r21 ; only reg
995temp1 .reg %r20 ; only reg
996temp2 .reg %r19 ; only reg
997temp3 .reg %r31 ; only reg
998
999m1 .reg %r28
1000c2 .reg %r23
1001high_one .reg %r1
1002ht .reg %r6
1003lt .reg %r5
1004m .reg %r4
1005c3 .reg %r3
1006
1007SQR_ADD_C .macro A0L,A0R,C1,C2,C3
1008 XMPYU A0L,A0R,ftemp1 ; m
1009 FSTD ftemp1,-24(%sp) ; store m
1010
1011 XMPYU A0R,A0R,ftemp2 ; lt
1012 FSTD ftemp2,-16(%sp) ; store lt
1013
1014 XMPYU A0L,A0L,ftemp3 ; ht
1015 FSTD ftemp3,-8(%sp) ; store ht
1016
1017 LDD -24(%sp),m ; load m
1018 AND m,high_mask,temp2 ; m & Mask
1019 DEPD,Z m,30,31,temp3 ; m << 32+1
1020 LDD -16(%sp),lt ; lt
1021
1022 LDD -8(%sp),ht ; ht
1023 EXTRD,U temp2,32,33,temp1 ; temp1 = m&Mask >> 32-1
1024 ADD temp3,lt,lt ; lt = lt+m
1025 ADD,L ht,temp1,ht ; ht += temp1
1026 ADD,DC ht,%r0,ht ; ht++
1027
1028 ADD C1,lt,C1 ; c1=c1+lt
1029 ADD,DC ht,%r0,ht ; ht++
1030
1031 ADD C2,ht,C2 ; c2=c2+ht
1032 ADD,DC C3,%r0,C3 ; c3++
1033.endm
1034
1035SQR_ADD_C2 .macro A0L,A0R,A1L,A1R,C1,C2,C3
1036 XMPYU A0L,A1R,ftemp1 ; m1 = bl*ht
1037 FSTD ftemp1,-16(%sp) ;
1038 XMPYU A0R,A1L,ftemp2 ; m = bh*lt
1039 FSTD ftemp2,-8(%sp) ;
1040 XMPYU A0R,A1R,ftemp3 ; lt = bl*lt
1041 FSTD ftemp3,-32(%sp)
1042 XMPYU A0L,A1L,ftemp4 ; ht = bh*ht
1043 FSTD ftemp4,-24(%sp) ;
1044
1045 LDD -8(%sp),m ; r21 = m
1046 LDD -16(%sp),m1 ; r19 = m1
1047 ADD,L m,m1,m ; m+m1
1048
1049 DEPD,Z m,31,32,temp3 ; (m+m1<<32)
1050 LDD -24(%sp),ht ; r24 = ht
1051
1052 CMPCLR,*>>= m,m1,%r0 ; if (m < m1)
1053 ADD,L ht,high_one,ht ; ht+=high_one
1054
1055 EXTRD,U m,31,32,temp1 ; m >> 32
1056 LDD -32(%sp),lt ; lt
1057 ADD,L ht,temp1,ht ; ht+= m>>32
1058 ADD lt,temp3,lt ; lt = lt+m1
1059 ADD,DC ht,%r0,ht ; ht++
1060
1061 ADD ht,ht,ht ; ht=ht+ht;
1062 ADD,DC C3,%r0,C3 ; add in carry (c3++)
1063
1064 ADD lt,lt,lt ; lt=lt+lt;
1065 ADD,DC ht,%r0,ht ; add in carry (ht++)
1066
1067 ADD C1,lt,C1 ; c1=c1+lt
1068 ADD,DC,*NUV ht,%r0,ht ; add in carry (ht++)
1069 LDO 1(C3),C3 ; bump c3 if overflow,nullify otherwise
1070
1071 ADD C2,ht,C2 ; c2 = c2 + ht
1072 ADD,DC C3,%r0,C3 ; add in carry (c3++)
1073.endm
1074
1075;
1076;void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a)
1077; arg0 = r_ptr
1078; arg1 = a_ptr
1079;
1080
1081bn_sqr_comba8
1082 .PROC
1083 .CALLINFO FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
1084 .EXPORT bn_sqr_comba8,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
1085 .ENTRY
1086 .align 64
1087
1088 STD %r3,0(%sp) ; save r3
1089 STD %r4,8(%sp) ; save r4
1090 STD %r5,16(%sp) ; save r5
1091 STD %r6,24(%sp) ; save r6
1092
1093 ;
1094 ; Zero out carries
1095 ;
1096 COPY %r0,c1
1097 COPY %r0,c2
1098 COPY %r0,c3
1099
1100 LDO 128(%sp),%sp ; bump stack
1101 DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L
1102 DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32
1103
1104 ;
1105 ; Load up all of the values we are going to use
1106 ;
1107 FLDD 0(a_ptr),a0
1108 FLDD 8(a_ptr),a1
1109 FLDD 16(a_ptr),a2
1110 FLDD 24(a_ptr),a3
1111 FLDD 32(a_ptr),a4
1112 FLDD 40(a_ptr),a5
1113 FLDD 48(a_ptr),a6
1114 FLDD 56(a_ptr),a7
1115
1116 SQR_ADD_C a0L,a0R,c1,c2,c3
1117 STD c1,0(r_ptr) ; r[0] = c1;
1118 COPY %r0,c1
1119
1120 SQR_ADD_C2 a1L,a1R,a0L,a0R,c2,c3,c1
1121 STD c2,8(r_ptr) ; r[1] = c2;
1122 COPY %r0,c2
1123
1124 SQR_ADD_C a1L,a1R,c3,c1,c2
1125 SQR_ADD_C2 a2L,a2R,a0L,a0R,c3,c1,c2
1126 STD c3,16(r_ptr) ; r[2] = c3;
1127 COPY %r0,c3
1128
1129 SQR_ADD_C2 a3L,a3R,a0L,a0R,c1,c2,c3
1130 SQR_ADD_C2 a2L,a2R,a1L,a1R,c1,c2,c3
1131 STD c1,24(r_ptr) ; r[3] = c1;
1132 COPY %r0,c1
1133
1134 SQR_ADD_C a2L,a2R,c2,c3,c1
1135 SQR_ADD_C2 a3L,a3R,a1L,a1R,c2,c3,c1
1136 SQR_ADD_C2 a4L,a4R,a0L,a0R,c2,c3,c1
1137 STD c2,32(r_ptr) ; r[4] = c2;
1138 COPY %r0,c2
1139
1140 SQR_ADD_C2 a5L,a5R,a0L,a0R,c3,c1,c2
1141 SQR_ADD_C2 a4L,a4R,a1L,a1R,c3,c1,c2
1142 SQR_ADD_C2 a3L,a3R,a2L,a2R,c3,c1,c2
1143 STD c3,40(r_ptr) ; r[5] = c3;
1144 COPY %r0,c3
1145
1146 SQR_ADD_C a3L,a3R,c1,c2,c3
1147 SQR_ADD_C2 a4L,a4R,a2L,a2R,c1,c2,c3
1148 SQR_ADD_C2 a5L,a5R,a1L,a1R,c1,c2,c3
1149 SQR_ADD_C2 a6L,a6R,a0L,a0R,c1,c2,c3
1150 STD c1,48(r_ptr) ; r[6] = c1;
1151 COPY %r0,c1
1152
1153 SQR_ADD_C2 a7L,a7R,a0L,a0R,c2,c3,c1
1154 SQR_ADD_C2 a6L,a6R,a1L,a1R,c2,c3,c1
1155 SQR_ADD_C2 a5L,a5R,a2L,a2R,c2,c3,c1
1156 SQR_ADD_C2 a4L,a4R,a3L,a3R,c2,c3,c1
1157 STD c2,56(r_ptr) ; r[7] = c2;
1158 COPY %r0,c2
1159
1160 SQR_ADD_C a4L,a4R,c3,c1,c2
1161 SQR_ADD_C2 a5L,a5R,a3L,a3R,c3,c1,c2
1162 SQR_ADD_C2 a6L,a6R,a2L,a2R,c3,c1,c2
1163 SQR_ADD_C2 a7L,a7R,a1L,a1R,c3,c1,c2
1164 STD c3,64(r_ptr) ; r[8] = c3;
1165 COPY %r0,c3
1166
1167 SQR_ADD_C2 a7L,a7R,a2L,a2R,c1,c2,c3
1168 SQR_ADD_C2 a6L,a6R,a3L,a3R,c1,c2,c3
1169 SQR_ADD_C2 a5L,a5R,a4L,a4R,c1,c2,c3
1170 STD c1,72(r_ptr) ; r[9] = c1;
1171 COPY %r0,c1
1172
1173 SQR_ADD_C a5L,a5R,c2,c3,c1
1174 SQR_ADD_C2 a6L,a6R,a4L,a4R,c2,c3,c1
1175 SQR_ADD_C2 a7L,a7R,a3L,a3R,c2,c3,c1
1176 STD c2,80(r_ptr) ; r[10] = c2;
1177 COPY %r0,c2
1178
1179 SQR_ADD_C2 a7L,a7R,a4L,a4R,c3,c1,c2
1180 SQR_ADD_C2 a6L,a6R,a5L,a5R,c3,c1,c2
1181 STD c3,88(r_ptr) ; r[11] = c3;
1182 COPY %r0,c3
1183
1184 SQR_ADD_C a6L,a6R,c1,c2,c3
1185 SQR_ADD_C2 a7L,a7R,a5L,a5R,c1,c2,c3
1186 STD c1,96(r_ptr) ; r[12] = c1;
1187 COPY %r0,c1
1188
1189 SQR_ADD_C2 a7L,a7R,a6L,a6R,c2,c3,c1
1190 STD c2,104(r_ptr) ; r[13] = c2;
1191 COPY %r0,c2
1192
1193 SQR_ADD_C a7L,a7R,c3,c1,c2
1194 STD c3, 112(r_ptr) ; r[14] = c3
1195 STD c1, 120(r_ptr) ; r[15] = c1
1196
1197 .EXIT
1198 LDD -104(%sp),%r6 ; restore r6
1199 LDD -112(%sp),%r5 ; restore r5
1200 LDD -120(%sp),%r4 ; restore r4
1201 BVE (%rp)
1202 LDD,MB -128(%sp),%r3
1203
1204 .PROCEND
1205
1206;-----------------------------------------------------------------------------
1207;
1208;void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a)
1209; arg0 = r_ptr
1210; arg1 = a_ptr
1211;
1212
1213bn_sqr_comba4
1214 .proc
1215 .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
1216 .EXPORT bn_sqr_comba4,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
1217 .entry
1218 .align 64
1219 STD %r3,0(%sp) ; save r3
1220 STD %r4,8(%sp) ; save r4
1221 STD %r5,16(%sp) ; save r5
1222 STD %r6,24(%sp) ; save r6
1223
1224 ;
1225 ; Zero out carries
1226 ;
1227 COPY %r0,c1
1228 COPY %r0,c2
1229 COPY %r0,c3
1230
1231 LDO 128(%sp),%sp ; bump stack
1232 DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L
1233 DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32
1234
1235 ;
1236 ; Load up all of the values we are going to use
1237 ;
1238 FLDD 0(a_ptr),a0
1239 FLDD 8(a_ptr),a1
1240 FLDD 16(a_ptr),a2
1241 FLDD 24(a_ptr),a3
1242 FLDD 32(a_ptr),a4
1243 FLDD 40(a_ptr),a5
1244 FLDD 48(a_ptr),a6
1245 FLDD 56(a_ptr),a7
1246
1247 SQR_ADD_C a0L,a0R,c1,c2,c3
1248
1249 STD c1,0(r_ptr) ; r[0] = c1;
1250 COPY %r0,c1
1251
1252 SQR_ADD_C2 a1L,a1R,a0L,a0R,c2,c3,c1
1253
1254 STD c2,8(r_ptr) ; r[1] = c2;
1255 COPY %r0,c2
1256
1257 SQR_ADD_C a1L,a1R,c3,c1,c2
1258 SQR_ADD_C2 a2L,a2R,a0L,a0R,c3,c1,c2
1259
1260 STD c3,16(r_ptr) ; r[2] = c3;
1261 COPY %r0,c3
1262
1263 SQR_ADD_C2 a3L,a3R,a0L,a0R,c1,c2,c3
1264 SQR_ADD_C2 a2L,a2R,a1L,a1R,c1,c2,c3
1265
1266 STD c1,24(r_ptr) ; r[3] = c1;
1267 COPY %r0,c1
1268
1269 SQR_ADD_C a2L,a2R,c2,c3,c1
1270 SQR_ADD_C2 a3L,a3R,a1L,a1R,c2,c3,c1
1271
1272 STD c2,32(r_ptr) ; r[4] = c2;
1273 COPY %r0,c2
1274
1275 SQR_ADD_C2 a3L,a3R,a2L,a2R,c3,c1,c2
1276 STD c3,40(r_ptr) ; r[5] = c3;
1277 COPY %r0,c3
1278
1279 SQR_ADD_C a3L,a3R,c1,c2,c3
1280 STD c1,48(r_ptr) ; r[6] = c1;
1281 STD c2,56(r_ptr) ; r[7] = c2;
1282
1283 .EXIT
1284 LDD -104(%sp),%r6 ; restore r6
1285 LDD -112(%sp),%r5 ; restore r5
1286 LDD -120(%sp),%r4 ; restore r4
1287 BVE (%rp)
1288 LDD,MB -128(%sp),%r3
1289
1290 .PROCEND
1291
1292
1293;---------------------------------------------------------------------------
1294
1295MUL_ADD_C .macro A0L,A0R,B0L,B0R,C1,C2,C3
1296 XMPYU A0L,B0R,ftemp1 ; m1 = bl*ht
1297 FSTD ftemp1,-16(%sp) ;
1298 XMPYU A0R,B0L,ftemp2 ; m = bh*lt
1299 FSTD ftemp2,-8(%sp) ;
1300 XMPYU A0R,B0R,ftemp3 ; lt = bl*lt
1301 FSTD ftemp3,-32(%sp)
1302 XMPYU A0L,B0L,ftemp4 ; ht = bh*ht
1303 FSTD ftemp4,-24(%sp) ;
1304
1305 LDD -8(%sp),m ; r21 = m
1306 LDD -16(%sp),m1 ; r19 = m1
1307 ADD,L m,m1,m ; m+m1
1308
1309 DEPD,Z m,31,32,temp3 ; (m+m1<<32)
1310 LDD -24(%sp),ht ; r24 = ht
1311
1312 CMPCLR,*>>= m,m1,%r0 ; if (m < m1)
1313 ADD,L ht,high_one,ht ; ht+=high_one
1314
1315 EXTRD,U m,31,32,temp1 ; m >> 32
1316 LDD -32(%sp),lt ; lt
1317 ADD,L ht,temp1,ht ; ht+= m>>32
1318 ADD lt,temp3,lt ; lt = lt+m1
1319 ADD,DC ht,%r0,ht ; ht++
1320
1321 ADD C1,lt,C1 ; c1=c1+lt
1322 ADD,DC ht,%r0,ht ; bump c3 if overflow,nullify otherwise
1323
1324 ADD C2,ht,C2 ; c2 = c2 + ht
1325 ADD,DC C3,%r0,C3 ; add in carry (c3++)
1326.endm
1327
1328
1329;
1330;void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
1331; arg0 = r_ptr
1332; arg1 = a_ptr
1333; arg2 = b_ptr
1334;
1335
1336bn_mul_comba8
1337 .proc
1338 .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
1339 .EXPORT bn_mul_comba8,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
1340 .entry
1341 .align 64
1342
1343 STD %r3,0(%sp) ; save r3
1344 STD %r4,8(%sp) ; save r4
1345 STD %r5,16(%sp) ; save r5
1346 STD %r6,24(%sp) ; save r6
1347 FSTD %fr12,32(%sp) ; save r6
1348 FSTD %fr13,40(%sp) ; save r7
1349
1350 ;
1351 ; Zero out carries
1352 ;
1353 COPY %r0,c1
1354 COPY %r0,c2
1355 COPY %r0,c3
1356
1357 LDO 128(%sp),%sp ; bump stack
1358 DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32
1359
1360 ;
1361 ; Load up all of the values we are going to use
1362 ;
1363 FLDD 0(a_ptr),a0
1364 FLDD 8(a_ptr),a1
1365 FLDD 16(a_ptr),a2
1366 FLDD 24(a_ptr),a3
1367 FLDD 32(a_ptr),a4
1368 FLDD 40(a_ptr),a5
1369 FLDD 48(a_ptr),a6
1370 FLDD 56(a_ptr),a7
1371
1372 FLDD 0(b_ptr),b0
1373 FLDD 8(b_ptr),b1
1374 FLDD 16(b_ptr),b2
1375 FLDD 24(b_ptr),b3
1376 FLDD 32(b_ptr),b4
1377 FLDD 40(b_ptr),b5
1378 FLDD 48(b_ptr),b6
1379 FLDD 56(b_ptr),b7
1380
1381 MUL_ADD_C a0L,a0R,b0L,b0R,c1,c2,c3
1382 STD c1,0(r_ptr)
1383 COPY %r0,c1
1384
1385 MUL_ADD_C a0L,a0R,b1L,b1R,c2,c3,c1
1386 MUL_ADD_C a1L,a1R,b0L,b0R,c2,c3,c1
1387 STD c2,8(r_ptr)
1388 COPY %r0,c2
1389
1390 MUL_ADD_C a2L,a2R,b0L,b0R,c3,c1,c2
1391 MUL_ADD_C a1L,a1R,b1L,b1R,c3,c1,c2
1392 MUL_ADD_C a0L,a0R,b2L,b2R,c3,c1,c2
1393 STD c3,16(r_ptr)
1394 COPY %r0,c3
1395
1396 MUL_ADD_C a0L,a0R,b3L,b3R,c1,c2,c3
1397 MUL_ADD_C a1L,a1R,b2L,b2R,c1,c2,c3
1398 MUL_ADD_C a2L,a2R,b1L,b1R,c1,c2,c3
1399 MUL_ADD_C a3L,a3R,b0L,b0R,c1,c2,c3
1400 STD c1,24(r_ptr)
1401 COPY %r0,c1
1402
1403 MUL_ADD_C a4L,a4R,b0L,b0R,c2,c3,c1
1404 MUL_ADD_C a3L,a3R,b1L,b1R,c2,c3,c1
1405 MUL_ADD_C a2L,a2R,b2L,b2R,c2,c3,c1
1406 MUL_ADD_C a1L,a1R,b3L,b3R,c2,c3,c1
1407 MUL_ADD_C a0L,a0R,b4L,b4R,c2,c3,c1
1408 STD c2,32(r_ptr)
1409 COPY %r0,c2
1410
1411 MUL_ADD_C a0L,a0R,b5L,b5R,c3,c1,c2
1412 MUL_ADD_C a1L,a1R,b4L,b4R,c3,c1,c2
1413 MUL_ADD_C a2L,a2R,b3L,b3R,c3,c1,c2
1414 MUL_ADD_C a3L,a3R,b2L,b2R,c3,c1,c2
1415 MUL_ADD_C a4L,a4R,b1L,b1R,c3,c1,c2
1416 MUL_ADD_C a5L,a5R,b0L,b0R,c3,c1,c2
1417 STD c3,40(r_ptr)
1418 COPY %r0,c3
1419
1420 MUL_ADD_C a6L,a6R,b0L,b0R,c1,c2,c3
1421 MUL_ADD_C a5L,a5R,b1L,b1R,c1,c2,c3
1422 MUL_ADD_C a4L,a4R,b2L,b2R,c1,c2,c3
1423 MUL_ADD_C a3L,a3R,b3L,b3R,c1,c2,c3
1424 MUL_ADD_C a2L,a2R,b4L,b4R,c1,c2,c3
1425 MUL_ADD_C a1L,a1R,b5L,b5R,c1,c2,c3
1426 MUL_ADD_C a0L,a0R,b6L,b6R,c1,c2,c3
1427 STD c1,48(r_ptr)
1428 COPY %r0,c1
1429
1430 MUL_ADD_C a0L,a0R,b7L,b7R,c2,c3,c1
1431 MUL_ADD_C a1L,a1R,b6L,b6R,c2,c3,c1
1432 MUL_ADD_C a2L,a2R,b5L,b5R,c2,c3,c1
1433 MUL_ADD_C a3L,a3R,b4L,b4R,c2,c3,c1
1434 MUL_ADD_C a4L,a4R,b3L,b3R,c2,c3,c1
1435 MUL_ADD_C a5L,a5R,b2L,b2R,c2,c3,c1
1436 MUL_ADD_C a6L,a6R,b1L,b1R,c2,c3,c1
1437 MUL_ADD_C a7L,a7R,b0L,b0R,c2,c3,c1
1438 STD c2,56(r_ptr)
1439 COPY %r0,c2
1440
1441 MUL_ADD_C a7L,a7R,b1L,b1R,c3,c1,c2
1442 MUL_ADD_C a6L,a6R,b2L,b2R,c3,c1,c2
1443 MUL_ADD_C a5L,a5R,b3L,b3R,c3,c1,c2
1444 MUL_ADD_C a4L,a4R,b4L,b4R,c3,c1,c2
1445 MUL_ADD_C a3L,a3R,b5L,b5R,c3,c1,c2
1446 MUL_ADD_C a2L,a2R,b6L,b6R,c3,c1,c2
1447 MUL_ADD_C a1L,a1R,b7L,b7R,c3,c1,c2
1448 STD c3,64(r_ptr)
1449 COPY %r0,c3
1450
1451 MUL_ADD_C a2L,a2R,b7L,b7R,c1,c2,c3
1452 MUL_ADD_C a3L,a3R,b6L,b6R,c1,c2,c3
1453 MUL_ADD_C a4L,a4R,b5L,b5R,c1,c2,c3
1454 MUL_ADD_C a5L,a5R,b4L,b4R,c1,c2,c3
1455 MUL_ADD_C a6L,a6R,b3L,b3R,c1,c2,c3
1456 MUL_ADD_C a7L,a7R,b2L,b2R,c1,c2,c3
1457 STD c1,72(r_ptr)
1458 COPY %r0,c1
1459
1460 MUL_ADD_C a7L,a7R,b3L,b3R,c2,c3,c1
1461 MUL_ADD_C a6L,a6R,b4L,b4R,c2,c3,c1
1462 MUL_ADD_C a5L,a5R,b5L,b5R,c2,c3,c1
1463 MUL_ADD_C a4L,a4R,b6L,b6R,c2,c3,c1
1464 MUL_ADD_C a3L,a3R,b7L,b7R,c2,c3,c1
1465 STD c2,80(r_ptr)
1466 COPY %r0,c2
1467
1468 MUL_ADD_C a4L,a4R,b7L,b7R,c3,c1,c2
1469 MUL_ADD_C a5L,a5R,b6L,b6R,c3,c1,c2
1470 MUL_ADD_C a6L,a6R,b5L,b5R,c3,c1,c2
1471 MUL_ADD_C a7L,a7R,b4L,b4R,c3,c1,c2
1472 STD c3,88(r_ptr)
1473 COPY %r0,c3
1474
1475 MUL_ADD_C a7L,a7R,b5L,b5R,c1,c2,c3
1476 MUL_ADD_C a6L,a6R,b6L,b6R,c1,c2,c3
1477 MUL_ADD_C a5L,a5R,b7L,b7R,c1,c2,c3
1478 STD c1,96(r_ptr)
1479 COPY %r0,c1
1480
1481 MUL_ADD_C a6L,a6R,b7L,b7R,c2,c3,c1
1482 MUL_ADD_C a7L,a7R,b6L,b6R,c2,c3,c1
1483 STD c2,104(r_ptr)
1484 COPY %r0,c2
1485
1486 MUL_ADD_C a7L,a7R,b7L,b7R,c3,c1,c2
1487 STD c3,112(r_ptr)
1488 STD c1,120(r_ptr)
1489
1490 .EXIT
1491 FLDD -88(%sp),%fr13
1492 FLDD -96(%sp),%fr12
1493 LDD -104(%sp),%r6 ; restore r6
1494 LDD -112(%sp),%r5 ; restore r5
1495 LDD -120(%sp),%r4 ; restore r4
1496 BVE (%rp)
1497 LDD,MB -128(%sp),%r3
1498
1499 .PROCEND
1500
1501;-----------------------------------------------------------------------------
1502;
1503;void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
1504; arg0 = r_ptr
1505; arg1 = a_ptr
1506; arg2 = b_ptr
1507;
1508
1509bn_mul_comba4
1510 .proc
1511 .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
1512 .EXPORT bn_mul_comba4,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
1513 .entry
1514 .align 64
1515
1516 STD %r3,0(%sp) ; save r3
1517 STD %r4,8(%sp) ; save r4
1518 STD %r5,16(%sp) ; save r5
1519 STD %r6,24(%sp) ; save r6
1520 FSTD %fr12,32(%sp) ; save r6
1521 FSTD %fr13,40(%sp) ; save r7
1522
1523 ;
1524 ; Zero out carries
1525 ;
1526 COPY %r0,c1
1527 COPY %r0,c2
1528 COPY %r0,c3
1529
1530 LDO 128(%sp),%sp ; bump stack
1531 DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32
1532
1533 ;
1534 ; Load up all of the values we are going to use
1535 ;
1536 FLDD 0(a_ptr),a0
1537 FLDD 8(a_ptr),a1
1538 FLDD 16(a_ptr),a2
1539 FLDD 24(a_ptr),a3
1540
1541 FLDD 0(b_ptr),b0
1542 FLDD 8(b_ptr),b1
1543 FLDD 16(b_ptr),b2
1544 FLDD 24(b_ptr),b3
1545
1546 MUL_ADD_C a0L,a0R,b0L,b0R,c1,c2,c3
1547 STD c1,0(r_ptr)
1548 COPY %r0,c1
1549
1550 MUL_ADD_C a0L,a0R,b1L,b1R,c2,c3,c1
1551 MUL_ADD_C a1L,a1R,b0L,b0R,c2,c3,c1
1552 STD c2,8(r_ptr)
1553 COPY %r0,c2
1554
1555 MUL_ADD_C a2L,a2R,b0L,b0R,c3,c1,c2
1556 MUL_ADD_C a1L,a1R,b1L,b1R,c3,c1,c2
1557 MUL_ADD_C a0L,a0R,b2L,b2R,c3,c1,c2
1558 STD c3,16(r_ptr)
1559 COPY %r0,c3
1560
1561 MUL_ADD_C a0L,a0R,b3L,b3R,c1,c2,c3
1562 MUL_ADD_C a1L,a1R,b2L,b2R,c1,c2,c3
1563 MUL_ADD_C a2L,a2R,b1L,b1R,c1,c2,c3
1564 MUL_ADD_C a3L,a3R,b0L,b0R,c1,c2,c3
1565 STD c1,24(r_ptr)
1566 COPY %r0,c1
1567
1568 MUL_ADD_C a3L,a3R,b1L,b1R,c2,c3,c1
1569 MUL_ADD_C a2L,a2R,b2L,b2R,c2,c3,c1
1570 MUL_ADD_C a1L,a1R,b3L,b3R,c2,c3,c1
1571 STD c2,32(r_ptr)
1572 COPY %r0,c2
1573
1574 MUL_ADD_C a2L,a2R,b3L,b3R,c3,c1,c2
1575 MUL_ADD_C a3L,a3R,b2L,b2R,c3,c1,c2
1576 STD c3,40(r_ptr)
1577 COPY %r0,c3
1578
1579 MUL_ADD_C a3L,a3R,b3L,b3R,c1,c2,c3
1580 STD c1,48(r_ptr)
1581 STD c2,56(r_ptr)
1582
1583 .EXIT
1584 FLDD -88(%sp),%fr13
1585 FLDD -96(%sp),%fr12
1586 LDD -104(%sp),%r6 ; restore r6
1587 LDD -112(%sp),%r5 ; restore r5
1588 LDD -120(%sp),%r4 ; restore r4
1589 BVE (%rp)
1590 LDD,MB -128(%sp),%r3
1591
1592 .PROCEND
1593
1594
1595 .SPACE $TEXT$
1596 .SUBSPA $CODE$
1597 .SPACE $PRIVATE$,SORT=16
1598 .IMPORT $global$,DATA
1599 .SPACE $TEXT$
1600 .SUBSPA $CODE$
1601 .SUBSPA $LIT$,ACCESS=0x2c
1602C$4
1603 .ALIGN 8
1604 .STRINGZ "Division would overflow (%d)\n"
1605 .END
diff --git a/src/lib/libcrypto/bn/asm/sparcv8.S b/src/lib/libcrypto/bn/asm/sparcv8.S
deleted file mode 100644
index 88c5dc480a..0000000000
--- a/src/lib/libcrypto/bn/asm/sparcv8.S
+++ /dev/null
@@ -1,1458 +0,0 @@
1.ident "sparcv8.s, Version 1.4"
2.ident "SPARC v8 ISA artwork by Andy Polyakov <appro@fy.chalmers.se>"
3
4/*
5 * ====================================================================
6 * Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
7 * project.
8 *
9 * Rights for redistribution and usage in source and binary forms are
10 * granted according to the OpenSSL license. Warranty of any kind is
11 * disclaimed.
12 * ====================================================================
13 */
14
15/*
16 * This is my modest contributon to OpenSSL project (see
17 * http://www.openssl.org/ for more information about it) and is
18 * a drop-in SuperSPARC ISA replacement for crypto/bn/bn_asm.c
19 * module. For updates see http://fy.chalmers.se/~appro/hpe/.
20 *
21 * See bn_asm.sparc.v8plus.S for more details.
22 */
23
24/*
25 * Revision history.
26 *
27 * 1.1 - new loop unrolling model(*);
28 * 1.2 - made gas friendly;
29 * 1.3 - fixed problem with /usr/ccs/lib/cpp;
30 * 1.4 - some retunes;
31 *
32 * (*) see bn_asm.sparc.v8plus.S for details
33 */
34
35.section ".text",#alloc,#execinstr
36.file "bn_asm.sparc.v8.S"
37
38.align 32
39
40.global bn_mul_add_words
41/*
42 * BN_ULONG bn_mul_add_words(rp,ap,num,w)
43 * BN_ULONG *rp,*ap;
44 * int num;
45 * BN_ULONG w;
46 */
47bn_mul_add_words:
48 cmp %o2,0
49 bg,a .L_bn_mul_add_words_proceed
50 ld [%o1],%g2
51 retl
52 clr %o0
53
54.L_bn_mul_add_words_proceed:
55 andcc %o2,-4,%g0
56 bz .L_bn_mul_add_words_tail
57 clr %o5
58
59.L_bn_mul_add_words_loop:
60 ld [%o0],%o4
61 ld [%o1+4],%g3
62 umul %o3,%g2,%g2
63 rd %y,%g1
64 addcc %o4,%o5,%o4
65 addx %g1,0,%g1
66 addcc %o4,%g2,%o4
67 st %o4,[%o0]
68 addx %g1,0,%o5
69
70 ld [%o0+4],%o4
71 ld [%o1+8],%g2
72 umul %o3,%g3,%g3
73 dec 4,%o2
74 rd %y,%g1
75 addcc %o4,%o5,%o4
76 addx %g1,0,%g1
77 addcc %o4,%g3,%o4
78 st %o4,[%o0+4]
79 addx %g1,0,%o5
80
81 ld [%o0+8],%o4
82 ld [%o1+12],%g3
83 umul %o3,%g2,%g2
84 inc 16,%o1
85 rd %y,%g1
86 addcc %o4,%o5,%o4
87 addx %g1,0,%g1
88 addcc %o4,%g2,%o4
89 st %o4,[%o0+8]
90 addx %g1,0,%o5
91
92 ld [%o0+12],%o4
93 umul %o3,%g3,%g3
94 inc 16,%o0
95 rd %y,%g1
96 addcc %o4,%o5,%o4
97 addx %g1,0,%g1
98 addcc %o4,%g3,%o4
99 st %o4,[%o0-4]
100 addx %g1,0,%o5
101 andcc %o2,-4,%g0
102 bnz,a .L_bn_mul_add_words_loop
103 ld [%o1],%g2
104
105 tst %o2
106 bnz,a .L_bn_mul_add_words_tail
107 ld [%o1],%g2
108.L_bn_mul_add_words_return:
109 retl
110 mov %o5,%o0
111 nop
112
113.L_bn_mul_add_words_tail:
114 ld [%o0],%o4
115 umul %o3,%g2,%g2
116 addcc %o4,%o5,%o4
117 rd %y,%g1
118 addx %g1,0,%g1
119 addcc %o4,%g2,%o4
120 addx %g1,0,%o5
121 deccc %o2
122 bz .L_bn_mul_add_words_return
123 st %o4,[%o0]
124
125 ld [%o1+4],%g2
126 ld [%o0+4],%o4
127 umul %o3,%g2,%g2
128 rd %y,%g1
129 addcc %o4,%o5,%o4
130 addx %g1,0,%g1
131 addcc %o4,%g2,%o4
132 addx %g1,0,%o5
133 deccc %o2
134 bz .L_bn_mul_add_words_return
135 st %o4,[%o0+4]
136
137 ld [%o1+8],%g2
138 ld [%o0+8],%o4
139 umul %o3,%g2,%g2
140 rd %y,%g1
141 addcc %o4,%o5,%o4
142 addx %g1,0,%g1
143 addcc %o4,%g2,%o4
144 st %o4,[%o0+8]
145 retl
146 addx %g1,0,%o0
147
148.type bn_mul_add_words,#function
149.size bn_mul_add_words,(.-bn_mul_add_words)
150
151.align 32
152
153.global bn_mul_words
154/*
155 * BN_ULONG bn_mul_words(rp,ap,num,w)
156 * BN_ULONG *rp,*ap;
157 * int num;
158 * BN_ULONG w;
159 */
160bn_mul_words:
161 cmp %o2,0
162 bg,a .L_bn_mul_words_proceeed
163 ld [%o1],%g2
164 retl
165 clr %o0
166
167.L_bn_mul_words_proceeed:
168 andcc %o2,-4,%g0
169 bz .L_bn_mul_words_tail
170 clr %o5
171
172.L_bn_mul_words_loop:
173 ld [%o1+4],%g3
174 umul %o3,%g2,%g2
175 addcc %g2,%o5,%g2
176 rd %y,%g1
177 addx %g1,0,%o5
178 st %g2,[%o0]
179
180 ld [%o1+8],%g2
181 umul %o3,%g3,%g3
182 addcc %g3,%o5,%g3
183 rd %y,%g1
184 dec 4,%o2
185 addx %g1,0,%o5
186 st %g3,[%o0+4]
187
188 ld [%o1+12],%g3
189 umul %o3,%g2,%g2
190 addcc %g2,%o5,%g2
191 rd %y,%g1
192 inc 16,%o1
193 st %g2,[%o0+8]
194 addx %g1,0,%o5
195
196 umul %o3,%g3,%g3
197 addcc %g3,%o5,%g3
198 rd %y,%g1
199 inc 16,%o0
200 addx %g1,0,%o5
201 st %g3,[%o0-4]
202 andcc %o2,-4,%g0
203 nop
204 bnz,a .L_bn_mul_words_loop
205 ld [%o1],%g2
206
207 tst %o2
208 bnz,a .L_bn_mul_words_tail
209 ld [%o1],%g2
210.L_bn_mul_words_return:
211 retl
212 mov %o5,%o0
213 nop
214
215.L_bn_mul_words_tail:
216 umul %o3,%g2,%g2
217 addcc %g2,%o5,%g2
218 rd %y,%g1
219 addx %g1,0,%o5
220 deccc %o2
221 bz .L_bn_mul_words_return
222 st %g2,[%o0]
223 nop
224
225 ld [%o1+4],%g2
226 umul %o3,%g2,%g2
227 addcc %g2,%o5,%g2
228 rd %y,%g1
229 addx %g1,0,%o5
230 deccc %o2
231 bz .L_bn_mul_words_return
232 st %g2,[%o0+4]
233
234 ld [%o1+8],%g2
235 umul %o3,%g2,%g2
236 addcc %g2,%o5,%g2
237 rd %y,%g1
238 st %g2,[%o0+8]
239 retl
240 addx %g1,0,%o0
241
242.type bn_mul_words,#function
243.size bn_mul_words,(.-bn_mul_words)
244
245.align 32
246.global bn_sqr_words
247/*
248 * void bn_sqr_words(r,a,n)
249 * BN_ULONG *r,*a;
250 * int n;
251 */
252bn_sqr_words:
253 cmp %o2,0
254 bg,a .L_bn_sqr_words_proceeed
255 ld [%o1],%g2
256 retl
257 clr %o0
258
259.L_bn_sqr_words_proceeed:
260 andcc %o2,-4,%g0
261 bz .L_bn_sqr_words_tail
262 clr %o5
263
264.L_bn_sqr_words_loop:
265 ld [%o1+4],%g3
266 umul %g2,%g2,%o4
267 st %o4,[%o0]
268 rd %y,%o5
269 st %o5,[%o0+4]
270
271 ld [%o1+8],%g2
272 umul %g3,%g3,%o4
273 dec 4,%o2
274 st %o4,[%o0+8]
275 rd %y,%o5
276 st %o5,[%o0+12]
277 nop
278
279 ld [%o1+12],%g3
280 umul %g2,%g2,%o4
281 st %o4,[%o0+16]
282 rd %y,%o5
283 inc 16,%o1
284 st %o5,[%o0+20]
285
286 umul %g3,%g3,%o4
287 inc 32,%o0
288 st %o4,[%o0-8]
289 rd %y,%o5
290 st %o5,[%o0-4]
291 andcc %o2,-4,%g2
292 bnz,a .L_bn_sqr_words_loop
293 ld [%o1],%g2
294
295 tst %o2
296 nop
297 bnz,a .L_bn_sqr_words_tail
298 ld [%o1],%g2
299.L_bn_sqr_words_return:
300 retl
301 clr %o0
302
303.L_bn_sqr_words_tail:
304 umul %g2,%g2,%o4
305 st %o4,[%o0]
306 deccc %o2
307 rd %y,%o5
308 bz .L_bn_sqr_words_return
309 st %o5,[%o0+4]
310
311 ld [%o1+4],%g2
312 umul %g2,%g2,%o4
313 st %o4,[%o0+8]
314 deccc %o2
315 rd %y,%o5
316 nop
317 bz .L_bn_sqr_words_return
318 st %o5,[%o0+12]
319
320 ld [%o1+8],%g2
321 umul %g2,%g2,%o4
322 st %o4,[%o0+16]
323 rd %y,%o5
324 st %o5,[%o0+20]
325 retl
326 clr %o0
327
328.type bn_sqr_words,#function
329.size bn_sqr_words,(.-bn_sqr_words)
330
331.align 32
332
333.global bn_div_words
334/*
335 * BN_ULONG bn_div_words(h,l,d)
336 * BN_ULONG h,l,d;
337 */
338bn_div_words:
339 wr %o0,%y
340 udiv %o1,%o2,%o0
341 retl
342 nop
343
344.type bn_div_words,#function
345.size bn_div_words,(.-bn_div_words)
346
347.align 32
348
349.global bn_add_words
350/*
351 * BN_ULONG bn_add_words(rp,ap,bp,n)
352 * BN_ULONG *rp,*ap,*bp;
353 * int n;
354 */
355bn_add_words:
356 cmp %o3,0
357 bg,a .L_bn_add_words_proceed
358 ld [%o1],%o4
359 retl
360 clr %o0
361
362.L_bn_add_words_proceed:
363 andcc %o3,-4,%g0
364 bz .L_bn_add_words_tail
365 clr %g1
366 ba .L_bn_add_words_warn_loop
367 addcc %g0,0,%g0 ! clear carry flag
368
369.L_bn_add_words_loop:
370 ld [%o1],%o4
371.L_bn_add_words_warn_loop:
372 ld [%o2],%o5
373 ld [%o1+4],%g3
374 ld [%o2+4],%g4
375 dec 4,%o3
376 addxcc %o5,%o4,%o5
377 st %o5,[%o0]
378
379 ld [%o1+8],%o4
380 ld [%o2+8],%o5
381 inc 16,%o1
382 addxcc %g3,%g4,%g3
383 st %g3,[%o0+4]
384
385 ld [%o1-4],%g3
386 ld [%o2+12],%g4
387 inc 16,%o2
388 addxcc %o5,%o4,%o5
389 st %o5,[%o0+8]
390
391 inc 16,%o0
392 addxcc %g3,%g4,%g3
393 st %g3,[%o0-4]
394 addx %g0,0,%g1
395 andcc %o3,-4,%g0
396 bnz,a .L_bn_add_words_loop
397 addcc %g1,-1,%g0
398
399 tst %o3
400 bnz,a .L_bn_add_words_tail
401 ld [%o1],%o4
402.L_bn_add_words_return:
403 retl
404 mov %g1,%o0
405
406.L_bn_add_words_tail:
407 addcc %g1,-1,%g0
408 ld [%o2],%o5
409 addxcc %o5,%o4,%o5
410 addx %g0,0,%g1
411 deccc %o3
412 bz .L_bn_add_words_return
413 st %o5,[%o0]
414
415 ld [%o1+4],%o4
416 addcc %g1,-1,%g0
417 ld [%o2+4],%o5
418 addxcc %o5,%o4,%o5
419 addx %g0,0,%g1
420 deccc %o3
421 bz .L_bn_add_words_return
422 st %o5,[%o0+4]
423
424 ld [%o1+8],%o4
425 addcc %g1,-1,%g0
426 ld [%o2+8],%o5
427 addxcc %o5,%o4,%o5
428 st %o5,[%o0+8]
429 retl
430 addx %g0,0,%o0
431
432.type bn_add_words,#function
433.size bn_add_words,(.-bn_add_words)
434
435.align 32
436
437.global bn_sub_words
438/*
439 * BN_ULONG bn_sub_words(rp,ap,bp,n)
440 * BN_ULONG *rp,*ap,*bp;
441 * int n;
442 */
443bn_sub_words:
444 cmp %o3,0
445 bg,a .L_bn_sub_words_proceed
446 ld [%o1],%o4
447 retl
448 clr %o0
449
450.L_bn_sub_words_proceed:
451 andcc %o3,-4,%g0
452 bz .L_bn_sub_words_tail
453 clr %g1
454 ba .L_bn_sub_words_warm_loop
455 addcc %g0,0,%g0 ! clear carry flag
456
457.L_bn_sub_words_loop:
458 ld [%o1],%o4
459.L_bn_sub_words_warm_loop:
460 ld [%o2],%o5
461 ld [%o1+4],%g3
462 ld [%o2+4],%g4
463 dec 4,%o3
464 subxcc %o4,%o5,%o5
465 st %o5,[%o0]
466
467 ld [%o1+8],%o4
468 ld [%o2+8],%o5
469 inc 16,%o1
470 subxcc %g3,%g4,%g4
471 st %g4,[%o0+4]
472
473 ld [%o1-4],%g3
474 ld [%o2+12],%g4
475 inc 16,%o2
476 subxcc %o4,%o5,%o5
477 st %o5,[%o0+8]
478
479 inc 16,%o0
480 subxcc %g3,%g4,%g4
481 st %g4,[%o0-4]
482 addx %g0,0,%g1
483 andcc %o3,-4,%g0
484 bnz,a .L_bn_sub_words_loop
485 addcc %g1,-1,%g0
486
487 tst %o3
488 nop
489 bnz,a .L_bn_sub_words_tail
490 ld [%o1],%o4
491.L_bn_sub_words_return:
492 retl
493 mov %g1,%o0
494
495.L_bn_sub_words_tail:
496 addcc %g1,-1,%g0
497 ld [%o2],%o5
498 subxcc %o4,%o5,%o5
499 addx %g0,0,%g1
500 deccc %o3
501 bz .L_bn_sub_words_return
502 st %o5,[%o0]
503 nop
504
505 ld [%o1+4],%o4
506 addcc %g1,-1,%g0
507 ld [%o2+4],%o5
508 subxcc %o4,%o5,%o5
509 addx %g0,0,%g1
510 deccc %o3
511 bz .L_bn_sub_words_return
512 st %o5,[%o0+4]
513
514 ld [%o1+8],%o4
515 addcc %g1,-1,%g0
516 ld [%o2+8],%o5
517 subxcc %o4,%o5,%o5
518 st %o5,[%o0+8]
519 retl
520 addx %g0,0,%o0
521
522.type bn_sub_words,#function
523.size bn_sub_words,(.-bn_sub_words)
524
525#define FRAME_SIZE -96
526
527/*
528 * Here is register usage map for *all* routines below.
529 */
530#define t_1 %o0
531#define t_2 %o1
532#define c_1 %o2
533#define c_2 %o3
534#define c_3 %o4
535
536#define ap(I) [%i1+4*I]
537#define bp(I) [%i2+4*I]
538#define rp(I) [%i0+4*I]
539
540#define a_0 %l0
541#define a_1 %l1
542#define a_2 %l2
543#define a_3 %l3
544#define a_4 %l4
545#define a_5 %l5
546#define a_6 %l6
547#define a_7 %l7
548
549#define b_0 %i3
550#define b_1 %i4
551#define b_2 %i5
552#define b_3 %o5
553#define b_4 %g1
554#define b_5 %g2
555#define b_6 %g3
556#define b_7 %g4
557
558.align 32
559.global bn_mul_comba8
560/*
561 * void bn_mul_comba8(r,a,b)
562 * BN_ULONG *r,*a,*b;
563 */
564bn_mul_comba8:
565 save %sp,FRAME_SIZE,%sp
566 ld ap(0),a_0
567 ld bp(0),b_0
568 umul a_0,b_0,c_1 !=!mul_add_c(a[0],b[0],c1,c2,c3);
569 ld bp(1),b_1
570 rd %y,c_2
571 st c_1,rp(0) !r[0]=c1;
572
573 umul a_0,b_1,t_1 !=!mul_add_c(a[0],b[1],c2,c3,c1);
574 ld ap(1),a_1
575 addcc c_2,t_1,c_2
576 rd %y,t_2
577 addxcc %g0,t_2,c_3 !=
578 addx %g0,%g0,c_1
579 ld ap(2),a_2
580 umul a_1,b_0,t_1 !mul_add_c(a[1],b[0],c2,c3,c1);
581 addcc c_2,t_1,c_2 !=
582 rd %y,t_2
583 addxcc c_3,t_2,c_3
584 st c_2,rp(1) !r[1]=c2;
585 addx c_1,%g0,c_1 !=
586
587 umul a_2,b_0,t_1 !mul_add_c(a[2],b[0],c3,c1,c2);
588 addcc c_3,t_1,c_3
589 rd %y,t_2
590 addxcc c_1,t_2,c_1 !=
591 addx %g0,%g0,c_2
592 ld bp(2),b_2
593 umul a_1,b_1,t_1 !mul_add_c(a[1],b[1],c3,c1,c2);
594 addcc c_3,t_1,c_3 !=
595 rd %y,t_2
596 addxcc c_1,t_2,c_1
597 ld bp(3),b_3
598 addx c_2,%g0,c_2 !=
599 umul a_0,b_2,t_1 !mul_add_c(a[0],b[2],c3,c1,c2);
600 addcc c_3,t_1,c_3
601 rd %y,t_2
602 addxcc c_1,t_2,c_1 !=
603 addx c_2,%g0,c_2
604 st c_3,rp(2) !r[2]=c3;
605
606 umul a_0,b_3,t_1 !mul_add_c(a[0],b[3],c1,c2,c3);
607 addcc c_1,t_1,c_1 !=
608 rd %y,t_2
609 addxcc c_2,t_2,c_2
610 addx %g0,%g0,c_3
611 umul a_1,b_2,t_1 !=!mul_add_c(a[1],b[2],c1,c2,c3);
612 addcc c_1,t_1,c_1
613 rd %y,t_2
614 addxcc c_2,t_2,c_2
615 addx c_3,%g0,c_3 !=
616 ld ap(3),a_3
617 umul a_2,b_1,t_1 !mul_add_c(a[2],b[1],c1,c2,c3);
618 addcc c_1,t_1,c_1
619 rd %y,t_2 !=
620 addxcc c_2,t_2,c_2
621 addx c_3,%g0,c_3
622 ld ap(4),a_4
623 umul a_3,b_0,t_1 !mul_add_c(a[3],b[0],c1,c2,c3);!=
624 addcc c_1,t_1,c_1
625 rd %y,t_2
626 addxcc c_2,t_2,c_2
627 addx c_3,%g0,c_3 !=
628 st c_1,rp(3) !r[3]=c1;
629
630 umul a_4,b_0,t_1 !mul_add_c(a[4],b[0],c2,c3,c1);
631 addcc c_2,t_1,c_2
632 rd %y,t_2 !=
633 addxcc c_3,t_2,c_3
634 addx %g0,%g0,c_1
635 umul a_3,b_1,t_1 !mul_add_c(a[3],b[1],c2,c3,c1);
636 addcc c_2,t_1,c_2 !=
637 rd %y,t_2
638 addxcc c_3,t_2,c_3
639 addx c_1,%g0,c_1
640 umul a_2,b_2,t_1 !=!mul_add_c(a[2],b[2],c2,c3,c1);
641 addcc c_2,t_1,c_2
642 rd %y,t_2
643 addxcc c_3,t_2,c_3
644 addx c_1,%g0,c_1 !=
645 ld bp(4),b_4
646 umul a_1,b_3,t_1 !mul_add_c(a[1],b[3],c2,c3,c1);
647 addcc c_2,t_1,c_2
648 rd %y,t_2 !=
649 addxcc c_3,t_2,c_3
650 addx c_1,%g0,c_1
651 ld bp(5),b_5
652 umul a_0,b_4,t_1 !=!mul_add_c(a[0],b[4],c2,c3,c1);
653 addcc c_2,t_1,c_2
654 rd %y,t_2
655 addxcc c_3,t_2,c_3
656 addx c_1,%g0,c_1 !=
657 st c_2,rp(4) !r[4]=c2;
658
659 umul a_0,b_5,t_1 !mul_add_c(a[0],b[5],c3,c1,c2);
660 addcc c_3,t_1,c_3
661 rd %y,t_2 !=
662 addxcc c_1,t_2,c_1
663 addx %g0,%g0,c_2
664 umul a_1,b_4,t_1 !mul_add_c(a[1],b[4],c3,c1,c2);
665 addcc c_3,t_1,c_3 !=
666 rd %y,t_2
667 addxcc c_1,t_2,c_1
668 addx c_2,%g0,c_2
669 umul a_2,b_3,t_1 !=!mul_add_c(a[2],b[3],c3,c1,c2);
670 addcc c_3,t_1,c_3
671 rd %y,t_2
672 addxcc c_1,t_2,c_1
673 addx c_2,%g0,c_2 !=
674 umul a_3,b_2,t_1 !mul_add_c(a[3],b[2],c3,c1,c2);
675 addcc c_3,t_1,c_3
676 rd %y,t_2
677 addxcc c_1,t_2,c_1 !=
678 addx c_2,%g0,c_2
679 ld ap(5),a_5
680 umul a_4,b_1,t_1 !mul_add_c(a[4],b[1],c3,c1,c2);
681 addcc c_3,t_1,c_3 !=
682 rd %y,t_2
683 addxcc c_1,t_2,c_1
684 ld ap(6),a_6
685 addx c_2,%g0,c_2 !=
686 umul a_5,b_0,t_1 !mul_add_c(a[5],b[0],c3,c1,c2);
687 addcc c_3,t_1,c_3
688 rd %y,t_2
689 addxcc c_1,t_2,c_1 !=
690 addx c_2,%g0,c_2
691 st c_3,rp(5) !r[5]=c3;
692
693 umul a_6,b_0,t_1 !mul_add_c(a[6],b[0],c1,c2,c3);
694 addcc c_1,t_1,c_1 !=
695 rd %y,t_2
696 addxcc c_2,t_2,c_2
697 addx %g0,%g0,c_3
698 umul a_5,b_1,t_1 !=!mul_add_c(a[5],b[1],c1,c2,c3);
699 addcc c_1,t_1,c_1
700 rd %y,t_2
701 addxcc c_2,t_2,c_2
702 addx c_3,%g0,c_3 !=
703 umul a_4,b_2,t_1 !mul_add_c(a[4],b[2],c1,c2,c3);
704 addcc c_1,t_1,c_1
705 rd %y,t_2
706 addxcc c_2,t_2,c_2 !=
707 addx c_3,%g0,c_3
708 umul a_3,b_3,t_1 !mul_add_c(a[3],b[3],c1,c2,c3);
709 addcc c_1,t_1,c_1
710 rd %y,t_2 !=
711 addxcc c_2,t_2,c_2
712 addx c_3,%g0,c_3
713 umul a_2,b_4,t_1 !mul_add_c(a[2],b[4],c1,c2,c3);
714 addcc c_1,t_1,c_1 !=
715 rd %y,t_2
716 addxcc c_2,t_2,c_2
717 ld bp(6),b_6
718 addx c_3,%g0,c_3 !=
719 umul a_1,b_5,t_1 !mul_add_c(a[1],b[5],c1,c2,c3);
720 addcc c_1,t_1,c_1
721 rd %y,t_2
722 addxcc c_2,t_2,c_2 !=
723 addx c_3,%g0,c_3
724 ld bp(7),b_7
725 umul a_0,b_6,t_1 !mul_add_c(a[0],b[6],c1,c2,c3);
726 addcc c_1,t_1,c_1 !=
727 rd %y,t_2
728 addxcc c_2,t_2,c_2
729 st c_1,rp(6) !r[6]=c1;
730 addx c_3,%g0,c_3 !=
731
732 umul a_0,b_7,t_1 !mul_add_c(a[0],b[7],c2,c3,c1);
733 addcc c_2,t_1,c_2
734 rd %y,t_2
735 addxcc c_3,t_2,c_3 !=
736 addx %g0,%g0,c_1
737 umul a_1,b_6,t_1 !mul_add_c(a[1],b[6],c2,c3,c1);
738 addcc c_2,t_1,c_2
739 rd %y,t_2 !=
740 addxcc c_3,t_2,c_3
741 addx c_1,%g0,c_1
742 umul a_2,b_5,t_1 !mul_add_c(a[2],b[5],c2,c3,c1);
743 addcc c_2,t_1,c_2 !=
744 rd %y,t_2
745 addxcc c_3,t_2,c_3
746 addx c_1,%g0,c_1
747 umul a_3,b_4,t_1 !=!mul_add_c(a[3],b[4],c2,c3,c1);
748 addcc c_2,t_1,c_2
749 rd %y,t_2
750 addxcc c_3,t_2,c_3
751 addx c_1,%g0,c_1 !=
752 umul a_4,b_3,t_1 !mul_add_c(a[4],b[3],c2,c3,c1);
753 addcc c_2,t_1,c_2
754 rd %y,t_2
755 addxcc c_3,t_2,c_3 !=
756 addx c_1,%g0,c_1
757 umul a_5,b_2,t_1 !mul_add_c(a[5],b[2],c2,c3,c1);
758 addcc c_2,t_1,c_2
759 rd %y,t_2 !=
760 addxcc c_3,t_2,c_3
761 addx c_1,%g0,c_1
762 ld ap(7),a_7
763 umul a_6,b_1,t_1 !=!mul_add_c(a[6],b[1],c2,c3,c1);
764 addcc c_2,t_1,c_2
765 rd %y,t_2
766 addxcc c_3,t_2,c_3
767 addx c_1,%g0,c_1 !=
768 umul a_7,b_0,t_1 !mul_add_c(a[7],b[0],c2,c3,c1);
769 addcc c_2,t_1,c_2
770 rd %y,t_2
771 addxcc c_3,t_2,c_3 !=
772 addx c_1,%g0,c_1
773 st c_2,rp(7) !r[7]=c2;
774
775 umul a_7,b_1,t_1 !mul_add_c(a[7],b[1],c3,c1,c2);
776 addcc c_3,t_1,c_3 !=
777 rd %y,t_2
778 addxcc c_1,t_2,c_1
779 addx %g0,%g0,c_2
780 umul a_6,b_2,t_1 !=!mul_add_c(a[6],b[2],c3,c1,c2);
781 addcc c_3,t_1,c_3
782 rd %y,t_2
783 addxcc c_1,t_2,c_1
784 addx c_2,%g0,c_2 !=
785 umul a_5,b_3,t_1 !mul_add_c(a[5],b[3],c3,c1,c2);
786 addcc c_3,t_1,c_3
787 rd %y,t_2
788 addxcc c_1,t_2,c_1 !=
789 addx c_2,%g0,c_2
790 umul a_4,b_4,t_1 !mul_add_c(a[4],b[4],c3,c1,c2);
791 addcc c_3,t_1,c_3
792 rd %y,t_2 !=
793 addxcc c_1,t_2,c_1
794 addx c_2,%g0,c_2
795 umul a_3,b_5,t_1 !mul_add_c(a[3],b[5],c3,c1,c2);
796 addcc c_3,t_1,c_3 !=
797 rd %y,t_2
798 addxcc c_1,t_2,c_1
799 addx c_2,%g0,c_2
800 umul a_2,b_6,t_1 !=!mul_add_c(a[2],b[6],c3,c1,c2);
801 addcc c_3,t_1,c_3
802 rd %y,t_2
803 addxcc c_1,t_2,c_1
804 addx c_2,%g0,c_2 !=
805 umul a_1,b_7,t_1 !mul_add_c(a[1],b[7],c3,c1,c2);
806 addcc c_3,t_1,c_3
807 rd %y,t_2
808 addxcc c_1,t_2,c_1 !
809 addx c_2,%g0,c_2
810 st c_3,rp(8) !r[8]=c3;
811
812 umul a_2,b_7,t_1 !mul_add_c(a[2],b[7],c1,c2,c3);
813 addcc c_1,t_1,c_1 !=
814 rd %y,t_2
815 addxcc c_2,t_2,c_2
816 addx %g0,%g0,c_3
817 umul a_3,b_6,t_1 !=!mul_add_c(a[3],b[6],c1,c2,c3);
818 addcc c_1,t_1,c_1
819 rd %y,t_2
820 addxcc c_2,t_2,c_2
821 addx c_3,%g0,c_3 !=
822 umul a_4,b_5,t_1 !mul_add_c(a[4],b[5],c1,c2,c3);
823 addcc c_1,t_1,c_1
824 rd %y,t_2
825 addxcc c_2,t_2,c_2 !=
826 addx c_3,%g0,c_3
827 umul a_5,b_4,t_1 !mul_add_c(a[5],b[4],c1,c2,c3);
828 addcc c_1,t_1,c_1
829 rd %y,t_2 !=
830 addxcc c_2,t_2,c_2
831 addx c_3,%g0,c_3
832 umul a_6,b_3,t_1 !mul_add_c(a[6],b[3],c1,c2,c3);
833 addcc c_1,t_1,c_1 !=
834 rd %y,t_2
835 addxcc c_2,t_2,c_2
836 addx c_3,%g0,c_3
837 umul a_7,b_2,t_1 !=!mul_add_c(a[7],b[2],c1,c2,c3);
838 addcc c_1,t_1,c_1
839 rd %y,t_2
840 addxcc c_2,t_2,c_2
841 addx c_3,%g0,c_3 !=
842 st c_1,rp(9) !r[9]=c1;
843
844 umul a_7,b_3,t_1 !mul_add_c(a[7],b[3],c2,c3,c1);
845 addcc c_2,t_1,c_2
846 rd %y,t_2 !=
847 addxcc c_3,t_2,c_3
848 addx %g0,%g0,c_1
849 umul a_6,b_4,t_1 !mul_add_c(a[6],b[4],c2,c3,c1);
850 addcc c_2,t_1,c_2 !=
851 rd %y,t_2
852 addxcc c_3,t_2,c_3
853 addx c_1,%g0,c_1
854 umul a_5,b_5,t_1 !=!mul_add_c(a[5],b[5],c2,c3,c1);
855 addcc c_2,t_1,c_2
856 rd %y,t_2
857 addxcc c_3,t_2,c_3
858 addx c_1,%g0,c_1 !=
859 umul a_4,b_6,t_1 !mul_add_c(a[4],b[6],c2,c3,c1);
860 addcc c_2,t_1,c_2
861 rd %y,t_2
862 addxcc c_3,t_2,c_3 !=
863 addx c_1,%g0,c_1
864 umul a_3,b_7,t_1 !mul_add_c(a[3],b[7],c2,c3,c1);
865 addcc c_2,t_1,c_2
866 rd %y,t_2 !=
867 addxcc c_3,t_2,c_3
868 addx c_1,%g0,c_1
869 st c_2,rp(10) !r[10]=c2;
870
871 umul a_4,b_7,t_1 !=!mul_add_c(a[4],b[7],c3,c1,c2);
872 addcc c_3,t_1,c_3
873 rd %y,t_2
874 addxcc c_1,t_2,c_1
875 addx %g0,%g0,c_2 !=
876 umul a_5,b_6,t_1 !mul_add_c(a[5],b[6],c3,c1,c2);
877 addcc c_3,t_1,c_3
878 rd %y,t_2
879 addxcc c_1,t_2,c_1 !=
880 addx c_2,%g0,c_2
881 umul a_6,b_5,t_1 !mul_add_c(a[6],b[5],c3,c1,c2);
882 addcc c_3,t_1,c_3
883 rd %y,t_2 !=
884 addxcc c_1,t_2,c_1
885 addx c_2,%g0,c_2
886 umul a_7,b_4,t_1 !mul_add_c(a[7],b[4],c3,c1,c2);
887 addcc c_3,t_1,c_3 !=
888 rd %y,t_2
889 addxcc c_1,t_2,c_1
890 st c_3,rp(11) !r[11]=c3;
891 addx c_2,%g0,c_2 !=
892
893 umul a_7,b_5,t_1 !mul_add_c(a[7],b[5],c1,c2,c3);
894 addcc c_1,t_1,c_1
895 rd %y,t_2
896 addxcc c_2,t_2,c_2 !=
897 addx %g0,%g0,c_3
898 umul a_6,b_6,t_1 !mul_add_c(a[6],b[6],c1,c2,c3);
899 addcc c_1,t_1,c_1
900 rd %y,t_2 !=
901 addxcc c_2,t_2,c_2
902 addx c_3,%g0,c_3
903 umul a_5,b_7,t_1 !mul_add_c(a[5],b[7],c1,c2,c3);
904 addcc c_1,t_1,c_1 !=
905 rd %y,t_2
906 addxcc c_2,t_2,c_2
907 st c_1,rp(12) !r[12]=c1;
908 addx c_3,%g0,c_3 !=
909
910 umul a_6,b_7,t_1 !mul_add_c(a[6],b[7],c2,c3,c1);
911 addcc c_2,t_1,c_2
912 rd %y,t_2
913 addxcc c_3,t_2,c_3 !=
914 addx %g0,%g0,c_1
915 umul a_7,b_6,t_1 !mul_add_c(a[7],b[6],c2,c3,c1);
916 addcc c_2,t_1,c_2
917 rd %y,t_2 !=
918 addxcc c_3,t_2,c_3
919 addx c_1,%g0,c_1
920 st c_2,rp(13) !r[13]=c2;
921
922 umul a_7,b_7,t_1 !=!mul_add_c(a[7],b[7],c3,c1,c2);
923 addcc c_3,t_1,c_3
924 rd %y,t_2
925 addxcc c_1,t_2,c_1
926 nop !=
927 st c_3,rp(14) !r[14]=c3;
928 st c_1,rp(15) !r[15]=c1;
929
930 ret
931 restore %g0,%g0,%o0
932
933.type bn_mul_comba8,#function
934.size bn_mul_comba8,(.-bn_mul_comba8)
935
936.align 32
937
938.global bn_mul_comba4
939/*
940 * void bn_mul_comba4(r,a,b)
941 * BN_ULONG *r,*a,*b;
942 */
943bn_mul_comba4:
944 save %sp,FRAME_SIZE,%sp
945 ld ap(0),a_0
946 ld bp(0),b_0
947 umul a_0,b_0,c_1 !=!mul_add_c(a[0],b[0],c1,c2,c3);
948 ld bp(1),b_1
949 rd %y,c_2
950 st c_1,rp(0) !r[0]=c1;
951
952 umul a_0,b_1,t_1 !=!mul_add_c(a[0],b[1],c2,c3,c1);
953 ld ap(1),a_1
954 addcc c_2,t_1,c_2
955 rd %y,t_2 !=
956 addxcc %g0,t_2,c_3
957 addx %g0,%g0,c_1
958 ld ap(2),a_2
959 umul a_1,b_0,t_1 !=!mul_add_c(a[1],b[0],c2,c3,c1);
960 addcc c_2,t_1,c_2
961 rd %y,t_2
962 addxcc c_3,t_2,c_3
963 addx c_1,%g0,c_1 !=
964 st c_2,rp(1) !r[1]=c2;
965
966 umul a_2,b_0,t_1 !mul_add_c(a[2],b[0],c3,c1,c2);
967 addcc c_3,t_1,c_3
968 rd %y,t_2 !=
969 addxcc c_1,t_2,c_1
970 addx %g0,%g0,c_2
971 ld bp(2),b_2
972 umul a_1,b_1,t_1 !=!mul_add_c(a[1],b[1],c3,c1,c2);
973 addcc c_3,t_1,c_3
974 rd %y,t_2
975 addxcc c_1,t_2,c_1
976 addx c_2,%g0,c_2 !=
977 ld bp(3),b_3
978 umul a_0,b_2,t_1 !mul_add_c(a[0],b[2],c3,c1,c2);
979 addcc c_3,t_1,c_3
980 rd %y,t_2 !=
981 addxcc c_1,t_2,c_1
982 addx c_2,%g0,c_2
983 st c_3,rp(2) !r[2]=c3;
984
985 umul a_0,b_3,t_1 !=!mul_add_c(a[0],b[3],c1,c2,c3);
986 addcc c_1,t_1,c_1
987 rd %y,t_2
988 addxcc c_2,t_2,c_2
989 addx %g0,%g0,c_3 !=
990 umul a_1,b_2,t_1 !mul_add_c(a[1],b[2],c1,c2,c3);
991 addcc c_1,t_1,c_1
992 rd %y,t_2
993 addxcc c_2,t_2,c_2 !=
994 addx c_3,%g0,c_3
995 ld ap(3),a_3
996 umul a_2,b_1,t_1 !mul_add_c(a[2],b[1],c1,c2,c3);
997 addcc c_1,t_1,c_1 !=
998 rd %y,t_2
999 addxcc c_2,t_2,c_2
1000 addx c_3,%g0,c_3
1001 umul a_3,b_0,t_1 !=!mul_add_c(a[3],b[0],c1,c2,c3);
1002 addcc c_1,t_1,c_1
1003 rd %y,t_2
1004 addxcc c_2,t_2,c_2
1005 addx c_3,%g0,c_3 !=
1006 st c_1,rp(3) !r[3]=c1;
1007
1008 umul a_3,b_1,t_1 !mul_add_c(a[3],b[1],c2,c3,c1);
1009 addcc c_2,t_1,c_2
1010 rd %y,t_2 !=
1011 addxcc c_3,t_2,c_3
1012 addx %g0,%g0,c_1
1013 umul a_2,b_2,t_1 !mul_add_c(a[2],b[2],c2,c3,c1);
1014 addcc c_2,t_1,c_2 !=
1015 rd %y,t_2
1016 addxcc c_3,t_2,c_3
1017 addx c_1,%g0,c_1
1018 umul a_1,b_3,t_1 !=!mul_add_c(a[1],b[3],c2,c3,c1);
1019 addcc c_2,t_1,c_2
1020 rd %y,t_2
1021 addxcc c_3,t_2,c_3
1022 addx c_1,%g0,c_1 !=
1023 st c_2,rp(4) !r[4]=c2;
1024
1025 umul a_2,b_3,t_1 !mul_add_c(a[2],b[3],c3,c1,c2);
1026 addcc c_3,t_1,c_3
1027 rd %y,t_2 !=
1028 addxcc c_1,t_2,c_1
1029 addx %g0,%g0,c_2
1030 umul a_3,b_2,t_1 !mul_add_c(a[3],b[2],c3,c1,c2);
1031 addcc c_3,t_1,c_3 !=
1032 rd %y,t_2
1033 addxcc c_1,t_2,c_1
1034 st c_3,rp(5) !r[5]=c3;
1035 addx c_2,%g0,c_2 !=
1036
1037 umul a_3,b_3,t_1 !mul_add_c(a[3],b[3],c1,c2,c3);
1038 addcc c_1,t_1,c_1
1039 rd %y,t_2
1040 addxcc c_2,t_2,c_2 !=
1041 st c_1,rp(6) !r[6]=c1;
1042 st c_2,rp(7) !r[7]=c2;
1043
1044 ret
1045 restore %g0,%g0,%o0
1046
1047.type bn_mul_comba4,#function
1048.size bn_mul_comba4,(.-bn_mul_comba4)
1049
1050.align 32
1051
1052.global bn_sqr_comba8
1053bn_sqr_comba8:
1054 save %sp,FRAME_SIZE,%sp
1055 ld ap(0),a_0
1056 ld ap(1),a_1
1057 umul a_0,a_0,c_1 !=!sqr_add_c(a,0,c1,c2,c3);
1058 rd %y,c_2
1059 st c_1,rp(0) !r[0]=c1;
1060
1061 ld ap(2),a_2
1062 umul a_0,a_1,t_1 !=!sqr_add_c2(a,1,0,c2,c3,c1);
1063 addcc c_2,t_1,c_2
1064 rd %y,t_2
1065 addxcc %g0,t_2,c_3
1066 addx %g0,%g0,c_1 !=
1067 addcc c_2,t_1,c_2
1068 addxcc c_3,t_2,c_3
1069 st c_2,rp(1) !r[1]=c2;
1070 addx c_1,%g0,c_1 !=
1071
1072 umul a_2,a_0,t_1 !sqr_add_c2(a,2,0,c3,c1,c2);
1073 addcc c_3,t_1,c_3
1074 rd %y,t_2
1075 addxcc c_1,t_2,c_1 !=
1076 addx %g0,%g0,c_2
1077 addcc c_3,t_1,c_3
1078 addxcc c_1,t_2,c_1
1079 addx c_2,%g0,c_2 !=
1080 ld ap(3),a_3
1081 umul a_1,a_1,t_1 !sqr_add_c(a,1,c3,c1,c2);
1082 addcc c_3,t_1,c_3
1083 rd %y,t_2 !=
1084 addxcc c_1,t_2,c_1
1085 addx c_2,%g0,c_2
1086 st c_3,rp(2) !r[2]=c3;
1087
1088 umul a_0,a_3,t_1 !=!sqr_add_c2(a,3,0,c1,c2,c3);
1089 addcc c_1,t_1,c_1
1090 rd %y,t_2
1091 addxcc c_2,t_2,c_2
1092 addx %g0,%g0,c_3 !=
1093 addcc c_1,t_1,c_1
1094 addxcc c_2,t_2,c_2
1095 ld ap(4),a_4
1096 addx c_3,%g0,c_3 !=
1097 umul a_1,a_2,t_1 !sqr_add_c2(a,2,1,c1,c2,c3);
1098 addcc c_1,t_1,c_1
1099 rd %y,t_2
1100 addxcc c_2,t_2,c_2 !=
1101 addx c_3,%g0,c_3
1102 addcc c_1,t_1,c_1
1103 addxcc c_2,t_2,c_2
1104 addx c_3,%g0,c_3 !=
1105 st c_1,rp(3) !r[3]=c1;
1106
1107 umul a_4,a_0,t_1 !sqr_add_c2(a,4,0,c2,c3,c1);
1108 addcc c_2,t_1,c_2
1109 rd %y,t_2 !=
1110 addxcc c_3,t_2,c_3
1111 addx %g0,%g0,c_1
1112 addcc c_2,t_1,c_2
1113 addxcc c_3,t_2,c_3 !=
1114 addx c_1,%g0,c_1
1115 umul a_3,a_1,t_1 !sqr_add_c2(a,3,1,c2,c3,c1);
1116 addcc c_2,t_1,c_2
1117 rd %y,t_2 !=
1118 addxcc c_3,t_2,c_3
1119 addx c_1,%g0,c_1
1120 addcc c_2,t_1,c_2
1121 addxcc c_3,t_2,c_3 !=
1122 addx c_1,%g0,c_1
1123 ld ap(5),a_5
1124 umul a_2,a_2,t_1 !sqr_add_c(a,2,c2,c3,c1);
1125 addcc c_2,t_1,c_2 !=
1126 rd %y,t_2
1127 addxcc c_3,t_2,c_3
1128 st c_2,rp(4) !r[4]=c2;
1129 addx c_1,%g0,c_1 !=
1130
1131 umul a_0,a_5,t_1 !sqr_add_c2(a,5,0,c3,c1,c2);
1132 addcc c_3,t_1,c_3
1133 rd %y,t_2
1134 addxcc c_1,t_2,c_1 !=
1135 addx %g0,%g0,c_2
1136 addcc c_3,t_1,c_3
1137 addxcc c_1,t_2,c_1
1138 addx c_2,%g0,c_2 !=
1139 umul a_1,a_4,t_1 !sqr_add_c2(a,4,1,c3,c1,c2);
1140 addcc c_3,t_1,c_3
1141 rd %y,t_2
1142 addxcc c_1,t_2,c_1 !=
1143 addx c_2,%g0,c_2
1144 addcc c_3,t_1,c_3
1145 addxcc c_1,t_2,c_1
1146 addx c_2,%g0,c_2 !=
1147 ld ap(6),a_6
1148 umul a_2,a_3,t_1 !sqr_add_c2(a,3,2,c3,c1,c2);
1149 addcc c_3,t_1,c_3
1150 rd %y,t_2 !=
1151 addxcc c_1,t_2,c_1
1152 addx c_2,%g0,c_2
1153 addcc c_3,t_1,c_3
1154 addxcc c_1,t_2,c_1 !=
1155 addx c_2,%g0,c_2
1156 st c_3,rp(5) !r[5]=c3;
1157
1158 umul a_6,a_0,t_1 !sqr_add_c2(a,6,0,c1,c2,c3);
1159 addcc c_1,t_1,c_1 !=
1160 rd %y,t_2
1161 addxcc c_2,t_2,c_2
1162 addx %g0,%g0,c_3
1163 addcc c_1,t_1,c_1 !=
1164 addxcc c_2,t_2,c_2
1165 addx c_3,%g0,c_3
1166 umul a_5,a_1,t_1 !sqr_add_c2(a,5,1,c1,c2,c3);
1167 addcc c_1,t_1,c_1 !=
1168 rd %y,t_2
1169 addxcc c_2,t_2,c_2
1170 addx c_3,%g0,c_3
1171 addcc c_1,t_1,c_1 !=
1172 addxcc c_2,t_2,c_2
1173 addx c_3,%g0,c_3
1174 umul a_4,a_2,t_1 !sqr_add_c2(a,4,2,c1,c2,c3);
1175 addcc c_1,t_1,c_1 !=
1176 rd %y,t_2
1177 addxcc c_2,t_2,c_2
1178 addx c_3,%g0,c_3
1179 addcc c_1,t_1,c_1 !=
1180 addxcc c_2,t_2,c_2
1181 addx c_3,%g0,c_3
1182 ld ap(7),a_7
1183 umul a_3,a_3,t_1 !=!sqr_add_c(a,3,c1,c2,c3);
1184 addcc c_1,t_1,c_1
1185 rd %y,t_2
1186 addxcc c_2,t_2,c_2
1187 addx c_3,%g0,c_3 !=
1188 st c_1,rp(6) !r[6]=c1;
1189
1190 umul a_0,a_7,t_1 !sqr_add_c2(a,7,0,c2,c3,c1);
1191 addcc c_2,t_1,c_2
1192 rd %y,t_2 !=
1193 addxcc c_3,t_2,c_3
1194 addx %g0,%g0,c_1
1195 addcc c_2,t_1,c_2
1196 addxcc c_3,t_2,c_3 !=
1197 addx c_1,%g0,c_1
1198 umul a_1,a_6,t_1 !sqr_add_c2(a,6,1,c2,c3,c1);
1199 addcc c_2,t_1,c_2
1200 rd %y,t_2 !=
1201 addxcc c_3,t_2,c_3
1202 addx c_1,%g0,c_1
1203 addcc c_2,t_1,c_2
1204 addxcc c_3,t_2,c_3 !=
1205 addx c_1,%g0,c_1
1206 umul a_2,a_5,t_1 !sqr_add_c2(a,5,2,c2,c3,c1);
1207 addcc c_2,t_1,c_2
1208 rd %y,t_2 !=
1209 addxcc c_3,t_2,c_3
1210 addx c_1,%g0,c_1
1211 addcc c_2,t_1,c_2
1212 addxcc c_3,t_2,c_3 !=
1213 addx c_1,%g0,c_1
1214 umul a_3,a_4,t_1 !sqr_add_c2(a,4,3,c2,c3,c1);
1215 addcc c_2,t_1,c_2
1216 rd %y,t_2 !=
1217 addxcc c_3,t_2,c_3
1218 addx c_1,%g0,c_1
1219 addcc c_2,t_1,c_2
1220 addxcc c_3,t_2,c_3 !=
1221 addx c_1,%g0,c_1
1222 st c_2,rp(7) !r[7]=c2;
1223
1224 umul a_7,a_1,t_1 !sqr_add_c2(a,7,1,c3,c1,c2);
1225 addcc c_3,t_1,c_3 !=
1226 rd %y,t_2
1227 addxcc c_1,t_2,c_1
1228 addx %g0,%g0,c_2
1229 addcc c_3,t_1,c_3 !=
1230 addxcc c_1,t_2,c_1
1231 addx c_2,%g0,c_2
1232 umul a_6,a_2,t_1 !sqr_add_c2(a,6,2,c3,c1,c2);
1233 addcc c_3,t_1,c_3 !=
1234 rd %y,t_2
1235 addxcc c_1,t_2,c_1
1236 addx c_2,%g0,c_2
1237 addcc c_3,t_1,c_3 !=
1238 addxcc c_1,t_2,c_1
1239 addx c_2,%g0,c_2
1240 umul a_5,a_3,t_1 !sqr_add_c2(a,5,3,c3,c1,c2);
1241 addcc c_3,t_1,c_3 !=
1242 rd %y,t_2
1243 addxcc c_1,t_2,c_1
1244 addx c_2,%g0,c_2
1245 addcc c_3,t_1,c_3 !=
1246 addxcc c_1,t_2,c_1
1247 addx c_2,%g0,c_2
1248 umul a_4,a_4,t_1 !sqr_add_c(a,4,c3,c1,c2);
1249 addcc c_3,t_1,c_3 !=
1250 rd %y,t_2
1251 addxcc c_1,t_2,c_1
1252 st c_3,rp(8) !r[8]=c3;
1253 addx c_2,%g0,c_2 !=
1254
1255 umul a_2,a_7,t_1 !sqr_add_c2(a,7,2,c1,c2,c3);
1256 addcc c_1,t_1,c_1
1257 rd %y,t_2
1258 addxcc c_2,t_2,c_2 !=
1259 addx %g0,%g0,c_3
1260 addcc c_1,t_1,c_1
1261 addxcc c_2,t_2,c_2
1262 addx c_3,%g0,c_3 !=
1263 umul a_3,a_6,t_1 !sqr_add_c2(a,6,3,c1,c2,c3);
1264 addcc c_1,t_1,c_1
1265 rd %y,t_2
1266 addxcc c_2,t_2,c_2 !=
1267 addx c_3,%g0,c_3
1268 addcc c_1,t_1,c_1
1269 addxcc c_2,t_2,c_2
1270 addx c_3,%g0,c_3 !=
1271 umul a_4,a_5,t_1 !sqr_add_c2(a,5,4,c1,c2,c3);
1272 addcc c_1,t_1,c_1
1273 rd %y,t_2
1274 addxcc c_2,t_2,c_2 !=
1275 addx c_3,%g0,c_3
1276 addcc c_1,t_1,c_1
1277 addxcc c_2,t_2,c_2
1278 addx c_3,%g0,c_3 !=
1279 st c_1,rp(9) !r[9]=c1;
1280
1281 umul a_7,a_3,t_1 !sqr_add_c2(a,7,3,c2,c3,c1);
1282 addcc c_2,t_1,c_2
1283 rd %y,t_2 !=
1284 addxcc c_3,t_2,c_3
1285 addx %g0,%g0,c_1
1286 addcc c_2,t_1,c_2
1287 addxcc c_3,t_2,c_3 !=
1288 addx c_1,%g0,c_1
1289 umul a_6,a_4,t_1 !sqr_add_c2(a,6,4,c2,c3,c1);
1290 addcc c_2,t_1,c_2
1291 rd %y,t_2 !=
1292 addxcc c_3,t_2,c_3
1293 addx c_1,%g0,c_1
1294 addcc c_2,t_1,c_2
1295 addxcc c_3,t_2,c_3 !=
1296 addx c_1,%g0,c_1
1297 umul a_5,a_5,t_1 !sqr_add_c(a,5,c2,c3,c1);
1298 addcc c_2,t_1,c_2
1299 rd %y,t_2 !=
1300 addxcc c_3,t_2,c_3
1301 addx c_1,%g0,c_1
1302 st c_2,rp(10) !r[10]=c2;
1303
1304 umul a_4,a_7,t_1 !=!sqr_add_c2(a,7,4,c3,c1,c2);
1305 addcc c_3,t_1,c_3
1306 rd %y,t_2
1307 addxcc c_1,t_2,c_1
1308 addx %g0,%g0,c_2 !=
1309 addcc c_3,t_1,c_3
1310 addxcc c_1,t_2,c_1
1311 addx c_2,%g0,c_2
1312 umul a_5,a_6,t_1 !=!sqr_add_c2(a,6,5,c3,c1,c2);
1313 addcc c_3,t_1,c_3
1314 rd %y,t_2
1315 addxcc c_1,t_2,c_1
1316 addx c_2,%g0,c_2 !=
1317 addcc c_3,t_1,c_3
1318 addxcc c_1,t_2,c_1
1319 st c_3,rp(11) !r[11]=c3;
1320 addx c_2,%g0,c_2 !=
1321
1322 umul a_7,a_5,t_1 !sqr_add_c2(a,7,5,c1,c2,c3);
1323 addcc c_1,t_1,c_1
1324 rd %y,t_2
1325 addxcc c_2,t_2,c_2 !=
1326 addx %g0,%g0,c_3
1327 addcc c_1,t_1,c_1
1328 addxcc c_2,t_2,c_2
1329 addx c_3,%g0,c_3 !=
1330 umul a_6,a_6,t_1 !sqr_add_c(a,6,c1,c2,c3);
1331 addcc c_1,t_1,c_1
1332 rd %y,t_2
1333 addxcc c_2,t_2,c_2 !=
1334 addx c_3,%g0,c_3
1335 st c_1,rp(12) !r[12]=c1;
1336
1337 umul a_6,a_7,t_1 !sqr_add_c2(a,7,6,c2,c3,c1);
1338 addcc c_2,t_1,c_2 !=
1339 rd %y,t_2
1340 addxcc c_3,t_2,c_3
1341 addx %g0,%g0,c_1
1342 addcc c_2,t_1,c_2 !=
1343 addxcc c_3,t_2,c_3
1344 st c_2,rp(13) !r[13]=c2;
1345 addx c_1,%g0,c_1 !=
1346
1347 umul a_7,a_7,t_1 !sqr_add_c(a,7,c3,c1,c2);
1348 addcc c_3,t_1,c_3
1349 rd %y,t_2
1350 addxcc c_1,t_2,c_1 !=
1351 st c_3,rp(14) !r[14]=c3;
1352 st c_1,rp(15) !r[15]=c1;
1353
1354 ret
1355 restore %g0,%g0,%o0
1356
1357.type bn_sqr_comba8,#function
1358.size bn_sqr_comba8,(.-bn_sqr_comba8)
1359
1360.align 32
1361
1362.global bn_sqr_comba4
1363/*
1364 * void bn_sqr_comba4(r,a)
1365 * BN_ULONG *r,*a;
1366 */
1367bn_sqr_comba4:
1368 save %sp,FRAME_SIZE,%sp
1369 ld ap(0),a_0
1370 umul a_0,a_0,c_1 !sqr_add_c(a,0,c1,c2,c3);
1371 ld ap(1),a_1 !=
1372 rd %y,c_2
1373 st c_1,rp(0) !r[0]=c1;
1374
1375 ld ap(2),a_2
1376 umul a_0,a_1,t_1 !=!sqr_add_c2(a,1,0,c2,c3,c1);
1377 addcc c_2,t_1,c_2
1378 rd %y,t_2
1379 addxcc %g0,t_2,c_3
1380 addx %g0,%g0,c_1 !=
1381 addcc c_2,t_1,c_2
1382 addxcc c_3,t_2,c_3
1383 addx c_1,%g0,c_1 !=
1384 st c_2,rp(1) !r[1]=c2;
1385
1386 umul a_2,a_0,t_1 !sqr_add_c2(a,2,0,c3,c1,c2);
1387 addcc c_3,t_1,c_3
1388 rd %y,t_2 !=
1389 addxcc c_1,t_2,c_1
1390 addx %g0,%g0,c_2
1391 addcc c_3,t_1,c_3
1392 addxcc c_1,t_2,c_1 !=
1393 addx c_2,%g0,c_2
1394 ld ap(3),a_3
1395 umul a_1,a_1,t_1 !sqr_add_c(a,1,c3,c1,c2);
1396 addcc c_3,t_1,c_3 !=
1397 rd %y,t_2
1398 addxcc c_1,t_2,c_1
1399 st c_3,rp(2) !r[2]=c3;
1400 addx c_2,%g0,c_2 !=
1401
1402 umul a_0,a_3,t_1 !sqr_add_c2(a,3,0,c1,c2,c3);
1403 addcc c_1,t_1,c_1
1404 rd %y,t_2
1405 addxcc c_2,t_2,c_2 !=
1406 addx %g0,%g0,c_3
1407 addcc c_1,t_1,c_1
1408 addxcc c_2,t_2,c_2
1409 addx c_3,%g0,c_3 !=
1410 umul a_1,a_2,t_1 !sqr_add_c2(a,2,1,c1,c2,c3);
1411 addcc c_1,t_1,c_1
1412 rd %y,t_2
1413 addxcc c_2,t_2,c_2 !=
1414 addx c_3,%g0,c_3
1415 addcc c_1,t_1,c_1
1416 addxcc c_2,t_2,c_2
1417 addx c_3,%g0,c_3 !=
1418 st c_1,rp(3) !r[3]=c1;
1419
1420 umul a_3,a_1,t_1 !sqr_add_c2(a,3,1,c2,c3,c1);
1421 addcc c_2,t_1,c_2
1422 rd %y,t_2 !=
1423 addxcc c_3,t_2,c_3
1424 addx %g0,%g0,c_1
1425 addcc c_2,t_1,c_2
1426 addxcc c_3,t_2,c_3 !=
1427 addx c_1,%g0,c_1
1428 umul a_2,a_2,t_1 !sqr_add_c(a,2,c2,c3,c1);
1429 addcc c_2,t_1,c_2
1430 rd %y,t_2 !=
1431 addxcc c_3,t_2,c_3
1432 addx c_1,%g0,c_1
1433 st c_2,rp(4) !r[4]=c2;
1434
1435 umul a_2,a_3,t_1 !=!sqr_add_c2(a,3,2,c3,c1,c2);
1436 addcc c_3,t_1,c_3
1437 rd %y,t_2
1438 addxcc c_1,t_2,c_1
1439 addx %g0,%g0,c_2 !=
1440 addcc c_3,t_1,c_3
1441 addxcc c_1,t_2,c_1
1442 st c_3,rp(5) !r[5]=c3;
1443 addx c_2,%g0,c_2 !=
1444
1445 umul a_3,a_3,t_1 !sqr_add_c(a,3,c1,c2,c3);
1446 addcc c_1,t_1,c_1
1447 rd %y,t_2
1448 addxcc c_2,t_2,c_2 !=
1449 st c_1,rp(6) !r[6]=c1;
1450 st c_2,rp(7) !r[7]=c2;
1451
1452 ret
1453 restore %g0,%g0,%o0
1454
1455.type bn_sqr_comba4,#function
1456.size bn_sqr_comba4,(.-bn_sqr_comba4)
1457
1458.align 32
diff --git a/src/lib/libcrypto/bn/asm/sparcv8plus.S b/src/lib/libcrypto/bn/asm/sparcv8plus.S
deleted file mode 100644
index 0074dfdb75..0000000000
--- a/src/lib/libcrypto/bn/asm/sparcv8plus.S
+++ /dev/null
@@ -1,1535 +0,0 @@
1.ident "sparcv8plus.s, Version 1.4"
2.ident "SPARC v9 ISA artwork by Andy Polyakov <appro@fy.chalmers.se>"
3
4/*
5 * ====================================================================
6 * Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
7 * project.
8 *
9 * Rights for redistribution and usage in source and binary forms are
10 * granted according to the OpenSSL license. Warranty of any kind is
11 * disclaimed.
12 * ====================================================================
13 */
14
15/*
16 * This is my modest contributon to OpenSSL project (see
17 * http://www.openssl.org/ for more information about it) and is
18 * a drop-in UltraSPARC ISA replacement for crypto/bn/bn_asm.c
19 * module. For updates see http://fy.chalmers.se/~appro/hpe/.
20 *
21 * Questions-n-answers.
22 *
23 * Q. How to compile?
24 * A. With SC4.x/SC5.x:
25 *
26 * cc -xarch=v8plus -c bn_asm.sparc.v8plus.S -o bn_asm.o
27 *
28 * and with gcc:
29 *
30 * gcc -mcpu=ultrasparc -c bn_asm.sparc.v8plus.S -o bn_asm.o
31 *
32 * or if above fails (it does if you have gas installed):
33 *
34 * gcc -E bn_asm.sparc.v8plus.S | as -xarch=v8plus /dev/fd/0 -o bn_asm.o
35 *
36 * Quick-n-dirty way to fuse the module into the library.
37 * Provided that the library is already configured and built
38 * (in 0.9.2 case with no-asm option):
39 *
40 * # cd crypto/bn
41 * # cp /some/place/bn_asm.sparc.v8plus.S .
42 * # cc -xarch=v8plus -c bn_asm.sparc.v8plus.S -o bn_asm.o
43 * # make
44 * # cd ../..
45 * # make; make test
46 *
47 * Quick-n-dirty way to get rid of it:
48 *
49 * # cd crypto/bn
50 * # touch bn_asm.c
51 * # make
52 * # cd ../..
53 * # make; make test
54 *
55 * Q. V8plus achitecture? What kind of beast is that?
56 * A. Well, it's rather a programming model than an architecture...
57 * It's actually v9-compliant, i.e. *any* UltraSPARC, CPU under
58 * special conditions, namely when kernel doesn't preserve upper
59 * 32 bits of otherwise 64-bit registers during a context switch.
60 *
61 * Q. Why just UltraSPARC? What about SuperSPARC?
62 * A. Original release did target UltraSPARC only. Now SuperSPARC
63 * version is provided along. Both version share bn_*comba[48]
64 * implementations (see comment later in code for explanation).
65 * But what's so special about this UltraSPARC implementation?
66 * Why didn't I let compiler do the job? Trouble is that most of
67 * available compilers (well, SC5.0 is the only exception) don't
68 * attempt to take advantage of UltraSPARC's 64-bitness under
69 * 32-bit kernels even though it's perfectly possible (see next
70 * question).
71 *
72 * Q. 64-bit registers under 32-bit kernels? Didn't you just say it
73 * doesn't work?
74 * A. You can't adress *all* registers as 64-bit wide:-( The catch is
75 * that you actually may rely upon %o0-%o5 and %g1-%g4 being fully
76 * preserved if you're in a leaf function, i.e. such never calling
77 * any other functions. All functions in this module are leaf and
78 * 10 registers is a handful. And as a matter of fact none-"comba"
79 * routines don't require even that much and I could even afford to
80 * not allocate own stack frame for 'em:-)
81 *
82 * Q. What about 64-bit kernels?
83 * A. What about 'em? Just kidding:-) Pure 64-bit version is currently
84 * under evaluation and development...
85 *
86 * Q. What about shared libraries?
87 * A. What about 'em? Kidding again:-) Code does *not* contain any
88 * code position dependencies and it's safe to include it into
89 * shared library as is.
90 *
91 * Q. How much faster does it go?
92 * A. Do you have a good benchmark? In either case below is what I
93 * experience with crypto/bn/expspeed.c test program:
94 *
95 * v8plus module on U10/300MHz against bn_asm.c compiled with:
96 *
97 * cc-5.0 -xarch=v8plus -xO5 -xdepend +7-12%
98 * cc-4.2 -xarch=v8plus -xO5 -xdepend +25-35%
99 * egcs-1.1.2 -mcpu=ultrasparc -O3 +35-45%
100 *
101 * v8 module on SS10/60MHz against bn_asm.c compiled with:
102 *
103 * cc-5.0 -xarch=v8 -xO5 -xdepend +7-10%
104 * cc-4.2 -xarch=v8 -xO5 -xdepend +10%
105 * egcs-1.1.2 -mv8 -O3 +35-45%
106 *
107 * As you can see it's damn hard to beat the new Sun C compiler
108 * and it's in first place GNU C users who will appreciate this
109 * assembler implementation:-)
110 */
111
112/*
113 * Revision history.
114 *
115 * 1.0 - initial release;
116 * 1.1 - new loop unrolling model(*);
117 * - some more fine tuning;
118 * 1.2 - made gas friendly;
119 * - updates to documentation concerning v9;
120 * - new performance comparison matrix;
121 * 1.3 - fixed problem with /usr/ccs/lib/cpp;
122 * 1.4 - native V9 bn_*_comba[48] implementation (15% more efficient)
123 * resulting in slight overall performance kick;
124 * - some retunes;
125 * - support for GNU as added;
126 *
127 * (*) Originally unrolled loop looked like this:
128 * for (;;) {
129 * op(p+0); if (--n==0) break;
130 * op(p+1); if (--n==0) break;
131 * op(p+2); if (--n==0) break;
132 * op(p+3); if (--n==0) break;
133 * p+=4;
134 * }
135 * I unroll according to following:
136 * while (n&~3) {
137 * op(p+0); op(p+1); op(p+2); op(p+3);
138 * p+=4; n=-4;
139 * }
140 * if (n) {
141 * op(p+0); if (--n==0) return;
142 * op(p+2); if (--n==0) return;
143 * op(p+3); return;
144 * }
145 */
146
147/*
148 * GNU assembler can't stand stuw:-(
149 */
150#define stuw st
151
152.section ".text",#alloc,#execinstr
153.file "bn_asm.sparc.v8plus.S"
154
155.align 32
156
157.global bn_mul_add_words
158/*
159 * BN_ULONG bn_mul_add_words(rp,ap,num,w)
160 * BN_ULONG *rp,*ap;
161 * int num;
162 * BN_ULONG w;
163 */
164bn_mul_add_words:
165 brgz,a %o2,.L_bn_mul_add_words_proceed
166 lduw [%o1],%g2
167 retl
168 clr %o0
169
170.L_bn_mul_add_words_proceed:
171 srl %o3,%g0,%o3 ! clruw %o3
172 andcc %o2,-4,%g0
173 bz,pn %icc,.L_bn_mul_add_words_tail
174 clr %o5
175
176.L_bn_mul_add_words_loop: ! wow! 32 aligned!
177 lduw [%o0],%g1
178 lduw [%o1+4],%g3
179 mulx %o3,%g2,%g2
180 add %g1,%o5,%o4
181 nop
182 add %o4,%g2,%o4
183 stuw %o4,[%o0]
184 srlx %o4,32,%o5
185
186 lduw [%o0+4],%g1
187 lduw [%o1+8],%g2
188 mulx %o3,%g3,%g3
189 add %g1,%o5,%o4
190 dec 4,%o2
191 add %o4,%g3,%o4
192 stuw %o4,[%o0+4]
193 srlx %o4,32,%o5
194
195 lduw [%o0+8],%g1
196 lduw [%o1+12],%g3
197 mulx %o3,%g2,%g2
198 add %g1,%o5,%o4
199 inc 16,%o1
200 add %o4,%g2,%o4
201 stuw %o4,[%o0+8]
202 srlx %o4,32,%o5
203
204 lduw [%o0+12],%g1
205 mulx %o3,%g3,%g3
206 add %g1,%o5,%o4
207 inc 16,%o0
208 add %o4,%g3,%o4
209 andcc %o2,-4,%g0
210 stuw %o4,[%o0-4]
211 srlx %o4,32,%o5
212 bnz,a,pt %icc,.L_bn_mul_add_words_loop
213 lduw [%o1],%g2
214
215 brnz,a,pn %o2,.L_bn_mul_add_words_tail
216 lduw [%o1],%g2
217.L_bn_mul_add_words_return:
218 retl
219 mov %o5,%o0
220
221.L_bn_mul_add_words_tail:
222 lduw [%o0],%g1
223 mulx %o3,%g2,%g2
224 add %g1,%o5,%o4
225 dec %o2
226 add %o4,%g2,%o4
227 srlx %o4,32,%o5
228 brz,pt %o2,.L_bn_mul_add_words_return
229 stuw %o4,[%o0]
230
231 lduw [%o1+4],%g2
232 lduw [%o0+4],%g1
233 mulx %o3,%g2,%g2
234 add %g1,%o5,%o4
235 dec %o2
236 add %o4,%g2,%o4
237 srlx %o4,32,%o5
238 brz,pt %o2,.L_bn_mul_add_words_return
239 stuw %o4,[%o0+4]
240
241 lduw [%o1+8],%g2
242 lduw [%o0+8],%g1
243 mulx %o3,%g2,%g2
244 add %g1,%o5,%o4
245 add %o4,%g2,%o4
246 stuw %o4,[%o0+8]
247 retl
248 srlx %o4,32,%o0
249
250.type bn_mul_add_words,#function
251.size bn_mul_add_words,(.-bn_mul_add_words)
252
253.align 32
254
255.global bn_mul_words
256/*
257 * BN_ULONG bn_mul_words(rp,ap,num,w)
258 * BN_ULONG *rp,*ap;
259 * int num;
260 * BN_ULONG w;
261 */
262bn_mul_words:
263 brgz,a %o2,.L_bn_mul_words_proceeed
264 lduw [%o1],%g2
265 retl
266 clr %o0
267
268.L_bn_mul_words_proceeed:
269 srl %o3,%g0,%o3 ! clruw %o3
270 andcc %o2,-4,%g0
271 bz,pn %icc,.L_bn_mul_words_tail
272 clr %o5
273
274.L_bn_mul_words_loop: ! wow! 32 aligned!
275 lduw [%o1+4],%g3
276 mulx %o3,%g2,%g2
277 add %g2,%o5,%o4
278 nop
279 stuw %o4,[%o0]
280 srlx %o4,32,%o5
281
282 lduw [%o1+8],%g2
283 mulx %o3,%g3,%g3
284 add %g3,%o5,%o4
285 dec 4,%o2
286 stuw %o4,[%o0+4]
287 srlx %o4,32,%o5
288
289 lduw [%o1+12],%g3
290 mulx %o3,%g2,%g2
291 add %g2,%o5,%o4
292 inc 16,%o1
293 stuw %o4,[%o0+8]
294 srlx %o4,32,%o5
295
296 mulx %o3,%g3,%g3
297 add %g3,%o5,%o4
298 inc 16,%o0
299 stuw %o4,[%o0-4]
300 srlx %o4,32,%o5
301 andcc %o2,-4,%g0
302 bnz,a,pt %icc,.L_bn_mul_words_loop
303 lduw [%o1],%g2
304 nop
305 nop
306
307 brnz,a,pn %o2,.L_bn_mul_words_tail
308 lduw [%o1],%g2
309.L_bn_mul_words_return:
310 retl
311 mov %o5,%o0
312
313.L_bn_mul_words_tail:
314 mulx %o3,%g2,%g2
315 add %g2,%o5,%o4
316 dec %o2
317 srlx %o4,32,%o5
318 brz,pt %o2,.L_bn_mul_words_return
319 stuw %o4,[%o0]
320
321 lduw [%o1+4],%g2
322 mulx %o3,%g2,%g2
323 add %g2,%o5,%o4
324 dec %o2
325 srlx %o4,32,%o5
326 brz,pt %o2,.L_bn_mul_words_return
327 stuw %o4,[%o0+4]
328
329 lduw [%o1+8],%g2
330 mulx %o3,%g2,%g2
331 add %g2,%o5,%o4
332 stuw %o4,[%o0+8]
333 retl
334 srlx %o4,32,%o0
335
336.type bn_mul_words,#function
337.size bn_mul_words,(.-bn_mul_words)
338
339.align 32
340.global bn_sqr_words
341/*
342 * void bn_sqr_words(r,a,n)
343 * BN_ULONG *r,*a;
344 * int n;
345 */
346bn_sqr_words:
347 brgz,a %o2,.L_bn_sqr_words_proceeed
348 lduw [%o1],%g2
349 retl
350 clr %o0
351
352.L_bn_sqr_words_proceeed:
353 andcc %o2,-4,%g0
354 nop
355 bz,pn %icc,.L_bn_sqr_words_tail
356 nop
357
358.L_bn_sqr_words_loop: ! wow! 32 aligned!
359 lduw [%o1+4],%g3
360 mulx %g2,%g2,%o4
361 stuw %o4,[%o0]
362 srlx %o4,32,%o5
363 stuw %o5,[%o0+4]
364 nop
365
366 lduw [%o1+8],%g2
367 mulx %g3,%g3,%o4
368 dec 4,%o2
369 stuw %o4,[%o0+8]
370 srlx %o4,32,%o5
371 stuw %o5,[%o0+12]
372
373 lduw [%o1+12],%g3
374 mulx %g2,%g2,%o4
375 srlx %o4,32,%o5
376 stuw %o4,[%o0+16]
377 inc 16,%o1
378 stuw %o5,[%o0+20]
379
380 mulx %g3,%g3,%o4
381 inc 32,%o0
382 stuw %o4,[%o0-8]
383 srlx %o4,32,%o5
384 andcc %o2,-4,%g2
385 stuw %o5,[%o0-4]
386 bnz,a,pt %icc,.L_bn_sqr_words_loop
387 lduw [%o1],%g2
388 nop
389
390 brnz,a,pn %o2,.L_bn_sqr_words_tail
391 lduw [%o1],%g2
392.L_bn_sqr_words_return:
393 retl
394 clr %o0
395
396.L_bn_sqr_words_tail:
397 mulx %g2,%g2,%o4
398 dec %o2
399 stuw %o4,[%o0]
400 srlx %o4,32,%o5
401 brz,pt %o2,.L_bn_sqr_words_return
402 stuw %o5,[%o0+4]
403
404 lduw [%o1+4],%g2
405 mulx %g2,%g2,%o4
406 dec %o2
407 stuw %o4,[%o0+8]
408 srlx %o4,32,%o5
409 brz,pt %o2,.L_bn_sqr_words_return
410 stuw %o5,[%o0+12]
411
412 lduw [%o1+8],%g2
413 mulx %g2,%g2,%o4
414 srlx %o4,32,%o5
415 stuw %o4,[%o0+16]
416 stuw %o5,[%o0+20]
417 retl
418 clr %o0
419
420.type bn_sqr_words,#function
421.size bn_sqr_words,(.-bn_sqr_words)
422
423.align 32
424.global bn_div_words
425/*
426 * BN_ULONG bn_div_words(h,l,d)
427 * BN_ULONG h,l,d;
428 */
429bn_div_words:
430 sllx %o0,32,%o0
431 or %o0,%o1,%o0
432 udivx %o0,%o2,%o0
433 retl
434 srl %o0,%g0,%o0 ! clruw %o0
435
436.type bn_div_words,#function
437.size bn_div_words,(.-bn_div_words)
438
439.align 32
440
441.global bn_add_words
442/*
443 * BN_ULONG bn_add_words(rp,ap,bp,n)
444 * BN_ULONG *rp,*ap,*bp;
445 * int n;
446 */
447bn_add_words:
448 brgz,a %o3,.L_bn_add_words_proceed
449 lduw [%o1],%o4
450 retl
451 clr %o0
452
453.L_bn_add_words_proceed:
454 andcc %o3,-4,%g0
455 bz,pn %icc,.L_bn_add_words_tail
456 addcc %g0,0,%g0 ! clear carry flag
457 nop
458
459.L_bn_add_words_loop: ! wow! 32 aligned!
460 dec 4,%o3
461 lduw [%o2],%o5
462 lduw [%o1+4],%g1
463 lduw [%o2+4],%g2
464 lduw [%o1+8],%g3
465 lduw [%o2+8],%g4
466 addccc %o5,%o4,%o5
467 stuw %o5,[%o0]
468
469 lduw [%o1+12],%o4
470 lduw [%o2+12],%o5
471 inc 16,%o1
472 addccc %g1,%g2,%g1
473 stuw %g1,[%o0+4]
474
475 inc 16,%o2
476 addccc %g3,%g4,%g3
477 stuw %g3,[%o0+8]
478
479 inc 16,%o0
480 addccc %o5,%o4,%o5
481 stuw %o5,[%o0-4]
482 and %o3,-4,%g1
483 brnz,a,pt %g1,.L_bn_add_words_loop
484 lduw [%o1],%o4
485
486 brnz,a,pn %o3,.L_bn_add_words_tail
487 lduw [%o1],%o4
488.L_bn_add_words_return:
489 clr %o0
490 retl
491 movcs %icc,1,%o0
492 nop
493
494.L_bn_add_words_tail:
495 lduw [%o2],%o5
496 dec %o3
497 addccc %o5,%o4,%o5
498 brz,pt %o3,.L_bn_add_words_return
499 stuw %o5,[%o0]
500
501 lduw [%o1+4],%o4
502 lduw [%o2+4],%o5
503 dec %o3
504 addccc %o5,%o4,%o5
505 brz,pt %o3,.L_bn_add_words_return
506 stuw %o5,[%o0+4]
507
508 lduw [%o1+8],%o4
509 lduw [%o2+8],%o5
510 addccc %o5,%o4,%o5
511 stuw %o5,[%o0+8]
512 clr %o0
513 retl
514 movcs %icc,1,%o0
515
516.type bn_add_words,#function
517.size bn_add_words,(.-bn_add_words)
518
519.global bn_sub_words
520/*
521 * BN_ULONG bn_sub_words(rp,ap,bp,n)
522 * BN_ULONG *rp,*ap,*bp;
523 * int n;
524 */
525bn_sub_words:
526 brgz,a %o3,.L_bn_sub_words_proceed
527 lduw [%o1],%o4
528 retl
529 clr %o0
530
531.L_bn_sub_words_proceed:
532 andcc %o3,-4,%g0
533 bz,pn %icc,.L_bn_sub_words_tail
534 addcc %g0,0,%g0 ! clear carry flag
535 nop
536
537.L_bn_sub_words_loop: ! wow! 32 aligned!
538 dec 4,%o3
539 lduw [%o2],%o5
540 lduw [%o1+4],%g1
541 lduw [%o2+4],%g2
542 lduw [%o1+8],%g3
543 lduw [%o2+8],%g4
544 subccc %o4,%o5,%o5
545 stuw %o5,[%o0]
546
547 lduw [%o1+12],%o4
548 lduw [%o2+12],%o5
549 inc 16,%o1
550 subccc %g1,%g2,%g2
551 stuw %g2,[%o0+4]
552
553 inc 16,%o2
554 subccc %g3,%g4,%g4
555 stuw %g4,[%o0+8]
556
557 inc 16,%o0
558 subccc %o4,%o5,%o5
559 stuw %o5,[%o0-4]
560 and %o3,-4,%g1
561 brnz,a,pt %g1,.L_bn_sub_words_loop
562 lduw [%o1],%o4
563
564 brnz,a,pn %o3,.L_bn_sub_words_tail
565 lduw [%o1],%o4
566.L_bn_sub_words_return:
567 clr %o0
568 retl
569 movcs %icc,1,%o0
570 nop
571
572.L_bn_sub_words_tail: ! wow! 32 aligned!
573 lduw [%o2],%o5
574 dec %o3
575 subccc %o4,%o5,%o5
576 brz,pt %o3,.L_bn_sub_words_return
577 stuw %o5,[%o0]
578
579 lduw [%o1+4],%o4
580 lduw [%o2+4],%o5
581 dec %o3
582 subccc %o4,%o5,%o5
583 brz,pt %o3,.L_bn_sub_words_return
584 stuw %o5,[%o0+4]
585
586 lduw [%o1+8],%o4
587 lduw [%o2+8],%o5
588 subccc %o4,%o5,%o5
589 stuw %o5,[%o0+8]
590 clr %o0
591 retl
592 movcs %icc,1,%o0
593
594.type bn_sub_words,#function
595.size bn_sub_words,(.-bn_sub_words)
596
597/*
598 * Code below depends on the fact that upper parts of the %l0-%l7
599 * and %i0-%i7 are zeroed by kernel after context switch. In
600 * previous versions this comment stated that "the trouble is that
601 * it's not feasible to implement the mumbo-jumbo in less V9
602 * instructions:-(" which apparently isn't true thanks to
603 * 'bcs,a %xcc,.+8; inc %rd' pair. But the performance improvement
604 * results not from the shorter code, but from elimination of
605 * multicycle none-pairable 'rd %y,%rd' instructions.
606 *
607 * Andy.
608 */
609
610#define FRAME_SIZE -96
611
612/*
613 * Here is register usage map for *all* routines below.
614 */
615#define t_1 %o0
616#define t_2 %o1
617#define c_12 %o2
618#define c_3 %o3
619
620#define ap(I) [%i1+4*I]
621#define bp(I) [%i2+4*I]
622#define rp(I) [%i0+4*I]
623
624#define a_0 %l0
625#define a_1 %l1
626#define a_2 %l2
627#define a_3 %l3
628#define a_4 %l4
629#define a_5 %l5
630#define a_6 %l6
631#define a_7 %l7
632
633#define b_0 %i3
634#define b_1 %i4
635#define b_2 %i5
636#define b_3 %o4
637#define b_4 %o5
638#define b_5 %o7
639#define b_6 %g1
640#define b_7 %g4
641
642.align 32
643.global bn_mul_comba8
644/*
645 * void bn_mul_comba8(r,a,b)
646 * BN_ULONG *r,*a,*b;
647 */
648bn_mul_comba8:
649 save %sp,FRAME_SIZE,%sp
650 mov 1,t_2
651 lduw ap(0),a_0
652 sllx t_2,32,t_2
653 lduw bp(0),b_0 !=
654 lduw bp(1),b_1
655 mulx a_0,b_0,t_1 !mul_add_c(a[0],b[0],c1,c2,c3);
656 srlx t_1,32,c_12
657 stuw t_1,rp(0) !=!r[0]=c1;
658
659 lduw ap(1),a_1
660 mulx a_0,b_1,t_1 !mul_add_c(a[0],b[1],c2,c3,c1);
661 addcc c_12,t_1,c_12
662 clr c_3 !=
663 bcs,a %xcc,.+8
664 add c_3,t_2,c_3
665 lduw ap(2),a_2
666 mulx a_1,b_0,t_1 !=!mul_add_c(a[1],b[0],c2,c3,c1);
667 addcc c_12,t_1,t_1
668 bcs,a %xcc,.+8
669 add c_3,t_2,c_3
670 srlx t_1,32,c_12 !=
671 stuw t_1,rp(1) !r[1]=c2;
672 or c_12,c_3,c_12
673
674 mulx a_2,b_0,t_1 !mul_add_c(a[2],b[0],c3,c1,c2);
675 addcc c_12,t_1,c_12 !=
676 clr c_3
677 bcs,a %xcc,.+8
678 add c_3,t_2,c_3
679 lduw bp(2),b_2 !=
680 mulx a_1,b_1,t_1 !mul_add_c(a[1],b[1],c3,c1,c2);
681 addcc c_12,t_1,c_12
682 bcs,a %xcc,.+8
683 add c_3,t_2,c_3 !=
684 lduw bp(3),b_3
685 mulx a_0,b_2,t_1 !mul_add_c(a[0],b[2],c3,c1,c2);
686 addcc c_12,t_1,t_1
687 bcs,a %xcc,.+8 !=
688 add c_3,t_2,c_3
689 srlx t_1,32,c_12
690 stuw t_1,rp(2) !r[2]=c3;
691 or c_12,c_3,c_12 !=
692
693 mulx a_0,b_3,t_1 !mul_add_c(a[0],b[3],c1,c2,c3);
694 addcc c_12,t_1,c_12
695 clr c_3
696 bcs,a %xcc,.+8 !=
697 add c_3,t_2,c_3
698 mulx a_1,b_2,t_1 !=!mul_add_c(a[1],b[2],c1,c2,c3);
699 addcc c_12,t_1,c_12
700 bcs,a %xcc,.+8 !=
701 add c_3,t_2,c_3
702 lduw ap(3),a_3
703 mulx a_2,b_1,t_1 !mul_add_c(a[2],b[1],c1,c2,c3);
704 addcc c_12,t_1,c_12 !=
705 bcs,a %xcc,.+8
706 add c_3,t_2,c_3
707 lduw ap(4),a_4
708 mulx a_3,b_0,t_1 !=!mul_add_c(a[3],b[0],c1,c2,c3);!=
709 addcc c_12,t_1,t_1
710 bcs,a %xcc,.+8
711 add c_3,t_2,c_3
712 srlx t_1,32,c_12 !=
713 stuw t_1,rp(3) !r[3]=c1;
714 or c_12,c_3,c_12
715
716 mulx a_4,b_0,t_1 !mul_add_c(a[4],b[0],c2,c3,c1);
717 addcc c_12,t_1,c_12 !=
718 clr c_3
719 bcs,a %xcc,.+8
720 add c_3,t_2,c_3
721 mulx a_3,b_1,t_1 !=!mul_add_c(a[3],b[1],c2,c3,c1);
722 addcc c_12,t_1,c_12
723 bcs,a %xcc,.+8
724 add c_3,t_2,c_3
725 mulx a_2,b_2,t_1 !=!mul_add_c(a[2],b[2],c2,c3,c1);
726 addcc c_12,t_1,c_12
727 bcs,a %xcc,.+8
728 add c_3,t_2,c_3
729 lduw bp(4),b_4 !=
730 mulx a_1,b_3,t_1 !mul_add_c(a[1],b[3],c2,c3,c1);
731 addcc c_12,t_1,c_12
732 bcs,a %xcc,.+8
733 add c_3,t_2,c_3 !=
734 lduw bp(5),b_5
735 mulx a_0,b_4,t_1 !mul_add_c(a[0],b[4],c2,c3,c1);
736 addcc c_12,t_1,t_1
737 bcs,a %xcc,.+8 !=
738 add c_3,t_2,c_3
739 srlx t_1,32,c_12
740 stuw t_1,rp(4) !r[4]=c2;
741 or c_12,c_3,c_12 !=
742
743 mulx a_0,b_5,t_1 !mul_add_c(a[0],b[5],c3,c1,c2);
744 addcc c_12,t_1,c_12
745 clr c_3
746 bcs,a %xcc,.+8 !=
747 add c_3,t_2,c_3
748 mulx a_1,b_4,t_1 !mul_add_c(a[1],b[4],c3,c1,c2);
749 addcc c_12,t_1,c_12
750 bcs,a %xcc,.+8 !=
751 add c_3,t_2,c_3
752 mulx a_2,b_3,t_1 !mul_add_c(a[2],b[3],c3,c1,c2);
753 addcc c_12,t_1,c_12
754 bcs,a %xcc,.+8 !=
755 add c_3,t_2,c_3
756 mulx a_3,b_2,t_1 !mul_add_c(a[3],b[2],c3,c1,c2);
757 addcc c_12,t_1,c_12
758 bcs,a %xcc,.+8 !=
759 add c_3,t_2,c_3
760 lduw ap(5),a_5
761 mulx a_4,b_1,t_1 !mul_add_c(a[4],b[1],c3,c1,c2);
762 addcc c_12,t_1,c_12 !=
763 bcs,a %xcc,.+8
764 add c_3,t_2,c_3
765 lduw ap(6),a_6
766 mulx a_5,b_0,t_1 !=!mul_add_c(a[5],b[0],c3,c1,c2);
767 addcc c_12,t_1,t_1
768 bcs,a %xcc,.+8
769 add c_3,t_2,c_3
770 srlx t_1,32,c_12 !=
771 stuw t_1,rp(5) !r[5]=c3;
772 or c_12,c_3,c_12
773
774 mulx a_6,b_0,t_1 !mul_add_c(a[6],b[0],c1,c2,c3);
775 addcc c_12,t_1,c_12 !=
776 clr c_3
777 bcs,a %xcc,.+8
778 add c_3,t_2,c_3
779 mulx a_5,b_1,t_1 !=!mul_add_c(a[5],b[1],c1,c2,c3);
780 addcc c_12,t_1,c_12
781 bcs,a %xcc,.+8
782 add c_3,t_2,c_3
783 mulx a_4,b_2,t_1 !=!mul_add_c(a[4],b[2],c1,c2,c3);
784 addcc c_12,t_1,c_12
785 bcs,a %xcc,.+8
786 add c_3,t_2,c_3
787 mulx a_3,b_3,t_1 !=!mul_add_c(a[3],b[3],c1,c2,c3);
788 addcc c_12,t_1,c_12
789 bcs,a %xcc,.+8
790 add c_3,t_2,c_3
791 mulx a_2,b_4,t_1 !=!mul_add_c(a[2],b[4],c1,c2,c3);
792 addcc c_12,t_1,c_12
793 bcs,a %xcc,.+8
794 add c_3,t_2,c_3
795 lduw bp(6),b_6 !=
796 mulx a_1,b_5,t_1 !mul_add_c(a[1],b[5],c1,c2,c3);
797 addcc c_12,t_1,c_12
798 bcs,a %xcc,.+8
799 add c_3,t_2,c_3 !=
800 lduw bp(7),b_7
801 mulx a_0,b_6,t_1 !mul_add_c(a[0],b[6],c1,c2,c3);
802 addcc c_12,t_1,t_1
803 bcs,a %xcc,.+8 !=
804 add c_3,t_2,c_3
805 srlx t_1,32,c_12
806 stuw t_1,rp(6) !r[6]=c1;
807 or c_12,c_3,c_12 !=
808
809 mulx a_0,b_7,t_1 !mul_add_c(a[0],b[7],c2,c3,c1);
810 addcc c_12,t_1,c_12
811 clr c_3
812 bcs,a %xcc,.+8 !=
813 add c_3,t_2,c_3
814 mulx a_1,b_6,t_1 !mul_add_c(a[1],b[6],c2,c3,c1);
815 addcc c_12,t_1,c_12
816 bcs,a %xcc,.+8 !=
817 add c_3,t_2,c_3
818 mulx a_2,b_5,t_1 !mul_add_c(a[2],b[5],c2,c3,c1);
819 addcc c_12,t_1,c_12
820 bcs,a %xcc,.+8 !=
821 add c_3,t_2,c_3
822 mulx a_3,b_4,t_1 !mul_add_c(a[3],b[4],c2,c3,c1);
823 addcc c_12,t_1,c_12
824 bcs,a %xcc,.+8 !=
825 add c_3,t_2,c_3
826 mulx a_4,b_3,t_1 !mul_add_c(a[4],b[3],c2,c3,c1);
827 addcc c_12,t_1,c_12
828 bcs,a %xcc,.+8 !=
829 add c_3,t_2,c_3
830 mulx a_5,b_2,t_1 !mul_add_c(a[5],b[2],c2,c3,c1);
831 addcc c_12,t_1,c_12
832 bcs,a %xcc,.+8 !=
833 add c_3,t_2,c_3
834 lduw ap(7),a_7
835 mulx a_6,b_1,t_1 !=!mul_add_c(a[6],b[1],c2,c3,c1);
836 addcc c_12,t_1,c_12
837 bcs,a %xcc,.+8
838 add c_3,t_2,c_3
839 mulx a_7,b_0,t_1 !=!mul_add_c(a[7],b[0],c2,c3,c1);
840 addcc c_12,t_1,t_1
841 bcs,a %xcc,.+8
842 add c_3,t_2,c_3
843 srlx t_1,32,c_12 !=
844 stuw t_1,rp(7) !r[7]=c2;
845 or c_12,c_3,c_12
846
847 mulx a_7,b_1,t_1 !=!mul_add_c(a[7],b[1],c3,c1,c2);
848 addcc c_12,t_1,c_12
849 clr c_3
850 bcs,a %xcc,.+8
851 add c_3,t_2,c_3 !=
852 mulx a_6,b_2,t_1 !mul_add_c(a[6],b[2],c3,c1,c2);
853 addcc c_12,t_1,c_12
854 bcs,a %xcc,.+8
855 add c_3,t_2,c_3 !=
856 mulx a_5,b_3,t_1 !mul_add_c(a[5],b[3],c3,c1,c2);
857 addcc c_12,t_1,c_12
858 bcs,a %xcc,.+8
859 add c_3,t_2,c_3 !=
860 mulx a_4,b_4,t_1 !mul_add_c(a[4],b[4],c3,c1,c2);
861 addcc c_12,t_1,c_12
862 bcs,a %xcc,.+8
863 add c_3,t_2,c_3 !=
864 mulx a_3,b_5,t_1 !mul_add_c(a[3],b[5],c3,c1,c2);
865 addcc c_12,t_1,c_12
866 bcs,a %xcc,.+8
867 add c_3,t_2,c_3 !=
868 mulx a_2,b_6,t_1 !mul_add_c(a[2],b[6],c3,c1,c2);
869 addcc c_12,t_1,c_12
870 bcs,a %xcc,.+8
871 add c_3,t_2,c_3 !=
872 mulx a_1,b_7,t_1 !mul_add_c(a[1],b[7],c3,c1,c2);
873 addcc c_12,t_1,t_1
874 bcs,a %xcc,.+8
875 add c_3,t_2,c_3 !=
876 srlx t_1,32,c_12
877 stuw t_1,rp(8) !r[8]=c3;
878 or c_12,c_3,c_12
879
880 mulx a_2,b_7,t_1 !=!mul_add_c(a[2],b[7],c1,c2,c3);
881 addcc c_12,t_1,c_12
882 clr c_3
883 bcs,a %xcc,.+8
884 add c_3,t_2,c_3 !=
885 mulx a_3,b_6,t_1 !mul_add_c(a[3],b[6],c1,c2,c3);
886 addcc c_12,t_1,c_12
887 bcs,a %xcc,.+8 !=
888 add c_3,t_2,c_3
889 mulx a_4,b_5,t_1 !mul_add_c(a[4],b[5],c1,c2,c3);
890 addcc c_12,t_1,c_12
891 bcs,a %xcc,.+8 !=
892 add c_3,t_2,c_3
893 mulx a_5,b_4,t_1 !mul_add_c(a[5],b[4],c1,c2,c3);
894 addcc c_12,t_1,c_12
895 bcs,a %xcc,.+8 !=
896 add c_3,t_2,c_3
897 mulx a_6,b_3,t_1 !mul_add_c(a[6],b[3],c1,c2,c3);
898 addcc c_12,t_1,c_12
899 bcs,a %xcc,.+8 !=
900 add c_3,t_2,c_3
901 mulx a_7,b_2,t_1 !mul_add_c(a[7],b[2],c1,c2,c3);
902 addcc c_12,t_1,t_1
903 bcs,a %xcc,.+8 !=
904 add c_3,t_2,c_3
905 srlx t_1,32,c_12
906 stuw t_1,rp(9) !r[9]=c1;
907 or c_12,c_3,c_12 !=
908
909 mulx a_7,b_3,t_1 !mul_add_c(a[7],b[3],c2,c3,c1);
910 addcc c_12,t_1,c_12
911 clr c_3
912 bcs,a %xcc,.+8 !=
913 add c_3,t_2,c_3
914 mulx a_6,b_4,t_1 !mul_add_c(a[6],b[4],c2,c3,c1);
915 addcc c_12,t_1,c_12
916 bcs,a %xcc,.+8 !=
917 add c_3,t_2,c_3
918 mulx a_5,b_5,t_1 !mul_add_c(a[5],b[5],c2,c3,c1);
919 addcc c_12,t_1,c_12
920 bcs,a %xcc,.+8 !=
921 add c_3,t_2,c_3
922 mulx a_4,b_6,t_1 !mul_add_c(a[4],b[6],c2,c3,c1);
923 addcc c_12,t_1,c_12
924 bcs,a %xcc,.+8 !=
925 add c_3,t_2,c_3
926 mulx a_3,b_7,t_1 !mul_add_c(a[3],b[7],c2,c3,c1);
927 addcc c_12,t_1,t_1
928 bcs,a %xcc,.+8 !=
929 add c_3,t_2,c_3
930 srlx t_1,32,c_12
931 stuw t_1,rp(10) !r[10]=c2;
932 or c_12,c_3,c_12 !=
933
934 mulx a_4,b_7,t_1 !mul_add_c(a[4],b[7],c3,c1,c2);
935 addcc c_12,t_1,c_12
936 clr c_3
937 bcs,a %xcc,.+8 !=
938 add c_3,t_2,c_3
939 mulx a_5,b_6,t_1 !mul_add_c(a[5],b[6],c3,c1,c2);
940 addcc c_12,t_1,c_12
941 bcs,a %xcc,.+8 !=
942 add c_3,t_2,c_3
943 mulx a_6,b_5,t_1 !mul_add_c(a[6],b[5],c3,c1,c2);
944 addcc c_12,t_1,c_12
945 bcs,a %xcc,.+8 !=
946 add c_3,t_2,c_3
947 mulx a_7,b_4,t_1 !mul_add_c(a[7],b[4],c3,c1,c2);
948 addcc c_12,t_1,t_1
949 bcs,a %xcc,.+8 !=
950 add c_3,t_2,c_3
951 srlx t_1,32,c_12
952 stuw t_1,rp(11) !r[11]=c3;
953 or c_12,c_3,c_12 !=
954
955 mulx a_7,b_5,t_1 !mul_add_c(a[7],b[5],c1,c2,c3);
956 addcc c_12,t_1,c_12
957 clr c_3
958 bcs,a %xcc,.+8 !=
959 add c_3,t_2,c_3
960 mulx a_6,b_6,t_1 !mul_add_c(a[6],b[6],c1,c2,c3);
961 addcc c_12,t_1,c_12
962 bcs,a %xcc,.+8 !=
963 add c_3,t_2,c_3
964 mulx a_5,b_7,t_1 !mul_add_c(a[5],b[7],c1,c2,c3);
965 addcc c_12,t_1,t_1
966 bcs,a %xcc,.+8 !=
967 add c_3,t_2,c_3
968 srlx t_1,32,c_12
969 stuw t_1,rp(12) !r[12]=c1;
970 or c_12,c_3,c_12 !=
971
972 mulx a_6,b_7,t_1 !mul_add_c(a[6],b[7],c2,c3,c1);
973 addcc c_12,t_1,c_12
974 clr c_3
975 bcs,a %xcc,.+8 !=
976 add c_3,t_2,c_3
977 mulx a_7,b_6,t_1 !mul_add_c(a[7],b[6],c2,c3,c1);
978 addcc c_12,t_1,t_1
979 bcs,a %xcc,.+8 !=
980 add c_3,t_2,c_3
981 srlx t_1,32,c_12
982 st t_1,rp(13) !r[13]=c2;
983 or c_12,c_3,c_12 !=
984
985 mulx a_7,b_7,t_1 !mul_add_c(a[7],b[7],c3,c1,c2);
986 addcc c_12,t_1,t_1
987 srlx t_1,32,c_12 !=
988 stuw t_1,rp(14) !r[14]=c3;
989 stuw c_12,rp(15) !r[15]=c1;
990
991 ret
992 restore %g0,%g0,%o0 !=
993
994.type bn_mul_comba8,#function
995.size bn_mul_comba8,(.-bn_mul_comba8)
996
997.align 32
998
999.global bn_mul_comba4
1000/*
1001 * void bn_mul_comba4(r,a,b)
1002 * BN_ULONG *r,*a,*b;
1003 */
1004bn_mul_comba4:
1005 save %sp,FRAME_SIZE,%sp
1006 lduw ap(0),a_0
1007 mov 1,t_2
1008 lduw bp(0),b_0
1009 sllx t_2,32,t_2 !=
1010 lduw bp(1),b_1
1011 mulx a_0,b_0,t_1 !mul_add_c(a[0],b[0],c1,c2,c3);
1012 srlx t_1,32,c_12
1013 stuw t_1,rp(0) !=!r[0]=c1;
1014
1015 lduw ap(1),a_1
1016 mulx a_0,b_1,t_1 !mul_add_c(a[0],b[1],c2,c3,c1);
1017 addcc c_12,t_1,c_12
1018 clr c_3 !=
1019 bcs,a %xcc,.+8
1020 add c_3,t_2,c_3
1021 lduw ap(2),a_2
1022 mulx a_1,b_0,t_1 !=!mul_add_c(a[1],b[0],c2,c3,c1);
1023 addcc c_12,t_1,t_1
1024 bcs,a %xcc,.+8
1025 add c_3,t_2,c_3
1026 srlx t_1,32,c_12 !=
1027 stuw t_1,rp(1) !r[1]=c2;
1028 or c_12,c_3,c_12
1029
1030 mulx a_2,b_0,t_1 !mul_add_c(a[2],b[0],c3,c1,c2);
1031 addcc c_12,t_1,c_12 !=
1032 clr c_3
1033 bcs,a %xcc,.+8
1034 add c_3,t_2,c_3
1035 lduw bp(2),b_2 !=
1036 mulx a_1,b_1,t_1 !mul_add_c(a[1],b[1],c3,c1,c2);
1037 addcc c_12,t_1,c_12
1038 bcs,a %xcc,.+8
1039 add c_3,t_2,c_3 !=
1040 lduw bp(3),b_3
1041 mulx a_0,b_2,t_1 !mul_add_c(a[0],b[2],c3,c1,c2);
1042 addcc c_12,t_1,t_1
1043 bcs,a %xcc,.+8 !=
1044 add c_3,t_2,c_3
1045 srlx t_1,32,c_12
1046 stuw t_1,rp(2) !r[2]=c3;
1047 or c_12,c_3,c_12 !=
1048
1049 mulx a_0,b_3,t_1 !mul_add_c(a[0],b[3],c1,c2,c3);
1050 addcc c_12,t_1,c_12
1051 clr c_3
1052 bcs,a %xcc,.+8 !=
1053 add c_3,t_2,c_3
1054 mulx a_1,b_2,t_1 !mul_add_c(a[1],b[2],c1,c2,c3);
1055 addcc c_12,t_1,c_12
1056 bcs,a %xcc,.+8 !=
1057 add c_3,t_2,c_3
1058 lduw ap(3),a_3
1059 mulx a_2,b_1,t_1 !mul_add_c(a[2],b[1],c1,c2,c3);
1060 addcc c_12,t_1,c_12 !=
1061 bcs,a %xcc,.+8
1062 add c_3,t_2,c_3
1063 mulx a_3,b_0,t_1 !mul_add_c(a[3],b[0],c1,c2,c3);!=
1064 addcc c_12,t_1,t_1 !=
1065 bcs,a %xcc,.+8
1066 add c_3,t_2,c_3
1067 srlx t_1,32,c_12
1068 stuw t_1,rp(3) !=!r[3]=c1;
1069 or c_12,c_3,c_12
1070
1071 mulx a_3,b_1,t_1 !mul_add_c(a[3],b[1],c2,c3,c1);
1072 addcc c_12,t_1,c_12
1073 clr c_3 !=
1074 bcs,a %xcc,.+8
1075 add c_3,t_2,c_3
1076 mulx a_2,b_2,t_1 !mul_add_c(a[2],b[2],c2,c3,c1);
1077 addcc c_12,t_1,c_12 !=
1078 bcs,a %xcc,.+8
1079 add c_3,t_2,c_3
1080 mulx a_1,b_3,t_1 !mul_add_c(a[1],b[3],c2,c3,c1);
1081 addcc c_12,t_1,t_1 !=
1082 bcs,a %xcc,.+8
1083 add c_3,t_2,c_3
1084 srlx t_1,32,c_12
1085 stuw t_1,rp(4) !=!r[4]=c2;
1086 or c_12,c_3,c_12
1087
1088 mulx a_2,b_3,t_1 !mul_add_c(a[2],b[3],c3,c1,c2);
1089 addcc c_12,t_1,c_12
1090 clr c_3 !=
1091 bcs,a %xcc,.+8
1092 add c_3,t_2,c_3
1093 mulx a_3,b_2,t_1 !mul_add_c(a[3],b[2],c3,c1,c2);
1094 addcc c_12,t_1,t_1 !=
1095 bcs,a %xcc,.+8
1096 add c_3,t_2,c_3
1097 srlx t_1,32,c_12
1098 stuw t_1,rp(5) !=!r[5]=c3;
1099 or c_12,c_3,c_12
1100
1101 mulx a_3,b_3,t_1 !mul_add_c(a[3],b[3],c1,c2,c3);
1102 addcc c_12,t_1,t_1
1103 srlx t_1,32,c_12 !=
1104 stuw t_1,rp(6) !r[6]=c1;
1105 stuw c_12,rp(7) !r[7]=c2;
1106
1107 ret
1108 restore %g0,%g0,%o0
1109
1110.type bn_mul_comba4,#function
1111.size bn_mul_comba4,(.-bn_mul_comba4)
1112
1113.align 32
1114
1115.global bn_sqr_comba8
1116bn_sqr_comba8:
1117 save %sp,FRAME_SIZE,%sp
1118 mov 1,t_2
1119 lduw ap(0),a_0
1120 sllx t_2,32,t_2
1121 lduw ap(1),a_1
1122 mulx a_0,a_0,t_1 !sqr_add_c(a,0,c1,c2,c3);
1123 srlx t_1,32,c_12
1124 stuw t_1,rp(0) !r[0]=c1;
1125
1126 lduw ap(2),a_2
1127 mulx a_0,a_1,t_1 !=!sqr_add_c2(a,1,0,c2,c3,c1);
1128 addcc c_12,t_1,c_12
1129 clr c_3
1130 bcs,a %xcc,.+8
1131 add c_3,t_2,c_3
1132 addcc c_12,t_1,t_1
1133 bcs,a %xcc,.+8
1134 add c_3,t_2,c_3
1135 srlx t_1,32,c_12
1136 stuw t_1,rp(1) !r[1]=c2;
1137 or c_12,c_3,c_12
1138
1139 mulx a_2,a_0,t_1 !sqr_add_c2(a,2,0,c3,c1,c2);
1140 addcc c_12,t_1,c_12
1141 clr c_3
1142 bcs,a %xcc,.+8
1143 add c_3,t_2,c_3
1144 addcc c_12,t_1,c_12
1145 bcs,a %xcc,.+8
1146 add c_3,t_2,c_3
1147 lduw ap(3),a_3
1148 mulx a_1,a_1,t_1 !sqr_add_c(a,1,c3,c1,c2);
1149 addcc c_12,t_1,t_1
1150 bcs,a %xcc,.+8
1151 add c_3,t_2,c_3
1152 srlx t_1,32,c_12
1153 stuw t_1,rp(2) !r[2]=c3;
1154 or c_12,c_3,c_12
1155
1156 mulx a_0,a_3,t_1 !sqr_add_c2(a,3,0,c1,c2,c3);
1157 addcc c_12,t_1,c_12
1158 clr c_3
1159 bcs,a %xcc,.+8
1160 add c_3,t_2,c_3
1161 addcc c_12,t_1,c_12
1162 bcs,a %xcc,.+8
1163 add c_3,t_2,c_3
1164 lduw ap(4),a_4
1165 mulx a_1,a_2,t_1 !sqr_add_c2(a,2,1,c1,c2,c3);
1166 addcc c_12,t_1,c_12
1167 bcs,a %xcc,.+8
1168 add c_3,t_2,c_3
1169 addcc c_12,t_1,t_1
1170 bcs,a %xcc,.+8
1171 add c_3,t_2,c_3
1172 srlx t_1,32,c_12
1173 st t_1,rp(3) !r[3]=c1;
1174 or c_12,c_3,c_12
1175
1176 mulx a_4,a_0,t_1 !sqr_add_c2(a,4,0,c2,c3,c1);
1177 addcc c_12,t_1,c_12
1178 clr c_3
1179 bcs,a %xcc,.+8
1180 add c_3,t_2,c_3
1181 addcc c_12,t_1,c_12
1182 bcs,a %xcc,.+8
1183 add c_3,t_2,c_3
1184 mulx a_3,a_1,t_1 !sqr_add_c2(a,3,1,c2,c3,c1);
1185 addcc c_12,t_1,c_12
1186 bcs,a %xcc,.+8
1187 add c_3,t_2,c_3
1188 addcc c_12,t_1,c_12
1189 bcs,a %xcc,.+8
1190 add c_3,t_2,c_3
1191 lduw ap(5),a_5
1192 mulx a_2,a_2,t_1 !sqr_add_c(a,2,c2,c3,c1);
1193 addcc c_12,t_1,t_1
1194 bcs,a %xcc,.+8
1195 add c_3,t_2,c_3
1196 srlx t_1,32,c_12
1197 stuw t_1,rp(4) !r[4]=c2;
1198 or c_12,c_3,c_12
1199
1200 mulx a_0,a_5,t_1 !sqr_add_c2(a,5,0,c3,c1,c2);
1201 addcc c_12,t_1,c_12
1202 clr c_3
1203 bcs,a %xcc,.+8
1204 add c_3,t_2,c_3
1205 addcc c_12,t_1,c_12
1206 bcs,a %xcc,.+8
1207 add c_3,t_2,c_3
1208 mulx a_1,a_4,t_1 !sqr_add_c2(a,4,1,c3,c1,c2);
1209 addcc c_12,t_1,c_12
1210 bcs,a %xcc,.+8
1211 add c_3,t_2,c_3
1212 addcc c_12,t_1,c_12
1213 bcs,a %xcc,.+8
1214 add c_3,t_2,c_3
1215 lduw ap(6),a_6
1216 mulx a_2,a_3,t_1 !sqr_add_c2(a,3,2,c3,c1,c2);
1217 addcc c_12,t_1,c_12
1218 bcs,a %xcc,.+8
1219 add c_3,t_2,c_3
1220 addcc c_12,t_1,t_1
1221 bcs,a %xcc,.+8
1222 add c_3,t_2,c_3
1223 srlx t_1,32,c_12
1224 stuw t_1,rp(5) !r[5]=c3;
1225 or c_12,c_3,c_12
1226
1227 mulx a_6,a_0,t_1 !sqr_add_c2(a,6,0,c1,c2,c3);
1228 addcc c_12,t_1,c_12
1229 clr c_3
1230 bcs,a %xcc,.+8
1231 add c_3,t_2,c_3
1232 addcc c_12,t_1,c_12
1233 bcs,a %xcc,.+8
1234 add c_3,t_2,c_3
1235 mulx a_5,a_1,t_1 !sqr_add_c2(a,5,1,c1,c2,c3);
1236 addcc c_12,t_1,c_12
1237 bcs,a %xcc,.+8
1238 add c_3,t_2,c_3
1239 addcc c_12,t_1,c_12
1240 bcs,a %xcc,.+8
1241 add c_3,t_2,c_3
1242 mulx a_4,a_2,t_1 !sqr_add_c2(a,4,2,c1,c2,c3);
1243 addcc c_12,t_1,c_12
1244 bcs,a %xcc,.+8
1245 add c_3,t_2,c_3
1246 addcc c_12,t_1,c_12
1247 bcs,a %xcc,.+8
1248 add c_3,t_2,c_3
1249 lduw ap(7),a_7
1250 mulx a_3,a_3,t_1 !=!sqr_add_c(a,3,c1,c2,c3);
1251 addcc c_12,t_1,t_1
1252 bcs,a %xcc,.+8
1253 add c_3,t_2,c_3
1254 srlx t_1,32,c_12
1255 stuw t_1,rp(6) !r[6]=c1;
1256 or c_12,c_3,c_12
1257
1258 mulx a_0,a_7,t_1 !sqr_add_c2(a,7,0,c2,c3,c1);
1259 addcc c_12,t_1,c_12
1260 clr c_3
1261 bcs,a %xcc,.+8
1262 add c_3,t_2,c_3
1263 addcc c_12,t_1,c_12
1264 bcs,a %xcc,.+8
1265 add c_3,t_2,c_3
1266 mulx a_1,a_6,t_1 !sqr_add_c2(a,6,1,c2,c3,c1);
1267 addcc c_12,t_1,c_12
1268 bcs,a %xcc,.+8
1269 add c_3,t_2,c_3
1270 addcc c_12,t_1,c_12
1271 bcs,a %xcc,.+8
1272 add c_3,t_2,c_3
1273 mulx a_2,a_5,t_1 !sqr_add_c2(a,5,2,c2,c3,c1);
1274 addcc c_12,t_1,c_12
1275 bcs,a %xcc,.+8
1276 add c_3,t_2,c_3
1277 addcc c_12,t_1,c_12
1278 bcs,a %xcc,.+8
1279 add c_3,t_2,c_3
1280 mulx a_3,a_4,t_1 !sqr_add_c2(a,4,3,c2,c3,c1);
1281 addcc c_12,t_1,c_12
1282 bcs,a %xcc,.+8
1283 add c_3,t_2,c_3
1284 addcc c_12,t_1,t_1
1285 bcs,a %xcc,.+8
1286 add c_3,t_2,c_3
1287 srlx t_1,32,c_12
1288 stuw t_1,rp(7) !r[7]=c2;
1289 or c_12,c_3,c_12
1290
1291 mulx a_7,a_1,t_1 !sqr_add_c2(a,7,1,c3,c1,c2);
1292 addcc c_12,t_1,c_12
1293 clr c_3
1294 bcs,a %xcc,.+8
1295 add c_3,t_2,c_3
1296 addcc c_12,t_1,c_12
1297 bcs,a %xcc,.+8
1298 add c_3,t_2,c_3
1299 mulx a_6,a_2,t_1 !sqr_add_c2(a,6,2,c3,c1,c2);
1300 addcc c_12,t_1,c_12
1301 bcs,a %xcc,.+8
1302 add c_3,t_2,c_3
1303 addcc c_12,t_1,c_12
1304 bcs,a %xcc,.+8
1305 add c_3,t_2,c_3
1306 mulx a_5,a_3,t_1 !sqr_add_c2(a,5,3,c3,c1,c2);
1307 addcc c_12,t_1,c_12
1308 bcs,a %xcc,.+8
1309 add c_3,t_2,c_3
1310 addcc c_12,t_1,c_12
1311 bcs,a %xcc,.+8
1312 add c_3,t_2,c_3
1313 mulx a_4,a_4,t_1 !sqr_add_c(a,4,c3,c1,c2);
1314 addcc c_12,t_1,t_1
1315 bcs,a %xcc,.+8
1316 add c_3,t_2,c_3
1317 srlx t_1,32,c_12
1318 stuw t_1,rp(8) !r[8]=c3;
1319 or c_12,c_3,c_12
1320
1321 mulx a_2,a_7,t_1 !sqr_add_c2(a,7,2,c1,c2,c3);
1322 addcc c_12,t_1,c_12
1323 clr c_3
1324 bcs,a %xcc,.+8
1325 add c_3,t_2,c_3
1326 addcc c_12,t_1,c_12
1327 bcs,a %xcc,.+8
1328 add c_3,t_2,c_3
1329 mulx a_3,a_6,t_1 !sqr_add_c2(a,6,3,c1,c2,c3);
1330 addcc c_12,t_1,c_12
1331 bcs,a %xcc,.+8
1332 add c_3,t_2,c_3
1333 addcc c_12,t_1,c_12
1334 bcs,a %xcc,.+8
1335 add c_3,t_2,c_3
1336 mulx a_4,a_5,t_1 !sqr_add_c2(a,5,4,c1,c2,c3);
1337 addcc c_12,t_1,c_12
1338 bcs,a %xcc,.+8
1339 add c_3,t_2,c_3
1340 addcc c_12,t_1,t_1
1341 bcs,a %xcc,.+8
1342 add c_3,t_2,c_3
1343 srlx t_1,32,c_12
1344 stuw t_1,rp(9) !r[9]=c1;
1345 or c_12,c_3,c_12
1346
1347 mulx a_7,a_3,t_1 !sqr_add_c2(a,7,3,c2,c3,c1);
1348 addcc c_12,t_1,c_12
1349 clr c_3
1350 bcs,a %xcc,.+8
1351 add c_3,t_2,c_3
1352 addcc c_12,t_1,c_12
1353 bcs,a %xcc,.+8
1354 add c_3,t_2,c_3
1355 mulx a_6,a_4,t_1 !sqr_add_c2(a,6,4,c2,c3,c1);
1356 addcc c_12,t_1,c_12
1357 bcs,a %xcc,.+8
1358 add c_3,t_2,c_3
1359 addcc c_12,t_1,c_12
1360 bcs,a %xcc,.+8
1361 add c_3,t_2,c_3
1362 mulx a_5,a_5,t_1 !sqr_add_c(a,5,c2,c3,c1);
1363 addcc c_12,t_1,t_1
1364 bcs,a %xcc,.+8
1365 add c_3,t_2,c_3
1366 srlx t_1,32,c_12
1367 stuw t_1,rp(10) !r[10]=c2;
1368 or c_12,c_3,c_12
1369
1370 mulx a_4,a_7,t_1 !sqr_add_c2(a,7,4,c3,c1,c2);
1371 addcc c_12,t_1,c_12
1372 clr c_3
1373 bcs,a %xcc,.+8
1374 add c_3,t_2,c_3
1375 addcc c_12,t_1,c_12
1376 bcs,a %xcc,.+8
1377 add c_3,t_2,c_3
1378 mulx a_5,a_6,t_1 !sqr_add_c2(a,6,5,c3,c1,c2);
1379 addcc c_12,t_1,c_12
1380 bcs,a %xcc,.+8
1381 add c_3,t_2,c_3
1382 addcc c_12,t_1,t_1
1383 bcs,a %xcc,.+8
1384 add c_3,t_2,c_3
1385 srlx t_1,32,c_12
1386 stuw t_1,rp(11) !r[11]=c3;
1387 or c_12,c_3,c_12
1388
1389 mulx a_7,a_5,t_1 !sqr_add_c2(a,7,5,c1,c2,c3);
1390 addcc c_12,t_1,c_12
1391 clr c_3
1392 bcs,a %xcc,.+8
1393 add c_3,t_2,c_3
1394 addcc c_12,t_1,c_12
1395 bcs,a %xcc,.+8
1396 add c_3,t_2,c_3
1397 mulx a_6,a_6,t_1 !sqr_add_c(a,6,c1,c2,c3);
1398 addcc c_12,t_1,t_1
1399 bcs,a %xcc,.+8
1400 add c_3,t_2,c_3
1401 srlx t_1,32,c_12
1402 stuw t_1,rp(12) !r[12]=c1;
1403 or c_12,c_3,c_12
1404
1405 mulx a_6,a_7,t_1 !sqr_add_c2(a,7,6,c2,c3,c1);
1406 addcc c_12,t_1,c_12
1407 clr c_3
1408 bcs,a %xcc,.+8
1409 add c_3,t_2,c_3
1410 addcc c_12,t_1,t_1
1411 bcs,a %xcc,.+8
1412 add c_3,t_2,c_3
1413 srlx t_1,32,c_12
1414 stuw t_1,rp(13) !r[13]=c2;
1415 or c_12,c_3,c_12
1416
1417 mulx a_7,a_7,t_1 !sqr_add_c(a,7,c3,c1,c2);
1418 addcc c_12,t_1,t_1
1419 srlx t_1,32,c_12
1420 stuw t_1,rp(14) !r[14]=c3;
1421 stuw c_12,rp(15) !r[15]=c1;
1422
1423 ret
1424 restore %g0,%g0,%o0
1425
1426.type bn_sqr_comba8,#function
1427.size bn_sqr_comba8,(.-bn_sqr_comba8)
1428
1429.align 32
1430
1431.global bn_sqr_comba4
1432/*
1433 * void bn_sqr_comba4(r,a)
1434 * BN_ULONG *r,*a;
1435 */
1436bn_sqr_comba4:
1437 save %sp,FRAME_SIZE,%sp
1438 mov 1,t_2
1439 lduw ap(0),a_0
1440 sllx t_2,32,t_2
1441 lduw ap(1),a_1
1442 mulx a_0,a_0,t_1 !sqr_add_c(a,0,c1,c2,c3);
1443 srlx t_1,32,c_12
1444 stuw t_1,rp(0) !r[0]=c1;
1445
1446 lduw ap(2),a_2
1447 mulx a_0,a_1,t_1 !sqr_add_c2(a,1,0,c2,c3,c1);
1448 addcc c_12,t_1,c_12
1449 clr c_3
1450 bcs,a %xcc,.+8
1451 add c_3,t_2,c_3
1452 addcc c_12,t_1,t_1
1453 bcs,a %xcc,.+8
1454 add c_3,t_2,c_3
1455 srlx t_1,32,c_12
1456 stuw t_1,rp(1) !r[1]=c2;
1457 or c_12,c_3,c_12
1458
1459 mulx a_2,a_0,t_1 !sqr_add_c2(a,2,0,c3,c1,c2);
1460 addcc c_12,t_1,c_12
1461 clr c_3
1462 bcs,a %xcc,.+8
1463 add c_3,t_2,c_3
1464 addcc c_12,t_1,c_12
1465 bcs,a %xcc,.+8
1466 add c_3,t_2,c_3
1467 lduw ap(3),a_3
1468 mulx a_1,a_1,t_1 !sqr_add_c(a,1,c3,c1,c2);
1469 addcc c_12,t_1,t_1
1470 bcs,a %xcc,.+8
1471 add c_3,t_2,c_3
1472 srlx t_1,32,c_12
1473 stuw t_1,rp(2) !r[2]=c3;
1474 or c_12,c_3,c_12
1475
1476 mulx a_0,a_3,t_1 !sqr_add_c2(a,3,0,c1,c2,c3);
1477 addcc c_12,t_1,c_12
1478 clr c_3
1479 bcs,a %xcc,.+8
1480 add c_3,t_2,c_3
1481 addcc c_12,t_1,c_12
1482 bcs,a %xcc,.+8
1483 add c_3,t_2,c_3
1484 mulx a_1,a_2,t_1 !sqr_add_c2(a,2,1,c1,c2,c3);
1485 addcc c_12,t_1,c_12
1486 bcs,a %xcc,.+8
1487 add c_3,t_2,c_3
1488 addcc c_12,t_1,t_1
1489 bcs,a %xcc,.+8
1490 add c_3,t_2,c_3
1491 srlx t_1,32,c_12
1492 stuw t_1,rp(3) !r[3]=c1;
1493 or c_12,c_3,c_12
1494
1495 mulx a_3,a_1,t_1 !sqr_add_c2(a,3,1,c2,c3,c1);
1496 addcc c_12,t_1,c_12
1497 clr c_3
1498 bcs,a %xcc,.+8
1499 add c_3,t_2,c_3
1500 addcc c_12,t_1,c_12
1501 bcs,a %xcc,.+8
1502 add c_3,t_2,c_3
1503 mulx a_2,a_2,t_1 !sqr_add_c(a,2,c2,c3,c1);
1504 addcc c_12,t_1,t_1
1505 bcs,a %xcc,.+8
1506 add c_3,t_2,c_3
1507 srlx t_1,32,c_12
1508 stuw t_1,rp(4) !r[4]=c2;
1509 or c_12,c_3,c_12
1510
1511 mulx a_2,a_3,t_1 !sqr_add_c2(a,3,2,c3,c1,c2);
1512 addcc c_12,t_1,c_12
1513 clr c_3
1514 bcs,a %xcc,.+8
1515 add c_3,t_2,c_3
1516 addcc c_12,t_1,t_1
1517 bcs,a %xcc,.+8
1518 add c_3,t_2,c_3
1519 srlx t_1,32,c_12
1520 stuw t_1,rp(5) !r[5]=c3;
1521 or c_12,c_3,c_12
1522
1523 mulx a_3,a_3,t_1 !sqr_add_c(a,3,c1,c2,c3);
1524 addcc c_12,t_1,t_1
1525 srlx t_1,32,c_12
1526 stuw t_1,rp(6) !r[6]=c1;
1527 stuw c_12,rp(7) !r[7]=c2;
1528
1529 ret
1530 restore %g0,%g0,%o0
1531
1532.type bn_sqr_comba4,#function
1533.size bn_sqr_comba4,(.-bn_sqr_comba4)
1534
1535.align 32
diff --git a/src/lib/libcrypto/bn/asm/x86.pl b/src/lib/libcrypto/bn/asm/x86.pl
deleted file mode 100644
index 1bc4f1bb27..0000000000
--- a/src/lib/libcrypto/bn/asm/x86.pl
+++ /dev/null
@@ -1,28 +0,0 @@
1#!/usr/local/bin/perl
2
3push(@INC,"perlasm","../../perlasm");
4require "x86asm.pl";
5
6require("x86/mul_add.pl");
7require("x86/mul.pl");
8require("x86/sqr.pl");
9require("x86/div.pl");
10require("x86/add.pl");
11require("x86/sub.pl");
12require("x86/comba.pl");
13
14&asm_init($ARGV[0],$0);
15
16&bn_mul_add_words("bn_mul_add_words");
17&bn_mul_words("bn_mul_words");
18&bn_sqr_words("bn_sqr_words");
19&bn_div_words("bn_div_words");
20&bn_add_words("bn_add_words");
21&bn_sub_words("bn_sub_words");
22&bn_mul_comba("bn_mul_comba8",8);
23&bn_mul_comba("bn_mul_comba4",4);
24&bn_sqr_comba("bn_sqr_comba8",8);
25&bn_sqr_comba("bn_sqr_comba4",4);
26
27&asm_finish();
28
diff --git a/src/lib/libcrypto/bn/asm/x86/add.pl b/src/lib/libcrypto/bn/asm/x86/add.pl
deleted file mode 100644
index 0b5cf583e3..0000000000
--- a/src/lib/libcrypto/bn/asm/x86/add.pl
+++ /dev/null
@@ -1,76 +0,0 @@
1#!/usr/local/bin/perl
2# x86 assember
3
4sub bn_add_words
5 {
6 local($name)=@_;
7
8 &function_begin($name,"");
9
10 &comment("");
11 $a="esi";
12 $b="edi";
13 $c="eax";
14 $r="ebx";
15 $tmp1="ecx";
16 $tmp2="edx";
17 $num="ebp";
18
19 &mov($r,&wparam(0)); # get r
20 &mov($a,&wparam(1)); # get a
21 &mov($b,&wparam(2)); # get b
22 &mov($num,&wparam(3)); # get num
23 &xor($c,$c); # clear carry
24 &and($num,0xfffffff8); # num / 8
25
26 &jz(&label("aw_finish"));
27
28 &set_label("aw_loop",0);
29 for ($i=0; $i<8; $i++)
30 {
31 &comment("Round $i");
32
33 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
34 &mov($tmp2,&DWP($i*4,$b,"",0)); # *b
35 &add($tmp1,$c);
36 &mov($c,0);
37 &adc($c,$c);
38 &add($tmp1,$tmp2);
39 &adc($c,0);
40 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
41 }
42
43 &comment("");
44 &add($a,32);
45 &add($b,32);
46 &add($r,32);
47 &sub($num,8);
48 &jnz(&label("aw_loop"));
49
50 &set_label("aw_finish",0);
51 &mov($num,&wparam(3)); # get num
52 &and($num,7);
53 &jz(&label("aw_end"));
54
55 for ($i=0; $i<7; $i++)
56 {
57 &comment("Tail Round $i");
58 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
59 &mov($tmp2,&DWP($i*4,$b,"",0));# *b
60 &add($tmp1,$c);
61 &mov($c,0);
62 &adc($c,$c);
63 &add($tmp1,$tmp2);
64 &adc($c,0);
65 &dec($num) if ($i != 6);
66 &mov(&DWP($i*4,$r,"",0),$tmp1); # *a
67 &jz(&label("aw_end")) if ($i != 6);
68 }
69 &set_label("aw_end",0);
70
71# &mov("eax",$c); # $c is "eax"
72
73 &function_end($name);
74 }
75
761;
diff --git a/src/lib/libcrypto/bn/asm/x86/comba.pl b/src/lib/libcrypto/bn/asm/x86/comba.pl
deleted file mode 100644
index 2291253629..0000000000
--- a/src/lib/libcrypto/bn/asm/x86/comba.pl
+++ /dev/null
@@ -1,277 +0,0 @@
1#!/usr/local/bin/perl
2# x86 assember
3
4sub mul_add_c
5 {
6 local($a,$ai,$b,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;
7
8 # pos == -1 if eax and edx are pre-loaded, 0 to load from next
9 # words, and 1 if load return value
10
11 &comment("mul a[$ai]*b[$bi]");
12
13 # "eax" and "edx" will always be pre-loaded.
14 # &mov("eax",&DWP($ai*4,$a,"",0)) ;
15 # &mov("edx",&DWP($bi*4,$b,"",0));
16
17 &mul("edx");
18 &add($c0,"eax");
19 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0; # laod next a
20 &mov("eax",&wparam(0)) if $pos > 0; # load r[]
21 ###
22 &adc($c1,"edx");
23 &mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 0; # laod next b
24 &mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 1; # laod next b
25 ###
26 &adc($c2,0);
27 # is pos > 1, it means it is the last loop
28 &mov(&DWP($i*4,"eax","",0),$c0) if $pos > 0; # save r[];
29 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # laod next a
30 }
31
32sub sqr_add_c
33 {
34 local($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;
35
36 # pos == -1 if eax and edx are pre-loaded, 0 to load from next
37 # words, and 1 if load return value
38
39 &comment("sqr a[$ai]*a[$bi]");
40
41 # "eax" and "edx" will always be pre-loaded.
42 # &mov("eax",&DWP($ai*4,$a,"",0)) ;
43 # &mov("edx",&DWP($bi*4,$b,"",0));
44
45 if ($ai == $bi)
46 { &mul("eax");}
47 else
48 { &mul("edx");}
49 &add($c0,"eax");
50 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0; # load next a
51 ###
52 &adc($c1,"edx");
53 &mov("edx",&DWP(($nb)*4,$a,"",0)) if ($pos == 1) && ($na != $nb);
54 ###
55 &adc($c2,0);
56 # is pos > 1, it means it is the last loop
57 &mov(&DWP($i*4,$r,"",0),$c0) if $pos > 0; # save r[];
58 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # load next b
59 }
60
61sub sqr_add_c2
62 {
63 local($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;
64
65 # pos == -1 if eax and edx are pre-loaded, 0 to load from next
66 # words, and 1 if load return value
67
68 &comment("sqr a[$ai]*a[$bi]");
69
70 # "eax" and "edx" will always be pre-loaded.
71 # &mov("eax",&DWP($ai*4,$a,"",0)) ;
72 # &mov("edx",&DWP($bi*4,$a,"",0));
73
74 if ($ai == $bi)
75 { &mul("eax");}
76 else
77 { &mul("edx");}
78 &add("eax","eax");
79 ###
80 &adc("edx","edx");
81 ###
82 &adc($c2,0);
83 &add($c0,"eax");
84 &adc($c1,"edx");
85 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0; # load next a
86 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # load next b
87 &adc($c2,0);
88 &mov(&DWP($i*4,$r,"",0),$c0) if $pos > 0; # save r[];
89 &mov("edx",&DWP(($nb)*4,$a,"",0)) if ($pos <= 1) && ($na != $nb);
90 ###
91 }
92
93sub bn_mul_comba
94 {
95 local($name,$num)=@_;
96 local($a,$b,$c0,$c1,$c2);
97 local($i,$as,$ae,$bs,$be,$ai,$bi);
98 local($tot,$end);
99
100 &function_begin_B($name,"");
101
102 $c0="ebx";
103 $c1="ecx";
104 $c2="ebp";
105 $a="esi";
106 $b="edi";
107
108 $as=0;
109 $ae=0;
110 $bs=0;
111 $be=0;
112 $tot=$num+$num-1;
113
114 &push("esi");
115 &mov($a,&wparam(1));
116 &push("edi");
117 &mov($b,&wparam(2));
118 &push("ebp");
119 &push("ebx");
120
121 &xor($c0,$c0);
122 &mov("eax",&DWP(0,$a,"",0)); # load the first word
123 &xor($c1,$c1);
124 &mov("edx",&DWP(0,$b,"",0)); # load the first second
125
126 for ($i=0; $i<$tot; $i++)
127 {
128 $ai=$as;
129 $bi=$bs;
130 $end=$be+1;
131
132 &comment("################## Calculate word $i");
133
134 for ($j=$bs; $j<$end; $j++)
135 {
136 &xor($c2,$c2) if ($j == $bs);
137 if (($j+1) == $end)
138 {
139 $v=1;
140 $v=2 if (($i+1) == $tot);
141 }
142 else
143 { $v=0; }
144 if (($j+1) != $end)
145 {
146 $na=($ai-1);
147 $nb=($bi+1);
148 }
149 else
150 {
151 $na=$as+($i < ($num-1));
152 $nb=$bs+($i >= ($num-1));
153 }
154#printf STDERR "[$ai,$bi] -> [$na,$nb]\n";
155 &mul_add_c($a,$ai,$b,$bi,$c0,$c1,$c2,$v,$i,$na,$nb);
156 if ($v)
157 {
158 &comment("saved r[$i]");
159 # &mov("eax",&wparam(0));
160 # &mov(&DWP($i*4,"eax","",0),$c0);
161 ($c0,$c1,$c2)=($c1,$c2,$c0);
162 }
163 $ai--;
164 $bi++;
165 }
166 $as++ if ($i < ($num-1));
167 $ae++ if ($i >= ($num-1));
168
169 $bs++ if ($i >= ($num-1));
170 $be++ if ($i < ($num-1));
171 }
172 &comment("save r[$i]");
173 # &mov("eax",&wparam(0));
174 &mov(&DWP($i*4,"eax","",0),$c0);
175
176 &pop("ebx");
177 &pop("ebp");
178 &pop("edi");
179 &pop("esi");
180 &ret();
181 &function_end_B($name);
182 }
183
184sub bn_sqr_comba
185 {
186 local($name,$num)=@_;
187 local($r,$a,$c0,$c1,$c2)=@_;
188 local($i,$as,$ae,$bs,$be,$ai,$bi);
189 local($b,$tot,$end,$half);
190
191 &function_begin_B($name,"");
192
193 $c0="ebx";
194 $c1="ecx";
195 $c2="ebp";
196 $a="esi";
197 $r="edi";
198
199 &push("esi");
200 &push("edi");
201 &push("ebp");
202 &push("ebx");
203 &mov($r,&wparam(0));
204 &mov($a,&wparam(1));
205 &xor($c0,$c0);
206 &xor($c1,$c1);
207 &mov("eax",&DWP(0,$a,"",0)); # load the first word
208
209 $as=0;
210 $ae=0;
211 $bs=0;
212 $be=0;
213 $tot=$num+$num-1;
214
215 for ($i=0; $i<$tot; $i++)
216 {
217 $ai=$as;
218 $bi=$bs;
219 $end=$be+1;
220
221 &comment("############### Calculate word $i");
222 for ($j=$bs; $j<$end; $j++)
223 {
224 &xor($c2,$c2) if ($j == $bs);
225 if (($ai-1) < ($bi+1))
226 {
227 $v=1;
228 $v=2 if ($i+1) == $tot;
229 }
230 else
231 { $v=0; }
232 if (!$v)
233 {
234 $na=$ai-1;
235 $nb=$bi+1;
236 }
237 else
238 {
239 $na=$as+($i < ($num-1));
240 $nb=$bs+($i >= ($num-1));
241 }
242 if ($ai == $bi)
243 {
244 &sqr_add_c($r,$a,$ai,$bi,
245 $c0,$c1,$c2,$v,$i,$na,$nb);
246 }
247 else
248 {
249 &sqr_add_c2($r,$a,$ai,$bi,
250 $c0,$c1,$c2,$v,$i,$na,$nb);
251 }
252 if ($v)
253 {
254 &comment("saved r[$i]");
255 #&mov(&DWP($i*4,$r,"",0),$c0);
256 ($c0,$c1,$c2)=($c1,$c2,$c0);
257 last;
258 }
259 $ai--;
260 $bi++;
261 }
262 $as++ if ($i < ($num-1));
263 $ae++ if ($i >= ($num-1));
264
265 $bs++ if ($i >= ($num-1));
266 $be++ if ($i < ($num-1));
267 }
268 &mov(&DWP($i*4,$r,"",0),$c0);
269 &pop("ebx");
270 &pop("ebp");
271 &pop("edi");
272 &pop("esi");
273 &ret();
274 &function_end_B($name);
275 }
276
2771;
diff --git a/src/lib/libcrypto/bn/asm/x86/div.pl b/src/lib/libcrypto/bn/asm/x86/div.pl
deleted file mode 100644
index 0e90152caa..0000000000
--- a/src/lib/libcrypto/bn/asm/x86/div.pl
+++ /dev/null
@@ -1,15 +0,0 @@
1#!/usr/local/bin/perl
2# x86 assember
3
4sub bn_div_words
5 {
6 local($name)=@_;
7
8 &function_begin($name,"");
9 &mov("edx",&wparam(0)); #
10 &mov("eax",&wparam(1)); #
11 &mov("ebx",&wparam(2)); #
12 &div("ebx");
13 &function_end($name);
14 }
151;
diff --git a/src/lib/libcrypto/bn/asm/x86/mul.pl b/src/lib/libcrypto/bn/asm/x86/mul.pl
deleted file mode 100644
index 674cb9b055..0000000000
--- a/src/lib/libcrypto/bn/asm/x86/mul.pl
+++ /dev/null
@@ -1,77 +0,0 @@
1#!/usr/local/bin/perl
2# x86 assember
3
4sub bn_mul_words
5 {
6 local($name)=@_;
7
8 &function_begin($name,"");
9
10 &comment("");
11 $Low="eax";
12 $High="edx";
13 $a="ebx";
14 $w="ecx";
15 $r="edi";
16 $c="esi";
17 $num="ebp";
18
19 &xor($c,$c); # clear carry
20 &mov($r,&wparam(0)); #
21 &mov($a,&wparam(1)); #
22 &mov($num,&wparam(2)); #
23 &mov($w,&wparam(3)); #
24
25 &and($num,0xfffffff8); # num / 8
26 &jz(&label("mw_finish"));
27
28 &set_label("mw_loop",0);
29 for ($i=0; $i<32; $i+=4)
30 {
31 &comment("Round $i");
32
33 &mov("eax",&DWP($i,$a,"",0)); # *a
34 &mul($w); # *a * w
35 &add("eax",$c); # L(t)+=c
36 # XXX
37
38 &adc("edx",0); # H(t)+=carry
39 &mov(&DWP($i,$r,"",0),"eax"); # *r= L(t);
40
41 &mov($c,"edx"); # c= H(t);
42 }
43
44 &comment("");
45 &add($a,32);
46 &add($r,32);
47 &sub($num,8);
48 &jz(&label("mw_finish"));
49 &jmp(&label("mw_loop"));
50
51 &set_label("mw_finish",0);
52 &mov($num,&wparam(2)); # get num
53 &and($num,7);
54 &jnz(&label("mw_finish2"));
55 &jmp(&label("mw_end"));
56
57 &set_label("mw_finish2",1);
58 for ($i=0; $i<7; $i++)
59 {
60 &comment("Tail Round $i");
61 &mov("eax",&DWP($i*4,$a,"",0));# *a
62 &mul($w); # *a * w
63 &add("eax",$c); # L(t)+=c
64 # XXX
65 &adc("edx",0); # H(t)+=carry
66 &mov(&DWP($i*4,$r,"",0),"eax");# *r= L(t);
67 &mov($c,"edx"); # c= H(t);
68 &dec($num) if ($i != 7-1);
69 &jz(&label("mw_end")) if ($i != 7-1);
70 }
71 &set_label("mw_end",0);
72 &mov("eax",$c);
73
74 &function_end($name);
75 }
76
771;
diff --git a/src/lib/libcrypto/bn/asm/x86/mul_add.pl b/src/lib/libcrypto/bn/asm/x86/mul_add.pl
deleted file mode 100644
index 61830d3a90..0000000000
--- a/src/lib/libcrypto/bn/asm/x86/mul_add.pl
+++ /dev/null
@@ -1,87 +0,0 @@
1#!/usr/local/bin/perl
2# x86 assember
3
4sub bn_mul_add_words
5 {
6 local($name)=@_;
7
8 &function_begin($name,"");
9
10 &comment("");
11 $Low="eax";
12 $High="edx";
13 $a="ebx";
14 $w="ebp";
15 $r="edi";
16 $c="esi";
17
18 &xor($c,$c); # clear carry
19 &mov($r,&wparam(0)); #
20
21 &mov("ecx",&wparam(2)); #
22 &mov($a,&wparam(1)); #
23
24 &and("ecx",0xfffffff8); # num / 8
25 &mov($w,&wparam(3)); #
26
27 &push("ecx"); # Up the stack for a tmp variable
28
29 &jz(&label("maw_finish"));
30
31 &set_label("maw_loop",0);
32
33 &mov(&swtmp(0),"ecx"); #
34
35 for ($i=0; $i<32; $i+=4)
36 {
37 &comment("Round $i");
38
39 &mov("eax",&DWP($i,$a,"",0)); # *a
40 &mul($w); # *a * w
41 &add("eax",$c); # L(t)+= *r
42 &mov($c,&DWP($i,$r,"",0)); # L(t)+= *r
43 &adc("edx",0); # H(t)+=carry
44 &add("eax",$c); # L(t)+=c
45 &adc("edx",0); # H(t)+=carry
46 &mov(&DWP($i,$r,"",0),"eax"); # *r= L(t);
47 &mov($c,"edx"); # c= H(t);
48 }
49
50 &comment("");
51 &mov("ecx",&swtmp(0)); #
52 &add($a,32);
53 &add($r,32);
54 &sub("ecx",8);
55 &jnz(&label("maw_loop"));
56
57 &set_label("maw_finish",0);
58 &mov("ecx",&wparam(2)); # get num
59 &and("ecx",7);
60 &jnz(&label("maw_finish2")); # helps branch prediction
61 &jmp(&label("maw_end"));
62
63 &set_label("maw_finish2",1);
64 for ($i=0; $i<7; $i++)
65 {
66 &comment("Tail Round $i");
67 &mov("eax",&DWP($i*4,$a,"",0));# *a
68 &mul($w); # *a * w
69 &add("eax",$c); # L(t)+=c
70 &mov($c,&DWP($i*4,$r,"",0)); # L(t)+= *r
71 &adc("edx",0); # H(t)+=carry
72 &add("eax",$c);
73 &adc("edx",0); # H(t)+=carry
74 &dec("ecx") if ($i != 7-1);
75 &mov(&DWP($i*4,$r,"",0),"eax"); # *r= L(t);
76 &mov($c,"edx"); # c= H(t);
77 &jz(&label("maw_end")) if ($i != 7-1);
78 }
79 &set_label("maw_end",0);
80 &mov("eax",$c);
81
82 &pop("ecx"); # clear variable from
83
84 &function_end($name);
85 }
86
871;
diff --git a/src/lib/libcrypto/bn/asm/x86/sqr.pl b/src/lib/libcrypto/bn/asm/x86/sqr.pl
deleted file mode 100644
index 1f90993cf6..0000000000
--- a/src/lib/libcrypto/bn/asm/x86/sqr.pl
+++ /dev/null
@@ -1,60 +0,0 @@
1#!/usr/local/bin/perl
2# x86 assember
3
4sub bn_sqr_words
5 {
6 local($name)=@_;
7
8 &function_begin($name,"");
9
10 &comment("");
11 $r="esi";
12 $a="edi";
13 $num="ebx";
14
15 &mov($r,&wparam(0)); #
16 &mov($a,&wparam(1)); #
17 &mov($num,&wparam(2)); #
18
19 &and($num,0xfffffff8); # num / 8
20 &jz(&label("sw_finish"));
21
22 &set_label("sw_loop",0);
23 for ($i=0; $i<32; $i+=4)
24 {
25 &comment("Round $i");
26 &mov("eax",&DWP($i,$a,"",0)); # *a
27 # XXX
28 &mul("eax"); # *a * *a
29 &mov(&DWP($i*2,$r,"",0),"eax"); #
30 &mov(&DWP($i*2+4,$r,"",0),"edx");#
31 }
32
33 &comment("");
34 &add($a,32);
35 &add($r,64);
36 &sub($num,8);
37 &jnz(&label("sw_loop"));
38
39 &set_label("sw_finish",0);
40 &mov($num,&wparam(2)); # get num
41 &and($num,7);
42 &jz(&label("sw_end"));
43
44 for ($i=0; $i<7; $i++)
45 {
46 &comment("Tail Round $i");
47 &mov("eax",&DWP($i*4,$a,"",0)); # *a
48 # XXX
49 &mul("eax"); # *a * *a
50 &mov(&DWP($i*8,$r,"",0),"eax"); #
51 &dec($num) if ($i != 7-1);
52 &mov(&DWP($i*8+4,$r,"",0),"edx");
53 &jz(&label("sw_end")) if ($i != 7-1);
54 }
55 &set_label("sw_end",0);
56
57 &function_end($name);
58 }
59
601;
diff --git a/src/lib/libcrypto/bn/asm/x86/sub.pl b/src/lib/libcrypto/bn/asm/x86/sub.pl
deleted file mode 100644
index 837b0e1b07..0000000000
--- a/src/lib/libcrypto/bn/asm/x86/sub.pl
+++ /dev/null
@@ -1,76 +0,0 @@
1#!/usr/local/bin/perl
2# x86 assember
3
4sub bn_sub_words
5 {
6 local($name)=@_;
7
8 &function_begin($name,"");
9
10 &comment("");
11 $a="esi";
12 $b="edi";
13 $c="eax";
14 $r="ebx";
15 $tmp1="ecx";
16 $tmp2="edx";
17 $num="ebp";
18
19 &mov($r,&wparam(0)); # get r
20 &mov($a,&wparam(1)); # get a
21 &mov($b,&wparam(2)); # get b
22 &mov($num,&wparam(3)); # get num
23 &xor($c,$c); # clear carry
24 &and($num,0xfffffff8); # num / 8
25
26 &jz(&label("aw_finish"));
27
28 &set_label("aw_loop",0);
29 for ($i=0; $i<8; $i++)
30 {
31 &comment("Round $i");
32
33 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
34 &mov($tmp2,&DWP($i*4,$b,"",0)); # *b
35 &sub($tmp1,$c);
36 &mov($c,0);
37 &adc($c,$c);
38 &sub($tmp1,$tmp2);
39 &adc($c,0);
40 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
41 }
42
43 &comment("");
44 &add($a,32);
45 &add($b,32);
46 &add($r,32);
47 &sub($num,8);
48 &jnz(&label("aw_loop"));
49
50 &set_label("aw_finish",0);
51 &mov($num,&wparam(3)); # get num
52 &and($num,7);
53 &jz(&label("aw_end"));
54
55 for ($i=0; $i<7; $i++)
56 {
57 &comment("Tail Round $i");
58 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
59 &mov($tmp2,&DWP($i*4,$b,"",0));# *b
60 &sub($tmp1,$c);
61 &mov($c,0);
62 &adc($c,$c);
63 &sub($tmp1,$tmp2);
64 &adc($c,0);
65 &dec($num) if ($i != 6);
66 &mov(&DWP($i*4,$r,"",0),$tmp1); # *a
67 &jz(&label("aw_end")) if ($i != 6);
68 }
69 &set_label("aw_end",0);
70
71# &mov("eax",$c); # $c is "eax"
72
73 &function_end($name);
74 }
75
761;
diff --git a/src/lib/libcrypto/bn/asm/x86_64-gcc.c b/src/lib/libcrypto/bn/asm/x86_64-gcc.c
deleted file mode 100644
index 450e8e4322..0000000000
--- a/src/lib/libcrypto/bn/asm/x86_64-gcc.c
+++ /dev/null
@@ -1,575 +0,0 @@
1/*
2 * x86_64 BIGNUM accelerator version 0.1, December 2002.
3 *
4 * Implemented by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5 * project.
6 *
7 * Rights for redistribution and usage in source and binary forms are
8 * granted according to the OpenSSL license. Warranty of any kind is
9 * disclaimed.
10 *
11 * Q. Version 0.1? It doesn't sound like Andy, he used to assign real
12 * versions, like 1.0...
13 * A. Well, that's because this code is basically a quick-n-dirty
14 * proof-of-concept hack. As you can see it's implemented with
15 * inline assembler, which means that you're bound to GCC and that
16 * there must be a room for fine-tuning.
17 *
18 * Q. Why inline assembler?
19 * A. x86_64 features own ABI I'm not familiar with. Which is why
20 * I decided to let the compiler take care of subroutine
21 * prologue/epilogue as well as register allocation.
22 *
23 * Q. How much faster does it get?
24 * A. Unfortunately people sitting on x86_64 hardware are prohibited
25 * to disclose the performance numbers, so they (SuSE labs to be
26 * specific) wouldn't tell me. However! Very similar coding technique
27 * (reaching out for 128-bit result from 64x64-bit multiplication)
28 * results in >3 times performance improvement on MIPS and I see no
29 * reason why gain on x86_64 would be so much different:-)
30 */
31
32#define BN_ULONG unsigned long
33
34/*
35 * "m"(a), "+m"(r) is the way to favor DirectPath µ-code;
36 * "g"(0) let the compiler to decide where does it
37 * want to keep the value of zero;
38 */
39#define mul_add(r,a,word,carry) do { \
40 register BN_ULONG high,low; \
41 asm ("mulq %3" \
42 : "=a"(low),"=d"(high) \
43 : "a"(word),"m"(a) \
44 : "cc"); \
45 asm ("addq %2,%0; adcq %3,%1" \
46 : "+r"(carry),"+d"(high)\
47 : "a"(low),"g"(0) \
48 : "cc"); \
49 asm ("addq %2,%0; adcq %3,%1" \
50 : "+m"(r),"+d"(high) \
51 : "r"(carry),"g"(0) \
52 : "cc"); \
53 carry=high; \
54 } while (0)
55
56#define mul(r,a,word,carry) do { \
57 register BN_ULONG high,low; \
58 asm ("mulq %3" \
59 : "=a"(low),"=d"(high) \
60 : "a"(word),"g"(a) \
61 : "cc"); \
62 asm ("addq %2,%0; adcq %3,%1" \
63 : "+r"(carry),"+d"(high)\
64 : "a"(low),"g"(0) \
65 : "cc"); \
66 (r)=carry, carry=high; \
67 } while (0)
68
69#define sqr(r0,r1,a) \
70 asm ("mulq %2" \
71 : "=a"(r0),"=d"(r1) \
72 : "a"(a) \
73 : "cc");
74
75BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
76 {
77 BN_ULONG c1=0;
78
79 if (num <= 0) return(c1);
80
81 while (num&~3)
82 {
83 mul_add(rp[0],ap[0],w,c1);
84 mul_add(rp[1],ap[1],w,c1);
85 mul_add(rp[2],ap[2],w,c1);
86 mul_add(rp[3],ap[3],w,c1);
87 ap+=4; rp+=4; num-=4;
88 }
89 if (num)
90 {
91 mul_add(rp[0],ap[0],w,c1); if (--num==0) return c1;
92 mul_add(rp[1],ap[1],w,c1); if (--num==0) return c1;
93 mul_add(rp[2],ap[2],w,c1); return c1;
94 }
95
96 return(c1);
97 }
98
99BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
100 {
101 BN_ULONG c1=0;
102
103 if (num <= 0) return(c1);
104
105 while (num&~3)
106 {
107 mul(rp[0],ap[0],w,c1);
108 mul(rp[1],ap[1],w,c1);
109 mul(rp[2],ap[2],w,c1);
110 mul(rp[3],ap[3],w,c1);
111 ap+=4; rp+=4; num-=4;
112 }
113 if (num)
114 {
115 mul(rp[0],ap[0],w,c1); if (--num == 0) return c1;
116 mul(rp[1],ap[1],w,c1); if (--num == 0) return c1;
117 mul(rp[2],ap[2],w,c1);
118 }
119 return(c1);
120 }
121
122void bn_sqr_words(BN_ULONG *r, BN_ULONG *a, int n)
123 {
124 if (n <= 0) return;
125
126 while (n&~3)
127 {
128 sqr(r[0],r[1],a[0]);
129 sqr(r[2],r[3],a[1]);
130 sqr(r[4],r[5],a[2]);
131 sqr(r[6],r[7],a[3]);
132 a+=4; r+=8; n-=4;
133 }
134 if (n)
135 {
136 sqr(r[0],r[1],a[0]); if (--n == 0) return;
137 sqr(r[2],r[3],a[1]); if (--n == 0) return;
138 sqr(r[4],r[5],a[2]);
139 }
140 }
141
142BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
143{ BN_ULONG ret,waste;
144
145 asm ("divq %4"
146 : "=a"(ret),"=d"(waste)
147 : "a"(l),"d"(h),"g"(d)
148 : "cc");
149
150 return ret;
151}
152
153BN_ULONG bn_add_words (BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp,int n)
154{ BN_ULONG ret,i;
155
156 if (n <= 0) return 0;
157
158 asm (
159 " subq %2,%2 \n"
160 ".align 16 \n"
161 "1: movq (%4,%2,8),%0 \n"
162 " adcq (%5,%2,8),%0 \n"
163 " movq %0,(%3,%2,8) \n"
164 " leaq 1(%2),%2 \n"
165 " loop 1b \n"
166 " sbbq %0,%0 \n"
167 : "+a"(ret),"+c"(n),"+r"(i)
168 : "r"(rp),"r"(ap),"r"(bp)
169 : "cc"
170 );
171
172 return ret&1;
173}
174
175#ifndef SIMICS
176BN_ULONG bn_sub_words (BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp,int n)
177{ BN_ULONG ret,i;
178
179 if (n <= 0) return 0;
180
181 asm (
182 " subq %2,%2 \n"
183 ".align 16 \n"
184 "1: movq (%4,%2,8),%0 \n"
185 " sbbq (%5,%2,8),%0 \n"
186 " movq %0,(%3,%2,8) \n"
187 " leaq 1(%2),%2 \n"
188 " loop 1b \n"
189 " sbbq %0,%0 \n"
190 : "+a"(ret),"+c"(n),"+r"(i)
191 : "r"(rp),"r"(ap),"r"(bp)
192 : "cc"
193 );
194
195 return ret&1;
196}
197#else
198/* Simics 1.4<7 has buggy sbbq:-( */
199#define BN_MASK2 0xffffffffffffffffL
200BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
201 {
202 BN_ULONG t1,t2;
203 int c=0;
204
205 if (n <= 0) return((BN_ULONG)0);
206
207 for (;;)
208 {
209 t1=a[0]; t2=b[0];
210 r[0]=(t1-t2-c)&BN_MASK2;
211 if (t1 != t2) c=(t1 < t2);
212 if (--n <= 0) break;
213
214 t1=a[1]; t2=b[1];
215 r[1]=(t1-t2-c)&BN_MASK2;
216 if (t1 != t2) c=(t1 < t2);
217 if (--n <= 0) break;
218
219 t1=a[2]; t2=b[2];
220 r[2]=(t1-t2-c)&BN_MASK2;
221 if (t1 != t2) c=(t1 < t2);
222 if (--n <= 0) break;
223
224 t1=a[3]; t2=b[3];
225 r[3]=(t1-t2-c)&BN_MASK2;
226 if (t1 != t2) c=(t1 < t2);
227 if (--n <= 0) break;
228
229 a+=4;
230 b+=4;
231 r+=4;
232 }
233 return(c);
234 }
235#endif
236
237/* mul_add_c(a,b,c0,c1,c2) -- c+=a*b for three word number c=(c2,c1,c0) */
238/* mul_add_c2(a,b,c0,c1,c2) -- c+=2*a*b for three word number c=(c2,c1,c0) */
239/* sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0) */
240/* sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */
241
242#if 0
243/* original macros are kept for reference purposes */
244#define mul_add_c(a,b,c0,c1,c2) { \
245 BN_ULONG ta=(a),tb=(b); \
246 t1 = ta * tb; \
247 t2 = BN_UMULT_HIGH(ta,tb); \
248 c0 += t1; t2 += (c0<t1)?1:0; \
249 c1 += t2; c2 += (c1<t2)?1:0; \
250 }
251
252#define mul_add_c2(a,b,c0,c1,c2) { \
253 BN_ULONG ta=(a),tb=(b),t0; \
254 t1 = BN_UMULT_HIGH(ta,tb); \
255 t0 = ta * tb; \
256 t2 = t1+t1; c2 += (t2<t1)?1:0; \
257 t1 = t0+t0; t2 += (t1<t0)?1:0; \
258 c0 += t1; t2 += (c0<t1)?1:0; \
259 c1 += t2; c2 += (c1<t2)?1:0; \
260 }
261#else
262#define mul_add_c(a,b,c0,c1,c2) do { \
263 asm ("mulq %3" \
264 : "=a"(t1),"=d"(t2) \
265 : "a"(a),"m"(b) \
266 : "cc"); \
267 asm ("addq %2,%0; adcq %3,%1" \
268 : "+r"(c0),"+d"(t2) \
269 : "a"(t1),"g"(0) \
270 : "cc"); \
271 asm ("addq %2,%0; adcq %3,%1" \
272 : "+r"(c1),"+r"(c2) \
273 : "d"(t2),"g"(0) \
274 : "cc"); \
275 } while (0)
276
277#define sqr_add_c(a,i,c0,c1,c2) do { \
278 asm ("mulq %2" \
279 : "=a"(t1),"=d"(t2) \
280 : "a"(a[i]) \
281 : "cc"); \
282 asm ("addq %2,%0; adcq %3,%1" \
283 : "+r"(c0),"+d"(t2) \
284 : "a"(t1),"g"(0) \
285 : "cc"); \
286 asm ("addq %2,%0; adcq %3,%1" \
287 : "+r"(c1),"+r"(c2) \
288 : "d"(t2),"g"(0) \
289 : "cc"); \
290 } while (0)
291
292#define mul_add_c2(a,b,c0,c1,c2) do { \
293 asm ("mulq %3" \
294 : "=a"(t1),"=d"(t2) \
295 : "a"(a),"m"(b) \
296 : "cc"); \
297 asm ("addq %0,%0; adcq %2,%1" \
298 : "+d"(t2),"+r"(c2) \
299 : "g"(0) \
300 : "cc"); \
301 asm ("addq %0,%0; adcq %2,%1" \
302 : "+a"(t1),"+d"(t2) \
303 : "g"(0) \
304 : "cc"); \
305 asm ("addq %2,%0; adcq %3,%1" \
306 : "+r"(c0),"+d"(t2) \
307 : "a"(t1),"g"(0) \
308 : "cc"); \
309 asm ("addq %2,%0; adcq %3,%1" \
310 : "+r"(c1),"+r"(c2) \
311 : "d"(t2),"g"(0) \
312 : "cc"); \
313 } while (0)
314#endif
315
316#define sqr_add_c2(a,i,j,c0,c1,c2) \
317 mul_add_c2((a)[i],(a)[j],c0,c1,c2)
318
319void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
320 {
321 BN_ULONG bl,bh;
322 BN_ULONG t1,t2;
323 BN_ULONG c1,c2,c3;
324
325 c1=0;
326 c2=0;
327 c3=0;
328 mul_add_c(a[0],b[0],c1,c2,c3);
329 r[0]=c1;
330 c1=0;
331 mul_add_c(a[0],b[1],c2,c3,c1);
332 mul_add_c(a[1],b[0],c2,c3,c1);
333 r[1]=c2;
334 c2=0;
335 mul_add_c(a[2],b[0],c3,c1,c2);
336 mul_add_c(a[1],b[1],c3,c1,c2);
337 mul_add_c(a[0],b[2],c3,c1,c2);
338 r[2]=c3;
339 c3=0;
340 mul_add_c(a[0],b[3],c1,c2,c3);
341 mul_add_c(a[1],b[2],c1,c2,c3);
342 mul_add_c(a[2],b[1],c1,c2,c3);
343 mul_add_c(a[3],b[0],c1,c2,c3);
344 r[3]=c1;
345 c1=0;
346 mul_add_c(a[4],b[0],c2,c3,c1);
347 mul_add_c(a[3],b[1],c2,c3,c1);
348 mul_add_c(a[2],b[2],c2,c3,c1);
349 mul_add_c(a[1],b[3],c2,c3,c1);
350 mul_add_c(a[0],b[4],c2,c3,c1);
351 r[4]=c2;
352 c2=0;
353 mul_add_c(a[0],b[5],c3,c1,c2);
354 mul_add_c(a[1],b[4],c3,c1,c2);
355 mul_add_c(a[2],b[3],c3,c1,c2);
356 mul_add_c(a[3],b[2],c3,c1,c2);
357 mul_add_c(a[4],b[1],c3,c1,c2);
358 mul_add_c(a[5],b[0],c3,c1,c2);
359 r[5]=c3;
360 c3=0;
361 mul_add_c(a[6],b[0],c1,c2,c3);
362 mul_add_c(a[5],b[1],c1,c2,c3);
363 mul_add_c(a[4],b[2],c1,c2,c3);
364 mul_add_c(a[3],b[3],c1,c2,c3);
365 mul_add_c(a[2],b[4],c1,c2,c3);
366 mul_add_c(a[1],b[5],c1,c2,c3);
367 mul_add_c(a[0],b[6],c1,c2,c3);
368 r[6]=c1;
369 c1=0;
370 mul_add_c(a[0],b[7],c2,c3,c1);
371 mul_add_c(a[1],b[6],c2,c3,c1);
372 mul_add_c(a[2],b[5],c2,c3,c1);
373 mul_add_c(a[3],b[4],c2,c3,c1);
374 mul_add_c(a[4],b[3],c2,c3,c1);
375 mul_add_c(a[5],b[2],c2,c3,c1);
376 mul_add_c(a[6],b[1],c2,c3,c1);
377 mul_add_c(a[7],b[0],c2,c3,c1);
378 r[7]=c2;
379 c2=0;
380 mul_add_c(a[7],b[1],c3,c1,c2);
381 mul_add_c(a[6],b[2],c3,c1,c2);
382 mul_add_c(a[5],b[3],c3,c1,c2);
383 mul_add_c(a[4],b[4],c3,c1,c2);
384 mul_add_c(a[3],b[5],c3,c1,c2);
385 mul_add_c(a[2],b[6],c3,c1,c2);
386 mul_add_c(a[1],b[7],c3,c1,c2);
387 r[8]=c3;
388 c3=0;
389 mul_add_c(a[2],b[7],c1,c2,c3);
390 mul_add_c(a[3],b[6],c1,c2,c3);
391 mul_add_c(a[4],b[5],c1,c2,c3);
392 mul_add_c(a[5],b[4],c1,c2,c3);
393 mul_add_c(a[6],b[3],c1,c2,c3);
394 mul_add_c(a[7],b[2],c1,c2,c3);
395 r[9]=c1;
396 c1=0;
397 mul_add_c(a[7],b[3],c2,c3,c1);
398 mul_add_c(a[6],b[4],c2,c3,c1);
399 mul_add_c(a[5],b[5],c2,c3,c1);
400 mul_add_c(a[4],b[6],c2,c3,c1);
401 mul_add_c(a[3],b[7],c2,c3,c1);
402 r[10]=c2;
403 c2=0;
404 mul_add_c(a[4],b[7],c3,c1,c2);
405 mul_add_c(a[5],b[6],c3,c1,c2);
406 mul_add_c(a[6],b[5],c3,c1,c2);
407 mul_add_c(a[7],b[4],c3,c1,c2);
408 r[11]=c3;
409 c3=0;
410 mul_add_c(a[7],b[5],c1,c2,c3);
411 mul_add_c(a[6],b[6],c1,c2,c3);
412 mul_add_c(a[5],b[7],c1,c2,c3);
413 r[12]=c1;
414 c1=0;
415 mul_add_c(a[6],b[7],c2,c3,c1);
416 mul_add_c(a[7],b[6],c2,c3,c1);
417 r[13]=c2;
418 c2=0;
419 mul_add_c(a[7],b[7],c3,c1,c2);
420 r[14]=c3;
421 r[15]=c1;
422 }
423
424void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
425 {
426 BN_ULONG bl,bh;
427 BN_ULONG t1,t2;
428 BN_ULONG c1,c2,c3;
429
430 c1=0;
431 c2=0;
432 c3=0;
433 mul_add_c(a[0],b[0],c1,c2,c3);
434 r[0]=c1;
435 c1=0;
436 mul_add_c(a[0],b[1],c2,c3,c1);
437 mul_add_c(a[1],b[0],c2,c3,c1);
438 r[1]=c2;
439 c2=0;
440 mul_add_c(a[2],b[0],c3,c1,c2);
441 mul_add_c(a[1],b[1],c3,c1,c2);
442 mul_add_c(a[0],b[2],c3,c1,c2);
443 r[2]=c3;
444 c3=0;
445 mul_add_c(a[0],b[3],c1,c2,c3);
446 mul_add_c(a[1],b[2],c1,c2,c3);
447 mul_add_c(a[2],b[1],c1,c2,c3);
448 mul_add_c(a[3],b[0],c1,c2,c3);
449 r[3]=c1;
450 c1=0;
451 mul_add_c(a[3],b[1],c2,c3,c1);
452 mul_add_c(a[2],b[2],c2,c3,c1);
453 mul_add_c(a[1],b[3],c2,c3,c1);
454 r[4]=c2;
455 c2=0;
456 mul_add_c(a[2],b[3],c3,c1,c2);
457 mul_add_c(a[3],b[2],c3,c1,c2);
458 r[5]=c3;
459 c3=0;
460 mul_add_c(a[3],b[3],c1,c2,c3);
461 r[6]=c1;
462 r[7]=c2;
463 }
464
465void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a)
466 {
467 BN_ULONG bl,bh;
468 BN_ULONG t1,t2;
469 BN_ULONG c1,c2,c3;
470
471 c1=0;
472 c2=0;
473 c3=0;
474 sqr_add_c(a,0,c1,c2,c3);
475 r[0]=c1;
476 c1=0;
477 sqr_add_c2(a,1,0,c2,c3,c1);
478 r[1]=c2;
479 c2=0;
480 sqr_add_c(a,1,c3,c1,c2);
481 sqr_add_c2(a,2,0,c3,c1,c2);
482 r[2]=c3;
483 c3=0;
484 sqr_add_c2(a,3,0,c1,c2,c3);
485 sqr_add_c2(a,2,1,c1,c2,c3);
486 r[3]=c1;
487 c1=0;
488 sqr_add_c(a,2,c2,c3,c1);
489 sqr_add_c2(a,3,1,c2,c3,c1);
490 sqr_add_c2(a,4,0,c2,c3,c1);
491 r[4]=c2;
492 c2=0;
493 sqr_add_c2(a,5,0,c3,c1,c2);
494 sqr_add_c2(a,4,1,c3,c1,c2);
495 sqr_add_c2(a,3,2,c3,c1,c2);
496 r[5]=c3;
497 c3=0;
498 sqr_add_c(a,3,c1,c2,c3);
499 sqr_add_c2(a,4,2,c1,c2,c3);
500 sqr_add_c2(a,5,1,c1,c2,c3);
501 sqr_add_c2(a,6,0,c1,c2,c3);
502 r[6]=c1;
503 c1=0;
504 sqr_add_c2(a,7,0,c2,c3,c1);
505 sqr_add_c2(a,6,1,c2,c3,c1);
506 sqr_add_c2(a,5,2,c2,c3,c1);
507 sqr_add_c2(a,4,3,c2,c3,c1);
508 r[7]=c2;
509 c2=0;
510 sqr_add_c(a,4,c3,c1,c2);
511 sqr_add_c2(a,5,3,c3,c1,c2);
512 sqr_add_c2(a,6,2,c3,c1,c2);
513 sqr_add_c2(a,7,1,c3,c1,c2);
514 r[8]=c3;
515 c3=0;
516 sqr_add_c2(a,7,2,c1,c2,c3);
517 sqr_add_c2(a,6,3,c1,c2,c3);
518 sqr_add_c2(a,5,4,c1,c2,c3);
519 r[9]=c1;
520 c1=0;
521 sqr_add_c(a,5,c2,c3,c1);
522 sqr_add_c2(a,6,4,c2,c3,c1);
523 sqr_add_c2(a,7,3,c2,c3,c1);
524 r[10]=c2;
525 c2=0;
526 sqr_add_c2(a,7,4,c3,c1,c2);
527 sqr_add_c2(a,6,5,c3,c1,c2);
528 r[11]=c3;
529 c3=0;
530 sqr_add_c(a,6,c1,c2,c3);
531 sqr_add_c2(a,7,5,c1,c2,c3);
532 r[12]=c1;
533 c1=0;
534 sqr_add_c2(a,7,6,c2,c3,c1);
535 r[13]=c2;
536 c2=0;
537 sqr_add_c(a,7,c3,c1,c2);
538 r[14]=c3;
539 r[15]=c1;
540 }
541
542void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a)
543 {
544 BN_ULONG bl,bh;
545 BN_ULONG t1,t2;
546 BN_ULONG c1,c2,c3;
547
548 c1=0;
549 c2=0;
550 c3=0;
551 sqr_add_c(a,0,c1,c2,c3);
552 r[0]=c1;
553 c1=0;
554 sqr_add_c2(a,1,0,c2,c3,c1);
555 r[1]=c2;
556 c2=0;
557 sqr_add_c(a,1,c3,c1,c2);
558 sqr_add_c2(a,2,0,c3,c1,c2);
559 r[2]=c3;
560 c3=0;
561 sqr_add_c2(a,3,0,c1,c2,c3);
562 sqr_add_c2(a,2,1,c1,c2,c3);
563 r[3]=c1;
564 c1=0;
565 sqr_add_c(a,2,c2,c3,c1);
566 sqr_add_c2(a,3,1,c2,c3,c1);
567 r[4]=c2;
568 c2=0;
569 sqr_add_c2(a,3,2,c3,c1,c2);
570 r[5]=c3;
571 c3=0;
572 sqr_add_c(a,3,c1,c2,c3);
573 r[6]=c1;
574 r[7]=c2;
575 }
diff --git a/src/lib/libcrypto/bn/bn.h b/src/lib/libcrypto/bn/bn.h
deleted file mode 100644
index 3da6d8ced9..0000000000
--- a/src/lib/libcrypto/bn/bn.h
+++ /dev/null
@@ -1,549 +0,0 @@
1/* crypto/bn/bn.h */
2/* Copyright (C) 1995-1997 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#ifndef HEADER_BN_H
60#define HEADER_BN_H
61
62#include <openssl/e_os2.h>
63#ifndef OPENSSL_NO_FP_API
64#include <stdio.h> /* FILE */
65#endif
66
67#ifdef __cplusplus
68extern "C" {
69#endif
70
71#ifdef OPENSSL_SYS_VMS
72#undef BN_LLONG /* experimental, so far... */
73#endif
74
75#define BN_MUL_COMBA
76#define BN_SQR_COMBA
77#define BN_RECURSION
78
79/* This next option uses the C libraries (2 word)/(1 word) function.
80 * If it is not defined, I use my C version (which is slower).
81 * The reason for this flag is that when the particular C compiler
82 * library routine is used, and the library is linked with a different
83 * compiler, the library is missing. This mostly happens when the
84 * library is built with gcc and then linked using normal cc. This would
85 * be a common occurrence because gcc normally produces code that is
86 * 2 times faster than system compilers for the big number stuff.
87 * For machines with only one compiler (or shared libraries), this should
88 * be on. Again this in only really a problem on machines
89 * using "long long's", are 32bit, and are not using my assembler code. */
90#if defined(OPENSSL_SYS_MSDOS) || defined(OPENSSL_SYS_WINDOWS) || \
91 defined(OPENSSL_SYS_WIN32) || defined(linux)
92# ifndef BN_DIV2W
93# define BN_DIV2W
94# endif
95#endif
96
97/* assuming long is 64bit - this is the DEC Alpha
98 * unsigned long long is only 64 bits :-(, don't define
99 * BN_LLONG for the DEC Alpha */
100#ifdef SIXTY_FOUR_BIT_LONG
101#define BN_ULLONG unsigned long long
102#define BN_ULONG unsigned long
103#define BN_LONG long
104#define BN_BITS 128
105#define BN_BYTES 8
106#define BN_BITS2 64
107#define BN_BITS4 32
108#define BN_MASK (0xffffffffffffffffffffffffffffffffLL)
109#define BN_MASK2 (0xffffffffffffffffL)
110#define BN_MASK2l (0xffffffffL)
111#define BN_MASK2h (0xffffffff00000000L)
112#define BN_MASK2h1 (0xffffffff80000000L)
113#define BN_TBIT (0x8000000000000000L)
114#define BN_DEC_CONV (10000000000000000000UL)
115#define BN_DEC_FMT1 "%lu"
116#define BN_DEC_FMT2 "%019lu"
117#define BN_DEC_NUM 19
118#endif
119
120/* This is where the long long data type is 64 bits, but long is 32.
121 * For machines where there are 64bit registers, this is the mode to use.
122 * IRIX, on R4000 and above should use this mode, along with the relevant
123 * assembler code :-). Do NOT define BN_LLONG.
124 */
125#ifdef SIXTY_FOUR_BIT
126#undef BN_LLONG
127#undef BN_ULLONG
128#define BN_ULONG unsigned long long
129#define BN_LONG long long
130#define BN_BITS 128
131#define BN_BYTES 8
132#define BN_BITS2 64
133#define BN_BITS4 32
134#define BN_MASK2 (0xffffffffffffffffLL)
135#define BN_MASK2l (0xffffffffL)
136#define BN_MASK2h (0xffffffff00000000LL)
137#define BN_MASK2h1 (0xffffffff80000000LL)
138#define BN_TBIT (0x8000000000000000LL)
139#define BN_DEC_CONV (10000000000000000000ULL)
140#define BN_DEC_FMT1 "%llu"
141#define BN_DEC_FMT2 "%019llu"
142#define BN_DEC_NUM 19
143#endif
144
145#ifdef THIRTY_TWO_BIT
146#if defined(OPENSSL_SYS_WIN32) && !defined(__GNUC__)
147#define BN_ULLONG unsigned _int64
148#else
149#define BN_ULLONG unsigned long long
150#endif
151#define BN_ULONG unsigned long
152#define BN_LONG long
153#define BN_BITS 64
154#define BN_BYTES 4
155#define BN_BITS2 32
156#define BN_BITS4 16
157#ifdef OPENSSL_SYS_WIN32
158/* VC++ doesn't like the LL suffix */
159#define BN_MASK (0xffffffffffffffffL)
160#else
161#define BN_MASK (0xffffffffffffffffLL)
162#endif
163#define BN_MASK2 (0xffffffffL)
164#define BN_MASK2l (0xffff)
165#define BN_MASK2h1 (0xffff8000L)
166#define BN_MASK2h (0xffff0000L)
167#define BN_TBIT (0x80000000L)
168#define BN_DEC_CONV (1000000000L)
169#define BN_DEC_FMT1 "%lu"
170#define BN_DEC_FMT2 "%09lu"
171#define BN_DEC_NUM 9
172#endif
173
174#ifdef SIXTEEN_BIT
175#ifndef BN_DIV2W
176#define BN_DIV2W
177#endif
178#define BN_ULLONG unsigned long
179#define BN_ULONG unsigned short
180#define BN_LONG short
181#define BN_BITS 32
182#define BN_BYTES 2
183#define BN_BITS2 16
184#define BN_BITS4 8
185#define BN_MASK (0xffffffff)
186#define BN_MASK2 (0xffff)
187#define BN_MASK2l (0xff)
188#define BN_MASK2h1 (0xff80)
189#define BN_MASK2h (0xff00)
190#define BN_TBIT (0x8000)
191#define BN_DEC_CONV (100000)
192#define BN_DEC_FMT1 "%u"
193#define BN_DEC_FMT2 "%05u"
194#define BN_DEC_NUM 5
195#endif
196
197#ifdef EIGHT_BIT
198#ifndef BN_DIV2W
199#define BN_DIV2W
200#endif
201#define BN_ULLONG unsigned short
202#define BN_ULONG unsigned char
203#define BN_LONG char
204#define BN_BITS 16
205#define BN_BYTES 1
206#define BN_BITS2 8
207#define BN_BITS4 4
208#define BN_MASK (0xffff)
209#define BN_MASK2 (0xff)
210#define BN_MASK2l (0xf)
211#define BN_MASK2h1 (0xf8)
212#define BN_MASK2h (0xf0)
213#define BN_TBIT (0x80)
214#define BN_DEC_CONV (100)
215#define BN_DEC_FMT1 "%u"
216#define BN_DEC_FMT2 "%02u"
217#define BN_DEC_NUM 2
218#endif
219
220#define BN_DEFAULT_BITS 1280
221
222#ifdef BIGNUM
223#undef BIGNUM
224#endif
225
226#define BN_FLG_MALLOCED 0x01
227#define BN_FLG_STATIC_DATA 0x02
228#define BN_FLG_FREE 0x8000 /* used for debuging */
229#define BN_set_flags(b,n) ((b)->flags|=(n))
230#define BN_get_flags(b,n) ((b)->flags&(n))
231
232typedef struct bignum_st
233 {
234 BN_ULONG *d; /* Pointer to an array of 'BN_BITS2' bit chunks. */
235 int top; /* Index of last used d +1. */
236 /* The next are internal book keeping for bn_expand. */
237 int dmax; /* Size of the d array. */
238 int neg; /* one if the number is negative */
239 int flags;
240 } BIGNUM;
241
242/* Used for temp variables (declaration hidden in bn_lcl.h) */
243typedef struct bignum_ctx BN_CTX;
244
245typedef struct bn_blinding_st
246 {
247 int init;
248 BIGNUM *A;
249 BIGNUM *Ai;
250 BIGNUM *mod; /* just a reference */
251 unsigned long thread_id; /* added in OpenSSL 0.9.6j and 0.9.7b;
252 * used only by crypto/rsa/rsa_eay.c, rsa_lib.c */
253 } BN_BLINDING;
254
255/* Used for montgomery multiplication */
256typedef struct bn_mont_ctx_st
257 {
258 int ri; /* number of bits in R */
259 BIGNUM RR; /* used to convert to montgomery form */
260 BIGNUM N; /* The modulus */
261 BIGNUM Ni; /* R*(1/R mod N) - N*Ni = 1
262 * (Ni is only stored for bignum algorithm) */
263 BN_ULONG n0; /* least significant word of Ni */
264 int flags;
265 } BN_MONT_CTX;
266
267/* Used for reciprocal division/mod functions
268 * It cannot be shared between threads
269 */
270typedef struct bn_recp_ctx_st
271 {
272 BIGNUM N; /* the divisor */
273 BIGNUM Nr; /* the reciprocal */
274 int num_bits;
275 int shift;
276 int flags;
277 } BN_RECP_CTX;
278
279#define BN_prime_checks 0 /* default: select number of iterations
280 based on the size of the number */
281
282/* number of Miller-Rabin iterations for an error rate of less than 2^-80
283 * for random 'b'-bit input, b >= 100 (taken from table 4.4 in the Handbook
284 * of Applied Cryptography [Menezes, van Oorschot, Vanstone; CRC Press 1996];
285 * original paper: Damgaard, Landrock, Pomerance: Average case error estimates
286 * for the strong probable prime test. -- Math. Comp. 61 (1993) 177-194) */
287#define BN_prime_checks_for_size(b) ((b) >= 1300 ? 2 : \
288 (b) >= 850 ? 3 : \
289 (b) >= 650 ? 4 : \
290 (b) >= 550 ? 5 : \
291 (b) >= 450 ? 6 : \
292 (b) >= 400 ? 7 : \
293 (b) >= 350 ? 8 : \
294 (b) >= 300 ? 9 : \
295 (b) >= 250 ? 12 : \
296 (b) >= 200 ? 15 : \
297 (b) >= 150 ? 18 : \
298 /* b >= 100 */ 27)
299
300#define BN_num_bytes(a) ((BN_num_bits(a)+7)/8)
301
302/* Note that BN_abs_is_word does not work reliably for w == 0 */
303#define BN_abs_is_word(a,w) (((a)->top == 1) && ((a)->d[0] == (BN_ULONG)(w)))
304#define BN_is_zero(a) (((a)->top == 0) || BN_abs_is_word(a,0))
305#define BN_is_one(a) (BN_abs_is_word((a),1) && !(a)->neg)
306#define BN_is_word(a,w) ((w) ? BN_abs_is_word((a),(w)) && !(a)->neg : \
307 BN_is_zero((a)))
308#define BN_is_odd(a) (((a)->top > 0) && ((a)->d[0] & 1))
309
310#define BN_one(a) (BN_set_word((a),1))
311#define BN_zero(a) (BN_set_word((a),0))
312
313/*#define BN_ascii2bn(a) BN_hex2bn(a) */
314/*#define BN_bn2ascii(a) BN_bn2hex(a) */
315
316const BIGNUM *BN_value_one(void);
317char * BN_options(void);
318BN_CTX *BN_CTX_new(void);
319void BN_CTX_init(BN_CTX *c);
320void BN_CTX_free(BN_CTX *c);
321void BN_CTX_start(BN_CTX *ctx);
322BIGNUM *BN_CTX_get(BN_CTX *ctx);
323void BN_CTX_end(BN_CTX *ctx);
324int BN_rand(BIGNUM *rnd, int bits, int top,int bottom);
325int BN_pseudo_rand(BIGNUM *rnd, int bits, int top,int bottom);
326int BN_rand_range(BIGNUM *rnd, BIGNUM *range);
327int BN_pseudo_rand_range(BIGNUM *rnd, BIGNUM *range);
328int BN_num_bits(const BIGNUM *a);
329int BN_num_bits_word(BN_ULONG);
330BIGNUM *BN_new(void);
331void BN_init(BIGNUM *);
332void BN_clear_free(BIGNUM *a);
333BIGNUM *BN_copy(BIGNUM *a, const BIGNUM *b);
334void BN_swap(BIGNUM *a, BIGNUM *b);
335BIGNUM *BN_bin2bn(const unsigned char *s,int len,BIGNUM *ret);
336int BN_bn2bin(const BIGNUM *a, unsigned char *to);
337BIGNUM *BN_mpi2bn(const unsigned char *s,int len,BIGNUM *ret);
338int BN_bn2mpi(const BIGNUM *a, unsigned char *to);
339int BN_sub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b);
340int BN_usub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b);
341int BN_uadd(BIGNUM *r, const BIGNUM *a, const BIGNUM *b);
342int BN_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b);
343int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx);
344int BN_sqr(BIGNUM *r, const BIGNUM *a,BN_CTX *ctx);
345
346int BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, const BIGNUM *d,
347 BN_CTX *ctx);
348#define BN_mod(rem,m,d,ctx) BN_div(NULL,(rem),(m),(d),(ctx))
349int BN_nnmod(BIGNUM *r, const BIGNUM *m, const BIGNUM *d, BN_CTX *ctx);
350int BN_mod_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m, BN_CTX *ctx);
351int BN_mod_add_quick(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m);
352int BN_mod_sub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m, BN_CTX *ctx);
353int BN_mod_sub_quick(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m);
354int BN_mod_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
355 const BIGNUM *m, BN_CTX *ctx);
356int BN_mod_sqr(BIGNUM *r, const BIGNUM *a, const BIGNUM *m, BN_CTX *ctx);
357int BN_mod_lshift1(BIGNUM *r, const BIGNUM *a, const BIGNUM *m, BN_CTX *ctx);
358int BN_mod_lshift1_quick(BIGNUM *r, const BIGNUM *a, const BIGNUM *m);
359int BN_mod_lshift(BIGNUM *r, const BIGNUM *a, int n, const BIGNUM *m, BN_CTX *ctx);
360int BN_mod_lshift_quick(BIGNUM *r, const BIGNUM *a, int n, const BIGNUM *m);
361
362BN_ULONG BN_mod_word(const BIGNUM *a, BN_ULONG w);
363BN_ULONG BN_div_word(BIGNUM *a, BN_ULONG w);
364int BN_mul_word(BIGNUM *a, BN_ULONG w);
365int BN_add_word(BIGNUM *a, BN_ULONG w);
366int BN_sub_word(BIGNUM *a, BN_ULONG w);
367int BN_set_word(BIGNUM *a, BN_ULONG w);
368BN_ULONG BN_get_word(const BIGNUM *a);
369
370int BN_cmp(const BIGNUM *a, const BIGNUM *b);
371void BN_free(BIGNUM *a);
372int BN_is_bit_set(const BIGNUM *a, int n);
373int BN_lshift(BIGNUM *r, const BIGNUM *a, int n);
374int BN_lshift1(BIGNUM *r, const BIGNUM *a);
375int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,BN_CTX *ctx);
376
377int BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
378 const BIGNUM *m,BN_CTX *ctx);
379int BN_mod_exp_mont(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
380 const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *m_ctx);
381int BN_mod_exp_mont_word(BIGNUM *r, BN_ULONG a, const BIGNUM *p,
382 const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *m_ctx);
383int BN_mod_exp2_mont(BIGNUM *r, const BIGNUM *a1, const BIGNUM *p1,
384 const BIGNUM *a2, const BIGNUM *p2,const BIGNUM *m,
385 BN_CTX *ctx,BN_MONT_CTX *m_ctx);
386int BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
387 const BIGNUM *m,BN_CTX *ctx);
388
389int BN_mask_bits(BIGNUM *a,int n);
390#ifndef OPENSSL_NO_FP_API
391int BN_print_fp(FILE *fp, const BIGNUM *a);
392#endif
393#ifdef HEADER_BIO_H
394int BN_print(BIO *fp, const BIGNUM *a);
395#else
396int BN_print(void *fp, const BIGNUM *a);
397#endif
398int BN_reciprocal(BIGNUM *r, const BIGNUM *m, int len, BN_CTX *ctx);
399int BN_rshift(BIGNUM *r, const BIGNUM *a, int n);
400int BN_rshift1(BIGNUM *r, const BIGNUM *a);
401void BN_clear(BIGNUM *a);
402BIGNUM *BN_dup(const BIGNUM *a);
403int BN_ucmp(const BIGNUM *a, const BIGNUM *b);
404int BN_set_bit(BIGNUM *a, int n);
405int BN_clear_bit(BIGNUM *a, int n);
406char * BN_bn2hex(const BIGNUM *a);
407char * BN_bn2dec(const BIGNUM *a);
408int BN_hex2bn(BIGNUM **a, const char *str);
409int BN_dec2bn(BIGNUM **a, const char *str);
410int BN_gcd(BIGNUM *r,const BIGNUM *a,const BIGNUM *b,BN_CTX *ctx);
411int BN_kronecker(const BIGNUM *a,const BIGNUM *b,BN_CTX *ctx); /* returns -2 for error */
412BIGNUM *BN_mod_inverse(BIGNUM *ret,
413 const BIGNUM *a, const BIGNUM *n,BN_CTX *ctx);
414BIGNUM *BN_mod_sqrt(BIGNUM *ret,
415 const BIGNUM *a, const BIGNUM *n,BN_CTX *ctx);
416BIGNUM *BN_generate_prime(BIGNUM *ret,int bits,int safe,
417 const BIGNUM *add, const BIGNUM *rem,
418 void (*callback)(int,int,void *),void *cb_arg);
419int BN_is_prime(const BIGNUM *p,int nchecks,
420 void (*callback)(int,int,void *),
421 BN_CTX *ctx,void *cb_arg);
422int BN_is_prime_fasttest(const BIGNUM *p,int nchecks,
423 void (*callback)(int,int,void *),BN_CTX *ctx,void *cb_arg,
424 int do_trial_division);
425
426BN_MONT_CTX *BN_MONT_CTX_new(void );
427void BN_MONT_CTX_init(BN_MONT_CTX *ctx);
428int BN_mod_mul_montgomery(BIGNUM *r,const BIGNUM *a,const BIGNUM *b,
429 BN_MONT_CTX *mont, BN_CTX *ctx);
430#define BN_to_montgomery(r,a,mont,ctx) BN_mod_mul_montgomery(\
431 (r),(a),&((mont)->RR),(mont),(ctx))
432int BN_from_montgomery(BIGNUM *r,const BIGNUM *a,
433 BN_MONT_CTX *mont, BN_CTX *ctx);
434void BN_MONT_CTX_free(BN_MONT_CTX *mont);
435int BN_MONT_CTX_set(BN_MONT_CTX *mont,const BIGNUM *mod,BN_CTX *ctx);
436BN_MONT_CTX *BN_MONT_CTX_copy(BN_MONT_CTX *to,BN_MONT_CTX *from);
437
438BN_BLINDING *BN_BLINDING_new(BIGNUM *A,BIGNUM *Ai,BIGNUM *mod);
439void BN_BLINDING_free(BN_BLINDING *b);
440int BN_BLINDING_update(BN_BLINDING *b,BN_CTX *ctx);
441int BN_BLINDING_convert(BIGNUM *n, BN_BLINDING *r, BN_CTX *ctx);
442int BN_BLINDING_invert(BIGNUM *n, BN_BLINDING *b, BN_CTX *ctx);
443
444void BN_set_params(int mul,int high,int low,int mont);
445int BN_get_params(int which); /* 0, mul, 1 high, 2 low, 3 mont */
446
447void BN_RECP_CTX_init(BN_RECP_CTX *recp);
448BN_RECP_CTX *BN_RECP_CTX_new(void);
449void BN_RECP_CTX_free(BN_RECP_CTX *recp);
450int BN_RECP_CTX_set(BN_RECP_CTX *recp,const BIGNUM *rdiv,BN_CTX *ctx);
451int BN_mod_mul_reciprocal(BIGNUM *r, const BIGNUM *x, const BIGNUM *y,
452 BN_RECP_CTX *recp,BN_CTX *ctx);
453int BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
454 const BIGNUM *m, BN_CTX *ctx);
455int BN_div_recp(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m,
456 BN_RECP_CTX *recp, BN_CTX *ctx);
457
458/* library internal functions */
459
460#define bn_expand(a,bits) ((((((bits+BN_BITS2-1))/BN_BITS2)) <= (a)->dmax)?\
461 (a):bn_expand2((a),(bits)/BN_BITS2+1))
462#define bn_wexpand(a,words) (((words) <= (a)->dmax)?(a):bn_expand2((a),(words)))
463BIGNUM *bn_expand2(BIGNUM *a, int words);
464BIGNUM *bn_dup_expand(const BIGNUM *a, int words);
465
466#define bn_fix_top(a) \
467 { \
468 BN_ULONG *ftl; \
469 if ((a)->top > 0) \
470 { \
471 for (ftl= &((a)->d[(a)->top-1]); (a)->top > 0; (a)->top--) \
472 if (*(ftl--)) break; \
473 } \
474 }
475
476BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w);
477BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w);
478void bn_sqr_words(BN_ULONG *rp, const BN_ULONG *ap, int num);
479BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d);
480BN_ULONG bn_add_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,int num);
481BN_ULONG bn_sub_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,int num);
482
483#ifdef BN_DEBUG
484void bn_dump1(FILE *o, const char *a, const BN_ULONG *b,int n);
485# define bn_print(a) {fprintf(stderr, #a "="); BN_print_fp(stderr,a); \
486 fprintf(stderr,"\n");}
487# define bn_dump(a,n) bn_dump1(stderr,#a,a,n);
488#else
489# define bn_print(a)
490# define bn_dump(a,b)
491#endif
492
493int BN_bntest_rand(BIGNUM *rnd, int bits, int top,int bottom);
494
495/* BEGIN ERROR CODES */
496/* The following lines are auto generated by the script mkerr.pl. Any changes
497 * made after this point may be overwritten when the script is next run.
498 */
499void ERR_load_BN_strings(void);
500
501/* Error codes for the BN functions. */
502
503/* Function codes. */
504#define BN_F_BN_BLINDING_CONVERT 100
505#define BN_F_BN_BLINDING_INVERT 101
506#define BN_F_BN_BLINDING_NEW 102
507#define BN_F_BN_BLINDING_UPDATE 103
508#define BN_F_BN_BN2DEC 104
509#define BN_F_BN_BN2HEX 105
510#define BN_F_BN_CTX_GET 116
511#define BN_F_BN_CTX_NEW 106
512#define BN_F_BN_DIV 107
513#define BN_F_BN_EXPAND2 108
514#define BN_F_BN_EXPAND_INTERNAL 120
515#define BN_F_BN_MOD_EXP2_MONT 118
516#define BN_F_BN_MOD_EXP_MONT 109
517#define BN_F_BN_MOD_EXP_MONT_WORD 117
518#define BN_F_BN_MOD_INVERSE 110
519#define BN_F_BN_MOD_LSHIFT_QUICK 119
520#define BN_F_BN_MOD_MUL_RECIPROCAL 111
521#define BN_F_BN_MOD_SQRT 121
522#define BN_F_BN_MPI2BN 112
523#define BN_F_BN_NEW 113
524#define BN_F_BN_RAND 114
525#define BN_F_BN_RAND_RANGE 122
526#define BN_F_BN_USUB 115
527
528/* Reason codes. */
529#define BN_R_ARG2_LT_ARG3 100
530#define BN_R_BAD_RECIPROCAL 101
531#define BN_R_BIGNUM_TOO_LONG 114
532#define BN_R_CALLED_WITH_EVEN_MODULUS 102
533#define BN_R_DIV_BY_ZERO 103
534#define BN_R_ENCODING_ERROR 104
535#define BN_R_EXPAND_ON_STATIC_BIGNUM_DATA 105
536#define BN_R_INPUT_NOT_REDUCED 110
537#define BN_R_INVALID_LENGTH 106
538#define BN_R_INVALID_RANGE 115
539#define BN_R_NOT_A_SQUARE 111
540#define BN_R_NOT_INITIALIZED 107
541#define BN_R_NO_INVERSE 108
542#define BN_R_P_IS_NOT_PRIME 112
543#define BN_R_TOO_MANY_ITERATIONS 113
544#define BN_R_TOO_MANY_TEMPORARY_VARIABLES 109
545
546#ifdef __cplusplus
547}
548#endif
549#endif
diff --git a/src/lib/libcrypto/bn/bn_add.c b/src/lib/libcrypto/bn/bn_add.c
deleted file mode 100644
index 6cba07e9f6..0000000000
--- a/src/lib/libcrypto/bn/bn_add.c
+++ /dev/null
@@ -1,309 +0,0 @@
1/* crypto/bn/bn_add.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#include <stdio.h>
60#include "cryptlib.h"
61#include "bn_lcl.h"
62
63/* r can == a or b */
64int BN_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
65 {
66 const BIGNUM *tmp;
67 int a_neg = a->neg;
68
69 bn_check_top(a);
70 bn_check_top(b);
71
72 /* a + b a+b
73 * a + -b a-b
74 * -a + b b-a
75 * -a + -b -(a+b)
76 */
77 if (a_neg ^ b->neg)
78 {
79 /* only one is negative */
80 if (a_neg)
81 { tmp=a; a=b; b=tmp; }
82
83 /* we are now a - b */
84
85 if (BN_ucmp(a,b) < 0)
86 {
87 if (!BN_usub(r,b,a)) return(0);
88 r->neg=1;
89 }
90 else
91 {
92 if (!BN_usub(r,a,b)) return(0);
93 r->neg=0;
94 }
95 return(1);
96 }
97
98 if (!BN_uadd(r,a,b)) return(0);
99 if (a_neg) /* both are neg */
100 r->neg=1;
101 else
102 r->neg=0;
103 return(1);
104 }
105
106/* unsigned add of b to a, r must be large enough */
107int BN_uadd(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
108 {
109 register int i;
110 int max,min;
111 BN_ULONG *ap,*bp,*rp,carry,t1;
112 const BIGNUM *tmp;
113
114 bn_check_top(a);
115 bn_check_top(b);
116
117 if (a->top < b->top)
118 { tmp=a; a=b; b=tmp; }
119 max=a->top;
120 min=b->top;
121
122 if (bn_wexpand(r,max+1) == NULL)
123 return(0);
124
125 r->top=max;
126
127
128 ap=a->d;
129 bp=b->d;
130 rp=r->d;
131 carry=0;
132
133 carry=bn_add_words(rp,ap,bp,min);
134 rp+=min;
135 ap+=min;
136 bp+=min;
137 i=min;
138
139 if (carry)
140 {
141 while (i < max)
142 {
143 i++;
144 t1= *(ap++);
145 if ((*(rp++)=(t1+1)&BN_MASK2) >= t1)
146 {
147 carry=0;
148 break;
149 }
150 }
151 if ((i >= max) && carry)
152 {
153 *(rp++)=1;
154 r->top++;
155 }
156 }
157 if (rp != ap)
158 {
159 for (; i<max; i++)
160 *(rp++)= *(ap++);
161 }
162 /* memcpy(rp,ap,sizeof(*ap)*(max-i));*/
163 r->neg = 0;
164 return(1);
165 }
166
167/* unsigned subtraction of b from a, a must be larger than b. */
168int BN_usub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
169 {
170 int max,min;
171 register BN_ULONG t1,t2,*ap,*bp,*rp;
172 int i,carry;
173#if defined(IRIX_CC_BUG) && !defined(LINT)
174 int dummy;
175#endif
176
177 bn_check_top(a);
178 bn_check_top(b);
179
180 if (a->top < b->top) /* hmm... should not be happening */
181 {
182 BNerr(BN_F_BN_USUB,BN_R_ARG2_LT_ARG3);
183 return(0);
184 }
185
186 max=a->top;
187 min=b->top;
188 if (bn_wexpand(r,max) == NULL) return(0);
189
190 ap=a->d;
191 bp=b->d;
192 rp=r->d;
193
194#if 1
195 carry=0;
196 for (i=0; i<min; i++)
197 {
198 t1= *(ap++);
199 t2= *(bp++);
200 if (carry)
201 {
202 carry=(t1 <= t2);
203 t1=(t1-t2-1)&BN_MASK2;
204 }
205 else
206 {
207 carry=(t1 < t2);
208 t1=(t1-t2)&BN_MASK2;
209 }
210#if defined(IRIX_CC_BUG) && !defined(LINT)
211 dummy=t1;
212#endif
213 *(rp++)=t1&BN_MASK2;
214 }
215#else
216 carry=bn_sub_words(rp,ap,bp,min);
217 ap+=min;
218 bp+=min;
219 rp+=min;
220 i=min;
221#endif
222 if (carry) /* subtracted */
223 {
224 while (i < max)
225 {
226 i++;
227 t1= *(ap++);
228 t2=(t1-1)&BN_MASK2;
229 *(rp++)=t2;
230 if (t1 > t2) break;
231 }
232 }
233#if 0
234 memcpy(rp,ap,sizeof(*rp)*(max-i));
235#else
236 if (rp != ap)
237 {
238 for (;;)
239 {
240 if (i++ >= max) break;
241 rp[0]=ap[0];
242 if (i++ >= max) break;
243 rp[1]=ap[1];
244 if (i++ >= max) break;
245 rp[2]=ap[2];
246 if (i++ >= max) break;
247 rp[3]=ap[3];
248 rp+=4;
249 ap+=4;
250 }
251 }
252#endif
253
254 r->top=max;
255 r->neg=0;
256 bn_fix_top(r);
257 return(1);
258 }
259
260int BN_sub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
261 {
262 int max;
263 int add=0,neg=0;
264 const BIGNUM *tmp;
265
266 bn_check_top(a);
267 bn_check_top(b);
268
269 /* a - b a-b
270 * a - -b a+b
271 * -a - b -(a+b)
272 * -a - -b b-a
273 */
274 if (a->neg)
275 {
276 if (b->neg)
277 { tmp=a; a=b; b=tmp; }
278 else
279 { add=1; neg=1; }
280 }
281 else
282 {
283 if (b->neg) { add=1; neg=0; }
284 }
285
286 if (add)
287 {
288 if (!BN_uadd(r,a,b)) return(0);
289 r->neg=neg;
290 return(1);
291 }
292
293 /* We are actually doing a - b :-) */
294
295 max=(a->top > b->top)?a->top:b->top;
296 if (bn_wexpand(r,max) == NULL) return(0);
297 if (BN_ucmp(a,b) < 0)
298 {
299 if (!BN_usub(r,b,a)) return(0);
300 r->neg=1;
301 }
302 else
303 {
304 if (!BN_usub(r,a,b)) return(0);
305 r->neg=0;
306 }
307 return(1);
308 }
309
diff --git a/src/lib/libcrypto/bn/bn_asm.c b/src/lib/libcrypto/bn/bn_asm.c
deleted file mode 100644
index be8aa3ffc5..0000000000
--- a/src/lib/libcrypto/bn/bn_asm.c
+++ /dev/null
@@ -1,832 +0,0 @@
1/* crypto/bn/bn_asm.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#ifndef BN_DEBUG
60# undef NDEBUG /* avoid conflicting definitions */
61# define NDEBUG
62#endif
63
64#include <stdio.h>
65#include <assert.h>
66#include "cryptlib.h"
67#include "bn_lcl.h"
68
69#if defined(BN_LLONG) || defined(BN_UMULT_HIGH)
70
71BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
72 {
73 BN_ULONG c1=0;
74
75 assert(num >= 0);
76 if (num <= 0) return(c1);
77
78 while (num&~3)
79 {
80 mul_add(rp[0],ap[0],w,c1);
81 mul_add(rp[1],ap[1],w,c1);
82 mul_add(rp[2],ap[2],w,c1);
83 mul_add(rp[3],ap[3],w,c1);
84 ap+=4; rp+=4; num-=4;
85 }
86 if (num)
87 {
88 mul_add(rp[0],ap[0],w,c1); if (--num==0) return c1;
89 mul_add(rp[1],ap[1],w,c1); if (--num==0) return c1;
90 mul_add(rp[2],ap[2],w,c1); return c1;
91 }
92
93 return(c1);
94 }
95
96BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
97 {
98 BN_ULONG c1=0;
99
100 assert(num >= 0);
101 if (num <= 0) return(c1);
102
103 while (num&~3)
104 {
105 mul(rp[0],ap[0],w,c1);
106 mul(rp[1],ap[1],w,c1);
107 mul(rp[2],ap[2],w,c1);
108 mul(rp[3],ap[3],w,c1);
109 ap+=4; rp+=4; num-=4;
110 }
111 if (num)
112 {
113 mul(rp[0],ap[0],w,c1); if (--num == 0) return c1;
114 mul(rp[1],ap[1],w,c1); if (--num == 0) return c1;
115 mul(rp[2],ap[2],w,c1);
116 }
117 return(c1);
118 }
119
120void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n)
121 {
122 assert(n >= 0);
123 if (n <= 0) return;
124 while (n&~3)
125 {
126 sqr(r[0],r[1],a[0]);
127 sqr(r[2],r[3],a[1]);
128 sqr(r[4],r[5],a[2]);
129 sqr(r[6],r[7],a[3]);
130 a+=4; r+=8; n-=4;
131 }
132 if (n)
133 {
134 sqr(r[0],r[1],a[0]); if (--n == 0) return;
135 sqr(r[2],r[3],a[1]); if (--n == 0) return;
136 sqr(r[4],r[5],a[2]);
137 }
138 }
139
140#else /* !(defined(BN_LLONG) || defined(BN_UMULT_HIGH)) */
141
142BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
143 {
144 BN_ULONG c=0;
145 BN_ULONG bl,bh;
146
147 assert(num >= 0);
148 if (num <= 0) return((BN_ULONG)0);
149
150 bl=LBITS(w);
151 bh=HBITS(w);
152
153 for (;;)
154 {
155 mul_add(rp[0],ap[0],bl,bh,c);
156 if (--num == 0) break;
157 mul_add(rp[1],ap[1],bl,bh,c);
158 if (--num == 0) break;
159 mul_add(rp[2],ap[2],bl,bh,c);
160 if (--num == 0) break;
161 mul_add(rp[3],ap[3],bl,bh,c);
162 if (--num == 0) break;
163 ap+=4;
164 rp+=4;
165 }
166 return(c);
167 }
168
169BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
170 {
171 BN_ULONG carry=0;
172 BN_ULONG bl,bh;
173
174 assert(num >= 0);
175 if (num <= 0) return((BN_ULONG)0);
176
177 bl=LBITS(w);
178 bh=HBITS(w);
179
180 for (;;)
181 {
182 mul(rp[0],ap[0],bl,bh,carry);
183 if (--num == 0) break;
184 mul(rp[1],ap[1],bl,bh,carry);
185 if (--num == 0) break;
186 mul(rp[2],ap[2],bl,bh,carry);
187 if (--num == 0) break;
188 mul(rp[3],ap[3],bl,bh,carry);
189 if (--num == 0) break;
190 ap+=4;
191 rp+=4;
192 }
193 return(carry);
194 }
195
196void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n)
197 {
198 assert(n >= 0);
199 if (n <= 0) return;
200 for (;;)
201 {
202 sqr64(r[0],r[1],a[0]);
203 if (--n == 0) break;
204
205 sqr64(r[2],r[3],a[1]);
206 if (--n == 0) break;
207
208 sqr64(r[4],r[5],a[2]);
209 if (--n == 0) break;
210
211 sqr64(r[6],r[7],a[3]);
212 if (--n == 0) break;
213
214 a+=4;
215 r+=8;
216 }
217 }
218
219#endif /* !(defined(BN_LLONG) || defined(BN_UMULT_HIGH)) */
220
221#if defined(BN_LLONG) && defined(BN_DIV2W)
222
223BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
224 {
225 return((BN_ULONG)(((((BN_ULLONG)h)<<BN_BITS2)|l)/(BN_ULLONG)d));
226 }
227
228#else
229
230/* Divide h,l by d and return the result. */
231/* I need to test this some more :-( */
232BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
233 {
234 BN_ULONG dh,dl,q,ret=0,th,tl,t;
235 int i,count=2;
236
237 if (d == 0) return(BN_MASK2);
238
239 i=BN_num_bits_word(d);
240 assert((i == BN_BITS2) || (h > (BN_ULONG)1<<i));
241
242 i=BN_BITS2-i;
243 if (h >= d) h-=d;
244
245 if (i)
246 {
247 d<<=i;
248 h=(h<<i)|(l>>(BN_BITS2-i));
249 l<<=i;
250 }
251 dh=(d&BN_MASK2h)>>BN_BITS4;
252 dl=(d&BN_MASK2l);
253 for (;;)
254 {
255 if ((h>>BN_BITS4) == dh)
256 q=BN_MASK2l;
257 else
258 q=h/dh;
259
260 th=q*dh;
261 tl=dl*q;
262 for (;;)
263 {
264 t=h-th;
265 if ((t&BN_MASK2h) ||
266 ((tl) <= (
267 (t<<BN_BITS4)|
268 ((l&BN_MASK2h)>>BN_BITS4))))
269 break;
270 q--;
271 th-=dh;
272 tl-=dl;
273 }
274 t=(tl>>BN_BITS4);
275 tl=(tl<<BN_BITS4)&BN_MASK2h;
276 th+=t;
277
278 if (l < tl) th++;
279 l-=tl;
280 if (h < th)
281 {
282 h+=d;
283 q--;
284 }
285 h-=th;
286
287 if (--count == 0) break;
288
289 ret=q<<BN_BITS4;
290 h=((h<<BN_BITS4)|(l>>BN_BITS4))&BN_MASK2;
291 l=(l&BN_MASK2l)<<BN_BITS4;
292 }
293 ret|=q;
294 return(ret);
295 }
296#endif /* !defined(BN_LLONG) && defined(BN_DIV2W) */
297
298#ifdef BN_LLONG
299BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
300 {
301 BN_ULLONG ll=0;
302
303 assert(n >= 0);
304 if (n <= 0) return((BN_ULONG)0);
305
306 for (;;)
307 {
308 ll+=(BN_ULLONG)a[0]+b[0];
309 r[0]=(BN_ULONG)ll&BN_MASK2;
310 ll>>=BN_BITS2;
311 if (--n <= 0) break;
312
313 ll+=(BN_ULLONG)a[1]+b[1];
314 r[1]=(BN_ULONG)ll&BN_MASK2;
315 ll>>=BN_BITS2;
316 if (--n <= 0) break;
317
318 ll+=(BN_ULLONG)a[2]+b[2];
319 r[2]=(BN_ULONG)ll&BN_MASK2;
320 ll>>=BN_BITS2;
321 if (--n <= 0) break;
322
323 ll+=(BN_ULLONG)a[3]+b[3];
324 r[3]=(BN_ULONG)ll&BN_MASK2;
325 ll>>=BN_BITS2;
326 if (--n <= 0) break;
327
328 a+=4;
329 b+=4;
330 r+=4;
331 }
332 return((BN_ULONG)ll);
333 }
334#else /* !BN_LLONG */
335BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
336 {
337 BN_ULONG c,l,t;
338
339 assert(n >= 0);
340 if (n <= 0) return((BN_ULONG)0);
341
342 c=0;
343 for (;;)
344 {
345 t=a[0];
346 t=(t+c)&BN_MASK2;
347 c=(t < c);
348 l=(t+b[0])&BN_MASK2;
349 c+=(l < t);
350 r[0]=l;
351 if (--n <= 0) break;
352
353 t=a[1];
354 t=(t+c)&BN_MASK2;
355 c=(t < c);
356 l=(t+b[1])&BN_MASK2;
357 c+=(l < t);
358 r[1]=l;
359 if (--n <= 0) break;
360
361 t=a[2];
362 t=(t+c)&BN_MASK2;
363 c=(t < c);
364 l=(t+b[2])&BN_MASK2;
365 c+=(l < t);
366 r[2]=l;
367 if (--n <= 0) break;
368
369 t=a[3];
370 t=(t+c)&BN_MASK2;
371 c=(t < c);
372 l=(t+b[3])&BN_MASK2;
373 c+=(l < t);
374 r[3]=l;
375 if (--n <= 0) break;
376
377 a+=4;
378 b+=4;
379 r+=4;
380 }
381 return((BN_ULONG)c);
382 }
383#endif /* !BN_LLONG */
384
385BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
386 {
387 BN_ULONG t1,t2;
388 int c=0;
389
390 assert(n >= 0);
391 if (n <= 0) return((BN_ULONG)0);
392
393 for (;;)
394 {
395 t1=a[0]; t2=b[0];
396 r[0]=(t1-t2-c)&BN_MASK2;
397 if (t1 != t2) c=(t1 < t2);
398 if (--n <= 0) break;
399
400 t1=a[1]; t2=b[1];
401 r[1]=(t1-t2-c)&BN_MASK2;
402 if (t1 != t2) c=(t1 < t2);
403 if (--n <= 0) break;
404
405 t1=a[2]; t2=b[2];
406 r[2]=(t1-t2-c)&BN_MASK2;
407 if (t1 != t2) c=(t1 < t2);
408 if (--n <= 0) break;
409
410 t1=a[3]; t2=b[3];
411 r[3]=(t1-t2-c)&BN_MASK2;
412 if (t1 != t2) c=(t1 < t2);
413 if (--n <= 0) break;
414
415 a+=4;
416 b+=4;
417 r+=4;
418 }
419 return(c);
420 }
421
422#ifdef BN_MUL_COMBA
423
424#undef bn_mul_comba8
425#undef bn_mul_comba4
426#undef bn_sqr_comba8
427#undef bn_sqr_comba4
428
429/* mul_add_c(a,b,c0,c1,c2) -- c+=a*b for three word number c=(c2,c1,c0) */
430/* mul_add_c2(a,b,c0,c1,c2) -- c+=2*a*b for three word number c=(c2,c1,c0) */
431/* sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0) */
432/* sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */
433
434#ifdef BN_LLONG
435#define mul_add_c(a,b,c0,c1,c2) \
436 t=(BN_ULLONG)a*b; \
437 t1=(BN_ULONG)Lw(t); \
438 t2=(BN_ULONG)Hw(t); \
439 c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \
440 c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;
441
442#define mul_add_c2(a,b,c0,c1,c2) \
443 t=(BN_ULLONG)a*b; \
444 tt=(t+t)&BN_MASK; \
445 if (tt < t) c2++; \
446 t1=(BN_ULONG)Lw(tt); \
447 t2=(BN_ULONG)Hw(tt); \
448 c0=(c0+t1)&BN_MASK2; \
449 if ((c0 < t1) && (((++t2)&BN_MASK2) == 0)) c2++; \
450 c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;
451
452#define sqr_add_c(a,i,c0,c1,c2) \
453 t=(BN_ULLONG)a[i]*a[i]; \
454 t1=(BN_ULONG)Lw(t); \
455 t2=(BN_ULONG)Hw(t); \
456 c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \
457 c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;
458
459#define sqr_add_c2(a,i,j,c0,c1,c2) \
460 mul_add_c2((a)[i],(a)[j],c0,c1,c2)
461
462#elif defined(BN_UMULT_HIGH)
463
464#define mul_add_c(a,b,c0,c1,c2) { \
465 BN_ULONG ta=(a),tb=(b); \
466 t1 = ta * tb; \
467 t2 = BN_UMULT_HIGH(ta,tb); \
468 c0 += t1; t2 += (c0<t1)?1:0; \
469 c1 += t2; c2 += (c1<t2)?1:0; \
470 }
471
472#define mul_add_c2(a,b,c0,c1,c2) { \
473 BN_ULONG ta=(a),tb=(b),t0; \
474 t1 = BN_UMULT_HIGH(ta,tb); \
475 t0 = ta * tb; \
476 t2 = t1+t1; c2 += (t2<t1)?1:0; \
477 t1 = t0+t0; t2 += (t1<t0)?1:0; \
478 c0 += t1; t2 += (c0<t1)?1:0; \
479 c1 += t2; c2 += (c1<t2)?1:0; \
480 }
481
482#define sqr_add_c(a,i,c0,c1,c2) { \
483 BN_ULONG ta=(a)[i]; \
484 t1 = ta * ta; \
485 t2 = BN_UMULT_HIGH(ta,ta); \
486 c0 += t1; t2 += (c0<t1)?1:0; \
487 c1 += t2; c2 += (c1<t2)?1:0; \
488 }
489
490#define sqr_add_c2(a,i,j,c0,c1,c2) \
491 mul_add_c2((a)[i],(a)[j],c0,c1,c2)
492
493#else /* !BN_LLONG */
494#define mul_add_c(a,b,c0,c1,c2) \
495 t1=LBITS(a); t2=HBITS(a); \
496 bl=LBITS(b); bh=HBITS(b); \
497 mul64(t1,t2,bl,bh); \
498 c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \
499 c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;
500
501#define mul_add_c2(a,b,c0,c1,c2) \
502 t1=LBITS(a); t2=HBITS(a); \
503 bl=LBITS(b); bh=HBITS(b); \
504 mul64(t1,t2,bl,bh); \
505 if (t2 & BN_TBIT) c2++; \
506 t2=(t2+t2)&BN_MASK2; \
507 if (t1 & BN_TBIT) t2++; \
508 t1=(t1+t1)&BN_MASK2; \
509 c0=(c0+t1)&BN_MASK2; \
510 if ((c0 < t1) && (((++t2)&BN_MASK2) == 0)) c2++; \
511 c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;
512
513#define sqr_add_c(a,i,c0,c1,c2) \
514 sqr64(t1,t2,(a)[i]); \
515 c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \
516 c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;
517
518#define sqr_add_c2(a,i,j,c0,c1,c2) \
519 mul_add_c2((a)[i],(a)[j],c0,c1,c2)
520#endif /* !BN_LLONG */
521
522void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
523 {
524#ifdef BN_LLONG
525 BN_ULLONG t;
526#else
527 BN_ULONG bl,bh;
528#endif
529 BN_ULONG t1,t2;
530 BN_ULONG c1,c2,c3;
531
532 c1=0;
533 c2=0;
534 c3=0;
535 mul_add_c(a[0],b[0],c1,c2,c3);
536 r[0]=c1;
537 c1=0;
538 mul_add_c(a[0],b[1],c2,c3,c1);
539 mul_add_c(a[1],b[0],c2,c3,c1);
540 r[1]=c2;
541 c2=0;
542 mul_add_c(a[2],b[0],c3,c1,c2);
543 mul_add_c(a[1],b[1],c3,c1,c2);
544 mul_add_c(a[0],b[2],c3,c1,c2);
545 r[2]=c3;
546 c3=0;
547 mul_add_c(a[0],b[3],c1,c2,c3);
548 mul_add_c(a[1],b[2],c1,c2,c3);
549 mul_add_c(a[2],b[1],c1,c2,c3);
550 mul_add_c(a[3],b[0],c1,c2,c3);
551 r[3]=c1;
552 c1=0;
553 mul_add_c(a[4],b[0],c2,c3,c1);
554 mul_add_c(a[3],b[1],c2,c3,c1);
555 mul_add_c(a[2],b[2],c2,c3,c1);
556 mul_add_c(a[1],b[3],c2,c3,c1);
557 mul_add_c(a[0],b[4],c2,c3,c1);
558 r[4]=c2;
559 c2=0;
560 mul_add_c(a[0],b[5],c3,c1,c2);
561 mul_add_c(a[1],b[4],c3,c1,c2);
562 mul_add_c(a[2],b[3],c3,c1,c2);
563 mul_add_c(a[3],b[2],c3,c1,c2);
564 mul_add_c(a[4],b[1],c3,c1,c2);
565 mul_add_c(a[5],b[0],c3,c1,c2);
566 r[5]=c3;
567 c3=0;
568 mul_add_c(a[6],b[0],c1,c2,c3);
569 mul_add_c(a[5],b[1],c1,c2,c3);
570 mul_add_c(a[4],b[2],c1,c2,c3);
571 mul_add_c(a[3],b[3],c1,c2,c3);
572 mul_add_c(a[2],b[4],c1,c2,c3);
573 mul_add_c(a[1],b[5],c1,c2,c3);
574 mul_add_c(a[0],b[6],c1,c2,c3);
575 r[6]=c1;
576 c1=0;
577 mul_add_c(a[0],b[7],c2,c3,c1);
578 mul_add_c(a[1],b[6],c2,c3,c1);
579 mul_add_c(a[2],b[5],c2,c3,c1);
580 mul_add_c(a[3],b[4],c2,c3,c1);
581 mul_add_c(a[4],b[3],c2,c3,c1);
582 mul_add_c(a[5],b[2],c2,c3,c1);
583 mul_add_c(a[6],b[1],c2,c3,c1);
584 mul_add_c(a[7],b[0],c2,c3,c1);
585 r[7]=c2;
586 c2=0;
587 mul_add_c(a[7],b[1],c3,c1,c2);
588 mul_add_c(a[6],b[2],c3,c1,c2);
589 mul_add_c(a[5],b[3],c3,c1,c2);
590 mul_add_c(a[4],b[4],c3,c1,c2);
591 mul_add_c(a[3],b[5],c3,c1,c2);
592 mul_add_c(a[2],b[6],c3,c1,c2);
593 mul_add_c(a[1],b[7],c3,c1,c2);
594 r[8]=c3;
595 c3=0;
596 mul_add_c(a[2],b[7],c1,c2,c3);
597 mul_add_c(a[3],b[6],c1,c2,c3);
598 mul_add_c(a[4],b[5],c1,c2,c3);
599 mul_add_c(a[5],b[4],c1,c2,c3);
600 mul_add_c(a[6],b[3],c1,c2,c3);
601 mul_add_c(a[7],b[2],c1,c2,c3);
602 r[9]=c1;
603 c1=0;
604 mul_add_c(a[7],b[3],c2,c3,c1);
605 mul_add_c(a[6],b[4],c2,c3,c1);
606 mul_add_c(a[5],b[5],c2,c3,c1);
607 mul_add_c(a[4],b[6],c2,c3,c1);
608 mul_add_c(a[3],b[7],c2,c3,c1);
609 r[10]=c2;
610 c2=0;
611 mul_add_c(a[4],b[7],c3,c1,c2);
612 mul_add_c(a[5],b[6],c3,c1,c2);
613 mul_add_c(a[6],b[5],c3,c1,c2);
614 mul_add_c(a[7],b[4],c3,c1,c2);
615 r[11]=c3;
616 c3=0;
617 mul_add_c(a[7],b[5],c1,c2,c3);
618 mul_add_c(a[6],b[6],c1,c2,c3);
619 mul_add_c(a[5],b[7],c1,c2,c3);
620 r[12]=c1;
621 c1=0;
622 mul_add_c(a[6],b[7],c2,c3,c1);
623 mul_add_c(a[7],b[6],c2,c3,c1);
624 r[13]=c2;
625 c2=0;
626 mul_add_c(a[7],b[7],c3,c1,c2);
627 r[14]=c3;
628 r[15]=c1;
629 }
630
631void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
632 {
633#ifdef BN_LLONG
634 BN_ULLONG t;
635#else
636 BN_ULONG bl,bh;
637#endif
638 BN_ULONG t1,t2;
639 BN_ULONG c1,c2,c3;
640
641 c1=0;
642 c2=0;
643 c3=0;
644 mul_add_c(a[0],b[0],c1,c2,c3);
645 r[0]=c1;
646 c1=0;
647 mul_add_c(a[0],b[1],c2,c3,c1);
648 mul_add_c(a[1],b[0],c2,c3,c1);
649 r[1]=c2;
650 c2=0;
651 mul_add_c(a[2],b[0],c3,c1,c2);
652 mul_add_c(a[1],b[1],c3,c1,c2);
653 mul_add_c(a[0],b[2],c3,c1,c2);
654 r[2]=c3;
655 c3=0;
656 mul_add_c(a[0],b[3],c1,c2,c3);
657 mul_add_c(a[1],b[2],c1,c2,c3);
658 mul_add_c(a[2],b[1],c1,c2,c3);
659 mul_add_c(a[3],b[0],c1,c2,c3);
660 r[3]=c1;
661 c1=0;
662 mul_add_c(a[3],b[1],c2,c3,c1);
663 mul_add_c(a[2],b[2],c2,c3,c1);
664 mul_add_c(a[1],b[3],c2,c3,c1);
665 r[4]=c2;
666 c2=0;
667 mul_add_c(a[2],b[3],c3,c1,c2);
668 mul_add_c(a[3],b[2],c3,c1,c2);
669 r[5]=c3;
670 c3=0;
671 mul_add_c(a[3],b[3],c1,c2,c3);
672 r[6]=c1;
673 r[7]=c2;
674 }
675
676void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a)
677 {
678#ifdef BN_LLONG
679 BN_ULLONG t,tt;
680#else
681 BN_ULONG bl,bh;
682#endif
683 BN_ULONG t1,t2;
684 BN_ULONG c1,c2,c3;
685
686 c1=0;
687 c2=0;
688 c3=0;
689 sqr_add_c(a,0,c1,c2,c3);
690 r[0]=c1;
691 c1=0;
692 sqr_add_c2(a,1,0,c2,c3,c1);
693 r[1]=c2;
694 c2=0;
695 sqr_add_c(a,1,c3,c1,c2);
696 sqr_add_c2(a,2,0,c3,c1,c2);
697 r[2]=c3;
698 c3=0;
699 sqr_add_c2(a,3,0,c1,c2,c3);
700 sqr_add_c2(a,2,1,c1,c2,c3);
701 r[3]=c1;
702 c1=0;
703 sqr_add_c(a,2,c2,c3,c1);
704 sqr_add_c2(a,3,1,c2,c3,c1);
705 sqr_add_c2(a,4,0,c2,c3,c1);
706 r[4]=c2;
707 c2=0;
708 sqr_add_c2(a,5,0,c3,c1,c2);
709 sqr_add_c2(a,4,1,c3,c1,c2);
710 sqr_add_c2(a,3,2,c3,c1,c2);
711 r[5]=c3;
712 c3=0;
713 sqr_add_c(a,3,c1,c2,c3);
714 sqr_add_c2(a,4,2,c1,c2,c3);
715 sqr_add_c2(a,5,1,c1,c2,c3);
716 sqr_add_c2(a,6,0,c1,c2,c3);
717 r[6]=c1;
718 c1=0;
719 sqr_add_c2(a,7,0,c2,c3,c1);
720 sqr_add_c2(a,6,1,c2,c3,c1);
721 sqr_add_c2(a,5,2,c2,c3,c1);
722 sqr_add_c2(a,4,3,c2,c3,c1);
723 r[7]=c2;
724 c2=0;
725 sqr_add_c(a,4,c3,c1,c2);
726 sqr_add_c2(a,5,3,c3,c1,c2);
727 sqr_add_c2(a,6,2,c3,c1,c2);
728 sqr_add_c2(a,7,1,c3,c1,c2);
729 r[8]=c3;
730 c3=0;
731 sqr_add_c2(a,7,2,c1,c2,c3);
732 sqr_add_c2(a,6,3,c1,c2,c3);
733 sqr_add_c2(a,5,4,c1,c2,c3);
734 r[9]=c1;
735 c1=0;
736 sqr_add_c(a,5,c2,c3,c1);
737 sqr_add_c2(a,6,4,c2,c3,c1);
738 sqr_add_c2(a,7,3,c2,c3,c1);
739 r[10]=c2;
740 c2=0;
741 sqr_add_c2(a,7,4,c3,c1,c2);
742 sqr_add_c2(a,6,5,c3,c1,c2);
743 r[11]=c3;
744 c3=0;
745 sqr_add_c(a,6,c1,c2,c3);
746 sqr_add_c2(a,7,5,c1,c2,c3);
747 r[12]=c1;
748 c1=0;
749 sqr_add_c2(a,7,6,c2,c3,c1);
750 r[13]=c2;
751 c2=0;
752 sqr_add_c(a,7,c3,c1,c2);
753 r[14]=c3;
754 r[15]=c1;
755 }
756
757void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a)
758 {
759#ifdef BN_LLONG
760 BN_ULLONG t,tt;
761#else
762 BN_ULONG bl,bh;
763#endif
764 BN_ULONG t1,t2;
765 BN_ULONG c1,c2,c3;
766
767 c1=0;
768 c2=0;
769 c3=0;
770 sqr_add_c(a,0,c1,c2,c3);
771 r[0]=c1;
772 c1=0;
773 sqr_add_c2(a,1,0,c2,c3,c1);
774 r[1]=c2;
775 c2=0;
776 sqr_add_c(a,1,c3,c1,c2);
777 sqr_add_c2(a,2,0,c3,c1,c2);
778 r[2]=c3;
779 c3=0;
780 sqr_add_c2(a,3,0,c1,c2,c3);
781 sqr_add_c2(a,2,1,c1,c2,c3);
782 r[3]=c1;
783 c1=0;
784 sqr_add_c(a,2,c2,c3,c1);
785 sqr_add_c2(a,3,1,c2,c3,c1);
786 r[4]=c2;
787 c2=0;
788 sqr_add_c2(a,3,2,c3,c1,c2);
789 r[5]=c3;
790 c3=0;
791 sqr_add_c(a,3,c1,c2,c3);
792 r[6]=c1;
793 r[7]=c2;
794 }
795#else /* !BN_MUL_COMBA */
796
797/* hmm... is it faster just to do a multiply? */
798#undef bn_sqr_comba4
799void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a)
800 {
801 BN_ULONG t[8];
802 bn_sqr_normal(r,a,4,t);
803 }
804
805#undef bn_sqr_comba8
806void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a)
807 {
808 BN_ULONG t[16];
809 bn_sqr_normal(r,a,8,t);
810 }
811
812void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
813 {
814 r[4]=bn_mul_words( &(r[0]),a,4,b[0]);
815 r[5]=bn_mul_add_words(&(r[1]),a,4,b[1]);
816 r[6]=bn_mul_add_words(&(r[2]),a,4,b[2]);
817 r[7]=bn_mul_add_words(&(r[3]),a,4,b[3]);
818 }
819
820void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
821 {
822 r[ 8]=bn_mul_words( &(r[0]),a,8,b[0]);
823 r[ 9]=bn_mul_add_words(&(r[1]),a,8,b[1]);
824 r[10]=bn_mul_add_words(&(r[2]),a,8,b[2]);
825 r[11]=bn_mul_add_words(&(r[3]),a,8,b[3]);
826 r[12]=bn_mul_add_words(&(r[4]),a,8,b[4]);
827 r[13]=bn_mul_add_words(&(r[5]),a,8,b[5]);
828 r[14]=bn_mul_add_words(&(r[6]),a,8,b[6]);
829 r[15]=bn_mul_add_words(&(r[7]),a,8,b[7]);
830 }
831
832#endif /* !BN_MUL_COMBA */
diff --git a/src/lib/libcrypto/bn/bn_blind.c b/src/lib/libcrypto/bn/bn_blind.c
deleted file mode 100644
index 2d287e6d1b..0000000000
--- a/src/lib/libcrypto/bn/bn_blind.c
+++ /dev/null
@@ -1,144 +0,0 @@
1/* crypto/bn/bn_blind.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#include <stdio.h>
60#include "cryptlib.h"
61#include "bn_lcl.h"
62
63BN_BLINDING *BN_BLINDING_new(BIGNUM *A, BIGNUM *Ai, BIGNUM *mod)
64 {
65 BN_BLINDING *ret=NULL;
66
67 bn_check_top(Ai);
68 bn_check_top(mod);
69
70 if ((ret=(BN_BLINDING *)OPENSSL_malloc(sizeof(BN_BLINDING))) == NULL)
71 {
72 BNerr(BN_F_BN_BLINDING_NEW,ERR_R_MALLOC_FAILURE);
73 return(NULL);
74 }
75 memset(ret,0,sizeof(BN_BLINDING));
76 if ((ret->A=BN_new()) == NULL) goto err;
77 if ((ret->Ai=BN_new()) == NULL) goto err;
78 if (!BN_copy(ret->A,A)) goto err;
79 if (!BN_copy(ret->Ai,Ai)) goto err;
80 ret->mod=mod;
81 return(ret);
82err:
83 if (ret != NULL) BN_BLINDING_free(ret);
84 return(NULL);
85 }
86
87void BN_BLINDING_free(BN_BLINDING *r)
88 {
89 if(r == NULL)
90 return;
91
92 if (r->A != NULL) BN_free(r->A );
93 if (r->Ai != NULL) BN_free(r->Ai);
94 OPENSSL_free(r);
95 }
96
97int BN_BLINDING_update(BN_BLINDING *b, BN_CTX *ctx)
98 {
99 int ret=0;
100
101 if ((b->A == NULL) || (b->Ai == NULL))
102 {
103 BNerr(BN_F_BN_BLINDING_UPDATE,BN_R_NOT_INITIALIZED);
104 goto err;
105 }
106
107 if (!BN_mod_mul(b->A,b->A,b->A,b->mod,ctx)) goto err;
108 if (!BN_mod_mul(b->Ai,b->Ai,b->Ai,b->mod,ctx)) goto err;
109
110 ret=1;
111err:
112 return(ret);
113 }
114
115int BN_BLINDING_convert(BIGNUM *n, BN_BLINDING *b, BN_CTX *ctx)
116 {
117 bn_check_top(n);
118
119 if ((b->A == NULL) || (b->Ai == NULL))
120 {
121 BNerr(BN_F_BN_BLINDING_CONVERT,BN_R_NOT_INITIALIZED);
122 return(0);
123 }
124 return(BN_mod_mul(n,n,b->A,b->mod,ctx));
125 }
126
127int BN_BLINDING_invert(BIGNUM *n, BN_BLINDING *b, BN_CTX *ctx)
128 {
129 int ret;
130
131 bn_check_top(n);
132 if ((b->A == NULL) || (b->Ai == NULL))
133 {
134 BNerr(BN_F_BN_BLINDING_INVERT,BN_R_NOT_INITIALIZED);
135 return(0);
136 }
137 if ((ret=BN_mod_mul(n,n,b->Ai,b->mod,ctx)) >= 0)
138 {
139 if (!BN_BLINDING_update(b,ctx))
140 return(0);
141 }
142 return(ret);
143 }
144
diff --git a/src/lib/libcrypto/bn/bn_ctx.c b/src/lib/libcrypto/bn/bn_ctx.c
deleted file mode 100644
index 7daf19eb84..0000000000
--- a/src/lib/libcrypto/bn/bn_ctx.c
+++ /dev/null
@@ -1,155 +0,0 @@
1/* crypto/bn/bn_ctx.c */
2/* Written by Ulf Moeller for the OpenSSL project. */
3/* ====================================================================
4 * Copyright (c) 1998-2000 The OpenSSL Project. All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 *
18 * 3. All advertising materials mentioning features or use of this
19 * software must display the following acknowledgment:
20 * "This product includes software developed by the OpenSSL Project
21 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
22 *
23 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
24 * endorse or promote products derived from this software without
25 * prior written permission. For written permission, please contact
26 * openssl-core@openssl.org.
27 *
28 * 5. Products derived from this software may not be called "OpenSSL"
29 * nor may "OpenSSL" appear in their names without prior written
30 * permission of the OpenSSL Project.
31 *
32 * 6. Redistributions of any form whatsoever must retain the following
33 * acknowledgment:
34 * "This product includes software developed by the OpenSSL Project
35 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
36 *
37 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
38 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
39 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
40 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
41 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
42 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
43 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
44 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
45 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
46 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
47 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
48 * OF THE POSSIBILITY OF SUCH DAMAGE.
49 * ====================================================================
50 *
51 * This product includes cryptographic software written by Eric Young
52 * (eay@cryptsoft.com). This product includes software written by Tim
53 * Hudson (tjh@cryptsoft.com).
54 *
55 */
56
57#ifndef BN_CTX_DEBUG
58# undef NDEBUG /* avoid conflicting definitions */
59# define NDEBUG
60#endif
61
62#include <stdio.h>
63#include <assert.h>
64
65#include "cryptlib.h"
66#include "bn_lcl.h"
67
68
69BN_CTX *BN_CTX_new(void)
70 {
71 BN_CTX *ret;
72
73 ret=(BN_CTX *)OPENSSL_malloc(sizeof(BN_CTX));
74 if (ret == NULL)
75 {
76 BNerr(BN_F_BN_CTX_NEW,ERR_R_MALLOC_FAILURE);
77 return(NULL);
78 }
79
80 BN_CTX_init(ret);
81 ret->flags=BN_FLG_MALLOCED;
82 return(ret);
83 }
84
85void BN_CTX_init(BN_CTX *ctx)
86 {
87#if 0 /* explicit version */
88 int i;
89 ctx->tos = 0;
90 ctx->flags = 0;
91 ctx->depth = 0;
92 ctx->too_many = 0;
93 for (i = 0; i < BN_CTX_NUM; i++)
94 BN_init(&(ctx->bn[i]));
95#else
96 memset(ctx, 0, sizeof *ctx);
97#endif
98 }
99
100void BN_CTX_free(BN_CTX *ctx)
101 {
102 int i;
103
104 if (ctx == NULL) return;
105 assert(ctx->depth == 0);
106
107 for (i=0; i < BN_CTX_NUM; i++)
108 BN_clear_free(&(ctx->bn[i]));
109 if (ctx->flags & BN_FLG_MALLOCED)
110 OPENSSL_free(ctx);
111 }
112
113void BN_CTX_start(BN_CTX *ctx)
114 {
115 if (ctx->depth < BN_CTX_NUM_POS)
116 ctx->pos[ctx->depth] = ctx->tos;
117 ctx->depth++;
118 }
119
120
121BIGNUM *BN_CTX_get(BN_CTX *ctx)
122 {
123 /* Note: If BN_CTX_get is ever changed to allocate BIGNUMs dynamically,
124 * make sure that if BN_CTX_get fails once it will return NULL again
125 * until BN_CTX_end is called. (This is so that callers have to check
126 * only the last return value.)
127 */
128 if (ctx->depth > BN_CTX_NUM_POS || ctx->tos >= BN_CTX_NUM)
129 {
130 if (!ctx->too_many)
131 {
132 BNerr(BN_F_BN_CTX_GET,BN_R_TOO_MANY_TEMPORARY_VARIABLES);
133 /* disable error code until BN_CTX_end is called: */
134 ctx->too_many = 1;
135 }
136 return NULL;
137 }
138 return (&(ctx->bn[ctx->tos++]));
139 }
140
141void BN_CTX_end(BN_CTX *ctx)
142 {
143 if (ctx == NULL) return;
144 assert(ctx->depth > 0);
145 if (ctx->depth == 0)
146 /* should never happen, but we can tolerate it if not in
147 * debug mode (could be a 'goto err' in the calling function
148 * before BN_CTX_start was reached) */
149 BN_CTX_start(ctx);
150
151 ctx->too_many = 0;
152 ctx->depth--;
153 if (ctx->depth < BN_CTX_NUM_POS)
154 ctx->tos = ctx->pos[ctx->depth];
155 }
diff --git a/src/lib/libcrypto/bn/bn_div.c b/src/lib/libcrypto/bn/bn_div.c
deleted file mode 100644
index 580d1201bc..0000000000
--- a/src/lib/libcrypto/bn/bn_div.c
+++ /dev/null
@@ -1,387 +0,0 @@
1/* crypto/bn/bn_div.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#include <stdio.h>
60#include <openssl/bn.h>
61#include "cryptlib.h"
62#include "bn_lcl.h"
63
64
65/* The old slow way */
66#if 0
67int BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, const BIGNUM *d,
68 BN_CTX *ctx)
69 {
70 int i,nm,nd;
71 int ret = 0;
72 BIGNUM *D;
73
74 bn_check_top(m);
75 bn_check_top(d);
76 if (BN_is_zero(d))
77 {
78 BNerr(BN_F_BN_DIV,BN_R_DIV_BY_ZERO);
79 return(0);
80 }
81
82 if (BN_ucmp(m,d) < 0)
83 {
84 if (rem != NULL)
85 { if (BN_copy(rem,m) == NULL) return(0); }
86 if (dv != NULL) BN_zero(dv);
87 return(1);
88 }
89
90 BN_CTX_start(ctx);
91 D = BN_CTX_get(ctx);
92 if (dv == NULL) dv = BN_CTX_get(ctx);
93 if (rem == NULL) rem = BN_CTX_get(ctx);
94 if (D == NULL || dv == NULL || rem == NULL)
95 goto end;
96
97 nd=BN_num_bits(d);
98 nm=BN_num_bits(m);
99 if (BN_copy(D,d) == NULL) goto end;
100 if (BN_copy(rem,m) == NULL) goto end;
101
102 /* The next 2 are needed so we can do a dv->d[0]|=1 later
103 * since BN_lshift1 will only work once there is a value :-) */
104 BN_zero(dv);
105 bn_wexpand(dv,1);
106 dv->top=1;
107
108 if (!BN_lshift(D,D,nm-nd)) goto end;
109 for (i=nm-nd; i>=0; i--)
110 {
111 if (!BN_lshift1(dv,dv)) goto end;
112 if (BN_ucmp(rem,D) >= 0)
113 {
114 dv->d[0]|=1;
115 if (!BN_usub(rem,rem,D)) goto end;
116 }
117/* CAN IMPROVE (and have now :=) */
118 if (!BN_rshift1(D,D)) goto end;
119 }
120 rem->neg=BN_is_zero(rem)?0:m->neg;
121 dv->neg=m->neg^d->neg;
122 ret = 1;
123 end:
124 BN_CTX_end(ctx);
125 return(ret);
126 }
127
128#else
129
130#if !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM) \
131 && !defined(PEDANTIC) && !defined(BN_DIV3W)
132# if defined(__GNUC__) && __GNUC__>=2
133# if defined(__i386) || defined (__i386__)
134 /*
135 * There were two reasons for implementing this template:
136 * - GNU C generates a call to a function (__udivdi3 to be exact)
137 * in reply to ((((BN_ULLONG)n0)<<BN_BITS2)|n1)/d0 (I fail to
138 * understand why...);
139 * - divl doesn't only calculate quotient, but also leaves
140 * remainder in %edx which we can definitely use here:-)
141 *
142 * <appro@fy.chalmers.se>
143 */
144# define bn_div_words(n0,n1,d0) \
145 ({ asm volatile ( \
146 "divl %4" \
147 : "=a"(q), "=d"(rem) \
148 : "a"(n1), "d"(n0), "g"(d0) \
149 : "cc"); \
150 q; \
151 })
152# define REMAINDER_IS_ALREADY_CALCULATED
153# elif defined(__x86_64) && defined(SIXTY_FOUR_BIT_LONG)
154 /*
155 * Same story here, but it's 128-bit by 64-bit division. Wow!
156 * <appro@fy.chalmers.se>
157 */
158# define bn_div_words(n0,n1,d0) \
159 ({ asm volatile ( \
160 "divq %4" \
161 : "=a"(q), "=d"(rem) \
162 : "a"(n1), "d"(n0), "g"(d0) \
163 : "cc"); \
164 q; \
165 })
166# define REMAINDER_IS_ALREADY_CALCULATED
167# endif /* __<cpu> */
168# endif /* __GNUC__ */
169#endif /* OPENSSL_NO_ASM */
170
171
172/* BN_div computes dv := num / divisor, rounding towards zero, and sets up
173 * rm such that dv*divisor + rm = num holds.
174 * Thus:
175 * dv->neg == num->neg ^ divisor->neg (unless the result is zero)
176 * rm->neg == num->neg (unless the remainder is zero)
177 * If 'dv' or 'rm' is NULL, the respective value is not returned.
178 */
179int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor,
180 BN_CTX *ctx)
181 {
182 int norm_shift,i,j,loop;
183 BIGNUM *tmp,wnum,*snum,*sdiv,*res;
184 BN_ULONG *resp,*wnump;
185 BN_ULONG d0,d1;
186 int num_n,div_n;
187
188 bn_check_top(num);
189 bn_check_top(divisor);
190
191 if (BN_is_zero(divisor))
192 {
193 BNerr(BN_F_BN_DIV,BN_R_DIV_BY_ZERO);
194 return(0);
195 }
196
197 if (BN_ucmp(num,divisor) < 0)
198 {
199 if (rm != NULL)
200 { if (BN_copy(rm,num) == NULL) return(0); }
201 if (dv != NULL) BN_zero(dv);
202 return(1);
203 }
204
205 BN_CTX_start(ctx);
206 tmp=BN_CTX_get(ctx);
207 snum=BN_CTX_get(ctx);
208 sdiv=BN_CTX_get(ctx);
209 if (dv == NULL)
210 res=BN_CTX_get(ctx);
211 else res=dv;
212 if (sdiv == NULL || res == NULL) goto err;
213 tmp->neg=0;
214
215 /* First we normalise the numbers */
216 norm_shift=BN_BITS2-((BN_num_bits(divisor))%BN_BITS2);
217 if (!(BN_lshift(sdiv,divisor,norm_shift))) goto err;
218 sdiv->neg=0;
219 norm_shift+=BN_BITS2;
220 if (!(BN_lshift(snum,num,norm_shift))) goto err;
221 snum->neg=0;
222 div_n=sdiv->top;
223 num_n=snum->top;
224 loop=num_n-div_n;
225
226 /* Lets setup a 'window' into snum
227 * This is the part that corresponds to the current
228 * 'area' being divided */
229 BN_init(&wnum);
230 wnum.d= &(snum->d[loop]);
231 wnum.top= div_n;
232 wnum.dmax= snum->dmax+1; /* a bit of a lie */
233
234 /* Get the top 2 words of sdiv */
235 /* i=sdiv->top; */
236 d0=sdiv->d[div_n-1];
237 d1=(div_n == 1)?0:sdiv->d[div_n-2];
238
239 /* pointer to the 'top' of snum */
240 wnump= &(snum->d[num_n-1]);
241
242 /* Setup to 'res' */
243 res->neg= (num->neg^divisor->neg);
244 if (!bn_wexpand(res,(loop+1))) goto err;
245 res->top=loop;
246 resp= &(res->d[loop-1]);
247
248 /* space for temp */
249 if (!bn_wexpand(tmp,(div_n+1))) goto err;
250
251 if (BN_ucmp(&wnum,sdiv) >= 0)
252 {
253 if (!BN_usub(&wnum,&wnum,sdiv)) goto err;
254 *resp=1;
255 res->d[res->top-1]=1;
256 }
257 else
258 res->top--;
259 if (res->top == 0)
260 res->neg = 0;
261 resp--;
262
263 for (i=0; i<loop-1; i++)
264 {
265 BN_ULONG q,l0;
266#if defined(BN_DIV3W) && !defined(OPENSSL_NO_ASM)
267 BN_ULONG bn_div_3_words(BN_ULONG*,BN_ULONG,BN_ULONG);
268 q=bn_div_3_words(wnump,d1,d0);
269#else
270 BN_ULONG n0,n1,rem=0;
271
272 n0=wnump[0];
273 n1=wnump[-1];
274 if (n0 == d0)
275 q=BN_MASK2;
276 else /* n0 < d0 */
277 {
278#ifdef BN_LLONG
279 BN_ULLONG t2;
280
281#if defined(BN_LLONG) && defined(BN_DIV2W) && !defined(bn_div_words)
282 q=(BN_ULONG)(((((BN_ULLONG)n0)<<BN_BITS2)|n1)/d0);
283#else
284 q=bn_div_words(n0,n1,d0);
285#ifdef BN_DEBUG_LEVITTE
286 fprintf(stderr,"DEBUG: bn_div_words(0x%08X,0x%08X,0x%08\
287X) -> 0x%08X\n",
288 n0, n1, d0, q);
289#endif
290#endif
291
292#ifndef REMAINDER_IS_ALREADY_CALCULATED
293 /*
294 * rem doesn't have to be BN_ULLONG. The least we
295 * know it's less that d0, isn't it?
296 */
297 rem=(n1-q*d0)&BN_MASK2;
298#endif
299 t2=(BN_ULLONG)d1*q;
300
301 for (;;)
302 {
303 if (t2 <= ((((BN_ULLONG)rem)<<BN_BITS2)|wnump[-2]))
304 break;
305 q--;
306 rem += d0;
307 if (rem < d0) break; /* don't let rem overflow */
308 t2 -= d1;
309 }
310#else /* !BN_LLONG */
311 BN_ULONG t2l,t2h,ql,qh;
312
313 q=bn_div_words(n0,n1,d0);
314#ifdef BN_DEBUG_LEVITTE
315 fprintf(stderr,"DEBUG: bn_div_words(0x%08X,0x%08X,0x%08\
316X) -> 0x%08X\n",
317 n0, n1, d0, q);
318#endif
319#ifndef REMAINDER_IS_ALREADY_CALCULATED
320 rem=(n1-q*d0)&BN_MASK2;
321#endif
322
323#if defined(BN_UMULT_LOHI)
324 BN_UMULT_LOHI(t2l,t2h,d1,q);
325#elif defined(BN_UMULT_HIGH)
326 t2l = d1 * q;
327 t2h = BN_UMULT_HIGH(d1,q);
328#else
329 t2l=LBITS(d1); t2h=HBITS(d1);
330 ql =LBITS(q); qh =HBITS(q);
331 mul64(t2l,t2h,ql,qh); /* t2=(BN_ULLONG)d1*q; */
332#endif
333
334 for (;;)
335 {
336 if ((t2h < rem) ||
337 ((t2h == rem) && (t2l <= wnump[-2])))
338 break;
339 q--;
340 rem += d0;
341 if (rem < d0) break; /* don't let rem overflow */
342 if (t2l < d1) t2h--; t2l -= d1;
343 }
344#endif /* !BN_LLONG */
345 }
346#endif /* !BN_DIV3W */
347
348 l0=bn_mul_words(tmp->d,sdiv->d,div_n,q);
349 wnum.d--; wnum.top++;
350 tmp->d[div_n]=l0;
351 for (j=div_n+1; j>0; j--)
352 if (tmp->d[j-1]) break;
353 tmp->top=j;
354
355 j=wnum.top;
356 if (!BN_sub(&wnum,&wnum,tmp)) goto err;
357
358 snum->top=snum->top+wnum.top-j;
359
360 if (wnum.neg)
361 {
362 q--;
363 j=wnum.top;
364 if (!BN_add(&wnum,&wnum,sdiv)) goto err;
365 snum->top+=wnum.top-j;
366 }
367 *(resp--)=q;
368 wnump--;
369 }
370 if (rm != NULL)
371 {
372 /* Keep a copy of the neg flag in num because if rm==num
373 * BN_rshift() will overwrite it.
374 */
375 int neg = num->neg;
376 BN_rshift(rm,snum,norm_shift);
377 if (!BN_is_zero(rm))
378 rm->neg = neg;
379 }
380 BN_CTX_end(ctx);
381 return(1);
382err:
383 BN_CTX_end(ctx);
384 return(0);
385 }
386
387#endif
diff --git a/src/lib/libcrypto/bn/bn_err.c b/src/lib/libcrypto/bn/bn_err.c
deleted file mode 100644
index fb84ee96d8..0000000000
--- a/src/lib/libcrypto/bn/bn_err.c
+++ /dev/null
@@ -1,131 +0,0 @@
1/* crypto/bn/bn_err.c */
2/* ====================================================================
3 * Copyright (c) 1999 The OpenSSL Project. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 *
17 * 3. All advertising materials mentioning features or use of this
18 * software must display the following acknowledgment:
19 * "This product includes software developed by the OpenSSL Project
20 * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
21 *
22 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
23 * endorse or promote products derived from this software without
24 * prior written permission. For written permission, please contact
25 * openssl-core@OpenSSL.org.
26 *
27 * 5. Products derived from this software may not be called "OpenSSL"
28 * nor may "OpenSSL" appear in their names without prior written
29 * permission of the OpenSSL Project.
30 *
31 * 6. Redistributions of any form whatsoever must retain the following
32 * acknowledgment:
33 * "This product includes software developed by the OpenSSL Project
34 * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
35 *
36 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
37 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
38 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
39 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
40 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
42 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
43 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
44 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
45 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
46 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
47 * OF THE POSSIBILITY OF SUCH DAMAGE.
48 * ====================================================================
49 *
50 * This product includes cryptographic software written by Eric Young
51 * (eay@cryptsoft.com). This product includes software written by Tim
52 * Hudson (tjh@cryptsoft.com).
53 *
54 */
55
56/* NOTE: this file was auto generated by the mkerr.pl script: any changes
57 * made to it will be overwritten when the script next updates this file,
58 * only reason strings will be preserved.
59 */
60
61#include <stdio.h>
62#include <openssl/err.h>
63#include <openssl/bn.h>
64
65/* BEGIN ERROR CODES */
66#ifndef OPENSSL_NO_ERR
67static ERR_STRING_DATA BN_str_functs[]=
68 {
69{ERR_PACK(0,BN_F_BN_BLINDING_CONVERT,0), "BN_BLINDING_convert"},
70{ERR_PACK(0,BN_F_BN_BLINDING_INVERT,0), "BN_BLINDING_invert"},
71{ERR_PACK(0,BN_F_BN_BLINDING_NEW,0), "BN_BLINDING_new"},
72{ERR_PACK(0,BN_F_BN_BLINDING_UPDATE,0), "BN_BLINDING_update"},
73{ERR_PACK(0,BN_F_BN_BN2DEC,0), "BN_bn2dec"},
74{ERR_PACK(0,BN_F_BN_BN2HEX,0), "BN_bn2hex"},
75{ERR_PACK(0,BN_F_BN_CTX_GET,0), "BN_CTX_get"},
76{ERR_PACK(0,BN_F_BN_CTX_NEW,0), "BN_CTX_new"},
77{ERR_PACK(0,BN_F_BN_DIV,0), "BN_div"},
78{ERR_PACK(0,BN_F_BN_EXPAND2,0), "bn_expand2"},
79{ERR_PACK(0,BN_F_BN_EXPAND_INTERNAL,0), "BN_EXPAND_INTERNAL"},
80{ERR_PACK(0,BN_F_BN_MOD_EXP2_MONT,0), "BN_mod_exp2_mont"},
81{ERR_PACK(0,BN_F_BN_MOD_EXP_MONT,0), "BN_mod_exp_mont"},
82{ERR_PACK(0,BN_F_BN_MOD_EXP_MONT_WORD,0), "BN_mod_exp_mont_word"},
83{ERR_PACK(0,BN_F_BN_MOD_INVERSE,0), "BN_mod_inverse"},
84{ERR_PACK(0,BN_F_BN_MOD_LSHIFT_QUICK,0), "BN_mod_lshift_quick"},
85{ERR_PACK(0,BN_F_BN_MOD_MUL_RECIPROCAL,0), "BN_mod_mul_reciprocal"},
86{ERR_PACK(0,BN_F_BN_MOD_SQRT,0), "BN_mod_sqrt"},
87{ERR_PACK(0,BN_F_BN_MPI2BN,0), "BN_mpi2bn"},
88{ERR_PACK(0,BN_F_BN_NEW,0), "BN_new"},
89{ERR_PACK(0,BN_F_BN_RAND,0), "BN_rand"},
90{ERR_PACK(0,BN_F_BN_RAND_RANGE,0), "BN_rand_range"},
91{ERR_PACK(0,BN_F_BN_USUB,0), "BN_usub"},
92{0,NULL}
93 };
94
95static ERR_STRING_DATA BN_str_reasons[]=
96 {
97{BN_R_ARG2_LT_ARG3 ,"arg2 lt arg3"},
98{BN_R_BAD_RECIPROCAL ,"bad reciprocal"},
99{BN_R_BIGNUM_TOO_LONG ,"bignum too long"},
100{BN_R_CALLED_WITH_EVEN_MODULUS ,"called with even modulus"},
101{BN_R_DIV_BY_ZERO ,"div by zero"},
102{BN_R_ENCODING_ERROR ,"encoding error"},
103{BN_R_EXPAND_ON_STATIC_BIGNUM_DATA ,"expand on static bignum data"},
104{BN_R_INPUT_NOT_REDUCED ,"input not reduced"},
105{BN_R_INVALID_LENGTH ,"invalid length"},
106{BN_R_INVALID_RANGE ,"invalid range"},
107{BN_R_NOT_A_SQUARE ,"not a square"},
108{BN_R_NOT_INITIALIZED ,"not initialized"},
109{BN_R_NO_INVERSE ,"no inverse"},
110{BN_R_P_IS_NOT_PRIME ,"p is not prime"},
111{BN_R_TOO_MANY_ITERATIONS ,"too many iterations"},
112{BN_R_TOO_MANY_TEMPORARY_VARIABLES ,"too many temporary variables"},
113{0,NULL}
114 };
115
116#endif
117
118void ERR_load_BN_strings(void)
119 {
120 static int init=1;
121
122 if (init)
123 {
124 init=0;
125#ifndef OPENSSL_NO_ERR
126 ERR_load_strings(ERR_LIB_BN,BN_str_functs);
127 ERR_load_strings(ERR_LIB_BN,BN_str_reasons);
128#endif
129
130 }
131 }
diff --git a/src/lib/libcrypto/bn/bn_exp.c b/src/lib/libcrypto/bn/bn_exp.c
deleted file mode 100644
index afdfd580fb..0000000000
--- a/src/lib/libcrypto/bn/bn_exp.c
+++ /dev/null
@@ -1,747 +0,0 @@
1/* crypto/bn/bn_exp.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58/* ====================================================================
59 * Copyright (c) 1998-2000 The OpenSSL Project. All rights reserved.
60 *
61 * Redistribution and use in source and binary forms, with or without
62 * modification, are permitted provided that the following conditions
63 * are met:
64 *
65 * 1. Redistributions of source code must retain the above copyright
66 * notice, this list of conditions and the following disclaimer.
67 *
68 * 2. Redistributions in binary form must reproduce the above copyright
69 * notice, this list of conditions and the following disclaimer in
70 * the documentation and/or other materials provided with the
71 * distribution.
72 *
73 * 3. All advertising materials mentioning features or use of this
74 * software must display the following acknowledgment:
75 * "This product includes software developed by the OpenSSL Project
76 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
77 *
78 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
79 * endorse or promote products derived from this software without
80 * prior written permission. For written permission, please contact
81 * openssl-core@openssl.org.
82 *
83 * 5. Products derived from this software may not be called "OpenSSL"
84 * nor may "OpenSSL" appear in their names without prior written
85 * permission of the OpenSSL Project.
86 *
87 * 6. Redistributions of any form whatsoever must retain the following
88 * acknowledgment:
89 * "This product includes software developed by the OpenSSL Project
90 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
91 *
92 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
93 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
94 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
95 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
96 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
97 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
98 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
99 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
100 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
101 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
102 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
103 * OF THE POSSIBILITY OF SUCH DAMAGE.
104 * ====================================================================
105 *
106 * This product includes cryptographic software written by Eric Young
107 * (eay@cryptsoft.com). This product includes software written by Tim
108 * Hudson (tjh@cryptsoft.com).
109 *
110 */
111
112
113#include "cryptlib.h"
114#include "bn_lcl.h"
115
116#define TABLE_SIZE 32
117
118/* this one works - simple but works */
119int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
120 {
121 int i,bits,ret=0;
122 BIGNUM *v,*rr;
123
124 BN_CTX_start(ctx);
125 if ((r == a) || (r == p))
126 rr = BN_CTX_get(ctx);
127 else
128 rr = r;
129 if ((v = BN_CTX_get(ctx)) == NULL) goto err;
130
131 if (BN_copy(v,a) == NULL) goto err;
132 bits=BN_num_bits(p);
133
134 if (BN_is_odd(p))
135 { if (BN_copy(rr,a) == NULL) goto err; }
136 else { if (!BN_one(rr)) goto err; }
137
138 for (i=1; i<bits; i++)
139 {
140 if (!BN_sqr(v,v,ctx)) goto err;
141 if (BN_is_bit_set(p,i))
142 {
143 if (!BN_mul(rr,rr,v,ctx)) goto err;
144 }
145 }
146 ret=1;
147err:
148 if (r != rr) BN_copy(r,rr);
149 BN_CTX_end(ctx);
150 return(ret);
151 }
152
153
154int BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
155 BN_CTX *ctx)
156 {
157 int ret;
158
159 bn_check_top(a);
160 bn_check_top(p);
161 bn_check_top(m);
162
163 /* For even modulus m = 2^k*m_odd, it might make sense to compute
164 * a^p mod m_odd and a^p mod 2^k separately (with Montgomery
165 * exponentiation for the odd part), using appropriate exponent
166 * reductions, and combine the results using the CRT.
167 *
168 * For now, we use Montgomery only if the modulus is odd; otherwise,
169 * exponentiation using the reciprocal-based quick remaindering
170 * algorithm is used.
171 *
172 * (Timing obtained with expspeed.c [computations a^p mod m
173 * where a, p, m are of the same length: 256, 512, 1024, 2048,
174 * 4096, 8192 bits], compared to the running time of the
175 * standard algorithm:
176 *
177 * BN_mod_exp_mont 33 .. 40 % [AMD K6-2, Linux, debug configuration]
178 * 55 .. 77 % [UltraSparc processor, but
179 * debug-solaris-sparcv8-gcc conf.]
180 *
181 * BN_mod_exp_recp 50 .. 70 % [AMD K6-2, Linux, debug configuration]
182 * 62 .. 118 % [UltraSparc, debug-solaris-sparcv8-gcc]
183 *
184 * On the Sparc, BN_mod_exp_recp was faster than BN_mod_exp_mont
185 * at 2048 and more bits, but at 512 and 1024 bits, it was
186 * slower even than the standard algorithm!
187 *
188 * "Real" timings [linux-elf, solaris-sparcv9-gcc configurations]
189 * should be obtained when the new Montgomery reduction code
190 * has been integrated into OpenSSL.)
191 */
192
193#define MONT_MUL_MOD
194#define MONT_EXP_WORD
195#define RECP_MUL_MOD
196
197#ifdef MONT_MUL_MOD
198 /* I have finally been able to take out this pre-condition of
199 * the top bit being set. It was caused by an error in BN_div
200 * with negatives. There was also another problem when for a^b%m
201 * a >= m. eay 07-May-97 */
202/* if ((m->d[m->top-1]&BN_TBIT) && BN_is_odd(m)) */
203
204 if (BN_is_odd(m))
205 {
206# ifdef MONT_EXP_WORD
207 if (a->top == 1 && !a->neg)
208 {
209 BN_ULONG A = a->d[0];
210 ret=BN_mod_exp_mont_word(r,A,p,m,ctx,NULL);
211 }
212 else
213# endif
214 ret=BN_mod_exp_mont(r,a,p,m,ctx,NULL);
215 }
216 else
217#endif
218#ifdef RECP_MUL_MOD
219 { ret=BN_mod_exp_recp(r,a,p,m,ctx); }
220#else
221 { ret=BN_mod_exp_simple(r,a,p,m,ctx); }
222#endif
223
224 return(ret);
225 }
226
227
228int BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
229 const BIGNUM *m, BN_CTX *ctx)
230 {
231 int i,j,bits,ret=0,wstart,wend,window,wvalue;
232 int start=1,ts=0;
233 BIGNUM *aa;
234 BIGNUM val[TABLE_SIZE];
235 BN_RECP_CTX recp;
236
237 bits=BN_num_bits(p);
238
239 if (bits == 0)
240 {
241 ret = BN_one(r);
242 return ret;
243 }
244
245 BN_CTX_start(ctx);
246 if ((aa = BN_CTX_get(ctx)) == NULL) goto err;
247
248 BN_RECP_CTX_init(&recp);
249 if (m->neg)
250 {
251 /* ignore sign of 'm' */
252 if (!BN_copy(aa, m)) goto err;
253 aa->neg = 0;
254 if (BN_RECP_CTX_set(&recp,aa,ctx) <= 0) goto err;
255 }
256 else
257 {
258 if (BN_RECP_CTX_set(&recp,m,ctx) <= 0) goto err;
259 }
260
261 BN_init(&(val[0]));
262 ts=1;
263
264 if (!BN_nnmod(&(val[0]),a,m,ctx)) goto err; /* 1 */
265 if (BN_is_zero(&(val[0])))
266 {
267 ret = BN_zero(r);
268 goto err;
269 }
270
271 window = BN_window_bits_for_exponent_size(bits);
272 if (window > 1)
273 {
274 if (!BN_mod_mul_reciprocal(aa,&(val[0]),&(val[0]),&recp,ctx))
275 goto err; /* 2 */
276 j=1<<(window-1);
277 for (i=1; i<j; i++)
278 {
279 BN_init(&val[i]);
280 if (!BN_mod_mul_reciprocal(&(val[i]),&(val[i-1]),aa,&recp,ctx))
281 goto err;
282 }
283 ts=i;
284 }
285
286 start=1; /* This is used to avoid multiplication etc
287 * when there is only the value '1' in the
288 * buffer. */
289 wvalue=0; /* The 'value' of the window */
290 wstart=bits-1; /* The top bit of the window */
291 wend=0; /* The bottom bit of the window */
292
293 if (!BN_one(r)) goto err;
294
295 for (;;)
296 {
297 if (BN_is_bit_set(p,wstart) == 0)
298 {
299 if (!start)
300 if (!BN_mod_mul_reciprocal(r,r,r,&recp,ctx))
301 goto err;
302 if (wstart == 0) break;
303 wstart--;
304 continue;
305 }
306 /* We now have wstart on a 'set' bit, we now need to work out
307 * how bit a window to do. To do this we need to scan
308 * forward until the last set bit before the end of the
309 * window */
310 j=wstart;
311 wvalue=1;
312 wend=0;
313 for (i=1; i<window; i++)
314 {
315 if (wstart-i < 0) break;
316 if (BN_is_bit_set(p,wstart-i))
317 {
318 wvalue<<=(i-wend);
319 wvalue|=1;
320 wend=i;
321 }
322 }
323
324 /* wend is the size of the current window */
325 j=wend+1;
326 /* add the 'bytes above' */
327 if (!start)
328 for (i=0; i<j; i++)
329 {
330 if (!BN_mod_mul_reciprocal(r,r,r,&recp,ctx))
331 goto err;
332 }
333
334 /* wvalue will be an odd number < 2^window */
335 if (!BN_mod_mul_reciprocal(r,r,&(val[wvalue>>1]),&recp,ctx))
336 goto err;
337
338 /* move the 'window' down further */
339 wstart-=wend+1;
340 wvalue=0;
341 start=0;
342 if (wstart < 0) break;
343 }
344 ret=1;
345err:
346 BN_CTX_end(ctx);
347 for (i=0; i<ts; i++)
348 BN_clear_free(&(val[i]));
349 BN_RECP_CTX_free(&recp);
350 return(ret);
351 }
352
353
354int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
355 const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
356 {
357 int i,j,bits,ret=0,wstart,wend,window,wvalue;
358 int start=1,ts=0;
359 BIGNUM *d,*r;
360 const BIGNUM *aa;
361 BIGNUM val[TABLE_SIZE];
362 BN_MONT_CTX *mont=NULL;
363
364 bn_check_top(a);
365 bn_check_top(p);
366 bn_check_top(m);
367
368 if (!(m->d[0] & 1))
369 {
370 BNerr(BN_F_BN_MOD_EXP_MONT,BN_R_CALLED_WITH_EVEN_MODULUS);
371 return(0);
372 }
373 bits=BN_num_bits(p);
374 if (bits == 0)
375 {
376 ret = BN_one(rr);
377 return ret;
378 }
379
380 BN_CTX_start(ctx);
381 d = BN_CTX_get(ctx);
382 r = BN_CTX_get(ctx);
383 if (d == NULL || r == NULL) goto err;
384
385 /* If this is not done, things will break in the montgomery
386 * part */
387
388 if (in_mont != NULL)
389 mont=in_mont;
390 else
391 {
392 if ((mont=BN_MONT_CTX_new()) == NULL) goto err;
393 if (!BN_MONT_CTX_set(mont,m,ctx)) goto err;
394 }
395
396 BN_init(&val[0]);
397 ts=1;
398 if (a->neg || BN_ucmp(a,m) >= 0)
399 {
400 if (!BN_nnmod(&(val[0]),a,m,ctx))
401 goto err;
402 aa= &(val[0]);
403 }
404 else
405 aa=a;
406 if (BN_is_zero(aa))
407 {
408 ret = BN_zero(rr);
409 goto err;
410 }
411 if (!BN_to_montgomery(&(val[0]),aa,mont,ctx)) goto err; /* 1 */
412
413 window = BN_window_bits_for_exponent_size(bits);
414 if (window > 1)
415 {
416 if (!BN_mod_mul_montgomery(d,&(val[0]),&(val[0]),mont,ctx)) goto err; /* 2 */
417 j=1<<(window-1);
418 for (i=1; i<j; i++)
419 {
420 BN_init(&(val[i]));
421 if (!BN_mod_mul_montgomery(&(val[i]),&(val[i-1]),d,mont,ctx))
422 goto err;
423 }
424 ts=i;
425 }
426
427 start=1; /* This is used to avoid multiplication etc
428 * when there is only the value '1' in the
429 * buffer. */
430 wvalue=0; /* The 'value' of the window */
431 wstart=bits-1; /* The top bit of the window */
432 wend=0; /* The bottom bit of the window */
433
434 if (!BN_to_montgomery(r,BN_value_one(),mont,ctx)) goto err;
435 for (;;)
436 {
437 if (BN_is_bit_set(p,wstart) == 0)
438 {
439 if (!start)
440 {
441 if (!BN_mod_mul_montgomery(r,r,r,mont,ctx))
442 goto err;
443 }
444 if (wstart == 0) break;
445 wstart--;
446 continue;
447 }
448 /* We now have wstart on a 'set' bit, we now need to work out
449 * how bit a window to do. To do this we need to scan
450 * forward until the last set bit before the end of the
451 * window */
452 j=wstart;
453 wvalue=1;
454 wend=0;
455 for (i=1; i<window; i++)
456 {
457 if (wstart-i < 0) break;
458 if (BN_is_bit_set(p,wstart-i))
459 {
460 wvalue<<=(i-wend);
461 wvalue|=1;
462 wend=i;
463 }
464 }
465
466 /* wend is the size of the current window */
467 j=wend+1;
468 /* add the 'bytes above' */
469 if (!start)
470 for (i=0; i<j; i++)
471 {
472 if (!BN_mod_mul_montgomery(r,r,r,mont,ctx))
473 goto err;
474 }
475
476 /* wvalue will be an odd number < 2^window */
477 if (!BN_mod_mul_montgomery(r,r,&(val[wvalue>>1]),mont,ctx))
478 goto err;
479
480 /* move the 'window' down further */
481 wstart-=wend+1;
482 wvalue=0;
483 start=0;
484 if (wstart < 0) break;
485 }
486 if (!BN_from_montgomery(rr,r,mont,ctx)) goto err;
487 ret=1;
488err:
489 if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont);
490 BN_CTX_end(ctx);
491 for (i=0; i<ts; i++)
492 BN_clear_free(&(val[i]));
493 return(ret);
494 }
495
496int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p,
497 const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
498 {
499 BN_MONT_CTX *mont = NULL;
500 int b, bits, ret=0;
501 int r_is_one;
502 BN_ULONG w, next_w;
503 BIGNUM *d, *r, *t;
504 BIGNUM *swap_tmp;
505#define BN_MOD_MUL_WORD(r, w, m) \
506 (BN_mul_word(r, (w)) && \
507 (/* BN_ucmp(r, (m)) < 0 ? 1 :*/ \
508 (BN_mod(t, r, m, ctx) && (swap_tmp = r, r = t, t = swap_tmp, 1))))
509 /* BN_MOD_MUL_WORD is only used with 'w' large,
510 * so the BN_ucmp test is probably more overhead
511 * than always using BN_mod (which uses BN_copy if
512 * a similar test returns true). */
513 /* We can use BN_mod and do not need BN_nnmod because our
514 * accumulator is never negative (the result of BN_mod does
515 * not depend on the sign of the modulus).
516 */
517#define BN_TO_MONTGOMERY_WORD(r, w, mont) \
518 (BN_set_word(r, (w)) && BN_to_montgomery(r, r, (mont), ctx))
519
520 bn_check_top(p);
521 bn_check_top(m);
522
523 if (m->top == 0 || !(m->d[0] & 1))
524 {
525 BNerr(BN_F_BN_MOD_EXP_MONT_WORD,BN_R_CALLED_WITH_EVEN_MODULUS);
526 return(0);
527 }
528 if (m->top == 1)
529 a %= m->d[0]; /* make sure that 'a' is reduced */
530
531 bits = BN_num_bits(p);
532 if (bits == 0)
533 {
534 ret = BN_one(rr);
535 return ret;
536 }
537 if (a == 0)
538 {
539 ret = BN_zero(rr);
540 return ret;
541 }
542
543 BN_CTX_start(ctx);
544 d = BN_CTX_get(ctx);
545 r = BN_CTX_get(ctx);
546 t = BN_CTX_get(ctx);
547 if (d == NULL || r == NULL || t == NULL) goto err;
548
549 if (in_mont != NULL)
550 mont=in_mont;
551 else
552 {
553 if ((mont = BN_MONT_CTX_new()) == NULL) goto err;
554 if (!BN_MONT_CTX_set(mont, m, ctx)) goto err;
555 }
556
557 r_is_one = 1; /* except for Montgomery factor */
558
559 /* bits-1 >= 0 */
560
561 /* The result is accumulated in the product r*w. */
562 w = a; /* bit 'bits-1' of 'p' is always set */
563 for (b = bits-2; b >= 0; b--)
564 {
565 /* First, square r*w. */
566 next_w = w*w;
567 if ((next_w/w) != w) /* overflow */
568 {
569 if (r_is_one)
570 {
571 if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) goto err;
572 r_is_one = 0;
573 }
574 else
575 {
576 if (!BN_MOD_MUL_WORD(r, w, m)) goto err;
577 }
578 next_w = 1;
579 }
580 w = next_w;
581 if (!r_is_one)
582 {
583 if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) goto err;
584 }
585
586 /* Second, multiply r*w by 'a' if exponent bit is set. */
587 if (BN_is_bit_set(p, b))
588 {
589 next_w = w*a;
590 if ((next_w/a) != w) /* overflow */
591 {
592 if (r_is_one)
593 {
594 if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) goto err;
595 r_is_one = 0;
596 }
597 else
598 {
599 if (!BN_MOD_MUL_WORD(r, w, m)) goto err;
600 }
601 next_w = a;
602 }
603 w = next_w;
604 }
605 }
606
607 /* Finally, set r:=r*w. */
608 if (w != 1)
609 {
610 if (r_is_one)
611 {
612 if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) goto err;
613 r_is_one = 0;
614 }
615 else
616 {
617 if (!BN_MOD_MUL_WORD(r, w, m)) goto err;
618 }
619 }
620
621 if (r_is_one) /* can happen only if a == 1*/
622 {
623 if (!BN_one(rr)) goto err;
624 }
625 else
626 {
627 if (!BN_from_montgomery(rr, r, mont, ctx)) goto err;
628 }
629 ret = 1;
630err:
631 if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont);
632 BN_CTX_end(ctx);
633 return(ret);
634 }
635
636
637/* The old fallback, simple version :-) */
638int BN_mod_exp_simple(BIGNUM *r,
639 const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
640 BN_CTX *ctx)
641 {
642 int i,j,bits,ret=0,wstart,wend,window,wvalue,ts=0;
643 int start=1;
644 BIGNUM *d;
645 BIGNUM val[TABLE_SIZE];
646
647 bits=BN_num_bits(p);
648
649 if (bits == 0)
650 {
651 ret = BN_one(r);
652 return ret;
653 }
654
655 BN_CTX_start(ctx);
656 if ((d = BN_CTX_get(ctx)) == NULL) goto err;
657
658 BN_init(&(val[0]));
659 ts=1;
660 if (!BN_nnmod(&(val[0]),a,m,ctx)) goto err; /* 1 */
661 if (BN_is_zero(&(val[0])))
662 {
663 ret = BN_zero(r);
664 goto err;
665 }
666
667 window = BN_window_bits_for_exponent_size(bits);
668 if (window > 1)
669 {
670 if (!BN_mod_mul(d,&(val[0]),&(val[0]),m,ctx))
671 goto err; /* 2 */
672 j=1<<(window-1);
673 for (i=1; i<j; i++)
674 {
675 BN_init(&(val[i]));
676 if (!BN_mod_mul(&(val[i]),&(val[i-1]),d,m,ctx))
677 goto err;
678 }
679 ts=i;
680 }
681
682 start=1; /* This is used to avoid multiplication etc
683 * when there is only the value '1' in the
684 * buffer. */
685 wvalue=0; /* The 'value' of the window */
686 wstart=bits-1; /* The top bit of the window */
687 wend=0; /* The bottom bit of the window */
688
689 if (!BN_one(r)) goto err;
690
691 for (;;)
692 {
693 if (BN_is_bit_set(p,wstart) == 0)
694 {
695 if (!start)
696 if (!BN_mod_mul(r,r,r,m,ctx))
697 goto err;
698 if (wstart == 0) break;
699 wstart--;
700 continue;
701 }
702 /* We now have wstart on a 'set' bit, we now need to work out
703 * how bit a window to do. To do this we need to scan
704 * forward until the last set bit before the end of the
705 * window */
706 j=wstart;
707 wvalue=1;
708 wend=0;
709 for (i=1; i<window; i++)
710 {
711 if (wstart-i < 0) break;
712 if (BN_is_bit_set(p,wstart-i))
713 {
714 wvalue<<=(i-wend);
715 wvalue|=1;
716 wend=i;
717 }
718 }
719
720 /* wend is the size of the current window */
721 j=wend+1;
722 /* add the 'bytes above' */
723 if (!start)
724 for (i=0; i<j; i++)
725 {
726 if (!BN_mod_mul(r,r,r,m,ctx))
727 goto err;
728 }
729
730 /* wvalue will be an odd number < 2^window */
731 if (!BN_mod_mul(r,r,&(val[wvalue>>1]),m,ctx))
732 goto err;
733
734 /* move the 'window' down further */
735 wstart-=wend+1;
736 wvalue=0;
737 start=0;
738 if (wstart < 0) break;
739 }
740 ret=1;
741err:
742 BN_CTX_end(ctx);
743 for (i=0; i<ts; i++)
744 BN_clear_free(&(val[i]));
745 return(ret);
746 }
747
diff --git a/src/lib/libcrypto/bn/bn_exp2.c b/src/lib/libcrypto/bn/bn_exp2.c
deleted file mode 100644
index 73ccd58a83..0000000000
--- a/src/lib/libcrypto/bn/bn_exp2.c
+++ /dev/null
@@ -1,313 +0,0 @@
1/* crypto/bn/bn_exp2.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58/* ====================================================================
59 * Copyright (c) 1998-2000 The OpenSSL Project. All rights reserved.
60 *
61 * Redistribution and use in source and binary forms, with or without
62 * modification, are permitted provided that the following conditions
63 * are met:
64 *
65 * 1. Redistributions of source code must retain the above copyright
66 * notice, this list of conditions and the following disclaimer.
67 *
68 * 2. Redistributions in binary form must reproduce the above copyright
69 * notice, this list of conditions and the following disclaimer in
70 * the documentation and/or other materials provided with the
71 * distribution.
72 *
73 * 3. All advertising materials mentioning features or use of this
74 * software must display the following acknowledgment:
75 * "This product includes software developed by the OpenSSL Project
76 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
77 *
78 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
79 * endorse or promote products derived from this software without
80 * prior written permission. For written permission, please contact
81 * openssl-core@openssl.org.
82 *
83 * 5. Products derived from this software may not be called "OpenSSL"
84 * nor may "OpenSSL" appear in their names without prior written
85 * permission of the OpenSSL Project.
86 *
87 * 6. Redistributions of any form whatsoever must retain the following
88 * acknowledgment:
89 * "This product includes software developed by the OpenSSL Project
90 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
91 *
92 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
93 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
94 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
95 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
96 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
97 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
98 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
99 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
100 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
101 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
102 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
103 * OF THE POSSIBILITY OF SUCH DAMAGE.
104 * ====================================================================
105 *
106 * This product includes cryptographic software written by Eric Young
107 * (eay@cryptsoft.com). This product includes software written by Tim
108 * Hudson (tjh@cryptsoft.com).
109 *
110 */
111
112#include <stdio.h>
113#include "cryptlib.h"
114#include "bn_lcl.h"
115
116#define TABLE_SIZE 32
117
118int BN_mod_exp2_mont(BIGNUM *rr, const BIGNUM *a1, const BIGNUM *p1,
119 const BIGNUM *a2, const BIGNUM *p2, const BIGNUM *m,
120 BN_CTX *ctx, BN_MONT_CTX *in_mont)
121 {
122 int i,j,bits,b,bits1,bits2,ret=0,wpos1,wpos2,window1,window2,wvalue1,wvalue2;
123 int r_is_one=1,ts1=0,ts2=0;
124 BIGNUM *d,*r;
125 const BIGNUM *a_mod_m;
126 BIGNUM val1[TABLE_SIZE], val2[TABLE_SIZE];
127 BN_MONT_CTX *mont=NULL;
128
129 bn_check_top(a1);
130 bn_check_top(p1);
131 bn_check_top(a2);
132 bn_check_top(p2);
133 bn_check_top(m);
134
135 if (!(m->d[0] & 1))
136 {
137 BNerr(BN_F_BN_MOD_EXP2_MONT,BN_R_CALLED_WITH_EVEN_MODULUS);
138 return(0);
139 }
140 bits1=BN_num_bits(p1);
141 bits2=BN_num_bits(p2);
142 if ((bits1 == 0) && (bits2 == 0))
143 {
144 ret = BN_one(rr);
145 return ret;
146 }
147
148 bits=(bits1 > bits2)?bits1:bits2;
149
150 BN_CTX_start(ctx);
151 d = BN_CTX_get(ctx);
152 r = BN_CTX_get(ctx);
153 if (d == NULL || r == NULL) goto err;
154
155 if (in_mont != NULL)
156 mont=in_mont;
157 else
158 {
159 if ((mont=BN_MONT_CTX_new()) == NULL) goto err;
160 if (!BN_MONT_CTX_set(mont,m,ctx)) goto err;
161 }
162
163 window1 = BN_window_bits_for_exponent_size(bits1);
164 window2 = BN_window_bits_for_exponent_size(bits2);
165
166 /*
167 * Build table for a1: val1[i] := a1^(2*i + 1) mod m for i = 0 .. 2^(window1-1)
168 */
169 BN_init(&val1[0]);
170 ts1=1;
171 if (a1->neg || BN_ucmp(a1,m) >= 0)
172 {
173 if (!BN_mod(&(val1[0]),a1,m,ctx))
174 goto err;
175 a_mod_m = &(val1[0]);
176 }
177 else
178 a_mod_m = a1;
179 if (BN_is_zero(a_mod_m))
180 {
181 ret = BN_zero(rr);
182 goto err;
183 }
184
185 if (!BN_to_montgomery(&(val1[0]),a_mod_m,mont,ctx)) goto err;
186 if (window1 > 1)
187 {
188 if (!BN_mod_mul_montgomery(d,&(val1[0]),&(val1[0]),mont,ctx)) goto err;
189
190 j=1<<(window1-1);
191 for (i=1; i<j; i++)
192 {
193 BN_init(&(val1[i]));
194 if (!BN_mod_mul_montgomery(&(val1[i]),&(val1[i-1]),d,mont,ctx))
195 goto err;
196 }
197 ts1=i;
198 }
199
200
201 /*
202 * Build table for a2: val2[i] := a2^(2*i + 1) mod m for i = 0 .. 2^(window2-1)
203 */
204 BN_init(&val2[0]);
205 ts2=1;
206 if (a2->neg || BN_ucmp(a2,m) >= 0)
207 {
208 if (!BN_mod(&(val2[0]),a2,m,ctx))
209 goto err;
210 a_mod_m = &(val2[0]);
211 }
212 else
213 a_mod_m = a2;
214 if (BN_is_zero(a_mod_m))
215 {
216 ret = BN_zero(rr);
217 goto err;
218 }
219 if (!BN_to_montgomery(&(val2[0]),a_mod_m,mont,ctx)) goto err;
220 if (window2 > 1)
221 {
222 if (!BN_mod_mul_montgomery(d,&(val2[0]),&(val2[0]),mont,ctx)) goto err;
223
224 j=1<<(window2-1);
225 for (i=1; i<j; i++)
226 {
227 BN_init(&(val2[i]));
228 if (!BN_mod_mul_montgomery(&(val2[i]),&(val2[i-1]),d,mont,ctx))
229 goto err;
230 }
231 ts2=i;
232 }
233
234
235 /* Now compute the power product, using independent windows. */
236 r_is_one=1;
237 wvalue1=0; /* The 'value' of the first window */
238 wvalue2=0; /* The 'value' of the second window */
239 wpos1=0; /* If wvalue1 > 0, the bottom bit of the first window */
240 wpos2=0; /* If wvalue2 > 0, the bottom bit of the second window */
241
242 if (!BN_to_montgomery(r,BN_value_one(),mont,ctx)) goto err;
243 for (b=bits-1; b>=0; b--)
244 {
245 if (!r_is_one)
246 {
247 if (!BN_mod_mul_montgomery(r,r,r,mont,ctx))
248 goto err;
249 }
250
251 if (!wvalue1)
252 if (BN_is_bit_set(p1, b))
253 {
254 /* consider bits b-window1+1 .. b for this window */
255 i = b-window1+1;
256 while (!BN_is_bit_set(p1, i)) /* works for i<0 */
257 i++;
258 wpos1 = i;
259 wvalue1 = 1;
260 for (i = b-1; i >= wpos1; i--)
261 {
262 wvalue1 <<= 1;
263 if (BN_is_bit_set(p1, i))
264 wvalue1++;
265 }
266 }
267
268 if (!wvalue2)
269 if (BN_is_bit_set(p2, b))
270 {
271 /* consider bits b-window2+1 .. b for this window */
272 i = b-window2+1;
273 while (!BN_is_bit_set(p2, i))
274 i++;
275 wpos2 = i;
276 wvalue2 = 1;
277 for (i = b-1; i >= wpos2; i--)
278 {
279 wvalue2 <<= 1;
280 if (BN_is_bit_set(p2, i))
281 wvalue2++;
282 }
283 }
284
285 if (wvalue1 && b == wpos1)
286 {
287 /* wvalue1 is odd and < 2^window1 */
288 if (!BN_mod_mul_montgomery(r,r,&(val1[wvalue1>>1]),mont,ctx))
289 goto err;
290 wvalue1 = 0;
291 r_is_one = 0;
292 }
293
294 if (wvalue2 && b == wpos2)
295 {
296 /* wvalue2 is odd and < 2^window2 */
297 if (!BN_mod_mul_montgomery(r,r,&(val2[wvalue2>>1]),mont,ctx))
298 goto err;
299 wvalue2 = 0;
300 r_is_one = 0;
301 }
302 }
303 BN_from_montgomery(rr,r,mont,ctx);
304 ret=1;
305err:
306 if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont);
307 BN_CTX_end(ctx);
308 for (i=0; i<ts1; i++)
309 BN_clear_free(&(val1[i]));
310 for (i=0; i<ts2; i++)
311 BN_clear_free(&(val2[i]));
312 return(ret);
313 }
diff --git a/src/lib/libcrypto/bn/bn_gcd.c b/src/lib/libcrypto/bn/bn_gcd.c
deleted file mode 100644
index 7649f63fd2..0000000000
--- a/src/lib/libcrypto/bn/bn_gcd.c
+++ /dev/null
@@ -1,490 +0,0 @@
1/* crypto/bn/bn_gcd.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58/* ====================================================================
59 * Copyright (c) 1998-2001 The OpenSSL Project. All rights reserved.
60 *
61 * Redistribution and use in source and binary forms, with or without
62 * modification, are permitted provided that the following conditions
63 * are met:
64 *
65 * 1. Redistributions of source code must retain the above copyright
66 * notice, this list of conditions and the following disclaimer.
67 *
68 * 2. Redistributions in binary form must reproduce the above copyright
69 * notice, this list of conditions and the following disclaimer in
70 * the documentation and/or other materials provided with the
71 * distribution.
72 *
73 * 3. All advertising materials mentioning features or use of this
74 * software must display the following acknowledgment:
75 * "This product includes software developed by the OpenSSL Project
76 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
77 *
78 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
79 * endorse or promote products derived from this software without
80 * prior written permission. For written permission, please contact
81 * openssl-core@openssl.org.
82 *
83 * 5. Products derived from this software may not be called "OpenSSL"
84 * nor may "OpenSSL" appear in their names without prior written
85 * permission of the OpenSSL Project.
86 *
87 * 6. Redistributions of any form whatsoever must retain the following
88 * acknowledgment:
89 * "This product includes software developed by the OpenSSL Project
90 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
91 *
92 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
93 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
94 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
95 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
96 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
97 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
98 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
99 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
100 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
101 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
102 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
103 * OF THE POSSIBILITY OF SUCH DAMAGE.
104 * ====================================================================
105 *
106 * This product includes cryptographic software written by Eric Young
107 * (eay@cryptsoft.com). This product includes software written by Tim
108 * Hudson (tjh@cryptsoft.com).
109 *
110 */
111
112#include "cryptlib.h"
113#include "bn_lcl.h"
114
115static BIGNUM *euclid(BIGNUM *a, BIGNUM *b);
116
117int BN_gcd(BIGNUM *r, const BIGNUM *in_a, const BIGNUM *in_b, BN_CTX *ctx)
118 {
119 BIGNUM *a,*b,*t;
120 int ret=0;
121
122 bn_check_top(in_a);
123 bn_check_top(in_b);
124
125 BN_CTX_start(ctx);
126 a = BN_CTX_get(ctx);
127 b = BN_CTX_get(ctx);
128 if (a == NULL || b == NULL) goto err;
129
130 if (BN_copy(a,in_a) == NULL) goto err;
131 if (BN_copy(b,in_b) == NULL) goto err;
132 a->neg = 0;
133 b->neg = 0;
134
135 if (BN_cmp(a,b) < 0) { t=a; a=b; b=t; }
136 t=euclid(a,b);
137 if (t == NULL) goto err;
138
139 if (BN_copy(r,t) == NULL) goto err;
140 ret=1;
141err:
142 BN_CTX_end(ctx);
143 return(ret);
144 }
145
146static BIGNUM *euclid(BIGNUM *a, BIGNUM *b)
147 {
148 BIGNUM *t;
149 int shifts=0;
150
151 bn_check_top(a);
152 bn_check_top(b);
153
154 /* 0 <= b <= a */
155 while (!BN_is_zero(b))
156 {
157 /* 0 < b <= a */
158
159 if (BN_is_odd(a))
160 {
161 if (BN_is_odd(b))
162 {
163 if (!BN_sub(a,a,b)) goto err;
164 if (!BN_rshift1(a,a)) goto err;
165 if (BN_cmp(a,b) < 0)
166 { t=a; a=b; b=t; }
167 }
168 else /* a odd - b even */
169 {
170 if (!BN_rshift1(b,b)) goto err;
171 if (BN_cmp(a,b) < 0)
172 { t=a; a=b; b=t; }
173 }
174 }
175 else /* a is even */
176 {
177 if (BN_is_odd(b))
178 {
179 if (!BN_rshift1(a,a)) goto err;
180 if (BN_cmp(a,b) < 0)
181 { t=a; a=b; b=t; }
182 }
183 else /* a even - b even */
184 {
185 if (!BN_rshift1(a,a)) goto err;
186 if (!BN_rshift1(b,b)) goto err;
187 shifts++;
188 }
189 }
190 /* 0 <= b <= a */
191 }
192
193 if (shifts)
194 {
195 if (!BN_lshift(a,a,shifts)) goto err;
196 }
197 return(a);
198err:
199 return(NULL);
200 }
201
202
203/* solves ax == 1 (mod n) */
204BIGNUM *BN_mod_inverse(BIGNUM *in,
205 const BIGNUM *a, const BIGNUM *n, BN_CTX *ctx)
206 {
207 BIGNUM *A,*B,*X,*Y,*M,*D,*T,*R=NULL;
208 BIGNUM *ret=NULL;
209 int sign;
210
211 bn_check_top(a);
212 bn_check_top(n);
213
214 BN_CTX_start(ctx);
215 A = BN_CTX_get(ctx);
216 B = BN_CTX_get(ctx);
217 X = BN_CTX_get(ctx);
218 D = BN_CTX_get(ctx);
219 M = BN_CTX_get(ctx);
220 Y = BN_CTX_get(ctx);
221 T = BN_CTX_get(ctx);
222 if (T == NULL) goto err;
223
224 if (in == NULL)
225 R=BN_new();
226 else
227 R=in;
228 if (R == NULL) goto err;
229
230 BN_one(X);
231 BN_zero(Y);
232 if (BN_copy(B,a) == NULL) goto err;
233 if (BN_copy(A,n) == NULL) goto err;
234 A->neg = 0;
235 if (B->neg || (BN_ucmp(B, A) >= 0))
236 {
237 if (!BN_nnmod(B, B, A, ctx)) goto err;
238 }
239 sign = -1;
240 /* From B = a mod |n|, A = |n| it follows that
241 *
242 * 0 <= B < A,
243 * -sign*X*a == B (mod |n|),
244 * sign*Y*a == A (mod |n|).
245 */
246
247 if (BN_is_odd(n) && (BN_num_bits(n) <= (BN_BITS <= 32 ? 450 : 2048)))
248 {
249 /* Binary inversion algorithm; requires odd modulus.
250 * This is faster than the general algorithm if the modulus
251 * is sufficiently small (about 400 .. 500 bits on 32-bit
252 * sytems, but much more on 64-bit systems) */
253 int shift;
254
255 while (!BN_is_zero(B))
256 {
257 /*
258 * 0 < B < |n|,
259 * 0 < A <= |n|,
260 * (1) -sign*X*a == B (mod |n|),
261 * (2) sign*Y*a == A (mod |n|)
262 */
263
264 /* Now divide B by the maximum possible power of two in the integers,
265 * and divide X by the same value mod |n|.
266 * When we're done, (1) still holds. */
267 shift = 0;
268 while (!BN_is_bit_set(B, shift)) /* note that 0 < B */
269 {
270 shift++;
271
272 if (BN_is_odd(X))
273 {
274 if (!BN_uadd(X, X, n)) goto err;
275 }
276 /* now X is even, so we can easily divide it by two */
277 if (!BN_rshift1(X, X)) goto err;
278 }
279 if (shift > 0)
280 {
281 if (!BN_rshift(B, B, shift)) goto err;
282 }
283
284
285 /* Same for A and Y. Afterwards, (2) still holds. */
286 shift = 0;
287 while (!BN_is_bit_set(A, shift)) /* note that 0 < A */
288 {
289 shift++;
290
291 if (BN_is_odd(Y))
292 {
293 if (!BN_uadd(Y, Y, n)) goto err;
294 }
295 /* now Y is even */
296 if (!BN_rshift1(Y, Y)) goto err;
297 }
298 if (shift > 0)
299 {
300 if (!BN_rshift(A, A, shift)) goto err;
301 }
302
303
304 /* We still have (1) and (2).
305 * Both A and B are odd.
306 * The following computations ensure that
307 *
308 * 0 <= B < |n|,
309 * 0 < A < |n|,
310 * (1) -sign*X*a == B (mod |n|),
311 * (2) sign*Y*a == A (mod |n|),
312 *
313 * and that either A or B is even in the next iteration.
314 */
315 if (BN_ucmp(B, A) >= 0)
316 {
317 /* -sign*(X + Y)*a == B - A (mod |n|) */
318 if (!BN_uadd(X, X, Y)) goto err;
319 /* NB: we could use BN_mod_add_quick(X, X, Y, n), but that
320 * actually makes the algorithm slower */
321 if (!BN_usub(B, B, A)) goto err;
322 }
323 else
324 {
325 /* sign*(X + Y)*a == A - B (mod |n|) */
326 if (!BN_uadd(Y, Y, X)) goto err;
327 /* as above, BN_mod_add_quick(Y, Y, X, n) would slow things down */
328 if (!BN_usub(A, A, B)) goto err;
329 }
330 }
331 }
332 else
333 {
334 /* general inversion algorithm */
335
336 while (!BN_is_zero(B))
337 {
338 BIGNUM *tmp;
339
340 /*
341 * 0 < B < A,
342 * (*) -sign*X*a == B (mod |n|),
343 * sign*Y*a == A (mod |n|)
344 */
345
346 /* (D, M) := (A/B, A%B) ... */
347 if (BN_num_bits(A) == BN_num_bits(B))
348 {
349 if (!BN_one(D)) goto err;
350 if (!BN_sub(M,A,B)) goto err;
351 }
352 else if (BN_num_bits(A) == BN_num_bits(B) + 1)
353 {
354 /* A/B is 1, 2, or 3 */
355 if (!BN_lshift1(T,B)) goto err;
356 if (BN_ucmp(A,T) < 0)
357 {
358 /* A < 2*B, so D=1 */
359 if (!BN_one(D)) goto err;
360 if (!BN_sub(M,A,B)) goto err;
361 }
362 else
363 {
364 /* A >= 2*B, so D=2 or D=3 */
365 if (!BN_sub(M,A,T)) goto err;
366 if (!BN_add(D,T,B)) goto err; /* use D (:= 3*B) as temp */
367 if (BN_ucmp(A,D) < 0)
368 {
369 /* A < 3*B, so D=2 */
370 if (!BN_set_word(D,2)) goto err;
371 /* M (= A - 2*B) already has the correct value */
372 }
373 else
374 {
375 /* only D=3 remains */
376 if (!BN_set_word(D,3)) goto err;
377 /* currently M = A - 2*B, but we need M = A - 3*B */
378 if (!BN_sub(M,M,B)) goto err;
379 }
380 }
381 }
382 else
383 {
384 if (!BN_div(D,M,A,B,ctx)) goto err;
385 }
386
387 /* Now
388 * A = D*B + M;
389 * thus we have
390 * (**) sign*Y*a == D*B + M (mod |n|).
391 */
392
393 tmp=A; /* keep the BIGNUM object, the value does not matter */
394
395 /* (A, B) := (B, A mod B) ... */
396 A=B;
397 B=M;
398 /* ... so we have 0 <= B < A again */
399
400 /* Since the former M is now B and the former B is now A,
401 * (**) translates into
402 * sign*Y*a == D*A + B (mod |n|),
403 * i.e.
404 * sign*Y*a - D*A == B (mod |n|).
405 * Similarly, (*) translates into
406 * -sign*X*a == A (mod |n|).
407 *
408 * Thus,
409 * sign*Y*a + D*sign*X*a == B (mod |n|),
410 * i.e.
411 * sign*(Y + D*X)*a == B (mod |n|).
412 *
413 * So if we set (X, Y, sign) := (Y + D*X, X, -sign), we arrive back at
414 * -sign*X*a == B (mod |n|),
415 * sign*Y*a == A (mod |n|).
416 * Note that X and Y stay non-negative all the time.
417 */
418
419 /* most of the time D is very small, so we can optimize tmp := D*X+Y */
420 if (BN_is_one(D))
421 {
422 if (!BN_add(tmp,X,Y)) goto err;
423 }
424 else
425 {
426 if (BN_is_word(D,2))
427 {
428 if (!BN_lshift1(tmp,X)) goto err;
429 }
430 else if (BN_is_word(D,4))
431 {
432 if (!BN_lshift(tmp,X,2)) goto err;
433 }
434 else if (D->top == 1)
435 {
436 if (!BN_copy(tmp,X)) goto err;
437 if (!BN_mul_word(tmp,D->d[0])) goto err;
438 }
439 else
440 {
441 if (!BN_mul(tmp,D,X,ctx)) goto err;
442 }
443 if (!BN_add(tmp,tmp,Y)) goto err;
444 }
445
446 M=Y; /* keep the BIGNUM object, the value does not matter */
447 Y=X;
448 X=tmp;
449 sign = -sign;
450 }
451 }
452
453 /*
454 * The while loop (Euclid's algorithm) ends when
455 * A == gcd(a,n);
456 * we have
457 * sign*Y*a == A (mod |n|),
458 * where Y is non-negative.
459 */
460
461 if (sign < 0)
462 {
463 if (!BN_sub(Y,n,Y)) goto err;
464 }
465 /* Now Y*a == A (mod |n|). */
466
467
468 if (BN_is_one(A))
469 {
470 /* Y*a == 1 (mod |n|) */
471 if (!Y->neg && BN_ucmp(Y,n) < 0)
472 {
473 if (!BN_copy(R,Y)) goto err;
474 }
475 else
476 {
477 if (!BN_nnmod(R,Y,n,ctx)) goto err;
478 }
479 }
480 else
481 {
482 BNerr(BN_F_BN_MOD_INVERSE,BN_R_NO_INVERSE);
483 goto err;
484 }
485 ret=R;
486err:
487 if ((ret == NULL) && (in == NULL)) BN_free(R);
488 BN_CTX_end(ctx);
489 return(ret);
490 }
diff --git a/src/lib/libcrypto/bn/bn_kron.c b/src/lib/libcrypto/bn/bn_kron.c
deleted file mode 100644
index 49f75594ae..0000000000
--- a/src/lib/libcrypto/bn/bn_kron.c
+++ /dev/null
@@ -1,182 +0,0 @@
1/* crypto/bn/bn_kron.c */
2/* ====================================================================
3 * Copyright (c) 1998-2000 The OpenSSL Project. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 *
17 * 3. All advertising materials mentioning features or use of this
18 * software must display the following acknowledgment:
19 * "This product includes software developed by the OpenSSL Project
20 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
21 *
22 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
23 * endorse or promote products derived from this software without
24 * prior written permission. For written permission, please contact
25 * openssl-core@openssl.org.
26 *
27 * 5. Products derived from this software may not be called "OpenSSL"
28 * nor may "OpenSSL" appear in their names without prior written
29 * permission of the OpenSSL Project.
30 *
31 * 6. Redistributions of any form whatsoever must retain the following
32 * acknowledgment:
33 * "This product includes software developed by the OpenSSL Project
34 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
35 *
36 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
37 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
38 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
39 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
40 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
42 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
43 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
44 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
45 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
46 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
47 * OF THE POSSIBILITY OF SUCH DAMAGE.
48 * ====================================================================
49 *
50 * This product includes cryptographic software written by Eric Young
51 * (eay@cryptsoft.com). This product includes software written by Tim
52 * Hudson (tjh@cryptsoft.com).
53 *
54 */
55
56#include "bn_lcl.h"
57
58
59/* least significant word */
60#define BN_lsw(n) (((n)->top == 0) ? (BN_ULONG) 0 : (n)->d[0])
61
62/* Returns -2 for errors because both -1 and 0 are valid results. */
63int BN_kronecker(const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
64 {
65 int i;
66 int ret = -2; /* avoid 'uninitialized' warning */
67 int err = 0;
68 BIGNUM *A, *B, *tmp;
69 /* In 'tab', only odd-indexed entries are relevant:
70 * For any odd BIGNUM n,
71 * tab[BN_lsw(n) & 7]
72 * is $(-1)^{(n^2-1)/8}$ (using TeX notation).
73 * Note that the sign of n does not matter.
74 */
75 static const int tab[8] = {0, 1, 0, -1, 0, -1, 0, 1};
76
77 BN_CTX_start(ctx);
78 A = BN_CTX_get(ctx);
79 B = BN_CTX_get(ctx);
80 if (B == NULL) goto end;
81
82 err = !BN_copy(A, a);
83 if (err) goto end;
84 err = !BN_copy(B, b);
85 if (err) goto end;
86
87 /*
88 * Kronecker symbol, imlemented according to Henri Cohen,
89 * "A Course in Computational Algebraic Number Theory"
90 * (algorithm 1.4.10).
91 */
92
93 /* Cohen's step 1: */
94
95 if (BN_is_zero(B))
96 {
97 ret = BN_abs_is_word(A, 1);
98 goto end;
99 }
100
101 /* Cohen's step 2: */
102
103 if (!BN_is_odd(A) && !BN_is_odd(B))
104 {
105 ret = 0;
106 goto end;
107 }
108
109 /* now B is non-zero */
110 i = 0;
111 while (!BN_is_bit_set(B, i))
112 i++;
113 err = !BN_rshift(B, B, i);
114 if (err) goto end;
115 if (i & 1)
116 {
117 /* i is odd */
118 /* (thus B was even, thus A must be odd!) */
119
120 /* set 'ret' to $(-1)^{(A^2-1)/8}$ */
121 ret = tab[BN_lsw(A) & 7];
122 }
123 else
124 {
125 /* i is even */
126 ret = 1;
127 }
128
129 if (B->neg)
130 {
131 B->neg = 0;
132 if (A->neg)
133 ret = -ret;
134 }
135
136 /* now B is positive and odd, so what remains to be done is
137 * to compute the Jacobi symbol (A/B) and multiply it by 'ret' */
138
139 while (1)
140 {
141 /* Cohen's step 3: */
142
143 /* B is positive and odd */
144
145 if (BN_is_zero(A))
146 {
147 ret = BN_is_one(B) ? ret : 0;
148 goto end;
149 }
150
151 /* now A is non-zero */
152 i = 0;
153 while (!BN_is_bit_set(A, i))
154 i++;
155 err = !BN_rshift(A, A, i);
156 if (err) goto end;
157 if (i & 1)
158 {
159 /* i is odd */
160 /* multiply 'ret' by $(-1)^{(B^2-1)/8}$ */
161 ret = ret * tab[BN_lsw(B) & 7];
162 }
163
164 /* Cohen's step 4: */
165 /* multiply 'ret' by $(-1)^{(A-1)(B-1)/4}$ */
166 if ((A->neg ? ~BN_lsw(A) : BN_lsw(A)) & BN_lsw(B) & 2)
167 ret = -ret;
168
169 /* (A, B) := (B mod |A|, |A|) */
170 err = !BN_nnmod(B, B, A, ctx);
171 if (err) goto end;
172 tmp = A; A = B; B = tmp;
173 tmp->neg = 0;
174 }
175
176 end:
177 BN_CTX_end(ctx);
178 if (err)
179 return -2;
180 else
181 return ret;
182 }
diff --git a/src/lib/libcrypto/bn/bn_lcl.h b/src/lib/libcrypto/bn/bn_lcl.h
deleted file mode 100644
index 253e195e23..0000000000
--- a/src/lib/libcrypto/bn/bn_lcl.h
+++ /dev/null
@@ -1,453 +0,0 @@
1/* crypto/bn/bn_lcl.h */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58/* ====================================================================
59 * Copyright (c) 1998-2000 The OpenSSL Project. All rights reserved.
60 *
61 * Redistribution and use in source and binary forms, with or without
62 * modification, are permitted provided that the following conditions
63 * are met:
64 *
65 * 1. Redistributions of source code must retain the above copyright
66 * notice, this list of conditions and the following disclaimer.
67 *
68 * 2. Redistributions in binary form must reproduce the above copyright
69 * notice, this list of conditions and the following disclaimer in
70 * the documentation and/or other materials provided with the
71 * distribution.
72 *
73 * 3. All advertising materials mentioning features or use of this
74 * software must display the following acknowledgment:
75 * "This product includes software developed by the OpenSSL Project
76 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
77 *
78 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
79 * endorse or promote products derived from this software without
80 * prior written permission. For written permission, please contact
81 * openssl-core@openssl.org.
82 *
83 * 5. Products derived from this software may not be called "OpenSSL"
84 * nor may "OpenSSL" appear in their names without prior written
85 * permission of the OpenSSL Project.
86 *
87 * 6. Redistributions of any form whatsoever must retain the following
88 * acknowledgment:
89 * "This product includes software developed by the OpenSSL Project
90 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
91 *
92 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
93 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
94 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
95 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
96 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
97 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
98 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
99 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
100 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
101 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
102 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
103 * OF THE POSSIBILITY OF SUCH DAMAGE.
104 * ====================================================================
105 *
106 * This product includes cryptographic software written by Eric Young
107 * (eay@cryptsoft.com). This product includes software written by Tim
108 * Hudson (tjh@cryptsoft.com).
109 *
110 */
111
112#ifndef HEADER_BN_LCL_H
113#define HEADER_BN_LCL_H
114
115#include <openssl/bn.h>
116
117#ifdef __cplusplus
118extern "C" {
119#endif
120
121
122/* Used for temp variables */
123#define BN_CTX_NUM 32
124#define BN_CTX_NUM_POS 12
125struct bignum_ctx
126 {
127 int tos;
128 BIGNUM bn[BN_CTX_NUM];
129 int flags;
130 int depth;
131 int pos[BN_CTX_NUM_POS];
132 int too_many;
133 } /* BN_CTX */;
134
135
136/*
137 * BN_window_bits_for_exponent_size -- macro for sliding window mod_exp functions
138 *
139 *
140 * For window size 'w' (w >= 2) and a random 'b' bits exponent,
141 * the number of multiplications is a constant plus on average
142 *
143 * 2^(w-1) + (b-w)/(w+1);
144 *
145 * here 2^(w-1) is for precomputing the table (we actually need
146 * entries only for windows that have the lowest bit set), and
147 * (b-w)/(w+1) is an approximation for the expected number of
148 * w-bit windows, not counting the first one.
149 *
150 * Thus we should use
151 *
152 * w >= 6 if b > 671
153 * w = 5 if 671 > b > 239
154 * w = 4 if 239 > b > 79
155 * w = 3 if 79 > b > 23
156 * w <= 2 if 23 > b
157 *
158 * (with draws in between). Very small exponents are often selected
159 * with low Hamming weight, so we use w = 1 for b <= 23.
160 */
161#if 1
162#define BN_window_bits_for_exponent_size(b) \
163 ((b) > 671 ? 6 : \
164 (b) > 239 ? 5 : \
165 (b) > 79 ? 4 : \
166 (b) > 23 ? 3 : 1)
167#else
168/* Old SSLeay/OpenSSL table.
169 * Maximum window size was 5, so this table differs for b==1024;
170 * but it coincides for other interesting values (b==160, b==512).
171 */
172#define BN_window_bits_for_exponent_size(b) \
173 ((b) > 255 ? 5 : \
174 (b) > 127 ? 4 : \
175 (b) > 17 ? 3 : 1)
176#endif
177
178
179
180/* Pentium pro 16,16,16,32,64 */
181/* Alpha 16,16,16,16.64 */
182#define BN_MULL_SIZE_NORMAL (16) /* 32 */
183#define BN_MUL_RECURSIVE_SIZE_NORMAL (16) /* 32 less than */
184#define BN_SQR_RECURSIVE_SIZE_NORMAL (16) /* 32 */
185#define BN_MUL_LOW_RECURSIVE_SIZE_NORMAL (32) /* 32 */
186#define BN_MONT_CTX_SET_SIZE_WORD (64) /* 32 */
187
188#if !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM) && !defined(PEDANTIC)
189/*
190 * BN_UMULT_HIGH section.
191 *
192 * No, I'm not trying to overwhelm you when stating that the
193 * product of N-bit numbers is 2*N bits wide:-) No, I don't expect
194 * you to be impressed when I say that if the compiler doesn't
195 * support 2*N integer type, then you have to replace every N*N
196 * multiplication with 4 (N/2)*(N/2) accompanied by some shifts
197 * and additions which unavoidably results in severe performance
198 * penalties. Of course provided that the hardware is capable of
199 * producing 2*N result... That's when you normally start
200 * considering assembler implementation. However! It should be
201 * pointed out that some CPUs (most notably Alpha, PowerPC and
202 * upcoming IA-64 family:-) provide *separate* instruction
203 * calculating the upper half of the product placing the result
204 * into a general purpose register. Now *if* the compiler supports
205 * inline assembler, then it's not impossible to implement the
206 * "bignum" routines (and have the compiler optimize 'em)
207 * exhibiting "native" performance in C. That's what BN_UMULT_HIGH
208 * macro is about:-)
209 *
210 * <appro@fy.chalmers.se>
211 */
212# if defined(__alpha) && (defined(SIXTY_FOUR_BIT_LONG) || defined(SIXTY_FOUR_BIT))
213# if defined(__DECC)
214# include <c_asm.h>
215# define BN_UMULT_HIGH(a,b) (BN_ULONG)asm("umulh %a0,%a1,%v0",(a),(b))
216# elif defined(__GNUC__)
217# define BN_UMULT_HIGH(a,b) ({ \
218 register BN_ULONG ret; \
219 asm ("umulh %1,%2,%0" \
220 : "=r"(ret) \
221 : "r"(a), "r"(b)); \
222 ret; })
223# endif /* compiler */
224# elif defined(_ARCH_PPC) && defined(__64BIT__) && defined(SIXTY_FOUR_BIT_LONG)
225# if defined(__GNUC__)
226# define BN_UMULT_HIGH(a,b) ({ \
227 register BN_ULONG ret; \
228 asm ("mulhdu %0,%1,%2" \
229 : "=r"(ret) \
230 : "r"(a), "r"(b)); \
231 ret; })
232# endif /* compiler */
233# elif defined(__x86_64) && defined(SIXTY_FOUR_BIT_LONG)
234# if defined(__GNUC__)
235# define BN_UMULT_HIGH(a,b) ({ \
236 register BN_ULONG ret,discard; \
237 asm ("mulq %3" \
238 : "=a"(discard),"=d"(ret) \
239 : "a"(a), "g"(b) \
240 : "cc"); \
241 ret; })
242# define BN_UMULT_LOHI(low,high,a,b) \
243 asm ("mulq %3" \
244 : "=a"(low),"=d"(high) \
245 : "a"(a),"g"(b) \
246 : "cc");
247# endif
248# endif /* cpu */
249#endif /* OPENSSL_NO_ASM */
250
251/*************************************************************
252 * Using the long long type
253 */
254#define Lw(t) (((BN_ULONG)(t))&BN_MASK2)
255#define Hw(t) (((BN_ULONG)((t)>>BN_BITS2))&BN_MASK2)
256
257/* This is used for internal error checking and is not normally used */
258#ifdef BN_DEBUG
259# include <assert.h>
260# define bn_check_top(a) assert ((a)->top >= 0 && (a)->top <= (a)->dmax);
261#else
262# define bn_check_top(a)
263#endif
264
265/* This macro is to add extra stuff for development checking */
266#ifdef BN_DEBUG
267#define bn_set_max(r) ((r)->max=(r)->top,BN_set_flags((r),BN_FLG_STATIC_DATA))
268#else
269#define bn_set_max(r)
270#endif
271
272/* These macros are used to 'take' a section of a bignum for read only use */
273#define bn_set_low(r,a,n) \
274 { \
275 (r)->top=((a)->top > (n))?(n):(a)->top; \
276 (r)->d=(a)->d; \
277 (r)->neg=(a)->neg; \
278 (r)->flags|=BN_FLG_STATIC_DATA; \
279 bn_set_max(r); \
280 }
281
282#define bn_set_high(r,a,n) \
283 { \
284 if ((a)->top > (n)) \
285 { \
286 (r)->top=(a)->top-n; \
287 (r)->d= &((a)->d[n]); \
288 } \
289 else \
290 (r)->top=0; \
291 (r)->neg=(a)->neg; \
292 (r)->flags|=BN_FLG_STATIC_DATA; \
293 bn_set_max(r); \
294 }
295
296#ifdef BN_LLONG
297#define mul_add(r,a,w,c) { \
298 BN_ULLONG t; \
299 t=(BN_ULLONG)w * (a) + (r) + (c); \
300 (r)= Lw(t); \
301 (c)= Hw(t); \
302 }
303
304#define mul(r,a,w,c) { \
305 BN_ULLONG t; \
306 t=(BN_ULLONG)w * (a) + (c); \
307 (r)= Lw(t); \
308 (c)= Hw(t); \
309 }
310
311#define sqr(r0,r1,a) { \
312 BN_ULLONG t; \
313 t=(BN_ULLONG)(a)*(a); \
314 (r0)=Lw(t); \
315 (r1)=Hw(t); \
316 }
317
318#elif defined(BN_UMULT_HIGH)
319#define mul_add(r,a,w,c) { \
320 BN_ULONG high,low,ret,tmp=(a); \
321 ret = (r); \
322 high= BN_UMULT_HIGH(w,tmp); \
323 ret += (c); \
324 low = (w) * tmp; \
325 (c) = (ret<(c))?1:0; \
326 (c) += high; \
327 ret += low; \
328 (c) += (ret<low)?1:0; \
329 (r) = ret; \
330 }
331
332#define mul(r,a,w,c) { \
333 BN_ULONG high,low,ret,ta=(a); \
334 low = (w) * ta; \
335 high= BN_UMULT_HIGH(w,ta); \
336 ret = low + (c); \
337 (c) = high; \
338 (c) += (ret<low)?1:0; \
339 (r) = ret; \
340 }
341
342#define sqr(r0,r1,a) { \
343 BN_ULONG tmp=(a); \
344 (r0) = tmp * tmp; \
345 (r1) = BN_UMULT_HIGH(tmp,tmp); \
346 }
347
348#else
349/*************************************************************
350 * No long long type
351 */
352
353#define LBITS(a) ((a)&BN_MASK2l)
354#define HBITS(a) (((a)>>BN_BITS4)&BN_MASK2l)
355#define L2HBITS(a) (((a)<<BN_BITS4)&BN_MASK2)
356
357#define LLBITS(a) ((a)&BN_MASKl)
358#define LHBITS(a) (((a)>>BN_BITS2)&BN_MASKl)
359#define LL2HBITS(a) ((BN_ULLONG)((a)&BN_MASKl)<<BN_BITS2)
360
361#define mul64(l,h,bl,bh) \
362 { \
363 BN_ULONG m,m1,lt,ht; \
364 \
365 lt=l; \
366 ht=h; \
367 m =(bh)*(lt); \
368 lt=(bl)*(lt); \
369 m1=(bl)*(ht); \
370 ht =(bh)*(ht); \
371 m=(m+m1)&BN_MASK2; if (m < m1) ht+=L2HBITS((BN_ULONG)1); \
372 ht+=HBITS(m); \
373 m1=L2HBITS(m); \
374 lt=(lt+m1)&BN_MASK2; if (lt < m1) ht++; \
375 (l)=lt; \
376 (h)=ht; \
377 }
378
379#define sqr64(lo,ho,in) \
380 { \
381 BN_ULONG l,h,m; \
382 \
383 h=(in); \
384 l=LBITS(h); \
385 h=HBITS(h); \
386 m =(l)*(h); \
387 l*=l; \
388 h*=h; \
389 h+=(m&BN_MASK2h1)>>(BN_BITS4-1); \
390 m =(m&BN_MASK2l)<<(BN_BITS4+1); \
391 l=(l+m)&BN_MASK2; if (l < m) h++; \
392 (lo)=l; \
393 (ho)=h; \
394 }
395
396#define mul_add(r,a,bl,bh,c) { \
397 BN_ULONG l,h; \
398 \
399 h= (a); \
400 l=LBITS(h); \
401 h=HBITS(h); \
402 mul64(l,h,(bl),(bh)); \
403 \
404 /* non-multiply part */ \
405 l=(l+(c))&BN_MASK2; if (l < (c)) h++; \
406 (c)=(r); \
407 l=(l+(c))&BN_MASK2; if (l < (c)) h++; \
408 (c)=h&BN_MASK2; \
409 (r)=l; \
410 }
411
412#define mul(r,a,bl,bh,c) { \
413 BN_ULONG l,h; \
414 \
415 h= (a); \
416 l=LBITS(h); \
417 h=HBITS(h); \
418 mul64(l,h,(bl),(bh)); \
419 \
420 /* non-multiply part */ \
421 l+=(c); if ((l&BN_MASK2) < (c)) h++; \
422 (c)=h&BN_MASK2; \
423 (r)=l&BN_MASK2; \
424 }
425#endif /* !BN_LLONG */
426
427void bn_mul_normal(BN_ULONG *r,BN_ULONG *a,int na,BN_ULONG *b,int nb);
428void bn_mul_comba8(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b);
429void bn_mul_comba4(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b);
430void bn_sqr_normal(BN_ULONG *r, const BN_ULONG *a, int n, BN_ULONG *tmp);
431void bn_sqr_comba8(BN_ULONG *r,const BN_ULONG *a);
432void bn_sqr_comba4(BN_ULONG *r,const BN_ULONG *a);
433int bn_cmp_words(const BN_ULONG *a,const BN_ULONG *b,int n);
434int bn_cmp_part_words(const BN_ULONG *a, const BN_ULONG *b,
435 int cl, int dl);
436#ifdef BN_RECURSION
437void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
438 BN_ULONG *t);
439void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int tn,
440 int n, BN_ULONG *t);
441void bn_mul_low_recursive(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b,int n2,
442 BN_ULONG *t);
443void bn_mul_high(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b,BN_ULONG *l,int n2,
444 BN_ULONG *t);
445void bn_sqr_recursive(BN_ULONG *r,const BN_ULONG *a, int n2, BN_ULONG *t);
446#endif
447void bn_mul_low_normal(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b, int n);
448
449#ifdef __cplusplus
450}
451#endif
452
453#endif
diff --git a/src/lib/libcrypto/bn/bn_lib.c b/src/lib/libcrypto/bn/bn_lib.c
deleted file mode 100644
index e1660450bc..0000000000
--- a/src/lib/libcrypto/bn/bn_lib.c
+++ /dev/null
@@ -1,824 +0,0 @@
1/* crypto/bn/bn_lib.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#ifndef BN_DEBUG
60# undef NDEBUG /* avoid conflicting definitions */
61# define NDEBUG
62#endif
63
64#include <assert.h>
65#include <limits.h>
66#include <stdio.h>
67#include "cryptlib.h"
68#include "bn_lcl.h"
69
70const char *BN_version="Big Number" OPENSSL_VERSION_PTEXT;
71
72/* For a 32 bit machine
73 * 2 - 4 == 128
74 * 3 - 8 == 256
75 * 4 - 16 == 512
76 * 5 - 32 == 1024
77 * 6 - 64 == 2048
78 * 7 - 128 == 4096
79 * 8 - 256 == 8192
80 */
81static int bn_limit_bits=0;
82static int bn_limit_num=8; /* (1<<bn_limit_bits) */
83static int bn_limit_bits_low=0;
84static int bn_limit_num_low=8; /* (1<<bn_limit_bits_low) */
85static int bn_limit_bits_high=0;
86static int bn_limit_num_high=8; /* (1<<bn_limit_bits_high) */
87static int bn_limit_bits_mont=0;
88static int bn_limit_num_mont=8; /* (1<<bn_limit_bits_mont) */
89
90void BN_set_params(int mult, int high, int low, int mont)
91 {
92 if (mult >= 0)
93 {
94 if (mult > (sizeof(int)*8)-1)
95 mult=sizeof(int)*8-1;
96 bn_limit_bits=mult;
97 bn_limit_num=1<<mult;
98 }
99 if (high >= 0)
100 {
101 if (high > (sizeof(int)*8)-1)
102 high=sizeof(int)*8-1;
103 bn_limit_bits_high=high;
104 bn_limit_num_high=1<<high;
105 }
106 if (low >= 0)
107 {
108 if (low > (sizeof(int)*8)-1)
109 low=sizeof(int)*8-1;
110 bn_limit_bits_low=low;
111 bn_limit_num_low=1<<low;
112 }
113 if (mont >= 0)
114 {
115 if (mont > (sizeof(int)*8)-1)
116 mont=sizeof(int)*8-1;
117 bn_limit_bits_mont=mont;
118 bn_limit_num_mont=1<<mont;
119 }
120 }
121
122int BN_get_params(int which)
123 {
124 if (which == 0) return(bn_limit_bits);
125 else if (which == 1) return(bn_limit_bits_high);
126 else if (which == 2) return(bn_limit_bits_low);
127 else if (which == 3) return(bn_limit_bits_mont);
128 else return(0);
129 }
130
131const BIGNUM *BN_value_one(void)
132 {
133 static BN_ULONG data_one=1L;
134 static BIGNUM const_one={&data_one,1,1,0};
135
136 return(&const_one);
137 }
138
139char *BN_options(void)
140 {
141 static int init=0;
142 static char data[16];
143
144 if (!init)
145 {
146 init++;
147#ifdef BN_LLONG
148 BIO_snprintf(data,sizeof data,"bn(%d,%d)",
149 (int)sizeof(BN_ULLONG)*8,(int)sizeof(BN_ULONG)*8);
150#else
151 BIO_snprintf(data,sizeof data,"bn(%d,%d)",
152 (int)sizeof(BN_ULONG)*8,(int)sizeof(BN_ULONG)*8);
153#endif
154 }
155 return(data);
156 }
157
158int BN_num_bits_word(BN_ULONG l)
159 {
160 static const char bits[256]={
161 0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4,
162 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
163 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
164 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
165 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
166 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
167 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
168 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
169 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
170 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
171 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
172 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
173 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
174 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
175 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
176 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
177 };
178
179#if defined(SIXTY_FOUR_BIT_LONG)
180 if (l & 0xffffffff00000000L)
181 {
182 if (l & 0xffff000000000000L)
183 {
184 if (l & 0xff00000000000000L)
185 {
186 return(bits[(int)(l>>56)]+56);
187 }
188 else return(bits[(int)(l>>48)]+48);
189 }
190 else
191 {
192 if (l & 0x0000ff0000000000L)
193 {
194 return(bits[(int)(l>>40)]+40);
195 }
196 else return(bits[(int)(l>>32)]+32);
197 }
198 }
199 else
200#else
201#ifdef SIXTY_FOUR_BIT
202 if (l & 0xffffffff00000000LL)
203 {
204 if (l & 0xffff000000000000LL)
205 {
206 if (l & 0xff00000000000000LL)
207 {
208 return(bits[(int)(l>>56)]+56);
209 }
210 else return(bits[(int)(l>>48)]+48);
211 }
212 else
213 {
214 if (l & 0x0000ff0000000000LL)
215 {
216 return(bits[(int)(l>>40)]+40);
217 }
218 else return(bits[(int)(l>>32)]+32);
219 }
220 }
221 else
222#endif
223#endif
224 {
225#if defined(THIRTY_TWO_BIT) || defined(SIXTY_FOUR_BIT) || defined(SIXTY_FOUR_BIT_LONG)
226 if (l & 0xffff0000L)
227 {
228 if (l & 0xff000000L)
229 return(bits[(int)(l>>24L)]+24);
230 else return(bits[(int)(l>>16L)]+16);
231 }
232 else
233#endif
234 {
235#if defined(SIXTEEN_BIT) || defined(THIRTY_TWO_BIT) || defined(SIXTY_FOUR_BIT) || defined(SIXTY_FOUR_BIT_LONG)
236 if (l & 0xff00L)
237 return(bits[(int)(l>>8)]+8);
238 else
239#endif
240 return(bits[(int)(l )] );
241 }
242 }
243 }
244
245int BN_num_bits(const BIGNUM *a)
246 {
247 BN_ULONG l;
248 int i;
249
250 bn_check_top(a);
251
252 if (a->top == 0) return(0);
253 l=a->d[a->top-1];
254 assert(l != 0);
255 i=(a->top-1)*BN_BITS2;
256 return(i+BN_num_bits_word(l));
257 }
258
259void BN_clear_free(BIGNUM *a)
260 {
261 int i;
262
263 if (a == NULL) return;
264 if (a->d != NULL)
265 {
266 OPENSSL_cleanse(a->d,a->dmax*sizeof(a->d[0]));
267 if (!(BN_get_flags(a,BN_FLG_STATIC_DATA)))
268 OPENSSL_free(a->d);
269 }
270 i=BN_get_flags(a,BN_FLG_MALLOCED);
271 OPENSSL_cleanse(a,sizeof(BIGNUM));
272 if (i)
273 OPENSSL_free(a);
274 }
275
276void BN_free(BIGNUM *a)
277 {
278 if (a == NULL) return;
279 if ((a->d != NULL) && !(BN_get_flags(a,BN_FLG_STATIC_DATA)))
280 OPENSSL_free(a->d);
281 a->flags|=BN_FLG_FREE; /* REMOVE? */
282 if (a->flags & BN_FLG_MALLOCED)
283 OPENSSL_free(a);
284 }
285
286void BN_init(BIGNUM *a)
287 {
288 memset(a,0,sizeof(BIGNUM));
289 }
290
291BIGNUM *BN_new(void)
292 {
293 BIGNUM *ret;
294
295 if ((ret=(BIGNUM *)OPENSSL_malloc(sizeof(BIGNUM))) == NULL)
296 {
297 BNerr(BN_F_BN_NEW,ERR_R_MALLOC_FAILURE);
298 return(NULL);
299 }
300 ret->flags=BN_FLG_MALLOCED;
301 ret->top=0;
302 ret->neg=0;
303 ret->dmax=0;
304 ret->d=NULL;
305 return(ret);
306 }
307
308/* This is used both by bn_expand2() and bn_dup_expand() */
309/* The caller MUST check that words > b->dmax before calling this */
310static BN_ULONG *bn_expand_internal(const BIGNUM *b, int words)
311 {
312 BN_ULONG *A,*a = NULL;
313 const BN_ULONG *B;
314 int i;
315
316 if (words > (INT_MAX/(4*BN_BITS2)))
317 {
318 BNerr(BN_F_BN_EXPAND_INTERNAL,BN_R_BIGNUM_TOO_LONG);
319 return NULL;
320 }
321
322 bn_check_top(b);
323 if (BN_get_flags(b,BN_FLG_STATIC_DATA))
324 {
325 BNerr(BN_F_BN_EXPAND_INTERNAL,BN_R_EXPAND_ON_STATIC_BIGNUM_DATA);
326 return(NULL);
327 }
328 a=A=(BN_ULONG *)OPENSSL_malloc(sizeof(BN_ULONG)*(words+1));
329 if (A == NULL)
330 {
331 BNerr(BN_F_BN_EXPAND_INTERNAL,ERR_R_MALLOC_FAILURE);
332 return(NULL);
333 }
334#if 1
335 B=b->d;
336 /* Check if the previous number needs to be copied */
337 if (B != NULL)
338 {
339 for (i=b->top>>2; i>0; i--,A+=4,B+=4)
340 {
341 /*
342 * The fact that the loop is unrolled
343 * 4-wise is a tribute to Intel. It's
344 * the one that doesn't have enough
345 * registers to accomodate more data.
346 * I'd unroll it 8-wise otherwise:-)
347 *
348 * <appro@fy.chalmers.se>
349 */
350 BN_ULONG a0,a1,a2,a3;
351 a0=B[0]; a1=B[1]; a2=B[2]; a3=B[3];
352 A[0]=a0; A[1]=a1; A[2]=a2; A[3]=a3;
353 }
354 switch (b->top&3)
355 {
356 case 3: A[2]=B[2];
357 case 2: A[1]=B[1];
358 case 1: A[0]=B[0];
359 case 0: /* workaround for ultrix cc: without 'case 0', the optimizer does
360 * the switch table by doing a=top&3; a--; goto jump_table[a];
361 * which fails for top== 0 */
362 ;
363 }
364 }
365
366 /* Now need to zero any data between b->top and b->max */
367 /* XXX Why? */
368
369 A= &(a[b->top]);
370 for (i=(words - b->top)>>3; i>0; i--,A+=8)
371 {
372 A[0]=0; A[1]=0; A[2]=0; A[3]=0;
373 A[4]=0; A[5]=0; A[6]=0; A[7]=0;
374 }
375 for (i=(words - b->top)&7; i>0; i--,A++)
376 A[0]=0;
377#else
378 memset(A,0,sizeof(BN_ULONG)*(words+1));
379 memcpy(A,b->d,sizeof(b->d[0])*b->top);
380#endif
381
382 return(a);
383 }
384
385/* This is an internal function that can be used instead of bn_expand2()
386 * when there is a need to copy BIGNUMs instead of only expanding the
387 * data part, while still expanding them.
388 * Especially useful when needing to expand BIGNUMs that are declared
389 * 'const' and should therefore not be changed.
390 * The reason to use this instead of a BN_dup() followed by a bn_expand2()
391 * is memory allocation overhead. A BN_dup() followed by a bn_expand2()
392 * will allocate new memory for the BIGNUM data twice, and free it once,
393 * while bn_dup_expand() makes sure allocation is made only once.
394 */
395
396BIGNUM *bn_dup_expand(const BIGNUM *b, int words)
397 {
398 BIGNUM *r = NULL;
399
400 /* This function does not work if
401 * words <= b->dmax && top < words
402 * because BN_dup() does not preserve 'dmax'!
403 * (But bn_dup_expand() is not used anywhere yet.)
404 */
405
406 if (words > b->dmax)
407 {
408 BN_ULONG *a = bn_expand_internal(b, words);
409
410 if (a)
411 {
412 r = BN_new();
413 if (r)
414 {
415 r->top = b->top;
416 r->dmax = words;
417 r->neg = b->neg;
418 r->d = a;
419 }
420 else
421 {
422 /* r == NULL, BN_new failure */
423 OPENSSL_free(a);
424 }
425 }
426 /* If a == NULL, there was an error in allocation in
427 bn_expand_internal(), and NULL should be returned */
428 }
429 else
430 {
431 r = BN_dup(b);
432 }
433
434 return r;
435 }
436
437/* This is an internal function that should not be used in applications.
438 * It ensures that 'b' has enough room for a 'words' word number number.
439 * It is mostly used by the various BIGNUM routines. If there is an error,
440 * NULL is returned. If not, 'b' is returned. */
441
442BIGNUM *bn_expand2(BIGNUM *b, int words)
443 {
444 if (words > b->dmax)
445 {
446 BN_ULONG *a = bn_expand_internal(b, words);
447
448 if (a)
449 {
450 if (b->d)
451 OPENSSL_free(b->d);
452 b->d=a;
453 b->dmax=words;
454 }
455 else
456 b = NULL;
457 }
458 return b;
459 }
460
461BIGNUM *BN_dup(const BIGNUM *a)
462 {
463 BIGNUM *r, *t;
464
465 if (a == NULL) return NULL;
466
467 bn_check_top(a);
468
469 t = BN_new();
470 if (t == NULL) return(NULL);
471 r = BN_copy(t, a);
472 /* now r == t || r == NULL */
473 if (r == NULL)
474 BN_free(t);
475 return r;
476 }
477
478BIGNUM *BN_copy(BIGNUM *a, const BIGNUM *b)
479 {
480 int i;
481 BN_ULONG *A;
482 const BN_ULONG *B;
483
484 bn_check_top(b);
485
486 if (a == b) return(a);
487 if (bn_wexpand(a,b->top) == NULL) return(NULL);
488
489#if 1
490 A=a->d;
491 B=b->d;
492 for (i=b->top>>2; i>0; i--,A+=4,B+=4)
493 {
494 BN_ULONG a0,a1,a2,a3;
495 a0=B[0]; a1=B[1]; a2=B[2]; a3=B[3];
496 A[0]=a0; A[1]=a1; A[2]=a2; A[3]=a3;
497 }
498 switch (b->top&3)
499 {
500 case 3: A[2]=B[2];
501 case 2: A[1]=B[1];
502 case 1: A[0]=B[0];
503 case 0: ; /* ultrix cc workaround, see comments in bn_expand_internal */
504 }
505#else
506 memcpy(a->d,b->d,sizeof(b->d[0])*b->top);
507#endif
508
509/* memset(&(a->d[b->top]),0,sizeof(a->d[0])*(a->max-b->top));*/
510 a->top=b->top;
511 if ((a->top == 0) && (a->d != NULL))
512 a->d[0]=0;
513 a->neg=b->neg;
514 return(a);
515 }
516
517void BN_swap(BIGNUM *a, BIGNUM *b)
518 {
519 int flags_old_a, flags_old_b;
520 BN_ULONG *tmp_d;
521 int tmp_top, tmp_dmax, tmp_neg;
522
523 flags_old_a = a->flags;
524 flags_old_b = b->flags;
525
526 tmp_d = a->d;
527 tmp_top = a->top;
528 tmp_dmax = a->dmax;
529 tmp_neg = a->neg;
530
531 a->d = b->d;
532 a->top = b->top;
533 a->dmax = b->dmax;
534 a->neg = b->neg;
535
536 b->d = tmp_d;
537 b->top = tmp_top;
538 b->dmax = tmp_dmax;
539 b->neg = tmp_neg;
540
541 a->flags = (flags_old_a & BN_FLG_MALLOCED) | (flags_old_b & BN_FLG_STATIC_DATA);
542 b->flags = (flags_old_b & BN_FLG_MALLOCED) | (flags_old_a & BN_FLG_STATIC_DATA);
543 }
544
545
546void BN_clear(BIGNUM *a)
547 {
548 if (a->d != NULL)
549 memset(a->d,0,a->dmax*sizeof(a->d[0]));
550 a->top=0;
551 a->neg=0;
552 }
553
554BN_ULONG BN_get_word(const BIGNUM *a)
555 {
556 int i,n;
557 BN_ULONG ret=0;
558
559 n=BN_num_bytes(a);
560 if (n > sizeof(BN_ULONG))
561 return(BN_MASK2);
562 for (i=a->top-1; i>=0; i--)
563 {
564#ifndef SIXTY_FOUR_BIT /* the data item > unsigned long */
565 ret<<=BN_BITS4; /* stops the compiler complaining */
566 ret<<=BN_BITS4;
567#else
568 ret=0;
569#endif
570 ret|=a->d[i];
571 }
572 return(ret);
573 }
574
575int BN_set_word(BIGNUM *a, BN_ULONG w)
576 {
577 int i,n;
578 if (bn_expand(a,sizeof(BN_ULONG)*8) == NULL) return(0);
579
580 n=sizeof(BN_ULONG)/BN_BYTES;
581 a->neg=0;
582 a->top=0;
583 a->d[0]=(BN_ULONG)w&BN_MASK2;
584 if (a->d[0] != 0) a->top=1;
585 for (i=1; i<n; i++)
586 {
587 /* the following is done instead of
588 * w>>=BN_BITS2 so compilers don't complain
589 * on builds where sizeof(long) == BN_TYPES */
590#ifndef SIXTY_FOUR_BIT /* the data item > unsigned long */
591 w>>=BN_BITS4;
592 w>>=BN_BITS4;
593#else
594 w=0;
595#endif
596 a->d[i]=(BN_ULONG)w&BN_MASK2;
597 if (a->d[i] != 0) a->top=i+1;
598 }
599 return(1);
600 }
601
602BIGNUM *BN_bin2bn(const unsigned char *s, int len, BIGNUM *ret)
603 {
604 unsigned int i,m;
605 unsigned int n;
606 BN_ULONG l;
607
608 if (ret == NULL) ret=BN_new();
609 if (ret == NULL) return(NULL);
610 l=0;
611 n=len;
612 if (n == 0)
613 {
614 ret->top=0;
615 return(ret);
616 }
617 if (bn_expand(ret,(int)(n+2)*8) == NULL)
618 return(NULL);
619 i=((n-1)/BN_BYTES)+1;
620 m=((n-1)%(BN_BYTES));
621 ret->top=i;
622 ret->neg=0;
623 while (n-- > 0)
624 {
625 l=(l<<8L)| *(s++);
626 if (m-- == 0)
627 {
628 ret->d[--i]=l;
629 l=0;
630 m=BN_BYTES-1;
631 }
632 }
633 /* need to call this due to clear byte at top if avoiding
634 * having the top bit set (-ve number) */
635 bn_fix_top(ret);
636 return(ret);
637 }
638
639/* ignore negative */
640int BN_bn2bin(const BIGNUM *a, unsigned char *to)
641 {
642 int n,i;
643 BN_ULONG l;
644
645 n=i=BN_num_bytes(a);
646 while (i-- > 0)
647 {
648 l=a->d[i/BN_BYTES];
649 *(to++)=(unsigned char)(l>>(8*(i%BN_BYTES)))&0xff;
650 }
651 return(n);
652 }
653
654int BN_ucmp(const BIGNUM *a, const BIGNUM *b)
655 {
656 int i;
657 BN_ULONG t1,t2,*ap,*bp;
658
659 bn_check_top(a);
660 bn_check_top(b);
661
662 i=a->top-b->top;
663 if (i != 0) return(i);
664 ap=a->d;
665 bp=b->d;
666 for (i=a->top-1; i>=0; i--)
667 {
668 t1= ap[i];
669 t2= bp[i];
670 if (t1 != t2)
671 return(t1 > t2?1:-1);
672 }
673 return(0);
674 }
675
676int BN_cmp(const BIGNUM *a, const BIGNUM *b)
677 {
678 int i;
679 int gt,lt;
680 BN_ULONG t1,t2;
681
682 if ((a == NULL) || (b == NULL))
683 {
684 if (a != NULL)
685 return(-1);
686 else if (b != NULL)
687 return(1);
688 else
689 return(0);
690 }
691
692 bn_check_top(a);
693 bn_check_top(b);
694
695 if (a->neg != b->neg)
696 {
697 if (a->neg)
698 return(-1);
699 else return(1);
700 }
701 if (a->neg == 0)
702 { gt=1; lt= -1; }
703 else { gt= -1; lt=1; }
704
705 if (a->top > b->top) return(gt);
706 if (a->top < b->top) return(lt);
707 for (i=a->top-1; i>=0; i--)
708 {
709 t1=a->d[i];
710 t2=b->d[i];
711 if (t1 > t2) return(gt);
712 if (t1 < t2) return(lt);
713 }
714 return(0);
715 }
716
717int BN_set_bit(BIGNUM *a, int n)
718 {
719 int i,j,k;
720
721 i=n/BN_BITS2;
722 j=n%BN_BITS2;
723 if (a->top <= i)
724 {
725 if (bn_wexpand(a,i+1) == NULL) return(0);
726 for(k=a->top; k<i+1; k++)
727 a->d[k]=0;
728 a->top=i+1;
729 }
730
731 a->d[i]|=(((BN_ULONG)1)<<j);
732 return(1);
733 }
734
735int BN_clear_bit(BIGNUM *a, int n)
736 {
737 int i,j;
738
739 i=n/BN_BITS2;
740 j=n%BN_BITS2;
741 if (a->top <= i) return(0);
742
743 a->d[i]&=(~(((BN_ULONG)1)<<j));
744 bn_fix_top(a);
745 return(1);
746 }
747
748int BN_is_bit_set(const BIGNUM *a, int n)
749 {
750 int i,j;
751
752 if (n < 0) return(0);
753 i=n/BN_BITS2;
754 j=n%BN_BITS2;
755 if (a->top <= i) return(0);
756 return((a->d[i]&(((BN_ULONG)1)<<j))?1:0);
757 }
758
759int BN_mask_bits(BIGNUM *a, int n)
760 {
761 int b,w;
762
763 w=n/BN_BITS2;
764 b=n%BN_BITS2;
765 if (w >= a->top) return(0);
766 if (b == 0)
767 a->top=w;
768 else
769 {
770 a->top=w+1;
771 a->d[w]&= ~(BN_MASK2<<b);
772 }
773 bn_fix_top(a);
774 return(1);
775 }
776
777int bn_cmp_words(const BN_ULONG *a, const BN_ULONG *b, int n)
778 {
779 int i;
780 BN_ULONG aa,bb;
781
782 aa=a[n-1];
783 bb=b[n-1];
784 if (aa != bb) return((aa > bb)?1:-1);
785 for (i=n-2; i>=0; i--)
786 {
787 aa=a[i];
788 bb=b[i];
789 if (aa != bb) return((aa > bb)?1:-1);
790 }
791 return(0);
792 }
793
794/* Here follows a specialised variants of bn_cmp_words(). It has the
795 property of performing the operation on arrays of different sizes.
796 The sizes of those arrays is expressed through cl, which is the
797 common length ( basicall, min(len(a),len(b)) ), and dl, which is the
798 delta between the two lengths, calculated as len(a)-len(b).
799 All lengths are the number of BN_ULONGs... */
800
801int bn_cmp_part_words(const BN_ULONG *a, const BN_ULONG *b,
802 int cl, int dl)
803 {
804 int n,i;
805 n = cl-1;
806
807 if (dl < 0)
808 {
809 for (i=dl; i<0; i++)
810 {
811 if (b[n-i] != 0)
812 return -1; /* a < b */
813 }
814 }
815 if (dl > 0)
816 {
817 for (i=dl; i>0; i--)
818 {
819 if (a[n+i] != 0)
820 return 1; /* a > b */
821 }
822 }
823 return bn_cmp_words(a,b,cl);
824 }
diff --git a/src/lib/libcrypto/bn/bn_mod.c b/src/lib/libcrypto/bn/bn_mod.c
deleted file mode 100644
index 5cf82480d7..0000000000
--- a/src/lib/libcrypto/bn/bn_mod.c
+++ /dev/null
@@ -1,296 +0,0 @@
1/* crypto/bn/bn_mod.c */
2/* Includes code written by Lenka Fibikova <fibikova@exp-math.uni-essen.de>
3 * for the OpenSSL project. */
4/* ====================================================================
5 * Copyright (c) 1998-2000 The OpenSSL Project. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 *
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in
16 * the documentation and/or other materials provided with the
17 * distribution.
18 *
19 * 3. All advertising materials mentioning features or use of this
20 * software must display the following acknowledgment:
21 * "This product includes software developed by the OpenSSL Project
22 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
23 *
24 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
25 * endorse or promote products derived from this software without
26 * prior written permission. For written permission, please contact
27 * openssl-core@openssl.org.
28 *
29 * 5. Products derived from this software may not be called "OpenSSL"
30 * nor may "OpenSSL" appear in their names without prior written
31 * permission of the OpenSSL Project.
32 *
33 * 6. Redistributions of any form whatsoever must retain the following
34 * acknowledgment:
35 * "This product includes software developed by the OpenSSL Project
36 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
37 *
38 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
39 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
40 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
41 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
42 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
43 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
44 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
45 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
46 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
47 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
48 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
49 * OF THE POSSIBILITY OF SUCH DAMAGE.
50 * ====================================================================
51 *
52 * This product includes cryptographic software written by Eric Young
53 * (eay@cryptsoft.com). This product includes software written by Tim
54 * Hudson (tjh@cryptsoft.com).
55 *
56 */
57/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
58 * All rights reserved.
59 *
60 * This package is an SSL implementation written
61 * by Eric Young (eay@cryptsoft.com).
62 * The implementation was written so as to conform with Netscapes SSL.
63 *
64 * This library is free for commercial and non-commercial use as long as
65 * the following conditions are aheared to. The following conditions
66 * apply to all code found in this distribution, be it the RC4, RSA,
67 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
68 * included with this distribution is covered by the same copyright terms
69 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
70 *
71 * Copyright remains Eric Young's, and as such any Copyright notices in
72 * the code are not to be removed.
73 * If this package is used in a product, Eric Young should be given attribution
74 * as the author of the parts of the library used.
75 * This can be in the form of a textual message at program startup or
76 * in documentation (online or textual) provided with the package.
77 *
78 * Redistribution and use in source and binary forms, with or without
79 * modification, are permitted provided that the following conditions
80 * are met:
81 * 1. Redistributions of source code must retain the copyright
82 * notice, this list of conditions and the following disclaimer.
83 * 2. Redistributions in binary form must reproduce the above copyright
84 * notice, this list of conditions and the following disclaimer in the
85 * documentation and/or other materials provided with the distribution.
86 * 3. All advertising materials mentioning features or use of this software
87 * must display the following acknowledgement:
88 * "This product includes cryptographic software written by
89 * Eric Young (eay@cryptsoft.com)"
90 * The word 'cryptographic' can be left out if the rouines from the library
91 * being used are not cryptographic related :-).
92 * 4. If you include any Windows specific code (or a derivative thereof) from
93 * the apps directory (application code) you must include an acknowledgement:
94 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
95 *
96 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
97 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
98 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
99 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
100 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
101 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
102 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
103 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
104 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
105 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
106 * SUCH DAMAGE.
107 *
108 * The licence and distribution terms for any publically available version or
109 * derivative of this code cannot be changed. i.e. this code cannot simply be
110 * copied and put under another distribution licence
111 * [including the GNU Public Licence.]
112 */
113
114#include "cryptlib.h"
115#include "bn_lcl.h"
116
117
118#if 0 /* now just a #define */
119int BN_mod(BIGNUM *rem, const BIGNUM *m, const BIGNUM *d, BN_CTX *ctx)
120 {
121 return(BN_div(NULL,rem,m,d,ctx));
122 /* note that rem->neg == m->neg (unless the remainder is zero) */
123 }
124#endif
125
126
127int BN_nnmod(BIGNUM *r, const BIGNUM *m, const BIGNUM *d, BN_CTX *ctx)
128 {
129 /* like BN_mod, but returns non-negative remainder
130 * (i.e., 0 <= r < |d| always holds) */
131
132 if (!(BN_mod(r,m,d,ctx)))
133 return 0;
134 if (!r->neg)
135 return 1;
136 /* now -|d| < r < 0, so we have to set r := r + |d| */
137 return (d->neg ? BN_sub : BN_add)(r, r, d);
138}
139
140
141int BN_mod_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m, BN_CTX *ctx)
142 {
143 if (!BN_add(r, a, b)) return 0;
144 return BN_nnmod(r, r, m, ctx);
145 }
146
147
148/* BN_mod_add variant that may be used if both a and b are non-negative
149 * and less than m */
150int BN_mod_add_quick(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m)
151 {
152 if (!BN_add(r, a, b)) return 0;
153 if (BN_ucmp(r, m) >= 0)
154 return BN_usub(r, r, m);
155 return 1;
156 }
157
158
159int BN_mod_sub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m, BN_CTX *ctx)
160 {
161 if (!BN_sub(r, a, b)) return 0;
162 return BN_nnmod(r, r, m, ctx);
163 }
164
165
166/* BN_mod_sub variant that may be used if both a and b are non-negative
167 * and less than m */
168int BN_mod_sub_quick(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m)
169 {
170 if (!BN_sub(r, a, b)) return 0;
171 if (r->neg)
172 return BN_add(r, r, m);
173 return 1;
174 }
175
176
177/* slow but works */
178int BN_mod_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m,
179 BN_CTX *ctx)
180 {
181 BIGNUM *t;
182 int ret=0;
183
184 bn_check_top(a);
185 bn_check_top(b);
186 bn_check_top(m);
187
188 BN_CTX_start(ctx);
189 if ((t = BN_CTX_get(ctx)) == NULL) goto err;
190 if (a == b)
191 { if (!BN_sqr(t,a,ctx)) goto err; }
192 else
193 { if (!BN_mul(t,a,b,ctx)) goto err; }
194 if (!BN_nnmod(r,t,m,ctx)) goto err;
195 ret=1;
196err:
197 BN_CTX_end(ctx);
198 return(ret);
199 }
200
201
202int BN_mod_sqr(BIGNUM *r, const BIGNUM *a, const BIGNUM *m, BN_CTX *ctx)
203 {
204 if (!BN_sqr(r, a, ctx)) return 0;
205 /* r->neg == 0, thus we don't need BN_nnmod */
206 return BN_mod(r, r, m, ctx);
207 }
208
209
210int BN_mod_lshift1(BIGNUM *r, const BIGNUM *a, const BIGNUM *m, BN_CTX *ctx)
211 {
212 if (!BN_lshift1(r, a)) return 0;
213 return BN_nnmod(r, r, m, ctx);
214 }
215
216
217/* BN_mod_lshift1 variant that may be used if a is non-negative
218 * and less than m */
219int BN_mod_lshift1_quick(BIGNUM *r, const BIGNUM *a, const BIGNUM *m)
220 {
221 if (!BN_lshift1(r, a)) return 0;
222 if (BN_cmp(r, m) >= 0)
223 return BN_sub(r, r, m);
224 return 1;
225 }
226
227
228int BN_mod_lshift(BIGNUM *r, const BIGNUM *a, int n, const BIGNUM *m, BN_CTX *ctx)
229 {
230 BIGNUM *abs_m = NULL;
231 int ret;
232
233 if (!BN_nnmod(r, a, m, ctx)) return 0;
234
235 if (m->neg)
236 {
237 abs_m = BN_dup(m);
238 if (abs_m == NULL) return 0;
239 abs_m->neg = 0;
240 }
241
242 ret = BN_mod_lshift_quick(r, r, n, (abs_m ? abs_m : m));
243
244 if (abs_m)
245 BN_free(abs_m);
246 return ret;
247 }
248
249
250/* BN_mod_lshift variant that may be used if a is non-negative
251 * and less than m */
252int BN_mod_lshift_quick(BIGNUM *r, const BIGNUM *a, int n, const BIGNUM *m)
253 {
254 if (r != a)
255 {
256 if (BN_copy(r, a) == NULL) return 0;
257 }
258
259 while (n > 0)
260 {
261 int max_shift;
262
263 /* 0 < r < m */
264 max_shift = BN_num_bits(m) - BN_num_bits(r);
265 /* max_shift >= 0 */
266
267 if (max_shift < 0)
268 {
269 BNerr(BN_F_BN_MOD_LSHIFT_QUICK, BN_R_INPUT_NOT_REDUCED);
270 return 0;
271 }
272
273 if (max_shift > n)
274 max_shift = n;
275
276 if (max_shift)
277 {
278 if (!BN_lshift(r, r, max_shift)) return 0;
279 n -= max_shift;
280 }
281 else
282 {
283 if (!BN_lshift1(r, r)) return 0;
284 --n;
285 }
286
287 /* BN_num_bits(r) <= BN_num_bits(m) */
288
289 if (BN_cmp(r, m) >= 0)
290 {
291 if (!BN_sub(r, r, m)) return 0;
292 }
293 }
294
295 return 1;
296 }
diff --git a/src/lib/libcrypto/bn/bn_mont.c b/src/lib/libcrypto/bn/bn_mont.c
deleted file mode 100644
index c9ebdbaabe..0000000000
--- a/src/lib/libcrypto/bn/bn_mont.c
+++ /dev/null
@@ -1,349 +0,0 @@
1/* crypto/bn/bn_mont.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59/*
60 * Details about Montgomery multiplication algorithms can be found at
61 * http://security.ece.orst.edu/publications.html, e.g.
62 * http://security.ece.orst.edu/koc/papers/j37acmon.pdf and
63 * sections 3.8 and 4.2 in http://security.ece.orst.edu/koc/papers/r01rsasw.pdf
64 */
65
66#include <stdio.h>
67#include "cryptlib.h"
68#include "bn_lcl.h"
69
70#define MONT_WORD /* use the faster word-based algorithm */
71
72int BN_mod_mul_montgomery(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
73 BN_MONT_CTX *mont, BN_CTX *ctx)
74 {
75 BIGNUM *tmp;
76 int ret=0;
77
78 BN_CTX_start(ctx);
79 tmp = BN_CTX_get(ctx);
80 if (tmp == NULL) goto err;
81
82 bn_check_top(tmp);
83 if (a == b)
84 {
85 if (!BN_sqr(tmp,a,ctx)) goto err;
86 }
87 else
88 {
89 if (!BN_mul(tmp,a,b,ctx)) goto err;
90 }
91 /* reduce from aRR to aR */
92 if (!BN_from_montgomery(r,tmp,mont,ctx)) goto err;
93 ret=1;
94err:
95 BN_CTX_end(ctx);
96 return(ret);
97 }
98
99int BN_from_montgomery(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont,
100 BN_CTX *ctx)
101 {
102 int retn=0;
103
104#ifdef MONT_WORD
105 BIGNUM *n,*r;
106 BN_ULONG *ap,*np,*rp,n0,v,*nrp;
107 int al,nl,max,i,x,ri;
108
109 BN_CTX_start(ctx);
110 if ((r = BN_CTX_get(ctx)) == NULL) goto err;
111
112 if (!BN_copy(r,a)) goto err;
113 n= &(mont->N);
114
115 ap=a->d;
116 /* mont->ri is the size of mont->N in bits (rounded up
117 to the word size) */
118 al=ri=mont->ri/BN_BITS2;
119
120 nl=n->top;
121 if ((al == 0) || (nl == 0)) { r->top=0; return(1); }
122
123 max=(nl+al+1); /* allow for overflow (no?) XXX */
124 if (bn_wexpand(r,max) == NULL) goto err;
125 if (bn_wexpand(ret,max) == NULL) goto err;
126
127 r->neg=a->neg^n->neg;
128 np=n->d;
129 rp=r->d;
130 nrp= &(r->d[nl]);
131
132 /* clear the top words of T */
133#if 1
134 for (i=r->top; i<max; i++) /* memset? XXX */
135 r->d[i]=0;
136#else
137 memset(&(r->d[r->top]),0,(max-r->top)*sizeof(BN_ULONG));
138#endif
139
140 r->top=max;
141 n0=mont->n0;
142
143#ifdef BN_COUNT
144 fprintf(stderr,"word BN_from_montgomery %d * %d\n",nl,nl);
145#endif
146 for (i=0; i<nl; i++)
147 {
148#ifdef __TANDEM
149 {
150 long long t1;
151 long long t2;
152 long long t3;
153 t1 = rp[0] * (n0 & 0177777);
154 t2 = 037777600000l;
155 t2 = n0 & t2;
156 t3 = rp[0] & 0177777;
157 t2 = (t3 * t2) & BN_MASK2;
158 t1 = t1 + t2;
159 v=bn_mul_add_words(rp,np,nl,(BN_ULONG) t1);
160 }
161#else
162 v=bn_mul_add_words(rp,np,nl,(rp[0]*n0)&BN_MASK2);
163#endif
164 nrp++;
165 rp++;
166 if (((nrp[-1]+=v)&BN_MASK2) >= v)
167 continue;
168 else
169 {
170 if (((++nrp[0])&BN_MASK2) != 0) continue;
171 if (((++nrp[1])&BN_MASK2) != 0) continue;
172 for (x=2; (((++nrp[x])&BN_MASK2) == 0); x++) ;
173 }
174 }
175 bn_fix_top(r);
176
177 /* mont->ri will be a multiple of the word size */
178#if 0
179 BN_rshift(ret,r,mont->ri);
180#else
181 ret->neg = r->neg;
182 x=ri;
183 rp=ret->d;
184 ap= &(r->d[x]);
185 if (r->top < x)
186 al=0;
187 else
188 al=r->top-x;
189 ret->top=al;
190 al-=4;
191 for (i=0; i<al; i+=4)
192 {
193 BN_ULONG t1,t2,t3,t4;
194
195 t1=ap[i+0];
196 t2=ap[i+1];
197 t3=ap[i+2];
198 t4=ap[i+3];
199 rp[i+0]=t1;
200 rp[i+1]=t2;
201 rp[i+2]=t3;
202 rp[i+3]=t4;
203 }
204 al+=4;
205 for (; i<al; i++)
206 rp[i]=ap[i];
207#endif
208#else /* !MONT_WORD */
209 BIGNUM *t1,*t2;
210
211 BN_CTX_start(ctx);
212 t1 = BN_CTX_get(ctx);
213 t2 = BN_CTX_get(ctx);
214 if (t1 == NULL || t2 == NULL) goto err;
215
216 if (!BN_copy(t1,a)) goto err;
217 BN_mask_bits(t1,mont->ri);
218
219 if (!BN_mul(t2,t1,&mont->Ni,ctx)) goto err;
220 BN_mask_bits(t2,mont->ri);
221
222 if (!BN_mul(t1,t2,&mont->N,ctx)) goto err;
223 if (!BN_add(t2,a,t1)) goto err;
224 if (!BN_rshift(ret,t2,mont->ri)) goto err;
225#endif /* MONT_WORD */
226
227 if (BN_ucmp(ret, &(mont->N)) >= 0)
228 {
229 if (!BN_usub(ret,ret,&(mont->N))) goto err;
230 }
231 retn=1;
232 err:
233 BN_CTX_end(ctx);
234 return(retn);
235 }
236
237BN_MONT_CTX *BN_MONT_CTX_new(void)
238 {
239 BN_MONT_CTX *ret;
240
241 if ((ret=(BN_MONT_CTX *)OPENSSL_malloc(sizeof(BN_MONT_CTX))) == NULL)
242 return(NULL);
243
244 BN_MONT_CTX_init(ret);
245 ret->flags=BN_FLG_MALLOCED;
246 return(ret);
247 }
248
249void BN_MONT_CTX_init(BN_MONT_CTX *ctx)
250 {
251 ctx->ri=0;
252 BN_init(&(ctx->RR));
253 BN_init(&(ctx->N));
254 BN_init(&(ctx->Ni));
255 ctx->flags=0;
256 }
257
258void BN_MONT_CTX_free(BN_MONT_CTX *mont)
259 {
260 if(mont == NULL)
261 return;
262
263 BN_free(&(mont->RR));
264 BN_free(&(mont->N));
265 BN_free(&(mont->Ni));
266 if (mont->flags & BN_FLG_MALLOCED)
267 OPENSSL_free(mont);
268 }
269
270int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx)
271 {
272 BIGNUM Ri,*R;
273
274 BN_init(&Ri);
275 R= &(mont->RR); /* grab RR as a temp */
276 BN_copy(&(mont->N),mod); /* Set N */
277 mont->N.neg = 0;
278
279#ifdef MONT_WORD
280 {
281 BIGNUM tmod;
282 BN_ULONG buf[2];
283
284 mont->ri=(BN_num_bits(mod)+(BN_BITS2-1))/BN_BITS2*BN_BITS2;
285 if (!(BN_zero(R))) goto err;
286 if (!(BN_set_bit(R,BN_BITS2))) goto err; /* R */
287
288 buf[0]=mod->d[0]; /* tmod = N mod word size */
289 buf[1]=0;
290 tmod.d=buf;
291 tmod.top=1;
292 tmod.dmax=2;
293 tmod.neg=0;
294 /* Ri = R^-1 mod N*/
295 if ((BN_mod_inverse(&Ri,R,&tmod,ctx)) == NULL)
296 goto err;
297 if (!BN_lshift(&Ri,&Ri,BN_BITS2)) goto err; /* R*Ri */
298 if (!BN_is_zero(&Ri))
299 {
300 if (!BN_sub_word(&Ri,1)) goto err;
301 }
302 else /* if N mod word size == 1 */
303 {
304 if (!BN_set_word(&Ri,BN_MASK2)) goto err; /* Ri-- (mod word size) */
305 }
306 if (!BN_div(&Ri,NULL,&Ri,&tmod,ctx)) goto err;
307 /* Ni = (R*Ri-1)/N,
308 * keep only least significant word: */
309 mont->n0 = (Ri.top > 0) ? Ri.d[0] : 0;
310 BN_free(&Ri);
311 }
312#else /* !MONT_WORD */
313 { /* bignum version */
314 mont->ri=BN_num_bits(&mont->N);
315 if (!BN_zero(R)) goto err;
316 if (!BN_set_bit(R,mont->ri)) goto err; /* R = 2^ri */
317 /* Ri = R^-1 mod N*/
318 if ((BN_mod_inverse(&Ri,R,&mont->N,ctx)) == NULL)
319 goto err;
320 if (!BN_lshift(&Ri,&Ri,mont->ri)) goto err; /* R*Ri */
321 if (!BN_sub_word(&Ri,1)) goto err;
322 /* Ni = (R*Ri-1) / N */
323 if (!BN_div(&(mont->Ni),NULL,&Ri,&mont->N,ctx)) goto err;
324 BN_free(&Ri);
325 }
326#endif
327
328 /* setup RR for conversions */
329 if (!BN_zero(&(mont->RR))) goto err;
330 if (!BN_set_bit(&(mont->RR),mont->ri*2)) goto err;
331 if (!BN_mod(&(mont->RR),&(mont->RR),&(mont->N),ctx)) goto err;
332
333 return(1);
334err:
335 return(0);
336 }
337
338BN_MONT_CTX *BN_MONT_CTX_copy(BN_MONT_CTX *to, BN_MONT_CTX *from)
339 {
340 if (to == from) return(to);
341
342 if (!BN_copy(&(to->RR),&(from->RR))) return NULL;
343 if (!BN_copy(&(to->N),&(from->N))) return NULL;
344 if (!BN_copy(&(to->Ni),&(from->Ni))) return NULL;
345 to->ri=from->ri;
346 to->n0=from->n0;
347 return(to);
348 }
349
diff --git a/src/lib/libcrypto/bn/bn_mpi.c b/src/lib/libcrypto/bn/bn_mpi.c
deleted file mode 100644
index 05fa9d1e9a..0000000000
--- a/src/lib/libcrypto/bn/bn_mpi.c
+++ /dev/null
@@ -1,129 +0,0 @@
1/* crypto/bn/bn_mpi.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#include <stdio.h>
60#include "cryptlib.h"
61#include "bn_lcl.h"
62
63int BN_bn2mpi(const BIGNUM *a, unsigned char *d)
64 {
65 int bits;
66 int num=0;
67 int ext=0;
68 long l;
69
70 bits=BN_num_bits(a);
71 num=(bits+7)/8;
72 if (bits > 0)
73 {
74 ext=((bits & 0x07) == 0);
75 }
76 if (d == NULL)
77 return(num+4+ext);
78
79 l=num+ext;
80 d[0]=(unsigned char)(l>>24)&0xff;
81 d[1]=(unsigned char)(l>>16)&0xff;
82 d[2]=(unsigned char)(l>> 8)&0xff;
83 d[3]=(unsigned char)(l )&0xff;
84 if (ext) d[4]=0;
85 num=BN_bn2bin(a,&(d[4+ext]));
86 if (a->neg)
87 d[4]|=0x80;
88 return(num+4+ext);
89 }
90
91BIGNUM *BN_mpi2bn(const unsigned char *d, int n, BIGNUM *a)
92 {
93 long len;
94 int neg=0;
95
96 if (n < 4)
97 {
98 BNerr(BN_F_BN_MPI2BN,BN_R_INVALID_LENGTH);
99 return(NULL);
100 }
101 len=((long)d[0]<<24)|((long)d[1]<<16)|((int)d[2]<<8)|(int)d[3];
102 if ((len+4) != n)
103 {
104 BNerr(BN_F_BN_MPI2BN,BN_R_ENCODING_ERROR);
105 return(NULL);
106 }
107
108 if (a == NULL) a=BN_new();
109 if (a == NULL) return(NULL);
110
111 if (len == 0)
112 {
113 a->neg=0;
114 a->top=0;
115 return(a);
116 }
117 d+=4;
118 if ((*d) & 0x80)
119 neg=1;
120 if (BN_bin2bn(d,(int)len,a) == NULL)
121 return(NULL);
122 a->neg=neg;
123 if (neg)
124 {
125 BN_clear_bit(a,BN_num_bits(a)-1);
126 }
127 return(a);
128 }
129
diff --git a/src/lib/libcrypto/bn/bn_mul.c b/src/lib/libcrypto/bn/bn_mul.c
deleted file mode 100644
index 3ae3822bc2..0000000000
--- a/src/lib/libcrypto/bn/bn_mul.c
+++ /dev/null
@@ -1,802 +0,0 @@
1/* crypto/bn/bn_mul.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#include <stdio.h>
60#include "cryptlib.h"
61#include "bn_lcl.h"
62
63#ifdef BN_RECURSION
64/* Karatsuba recursive multiplication algorithm
65 * (cf. Knuth, The Art of Computer Programming, Vol. 2) */
66
67/* r is 2*n2 words in size,
68 * a and b are both n2 words in size.
69 * n2 must be a power of 2.
70 * We multiply and return the result.
71 * t must be 2*n2 words in size
72 * We calculate
73 * a[0]*b[0]
74 * a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0])
75 * a[1]*b[1]
76 */
77void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
78 BN_ULONG *t)
79 {
80 int n=n2/2,c1,c2;
81 unsigned int neg,zero;
82 BN_ULONG ln,lo,*p;
83
84# ifdef BN_COUNT
85 printf(" bn_mul_recursive %d * %d\n",n2,n2);
86# endif
87# ifdef BN_MUL_COMBA
88# if 0
89 if (n2 == 4)
90 {
91 bn_mul_comba4(r,a,b);
92 return;
93 }
94# endif
95 if (n2 == 8)
96 {
97 bn_mul_comba8(r,a,b);
98 return;
99 }
100# endif /* BN_MUL_COMBA */
101 if (n2 < BN_MUL_RECURSIVE_SIZE_NORMAL)
102 {
103 /* This should not happen */
104 bn_mul_normal(r,a,n2,b,n2);
105 return;
106 }
107 /* r=(a[0]-a[1])*(b[1]-b[0]) */
108 c1=bn_cmp_words(a,&(a[n]),n);
109 c2=bn_cmp_words(&(b[n]),b,n);
110 zero=neg=0;
111 switch (c1*3+c2)
112 {
113 case -4:
114 bn_sub_words(t, &(a[n]),a, n); /* - */
115 bn_sub_words(&(t[n]),b, &(b[n]),n); /* - */
116 break;
117 case -3:
118 zero=1;
119 break;
120 case -2:
121 bn_sub_words(t, &(a[n]),a, n); /* - */
122 bn_sub_words(&(t[n]),&(b[n]),b, n); /* + */
123 neg=1;
124 break;
125 case -1:
126 case 0:
127 case 1:
128 zero=1;
129 break;
130 case 2:
131 bn_sub_words(t, a, &(a[n]),n); /* + */
132 bn_sub_words(&(t[n]),b, &(b[n]),n); /* - */
133 neg=1;
134 break;
135 case 3:
136 zero=1;
137 break;
138 case 4:
139 bn_sub_words(t, a, &(a[n]),n);
140 bn_sub_words(&(t[n]),&(b[n]),b, n);
141 break;
142 }
143
144# ifdef BN_MUL_COMBA
145 if (n == 4)
146 {
147 if (!zero)
148 bn_mul_comba4(&(t[n2]),t,&(t[n]));
149 else
150 memset(&(t[n2]),0,8*sizeof(BN_ULONG));
151
152 bn_mul_comba4(r,a,b);
153 bn_mul_comba4(&(r[n2]),&(a[n]),&(b[n]));
154 }
155 else if (n == 8)
156 {
157 if (!zero)
158 bn_mul_comba8(&(t[n2]),t,&(t[n]));
159 else
160 memset(&(t[n2]),0,16*sizeof(BN_ULONG));
161
162 bn_mul_comba8(r,a,b);
163 bn_mul_comba8(&(r[n2]),&(a[n]),&(b[n]));
164 }
165 else
166# endif /* BN_MUL_COMBA */
167 {
168 p= &(t[n2*2]);
169 if (!zero)
170 bn_mul_recursive(&(t[n2]),t,&(t[n]),n,p);
171 else
172 memset(&(t[n2]),0,n2*sizeof(BN_ULONG));
173 bn_mul_recursive(r,a,b,n,p);
174 bn_mul_recursive(&(r[n2]),&(a[n]),&(b[n]),n,p);
175 }
176
177 /* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
178 * r[10] holds (a[0]*b[0])
179 * r[32] holds (b[1]*b[1])
180 */
181
182 c1=(int)(bn_add_words(t,r,&(r[n2]),n2));
183
184 if (neg) /* if t[32] is negative */
185 {
186 c1-=(int)(bn_sub_words(&(t[n2]),t,&(t[n2]),n2));
187 }
188 else
189 {
190 /* Might have a carry */
191 c1+=(int)(bn_add_words(&(t[n2]),&(t[n2]),t,n2));
192 }
193
194 /* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
195 * r[10] holds (a[0]*b[0])
196 * r[32] holds (b[1]*b[1])
197 * c1 holds the carry bits
198 */
199 c1+=(int)(bn_add_words(&(r[n]),&(r[n]),&(t[n2]),n2));
200 if (c1)
201 {
202 p= &(r[n+n2]);
203 lo= *p;
204 ln=(lo+c1)&BN_MASK2;
205 *p=ln;
206
207 /* The overflow will stop before we over write
208 * words we should not overwrite */
209 if (ln < (BN_ULONG)c1)
210 {
211 do {
212 p++;
213 lo= *p;
214 ln=(lo+1)&BN_MASK2;
215 *p=ln;
216 } while (ln == 0);
217 }
218 }
219 }
220
221/* n+tn is the word length
222 * t needs to be n*4 is size, as does r */
223void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int tn,
224 int n, BN_ULONG *t)
225 {
226 int i,j,n2=n*2;
227 int c1,c2,neg,zero;
228 BN_ULONG ln,lo,*p;
229
230# ifdef BN_COUNT
231 printf(" bn_mul_part_recursive %d * %d\n",tn+n,tn+n);
232# endif
233 if (n < 8)
234 {
235 i=tn+n;
236 bn_mul_normal(r,a,i,b,i);
237 return;
238 }
239
240 /* r=(a[0]-a[1])*(b[1]-b[0]) */
241 c1=bn_cmp_words(a,&(a[n]),n);
242 c2=bn_cmp_words(&(b[n]),b,n);
243 zero=neg=0;
244 switch (c1*3+c2)
245 {
246 case -4:
247 bn_sub_words(t, &(a[n]),a, n); /* - */
248 bn_sub_words(&(t[n]),b, &(b[n]),n); /* - */
249 break;
250 case -3:
251 zero=1;
252 /* break; */
253 case -2:
254 bn_sub_words(t, &(a[n]),a, n); /* - */
255 bn_sub_words(&(t[n]),&(b[n]),b, n); /* + */
256 neg=1;
257 break;
258 case -1:
259 case 0:
260 case 1:
261 zero=1;
262 /* break; */
263 case 2:
264 bn_sub_words(t, a, &(a[n]),n); /* + */
265 bn_sub_words(&(t[n]),b, &(b[n]),n); /* - */
266 neg=1;
267 break;
268 case 3:
269 zero=1;
270 /* break; */
271 case 4:
272 bn_sub_words(t, a, &(a[n]),n);
273 bn_sub_words(&(t[n]),&(b[n]),b, n);
274 break;
275 }
276 /* The zero case isn't yet implemented here. The speedup
277 would probably be negligible. */
278# if 0
279 if (n == 4)
280 {
281 bn_mul_comba4(&(t[n2]),t,&(t[n]));
282 bn_mul_comba4(r,a,b);
283 bn_mul_normal(&(r[n2]),&(a[n]),tn,&(b[n]),tn);
284 memset(&(r[n2+tn*2]),0,sizeof(BN_ULONG)*(n2-tn*2));
285 }
286 else
287# endif
288 if (n == 8)
289 {
290 bn_mul_comba8(&(t[n2]),t,&(t[n]));
291 bn_mul_comba8(r,a,b);
292 bn_mul_normal(&(r[n2]),&(a[n]),tn,&(b[n]),tn);
293 memset(&(r[n2+tn*2]),0,sizeof(BN_ULONG)*(n2-tn*2));
294 }
295 else
296 {
297 p= &(t[n2*2]);
298 bn_mul_recursive(&(t[n2]),t,&(t[n]),n,p);
299 bn_mul_recursive(r,a,b,n,p);
300 i=n/2;
301 /* If there is only a bottom half to the number,
302 * just do it */
303 j=tn-i;
304 if (j == 0)
305 {
306 bn_mul_recursive(&(r[n2]),&(a[n]),&(b[n]),i,p);
307 memset(&(r[n2+i*2]),0,sizeof(BN_ULONG)*(n2-i*2));
308 }
309 else if (j > 0) /* eg, n == 16, i == 8 and tn == 11 */
310 {
311 bn_mul_part_recursive(&(r[n2]),&(a[n]),&(b[n]),
312 j,i,p);
313 memset(&(r[n2+tn*2]),0,
314 sizeof(BN_ULONG)*(n2-tn*2));
315 }
316 else /* (j < 0) eg, n == 16, i == 8 and tn == 5 */
317 {
318 memset(&(r[n2]),0,sizeof(BN_ULONG)*n2);
319 if (tn < BN_MUL_RECURSIVE_SIZE_NORMAL)
320 {
321 bn_mul_normal(&(r[n2]),&(a[n]),tn,&(b[n]),tn);
322 }
323 else
324 {
325 for (;;)
326 {
327 i/=2;
328 if (i < tn)
329 {
330 bn_mul_part_recursive(&(r[n2]),
331 &(a[n]),&(b[n]),
332 tn-i,i,p);
333 break;
334 }
335 else if (i == tn)
336 {
337 bn_mul_recursive(&(r[n2]),
338 &(a[n]),&(b[n]),
339 i,p);
340 break;
341 }
342 }
343 }
344 }
345 }
346
347 /* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
348 * r[10] holds (a[0]*b[0])
349 * r[32] holds (b[1]*b[1])
350 */
351
352 c1=(int)(bn_add_words(t,r,&(r[n2]),n2));
353
354 if (neg) /* if t[32] is negative */
355 {
356 c1-=(int)(bn_sub_words(&(t[n2]),t,&(t[n2]),n2));
357 }
358 else
359 {
360 /* Might have a carry */
361 c1+=(int)(bn_add_words(&(t[n2]),&(t[n2]),t,n2));
362 }
363
364 /* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
365 * r[10] holds (a[0]*b[0])
366 * r[32] holds (b[1]*b[1])
367 * c1 holds the carry bits
368 */
369 c1+=(int)(bn_add_words(&(r[n]),&(r[n]),&(t[n2]),n2));
370 if (c1)
371 {
372 p= &(r[n+n2]);
373 lo= *p;
374 ln=(lo+c1)&BN_MASK2;
375 *p=ln;
376
377 /* The overflow will stop before we over write
378 * words we should not overwrite */
379 if (ln < (BN_ULONG)c1)
380 {
381 do {
382 p++;
383 lo= *p;
384 ln=(lo+1)&BN_MASK2;
385 *p=ln;
386 } while (ln == 0);
387 }
388 }
389 }
390
391/* a and b must be the same size, which is n2.
392 * r needs to be n2 words and t needs to be n2*2
393 */
394void bn_mul_low_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
395 BN_ULONG *t)
396 {
397 int n=n2/2;
398
399# ifdef BN_COUNT
400 printf(" bn_mul_low_recursive %d * %d\n",n2,n2);
401# endif
402
403 bn_mul_recursive(r,a,b,n,&(t[0]));
404 if (n >= BN_MUL_LOW_RECURSIVE_SIZE_NORMAL)
405 {
406 bn_mul_low_recursive(&(t[0]),&(a[0]),&(b[n]),n,&(t[n2]));
407 bn_add_words(&(r[n]),&(r[n]),&(t[0]),n);
408 bn_mul_low_recursive(&(t[0]),&(a[n]),&(b[0]),n,&(t[n2]));
409 bn_add_words(&(r[n]),&(r[n]),&(t[0]),n);
410 }
411 else
412 {
413 bn_mul_low_normal(&(t[0]),&(a[0]),&(b[n]),n);
414 bn_mul_low_normal(&(t[n]),&(a[n]),&(b[0]),n);
415 bn_add_words(&(r[n]),&(r[n]),&(t[0]),n);
416 bn_add_words(&(r[n]),&(r[n]),&(t[n]),n);
417 }
418 }
419
420/* a and b must be the same size, which is n2.
421 * r needs to be n2 words and t needs to be n2*2
422 * l is the low words of the output.
423 * t needs to be n2*3
424 */
425void bn_mul_high(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, BN_ULONG *l, int n2,
426 BN_ULONG *t)
427 {
428 int i,n;
429 int c1,c2;
430 int neg,oneg,zero;
431 BN_ULONG ll,lc,*lp,*mp;
432
433# ifdef BN_COUNT
434 printf(" bn_mul_high %d * %d\n",n2,n2);
435# endif
436 n=n2/2;
437
438 /* Calculate (al-ah)*(bh-bl) */
439 neg=zero=0;
440 c1=bn_cmp_words(&(a[0]),&(a[n]),n);
441 c2=bn_cmp_words(&(b[n]),&(b[0]),n);
442 switch (c1*3+c2)
443 {
444 case -4:
445 bn_sub_words(&(r[0]),&(a[n]),&(a[0]),n);
446 bn_sub_words(&(r[n]),&(b[0]),&(b[n]),n);
447 break;
448 case -3:
449 zero=1;
450 break;
451 case -2:
452 bn_sub_words(&(r[0]),&(a[n]),&(a[0]),n);
453 bn_sub_words(&(r[n]),&(b[n]),&(b[0]),n);
454 neg=1;
455 break;
456 case -1:
457 case 0:
458 case 1:
459 zero=1;
460 break;
461 case 2:
462 bn_sub_words(&(r[0]),&(a[0]),&(a[n]),n);
463 bn_sub_words(&(r[n]),&(b[0]),&(b[n]),n);
464 neg=1;
465 break;
466 case 3:
467 zero=1;
468 break;
469 case 4:
470 bn_sub_words(&(r[0]),&(a[0]),&(a[n]),n);
471 bn_sub_words(&(r[n]),&(b[n]),&(b[0]),n);
472 break;
473 }
474
475 oneg=neg;
476 /* t[10] = (a[0]-a[1])*(b[1]-b[0]) */
477 /* r[10] = (a[1]*b[1]) */
478# ifdef BN_MUL_COMBA
479 if (n == 8)
480 {
481 bn_mul_comba8(&(t[0]),&(r[0]),&(r[n]));
482 bn_mul_comba8(r,&(a[n]),&(b[n]));
483 }
484 else
485# endif
486 {
487 bn_mul_recursive(&(t[0]),&(r[0]),&(r[n]),n,&(t[n2]));
488 bn_mul_recursive(r,&(a[n]),&(b[n]),n,&(t[n2]));
489 }
490
491 /* s0 == low(al*bl)
492 * s1 == low(ah*bh)+low((al-ah)*(bh-bl))+low(al*bl)+high(al*bl)
493 * We know s0 and s1 so the only unknown is high(al*bl)
494 * high(al*bl) == s1 - low(ah*bh+s0+(al-ah)*(bh-bl))
495 * high(al*bl) == s1 - (r[0]+l[0]+t[0])
496 */
497 if (l != NULL)
498 {
499 lp= &(t[n2+n]);
500 c1=(int)(bn_add_words(lp,&(r[0]),&(l[0]),n));
501 }
502 else
503 {
504 c1=0;
505 lp= &(r[0]);
506 }
507
508 if (neg)
509 neg=(int)(bn_sub_words(&(t[n2]),lp,&(t[0]),n));
510 else
511 {
512 bn_add_words(&(t[n2]),lp,&(t[0]),n);
513 neg=0;
514 }
515
516 if (l != NULL)
517 {
518 bn_sub_words(&(t[n2+n]),&(l[n]),&(t[n2]),n);
519 }
520 else
521 {
522 lp= &(t[n2+n]);
523 mp= &(t[n2]);
524 for (i=0; i<n; i++)
525 lp[i]=((~mp[i])+1)&BN_MASK2;
526 }
527
528 /* s[0] = low(al*bl)
529 * t[3] = high(al*bl)
530 * t[10] = (a[0]-a[1])*(b[1]-b[0]) neg is the sign
531 * r[10] = (a[1]*b[1])
532 */
533 /* R[10] = al*bl
534 * R[21] = al*bl + ah*bh + (a[0]-a[1])*(b[1]-b[0])
535 * R[32] = ah*bh
536 */
537 /* R[1]=t[3]+l[0]+r[0](+-)t[0] (have carry/borrow)
538 * R[2]=r[0]+t[3]+r[1](+-)t[1] (have carry/borrow)
539 * R[3]=r[1]+(carry/borrow)
540 */
541 if (l != NULL)
542 {
543 lp= &(t[n2]);
544 c1= (int)(bn_add_words(lp,&(t[n2+n]),&(l[0]),n));
545 }
546 else
547 {
548 lp= &(t[n2+n]);
549 c1=0;
550 }
551 c1+=(int)(bn_add_words(&(t[n2]),lp, &(r[0]),n));
552 if (oneg)
553 c1-=(int)(bn_sub_words(&(t[n2]),&(t[n2]),&(t[0]),n));
554 else
555 c1+=(int)(bn_add_words(&(t[n2]),&(t[n2]),&(t[0]),n));
556
557 c2 =(int)(bn_add_words(&(r[0]),&(r[0]),&(t[n2+n]),n));
558 c2+=(int)(bn_add_words(&(r[0]),&(r[0]),&(r[n]),n));
559 if (oneg)
560 c2-=(int)(bn_sub_words(&(r[0]),&(r[0]),&(t[n]),n));
561 else
562 c2+=(int)(bn_add_words(&(r[0]),&(r[0]),&(t[n]),n));
563
564 if (c1 != 0) /* Add starting at r[0], could be +ve or -ve */
565 {
566 i=0;
567 if (c1 > 0)
568 {
569 lc=c1;
570 do {
571 ll=(r[i]+lc)&BN_MASK2;
572 r[i++]=ll;
573 lc=(lc > ll);
574 } while (lc);
575 }
576 else
577 {
578 lc= -c1;
579 do {
580 ll=r[i];
581 r[i++]=(ll-lc)&BN_MASK2;
582 lc=(lc > ll);
583 } while (lc);
584 }
585 }
586 if (c2 != 0) /* Add starting at r[1] */
587 {
588 i=n;
589 if (c2 > 0)
590 {
591 lc=c2;
592 do {
593 ll=(r[i]+lc)&BN_MASK2;
594 r[i++]=ll;
595 lc=(lc > ll);
596 } while (lc);
597 }
598 else
599 {
600 lc= -c2;
601 do {
602 ll=r[i];
603 r[i++]=(ll-lc)&BN_MASK2;
604 lc=(lc > ll);
605 } while (lc);
606 }
607 }
608 }
609#endif /* BN_RECURSION */
610
611int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
612 {
613 int top,al,bl;
614 BIGNUM *rr;
615 int ret = 0;
616#if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
617 int i;
618#endif
619#ifdef BN_RECURSION
620 BIGNUM *t;
621 int j,k;
622#endif
623
624#ifdef BN_COUNT
625 printf("BN_mul %d * %d\n",a->top,b->top);
626#endif
627
628 bn_check_top(a);
629 bn_check_top(b);
630 bn_check_top(r);
631
632 al=a->top;
633 bl=b->top;
634
635 if ((al == 0) || (bl == 0))
636 {
637 if (!BN_zero(r)) goto err;
638 return(1);
639 }
640 top=al+bl;
641
642 BN_CTX_start(ctx);
643 if ((r == a) || (r == b))
644 {
645 if ((rr = BN_CTX_get(ctx)) == NULL) goto err;
646 }
647 else
648 rr = r;
649 rr->neg=a->neg^b->neg;
650
651#if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
652 i = al-bl;
653#endif
654#ifdef BN_MUL_COMBA
655 if (i == 0)
656 {
657# if 0
658 if (al == 4)
659 {
660 if (bn_wexpand(rr,8) == NULL) goto err;
661 rr->top=8;
662 bn_mul_comba4(rr->d,a->d,b->d);
663 goto end;
664 }
665# endif
666 if (al == 8)
667 {
668 if (bn_wexpand(rr,16) == NULL) goto err;
669 rr->top=16;
670 bn_mul_comba8(rr->d,a->d,b->d);
671 goto end;
672 }
673 }
674#endif /* BN_MUL_COMBA */
675#ifdef BN_RECURSION
676 if ((al >= BN_MULL_SIZE_NORMAL) && (bl >= BN_MULL_SIZE_NORMAL))
677 {
678 if (i == 1 && !BN_get_flags(b,BN_FLG_STATIC_DATA) && bl<b->dmax)
679 {
680#if 0 /* tribute to const-ification, bl<b->dmax above covers for this */
681 if (bn_wexpand(b,al) == NULL) goto err;
682#endif
683 b->d[bl]=0;
684 bl++;
685 i--;
686 }
687 else if (i == -1 && !BN_get_flags(a,BN_FLG_STATIC_DATA) && al<a->dmax)
688 {
689#if 0 /* tribute to const-ification, al<a->dmax above covers for this */
690 if (bn_wexpand(a,bl) == NULL) goto err;
691#endif
692 a->d[al]=0;
693 al++;
694 i++;
695 }
696 if (i == 0)
697 {
698 /* symmetric and > 4 */
699 /* 16 or larger */
700 j=BN_num_bits_word((BN_ULONG)al);
701 j=1<<(j-1);
702 k=j+j;
703 t = BN_CTX_get(ctx);
704 if (al == j) /* exact multiple */
705 {
706 if (bn_wexpand(t,k*2) == NULL) goto err;
707 if (bn_wexpand(rr,k*2) == NULL) goto err;
708 bn_mul_recursive(rr->d,a->d,b->d,al,t->d);
709 rr->top=top;
710 goto end;
711 }
712#if 0 /* tribute to const-ification, rsa/dsa performance is not affected */
713 else
714 {
715 if (bn_wexpand(a,k) == NULL ) goto err;
716 if (bn_wexpand(b,k) == NULL ) goto err;
717 if (bn_wexpand(t,k*4) == NULL ) goto err;
718 if (bn_wexpand(rr,k*4) == NULL ) goto err;
719 for (i=a->top; i<k; i++)
720 a->d[i]=0;
721 for (i=b->top; i<k; i++)
722 b->d[i]=0;
723 bn_mul_part_recursive(rr->d,a->d,b->d,al-j,j,t->d);
724 }
725 rr->top=top;
726 goto end;
727#endif
728 }
729 }
730#endif /* BN_RECURSION */
731 if (bn_wexpand(rr,top) == NULL) goto err;
732 rr->top=top;
733 bn_mul_normal(rr->d,a->d,al,b->d,bl);
734
735#if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
736end:
737#endif
738 bn_fix_top(rr);
739 if (r != rr) BN_copy(r,rr);
740 ret=1;
741err:
742 BN_CTX_end(ctx);
743 return(ret);
744 }
745
746void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb)
747 {
748 BN_ULONG *rr;
749
750#ifdef BN_COUNT
751 printf(" bn_mul_normal %d * %d\n",na,nb);
752#endif
753
754 if (na < nb)
755 {
756 int itmp;
757 BN_ULONG *ltmp;
758
759 itmp=na; na=nb; nb=itmp;
760 ltmp=a; a=b; b=ltmp;
761
762 }
763 rr= &(r[na]);
764 rr[0]=bn_mul_words(r,a,na,b[0]);
765
766 for (;;)
767 {
768 if (--nb <= 0) return;
769 rr[1]=bn_mul_add_words(&(r[1]),a,na,b[1]);
770 if (--nb <= 0) return;
771 rr[2]=bn_mul_add_words(&(r[2]),a,na,b[2]);
772 if (--nb <= 0) return;
773 rr[3]=bn_mul_add_words(&(r[3]),a,na,b[3]);
774 if (--nb <= 0) return;
775 rr[4]=bn_mul_add_words(&(r[4]),a,na,b[4]);
776 rr+=4;
777 r+=4;
778 b+=4;
779 }
780 }
781
782void bn_mul_low_normal(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
783 {
784#ifdef BN_COUNT
785 printf(" bn_mul_low_normal %d * %d\n",n,n);
786#endif
787 bn_mul_words(r,a,n,b[0]);
788
789 for (;;)
790 {
791 if (--n <= 0) return;
792 bn_mul_add_words(&(r[1]),a,n,b[1]);
793 if (--n <= 0) return;
794 bn_mul_add_words(&(r[2]),a,n,b[2]);
795 if (--n <= 0) return;
796 bn_mul_add_words(&(r[3]),a,n,b[3]);
797 if (--n <= 0) return;
798 bn_mul_add_words(&(r[4]),a,n,b[4]);
799 r+=4;
800 b+=4;
801 }
802 }
diff --git a/src/lib/libcrypto/bn/bn_prime.c b/src/lib/libcrypto/bn/bn_prime.c
deleted file mode 100644
index e072d9255c..0000000000
--- a/src/lib/libcrypto/bn/bn_prime.c
+++ /dev/null
@@ -1,466 +0,0 @@
1/* crypto/bn/bn_prime.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58/* ====================================================================
59 * Copyright (c) 1998-2001 The OpenSSL Project. All rights reserved.
60 *
61 * Redistribution and use in source and binary forms, with or without
62 * modification, are permitted provided that the following conditions
63 * are met:
64 *
65 * 1. Redistributions of source code must retain the above copyright
66 * notice, this list of conditions and the following disclaimer.
67 *
68 * 2. Redistributions in binary form must reproduce the above copyright
69 * notice, this list of conditions and the following disclaimer in
70 * the documentation and/or other materials provided with the
71 * distribution.
72 *
73 * 3. All advertising materials mentioning features or use of this
74 * software must display the following acknowledgment:
75 * "This product includes software developed by the OpenSSL Project
76 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
77 *
78 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
79 * endorse or promote products derived from this software without
80 * prior written permission. For written permission, please contact
81 * openssl-core@openssl.org.
82 *
83 * 5. Products derived from this software may not be called "OpenSSL"
84 * nor may "OpenSSL" appear in their names without prior written
85 * permission of the OpenSSL Project.
86 *
87 * 6. Redistributions of any form whatsoever must retain the following
88 * acknowledgment:
89 * "This product includes software developed by the OpenSSL Project
90 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
91 *
92 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
93 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
94 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
95 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
96 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
97 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
98 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
99 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
100 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
101 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
102 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
103 * OF THE POSSIBILITY OF SUCH DAMAGE.
104 * ====================================================================
105 *
106 * This product includes cryptographic software written by Eric Young
107 * (eay@cryptsoft.com). This product includes software written by Tim
108 * Hudson (tjh@cryptsoft.com).
109 *
110 */
111
112#include <stdio.h>
113#include <time.h>
114#include "cryptlib.h"
115#include "bn_lcl.h"
116#include <openssl/rand.h>
117
118/* The quick sieve algorithm approach to weeding out primes is
119 * Philip Zimmermann's, as implemented in PGP. I have had a read of
120 * his comments and implemented my own version.
121 */
122#include "bn_prime.h"
123
124static int witness(BIGNUM *w, const BIGNUM *a, const BIGNUM *a1,
125 const BIGNUM *a1_odd, int k, BN_CTX *ctx, BN_MONT_CTX *mont);
126static int probable_prime(BIGNUM *rnd, int bits);
127static int probable_prime_dh(BIGNUM *rnd, int bits,
128 const BIGNUM *add, const BIGNUM *rem, BN_CTX *ctx);
129static int probable_prime_dh_safe(BIGNUM *rnd, int bits,
130 const BIGNUM *add, const BIGNUM *rem, BN_CTX *ctx);
131
132BIGNUM *BN_generate_prime(BIGNUM *ret, int bits, int safe,
133 const BIGNUM *add, const BIGNUM *rem,
134 void (*callback)(int,int,void *), void *cb_arg)
135 {
136 BIGNUM *rnd=NULL;
137 BIGNUM t;
138 int found=0;
139 int i,j,c1=0;
140 BN_CTX *ctx;
141 int checks = BN_prime_checks_for_size(bits);
142
143 BN_init(&t);
144 ctx=BN_CTX_new();
145 if (ctx == NULL) goto err;
146 if (ret == NULL)
147 {
148 if ((rnd=BN_new()) == NULL) goto err;
149 }
150 else
151 rnd=ret;
152loop:
153 /* make a random number and set the top and bottom bits */
154 if (add == NULL)
155 {
156 if (!probable_prime(rnd,bits)) goto err;
157 }
158 else
159 {
160 if (safe)
161 {
162 if (!probable_prime_dh_safe(rnd,bits,add,rem,ctx))
163 goto err;
164 }
165 else
166 {
167 if (!probable_prime_dh(rnd,bits,add,rem,ctx))
168 goto err;
169 }
170 }
171 /* if (BN_mod_word(rnd,(BN_ULONG)3) == 1) goto loop; */
172 if (callback != NULL) callback(0,c1++,cb_arg);
173
174 if (!safe)
175 {
176 i=BN_is_prime_fasttest(rnd,checks,callback,ctx,cb_arg,0);
177 if (i == -1) goto err;
178 if (i == 0) goto loop;
179 }
180 else
181 {
182 /* for "safe prime" generation,
183 * check that (p-1)/2 is prime.
184 * Since a prime is odd, We just
185 * need to divide by 2 */
186 if (!BN_rshift1(&t,rnd)) goto err;
187
188 for (i=0; i<checks; i++)
189 {
190 j=BN_is_prime_fasttest(rnd,1,callback,ctx,cb_arg,0);
191 if (j == -1) goto err;
192 if (j == 0) goto loop;
193
194 j=BN_is_prime_fasttest(&t,1,callback,ctx,cb_arg,0);
195 if (j == -1) goto err;
196 if (j == 0) goto loop;
197
198 if (callback != NULL) callback(2,c1-1,cb_arg);
199 /* We have a safe prime test pass */
200 }
201 }
202 /* we have a prime :-) */
203 found = 1;
204err:
205 if (!found && (ret == NULL) && (rnd != NULL)) BN_free(rnd);
206 BN_free(&t);
207 if (ctx != NULL) BN_CTX_free(ctx);
208 return(found ? rnd : NULL);
209 }
210
211int BN_is_prime(const BIGNUM *a, int checks, void (*callback)(int,int,void *),
212 BN_CTX *ctx_passed, void *cb_arg)
213 {
214 return BN_is_prime_fasttest(a, checks, callback, ctx_passed, cb_arg, 0);
215 }
216
217int BN_is_prime_fasttest(const BIGNUM *a, int checks,
218 void (*callback)(int,int,void *),
219 BN_CTX *ctx_passed, void *cb_arg,
220 int do_trial_division)
221 {
222 int i, j, ret = -1;
223 int k;
224 BN_CTX *ctx = NULL;
225 BIGNUM *A1, *A1_odd, *check; /* taken from ctx */
226 BN_MONT_CTX *mont = NULL;
227 const BIGNUM *A = NULL;
228
229 if (BN_cmp(a, BN_value_one()) <= 0)
230 return 0;
231
232 if (checks == BN_prime_checks)
233 checks = BN_prime_checks_for_size(BN_num_bits(a));
234
235 /* first look for small factors */
236 if (!BN_is_odd(a))
237 return 0;
238 if (do_trial_division)
239 {
240 for (i = 1; i < NUMPRIMES; i++)
241 if (BN_mod_word(a, primes[i]) == 0)
242 return 0;
243 if (callback != NULL) callback(1, -1, cb_arg);
244 }
245
246 if (ctx_passed != NULL)
247 ctx = ctx_passed;
248 else
249 if ((ctx=BN_CTX_new()) == NULL)
250 goto err;
251 BN_CTX_start(ctx);
252
253 /* A := abs(a) */
254 if (a->neg)
255 {
256 BIGNUM *t;
257 if ((t = BN_CTX_get(ctx)) == NULL) goto err;
258 BN_copy(t, a);
259 t->neg = 0;
260 A = t;
261 }
262 else
263 A = a;
264 A1 = BN_CTX_get(ctx);
265 A1_odd = BN_CTX_get(ctx);
266 check = BN_CTX_get(ctx);
267 if (check == NULL) goto err;
268
269 /* compute A1 := A - 1 */
270 if (!BN_copy(A1, A))
271 goto err;
272 if (!BN_sub_word(A1, 1))
273 goto err;
274 if (BN_is_zero(A1))
275 {
276 ret = 0;
277 goto err;
278 }
279
280 /* write A1 as A1_odd * 2^k */
281 k = 1;
282 while (!BN_is_bit_set(A1, k))
283 k++;
284 if (!BN_rshift(A1_odd, A1, k))
285 goto err;
286
287 /* Montgomery setup for computations mod A */
288 mont = BN_MONT_CTX_new();
289 if (mont == NULL)
290 goto err;
291 if (!BN_MONT_CTX_set(mont, A, ctx))
292 goto err;
293
294 for (i = 0; i < checks; i++)
295 {
296 if (!BN_pseudo_rand_range(check, A1))
297 goto err;
298 if (!BN_add_word(check, 1))
299 goto err;
300 /* now 1 <= check < A */
301
302 j = witness(check, A, A1, A1_odd, k, ctx, mont);
303 if (j == -1) goto err;
304 if (j)
305 {
306 ret=0;
307 goto err;
308 }
309 if (callback != NULL) callback(1,i,cb_arg);
310 }
311 ret=1;
312err:
313 if (ctx != NULL)
314 {
315 BN_CTX_end(ctx);
316 if (ctx_passed == NULL)
317 BN_CTX_free(ctx);
318 }
319 if (mont != NULL)
320 BN_MONT_CTX_free(mont);
321
322 return(ret);
323 }
324
325static int witness(BIGNUM *w, const BIGNUM *a, const BIGNUM *a1,
326 const BIGNUM *a1_odd, int k, BN_CTX *ctx, BN_MONT_CTX *mont)
327 {
328 if (!BN_mod_exp_mont(w, w, a1_odd, a, ctx, mont)) /* w := w^a1_odd mod a */
329 return -1;
330 if (BN_is_one(w))
331 return 0; /* probably prime */
332 if (BN_cmp(w, a1) == 0)
333 return 0; /* w == -1 (mod a), 'a' is probably prime */
334 while (--k)
335 {
336 if (!BN_mod_mul(w, w, w, a, ctx)) /* w := w^2 mod a */
337 return -1;
338 if (BN_is_one(w))
339 return 1; /* 'a' is composite, otherwise a previous 'w' would
340 * have been == -1 (mod 'a') */
341 if (BN_cmp(w, a1) == 0)
342 return 0; /* w == -1 (mod a), 'a' is probably prime */
343 }
344 /* If we get here, 'w' is the (a-1)/2-th power of the original 'w',
345 * and it is neither -1 nor +1 -- so 'a' cannot be prime */
346 return 1;
347 }
348
349static int probable_prime(BIGNUM *rnd, int bits)
350 {
351 int i;
352 BN_ULONG mods[NUMPRIMES];
353 BN_ULONG delta,d;
354
355again:
356 if (!BN_rand(rnd,bits,1,1)) return(0);
357 /* we now have a random number 'rand' to test. */
358 for (i=1; i<NUMPRIMES; i++)
359 mods[i]=BN_mod_word(rnd,(BN_ULONG)primes[i]);
360 delta=0;
361 loop: for (i=1; i<NUMPRIMES; i++)
362 {
363 /* check that rnd is not a prime and also
364 * that gcd(rnd-1,primes) == 1 (except for 2) */
365 if (((mods[i]+delta)%primes[i]) <= 1)
366 {
367 d=delta;
368 delta+=2;
369 /* perhaps need to check for overflow of
370 * delta (but delta can be up to 2^32)
371 * 21-May-98 eay - added overflow check */
372 if (delta < d) goto again;
373 goto loop;
374 }
375 }
376 if (!BN_add_word(rnd,delta)) return(0);
377 return(1);
378 }
379
380static int probable_prime_dh(BIGNUM *rnd, int bits,
381 const BIGNUM *add, const BIGNUM *rem, BN_CTX *ctx)
382 {
383 int i,ret=0;
384 BIGNUM *t1;
385
386 BN_CTX_start(ctx);
387 if ((t1 = BN_CTX_get(ctx)) == NULL) goto err;
388
389 if (!BN_rand(rnd,bits,0,1)) goto err;
390
391 /* we need ((rnd-rem) % add) == 0 */
392
393 if (!BN_mod(t1,rnd,add,ctx)) goto err;
394 if (!BN_sub(rnd,rnd,t1)) goto err;
395 if (rem == NULL)
396 { if (!BN_add_word(rnd,1)) goto err; }
397 else
398 { if (!BN_add(rnd,rnd,rem)) goto err; }
399
400 /* we now have a random number 'rand' to test. */
401
402 loop: for (i=1; i<NUMPRIMES; i++)
403 {
404 /* check that rnd is a prime */
405 if (BN_mod_word(rnd,(BN_ULONG)primes[i]) <= 1)
406 {
407 if (!BN_add(rnd,rnd,add)) goto err;
408 goto loop;
409 }
410 }
411 ret=1;
412err:
413 BN_CTX_end(ctx);
414 return(ret);
415 }
416
417static int probable_prime_dh_safe(BIGNUM *p, int bits, const BIGNUM *padd,
418 const BIGNUM *rem, BN_CTX *ctx)
419 {
420 int i,ret=0;
421 BIGNUM *t1,*qadd,*q;
422
423 bits--;
424 BN_CTX_start(ctx);
425 t1 = BN_CTX_get(ctx);
426 q = BN_CTX_get(ctx);
427 qadd = BN_CTX_get(ctx);
428 if (qadd == NULL) goto err;
429
430 if (!BN_rshift1(qadd,padd)) goto err;
431
432 if (!BN_rand(q,bits,0,1)) goto err;
433
434 /* we need ((rnd-rem) % add) == 0 */
435 if (!BN_mod(t1,q,qadd,ctx)) goto err;
436 if (!BN_sub(q,q,t1)) goto err;
437 if (rem == NULL)
438 { if (!BN_add_word(q,1)) goto err; }
439 else
440 {
441 if (!BN_rshift1(t1,rem)) goto err;
442 if (!BN_add(q,q,t1)) goto err;
443 }
444
445 /* we now have a random number 'rand' to test. */
446 if (!BN_lshift1(p,q)) goto err;
447 if (!BN_add_word(p,1)) goto err;
448
449 loop: for (i=1; i<NUMPRIMES; i++)
450 {
451 /* check that p and q are prime */
452 /* check that for p and q
453 * gcd(p-1,primes) == 1 (except for 2) */
454 if ( (BN_mod_word(p,(BN_ULONG)primes[i]) == 0) ||
455 (BN_mod_word(q,(BN_ULONG)primes[i]) == 0))
456 {
457 if (!BN_add(p,p,padd)) goto err;
458 if (!BN_add(q,q,qadd)) goto err;
459 goto loop;
460 }
461 }
462 ret=1;
463err:
464 BN_CTX_end(ctx);
465 return(ret);
466 }
diff --git a/src/lib/libcrypto/bn/bn_prime.h b/src/lib/libcrypto/bn/bn_prime.h
deleted file mode 100644
index b7cf9a9bfe..0000000000
--- a/src/lib/libcrypto/bn/bn_prime.h
+++ /dev/null
@@ -1,325 +0,0 @@
1/* Auto generated by bn_prime.pl */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#ifndef EIGHT_BIT
60#define NUMPRIMES 2048
61#else
62#define NUMPRIMES 54
63#endif
64static const unsigned int primes[NUMPRIMES]=
65 {
66 2, 3, 5, 7, 11, 13, 17, 19,
67 23, 29, 31, 37, 41, 43, 47, 53,
68 59, 61, 67, 71, 73, 79, 83, 89,
69 97, 101, 103, 107, 109, 113, 127, 131,
70 137, 139, 149, 151, 157, 163, 167, 173,
71 179, 181, 191, 193, 197, 199, 211, 223,
72 227, 229, 233, 239, 241, 251,
73#ifndef EIGHT_BIT
74 257, 263,
75 269, 271, 277, 281, 283, 293, 307, 311,
76 313, 317, 331, 337, 347, 349, 353, 359,
77 367, 373, 379, 383, 389, 397, 401, 409,
78 419, 421, 431, 433, 439, 443, 449, 457,
79 461, 463, 467, 479, 487, 491, 499, 503,
80 509, 521, 523, 541, 547, 557, 563, 569,
81 571, 577, 587, 593, 599, 601, 607, 613,
82 617, 619, 631, 641, 643, 647, 653, 659,
83 661, 673, 677, 683, 691, 701, 709, 719,
84 727, 733, 739, 743, 751, 757, 761, 769,
85 773, 787, 797, 809, 811, 821, 823, 827,
86 829, 839, 853, 857, 859, 863, 877, 881,
87 883, 887, 907, 911, 919, 929, 937, 941,
88 947, 953, 967, 971, 977, 983, 991, 997,
89 1009,1013,1019,1021,1031,1033,1039,1049,
90 1051,1061,1063,1069,1087,1091,1093,1097,
91 1103,1109,1117,1123,1129,1151,1153,1163,
92 1171,1181,1187,1193,1201,1213,1217,1223,
93 1229,1231,1237,1249,1259,1277,1279,1283,
94 1289,1291,1297,1301,1303,1307,1319,1321,
95 1327,1361,1367,1373,1381,1399,1409,1423,
96 1427,1429,1433,1439,1447,1451,1453,1459,
97 1471,1481,1483,1487,1489,1493,1499,1511,
98 1523,1531,1543,1549,1553,1559,1567,1571,
99 1579,1583,1597,1601,1607,1609,1613,1619,
100 1621,1627,1637,1657,1663,1667,1669,1693,
101 1697,1699,1709,1721,1723,1733,1741,1747,
102 1753,1759,1777,1783,1787,1789,1801,1811,
103 1823,1831,1847,1861,1867,1871,1873,1877,
104 1879,1889,1901,1907,1913,1931,1933,1949,
105 1951,1973,1979,1987,1993,1997,1999,2003,
106 2011,2017,2027,2029,2039,2053,2063,2069,
107 2081,2083,2087,2089,2099,2111,2113,2129,
108 2131,2137,2141,2143,2153,2161,2179,2203,
109 2207,2213,2221,2237,2239,2243,2251,2267,
110 2269,2273,2281,2287,2293,2297,2309,2311,
111 2333,2339,2341,2347,2351,2357,2371,2377,
112 2381,2383,2389,2393,2399,2411,2417,2423,
113 2437,2441,2447,2459,2467,2473,2477,2503,
114 2521,2531,2539,2543,2549,2551,2557,2579,
115 2591,2593,2609,2617,2621,2633,2647,2657,
116 2659,2663,2671,2677,2683,2687,2689,2693,
117 2699,2707,2711,2713,2719,2729,2731,2741,
118 2749,2753,2767,2777,2789,2791,2797,2801,
119 2803,2819,2833,2837,2843,2851,2857,2861,
120 2879,2887,2897,2903,2909,2917,2927,2939,
121 2953,2957,2963,2969,2971,2999,3001,3011,
122 3019,3023,3037,3041,3049,3061,3067,3079,
123 3083,3089,3109,3119,3121,3137,3163,3167,
124 3169,3181,3187,3191,3203,3209,3217,3221,
125 3229,3251,3253,3257,3259,3271,3299,3301,
126 3307,3313,3319,3323,3329,3331,3343,3347,
127 3359,3361,3371,3373,3389,3391,3407,3413,
128 3433,3449,3457,3461,3463,3467,3469,3491,
129 3499,3511,3517,3527,3529,3533,3539,3541,
130 3547,3557,3559,3571,3581,3583,3593,3607,
131 3613,3617,3623,3631,3637,3643,3659,3671,
132 3673,3677,3691,3697,3701,3709,3719,3727,
133 3733,3739,3761,3767,3769,3779,3793,3797,
134 3803,3821,3823,3833,3847,3851,3853,3863,
135 3877,3881,3889,3907,3911,3917,3919,3923,
136 3929,3931,3943,3947,3967,3989,4001,4003,
137 4007,4013,4019,4021,4027,4049,4051,4057,
138 4073,4079,4091,4093,4099,4111,4127,4129,
139 4133,4139,4153,4157,4159,4177,4201,4211,
140 4217,4219,4229,4231,4241,4243,4253,4259,
141 4261,4271,4273,4283,4289,4297,4327,4337,
142 4339,4349,4357,4363,4373,4391,4397,4409,
143 4421,4423,4441,4447,4451,4457,4463,4481,
144 4483,4493,4507,4513,4517,4519,4523,4547,
145 4549,4561,4567,4583,4591,4597,4603,4621,
146 4637,4639,4643,4649,4651,4657,4663,4673,
147 4679,4691,4703,4721,4723,4729,4733,4751,
148 4759,4783,4787,4789,4793,4799,4801,4813,
149 4817,4831,4861,4871,4877,4889,4903,4909,
150 4919,4931,4933,4937,4943,4951,4957,4967,
151 4969,4973,4987,4993,4999,5003,5009,5011,
152 5021,5023,5039,5051,5059,5077,5081,5087,
153 5099,5101,5107,5113,5119,5147,5153,5167,
154 5171,5179,5189,5197,5209,5227,5231,5233,
155 5237,5261,5273,5279,5281,5297,5303,5309,
156 5323,5333,5347,5351,5381,5387,5393,5399,
157 5407,5413,5417,5419,5431,5437,5441,5443,
158 5449,5471,5477,5479,5483,5501,5503,5507,
159 5519,5521,5527,5531,5557,5563,5569,5573,
160 5581,5591,5623,5639,5641,5647,5651,5653,
161 5657,5659,5669,5683,5689,5693,5701,5711,
162 5717,5737,5741,5743,5749,5779,5783,5791,
163 5801,5807,5813,5821,5827,5839,5843,5849,
164 5851,5857,5861,5867,5869,5879,5881,5897,
165 5903,5923,5927,5939,5953,5981,5987,6007,
166 6011,6029,6037,6043,6047,6053,6067,6073,
167 6079,6089,6091,6101,6113,6121,6131,6133,
168 6143,6151,6163,6173,6197,6199,6203,6211,
169 6217,6221,6229,6247,6257,6263,6269,6271,
170 6277,6287,6299,6301,6311,6317,6323,6329,
171 6337,6343,6353,6359,6361,6367,6373,6379,
172 6389,6397,6421,6427,6449,6451,6469,6473,
173 6481,6491,6521,6529,6547,6551,6553,6563,
174 6569,6571,6577,6581,6599,6607,6619,6637,
175 6653,6659,6661,6673,6679,6689,6691,6701,
176 6703,6709,6719,6733,6737,6761,6763,6779,
177 6781,6791,6793,6803,6823,6827,6829,6833,
178 6841,6857,6863,6869,6871,6883,6899,6907,
179 6911,6917,6947,6949,6959,6961,6967,6971,
180 6977,6983,6991,6997,7001,7013,7019,7027,
181 7039,7043,7057,7069,7079,7103,7109,7121,
182 7127,7129,7151,7159,7177,7187,7193,7207,
183 7211,7213,7219,7229,7237,7243,7247,7253,
184 7283,7297,7307,7309,7321,7331,7333,7349,
185 7351,7369,7393,7411,7417,7433,7451,7457,
186 7459,7477,7481,7487,7489,7499,7507,7517,
187 7523,7529,7537,7541,7547,7549,7559,7561,
188 7573,7577,7583,7589,7591,7603,7607,7621,
189 7639,7643,7649,7669,7673,7681,7687,7691,
190 7699,7703,7717,7723,7727,7741,7753,7757,
191 7759,7789,7793,7817,7823,7829,7841,7853,
192 7867,7873,7877,7879,7883,7901,7907,7919,
193 7927,7933,7937,7949,7951,7963,7993,8009,
194 8011,8017,8039,8053,8059,8069,8081,8087,
195 8089,8093,8101,8111,8117,8123,8147,8161,
196 8167,8171,8179,8191,8209,8219,8221,8231,
197 8233,8237,8243,8263,8269,8273,8287,8291,
198 8293,8297,8311,8317,8329,8353,8363,8369,
199 8377,8387,8389,8419,8423,8429,8431,8443,
200 8447,8461,8467,8501,8513,8521,8527,8537,
201 8539,8543,8563,8573,8581,8597,8599,8609,
202 8623,8627,8629,8641,8647,8663,8669,8677,
203 8681,8689,8693,8699,8707,8713,8719,8731,
204 8737,8741,8747,8753,8761,8779,8783,8803,
205 8807,8819,8821,8831,8837,8839,8849,8861,
206 8863,8867,8887,8893,8923,8929,8933,8941,
207 8951,8963,8969,8971,8999,9001,9007,9011,
208 9013,9029,9041,9043,9049,9059,9067,9091,
209 9103,9109,9127,9133,9137,9151,9157,9161,
210 9173,9181,9187,9199,9203,9209,9221,9227,
211 9239,9241,9257,9277,9281,9283,9293,9311,
212 9319,9323,9337,9341,9343,9349,9371,9377,
213 9391,9397,9403,9413,9419,9421,9431,9433,
214 9437,9439,9461,9463,9467,9473,9479,9491,
215 9497,9511,9521,9533,9539,9547,9551,9587,
216 9601,9613,9619,9623,9629,9631,9643,9649,
217 9661,9677,9679,9689,9697,9719,9721,9733,
218 9739,9743,9749,9767,9769,9781,9787,9791,
219 9803,9811,9817,9829,9833,9839,9851,9857,
220 9859,9871,9883,9887,9901,9907,9923,9929,
221 9931,9941,9949,9967,9973,10007,10009,10037,
222 10039,10061,10067,10069,10079,10091,10093,10099,
223 10103,10111,10133,10139,10141,10151,10159,10163,
224 10169,10177,10181,10193,10211,10223,10243,10247,
225 10253,10259,10267,10271,10273,10289,10301,10303,
226 10313,10321,10331,10333,10337,10343,10357,10369,
227 10391,10399,10427,10429,10433,10453,10457,10459,
228 10463,10477,10487,10499,10501,10513,10529,10531,
229 10559,10567,10589,10597,10601,10607,10613,10627,
230 10631,10639,10651,10657,10663,10667,10687,10691,
231 10709,10711,10723,10729,10733,10739,10753,10771,
232 10781,10789,10799,10831,10837,10847,10853,10859,
233 10861,10867,10883,10889,10891,10903,10909,10937,
234 10939,10949,10957,10973,10979,10987,10993,11003,
235 11027,11047,11057,11059,11069,11071,11083,11087,
236 11093,11113,11117,11119,11131,11149,11159,11161,
237 11171,11173,11177,11197,11213,11239,11243,11251,
238 11257,11261,11273,11279,11287,11299,11311,11317,
239 11321,11329,11351,11353,11369,11383,11393,11399,
240 11411,11423,11437,11443,11447,11467,11471,11483,
241 11489,11491,11497,11503,11519,11527,11549,11551,
242 11579,11587,11593,11597,11617,11621,11633,11657,
243 11677,11681,11689,11699,11701,11717,11719,11731,
244 11743,11777,11779,11783,11789,11801,11807,11813,
245 11821,11827,11831,11833,11839,11863,11867,11887,
246 11897,11903,11909,11923,11927,11933,11939,11941,
247 11953,11959,11969,11971,11981,11987,12007,12011,
248 12037,12041,12043,12049,12071,12073,12097,12101,
249 12107,12109,12113,12119,12143,12149,12157,12161,
250 12163,12197,12203,12211,12227,12239,12241,12251,
251 12253,12263,12269,12277,12281,12289,12301,12323,
252 12329,12343,12347,12373,12377,12379,12391,12401,
253 12409,12413,12421,12433,12437,12451,12457,12473,
254 12479,12487,12491,12497,12503,12511,12517,12527,
255 12539,12541,12547,12553,12569,12577,12583,12589,
256 12601,12611,12613,12619,12637,12641,12647,12653,
257 12659,12671,12689,12697,12703,12713,12721,12739,
258 12743,12757,12763,12781,12791,12799,12809,12821,
259 12823,12829,12841,12853,12889,12893,12899,12907,
260 12911,12917,12919,12923,12941,12953,12959,12967,
261 12973,12979,12983,13001,13003,13007,13009,13033,
262 13037,13043,13049,13063,13093,13099,13103,13109,
263 13121,13127,13147,13151,13159,13163,13171,13177,
264 13183,13187,13217,13219,13229,13241,13249,13259,
265 13267,13291,13297,13309,13313,13327,13331,13337,
266 13339,13367,13381,13397,13399,13411,13417,13421,
267 13441,13451,13457,13463,13469,13477,13487,13499,
268 13513,13523,13537,13553,13567,13577,13591,13597,
269 13613,13619,13627,13633,13649,13669,13679,13681,
270 13687,13691,13693,13697,13709,13711,13721,13723,
271 13729,13751,13757,13759,13763,13781,13789,13799,
272 13807,13829,13831,13841,13859,13873,13877,13879,
273 13883,13901,13903,13907,13913,13921,13931,13933,
274 13963,13967,13997,13999,14009,14011,14029,14033,
275 14051,14057,14071,14081,14083,14087,14107,14143,
276 14149,14153,14159,14173,14177,14197,14207,14221,
277 14243,14249,14251,14281,14293,14303,14321,14323,
278 14327,14341,14347,14369,14387,14389,14401,14407,
279 14411,14419,14423,14431,14437,14447,14449,14461,
280 14479,14489,14503,14519,14533,14537,14543,14549,
281 14551,14557,14561,14563,14591,14593,14621,14627,
282 14629,14633,14639,14653,14657,14669,14683,14699,
283 14713,14717,14723,14731,14737,14741,14747,14753,
284 14759,14767,14771,14779,14783,14797,14813,14821,
285 14827,14831,14843,14851,14867,14869,14879,14887,
286 14891,14897,14923,14929,14939,14947,14951,14957,
287 14969,14983,15013,15017,15031,15053,15061,15073,
288 15077,15083,15091,15101,15107,15121,15131,15137,
289 15139,15149,15161,15173,15187,15193,15199,15217,
290 15227,15233,15241,15259,15263,15269,15271,15277,
291 15287,15289,15299,15307,15313,15319,15329,15331,
292 15349,15359,15361,15373,15377,15383,15391,15401,
293 15413,15427,15439,15443,15451,15461,15467,15473,
294 15493,15497,15511,15527,15541,15551,15559,15569,
295 15581,15583,15601,15607,15619,15629,15641,15643,
296 15647,15649,15661,15667,15671,15679,15683,15727,
297 15731,15733,15737,15739,15749,15761,15767,15773,
298 15787,15791,15797,15803,15809,15817,15823,15859,
299 15877,15881,15887,15889,15901,15907,15913,15919,
300 15923,15937,15959,15971,15973,15991,16001,16007,
301 16033,16057,16061,16063,16067,16069,16073,16087,
302 16091,16097,16103,16111,16127,16139,16141,16183,
303 16187,16189,16193,16217,16223,16229,16231,16249,
304 16253,16267,16273,16301,16319,16333,16339,16349,
305 16361,16363,16369,16381,16411,16417,16421,16427,
306 16433,16447,16451,16453,16477,16481,16487,16493,
307 16519,16529,16547,16553,16561,16567,16573,16603,
308 16607,16619,16631,16633,16649,16651,16657,16661,
309 16673,16691,16693,16699,16703,16729,16741,16747,
310 16759,16763,16787,16811,16823,16829,16831,16843,
311 16871,16879,16883,16889,16901,16903,16921,16927,
312 16931,16937,16943,16963,16979,16981,16987,16993,
313 17011,17021,17027,17029,17033,17041,17047,17053,
314 17077,17093,17099,17107,17117,17123,17137,17159,
315 17167,17183,17189,17191,17203,17207,17209,17231,
316 17239,17257,17291,17293,17299,17317,17321,17327,
317 17333,17341,17351,17359,17377,17383,17387,17389,
318 17393,17401,17417,17419,17431,17443,17449,17467,
319 17471,17477,17483,17489,17491,17497,17509,17519,
320 17539,17551,17569,17573,17579,17581,17597,17599,
321 17609,17623,17627,17657,17659,17669,17681,17683,
322 17707,17713,17729,17737,17747,17749,17761,17783,
323 17789,17791,17807,17827,17837,17839,17851,17863,
324#endif
325 };
diff --git a/src/lib/libcrypto/bn/bn_prime.pl b/src/lib/libcrypto/bn/bn_prime.pl
deleted file mode 100644
index 9fc3765486..0000000000
--- a/src/lib/libcrypto/bn/bn_prime.pl
+++ /dev/null
@@ -1,117 +0,0 @@
1#!/usr/local/bin/perl
2# bn_prime.pl
3
4$num=2048;
5$num=$ARGV[0] if ($#ARGV >= 0);
6
7push(@primes,2);
8$p=1;
9loop: while ($#primes < $num-1)
10 {
11 $p+=2;
12 $s=int(sqrt($p));
13
14 for ($i=0; $primes[$i]<=$s; $i++)
15 {
16 next loop if (($p%$primes[$i]) == 0);
17 }
18 push(@primes,$p);
19 }
20
21# print <<"EOF";
22# /* Auto generated by bn_prime.pl */
23# /* Copyright (C) 1995-1997 Eric Young (eay\@mincom.oz.au).
24# * All rights reserved.
25# * Copyright remains Eric Young's, and as such any Copyright notices in
26# * the code are not to be removed.
27# * See the COPYRIGHT file in the SSLeay distribution for more details.
28# */
29#
30# EOF
31
32print <<\EOF;
33/* Auto generated by bn_prime.pl */
34/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
35 * All rights reserved.
36 *
37 * This package is an SSL implementation written
38 * by Eric Young (eay@cryptsoft.com).
39 * The implementation was written so as to conform with Netscapes SSL.
40 *
41 * This library is free for commercial and non-commercial use as long as
42 * the following conditions are aheared to. The following conditions
43 * apply to all code found in this distribution, be it the RC4, RSA,
44 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
45 * included with this distribution is covered by the same copyright terms
46 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
47 *
48 * Copyright remains Eric Young's, and as such any Copyright notices in
49 * the code are not to be removed.
50 * If this package is used in a product, Eric Young should be given attribution
51 * as the author of the parts of the library used.
52 * This can be in the form of a textual message at program startup or
53 * in documentation (online or textual) provided with the package.
54 *
55 * Redistribution and use in source and binary forms, with or without
56 * modification, are permitted provided that the following conditions
57 * are met:
58 * 1. Redistributions of source code must retain the copyright
59 * notice, this list of conditions and the following disclaimer.
60 * 2. Redistributions in binary form must reproduce the above copyright
61 * notice, this list of conditions and the following disclaimer in the
62 * documentation and/or other materials provided with the distribution.
63 * 3. All advertising materials mentioning features or use of this software
64 * must display the following acknowledgement:
65 * "This product includes cryptographic software written by
66 * Eric Young (eay@cryptsoft.com)"
67 * The word 'cryptographic' can be left out if the rouines from the library
68 * being used are not cryptographic related :-).
69 * 4. If you include any Windows specific code (or a derivative thereof) from
70 * the apps directory (application code) you must include an acknowledgement:
71 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
72 *
73 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
74 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
75 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
76 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
77 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
78 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
79 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
80 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
81 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
82 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
83 * SUCH DAMAGE.
84 *
85 * The licence and distribution terms for any publically available version or
86 * derivative of this code cannot be changed. i.e. this code cannot simply be
87 * copied and put under another distribution licence
88 * [including the GNU Public Licence.]
89 */
90
91EOF
92
93for ($i=0; $i <= $#primes; $i++)
94 {
95 if ($primes[$i] > 256)
96 {
97 $eight=$i;
98 last;
99 }
100 }
101
102printf "#ifndef EIGHT_BIT\n";
103printf "#define NUMPRIMES %d\n",$num;
104printf "#else\n";
105printf "#define NUMPRIMES %d\n",$eight;
106printf "#endif\n";
107print "static const unsigned int primes[NUMPRIMES]=\n\t{\n\t";
108$init=0;
109for ($i=0; $i <= $#primes; $i++)
110 {
111 printf "\n#ifndef EIGHT_BIT\n\t" if ($primes[$i] > 256) && !($init++);
112 printf("\n\t") if (($i%8) == 0) && ($i != 0);
113 printf("%4d,",$primes[$i]);
114 }
115print "\n#endif\n\t};\n";
116
117
diff --git a/src/lib/libcrypto/bn/bn_print.c b/src/lib/libcrypto/bn/bn_print.c
deleted file mode 100644
index acba7ed7ee..0000000000
--- a/src/lib/libcrypto/bn/bn_print.c
+++ /dev/null
@@ -1,333 +0,0 @@
1/* crypto/bn/bn_print.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#include <stdio.h>
60#include <ctype.h>
61#include "cryptlib.h"
62#include <openssl/buffer.h>
63#include "bn_lcl.h"
64
65static const char *Hex="0123456789ABCDEF";
66
67/* Must 'OPENSSL_free' the returned data */
68char *BN_bn2hex(const BIGNUM *a)
69 {
70 int i,j,v,z=0;
71 char *buf;
72 char *p;
73
74 buf=(char *)OPENSSL_malloc(a->top*BN_BYTES*2+2);
75 if (buf == NULL)
76 {
77 BNerr(BN_F_BN_BN2HEX,ERR_R_MALLOC_FAILURE);
78 goto err;
79 }
80 p=buf;
81 if (a->neg) *(p++)='-';
82 if (BN_is_zero(a)) *(p++)='0';
83 for (i=a->top-1; i >=0; i--)
84 {
85 for (j=BN_BITS2-8; j >= 0; j-=8)
86 {
87 /* strip leading zeros */
88 v=((int)(a->d[i]>>(long)j))&0xff;
89 if (z || (v != 0))
90 {
91 *(p++)=Hex[v>>4];
92 *(p++)=Hex[v&0x0f];
93 z=1;
94 }
95 }
96 }
97 *p='\0';
98err:
99 return(buf);
100 }
101
102/* Must 'OPENSSL_free' the returned data */
103char *BN_bn2dec(const BIGNUM *a)
104 {
105 int i=0,num;
106 char *buf=NULL;
107 char *p;
108 BIGNUM *t=NULL;
109 BN_ULONG *bn_data=NULL,*lp;
110
111 i=BN_num_bits(a)*3;
112 num=(i/10+i/1000+3)+1;
113 bn_data=(BN_ULONG *)OPENSSL_malloc((num/BN_DEC_NUM+1)*sizeof(BN_ULONG));
114 buf=(char *)OPENSSL_malloc(num+3);
115 if ((buf == NULL) || (bn_data == NULL))
116 {
117 BNerr(BN_F_BN_BN2DEC,ERR_R_MALLOC_FAILURE);
118 goto err;
119 }
120 if ((t=BN_dup(a)) == NULL) goto err;
121
122#define BUF_REMAIN (num+3 - (size_t)(p - buf))
123 p=buf;
124 lp=bn_data;
125 if (t->neg) *(p++)='-';
126 if (BN_is_zero(t))
127 {
128 *(p++)='0';
129 *(p++)='\0';
130 }
131 else
132 {
133 i=0;
134 while (!BN_is_zero(t))
135 {
136 *lp=BN_div_word(t,BN_DEC_CONV);
137 lp++;
138 }
139 lp--;
140 /* We now have a series of blocks, BN_DEC_NUM chars
141 * in length, where the last one needs truncation.
142 * The blocks need to be reversed in order. */
143 BIO_snprintf(p,BUF_REMAIN,BN_DEC_FMT1,*lp);
144 while (*p) p++;
145 while (lp != bn_data)
146 {
147 lp--;
148 BIO_snprintf(p,BUF_REMAIN,BN_DEC_FMT2,*lp);
149 while (*p) p++;
150 }
151 }
152err:
153 if (bn_data != NULL) OPENSSL_free(bn_data);
154 if (t != NULL) BN_free(t);
155 return(buf);
156 }
157
158int BN_hex2bn(BIGNUM **bn, const char *a)
159 {
160 BIGNUM *ret=NULL;
161 BN_ULONG l=0;
162 int neg=0,h,m,i,j,k,c;
163 int num;
164
165 if ((a == NULL) || (*a == '\0')) return(0);
166
167 if (*a == '-') { neg=1; a++; }
168
169 for (i=0; isxdigit((unsigned char) a[i]); i++)
170 ;
171
172 num=i+neg;
173 if (bn == NULL) return(num);
174
175 /* a is the start of the hex digits, and it is 'i' long */
176 if (*bn == NULL)
177 {
178 if ((ret=BN_new()) == NULL) return(0);
179 }
180 else
181 {
182 ret= *bn;
183 BN_zero(ret);
184 }
185
186 /* i is the number of hex digests; */
187 if (bn_expand(ret,i*4) == NULL) goto err;
188
189 j=i; /* least significant 'hex' */
190 m=0;
191 h=0;
192 while (j > 0)
193 {
194 m=((BN_BYTES*2) <= j)?(BN_BYTES*2):j;
195 l=0;
196 for (;;)
197 {
198 c=a[j-m];
199 if ((c >= '0') && (c <= '9')) k=c-'0';
200 else if ((c >= 'a') && (c <= 'f')) k=c-'a'+10;
201 else if ((c >= 'A') && (c <= 'F')) k=c-'A'+10;
202 else k=0; /* paranoia */
203 l=(l<<4)|k;
204
205 if (--m <= 0)
206 {
207 ret->d[h++]=l;
208 break;
209 }
210 }
211 j-=(BN_BYTES*2);
212 }
213 ret->top=h;
214 bn_fix_top(ret);
215 ret->neg=neg;
216
217 *bn=ret;
218 return(num);
219err:
220 if (*bn == NULL) BN_free(ret);
221 return(0);
222 }
223
224int BN_dec2bn(BIGNUM **bn, const char *a)
225 {
226 BIGNUM *ret=NULL;
227 BN_ULONG l=0;
228 int neg=0,i,j;
229 int num;
230
231 if ((a == NULL) || (*a == '\0')) return(0);
232 if (*a == '-') { neg=1; a++; }
233
234 for (i=0; isdigit((unsigned char) a[i]); i++)
235 ;
236
237 num=i+neg;
238 if (bn == NULL) return(num);
239
240 /* a is the start of the digits, and it is 'i' long.
241 * We chop it into BN_DEC_NUM digits at a time */
242 if (*bn == NULL)
243 {
244 if ((ret=BN_new()) == NULL) return(0);
245 }
246 else
247 {
248 ret= *bn;
249 BN_zero(ret);
250 }
251
252 /* i is the number of digests, a bit of an over expand; */
253 if (bn_expand(ret,i*4) == NULL) goto err;
254
255 j=BN_DEC_NUM-(i%BN_DEC_NUM);
256 if (j == BN_DEC_NUM) j=0;
257 l=0;
258 while (*a)
259 {
260 l*=10;
261 l+= *a-'0';
262 a++;
263 if (++j == BN_DEC_NUM)
264 {
265 BN_mul_word(ret,BN_DEC_CONV);
266 BN_add_word(ret,l);
267 l=0;
268 j=0;
269 }
270 }
271 ret->neg=neg;
272
273 bn_fix_top(ret);
274 *bn=ret;
275 return(num);
276err:
277 if (*bn == NULL) BN_free(ret);
278 return(0);
279 }
280
281#ifndef OPENSSL_NO_BIO
282#ifndef OPENSSL_NO_FP_API
283int BN_print_fp(FILE *fp, const BIGNUM *a)
284 {
285 BIO *b;
286 int ret;
287
288 if ((b=BIO_new(BIO_s_file())) == NULL)
289 return(0);
290 BIO_set_fp(b,fp,BIO_NOCLOSE);
291 ret=BN_print(b,a);
292 BIO_free(b);
293 return(ret);
294 }
295#endif
296
297int BN_print(BIO *bp, const BIGNUM *a)
298 {
299 int i,j,v,z=0;
300 int ret=0;
301
302 if ((a->neg) && (BIO_write(bp,"-",1) != 1)) goto end;
303 if ((BN_is_zero(a)) && (BIO_write(bp,"0",1) != 1)) goto end;
304 for (i=a->top-1; i >=0; i--)
305 {
306 for (j=BN_BITS2-4; j >= 0; j-=4)
307 {
308 /* strip leading zeros */
309 v=((int)(a->d[i]>>(long)j))&0x0f;
310 if (z || (v != 0))
311 {
312 if (BIO_write(bp,&(Hex[v]),1) != 1)
313 goto end;
314 z=1;
315 }
316 }
317 }
318 ret=1;
319end:
320 return(ret);
321 }
322#endif
323
324#ifdef BN_DEBUG
325void bn_dump1(FILE *o, const char *a, const BN_ULONG *b,int n)
326 {
327 int i;
328 fprintf(o, "%s=", a);
329 for (i=n-1;i>=0;i--)
330 fprintf(o, "%08lX", b[i]); /* assumes 32-bit BN_ULONG */
331 fprintf(o, "\n");
332 }
333#endif
diff --git a/src/lib/libcrypto/bn/bn_rand.c b/src/lib/libcrypto/bn/bn_rand.c
deleted file mode 100644
index 893c9d2af9..0000000000
--- a/src/lib/libcrypto/bn/bn_rand.c
+++ /dev/null
@@ -1,291 +0,0 @@
1/* crypto/bn/bn_rand.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58/* ====================================================================
59 * Copyright (c) 1998-2001 The OpenSSL Project. All rights reserved.
60 *
61 * Redistribution and use in source and binary forms, with or without
62 * modification, are permitted provided that the following conditions
63 * are met:
64 *
65 * 1. Redistributions of source code must retain the above copyright
66 * notice, this list of conditions and the following disclaimer.
67 *
68 * 2. Redistributions in binary form must reproduce the above copyright
69 * notice, this list of conditions and the following disclaimer in
70 * the documentation and/or other materials provided with the
71 * distribution.
72 *
73 * 3. All advertising materials mentioning features or use of this
74 * software must display the following acknowledgment:
75 * "This product includes software developed by the OpenSSL Project
76 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
77 *
78 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
79 * endorse or promote products derived from this software without
80 * prior written permission. For written permission, please contact
81 * openssl-core@openssl.org.
82 *
83 * 5. Products derived from this software may not be called "OpenSSL"
84 * nor may "OpenSSL" appear in their names without prior written
85 * permission of the OpenSSL Project.
86 *
87 * 6. Redistributions of any form whatsoever must retain the following
88 * acknowledgment:
89 * "This product includes software developed by the OpenSSL Project
90 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
91 *
92 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
93 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
94 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
95 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
96 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
97 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
98 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
99 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
100 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
101 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
102 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
103 * OF THE POSSIBILITY OF SUCH DAMAGE.
104 * ====================================================================
105 *
106 * This product includes cryptographic software written by Eric Young
107 * (eay@cryptsoft.com). This product includes software written by Tim
108 * Hudson (tjh@cryptsoft.com).
109 *
110 */
111
112#include <stdio.h>
113#include <time.h>
114#include "cryptlib.h"
115#include "bn_lcl.h"
116#include <openssl/rand.h>
117
118static int bnrand(int pseudorand, BIGNUM *rnd, int bits, int top, int bottom)
119 {
120 unsigned char *buf=NULL;
121 int ret=0,bit,bytes,mask;
122 time_t tim;
123
124 if (bits == 0)
125 {
126 BN_zero(rnd);
127 return 1;
128 }
129
130 bytes=(bits+7)/8;
131 bit=(bits-1)%8;
132 mask=0xff<<(bit+1);
133
134 buf=(unsigned char *)OPENSSL_malloc(bytes);
135 if (buf == NULL)
136 {
137 BNerr(BN_F_BN_RAND,ERR_R_MALLOC_FAILURE);
138 goto err;
139 }
140
141 /* make a random number and set the top and bottom bits */
142 time(&tim);
143 RAND_add(&tim,sizeof(tim),0);
144
145 if (pseudorand)
146 {
147 if (RAND_pseudo_bytes(buf, bytes) == -1)
148 goto err;
149 }
150 else
151 {
152 if (RAND_bytes(buf, bytes) <= 0)
153 goto err;
154 }
155
156#if 1
157 if (pseudorand == 2)
158 {
159 /* generate patterns that are more likely to trigger BN
160 library bugs */
161 int i;
162 unsigned char c;
163
164 for (i = 0; i < bytes; i++)
165 {
166 RAND_pseudo_bytes(&c, 1);
167 if (c >= 128 && i > 0)
168 buf[i] = buf[i-1];
169 else if (c < 42)
170 buf[i] = 0;
171 else if (c < 84)
172 buf[i] = 255;
173 }
174 }
175#endif
176
177 if (top != -1)
178 {
179 if (top)
180 {
181 if (bit == 0)
182 {
183 buf[0]=1;
184 buf[1]|=0x80;
185 }
186 else
187 {
188 buf[0]|=(3<<(bit-1));
189 }
190 }
191 else
192 {
193 buf[0]|=(1<<bit);
194 }
195 }
196 buf[0] &= ~mask;
197 if (bottom) /* set bottom bit if requested */
198 buf[bytes-1]|=1;
199 if (!BN_bin2bn(buf,bytes,rnd)) goto err;
200 ret=1;
201err:
202 if (buf != NULL)
203 {
204 OPENSSL_cleanse(buf,bytes);
205 OPENSSL_free(buf);
206 }
207 return(ret);
208 }
209
210int BN_rand(BIGNUM *rnd, int bits, int top, int bottom)
211 {
212 return bnrand(0, rnd, bits, top, bottom);
213 }
214
215int BN_pseudo_rand(BIGNUM *rnd, int bits, int top, int bottom)
216 {
217 return bnrand(1, rnd, bits, top, bottom);
218 }
219
220#if 1
221int BN_bntest_rand(BIGNUM *rnd, int bits, int top, int bottom)
222 {
223 return bnrand(2, rnd, bits, top, bottom);
224 }
225#endif
226
227
228/* random number r: 0 <= r < range */
229static int bn_rand_range(int pseudo, BIGNUM *r, BIGNUM *range)
230 {
231 int (*bn_rand)(BIGNUM *, int, int, int) = pseudo ? BN_pseudo_rand : BN_rand;
232 int n;
233
234 if (range->neg || BN_is_zero(range))
235 {
236 BNerr(BN_F_BN_RAND_RANGE, BN_R_INVALID_RANGE);
237 return 0;
238 }
239
240 n = BN_num_bits(range); /* n > 0 */
241
242 /* BN_is_bit_set(range, n - 1) always holds */
243
244 if (n == 1)
245 {
246 if (!BN_zero(r)) return 0;
247 }
248 else if (!BN_is_bit_set(range, n - 2) && !BN_is_bit_set(range, n - 3))
249 {
250 /* range = 100..._2,
251 * so 3*range (= 11..._2) is exactly one bit longer than range */
252 do
253 {
254 if (!bn_rand(r, n + 1, -1, 0)) return 0;
255 /* If r < 3*range, use r := r MOD range
256 * (which is either r, r - range, or r - 2*range).
257 * Otherwise, iterate once more.
258 * Since 3*range = 11..._2, each iteration succeeds with
259 * probability >= .75. */
260 if (BN_cmp(r ,range) >= 0)
261 {
262 if (!BN_sub(r, r, range)) return 0;
263 if (BN_cmp(r, range) >= 0)
264 if (!BN_sub(r, r, range)) return 0;
265 }
266 }
267 while (BN_cmp(r, range) >= 0);
268 }
269 else
270 {
271 do
272 {
273 /* range = 11..._2 or range = 101..._2 */
274 if (!bn_rand(r, n, -1, 0)) return 0;
275 }
276 while (BN_cmp(r, range) >= 0);
277 }
278
279 return 1;
280 }
281
282
283int BN_rand_range(BIGNUM *r, BIGNUM *range)
284 {
285 return bn_rand_range(0, r, range);
286 }
287
288int BN_pseudo_rand_range(BIGNUM *r, BIGNUM *range)
289 {
290 return bn_rand_range(1, r, range);
291 }
diff --git a/src/lib/libcrypto/bn/bn_recp.c b/src/lib/libcrypto/bn/bn_recp.c
deleted file mode 100644
index ef5fdd4708..0000000000
--- a/src/lib/libcrypto/bn/bn_recp.c
+++ /dev/null
@@ -1,230 +0,0 @@
1/* crypto/bn/bn_recp.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#include <stdio.h>
60#include "cryptlib.h"
61#include "bn_lcl.h"
62
63void BN_RECP_CTX_init(BN_RECP_CTX *recp)
64 {
65 BN_init(&(recp->N));
66 BN_init(&(recp->Nr));
67 recp->num_bits=0;
68 recp->flags=0;
69 }
70
71BN_RECP_CTX *BN_RECP_CTX_new(void)
72 {
73 BN_RECP_CTX *ret;
74
75 if ((ret=(BN_RECP_CTX *)OPENSSL_malloc(sizeof(BN_RECP_CTX))) == NULL)
76 return(NULL);
77
78 BN_RECP_CTX_init(ret);
79 ret->flags=BN_FLG_MALLOCED;
80 return(ret);
81 }
82
83void BN_RECP_CTX_free(BN_RECP_CTX *recp)
84 {
85 if(recp == NULL)
86 return;
87
88 BN_free(&(recp->N));
89 BN_free(&(recp->Nr));
90 if (recp->flags & BN_FLG_MALLOCED)
91 OPENSSL_free(recp);
92 }
93
94int BN_RECP_CTX_set(BN_RECP_CTX *recp, const BIGNUM *d, BN_CTX *ctx)
95 {
96 if (!BN_copy(&(recp->N),d)) return 0;
97 if (!BN_zero(&(recp->Nr))) return 0;
98 recp->num_bits=BN_num_bits(d);
99 recp->shift=0;
100 return(1);
101 }
102
103int BN_mod_mul_reciprocal(BIGNUM *r, const BIGNUM *x, const BIGNUM *y,
104 BN_RECP_CTX *recp, BN_CTX *ctx)
105 {
106 int ret=0;
107 BIGNUM *a;
108 const BIGNUM *ca;
109
110 BN_CTX_start(ctx);
111 if ((a = BN_CTX_get(ctx)) == NULL) goto err;
112 if (y != NULL)
113 {
114 if (x == y)
115 { if (!BN_sqr(a,x,ctx)) goto err; }
116 else
117 { if (!BN_mul(a,x,y,ctx)) goto err; }
118 ca = a;
119 }
120 else
121 ca=x; /* Just do the mod */
122
123 ret = BN_div_recp(NULL,r,ca,recp,ctx);
124err:
125 BN_CTX_end(ctx);
126 return(ret);
127 }
128
129int BN_div_recp(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m,
130 BN_RECP_CTX *recp, BN_CTX *ctx)
131 {
132 int i,j,ret=0;
133 BIGNUM *a,*b,*d,*r;
134
135 BN_CTX_start(ctx);
136 a=BN_CTX_get(ctx);
137 b=BN_CTX_get(ctx);
138 if (dv != NULL)
139 d=dv;
140 else
141 d=BN_CTX_get(ctx);
142 if (rem != NULL)
143 r=rem;
144 else
145 r=BN_CTX_get(ctx);
146 if (a == NULL || b == NULL || d == NULL || r == NULL) goto err;
147
148 if (BN_ucmp(m,&(recp->N)) < 0)
149 {
150 if (!BN_zero(d)) return 0;
151 if (!BN_copy(r,m)) return 0;
152 BN_CTX_end(ctx);
153 return(1);
154 }
155
156 /* We want the remainder
157 * Given input of ABCDEF / ab
158 * we need multiply ABCDEF by 3 digests of the reciprocal of ab
159 *
160 */
161
162 /* i := max(BN_num_bits(m), 2*BN_num_bits(N)) */
163 i=BN_num_bits(m);
164 j=recp->num_bits<<1;
165 if (j>i) i=j;
166
167 /* Nr := round(2^i / N) */
168 if (i != recp->shift)
169 recp->shift=BN_reciprocal(&(recp->Nr),&(recp->N),
170 i,ctx); /* BN_reciprocal returns i, or -1 for an error */
171 if (recp->shift == -1) goto err;
172
173 /* d := |round(round(m / 2^BN_num_bits(N)) * recp->Nr / 2^(i - BN_num_bits(N)))|
174 * = |round(round(m / 2^BN_num_bits(N)) * round(2^i / N) / 2^(i - BN_num_bits(N)))|
175 * <= |(m / 2^BN_num_bits(N)) * (2^i / N) * (2^BN_num_bits(N) / 2^i)|
176 * = |m/N|
177 */
178 if (!BN_rshift(a,m,recp->num_bits)) goto err;
179 if (!BN_mul(b,a,&(recp->Nr),ctx)) goto err;
180 if (!BN_rshift(d,b,i-recp->num_bits)) goto err;
181 d->neg=0;
182
183 if (!BN_mul(b,&(recp->N),d,ctx)) goto err;
184 if (!BN_usub(r,m,b)) goto err;
185 r->neg=0;
186
187#if 1
188 j=0;
189 while (BN_ucmp(r,&(recp->N)) >= 0)
190 {
191 if (j++ > 2)
192 {
193 BNerr(BN_F_BN_MOD_MUL_RECIPROCAL,BN_R_BAD_RECIPROCAL);
194 goto err;
195 }
196 if (!BN_usub(r,r,&(recp->N))) goto err;
197 if (!BN_add_word(d,1)) goto err;
198 }
199#endif
200
201 r->neg=BN_is_zero(r)?0:m->neg;
202 d->neg=m->neg^recp->N.neg;
203 ret=1;
204err:
205 BN_CTX_end(ctx);
206 return(ret);
207 }
208
209/* len is the expected size of the result
210 * We actually calculate with an extra word of precision, so
211 * we can do faster division if the remainder is not required.
212 */
213/* r := 2^len / m */
214int BN_reciprocal(BIGNUM *r, const BIGNUM *m, int len, BN_CTX *ctx)
215 {
216 int ret= -1;
217 BIGNUM t;
218
219 BN_init(&t);
220
221 if (!BN_zero(&t)) goto err;
222 if (!BN_set_bit(&t,len)) goto err;
223
224 if (!BN_div(r,NULL,&t,m,ctx)) goto err;
225
226 ret=len;
227err:
228 BN_free(&t);
229 return(ret);
230 }
diff --git a/src/lib/libcrypto/bn/bn_shift.c b/src/lib/libcrypto/bn/bn_shift.c
deleted file mode 100644
index 70f785ea18..0000000000
--- a/src/lib/libcrypto/bn/bn_shift.c
+++ /dev/null
@@ -1,205 +0,0 @@
1/* crypto/bn/bn_shift.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#include <stdio.h>
60#include "cryptlib.h"
61#include "bn_lcl.h"
62
63int BN_lshift1(BIGNUM *r, const BIGNUM *a)
64 {
65 register BN_ULONG *ap,*rp,t,c;
66 int i;
67
68 if (r != a)
69 {
70 r->neg=a->neg;
71 if (bn_wexpand(r,a->top+1) == NULL) return(0);
72 r->top=a->top;
73 }
74 else
75 {
76 if (bn_wexpand(r,a->top+1) == NULL) return(0);
77 }
78 ap=a->d;
79 rp=r->d;
80 c=0;
81 for (i=0; i<a->top; i++)
82 {
83 t= *(ap++);
84 *(rp++)=((t<<1)|c)&BN_MASK2;
85 c=(t & BN_TBIT)?1:0;
86 }
87 if (c)
88 {
89 *rp=1;
90 r->top++;
91 }
92 return(1);
93 }
94
95int BN_rshift1(BIGNUM *r, const BIGNUM *a)
96 {
97 BN_ULONG *ap,*rp,t,c;
98 int i;
99
100 if (BN_is_zero(a))
101 {
102 BN_zero(r);
103 return(1);
104 }
105 if (a != r)
106 {
107 if (bn_wexpand(r,a->top) == NULL) return(0);
108 r->top=a->top;
109 r->neg=a->neg;
110 }
111 ap=a->d;
112 rp=r->d;
113 c=0;
114 for (i=a->top-1; i>=0; i--)
115 {
116 t=ap[i];
117 rp[i]=((t>>1)&BN_MASK2)|c;
118 c=(t&1)?BN_TBIT:0;
119 }
120 bn_fix_top(r);
121 return(1);
122 }
123
124int BN_lshift(BIGNUM *r, const BIGNUM *a, int n)
125 {
126 int i,nw,lb,rb;
127 BN_ULONG *t,*f;
128 BN_ULONG l;
129
130 r->neg=a->neg;
131 nw=n/BN_BITS2;
132 if (bn_wexpand(r,a->top+nw+1) == NULL) return(0);
133 lb=n%BN_BITS2;
134 rb=BN_BITS2-lb;
135 f=a->d;
136 t=r->d;
137 t[a->top+nw]=0;
138 if (lb == 0)
139 for (i=a->top-1; i>=0; i--)
140 t[nw+i]=f[i];
141 else
142 for (i=a->top-1; i>=0; i--)
143 {
144 l=f[i];
145 t[nw+i+1]|=(l>>rb)&BN_MASK2;
146 t[nw+i]=(l<<lb)&BN_MASK2;
147 }
148 memset(t,0,nw*sizeof(t[0]));
149/* for (i=0; i<nw; i++)
150 t[i]=0;*/
151 r->top=a->top+nw+1;
152 bn_fix_top(r);
153 return(1);
154 }
155
156int BN_rshift(BIGNUM *r, const BIGNUM *a, int n)
157 {
158 int i,j,nw,lb,rb;
159 BN_ULONG *t,*f;
160 BN_ULONG l,tmp;
161
162 nw=n/BN_BITS2;
163 rb=n%BN_BITS2;
164 lb=BN_BITS2-rb;
165 if (nw > a->top || a->top == 0)
166 {
167 BN_zero(r);
168 return(1);
169 }
170 if (r != a)
171 {
172 r->neg=a->neg;
173 if (bn_wexpand(r,a->top-nw+1) == NULL) return(0);
174 }
175 else
176 {
177 if (n == 0)
178 return 1; /* or the copying loop will go berserk */
179 }
180
181 f= &(a->d[nw]);
182 t=r->d;
183 j=a->top-nw;
184 r->top=j;
185
186 if (rb == 0)
187 {
188 for (i=j+1; i > 0; i--)
189 *(t++)= *(f++);
190 }
191 else
192 {
193 l= *(f++);
194 for (i=1; i<j; i++)
195 {
196 tmp =(l>>rb)&BN_MASK2;
197 l= *(f++);
198 *(t++) =(tmp|(l<<lb))&BN_MASK2;
199 }
200 *(t++) =(l>>rb)&BN_MASK2;
201 }
202 *t=0;
203 bn_fix_top(r);
204 return(1);
205 }
diff --git a/src/lib/libcrypto/bn/bn_sqr.c b/src/lib/libcrypto/bn/bn_sqr.c
deleted file mode 100644
index c1d0cca438..0000000000
--- a/src/lib/libcrypto/bn/bn_sqr.c
+++ /dev/null
@@ -1,288 +0,0 @@
1/* crypto/bn/bn_sqr.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#include <stdio.h>
60#include "cryptlib.h"
61#include "bn_lcl.h"
62
63/* r must not be a */
64/* I've just gone over this and it is now %20 faster on x86 - eay - 27 Jun 96 */
65int BN_sqr(BIGNUM *r, const BIGNUM *a, BN_CTX *ctx)
66 {
67 int max,al;
68 int ret = 0;
69 BIGNUM *tmp,*rr;
70
71#ifdef BN_COUNT
72 fprintf(stderr,"BN_sqr %d * %d\n",a->top,a->top);
73#endif
74 bn_check_top(a);
75
76 al=a->top;
77 if (al <= 0)
78 {
79 r->top=0;
80 return(1);
81 }
82
83 BN_CTX_start(ctx);
84 rr=(a != r) ? r : BN_CTX_get(ctx);
85 tmp=BN_CTX_get(ctx);
86 if (tmp == NULL) goto err;
87
88 max=(al+al);
89 if (bn_wexpand(rr,max+1) == NULL) goto err;
90
91 if (al == 4)
92 {
93#ifndef BN_SQR_COMBA
94 BN_ULONG t[8];
95 bn_sqr_normal(rr->d,a->d,4,t);
96#else
97 bn_sqr_comba4(rr->d,a->d);
98#endif
99 }
100 else if (al == 8)
101 {
102#ifndef BN_SQR_COMBA
103 BN_ULONG t[16];
104 bn_sqr_normal(rr->d,a->d,8,t);
105#else
106 bn_sqr_comba8(rr->d,a->d);
107#endif
108 }
109 else
110 {
111#if defined(BN_RECURSION)
112 if (al < BN_SQR_RECURSIVE_SIZE_NORMAL)
113 {
114 BN_ULONG t[BN_SQR_RECURSIVE_SIZE_NORMAL*2];
115 bn_sqr_normal(rr->d,a->d,al,t);
116 }
117 else
118 {
119 int j,k;
120
121 j=BN_num_bits_word((BN_ULONG)al);
122 j=1<<(j-1);
123 k=j+j;
124 if (al == j)
125 {
126 if (bn_wexpand(tmp,k*2) == NULL) goto err;
127 bn_sqr_recursive(rr->d,a->d,al,tmp->d);
128 }
129 else
130 {
131 if (bn_wexpand(tmp,max) == NULL) goto err;
132 bn_sqr_normal(rr->d,a->d,al,tmp->d);
133 }
134 }
135#else
136 if (bn_wexpand(tmp,max) == NULL) goto err;
137 bn_sqr_normal(rr->d,a->d,al,tmp->d);
138#endif
139 }
140
141 rr->top=max;
142 rr->neg=0;
143 if ((max > 0) && (rr->d[max-1] == 0)) rr->top--;
144 if (rr != r) BN_copy(r,rr);
145 ret = 1;
146 err:
147 BN_CTX_end(ctx);
148 return(ret);
149 }
150
151/* tmp must have 2*n words */
152void bn_sqr_normal(BN_ULONG *r, const BN_ULONG *a, int n, BN_ULONG *tmp)
153 {
154 int i,j,max;
155 const BN_ULONG *ap;
156 BN_ULONG *rp;
157
158 max=n*2;
159 ap=a;
160 rp=r;
161 rp[0]=rp[max-1]=0;
162 rp++;
163 j=n;
164
165 if (--j > 0)
166 {
167 ap++;
168 rp[j]=bn_mul_words(rp,ap,j,ap[-1]);
169 rp+=2;
170 }
171
172 for (i=n-2; i>0; i--)
173 {
174 j--;
175 ap++;
176 rp[j]=bn_mul_add_words(rp,ap,j,ap[-1]);
177 rp+=2;
178 }
179
180 bn_add_words(r,r,r,max);
181
182 /* There will not be a carry */
183
184 bn_sqr_words(tmp,a,n);
185
186 bn_add_words(r,r,tmp,max);
187 }
188
189#ifdef BN_RECURSION
190/* r is 2*n words in size,
191 * a and b are both n words in size. (There's not actually a 'b' here ...)
192 * n must be a power of 2.
193 * We multiply and return the result.
194 * t must be 2*n words in size
195 * We calculate
196 * a[0]*b[0]
197 * a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0])
198 * a[1]*b[1]
199 */
200void bn_sqr_recursive(BN_ULONG *r, const BN_ULONG *a, int n2, BN_ULONG *t)
201 {
202 int n=n2/2;
203 int zero,c1;
204 BN_ULONG ln,lo,*p;
205
206#ifdef BN_COUNT
207 fprintf(stderr," bn_sqr_recursive %d * %d\n",n2,n2);
208#endif
209 if (n2 == 4)
210 {
211#ifndef BN_SQR_COMBA
212 bn_sqr_normal(r,a,4,t);
213#else
214 bn_sqr_comba4(r,a);
215#endif
216 return;
217 }
218 else if (n2 == 8)
219 {
220#ifndef BN_SQR_COMBA
221 bn_sqr_normal(r,a,8,t);
222#else
223 bn_sqr_comba8(r,a);
224#endif
225 return;
226 }
227 if (n2 < BN_SQR_RECURSIVE_SIZE_NORMAL)
228 {
229 bn_sqr_normal(r,a,n2,t);
230 return;
231 }
232 /* r=(a[0]-a[1])*(a[1]-a[0]) */
233 c1=bn_cmp_words(a,&(a[n]),n);
234 zero=0;
235 if (c1 > 0)
236 bn_sub_words(t,a,&(a[n]),n);
237 else if (c1 < 0)
238 bn_sub_words(t,&(a[n]),a,n);
239 else
240 zero=1;
241
242 /* The result will always be negative unless it is zero */
243 p= &(t[n2*2]);
244
245 if (!zero)
246 bn_sqr_recursive(&(t[n2]),t,n,p);
247 else
248 memset(&(t[n2]),0,n2*sizeof(BN_ULONG));
249 bn_sqr_recursive(r,a,n,p);
250 bn_sqr_recursive(&(r[n2]),&(a[n]),n,p);
251
252 /* t[32] holds (a[0]-a[1])*(a[1]-a[0]), it is negative or zero
253 * r[10] holds (a[0]*b[0])
254 * r[32] holds (b[1]*b[1])
255 */
256
257 c1=(int)(bn_add_words(t,r,&(r[n2]),n2));
258
259 /* t[32] is negative */
260 c1-=(int)(bn_sub_words(&(t[n2]),t,&(t[n2]),n2));
261
262 /* t[32] holds (a[0]-a[1])*(a[1]-a[0])+(a[0]*a[0])+(a[1]*a[1])
263 * r[10] holds (a[0]*a[0])
264 * r[32] holds (a[1]*a[1])
265 * c1 holds the carry bits
266 */
267 c1+=(int)(bn_add_words(&(r[n]),&(r[n]),&(t[n2]),n2));
268 if (c1)
269 {
270 p= &(r[n+n2]);
271 lo= *p;
272 ln=(lo+c1)&BN_MASK2;
273 *p=ln;
274
275 /* The overflow will stop before we over write
276 * words we should not overwrite */
277 if (ln < (BN_ULONG)c1)
278 {
279 do {
280 p++;
281 lo= *p;
282 ln=(lo+1)&BN_MASK2;
283 *p=ln;
284 } while (ln == 0);
285 }
286 }
287 }
288#endif
diff --git a/src/lib/libcrypto/bn/bn_sqrt.c b/src/lib/libcrypto/bn/bn_sqrt.c
deleted file mode 100644
index e2a1105dc8..0000000000
--- a/src/lib/libcrypto/bn/bn_sqrt.c
+++ /dev/null
@@ -1,387 +0,0 @@
1/* crypto/bn/bn_mod.c */
2/* Written by Lenka Fibikova <fibikova@exp-math.uni-essen.de>
3 * and Bodo Moeller for the OpenSSL project. */
4/* ====================================================================
5 * Copyright (c) 1998-2000 The OpenSSL Project. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 *
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in
16 * the documentation and/or other materials provided with the
17 * distribution.
18 *
19 * 3. All advertising materials mentioning features or use of this
20 * software must display the following acknowledgment:
21 * "This product includes software developed by the OpenSSL Project
22 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
23 *
24 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
25 * endorse or promote products derived from this software without
26 * prior written permission. For written permission, please contact
27 * openssl-core@openssl.org.
28 *
29 * 5. Products derived from this software may not be called "OpenSSL"
30 * nor may "OpenSSL" appear in their names without prior written
31 * permission of the OpenSSL Project.
32 *
33 * 6. Redistributions of any form whatsoever must retain the following
34 * acknowledgment:
35 * "This product includes software developed by the OpenSSL Project
36 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
37 *
38 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
39 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
40 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
41 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
42 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
43 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
44 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
45 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
46 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
47 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
48 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
49 * OF THE POSSIBILITY OF SUCH DAMAGE.
50 * ====================================================================
51 *
52 * This product includes cryptographic software written by Eric Young
53 * (eay@cryptsoft.com). This product includes software written by Tim
54 * Hudson (tjh@cryptsoft.com).
55 *
56 */
57
58#include "cryptlib.h"
59#include "bn_lcl.h"
60
61
62BIGNUM *BN_mod_sqrt(BIGNUM *in, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
63/* Returns 'ret' such that
64 * ret^2 == a (mod p),
65 * using the Tonelli/Shanks algorithm (cf. Henri Cohen, "A Course
66 * in Algebraic Computational Number Theory", algorithm 1.5.1).
67 * 'p' must be prime!
68 * If 'a' is not a square, this is not necessarily detected by
69 * the algorithms; a bogus result must be expected in this case.
70 */
71 {
72 BIGNUM *ret = in;
73 int err = 1;
74 int r;
75 BIGNUM *b, *q, *t, *x, *y;
76 int e, i, j;
77
78 if (!BN_is_odd(p) || BN_abs_is_word(p, 1))
79 {
80 if (BN_abs_is_word(p, 2))
81 {
82 if (ret == NULL)
83 ret = BN_new();
84 if (ret == NULL)
85 goto end;
86 if (!BN_set_word(ret, BN_is_bit_set(a, 0)))
87 {
88 BN_free(ret);
89 return NULL;
90 }
91 return ret;
92 }
93
94 BNerr(BN_F_BN_MOD_SQRT, BN_R_P_IS_NOT_PRIME);
95 return(NULL);
96 }
97
98 if (BN_is_zero(a) || BN_is_one(a))
99 {
100 if (ret == NULL)
101 ret = BN_new();
102 if (ret == NULL)
103 goto end;
104 if (!BN_set_word(ret, BN_is_one(a)))
105 {
106 BN_free(ret);
107 return NULL;
108 }
109 return ret;
110 }
111
112#if 0 /* if BN_mod_sqrt is used with correct input, this just wastes time */
113 r = BN_kronecker(a, p, ctx);
114 if (r < -1) return NULL;
115 if (r == -1)
116 {
117 BNerr(BN_F_BN_MOD_SQRT, BN_R_NOT_A_SQUARE);
118 return(NULL);
119 }
120#endif
121
122 BN_CTX_start(ctx);
123 b = BN_CTX_get(ctx);
124 q = BN_CTX_get(ctx);
125 t = BN_CTX_get(ctx);
126 x = BN_CTX_get(ctx);
127 y = BN_CTX_get(ctx);
128 if (y == NULL) goto end;
129
130 if (ret == NULL)
131 ret = BN_new();
132 if (ret == NULL) goto end;
133
134 /* now write |p| - 1 as 2^e*q where q is odd */
135 e = 1;
136 while (!BN_is_bit_set(p, e))
137 e++;
138 /* we'll set q later (if needed) */
139
140 if (e == 1)
141 {
142 /* The easy case: (|p|-1)/2 is odd, so 2 has an inverse
143 * modulo (|p|-1)/2, and square roots can be computed
144 * directly by modular exponentiation.
145 * We have
146 * 2 * (|p|+1)/4 == 1 (mod (|p|-1)/2),
147 * so we can use exponent (|p|+1)/4, i.e. (|p|-3)/4 + 1.
148 */
149 if (!BN_rshift(q, p, 2)) goto end;
150 q->neg = 0;
151 if (!BN_add_word(q, 1)) goto end;
152 if (!BN_mod_exp(ret, a, q, p, ctx)) goto end;
153 err = 0;
154 goto end;
155 }
156
157 if (e == 2)
158 {
159 /* |p| == 5 (mod 8)
160 *
161 * In this case 2 is always a non-square since
162 * Legendre(2,p) = (-1)^((p^2-1)/8) for any odd prime.
163 * So if a really is a square, then 2*a is a non-square.
164 * Thus for
165 * b := (2*a)^((|p|-5)/8),
166 * i := (2*a)*b^2
167 * we have
168 * i^2 = (2*a)^((1 + (|p|-5)/4)*2)
169 * = (2*a)^((p-1)/2)
170 * = -1;
171 * so if we set
172 * x := a*b*(i-1),
173 * then
174 * x^2 = a^2 * b^2 * (i^2 - 2*i + 1)
175 * = a^2 * b^2 * (-2*i)
176 * = a*(-i)*(2*a*b^2)
177 * = a*(-i)*i
178 * = a.
179 *
180 * (This is due to A.O.L. Atkin,
181 * <URL: http://listserv.nodak.edu/scripts/wa.exe?A2=ind9211&L=nmbrthry&O=T&P=562>,
182 * November 1992.)
183 */
184
185 /* make sure that a is reduced modulo p */
186 if (a->neg || BN_ucmp(a, p) >= 0)
187 {
188 if (!BN_nnmod(x, a, p, ctx)) goto end;
189 a = x; /* use x as temporary variable */
190 }
191
192 /* t := 2*a */
193 if (!BN_mod_lshift1_quick(t, a, p)) goto end;
194
195 /* b := (2*a)^((|p|-5)/8) */
196 if (!BN_rshift(q, p, 3)) goto end;
197 q->neg = 0;
198 if (!BN_mod_exp(b, t, q, p, ctx)) goto end;
199
200 /* y := b^2 */
201 if (!BN_mod_sqr(y, b, p, ctx)) goto end;
202
203 /* t := (2*a)*b^2 - 1*/
204 if (!BN_mod_mul(t, t, y, p, ctx)) goto end;
205 if (!BN_sub_word(t, 1)) goto end;
206
207 /* x = a*b*t */
208 if (!BN_mod_mul(x, a, b, p, ctx)) goto end;
209 if (!BN_mod_mul(x, x, t, p, ctx)) goto end;
210
211 if (!BN_copy(ret, x)) goto end;
212 err = 0;
213 goto end;
214 }
215
216 /* e > 2, so we really have to use the Tonelli/Shanks algorithm.
217 * First, find some y that is not a square. */
218 if (!BN_copy(q, p)) goto end; /* use 'q' as temp */
219 q->neg = 0;
220 i = 2;
221 do
222 {
223 /* For efficiency, try small numbers first;
224 * if this fails, try random numbers.
225 */
226 if (i < 22)
227 {
228 if (!BN_set_word(y, i)) goto end;
229 }
230 else
231 {
232 if (!BN_pseudo_rand(y, BN_num_bits(p), 0, 0)) goto end;
233 if (BN_ucmp(y, p) >= 0)
234 {
235 if (!(p->neg ? BN_add : BN_sub)(y, y, p)) goto end;
236 }
237 /* now 0 <= y < |p| */
238 if (BN_is_zero(y))
239 if (!BN_set_word(y, i)) goto end;
240 }
241
242 r = BN_kronecker(y, q, ctx); /* here 'q' is |p| */
243 if (r < -1) goto end;
244 if (r == 0)
245 {
246 /* m divides p */
247 BNerr(BN_F_BN_MOD_SQRT, BN_R_P_IS_NOT_PRIME);
248 goto end;
249 }
250 }
251 while (r == 1 && ++i < 82);
252
253 if (r != -1)
254 {
255 /* Many rounds and still no non-square -- this is more likely
256 * a bug than just bad luck.
257 * Even if p is not prime, we should have found some y
258 * such that r == -1.
259 */
260 BNerr(BN_F_BN_MOD_SQRT, BN_R_TOO_MANY_ITERATIONS);
261 goto end;
262 }
263
264 /* Here's our actual 'q': */
265 if (!BN_rshift(q, q, e)) goto end;
266
267 /* Now that we have some non-square, we can find an element
268 * of order 2^e by computing its q'th power. */
269 if (!BN_mod_exp(y, y, q, p, ctx)) goto end;
270 if (BN_is_one(y))
271 {
272 BNerr(BN_F_BN_MOD_SQRT, BN_R_P_IS_NOT_PRIME);
273 goto end;
274 }
275
276 /* Now we know that (if p is indeed prime) there is an integer
277 * k, 0 <= k < 2^e, such that
278 *
279 * a^q * y^k == 1 (mod p).
280 *
281 * As a^q is a square and y is not, k must be even.
282 * q+1 is even, too, so there is an element
283 *
284 * X := a^((q+1)/2) * y^(k/2),
285 *
286 * and it satisfies
287 *
288 * X^2 = a^q * a * y^k
289 * = a,
290 *
291 * so it is the square root that we are looking for.
292 */
293
294 /* t := (q-1)/2 (note that q is odd) */
295 if (!BN_rshift1(t, q)) goto end;
296
297 /* x := a^((q-1)/2) */
298 if (BN_is_zero(t)) /* special case: p = 2^e + 1 */
299 {
300 if (!BN_nnmod(t, a, p, ctx)) goto end;
301 if (BN_is_zero(t))
302 {
303 /* special case: a == 0 (mod p) */
304 if (!BN_zero(ret)) goto end;
305 err = 0;
306 goto end;
307 }
308 else
309 if (!BN_one(x)) goto end;
310 }
311 else
312 {
313 if (!BN_mod_exp(x, a, t, p, ctx)) goto end;
314 if (BN_is_zero(x))
315 {
316 /* special case: a == 0 (mod p) */
317 if (!BN_zero(ret)) goto end;
318 err = 0;
319 goto end;
320 }
321 }
322
323 /* b := a*x^2 (= a^q) */
324 if (!BN_mod_sqr(b, x, p, ctx)) goto end;
325 if (!BN_mod_mul(b, b, a, p, ctx)) goto end;
326
327 /* x := a*x (= a^((q+1)/2)) */
328 if (!BN_mod_mul(x, x, a, p, ctx)) goto end;
329
330 while (1)
331 {
332 /* Now b is a^q * y^k for some even k (0 <= k < 2^E
333 * where E refers to the original value of e, which we
334 * don't keep in a variable), and x is a^((q+1)/2) * y^(k/2).
335 *
336 * We have a*b = x^2,
337 * y^2^(e-1) = -1,
338 * b^2^(e-1) = 1.
339 */
340
341 if (BN_is_one(b))
342 {
343 if (!BN_copy(ret, x)) goto end;
344 err = 0;
345 goto end;
346 }
347
348
349 /* find smallest i such that b^(2^i) = 1 */
350 i = 1;
351 if (!BN_mod_sqr(t, b, p, ctx)) goto end;
352 while (!BN_is_one(t))
353 {
354 i++;
355 if (i == e)
356 {
357 BNerr(BN_F_BN_MOD_SQRT, BN_R_NOT_A_SQUARE);
358 goto end;
359 }
360 if (!BN_mod_mul(t, t, t, p, ctx)) goto end;
361 }
362
363
364 /* t := y^2^(e - i - 1) */
365 if (!BN_copy(t, y)) goto end;
366 for (j = e - i - 1; j > 0; j--)
367 {
368 if (!BN_mod_sqr(t, t, p, ctx)) goto end;
369 }
370 if (!BN_mod_mul(y, t, t, p, ctx)) goto end;
371 if (!BN_mod_mul(x, x, t, p, ctx)) goto end;
372 if (!BN_mod_mul(b, b, y, p, ctx)) goto end;
373 e = i;
374 }
375
376 end:
377 if (err)
378 {
379 if (ret != NULL && ret != in)
380 {
381 BN_clear_free(ret);
382 }
383 ret = NULL;
384 }
385 BN_CTX_end(ctx);
386 return ret;
387 }
diff --git a/src/lib/libcrypto/bn/bn_word.c b/src/lib/libcrypto/bn/bn_word.c
deleted file mode 100644
index de610ce54c..0000000000
--- a/src/lib/libcrypto/bn/bn_word.c
+++ /dev/null
@@ -1,208 +0,0 @@
1/* crypto/bn/bn_word.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#include <stdio.h>
60#include "cryptlib.h"
61#include "bn_lcl.h"
62
63BN_ULONG BN_mod_word(const BIGNUM *a, BN_ULONG w)
64 {
65#ifndef BN_LLONG
66 BN_ULONG ret=0;
67#else
68 BN_ULLONG ret=0;
69#endif
70 int i;
71
72 w&=BN_MASK2;
73 for (i=a->top-1; i>=0; i--)
74 {
75#ifndef BN_LLONG
76 ret=((ret<<BN_BITS4)|((a->d[i]>>BN_BITS4)&BN_MASK2l))%w;
77 ret=((ret<<BN_BITS4)|(a->d[i]&BN_MASK2l))%w;
78#else
79 ret=(BN_ULLONG)(((ret<<(BN_ULLONG)BN_BITS2)|a->d[i])%
80 (BN_ULLONG)w);
81#endif
82 }
83 return((BN_ULONG)ret);
84 }
85
86BN_ULONG BN_div_word(BIGNUM *a, BN_ULONG w)
87 {
88 BN_ULONG ret;
89 int i;
90
91 if (a->top == 0) return(0);
92 ret=0;
93 w&=BN_MASK2;
94 for (i=a->top-1; i>=0; i--)
95 {
96 BN_ULONG l,d;
97
98 l=a->d[i];
99 d=bn_div_words(ret,l,w);
100 ret=(l-((d*w)&BN_MASK2))&BN_MASK2;
101 a->d[i]=d;
102 }
103 if ((a->top > 0) && (a->d[a->top-1] == 0))
104 a->top--;
105 return(ret);
106 }
107
108int BN_add_word(BIGNUM *a, BN_ULONG w)
109 {
110 BN_ULONG l;
111 int i;
112
113 if ((w & BN_MASK2) == 0)
114 return(1);
115
116 if (a->neg)
117 {
118 a->neg=0;
119 i=BN_sub_word(a,w);
120 if (!BN_is_zero(a))
121 a->neg=!(a->neg);
122 return(i);
123 }
124 w&=BN_MASK2;
125 if (bn_wexpand(a,a->top+1) == NULL) return(0);
126 i=0;
127 for (;;)
128 {
129 if (i >= a->top)
130 l=w;
131 else
132 l=(a->d[i]+(BN_ULONG)w)&BN_MASK2;
133 a->d[i]=l;
134 if (w > l)
135 w=1;
136 else
137 break;
138 i++;
139 }
140 if (i >= a->top)
141 a->top++;
142 return(1);
143 }
144
145int BN_sub_word(BIGNUM *a, BN_ULONG w)
146 {
147 int i;
148
149 if ((w & BN_MASK2) == 0)
150 return(1);
151
152 if (BN_is_zero(a) || a->neg)
153 {
154 a->neg=0;
155 i=BN_add_word(a,w);
156 a->neg=1;
157 return(i);
158 }
159
160 w&=BN_MASK2;
161 if ((a->top == 1) && (a->d[0] < w))
162 {
163 a->d[0]=w-a->d[0];
164 a->neg=1;
165 return(1);
166 }
167 i=0;
168 for (;;)
169 {
170 if (a->d[i] >= w)
171 {
172 a->d[i]-=w;
173 break;
174 }
175 else
176 {
177 a->d[i]=(a->d[i]-w)&BN_MASK2;
178 i++;
179 w=1;
180 }
181 }
182 if ((a->d[i] == 0) && (i == (a->top-1)))
183 a->top--;
184 return(1);
185 }
186
187int BN_mul_word(BIGNUM *a, BN_ULONG w)
188 {
189 BN_ULONG ll;
190
191 w&=BN_MASK2;
192 if (a->top)
193 {
194 if (w == 0)
195 BN_zero(a);
196 else
197 {
198 ll=bn_mul_words(a->d,a->d,a->top,w);
199 if (ll)
200 {
201 if (bn_wexpand(a,a->top+1) == NULL) return(0);
202 a->d[a->top++]=ll;
203 }
204 }
205 }
206 return(1);
207 }
208