From 15b5d84f9da2ce4bfae8580e56e34a859f74ad71 Mon Sep 17 00:00:00 2001 From: markus <> Date: Thu, 5 Sep 2002 12:51:50 +0000 Subject: import openssl-0.9.7-beta1 --- src/lib/libcrypto/bn/asm/bn-586.pl | 295 ++++- src/lib/libcrypto/bn/asm/pa-risc2.s | 2024 +++++++++++++++++++++++++++------- src/lib/libcrypto/bn/asm/pa-risc2W.s | 2 +- src/lib/libcrypto/bn/bn.h | 290 +++-- src/lib/libcrypto/bn/bn_add.c | 206 +++- src/lib/libcrypto/bn/bn_asm.c | 178 +-- src/lib/libcrypto/bn/bn_blind.c | 47 +- src/lib/libcrypto/bn/bn_ctx.c | 17 +- src/lib/libcrypto/bn/bn_div.c | 221 ++-- src/lib/libcrypto/bn/bn_err.c | 142 ++- src/lib/libcrypto/bn/bn_exp.c | 600 ++++++---- src/lib/libcrypto/bn/bn_exp2.c | 382 ++++--- src/lib/libcrypto/bn/bn_gcd.c | 389 ++++++- src/lib/libcrypto/bn/bn_lcl.h | 291 ++++- src/lib/libcrypto/bn/bn_lib.c | 571 +++++++--- src/lib/libcrypto/bn/bn_mod.c | 251 ++++- src/lib/libcrypto/bn/bn_mont.c | 335 +++--- src/lib/libcrypto/bn/bn_mpi.c | 11 +- src/lib/libcrypto/bn/bn_mul.c | 1158 +++++++++++++++++-- src/lib/libcrypto/bn/bn_prime.c | 459 ++++---- src/lib/libcrypto/bn/bn_prime.h | 4 +- src/lib/libcrypto/bn/bn_prime.pl | 71 +- src/lib/libcrypto/bn/bn_print.c | 67 +- src/lib/libcrypto/bn/bn_rand.c | 216 +++- src/lib/libcrypto/bn/bn_recp.c | 183 ++- src/lib/libcrypto/bn/bn_shift.c | 27 +- src/lib/libcrypto/bn/bn_sqr.c | 214 +++- src/lib/libcrypto/bn/bn_word.c | 47 +- 28 files changed, 6630 insertions(+), 2068 deletions(-) (limited to 'src/lib/libcrypto/bn') diff --git a/src/lib/libcrypto/bn/asm/bn-586.pl b/src/lib/libcrypto/bn/asm/bn-586.pl index 19d425ee96..33f6125920 100644 --- a/src/lib/libcrypto/bn/asm/bn-586.pl +++ b/src/lib/libcrypto/bn/asm/bn-586.pl @@ -1,18 +1,17 @@ -#!/usr/bin/perl -# - #!/usr/local/bin/perl push(@INC,"perlasm","../../perlasm"); require "x86asm.pl"; -&asm_init($ARGV[0],"bn-586.pl"); +&asm_init($ARGV[0],$0); &bn_mul_add_words("bn_mul_add_words"); &bn_mul_words("bn_mul_words"); &bn_sqr_words("bn_sqr_words"); -&bn_div64("bn_div64"); +&bn_div_words("bn_div_words"); &bn_add_words("bn_add_words"); +&bn_sub_words("bn_sub_words"); +&bn_sub_part_words("bn_sub_part_words"); &asm_finish(); @@ -228,7 +227,7 @@ sub bn_sqr_words &function_end($name); } -sub bn_div64 +sub bn_div_words { local($name)=@_; @@ -302,12 +301,292 @@ sub bn_add_words &add($tmp1,$tmp2); &adc($c,0); &dec($num) if ($i != 6); - &mov(&DWP($i*4,$r,"",0),$tmp1); # *a + &mov(&DWP($i*4,$r,"",0),$tmp1); # *r &jz(&label("aw_end")) if ($i != 6); } &set_label("aw_end",0); - &mov("eax",$c); +# &mov("eax",$c); # $c is "eax" + + &function_end($name); + } + +sub bn_sub_words + { + local($name)=@_; + + &function_begin($name,""); + + &comment(""); + $a="esi"; + $b="edi"; + $c="eax"; + $r="ebx"; + $tmp1="ecx"; + $tmp2="edx"; + $num="ebp"; + + &mov($r,&wparam(0)); # get r + &mov($a,&wparam(1)); # get a + &mov($b,&wparam(2)); # get b + &mov($num,&wparam(3)); # get num + &xor($c,$c); # clear carry + &and($num,0xfffffff8); # num / 8 + + &jz(&label("aw_finish")); + + &set_label("aw_loop",0); + for ($i=0; $i<8; $i++) + { + &comment("Round $i"); + + &mov($tmp1,&DWP($i*4,$a,"",0)); # *a + &mov($tmp2,&DWP($i*4,$b,"",0)); # *b + &sub($tmp1,$c); + &mov($c,0); + &adc($c,$c); + &sub($tmp1,$tmp2); + &adc($c,0); + &mov(&DWP($i*4,$r,"",0),$tmp1); # *r + } + + &comment(""); + &add($a,32); + &add($b,32); + &add($r,32); + &sub($num,8); + &jnz(&label("aw_loop")); + + &set_label("aw_finish",0); + &mov($num,&wparam(3)); # get num + &and($num,7); + &jz(&label("aw_end")); + + for ($i=0; $i<7; $i++) + { + &comment("Tail Round $i"); + &mov($tmp1,&DWP($i*4,$a,"",0)); # *a + &mov($tmp2,&DWP($i*4,$b,"",0));# *b + &sub($tmp1,$c); + &mov($c,0); + &adc($c,$c); + &sub($tmp1,$tmp2); + &adc($c,0); + &dec($num) if ($i != 6); + &mov(&DWP($i*4,$r,"",0),$tmp1); # *r + &jz(&label("aw_end")) if ($i != 6); + } + &set_label("aw_end",0); + +# &mov("eax",$c); # $c is "eax" + + &function_end($name); + } + +sub bn_sub_part_words + { + local($name)=@_; + + &function_begin($name,""); + + &comment(""); + $a="esi"; + $b="edi"; + $c="eax"; + $r="ebx"; + $tmp1="ecx"; + $tmp2="edx"; + $num="ebp"; + + &mov($r,&wparam(0)); # get r + &mov($a,&wparam(1)); # get a + &mov($b,&wparam(2)); # get b + &mov($num,&wparam(3)); # get num + &xor($c,$c); # clear carry + &and($num,0xfffffff8); # num / 8 + + &jz(&label("aw_finish")); + + &set_label("aw_loop",0); + for ($i=0; $i<8; $i++) + { + &comment("Round $i"); + + &mov($tmp1,&DWP($i*4,$a,"",0)); # *a + &mov($tmp2,&DWP($i*4,$b,"",0)); # *b + &sub($tmp1,$c); + &mov($c,0); + &adc($c,$c); + &sub($tmp1,$tmp2); + &adc($c,0); + &mov(&DWP($i*4,$r,"",0),$tmp1); # *r + } + + &comment(""); + &add($a,32); + &add($b,32); + &add($r,32); + &sub($num,8); + &jnz(&label("aw_loop")); + + &set_label("aw_finish",0); + &mov($num,&wparam(3)); # get num + &and($num,7); + &jz(&label("aw_end")); + + for ($i=0; $i<7; $i++) + { + &comment("Tail Round $i"); + &mov($tmp1,&DWP(0,$a,"",0)); # *a + &mov($tmp2,&DWP(0,$b,"",0));# *b + &sub($tmp1,$c); + &mov($c,0); + &adc($c,$c); + &sub($tmp1,$tmp2); + &adc($c,0); + &mov(&DWP(0,$r,"",0),$tmp1); # *r + &add($a, 4); + &add($b, 4); + &add($r, 4); + &dec($num) if ($i != 6); + &jz(&label("aw_end")) if ($i != 6); + } + &set_label("aw_end",0); + + &cmp(&wparam(4),0); + &je(&label("pw_end")); + + &mov($num,&wparam(4)); # get dl + &cmp($num,0); + &je(&label("pw_end")); + &jge(&label("pw_pos")); + + &comment("pw_neg"); + &mov($tmp2,0); + &sub($tmp2,$num); + &mov($num,$tmp2); + &and($num,0xfffffff8); # num / 8 + &jz(&label("pw_neg_finish")); + + &set_label("pw_neg_loop",0); + for ($i=0; $i<8; $i++) + { + &comment("dl<0 Round $i"); + + &mov($tmp1,0); + &mov($tmp2,&DWP($i*4,$b,"",0)); # *b + &sub($tmp1,$c); + &mov($c,0); + &adc($c,$c); + &sub($tmp1,$tmp2); + &adc($c,0); + &mov(&DWP($i*4,$r,"",0),$tmp1); # *r + } + + &comment(""); + &add($b,32); + &add($r,32); + &sub($num,8); + &jnz(&label("pw_neg_loop")); + + &set_label("pw_neg_finish",0); + &mov($tmp2,&wparam(4)); # get dl + &mov($num,0); + &sub($num,$tmp2); + &and($num,7); + &jz(&label("pw_end")); + + for ($i=0; $i<7; $i++) + { + &comment("dl<0 Tail Round $i"); + &mov($tmp1,0); + &mov($tmp2,&DWP($i*4,$b,"",0));# *b + &sub($tmp1,$c); + &mov($c,0); + &adc($c,$c); + &sub($tmp1,$tmp2); + &adc($c,0); + &dec($num) if ($i != 6); + &mov(&DWP($i*4,$r,"",0),$tmp1); # *r + &jz(&label("pw_end")) if ($i != 6); + } + + &jmp(&label("pw_end")); + + &set_label("pw_pos",0); + + &and($num,0xfffffff8); # num / 8 + &jz(&label("pw_pos_finish")); + + &set_label("pw_pos_loop",0); + + for ($i=0; $i<8; $i++) + { + &comment("dl>0 Round $i"); + + &mov($tmp1,&DWP($i*4,$a,"",0)); # *a + &sub($tmp1,$c); + &mov(&DWP($i*4,$r,"",0),$tmp1); # *r + &jnc(&label("pw_nc".$i)); + } + + &comment(""); + &add($a,32); + &add($r,32); + &sub($num,8); + &jnz(&label("pw_pos_loop")); + + &set_label("pw_pos_finish",0); + &mov($num,&wparam(4)); # get dl + &and($num,7); + &jz(&label("pw_end")); + + for ($i=0; $i<7; $i++) + { + &comment("dl>0 Tail Round $i"); + &mov($tmp1,&DWP($i*4,$a,"",0)); # *a + &sub($tmp1,$c); + &mov(&DWP($i*4,$r,"",0),$tmp1); # *r + &jnc(&label("pw_tail_nc".$i)); + &dec($num) if ($i != 6); + &jz(&label("pw_end")) if ($i != 6); + } + &mov($c,1); + &jmp(&label("pw_end")); + + &set_label("pw_nc_loop",0); + for ($i=0; $i<8; $i++) + { + &mov($tmp1,&DWP($i*4,$a,"",0)); # *a + &mov(&DWP($i*4,$r,"",0),$tmp1); # *r + &set_label("pw_nc".$i,0); + } + + &comment(""); + &add($a,32); + &add($r,32); + &sub($num,8); + &jnz(&label("pw_nc_loop")); + + &mov($num,&wparam(4)); # get dl + &and($num,7); + &jz(&label("pw_nc_end")); + + for ($i=0; $i<7; $i++) + { + &mov($tmp1,&DWP($i*4,$a,"",0)); # *a + &mov(&DWP($i*4,$r,"",0),$tmp1); # *r + &set_label("pw_tail_nc".$i,0); + &dec($num) if ($i != 6); + &jz(&label("pw_nc_end")) if ($i != 6); + } + + &set_label("pw_nc_end",0); + &mov($c,0); + + &set_label("pw_end",0); + +# &mov("eax",$c); # $c is "eax" &function_end($name); } diff --git a/src/lib/libcrypto/bn/asm/pa-risc2.s b/src/lib/libcrypto/bn/asm/pa-risc2.s index c2725996a4..af9730d062 100644 --- a/src/lib/libcrypto/bn/asm/pa-risc2.s +++ b/src/lib/libcrypto/bn/asm/pa-risc2.s @@ -1,416 +1,1618 @@ - .SPACE $PRIVATE$ - .SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31 - .SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82 - .SPACE $TEXT$ - .SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44 - .SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY - .IMPORT $global$,DATA - .IMPORT $$dyncall,MILLICODE -; gcc_compiled.: - .SPACE $TEXT$ - .SUBSPA $CODE$ - - .align 4 - .EXPORT bn_mul_add_words,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,RTNVAL=GR +; +; PA-RISC 2.0 implementation of bn_asm code, based on the +; 64-bit version of the code. This code is effectively the +; same as the 64-bit version except the register model is +; slightly different given all values must be 32-bit between +; function calls. Thus the 64-bit return values are returned +; in %ret0 and %ret1 vs just %ret0 as is done in 64-bit +; +; +; This code is approximately 2x faster than the C version +; for RSA/DSA. +; +; See http://devresource.hp.com/ for more details on the PA-RISC +; architecture. Also see the book "PA-RISC 2.0 Architecture" +; by Gerry Kane for information on the instruction set architecture. +; +; Code written by Chris Ruemmler (with some help from the HP C +; compiler). +; +; The code compiles with HP's assembler +; + + .level 2.0N + .space $TEXT$ + .subspa $CODE$,QUAD=0,ALIGN=8,ACCESS=0x2c,CODE_ONLY + +; +; Global Register definitions used for the routines. +; +; Some information about HP's runtime architecture for 32-bits. +; +; "Caller save" means the calling function must save the register +; if it wants the register to be preserved. +; "Callee save" means if a function uses the register, it must save +; the value before using it. +; +; For the floating point registers +; +; "caller save" registers: fr4-fr11, fr22-fr31 +; "callee save" registers: fr12-fr21 +; "special" registers: fr0-fr3 (status and exception registers) +; +; For the integer registers +; value zero : r0 +; "caller save" registers: r1,r19-r26 +; "callee save" registers: r3-r18 +; return register : r2 (rp) +; return values ; r28,r29 (ret0,ret1) +; Stack pointer ; r30 (sp) +; millicode return ptr ; r31 (also a caller save register) + + +; +; Arguments to the routines +; +r_ptr .reg %r26 +a_ptr .reg %r25 +b_ptr .reg %r24 +num .reg %r24 +n .reg %r23 + +; +; Note that the "w" argument for bn_mul_add_words and bn_mul_words +; is passed on the stack at a delta of -56 from the top of stack +; as the routine is entered. +; + +; +; Globals used in some routines +; + +top_overflow .reg %r23 +high_mask .reg %r22 ; value 0xffffffff80000000L + + +;------------------------------------------------------------------------------ +; +; bn_mul_add_words +; +;BN_ULONG bn_mul_add_words(BN_ULONG *r_ptr, BN_ULONG *a_ptr, +; int num, BN_ULONG w) +; +; arg0 = r_ptr +; arg1 = a_ptr +; arg3 = num +; -56(sp) = w +; +; Local register definitions +; + +fm1 .reg %fr22 +fm .reg %fr23 +ht_temp .reg %fr24 +ht_temp_1 .reg %fr25 +lt_temp .reg %fr26 +lt_temp_1 .reg %fr27 +fm1_1 .reg %fr28 +fm_1 .reg %fr29 + +fw_h .reg %fr7L +fw_l .reg %fr7R +fw .reg %fr7 + +fht_0 .reg %fr8L +flt_0 .reg %fr8R +t_float_0 .reg %fr8 + +fht_1 .reg %fr9L +flt_1 .reg %fr9R +t_float_1 .reg %fr9 + +tmp_0 .reg %r31 +tmp_1 .reg %r21 +m_0 .reg %r20 +m_1 .reg %r19 +ht_0 .reg %r1 +ht_1 .reg %r3 +lt_0 .reg %r4 +lt_1 .reg %r5 +m1_0 .reg %r6 +m1_1 .reg %r7 +rp_val .reg %r8 +rp_val_1 .reg %r9 + bn_mul_add_words - .PROC - .CALLINFO FRAME=64,CALLS,SAVE_RP,ENTRY_GR=4 - .ENTRY - stw %r2,-20(0,%r30) - stwm %r4,64(0,%r30) - copy %r24,%r31 - stw %r3,-60(0,%r30) - ldi 0,%r20 - ldo 12(%r26),%r2 - stw %r23,-16(0,%r30) - copy %r25,%r3 - ldo 12(%r3),%r1 - fldws -16(0,%r30),%fr8L -L$0010 - copy %r20,%r25 - ldi 0,%r24 - fldws 0(0,%r3),%fr9L - ldw 0(0,%r26),%r19 - xmpyu %fr8L,%fr9L,%fr9 - fstds %fr9,-16(0,%r30) - copy %r19,%r23 - ldw -16(0,%r30),%r28 - ldw -12(0,%r30),%r29 - ldi 0,%r22 - add %r23,%r29,%r29 - addc %r22,%r28,%r28 - add %r25,%r29,%r29 - addc %r24,%r28,%r28 - copy %r28,%r21 - ldi 0,%r20 - copy %r21,%r20 - addib,= -1,%r31,L$0011 - stw %r29,0(0,%r26) - copy %r20,%r25 - ldi 0,%r24 - fldws -8(0,%r1),%fr9L - ldw -8(0,%r2),%r19 - xmpyu %fr8L,%fr9L,%fr9 - fstds %fr9,-16(0,%r30) - copy %r19,%r23 - ldw -16(0,%r30),%r28 - ldw -12(0,%r30),%r29 - ldi 0,%r22 - add %r23,%r29,%r29 - addc %r22,%r28,%r28 - add %r25,%r29,%r29 - addc %r24,%r28,%r28 - copy %r28,%r21 - ldi 0,%r20 - copy %r21,%r20 - addib,= -1,%r31,L$0011 - stw %r29,-8(0,%r2) - copy %r20,%r25 - ldi 0,%r24 - fldws -4(0,%r1),%fr9L - ldw -4(0,%r2),%r19 - xmpyu %fr8L,%fr9L,%fr9 - fstds %fr9,-16(0,%r30) - copy %r19,%r23 - ldw -16(0,%r30),%r28 - ldw -12(0,%r30),%r29 - ldi 0,%r22 - add %r23,%r29,%r29 - addc %r22,%r28,%r28 - add %r25,%r29,%r29 - addc %r24,%r28,%r28 - copy %r28,%r21 - ldi 0,%r20 - copy %r21,%r20 - addib,= -1,%r31,L$0011 - stw %r29,-4(0,%r2) - copy %r20,%r25 - ldi 0,%r24 - fldws 0(0,%r1),%fr9L - ldw 0(0,%r2),%r19 - xmpyu %fr8L,%fr9L,%fr9 - fstds %fr9,-16(0,%r30) - copy %r19,%r23 - ldw -16(0,%r30),%r28 - ldw -12(0,%r30),%r29 - ldi 0,%r22 - add %r23,%r29,%r29 - addc %r22,%r28,%r28 - add %r25,%r29,%r29 - addc %r24,%r28,%r28 - copy %r28,%r21 - ldi 0,%r20 - copy %r21,%r20 - addib,= -1,%r31,L$0011 - stw %r29,0(0,%r2) - ldo 16(%r1),%r1 - ldo 16(%r3),%r3 - ldo 16(%r2),%r2 - bl L$0010,0 - ldo 16(%r26),%r26 -L$0011 - copy %r20,%r28 - ldw -84(0,%r30),%r2 - ldw -60(0,%r30),%r3 - bv 0(%r2) - ldwm -64(0,%r30),%r4 - .EXIT - .PROCEND - .align 4 - .EXPORT bn_mul_words,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,RTNVAL=GR + .export bn_mul_add_words,entry,NO_RELOCATION,LONG_RETURN + .proc + .callinfo frame=128 + .entry + .align 64 + + STD %r3,0(%sp) ; save r3 + STD %r4,8(%sp) ; save r4 + NOP ; Needed to make the loop 16-byte aligned + NOP ; needed to make the loop 16-byte aligned + + STD %r5,16(%sp) ; save r5 + NOP + STD %r6,24(%sp) ; save r6 + STD %r7,32(%sp) ; save r7 + + STD %r8,40(%sp) ; save r8 + STD %r9,48(%sp) ; save r9 + COPY %r0,%ret1 ; return 0 by default + DEPDI,Z 1,31,1,top_overflow ; top_overflow = 1 << 32 + + CMPIB,>= 0,num,bn_mul_add_words_exit ; if (num <= 0) then exit + LDO 128(%sp),%sp ; bump stack + + ; + ; The loop is unrolled twice, so if there is only 1 number + ; then go straight to the cleanup code. + ; + CMPIB,= 1,num,bn_mul_add_words_single_top + FLDD -184(%sp),fw ; (-56-128) load up w into fw (fw_h/fw_l) + + ; + ; This loop is unrolled 2 times (64-byte aligned as well) + ; + ; PA-RISC 2.0 chips have two fully pipelined multipliers, thus + ; two 32-bit mutiplies can be issued per cycle. + ; +bn_mul_add_words_unroll2 + + FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) + FLDD 8(a_ptr),t_float_1 ; load up 64-bit value (fr8L) ht(L)/lt(R) + LDD 0(r_ptr),rp_val ; rp[0] + LDD 8(r_ptr),rp_val_1 ; rp[1] + + XMPYU fht_0,fw_l,fm1 ; m1[0] = fht_0*fw_l + XMPYU fht_1,fw_l,fm1_1 ; m1[1] = fht_1*fw_l + FSTD fm1,-16(%sp) ; -16(sp) = m1[0] + FSTD fm1_1,-48(%sp) ; -48(sp) = m1[1] + + XMPYU flt_0,fw_h,fm ; m[0] = flt_0*fw_h + XMPYU flt_1,fw_h,fm_1 ; m[1] = flt_1*fw_h + FSTD fm,-8(%sp) ; -8(sp) = m[0] + FSTD fm_1,-40(%sp) ; -40(sp) = m[1] + + XMPYU fht_0,fw_h,ht_temp ; ht_temp = fht_0*fw_h + XMPYU fht_1,fw_h,ht_temp_1 ; ht_temp_1 = fht_1*fw_h + FSTD ht_temp,-24(%sp) ; -24(sp) = ht_temp + FSTD ht_temp_1,-56(%sp) ; -56(sp) = ht_temp_1 + + XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l + XMPYU flt_1,fw_l,lt_temp_1 ; lt_temp = lt*fw_l + FSTD lt_temp,-32(%sp) ; -32(sp) = lt_temp + FSTD lt_temp_1,-64(%sp) ; -64(sp) = lt_temp_1 + + LDD -8(%sp),m_0 ; m[0] + LDD -40(%sp),m_1 ; m[1] + LDD -16(%sp),m1_0 ; m1[0] + LDD -48(%sp),m1_1 ; m1[1] + + LDD -24(%sp),ht_0 ; ht[0] + LDD -56(%sp),ht_1 ; ht[1] + ADD,L m1_0,m_0,tmp_0 ; tmp_0 = m[0] + m1[0]; + ADD,L m1_1,m_1,tmp_1 ; tmp_1 = m[1] + m1[1]; + + LDD -32(%sp),lt_0 + LDD -64(%sp),lt_1 + CMPCLR,*>>= tmp_0,m1_0, %r0 ; if (m[0] < m1[0]) + ADD,L ht_0,top_overflow,ht_0 ; ht[0] += (1<<32) + + CMPCLR,*>>= tmp_1,m1_1,%r0 ; if (m[1] < m1[1]) + ADD,L ht_1,top_overflow,ht_1 ; ht[1] += (1<<32) + EXTRD,U tmp_0,31,32,m_0 ; m[0]>>32 + DEPD,Z tmp_0,31,32,m1_0 ; m1[0] = m[0]<<32 + + EXTRD,U tmp_1,31,32,m_1 ; m[1]>>32 + DEPD,Z tmp_1,31,32,m1_1 ; m1[1] = m[1]<<32 + ADD,L ht_0,m_0,ht_0 ; ht[0]+= (m[0]>>32) + ADD,L ht_1,m_1,ht_1 ; ht[1]+= (m[1]>>32) + + ADD lt_0,m1_0,lt_0 ; lt[0] = lt[0]+m1[0]; + ADD,DC ht_0,%r0,ht_0 ; ht[0]++ + ADD lt_1,m1_1,lt_1 ; lt[1] = lt[1]+m1[1]; + ADD,DC ht_1,%r0,ht_1 ; ht[1]++ + + ADD %ret1,lt_0,lt_0 ; lt[0] = lt[0] + c; + ADD,DC ht_0,%r0,ht_0 ; ht[0]++ + ADD lt_0,rp_val,lt_0 ; lt[0] = lt[0]+rp[0] + ADD,DC ht_0,%r0,ht_0 ; ht[0]++ + + LDO -2(num),num ; num = num - 2; + ADD ht_0,lt_1,lt_1 ; lt[1] = lt[1] + ht_0 (c); + ADD,DC ht_1,%r0,ht_1 ; ht[1]++ + STD lt_0,0(r_ptr) ; rp[0] = lt[0] + + ADD lt_1,rp_val_1,lt_1 ; lt[1] = lt[1]+rp[1] + ADD,DC ht_1,%r0,%ret1 ; ht[1]++ + LDO 16(a_ptr),a_ptr ; a_ptr += 2 + + STD lt_1,8(r_ptr) ; rp[1] = lt[1] + CMPIB,<= 2,num,bn_mul_add_words_unroll2 ; go again if more to do + LDO 16(r_ptr),r_ptr ; r_ptr += 2 + + CMPIB,=,N 0,num,bn_mul_add_words_exit ; are we done, or cleanup last one + + ; + ; Top of loop aligned on 64-byte boundary + ; +bn_mul_add_words_single_top + FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) + LDD 0(r_ptr),rp_val ; rp[0] + LDO 8(a_ptr),a_ptr ; a_ptr++ + XMPYU fht_0,fw_l,fm1 ; m1 = ht*fw_l + FSTD fm1,-16(%sp) ; -16(sp) = m1 + XMPYU flt_0,fw_h,fm ; m = lt*fw_h + FSTD fm,-8(%sp) ; -8(sp) = m + XMPYU fht_0,fw_h,ht_temp ; ht_temp = ht*fw_h + FSTD ht_temp,-24(%sp) ; -24(sp) = ht + XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l + FSTD lt_temp,-32(%sp) ; -32(sp) = lt + + LDD -8(%sp),m_0 + LDD -16(%sp),m1_0 ; m1 = temp1 + ADD,L m_0,m1_0,tmp_0 ; tmp_0 = m + m1; + LDD -24(%sp),ht_0 + LDD -32(%sp),lt_0 + + CMPCLR,*>>= tmp_0,m1_0,%r0 ; if (m < m1) + ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32) + + EXTRD,U tmp_0,31,32,m_0 ; m>>32 + DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32 + + ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32) + ADD lt_0,m1_0,tmp_0 ; tmp_0 = lt+m1; + ADD,DC ht_0,%r0,ht_0 ; ht++ + ADD %ret1,tmp_0,lt_0 ; lt = lt + c; + ADD,DC ht_0,%r0,ht_0 ; ht++ + ADD lt_0,rp_val,lt_0 ; lt = lt+rp[0] + ADD,DC ht_0,%r0,%ret1 ; ht++ + STD lt_0,0(r_ptr) ; rp[0] = lt + +bn_mul_add_words_exit + .EXIT + + EXTRD,U %ret1,31,32,%ret0 ; for 32-bit, return in ret0/ret1 + LDD -80(%sp),%r9 ; restore r9 + LDD -88(%sp),%r8 ; restore r8 + LDD -96(%sp),%r7 ; restore r7 + LDD -104(%sp),%r6 ; restore r6 + LDD -112(%sp),%r5 ; restore r5 + LDD -120(%sp),%r4 ; restore r4 + BVE (%rp) + LDD,MB -128(%sp),%r3 ; restore r3 + .PROCEND ;in=23,24,25,26,29;out=28; + +;---------------------------------------------------------------------------- +; +;BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) +; +; arg0 = rp +; arg1 = ap +; arg3 = num +; w on stack at -56(sp) + bn_mul_words - .PROC - .CALLINFO FRAME=64,CALLS,SAVE_RP,ENTRY_GR=3 - .ENTRY - stw %r2,-20(0,%r30) - copy %r25,%r2 - stwm %r4,64(0,%r30) - copy %r24,%r19 - ldi 0,%r28 - stw %r23,-16(0,%r30) - ldo 12(%r26),%r31 - ldo 12(%r2),%r29 - fldws -16(0,%r30),%fr8L -L$0026 - fldws 0(0,%r2),%fr9L - xmpyu %fr8L,%fr9L,%fr9 - fstds %fr9,-16(0,%r30) - copy %r28,%r21 - ldi 0,%r20 - ldw -16(0,%r30),%r24 - ldw -12(0,%r30),%r25 - add %r21,%r25,%r25 - addc %r20,%r24,%r24 - copy %r24,%r23 - ldi 0,%r22 - copy %r23,%r28 - addib,= -1,%r19,L$0027 - stw %r25,0(0,%r26) - fldws -8(0,%r29),%fr9L - xmpyu %fr8L,%fr9L,%fr9 - fstds %fr9,-16(0,%r30) - copy %r28,%r21 - ldi 0,%r20 - ldw -16(0,%r30),%r24 - ldw -12(0,%r30),%r25 - add %r21,%r25,%r25 - addc %r20,%r24,%r24 - copy %r24,%r23 - ldi 0,%r22 - copy %r23,%r28 - addib,= -1,%r19,L$0027 - stw %r25,-8(0,%r31) - fldws -4(0,%r29),%fr9L - xmpyu %fr8L,%fr9L,%fr9 - fstds %fr9,-16(0,%r30) - copy %r28,%r21 - ldi 0,%r20 - ldw -16(0,%r30),%r24 - ldw -12(0,%r30),%r25 - add %r21,%r25,%r25 - addc %r20,%r24,%r24 - copy %r24,%r23 - ldi 0,%r22 - copy %r23,%r28 - addib,= -1,%r19,L$0027 - stw %r25,-4(0,%r31) - fldws 0(0,%r29),%fr9L - xmpyu %fr8L,%fr9L,%fr9 - fstds %fr9,-16(0,%r30) - copy %r28,%r21 - ldi 0,%r20 - ldw -16(0,%r30),%r24 - ldw -12(0,%r30),%r25 - add %r21,%r25,%r25 - addc %r20,%r24,%r24 - copy %r24,%r23 - ldi 0,%r22 - copy %r23,%r28 - addib,= -1,%r19,L$0027 - stw %r25,0(0,%r31) - ldo 16(%r29),%r29 - ldo 16(%r2),%r2 - ldo 16(%r31),%r31 - bl L$0026,0 - ldo 16(%r26),%r26 -L$0027 - ldw -84(0,%r30),%r2 - bv 0(%r2) - ldwm -64(0,%r30),%r4 - .EXIT - .PROCEND - .align 4 - .EXPORT bn_sqr_words,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR + .proc + .callinfo frame=128 + .entry + .EXPORT bn_mul_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN + .align 64 + + STD %r3,0(%sp) ; save r3 + STD %r4,8(%sp) ; save r4 + NOP + STD %r5,16(%sp) ; save r5 + + STD %r6,24(%sp) ; save r6 + STD %r7,32(%sp) ; save r7 + COPY %r0,%ret1 ; return 0 by default + DEPDI,Z 1,31,1,top_overflow ; top_overflow = 1 << 32 + + CMPIB,>= 0,num,bn_mul_words_exit + LDO 128(%sp),%sp ; bump stack + + ; + ; See if only 1 word to do, thus just do cleanup + ; + CMPIB,= 1,num,bn_mul_words_single_top + FLDD -184(%sp),fw ; (-56-128) load up w into fw (fw_h/fw_l) + + ; + ; This loop is unrolled 2 times (64-byte aligned as well) + ; + ; PA-RISC 2.0 chips have two fully pipelined multipliers, thus + ; two 32-bit mutiplies can be issued per cycle. + ; +bn_mul_words_unroll2 + + FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) + FLDD 8(a_ptr),t_float_1 ; load up 64-bit value (fr8L) ht(L)/lt(R) + XMPYU fht_0,fw_l,fm1 ; m1[0] = fht_0*fw_l + XMPYU fht_1,fw_l,fm1_1 ; m1[1] = ht*fw_l + + FSTD fm1,-16(%sp) ; -16(sp) = m1 + FSTD fm1_1,-48(%sp) ; -48(sp) = m1 + XMPYU flt_0,fw_h,fm ; m = lt*fw_h + XMPYU flt_1,fw_h,fm_1 ; m = lt*fw_h + + FSTD fm,-8(%sp) ; -8(sp) = m + FSTD fm_1,-40(%sp) ; -40(sp) = m + XMPYU fht_0,fw_h,ht_temp ; ht_temp = fht_0*fw_h + XMPYU fht_1,fw_h,ht_temp_1 ; ht_temp = ht*fw_h + + FSTD ht_temp,-24(%sp) ; -24(sp) = ht + FSTD ht_temp_1,-56(%sp) ; -56(sp) = ht + XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l + XMPYU flt_1,fw_l,lt_temp_1 ; lt_temp = lt*fw_l + + FSTD lt_temp,-32(%sp) ; -32(sp) = lt + FSTD lt_temp_1,-64(%sp) ; -64(sp) = lt + LDD -8(%sp),m_0 + LDD -40(%sp),m_1 + + LDD -16(%sp),m1_0 + LDD -48(%sp),m1_1 + LDD -24(%sp),ht_0 + LDD -56(%sp),ht_1 + + ADD,L m1_0,m_0,tmp_0 ; tmp_0 = m + m1; + ADD,L m1_1,m_1,tmp_1 ; tmp_1 = m + m1; + LDD -32(%sp),lt_0 + LDD -64(%sp),lt_1 + + CMPCLR,*>>= tmp_0,m1_0, %r0 ; if (m < m1) + ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32) + CMPCLR,*>>= tmp_1,m1_1,%r0 ; if (m < m1) + ADD,L ht_1,top_overflow,ht_1 ; ht += (1<<32) + + EXTRD,U tmp_0,31,32,m_0 ; m>>32 + DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32 + EXTRD,U tmp_1,31,32,m_1 ; m>>32 + DEPD,Z tmp_1,31,32,m1_1 ; m1 = m<<32 + + ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32) + ADD,L ht_1,m_1,ht_1 ; ht+= (m>>32) + ADD lt_0,m1_0,lt_0 ; lt = lt+m1; + ADD,DC ht_0,%r0,ht_0 ; ht++ + + ADD lt_1,m1_1,lt_1 ; lt = lt+m1; + ADD,DC ht_1,%r0,ht_1 ; ht++ + ADD %ret1,lt_0,lt_0 ; lt = lt + c (ret1); + ADD,DC ht_0,%r0,ht_0 ; ht++ + + ADD ht_0,lt_1,lt_1 ; lt = lt + c (ht_0) + ADD,DC ht_1,%r0,ht_1 ; ht++ + STD lt_0,0(r_ptr) ; rp[0] = lt + STD lt_1,8(r_ptr) ; rp[1] = lt + + COPY ht_1,%ret1 ; carry = ht + LDO -2(num),num ; num = num - 2; + LDO 16(a_ptr),a_ptr ; ap += 2 + CMPIB,<= 2,num,bn_mul_words_unroll2 + LDO 16(r_ptr),r_ptr ; rp++ + + CMPIB,=,N 0,num,bn_mul_words_exit ; are we done? + + ; + ; Top of loop aligned on 64-byte boundary + ; +bn_mul_words_single_top + FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) + + XMPYU fht_0,fw_l,fm1 ; m1 = ht*fw_l + FSTD fm1,-16(%sp) ; -16(sp) = m1 + XMPYU flt_0,fw_h,fm ; m = lt*fw_h + FSTD fm,-8(%sp) ; -8(sp) = m + XMPYU fht_0,fw_h,ht_temp ; ht_temp = ht*fw_h + FSTD ht_temp,-24(%sp) ; -24(sp) = ht + XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l + FSTD lt_temp,-32(%sp) ; -32(sp) = lt + + LDD -8(%sp),m_0 + LDD -16(%sp),m1_0 + ADD,L m_0,m1_0,tmp_0 ; tmp_0 = m + m1; + LDD -24(%sp),ht_0 + LDD -32(%sp),lt_0 + + CMPCLR,*>>= tmp_0,m1_0,%r0 ; if (m < m1) + ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32) + + EXTRD,U tmp_0,31,32,m_0 ; m>>32 + DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32 + + ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32) + ADD lt_0,m1_0,lt_0 ; lt= lt+m1; + ADD,DC ht_0,%r0,ht_0 ; ht++ + + ADD %ret1,lt_0,lt_0 ; lt = lt + c; + ADD,DC ht_0,%r0,ht_0 ; ht++ + + COPY ht_0,%ret1 ; copy carry + STD lt_0,0(r_ptr) ; rp[0] = lt + +bn_mul_words_exit + .EXIT + EXTRD,U %ret1,31,32,%ret0 ; for 32-bit, return in ret0/ret1 + LDD -96(%sp),%r7 ; restore r7 + LDD -104(%sp),%r6 ; restore r6 + LDD -112(%sp),%r5 ; restore r5 + LDD -120(%sp),%r4 ; restore r4 + BVE (%rp) + LDD,MB -128(%sp),%r3 ; restore r3 + .PROCEND + +;---------------------------------------------------------------------------- +; +;void bn_sqr_words(BN_ULONG *rp, BN_ULONG *ap, int num) +; +; arg0 = rp +; arg1 = ap +; arg2 = num +; + bn_sqr_words + .proc + .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE + .EXPORT bn_sqr_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN + .entry + .align 64 + + STD %r3,0(%sp) ; save r3 + STD %r4,8(%sp) ; save r4 + NOP + STD %r5,16(%sp) ; save r5 + + CMPIB,>= 0,num,bn_sqr_words_exit + LDO 128(%sp),%sp ; bump stack + + ; + ; If only 1, the goto straight to cleanup + ; + CMPIB,= 1,num,bn_sqr_words_single_top + DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L + + ; + ; This loop is unrolled 2 times (64-byte aligned as well) + ; + +bn_sqr_words_unroll2 + FLDD 0(a_ptr),t_float_0 ; a[0] + FLDD 8(a_ptr),t_float_1 ; a[1] + XMPYU fht_0,flt_0,fm ; m[0] + XMPYU fht_1,flt_1,fm_1 ; m[1] + + FSTD fm,-24(%sp) ; store m[0] + FSTD fm_1,-56(%sp) ; store m[1] + XMPYU flt_0,flt_0,lt_temp ; lt[0] + XMPYU flt_1,flt_1,lt_temp_1 ; lt[1] + + FSTD lt_temp,-16(%sp) ; store lt[0] + FSTD lt_temp_1,-48(%sp) ; store lt[1] + XMPYU fht_0,fht_0,ht_temp ; ht[0] + XMPYU fht_1,fht_1,ht_temp_1 ; ht[1] + + FSTD ht_temp,-8(%sp) ; store ht[0] + FSTD ht_temp_1,-40(%sp) ; store ht[1] + LDD -24(%sp),m_0 + LDD -56(%sp),m_1 + + AND m_0,high_mask,tmp_0 ; m[0] & Mask + AND m_1,high_mask,tmp_1 ; m[1] & Mask + DEPD,Z m_0,30,31,m_0 ; m[0] << 32+1 + DEPD,Z m_1,30,31,m_1 ; m[1] << 32+1 + + LDD -16(%sp),lt_0 + LDD -48(%sp),lt_1 + EXTRD,U tmp_0,32,33,tmp_0 ; tmp_0 = m[0]&Mask >> 32-1 + EXTRD,U tmp_1,32,33,tmp_1 ; tmp_1 = m[1]&Mask >> 32-1 + + LDD -8(%sp),ht_0 + LDD -40(%sp),ht_1 + ADD,L ht_0,tmp_0,ht_0 ; ht[0] += tmp_0 + ADD,L ht_1,tmp_1,ht_1 ; ht[1] += tmp_1 + + ADD lt_0,m_0,lt_0 ; lt = lt+m + ADD,DC ht_0,%r0,ht_0 ; ht[0]++ + STD lt_0,0(r_ptr) ; rp[0] = lt[0] + STD ht_0,8(r_ptr) ; rp[1] = ht[1] + + ADD lt_1,m_1,lt_1 ; lt = lt+m + ADD,DC ht_1,%r0,ht_1 ; ht[1]++ + STD lt_1,16(r_ptr) ; rp[2] = lt[1] + STD ht_1,24(r_ptr) ; rp[3] = ht[1] + + LDO -2(num),num ; num = num - 2; + LDO 16(a_ptr),a_ptr ; ap += 2 + CMPIB,<= 2,num,bn_sqr_words_unroll2 + LDO 32(r_ptr),r_ptr ; rp += 4 + + CMPIB,=,N 0,num,bn_sqr_words_exit ; are we done? + + ; + ; Top of loop aligned on 64-byte boundary + ; +bn_sqr_words_single_top + FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) + + XMPYU fht_0,flt_0,fm ; m + FSTD fm,-24(%sp) ; store m + + XMPYU flt_0,flt_0,lt_temp ; lt + FSTD lt_temp,-16(%sp) ; store lt + + XMPYU fht_0,fht_0,ht_temp ; ht + FSTD ht_temp,-8(%sp) ; store ht + + LDD -24(%sp),m_0 ; load m + AND m_0,high_mask,tmp_0 ; m & Mask + DEPD,Z m_0,30,31,m_0 ; m << 32+1 + LDD -16(%sp),lt_0 ; lt + + LDD -8(%sp),ht_0 ; ht + EXTRD,U tmp_0,32,33,tmp_0 ; tmp_0 = m&Mask >> 32-1 + ADD m_0,lt_0,lt_0 ; lt = lt+m + ADD,L ht_0,tmp_0,ht_0 ; ht += tmp_0 + ADD,DC ht_0,%r0,ht_0 ; ht++ + + STD lt_0,0(r_ptr) ; rp[0] = lt + STD ht_0,8(r_ptr) ; rp[1] = ht + +bn_sqr_words_exit + .EXIT + LDD -112(%sp),%r5 ; restore r5 + LDD -120(%sp),%r4 ; restore r4 + BVE (%rp) + LDD,MB -128(%sp),%r3 + .PROCEND ;in=23,24,25,26,29;out=28; + + +;---------------------------------------------------------------------------- +; +;BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) +; +; arg0 = rp +; arg1 = ap +; arg2 = bp +; arg3 = n + +t .reg %r22 +b .reg %r21 +l .reg %r20 + +bn_add_words + .proc + .entry + .callinfo + .EXPORT bn_add_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN + .align 64 + + CMPIB,>= 0,n,bn_add_words_exit + COPY %r0,%ret1 ; return 0 by default + + ; + ; If 2 or more numbers do the loop + ; + CMPIB,= 1,n,bn_add_words_single_top + NOP + + ; + ; This loop is unrolled 2 times (64-byte aligned as well) + ; +bn_add_words_unroll2 + LDD 0(a_ptr),t + LDD 0(b_ptr),b + ADD t,%ret1,t ; t = t+c; + ADD,DC %r0,%r0,%ret1 ; set c to carry + ADD t,b,l ; l = t + b[0] + ADD,DC %ret1,%r0,%ret1 ; c+= carry + STD l,0(r_ptr) + + LDD 8(a_ptr),t + LDD 8(b_ptr),b + ADD t,%ret1,t ; t = t+c; + ADD,DC %r0,%r0,%ret1 ; set c to carry + ADD t,b,l ; l = t + b[0] + ADD,DC %ret1,%r0,%ret1 ; c+= carry + STD l,8(r_ptr) + + LDO -2(n),n + LDO 16(a_ptr),a_ptr + LDO 16(b_ptr),b_ptr + + CMPIB,<= 2,n,bn_add_words_unroll2 + LDO 16(r_ptr),r_ptr + + CMPIB,=,N 0,n,bn_add_words_exit ; are we done? + +bn_add_words_single_top + LDD 0(a_ptr),t + LDD 0(b_ptr),b + + ADD t,%ret1,t ; t = t+c; + ADD,DC %r0,%r0,%ret1 ; set c to carry (could use CMPCLR??) + ADD t,b,l ; l = t + b[0] + ADD,DC %ret1,%r0,%ret1 ; c+= carry + STD l,0(r_ptr) + +bn_add_words_exit + .EXIT + BVE (%rp) + EXTRD,U %ret1,31,32,%ret0 ; for 32-bit, return in ret0/ret1 + .PROCEND ;in=23,24,25,26,29;out=28; + +;---------------------------------------------------------------------------- +; +;BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) +; +; arg0 = rp +; arg1 = ap +; arg2 = bp +; arg3 = n + +t1 .reg %r22 +t2 .reg %r21 +sub_tmp1 .reg %r20 +sub_tmp2 .reg %r19 + + +bn_sub_words + .proc + .callinfo + .EXPORT bn_sub_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN + .entry + .align 64 + + CMPIB,>= 0,n,bn_sub_words_exit + COPY %r0,%ret1 ; return 0 by default + + ; + ; If 2 or more numbers do the loop + ; + CMPIB,= 1,n,bn_sub_words_single_top + NOP + + ; + ; This loop is unrolled 2 times (64-byte aligned as well) + ; +bn_sub_words_unroll2 + LDD 0(a_ptr),t1 + LDD 0(b_ptr),t2 + SUB t1,t2,sub_tmp1 ; t3 = t1-t2; + SUB sub_tmp1,%ret1,sub_tmp1 ; t3 = t3- c; + + CMPCLR,*>> t1,t2,sub_tmp2 ; clear if t1 > t2 + LDO 1(%r0),sub_tmp2 + + CMPCLR,*= t1,t2,%r0 + COPY sub_tmp2,%ret1 + STD sub_tmp1,0(r_ptr) + + LDD 8(a_ptr),t1 + LDD 8(b_ptr),t2 + SUB t1,t2,sub_tmp1 ; t3 = t1-t2; + SUB sub_tmp1,%ret1,sub_tmp1 ; t3 = t3- c; + CMPCLR,*>> t1,t2,sub_tmp2 ; clear if t1 > t2 + LDO 1(%r0),sub_tmp2 + + CMPCLR,*= t1,t2,%r0 + COPY sub_tmp2,%ret1 + STD sub_tmp1,8(r_ptr) + + LDO -2(n),n + LDO 16(a_ptr),a_ptr + LDO 16(b_ptr),b_ptr + + CMPIB,<= 2,n,bn_sub_words_unroll2 + LDO 16(r_ptr),r_ptr + + CMPIB,=,N 0,n,bn_sub_words_exit ; are we done? + +bn_sub_words_single_top + LDD 0(a_ptr),t1 + LDD 0(b_ptr),t2 + SUB t1,t2,sub_tmp1 ; t3 = t1-t2; + SUB sub_tmp1,%ret1,sub_tmp1 ; t3 = t3- c; + CMPCLR,*>> t1,t2,sub_tmp2 ; clear if t1 > t2 + LDO 1(%r0),sub_tmp2 + + CMPCLR,*= t1,t2,%r0 + COPY sub_tmp2,%ret1 + + STD sub_tmp1,0(r_ptr) + +bn_sub_words_exit + .EXIT + BVE (%rp) + EXTRD,U %ret1,31,32,%ret0 ; for 32-bit, return in ret0/ret1 + .PROCEND ;in=23,24,25,26,29;out=28; + +;------------------------------------------------------------------------------ +; +; unsigned long bn_div_words(unsigned long h, unsigned long l, unsigned long d) +; +; arg0 = h +; arg1 = l +; arg2 = d +; +; This is mainly just output from the HP C compiler. +; +;------------------------------------------------------------------------------ +bn_div_words .PROC - .CALLINFO FRAME=0,NO_CALLS - .ENTRY - ldo 28(%r26),%r19 - ldo 12(%r25),%r28 -L$0042 - fldws 0(0,%r25),%fr8L - fldws 0(0,%r25),%fr8R - xmpyu %fr8L,%fr8R,%fr8 - fstds %fr8,-16(0,%r30) - ldw -16(0,%r30),%r22 - ldw -12(0,%r30),%r23 - stw %r23,0(0,%r26) - copy %r22,%r21 - ldi 0,%r20 - addib,= -1,%r24,L$0049 - stw %r21,-24(0,%r19) - fldws -8(0,%r28),%fr8L - fldws -8(0,%r28),%fr8R - xmpyu %fr8L,%fr8R,%fr8 - fstds %fr8,-16(0,%r30) - ldw -16(0,%r30),%r22 - ldw -12(0,%r30),%r23 - stw %r23,-20(0,%r19) - copy %r22,%r21 - ldi 0,%r20 - addib,= -1,%r24,L$0049 - stw %r21,-16(0,%r19) - fldws -4(0,%r28),%fr8L - fldws -4(0,%r28),%fr8R - xmpyu %fr8L,%fr8R,%fr8 - fstds %fr8,-16(0,%r30) - ldw -16(0,%r30),%r22 - ldw -12(0,%r30),%r23 - stw %r23,-12(0,%r19) - copy %r22,%r21 - ldi 0,%r20 - addib,= -1,%r24,L$0049 - stw %r21,-8(0,%r19) - fldws 0(0,%r28),%fr8L - fldws 0(0,%r28),%fr8R - xmpyu %fr8L,%fr8R,%fr8 - fstds %fr8,-16(0,%r30) - ldw -16(0,%r30),%r22 - ldw -12(0,%r30),%r23 - stw %r23,-4(0,%r19) - copy %r22,%r21 - ldi 0,%r20 - addib,= -1,%r24,L$0049 - stw %r21,0(0,%r19) - ldo 16(%r28),%r28 - ldo 16(%r25),%r25 - ldo 32(%r19),%r19 - bl L$0042,0 - ldo 32(%r26),%r26 -L$0049 - bv,n 0(%r2) - .EXIT - .PROCEND - .IMPORT BN_num_bits_word,CODE - .IMPORT fprintf,CODE - .IMPORT __iob,DATA - .SPACE $TEXT$ - .SUBSPA $LIT$ - - .align 4 -L$C0000 - .STRING "Division would overflow (%d)\x0a\x00" - .IMPORT abort,CODE - .SPACE $TEXT$ - .SUBSPA $CODE$ - - .align 4 - .EXPORT bn_div64,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,RTNVAL=GR -bn_div64 + .EXPORT bn_div_words,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,RTNVAL=GR,LONG_RETURN + .IMPORT BN_num_bits_word,CODE + .IMPORT __iob,DATA + .IMPORT fprintf,CODE + .IMPORT abort,CODE + .IMPORT $$div2U,MILLICODE + .CALLINFO CALLER,FRAME=144,ENTRY_GR=%r9,SAVE_RP,ARGS_SAVED,ORDERING_AWARE + .ENTRY + STW %r2,-20(%r30) ;offset 0x8ec + STW,MA %r3,192(%r30) ;offset 0x8f0 + STW %r4,-188(%r30) ;offset 0x8f4 + DEPD %r5,31,32,%r6 ;offset 0x8f8 + STD %r6,-184(%r30) ;offset 0x8fc + DEPD %r7,31,32,%r8 ;offset 0x900 + STD %r8,-176(%r30) ;offset 0x904 + STW %r9,-168(%r30) ;offset 0x908 + LDD -248(%r30),%r3 ;offset 0x90c + COPY %r26,%r4 ;offset 0x910 + COPY %r24,%r5 ;offset 0x914 + DEPD %r25,31,32,%r4 ;offset 0x918 + CMPB,*<> %r3,%r0,$0006000C ;offset 0x91c + DEPD %r23,31,32,%r5 ;offset 0x920 + MOVIB,TR -1,%r29,$00060002 ;offset 0x924 + EXTRD,U %r29,31,32,%r28 ;offset 0x928 +$0006002A + LDO -1(%r29),%r29 ;offset 0x92c + SUB %r23,%r7,%r23 ;offset 0x930 +$00060024 + SUB %r4,%r31,%r25 ;offset 0x934 + AND %r25,%r19,%r26 ;offset 0x938 + CMPB,*<>,N %r0,%r26,$00060046 ;offset 0x93c + DEPD,Z %r25,31,32,%r20 ;offset 0x940 + OR %r20,%r24,%r21 ;offset 0x944 + CMPB,*<<,N %r21,%r23,$0006002A ;offset 0x948 + SUB %r31,%r2,%r31 ;offset 0x94c +$00060046 +$0006002E + DEPD,Z %r23,31,32,%r25 ;offset 0x950 + EXTRD,U %r23,31,32,%r26 ;offset 0x954 + AND %r25,%r19,%r24 ;offset 0x958 + ADD,L %r31,%r26,%r31 ;offset 0x95c + CMPCLR,*>>= %r5,%r24,%r0 ;offset 0x960 + LDO 1(%r31),%r31 ;offset 0x964 +$00060032 + CMPB,*<<=,N %r31,%r4,$00060036 ;offset 0x968 + LDO -1(%r29),%r29 ;offset 0x96c + ADD,L %r4,%r3,%r4 ;offset 0x970 +$00060036 + ADDIB,=,N -1,%r8,$D0 ;offset 0x974 + SUB %r5,%r24,%r28 ;offset 0x978 +$0006003A + SUB %r4,%r31,%r24 ;offset 0x97c + SHRPD %r24,%r28,32,%r4 ;offset 0x980 + DEPD,Z %r29,31,32,%r9 ;offset 0x984 + DEPD,Z %r28,31,32,%r5 ;offset 0x988 +$0006001C + EXTRD,U %r4,31,32,%r31 ;offset 0x98c + CMPB,*<>,N %r31,%r2,$00060020 ;offset 0x990 + MOVB,TR %r6,%r29,$D1 ;offset 0x994 + STD %r29,-152(%r30) ;offset 0x998 +$0006000C + EXTRD,U %r3,31,32,%r25 ;offset 0x99c + COPY %r3,%r26 ;offset 0x9a0 + EXTRD,U %r3,31,32,%r9 ;offset 0x9a4 + EXTRD,U %r4,31,32,%r8 ;offset 0x9a8 + .CALL ARGW0=GR,ARGW1=GR,RTNVAL=GR ;in=25,26;out=28; + B,L BN_num_bits_word,%r2 ;offset 0x9ac + EXTRD,U %r5,31,32,%r7 ;offset 0x9b0 + LDI 64,%r20 ;offset 0x9b4 + DEPD %r7,31,32,%r5 ;offset 0x9b8 + DEPD %r8,31,32,%r4 ;offset 0x9bc + DEPD %r9,31,32,%r3 ;offset 0x9c0 + CMPB,= %r28,%r20,$00060012 ;offset 0x9c4 + COPY %r28,%r24 ;offset 0x9c8 + MTSARCM %r24 ;offset 0x9cc + DEPDI,Z -1,%sar,1,%r19 ;offset 0x9d0 + CMPB,*>>,N %r4,%r19,$D2 ;offset 0x9d4 +$00060012 + SUBI 64,%r24,%r31 ;offset 0x9d8 + CMPCLR,*<< %r4,%r3,%r0 ;offset 0x9dc + SUB %r4,%r3,%r4 ;offset 0x9e0 +$00060016 + CMPB,= %r31,%r0,$0006001A ;offset 0x9e4 + COPY %r0,%r9 ;offset 0x9e8 + MTSARCM %r31 ;offset 0x9ec + DEPD,Z %r3,%sar,64,%r3 ;offset 0x9f0 + SUBI 64,%r31,%r26 ;offset 0x9f4 + MTSAR %r26 ;offset 0x9f8 + SHRPD %r4,%r5,%sar,%r4 ;offset 0x9fc + MTSARCM %r31 ;offset 0xa00 + DEPD,Z %r5,%sar,64,%r5 ;offset 0xa04 +$0006001A + DEPDI,Z -1,31,32,%r19 ;offset 0xa08 + AND %r3,%r19,%r29 ;offset 0xa0c + EXTRD,U %r29,31,32,%r2 ;offset 0xa10 + DEPDI,Z -1,63,32,%r6 ;offset 0xa14 + MOVIB,TR 2,%r8,$0006001C ;offset 0xa18 + EXTRD,U %r3,63,32,%r7 ;offset 0xa1c +$D2 + ADDIL LR'__iob-$global$,%r27,%r1 ;offset 0xa20 + LDIL LR'C$7,%r21 ;offset 0xa24 + LDO RR'__iob-$global$+32(%r1),%r26 ;offset 0xa28 + .CALL ARGW0=GR,ARGW1=GR,ARGW2=GR,RTNVAL=GR ;in=24,25,26;out=28; + B,L fprintf,%r2 ;offset 0xa2c + LDO RR'C$7(%r21),%r25 ;offset 0xa30 + .CALL ; + B,L abort,%r2 ;offset 0xa34 + NOP ;offset 0xa38 + B $D3 ;offset 0xa3c + LDW -212(%r30),%r2 ;offset 0xa40 +$00060020 + COPY %r4,%r26 ;offset 0xa44 + EXTRD,U %r4,31,32,%r25 ;offset 0xa48 + COPY %r2,%r24 ;offset 0xa4c + .CALL ;in=23,24,25,26;out=20,21,22,28,29; (MILLICALL) + B,L $$div2U,%r31 ;offset 0xa50 + EXTRD,U %r2,31,32,%r23 ;offset 0xa54 + DEPD %r28,31,32,%r29 ;offset 0xa58 +$00060022 + STD %r29,-152(%r30) ;offset 0xa5c +$D1 + AND %r5,%r19,%r24 ;offset 0xa60 + EXTRD,U %r24,31,32,%r24 ;offset 0xa64 + STW %r2,-160(%r30) ;offset 0xa68 + STW %r7,-128(%r30) ;offset 0xa6c + FLDD -152(%r30),%fr4 ;offset 0xa70 + FLDD -152(%r30),%fr7 ;offset 0xa74 + FLDW -160(%r30),%fr8L ;offset 0xa78 + FLDW -128(%r30),%fr5L ;offset 0xa7c + XMPYU %fr8L,%fr7L,%fr10 ;offset 0xa80 + FSTD %fr10,-136(%r30) ;offset 0xa84 + XMPYU %fr8L,%fr7R,%fr22 ;offset 0xa88 + FSTD %fr22,-144(%r30) ;offset 0xa8c + XMPYU %fr5L,%fr4L,%fr11 ;offset 0xa90 + XMPYU %fr5L,%fr4R,%fr23 ;offset 0xa94 + FSTD %fr11,-112(%r30) ;offset 0xa98 + FSTD %fr23,-120(%r30) ;offset 0xa9c + LDD -136(%r30),%r28 ;offset 0xaa0 + DEPD,Z %r28,31,32,%r31 ;offset 0xaa4 + LDD -144(%r30),%r20 ;offset 0xaa8 + ADD,L %r20,%r31,%r31 ;offset 0xaac + LDD -112(%r30),%r22 ;offset 0xab0 + DEPD,Z %r22,31,32,%r22 ;offset 0xab4 + LDD -120(%r30),%r21 ;offset 0xab8 + B $00060024 ;offset 0xabc + ADD,L %r21,%r22,%r23 ;offset 0xac0 +$D0 + OR %r9,%r29,%r29 ;offset 0xac4 +$00060040 + EXTRD,U %r29,31,32,%r28 ;offset 0xac8 +$00060002 +$L2 + LDW -212(%r30),%r2 ;offset 0xacc +$D3 + LDW -168(%r30),%r9 ;offset 0xad0 + LDD -176(%r30),%r8 ;offset 0xad4 + EXTRD,U %r8,31,32,%r7 ;offset 0xad8 + LDD -184(%r30),%r6 ;offset 0xadc + EXTRD,U %r6,31,32,%r5 ;offset 0xae0 + LDW -188(%r30),%r4 ;offset 0xae4 + BVE (%r2) ;offset 0xae8 + .EXIT + LDW,MB -192(%r30),%r3 ;offset 0xaec + .PROCEND ;in=23,25;out=28,29;fpin=105,107; + + + + +;---------------------------------------------------------------------------- +; +; Registers to hold 64-bit values to manipulate. The "L" part +; of the register corresponds to the upper 32-bits, while the "R" +; part corresponds to the lower 32-bits +; +; Note, that when using b6 and b7, the code must save these before +; using them because they are callee save registers +; +; +; Floating point registers to use to save values that +; are manipulated. These don't collide with ftemp1-6 and +; are all caller save registers +; +a0 .reg %fr22 +a0L .reg %fr22L +a0R .reg %fr22R + +a1 .reg %fr23 +a1L .reg %fr23L +a1R .reg %fr23R + +a2 .reg %fr24 +a2L .reg %fr24L +a2R .reg %fr24R + +a3 .reg %fr25 +a3L .reg %fr25L +a3R .reg %fr25R + +a4 .reg %fr26 +a4L .reg %fr26L +a4R .reg %fr26R + +a5 .reg %fr27 +a5L .reg %fr27L +a5R .reg %fr27R + +a6 .reg %fr28 +a6L .reg %fr28L +a6R .reg %fr28R + +a7 .reg %fr29 +a7L .reg %fr29L +a7R .reg %fr29R + +b0 .reg %fr30 +b0L .reg %fr30L +b0R .reg %fr30R + +b1 .reg %fr31 +b1L .reg %fr31L +b1R .reg %fr31R + +; +; Temporary floating point variables, these are all caller save +; registers +; +ftemp1 .reg %fr4 +ftemp2 .reg %fr5 +ftemp3 .reg %fr6 +ftemp4 .reg %fr7 + +; +; The B set of registers when used. +; + +b2 .reg %fr8 +b2L .reg %fr8L +b2R .reg %fr8R + +b3 .reg %fr9 +b3L .reg %fr9L +b3R .reg %fr9R + +b4 .reg %fr10 +b4L .reg %fr10L +b4R .reg %fr10R + +b5 .reg %fr11 +b5L .reg %fr11L +b5R .reg %fr11R + +b6 .reg %fr12 +b6L .reg %fr12L +b6R .reg %fr12R + +b7 .reg %fr13 +b7L .reg %fr13L +b7R .reg %fr13R + +c1 .reg %r21 ; only reg +temp1 .reg %r20 ; only reg +temp2 .reg %r19 ; only reg +temp3 .reg %r31 ; only reg + +m1 .reg %r28 +c2 .reg %r23 +high_one .reg %r1 +ht .reg %r6 +lt .reg %r5 +m .reg %r4 +c3 .reg %r3 + +SQR_ADD_C .macro A0L,A0R,C1,C2,C3 + XMPYU A0L,A0R,ftemp1 ; m + FSTD ftemp1,-24(%sp) ; store m + + XMPYU A0R,A0R,ftemp2 ; lt + FSTD ftemp2,-16(%sp) ; store lt + + XMPYU A0L,A0L,ftemp3 ; ht + FSTD ftemp3,-8(%sp) ; store ht + + LDD -24(%sp),m ; load m + AND m,high_mask,temp2 ; m & Mask + DEPD,Z m,30,31,temp3 ; m << 32+1 + LDD -16(%sp),lt ; lt + + LDD -8(%sp),ht ; ht + EXTRD,U temp2,32,33,temp1 ; temp1 = m&Mask >> 32-1 + ADD temp3,lt,lt ; lt = lt+m + ADD,L ht,temp1,ht ; ht += temp1 + ADD,DC ht,%r0,ht ; ht++ + + ADD C1,lt,C1 ; c1=c1+lt + ADD,DC ht,%r0,ht ; ht++ + + ADD C2,ht,C2 ; c2=c2+ht + ADD,DC C3,%r0,C3 ; c3++ +.endm + +SQR_ADD_C2 .macro A0L,A0R,A1L,A1R,C1,C2,C3 + XMPYU A0L,A1R,ftemp1 ; m1 = bl*ht + FSTD ftemp1,-16(%sp) ; + XMPYU A0R,A1L,ftemp2 ; m = bh*lt + FSTD ftemp2,-8(%sp) ; + XMPYU A0R,A1R,ftemp3 ; lt = bl*lt + FSTD ftemp3,-32(%sp) + XMPYU A0L,A1L,ftemp4 ; ht = bh*ht + FSTD ftemp4,-24(%sp) ; + + LDD -8(%sp),m ; r21 = m + LDD -16(%sp),m1 ; r19 = m1 + ADD,L m,m1,m ; m+m1 + + DEPD,Z m,31,32,temp3 ; (m+m1<<32) + LDD -24(%sp),ht ; r24 = ht + + CMPCLR,*>>= m,m1,%r0 ; if (m < m1) + ADD,L ht,high_one,ht ; ht+=high_one + + EXTRD,U m,31,32,temp1 ; m >> 32 + LDD -32(%sp),lt ; lt + ADD,L ht,temp1,ht ; ht+= m>>32 + ADD lt,temp3,lt ; lt = lt+m1 + ADD,DC ht,%r0,ht ; ht++ + + ADD ht,ht,ht ; ht=ht+ht; + ADD,DC C3,%r0,C3 ; add in carry (c3++) + + ADD lt,lt,lt ; lt=lt+lt; + ADD,DC ht,%r0,ht ; add in carry (ht++) + + ADD C1,lt,C1 ; c1=c1+lt + ADD,DC,*NUV ht,%r0,ht ; add in carry (ht++) + LDO 1(C3),C3 ; bump c3 if overflow,nullify otherwise + + ADD C2,ht,C2 ; c2 = c2 + ht + ADD,DC C3,%r0,C3 ; add in carry (c3++) +.endm + +; +;void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a) +; arg0 = r_ptr +; arg1 = a_ptr +; + +bn_sqr_comba8 .PROC - .CALLINFO FRAME=128,CALLS,SAVE_RP,ENTRY_GR=8 - .ENTRY - stw %r2,-20(0,%r30) - stwm %r8,128(0,%r30) - stw %r7,-124(0,%r30) - stw %r4,-112(0,%r30) - stw %r3,-108(0,%r30) - copy %r26,%r3 - copy %r25,%r4 - stw %r6,-120(0,%r30) - ldi 0,%r7 - stw %r5,-116(0,%r30) - movb,<> %r24,%r5,L$0051 - ldi 2,%r6 - bl L$0068,0 - ldi -1,%r28 -L$0051 - .CALL ARGW0=GR - bl BN_num_bits_word,%r2 - copy %r5,%r26 - copy %r28,%r24 - ldi 32,%r19 - comb,= %r19,%r24,L$0052 - subi 31,%r24,%r19 - mtsar %r19 - zvdepi 1,32,%r19 - comb,>>= %r19,%r3,L$0052 - addil LR'__iob-$global$+32,%r27 - ldo RR'__iob-$global$+32(%r1),%r26 - ldil LR'L$C0000,%r25 - .CALL ARGW0=GR,ARGW1=GR,ARGW2=GR - bl fprintf,%r2 - ldo RR'L$C0000(%r25),%r25 - .CALL - bl abort,%r2 - nop -L$0052 - comb,>> %r5,%r3,L$0053 - subi 32,%r24,%r24 - sub %r3,%r5,%r3 -L$0053 - comib,= 0,%r24,L$0054 - subi 31,%r24,%r19 - mtsar %r19 - zvdep %r5,32,%r5 - zvdep %r3,32,%r21 - subi 32,%r24,%r20 - mtsar %r20 - vshd 0,%r4,%r20 - or %r21,%r20,%r3 - mtsar %r19 - zvdep %r4,32,%r4 -L$0054 - extru %r5,15,16,%r23 - extru %r5,31,16,%r28 -L$0055 - extru %r3,15,16,%r19 - comb,<> %r23,%r19,L$0058 - copy %r3,%r26 - bl L$0059,0 - zdepi -1,31,16,%r29 -L$0058 - .IMPORT $$divU,MILLICODE - bl $$divU,%r31 - copy %r23,%r25 -L$0059 - stw %r29,-16(0,%r30) - fldws -16(0,%r30),%fr10L - stw %r28,-16(0,%r30) - fldws -16(0,%r30),%fr10R - stw %r23,-16(0,%r30) - xmpyu %fr10L,%fr10R,%fr8 - fldws -16(0,%r30),%fr10R - fstws %fr8R,-16(0,%r30) - xmpyu %fr10L,%fr10R,%fr9 - ldw -16(0,%r30),%r8 - fstws %fr9R,-16(0,%r30) - copy %r8,%r22 - ldw -16(0,%r30),%r8 - extru %r4,15,16,%r24 - copy %r8,%r21 -L$0060 - sub %r3,%r21,%r20 - copy %r20,%r19 - depi 0,31,16,%r19 - comib,<> 0,%r19,L$0061 - zdep %r20,15,16,%r19 - addl %r19,%r24,%r19 - comb,>>= %r19,%r22,L$0061 - sub %r22,%r28,%r22 - sub %r21,%r23,%r21 - bl L$0060,0 - ldo -1(%r29),%r29 -L$0061 - stw %r29,-16(0,%r30) - fldws -16(0,%r30),%fr10L - stw %r28,-16(0,%r30) - fldws -16(0,%r30),%fr10R - xmpyu %fr10L,%fr10R,%fr8 - fstws %fr8R,-16(0,%r30) - ldw -16(0,%r30),%r8 - stw %r23,-16(0,%r30) - fldws -16(0,%r30),%fr10R - copy %r8,%r19 - xmpyu %fr10L,%fr10R,%fr8 - fstws %fr8R,-16(0,%r30) - extru %r19,15,16,%r20 - ldw -16(0,%r30),%r8 - zdep %r19,15,16,%r19 - addl %r8,%r20,%r20 - comclr,<<= %r19,%r4,0 - addi 1,%r20,%r20 - comb,<<= %r20,%r3,L$0066 - sub %r4,%r19,%r4 - addl %r3,%r5,%r3 - ldo -1(%r29),%r29 -L$0066 - addib,= -1,%r6,L$0056 - sub %r3,%r20,%r3 - zdep %r29,15,16,%r7 - shd %r3,%r4,16,%r3 - bl L$0055,0 - zdep %r4,15,16,%r4 -L$0056 - or %r7,%r29,%r28 -L$0068 - ldw -148(0,%r30),%r2 - ldw -124(0,%r30),%r7 - ldw -120(0,%r30),%r6 - ldw -116(0,%r30),%r5 - ldw -112(0,%r30),%r4 - ldw -108(0,%r30),%r3 - bv 0(%r2) - ldwm -128(0,%r30),%r8 - .EXIT - .PROCEND + .CALLINFO FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE + .EXPORT bn_sqr_comba8,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN + .ENTRY + .align 64 + + STD %r3,0(%sp) ; save r3 + STD %r4,8(%sp) ; save r4 + STD %r5,16(%sp) ; save r5 + STD %r6,24(%sp) ; save r6 + + ; + ; Zero out carries + ; + COPY %r0,c1 + COPY %r0,c2 + COPY %r0,c3 + + LDO 128(%sp),%sp ; bump stack + DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L + DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32 + + ; + ; Load up all of the values we are going to use + ; + FLDD 0(a_ptr),a0 + FLDD 8(a_ptr),a1 + FLDD 16(a_ptr),a2 + FLDD 24(a_ptr),a3 + FLDD 32(a_ptr),a4 + FLDD 40(a_ptr),a5 + FLDD 48(a_ptr),a6 + FLDD 56(a_ptr),a7 + + SQR_ADD_C a0L,a0R,c1,c2,c3 + STD c1,0(r_ptr) ; r[0] = c1; + COPY %r0,c1 + + SQR_ADD_C2 a1L,a1R,a0L,a0R,c2,c3,c1 + STD c2,8(r_ptr) ; r[1] = c2; + COPY %r0,c2 + + SQR_ADD_C a1L,a1R,c3,c1,c2 + SQR_ADD_C2 a2L,a2R,a0L,a0R,c3,c1,c2 + STD c3,16(r_ptr) ; r[2] = c3; + COPY %r0,c3 + + SQR_ADD_C2 a3L,a3R,a0L,a0R,c1,c2,c3 + SQR_ADD_C2 a2L,a2R,a1L,a1R,c1,c2,c3 + STD c1,24(r_ptr) ; r[3] = c1; + COPY %r0,c1 + + SQR_ADD_C a2L,a2R,c2,c3,c1 + SQR_ADD_C2 a3L,a3R,a1L,a1R,c2,c3,c1 + SQR_ADD_C2 a4L,a4R,a0L,a0R,c2,c3,c1 + STD c2,32(r_ptr) ; r[4] = c2; + COPY %r0,c2 + + SQR_ADD_C2 a5L,a5R,a0L,a0R,c3,c1,c2 + SQR_ADD_C2 a4L,a4R,a1L,a1R,c3,c1,c2 + SQR_ADD_C2 a3L,a3R,a2L,a2R,c3,c1,c2 + STD c3,40(r_ptr) ; r[5] = c3; + COPY %r0,c3 + + SQR_ADD_C a3L,a3R,c1,c2,c3 + SQR_ADD_C2 a4L,a4R,a2L,a2R,c1,c2,c3 + SQR_ADD_C2 a5L,a5R,a1L,a1R,c1,c2,c3 + SQR_ADD_C2 a6L,a6R,a0L,a0R,c1,c2,c3 + STD c1,48(r_ptr) ; r[6] = c1; + COPY %r0,c1 + + SQR_ADD_C2 a7L,a7R,a0L,a0R,c2,c3,c1 + SQR_ADD_C2 a6L,a6R,a1L,a1R,c2,c3,c1 + SQR_ADD_C2 a5L,a5R,a2L,a2R,c2,c3,c1 + SQR_ADD_C2 a4L,a4R,a3L,a3R,c2,c3,c1 + STD c2,56(r_ptr) ; r[7] = c2; + COPY %r0,c2 + + SQR_ADD_C a4L,a4R,c3,c1,c2 + SQR_ADD_C2 a5L,a5R,a3L,a3R,c3,c1,c2 + SQR_ADD_C2 a6L,a6R,a2L,a2R,c3,c1,c2 + SQR_ADD_C2 a7L,a7R,a1L,a1R,c3,c1,c2 + STD c3,64(r_ptr) ; r[8] = c3; + COPY %r0,c3 + + SQR_ADD_C2 a7L,a7R,a2L,a2R,c1,c2,c3 + SQR_ADD_C2 a6L,a6R,a3L,a3R,c1,c2,c3 + SQR_ADD_C2 a5L,a5R,a4L,a4R,c1,c2,c3 + STD c1,72(r_ptr) ; r[9] = c1; + COPY %r0,c1 + + SQR_ADD_C a5L,a5R,c2,c3,c1 + SQR_ADD_C2 a6L,a6R,a4L,a4R,c2,c3,c1 + SQR_ADD_C2 a7L,a7R,a3L,a3R,c2,c3,c1 + STD c2,80(r_ptr) ; r[10] = c2; + COPY %r0,c2 + + SQR_ADD_C2 a7L,a7R,a4L,a4R,c3,c1,c2 + SQR_ADD_C2 a6L,a6R,a5L,a5R,c3,c1,c2 + STD c3,88(r_ptr) ; r[11] = c3; + COPY %r0,c3 + + SQR_ADD_C a6L,a6R,c1,c2,c3 + SQR_ADD_C2 a7L,a7R,a5L,a5R,c1,c2,c3 + STD c1,96(r_ptr) ; r[12] = c1; + COPY %r0,c1 + + SQR_ADD_C2 a7L,a7R,a6L,a6R,c2,c3,c1 + STD c2,104(r_ptr) ; r[13] = c2; + COPY %r0,c2 + + SQR_ADD_C a7L,a7R,c3,c1,c2 + STD c3, 112(r_ptr) ; r[14] = c3 + STD c1, 120(r_ptr) ; r[15] = c1 + + .EXIT + LDD -104(%sp),%r6 ; restore r6 + LDD -112(%sp),%r5 ; restore r5 + LDD -120(%sp),%r4 ; restore r4 + BVE (%rp) + LDD,MB -128(%sp),%r3 + + .PROCEND + +;----------------------------------------------------------------------------- +; +;void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a) +; arg0 = r_ptr +; arg1 = a_ptr +; + +bn_sqr_comba4 + .proc + .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE + .EXPORT bn_sqr_comba4,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN + .entry + .align 64 + STD %r3,0(%sp) ; save r3 + STD %r4,8(%sp) ; save r4 + STD %r5,16(%sp) ; save r5 + STD %r6,24(%sp) ; save r6 + + ; + ; Zero out carries + ; + COPY %r0,c1 + COPY %r0,c2 + COPY %r0,c3 + + LDO 128(%sp),%sp ; bump stack + DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L + DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32 + + ; + ; Load up all of the values we are going to use + ; + FLDD 0(a_ptr),a0 + FLDD 8(a_ptr),a1 + FLDD 16(a_ptr),a2 + FLDD 24(a_ptr),a3 + FLDD 32(a_ptr),a4 + FLDD 40(a_ptr),a5 + FLDD 48(a_ptr),a6 + FLDD 56(a_ptr),a7 + + SQR_ADD_C a0L,a0R,c1,c2,c3 + + STD c1,0(r_ptr) ; r[0] = c1; + COPY %r0,c1 + + SQR_ADD_C2 a1L,a1R,a0L,a0R,c2,c3,c1 + + STD c2,8(r_ptr) ; r[1] = c2; + COPY %r0,c2 + + SQR_ADD_C a1L,a1R,c3,c1,c2 + SQR_ADD_C2 a2L,a2R,a0L,a0R,c3,c1,c2 + + STD c3,16(r_ptr) ; r[2] = c3; + COPY %r0,c3 + + SQR_ADD_C2 a3L,a3R,a0L,a0R,c1,c2,c3 + SQR_ADD_C2 a2L,a2R,a1L,a1R,c1,c2,c3 + + STD c1,24(r_ptr) ; r[3] = c1; + COPY %r0,c1 + + SQR_ADD_C a2L,a2R,c2,c3,c1 + SQR_ADD_C2 a3L,a3R,a1L,a1R,c2,c3,c1 + + STD c2,32(r_ptr) ; r[4] = c2; + COPY %r0,c2 + + SQR_ADD_C2 a3L,a3R,a2L,a2R,c3,c1,c2 + STD c3,40(r_ptr) ; r[5] = c3; + COPY %r0,c3 + + SQR_ADD_C a3L,a3R,c1,c2,c3 + STD c1,48(r_ptr) ; r[6] = c1; + STD c2,56(r_ptr) ; r[7] = c2; + + .EXIT + LDD -104(%sp),%r6 ; restore r6 + LDD -112(%sp),%r5 ; restore r5 + LDD -120(%sp),%r4 ; restore r4 + BVE (%rp) + LDD,MB -128(%sp),%r3 + + .PROCEND + + +;--------------------------------------------------------------------------- + +MUL_ADD_C .macro A0L,A0R,B0L,B0R,C1,C2,C3 + XMPYU A0L,B0R,ftemp1 ; m1 = bl*ht + FSTD ftemp1,-16(%sp) ; + XMPYU A0R,B0L,ftemp2 ; m = bh*lt + FSTD ftemp2,-8(%sp) ; + XMPYU A0R,B0R,ftemp3 ; lt = bl*lt + FSTD ftemp3,-32(%sp) + XMPYU A0L,B0L,ftemp4 ; ht = bh*ht + FSTD ftemp4,-24(%sp) ; + + LDD -8(%sp),m ; r21 = m + LDD -16(%sp),m1 ; r19 = m1 + ADD,L m,m1,m ; m+m1 + + DEPD,Z m,31,32,temp3 ; (m+m1<<32) + LDD -24(%sp),ht ; r24 = ht + + CMPCLR,*>>= m,m1,%r0 ; if (m < m1) + ADD,L ht,high_one,ht ; ht+=high_one + + EXTRD,U m,31,32,temp1 ; m >> 32 + LDD -32(%sp),lt ; lt + ADD,L ht,temp1,ht ; ht+= m>>32 + ADD lt,temp3,lt ; lt = lt+m1 + ADD,DC ht,%r0,ht ; ht++ + + ADD C1,lt,C1 ; c1=c1+lt + ADD,DC ht,%r0,ht ; bump c3 if overflow,nullify otherwise + + ADD C2,ht,C2 ; c2 = c2 + ht + ADD,DC C3,%r0,C3 ; add in carry (c3++) +.endm + + +; +;void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) +; arg0 = r_ptr +; arg1 = a_ptr +; arg2 = b_ptr +; + +bn_mul_comba8 + .proc + .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE + .EXPORT bn_mul_comba8,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN + .entry + .align 64 + + STD %r3,0(%sp) ; save r3 + STD %r4,8(%sp) ; save r4 + STD %r5,16(%sp) ; save r5 + STD %r6,24(%sp) ; save r6 + FSTD %fr12,32(%sp) ; save r6 + FSTD %fr13,40(%sp) ; save r7 + + ; + ; Zero out carries + ; + COPY %r0,c1 + COPY %r0,c2 + COPY %r0,c3 + + LDO 128(%sp),%sp ; bump stack + DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32 + + ; + ; Load up all of the values we are going to use + ; + FLDD 0(a_ptr),a0 + FLDD 8(a_ptr),a1 + FLDD 16(a_ptr),a2 + FLDD 24(a_ptr),a3 + FLDD 32(a_ptr),a4 + FLDD 40(a_ptr),a5 + FLDD 48(a_ptr),a6 + FLDD 56(a_ptr),a7 + + FLDD 0(b_ptr),b0 + FLDD 8(b_ptr),b1 + FLDD 16(b_ptr),b2 + FLDD 24(b_ptr),b3 + FLDD 32(b_ptr),b4 + FLDD 40(b_ptr),b5 + FLDD 48(b_ptr),b6 + FLDD 56(b_ptr),b7 + + MUL_ADD_C a0L,a0R,b0L,b0R,c1,c2,c3 + STD c1,0(r_ptr) + COPY %r0,c1 + + MUL_ADD_C a0L,a0R,b1L,b1R,c2,c3,c1 + MUL_ADD_C a1L,a1R,b0L,b0R,c2,c3,c1 + STD c2,8(r_ptr) + COPY %r0,c2 + + MUL_ADD_C a2L,a2R,b0L,b0R,c3,c1,c2 + MUL_ADD_C a1L,a1R,b1L,b1R,c3,c1,c2 + MUL_ADD_C a0L,a0R,b2L,b2R,c3,c1,c2 + STD c3,16(r_ptr) + COPY %r0,c3 + + MUL_ADD_C a0L,a0R,b3L,b3R,c1,c2,c3 + MUL_ADD_C a1L,a1R,b2L,b2R,c1,c2,c3 + MUL_ADD_C a2L,a2R,b1L,b1R,c1,c2,c3 + MUL_ADD_C a3L,a3R,b0L,b0R,c1,c2,c3 + STD c1,24(r_ptr) + COPY %r0,c1 + + MUL_ADD_C a4L,a4R,b0L,b0R,c2,c3,c1 + MUL_ADD_C a3L,a3R,b1L,b1R,c2,c3,c1 + MUL_ADD_C a2L,a2R,b2L,b2R,c2,c3,c1 + MUL_ADD_C a1L,a1R,b3L,b3R,c2,c3,c1 + MUL_ADD_C a0L,a0R,b4L,b4R,c2,c3,c1 + STD c2,32(r_ptr) + COPY %r0,c2 + + MUL_ADD_C a0L,a0R,b5L,b5R,c3,c1,c2 + MUL_ADD_C a1L,a1R,b4L,b4R,c3,c1,c2 + MUL_ADD_C a2L,a2R,b3L,b3R,c3,c1,c2 + MUL_ADD_C a3L,a3R,b2L,b2R,c3,c1,c2 + MUL_ADD_C a4L,a4R,b1L,b1R,c3,c1,c2 + MUL_ADD_C a5L,a5R,b0L,b0R,c3,c1,c2 + STD c3,40(r_ptr) + COPY %r0,c3 + + MUL_ADD_C a6L,a6R,b0L,b0R,c1,c2,c3 + MUL_ADD_C a5L,a5R,b1L,b1R,c1,c2,c3 + MUL_ADD_C a4L,a4R,b2L,b2R,c1,c2,c3 + MUL_ADD_C a3L,a3R,b3L,b3R,c1,c2,c3 + MUL_ADD_C a2L,a2R,b4L,b4R,c1,c2,c3 + MUL_ADD_C a1L,a1R,b5L,b5R,c1,c2,c3 + MUL_ADD_C a0L,a0R,b6L,b6R,c1,c2,c3 + STD c1,48(r_ptr) + COPY %r0,c1 + + MUL_ADD_C a0L,a0R,b7L,b7R,c2,c3,c1 + MUL_ADD_C a1L,a1R,b6L,b6R,c2,c3,c1 + MUL_ADD_C a2L,a2R,b5L,b5R,c2,c3,c1 + MUL_ADD_C a3L,a3R,b4L,b4R,c2,c3,c1 + MUL_ADD_C a4L,a4R,b3L,b3R,c2,c3,c1 + MUL_ADD_C a5L,a5R,b2L,b2R,c2,c3,c1 + MUL_ADD_C a6L,a6R,b1L,b1R,c2,c3,c1 + MUL_ADD_C a7L,a7R,b0L,b0R,c2,c3,c1 + STD c2,56(r_ptr) + COPY %r0,c2 + + MUL_ADD_C a7L,a7R,b1L,b1R,c3,c1,c2 + MUL_ADD_C a6L,a6R,b2L,b2R,c3,c1,c2 + MUL_ADD_C a5L,a5R,b3L,b3R,c3,c1,c2 + MUL_ADD_C a4L,a4R,b4L,b4R,c3,c1,c2 + MUL_ADD_C a3L,a3R,b5L,b5R,c3,c1,c2 + MUL_ADD_C a2L,a2R,b6L,b6R,c3,c1,c2 + MUL_ADD_C a1L,a1R,b7L,b7R,c3,c1,c2 + STD c3,64(r_ptr) + COPY %r0,c3 + + MUL_ADD_C a2L,a2R,b7L,b7R,c1,c2,c3 + MUL_ADD_C a3L,a3R,b6L,b6R,c1,c2,c3 + MUL_ADD_C a4L,a4R,b5L,b5R,c1,c2,c3 + MUL_ADD_C a5L,a5R,b4L,b4R,c1,c2,c3 + MUL_ADD_C a6L,a6R,b3L,b3R,c1,c2,c3 + MUL_ADD_C a7L,a7R,b2L,b2R,c1,c2,c3 + STD c1,72(r_ptr) + COPY %r0,c1 + + MUL_ADD_C a7L,a7R,b3L,b3R,c2,c3,c1 + MUL_ADD_C a6L,a6R,b4L,b4R,c2,c3,c1 + MUL_ADD_C a5L,a5R,b5L,b5R,c2,c3,c1 + MUL_ADD_C a4L,a4R,b6L,b6R,c2,c3,c1 + MUL_ADD_C a3L,a3R,b7L,b7R,c2,c3,c1 + STD c2,80(r_ptr) + COPY %r0,c2 + + MUL_ADD_C a4L,a4R,b7L,b7R,c3,c1,c2 + MUL_ADD_C a5L,a5R,b6L,b6R,c3,c1,c2 + MUL_ADD_C a6L,a6R,b5L,b5R,c3,c1,c2 + MUL_ADD_C a7L,a7R,b4L,b4R,c3,c1,c2 + STD c3,88(r_ptr) + COPY %r0,c3 + + MUL_ADD_C a7L,a7R,b5L,b5R,c1,c2,c3 + MUL_ADD_C a6L,a6R,b6L,b6R,c1,c2,c3 + MUL_ADD_C a5L,a5R,b7L,b7R,c1,c2,c3 + STD c1,96(r_ptr) + COPY %r0,c1 + + MUL_ADD_C a6L,a6R,b7L,b7R,c2,c3,c1 + MUL_ADD_C a7L,a7R,b6L,b6R,c2,c3,c1 + STD c2,104(r_ptr) + COPY %r0,c2 + + MUL_ADD_C a7L,a7R,b7L,b7R,c3,c1,c2 + STD c3,112(r_ptr) + STD c1,120(r_ptr) + + .EXIT + FLDD -88(%sp),%fr13 + FLDD -96(%sp),%fr12 + LDD -104(%sp),%r6 ; restore r6 + LDD -112(%sp),%r5 ; restore r5 + LDD -120(%sp),%r4 ; restore r4 + BVE (%rp) + LDD,MB -128(%sp),%r3 + + .PROCEND + +;----------------------------------------------------------------------------- +; +;void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) +; arg0 = r_ptr +; arg1 = a_ptr +; arg2 = b_ptr +; + +bn_mul_comba4 + .proc + .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE + .EXPORT bn_mul_comba4,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN + .entry + .align 64 + + STD %r3,0(%sp) ; save r3 + STD %r4,8(%sp) ; save r4 + STD %r5,16(%sp) ; save r5 + STD %r6,24(%sp) ; save r6 + FSTD %fr12,32(%sp) ; save r6 + FSTD %fr13,40(%sp) ; save r7 + + ; + ; Zero out carries + ; + COPY %r0,c1 + COPY %r0,c2 + COPY %r0,c3 + + LDO 128(%sp),%sp ; bump stack + DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32 + + ; + ; Load up all of the values we are going to use + ; + FLDD 0(a_ptr),a0 + FLDD 8(a_ptr),a1 + FLDD 16(a_ptr),a2 + FLDD 24(a_ptr),a3 + + FLDD 0(b_ptr),b0 + FLDD 8(b_ptr),b1 + FLDD 16(b_ptr),b2 + FLDD 24(b_ptr),b3 + + MUL_ADD_C a0L,a0R,b0L,b0R,c1,c2,c3 + STD c1,0(r_ptr) + COPY %r0,c1 + + MUL_ADD_C a0L,a0R,b1L,b1R,c2,c3,c1 + MUL_ADD_C a1L,a1R,b0L,b0R,c2,c3,c1 + STD c2,8(r_ptr) + COPY %r0,c2 + + MUL_ADD_C a2L,a2R,b0L,b0R,c3,c1,c2 + MUL_ADD_C a1L,a1R,b1L,b1R,c3,c1,c2 + MUL_ADD_C a0L,a0R,b2L,b2R,c3,c1,c2 + STD c3,16(r_ptr) + COPY %r0,c3 + + MUL_ADD_C a0L,a0R,b3L,b3R,c1,c2,c3 + MUL_ADD_C a1L,a1R,b2L,b2R,c1,c2,c3 + MUL_ADD_C a2L,a2R,b1L,b1R,c1,c2,c3 + MUL_ADD_C a3L,a3R,b0L,b0R,c1,c2,c3 + STD c1,24(r_ptr) + COPY %r0,c1 + + MUL_ADD_C a3L,a3R,b1L,b1R,c2,c3,c1 + MUL_ADD_C a2L,a2R,b2L,b2R,c2,c3,c1 + MUL_ADD_C a1L,a1R,b3L,b3R,c2,c3,c1 + STD c2,32(r_ptr) + COPY %r0,c2 + + MUL_ADD_C a2L,a2R,b3L,b3R,c3,c1,c2 + MUL_ADD_C a3L,a3R,b2L,b2R,c3,c1,c2 + STD c3,40(r_ptr) + COPY %r0,c3 + + MUL_ADD_C a3L,a3R,b3L,b3R,c1,c2,c3 + STD c1,48(r_ptr) + STD c2,56(r_ptr) + + .EXIT + FLDD -88(%sp),%fr13 + FLDD -96(%sp),%fr12 + LDD -104(%sp),%r6 ; restore r6 + LDD -112(%sp),%r5 ; restore r5 + LDD -120(%sp),%r4 ; restore r4 + BVE (%rp) + LDD,MB -128(%sp),%r3 + + .PROCEND + + + .SPACE $TEXT$ + .SUBSPA $CODE$ + .SPACE $PRIVATE$,SORT=16 + .IMPORT $global$,DATA + .SPACE $TEXT$ + .SUBSPA $CODE$ + .SUBSPA $LIT$,ACCESS=0x2c +C$7 + .ALIGN 8 + .STRINGZ "Division would overflow (%d)\n" + .END diff --git a/src/lib/libcrypto/bn/asm/pa-risc2W.s b/src/lib/libcrypto/bn/asm/pa-risc2W.s index 54b6606252..a99545754d 100644 --- a/src/lib/libcrypto/bn/asm/pa-risc2W.s +++ b/src/lib/libcrypto/bn/asm/pa-risc2W.s @@ -1598,7 +1598,7 @@ bn_mul_comba4 .IMPORT $global$,DATA .SPACE $TEXT$ .SUBSPA $CODE$ - .SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=0x2c,SORT=16 + .SUBSPA $LIT$,ACCESS=0x2c C$4 .ALIGN 8 .STRINGZ "Division would overflow (%d)\n" diff --git a/src/lib/libcrypto/bn/bn.h b/src/lib/libcrypto/bn/bn.h index f935e1ca79..1eaf879553 100644 --- a/src/lib/libcrypto/bn/bn.h +++ b/src/lib/libcrypto/bn/bn.h @@ -59,38 +59,39 @@ #ifndef HEADER_BN_H #define HEADER_BN_H -#ifndef WIN16 +#include +#ifndef OPENSSL_NO_FP_API #include /* FILE */ #endif -#include #ifdef __cplusplus extern "C" { #endif -#ifdef VMS +#ifdef OPENSSL_SYS_VMS #undef BN_LLONG /* experimental, so far... */ #endif #define BN_MUL_COMBA #define BN_SQR_COMBA #define BN_RECURSION -#define RECP_MUL_MOD -#define MONT_MUL_MOD /* This next option uses the C libraries (2 word)/(1 word) function. * If it is not defined, I use my C version (which is slower). * The reason for this flag is that when the particular C compiler * library routine is used, and the library is linked with a different * compiler, the library is missing. This mostly happens when the - * library is built with gcc and then linked using nornal cc. This would - * be a common occurance because gcc normally produces code that is + * library is built with gcc and then linked using normal cc. This would + * be a common occurrence because gcc normally produces code that is * 2 times faster than system compilers for the big number stuff. * For machines with only one compiler (or shared libraries), this should * be on. Again this in only really a problem on machines - * using "long long's", are 32bit, and are not using my assember code. */ -#if defined(MSDOS) || defined(WINDOWS) || defined(linux) -#define BN_DIV2W + * using "long long's", are 32bit, and are not using my assembler code. */ +#if defined(OPENSSL_SYS_MSDOS) || defined(OPENSSL_SYS_WINDOWS) || \ + defined(OPENSSL_SYS_WIN32) || defined(linux) +# ifndef BN_DIV2W +# define BN_DIV2W +# endif #endif /* assuming long is 64bit - this is the DEC Alpha @@ -118,8 +119,8 @@ extern "C" { /* This is where the long long data type is 64 bits, but long is 32. * For machines where there are 64bit registers, this is the mode to use. - * IRIX, on R4000 and above should use this mode, along with the relevent - * assember code :-). Do NOT define BN_LLONG. + * IRIX, on R4000 and above should use this mode, along with the relevant + * assembler code :-). Do NOT define BN_LLONG. */ #ifdef SIXTY_FOUR_BIT #undef BN_LLONG @@ -135,14 +136,14 @@ extern "C" { #define BN_MASK2h (0xffffffff00000000LL) #define BN_MASK2h1 (0xffffffff80000000LL) #define BN_TBIT (0x8000000000000000LL) -#define BN_DEC_CONV (10000000000000000000LL) +#define BN_DEC_CONV (10000000000000000000ULL) #define BN_DEC_FMT1 "%llu" #define BN_DEC_FMT2 "%019llu" #define BN_DEC_NUM 19 #endif #ifdef THIRTY_TWO_BIT -#if defined(WIN32) && !defined(__GNUC__) +#if defined(OPENSSL_SYS_WIN32) && !defined(__GNUC__) #define BN_ULLONG unsigned _int64 #else #define BN_ULLONG unsigned long long @@ -153,7 +154,7 @@ extern "C" { #define BN_BYTES 4 #define BN_BITS2 32 #define BN_BITS4 16 -#ifdef WIN32 +#ifdef OPENSSL_SYS_WIN32 /* VC++ doesn't like the LL suffix */ #define BN_MASK (0xffffffffffffffffL) #else @@ -233,19 +234,13 @@ typedef struct bignum_st BN_ULONG *d; /* Pointer to an array of 'BN_BITS2' bit chunks. */ int top; /* Index of last used d +1. */ /* The next are internal book keeping for bn_expand. */ - int max; /* Size of the d array. */ + int dmax; /* Size of the d array. */ int neg; /* one if the number is negative */ int flags; } BIGNUM; -/* Used for temp variables */ -#define BN_CTX_NUM 12 -typedef struct bignum_ctx - { - int tos; - BIGNUM bn[BN_CTX_NUM+1]; - int flags; - } BN_CTX; +/* Used for temp variables (declaration hidden in bn_lcl.h) */ +typedef struct bignum_ctx BN_CTX; typedef struct bn_blinding_st { @@ -257,16 +252,15 @@ typedef struct bn_blinding_st /* Used for montgomery multiplication */ typedef struct bn_mont_ctx_st - { - int use_word; /* 0 for word form, 1 for long form */ - int ri; /* number of bits in R */ - BIGNUM RR; /* used to convert to montgomery form */ - BIGNUM N; /* The modulus */ - BIGNUM Ni; /* The inverse of N */ - BN_ULONG n0; /* word form of inverse, normally only one of - * Ni or n0 is defined */ + { + int ri; /* number of bits in R */ + BIGNUM RR; /* used to convert to montgomery form */ + BIGNUM N; /* The modulus */ + BIGNUM Ni; /* R*(1/R mod N) - N*Ni = 1 + * (Ni is only stored for bignum algorithm) */ + BN_ULONG n0; /* least significant word of Ni */ int flags; - } BN_MONT_CTX; + } BN_MONT_CTX; /* Used for reciprocal division/mod functions * It cannot be shared between threads @@ -280,97 +274,129 @@ typedef struct bn_recp_ctx_st int flags; } BN_RECP_CTX; -#define BN_to_montgomery(r,a,mont,ctx) BN_mod_mul_montgomery(\ - r,a,&((mont)->RR),(mont),ctx) - -#define BN_prime_checks (5) +#define BN_prime_checks 0 /* default: select number of iterations + based on the size of the number */ + +/* number of Miller-Rabin iterations for an error rate of less than 2^-80 + * for random 'b'-bit input, b >= 100 (taken from table 4.4 in the Handbook + * of Applied Cryptography [Menezes, van Oorschot, Vanstone; CRC Press 1996]; + * original paper: Damgaard, Landrock, Pomerance: Average case error estimates + * for the strong probable prime test. -- Math. Comp. 61 (1993) 177-194) */ +#define BN_prime_checks_for_size(b) ((b) >= 1300 ? 2 : \ + (b) >= 850 ? 3 : \ + (b) >= 650 ? 4 : \ + (b) >= 550 ? 5 : \ + (b) >= 450 ? 6 : \ + (b) >= 400 ? 7 : \ + (b) >= 350 ? 8 : \ + (b) >= 300 ? 9 : \ + (b) >= 250 ? 12 : \ + (b) >= 200 ? 15 : \ + (b) >= 150 ? 18 : \ + /* b >= 100 */ 27) #define BN_num_bytes(a) ((BN_num_bits(a)+7)/8) -#define BN_is_word(a,w) (((a)->top == 1) && ((a)->d[0] == (BN_ULONG)(w))) -#define BN_is_zero(a) (((a)->top == 0) || BN_is_word(a,0)) -#define BN_is_one(a) (BN_is_word((a),1)) -#define BN_is_odd(a) (((a)->top > 0) && ((a)->d[0] & 1)) + +/* Note that BN_abs_is_word does not work reliably for w == 0 */ +#define BN_abs_is_word(a,w) (((a)->top == 1) && ((a)->d[0] == (BN_ULONG)(w))) +#define BN_is_zero(a) (((a)->top == 0) || BN_abs_is_word(a,0)) +#define BN_is_one(a) (BN_abs_is_word((a),1) && !(a)->neg) +#define BN_is_word(a,w) ((w) ? BN_abs_is_word((a),(w)) && !(a)->neg : \ + BN_is_zero((a))) +#define BN_is_odd(a) (((a)->top > 0) && ((a)->d[0] & 1)) + #define BN_one(a) (BN_set_word((a),1)) #define BN_zero(a) (BN_set_word((a),0)) /*#define BN_ascii2bn(a) BN_hex2bn(a) */ /*#define BN_bn2ascii(a) BN_bn2hex(a) */ -#define bn_expand(n,b) ((((((b+BN_BITS2-1))/BN_BITS2)) <= (n)->max)?\ - (n):bn_expand2((n),(b)/BN_BITS2+1)) -#define bn_wexpand(n,b) (((b) <= (n)->max)?(n):bn_expand2((n),(b))) - -#define bn_fix_top(a) \ - { \ - BN_ULONG *ftl; \ - if ((a)->top > 0) \ - { \ - for (ftl= &((a)->d[(a)->top-1]); (a)->top > 0; (a)->top--) \ - if (*(ftl--)) break; \ - } \ - } - -BIGNUM *BN_value_one(void); +const BIGNUM *BN_value_one(void); char * BN_options(void); BN_CTX *BN_CTX_new(void); void BN_CTX_init(BN_CTX *c); void BN_CTX_free(BN_CTX *c); +void BN_CTX_start(BN_CTX *ctx); +BIGNUM *BN_CTX_get(BN_CTX *ctx); +void BN_CTX_end(BN_CTX *ctx); int BN_rand(BIGNUM *rnd, int bits, int top,int bottom); +int BN_pseudo_rand(BIGNUM *rnd, int bits, int top,int bottom); +int BN_rand_range(BIGNUM *rnd, BIGNUM *range); +int BN_pseudo_rand_range(BIGNUM *rnd, BIGNUM *range); int BN_num_bits(const BIGNUM *a); int BN_num_bits_word(BN_ULONG); BIGNUM *BN_new(void); void BN_init(BIGNUM *); void BN_clear_free(BIGNUM *a); BIGNUM *BN_copy(BIGNUM *a, const BIGNUM *b); +void BN_swap(BIGNUM *a, BIGNUM *b); BIGNUM *BN_bin2bn(const unsigned char *s,int len,BIGNUM *ret); int BN_bn2bin(const BIGNUM *a, unsigned char *to); -BIGNUM *BN_mpi2bn(unsigned char *s,int len,BIGNUM *ret); +BIGNUM *BN_mpi2bn(const unsigned char *s,int len,BIGNUM *ret); int BN_bn2mpi(const BIGNUM *a, unsigned char *to); int BN_sub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b); int BN_usub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b); int BN_uadd(BIGNUM *r, const BIGNUM *a, const BIGNUM *b); -int BN_add(BIGNUM *r, BIGNUM *a, BIGNUM *b); -int BN_mod(BIGNUM *rem, const BIGNUM *m, const BIGNUM *d, BN_CTX *ctx); +int BN_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b); +int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx); +int BN_sqr(BIGNUM *r, const BIGNUM *a,BN_CTX *ctx); + int BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, const BIGNUM *d, - BN_CTX *ctx); -int BN_mul(BIGNUM *r, BIGNUM *a, BIGNUM *b,BN_CTX *ctx); -int BN_sqr(BIGNUM *r, BIGNUM *a,BN_CTX *ctx); -BN_ULONG BN_mod_word(BIGNUM *a, BN_ULONG w); + BN_CTX *ctx); +#define BN_mod(rem,m,d,ctx) BN_div(NULL,(rem),(m),(d),(ctx)) +int BN_nnmod(BIGNUM *r, const BIGNUM *m, const BIGNUM *d, BN_CTX *ctx); +int BN_mod_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m, BN_CTX *ctx); +int BN_mod_add_quick(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m); +int BN_mod_sub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m, BN_CTX *ctx); +int BN_mod_sub_quick(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m); +int BN_mod_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, + const BIGNUM *m, BN_CTX *ctx); +int BN_mod_sqr(BIGNUM *r, const BIGNUM *a, const BIGNUM *m, BN_CTX *ctx); +int BN_mod_lshift1(BIGNUM *r, const BIGNUM *a, const BIGNUM *m, BN_CTX *ctx); +int BN_mod_lshift1_quick(BIGNUM *r, const BIGNUM *a, const BIGNUM *m); +int BN_mod_lshift(BIGNUM *r, const BIGNUM *a, int n, const BIGNUM *m, BN_CTX *ctx); +int BN_mod_lshift_quick(BIGNUM *r, const BIGNUM *a, int n, const BIGNUM *m); + +BN_ULONG BN_mod_word(const BIGNUM *a, BN_ULONG w); BN_ULONG BN_div_word(BIGNUM *a, BN_ULONG w); int BN_mul_word(BIGNUM *a, BN_ULONG w); int BN_add_word(BIGNUM *a, BN_ULONG w); int BN_sub_word(BIGNUM *a, BN_ULONG w); int BN_set_word(BIGNUM *a, BN_ULONG w); -BN_ULONG BN_get_word(BIGNUM *a); +BN_ULONG BN_get_word(const BIGNUM *a); + int BN_cmp(const BIGNUM *a, const BIGNUM *b); void BN_free(BIGNUM *a); int BN_is_bit_set(const BIGNUM *a, int n); int BN_lshift(BIGNUM *r, const BIGNUM *a, int n); -int BN_lshift1(BIGNUM *r, BIGNUM *a); -int BN_exp(BIGNUM *r, BIGNUM *a, BIGNUM *p,BN_CTX *ctx); -int BN_mod_exp(BIGNUM *r, BIGNUM *a, const BIGNUM *p, - const BIGNUM *m,BN_CTX *ctx); -int BN_mod_exp_mont(BIGNUM *r, BIGNUM *a, const BIGNUM *p, - const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *m_ctx); -int BN_mod_exp2_mont(BIGNUM *r, BIGNUM *a1, BIGNUM *p1,BIGNUM *a2, - BIGNUM *p2,BIGNUM *m,BN_CTX *ctx,BN_MONT_CTX *m_ctx); -int BN_mod_exp_simple(BIGNUM *r, BIGNUM *a, BIGNUM *p, - BIGNUM *m,BN_CTX *ctx); +int BN_lshift1(BIGNUM *r, const BIGNUM *a); +int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,BN_CTX *ctx); + +int BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, + const BIGNUM *m,BN_CTX *ctx); +int BN_mod_exp_mont(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, + const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *m_ctx); +int BN_mod_exp_mont_word(BIGNUM *r, BN_ULONG a, const BIGNUM *p, + const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *m_ctx); +int BN_mod_exp2_mont(BIGNUM *r, const BIGNUM *a1, const BIGNUM *p1, + const BIGNUM *a2, const BIGNUM *p2,const BIGNUM *m, + BN_CTX *ctx,BN_MONT_CTX *m_ctx); +int BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, + const BIGNUM *m,BN_CTX *ctx); + int BN_mask_bits(BIGNUM *a,int n); -int BN_mod_mul(BIGNUM *ret, BIGNUM *a, BIGNUM *b, const BIGNUM *m, BN_CTX *ctx); -#ifndef WIN16 -int BN_print_fp(FILE *fp, BIGNUM *a); +#ifndef OPENSSL_NO_FP_API +int BN_print_fp(FILE *fp, const BIGNUM *a); #endif #ifdef HEADER_BIO_H int BN_print(BIO *fp, const BIGNUM *a); #else -int BN_print(char *fp, const BIGNUM *a); +int BN_print(void *fp, const BIGNUM *a); #endif -int BN_reciprocal(BIGNUM *r, BIGNUM *m, int len, BN_CTX *ctx); -int BN_rshift(BIGNUM *r, BIGNUM *a, int n); -int BN_rshift1(BIGNUM *r, BIGNUM *a); +int BN_reciprocal(BIGNUM *r, const BIGNUM *m, int len, BN_CTX *ctx); +int BN_rshift(BIGNUM *r, const BIGNUM *a, int n); +int BN_rshift1(BIGNUM *r, const BIGNUM *a); void BN_clear(BIGNUM *a); -BIGNUM *bn_expand2(BIGNUM *b, int bits); BIGNUM *BN_dup(const BIGNUM *a); int BN_ucmp(const BIGNUM *a, const BIGNUM *b); int BN_set_bit(BIGNUM *a, int n); @@ -379,26 +405,30 @@ char * BN_bn2hex(const BIGNUM *a); char * BN_bn2dec(const BIGNUM *a); int BN_hex2bn(BIGNUM **a, const char *str); int BN_dec2bn(BIGNUM **a, const char *str); -int BN_gcd(BIGNUM *r,BIGNUM *in_a,BIGNUM *in_b,BN_CTX *ctx); -BIGNUM *BN_mod_inverse(BIGNUM *ret,BIGNUM *a, const BIGNUM *n,BN_CTX *ctx); -BIGNUM *BN_generate_prime(BIGNUM *ret,int bits,int strong,BIGNUM *add, - BIGNUM *rem,void (*callback)(int,int,void *),void *cb_arg); -int BN_is_prime(BIGNUM *p,int nchecks,void (*callback)(int,int,void *), - BN_CTX *ctx,void *cb_arg); -void ERR_load_BN_strings(void ); - -BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w); -BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w); -void bn_sqr_words(BN_ULONG *rp, BN_ULONG *ap, int num); -BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d); -BN_ULONG bn_add_words(BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp,int num); -BN_ULONG bn_sub_words(BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp,int num); +int BN_gcd(BIGNUM *r,const BIGNUM *a,const BIGNUM *b,BN_CTX *ctx); +int BN_kronecker(const BIGNUM *a,const BIGNUM *b,BN_CTX *ctx); /* returns -2 for error */ +BIGNUM *BN_mod_inverse(BIGNUM *ret, + const BIGNUM *a, const BIGNUM *n,BN_CTX *ctx); +BIGNUM *BN_mod_sqrt(BIGNUM *ret, + const BIGNUM *a, const BIGNUM *n,BN_CTX *ctx); +BIGNUM *BN_generate_prime(BIGNUM *ret,int bits,int safe, + const BIGNUM *add, const BIGNUM *rem, + void (*callback)(int,int,void *),void *cb_arg); +int BN_is_prime(const BIGNUM *p,int nchecks, + void (*callback)(int,int,void *), + BN_CTX *ctx,void *cb_arg); +int BN_is_prime_fasttest(const BIGNUM *p,int nchecks, + void (*callback)(int,int,void *),BN_CTX *ctx,void *cb_arg, + int do_trial_division); BN_MONT_CTX *BN_MONT_CTX_new(void ); void BN_MONT_CTX_init(BN_MONT_CTX *ctx); -int BN_mod_mul_montgomery(BIGNUM *r,BIGNUM *a,BIGNUM *b,BN_MONT_CTX *mont, - BN_CTX *ctx); -int BN_from_montgomery(BIGNUM *r,BIGNUM *a,BN_MONT_CTX *mont,BN_CTX *ctx); +int BN_mod_mul_montgomery(BIGNUM *r,const BIGNUM *a,const BIGNUM *b, + BN_MONT_CTX *mont, BN_CTX *ctx); +#define BN_to_montgomery(r,a,mont,ctx) BN_mod_mul_montgomery(\ + (r),(a),&((mont)->RR),(mont),(ctx)) +int BN_from_montgomery(BIGNUM *r,const BIGNUM *a, + BN_MONT_CTX *mont, BN_CTX *ctx); void BN_MONT_CTX_free(BN_MONT_CTX *mont); int BN_MONT_CTX_set(BN_MONT_CTX *mont,const BIGNUM *modulus,BN_CTX *ctx); BN_MONT_CTX *BN_MONT_CTX_copy(BN_MONT_CTX *to,BN_MONT_CTX *from); @@ -416,18 +446,55 @@ void BN_RECP_CTX_init(BN_RECP_CTX *recp); BN_RECP_CTX *BN_RECP_CTX_new(void); void BN_RECP_CTX_free(BN_RECP_CTX *recp); int BN_RECP_CTX_set(BN_RECP_CTX *recp,const BIGNUM *rdiv,BN_CTX *ctx); -int BN_mod_mul_reciprocal(BIGNUM *r, BIGNUM *x, BIGNUM *y, - BN_RECP_CTX *recp,BN_CTX *ctx); +int BN_mod_mul_reciprocal(BIGNUM *r, const BIGNUM *x, const BIGNUM *y, + BN_RECP_CTX *recp,BN_CTX *ctx); int BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, - const BIGNUM *m, BN_CTX *ctx); -int BN_div_recp(BIGNUM *dv, BIGNUM *rem, BIGNUM *m, - BN_RECP_CTX *recp, BN_CTX *ctx); + const BIGNUM *m, BN_CTX *ctx); +int BN_div_recp(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, + BN_RECP_CTX *recp, BN_CTX *ctx); + +/* library internal functions */ + +#define bn_expand(a,bits) ((((((bits+BN_BITS2-1))/BN_BITS2)) <= (a)->dmax)?\ + (a):bn_expand2((a),(bits)/BN_BITS2+1)) +#define bn_wexpand(a,words) (((words) <= (a)->dmax)?(a):bn_expand2((a),(words))) +BIGNUM *bn_expand2(BIGNUM *a, int words); +BIGNUM *bn_dup_expand(const BIGNUM *a, int words); + +#define bn_fix_top(a) \ + { \ + BN_ULONG *ftl; \ + if ((a)->top > 0) \ + { \ + for (ftl= &((a)->d[(a)->top-1]); (a)->top > 0; (a)->top--) \ + if (*(ftl--)) break; \ + } \ + } +BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w); +BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w); +void bn_sqr_words(BN_ULONG *rp, const BN_ULONG *ap, int num); +BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d); +BN_ULONG bn_add_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,int num); +BN_ULONG bn_sub_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,int num); + +#ifdef BN_DEBUG +void bn_dump1(FILE *o, const char *a, const BN_ULONG *b,int n); +# define bn_print(a) {fprintf(stderr, #a "="); BN_print_fp(stderr,a); \ + fprintf(stderr,"\n");} +# define bn_dump(a,n) bn_dump1(stderr,#a,a,n); +#else +# define bn_print(a) +# define bn_dump(a,b) +#endif + +int BN_bntest_rand(BIGNUM *rnd, int bits, int top,int bottom); /* BEGIN ERROR CODES */ /* The following lines are auto generated by the script mkerr.pl. Any changes * made after this point may be overwritten when the script is next run. */ +void ERR_load_BN_strings(void); /* Error codes for the BN functions. */ @@ -438,30 +505,43 @@ int BN_div_recp(BIGNUM *dv, BIGNUM *rem, BIGNUM *m, #define BN_F_BN_BLINDING_UPDATE 103 #define BN_F_BN_BN2DEC 104 #define BN_F_BN_BN2HEX 105 +#define BN_F_BN_CTX_GET 116 #define BN_F_BN_CTX_NEW 106 #define BN_F_BN_DIV 107 #define BN_F_BN_EXPAND2 108 +#define BN_F_BN_EXPAND_INTERNAL 120 +#define BN_F_BN_MOD_EXP2_MONT 118 #define BN_F_BN_MOD_EXP_MONT 109 +#define BN_F_BN_MOD_EXP_MONT_WORD 117 #define BN_F_BN_MOD_INVERSE 110 +#define BN_F_BN_MOD_LSHIFT_QUICK 119 #define BN_F_BN_MOD_MUL_RECIPROCAL 111 +#define BN_F_BN_MOD_SQRT 121 #define BN_F_BN_MPI2BN 112 #define BN_F_BN_NEW 113 #define BN_F_BN_RAND 114 +#define BN_F_BN_RAND_RANGE 122 #define BN_F_BN_USUB 115 /* Reason codes. */ #define BN_R_ARG2_LT_ARG3 100 #define BN_R_BAD_RECIPROCAL 101 +#define BN_R_BIGNUM_TOO_LONG 114 #define BN_R_CALLED_WITH_EVEN_MODULUS 102 #define BN_R_DIV_BY_ZERO 103 #define BN_R_ENCODING_ERROR 104 #define BN_R_EXPAND_ON_STATIC_BIGNUM_DATA 105 +#define BN_R_INPUT_NOT_REDUCED 110 #define BN_R_INVALID_LENGTH 106 +#define BN_R_INVALID_RANGE 115 +#define BN_R_NOT_A_SQUARE 111 #define BN_R_NOT_INITIALIZED 107 #define BN_R_NO_INVERSE 108 +#define BN_R_P_IS_NOT_PRIME 112 +#define BN_R_TOO_MANY_ITERATIONS 113 +#define BN_R_TOO_MANY_TEMPORARY_VARIABLES 109 #ifdef __cplusplus } #endif #endif - diff --git a/src/lib/libcrypto/bn/bn_add.c b/src/lib/libcrypto/bn/bn_add.c index efb2e312e8..6cba07e9f6 100644 --- a/src/lib/libcrypto/bn/bn_add.c +++ b/src/lib/libcrypto/bn/bn_add.c @@ -61,76 +61,70 @@ #include "bn_lcl.h" /* r can == a or b */ -int BN_add(r, a, b) -BIGNUM *r; -BIGNUM *a; -BIGNUM *b; +int BN_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b) { - int i; - BIGNUM *tmp; + const BIGNUM *tmp; + int a_neg = a->neg; + + bn_check_top(a); + bn_check_top(b); /* a + b a+b * a + -b a-b * -a + b b-a * -a + -b -(a+b) */ - if (a->neg ^ b->neg) + if (a_neg ^ b->neg) { /* only one is negative */ - if (a->neg) + if (a_neg) { tmp=a; a=b; b=tmp; } /* we are now a - b */ if (BN_ucmp(a,b) < 0) { - if (bn_wexpand(r,b->top) == NULL) return(0); - bn_qsub(r,b,a); + if (!BN_usub(r,b,a)) return(0); r->neg=1; } else { - if (bn_wexpand(r,a->top) == NULL) return(0); - bn_qsub(r,a,b); + if (!BN_usub(r,a,b)) return(0); r->neg=0; } return(1); } - if (a->neg) /* both are neg */ + if (!BN_uadd(r,a,b)) return(0); + if (a_neg) /* both are neg */ r->neg=1; else r->neg=0; - - i=(a->top > b->top); - - if (i) - { - if (bn_wexpand(r,a->top+1) == NULL) return(0); - bn_qadd(r,a,b); - } - else - { - if (bn_wexpand(r,b->top+1) == NULL) return(0); - bn_qadd(r,b,a); - } return(1); } /* unsigned add of b to a, r must be large enough */ -void bn_qadd(r,a,b) -BIGNUM *r; -BIGNUM *a; -BIGNUM *b; +int BN_uadd(BIGNUM *r, const BIGNUM *a, const BIGNUM *b) { register int i; int max,min; BN_ULONG *ap,*bp,*rp,carry,t1; + const BIGNUM *tmp; + + bn_check_top(a); + bn_check_top(b); + if (a->top < b->top) + { tmp=a; a=b; b=tmp; } max=a->top; min=b->top; + + if (bn_wexpand(r,max+1) == NULL) + return(0); + r->top=max; + ap=a->d; bp=b->d; rp=r->d; @@ -160,8 +154,156 @@ BIGNUM *b; r->top++; } } - for (; ineg = 0; + return(1); + } + +/* unsigned subtraction of b from a, a must be larger than b. */ +int BN_usub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b) + { + int max,min; + register BN_ULONG t1,t2,*ap,*bp,*rp; + int i,carry; +#if defined(IRIX_CC_BUG) && !defined(LINT) + int dummy; +#endif + + bn_check_top(a); + bn_check_top(b); + + if (a->top < b->top) /* hmm... should not be happening */ + { + BNerr(BN_F_BN_USUB,BN_R_ARG2_LT_ARG3); + return(0); + } + + max=a->top; + min=b->top; + if (bn_wexpand(r,max) == NULL) return(0); + + ap=a->d; + bp=b->d; + rp=r->d; + +#if 1 + carry=0; + for (i=0; i t2) break; + } + } +#if 0 + memcpy(rp,ap,sizeof(*rp)*(max-i)); +#else + if (rp != ap) + { + for (;;) + { + if (i++ >= max) break; + rp[0]=ap[0]; + if (i++ >= max) break; + rp[1]=ap[1]; + if (i++ >= max) break; + rp[2]=ap[2]; + if (i++ >= max) break; + rp[3]=ap[3]; + rp+=4; + ap+=4; + } + } +#endif + + r->top=max; + r->neg=0; + bn_fix_top(r); + return(1); + } + +int BN_sub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b) + { + int max; + int add=0,neg=0; + const BIGNUM *tmp; + + bn_check_top(a); + bn_check_top(b); + + /* a - b a-b + * a - -b a+b + * -a - b -(a+b) + * -a - -b b-a + */ + if (a->neg) + { + if (b->neg) + { tmp=a; a=b; b=tmp; } + else + { add=1; neg=1; } + } + else + { + if (b->neg) { add=1; neg=0; } + } + + if (add) + { + if (!BN_uadd(r,a,b)) return(0); + r->neg=neg; + return(1); + } + + /* We are actually doing a - b :-) */ + + max=(a->top > b->top)?a->top:b->top; + if (bn_wexpand(r,max) == NULL) return(0); + if (BN_ucmp(a,b) < 0) + { + if (!BN_usub(r,b,a)) return(0); + r->neg=1; + } + else + { + if (!BN_usub(r,a,b)) return(0); + r->neg=0; + } + return(1); } diff --git a/src/lib/libcrypto/bn/bn_asm.c b/src/lib/libcrypto/bn/bn_asm.c index 4d3da16a0c..be8aa3ffc5 100644 --- a/src/lib/libcrypto/bn/bn_asm.c +++ b/src/lib/libcrypto/bn/bn_asm.c @@ -56,97 +56,95 @@ * [including the GNU Public Licence.] */ +#ifndef BN_DEBUG +# undef NDEBUG /* avoid conflicting definitions */ +# define NDEBUG +#endif + #include +#include #include "cryptlib.h" #include "bn_lcl.h" -#ifdef BN_LLONG +#if defined(BN_LLONG) || defined(BN_UMULT_HIGH) -BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) +BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) { BN_ULONG c1=0; - bn_check_num(num); + assert(num >= 0); if (num <= 0) return(c1); - for (;;) + while (num&~3) { mul_add(rp[0],ap[0],w,c1); - if (--num == 0) break; mul_add(rp[1],ap[1],w,c1); - if (--num == 0) break; mul_add(rp[2],ap[2],w,c1); - if (--num == 0) break; mul_add(rp[3],ap[3],w,c1); - if (--num == 0) break; - ap+=4; - rp+=4; + ap+=4; rp+=4; num-=4; + } + if (num) + { + mul_add(rp[0],ap[0],w,c1); if (--num==0) return c1; + mul_add(rp[1],ap[1],w,c1); if (--num==0) return c1; + mul_add(rp[2],ap[2],w,c1); return c1; } return(c1); } -BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) +BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) { BN_ULONG c1=0; - bn_check_num(num); + assert(num >= 0); if (num <= 0) return(c1); - /* for (;;) */ - while (1) /* circumvent egcs-1.1.2 bug */ + while (num&~3) { mul(rp[0],ap[0],w,c1); - if (--num == 0) break; mul(rp[1],ap[1],w,c1); - if (--num == 0) break; mul(rp[2],ap[2],w,c1); - if (--num == 0) break; mul(rp[3],ap[3],w,c1); - if (--num == 0) break; - ap+=4; - rp+=4; + ap+=4; rp+=4; num-=4; + } + if (num) + { + mul(rp[0],ap[0],w,c1); if (--num == 0) return c1; + mul(rp[1],ap[1],w,c1); if (--num == 0) return c1; + mul(rp[2],ap[2],w,c1); } return(c1); } -void bn_sqr_words(BN_ULONG *r, BN_ULONG *a, int n) +void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n) { - bn_check_num(n); + assert(n >= 0); if (n <= 0) return; - for (;;) + while (n&~3) { - BN_ULLONG t; - - t=(BN_ULLONG)(a[0])*(a[0]); - r[0]=Lw(t); r[1]=Hw(t); - if (--n == 0) break; - - t=(BN_ULLONG)(a[1])*(a[1]); - r[2]=Lw(t); r[3]=Hw(t); - if (--n == 0) break; - - t=(BN_ULLONG)(a[2])*(a[2]); - r[4]=Lw(t); r[5]=Hw(t); - if (--n == 0) break; - - t=(BN_ULLONG)(a[3])*(a[3]); - r[6]=Lw(t); r[7]=Hw(t); - if (--n == 0) break; - - a+=4; - r+=8; + sqr(r[0],r[1],a[0]); + sqr(r[2],r[3],a[1]); + sqr(r[4],r[5],a[2]); + sqr(r[6],r[7],a[3]); + a+=4; r+=8; n-=4; + } + if (n) + { + sqr(r[0],r[1],a[0]); if (--n == 0) return; + sqr(r[2],r[3],a[1]); if (--n == 0) return; + sqr(r[4],r[5],a[2]); } } -#else +#else /* !(defined(BN_LLONG) || defined(BN_UMULT_HIGH)) */ -BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) +BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) { BN_ULONG c=0; BN_ULONG bl,bh; - bn_check_num(num); + assert(num >= 0); if (num <= 0) return((BN_ULONG)0); bl=LBITS(w); @@ -168,12 +166,12 @@ BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) return(c); } -BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) +BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) { BN_ULONG carry=0; BN_ULONG bl,bh; - bn_check_num(num); + assert(num >= 0); if (num <= 0) return((BN_ULONG)0); bl=LBITS(w); @@ -195,9 +193,9 @@ BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) return(carry); } -void bn_sqr_words(BN_ULONG *r, BN_ULONG *a, int n) +void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n) { - bn_check_num(n); + assert(n >= 0); if (n <= 0) return; for (;;) { @@ -218,7 +216,7 @@ void bn_sqr_words(BN_ULONG *r, BN_ULONG *a, int n) } } -#endif +#endif /* !(defined(BN_LLONG) || defined(BN_UMULT_HIGH)) */ #if defined(BN_LLONG) && defined(BN_DIV2W) @@ -229,7 +227,7 @@ BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d) #else -/* Divide h-l by d and return the result. */ +/* Divide h,l by d and return the result. */ /* I need to test this some more :-( */ BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d) { @@ -239,13 +237,8 @@ BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d) if (d == 0) return(BN_MASK2); i=BN_num_bits_word(d); - if ((i != BN_BITS2) && (h > (BN_ULONG)1< (BN_ULONG)1<= d) h-=d; @@ -300,14 +293,14 @@ BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d) ret|=q; return(ret); } -#endif +#endif /* !defined(BN_LLONG) && defined(BN_DIV2W) */ #ifdef BN_LLONG -BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) +BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) { BN_ULLONG ll=0; - bn_check_num(n); + assert(n >= 0); if (n <= 0) return((BN_ULONG)0); for (;;) @@ -338,12 +331,12 @@ BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) } return((BN_ULONG)ll); } -#else -BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) +#else /* !BN_LLONG */ +BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) { BN_ULONG c,l,t; - bn_check_num(n); + assert(n >= 0); if (n <= 0) return((BN_ULONG)0); c=0; @@ -387,14 +380,14 @@ BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) } return((BN_ULONG)c); } -#endif +#endif /* !BN_LLONG */ -BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) +BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) { BN_ULONG t1,t2; int c=0; - bn_check_num(n); + assert(n >= 0); if (n <= 0) return((BN_ULONG)0); for (;;) @@ -433,6 +426,11 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) #undef bn_sqr_comba8 #undef bn_sqr_comba4 +/* mul_add_c(a,b,c0,c1,c2) -- c+=a*b for three word number c=(c2,c1,c0) */ +/* mul_add_c2(a,b,c0,c1,c2) -- c+=2*a*b for three word number c=(c2,c1,c0) */ +/* sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0) */ +/* sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */ + #ifdef BN_LLONG #define mul_add_c(a,b,c0,c1,c2) \ t=(BN_ULLONG)a*b; \ @@ -460,7 +458,39 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) #define sqr_add_c2(a,i,j,c0,c1,c2) \ mul_add_c2((a)[i],(a)[j],c0,c1,c2) -#else + +#elif defined(BN_UMULT_HIGH) + +#define mul_add_c(a,b,c0,c1,c2) { \ + BN_ULONG ta=(a),tb=(b); \ + t1 = ta * tb; \ + t2 = BN_UMULT_HIGH(ta,tb); \ + c0 += t1; t2 += (c0A=BN_new()) == NULL) goto err; if ((ret->Ai=BN_new()) == NULL) goto err; @@ -78,26 +81,26 @@ BIGNUM *mod; return(ret); err: if (ret != NULL) BN_BLINDING_free(ret); - return(ret); + return(NULL); } -void BN_BLINDING_free(r) -BN_BLINDING *r; +void BN_BLINDING_free(BN_BLINDING *r) { + if(r == NULL) + return; + if (r->A != NULL) BN_free(r->A ); if (r->Ai != NULL) BN_free(r->Ai); - Free(r); + OPENSSL_free(r); } -int BN_BLINDING_update(b,ctx) -BN_BLINDING *b; -BN_CTX *ctx; +int BN_BLINDING_update(BN_BLINDING *b, BN_CTX *ctx) { int ret=0; if ((b->A == NULL) || (b->Ai == NULL)) { - BNerr(BN_F_BN_BLINDING_UPDATE,BN_R_NOT_INITALISED); + BNerr(BN_F_BN_BLINDING_UPDATE,BN_R_NOT_INITIALIZED); goto err; } @@ -109,28 +112,26 @@ err: return(ret); } -int BN_BLINDING_convert(n,b,ctx) -BIGNUM *n; -BN_BLINDING *b; -BN_CTX *ctx; +int BN_BLINDING_convert(BIGNUM *n, BN_BLINDING *b, BN_CTX *ctx) { + bn_check_top(n); + if ((b->A == NULL) || (b->Ai == NULL)) { - BNerr(BN_F_BN_BLINDING_CONVERT,BN_R_NOT_INITALISED); + BNerr(BN_F_BN_BLINDING_CONVERT,BN_R_NOT_INITIALIZED); return(0); } return(BN_mod_mul(n,n,b->A,b->mod,ctx)); } -int BN_BLINDING_invert(n,b,ctx) -BIGNUM *n; -BN_BLINDING *b; -BN_CTX *ctx; +int BN_BLINDING_invert(BIGNUM *n, BN_BLINDING *b, BN_CTX *ctx) { int ret; + + bn_check_top(n); if ((b->A == NULL) || (b->Ai == NULL)) { - BNerr(BN_F_BN_BLINDING_INVERT,BN_R_NOT_INITALISED); + BNerr(BN_F_BN_BLINDING_INVERT,BN_R_NOT_INITIALIZED); return(0); } if ((ret=BN_mod_mul(n,n,b->Ai,b->mod,ctx)) >= 0) diff --git a/src/lib/libcrypto/bn/bn_ctx.c b/src/lib/libcrypto/bn/bn_ctx.c index 46132fd180..7daf19eb84 100644 --- a/src/lib/libcrypto/bn/bn_ctx.c +++ b/src/lib/libcrypto/bn/bn_ctx.c @@ -61,15 +61,16 @@ #include #include + #include "cryptlib.h" -#include +#include "bn_lcl.h" BN_CTX *BN_CTX_new(void) { BN_CTX *ret; - ret=(BN_CTX *)Malloc(sizeof(BN_CTX)); + ret=(BN_CTX *)OPENSSL_malloc(sizeof(BN_CTX)); if (ret == NULL) { BNerr(BN_F_BN_CTX_NEW,ERR_R_MALLOC_FAILURE); @@ -83,6 +84,7 @@ BN_CTX *BN_CTX_new(void) void BN_CTX_init(BN_CTX *ctx) { +#if 0 /* explicit version */ int i; ctx->tos = 0; ctx->flags = 0; @@ -90,6 +92,9 @@ void BN_CTX_init(BN_CTX *ctx) ctx->too_many = 0; for (i = 0; i < BN_CTX_NUM; i++) BN_init(&(ctx->bn[i])); +#else + memset(ctx, 0, sizeof *ctx); +#endif } void BN_CTX_free(BN_CTX *ctx) @@ -102,7 +107,7 @@ void BN_CTX_free(BN_CTX *ctx) for (i=0; i < BN_CTX_NUM; i++) BN_clear_free(&(ctx->bn[i])); if (ctx->flags & BN_FLG_MALLOCED) - Free(ctx); + OPENSSL_free(ctx); } void BN_CTX_start(BN_CTX *ctx) @@ -112,8 +117,14 @@ void BN_CTX_start(BN_CTX *ctx) ctx->depth++; } + BIGNUM *BN_CTX_get(BN_CTX *ctx) { + /* Note: If BN_CTX_get is ever changed to allocate BIGNUMs dynamically, + * make sure that if BN_CTX_get fails once it will return NULL again + * until BN_CTX_end is called. (This is so that callers have to check + * only the last return value.) + */ if (ctx->depth > BN_CTX_NUM_POS || ctx->tos >= BN_CTX_NUM) { if (!ctx->too_many) diff --git a/src/lib/libcrypto/bn/bn_div.c b/src/lib/libcrypto/bn/bn_div.c index 2263bdc7da..f9a095e3b3 100644 --- a/src/lib/libcrypto/bn/bn_div.c +++ b/src/lib/libcrypto/bn/bn_div.c @@ -57,21 +57,22 @@ */ #include +#include #include "cryptlib.h" #include "bn_lcl.h" + /* The old slow way */ #if 0 -int BN_div(dv, rem, m, d,ctx) -BIGNUM *dv; -BIGNUM *rem; -BIGNUM *m; -BIGNUM *d; -BN_CTX *ctx; +int BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, const BIGNUM *d, + BN_CTX *ctx) { int i,nm,nd; + int ret = 0; BIGNUM *D; + bn_check_top(m); + bn_check_top(d); if (BN_is_zero(d)) { BNerr(BN_F_BN_DIV,BN_R_DIV_BY_ZERO); @@ -86,45 +87,83 @@ BN_CTX *ctx; return(1); } - D=ctx->bn[ctx->tos]; - if (dv == NULL) dv=ctx->bn[ctx->tos+1]; - if (rem == NULL) rem=ctx->bn[ctx->tos+2]; + BN_CTX_start(ctx); + D = BN_CTX_get(ctx); + if (dv == NULL) dv = BN_CTX_get(ctx); + if (rem == NULL) rem = BN_CTX_get(ctx); + if (D == NULL || dv == NULL || rem == NULL) + goto end; nd=BN_num_bits(d); nm=BN_num_bits(m); - if (BN_copy(D,d) == NULL) return(0); - if (BN_copy(rem,m) == NULL) return(0); + if (BN_copy(D,d) == NULL) goto end; + if (BN_copy(rem,m) == NULL) goto end; /* The next 2 are needed so we can do a dv->d[0]|=1 later * since BN_lshift1 will only work once there is a value :-) */ BN_zero(dv); + bn_wexpand(dv,1); dv->top=1; - if (!BN_lshift(D,D,nm-nd)) return(0); + if (!BN_lshift(D,D,nm-nd)) goto end; for (i=nm-nd; i>=0; i--) { - if (!BN_lshift1(dv,dv)) return(0); + if (!BN_lshift1(dv,dv)) goto end; if (BN_ucmp(rem,D) >= 0) { dv->d[0]|=1; - bn_qsub(rem,rem,D); + if (!BN_usub(rem,rem,D)) goto end; } /* CAN IMPROVE (and have now :=) */ - if (!BN_rshift1(D,D)) return(0); + if (!BN_rshift1(D,D)) goto end; } rem->neg=BN_is_zero(rem)?0:m->neg; dv->neg=m->neg^d->neg; - return(1); + ret = 1; + end: + BN_CTX_end(ctx); + return(ret); } #else -int BN_div(dv, rm, num, divisor,ctx) -BIGNUM *dv; -BIGNUM *rm; -BIGNUM *num; -BIGNUM *divisor; -BN_CTX *ctx; +#if !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM) \ + && !defined(PEDANTIC) && !defined(BN_DIV3W) +# if defined(__GNUC__) && __GNUC__>=2 +# if defined(__i386) || defined (__i386__) + /* + * There were two reasons for implementing this template: + * - GNU C generates a call to a function (__udivdi3 to be exact) + * in reply to ((((BN_ULLONG)n0)< + */ +# define bn_div_words(n0,n1,d0) \ + ({ asm volatile ( \ + "divl %4" \ + : "=a"(q), "=d"(rem) \ + : "a"(n1), "d"(n0), "g"(d0) \ + : "cc"); \ + q; \ + }) +# define REMAINDER_IS_ALREADY_CALCULATED +# endif /* __ */ +# endif /* __GNUC__ */ +#endif /* OPENSSL_NO_ASM */ + + +/* BN_div computes dv := num / divisor, rounding towards zero, and sets up + * rm such that dv*divisor + rm = num holds. + * Thus: + * dv->neg == num->neg ^ divisor->neg (unless the result is zero) + * rm->neg == num->neg (unless the remainder is zero) + * If 'dv' or 'rm' is NULL, the respective value is not returned. + */ +int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor, + BN_CTX *ctx) { int norm_shift,i,j,loop; BIGNUM *tmp,wnum,*snum,*sdiv,*res; @@ -132,6 +171,9 @@ BN_CTX *ctx; BN_ULONG d0,d1; int num_n,div_n; + bn_check_top(num); + bn_check_top(divisor); + if (BN_is_zero(divisor)) { BNerr(BN_F_BN_DIV,BN_R_DIV_BY_ZERO); @@ -146,20 +188,22 @@ BN_CTX *ctx; return(1); } - tmp=ctx->bn[ctx->tos]; - tmp->neg=0; - snum=ctx->bn[ctx->tos+1]; - sdiv=ctx->bn[ctx->tos+2]; + BN_CTX_start(ctx); + tmp=BN_CTX_get(ctx); + snum=BN_CTX_get(ctx); + sdiv=BN_CTX_get(ctx); if (dv == NULL) - res=ctx->bn[ctx->tos+3]; + res=BN_CTX_get(ctx); else res=dv; + if (sdiv == NULL || res == NULL) goto err; + tmp->neg=0; /* First we normalise the numbers */ norm_shift=BN_BITS2-((BN_num_bits(divisor))%BN_BITS2); - BN_lshift(sdiv,divisor,norm_shift); + if (!(BN_lshift(sdiv,divisor,norm_shift))) goto err; sdiv->neg=0; norm_shift+=BN_BITS2; - BN_lshift(snum,num,norm_shift); + if (!(BN_lshift(snum,num,norm_shift))) goto err; snum->neg=0; div_n=sdiv->top; num_n=snum->top; @@ -168,10 +212,10 @@ BN_CTX *ctx; /* Lets setup a 'window' into snum * This is the part that corresponds to the current * 'area' being divided */ + BN_init(&wnum); wnum.d= &(snum->d[loop]); wnum.top= div_n; - wnum.max= snum->max; /* a bit of a lie */ - wnum.neg= 0; + wnum.dmax= snum->dmax+1; /* a bit of a lie */ /* Get the top 2 words of sdiv */ /* i=sdiv->top; */ @@ -183,8 +227,8 @@ BN_CTX *ctx; /* Setup to 'res' */ res->neg= (num->neg^divisor->neg); - res->top=loop; if (!bn_wexpand(res,(loop+1))) goto err; + res->top=loop; resp= &(res->d[loop-1]); /* space for temp */ @@ -192,74 +236,98 @@ BN_CTX *ctx; if (BN_ucmp(&wnum,sdiv) >= 0) { - bn_qsub(&wnum,&wnum,sdiv); + if (!BN_usub(&wnum,&wnum,sdiv)) goto err; *resp=1; res->d[res->top-1]=1; } else res->top--; + if (res->top == 0) + res->neg = 0; resp--; for (i=0; i>BN_BITS2) || - (t2 <= ((BN_ULLONG)(rem< t1l) t3h++; - t3h=(t1h-t3h)&BN_MASK2; - - /*if ((t3>>BN_BITS2) || - (t2 <= ((t3<d,sdiv->d,div_n,q); + wnum.d--; wnum.top++; tmp->d[div_n]=l0; for (j=div_n+1; j>0; j--) if (tmp->d[j-1]) break; tmp->top=j; j=wnum.top; - BN_sub(&wnum,&wnum,tmp); + if (!BN_sub(&wnum,&wnum,tmp)) goto err; snum->top=snum->top+wnum.top-j; @@ -267,7 +335,7 @@ BN_CTX *ctx; { q--; j=wnum.top; - BN_add(&wnum,&wnum,sdiv); + if (!BN_add(&wnum,&wnum,sdiv)) goto err; snum->top+=wnum.top-j; } *(resp--)=q; @@ -275,11 +343,18 @@ BN_CTX *ctx; } if (rm != NULL) { + /* Keep a copy of the neg flag in num because if rm==num + * BN_rshift() will overwrite it. + */ + int neg = num->neg; BN_rshift(rm,snum,norm_shift); - rm->neg=num->neg; + if (!BN_is_zero(rm)) + rm->neg = neg; } + BN_CTX_end(ctx); return(1); err: + BN_CTX_end(ctx); return(0); } diff --git a/src/lib/libcrypto/bn/bn_err.c b/src/lib/libcrypto/bn/bn_err.c index 029ae810d5..fb84ee96d8 100644 --- a/src/lib/libcrypto/bn/bn_err.c +++ b/src/lib/libcrypto/bn/bn_err.c @@ -1,66 +1,69 @@ -/* lib/bn/bn_err.c */ -/* Copyright (C) 1995-1997 Eric Young (eay@cryptsoft.com) - * All rights reserved. +/* crypto/bn/bn_err.c */ +/* ==================================================================== + * Copyright (c) 1999 The OpenSSL Project. All rights reserved. * - * This package is an SSL implementation written - * by Eric Young (eay@cryptsoft.com). - * The implementation was written so as to conform with Netscapes SSL. - * - * This library is free for commercial and non-commercial use as long as - * the following conditions are aheared to. The following conditions - * apply to all code found in this distribution, be it the RC4, RSA, - * lhash, DES, etc., code; not just the SSL code. The SSL documentation - * included with this distribution is covered by the same copyright terms - * except that the holder is Tim Hudson (tjh@cryptsoft.com). - * - * Copyright remains Eric Young's, and as such any Copyright notices in - * the code are not to be removed. - * If this package is used in a product, Eric Young should be given attribution - * as the author of the parts of the library used. - * This can be in the form of a textual message at program startup or - * in documentation (online or textual) provided with the package. - * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: - * 1. Redistributions of source code must retain the copyright - * notice, this list of conditions and the following disclaimer. + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * "This product includes cryptographic software written by - * Eric Young (eay@cryptsoft.com)" - * The word 'cryptographic' can be left out if the rouines from the library - * being used are not cryptographic related :-). - * 4. If you include any Windows specific code (or a derivative thereof) from - * the apps directory (application code) you must include an acknowledgement: - * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" - * - * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * The licence and distribution terms for any publically available version or - * derivative of this code cannot be changed. i.e. this code cannot simply be - * copied and put under another distribution licence - * [including the GNU Public Licence.] + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * openssl-core@OpenSSL.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + * This product includes cryptographic software written by Eric Young + * (eay@cryptsoft.com). This product includes software written by Tim + * Hudson (tjh@cryptsoft.com). + * */ + +/* NOTE: this file was auto generated by the mkerr.pl script: any changes + * made to it will be overwritten when the script next updates this file, + * only reason strings will be preserved. + */ + #include -#include "err.h" -#include "bn.h" +#include +#include /* BEGIN ERROR CODES */ -#ifndef NO_ERR +#ifndef OPENSSL_NO_ERR static ERR_STRING_DATA BN_str_functs[]= { {ERR_PACK(0,BN_F_BN_BLINDING_CONVERT,0), "BN_BLINDING_convert"}, @@ -69,40 +72,57 @@ static ERR_STRING_DATA BN_str_functs[]= {ERR_PACK(0,BN_F_BN_BLINDING_UPDATE,0), "BN_BLINDING_update"}, {ERR_PACK(0,BN_F_BN_BN2DEC,0), "BN_bn2dec"}, {ERR_PACK(0,BN_F_BN_BN2HEX,0), "BN_bn2hex"}, +{ERR_PACK(0,BN_F_BN_CTX_GET,0), "BN_CTX_get"}, {ERR_PACK(0,BN_F_BN_CTX_NEW,0), "BN_CTX_new"}, {ERR_PACK(0,BN_F_BN_DIV,0), "BN_div"}, {ERR_PACK(0,BN_F_BN_EXPAND2,0), "bn_expand2"}, +{ERR_PACK(0,BN_F_BN_EXPAND_INTERNAL,0), "BN_EXPAND_INTERNAL"}, +{ERR_PACK(0,BN_F_BN_MOD_EXP2_MONT,0), "BN_mod_exp2_mont"}, {ERR_PACK(0,BN_F_BN_MOD_EXP_MONT,0), "BN_mod_exp_mont"}, +{ERR_PACK(0,BN_F_BN_MOD_EXP_MONT_WORD,0), "BN_mod_exp_mont_word"}, {ERR_PACK(0,BN_F_BN_MOD_INVERSE,0), "BN_mod_inverse"}, +{ERR_PACK(0,BN_F_BN_MOD_LSHIFT_QUICK,0), "BN_mod_lshift_quick"}, {ERR_PACK(0,BN_F_BN_MOD_MUL_RECIPROCAL,0), "BN_mod_mul_reciprocal"}, +{ERR_PACK(0,BN_F_BN_MOD_SQRT,0), "BN_mod_sqrt"}, {ERR_PACK(0,BN_F_BN_MPI2BN,0), "BN_mpi2bn"}, {ERR_PACK(0,BN_F_BN_NEW,0), "BN_new"}, {ERR_PACK(0,BN_F_BN_RAND,0), "BN_rand"}, -{0,NULL}, +{ERR_PACK(0,BN_F_BN_RAND_RANGE,0), "BN_rand_range"}, +{ERR_PACK(0,BN_F_BN_USUB,0), "BN_usub"}, +{0,NULL} }; static ERR_STRING_DATA BN_str_reasons[]= { +{BN_R_ARG2_LT_ARG3 ,"arg2 lt arg3"}, {BN_R_BAD_RECIPROCAL ,"bad reciprocal"}, +{BN_R_BIGNUM_TOO_LONG ,"bignum too long"}, {BN_R_CALLED_WITH_EVEN_MODULUS ,"called with even modulus"}, {BN_R_DIV_BY_ZERO ,"div by zero"}, {BN_R_ENCODING_ERROR ,"encoding error"}, +{BN_R_EXPAND_ON_STATIC_BIGNUM_DATA ,"expand on static bignum data"}, +{BN_R_INPUT_NOT_REDUCED ,"input not reduced"}, {BN_R_INVALID_LENGTH ,"invalid length"}, -{BN_R_NOT_INITALISED ,"not initalised"}, +{BN_R_INVALID_RANGE ,"invalid range"}, +{BN_R_NOT_A_SQUARE ,"not a square"}, +{BN_R_NOT_INITIALIZED ,"not initialized"}, {BN_R_NO_INVERSE ,"no inverse"}, -{0,NULL}, +{BN_R_P_IS_NOT_PRIME ,"p is not prime"}, +{BN_R_TOO_MANY_ITERATIONS ,"too many iterations"}, +{BN_R_TOO_MANY_TEMPORARY_VARIABLES ,"too many temporary variables"}, +{0,NULL} }; #endif -void ERR_load_BN_strings() +void ERR_load_BN_strings(void) { static int init=1; - if (init); - {; + if (init) + { init=0; -#ifndef NO_ERR +#ifndef OPENSSL_NO_ERR ERR_load_strings(ERR_LIB_BN,BN_str_functs); ERR_load_strings(ERR_LIB_BN,BN_str_reasons); #endif diff --git a/src/lib/libcrypto/bn/bn_exp.c b/src/lib/libcrypto/bn/bn_exp.c index c056a5083f..afdfd580fb 100644 --- a/src/lib/libcrypto/bn/bn_exp.c +++ b/src/lib/libcrypto/bn/bn_exp.c @@ -55,112 +55,145 @@ * copied and put under another distribution licence * [including the GNU Public Licence.] */ +/* ==================================================================== + * Copyright (c) 1998-2000 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * openssl-core@openssl.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.openssl.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + * This product includes cryptographic software written by Eric Young + * (eay@cryptsoft.com). This product includes software written by Tim + * Hudson (tjh@cryptsoft.com). + * + */ + -#include #include "cryptlib.h" #include "bn_lcl.h" -/* slow but works */ -int BN_mod_mul(ret, a, b, m, ctx) -BIGNUM *ret; -BIGNUM *a; -BIGNUM *b; -BIGNUM *m; -BN_CTX *ctx; - { - BIGNUM *t; - int r=0; - - t=ctx->bn[ctx->tos++]; - if (a == b) - { if (!BN_sqr(t,a,ctx)) goto err; } - else - { if (!BN_mul(t,a,b)) goto err; } - if (!BN_mod(ret,t,m,ctx)) goto err; - r=1; -err: - ctx->tos--; - return(r); - } +#define TABLE_SIZE 32 -#if 0 /* this one works - simple but works */ -int BN_mod_exp(r,a,p,m,ctx) -BIGNUM *r,*a,*p,*m; -BN_CTX *ctx; +int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) { int i,bits,ret=0; - BIGNUM *v,*tmp; + BIGNUM *v,*rr; - v=ctx->bn[ctx->tos++]; - tmp=ctx->bn[ctx->tos++]; + BN_CTX_start(ctx); + if ((r == a) || (r == p)) + rr = BN_CTX_get(ctx); + else + rr = r; + if ((v = BN_CTX_get(ctx)) == NULL) goto err; if (BN_copy(v,a) == NULL) goto err; bits=BN_num_bits(p); if (BN_is_odd(p)) - { if (BN_copy(r,a) == NULL) goto err; } - else { if (BN_one(r)) goto err; } + { if (BN_copy(rr,a) == NULL) goto err; } + else { if (!BN_one(rr)) goto err; } for (i=1; itos-=2; + if (r != rr) BN_copy(r,rr); + BN_CTX_end(ctx); return(ret); } -#endif - -/* this one works - simple but works */ -int BN_exp(r,a,p,ctx) -BIGNUM *r,*a,*p; -BN_CTX *ctx; - { - int i,bits,ret=0; - BIGNUM *v,*tmp; - - v=ctx->bn[ctx->tos++]; - tmp=ctx->bn[ctx->tos++]; - - if (BN_copy(v,a) == NULL) goto err; - bits=BN_num_bits(p); - - if (BN_is_odd(p)) - { if (BN_copy(r,a) == NULL) goto err; } - else { if (BN_one(r)) goto err; } - - for (i=1; itos-=2; - return(ret); - } -int BN_mod_exp(r,a,p,m,ctx) -BIGNUM *r; -BIGNUM *a; -BIGNUM *p; -BIGNUM *m; -BN_CTX *ctx; +int BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, + BN_CTX *ctx) { int ret; + bn_check_top(a); + bn_check_top(p); + bn_check_top(m); + + /* For even modulus m = 2^k*m_odd, it might make sense to compute + * a^p mod m_odd and a^p mod 2^k separately (with Montgomery + * exponentiation for the odd part), using appropriate exponent + * reductions, and combine the results using the CRT. + * + * For now, we use Montgomery only if the modulus is odd; otherwise, + * exponentiation using the reciprocal-based quick remaindering + * algorithm is used. + * + * (Timing obtained with expspeed.c [computations a^p mod m + * where a, p, m are of the same length: 256, 512, 1024, 2048, + * 4096, 8192 bits], compared to the running time of the + * standard algorithm: + * + * BN_mod_exp_mont 33 .. 40 % [AMD K6-2, Linux, debug configuration] + * 55 .. 77 % [UltraSparc processor, but + * debug-solaris-sparcv8-gcc conf.] + * + * BN_mod_exp_recp 50 .. 70 % [AMD K6-2, Linux, debug configuration] + * 62 .. 118 % [UltraSparc, debug-solaris-sparcv8-gcc] + * + * On the Sparc, BN_mod_exp_recp was faster than BN_mod_exp_mont + * at 2048 and more bits, but at 512 and 1024 bits, it was + * slower even than the standard algorithm! + * + * "Real" timings [linux-elf, solaris-sparcv9-gcc configurations] + * should be obtained when the new Montgomery reduction code + * has been integrated into OpenSSL.) + */ + +#define MONT_MUL_MOD +#define MONT_EXP_WORD +#define RECP_MUL_MOD + #ifdef MONT_MUL_MOD /* I have finally been able to take out this pre-condition of * the top bit being set. It was caused by an error in BN_div @@ -169,7 +202,17 @@ BN_CTX *ctx; /* if ((m->d[m->top-1]&BN_TBIT) && BN_is_odd(m)) */ if (BN_is_odd(m)) - { ret=BN_mod_exp_mont(r,a,p,m,ctx,NULL); } + { +# ifdef MONT_EXP_WORD + if (a->top == 1 && !a->neg) + { + BN_ULONG A = a->d[0]; + ret=BN_mod_exp_mont_word(r,A,p,m,ctx,NULL); + } + else +# endif + ret=BN_mod_exp_mont(r,a,p,m,ctx,NULL); + } else #endif #ifdef RECP_MUL_MOD @@ -181,55 +224,65 @@ BN_CTX *ctx; return(ret); } -/* #ifdef RECP_MUL_MOD */ -int BN_mod_exp_recp(r,a,p,m,ctx) -BIGNUM *r; -BIGNUM *a; -BIGNUM *p; -BIGNUM *m; -BN_CTX *ctx; + +int BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, + const BIGNUM *m, BN_CTX *ctx) { - int nb,i,j,bits,ret=0,wstart,wend,window,wvalue; - int start=1; - BIGNUM *d,*aa; - BIGNUM *val[16]; + int i,j,bits,ret=0,wstart,wend,window,wvalue; + int start=1,ts=0; + BIGNUM *aa; + BIGNUM val[TABLE_SIZE]; + BN_RECP_CTX recp; - d=ctx->bn[ctx->tos++]; - aa=ctx->bn[ctx->tos++]; bits=BN_num_bits(p); if (bits == 0) { - BN_one(r); - return(1); + ret = BN_one(r); + return ret; + } + + BN_CTX_start(ctx); + if ((aa = BN_CTX_get(ctx)) == NULL) goto err; + + BN_RECP_CTX_init(&recp); + if (m->neg) + { + /* ignore sign of 'm' */ + if (!BN_copy(aa, m)) goto err; + aa->neg = 0; + if (BN_RECP_CTX_set(&recp,aa,ctx) <= 0) goto err; } - nb=BN_reciprocal(d,m,ctx); - if (nb == -1) goto err; - - val[0]=BN_new(); - if (!BN_mod(val[0],a,m,ctx)) goto err; /* 1 */ - if (!BN_mod_mul_reciprocal(aa,val[0],val[0],m,d,nb,ctx)) - goto err; /* 2 */ - - if (bits <= 17) /* This is probably 3 or 0x10001, so just do singles */ - window=1; - else if (bits >= 256) - window=5; /* max size of window */ - else if (bits >= 128) - window=4; else - window=3; + { + if (BN_RECP_CTX_set(&recp,m,ctx) <= 0) goto err; + } + + BN_init(&(val[0])); + ts=1; - j=1<<(window-1); - for (i=1; i 1) + { + if (!BN_mod_mul_reciprocal(aa,&(val[0]),&(val[0]),&recp,ctx)) + goto err; /* 2 */ + j=1<<(window-1); + for (i=1; i>1],m,d,nb,ctx)) + if (!BN_mod_mul_reciprocal(r,r,&(val[wvalue>>1]),&recp,ctx)) goto err; /* move the 'window' down further */ @@ -290,84 +343,86 @@ BN_CTX *ctx; } ret=1; err: - ctx->tos-=2; - for (i=0; i<16; i++) - if (val[i] != NULL) BN_clear_free(val[i]); + BN_CTX_end(ctx); + for (i=0; id[0] & 1)) { BNerr(BN_F_BN_MOD_EXP_MONT,BN_R_CALLED_WITH_EVEN_MODULUS); return(0); } - d=ctx->bn[ctx->tos++]; bits=BN_num_bits(p); if (bits == 0) { - BN_one(r); - return(1); + ret = BN_one(rr); + return ret; } + BN_CTX_start(ctx); + d = BN_CTX_get(ctx); + r = BN_CTX_get(ctx); + if (d == NULL || r == NULL) goto err; + /* If this is not done, things will break in the montgomery * part */ -#if 1 if (in_mont != NULL) mont=in_mont; else -#endif { if ((mont=BN_MONT_CTX_new()) == NULL) goto err; if (!BN_MONT_CTX_set(mont,m,ctx)) goto err; } - val[0]=BN_new(); - if (BN_ucmp(a,m) >= 0) + BN_init(&val[0]); + ts=1; + if (a->neg || BN_ucmp(a,m) >= 0) { - BN_mod(val[0],a,m,ctx); - aa=val[0]; + if (!BN_nnmod(&(val[0]),a,m,ctx)) + goto err; + aa= &(val[0]); } else aa=a; - if (!BN_to_montgomery(val[0],aa,mont,ctx)) goto err; /* 1 */ - if (!BN_mod_mul_montgomery(d,val[0],val[0],mont,ctx)) goto err; /* 2 */ - - if (bits <= 20) /* This is probably 3 or 0x10001, so just do singles */ - window=1; - else if (bits > 250) - window=5; /* max size of window */ - else if (bits >= 120) - window=4; - else - window=3; + if (BN_is_zero(aa)) + { + ret = BN_zero(rr); + goto err; + } + if (!BN_to_montgomery(&(val[0]),aa,mont,ctx)) goto err; /* 1 */ - j=1<<(window-1); - for (i=1; i 1) { - val[i]=BN_new(); - if (!BN_mod_mul_montgomery(val[i],val[i-1],d,mont,ctx)) - goto err; + if (!BN_mod_mul_montgomery(d,&(val[0]),&(val[0]),mont,ctx)) goto err; /* 2 */ + j=1<<(window-1); + for (i=1; i>1],mont,ctx)) + if (!BN_mod_mul_montgomery(r,r,&(val[wvalue>>1]),mont,ctx)) goto err; /* move the 'window' down further */ @@ -428,62 +483,201 @@ BN_MONT_CTX *in_mont; start=0; if (wstart < 0) break; } - BN_from_montgomery(r,r,mont,ctx); + if (!BN_from_montgomery(rr,r,mont,ctx)) goto err; ret=1; err: if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont); - ctx->tos--; - for (i=0; itop == 0 || !(m->d[0] & 1)) + { + BNerr(BN_F_BN_MOD_EXP_MONT_WORD,BN_R_CALLED_WITH_EVEN_MODULUS); + return(0); + } + if (m->top == 1) + a %= m->d[0]; /* make sure that 'a' is reduced */ + + bits = BN_num_bits(p); + if (bits == 0) + { + ret = BN_one(rr); + return ret; + } + if (a == 0) + { + ret = BN_zero(rr); + return ret; + } + + BN_CTX_start(ctx); + d = BN_CTX_get(ctx); + r = BN_CTX_get(ctx); + t = BN_CTX_get(ctx); + if (d == NULL || r == NULL || t == NULL) goto err; + + if (in_mont != NULL) + mont=in_mont; + else + { + if ((mont = BN_MONT_CTX_new()) == NULL) goto err; + if (!BN_MONT_CTX_set(mont, m, ctx)) goto err; + } + + r_is_one = 1; /* except for Montgomery factor */ + + /* bits-1 >= 0 */ + + /* The result is accumulated in the product r*w. */ + w = a; /* bit 'bits-1' of 'p' is always set */ + for (b = bits-2; b >= 0; b--) + { + /* First, square r*w. */ + next_w = w*w; + if ((next_w/w) != w) /* overflow */ + { + if (r_is_one) + { + if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) goto err; + r_is_one = 0; + } + else + { + if (!BN_MOD_MUL_WORD(r, w, m)) goto err; + } + next_w = 1; + } + w = next_w; + if (!r_is_one) + { + if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) goto err; + } + + /* Second, multiply r*w by 'a' if exponent bit is set. */ + if (BN_is_bit_set(p, b)) + { + next_w = w*a; + if ((next_w/a) != w) /* overflow */ + { + if (r_is_one) + { + if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) goto err; + r_is_one = 0; + } + else + { + if (!BN_MOD_MUL_WORD(r, w, m)) goto err; + } + next_w = a; + } + w = next_w; + } + } + + /* Finally, set r:=r*w. */ + if (w != 1) + { + if (r_is_one) + { + if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) goto err; + r_is_one = 0; + } + else + { + if (!BN_MOD_MUL_WORD(r, w, m)) goto err; + } + } + + if (r_is_one) /* can happen only if a == 1*/ + { + if (!BN_one(rr)) goto err; + } + else + { + if (!BN_from_montgomery(rr, r, mont, ctx)) goto err; + } + ret = 1; +err: + if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont); + BN_CTX_end(ctx); + return(ret); + } + /* The old fallback, simple version :-) */ -int BN_mod_exp_simple(r,a,p,m,ctx) -BIGNUM *r; -BIGNUM *a; -BIGNUM *p; -BIGNUM *m; -BN_CTX *ctx; +int BN_mod_exp_simple(BIGNUM *r, + const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, + BN_CTX *ctx) { - int i,j,bits,ret=0,wstart,wend,window,wvalue; + int i,j,bits,ret=0,wstart,wend,window,wvalue,ts=0; int start=1; BIGNUM *d; - BIGNUM *val[16]; + BIGNUM val[TABLE_SIZE]; - d=ctx->bn[ctx->tos++]; bits=BN_num_bits(p); if (bits == 0) { - BN_one(r); - return(1); + ret = BN_one(r); + return ret; } - val[0]=BN_new(); - if (!BN_mod(val[0],a,m,ctx)) goto err; /* 1 */ - if (!BN_mod_mul(d,val[0],val[0],m,ctx)) - goto err; /* 2 */ - - if (bits <= 17) /* This is probably 3 or 0x10001, so just do singles */ - window=1; - else if (bits >= 256) - window=5; /* max size of window */ - else if (bits >= 128) - window=4; - else - window=3; + BN_CTX_start(ctx); + if ((d = BN_CTX_get(ctx)) == NULL) goto err; - j=1<<(window-1); - for (i=1; i 1) + { + if (!BN_mod_mul(d,&(val[0]),&(val[0]),m,ctx)) + goto err; /* 2 */ + j=1<<(window-1); + for (i=1; i>1],m,ctx)) + if (!BN_mod_mul(r,r,&(val[wvalue>>1]),m,ctx)) goto err; /* move the 'window' down further */ @@ -545,9 +739,9 @@ BN_CTX *ctx; } ret=1; err: - ctx->tos--; - for (i=0; i<16; i++) - if (val[i] != NULL) BN_clear_free(val[i]); + BN_CTX_end(ctx); + for (i=0; i #include "cryptlib.h" #include "bn_lcl.h" -/* I've done some timing with different table sizes. - * The main hassle is that even with bits set at 3, this requires - * 63 BIGNUMs to store the pre-calculated values. - * 512 1024 - * bits=1 75.4% 79.4% - * bits=2 61.2% 62.4% - * bits=3 61.3% 59.3% - * The lack of speed improvment is also a function of the pre-calculation - * which could be removed. - */ -#define EXP2_TABLE_BITS 2 /* 1 2 3 4 5 */ -#define EXP2_TABLE_SIZE 4 /* 2 4 8 16 32 */ +#define TABLE_SIZE 32 -int BN_mod_exp2_mont(BIGNUM *rr, BIGNUM *a1, BIGNUM *p1, BIGNUM *a2, - BIGNUM *p2, BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont) +int BN_mod_exp2_mont(BIGNUM *rr, const BIGNUM *a1, const BIGNUM *p1, + const BIGNUM *a2, const BIGNUM *p2, const BIGNUM *m, + BN_CTX *ctx, BN_MONT_CTX *in_mont) { - int i,j,k,bits,bits1,bits2,ret=0,wstart,wend,window,xvalue,yvalue; - int start=1,ts=0,x,y; - BIGNUM *d,*aa1,*aa2,*r; - BIGNUM val[EXP2_TABLE_SIZE][EXP2_TABLE_SIZE]; + int i,j,bits,b,bits1,bits2,ret=0,wpos1,wpos2,window1,window2,wvalue1,wvalue2; + int r_is_one=1,ts1=0,ts2=0; + BIGNUM *d,*r; + const BIGNUM *a_mod_m; + BIGNUM val1[TABLE_SIZE], val2[TABLE_SIZE]; BN_MONT_CTX *mont=NULL; bn_check_top(a1); @@ -32,22 +134,23 @@ int BN_mod_exp2_mont(BIGNUM *rr, BIGNUM *a1, BIGNUM *p1, BIGNUM *a2, if (!(m->d[0] & 1)) { - BNerr(BN_F_BN_MOD_EXP_MONT,BN_R_CALLED_WITH_EVEN_MODULUS); + BNerr(BN_F_BN_MOD_EXP2_MONT,BN_R_CALLED_WITH_EVEN_MODULUS); return(0); } - d= &(ctx->bn[ctx->tos++]); - r= &(ctx->bn[ctx->tos++]); bits1=BN_num_bits(p1); bits2=BN_num_bits(p2); if ((bits1 == 0) && (bits2 == 0)) { - BN_one(r); - return(1); + ret = BN_one(rr); + return ret; } + bits=(bits1 > bits2)?bits1:bits2; - /* If this is not done, things will break in the montgomery - * part */ + BN_CTX_start(ctx); + d = BN_CTX_get(ctx); + r = BN_CTX_get(ctx); + if (d == NULL || r == NULL) goto err; if (in_mont != NULL) mont=in_mont; @@ -57,139 +160,154 @@ int BN_mod_exp2_mont(BIGNUM *rr, BIGNUM *a1, BIGNUM *p1, BIGNUM *a2, if (!BN_MONT_CTX_set(mont,m,ctx)) goto err; } - BN_init(&(val[0][0])); - BN_init(&(val[1][1])); - BN_init(&(val[0][1])); - BN_init(&(val[1][0])); - ts=1; - if (BN_ucmp(a1,m) >= 0) + window1 = BN_window_bits_for_exponent_size(bits1); + window2 = BN_window_bits_for_exponent_size(bits2); + + /* + * Build table for a1: val1[i] := a1^(2*i + 1) mod m for i = 0 .. 2^(window1-1) + */ + BN_init(&val1[0]); + ts1=1; + if (a1->neg || BN_ucmp(a1,m) >= 0) { - BN_mod(&(val[1][0]),a1,m,ctx); - aa1= &(val[1][0]); + if (!BN_mod(&(val1[0]),a1,m,ctx)) + goto err; + a_mod_m = &(val1[0]); } else - aa1=a1; - if (BN_ucmp(a2,m) >= 0) + a_mod_m = a1; + if (BN_is_zero(a_mod_m)) { - BN_mod(&(val[0][1]),a2,m,ctx); - aa2= &(val[0][1]); - } - else - aa2=a2; - if (!BN_to_montgomery(&(val[1][0]),aa1,mont,ctx)) goto err; - if (!BN_to_montgomery(&(val[0][1]),aa2,mont,ctx)) goto err; - if (!BN_mod_mul_montgomery(&(val[1][1]), - &(val[1][0]),&(val[0][1]),mont,ctx)) + ret = BN_zero(rr); goto err; + } -#if 0 - if (bits <= 20) /* This is probably 3 or 0x10001, so just do singles */ - window=1; - else if (bits > 250) - window=5; /* max size of window */ - else if (bits >= 120) - window=4; - else - window=3; -#else - window=EXP2_TABLE_BITS; -#endif - - k=1< 1) { - if (x >= 2) + if (!BN_mod_mul_montgomery(d,&(val1[0]),&(val1[0]),mont,ctx)) goto err; + + j=1<<(window1-1); + for (i=1; ineg || BN_ucmp(a2,m) >= 0) + { + if (!BN_mod(&(val2[0]),a2,m,ctx)) + goto err; + a_mod_m = &(val2[0]); + } + else + a_mod_m = a2; + if (BN_is_zero(a_mod_m)) + { + ret = BN_zero(rr); + goto err; + } + if (!BN_to_montgomery(&(val2[0]),a_mod_m,mont,ctx)) goto err; + if (window2 > 1) + { + if (!BN_mod_mul_montgomery(d,&(val2[0]),&(val2[0]),mont,ctx)) goto err; + + j=1<<(window2-1); + for (i=1; i 0, the bottom bit of the first window */ + wpos2=0; /* If wvalue2 > 0, the bottom bit of the second window */ + + if (!BN_to_montgomery(r,BN_value_one(),mont,ctx)) goto err; + for (b=bits-1; b>=0; b--) { - xvalue=BN_is_bit_set(p1,wstart); - yvalue=BN_is_bit_set(p2,wstart); - if (!(xvalue || yvalue)) + if (!r_is_one) { - if (!start) + if (!BN_mod_mul_montgomery(r,r,r,mont,ctx)) + goto err; + } + + if (!wvalue1) + if (BN_is_bit_set(p1, b)) { - if (!BN_mod_mul_montgomery(r,r,r,mont,ctx)) - goto err; + /* consider bits b-window1+1 .. b for this window */ + i = b-window1+1; + while (!BN_is_bit_set(p1, i)) /* works for i<0 */ + i++; + wpos1 = i; + wvalue1 = 1; + for (i = b-1; i >= wpos1; i--) + { + wvalue1 <<= 1; + if (BN_is_bit_set(p1, i)) + wvalue1++; + } } - wstart--; - if (wstart < 0) break; - continue; - } - /* We now have wstart on a 'set' bit, we now need to work out - * how bit a window to do. To do this we need to scan - * forward until the last set bit before the end of the - * window */ - j=wstart; - /* xvalue=BN_is_bit_set(p1,wstart); already set */ - /* yvalue=BN_is_bit_set(p1,wstart); already set */ - wend=0; - for (i=1; i= wpos2; i--) + { + wvalue2 <<= 1; + if (BN_is_bit_set(p2, i)) + wvalue2++; + } } + + if (wvalue1 && b == wpos1) + { + /* wvalue1 is odd and < 2^window1 */ + if (!BN_mod_mul_montgomery(r,r,&(val1[wvalue1>>1]),mont,ctx)) + goto err; + wvalue1 = 0; + r_is_one = 0; + } - /* wvalue will be an odd number < 2^window */ - if (xvalue || yvalue) + if (wvalue2 && b == wpos2) { - if (!BN_mod_mul_montgomery(r,r,&(val[xvalue][yvalue]), - mont,ctx)) goto err; + /* wvalue2 is odd and < 2^window2 */ + if (!BN_mod_mul_montgomery(r,r,&(val2[wvalue2>>1]),mont,ctx)) + goto err; + wvalue2 = 0; + r_is_one = 0; } - - /* move the 'window' down further */ - wstart-=i; - start=0; - if (wstart < 0) break; } BN_from_montgomery(rr,r,mont,ctx); ret=1; err: if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont); - ctx->tos-=2; - for (i=0; i #include "cryptlib.h" #include "bn_lcl.h" -#ifndef NOPROTO static BIGNUM *euclid(BIGNUM *a, BIGNUM *b); -#else -static BIGNUM *euclid(); -#endif -int BN_gcd(r,in_a,in_b,ctx) -BIGNUM *r,*in_a,*in_b; -BN_CTX *ctx; +int BN_gcd(BIGNUM *r, const BIGNUM *in_a, const BIGNUM *in_b, BN_CTX *ctx) { BIGNUM *a,*b,*t; int ret=0; - a=ctx->bn[ctx->tos]; - b=ctx->bn[ctx->tos+1]; + bn_check_top(in_a); + bn_check_top(in_b); + + BN_CTX_start(ctx); + a = BN_CTX_get(ctx); + b = BN_CTX_get(ctx); + if (a == NULL || b == NULL) goto err; if (BN_copy(a,in_a) == NULL) goto err; if (BN_copy(b,in_b) == NULL) goto err; + a->neg = 0; + b->neg = 0; if (BN_cmp(a,b) < 0) { t=a; a=b; b=t; } t=euclid(a,b); @@ -86,19 +139,22 @@ BN_CTX *ctx; if (BN_copy(r,t) == NULL) goto err; ret=1; err: + BN_CTX_end(ctx); return(ret); } -static BIGNUM *euclid(a,b) -BIGNUM *a,*b; +static BIGNUM *euclid(BIGNUM *a, BIGNUM *b) { BIGNUM *t; int shifts=0; - for (;;) + bn_check_top(a); + bn_check_top(b); + + /* 0 <= b <= a */ + while (!BN_is_zero(b)) { - if (BN_is_zero(b)) - break; + /* 0 < b <= a */ if (BN_is_odd(a)) { @@ -131,7 +187,9 @@ BIGNUM *a,*b; shifts++; } } + /* 0 <= b <= a */ } + if (shifts) { if (!BN_lshift(a,a,shifts)) goto err; @@ -141,54 +199,284 @@ err: return(NULL); } + /* solves ax == 1 (mod n) */ -BIGNUM *BN_mod_inverse(a, n, ctx) -BIGNUM *a; -BIGNUM *n; -BN_CTX *ctx; +BIGNUM *BN_mod_inverse(BIGNUM *in, + const BIGNUM *a, const BIGNUM *n, BN_CTX *ctx) { - BIGNUM *A,*B,*X,*Y,*M,*D,*R; - BIGNUM *ret=NULL,*T; + BIGNUM *A,*B,*X,*Y,*M,*D,*T,*R=NULL; + BIGNUM *ret=NULL; int sign; - A=ctx->bn[ctx->tos]; - B=ctx->bn[ctx->tos+1]; - X=ctx->bn[ctx->tos+2]; - D=ctx->bn[ctx->tos+3]; - M=ctx->bn[ctx->tos+4]; - Y=ctx->bn[ctx->tos+5]; - ctx->tos+=6; - R=BN_new(); + bn_check_top(a); + bn_check_top(n); + + BN_CTX_start(ctx); + A = BN_CTX_get(ctx); + B = BN_CTX_get(ctx); + X = BN_CTX_get(ctx); + D = BN_CTX_get(ctx); + M = BN_CTX_get(ctx); + Y = BN_CTX_get(ctx); + T = BN_CTX_get(ctx); + if (T == NULL) goto err; + + if (in == NULL) + R=BN_new(); + else + R=in; if (R == NULL) goto err; - BN_zero(X); - BN_one(Y); - if (BN_copy(A,a) == NULL) goto err; - if (BN_copy(B,n) == NULL) goto err; - sign=1; + BN_one(X); + BN_zero(Y); + if (BN_copy(B,a) == NULL) goto err; + if (BN_copy(A,n) == NULL) goto err; + A->neg = 0; + if (B->neg || (BN_ucmp(B, A) >= 0)) + { + if (!BN_nnmod(B, B, A, ctx)) goto err; + } + sign = -1; + /* From B = a mod |n|, A = |n| it follows that + * + * 0 <= B < A, + * -sign*X*a == B (mod |n|), + * sign*Y*a == A (mod |n|). + */ + + if (BN_is_odd(n) && (BN_num_bits(n) <= (BN_BITS <= 32 ? 450 : 2048))) + { + /* Binary inversion algorithm; requires odd modulus. + * This is faster than the general algorithm if the modulus + * is sufficiently small (about 400 .. 500 bits on 32-bit + * sytems, but much more on 64-bit systems) */ + int shift; + + while (!BN_is_zero(B)) + { + /* + * 0 < B < |n|, + * 0 < A <= |n|, + * (1) -sign*X*a == B (mod |n|), + * (2) sign*Y*a == A (mod |n|) + */ + + /* Now divide B by the maximum possible power of two in the integers, + * and divide X by the same value mod |n|. + * When we're done, (1) still holds. */ + shift = 0; + while (!BN_is_bit_set(B, shift)) /* note that 0 < B */ + { + shift++; + + if (BN_is_odd(X)) + { + if (!BN_uadd(X, X, n)) goto err; + } + /* now X is even, so we can easily divide it by two */ + if (!BN_rshift1(X, X)) goto err; + } + if (shift > 0) + { + if (!BN_rshift(B, B, shift)) goto err; + } + - while (!BN_is_zero(B)) + /* Same for A and Y. Afterwards, (2) still holds. */ + shift = 0; + while (!BN_is_bit_set(A, shift)) /* note that 0 < A */ + { + shift++; + + if (BN_is_odd(Y)) + { + if (!BN_uadd(Y, Y, n)) goto err; + } + /* now Y is even */ + if (!BN_rshift1(Y, Y)) goto err; + } + if (shift > 0) + { + if (!BN_rshift(A, A, shift)) goto err; + } + + + /* We still have (1) and (2). + * Both A and B are odd. + * The following computations ensure that + * + * 0 <= B < |n|, + * 0 < A < |n|, + * (1) -sign*X*a == B (mod |n|), + * (2) sign*Y*a == A (mod |n|), + * + * and that either A or B is even in the next iteration. + */ + if (BN_ucmp(B, A) >= 0) + { + /* -sign*(X + Y)*a == B - A (mod |n|) */ + if (!BN_uadd(X, X, Y)) goto err; + /* NB: we could use BN_mod_add_quick(X, X, Y, n), but that + * actually makes the algorithm slower */ + if (!BN_usub(B, B, A)) goto err; + } + else + { + /* sign*(X + Y)*a == A - B (mod |n|) */ + if (!BN_uadd(Y, Y, X)) goto err; + /* as above, BN_mod_add_quick(Y, Y, X, n) would slow things down */ + if (!BN_usub(A, A, B)) goto err; + } + } + } + else { - if (!BN_div(D,M,A,B,ctx)) goto err; - T=A; - A=B; - B=M; - /* T has a struct, M does not */ - - if (!BN_mul(T,D,X)) goto err; - if (!BN_add(T,T,Y)) goto err; - M=Y; - Y=X; - X=T; - sign= -sign; + /* general inversion algorithm */ + + while (!BN_is_zero(B)) + { + BIGNUM *tmp; + + /* + * 0 < B < A, + * (*) -sign*X*a == B (mod |n|), + * sign*Y*a == A (mod |n|) + */ + + /* (D, M) := (A/B, A%B) ... */ + if (BN_num_bits(A) == BN_num_bits(B)) + { + if (!BN_one(D)) goto err; + if (!BN_sub(M,A,B)) goto err; + } + else if (BN_num_bits(A) == BN_num_bits(B) + 1) + { + /* A/B is 1, 2, or 3 */ + if (!BN_lshift1(T,B)) goto err; + if (BN_ucmp(A,T) < 0) + { + /* A < 2*B, so D=1 */ + if (!BN_one(D)) goto err; + if (!BN_sub(M,A,B)) goto err; + } + else + { + /* A >= 2*B, so D=2 or D=3 */ + if (!BN_sub(M,A,T)) goto err; + if (!BN_add(D,T,B)) goto err; /* use D (:= 3*B) as temp */ + if (BN_ucmp(A,D) < 0) + { + /* A < 3*B, so D=2 */ + if (!BN_set_word(D,2)) goto err; + /* M (= A - 2*B) already has the correct value */ + } + else + { + /* only D=3 remains */ + if (!BN_set_word(D,3)) goto err; + /* currently M = A - 2*B, but we need M = A - 3*B */ + if (!BN_sub(M,M,B)) goto err; + } + } + } + else + { + if (!BN_div(D,M,A,B,ctx)) goto err; + } + + /* Now + * A = D*B + M; + * thus we have + * (**) sign*Y*a == D*B + M (mod |n|). + */ + + tmp=A; /* keep the BIGNUM object, the value does not matter */ + + /* (A, B) := (B, A mod B) ... */ + A=B; + B=M; + /* ... so we have 0 <= B < A again */ + + /* Since the former M is now B and the former B is now A, + * (**) translates into + * sign*Y*a == D*A + B (mod |n|), + * i.e. + * sign*Y*a - D*A == B (mod |n|). + * Similarly, (*) translates into + * -sign*X*a == A (mod |n|). + * + * Thus, + * sign*Y*a + D*sign*X*a == B (mod |n|), + * i.e. + * sign*(Y + D*X)*a == B (mod |n|). + * + * So if we set (X, Y, sign) := (Y + D*X, X, -sign), we arrive back at + * -sign*X*a == B (mod |n|), + * sign*Y*a == A (mod |n|). + * Note that X and Y stay non-negative all the time. + */ + + /* most of the time D is very small, so we can optimize tmp := D*X+Y */ + if (BN_is_one(D)) + { + if (!BN_add(tmp,X,Y)) goto err; + } + else + { + if (BN_is_word(D,2)) + { + if (!BN_lshift1(tmp,X)) goto err; + } + else if (BN_is_word(D,4)) + { + if (!BN_lshift(tmp,X,2)) goto err; + } + else if (D->top == 1) + { + if (!BN_copy(tmp,X)) goto err; + if (!BN_mul_word(tmp,D->d[0])) goto err; + } + else + { + if (!BN_mul(tmp,D,X,ctx)) goto err; + } + if (!BN_add(tmp,tmp,Y)) goto err; + } + + M=Y; /* keep the BIGNUM object, the value does not matter */ + Y=X; + X=tmp; + sign = -sign; + } } + + /* + * The while loop (Euclid's algorithm) ends when + * A == gcd(a,n); + * we have + * sign*Y*a == A (mod |n|), + * where Y is non-negative. + */ + if (sign < 0) { if (!BN_sub(Y,n,Y)) goto err; } + /* Now Y*a == A (mod |n|). */ + if (BN_is_one(A)) - { if (!BN_mod(R,Y,n,ctx)) goto err; } + { + /* Y*a == 1 (mod |n|) */ + if (!Y->neg && BN_ucmp(Y,n) < 0) + { + if (!BN_copy(R,Y)) goto err; + } + else + { + if (!BN_nnmod(R,Y,n,ctx)) goto err; + } + } else { BNerr(BN_F_BN_MOD_INVERSE,BN_R_NO_INVERSE); @@ -196,8 +484,7 @@ BN_CTX *ctx; } ret=R; err: - if ((ret == NULL) && (R != NULL)) BN_free(R); - ctx->tos-=6; + if ((ret == NULL) && (in == NULL)) BN_free(R); + BN_CTX_end(ctx); return(ret); } - diff --git a/src/lib/libcrypto/bn/bn_lcl.h b/src/lib/libcrypto/bn/bn_lcl.h index edfd788338..8a4dba375a 100644 --- a/src/lib/libcrypto/bn/bn_lcl.h +++ b/src/lib/libcrypto/bn/bn_lcl.h @@ -55,30 +55,228 @@ * copied and put under another distribution licence * [including the GNU Public Licence.] */ +/* ==================================================================== + * Copyright (c) 1998-2000 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * openssl-core@openssl.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.openssl.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + * This product includes cryptographic software written by Eric Young + * (eay@cryptsoft.com). This product includes software written by Tim + * Hudson (tjh@cryptsoft.com). + * + */ #ifndef HEADER_BN_LCL_H #define HEADER_BN_LCL_H -#include "bn.h" +#include #ifdef __cplusplus extern "C" { #endif + +/* Used for temp variables */ +#define BN_CTX_NUM 32 +#define BN_CTX_NUM_POS 12 +struct bignum_ctx + { + int tos; + BIGNUM bn[BN_CTX_NUM]; + int flags; + int depth; + int pos[BN_CTX_NUM_POS]; + int too_many; + } /* BN_CTX */; + + +/* + * BN_window_bits_for_exponent_size -- macro for sliding window mod_exp functions + * + * + * For window size 'w' (w >= 2) and a random 'b' bits exponent, + * the number of multiplications is a constant plus on average + * + * 2^(w-1) + (b-w)/(w+1); + * + * here 2^(w-1) is for precomputing the table (we actually need + * entries only for windows that have the lowest bit set), and + * (b-w)/(w+1) is an approximation for the expected number of + * w-bit windows, not counting the first one. + * + * Thus we should use + * + * w >= 6 if b > 671 + * w = 5 if 671 > b > 239 + * w = 4 if 239 > b > 79 + * w = 3 if 79 > b > 23 + * w <= 2 if 23 > b + * + * (with draws in between). Very small exponents are often selected + * with low Hamming weight, so we use w = 1 for b <= 23. + */ +#if 1 +#define BN_window_bits_for_exponent_size(b) \ + ((b) > 671 ? 6 : \ + (b) > 239 ? 5 : \ + (b) > 79 ? 4 : \ + (b) > 23 ? 3 : 1) +#else +/* Old SSLeay/OpenSSL table. + * Maximum window size was 5, so this table differs for b==1024; + * but it coincides for other interesting values (b==160, b==512). + */ +#define BN_window_bits_for_exponent_size(b) \ + ((b) > 255 ? 5 : \ + (b) > 127 ? 4 : \ + (b) > 17 ? 3 : 1) +#endif + + + +/* Pentium pro 16,16,16,32,64 */ +/* Alpha 16,16,16,16.64 */ +#define BN_MULL_SIZE_NORMAL (16) /* 32 */ +#define BN_MUL_RECURSIVE_SIZE_NORMAL (16) /* 32 less than */ +#define BN_SQR_RECURSIVE_SIZE_NORMAL (16) /* 32 */ +#define BN_MUL_LOW_RECURSIVE_SIZE_NORMAL (32) /* 32 */ +#define BN_MONT_CTX_SET_SIZE_WORD (64) /* 32 */ + +#if !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM) && !defined(PEDANTIC) +/* + * BN_UMULT_HIGH section. + * + * No, I'm not trying to overwhelm you when stating that the + * product of N-bit numbers is 2*N bits wide:-) No, I don't expect + * you to be impressed when I say that if the compiler doesn't + * support 2*N integer type, then you have to replace every N*N + * multiplication with 4 (N/2)*(N/2) accompanied by some shifts + * and additions which unavoidably results in severe performance + * penalties. Of course provided that the hardware is capable of + * producing 2*N result... That's when you normally start + * considering assembler implementation. However! It should be + * pointed out that some CPUs (most notably Alpha, PowerPC and + * upcoming IA-64 family:-) provide *separate* instruction + * calculating the upper half of the product placing the result + * into a general purpose register. Now *if* the compiler supports + * inline assembler, then it's not impossible to implement the + * "bignum" routines (and have the compiler optimize 'em) + * exhibiting "native" performance in C. That's what BN_UMULT_HIGH + * macro is about:-) + * + * + */ +# if defined(__alpha) && (defined(SIXTY_FOUR_BIT_LONG) || defined(SIXTY_FOUR_BIT)) +# if defined(__DECC) +# include +# define BN_UMULT_HIGH(a,b) (BN_ULONG)asm("umulh %a0,%a1,%v0",(a),(b)) +# elif defined(__GNUC__) +# define BN_UMULT_HIGH(a,b) ({ \ + register BN_ULONG ret; \ + asm ("umulh %1,%2,%0" \ + : "=r"(ret) \ + : "r"(a), "r"(b)); \ + ret; }) +# endif /* compiler */ +# elif defined(_ARCH_PPC) && defined(__64BIT__) && defined(SIXTY_FOUR_BIT_LONG) +# if defined(__GNUC__) +# define BN_UMULT_HIGH(a,b) ({ \ + register BN_ULONG ret; \ + asm ("mulhdu %0,%1,%2" \ + : "=r"(ret) \ + : "r"(a), "r"(b)); \ + ret; }) +# endif /* compiler */ +# endif /* cpu */ +#endif /* OPENSSL_NO_ASM */ + /************************************************************* * Using the long long type */ #define Lw(t) (((BN_ULONG)(t))&BN_MASK2) #define Hw(t) (((BN_ULONG)((t)>>BN_BITS2))&BN_MASK2) -#define bn_fix_top(a) \ - { \ - BN_ULONG *fix_top_l; \ - for (fix_top_l= &((a)->d[(a)->top-1]); (a)->top > 0; (a)->top--) \ - if (*(fix_top_l--)) break; \ +/* This is used for internal error checking and is not normally used */ +#ifdef BN_DEBUG +# include +# define bn_check_top(a) assert ((a)->top >= 0 && (a)->top <= (a)->dmax); +#else +# define bn_check_top(a) +#endif + +/* This macro is to add extra stuff for development checking */ +#ifdef BN_DEBUG +#define bn_set_max(r) ((r)->max=(r)->top,BN_set_flags((r),BN_FLG_STATIC_DATA)) +#else +#define bn_set_max(r) +#endif + +/* These macros are used to 'take' a section of a bignum for read only use */ +#define bn_set_low(r,a,n) \ + { \ + (r)->top=((a)->top > (n))?(n):(a)->top; \ + (r)->d=(a)->d; \ + (r)->neg=(a)->neg; \ + (r)->flags|=BN_FLG_STATIC_DATA; \ + bn_set_max(r); \ } -/* #define bn_expand(n,b) ((((b)/BN_BITS2) <= (n)->max)?(n):bn_expand2((n),(b))) */ +#define bn_set_high(r,a,n) \ + { \ + if ((a)->top > (n)) \ + { \ + (r)->top=(a)->top-n; \ + (r)->d= &((a)->d[n]); \ + } \ + else \ + (r)->top=0; \ + (r)->neg=(a)->neg; \ + (r)->flags|=BN_FLG_STATIC_DATA; \ + bn_set_max(r); \ + } #ifdef BN_LLONG #define mul_add(r,a,w,c) { \ @@ -95,6 +293,43 @@ extern "C" { (c)= Hw(t); \ } +#define sqr(r0,r1,a) { \ + BN_ULLONG t; \ + t=(BN_ULLONG)(a)*(a); \ + (r0)=Lw(t); \ + (r1)=Hw(t); \ + } + +#elif defined(BN_UMULT_HIGH) +#define mul_add(r,a,w,c) { \ + BN_ULONG high,low,ret,tmp=(a); \ + ret = (r); \ + high= BN_UMULT_HIGH(w,tmp); \ + ret += (c); \ + low = (w) * tmp; \ + (c) = (ret<(c))?1:0; \ + (c) += high; \ + ret += low; \ + (c) += (ret +#include #include #include "cryptlib.h" #include "bn_lcl.h" -char *BN_version="Big Number part of SSLeay 0.9.0b 29-Jun-1998"; +const char *BN_version="Big Number" OPENSSL_VERSION_PTEXT; -BIGNUM *BN_value_one() +/* For a 32 bit machine + * 2 - 4 == 128 + * 3 - 8 == 256 + * 4 - 16 == 512 + * 5 - 32 == 1024 + * 6 - 64 == 2048 + * 7 - 128 == 4096 + * 8 - 256 == 8192 + */ +static int bn_limit_bits=0; +static int bn_limit_num=8; /* (1<= 0) + { + if (mult > (sizeof(int)*8)-1) + mult=sizeof(int)*8-1; + bn_limit_bits=mult; + bn_limit_num=1<= 0) + { + if (high > (sizeof(int)*8)-1) + high=sizeof(int)*8-1; + bn_limit_bits_high=high; + bn_limit_num_high=1<= 0) + { + if (low > (sizeof(int)*8)-1) + low=sizeof(int)*8-1; + bn_limit_bits_low=low; + bn_limit_num_low=1<= 0) + { + if (mont > (sizeof(int)*8)-1) + mont=sizeof(int)*8-1; + bn_limit_bits_mont=mont; + bn_limit_num_mont=1<>56]+56); + return(bits[(int)(l>>56)]+56); } - else return(bits[l>>48]+48); + else return(bits[(int)(l>>48)]+48); } else { if (l & 0x0000ff0000000000L) { - return(bits[l>>40]+40); + return(bits[(int)(l>>40)]+40); } - else return(bits[l>>32]+32); + else return(bits[(int)(l>>32)]+32); } } else @@ -140,17 +205,17 @@ BN_ULONG l; { if (l & 0xff00000000000000LL) { - return(bits[l>>56]+56); + return(bits[(int)(l>>56)]+56); } - else return(bits[l>>48]+48); + else return(bits[(int)(l>>48)]+48); } else { if (l & 0x0000ff0000000000LL) { - return(bits[l>>40]+40); + return(bits[(int)(l>>40)]+40); } - else return(bits[l>>32]+32); + else return(bits[(int)(l>>32)]+32); } } else @@ -161,161 +226,256 @@ BN_ULONG l; if (l & 0xffff0000L) { if (l & 0xff000000L) - return(bits[l>>24L]+24); - else return(bits[l>>16L]+16); + return(bits[(int)(l>>24L)]+24); + else return(bits[(int)(l>>16L)]+16); } else #endif { #if defined(SIXTEEN_BIT) || defined(THIRTY_TWO_BIT) || defined(SIXTY_FOUR_BIT) || defined(SIXTY_FOUR_BIT_LONG) if (l & 0xff00L) - return(bits[l>>8]+8); + return(bits[(int)(l>>8)]+8); else #endif - return(bits[l ] ); + return(bits[(int)(l )] ); } } } -int BN_num_bits(a) -BIGNUM *a; +int BN_num_bits(const BIGNUM *a) { BN_ULONG l; int i; + bn_check_top(a); + if (a->top == 0) return(0); l=a->d[a->top-1]; + assert(l != 0); i=(a->top-1)*BN_BITS2; - if (l == 0) - { -#if !defined(NO_STDIO) && !defined(WIN16) - fprintf(stderr,"BAD TOP VALUE\n"); -#endif - abort(); - } return(i+BN_num_bits_word(l)); } -void BN_clear_free(a) -BIGNUM *a; +void BN_clear_free(BIGNUM *a) { + int i; + if (a == NULL) return; if (a->d != NULL) { - memset(a->d,0,a->max*sizeof(a->d[0])); - Free(a->d); + memset(a->d,0,a->dmax*sizeof(a->d[0])); + if (!(BN_get_flags(a,BN_FLG_STATIC_DATA))) + OPENSSL_free(a->d); } + i=BN_get_flags(a,BN_FLG_MALLOCED); memset(a,0,sizeof(BIGNUM)); - Free(a); + if (i) + OPENSSL_free(a); } -void BN_free(a) -BIGNUM *a; +void BN_free(BIGNUM *a) { if (a == NULL) return; - if (a->d != NULL) Free(a->d); - Free(a); + if ((a->d != NULL) && !(BN_get_flags(a,BN_FLG_STATIC_DATA))) + OPENSSL_free(a->d); + a->flags|=BN_FLG_FREE; /* REMOVE? */ + if (a->flags & BN_FLG_MALLOCED) + OPENSSL_free(a); } -BIGNUM *BN_new() +void BN_init(BIGNUM *a) + { + memset(a,0,sizeof(BIGNUM)); + } + +BIGNUM *BN_new(void) { BIGNUM *ret; - BN_ULONG *p; - ret=(BIGNUM *)Malloc(sizeof(BIGNUM)); - if (ret == NULL) goto err; + if ((ret=(BIGNUM *)OPENSSL_malloc(sizeof(BIGNUM))) == NULL) + { + BNerr(BN_F_BN_NEW,ERR_R_MALLOC_FAILURE); + return(NULL); + } + ret->flags=BN_FLG_MALLOCED; ret->top=0; ret->neg=0; - ret->max=(BN_DEFAULT_BITS/BN_BITS2); - p=(BN_ULONG *)Malloc(sizeof(BN_ULONG)*(ret->max+1)); - if (p == NULL) goto err; - ret->d=p; - - memset(p,0,(ret->max+1)*sizeof(p[0])); + ret->dmax=0; + ret->d=NULL; return(ret); -err: - BNerr(BN_F_BN_NEW,ERR_R_MALLOC_FAILURE); - return(NULL); } -BN_CTX *BN_CTX_new() +/* This is used both by bn_expand2() and bn_dup_expand() */ +/* The caller MUST check that words > b->dmax before calling this */ +static BN_ULONG *bn_expand_internal(const BIGNUM *b, int words) { - BN_CTX *ret; - BIGNUM *n; - int i,j; + BN_ULONG *A,*a = NULL; + const BN_ULONG *B; + int i; - ret=(BN_CTX *)Malloc(sizeof(BN_CTX)); - if (ret == NULL) goto err2; + if (words > (INT_MAX/(4*BN_BITS2))) + { + BNerr(BN_F_BN_EXPAND_INTERNAL,BN_R_BIGNUM_TOO_LONG); + return NULL; + } - for (i=0; ibn[i]=n; + BNerr(BN_F_BN_EXPAND_INTERNAL,BN_R_EXPAND_ON_STATIC_BIGNUM_DATA); + return(NULL); + } + a=A=(BN_ULONG *)OPENSSL_malloc(sizeof(BN_ULONG)*(words+1)); + if (A == NULL) + { + BNerr(BN_F_BN_EXPAND_INTERNAL,ERR_R_MALLOC_FAILURE); + return(NULL); + } +#if 1 + B=b->d; + /* Check if the previous number needs to be copied */ + if (B != NULL) + { + for (i=b->top>>2; i>0; i--,A+=4,B+=4) + { + /* + * The fact that the loop is unrolled + * 4-wise is a tribute to Intel. It's + * the one that doesn't have enough + * registers to accomodate more data. + * I'd unroll it 8-wise otherwise:-) + * + * + */ + BN_ULONG a0,a1,a2,a3; + a0=B[0]; a1=B[1]; a2=B[2]; a3=B[3]; + A[0]=a0; A[1]=a1; A[2]=a2; A[3]=a3; + } + switch (b->top&3) + { + case 3: A[2]=B[2]; + case 2: A[1]=B[1]; + case 1: A[0]=B[0]; + case 0: /* workaround for ultrix cc: without 'case 0', the optimizer does + * the switch table by doing a=top&3; a--; goto jump_table[a]; + * which fails for top== 0 */ + ; + } } - /* There is actually an extra one, this is for debugging my - * stuff */ - ret->bn[BN_CTX_NUM]=NULL; + /* Now need to zero any data between b->top and b->max */ + /* XXX Why? */ - ret->tos=0; - return(ret); -err: - for (j=0; jbn[j]); - Free(ret); -err2: - BNerr(BN_F_BN_CTX_NEW,ERR_R_MALLOC_FAILURE); - return(NULL); + A= &(a[b->top]); + for (i=(words - b->top)>>3; i>0; i--,A+=8) + { + A[0]=0; A[1]=0; A[2]=0; A[3]=0; + A[4]=0; A[5]=0; A[6]=0; A[7]=0; + } + for (i=(words - b->top)&7; i>0; i--,A++) + A[0]=0; +#else + memset(A,0,sizeof(BN_ULONG)*(words+1)); + memcpy(A,b->d,sizeof(b->d[0])*b->top); +#endif + + return(a); } -void BN_CTX_free(c) -BN_CTX *c; +/* This is an internal function that can be used instead of bn_expand2() + * when there is a need to copy BIGNUMs instead of only expanding the + * data part, while still expanding them. + * Especially useful when needing to expand BIGNUMs that are declared + * 'const' and should therefore not be changed. + * The reason to use this instead of a BN_dup() followed by a bn_expand2() + * is memory allocation overhead. A BN_dup() followed by a bn_expand2() + * will allocate new memory for the BIGNUM data twice, and free it once, + * while bn_dup_expand() makes sure allocation is made only once. + */ + +BIGNUM *bn_dup_expand(const BIGNUM *b, int words) { - int i; + BIGNUM *r = NULL; + + if (words > b->dmax) + { + BN_ULONG *a = bn_expand_internal(b, words); - for (i=0; ibn[i]); - Free(c); + if (a) + { + r = BN_new(); + if (r) + { + r->top = b->top; + r->dmax = words; + r->neg = b->neg; + r->d = a; + } + else + { + /* r == NULL, BN_new failure */ + OPENSSL_free(a); + } + } + /* If a == NULL, there was an error in allocation in + bn_expand_internal(), and NULL should be returned */ + } + else + { + r = BN_dup(b); + } + + return r; } -BIGNUM *bn_expand2(b, words) -BIGNUM *b; -int words; - { - BN_ULONG *p; +/* This is an internal function that should not be used in applications. + * It ensures that 'b' has enough room for a 'words' word number number. + * It is mostly used by the various BIGNUM routines. If there is an error, + * NULL is returned. If not, 'b' is returned. */ - if (words > b->max) +BIGNUM *bn_expand2(BIGNUM *b, int words) + { + if (words > b->dmax) { - p=(BN_ULONG *)Realloc(b->d,sizeof(BN_ULONG)*(words+1)); - if (p == NULL) + BN_ULONG *a = bn_expand_internal(b, words); + + if (a) { - BNerr(BN_F_BN_EXPAND2,ERR_R_MALLOC_FAILURE); - return(NULL); + if (b->d) + OPENSSL_free(b->d); + b->d=a; + b->dmax=words; } - b->d=p; - memset(&(p[b->max]),0,((words+1)-b->max)*sizeof(BN_ULONG)); - b->max=words; + else + b = NULL; } - return(b); + return b; } -BIGNUM *BN_dup(a) -BIGNUM *a; +BIGNUM *BN_dup(const BIGNUM *a) { - BIGNUM *r; + BIGNUM *r, *t; - r=BN_new(); - if (r == NULL) return(NULL); - return((BIGNUM *)BN_copy(r,a)); + if (a == NULL) return NULL; + + bn_check_top(a); + + t = BN_new(); + if (t == NULL) return(NULL); + r = BN_copy(t, a); + /* now r == t || r == NULL */ + if (r == NULL) + BN_free(t); + return r; } -BIGNUM *BN_copy(a, b) -BIGNUM *a; -BIGNUM *b; +BIGNUM *BN_copy(BIGNUM *a, const BIGNUM *b) { int i; - BN_ULONG *A,*B; + BN_ULONG *A; + const BN_ULONG *B; + + bn_check_top(b); if (a == b) return(a); if (bn_wexpand(a,b->top) == NULL) return(NULL); @@ -323,35 +483,18 @@ BIGNUM *b; #if 1 A=a->d; B=b->d; - for (i=b->top&(~7); i>0; i-=8) - { - A[0]=B[0]; - A[1]=B[1]; - A[2]=B[2]; - A[3]=B[3]; - A[4]=B[4]; - A[5]=B[5]; - A[6]=B[6]; - A[7]=B[7]; - A+=8; - B+=8; - } - switch (b->top&7) - { - case 7: - A[6]=B[6]; - case 6: - A[5]=B[5]; - case 5: - A[4]=B[4]; - case 4: - A[3]=B[3]; - case 3: - A[2]=B[2]; - case 2: - A[1]=B[1]; - case 1: - A[0]=B[0]; + for (i=b->top>>2; i>0; i--,A+=4,B+=4) + { + BN_ULONG a0,a1,a2,a3; + a0=B[0]; a1=B[1]; a2=B[2]; a3=B[3]; + A[0]=a0; A[1]=a1; A[2]=a2; A[3]=a3; + } + switch (b->top&3) + { + case 3: A[2]=B[2]; + case 2: A[1]=B[1]; + case 1: A[0]=B[0]; + case 0: ; /* ultrix cc workaround, see comments in bn_expand_internal */ } #else memcpy(a->d,b->d,sizeof(b->d[0])*b->top); @@ -359,52 +502,76 @@ BIGNUM *b; /* memset(&(a->d[b->top]),0,sizeof(a->d[0])*(a->max-b->top));*/ a->top=b->top; - if (a->top == 0) + if ((a->top == 0) && (a->d != NULL)) a->d[0]=0; a->neg=b->neg; return(a); } -void BN_clear(a) -BIGNUM *a; +void BN_swap(BIGNUM *a, BIGNUM *b) + { + int flags_old_a, flags_old_b; + BN_ULONG *tmp_d; + int tmp_top, tmp_dmax, tmp_neg; + + flags_old_a = a->flags; + flags_old_b = b->flags; + + tmp_d = a->d; + tmp_top = a->top; + tmp_dmax = a->dmax; + tmp_neg = a->neg; + + a->d = b->d; + a->top = b->top; + a->dmax = b->dmax; + a->neg = b->neg; + + b->d = tmp_d; + b->top = tmp_top; + b->dmax = tmp_dmax; + b->neg = tmp_neg; + + a->flags = (flags_old_a & BN_FLG_MALLOCED) | (flags_old_b & BN_FLG_STATIC_DATA); + b->flags = (flags_old_b & BN_FLG_MALLOCED) | (flags_old_a & BN_FLG_STATIC_DATA); + } + + +void BN_clear(BIGNUM *a) { - memset(a->d,0,a->max*sizeof(a->d[0])); + if (a->d != NULL) + memset(a->d,0,a->dmax*sizeof(a->d[0])); a->top=0; a->neg=0; } -unsigned long BN_get_word(a) -BIGNUM *a; +BN_ULONG BN_get_word(const BIGNUM *a) { int i,n; - unsigned long ret=0; + BN_ULONG ret=0; n=BN_num_bytes(a); - if (n > sizeof(unsigned long)) -#ifdef SIXTY_FOUR_BIT_LONG + if (n > sizeof(BN_ULONG)) return(BN_MASK2); -#else - return(0xFFFFFFFFL); -#endif for (i=a->top-1; i>=0; i--) { #ifndef SIXTY_FOUR_BIT /* the data item > unsigned long */ ret<<=BN_BITS4; /* stops the compiler complaining */ ret<<=BN_BITS4; +#else + ret=0; #endif ret|=a->d[i]; } return(ret); } -int BN_set_word(a,w) -BIGNUM *a; -unsigned long w; +int BN_set_word(BIGNUM *a, BN_ULONG w) { int i,n; - if (bn_expand(a,sizeof(unsigned long)*8) == NULL) return(0); + if (bn_expand(a,sizeof(BN_ULONG)*8) == NULL) return(0); - n=sizeof(unsigned long)/BN_BYTES; + n=sizeof(BN_ULONG)/BN_BYTES; a->neg=0; a->top=0; a->d[0]=(BN_ULONG)w&BN_MASK2; @@ -417,6 +584,8 @@ unsigned long w; #ifndef SIXTY_FOUR_BIT /* the data item > unsigned long */ w>>=BN_BITS4; w>>=BN_BITS4; +#else + w=0; #endif a->d[i]=(BN_ULONG)w&BN_MASK2; if (a->d[i] != 0) a->top=i+1; @@ -424,11 +593,7 @@ unsigned long w; return(1); } -/* ignore negative */ -BIGNUM *BN_bin2bn(s, len, ret) -unsigned char *s; -int len; -BIGNUM *ret; +BIGNUM *BN_bin2bn(const unsigned char *s, int len, BIGNUM *ret) { unsigned int i,m; unsigned int n; @@ -448,6 +613,7 @@ BIGNUM *ret; i=((n-1)/BN_BYTES)+1; m=((n-1)%(BN_BYTES)); ret->top=i; + ret->neg=0; while (n-- > 0) { l=(l<<8L)| *(s++); @@ -465,9 +631,7 @@ BIGNUM *ret; } /* ignore negative */ -int BN_bn2bin(a, to) -BIGNUM *a; -unsigned char *to; +int BN_bn2bin(const BIGNUM *a, unsigned char *to) { int n,i; BN_ULONG l; @@ -481,13 +645,14 @@ unsigned char *to; return(n); } -int BN_ucmp(a, b) -BIGNUM *a; -BIGNUM *b; +int BN_ucmp(const BIGNUM *a, const BIGNUM *b) { int i; BN_ULONG t1,t2,*ap,*bp; + bn_check_top(a); + bn_check_top(b); + i=a->top-b->top; if (i != 0) return(i); ap=a->d; @@ -502,9 +667,7 @@ BIGNUM *b; return(0); } -int BN_cmp(a, b) -BIGNUM *a; -BIGNUM *b; +int BN_cmp(const BIGNUM *a, const BIGNUM *b) { int i; int gt,lt; @@ -519,6 +682,10 @@ BIGNUM *b; else return(0); } + + bn_check_top(a); + bn_check_top(b); + if (a->neg != b->neg) { if (a->neg) @@ -541,27 +708,25 @@ BIGNUM *b; return(0); } -int BN_set_bit(a, n) -BIGNUM *a; -int n; +int BN_set_bit(BIGNUM *a, int n) { - int i,j; + int i,j,k; i=n/BN_BITS2; j=n%BN_BITS2; if (a->top <= i) { - if (bn_expand(a,n) == NULL) return(0); + if (bn_wexpand(a,i+1) == NULL) return(0); + for(k=a->top; kd[k]=0; a->top=i+1; } - a->d[i]|=(1L<d[i]|=(((BN_ULONG)1)<top <= i) return(0); - a->d[i]&=(~(1L<d[i]&=(~(((BN_ULONG)1)<d[i]&(((BN_ULONG)1)<top=w+1; a->d[w]&= ~(BN_MASK2<= 0) && (a->d[w] == 0)) + } + bn_fix_top(a); + return(1); + } + +int bn_cmp_words(const BN_ULONG *a, const BN_ULONG *b, int n) + { + int i; + BN_ULONG aa,bb; + + aa=a[n-1]; + bb=b[n-1]; + if (aa != bb) return((aa > bb)?1:-1); + for (i=n-2; i>=0; i--) + { + aa=a[i]; + bb=b[i]; + if (aa != bb) return((aa > bb)?1:-1); + } + return(0); + } + +/* Here follows a specialised variants of bn_cmp_words(). It has the + property of performing the operation on arrays of different sizes. + The sizes of those arrays is expressed through cl, which is the + common length ( basicall, min(len(a),len(b)) ), and dl, which is the + delta between the two lengths, calculated as len(a)-len(b). + All lengths are the number of BN_ULONGs... */ + +int bn_cmp_part_words(const BN_ULONG *a, const BN_ULONG *b, + int cl, int dl) + { + int n,i; + n = cl-1; + + if (dl < 0) + { + for (i=dl; i<0; i++) { - a->top--; - w--; + if (b[n-i] != 0) + return -1; /* a < b */ } } - return(1); + if (dl > 0) + { + for (i=dl; i>0; i--) + { + if (a[n+i] != 0) + return 1; /* a > b */ + } + } + return bn_cmp_words(a,b,cl); } diff --git a/src/lib/libcrypto/bn/bn_mod.c b/src/lib/libcrypto/bn/bn_mod.c index c351aac14f..5cf82480d7 100644 --- a/src/lib/libcrypto/bn/bn_mod.c +++ b/src/lib/libcrypto/bn/bn_mod.c @@ -1,4 +1,59 @@ /* crypto/bn/bn_mod.c */ +/* Includes code written by Lenka Fibikova + * for the OpenSSL project. */ +/* ==================================================================== + * Copyright (c) 1998-2000 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * openssl-core@openssl.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.openssl.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + * This product includes cryptographic software written by Eric Young + * (eay@cryptsoft.com). This product includes software written by Tim + * Hudson (tjh@cryptsoft.com). + * + */ /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) * All rights reserved. * @@ -56,42 +111,186 @@ * [including the GNU Public Licence.] */ -#include #include "cryptlib.h" #include "bn_lcl.h" -/* rem != m */ -int BN_mod(rem, m, d,ctx) -BIGNUM *rem; -BIGNUM *m; -BIGNUM *d; -BN_CTX *ctx; + +#if 0 /* now just a #define */ +int BN_mod(BIGNUM *rem, const BIGNUM *m, const BIGNUM *d, BN_CTX *ctx) + { + return(BN_div(NULL,rem,m,d,ctx)); + /* note that rem->neg == m->neg (unless the remainder is zero) */ + } +#endif + + +int BN_nnmod(BIGNUM *r, const BIGNUM *m, const BIGNUM *d, BN_CTX *ctx) + { + /* like BN_mod, but returns non-negative remainder + * (i.e., 0 <= r < |d| always holds) */ + + if (!(BN_mod(r,m,d,ctx))) + return 0; + if (!r->neg) + return 1; + /* now -|d| < r < 0, so we have to set r := r + |d| */ + return (d->neg ? BN_sub : BN_add)(r, r, d); +} + + +int BN_mod_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m, BN_CTX *ctx) + { + if (!BN_add(r, a, b)) return 0; + return BN_nnmod(r, r, m, ctx); + } + + +/* BN_mod_add variant that may be used if both a and b are non-negative + * and less than m */ +int BN_mod_add_quick(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m) + { + if (!BN_add(r, a, b)) return 0; + if (BN_ucmp(r, m) >= 0) + return BN_usub(r, r, m); + return 1; + } + + +int BN_mod_sub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m, BN_CTX *ctx) + { + if (!BN_sub(r, a, b)) return 0; + return BN_nnmod(r, r, m, ctx); + } + + +/* BN_mod_sub variant that may be used if both a and b are non-negative + * and less than m */ +int BN_mod_sub_quick(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m) + { + if (!BN_sub(r, a, b)) return 0; + if (r->neg) + return BN_add(r, r, m); + return 1; + } + + +/* slow but works */ +int BN_mod_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m, + BN_CTX *ctx) { -#if 0 /* The old slow way */ - int i,nm,nd; - BIGNUM *dv; + BIGNUM *t; + int ret=0; + + bn_check_top(a); + bn_check_top(b); + bn_check_top(m); + + BN_CTX_start(ctx); + if ((t = BN_CTX_get(ctx)) == NULL) goto err; + if (a == b) + { if (!BN_sqr(t,a,ctx)) goto err; } + else + { if (!BN_mul(t,a,b,ctx)) goto err; } + if (!BN_nnmod(r,t,m,ctx)) goto err; + ret=1; +err: + BN_CTX_end(ctx); + return(ret); + } - if (BN_ucmp(m,d) < 0) - return((BN_copy(rem,m) == NULL)?0:1); - dv=ctx->bn[ctx->tos]; +int BN_mod_sqr(BIGNUM *r, const BIGNUM *a, const BIGNUM *m, BN_CTX *ctx) + { + if (!BN_sqr(r, a, ctx)) return 0; + /* r->neg == 0, thus we don't need BN_nnmod */ + return BN_mod(r, r, m, ctx); + } - if (!BN_copy(rem,m)) return(0); - nm=BN_num_bits(rem); - nd=BN_num_bits(d); - if (!BN_lshift(dv,d,nm-nd)) return(0); - for (i=nm-nd; i>=0; i--) +int BN_mod_lshift1(BIGNUM *r, const BIGNUM *a, const BIGNUM *m, BN_CTX *ctx) + { + if (!BN_lshift1(r, a)) return 0; + return BN_nnmod(r, r, m, ctx); + } + + +/* BN_mod_lshift1 variant that may be used if a is non-negative + * and less than m */ +int BN_mod_lshift1_quick(BIGNUM *r, const BIGNUM *a, const BIGNUM *m) + { + if (!BN_lshift1(r, a)) return 0; + if (BN_cmp(r, m) >= 0) + return BN_sub(r, r, m); + return 1; + } + + +int BN_mod_lshift(BIGNUM *r, const BIGNUM *a, int n, const BIGNUM *m, BN_CTX *ctx) + { + BIGNUM *abs_m = NULL; + int ret; + + if (!BN_nnmod(r, a, m, ctx)) return 0; + + if (m->neg) { - if (BN_cmp(rem,dv) >= 0) + abs_m = BN_dup(m); + if (abs_m == NULL) return 0; + abs_m->neg = 0; + } + + ret = BN_mod_lshift_quick(r, r, n, (abs_m ? abs_m : m)); + + if (abs_m) + BN_free(abs_m); + return ret; + } + + +/* BN_mod_lshift variant that may be used if a is non-negative + * and less than m */ +int BN_mod_lshift_quick(BIGNUM *r, const BIGNUM *a, int n, const BIGNUM *m) + { + if (r != a) + { + if (BN_copy(r, a) == NULL) return 0; + } + + while (n > 0) + { + int max_shift; + + /* 0 < r < m */ + max_shift = BN_num_bits(m) - BN_num_bits(r); + /* max_shift >= 0 */ + + if (max_shift < 0) + { + BNerr(BN_F_BN_MOD_LSHIFT_QUICK, BN_R_INPUT_NOT_REDUCED); + return 0; + } + + if (max_shift > n) + max_shift = n; + + if (max_shift) + { + if (!BN_lshift(r, r, max_shift)) return 0; + n -= max_shift; + } + else + { + if (!BN_lshift1(r, r)) return 0; + --n; + } + + /* BN_num_bits(r) <= BN_num_bits(m) */ + + if (BN_cmp(r, m) >= 0) { - if (!BN_sub(rem,rem,dv)) return(0); + if (!BN_sub(r, r, m)) return 0; } - if (!BN_rshift1(dv,dv)) return(0); } - return(1); -#else - return(BN_div(NULL,rem,m,d,ctx)); -#endif + + return 1; } - diff --git a/src/lib/libcrypto/bn/bn_mont.c b/src/lib/libcrypto/bn/bn_mont.c index e435df61f8..c9ebdbaabe 100644 --- a/src/lib/libcrypto/bn/bn_mont.c +++ b/src/lib/libcrypto/bn/bn_mont.c @@ -56,59 +56,67 @@ * [including the GNU Public Licence.] */ +/* + * Details about Montgomery multiplication algorithms can be found at + * http://security.ece.orst.edu/publications.html, e.g. + * http://security.ece.orst.edu/koc/papers/j37acmon.pdf and + * sections 3.8 and 4.2 in http://security.ece.orst.edu/koc/papers/r01rsasw.pdf + */ + #include #include "cryptlib.h" #include "bn_lcl.h" -int BN_mod_mul_montgomery(r,a,b,mont,ctx) -BIGNUM *r,*a,*b; -BN_MONT_CTX *mont; -BN_CTX *ctx; +#define MONT_WORD /* use the faster word-based algorithm */ + +int BN_mod_mul_montgomery(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, + BN_MONT_CTX *mont, BN_CTX *ctx) { BIGNUM *tmp; + int ret=0; - tmp=ctx->bn[ctx->tos++]; + BN_CTX_start(ctx); + tmp = BN_CTX_get(ctx); + if (tmp == NULL) goto err; + bn_check_top(tmp); if (a == b) { if (!BN_sqr(tmp,a,ctx)) goto err; } else { - if (!BN_mul(tmp,a,b)) goto err; + if (!BN_mul(tmp,a,b,ctx)) goto err; } /* reduce from aRR to aR */ if (!BN_from_montgomery(r,tmp,mont,ctx)) goto err; - ctx->tos--; - return(1); + ret=1; err: - return(0); + BN_CTX_end(ctx); + return(ret); } -#define MONT_WORD +int BN_from_montgomery(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont, + BN_CTX *ctx) + { + int retn=0; #ifdef MONT_WORD -int BN_from_montgomery(ret,a,mont,ctx) -BIGNUM *ret; -BIGNUM *a; -BN_MONT_CTX *mont; -BN_CTX *ctx; - { - BIGNUM *n,*t1,*r; - BN_ULONG *ap,*np,*rp,n0,v; + BIGNUM *n,*r; + BN_ULONG *ap,*np,*rp,n0,v,*nrp; int al,nl,max,i,x,ri; - int retn=0; - t1=ctx->bn[ctx->tos]; - r=ctx->bn[ctx->tos+1]; + BN_CTX_start(ctx); + if ((r = BN_CTX_get(ctx)) == NULL) goto err; if (!BN_copy(r,a)) goto err; - n=mont->N; + n= &(mont->N); ap=a->d; - /* mont->ri is the size of mont->N in bits/words */ + /* mont->ri is the size of mont->N in bits (rounded up + to the word size) */ al=ri=mont->ri/BN_BITS2; - + nl=n->top; if ((al == 0) || (nl == 0)) { r->top=0; return(1); } @@ -119,6 +127,7 @@ BN_CTX *ctx; r->neg=a->neg^n->neg; np=n->d; rp=r->d; + nrp= &(r->d[nl]); /* clear the top words of T */ #if 1 @@ -131,176 +140,210 @@ BN_CTX *ctx; r->top=max; n0=mont->n0; +#ifdef BN_COUNT + fprintf(stderr,"word BN_from_montgomery %d * %d\n",nl,nl); +#endif for (i=0; i nl) - { - x2=nl; - x1=0; - } - else - { - x2=i+4; - x1=nl-x2; - } - v=bn_mul_add_words(&(rp[x1]),&(np[x1]),x2,(rp[x1]*n0)&BN_MASK2); +#ifdef __TANDEM + { + long long t1; + long long t2; + long long t3; + t1 = rp[0] * (n0 & 0177777); + t2 = 037777600000l; + t2 = n0 & t2; + t3 = rp[0] & 0177777; + t2 = (t3 * t2) & BN_MASK2; + t1 = t1 + t2; + v=bn_mul_add_words(rp,np,nl,(BN_ULONG) t1); + } #else v=bn_mul_add_words(rp,np,nl,(rp[0]*n0)&BN_MASK2); #endif - - if (((rp[nl]+=v)&BN_MASK2) < v) + nrp++; + rp++; + if (((nrp[-1]+=v)&BN_MASK2) >= v) + continue; + else { - for (x=(nl+1); (((++rp[x])&BN_MASK2) == 0); x++) - ; + if (((++nrp[0])&BN_MASK2) != 0) continue; + if (((++nrp[1])&BN_MASK2) != 0) continue; + for (x=2; (((++nrp[x])&BN_MASK2) == 0); x++) ; } - rp++; } - while (r->d[r->top-1] == 0) - r->top--; - + bn_fix_top(r); + /* mont->ri will be a multiple of the word size */ #if 0 BN_rshift(ret,r,mont->ri); #else - ap=r->d; - rp=ret->d; + ret->neg = r->neg; x=ri; - al=r->top-x; - for (i=0; id; + ap= &(r->d[x]); + if (r->top < x) + al=0; + else + al=r->top-x; ret->top=al; -#endif - - if (BN_ucmp(ret,mont->N) >= 0) + al-=4; + for (i=0; iN); /* XXX */ + BN_ULONG t1,t2,t3,t4; + + t1=ap[i+0]; + t2=ap[i+1]; + t3=ap[i+2]; + t4=ap[i+3]; + rp[i+0]=t1; + rp[i+1]=t2; + rp[i+2]=t3; + rp[i+3]=t4; } - retn=1; -err: - return(retn); - } -#else -int BN_from_montgomery(r,a,mont,ctx) -BIGNUM *r; -BIGNUM *a; -BN_MONT_CTX *mont; -BN_CTX *ctx; - { + al+=4; + for (; ibn[ctx->tos]; - t2=ctx->bn[ctx->tos+1]; - + BN_CTX_start(ctx); + t1 = BN_CTX_get(ctx); + t2 = BN_CTX_get(ctx); + if (t1 == NULL || t2 == NULL) goto err; + if (!BN_copy(t1,a)) goto err; - /* can cheat */ BN_mask_bits(t1,mont->ri); - if (!BN_mul(t2,t1,mont->Ni)) goto err; + if (!BN_mul(t2,t1,&mont->Ni,ctx)) goto err; BN_mask_bits(t2,mont->ri); - if (!BN_mul(t1,t2,mont->N)) goto err; + if (!BN_mul(t1,t2,&mont->N,ctx)) goto err; if (!BN_add(t2,a,t1)) goto err; - BN_rshift(r,t2,mont->ri); + if (!BN_rshift(ret,t2,mont->ri)) goto err; +#endif /* MONT_WORD */ - if (BN_ucmp(r,mont->N) >= 0) - bn_qsub(r,r,mont->N); - - return(1); -err: - return(0); + if (BN_ucmp(ret, &(mont->N)) >= 0) + { + if (!BN_usub(ret,ret,&(mont->N))) goto err; + } + retn=1; + err: + BN_CTX_end(ctx); + return(retn); } -#endif -BN_MONT_CTX *BN_MONT_CTX_new() +BN_MONT_CTX *BN_MONT_CTX_new(void) { BN_MONT_CTX *ret; - if ((ret=(BN_MONT_CTX *)Malloc(sizeof(BN_MONT_CTX))) == NULL) - return(NULL); - ret->ri=0; - ret->RR=BN_new(); - ret->N=BN_new(); - ret->Ni=NULL; - if ((ret->RR == NULL) || (ret->N == NULL)) - { - BN_MONT_CTX_free(ret); + if ((ret=(BN_MONT_CTX *)OPENSSL_malloc(sizeof(BN_MONT_CTX))) == NULL) return(NULL); - } + + BN_MONT_CTX_init(ret); + ret->flags=BN_FLG_MALLOCED; return(ret); } -void BN_MONT_CTX_free(mont) -BN_MONT_CTX *mont; +void BN_MONT_CTX_init(BN_MONT_CTX *ctx) { - if (mont->RR != NULL) BN_free(mont->RR); - if (mont->N != NULL) BN_free(mont->N); - if (mont->Ni != NULL) BN_free(mont->Ni); - Free(mont); + ctx->ri=0; + BN_init(&(ctx->RR)); + BN_init(&(ctx->N)); + BN_init(&(ctx->Ni)); + ctx->flags=0; } -int BN_MONT_CTX_set(mont,mod,ctx) -BN_MONT_CTX *mont; -BIGNUM *mod; -BN_CTX *ctx; +void BN_MONT_CTX_free(BN_MONT_CTX *mont) { - BIGNUM *Ri=NULL,*R=NULL; + if(mont == NULL) + return; + + BN_free(&(mont->RR)); + BN_free(&(mont->N)); + BN_free(&(mont->Ni)); + if (mont->flags & BN_FLG_MALLOCED) + OPENSSL_free(mont); + } - if (mont->RR == NULL) mont->RR=BN_new(); - if (mont->N == NULL) mont->N=BN_new(); +int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx) + { + BIGNUM Ri,*R; - R=mont->RR; /* grab RR as a temp */ - BN_copy(mont->N,mod); /* Set N */ + BN_init(&Ri); + R= &(mont->RR); /* grab RR as a temp */ + BN_copy(&(mont->N),mod); /* Set N */ + mont->N.neg = 0; #ifdef MONT_WORD -{ - BIGNUM tmod; - BN_ULONG buf[2]; - /* int z; */ - - mont->ri=(BN_num_bits(mod)+(BN_BITS2-1))/BN_BITS2*BN_BITS2; - BN_lshift(R,BN_value_one(),BN_BITS2); /* R */ - /* I was bad, this modification of a passed variable was - * breaking the multithreaded stuff :-( - * z=mod->top; - * mod->top=1; */ - - buf[0]=mod->d[0]; - buf[1]=0; - tmod.d=buf; - tmod.top=1; - tmod.max=mod->max; - tmod.neg=mod->neg; - - if ((Ri=BN_mod_inverse(R,&tmod,ctx)) == NULL) goto err; /* Ri */ - BN_lshift(Ri,Ri,BN_BITS2); /* R*Ri */ - bn_qsub(Ri,Ri,BN_value_one()); /* R*Ri - 1 */ - BN_div(Ri,NULL,Ri,&tmod,ctx); - mont->n0=Ri->d[0]; - BN_free(Ri); - /* mod->top=z; */ -} -#else - mont->ri=BN_num_bits(mod); - BN_lshift(R,BN_value_one(),mont->ri); /* R */ - if ((Ri=BN_mod_inverse(R,mod,ctx)) == NULL) goto err; /* Ri */ - BN_lshift(Ri,Ri,mont->ri); /* R*Ri */ - bn_qsub(Ri,Ri,BN_value_one()); /* R*Ri - 1 */ - BN_div(Ri,NULL,Ri,mod,ctx); - if (mont->Ni != NULL) BN_free(mont->Ni); - mont->Ni=Ri; /* Ni=(R*Ri-1)/N */ + { + BIGNUM tmod; + BN_ULONG buf[2]; + + mont->ri=(BN_num_bits(mod)+(BN_BITS2-1))/BN_BITS2*BN_BITS2; + if (!(BN_zero(R))) goto err; + if (!(BN_set_bit(R,BN_BITS2))) goto err; /* R */ + + buf[0]=mod->d[0]; /* tmod = N mod word size */ + buf[1]=0; + tmod.d=buf; + tmod.top=1; + tmod.dmax=2; + tmod.neg=0; + /* Ri = R^-1 mod N*/ + if ((BN_mod_inverse(&Ri,R,&tmod,ctx)) == NULL) + goto err; + if (!BN_lshift(&Ri,&Ri,BN_BITS2)) goto err; /* R*Ri */ + if (!BN_is_zero(&Ri)) + { + if (!BN_sub_word(&Ri,1)) goto err; + } + else /* if N mod word size == 1 */ + { + if (!BN_set_word(&Ri,BN_MASK2)) goto err; /* Ri-- (mod word size) */ + } + if (!BN_div(&Ri,NULL,&Ri,&tmod,ctx)) goto err; + /* Ni = (R*Ri-1)/N, + * keep only least significant word: */ + mont->n0 = (Ri.top > 0) ? Ri.d[0] : 0; + BN_free(&Ri); + } +#else /* !MONT_WORD */ + { /* bignum version */ + mont->ri=BN_num_bits(&mont->N); + if (!BN_zero(R)) goto err; + if (!BN_set_bit(R,mont->ri)) goto err; /* R = 2^ri */ + /* Ri = R^-1 mod N*/ + if ((BN_mod_inverse(&Ri,R,&mont->N,ctx)) == NULL) + goto err; + if (!BN_lshift(&Ri,&Ri,mont->ri)) goto err; /* R*Ri */ + if (!BN_sub_word(&Ri,1)) goto err; + /* Ni = (R*Ri-1) / N */ + if (!BN_div(&(mont->Ni),NULL,&Ri,&mont->N,ctx)) goto err; + BN_free(&Ri); + } #endif /* setup RR for conversions */ - BN_lshift(mont->RR,BN_value_one(),mont->ri*2); - BN_mod(mont->RR,mont->RR,mont->N,ctx); + if (!BN_zero(&(mont->RR))) goto err; + if (!BN_set_bit(&(mont->RR),mont->ri*2)) goto err; + if (!BN_mod(&(mont->RR),&(mont->RR),&(mont->N),ctx)) goto err; return(1); err: return(0); } +BN_MONT_CTX *BN_MONT_CTX_copy(BN_MONT_CTX *to, BN_MONT_CTX *from) + { + if (to == from) return(to); + + if (!BN_copy(&(to->RR),&(from->RR))) return NULL; + if (!BN_copy(&(to->N),&(from->N))) return NULL; + if (!BN_copy(&(to->Ni),&(from->Ni))) return NULL; + to->ri=from->ri; + to->n0=from->n0; + return(to); + } + diff --git a/src/lib/libcrypto/bn/bn_mpi.c b/src/lib/libcrypto/bn/bn_mpi.c index 53945c1057..05fa9d1e9a 100644 --- a/src/lib/libcrypto/bn/bn_mpi.c +++ b/src/lib/libcrypto/bn/bn_mpi.c @@ -60,9 +60,7 @@ #include "cryptlib.h" #include "bn_lcl.h" -int BN_bn2mpi(a,d) -BIGNUM *a; -unsigned char *d; +int BN_bn2mpi(const BIGNUM *a, unsigned char *d) { int bits; int num=0; @@ -90,10 +88,7 @@ unsigned char *d; return(num+4+ext); } -BIGNUM *BN_mpi2bn(d,n,a) -unsigned char *d; -int n; -BIGNUM *a; +BIGNUM *BN_mpi2bn(const unsigned char *d, int n, BIGNUM *a) { long len; int neg=0; @@ -103,7 +98,7 @@ BIGNUM *a; BNerr(BN_F_BN_MPI2BN,BN_R_INVALID_LENGTH); return(NULL); } - len=(d[0]<<24)|(d[1]<<16)|(d[2]<<8)|d[3]; + len=((long)d[0]<<24)|((long)d[1]<<16)|((int)d[2]<<8)|(int)d[3]; if ((len+4) != n) { BNerr(BN_F_BN_MPI2BN,BN_R_ENCODING_ERROR); diff --git a/src/lib/libcrypto/bn/bn_mul.c b/src/lib/libcrypto/bn/bn_mul.c index d0c04e1d4b..fd598b8b3d 100644 --- a/src/lib/libcrypto/bn/bn_mul.c +++ b/src/lib/libcrypto/bn/bn_mul.c @@ -56,154 +56,1108 @@ * [including the GNU Public Licence.] */ +#ifndef BN_DEBUG +# undef NDEBUG /* avoid conflicting definitions */ +# define NDEBUG +#endif + #include +#include #include "cryptlib.h" #include "bn_lcl.h" -/* r must be different to a and b */ -/* int BN_mmul(r, a, b) */ -int BN_mul(r, a, b) -BIGNUM *r; -BIGNUM *a; -BIGNUM *b; +#if defined(OPENSSL_NO_ASM) || !(defined(__i386) || defined(__i386__))/* Assembler implementation exists only for x86 */ +/* Here follows specialised variants of bn_add_words() and + bn_sub_words(). They have the property performing operations on + arrays of different sizes. The sizes of those arrays is expressed through + cl, which is the common length ( basicall, min(len(a),len(b)) ), and dl, + which is the delta between the two lengths, calculated as len(a)-len(b). + All lengths are the number of BN_ULONGs... For the operations that require + a result array as parameter, it must have the length cl+abs(dl). + These functions should probably end up in bn_asm.c as soon as there are + assembler counterparts for the systems that use assembler files. */ + +BN_ULONG bn_sub_part_words(BN_ULONG *r, + const BN_ULONG *a, const BN_ULONG *b, + int cl, int dl) { - int i; - int max,al,bl; - BN_ULONG *ap,*bp,*rp; + BN_ULONG c, t; - al=a->top; - bl=b->top; - if ((al == 0) || (bl == 0)) + assert(cl >= 0); + c = bn_sub_words(r, a, b, cl); + + if (dl == 0) + return c; + + r += cl; + a += cl; + b += cl; + + if (dl < 0) { - r->top=0; - return(1); - } +#ifdef BN_COUNT + fprintf(stderr, " bn_sub_part_words %d + %d (dl < 0, c = %d)\n", cl, dl, c); +#endif + for (;;) + { + t = b[0]; + r[0] = (0-t-c)&BN_MASK2; + if (t != 0) c=1; + if (++dl >= 0) break; + + t = b[1]; + r[1] = (0-t-c)&BN_MASK2; + if (t != 0) c=1; + if (++dl >= 0) break; + + t = b[2]; + r[2] = (0-t-c)&BN_MASK2; + if (t != 0) c=1; + if (++dl >= 0) break; - max=(al+bl); - if (bn_wexpand(r,max) == NULL) return(0); - r->top=max; - r->neg=a->neg^b->neg; - ap=a->d; - bp=b->d; - rp=r->d; + t = b[3]; + r[3] = (0-t-c)&BN_MASK2; + if (t != 0) c=1; + if (++dl >= 0) break; - rp[al]=bn_mul_words(rp,ap,al,*(bp++)); - rp++; - for (i=1; i 0, c = %d)\n", cl, dl, c); +#endif + while(c) + { + t = a[0]; + r[0] = (t-c)&BN_MASK2; + if (t != 0) c=0; + if (--dl <= 0) break; + + t = a[1]; + r[1] = (t-c)&BN_MASK2; + if (t != 0) c=0; + if (--dl <= 0) break; + + t = a[2]; + r[2] = (t-c)&BN_MASK2; + if (t != 0) c=0; + if (--dl <= 0) break; + + t = a[3]; + r[3] = (t-c)&BN_MASK2; + if (t != 0) c=0; + if (--dl <= 0) break; + + save_dl = dl; + a += 4; + r += 4; + } + if (dl > 0) + { +#ifdef BN_COUNT + fprintf(stderr, " bn_sub_part_words %d + %d (dl > 0, c == 0)\n", cl, dl); +#endif + if (save_dl > dl) + { + switch (save_dl - dl) + { + case 1: + r[1] = a[1]; + if (--dl <= 0) break; + case 2: + r[2] = a[2]; + if (--dl <= 0) break; + case 3: + r[3] = a[3]; + if (--dl <= 0) break; + } + a += 4; + r += 4; + } + } + if (dl > 0) + { +#ifdef BN_COUNT + fprintf(stderr, " bn_sub_part_words %d + %d (dl > 0, copy)\n", cl, dl); +#endif + for(;;) + { + r[0] = a[0]; + if (--dl <= 0) break; + r[1] = a[1]; + if (--dl <= 0) break; + r[2] = a[2]; + if (--dl <= 0) break; + r[3] = a[3]; + if (--dl <= 0) break; + + a += 4; + r += 4; + } + } } - if (r->d[max-1] == 0) r->top--; - return(1); + return c; } +#endif -#if 0 -#include "stack.h" +BN_ULONG bn_add_part_words(BN_ULONG *r, + const BN_ULONG *a, const BN_ULONG *b, + int cl, int dl) + { + BN_ULONG c, l, t; + + assert(cl >= 0); + c = bn_add_words(r, a, b, cl); + + if (dl == 0) + return c; + + r += cl; + a += cl; + b += cl; + + if (dl < 0) + { + int save_dl = dl; +#ifdef BN_COUNT + fprintf(stderr, " bn_add_part_words %d + %d (dl < 0, c = %d)\n", cl, dl, c); +#endif + while (c) + { + l=(c+b[0])&BN_MASK2; + c=(l < c); + r[0]=l; + if (++dl >= 0) break; + + l=(c+b[1])&BN_MASK2; + c=(l < c); + r[1]=l; + if (++dl >= 0) break; + + l=(c+b[2])&BN_MASK2; + c=(l < c); + r[2]=l; + if (++dl >= 0) break; -int limit=16; + l=(c+b[3])&BN_MASK2; + c=(l < c); + r[3]=l; + if (++dl >= 0) break; -typedef struct bn_pool_st + save_dl = dl; + b+=4; + r+=4; + } + if (dl < 0) + { +#ifdef BN_COUNT + fprintf(stderr, " bn_add_part_words %d + %d (dl < 0, c == 0)\n", cl, dl); +#endif + if (save_dl < dl) + { + switch (dl - save_dl) + { + case 1: + r[1] = b[1]; + if (++dl >= 0) break; + case 2: + r[2] = b[2]; + if (++dl >= 0) break; + case 3: + r[3] = b[3]; + if (++dl >= 0) break; + } + b += 4; + r += 4; + } + } + if (dl < 0) + { +#ifdef BN_COUNT + fprintf(stderr, " bn_add_part_words %d + %d (dl < 0, copy)\n", cl, dl); +#endif + for(;;) + { + r[0] = b[0]; + if (++dl >= 0) break; + r[1] = b[1]; + if (++dl >= 0) break; + r[2] = b[2]; + if (++dl >= 0) break; + r[3] = b[3]; + if (++dl >= 0) break; + + b += 4; + r += 4; + } + } + } + else + { + int save_dl = dl; +#ifdef BN_COUNT + fprintf(stderr, " bn_add_part_words %d + %d (dl > 0)\n", cl, dl); +#endif + while (c) + { + t=(a[0]+c)&BN_MASK2; + c=(t < c); + r[0]=t; + if (--dl <= 0) break; + + t=(a[1]+c)&BN_MASK2; + c=(t < c); + r[1]=t; + if (--dl <= 0) break; + + t=(a[2]+c)&BN_MASK2; + c=(t < c); + r[2]=t; + if (--dl <= 0) break; + + t=(a[3]+c)&BN_MASK2; + c=(t < c); + r[3]=t; + if (--dl <= 0) break; + + save_dl = dl; + a+=4; + r+=4; + } +#ifdef BN_COUNT + fprintf(stderr, " bn_add_part_words %d + %d (dl > 0, c == 0)\n", cl, dl); +#endif + if (dl > 0) + { + if (save_dl > dl) + { + switch (save_dl - dl) + { + case 1: + r[1] = a[1]; + if (--dl <= 0) break; + case 2: + r[2] = a[2]; + if (--dl <= 0) break; + case 3: + r[3] = a[3]; + if (--dl <= 0) break; + } + a += 4; + r += 4; + } + } + if (dl > 0) + { +#ifdef BN_COUNT + fprintf(stderr, " bn_add_part_words %d + %d (dl > 0, copy)\n", cl, dl); +#endif + for(;;) + { + r[0] = a[0]; + if (--dl <= 0) break; + r[1] = a[1]; + if (--dl <= 0) break; + r[2] = a[2]; + if (--dl <= 0) break; + r[3] = a[3]; + if (--dl <= 0) break; + + a += 4; + r += 4; + } + } + } + return c; + } + +#ifdef BN_RECURSION +/* Karatsuba recursive multiplication algorithm + * (cf. Knuth, The Art of Computer Programming, Vol. 2) */ + +/* r is 2*n2 words in size, + * a and b are both n2 words in size. + * n2 must be a power of 2. + * We multiply and return the result. + * t must be 2*n2 words in size + * We calculate + * a[0]*b[0] + * a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0]) + * a[1]*b[1] + */ +void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, + int dna, int dnb, BN_ULONG *t) { - int used; - int tos; - STACK *sk; - } BN_POOL; + int n=n2/2,c1,c2; + int tna=n+dna, tnb=n+dnb; + unsigned int neg,zero; + BN_ULONG ln,lo,*p; + +# ifdef BN_COUNT + fprintf(stderr," bn_mul_recursive %d * %d\n",n2,n2); +# endif +# ifdef BN_MUL_COMBA +# if 0 + if (n2 == 4) + { + bn_mul_comba4(r,a,b); + return; + } +# endif + /* Only call bn_mul_comba 8 if n2 == 8 and the + * two arrays are complete [steve] + */ + if (n2 == 8 && dna == 0 && dnb == 0) + { + bn_mul_comba8(r,a,b); + return; + } +# endif /* BN_MUL_COMBA */ + /* Else do normal multiply */ + if (n2 < BN_MUL_RECURSIVE_SIZE_NORMAL) + { + bn_mul_normal(r,a,n2+dna,b,n2+dnb); + if ((dna + dnb) < 0) + memset(&r[2*n2 + dna + dnb], 0, + sizeof(BN_ULONG) * -(dna + dnb)); + return; + } + /* r=(a[0]-a[1])*(b[1]-b[0]) */ + c1=bn_cmp_part_words(a,&(a[n]),tna,n-tna); + c2=bn_cmp_part_words(&(b[n]),b,tnb,tnb-n); + zero=neg=0; + switch (c1*3+c2) + { + case -4: + bn_sub_part_words(t, &(a[n]),a, tna,tna-n); /* - */ + bn_sub_part_words(&(t[n]),b, &(b[n]),tnb,n-tnb); /* - */ + break; + case -3: + zero=1; + break; + case -2: + bn_sub_part_words(t, &(a[n]),a, tna,tna-n); /* - */ + bn_sub_part_words(&(t[n]),&(b[n]),b, tnb,tnb-n); /* + */ + neg=1; + break; + case -1: + case 0: + case 1: + zero=1; + break; + case 2: + bn_sub_part_words(t, a, &(a[n]),tna,n-tna); /* + */ + bn_sub_part_words(&(t[n]),b, &(b[n]),tnb,n-tnb); /* - */ + neg=1; + break; + case 3: + zero=1; + break; + case 4: + bn_sub_part_words(t, a, &(a[n]),tna,n-tna); + bn_sub_part_words(&(t[n]),&(b[n]),b, tnb,tnb-n); + break; + } + +# ifdef BN_MUL_COMBA + if (n == 4 && dna == 0 && dnb == 0) /* XXX: bn_mul_comba4 could take + extra args to do this well */ + { + if (!zero) + bn_mul_comba4(&(t[n2]),t,&(t[n])); + else + memset(&(t[n2]),0,8*sizeof(BN_ULONG)); + + bn_mul_comba4(r,a,b); + bn_mul_comba4(&(r[n2]),&(a[n]),&(b[n])); + } + else if (n == 8 && dna == 0 && dnb == 0) /* XXX: bn_mul_comba8 could + take extra args to do this + well */ + { + if (!zero) + bn_mul_comba8(&(t[n2]),t,&(t[n])); + else + memset(&(t[n2]),0,16*sizeof(BN_ULONG)); + + bn_mul_comba8(r,a,b); + bn_mul_comba8(&(r[n2]),&(a[n]),&(b[n])); + } + else +# endif /* BN_MUL_COMBA */ + { + p= &(t[n2*2]); + if (!zero) + bn_mul_recursive(&(t[n2]),t,&(t[n]),n,0,0,p); + else + memset(&(t[n2]),0,n2*sizeof(BN_ULONG)); + bn_mul_recursive(r,a,b,n,0,0,p); + bn_mul_recursive(&(r[n2]),&(a[n]),&(b[n]),n,dna,dnb,p); + } -BIGNUM *BN_POOL_push(bp) -BN_POOL *bp; + /* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign + * r[10] holds (a[0]*b[0]) + * r[32] holds (b[1]*b[1]) + */ + + c1=(int)(bn_add_words(t,r,&(r[n2]),n2)); + + if (neg) /* if t[32] is negative */ + { + c1-=(int)(bn_sub_words(&(t[n2]),t,&(t[n2]),n2)); + } + else + { + /* Might have a carry */ + c1+=(int)(bn_add_words(&(t[n2]),&(t[n2]),t,n2)); + } + + /* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1]) + * r[10] holds (a[0]*b[0]) + * r[32] holds (b[1]*b[1]) + * c1 holds the carry bits + */ + c1+=(int)(bn_add_words(&(r[n]),&(r[n]),&(t[n2]),n2)); + if (c1) + { + p= &(r[n+n2]); + lo= *p; + ln=(lo+c1)&BN_MASK2; + *p=ln; + + /* The overflow will stop before we over write + * words we should not overwrite */ + if (ln < (BN_ULONG)c1) + { + do { + p++; + lo= *p; + ln=(lo+1)&BN_MASK2; + *p=ln; + } while (ln == 0); + } + } + } + +/* n+tn is the word length + * t needs to be n*4 is size, as does r */ +void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n, + int tna, int tnb, BN_ULONG *t) { - BIGNUM *ret; + int i,j,n2=n*2; + unsigned int c1,c2,neg,zero; + BN_ULONG ln,lo,*p; + +# ifdef BN_COUNT + fprintf(stderr," bn_mul_part_recursive (%d+%d) * (%d+%d)\n", + tna, n, tnb, n); +# endif + if (n < 8) + { + bn_mul_normal(r,a,n+tna,b,n+tnb); + return; + } - if (bp->used >= bp->tos) + /* r=(a[0]-a[1])*(b[1]-b[0]) */ + c1=bn_cmp_part_words(a,&(a[n]),tna,n-tna); + c2=bn_cmp_part_words(&(b[n]),b,tnb,tnb-n); + zero=neg=0; + switch (c1*3+c2) + { + case -4: + bn_sub_part_words(t, &(a[n]),a, tna,tna-n); /* - */ + bn_sub_part_words(&(t[n]),b, &(b[n]),tnb,n-tnb); /* - */ + break; + case -3: + zero=1; + /* break; */ + case -2: + bn_sub_part_words(t, &(a[n]),a, tna,tna-n); /* - */ + bn_sub_part_words(&(t[n]),&(b[n]),b, tnb,tnb-n); /* + */ + neg=1; + break; + case -1: + case 0: + case 1: + zero=1; + /* break; */ + case 2: + bn_sub_part_words(t, a, &(a[n]),tna,n-tna); /* + */ + bn_sub_part_words(&(t[n]),b, &(b[n]),tnb,n-tnb); /* - */ + neg=1; + break; + case 3: + zero=1; + /* break; */ + case 4: + bn_sub_part_words(t, a, &(a[n]),tna,n-tna); + bn_sub_part_words(&(t[n]),&(b[n]),b, tnb,tnb-n); + break; + } + /* The zero case isn't yet implemented here. The speedup + would probably be negligible. */ +# if 0 + if (n == 4) { - ret=BN_new(); - sk_push(bp->sk,(char *)ret); - bp->tos++; - bp->used++; + bn_mul_comba4(&(t[n2]),t,&(t[n])); + bn_mul_comba4(r,a,b); + bn_mul_normal(&(r[n2]),&(a[n]),tn,&(b[n]),tn); + memset(&(r[n2+tn*2]),0,sizeof(BN_ULONG)*(n2-tn*2)); } else +# endif + if (n == 8) { - ret=(BIGNUM *)sk_value(bp->sk,bp->used); - bp->used++; + bn_mul_comba8(&(t[n2]),t,&(t[n])); + bn_mul_comba8(r,a,b); + bn_mul_normal(&(r[n2]),&(a[n]),tna,&(b[n]),tnb); + memset(&(r[n2+tna+tnb]),0,sizeof(BN_ULONG)*(n2-tna-tnb)); + } + else + { + p= &(t[n2*2]); + bn_mul_recursive(&(t[n2]),t,&(t[n]),n,0,0,p); + bn_mul_recursive(r,a,b,n,0,0,p); + i=n/2; + /* If there is only a bottom half to the number, + * just do it */ + if (tna > tnb) + j = tna - i; + else + j = tnb - i; + if (j == 0) + { + bn_mul_recursive(&(r[n2]),&(a[n]),&(b[n]), + i,tna-i,tnb-i,p); + memset(&(r[n2+i*2]),0,sizeof(BN_ULONG)*(n2-i*2)); + } + else if (j > 0) /* eg, n == 16, i == 8 and tn == 11 */ + { + bn_mul_part_recursive(&(r[n2]),&(a[n]),&(b[n]), + i,tna-i,tnb-i,p); + memset(&(r[n2+tna+tnb]),0, + sizeof(BN_ULONG)*(n2-tna-tnb)); + } + else /* (j < 0) eg, n == 16, i == 8 and tn == 5 */ + { + memset(&(r[n2]),0,sizeof(BN_ULONG)*n2); + if (tna < BN_MUL_RECURSIVE_SIZE_NORMAL + && tnb < BN_MUL_RECURSIVE_SIZE_NORMAL) + { + bn_mul_normal(&(r[n2]),&(a[n]),tna,&(b[n]),tnb); + } + else + { + for (;;) + { + i/=2; + if (i < tna && i < tnb) + { + bn_mul_part_recursive(&(r[n2]), + &(a[n]),&(b[n]), + i,tna-i,tnb-i,p); + break; + } + else if (i <= tna && i <= tnb) + { + bn_mul_recursive(&(r[n2]), + &(a[n]),&(b[n]), + i,tna-i,tnb-i,p); + break; + } + } + } + } + } + + /* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign + * r[10] holds (a[0]*b[0]) + * r[32] holds (b[1]*b[1]) + */ + + c1=(int)(bn_add_words(t,r,&(r[n2]),n2)); + + if (neg) /* if t[32] is negative */ + { + c1-=(int)(bn_sub_words(&(t[n2]),t,&(t[n2]),n2)); + } + else + { + /* Might have a carry */ + c1+=(int)(bn_add_words(&(t[n2]),&(t[n2]),t,n2)); + } + + /* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1]) + * r[10] holds (a[0]*b[0]) + * r[32] holds (b[1]*b[1]) + * c1 holds the carry bits + */ + c1+=(int)(bn_add_words(&(r[n]),&(r[n]),&(t[n2]),n2)); + if (c1) + { + p= &(r[n+n2]); + lo= *p; + ln=(lo+c1)&BN_MASK2; + *p=ln; + + /* The overflow will stop before we over write + * words we should not overwrite */ + if (ln < c1) + { + do { + p++; + lo= *p; + ln=(lo+1)&BN_MASK2; + *p=ln; + } while (ln == 0); + } } - return(ret); } -void BN_POOL_pop(bp,num) -BN_POOL *bp; -int num; +/* a and b must be the same size, which is n2. + * r needs to be n2 words and t needs to be n2*2 + */ +void bn_mul_low_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, + BN_ULONG *t) { - bp->used-=num; + int n=n2/2; + +# ifdef BN_COUNT + fprintf(stderr," bn_mul_low_recursive %d * %d\n",n2,n2); +# endif + + bn_mul_recursive(r,a,b,n,0,0,&(t[0])); + if (n >= BN_MUL_LOW_RECURSIVE_SIZE_NORMAL) + { + bn_mul_low_recursive(&(t[0]),&(a[0]),&(b[n]),n,&(t[n2])); + bn_add_words(&(r[n]),&(r[n]),&(t[0]),n); + bn_mul_low_recursive(&(t[0]),&(a[n]),&(b[0]),n,&(t[n2])); + bn_add_words(&(r[n]),&(r[n]),&(t[0]),n); + } + else + { + bn_mul_low_normal(&(t[0]),&(a[0]),&(b[n]),n); + bn_mul_low_normal(&(t[n]),&(a[n]),&(b[0]),n); + bn_add_words(&(r[n]),&(r[n]),&(t[0]),n); + bn_add_words(&(r[n]),&(r[n]),&(t[n]),n); + } } -int BN_mul(r,a,b) -BIGNUM *r,*a,*b; +/* a and b must be the same size, which is n2. + * r needs to be n2 words and t needs to be n2*2 + * l is the low words of the output. + * t needs to be n2*3 + */ +void bn_mul_high(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, BN_ULONG *l, int n2, + BN_ULONG *t) { - static BN_POOL bp; - static init=1; + int i,n; + int c1,c2; + int neg,oneg,zero; + BN_ULONG ll,lc,*lp,*mp; + +# ifdef BN_COUNT + fprintf(stderr," bn_mul_high %d * %d\n",n2,n2); +# endif + n=n2/2; + + /* Calculate (al-ah)*(bh-bl) */ + neg=zero=0; + c1=bn_cmp_words(&(a[0]),&(a[n]),n); + c2=bn_cmp_words(&(b[n]),&(b[0]),n); + switch (c1*3+c2) + { + case -4: + bn_sub_words(&(r[0]),&(a[n]),&(a[0]),n); + bn_sub_words(&(r[n]),&(b[0]),&(b[n]),n); + break; + case -3: + zero=1; + break; + case -2: + bn_sub_words(&(r[0]),&(a[n]),&(a[0]),n); + bn_sub_words(&(r[n]),&(b[n]),&(b[0]),n); + neg=1; + break; + case -1: + case 0: + case 1: + zero=1; + break; + case 2: + bn_sub_words(&(r[0]),&(a[0]),&(a[n]),n); + bn_sub_words(&(r[n]),&(b[0]),&(b[n]),n); + neg=1; + break; + case 3: + zero=1; + break; + case 4: + bn_sub_words(&(r[0]),&(a[0]),&(a[n]),n); + bn_sub_words(&(r[n]),&(b[n]),&(b[0]),n); + break; + } + + oneg=neg; + /* t[10] = (a[0]-a[1])*(b[1]-b[0]) */ + /* r[10] = (a[1]*b[1]) */ +# ifdef BN_MUL_COMBA + if (n == 8) + { + bn_mul_comba8(&(t[0]),&(r[0]),&(r[n])); + bn_mul_comba8(r,&(a[n]),&(b[n])); + } + else +# endif + { + bn_mul_recursive(&(t[0]),&(r[0]),&(r[n]),n,0,0,&(t[n2])); + bn_mul_recursive(r,&(a[n]),&(b[n]),n,0,0,&(t[n2])); + } + + /* s0 == low(al*bl) + * s1 == low(ah*bh)+low((al-ah)*(bh-bl))+low(al*bl)+high(al*bl) + * We know s0 and s1 so the only unknown is high(al*bl) + * high(al*bl) == s1 - low(ah*bh+s0+(al-ah)*(bh-bl)) + * high(al*bl) == s1 - (r[0]+l[0]+t[0]) + */ + if (l != NULL) + { + lp= &(t[n2+n]); + c1=(int)(bn_add_words(lp,&(r[0]),&(l[0]),n)); + } + else + { + c1=0; + lp= &(r[0]); + } + + if (neg) + neg=(int)(bn_sub_words(&(t[n2]),lp,&(t[0]),n)); + else + { + bn_add_words(&(t[n2]),lp,&(t[0]),n); + neg=0; + } + + if (l != NULL) + { + bn_sub_words(&(t[n2+n]),&(l[n]),&(t[n2]),n); + } + else + { + lp= &(t[n2+n]); + mp= &(t[n2]); + for (i=0; i 0) + { + lc=c1; + do { + ll=(r[i]+lc)&BN_MASK2; + r[i++]=ll; + lc=(lc > ll); + } while (lc); + } + else + { + lc= -c1; + do { + ll=r[i]; + r[i++]=(ll-lc)&BN_MASK2; + lc=(lc > ll); + } while (lc); + } + } + if (c2 != 0) /* Add starting at r[1] */ + { + i=n; + if (c2 > 0) + { + lc=c2; + do { + ll=(r[i]+lc)&BN_MASK2; + r[i++]=ll; + lc=(lc > ll); + } while (lc); + } + else + { + lc= -c2; + do { + ll=r[i]; + r[i++]=(ll-lc)&BN_MASK2; + lc=(lc > ll); + } while (lc); + } } - return(BN_mm(r,a,b,&bp)); } +#endif /* BN_RECURSION */ -/* r must be different to a and b */ -int BN_mm(m, A, B, bp) -BIGNUM *m,*A,*B; -BN_POOL *bp; +int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) { - int i,num; - int an,bn; - BIGNUM *a,*b,*c,*d,*ac,*bd; + int ret=0; + int top,al,bl; + BIGNUM *rr; +#if defined(BN_MUL_COMBA) || defined(BN_RECURSION) + int i; +#endif +#ifdef BN_RECURSION + BIGNUM *t=NULL; + int j=0,k; +#endif + +#ifdef BN_COUNT + fprintf(stderr,"BN_mul %d * %d\n",a->top,b->top); +#endif - an=A->top; - bn=B->top; - if ((an <= limit) || (bn <= limit)) + bn_check_top(a); + bn_check_top(b); + bn_check_top(r); + + al=a->top; + bl=b->top; + + if ((al == 0) || (bl == 0)) { - return(BN_mmul(m,A,B)); + if (!BN_zero(r)) goto err; + return(1); } + top=al+bl; - a=BN_POOL_push(bp); - b=BN_POOL_push(bp); - c=BN_POOL_push(bp); - d=BN_POOL_push(bp); - ac=BN_POOL_push(bp); - bd=BN_POOL_push(bp); + BN_CTX_start(ctx); + if ((r == a) || (r == b)) + { + if ((rr = BN_CTX_get(ctx)) == NULL) goto err; + } + else + rr = r; + rr->neg=a->neg^b->neg; - num=(an <= bn)?an:bn; - num=1<<(BN_num_bits_word(num-1)-1); +#if defined(BN_MUL_COMBA) || defined(BN_RECURSION) + i = al-bl; +#endif +#ifdef BN_MUL_COMBA + if (i == 0) + { +# if 0 + if (al == 4) + { + if (bn_wexpand(rr,8) == NULL) goto err; + rr->top=8; + bn_mul_comba4(rr->d,a->d,b->d); + goto end; + } +# endif + if (al == 8) + { + if (bn_wexpand(rr,16) == NULL) goto err; + rr->top=16; + bn_mul_comba8(rr->d,a->d,b->d); + goto end; + } + } +#endif /* BN_MUL_COMBA */ +#ifdef BN_RECURSION + if ((al >= BN_MULL_SIZE_NORMAL) && (bl >= BN_MULL_SIZE_NORMAL)) + { + if (i >= -1 && i <= 1) + { + int sav_j =0; + /* Find out the power of two lower or equal + to the longest of the two numbers */ + if (i >= 0) + { + j = BN_num_bits_word((BN_ULONG)al); + } + if (i == -1) + { + j = BN_num_bits_word((BN_ULONG)bl); + } + sav_j = j; + j = 1<<(j-1); + assert(j <= al || j <= bl); + k = j+j; + t = BN_CTX_get(ctx); + if (al > j || bl > j) + { + bn_wexpand(t,k*4); + bn_wexpand(rr,k*4); + bn_mul_part_recursive(rr->d,a->d,b->d, + j,al-j,bl-j,t->d); + } + else /* al <= j || bl <= j */ + { + bn_wexpand(t,k*2); + bn_wexpand(rr,k*2); + bn_mul_recursive(rr->d,a->d,b->d, + j,al-j,bl-j,t->d); + } + rr->top=top; + goto end; + } +#if 0 + if (i == 1 && !BN_get_flags(b,BN_FLG_STATIC_DATA)) + { + BIGNUM *tmp_bn = (BIGNUM *)b; + if (bn_wexpand(tmp_bn,al) == NULL) goto err; + tmp_bn->d[bl]=0; + bl++; + i--; + } + else if (i == -1 && !BN_get_flags(a,BN_FLG_STATIC_DATA)) + { + BIGNUM *tmp_bn = (BIGNUM *)a; + if (bn_wexpand(tmp_bn,bl) == NULL) goto err; + tmp_bn->d[al]=0; + al++; + i++; + } + if (i == 0) + { + /* symmetric and > 4 */ + /* 16 or larger */ + j=BN_num_bits_word((BN_ULONG)al); + j=1<<(j-1); + k=j+j; + t = BN_CTX_get(ctx); + if (al == j) /* exact multiple */ + { + if (bn_wexpand(t,k*2) == NULL) goto err; + if (bn_wexpand(rr,k*2) == NULL) goto err; + bn_mul_recursive(rr->d,a->d,b->d,al,t->d); + } + else + { + if (bn_wexpand(t,k*4) == NULL) goto err; + if (bn_wexpand(rr,k*4) == NULL) goto err; + bn_mul_part_recursive(rr->d,a->d,b->d,al-j,j,t->d); + } + rr->top=top; + goto end; + } +#endif + } +#endif /* BN_RECURSION */ + if (bn_wexpand(rr,top) == NULL) goto err; + rr->top=top; + bn_mul_normal(rr->d,a->d,al,b->d,bl); - /* Are going to now chop things into 'num' word chunks. */ - num*=BN_BITS2; +#if defined(BN_MUL_COMBA) || defined(BN_RECURSION) +end: +#endif + bn_fix_top(rr); + if (r != rr) BN_copy(r,rr); + ret=1; +err: + BN_CTX_end(ctx); + return(ret); + } - BN_copy(a,A); - BN_mask_bits(a,num); - BN_rshift(b,A,num); +void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb) + { + BN_ULONG *rr; - BN_copy(c,B); - BN_mask_bits(c,num); - BN_rshift(d,B,num); +#ifdef BN_COUNT + fprintf(stderr," bn_mul_normal %d * %d\n",na,nb); +#endif - BN_sub(ac ,b,a); - BN_sub(bd,c,d); - BN_mm(m,ac,bd,bp); - BN_mm(ac,a,c,bp); - BN_mm(bd,b,d,bp); + if (na < nb) + { + int itmp; + BN_ULONG *ltmp; - BN_add(m,m,ac); - BN_add(m,m,bd); - BN_lshift(m,m,num); - BN_lshift(bd,bd,num*2); + itmp=na; na=nb; nb=itmp; + ltmp=a; a=b; b=ltmp; - BN_add(m,m,ac); - BN_add(m,m,bd); - BN_POOL_pop(bp,6); - return(1); + } + rr= &(r[na]); + if (nb <= 0) + { + (void)bn_mul_words(r,a,na,0); + return; + } + else + rr[0]=bn_mul_words(r,a,na,b[0]); + + for (;;) + { + if (--nb <= 0) return; + rr[1]=bn_mul_add_words(&(r[1]),a,na,b[1]); + if (--nb <= 0) return; + rr[2]=bn_mul_add_words(&(r[2]),a,na,b[2]); + if (--nb <= 0) return; + rr[3]=bn_mul_add_words(&(r[3]),a,na,b[3]); + if (--nb <= 0) return; + rr[4]=bn_mul_add_words(&(r[4]),a,na,b[4]); + rr+=4; + r+=4; + b+=4; + } } + +void bn_mul_low_normal(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) + { +#ifdef BN_COUNT + fprintf(stderr," bn_mul_low_normal %d * %d\n",n,n); #endif + bn_mul_words(r,a,n,b[0]); + + for (;;) + { + if (--n <= 0) return; + bn_mul_add_words(&(r[1]),a,n,b[1]); + if (--n <= 0) return; + bn_mul_add_words(&(r[2]),a,n,b[2]); + if (--n <= 0) return; + bn_mul_add_words(&(r[3]),a,n,b[3]); + if (--n <= 0) return; + bn_mul_add_words(&(r[4]),a,n,b[4]); + r+=4; + b+=4; + } + } diff --git a/src/lib/libcrypto/bn/bn_prime.c b/src/lib/libcrypto/bn/bn_prime.c index 0c85f70b59..918b9237c6 100644 --- a/src/lib/libcrypto/bn/bn_prime.c +++ b/src/lib/libcrypto/bn/bn_prime.c @@ -55,53 +55,100 @@ * copied and put under another distribution licence * [including the GNU Public Licence.] */ +/* ==================================================================== + * Copyright (c) 1998-2001 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * openssl-core@openssl.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.openssl.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + * This product includes cryptographic software written by Eric Young + * (eay@cryptsoft.com). This product includes software written by Tim + * Hudson (tjh@cryptsoft.com). + * + */ #include #include #include "cryptlib.h" #include "bn_lcl.h" -#include "rand.h" +#include -/* The quick seive algorithm approach to weeding out primes is +/* The quick sieve algorithm approach to weeding out primes is * Philip Zimmermann's, as implemented in PGP. I have had a read of * his comments and implemented my own version. */ #include "bn_prime.h" -#ifndef NOPROTO -static int witness(BIGNUM *a, BIGNUM *n, BN_CTX *ctx,BN_CTX *ctx2, - BN_MONT_CTX *mont); +static int witness(BIGNUM *w, const BIGNUM *a, const BIGNUM *a1, + const BIGNUM *a1_odd, int k, BN_CTX *ctx, BN_MONT_CTX *mont); static int probable_prime(BIGNUM *rnd, int bits); static int probable_prime_dh(BIGNUM *rnd, int bits, - BIGNUM *add, BIGNUM *rem, BN_CTX *ctx); -static int probable_prime_dh_strong(BIGNUM *rnd, int bits, - BIGNUM *add, BIGNUM *rem, BN_CTX *ctx); -#else -static int witness(); -static int probable_prime(); -static int probable_prime_dh(); -static int probable_prime_dh_strong(); -#endif - -BIGNUM *BN_generate_prime(bits,strong,add,rem,callback,cb_arg) -int bits; -int strong; -BIGNUM *add; -BIGNUM *rem; -void (*callback)(P_I_I_P); -char *cb_arg; + const BIGNUM *add, const BIGNUM *rem, BN_CTX *ctx); +static int probable_prime_dh_safe(BIGNUM *rnd, int bits, + const BIGNUM *add, const BIGNUM *rem, BN_CTX *ctx); + +BIGNUM *BN_generate_prime(BIGNUM *ret, int bits, int safe, + const BIGNUM *add, const BIGNUM *rem, + void (*callback)(int,int,void *), void *cb_arg) { BIGNUM *rnd=NULL; - BIGNUM *ret=NULL; - BIGNUM *t=NULL; + BIGNUM t; + int found=0; int i,j,c1=0; BN_CTX *ctx; + int checks = BN_prime_checks_for_size(bits); ctx=BN_CTX_new(); if (ctx == NULL) goto err; - if ((rnd=BN_new()) == NULL) goto err; - if (strong) - if ((t=BN_new()) == NULL) goto err; + if (ret == NULL) + { + if ((rnd=BN_new()) == NULL) goto err; + } + else + rnd=ret; + BN_init(&t); loop: /* make a random number and set the top and bottom bits */ if (add == NULL) @@ -110,9 +157,9 @@ loop: } else { - if (strong) + if (safe) { - if (!probable_prime_dh_strong(rnd,bits,add,rem,ctx)) + if (!probable_prime_dh_safe(rnd,bits,add,rem,ctx)) goto err; } else @@ -124,171 +171,188 @@ loop: /* if (BN_mod_word(rnd,(BN_ULONG)3) == 1) goto loop; */ if (callback != NULL) callback(0,c1++,cb_arg); - if (!strong) + if (!safe) { - i=BN_is_prime(rnd,BN_prime_checks,callback,ctx,cb_arg); + i=BN_is_prime_fasttest(rnd,checks,callback,ctx,cb_arg,0); if (i == -1) goto err; if (i == 0) goto loop; } else { - /* for a strong prime generation, + /* for "safe prime" generation, * check that (p-1)/2 is prime. * Since a prime is odd, We just * need to divide by 2 */ - if (!BN_rshift1(t,rnd)) goto err; + if (!BN_rshift1(&t,rnd)) goto err; - for (i=0; ibn[ctx->tos++]; + if ((ctx=BN_CTX_new()) == NULL) + goto err; + BN_CTX_start(ctx); - /* Setup the montgomery structure */ - if (!BN_MONT_CTX_set(mont,a,ctx2)) goto err; + /* A := abs(a) */ + if (a->neg) + { + BIGNUM *t; + if ((t = BN_CTX_get(ctx)) == NULL) goto err; + BN_copy(t, a); + t->neg = 0; + A = t; + } + else + A = a; + A1 = BN_CTX_get(ctx); + A1_odd = BN_CTX_get(ctx); + check = BN_CTX_get(ctx); + if (check == NULL) goto err; + + /* compute A1 := A - 1 */ + if (!BN_copy(A1, A)) + goto err; + if (!BN_sub_word(A1, 1)) + goto err; + if (BN_is_zero(A1)) + { + ret = 0; + goto err; + } - for (i=0; itos--; - if ((ctx_passed == NULL) && (ctx != NULL)) - BN_CTX_free(ctx); - if (ctx2 != NULL) - BN_CTX_free(ctx2); - if (mont != NULL) BN_MONT_CTX_free(mont); - + if (ctx != NULL) + { + BN_CTX_end(ctx); + if (ctx_passed == NULL) + BN_CTX_free(ctx); + } + if (mont != NULL) + BN_MONT_CTX_free(mont); + return(ret); } -#define RECP_MUL_MOD - -static int witness(a,n,ctx,ctx2,mont) -BIGNUM *a; -BIGNUM *n; -BN_CTX *ctx,*ctx2; -BN_MONT_CTX *mont; +static int witness(BIGNUM *w, const BIGNUM *a, const BIGNUM *a1, + const BIGNUM *a1_odd, int k, BN_CTX *ctx, BN_MONT_CTX *mont) { - int k,i,ret= -1,good; - BIGNUM *d,*dd,*tmp,*d1,*d2,*n1; - BIGNUM *mont_one,*mont_n1,*mont_a; - - d1=ctx->bn[ctx->tos]; - d2=ctx->bn[ctx->tos+1]; - n1=ctx->bn[ctx->tos+2]; - ctx->tos+=3; - - mont_one=ctx2->bn[ctx2->tos]; - mont_n1=ctx2->bn[ctx2->tos+1]; - mont_a=ctx2->bn[ctx2->tos+2]; - ctx2->tos+=3; - - d=d1; - dd=d2; - if (!BN_one(d)) goto err; - if (!BN_sub(n1,n,d)) goto err; /* n1=n-1; */ - k=BN_num_bits(n1); - - if (!BN_to_montgomery(mont_one,BN_value_one(),mont,ctx2)) goto err; - if (!BN_to_montgomery(mont_n1,n1,mont,ctx2)) goto err; - if (!BN_to_montgomery(mont_a,a,mont,ctx2)) goto err; - - BN_copy(d,mont_one); - for (i=k-1; i>=0; i--) + if (!BN_mod_exp_mont(w, w, a1_odd, a, ctx, mont)) /* w := w^a1_odd mod a */ + return -1; + if (BN_is_one(w)) + return 0; /* probably prime */ + if (BN_cmp(w, a1) == 0) + return 0; /* w == -1 (mod a), 'a' is probably prime */ + while (--k) { - if ( (BN_cmp(d,mont_one) != 0) && - (BN_cmp(d,mont_n1) != 0)) - good=1; - else - good=0; - - BN_mod_mul_montgomery(dd,d,d,mont,ctx2); - - if (good && (BN_cmp(dd,mont_one) == 0)) - { - ret=1; - goto err; - } - if (BN_is_bit_set(n1,i)) - { - BN_mod_mul_montgomery(d,dd,mont_a,mont,ctx2); - } - else - { - tmp=d; - d=dd; - dd=tmp; - } + if (!BN_mod_mul(w, w, w, a, ctx)) /* w := w^2 mod a */ + return -1; + if (BN_is_one(w)) + return 1; /* 'a' is composite, otherwise a previous 'w' would + * have been == -1 (mod 'a') */ + if (BN_cmp(w, a1) == 0) + return 0; /* w == -1 (mod a), 'a' is probably prime */ } - if (BN_cmp(d,mont_one) == 0) - i=0; - else i=1; - ret=i; -err: - ctx->tos-=3; - ctx2->tos-=3; - return(ret); + /* If we get here, 'w' is the (a-1)/2-th power of the original 'w', + * and it is neither -1 nor +1 -- so 'a' cannot be prime */ + return 1; } -static int probable_prime(rnd, bits) -BIGNUM *rnd; -int bits; +static int probable_prime(BIGNUM *rnd, int bits) { int i; - MS_STATIC BN_ULONG mods[NUMPRIMES]; - BN_ULONG delta; + BN_ULONG mods[NUMPRIMES]; + BN_ULONG delta,d; +again: if (!BN_rand(rnd,bits,1,1)) return(0); /* we now have a random number 'rand' to test. */ for (i=1; ibn[ctx->tos++]; + BN_CTX_start(ctx); + if ((t1 = BN_CTX_get(ctx)) == NULL) goto err; if (!BN_rand(rnd,bits,0,1)) goto err; @@ -338,7 +402,7 @@ BN_CTX *ctx; loop: for (i=1; itos--; + BN_CTX_end(ctx); return(ret); } -static int probable_prime_dh_strong(p, bits, padd, rem,ctx) -BIGNUM *p; -int bits; -BIGNUM *padd; -BIGNUM *rem; -BN_CTX *ctx; +static int probable_prime_dh_safe(BIGNUM *p, int bits, const BIGNUM *padd, + const BIGNUM *rem, BN_CTX *ctx) { int i,ret=0; - BIGNUM *t1,*qadd=NULL,*q=NULL; + BIGNUM *t1,*qadd,*q; bits--; - t1=ctx->bn[ctx->tos++]; - q=ctx->bn[ctx->tos++]; - qadd=ctx->bn[ctx->tos++]; + BN_CTX_start(ctx); + t1 = BN_CTX_get(ctx); + q = BN_CTX_get(ctx); + qadd = BN_CTX_get(ctx); + if (qadd == NULL) goto err; if (!BN_rshift1(qadd,padd)) goto err; @@ -389,8 +451,8 @@ BN_CTX *ctx; /* check that p and q are prime */ /* check that for p and q * gcd(p-1,primes) == 1 (except for 2) */ - if ( (BN_mod_word(p,(BN_LONG)primes[i]) == 0) || - (BN_mod_word(q,(BN_LONG)primes[i]) == 0)) + if ( (BN_mod_word(p,(BN_ULONG)primes[i]) == 0) || + (BN_mod_word(q,(BN_ULONG)primes[i]) == 0)) { if (!BN_add(p,p,padd)) goto err; if (!BN_add(q,q,qadd)) goto err; @@ -399,75 +461,6 @@ BN_CTX *ctx; } ret=1; err: - ctx->tos-=3; - return(ret); - } - -#if 0 -static int witness(a, n,ctx) -BIGNUM *a; -BIGNUM *n; -BN_CTX *ctx; - { - int k,i,nb,ret= -1; - BIGNUM *d,*dd,*tmp; - BIGNUM *d1,*d2,*x,*n1,*inv; - - d1=ctx->bn[ctx->tos]; - d2=ctx->bn[ctx->tos+1]; - x=ctx->bn[ctx->tos+2]; - n1=ctx->bn[ctx->tos+3]; - inv=ctx->bn[ctx->tos+4]; - ctx->tos+=5; - - d=d1; - dd=d2; - if (!BN_one(d)) goto err; - if (!BN_sub(n1,n,d)) goto err; /* n1=n-1; */ - k=BN_num_bits(n1); - - /* i=BN_num_bits(n); */ -#ifdef RECP_MUL_MOD - nb=BN_reciprocal(inv,n,ctx); /**/ - if (nb == -1) goto err; -#endif - - for (i=k-1; i>=0; i--) - { - if (BN_copy(x,d) == NULL) goto err; -#ifndef RECP_MUL_MOD - if (!BN_mod_mul(dd,d,d,n,ctx)) goto err; -#else - if (!BN_mod_mul_reciprocal(dd,d,d,n,inv,nb,ctx)) goto err; -#endif - if ( BN_is_one(dd) && - !BN_is_one(x) && - (BN_cmp(x,n1) != 0)) - { - ret=1; - goto err; - } - if (BN_is_bit_set(n1,i)) - { -#ifndef RECP_MUL_MOD - if (!BN_mod_mul(d,dd,a,n,ctx)) goto err; -#else - if (!BN_mod_mul_reciprocal(d,dd,a,n,inv,nb,ctx)) goto err; -#endif - } - else - { - tmp=d; - d=dd; - dd=tmp; - } - } - if (BN_is_one(d)) - i=0; - else i=1; - ret=i; -err: - ctx->tos-=5; + BN_CTX_end(ctx); return(ret); } -#endif diff --git a/src/lib/libcrypto/bn/bn_prime.h b/src/lib/libcrypto/bn/bn_prime.h index 6fce0210cd..b7cf9a9bfe 100644 --- a/src/lib/libcrypto/bn/bn_prime.h +++ b/src/lib/libcrypto/bn/bn_prime.h @@ -1,4 +1,4 @@ -/* crypto/bn/bn_prime.h */ +/* Auto generated by bn_prime.pl */ /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) * All rights reserved. * @@ -61,7 +61,7 @@ #else #define NUMPRIMES 54 #endif -static unsigned int primes[NUMPRIMES]= +static const unsigned int primes[NUMPRIMES]= { 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, diff --git a/src/lib/libcrypto/bn/bn_prime.pl b/src/lib/libcrypto/bn/bn_prime.pl index 1b00c21a77..9fc3765486 100644 --- a/src/lib/libcrypto/bn/bn_prime.pl +++ b/src/lib/libcrypto/bn/bn_prime.pl @@ -1,4 +1,4 @@ -#!/usr/bin/perl +#!/usr/local/bin/perl # bn_prime.pl $num=2048; @@ -18,13 +18,74 @@ loop: while ($#primes < $num-1) push(@primes,$p); } -print <<"EOF"; +# print <<"EOF"; +# /* Auto generated by bn_prime.pl */ +# /* Copyright (C) 1995-1997 Eric Young (eay\@mincom.oz.au). +# * All rights reserved. +# * Copyright remains Eric Young's, and as such any Copyright notices in +# * the code are not to be removed. +# * See the COPYRIGHT file in the SSLeay distribution for more details. +# */ +# +# EOF + +print <<\EOF; /* Auto generated by bn_prime.pl */ -/* Copyright (C) 1995-1997 Eric Young (eay\@mincom.oz.au). +/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) * All rights reserved. + * + * This package is an SSL implementation written + * by Eric Young (eay@cryptsoft.com). + * The implementation was written so as to conform with Netscapes SSL. + * + * This library is free for commercial and non-commercial use as long as + * the following conditions are aheared to. The following conditions + * apply to all code found in this distribution, be it the RC4, RSA, + * lhash, DES, etc., code; not just the SSL code. The SSL documentation + * included with this distribution is covered by the same copyright terms + * except that the holder is Tim Hudson (tjh@cryptsoft.com). + * * Copyright remains Eric Young's, and as such any Copyright notices in * the code are not to be removed. - * See the COPYRIGHT file in the SSLeay distribution for more details. + * If this package is used in a product, Eric Young should be given attribution + * as the author of the parts of the library used. + * This can be in the form of a textual message at program startup or + * in documentation (online or textual) provided with the package. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * "This product includes cryptographic software written by + * Eric Young (eay@cryptsoft.com)" + * The word 'cryptographic' can be left out if the rouines from the library + * being used are not cryptographic related :-). + * 4. If you include any Windows specific code (or a derivative thereof) from + * the apps directory (application code) you must include an acknowledgement: + * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" + * + * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * The licence and distribution terms for any publically available version or + * derivative of this code cannot be changed. i.e. this code cannot simply be + * copied and put under another distribution licence + * [including the GNU Public Licence.] */ EOF @@ -43,7 +104,7 @@ printf "#define NUMPRIMES %d\n",$num; printf "#else\n"; printf "#define NUMPRIMES %d\n",$eight; printf "#endif\n"; -print "static unsigned int primes[NUMPRIMES]=\n\t{\n\t"; +print "static const unsigned int primes[NUMPRIMES]=\n\t{\n\t"; $init=0; for ($i=0; $i <= $#primes; $i++) { diff --git a/src/lib/libcrypto/bn/bn_print.c b/src/lib/libcrypto/bn/bn_print.c index 2bcc11c852..5f46b1826c 100644 --- a/src/lib/libcrypto/bn/bn_print.c +++ b/src/lib/libcrypto/bn/bn_print.c @@ -59,20 +59,19 @@ #include #include #include "cryptlib.h" -#include "buffer.h" +#include #include "bn_lcl.h" -static char *Hex="0123456789ABCDEF"; +static const char *Hex="0123456789ABCDEF"; -/* Must 'Free' the returned data */ -char *BN_bn2hex(a) -BIGNUM *a; +/* Must 'OPENSSL_free' the returned data */ +char *BN_bn2hex(const BIGNUM *a) { int i,j,v,z=0; char *buf; char *p; - buf=(char *)Malloc(a->top*BN_BYTES*2+2); + buf=(char *)OPENSSL_malloc(a->top*BN_BYTES*2+2); if (buf == NULL) { BNerr(BN_F_BN_BN2HEX,ERR_R_MALLOC_FAILURE); @@ -100,9 +99,8 @@ err: return(buf); } -/* Must 'Free' the returned data */ -char *BN_bn2dec(a) -BIGNUM *a; +/* Must 'OPENSSL_free' the returned data */ +char *BN_bn2dec(const BIGNUM *a) { int i=0,num; char *buf=NULL; @@ -112,8 +110,8 @@ BIGNUM *a; i=BN_num_bits(a)*3; num=(i/10+i/1000+3)+1; - bn_data=(BN_ULONG *)Malloc((num/BN_DEC_NUM+1)*sizeof(BN_ULONG)); - buf=(char *)Malloc(num+3); + bn_data=(BN_ULONG *)OPENSSL_malloc((num/BN_DEC_NUM+1)*sizeof(BN_ULONG)); + buf=(char *)OPENSSL_malloc(num+3); if ((buf == NULL) || (bn_data == NULL)) { BNerr(BN_F_BN_BN2DEC,ERR_R_MALLOC_FAILURE); @@ -139,7 +137,7 @@ BIGNUM *a; } lp--; /* We now have a series of blocks, BN_DEC_NUM chars - * in length, where the last one needs trucation. + * in length, where the last one needs truncation. * The blocks need to be reversed in order. */ sprintf(p,BN_DEC_FMT1,*lp); while (*p) p++; @@ -151,14 +149,12 @@ BIGNUM *a; } } err: - if (bn_data != NULL) Free(bn_data); + if (bn_data != NULL) OPENSSL_free(bn_data); if (t != NULL) BN_free(t); return(buf); } -int BN_hex2bn(bn,a) -BIGNUM **bn; -char *a; +int BN_hex2bn(BIGNUM **bn, const char *a) { BIGNUM *ret=NULL; BN_ULONG l=0; @@ -169,13 +165,13 @@ char *a; if (*a == '-') { neg=1; a++; } - for (i=0; isxdigit(a[i]); i++) + for (i=0; isxdigit((unsigned char) a[i]); i++) ; num=i+neg; if (bn == NULL) return(num); - /* a is the start of the hex digets, and it is 'i' long */ + /* a is the start of the hex digits, and it is 'i' long */ if (*bn == NULL) { if ((ret=BN_new()) == NULL) return(0); @@ -189,7 +185,7 @@ char *a; /* i is the number of hex digests; */ if (bn_expand(ret,i*4) == NULL) goto err; - j=i; /* least significate 'hex' */ + j=i; /* least significant 'hex' */ m=0; h=0; while (j > 0) @@ -224,9 +220,7 @@ err: return(0); } -int BN_dec2bn(bn,a) -BIGNUM **bn; -char *a; +int BN_dec2bn(BIGNUM **bn, const char *a) { BIGNUM *ret=NULL; BN_ULONG l=0; @@ -236,14 +230,14 @@ char *a; if ((a == NULL) || (*a == '\0')) return(0); if (*a == '-') { neg=1; a++; } - for (i=0; isdigit(a[i]); i++) + for (i=0; isdigit((unsigned char) a[i]); i++) ; num=i+neg; if (bn == NULL) return(num); - /* a is the start of the digets, and it is 'i' long. - * We chop it into BN_DEC_NUM digets at a time */ + /* a is the start of the digits, and it is 'i' long. + * We chop it into BN_DEC_NUM digits at a time */ if (*bn == NULL) { if ((ret=BN_new()) == NULL) return(0); @@ -283,12 +277,9 @@ err: return(0); } -#ifndef NO_BIO - -#ifndef NO_FP_API -int BN_print_fp(fp, a) -FILE *fp; -BIGNUM *a; +#ifndef OPENSSL_NO_BIO +#ifndef OPENSSL_NO_FP_API +int BN_print_fp(FILE *fp, const BIGNUM *a) { BIO *b; int ret; @@ -302,9 +293,7 @@ BIGNUM *a; } #endif -int BN_print(bp, a) -BIO *bp; -BIGNUM *a; +int BN_print(BIO *bp, const BIGNUM *a) { int i,j,v,z=0; int ret=0; @@ -329,5 +318,15 @@ BIGNUM *a; end: return(ret); } +#endif +#ifdef BN_DEBUG +void bn_dump1(FILE *o, const char *a, const BN_ULONG *b,int n) + { + int i; + fprintf(o, "%s=", a); + for (i=n-1;i>=0;i--) + fprintf(o, "%08lX", b[i]); /* assumes 32-bit BN_ULONG */ + fprintf(o, "\n"); + } #endif diff --git a/src/lib/libcrypto/bn/bn_rand.c b/src/lib/libcrypto/bn/bn_rand.c index 75b6b0493b..9e08ccd22e 100644 --- a/src/lib/libcrypto/bn/bn_rand.c +++ b/src/lib/libcrypto/bn/bn_rand.c @@ -55,28 +55,83 @@ * copied and put under another distribution licence * [including the GNU Public Licence.] */ +/* ==================================================================== + * Copyright (c) 1998-2001 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * openssl-core@openssl.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.openssl.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + * This product includes cryptographic software written by Eric Young + * (eay@cryptsoft.com). This product includes software written by Tim + * Hudson (tjh@cryptsoft.com). + * + */ #include #include #include "cryptlib.h" #include "bn_lcl.h" -#include "rand.h" +#include -int BN_rand(rnd, bits, top, bottom) -BIGNUM *rnd; -int bits; -int top; -int bottom; +static int bnrand(int pseudorand, BIGNUM *rnd, int bits, int top, int bottom) { unsigned char *buf=NULL; int ret=0,bit,bytes,mask; time_t tim; + if (bits == 0) + { + BN_zero(rnd); + return 1; + } + bytes=(bits+7)/8; bit=(bits-1)%8; - mask=0xff<= 128 && i > 0) + buf[i] = buf[i-1]; + else if (c < 42) + buf[i] = 0; + else if (c < 84) + buf[i] = 255; + } + } +#endif + + if (top != -1) + { + if (top) + { + if (bit == 0) + { + buf[0]=1; + buf[1]|=0x80; + } + else + { + buf[0]|=(3<<(bit-1)); + } } else { - buf[0]|=(3<<(bit-1)); - buf[0]&= ~(mask<<1); + buf[0]|=(1<neg || BN_is_zero(range)) + { + BNerr(BN_F_BN_RAND_RANGE, BN_R_INVALID_RANGE); + return 0; + } + + n = BN_num_bits(range); /* n > 0 */ + + /* BN_is_bit_set(range, n - 1) always holds */ + + if (n == 1) + { + if (!BN_zero(r)) return 0; + } + else if (!BN_is_bit_set(range, n - 2) && !BN_is_bit_set(range, n - 3)) + { + /* range = 100..._2, + * so 3*range (= 11..._2) is exactly one bit longer than range */ + do + { + if (!bn_rand(r, n + 1, -1, 0)) return 0; + /* If r < 3*range, use r := r MOD range + * (which is either r, r - range, or r - 2*range). + * Otherwise, iterate once more. + * Since 3*range = 11..._2, each iteration succeeds with + * probability >= .75. */ + if (BN_cmp(r ,range) >= 0) + { + if (!BN_sub(r, r, range)) return 0; + if (BN_cmp(r, range) >= 0) + if (!BN_sub(r, r, range)) return 0; + } + } + while (BN_cmp(r, range) >= 0); + } + else + { + do + { + /* range = 11..._2 or range = 101..._2 */ + if (!bn_rand(r, n, -1, 0)) return 0; + } + while (BN_cmp(r, range) >= 0); + } + + return 1; + } + + +int BN_rand_range(BIGNUM *r, BIGNUM *range) + { + return bn_rand_range(0, r, range); + } + +int BN_pseudo_rand_range(BIGNUM *r, BIGNUM *range) + { + return bn_rand_range(1, r, range); + } diff --git a/src/lib/libcrypto/bn/bn_recp.c b/src/lib/libcrypto/bn/bn_recp.c index 72cd69d3fc..ef5fdd4708 100644 --- a/src/lib/libcrypto/bn/bn_recp.c +++ b/src/lib/libcrypto/bn/bn_recp.c @@ -60,66 +60,171 @@ #include "cryptlib.h" #include "bn_lcl.h" -int BN_mod_mul_reciprocal(r, x, y, m, i, nb, ctx) -BIGNUM *r; -BIGNUM *x; -BIGNUM *y; -BIGNUM *m; -BIGNUM *i; -int nb; -BN_CTX *ctx; +void BN_RECP_CTX_init(BN_RECP_CTX *recp) { - int ret=0,j; - BIGNUM *a,*b,*c,*d; + BN_init(&(recp->N)); + BN_init(&(recp->Nr)); + recp->num_bits=0; + recp->flags=0; + } + +BN_RECP_CTX *BN_RECP_CTX_new(void) + { + BN_RECP_CTX *ret; + + if ((ret=(BN_RECP_CTX *)OPENSSL_malloc(sizeof(BN_RECP_CTX))) == NULL) + return(NULL); + + BN_RECP_CTX_init(ret); + ret->flags=BN_FLG_MALLOCED; + return(ret); + } + +void BN_RECP_CTX_free(BN_RECP_CTX *recp) + { + if(recp == NULL) + return; + + BN_free(&(recp->N)); + BN_free(&(recp->Nr)); + if (recp->flags & BN_FLG_MALLOCED) + OPENSSL_free(recp); + } - a=ctx->bn[ctx->tos++]; - b=ctx->bn[ctx->tos++]; - c=ctx->bn[ctx->tos++]; - d=ctx->bn[ctx->tos++]; +int BN_RECP_CTX_set(BN_RECP_CTX *recp, const BIGNUM *d, BN_CTX *ctx) + { + if (!BN_copy(&(recp->N),d)) return 0; + if (!BN_zero(&(recp->Nr))) return 0; + recp->num_bits=BN_num_bits(d); + recp->shift=0; + return(1); + } + +int BN_mod_mul_reciprocal(BIGNUM *r, const BIGNUM *x, const BIGNUM *y, + BN_RECP_CTX *recp, BN_CTX *ctx) + { + int ret=0; + BIGNUM *a; + const BIGNUM *ca; - if (x == y) - { if (!BN_sqr(a,x,ctx)) goto err; } + BN_CTX_start(ctx); + if ((a = BN_CTX_get(ctx)) == NULL) goto err; + if (y != NULL) + { + if (x == y) + { if (!BN_sqr(a,x,ctx)) goto err; } + else + { if (!BN_mul(a,x,y,ctx)) goto err; } + ca = a; + } else - { if (!BN_mul(a,x,y)) goto err; } - if (!BN_rshift(d,a,nb)) goto err; - if (!BN_mul(b,d,i)) goto err; - if (!BN_rshift(c,b,nb)) goto err; - if (!BN_mul(b,m,c)) goto err; - if (!BN_sub(r,a,b)) goto err; + ca=x; /* Just do the mod */ + + ret = BN_div_recp(NULL,r,ca,recp,ctx); +err: + BN_CTX_end(ctx); + return(ret); + } + +int BN_div_recp(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, + BN_RECP_CTX *recp, BN_CTX *ctx) + { + int i,j,ret=0; + BIGNUM *a,*b,*d,*r; + + BN_CTX_start(ctx); + a=BN_CTX_get(ctx); + b=BN_CTX_get(ctx); + if (dv != NULL) + d=dv; + else + d=BN_CTX_get(ctx); + if (rem != NULL) + r=rem; + else + r=BN_CTX_get(ctx); + if (a == NULL || b == NULL || d == NULL || r == NULL) goto err; + + if (BN_ucmp(m,&(recp->N)) < 0) + { + if (!BN_zero(d)) return 0; + if (!BN_copy(r,m)) return 0; + BN_CTX_end(ctx); + return(1); + } + + /* We want the remainder + * Given input of ABCDEF / ab + * we need multiply ABCDEF by 3 digests of the reciprocal of ab + * + */ + + /* i := max(BN_num_bits(m), 2*BN_num_bits(N)) */ + i=BN_num_bits(m); + j=recp->num_bits<<1; + if (j>i) i=j; + + /* Nr := round(2^i / N) */ + if (i != recp->shift) + recp->shift=BN_reciprocal(&(recp->Nr),&(recp->N), + i,ctx); /* BN_reciprocal returns i, or -1 for an error */ + if (recp->shift == -1) goto err; + + /* d := |round(round(m / 2^BN_num_bits(N)) * recp->Nr / 2^(i - BN_num_bits(N)))| + * = |round(round(m / 2^BN_num_bits(N)) * round(2^i / N) / 2^(i - BN_num_bits(N)))| + * <= |(m / 2^BN_num_bits(N)) * (2^i / N) * (2^BN_num_bits(N) / 2^i)| + * = |m/N| + */ + if (!BN_rshift(a,m,recp->num_bits)) goto err; + if (!BN_mul(b,a,&(recp->Nr),ctx)) goto err; + if (!BN_rshift(d,b,i-recp->num_bits)) goto err; + d->neg=0; + + if (!BN_mul(b,&(recp->N),d,ctx)) goto err; + if (!BN_usub(r,m,b)) goto err; + r->neg=0; + +#if 1 j=0; - while (BN_cmp(r,m) >= 0) + while (BN_ucmp(r,&(recp->N)) >= 0) { if (j++ > 2) { BNerr(BN_F_BN_MOD_MUL_RECIPROCAL,BN_R_BAD_RECIPROCAL); goto err; } - if (!BN_sub(r,r,m)) goto err; + if (!BN_usub(r,r,&(recp->N))) goto err; + if (!BN_add_word(d,1)) goto err; } +#endif + r->neg=BN_is_zero(r)?0:m->neg; + d->neg=m->neg^recp->N.neg; ret=1; err: - ctx->tos-=4; + BN_CTX_end(ctx); return(ret); - } + } -int BN_reciprocal(r, m,ctx) -BIGNUM *r; -BIGNUM *m; -BN_CTX *ctx; +/* len is the expected size of the result + * We actually calculate with an extra word of precision, so + * we can do faster division if the remainder is not required. + */ +/* r := 2^len / m */ +int BN_reciprocal(BIGNUM *r, const BIGNUM *m, int len, BN_CTX *ctx) { - int nm,ret= -1; - BIGNUM *t; + int ret= -1; + BIGNUM t; - t=ctx->bn[ctx->tos++]; + BN_init(&t); - nm=BN_num_bits(m); - if (!BN_lshift(t,BN_value_one(),nm*2)) goto err; + if (!BN_zero(&t)) goto err; + if (!BN_set_bit(&t,len)) goto err; - if (!BN_div(r,NULL,t,m,ctx)) goto err; - ret=nm; + if (!BN_div(r,NULL,&t,m,ctx)) goto err; + + ret=len; err: - ctx->tos--; + BN_free(&t); return(ret); } - diff --git a/src/lib/libcrypto/bn/bn_shift.c b/src/lib/libcrypto/bn/bn_shift.c index 944bf1794b..70f785ea18 100644 --- a/src/lib/libcrypto/bn/bn_shift.c +++ b/src/lib/libcrypto/bn/bn_shift.c @@ -60,9 +60,7 @@ #include "cryptlib.h" #include "bn_lcl.h" -int BN_lshift1(r, a) -BIGNUM *r; -BIGNUM *a; +int BN_lshift1(BIGNUM *r, const BIGNUM *a) { register BN_ULONG *ap,*rp,t,c; int i; @@ -94,9 +92,7 @@ BIGNUM *a; return(1); } -int BN_rshift1(r, a) -BIGNUM *r; -BIGNUM *a; +int BN_rshift1(BIGNUM *r, const BIGNUM *a) { BN_ULONG *ap,*rp,t,c; int i; @@ -125,18 +121,15 @@ BIGNUM *a; return(1); } -int BN_lshift(r, a, n) -BIGNUM *r; -BIGNUM *a; -int n; +int BN_lshift(BIGNUM *r, const BIGNUM *a, int n) { int i,nw,lb,rb; BN_ULONG *t,*f; BN_ULONG l; r->neg=a->neg; - if (bn_wexpand(r,a->top+(n/BN_BITS2)+1) == NULL) return(0); nw=n/BN_BITS2; + if (bn_wexpand(r,a->top+nw+1) == NULL) return(0); lb=n%BN_BITS2; rb=BN_BITS2-lb; f=a->d; @@ -160,10 +153,7 @@ int n; return(1); } -int BN_rshift(r, a, n) -BIGNUM *r; -BIGNUM *a; -int n; +int BN_rshift(BIGNUM *r, const BIGNUM *a, int n) { int i,j,nw,lb,rb; BN_ULONG *t,*f; @@ -172,7 +162,7 @@ int n; nw=n/BN_BITS2; rb=n%BN_BITS2; lb=BN_BITS2-rb; - if (nw > a->top) + if (nw > a->top || a->top == 0) { BN_zero(r); return(1); @@ -182,6 +172,11 @@ int n; r->neg=a->neg; if (bn_wexpand(r,a->top-nw+1) == NULL) return(0); } + else + { + if (n == 0) + return 1; /* or the copying loop will go berserk */ + } f= &(a->d[nw]); t=r->d; diff --git a/src/lib/libcrypto/bn/bn_sqr.c b/src/lib/libcrypto/bn/bn_sqr.c index a8464610e5..c1d0cca438 100644 --- a/src/lib/libcrypto/bn/bn_sqr.c +++ b/src/lib/libcrypto/bn/bn_sqr.c @@ -62,35 +62,105 @@ /* r must not be a */ /* I've just gone over this and it is now %20 faster on x86 - eay - 27 Jun 96 */ -int BN_sqr(r, a, ctx) -BIGNUM *r; -BIGNUM *a; -BN_CTX *ctx; +int BN_sqr(BIGNUM *r, const BIGNUM *a, BN_CTX *ctx) { - int i,j,max,al; - BIGNUM *tmp; - BN_ULONG *ap,*rp; + int max,al; + int ret = 0; + BIGNUM *tmp,*rr; - tmp=ctx->bn[ctx->tos]; +#ifdef BN_COUNT + fprintf(stderr,"BN_sqr %d * %d\n",a->top,a->top); +#endif + bn_check_top(a); al=a->top; - if (al == 0) + if (al <= 0) { r->top=0; return(1); } - max=(al*2); - if (bn_wexpand(r,1+max) == NULL) return(0); - if (bn_wexpand(tmp,1+max) == NULL) return(0); + BN_CTX_start(ctx); + rr=(a != r) ? r : BN_CTX_get(ctx); + tmp=BN_CTX_get(ctx); + if (tmp == NULL) goto err; - r->neg=0; + max=(al+al); + if (bn_wexpand(rr,max+1) == NULL) goto err; - ap=a->d; - rp=r->d; + if (al == 4) + { +#ifndef BN_SQR_COMBA + BN_ULONG t[8]; + bn_sqr_normal(rr->d,a->d,4,t); +#else + bn_sqr_comba4(rr->d,a->d); +#endif + } + else if (al == 8) + { +#ifndef BN_SQR_COMBA + BN_ULONG t[16]; + bn_sqr_normal(rr->d,a->d,8,t); +#else + bn_sqr_comba8(rr->d,a->d); +#endif + } + else + { +#if defined(BN_RECURSION) + if (al < BN_SQR_RECURSIVE_SIZE_NORMAL) + { + BN_ULONG t[BN_SQR_RECURSIVE_SIZE_NORMAL*2]; + bn_sqr_normal(rr->d,a->d,al,t); + } + else + { + int j,k; + + j=BN_num_bits_word((BN_ULONG)al); + j=1<<(j-1); + k=j+j; + if (al == j) + { + if (bn_wexpand(tmp,k*2) == NULL) goto err; + bn_sqr_recursive(rr->d,a->d,al,tmp->d); + } + else + { + if (bn_wexpand(tmp,max) == NULL) goto err; + bn_sqr_normal(rr->d,a->d,al,tmp->d); + } + } +#else + if (bn_wexpand(tmp,max) == NULL) goto err; + bn_sqr_normal(rr->d,a->d,al,tmp->d); +#endif + } + + rr->top=max; + rr->neg=0; + if ((max > 0) && (rr->d[max-1] == 0)) rr->top--; + if (rr != r) BN_copy(r,rr); + ret = 1; + err: + BN_CTX_end(ctx); + return(ret); + } + +/* tmp must have 2*n words */ +void bn_sqr_normal(BN_ULONG *r, const BN_ULONG *a, int n, BN_ULONG *tmp) + { + int i,j,max; + const BN_ULONG *ap; + BN_ULONG *rp; + + max=n*2; + ap=a; + rp=r; rp[0]=rp[max-1]=0; rp++; - j=al; + j=n; if (--j > 0) { @@ -99,7 +169,7 @@ BN_CTX *ctx; rp+=2; } - for (i=2; i0; i--) { j--; ap++; @@ -107,16 +177,112 @@ BN_CTX *ctx; rp+=2; } - bn_add_words(r->d,r->d,r->d,max); + bn_add_words(r,r,r,max); /* There will not be a carry */ - bn_sqr_words(tmp->d,a->d,al); + bn_sqr_words(tmp,a,n); - bn_add_words(r->d,r->d,tmp->d,max); - - r->top=max; - if (r->d[max-1] == 0) r->top--; - return(1); + bn_add_words(r,r,tmp,max); } +#ifdef BN_RECURSION +/* r is 2*n words in size, + * a and b are both n words in size. (There's not actually a 'b' here ...) + * n must be a power of 2. + * We multiply and return the result. + * t must be 2*n words in size + * We calculate + * a[0]*b[0] + * a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0]) + * a[1]*b[1] + */ +void bn_sqr_recursive(BN_ULONG *r, const BN_ULONG *a, int n2, BN_ULONG *t) + { + int n=n2/2; + int zero,c1; + BN_ULONG ln,lo,*p; + +#ifdef BN_COUNT + fprintf(stderr," bn_sqr_recursive %d * %d\n",n2,n2); +#endif + if (n2 == 4) + { +#ifndef BN_SQR_COMBA + bn_sqr_normal(r,a,4,t); +#else + bn_sqr_comba4(r,a); +#endif + return; + } + else if (n2 == 8) + { +#ifndef BN_SQR_COMBA + bn_sqr_normal(r,a,8,t); +#else + bn_sqr_comba8(r,a); +#endif + return; + } + if (n2 < BN_SQR_RECURSIVE_SIZE_NORMAL) + { + bn_sqr_normal(r,a,n2,t); + return; + } + /* r=(a[0]-a[1])*(a[1]-a[0]) */ + c1=bn_cmp_words(a,&(a[n]),n); + zero=0; + if (c1 > 0) + bn_sub_words(t,a,&(a[n]),n); + else if (c1 < 0) + bn_sub_words(t,&(a[n]),a,n); + else + zero=1; + + /* The result will always be negative unless it is zero */ + p= &(t[n2*2]); + + if (!zero) + bn_sqr_recursive(&(t[n2]),t,n,p); + else + memset(&(t[n2]),0,n2*sizeof(BN_ULONG)); + bn_sqr_recursive(r,a,n,p); + bn_sqr_recursive(&(r[n2]),&(a[n]),n,p); + + /* t[32] holds (a[0]-a[1])*(a[1]-a[0]), it is negative or zero + * r[10] holds (a[0]*b[0]) + * r[32] holds (b[1]*b[1]) + */ + + c1=(int)(bn_add_words(t,r,&(r[n2]),n2)); + + /* t[32] is negative */ + c1-=(int)(bn_sub_words(&(t[n2]),t,&(t[n2]),n2)); + + /* t[32] holds (a[0]-a[1])*(a[1]-a[0])+(a[0]*a[0])+(a[1]*a[1]) + * r[10] holds (a[0]*a[0]) + * r[32] holds (a[1]*a[1]) + * c1 holds the carry bits + */ + c1+=(int)(bn_add_words(&(r[n]),&(r[n]),&(t[n2]),n2)); + if (c1) + { + p= &(r[n+n2]); + lo= *p; + ln=(lo+c1)&BN_MASK2; + *p=ln; + + /* The overflow will stop before we over write + * words we should not overwrite */ + if (ln < (BN_ULONG)c1) + { + do { + p++; + lo= *p; + ln=(lo+1)&BN_MASK2; + *p=ln; + } while (ln == 0); + } + } + } +#endif diff --git a/src/lib/libcrypto/bn/bn_word.c b/src/lib/libcrypto/bn/bn_word.c index 4b3d0f011d..cd59baa2c4 100644 --- a/src/lib/libcrypto/bn/bn_word.c +++ b/src/lib/libcrypto/bn/bn_word.c @@ -60,9 +60,7 @@ #include "cryptlib.h" #include "bn_lcl.h" -BN_ULONG BN_mod_word(a, w) -BIGNUM *a; -unsigned long w; +BN_ULONG BN_mod_word(const BIGNUM *a, BN_ULONG w) { #ifndef BN_LLONG BN_ULONG ret=0; @@ -75,8 +73,8 @@ unsigned long w; for (i=a->top-1; i>=0; i--) { #ifndef BN_LLONG - ret=((ret<d[i]>>BN_BITS4)&BN_MASK2l))%(unsigned long)w; - ret=((ret<d[i]&BN_MASK2l))%(unsigned long)w; + ret=((ret<d[i]>>BN_BITS4)&BN_MASK2l))%w; + ret=((ret<d[i]&BN_MASK2l))%w; #else ret=(BN_ULLONG)(((ret<<(BN_ULLONG)BN_BITS2)|a->d[i])% (BN_ULLONG)w); @@ -85,9 +83,7 @@ unsigned long w; return((BN_ULONG)ret); } -BN_ULONG BN_div_word(a, w) -BIGNUM *a; -unsigned long w; +BN_ULONG BN_div_word(BIGNUM *a, BN_ULONG w) { BN_ULONG ret; int i; @@ -100,18 +96,16 @@ unsigned long w; BN_ULONG l,d; l=a->d[i]; - d=bn_div64(ret,l,w); + d=bn_div_words(ret,l,w); ret=(l-((d*w)&BN_MASK2))&BN_MASK2; a->d[i]=d; } - if (a->d[a->top-1] == 0) + if ((a->top > 0) && (a->d[a->top-1] == 0)) a->top--; return(ret); } -int BN_add_word(a, w) -BIGNUM *a; -unsigned long w; +int BN_add_word(BIGNUM *a, BN_ULONG w) { BN_ULONG l; int i; @@ -121,7 +115,7 @@ unsigned long w; a->neg=0; i=BN_sub_word(a,w); if (!BN_is_zero(a)) - a->neg=1; + a->neg=!(a->neg); return(i); } w&=BN_MASK2; @@ -142,13 +136,11 @@ unsigned long w; return(1); } -int BN_sub_word(a, w) -BIGNUM *a; -unsigned long w; +int BN_sub_word(BIGNUM *a, BN_ULONG w) { int i; - if (a->neg) + if (BN_is_zero(a) || a->neg) { a->neg=0; i=BN_add_word(a,w); @@ -183,22 +175,25 @@ unsigned long w; return(1); } -int BN_mul_word(a,w) -BIGNUM *a; -unsigned long w; +int BN_mul_word(BIGNUM *a, BN_ULONG w) { BN_ULONG ll; w&=BN_MASK2; if (a->top) { - ll=bn_mul_words(a->d,a->d,a->top,w); - if (ll) + if (w == 0) + BN_zero(a); + else { - if (bn_wexpand(a,a->top+1) == NULL) return(0); - a->d[a->top++]=ll; + ll=bn_mul_words(a->d,a->d,a->top,w); + if (ll) + { + if (bn_wexpand(a,a->top+1) == NULL) return(0); + a->d[a->top++]=ll; + } } } - return(0); + return(1); } -- cgit v1.2.3-55-g6feb