From 829fd51d4f8dde4a7f3bf54754f3c1d1a502f5e2 Mon Sep 17 00:00:00 2001 From: djm <> Date: Fri, 1 Oct 2010 22:54:21 +0000 Subject: import OpenSSL-1.0.0a --- src/lib/libcrypto/bn/asm/alpha-mont.pl | 8 +- src/lib/libcrypto/bn/asm/armv4-mont.pl | 1 + src/lib/libcrypto/bn/asm/bn-586.pl | 203 ++++++++++++++------ src/lib/libcrypto/bn/asm/co-586.pl | 3 +- src/lib/libcrypto/bn/asm/ppc.pl | 233 +++++++---------------- src/lib/libcrypto/bn/asm/sparcv8plus.S | 15 +- src/lib/libcrypto/bn/asm/x86_64-gcc.c | 29 ++- src/lib/libcrypto/bn/asm/x86_64-mont.pl | 136 +++++++++++++- src/lib/libcrypto/bn/bn.h | 181 +++++++++--------- src/lib/libcrypto/bn/bn_asm.c | 322 ++++++++++++++++++++++++-------- src/lib/libcrypto/bn/bn_blind.c | 17 +- src/lib/libcrypto/bn/bn_ctx.c | 6 +- src/lib/libcrypto/bn/bn_div.c | 15 +- src/lib/libcrypto/bn/bn_exp.c | 3 +- src/lib/libcrypto/bn/bn_gf2m.c | 145 +++++--------- src/lib/libcrypto/bn/bn_lcl.h | 3 +- src/lib/libcrypto/bn/bn_lib.c | 29 ++- src/lib/libcrypto/bn/bn_mont.c | 269 ++++++-------------------- src/lib/libcrypto/bn/bn_mul.c | 10 +- src/lib/libcrypto/bn/bn_print.c | 21 +++ 20 files changed, 899 insertions(+), 750 deletions(-) (limited to 'src/lib/libcrypto/bn') diff --git a/src/lib/libcrypto/bn/asm/alpha-mont.pl b/src/lib/libcrypto/bn/asm/alpha-mont.pl index 7a2cc3173b..f7e0ca1646 100644 --- a/src/lib/libcrypto/bn/asm/alpha-mont.pl +++ b/src/lib/libcrypto/bn/asm/alpha-mont.pl @@ -53,15 +53,15 @@ $code=<<___; .align 5 .ent bn_mul_mont bn_mul_mont: - lda sp,-40(sp) + lda sp,-48(sp) stq ra,0(sp) stq s3,8(sp) stq s4,16(sp) stq s5,24(sp) stq fp,32(sp) mov sp,fp - .mask 0x0400f000,-40 - .frame fp,40,ra + .mask 0x0400f000,-48 + .frame fp,48,ra .prologue 0 .align 4 @@ -306,7 +306,7 @@ bn_mul_mont: ldq s4,16(sp) ldq s5,24(sp) ldq fp,32(sp) - lda sp,40(sp) + lda sp,48(sp) ret (ra) .end bn_mul_mont .rdata diff --git a/src/lib/libcrypto/bn/asm/armv4-mont.pl b/src/lib/libcrypto/bn/asm/armv4-mont.pl index 05d5dc1a48..14e0d2d1dd 100644 --- a/src/lib/libcrypto/bn/asm/armv4-mont.pl +++ b/src/lib/libcrypto/bn/asm/armv4-mont.pl @@ -193,6 +193,7 @@ bn_mul_mont: bx lr @ interoperable with Thumb ISA:-) .size bn_mul_mont,.-bn_mul_mont .asciz "Montgomery multiplication for ARMv4, CRYPTOGAMS by " +.align 2 ___ $code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4 diff --git a/src/lib/libcrypto/bn/asm/bn-586.pl b/src/lib/libcrypto/bn/asm/bn-586.pl index 26c2685a72..332ef3e91d 100644 --- a/src/lib/libcrypto/bn/asm/bn-586.pl +++ b/src/lib/libcrypto/bn/asm/bn-586.pl @@ -1,6 +1,7 @@ #!/usr/local/bin/perl -push(@INC,"perlasm","../../perlasm"); +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; +push(@INC,"${dir}","${dir}../../perlasm"); require "x86asm.pl"; &asm_init($ARGV[0],$0); @@ -24,38 +25,25 @@ sub bn_mul_add_words { local($name)=@_; - &function_begin($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":""); + &function_begin_B($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":""); - &comment(""); - $Low="eax"; - $High="edx"; - $a="ebx"; - $w="ebp"; - $r="edi"; - $c="esi"; - - &xor($c,$c); # clear carry - &mov($r,&wparam(0)); # - - &mov("ecx",&wparam(2)); # - &mov($a,&wparam(1)); # - - &and("ecx",0xfffffff8); # num / 8 - &mov($w,&wparam(3)); # - - &push("ecx"); # Up the stack for a tmp variable - - &jz(&label("maw_finish")); + $r="eax"; + $a="edx"; + $c="ecx"; if ($sse2) { &picmeup("eax","OPENSSL_ia32cap_P"); &bt(&DWP(0,"eax"),26); - &jnc(&label("maw_loop")); + &jnc(&label("maw_non_sse2")); - &movd("mm0",$w); # mm0 = w + &mov($r,&wparam(0)); + &mov($a,&wparam(1)); + &mov($c,&wparam(2)); + &movd("mm0",&wparam(3)); # mm0 = w &pxor("mm1","mm1"); # mm1 = carry_in - - &set_label("maw_sse2_loop",0); + &jmp(&label("maw_sse2_entry")); + + &set_label("maw_sse2_unrolled",16); &movd("mm3",&DWP(0,$r,"",0)); # mm3 = r[0] &paddq("mm1","mm3"); # mm1 = carry_in + r[0] &movd("mm2",&DWP(0,$a,"",0)); # mm2 = a[0] @@ -112,42 +100,82 @@ sub bn_mul_add_words &psrlq("mm1",32); # mm1 = carry6 &paddq("mm1","mm3"); # mm1 = carry6 + r[7] + w*a[7] &movd(&DWP(28,$r,"",0),"mm1"); - &add($r,32); + &lea($r,&DWP(32,$r)); &psrlq("mm1",32); # mm1 = carry_out - &sub("ecx",8); + &sub($c,8); + &jz(&label("maw_sse2_exit")); + &set_label("maw_sse2_entry"); + &test($c,0xfffffff8); + &jnz(&label("maw_sse2_unrolled")); + + &set_label("maw_sse2_loop",4); + &movd("mm2",&DWP(0,$a)); # mm2 = a[i] + &movd("mm3",&DWP(0,$r)); # mm3 = r[i] + &pmuludq("mm2","mm0"); # a[i] *= w + &lea($a,&DWP(4,$a)); + &paddq("mm1","mm3"); # carry += r[i] + &paddq("mm1","mm2"); # carry += a[i]*w + &movd(&DWP(0,$r),"mm1"); # r[i] = carry_low + &sub($c,1); + &psrlq("mm1",32); # carry = carry_high + &lea($r,&DWP(4,$r)); &jnz(&label("maw_sse2_loop")); - - &movd($c,"mm1"); # c = carry_out + &set_label("maw_sse2_exit"); + &movd("eax","mm1"); # c = carry_out &emms(); + &ret(); - &jmp(&label("maw_finish")); + &set_label("maw_non_sse2",16); } - &set_label("maw_loop",0); + # function_begin prologue + &push("ebp"); + &push("ebx"); + &push("esi"); + &push("edi"); + + &comment(""); + $Low="eax"; + $High="edx"; + $a="ebx"; + $w="ebp"; + $r="edi"; + $c="esi"; + + &xor($c,$c); # clear carry + &mov($r,&wparam(0)); # + + &mov("ecx",&wparam(2)); # + &mov($a,&wparam(1)); # + + &and("ecx",0xfffffff8); # num / 8 + &mov($w,&wparam(3)); # - &mov(&swtmp(0),"ecx"); # + &push("ecx"); # Up the stack for a tmp variable + + &jz(&label("maw_finish")); + + &set_label("maw_loop",16); for ($i=0; $i<32; $i+=4) { &comment("Round $i"); - &mov("eax",&DWP($i,$a,"",0)); # *a + &mov("eax",&DWP($i,$a)); # *a &mul($w); # *a * w - &add("eax",$c); # L(t)+= *r - &mov($c,&DWP($i,$r,"",0)); # L(t)+= *r + &add("eax",$c); # L(t)+= c &adc("edx",0); # H(t)+=carry - &add("eax",$c); # L(t)+=c + &add("eax",&DWP($i,$r)); # L(t)+= *r &adc("edx",0); # H(t)+=carry - &mov(&DWP($i,$r,"",0),"eax"); # *r= L(t); + &mov(&DWP($i,$r),"eax"); # *r= L(t); &mov($c,"edx"); # c= H(t); } &comment(""); - &mov("ecx",&swtmp(0)); # - &add($a,32); - &add($r,32); &sub("ecx",8); + &lea($a,&DWP(32,$a)); + &lea($r,&DWP(32,$r)); &jnz(&label("maw_loop")); &set_label("maw_finish",0); @@ -160,16 +188,15 @@ sub bn_mul_add_words for ($i=0; $i<7; $i++) { &comment("Tail Round $i"); - &mov("eax",&DWP($i*4,$a,"",0));# *a + &mov("eax",&DWP($i*4,$a)); # *a &mul($w); # *a * w &add("eax",$c); # L(t)+=c - &mov($c,&DWP($i*4,$r,"",0)); # L(t)+= *r &adc("edx",0); # H(t)+=carry - &add("eax",$c); + &add("eax",&DWP($i*4,$r)); # L(t)+= *r &adc("edx",0); # H(t)+=carry &dec("ecx") if ($i != 7-1); - &mov(&DWP($i*4,$r,"",0),"eax"); # *r= L(t); - &mov($c,"edx"); # c= H(t); + &mov(&DWP($i*4,$r),"eax"); # *r= L(t); + &mov($c,"edx"); # c= H(t); &jz(&label("maw_end")) if ($i != 7-1); } &set_label("maw_end",0); @@ -184,7 +211,45 @@ sub bn_mul_words { local($name)=@_; - &function_begin($name,""); + &function_begin_B($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":""); + + $r="eax"; + $a="edx"; + $c="ecx"; + + if ($sse2) { + &picmeup("eax","OPENSSL_ia32cap_P"); + &bt(&DWP(0,"eax"),26); + &jnc(&label("mw_non_sse2")); + + &mov($r,&wparam(0)); + &mov($a,&wparam(1)); + &mov($c,&wparam(2)); + &movd("mm0",&wparam(3)); # mm0 = w + &pxor("mm1","mm1"); # mm1 = carry = 0 + + &set_label("mw_sse2_loop",16); + &movd("mm2",&DWP(0,$a)); # mm2 = a[i] + &pmuludq("mm2","mm0"); # a[i] *= w + &lea($a,&DWP(4,$a)); + &paddq("mm1","mm2"); # carry += a[i]*w + &movd(&DWP(0,$r),"mm1"); # r[i] = carry_low + &sub($c,1); + &psrlq("mm1",32); # carry = carry_high + &lea($r,&DWP(4,$r)); + &jnz(&label("mw_sse2_loop")); + + &movd("eax","mm1"); # return carry + &emms(); + &ret(); + &set_label("mw_non_sse2",16); + } + + # function_begin prologue + &push("ebp"); + &push("ebx"); + &push("esi"); + &push("edi"); &comment(""); $Low="eax"; @@ -257,7 +322,40 @@ sub bn_sqr_words { local($name)=@_; - &function_begin($name,""); + &function_begin_B($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":""); + + $r="eax"; + $a="edx"; + $c="ecx"; + + if ($sse2) { + &picmeup("eax","OPENSSL_ia32cap_P"); + &bt(&DWP(0,"eax"),26); + &jnc(&label("sqr_non_sse2")); + + &mov($r,&wparam(0)); + &mov($a,&wparam(1)); + &mov($c,&wparam(2)); + + &set_label("sqr_sse2_loop",16); + &movd("mm0",&DWP(0,$a)); # mm0 = a[i] + &pmuludq("mm0","mm0"); # a[i] *= a[i] + &lea($a,&DWP(4,$a)); # a++ + &movq(&QWP(0,$r),"mm0"); # r[i] = a[i]*a[i] + &sub($c,1); + &lea($r,&DWP(8,$r)); # r += 2 + &jnz(&label("sqr_sse2_loop")); + + &emms(); + &ret(); + &set_label("sqr_non_sse2",16); + } + + # function_begin prologue + &push("ebp"); + &push("ebx"); + &push("esi"); + &push("edi"); &comment(""); $r="esi"; @@ -313,12 +411,13 @@ sub bn_div_words { local($name)=@_; - &function_begin($name,""); + &function_begin_B($name,""); &mov("edx",&wparam(0)); # &mov("eax",&wparam(1)); # - &mov("ebx",&wparam(2)); # - &div("ebx"); - &function_end($name); + &mov("ecx",&wparam(2)); # + &div("ecx"); + &ret(); + &function_end_B($name); } sub bn_add_words diff --git a/src/lib/libcrypto/bn/asm/co-586.pl b/src/lib/libcrypto/bn/asm/co-586.pl index 5d962cb957..57101a6bd7 100644 --- a/src/lib/libcrypto/bn/asm/co-586.pl +++ b/src/lib/libcrypto/bn/asm/co-586.pl @@ -1,6 +1,7 @@ #!/usr/local/bin/perl -push(@INC,"perlasm","../../perlasm"); +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; +push(@INC,"${dir}","${dir}../../perlasm"); require "x86asm.pl"; &asm_init($ARGV[0],$0); diff --git a/src/lib/libcrypto/bn/asm/ppc.pl b/src/lib/libcrypto/bn/asm/ppc.pl index 08e0053473..37c65d3511 100644 --- a/src/lib/libcrypto/bn/asm/ppc.pl +++ b/src/lib/libcrypto/bn/asm/ppc.pl @@ -100,9 +100,9 @@ # me a note at schari@us.ibm.com # -$opf = shift; +$flavour = shift; -if ($opf =~ /32\.s/) { +if ($flavour =~ /32/) { $BITS= 32; $BNSZ= $BITS/8; $ISA= "\"ppc\""; @@ -125,7 +125,7 @@ if ($opf =~ /32\.s/) { $INSR= "insrwi"; # insert right $ROTL= "rotlwi"; # rotate left by immediate $TR= "tw"; # conditional trap -} elsif ($opf =~ /64\.s/) { +} elsif ($flavour =~ /64/) { $BITS= 64; $BNSZ= $BITS/8; $ISA= "\"ppc64\""; @@ -149,93 +149,16 @@ if ($opf =~ /32\.s/) { $INSR= "insrdi"; # insert right $ROTL= "rotldi"; # rotate left by immediate $TR= "td"; # conditional trap -} else { die "nonsense $opf"; } +} else { die "nonsense $flavour"; } -( defined shift || open STDOUT,">$opf" ) || die "can't open $opf: $!"; +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; +( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or +( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or +die "can't locate ppc-xlate.pl"; -# function entry points from the AIX code -# -# There are other, more elegant, ways to handle this. We (IBM) chose -# this approach as it plays well with scripts we run to 'namespace' -# OpenSSL .i.e. we add a prefix to all the public symbols so we can -# co-exist in the same process with other implementations of OpenSSL. -# 'cleverer' ways of doing these substitutions tend to hide data we -# need to be obvious. -# -my @items = ("bn_sqr_comba4", - "bn_sqr_comba8", - "bn_mul_comba4", - "bn_mul_comba8", - "bn_sub_words", - "bn_add_words", - "bn_div_words", - "bn_sqr_words", - "bn_mul_words", - "bn_mul_add_words"); +open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!"; -if ($opf =~ /linux/) { do_linux(); } -elsif ($opf =~ /aix/) { do_aix(); } -elsif ($opf =~ /osx/) { do_osx(); } -else { do_bsd(); } - -sub do_linux { - $d=&data(); - - if ($BITS==64) { - foreach $t (@items) { - $d =~ s/\.$t:/\ -\t.section\t".opd","aw"\ -\t.align\t3\ -\t.globl\t$t\ -$t:\ -\t.quad\t.$t,.TOC.\@tocbase,0\ -\t.size\t$t,24\ -\t.previous\n\ -\t.type\t.$t,\@function\ -\t.globl\t.$t\ -.$t:/g; - } - } - else { - foreach $t (@items) { - $d=~s/\.$t/$t/g; - } - } - # hide internal labels to avoid pollution of name table... - $d=~s/Lppcasm_/.Lppcasm_/gm; - print $d; -} - -sub do_aix { - # AIX assembler is smart enough to please the linker without - # making us do something special... - print &data(); -} - -# MacOSX 32 bit -sub do_osx { - $d=&data(); - # Change the bn symbol prefix from '.' to '_' - foreach $t (@items) { - $d=~s/\.$t/_$t/g; - } - # Change .machine to something OS X asm will accept - $d=~s/\.machine.*/.text/g; - $d=~s/\#/;/g; # change comment from '#' to ';' - print $d; -} - -# BSD (Untested) -sub do_bsd { - $d=&data(); - foreach $t (@items) { - $d=~s/\.$t/_$t/g; - } - print $d; -} - -sub data { - local($data)=< 0 then result !=0 # In either case carry bit is set. - bc BO_IF,CR0_EQ,Lppcasm_sub_adios + beq Lppcasm_sub_adios addi r4,r4,-$BNSZ addi r3,r3,-$BNSZ addi r5,r5,-$BNSZ @@ -1635,11 +1545,11 @@ Lppcasm_sub_mainloop: # if carry = 1 this is r7-r8. Else it # is r7-r8 -1 as we need. $STU r6,$BNSZ(r3) - bc BO_dCTR_NZERO,CR0_EQ,Lppcasm_sub_mainloop + bdnz- Lppcasm_sub_mainloop Lppcasm_sub_adios: subfze r3,r0 # if carry bit is set then r3 = 0 else -1 andi. r3,r3,1 # keep only last bit. - bclr BO_ALWAYS,CR0_LT + blr .long 0x00000000 @@ -1670,7 +1580,7 @@ Lppcasm_sub_adios: # check for r6 = 0. Is this needed? # addic. r6,r6,0 #test r6 and clear carry bit. - bc BO_IF,CR0_EQ,Lppcasm_add_adios + beq Lppcasm_add_adios addi r4,r4,-$BNSZ addi r3,r3,-$BNSZ addi r5,r5,-$BNSZ @@ -1680,10 +1590,10 @@ Lppcasm_add_mainloop: $LDU r8,$BNSZ(r5) adde r8,r7,r8 $STU r8,$BNSZ(r3) - bc BO_dCTR_NZERO,CR0_EQ,Lppcasm_add_mainloop + bdnz- Lppcasm_add_mainloop Lppcasm_add_adios: addze r3,r0 #return carry bit. - bclr BO_ALWAYS,CR0_LT + blr .long 0x00000000 # @@ -1707,24 +1617,24 @@ Lppcasm_add_adios: # r5 = d $UCMPI 0,r5,0 # compare r5 and 0 - bc BO_IF_NOT,CR0_EQ,Lppcasm_div1 # proceed if d!=0 + bne Lppcasm_div1 # proceed if d!=0 li r3,-1 # d=0 return -1 - bclr BO_ALWAYS,CR0_LT + blr Lppcasm_div1: xor r0,r0,r0 #r0=0 li r8,$BITS $CNTLZ. r7,r5 #r7 = num leading 0s in d. - bc BO_IF,CR0_EQ,Lppcasm_div2 #proceed if no leading zeros + beq Lppcasm_div2 #proceed if no leading zeros subf r8,r7,r8 #r8 = BN_num_bits_word(d) $SHR. r9,r3,r8 #are there any bits above r8'th? $TR 16,r9,r0 #if there're, signal to dump core... Lppcasm_div2: $UCMP 0,r3,r5 #h>=d? - bc BO_IF,CR0_LT,Lppcasm_div3 #goto Lppcasm_div3 if not + blt Lppcasm_div3 #goto Lppcasm_div3 if not subf r3,r5,r3 #h-=d ; Lppcasm_div3: #r7 = BN_BITS2-i. so r7=i cmpi 0,0,r7,0 # is (i == 0)? - bc BO_IF,CR0_EQ,Lppcasm_div4 + beq Lppcasm_div4 $SHL r3,r3,r7 # h = (h<< i) $SHR r8,r4,r8 # r8 = (l >> BN_BITS2 -i) $SHL r5,r5,r7 # d<<=i @@ -1741,7 +1651,7 @@ Lppcasm_divouterloop: $SHRI r11,r4,`$BITS/2` #r11= (l&BN_MASK2h)>>BN_BITS4 # compute here for innerloop. $UCMP 0,r8,r9 # is (h>>BN_BITS4)==dh - bc BO_IF_NOT,CR0_EQ,Lppcasm_div5 # goto Lppcasm_div5 if not + bne Lppcasm_div5 # goto Lppcasm_div5 if not li r8,-1 $CLRU r8,r8,`$BITS/2` #q = BN_MASK2l @@ -1762,9 +1672,9 @@ Lppcasm_divinnerloop: # the following 2 instructions do that $SHLI r7,r10,`$BITS/2` # r7 = (t<>BN_BITS4) - $UCMP 1,r6,r7 # compare (tl <= r7) - bc BO_IF_NOT,CR0_EQ,Lppcasm_divinnerexit - bc BO_IF_NOT,CR1_FEX,Lppcasm_divinnerexit + $UCMP cr1,r6,r7 # compare (tl <= r7) + bne Lppcasm_divinnerexit + ble cr1,Lppcasm_divinnerexit addi r8,r8,-1 #q-- subf r12,r9,r12 #th -=dh $CLRU r10,r5,`$BITS/2` #r10=dl. t is no longer needed in loop. @@ -1773,14 +1683,14 @@ Lppcasm_divinnerloop: Lppcasm_divinnerexit: $SHRI r10,r6,`$BITS/2` #t=(tl>>BN_BITS4) $SHLI r11,r6,`$BITS/2` #tl=(tl<=tl) goto Lppcasm_div7 + bge cr1,Lppcasm_div7 # if (l>=tl) goto Lppcasm_div7 addi r12,r12,1 # th++ Lppcasm_div7: subf r11,r11,r4 #r11=l-tl - $UCMP 1,r3,r12 #compare h and th - bc BO_IF_NOT,CR1_FX,Lppcasm_div8 #if (h>=th) goto Lppcasm_div8 + $UCMP cr1,r3,r12 #compare h and th + bge cr1,Lppcasm_div8 #if (h>=th) goto Lppcasm_div8 addi r8,r8,-1 # q-- add r3,r5,r3 # h+=d Lppcasm_div8: @@ -1791,12 +1701,12 @@ Lppcasm_div8: # the following 2 instructions will do this. $INSR r11,r12,`$BITS/2`,`$BITS/2` # r11 is the value we want rotated $BITS/2. $ROTL r3,r11,`$BITS/2` # rotate by $BITS/2 and store in r3 - bc BO_dCTR_ZERO,CR0_EQ,Lppcasm_div9#if (count==0) break ; + bdz Lppcasm_div9 #if (count==0) break ; $SHLI r0,r8,`$BITS/2` #ret =q<> 2 - bc BO_IF,CR0_EQ,Lppcasm_mw_REM + beq Lppcasm_mw_REM mtctr r7 Lppcasm_mw_LOOP: #mul(rp[0],ap[0],w,c1); @@ -1896,11 +1806,11 @@ Lppcasm_mw_LOOP: addi r3,r3,`4*$BNSZ` addi r4,r4,`4*$BNSZ` - bc BO_dCTR_NZERO,CR0_EQ,Lppcasm_mw_LOOP + bdnz- Lppcasm_mw_LOOP Lppcasm_mw_REM: andi. r5,r5,0x3 - bc BO_IF,CR0_EQ,Lppcasm_mw_OVER + beq Lppcasm_mw_OVER #mul(rp[0],ap[0],w,c1); $LD r8,`0*$BNSZ`(r4) $UMULL r9,r6,r8 @@ -1912,7 +1822,7 @@ Lppcasm_mw_REM: addi r5,r5,-1 cmpli 0,0,r5,0 - bc BO_IF,CR0_EQ,Lppcasm_mw_OVER + beq Lppcasm_mw_OVER #mul(rp[1],ap[1],w,c1); @@ -1926,7 +1836,7 @@ Lppcasm_mw_REM: addi r5,r5,-1 cmpli 0,0,r5,0 - bc BO_IF,CR0_EQ,Lppcasm_mw_OVER + beq Lppcasm_mw_OVER #mul_add(rp[2],ap[2],w,c1); $LD r8,`2*$BNSZ`(r4) @@ -1939,7 +1849,7 @@ Lppcasm_mw_REM: Lppcasm_mw_OVER: addi r3,r12,0 - bclr BO_ALWAYS,CR0_LT + blr .long 0x00000000 # @@ -1964,7 +1874,7 @@ Lppcasm_mw_OVER: xor r0,r0,r0 #r0 = 0 xor r12,r12,r12 #r12 = 0 . used for carry rlwinm. r7,r5,30,2,31 # num >> 2 - bc BO_IF,CR0_EQ,Lppcasm_maw_leftover # if (num < 4) go LPPCASM_maw_leftover + beq Lppcasm_maw_leftover # if (num < 4) go LPPCASM_maw_leftover mtctr r7 Lppcasm_maw_mainloop: #mul_add(rp[0],ap[0],w,c1); @@ -2017,11 +1927,11 @@ Lppcasm_maw_mainloop: $ST r11,`3*$BNSZ`(r3) addi r3,r3,`4*$BNSZ` addi r4,r4,`4*$BNSZ` - bc BO_dCTR_NZERO,CR0_EQ,Lppcasm_maw_mainloop + bdnz- Lppcasm_maw_mainloop Lppcasm_maw_leftover: andi. r5,r5,0x3 - bc BO_IF,CR0_EQ,Lppcasm_maw_adios + beq Lppcasm_maw_adios addi r3,r3,-$BNSZ addi r4,r4,-$BNSZ #mul_add(rp[0],ap[0],w,c1); @@ -2036,7 +1946,7 @@ Lppcasm_maw_leftover: addze r12,r10 $ST r9,0(r3) - bc BO_dCTR_ZERO,CR0_EQ,Lppcasm_maw_adios + bdz Lppcasm_maw_adios #mul_add(rp[1],ap[1],w,c1); $LDU r8,$BNSZ(r4) $UMULL r9,r6,r8 @@ -2048,7 +1958,7 @@ Lppcasm_maw_leftover: addze r12,r10 $ST r9,0(r3) - bc BO_dCTR_ZERO,CR0_EQ,Lppcasm_maw_adios + bdz Lppcasm_maw_adios #mul_add(rp[2],ap[2],w,c1); $LDU r8,$BNSZ(r4) $UMULL r9,r6,r8 @@ -2062,17 +1972,10 @@ Lppcasm_maw_leftover: Lppcasm_maw_adios: addi r3,r12,0 - bclr BO_ALWAYS,CR0_LT + blr .long 0x00000000 .align 4 EOF - $data =~ s/\`([^\`]*)\`/eval $1/gem; - - # if some assembler chokes on some simplified mnemonic, - # this is the spot to fix it up, e.g.: - # GNU as doesn't seem to accept cmplw, 32-bit unsigned compare - $data =~ s/^(\s*)cmplw(\s+)([^,]+),(.*)/$1cmpl$2$3,0,$4/gm; - # assembler X doesn't accept li, load immediate value - #$data =~ s/^(\s*)li(\s+)([^,]+),(.*)/$1addi$2$3,0,$4/gm; - return($data); -} +$data =~ s/\`([^\`]*)\`/eval $1/gem; +print $data; +close STDOUT; diff --git a/src/lib/libcrypto/bn/asm/sparcv8plus.S b/src/lib/libcrypto/bn/asm/sparcv8plus.S index 8c56e2e7e7..63de1860f2 100644 --- a/src/lib/libcrypto/bn/asm/sparcv8plus.S +++ b/src/lib/libcrypto/bn/asm/sparcv8plus.S @@ -144,6 +144,19 @@ * } */ +#if defined(__SUNPRO_C) && defined(__sparcv9) + /* They've said -xarch=v9 at command line */ + .register %g2,#scratch + .register %g3,#scratch +# define FRAME_SIZE -192 +#elif defined(__GNUC__) && defined(__arch64__) + /* They've said -m64 at command line */ + .register %g2,#scratch + .register %g3,#scratch +# define FRAME_SIZE -192 +#else +# define FRAME_SIZE -96 +#endif /* * GNU assembler can't stand stuw:-( */ @@ -619,8 +632,6 @@ bn_sub_words: * Andy. */ -#define FRAME_SIZE -96 - /* * Here is register usage map for *all* routines below. */ diff --git a/src/lib/libcrypto/bn/asm/x86_64-gcc.c b/src/lib/libcrypto/bn/asm/x86_64-gcc.c index f13f52dd85..acb0b40118 100644 --- a/src/lib/libcrypto/bn/asm/x86_64-gcc.c +++ b/src/lib/libcrypto/bn/asm/x86_64-gcc.c @@ -1,4 +1,5 @@ -#ifdef __SUNPRO_C +#include "../bn_lcl.h" +#if !(defined(__GNUC__) && __GNUC__>=2) # include "../bn_asm.c" /* kind of dirty hack for Sun Studio */ #else /* @@ -54,7 +55,15 @@ * machine. */ +#ifdef _WIN64 +#define BN_ULONG unsigned long long +#else #define BN_ULONG unsigned long +#endif + +#undef mul +#undef mul_add +#undef sqr /* * "m"(a), "+m"(r) is the way to favor DirectPath µ-code; @@ -97,7 +106,7 @@ : "a"(a) \ : "cc"); -BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) +BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) { BN_ULONG c1=0; @@ -121,7 +130,7 @@ BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) return(c1); } -BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) +BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) { BN_ULONG c1=0; @@ -144,7 +153,7 @@ BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) return(c1); } -void bn_sqr_words(BN_ULONG *r, BN_ULONG *a, int n) +void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n) { if (n <= 0) return; @@ -175,14 +184,14 @@ BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d) return ret; } -BN_ULONG bn_add_words (BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp,int n) +BN_ULONG bn_add_words (BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,int n) { BN_ULONG ret=0,i=0; if (n <= 0) return 0; asm ( " subq %2,%2 \n" - ".align 16 \n" + ".p2align 4 \n" "1: movq (%4,%2,8),%0 \n" " adcq (%5,%2,8),%0 \n" " movq %0,(%3,%2,8) \n" @@ -198,14 +207,14 @@ BN_ULONG bn_add_words (BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp,int n) } #ifndef SIMICS -BN_ULONG bn_sub_words (BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp,int n) +BN_ULONG bn_sub_words (BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,int n) { BN_ULONG ret=0,i=0; if (n <= 0) return 0; asm ( " subq %2,%2 \n" - ".align 16 \n" + ".p2align 4 \n" "1: movq (%4,%2,8),%0 \n" " sbbq (%5,%2,8),%0 \n" " movq %0,(%3,%2,8) \n" @@ -485,7 +494,7 @@ void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) r[7]=c2; } -void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a) +void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a) { BN_ULONG t1,t2; BN_ULONG c1,c2,c3; @@ -561,7 +570,7 @@ void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a) r[15]=c1; } -void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a) +void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) { BN_ULONG t1,t2; BN_ULONG c1,c2,c3; diff --git a/src/lib/libcrypto/bn/asm/x86_64-mont.pl b/src/lib/libcrypto/bn/asm/x86_64-mont.pl index c43b69592a..3b7a6f243f 100755 --- a/src/lib/libcrypto/bn/asm/x86_64-mont.pl +++ b/src/lib/libcrypto/bn/asm/x86_64-mont.pl @@ -15,14 +15,18 @@ # respectful 50%. It remains to be seen if loop unrolling and # dedicated squaring routine can provide further improvement... -$output=shift; +$flavour = shift; +$output = shift; +if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } + +$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; ( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or ( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or die "can't locate x86_64-xlate.pl"; -open STDOUT,"| $^X $xlate $output"; +open STDOUT,"| $^X $xlate $flavour $output"; # int bn_mul_mont( $rp="%rdi"; # BN_ULONG *rp, @@ -55,13 +59,14 @@ bn_mul_mont: push %r15 mov ${num}d,${num}d - lea 2($num),%rax - mov %rsp,%rbp - neg %rax - lea (%rsp,%rax,8),%rsp # tp=alloca(8*(num+2)) + lea 2($num),%r10 + mov %rsp,%r11 + neg %r10 + lea (%rsp,%r10,8),%rsp # tp=alloca(8*(num+2)) and \$-1024,%rsp # minimize TLB usage - mov %rbp,8(%rsp,$num,8) # tp[num+1]=%rsp + mov %r11,8(%rsp,$num,8) # tp[num+1]=%rsp +.Lprologue: mov %rdx,$bp # $bp reassigned, remember? mov ($n0),$n0 # pull n0[0] value @@ -197,18 +202,129 @@ bn_mul_mont: dec $j jge .Lcopy - mov 8(%rsp,$num,8),%rsp # restore %rsp + mov 8(%rsp,$num,8),%rsi # restore %rsp mov \$1,%rax + mov (%rsi),%r15 + mov 8(%rsi),%r14 + mov 16(%rsi),%r13 + mov 24(%rsi),%r12 + mov 32(%rsi),%rbp + mov 40(%rsi),%rbx + lea 48(%rsi),%rsp +.Lepilogue: + ret +.size bn_mul_mont,.-bn_mul_mont +.asciz "Montgomery Multiplication for x86_64, CRYPTOGAMS by " +.align 16 +___ + +# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame, +# CONTEXT *context,DISPATCHER_CONTEXT *disp) +if ($win64) { +$rec="%rcx"; +$frame="%rdx"; +$context="%r8"; +$disp="%r9"; + +$code.=<<___; +.extern __imp_RtlVirtualUnwind +.type se_handler,\@abi-omnipotent +.align 16 +se_handler: + push %rsi + push %rdi + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 + pushfq + sub \$64,%rsp + + mov 120($context),%rax # pull context->Rax + mov 248($context),%rbx # pull context->Rip + + lea .Lprologue(%rip),%r10 + cmp %r10,%rbx # context->Rip<.Lprologue + jb .Lin_prologue + + mov 152($context),%rax # pull context->Rsp + + lea .Lepilogue(%rip),%r10 + cmp %r10,%rbx # context->Rip>=.Lepilogue + jae .Lin_prologue + + mov 192($context),%r10 # pull $num + mov 8(%rax,%r10,8),%rax # pull saved stack pointer + lea 48(%rax),%rax + + mov -8(%rax),%rbx + mov -16(%rax),%rbp + mov -24(%rax),%r12 + mov -32(%rax),%r13 + mov -40(%rax),%r14 + mov -48(%rax),%r15 + mov %rbx,144($context) # restore context->Rbx + mov %rbp,160($context) # restore context->Rbp + mov %r12,216($context) # restore context->R12 + mov %r13,224($context) # restore context->R13 + mov %r14,232($context) # restore context->R14 + mov %r15,240($context) # restore context->R15 + +.Lin_prologue: + mov 8(%rax),%rdi + mov 16(%rax),%rsi + mov %rax,152($context) # restore context->Rsp + mov %rsi,168($context) # restore context->Rsi + mov %rdi,176($context) # restore context->Rdi + + mov 40($disp),%rdi # disp->ContextRecord + mov $context,%rsi # context + mov \$154,%ecx # sizeof(CONTEXT) + .long 0xa548f3fc # cld; rep movsq + + mov $disp,%rsi + xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER + mov 8(%rsi),%rdx # arg2, disp->ImageBase + mov 0(%rsi),%r8 # arg3, disp->ControlPc + mov 16(%rsi),%r9 # arg4, disp->FunctionEntry + mov 40(%rsi),%r10 # disp->ContextRecord + lea 56(%rsi),%r11 # &disp->HandlerData + lea 24(%rsi),%r12 # &disp->EstablisherFrame + mov %r10,32(%rsp) # arg5 + mov %r11,40(%rsp) # arg6 + mov %r12,48(%rsp) # arg7 + mov %rcx,56(%rsp) # arg8, (NULL) + call *__imp_RtlVirtualUnwind(%rip) + + mov \$1,%eax # ExceptionContinueSearch + add \$64,%rsp + popfq pop %r15 pop %r14 pop %r13 pop %r12 pop %rbp pop %rbx + pop %rdi + pop %rsi ret -.size bn_mul_mont,.-bn_mul_mont -.asciz "Montgomery Multiplication for x86_64, CRYPTOGAMS by " +.size se_handler,.-se_handler + +.section .pdata +.align 4 + .rva .LSEH_begin_bn_mul_mont + .rva .LSEH_end_bn_mul_mont + .rva .LSEH_info_bn_mul_mont + +.section .xdata +.align 8 +.LSEH_info_bn_mul_mont: + .byte 9,0,0,0 + .rva se_handler ___ +} print $code; close STDOUT; diff --git a/src/lib/libcrypto/bn/bn.h b/src/lib/libcrypto/bn/bn.h index f1719a5877..e484b7fc11 100644 --- a/src/lib/libcrypto/bn/bn.h +++ b/src/lib/libcrypto/bn/bn.h @@ -55,6 +55,59 @@ * copied and put under another distribution licence * [including the GNU Public Licence.] */ +/* ==================================================================== + * Copyright (c) 1998-2006 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * openssl-core@openssl.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.openssl.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + * This product includes cryptographic software written by Eric Young + * (eay@cryptsoft.com). This product includes software written by Tim + * Hudson (tjh@cryptsoft.com). + * + */ /* ==================================================================== * Copyright 2002 Sun Microsystems, Inc. ALL RIGHTS RESERVED. * @@ -77,6 +130,7 @@ #include /* FILE */ #endif #include +#include #ifdef __cplusplus extern "C" { @@ -94,9 +148,11 @@ extern "C" { /* #define BN_DEBUG */ /* #define BN_DEBUG_RAND */ +#ifndef OPENSSL_SMALL_FOOTPRINT #define BN_MUL_COMBA #define BN_SQR_COMBA #define BN_RECURSION +#endif /* This next option uses the C libraries (2 word)/(1 word) function. * If it is not defined, I use my C version (which is slower). @@ -137,6 +193,8 @@ extern "C" { #define BN_DEC_FMT1 "%lu" #define BN_DEC_FMT2 "%019lu" #define BN_DEC_NUM 19 +#define BN_HEX_FMT1 "%lX" +#define BN_HEX_FMT2 "%016lX" #endif /* This is where the long long data type is 64 bits, but long is 32. @@ -162,83 +220,37 @@ extern "C" { #define BN_DEC_FMT1 "%llu" #define BN_DEC_FMT2 "%019llu" #define BN_DEC_NUM 19 +#define BN_HEX_FMT1 "%llX" +#define BN_HEX_FMT2 "%016llX" #endif #ifdef THIRTY_TWO_BIT #ifdef BN_LLONG -# if defined(OPENSSL_SYS_WIN32) && !defined(__GNUC__) +# if defined(_WIN32) && !defined(__GNUC__) # define BN_ULLONG unsigned __int64 +# define BN_MASK (0xffffffffffffffffI64) # else # define BN_ULLONG unsigned long long +# define BN_MASK (0xffffffffffffffffLL) # endif #endif -#define BN_ULONG unsigned long -#define BN_LONG long +#define BN_ULONG unsigned int +#define BN_LONG int #define BN_BITS 64 #define BN_BYTES 4 #define BN_BITS2 32 #define BN_BITS4 16 -#ifdef OPENSSL_SYS_WIN32 -/* VC++ doesn't like the LL suffix */ -#define BN_MASK (0xffffffffffffffffL) -#else -#define BN_MASK (0xffffffffffffffffLL) -#endif #define BN_MASK2 (0xffffffffL) #define BN_MASK2l (0xffff) #define BN_MASK2h1 (0xffff8000L) #define BN_MASK2h (0xffff0000L) #define BN_TBIT (0x80000000L) #define BN_DEC_CONV (1000000000L) -#define BN_DEC_FMT1 "%lu" -#define BN_DEC_FMT2 "%09lu" -#define BN_DEC_NUM 9 -#endif - -#ifdef SIXTEEN_BIT -#ifndef BN_DIV2W -#define BN_DIV2W -#endif -#define BN_ULLONG unsigned long -#define BN_ULONG unsigned short -#define BN_LONG short -#define BN_BITS 32 -#define BN_BYTES 2 -#define BN_BITS2 16 -#define BN_BITS4 8 -#define BN_MASK (0xffffffff) -#define BN_MASK2 (0xffff) -#define BN_MASK2l (0xff) -#define BN_MASK2h1 (0xff80) -#define BN_MASK2h (0xff00) -#define BN_TBIT (0x8000) -#define BN_DEC_CONV (100000) #define BN_DEC_FMT1 "%u" -#define BN_DEC_FMT2 "%05u" -#define BN_DEC_NUM 5 -#endif - -#ifdef EIGHT_BIT -#ifndef BN_DIV2W -#define BN_DIV2W -#endif -#define BN_ULLONG unsigned short -#define BN_ULONG unsigned char -#define BN_LONG char -#define BN_BITS 16 -#define BN_BYTES 1 -#define BN_BITS2 8 -#define BN_BITS4 4 -#define BN_MASK (0xffff) -#define BN_MASK2 (0xff) -#define BN_MASK2l (0xf) -#define BN_MASK2h1 (0xf8) -#define BN_MASK2h (0xf0) -#define BN_TBIT (0x80) -#define BN_DEC_CONV (100) -#define BN_DEC_FMT1 "%u" -#define BN_DEC_FMT2 "%02u" -#define BN_DEC_NUM 2 +#define BN_DEC_FMT2 "%09u" +#define BN_DEC_NUM 9 +#define BN_HEX_FMT1 "%X" +#define BN_HEX_FMT2 "%08X" #endif #define BN_DEFAULT_BITS 1280 @@ -303,12 +315,8 @@ struct bn_mont_ctx_st BIGNUM N; /* The modulus */ BIGNUM Ni; /* R*(1/R mod N) - N*Ni = 1 * (Ni is only stored for bignum algorithm) */ -#if 0 - /* OpenSSL 0.9.9 preview: */ - BN_ULONG n0[2];/* least significant word(s) of Ni */ -#else - BN_ULONG n0; /* least significant word of Ni */ -#endif + BN_ULONG n0[2];/* least significant word(s) of Ni; + (type changed with 0.9.9, was "BN_ULONG n0;" before) */ int flags; }; @@ -504,6 +512,7 @@ char * BN_bn2hex(const BIGNUM *a); char * BN_bn2dec(const BIGNUM *a); int BN_hex2bn(BIGNUM **a, const char *str); int BN_dec2bn(BIGNUM **a, const char *str); +int BN_asc2bn(BIGNUM **a, const char *str); int BN_gcd(BIGNUM *r,const BIGNUM *a,const BIGNUM *b,BN_CTX *ctx); int BN_kronecker(const BIGNUM *a,const BIGNUM *b,BN_CTX *ctx); /* returns -2 for error */ BIGNUM *BN_mod_inverse(BIGNUM *ret, @@ -531,17 +540,6 @@ int BN_is_prime_ex(const BIGNUM *p,int nchecks, BN_CTX *ctx, BN_GENCB *cb); int BN_is_prime_fasttest_ex(const BIGNUM *p,int nchecks, BN_CTX *ctx, int do_trial_division, BN_GENCB *cb); -int BN_X931_generate_Xpq(BIGNUM *Xp, BIGNUM *Xq, int nbits, BN_CTX *ctx); - -int BN_X931_derive_prime_ex(BIGNUM *p, BIGNUM *p1, BIGNUM *p2, - const BIGNUM *Xp, const BIGNUM *Xp1, const BIGNUM *Xp2, - const BIGNUM *e, BN_CTX *ctx, BN_GENCB *cb); -int BN_X931_generate_prime_ex(BIGNUM *p, BIGNUM *p1, BIGNUM *p2, - BIGNUM *Xp1, BIGNUM *Xp2, - const BIGNUM *Xp, - const BIGNUM *e, BN_CTX *ctx, - BN_GENCB *cb); - BN_MONT_CTX *BN_MONT_CTX_new(void ); void BN_MONT_CTX_init(BN_MONT_CTX *ctx); int BN_mod_mul_montgomery(BIGNUM *r,const BIGNUM *a,const BIGNUM *b, @@ -560,19 +558,22 @@ BN_MONT_CTX *BN_MONT_CTX_set_locked(BN_MONT_CTX **pmont, int lock, #define BN_BLINDING_NO_UPDATE 0x00000001 #define BN_BLINDING_NO_RECREATE 0x00000002 -BN_BLINDING *BN_BLINDING_new(const BIGNUM *A, const BIGNUM *Ai, /* const */ BIGNUM *mod); +BN_BLINDING *BN_BLINDING_new(const BIGNUM *A, const BIGNUM *Ai, BIGNUM *mod); void BN_BLINDING_free(BN_BLINDING *b); int BN_BLINDING_update(BN_BLINDING *b,BN_CTX *ctx); int BN_BLINDING_convert(BIGNUM *n, BN_BLINDING *b, BN_CTX *ctx); int BN_BLINDING_invert(BIGNUM *n, BN_BLINDING *b, BN_CTX *ctx); int BN_BLINDING_convert_ex(BIGNUM *n, BIGNUM *r, BN_BLINDING *b, BN_CTX *); int BN_BLINDING_invert_ex(BIGNUM *n, const BIGNUM *r, BN_BLINDING *b, BN_CTX *); +#ifndef OPENSSL_NO_DEPRECATED unsigned long BN_BLINDING_get_thread_id(const BN_BLINDING *); void BN_BLINDING_set_thread_id(BN_BLINDING *, unsigned long); +#endif +CRYPTO_THREADID *BN_BLINDING_thread_id(BN_BLINDING *); unsigned long BN_BLINDING_get_flags(const BN_BLINDING *); void BN_BLINDING_set_flags(BN_BLINDING *, unsigned long); BN_BLINDING *BN_BLINDING_create_param(BN_BLINDING *b, - const BIGNUM *e, /* const */ BIGNUM *m, BN_CTX *ctx, + const BIGNUM *e, BIGNUM *m, BN_CTX *ctx, int (*bn_mod_exp)(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *m_ctx), BN_MONT_CTX *m_ctx); @@ -625,24 +626,24 @@ int BN_GF2m_mod_solve_quad(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, * t^p[0] + t^p[1] + ... + t^p[k] * where m = p[0] > p[1] > ... > p[k] = 0. */ -int BN_GF2m_mod_arr(BIGNUM *r, const BIGNUM *a, const unsigned int p[]); +int BN_GF2m_mod_arr(BIGNUM *r, const BIGNUM *a, const int p[]); /* r = a mod p */ int BN_GF2m_mod_mul_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, - const unsigned int p[], BN_CTX *ctx); /* r = (a * b) mod p */ -int BN_GF2m_mod_sqr_arr(BIGNUM *r, const BIGNUM *a, const unsigned int p[], + const int p[], BN_CTX *ctx); /* r = (a * b) mod p */ +int BN_GF2m_mod_sqr_arr(BIGNUM *r, const BIGNUM *a, const int p[], BN_CTX *ctx); /* r = (a * a) mod p */ -int BN_GF2m_mod_inv_arr(BIGNUM *r, const BIGNUM *b, const unsigned int p[], +int BN_GF2m_mod_inv_arr(BIGNUM *r, const BIGNUM *b, const int p[], BN_CTX *ctx); /* r = (1 / b) mod p */ int BN_GF2m_mod_div_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, - const unsigned int p[], BN_CTX *ctx); /* r = (a / b) mod p */ + const int p[], BN_CTX *ctx); /* r = (a / b) mod p */ int BN_GF2m_mod_exp_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, - const unsigned int p[], BN_CTX *ctx); /* r = (a ^ b) mod p */ + const int p[], BN_CTX *ctx); /* r = (a ^ b) mod p */ int BN_GF2m_mod_sqrt_arr(BIGNUM *r, const BIGNUM *a, - const unsigned int p[], BN_CTX *ctx); /* r = sqrt(a) mod p */ + const int p[], BN_CTX *ctx); /* r = sqrt(a) mod p */ int BN_GF2m_mod_solve_quad_arr(BIGNUM *r, const BIGNUM *a, - const unsigned int p[], BN_CTX *ctx); /* r^2 + r = a mod p */ -int BN_GF2m_poly2arr(const BIGNUM *a, unsigned int p[], int max); -int BN_GF2m_arr2poly(const unsigned int p[], BIGNUM *a); + const int p[], BN_CTX *ctx); /* r^2 + r = a mod p */ +int BN_GF2m_poly2arr(const BIGNUM *a, int p[], int max); +int BN_GF2m_arr2poly(const int p[], BIGNUM *a); /* faster mod functions for the 'NIST primes' * 0 <= a < p^2 */ @@ -751,10 +752,12 @@ int RAND_pseudo_bytes(unsigned char *buf,int num); #define bn_correct_top(a) \ { \ BN_ULONG *ftl; \ - if ((a)->top > 0) \ + int tmp_top = (a)->top; \ + if (tmp_top > 0) \ { \ - for (ftl= &((a)->d[(a)->top-1]); (a)->top > 0; (a)->top--) \ - if (*(ftl--)) break; \ + for (ftl= &((a)->d[tmp_top-1]); tmp_top > 0; tmp_top--) \ + if (*(ftl--)) break; \ + (a)->top = tmp_top; \ } \ bn_pollute(a); \ } diff --git a/src/lib/libcrypto/bn/bn_asm.c b/src/lib/libcrypto/bn/bn_asm.c index 99bc2de491..c43c91cc09 100644 --- a/src/lib/libcrypto/bn/bn_asm.c +++ b/src/lib/libcrypto/bn/bn_asm.c @@ -75,6 +75,7 @@ BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) assert(num >= 0); if (num <= 0) return(c1); +#ifndef OPENSSL_SMALL_FOOTPRINT while (num&~3) { mul_add(rp[0],ap[0],w,c1); @@ -83,11 +84,11 @@ BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) mul_add(rp[3],ap[3],w,c1); ap+=4; rp+=4; num-=4; } - if (num) +#endif + while (num) { - mul_add(rp[0],ap[0],w,c1); if (--num==0) return c1; - mul_add(rp[1],ap[1],w,c1); if (--num==0) return c1; - mul_add(rp[2],ap[2],w,c1); return c1; + mul_add(rp[0],ap[0],w,c1); + ap++; rp++; num--; } return(c1); @@ -100,6 +101,7 @@ BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) assert(num >= 0); if (num <= 0) return(c1); +#ifndef OPENSSL_SMALL_FOOTPRINT while (num&~3) { mul(rp[0],ap[0],w,c1); @@ -108,11 +110,11 @@ BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) mul(rp[3],ap[3],w,c1); ap+=4; rp+=4; num-=4; } - if (num) +#endif + while (num) { - mul(rp[0],ap[0],w,c1); if (--num == 0) return c1; - mul(rp[1],ap[1],w,c1); if (--num == 0) return c1; - mul(rp[2],ap[2],w,c1); + mul(rp[0],ap[0],w,c1); + ap++; rp++; num--; } return(c1); } @@ -121,6 +123,8 @@ void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n) { assert(n >= 0); if (n <= 0) return; + +#ifndef OPENSSL_SMALL_FOOTPRINT while (n&~3) { sqr(r[0],r[1],a[0]); @@ -129,11 +133,11 @@ void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n) sqr(r[6],r[7],a[3]); a+=4; r+=8; n-=4; } - if (n) +#endif + while (n) { - sqr(r[0],r[1],a[0]); if (--n == 0) return; - sqr(r[2],r[3],a[1]); if (--n == 0) return; - sqr(r[4],r[5],a[2]); + sqr(r[0],r[1],a[0]); + a++; r+=2; n--; } } @@ -150,18 +154,20 @@ BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) bl=LBITS(w); bh=HBITS(w); - for (;;) +#ifndef OPENSSL_SMALL_FOOTPRINT + while (num&~3) { mul_add(rp[0],ap[0],bl,bh,c); - if (--num == 0) break; mul_add(rp[1],ap[1],bl,bh,c); - if (--num == 0) break; mul_add(rp[2],ap[2],bl,bh,c); - if (--num == 0) break; mul_add(rp[3],ap[3],bl,bh,c); - if (--num == 0) break; - ap+=4; - rp+=4; + ap+=4; rp+=4; num-=4; + } +#endif + while (num) + { + mul_add(rp[0],ap[0],bl,bh,c); + ap++; rp++; num--; } return(c); } @@ -177,18 +183,20 @@ BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) bl=LBITS(w); bh=HBITS(w); - for (;;) +#ifndef OPENSSL_SMALL_FOOTPRINT + while (num&~3) { mul(rp[0],ap[0],bl,bh,carry); - if (--num == 0) break; mul(rp[1],ap[1],bl,bh,carry); - if (--num == 0) break; mul(rp[2],ap[2],bl,bh,carry); - if (--num == 0) break; mul(rp[3],ap[3],bl,bh,carry); - if (--num == 0) break; - ap+=4; - rp+=4; + ap+=4; rp+=4; num-=4; + } +#endif + while (num) + { + mul(rp[0],ap[0],bl,bh,carry); + ap++; rp++; num--; } return(carry); } @@ -197,22 +205,21 @@ void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n) { assert(n >= 0); if (n <= 0) return; - for (;;) + +#ifndef OPENSSL_SMALL_FOOTPRINT + while (n&~3) { sqr64(r[0],r[1],a[0]); - if (--n == 0) break; - sqr64(r[2],r[3],a[1]); - if (--n == 0) break; - sqr64(r[4],r[5],a[2]); - if (--n == 0) break; - sqr64(r[6],r[7],a[3]); - if (--n == 0) break; - - a+=4; - r+=8; + a+=4; r+=8; n-=4; + } +#endif + while (n) + { + sqr64(r[0],r[1],a[0]); + a++; r+=2; n--; } } @@ -303,31 +310,30 @@ BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) assert(n >= 0); if (n <= 0) return((BN_ULONG)0); - for (;;) +#ifndef OPENSSL_SMALL_FOOTPRINT + while (n&~3) { ll+=(BN_ULLONG)a[0]+b[0]; r[0]=(BN_ULONG)ll&BN_MASK2; ll>>=BN_BITS2; - if (--n <= 0) break; - ll+=(BN_ULLONG)a[1]+b[1]; r[1]=(BN_ULONG)ll&BN_MASK2; ll>>=BN_BITS2; - if (--n <= 0) break; - ll+=(BN_ULLONG)a[2]+b[2]; r[2]=(BN_ULONG)ll&BN_MASK2; ll>>=BN_BITS2; - if (--n <= 0) break; - ll+=(BN_ULLONG)a[3]+b[3]; r[3]=(BN_ULONG)ll&BN_MASK2; ll>>=BN_BITS2; - if (--n <= 0) break; - - a+=4; - b+=4; - r+=4; + a+=4; b+=4; r+=4; n-=4; + } +#endif + while (n) + { + ll+=(BN_ULLONG)a[0]+b[0]; + r[0]=(BN_ULONG)ll&BN_MASK2; + ll>>=BN_BITS2; + a++; b++; r++; n--; } return((BN_ULONG)ll); } @@ -340,7 +346,8 @@ BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) if (n <= 0) return((BN_ULONG)0); c=0; - for (;;) +#ifndef OPENSSL_SMALL_FOOTPRINT + while (n&~3) { t=a[0]; t=(t+c)&BN_MASK2; @@ -348,35 +355,36 @@ BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) l=(t+b[0])&BN_MASK2; c+=(l < t); r[0]=l; - if (--n <= 0) break; - t=a[1]; t=(t+c)&BN_MASK2; c=(t < c); l=(t+b[1])&BN_MASK2; c+=(l < t); r[1]=l; - if (--n <= 0) break; - t=a[2]; t=(t+c)&BN_MASK2; c=(t < c); l=(t+b[2])&BN_MASK2; c+=(l < t); r[2]=l; - if (--n <= 0) break; - t=a[3]; t=(t+c)&BN_MASK2; c=(t < c); l=(t+b[3])&BN_MASK2; c+=(l < t); r[3]=l; - if (--n <= 0) break; - - a+=4; - b+=4; - r+=4; + a+=4; b+=4; r+=4; n-=4; + } +#endif + while(n) + { + t=a[0]; + t=(t+c)&BN_MASK2; + c=(t < c); + l=(t+b[0])&BN_MASK2; + c+=(l < t); + r[0]=l; + a++; b++; r++; n--; } return((BN_ULONG)c); } @@ -390,36 +398,35 @@ BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) assert(n >= 0); if (n <= 0) return((BN_ULONG)0); - for (;;) +#ifndef OPENSSL_SMALL_FOOTPRINT + while (n&~3) { t1=a[0]; t2=b[0]; r[0]=(t1-t2-c)&BN_MASK2; if (t1 != t2) c=(t1 < t2); - if (--n <= 0) break; - t1=a[1]; t2=b[1]; r[1]=(t1-t2-c)&BN_MASK2; if (t1 != t2) c=(t1 < t2); - if (--n <= 0) break; - t1=a[2]; t2=b[2]; r[2]=(t1-t2-c)&BN_MASK2; if (t1 != t2) c=(t1 < t2); - if (--n <= 0) break; - t1=a[3]; t2=b[3]; r[3]=(t1-t2-c)&BN_MASK2; if (t1 != t2) c=(t1 < t2); - if (--n <= 0) break; - - a+=4; - b+=4; - r+=4; + a+=4; b+=4; r+=4; n-=4; + } +#endif + while (n) + { + t1=a[0]; t2=b[0]; + r[0]=(t1-t2-c)&BN_MASK2; + if (t1 != t2) c=(t1 < t2); + a++; b++; r++; n--; } return(c); } -#ifdef BN_MUL_COMBA +#if defined(BN_MUL_COMBA) && !defined(OPENSSL_SMALL_FOOTPRINT) #undef bn_mul_comba8 #undef bn_mul_comba4 @@ -820,18 +827,134 @@ void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) r[6]=c1; r[7]=c2; } + +#ifdef OPENSSL_NO_ASM +#ifdef OPENSSL_BN_ASM_MONT +#include +/* + * This is essentially reference implementation, which may or may not + * result in performance improvement. E.g. on IA-32 this routine was + * observed to give 40% faster rsa1024 private key operations and 10% + * faster rsa4096 ones, while on AMD64 it improves rsa1024 sign only + * by 10% and *worsens* rsa4096 sign by 15%. Once again, it's a + * reference implementation, one to be used as starting point for + * platform-specific assembler. Mentioned numbers apply to compiler + * generated code compiled with and without -DOPENSSL_BN_ASM_MONT and + * can vary not only from platform to platform, but even for compiler + * versions. Assembler vs. assembler improvement coefficients can + * [and are known to] differ and are to be documented elsewhere. + */ +int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0p, int num) + { + BN_ULONG c0,c1,ml,*tp,n0; +#ifdef mul64 + BN_ULONG mh; +#endif + volatile BN_ULONG *vp; + int i=0,j; + +#if 0 /* template for platform-specific implementation */ + if (ap==bp) return bn_sqr_mont(rp,ap,np,n0p,num); +#endif + vp = tp = alloca((num+2)*sizeof(BN_ULONG)); + + n0 = *n0p; + + c0 = 0; + ml = bp[0]; +#ifdef mul64 + mh = HBITS(ml); + ml = LBITS(ml); + for (j=0;j=np[num-1]) + { + c0 = bn_sub_words(rp,tp,np,num); + if (tp[num]!=0 || c0==0) + { + for(i=0;i +int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0p, int num) + { + BN_ULONG c0,c1,*tp,n0=*n0p; + volatile BN_ULONG *vp; + int i=0,j; + + vp = tp = alloca((num+2)*sizeof(BN_ULONG)); + + for(i=0;i<=num;i++) tp[i]=0; + + for(i=0;i=np[num-1]) + { + c0 = bn_sub_words(rp,tp,np,num); + if (tp[num]!=0 || c0==0) + { + for(i=0;imod, BN_FLG_CONSTTIME); ret->counter = BN_BLINDING_COUNTER; + CRYPTO_THREADID_current(&ret->tid); return(ret); err: if (ret != NULL) BN_BLINDING_free(ret); @@ -263,6 +267,7 @@ int BN_BLINDING_invert_ex(BIGNUM *n, const BIGNUM *r, BN_BLINDING *b, BN_CTX *ct return(ret); } +#ifndef OPENSSL_NO_DEPRECATED unsigned long BN_BLINDING_get_thread_id(const BN_BLINDING *b) { return b->thread_id; @@ -272,6 +277,12 @@ void BN_BLINDING_set_thread_id(BN_BLINDING *b, unsigned long n) { b->thread_id = n; } +#endif + +CRYPTO_THREADID *BN_BLINDING_thread_id(BN_BLINDING *b) + { + return &b->tid; + } unsigned long BN_BLINDING_get_flags(const BN_BLINDING *b) { @@ -284,7 +295,7 @@ void BN_BLINDING_set_flags(BN_BLINDING *b, unsigned long flags) } BN_BLINDING *BN_BLINDING_create_param(BN_BLINDING *b, - const BIGNUM *e, /* const */ BIGNUM *m, BN_CTX *ctx, + const BIGNUM *e, BIGNUM *m, BN_CTX *ctx, int (*bn_mod_exp)(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *m_ctx), BN_MONT_CTX *m_ctx) diff --git a/src/lib/libcrypto/bn/bn_ctx.c b/src/lib/libcrypto/bn/bn_ctx.c index b3452f1a91..3f2256f675 100644 --- a/src/lib/libcrypto/bn/bn_ctx.c +++ b/src/lib/libcrypto/bn/bn_ctx.c @@ -161,7 +161,7 @@ static void ctxdbg(BN_CTX *ctx) fprintf(stderr,"(%08x): ", (unsigned int)ctx); while(bnidx < ctx->used) { - fprintf(stderr,"%02x ", item->vals[bnidx++ % BN_CTX_POOL_SIZE].dmax); + fprintf(stderr,"%03x ", item->vals[bnidx++ % BN_CTX_POOL_SIZE].dmax); if(!(bnidx % BN_CTX_POOL_SIZE)) item = item->next; } @@ -171,8 +171,8 @@ static void ctxdbg(BN_CTX *ctx) while(fpidx < stack->depth) { while(bnidx++ < stack->indexes[fpidx]) - fprintf(stderr," "); - fprintf(stderr,"^^ "); + fprintf(stderr," "); + fprintf(stderr,"^^^ "); bnidx++; fpidx++; } diff --git a/src/lib/libcrypto/bn/bn_div.c b/src/lib/libcrypto/bn/bn_div.c index 1e8e57626b..802a43d642 100644 --- a/src/lib/libcrypto/bn/bn_div.c +++ b/src/lib/libcrypto/bn/bn_div.c @@ -102,7 +102,7 @@ int BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, const BIGNUM *d, /* The next 2 are needed so we can do a dv->d[0]|=1 later * since BN_lshift1 will only work once there is a value :-) */ BN_zero(dv); - bn_wexpand(dv,1); + if(bn_wexpand(dv,1) == NULL) goto end; dv->top=1; if (!BN_lshift(D,D,nm-nd)) goto end; @@ -229,7 +229,8 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor, if (dv == NULL) res=BN_CTX_get(ctx); else res=dv; - if (sdiv == NULL || res == NULL) goto err; + if (sdiv == NULL || res == NULL || tmp == NULL || snum == NULL) + goto err; /* First we normalise the numbers */ norm_shift=BN_BITS2-((BN_num_bits(divisor))%BN_BITS2); @@ -336,7 +337,7 @@ X) -> 0x%08X\n", t2 -= d1; } #else /* !BN_LLONG */ - BN_ULONG t2l,t2h,ql,qh; + BN_ULONG t2l,t2h; q=bn_div_words(n0,n1,d0); #ifdef BN_DEBUG_LEVITTE @@ -354,9 +355,12 @@ X) -> 0x%08X\n", t2l = d1 * q; t2h = BN_UMULT_HIGH(d1,q); #else + { + BN_ULONG ql, qh; t2l=LBITS(d1); t2h=HBITS(d1); ql =LBITS(q); qh =HBITS(q); mul64(t2l,t2h,ql,qh); /* t2=(BN_ULLONG)d1*q; */ + } #endif for (;;) @@ -560,7 +564,7 @@ X) -> 0x%08X\n", t2 -= d1; } #else /* !BN_LLONG */ - BN_ULONG t2l,t2h,ql,qh; + BN_ULONG t2l,t2h; q=bn_div_words(n0,n1,d0); #ifdef BN_DEBUG_LEVITTE @@ -578,9 +582,12 @@ X) -> 0x%08X\n", t2l = d1 * q; t2h = BN_UMULT_HIGH(d1,q); #else + { + BN_ULONG ql, qh; t2l=LBITS(d1); t2h=HBITS(d1); ql =LBITS(q); qh =HBITS(q); mul64(t2l,t2h,ql,qh); /* t2=(BN_ULLONG)d1*q; */ + } #endif for (;;) diff --git a/src/lib/libcrypto/bn/bn_exp.c b/src/lib/libcrypto/bn/bn_exp.c index 70a33f0d93..d9b6c737fc 100644 --- a/src/lib/libcrypto/bn/bn_exp.c +++ b/src/lib/libcrypto/bn/bn_exp.c @@ -134,7 +134,8 @@ int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) rr = BN_CTX_get(ctx); else rr = r; - if ((v = BN_CTX_get(ctx)) == NULL) goto err; + v = BN_CTX_get(ctx); + if (rr == NULL || v == NULL) goto err; if (BN_copy(v,a) == NULL) goto err; bits=BN_num_bits(p); diff --git a/src/lib/libcrypto/bn/bn_gf2m.c b/src/lib/libcrypto/bn/bn_gf2m.c index 306f029f27..527b0fa15b 100644 --- a/src/lib/libcrypto/bn/bn_gf2m.c +++ b/src/lib/libcrypto/bn/bn_gf2m.c @@ -121,74 +121,12 @@ static const BN_ULONG SQR_tb[16] = SQR_tb[(w) >> 12 & 0xF] << 24 | SQR_tb[(w) >> 8 & 0xF] << 16 | \ SQR_tb[(w) >> 4 & 0xF] << 8 | SQR_tb[(w) & 0xF] #endif -#ifdef SIXTEEN_BIT -#define SQR1(w) \ - SQR_tb[(w) >> 12 & 0xF] << 8 | SQR_tb[(w) >> 8 & 0xF] -#define SQR0(w) \ - SQR_tb[(w) >> 4 & 0xF] << 8 | SQR_tb[(w) & 0xF] -#endif -#ifdef EIGHT_BIT -#define SQR1(w) \ - SQR_tb[(w) >> 4 & 0xF] -#define SQR0(w) \ - SQR_tb[(w) & 15] -#endif /* Product of two polynomials a, b each with degree < BN_BITS2 - 1, * result is a polynomial r with degree < 2 * BN_BITS - 1 * The caller MUST ensure that the variables have the right amount * of space allocated. */ -#ifdef EIGHT_BIT -static void bn_GF2m_mul_1x1(BN_ULONG *r1, BN_ULONG *r0, const BN_ULONG a, const BN_ULONG b) - { - register BN_ULONG h, l, s; - BN_ULONG tab[4], top1b = a >> 7; - register BN_ULONG a1, a2; - - a1 = a & (0x7F); a2 = a1 << 1; - - tab[0] = 0; tab[1] = a1; tab[2] = a2; tab[3] = a1^a2; - - s = tab[b & 0x3]; l = s; - s = tab[b >> 2 & 0x3]; l ^= s << 2; h = s >> 6; - s = tab[b >> 4 & 0x3]; l ^= s << 4; h ^= s >> 4; - s = tab[b >> 6 ]; l ^= s << 6; h ^= s >> 2; - - /* compensate for the top bit of a */ - - if (top1b & 01) { l ^= b << 7; h ^= b >> 1; } - - *r1 = h; *r0 = l; - } -#endif -#ifdef SIXTEEN_BIT -static void bn_GF2m_mul_1x1(BN_ULONG *r1, BN_ULONG *r0, const BN_ULONG a, const BN_ULONG b) - { - register BN_ULONG h, l, s; - BN_ULONG tab[4], top1b = a >> 15; - register BN_ULONG a1, a2; - - a1 = a & (0x7FFF); a2 = a1 << 1; - - tab[0] = 0; tab[1] = a1; tab[2] = a2; tab[3] = a1^a2; - - s = tab[b & 0x3]; l = s; - s = tab[b >> 2 & 0x3]; l ^= s << 2; h = s >> 14; - s = tab[b >> 4 & 0x3]; l ^= s << 4; h ^= s >> 12; - s = tab[b >> 6 & 0x3]; l ^= s << 6; h ^= s >> 10; - s = tab[b >> 8 & 0x3]; l ^= s << 8; h ^= s >> 8; - s = tab[b >>10 & 0x3]; l ^= s << 10; h ^= s >> 6; - s = tab[b >>12 & 0x3]; l ^= s << 12; h ^= s >> 4; - s = tab[b >>14 ]; l ^= s << 14; h ^= s >> 2; - - /* compensate for the top bit of a */ - - if (top1b & 01) { l ^= b << 15; h ^= b >> 1; } - - *r1 = h; *r0 = l; - } -#endif #ifdef THIRTY_TWO_BIT static void bn_GF2m_mul_1x1(BN_ULONG *r1, BN_ULONG *r0, const BN_ULONG a, const BN_ULONG b) { @@ -294,7 +232,8 @@ int BN_GF2m_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b) if (a->top < b->top) { at = b; bt = a; } else { at = a; bt = b; } - bn_wexpand(r, at->top); + if(bn_wexpand(r, at->top) == NULL) + return 0; for (i = 0; i < bt->top; i++) { @@ -320,7 +259,7 @@ int BN_GF2m_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b) /* Performs modular reduction of a and store result in r. r could be a. */ -int BN_GF2m_mod_arr(BIGNUM *r, const BIGNUM *a, const unsigned int p[]) +int BN_GF2m_mod_arr(BIGNUM *r, const BIGNUM *a, const int p[]) { int j, k; int n, dN, d0, d1; @@ -421,11 +360,11 @@ int BN_GF2m_mod_arr(BIGNUM *r, const BIGNUM *a, const unsigned int p[]) int BN_GF2m_mod(BIGNUM *r, const BIGNUM *a, const BIGNUM *p) { int ret = 0; - const int max = BN_num_bits(p); - unsigned int *arr=NULL; + const int max = BN_num_bits(p) + 1; + int *arr=NULL; bn_check_top(a); bn_check_top(p); - if ((arr = (unsigned int *)OPENSSL_malloc(sizeof(unsigned int) * max)) == NULL) goto err; + if ((arr = (int *)OPENSSL_malloc(sizeof(int) * max)) == NULL) goto err; ret = BN_GF2m_poly2arr(p, arr, max); if (!ret || ret > max) { @@ -443,7 +382,7 @@ err: /* Compute the product of two polynomials a and b, reduce modulo p, and store * the result in r. r could be a or b; a could be b. */ -int BN_GF2m_mod_mul_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const unsigned int p[], BN_CTX *ctx) +int BN_GF2m_mod_mul_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const int p[], BN_CTX *ctx) { int zlen, i, j, k, ret = 0; BIGNUM *s; @@ -499,12 +438,12 @@ err: int BN_GF2m_mod_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *p, BN_CTX *ctx) { int ret = 0; - const int max = BN_num_bits(p); - unsigned int *arr=NULL; + const int max = BN_num_bits(p) + 1; + int *arr=NULL; bn_check_top(a); bn_check_top(b); bn_check_top(p); - if ((arr = (unsigned int *)OPENSSL_malloc(sizeof(unsigned int) * max)) == NULL) goto err; + if ((arr = (int *)OPENSSL_malloc(sizeof(int) * max)) == NULL) goto err; ret = BN_GF2m_poly2arr(p, arr, max); if (!ret || ret > max) { @@ -520,7 +459,7 @@ err: /* Square a, reduce the result mod p, and store it in a. r could be a. */ -int BN_GF2m_mod_sqr_arr(BIGNUM *r, const BIGNUM *a, const unsigned int p[], BN_CTX *ctx) +int BN_GF2m_mod_sqr_arr(BIGNUM *r, const BIGNUM *a, const int p[], BN_CTX *ctx) { int i, ret = 0; BIGNUM *s; @@ -555,12 +494,12 @@ err: int BN_GF2m_mod_sqr(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) { int ret = 0; - const int max = BN_num_bits(p); - unsigned int *arr=NULL; + const int max = BN_num_bits(p) + 1; + int *arr=NULL; bn_check_top(a); bn_check_top(p); - if ((arr = (unsigned int *)OPENSSL_malloc(sizeof(unsigned int) * max)) == NULL) goto err; + if ((arr = (int *)OPENSSL_malloc(sizeof(int) * max)) == NULL) goto err; ret = BN_GF2m_poly2arr(p, arr, max); if (!ret || ret > max) { @@ -642,7 +581,7 @@ err: * function is only provided for convenience; for best performance, use the * BN_GF2m_mod_inv function. */ -int BN_GF2m_mod_inv_arr(BIGNUM *r, const BIGNUM *xx, const unsigned int p[], BN_CTX *ctx) +int BN_GF2m_mod_inv_arr(BIGNUM *r, const BIGNUM *xx, const int p[], BN_CTX *ctx) { BIGNUM *field; int ret = 0; @@ -768,7 +707,7 @@ err: * function is only provided for convenience; for best performance, use the * BN_GF2m_mod_div function. */ -int BN_GF2m_mod_div_arr(BIGNUM *r, const BIGNUM *yy, const BIGNUM *xx, const unsigned int p[], BN_CTX *ctx) +int BN_GF2m_mod_div_arr(BIGNUM *r, const BIGNUM *yy, const BIGNUM *xx, const int p[], BN_CTX *ctx) { BIGNUM *field; int ret = 0; @@ -793,7 +732,7 @@ err: * the result in r. r could be a. * Uses simple square-and-multiply algorithm A.5.1 from IEEE P1363. */ -int BN_GF2m_mod_exp_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const unsigned int p[], BN_CTX *ctx) +int BN_GF2m_mod_exp_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const int p[], BN_CTX *ctx) { int ret = 0, i, n; BIGNUM *u; @@ -839,12 +778,12 @@ err: int BN_GF2m_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *p, BN_CTX *ctx) { int ret = 0; - const int max = BN_num_bits(p); - unsigned int *arr=NULL; + const int max = BN_num_bits(p) + 1; + int *arr=NULL; bn_check_top(a); bn_check_top(b); bn_check_top(p); - if ((arr = (unsigned int *)OPENSSL_malloc(sizeof(unsigned int) * max)) == NULL) goto err; + if ((arr = (int *)OPENSSL_malloc(sizeof(int) * max)) == NULL) goto err; ret = BN_GF2m_poly2arr(p, arr, max); if (!ret || ret > max) { @@ -862,7 +801,7 @@ err: * the result in r. r could be a. * Uses exponentiation as in algorithm A.4.1 from IEEE P1363. */ -int BN_GF2m_mod_sqrt_arr(BIGNUM *r, const BIGNUM *a, const unsigned int p[], BN_CTX *ctx) +int BN_GF2m_mod_sqrt_arr(BIGNUM *r, const BIGNUM *a, const int p[], BN_CTX *ctx) { int ret = 0; BIGNUM *u; @@ -898,11 +837,11 @@ err: int BN_GF2m_mod_sqrt(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) { int ret = 0; - const int max = BN_num_bits(p); - unsigned int *arr=NULL; + const int max = BN_num_bits(p) + 1; + int *arr=NULL; bn_check_top(a); bn_check_top(p); - if ((arr = (unsigned int *)OPENSSL_malloc(sizeof(unsigned int) * max)) == NULL) goto err; + if ((arr = (int *)OPENSSL_malloc(sizeof(int) * max)) == NULL) goto err; ret = BN_GF2m_poly2arr(p, arr, max); if (!ret || ret > max) { @@ -919,10 +858,9 @@ err: /* Find r such that r^2 + r = a mod p. r could be a. If no r exists returns 0. * Uses algorithms A.4.7 and A.4.6 from IEEE P1363. */ -int BN_GF2m_mod_solve_quad_arr(BIGNUM *r, const BIGNUM *a_, const unsigned int p[], BN_CTX *ctx) +int BN_GF2m_mod_solve_quad_arr(BIGNUM *r, const BIGNUM *a_, const int p[], BN_CTX *ctx) { - int ret = 0, count = 0; - unsigned int j; + int ret = 0, count = 0, j; BIGNUM *a, *z, *rho, *w, *w2, *tmp; bn_check_top(a_); @@ -1017,11 +955,11 @@ err: int BN_GF2m_mod_solve_quad(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) { int ret = 0; - const int max = BN_num_bits(p); - unsigned int *arr=NULL; + const int max = BN_num_bits(p) + 1; + int *arr=NULL; bn_check_top(a); bn_check_top(p); - if ((arr = (unsigned int *)OPENSSL_malloc(sizeof(unsigned int) * + if ((arr = (int *)OPENSSL_malloc(sizeof(int) * max)) == NULL) goto err; ret = BN_GF2m_poly2arr(p, arr, max); if (!ret || ret > max) @@ -1037,20 +975,17 @@ err: } /* Convert the bit-string representation of a polynomial - * ( \sum_{i=0}^n a_i * x^i , where a_0 is *not* zero) into an array - * of integers corresponding to the bits with non-zero coefficient. + * ( \sum_{i=0}^n a_i * x^i) into an array of integers corresponding + * to the bits with non-zero coefficient. Array is terminated with -1. * Up to max elements of the array will be filled. Return value is total - * number of coefficients that would be extracted if array was large enough. + * number of array elements that would be filled if array was large enough. */ -int BN_GF2m_poly2arr(const BIGNUM *a, unsigned int p[], int max) +int BN_GF2m_poly2arr(const BIGNUM *a, int p[], int max) { int i, j, k = 0; BN_ULONG mask; - if (BN_is_zero(a) || !BN_is_bit_set(a, 0)) - /* a_0 == 0 => return error (the unsigned int array - * must be terminated by 0) - */ + if (BN_is_zero(a)) return 0; for (i = a->top - 1; i >= 0; i--) @@ -1070,24 +1005,28 @@ int BN_GF2m_poly2arr(const BIGNUM *a, unsigned int p[], int max) } } + if (k < max) { + p[k] = -1; + k++; + } + return k; } /* Convert the coefficient array representation of a polynomial to a - * bit-string. The array must be terminated by 0. + * bit-string. The array must be terminated by -1. */ -int BN_GF2m_arr2poly(const unsigned int p[], BIGNUM *a) +int BN_GF2m_arr2poly(const int p[], BIGNUM *a) { int i; bn_check_top(a); BN_zero(a); - for (i = 0; p[i] != 0; i++) + for (i = 0; p[i] != -1; i++) { if (BN_set_bit(a, p[i]) == 0) return 0; } - BN_set_bit(a, 0); bn_check_top(a); return 1; diff --git a/src/lib/libcrypto/bn/bn_lcl.h b/src/lib/libcrypto/bn/bn_lcl.h index 27ac4397a1..8e5e98e3f2 100644 --- a/src/lib/libcrypto/bn/bn_lcl.h +++ b/src/lib/libcrypto/bn/bn_lcl.h @@ -255,7 +255,8 @@ extern "C" { : "r"(a), "r"(b)); \ ret; }) # endif /* compiler */ -# elif defined(__x86_64) && defined(SIXTY_FOUR_BIT_LONG) +# elif (defined(__x86_64) || defined(__x86_64__)) && \ + (defined(SIXTY_FOUR_BIT_LONG) || defined(SIXTY_FOUR_BIT)) # if defined(__GNUC__) # define BN_UMULT_HIGH(a,b) ({ \ register BN_ULONG ret,discard; \ diff --git a/src/lib/libcrypto/bn/bn_lib.c b/src/lib/libcrypto/bn/bn_lib.c index 32a8fbaf51..5470fbe6ef 100644 --- a/src/lib/libcrypto/bn/bn_lib.c +++ b/src/lib/libcrypto/bn/bn_lib.c @@ -133,15 +133,34 @@ int BN_get_params(int which) const BIGNUM *BN_value_one(void) { - static BN_ULONG data_one=1L; - static BIGNUM const_one={&data_one,1,1,0,BN_FLG_STATIC_DATA}; + static const BN_ULONG data_one=1L; + static const BIGNUM const_one={(BN_ULONG *)&data_one,1,1,0,BN_FLG_STATIC_DATA}; return(&const_one); } +char *BN_options(void) + { + static int init=0; + static char data[16]; + + if (!init) + { + init++; +#ifdef BN_LLONG + BIO_snprintf(data,sizeof data,"bn(%d,%d)", + (int)sizeof(BN_ULLONG)*8,(int)sizeof(BN_ULONG)*8); +#else + BIO_snprintf(data,sizeof data,"bn(%d,%d)", + (int)sizeof(BN_ULONG)*8,(int)sizeof(BN_ULONG)*8); +#endif + } + return(data); + } + int BN_num_bits_word(BN_ULONG l) { - static const char bits[256]={ + static const unsigned char bits[256]={ 0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4, 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, @@ -216,7 +235,7 @@ int BN_num_bits_word(BN_ULONG l) else #endif { -#if defined(SIXTEEN_BIT) || defined(THIRTY_TWO_BIT) || defined(SIXTY_FOUR_BIT) || defined(SIXTY_FOUR_BIT_LONG) +#if defined(THIRTY_TWO_BIT) || defined(SIXTY_FOUR_BIT) || defined(SIXTY_FOUR_BIT_LONG) if (l & 0xff00L) return(bits[(int)(l>>8)]+8); else @@ -744,7 +763,7 @@ int BN_is_bit_set(const BIGNUM *a, int n) i=n/BN_BITS2; j=n%BN_BITS2; if (a->top <= i) return 0; - return(((a->d[i])>>j)&((BN_ULONG)1)); + return (int)(((a->d[i])>>j)&((BN_ULONG)1)); } int BN_mask_bits(BIGNUM *a, int n) diff --git a/src/lib/libcrypto/bn/bn_mont.c b/src/lib/libcrypto/bn/bn_mont.c index 4799b152dd..7224637ab3 100644 --- a/src/lib/libcrypto/bn/bn_mont.c +++ b/src/lib/libcrypto/bn/bn_mont.c @@ -122,26 +122,10 @@ #define MONT_WORD /* use the faster word-based algorithm */ -#if defined(MONT_WORD) && defined(OPENSSL_BN_ASM_MONT) && (BN_BITS2<=32) -/* This condition means we have a specific non-default build: - * In the 0.9.8 branch, OPENSSL_BN_ASM_MONT is normally not set for any - * BN_BITS2<=32 platform; an explicit "enable-montasm" is required. - * I.e., if we are here, the user intentionally deviates from the - * normal stable build to get better Montgomery performance from - * the 0.9.9-dev backport. - * - * In this case only, we also enable BN_from_montgomery_word() - * (another non-stable feature from 0.9.9-dev). - */ -#define MONT_FROM_WORD___NON_DEFAULT_0_9_8_BUILD -#endif - -#ifdef MONT_FROM_WORD___NON_DEFAULT_0_9_8_BUILD +#ifdef MONT_WORD static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont); #endif - - int BN_mod_mul_montgomery(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_MONT_CTX *mont, BN_CTX *ctx) { @@ -153,11 +137,7 @@ int BN_mod_mul_montgomery(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, if (num>1 && a->top==num && b->top==num) { if (bn_wexpand(r,num) == NULL) return(0); -#if 0 /* for OpenSSL 0.9.9 mont->n0 */ if (bn_mul_mont(r->d,a->d,b->d,mont->N.d,mont->n0,num)) -#else - if (bn_mul_mont(r->d,a->d,b->d,mont->N.d,&mont->n0,num)) -#endif { r->neg = a->neg^b->neg; r->top = num; @@ -181,7 +161,7 @@ int BN_mod_mul_montgomery(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, if (!BN_mul(tmp,a,b,ctx)) goto err; } /* reduce from aRR to aR */ -#ifdef MONT_FROM_WORD___NON_DEFAULT_0_9_8_BUILD +#ifdef MONT_WORD if (!BN_from_montgomery_word(r,tmp,mont)) goto err; #else if (!BN_from_montgomery(r,tmp,mont,ctx)) goto err; @@ -193,7 +173,7 @@ err: return(ret); } -#ifdef MONT_FROM_WORD___NON_DEFAULT_0_9_8_BUILD +#ifdef MONT_WORD static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont) { BIGNUM *n; @@ -217,15 +197,15 @@ static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont) nrp= &(r->d[nl]); /* clear the top words of T */ +#if 1 for (i=r->top; id[i]=0; +#else + memset(&(r->d[r->top]),0,(max-r->top)*sizeof(BN_ULONG)); +#endif r->top=max; -#if 0 /* for OpenSSL 0.9.9 mont->n0 */ n0=mont->n0[0]; -#else - n0=mont->n0; -#endif #ifdef BN_COUNT fprintf(stderr,"word BN_from_montgomery_word %d * %d\n",nl,nl); @@ -270,6 +250,8 @@ static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont) } al=r->top-ri; +#define BRANCH_FREE 1 +#if BRANCH_FREE if (bn_wexpand(ret,ri) == NULL) return(0); x=0-(((al-ri)>>(sizeof(al)*8-1))&1); ret->top=x=(ri&~x)|(al&x); /* min(ri,al) */ @@ -317,164 +299,8 @@ static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont) rp[i]=nrp[i], ap[i]=0; bn_correct_top(r); bn_correct_top(ret); - bn_check_top(ret); - - return(1); - } - -int BN_from_montgomery(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont, - BN_CTX *ctx) - { - int retn=0; - BIGNUM *t; - - BN_CTX_start(ctx); - if ((t = BN_CTX_get(ctx)) && BN_copy(t,a)) - retn = BN_from_montgomery_word(ret,t,mont); - BN_CTX_end(ctx); - return retn; - } - -#else /* !MONT_FROM_WORD___NON_DEFAULT_0_9_8_BUILD */ - -int BN_from_montgomery(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont, - BN_CTX *ctx) - { - int retn=0; - -#ifdef MONT_WORD - BIGNUM *n,*r; - BN_ULONG *ap,*np,*rp,n0,v,*nrp; - int al,nl,max,i,x,ri; - - BN_CTX_start(ctx); - if ((r = BN_CTX_get(ctx)) == NULL) goto err; - - if (!BN_copy(r,a)) goto err; - n= &(mont->N); - - ap=a->d; - /* mont->ri is the size of mont->N in bits (rounded up - to the word size) */ - al=ri=mont->ri/BN_BITS2; - - nl=n->top; - if ((al == 0) || (nl == 0)) { r->top=0; return(1); } - - max=(nl+al+1); /* allow for overflow (no?) XXX */ - if (bn_wexpand(r,max) == NULL) goto err; - - r->neg=a->neg^n->neg; - np=n->d; - rp=r->d; - nrp= &(r->d[nl]); - - /* clear the top words of T */ -#if 1 - for (i=r->top; id[i]=0; #else - memset(&(r->d[r->top]),0,(max-r->top)*sizeof(BN_ULONG)); -#endif - - r->top=max; - n0=mont->n0; - -#ifdef BN_COUNT - fprintf(stderr,"word BN_from_montgomery %d * %d\n",nl,nl); -#endif - for (i=0; i= v) - continue; - else - { - if (((++nrp[0])&BN_MASK2) != 0) continue; - if (((++nrp[1])&BN_MASK2) != 0) continue; - for (x=2; (((++nrp[x])&BN_MASK2) == 0); x++) ; - } - } - bn_correct_top(r); - - /* mont->ri will be a multiple of the word size and below code - * is kind of BN_rshift(ret,r,mont->ri) equivalent */ - if (r->top <= ri) - { - ret->top=0; - retn=1; - goto err; - } - al=r->top-ri; - -# define BRANCH_FREE 1 -# if BRANCH_FREE - if (bn_wexpand(ret,ri) == NULL) goto err; - x=0-(((al-ri)>>(sizeof(al)*8-1))&1); - ret->top=x=(ri&~x)|(al&x); /* min(ri,al) */ - ret->neg=r->neg; - - rp=ret->d; - ap=&(r->d[ri]); - - { - size_t m1,m2; - - v=bn_sub_words(rp,ap,np,ri); - /* this ----------------^^ works even in alri) nrp=rp; else nrp=ap; */ - /* in other words if subtraction result is real, then - * trick unconditional memcpy below to perform in-place - * "refresh" instead of actual copy. */ - m1=0-(size_t)(((al-ri)>>(sizeof(al)*8-1))&1); /* al>(sizeof(al)*8-1))&1); /* al>ri */ - m1|=m2; /* (al!=ri) */ - m1|=(0-(size_t)v); /* (al!=ri || v) */ - m1&=~m2; /* (al!=ri || v) && !al>ri */ - nrp=(BN_ULONG *)(((size_t)rp&~m1)|((size_t)ap&m1)); - } - - /* 'itop=al; ret->neg=r->neg; @@ -497,8 +323,30 @@ int BN_from_montgomery(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont, al+=4; for (; iN)) >= 0) + { + if (!BN_usub(ret,ret,&(mont->N))) return(0); + } +#endif + bn_check_top(ret); + + return(1); + } +#endif /* MONT_WORD */ + +int BN_from_montgomery(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont, + BN_CTX *ctx) + { + int retn=0; +#ifdef MONT_WORD + BIGNUM *t; + + BN_CTX_start(ctx); + if ((t = BN_CTX_get(ctx)) && BN_copy(t,a)) + retn = BN_from_montgomery_word(ret,t,mont); + BN_CTX_end(ctx); +#else /* !MONT_WORD */ BIGNUM *t1,*t2; BN_CTX_start(ctx); @@ -515,21 +363,18 @@ int BN_from_montgomery(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont, if (!BN_mul(t1,t2,&mont->N,ctx)) goto err; if (!BN_add(t2,a,t1)) goto err; if (!BN_rshift(ret,t2,mont->ri)) goto err; -#endif /* MONT_WORD */ -#if !defined(BRANCH_FREE) || BRANCH_FREE==0 if (BN_ucmp(ret, &(mont->N)) >= 0) { if (!BN_usub(ret,ret,&(mont->N))) goto err; } -#endif retn=1; bn_check_top(ret); err: BN_CTX_end(ctx); +#endif /* MONT_WORD */ return(retn); } -#endif /* MONT_FROM_WORD___NON_DEFAULT_0_9_8_BUILD */ BN_MONT_CTX *BN_MONT_CTX_new(void) { @@ -549,11 +394,7 @@ void BN_MONT_CTX_init(BN_MONT_CTX *ctx) BN_init(&(ctx->RR)); BN_init(&(ctx->N)); BN_init(&(ctx->Ni)); -#if 0 /* for OpenSSL 0.9.9 mont->n0 */ ctx->n0[0] = ctx->n0[1] = 0; -#else - ctx->n0 = 0; -#endif ctx->flags=0; } @@ -585,26 +426,22 @@ int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx) BIGNUM tmod; BN_ULONG buf[2]; - mont->ri=(BN_num_bits(mod)+(BN_BITS2-1))/BN_BITS2*BN_BITS2; - BN_zero(R); -#if 0 /* for OpenSSL 0.9.9 mont->n0, would be "#if defined(OPENSSL_BN_ASM_MONT) && (BN_BITS2<=32)", - only certain BN_BITS2<=32 platforms actually need this */ - if (!(BN_set_bit(R,2*BN_BITS2))) goto err; /* R */ -#else - if (!(BN_set_bit(R,BN_BITS2))) goto err; /* R */ -#endif - - buf[0]=mod->d[0]; /* tmod = N mod word size */ - buf[1]=0; - BN_init(&tmod); tmod.d=buf; - tmod.top = buf[0] != 0 ? 1 : 0; tmod.dmax=2; tmod.neg=0; -#if 0 /* for OpenSSL 0.9.9 mont->n0, would be "#if defined(OPENSSL_BN_ASM_MONT) && (BN_BITS2<=32)"; - only certain BN_BITS2<=32 platforms actually need this */ + mont->ri=(BN_num_bits(mod)+(BN_BITS2-1))/BN_BITS2*BN_BITS2; + +#if defined(OPENSSL_BN_ASM_MONT) && (BN_BITS2<=32) + /* Only certain BN_BITS2<=32 platforms actually make use of + * n0[1], and we could use the #else case (with a shorter R + * value) for the others. However, currently only the assembler + * files do know which is which. */ + + BN_zero(R); + if (!(BN_set_bit(R,2*BN_BITS2))) goto err; + tmod.top=0; if ((buf[0] = mod->d[0])) tmod.top=1; if ((buf[1] = mod->top>1 ? mod->d[1] : 0)) tmod.top=2; @@ -632,6 +469,12 @@ int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx) mont->n0[0] = (Ri->top > 0) ? Ri->d[0] : 0; mont->n0[1] = (Ri->top > 1) ? Ri->d[1] : 0; #else + BN_zero(R); + if (!(BN_set_bit(R,BN_BITS2))) goto err; /* R */ + + buf[0]=mod->d[0]; /* tmod = N mod word size */ + buf[1]=0; + tmod.top = buf[0] != 0 ? 1 : 0; /* Ri = R^-1 mod N*/ if ((BN_mod_inverse(Ri,R,&tmod,ctx)) == NULL) goto err; @@ -647,12 +490,8 @@ int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx) if (!BN_div(Ri,NULL,Ri,&tmod,ctx)) goto err; /* Ni = (R*Ri-1)/N, * keep only least significant word: */ -# if 0 /* for OpenSSL 0.9.9 mont->n0 */ mont->n0[0] = (Ri->top > 0) ? Ri->d[0] : 0; mont->n0[1] = 0; -# else - mont->n0 = (Ri->top > 0) ? Ri->d[0] : 0; -# endif #endif } #else /* !MONT_WORD */ @@ -689,12 +528,8 @@ BN_MONT_CTX *BN_MONT_CTX_copy(BN_MONT_CTX *to, BN_MONT_CTX *from) if (!BN_copy(&(to->N),&(from->N))) return NULL; if (!BN_copy(&(to->Ni),&(from->Ni))) return NULL; to->ri=from->ri; -#if 0 /* for OpenSSL 0.9.9 mont->n0 */ to->n0[0]=from->n0[0]; to->n0[1]=from->n0[1]; -#else - to->n0=from->n0; -#endif return(to); } diff --git a/src/lib/libcrypto/bn/bn_mul.c b/src/lib/libcrypto/bn/bn_mul.c index b848c8cc60..a0e9ec3b46 100644 --- a/src/lib/libcrypto/bn/bn_mul.c +++ b/src/lib/libcrypto/bn/bn_mul.c @@ -1028,17 +1028,19 @@ int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) assert(j <= al || j <= bl); k = j+j; t = BN_CTX_get(ctx); + if (t == NULL) + goto err; if (al > j || bl > j) { - bn_wexpand(t,k*4); - bn_wexpand(rr,k*4); + if (bn_wexpand(t,k*4) == NULL) goto err; + if (bn_wexpand(rr,k*4) == NULL) goto err; bn_mul_part_recursive(rr->d,a->d,b->d, j,al-j,bl-j,t->d); } else /* al <= j || bl <= j */ { - bn_wexpand(t,k*2); - bn_wexpand(rr,k*2); + if (bn_wexpand(t,k*2) == NULL) goto err; + if (bn_wexpand(rr,k*2) == NULL) goto err; bn_mul_recursive(rr->d,a->d,b->d, j,al-j,bl-j,t->d); } diff --git a/src/lib/libcrypto/bn/bn_print.c b/src/lib/libcrypto/bn/bn_print.c index 810dde34e1..bebb466d08 100644 --- a/src/lib/libcrypto/bn/bn_print.c +++ b/src/lib/libcrypto/bn/bn_print.c @@ -294,6 +294,27 @@ err: return(0); } +int BN_asc2bn(BIGNUM **bn, const char *a) + { + const char *p = a; + if (*p == '-') + p++; + + if (p[0] == '0' && (p[1] == 'X' || p[1] == 'x')) + { + if (!BN_hex2bn(bn, p + 2)) + return 0; + } + else + { + if (!BN_dec2bn(bn, p)) + return 0; + } + if (*a == '-') + (*bn)->neg = 1; + return 1; + } + #ifndef OPENSSL_NO_BIO #ifndef OPENSSL_NO_FP_API int BN_print_fp(FILE *fp, const BIGNUM *a) -- cgit v1.2.3-55-g6feb