import OpenSSL-1.0.0a

author: djm <> 2010-10-01 22:54:21 +0000
committer: djm <> 2010-10-01 22:54:21 +0000
commit: 829fd51d4f8dde4a7f3bf54754f3c1d1a502f5e2 (patch)
tree: e03b9f1bd051e844b971936729e9df549a209130 /src/lib/libcrypto/bn
parent: e6b755d2a53d3cac7a344dfdd6bf7c951cac754c (diff)
download: openbsd-829fd51d4f8dde4a7f3bf54754f3c1d1a502f5e2.tar.gz
openbsd-829fd51d4f8dde4a7f3bf54754f3c1d1a502f5e2.tar.bz2
openbsd-829fd51d4f8dde4a7f3bf54754f3c1d1a502f5e2.zip
20 files changed, 899 insertions, 750 deletions
diff --git a/src/lib/libcrypto/bn/asm/alpha-mont.pl b/src/lib/libcrypto/bn/asm/alpha-mont.pl
index 7a2cc3173b..f7e0ca1646 100644
--- a/src/lib/libcrypto/bn/asm/alpha-mont.pl
+++ b/src/lib/libcrypto/bn/asm/alpha-mont.pl
@@ -53,15 +53,15 @@ $code=<<___;
 .align  5
 .ent    bn_mul_mont
 bn_mul_mont:
-        lda     sp,-40(sp)
+        lda     sp,-48(sp)
        stq     ra,0(sp)
        stq     s3,8(sp)
        stq     s4,16(sp)
        stq     s5,24(sp)
        stq     fp,32(sp)
        mov     sp,fp
-        .mask   0x0400f000,-40
+        .mask   0x0400f000,-48
-        .frame  fp,40,ra
+        .frame  fp,48,ra
        .prologue 0
        .align  4
@@ -306,7 +306,7 @@ bn_mul_mont:
        ldq     s4,16(sp)
        ldq     s5,24(sp)
        ldq     fp,32(sp)
-        lda     sp,40(sp)
+        lda     sp,48(sp)
        ret     (ra)
 .end    bn_mul_mont
 .rdata
diff --git a/src/lib/libcrypto/bn/asm/armv4-mont.pl b/src/lib/libcrypto/bn/asm/armv4-mont.pl
index 05d5dc1a48..14e0d2d1dd 100644
--- a/src/lib/libcrypto/bn/asm/armv4-mont.pl
+++ b/src/lib/libcrypto/bn/asm/armv4-mont.pl
@@ -193,6 +193,7 @@ bn_mul_mont:
        bx      lr                      @ interoperable with Thumb ISA:-)
 .size   bn_mul_mont,.-bn_mul_mont
 .asciz  "Montgomery multiplication for ARMv4, CRYPTOGAMS by <appro\@openssl.org>"
+.align  2
 ___
 $code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm;    # make it possible to compile with -march=armv4
diff --git a/src/lib/libcrypto/bn/asm/bn-586.pl b/src/lib/libcrypto/bn/asm/bn-586.pl
index 26c2685a72..332ef3e91d 100644
--- a/src/lib/libcrypto/bn/asm/bn-586.pl
+++ b/src/lib/libcrypto/bn/asm/bn-586.pl
@@ -1,6 +1,7 @@
 #!/usr/local/bin/perl
-push(@INC,"perlasm","../../perlasm");
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+push(@INC,"${dir}","${dir}../../perlasm");
 require "x86asm.pl";
 &asm_init($ARGV[0],$0);
@@ -24,38 +25,25 @@ sub bn_mul_add_words
        {
        local($name)=@_;
-        &function_begin($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":"");
+        &function_begin_B($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":"");
-        &comment("");
+        $r="eax";
-        $Low="eax";
+        $a="edx";
-        $High="edx";
+        $c="ecx";
-        $a="ebx";
-        $w="ebp";
-        $r="edi";
-        $c="esi";
-        &xor($c,$c);            # clear carry
-        &mov($r,&wparam(0));    #
-        &mov("ecx",&wparam(2)); #
-        &mov($a,&wparam(1));    #
-        &and("ecx",0xfffffff8); # num / 8
-        &mov($w,&wparam(3));    #
-        &push("ecx");           # Up the stack for a tmp variable
-        &jz(&label("maw_finish"));
        if ($sse2) {
                &picmeup("eax","OPENSSL_ia32cap_P");
                &bt(&DWP(0,"eax"),26);
-                &jnc(&label("maw_loop"));
+                &jnc(&label("maw_non_sse2"));
-                &movd("mm0",$w);                # mm0 = w
+                &mov($r,&wparam(0));
+                &mov($a,&wparam(1));
+                &mov($c,&wparam(2));
+                &movd("mm0",&wparam(3));        # mm0 = w
                &pxor("mm1","mm1");             # mm1 = carry_in
+                &jmp(&label("maw_sse2_entry"));
-                &set_label("maw_sse2_loop",0);
+                
+        &set_label("maw_sse2_unrolled",16);
                &movd("mm3",&DWP(0,$r,"",0));   # mm3 = r[0]
                &paddq("mm1","mm3");            # mm1 = carry_in + r[0]
                &movd("mm2",&DWP(0,$a,"",0));   # mm2 = a[0]
@@ -112,42 +100,82 @@ sub bn_mul_add_words
                &psrlq("mm1",32);               # mm1 = carry6
                &paddq("mm1","mm3");            # mm1 = carry6 + r[7] + w*a[7]
                &movd(&DWP(28,$r,"",0),"mm1");
-                &add($r,32);
+                &lea($r,&DWP(32,$r));
                &psrlq("mm1",32);               # mm1 = carry_out
-                &sub("ecx",8);
+                &sub($c,8);
+                &jz(&label("maw_sse2_exit"));
+        &set_label("maw_sse2_entry");
+                &test($c,0xfffffff8);
+                &jnz(&label("maw_sse2_unrolled"));
+        &set_label("maw_sse2_loop",4);
+                &movd("mm2",&DWP(0,$a));        # mm2 = a[i]
+                &movd("mm3",&DWP(0,$r));        # mm3 = r[i]
+                &pmuludq("mm2","mm0");          # a[i] *= w
+                &lea($a,&DWP(4,$a));
+                &paddq("mm1","mm3");            # carry += r[i]
+                &paddq("mm1","mm2");            # carry += a[i]*w
+                &movd(&DWP(0,$r),"mm1");        # r[i] = carry_low
+                &sub($c,1);
+                &psrlq("mm1",32);               # carry = carry_high
+                &lea($r,&DWP(4,$r));
                &jnz(&label("maw_sse2_loop"));
+        &set_label("maw_sse2_exit");
-                &movd($c,"mm1");                # c = carry_out
+                &movd("eax","mm1");             # c = carry_out
                &emms();
+                &ret();
-                &jmp(&label("maw_finish"));
+        &set_label("maw_non_sse2",16);
        }
-        &set_label("maw_loop",0);
+        # function_begin prologue
+        &push("ebp");
+        &push("ebx");
+        &push("esi");
+        &push("edi");
+        &comment("");
+        $Low="eax";
+        $High="edx";
+        $a="ebx";
+        $w="ebp";
+        $r="edi";
+        $c="esi";
+        &xor($c,$c);            # clear carry
+        &mov($r,&wparam(0));    #
+        &mov("ecx",&wparam(2)); #
+        &mov($a,&wparam(1));    #
+        &and("ecx",0xfffffff8); # num / 8
+        &mov($w,&wparam(3));    #
-        &mov(&swtmp(0),"ecx");  #
+        &push("ecx");           # Up the stack for a tmp variable
+        &jz(&label("maw_finish"));
+        &set_label("maw_loop",16);
        for ($i=0; $i<32; $i+=4)
                {
                &comment("Round $i");
-                 &mov("eax",&DWP($i,$a,"",0));  # *a
+                 &mov("eax",&DWP($i,$a));       # *a
                &mul($w);                       # *a * w
-                &add("eax",$c);         # L(t)+= *r
+                &add("eax",$c);                 # L(t)+= c
-                 &mov($c,&DWP($i,$r,"",0));     # L(t)+= *r
                &adc("edx",0);                  # H(t)+=carry
-                 &add("eax",$c);                # L(t)+=c
+                 &add("eax",&DWP($i,$r));       # L(t)+= *r
                &adc("edx",0);                  # H(t)+=carry
-                 &mov(&DWP($i,$r,"",0),"eax");  # *r= L(t);
+                 &mov(&DWP($i,$r),"eax");       # *r= L(t);
                &mov($c,"edx");                 # c=  H(t);
                }
        &comment("");
-        &mov("ecx",&swtmp(0));  #
-        &add($a,32);
-        &add($r,32);
        &sub("ecx",8);
+        &lea($a,&DWP(32,$a));
+        &lea($r,&DWP(32,$r));
        &jnz(&label("maw_loop"));
        &set_label("maw_finish",0);
@@ -160,16 +188,15 @@ sub bn_mul_add_words
        for ($i=0; $i<7; $i++)
                {
                &comment("Tail Round $i");
-                 &mov("eax",&DWP($i*4,$a,"",0));# *a
+                 &mov("eax",&DWP($i*4,$a));     # *a
                &mul($w);                       # *a * w
                &add("eax",$c);                 # L(t)+=c
-                 &mov($c,&DWP($i*4,$r,"",0));   # L(t)+= *r
                &adc("edx",0);                  # H(t)+=carry
-                 &add("eax",$c);
+                 &add("eax",&DWP($i*4,$r));     # L(t)+= *r
                &adc("edx",0);                  # H(t)+=carry
                 &dec("ecx") if ($i != 7-1);
-                &mov(&DWP($i*4,$r,"",0),"eax"); # *r= L(t);
+                &mov(&DWP($i*4,$r),"eax");      # *r= L(t);
-                 &mov($c,"edx");                        # c=  H(t);
+                 &mov($c,"edx");                # c=  H(t);
                &jz(&label("maw_end")) if ($i != 7-1);
                }
        &set_label("maw_end",0);
@@ -184,7 +211,45 @@ sub bn_mul_words
        {
        local($name)=@_;
-        &function_begin($name,"");
+        &function_begin_B($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":"");
+        $r="eax";
+        $a="edx";
+        $c="ecx";
+        if ($sse2) {
+                &picmeup("eax","OPENSSL_ia32cap_P");
+                &bt(&DWP(0,"eax"),26);
+                &jnc(&label("mw_non_sse2"));
+                &mov($r,&wparam(0));
+                &mov($a,&wparam(1));
+                &mov($c,&wparam(2));
+                &movd("mm0",&wparam(3));        # mm0 = w
+                &pxor("mm1","mm1");             # mm1 = carry = 0
+        &set_label("mw_sse2_loop",16);
+                &movd("mm2",&DWP(0,$a));        # mm2 = a[i]
+                &pmuludq("mm2","mm0");          # a[i] *= w
+                &lea($a,&DWP(4,$a));
+                &paddq("mm1","mm2");            # carry += a[i]*w
+                &movd(&DWP(0,$r),"mm1");        # r[i] = carry_low
+                &sub($c,1);
+                &psrlq("mm1",32);               # carry = carry_high
+                &lea($r,&DWP(4,$r));
+                &jnz(&label("mw_sse2_loop"));
+                &movd("eax","mm1");             # return carry
+                &emms();
+                &ret();
+        &set_label("mw_non_sse2",16);
+        }
+        # function_begin prologue
+        &push("ebp");
+        &push("ebx");
+        &push("esi");
+        &push("edi");
        &comment("");
        $Low="eax";
@@ -257,7 +322,40 @@ sub bn_sqr_words
        {
        local($name)=@_;
-        &function_begin($name,"");
+        &function_begin_B($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":"");
+        $r="eax";
+        $a="edx";
+        $c="ecx";
+        if ($sse2) {
+                &picmeup("eax","OPENSSL_ia32cap_P");
+                &bt(&DWP(0,"eax"),26);
+                &jnc(&label("sqr_non_sse2"));
+                &mov($r,&wparam(0));
+                &mov($a,&wparam(1));
+                &mov($c,&wparam(2));
+        &set_label("sqr_sse2_loop",16);
+                &movd("mm0",&DWP(0,$a));        # mm0 = a[i]
+                &pmuludq("mm0","mm0");          # a[i] *= a[i]
+                &lea($a,&DWP(4,$a));            # a++
+                &movq(&QWP(0,$r),"mm0");        # r[i] = a[i]*a[i]
+                &sub($c,1);
+                &lea($r,&DWP(8,$r));            # r += 2
+                &jnz(&label("sqr_sse2_loop"));
+                &emms();
+                &ret();
+        &set_label("sqr_non_sse2",16);
+        }
+        # function_begin prologue
+        &push("ebp");
+        &push("ebx");
+        &push("esi");
+        &push("edi");
        &comment("");
        $r="esi";
@@ -313,12 +411,13 @@ sub bn_div_words
        {
        local($name)=@_;
-        &function_begin($name,"");
+        &function_begin_B($name,"");
        &mov("edx",&wparam(0)); #
        &mov("eax",&wparam(1)); #
-        &mov("ebx",&wparam(2)); #
+        &mov("ecx",&wparam(2)); #
-        &div("ebx");
+        &div("ecx");
-        &function_end($name);
+        &ret();
+        &function_end_B($name);
        }
 sub bn_add_words
diff --git a/src/lib/libcrypto/bn/asm/co-586.pl b/src/lib/libcrypto/bn/asm/co-586.pl
index 5d962cb957..57101a6bd7 100644
--- a/src/lib/libcrypto/bn/asm/co-586.pl
+++ b/src/lib/libcrypto/bn/asm/co-586.pl
@@ -1,6 +1,7 @@
 #!/usr/local/bin/perl
-push(@INC,"perlasm","../../perlasm");
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+push(@INC,"${dir}","${dir}../../perlasm");
 require "x86asm.pl";
 &asm_init($ARGV[0],$0);
diff --git a/src/lib/libcrypto/bn/asm/ppc.pl b/src/lib/libcrypto/bn/asm/ppc.pl
index 08e0053473..37c65d3511 100644
--- a/src/lib/libcrypto/bn/asm/ppc.pl
+++ b/src/lib/libcrypto/bn/asm/ppc.pl
@@ -100,9 +100,9 @@
 #       me a note at schari@us.ibm.com
 #
-$opf = shift;
+$flavour = shift;
-if ($opf =~ /32\.s/) {
+if ($flavour =~ /32/) {
        $BITS=  32;
        $BNSZ=  $BITS/8;
        $ISA=   "\"ppc\"";
@@ -125,7 +125,7 @@ if ($opf =~ /32\.s/) {
        $INSR=  "insrwi";       # insert right
        $ROTL=  "rotlwi";       # rotate left by immediate
        $TR=    "tw";           # conditional trap
-} elsif ($opf =~ /64\.s/) {
+} elsif ($flavour =~ /64/) {
        $BITS=  64;
        $BNSZ=  $BITS/8;
        $ISA=   "\"ppc64\"";
@@ -149,93 +149,16 @@ if ($opf =~ /32\.s/) {
        $INSR=  "insrdi";       # insert right 
        $ROTL=  "rotldi";       # rotate left by immediate
        $TR=    "td";           # conditional trap
-} else { die "nonsense $opf"; }
+} else { die "nonsense $flavour"; }
-( defined shift || open STDOUT,">$opf" ) || die "can't open $opf: $!";
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
+( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
+die "can't locate ppc-xlate.pl";
-# function entry points from the AIX code
+open STDOUT,"| $^X $xlate $flavour ".shift or die "can't call $xlate: $!";  # 'or', not '||': '||' binds to the always-true command string, so die could never fire
-#
-# There are other, more elegant, ways to handle this. We (IBM) chose
-# this approach as it plays well with scripts we run to 'namespace'
-# OpenSSL .i.e. we add a prefix to all the public symbols so we can
-# co-exist in the same process with other implementations of OpenSSL.
-# 'cleverer' ways of doing these substitutions tend to hide data we
-# need to be obvious.
-#
-my @items = ("bn_sqr_comba4",
-             "bn_sqr_comba8",
-             "bn_mul_comba4",
-             "bn_mul_comba8",
-             "bn_sub_words",
-             "bn_add_words",
-             "bn_div_words",
-             "bn_sqr_words",
-             "bn_mul_words",
-             "bn_mul_add_words");
-if    ($opf =~ /linux/) {  do_linux();  }
+$data=<<EOF;
-elsif ($opf =~ /aix/)   {  do_aix();    }
-elsif ($opf =~ /osx/)   {  do_osx();    }
-else                    {  do_bsd();    }
-sub do_linux {
-    $d=&data();
-    if ($BITS==64) {
-      foreach $t (@items) {
-        $d =~ s/\.$t:/\
-\t.section\t".opd","aw"\
-\t.align\t3\
-\t.globl\t$t\
-$t:\
-\t.quad\t.$t,.TOC.\@tocbase,0\
-\t.size\t$t,24\
-\t.previous\n\
-\t.type\t.$t,\@function\
-\t.globl\t.$t\
-.$t:/g;
-      }
-    }
-    else {
-      foreach $t (@items) {
-        $d=~s/\.$t/$t/g;
-      }
-    }
-    # hide internal labels to avoid pollution of name table...
-    $d=~s/Lppcasm_/.Lppcasm_/gm;
-    print $d;
-}
-sub do_aix {
-    # AIX assembler is smart enough to please the linker without
-    # making us do something special...
-    print &data();
-}
-# MacOSX 32 bit
-sub do_osx {
-    $d=&data();
-    # Change the bn symbol prefix from '.' to '_'
-    foreach $t (@items) {
-      $d=~s/\.$t/_$t/g;
-    }
-    # Change .machine to something OS X asm will accept
-    $d=~s/\.machine.*/.text/g;
-    $d=~s/\#/;/g; # change comment from '#' to ';'
-    print $d;
-}
-# BSD (Untested)
-sub do_bsd {
-    $d=&data();
-    foreach $t (@items) {
-      $d=~s/\.$t/_$t/g;
-    }
-    print $d;
-}
-sub data {
-        local($data)=<<EOF;
 #--------------------------------------------------------------------
 #
 #
@@ -297,33 +220,20 @@ sub data {
 #
 #       Defines to be used in the assembly code.
 #       
-.set r0,0       # we use it as storage for value of 0
+#.set r0,0      # we use it as storage for value of 0
-.set SP,1       # preserved
+#.set SP,1      # preserved
-.set RTOC,2     # preserved 
+#.set RTOC,2    # preserved 
-.set r3,3       # 1st argument/return value
+#.set r3,3      # 1st argument/return value
-.set r4,4       # 2nd argument/volatile register
+#.set r4,4      # 2nd argument/volatile register
-.set r5,5       # 3rd argument/volatile register
+#.set r5,5      # 3rd argument/volatile register
-.set r6,6       # ...
+#.set r6,6      # ...
-.set r7,7
+#.set r7,7
-.set r8,8
+#.set r8,8
-.set r9,9
+#.set r9,9
-.set r10,10
+#.set r10,10
-.set r11,11
+#.set r11,11
-.set r12,12
+#.set r12,12
-.set r13,13     # not used, nor any other "below" it...
+#.set r13,13    # not used, nor any other "below" it...
-.set BO_IF_NOT,4
-.set BO_IF,12
-.set BO_dCTR_NZERO,16
-.set BO_dCTR_ZERO,18
-.set BO_ALWAYS,20
-.set CR0_LT,0;
-.set CR0_GT,1;
-.set CR0_EQ,2
-.set CR1_FX,4;
-.set CR1_FEX,5;
-.set CR1_VX,6
-.set LR,8
 #       Declare function names to be global
 #       NOTE:   For gcc these names MUST be changed to remove
@@ -344,7 +254,7 @@ sub data {
        
 # .text section
        
-        .machine        $ISA
+        .machine        "any"
 #
 #       NOTE:   The following label name should be changed to
@@ -478,7 +388,7 @@ sub data {
        $ST             r9,`6*$BNSZ`(r3)        #r[6]=c1
        $ST             r10,`7*$BNSZ`(r3)       #r[7]=c2
-        bclr    BO_ALWAYS,CR0_LT
+        blr
        .long   0x00000000
 #
@@ -903,7 +813,7 @@ sub data {
        $ST             r9, `15*$BNSZ`(r3)      #r[15]=c1;
-        bclr    BO_ALWAYS,CR0_LT
+        blr
        .long   0x00000000
@@ -1055,7 +965,7 @@ sub data {
        $ST     r10,`6*$BNSZ`(r3)       #r[6]=c1
        $ST     r11,`7*$BNSZ`(r3)       #r[7]=c2
-        bclr    BO_ALWAYS,CR0_LT
+        blr
        .long   0x00000000
 #
@@ -1591,7 +1501,7 @@ sub data {
        adde    r10,r10,r9
        $ST     r12,`14*$BNSZ`(r3)      #r[14]=c3;
        $ST     r10,`15*$BNSZ`(r3)      #r[15]=c1;
-        bclr    BO_ALWAYS,CR0_LT
+        blr
        .long   0x00000000
 #
@@ -1623,7 +1533,7 @@ sub data {
        subfc.  r7,r0,r6        # If r6 is 0 then result is 0.
                                # if r6 > 0 then result !=0
                                # In either case carry bit is set.
-        bc      BO_IF,CR0_EQ,Lppcasm_sub_adios
+        beq     Lppcasm_sub_adios
        addi    r4,r4,-$BNSZ
        addi    r3,r3,-$BNSZ
        addi    r5,r5,-$BNSZ
@@ -1635,11 +1545,11 @@ Lppcasm_sub_mainloop:
                                # if carry = 1 this is r7-r8. Else it
                                # is r7-r8 -1 as we need.
        $STU    r6,$BNSZ(r3)
-        bc      BO_dCTR_NZERO,CR0_EQ,Lppcasm_sub_mainloop
+        bdnz-   Lppcasm_sub_mainloop
 Lppcasm_sub_adios:      
        subfze  r3,r0           # if carry bit is set then r3 = 0 else -1
        andi.   r3,r3,1         # keep only last bit.
-        bclr    BO_ALWAYS,CR0_LT
+        blr
        .long   0x00000000
@@ -1670,7 +1580,7 @@ Lppcasm_sub_adios:
 #       check for r6 = 0. Is this needed?
 #
        addic.  r6,r6,0         #test r6 and clear carry bit.
-        bc      BO_IF,CR0_EQ,Lppcasm_add_adios
+        beq     Lppcasm_add_adios
        addi    r4,r4,-$BNSZ
        addi    r3,r3,-$BNSZ
        addi    r5,r5,-$BNSZ
@@ -1680,10 +1590,10 @@ Lppcasm_add_mainloop:
        $LDU    r8,$BNSZ(r5)
        adde    r8,r7,r8
        $STU    r8,$BNSZ(r3)
-        bc      BO_dCTR_NZERO,CR0_EQ,Lppcasm_add_mainloop
+        bdnz-   Lppcasm_add_mainloop
 Lppcasm_add_adios:      
        addze   r3,r0                   #return carry bit.
-        bclr    BO_ALWAYS,CR0_LT
+        blr
        .long   0x00000000
 #
@@ -1707,24 +1617,24 @@ Lppcasm_add_adios:
 #       r5 = d
        
        $UCMPI  0,r5,0                  # compare r5 and 0
-        bc      BO_IF_NOT,CR0_EQ,Lppcasm_div1   # proceed if d!=0
+        bne     Lppcasm_div1            # proceed if d!=0
        li      r3,-1                   # d=0 return -1
-        bclr    BO_ALWAYS,CR0_LT        
+        blr
 Lppcasm_div1:
        xor     r0,r0,r0                #r0=0
        li      r8,$BITS
        $CNTLZ. r7,r5                   #r7 = num leading 0s in d.
-        bc      BO_IF,CR0_EQ,Lppcasm_div2       #proceed if no leading zeros
+        beq     Lppcasm_div2            #proceed if no leading zeros
        subf    r8,r7,r8                #r8 = BN_num_bits_word(d)
        $SHR.   r9,r3,r8                #are there any bits above r8'th?
        $TR     16,r9,r0                #if there're, signal to dump core...
 Lppcasm_div2:
        $UCMP   0,r3,r5                 #h>=d?
-        bc      BO_IF,CR0_LT,Lppcasm_div3       #goto Lppcasm_div3 if not
+        blt     Lppcasm_div3            #goto Lppcasm_div3 if not
        subf    r3,r5,r3                #h-=d ; 
 Lppcasm_div3:                           #r7 = BN_BITS2-i. so r7=i
        cmpi    0,0,r7,0                # is (i == 0)?
-        bc      BO_IF,CR0_EQ,Lppcasm_div4
+        beq     Lppcasm_div4
        $SHL    r3,r3,r7                # h = (h<< i)
        $SHR    r8,r4,r8                # r8 = (l >> BN_BITS2 -i)
        $SHL    r5,r5,r7                # d<<=i
@@ -1741,7 +1651,7 @@ Lppcasm_divouterloop:
        $SHRI   r11,r4,`$BITS/2`        #r11= (l&BN_MASK2h)>>BN_BITS4
                                        # compute here for innerloop.
        $UCMP   0,r8,r9                 # is (h>>BN_BITS4)==dh
-        bc      BO_IF_NOT,CR0_EQ,Lppcasm_div5   # goto Lppcasm_div5 if not
+        bne     Lppcasm_div5            # goto Lppcasm_div5 if not
        li      r8,-1
        $CLRU   r8,r8,`$BITS/2`         #q = BN_MASK2l 
@@ -1762,9 +1672,9 @@ Lppcasm_divinnerloop:
                                        # the following 2 instructions do that
        $SHLI   r7,r10,`$BITS/2`        # r7 = (t<<BN_BITS4)
        or      r7,r7,r11               # r7|=((l&BN_MASK2h)>>BN_BITS4)
-        $UCMP   1,r6,r7                 # compare (tl <= r7)
+        $UCMP   cr1,r6,r7               # compare (tl <= r7)
-        bc      BO_IF_NOT,CR0_EQ,Lppcasm_divinnerexit
+        bne     Lppcasm_divinnerexit
-        bc      BO_IF_NOT,CR1_FEX,Lppcasm_divinnerexit
+        ble     cr1,Lppcasm_divinnerexit
        addi    r8,r8,-1                #q--
        subf    r12,r9,r12              #th -=dh
        $CLRU   r10,r5,`$BITS/2`        #r10=dl. t is no longer needed in loop.
@@ -1773,14 +1683,14 @@ Lppcasm_divinnerloop:
 Lppcasm_divinnerexit:
        $SHRI   r10,r6,`$BITS/2`        #t=(tl>>BN_BITS4)
        $SHLI   r11,r6,`$BITS/2`        #tl=(tl<<BN_BITS4)&BN_MASK2h;
-        $UCMP   1,r4,r11                # compare l and tl
+        $UCMP   cr1,r4,r11              # compare l and tl
        add     r12,r12,r10             # th+=t
-        bc      BO_IF_NOT,CR1_FX,Lppcasm_div7  # if (l>=tl) goto Lppcasm_div7
+        bge     cr1,Lppcasm_div7        # if (l>=tl) goto Lppcasm_div7
        addi    r12,r12,1               # th++
 Lppcasm_div7:
        subf    r11,r11,r4              #r11=l-tl
-        $UCMP   1,r3,r12                #compare h and th
+        $UCMP   cr1,r3,r12              #compare h and th
-        bc      BO_IF_NOT,CR1_FX,Lppcasm_div8   #if (h>=th) goto Lppcasm_div8
+        bge     cr1,Lppcasm_div8        #if (h>=th) goto Lppcasm_div8
        addi    r8,r8,-1                # q--
        add     r3,r5,r3                # h+=d
 Lppcasm_div8:
@@ -1791,12 +1701,12 @@ Lppcasm_div8:
                                        # the following 2 instructions will do this.
        $INSR   r11,r12,`$BITS/2`,`$BITS/2`     # r11 is the value we want rotated $BITS/2.
        $ROTL   r3,r11,`$BITS/2`        # rotate by $BITS/2 and store in r3
-        bc      BO_dCTR_ZERO,CR0_EQ,Lppcasm_div9#if (count==0) break ;
+        bdz     Lppcasm_div9            #if (count==0) break ;
        $SHLI   r0,r8,`$BITS/2`         #ret =q<<BN_BITS4
        b       Lppcasm_divouterloop
 Lppcasm_div9:
        or      r3,r8,r0
-        bclr    BO_ALWAYS,CR0_LT
+        blr
        .long   0x00000000
 #
@@ -1822,7 +1732,7 @@ Lppcasm_div9:
 #       No unrolling done here. Not performance critical.
        addic.  r5,r5,0                 #test r5.
-        bc      BO_IF,CR0_EQ,Lppcasm_sqr_adios
+        beq     Lppcasm_sqr_adios
        addi    r4,r4,-$BNSZ
        addi    r3,r3,-$BNSZ
        mtctr   r5
@@ -1833,9 +1743,9 @@ Lppcasm_sqr_mainloop:
        $UMULH  r8,r6,r6
        $STU    r7,$BNSZ(r3)
        $STU    r8,$BNSZ(r3)
-        bc      BO_dCTR_NZERO,CR0_EQ,Lppcasm_sqr_mainloop
+        bdnz-   Lppcasm_sqr_mainloop
 Lppcasm_sqr_adios:      
-        bclr    BO_ALWAYS,CR0_LT
+        blr
        .long   0x00000000
@@ -1858,7 +1768,7 @@ Lppcasm_sqr_adios:
        xor     r0,r0,r0
        xor     r12,r12,r12             # used for carry
        rlwinm. r7,r5,30,2,31           # num >> 2
-        bc      BO_IF,CR0_EQ,Lppcasm_mw_REM
+        beq     Lppcasm_mw_REM
        mtctr   r7
 Lppcasm_mw_LOOP:        
                                        #mul(rp[0],ap[0],w,c1);
@@ -1896,11 +1806,11 @@ Lppcasm_mw_LOOP:
        
        addi    r3,r3,`4*$BNSZ`
        addi    r4,r4,`4*$BNSZ`
-        bc      BO_dCTR_NZERO,CR0_EQ,Lppcasm_mw_LOOP
+        bdnz-   Lppcasm_mw_LOOP
 Lppcasm_mw_REM:
        andi.   r5,r5,0x3
-        bc      BO_IF,CR0_EQ,Lppcasm_mw_OVER
+        beq     Lppcasm_mw_OVER
                                        #mul(rp[0],ap[0],w,c1);
        $LD     r8,`0*$BNSZ`(r4)
        $UMULL  r9,r6,r8
@@ -1912,7 +1822,7 @@ Lppcasm_mw_REM:
        
        addi    r5,r5,-1
        cmpli   0,0,r5,0
-        bc      BO_IF,CR0_EQ,Lppcasm_mw_OVER
+        beq     Lppcasm_mw_OVER
        
                                        #mul(rp[1],ap[1],w,c1);
@@ -1926,7 +1836,7 @@ Lppcasm_mw_REM:
        
        addi    r5,r5,-1
        cmpli   0,0,r5,0
-        bc      BO_IF,CR0_EQ,Lppcasm_mw_OVER
+        beq     Lppcasm_mw_OVER
        
                                        #mul_add(rp[2],ap[2],w,c1);
        $LD     r8,`2*$BNSZ`(r4)
@@ -1939,7 +1849,7 @@ Lppcasm_mw_REM:
                
 Lppcasm_mw_OVER:        
        addi    r3,r12,0
-        bclr    BO_ALWAYS,CR0_LT
+        blr
        .long   0x00000000
 #
@@ -1964,7 +1874,7 @@ Lppcasm_mw_OVER:
        xor     r0,r0,r0                #r0 = 0
        xor     r12,r12,r12             #r12 = 0 . used for carry               
        rlwinm. r7,r5,30,2,31           # num >> 2
-        bc      BO_IF,CR0_EQ,Lppcasm_maw_leftover       # if (num < 4) go LPPCASM_maw_leftover
+        beq     Lppcasm_maw_leftover    # if (num < 4) go LPPCASM_maw_leftover
        mtctr   r7
 Lppcasm_maw_mainloop:   
                                        #mul_add(rp[0],ap[0],w,c1);
@@ -2017,11 +1927,11 @@ Lppcasm_maw_mainloop:
        $ST     r11,`3*$BNSZ`(r3)
        addi    r3,r3,`4*$BNSZ`
        addi    r4,r4,`4*$BNSZ`
-        bc      BO_dCTR_NZERO,CR0_EQ,Lppcasm_maw_mainloop
+        bdnz-   Lppcasm_maw_mainloop
        
 Lppcasm_maw_leftover:
        andi.   r5,r5,0x3
-        bc      BO_IF,CR0_EQ,Lppcasm_maw_adios
+        beq     Lppcasm_maw_adios
        addi    r3,r3,-$BNSZ
        addi    r4,r4,-$BNSZ
                                        #mul_add(rp[0],ap[0],w,c1);
@@ -2036,7 +1946,7 @@ Lppcasm_maw_leftover:
        addze   r12,r10
        $ST     r9,0(r3)
        
-        bc      BO_dCTR_ZERO,CR0_EQ,Lppcasm_maw_adios
+        bdz     Lppcasm_maw_adios
                                        #mul_add(rp[1],ap[1],w,c1);
        $LDU    r8,$BNSZ(r4)    
        $UMULL  r9,r6,r8
@@ -2048,7 +1958,7 @@ Lppcasm_maw_leftover:
        addze   r12,r10
        $ST     r9,0(r3)
        
-        bc      BO_dCTR_ZERO,CR0_EQ,Lppcasm_maw_adios
+        bdz     Lppcasm_maw_adios
                                        #mul_add(rp[2],ap[2],w,c1);
        $LDU    r8,$BNSZ(r4)
        $UMULL  r9,r6,r8
@@ -2062,17 +1972,10 @@ Lppcasm_maw_leftover:
                
 Lppcasm_maw_adios:      
        addi    r3,r12,0
-        bclr    BO_ALWAYS,CR0_LT
+        blr
        .long   0x00000000
        .align  4
 EOF
-        $data =~ s/\`([^\`]*)\`/eval $1/gem;
+$data =~ s/\`([^\`]*)\`/eval $1/gem;
+print $data;
-        # if some assembler chokes on some simplified mnemonic,
+close STDOUT;
-        # this is the spot to fix it up, e.g.:
-        # GNU as doesn't seem to accept cmplw, 32-bit unsigned compare
-        $data =~ s/^(\s*)cmplw(\s+)([^,]+),(.*)/$1cmpl$2$3,0,$4/gm;
-        # assembler X doesn't accept li, load immediate value
-        #$data =~ s/^(\s*)li(\s+)([^,]+),(.*)/$1addi$2$3,0,$4/gm;
-        return($data);
-}
diff --git a/src/lib/libcrypto/bn/asm/sparcv8plus.S b/src/lib/libcrypto/bn/asm/sparcv8plus.S
index 8c56e2e7e7..63de1860f2 100644
--- a/src/lib/libcrypto/bn/asm/sparcv8plus.S
+++ b/src/lib/libcrypto/bn/asm/sparcv8plus.S
@@ -144,6 +144,19 @@
 *          }
 */
+#if defined(__SUNPRO_C) && defined(__sparcv9)
+  /* They've said -xarch=v9 at command line */
+  .register     %g2,#scratch
+  .register     %g3,#scratch
+# define        FRAME_SIZE      -192
+#elif defined(__GNUC__) && defined(__arch64__)
+  /* They've said -m64 at command line */
+  .register     %g2,#scratch
+  .register     %g3,#scratch
+# define        FRAME_SIZE      -192
+#else 
+# define        FRAME_SIZE      -96
+#endif 
 /*
 * GNU assembler can't stand stuw:-(
 */
@@ -619,8 +632,6 @@ bn_sub_words:
 *                                                      Andy.
 */
-#define FRAME_SIZE      -96
 /*
 * Here is register usage map for *all* routines below.
 */
diff --git a/src/lib/libcrypto/bn/asm/x86_64-gcc.c b/src/lib/libcrypto/bn/asm/x86_64-gcc.c
index f13f52dd85..acb0b40118 100644
--- a/src/lib/libcrypto/bn/asm/x86_64-gcc.c
+++ b/src/lib/libcrypto/bn/asm/x86_64-gcc.c
@@ -1,4 +1,5 @@
-#ifdef __SUNPRO_C
+#include "../bn_lcl.h"
+#if !(defined(__GNUC__) && __GNUC__>=2)
 # include "../bn_asm.c" /* kind of dirty hack for Sun Studio */
 #else
 /*
@@ -54,7 +55,15 @@
 *    machine.
 */
+#ifdef _WIN64
+#define BN_ULONG unsigned long long
+#else
 #define BN_ULONG unsigned long
+#endif
+#undef mul
+#undef mul_add
+#undef sqr
 /*
 * "m"(a), "+m"(r)      is the way to favor DirectPath µ-code;
@@ -97,7 +106,7 @@
                : "a"(a)                \
                : "cc");
-BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
+BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
        {
        BN_ULONG c1=0;
@@ -121,7 +130,7 @@ BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
        return(c1);
        } 
-BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
+BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
        {
        BN_ULONG c1=0;
@@ -144,7 +153,7 @@ BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
        return(c1);
        } 
-void bn_sqr_words(BN_ULONG *r, BN_ULONG *a, int n)
+void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n)
        {
        if (n <= 0) return;
@@ -175,14 +184,14 @@ BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
        return ret;
 }
-BN_ULONG bn_add_words (BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp,int n)
+BN_ULONG bn_add_words (BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,int n)
 { BN_ULONG ret=0,i=0;
        if (n <= 0) return 0;
        asm (
        "       subq    %2,%2           \n"
-        ".align 16                      \n"
+        ".p2align 4                     \n"
        "1:     movq    (%4,%2,8),%0    \n"
        "       adcq    (%5,%2,8),%0    \n"
        "       movq    %0,(%3,%2,8)    \n"
@@ -198,14 +207,14 @@ BN_ULONG bn_add_words (BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp,int n)
 }
 #ifndef SIMICS
-BN_ULONG bn_sub_words (BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp,int n)
+BN_ULONG bn_sub_words (BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,int n)
 { BN_ULONG ret=0,i=0;
        if (n <= 0) return 0;
        asm (
        "       subq    %2,%2           \n"
-        ".align 16                      \n"
+        ".p2align 4                     \n"
        "1:     movq    (%4,%2,8),%0    \n"
        "       sbbq    (%5,%2,8),%0    \n"
        "       movq    %0,(%3,%2,8)    \n"
@@ -485,7 +494,7 @@ void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
        r[7]=c2;
        }
-void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a)
+void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a)
        {
        BN_ULONG t1,t2;
        BN_ULONG c1,c2,c3;
@@ -561,7 +570,7 @@ void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a)
        r[15]=c1;
        }
-void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a)
+void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a)
        {
        BN_ULONG t1,t2;
        BN_ULONG c1,c2,c3;
diff --git a/src/lib/libcrypto/bn/asm/x86_64-mont.pl b/src/lib/libcrypto/bn/asm/x86_64-mont.pl
index c43b69592a..3b7a6f243f 100755
--- a/src/lib/libcrypto/bn/asm/x86_64-mont.pl
+++ b/src/lib/libcrypto/bn/asm/x86_64-mont.pl
@@ -15,14 +15,18 @@
 # respectful 50%. It remains to be seen if loop unrolling and
 # dedicated squaring routine can provide further improvement...
-$output=shift;
+$flavour = shift;
+$output  = shift;
+if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
+$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
 ( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
 ( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
 die "can't locate x86_64-xlate.pl";
-open STDOUT,"| $^X $xlate $output";
+open STDOUT,"| $^X $xlate $flavour $output";
 # int bn_mul_mont(
 $rp="%rdi";     # BN_ULONG *rp,
@@ -55,13 +59,14 @@ bn_mul_mont:
        push    %r15
        mov     ${num}d,${num}d
-        lea     2($num),%rax
+        lea     2($num),%r10
-        mov     %rsp,%rbp
+        mov     %rsp,%r11
-        neg     %rax
+        neg     %r10
-        lea     (%rsp,%rax,8),%rsp      # tp=alloca(8*(num+2))
+        lea     (%rsp,%r10,8),%rsp      # tp=alloca(8*(num+2))
        and     \$-1024,%rsp            # minimize TLB usage
-        mov     %rbp,8(%rsp,$num,8)     # tp[num+1]=%rsp
+        mov     %r11,8(%rsp,$num,8)     # tp[num+1]=%rsp
+.Lprologue:
        mov     %rdx,$bp                # $bp reassigned, remember?
        mov     ($n0),$n0               # pull n0[0] value
@@ -197,18 +202,129 @@ bn_mul_mont:
        dec     $j
        jge     .Lcopy
-        mov     8(%rsp,$num,8),%rsp     # restore %rsp
+        mov     8(%rsp,$num,8),%rsi     # restore %rsp
        mov     \$1,%rax
+        mov     (%rsi),%r15
+        mov     8(%rsi),%r14
+        mov     16(%rsi),%r13
+        mov     24(%rsi),%r12
+        mov     32(%rsi),%rbp
+        mov     40(%rsi),%rbx
+        lea     48(%rsi),%rsp
+.Lepilogue:
+        ret
+.size   bn_mul_mont,.-bn_mul_mont
+.asciz  "Montgomery Multiplication for x86_64, CRYPTOGAMS by <appro\@openssl.org>"
+.align  16
+___
+# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
+#               CONTEXT *context,DISPATCHER_CONTEXT *disp)
+if ($win64) {
+$rec="%rcx";
+$frame="%rdx";
+$context="%r8";
+$disp="%r9";
+$code.=<<___;
+.extern __imp_RtlVirtualUnwind
+.type   se_handler,\@abi-omnipotent
+.align  16
+se_handler:                             # Win64 exception handler referenced from .LSEH_info_bn_mul_mont
+        push    %rsi                    # save the registers this handler uses, plus flags
+        push    %rdi
+        push    %rbx
+        push    %rbp
+        push    %r12
+        push    %r13
+        push    %r14
+        push    %r15
+        pushfq
+        sub     \$64,%rsp               # room for the stack arguments stored at 32..56(%rsp) below
+        mov     120($context),%rax      # pull context->Rax
+        mov     248($context),%rbx      # pull context->Rip
+        lea     .Lprologue(%rip),%r10
+        cmp     %r10,%rbx               # context->Rip<.Lprologue
+        jb      .Lin_prologue
+        mov     152($context),%rax      # pull context->Rsp
+        lea     .Lepilogue(%rip),%r10
+        cmp     %r10,%rbx               # context->Rip>=.Lepilogue
+        jae     .Lin_prologue
+        mov     192($context),%r10      # pull $num
+        mov     8(%rax,%r10,8),%rax     # pull saved stack pointer
+        lea     48(%rax),%rax           # step over the six 8-byte slots read back just below
+        mov     -8(%rax),%rbx
+        mov     -16(%rax),%rbp
+        mov     -24(%rax),%r12
+        mov     -32(%rax),%r13
+        mov     -40(%rax),%r14
+        mov     -48(%rax),%r15
+        mov     %rbx,144($context)      # restore context->Rbx
+        mov     %rbp,160($context)      # restore context->Rbp
+        mov     %r12,216($context)      # restore context->R12
+        mov     %r13,224($context)      # restore context->R13
+        mov     %r14,232($context)      # restore context->R14
+        mov     %r15,240($context)      # restore context->R15
+.Lin_prologue:
+        mov     8(%rax),%rdi            # values later stored as context->Rdi / context->Rsi
+        mov     16(%rax),%rsi
+        mov     %rax,152($context)      # restore context->Rsp
+        mov     %rsi,168($context)      # restore context->Rsi
+        mov     %rdi,176($context)      # restore context->Rdi
+        mov     40($disp),%rdi          # disp->ContextRecord
+        mov     $context,%rsi           # context
+        mov     \$154,%ecx              # sizeof(CONTEXT), as a quadword count for the rep movsq below
+        .long   0xa548f3fc              # cld; rep movsq
+        mov     $disp,%rsi
+        xor     %rcx,%rcx               # arg1, UNW_FLAG_NHANDLER
+        mov     8(%rsi),%rdx            # arg2, disp->ImageBase
+        mov     0(%rsi),%r8             # arg3, disp->ControlPc
+        mov     16(%rsi),%r9            # arg4, disp->FunctionEntry
+        mov     40(%rsi),%r10           # disp->ContextRecord
+        lea     56(%rsi),%r11           # &disp->HandlerData
+        lea     24(%rsi),%r12           # &disp->EstablisherFrame
+        mov     %r10,32(%rsp)           # arg5
+        mov     %r11,40(%rsp)           # arg6
+        mov     %r12,48(%rsp)           # arg7
+        mov     %rcx,56(%rsp)           # arg8, (NULL)
+        call    *__imp_RtlVirtualUnwind(%rip)
+        mov     \$1,%eax                # ExceptionContinueSearch
+        add     \$64,%rsp               # undo the frame set up on entry, in reverse order
+        popfq
        pop     %r15
        pop     %r14
        pop     %r13
        pop     %r12
        pop     %rbp
        pop     %rbx
+        pop     %rdi
+        pop     %rsi
        ret
-.size   bn_mul_mont,.-bn_mul_mont
+.size   se_handler,.-se_handler
-.asciz  "Montgomery Multiplication for x86_64, CRYPTOGAMS by <appro\@openssl.org>"
+.section        .pdata
+.align  4
+        .rva    .LSEH_begin_bn_mul_mont
+        .rva    .LSEH_end_bn_mul_mont
+        .rva    .LSEH_info_bn_mul_mont
+.section        .xdata
+.align  8
+.LSEH_info_bn_mul_mont:
+        .byte   9,0,0,0
+        .rva    se_handler
 ___
+}
 print $code;
 close STDOUT;
diff --git a/src/lib/libcrypto/bn/bn.h b/src/lib/libcrypto/bn/bn.h
index f1719a5877..e484b7fc11 100644
--- a/src/lib/libcrypto/bn/bn.h
+++ b/src/lib/libcrypto/bn/bn.h
@@ -56,6 +56,59 @@
 * [including the GNU Public Licence.]
 */
 /* ====================================================================
+ * Copyright (c) 1998-2006 The OpenSSL Project.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer. 
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ *    software must display the following acknowledgment:
+ *    "This product includes software developed by the OpenSSL Project
+ *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ *    endorse or promote products derived from this software without
+ *    prior written permission. For written permission, please contact
+ *    openssl-core@openssl.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ *    nor may "OpenSSL" appear in their names without prior written
+ *    permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ *    acknowledgment:
+ *    "This product includes software developed by the OpenSSL Project
+ *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
+ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This product includes cryptographic software written by Eric Young
+ * (eay@cryptsoft.com).  This product includes software written by Tim
+ * Hudson (tjh@cryptsoft.com).
+ *
+ */
+/* ====================================================================
 * Copyright 2002 Sun Microsystems, Inc. ALL RIGHTS RESERVED.
 *
 * Portions of the attached software ("Contribution") are developed by 
@@ -77,6 +130,7 @@
 #include <stdio.h> /* FILE */
 #endif
 #include <openssl/ossl_typ.h>
+#include <openssl/crypto.h>
 #ifdef  __cplusplus
 extern "C" {
@@ -94,9 +148,11 @@ extern "C" {
 /* #define BN_DEBUG */
 /* #define BN_DEBUG_RAND */
+#ifndef OPENSSL_SMALL_FOOTPRINT
 #define BN_MUL_COMBA
 #define BN_SQR_COMBA
 #define BN_RECURSION
+#endif
 /* This next option uses the C libraries (2 word)/(1 word) function.
 * If it is not defined, I use my C version (which is slower).
@@ -137,6 +193,8 @@ extern "C" {
 #define BN_DEC_FMT1     "%lu"
 #define BN_DEC_FMT2     "%019lu"
 #define BN_DEC_NUM      19
+#define BN_HEX_FMT1     "%lX"
+#define BN_HEX_FMT2     "%016lX"
 #endif
 /* This is where the long long data type is 64 bits, but long is 32.
@@ -162,83 +220,37 @@ extern "C" {
 #define BN_DEC_FMT1     "%llu"
 #define BN_DEC_FMT2     "%019llu"
 #define BN_DEC_NUM      19
+#define BN_HEX_FMT1     "%llX"
+#define BN_HEX_FMT2     "%016llX"
 #endif
 #ifdef THIRTY_TWO_BIT
 #ifdef BN_LLONG
-# if defined(OPENSSL_SYS_WIN32) && !defined(__GNUC__)
+# if defined(_WIN32) && !defined(__GNUC__)
 #  define BN_ULLONG     unsigned __int64
+#  define BN_MASK       (0xffffffffffffffffI64)
 # else
 #  define BN_ULLONG     unsigned long long
+#  define BN_MASK       (0xffffffffffffffffLL)
 # endif
 #endif
-#define BN_ULONG        unsigned long
+#define BN_ULONG        unsigned int
-#define BN_LONG         long
+#define BN_LONG         int
 #define BN_BITS         64
 #define BN_BYTES        4
 #define BN_BITS2        32
 #define BN_BITS4        16
-#ifdef OPENSSL_SYS_WIN32
-/* VC++ doesn't like the LL suffix */
-#define BN_MASK         (0xffffffffffffffffL)
-#else
-#define BN_MASK         (0xffffffffffffffffLL)
-#endif
 #define BN_MASK2        (0xffffffffL)
 #define BN_MASK2l       (0xffff)
 #define BN_MASK2h1      (0xffff8000L)
 #define BN_MASK2h       (0xffff0000L)
 #define BN_TBIT         (0x80000000L)
 #define BN_DEC_CONV     (1000000000L)
-#define BN_DEC_FMT1     "%lu"
-#define BN_DEC_FMT2     "%09lu"
-#define BN_DEC_NUM      9
-#endif
-#ifdef SIXTEEN_BIT
-#ifndef BN_DIV2W
-#define BN_DIV2W
-#endif
-#define BN_ULLONG       unsigned long
-#define BN_ULONG        unsigned short
-#define BN_LONG         short
-#define BN_BITS         32
-#define BN_BYTES        2
-#define BN_BITS2        16
-#define BN_BITS4        8
-#define BN_MASK         (0xffffffff)
-#define BN_MASK2        (0xffff)
-#define BN_MASK2l       (0xff)
-#define BN_MASK2h1      (0xff80)
-#define BN_MASK2h       (0xff00)
-#define BN_TBIT         (0x8000)
-#define BN_DEC_CONV     (100000)
 #define BN_DEC_FMT1     "%u"
-#define BN_DEC_FMT2     "%05u"
+#define BN_DEC_FMT2     "%09u"
-#define BN_DEC_NUM      5
+#define BN_DEC_NUM      9
-#endif
+#define BN_HEX_FMT1     "%X"
+#define BN_HEX_FMT2     "%08X"
-#ifdef EIGHT_BIT
-#ifndef BN_DIV2W
-#define BN_DIV2W
-#endif
-#define BN_ULLONG       unsigned short
-#define BN_ULONG        unsigned char
-#define BN_LONG         char
-#define BN_BITS         16
-#define BN_BYTES        1
-#define BN_BITS2        8
-#define BN_BITS4        4
-#define BN_MASK         (0xffff)
-#define BN_MASK2        (0xff)
-#define BN_MASK2l       (0xf)
-#define BN_MASK2h1      (0xf8)
-#define BN_MASK2h       (0xf0)
-#define BN_TBIT         (0x80)
-#define BN_DEC_CONV     (100)
-#define BN_DEC_FMT1     "%u"
-#define BN_DEC_FMT2     "%02u"
-#define BN_DEC_NUM      2
 #endif
 #define BN_DEFAULT_BITS 1280
@@ -303,12 +315,8 @@ struct bn_mont_ctx_st
        BIGNUM N;      /* The modulus */
        BIGNUM Ni;     /* R*(1/R mod N) - N*Ni = 1
                        * (Ni is only stored for bignum algorithm) */
-#if 0
+        BN_ULONG n0[2];/* least significant word(s) of Ni;
-        /* OpenSSL 0.9.9 preview: */
+                          (type changed with 0.9.9, was "BN_ULONG n0;" before) */
-        BN_ULONG n0[2];/* least significant word(s) of Ni */
-#else
-        BN_ULONG n0;   /* least significant word of Ni */
-#endif
        int flags;
        };
@@ -504,6 +512,7 @@ char *	BN_bn2hex(const BIGNUM *a);
 char *  BN_bn2dec(const BIGNUM *a);
 int     BN_hex2bn(BIGNUM **a, const char *str);
 int     BN_dec2bn(BIGNUM **a, const char *str);
+int     BN_asc2bn(BIGNUM **a, const char *str);
 int     BN_gcd(BIGNUM *r,const BIGNUM *a,const BIGNUM *b,BN_CTX *ctx);
 int     BN_kronecker(const BIGNUM *a,const BIGNUM *b,BN_CTX *ctx); /* returns -2 for error */
 BIGNUM *BN_mod_inverse(BIGNUM *ret,
@@ -531,17 +540,6 @@ int	BN_is_prime_ex(const BIGNUM *p,int nchecks, BN_CTX *ctx, BN_GENCB *cb);
 int     BN_is_prime_fasttest_ex(const BIGNUM *p,int nchecks, BN_CTX *ctx,
                int do_trial_division, BN_GENCB *cb);
-int BN_X931_generate_Xpq(BIGNUM *Xp, BIGNUM *Xq, int nbits, BN_CTX *ctx);
-int BN_X931_derive_prime_ex(BIGNUM *p, BIGNUM *p1, BIGNUM *p2,
-                        const BIGNUM *Xp, const BIGNUM *Xp1, const BIGNUM *Xp2,
-                        const BIGNUM *e, BN_CTX *ctx, BN_GENCB *cb);
-int BN_X931_generate_prime_ex(BIGNUM *p, BIGNUM *p1, BIGNUM *p2,
-                        BIGNUM *Xp1, BIGNUM *Xp2,
-                        const BIGNUM *Xp,
-                        const BIGNUM *e, BN_CTX *ctx,
-                        BN_GENCB *cb);
 BN_MONT_CTX *BN_MONT_CTX_new(void );
 void BN_MONT_CTX_init(BN_MONT_CTX *ctx);
 int BN_mod_mul_montgomery(BIGNUM *r,const BIGNUM *a,const BIGNUM *b,
@@ -560,19 +558,22 @@ BN_MONT_CTX *BN_MONT_CTX_set_locked(BN_MONT_CTX **pmont, int lock,
 #define BN_BLINDING_NO_UPDATE   0x00000001
 #define BN_BLINDING_NO_RECREATE 0x00000002
-BN_BLINDING *BN_BLINDING_new(const BIGNUM *A, const BIGNUM *Ai, /* const */ BIGNUM *mod);
+BN_BLINDING *BN_BLINDING_new(const BIGNUM *A, const BIGNUM *Ai, BIGNUM *mod);
 void BN_BLINDING_free(BN_BLINDING *b);
 int BN_BLINDING_update(BN_BLINDING *b,BN_CTX *ctx);
 int BN_BLINDING_convert(BIGNUM *n, BN_BLINDING *b, BN_CTX *ctx);
 int BN_BLINDING_invert(BIGNUM *n, BN_BLINDING *b, BN_CTX *ctx);
 int BN_BLINDING_convert_ex(BIGNUM *n, BIGNUM *r, BN_BLINDING *b, BN_CTX *);
 int BN_BLINDING_invert_ex(BIGNUM *n, const BIGNUM *r, BN_BLINDING *b, BN_CTX *);
+#ifndef OPENSSL_NO_DEPRECATED
 unsigned long BN_BLINDING_get_thread_id(const BN_BLINDING *);
 void BN_BLINDING_set_thread_id(BN_BLINDING *, unsigned long);
+#endif
+CRYPTO_THREADID *BN_BLINDING_thread_id(BN_BLINDING *);
 unsigned long BN_BLINDING_get_flags(const BN_BLINDING *);
 void BN_BLINDING_set_flags(BN_BLINDING *, unsigned long);
 BN_BLINDING *BN_BLINDING_create_param(BN_BLINDING *b,
-        const BIGNUM *e, /* const */ BIGNUM *m, BN_CTX *ctx,
+        const BIGNUM *e, BIGNUM *m, BN_CTX *ctx,
        int (*bn_mod_exp)(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
                          const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *m_ctx),
        BN_MONT_CTX *m_ctx);
@@ -625,24 +626,24 @@ int	BN_GF2m_mod_solve_quad(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
 *     t^p[0] + t^p[1] + ... + t^p[k]
 * where m = p[0] > p[1] > ... > p[k] = 0.
 */
-int     BN_GF2m_mod_arr(BIGNUM *r, const BIGNUM *a, const unsigned int p[]);
+int     BN_GF2m_mod_arr(BIGNUM *r, const BIGNUM *a, const int p[]);
        /* r = a mod p */
 int     BN_GF2m_mod_mul_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
-        const unsigned int p[], BN_CTX *ctx); /* r = (a * b) mod p */
+        const int p[], BN_CTX *ctx); /* r = (a * b) mod p */
-int     BN_GF2m_mod_sqr_arr(BIGNUM *r, const BIGNUM *a, const unsigned int p[],
+int     BN_GF2m_mod_sqr_arr(BIGNUM *r, const BIGNUM *a, const int p[],
        BN_CTX *ctx); /* r = (a * a) mod p */
-int     BN_GF2m_mod_inv_arr(BIGNUM *r, const BIGNUM *b, const unsigned int p[],
+int     BN_GF2m_mod_inv_arr(BIGNUM *r, const BIGNUM *b, const int p[],
        BN_CTX *ctx); /* r = (1 / b) mod p */
 int     BN_GF2m_mod_div_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
-        const unsigned int p[], BN_CTX *ctx); /* r = (a / b) mod p */
+        const int p[], BN_CTX *ctx); /* r = (a / b) mod p */
 int     BN_GF2m_mod_exp_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
-        const unsigned int p[], BN_CTX *ctx); /* r = (a ^ b) mod p */
+        const int p[], BN_CTX *ctx); /* r = (a ^ b) mod p */
 int     BN_GF2m_mod_sqrt_arr(BIGNUM *r, const BIGNUM *a,
-        const unsigned int p[], BN_CTX *ctx); /* r = sqrt(a) mod p */
+        const int p[], BN_CTX *ctx); /* r = sqrt(a) mod p */
 int     BN_GF2m_mod_solve_quad_arr(BIGNUM *r, const BIGNUM *a,
-        const unsigned int p[], BN_CTX *ctx); /* r^2 + r = a mod p */
+        const int p[], BN_CTX *ctx); /* r^2 + r = a mod p */
-int     BN_GF2m_poly2arr(const BIGNUM *a, unsigned int p[], int max);
+int     BN_GF2m_poly2arr(const BIGNUM *a, int p[], int max);
-int     BN_GF2m_arr2poly(const unsigned int p[], BIGNUM *a);
+int     BN_GF2m_arr2poly(const int p[], BIGNUM *a);
 /* faster mod functions for the 'NIST primes' 
 * 0 <= a < p^2 */
@@ -751,10 +752,12 @@ int RAND_pseudo_bytes(unsigned char *buf,int num);
 #define bn_correct_top(a) \
        { \
        BN_ULONG *ftl; \
-        if ((a)->top > 0) \
+        int tmp_top = (a)->top; \
+        if (tmp_top > 0) \
                { \
-                for (ftl= &((a)->d[(a)->top-1]); (a)->top > 0; (a)->top--) \
+                for (ftl= &((a)->d[tmp_top-1]); tmp_top > 0; tmp_top--) \
-                if (*(ftl--)) break; \
+                        if (*(ftl--)) break; \
+                (a)->top = tmp_top; \
                } \
        bn_pollute(a); \
        }
diff --git a/src/lib/libcrypto/bn/bn_asm.c b/src/lib/libcrypto/bn/bn_asm.c
index 99bc2de491..c43c91cc09 100644
--- a/src/lib/libcrypto/bn/bn_asm.c
+++ b/src/lib/libcrypto/bn/bn_asm.c
@@ -75,6 +75,7 @@ BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
        assert(num >= 0);
        if (num <= 0) return(c1);
+#ifndef OPENSSL_SMALL_FOOTPRINT
        while (num&~3)
                {
                mul_add(rp[0],ap[0],w,c1);
@@ -83,11 +84,11 @@ BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
                mul_add(rp[3],ap[3],w,c1);
                ap+=4; rp+=4; num-=4;
                }
-        if (num)
+#endif
+        while (num)
                {
-                mul_add(rp[0],ap[0],w,c1); if (--num==0) return c1;
+                mul_add(rp[0],ap[0],w,c1);
-                mul_add(rp[1],ap[1],w,c1); if (--num==0) return c1;
+                ap++; rp++; num--;
-                mul_add(rp[2],ap[2],w,c1); return c1;
                }
        
        return(c1);
@@ -100,6 +101,7 @@ BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
        assert(num >= 0);
        if (num <= 0) return(c1);
+#ifndef OPENSSL_SMALL_FOOTPRINT
        while (num&~3)
                {
                mul(rp[0],ap[0],w,c1);
@@ -108,11 +110,11 @@ BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
                mul(rp[3],ap[3],w,c1);
                ap+=4; rp+=4; num-=4;
                }
-        if (num)
+#endif
+        while (num)
                {
-                mul(rp[0],ap[0],w,c1); if (--num == 0) return c1;
+                mul(rp[0],ap[0],w,c1);
-                mul(rp[1],ap[1],w,c1); if (--num == 0) return c1;
+                ap++; rp++; num--;
-                mul(rp[2],ap[2],w,c1);
                }
        return(c1);
        } 
@@ -121,6 +123,8 @@ void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n)
        {
        assert(n >= 0);
        if (n <= 0) return;
+#ifndef OPENSSL_SMALL_FOOTPRINT
        while (n&~3)
                {
                sqr(r[0],r[1],a[0]);
@@ -129,11 +133,11 @@ void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n)
                sqr(r[6],r[7],a[3]);
                a+=4; r+=8; n-=4;
                }
-        if (n)
+#endif
+        while (n)
                {
-                sqr(r[0],r[1],a[0]); if (--n == 0) return;
+                sqr(r[0],r[1],a[0]);
-                sqr(r[2],r[3],a[1]); if (--n == 0) return;
+                a++; r+=2; n--;
-                sqr(r[4],r[5],a[2]);
                }
        }
@@ -150,18 +154,20 @@ BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
        bl=LBITS(w);
        bh=HBITS(w);
-        for (;;)
+#ifndef OPENSSL_SMALL_FOOTPRINT
+        while (num&~3)
                {
                mul_add(rp[0],ap[0],bl,bh,c);
-                if (--num == 0) break;
                mul_add(rp[1],ap[1],bl,bh,c);
-                if (--num == 0) break;
                mul_add(rp[2],ap[2],bl,bh,c);
-                if (--num == 0) break;
                mul_add(rp[3],ap[3],bl,bh,c);
-                if (--num == 0) break;
+                ap+=4; rp+=4; num-=4;
-                ap+=4;
+                }
-                rp+=4;
+#endif
+        while (num)
+                {
+                mul_add(rp[0],ap[0],bl,bh,c);
+                ap++; rp++; num--;
                }
        return(c);
        } 
@@ -177,18 +183,20 @@ BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
        bl=LBITS(w);
        bh=HBITS(w);
-        for (;;)
+#ifndef OPENSSL_SMALL_FOOTPRINT
+        while (num&~3)
                {
                mul(rp[0],ap[0],bl,bh,carry);
-                if (--num == 0) break;
                mul(rp[1],ap[1],bl,bh,carry);
-                if (--num == 0) break;
                mul(rp[2],ap[2],bl,bh,carry);
-                if (--num == 0) break;
                mul(rp[3],ap[3],bl,bh,carry);
-                if (--num == 0) break;
+                ap+=4; rp+=4; num-=4;
-                ap+=4;
+                }
-                rp+=4;
+#endif
+        while (num)
+                {
+                mul(rp[0],ap[0],bl,bh,carry);
+                ap++; rp++; num--;
                }
        return(carry);
        } 
@@ -197,22 +205,21 @@ void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n)
        {
        assert(n >= 0);
        if (n <= 0) return;
-        for (;;)
+#ifndef OPENSSL_SMALL_FOOTPRINT
+        while (n&~3)
                {
                sqr64(r[0],r[1],a[0]);
-                if (--n == 0) break;
                sqr64(r[2],r[3],a[1]);
-                if (--n == 0) break;
                sqr64(r[4],r[5],a[2]);
-                if (--n == 0) break;
                sqr64(r[6],r[7],a[3]);
-                if (--n == 0) break;
+                a+=4; r+=8; n-=4;
+                }
-                a+=4;
+#endif
-                r+=8;
+        while (n)
+                {
+                sqr64(r[0],r[1],a[0]);
+                a++; r+=2; n--;
                }
        }
@@ -303,31 +310,30 @@ BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
        assert(n >= 0);
        if (n <= 0) return((BN_ULONG)0);
-        for (;;)
+#ifndef OPENSSL_SMALL_FOOTPRINT
+        while (n&~3)
                {
                ll+=(BN_ULLONG)a[0]+b[0];
                r[0]=(BN_ULONG)ll&BN_MASK2;
                ll>>=BN_BITS2;
-                if (--n <= 0) break;
                ll+=(BN_ULLONG)a[1]+b[1];
                r[1]=(BN_ULONG)ll&BN_MASK2;
                ll>>=BN_BITS2;
-                if (--n <= 0) break;
                ll+=(BN_ULLONG)a[2]+b[2];
                r[2]=(BN_ULONG)ll&BN_MASK2;
                ll>>=BN_BITS2;
-                if (--n <= 0) break;
                ll+=(BN_ULLONG)a[3]+b[3];
                r[3]=(BN_ULONG)ll&BN_MASK2;
                ll>>=BN_BITS2;
-                if (--n <= 0) break;
+                a+=4; b+=4; r+=4; n-=4;
+                }
-                a+=4;
+#endif
-                b+=4;
+        while (n)
-                r+=4;
+                {
+                ll+=(BN_ULLONG)a[0]+b[0];
+                r[0]=(BN_ULONG)ll&BN_MASK2;
+                ll>>=BN_BITS2;
+                a++; b++; r++; n--;
                }
        return((BN_ULONG)ll);
        }
@@ -340,7 +346,8 @@ BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
        if (n <= 0) return((BN_ULONG)0);
        c=0;
-        for (;;)
+#ifndef OPENSSL_SMALL_FOOTPRINT
+        while (n&~3)
                {
                t=a[0];
                t=(t+c)&BN_MASK2;
@@ -348,35 +355,36 @@ BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
                l=(t+b[0])&BN_MASK2;
                c+=(l < t);
                r[0]=l;
-                if (--n <= 0) break;
                t=a[1];
                t=(t+c)&BN_MASK2;
                c=(t < c);
                l=(t+b[1])&BN_MASK2;
                c+=(l < t);
                r[1]=l;
-                if (--n <= 0) break;
                t=a[2];
                t=(t+c)&BN_MASK2;
                c=(t < c);
                l=(t+b[2])&BN_MASK2;
                c+=(l < t);
                r[2]=l;
-                if (--n <= 0) break;
                t=a[3];
                t=(t+c)&BN_MASK2;
                c=(t < c);
                l=(t+b[3])&BN_MASK2;
                c+=(l < t);
                r[3]=l;
-                if (--n <= 0) break;
+                a+=4; b+=4; r+=4; n-=4;
+                }
-                a+=4;
+#endif
-                b+=4;
+        while(n)
-                r+=4;
+                {
+                t=a[0];
+                t=(t+c)&BN_MASK2;
+                c=(t < c);
+                l=(t+b[0])&BN_MASK2;
+                c+=(l < t);
+                r[0]=l;
+                a++; b++; r++; n--;
                }
        return((BN_ULONG)c);
        }
@@ -390,36 +398,35 @@ BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
        assert(n >= 0);
        if (n <= 0) return((BN_ULONG)0);
-        for (;;)
+#ifndef OPENSSL_SMALL_FOOTPRINT
+        while (n&~3)
                {
                t1=a[0]; t2=b[0];
                r[0]=(t1-t2-c)&BN_MASK2;
                if (t1 != t2) c=(t1 < t2);
-                if (--n <= 0) break;
                t1=a[1]; t2=b[1];
                r[1]=(t1-t2-c)&BN_MASK2;
                if (t1 != t2) c=(t1 < t2);
-                if (--n <= 0) break;
                t1=a[2]; t2=b[2];
                r[2]=(t1-t2-c)&BN_MASK2;
                if (t1 != t2) c=(t1 < t2);
-                if (--n <= 0) break;
                t1=a[3]; t2=b[3];
                r[3]=(t1-t2-c)&BN_MASK2;
                if (t1 != t2) c=(t1 < t2);
-                if (--n <= 0) break;
+                a+=4; b+=4; r+=4; n-=4;
+                }
-                a+=4;
+#endif
-                b+=4;
+        while (n)
-                r+=4;
+                {
+                t1=a[0]; t2=b[0];
+                r[0]=(t1-t2-c)&BN_MASK2;
+                if (t1 != t2) c=(t1 < t2);
+                a++; b++; r++; n--;
                }
        return(c);
        }
-#ifdef BN_MUL_COMBA
+#if defined(BN_MUL_COMBA) && !defined(OPENSSL_SMALL_FOOTPRINT)
 #undef bn_mul_comba8
 #undef bn_mul_comba4
@@ -820,18 +827,134 @@ void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a)
        r[6]=c1;
        r[7]=c2;
        }
+#ifdef OPENSSL_NO_ASM
+#ifdef OPENSSL_BN_ASM_MONT
+#include <alloca.h>
+/*
+ * This is essentially a reference implementation, which may or may not
+ * result in a performance improvement. E.g. on IA-32 this routine was
+ * observed to give 40% faster rsa1024 private key operations and 10%
+ * faster rsa4096 ones, while on AMD64 it improves rsa1024 sign only
+ * by 10% and *worsens* rsa4096 sign by 15%. Once again, it's a
+ * reference implementation, one to be used as a starting point for
+ * platform-specific assembler. The numbers mentioned apply to
+ * compiler-generated code compiled with and without
+ * -DOPENSSL_BN_ASM_MONT and can vary not only from platform to
+ * platform, but even between compiler versions. Assembler vs.
+ * assembler improvement coefficients can [and are known to] differ
+ * and are to be documented elsewhere.
+int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0p, int num)
+        { /* Portable C Montgomery multiplication over num-word operands.
+           * Works in a (num+2)-word scratch buffer tp[] taken from alloca():
+           * each round folds one word of bp[] into tp[], then adds a multiple
+           * of np[] chosen via n0 and stores the sum one word lower.  A final
+           * conditional subtraction of np[] produces rp[].  The scratch buffer
+           * is zeroed before returning, and every return path yields 1.
+           * NOTE(review): mul()/mul_add() are macros defined outside this
+           * hunk; their exact carry semantics are assumed, not shown here. */
+        BN_ULONG c0,c1,ml,*tp,n0;
+#ifdef mul64
+        BN_ULONG mh;
+#endif
+        volatile BN_ULONG *vp; /* volatile alias of tp[], used only for the final wipe */
+        int i=0,j; /* i must start at 0: the goto below skips the for-loop initializer */
+#if 0   /* template for platform-specific implementation */
+        if (ap==bp)     return bn_sqr_mont(rp,ap,np,n0p,num);
+#endif
+        vp = tp = alloca((num+2)*sizeof(BN_ULONG));
+        n0 = *n0p;
+        c0 = 0;
+        ml = bp[0];
+#ifdef mul64
+        mh = HBITS(ml);
+        ml = LBITS(ml);
+        for (j=0;j<num;++j)
+                mul(tp[j],ap[j],ml,mh,c0);
+#else
+        for (j=0;j<num;++j)
+                mul(tp[j],ap[j],ml,c0);
+#endif
+        tp[num]   = c0;
+        tp[num+1] = 0;
+        goto enter; /* round 0: tp[] was just filled via mul(), so skip the mul_add() half */
+        for(i=0;i<num;i++)
+                {
+                c0 = 0;
+                ml = bp[i];
+#ifdef mul64
+                mh = HBITS(ml);
+                ml = LBITS(ml);
+                for (j=0;j<num;++j)
+                        mul_add(tp[j],ap[j],ml,mh,c0);
+#else
+                for (j=0;j<num;++j)
+                        mul_add(tp[j],ap[j],ml,c0);
+#endif
+                c1 = (tp[num] + c0)&BN_MASK2;
+                tp[num]   = c1;
+                tp[num+1] = (c1<c0?1:0); /* carry out of the tp[num] addition */
+        enter:
+                c1  = tp[0];
+                ml = (c1*n0)&BN_MASK2; /* multiplier applied to np[] in this round */
+                c0 = 0;
+#ifdef mul64
+                mh = HBITS(ml);
+                ml = LBITS(ml);
+                mul_add(c1,np[0],ml,mh,c0);
+#else
+                mul_add(c1,ml,np[0],c0);
+#endif
+                for(j=1;j<num;j++)
+                        {
+                        c1 = tp[j];
+#ifdef mul64
+                        mul_add(c1,np[j],ml,mh,c0);
+#else
+                        mul_add(c1,ml,np[j],c0);
+#endif
+                        tp[j-1] = c1&BN_MASK2; /* store one word lower than it was read */
+                        }
+                c1        = (tp[num] + c0)&BN_MASK2;
+                tp[num-1] = c1;
+                tp[num]   = tp[num+1] + (c1<c0?1:0);
+                }
+        if (tp[num]!=0 || tp[num-1]>=np[num-1])
+                {
+                c0 = bn_sub_words(rp,tp,np,num); /* rp = tp - np; c0 is the returned borrow */
+                if (tp[num]!=0 || c0==0) /* keep the subtracted value already in rp[] */
+                        {
+                        for(i=0;i<num+2;i++)    vp[i] = 0;
+                        return 1;
+                        }
+                }
+        for(i=0;i<num;i++)      rp[i] = tp[i],  vp[i] = 0; /* otherwise tp[] is the result; wipe while copying */
+        vp[num]   = 0;
+        vp[num+1] = 0;
+        return 1;
+        }
+#else
+/*
+ * A return value of 0 indicates that the multiplication/convolution was
+ * not performed, signalling the caller to fall back to the
+ * alternative/original code path.
+ */
+int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num)
+{       return 0;       }
+#endif /* OPENSSL_BN_ASM_MONT */
+#endif
 #else /* !BN_MUL_COMBA */
 /* hmm... is it faster just to do a multiply? */
 #undef bn_sqr_comba4
-void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a)
+void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a)
        {
        BN_ULONG t[8];
        bn_sqr_normal(r,a,4,t);
        }
 #undef bn_sqr_comba8
-void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a)
+void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a)
        {
        BN_ULONG t[16];
        bn_sqr_normal(r,a,8,t);
@@ -857,4 +980,51 @@ void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
        r[15]=bn_mul_add_words(&(r[7]),a,8,b[7]);
        }
+#ifdef OPENSSL_NO_ASM
+#ifdef OPENSSL_BN_ASM_MONT
+#include <alloca.h>
+int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0p, int num)
+        {       /* C fallback: one pass per word of bp, each pass adds ap*bp[i] and a multiple of np to tp and shifts tp down a word; writes num words to rp and always returns 1 */
+        BN_ULONG c0,c1,*tp,n0=*n0p;     /* only the first word behind n0p is read; presumably the Montgomery constant for np - confirm against BN_MONT_CTX_set */
+        volatile BN_ULONG *vp;  /* volatile alias of tp, used only for zeroing the scratch area before returning */
+        int i=0,j;
+        vp = tp = alloca((num+2)*sizeof(BN_ULONG));     /* stack scratch of num+2 words: num accumulator words plus two words for carries */
+        for(i=0;i<=num;i++)     tp[i]=0;        /* tp[num+1] is not cleared here; it is assigned in the loop below before it is read */
+        for(i=0;i<num;i++)
+                {
+                c0         = bn_mul_add_words(tp,ap,num,bp[i]); /* presumably tp[0..num-1] += ap*bp[i] with c0 the carry-out word - confirm helper contract */
+                c1         = (tp[num] + c0)&BN_MASK2;
+                tp[num]    = c1;
+                tp[num+1]  = (c1<c0?1:0);       /* c1<c0 means the addition into tp[num] wrapped */
+                c0         = bn_mul_add_words(tp,np,num,tp[0]*n0);      /* adds np times the word tp[0]*n0; presumably this zeroes tp[0] - TODO confirm */
+                c1         = (tp[num] + c0)&BN_MASK2;
+                tp[num]    = c1;
+                tp[num+1] += (c1<c0?1:0);
+                for(j=0;j<=num;j++)     tp[j]=tp[j+1];  /* shift the accumulator down one word, discarding tp[0] */
+                }
+        if (tp[num]!=0 || tp[num-1]>=np[num-1]) /* try the subtraction only when the top words allow tp >= np */
+                {
+                c0 = bn_sub_words(rp,tp,np,num);        /* rp = tp - np over num words; c0 is presumably the borrow - confirm helper contract */
+                if (tp[num]!=0 || c0==0)        /* keep the difference when the extra word is set or c0 is zero */
+                        {
+                        for(i=0;i<num+2;i++)    vp[i] = 0;      /* wipe all of the scratch area */
+                        return 1;
+                        }
+                }
+        for(i=0;i<num;i++)      rp[i] = tp[i],  vp[i] = 0;      /* otherwise the result is tp itself: copy it out (overwriting any difference written above) and wipe as we go */
+        vp[num]   = 0;
+        vp[num+1] = 0;
+        return 1;
+        }
+#else
+int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num)
+{       return 0;       }       /* stub: always returns 0 without reading any argument or writing to rp */
+#endif /* OPENSSL_BN_ASM_MONT */
+#endif
 #endif /* !BN_MUL_COMBA */
diff --git a/src/lib/libcrypto/bn/bn_blind.c b/src/lib/libcrypto/bn/bn_blind.c
index c11fb4ccc2..e060592fdc 100644
--- a/src/lib/libcrypto/bn/bn_blind.c
+++ b/src/lib/libcrypto/bn/bn_blind.c
@@ -1,6 +1,6 @@
 /* crypto/bn/bn_blind.c */
 /* ====================================================================
- * Copyright (c) 1998-2005 The OpenSSL Project.  All rights reserved.
+ * Copyright (c) 1998-2006 The OpenSSL Project.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
@@ -121,8 +121,11 @@ struct bn_blinding_st
        BIGNUM *Ai;
        BIGNUM *e;
        BIGNUM *mod; /* just a reference */
+#ifndef OPENSSL_NO_DEPRECATED
        unsigned long thread_id; /* added in OpenSSL 0.9.6j and 0.9.7b;
                                  * used only by crypto/rsa/rsa_eay.c, rsa_lib.c */
+#endif
+        CRYPTO_THREADID tid;
        unsigned int  counter;
        unsigned long flags;
        BN_MONT_CTX *m_ctx;
@@ -131,7 +134,7 @@ struct bn_blinding_st
                          BN_MONT_CTX *m_ctx);
        };
-BN_BLINDING *BN_BLINDING_new(const BIGNUM *A, const BIGNUM *Ai, /* const */ BIGNUM *mod)
+BN_BLINDING *BN_BLINDING_new(const BIGNUM *A, const BIGNUM *Ai, BIGNUM *mod)
        {
        BN_BLINDING *ret=NULL;
@@ -158,6 +161,7 @@ BN_BLINDING *BN_BLINDING_new(const BIGNUM *A, const BIGNUM *Ai, /* const */ BIGN
                BN_set_flags(ret->mod, BN_FLG_CONSTTIME);
        ret->counter = BN_BLINDING_COUNTER;
+        CRYPTO_THREADID_current(&ret->tid);
        return(ret);
 err:
        if (ret != NULL) BN_BLINDING_free(ret);
@@ -263,6 +267,7 @@ int BN_BLINDING_invert_ex(BIGNUM *n, const BIGNUM *r, BN_BLINDING *b, BN_CTX *ct
        return(ret);
        }
+#ifndef OPENSSL_NO_DEPRECATED
 unsigned long BN_BLINDING_get_thread_id(const BN_BLINDING *b)
        {
        return b->thread_id;
@@ -272,6 +277,12 @@ void BN_BLINDING_set_thread_id(BN_BLINDING *b, unsigned long n)
        {
        b->thread_id = n;
        }
+#endif
+CRYPTO_THREADID *BN_BLINDING_thread_id(BN_BLINDING *b)
+        {       /* accessor for the tid member of b; b is dereferenced without a NULL check */
+        return &b->tid; /* pointer into *b itself, not a copy */
+        }
 unsigned long BN_BLINDING_get_flags(const BN_BLINDING *b)
        {
@@ -284,7 +295,7 @@ void BN_BLINDING_set_flags(BN_BLINDING *b, unsigned long flags)
        }
 BN_BLINDING *BN_BLINDING_create_param(BN_BLINDING *b,
-        const BIGNUM *e, /* const */ BIGNUM *m, BN_CTX *ctx,
+        const BIGNUM *e, BIGNUM *m, BN_CTX *ctx,
        int (*bn_mod_exp)(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
                          const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *m_ctx),
        BN_MONT_CTX *m_ctx)
diff --git a/src/lib/libcrypto/bn/bn_ctx.c b/src/lib/libcrypto/bn/bn_ctx.c
index b3452f1a91..3f2256f675 100644
--- a/src/lib/libcrypto/bn/bn_ctx.c
+++ b/src/lib/libcrypto/bn/bn_ctx.c
@@ -161,7 +161,7 @@ static void ctxdbg(BN_CTX *ctx)
        fprintf(stderr,"(%08x): ", (unsigned int)ctx);
        while(bnidx < ctx->used)
                {
-                fprintf(stderr,"%02x ", item->vals[bnidx++ % BN_CTX_POOL_SIZE].dmax);
+                fprintf(stderr,"%03x ", item->vals[bnidx++ % BN_CTX_POOL_SIZE].dmax);
                if(!(bnidx % BN_CTX_POOL_SIZE))
                        item = item->next;
                }
@@ -171,8 +171,8 @@ static void ctxdbg(BN_CTX *ctx)
        while(fpidx < stack->depth)
                {
                while(bnidx++ < stack->indexes[fpidx])
-                        fprintf(stderr,"   ");
+                        fprintf(stderr,"    ");
-                fprintf(stderr,"^^ ");
+                fprintf(stderr,"^^^ ");
                bnidx++;
                fpidx++;
                }
diff --git a/src/lib/libcrypto/bn/bn_div.c b/src/lib/libcrypto/bn/bn_div.c
index 1e8e57626b..802a43d642 100644
--- a/src/lib/libcrypto/bn/bn_div.c
+++ b/src/lib/libcrypto/bn/bn_div.c
@@ -102,7 +102,7 @@ int BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, const BIGNUM *d,
        /* The next 2 are needed so we can do a dv->d[0]|=1 later
         * since BN_lshift1 will only work once there is a value :-) */
        BN_zero(dv);
-        bn_wexpand(dv,1);
+        if(bn_wexpand(dv,1) == NULL) goto end;
        dv->top=1;
        if (!BN_lshift(D,D,nm-nd)) goto end;
@@ -229,7 +229,8 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor,
        if (dv == NULL)
                res=BN_CTX_get(ctx);
        else    res=dv;
-        if (sdiv == NULL || res == NULL) goto err;
+        if (sdiv == NULL || res == NULL || tmp == NULL || snum == NULL)
+                goto err;
        /* First we normalise the numbers */
        norm_shift=BN_BITS2-((BN_num_bits(divisor))%BN_BITS2);
@@ -336,7 +337,7 @@ X) -> 0x%08X\n",
                                t2 -= d1;
                                }
 #else /* !BN_LLONG */
-                        BN_ULONG t2l,t2h,ql,qh;
+                        BN_ULONG t2l,t2h;
                        q=bn_div_words(n0,n1,d0);
 #ifdef BN_DEBUG_LEVITTE
@@ -354,9 +355,12 @@ X) -> 0x%08X\n",
                        t2l = d1 * q;
                        t2h = BN_UMULT_HIGH(d1,q);
 #else
+                        {
+                        BN_ULONG ql, qh;
                        t2l=LBITS(d1); t2h=HBITS(d1);
                        ql =LBITS(q);  qh =HBITS(q);
                        mul64(t2l,t2h,ql,qh); /* t2=(BN_ULLONG)d1*q; */
+                        }
 #endif
                        for (;;)
@@ -560,7 +564,7 @@ X) -> 0x%08X\n",
                                t2 -= d1;
                                }
 #else /* !BN_LLONG */
-                        BN_ULONG t2l,t2h,ql,qh;
+                        BN_ULONG t2l,t2h;
                        q=bn_div_words(n0,n1,d0);
 #ifdef BN_DEBUG_LEVITTE
@@ -578,9 +582,12 @@ X) -> 0x%08X\n",
                        t2l = d1 * q;
                        t2h = BN_UMULT_HIGH(d1,q);
 #else
+                        {
+                        BN_ULONG ql, qh;
                        t2l=LBITS(d1); t2h=HBITS(d1);
                        ql =LBITS(q);  qh =HBITS(q);
                        mul64(t2l,t2h,ql,qh); /* t2=(BN_ULLONG)d1*q; */
+                        }
 #endif
                        for (;;)
diff --git a/src/lib/libcrypto/bn/bn_exp.c b/src/lib/libcrypto/bn/bn_exp.c
index 70a33f0d93..d9b6c737fc 100644
--- a/src/lib/libcrypto/bn/bn_exp.c
+++ b/src/lib/libcrypto/bn/bn_exp.c
@@ -134,7 +134,8 @@ int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
                rr = BN_CTX_get(ctx);
        else
                rr = r;
-        if ((v = BN_CTX_get(ctx)) == NULL) goto err;
+        v = BN_CTX_get(ctx);
+        if (rr == NULL || v == NULL) goto err;
        if (BN_copy(v,a) == NULL) goto err;
        bits=BN_num_bits(p);
diff --git a/src/lib/libcrypto/bn/bn_gf2m.c b/src/lib/libcrypto/bn/bn_gf2m.c
index 306f029f27..527b0fa15b 100644
--- a/src/lib/libcrypto/bn/bn_gf2m.c
+++ b/src/lib/libcrypto/bn/bn_gf2m.c
@@ -121,74 +121,12 @@ static const BN_ULONG SQR_tb[16] =
    SQR_tb[(w) >> 12 & 0xF] << 24 | SQR_tb[(w) >>  8 & 0xF] << 16 | \
    SQR_tb[(w) >>  4 & 0xF] <<  8 | SQR_tb[(w)       & 0xF]
 #endif
-#ifdef SIXTEEN_BIT
-#define SQR1(w) \
-    SQR_tb[(w) >> 12 & 0xF] <<  8 | SQR_tb[(w) >>  8 & 0xF]
-#define SQR0(w) \
-    SQR_tb[(w) >>  4 & 0xF] <<  8 | SQR_tb[(w)       & 0xF]
-#endif
-#ifdef EIGHT_BIT
-#define SQR1(w) \
-    SQR_tb[(w) >>  4 & 0xF]
-#define SQR0(w) \
-    SQR_tb[(w)       & 15]
-#endif
 /* Product of two polynomials a, b each with degree < BN_BITS2 - 1,
 * result is a polynomial r with degree < 2 * BN_BITS - 1
 * The caller MUST ensure that the variables have the right amount
 * of space allocated.
 */
-#ifdef EIGHT_BIT
-static void bn_GF2m_mul_1x1(BN_ULONG *r1, BN_ULONG *r0, const BN_ULONG a, const BN_ULONG b)
-        {
-        register BN_ULONG h, l, s;
-        BN_ULONG tab[4], top1b = a >> 7;
-        register BN_ULONG a1, a2;
-        a1 = a & (0x7F); a2 = a1 << 1;
-        tab[0] = 0; tab[1] = a1; tab[2] = a2; tab[3] = a1^a2;
-        s = tab[b      & 0x3]; l  = s;
-        s = tab[b >> 2 & 0x3]; l ^= s << 2; h  = s >> 6;
-        s = tab[b >> 4 & 0x3]; l ^= s << 4; h ^= s >> 4;
-        s = tab[b >> 6      ]; l ^= s << 6; h ^= s >> 2;
-        
-        /* compensate for the top bit of a */
-        if (top1b & 01) { l ^= b << 7; h ^= b >> 1; } 
-        *r1 = h; *r0 = l;
-        } 
-#endif
-#ifdef SIXTEEN_BIT
-static void bn_GF2m_mul_1x1(BN_ULONG *r1, BN_ULONG *r0, const BN_ULONG a, const BN_ULONG b)
-        {
-        register BN_ULONG h, l, s;
-        BN_ULONG tab[4], top1b = a >> 15; 
-        register BN_ULONG a1, a2;
-        a1 = a & (0x7FFF); a2 = a1 << 1;
-        tab[0] = 0; tab[1] = a1; tab[2] = a2; tab[3] = a1^a2;
-        s = tab[b      & 0x3]; l  = s;
-        s = tab[b >> 2 & 0x3]; l ^= s <<  2; h  = s >> 14;
-        s = tab[b >> 4 & 0x3]; l ^= s <<  4; h ^= s >> 12;
-        s = tab[b >> 6 & 0x3]; l ^= s <<  6; h ^= s >> 10;
-        s = tab[b >> 8 & 0x3]; l ^= s <<  8; h ^= s >>  8;
-        s = tab[b >>10 & 0x3]; l ^= s << 10; h ^= s >>  6;
-        s = tab[b >>12 & 0x3]; l ^= s << 12; h ^= s >>  4;
-        s = tab[b >>14      ]; l ^= s << 14; h ^= s >>  2;
-        /* compensate for the top bit of a */
-        if (top1b & 01) { l ^= b << 15; h ^= b >> 1; } 
-        *r1 = h; *r0 = l;
-        } 
-#endif
 #ifdef THIRTY_TWO_BIT
 static void bn_GF2m_mul_1x1(BN_ULONG *r1, BN_ULONG *r0, const BN_ULONG a, const BN_ULONG b)
        {
@@ -294,7 +232,8 @@ int	BN_GF2m_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
        if (a->top < b->top) { at = b; bt = a; }
        else { at = a; bt = b; }
-        bn_wexpand(r, at->top);
+        if(bn_wexpand(r, at->top) == NULL)
+                return 0;
        for (i = 0; i < bt->top; i++)
                {
@@ -320,7 +259,7 @@ int	BN_GF2m_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
 /* Performs modular reduction of a and store result in r.  r could be a. */
-int BN_GF2m_mod_arr(BIGNUM *r, const BIGNUM *a, const unsigned int p[])
+int BN_GF2m_mod_arr(BIGNUM *r, const BIGNUM *a, const int p[])
        {
        int j, k;
        int n, dN, d0, d1;
@@ -421,11 +360,11 @@ int BN_GF2m_mod_arr(BIGNUM *r, const BIGNUM *a, const unsigned int p[])
 int     BN_GF2m_mod(BIGNUM *r, const BIGNUM *a, const BIGNUM *p)
        {
        int ret = 0;
-        const int max = BN_num_bits(p);
+        const int max = BN_num_bits(p) + 1;
-        unsigned int *arr=NULL;
+        int *arr=NULL;
        bn_check_top(a);
        bn_check_top(p);
-        if ((arr = (unsigned int *)OPENSSL_malloc(sizeof(unsigned int) * max)) == NULL) goto err;
+        if ((arr = (int *)OPENSSL_malloc(sizeof(int) * max)) == NULL) goto err;
        ret = BN_GF2m_poly2arr(p, arr, max);
        if (!ret || ret > max)
                {
@@ -443,7 +382,7 @@ err:
 /* Compute the product of two polynomials a and b, reduce modulo p, and store
 * the result in r.  r could be a or b; a could be b.
 */
-int     BN_GF2m_mod_mul_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const unsigned int p[], BN_CTX *ctx)
+int     BN_GF2m_mod_mul_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const int p[], BN_CTX *ctx)
        {
        int zlen, i, j, k, ret = 0;
        BIGNUM *s;
@@ -499,12 +438,12 @@ err:
 int     BN_GF2m_mod_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *p, BN_CTX *ctx)
        {
        int ret = 0;
-        const int max = BN_num_bits(p);
+        const int max = BN_num_bits(p) + 1;
-        unsigned int *arr=NULL;
+        int *arr=NULL;
        bn_check_top(a);
        bn_check_top(b);
        bn_check_top(p);
-        if ((arr = (unsigned int *)OPENSSL_malloc(sizeof(unsigned int) * max)) == NULL) goto err;
+        if ((arr = (int *)OPENSSL_malloc(sizeof(int) * max)) == NULL) goto err;
        ret = BN_GF2m_poly2arr(p, arr, max);
        if (!ret || ret > max)
                {
@@ -520,7 +459,7 @@ err:
 /* Square a, reduce the result mod p, and store it in a.  r could be a. */
-int     BN_GF2m_mod_sqr_arr(BIGNUM *r, const BIGNUM *a, const unsigned int p[], BN_CTX *ctx)
+int     BN_GF2m_mod_sqr_arr(BIGNUM *r, const BIGNUM *a, const int p[], BN_CTX *ctx)
        {
        int i, ret = 0;
        BIGNUM *s;
@@ -555,12 +494,12 @@ err:
 int     BN_GF2m_mod_sqr(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
        {
        int ret = 0;
-        const int max = BN_num_bits(p);
+        const int max = BN_num_bits(p) + 1;
-        unsigned int *arr=NULL;
+        int *arr=NULL;
        bn_check_top(a);
        bn_check_top(p);
-        if ((arr = (unsigned int *)OPENSSL_malloc(sizeof(unsigned int) * max)) == NULL) goto err;
+        if ((arr = (int *)OPENSSL_malloc(sizeof(int) * max)) == NULL) goto err;
        ret = BN_GF2m_poly2arr(p, arr, max);
        if (!ret || ret > max)
                {
@@ -642,7 +581,7 @@ err:
 * function is only provided for convenience; for best performance, use the 
 * BN_GF2m_mod_inv function.
 */
-int BN_GF2m_mod_inv_arr(BIGNUM *r, const BIGNUM *xx, const unsigned int p[], BN_CTX *ctx)
+int BN_GF2m_mod_inv_arr(BIGNUM *r, const BIGNUM *xx, const int p[], BN_CTX *ctx)
        {
        BIGNUM *field;
        int ret = 0;
@@ -768,7 +707,7 @@ err:
 * function is only provided for convenience; for best performance, use the 
 * BN_GF2m_mod_div function.
 */
-int BN_GF2m_mod_div_arr(BIGNUM *r, const BIGNUM *yy, const BIGNUM *xx, const unsigned int p[], BN_CTX *ctx)
+int BN_GF2m_mod_div_arr(BIGNUM *r, const BIGNUM *yy, const BIGNUM *xx, const int p[], BN_CTX *ctx)
        {
        BIGNUM *field;
        int ret = 0;
@@ -793,7 +732,7 @@ err:
 * the result in r.  r could be a.
 * Uses simple square-and-multiply algorithm A.5.1 from IEEE P1363.
 */
-int     BN_GF2m_mod_exp_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const unsigned int p[], BN_CTX *ctx)
+int     BN_GF2m_mod_exp_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const int p[], BN_CTX *ctx)
        {
        int ret = 0, i, n;
        BIGNUM *u;
@@ -839,12 +778,12 @@ err:
 int BN_GF2m_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *p, BN_CTX *ctx)
        {
        int ret = 0;
-        const int max = BN_num_bits(p);
+        const int max = BN_num_bits(p) + 1;
-        unsigned int *arr=NULL;
+        int *arr=NULL;
        bn_check_top(a);
        bn_check_top(b);
        bn_check_top(p);
-        if ((arr = (unsigned int *)OPENSSL_malloc(sizeof(unsigned int) * max)) == NULL) goto err;
+        if ((arr = (int *)OPENSSL_malloc(sizeof(int) * max)) == NULL) goto err;
        ret = BN_GF2m_poly2arr(p, arr, max);
        if (!ret || ret > max)
                {
@@ -862,7 +801,7 @@ err:
 * the result in r.  r could be a.
 * Uses exponentiation as in algorithm A.4.1 from IEEE P1363.
 */
-int     BN_GF2m_mod_sqrt_arr(BIGNUM *r, const BIGNUM *a, const unsigned int p[], BN_CTX *ctx)
+int     BN_GF2m_mod_sqrt_arr(BIGNUM *r, const BIGNUM *a, const int p[], BN_CTX *ctx)
        {
        int ret = 0;
        BIGNUM *u;
@@ -898,11 +837,11 @@ err:
 int BN_GF2m_mod_sqrt(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
        {
        int ret = 0;
-        const int max = BN_num_bits(p);
+        const int max = BN_num_bits(p) + 1;
-        unsigned int *arr=NULL;
+        int *arr=NULL;
        bn_check_top(a);
        bn_check_top(p);
-        if ((arr = (unsigned int *)OPENSSL_malloc(sizeof(unsigned int) * max)) == NULL) goto err;
+        if ((arr = (int *)OPENSSL_malloc(sizeof(int) * max)) == NULL) goto err;
        ret = BN_GF2m_poly2arr(p, arr, max);
        if (!ret || ret > max)
                {
@@ -919,10 +858,9 @@ err:
 /* Find r such that r^2 + r = a mod p.  r could be a. If no r exists returns 0.
 * Uses algorithms A.4.7 and A.4.6 from IEEE P1363.
 */
-int BN_GF2m_mod_solve_quad_arr(BIGNUM *r, const BIGNUM *a_, const unsigned int p[], BN_CTX *ctx)
+int BN_GF2m_mod_solve_quad_arr(BIGNUM *r, const BIGNUM *a_, const int p[], BN_CTX *ctx)
        {
-        int ret = 0, count = 0;
+        int ret = 0, count = 0, j;
-        unsigned int j;
        BIGNUM *a, *z, *rho, *w, *w2, *tmp;
        bn_check_top(a_);
@@ -1017,11 +955,11 @@ err:
 int BN_GF2m_mod_solve_quad(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
        {
        int ret = 0;
-        const int max = BN_num_bits(p);
+        const int max = BN_num_bits(p) + 1;
-        unsigned int *arr=NULL;
+        int *arr=NULL;
        bn_check_top(a);
        bn_check_top(p);
-        if ((arr = (unsigned int *)OPENSSL_malloc(sizeof(unsigned int) *
+        if ((arr = (int *)OPENSSL_malloc(sizeof(int) *
                                                max)) == NULL) goto err;
        ret = BN_GF2m_poly2arr(p, arr, max);
        if (!ret || ret > max)
@@ -1037,20 +975,17 @@ err:
        }
 /* Convert the bit-string representation of a polynomial
- * ( \sum_{i=0}^n a_i * x^i , where a_0 is *not* zero) into an array
+ * ( \sum_{i=0}^n a_i * x^i) into an array of integers corresponding 
- * of integers corresponding to the bits with non-zero coefficient.
+ * to the bits with non-zero coefficient.  Array is terminated with -1.
 * Up to max elements of the array will be filled.  Return value is total
- * number of coefficients that would be extracted if array was large enough.
+ * number of array elements that would be filled if array was large enough.
 */
-int BN_GF2m_poly2arr(const BIGNUM *a, unsigned int p[], int max)
+int BN_GF2m_poly2arr(const BIGNUM *a, int p[], int max)
        {
        int i, j, k = 0;
        BN_ULONG mask;
-        if (BN_is_zero(a) || !BN_is_bit_set(a, 0))
+        if (BN_is_zero(a))
-                /* a_0 == 0 => return error (the unsigned int array
-                 * must be terminated by 0)
-                 */
                return 0;
        for (i = a->top - 1; i >= 0; i--)
@@ -1070,24 +1005,28 @@ int BN_GF2m_poly2arr(const BIGNUM *a, unsigned int p[], int max)
                        }
                }
+        if (k < max) {
+                p[k] = -1;
+                k++;
+        }
        return k;
        }
 /* Convert the coefficient array representation of a polynomial to a 
- * bit-string.  The array must be terminated by 0.
+ * bit-string.  The array must be terminated by -1.
 */
-int BN_GF2m_arr2poly(const unsigned int p[], BIGNUM *a)
+int BN_GF2m_arr2poly(const int p[], BIGNUM *a)
        {
        int i;
        bn_check_top(a);
        BN_zero(a);
-        for (i = 0; p[i] != 0; i++)
+        for (i = 0; p[i] != -1; i++)
                {
                if (BN_set_bit(a, p[i]) == 0)
                        return 0;
                }
-        BN_set_bit(a, 0);
        bn_check_top(a);
        return 1;
diff --git a/src/lib/libcrypto/bn/bn_lcl.h b/src/lib/libcrypto/bn/bn_lcl.h
index 27ac4397a1..8e5e98e3f2 100644
--- a/src/lib/libcrypto/bn/bn_lcl.h
+++ b/src/lib/libcrypto/bn/bn_lcl.h
@@ -255,7 +255,8 @@ extern "C" {
             : "r"(a), "r"(b));         \
        ret;                    })
 #  endif        /* compiler */
-# elif defined(__x86_64) && defined(SIXTY_FOUR_BIT_LONG)
+# elif (defined(__x86_64) || defined(__x86_64__)) && \
+       (defined(SIXTY_FOUR_BIT_LONG) || defined(SIXTY_FOUR_BIT))
 #  if defined(__GNUC__)
 #   define BN_UMULT_HIGH(a,b)   ({      \
        register BN_ULONG ret,discard;  \
diff --git a/src/lib/libcrypto/bn/bn_lib.c b/src/lib/libcrypto/bn/bn_lib.c
index 32a8fbaf51..5470fbe6ef 100644
--- a/src/lib/libcrypto/bn/bn_lib.c
+++ b/src/lib/libcrypto/bn/bn_lib.c
@@ -133,15 +133,34 @@ int BN_get_params(int which)
 const BIGNUM *BN_value_one(void)
        {
-        static BN_ULONG data_one=1L;
+        static const BN_ULONG data_one=1L;      /* the single word holding the value 1, now const */
-        static BIGNUM const_one={&data_one,1,1,0,BN_FLG_STATIC_DATA};
+        static const BIGNUM const_one={(BN_ULONG *)&data_one,1,1,0,BN_FLG_STATIC_DATA};      /* the cast drops const only to fit the initializer (presumably the first BIGNUM field is a plain BN_ULONG * - confirm); data_one is const, so it must not be written through this pointer */
        return(&const_one);
        }
+char *BN_options(void)
+        {       /* returns a pointer to a static "bn(x,y)" string built from sizeof values times 8; formatted on the first call only */
+        static int init=0;
+        static char data[16];   /* static buffer that is returned to the caller */
+        if (!init)
+                {
+                init++; /* NOTE(review): the flag is raised before data is filled and no locking is visible here - confirm concurrent first calls are acceptable */
+#ifdef BN_LLONG
+                BIO_snprintf(data,sizeof data,"bn(%d,%d)",
+                             (int)sizeof(BN_ULLONG)*8,(int)sizeof(BN_ULONG)*8); /* first number: BN_ULLONG width, second: BN_ULONG width */
+#else
+                BIO_snprintf(data,sizeof data,"bn(%d,%d)",
+                             (int)sizeof(BN_ULONG)*8,(int)sizeof(BN_ULONG)*8);  /* without BN_LLONG both numbers are the BN_ULONG width */
+#endif
+                }
+        return(data);
+        }
 int BN_num_bits_word(BN_ULONG l)
        {
-        static const char bits[256]={
+        static const unsigned char bits[256]={
                0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4,
                5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
                6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
@@ -216,7 +235,7 @@ int BN_num_bits_word(BN_ULONG l)
                else
 #endif
                        {
-#if defined(SIXTEEN_BIT) || defined(THIRTY_TWO_BIT) || defined(SIXTY_FOUR_BIT) || defined(SIXTY_FOUR_BIT_LONG)
+#if defined(THIRTY_TWO_BIT) || defined(SIXTY_FOUR_BIT) || defined(SIXTY_FOUR_BIT_LONG)
                        if (l & 0xff00L)
                                return(bits[(int)(l>>8)]+8);
                        else    
@@ -744,7 +763,7 @@ int BN_is_bit_set(const BIGNUM *a, int n)
        i=n/BN_BITS2;
        j=n%BN_BITS2;
        if (a->top <= i) return 0;
-        return(((a->d[i])>>j)&((BN_ULONG)1));
+        return (int)(((a->d[i])>>j)&((BN_ULONG)1));
        }
 int BN_mask_bits(BIGNUM *a, int n)
diff --git a/src/lib/libcrypto/bn/bn_mont.c b/src/lib/libcrypto/bn/bn_mont.c
index 4799b152dd..7224637ab3 100644
--- a/src/lib/libcrypto/bn/bn_mont.c
+++ b/src/lib/libcrypto/bn/bn_mont.c
@@ -122,26 +122,10 @@
 #define MONT_WORD /* use the faster word-based algorithm */
-#if defined(MONT_WORD) && defined(OPENSSL_BN_ASM_MONT) && (BN_BITS2<=32)
+#ifdef MONT_WORD
-/* This condition means we have a specific non-default build:
- * In the 0.9.8 branch, OPENSSL_BN_ASM_MONT is normally not set for any
- * BN_BITS2<=32 platform; an explicit "enable-montasm" is required.
- * I.e., if we are here, the user intentionally deviates from the
- * normal stable build to get better Montgomery performance from
- * the 0.9.9-dev backport.
- *
- * In this case only, we also enable BN_from_montgomery_word()
- * (another non-stable feature from 0.9.9-dev).
- */
-#define MONT_FROM_WORD___NON_DEFAULT_0_9_8_BUILD
-#endif
-#ifdef MONT_FROM_WORD___NON_DEFAULT_0_9_8_BUILD
 static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont);
 #endif
 int BN_mod_mul_montgomery(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
                          BN_MONT_CTX *mont, BN_CTX *ctx)
        {
@@ -153,11 +137,7 @@ int BN_mod_mul_montgomery(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
        if (num>1 && a->top==num && b->top==num)
                {
                if (bn_wexpand(r,num) == NULL) return(0);
-#if 0 /* for OpenSSL 0.9.9 mont->n0 */
                if (bn_mul_mont(r->d,a->d,b->d,mont->N.d,mont->n0,num))
-#else
-                if (bn_mul_mont(r->d,a->d,b->d,mont->N.d,&mont->n0,num))
-#endif
                        {
                        r->neg = a->neg^b->neg;
                        r->top = num;
@@ -181,7 +161,7 @@ int BN_mod_mul_montgomery(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
                if (!BN_mul(tmp,a,b,ctx)) goto err;
                }
        /* reduce from aRR to aR */
-#ifdef MONT_FROM_WORD___NON_DEFAULT_0_9_8_BUILD
+#ifdef MONT_WORD
        if (!BN_from_montgomery_word(r,tmp,mont)) goto err;
 #else
        if (!BN_from_montgomery(r,tmp,mont,ctx)) goto err;
@@ -193,7 +173,7 @@ err:
        return(ret);
        }
-#ifdef MONT_FROM_WORD___NON_DEFAULT_0_9_8_BUILD
+#ifdef MONT_WORD
 static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont)
        {
        BIGNUM *n;
@@ -217,15 +197,15 @@ static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont)
        nrp= &(r->d[nl]);
        /* clear the top words of T */
+#if 1
        for (i=r->top; i<max; i++) /* memset? XXX */
                r->d[i]=0;
+#else
+        memset(&(r->d[r->top]),0,(max-r->top)*sizeof(BN_ULONG)); 
+#endif
        r->top=max;
-#if 0 /* for OpenSSL 0.9.9 mont->n0 */
        n0=mont->n0[0];
-#else
-        n0=mont->n0;
-#endif
 #ifdef BN_COUNT
        fprintf(stderr,"word BN_from_montgomery_word %d * %d\n",nl,nl);
@@ -270,6 +250,8 @@ static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont)
                }
        al=r->top-ri;
+#define BRANCH_FREE 1
+#if BRANCH_FREE
        if (bn_wexpand(ret,ri) == NULL) return(0);
        x=0-(((al-ri)>>(sizeof(al)*8-1))&1);
        ret->top=x=(ri&~x)|(al&x);      /* min(ri,al) */
@@ -317,164 +299,8 @@ static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont)
                rp[i]=nrp[i], ap[i]=0;
        bn_correct_top(r);
        bn_correct_top(ret);
-        bn_check_top(ret);
-        return(1);
-        }
-int BN_from_montgomery(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont,
-             BN_CTX *ctx)
-        {
-        int retn=0;
-        BIGNUM *t;
-        BN_CTX_start(ctx);
-        if ((t = BN_CTX_get(ctx)) && BN_copy(t,a))
-                retn = BN_from_montgomery_word(ret,t,mont);
-        BN_CTX_end(ctx);
-        return retn;
-        }
-#else /* !MONT_FROM_WORD___NON_DEFAULT_0_9_8_BUILD */
-int BN_from_montgomery(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont,
-             BN_CTX *ctx)
-        {
-        int retn=0;
-#ifdef MONT_WORD
-        BIGNUM *n,*r;
-        BN_ULONG *ap,*np,*rp,n0,v,*nrp;
-        int al,nl,max,i,x,ri;
-        BN_CTX_start(ctx);
-        if ((r = BN_CTX_get(ctx)) == NULL) goto err;
-        if (!BN_copy(r,a)) goto err;
-        n= &(mont->N);
-        ap=a->d;
-        /* mont->ri is the size of mont->N in bits (rounded up
-           to the word size) */
-        al=ri=mont->ri/BN_BITS2;
-        
-        nl=n->top;
-        if ((al == 0) || (nl == 0)) { r->top=0; return(1); }
-        max=(nl+al+1); /* allow for overflow (no?) XXX */
-        if (bn_wexpand(r,max) == NULL) goto err;
-        r->neg=a->neg^n->neg;
-        np=n->d;
-        rp=r->d;
-        nrp= &(r->d[nl]);
-        /* clear the top words of T */
-#if 1
-        for (i=r->top; i<max; i++) /* memset? XXX */
-                r->d[i]=0;
 #else
-        memset(&(r->d[r->top]),0,(max-r->top)*sizeof(BN_ULONG)); 
+        if (bn_wexpand(ret,al) == NULL) return(0);
-#endif
-        r->top=max;
-        n0=mont->n0;
-#ifdef BN_COUNT
-        fprintf(stderr,"word BN_from_montgomery %d * %d\n",nl,nl);
-#endif
-        for (i=0; i<nl; i++)
-                {
-#ifdef __TANDEM
-                {
-                   long long t1;
-                   long long t2;
-                   long long t3;
-                   t1 = rp[0] * (n0 & 0177777);
-                   t2 = 037777600000l;
-                   t2 = n0 & t2;
-                   t3 = rp[0] & 0177777;
-                   t2 = (t3 * t2) & BN_MASK2;
-                   t1 = t1 + t2;
-                   v=bn_mul_add_words(rp,np,nl,(BN_ULONG) t1);
-                }
-#else
-                v=bn_mul_add_words(rp,np,nl,(rp[0]*n0)&BN_MASK2);
-#endif
-                nrp++;
-                rp++;
-                if (((nrp[-1]+=v)&BN_MASK2) >= v)
-                        continue;
-                else
-                        {
-                        if (((++nrp[0])&BN_MASK2) != 0) continue;
-                        if (((++nrp[1])&BN_MASK2) != 0) continue;
-                        for (x=2; (((++nrp[x])&BN_MASK2) == 0); x++) ;
-                        }
-                }
-        bn_correct_top(r);
-        
-        /* mont->ri will be a multiple of the word size and below code
-         * is kind of BN_rshift(ret,r,mont->ri) equivalent */
-        if (r->top <= ri)
-                {
-                ret->top=0;
-                retn=1;
-                goto err;
-                }
-        al=r->top-ri;
-# define BRANCH_FREE 1
-# if BRANCH_FREE
-        if (bn_wexpand(ret,ri) == NULL) goto err;
-        x=0-(((al-ri)>>(sizeof(al)*8-1))&1);
-        ret->top=x=(ri&~x)|(al&x);      /* min(ri,al) */
-        ret->neg=r->neg;
-        rp=ret->d;
-        ap=&(r->d[ri]);
-        {
-        size_t m1,m2;
-        v=bn_sub_words(rp,ap,np,ri);
-        /* this ----------------^^ works even in al<ri case
-         * thanks to zealous zeroing of top of the vector in the
-         * beginning. */
-        /* if (al==ri && !v) || al>ri) nrp=rp; else nrp=ap; */
-        /* in other words if subtraction result is real, then
-         * trick unconditional memcpy below to perform in-place
-         * "refresh" instead of actual copy. */
-        m1=0-(size_t)(((al-ri)>>(sizeof(al)*8-1))&1);   /* al<ri */
-        m2=0-(size_t)(((ri-al)>>(sizeof(al)*8-1))&1);   /* al>ri */
-        m1|=m2;                 /* (al!=ri) */
-        m1|=(0-(size_t)v);      /* (al!=ri || v) */
-        m1&=~m2;                /* (al!=ri || v) && !al>ri */
-        nrp=(BN_ULONG *)(((size_t)rp&~m1)|((size_t)ap&m1));
-        }
-        /* 'i<ri' is chosen to eliminate dependency on input data, even
-         * though it results in redundant copy in al<ri case. */
-        for (i=0,ri-=4; i<ri; i+=4)
-                {
-                BN_ULONG t1,t2,t3,t4;
-                
-                t1=nrp[i+0];
-                t2=nrp[i+1];
-                t3=nrp[i+2];    ap[i+0]=0;
-                t4=nrp[i+3];    ap[i+1]=0;
-                rp[i+0]=t1;     ap[i+2]=0;
-                rp[i+1]=t2;     ap[i+3]=0;
-                rp[i+2]=t3;
-                rp[i+3]=t4;
-                }
-        for (ri+=4; i<ri; i++)
-                rp[i]=nrp[i], ap[i]=0;
-        bn_correct_top(r);
-        bn_correct_top(ret);
-# else
-        if (bn_wexpand(ret,al) == NULL) goto err;
        ret->top=al;
        ret->neg=r->neg;
@@ -497,8 +323,30 @@ int BN_from_montgomery(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont,
        al+=4;
        for (; i<al; i++)
                rp[i]=ap[i];
-# endif
-#else /* !MONT_WORD */ 
+        if (BN_ucmp(ret, &(mont->N)) >= 0)
+                {
+                if (!BN_usub(ret,ret,&(mont->N))) return(0);
+                }
+#endif
+        bn_check_top(ret);
+        return(1);
+        }
+#endif  /* MONT_WORD */
+int BN_from_montgomery(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont,
+             BN_CTX *ctx)
+        {
+        int retn=0;
+#ifdef MONT_WORD
+        BIGNUM *t;
+        BN_CTX_start(ctx);
+        if ((t = BN_CTX_get(ctx)) && BN_copy(t,a))
+                retn = BN_from_montgomery_word(ret,t,mont);
+        BN_CTX_end(ctx);
+#else /* !MONT_WORD */
        BIGNUM *t1,*t2;
        BN_CTX_start(ctx);
@@ -515,21 +363,18 @@ int BN_from_montgomery(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont,
        if (!BN_mul(t1,t2,&mont->N,ctx)) goto err;
        if (!BN_add(t2,a,t1)) goto err;
        if (!BN_rshift(ret,t2,mont->ri)) goto err;
-#endif /* MONT_WORD */
-#if !defined(BRANCH_FREE) || BRANCH_FREE==0
        if (BN_ucmp(ret, &(mont->N)) >= 0)
                {
                if (!BN_usub(ret,ret,&(mont->N))) goto err;
                }
-#endif
        retn=1;
        bn_check_top(ret);
 err:
        BN_CTX_end(ctx);
+#endif /* MONT_WORD */
        return(retn);
        }
-#endif /* MONT_FROM_WORD___NON_DEFAULT_0_9_8_BUILD */
 BN_MONT_CTX *BN_MONT_CTX_new(void)
        {
@@ -549,11 +394,7 @@ void BN_MONT_CTX_init(BN_MONT_CTX *ctx)
        BN_init(&(ctx->RR));
        BN_init(&(ctx->N));
        BN_init(&(ctx->Ni));
-#if 0 /* for OpenSSL 0.9.9 mont->n0 */
        ctx->n0[0] = ctx->n0[1] = 0;
-#else
-        ctx->n0 = 0;
-#endif
        ctx->flags=0;
        }
@@ -585,26 +426,22 @@ int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx)
                BIGNUM tmod;
                BN_ULONG buf[2];
-                mont->ri=(BN_num_bits(mod)+(BN_BITS2-1))/BN_BITS2*BN_BITS2;
-                BN_zero(R);
-#if 0 /* for OpenSSL 0.9.9 mont->n0, would be "#if defined(OPENSSL_BN_ASM_MONT) && (BN_BITS2<=32)",
-         only certain BN_BITS2<=32 platforms actually need this */
-                if (!(BN_set_bit(R,2*BN_BITS2))) goto err;      /* R */
-#else
-                if (!(BN_set_bit(R,BN_BITS2))) goto err;        /* R */
-#endif
-                buf[0]=mod->d[0]; /* tmod = N mod word size */
-                buf[1]=0;
                BN_init(&tmod);
                tmod.d=buf;
-                tmod.top = buf[0] != 0 ? 1 : 0;
                tmod.dmax=2;
                tmod.neg=0;
-#if 0 /* for OpenSSL 0.9.9 mont->n0, would be "#if defined(OPENSSL_BN_ASM_MONT) && (BN_BITS2<=32)";
+                mont->ri=(BN_num_bits(mod)+(BN_BITS2-1))/BN_BITS2*BN_BITS2;
-         only certain BN_BITS2<=32 platforms actually need this */
+#if defined(OPENSSL_BN_ASM_MONT) && (BN_BITS2<=32)
+                /* Only certain BN_BITS2<=32 platforms actually make use of
+                 * n0[1], and we could use the #else case (with a shorter R
+                 * value) for the others.  However, currently only the assembler
+                 * files do know which is which. */
+                BN_zero(R);
+                if (!(BN_set_bit(R,2*BN_BITS2))) goto err;
                                                                tmod.top=0;
                if ((buf[0] = mod->d[0]))                       tmod.top=1;
                if ((buf[1] = mod->top>1 ? mod->d[1] : 0))      tmod.top=2;
@@ -632,6 +469,12 @@ int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx)
                mont->n0[0] = (Ri->top > 0) ? Ri->d[0] : 0;
                mont->n0[1] = (Ri->top > 1) ? Ri->d[1] : 0;
 #else
+                BN_zero(R);
+                if (!(BN_set_bit(R,BN_BITS2))) goto err;        /* R */
+                buf[0]=mod->d[0]; /* tmod = N mod word size */
+                buf[1]=0;
+                tmod.top = buf[0] != 0 ? 1 : 0;
                                                        /* Ri = R^-1 mod N*/
                if ((BN_mod_inverse(Ri,R,&tmod,ctx)) == NULL)
                        goto err;
@@ -647,12 +490,8 @@ int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx)
                if (!BN_div(Ri,NULL,Ri,&tmod,ctx)) goto err;
                /* Ni = (R*Ri-1)/N,
                 * keep only least significant word: */
-# if 0 /* for OpenSSL 0.9.9 mont->n0 */
                mont->n0[0] = (Ri->top > 0) ? Ri->d[0] : 0;
                mont->n0[1] = 0;
-# else
-                mont->n0 = (Ri->top > 0) ? Ri->d[0] : 0;
-# endif
 #endif
                }
 #else /* !MONT_WORD */
@@ -689,12 +528,8 @@ BN_MONT_CTX *BN_MONT_CTX_copy(BN_MONT_CTX *to, BN_MONT_CTX *from)
        if (!BN_copy(&(to->N),&(from->N))) return NULL;
        if (!BN_copy(&(to->Ni),&(from->Ni))) return NULL;
        to->ri=from->ri;
-#if 0 /* for OpenSSL 0.9.9 mont->n0 */
        to->n0[0]=from->n0[0];
        to->n0[1]=from->n0[1];
-#else
-        to->n0=from->n0;
-#endif
        return(to);
        }
diff --git a/src/lib/libcrypto/bn/bn_mul.c b/src/lib/libcrypto/bn/bn_mul.c
index b848c8cc60..a0e9ec3b46 100644
--- a/src/lib/libcrypto/bn/bn_mul.c
+++ b/src/lib/libcrypto/bn/bn_mul.c
@@ -1028,17 +1028,19 @@ int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
                        assert(j <= al || j <= bl);
                        k = j+j;
                        t = BN_CTX_get(ctx);
+                        if (t == NULL)
+                                goto err;
                        if (al > j || bl > j)
                                {
-                                bn_wexpand(t,k*4);
+                                if (bn_wexpand(t,k*4) == NULL) goto err;
-                                bn_wexpand(rr,k*4);
+                                if (bn_wexpand(rr,k*4) == NULL) goto err;
                                bn_mul_part_recursive(rr->d,a->d,b->d,
                                        j,al-j,bl-j,t->d);
                                }
                        else    /* al <= j || bl <= j */
                                {
-                                bn_wexpand(t,k*2);
+                                if (bn_wexpand(t,k*2) == NULL) goto err;
-                                bn_wexpand(rr,k*2);
+                                if (bn_wexpand(rr,k*2) == NULL) goto err;
                                bn_mul_recursive(rr->d,a->d,b->d,
                                        j,al-j,bl-j,t->d);
                                }
diff --git a/src/lib/libcrypto/bn/bn_print.c b/src/lib/libcrypto/bn/bn_print.c
index 810dde34e1..bebb466d08 100644
--- a/src/lib/libcrypto/bn/bn_print.c
+++ b/src/lib/libcrypto/bn/bn_print.c
@@ -294,6 +294,27 @@ err:
        return(0);
        }
+/* Parse the ASCII number in 'a' into *bn.
+ *
+ * An optional leading '-' marks the value as negative.  After the sign,
+ * a "0x" or "0X" prefix selects hexadecimal parsing via BN_hex2bn();
+ * anything else is handed to BN_dec2bn().
+ *
+ * Returns 1 on success and 0 if the underlying conversion fails. */
+int BN_asc2bn(BIGNUM **bn, const char *a)
+        {
+        const char *p = a;
+        if (*p == '-')
+                p++;
+        if (p[0] == '0' && (p[1] == 'X' || p[1] == 'x'))
+                {
+                if (!BN_hex2bn(bn, p + 2))
+                        return 0;
+                }
+        else
+                {
+                if (!BN_dec2bn(bn, p))
+                        return 0;
+                }
+        /* Apply the sign only to non-zero values so that inputs such as
+         * "-0" or "-0x0" do not yield a negative zero. */
+        if (*a == '-' && !BN_is_zero(*bn))
+                (*bn)->neg = 1;
+        return 1;
+        }
 #ifndef OPENSSL_NO_BIO
 #ifndef OPENSSL_NO_FP_API
 int BN_print_fp(FILE *fp, const BIGNUM *a)
author	djm <>	2010-10-01 22:54:21 +0000
committer	djm <>	2010-10-01 22:54:21 +0000
commit	829fd51d4f8dde4a7f3bf54754f3c1d1a502f5e2 (patch)
tree	e03b9f1bd051e844b971936729e9df549a209130 /src/lib/libcrypto/bn
parent	e6b755d2a53d3cac7a344dfdd6bf7c951cac754c (diff)
download	openbsd-829fd51d4f8dde4a7f3bf54754f3c1d1a502f5e2.tar.gz openbsd-829fd51d4f8dde4a7f3bf54754f3c1d1a502f5e2.tar.bz2 openbsd-829fd51d4f8dde4a7f3bf54754f3c1d1a502f5e2.zip