diff options
| author | djm <> | 2010-10-01 22:54:21 +0000 |
|---|---|---|
| committer | djm <> | 2010-10-01 22:54:21 +0000 |
| commit | 829fd51d4f8dde4a7f3bf54754f3c1d1a502f5e2 (patch) | |
| tree | e03b9f1bd051e844b971936729e9df549a209130 /src/lib/libcrypto/bn | |
| parent | e6b755d2a53d3cac7a344dfdd6bf7c951cac754c (diff) | |
| download | openbsd-829fd51d4f8dde4a7f3bf54754f3c1d1a502f5e2.tar.gz openbsd-829fd51d4f8dde4a7f3bf54754f3c1d1a502f5e2.tar.bz2 openbsd-829fd51d4f8dde4a7f3bf54754f3c1d1a502f5e2.zip | |
import OpenSSL-1.0.0a
Diffstat (limited to 'src/lib/libcrypto/bn')
| -rw-r--r-- | src/lib/libcrypto/bn/asm/alpha-mont.pl | 8 | ||||
| -rw-r--r-- | src/lib/libcrypto/bn/asm/armv4-mont.pl | 1 | ||||
| -rw-r--r-- | src/lib/libcrypto/bn/asm/bn-586.pl | 203 | ||||
| -rw-r--r-- | src/lib/libcrypto/bn/asm/co-586.pl | 3 | ||||
| -rw-r--r-- | src/lib/libcrypto/bn/asm/ppc.pl | 233 | ||||
| -rw-r--r-- | src/lib/libcrypto/bn/asm/sparcv8plus.S | 15 | ||||
| -rw-r--r-- | src/lib/libcrypto/bn/asm/x86_64-gcc.c | 29 | ||||
| -rwxr-xr-x | src/lib/libcrypto/bn/asm/x86_64-mont.pl | 136 | ||||
| -rw-r--r-- | src/lib/libcrypto/bn/bn.h | 181 | ||||
| -rw-r--r-- | src/lib/libcrypto/bn/bn_asm.c | 322 | ||||
| -rw-r--r-- | src/lib/libcrypto/bn/bn_blind.c | 17 | ||||
| -rw-r--r-- | src/lib/libcrypto/bn/bn_ctx.c | 6 | ||||
| -rw-r--r-- | src/lib/libcrypto/bn/bn_div.c | 15 | ||||
| -rw-r--r-- | src/lib/libcrypto/bn/bn_exp.c | 3 | ||||
| -rw-r--r-- | src/lib/libcrypto/bn/bn_gf2m.c | 145 | ||||
| -rw-r--r-- | src/lib/libcrypto/bn/bn_lcl.h | 3 | ||||
| -rw-r--r-- | src/lib/libcrypto/bn/bn_lib.c | 29 | ||||
| -rw-r--r-- | src/lib/libcrypto/bn/bn_mont.c | 269 | ||||
| -rw-r--r-- | src/lib/libcrypto/bn/bn_mul.c | 10 | ||||
| -rw-r--r-- | src/lib/libcrypto/bn/bn_print.c | 21 |
20 files changed, 899 insertions, 750 deletions
diff --git a/src/lib/libcrypto/bn/asm/alpha-mont.pl b/src/lib/libcrypto/bn/asm/alpha-mont.pl index 7a2cc3173b..f7e0ca1646 100644 --- a/src/lib/libcrypto/bn/asm/alpha-mont.pl +++ b/src/lib/libcrypto/bn/asm/alpha-mont.pl | |||
| @@ -53,15 +53,15 @@ $code=<<___; | |||
| 53 | .align 5 | 53 | .align 5 |
| 54 | .ent bn_mul_mont | 54 | .ent bn_mul_mont |
| 55 | bn_mul_mont: | 55 | bn_mul_mont: |
| 56 | lda sp,-40(sp) | 56 | lda sp,-48(sp) |
| 57 | stq ra,0(sp) | 57 | stq ra,0(sp) |
| 58 | stq s3,8(sp) | 58 | stq s3,8(sp) |
| 59 | stq s4,16(sp) | 59 | stq s4,16(sp) |
| 60 | stq s5,24(sp) | 60 | stq s5,24(sp) |
| 61 | stq fp,32(sp) | 61 | stq fp,32(sp) |
| 62 | mov sp,fp | 62 | mov sp,fp |
| 63 | .mask 0x0400f000,-40 | 63 | .mask 0x0400f000,-48 |
| 64 | .frame fp,40,ra | 64 | .frame fp,48,ra |
| 65 | .prologue 0 | 65 | .prologue 0 |
| 66 | 66 | ||
| 67 | .align 4 | 67 | .align 4 |
| @@ -306,7 +306,7 @@ bn_mul_mont: | |||
| 306 | ldq s4,16(sp) | 306 | ldq s4,16(sp) |
| 307 | ldq s5,24(sp) | 307 | ldq s5,24(sp) |
| 308 | ldq fp,32(sp) | 308 | ldq fp,32(sp) |
| 309 | lda sp,40(sp) | 309 | lda sp,48(sp) |
| 310 | ret (ra) | 310 | ret (ra) |
| 311 | .end bn_mul_mont | 311 | .end bn_mul_mont |
| 312 | .rdata | 312 | .rdata |
diff --git a/src/lib/libcrypto/bn/asm/armv4-mont.pl b/src/lib/libcrypto/bn/asm/armv4-mont.pl index 05d5dc1a48..14e0d2d1dd 100644 --- a/src/lib/libcrypto/bn/asm/armv4-mont.pl +++ b/src/lib/libcrypto/bn/asm/armv4-mont.pl | |||
| @@ -193,6 +193,7 @@ bn_mul_mont: | |||
| 193 | bx lr @ interoperable with Thumb ISA:-) | 193 | bx lr @ interoperable with Thumb ISA:-) |
| 194 | .size bn_mul_mont,.-bn_mul_mont | 194 | .size bn_mul_mont,.-bn_mul_mont |
| 195 | .asciz "Montgomery multiplication for ARMv4, CRYPTOGAMS by <appro\@openssl.org>" | 195 | .asciz "Montgomery multiplication for ARMv4, CRYPTOGAMS by <appro\@openssl.org>" |
| 196 | .align 2 | ||
| 196 | ___ | 197 | ___ |
| 197 | 198 | ||
| 198 | $code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4 | 199 | $code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4 |
diff --git a/src/lib/libcrypto/bn/asm/bn-586.pl b/src/lib/libcrypto/bn/asm/bn-586.pl index 26c2685a72..332ef3e91d 100644 --- a/src/lib/libcrypto/bn/asm/bn-586.pl +++ b/src/lib/libcrypto/bn/asm/bn-586.pl | |||
| @@ -1,6 +1,7 @@ | |||
| 1 | #!/usr/local/bin/perl | 1 | #!/usr/local/bin/perl |
| 2 | 2 | ||
| 3 | push(@INC,"perlasm","../../perlasm"); | 3 | $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; |
| 4 | push(@INC,"${dir}","${dir}../../perlasm"); | ||
| 4 | require "x86asm.pl"; | 5 | require "x86asm.pl"; |
| 5 | 6 | ||
| 6 | &asm_init($ARGV[0],$0); | 7 | &asm_init($ARGV[0],$0); |
| @@ -24,38 +25,25 @@ sub bn_mul_add_words | |||
| 24 | { | 25 | { |
| 25 | local($name)=@_; | 26 | local($name)=@_; |
| 26 | 27 | ||
| 27 | &function_begin($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":""); | 28 | &function_begin_B($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":""); |
| 28 | 29 | ||
| 29 | &comment(""); | 30 | $r="eax"; |
| 30 | $Low="eax"; | 31 | $a="edx"; |
| 31 | $High="edx"; | 32 | $c="ecx"; |
| 32 | $a="ebx"; | ||
| 33 | $w="ebp"; | ||
| 34 | $r="edi"; | ||
| 35 | $c="esi"; | ||
| 36 | |||
| 37 | &xor($c,$c); # clear carry | ||
| 38 | &mov($r,&wparam(0)); # | ||
| 39 | |||
| 40 | &mov("ecx",&wparam(2)); # | ||
| 41 | &mov($a,&wparam(1)); # | ||
| 42 | |||
| 43 | &and("ecx",0xfffffff8); # num / 8 | ||
| 44 | &mov($w,&wparam(3)); # | ||
| 45 | |||
| 46 | &push("ecx"); # Up the stack for a tmp variable | ||
| 47 | |||
| 48 | &jz(&label("maw_finish")); | ||
| 49 | 33 | ||
| 50 | if ($sse2) { | 34 | if ($sse2) { |
| 51 | &picmeup("eax","OPENSSL_ia32cap_P"); | 35 | &picmeup("eax","OPENSSL_ia32cap_P"); |
| 52 | &bt(&DWP(0,"eax"),26); | 36 | &bt(&DWP(0,"eax"),26); |
| 53 | &jnc(&label("maw_loop")); | 37 | &jnc(&label("maw_non_sse2")); |
| 54 | 38 | ||
| 55 | &movd("mm0",$w); # mm0 = w | 39 | &mov($r,&wparam(0)); |
| 40 | &mov($a,&wparam(1)); | ||
| 41 | &mov($c,&wparam(2)); | ||
| 42 | &movd("mm0",&wparam(3)); # mm0 = w | ||
| 56 | &pxor("mm1","mm1"); # mm1 = carry_in | 43 | &pxor("mm1","mm1"); # mm1 = carry_in |
| 57 | 44 | &jmp(&label("maw_sse2_entry")); | |
| 58 | &set_label("maw_sse2_loop",0); | 45 | |
| 46 | &set_label("maw_sse2_unrolled",16); | ||
| 59 | &movd("mm3",&DWP(0,$r,"",0)); # mm3 = r[0] | 47 | &movd("mm3",&DWP(0,$r,"",0)); # mm3 = r[0] |
| 60 | &paddq("mm1","mm3"); # mm1 = carry_in + r[0] | 48 | &paddq("mm1","mm3"); # mm1 = carry_in + r[0] |
| 61 | &movd("mm2",&DWP(0,$a,"",0)); # mm2 = a[0] | 49 | &movd("mm2",&DWP(0,$a,"",0)); # mm2 = a[0] |
| @@ -112,42 +100,82 @@ sub bn_mul_add_words | |||
| 112 | &psrlq("mm1",32); # mm1 = carry6 | 100 | &psrlq("mm1",32); # mm1 = carry6 |
| 113 | &paddq("mm1","mm3"); # mm1 = carry6 + r[7] + w*a[7] | 101 | &paddq("mm1","mm3"); # mm1 = carry6 + r[7] + w*a[7] |
| 114 | &movd(&DWP(28,$r,"",0),"mm1"); | 102 | &movd(&DWP(28,$r,"",0),"mm1"); |
| 115 | &add($r,32); | 103 | &lea($r,&DWP(32,$r)); |
| 116 | &psrlq("mm1",32); # mm1 = carry_out | 104 | &psrlq("mm1",32); # mm1 = carry_out |
| 117 | 105 | ||
| 118 | &sub("ecx",8); | 106 | &sub($c,8); |
| 107 | &jz(&label("maw_sse2_exit")); | ||
| 108 | &set_label("maw_sse2_entry"); | ||
| 109 | &test($c,0xfffffff8); | ||
| 110 | &jnz(&label("maw_sse2_unrolled")); | ||
| 111 | |||
| 112 | &set_label("maw_sse2_loop",4); | ||
| 113 | &movd("mm2",&DWP(0,$a)); # mm2 = a[i] | ||
| 114 | &movd("mm3",&DWP(0,$r)); # mm3 = r[i] | ||
| 115 | &pmuludq("mm2","mm0"); # a[i] *= w | ||
| 116 | &lea($a,&DWP(4,$a)); | ||
| 117 | &paddq("mm1","mm3"); # carry += r[i] | ||
| 118 | &paddq("mm1","mm2"); # carry += a[i]*w | ||
| 119 | &movd(&DWP(0,$r),"mm1"); # r[i] = carry_low | ||
| 120 | &sub($c,1); | ||
| 121 | &psrlq("mm1",32); # carry = carry_high | ||
| 122 | &lea($r,&DWP(4,$r)); | ||
| 119 | &jnz(&label("maw_sse2_loop")); | 123 | &jnz(&label("maw_sse2_loop")); |
| 120 | 124 | &set_label("maw_sse2_exit"); | |
| 121 | &movd($c,"mm1"); # c = carry_out | 125 | &movd("eax","mm1"); # c = carry_out |
| 122 | &emms(); | 126 | &emms(); |
| 127 | &ret(); | ||
| 123 | 128 | ||
| 124 | &jmp(&label("maw_finish")); | 129 | &set_label("maw_non_sse2",16); |
| 125 | } | 130 | } |
| 126 | 131 | ||
| 127 | &set_label("maw_loop",0); | 132 | # function_begin prologue |
| 133 | &push("ebp"); | ||
| 134 | &push("ebx"); | ||
| 135 | &push("esi"); | ||
| 136 | &push("edi"); | ||
| 137 | |||
| 138 | &comment(""); | ||
| 139 | $Low="eax"; | ||
| 140 | $High="edx"; | ||
| 141 | $a="ebx"; | ||
| 142 | $w="ebp"; | ||
| 143 | $r="edi"; | ||
| 144 | $c="esi"; | ||
| 145 | |||
| 146 | &xor($c,$c); # clear carry | ||
| 147 | &mov($r,&wparam(0)); # | ||
| 148 | |||
| 149 | &mov("ecx",&wparam(2)); # | ||
| 150 | &mov($a,&wparam(1)); # | ||
| 151 | |||
| 152 | &and("ecx",0xfffffff8); # num / 8 | ||
| 153 | &mov($w,&wparam(3)); # | ||
| 128 | 154 | ||
| 129 | &mov(&swtmp(0),"ecx"); # | 155 | &push("ecx"); # Up the stack for a tmp variable |
| 156 | |||
| 157 | &jz(&label("maw_finish")); | ||
| 158 | |||
| 159 | &set_label("maw_loop",16); | ||
| 130 | 160 | ||
| 131 | for ($i=0; $i<32; $i+=4) | 161 | for ($i=0; $i<32; $i+=4) |
| 132 | { | 162 | { |
| 133 | &comment("Round $i"); | 163 | &comment("Round $i"); |
| 134 | 164 | ||
| 135 | &mov("eax",&DWP($i,$a,"",0)); # *a | 165 | &mov("eax",&DWP($i,$a)); # *a |
| 136 | &mul($w); # *a * w | 166 | &mul($w); # *a * w |
| 137 | &add("eax",$c); # L(t)+= *r | 167 | &add("eax",$c); # L(t)+= c |
| 138 | &mov($c,&DWP($i,$r,"",0)); # L(t)+= *r | ||
| 139 | &adc("edx",0); # H(t)+=carry | 168 | &adc("edx",0); # H(t)+=carry |
| 140 | &add("eax",$c); # L(t)+=c | 169 | &add("eax",&DWP($i,$r)); # L(t)+= *r |
| 141 | &adc("edx",0); # H(t)+=carry | 170 | &adc("edx",0); # H(t)+=carry |
| 142 | &mov(&DWP($i,$r,"",0),"eax"); # *r= L(t); | 171 | &mov(&DWP($i,$r),"eax"); # *r= L(t); |
| 143 | &mov($c,"edx"); # c= H(t); | 172 | &mov($c,"edx"); # c= H(t); |
| 144 | } | 173 | } |
| 145 | 174 | ||
| 146 | &comment(""); | 175 | &comment(""); |
| 147 | &mov("ecx",&swtmp(0)); # | ||
| 148 | &add($a,32); | ||
| 149 | &add($r,32); | ||
| 150 | &sub("ecx",8); | 176 | &sub("ecx",8); |
| 177 | &lea($a,&DWP(32,$a)); | ||
| 178 | &lea($r,&DWP(32,$r)); | ||
| 151 | &jnz(&label("maw_loop")); | 179 | &jnz(&label("maw_loop")); |
| 152 | 180 | ||
| 153 | &set_label("maw_finish",0); | 181 | &set_label("maw_finish",0); |
| @@ -160,16 +188,15 @@ sub bn_mul_add_words | |||
| 160 | for ($i=0; $i<7; $i++) | 188 | for ($i=0; $i<7; $i++) |
| 161 | { | 189 | { |
| 162 | &comment("Tail Round $i"); | 190 | &comment("Tail Round $i"); |
| 163 | &mov("eax",&DWP($i*4,$a,"",0));# *a | 191 | &mov("eax",&DWP($i*4,$a)); # *a |
| 164 | &mul($w); # *a * w | 192 | &mul($w); # *a * w |
| 165 | &add("eax",$c); # L(t)+=c | 193 | &add("eax",$c); # L(t)+=c |
| 166 | &mov($c,&DWP($i*4,$r,"",0)); # L(t)+= *r | ||
| 167 | &adc("edx",0); # H(t)+=carry | 194 | &adc("edx",0); # H(t)+=carry |
| 168 | &add("eax",$c); | 195 | &add("eax",&DWP($i*4,$r)); # L(t)+= *r |
| 169 | &adc("edx",0); # H(t)+=carry | 196 | &adc("edx",0); # H(t)+=carry |
| 170 | &dec("ecx") if ($i != 7-1); | 197 | &dec("ecx") if ($i != 7-1); |
| 171 | &mov(&DWP($i*4,$r,"",0),"eax"); # *r= L(t); | 198 | &mov(&DWP($i*4,$r),"eax"); # *r= L(t); |
| 172 | &mov($c,"edx"); # c= H(t); | 199 | &mov($c,"edx"); # c= H(t); |
| 173 | &jz(&label("maw_end")) if ($i != 7-1); | 200 | &jz(&label("maw_end")) if ($i != 7-1); |
| 174 | } | 201 | } |
| 175 | &set_label("maw_end",0); | 202 | &set_label("maw_end",0); |
| @@ -184,7 +211,45 @@ sub bn_mul_words | |||
| 184 | { | 211 | { |
| 185 | local($name)=@_; | 212 | local($name)=@_; |
| 186 | 213 | ||
| 187 | &function_begin($name,""); | 214 | &function_begin_B($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":""); |
| 215 | |||
| 216 | $r="eax"; | ||
| 217 | $a="edx"; | ||
| 218 | $c="ecx"; | ||
| 219 | |||
| 220 | if ($sse2) { | ||
| 221 | &picmeup("eax","OPENSSL_ia32cap_P"); | ||
| 222 | &bt(&DWP(0,"eax"),26); | ||
| 223 | &jnc(&label("mw_non_sse2")); | ||
| 224 | |||
| 225 | &mov($r,&wparam(0)); | ||
| 226 | &mov($a,&wparam(1)); | ||
| 227 | &mov($c,&wparam(2)); | ||
| 228 | &movd("mm0",&wparam(3)); # mm0 = w | ||
| 229 | &pxor("mm1","mm1"); # mm1 = carry = 0 | ||
| 230 | |||
| 231 | &set_label("mw_sse2_loop",16); | ||
| 232 | &movd("mm2",&DWP(0,$a)); # mm2 = a[i] | ||
| 233 | &pmuludq("mm2","mm0"); # a[i] *= w | ||
| 234 | &lea($a,&DWP(4,$a)); | ||
| 235 | &paddq("mm1","mm2"); # carry += a[i]*w | ||
| 236 | &movd(&DWP(0,$r),"mm1"); # r[i] = carry_low | ||
| 237 | &sub($c,1); | ||
| 238 | &psrlq("mm1",32); # carry = carry_high | ||
| 239 | &lea($r,&DWP(4,$r)); | ||
| 240 | &jnz(&label("mw_sse2_loop")); | ||
| 241 | |||
| 242 | &movd("eax","mm1"); # return carry | ||
| 243 | &emms(); | ||
| 244 | &ret(); | ||
| 245 | &set_label("mw_non_sse2",16); | ||
| 246 | } | ||
| 247 | |||
| 248 | # function_begin prologue | ||
| 249 | &push("ebp"); | ||
| 250 | &push("ebx"); | ||
| 251 | &push("esi"); | ||
| 252 | &push("edi"); | ||
| 188 | 253 | ||
| 189 | &comment(""); | 254 | &comment(""); |
| 190 | $Low="eax"; | 255 | $Low="eax"; |
| @@ -257,7 +322,40 @@ sub bn_sqr_words | |||
| 257 | { | 322 | { |
| 258 | local($name)=@_; | 323 | local($name)=@_; |
| 259 | 324 | ||
| 260 | &function_begin($name,""); | 325 | &function_begin_B($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":""); |
| 326 | |||
| 327 | $r="eax"; | ||
| 328 | $a="edx"; | ||
| 329 | $c="ecx"; | ||
| 330 | |||
| 331 | if ($sse2) { | ||
| 332 | &picmeup("eax","OPENSSL_ia32cap_P"); | ||
| 333 | &bt(&DWP(0,"eax"),26); | ||
| 334 | &jnc(&label("sqr_non_sse2")); | ||
| 335 | |||
| 336 | &mov($r,&wparam(0)); | ||
| 337 | &mov($a,&wparam(1)); | ||
| 338 | &mov($c,&wparam(2)); | ||
| 339 | |||
| 340 | &set_label("sqr_sse2_loop",16); | ||
| 341 | &movd("mm0",&DWP(0,$a)); # mm0 = a[i] | ||
| 342 | &pmuludq("mm0","mm0"); # a[i] *= a[i] | ||
| 343 | &lea($a,&DWP(4,$a)); # a++ | ||
| 344 | &movq(&QWP(0,$r),"mm0"); # r[i] = a[i]*a[i] | ||
| 345 | &sub($c,1); | ||
| 346 | &lea($r,&DWP(8,$r)); # r += 2 | ||
| 347 | &jnz(&label("sqr_sse2_loop")); | ||
| 348 | |||
| 349 | &emms(); | ||
| 350 | &ret(); | ||
| 351 | &set_label("sqr_non_sse2",16); | ||
| 352 | } | ||
| 353 | |||
| 354 | # function_begin prologue | ||
| 355 | &push("ebp"); | ||
| 356 | &push("ebx"); | ||
| 357 | &push("esi"); | ||
| 358 | &push("edi"); | ||
| 261 | 359 | ||
| 262 | &comment(""); | 360 | &comment(""); |
| 263 | $r="esi"; | 361 | $r="esi"; |
| @@ -313,12 +411,13 @@ sub bn_div_words | |||
| 313 | { | 411 | { |
| 314 | local($name)=@_; | 412 | local($name)=@_; |
| 315 | 413 | ||
| 316 | &function_begin($name,""); | 414 | &function_begin_B($name,""); |
| 317 | &mov("edx",&wparam(0)); # | 415 | &mov("edx",&wparam(0)); # |
| 318 | &mov("eax",&wparam(1)); # | 416 | &mov("eax",&wparam(1)); # |
| 319 | &mov("ebx",&wparam(2)); # | 417 | &mov("ecx",&wparam(2)); # |
| 320 | &div("ebx"); | 418 | &div("ecx"); |
| 321 | &function_end($name); | 419 | &ret(); |
| 420 | &function_end_B($name); | ||
| 322 | } | 421 | } |
| 323 | 422 | ||
| 324 | sub bn_add_words | 423 | sub bn_add_words |
diff --git a/src/lib/libcrypto/bn/asm/co-586.pl b/src/lib/libcrypto/bn/asm/co-586.pl index 5d962cb957..57101a6bd7 100644 --- a/src/lib/libcrypto/bn/asm/co-586.pl +++ b/src/lib/libcrypto/bn/asm/co-586.pl | |||
| @@ -1,6 +1,7 @@ | |||
| 1 | #!/usr/local/bin/perl | 1 | #!/usr/local/bin/perl |
| 2 | 2 | ||
| 3 | push(@INC,"perlasm","../../perlasm"); | 3 | $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; |
| 4 | push(@INC,"${dir}","${dir}../../perlasm"); | ||
| 4 | require "x86asm.pl"; | 5 | require "x86asm.pl"; |
| 5 | 6 | ||
| 6 | &asm_init($ARGV[0],$0); | 7 | &asm_init($ARGV[0],$0); |
diff --git a/src/lib/libcrypto/bn/asm/ppc.pl b/src/lib/libcrypto/bn/asm/ppc.pl index 08e0053473..37c65d3511 100644 --- a/src/lib/libcrypto/bn/asm/ppc.pl +++ b/src/lib/libcrypto/bn/asm/ppc.pl | |||
| @@ -100,9 +100,9 @@ | |||
| 100 | # me a note at schari@us.ibm.com | 100 | # me a note at schari@us.ibm.com |
| 101 | # | 101 | # |
| 102 | 102 | ||
| 103 | $opf = shift; | 103 | $flavour = shift; |
| 104 | 104 | ||
| 105 | if ($opf =~ /32\.s/) { | 105 | if ($flavour =~ /32/) { |
| 106 | $BITS= 32; | 106 | $BITS= 32; |
| 107 | $BNSZ= $BITS/8; | 107 | $BNSZ= $BITS/8; |
| 108 | $ISA= "\"ppc\""; | 108 | $ISA= "\"ppc\""; |
| @@ -125,7 +125,7 @@ if ($opf =~ /32\.s/) { | |||
| 125 | $INSR= "insrwi"; # insert right | 125 | $INSR= "insrwi"; # insert right |
| 126 | $ROTL= "rotlwi"; # rotate left by immediate | 126 | $ROTL= "rotlwi"; # rotate left by immediate |
| 127 | $TR= "tw"; # conditional trap | 127 | $TR= "tw"; # conditional trap |
| 128 | } elsif ($opf =~ /64\.s/) { | 128 | } elsif ($flavour =~ /64/) { |
| 129 | $BITS= 64; | 129 | $BITS= 64; |
| 130 | $BNSZ= $BITS/8; | 130 | $BNSZ= $BITS/8; |
| 131 | $ISA= "\"ppc64\""; | 131 | $ISA= "\"ppc64\""; |
| @@ -149,93 +149,16 @@ if ($opf =~ /32\.s/) { | |||
| 149 | $INSR= "insrdi"; # insert right | 149 | $INSR= "insrdi"; # insert right |
| 150 | $ROTL= "rotldi"; # rotate left by immediate | 150 | $ROTL= "rotldi"; # rotate left by immediate |
| 151 | $TR= "td"; # conditional trap | 151 | $TR= "td"; # conditional trap |
| 152 | } else { die "nonsense $opf"; } | 152 | } else { die "nonsense $flavour"; } |
| 153 | 153 | ||
| 154 | ( defined shift || open STDOUT,">$opf" ) || die "can't open $opf: $!"; | 154 | $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; |
| 155 | ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or | ||
| 156 | ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or | ||
| 157 | die "can't locate ppc-xlate.pl"; | ||
| 155 | 158 | ||
| 156 | # function entry points from the AIX code | 159 | open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!"; |
| 157 | # | ||
| 158 | # There are other, more elegant, ways to handle this. We (IBM) chose | ||
| 159 | # this approach as it plays well with scripts we run to 'namespace' | ||
| 160 | # OpenSSL .i.e. we add a prefix to all the public symbols so we can | ||
| 161 | # co-exist in the same process with other implementations of OpenSSL. | ||
| 162 | # 'cleverer' ways of doing these substitutions tend to hide data we | ||
| 163 | # need to be obvious. | ||
| 164 | # | ||
| 165 | my @items = ("bn_sqr_comba4", | ||
| 166 | "bn_sqr_comba8", | ||
| 167 | "bn_mul_comba4", | ||
| 168 | "bn_mul_comba8", | ||
| 169 | "bn_sub_words", | ||
| 170 | "bn_add_words", | ||
| 171 | "bn_div_words", | ||
| 172 | "bn_sqr_words", | ||
| 173 | "bn_mul_words", | ||
| 174 | "bn_mul_add_words"); | ||
| 175 | 160 | ||
| 176 | if ($opf =~ /linux/) { do_linux(); } | 161 | $data=<<EOF; |
| 177 | elsif ($opf =~ /aix/) { do_aix(); } | ||
| 178 | elsif ($opf =~ /osx/) { do_osx(); } | ||
| 179 | else { do_bsd(); } | ||
| 180 | |||
| 181 | sub do_linux { | ||
| 182 | $d=&data(); | ||
| 183 | |||
| 184 | if ($BITS==64) { | ||
| 185 | foreach $t (@items) { | ||
| 186 | $d =~ s/\.$t:/\ | ||
| 187 | \t.section\t".opd","aw"\ | ||
| 188 | \t.align\t3\ | ||
| 189 | \t.globl\t$t\ | ||
| 190 | $t:\ | ||
| 191 | \t.quad\t.$t,.TOC.\@tocbase,0\ | ||
| 192 | \t.size\t$t,24\ | ||
| 193 | \t.previous\n\ | ||
| 194 | \t.type\t.$t,\@function\ | ||
| 195 | \t.globl\t.$t\ | ||
| 196 | .$t:/g; | ||
| 197 | } | ||
| 198 | } | ||
| 199 | else { | ||
| 200 | foreach $t (@items) { | ||
| 201 | $d=~s/\.$t/$t/g; | ||
| 202 | } | ||
| 203 | } | ||
| 204 | # hide internal labels to avoid pollution of name table... | ||
| 205 | $d=~s/Lppcasm_/.Lppcasm_/gm; | ||
| 206 | print $d; | ||
| 207 | } | ||
| 208 | |||
| 209 | sub do_aix { | ||
| 210 | # AIX assembler is smart enough to please the linker without | ||
| 211 | # making us do something special... | ||
| 212 | print &data(); | ||
| 213 | } | ||
| 214 | |||
| 215 | # MacOSX 32 bit | ||
| 216 | sub do_osx { | ||
| 217 | $d=&data(); | ||
| 218 | # Change the bn symbol prefix from '.' to '_' | ||
| 219 | foreach $t (@items) { | ||
| 220 | $d=~s/\.$t/_$t/g; | ||
| 221 | } | ||
| 222 | # Change .machine to something OS X asm will accept | ||
| 223 | $d=~s/\.machine.*/.text/g; | ||
| 224 | $d=~s/\#/;/g; # change comment from '#' to ';' | ||
| 225 | print $d; | ||
| 226 | } | ||
| 227 | |||
| 228 | # BSD (Untested) | ||
| 229 | sub do_bsd { | ||
| 230 | $d=&data(); | ||
| 231 | foreach $t (@items) { | ||
| 232 | $d=~s/\.$t/_$t/g; | ||
| 233 | } | ||
| 234 | print $d; | ||
| 235 | } | ||
| 236 | |||
| 237 | sub data { | ||
| 238 | local($data)=<<EOF; | ||
| 239 | #-------------------------------------------------------------------- | 162 | #-------------------------------------------------------------------- |
| 240 | # | 163 | # |
| 241 | # | 164 | # |
| @@ -297,33 +220,20 @@ sub data { | |||
| 297 | # | 220 | # |
| 298 | # Defines to be used in the assembly code. | 221 | # Defines to be used in the assembly code. |
| 299 | # | 222 | # |
| 300 | .set r0,0 # we use it as storage for value of 0 | 223 | #.set r0,0 # we use it as storage for value of 0 |
| 301 | .set SP,1 # preserved | 224 | #.set SP,1 # preserved |
| 302 | .set RTOC,2 # preserved | 225 | #.set RTOC,2 # preserved |
| 303 | .set r3,3 # 1st argument/return value | 226 | #.set r3,3 # 1st argument/return value |
| 304 | .set r4,4 # 2nd argument/volatile register | 227 | #.set r4,4 # 2nd argument/volatile register |
| 305 | .set r5,5 # 3rd argument/volatile register | 228 | #.set r5,5 # 3rd argument/volatile register |
| 306 | .set r6,6 # ... | 229 | #.set r6,6 # ... |
| 307 | .set r7,7 | 230 | #.set r7,7 |
| 308 | .set r8,8 | 231 | #.set r8,8 |
| 309 | .set r9,9 | 232 | #.set r9,9 |
| 310 | .set r10,10 | 233 | #.set r10,10 |
| 311 | .set r11,11 | 234 | #.set r11,11 |
| 312 | .set r12,12 | 235 | #.set r12,12 |
| 313 | .set r13,13 # not used, nor any other "below" it... | 236 | #.set r13,13 # not used, nor any other "below" it... |
| 314 | |||
| 315 | .set BO_IF_NOT,4 | ||
| 316 | .set BO_IF,12 | ||
| 317 | .set BO_dCTR_NZERO,16 | ||
| 318 | .set BO_dCTR_ZERO,18 | ||
| 319 | .set BO_ALWAYS,20 | ||
| 320 | .set CR0_LT,0; | ||
| 321 | .set CR0_GT,1; | ||
| 322 | .set CR0_EQ,2 | ||
| 323 | .set CR1_FX,4; | ||
| 324 | .set CR1_FEX,5; | ||
| 325 | .set CR1_VX,6 | ||
| 326 | .set LR,8 | ||
| 327 | 237 | ||
| 328 | # Declare function names to be global | 238 | # Declare function names to be global |
| 329 | # NOTE: For gcc these names MUST be changed to remove | 239 | # NOTE: For gcc these names MUST be changed to remove |
| @@ -344,7 +254,7 @@ sub data { | |||
| 344 | 254 | ||
| 345 | # .text section | 255 | # .text section |
| 346 | 256 | ||
| 347 | .machine $ISA | 257 | .machine "any" |
| 348 | 258 | ||
| 349 | # | 259 | # |
| 350 | # NOTE: The following label name should be changed to | 260 | # NOTE: The following label name should be changed to |
| @@ -478,7 +388,7 @@ sub data { | |||
| 478 | 388 | ||
| 479 | $ST r9,`6*$BNSZ`(r3) #r[6]=c1 | 389 | $ST r9,`6*$BNSZ`(r3) #r[6]=c1 |
| 480 | $ST r10,`7*$BNSZ`(r3) #r[7]=c2 | 390 | $ST r10,`7*$BNSZ`(r3) #r[7]=c2 |
| 481 | bclr BO_ALWAYS,CR0_LT | 391 | blr |
| 482 | .long 0x00000000 | 392 | .long 0x00000000 |
| 483 | 393 | ||
| 484 | # | 394 | # |
| @@ -903,7 +813,7 @@ sub data { | |||
| 903 | $ST r9, `15*$BNSZ`(r3) #r[15]=c1; | 813 | $ST r9, `15*$BNSZ`(r3) #r[15]=c1; |
| 904 | 814 | ||
| 905 | 815 | ||
| 906 | bclr BO_ALWAYS,CR0_LT | 816 | blr |
| 907 | 817 | ||
| 908 | .long 0x00000000 | 818 | .long 0x00000000 |
| 909 | 819 | ||
| @@ -1055,7 +965,7 @@ sub data { | |||
| 1055 | 965 | ||
| 1056 | $ST r10,`6*$BNSZ`(r3) #r[6]=c1 | 966 | $ST r10,`6*$BNSZ`(r3) #r[6]=c1 |
| 1057 | $ST r11,`7*$BNSZ`(r3) #r[7]=c2 | 967 | $ST r11,`7*$BNSZ`(r3) #r[7]=c2 |
| 1058 | bclr BO_ALWAYS,CR0_LT | 968 | blr |
| 1059 | .long 0x00000000 | 969 | .long 0x00000000 |
| 1060 | 970 | ||
| 1061 | # | 971 | # |
| @@ -1591,7 +1501,7 @@ sub data { | |||
| 1591 | adde r10,r10,r9 | 1501 | adde r10,r10,r9 |
| 1592 | $ST r12,`14*$BNSZ`(r3) #r[14]=c3; | 1502 | $ST r12,`14*$BNSZ`(r3) #r[14]=c3; |
| 1593 | $ST r10,`15*$BNSZ`(r3) #r[15]=c1; | 1503 | $ST r10,`15*$BNSZ`(r3) #r[15]=c1; |
| 1594 | bclr BO_ALWAYS,CR0_LT | 1504 | blr |
| 1595 | .long 0x00000000 | 1505 | .long 0x00000000 |
| 1596 | 1506 | ||
| 1597 | # | 1507 | # |
| @@ -1623,7 +1533,7 @@ sub data { | |||
| 1623 | subfc. r7,r0,r6 # If r6 is 0 then result is 0. | 1533 | subfc. r7,r0,r6 # If r6 is 0 then result is 0. |
| 1624 | # if r6 > 0 then result !=0 | 1534 | # if r6 > 0 then result !=0 |
| 1625 | # In either case carry bit is set. | 1535 | # In either case carry bit is set. |
| 1626 | bc BO_IF,CR0_EQ,Lppcasm_sub_adios | 1536 | beq Lppcasm_sub_adios |
| 1627 | addi r4,r4,-$BNSZ | 1537 | addi r4,r4,-$BNSZ |
| 1628 | addi r3,r3,-$BNSZ | 1538 | addi r3,r3,-$BNSZ |
| 1629 | addi r5,r5,-$BNSZ | 1539 | addi r5,r5,-$BNSZ |
| @@ -1635,11 +1545,11 @@ Lppcasm_sub_mainloop: | |||
| 1635 | # if carry = 1 this is r7-r8. Else it | 1545 | # if carry = 1 this is r7-r8. Else it |
| 1636 | # is r7-r8 -1 as we need. | 1546 | # is r7-r8 -1 as we need. |
| 1637 | $STU r6,$BNSZ(r3) | 1547 | $STU r6,$BNSZ(r3) |
| 1638 | bc BO_dCTR_NZERO,CR0_EQ,Lppcasm_sub_mainloop | 1548 | bdnz- Lppcasm_sub_mainloop |
| 1639 | Lppcasm_sub_adios: | 1549 | Lppcasm_sub_adios: |
| 1640 | subfze r3,r0 # if carry bit is set then r3 = 0 else -1 | 1550 | subfze r3,r0 # if carry bit is set then r3 = 0 else -1 |
| 1641 | andi. r3,r3,1 # keep only last bit. | 1551 | andi. r3,r3,1 # keep only last bit. |
| 1642 | bclr BO_ALWAYS,CR0_LT | 1552 | blr |
| 1643 | .long 0x00000000 | 1553 | .long 0x00000000 |
| 1644 | 1554 | ||
| 1645 | 1555 | ||
| @@ -1670,7 +1580,7 @@ Lppcasm_sub_adios: | |||
| 1670 | # check for r6 = 0. Is this needed? | 1580 | # check for r6 = 0. Is this needed? |
| 1671 | # | 1581 | # |
| 1672 | addic. r6,r6,0 #test r6 and clear carry bit. | 1582 | addic. r6,r6,0 #test r6 and clear carry bit. |
| 1673 | bc BO_IF,CR0_EQ,Lppcasm_add_adios | 1583 | beq Lppcasm_add_adios |
| 1674 | addi r4,r4,-$BNSZ | 1584 | addi r4,r4,-$BNSZ |
| 1675 | addi r3,r3,-$BNSZ | 1585 | addi r3,r3,-$BNSZ |
| 1676 | addi r5,r5,-$BNSZ | 1586 | addi r5,r5,-$BNSZ |
| @@ -1680,10 +1590,10 @@ Lppcasm_add_mainloop: | |||
| 1680 | $LDU r8,$BNSZ(r5) | 1590 | $LDU r8,$BNSZ(r5) |
| 1681 | adde r8,r7,r8 | 1591 | adde r8,r7,r8 |
| 1682 | $STU r8,$BNSZ(r3) | 1592 | $STU r8,$BNSZ(r3) |
| 1683 | bc BO_dCTR_NZERO,CR0_EQ,Lppcasm_add_mainloop | 1593 | bdnz- Lppcasm_add_mainloop |
| 1684 | Lppcasm_add_adios: | 1594 | Lppcasm_add_adios: |
| 1685 | addze r3,r0 #return carry bit. | 1595 | addze r3,r0 #return carry bit. |
| 1686 | bclr BO_ALWAYS,CR0_LT | 1596 | blr |
| 1687 | .long 0x00000000 | 1597 | .long 0x00000000 |
| 1688 | 1598 | ||
| 1689 | # | 1599 | # |
| @@ -1707,24 +1617,24 @@ Lppcasm_add_adios: | |||
| 1707 | # r5 = d | 1617 | # r5 = d |
| 1708 | 1618 | ||
| 1709 | $UCMPI 0,r5,0 # compare r5 and 0 | 1619 | $UCMPI 0,r5,0 # compare r5 and 0 |
| 1710 | bc BO_IF_NOT,CR0_EQ,Lppcasm_div1 # proceed if d!=0 | 1620 | bne Lppcasm_div1 # proceed if d!=0 |
| 1711 | li r3,-1 # d=0 return -1 | 1621 | li r3,-1 # d=0 return -1 |
| 1712 | bclr BO_ALWAYS,CR0_LT | 1622 | blr |
| 1713 | Lppcasm_div1: | 1623 | Lppcasm_div1: |
| 1714 | xor r0,r0,r0 #r0=0 | 1624 | xor r0,r0,r0 #r0=0 |
| 1715 | li r8,$BITS | 1625 | li r8,$BITS |
| 1716 | $CNTLZ. r7,r5 #r7 = num leading 0s in d. | 1626 | $CNTLZ. r7,r5 #r7 = num leading 0s in d. |
| 1717 | bc BO_IF,CR0_EQ,Lppcasm_div2 #proceed if no leading zeros | 1627 | beq Lppcasm_div2 #proceed if no leading zeros |
| 1718 | subf r8,r7,r8 #r8 = BN_num_bits_word(d) | 1628 | subf r8,r7,r8 #r8 = BN_num_bits_word(d) |
| 1719 | $SHR. r9,r3,r8 #are there any bits above r8'th? | 1629 | $SHR. r9,r3,r8 #are there any bits above r8'th? |
| 1720 | $TR 16,r9,r0 #if there're, signal to dump core... | 1630 | $TR 16,r9,r0 #if there're, signal to dump core... |
| 1721 | Lppcasm_div2: | 1631 | Lppcasm_div2: |
| 1722 | $UCMP 0,r3,r5 #h>=d? | 1632 | $UCMP 0,r3,r5 #h>=d? |
| 1723 | bc BO_IF,CR0_LT,Lppcasm_div3 #goto Lppcasm_div3 if not | 1633 | blt Lppcasm_div3 #goto Lppcasm_div3 if not |
| 1724 | subf r3,r5,r3 #h-=d ; | 1634 | subf r3,r5,r3 #h-=d ; |
| 1725 | Lppcasm_div3: #r7 = BN_BITS2-i. so r7=i | 1635 | Lppcasm_div3: #r7 = BN_BITS2-i. so r7=i |
| 1726 | cmpi 0,0,r7,0 # is (i == 0)? | 1636 | cmpi 0,0,r7,0 # is (i == 0)? |
| 1727 | bc BO_IF,CR0_EQ,Lppcasm_div4 | 1637 | beq Lppcasm_div4 |
| 1728 | $SHL r3,r3,r7 # h = (h<< i) | 1638 | $SHL r3,r3,r7 # h = (h<< i) |
| 1729 | $SHR r8,r4,r8 # r8 = (l >> BN_BITS2 -i) | 1639 | $SHR r8,r4,r8 # r8 = (l >> BN_BITS2 -i) |
| 1730 | $SHL r5,r5,r7 # d<<=i | 1640 | $SHL r5,r5,r7 # d<<=i |
| @@ -1741,7 +1651,7 @@ Lppcasm_divouterloop: | |||
| 1741 | $SHRI r11,r4,`$BITS/2` #r11= (l&BN_MASK2h)>>BN_BITS4 | 1651 | $SHRI r11,r4,`$BITS/2` #r11= (l&BN_MASK2h)>>BN_BITS4 |
| 1742 | # compute here for innerloop. | 1652 | # compute here for innerloop. |
| 1743 | $UCMP 0,r8,r9 # is (h>>BN_BITS4)==dh | 1653 | $UCMP 0,r8,r9 # is (h>>BN_BITS4)==dh |
| 1744 | bc BO_IF_NOT,CR0_EQ,Lppcasm_div5 # goto Lppcasm_div5 if not | 1654 | bne Lppcasm_div5 # goto Lppcasm_div5 if not |
| 1745 | 1655 | ||
| 1746 | li r8,-1 | 1656 | li r8,-1 |
| 1747 | $CLRU r8,r8,`$BITS/2` #q = BN_MASK2l | 1657 | $CLRU r8,r8,`$BITS/2` #q = BN_MASK2l |
| @@ -1762,9 +1672,9 @@ Lppcasm_divinnerloop: | |||
| 1762 | # the following 2 instructions do that | 1672 | # the following 2 instructions do that |
| 1763 | $SHLI r7,r10,`$BITS/2` # r7 = (t<<BN_BITS4) | 1673 | $SHLI r7,r10,`$BITS/2` # r7 = (t<<BN_BITS4) |
| 1764 | or r7,r7,r11 # r7|=((l&BN_MASK2h)>>BN_BITS4) | 1674 | or r7,r7,r11 # r7|=((l&BN_MASK2h)>>BN_BITS4) |
| 1765 | $UCMP 1,r6,r7 # compare (tl <= r7) | 1675 | $UCMP cr1,r6,r7 # compare (tl <= r7) |
| 1766 | bc BO_IF_NOT,CR0_EQ,Lppcasm_divinnerexit | 1676 | bne Lppcasm_divinnerexit |
| 1767 | bc BO_IF_NOT,CR1_FEX,Lppcasm_divinnerexit | 1677 | ble cr1,Lppcasm_divinnerexit |
| 1768 | addi r8,r8,-1 #q-- | 1678 | addi r8,r8,-1 #q-- |
| 1769 | subf r12,r9,r12 #th -=dh | 1679 | subf r12,r9,r12 #th -=dh |
| 1770 | $CLRU r10,r5,`$BITS/2` #r10=dl. t is no longer needed in loop. | 1680 | $CLRU r10,r5,`$BITS/2` #r10=dl. t is no longer needed in loop. |
| @@ -1773,14 +1683,14 @@ Lppcasm_divinnerloop: | |||
| 1773 | Lppcasm_divinnerexit: | 1683 | Lppcasm_divinnerexit: |
| 1774 | $SHRI r10,r6,`$BITS/2` #t=(tl>>BN_BITS4) | 1684 | $SHRI r10,r6,`$BITS/2` #t=(tl>>BN_BITS4) |
| 1775 | $SHLI r11,r6,`$BITS/2` #tl=(tl<<BN_BITS4)&BN_MASK2h; | 1685 | $SHLI r11,r6,`$BITS/2` #tl=(tl<<BN_BITS4)&BN_MASK2h; |
| 1776 | $UCMP 1,r4,r11 # compare l and tl | 1686 | $UCMP cr1,r4,r11 # compare l and tl |
| 1777 | add r12,r12,r10 # th+=t | 1687 | add r12,r12,r10 # th+=t |
| 1778 | bc BO_IF_NOT,CR1_FX,Lppcasm_div7 # if (l>=tl) goto Lppcasm_div7 | 1688 | bge cr1,Lppcasm_div7 # if (l>=tl) goto Lppcasm_div7 |
| 1779 | addi r12,r12,1 # th++ | 1689 | addi r12,r12,1 # th++ |
| 1780 | Lppcasm_div7: | 1690 | Lppcasm_div7: |
| 1781 | subf r11,r11,r4 #r11=l-tl | 1691 | subf r11,r11,r4 #r11=l-tl |
| 1782 | $UCMP 1,r3,r12 #compare h and th | 1692 | $UCMP cr1,r3,r12 #compare h and th |
| 1783 | bc BO_IF_NOT,CR1_FX,Lppcasm_div8 #if (h>=th) goto Lppcasm_div8 | 1693 | bge cr1,Lppcasm_div8 #if (h>=th) goto Lppcasm_div8 |
| 1784 | addi r8,r8,-1 # q-- | 1694 | addi r8,r8,-1 # q-- |
| 1785 | add r3,r5,r3 # h+=d | 1695 | add r3,r5,r3 # h+=d |
| 1786 | Lppcasm_div8: | 1696 | Lppcasm_div8: |
| @@ -1791,12 +1701,12 @@ Lppcasm_div8: | |||
| 1791 | # the following 2 instructions will do this. | 1701 | # the following 2 instructions will do this. |
| 1792 | $INSR r11,r12,`$BITS/2`,`$BITS/2` # r11 is the value we want rotated $BITS/2. | 1702 | $INSR r11,r12,`$BITS/2`,`$BITS/2` # r11 is the value we want rotated $BITS/2. |
| 1793 | $ROTL r3,r11,`$BITS/2` # rotate by $BITS/2 and store in r3 | 1703 | $ROTL r3,r11,`$BITS/2` # rotate by $BITS/2 and store in r3 |
| 1794 | bc BO_dCTR_ZERO,CR0_EQ,Lppcasm_div9#if (count==0) break ; | 1704 | bdz Lppcasm_div9 #if (count==0) break ; |
| 1795 | $SHLI r0,r8,`$BITS/2` #ret =q<<BN_BITS4 | 1705 | $SHLI r0,r8,`$BITS/2` #ret =q<<BN_BITS4 |
| 1796 | b Lppcasm_divouterloop | 1706 | b Lppcasm_divouterloop |
| 1797 | Lppcasm_div9: | 1707 | Lppcasm_div9: |
| 1798 | or r3,r8,r0 | 1708 | or r3,r8,r0 |
| 1799 | bclr BO_ALWAYS,CR0_LT | 1709 | blr |
| 1800 | .long 0x00000000 | 1710 | .long 0x00000000 |
| 1801 | 1711 | ||
| 1802 | # | 1712 | # |
| @@ -1822,7 +1732,7 @@ Lppcasm_div9: | |||
| 1822 | # No unrolling done here. Not performance critical. | 1732 | # No unrolling done here. Not performance critical. |
| 1823 | 1733 | ||
| 1824 | addic. r5,r5,0 #test r5. | 1734 | addic. r5,r5,0 #test r5. |
| 1825 | bc BO_IF,CR0_EQ,Lppcasm_sqr_adios | 1735 | beq Lppcasm_sqr_adios |
| 1826 | addi r4,r4,-$BNSZ | 1736 | addi r4,r4,-$BNSZ |
| 1827 | addi r3,r3,-$BNSZ | 1737 | addi r3,r3,-$BNSZ |
| 1828 | mtctr r5 | 1738 | mtctr r5 |
| @@ -1833,9 +1743,9 @@ Lppcasm_sqr_mainloop: | |||
| 1833 | $UMULH r8,r6,r6 | 1743 | $UMULH r8,r6,r6 |
| 1834 | $STU r7,$BNSZ(r3) | 1744 | $STU r7,$BNSZ(r3) |
| 1835 | $STU r8,$BNSZ(r3) | 1745 | $STU r8,$BNSZ(r3) |
| 1836 | bc BO_dCTR_NZERO,CR0_EQ,Lppcasm_sqr_mainloop | 1746 | bdnz- Lppcasm_sqr_mainloop |
| 1837 | Lppcasm_sqr_adios: | 1747 | Lppcasm_sqr_adios: |
| 1838 | bclr BO_ALWAYS,CR0_LT | 1748 | blr |
| 1839 | .long 0x00000000 | 1749 | .long 0x00000000 |
| 1840 | 1750 | ||
| 1841 | 1751 | ||
| @@ -1858,7 +1768,7 @@ Lppcasm_sqr_adios: | |||
| 1858 | xor r0,r0,r0 | 1768 | xor r0,r0,r0 |
| 1859 | xor r12,r12,r12 # used for carry | 1769 | xor r12,r12,r12 # used for carry |
| 1860 | rlwinm. r7,r5,30,2,31 # num >> 2 | 1770 | rlwinm. r7,r5,30,2,31 # num >> 2 |
| 1861 | bc BO_IF,CR0_EQ,Lppcasm_mw_REM | 1771 | beq Lppcasm_mw_REM |
| 1862 | mtctr r7 | 1772 | mtctr r7 |
| 1863 | Lppcasm_mw_LOOP: | 1773 | Lppcasm_mw_LOOP: |
| 1864 | #mul(rp[0],ap[0],w,c1); | 1774 | #mul(rp[0],ap[0],w,c1); |
| @@ -1896,11 +1806,11 @@ Lppcasm_mw_LOOP: | |||
| 1896 | 1806 | ||
| 1897 | addi r3,r3,`4*$BNSZ` | 1807 | addi r3,r3,`4*$BNSZ` |
| 1898 | addi r4,r4,`4*$BNSZ` | 1808 | addi r4,r4,`4*$BNSZ` |
| 1899 | bc BO_dCTR_NZERO,CR0_EQ,Lppcasm_mw_LOOP | 1809 | bdnz- Lppcasm_mw_LOOP |
| 1900 | 1810 | ||
| 1901 | Lppcasm_mw_REM: | 1811 | Lppcasm_mw_REM: |
| 1902 | andi. r5,r5,0x3 | 1812 | andi. r5,r5,0x3 |
| 1903 | bc BO_IF,CR0_EQ,Lppcasm_mw_OVER | 1813 | beq Lppcasm_mw_OVER |
| 1904 | #mul(rp[0],ap[0],w,c1); | 1814 | #mul(rp[0],ap[0],w,c1); |
| 1905 | $LD r8,`0*$BNSZ`(r4) | 1815 | $LD r8,`0*$BNSZ`(r4) |
| 1906 | $UMULL r9,r6,r8 | 1816 | $UMULL r9,r6,r8 |
| @@ -1912,7 +1822,7 @@ Lppcasm_mw_REM: | |||
| 1912 | 1822 | ||
| 1913 | addi r5,r5,-1 | 1823 | addi r5,r5,-1 |
| 1914 | cmpli 0,0,r5,0 | 1824 | cmpli 0,0,r5,0 |
| 1915 | bc BO_IF,CR0_EQ,Lppcasm_mw_OVER | 1825 | beq Lppcasm_mw_OVER |
| 1916 | 1826 | ||
| 1917 | 1827 | ||
| 1918 | #mul(rp[1],ap[1],w,c1); | 1828 | #mul(rp[1],ap[1],w,c1); |
| @@ -1926,7 +1836,7 @@ Lppcasm_mw_REM: | |||
| 1926 | 1836 | ||
| 1927 | addi r5,r5,-1 | 1837 | addi r5,r5,-1 |
| 1928 | cmpli 0,0,r5,0 | 1838 | cmpli 0,0,r5,0 |
| 1929 | bc BO_IF,CR0_EQ,Lppcasm_mw_OVER | 1839 | beq Lppcasm_mw_OVER |
| 1930 | 1840 | ||
| 1931 | #mul_add(rp[2],ap[2],w,c1); | 1841 | #mul_add(rp[2],ap[2],w,c1); |
| 1932 | $LD r8,`2*$BNSZ`(r4) | 1842 | $LD r8,`2*$BNSZ`(r4) |
| @@ -1939,7 +1849,7 @@ Lppcasm_mw_REM: | |||
| 1939 | 1849 | ||
| 1940 | Lppcasm_mw_OVER: | 1850 | Lppcasm_mw_OVER: |
| 1941 | addi r3,r12,0 | 1851 | addi r3,r12,0 |
| 1942 | bclr BO_ALWAYS,CR0_LT | 1852 | blr |
| 1943 | .long 0x00000000 | 1853 | .long 0x00000000 |
| 1944 | 1854 | ||
| 1945 | # | 1855 | # |
| @@ -1964,7 +1874,7 @@ Lppcasm_mw_OVER: | |||
| 1964 | xor r0,r0,r0 #r0 = 0 | 1874 | xor r0,r0,r0 #r0 = 0 |
| 1965 | xor r12,r12,r12 #r12 = 0 . used for carry | 1875 | xor r12,r12,r12 #r12 = 0 . used for carry |
| 1966 | rlwinm. r7,r5,30,2,31 # num >> 2 | 1876 | rlwinm. r7,r5,30,2,31 # num >> 2 |
| 1967 | bc BO_IF,CR0_EQ,Lppcasm_maw_leftover # if (num < 4) go LPPCASM_maw_leftover | 1877 | beq Lppcasm_maw_leftover # if (num < 4) go LPPCASM_maw_leftover |
| 1968 | mtctr r7 | 1878 | mtctr r7 |
| 1969 | Lppcasm_maw_mainloop: | 1879 | Lppcasm_maw_mainloop: |
| 1970 | #mul_add(rp[0],ap[0],w,c1); | 1880 | #mul_add(rp[0],ap[0],w,c1); |
| @@ -2017,11 +1927,11 @@ Lppcasm_maw_mainloop: | |||
| 2017 | $ST r11,`3*$BNSZ`(r3) | 1927 | $ST r11,`3*$BNSZ`(r3) |
| 2018 | addi r3,r3,`4*$BNSZ` | 1928 | addi r3,r3,`4*$BNSZ` |
| 2019 | addi r4,r4,`4*$BNSZ` | 1929 | addi r4,r4,`4*$BNSZ` |
| 2020 | bc BO_dCTR_NZERO,CR0_EQ,Lppcasm_maw_mainloop | 1930 | bdnz- Lppcasm_maw_mainloop |
| 2021 | 1931 | ||
| 2022 | Lppcasm_maw_leftover: | 1932 | Lppcasm_maw_leftover: |
| 2023 | andi. r5,r5,0x3 | 1933 | andi. r5,r5,0x3 |
| 2024 | bc BO_IF,CR0_EQ,Lppcasm_maw_adios | 1934 | beq Lppcasm_maw_adios |
| 2025 | addi r3,r3,-$BNSZ | 1935 | addi r3,r3,-$BNSZ |
| 2026 | addi r4,r4,-$BNSZ | 1936 | addi r4,r4,-$BNSZ |
| 2027 | #mul_add(rp[0],ap[0],w,c1); | 1937 | #mul_add(rp[0],ap[0],w,c1); |
| @@ -2036,7 +1946,7 @@ Lppcasm_maw_leftover: | |||
| 2036 | addze r12,r10 | 1946 | addze r12,r10 |
| 2037 | $ST r9,0(r3) | 1947 | $ST r9,0(r3) |
| 2038 | 1948 | ||
| 2039 | bc BO_dCTR_ZERO,CR0_EQ,Lppcasm_maw_adios | 1949 | bdz Lppcasm_maw_adios |
| 2040 | #mul_add(rp[1],ap[1],w,c1); | 1950 | #mul_add(rp[1],ap[1],w,c1); |
| 2041 | $LDU r8,$BNSZ(r4) | 1951 | $LDU r8,$BNSZ(r4) |
| 2042 | $UMULL r9,r6,r8 | 1952 | $UMULL r9,r6,r8 |
| @@ -2048,7 +1958,7 @@ Lppcasm_maw_leftover: | |||
| 2048 | addze r12,r10 | 1958 | addze r12,r10 |
| 2049 | $ST r9,0(r3) | 1959 | $ST r9,0(r3) |
| 2050 | 1960 | ||
| 2051 | bc BO_dCTR_ZERO,CR0_EQ,Lppcasm_maw_adios | 1961 | bdz Lppcasm_maw_adios |
| 2052 | #mul_add(rp[2],ap[2],w,c1); | 1962 | #mul_add(rp[2],ap[2],w,c1); |
| 2053 | $LDU r8,$BNSZ(r4) | 1963 | $LDU r8,$BNSZ(r4) |
| 2054 | $UMULL r9,r6,r8 | 1964 | $UMULL r9,r6,r8 |
| @@ -2062,17 +1972,10 @@ Lppcasm_maw_leftover: | |||
| 2062 | 1972 | ||
| 2063 | Lppcasm_maw_adios: | 1973 | Lppcasm_maw_adios: |
| 2064 | addi r3,r12,0 | 1974 | addi r3,r12,0 |
| 2065 | bclr BO_ALWAYS,CR0_LT | 1975 | blr |
| 2066 | .long 0x00000000 | 1976 | .long 0x00000000 |
| 2067 | .align 4 | 1977 | .align 4 |
| 2068 | EOF | 1978 | EOF |
| 2069 | $data =~ s/\`([^\`]*)\`/eval $1/gem; | 1979 | $data =~ s/\`([^\`]*)\`/eval $1/gem; |
| 2070 | 1980 | print $data; | |
| 2071 | # if some assembler chokes on some simplified mnemonic, | 1981 | close STDOUT; |
| 2072 | # this is the spot to fix it up, e.g.: | ||
| 2073 | # GNU as doesn't seem to accept cmplw, 32-bit unsigned compare | ||
| 2074 | $data =~ s/^(\s*)cmplw(\s+)([^,]+),(.*)/$1cmpl$2$3,0,$4/gm; | ||
| 2075 | # assembler X doesn't accept li, load immediate value | ||
| 2076 | #$data =~ s/^(\s*)li(\s+)([^,]+),(.*)/$1addi$2$3,0,$4/gm; | ||
| 2077 | return($data); | ||
| 2078 | } | ||
diff --git a/src/lib/libcrypto/bn/asm/sparcv8plus.S b/src/lib/libcrypto/bn/asm/sparcv8plus.S index 8c56e2e7e7..63de1860f2 100644 --- a/src/lib/libcrypto/bn/asm/sparcv8plus.S +++ b/src/lib/libcrypto/bn/asm/sparcv8plus.S | |||
| @@ -144,6 +144,19 @@ | |||
| 144 | * } | 144 | * } |
| 145 | */ | 145 | */ |
| 146 | 146 | ||
| 147 | #if defined(__SUNPRO_C) && defined(__sparcv9) | ||
| 148 | /* They've said -xarch=v9 at command line */ | ||
| 149 | .register %g2,#scratch | ||
| 150 | .register %g3,#scratch | ||
| 151 | # define FRAME_SIZE -192 | ||
| 152 | #elif defined(__GNUC__) && defined(__arch64__) | ||
| 153 | /* They've said -m64 at command line */ | ||
| 154 | .register %g2,#scratch | ||
| 155 | .register %g3,#scratch | ||
| 156 | # define FRAME_SIZE -192 | ||
| 157 | #else | ||
| 158 | # define FRAME_SIZE -96 | ||
| 159 | #endif | ||
| 147 | /* | 160 | /* |
| 148 | * GNU assembler can't stand stuw:-( | 161 | * GNU assembler can't stand stuw:-( |
| 149 | */ | 162 | */ |
| @@ -619,8 +632,6 @@ bn_sub_words: | |||
| 619 | * Andy. | 632 | * Andy. |
| 620 | */ | 633 | */ |
| 621 | 634 | ||
| 622 | #define FRAME_SIZE -96 | ||
| 623 | |||
| 624 | /* | 635 | /* |
| 625 | * Here is register usage map for *all* routines below. | 636 | * Here is register usage map for *all* routines below. |
| 626 | */ | 637 | */ |
diff --git a/src/lib/libcrypto/bn/asm/x86_64-gcc.c b/src/lib/libcrypto/bn/asm/x86_64-gcc.c index f13f52dd85..acb0b40118 100644 --- a/src/lib/libcrypto/bn/asm/x86_64-gcc.c +++ b/src/lib/libcrypto/bn/asm/x86_64-gcc.c | |||
| @@ -1,4 +1,5 @@ | |||
| 1 | #ifdef __SUNPRO_C | 1 | #include "../bn_lcl.h" |
| 2 | #if !(defined(__GNUC__) && __GNUC__>=2) | ||
| 2 | # include "../bn_asm.c" /* kind of dirty hack for Sun Studio */ | 3 | # include "../bn_asm.c" /* kind of dirty hack for Sun Studio */ |
| 3 | #else | 4 | #else |
| 4 | /* | 5 | /* |
| @@ -54,7 +55,15 @@ | |||
| 54 | * machine. | 55 | * machine. |
| 55 | */ | 56 | */ |
| 56 | 57 | ||
| 58 | #ifdef _WIN64 | ||
| 59 | #define BN_ULONG unsigned long long | ||
| 60 | #else | ||
| 57 | #define BN_ULONG unsigned long | 61 | #define BN_ULONG unsigned long |
| 62 | #endif | ||
| 63 | |||
| 64 | #undef mul | ||
| 65 | #undef mul_add | ||
| 66 | #undef sqr | ||
| 58 | 67 | ||
| 59 | /* | 68 | /* |
| 60 | * "m"(a), "+m"(r) is the way to favor DirectPath µ-code; | 69 | * "m"(a), "+m"(r) is the way to favor DirectPath µ-code; |
| @@ -97,7 +106,7 @@ | |||
| 97 | : "a"(a) \ | 106 | : "a"(a) \ |
| 98 | : "cc"); | 107 | : "cc"); |
| 99 | 108 | ||
| 100 | BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) | 109 | BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) |
| 101 | { | 110 | { |
| 102 | BN_ULONG c1=0; | 111 | BN_ULONG c1=0; |
| 103 | 112 | ||
| @@ -121,7 +130,7 @@ BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) | |||
| 121 | return(c1); | 130 | return(c1); |
| 122 | } | 131 | } |
| 123 | 132 | ||
| 124 | BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) | 133 | BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) |
| 125 | { | 134 | { |
| 126 | BN_ULONG c1=0; | 135 | BN_ULONG c1=0; |
| 127 | 136 | ||
| @@ -144,7 +153,7 @@ BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) | |||
| 144 | return(c1); | 153 | return(c1); |
| 145 | } | 154 | } |
| 146 | 155 | ||
| 147 | void bn_sqr_words(BN_ULONG *r, BN_ULONG *a, int n) | 156 | void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n) |
| 148 | { | 157 | { |
| 149 | if (n <= 0) return; | 158 | if (n <= 0) return; |
| 150 | 159 | ||
| @@ -175,14 +184,14 @@ BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d) | |||
| 175 | return ret; | 184 | return ret; |
| 176 | } | 185 | } |
| 177 | 186 | ||
| 178 | BN_ULONG bn_add_words (BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp,int n) | 187 | BN_ULONG bn_add_words (BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,int n) |
| 179 | { BN_ULONG ret=0,i=0; | 188 | { BN_ULONG ret=0,i=0; |
| 180 | 189 | ||
| 181 | if (n <= 0) return 0; | 190 | if (n <= 0) return 0; |
| 182 | 191 | ||
| 183 | asm ( | 192 | asm ( |
| 184 | " subq %2,%2 \n" | 193 | " subq %2,%2 \n" |
| 185 | ".align 16 \n" | 194 | ".p2align 4 \n" |
| 186 | "1: movq (%4,%2,8),%0 \n" | 195 | "1: movq (%4,%2,8),%0 \n" |
| 187 | " adcq (%5,%2,8),%0 \n" | 196 | " adcq (%5,%2,8),%0 \n" |
| 188 | " movq %0,(%3,%2,8) \n" | 197 | " movq %0,(%3,%2,8) \n" |
| @@ -198,14 +207,14 @@ BN_ULONG bn_add_words (BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp,int n) | |||
| 198 | } | 207 | } |
| 199 | 208 | ||
| 200 | #ifndef SIMICS | 209 | #ifndef SIMICS |
| 201 | BN_ULONG bn_sub_words (BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp,int n) | 210 | BN_ULONG bn_sub_words (BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,int n) |
| 202 | { BN_ULONG ret=0,i=0; | 211 | { BN_ULONG ret=0,i=0; |
| 203 | 212 | ||
| 204 | if (n <= 0) return 0; | 213 | if (n <= 0) return 0; |
| 205 | 214 | ||
| 206 | asm ( | 215 | asm ( |
| 207 | " subq %2,%2 \n" | 216 | " subq %2,%2 \n" |
| 208 | ".align 16 \n" | 217 | ".p2align 4 \n" |
| 209 | "1: movq (%4,%2,8),%0 \n" | 218 | "1: movq (%4,%2,8),%0 \n" |
| 210 | " sbbq (%5,%2,8),%0 \n" | 219 | " sbbq (%5,%2,8),%0 \n" |
| 211 | " movq %0,(%3,%2,8) \n" | 220 | " movq %0,(%3,%2,8) \n" |
| @@ -485,7 +494,7 @@ void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | |||
| 485 | r[7]=c2; | 494 | r[7]=c2; |
| 486 | } | 495 | } |
| 487 | 496 | ||
| 488 | void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a) | 497 | void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a) |
| 489 | { | 498 | { |
| 490 | BN_ULONG t1,t2; | 499 | BN_ULONG t1,t2; |
| 491 | BN_ULONG c1,c2,c3; | 500 | BN_ULONG c1,c2,c3; |
| @@ -561,7 +570,7 @@ void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a) | |||
| 561 | r[15]=c1; | 570 | r[15]=c1; |
| 562 | } | 571 | } |
| 563 | 572 | ||
| 564 | void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a) | 573 | void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) |
| 565 | { | 574 | { |
| 566 | BN_ULONG t1,t2; | 575 | BN_ULONG t1,t2; |
| 567 | BN_ULONG c1,c2,c3; | 576 | BN_ULONG c1,c2,c3; |
diff --git a/src/lib/libcrypto/bn/asm/x86_64-mont.pl b/src/lib/libcrypto/bn/asm/x86_64-mont.pl index c43b69592a..3b7a6f243f 100755 --- a/src/lib/libcrypto/bn/asm/x86_64-mont.pl +++ b/src/lib/libcrypto/bn/asm/x86_64-mont.pl | |||
| @@ -15,14 +15,18 @@ | |||
| 15 | # respectful 50%. It remains to be seen if loop unrolling and | 15 | # respectful 50%. It remains to be seen if loop unrolling and |
| 16 | # dedicated squaring routine can provide further improvement... | 16 | # dedicated squaring routine can provide further improvement... |
| 17 | 17 | ||
| 18 | $output=shift; | 18 | $flavour = shift; |
| 19 | $output = shift; | ||
| 20 | if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } | ||
| 21 | |||
| 22 | $win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); | ||
| 19 | 23 | ||
| 20 | $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; | 24 | $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; |
| 21 | ( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or | 25 | ( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or |
| 22 | ( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or | 26 | ( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or |
| 23 | die "can't locate x86_64-xlate.pl"; | 27 | die "can't locate x86_64-xlate.pl"; |
| 24 | 28 | ||
| 25 | open STDOUT,"| $^X $xlate $output"; | 29 | open STDOUT,"| $^X $xlate $flavour $output"; |
| 26 | 30 | ||
| 27 | # int bn_mul_mont( | 31 | # int bn_mul_mont( |
| 28 | $rp="%rdi"; # BN_ULONG *rp, | 32 | $rp="%rdi"; # BN_ULONG *rp, |
| @@ -55,13 +59,14 @@ bn_mul_mont: | |||
| 55 | push %r15 | 59 | push %r15 |
| 56 | 60 | ||
| 57 | mov ${num}d,${num}d | 61 | mov ${num}d,${num}d |
| 58 | lea 2($num),%rax | 62 | lea 2($num),%r10 |
| 59 | mov %rsp,%rbp | 63 | mov %rsp,%r11 |
| 60 | neg %rax | 64 | neg %r10 |
| 61 | lea (%rsp,%rax,8),%rsp # tp=alloca(8*(num+2)) | 65 | lea (%rsp,%r10,8),%rsp # tp=alloca(8*(num+2)) |
| 62 | and \$-1024,%rsp # minimize TLB usage | 66 | and \$-1024,%rsp # minimize TLB usage |
| 63 | 67 | ||
| 64 | mov %rbp,8(%rsp,$num,8) # tp[num+1]=%rsp | 68 | mov %r11,8(%rsp,$num,8) # tp[num+1]=%rsp |
| 69 | .Lprologue: | ||
| 65 | mov %rdx,$bp # $bp reassigned, remember? | 70 | mov %rdx,$bp # $bp reassigned, remember? |
| 66 | 71 | ||
| 67 | mov ($n0),$n0 # pull n0[0] value | 72 | mov ($n0),$n0 # pull n0[0] value |
| @@ -197,18 +202,129 @@ bn_mul_mont: | |||
| 197 | dec $j | 202 | dec $j |
| 198 | jge .Lcopy | 203 | jge .Lcopy |
| 199 | 204 | ||
| 200 | mov 8(%rsp,$num,8),%rsp # restore %rsp | 205 | mov 8(%rsp,$num,8),%rsi # restore %rsp |
| 201 | mov \$1,%rax | 206 | mov \$1,%rax |
| 207 | mov (%rsi),%r15 | ||
| 208 | mov 8(%rsi),%r14 | ||
| 209 | mov 16(%rsi),%r13 | ||
| 210 | mov 24(%rsi),%r12 | ||
| 211 | mov 32(%rsi),%rbp | ||
| 212 | mov 40(%rsi),%rbx | ||
| 213 | lea 48(%rsi),%rsp | ||
| 214 | .Lepilogue: | ||
| 215 | ret | ||
| 216 | .size bn_mul_mont,.-bn_mul_mont | ||
| 217 | .asciz "Montgomery Multiplication for x86_64, CRYPTOGAMS by <appro\@openssl.org>" | ||
| 218 | .align 16 | ||
| 219 | ___ | ||
| 220 | |||
| 221 | # EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame, | ||
| 222 | # CONTEXT *context,DISPATCHER_CONTEXT *disp) | ||
| 223 | if ($win64) { | ||
| 224 | $rec="%rcx"; | ||
| 225 | $frame="%rdx"; | ||
| 226 | $context="%r8"; | ||
| 227 | $disp="%r9"; | ||
| 228 | |||
| 229 | $code.=<<___; | ||
| 230 | .extern __imp_RtlVirtualUnwind | ||
| 231 | .type se_handler,\@abi-omnipotent | ||
| 232 | .align 16 | ||
| 233 | se_handler: | ||
| 234 | push %rsi | ||
| 235 | push %rdi | ||
| 236 | push %rbx | ||
| 237 | push %rbp | ||
| 238 | push %r12 | ||
| 239 | push %r13 | ||
| 240 | push %r14 | ||
| 241 | push %r15 | ||
| 242 | pushfq | ||
| 243 | sub \$64,%rsp | ||
| 244 | |||
| 245 | mov 120($context),%rax # pull context->Rax | ||
| 246 | mov 248($context),%rbx # pull context->Rip | ||
| 247 | |||
| 248 | lea .Lprologue(%rip),%r10 | ||
| 249 | cmp %r10,%rbx # context->Rip<.Lprologue | ||
| 250 | jb .Lin_prologue | ||
| 251 | |||
| 252 | mov 152($context),%rax # pull context->Rsp | ||
| 253 | |||
| 254 | lea .Lepilogue(%rip),%r10 | ||
| 255 | cmp %r10,%rbx # context->Rip>=.Lepilogue | ||
| 256 | jae .Lin_prologue | ||
| 257 | |||
| 258 | mov 192($context),%r10 # pull $num | ||
| 259 | mov 8(%rax,%r10,8),%rax # pull saved stack pointer | ||
| 260 | lea 48(%rax),%rax | ||
| 261 | |||
| 262 | mov -8(%rax),%rbx | ||
| 263 | mov -16(%rax),%rbp | ||
| 264 | mov -24(%rax),%r12 | ||
| 265 | mov -32(%rax),%r13 | ||
| 266 | mov -40(%rax),%r14 | ||
| 267 | mov -48(%rax),%r15 | ||
| 268 | mov %rbx,144($context) # restore context->Rbx | ||
| 269 | mov %rbp,160($context) # restore context->Rbp | ||
| 270 | mov %r12,216($context) # restore context->R12 | ||
| 271 | mov %r13,224($context) # restore context->R13 | ||
| 272 | mov %r14,232($context) # restore context->R14 | ||
| 273 | mov %r15,240($context) # restore context->R15 | ||
| 274 | |||
| 275 | .Lin_prologue: | ||
| 276 | mov 8(%rax),%rdi | ||
| 277 | mov 16(%rax),%rsi | ||
| 278 | mov %rax,152($context) # restore context->Rsp | ||
| 279 | mov %rsi,168($context) # restore context->Rsi | ||
| 280 | mov %rdi,176($context) # restore context->Rdi | ||
| 281 | |||
| 282 | mov 40($disp),%rdi # disp->ContextRecord | ||
| 283 | mov $context,%rsi # context | ||
| 284 | mov \$154,%ecx # sizeof(CONTEXT) | ||
| 285 | .long 0xa548f3fc # cld; rep movsq | ||
| 286 | |||
| 287 | mov $disp,%rsi | ||
| 288 | xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER | ||
| 289 | mov 8(%rsi),%rdx # arg2, disp->ImageBase | ||
| 290 | mov 0(%rsi),%r8 # arg3, disp->ControlPc | ||
| 291 | mov 16(%rsi),%r9 # arg4, disp->FunctionEntry | ||
| 292 | mov 40(%rsi),%r10 # disp->ContextRecord | ||
| 293 | lea 56(%rsi),%r11 # &disp->HandlerData | ||
| 294 | lea 24(%rsi),%r12 # &disp->EstablisherFrame | ||
| 295 | mov %r10,32(%rsp) # arg5 | ||
| 296 | mov %r11,40(%rsp) # arg6 | ||
| 297 | mov %r12,48(%rsp) # arg7 | ||
| 298 | mov %rcx,56(%rsp) # arg8, (NULL) | ||
| 299 | call *__imp_RtlVirtualUnwind(%rip) | ||
| 300 | |||
| 301 | mov \$1,%eax # ExceptionContinueSearch | ||
| 302 | add \$64,%rsp | ||
| 303 | popfq | ||
| 202 | pop %r15 | 304 | pop %r15 |
| 203 | pop %r14 | 305 | pop %r14 |
| 204 | pop %r13 | 306 | pop %r13 |
| 205 | pop %r12 | 307 | pop %r12 |
| 206 | pop %rbp | 308 | pop %rbp |
| 207 | pop %rbx | 309 | pop %rbx |
| 310 | pop %rdi | ||
| 311 | pop %rsi | ||
| 208 | ret | 312 | ret |
| 209 | .size bn_mul_mont,.-bn_mul_mont | 313 | .size se_handler,.-se_handler |
| 210 | .asciz "Montgomery Multiplication for x86_64, CRYPTOGAMS by <appro\@openssl.org>" | 314 | |
| 315 | .section .pdata | ||
| 316 | .align 4 | ||
| 317 | .rva .LSEH_begin_bn_mul_mont | ||
| 318 | .rva .LSEH_end_bn_mul_mont | ||
| 319 | .rva .LSEH_info_bn_mul_mont | ||
| 320 | |||
| 321 | .section .xdata | ||
| 322 | .align 8 | ||
| 323 | .LSEH_info_bn_mul_mont: | ||
| 324 | .byte 9,0,0,0 | ||
| 325 | .rva se_handler | ||
| 211 | ___ | 326 | ___ |
| 327 | } | ||
| 212 | 328 | ||
| 213 | print $code; | 329 | print $code; |
| 214 | close STDOUT; | 330 | close STDOUT; |
diff --git a/src/lib/libcrypto/bn/bn.h b/src/lib/libcrypto/bn/bn.h index f1719a5877..e484b7fc11 100644 --- a/src/lib/libcrypto/bn/bn.h +++ b/src/lib/libcrypto/bn/bn.h | |||
| @@ -56,6 +56,59 @@ | |||
| 56 | * [including the GNU Public Licence.] | 56 | * [including the GNU Public Licence.] |
| 57 | */ | 57 | */ |
| 58 | /* ==================================================================== | 58 | /* ==================================================================== |
| 59 | * Copyright (c) 1998-2006 The OpenSSL Project. All rights reserved. | ||
| 60 | * | ||
| 61 | * Redistribution and use in source and binary forms, with or without | ||
| 62 | * modification, are permitted provided that the following conditions | ||
| 63 | * are met: | ||
| 64 | * | ||
| 65 | * 1. Redistributions of source code must retain the above copyright | ||
| 66 | * notice, this list of conditions and the following disclaimer. | ||
| 67 | * | ||
| 68 | * 2. Redistributions in binary form must reproduce the above copyright | ||
| 69 | * notice, this list of conditions and the following disclaimer in | ||
| 70 | * the documentation and/or other materials provided with the | ||
| 71 | * distribution. | ||
| 72 | * | ||
| 73 | * 3. All advertising materials mentioning features or use of this | ||
| 74 | * software must display the following acknowledgment: | ||
| 75 | * "This product includes software developed by the OpenSSL Project | ||
| 76 | * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" | ||
| 77 | * | ||
| 78 | * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to | ||
| 79 | * endorse or promote products derived from this software without | ||
| 80 | * prior written permission. For written permission, please contact | ||
| 81 | * openssl-core@openssl.org. | ||
| 82 | * | ||
| 83 | * 5. Products derived from this software may not be called "OpenSSL" | ||
| 84 | * nor may "OpenSSL" appear in their names without prior written | ||
| 85 | * permission of the OpenSSL Project. | ||
| 86 | * | ||
| 87 | * 6. Redistributions of any form whatsoever must retain the following | ||
| 88 | * acknowledgment: | ||
| 89 | * "This product includes software developed by the OpenSSL Project | ||
| 90 | * for use in the OpenSSL Toolkit (http://www.openssl.org/)" | ||
| 91 | * | ||
| 92 | * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY | ||
| 93 | * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
| 94 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | ||
| 95 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR | ||
| 96 | * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
| 97 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | ||
| 98 | * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | ||
| 99 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | ||
| 100 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | ||
| 101 | * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | ||
| 102 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED | ||
| 103 | * OF THE POSSIBILITY OF SUCH DAMAGE. | ||
| 104 | * ==================================================================== | ||
| 105 | * | ||
| 106 | * This product includes cryptographic software written by Eric Young | ||
| 107 | * (eay@cryptsoft.com). This product includes software written by Tim | ||
| 108 | * Hudson (tjh@cryptsoft.com). | ||
| 109 | * | ||
| 110 | */ | ||
| 111 | /* ==================================================================== | ||
| 59 | * Copyright 2002 Sun Microsystems, Inc. ALL RIGHTS RESERVED. | 112 | * Copyright 2002 Sun Microsystems, Inc. ALL RIGHTS RESERVED. |
| 60 | * | 113 | * |
| 61 | * Portions of the attached software ("Contribution") are developed by | 114 | * Portions of the attached software ("Contribution") are developed by |
| @@ -77,6 +130,7 @@ | |||
| 77 | #include <stdio.h> /* FILE */ | 130 | #include <stdio.h> /* FILE */ |
| 78 | #endif | 131 | #endif |
| 79 | #include <openssl/ossl_typ.h> | 132 | #include <openssl/ossl_typ.h> |
| 133 | #include <openssl/crypto.h> | ||
| 80 | 134 | ||
| 81 | #ifdef __cplusplus | 135 | #ifdef __cplusplus |
| 82 | extern "C" { | 136 | extern "C" { |
| @@ -94,9 +148,11 @@ extern "C" { | |||
| 94 | /* #define BN_DEBUG */ | 148 | /* #define BN_DEBUG */ |
| 95 | /* #define BN_DEBUG_RAND */ | 149 | /* #define BN_DEBUG_RAND */ |
| 96 | 150 | ||
| 151 | #ifndef OPENSSL_SMALL_FOOTPRINT | ||
| 97 | #define BN_MUL_COMBA | 152 | #define BN_MUL_COMBA |
| 98 | #define BN_SQR_COMBA | 153 | #define BN_SQR_COMBA |
| 99 | #define BN_RECURSION | 154 | #define BN_RECURSION |
| 155 | #endif | ||
| 100 | 156 | ||
| 101 | /* This next option uses the C libraries (2 word)/(1 word) function. | 157 | /* This next option uses the C libraries (2 word)/(1 word) function. |
| 102 | * If it is not defined, I use my C version (which is slower). | 158 | * If it is not defined, I use my C version (which is slower). |
| @@ -137,6 +193,8 @@ extern "C" { | |||
| 137 | #define BN_DEC_FMT1 "%lu" | 193 | #define BN_DEC_FMT1 "%lu" |
| 138 | #define BN_DEC_FMT2 "%019lu" | 194 | #define BN_DEC_FMT2 "%019lu" |
| 139 | #define BN_DEC_NUM 19 | 195 | #define BN_DEC_NUM 19 |
| 196 | #define BN_HEX_FMT1 "%lX" | ||
| 197 | #define BN_HEX_FMT2 "%016lX" | ||
| 140 | #endif | 198 | #endif |
| 141 | 199 | ||
| 142 | /* This is where the long long data type is 64 bits, but long is 32. | 200 | /* This is where the long long data type is 64 bits, but long is 32. |
| @@ -162,83 +220,37 @@ extern "C" { | |||
| 162 | #define BN_DEC_FMT1 "%llu" | 220 | #define BN_DEC_FMT1 "%llu" |
| 163 | #define BN_DEC_FMT2 "%019llu" | 221 | #define BN_DEC_FMT2 "%019llu" |
| 164 | #define BN_DEC_NUM 19 | 222 | #define BN_DEC_NUM 19 |
| 223 | #define BN_HEX_FMT1 "%llX" | ||
| 224 | #define BN_HEX_FMT2 "%016llX" | ||
| 165 | #endif | 225 | #endif |
| 166 | 226 | ||
| 167 | #ifdef THIRTY_TWO_BIT | 227 | #ifdef THIRTY_TWO_BIT |
| 168 | #ifdef BN_LLONG | 228 | #ifdef BN_LLONG |
| 169 | # if defined(OPENSSL_SYS_WIN32) && !defined(__GNUC__) | 229 | # if defined(_WIN32) && !defined(__GNUC__) |
| 170 | # define BN_ULLONG unsigned __int64 | 230 | # define BN_ULLONG unsigned __int64 |
| 231 | # define BN_MASK (0xffffffffffffffffI64) | ||
| 171 | # else | 232 | # else |
| 172 | # define BN_ULLONG unsigned long long | 233 | # define BN_ULLONG unsigned long long |
| 234 | # define BN_MASK (0xffffffffffffffffLL) | ||
| 173 | # endif | 235 | # endif |
| 174 | #endif | 236 | #endif |
| 175 | #define BN_ULONG unsigned long | 237 | #define BN_ULONG unsigned int |
| 176 | #define BN_LONG long | 238 | #define BN_LONG int |
| 177 | #define BN_BITS 64 | 239 | #define BN_BITS 64 |
| 178 | #define BN_BYTES 4 | 240 | #define BN_BYTES 4 |
| 179 | #define BN_BITS2 32 | 241 | #define BN_BITS2 32 |
| 180 | #define BN_BITS4 16 | 242 | #define BN_BITS4 16 |
| 181 | #ifdef OPENSSL_SYS_WIN32 | ||
| 182 | /* VC++ doesn't like the LL suffix */ | ||
| 183 | #define BN_MASK (0xffffffffffffffffL) | ||
| 184 | #else | ||
| 185 | #define BN_MASK (0xffffffffffffffffLL) | ||
| 186 | #endif | ||
| 187 | #define BN_MASK2 (0xffffffffL) | 243 | #define BN_MASK2 (0xffffffffL) |
| 188 | #define BN_MASK2l (0xffff) | 244 | #define BN_MASK2l (0xffff) |
| 189 | #define BN_MASK2h1 (0xffff8000L) | 245 | #define BN_MASK2h1 (0xffff8000L) |
| 190 | #define BN_MASK2h (0xffff0000L) | 246 | #define BN_MASK2h (0xffff0000L) |
| 191 | #define BN_TBIT (0x80000000L) | 247 | #define BN_TBIT (0x80000000L) |
| 192 | #define BN_DEC_CONV (1000000000L) | 248 | #define BN_DEC_CONV (1000000000L) |
| 193 | #define BN_DEC_FMT1 "%lu" | ||
| 194 | #define BN_DEC_FMT2 "%09lu" | ||
| 195 | #define BN_DEC_NUM 9 | ||
| 196 | #endif | ||
| 197 | |||
| 198 | #ifdef SIXTEEN_BIT | ||
| 199 | #ifndef BN_DIV2W | ||
| 200 | #define BN_DIV2W | ||
| 201 | #endif | ||
| 202 | #define BN_ULLONG unsigned long | ||
| 203 | #define BN_ULONG unsigned short | ||
| 204 | #define BN_LONG short | ||
| 205 | #define BN_BITS 32 | ||
| 206 | #define BN_BYTES 2 | ||
| 207 | #define BN_BITS2 16 | ||
| 208 | #define BN_BITS4 8 | ||
| 209 | #define BN_MASK (0xffffffff) | ||
| 210 | #define BN_MASK2 (0xffff) | ||
| 211 | #define BN_MASK2l (0xff) | ||
| 212 | #define BN_MASK2h1 (0xff80) | ||
| 213 | #define BN_MASK2h (0xff00) | ||
| 214 | #define BN_TBIT (0x8000) | ||
| 215 | #define BN_DEC_CONV (100000) | ||
| 216 | #define BN_DEC_FMT1 "%u" | 249 | #define BN_DEC_FMT1 "%u" |
| 217 | #define BN_DEC_FMT2 "%05u" | 250 | #define BN_DEC_FMT2 "%09u" |
| 218 | #define BN_DEC_NUM 5 | 251 | #define BN_DEC_NUM 9 |
| 219 | #endif | 252 | #define BN_HEX_FMT1 "%X" |
| 220 | 253 | #define BN_HEX_FMT2 "%08X" | |
| 221 | #ifdef EIGHT_BIT | ||
| 222 | #ifndef BN_DIV2W | ||
| 223 | #define BN_DIV2W | ||
| 224 | #endif | ||
| 225 | #define BN_ULLONG unsigned short | ||
| 226 | #define BN_ULONG unsigned char | ||
| 227 | #define BN_LONG char | ||
| 228 | #define BN_BITS 16 | ||
| 229 | #define BN_BYTES 1 | ||
| 230 | #define BN_BITS2 8 | ||
| 231 | #define BN_BITS4 4 | ||
| 232 | #define BN_MASK (0xffff) | ||
| 233 | #define BN_MASK2 (0xff) | ||
| 234 | #define BN_MASK2l (0xf) | ||
| 235 | #define BN_MASK2h1 (0xf8) | ||
| 236 | #define BN_MASK2h (0xf0) | ||
| 237 | #define BN_TBIT (0x80) | ||
| 238 | #define BN_DEC_CONV (100) | ||
| 239 | #define BN_DEC_FMT1 "%u" | ||
| 240 | #define BN_DEC_FMT2 "%02u" | ||
| 241 | #define BN_DEC_NUM 2 | ||
| 242 | #endif | 254 | #endif |
| 243 | 255 | ||
| 244 | #define BN_DEFAULT_BITS 1280 | 256 | #define BN_DEFAULT_BITS 1280 |
| @@ -303,12 +315,8 @@ struct bn_mont_ctx_st | |||
| 303 | BIGNUM N; /* The modulus */ | 315 | BIGNUM N; /* The modulus */ |
| 304 | BIGNUM Ni; /* R*(1/R mod N) - N*Ni = 1 | 316 | BIGNUM Ni; /* R*(1/R mod N) - N*Ni = 1 |
| 305 | * (Ni is only stored for bignum algorithm) */ | 317 | * (Ni is only stored for bignum algorithm) */ |
| 306 | #if 0 | 318 | BN_ULONG n0[2];/* least significant word(s) of Ni; |
| 307 | /* OpenSSL 0.9.9 preview: */ | 319 | (type changed with 0.9.9, was "BN_ULONG n0;" before) */ |
| 308 | BN_ULONG n0[2];/* least significant word(s) of Ni */ | ||
| 309 | #else | ||
| 310 | BN_ULONG n0; /* least significant word of Ni */ | ||
| 311 | #endif | ||
| 312 | int flags; | 320 | int flags; |
| 313 | }; | 321 | }; |
| 314 | 322 | ||
| @@ -504,6 +512,7 @@ char * BN_bn2hex(const BIGNUM *a); | |||
| 504 | char * BN_bn2dec(const BIGNUM *a); | 512 | char * BN_bn2dec(const BIGNUM *a); |
| 505 | int BN_hex2bn(BIGNUM **a, const char *str); | 513 | int BN_hex2bn(BIGNUM **a, const char *str); |
| 506 | int BN_dec2bn(BIGNUM **a, const char *str); | 514 | int BN_dec2bn(BIGNUM **a, const char *str); |
| 515 | int BN_asc2bn(BIGNUM **a, const char *str); | ||
| 507 | int BN_gcd(BIGNUM *r,const BIGNUM *a,const BIGNUM *b,BN_CTX *ctx); | 516 | int BN_gcd(BIGNUM *r,const BIGNUM *a,const BIGNUM *b,BN_CTX *ctx); |
| 508 | int BN_kronecker(const BIGNUM *a,const BIGNUM *b,BN_CTX *ctx); /* returns -2 for error */ | 517 | int BN_kronecker(const BIGNUM *a,const BIGNUM *b,BN_CTX *ctx); /* returns -2 for error */ |
| 509 | BIGNUM *BN_mod_inverse(BIGNUM *ret, | 518 | BIGNUM *BN_mod_inverse(BIGNUM *ret, |
| @@ -531,17 +540,6 @@ int BN_is_prime_ex(const BIGNUM *p,int nchecks, BN_CTX *ctx, BN_GENCB *cb); | |||
| 531 | int BN_is_prime_fasttest_ex(const BIGNUM *p,int nchecks, BN_CTX *ctx, | 540 | int BN_is_prime_fasttest_ex(const BIGNUM *p,int nchecks, BN_CTX *ctx, |
| 532 | int do_trial_division, BN_GENCB *cb); | 541 | int do_trial_division, BN_GENCB *cb); |
| 533 | 542 | ||
| 534 | int BN_X931_generate_Xpq(BIGNUM *Xp, BIGNUM *Xq, int nbits, BN_CTX *ctx); | ||
| 535 | |||
| 536 | int BN_X931_derive_prime_ex(BIGNUM *p, BIGNUM *p1, BIGNUM *p2, | ||
| 537 | const BIGNUM *Xp, const BIGNUM *Xp1, const BIGNUM *Xp2, | ||
| 538 | const BIGNUM *e, BN_CTX *ctx, BN_GENCB *cb); | ||
| 539 | int BN_X931_generate_prime_ex(BIGNUM *p, BIGNUM *p1, BIGNUM *p2, | ||
| 540 | BIGNUM *Xp1, BIGNUM *Xp2, | ||
| 541 | const BIGNUM *Xp, | ||
| 542 | const BIGNUM *e, BN_CTX *ctx, | ||
| 543 | BN_GENCB *cb); | ||
| 544 | |||
| 545 | BN_MONT_CTX *BN_MONT_CTX_new(void ); | 543 | BN_MONT_CTX *BN_MONT_CTX_new(void ); |
| 546 | void BN_MONT_CTX_init(BN_MONT_CTX *ctx); | 544 | void BN_MONT_CTX_init(BN_MONT_CTX *ctx); |
| 547 | int BN_mod_mul_montgomery(BIGNUM *r,const BIGNUM *a,const BIGNUM *b, | 545 | int BN_mod_mul_montgomery(BIGNUM *r,const BIGNUM *a,const BIGNUM *b, |
| @@ -560,19 +558,22 @@ BN_MONT_CTX *BN_MONT_CTX_set_locked(BN_MONT_CTX **pmont, int lock, | |||
| 560 | #define BN_BLINDING_NO_UPDATE 0x00000001 | 558 | #define BN_BLINDING_NO_UPDATE 0x00000001 |
| 561 | #define BN_BLINDING_NO_RECREATE 0x00000002 | 559 | #define BN_BLINDING_NO_RECREATE 0x00000002 |
| 562 | 560 | ||
| 563 | BN_BLINDING *BN_BLINDING_new(const BIGNUM *A, const BIGNUM *Ai, /* const */ BIGNUM *mod); | 561 | BN_BLINDING *BN_BLINDING_new(const BIGNUM *A, const BIGNUM *Ai, BIGNUM *mod); |
| 564 | void BN_BLINDING_free(BN_BLINDING *b); | 562 | void BN_BLINDING_free(BN_BLINDING *b); |
| 565 | int BN_BLINDING_update(BN_BLINDING *b,BN_CTX *ctx); | 563 | int BN_BLINDING_update(BN_BLINDING *b,BN_CTX *ctx); |
| 566 | int BN_BLINDING_convert(BIGNUM *n, BN_BLINDING *b, BN_CTX *ctx); | 564 | int BN_BLINDING_convert(BIGNUM *n, BN_BLINDING *b, BN_CTX *ctx); |
| 567 | int BN_BLINDING_invert(BIGNUM *n, BN_BLINDING *b, BN_CTX *ctx); | 565 | int BN_BLINDING_invert(BIGNUM *n, BN_BLINDING *b, BN_CTX *ctx); |
| 568 | int BN_BLINDING_convert_ex(BIGNUM *n, BIGNUM *r, BN_BLINDING *b, BN_CTX *); | 566 | int BN_BLINDING_convert_ex(BIGNUM *n, BIGNUM *r, BN_BLINDING *b, BN_CTX *); |
| 569 | int BN_BLINDING_invert_ex(BIGNUM *n, const BIGNUM *r, BN_BLINDING *b, BN_CTX *); | 567 | int BN_BLINDING_invert_ex(BIGNUM *n, const BIGNUM *r, BN_BLINDING *b, BN_CTX *); |
| 568 | #ifndef OPENSSL_NO_DEPRECATED | ||
| 570 | unsigned long BN_BLINDING_get_thread_id(const BN_BLINDING *); | 569 | unsigned long BN_BLINDING_get_thread_id(const BN_BLINDING *); |
| 571 | void BN_BLINDING_set_thread_id(BN_BLINDING *, unsigned long); | 570 | void BN_BLINDING_set_thread_id(BN_BLINDING *, unsigned long); |
| 571 | #endif | ||
| 572 | CRYPTO_THREADID *BN_BLINDING_thread_id(BN_BLINDING *); | ||
| 572 | unsigned long BN_BLINDING_get_flags(const BN_BLINDING *); | 573 | unsigned long BN_BLINDING_get_flags(const BN_BLINDING *); |
| 573 | void BN_BLINDING_set_flags(BN_BLINDING *, unsigned long); | 574 | void BN_BLINDING_set_flags(BN_BLINDING *, unsigned long); |
| 574 | BN_BLINDING *BN_BLINDING_create_param(BN_BLINDING *b, | 575 | BN_BLINDING *BN_BLINDING_create_param(BN_BLINDING *b, |
| 575 | const BIGNUM *e, /* const */ BIGNUM *m, BN_CTX *ctx, | 576 | const BIGNUM *e, BIGNUM *m, BN_CTX *ctx, |
| 576 | int (*bn_mod_exp)(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, | 577 | int (*bn_mod_exp)(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, |
| 577 | const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *m_ctx), | 578 | const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *m_ctx), |
| 578 | BN_MONT_CTX *m_ctx); | 579 | BN_MONT_CTX *m_ctx); |
| @@ -625,24 +626,24 @@ int BN_GF2m_mod_solve_quad(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, | |||
| 625 | * t^p[0] + t^p[1] + ... + t^p[k] | 626 | * t^p[0] + t^p[1] + ... + t^p[k] |
| 626 | * where m = p[0] > p[1] > ... > p[k] = 0. | 627 | * where m = p[0] > p[1] > ... > p[k] = 0. |
| 627 | */ | 628 | */ |
| 628 | int BN_GF2m_mod_arr(BIGNUM *r, const BIGNUM *a, const unsigned int p[]); | 629 | int BN_GF2m_mod_arr(BIGNUM *r, const BIGNUM *a, const int p[]); |
| 629 | /* r = a mod p */ | 630 | /* r = a mod p */ |
| 630 | int BN_GF2m_mod_mul_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, | 631 | int BN_GF2m_mod_mul_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, |
| 631 | const unsigned int p[], BN_CTX *ctx); /* r = (a * b) mod p */ | 632 | const int p[], BN_CTX *ctx); /* r = (a * b) mod p */ |
| 632 | int BN_GF2m_mod_sqr_arr(BIGNUM *r, const BIGNUM *a, const unsigned int p[], | 633 | int BN_GF2m_mod_sqr_arr(BIGNUM *r, const BIGNUM *a, const int p[], |
| 633 | BN_CTX *ctx); /* r = (a * a) mod p */ | 634 | BN_CTX *ctx); /* r = (a * a) mod p */ |
| 634 | int BN_GF2m_mod_inv_arr(BIGNUM *r, const BIGNUM *b, const unsigned int p[], | 635 | int BN_GF2m_mod_inv_arr(BIGNUM *r, const BIGNUM *b, const int p[], |
| 635 | BN_CTX *ctx); /* r = (1 / b) mod p */ | 636 | BN_CTX *ctx); /* r = (1 / b) mod p */ |
| 636 | int BN_GF2m_mod_div_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, | 637 | int BN_GF2m_mod_div_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, |
| 637 | const unsigned int p[], BN_CTX *ctx); /* r = (a / b) mod p */ | 638 | const int p[], BN_CTX *ctx); /* r = (a / b) mod p */ |
| 638 | int BN_GF2m_mod_exp_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, | 639 | int BN_GF2m_mod_exp_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, |
| 639 | const unsigned int p[], BN_CTX *ctx); /* r = (a ^ b) mod p */ | 640 | const int p[], BN_CTX *ctx); /* r = (a ^ b) mod p */ |
| 640 | int BN_GF2m_mod_sqrt_arr(BIGNUM *r, const BIGNUM *a, | 641 | int BN_GF2m_mod_sqrt_arr(BIGNUM *r, const BIGNUM *a, |
| 641 | const unsigned int p[], BN_CTX *ctx); /* r = sqrt(a) mod p */ | 642 | const int p[], BN_CTX *ctx); /* r = sqrt(a) mod p */ |
| 642 | int BN_GF2m_mod_solve_quad_arr(BIGNUM *r, const BIGNUM *a, | 643 | int BN_GF2m_mod_solve_quad_arr(BIGNUM *r, const BIGNUM *a, |
| 643 | const unsigned int p[], BN_CTX *ctx); /* r^2 + r = a mod p */ | 644 | const int p[], BN_CTX *ctx); /* r^2 + r = a mod p */ |
| 644 | int BN_GF2m_poly2arr(const BIGNUM *a, unsigned int p[], int max); | 645 | int BN_GF2m_poly2arr(const BIGNUM *a, int p[], int max); |
| 645 | int BN_GF2m_arr2poly(const unsigned int p[], BIGNUM *a); | 646 | int BN_GF2m_arr2poly(const int p[], BIGNUM *a); |
| 646 | 647 | ||
| 647 | /* faster mod functions for the 'NIST primes' | 648 | /* faster mod functions for the 'NIST primes' |
| 648 | * 0 <= a < p^2 */ | 649 | * 0 <= a < p^2 */ |
| @@ -751,10 +752,12 @@ int RAND_pseudo_bytes(unsigned char *buf,int num); | |||
| 751 | #define bn_correct_top(a) \ | 752 | #define bn_correct_top(a) \ |
| 752 | { \ | 753 | { \ |
| 753 | BN_ULONG *ftl; \ | 754 | BN_ULONG *ftl; \ |
| 754 | if ((a)->top > 0) \ | 755 | int tmp_top = (a)->top; \ |
| 756 | if (tmp_top > 0) \ | ||
| 755 | { \ | 757 | { \ |
| 756 | for (ftl= &((a)->d[(a)->top-1]); (a)->top > 0; (a)->top--) \ | 758 | for (ftl= &((a)->d[tmp_top-1]); tmp_top > 0; tmp_top--) \ |
| 757 | if (*(ftl--)) break; \ | 759 | if (*(ftl--)) break; \ |
| 760 | (a)->top = tmp_top; \ | ||
| 758 | } \ | 761 | } \ |
| 759 | bn_pollute(a); \ | 762 | bn_pollute(a); \ |
| 760 | } | 763 | } |
diff --git a/src/lib/libcrypto/bn/bn_asm.c b/src/lib/libcrypto/bn/bn_asm.c index 99bc2de491..c43c91cc09 100644 --- a/src/lib/libcrypto/bn/bn_asm.c +++ b/src/lib/libcrypto/bn/bn_asm.c | |||
| @@ -75,6 +75,7 @@ BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) | |||
| 75 | assert(num >= 0); | 75 | assert(num >= 0); |
| 76 | if (num <= 0) return(c1); | 76 | if (num <= 0) return(c1); |
| 77 | 77 | ||
| 78 | #ifndef OPENSSL_SMALL_FOOTPRINT | ||
| 78 | while (num&~3) | 79 | while (num&~3) |
| 79 | { | 80 | { |
| 80 | mul_add(rp[0],ap[0],w,c1); | 81 | mul_add(rp[0],ap[0],w,c1); |
| @@ -83,11 +84,11 @@ BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) | |||
| 83 | mul_add(rp[3],ap[3],w,c1); | 84 | mul_add(rp[3],ap[3],w,c1); |
| 84 | ap+=4; rp+=4; num-=4; | 85 | ap+=4; rp+=4; num-=4; |
| 85 | } | 86 | } |
| 86 | if (num) | 87 | #endif |
| 88 | while (num) | ||
| 87 | { | 89 | { |
| 88 | mul_add(rp[0],ap[0],w,c1); if (--num==0) return c1; | 90 | mul_add(rp[0],ap[0],w,c1); |
| 89 | mul_add(rp[1],ap[1],w,c1); if (--num==0) return c1; | 91 | ap++; rp++; num--; |
| 90 | mul_add(rp[2],ap[2],w,c1); return c1; | ||
| 91 | } | 92 | } |
| 92 | 93 | ||
| 93 | return(c1); | 94 | return(c1); |
| @@ -100,6 +101,7 @@ BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) | |||
| 100 | assert(num >= 0); | 101 | assert(num >= 0); |
| 101 | if (num <= 0) return(c1); | 102 | if (num <= 0) return(c1); |
| 102 | 103 | ||
| 104 | #ifndef OPENSSL_SMALL_FOOTPRINT | ||
| 103 | while (num&~3) | 105 | while (num&~3) |
| 104 | { | 106 | { |
| 105 | mul(rp[0],ap[0],w,c1); | 107 | mul(rp[0],ap[0],w,c1); |
| @@ -108,11 +110,11 @@ BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) | |||
| 108 | mul(rp[3],ap[3],w,c1); | 110 | mul(rp[3],ap[3],w,c1); |
| 109 | ap+=4; rp+=4; num-=4; | 111 | ap+=4; rp+=4; num-=4; |
| 110 | } | 112 | } |
| 111 | if (num) | 113 | #endif |
| 114 | while (num) | ||
| 112 | { | 115 | { |
| 113 | mul(rp[0],ap[0],w,c1); if (--num == 0) return c1; | 116 | mul(rp[0],ap[0],w,c1); |
| 114 | mul(rp[1],ap[1],w,c1); if (--num == 0) return c1; | 117 | ap++; rp++; num--; |
| 115 | mul(rp[2],ap[2],w,c1); | ||
| 116 | } | 118 | } |
| 117 | return(c1); | 119 | return(c1); |
| 118 | } | 120 | } |
| @@ -121,6 +123,8 @@ void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n) | |||
| 121 | { | 123 | { |
| 122 | assert(n >= 0); | 124 | assert(n >= 0); |
| 123 | if (n <= 0) return; | 125 | if (n <= 0) return; |
| 126 | |||
| 127 | #ifndef OPENSSL_SMALL_FOOTPRINT | ||
| 124 | while (n&~3) | 128 | while (n&~3) |
| 125 | { | 129 | { |
| 126 | sqr(r[0],r[1],a[0]); | 130 | sqr(r[0],r[1],a[0]); |
| @@ -129,11 +133,11 @@ void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n) | |||
| 129 | sqr(r[6],r[7],a[3]); | 133 | sqr(r[6],r[7],a[3]); |
| 130 | a+=4; r+=8; n-=4; | 134 | a+=4; r+=8; n-=4; |
| 131 | } | 135 | } |
| 132 | if (n) | 136 | #endif |
| 137 | while (n) | ||
| 133 | { | 138 | { |
| 134 | sqr(r[0],r[1],a[0]); if (--n == 0) return; | 139 | sqr(r[0],r[1],a[0]); |
| 135 | sqr(r[2],r[3],a[1]); if (--n == 0) return; | 140 | a++; r+=2; n--; |
| 136 | sqr(r[4],r[5],a[2]); | ||
| 137 | } | 141 | } |
| 138 | } | 142 | } |
| 139 | 143 | ||
| @@ -150,18 +154,20 @@ BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) | |||
| 150 | bl=LBITS(w); | 154 | bl=LBITS(w); |
| 151 | bh=HBITS(w); | 155 | bh=HBITS(w); |
| 152 | 156 | ||
| 153 | for (;;) | 157 | #ifndef OPENSSL_SMALL_FOOTPRINT |
| 158 | while (num&~3) | ||
| 154 | { | 159 | { |
| 155 | mul_add(rp[0],ap[0],bl,bh,c); | 160 | mul_add(rp[0],ap[0],bl,bh,c); |
| 156 | if (--num == 0) break; | ||
| 157 | mul_add(rp[1],ap[1],bl,bh,c); | 161 | mul_add(rp[1],ap[1],bl,bh,c); |
| 158 | if (--num == 0) break; | ||
| 159 | mul_add(rp[2],ap[2],bl,bh,c); | 162 | mul_add(rp[2],ap[2],bl,bh,c); |
| 160 | if (--num == 0) break; | ||
| 161 | mul_add(rp[3],ap[3],bl,bh,c); | 163 | mul_add(rp[3],ap[3],bl,bh,c); |
| 162 | if (--num == 0) break; | 164 | ap+=4; rp+=4; num-=4; |
| 163 | ap+=4; | 165 | } |
| 164 | rp+=4; | 166 | #endif |
| 167 | while (num) | ||
| 168 | { | ||
| 169 | mul_add(rp[0],ap[0],bl,bh,c); | ||
| 170 | ap++; rp++; num--; | ||
| 165 | } | 171 | } |
| 166 | return(c); | 172 | return(c); |
| 167 | } | 173 | } |
| @@ -177,18 +183,20 @@ BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) | |||
| 177 | bl=LBITS(w); | 183 | bl=LBITS(w); |
| 178 | bh=HBITS(w); | 184 | bh=HBITS(w); |
| 179 | 185 | ||
| 180 | for (;;) | 186 | #ifndef OPENSSL_SMALL_FOOTPRINT |
| 187 | while (num&~3) | ||
| 181 | { | 188 | { |
| 182 | mul(rp[0],ap[0],bl,bh,carry); | 189 | mul(rp[0],ap[0],bl,bh,carry); |
| 183 | if (--num == 0) break; | ||
| 184 | mul(rp[1],ap[1],bl,bh,carry); | 190 | mul(rp[1],ap[1],bl,bh,carry); |
| 185 | if (--num == 0) break; | ||
| 186 | mul(rp[2],ap[2],bl,bh,carry); | 191 | mul(rp[2],ap[2],bl,bh,carry); |
| 187 | if (--num == 0) break; | ||
| 188 | mul(rp[3],ap[3],bl,bh,carry); | 192 | mul(rp[3],ap[3],bl,bh,carry); |
| 189 | if (--num == 0) break; | 193 | ap+=4; rp+=4; num-=4; |
| 190 | ap+=4; | 194 | } |
| 191 | rp+=4; | 195 | #endif |
| 196 | while (num) | ||
| 197 | { | ||
| 198 | mul(rp[0],ap[0],bl,bh,carry); | ||
| 199 | ap++; rp++; num--; | ||
| 192 | } | 200 | } |
| 193 | return(carry); | 201 | return(carry); |
| 194 | } | 202 | } |
| @@ -197,22 +205,21 @@ void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n) | |||
| 197 | { | 205 | { |
| 198 | assert(n >= 0); | 206 | assert(n >= 0); |
| 199 | if (n <= 0) return; | 207 | if (n <= 0) return; |
| 200 | for (;;) | 208 | |
| 209 | #ifndef OPENSSL_SMALL_FOOTPRINT | ||
| 210 | while (n&~3) | ||
| 201 | { | 211 | { |
| 202 | sqr64(r[0],r[1],a[0]); | 212 | sqr64(r[0],r[1],a[0]); |
| 203 | if (--n == 0) break; | ||
| 204 | |||
| 205 | sqr64(r[2],r[3],a[1]); | 213 | sqr64(r[2],r[3],a[1]); |
| 206 | if (--n == 0) break; | ||
| 207 | |||
| 208 | sqr64(r[4],r[5],a[2]); | 214 | sqr64(r[4],r[5],a[2]); |
| 209 | if (--n == 0) break; | ||
| 210 | |||
| 211 | sqr64(r[6],r[7],a[3]); | 215 | sqr64(r[6],r[7],a[3]); |
| 212 | if (--n == 0) break; | 216 | a+=4; r+=8; n-=4; |
| 213 | 217 | } | |
| 214 | a+=4; | 218 | #endif |
| 215 | r+=8; | 219 | while (n) |
| 220 | { | ||
| 221 | sqr64(r[0],r[1],a[0]); | ||
| 222 | a++; r+=2; n--; | ||
| 216 | } | 223 | } |
| 217 | } | 224 | } |
| 218 | 225 | ||
| @@ -303,31 +310,30 @@ BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) | |||
| 303 | assert(n >= 0); | 310 | assert(n >= 0); |
| 304 | if (n <= 0) return((BN_ULONG)0); | 311 | if (n <= 0) return((BN_ULONG)0); |
| 305 | 312 | ||
| 306 | for (;;) | 313 | #ifndef OPENSSL_SMALL_FOOTPRINT |
| 314 | while (n&~3) | ||
| 307 | { | 315 | { |
| 308 | ll+=(BN_ULLONG)a[0]+b[0]; | 316 | ll+=(BN_ULLONG)a[0]+b[0]; |
| 309 | r[0]=(BN_ULONG)ll&BN_MASK2; | 317 | r[0]=(BN_ULONG)ll&BN_MASK2; |
| 310 | ll>>=BN_BITS2; | 318 | ll>>=BN_BITS2; |
| 311 | if (--n <= 0) break; | ||
| 312 | |||
| 313 | ll+=(BN_ULLONG)a[1]+b[1]; | 319 | ll+=(BN_ULLONG)a[1]+b[1]; |
| 314 | r[1]=(BN_ULONG)ll&BN_MASK2; | 320 | r[1]=(BN_ULONG)ll&BN_MASK2; |
| 315 | ll>>=BN_BITS2; | 321 | ll>>=BN_BITS2; |
| 316 | if (--n <= 0) break; | ||
| 317 | |||
| 318 | ll+=(BN_ULLONG)a[2]+b[2]; | 322 | ll+=(BN_ULLONG)a[2]+b[2]; |
| 319 | r[2]=(BN_ULONG)ll&BN_MASK2; | 323 | r[2]=(BN_ULONG)ll&BN_MASK2; |
| 320 | ll>>=BN_BITS2; | 324 | ll>>=BN_BITS2; |
| 321 | if (--n <= 0) break; | ||
| 322 | |||
| 323 | ll+=(BN_ULLONG)a[3]+b[3]; | 325 | ll+=(BN_ULLONG)a[3]+b[3]; |
| 324 | r[3]=(BN_ULONG)ll&BN_MASK2; | 326 | r[3]=(BN_ULONG)ll&BN_MASK2; |
| 325 | ll>>=BN_BITS2; | 327 | ll>>=BN_BITS2; |
| 326 | if (--n <= 0) break; | 328 | a+=4; b+=4; r+=4; n-=4; |
| 327 | 329 | } | |
| 328 | a+=4; | 330 | #endif |
| 329 | b+=4; | 331 | while (n) |
| 330 | r+=4; | 332 | { |
| 333 | ll+=(BN_ULLONG)a[0]+b[0]; | ||
| 334 | r[0]=(BN_ULONG)ll&BN_MASK2; | ||
| 335 | ll>>=BN_BITS2; | ||
| 336 | a++; b++; r++; n--; | ||
| 331 | } | 337 | } |
| 332 | return((BN_ULONG)ll); | 338 | return((BN_ULONG)ll); |
| 333 | } | 339 | } |
| @@ -340,7 +346,8 @@ BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) | |||
| 340 | if (n <= 0) return((BN_ULONG)0); | 346 | if (n <= 0) return((BN_ULONG)0); |
| 341 | 347 | ||
| 342 | c=0; | 348 | c=0; |
| 343 | for (;;) | 349 | #ifndef OPENSSL_SMALL_FOOTPRINT |
| 350 | while (n&~3) | ||
| 344 | { | 351 | { |
| 345 | t=a[0]; | 352 | t=a[0]; |
| 346 | t=(t+c)&BN_MASK2; | 353 | t=(t+c)&BN_MASK2; |
| @@ -348,35 +355,36 @@ BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) | |||
| 348 | l=(t+b[0])&BN_MASK2; | 355 | l=(t+b[0])&BN_MASK2; |
| 349 | c+=(l < t); | 356 | c+=(l < t); |
| 350 | r[0]=l; | 357 | r[0]=l; |
| 351 | if (--n <= 0) break; | ||
| 352 | |||
| 353 | t=a[1]; | 358 | t=a[1]; |
| 354 | t=(t+c)&BN_MASK2; | 359 | t=(t+c)&BN_MASK2; |
| 355 | c=(t < c); | 360 | c=(t < c); |
| 356 | l=(t+b[1])&BN_MASK2; | 361 | l=(t+b[1])&BN_MASK2; |
| 357 | c+=(l < t); | 362 | c+=(l < t); |
| 358 | r[1]=l; | 363 | r[1]=l; |
| 359 | if (--n <= 0) break; | ||
| 360 | |||
| 361 | t=a[2]; | 364 | t=a[2]; |
| 362 | t=(t+c)&BN_MASK2; | 365 | t=(t+c)&BN_MASK2; |
| 363 | c=(t < c); | 366 | c=(t < c); |
| 364 | l=(t+b[2])&BN_MASK2; | 367 | l=(t+b[2])&BN_MASK2; |
| 365 | c+=(l < t); | 368 | c+=(l < t); |
| 366 | r[2]=l; | 369 | r[2]=l; |
| 367 | if (--n <= 0) break; | ||
| 368 | |||
| 369 | t=a[3]; | 370 | t=a[3]; |
| 370 | t=(t+c)&BN_MASK2; | 371 | t=(t+c)&BN_MASK2; |
| 371 | c=(t < c); | 372 | c=(t < c); |
| 372 | l=(t+b[3])&BN_MASK2; | 373 | l=(t+b[3])&BN_MASK2; |
| 373 | c+=(l < t); | 374 | c+=(l < t); |
| 374 | r[3]=l; | 375 | r[3]=l; |
| 375 | if (--n <= 0) break; | 376 | a+=4; b+=4; r+=4; n-=4; |
| 376 | 377 | } | |
| 377 | a+=4; | 378 | #endif |
| 378 | b+=4; | 379 | while(n) |
| 379 | r+=4; | 380 | { |
| 381 | t=a[0]; | ||
| 382 | t=(t+c)&BN_MASK2; | ||
| 383 | c=(t < c); | ||
| 384 | l=(t+b[0])&BN_MASK2; | ||
| 385 | c+=(l < t); | ||
| 386 | r[0]=l; | ||
| 387 | a++; b++; r++; n--; | ||
| 380 | } | 388 | } |
| 381 | return((BN_ULONG)c); | 389 | return((BN_ULONG)c); |
| 382 | } | 390 | } |
| @@ -390,36 +398,35 @@ BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) | |||
| 390 | assert(n >= 0); | 398 | assert(n >= 0); |
| 391 | if (n <= 0) return((BN_ULONG)0); | 399 | if (n <= 0) return((BN_ULONG)0); |
| 392 | 400 | ||
| 393 | for (;;) | 401 | #ifndef OPENSSL_SMALL_FOOTPRINT |
| 402 | while (n&~3) | ||
| 394 | { | 403 | { |
| 395 | t1=a[0]; t2=b[0]; | 404 | t1=a[0]; t2=b[0]; |
| 396 | r[0]=(t1-t2-c)&BN_MASK2; | 405 | r[0]=(t1-t2-c)&BN_MASK2; |
| 397 | if (t1 != t2) c=(t1 < t2); | 406 | if (t1 != t2) c=(t1 < t2); |
| 398 | if (--n <= 0) break; | ||
| 399 | |||
| 400 | t1=a[1]; t2=b[1]; | 407 | t1=a[1]; t2=b[1]; |
| 401 | r[1]=(t1-t2-c)&BN_MASK2; | 408 | r[1]=(t1-t2-c)&BN_MASK2; |
| 402 | if (t1 != t2) c=(t1 < t2); | 409 | if (t1 != t2) c=(t1 < t2); |
| 403 | if (--n <= 0) break; | ||
| 404 | |||
| 405 | t1=a[2]; t2=b[2]; | 410 | t1=a[2]; t2=b[2]; |
| 406 | r[2]=(t1-t2-c)&BN_MASK2; | 411 | r[2]=(t1-t2-c)&BN_MASK2; |
| 407 | if (t1 != t2) c=(t1 < t2); | 412 | if (t1 != t2) c=(t1 < t2); |
| 408 | if (--n <= 0) break; | ||
| 409 | |||
| 410 | t1=a[3]; t2=b[3]; | 413 | t1=a[3]; t2=b[3]; |
| 411 | r[3]=(t1-t2-c)&BN_MASK2; | 414 | r[3]=(t1-t2-c)&BN_MASK2; |
| 412 | if (t1 != t2) c=(t1 < t2); | 415 | if (t1 != t2) c=(t1 < t2); |
| 413 | if (--n <= 0) break; | 416 | a+=4; b+=4; r+=4; n-=4; |
| 414 | 417 | } | |
| 415 | a+=4; | 418 | #endif |
| 416 | b+=4; | 419 | while (n) |
| 417 | r+=4; | 420 | { |
| 421 | t1=a[0]; t2=b[0]; | ||
| 422 | r[0]=(t1-t2-c)&BN_MASK2; | ||
| 423 | if (t1 != t2) c=(t1 < t2); | ||
| 424 | a++; b++; r++; n--; | ||
| 418 | } | 425 | } |
| 419 | return(c); | 426 | return(c); |
| 420 | } | 427 | } |
| 421 | 428 | ||
| 422 | #ifdef BN_MUL_COMBA | 429 | #if defined(BN_MUL_COMBA) && !defined(OPENSSL_SMALL_FOOTPRINT) |
| 423 | 430 | ||
| 424 | #undef bn_mul_comba8 | 431 | #undef bn_mul_comba8 |
| 425 | #undef bn_mul_comba4 | 432 | #undef bn_mul_comba4 |
| @@ -820,18 +827,134 @@ void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) | |||
| 820 | r[6]=c1; | 827 | r[6]=c1; |
| 821 | r[7]=c2; | 828 | r[7]=c2; |
| 822 | } | 829 | } |
| 830 | |||
| 831 | #ifdef OPENSSL_NO_ASM | ||
| 832 | #ifdef OPENSSL_BN_ASM_MONT | ||
| 833 | #include <alloca.h> | ||
| 834 | /* | ||
| 835 | * This is essentially reference implementation, which may or may not | ||
| 836 | * result in performance improvement. E.g. on IA-32 this routine was | ||
| 837 | * observed to give 40% faster rsa1024 private key operations and 10% | ||
| 838 | * faster rsa4096 ones, while on AMD64 it improves rsa1024 sign only | ||
| 839 | * by 10% and *worsens* rsa4096 sign by 15%. Once again, it's a | ||
| 840 | * reference implementation, one to be used as starting point for | ||
| 841 | * platform-specific assembler. Mentioned numbers apply to compiler | ||
| 842 | * generated code compiled with and without -DOPENSSL_BN_ASM_MONT and | ||
| 843 | * can vary not only from platform to platform, but even for compiler | ||
| 844 | * versions. Assembler vs. assembler improvement coefficients can | ||
| 845 | * [and are known to] differ and are to be documented elsewhere. | ||
| 846 | */ | ||
| 847 | int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0p, int num) | ||
| 848 | { | ||
| 849 | BN_ULONG c0,c1,ml,*tp,n0; | ||
| 850 | #ifdef mul64 | ||
| 851 | BN_ULONG mh; | ||
| 852 | #endif | ||
| 853 | volatile BN_ULONG *vp; | ||
| 854 | int i=0,j; | ||
| 855 | |||
| 856 | #if 0 /* template for platform-specific implementation */ | ||
| 857 | if (ap==bp) return bn_sqr_mont(rp,ap,np,n0p,num); | ||
| 858 | #endif | ||
| 859 | vp = tp = alloca((num+2)*sizeof(BN_ULONG)); | ||
| 860 | |||
| 861 | n0 = *n0p; | ||
| 862 | |||
| 863 | c0 = 0; | ||
| 864 | ml = bp[0]; | ||
| 865 | #ifdef mul64 | ||
| 866 | mh = HBITS(ml); | ||
| 867 | ml = LBITS(ml); | ||
| 868 | for (j=0;j<num;++j) | ||
| 869 | mul(tp[j],ap[j],ml,mh,c0); | ||
| 870 | #else | ||
| 871 | for (j=0;j<num;++j) | ||
| 872 | mul(tp[j],ap[j],ml,c0); | ||
| 873 | #endif | ||
| 874 | |||
| 875 | tp[num] = c0; | ||
| 876 | tp[num+1] = 0; | ||
| 877 | goto enter; | ||
| 878 | |||
| 879 | for(i=0;i<num;i++) | ||
| 880 | { | ||
| 881 | c0 = 0; | ||
| 882 | ml = bp[i]; | ||
| 883 | #ifdef mul64 | ||
| 884 | mh = HBITS(ml); | ||
| 885 | ml = LBITS(ml); | ||
| 886 | for (j=0;j<num;++j) | ||
| 887 | mul_add(tp[j],ap[j],ml,mh,c0); | ||
| 888 | #else | ||
| 889 | for (j=0;j<num;++j) | ||
| 890 | mul_add(tp[j],ap[j],ml,c0); | ||
| 891 | #endif | ||
| 892 | c1 = (tp[num] + c0)&BN_MASK2; | ||
| 893 | tp[num] = c1; | ||
| 894 | tp[num+1] = (c1<c0?1:0); | ||
| 895 | enter: | ||
| 896 | c1 = tp[0]; | ||
| 897 | ml = (c1*n0)&BN_MASK2; | ||
| 898 | c0 = 0; | ||
| 899 | #ifdef mul64 | ||
| 900 | mh = HBITS(ml); | ||
| 901 | ml = LBITS(ml); | ||
| 902 | mul_add(c1,np[0],ml,mh,c0); | ||
| 903 | #else | ||
| 904 | mul_add(c1,ml,np[0],c0); | ||
| 905 | #endif | ||
| 906 | for(j=1;j<num;j++) | ||
| 907 | { | ||
| 908 | c1 = tp[j]; | ||
| 909 | #ifdef mul64 | ||
| 910 | mul_add(c1,np[j],ml,mh,c0); | ||
| 911 | #else | ||
| 912 | mul_add(c1,ml,np[j],c0); | ||
| 913 | #endif | ||
| 914 | tp[j-1] = c1&BN_MASK2; | ||
| 915 | } | ||
| 916 | c1 = (tp[num] + c0)&BN_MASK2; | ||
| 917 | tp[num-1] = c1; | ||
| 918 | tp[num] = tp[num+1] + (c1<c0?1:0); | ||
| 919 | } | ||
| 920 | |||
| 921 | if (tp[num]!=0 || tp[num-1]>=np[num-1]) | ||
| 922 | { | ||
| 923 | c0 = bn_sub_words(rp,tp,np,num); | ||
| 924 | if (tp[num]!=0 || c0==0) | ||
| 925 | { | ||
| 926 | for(i=0;i<num+2;i++) vp[i] = 0; | ||
| 927 | return 1; | ||
| 928 | } | ||
| 929 | } | ||
| 930 | for(i=0;i<num;i++) rp[i] = tp[i], vp[i] = 0; | ||
| 931 | vp[num] = 0; | ||
| 932 | vp[num+1] = 0; | ||
| 933 | return 1; | ||
| 934 | } | ||
| 935 | #else | ||
| 936 | /* | ||
| 937 | * Return value of 0 indicates that multiplication/convolution was not | ||
| 938 | * performed to signal the caller to fall down to alternative/original | ||
| 939 | * code-path. | ||
| 940 | */ | ||
| 941 | int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num) | ||
| 942 | { return 0; } | ||
| 943 | #endif /* OPENSSL_BN_ASM_MONT */ | ||
| 944 | #endif | ||
| 945 | |||
| 823 | #else /* !BN_MUL_COMBA */ | 946 | #else /* !BN_MUL_COMBA */ |
| 824 | 947 | ||
| 825 | /* hmm... is it faster just to do a multiply? */ | 948 | /* hmm... is it faster just to do a multiply? */ |
| 826 | #undef bn_sqr_comba4 | 949 | #undef bn_sqr_comba4 |
| 827 | void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a) | 950 | void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) |
| 828 | { | 951 | { |
| 829 | BN_ULONG t[8]; | 952 | BN_ULONG t[8]; |
| 830 | bn_sqr_normal(r,a,4,t); | 953 | bn_sqr_normal(r,a,4,t); |
| 831 | } | 954 | } |
| 832 | 955 | ||
| 833 | #undef bn_sqr_comba8 | 956 | #undef bn_sqr_comba8 |
| 834 | void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a) | 957 | void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a) |
| 835 | { | 958 | { |
| 836 | BN_ULONG t[16]; | 959 | BN_ULONG t[16]; |
| 837 | bn_sqr_normal(r,a,8,t); | 960 | bn_sqr_normal(r,a,8,t); |
| @@ -857,4 +980,51 @@ void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | |||
| 857 | r[15]=bn_mul_add_words(&(r[7]),a,8,b[7]); | 980 | r[15]=bn_mul_add_words(&(r[7]),a,8,b[7]); |
| 858 | } | 981 | } |
| 859 | 982 | ||
| 983 | #ifdef OPENSSL_NO_ASM | ||
| 984 | #ifdef OPENSSL_BN_ASM_MONT | ||
| 985 | #include <alloca.h> | ||
| 986 | int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0p, int num) | ||
| 987 | { | ||
| 988 | BN_ULONG c0,c1,*tp,n0=*n0p; | ||
| 989 | volatile BN_ULONG *vp; | ||
| 990 | int i=0,j; | ||
| 991 | |||
| 992 | vp = tp = alloca((num+2)*sizeof(BN_ULONG)); | ||
| 993 | |||
| 994 | for(i=0;i<=num;i++) tp[i]=0; | ||
| 995 | |||
| 996 | for(i=0;i<num;i++) | ||
| 997 | { | ||
| 998 | c0 = bn_mul_add_words(tp,ap,num,bp[i]); | ||
| 999 | c1 = (tp[num] + c0)&BN_MASK2; | ||
| 1000 | tp[num] = c1; | ||
| 1001 | tp[num+1] = (c1<c0?1:0); | ||
| 1002 | |||
| 1003 | c0 = bn_mul_add_words(tp,np,num,tp[0]*n0); | ||
| 1004 | c1 = (tp[num] + c0)&BN_MASK2; | ||
| 1005 | tp[num] = c1; | ||
| 1006 | tp[num+1] += (c1<c0?1:0); | ||
| 1007 | for(j=0;j<=num;j++) tp[j]=tp[j+1]; | ||
| 1008 | } | ||
| 1009 | |||
| 1010 | if (tp[num]!=0 || tp[num-1]>=np[num-1]) | ||
| 1011 | { | ||
| 1012 | c0 = bn_sub_words(rp,tp,np,num); | ||
| 1013 | if (tp[num]!=0 || c0==0) | ||
| 1014 | { | ||
| 1015 | for(i=0;i<num+2;i++) vp[i] = 0; | ||
| 1016 | return 1; | ||
| 1017 | } | ||
| 1018 | } | ||
| 1019 | for(i=0;i<num;i++) rp[i] = tp[i], vp[i] = 0; | ||
| 1020 | vp[num] = 0; | ||
| 1021 | vp[num+1] = 0; | ||
| 1022 | return 1; | ||
| 1023 | } | ||
| 1024 | #else | ||
| 1025 | int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num) | ||
| 1026 | { return 0; } | ||
| 1027 | #endif /* OPENSSL_BN_ASM_MONT */ | ||
| 1028 | #endif | ||
| 1029 | |||
| 860 | #endif /* !BN_MUL_COMBA */ | 1030 | #endif /* !BN_MUL_COMBA */ |
diff --git a/src/lib/libcrypto/bn/bn_blind.c b/src/lib/libcrypto/bn/bn_blind.c index c11fb4ccc2..e060592fdc 100644 --- a/src/lib/libcrypto/bn/bn_blind.c +++ b/src/lib/libcrypto/bn/bn_blind.c | |||
| @@ -1,6 +1,6 @@ | |||
| 1 | /* crypto/bn/bn_blind.c */ | 1 | /* crypto/bn/bn_blind.c */ |
| 2 | /* ==================================================================== | 2 | /* ==================================================================== |
| 3 | * Copyright (c) 1998-2005 The OpenSSL Project. All rights reserved. | 3 | * Copyright (c) 1998-2006 The OpenSSL Project. All rights reserved. |
| 4 | * | 4 | * |
| 5 | * Redistribution and use in source and binary forms, with or without | 5 | * Redistribution and use in source and binary forms, with or without |
| 6 | * modification, are permitted provided that the following conditions | 6 | * modification, are permitted provided that the following conditions |
| @@ -121,8 +121,11 @@ struct bn_blinding_st | |||
| 121 | BIGNUM *Ai; | 121 | BIGNUM *Ai; |
| 122 | BIGNUM *e; | 122 | BIGNUM *e; |
| 123 | BIGNUM *mod; /* just a reference */ | 123 | BIGNUM *mod; /* just a reference */ |
| 124 | #ifndef OPENSSL_NO_DEPRECATED | ||
| 124 | unsigned long thread_id; /* added in OpenSSL 0.9.6j and 0.9.7b; | 125 | unsigned long thread_id; /* added in OpenSSL 0.9.6j and 0.9.7b; |
| 125 | * used only by crypto/rsa/rsa_eay.c, rsa_lib.c */ | 126 | * used only by crypto/rsa/rsa_eay.c, rsa_lib.c */ |
| 127 | #endif | ||
| 128 | CRYPTO_THREADID tid; | ||
| 126 | unsigned int counter; | 129 | unsigned int counter; |
| 127 | unsigned long flags; | 130 | unsigned long flags; |
| 128 | BN_MONT_CTX *m_ctx; | 131 | BN_MONT_CTX *m_ctx; |
| @@ -131,7 +134,7 @@ struct bn_blinding_st | |||
| 131 | BN_MONT_CTX *m_ctx); | 134 | BN_MONT_CTX *m_ctx); |
| 132 | }; | 135 | }; |
| 133 | 136 | ||
| 134 | BN_BLINDING *BN_BLINDING_new(const BIGNUM *A, const BIGNUM *Ai, /* const */ BIGNUM *mod) | 137 | BN_BLINDING *BN_BLINDING_new(const BIGNUM *A, const BIGNUM *Ai, BIGNUM *mod) |
| 135 | { | 138 | { |
| 136 | BN_BLINDING *ret=NULL; | 139 | BN_BLINDING *ret=NULL; |
| 137 | 140 | ||
| @@ -158,6 +161,7 @@ BN_BLINDING *BN_BLINDING_new(const BIGNUM *A, const BIGNUM *Ai, /* const */ BIGN | |||
| 158 | BN_set_flags(ret->mod, BN_FLG_CONSTTIME); | 161 | BN_set_flags(ret->mod, BN_FLG_CONSTTIME); |
| 159 | 162 | ||
| 160 | ret->counter = BN_BLINDING_COUNTER; | 163 | ret->counter = BN_BLINDING_COUNTER; |
| 164 | CRYPTO_THREADID_current(&ret->tid); | ||
| 161 | return(ret); | 165 | return(ret); |
| 162 | err: | 166 | err: |
| 163 | if (ret != NULL) BN_BLINDING_free(ret); | 167 | if (ret != NULL) BN_BLINDING_free(ret); |
| @@ -263,6 +267,7 @@ int BN_BLINDING_invert_ex(BIGNUM *n, const BIGNUM *r, BN_BLINDING *b, BN_CTX *ct | |||
| 263 | return(ret); | 267 | return(ret); |
| 264 | } | 268 | } |
| 265 | 269 | ||
| 270 | #ifndef OPENSSL_NO_DEPRECATED | ||
| 266 | unsigned long BN_BLINDING_get_thread_id(const BN_BLINDING *b) | 271 | unsigned long BN_BLINDING_get_thread_id(const BN_BLINDING *b) |
| 267 | { | 272 | { |
| 268 | return b->thread_id; | 273 | return b->thread_id; |
| @@ -272,6 +277,12 @@ void BN_BLINDING_set_thread_id(BN_BLINDING *b, unsigned long n) | |||
| 272 | { | 277 | { |
| 273 | b->thread_id = n; | 278 | b->thread_id = n; |
| 274 | } | 279 | } |
| 280 | #endif | ||
| 281 | |||
| 282 | CRYPTO_THREADID *BN_BLINDING_thread_id(BN_BLINDING *b) | ||
| 283 | { | ||
| 284 | return &b->tid; | ||
| 285 | } | ||
| 275 | 286 | ||
| 276 | unsigned long BN_BLINDING_get_flags(const BN_BLINDING *b) | 287 | unsigned long BN_BLINDING_get_flags(const BN_BLINDING *b) |
| 277 | { | 288 | { |
| @@ -284,7 +295,7 @@ void BN_BLINDING_set_flags(BN_BLINDING *b, unsigned long flags) | |||
| 284 | } | 295 | } |
| 285 | 296 | ||
| 286 | BN_BLINDING *BN_BLINDING_create_param(BN_BLINDING *b, | 297 | BN_BLINDING *BN_BLINDING_create_param(BN_BLINDING *b, |
| 287 | const BIGNUM *e, /* const */ BIGNUM *m, BN_CTX *ctx, | 298 | const BIGNUM *e, BIGNUM *m, BN_CTX *ctx, |
| 288 | int (*bn_mod_exp)(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, | 299 | int (*bn_mod_exp)(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, |
| 289 | const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *m_ctx), | 300 | const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *m_ctx), |
| 290 | BN_MONT_CTX *m_ctx) | 301 | BN_MONT_CTX *m_ctx) |
diff --git a/src/lib/libcrypto/bn/bn_ctx.c b/src/lib/libcrypto/bn/bn_ctx.c index b3452f1a91..3f2256f675 100644 --- a/src/lib/libcrypto/bn/bn_ctx.c +++ b/src/lib/libcrypto/bn/bn_ctx.c | |||
| @@ -161,7 +161,7 @@ static void ctxdbg(BN_CTX *ctx) | |||
| 161 | fprintf(stderr,"(%08x): ", (unsigned int)ctx); | 161 | fprintf(stderr,"(%08x): ", (unsigned int)ctx); |
| 162 | while(bnidx < ctx->used) | 162 | while(bnidx < ctx->used) |
| 163 | { | 163 | { |
| 164 | fprintf(stderr,"%02x ", item->vals[bnidx++ % BN_CTX_POOL_SIZE].dmax); | 164 | fprintf(stderr,"%03x ", item->vals[bnidx++ % BN_CTX_POOL_SIZE].dmax); |
| 165 | if(!(bnidx % BN_CTX_POOL_SIZE)) | 165 | if(!(bnidx % BN_CTX_POOL_SIZE)) |
| 166 | item = item->next; | 166 | item = item->next; |
| 167 | } | 167 | } |
| @@ -171,8 +171,8 @@ static void ctxdbg(BN_CTX *ctx) | |||
| 171 | while(fpidx < stack->depth) | 171 | while(fpidx < stack->depth) |
| 172 | { | 172 | { |
| 173 | while(bnidx++ < stack->indexes[fpidx]) | 173 | while(bnidx++ < stack->indexes[fpidx]) |
| 174 | fprintf(stderr," "); | 174 | fprintf(stderr," "); |
| 175 | fprintf(stderr,"^^ "); | 175 | fprintf(stderr,"^^^ "); |
| 176 | bnidx++; | 176 | bnidx++; |
| 177 | fpidx++; | 177 | fpidx++; |
| 178 | } | 178 | } |
diff --git a/src/lib/libcrypto/bn/bn_div.c b/src/lib/libcrypto/bn/bn_div.c index 1e8e57626b..802a43d642 100644 --- a/src/lib/libcrypto/bn/bn_div.c +++ b/src/lib/libcrypto/bn/bn_div.c | |||
| @@ -102,7 +102,7 @@ int BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, const BIGNUM *d, | |||
| 102 | /* The next 2 are needed so we can do a dv->d[0]|=1 later | 102 | /* The next 2 are needed so we can do a dv->d[0]|=1 later |
| 103 | * since BN_lshift1 will only work once there is a value :-) */ | 103 | * since BN_lshift1 will only work once there is a value :-) */ |
| 104 | BN_zero(dv); | 104 | BN_zero(dv); |
| 105 | bn_wexpand(dv,1); | 105 | if(bn_wexpand(dv,1) == NULL) goto end; |
| 106 | dv->top=1; | 106 | dv->top=1; |
| 107 | 107 | ||
| 108 | if (!BN_lshift(D,D,nm-nd)) goto end; | 108 | if (!BN_lshift(D,D,nm-nd)) goto end; |
| @@ -229,7 +229,8 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor, | |||
| 229 | if (dv == NULL) | 229 | if (dv == NULL) |
| 230 | res=BN_CTX_get(ctx); | 230 | res=BN_CTX_get(ctx); |
| 231 | else res=dv; | 231 | else res=dv; |
| 232 | if (sdiv == NULL || res == NULL) goto err; | 232 | if (sdiv == NULL || res == NULL || tmp == NULL || snum == NULL) |
| 233 | goto err; | ||
| 233 | 234 | ||
| 234 | /* First we normalise the numbers */ | 235 | /* First we normalise the numbers */ |
| 235 | norm_shift=BN_BITS2-((BN_num_bits(divisor))%BN_BITS2); | 236 | norm_shift=BN_BITS2-((BN_num_bits(divisor))%BN_BITS2); |
| @@ -336,7 +337,7 @@ X) -> 0x%08X\n", | |||
| 336 | t2 -= d1; | 337 | t2 -= d1; |
| 337 | } | 338 | } |
| 338 | #else /* !BN_LLONG */ | 339 | #else /* !BN_LLONG */ |
| 339 | BN_ULONG t2l,t2h,ql,qh; | 340 | BN_ULONG t2l,t2h; |
| 340 | 341 | ||
| 341 | q=bn_div_words(n0,n1,d0); | 342 | q=bn_div_words(n0,n1,d0); |
| 342 | #ifdef BN_DEBUG_LEVITTE | 343 | #ifdef BN_DEBUG_LEVITTE |
| @@ -354,9 +355,12 @@ X) -> 0x%08X\n", | |||
| 354 | t2l = d1 * q; | 355 | t2l = d1 * q; |
| 355 | t2h = BN_UMULT_HIGH(d1,q); | 356 | t2h = BN_UMULT_HIGH(d1,q); |
| 356 | #else | 357 | #else |
| 358 | { | ||
| 359 | BN_ULONG ql, qh; | ||
| 357 | t2l=LBITS(d1); t2h=HBITS(d1); | 360 | t2l=LBITS(d1); t2h=HBITS(d1); |
| 358 | ql =LBITS(q); qh =HBITS(q); | 361 | ql =LBITS(q); qh =HBITS(q); |
| 359 | mul64(t2l,t2h,ql,qh); /* t2=(BN_ULLONG)d1*q; */ | 362 | mul64(t2l,t2h,ql,qh); /* t2=(BN_ULLONG)d1*q; */ |
| 363 | } | ||
| 360 | #endif | 364 | #endif |
| 361 | 365 | ||
| 362 | for (;;) | 366 | for (;;) |
| @@ -560,7 +564,7 @@ X) -> 0x%08X\n", | |||
| 560 | t2 -= d1; | 564 | t2 -= d1; |
| 561 | } | 565 | } |
| 562 | #else /* !BN_LLONG */ | 566 | #else /* !BN_LLONG */ |
| 563 | BN_ULONG t2l,t2h,ql,qh; | 567 | BN_ULONG t2l,t2h; |
| 564 | 568 | ||
| 565 | q=bn_div_words(n0,n1,d0); | 569 | q=bn_div_words(n0,n1,d0); |
| 566 | #ifdef BN_DEBUG_LEVITTE | 570 | #ifdef BN_DEBUG_LEVITTE |
| @@ -578,9 +582,12 @@ X) -> 0x%08X\n", | |||
| 578 | t2l = d1 * q; | 582 | t2l = d1 * q; |
| 579 | t2h = BN_UMULT_HIGH(d1,q); | 583 | t2h = BN_UMULT_HIGH(d1,q); |
| 580 | #else | 584 | #else |
| 585 | { | ||
| 586 | BN_ULONG ql, qh; | ||
| 581 | t2l=LBITS(d1); t2h=HBITS(d1); | 587 | t2l=LBITS(d1); t2h=HBITS(d1); |
| 582 | ql =LBITS(q); qh =HBITS(q); | 588 | ql =LBITS(q); qh =HBITS(q); |
| 583 | mul64(t2l,t2h,ql,qh); /* t2=(BN_ULLONG)d1*q; */ | 589 | mul64(t2l,t2h,ql,qh); /* t2=(BN_ULLONG)d1*q; */ |
| 590 | } | ||
| 584 | #endif | 591 | #endif |
| 585 | 592 | ||
| 586 | for (;;) | 593 | for (;;) |
diff --git a/src/lib/libcrypto/bn/bn_exp.c b/src/lib/libcrypto/bn/bn_exp.c index 70a33f0d93..d9b6c737fc 100644 --- a/src/lib/libcrypto/bn/bn_exp.c +++ b/src/lib/libcrypto/bn/bn_exp.c | |||
| @@ -134,7 +134,8 @@ int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) | |||
| 134 | rr = BN_CTX_get(ctx); | 134 | rr = BN_CTX_get(ctx); |
| 135 | else | 135 | else |
| 136 | rr = r; | 136 | rr = r; |
| 137 | if ((v = BN_CTX_get(ctx)) == NULL) goto err; | 137 | v = BN_CTX_get(ctx); |
| 138 | if (rr == NULL || v == NULL) goto err; | ||
| 138 | 139 | ||
| 139 | if (BN_copy(v,a) == NULL) goto err; | 140 | if (BN_copy(v,a) == NULL) goto err; |
| 140 | bits=BN_num_bits(p); | 141 | bits=BN_num_bits(p); |
diff --git a/src/lib/libcrypto/bn/bn_gf2m.c b/src/lib/libcrypto/bn/bn_gf2m.c index 306f029f27..527b0fa15b 100644 --- a/src/lib/libcrypto/bn/bn_gf2m.c +++ b/src/lib/libcrypto/bn/bn_gf2m.c | |||
| @@ -121,74 +121,12 @@ static const BN_ULONG SQR_tb[16] = | |||
| 121 | SQR_tb[(w) >> 12 & 0xF] << 24 | SQR_tb[(w) >> 8 & 0xF] << 16 | \ | 121 | SQR_tb[(w) >> 12 & 0xF] << 24 | SQR_tb[(w) >> 8 & 0xF] << 16 | \ |
| 122 | SQR_tb[(w) >> 4 & 0xF] << 8 | SQR_tb[(w) & 0xF] | 122 | SQR_tb[(w) >> 4 & 0xF] << 8 | SQR_tb[(w) & 0xF] |
| 123 | #endif | 123 | #endif |
| 124 | #ifdef SIXTEEN_BIT | ||
| 125 | #define SQR1(w) \ | ||
| 126 | SQR_tb[(w) >> 12 & 0xF] << 8 | SQR_tb[(w) >> 8 & 0xF] | ||
| 127 | #define SQR0(w) \ | ||
| 128 | SQR_tb[(w) >> 4 & 0xF] << 8 | SQR_tb[(w) & 0xF] | ||
| 129 | #endif | ||
| 130 | #ifdef EIGHT_BIT | ||
| 131 | #define SQR1(w) \ | ||
| 132 | SQR_tb[(w) >> 4 & 0xF] | ||
| 133 | #define SQR0(w) \ | ||
| 134 | SQR_tb[(w) & 15] | ||
| 135 | #endif | ||
| 136 | 124 | ||
| 137 | /* Product of two polynomials a, b each with degree < BN_BITS2 - 1, | 125 | /* Product of two polynomials a, b each with degree < BN_BITS2 - 1, |
| 138 | * result is a polynomial r with degree < 2 * BN_BITS - 1 | 126 | * result is a polynomial r with degree < 2 * BN_BITS - 1 |
| 139 | * The caller MUST ensure that the variables have the right amount | 127 | * The caller MUST ensure that the variables have the right amount |
| 140 | * of space allocated. | 128 | * of space allocated. |
| 141 | */ | 129 | */ |
| 142 | #ifdef EIGHT_BIT | ||
| 143 | static void bn_GF2m_mul_1x1(BN_ULONG *r1, BN_ULONG *r0, const BN_ULONG a, const BN_ULONG b) | ||
| 144 | { | ||
| 145 | register BN_ULONG h, l, s; | ||
| 146 | BN_ULONG tab[4], top1b = a >> 7; | ||
| 147 | register BN_ULONG a1, a2; | ||
| 148 | |||
| 149 | a1 = a & (0x7F); a2 = a1 << 1; | ||
| 150 | |||
| 151 | tab[0] = 0; tab[1] = a1; tab[2] = a2; tab[3] = a1^a2; | ||
| 152 | |||
| 153 | s = tab[b & 0x3]; l = s; | ||
| 154 | s = tab[b >> 2 & 0x3]; l ^= s << 2; h = s >> 6; | ||
| 155 | s = tab[b >> 4 & 0x3]; l ^= s << 4; h ^= s >> 4; | ||
| 156 | s = tab[b >> 6 ]; l ^= s << 6; h ^= s >> 2; | ||
| 157 | |||
| 158 | /* compensate for the top bit of a */ | ||
| 159 | |||
| 160 | if (top1b & 01) { l ^= b << 7; h ^= b >> 1; } | ||
| 161 | |||
| 162 | *r1 = h; *r0 = l; | ||
| 163 | } | ||
| 164 | #endif | ||
| 165 | #ifdef SIXTEEN_BIT | ||
| 166 | static void bn_GF2m_mul_1x1(BN_ULONG *r1, BN_ULONG *r0, const BN_ULONG a, const BN_ULONG b) | ||
| 167 | { | ||
| 168 | register BN_ULONG h, l, s; | ||
| 169 | BN_ULONG tab[4], top1b = a >> 15; | ||
| 170 | register BN_ULONG a1, a2; | ||
| 171 | |||
| 172 | a1 = a & (0x7FFF); a2 = a1 << 1; | ||
| 173 | |||
| 174 | tab[0] = 0; tab[1] = a1; tab[2] = a2; tab[3] = a1^a2; | ||
| 175 | |||
| 176 | s = tab[b & 0x3]; l = s; | ||
| 177 | s = tab[b >> 2 & 0x3]; l ^= s << 2; h = s >> 14; | ||
| 178 | s = tab[b >> 4 & 0x3]; l ^= s << 4; h ^= s >> 12; | ||
| 179 | s = tab[b >> 6 & 0x3]; l ^= s << 6; h ^= s >> 10; | ||
| 180 | s = tab[b >> 8 & 0x3]; l ^= s << 8; h ^= s >> 8; | ||
| 181 | s = tab[b >>10 & 0x3]; l ^= s << 10; h ^= s >> 6; | ||
| 182 | s = tab[b >>12 & 0x3]; l ^= s << 12; h ^= s >> 4; | ||
| 183 | s = tab[b >>14 ]; l ^= s << 14; h ^= s >> 2; | ||
| 184 | |||
| 185 | /* compensate for the top bit of a */ | ||
| 186 | |||
| 187 | if (top1b & 01) { l ^= b << 15; h ^= b >> 1; } | ||
| 188 | |||
| 189 | *r1 = h; *r0 = l; | ||
| 190 | } | ||
| 191 | #endif | ||
| 192 | #ifdef THIRTY_TWO_BIT | 130 | #ifdef THIRTY_TWO_BIT |
| 193 | static void bn_GF2m_mul_1x1(BN_ULONG *r1, BN_ULONG *r0, const BN_ULONG a, const BN_ULONG b) | 131 | static void bn_GF2m_mul_1x1(BN_ULONG *r1, BN_ULONG *r0, const BN_ULONG a, const BN_ULONG b) |
| 194 | { | 132 | { |
| @@ -294,7 +232,8 @@ int BN_GF2m_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b) | |||
| 294 | if (a->top < b->top) { at = b; bt = a; } | 232 | if (a->top < b->top) { at = b; bt = a; } |
| 295 | else { at = a; bt = b; } | 233 | else { at = a; bt = b; } |
| 296 | 234 | ||
| 297 | bn_wexpand(r, at->top); | 235 | if(bn_wexpand(r, at->top) == NULL) |
| 236 | return 0; | ||
| 298 | 237 | ||
| 299 | for (i = 0; i < bt->top; i++) | 238 | for (i = 0; i < bt->top; i++) |
| 300 | { | 239 | { |
| @@ -320,7 +259,7 @@ int BN_GF2m_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b) | |||
| 320 | 259 | ||
| 321 | 260 | ||
| 322 | /* Performs modular reduction of a and store result in r. r could be a. */ | 261 | /* Performs modular reduction of a and store result in r. r could be a. */ |
| 323 | int BN_GF2m_mod_arr(BIGNUM *r, const BIGNUM *a, const unsigned int p[]) | 262 | int BN_GF2m_mod_arr(BIGNUM *r, const BIGNUM *a, const int p[]) |
| 324 | { | 263 | { |
| 325 | int j, k; | 264 | int j, k; |
| 326 | int n, dN, d0, d1; | 265 | int n, dN, d0, d1; |
| @@ -421,11 +360,11 @@ int BN_GF2m_mod_arr(BIGNUM *r, const BIGNUM *a, const unsigned int p[]) | |||
| 421 | int BN_GF2m_mod(BIGNUM *r, const BIGNUM *a, const BIGNUM *p) | 360 | int BN_GF2m_mod(BIGNUM *r, const BIGNUM *a, const BIGNUM *p) |
| 422 | { | 361 | { |
| 423 | int ret = 0; | 362 | int ret = 0; |
| 424 | const int max = BN_num_bits(p); | 363 | const int max = BN_num_bits(p) + 1; |
| 425 | unsigned int *arr=NULL; | 364 | int *arr=NULL; |
| 426 | bn_check_top(a); | 365 | bn_check_top(a); |
| 427 | bn_check_top(p); | 366 | bn_check_top(p); |
| 428 | if ((arr = (unsigned int *)OPENSSL_malloc(sizeof(unsigned int) * max)) == NULL) goto err; | 367 | if ((arr = (int *)OPENSSL_malloc(sizeof(int) * max)) == NULL) goto err; |
| 429 | ret = BN_GF2m_poly2arr(p, arr, max); | 368 | ret = BN_GF2m_poly2arr(p, arr, max); |
| 430 | if (!ret || ret > max) | 369 | if (!ret || ret > max) |
| 431 | { | 370 | { |
| @@ -443,7 +382,7 @@ err: | |||
| 443 | /* Compute the product of two polynomials a and b, reduce modulo p, and store | 382 | /* Compute the product of two polynomials a and b, reduce modulo p, and store |
| 444 | * the result in r. r could be a or b; a could be b. | 383 | * the result in r. r could be a or b; a could be b. |
| 445 | */ | 384 | */ |
| 446 | int BN_GF2m_mod_mul_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const unsigned int p[], BN_CTX *ctx) | 385 | int BN_GF2m_mod_mul_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const int p[], BN_CTX *ctx) |
| 447 | { | 386 | { |
| 448 | int zlen, i, j, k, ret = 0; | 387 | int zlen, i, j, k, ret = 0; |
| 449 | BIGNUM *s; | 388 | BIGNUM *s; |
| @@ -499,12 +438,12 @@ err: | |||
| 499 | int BN_GF2m_mod_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *p, BN_CTX *ctx) | 438 | int BN_GF2m_mod_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *p, BN_CTX *ctx) |
| 500 | { | 439 | { |
| 501 | int ret = 0; | 440 | int ret = 0; |
| 502 | const int max = BN_num_bits(p); | 441 | const int max = BN_num_bits(p) + 1; |
| 503 | unsigned int *arr=NULL; | 442 | int *arr=NULL; |
| 504 | bn_check_top(a); | 443 | bn_check_top(a); |
| 505 | bn_check_top(b); | 444 | bn_check_top(b); |
| 506 | bn_check_top(p); | 445 | bn_check_top(p); |
| 507 | if ((arr = (unsigned int *)OPENSSL_malloc(sizeof(unsigned int) * max)) == NULL) goto err; | 446 | if ((arr = (int *)OPENSSL_malloc(sizeof(int) * max)) == NULL) goto err; |
| 508 | ret = BN_GF2m_poly2arr(p, arr, max); | 447 | ret = BN_GF2m_poly2arr(p, arr, max); |
| 509 | if (!ret || ret > max) | 448 | if (!ret || ret > max) |
| 510 | { | 449 | { |
| @@ -520,7 +459,7 @@ err: | |||
| 520 | 459 | ||
| 521 | 460 | ||
| 522 | /* Square a, reduce the result mod p, and store it in a. r could be a. */ | 461 | /* Square a, reduce the result mod p, and store it in a. r could be a. */ |
| 523 | int BN_GF2m_mod_sqr_arr(BIGNUM *r, const BIGNUM *a, const unsigned int p[], BN_CTX *ctx) | 462 | int BN_GF2m_mod_sqr_arr(BIGNUM *r, const BIGNUM *a, const int p[], BN_CTX *ctx) |
| 524 | { | 463 | { |
| 525 | int i, ret = 0; | 464 | int i, ret = 0; |
| 526 | BIGNUM *s; | 465 | BIGNUM *s; |
| @@ -555,12 +494,12 @@ err: | |||
| 555 | int BN_GF2m_mod_sqr(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) | 494 | int BN_GF2m_mod_sqr(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) |
| 556 | { | 495 | { |
| 557 | int ret = 0; | 496 | int ret = 0; |
| 558 | const int max = BN_num_bits(p); | 497 | const int max = BN_num_bits(p) + 1; |
| 559 | unsigned int *arr=NULL; | 498 | int *arr=NULL; |
| 560 | 499 | ||
| 561 | bn_check_top(a); | 500 | bn_check_top(a); |
| 562 | bn_check_top(p); | 501 | bn_check_top(p); |
| 563 | if ((arr = (unsigned int *)OPENSSL_malloc(sizeof(unsigned int) * max)) == NULL) goto err; | 502 | if ((arr = (int *)OPENSSL_malloc(sizeof(int) * max)) == NULL) goto err; |
| 564 | ret = BN_GF2m_poly2arr(p, arr, max); | 503 | ret = BN_GF2m_poly2arr(p, arr, max); |
| 565 | if (!ret || ret > max) | 504 | if (!ret || ret > max) |
| 566 | { | 505 | { |
| @@ -642,7 +581,7 @@ err: | |||
| 642 | * function is only provided for convenience; for best performance, use the | 581 | * function is only provided for convenience; for best performance, use the |
| 643 | * BN_GF2m_mod_inv function. | 582 | * BN_GF2m_mod_inv function. |
| 644 | */ | 583 | */ |
| 645 | int BN_GF2m_mod_inv_arr(BIGNUM *r, const BIGNUM *xx, const unsigned int p[], BN_CTX *ctx) | 584 | int BN_GF2m_mod_inv_arr(BIGNUM *r, const BIGNUM *xx, const int p[], BN_CTX *ctx) |
| 646 | { | 585 | { |
| 647 | BIGNUM *field; | 586 | BIGNUM *field; |
| 648 | int ret = 0; | 587 | int ret = 0; |
| @@ -768,7 +707,7 @@ err: | |||
| 768 | * function is only provided for convenience; for best performance, use the | 707 | * function is only provided for convenience; for best performance, use the |
| 769 | * BN_GF2m_mod_div function. | 708 | * BN_GF2m_mod_div function. |
| 770 | */ | 709 | */ |
| 771 | int BN_GF2m_mod_div_arr(BIGNUM *r, const BIGNUM *yy, const BIGNUM *xx, const unsigned int p[], BN_CTX *ctx) | 710 | int BN_GF2m_mod_div_arr(BIGNUM *r, const BIGNUM *yy, const BIGNUM *xx, const int p[], BN_CTX *ctx) |
| 772 | { | 711 | { |
| 773 | BIGNUM *field; | 712 | BIGNUM *field; |
| 774 | int ret = 0; | 713 | int ret = 0; |
| @@ -793,7 +732,7 @@ err: | |||
| 793 | * the result in r. r could be a. | 732 | * the result in r. r could be a. |
| 794 | * Uses simple square-and-multiply algorithm A.5.1 from IEEE P1363. | 733 | * Uses simple square-and-multiply algorithm A.5.1 from IEEE P1363. |
| 795 | */ | 734 | */ |
| 796 | int BN_GF2m_mod_exp_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const unsigned int p[], BN_CTX *ctx) | 735 | int BN_GF2m_mod_exp_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const int p[], BN_CTX *ctx) |
| 797 | { | 736 | { |
| 798 | int ret = 0, i, n; | 737 | int ret = 0, i, n; |
| 799 | BIGNUM *u; | 738 | BIGNUM *u; |
| @@ -839,12 +778,12 @@ err: | |||
| 839 | int BN_GF2m_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *p, BN_CTX *ctx) | 778 | int BN_GF2m_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *p, BN_CTX *ctx) |
| 840 | { | 779 | { |
| 841 | int ret = 0; | 780 | int ret = 0; |
| 842 | const int max = BN_num_bits(p); | 781 | const int max = BN_num_bits(p) + 1; |
| 843 | unsigned int *arr=NULL; | 782 | int *arr=NULL; |
| 844 | bn_check_top(a); | 783 | bn_check_top(a); |
| 845 | bn_check_top(b); | 784 | bn_check_top(b); |
| 846 | bn_check_top(p); | 785 | bn_check_top(p); |
| 847 | if ((arr = (unsigned int *)OPENSSL_malloc(sizeof(unsigned int) * max)) == NULL) goto err; | 786 | if ((arr = (int *)OPENSSL_malloc(sizeof(int) * max)) == NULL) goto err; |
| 848 | ret = BN_GF2m_poly2arr(p, arr, max); | 787 | ret = BN_GF2m_poly2arr(p, arr, max); |
| 849 | if (!ret || ret > max) | 788 | if (!ret || ret > max) |
| 850 | { | 789 | { |
| @@ -862,7 +801,7 @@ err: | |||
| 862 | * the result in r. r could be a. | 801 | * the result in r. r could be a. |
| 863 | * Uses exponentiation as in algorithm A.4.1 from IEEE P1363. | 802 | * Uses exponentiation as in algorithm A.4.1 from IEEE P1363. |
| 864 | */ | 803 | */ |
| 865 | int BN_GF2m_mod_sqrt_arr(BIGNUM *r, const BIGNUM *a, const unsigned int p[], BN_CTX *ctx) | 804 | int BN_GF2m_mod_sqrt_arr(BIGNUM *r, const BIGNUM *a, const int p[], BN_CTX *ctx) |
| 866 | { | 805 | { |
| 867 | int ret = 0; | 806 | int ret = 0; |
| 868 | BIGNUM *u; | 807 | BIGNUM *u; |
| @@ -898,11 +837,11 @@ err: | |||
| 898 | int BN_GF2m_mod_sqrt(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) | 837 | int BN_GF2m_mod_sqrt(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) |
| 899 | { | 838 | { |
| 900 | int ret = 0; | 839 | int ret = 0; |
| 901 | const int max = BN_num_bits(p); | 840 | const int max = BN_num_bits(p) + 1; |
| 902 | unsigned int *arr=NULL; | 841 | int *arr=NULL; |
| 903 | bn_check_top(a); | 842 | bn_check_top(a); |
| 904 | bn_check_top(p); | 843 | bn_check_top(p); |
| 905 | if ((arr = (unsigned int *)OPENSSL_malloc(sizeof(unsigned int) * max)) == NULL) goto err; | 844 | if ((arr = (int *)OPENSSL_malloc(sizeof(int) * max)) == NULL) goto err; |
| 906 | ret = BN_GF2m_poly2arr(p, arr, max); | 845 | ret = BN_GF2m_poly2arr(p, arr, max); |
| 907 | if (!ret || ret > max) | 846 | if (!ret || ret > max) |
| 908 | { | 847 | { |
| @@ -919,10 +858,9 @@ err: | |||
| 919 | /* Find r such that r^2 + r = a mod p. r could be a. If no r exists returns 0. | 858 | /* Find r such that r^2 + r = a mod p. r could be a. If no r exists returns 0. |
| 920 | * Uses algorithms A.4.7 and A.4.6 from IEEE P1363. | 859 | * Uses algorithms A.4.7 and A.4.6 from IEEE P1363. |
| 921 | */ | 860 | */ |
| 922 | int BN_GF2m_mod_solve_quad_arr(BIGNUM *r, const BIGNUM *a_, const unsigned int p[], BN_CTX *ctx) | 861 | int BN_GF2m_mod_solve_quad_arr(BIGNUM *r, const BIGNUM *a_, const int p[], BN_CTX *ctx) |
| 923 | { | 862 | { |
| 924 | int ret = 0, count = 0; | 863 | int ret = 0, count = 0, j; |
| 925 | unsigned int j; | ||
| 926 | BIGNUM *a, *z, *rho, *w, *w2, *tmp; | 864 | BIGNUM *a, *z, *rho, *w, *w2, *tmp; |
| 927 | 865 | ||
| 928 | bn_check_top(a_); | 866 | bn_check_top(a_); |
| @@ -1017,11 +955,11 @@ err: | |||
| 1017 | int BN_GF2m_mod_solve_quad(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) | 955 | int BN_GF2m_mod_solve_quad(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) |
| 1018 | { | 956 | { |
| 1019 | int ret = 0; | 957 | int ret = 0; |
| 1020 | const int max = BN_num_bits(p); | 958 | const int max = BN_num_bits(p) + 1; |
| 1021 | unsigned int *arr=NULL; | 959 | int *arr=NULL; |
| 1022 | bn_check_top(a); | 960 | bn_check_top(a); |
| 1023 | bn_check_top(p); | 961 | bn_check_top(p); |
| 1024 | if ((arr = (unsigned int *)OPENSSL_malloc(sizeof(unsigned int) * | 962 | if ((arr = (int *)OPENSSL_malloc(sizeof(int) * |
| 1025 | max)) == NULL) goto err; | 963 | max)) == NULL) goto err; |
| 1026 | ret = BN_GF2m_poly2arr(p, arr, max); | 964 | ret = BN_GF2m_poly2arr(p, arr, max); |
| 1027 | if (!ret || ret > max) | 965 | if (!ret || ret > max) |
| @@ -1037,20 +975,17 @@ err: | |||
| 1037 | } | 975 | } |
| 1038 | 976 | ||
| 1039 | /* Convert the bit-string representation of a polynomial | 977 | /* Convert the bit-string representation of a polynomial |
| 1040 | * ( \sum_{i=0}^n a_i * x^i , where a_0 is *not* zero) into an array | 978 | * ( \sum_{i=0}^n a_i * x^i) into an array of integers corresponding |
| 1041 | * of integers corresponding to the bits with non-zero coefficient. | 979 | * to the bits with non-zero coefficient. Array is terminated with -1. |
| 1042 | * Up to max elements of the array will be filled. Return value is total | 980 | * Up to max elements of the array will be filled. Return value is total |
| 1043 | * number of coefficients that would be extracted if array was large enough. | 981 | * number of array elements that would be filled if array was large enough. |
| 1044 | */ | 982 | */ |
| 1045 | int BN_GF2m_poly2arr(const BIGNUM *a, unsigned int p[], int max) | 983 | int BN_GF2m_poly2arr(const BIGNUM *a, int p[], int max) |
| 1046 | { | 984 | { |
| 1047 | int i, j, k = 0; | 985 | int i, j, k = 0; |
| 1048 | BN_ULONG mask; | 986 | BN_ULONG mask; |
| 1049 | 987 | ||
| 1050 | if (BN_is_zero(a) || !BN_is_bit_set(a, 0)) | 988 | if (BN_is_zero(a)) |
| 1051 | /* a_0 == 0 => return error (the unsigned int array | ||
| 1052 | * must be terminated by 0) | ||
| 1053 | */ | ||
| 1054 | return 0; | 989 | return 0; |
| 1055 | 990 | ||
| 1056 | for (i = a->top - 1; i >= 0; i--) | 991 | for (i = a->top - 1; i >= 0; i--) |
| @@ -1070,24 +1005,28 @@ int BN_GF2m_poly2arr(const BIGNUM *a, unsigned int p[], int max) | |||
| 1070 | } | 1005 | } |
| 1071 | } | 1006 | } |
| 1072 | 1007 | ||
| 1008 | if (k < max) { | ||
| 1009 | p[k] = -1; | ||
| 1010 | k++; | ||
| 1011 | } | ||
| 1012 | |||
| 1073 | return k; | 1013 | return k; |
| 1074 | } | 1014 | } |
| 1075 | 1015 | ||
| 1076 | /* Convert the coefficient array representation of a polynomial to a | 1016 | /* Convert the coefficient array representation of a polynomial to a |
| 1077 | * bit-string. The array must be terminated by 0. | 1017 | * bit-string. The array must be terminated by -1. |
| 1078 | */ | 1018 | */ |
| 1079 | int BN_GF2m_arr2poly(const unsigned int p[], BIGNUM *a) | 1019 | int BN_GF2m_arr2poly(const int p[], BIGNUM *a) |
| 1080 | { | 1020 | { |
| 1081 | int i; | 1021 | int i; |
| 1082 | 1022 | ||
| 1083 | bn_check_top(a); | 1023 | bn_check_top(a); |
| 1084 | BN_zero(a); | 1024 | BN_zero(a); |
| 1085 | for (i = 0; p[i] != 0; i++) | 1025 | for (i = 0; p[i] != -1; i++) |
| 1086 | { | 1026 | { |
| 1087 | if (BN_set_bit(a, p[i]) == 0) | 1027 | if (BN_set_bit(a, p[i]) == 0) |
| 1088 | return 0; | 1028 | return 0; |
| 1089 | } | 1029 | } |
| 1090 | BN_set_bit(a, 0); | ||
| 1091 | bn_check_top(a); | 1030 | bn_check_top(a); |
| 1092 | 1031 | ||
| 1093 | return 1; | 1032 | return 1; |
diff --git a/src/lib/libcrypto/bn/bn_lcl.h b/src/lib/libcrypto/bn/bn_lcl.h index 27ac4397a1..8e5e98e3f2 100644 --- a/src/lib/libcrypto/bn/bn_lcl.h +++ b/src/lib/libcrypto/bn/bn_lcl.h | |||
| @@ -255,7 +255,8 @@ extern "C" { | |||
| 255 | : "r"(a), "r"(b)); \ | 255 | : "r"(a), "r"(b)); \ |
| 256 | ret; }) | 256 | ret; }) |
| 257 | # endif /* compiler */ | 257 | # endif /* compiler */ |
| 258 | # elif defined(__x86_64) && defined(SIXTY_FOUR_BIT_LONG) | 258 | # elif (defined(__x86_64) || defined(__x86_64__)) && \ |
| 259 | (defined(SIXTY_FOUR_BIT_LONG) || defined(SIXTY_FOUR_BIT)) | ||
| 259 | # if defined(__GNUC__) | 260 | # if defined(__GNUC__) |
| 260 | # define BN_UMULT_HIGH(a,b) ({ \ | 261 | # define BN_UMULT_HIGH(a,b) ({ \ |
| 261 | register BN_ULONG ret,discard; \ | 262 | register BN_ULONG ret,discard; \ |
diff --git a/src/lib/libcrypto/bn/bn_lib.c b/src/lib/libcrypto/bn/bn_lib.c index 32a8fbaf51..5470fbe6ef 100644 --- a/src/lib/libcrypto/bn/bn_lib.c +++ b/src/lib/libcrypto/bn/bn_lib.c | |||
| @@ -133,15 +133,34 @@ int BN_get_params(int which) | |||
| 133 | 133 | ||
| 134 | const BIGNUM *BN_value_one(void) | 134 | const BIGNUM *BN_value_one(void) |
| 135 | { | 135 | { |
| 136 | static BN_ULONG data_one=1L; | 136 | static const BN_ULONG data_one=1L; |
| 137 | static BIGNUM const_one={&data_one,1,1,0,BN_FLG_STATIC_DATA}; | 137 | static const BIGNUM const_one={(BN_ULONG *)&data_one,1,1,0,BN_FLG_STATIC_DATA}; |
| 138 | 138 | ||
| 139 | return(&const_one); | 139 | return(&const_one); |
| 140 | } | 140 | } |
| 141 | 141 | ||
| 142 | char *BN_options(void) | ||
| 143 | { | ||
| 144 | static int init=0; | ||
| 145 | static char data[16]; | ||
| 146 | |||
| 147 | if (!init) | ||
| 148 | { | ||
| 149 | init++; | ||
| 150 | #ifdef BN_LLONG | ||
| 151 | BIO_snprintf(data,sizeof data,"bn(%d,%d)", | ||
| 152 | (int)sizeof(BN_ULLONG)*8,(int)sizeof(BN_ULONG)*8); | ||
| 153 | #else | ||
| 154 | BIO_snprintf(data,sizeof data,"bn(%d,%d)", | ||
| 155 | (int)sizeof(BN_ULONG)*8,(int)sizeof(BN_ULONG)*8); | ||
| 156 | #endif | ||
| 157 | } | ||
| 158 | return(data); | ||
| 159 | } | ||
| 160 | |||
| 142 | int BN_num_bits_word(BN_ULONG l) | 161 | int BN_num_bits_word(BN_ULONG l) |
| 143 | { | 162 | { |
| 144 | static const char bits[256]={ | 163 | static const unsigned char bits[256]={ |
| 145 | 0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4, | 164 | 0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4, |
| 146 | 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, | 165 | 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, |
| 147 | 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, | 166 | 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, |
| @@ -216,7 +235,7 @@ int BN_num_bits_word(BN_ULONG l) | |||
| 216 | else | 235 | else |
| 217 | #endif | 236 | #endif |
| 218 | { | 237 | { |
| 219 | #if defined(SIXTEEN_BIT) || defined(THIRTY_TWO_BIT) || defined(SIXTY_FOUR_BIT) || defined(SIXTY_FOUR_BIT_LONG) | 238 | #if defined(THIRTY_TWO_BIT) || defined(SIXTY_FOUR_BIT) || defined(SIXTY_FOUR_BIT_LONG) |
| 220 | if (l & 0xff00L) | 239 | if (l & 0xff00L) |
| 221 | return(bits[(int)(l>>8)]+8); | 240 | return(bits[(int)(l>>8)]+8); |
| 222 | else | 241 | else |
| @@ -744,7 +763,7 @@ int BN_is_bit_set(const BIGNUM *a, int n) | |||
| 744 | i=n/BN_BITS2; | 763 | i=n/BN_BITS2; |
| 745 | j=n%BN_BITS2; | 764 | j=n%BN_BITS2; |
| 746 | if (a->top <= i) return 0; | 765 | if (a->top <= i) return 0; |
| 747 | return(((a->d[i])>>j)&((BN_ULONG)1)); | 766 | return (int)(((a->d[i])>>j)&((BN_ULONG)1)); |
| 748 | } | 767 | } |
| 749 | 768 | ||
| 750 | int BN_mask_bits(BIGNUM *a, int n) | 769 | int BN_mask_bits(BIGNUM *a, int n) |
diff --git a/src/lib/libcrypto/bn/bn_mont.c b/src/lib/libcrypto/bn/bn_mont.c index 4799b152dd..7224637ab3 100644 --- a/src/lib/libcrypto/bn/bn_mont.c +++ b/src/lib/libcrypto/bn/bn_mont.c | |||
| @@ -122,26 +122,10 @@ | |||
| 122 | 122 | ||
| 123 | #define MONT_WORD /* use the faster word-based algorithm */ | 123 | #define MONT_WORD /* use the faster word-based algorithm */ |
| 124 | 124 | ||
| 125 | #if defined(MONT_WORD) && defined(OPENSSL_BN_ASM_MONT) && (BN_BITS2<=32) | 125 | #ifdef MONT_WORD |
| 126 | /* This condition means we have a specific non-default build: | ||
| 127 | * In the 0.9.8 branch, OPENSSL_BN_ASM_MONT is normally not set for any | ||
| 128 | * BN_BITS2<=32 platform; an explicit "enable-montasm" is required. | ||
| 129 | * I.e., if we are here, the user intentionally deviates from the | ||
| 130 | * normal stable build to get better Montgomery performance from | ||
| 131 | * the 0.9.9-dev backport. | ||
| 132 | * | ||
| 133 | * In this case only, we also enable BN_from_montgomery_word() | ||
| 134 | * (another non-stable feature from 0.9.9-dev). | ||
| 135 | */ | ||
| 136 | #define MONT_FROM_WORD___NON_DEFAULT_0_9_8_BUILD | ||
| 137 | #endif | ||
| 138 | |||
| 139 | #ifdef MONT_FROM_WORD___NON_DEFAULT_0_9_8_BUILD | ||
| 140 | static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont); | 126 | static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont); |
| 141 | #endif | 127 | #endif |
| 142 | 128 | ||
| 143 | |||
| 144 | |||
| 145 | int BN_mod_mul_montgomery(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, | 129 | int BN_mod_mul_montgomery(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, |
| 146 | BN_MONT_CTX *mont, BN_CTX *ctx) | 130 | BN_MONT_CTX *mont, BN_CTX *ctx) |
| 147 | { | 131 | { |
| @@ -153,11 +137,7 @@ int BN_mod_mul_montgomery(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, | |||
| 153 | if (num>1 && a->top==num && b->top==num) | 137 | if (num>1 && a->top==num && b->top==num) |
| 154 | { | 138 | { |
| 155 | if (bn_wexpand(r,num) == NULL) return(0); | 139 | if (bn_wexpand(r,num) == NULL) return(0); |
| 156 | #if 0 /* for OpenSSL 0.9.9 mont->n0 */ | ||
| 157 | if (bn_mul_mont(r->d,a->d,b->d,mont->N.d,mont->n0,num)) | 140 | if (bn_mul_mont(r->d,a->d,b->d,mont->N.d,mont->n0,num)) |
| 158 | #else | ||
| 159 | if (bn_mul_mont(r->d,a->d,b->d,mont->N.d,&mont->n0,num)) | ||
| 160 | #endif | ||
| 161 | { | 141 | { |
| 162 | r->neg = a->neg^b->neg; | 142 | r->neg = a->neg^b->neg; |
| 163 | r->top = num; | 143 | r->top = num; |
| @@ -181,7 +161,7 @@ int BN_mod_mul_montgomery(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, | |||
| 181 | if (!BN_mul(tmp,a,b,ctx)) goto err; | 161 | if (!BN_mul(tmp,a,b,ctx)) goto err; |
| 182 | } | 162 | } |
| 183 | /* reduce from aRR to aR */ | 163 | /* reduce from aRR to aR */ |
| 184 | #ifdef MONT_FROM_WORD___NON_DEFAULT_0_9_8_BUILD | 164 | #ifdef MONT_WORD |
| 185 | if (!BN_from_montgomery_word(r,tmp,mont)) goto err; | 165 | if (!BN_from_montgomery_word(r,tmp,mont)) goto err; |
| 186 | #else | 166 | #else |
| 187 | if (!BN_from_montgomery(r,tmp,mont,ctx)) goto err; | 167 | if (!BN_from_montgomery(r,tmp,mont,ctx)) goto err; |
| @@ -193,7 +173,7 @@ err: | |||
| 193 | return(ret); | 173 | return(ret); |
| 194 | } | 174 | } |
| 195 | 175 | ||
| 196 | #ifdef MONT_FROM_WORD___NON_DEFAULT_0_9_8_BUILD | 176 | #ifdef MONT_WORD |
| 197 | static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont) | 177 | static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont) |
| 198 | { | 178 | { |
| 199 | BIGNUM *n; | 179 | BIGNUM *n; |
| @@ -217,15 +197,15 @@ static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont) | |||
| 217 | nrp= &(r->d[nl]); | 197 | nrp= &(r->d[nl]); |
| 218 | 198 | ||
| 219 | /* clear the top words of T */ | 199 | /* clear the top words of T */ |
| 200 | #if 1 | ||
| 220 | for (i=r->top; i<max; i++) /* memset? XXX */ | 201 | for (i=r->top; i<max; i++) /* memset? XXX */ |
| 221 | r->d[i]=0; | 202 | r->d[i]=0; |
| 203 | #else | ||
| 204 | memset(&(r->d[r->top]),0,(max-r->top)*sizeof(BN_ULONG)); | ||
| 205 | #endif | ||
| 222 | 206 | ||
| 223 | r->top=max; | 207 | r->top=max; |
| 224 | #if 0 /* for OpenSSL 0.9.9 mont->n0 */ | ||
| 225 | n0=mont->n0[0]; | 208 | n0=mont->n0[0]; |
| 226 | #else | ||
| 227 | n0=mont->n0; | ||
| 228 | #endif | ||
| 229 | 209 | ||
| 230 | #ifdef BN_COUNT | 210 | #ifdef BN_COUNT |
| 231 | fprintf(stderr,"word BN_from_montgomery_word %d * %d\n",nl,nl); | 211 | fprintf(stderr,"word BN_from_montgomery_word %d * %d\n",nl,nl); |
| @@ -270,6 +250,8 @@ static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont) | |||
| 270 | } | 250 | } |
| 271 | al=r->top-ri; | 251 | al=r->top-ri; |
| 272 | 252 | ||
| 253 | #define BRANCH_FREE 1 | ||
| 254 | #if BRANCH_FREE | ||
| 273 | if (bn_wexpand(ret,ri) == NULL) return(0); | 255 | if (bn_wexpand(ret,ri) == NULL) return(0); |
| 274 | x=0-(((al-ri)>>(sizeof(al)*8-1))&1); | 256 | x=0-(((al-ri)>>(sizeof(al)*8-1))&1); |
| 275 | ret->top=x=(ri&~x)|(al&x); /* min(ri,al) */ | 257 | ret->top=x=(ri&~x)|(al&x); /* min(ri,al) */ |
| @@ -317,164 +299,8 @@ static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont) | |||
| 317 | rp[i]=nrp[i], ap[i]=0; | 299 | rp[i]=nrp[i], ap[i]=0; |
| 318 | bn_correct_top(r); | 300 | bn_correct_top(r); |
| 319 | bn_correct_top(ret); | 301 | bn_correct_top(ret); |
| 320 | bn_check_top(ret); | ||
| 321 | |||
| 322 | return(1); | ||
| 323 | } | ||
| 324 | |||
| 325 | int BN_from_montgomery(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont, | ||
| 326 | BN_CTX *ctx) | ||
| 327 | { | ||
| 328 | int retn=0; | ||
| 329 | BIGNUM *t; | ||
| 330 | |||
| 331 | BN_CTX_start(ctx); | ||
| 332 | if ((t = BN_CTX_get(ctx)) && BN_copy(t,a)) | ||
| 333 | retn = BN_from_montgomery_word(ret,t,mont); | ||
| 334 | BN_CTX_end(ctx); | ||
| 335 | return retn; | ||
| 336 | } | ||
| 337 | |||
| 338 | #else /* !MONT_FROM_WORD___NON_DEFAULT_0_9_8_BUILD */ | ||
| 339 | |||
| 340 | int BN_from_montgomery(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont, | ||
| 341 | BN_CTX *ctx) | ||
| 342 | { | ||
| 343 | int retn=0; | ||
| 344 | |||
| 345 | #ifdef MONT_WORD | ||
| 346 | BIGNUM *n,*r; | ||
| 347 | BN_ULONG *ap,*np,*rp,n0,v,*nrp; | ||
| 348 | int al,nl,max,i,x,ri; | ||
| 349 | |||
| 350 | BN_CTX_start(ctx); | ||
| 351 | if ((r = BN_CTX_get(ctx)) == NULL) goto err; | ||
| 352 | |||
| 353 | if (!BN_copy(r,a)) goto err; | ||
| 354 | n= &(mont->N); | ||
| 355 | |||
| 356 | ap=a->d; | ||
| 357 | /* mont->ri is the size of mont->N in bits (rounded up | ||
| 358 | to the word size) */ | ||
| 359 | al=ri=mont->ri/BN_BITS2; | ||
| 360 | |||
| 361 | nl=n->top; | ||
| 362 | if ((al == 0) || (nl == 0)) { r->top=0; return(1); } | ||
| 363 | |||
| 364 | max=(nl+al+1); /* allow for overflow (no?) XXX */ | ||
| 365 | if (bn_wexpand(r,max) == NULL) goto err; | ||
| 366 | |||
| 367 | r->neg=a->neg^n->neg; | ||
| 368 | np=n->d; | ||
| 369 | rp=r->d; | ||
| 370 | nrp= &(r->d[nl]); | ||
| 371 | |||
| 372 | /* clear the top words of T */ | ||
| 373 | #if 1 | ||
| 374 | for (i=r->top; i<max; i++) /* memset? XXX */ | ||
| 375 | r->d[i]=0; | ||
| 376 | #else | 302 | #else |
| 377 | memset(&(r->d[r->top]),0,(max-r->top)*sizeof(BN_ULONG)); | 303 | if (bn_wexpand(ret,al) == NULL) return(0); |
| 378 | #endif | ||
| 379 | |||
| 380 | r->top=max; | ||
| 381 | n0=mont->n0; | ||
| 382 | |||
| 383 | #ifdef BN_COUNT | ||
| 384 | fprintf(stderr,"word BN_from_montgomery %d * %d\n",nl,nl); | ||
| 385 | #endif | ||
| 386 | for (i=0; i<nl; i++) | ||
| 387 | { | ||
| 388 | #ifdef __TANDEM | ||
| 389 | { | ||
| 390 | long long t1; | ||
| 391 | long long t2; | ||
| 392 | long long t3; | ||
| 393 | t1 = rp[0] * (n0 & 0177777); | ||
| 394 | t2 = 037777600000l; | ||
| 395 | t2 = n0 & t2; | ||
| 396 | t3 = rp[0] & 0177777; | ||
| 397 | t2 = (t3 * t2) & BN_MASK2; | ||
| 398 | t1 = t1 + t2; | ||
| 399 | v=bn_mul_add_words(rp,np,nl,(BN_ULONG) t1); | ||
| 400 | } | ||
| 401 | #else | ||
| 402 | v=bn_mul_add_words(rp,np,nl,(rp[0]*n0)&BN_MASK2); | ||
| 403 | #endif | ||
| 404 | nrp++; | ||
| 405 | rp++; | ||
| 406 | if (((nrp[-1]+=v)&BN_MASK2) >= v) | ||
| 407 | continue; | ||
| 408 | else | ||
| 409 | { | ||
| 410 | if (((++nrp[0])&BN_MASK2) != 0) continue; | ||
| 411 | if (((++nrp[1])&BN_MASK2) != 0) continue; | ||
| 412 | for (x=2; (((++nrp[x])&BN_MASK2) == 0); x++) ; | ||
| 413 | } | ||
| 414 | } | ||
| 415 | bn_correct_top(r); | ||
| 416 | |||
| 417 | /* mont->ri will be a multiple of the word size and below code | ||
| 418 | * is kind of BN_rshift(ret,r,mont->ri) equivalent */ | ||
| 419 | if (r->top <= ri) | ||
| 420 | { | ||
| 421 | ret->top=0; | ||
| 422 | retn=1; | ||
| 423 | goto err; | ||
| 424 | } | ||
| 425 | al=r->top-ri; | ||
| 426 | |||
| 427 | # define BRANCH_FREE 1 | ||
| 428 | # if BRANCH_FREE | ||
| 429 | if (bn_wexpand(ret,ri) == NULL) goto err; | ||
| 430 | x=0-(((al-ri)>>(sizeof(al)*8-1))&1); | ||
| 431 | ret->top=x=(ri&~x)|(al&x); /* min(ri,al) */ | ||
| 432 | ret->neg=r->neg; | ||
| 433 | |||
| 434 | rp=ret->d; | ||
| 435 | ap=&(r->d[ri]); | ||
| 436 | |||
| 437 | { | ||
| 438 | size_t m1,m2; | ||
| 439 | |||
| 440 | v=bn_sub_words(rp,ap,np,ri); | ||
| 441 | /* this ----------------^^ works even in al<ri case | ||
| 442 | * thanks to zealous zeroing of top of the vector in the | ||
| 443 | * beginning. */ | ||
| 444 | |||
| 445 | /* if (al==ri && !v) || al>ri) nrp=rp; else nrp=ap; */ | ||
| 446 | /* in other words if subtraction result is real, then | ||
| 447 | * trick unconditional memcpy below to perform in-place | ||
| 448 | * "refresh" instead of actual copy. */ | ||
| 449 | m1=0-(size_t)(((al-ri)>>(sizeof(al)*8-1))&1); /* al<ri */ | ||
| 450 | m2=0-(size_t)(((ri-al)>>(sizeof(al)*8-1))&1); /* al>ri */ | ||
| 451 | m1|=m2; /* (al!=ri) */ | ||
| 452 | m1|=(0-(size_t)v); /* (al!=ri || v) */ | ||
| 453 | m1&=~m2; /* (al!=ri || v) && !al>ri */ | ||
| 454 | nrp=(BN_ULONG *)(((size_t)rp&~m1)|((size_t)ap&m1)); | ||
| 455 | } | ||
| 456 | |||
| 457 | /* 'i<ri' is chosen to eliminate dependency on input data, even | ||
| 458 | * though it results in redundant copy in al<ri case. */ | ||
| 459 | for (i=0,ri-=4; i<ri; i+=4) | ||
| 460 | { | ||
| 461 | BN_ULONG t1,t2,t3,t4; | ||
| 462 | |||
| 463 | t1=nrp[i+0]; | ||
| 464 | t2=nrp[i+1]; | ||
| 465 | t3=nrp[i+2]; ap[i+0]=0; | ||
| 466 | t4=nrp[i+3]; ap[i+1]=0; | ||
| 467 | rp[i+0]=t1; ap[i+2]=0; | ||
| 468 | rp[i+1]=t2; ap[i+3]=0; | ||
| 469 | rp[i+2]=t3; | ||
| 470 | rp[i+3]=t4; | ||
| 471 | } | ||
| 472 | for (ri+=4; i<ri; i++) | ||
| 473 | rp[i]=nrp[i], ap[i]=0; | ||
| 474 | bn_correct_top(r); | ||
| 475 | bn_correct_top(ret); | ||
| 476 | # else | ||
| 477 | if (bn_wexpand(ret,al) == NULL) goto err; | ||
| 478 | ret->top=al; | 304 | ret->top=al; |
| 479 | ret->neg=r->neg; | 305 | ret->neg=r->neg; |
| 480 | 306 | ||
| @@ -497,8 +323,30 @@ int BN_from_montgomery(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont, | |||
| 497 | al+=4; | 323 | al+=4; |
| 498 | for (; i<al; i++) | 324 | for (; i<al; i++) |
| 499 | rp[i]=ap[i]; | 325 | rp[i]=ap[i]; |
| 500 | # endif | 326 | |
| 501 | #else /* !MONT_WORD */ | 327 | if (BN_ucmp(ret, &(mont->N)) >= 0) |
| 328 | { | ||
| 329 | if (!BN_usub(ret,ret,&(mont->N))) return(0); | ||
| 330 | } | ||
| 331 | #endif | ||
| 332 | bn_check_top(ret); | ||
| 333 | |||
| 334 | return(1); | ||
| 335 | } | ||
| 336 | #endif /* MONT_WORD */ | ||
| 337 | |||
| 338 | int BN_from_montgomery(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont, | ||
| 339 | BN_CTX *ctx) | ||
| 340 | { | ||
| 341 | int retn=0; | ||
| 342 | #ifdef MONT_WORD | ||
| 343 | BIGNUM *t; | ||
| 344 | |||
| 345 | BN_CTX_start(ctx); | ||
| 346 | if ((t = BN_CTX_get(ctx)) && BN_copy(t,a)) | ||
| 347 | retn = BN_from_montgomery_word(ret,t,mont); | ||
| 348 | BN_CTX_end(ctx); | ||
| 349 | #else /* !MONT_WORD */ | ||
| 502 | BIGNUM *t1,*t2; | 350 | BIGNUM *t1,*t2; |
| 503 | 351 | ||
| 504 | BN_CTX_start(ctx); | 352 | BN_CTX_start(ctx); |
| @@ -515,21 +363,18 @@ int BN_from_montgomery(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont, | |||
| 515 | if (!BN_mul(t1,t2,&mont->N,ctx)) goto err; | 363 | if (!BN_mul(t1,t2,&mont->N,ctx)) goto err; |
| 516 | if (!BN_add(t2,a,t1)) goto err; | 364 | if (!BN_add(t2,a,t1)) goto err; |
| 517 | if (!BN_rshift(ret,t2,mont->ri)) goto err; | 365 | if (!BN_rshift(ret,t2,mont->ri)) goto err; |
| 518 | #endif /* MONT_WORD */ | ||
| 519 | 366 | ||
| 520 | #if !defined(BRANCH_FREE) || BRANCH_FREE==0 | ||
| 521 | if (BN_ucmp(ret, &(mont->N)) >= 0) | 367 | if (BN_ucmp(ret, &(mont->N)) >= 0) |
| 522 | { | 368 | { |
| 523 | if (!BN_usub(ret,ret,&(mont->N))) goto err; | 369 | if (!BN_usub(ret,ret,&(mont->N))) goto err; |
| 524 | } | 370 | } |
| 525 | #endif | ||
| 526 | retn=1; | 371 | retn=1; |
| 527 | bn_check_top(ret); | 372 | bn_check_top(ret); |
| 528 | err: | 373 | err: |
| 529 | BN_CTX_end(ctx); | 374 | BN_CTX_end(ctx); |
| 375 | #endif /* MONT_WORD */ | ||
| 530 | return(retn); | 376 | return(retn); |
| 531 | } | 377 | } |
| 532 | #endif /* MONT_FROM_WORD___NON_DEFAULT_0_9_8_BUILD */ | ||
| 533 | 378 | ||
| 534 | BN_MONT_CTX *BN_MONT_CTX_new(void) | 379 | BN_MONT_CTX *BN_MONT_CTX_new(void) |
| 535 | { | 380 | { |
| @@ -549,11 +394,7 @@ void BN_MONT_CTX_init(BN_MONT_CTX *ctx) | |||
| 549 | BN_init(&(ctx->RR)); | 394 | BN_init(&(ctx->RR)); |
| 550 | BN_init(&(ctx->N)); | 395 | BN_init(&(ctx->N)); |
| 551 | BN_init(&(ctx->Ni)); | 396 | BN_init(&(ctx->Ni)); |
| 552 | #if 0 /* for OpenSSL 0.9.9 mont->n0 */ | ||
| 553 | ctx->n0[0] = ctx->n0[1] = 0; | 397 | ctx->n0[0] = ctx->n0[1] = 0; |
| 554 | #else | ||
| 555 | ctx->n0 = 0; | ||
| 556 | #endif | ||
| 557 | ctx->flags=0; | 398 | ctx->flags=0; |
| 558 | } | 399 | } |
| 559 | 400 | ||
| @@ -585,26 +426,22 @@ int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx) | |||
| 585 | BIGNUM tmod; | 426 | BIGNUM tmod; |
| 586 | BN_ULONG buf[2]; | 427 | BN_ULONG buf[2]; |
| 587 | 428 | ||
| 588 | mont->ri=(BN_num_bits(mod)+(BN_BITS2-1))/BN_BITS2*BN_BITS2; | ||
| 589 | BN_zero(R); | ||
| 590 | #if 0 /* for OpenSSL 0.9.9 mont->n0, would be "#if defined(OPENSSL_BN_ASM_MONT) && (BN_BITS2<=32)", | ||
| 591 | only certain BN_BITS2<=32 platforms actually need this */ | ||
| 592 | if (!(BN_set_bit(R,2*BN_BITS2))) goto err; /* R */ | ||
| 593 | #else | ||
| 594 | if (!(BN_set_bit(R,BN_BITS2))) goto err; /* R */ | ||
| 595 | #endif | ||
| 596 | |||
| 597 | buf[0]=mod->d[0]; /* tmod = N mod word size */ | ||
| 598 | buf[1]=0; | ||
| 599 | |||
| 600 | BN_init(&tmod); | 429 | BN_init(&tmod); |
| 601 | tmod.d=buf; | 430 | tmod.d=buf; |
| 602 | tmod.top = buf[0] != 0 ? 1 : 0; | ||
| 603 | tmod.dmax=2; | 431 | tmod.dmax=2; |
| 604 | tmod.neg=0; | 432 | tmod.neg=0; |
| 605 | 433 | ||
| 606 | #if 0 /* for OpenSSL 0.9.9 mont->n0, would be "#if defined(OPENSSL_BN_ASM_MONT) && (BN_BITS2<=32)"; | 434 | mont->ri=(BN_num_bits(mod)+(BN_BITS2-1))/BN_BITS2*BN_BITS2; |
| 607 | only certain BN_BITS2<=32 platforms actually need this */ | 435 | |
| 436 | #if defined(OPENSSL_BN_ASM_MONT) && (BN_BITS2<=32) | ||
| 437 | /* Only certain BN_BITS2<=32 platforms actually make use of | ||
| 438 | * n0[1], and we could use the #else case (with a shorter R | ||
| 439 | * value) for the others. However, currently only the assembler | ||
| 440 | * files do know which is which. */ | ||
| 441 | |||
| 442 | BN_zero(R); | ||
| 443 | if (!(BN_set_bit(R,2*BN_BITS2))) goto err; | ||
| 444 | |||
| 608 | tmod.top=0; | 445 | tmod.top=0; |
| 609 | if ((buf[0] = mod->d[0])) tmod.top=1; | 446 | if ((buf[0] = mod->d[0])) tmod.top=1; |
| 610 | if ((buf[1] = mod->top>1 ? mod->d[1] : 0)) tmod.top=2; | 447 | if ((buf[1] = mod->top>1 ? mod->d[1] : 0)) tmod.top=2; |
| @@ -632,6 +469,12 @@ int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx) | |||
| 632 | mont->n0[0] = (Ri->top > 0) ? Ri->d[0] : 0; | 469 | mont->n0[0] = (Ri->top > 0) ? Ri->d[0] : 0; |
| 633 | mont->n0[1] = (Ri->top > 1) ? Ri->d[1] : 0; | 470 | mont->n0[1] = (Ri->top > 1) ? Ri->d[1] : 0; |
| 634 | #else | 471 | #else |
| 472 | BN_zero(R); | ||
| 473 | if (!(BN_set_bit(R,BN_BITS2))) goto err; /* R */ | ||
| 474 | |||
| 475 | buf[0]=mod->d[0]; /* tmod = N mod word size */ | ||
| 476 | buf[1]=0; | ||
| 477 | tmod.top = buf[0] != 0 ? 1 : 0; | ||
| 635 | /* Ri = R^-1 mod N*/ | 478 | /* Ri = R^-1 mod N*/ |
| 636 | if ((BN_mod_inverse(Ri,R,&tmod,ctx)) == NULL) | 479 | if ((BN_mod_inverse(Ri,R,&tmod,ctx)) == NULL) |
| 637 | goto err; | 480 | goto err; |
| @@ -647,12 +490,8 @@ int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx) | |||
| 647 | if (!BN_div(Ri,NULL,Ri,&tmod,ctx)) goto err; | 490 | if (!BN_div(Ri,NULL,Ri,&tmod,ctx)) goto err; |
| 648 | /* Ni = (R*Ri-1)/N, | 491 | /* Ni = (R*Ri-1)/N, |
| 649 | * keep only least significant word: */ | 492 | * keep only least significant word: */ |
| 650 | # if 0 /* for OpenSSL 0.9.9 mont->n0 */ | ||
| 651 | mont->n0[0] = (Ri->top > 0) ? Ri->d[0] : 0; | 493 | mont->n0[0] = (Ri->top > 0) ? Ri->d[0] : 0; |
| 652 | mont->n0[1] = 0; | 494 | mont->n0[1] = 0; |
| 653 | # else | ||
| 654 | mont->n0 = (Ri->top > 0) ? Ri->d[0] : 0; | ||
| 655 | # endif | ||
| 656 | #endif | 495 | #endif |
| 657 | } | 496 | } |
| 658 | #else /* !MONT_WORD */ | 497 | #else /* !MONT_WORD */ |
| @@ -689,12 +528,8 @@ BN_MONT_CTX *BN_MONT_CTX_copy(BN_MONT_CTX *to, BN_MONT_CTX *from) | |||
| 689 | if (!BN_copy(&(to->N),&(from->N))) return NULL; | 528 | if (!BN_copy(&(to->N),&(from->N))) return NULL; |
| 690 | if (!BN_copy(&(to->Ni),&(from->Ni))) return NULL; | 529 | if (!BN_copy(&(to->Ni),&(from->Ni))) return NULL; |
| 691 | to->ri=from->ri; | 530 | to->ri=from->ri; |
| 692 | #if 0 /* for OpenSSL 0.9.9 mont->n0 */ | ||
| 693 | to->n0[0]=from->n0[0]; | 531 | to->n0[0]=from->n0[0]; |
| 694 | to->n0[1]=from->n0[1]; | 532 | to->n0[1]=from->n0[1]; |
| 695 | #else | ||
| 696 | to->n0=from->n0; | ||
| 697 | #endif | ||
| 698 | return(to); | 533 | return(to); |
| 699 | } | 534 | } |
| 700 | 535 | ||
diff --git a/src/lib/libcrypto/bn/bn_mul.c b/src/lib/libcrypto/bn/bn_mul.c index b848c8cc60..a0e9ec3b46 100644 --- a/src/lib/libcrypto/bn/bn_mul.c +++ b/src/lib/libcrypto/bn/bn_mul.c | |||
| @@ -1028,17 +1028,19 @@ int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) | |||
| 1028 | assert(j <= al || j <= bl); | 1028 | assert(j <= al || j <= bl); |
| 1029 | k = j+j; | 1029 | k = j+j; |
| 1030 | t = BN_CTX_get(ctx); | 1030 | t = BN_CTX_get(ctx); |
| 1031 | if (t == NULL) | ||
| 1032 | goto err; | ||
| 1031 | if (al > j || bl > j) | 1033 | if (al > j || bl > j) |
| 1032 | { | 1034 | { |
| 1033 | bn_wexpand(t,k*4); | 1035 | if (bn_wexpand(t,k*4) == NULL) goto err; |
| 1034 | bn_wexpand(rr,k*4); | 1036 | if (bn_wexpand(rr,k*4) == NULL) goto err; |
| 1035 | bn_mul_part_recursive(rr->d,a->d,b->d, | 1037 | bn_mul_part_recursive(rr->d,a->d,b->d, |
| 1036 | j,al-j,bl-j,t->d); | 1038 | j,al-j,bl-j,t->d); |
| 1037 | } | 1039 | } |
| 1038 | else /* al <= j || bl <= j */ | 1040 | else /* al <= j || bl <= j */ |
| 1039 | { | 1041 | { |
| 1040 | bn_wexpand(t,k*2); | 1042 | if (bn_wexpand(t,k*2) == NULL) goto err; |
| 1041 | bn_wexpand(rr,k*2); | 1043 | if (bn_wexpand(rr,k*2) == NULL) goto err; |
| 1042 | bn_mul_recursive(rr->d,a->d,b->d, | 1044 | bn_mul_recursive(rr->d,a->d,b->d, |
| 1043 | j,al-j,bl-j,t->d); | 1045 | j,al-j,bl-j,t->d); |
| 1044 | } | 1046 | } |
diff --git a/src/lib/libcrypto/bn/bn_print.c b/src/lib/libcrypto/bn/bn_print.c index 810dde34e1..bebb466d08 100644 --- a/src/lib/libcrypto/bn/bn_print.c +++ b/src/lib/libcrypto/bn/bn_print.c | |||
| @@ -294,6 +294,27 @@ err: | |||
| 294 | return(0); | 294 | return(0); |
| 295 | } | 295 | } |
| 296 | 296 | ||
| 297 | int BN_asc2bn(BIGNUM **bn, const char *a) | ||
| 298 | { | ||
| 299 | const char *p = a; | ||
| 300 | if (*p == '-') | ||
| 301 | p++; | ||
| 302 | |||
| 303 | if (p[0] == '0' && (p[1] == 'X' || p[1] == 'x')) | ||
| 304 | { | ||
| 305 | if (!BN_hex2bn(bn, p + 2)) | ||
| 306 | return 0; | ||
| 307 | } | ||
| 308 | else | ||
| 309 | { | ||
| 310 | if (!BN_dec2bn(bn, p)) | ||
| 311 | return 0; | ||
| 312 | } | ||
| 313 | if (*a == '-') | ||
| 314 | (*bn)->neg = 1; | ||
| 315 | return 1; | ||
| 316 | } | ||
| 317 | |||
| 297 | #ifndef OPENSSL_NO_BIO | 318 | #ifndef OPENSSL_NO_BIO |
| 298 | #ifndef OPENSSL_NO_FP_API | 319 | #ifndef OPENSSL_NO_FP_API |
| 299 | int BN_print_fp(FILE *fp, const BIGNUM *a) | 320 | int BN_print_fp(FILE *fp, const BIGNUM *a) |
