summaryrefslogtreecommitdiff
path: root/src/lib/libcrypto/bn
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib/libcrypto/bn')
-rw-r--r--src/lib/libcrypto/bn/Makefile34
-rw-r--r--src/lib/libcrypto/bn/bn.h15
-rw-r--r--src/lib/libcrypto/bn/bn_div.c272
-rw-r--r--src/lib/libcrypto/bn/bn_exp.c240
-rw-r--r--src/lib/libcrypto/bn/bn_gf2m.c114
-rw-r--r--src/lib/libcrypto/bn/bn_lcl.h23
-rw-r--r--src/lib/libcrypto/bn/bn_lib.c19
-rw-r--r--src/lib/libcrypto/bn/bn_mont.c116
-rw-r--r--src/lib/libcrypto/bn/bn_nist.c338
-rw-r--r--src/lib/libcrypto/bn/bn_print.c19
-rw-r--r--src/lib/libcrypto/bn/bn_shift.c27
-rw-r--r--src/lib/libcrypto/bn/bntest.c8
12 files changed, 734 insertions, 491 deletions
diff --git a/src/lib/libcrypto/bn/Makefile b/src/lib/libcrypto/bn/Makefile
index aabc4f56b8..672773454c 100644
--- a/src/lib/libcrypto/bn/Makefile
+++ b/src/lib/libcrypto/bn/Makefile
@@ -26,13 +26,13 @@ LIBSRC= bn_add.c bn_div.c bn_exp.c bn_lib.c bn_ctx.c bn_mul.c bn_mod.c \
26 bn_print.c bn_rand.c bn_shift.c bn_word.c bn_blind.c \ 26 bn_print.c bn_rand.c bn_shift.c bn_word.c bn_blind.c \
27 bn_kron.c bn_sqrt.c bn_gcd.c bn_prime.c bn_err.c bn_sqr.c bn_asm.c \ 27 bn_kron.c bn_sqrt.c bn_gcd.c bn_prime.c bn_err.c bn_sqr.c bn_asm.c \
28 bn_recp.c bn_mont.c bn_mpi.c bn_exp2.c bn_gf2m.c bn_nist.c \ 28 bn_recp.c bn_mont.c bn_mpi.c bn_exp2.c bn_gf2m.c bn_nist.c \
29 bn_depr.c bn_const.c 29 bn_depr.c bn_const.c bn_x931p.c
30 30
31LIBOBJ= bn_add.o bn_div.o bn_exp.o bn_lib.o bn_ctx.o bn_mul.o bn_mod.o \ 31LIBOBJ= bn_add.o bn_div.o bn_exp.o bn_lib.o bn_ctx.o bn_mul.o bn_mod.o \
32 bn_print.o bn_rand.o bn_shift.o bn_word.o bn_blind.o \ 32 bn_print.o bn_rand.o bn_shift.o bn_word.o bn_blind.o \
33 bn_kron.o bn_sqrt.o bn_gcd.o bn_prime.o bn_err.o bn_sqr.o $(BN_ASM) \ 33 bn_kron.o bn_sqrt.o bn_gcd.o bn_prime.o bn_err.o bn_sqr.o $(BN_ASM) \
34 bn_recp.o bn_mont.o bn_mpi.o bn_exp2.o bn_gf2m.o bn_nist.o \ 34 bn_recp.o bn_mont.o bn_mpi.o bn_exp2.o bn_gf2m.o bn_nist.o \
35 bn_depr.o bn_const.o 35 bn_depr.o bn_const.o bn_x931p.o
36 36
37SRC= $(LIBSRC) 37SRC= $(LIBSRC)
38 38
@@ -66,6 +66,8 @@ co-586.s: asm/co-586.pl ../perlasm/x86asm.pl
66 $(PERL) asm/co-586.pl $(PERLASM_SCHEME) $(CFLAGS) $(PROCESSOR) > $@ 66 $(PERL) asm/co-586.pl $(PERLASM_SCHEME) $(CFLAGS) $(PROCESSOR) > $@
67x86-mont.s: asm/x86-mont.pl ../perlasm/x86asm.pl 67x86-mont.s: asm/x86-mont.pl ../perlasm/x86asm.pl
68 $(PERL) asm/x86-mont.pl $(PERLASM_SCHEME) $(CFLAGS) $(PROCESSOR) > $@ 68 $(PERL) asm/x86-mont.pl $(PERLASM_SCHEME) $(CFLAGS) $(PROCESSOR) > $@
69x86-gf2m.s: asm/x86-gf2m.pl ../perlasm/x86asm.pl
70 $(PERL) asm/x86-gf2m.pl $(PERLASM_SCHEME) $(CFLAGS) $(PROCESSOR) > $@
69 71
70sparcv8.o: asm/sparcv8.S 72sparcv8.o: asm/sparcv8.S
71 $(CC) $(CFLAGS) -c asm/sparcv8.S 73 $(CC) $(CFLAGS) -c asm/sparcv8.S
@@ -82,16 +84,31 @@ bn-mips3.o: asm/mips3.s
82 as -$$ABI -O -o $@ asm/mips3.s; \ 84 as -$$ABI -O -o $@ asm/mips3.s; \
83 else $(CC) -c $(CFLAGS) -o $@ asm/mips3.s; fi 85 else $(CC) -c $(CFLAGS) -o $@ asm/mips3.s; fi
84 86
87bn-mips.s: asm/mips.pl
88 $(PERL) asm/mips.pl $(PERLASM_SCHEME) $@
89mips-mont.s: asm/mips-mont.pl
90 $(PERL) asm/mips-mont.pl $(PERLASM_SCHEME) $@
91
85bn-s390x.o: asm/s390x.S 92bn-s390x.o: asm/s390x.S
86 $(CC) $(CFLAGS) -c -o $@ asm/s390x.S 93 $(CC) $(CFLAGS) -c -o $@ asm/s390x.S
94s390x-gf2m.s: asm/s390x-gf2m.pl
95 $(PERL) asm/s390x-gf2m.pl $(PERLASM_SCHEME) $@
87 96
88x86_64-gcc.o: asm/x86_64-gcc.c 97x86_64-gcc.o: asm/x86_64-gcc.c
89 $(CC) $(CFLAGS) -c -o $@ asm/x86_64-gcc.c 98 $(CC) $(CFLAGS) -c -o $@ asm/x86_64-gcc.c
90x86_64-mont.s: asm/x86_64-mont.pl 99x86_64-mont.s: asm/x86_64-mont.pl
91 $(PERL) asm/x86_64-mont.pl $(PERLASM_SCHEME) > $@ 100 $(PERL) asm/x86_64-mont.pl $(PERLASM_SCHEME) > $@
101x86_64-mont5.s: asm/x86_64-mont5.pl
102 $(PERL) asm/x86_64-mont5.pl $(PERLASM_SCHEME) > $@
103x86_64-gf2m.s: asm/x86_64-gf2m.pl
104 $(PERL) asm/x86_64-gf2m.pl $(PERLASM_SCHEME) > $@
105modexp512-x86_64.s: asm/modexp512-x86_64.pl
106 $(PERL) asm/modexp512-x86_64.pl $(PERLASM_SCHEME) > $@
92 107
93bn-ia64.s: asm/ia64.S 108bn-ia64.s: asm/ia64.S
94 $(CC) $(CFLAGS) -E asm/ia64.S > $@ 109 $(CC) $(CFLAGS) -E asm/ia64.S > $@
110ia64-mont.s: asm/ia64-mont.pl
111 $(PERL) asm/ia64-mont.pl $@ $(CFLAGS)
95 112
96# GNU assembler fails to compile PA-RISC2 modules, insist on calling 113# GNU assembler fails to compile PA-RISC2 modules, insist on calling
97# vendor assembler... 114# vendor assembler...
@@ -99,16 +116,22 @@ pa-risc2W.o: asm/pa-risc2W.s
99 /usr/ccs/bin/as -o pa-risc2W.o asm/pa-risc2W.s 116 /usr/ccs/bin/as -o pa-risc2W.o asm/pa-risc2W.s
100pa-risc2.o: asm/pa-risc2.s 117pa-risc2.o: asm/pa-risc2.s
101 /usr/ccs/bin/as -o pa-risc2.o asm/pa-risc2.s 118 /usr/ccs/bin/as -o pa-risc2.o asm/pa-risc2.s
119parisc-mont.s: asm/parisc-mont.pl
120 $(PERL) asm/parisc-mont.pl $(PERLASM_SCHEME) $@
102 121
103# ppc - AIX, Linux, MacOS X... 122# ppc - AIX, Linux, MacOS X...
104bn-ppc.s: asm/ppc.pl; $(PERL) asm/ppc.pl $(PERLASM_SCHEME) $@ 123bn-ppc.s: asm/ppc.pl; $(PERL) asm/ppc.pl $(PERLASM_SCHEME) $@
105ppc-mont.s: asm/ppc-mont.pl;$(PERL) asm/ppc-mont.pl $(PERLASM_SCHEME) $@ 124ppc-mont.s: asm/ppc-mont.pl;$(PERL) asm/ppc-mont.pl $(PERLASM_SCHEME) $@
125ppc64-mont.s: asm/ppc64-mont.pl;$(PERL) asm/ppc64-mont.pl $(PERLASM_SCHEME) $@
106 126
107alpha-mont.s: asm/alpha-mont.pl 127alpha-mont.s: asm/alpha-mont.pl
108 $(PERL) $< | $(CC) -E - | tee $@ > /dev/null 128 $(PERL) $< | $(CC) -E - | tee $@ > /dev/null
109 129
110# GNU make "catch all" 130# GNU make "catch all"
111%-mont.s: asm/%-mont.pl; $(PERL) $< $(CFLAGS) > $@ 131%-mont.s: asm/%-mont.pl; $(PERL) $< $(PERLASM_SCHEME) $@
132%-gf2m.S: asm/%-gf2m.pl; $(PERL) $< $(PERLASM_SCHEME) $@
133
134armv4-gf2m.o: armv4-gf2m.S
112 135
113files: 136files:
114 $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO 137 $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO
@@ -345,3 +368,8 @@ bn_word.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
345bn_word.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h 368bn_word.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
346bn_word.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h 369bn_word.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
347bn_word.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h bn_word.c 370bn_word.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h bn_word.c
371bn_x931p.o: ../../include/openssl/bn.h ../../include/openssl/crypto.h
372bn_x931p.o: ../../include/openssl/e_os2.h ../../include/openssl/opensslconf.h
373bn_x931p.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
374bn_x931p.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
375bn_x931p.o: ../../include/openssl/symhacks.h bn_x931p.c
diff --git a/src/lib/libcrypto/bn/bn.h b/src/lib/libcrypto/bn/bn.h
index a0bc47837d..f34248ec4f 100644
--- a/src/lib/libcrypto/bn/bn.h
+++ b/src/lib/libcrypto/bn/bn.h
@@ -558,6 +558,17 @@ int BN_is_prime_ex(const BIGNUM *p,int nchecks, BN_CTX *ctx, BN_GENCB *cb);
558int BN_is_prime_fasttest_ex(const BIGNUM *p,int nchecks, BN_CTX *ctx, 558int BN_is_prime_fasttest_ex(const BIGNUM *p,int nchecks, BN_CTX *ctx,
559 int do_trial_division, BN_GENCB *cb); 559 int do_trial_division, BN_GENCB *cb);
560 560
561int BN_X931_generate_Xpq(BIGNUM *Xp, BIGNUM *Xq, int nbits, BN_CTX *ctx);
562
563int BN_X931_derive_prime_ex(BIGNUM *p, BIGNUM *p1, BIGNUM *p2,
564 const BIGNUM *Xp, const BIGNUM *Xp1, const BIGNUM *Xp2,
565 const BIGNUM *e, BN_CTX *ctx, BN_GENCB *cb);
566int BN_X931_generate_prime_ex(BIGNUM *p, BIGNUM *p1, BIGNUM *p2,
567 BIGNUM *Xp1, BIGNUM *Xp2,
568 const BIGNUM *Xp,
569 const BIGNUM *e, BN_CTX *ctx,
570 BN_GENCB *cb);
571
561BN_MONT_CTX *BN_MONT_CTX_new(void ); 572BN_MONT_CTX *BN_MONT_CTX_new(void );
562void BN_MONT_CTX_init(BN_MONT_CTX *ctx); 573void BN_MONT_CTX_init(BN_MONT_CTX *ctx);
563int BN_mod_mul_montgomery(BIGNUM *r,const BIGNUM *a,const BIGNUM *b, 574int BN_mod_mul_montgomery(BIGNUM *r,const BIGNUM *a,const BIGNUM *b,
@@ -612,6 +623,8 @@ int BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
612int BN_div_recp(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, 623int BN_div_recp(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m,
613 BN_RECP_CTX *recp, BN_CTX *ctx); 624 BN_RECP_CTX *recp, BN_CTX *ctx);
614 625
626#ifndef OPENSSL_NO_EC2M
627
615/* Functions for arithmetic over binary polynomials represented by BIGNUMs. 628/* Functions for arithmetic over binary polynomials represented by BIGNUMs.
616 * 629 *
617 * The BIGNUM::neg property of BIGNUMs representing binary polynomials is 630 * The BIGNUM::neg property of BIGNUMs representing binary polynomials is
@@ -663,6 +676,8 @@ int BN_GF2m_mod_solve_quad_arr(BIGNUM *r, const BIGNUM *a,
663int BN_GF2m_poly2arr(const BIGNUM *a, int p[], int max); 676int BN_GF2m_poly2arr(const BIGNUM *a, int p[], int max);
664int BN_GF2m_arr2poly(const int p[], BIGNUM *a); 677int BN_GF2m_arr2poly(const int p[], BIGNUM *a);
665 678
679#endif
680
666/* faster mod functions for the 'NIST primes' 681/* faster mod functions for the 'NIST primes'
667 * 0 <= a < p^2 */ 682 * 0 <= a < p^2 */
668int BN_nist_mod_192(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx); 683int BN_nist_mod_192(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx);
diff --git a/src/lib/libcrypto/bn/bn_div.c b/src/lib/libcrypto/bn/bn_div.c
index 802a43d642..52b3304293 100644
--- a/src/lib/libcrypto/bn/bn_div.c
+++ b/src/lib/libcrypto/bn/bn_div.c
@@ -169,15 +169,13 @@ int BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, const BIGNUM *d,
169#endif /* OPENSSL_NO_ASM */ 169#endif /* OPENSSL_NO_ASM */
170 170
171 171
172/* BN_div[_no_branch] computes dv := num / divisor, rounding towards 172/* BN_div computes dv := num / divisor, rounding towards
173 * zero, and sets up rm such that dv*divisor + rm = num holds. 173 * zero, and sets up rm such that dv*divisor + rm = num holds.
174 * Thus: 174 * Thus:
175 * dv->neg == num->neg ^ divisor->neg (unless the result is zero) 175 * dv->neg == num->neg ^ divisor->neg (unless the result is zero)
176 * rm->neg == num->neg (unless the remainder is zero) 176 * rm->neg == num->neg (unless the remainder is zero)
177 * If 'dv' or 'rm' is NULL, the respective value is not returned. 177 * If 'dv' or 'rm' is NULL, the respective value is not returned.
178 */ 178 */
179static int BN_div_no_branch(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num,
180 const BIGNUM *divisor, BN_CTX *ctx);
181int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor, 179int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor,
182 BN_CTX *ctx) 180 BN_CTX *ctx)
183 { 181 {
@@ -186,6 +184,7 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor,
186 BN_ULONG *resp,*wnump; 184 BN_ULONG *resp,*wnump;
187 BN_ULONG d0,d1; 185 BN_ULONG d0,d1;
188 int num_n,div_n; 186 int num_n,div_n;
187 int no_branch=0;
189 188
190 /* Invalid zero-padding would have particularly bad consequences 189 /* Invalid zero-padding would have particularly bad consequences
191 * in the case of 'num', so don't just rely on bn_check_top() for this one 190 * in the case of 'num', so don't just rely on bn_check_top() for this one
@@ -200,7 +199,7 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor,
200 199
201 if ((BN_get_flags(num, BN_FLG_CONSTTIME) != 0) || (BN_get_flags(divisor, BN_FLG_CONSTTIME) != 0)) 200 if ((BN_get_flags(num, BN_FLG_CONSTTIME) != 0) || (BN_get_flags(divisor, BN_FLG_CONSTTIME) != 0))
202 { 201 {
203 return BN_div_no_branch(dv, rm, num, divisor, ctx); 202 no_branch=1;
204 } 203 }
205 204
206 bn_check_top(dv); 205 bn_check_top(dv);
@@ -214,7 +213,7 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor,
214 return(0); 213 return(0);
215 } 214 }
216 215
217 if (BN_ucmp(num,divisor) < 0) 216 if (!no_branch && BN_ucmp(num,divisor) < 0)
218 { 217 {
219 if (rm != NULL) 218 if (rm != NULL)
220 { if (BN_copy(rm,num) == NULL) return(0); } 219 { if (BN_copy(rm,num) == NULL) return(0); }
@@ -239,242 +238,25 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor,
239 norm_shift+=BN_BITS2; 238 norm_shift+=BN_BITS2;
240 if (!(BN_lshift(snum,num,norm_shift))) goto err; 239 if (!(BN_lshift(snum,num,norm_shift))) goto err;
241 snum->neg=0; 240 snum->neg=0;
242 div_n=sdiv->top;
243 num_n=snum->top;
244 loop=num_n-div_n;
245 /* Lets setup a 'window' into snum
246 * This is the part that corresponds to the current
247 * 'area' being divided */
248 wnum.neg = 0;
249 wnum.d = &(snum->d[loop]);
250 wnum.top = div_n;
251 /* only needed when BN_ucmp messes up the values between top and max */
252 wnum.dmax = snum->dmax - loop; /* so we don't step out of bounds */
253
254 /* Get the top 2 words of sdiv */
255 /* div_n=sdiv->top; */
256 d0=sdiv->d[div_n-1];
257 d1=(div_n == 1)?0:sdiv->d[div_n-2];
258
259 /* pointer to the 'top' of snum */
260 wnump= &(snum->d[num_n-1]);
261
262 /* Setup to 'res' */
263 res->neg= (num->neg^divisor->neg);
264 if (!bn_wexpand(res,(loop+1))) goto err;
265 res->top=loop;
266 resp= &(res->d[loop-1]);
267
268 /* space for temp */
269 if (!bn_wexpand(tmp,(div_n+1))) goto err;
270 241
271 if (BN_ucmp(&wnum,sdiv) >= 0) 242 if (no_branch)
272 { 243 {
273 /* If BN_DEBUG_RAND is defined BN_ucmp changes (via 244 /* Since we don't know whether snum is larger than sdiv,
274 * bn_pollute) the const bignum arguments => 245 * we pad snum with enough zeroes without changing its
275 * clean the values between top and max again */ 246 * value.
276 bn_clear_top2max(&wnum); 247 */
277 bn_sub_words(wnum.d, wnum.d, sdiv->d, div_n); 248 if (snum->top <= sdiv->top+1)
278 *resp=1;
279 }
280 else
281 res->top--;
282 /* if res->top == 0 then clear the neg value otherwise decrease
283 * the resp pointer */
284 if (res->top == 0)
285 res->neg = 0;
286 else
287 resp--;
288
289 for (i=0; i<loop-1; i++, wnump--, resp--)
290 {
291 BN_ULONG q,l0;
292 /* the first part of the loop uses the top two words of
293 * snum and sdiv to calculate a BN_ULONG q such that
294 * | wnum - sdiv * q | < sdiv */
295#if defined(BN_DIV3W) && !defined(OPENSSL_NO_ASM)
296 BN_ULONG bn_div_3_words(BN_ULONG*,BN_ULONG,BN_ULONG);
297 q=bn_div_3_words(wnump,d1,d0);
298#else
299 BN_ULONG n0,n1,rem=0;
300
301 n0=wnump[0];
302 n1=wnump[-1];
303 if (n0 == d0)
304 q=BN_MASK2;
305 else /* n0 < d0 */
306 {
307#ifdef BN_LLONG
308 BN_ULLONG t2;
309
310#if defined(BN_LLONG) && defined(BN_DIV2W) && !defined(bn_div_words)
311 q=(BN_ULONG)(((((BN_ULLONG)n0)<<BN_BITS2)|n1)/d0);
312#else
313 q=bn_div_words(n0,n1,d0);
314#ifdef BN_DEBUG_LEVITTE
315 fprintf(stderr,"DEBUG: bn_div_words(0x%08X,0x%08X,0x%08\
316X) -> 0x%08X\n",
317 n0, n1, d0, q);
318#endif
319#endif
320
321#ifndef REMAINDER_IS_ALREADY_CALCULATED
322 /*
323 * rem doesn't have to be BN_ULLONG. The least we
324 * know it's less that d0, isn't it?
325 */
326 rem=(n1-q*d0)&BN_MASK2;
327#endif
328 t2=(BN_ULLONG)d1*q;
329
330 for (;;)
331 {
332 if (t2 <= ((((BN_ULLONG)rem)<<BN_BITS2)|wnump[-2]))
333 break;
334 q--;
335 rem += d0;
336 if (rem < d0) break; /* don't let rem overflow */
337 t2 -= d1;
338 }
339#else /* !BN_LLONG */
340 BN_ULONG t2l,t2h;
341
342 q=bn_div_words(n0,n1,d0);
343#ifdef BN_DEBUG_LEVITTE
344 fprintf(stderr,"DEBUG: bn_div_words(0x%08X,0x%08X,0x%08\
345X) -> 0x%08X\n",
346 n0, n1, d0, q);
347#endif
348#ifndef REMAINDER_IS_ALREADY_CALCULATED
349 rem=(n1-q*d0)&BN_MASK2;
350#endif
351
352#if defined(BN_UMULT_LOHI)
353 BN_UMULT_LOHI(t2l,t2h,d1,q);
354#elif defined(BN_UMULT_HIGH)
355 t2l = d1 * q;
356 t2h = BN_UMULT_HIGH(d1,q);
357#else
358 { 249 {
359 BN_ULONG ql, qh; 250 if (bn_wexpand(snum, sdiv->top + 2) == NULL) goto err;
360 t2l=LBITS(d1); t2h=HBITS(d1); 251 for (i = snum->top; i < sdiv->top + 2; i++) snum->d[i] = 0;
361 ql =LBITS(q); qh =HBITS(q); 252 snum->top = sdiv->top + 2;
362 mul64(t2l,t2h,ql,qh); /* t2=(BN_ULLONG)d1*q; */
363 } 253 }
364#endif 254 else
365
366 for (;;)
367 {
368 if ((t2h < rem) ||
369 ((t2h == rem) && (t2l <= wnump[-2])))
370 break;
371 q--;
372 rem += d0;
373 if (rem < d0) break; /* don't let rem overflow */
374 if (t2l < d1) t2h--; t2l -= d1;
375 }
376#endif /* !BN_LLONG */
377 }
378#endif /* !BN_DIV3W */
379
380 l0=bn_mul_words(tmp->d,sdiv->d,div_n,q);
381 tmp->d[div_n]=l0;
382 wnum.d--;
383 /* ingore top values of the bignums just sub the two
384 * BN_ULONG arrays with bn_sub_words */
385 if (bn_sub_words(wnum.d, wnum.d, tmp->d, div_n+1))
386 { 255 {
387 /* Note: As we have considered only the leading 256 if (bn_wexpand(snum, snum->top + 1) == NULL) goto err;
388 * two BN_ULONGs in the calculation of q, sdiv * q 257 snum->d[snum->top] = 0;
389 * might be greater than wnum (but then (q-1) * sdiv 258 snum->top ++;
390 * is less or equal than wnum)
391 */
392 q--;
393 if (bn_add_words(wnum.d, wnum.d, sdiv->d, div_n))
394 /* we can't have an overflow here (assuming
395 * that q != 0, but if q == 0 then tmp is
396 * zero anyway) */
397 (*wnump)++;
398 } 259 }
399 /* store part of the result */
400 *resp = q;
401 }
402 bn_correct_top(snum);
403 if (rm != NULL)
404 {
405 /* Keep a copy of the neg flag in num because if rm==num
406 * BN_rshift() will overwrite it.
407 */
408 int neg = num->neg;
409 BN_rshift(rm,snum,norm_shift);
410 if (!BN_is_zero(rm))
411 rm->neg = neg;
412 bn_check_top(rm);
413 }
414 BN_CTX_end(ctx);
415 return(1);
416err:
417 bn_check_top(rm);
418 BN_CTX_end(ctx);
419 return(0);
420 }
421
422
423/* BN_div_no_branch is a special version of BN_div. It does not contain
424 * branches that may leak sensitive information.
425 */
426static int BN_div_no_branch(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num,
427 const BIGNUM *divisor, BN_CTX *ctx)
428 {
429 int norm_shift,i,loop;
430 BIGNUM *tmp,wnum,*snum,*sdiv,*res;
431 BN_ULONG *resp,*wnump;
432 BN_ULONG d0,d1;
433 int num_n,div_n;
434
435 bn_check_top(dv);
436 bn_check_top(rm);
437 /* bn_check_top(num); */ /* 'num' has been checked in BN_div() */
438 bn_check_top(divisor);
439
440 if (BN_is_zero(divisor))
441 {
442 BNerr(BN_F_BN_DIV_NO_BRANCH,BN_R_DIV_BY_ZERO);
443 return(0);
444 }
445
446 BN_CTX_start(ctx);
447 tmp=BN_CTX_get(ctx);
448 snum=BN_CTX_get(ctx);
449 sdiv=BN_CTX_get(ctx);
450 if (dv == NULL)
451 res=BN_CTX_get(ctx);
452 else res=dv;
453 if (sdiv == NULL || res == NULL) goto err;
454
455 /* First we normalise the numbers */
456 norm_shift=BN_BITS2-((BN_num_bits(divisor))%BN_BITS2);
457 if (!(BN_lshift(sdiv,divisor,norm_shift))) goto err;
458 sdiv->neg=0;
459 norm_shift+=BN_BITS2;
460 if (!(BN_lshift(snum,num,norm_shift))) goto err;
461 snum->neg=0;
462
463 /* Since we don't know whether snum is larger than sdiv,
464 * we pad snum with enough zeroes without changing its
465 * value.
466 */
467 if (snum->top <= sdiv->top+1)
468 {
469 if (bn_wexpand(snum, sdiv->top + 2) == NULL) goto err;
470 for (i = snum->top; i < sdiv->top + 2; i++) snum->d[i] = 0;
471 snum->top = sdiv->top + 2;
472 }
473 else
474 {
475 if (bn_wexpand(snum, snum->top + 1) == NULL) goto err;
476 snum->d[snum->top] = 0;
477 snum->top ++;
478 } 260 }
479 261
480 div_n=sdiv->top; 262 div_n=sdiv->top;
@@ -500,12 +282,27 @@ static int BN_div_no_branch(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num,
500 /* Setup to 'res' */ 282 /* Setup to 'res' */
501 res->neg= (num->neg^divisor->neg); 283 res->neg= (num->neg^divisor->neg);
502 if (!bn_wexpand(res,(loop+1))) goto err; 284 if (!bn_wexpand(res,(loop+1))) goto err;
503 res->top=loop-1; 285 res->top=loop-no_branch;
504 resp= &(res->d[loop-1]); 286 resp= &(res->d[loop-1]);
505 287
506 /* space for temp */ 288 /* space for temp */
507 if (!bn_wexpand(tmp,(div_n+1))) goto err; 289 if (!bn_wexpand(tmp,(div_n+1))) goto err;
508 290
291 if (!no_branch)
292 {
293 if (BN_ucmp(&wnum,sdiv) >= 0)
294 {
295 /* If BN_DEBUG_RAND is defined BN_ucmp changes (via
296 * bn_pollute) the const bignum arguments =>
297 * clean the values between top and max again */
298 bn_clear_top2max(&wnum);
299 bn_sub_words(wnum.d, wnum.d, sdiv->d, div_n);
300 *resp=1;
301 }
302 else
303 res->top--;
304 }
305
509 /* if res->top == 0 then clear the neg value otherwise decrease 306 /* if res->top == 0 then clear the neg value otherwise decrease
510 * the resp pointer */ 307 * the resp pointer */
511 if (res->top == 0) 308 if (res->top == 0)
@@ -638,7 +435,7 @@ X) -> 0x%08X\n",
638 rm->neg = neg; 435 rm->neg = neg;
639 bn_check_top(rm); 436 bn_check_top(rm);
640 } 437 }
641 bn_correct_top(res); 438 if (no_branch) bn_correct_top(res);
642 BN_CTX_end(ctx); 439 BN_CTX_end(ctx);
643 return(1); 440 return(1);
644err: 441err:
@@ -646,5 +443,4 @@ err:
646 BN_CTX_end(ctx); 443 BN_CTX_end(ctx);
647 return(0); 444 return(0);
648 } 445 }
649
650#endif 446#endif
diff --git a/src/lib/libcrypto/bn/bn_exp.c b/src/lib/libcrypto/bn/bn_exp.c
index d9b6c737fc..2abf6fd678 100644
--- a/src/lib/libcrypto/bn/bn_exp.c
+++ b/src/lib/libcrypto/bn/bn_exp.c
@@ -113,6 +113,18 @@
113#include "cryptlib.h" 113#include "cryptlib.h"
114#include "bn_lcl.h" 114#include "bn_lcl.h"
115 115
116#include <stdlib.h>
117#ifdef _WIN32
118# include <malloc.h>
119# ifndef alloca
120# define alloca _alloca
121# endif
122#elif defined(__GNUC__)
123# ifndef alloca
124# define alloca(s) __builtin_alloca((s))
125# endif
126#endif
127
116/* maximum precomputation table size for *variable* sliding windows */ 128/* maximum precomputation table size for *variable* sliding windows */
117#define TABLE_SIZE 32 129#define TABLE_SIZE 32
118 130
@@ -522,23 +534,17 @@ err:
522 * as cache lines are concerned. The following functions are used to transfer a BIGNUM 534 * as cache lines are concerned. The following functions are used to transfer a BIGNUM
523 * from/to that table. */ 535 * from/to that table. */
524 536
525static int MOD_EXP_CTIME_COPY_TO_PREBUF(BIGNUM *b, int top, unsigned char *buf, int idx, int width) 537static int MOD_EXP_CTIME_COPY_TO_PREBUF(const BIGNUM *b, int top, unsigned char *buf, int idx, int width)
526 { 538 {
527 size_t i, j; 539 size_t i, j;
528 540
529 if (bn_wexpand(b, top) == NULL) 541 if (top > b->top)
530 return 0; 542 top = b->top; /* this works because 'buf' is explicitly zeroed */
531 while (b->top < top)
532 {
533 b->d[b->top++] = 0;
534 }
535
536 for (i = 0, j=idx; i < top * sizeof b->d[0]; i++, j+=width) 543 for (i = 0, j=idx; i < top * sizeof b->d[0]; i++, j+=width)
537 { 544 {
538 buf[j] = ((unsigned char*)b->d)[i]; 545 buf[j] = ((unsigned char*)b->d)[i];
539 } 546 }
540 547
541 bn_correct_top(b);
542 return 1; 548 return 1;
543 } 549 }
544 550
@@ -561,7 +567,7 @@ static int MOD_EXP_CTIME_COPY_FROM_PREBUF(BIGNUM *b, int top, unsigned char *buf
561 567
562/* Given a pointer value, compute the next address that is a cache line multiple. */ 568/* Given a pointer value, compute the next address that is a cache line multiple. */
563#define MOD_EXP_CTIME_ALIGN(x_) \ 569#define MOD_EXP_CTIME_ALIGN(x_) \
564 ((unsigned char*)(x_) + (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - (((BN_ULONG)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK)))) 570 ((unsigned char*)(x_) + (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - (((size_t)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK))))
565 571
566/* This variant of BN_mod_exp_mont() uses fixed windows and the special 572/* This variant of BN_mod_exp_mont() uses fixed windows and the special
567 * precomputation memory layout to limit data-dependency to a minimum 573 * precomputation memory layout to limit data-dependency to a minimum
@@ -572,17 +578,15 @@ static int MOD_EXP_CTIME_COPY_FROM_PREBUF(BIGNUM *b, int top, unsigned char *buf
572int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, 578int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
573 const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont) 579 const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
574 { 580 {
575 int i,bits,ret=0,idx,window,wvalue; 581 int i,bits,ret=0,window,wvalue;
576 int top; 582 int top;
577 BIGNUM *r;
578 const BIGNUM *aa;
579 BN_MONT_CTX *mont=NULL; 583 BN_MONT_CTX *mont=NULL;
580 584
581 int numPowers; 585 int numPowers;
582 unsigned char *powerbufFree=NULL; 586 unsigned char *powerbufFree=NULL;
583 int powerbufLen = 0; 587 int powerbufLen = 0;
584 unsigned char *powerbuf=NULL; 588 unsigned char *powerbuf=NULL;
585 BIGNUM *computeTemp=NULL, *am=NULL; 589 BIGNUM tmp, am;
586 590
587 bn_check_top(a); 591 bn_check_top(a);
588 bn_check_top(p); 592 bn_check_top(p);
@@ -602,10 +606,7 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
602 return ret; 606 return ret;
603 } 607 }
604 608
605 /* Initialize BIGNUM context and allocate intermediate result */
606 BN_CTX_start(ctx); 609 BN_CTX_start(ctx);
607 r = BN_CTX_get(ctx);
608 if (r == NULL) goto err;
609 610
610 /* Allocate a montgomery context if it was not supplied by the caller. 611 /* Allocate a montgomery context if it was not supplied by the caller.
611 * If this is not done, things will break in the montgomery part. 612 * If this is not done, things will break in the montgomery part.
@@ -620,40 +621,154 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
620 621
621 /* Get the window size to use with size of p. */ 622 /* Get the window size to use with size of p. */
622 window = BN_window_bits_for_ctime_exponent_size(bits); 623 window = BN_window_bits_for_ctime_exponent_size(bits);
624#if defined(OPENSSL_BN_ASM_MONT5)
625 if (window==6 && bits<=1024) window=5; /* ~5% improvement of 2048-bit RSA sign */
626#endif
623 627
624 /* Allocate a buffer large enough to hold all of the pre-computed 628 /* Allocate a buffer large enough to hold all of the pre-computed
625 * powers of a. 629 * powers of am, am itself and tmp.
626 */ 630 */
627 numPowers = 1 << window; 631 numPowers = 1 << window;
628 powerbufLen = sizeof(m->d[0])*top*numPowers; 632 powerbufLen = sizeof(m->d[0])*(top*numPowers +
633 ((2*top)>numPowers?(2*top):numPowers));
634#ifdef alloca
635 if (powerbufLen < 3072)
636 powerbufFree = alloca(powerbufLen+MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH);
637 else
638#endif
629 if ((powerbufFree=(unsigned char*)OPENSSL_malloc(powerbufLen+MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH)) == NULL) 639 if ((powerbufFree=(unsigned char*)OPENSSL_malloc(powerbufLen+MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH)) == NULL)
630 goto err; 640 goto err;
631 641
632 powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree); 642 powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree);
633 memset(powerbuf, 0, powerbufLen); 643 memset(powerbuf, 0, powerbufLen);
634 644
635 /* Initialize the intermediate result. Do this early to save double conversion, 645#ifdef alloca
636 * once each for a^0 and intermediate result. 646 if (powerbufLen < 3072)
637 */ 647 powerbufFree = NULL;
638 if (!BN_to_montgomery(r,BN_value_one(),mont,ctx)) goto err; 648#endif
639 if (!MOD_EXP_CTIME_COPY_TO_PREBUF(r, top, powerbuf, 0, numPowers)) goto err;
640 649
641 /* Initialize computeTemp as a^1 with montgomery precalcs */ 650 /* lay down tmp and am right after powers table */
642 computeTemp = BN_CTX_get(ctx); 651 tmp.d = (BN_ULONG *)(powerbuf + sizeof(m->d[0])*top*numPowers);
643 am = BN_CTX_get(ctx); 652 am.d = tmp.d + top;
644 if (computeTemp==NULL || am==NULL) goto err; 653 tmp.top = am.top = 0;
654 tmp.dmax = am.dmax = top;
655 tmp.neg = am.neg = 0;
656 tmp.flags = am.flags = BN_FLG_STATIC_DATA;
657
658 /* prepare a^0 in Montgomery domain */
659#if 1
660 if (!BN_to_montgomery(&tmp,BN_value_one(),mont,ctx)) goto err;
661#else
662 tmp.d[0] = (0-m->d[0])&BN_MASK2; /* 2^(top*BN_BITS2) - m */
663 for (i=1;i<top;i++)
664 tmp.d[i] = (~m->d[i])&BN_MASK2;
665 tmp.top = top;
666#endif
645 667
668 /* prepare a^1 in Montgomery domain */
646 if (a->neg || BN_ucmp(a,m) >= 0) 669 if (a->neg || BN_ucmp(a,m) >= 0)
647 { 670 {
648 if (!BN_mod(am,a,m,ctx)) 671 if (!BN_mod(&am,a,m,ctx)) goto err;
649 goto err; 672 if (!BN_to_montgomery(&am,&am,mont,ctx)) goto err;
650 aa= am;
651 } 673 }
652 else 674 else if (!BN_to_montgomery(&am,a,mont,ctx)) goto err;
653 aa=a; 675
654 if (!BN_to_montgomery(am,aa,mont,ctx)) goto err; 676#if defined(OPENSSL_BN_ASM_MONT5)
655 if (!BN_copy(computeTemp, am)) goto err; 677 /* This optimization uses ideas from http://eprint.iacr.org/2011/239,
656 if (!MOD_EXP_CTIME_COPY_TO_PREBUF(am, top, powerbuf, 1, numPowers)) goto err; 678 * specifically optimization of cache-timing attack countermeasures
679 * and pre-computation optimization. */
680
681 /* Dedicated window==4 case improves 512-bit RSA sign by ~15%, but as
682 * 512-bit RSA is hardly relevant, we omit it to spare size... */
683 if (window==5)
684 {
685 void bn_mul_mont_gather5(BN_ULONG *rp,const BN_ULONG *ap,
686 const void *table,const BN_ULONG *np,
687 const BN_ULONG *n0,int num,int power);
688 void bn_scatter5(const BN_ULONG *inp,size_t num,
689 void *table,size_t power);
690 void bn_gather5(BN_ULONG *out,size_t num,
691 void *table,size_t power);
692
693 BN_ULONG *np=mont->N.d, *n0=mont->n0;
694
695 /* BN_to_montgomery can contaminate words above .top
696 * [in BN_DEBUG[_DEBUG] build]... */
697 for (i=am.top; i<top; i++) am.d[i]=0;
698 for (i=tmp.top; i<top; i++) tmp.d[i]=0;
699
700 bn_scatter5(tmp.d,top,powerbuf,0);
701 bn_scatter5(am.d,am.top,powerbuf,1);
702 bn_mul_mont(tmp.d,am.d,am.d,np,n0,top);
703 bn_scatter5(tmp.d,top,powerbuf,2);
704
705#if 0
706 for (i=3; i<32; i++)
707 {
708 /* Calculate a^i = a^(i-1) * a */
709 bn_mul_mont_gather5(tmp.d,am.d,powerbuf,np,n0,top,i-1);
710 bn_scatter5(tmp.d,top,powerbuf,i);
711 }
712#else
713 /* same as above, but uses squaring for 1/2 of operations */
714 for (i=4; i<32; i*=2)
715 {
716 bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top);
717 bn_scatter5(tmp.d,top,powerbuf,i);
718 }
719 for (i=3; i<8; i+=2)
720 {
721 int j;
722 bn_mul_mont_gather5(tmp.d,am.d,powerbuf,np,n0,top,i-1);
723 bn_scatter5(tmp.d,top,powerbuf,i);
724 for (j=2*i; j<32; j*=2)
725 {
726 bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top);
727 bn_scatter5(tmp.d,top,powerbuf,j);
728 }
729 }
730 for (; i<16; i+=2)
731 {
732 bn_mul_mont_gather5(tmp.d,am.d,powerbuf,np,n0,top,i-1);
733 bn_scatter5(tmp.d,top,powerbuf,i);
734 bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top);
735 bn_scatter5(tmp.d,top,powerbuf,2*i);
736 }
737 for (; i<32; i+=2)
738 {
739 bn_mul_mont_gather5(tmp.d,am.d,powerbuf,np,n0,top,i-1);
740 bn_scatter5(tmp.d,top,powerbuf,i);
741 }
742#endif
743 bits--;
744 for (wvalue=0, i=bits%5; i>=0; i--,bits--)
745 wvalue = (wvalue<<1)+BN_is_bit_set(p,bits);
746 bn_gather5(tmp.d,top,powerbuf,wvalue);
747
748 /* Scan the exponent one window at a time starting from the most
749 * significant bits.
750 */
751 while (bits >= 0)
752 {
753 for (wvalue=0, i=0; i<5; i++,bits--)
754 wvalue = (wvalue<<1)+BN_is_bit_set(p,bits);
755
756 bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top);
757 bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top);
758 bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top);
759 bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top);
760 bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top);
761 bn_mul_mont_gather5(tmp.d,tmp.d,powerbuf,np,n0,top,wvalue);
762 }
763
764 tmp.top=top;
765 bn_correct_top(&tmp);
766 }
767 else
768#endif
769 {
770 if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 0, numPowers)) goto err;
771 if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&am, top, powerbuf, 1, numPowers)) goto err;
657 772
658 /* If the window size is greater than 1, then calculate 773 /* If the window size is greater than 1, then calculate
659 * val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1) 774 * val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1)
@@ -662,62 +777,54 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
662 */ 777 */
663 if (window > 1) 778 if (window > 1)
664 { 779 {
665 for (i=2; i<numPowers; i++) 780 if (!BN_mod_mul_montgomery(&tmp,&am,&am,mont,ctx)) goto err;
781 if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 2, numPowers)) goto err;
782 for (i=3; i<numPowers; i++)
666 { 783 {
667 /* Calculate a^i = a^(i-1) * a */ 784 /* Calculate a^i = a^(i-1) * a */
668 if (!BN_mod_mul_montgomery(computeTemp,am,computeTemp,mont,ctx)) 785 if (!BN_mod_mul_montgomery(&tmp,&am,&tmp,mont,ctx))
669 goto err; 786 goto err;
670 if (!MOD_EXP_CTIME_COPY_TO_PREBUF(computeTemp, top, powerbuf, i, numPowers)) goto err; 787 if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, i, numPowers)) goto err;
671 } 788 }
672 } 789 }
673 790
674 /* Adjust the number of bits up to a multiple of the window size. 791 bits--;
675 * If the exponent length is not a multiple of the window size, then 792 for (wvalue=0, i=bits%window; i>=0; i--,bits--)
676 * this pads the most significant bits with zeros to normalize the 793 wvalue = (wvalue<<1)+BN_is_bit_set(p,bits);
677 * scanning loop to there's no special cases. 794 if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&tmp,top,powerbuf,wvalue,numPowers)) goto err;
678 * 795
679 * * NOTE: Making the window size a power of two less than the native 796 /* Scan the exponent one window at a time starting from the most
680 * * word size ensures that the padded bits won't go past the last 797 * significant bits.
681 * * word in the internal BIGNUM structure. Going past the end will 798 */
682 * * still produce the correct result, but causes a different branch 799 while (bits >= 0)
683 * * to be taken in the BN_is_bit_set function.
684 */
685 bits = ((bits+window-1)/window)*window;
686 idx=bits-1; /* The top bit of the window */
687
688 /* Scan the exponent one window at a time starting from the most
689 * significant bits.
690 */
691 while (idx >= 0)
692 { 800 {
693 wvalue=0; /* The 'value' of the window */ 801 wvalue=0; /* The 'value' of the window */
694 802
695 /* Scan the window, squaring the result as we go */ 803 /* Scan the window, squaring the result as we go */
696 for (i=0; i<window; i++,idx--) 804 for (i=0; i<window; i++,bits--)
697 { 805 {
698 if (!BN_mod_mul_montgomery(r,r,r,mont,ctx)) goto err; 806 if (!BN_mod_mul_montgomery(&tmp,&tmp,&tmp,mont,ctx)) goto err;
699 wvalue = (wvalue<<1)+BN_is_bit_set(p,idx); 807 wvalue = (wvalue<<1)+BN_is_bit_set(p,bits);
700 } 808 }
701 809
702 /* Fetch the appropriate pre-computed value from the pre-buf */ 810 /* Fetch the appropriate pre-computed value from the pre-buf */
703 if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(computeTemp, top, powerbuf, wvalue, numPowers)) goto err; 811 if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&am, top, powerbuf, wvalue, numPowers)) goto err;
704 812
705 /* Multiply the result into the intermediate result */ 813 /* Multiply the result into the intermediate result */
706 if (!BN_mod_mul_montgomery(r,r,computeTemp,mont,ctx)) goto err; 814 if (!BN_mod_mul_montgomery(&tmp,&tmp,&am,mont,ctx)) goto err;
707 } 815 }
816 }
708 817
709 /* Convert the final result from montgomery to standard format */ 818 /* Convert the final result from montgomery to standard format */
710 if (!BN_from_montgomery(rr,r,mont,ctx)) goto err; 819 if (!BN_from_montgomery(rr,&tmp,mont,ctx)) goto err;
711 ret=1; 820 ret=1;
712err: 821err:
713 if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont); 822 if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont);
714 if (powerbuf!=NULL) 823 if (powerbuf!=NULL)
715 { 824 {
716 OPENSSL_cleanse(powerbuf,powerbufLen); 825 OPENSSL_cleanse(powerbuf,powerbufLen);
717 OPENSSL_free(powerbufFree); 826 if (powerbufFree) OPENSSL_free(powerbufFree);
718 } 827 }
719 if (am!=NULL) BN_clear(am);
720 if (computeTemp!=NULL) BN_clear(computeTemp);
721 BN_CTX_end(ctx); 828 BN_CTX_end(ctx);
722 return(ret); 829 return(ret);
723 } 830 }
@@ -988,4 +1095,3 @@ err:
988 bn_check_top(r); 1095 bn_check_top(r);
989 return(ret); 1096 return(ret);
990 } 1097 }
991
diff --git a/src/lib/libcrypto/bn/bn_gf2m.c b/src/lib/libcrypto/bn/bn_gf2m.c
index 432a3aa338..8a4dc20ad9 100644
--- a/src/lib/libcrypto/bn/bn_gf2m.c
+++ b/src/lib/libcrypto/bn/bn_gf2m.c
@@ -94,6 +94,8 @@
94#include "cryptlib.h" 94#include "cryptlib.h"
95#include "bn_lcl.h" 95#include "bn_lcl.h"
96 96
97#ifndef OPENSSL_NO_EC2M
98
97/* Maximum number of iterations before BN_GF2m_mod_solve_quad_arr should fail. */ 99/* Maximum number of iterations before BN_GF2m_mod_solve_quad_arr should fail. */
98#define MAX_ITERATIONS 50 100#define MAX_ITERATIONS 50
99 101
@@ -122,6 +124,7 @@ static const BN_ULONG SQR_tb[16] =
122 SQR_tb[(w) >> 4 & 0xF] << 8 | SQR_tb[(w) & 0xF] 124 SQR_tb[(w) >> 4 & 0xF] << 8 | SQR_tb[(w) & 0xF]
123#endif 125#endif
124 126
127#if !defined(OPENSSL_BN_ASM_GF2m)
125/* Product of two polynomials a, b each with degree < BN_BITS2 - 1, 128/* Product of two polynomials a, b each with degree < BN_BITS2 - 1,
126 * result is a polynomial r with degree < 2 * BN_BITS - 1 129 * result is a polynomial r with degree < 2 * BN_BITS - 1
127 * The caller MUST ensure that the variables have the right amount 130 * The caller MUST ensure that the variables have the right amount
@@ -216,7 +219,9 @@ static void bn_GF2m_mul_2x2(BN_ULONG *r, const BN_ULONG a1, const BN_ULONG a0, c
216 r[2] ^= m1 ^ r[1] ^ r[3]; /* h0 ^= m1 ^ l1 ^ h1; */ 219 r[2] ^= m1 ^ r[1] ^ r[3]; /* h0 ^= m1 ^ l1 ^ h1; */
217 r[1] = r[3] ^ r[2] ^ r[0] ^ m1 ^ m0; /* l1 ^= l0 ^ h0 ^ m0; */ 220 r[1] = r[3] ^ r[2] ^ r[0] ^ m1 ^ m0; /* l1 ^= l0 ^ h0 ^ m0; */
218 } 221 }
219 222#else
223void bn_GF2m_mul_2x2(BN_ULONG *r, BN_ULONG a1, BN_ULONG a0, BN_ULONG b1, BN_ULONG b0);
224#endif
220 225
221/* Add polynomials a and b and store result in r; r could be a or b, a and b 226/* Add polynomials a and b and store result in r; r could be a or b, a and b
222 * could be equal; r is the bitwise XOR of a and b. 227 * could be equal; r is the bitwise XOR of a and b.
@@ -360,21 +365,17 @@ int BN_GF2m_mod_arr(BIGNUM *r, const BIGNUM *a, const int p[])
360int BN_GF2m_mod(BIGNUM *r, const BIGNUM *a, const BIGNUM *p) 365int BN_GF2m_mod(BIGNUM *r, const BIGNUM *a, const BIGNUM *p)
361 { 366 {
362 int ret = 0; 367 int ret = 0;
363 const int max = BN_num_bits(p) + 1; 368 int arr[6];
364 int *arr=NULL;
365 bn_check_top(a); 369 bn_check_top(a);
366 bn_check_top(p); 370 bn_check_top(p);
367 if ((arr = (int *)OPENSSL_malloc(sizeof(int) * max)) == NULL) goto err; 371 ret = BN_GF2m_poly2arr(p, arr, sizeof(arr)/sizeof(arr[0]));
368 ret = BN_GF2m_poly2arr(p, arr, max); 372 if (!ret || ret > (int)(sizeof(arr)/sizeof(arr[0])))
369 if (!ret || ret > max)
370 { 373 {
371 BNerr(BN_F_BN_GF2M_MOD,BN_R_INVALID_LENGTH); 374 BNerr(BN_F_BN_GF2M_MOD,BN_R_INVALID_LENGTH);
372 goto err; 375 return 0;
373 } 376 }
374 ret = BN_GF2m_mod_arr(r, a, arr); 377 ret = BN_GF2m_mod_arr(r, a, arr);
375 bn_check_top(r); 378 bn_check_top(r);
376err:
377 if (arr) OPENSSL_free(arr);
378 return ret; 379 return ret;
379 } 380 }
380 381
@@ -521,7 +522,7 @@ err:
521 */ 522 */
522int BN_GF2m_mod_inv(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) 523int BN_GF2m_mod_inv(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
523 { 524 {
524 BIGNUM *b, *c, *u, *v, *tmp; 525 BIGNUM *b, *c = NULL, *u = NULL, *v = NULL, *tmp;
525 int ret = 0; 526 int ret = 0;
526 527
527 bn_check_top(a); 528 bn_check_top(a);
@@ -529,18 +530,18 @@ int BN_GF2m_mod_inv(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
529 530
530 BN_CTX_start(ctx); 531 BN_CTX_start(ctx);
531 532
532 b = BN_CTX_get(ctx); 533 if ((b = BN_CTX_get(ctx))==NULL) goto err;
533 c = BN_CTX_get(ctx); 534 if ((c = BN_CTX_get(ctx))==NULL) goto err;
534 u = BN_CTX_get(ctx); 535 if ((u = BN_CTX_get(ctx))==NULL) goto err;
535 v = BN_CTX_get(ctx); 536 if ((v = BN_CTX_get(ctx))==NULL) goto err;
536 if (v == NULL) goto err;
537 537
538 if (!BN_one(b)) goto err;
539 if (!BN_GF2m_mod(u, a, p)) goto err; 538 if (!BN_GF2m_mod(u, a, p)) goto err;
540 if (!BN_copy(v, p)) goto err;
541
542 if (BN_is_zero(u)) goto err; 539 if (BN_is_zero(u)) goto err;
543 540
541 if (!BN_copy(v, p)) goto err;
542#if 0
543 if (!BN_one(b)) goto err;
544
544 while (1) 545 while (1)
545 { 546 {
546 while (!BN_is_odd(u)) 547 while (!BN_is_odd(u))
@@ -565,13 +566,89 @@ int BN_GF2m_mod_inv(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
565 if (!BN_GF2m_add(u, u, v)) goto err; 566 if (!BN_GF2m_add(u, u, v)) goto err;
566 if (!BN_GF2m_add(b, b, c)) goto err; 567 if (!BN_GF2m_add(b, b, c)) goto err;
567 } 568 }
569#else
570 {
571 int i, ubits = BN_num_bits(u),
572 vbits = BN_num_bits(v), /* v is copy of p */
573 top = p->top;
574 BN_ULONG *udp,*bdp,*vdp,*cdp;
575
576 bn_wexpand(u,top); udp = u->d;
577 for (i=u->top;i<top;i++) udp[i] = 0;
578 u->top = top;
579 bn_wexpand(b,top); bdp = b->d;
580 bdp[0] = 1;
581 for (i=1;i<top;i++) bdp[i] = 0;
582 b->top = top;
583 bn_wexpand(c,top); cdp = c->d;
584 for (i=0;i<top;i++) cdp[i] = 0;
585 c->top = top;
586 vdp = v->d; /* It pays off to "cache" *->d pointers, because
587 * it allows optimizer to be more aggressive.
588 * But we don't have to "cache" p->d, because *p
589 * is declared 'const'... */
590 while (1)
591 {
592 while (ubits && !(udp[0]&1))
593 {
594 BN_ULONG u0,u1,b0,b1,mask;
595
596 u0 = udp[0];
597 b0 = bdp[0];
598 mask = (BN_ULONG)0-(b0&1);
599 b0 ^= p->d[0]&mask;
600 for (i=0;i<top-1;i++)
601 {
602 u1 = udp[i+1];
603 udp[i] = ((u0>>1)|(u1<<(BN_BITS2-1)))&BN_MASK2;
604 u0 = u1;
605 b1 = bdp[i+1]^(p->d[i+1]&mask);
606 bdp[i] = ((b0>>1)|(b1<<(BN_BITS2-1)))&BN_MASK2;
607 b0 = b1;
608 }
609 udp[i] = u0>>1;
610 bdp[i] = b0>>1;
611 ubits--;
612 }
568 613
614 if (ubits<=BN_BITS2 && udp[0]==1) break;
615
616 if (ubits<vbits)
617 {
618 i = ubits; ubits = vbits; vbits = i;
619 tmp = u; u = v; v = tmp;
620 tmp = b; b = c; c = tmp;
621 udp = vdp; vdp = v->d;
622 bdp = cdp; cdp = c->d;
623 }
624 for(i=0;i<top;i++)
625 {
626 udp[i] ^= vdp[i];
627 bdp[i] ^= cdp[i];
628 }
629 if (ubits==vbits)
630 {
631 BN_ULONG ul;
632 int utop = (ubits-1)/BN_BITS2;
633
634 while ((ul=udp[utop])==0 && utop) utop--;
635 ubits = utop*BN_BITS2 + BN_num_bits_word(ul);
636 }
637 }
638 bn_correct_top(b);
639 }
640#endif
569 641
570 if (!BN_copy(r, b)) goto err; 642 if (!BN_copy(r, b)) goto err;
571 bn_check_top(r); 643 bn_check_top(r);
572 ret = 1; 644 ret = 1;
573 645
574err: 646err:
647#ifdef BN_DEBUG /* BN_CTX_end would complain about the expanded form */
648 bn_correct_top(c);
649 bn_correct_top(u);
650 bn_correct_top(v);
651#endif
575 BN_CTX_end(ctx); 652 BN_CTX_end(ctx);
576 return ret; 653 return ret;
577 } 654 }
@@ -1033,3 +1110,4 @@ int BN_GF2m_arr2poly(const int p[], BIGNUM *a)
1033 return 1; 1110 return 1;
1034 } 1111 }
1035 1112
1113#endif
diff --git a/src/lib/libcrypto/bn/bn_lcl.h b/src/lib/libcrypto/bn/bn_lcl.h
index 8e5e98e3f2..eecfd8cc99 100644
--- a/src/lib/libcrypto/bn/bn_lcl.h
+++ b/src/lib/libcrypto/bn/bn_lcl.h
@@ -238,7 +238,7 @@ extern "C" {
238# if defined(__DECC) 238# if defined(__DECC)
239# include <c_asm.h> 239# include <c_asm.h>
240# define BN_UMULT_HIGH(a,b) (BN_ULONG)asm("umulh %a0,%a1,%v0",(a),(b)) 240# define BN_UMULT_HIGH(a,b) (BN_ULONG)asm("umulh %a0,%a1,%v0",(a),(b))
241# elif defined(__GNUC__) 241# elif defined(__GNUC__) && __GNUC__>=2
242# define BN_UMULT_HIGH(a,b) ({ \ 242# define BN_UMULT_HIGH(a,b) ({ \
243 register BN_ULONG ret; \ 243 register BN_ULONG ret; \
244 asm ("umulh %1,%2,%0" \ 244 asm ("umulh %1,%2,%0" \
@@ -247,7 +247,7 @@ extern "C" {
247 ret; }) 247 ret; })
248# endif /* compiler */ 248# endif /* compiler */
249# elif defined(_ARCH_PPC) && defined(__64BIT__) && defined(SIXTY_FOUR_BIT_LONG) 249# elif defined(_ARCH_PPC) && defined(__64BIT__) && defined(SIXTY_FOUR_BIT_LONG)
250# if defined(__GNUC__) 250# if defined(__GNUC__) && __GNUC__>=2
251# define BN_UMULT_HIGH(a,b) ({ \ 251# define BN_UMULT_HIGH(a,b) ({ \
252 register BN_ULONG ret; \ 252 register BN_ULONG ret; \
253 asm ("mulhdu %0,%1,%2" \ 253 asm ("mulhdu %0,%1,%2" \
@@ -257,7 +257,7 @@ extern "C" {
257# endif /* compiler */ 257# endif /* compiler */
258# elif (defined(__x86_64) || defined(__x86_64__)) && \ 258# elif (defined(__x86_64) || defined(__x86_64__)) && \
259 (defined(SIXTY_FOUR_BIT_LONG) || defined(SIXTY_FOUR_BIT)) 259 (defined(SIXTY_FOUR_BIT_LONG) || defined(SIXTY_FOUR_BIT))
260# if defined(__GNUC__) 260# if defined(__GNUC__) && __GNUC__>=2
261# define BN_UMULT_HIGH(a,b) ({ \ 261# define BN_UMULT_HIGH(a,b) ({ \
262 register BN_ULONG ret,discard; \ 262 register BN_ULONG ret,discard; \
263 asm ("mulq %3" \ 263 asm ("mulq %3" \
@@ -280,6 +280,19 @@ extern "C" {
280# define BN_UMULT_HIGH(a,b) __umulh((a),(b)) 280# define BN_UMULT_HIGH(a,b) __umulh((a),(b))
281# define BN_UMULT_LOHI(low,high,a,b) ((low)=_umul128((a),(b),&(high))) 281# define BN_UMULT_LOHI(low,high,a,b) ((low)=_umul128((a),(b),&(high)))
282# endif 282# endif
283# elif defined(__mips) && (defined(SIXTY_FOUR_BIT) || defined(SIXTY_FOUR_BIT_LONG))
284# if defined(__GNUC__) && __GNUC__>=2
285# define BN_UMULT_HIGH(a,b) ({ \
286 register BN_ULONG ret; \
287 asm ("dmultu %1,%2" \
288 : "=h"(ret) \
289 : "r"(a), "r"(b) : "l"); \
290 ret; })
291# define BN_UMULT_LOHI(low,high,a,b) \
292 asm ("dmultu %2,%3" \
293 : "=l"(low),"=h"(high) \
294 : "r"(a), "r"(b));
295# endif
283# endif /* cpu */ 296# endif /* cpu */
284#endif /* OPENSSL_NO_ASM */ 297#endif /* OPENSSL_NO_ASM */
285 298
@@ -459,6 +472,10 @@ extern "C" {
459 } 472 }
460#endif /* !BN_LLONG */ 473#endif /* !BN_LLONG */
461 474
475#if defined(OPENSSL_DOING_MAKEDEPEND) && defined(OPENSSL_FIPS)
476#undef bn_div_words
477#endif
478
462void bn_mul_normal(BN_ULONG *r,BN_ULONG *a,int na,BN_ULONG *b,int nb); 479void bn_mul_normal(BN_ULONG *r,BN_ULONG *a,int na,BN_ULONG *b,int nb);
463void bn_mul_comba8(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b); 480void bn_mul_comba8(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b);
464void bn_mul_comba4(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b); 481void bn_mul_comba4(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b);
diff --git a/src/lib/libcrypto/bn/bn_lib.c b/src/lib/libcrypto/bn/bn_lib.c
index 5470fbe6ef..7a5676de69 100644
--- a/src/lib/libcrypto/bn/bn_lib.c
+++ b/src/lib/libcrypto/bn/bn_lib.c
@@ -139,25 +139,6 @@ const BIGNUM *BN_value_one(void)
139 return(&const_one); 139 return(&const_one);
140 } 140 }
141 141
142char *BN_options(void)
143 {
144 static int init=0;
145 static char data[16];
146
147 if (!init)
148 {
149 init++;
150#ifdef BN_LLONG
151 BIO_snprintf(data,sizeof data,"bn(%d,%d)",
152 (int)sizeof(BN_ULLONG)*8,(int)sizeof(BN_ULONG)*8);
153#else
154 BIO_snprintf(data,sizeof data,"bn(%d,%d)",
155 (int)sizeof(BN_ULONG)*8,(int)sizeof(BN_ULONG)*8);
156#endif
157 }
158 return(data);
159 }
160
161int BN_num_bits_word(BN_ULONG l) 142int BN_num_bits_word(BN_ULONG l)
162 { 143 {
163 static const unsigned char bits[256]={ 144 static const unsigned char bits[256]={
diff --git a/src/lib/libcrypto/bn/bn_mont.c b/src/lib/libcrypto/bn/bn_mont.c
index 1a866880f5..427b5cf4df 100644
--- a/src/lib/libcrypto/bn/bn_mont.c
+++ b/src/lib/libcrypto/bn/bn_mont.c
@@ -177,31 +177,26 @@ err:
177static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont) 177static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont)
178 { 178 {
179 BIGNUM *n; 179 BIGNUM *n;
180 BN_ULONG *ap,*np,*rp,n0,v,*nrp; 180 BN_ULONG *ap,*np,*rp,n0,v,carry;
181 int al,nl,max,i,x,ri; 181 int nl,max,i;
182 182
183 n= &(mont->N); 183 n= &(mont->N);
184 /* mont->ri is the size of mont->N in bits (rounded up
185 to the word size) */
186 al=ri=mont->ri/BN_BITS2;
187
188 nl=n->top; 184 nl=n->top;
189 if ((al == 0) || (nl == 0)) { ret->top=0; return(1); } 185 if (nl == 0) { ret->top=0; return(1); }
190 186
191 max=(nl+al+1); /* allow for overflow (no?) XXX */ 187 max=(2*nl); /* carry is stored separately */
192 if (bn_wexpand(r,max) == NULL) return(0); 188 if (bn_wexpand(r,max) == NULL) return(0);
193 189
194 r->neg^=n->neg; 190 r->neg^=n->neg;
195 np=n->d; 191 np=n->d;
196 rp=r->d; 192 rp=r->d;
197 nrp= &(r->d[nl]);
198 193
199 /* clear the top words of T */ 194 /* clear the top words of T */
200#if 1 195#if 1
201 for (i=r->top; i<max; i++) /* memset? XXX */ 196 for (i=r->top; i<max; i++) /* memset? XXX */
202 r->d[i]=0; 197 rp[i]=0;
203#else 198#else
204 memset(&(r->d[r->top]),0,(max-r->top)*sizeof(BN_ULONG)); 199 memset(&(rp[r->top]),0,(max-r->top)*sizeof(BN_ULONG));
205#endif 200#endif
206 201
207 r->top=max; 202 r->top=max;
@@ -210,7 +205,7 @@ static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont)
210#ifdef BN_COUNT 205#ifdef BN_COUNT
211 fprintf(stderr,"word BN_from_montgomery_word %d * %d\n",nl,nl); 206 fprintf(stderr,"word BN_from_montgomery_word %d * %d\n",nl,nl);
212#endif 207#endif
213 for (i=0; i<nl; i++) 208 for (carry=0, i=0; i<nl; i++, rp++)
214 { 209 {
215#ifdef __TANDEM 210#ifdef __TANDEM
216 { 211 {
@@ -228,61 +223,33 @@ static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont)
228#else 223#else
229 v=bn_mul_add_words(rp,np,nl,(rp[0]*n0)&BN_MASK2); 224 v=bn_mul_add_words(rp,np,nl,(rp[0]*n0)&BN_MASK2);
230#endif 225#endif
231 nrp++; 226 v = (v+carry+rp[nl])&BN_MASK2;
232 rp++; 227 carry |= (v != rp[nl]);
233 if (((nrp[-1]+=v)&BN_MASK2) >= v) 228 carry &= (v <= rp[nl]);
234 continue; 229 rp[nl]=v;
235 else
236 {
237 if (((++nrp[0])&BN_MASK2) != 0) continue;
238 if (((++nrp[1])&BN_MASK2) != 0) continue;
239 for (x=2; (((++nrp[x])&BN_MASK2) == 0); x++) ;
240 }
241 }
242 bn_correct_top(r);
243
244 /* mont->ri will be a multiple of the word size and below code
245 * is kind of BN_rshift(ret,r,mont->ri) equivalent */
246 if (r->top <= ri)
247 {
248 ret->top=0;
249 return(1);
250 } 230 }
251 al=r->top-ri;
252 231
253#define BRANCH_FREE 1 232 if (bn_wexpand(ret,nl) == NULL) return(0);
254#if BRANCH_FREE 233 ret->top=nl;
255 if (bn_wexpand(ret,ri) == NULL) return(0);
256 x=0-(((al-ri)>>(sizeof(al)*8-1))&1);
257 ret->top=x=(ri&~x)|(al&x); /* min(ri,al) */
258 ret->neg=r->neg; 234 ret->neg=r->neg;
259 235
260 rp=ret->d; 236 rp=ret->d;
261 ap=&(r->d[ri]); 237 ap=&(r->d[nl]);
262 238
239#define BRANCH_FREE 1
240#if BRANCH_FREE
263 { 241 {
264 size_t m1,m2; 242 BN_ULONG *nrp;
265 243 size_t m;
266 v=bn_sub_words(rp,ap,np,ri);
267 /* this ----------------^^ works even in al<ri case
268 * thanks to zealous zeroing of top of the vector in the
269 * beginning. */
270 244
271 /* if (al==ri && !v) || al>ri) nrp=rp; else nrp=ap; */ 245 v=bn_sub_words(rp,ap,np,nl)-carry;
272 /* in other words if subtraction result is real, then 246 /* if subtraction result is real, then
273 * trick unconditional memcpy below to perform in-place 247 * trick unconditional memcpy below to perform in-place
274 * "refresh" instead of actual copy. */ 248 * "refresh" instead of actual copy. */
275 m1=0-(size_t)(((al-ri)>>(sizeof(al)*8-1))&1); /* al<ri */ 249 m=(0-(size_t)v);
276 m2=0-(size_t)(((ri-al)>>(sizeof(al)*8-1))&1); /* al>ri */ 250 nrp=(BN_ULONG *)(((PTR_SIZE_INT)rp&~m)|((PTR_SIZE_INT)ap&m));
277 m1|=m2; /* (al!=ri) */
278 m1|=(0-(size_t)v); /* (al!=ri || v) */
279 m1&=~m2; /* (al!=ri || v) && !al>ri */
280 nrp=(BN_ULONG *)(((PTR_SIZE_INT)rp&~m1)|((PTR_SIZE_INT)ap&m1));
281 }
282 251
283 /* 'i<ri' is chosen to eliminate dependency on input data, even 252 for (i=0,nl-=4; i<nl; i+=4)
284 * though it results in redundant copy in al<ri case. */
285 for (i=0,ri-=4; i<ri; i+=4)
286 { 253 {
287 BN_ULONG t1,t2,t3,t4; 254 BN_ULONG t1,t2,t3,t4;
288 255
@@ -295,40 +262,15 @@ static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont)
295 rp[i+2]=t3; 262 rp[i+2]=t3;
296 rp[i+3]=t4; 263 rp[i+3]=t4;
297 } 264 }
298 for (ri+=4; i<ri; i++) 265 for (nl+=4; i<nl; i++)
299 rp[i]=nrp[i], ap[i]=0; 266 rp[i]=nrp[i], ap[i]=0;
300 bn_correct_top(r); 267 }
301 bn_correct_top(ret);
302#else 268#else
303 if (bn_wexpand(ret,al) == NULL) return(0); 269 if (bn_sub_words (rp,ap,np,nl)-carry)
304 ret->top=al; 270 memcpy(rp,ap,nl*sizeof(BN_ULONG));
305 ret->neg=r->neg;
306
307 rp=ret->d;
308 ap=&(r->d[ri]);
309 al-=4;
310 for (i=0; i<al; i+=4)
311 {
312 BN_ULONG t1,t2,t3,t4;
313
314 t1=ap[i+0];
315 t2=ap[i+1];
316 t3=ap[i+2];
317 t4=ap[i+3];
318 rp[i+0]=t1;
319 rp[i+1]=t2;
320 rp[i+2]=t3;
321 rp[i+3]=t4;
322 }
323 al+=4;
324 for (; i<al; i++)
325 rp[i]=ap[i];
326
327 if (BN_ucmp(ret, &(mont->N)) >= 0)
328 {
329 if (!BN_usub(ret,ret,&(mont->N))) return(0);
330 }
331#endif 271#endif
272 bn_correct_top(r);
273 bn_correct_top(ret);
332 bn_check_top(ret); 274 bn_check_top(ret);
333 275
334 return(1); 276 return(1);
diff --git a/src/lib/libcrypto/bn/bn_nist.c b/src/lib/libcrypto/bn/bn_nist.c
index c6de032696..43caee4770 100644
--- a/src/lib/libcrypto/bn/bn_nist.c
+++ b/src/lib/libcrypto/bn/bn_nist.c
@@ -319,6 +319,13 @@ static void nist_cp_bn(BN_ULONG *buf, BN_ULONG *a, int top)
319 :(to[(n)/2] =((m)&1)?(from[(m)/2]>>32):(from[(m)/2]&BN_MASK2l))) 319 :(to[(n)/2] =((m)&1)?(from[(m)/2]>>32):(from[(m)/2]&BN_MASK2l)))
320#define bn_32_set_0(to, n) (((n)&1)?(to[(n)/2]&=BN_MASK2l):(to[(n)/2]=0)); 320#define bn_32_set_0(to, n) (((n)&1)?(to[(n)/2]&=BN_MASK2l):(to[(n)/2]=0));
321#define bn_cp_32(to,n,from,m) ((m)>=0)?bn_cp_32_naked(to,n,from,m):bn_32_set_0(to,n) 321#define bn_cp_32(to,n,from,m) ((m)>=0)?bn_cp_32_naked(to,n,from,m):bn_32_set_0(to,n)
322# if defined(L_ENDIAN)
323# if defined(__arch64__)
324# define NIST_INT64 long
325# else
326# define NIST_INT64 long long
327# endif
328# endif
322#else 329#else
323#define bn_cp_64(to, n, from, m) \ 330#define bn_cp_64(to, n, from, m) \
324 { \ 331 { \
@@ -330,13 +337,15 @@ static void nist_cp_bn(BN_ULONG *buf, BN_ULONG *a, int top)
330 bn_32_set_0(to, (n)*2); \ 337 bn_32_set_0(to, (n)*2); \
331 bn_32_set_0(to, (n)*2+1); \ 338 bn_32_set_0(to, (n)*2+1); \
332 } 339 }
333#if BN_BITS2 == 32
334#define bn_cp_32(to, n, from, m) (to)[n] = (m>=0)?((from)[m]):0; 340#define bn_cp_32(to, n, from, m) (to)[n] = (m>=0)?((from)[m]):0;
335#define bn_32_set_0(to, n) (to)[n] = (BN_ULONG)0; 341#define bn_32_set_0(to, n) (to)[n] = (BN_ULONG)0;
336#endif 342# if defined(_WIN32) && !defined(__GNUC__)
343# define NIST_INT64 __int64
344# elif defined(BN_LLONG)
345# define NIST_INT64 long long
346# endif
337#endif /* BN_BITS2 != 64 */ 347#endif /* BN_BITS2 != 64 */
338 348
339
340#define nist_set_192(to, from, a1, a2, a3) \ 349#define nist_set_192(to, from, a1, a2, a3) \
341 { \ 350 { \
342 bn_cp_64(to, 0, from, (a3) - 3) \ 351 bn_cp_64(to, 0, from, (a3) - 3) \
@@ -350,9 +359,11 @@ int BN_nist_mod_192(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
350 int top = a->top, i; 359 int top = a->top, i;
351 int carry; 360 int carry;
352 register BN_ULONG *r_d, *a_d = a->d; 361 register BN_ULONG *r_d, *a_d = a->d;
353 BN_ULONG t_d[BN_NIST_192_TOP], 362 union {
354 buf[BN_NIST_192_TOP], 363 BN_ULONG bn[BN_NIST_192_TOP];
355 c_d[BN_NIST_192_TOP], 364 unsigned int ui[BN_NIST_192_TOP*sizeof(BN_ULONG)/sizeof(unsigned int)];
365 } buf;
366 BN_ULONG c_d[BN_NIST_192_TOP],
356 *res; 367 *res;
357 PTR_SIZE_INT mask; 368 PTR_SIZE_INT mask;
358 static const BIGNUM _bignum_nist_p_192_sqr = { 369 static const BIGNUM _bignum_nist_p_192_sqr = {
@@ -385,15 +396,48 @@ int BN_nist_mod_192(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
385 else 396 else
386 r_d = a_d; 397 r_d = a_d;
387 398
388 nist_cp_bn_0(buf, a_d + BN_NIST_192_TOP, top - BN_NIST_192_TOP, BN_NIST_192_TOP); 399 nist_cp_bn_0(buf.bn, a_d + BN_NIST_192_TOP, top - BN_NIST_192_TOP, BN_NIST_192_TOP);
400
401#if defined(NIST_INT64)
402 {
403 NIST_INT64 acc; /* accumulator */
404 unsigned int *rp=(unsigned int *)r_d;
405 const unsigned int *bp=(const unsigned int *)buf.ui;
406
407 acc = rp[0]; acc += bp[3*2-6];
408 acc += bp[5*2-6]; rp[0] = (unsigned int)acc; acc >>= 32;
409
410 acc += rp[1]; acc += bp[3*2-5];
411 acc += bp[5*2-5]; rp[1] = (unsigned int)acc; acc >>= 32;
389 412
390 nist_set_192(t_d, buf, 0, 3, 3); 413 acc += rp[2]; acc += bp[3*2-6];
414 acc += bp[4*2-6];
415 acc += bp[5*2-6]; rp[2] = (unsigned int)acc; acc >>= 32;
416
417 acc += rp[3]; acc += bp[3*2-5];
418 acc += bp[4*2-5];
419 acc += bp[5*2-5]; rp[3] = (unsigned int)acc; acc >>= 32;
420
421 acc += rp[4]; acc += bp[4*2-6];
422 acc += bp[5*2-6]; rp[4] = (unsigned int)acc; acc >>= 32;
423
424 acc += rp[5]; acc += bp[4*2-5];
425 acc += bp[5*2-5]; rp[5] = (unsigned int)acc;
426
427 carry = (int)(acc>>32);
428 }
429#else
430 {
431 BN_ULONG t_d[BN_NIST_192_TOP];
432
433 nist_set_192(t_d, buf.bn, 0, 3, 3);
391 carry = (int)bn_add_words(r_d, r_d, t_d, BN_NIST_192_TOP); 434 carry = (int)bn_add_words(r_d, r_d, t_d, BN_NIST_192_TOP);
392 nist_set_192(t_d, buf, 4, 4, 0); 435 nist_set_192(t_d, buf.bn, 4, 4, 0);
393 carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_192_TOP); 436 carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_192_TOP);
394 nist_set_192(t_d, buf, 5, 5, 5) 437 nist_set_192(t_d, buf.bn, 5, 5, 5)
395 carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_192_TOP); 438 carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_192_TOP);
396 439 }
440#endif
397 if (carry > 0) 441 if (carry > 0)
398 carry = (int)bn_sub_words(r_d,r_d,_nist_p_192[carry-1],BN_NIST_192_TOP); 442 carry = (int)bn_sub_words(r_d,r_d,_nist_p_192[carry-1],BN_NIST_192_TOP);
399 else 443 else
@@ -435,8 +479,7 @@ int BN_nist_mod_224(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
435 int top = a->top, i; 479 int top = a->top, i;
436 int carry; 480 int carry;
437 BN_ULONG *r_d, *a_d = a->d; 481 BN_ULONG *r_d, *a_d = a->d;
438 BN_ULONG t_d[BN_NIST_224_TOP], 482 BN_ULONG buf[BN_NIST_224_TOP],
439 buf[BN_NIST_224_TOP],
440 c_d[BN_NIST_224_TOP], 483 c_d[BN_NIST_224_TOP],
441 *res; 484 *res;
442 PTR_SIZE_INT mask; 485 PTR_SIZE_INT mask;
@@ -474,14 +517,54 @@ int BN_nist_mod_224(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
474 517
475#if BN_BITS2==64 518#if BN_BITS2==64
476 /* copy upper 256 bits of 448 bit number ... */ 519 /* copy upper 256 bits of 448 bit number ... */
477 nist_cp_bn_0(t_d, a_d + (BN_NIST_224_TOP-1), top - (BN_NIST_224_TOP-1), BN_NIST_224_TOP); 520 nist_cp_bn_0(c_d, a_d + (BN_NIST_224_TOP-1), top - (BN_NIST_224_TOP-1), BN_NIST_224_TOP);
478 /* ... and right shift by 32 to obtain upper 224 bits */ 521 /* ... and right shift by 32 to obtain upper 224 bits */
479 nist_set_224(buf, t_d, 14, 13, 12, 11, 10, 9, 8); 522 nist_set_224(buf, c_d, 14, 13, 12, 11, 10, 9, 8);
480 /* truncate lower part to 224 bits too */ 523 /* truncate lower part to 224 bits too */
481 r_d[BN_NIST_224_TOP-1] &= BN_MASK2l; 524 r_d[BN_NIST_224_TOP-1] &= BN_MASK2l;
482#else 525#else
483 nist_cp_bn_0(buf, a_d + BN_NIST_224_TOP, top - BN_NIST_224_TOP, BN_NIST_224_TOP); 526 nist_cp_bn_0(buf, a_d + BN_NIST_224_TOP, top - BN_NIST_224_TOP, BN_NIST_224_TOP);
484#endif 527#endif
528
529#if defined(NIST_INT64) && BN_BITS2!=64
530 {
531 NIST_INT64 acc; /* accumulator */
532 unsigned int *rp=(unsigned int *)r_d;
533 const unsigned int *bp=(const unsigned int *)buf;
534
535 acc = rp[0]; acc -= bp[7-7];
536 acc -= bp[11-7]; rp[0] = (unsigned int)acc; acc >>= 32;
537
538 acc += rp[1]; acc -= bp[8-7];
539 acc -= bp[12-7]; rp[1] = (unsigned int)acc; acc >>= 32;
540
541 acc += rp[2]; acc -= bp[9-7];
542 acc -= bp[13-7]; rp[2] = (unsigned int)acc; acc >>= 32;
543
544 acc += rp[3]; acc += bp[7-7];
545 acc += bp[11-7];
546 acc -= bp[10-7]; rp[3] = (unsigned int)acc; acc>>= 32;
547
548 acc += rp[4]; acc += bp[8-7];
549 acc += bp[12-7];
550 acc -= bp[11-7]; rp[4] = (unsigned int)acc; acc >>= 32;
551
552 acc += rp[5]; acc += bp[9-7];
553 acc += bp[13-7];
554 acc -= bp[12-7]; rp[5] = (unsigned int)acc; acc >>= 32;
555
556 acc += rp[6]; acc += bp[10-7];
557 acc -= bp[13-7]; rp[6] = (unsigned int)acc;
558
559 carry = (int)(acc>>32);
560# if BN_BITS2==64
561 rp[7] = carry;
562# endif
563 }
564#else
565 {
566 BN_ULONG t_d[BN_NIST_224_TOP];
567
485 nist_set_224(t_d, buf, 10, 9, 8, 7, 0, 0, 0); 568 nist_set_224(t_d, buf, 10, 9, 8, 7, 0, 0, 0);
486 carry = (int)bn_add_words(r_d, r_d, t_d, BN_NIST_224_TOP); 569 carry = (int)bn_add_words(r_d, r_d, t_d, BN_NIST_224_TOP);
487 nist_set_224(t_d, buf, 0, 13, 12, 11, 0, 0, 0); 570 nist_set_224(t_d, buf, 0, 13, 12, 11, 0, 0, 0);
@@ -494,6 +577,8 @@ int BN_nist_mod_224(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
494#if BN_BITS2==64 577#if BN_BITS2==64
495 carry = (int)(r_d[BN_NIST_224_TOP-1]>>32); 578 carry = (int)(r_d[BN_NIST_224_TOP-1]>>32);
496#endif 579#endif
580 }
581#endif
497 u.f = bn_sub_words; 582 u.f = bn_sub_words;
498 if (carry > 0) 583 if (carry > 0)
499 { 584 {
@@ -548,9 +633,11 @@ int BN_nist_mod_256(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
548 int i, top = a->top; 633 int i, top = a->top;
549 int carry = 0; 634 int carry = 0;
550 register BN_ULONG *a_d = a->d, *r_d; 635 register BN_ULONG *a_d = a->d, *r_d;
551 BN_ULONG t_d[BN_NIST_256_TOP], 636 union {
552 buf[BN_NIST_256_TOP], 637 BN_ULONG bn[BN_NIST_256_TOP];
553 c_d[BN_NIST_256_TOP], 638 unsigned int ui[BN_NIST_256_TOP*sizeof(BN_ULONG)/sizeof(unsigned int)];
639 } buf;
640 BN_ULONG c_d[BN_NIST_256_TOP],
554 *res; 641 *res;
555 PTR_SIZE_INT mask; 642 PTR_SIZE_INT mask;
556 union { bn_addsub_f f; PTR_SIZE_INT p; } u; 643 union { bn_addsub_f f; PTR_SIZE_INT p; } u;
@@ -584,12 +671,87 @@ int BN_nist_mod_256(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
584 else 671 else
585 r_d = a_d; 672 r_d = a_d;
586 673
587 nist_cp_bn_0(buf, a_d + BN_NIST_256_TOP, top - BN_NIST_256_TOP, BN_NIST_256_TOP); 674 nist_cp_bn_0(buf.bn, a_d + BN_NIST_256_TOP, top - BN_NIST_256_TOP, BN_NIST_256_TOP);
675
676#if defined(NIST_INT64)
677 {
678 NIST_INT64 acc; /* accumulator */
679 unsigned int *rp=(unsigned int *)r_d;
680 const unsigned int *bp=(const unsigned int *)buf.ui;
681
682 acc = rp[0]; acc += bp[8-8];
683 acc += bp[9-8];
684 acc -= bp[11-8];
685 acc -= bp[12-8];
686 acc -= bp[13-8];
687 acc -= bp[14-8]; rp[0] = (unsigned int)acc; acc >>= 32;
688
689 acc += rp[1]; acc += bp[9-8];
690 acc += bp[10-8];
691 acc -= bp[12-8];
692 acc -= bp[13-8];
693 acc -= bp[14-8];
694 acc -= bp[15-8]; rp[1] = (unsigned int)acc; acc >>= 32;
695
696 acc += rp[2]; acc += bp[10-8];
697 acc += bp[11-8];
698 acc -= bp[13-8];
699 acc -= bp[14-8];
700 acc -= bp[15-8]; rp[2] = (unsigned int)acc; acc >>= 32;
701
702 acc += rp[3]; acc += bp[11-8];
703 acc += bp[11-8];
704 acc += bp[12-8];
705 acc += bp[12-8];
706 acc += bp[13-8];
707 acc -= bp[15-8];
708 acc -= bp[8-8];
709 acc -= bp[9-8]; rp[3] = (unsigned int)acc; acc >>= 32;
710
711 acc += rp[4]; acc += bp[12-8];
712 acc += bp[12-8];
713 acc += bp[13-8];
714 acc += bp[13-8];
715 acc += bp[14-8];
716 acc -= bp[9-8];
717 acc -= bp[10-8]; rp[4] = (unsigned int)acc; acc >>= 32;
718
719 acc += rp[5]; acc += bp[13-8];
720 acc += bp[13-8];
721 acc += bp[14-8];
722 acc += bp[14-8];
723 acc += bp[15-8];
724 acc -= bp[10-8];
725 acc -= bp[11-8]; rp[5] = (unsigned int)acc; acc >>= 32;
726
727 acc += rp[6]; acc += bp[14-8];
728 acc += bp[14-8];
729 acc += bp[15-8];
730 acc += bp[15-8];
731 acc += bp[14-8];
732 acc += bp[13-8];
733 acc -= bp[8-8];
734 acc -= bp[9-8]; rp[6] = (unsigned int)acc; acc >>= 32;
735
736 acc += rp[7]; acc += bp[15-8];
737 acc += bp[15-8];
738 acc += bp[15-8];
739 acc += bp[8 -8];
740 acc -= bp[10-8];
741 acc -= bp[11-8];
742 acc -= bp[12-8];
743 acc -= bp[13-8]; rp[7] = (unsigned int)acc;
744
745 carry = (int)(acc>>32);
746 }
747#else
748 {
749 BN_ULONG t_d[BN_NIST_256_TOP];
588 750
589 /*S1*/ 751 /*S1*/
590 nist_set_256(t_d, buf, 15, 14, 13, 12, 11, 0, 0, 0); 752 nist_set_256(t_d, buf.bn, 15, 14, 13, 12, 11, 0, 0, 0);
591 /*S2*/ 753 /*S2*/
592 nist_set_256(c_d, buf, 0, 15, 14, 13, 12, 0, 0, 0); 754 nist_set_256(c_d, buf.bn, 0, 15, 14, 13, 12, 0, 0, 0);
593 carry = (int)bn_add_words(t_d, t_d, c_d, BN_NIST_256_TOP); 755 carry = (int)bn_add_words(t_d, t_d, c_d, BN_NIST_256_TOP);
594 /* left shift */ 756 /* left shift */
595 { 757 {
@@ -607,24 +769,26 @@ int BN_nist_mod_256(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
607 } 769 }
608 carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_256_TOP); 770 carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_256_TOP);
609 /*S3*/ 771 /*S3*/
610 nist_set_256(t_d, buf, 15, 14, 0, 0, 0, 10, 9, 8); 772 nist_set_256(t_d, buf.bn, 15, 14, 0, 0, 0, 10, 9, 8);
611 carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_256_TOP); 773 carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_256_TOP);
612 /*S4*/ 774 /*S4*/
613 nist_set_256(t_d, buf, 8, 13, 15, 14, 13, 11, 10, 9); 775 nist_set_256(t_d, buf.bn, 8, 13, 15, 14, 13, 11, 10, 9);
614 carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_256_TOP); 776 carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_256_TOP);
615 /*D1*/ 777 /*D1*/
616 nist_set_256(t_d, buf, 10, 8, 0, 0, 0, 13, 12, 11); 778 nist_set_256(t_d, buf.bn, 10, 8, 0, 0, 0, 13, 12, 11);
617 carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP); 779 carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP);
618 /*D2*/ 780 /*D2*/
619 nist_set_256(t_d, buf, 11, 9, 0, 0, 15, 14, 13, 12); 781 nist_set_256(t_d, buf.bn, 11, 9, 0, 0, 15, 14, 13, 12);
620 carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP); 782 carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP);
621 /*D3*/ 783 /*D3*/
622 nist_set_256(t_d, buf, 12, 0, 10, 9, 8, 15, 14, 13); 784 nist_set_256(t_d, buf.bn, 12, 0, 10, 9, 8, 15, 14, 13);
623 carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP); 785 carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP);
624 /*D4*/ 786 /*D4*/
625 nist_set_256(t_d, buf, 13, 0, 11, 10, 9, 0, 15, 14); 787 nist_set_256(t_d, buf.bn, 13, 0, 11, 10, 9, 0, 15, 14);
626 carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP); 788 carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP);
627 789
790 }
791#endif
628 /* see BN_nist_mod_224 for explanation */ 792 /* see BN_nist_mod_224 for explanation */
629 u.f = bn_sub_words; 793 u.f = bn_sub_words;
630 if (carry > 0) 794 if (carry > 0)
@@ -672,9 +836,11 @@ int BN_nist_mod_384(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
672 int i, top = a->top; 836 int i, top = a->top;
673 int carry = 0; 837 int carry = 0;
674 register BN_ULONG *r_d, *a_d = a->d; 838 register BN_ULONG *r_d, *a_d = a->d;
675 BN_ULONG t_d[BN_NIST_384_TOP], 839 union {
676 buf[BN_NIST_384_TOP], 840 BN_ULONG bn[BN_NIST_384_TOP];
677 c_d[BN_NIST_384_TOP], 841 unsigned int ui[BN_NIST_384_TOP*sizeof(BN_ULONG)/sizeof(unsigned int)];
842 } buf;
843 BN_ULONG c_d[BN_NIST_384_TOP],
678 *res; 844 *res;
679 PTR_SIZE_INT mask; 845 PTR_SIZE_INT mask;
680 union { bn_addsub_f f; PTR_SIZE_INT p; } u; 846 union { bn_addsub_f f; PTR_SIZE_INT p; } u;
@@ -709,10 +875,100 @@ int BN_nist_mod_384(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
709 else 875 else
710 r_d = a_d; 876 r_d = a_d;
711 877
712 nist_cp_bn_0(buf, a_d + BN_NIST_384_TOP, top - BN_NIST_384_TOP, BN_NIST_384_TOP); 878 nist_cp_bn_0(buf.bn, a_d + BN_NIST_384_TOP, top - BN_NIST_384_TOP, BN_NIST_384_TOP);
879
880#if defined(NIST_INT64)
881 {
882 NIST_INT64 acc; /* accumulator */
883 unsigned int *rp=(unsigned int *)r_d;
884 const unsigned int *bp=(const unsigned int *)buf.ui;
885
886 acc = rp[0]; acc += bp[12-12];
887 acc += bp[21-12];
888 acc += bp[20-12];
889 acc -= bp[23-12]; rp[0] = (unsigned int)acc; acc >>= 32;
890
891 acc += rp[1]; acc += bp[13-12];
892 acc += bp[22-12];
893 acc += bp[23-12];
894 acc -= bp[12-12];
895 acc -= bp[20-12]; rp[1] = (unsigned int)acc; acc >>= 32;
896
897 acc += rp[2]; acc += bp[14-12];
898 acc += bp[23-12];
899 acc -= bp[13-12];
900 acc -= bp[21-12]; rp[2] = (unsigned int)acc; acc >>= 32;
901
902 acc += rp[3]; acc += bp[15-12];
903 acc += bp[12-12];
904 acc += bp[20-12];
905 acc += bp[21-12];
906 acc -= bp[14-12];
907 acc -= bp[22-12];
908 acc -= bp[23-12]; rp[3] = (unsigned int)acc; acc >>= 32;
909
910 acc += rp[4]; acc += bp[21-12];
911 acc += bp[21-12];
912 acc += bp[16-12];
913 acc += bp[13-12];
914 acc += bp[12-12];
915 acc += bp[20-12];
916 acc += bp[22-12];
917 acc -= bp[15-12];
918 acc -= bp[23-12];
919 acc -= bp[23-12]; rp[4] = (unsigned int)acc; acc >>= 32;
920
921 acc += rp[5]; acc += bp[22-12];
922 acc += bp[22-12];
923 acc += bp[17-12];
924 acc += bp[14-12];
925 acc += bp[13-12];
926 acc += bp[21-12];
927 acc += bp[23-12];
928 acc -= bp[16-12]; rp[5] = (unsigned int)acc; acc >>= 32;
929
930 acc += rp[6]; acc += bp[23-12];
931 acc += bp[23-12];
932 acc += bp[18-12];
933 acc += bp[15-12];
934 acc += bp[14-12];
935 acc += bp[22-12];
936 acc -= bp[17-12]; rp[6] = (unsigned int)acc; acc >>= 32;
937
938 acc += rp[7]; acc += bp[19-12];
939 acc += bp[16-12];
940 acc += bp[15-12];
941 acc += bp[23-12];
942 acc -= bp[18-12]; rp[7] = (unsigned int)acc; acc >>= 32;
943
944 acc += rp[8]; acc += bp[20-12];
945 acc += bp[17-12];
946 acc += bp[16-12];
947 acc -= bp[19-12]; rp[8] = (unsigned int)acc; acc >>= 32;
948
949 acc += rp[9]; acc += bp[21-12];
950 acc += bp[18-12];
951 acc += bp[17-12];
952 acc -= bp[20-12]; rp[9] = (unsigned int)acc; acc >>= 32;
953
954 acc += rp[10]; acc += bp[22-12];
955 acc += bp[19-12];
956 acc += bp[18-12];
957 acc -= bp[21-12]; rp[10] = (unsigned int)acc; acc >>= 32;
958
959 acc += rp[11]; acc += bp[23-12];
960 acc += bp[20-12];
961 acc += bp[19-12];
962 acc -= bp[22-12]; rp[11] = (unsigned int)acc;
963
964 carry = (int)(acc>>32);
965 }
966#else
967 {
968 BN_ULONG t_d[BN_NIST_384_TOP];
713 969
714 /*S1*/ 970 /*S1*/
715 nist_set_256(t_d, buf, 0, 0, 0, 0, 0, 23-4, 22-4, 21-4); 971 nist_set_256(t_d, buf.bn, 0, 0, 0, 0, 0, 23-4, 22-4, 21-4);
716 /* left shift */ 972 /* left shift */
717 { 973 {
718 register BN_ULONG *ap,t,c; 974 register BN_ULONG *ap,t,c;
@@ -729,29 +985,31 @@ int BN_nist_mod_384(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
729 carry = (int)bn_add_words(r_d+(128/BN_BITS2), r_d+(128/BN_BITS2), 985 carry = (int)bn_add_words(r_d+(128/BN_BITS2), r_d+(128/BN_BITS2),
730 t_d, BN_NIST_256_TOP); 986 t_d, BN_NIST_256_TOP);
731 /*S2 */ 987 /*S2 */
732 carry += (int)bn_add_words(r_d, r_d, buf, BN_NIST_384_TOP); 988 carry += (int)bn_add_words(r_d, r_d, buf.bn, BN_NIST_384_TOP);
733 /*S3*/ 989 /*S3*/
734 nist_set_384(t_d,buf,20,19,18,17,16,15,14,13,12,23,22,21); 990 nist_set_384(t_d,buf.bn,20,19,18,17,16,15,14,13,12,23,22,21);
735 carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP); 991 carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP);
736 /*S4*/ 992 /*S4*/
737 nist_set_384(t_d,buf,19,18,17,16,15,14,13,12,20,0,23,0); 993 nist_set_384(t_d,buf.bn,19,18,17,16,15,14,13,12,20,0,23,0);
738 carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP); 994 carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP);
739 /*S5*/ 995 /*S5*/
740 nist_set_384(t_d, buf,0,0,0,0,23,22,21,20,0,0,0,0); 996 nist_set_384(t_d, buf.bn,0,0,0,0,23,22,21,20,0,0,0,0);
741 carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP); 997 carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP);
742 /*S6*/ 998 /*S6*/
743 nist_set_384(t_d,buf,0,0,0,0,0,0,23,22,21,0,0,20); 999 nist_set_384(t_d,buf.bn,0,0,0,0,0,0,23,22,21,0,0,20);
744 carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP); 1000 carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP);
745 /*D1*/ 1001 /*D1*/
746 nist_set_384(t_d,buf,22,21,20,19,18,17,16,15,14,13,12,23); 1002 nist_set_384(t_d,buf.bn,22,21,20,19,18,17,16,15,14,13,12,23);
747 carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_384_TOP); 1003 carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_384_TOP);
748 /*D2*/ 1004 /*D2*/
749 nist_set_384(t_d,buf,0,0,0,0,0,0,0,23,22,21,20,0); 1005 nist_set_384(t_d,buf.bn,0,0,0,0,0,0,0,23,22,21,20,0);
750 carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_384_TOP); 1006 carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_384_TOP);
751 /*D3*/ 1007 /*D3*/
752 nist_set_384(t_d,buf,0,0,0,0,0,0,0,23,23,0,0,0); 1008 nist_set_384(t_d,buf.bn,0,0,0,0,0,0,0,23,23,0,0,0);
753 carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_384_TOP); 1009 carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_384_TOP);
754 1010
1011 }
1012#endif
755 /* see BN_nist_mod_224 for explanation */ 1013 /* see BN_nist_mod_224 for explanation */
756 u.f = bn_sub_words; 1014 u.f = bn_sub_words;
757 if (carry > 0) 1015 if (carry > 0)
diff --git a/src/lib/libcrypto/bn/bn_print.c b/src/lib/libcrypto/bn/bn_print.c
index bebb466d08..1743b6a7e2 100644
--- a/src/lib/libcrypto/bn/bn_print.c
+++ b/src/lib/libcrypto/bn/bn_print.c
@@ -357,3 +357,22 @@ end:
357 return(ret); 357 return(ret);
358 } 358 }
359#endif 359#endif
360
361char *BN_options(void)
362 {
363 static int init=0;
364 static char data[16];
365
366 if (!init)
367 {
368 init++;
369#ifdef BN_LLONG
370 BIO_snprintf(data,sizeof data,"bn(%d,%d)",
371 (int)sizeof(BN_ULLONG)*8,(int)sizeof(BN_ULONG)*8);
372#else
373 BIO_snprintf(data,sizeof data,"bn(%d,%d)",
374 (int)sizeof(BN_ULONG)*8,(int)sizeof(BN_ULONG)*8);
375#endif
376 }
377 return(data);
378 }
diff --git a/src/lib/libcrypto/bn/bn_shift.c b/src/lib/libcrypto/bn/bn_shift.c
index c4d301afc4..a6fca2c424 100644
--- a/src/lib/libcrypto/bn/bn_shift.c
+++ b/src/lib/libcrypto/bn/bn_shift.c
@@ -99,7 +99,7 @@ int BN_lshift1(BIGNUM *r, const BIGNUM *a)
99int BN_rshift1(BIGNUM *r, const BIGNUM *a) 99int BN_rshift1(BIGNUM *r, const BIGNUM *a)
100 { 100 {
101 BN_ULONG *ap,*rp,t,c; 101 BN_ULONG *ap,*rp,t,c;
102 int i; 102 int i,j;
103 103
104 bn_check_top(r); 104 bn_check_top(r);
105 bn_check_top(a); 105 bn_check_top(a);
@@ -109,22 +109,25 @@ int BN_rshift1(BIGNUM *r, const BIGNUM *a)
109 BN_zero(r); 109 BN_zero(r);
110 return(1); 110 return(1);
111 } 111 }
112 i = a->top;
113 ap= a->d;
114 j = i-(ap[i-1]==1);
112 if (a != r) 115 if (a != r)
113 { 116 {
114 if (bn_wexpand(r,a->top) == NULL) return(0); 117 if (bn_wexpand(r,j) == NULL) return(0);
115 r->top=a->top;
116 r->neg=a->neg; 118 r->neg=a->neg;
117 } 119 }
118 ap=a->d;
119 rp=r->d; 120 rp=r->d;
120 c=0; 121 t=ap[--i];
121 for (i=a->top-1; i>=0; i--) 122 c=(t&1)?BN_TBIT:0;
123 if (t>>=1) rp[i]=t;
124 while (i>0)
122 { 125 {
123 t=ap[i]; 126 t=ap[--i];
124 rp[i]=((t>>1)&BN_MASK2)|c; 127 rp[i]=((t>>1)&BN_MASK2)|c;
125 c=(t&1)?BN_TBIT:0; 128 c=(t&1)?BN_TBIT:0;
126 } 129 }
127 bn_correct_top(r); 130 r->top=j;
128 bn_check_top(r); 131 bn_check_top(r);
129 return(1); 132 return(1);
130 } 133 }
@@ -182,10 +185,11 @@ int BN_rshift(BIGNUM *r, const BIGNUM *a, int n)
182 BN_zero(r); 185 BN_zero(r);
183 return(1); 186 return(1);
184 } 187 }
188 i = (BN_num_bits(a)-n+(BN_BITS2-1))/BN_BITS2;
185 if (r != a) 189 if (r != a)
186 { 190 {
187 r->neg=a->neg; 191 r->neg=a->neg;
188 if (bn_wexpand(r,a->top-nw+1) == NULL) return(0); 192 if (bn_wexpand(r,i) == NULL) return(0);
189 } 193 }
190 else 194 else
191 { 195 {
@@ -196,7 +200,7 @@ int BN_rshift(BIGNUM *r, const BIGNUM *a, int n)
196 f= &(a->d[nw]); 200 f= &(a->d[nw]);
197 t=r->d; 201 t=r->d;
198 j=a->top-nw; 202 j=a->top-nw;
199 r->top=j; 203 r->top=i;
200 204
201 if (rb == 0) 205 if (rb == 0)
202 { 206 {
@@ -212,9 +216,8 @@ int BN_rshift(BIGNUM *r, const BIGNUM *a, int n)
212 l= *(f++); 216 l= *(f++);
213 *(t++) =(tmp|(l<<lb))&BN_MASK2; 217 *(t++) =(tmp|(l<<lb))&BN_MASK2;
214 } 218 }
215 *(t++) =(l>>rb)&BN_MASK2; 219 if ((l = (l>>rb)&BN_MASK2)) *(t) = l;
216 } 220 }
217 bn_correct_top(r);
218 bn_check_top(r); 221 bn_check_top(r);
219 return(1); 222 return(1);
220 } 223 }
diff --git a/src/lib/libcrypto/bn/bntest.c b/src/lib/libcrypto/bn/bntest.c
index 0cd99c5b4b..06f5954acc 100644
--- a/src/lib/libcrypto/bn/bntest.c
+++ b/src/lib/libcrypto/bn/bntest.c
@@ -262,7 +262,7 @@ int main(int argc, char *argv[])
262 message(out,"BN_mod_sqrt"); 262 message(out,"BN_mod_sqrt");
263 if (!test_sqrt(out,ctx)) goto err; 263 if (!test_sqrt(out,ctx)) goto err;
264 (void)BIO_flush(out); 264 (void)BIO_flush(out);
265 265#ifndef OPENSSL_NO_EC2M
266 message(out,"BN_GF2m_add"); 266 message(out,"BN_GF2m_add");
267 if (!test_gf2m_add(out)) goto err; 267 if (!test_gf2m_add(out)) goto err;
268 (void)BIO_flush(out); 268 (void)BIO_flush(out);
@@ -298,7 +298,7 @@ int main(int argc, char *argv[])
298 message(out,"BN_GF2m_mod_solve_quad"); 298 message(out,"BN_GF2m_mod_solve_quad");
299 if (!test_gf2m_mod_solve_quad(out,ctx)) goto err; 299 if (!test_gf2m_mod_solve_quad(out,ctx)) goto err;
300 (void)BIO_flush(out); 300 (void)BIO_flush(out);
301 301#endif
302 BN_CTX_free(ctx); 302 BN_CTX_free(ctx);
303 BIO_free(out); 303 BIO_free(out);
304 304
@@ -1061,7 +1061,7 @@ int test_exp(BIO *bp, BN_CTX *ctx)
1061 BN_free(one); 1061 BN_free(one);
1062 return(1); 1062 return(1);
1063 } 1063 }
1064 1064#ifndef OPENSSL_NO_EC2M
1065int test_gf2m_add(BIO *bp) 1065int test_gf2m_add(BIO *bp)
1066 { 1066 {
1067 BIGNUM a,b,c; 1067 BIGNUM a,b,c;
@@ -1636,7 +1636,7 @@ int test_gf2m_mod_solve_quad(BIO *bp,BN_CTX *ctx)
1636 BN_free(e); 1636 BN_free(e);
1637 return ret; 1637 return ret;
1638 } 1638 }
1639 1639#endif
1640static int genprime_cb(int p, int n, BN_GENCB *arg) 1640static int genprime_cb(int p, int n, BN_GENCB *arg)
1641 { 1641 {
1642 char c='*'; 1642 char c='*';