summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorjsing <>2023-02-03 05:27:50 +0000
committerjsing <>2023-02-03 05:27:50 +0000
commitacdcbccd5c839ed36b33b72d9378f9a65a938915 (patch)
treef478056d5e579aebc512bedcfed1c32b4d2f1ddb /src
parent8c388cc76603dfe3b33db90e5d3790131a43777b (diff)
downloadopenbsd-acdcbccd5c839ed36b33b72d9378f9a65a938915.tar.gz
openbsd-acdcbccd5c839ed36b33b72d9378f9a65a938915.tar.bz2
openbsd-acdcbccd5c839ed36b33b72d9378f9a65a938915.zip
Reorder functions in bn_exp.c to be slightly sensible...
No functional change intended.
Diffstat (limited to 'src')
-rw-r--r--src/lib/libcrypto/bn/bn_exp.c561
1 files changed, 279 insertions, 282 deletions
diff --git a/src/lib/libcrypto/bn/bn_exp.c b/src/lib/libcrypto/bn/bn_exp.c
index e36eeff6bf..b72b535962 100644
--- a/src/lib/libcrypto/bn/bn_exp.c
+++ b/src/lib/libcrypto/bn/bn_exp.c
@@ -1,4 +1,4 @@
1/* $OpenBSD: bn_exp.c,v 1.35 2022/11/26 16:08:51 tb Exp $ */ 1/* $OpenBSD: bn_exp.c,v 1.36 2023/02/03 05:27:50 jsing Exp $ */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) 2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved. 3 * All rights reserved.
4 * 4 *
@@ -171,90 +171,16 @@ err:
171 return (ret); 171 return (ret);
172} 172}
173 173
174static int 174/* The old fallback, simple version :-) */
175BN_mod_exp_internal(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
176 BN_CTX *ctx, int ct)
177{
178 int ret;
179
180
181 /* For even modulus m = 2^k*m_odd, it might make sense to compute
182 * a^p mod m_odd and a^p mod 2^k separately (with Montgomery
183 * exponentiation for the odd part), using appropriate exponent
184 * reductions, and combine the results using the CRT.
185 *
186 * For now, we use Montgomery only if the modulus is odd; otherwise,
187 * exponentiation using the reciprocal-based quick remaindering
188 * algorithm is used.
189 *
190 * (Timing obtained with expspeed.c [computations a^p mod m
191 * where a, p, m are of the same length: 256, 512, 1024, 2048,
192 * 4096, 8192 bits], compared to the running time of the
193 * standard algorithm:
194 *
195 * BN_mod_exp_mont 33 .. 40 % [AMD K6-2, Linux, debug configuration]
196 * 55 .. 77 % [UltraSparc processor, but
197 * debug-solaris-sparcv8-gcc conf.]
198 *
199 * BN_mod_exp_recp 50 .. 70 % [AMD K6-2, Linux, debug configuration]
200 * 62 .. 118 % [UltraSparc, debug-solaris-sparcv8-gcc]
201 *
202 * On the Sparc, BN_mod_exp_recp was faster than BN_mod_exp_mont
203 * at 2048 and more bits, but at 512 and 1024 bits, it was
204 * slower even than the standard algorithm!
205 *
206 * "Real" timings [linux-elf, solaris-sparcv9-gcc configurations]
207 * should be obtained when the new Montgomery reduction code
208 * has been integrated into OpenSSL.)
209 */
210
211 if (BN_is_odd(m)) {
212 if (a->top == 1 && !a->neg && !ct) {
213 BN_ULONG A = a->d[0];
214 ret = BN_mod_exp_mont_word(r, A,p, m,ctx, NULL);
215 } else
216 ret = BN_mod_exp_mont_ct(r, a,p, m,ctx, NULL);
217 } else {
218 ret = BN_mod_exp_recp(r, a,p, m, ctx);
219 }
220
221 return (ret);
222}
223
224int
225BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
226 BN_CTX *ctx)
227{
228 return BN_mod_exp_internal(r, a, p, m, ctx,
229 (BN_get_flags(p, BN_FLG_CONSTTIME) != 0));
230}
231
232int
233BN_mod_exp_ct(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
234 BN_CTX *ctx)
235{
236 return BN_mod_exp_internal(r, a, p, m, ctx, 1);
237}
238
239
240int
241BN_mod_exp_nonct(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
242 BN_CTX *ctx)
243{
244 return BN_mod_exp_internal(r, a, p, m, ctx, 0);
245}
246
247
248int 175int
249BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, 176BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
250 BN_CTX *ctx) 177 BN_CTX *ctx)
251{ 178{
252 int i, j, bits, ret = 0, wstart, wend, window, wvalue; 179 int i, j, bits, ret = 0, wstart, wend, window, wvalue;
253 int start = 1; 180 int start = 1;
254 BIGNUM *aa; 181 BIGNUM *d;
255 /* Table of variables obtained from 'ctx' */ 182 /* Table of variables obtained from 'ctx' */
256 BIGNUM *val[TABLE_SIZE]; 183 BIGNUM *val[TABLE_SIZE];
257 BN_RECP_CTX recp;
258 184
259 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { 185 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
260 /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ 186 /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
@@ -273,27 +199,13 @@ BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
273 return ret; 199 return ret;
274 } 200 }
275 201
276 BN_RECP_CTX_init(&recp);
277
278 BN_CTX_start(ctx); 202 BN_CTX_start(ctx);
279 if ((aa = BN_CTX_get(ctx)) == NULL) 203 if ((d = BN_CTX_get(ctx)) == NULL)
280 goto err; 204 goto err;
281 if ((val[0] = BN_CTX_get(ctx)) == NULL) 205 if ((val[0] = BN_CTX_get(ctx)) == NULL)
282 goto err; 206 goto err;
283 207
284 if (m->neg) { 208 if (!BN_nnmod(val[0],a,m,ctx))
285 /* ignore sign of 'm' */
286 if (!BN_copy(aa, m))
287 goto err;
288 aa->neg = 0;
289 if (BN_RECP_CTX_set(&recp, aa, ctx) <= 0)
290 goto err;
291 } else {
292 if (BN_RECP_CTX_set(&recp, m, ctx) <= 0)
293 goto err;
294 }
295
296 if (!BN_nnmod(val[0], a, m, ctx))
297 goto err; /* 1 */ 209 goto err; /* 1 */
298 if (BN_is_zero(val[0])) { 210 if (BN_is_zero(val[0])) {
299 BN_zero(r); 211 BN_zero(r);
@@ -303,13 +215,12 @@ BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
303 215
304 window = BN_window_bits_for_exponent_size(bits); 216 window = BN_window_bits_for_exponent_size(bits);
305 if (window > 1) { 217 if (window > 1) {
306 if (!BN_mod_mul_reciprocal(aa, val[0], val[0], &recp, ctx)) 218 if (!BN_mod_mul(d, val[0], val[0], m, ctx))
307 goto err; /* 2 */ 219 goto err; /* 2 */
308 j = 1 << (window - 1); 220 j = 1 << (window - 1);
309 for (i = 1; i < j; i++) { 221 for (i = 1; i < j; i++) {
310 if (((val[i] = BN_CTX_get(ctx)) == NULL) || 222 if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
311 !BN_mod_mul_reciprocal(val[i], val[i - 1], 223 !BN_mod_mul(val[i], val[i - 1], d,m, ctx))
312 aa, &recp, ctx))
313 goto err; 224 goto err;
314 } 225 }
315 } 226 }
@@ -327,153 +238,8 @@ BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
327 for (;;) { 238 for (;;) {
328 if (BN_is_bit_set(p, wstart) == 0) { 239 if (BN_is_bit_set(p, wstart) == 0) {
329 if (!start) 240 if (!start)
330 if (!BN_mod_mul_reciprocal(r, r,r, &recp, ctx)) 241 if (!BN_mod_mul(r, r, r, m, ctx))
331 goto err;
332 if (wstart == 0)
333 break;
334 wstart--;
335 continue;
336 }
337 /* We now have wstart on a 'set' bit, we now need to work out
338 * how bit a window to do. To do this we need to scan
339 * forward until the last set bit before the end of the
340 * window */
341 j = wstart;
342 wvalue = 1;
343 wend = 0;
344 for (i = 1; i < window; i++) {
345 if (wstart - i < 0)
346 break;
347 if (BN_is_bit_set(p, wstart - i)) {
348 wvalue <<= (i - wend);
349 wvalue |= 1;
350 wend = i;
351 }
352 }
353
354 /* wend is the size of the current window */
355 j = wend + 1;
356 /* add the 'bytes above' */
357 if (!start)
358 for (i = 0; i < j; i++) {
359 if (!BN_mod_mul_reciprocal(r, r,r, &recp, ctx))
360 goto err;
361 }
362
363 /* wvalue will be an odd number < 2^window */
364 if (!BN_mod_mul_reciprocal(r, r,val[wvalue >> 1], &recp, ctx))
365 goto err;
366
367 /* move the 'window' down further */
368 wstart -= wend + 1;
369 wvalue = 0;
370 start = 0;
371 if (wstart < 0)
372 break;
373 }
374 ret = 1;
375
376err:
377 BN_CTX_end(ctx);
378 BN_RECP_CTX_free(&recp);
379 return (ret);
380}
381
382static int
383BN_mod_exp_mont_internal(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
384 BN_CTX *ctx, BN_MONT_CTX *in_mont, int ct)
385{
386 int i, j, bits, ret = 0, wstart, wend, window, wvalue;
387 int start = 1;
388 BIGNUM *d, *r;
389 const BIGNUM *aa;
390 /* Table of variables obtained from 'ctx' */
391 BIGNUM *val[TABLE_SIZE];
392 BN_MONT_CTX *mont = NULL;
393
394 if (ct) {
395 return BN_mod_exp_mont_consttime(rr, a, p, m, ctx, in_mont);
396 }
397
398
399 if (!BN_is_odd(m)) {
400 BNerror(BN_R_CALLED_WITH_EVEN_MODULUS);
401 return (0);
402 }
403
404 bits = BN_num_bits(p);
405 if (bits == 0) {
406 /* x**0 mod 1 is still zero. */
407 if (BN_is_one(m)) {
408 ret = 1;
409 BN_zero(rr);
410 } else
411 ret = BN_one(rr);
412 return ret;
413 }
414
415 BN_CTX_start(ctx);
416 if ((d = BN_CTX_get(ctx)) == NULL)
417 goto err;
418 if ((r = BN_CTX_get(ctx)) == NULL)
419 goto err;
420 if ((val[0] = BN_CTX_get(ctx)) == NULL)
421 goto err;
422
423 /* If this is not done, things will break in the montgomery
424 * part */
425
426 if (in_mont != NULL)
427 mont = in_mont;
428 else {
429 if ((mont = BN_MONT_CTX_new()) == NULL)
430 goto err;
431 if (!BN_MONT_CTX_set(mont, m, ctx))
432 goto err;
433 }
434
435 if (a->neg || BN_ucmp(a, m) >= 0) {
436 if (!BN_nnmod(val[0], a,m, ctx))
437 goto err;
438 aa = val[0];
439 } else
440 aa = a;
441 if (BN_is_zero(aa)) {
442 BN_zero(rr);
443 ret = 1;
444 goto err;
445 }
446 if (!BN_to_montgomery(val[0], aa, mont, ctx))
447 goto err; /* 1 */
448
449 window = BN_window_bits_for_exponent_size(bits);
450 if (window > 1) {
451 if (!BN_mod_mul_montgomery(d, val[0], val[0], mont, ctx))
452 goto err; /* 2 */
453 j = 1 << (window - 1);
454 for (i = 1; i < j; i++) {
455 if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
456 !BN_mod_mul_montgomery(val[i], val[i - 1],
457 d, mont, ctx))
458 goto err;
459 }
460 }
461
462 start = 1; /* This is used to avoid multiplication etc
463 * when there is only the value '1' in the
464 * buffer. */
465 wvalue = 0; /* The 'value' of the window */
466 wstart = bits - 1; /* The top bit of the window */
467 wend = 0; /* The bottom bit of the window */
468
469 if (!BN_to_montgomery(r, BN_value_one(), mont, ctx))
470 goto err;
471 for (;;) {
472 if (BN_is_bit_set(p, wstart) == 0) {
473 if (!start) {
474 if (!BN_mod_mul_montgomery(r, r, r, mont, ctx))
475 goto err; 242 goto err;
476 }
477 if (wstart == 0) 243 if (wstart == 0)
478 break; 244 break;
479 wstart--; 245 wstart--;
@@ -501,12 +267,12 @@ BN_mod_exp_mont_internal(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIG
501 /* add the 'bytes above' */ 267 /* add the 'bytes above' */
502 if (!start) 268 if (!start)
503 for (i = 0; i < j; i++) { 269 for (i = 0; i < j; i++) {
504 if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) 270 if (!BN_mod_mul(r, r, r, m, ctx))
505 goto err; 271 goto err;
506 } 272 }
507 273
508 /* wvalue will be an odd number < 2^window */ 274 /* wvalue will be an odd number < 2^window */
509 if (!BN_mod_mul_montgomery(r, r, val[wvalue >> 1], mont, ctx)) 275 if (!BN_mod_mul(r, r, val[wvalue >> 1], m, ctx))
510 goto err; 276 goto err;
511 277
512 /* move the 'window' down further */ 278 /* move the 'window' down further */
@@ -516,39 +282,13 @@ BN_mod_exp_mont_internal(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIG
516 if (wstart < 0) 282 if (wstart < 0)
517 break; 283 break;
518 } 284 }
519 if (!BN_from_montgomery(rr, r,mont, ctx))
520 goto err;
521 ret = 1; 285 ret = 1;
522 286
523err: 287err:
524 if ((in_mont == NULL) && (mont != NULL))
525 BN_MONT_CTX_free(mont);
526 BN_CTX_end(ctx); 288 BN_CTX_end(ctx);
527 return (ret); 289 return (ret);
528} 290}
529 291
530int
531BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
532 BN_CTX *ctx, BN_MONT_CTX *in_mont)
533{
534 return BN_mod_exp_mont_internal(rr, a, p, m, ctx, in_mont,
535 (BN_get_flags(p, BN_FLG_CONSTTIME) != 0));
536}
537
538int
539BN_mod_exp_mont_ct(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
540 BN_CTX *ctx, BN_MONT_CTX *in_mont)
541{
542 return BN_mod_exp_mont_internal(rr, a, p, m, ctx, in_mont, 1);
543}
544
545int
546BN_mod_exp_mont_nonct(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
547 BN_CTX *ctx, BN_MONT_CTX *in_mont)
548{
549 return BN_mod_exp_mont_internal(rr, a, p, m, ctx, in_mont, 0);
550}
551
552/* BN_mod_exp_mont_consttime() stores the precomputed powers in a specific layout 292/* BN_mod_exp_mont_consttime() stores the precomputed powers in a specific layout
553 * so that accessing any of these table values shows the same access pattern as far 293 * so that accessing any of these table values shows the same access pattern as far
554 * as cache lines are concerned. The following functions are used to transfer a BIGNUM 294 * as cache lines are concerned. The following functions are used to transfer a BIGNUM
@@ -892,6 +632,176 @@ err:
892 return (ret); 632 return (ret);
893} 633}
894 634
635static int
636BN_mod_exp_mont_internal(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
637 BN_CTX *ctx, BN_MONT_CTX *in_mont, int ct)
638{
639 int i, j, bits, ret = 0, wstart, wend, window, wvalue;
640 int start = 1;
641 BIGNUM *d, *r;
642 const BIGNUM *aa;
643 /* Table of variables obtained from 'ctx' */
644 BIGNUM *val[TABLE_SIZE];
645 BN_MONT_CTX *mont = NULL;
646
647 if (ct) {
648 return BN_mod_exp_mont_consttime(rr, a, p, m, ctx, in_mont);
649 }
650
651
652 if (!BN_is_odd(m)) {
653 BNerror(BN_R_CALLED_WITH_EVEN_MODULUS);
654 return (0);
655 }
656
657 bits = BN_num_bits(p);
658 if (bits == 0) {
659 /* x**0 mod 1 is still zero. */
660 if (BN_is_one(m)) {
661 ret = 1;
662 BN_zero(rr);
663 } else
664 ret = BN_one(rr);
665 return ret;
666 }
667
668 BN_CTX_start(ctx);
669 if ((d = BN_CTX_get(ctx)) == NULL)
670 goto err;
671 if ((r = BN_CTX_get(ctx)) == NULL)
672 goto err;
673 if ((val[0] = BN_CTX_get(ctx)) == NULL)
674 goto err;
675
676 /* If this is not done, things will break in the montgomery
677 * part */
678
679 if (in_mont != NULL)
680 mont = in_mont;
681 else {
682 if ((mont = BN_MONT_CTX_new()) == NULL)
683 goto err;
684 if (!BN_MONT_CTX_set(mont, m, ctx))
685 goto err;
686 }
687
688 if (a->neg || BN_ucmp(a, m) >= 0) {
689 if (!BN_nnmod(val[0], a,m, ctx))
690 goto err;
691 aa = val[0];
692 } else
693 aa = a;
694 if (BN_is_zero(aa)) {
695 BN_zero(rr);
696 ret = 1;
697 goto err;
698 }
699 if (!BN_to_montgomery(val[0], aa, mont, ctx))
700 goto err; /* 1 */
701
702 window = BN_window_bits_for_exponent_size(bits);
703 if (window > 1) {
704 if (!BN_mod_mul_montgomery(d, val[0], val[0], mont, ctx))
705 goto err; /* 2 */
706 j = 1 << (window - 1);
707 for (i = 1; i < j; i++) {
708 if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
709 !BN_mod_mul_montgomery(val[i], val[i - 1],
710 d, mont, ctx))
711 goto err;
712 }
713 }
714
715 start = 1; /* This is used to avoid multiplication etc
716 * when there is only the value '1' in the
717 * buffer. */
718 wvalue = 0; /* The 'value' of the window */
719 wstart = bits - 1; /* The top bit of the window */
720 wend = 0; /* The bottom bit of the window */
721
722 if (!BN_to_montgomery(r, BN_value_one(), mont, ctx))
723 goto err;
724 for (;;) {
725 if (BN_is_bit_set(p, wstart) == 0) {
726 if (!start) {
727 if (!BN_mod_mul_montgomery(r, r, r, mont, ctx))
728 goto err;
729 }
730 if (wstart == 0)
731 break;
732 wstart--;
733 continue;
734 }
735 /* We now have wstart on a 'set' bit, we now need to work out
736 * how bit a window to do. To do this we need to scan
737 * forward until the last set bit before the end of the
738 * window */
739 j = wstart;
740 wvalue = 1;
741 wend = 0;
742 for (i = 1; i < window; i++) {
743 if (wstart - i < 0)
744 break;
745 if (BN_is_bit_set(p, wstart - i)) {
746 wvalue <<= (i - wend);
747 wvalue |= 1;
748 wend = i;
749 }
750 }
751
752 /* wend is the size of the current window */
753 j = wend + 1;
754 /* add the 'bytes above' */
755 if (!start)
756 for (i = 0; i < j; i++) {
757 if (!BN_mod_mul_montgomery(r, r, r, mont, ctx))
758 goto err;
759 }
760
761 /* wvalue will be an odd number < 2^window */
762 if (!BN_mod_mul_montgomery(r, r, val[wvalue >> 1], mont, ctx))
763 goto err;
764
765 /* move the 'window' down further */
766 wstart -= wend + 1;
767 wvalue = 0;
768 start = 0;
769 if (wstart < 0)
770 break;
771 }
772 if (!BN_from_montgomery(rr, r,mont, ctx))
773 goto err;
774 ret = 1;
775
776err:
777 if ((in_mont == NULL) && (mont != NULL))
778 BN_MONT_CTX_free(mont);
779 BN_CTX_end(ctx);
780 return (ret);
781}
782
783int
784BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
785 BN_CTX *ctx, BN_MONT_CTX *in_mont)
786{
787 return BN_mod_exp_mont_internal(rr, a, p, m, ctx, in_mont,
788 (BN_get_flags(p, BN_FLG_CONSTTIME) != 0));
789}
790
791int
792BN_mod_exp_mont_ct(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
793 BN_CTX *ctx, BN_MONT_CTX *in_mont)
794{
795 return BN_mod_exp_mont_internal(rr, a, p, m, ctx, in_mont, 1);
796}
797
798int
799BN_mod_exp_mont_nonct(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
800 BN_CTX *ctx, BN_MONT_CTX *in_mont)
801{
802 return BN_mod_exp_mont_internal(rr, a, p, m, ctx, in_mont, 0);
803}
804
895int 805int
896BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p, const BIGNUM *m, 806BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p, const BIGNUM *m,
897 BN_CTX *ctx, BN_MONT_CTX *in_mont) 807 BN_CTX *ctx, BN_MONT_CTX *in_mont)
@@ -1040,17 +950,16 @@ err:
1040 return (ret); 950 return (ret);
1041} 951}
1042 952
1043
1044/* The old fallback, simple version :-) */
1045int 953int
1046BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, 954BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
1047 BN_CTX *ctx) 955 BN_CTX *ctx)
1048{ 956{
1049 int i, j, bits, ret = 0, wstart, wend, window, wvalue; 957 int i, j, bits, ret = 0, wstart, wend, window, wvalue;
1050 int start = 1; 958 int start = 1;
1051 BIGNUM *d; 959 BIGNUM *aa;
1052 /* Table of variables obtained from 'ctx' */ 960 /* Table of variables obtained from 'ctx' */
1053 BIGNUM *val[TABLE_SIZE]; 961 BIGNUM *val[TABLE_SIZE];
962 BN_RECP_CTX recp;
1054 963
1055 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { 964 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
1056 /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ 965 /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
@@ -1069,13 +978,27 @@ BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
1069 return ret; 978 return ret;
1070 } 979 }
1071 980
981 BN_RECP_CTX_init(&recp);
982
1072 BN_CTX_start(ctx); 983 BN_CTX_start(ctx);
1073 if ((d = BN_CTX_get(ctx)) == NULL) 984 if ((aa = BN_CTX_get(ctx)) == NULL)
1074 goto err; 985 goto err;
1075 if ((val[0] = BN_CTX_get(ctx)) == NULL) 986 if ((val[0] = BN_CTX_get(ctx)) == NULL)
1076 goto err; 987 goto err;
1077 988
1078 if (!BN_nnmod(val[0],a,m,ctx)) 989 if (m->neg) {
990 /* ignore sign of 'm' */
991 if (!BN_copy(aa, m))
992 goto err;
993 aa->neg = 0;
994 if (BN_RECP_CTX_set(&recp, aa, ctx) <= 0)
995 goto err;
996 } else {
997 if (BN_RECP_CTX_set(&recp, m, ctx) <= 0)
998 goto err;
999 }
1000
1001 if (!BN_nnmod(val[0], a, m, ctx))
1079 goto err; /* 1 */ 1002 goto err; /* 1 */
1080 if (BN_is_zero(val[0])) { 1003 if (BN_is_zero(val[0])) {
1081 BN_zero(r); 1004 BN_zero(r);
@@ -1085,12 +1008,13 @@ BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
1085 1008
1086 window = BN_window_bits_for_exponent_size(bits); 1009 window = BN_window_bits_for_exponent_size(bits);
1087 if (window > 1) { 1010 if (window > 1) {
1088 if (!BN_mod_mul(d, val[0], val[0], m, ctx)) 1011 if (!BN_mod_mul_reciprocal(aa, val[0], val[0], &recp, ctx))
1089 goto err; /* 2 */ 1012 goto err; /* 2 */
1090 j = 1 << (window - 1); 1013 j = 1 << (window - 1);
1091 for (i = 1; i < j; i++) { 1014 for (i = 1; i < j; i++) {
1092 if (((val[i] = BN_CTX_get(ctx)) == NULL) || 1015 if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
1093 !BN_mod_mul(val[i], val[i - 1], d,m, ctx)) 1016 !BN_mod_mul_reciprocal(val[i], val[i - 1],
1017 aa, &recp, ctx))
1094 goto err; 1018 goto err;
1095 } 1019 }
1096 } 1020 }
@@ -1108,7 +1032,7 @@ BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
1108 for (;;) { 1032 for (;;) {
1109 if (BN_is_bit_set(p, wstart) == 0) { 1033 if (BN_is_bit_set(p, wstart) == 0) {
1110 if (!start) 1034 if (!start)
1111 if (!BN_mod_mul(r, r, r, m, ctx)) 1035 if (!BN_mod_mul_reciprocal(r, r,r, &recp, ctx))
1112 goto err; 1036 goto err;
1113 if (wstart == 0) 1037 if (wstart == 0)
1114 break; 1038 break;
@@ -1137,12 +1061,12 @@ BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
1137 /* add the 'bytes above' */ 1061 /* add the 'bytes above' */
1138 if (!start) 1062 if (!start)
1139 for (i = 0; i < j; i++) { 1063 for (i = 0; i < j; i++) {
1140 if (!BN_mod_mul(r, r, r, m, ctx)) 1064 if (!BN_mod_mul_reciprocal(r, r,r, &recp, ctx))
1141 goto err; 1065 goto err;
1142 } 1066 }
1143 1067
1144 /* wvalue will be an odd number < 2^window */ 1068 /* wvalue will be an odd number < 2^window */
1145 if (!BN_mod_mul(r, r, val[wvalue >> 1], m, ctx)) 1069 if (!BN_mod_mul_reciprocal(r, r,val[wvalue >> 1], &recp, ctx))
1146 goto err; 1070 goto err;
1147 1071
1148 /* move the 'window' down further */ 1072 /* move the 'window' down further */
@@ -1156,5 +1080,78 @@ BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
1156 1080
1157err: 1081err:
1158 BN_CTX_end(ctx); 1082 BN_CTX_end(ctx);
1083 BN_RECP_CTX_free(&recp);
1159 return (ret); 1084 return (ret);
1160} 1085}
1086
1087static int
1088BN_mod_exp_internal(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
1089 BN_CTX *ctx, int ct)
1090{
1091 int ret;
1092
1093
1094 /* For even modulus m = 2^k*m_odd, it might make sense to compute
1095 * a^p mod m_odd and a^p mod 2^k separately (with Montgomery
1096 * exponentiation for the odd part), using appropriate exponent
1097 * reductions, and combine the results using the CRT.
1098 *
1099 * For now, we use Montgomery only if the modulus is odd; otherwise,
1100 * exponentiation using the reciprocal-based quick remaindering
1101 * algorithm is used.
1102 *
1103 * (Timing obtained with expspeed.c [computations a^p mod m
1104 * where a, p, m are of the same length: 256, 512, 1024, 2048,
1105 * 4096, 8192 bits], compared to the running time of the
1106 * standard algorithm:
1107 *
1108 * BN_mod_exp_mont 33 .. 40 % [AMD K6-2, Linux, debug configuration]
1109 * 55 .. 77 % [UltraSparc processor, but
1110 * debug-solaris-sparcv8-gcc conf.]
1111 *
1112 * BN_mod_exp_recp 50 .. 70 % [AMD K6-2, Linux, debug configuration]
1113 * 62 .. 118 % [UltraSparc, debug-solaris-sparcv8-gcc]
1114 *
1115 * On the Sparc, BN_mod_exp_recp was faster than BN_mod_exp_mont
1116 * at 2048 and more bits, but at 512 and 1024 bits, it was
1117 * slower even than the standard algorithm!
1118 *
1119 * "Real" timings [linux-elf, solaris-sparcv9-gcc configurations]
1120 * should be obtained when the new Montgomery reduction code
1121 * has been integrated into OpenSSL.)
1122 */
1123
1124 if (BN_is_odd(m)) {
1125 if (a->top == 1 && !a->neg && !ct) {
1126 BN_ULONG A = a->d[0];
1127 ret = BN_mod_exp_mont_word(r, A,p, m,ctx, NULL);
1128 } else
1129 ret = BN_mod_exp_mont_ct(r, a,p, m,ctx, NULL);
1130 } else {
1131 ret = BN_mod_exp_recp(r, a,p, m, ctx);
1132 }
1133
1134 return (ret);
1135}
1136
1137int
1138BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
1139 BN_CTX *ctx)
1140{
1141 return BN_mod_exp_internal(r, a, p, m, ctx,
1142 (BN_get_flags(p, BN_FLG_CONSTTIME) != 0));
1143}
1144
1145int
1146BN_mod_exp_ct(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
1147 BN_CTX *ctx)
1148{
1149 return BN_mod_exp_internal(r, a, p, m, ctx, 1);
1150}
1151
1152int
1153BN_mod_exp_nonct(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
1154 BN_CTX *ctx)
1155{
1156 return BN_mod_exp_internal(r, a, p, m, ctx, 0);
1157}