diff options
author | jsing <> | 2023-02-03 05:27:50 +0000 |
---|---|---|
committer | jsing <> | 2023-02-03 05:27:50 +0000 |
commit | acdcbccd5c839ed36b33b72d9378f9a65a938915 (patch) | |
tree | f478056d5e579aebc512bedcfed1c32b4d2f1ddb /src | |
parent | 8c388cc76603dfe3b33db90e5d3790131a43777b (diff) | |
download | openbsd-acdcbccd5c839ed36b33b72d9378f9a65a938915.tar.gz openbsd-acdcbccd5c839ed36b33b72d9378f9a65a938915.tar.bz2 openbsd-acdcbccd5c839ed36b33b72d9378f9a65a938915.zip |
Reorder functions in bn_exp.c to be slightly sensible...
No functional change intended.
Diffstat (limited to 'src')
-rw-r--r-- | src/lib/libcrypto/bn/bn_exp.c | 561 |
1 files changed, 279 insertions, 282 deletions
diff --git a/src/lib/libcrypto/bn/bn_exp.c b/src/lib/libcrypto/bn/bn_exp.c index e36eeff6bf..b72b535962 100644 --- a/src/lib/libcrypto/bn/bn_exp.c +++ b/src/lib/libcrypto/bn/bn_exp.c | |||
@@ -1,4 +1,4 @@ | |||
1 | /* $OpenBSD: bn_exp.c,v 1.35 2022/11/26 16:08:51 tb Exp $ */ | 1 | /* $OpenBSD: bn_exp.c,v 1.36 2023/02/03 05:27:50 jsing Exp $ */ |
2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) | 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) |
3 | * All rights reserved. | 3 | * All rights reserved. |
4 | * | 4 | * |
@@ -171,90 +171,16 @@ err: | |||
171 | return (ret); | 171 | return (ret); |
172 | } | 172 | } |
173 | 173 | ||
174 | static int | 174 | /* The old fallback, simple version :-) */ |
175 | BN_mod_exp_internal(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, | ||
176 | BN_CTX *ctx, int ct) | ||
177 | { | ||
178 | int ret; | ||
179 | |||
180 | |||
181 | /* For even modulus m = 2^k*m_odd, it might make sense to compute | ||
182 | * a^p mod m_odd and a^p mod 2^k separately (with Montgomery | ||
183 | * exponentiation for the odd part), using appropriate exponent | ||
184 | * reductions, and combine the results using the CRT. | ||
185 | * | ||
186 | * For now, we use Montgomery only if the modulus is odd; otherwise, | ||
187 | * exponentiation using the reciprocal-based quick remaindering | ||
188 | * algorithm is used. | ||
189 | * | ||
190 | * (Timing obtained with expspeed.c [computations a^p mod m | ||
191 | * where a, p, m are of the same length: 256, 512, 1024, 2048, | ||
192 | * 4096, 8192 bits], compared to the running time of the | ||
193 | * standard algorithm: | ||
194 | * | ||
195 | * BN_mod_exp_mont 33 .. 40 % [AMD K6-2, Linux, debug configuration] | ||
196 | * 55 .. 77 % [UltraSparc processor, but | ||
197 | * debug-solaris-sparcv8-gcc conf.] | ||
198 | * | ||
199 | * BN_mod_exp_recp 50 .. 70 % [AMD K6-2, Linux, debug configuration] | ||
200 | * 62 .. 118 % [UltraSparc, debug-solaris-sparcv8-gcc] | ||
201 | * | ||
202 | * On the Sparc, BN_mod_exp_recp was faster than BN_mod_exp_mont | ||
203 | * at 2048 and more bits, but at 512 and 1024 bits, it was | ||
204 | * slower even than the standard algorithm! | ||
205 | * | ||
206 | * "Real" timings [linux-elf, solaris-sparcv9-gcc configurations] | ||
207 | * should be obtained when the new Montgomery reduction code | ||
208 | * has been integrated into OpenSSL.) | ||
209 | */ | ||
210 | |||
211 | if (BN_is_odd(m)) { | ||
212 | if (a->top == 1 && !a->neg && !ct) { | ||
213 | BN_ULONG A = a->d[0]; | ||
214 | ret = BN_mod_exp_mont_word(r, A,p, m,ctx, NULL); | ||
215 | } else | ||
216 | ret = BN_mod_exp_mont_ct(r, a,p, m,ctx, NULL); | ||
217 | } else { | ||
218 | ret = BN_mod_exp_recp(r, a,p, m, ctx); | ||
219 | } | ||
220 | |||
221 | return (ret); | ||
222 | } | ||
223 | |||
224 | int | ||
225 | BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, | ||
226 | BN_CTX *ctx) | ||
227 | { | ||
228 | return BN_mod_exp_internal(r, a, p, m, ctx, | ||
229 | (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)); | ||
230 | } | ||
231 | |||
232 | int | ||
233 | BN_mod_exp_ct(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, | ||
234 | BN_CTX *ctx) | ||
235 | { | ||
236 | return BN_mod_exp_internal(r, a, p, m, ctx, 1); | ||
237 | } | ||
238 | |||
239 | |||
240 | int | ||
241 | BN_mod_exp_nonct(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, | ||
242 | BN_CTX *ctx) | ||
243 | { | ||
244 | return BN_mod_exp_internal(r, a, p, m, ctx, 0); | ||
245 | } | ||
246 | |||
247 | |||
248 | int | 175 | int |
249 | BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, | 176 | BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, |
250 | BN_CTX *ctx) | 177 | BN_CTX *ctx) |
251 | { | 178 | { |
252 | int i, j, bits, ret = 0, wstart, wend, window, wvalue; | 179 | int i, j, bits, ret = 0, wstart, wend, window, wvalue; |
253 | int start = 1; | 180 | int start = 1; |
254 | BIGNUM *aa; | 181 | BIGNUM *d; |
255 | /* Table of variables obtained from 'ctx' */ | 182 | /* Table of variables obtained from 'ctx' */ |
256 | BIGNUM *val[TABLE_SIZE]; | 183 | BIGNUM *val[TABLE_SIZE]; |
257 | BN_RECP_CTX recp; | ||
258 | 184 | ||
259 | if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { | 185 | if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { |
260 | /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ | 186 | /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ |
@@ -273,27 +199,13 @@ BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, | |||
273 | return ret; | 199 | return ret; |
274 | } | 200 | } |
275 | 201 | ||
276 | BN_RECP_CTX_init(&recp); | ||
277 | |||
278 | BN_CTX_start(ctx); | 202 | BN_CTX_start(ctx); |
279 | if ((aa = BN_CTX_get(ctx)) == NULL) | 203 | if ((d = BN_CTX_get(ctx)) == NULL) |
280 | goto err; | 204 | goto err; |
281 | if ((val[0] = BN_CTX_get(ctx)) == NULL) | 205 | if ((val[0] = BN_CTX_get(ctx)) == NULL) |
282 | goto err; | 206 | goto err; |
283 | 207 | ||
284 | if (m->neg) { | 208 | if (!BN_nnmod(val[0],a,m,ctx)) |
285 | /* ignore sign of 'm' */ | ||
286 | if (!BN_copy(aa, m)) | ||
287 | goto err; | ||
288 | aa->neg = 0; | ||
289 | if (BN_RECP_CTX_set(&recp, aa, ctx) <= 0) | ||
290 | goto err; | ||
291 | } else { | ||
292 | if (BN_RECP_CTX_set(&recp, m, ctx) <= 0) | ||
293 | goto err; | ||
294 | } | ||
295 | |||
296 | if (!BN_nnmod(val[0], a, m, ctx)) | ||
297 | goto err; /* 1 */ | 209 | goto err; /* 1 */ |
298 | if (BN_is_zero(val[0])) { | 210 | if (BN_is_zero(val[0])) { |
299 | BN_zero(r); | 211 | BN_zero(r); |
@@ -303,13 +215,12 @@ BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, | |||
303 | 215 | ||
304 | window = BN_window_bits_for_exponent_size(bits); | 216 | window = BN_window_bits_for_exponent_size(bits); |
305 | if (window > 1) { | 217 | if (window > 1) { |
306 | if (!BN_mod_mul_reciprocal(aa, val[0], val[0], &recp, ctx)) | 218 | if (!BN_mod_mul(d, val[0], val[0], m, ctx)) |
307 | goto err; /* 2 */ | 219 | goto err; /* 2 */ |
308 | j = 1 << (window - 1); | 220 | j = 1 << (window - 1); |
309 | for (i = 1; i < j; i++) { | 221 | for (i = 1; i < j; i++) { |
310 | if (((val[i] = BN_CTX_get(ctx)) == NULL) || | 222 | if (((val[i] = BN_CTX_get(ctx)) == NULL) || |
311 | !BN_mod_mul_reciprocal(val[i], val[i - 1], | 223 | !BN_mod_mul(val[i], val[i - 1], d,m, ctx)) |
312 | aa, &recp, ctx)) | ||
313 | goto err; | 224 | goto err; |
314 | } | 225 | } |
315 | } | 226 | } |
@@ -327,153 +238,8 @@ BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, | |||
327 | for (;;) { | 238 | for (;;) { |
328 | if (BN_is_bit_set(p, wstart) == 0) { | 239 | if (BN_is_bit_set(p, wstart) == 0) { |
329 | if (!start) | 240 | if (!start) |
330 | if (!BN_mod_mul_reciprocal(r, r,r, &recp, ctx)) | 241 | if (!BN_mod_mul(r, r, r, m, ctx)) |
331 | goto err; | ||
332 | if (wstart == 0) | ||
333 | break; | ||
334 | wstart--; | ||
335 | continue; | ||
336 | } | ||
337 | /* We now have wstart on a 'set' bit, we now need to work out | ||
338 | * how bit a window to do. To do this we need to scan | ||
339 | * forward until the last set bit before the end of the | ||
340 | * window */ | ||
341 | j = wstart; | ||
342 | wvalue = 1; | ||
343 | wend = 0; | ||
344 | for (i = 1; i < window; i++) { | ||
345 | if (wstart - i < 0) | ||
346 | break; | ||
347 | if (BN_is_bit_set(p, wstart - i)) { | ||
348 | wvalue <<= (i - wend); | ||
349 | wvalue |= 1; | ||
350 | wend = i; | ||
351 | } | ||
352 | } | ||
353 | |||
354 | /* wend is the size of the current window */ | ||
355 | j = wend + 1; | ||
356 | /* add the 'bytes above' */ | ||
357 | if (!start) | ||
358 | for (i = 0; i < j; i++) { | ||
359 | if (!BN_mod_mul_reciprocal(r, r,r, &recp, ctx)) | ||
360 | goto err; | ||
361 | } | ||
362 | |||
363 | /* wvalue will be an odd number < 2^window */ | ||
364 | if (!BN_mod_mul_reciprocal(r, r,val[wvalue >> 1], &recp, ctx)) | ||
365 | goto err; | ||
366 | |||
367 | /* move the 'window' down further */ | ||
368 | wstart -= wend + 1; | ||
369 | wvalue = 0; | ||
370 | start = 0; | ||
371 | if (wstart < 0) | ||
372 | break; | ||
373 | } | ||
374 | ret = 1; | ||
375 | |||
376 | err: | ||
377 | BN_CTX_end(ctx); | ||
378 | BN_RECP_CTX_free(&recp); | ||
379 | return (ret); | ||
380 | } | ||
381 | |||
382 | static int | ||
383 | BN_mod_exp_mont_internal(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, | ||
384 | BN_CTX *ctx, BN_MONT_CTX *in_mont, int ct) | ||
385 | { | ||
386 | int i, j, bits, ret = 0, wstart, wend, window, wvalue; | ||
387 | int start = 1; | ||
388 | BIGNUM *d, *r; | ||
389 | const BIGNUM *aa; | ||
390 | /* Table of variables obtained from 'ctx' */ | ||
391 | BIGNUM *val[TABLE_SIZE]; | ||
392 | BN_MONT_CTX *mont = NULL; | ||
393 | |||
394 | if (ct) { | ||
395 | return BN_mod_exp_mont_consttime(rr, a, p, m, ctx, in_mont); | ||
396 | } | ||
397 | |||
398 | |||
399 | if (!BN_is_odd(m)) { | ||
400 | BNerror(BN_R_CALLED_WITH_EVEN_MODULUS); | ||
401 | return (0); | ||
402 | } | ||
403 | |||
404 | bits = BN_num_bits(p); | ||
405 | if (bits == 0) { | ||
406 | /* x**0 mod 1 is still zero. */ | ||
407 | if (BN_is_one(m)) { | ||
408 | ret = 1; | ||
409 | BN_zero(rr); | ||
410 | } else | ||
411 | ret = BN_one(rr); | ||
412 | return ret; | ||
413 | } | ||
414 | |||
415 | BN_CTX_start(ctx); | ||
416 | if ((d = BN_CTX_get(ctx)) == NULL) | ||
417 | goto err; | ||
418 | if ((r = BN_CTX_get(ctx)) == NULL) | ||
419 | goto err; | ||
420 | if ((val[0] = BN_CTX_get(ctx)) == NULL) | ||
421 | goto err; | ||
422 | |||
423 | /* If this is not done, things will break in the montgomery | ||
424 | * part */ | ||
425 | |||
426 | if (in_mont != NULL) | ||
427 | mont = in_mont; | ||
428 | else { | ||
429 | if ((mont = BN_MONT_CTX_new()) == NULL) | ||
430 | goto err; | ||
431 | if (!BN_MONT_CTX_set(mont, m, ctx)) | ||
432 | goto err; | ||
433 | } | ||
434 | |||
435 | if (a->neg || BN_ucmp(a, m) >= 0) { | ||
436 | if (!BN_nnmod(val[0], a,m, ctx)) | ||
437 | goto err; | ||
438 | aa = val[0]; | ||
439 | } else | ||
440 | aa = a; | ||
441 | if (BN_is_zero(aa)) { | ||
442 | BN_zero(rr); | ||
443 | ret = 1; | ||
444 | goto err; | ||
445 | } | ||
446 | if (!BN_to_montgomery(val[0], aa, mont, ctx)) | ||
447 | goto err; /* 1 */ | ||
448 | |||
449 | window = BN_window_bits_for_exponent_size(bits); | ||
450 | if (window > 1) { | ||
451 | if (!BN_mod_mul_montgomery(d, val[0], val[0], mont, ctx)) | ||
452 | goto err; /* 2 */ | ||
453 | j = 1 << (window - 1); | ||
454 | for (i = 1; i < j; i++) { | ||
455 | if (((val[i] = BN_CTX_get(ctx)) == NULL) || | ||
456 | !BN_mod_mul_montgomery(val[i], val[i - 1], | ||
457 | d, mont, ctx)) | ||
458 | goto err; | ||
459 | } | ||
460 | } | ||
461 | |||
462 | start = 1; /* This is used to avoid multiplication etc | ||
463 | * when there is only the value '1' in the | ||
464 | * buffer. */ | ||
465 | wvalue = 0; /* The 'value' of the window */ | ||
466 | wstart = bits - 1; /* The top bit of the window */ | ||
467 | wend = 0; /* The bottom bit of the window */ | ||
468 | |||
469 | if (!BN_to_montgomery(r, BN_value_one(), mont, ctx)) | ||
470 | goto err; | ||
471 | for (;;) { | ||
472 | if (BN_is_bit_set(p, wstart) == 0) { | ||
473 | if (!start) { | ||
474 | if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) | ||
475 | goto err; | 242 | goto err; |
476 | } | ||
477 | if (wstart == 0) | 243 | if (wstart == 0) |
478 | break; | 244 | break; |
479 | wstart--; | 245 | wstart--; |
@@ -501,12 +267,12 @@ BN_mod_exp_mont_internal(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIG | |||
501 | /* add the 'bytes above' */ | 267 | /* add the 'bytes above' */ |
502 | if (!start) | 268 | if (!start) |
503 | for (i = 0; i < j; i++) { | 269 | for (i = 0; i < j; i++) { |
504 | if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) | 270 | if (!BN_mod_mul(r, r, r, m, ctx)) |
505 | goto err; | 271 | goto err; |
506 | } | 272 | } |
507 | 273 | ||
508 | /* wvalue will be an odd number < 2^window */ | 274 | /* wvalue will be an odd number < 2^window */ |
509 | if (!BN_mod_mul_montgomery(r, r, val[wvalue >> 1], mont, ctx)) | 275 | if (!BN_mod_mul(r, r, val[wvalue >> 1], m, ctx)) |
510 | goto err; | 276 | goto err; |
511 | 277 | ||
512 | /* move the 'window' down further */ | 278 | /* move the 'window' down further */ |
@@ -516,39 +282,13 @@ BN_mod_exp_mont_internal(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIG | |||
516 | if (wstart < 0) | 282 | if (wstart < 0) |
517 | break; | 283 | break; |
518 | } | 284 | } |
519 | if (!BN_from_montgomery(rr, r,mont, ctx)) | ||
520 | goto err; | ||
521 | ret = 1; | 285 | ret = 1; |
522 | 286 | ||
523 | err: | 287 | err: |
524 | if ((in_mont == NULL) && (mont != NULL)) | ||
525 | BN_MONT_CTX_free(mont); | ||
526 | BN_CTX_end(ctx); | 288 | BN_CTX_end(ctx); |
527 | return (ret); | 289 | return (ret); |
528 | } | 290 | } |
529 | 291 | ||
530 | int | ||
531 | BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, | ||
532 | BN_CTX *ctx, BN_MONT_CTX *in_mont) | ||
533 | { | ||
534 | return BN_mod_exp_mont_internal(rr, a, p, m, ctx, in_mont, | ||
535 | (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)); | ||
536 | } | ||
537 | |||
538 | int | ||
539 | BN_mod_exp_mont_ct(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, | ||
540 | BN_CTX *ctx, BN_MONT_CTX *in_mont) | ||
541 | { | ||
542 | return BN_mod_exp_mont_internal(rr, a, p, m, ctx, in_mont, 1); | ||
543 | } | ||
544 | |||
545 | int | ||
546 | BN_mod_exp_mont_nonct(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, | ||
547 | BN_CTX *ctx, BN_MONT_CTX *in_mont) | ||
548 | { | ||
549 | return BN_mod_exp_mont_internal(rr, a, p, m, ctx, in_mont, 0); | ||
550 | } | ||
551 | |||
552 | /* BN_mod_exp_mont_consttime() stores the precomputed powers in a specific layout | 292 | /* BN_mod_exp_mont_consttime() stores the precomputed powers in a specific layout |
553 | * so that accessing any of these table values shows the same access pattern as far | 293 | * so that accessing any of these table values shows the same access pattern as far |
554 | * as cache lines are concerned. The following functions are used to transfer a BIGNUM | 294 | * as cache lines are concerned. The following functions are used to transfer a BIGNUM |
@@ -892,6 +632,176 @@ err: | |||
892 | return (ret); | 632 | return (ret); |
893 | } | 633 | } |
894 | 634 | ||
635 | static int | ||
636 | BN_mod_exp_mont_internal(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, | ||
637 | BN_CTX *ctx, BN_MONT_CTX *in_mont, int ct) | ||
638 | { | ||
639 | int i, j, bits, ret = 0, wstart, wend, window, wvalue; | ||
640 | int start = 1; | ||
641 | BIGNUM *d, *r; | ||
642 | const BIGNUM *aa; | ||
643 | /* Table of variables obtained from 'ctx' */ | ||
644 | BIGNUM *val[TABLE_SIZE]; | ||
645 | BN_MONT_CTX *mont = NULL; | ||
646 | |||
647 | if (ct) { | ||
648 | return BN_mod_exp_mont_consttime(rr, a, p, m, ctx, in_mont); | ||
649 | } | ||
650 | |||
651 | |||
652 | if (!BN_is_odd(m)) { | ||
653 | BNerror(BN_R_CALLED_WITH_EVEN_MODULUS); | ||
654 | return (0); | ||
655 | } | ||
656 | |||
657 | bits = BN_num_bits(p); | ||
658 | if (bits == 0) { | ||
659 | /* x**0 mod 1 is still zero. */ | ||
660 | if (BN_is_one(m)) { | ||
661 | ret = 1; | ||
662 | BN_zero(rr); | ||
663 | } else | ||
664 | ret = BN_one(rr); | ||
665 | return ret; | ||
666 | } | ||
667 | |||
668 | BN_CTX_start(ctx); | ||
669 | if ((d = BN_CTX_get(ctx)) == NULL) | ||
670 | goto err; | ||
671 | if ((r = BN_CTX_get(ctx)) == NULL) | ||
672 | goto err; | ||
673 | if ((val[0] = BN_CTX_get(ctx)) == NULL) | ||
674 | goto err; | ||
675 | |||
676 | /* If this is not done, things will break in the montgomery | ||
677 | * part */ | ||
678 | |||
679 | if (in_mont != NULL) | ||
680 | mont = in_mont; | ||
681 | else { | ||
682 | if ((mont = BN_MONT_CTX_new()) == NULL) | ||
683 | goto err; | ||
684 | if (!BN_MONT_CTX_set(mont, m, ctx)) | ||
685 | goto err; | ||
686 | } | ||
687 | |||
688 | if (a->neg || BN_ucmp(a, m) >= 0) { | ||
689 | if (!BN_nnmod(val[0], a,m, ctx)) | ||
690 | goto err; | ||
691 | aa = val[0]; | ||
692 | } else | ||
693 | aa = a; | ||
694 | if (BN_is_zero(aa)) { | ||
695 | BN_zero(rr); | ||
696 | ret = 1; | ||
697 | goto err; | ||
698 | } | ||
699 | if (!BN_to_montgomery(val[0], aa, mont, ctx)) | ||
700 | goto err; /* 1 */ | ||
701 | |||
702 | window = BN_window_bits_for_exponent_size(bits); | ||
703 | if (window > 1) { | ||
704 | if (!BN_mod_mul_montgomery(d, val[0], val[0], mont, ctx)) | ||
705 | goto err; /* 2 */ | ||
706 | j = 1 << (window - 1); | ||
707 | for (i = 1; i < j; i++) { | ||
708 | if (((val[i] = BN_CTX_get(ctx)) == NULL) || | ||
709 | !BN_mod_mul_montgomery(val[i], val[i - 1], | ||
710 | d, mont, ctx)) | ||
711 | goto err; | ||
712 | } | ||
713 | } | ||
714 | |||
715 | start = 1; /* This is used to avoid multiplication etc | ||
716 | * when there is only the value '1' in the | ||
717 | * buffer. */ | ||
718 | wvalue = 0; /* The 'value' of the window */ | ||
719 | wstart = bits - 1; /* The top bit of the window */ | ||
720 | wend = 0; /* The bottom bit of the window */ | ||
721 | |||
722 | if (!BN_to_montgomery(r, BN_value_one(), mont, ctx)) | ||
723 | goto err; | ||
724 | for (;;) { | ||
725 | if (BN_is_bit_set(p, wstart) == 0) { | ||
726 | if (!start) { | ||
727 | if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) | ||
728 | goto err; | ||
729 | } | ||
730 | if (wstart == 0) | ||
731 | break; | ||
732 | wstart--; | ||
733 | continue; | ||
734 | } | ||
735 | /* We now have wstart on a 'set' bit, we now need to work out | ||
736 | * how bit a window to do. To do this we need to scan | ||
737 | * forward until the last set bit before the end of the | ||
738 | * window */ | ||
739 | j = wstart; | ||
740 | wvalue = 1; | ||
741 | wend = 0; | ||
742 | for (i = 1; i < window; i++) { | ||
743 | if (wstart - i < 0) | ||
744 | break; | ||
745 | if (BN_is_bit_set(p, wstart - i)) { | ||
746 | wvalue <<= (i - wend); | ||
747 | wvalue |= 1; | ||
748 | wend = i; | ||
749 | } | ||
750 | } | ||
751 | |||
752 | /* wend is the size of the current window */ | ||
753 | j = wend + 1; | ||
754 | /* add the 'bytes above' */ | ||
755 | if (!start) | ||
756 | for (i = 0; i < j; i++) { | ||
757 | if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) | ||
758 | goto err; | ||
759 | } | ||
760 | |||
761 | /* wvalue will be an odd number < 2^window */ | ||
762 | if (!BN_mod_mul_montgomery(r, r, val[wvalue >> 1], mont, ctx)) | ||
763 | goto err; | ||
764 | |||
765 | /* move the 'window' down further */ | ||
766 | wstart -= wend + 1; | ||
767 | wvalue = 0; | ||
768 | start = 0; | ||
769 | if (wstart < 0) | ||
770 | break; | ||
771 | } | ||
772 | if (!BN_from_montgomery(rr, r,mont, ctx)) | ||
773 | goto err; | ||
774 | ret = 1; | ||
775 | |||
776 | err: | ||
777 | if ((in_mont == NULL) && (mont != NULL)) | ||
778 | BN_MONT_CTX_free(mont); | ||
779 | BN_CTX_end(ctx); | ||
780 | return (ret); | ||
781 | } | ||
782 | |||
783 | int | ||
784 | BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, | ||
785 | BN_CTX *ctx, BN_MONT_CTX *in_mont) | ||
786 | { | ||
787 | return BN_mod_exp_mont_internal(rr, a, p, m, ctx, in_mont, | ||
788 | (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)); | ||
789 | } | ||
790 | |||
791 | int | ||
792 | BN_mod_exp_mont_ct(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, | ||
793 | BN_CTX *ctx, BN_MONT_CTX *in_mont) | ||
794 | { | ||
795 | return BN_mod_exp_mont_internal(rr, a, p, m, ctx, in_mont, 1); | ||
796 | } | ||
797 | |||
798 | int | ||
799 | BN_mod_exp_mont_nonct(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, | ||
800 | BN_CTX *ctx, BN_MONT_CTX *in_mont) | ||
801 | { | ||
802 | return BN_mod_exp_mont_internal(rr, a, p, m, ctx, in_mont, 0); | ||
803 | } | ||
804 | |||
895 | int | 805 | int |
896 | BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p, const BIGNUM *m, | 806 | BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p, const BIGNUM *m, |
897 | BN_CTX *ctx, BN_MONT_CTX *in_mont) | 807 | BN_CTX *ctx, BN_MONT_CTX *in_mont) |
@@ -1040,17 +950,16 @@ err: | |||
1040 | return (ret); | 950 | return (ret); |
1041 | } | 951 | } |
1042 | 952 | ||
1043 | |||
1044 | /* The old fallback, simple version :-) */ | ||
1045 | int | 953 | int |
1046 | BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, | 954 | BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, |
1047 | BN_CTX *ctx) | 955 | BN_CTX *ctx) |
1048 | { | 956 | { |
1049 | int i, j, bits, ret = 0, wstart, wend, window, wvalue; | 957 | int i, j, bits, ret = 0, wstart, wend, window, wvalue; |
1050 | int start = 1; | 958 | int start = 1; |
1051 | BIGNUM *d; | 959 | BIGNUM *aa; |
1052 | /* Table of variables obtained from 'ctx' */ | 960 | /* Table of variables obtained from 'ctx' */ |
1053 | BIGNUM *val[TABLE_SIZE]; | 961 | BIGNUM *val[TABLE_SIZE]; |
962 | BN_RECP_CTX recp; | ||
1054 | 963 | ||
1055 | if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { | 964 | if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { |
1056 | /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ | 965 | /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ |
@@ -1069,13 +978,27 @@ BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, | |||
1069 | return ret; | 978 | return ret; |
1070 | } | 979 | } |
1071 | 980 | ||
981 | BN_RECP_CTX_init(&recp); | ||
982 | |||
1072 | BN_CTX_start(ctx); | 983 | BN_CTX_start(ctx); |
1073 | if ((d = BN_CTX_get(ctx)) == NULL) | 984 | if ((aa = BN_CTX_get(ctx)) == NULL) |
1074 | goto err; | 985 | goto err; |
1075 | if ((val[0] = BN_CTX_get(ctx)) == NULL) | 986 | if ((val[0] = BN_CTX_get(ctx)) == NULL) |
1076 | goto err; | 987 | goto err; |
1077 | 988 | ||
1078 | if (!BN_nnmod(val[0],a,m,ctx)) | 989 | if (m->neg) { |
990 | /* ignore sign of 'm' */ | ||
991 | if (!BN_copy(aa, m)) | ||
992 | goto err; | ||
993 | aa->neg = 0; | ||
994 | if (BN_RECP_CTX_set(&recp, aa, ctx) <= 0) | ||
995 | goto err; | ||
996 | } else { | ||
997 | if (BN_RECP_CTX_set(&recp, m, ctx) <= 0) | ||
998 | goto err; | ||
999 | } | ||
1000 | |||
1001 | if (!BN_nnmod(val[0], a, m, ctx)) | ||
1079 | goto err; /* 1 */ | 1002 | goto err; /* 1 */ |
1080 | if (BN_is_zero(val[0])) { | 1003 | if (BN_is_zero(val[0])) { |
1081 | BN_zero(r); | 1004 | BN_zero(r); |
@@ -1085,12 +1008,13 @@ BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, | |||
1085 | 1008 | ||
1086 | window = BN_window_bits_for_exponent_size(bits); | 1009 | window = BN_window_bits_for_exponent_size(bits); |
1087 | if (window > 1) { | 1010 | if (window > 1) { |
1088 | if (!BN_mod_mul(d, val[0], val[0], m, ctx)) | 1011 | if (!BN_mod_mul_reciprocal(aa, val[0], val[0], &recp, ctx)) |
1089 | goto err; /* 2 */ | 1012 | goto err; /* 2 */ |
1090 | j = 1 << (window - 1); | 1013 | j = 1 << (window - 1); |
1091 | for (i = 1; i < j; i++) { | 1014 | for (i = 1; i < j; i++) { |
1092 | if (((val[i] = BN_CTX_get(ctx)) == NULL) || | 1015 | if (((val[i] = BN_CTX_get(ctx)) == NULL) || |
1093 | !BN_mod_mul(val[i], val[i - 1], d,m, ctx)) | 1016 | !BN_mod_mul_reciprocal(val[i], val[i - 1], |
1017 | aa, &recp, ctx)) | ||
1094 | goto err; | 1018 | goto err; |
1095 | } | 1019 | } |
1096 | } | 1020 | } |
@@ -1108,7 +1032,7 @@ BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, | |||
1108 | for (;;) { | 1032 | for (;;) { |
1109 | if (BN_is_bit_set(p, wstart) == 0) { | 1033 | if (BN_is_bit_set(p, wstart) == 0) { |
1110 | if (!start) | 1034 | if (!start) |
1111 | if (!BN_mod_mul(r, r, r, m, ctx)) | 1035 | if (!BN_mod_mul_reciprocal(r, r,r, &recp, ctx)) |
1112 | goto err; | 1036 | goto err; |
1113 | if (wstart == 0) | 1037 | if (wstart == 0) |
1114 | break; | 1038 | break; |
@@ -1137,12 +1061,12 @@ BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, | |||
1137 | /* add the 'bytes above' */ | 1061 | /* add the 'bytes above' */ |
1138 | if (!start) | 1062 | if (!start) |
1139 | for (i = 0; i < j; i++) { | 1063 | for (i = 0; i < j; i++) { |
1140 | if (!BN_mod_mul(r, r, r, m, ctx)) | 1064 | if (!BN_mod_mul_reciprocal(r, r,r, &recp, ctx)) |
1141 | goto err; | 1065 | goto err; |
1142 | } | 1066 | } |
1143 | 1067 | ||
1144 | /* wvalue will be an odd number < 2^window */ | 1068 | /* wvalue will be an odd number < 2^window */ |
1145 | if (!BN_mod_mul(r, r, val[wvalue >> 1], m, ctx)) | 1069 | if (!BN_mod_mul_reciprocal(r, r,val[wvalue >> 1], &recp, ctx)) |
1146 | goto err; | 1070 | goto err; |
1147 | 1071 | ||
1148 | /* move the 'window' down further */ | 1072 | /* move the 'window' down further */ |
@@ -1156,5 +1080,78 @@ BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, | |||
1156 | 1080 | ||
1157 | err: | 1081 | err: |
1158 | BN_CTX_end(ctx); | 1082 | BN_CTX_end(ctx); |
1083 | BN_RECP_CTX_free(&recp); | ||
1159 | return (ret); | 1084 | return (ret); |
1160 | } | 1085 | } |
1086 | |||
1087 | static int | ||
1088 | BN_mod_exp_internal(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, | ||
1089 | BN_CTX *ctx, int ct) | ||
1090 | { | ||
1091 | int ret; | ||
1092 | |||
1093 | |||
1094 | /* For even modulus m = 2^k*m_odd, it might make sense to compute | ||
1095 | * a^p mod m_odd and a^p mod 2^k separately (with Montgomery | ||
1096 | * exponentiation for the odd part), using appropriate exponent | ||
1097 | * reductions, and combine the results using the CRT. | ||
1098 | * | ||
1099 | * For now, we use Montgomery only if the modulus is odd; otherwise, | ||
1100 | * exponentiation using the reciprocal-based quick remaindering | ||
1101 | * algorithm is used. | ||
1102 | * | ||
1103 | * (Timing obtained with expspeed.c [computations a^p mod m | ||
1104 | * where a, p, m are of the same length: 256, 512, 1024, 2048, | ||
1105 | * 4096, 8192 bits], compared to the running time of the | ||
1106 | * standard algorithm: | ||
1107 | * | ||
1108 | * BN_mod_exp_mont 33 .. 40 % [AMD K6-2, Linux, debug configuration] | ||
1109 | * 55 .. 77 % [UltraSparc processor, but | ||
1110 | * debug-solaris-sparcv8-gcc conf.] | ||
1111 | * | ||
1112 | * BN_mod_exp_recp 50 .. 70 % [AMD K6-2, Linux, debug configuration] | ||
1113 | * 62 .. 118 % [UltraSparc, debug-solaris-sparcv8-gcc] | ||
1114 | * | ||
1115 | * On the Sparc, BN_mod_exp_recp was faster than BN_mod_exp_mont | ||
1116 | * at 2048 and more bits, but at 512 and 1024 bits, it was | ||
1117 | * slower even than the standard algorithm! | ||
1118 | * | ||
1119 | * "Real" timings [linux-elf, solaris-sparcv9-gcc configurations] | ||
1120 | * should be obtained when the new Montgomery reduction code | ||
1121 | * has been integrated into OpenSSL.) | ||
1122 | */ | ||
1123 | |||
1124 | if (BN_is_odd(m)) { | ||
1125 | if (a->top == 1 && !a->neg && !ct) { | ||
1126 | BN_ULONG A = a->d[0]; | ||
1127 | ret = BN_mod_exp_mont_word(r, A,p, m,ctx, NULL); | ||
1128 | } else | ||
1129 | ret = BN_mod_exp_mont_ct(r, a,p, m,ctx, NULL); | ||
1130 | } else { | ||
1131 | ret = BN_mod_exp_recp(r, a,p, m, ctx); | ||
1132 | } | ||
1133 | |||
1134 | return (ret); | ||
1135 | } | ||
1136 | |||
1137 | int | ||
1138 | BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, | ||
1139 | BN_CTX *ctx) | ||
1140 | { | ||
1141 | return BN_mod_exp_internal(r, a, p, m, ctx, | ||
1142 | (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)); | ||
1143 | } | ||
1144 | |||
1145 | int | ||
1146 | BN_mod_exp_ct(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, | ||
1147 | BN_CTX *ctx) | ||
1148 | { | ||
1149 | return BN_mod_exp_internal(r, a, p, m, ctx, 1); | ||
1150 | } | ||
1151 | |||
1152 | int | ||
1153 | BN_mod_exp_nonct(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, | ||
1154 | BN_CTX *ctx) | ||
1155 | { | ||
1156 | return BN_mod_exp_internal(r, a, p, m, ctx, 0); | ||
1157 | } | ||