diff options
| author | jsing <> | 2023-02-03 05:27:50 +0000 |
|---|---|---|
| committer | jsing <> | 2023-02-03 05:27:50 +0000 |
| commit | acdcbccd5c839ed36b33b72d9378f9a65a938915 (patch) | |
| tree | f478056d5e579aebc512bedcfed1c32b4d2f1ddb | |
| parent | 8c388cc76603dfe3b33db90e5d3790131a43777b (diff) | |
| download | openbsd-acdcbccd5c839ed36b33b72d9378f9a65a938915.tar.gz openbsd-acdcbccd5c839ed36b33b72d9378f9a65a938915.tar.bz2 openbsd-acdcbccd5c839ed36b33b72d9378f9a65a938915.zip | |
Reorder functions in bn_exp.c to be slightly sensible...
No functional change intended.
| -rw-r--r-- | src/lib/libcrypto/bn/bn_exp.c | 561 |
1 files changed, 279 insertions, 282 deletions
diff --git a/src/lib/libcrypto/bn/bn_exp.c b/src/lib/libcrypto/bn/bn_exp.c index e36eeff6bf..b72b535962 100644 --- a/src/lib/libcrypto/bn/bn_exp.c +++ b/src/lib/libcrypto/bn/bn_exp.c | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | /* $OpenBSD: bn_exp.c,v 1.35 2022/11/26 16:08:51 tb Exp $ */ | 1 | /* $OpenBSD: bn_exp.c,v 1.36 2023/02/03 05:27:50 jsing Exp $ */ |
| 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) | 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) |
| 3 | * All rights reserved. | 3 | * All rights reserved. |
| 4 | * | 4 | * |
| @@ -171,90 +171,16 @@ err: | |||
| 171 | return (ret); | 171 | return (ret); |
| 172 | } | 172 | } |
| 173 | 173 | ||
| 174 | static int | 174 | /* The old fallback, simple version :-) */ |
| 175 | BN_mod_exp_internal(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, | ||
| 176 | BN_CTX *ctx, int ct) | ||
| 177 | { | ||
| 178 | int ret; | ||
| 179 | |||
| 180 | |||
| 181 | /* For even modulus m = 2^k*m_odd, it might make sense to compute | ||
| 182 | * a^p mod m_odd and a^p mod 2^k separately (with Montgomery | ||
| 183 | * exponentiation for the odd part), using appropriate exponent | ||
| 184 | * reductions, and combine the results using the CRT. | ||
| 185 | * | ||
| 186 | * For now, we use Montgomery only if the modulus is odd; otherwise, | ||
| 187 | * exponentiation using the reciprocal-based quick remaindering | ||
| 188 | * algorithm is used. | ||
| 189 | * | ||
| 190 | * (Timing obtained with expspeed.c [computations a^p mod m | ||
| 191 | * where a, p, m are of the same length: 256, 512, 1024, 2048, | ||
| 192 | * 4096, 8192 bits], compared to the running time of the | ||
| 193 | * standard algorithm: | ||
| 194 | * | ||
| 195 | * BN_mod_exp_mont 33 .. 40 % [AMD K6-2, Linux, debug configuration] | ||
| 196 | * 55 .. 77 % [UltraSparc processor, but | ||
| 197 | * debug-solaris-sparcv8-gcc conf.] | ||
| 198 | * | ||
| 199 | * BN_mod_exp_recp 50 .. 70 % [AMD K6-2, Linux, debug configuration] | ||
| 200 | * 62 .. 118 % [UltraSparc, debug-solaris-sparcv8-gcc] | ||
| 201 | * | ||
| 202 | * On the Sparc, BN_mod_exp_recp was faster than BN_mod_exp_mont | ||
| 203 | * at 2048 and more bits, but at 512 and 1024 bits, it was | ||
| 204 | * slower even than the standard algorithm! | ||
| 205 | * | ||
| 206 | * "Real" timings [linux-elf, solaris-sparcv9-gcc configurations] | ||
| 207 | * should be obtained when the new Montgomery reduction code | ||
| 208 | * has been integrated into OpenSSL.) | ||
| 209 | */ | ||
| 210 | |||
| 211 | if (BN_is_odd(m)) { | ||
| 212 | if (a->top == 1 && !a->neg && !ct) { | ||
| 213 | BN_ULONG A = a->d[0]; | ||
| 214 | ret = BN_mod_exp_mont_word(r, A,p, m,ctx, NULL); | ||
| 215 | } else | ||
| 216 | ret = BN_mod_exp_mont_ct(r, a,p, m,ctx, NULL); | ||
| 217 | } else { | ||
| 218 | ret = BN_mod_exp_recp(r, a,p, m, ctx); | ||
| 219 | } | ||
| 220 | |||
| 221 | return (ret); | ||
| 222 | } | ||
| 223 | |||
| 224 | int | ||
| 225 | BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, | ||
| 226 | BN_CTX *ctx) | ||
| 227 | { | ||
| 228 | return BN_mod_exp_internal(r, a, p, m, ctx, | ||
| 229 | (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)); | ||
| 230 | } | ||
| 231 | |||
| 232 | int | ||
| 233 | BN_mod_exp_ct(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, | ||
| 234 | BN_CTX *ctx) | ||
| 235 | { | ||
| 236 | return BN_mod_exp_internal(r, a, p, m, ctx, 1); | ||
| 237 | } | ||
| 238 | |||
| 239 | |||
| 240 | int | ||
| 241 | BN_mod_exp_nonct(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, | ||
| 242 | BN_CTX *ctx) | ||
| 243 | { | ||
| 244 | return BN_mod_exp_internal(r, a, p, m, ctx, 0); | ||
| 245 | } | ||
| 246 | |||
| 247 | |||
| 248 | int | 175 | int |
| 249 | BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, | 176 | BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, |
| 250 | BN_CTX *ctx) | 177 | BN_CTX *ctx) |
| 251 | { | 178 | { |
| 252 | int i, j, bits, ret = 0, wstart, wend, window, wvalue; | 179 | int i, j, bits, ret = 0, wstart, wend, window, wvalue; |
| 253 | int start = 1; | 180 | int start = 1; |
| 254 | BIGNUM *aa; | 181 | BIGNUM *d; |
| 255 | /* Table of variables obtained from 'ctx' */ | 182 | /* Table of variables obtained from 'ctx' */ |
| 256 | BIGNUM *val[TABLE_SIZE]; | 183 | BIGNUM *val[TABLE_SIZE]; |
| 257 | BN_RECP_CTX recp; | ||
| 258 | 184 | ||
| 259 | if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { | 185 | if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { |
| 260 | /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ | 186 | /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ |
| @@ -273,27 +199,13 @@ BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, | |||
| 273 | return ret; | 199 | return ret; |
| 274 | } | 200 | } |
| 275 | 201 | ||
| 276 | BN_RECP_CTX_init(&recp); | ||
| 277 | |||
| 278 | BN_CTX_start(ctx); | 202 | BN_CTX_start(ctx); |
| 279 | if ((aa = BN_CTX_get(ctx)) == NULL) | 203 | if ((d = BN_CTX_get(ctx)) == NULL) |
| 280 | goto err; | 204 | goto err; |
| 281 | if ((val[0] = BN_CTX_get(ctx)) == NULL) | 205 | if ((val[0] = BN_CTX_get(ctx)) == NULL) |
| 282 | goto err; | 206 | goto err; |
| 283 | 207 | ||
| 284 | if (m->neg) { | 208 | if (!BN_nnmod(val[0],a,m,ctx)) |
| 285 | /* ignore sign of 'm' */ | ||
| 286 | if (!BN_copy(aa, m)) | ||
| 287 | goto err; | ||
| 288 | aa->neg = 0; | ||
| 289 | if (BN_RECP_CTX_set(&recp, aa, ctx) <= 0) | ||
| 290 | goto err; | ||
| 291 | } else { | ||
| 292 | if (BN_RECP_CTX_set(&recp, m, ctx) <= 0) | ||
| 293 | goto err; | ||
| 294 | } | ||
| 295 | |||
| 296 | if (!BN_nnmod(val[0], a, m, ctx)) | ||
| 297 | goto err; /* 1 */ | 209 | goto err; /* 1 */ |
| 298 | if (BN_is_zero(val[0])) { | 210 | if (BN_is_zero(val[0])) { |
| 299 | BN_zero(r); | 211 | BN_zero(r); |
| @@ -303,13 +215,12 @@ BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, | |||
| 303 | 215 | ||
| 304 | window = BN_window_bits_for_exponent_size(bits); | 216 | window = BN_window_bits_for_exponent_size(bits); |
| 305 | if (window > 1) { | 217 | if (window > 1) { |
| 306 | if (!BN_mod_mul_reciprocal(aa, val[0], val[0], &recp, ctx)) | 218 | if (!BN_mod_mul(d, val[0], val[0], m, ctx)) |
| 307 | goto err; /* 2 */ | 219 | goto err; /* 2 */ |
| 308 | j = 1 << (window - 1); | 220 | j = 1 << (window - 1); |
| 309 | for (i = 1; i < j; i++) { | 221 | for (i = 1; i < j; i++) { |
| 310 | if (((val[i] = BN_CTX_get(ctx)) == NULL) || | 222 | if (((val[i] = BN_CTX_get(ctx)) == NULL) || |
| 311 | !BN_mod_mul_reciprocal(val[i], val[i - 1], | 223 | !BN_mod_mul(val[i], val[i - 1], d,m, ctx)) |
| 312 | aa, &recp, ctx)) | ||
| 313 | goto err; | 224 | goto err; |
| 314 | } | 225 | } |
| 315 | } | 226 | } |
| @@ -327,153 +238,8 @@ BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, | |||
| 327 | for (;;) { | 238 | for (;;) { |
| 328 | if (BN_is_bit_set(p, wstart) == 0) { | 239 | if (BN_is_bit_set(p, wstart) == 0) { |
| 329 | if (!start) | 240 | if (!start) |
| 330 | if (!BN_mod_mul_reciprocal(r, r,r, &recp, ctx)) | 241 | if (!BN_mod_mul(r, r, r, m, ctx)) |
| 331 | goto err; | ||
| 332 | if (wstart == 0) | ||
| 333 | break; | ||
| 334 | wstart--; | ||
| 335 | continue; | ||
| 336 | } | ||
| 337 | /* We now have wstart on a 'set' bit, we now need to work out | ||
| 338 | * how bit a window to do. To do this we need to scan | ||
| 339 | * forward until the last set bit before the end of the | ||
| 340 | * window */ | ||
| 341 | j = wstart; | ||
| 342 | wvalue = 1; | ||
| 343 | wend = 0; | ||
| 344 | for (i = 1; i < window; i++) { | ||
| 345 | if (wstart - i < 0) | ||
| 346 | break; | ||
| 347 | if (BN_is_bit_set(p, wstart - i)) { | ||
| 348 | wvalue <<= (i - wend); | ||
| 349 | wvalue |= 1; | ||
| 350 | wend = i; | ||
| 351 | } | ||
| 352 | } | ||
| 353 | |||
| 354 | /* wend is the size of the current window */ | ||
| 355 | j = wend + 1; | ||
| 356 | /* add the 'bytes above' */ | ||
| 357 | if (!start) | ||
| 358 | for (i = 0; i < j; i++) { | ||
| 359 | if (!BN_mod_mul_reciprocal(r, r,r, &recp, ctx)) | ||
| 360 | goto err; | ||
| 361 | } | ||
| 362 | |||
| 363 | /* wvalue will be an odd number < 2^window */ | ||
| 364 | if (!BN_mod_mul_reciprocal(r, r,val[wvalue >> 1], &recp, ctx)) | ||
| 365 | goto err; | ||
| 366 | |||
| 367 | /* move the 'window' down further */ | ||
| 368 | wstart -= wend + 1; | ||
| 369 | wvalue = 0; | ||
| 370 | start = 0; | ||
| 371 | if (wstart < 0) | ||
| 372 | break; | ||
| 373 | } | ||
| 374 | ret = 1; | ||
| 375 | |||
| 376 | err: | ||
| 377 | BN_CTX_end(ctx); | ||
| 378 | BN_RECP_CTX_free(&recp); | ||
| 379 | return (ret); | ||
| 380 | } | ||
| 381 | |||
| 382 | static int | ||
| 383 | BN_mod_exp_mont_internal(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, | ||
| 384 | BN_CTX *ctx, BN_MONT_CTX *in_mont, int ct) | ||
| 385 | { | ||
| 386 | int i, j, bits, ret = 0, wstart, wend, window, wvalue; | ||
| 387 | int start = 1; | ||
| 388 | BIGNUM *d, *r; | ||
| 389 | const BIGNUM *aa; | ||
| 390 | /* Table of variables obtained from 'ctx' */ | ||
| 391 | BIGNUM *val[TABLE_SIZE]; | ||
| 392 | BN_MONT_CTX *mont = NULL; | ||
| 393 | |||
| 394 | if (ct) { | ||
| 395 | return BN_mod_exp_mont_consttime(rr, a, p, m, ctx, in_mont); | ||
| 396 | } | ||
| 397 | |||
| 398 | |||
| 399 | if (!BN_is_odd(m)) { | ||
| 400 | BNerror(BN_R_CALLED_WITH_EVEN_MODULUS); | ||
| 401 | return (0); | ||
| 402 | } | ||
| 403 | |||
| 404 | bits = BN_num_bits(p); | ||
| 405 | if (bits == 0) { | ||
| 406 | /* x**0 mod 1 is still zero. */ | ||
| 407 | if (BN_is_one(m)) { | ||
| 408 | ret = 1; | ||
| 409 | BN_zero(rr); | ||
| 410 | } else | ||
| 411 | ret = BN_one(rr); | ||
| 412 | return ret; | ||
| 413 | } | ||
| 414 | |||
| 415 | BN_CTX_start(ctx); | ||
| 416 | if ((d = BN_CTX_get(ctx)) == NULL) | ||
| 417 | goto err; | ||
| 418 | if ((r = BN_CTX_get(ctx)) == NULL) | ||
| 419 | goto err; | ||
| 420 | if ((val[0] = BN_CTX_get(ctx)) == NULL) | ||
| 421 | goto err; | ||
| 422 | |||
| 423 | /* If this is not done, things will break in the montgomery | ||
| 424 | * part */ | ||
| 425 | |||
| 426 | if (in_mont != NULL) | ||
| 427 | mont = in_mont; | ||
| 428 | else { | ||
| 429 | if ((mont = BN_MONT_CTX_new()) == NULL) | ||
| 430 | goto err; | ||
| 431 | if (!BN_MONT_CTX_set(mont, m, ctx)) | ||
| 432 | goto err; | ||
| 433 | } | ||
| 434 | |||
| 435 | if (a->neg || BN_ucmp(a, m) >= 0) { | ||
| 436 | if (!BN_nnmod(val[0], a,m, ctx)) | ||
| 437 | goto err; | ||
| 438 | aa = val[0]; | ||
| 439 | } else | ||
| 440 | aa = a; | ||
| 441 | if (BN_is_zero(aa)) { | ||
| 442 | BN_zero(rr); | ||
| 443 | ret = 1; | ||
| 444 | goto err; | ||
| 445 | } | ||
| 446 | if (!BN_to_montgomery(val[0], aa, mont, ctx)) | ||
| 447 | goto err; /* 1 */ | ||
| 448 | |||
| 449 | window = BN_window_bits_for_exponent_size(bits); | ||
| 450 | if (window > 1) { | ||
| 451 | if (!BN_mod_mul_montgomery(d, val[0], val[0], mont, ctx)) | ||
| 452 | goto err; /* 2 */ | ||
| 453 | j = 1 << (window - 1); | ||
| 454 | for (i = 1; i < j; i++) { | ||
| 455 | if (((val[i] = BN_CTX_get(ctx)) == NULL) || | ||
| 456 | !BN_mod_mul_montgomery(val[i], val[i - 1], | ||
| 457 | d, mont, ctx)) | ||
| 458 | goto err; | ||
| 459 | } | ||
| 460 | } | ||
| 461 | |||
| 462 | start = 1; /* This is used to avoid multiplication etc | ||
| 463 | * when there is only the value '1' in the | ||
| 464 | * buffer. */ | ||
| 465 | wvalue = 0; /* The 'value' of the window */ | ||
| 466 | wstart = bits - 1; /* The top bit of the window */ | ||
| 467 | wend = 0; /* The bottom bit of the window */ | ||
| 468 | |||
| 469 | if (!BN_to_montgomery(r, BN_value_one(), mont, ctx)) | ||
| 470 | goto err; | ||
| 471 | for (;;) { | ||
| 472 | if (BN_is_bit_set(p, wstart) == 0) { | ||
| 473 | if (!start) { | ||
| 474 | if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) | ||
| 475 | goto err; | 242 | goto err; |
| 476 | } | ||
| 477 | if (wstart == 0) | 243 | if (wstart == 0) |
| 478 | break; | 244 | break; |
| 479 | wstart--; | 245 | wstart--; |
| @@ -501,12 +267,12 @@ BN_mod_exp_mont_internal(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIG | |||
| 501 | /* add the 'bytes above' */ | 267 | /* add the 'bytes above' */ |
| 502 | if (!start) | 268 | if (!start) |
| 503 | for (i = 0; i < j; i++) { | 269 | for (i = 0; i < j; i++) { |
| 504 | if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) | 270 | if (!BN_mod_mul(r, r, r, m, ctx)) |
| 505 | goto err; | 271 | goto err; |
| 506 | } | 272 | } |
| 507 | 273 | ||
| 508 | /* wvalue will be an odd number < 2^window */ | 274 | /* wvalue will be an odd number < 2^window */ |
| 509 | if (!BN_mod_mul_montgomery(r, r, val[wvalue >> 1], mont, ctx)) | 275 | if (!BN_mod_mul(r, r, val[wvalue >> 1], m, ctx)) |
| 510 | goto err; | 276 | goto err; |
| 511 | 277 | ||
| 512 | /* move the 'window' down further */ | 278 | /* move the 'window' down further */ |
| @@ -516,39 +282,13 @@ BN_mod_exp_mont_internal(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIG | |||
| 516 | if (wstart < 0) | 282 | if (wstart < 0) |
| 517 | break; | 283 | break; |
| 518 | } | 284 | } |
| 519 | if (!BN_from_montgomery(rr, r,mont, ctx)) | ||
| 520 | goto err; | ||
| 521 | ret = 1; | 285 | ret = 1; |
| 522 | 286 | ||
| 523 | err: | 287 | err: |
| 524 | if ((in_mont == NULL) && (mont != NULL)) | ||
| 525 | BN_MONT_CTX_free(mont); | ||
| 526 | BN_CTX_end(ctx); | 288 | BN_CTX_end(ctx); |
| 527 | return (ret); | 289 | return (ret); |
| 528 | } | 290 | } |
| 529 | 291 | ||
| 530 | int | ||
| 531 | BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, | ||
| 532 | BN_CTX *ctx, BN_MONT_CTX *in_mont) | ||
| 533 | { | ||
| 534 | return BN_mod_exp_mont_internal(rr, a, p, m, ctx, in_mont, | ||
| 535 | (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)); | ||
| 536 | } | ||
| 537 | |||
| 538 | int | ||
| 539 | BN_mod_exp_mont_ct(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, | ||
| 540 | BN_CTX *ctx, BN_MONT_CTX *in_mont) | ||
| 541 | { | ||
| 542 | return BN_mod_exp_mont_internal(rr, a, p, m, ctx, in_mont, 1); | ||
| 543 | } | ||
| 544 | |||
| 545 | int | ||
| 546 | BN_mod_exp_mont_nonct(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, | ||
| 547 | BN_CTX *ctx, BN_MONT_CTX *in_mont) | ||
| 548 | { | ||
| 549 | return BN_mod_exp_mont_internal(rr, a, p, m, ctx, in_mont, 0); | ||
| 550 | } | ||
| 551 | |||
| 552 | /* BN_mod_exp_mont_consttime() stores the precomputed powers in a specific layout | 292 | /* BN_mod_exp_mont_consttime() stores the precomputed powers in a specific layout |
| 553 | * so that accessing any of these table values shows the same access pattern as far | 293 | * so that accessing any of these table values shows the same access pattern as far |
| 554 | * as cache lines are concerned. The following functions are used to transfer a BIGNUM | 294 | * as cache lines are concerned. The following functions are used to transfer a BIGNUM |
| @@ -892,6 +632,176 @@ err: | |||
| 892 | return (ret); | 632 | return (ret); |
| 893 | } | 633 | } |
| 894 | 634 | ||
| 635 | static int | ||
| 636 | BN_mod_exp_mont_internal(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, | ||
| 637 | BN_CTX *ctx, BN_MONT_CTX *in_mont, int ct) | ||
| 638 | { | ||
| 639 | int i, j, bits, ret = 0, wstart, wend, window, wvalue; | ||
| 640 | int start = 1; | ||
| 641 | BIGNUM *d, *r; | ||
| 642 | const BIGNUM *aa; | ||
| 643 | /* Table of variables obtained from 'ctx' */ | ||
| 644 | BIGNUM *val[TABLE_SIZE]; | ||
| 645 | BN_MONT_CTX *mont = NULL; | ||
| 646 | |||
| 647 | if (ct) { | ||
| 648 | return BN_mod_exp_mont_consttime(rr, a, p, m, ctx, in_mont); | ||
| 649 | } | ||
| 650 | |||
| 651 | |||
| 652 | if (!BN_is_odd(m)) { | ||
| 653 | BNerror(BN_R_CALLED_WITH_EVEN_MODULUS); | ||
| 654 | return (0); | ||
| 655 | } | ||
| 656 | |||
| 657 | bits = BN_num_bits(p); | ||
| 658 | if (bits == 0) { | ||
| 659 | /* x**0 mod 1 is still zero. */ | ||
| 660 | if (BN_is_one(m)) { | ||
| 661 | ret = 1; | ||
| 662 | BN_zero(rr); | ||
| 663 | } else | ||
| 664 | ret = BN_one(rr); | ||
| 665 | return ret; | ||
| 666 | } | ||
| 667 | |||
| 668 | BN_CTX_start(ctx); | ||
| 669 | if ((d = BN_CTX_get(ctx)) == NULL) | ||
| 670 | goto err; | ||
| 671 | if ((r = BN_CTX_get(ctx)) == NULL) | ||
| 672 | goto err; | ||
| 673 | if ((val[0] = BN_CTX_get(ctx)) == NULL) | ||
| 674 | goto err; | ||
| 675 | |||
| 676 | /* If this is not done, things will break in the montgomery | ||
| 677 | * part */ | ||
| 678 | |||
| 679 | if (in_mont != NULL) | ||
| 680 | mont = in_mont; | ||
| 681 | else { | ||
| 682 | if ((mont = BN_MONT_CTX_new()) == NULL) | ||
| 683 | goto err; | ||
| 684 | if (!BN_MONT_CTX_set(mont, m, ctx)) | ||
| 685 | goto err; | ||
| 686 | } | ||
| 687 | |||
| 688 | if (a->neg || BN_ucmp(a, m) >= 0) { | ||
| 689 | if (!BN_nnmod(val[0], a,m, ctx)) | ||
| 690 | goto err; | ||
| 691 | aa = val[0]; | ||
| 692 | } else | ||
| 693 | aa = a; | ||
| 694 | if (BN_is_zero(aa)) { | ||
| 695 | BN_zero(rr); | ||
| 696 | ret = 1; | ||
| 697 | goto err; | ||
| 698 | } | ||
| 699 | if (!BN_to_montgomery(val[0], aa, mont, ctx)) | ||
| 700 | goto err; /* 1 */ | ||
| 701 | |||
| 702 | window = BN_window_bits_for_exponent_size(bits); | ||
| 703 | if (window > 1) { | ||
| 704 | if (!BN_mod_mul_montgomery(d, val[0], val[0], mont, ctx)) | ||
| 705 | goto err; /* 2 */ | ||
| 706 | j = 1 << (window - 1); | ||
| 707 | for (i = 1; i < j; i++) { | ||
| 708 | if (((val[i] = BN_CTX_get(ctx)) == NULL) || | ||
| 709 | !BN_mod_mul_montgomery(val[i], val[i - 1], | ||
| 710 | d, mont, ctx)) | ||
| 711 | goto err; | ||
| 712 | } | ||
| 713 | } | ||
| 714 | |||
| 715 | start = 1; /* This is used to avoid multiplication etc | ||
| 716 | * when there is only the value '1' in the | ||
| 717 | * buffer. */ | ||
| 718 | wvalue = 0; /* The 'value' of the window */ | ||
| 719 | wstart = bits - 1; /* The top bit of the window */ | ||
| 720 | wend = 0; /* The bottom bit of the window */ | ||
| 721 | |||
| 722 | if (!BN_to_montgomery(r, BN_value_one(), mont, ctx)) | ||
| 723 | goto err; | ||
| 724 | for (;;) { | ||
| 725 | if (BN_is_bit_set(p, wstart) == 0) { | ||
| 726 | if (!start) { | ||
| 727 | if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) | ||
| 728 | goto err; | ||
| 729 | } | ||
| 730 | if (wstart == 0) | ||
| 731 | break; | ||
| 732 | wstart--; | ||
| 733 | continue; | ||
| 734 | } | ||
| 735 | /* We now have wstart on a 'set' bit, we now need to work out | ||
| 736 | * how bit a window to do. To do this we need to scan | ||
| 737 | * forward until the last set bit before the end of the | ||
| 738 | * window */ | ||
| 739 | j = wstart; | ||
| 740 | wvalue = 1; | ||
| 741 | wend = 0; | ||
| 742 | for (i = 1; i < window; i++) { | ||
| 743 | if (wstart - i < 0) | ||
| 744 | break; | ||
| 745 | if (BN_is_bit_set(p, wstart - i)) { | ||
| 746 | wvalue <<= (i - wend); | ||
| 747 | wvalue |= 1; | ||
| 748 | wend = i; | ||
| 749 | } | ||
| 750 | } | ||
| 751 | |||
| 752 | /* wend is the size of the current window */ | ||
| 753 | j = wend + 1; | ||
| 754 | /* add the 'bytes above' */ | ||
| 755 | if (!start) | ||
| 756 | for (i = 0; i < j; i++) { | ||
| 757 | if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) | ||
| 758 | goto err; | ||
| 759 | } | ||
| 760 | |||
| 761 | /* wvalue will be an odd number < 2^window */ | ||
| 762 | if (!BN_mod_mul_montgomery(r, r, val[wvalue >> 1], mont, ctx)) | ||
| 763 | goto err; | ||
| 764 | |||
| 765 | /* move the 'window' down further */ | ||
| 766 | wstart -= wend + 1; | ||
| 767 | wvalue = 0; | ||
| 768 | start = 0; | ||
| 769 | if (wstart < 0) | ||
| 770 | break; | ||
| 771 | } | ||
| 772 | if (!BN_from_montgomery(rr, r,mont, ctx)) | ||
| 773 | goto err; | ||
| 774 | ret = 1; | ||
| 775 | |||
| 776 | err: | ||
| 777 | if ((in_mont == NULL) && (mont != NULL)) | ||
| 778 | BN_MONT_CTX_free(mont); | ||
| 779 | BN_CTX_end(ctx); | ||
| 780 | return (ret); | ||
| 781 | } | ||
| 782 | |||
| 783 | int | ||
| 784 | BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, | ||
| 785 | BN_CTX *ctx, BN_MONT_CTX *in_mont) | ||
| 786 | { | ||
| 787 | return BN_mod_exp_mont_internal(rr, a, p, m, ctx, in_mont, | ||
| 788 | (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)); | ||
| 789 | } | ||
| 790 | |||
| 791 | int | ||
| 792 | BN_mod_exp_mont_ct(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, | ||
| 793 | BN_CTX *ctx, BN_MONT_CTX *in_mont) | ||
| 794 | { | ||
| 795 | return BN_mod_exp_mont_internal(rr, a, p, m, ctx, in_mont, 1); | ||
| 796 | } | ||
| 797 | |||
| 798 | int | ||
| 799 | BN_mod_exp_mont_nonct(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, | ||
| 800 | BN_CTX *ctx, BN_MONT_CTX *in_mont) | ||
| 801 | { | ||
| 802 | return BN_mod_exp_mont_internal(rr, a, p, m, ctx, in_mont, 0); | ||
| 803 | } | ||
| 804 | |||
| 895 | int | 805 | int |
| 896 | BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p, const BIGNUM *m, | 806 | BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p, const BIGNUM *m, |
| 897 | BN_CTX *ctx, BN_MONT_CTX *in_mont) | 807 | BN_CTX *ctx, BN_MONT_CTX *in_mont) |
| @@ -1040,17 +950,16 @@ err: | |||
| 1040 | return (ret); | 950 | return (ret); |
| 1041 | } | 951 | } |
| 1042 | 952 | ||
| 1043 | |||
| 1044 | /* The old fallback, simple version :-) */ | ||
| 1045 | int | 953 | int |
| 1046 | BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, | 954 | BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, |
| 1047 | BN_CTX *ctx) | 955 | BN_CTX *ctx) |
| 1048 | { | 956 | { |
| 1049 | int i, j, bits, ret = 0, wstart, wend, window, wvalue; | 957 | int i, j, bits, ret = 0, wstart, wend, window, wvalue; |
| 1050 | int start = 1; | 958 | int start = 1; |
| 1051 | BIGNUM *d; | 959 | BIGNUM *aa; |
| 1052 | /* Table of variables obtained from 'ctx' */ | 960 | /* Table of variables obtained from 'ctx' */ |
| 1053 | BIGNUM *val[TABLE_SIZE]; | 961 | BIGNUM *val[TABLE_SIZE]; |
| 962 | BN_RECP_CTX recp; | ||
| 1054 | 963 | ||
| 1055 | if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { | 964 | if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { |
| 1056 | /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ | 965 | /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ |
| @@ -1069,13 +978,27 @@ BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, | |||
| 1069 | return ret; | 978 | return ret; |
| 1070 | } | 979 | } |
| 1071 | 980 | ||
| 981 | BN_RECP_CTX_init(&recp); | ||
| 982 | |||
| 1072 | BN_CTX_start(ctx); | 983 | BN_CTX_start(ctx); |
| 1073 | if ((d = BN_CTX_get(ctx)) == NULL) | 984 | if ((aa = BN_CTX_get(ctx)) == NULL) |
| 1074 | goto err; | 985 | goto err; |
| 1075 | if ((val[0] = BN_CTX_get(ctx)) == NULL) | 986 | if ((val[0] = BN_CTX_get(ctx)) == NULL) |
| 1076 | goto err; | 987 | goto err; |
| 1077 | 988 | ||
| 1078 | if (!BN_nnmod(val[0],a,m,ctx)) | 989 | if (m->neg) { |
| 990 | /* ignore sign of 'm' */ | ||
| 991 | if (!BN_copy(aa, m)) | ||
| 992 | goto err; | ||
| 993 | aa->neg = 0; | ||
| 994 | if (BN_RECP_CTX_set(&recp, aa, ctx) <= 0) | ||
| 995 | goto err; | ||
| 996 | } else { | ||
| 997 | if (BN_RECP_CTX_set(&recp, m, ctx) <= 0) | ||
| 998 | goto err; | ||
| 999 | } | ||
| 1000 | |||
| 1001 | if (!BN_nnmod(val[0], a, m, ctx)) | ||
| 1079 | goto err; /* 1 */ | 1002 | goto err; /* 1 */ |
| 1080 | if (BN_is_zero(val[0])) { | 1003 | if (BN_is_zero(val[0])) { |
| 1081 | BN_zero(r); | 1004 | BN_zero(r); |
| @@ -1085,12 +1008,13 @@ BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, | |||
| 1085 | 1008 | ||
| 1086 | window = BN_window_bits_for_exponent_size(bits); | 1009 | window = BN_window_bits_for_exponent_size(bits); |
| 1087 | if (window > 1) { | 1010 | if (window > 1) { |
| 1088 | if (!BN_mod_mul(d, val[0], val[0], m, ctx)) | 1011 | if (!BN_mod_mul_reciprocal(aa, val[0], val[0], &recp, ctx)) |
| 1089 | goto err; /* 2 */ | 1012 | goto err; /* 2 */ |
| 1090 | j = 1 << (window - 1); | 1013 | j = 1 << (window - 1); |
| 1091 | for (i = 1; i < j; i++) { | 1014 | for (i = 1; i < j; i++) { |
| 1092 | if (((val[i] = BN_CTX_get(ctx)) == NULL) || | 1015 | if (((val[i] = BN_CTX_get(ctx)) == NULL) || |
| 1093 | !BN_mod_mul(val[i], val[i - 1], d,m, ctx)) | 1016 | !BN_mod_mul_reciprocal(val[i], val[i - 1], |
| 1017 | aa, &recp, ctx)) | ||
| 1094 | goto err; | 1018 | goto err; |
| 1095 | } | 1019 | } |
| 1096 | } | 1020 | } |
| @@ -1108,7 +1032,7 @@ BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, | |||
| 1108 | for (;;) { | 1032 | for (;;) { |
| 1109 | if (BN_is_bit_set(p, wstart) == 0) { | 1033 | if (BN_is_bit_set(p, wstart) == 0) { |
| 1110 | if (!start) | 1034 | if (!start) |
| 1111 | if (!BN_mod_mul(r, r, r, m, ctx)) | 1035 | if (!BN_mod_mul_reciprocal(r, r,r, &recp, ctx)) |
| 1112 | goto err; | 1036 | goto err; |
| 1113 | if (wstart == 0) | 1037 | if (wstart == 0) |
| 1114 | break; | 1038 | break; |
| @@ -1137,12 +1061,12 @@ BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, | |||
| 1137 | /* add the 'bytes above' */ | 1061 | /* add the 'bytes above' */ |
| 1138 | if (!start) | 1062 | if (!start) |
| 1139 | for (i = 0; i < j; i++) { | 1063 | for (i = 0; i < j; i++) { |
| 1140 | if (!BN_mod_mul(r, r, r, m, ctx)) | 1064 | if (!BN_mod_mul_reciprocal(r, r,r, &recp, ctx)) |
| 1141 | goto err; | 1065 | goto err; |
| 1142 | } | 1066 | } |
| 1143 | 1067 | ||
| 1144 | /* wvalue will be an odd number < 2^window */ | 1068 | /* wvalue will be an odd number < 2^window */ |
| 1145 | if (!BN_mod_mul(r, r, val[wvalue >> 1], m, ctx)) | 1069 | if (!BN_mod_mul_reciprocal(r, r,val[wvalue >> 1], &recp, ctx)) |
| 1146 | goto err; | 1070 | goto err; |
| 1147 | 1071 | ||
| 1148 | /* move the 'window' down further */ | 1072 | /* move the 'window' down further */ |
| @@ -1156,5 +1080,78 @@ BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, | |||
| 1156 | 1080 | ||
| 1157 | err: | 1081 | err: |
| 1158 | BN_CTX_end(ctx); | 1082 | BN_CTX_end(ctx); |
| 1083 | BN_RECP_CTX_free(&recp); | ||
| 1159 | return (ret); | 1084 | return (ret); |
| 1160 | } | 1085 | } |
| 1086 | |||
| 1087 | static int | ||
| 1088 | BN_mod_exp_internal(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, | ||
| 1089 | BN_CTX *ctx, int ct) | ||
| 1090 | { | ||
| 1091 | int ret; | ||
| 1092 | |||
| 1093 | |||
| 1094 | /* For even modulus m = 2^k*m_odd, it might make sense to compute | ||
| 1095 | * a^p mod m_odd and a^p mod 2^k separately (with Montgomery | ||
| 1096 | * exponentiation for the odd part), using appropriate exponent | ||
| 1097 | * reductions, and combine the results using the CRT. | ||
| 1098 | * | ||
| 1099 | * For now, we use Montgomery only if the modulus is odd; otherwise, | ||
| 1100 | * exponentiation using the reciprocal-based quick remaindering | ||
| 1101 | * algorithm is used. | ||
| 1102 | * | ||
| 1103 | * (Timing obtained with expspeed.c [computations a^p mod m | ||
| 1104 | * where a, p, m are of the same length: 256, 512, 1024, 2048, | ||
| 1105 | * 4096, 8192 bits], compared to the running time of the | ||
| 1106 | * standard algorithm: | ||
| 1107 | * | ||
| 1108 | * BN_mod_exp_mont 33 .. 40 % [AMD K6-2, Linux, debug configuration] | ||
| 1109 | * 55 .. 77 % [UltraSparc processor, but | ||
| 1110 | * debug-solaris-sparcv8-gcc conf.] | ||
| 1111 | * | ||
| 1112 | * BN_mod_exp_recp 50 .. 70 % [AMD K6-2, Linux, debug configuration] | ||
| 1113 | * 62 .. 118 % [UltraSparc, debug-solaris-sparcv8-gcc] | ||
| 1114 | * | ||
| 1115 | * On the Sparc, BN_mod_exp_recp was faster than BN_mod_exp_mont | ||
| 1116 | * at 2048 and more bits, but at 512 and 1024 bits, it was | ||
| 1117 | * slower even than the standard algorithm! | ||
| 1118 | * | ||
| 1119 | * "Real" timings [linux-elf, solaris-sparcv9-gcc configurations] | ||
| 1120 | * should be obtained when the new Montgomery reduction code | ||
| 1121 | * has been integrated into OpenSSL.) | ||
| 1122 | */ | ||
| 1123 | |||
| 1124 | if (BN_is_odd(m)) { | ||
| 1125 | if (a->top == 1 && !a->neg && !ct) { | ||
| 1126 | BN_ULONG A = a->d[0]; | ||
| 1127 | ret = BN_mod_exp_mont_word(r, A,p, m,ctx, NULL); | ||
| 1128 | } else | ||
| 1129 | ret = BN_mod_exp_mont_ct(r, a,p, m,ctx, NULL); | ||
| 1130 | } else { | ||
| 1131 | ret = BN_mod_exp_recp(r, a,p, m, ctx); | ||
| 1132 | } | ||
| 1133 | |||
| 1134 | return (ret); | ||
| 1135 | } | ||
| 1136 | |||
| 1137 | int | ||
| 1138 | BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, | ||
| 1139 | BN_CTX *ctx) | ||
| 1140 | { | ||
| 1141 | return BN_mod_exp_internal(r, a, p, m, ctx, | ||
| 1142 | (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)); | ||
| 1143 | } | ||
| 1144 | |||
| 1145 | int | ||
| 1146 | BN_mod_exp_ct(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, | ||
| 1147 | BN_CTX *ctx) | ||
| 1148 | { | ||
| 1149 | return BN_mod_exp_internal(r, a, p, m, ctx, 1); | ||
| 1150 | } | ||
| 1151 | |||
| 1152 | int | ||
| 1153 | BN_mod_exp_nonct(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, | ||
| 1154 | BN_CTX *ctx) | ||
| 1155 | { | ||
| 1156 | return BN_mod_exp_internal(r, a, p, m, ctx, 0); | ||
| 1157 | } | ||
