diff options
Diffstat (limited to 'src/lib/libcrypto/bn/bn_exp.c')
-rw-r--r-- | src/lib/libcrypto/bn/bn_exp.c | 240 |
1 files changed, 173 insertions, 67 deletions
diff --git a/src/lib/libcrypto/bn/bn_exp.c b/src/lib/libcrypto/bn/bn_exp.c index d9b6c737fc..2abf6fd678 100644 --- a/src/lib/libcrypto/bn/bn_exp.c +++ b/src/lib/libcrypto/bn/bn_exp.c | |||
@@ -113,6 +113,18 @@ | |||
113 | #include "cryptlib.h" | 113 | #include "cryptlib.h" |
114 | #include "bn_lcl.h" | 114 | #include "bn_lcl.h" |
115 | 115 | ||
116 | #include <stdlib.h> | ||
117 | #ifdef _WIN32 | ||
118 | # include <malloc.h> | ||
119 | # ifndef alloca | ||
120 | # define alloca _alloca | ||
121 | # endif | ||
122 | #elif defined(__GNUC__) | ||
123 | # ifndef alloca | ||
124 | # define alloca(s) __builtin_alloca((s)) | ||
125 | # endif | ||
126 | #endif | ||
127 | |||
116 | /* maximum precomputation table size for *variable* sliding windows */ | 128 | /* maximum precomputation table size for *variable* sliding windows */ |
117 | #define TABLE_SIZE 32 | 129 | #define TABLE_SIZE 32 |
118 | 130 | ||
@@ -522,23 +534,17 @@ err: | |||
522 | * as cache lines are concerned. The following functions are used to transfer a BIGNUM | 534 | * as cache lines are concerned. The following functions are used to transfer a BIGNUM |
523 | * from/to that table. */ | 535 | * from/to that table. */ |
524 | 536 | ||
525 | static int MOD_EXP_CTIME_COPY_TO_PREBUF(BIGNUM *b, int top, unsigned char *buf, int idx, int width) | 537 | static int MOD_EXP_CTIME_COPY_TO_PREBUF(const BIGNUM *b, int top, unsigned char *buf, int idx, int width) |
526 | { | 538 | { |
527 | size_t i, j; | 539 | size_t i, j; |
528 | 540 | ||
529 | if (bn_wexpand(b, top) == NULL) | 541 | if (top > b->top) |
530 | return 0; | 542 | top = b->top; /* this works because 'buf' is explicitly zeroed */ |
531 | while (b->top < top) | ||
532 | { | ||
533 | b->d[b->top++] = 0; | ||
534 | } | ||
535 | |||
536 | for (i = 0, j=idx; i < top * sizeof b->d[0]; i++, j+=width) | 543 | for (i = 0, j=idx; i < top * sizeof b->d[0]; i++, j+=width) |
537 | { | 544 | { |
538 | buf[j] = ((unsigned char*)b->d)[i]; | 545 | buf[j] = ((unsigned char*)b->d)[i]; |
539 | } | 546 | } |
540 | 547 | ||
541 | bn_correct_top(b); | ||
542 | return 1; | 548 | return 1; |
543 | } | 549 | } |
544 | 550 | ||
@@ -561,7 +567,7 @@ static int MOD_EXP_CTIME_COPY_FROM_PREBUF(BIGNUM *b, int top, unsigned char *buf | |||
561 | 567 | ||
562 | /* Given a pointer value, compute the next address that is a cache line multiple. */ | 568 | /* Given a pointer value, compute the next address that is a cache line multiple. */ |
563 | #define MOD_EXP_CTIME_ALIGN(x_) \ | 569 | #define MOD_EXP_CTIME_ALIGN(x_) \ |
564 | ((unsigned char*)(x_) + (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - (((BN_ULONG)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK)))) | 570 | ((unsigned char*)(x_) + (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - (((size_t)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK)))) |
565 | 571 | ||
566 | /* This variant of BN_mod_exp_mont() uses fixed windows and the special | 572 | /* This variant of BN_mod_exp_mont() uses fixed windows and the special |
567 | * precomputation memory layout to limit data-dependency to a minimum | 573 | * precomputation memory layout to limit data-dependency to a minimum |
@@ -572,17 +578,15 @@ static int MOD_EXP_CTIME_COPY_FROM_PREBUF(BIGNUM *b, int top, unsigned char *buf | |||
572 | int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, | 578 | int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, |
573 | const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont) | 579 | const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont) |
574 | { | 580 | { |
575 | int i,bits,ret=0,idx,window,wvalue; | 581 | int i,bits,ret=0,window,wvalue; |
576 | int top; | 582 | int top; |
577 | BIGNUM *r; | ||
578 | const BIGNUM *aa; | ||
579 | BN_MONT_CTX *mont=NULL; | 583 | BN_MONT_CTX *mont=NULL; |
580 | 584 | ||
581 | int numPowers; | 585 | int numPowers; |
582 | unsigned char *powerbufFree=NULL; | 586 | unsigned char *powerbufFree=NULL; |
583 | int powerbufLen = 0; | 587 | int powerbufLen = 0; |
584 | unsigned char *powerbuf=NULL; | 588 | unsigned char *powerbuf=NULL; |
585 | BIGNUM *computeTemp=NULL, *am=NULL; | 589 | BIGNUM tmp, am; |
586 | 590 | ||
587 | bn_check_top(a); | 591 | bn_check_top(a); |
588 | bn_check_top(p); | 592 | bn_check_top(p); |
@@ -602,10 +606,7 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, | |||
602 | return ret; | 606 | return ret; |
603 | } | 607 | } |
604 | 608 | ||
605 | /* Initialize BIGNUM context and allocate intermediate result */ | ||
606 | BN_CTX_start(ctx); | 609 | BN_CTX_start(ctx); |
607 | r = BN_CTX_get(ctx); | ||
608 | if (r == NULL) goto err; | ||
609 | 610 | ||
610 | /* Allocate a montgomery context if it was not supplied by the caller. | 611 | /* Allocate a montgomery context if it was not supplied by the caller. |
611 | * If this is not done, things will break in the montgomery part. | 612 | * If this is not done, things will break in the montgomery part. |
@@ -620,40 +621,154 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, | |||
620 | 621 | ||
621 | /* Get the window size to use with size of p. */ | 622 | /* Get the window size to use with size of p. */ |
622 | window = BN_window_bits_for_ctime_exponent_size(bits); | 623 | window = BN_window_bits_for_ctime_exponent_size(bits); |
624 | #if defined(OPENSSL_BN_ASM_MONT5) | ||
625 | if (window==6 && bits<=1024) window=5; /* ~5% improvement of 2048-bit RSA sign */ | ||
626 | #endif | ||
623 | 627 | ||
624 | /* Allocate a buffer large enough to hold all of the pre-computed | 628 | /* Allocate a buffer large enough to hold all of the pre-computed |
625 | * powers of a. | 629 | * powers of am, am itself and tmp. |
626 | */ | 630 | */ |
627 | numPowers = 1 << window; | 631 | numPowers = 1 << window; |
628 | powerbufLen = sizeof(m->d[0])*top*numPowers; | 632 | powerbufLen = sizeof(m->d[0])*(top*numPowers + |
633 | ((2*top)>numPowers?(2*top):numPowers)); | ||
634 | #ifdef alloca | ||
635 | if (powerbufLen < 3072) | ||
636 | powerbufFree = alloca(powerbufLen+MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH); | ||
637 | else | ||
638 | #endif | ||
629 | if ((powerbufFree=(unsigned char*)OPENSSL_malloc(powerbufLen+MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH)) == NULL) | 639 | if ((powerbufFree=(unsigned char*)OPENSSL_malloc(powerbufLen+MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH)) == NULL) |
630 | goto err; | 640 | goto err; |
631 | 641 | ||
632 | powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree); | 642 | powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree); |
633 | memset(powerbuf, 0, powerbufLen); | 643 | memset(powerbuf, 0, powerbufLen); |
634 | 644 | ||
635 | /* Initialize the intermediate result. Do this early to save double conversion, | 645 | #ifdef alloca |
636 | * once each for a^0 and intermediate result. | 646 | if (powerbufLen < 3072) |
637 | */ | 647 | powerbufFree = NULL; |
638 | if (!BN_to_montgomery(r,BN_value_one(),mont,ctx)) goto err; | 648 | #endif |
639 | if (!MOD_EXP_CTIME_COPY_TO_PREBUF(r, top, powerbuf, 0, numPowers)) goto err; | ||
640 | 649 | ||
641 | /* Initialize computeTemp as a^1 with montgomery precalcs */ | 650 | /* lay down tmp and am right after powers table */ |
642 | computeTemp = BN_CTX_get(ctx); | 651 | tmp.d = (BN_ULONG *)(powerbuf + sizeof(m->d[0])*top*numPowers); |
643 | am = BN_CTX_get(ctx); | 652 | am.d = tmp.d + top; |
644 | if (computeTemp==NULL || am==NULL) goto err; | 653 | tmp.top = am.top = 0; |
654 | tmp.dmax = am.dmax = top; | ||
655 | tmp.neg = am.neg = 0; | ||
656 | tmp.flags = am.flags = BN_FLG_STATIC_DATA; | ||
657 | |||
658 | /* prepare a^0 in Montgomery domain */ | ||
659 | #if 1 | ||
660 | if (!BN_to_montgomery(&tmp,BN_value_one(),mont,ctx)) goto err; | ||
661 | #else | ||
662 | tmp.d[0] = (0-m->d[0])&BN_MASK2; /* 2^(top*BN_BITS2) - m */ | ||
663 | for (i=1;i<top;i++) | ||
664 | tmp.d[i] = (~m->d[i])&BN_MASK2; | ||
665 | tmp.top = top; | ||
666 | #endif | ||
645 | 667 | ||
668 | /* prepare a^1 in Montgomery domain */ | ||
646 | if (a->neg || BN_ucmp(a,m) >= 0) | 669 | if (a->neg || BN_ucmp(a,m) >= 0) |
647 | { | 670 | { |
648 | if (!BN_mod(am,a,m,ctx)) | 671 | if (!BN_mod(&am,a,m,ctx)) goto err; |
649 | goto err; | 672 | if (!BN_to_montgomery(&am,&am,mont,ctx)) goto err; |
650 | aa= am; | ||
651 | } | 673 | } |
652 | else | 674 | else if (!BN_to_montgomery(&am,a,mont,ctx)) goto err; |
653 | aa=a; | 675 | |
654 | if (!BN_to_montgomery(am,aa,mont,ctx)) goto err; | 676 | #if defined(OPENSSL_BN_ASM_MONT5) |
655 | if (!BN_copy(computeTemp, am)) goto err; | 677 | /* This optimization uses ideas from http://eprint.iacr.org/2011/239, |
656 | if (!MOD_EXP_CTIME_COPY_TO_PREBUF(am, top, powerbuf, 1, numPowers)) goto err; | 678 | * specifically optimization of cache-timing attack countermeasures |
679 | * and pre-computation optimization. */ | ||
680 | |||
681 | /* Dedicated window==4 case improves 512-bit RSA sign by ~15%, but as | ||
682 | * 512-bit RSA is hardly relevant, we omit it to spare size... */ | ||
683 | if (window==5) | ||
684 | { | ||
685 | void bn_mul_mont_gather5(BN_ULONG *rp,const BN_ULONG *ap, | ||
686 | const void *table,const BN_ULONG *np, | ||
687 | const BN_ULONG *n0,int num,int power); | ||
688 | void bn_scatter5(const BN_ULONG *inp,size_t num, | ||
689 | void *table,size_t power); | ||
690 | void bn_gather5(BN_ULONG *out,size_t num, | ||
691 | void *table,size_t power); | ||
692 | |||
693 | BN_ULONG *np=mont->N.d, *n0=mont->n0; | ||
694 | |||
695 | /* BN_to_montgomery can contaminate words above .top | ||
696 | * [in BN_DEBUG[_DEBUG] build]... */ | ||
697 | for (i=am.top; i<top; i++) am.d[i]=0; | ||
698 | for (i=tmp.top; i<top; i++) tmp.d[i]=0; | ||
699 | |||
700 | bn_scatter5(tmp.d,top,powerbuf,0); | ||
701 | bn_scatter5(am.d,am.top,powerbuf,1); | ||
702 | bn_mul_mont(tmp.d,am.d,am.d,np,n0,top); | ||
703 | bn_scatter5(tmp.d,top,powerbuf,2); | ||
704 | |||
705 | #if 0 | ||
706 | for (i=3; i<32; i++) | ||
707 | { | ||
708 | /* Calculate a^i = a^(i-1) * a */ | ||
709 | bn_mul_mont_gather5(tmp.d,am.d,powerbuf,np,n0,top,i-1); | ||
710 | bn_scatter5(tmp.d,top,powerbuf,i); | ||
711 | } | ||
712 | #else | ||
713 | /* same as above, but uses squaring for 1/2 of operations */ | ||
714 | for (i=4; i<32; i*=2) | ||
715 | { | ||
716 | bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top); | ||
717 | bn_scatter5(tmp.d,top,powerbuf,i); | ||
718 | } | ||
719 | for (i=3; i<8; i+=2) | ||
720 | { | ||
721 | int j; | ||
722 | bn_mul_mont_gather5(tmp.d,am.d,powerbuf,np,n0,top,i-1); | ||
723 | bn_scatter5(tmp.d,top,powerbuf,i); | ||
724 | for (j=2*i; j<32; j*=2) | ||
725 | { | ||
726 | bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top); | ||
727 | bn_scatter5(tmp.d,top,powerbuf,j); | ||
728 | } | ||
729 | } | ||
730 | for (; i<16; i+=2) | ||
731 | { | ||
732 | bn_mul_mont_gather5(tmp.d,am.d,powerbuf,np,n0,top,i-1); | ||
733 | bn_scatter5(tmp.d,top,powerbuf,i); | ||
734 | bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top); | ||
735 | bn_scatter5(tmp.d,top,powerbuf,2*i); | ||
736 | } | ||
737 | for (; i<32; i+=2) | ||
738 | { | ||
739 | bn_mul_mont_gather5(tmp.d,am.d,powerbuf,np,n0,top,i-1); | ||
740 | bn_scatter5(tmp.d,top,powerbuf,i); | ||
741 | } | ||
742 | #endif | ||
743 | bits--; | ||
744 | for (wvalue=0, i=bits%5; i>=0; i--,bits--) | ||
745 | wvalue = (wvalue<<1)+BN_is_bit_set(p,bits); | ||
746 | bn_gather5(tmp.d,top,powerbuf,wvalue); | ||
747 | |||
748 | /* Scan the exponent one window at a time starting from the most | ||
749 | * significant bits. | ||
750 | */ | ||
751 | while (bits >= 0) | ||
752 | { | ||
753 | for (wvalue=0, i=0; i<5; i++,bits--) | ||
754 | wvalue = (wvalue<<1)+BN_is_bit_set(p,bits); | ||
755 | |||
756 | bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top); | ||
757 | bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top); | ||
758 | bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top); | ||
759 | bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top); | ||
760 | bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top); | ||
761 | bn_mul_mont_gather5(tmp.d,tmp.d,powerbuf,np,n0,top,wvalue); | ||
762 | } | ||
763 | |||
764 | tmp.top=top; | ||
765 | bn_correct_top(&tmp); | ||
766 | } | ||
767 | else | ||
768 | #endif | ||
769 | { | ||
770 | if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 0, numPowers)) goto err; | ||
771 | if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&am, top, powerbuf, 1, numPowers)) goto err; | ||
657 | 772 | ||
658 | /* If the window size is greater than 1, then calculate | 773 | /* If the window size is greater than 1, then calculate |
659 | * val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1) | 774 | * val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1) |
@@ -662,62 +777,54 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, | |||
662 | */ | 777 | */ |
663 | if (window > 1) | 778 | if (window > 1) |
664 | { | 779 | { |
665 | for (i=2; i<numPowers; i++) | 780 | if (!BN_mod_mul_montgomery(&tmp,&am,&am,mont,ctx)) goto err; |
781 | if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 2, numPowers)) goto err; | ||
782 | for (i=3; i<numPowers; i++) | ||
666 | { | 783 | { |
667 | /* Calculate a^i = a^(i-1) * a */ | 784 | /* Calculate a^i = a^(i-1) * a */ |
668 | if (!BN_mod_mul_montgomery(computeTemp,am,computeTemp,mont,ctx)) | 785 | if (!BN_mod_mul_montgomery(&tmp,&am,&tmp,mont,ctx)) |
669 | goto err; | 786 | goto err; |
670 | if (!MOD_EXP_CTIME_COPY_TO_PREBUF(computeTemp, top, powerbuf, i, numPowers)) goto err; | 787 | if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, i, numPowers)) goto err; |
671 | } | 788 | } |
672 | } | 789 | } |
673 | 790 | ||
674 | /* Adjust the number of bits up to a multiple of the window size. | 791 | bits--; |
675 | * If the exponent length is not a multiple of the window size, then | 792 | for (wvalue=0, i=bits%window; i>=0; i--,bits--) |
676 | * this pads the most significant bits with zeros to normalize the | 793 | wvalue = (wvalue<<1)+BN_is_bit_set(p,bits); |
677 | * scanning loop to there's no special cases. | 794 | if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&tmp,top,powerbuf,wvalue,numPowers)) goto err; |
678 | * | 795 | |
679 | * * NOTE: Making the window size a power of two less than the native | 796 | /* Scan the exponent one window at a time starting from the most |
680 | * * word size ensures that the padded bits won't go past the last | 797 | * significant bits. |
681 | * * word in the internal BIGNUM structure. Going past the end will | 798 | */ |
682 | * * still produce the correct result, but causes a different branch | 799 | while (bits >= 0) |
683 | * * to be taken in the BN_is_bit_set function. | ||
684 | */ | ||
685 | bits = ((bits+window-1)/window)*window; | ||
686 | idx=bits-1; /* The top bit of the window */ | ||
687 | |||
688 | /* Scan the exponent one window at a time starting from the most | ||
689 | * significant bits. | ||
690 | */ | ||
691 | while (idx >= 0) | ||
692 | { | 800 | { |
693 | wvalue=0; /* The 'value' of the window */ | 801 | wvalue=0; /* The 'value' of the window */ |
694 | 802 | ||
695 | /* Scan the window, squaring the result as we go */ | 803 | /* Scan the window, squaring the result as we go */ |
696 | for (i=0; i<window; i++,idx--) | 804 | for (i=0; i<window; i++,bits--) |
697 | { | 805 | { |
698 | if (!BN_mod_mul_montgomery(r,r,r,mont,ctx)) goto err; | 806 | if (!BN_mod_mul_montgomery(&tmp,&tmp,&tmp,mont,ctx)) goto err; |
699 | wvalue = (wvalue<<1)+BN_is_bit_set(p,idx); | 807 | wvalue = (wvalue<<1)+BN_is_bit_set(p,bits); |
700 | } | 808 | } |
701 | 809 | ||
702 | /* Fetch the appropriate pre-computed value from the pre-buf */ | 810 | /* Fetch the appropriate pre-computed value from the pre-buf */ |
703 | if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(computeTemp, top, powerbuf, wvalue, numPowers)) goto err; | 811 | if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&am, top, powerbuf, wvalue, numPowers)) goto err; |
704 | 812 | ||
705 | /* Multiply the result into the intermediate result */ | 813 | /* Multiply the result into the intermediate result */ |
706 | if (!BN_mod_mul_montgomery(r,r,computeTemp,mont,ctx)) goto err; | 814 | if (!BN_mod_mul_montgomery(&tmp,&tmp,&am,mont,ctx)) goto err; |
707 | } | 815 | } |
816 | } | ||
708 | 817 | ||
709 | /* Convert the final result from montgomery to standard format */ | 818 | /* Convert the final result from montgomery to standard format */ |
710 | if (!BN_from_montgomery(rr,r,mont,ctx)) goto err; | 819 | if (!BN_from_montgomery(rr,&tmp,mont,ctx)) goto err; |
711 | ret=1; | 820 | ret=1; |
712 | err: | 821 | err: |
713 | if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont); | 822 | if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont); |
714 | if (powerbuf!=NULL) | 823 | if (powerbuf!=NULL) |
715 | { | 824 | { |
716 | OPENSSL_cleanse(powerbuf,powerbufLen); | 825 | OPENSSL_cleanse(powerbuf,powerbufLen); |
717 | OPENSSL_free(powerbufFree); | 826 | if (powerbufFree) OPENSSL_free(powerbufFree); |
718 | } | 827 | } |
719 | if (am!=NULL) BN_clear(am); | ||
720 | if (computeTemp!=NULL) BN_clear(computeTemp); | ||
721 | BN_CTX_end(ctx); | 828 | BN_CTX_end(ctx); |
722 | return(ret); | 829 | return(ret); |
723 | } | 830 | } |
@@ -988,4 +1095,3 @@ err: | |||
988 | bn_check_top(r); | 1095 | bn_check_top(r); |
989 | return(ret); | 1096 | return(ret); |
990 | } | 1097 | } |
991 | |||