diff options
| author | markus <> | 2002-09-05 12:51:50 +0000 |
|---|---|---|
| committer | markus <> | 2002-09-05 12:51:50 +0000 |
| commit | 15b5d84f9da2ce4bfae8580e56e34a859f74ad71 (patch) | |
| tree | bf939e82d7fd73cc8a01cf6959002209972091bc /src/lib/libcrypto/bn/bn_asm.c | |
| parent | 027351f729b9e837200dae6e1520cda6577ab930 (diff) | |
| download | openbsd-15b5d84f9da2ce4bfae8580e56e34a859f74ad71.tar.gz openbsd-15b5d84f9da2ce4bfae8580e56e34a859f74ad71.tar.bz2 openbsd-15b5d84f9da2ce4bfae8580e56e34a859f74ad71.zip | |
import openssl-0.9.7-beta1
Diffstat (limited to 'src/lib/libcrypto/bn/bn_asm.c')
| -rw-r--r-- | src/lib/libcrypto/bn/bn_asm.c | 178 |
1 files changed, 104 insertions, 74 deletions
diff --git a/src/lib/libcrypto/bn/bn_asm.c b/src/lib/libcrypto/bn/bn_asm.c index 4d3da16a0c..be8aa3ffc5 100644 --- a/src/lib/libcrypto/bn/bn_asm.c +++ b/src/lib/libcrypto/bn/bn_asm.c | |||
| @@ -56,97 +56,95 @@ | |||
| 56 | * [including the GNU Public Licence.] | 56 | * [including the GNU Public Licence.] |
| 57 | */ | 57 | */ |
| 58 | 58 | ||
| 59 | #ifndef BN_DEBUG | ||
| 60 | # undef NDEBUG /* avoid conflicting definitions */ | ||
| 61 | # define NDEBUG | ||
| 62 | #endif | ||
| 63 | |||
| 59 | #include <stdio.h> | 64 | #include <stdio.h> |
| 65 | #include <assert.h> | ||
| 60 | #include "cryptlib.h" | 66 | #include "cryptlib.h" |
| 61 | #include "bn_lcl.h" | 67 | #include "bn_lcl.h" |
| 62 | 68 | ||
| 63 | #ifdef BN_LLONG | 69 | #if defined(BN_LLONG) || defined(BN_UMULT_HIGH) |
| 64 | 70 | ||
| 65 | BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) | 71 | BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) |
| 66 | { | 72 | { |
| 67 | BN_ULONG c1=0; | 73 | BN_ULONG c1=0; |
| 68 | 74 | ||
| 69 | bn_check_num(num); | 75 | assert(num >= 0); |
| 70 | if (num <= 0) return(c1); | 76 | if (num <= 0) return(c1); |
| 71 | 77 | ||
| 72 | for (;;) | 78 | while (num&~3) |
| 73 | { | 79 | { |
| 74 | mul_add(rp[0],ap[0],w,c1); | 80 | mul_add(rp[0],ap[0],w,c1); |
| 75 | if (--num == 0) break; | ||
| 76 | mul_add(rp[1],ap[1],w,c1); | 81 | mul_add(rp[1],ap[1],w,c1); |
| 77 | if (--num == 0) break; | ||
| 78 | mul_add(rp[2],ap[2],w,c1); | 82 | mul_add(rp[2],ap[2],w,c1); |
| 79 | if (--num == 0) break; | ||
| 80 | mul_add(rp[3],ap[3],w,c1); | 83 | mul_add(rp[3],ap[3],w,c1); |
| 81 | if (--num == 0) break; | 84 | ap+=4; rp+=4; num-=4; |
| 82 | ap+=4; | 85 | } |
| 83 | rp+=4; | 86 | if (num) |
| 87 | { | ||
| 88 | mul_add(rp[0],ap[0],w,c1); if (--num==0) return c1; | ||
| 89 | mul_add(rp[1],ap[1],w,c1); if (--num==0) return c1; | ||
| 90 | mul_add(rp[2],ap[2],w,c1); return c1; | ||
| 84 | } | 91 | } |
| 85 | 92 | ||
| 86 | return(c1); | 93 | return(c1); |
| 87 | } | 94 | } |
| 88 | 95 | ||
| 89 | BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) | 96 | BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) |
| 90 | { | 97 | { |
| 91 | BN_ULONG c1=0; | 98 | BN_ULONG c1=0; |
| 92 | 99 | ||
| 93 | bn_check_num(num); | 100 | assert(num >= 0); |
| 94 | if (num <= 0) return(c1); | 101 | if (num <= 0) return(c1); |
| 95 | 102 | ||
| 96 | /* for (;;) */ | 103 | while (num&~3) |
| 97 | while (1) /* circumvent egcs-1.1.2 bug */ | ||
| 98 | { | 104 | { |
| 99 | mul(rp[0],ap[0],w,c1); | 105 | mul(rp[0],ap[0],w,c1); |
| 100 | if (--num == 0) break; | ||
| 101 | mul(rp[1],ap[1],w,c1); | 106 | mul(rp[1],ap[1],w,c1); |
| 102 | if (--num == 0) break; | ||
| 103 | mul(rp[2],ap[2],w,c1); | 107 | mul(rp[2],ap[2],w,c1); |
| 104 | if (--num == 0) break; | ||
| 105 | mul(rp[3],ap[3],w,c1); | 108 | mul(rp[3],ap[3],w,c1); |
| 106 | if (--num == 0) break; | 109 | ap+=4; rp+=4; num-=4; |
| 107 | ap+=4; | 110 | } |
| 108 | rp+=4; | 111 | if (num) |
| 112 | { | ||
| 113 | mul(rp[0],ap[0],w,c1); if (--num == 0) return c1; | ||
| 114 | mul(rp[1],ap[1],w,c1); if (--num == 0) return c1; | ||
| 115 | mul(rp[2],ap[2],w,c1); | ||
| 109 | } | 116 | } |
| 110 | return(c1); | 117 | return(c1); |
| 111 | } | 118 | } |
| 112 | 119 | ||
| 113 | void bn_sqr_words(BN_ULONG *r, BN_ULONG *a, int n) | 120 | void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n) |
| 114 | { | 121 | { |
| 115 | bn_check_num(n); | 122 | assert(n >= 0); |
| 116 | if (n <= 0) return; | 123 | if (n <= 0) return; |
| 117 | for (;;) | 124 | while (n&~3) |
| 118 | { | 125 | { |
| 119 | BN_ULLONG t; | 126 | sqr(r[0],r[1],a[0]); |
| 120 | 127 | sqr(r[2],r[3],a[1]); | |
| 121 | t=(BN_ULLONG)(a[0])*(a[0]); | 128 | sqr(r[4],r[5],a[2]); |
| 122 | r[0]=Lw(t); r[1]=Hw(t); | 129 | sqr(r[6],r[7],a[3]); |
| 123 | if (--n == 0) break; | 130 | a+=4; r+=8; n-=4; |
| 124 | 131 | } | |
| 125 | t=(BN_ULLONG)(a[1])*(a[1]); | 132 | if (n) |
| 126 | r[2]=Lw(t); r[3]=Hw(t); | 133 | { |
| 127 | if (--n == 0) break; | 134 | sqr(r[0],r[1],a[0]); if (--n == 0) return; |
| 128 | 135 | sqr(r[2],r[3],a[1]); if (--n == 0) return; | |
| 129 | t=(BN_ULLONG)(a[2])*(a[2]); | 136 | sqr(r[4],r[5],a[2]); |
| 130 | r[4]=Lw(t); r[5]=Hw(t); | ||
| 131 | if (--n == 0) break; | ||
| 132 | |||
| 133 | t=(BN_ULLONG)(a[3])*(a[3]); | ||
| 134 | r[6]=Lw(t); r[7]=Hw(t); | ||
| 135 | if (--n == 0) break; | ||
| 136 | |||
| 137 | a+=4; | ||
| 138 | r+=8; | ||
| 139 | } | 137 | } |
| 140 | } | 138 | } |
| 141 | 139 | ||
| 142 | #else | 140 | #else /* !(defined(BN_LLONG) || defined(BN_UMULT_HIGH)) */ |
| 143 | 141 | ||
| 144 | BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) | 142 | BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) |
| 145 | { | 143 | { |
| 146 | BN_ULONG c=0; | 144 | BN_ULONG c=0; |
| 147 | BN_ULONG bl,bh; | 145 | BN_ULONG bl,bh; |
| 148 | 146 | ||
| 149 | bn_check_num(num); | 147 | assert(num >= 0); |
| 150 | if (num <= 0) return((BN_ULONG)0); | 148 | if (num <= 0) return((BN_ULONG)0); |
| 151 | 149 | ||
| 152 | bl=LBITS(w); | 150 | bl=LBITS(w); |
| @@ -168,12 +166,12 @@ BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) | |||
| 168 | return(c); | 166 | return(c); |
| 169 | } | 167 | } |
| 170 | 168 | ||
| 171 | BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) | 169 | BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) |
| 172 | { | 170 | { |
| 173 | BN_ULONG carry=0; | 171 | BN_ULONG carry=0; |
| 174 | BN_ULONG bl,bh; | 172 | BN_ULONG bl,bh; |
| 175 | 173 | ||
| 176 | bn_check_num(num); | 174 | assert(num >= 0); |
| 177 | if (num <= 0) return((BN_ULONG)0); | 175 | if (num <= 0) return((BN_ULONG)0); |
| 178 | 176 | ||
| 179 | bl=LBITS(w); | 177 | bl=LBITS(w); |
| @@ -195,9 +193,9 @@ BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) | |||
| 195 | return(carry); | 193 | return(carry); |
| 196 | } | 194 | } |
| 197 | 195 | ||
| 198 | void bn_sqr_words(BN_ULONG *r, BN_ULONG *a, int n) | 196 | void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n) |
| 199 | { | 197 | { |
| 200 | bn_check_num(n); | 198 | assert(n >= 0); |
| 201 | if (n <= 0) return; | 199 | if (n <= 0) return; |
| 202 | for (;;) | 200 | for (;;) |
| 203 | { | 201 | { |
| @@ -218,7 +216,7 @@ void bn_sqr_words(BN_ULONG *r, BN_ULONG *a, int n) | |||
| 218 | } | 216 | } |
| 219 | } | 217 | } |
| 220 | 218 | ||
| 221 | #endif | 219 | #endif /* !(defined(BN_LLONG) || defined(BN_UMULT_HIGH)) */ |
| 222 | 220 | ||
| 223 | #if defined(BN_LLONG) && defined(BN_DIV2W) | 221 | #if defined(BN_LLONG) && defined(BN_DIV2W) |
| 224 | 222 | ||
| @@ -229,7 +227,7 @@ BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d) | |||
| 229 | 227 | ||
| 230 | #else | 228 | #else |
| 231 | 229 | ||
| 232 | /* Divide h-l by d and return the result. */ | 230 | /* Divide h,l by d and return the result. */ |
| 233 | /* I need to test this some more :-( */ | 231 | /* I need to test this some more :-( */ |
| 234 | BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d) | 232 | BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d) |
| 235 | { | 233 | { |
| @@ -239,13 +237,8 @@ BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d) | |||
| 239 | if (d == 0) return(BN_MASK2); | 237 | if (d == 0) return(BN_MASK2); |
| 240 | 238 | ||
| 241 | i=BN_num_bits_word(d); | 239 | i=BN_num_bits_word(d); |
| 242 | if ((i != BN_BITS2) && (h > (BN_ULONG)1<<i)) | 240 | assert((i == BN_BITS2) || (h > (BN_ULONG)1<<i)); |
| 243 | { | 241 | |
| 244 | #if !defined(NO_STDIO) && !defined(WIN16) | ||
| 245 | fprintf(stderr,"Division would overflow (%d)\n",i); | ||
| 246 | #endif | ||
| 247 | abort(); | ||
| 248 | } | ||
| 249 | i=BN_BITS2-i; | 242 | i=BN_BITS2-i; |
| 250 | if (h >= d) h-=d; | 243 | if (h >= d) h-=d; |
| 251 | 244 | ||
| @@ -300,14 +293,14 @@ BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d) | |||
| 300 | ret|=q; | 293 | ret|=q; |
| 301 | return(ret); | 294 | return(ret); |
| 302 | } | 295 | } |
| 303 | #endif | 296 | #endif /* !(defined(BN_LLONG) && defined(BN_DIV2W)) */ |
| 304 | 297 | ||
| 305 | #ifdef BN_LLONG | 298 | #ifdef BN_LLONG |
| 306 | BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) | 299 | BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) |
| 307 | { | 300 | { |
| 308 | BN_ULLONG ll=0; | 301 | BN_ULLONG ll=0; |
| 309 | 302 | ||
| 310 | bn_check_num(n); | 303 | assert(n >= 0); |
| 311 | if (n <= 0) return((BN_ULONG)0); | 304 | if (n <= 0) return((BN_ULONG)0); |
| 312 | 305 | ||
| 313 | for (;;) | 306 | for (;;) |
| @@ -338,12 +331,12 @@ BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) | |||
| 338 | } | 331 | } |
| 339 | return((BN_ULONG)ll); | 332 | return((BN_ULONG)ll); |
| 340 | } | 333 | } |
| 341 | #else | 334 | #else /* !BN_LLONG */ |
| 342 | BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) | 335 | BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) |
| 343 | { | 336 | { |
| 344 | BN_ULONG c,l,t; | 337 | BN_ULONG c,l,t; |
| 345 | 338 | ||
| 346 | bn_check_num(n); | 339 | assert(n >= 0); |
| 347 | if (n <= 0) return((BN_ULONG)0); | 340 | if (n <= 0) return((BN_ULONG)0); |
| 348 | 341 | ||
| 349 | c=0; | 342 | c=0; |
| @@ -387,14 +380,14 @@ BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) | |||
| 387 | } | 380 | } |
| 388 | return((BN_ULONG)c); | 381 | return((BN_ULONG)c); |
| 389 | } | 382 | } |
| 390 | #endif | 383 | #endif /* !BN_LLONG */ |
| 391 | 384 | ||
| 392 | BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) | 385 | BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) |
| 393 | { | 386 | { |
| 394 | BN_ULONG t1,t2; | 387 | BN_ULONG t1,t2; |
| 395 | int c=0; | 388 | int c=0; |
| 396 | 389 | ||
| 397 | bn_check_num(n); | 390 | assert(n >= 0); |
| 398 | if (n <= 0) return((BN_ULONG)0); | 391 | if (n <= 0) return((BN_ULONG)0); |
| 399 | 392 | ||
| 400 | for (;;) | 393 | for (;;) |
| @@ -433,6 +426,11 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) | |||
| 433 | #undef bn_sqr_comba8 | 426 | #undef bn_sqr_comba8 |
| 434 | #undef bn_sqr_comba4 | 427 | #undef bn_sqr_comba4 |
| 435 | 428 | ||
| 429 | /* mul_add_c(a,b,c0,c1,c2) -- c+=a*b for three word number c=(c2,c1,c0) */ | ||
| 430 | /* mul_add_c2(a,b,c0,c1,c2) -- c+=2*a*b for three word number c=(c2,c1,c0) */ | ||
| 431 | /* sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0) */ | ||
| | | 432 | /* sqr_add_c2(a,i,j,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */ |
| 433 | |||
| 436 | #ifdef BN_LLONG | 434 | #ifdef BN_LLONG |
| 437 | #define mul_add_c(a,b,c0,c1,c2) \ | 435 | #define mul_add_c(a,b,c0,c1,c2) \ |
| 438 | t=(BN_ULLONG)a*b; \ | 436 | t=(BN_ULLONG)a*b; \ |
| @@ -460,7 +458,39 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) | |||
| 460 | 458 | ||
| 461 | #define sqr_add_c2(a,i,j,c0,c1,c2) \ | 459 | #define sqr_add_c2(a,i,j,c0,c1,c2) \ |
| 462 | mul_add_c2((a)[i],(a)[j],c0,c1,c2) | 460 | mul_add_c2((a)[i],(a)[j],c0,c1,c2) |
| 463 | #else | 461 | |
| 462 | #elif defined(BN_UMULT_HIGH) | ||
| 463 | |||
| 464 | #define mul_add_c(a,b,c0,c1,c2) { \ | ||
| 465 | BN_ULONG ta=(a),tb=(b); \ | ||
| 466 | t1 = ta * tb; \ | ||
| 467 | t2 = BN_UMULT_HIGH(ta,tb); \ | ||
| 468 | c0 += t1; t2 += (c0<t1)?1:0; \ | ||
| 469 | c1 += t2; c2 += (c1<t2)?1:0; \ | ||
| 470 | } | ||
| 471 | |||
| 472 | #define mul_add_c2(a,b,c0,c1,c2) { \ | ||
| 473 | BN_ULONG ta=(a),tb=(b),t0; \ | ||
| 474 | t1 = BN_UMULT_HIGH(ta,tb); \ | ||
| 475 | t0 = ta * tb; \ | ||
| 476 | t2 = t1+t1; c2 += (t2<t1)?1:0; \ | ||
| 477 | t1 = t0+t0; t2 += (t1<t0)?1:0; \ | ||
| 478 | c0 += t1; t2 += (c0<t1)?1:0; \ | ||
| 479 | c1 += t2; c2 += (c1<t2)?1:0; \ | ||
| 480 | } | ||
| 481 | |||
| 482 | #define sqr_add_c(a,i,c0,c1,c2) { \ | ||
| 483 | BN_ULONG ta=(a)[i]; \ | ||
| 484 | t1 = ta * ta; \ | ||
| 485 | t2 = BN_UMULT_HIGH(ta,ta); \ | ||
| 486 | c0 += t1; t2 += (c0<t1)?1:0; \ | ||
| 487 | c1 += t2; c2 += (c1<t2)?1:0; \ | ||
| 488 | } | ||
| 489 | |||
| 490 | #define sqr_add_c2(a,i,j,c0,c1,c2) \ | ||
| 491 | mul_add_c2((a)[i],(a)[j],c0,c1,c2) | ||
| 492 | |||
| | | 493 | #else /* !BN_LLONG && !BN_UMULT_HIGH */ |
| 464 | #define mul_add_c(a,b,c0,c1,c2) \ | 494 | #define mul_add_c(a,b,c0,c1,c2) \ |
| 465 | t1=LBITS(a); t2=HBITS(a); \ | 495 | t1=LBITS(a); t2=HBITS(a); \ |
| 466 | bl=LBITS(b); bh=HBITS(b); \ | 496 | bl=LBITS(b); bh=HBITS(b); \ |
| @@ -487,7 +517,7 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) | |||
| 487 | 517 | ||
| 488 | #define sqr_add_c2(a,i,j,c0,c1,c2) \ | 518 | #define sqr_add_c2(a,i,j,c0,c1,c2) \ |
| 489 | mul_add_c2((a)[i],(a)[j],c0,c1,c2) | 519 | mul_add_c2((a)[i],(a)[j],c0,c1,c2) |
| 490 | #endif | 520 | #endif /* !BN_LLONG && !BN_UMULT_HIGH */ |
| 491 | 521 | ||
| 492 | void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | 522 | void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) |
| 493 | { | 523 | { |
| @@ -643,7 +673,7 @@ void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | |||
| 643 | r[7]=c2; | 673 | r[7]=c2; |
| 644 | } | 674 | } |
| 645 | 675 | ||
| 646 | void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a) | 676 | void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a) |
| 647 | { | 677 | { |
| 648 | #ifdef BN_LLONG | 678 | #ifdef BN_LLONG |
| 649 | BN_ULLONG t,tt; | 679 | BN_ULLONG t,tt; |
| @@ -724,7 +754,7 @@ void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a) | |||
| 724 | r[15]=c1; | 754 | r[15]=c1; |
| 725 | } | 755 | } |
| 726 | 756 | ||
| 727 | void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a) | 757 | void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) |
| 728 | { | 758 | { |
| 729 | #ifdef BN_LLONG | 759 | #ifdef BN_LLONG |
| 730 | BN_ULLONG t,tt; | 760 | BN_ULLONG t,tt; |
| @@ -762,7 +792,7 @@ void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a) | |||
| 762 | r[6]=c1; | 792 | r[6]=c1; |
| 763 | r[7]=c2; | 793 | r[7]=c2; |
| 764 | } | 794 | } |
| 765 | #else | 795 | #else /* !BN_MUL_COMBA */ |
| 766 | 796 | ||
| 767 | /* hmm... is it faster just to do a multiply? */ | 797 | /* hmm... is it faster just to do a multiply? */ |
| 768 | #undef bn_sqr_comba4 | 798 | #undef bn_sqr_comba4 |
| @@ -799,4 +829,4 @@ void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | |||
| 799 | r[15]=bn_mul_add_words(&(r[7]),a,8,b[7]); | 829 | r[15]=bn_mul_add_words(&(r[7]),a,8,b[7]); |
| 800 | } | 830 | } |
| 801 | 831 | ||
| 802 | #endif /* BN_COMBA */ | 832 | #endif /* !BN_MUL_COMBA */ |
