diff options
Diffstat (limited to 'src/lib/libcrypto/bn/bn_asm.c')
-rw-r--r-- | src/lib/libcrypto/bn/bn_asm.c | 145 |
1 files changed, 90 insertions, 55 deletions
diff --git a/src/lib/libcrypto/bn/bn_asm.c b/src/lib/libcrypto/bn/bn_asm.c index 4d3da16a0c..3329cc18e6 100644 --- a/src/lib/libcrypto/bn/bn_asm.c +++ b/src/lib/libcrypto/bn/bn_asm.c | |||
@@ -56,31 +56,38 @@ | |||
56 | * [including the GNU Public Licence.] | 56 | * [including the GNU Public Licence.] |
57 | */ | 57 | */ |
58 | 58 | ||
59 | #ifndef BN_DEBUG | ||
60 | # undef NDEBUG /* avoid conflicting definitions */ | ||
61 | # define NDEBUG | ||
62 | #endif | ||
63 | |||
59 | #include <stdio.h> | 64 | #include <stdio.h> |
65 | #include <assert.h> | ||
60 | #include "cryptlib.h" | 66 | #include "cryptlib.h" |
61 | #include "bn_lcl.h" | 67 | #include "bn_lcl.h" |
62 | 68 | ||
63 | #ifdef BN_LLONG | 69 | #if defined(BN_LLONG) || defined(BN_UMULT_HIGH) |
64 | 70 | ||
65 | BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) | 71 | BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) |
66 | { | 72 | { |
67 | BN_ULONG c1=0; | 73 | BN_ULONG c1=0; |
68 | 74 | ||
69 | bn_check_num(num); | 75 | assert(num >= 0); |
70 | if (num <= 0) return(c1); | 76 | if (num <= 0) return(c1); |
71 | 77 | ||
72 | for (;;) | 78 | while (num&~3) |
73 | { | 79 | { |
74 | mul_add(rp[0],ap[0],w,c1); | 80 | mul_add(rp[0],ap[0],w,c1); |
75 | if (--num == 0) break; | ||
76 | mul_add(rp[1],ap[1],w,c1); | 81 | mul_add(rp[1],ap[1],w,c1); |
77 | if (--num == 0) break; | ||
78 | mul_add(rp[2],ap[2],w,c1); | 82 | mul_add(rp[2],ap[2],w,c1); |
79 | if (--num == 0) break; | ||
80 | mul_add(rp[3],ap[3],w,c1); | 83 | mul_add(rp[3],ap[3],w,c1); |
81 | if (--num == 0) break; | 84 | ap+=4; rp+=4; num-=4; |
82 | ap+=4; | 85 | } |
83 | rp+=4; | 86 | if (num) |
87 | { | ||
88 | mul_add(rp[0],ap[0],w,c1); if (--num==0) return c1; | ||
89 | mul_add(rp[1],ap[1],w,c1); if (--num==0) return c1; | ||
90 | mul_add(rp[2],ap[2],w,c1); return c1; | ||
84 | } | 91 | } |
85 | 92 | ||
86 | return(c1); | 93 | return(c1); |
@@ -90,63 +97,54 @@ BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) | |||
90 | { | 97 | { |
91 | BN_ULONG c1=0; | 98 | BN_ULONG c1=0; |
92 | 99 | ||
93 | bn_check_num(num); | 100 | assert(num >= 0); |
94 | if (num <= 0) return(c1); | 101 | if (num <= 0) return(c1); |
95 | 102 | ||
96 | /* for (;;) */ | 103 | while (num&~3) |
97 | while (1) /* circumvent egcs-1.1.2 bug */ | ||
98 | { | 104 | { |
99 | mul(rp[0],ap[0],w,c1); | 105 | mul(rp[0],ap[0],w,c1); |
100 | if (--num == 0) break; | ||
101 | mul(rp[1],ap[1],w,c1); | 106 | mul(rp[1],ap[1],w,c1); |
102 | if (--num == 0) break; | ||
103 | mul(rp[2],ap[2],w,c1); | 107 | mul(rp[2],ap[2],w,c1); |
104 | if (--num == 0) break; | ||
105 | mul(rp[3],ap[3],w,c1); | 108 | mul(rp[3],ap[3],w,c1); |
106 | if (--num == 0) break; | 109 | ap+=4; rp+=4; num-=4; |
107 | ap+=4; | 110 | } |
108 | rp+=4; | 111 | if (num) |
112 | { | ||
113 | mul(rp[0],ap[0],w,c1); if (--num == 0) return c1; | ||
114 | mul(rp[1],ap[1],w,c1); if (--num == 0) return c1; | ||
115 | mul(rp[2],ap[2],w,c1); | ||
109 | } | 116 | } |
110 | return(c1); | 117 | return(c1); |
111 | } | 118 | } |
112 | 119 | ||
113 | void bn_sqr_words(BN_ULONG *r, BN_ULONG *a, int n) | 120 | void bn_sqr_words(BN_ULONG *r, BN_ULONG *a, int n) |
114 | { | 121 | { |
115 | bn_check_num(n); | 122 | assert(n >= 0); |
116 | if (n <= 0) return; | 123 | if (n <= 0) return; |
117 | for (;;) | 124 | while (n&~3) |
118 | { | 125 | { |
119 | BN_ULLONG t; | 126 | sqr(r[0],r[1],a[0]); |
120 | 127 | sqr(r[2],r[3],a[1]); | |
121 | t=(BN_ULLONG)(a[0])*(a[0]); | 128 | sqr(r[4],r[5],a[2]); |
122 | r[0]=Lw(t); r[1]=Hw(t); | 129 | sqr(r[6],r[7],a[3]); |
123 | if (--n == 0) break; | 130 | a+=4; r+=8; n-=4; |
124 | 131 | } | |
125 | t=(BN_ULLONG)(a[1])*(a[1]); | 132 | if (n) |
126 | r[2]=Lw(t); r[3]=Hw(t); | 133 | { |
127 | if (--n == 0) break; | 134 | sqr(r[0],r[1],a[0]); if (--n == 0) return; |
128 | 135 | sqr(r[2],r[3],a[1]); if (--n == 0) return; | |
129 | t=(BN_ULLONG)(a[2])*(a[2]); | 136 | sqr(r[4],r[5],a[2]); |
130 | r[4]=Lw(t); r[5]=Hw(t); | ||
131 | if (--n == 0) break; | ||
132 | |||
133 | t=(BN_ULLONG)(a[3])*(a[3]); | ||
134 | r[6]=Lw(t); r[7]=Hw(t); | ||
135 | if (--n == 0) break; | ||
136 | |||
137 | a+=4; | ||
138 | r+=8; | ||
139 | } | 137 | } |
140 | } | 138 | } |
141 | 139 | ||
142 | #else | 140 | #else /* !(defined(BN_LLONG) || defined(BN_UMULT_HIGH)) */ |
143 | 141 | ||
144 | BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) | 142 | BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) |
145 | { | 143 | { |
146 | BN_ULONG c=0; | 144 | BN_ULONG c=0; |
147 | BN_ULONG bl,bh; | 145 | BN_ULONG bl,bh; |
148 | 146 | ||
149 | bn_check_num(num); | 147 | assert(num >= 0); |
150 | if (num <= 0) return((BN_ULONG)0); | 148 | if (num <= 0) return((BN_ULONG)0); |
151 | 149 | ||
152 | bl=LBITS(w); | 150 | bl=LBITS(w); |
@@ -173,7 +171,7 @@ BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) | |||
173 | BN_ULONG carry=0; | 171 | BN_ULONG carry=0; |
174 | BN_ULONG bl,bh; | 172 | BN_ULONG bl,bh; |
175 | 173 | ||
176 | bn_check_num(num); | 174 | assert(num >= 0); |
177 | if (num <= 0) return((BN_ULONG)0); | 175 | if (num <= 0) return((BN_ULONG)0); |
178 | 176 | ||
179 | bl=LBITS(w); | 177 | bl=LBITS(w); |
@@ -197,7 +195,7 @@ BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) | |||
197 | 195 | ||
198 | void bn_sqr_words(BN_ULONG *r, BN_ULONG *a, int n) | 196 | void bn_sqr_words(BN_ULONG *r, BN_ULONG *a, int n) |
199 | { | 197 | { |
200 | bn_check_num(n); | 198 | assert(n >= 0); |
201 | if (n <= 0) return; | 199 | if (n <= 0) return; |
202 | for (;;) | 200 | for (;;) |
203 | { | 201 | { |
@@ -218,7 +216,7 @@ void bn_sqr_words(BN_ULONG *r, BN_ULONG *a, int n) | |||
218 | } | 216 | } |
219 | } | 217 | } |
220 | 218 | ||
221 | #endif | 219 | #endif /* !(defined(BN_LLONG) || defined(BN_UMULT_HIGH)) */ |
222 | 220 | ||
223 | #if defined(BN_LLONG) && defined(BN_DIV2W) | 221 | #if defined(BN_LLONG) && defined(BN_DIV2W) |
224 | 222 | ||
@@ -300,14 +298,14 @@ BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d) | |||
300 | ret|=q; | 298 | ret|=q; |
301 | return(ret); | 299 | return(ret); |
302 | } | 300 | } |
303 | #endif | 301 | #endif /* !(defined(BN_LLONG) && defined(BN_DIV2W)) */ |
304 | 302 | ||
305 | #ifdef BN_LLONG | 303 | #ifdef BN_LLONG |
306 | BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) | 304 | BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) |
307 | { | 305 | { |
308 | BN_ULLONG ll=0; | 306 | BN_ULLONG ll=0; |
309 | 307 | ||
310 | bn_check_num(n); | 308 | assert(n >= 0); |
311 | if (n <= 0) return((BN_ULONG)0); | 309 | if (n <= 0) return((BN_ULONG)0); |
312 | 310 | ||
313 | for (;;) | 311 | for (;;) |
@@ -338,12 +336,12 @@ BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) | |||
338 | } | 336 | } |
339 | return((BN_ULONG)ll); | 337 | return((BN_ULONG)ll); |
340 | } | 338 | } |
341 | #else | 339 | #else /* !BN_LLONG */ |
342 | BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) | 340 | BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) |
343 | { | 341 | { |
344 | BN_ULONG c,l,t; | 342 | BN_ULONG c,l,t; |
345 | 343 | ||
346 | bn_check_num(n); | 344 | assert(n >= 0); |
347 | if (n <= 0) return((BN_ULONG)0); | 345 | if (n <= 0) return((BN_ULONG)0); |
348 | 346 | ||
349 | c=0; | 347 | c=0; |
@@ -387,14 +385,14 @@ BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) | |||
387 | } | 385 | } |
388 | return((BN_ULONG)c); | 386 | return((BN_ULONG)c); |
389 | } | 387 | } |
390 | #endif | 388 | #endif /* !BN_LLONG */ |
391 | 389 | ||
392 | BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) | 390 | BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) |
393 | { | 391 | { |
394 | BN_ULONG t1,t2; | 392 | BN_ULONG t1,t2; |
395 | int c=0; | 393 | int c=0; |
396 | 394 | ||
397 | bn_check_num(n); | 395 | assert(n >= 0); |
398 | if (n <= 0) return((BN_ULONG)0); | 396 | if (n <= 0) return((BN_ULONG)0); |
399 | 397 | ||
400 | for (;;) | 398 | for (;;) |
@@ -433,6 +431,11 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) | |||
433 | #undef bn_sqr_comba8 | 431 | #undef bn_sqr_comba8 |
434 | #undef bn_sqr_comba4 | 432 | #undef bn_sqr_comba4 |
435 | 433 | ||
434 | /* mul_add_c(a,b,c0,c1,c2) -- c+=a*b for three word number c=(c2,c1,c0) */ | ||
435 | /* mul_add_c2(a,b,c0,c1,c2) -- c+=2*a*b for three word number c=(c2,c1,c0) */ | ||
436 | /* sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0) */ | ||
437 | /* sqr_add_c2(a,i,j,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */ | ||
438 | |||
436 | #ifdef BN_LLONG | 439 | #ifdef BN_LLONG |
437 | #define mul_add_c(a,b,c0,c1,c2) \ | 440 | #define mul_add_c(a,b,c0,c1,c2) \ |
438 | t=(BN_ULLONG)a*b; \ | 441 | t=(BN_ULLONG)a*b; \ |
@@ -460,7 +463,39 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) | |||
460 | 463 | ||
461 | #define sqr_add_c2(a,i,j,c0,c1,c2) \ | 464 | #define sqr_add_c2(a,i,j,c0,c1,c2) \ |
462 | mul_add_c2((a)[i],(a)[j],c0,c1,c2) | 465 | mul_add_c2((a)[i],(a)[j],c0,c1,c2) |
463 | #else | 466 | |
467 | #elif defined(BN_UMULT_HIGH) | ||
468 | |||
469 | #define mul_add_c(a,b,c0,c1,c2) { \ | ||
470 | BN_ULONG ta=(a),tb=(b); \ | ||
471 | t1 = ta * tb; \ | ||
472 | t2 = BN_UMULT_HIGH(ta,tb); \ | ||
473 | c0 += t1; t2 += (c0<t1)?1:0; \ | ||
474 | c1 += t2; c2 += (c1<t2)?1:0; \ | ||
475 | } | ||
476 | |||
477 | #define mul_add_c2(a,b,c0,c1,c2) { \ | ||
478 | BN_ULONG ta=(a),tb=(b),t0; \ | ||
479 | t1 = BN_UMULT_HIGH(ta,tb); \ | ||
480 | t0 = ta * tb; \ | ||
481 | t2 = t1+t1; c2 += (t2<t1)?1:0; \ | ||
482 | t1 = t0+t0; t2 += (t1<t0)?1:0; \ | ||
483 | c0 += t1; t2 += (c0<t1)?1:0; \ | ||
484 | c1 += t2; c2 += (c1<t2)?1:0; \ | ||
485 | } | ||
486 | |||
487 | #define sqr_add_c(a,i,c0,c1,c2) { \ | ||
488 | BN_ULONG ta=(a)[i]; \ | ||
489 | t1 = ta * ta; \ | ||
490 | t2 = BN_UMULT_HIGH(ta,ta); \ | ||
491 | c0 += t1; t2 += (c0<t1)?1:0; \ | ||
492 | c1 += t2; c2 += (c1<t2)?1:0; \ | ||
493 | } | ||
494 | |||
495 | #define sqr_add_c2(a,i,j,c0,c1,c2) \ | ||
496 | mul_add_c2((a)[i],(a)[j],c0,c1,c2) | ||
497 | |||
498 | #else /* !BN_LLONG && !BN_UMULT_HIGH */ | ||
464 | #define mul_add_c(a,b,c0,c1,c2) \ | 499 | #define mul_add_c(a,b,c0,c1,c2) \ |
465 | t1=LBITS(a); t2=HBITS(a); \ | 500 | t1=LBITS(a); t2=HBITS(a); \ |
466 | bl=LBITS(b); bh=HBITS(b); \ | 501 | bl=LBITS(b); bh=HBITS(b); \ |
@@ -487,7 +522,7 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) | |||
487 | 522 | ||
488 | #define sqr_add_c2(a,i,j,c0,c1,c2) \ | 523 | #define sqr_add_c2(a,i,j,c0,c1,c2) \ |
489 | mul_add_c2((a)[i],(a)[j],c0,c1,c2) | 524 | mul_add_c2((a)[i],(a)[j],c0,c1,c2) |
490 | #endif | 525 | #endif /* !BN_LLONG && !BN_UMULT_HIGH */ |
491 | 526 | ||
492 | void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | 527 | void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) |
493 | { | 528 | { |
@@ -762,7 +797,7 @@ void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a) | |||
762 | r[6]=c1; | 797 | r[6]=c1; |
763 | r[7]=c2; | 798 | r[7]=c2; |
764 | } | 799 | } |
765 | #else | 800 | #else /* !BN_MUL_COMBA */ |
766 | 801 | ||
767 | /* hmm... is it faster just to do a multiply? */ | 802 | /* hmm... is it faster just to do a multiply? */ |
768 | #undef bn_sqr_comba4 | 803 | #undef bn_sqr_comba4 |
@@ -799,4 +834,4 @@ void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | |||
799 | r[15]=bn_mul_add_words(&(r[7]),a,8,b[7]); | 834 | r[15]=bn_mul_add_words(&(r[7]),a,8,b[7]); |
800 | } | 835 | } |
801 | 836 | ||
802 | #endif /* BN_COMBA */ | 837 | #endif /* !BN_MUL_COMBA */ |