diff options
Diffstat (limited to 'src/lib/libcrypto/bn/bn_asm.c')
-rw-r--r-- | src/lib/libcrypto/bn/bn_asm.c | 178 |
1 files changed, 104 insertions, 74 deletions
diff --git a/src/lib/libcrypto/bn/bn_asm.c b/src/lib/libcrypto/bn/bn_asm.c index 4d3da16a0c..be8aa3ffc5 100644 --- a/src/lib/libcrypto/bn/bn_asm.c +++ b/src/lib/libcrypto/bn/bn_asm.c | |||
@@ -56,97 +56,95 @@ | |||
56 | * [including the GNU Public Licence.] | 56 | * [including the GNU Public Licence.] |
57 | */ | 57 | */ |
58 | 58 | ||
59 | #ifndef BN_DEBUG | ||
60 | # undef NDEBUG /* avoid conflicting definitions */ | ||
61 | # define NDEBUG | ||
62 | #endif | ||
63 | |||
59 | #include <stdio.h> | 64 | #include <stdio.h> |
65 | #include <assert.h> | ||
60 | #include "cryptlib.h" | 66 | #include "cryptlib.h" |
61 | #include "bn_lcl.h" | 67 | #include "bn_lcl.h" |
62 | 68 | ||
63 | #ifdef BN_LLONG | 69 | #if defined(BN_LLONG) || defined(BN_UMULT_HIGH) |
64 | 70 | ||
65 | BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) | 71 | BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) |
66 | { | 72 | { |
67 | BN_ULONG c1=0; | 73 | BN_ULONG c1=0; |
68 | 74 | ||
69 | bn_check_num(num); | 75 | assert(num >= 0); |
70 | if (num <= 0) return(c1); | 76 | if (num <= 0) return(c1); |
71 | 77 | ||
72 | for (;;) | 78 | while (num&~3) |
73 | { | 79 | { |
74 | mul_add(rp[0],ap[0],w,c1); | 80 | mul_add(rp[0],ap[0],w,c1); |
75 | if (--num == 0) break; | ||
76 | mul_add(rp[1],ap[1],w,c1); | 81 | mul_add(rp[1],ap[1],w,c1); |
77 | if (--num == 0) break; | ||
78 | mul_add(rp[2],ap[2],w,c1); | 82 | mul_add(rp[2],ap[2],w,c1); |
79 | if (--num == 0) break; | ||
80 | mul_add(rp[3],ap[3],w,c1); | 83 | mul_add(rp[3],ap[3],w,c1); |
81 | if (--num == 0) break; | 84 | ap+=4; rp+=4; num-=4; |
82 | ap+=4; | 85 | } |
83 | rp+=4; | 86 | if (num) |
87 | { | ||
88 | mul_add(rp[0],ap[0],w,c1); if (--num==0) return c1; | ||
89 | mul_add(rp[1],ap[1],w,c1); if (--num==0) return c1; | ||
90 | mul_add(rp[2],ap[2],w,c1); return c1; | ||
84 | } | 91 | } |
85 | 92 | ||
86 | return(c1); | 93 | return(c1); |
87 | } | 94 | } |
88 | 95 | ||
89 | BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) | 96 | BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) |
90 | { | 97 | { |
91 | BN_ULONG c1=0; | 98 | BN_ULONG c1=0; |
92 | 99 | ||
93 | bn_check_num(num); | 100 | assert(num >= 0); |
94 | if (num <= 0) return(c1); | 101 | if (num <= 0) return(c1); |
95 | 102 | ||
96 | /* for (;;) */ | 103 | while (num&~3) |
97 | while (1) /* circumvent egcs-1.1.2 bug */ | ||
98 | { | 104 | { |
99 | mul(rp[0],ap[0],w,c1); | 105 | mul(rp[0],ap[0],w,c1); |
100 | if (--num == 0) break; | ||
101 | mul(rp[1],ap[1],w,c1); | 106 | mul(rp[1],ap[1],w,c1); |
102 | if (--num == 0) break; | ||
103 | mul(rp[2],ap[2],w,c1); | 107 | mul(rp[2],ap[2],w,c1); |
104 | if (--num == 0) break; | ||
105 | mul(rp[3],ap[3],w,c1); | 108 | mul(rp[3],ap[3],w,c1); |
106 | if (--num == 0) break; | 109 | ap+=4; rp+=4; num-=4; |
107 | ap+=4; | 110 | } |
108 | rp+=4; | 111 | if (num) |
112 | { | ||
113 | mul(rp[0],ap[0],w,c1); if (--num == 0) return c1; | ||
114 | mul(rp[1],ap[1],w,c1); if (--num == 0) return c1; | ||
115 | mul(rp[2],ap[2],w,c1); | ||
109 | } | 116 | } |
110 | return(c1); | 117 | return(c1); |
111 | } | 118 | } |
112 | 119 | ||
113 | void bn_sqr_words(BN_ULONG *r, BN_ULONG *a, int n) | 120 | void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n) |
114 | { | 121 | { |
115 | bn_check_num(n); | 122 | assert(n >= 0); |
116 | if (n <= 0) return; | 123 | if (n <= 0) return; |
117 | for (;;) | 124 | while (n&~3) |
118 | { | 125 | { |
119 | BN_ULLONG t; | 126 | sqr(r[0],r[1],a[0]); |
120 | 127 | sqr(r[2],r[3],a[1]); | |
121 | t=(BN_ULLONG)(a[0])*(a[0]); | 128 | sqr(r[4],r[5],a[2]); |
122 | r[0]=Lw(t); r[1]=Hw(t); | 129 | sqr(r[6],r[7],a[3]); |
123 | if (--n == 0) break; | 130 | a+=4; r+=8; n-=4; |
124 | 131 | } | |
125 | t=(BN_ULLONG)(a[1])*(a[1]); | 132 | if (n) |
126 | r[2]=Lw(t); r[3]=Hw(t); | 133 | { |
127 | if (--n == 0) break; | 134 | sqr(r[0],r[1],a[0]); if (--n == 0) return; |
128 | 135 | sqr(r[2],r[3],a[1]); if (--n == 0) return; | |
129 | t=(BN_ULLONG)(a[2])*(a[2]); | 136 | sqr(r[4],r[5],a[2]); |
130 | r[4]=Lw(t); r[5]=Hw(t); | ||
131 | if (--n == 0) break; | ||
132 | |||
133 | t=(BN_ULLONG)(a[3])*(a[3]); | ||
134 | r[6]=Lw(t); r[7]=Hw(t); | ||
135 | if (--n == 0) break; | ||
136 | |||
137 | a+=4; | ||
138 | r+=8; | ||
139 | } | 137 | } |
140 | } | 138 | } |
141 | 139 | ||
142 | #else | 140 | #else /* !(defined(BN_LLONG) || defined(BN_UMULT_HIGH)) */ |
143 | 141 | ||
144 | BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) | 142 | BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) |
145 | { | 143 | { |
146 | BN_ULONG c=0; | 144 | BN_ULONG c=0; |
147 | BN_ULONG bl,bh; | 145 | BN_ULONG bl,bh; |
148 | 146 | ||
149 | bn_check_num(num); | 147 | assert(num >= 0); |
150 | if (num <= 0) return((BN_ULONG)0); | 148 | if (num <= 0) return((BN_ULONG)0); |
151 | 149 | ||
152 | bl=LBITS(w); | 150 | bl=LBITS(w); |
@@ -168,12 +166,12 @@ BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) | |||
168 | return(c); | 166 | return(c); |
169 | } | 167 | } |
170 | 168 | ||
171 | BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) | 169 | BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) |
172 | { | 170 | { |
173 | BN_ULONG carry=0; | 171 | BN_ULONG carry=0; |
174 | BN_ULONG bl,bh; | 172 | BN_ULONG bl,bh; |
175 | 173 | ||
176 | bn_check_num(num); | 174 | assert(num >= 0); |
177 | if (num <= 0) return((BN_ULONG)0); | 175 | if (num <= 0) return((BN_ULONG)0); |
178 | 176 | ||
179 | bl=LBITS(w); | 177 | bl=LBITS(w); |
@@ -195,9 +193,9 @@ BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) | |||
195 | return(carry); | 193 | return(carry); |
196 | } | 194 | } |
197 | 195 | ||
198 | void bn_sqr_words(BN_ULONG *r, BN_ULONG *a, int n) | 196 | void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n) |
199 | { | 197 | { |
200 | bn_check_num(n); | 198 | assert(n >= 0); |
201 | if (n <= 0) return; | 199 | if (n <= 0) return; |
202 | for (;;) | 200 | for (;;) |
203 | { | 201 | { |
@@ -218,7 +216,7 @@ void bn_sqr_words(BN_ULONG *r, BN_ULONG *a, int n) | |||
218 | } | 216 | } |
219 | } | 217 | } |
220 | 218 | ||
221 | #endif | 219 | #endif /* !(defined(BN_LLONG) || defined(BN_UMULT_HIGH)) */ |
222 | 220 | ||
223 | #if defined(BN_LLONG) && defined(BN_DIV2W) | 221 | #if defined(BN_LLONG) && defined(BN_DIV2W) |
224 | 222 | ||
@@ -229,7 +227,7 @@ BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d) | |||
229 | 227 | ||
230 | #else | 228 | #else |
231 | 229 | ||
232 | /* Divide h-l by d and return the result. */ | 230 | /* Divide h,l by d and return the result. */ |
233 | /* I need to test this some more :-( */ | 231 | /* I need to test this some more :-( */ |
234 | BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d) | 232 | BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d) |
235 | { | 233 | { |
@@ -239,13 +237,8 @@ BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d) | |||
239 | if (d == 0) return(BN_MASK2); | 237 | if (d == 0) return(BN_MASK2); |
240 | 238 | ||
241 | i=BN_num_bits_word(d); | 239 | i=BN_num_bits_word(d); |
242 | if ((i != BN_BITS2) && (h > (BN_ULONG)1<<i)) | 240 | assert((i == BN_BITS2) || (h <= (BN_ULONG)1<<i)); |
243 | { | 241 | |
244 | #if !defined(NO_STDIO) && !defined(WIN16) | ||
245 | fprintf(stderr,"Division would overflow (%d)\n",i); | ||
246 | #endif | ||
247 | abort(); | ||
248 | } | ||
249 | i=BN_BITS2-i; | 242 | i=BN_BITS2-i; |
250 | if (h >= d) h-=d; | 243 | if (h >= d) h-=d; |
251 | 244 | ||
@@ -300,14 +293,14 @@ BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d) | |||
300 | ret|=q; | 293 | ret|=q; |
301 | return(ret); | 294 | return(ret); |
302 | } | 295 | } |
303 | #endif | 296 | #endif /* !(defined(BN_LLONG) && defined(BN_DIV2W)) */ |
304 | 297 | ||
305 | #ifdef BN_LLONG | 298 | #ifdef BN_LLONG |
306 | BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) | 299 | BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) |
307 | { | 300 | { |
308 | BN_ULLONG ll=0; | 301 | BN_ULLONG ll=0; |
309 | 302 | ||
310 | bn_check_num(n); | 303 | assert(n >= 0); |
311 | if (n <= 0) return((BN_ULONG)0); | 304 | if (n <= 0) return((BN_ULONG)0); |
312 | 305 | ||
313 | for (;;) | 306 | for (;;) |
@@ -338,12 +331,12 @@ BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) | |||
338 | } | 331 | } |
339 | return((BN_ULONG)ll); | 332 | return((BN_ULONG)ll); |
340 | } | 333 | } |
341 | #else | 334 | #else /* !BN_LLONG */ |
342 | BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) | 335 | BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) |
343 | { | 336 | { |
344 | BN_ULONG c,l,t; | 337 | BN_ULONG c,l,t; |
345 | 338 | ||
346 | bn_check_num(n); | 339 | assert(n >= 0); |
347 | if (n <= 0) return((BN_ULONG)0); | 340 | if (n <= 0) return((BN_ULONG)0); |
348 | 341 | ||
349 | c=0; | 342 | c=0; |
@@ -387,14 +380,14 @@ BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) | |||
387 | } | 380 | } |
388 | return((BN_ULONG)c); | 381 | return((BN_ULONG)c); |
389 | } | 382 | } |
390 | #endif | 383 | #endif /* !BN_LLONG */ |
391 | 384 | ||
392 | BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) | 385 | BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) |
393 | { | 386 | { |
394 | BN_ULONG t1,t2; | 387 | BN_ULONG t1,t2; |
395 | int c=0; | 388 | int c=0; |
396 | 389 | ||
397 | bn_check_num(n); | 390 | assert(n >= 0); |
398 | if (n <= 0) return((BN_ULONG)0); | 391 | if (n <= 0) return((BN_ULONG)0); |
399 | 392 | ||
400 | for (;;) | 393 | for (;;) |
@@ -433,6 +426,11 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) | |||
433 | #undef bn_sqr_comba8 | 426 | #undef bn_sqr_comba8 |
434 | #undef bn_sqr_comba4 | 427 | #undef bn_sqr_comba4 |
435 | 428 | ||
429 | /* mul_add_c(a,b,c0,c1,c2) -- c+=a*b for three word number c=(c2,c1,c0) */ | ||
430 | /* mul_add_c2(a,b,c0,c1,c2) -- c+=2*a*b for three word number c=(c2,c1,c0) */ | ||
431 | /* sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0) */ | ||
432 | /* sqr_add_c2(a,i,j,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */ | ||
433 | |||
436 | #ifdef BN_LLONG | 434 | #ifdef BN_LLONG |
437 | #define mul_add_c(a,b,c0,c1,c2) \ | 435 | #define mul_add_c(a,b,c0,c1,c2) \ |
438 | t=(BN_ULLONG)a*b; \ | 436 | t=(BN_ULLONG)a*b; \ |
@@ -460,7 +458,39 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) | |||
460 | 458 | ||
461 | #define sqr_add_c2(a,i,j,c0,c1,c2) \ | 459 | #define sqr_add_c2(a,i,j,c0,c1,c2) \ |
462 | mul_add_c2((a)[i],(a)[j],c0,c1,c2) | 460 | mul_add_c2((a)[i],(a)[j],c0,c1,c2) |
463 | #else | 461 | |
462 | #elif defined(BN_UMULT_HIGH) | ||
463 | |||
464 | #define mul_add_c(a,b,c0,c1,c2) { \ | ||
465 | BN_ULONG ta=(a),tb=(b); \ | ||
466 | t1 = ta * tb; \ | ||
467 | t2 = BN_UMULT_HIGH(ta,tb); \ | ||
468 | c0 += t1; t2 += (c0<t1)?1:0; \ | ||
469 | c1 += t2; c2 += (c1<t2)?1:0; \ | ||
470 | } | ||
471 | |||
472 | #define mul_add_c2(a,b,c0,c1,c2) { \ | ||
473 | BN_ULONG ta=(a),tb=(b),t0; \ | ||
474 | t1 = BN_UMULT_HIGH(ta,tb); \ | ||
475 | t0 = ta * tb; \ | ||
476 | t2 = t1+t1; c2 += (t2<t1)?1:0; \ | ||
477 | t1 = t0+t0; t2 += (t1<t0)?1:0; \ | ||
478 | c0 += t1; t2 += (c0<t1)?1:0; \ | ||
479 | c1 += t2; c2 += (c1<t2)?1:0; \ | ||
480 | } | ||
481 | |||
482 | #define sqr_add_c(a,i,c0,c1,c2) { \ | ||
483 | BN_ULONG ta=(a)[i]; \ | ||
484 | t1 = ta * ta; \ | ||
485 | t2 = BN_UMULT_HIGH(ta,ta); \ | ||
486 | c0 += t1; t2 += (c0<t1)?1:0; \ | ||
487 | c1 += t2; c2 += (c1<t2)?1:0; \ | ||
488 | } | ||
489 | |||
490 | #define sqr_add_c2(a,i,j,c0,c1,c2) \ | ||
491 | mul_add_c2((a)[i],(a)[j],c0,c1,c2) | ||
492 | |||
493 | #else /* !BN_LLONG && !BN_UMULT_HIGH */ | ||
464 | #define mul_add_c(a,b,c0,c1,c2) \ | 494 | #define mul_add_c(a,b,c0,c1,c2) \ |
465 | t1=LBITS(a); t2=HBITS(a); \ | 495 | t1=LBITS(a); t2=HBITS(a); \ |
466 | bl=LBITS(b); bh=HBITS(b); \ | 496 | bl=LBITS(b); bh=HBITS(b); \ |
@@ -487,7 +517,7 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) | |||
487 | 517 | ||
488 | #define sqr_add_c2(a,i,j,c0,c1,c2) \ | 518 | #define sqr_add_c2(a,i,j,c0,c1,c2) \ |
489 | mul_add_c2((a)[i],(a)[j],c0,c1,c2) | 519 | mul_add_c2((a)[i],(a)[j],c0,c1,c2) |
490 | #endif | 520 | #endif /* !BN_LLONG && !BN_UMULT_HIGH */ |
491 | 521 | ||
492 | void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | 522 | void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) |
493 | { | 523 | { |
@@ -643,7 +673,7 @@ void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | |||
643 | r[7]=c2; | 673 | r[7]=c2; |
644 | } | 674 | } |
645 | 675 | ||
646 | void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a) | 676 | void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a) |
647 | { | 677 | { |
648 | #ifdef BN_LLONG | 678 | #ifdef BN_LLONG |
649 | BN_ULLONG t,tt; | 679 | BN_ULLONG t,tt; |
@@ -724,7 +754,7 @@ void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a) | |||
724 | r[15]=c1; | 754 | r[15]=c1; |
725 | } | 755 | } |
726 | 756 | ||
727 | void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a) | 757 | void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) |
728 | { | 758 | { |
729 | #ifdef BN_LLONG | 759 | #ifdef BN_LLONG |
730 | BN_ULLONG t,tt; | 760 | BN_ULLONG t,tt; |
@@ -762,7 +792,7 @@ void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a) | |||
762 | r[6]=c1; | 792 | r[6]=c1; |
763 | r[7]=c2; | 793 | r[7]=c2; |
764 | } | 794 | } |
765 | #else | 795 | #else /* !BN_MUL_COMBA */ |
766 | 796 | ||
767 | /* hmm... is it faster just to do a multiply? */ | 797 | /* hmm... is it faster just to do a multiply? */ |
768 | #undef bn_sqr_comba4 | 798 | #undef bn_sqr_comba4 |
@@ -799,4 +829,4 @@ void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | |||
799 | r[15]=bn_mul_add_words(&(r[7]),a,8,b[7]); | 829 | r[15]=bn_mul_add_words(&(r[7]),a,8,b[7]); |
800 | } | 830 | } |
801 | 831 | ||
802 | #endif /* BN_COMBA */ | 832 | #endif /* !BN_MUL_COMBA */ |