summary | refs | log | tree | commit | diff
path: root/src/lib/libcrypto/bn/bn_asm.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib/libcrypto/bn/bn_asm.c')
-rw-r--r-- src/lib/libcrypto/bn/bn_asm.c 178
1 files changed, 104 insertions, 74 deletions
diff --git a/src/lib/libcrypto/bn/bn_asm.c b/src/lib/libcrypto/bn/bn_asm.c
index 4d3da16a0c..be8aa3ffc5 100644
--- a/src/lib/libcrypto/bn/bn_asm.c
+++ b/src/lib/libcrypto/bn/bn_asm.c
@@ -56,97 +56,95 @@
56 * [including the GNU Public Licence.] 56 * [including the GNU Public Licence.]
57 */ 57 */
58 58
59#ifndef BN_DEBUG
60# undef NDEBUG /* avoid conflicting definitions */
61# define NDEBUG
62#endif
63
59#include <stdio.h> 64#include <stdio.h>
65#include <assert.h>
60#include "cryptlib.h" 66#include "cryptlib.h"
61#include "bn_lcl.h" 67#include "bn_lcl.h"
62 68
63#ifdef BN_LLONG 69#if defined(BN_LLONG) || defined(BN_UMULT_HIGH)
64 70
65BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) 71BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
66 { 72 {
67 BN_ULONG c1=0; 73 BN_ULONG c1=0;
68 74
69 bn_check_num(num); 75 assert(num >= 0);
70 if (num <= 0) return(c1); 76 if (num <= 0) return(c1);
71 77
72 for (;;) 78 while (num&~3)
73 { 79 {
74 mul_add(rp[0],ap[0],w,c1); 80 mul_add(rp[0],ap[0],w,c1);
75 if (--num == 0) break;
76 mul_add(rp[1],ap[1],w,c1); 81 mul_add(rp[1],ap[1],w,c1);
77 if (--num == 0) break;
78 mul_add(rp[2],ap[2],w,c1); 82 mul_add(rp[2],ap[2],w,c1);
79 if (--num == 0) break;
80 mul_add(rp[3],ap[3],w,c1); 83 mul_add(rp[3],ap[3],w,c1);
81 if (--num == 0) break; 84 ap+=4; rp+=4; num-=4;
82 ap+=4; 85 }
83 rp+=4; 86 if (num)
87 {
88 mul_add(rp[0],ap[0],w,c1); if (--num==0) return c1;
89 mul_add(rp[1],ap[1],w,c1); if (--num==0) return c1;
90 mul_add(rp[2],ap[2],w,c1); return c1;
84 } 91 }
85 92
86 return(c1); 93 return(c1);
87 } 94 }
88 95
89BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) 96BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
90 { 97 {
91 BN_ULONG c1=0; 98 BN_ULONG c1=0;
92 99
93 bn_check_num(num); 100 assert(num >= 0);
94 if (num <= 0) return(c1); 101 if (num <= 0) return(c1);
95 102
96 /* for (;;) */ 103 while (num&~3)
97 while (1) /* circumvent egcs-1.1.2 bug */
98 { 104 {
99 mul(rp[0],ap[0],w,c1); 105 mul(rp[0],ap[0],w,c1);
100 if (--num == 0) break;
101 mul(rp[1],ap[1],w,c1); 106 mul(rp[1],ap[1],w,c1);
102 if (--num == 0) break;
103 mul(rp[2],ap[2],w,c1); 107 mul(rp[2],ap[2],w,c1);
104 if (--num == 0) break;
105 mul(rp[3],ap[3],w,c1); 108 mul(rp[3],ap[3],w,c1);
106 if (--num == 0) break; 109 ap+=4; rp+=4; num-=4;
107 ap+=4; 110 }
108 rp+=4; 111 if (num)
112 {
113 mul(rp[0],ap[0],w,c1); if (--num == 0) return c1;
114 mul(rp[1],ap[1],w,c1); if (--num == 0) return c1;
115 mul(rp[2],ap[2],w,c1);
109 } 116 }
110 return(c1); 117 return(c1);
111 } 118 }
112 119
113void bn_sqr_words(BN_ULONG *r, BN_ULONG *a, int n) 120void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n)
114 { 121 {
115 bn_check_num(n); 122 assert(n >= 0);
116 if (n <= 0) return; 123 if (n <= 0) return;
117 for (;;) 124 while (n&~3)
118 { 125 {
119 BN_ULLONG t; 126 sqr(r[0],r[1],a[0]);
120 127 sqr(r[2],r[3],a[1]);
121 t=(BN_ULLONG)(a[0])*(a[0]); 128 sqr(r[4],r[5],a[2]);
122 r[0]=Lw(t); r[1]=Hw(t); 129 sqr(r[6],r[7],a[3]);
123 if (--n == 0) break; 130 a+=4; r+=8; n-=4;
124 131 }
125 t=(BN_ULLONG)(a[1])*(a[1]); 132 if (n)
126 r[2]=Lw(t); r[3]=Hw(t); 133 {
127 if (--n == 0) break; 134 sqr(r[0],r[1],a[0]); if (--n == 0) return;
128 135 sqr(r[2],r[3],a[1]); if (--n == 0) return;
129 t=(BN_ULLONG)(a[2])*(a[2]); 136 sqr(r[4],r[5],a[2]);
130 r[4]=Lw(t); r[5]=Hw(t);
131 if (--n == 0) break;
132
133 t=(BN_ULLONG)(a[3])*(a[3]);
134 r[6]=Lw(t); r[7]=Hw(t);
135 if (--n == 0) break;
136
137 a+=4;
138 r+=8;
139 } 137 }
140 } 138 }
141 139
142#else 140#else /* !(defined(BN_LLONG) || defined(BN_UMULT_HIGH)) */
143 141
144BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) 142BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
145 { 143 {
146 BN_ULONG c=0; 144 BN_ULONG c=0;
147 BN_ULONG bl,bh; 145 BN_ULONG bl,bh;
148 146
149 bn_check_num(num); 147 assert(num >= 0);
150 if (num <= 0) return((BN_ULONG)0); 148 if (num <= 0) return((BN_ULONG)0);
151 149
152 bl=LBITS(w); 150 bl=LBITS(w);
@@ -168,12 +166,12 @@ BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
168 return(c); 166 return(c);
169 } 167 }
170 168
171BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) 169BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
172 { 170 {
173 BN_ULONG carry=0; 171 BN_ULONG carry=0;
174 BN_ULONG bl,bh; 172 BN_ULONG bl,bh;
175 173
176 bn_check_num(num); 174 assert(num >= 0);
177 if (num <= 0) return((BN_ULONG)0); 175 if (num <= 0) return((BN_ULONG)0);
178 176
179 bl=LBITS(w); 177 bl=LBITS(w);
@@ -195,9 +193,9 @@ BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
195 return(carry); 193 return(carry);
196 } 194 }
197 195
198void bn_sqr_words(BN_ULONG *r, BN_ULONG *a, int n) 196void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n)
199 { 197 {
200 bn_check_num(n); 198 assert(n >= 0);
201 if (n <= 0) return; 199 if (n <= 0) return;
202 for (;;) 200 for (;;)
203 { 201 {
@@ -218,7 +216,7 @@ void bn_sqr_words(BN_ULONG *r, BN_ULONG *a, int n)
218 } 216 }
219 } 217 }
220 218
221#endif 219#endif /* !(defined(BN_LLONG) || defined(BN_UMULT_HIGH)) */
222 220
223#if defined(BN_LLONG) && defined(BN_DIV2W) 221#if defined(BN_LLONG) && defined(BN_DIV2W)
224 222
@@ -229,7 +227,7 @@ BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
229 227
230#else 228#else
231 229
232/* Divide h-l by d and return the result. */ 230/* Divide h,l by d and return the result. */
233/* I need to test this some more :-( */ 231/* I need to test this some more :-( */
234BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d) 232BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
235 { 233 {
@@ -239,13 +237,8 @@ BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
239 if (d == 0) return(BN_MASK2); 237 if (d == 0) return(BN_MASK2);
240 238
241 i=BN_num_bits_word(d); 239 i=BN_num_bits_word(d);
242 if ((i != BN_BITS2) && (h > (BN_ULONG)1<<i)) 240 assert((i == BN_BITS2) || (h > (BN_ULONG)1<<i));
243 { 241
244#if !defined(NO_STDIO) && !defined(WIN16)
245 fprintf(stderr,"Division would overflow (%d)\n",i);
246#endif
247 abort();
248 }
249 i=BN_BITS2-i; 242 i=BN_BITS2-i;
250 if (h >= d) h-=d; 243 if (h >= d) h-=d;
251 244
@@ -300,14 +293,14 @@ BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
300 ret|=q; 293 ret|=q;
301 return(ret); 294 return(ret);
302 } 295 }
303#endif 296#endif /* !(defined(BN_LLONG) && defined(BN_DIV2W)) */
304 297
305#ifdef BN_LLONG 298#ifdef BN_LLONG
306BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) 299BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
307 { 300 {
308 BN_ULLONG ll=0; 301 BN_ULLONG ll=0;
309 302
310 bn_check_num(n); 303 assert(n >= 0);
311 if (n <= 0) return((BN_ULONG)0); 304 if (n <= 0) return((BN_ULONG)0);
312 305
313 for (;;) 306 for (;;)
@@ -338,12 +331,12 @@ BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
338 } 331 }
339 return((BN_ULONG)ll); 332 return((BN_ULONG)ll);
340 } 333 }
341#else 334#else /* !BN_LLONG */
342BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) 335BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
343 { 336 {
344 BN_ULONG c,l,t; 337 BN_ULONG c,l,t;
345 338
346 bn_check_num(n); 339 assert(n >= 0);
347 if (n <= 0) return((BN_ULONG)0); 340 if (n <= 0) return((BN_ULONG)0);
348 341
349 c=0; 342 c=0;
@@ -387,14 +380,14 @@ BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
387 } 380 }
388 return((BN_ULONG)c); 381 return((BN_ULONG)c);
389 } 382 }
390#endif 383#endif /* !BN_LLONG */
391 384
392BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) 385BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
393 { 386 {
394 BN_ULONG t1,t2; 387 BN_ULONG t1,t2;
395 int c=0; 388 int c=0;
396 389
397 bn_check_num(n); 390 assert(n >= 0);
398 if (n <= 0) return((BN_ULONG)0); 391 if (n <= 0) return((BN_ULONG)0);
399 392
400 for (;;) 393 for (;;)
@@ -433,6 +426,11 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
433#undef bn_sqr_comba8 426#undef bn_sqr_comba8
434#undef bn_sqr_comba4 427#undef bn_sqr_comba4
435 428
429/* mul_add_c(a,b,c0,c1,c2) -- c+=a*b for three word number c=(c2,c1,c0) */
430/* mul_add_c2(a,b,c0,c1,c2) -- c+=2*a*b for three word number c=(c2,c1,c0) */
431/* sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0) */
432/* sqr_add_c2(a,i,j,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */
433
436#ifdef BN_LLONG 434#ifdef BN_LLONG
437#define mul_add_c(a,b,c0,c1,c2) \ 435#define mul_add_c(a,b,c0,c1,c2) \
438 t=(BN_ULLONG)a*b; \ 436 t=(BN_ULLONG)a*b; \
@@ -460,7 +458,39 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
460 458
461#define sqr_add_c2(a,i,j,c0,c1,c2) \ 459#define sqr_add_c2(a,i,j,c0,c1,c2) \
462 mul_add_c2((a)[i],(a)[j],c0,c1,c2) 460 mul_add_c2((a)[i],(a)[j],c0,c1,c2)
463#else 461
462#elif defined(BN_UMULT_HIGH)
463
464#define mul_add_c(a,b,c0,c1,c2) { \
465 BN_ULONG ta=(a),tb=(b); \
466 t1 = ta * tb; \
467 t2 = BN_UMULT_HIGH(ta,tb); \
468 c0 += t1; t2 += (c0<t1)?1:0; \
469 c1 += t2; c2 += (c1<t2)?1:0; \
470 }
471
472#define mul_add_c2(a,b,c0,c1,c2) { \
473 BN_ULONG ta=(a),tb=(b),t0; \
474 t1 = BN_UMULT_HIGH(ta,tb); \
475 t0 = ta * tb; \
476 t2 = t1+t1; c2 += (t2<t1)?1:0; \
477 t1 = t0+t0; t2 += (t1<t0)?1:0; \
478 c0 += t1; t2 += (c0<t1)?1:0; \
479 c1 += t2; c2 += (c1<t2)?1:0; \
480 }
481
482#define sqr_add_c(a,i,c0,c1,c2) { \
483 BN_ULONG ta=(a)[i]; \
484 t1 = ta * ta; \
485 t2 = BN_UMULT_HIGH(ta,ta); \
486 c0 += t1; t2 += (c0<t1)?1:0; \
487 c1 += t2; c2 += (c1<t2)?1:0; \
488 }
489
490#define sqr_add_c2(a,i,j,c0,c1,c2) \
491 mul_add_c2((a)[i],(a)[j],c0,c1,c2)
492
493#else /* !BN_LLONG && !BN_UMULT_HIGH */
464#define mul_add_c(a,b,c0,c1,c2) \ 494#define mul_add_c(a,b,c0,c1,c2) \
465 t1=LBITS(a); t2=HBITS(a); \ 495 t1=LBITS(a); t2=HBITS(a); \
466 bl=LBITS(b); bh=HBITS(b); \ 496 bl=LBITS(b); bh=HBITS(b); \
@@ -487,7 +517,7 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
487 517
488#define sqr_add_c2(a,i,j,c0,c1,c2) \ 518#define sqr_add_c2(a,i,j,c0,c1,c2) \
489 mul_add_c2((a)[i],(a)[j],c0,c1,c2) 519 mul_add_c2((a)[i],(a)[j],c0,c1,c2)
490#endif 520#endif /* !BN_LLONG && !BN_UMULT_HIGH */
491 521
492void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) 522void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
493 { 523 {
@@ -643,7 +673,7 @@ void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
643 r[7]=c2; 673 r[7]=c2;
644 } 674 }
645 675
646void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a) 676void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a)
647 { 677 {
648#ifdef BN_LLONG 678#ifdef BN_LLONG
649 BN_ULLONG t,tt; 679 BN_ULLONG t,tt;
@@ -724,7 +754,7 @@ void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a)
724 r[15]=c1; 754 r[15]=c1;
725 } 755 }
726 756
727void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a) 757void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a)
728 { 758 {
729#ifdef BN_LLONG 759#ifdef BN_LLONG
730 BN_ULLONG t,tt; 760 BN_ULLONG t,tt;
@@ -762,7 +792,7 @@ void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a)
762 r[6]=c1; 792 r[6]=c1;
763 r[7]=c2; 793 r[7]=c2;
764 } 794 }
765#else 795#else /* !BN_MUL_COMBA */
766 796
767/* hmm... is it faster just to do a multiply? */ 797/* hmm... is it faster just to do a multiply? */
768#undef bn_sqr_comba4 798#undef bn_sqr_comba4
@@ -799,4 +829,4 @@ void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
799 r[15]=bn_mul_add_words(&(r[7]),a,8,b[7]); 829 r[15]=bn_mul_add_words(&(r[7]),a,8,b[7]);
800 } 830 }
801 831
802#endif /* BN_COMBA */ 832#endif /* !BN_MUL_COMBA */