summaryrefslogtreecommitdiff
path: root/src/lib/libcrypto/bn/bn_asm.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib/libcrypto/bn/bn_asm.c')
-rw-r--r--src/lib/libcrypto/bn/bn_asm.c145
1 files changed, 90 insertions, 55 deletions
diff --git a/src/lib/libcrypto/bn/bn_asm.c b/src/lib/libcrypto/bn/bn_asm.c
index 4d3da16a0c..3329cc18e6 100644
--- a/src/lib/libcrypto/bn/bn_asm.c
+++ b/src/lib/libcrypto/bn/bn_asm.c
@@ -56,31 +56,38 @@
56 * [including the GNU Public Licence.] 56 * [including the GNU Public Licence.]
57 */ 57 */
58 58
59#ifndef BN_DEBUG
60# undef NDEBUG /* avoid conflicting definitions */
61# define NDEBUG
62#endif
63
59#include <stdio.h> 64#include <stdio.h>
65#include <assert.h>
60#include "cryptlib.h" 66#include "cryptlib.h"
61#include "bn_lcl.h" 67#include "bn_lcl.h"
62 68
63#ifdef BN_LLONG 69#if defined(BN_LLONG) || defined(BN_UMULT_HIGH)
64 70
65BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) 71BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
66 { 72 {
67 BN_ULONG c1=0; 73 BN_ULONG c1=0;
68 74
69 bn_check_num(num); 75 assert(num >= 0);
70 if (num <= 0) return(c1); 76 if (num <= 0) return(c1);
71 77
72 for (;;) 78 while (num&~3)
73 { 79 {
74 mul_add(rp[0],ap[0],w,c1); 80 mul_add(rp[0],ap[0],w,c1);
75 if (--num == 0) break;
76 mul_add(rp[1],ap[1],w,c1); 81 mul_add(rp[1],ap[1],w,c1);
77 if (--num == 0) break;
78 mul_add(rp[2],ap[2],w,c1); 82 mul_add(rp[2],ap[2],w,c1);
79 if (--num == 0) break;
80 mul_add(rp[3],ap[3],w,c1); 83 mul_add(rp[3],ap[3],w,c1);
81 if (--num == 0) break; 84 ap+=4; rp+=4; num-=4;
82 ap+=4; 85 }
83 rp+=4; 86 if (num)
87 {
88 mul_add(rp[0],ap[0],w,c1); if (--num==0) return c1;
89 mul_add(rp[1],ap[1],w,c1); if (--num==0) return c1;
90 mul_add(rp[2],ap[2],w,c1); return c1;
84 } 91 }
85 92
86 return(c1); 93 return(c1);
@@ -90,63 +97,54 @@ BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
90 { 97 {
91 BN_ULONG c1=0; 98 BN_ULONG c1=0;
92 99
93 bn_check_num(num); 100 assert(num >= 0);
94 if (num <= 0) return(c1); 101 if (num <= 0) return(c1);
95 102
96 /* for (;;) */ 103 while (num&~3)
97 while (1) /* circumvent egcs-1.1.2 bug */
98 { 104 {
99 mul(rp[0],ap[0],w,c1); 105 mul(rp[0],ap[0],w,c1);
100 if (--num == 0) break;
101 mul(rp[1],ap[1],w,c1); 106 mul(rp[1],ap[1],w,c1);
102 if (--num == 0) break;
103 mul(rp[2],ap[2],w,c1); 107 mul(rp[2],ap[2],w,c1);
104 if (--num == 0) break;
105 mul(rp[3],ap[3],w,c1); 108 mul(rp[3],ap[3],w,c1);
106 if (--num == 0) break; 109 ap+=4; rp+=4; num-=4;
107 ap+=4; 110 }
108 rp+=4; 111 if (num)
112 {
113 mul(rp[0],ap[0],w,c1); if (--num == 0) return c1;
114 mul(rp[1],ap[1],w,c1); if (--num == 0) return c1;
115 mul(rp[2],ap[2],w,c1);
109 } 116 }
110 return(c1); 117 return(c1);
111 } 118 }
112 119
113void bn_sqr_words(BN_ULONG *r, BN_ULONG *a, int n) 120void bn_sqr_words(BN_ULONG *r, BN_ULONG *a, int n)
114 { 121 {
115 bn_check_num(n); 122 assert(n >= 0);
116 if (n <= 0) return; 123 if (n <= 0) return;
117 for (;;) 124 while (n&~3)
118 { 125 {
119 BN_ULLONG t; 126 sqr(r[0],r[1],a[0]);
120 127 sqr(r[2],r[3],a[1]);
121 t=(BN_ULLONG)(a[0])*(a[0]); 128 sqr(r[4],r[5],a[2]);
122 r[0]=Lw(t); r[1]=Hw(t); 129 sqr(r[6],r[7],a[3]);
123 if (--n == 0) break; 130 a+=4; r+=8; n-=4;
124 131 }
125 t=(BN_ULLONG)(a[1])*(a[1]); 132 if (n)
126 r[2]=Lw(t); r[3]=Hw(t); 133 {
127 if (--n == 0) break; 134 sqr(r[0],r[1],a[0]); if (--n == 0) return;
128 135 sqr(r[2],r[3],a[1]); if (--n == 0) return;
129 t=(BN_ULLONG)(a[2])*(a[2]); 136 sqr(r[4],r[5],a[2]);
130 r[4]=Lw(t); r[5]=Hw(t);
131 if (--n == 0) break;
132
133 t=(BN_ULLONG)(a[3])*(a[3]);
134 r[6]=Lw(t); r[7]=Hw(t);
135 if (--n == 0) break;
136
137 a+=4;
138 r+=8;
139 } 137 }
140 } 138 }
141 139
142#else 140#else /* !(defined(BN_LLONG) || defined(BN_UMULT_HIGH)) */
143 141
144BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) 142BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
145 { 143 {
146 BN_ULONG c=0; 144 BN_ULONG c=0;
147 BN_ULONG bl,bh; 145 BN_ULONG bl,bh;
148 146
149 bn_check_num(num); 147 assert(num >= 0);
150 if (num <= 0) return((BN_ULONG)0); 148 if (num <= 0) return((BN_ULONG)0);
151 149
152 bl=LBITS(w); 150 bl=LBITS(w);
@@ -173,7 +171,7 @@ BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
173 BN_ULONG carry=0; 171 BN_ULONG carry=0;
174 BN_ULONG bl,bh; 172 BN_ULONG bl,bh;
175 173
176 bn_check_num(num); 174 assert(num >= 0);
177 if (num <= 0) return((BN_ULONG)0); 175 if (num <= 0) return((BN_ULONG)0);
178 176
179 bl=LBITS(w); 177 bl=LBITS(w);
@@ -197,7 +195,7 @@ BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
197 195
198void bn_sqr_words(BN_ULONG *r, BN_ULONG *a, int n) 196void bn_sqr_words(BN_ULONG *r, BN_ULONG *a, int n)
199 { 197 {
200 bn_check_num(n); 198 assert(n >= 0);
201 if (n <= 0) return; 199 if (n <= 0) return;
202 for (;;) 200 for (;;)
203 { 201 {
@@ -218,7 +216,7 @@ void bn_sqr_words(BN_ULONG *r, BN_ULONG *a, int n)
218 } 216 }
219 } 217 }
220 218
221#endif 219#endif /* !(defined(BN_LLONG) || defined(BN_UMULT_HIGH)) */
222 220
223#if defined(BN_LLONG) && defined(BN_DIV2W) 221#if defined(BN_LLONG) && defined(BN_DIV2W)
224 222
@@ -300,14 +298,14 @@ BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
300 ret|=q; 298 ret|=q;
301 return(ret); 299 return(ret);
302 } 300 }
303#endif 301#endif /* !defined(BN_LLONG) && defined(BN_DIV2W) */
304 302
305#ifdef BN_LLONG 303#ifdef BN_LLONG
306BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) 304BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
307 { 305 {
308 BN_ULLONG ll=0; 306 BN_ULLONG ll=0;
309 307
310 bn_check_num(n); 308 assert(n >= 0);
311 if (n <= 0) return((BN_ULONG)0); 309 if (n <= 0) return((BN_ULONG)0);
312 310
313 for (;;) 311 for (;;)
@@ -338,12 +336,12 @@ BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
338 } 336 }
339 return((BN_ULONG)ll); 337 return((BN_ULONG)ll);
340 } 338 }
341#else 339#else /* !BN_LLONG */
342BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) 340BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
343 { 341 {
344 BN_ULONG c,l,t; 342 BN_ULONG c,l,t;
345 343
346 bn_check_num(n); 344 assert(n >= 0);
347 if (n <= 0) return((BN_ULONG)0); 345 if (n <= 0) return((BN_ULONG)0);
348 346
349 c=0; 347 c=0;
@@ -387,14 +385,14 @@ BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
387 } 385 }
388 return((BN_ULONG)c); 386 return((BN_ULONG)c);
389 } 387 }
390#endif 388#endif /* !BN_LLONG */
391 389
392BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) 390BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
393 { 391 {
394 BN_ULONG t1,t2; 392 BN_ULONG t1,t2;
395 int c=0; 393 int c=0;
396 394
397 bn_check_num(n); 395 assert(n >= 0);
398 if (n <= 0) return((BN_ULONG)0); 396 if (n <= 0) return((BN_ULONG)0);
399 397
400 for (;;) 398 for (;;)
@@ -433,6 +431,11 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
433#undef bn_sqr_comba8 431#undef bn_sqr_comba8
434#undef bn_sqr_comba4 432#undef bn_sqr_comba4
435 433
434/* mul_add_c(a,b,c0,c1,c2) -- c+=a*b for three word number c=(c2,c1,c0) */
435/* mul_add_c2(a,b,c0,c1,c2) -- c+=2*a*b for three word number c=(c2,c1,c0) */
436/* sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0) */
437/* sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */
438
436#ifdef BN_LLONG 439#ifdef BN_LLONG
437#define mul_add_c(a,b,c0,c1,c2) \ 440#define mul_add_c(a,b,c0,c1,c2) \
438 t=(BN_ULLONG)a*b; \ 441 t=(BN_ULLONG)a*b; \
@@ -460,7 +463,39 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
460 463
461#define sqr_add_c2(a,i,j,c0,c1,c2) \ 464#define sqr_add_c2(a,i,j,c0,c1,c2) \
462 mul_add_c2((a)[i],(a)[j],c0,c1,c2) 465 mul_add_c2((a)[i],(a)[j],c0,c1,c2)
463#else 466
467#elif defined(BN_UMULT_HIGH)
468
469#define mul_add_c(a,b,c0,c1,c2) { \
470 BN_ULONG ta=(a),tb=(b); \
471 t1 = ta * tb; \
472 t2 = BN_UMULT_HIGH(ta,tb); \
473 c0 += t1; t2 += (c0<t1)?1:0; \
474 c1 += t2; c2 += (c1<t2)?1:0; \
475 }
476
477#define mul_add_c2(a,b,c0,c1,c2) { \
478 BN_ULONG ta=(a),tb=(b),t0; \
479 t1 = BN_UMULT_HIGH(ta,tb); \
480 t0 = ta * tb; \
481 t2 = t1+t1; c2 += (t2<t1)?1:0; \
482 t1 = t0+t0; t2 += (t1<t0)?1:0; \
483 c0 += t1; t2 += (c0<t1)?1:0; \
484 c1 += t2; c2 += (c1<t2)?1:0; \
485 }
486
487#define sqr_add_c(a,i,c0,c1,c2) { \
488 BN_ULONG ta=(a)[i]; \
489 t1 = ta * ta; \
490 t2 = BN_UMULT_HIGH(ta,ta); \
491 c0 += t1; t2 += (c0<t1)?1:0; \
492 c1 += t2; c2 += (c1<t2)?1:0; \
493 }
494
495#define sqr_add_c2(a,i,j,c0,c1,c2) \
496 mul_add_c2((a)[i],(a)[j],c0,c1,c2)
497
498#else /* !BN_LLONG */
464#define mul_add_c(a,b,c0,c1,c2) \ 499#define mul_add_c(a,b,c0,c1,c2) \
465 t1=LBITS(a); t2=HBITS(a); \ 500 t1=LBITS(a); t2=HBITS(a); \
466 bl=LBITS(b); bh=HBITS(b); \ 501 bl=LBITS(b); bh=HBITS(b); \
@@ -487,7 +522,7 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
487 522
488#define sqr_add_c2(a,i,j,c0,c1,c2) \ 523#define sqr_add_c2(a,i,j,c0,c1,c2) \
489 mul_add_c2((a)[i],(a)[j],c0,c1,c2) 524 mul_add_c2((a)[i],(a)[j],c0,c1,c2)
490#endif 525#endif /* !BN_LLONG */
491 526
492void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) 527void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
493 { 528 {
@@ -762,7 +797,7 @@ void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a)
762 r[6]=c1; 797 r[6]=c1;
763 r[7]=c2; 798 r[7]=c2;
764 } 799 }
765#else 800#else /* !BN_MUL_COMBA */
766 801
767/* hmm... is it faster just to do a multiply? */ 802/* hmm... is it faster just to do a multiply? */
768#undef bn_sqr_comba4 803#undef bn_sqr_comba4
@@ -799,4 +834,4 @@ void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
799 r[15]=bn_mul_add_words(&(r[7]),a,8,b[7]); 834 r[15]=bn_mul_add_words(&(r[7]),a,8,b[7]);
800 } 835 }
801 836
802#endif /* BN_COMBA */ 837#endif /* !BN_MUL_COMBA */