diff options
Diffstat (limited to 'src/lib/libcrypto/bn/bn_lcl.h')
-rw-r--r-- | src/lib/libcrypto/bn/bn_lcl.h | 125 |
1 files changed, 89 insertions, 36 deletions
diff --git a/src/lib/libcrypto/bn/bn_lcl.h b/src/lib/libcrypto/bn/bn_lcl.h index 85a372695b..e36ccbc4c2 100644 --- a/src/lib/libcrypto/bn/bn_lcl.h +++ b/src/lib/libcrypto/bn/bn_lcl.h | |||
@@ -73,18 +73,53 @@ extern "C" { | |||
73 | #define BN_MUL_LOW_RECURSIVE_SIZE_NORMAL (32) /* 32 */ | 73 | #define BN_MUL_LOW_RECURSIVE_SIZE_NORMAL (32) /* 32 */ |
74 | #define BN_MONT_CTX_SET_SIZE_WORD (64) /* 32 */ | 74 | #define BN_MONT_CTX_SET_SIZE_WORD (64) /* 32 */ |
75 | 75 | ||
76 | #if 0 | 76 | #if !defined(NO_ASM) && !defined(NO_INLINE_ASM) && !defined(PEDANTIC) |
77 | #ifndef BN_MUL_COMBA | 77 | /* |
78 | /* #define bn_mul_comba8(r,a,b) bn_mul_normal(r,a,8,b,8) */ | 78 | * BN_UMULT_HIGH section. |
79 | /* #define bn_mul_comba4(r,a,b) bn_mul_normal(r,a,4,b,4) */ | 79 | * |
80 | #endif | 80 | * No, I'm not trying to overwhelm you when stating that the |
81 | 81 | * product of N-bit numbers is 2*N bits wide:-) No, I don't expect | |
82 | #ifndef BN_SQR_COMBA | 82 | * you to be impressed when I say that if the compiler doesn't |
83 | /* This is probably faster than using the C code - I need to check */ | 83 | * support 2*N integer type, then you have to replace every N*N |
84 | #define bn_sqr_comba8(r,a) bn_mul_normal(r,a,8,a,8) | 84 | * multiplication with 4 (N/2)*(N/2) accompanied by some shifts |
85 | #define bn_sqr_comba4(r,a) bn_mul_normal(r,a,4,a,4) | 85 | * and additions which unavoidably results in severe performance |
86 | #endif | 86 | * penalties. Of course provided that the hardware is capable of |
87 | #endif | 87 | * producing 2*N result... That's when you normally start |
88 | * considering assembler implementation. However! It should be | ||
89 | * pointed out that some CPUs (most notably Alpha, PowerPC and | ||
90 | * upcoming IA-64 family:-) provide *separate* instruction | ||
91 | * calculating the upper half of the product placing the result | ||
92 | * into a general purpose register. Now *if* the compiler supports | ||
93 | * inline assembler, then it's not impossible to implement the | ||
94 | * "bignum" routines (and have the compiler optimize 'em) | ||
95 | * exhibiting "native" performance in C. That's what BN_UMULT_HIGH | ||
96 | * macro is about:-) | ||
97 | * | ||
98 | * <appro@fy.chalmers.se> | ||
99 | */ | ||
100 | # if defined(__alpha) && (defined(SIXTY_FOUR_BIT_LONG) || defined(SIXTY_FOUR_BIT)) | ||
101 | # if defined(__DECC) | ||
102 | # include <c_asm.h> | ||
103 | # define BN_UMULT_HIGH(a,b) (BN_ULONG)asm("umulh %a0,%a1,%v0",(a),(b)) | ||
104 | # elif defined(__GNUC__) | ||
105 | # define BN_UMULT_HIGH(a,b) ({ \ | ||
106 | register BN_ULONG ret; \ | ||
107 | asm ("umulh %1,%2,%0" \ | ||
108 | : "=r"(ret) \ | ||
109 | : "r"(a), "r"(b)); \ | ||
110 | ret; }) | ||
111 | # endif /* compiler */ | ||
112 | # elif defined(_ARCH_PPC) && defined(__64BIT__) && defined(SIXTY_FOUR_BIT_LONG) | ||
113 | # if defined(__GNUC__) | ||
114 | # define BN_UMULT_HIGH(a,b) ({ \ | ||
115 | register BN_ULONG ret; \ | ||
116 | asm ("mulhdu %0,%1,%2" \ | ||
117 | : "=r"(ret) \ | ||
118 | : "r"(a), "r"(b)); \ | ||
119 | ret; }) | ||
120 | # endif /* compiler */ | ||
121 | # endif /* cpu */ | ||
122 | #endif /* NO_ASM */ | ||
88 | 123 | ||
89 | /************************************************************* | 124 | /************************************************************* |
90 | * Using the long long type | 125 | * Using the long long type |
@@ -92,15 +127,12 @@ extern "C" { | |||
92 | #define Lw(t) (((BN_ULONG)(t))&BN_MASK2) | 127 | #define Lw(t) (((BN_ULONG)(t))&BN_MASK2) |
93 | #define Hw(t) (((BN_ULONG)((t)>>BN_BITS2))&BN_MASK2) | 128 | #define Hw(t) (((BN_ULONG)((t)>>BN_BITS2))&BN_MASK2) |
94 | 129 | ||
95 | /* These are used for internal error checking and are not normally used */ | 130 | /* This is used for internal error checking and is not normally used */ |
96 | #ifdef BN_DEBUG | 131 | #ifdef BN_DEBUG |
97 | #define bn_check_top(a) \ | 132 | # include <assert.h> |
98 | { if (((a)->top < 0) || ((a)->top > (a)->max)) \ | 133 | # define bn_check_top(a) assert ((a)->top >= 0 && (a)->top <= (a)->max); |
99 | { char *nullp=NULL; *nullp='z'; } } | ||
100 | #define bn_check_num(a) if ((a) < 0) { char *nullp=NULL; *nullp='z'; } | ||
101 | #else | 134 | #else |
102 | #define bn_check_top(a) | 135 | # define bn_check_top(a) |
103 | #define bn_check_num(a) | ||
104 | #endif | 136 | #endif |
105 | 137 | ||
106 | /* This macro is to add extra stuff for development checking */ | 138 | /* This macro is to add extra stuff for development checking */ |
@@ -134,8 +166,6 @@ extern "C" { | |||
134 | bn_set_max(r); \ | 166 | bn_set_max(r); \ |
135 | } | 167 | } |
136 | 168 | ||
137 | /* #define bn_expand(n,b) ((((b)/BN_BITS2) <= (n)->max)?(n):bn_expand2((n),(b))) */ | ||
138 | |||
139 | #ifdef BN_LLONG | 169 | #ifdef BN_LLONG |
140 | #define mul_add(r,a,w,c) { \ | 170 | #define mul_add(r,a,w,c) { \ |
141 | BN_ULLONG t; \ | 171 | BN_ULLONG t; \ |
@@ -151,6 +181,43 @@ extern "C" { | |||
151 | (c)= Hw(t); \ | 181 | (c)= Hw(t); \ |
152 | } | 182 | } |
153 | 183 | ||
184 | #define sqr(r0,r1,a) { \ | ||
185 | BN_ULLONG t; \ | ||
186 | t=(BN_ULLONG)(a)*(a); \ | ||
187 | (r0)=Lw(t); \ | ||
188 | (r1)=Hw(t); \ | ||
189 | } | ||
190 | |||
191 | #elif defined(BN_UMULT_HIGH) | ||
192 | #define mul_add(r,a,w,c) { \ | ||
193 | BN_ULONG high,low,ret,tmp=(a); \ | ||
194 | ret = (r); \ | ||
195 | high= BN_UMULT_HIGH(w,tmp); \ | ||
196 | ret += (c); \ | ||
197 | low = (w) * tmp; \ | ||
198 | (c) = (ret<(c))?1:0; \ | ||
199 | (c) += high; \ | ||
200 | ret += low; \ | ||
201 | (c) += (ret<low)?1:0; \ | ||
202 | (r) = ret; \ | ||
203 | } | ||
204 | |||
205 | #define mul(r,a,w,c) { \ | ||
206 | BN_ULONG high,low,ret,ta=(a); \ | ||
207 | low = (w) * ta; \ | ||
208 | high= BN_UMULT_HIGH(w,ta); \ | ||
209 | ret = low + (c); \ | ||
210 | (c) = high; \ | ||
211 | (c) += (ret<low)?1:0; \ | ||
212 | (r) = ret; \ | ||
213 | } | ||
214 | |||
215 | #define sqr(r0,r1,a) { \ | ||
216 | BN_ULONG tmp=(a); \ | ||
217 | (r0) = tmp * tmp; \ | ||
218 | (r1) = BN_UMULT_HIGH(tmp,tmp); \ | ||
219 | } | ||
220 | |||
154 | #else | 221 | #else |
155 | /************************************************************* | 222 | /************************************************************* |
156 | * No long long type | 223 | * No long long type |
@@ -228,21 +295,7 @@ extern "C" { | |||
228 | (c)=h&BN_MASK2; \ | 295 | (c)=h&BN_MASK2; \ |
229 | (r)=l&BN_MASK2; \ | 296 | (r)=l&BN_MASK2; \ |
230 | } | 297 | } |
231 | 298 | #endif /* !BN_LLONG */ | |
232 | #endif | ||
233 | |||
234 | OPENSSL_EXTERN int bn_limit_bits; | ||
235 | OPENSSL_EXTERN int bn_limit_num; /* (1<<bn_limit_bits) */ | ||
236 | /* Recursive 'low' limit */ | ||
237 | OPENSSL_EXTERN int bn_limit_bits_low; | ||
238 | OPENSSL_EXTERN int bn_limit_num_low; /* (1<<bn_limit_bits_low) */ | ||
239 | /* Do modified 'high' part calculation' */ | ||
240 | OPENSSL_EXTERN int bn_limit_bits_high; | ||
241 | OPENSSL_EXTERN int bn_limit_num_high; /* (1<<bn_limit_bits_high) */ | ||
242 | OPENSSL_EXTERN int bn_limit_bits_mont; | ||
243 | OPENSSL_EXTERN int bn_limit_num_mont; /* (1<<bn_limit_bits_mont) */ | ||
244 | |||
245 | BIGNUM *bn_expand2(BIGNUM *b, int bits); | ||
246 | 299 | ||
247 | void bn_mul_normal(BN_ULONG *r,BN_ULONG *a,int na,BN_ULONG *b,int nb); | 300 | void bn_mul_normal(BN_ULONG *r,BN_ULONG *a,int na,BN_ULONG *b,int nb); |
248 | void bn_mul_comba8(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b); | 301 | void bn_mul_comba8(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b); |