diff options
Diffstat (limited to 'src/lib/libcrypto/bn/bn_mul.c')
-rw-r--r-- | src/lib/libcrypto/bn/bn_mul.c | 539 |
1 files changed, 453 insertions, 86 deletions
diff --git a/src/lib/libcrypto/bn/bn_mul.c b/src/lib/libcrypto/bn/bn_mul.c index 3ae3822bc2..b848c8cc60 100644 --- a/src/lib/libcrypto/bn/bn_mul.c +++ b/src/lib/libcrypto/bn/bn_mul.c | |||
@@ -56,10 +56,325 @@ | |||
56 | * [including the GNU Public Licence.] | 56 | * [including the GNU Public Licence.] |
57 | */ | 57 | */ |
58 | 58 | ||
59 | #ifndef BN_DEBUG | ||
60 | # undef NDEBUG /* avoid conflicting definitions */ | ||
61 | # define NDEBUG | ||
62 | #endif | ||
63 | |||
59 | #include <stdio.h> | 64 | #include <stdio.h> |
65 | #include <assert.h> | ||
60 | #include "cryptlib.h" | 66 | #include "cryptlib.h" |
61 | #include "bn_lcl.h" | 67 | #include "bn_lcl.h" |
62 | 68 | ||
69 | #if defined(OPENSSL_NO_ASM) || !defined(OPENSSL_BN_ASM_PART_WORDS) | ||
70 | /* Here follow specialised variants of bn_add_words() and | ||
71 | bn_sub_words(). They have the property of performing operations on | ||
72 | arrays of different sizes. The sizes of those arrays are expressed through | ||
73 | cl, which is the common length ( basically, min(len(a),len(b)) ), and dl, | ||
74 | which is the delta between the two lengths, calculated as len(a)-len(b). | ||
75 | All lengths are counted in BN_ULONGs. For the operations that require | ||
76 | a result array as parameter, it must have the length cl+abs(dl). | ||
77 | These functions should probably end up in bn_asm.c as soon as there are | ||
78 | assembler counterparts for the systems that use assembler files. */ | ||
79 | |||
80 | BN_ULONG bn_sub_part_words(BN_ULONG *r, | ||
81 | const BN_ULONG *a, const BN_ULONG *b, | ||
82 | int cl, int dl) | ||
83 | { | ||
84 | BN_ULONG c, t; | ||
85 | |||
86 | assert(cl >= 0); | ||
87 | c = bn_sub_words(r, a, b, cl); | ||
88 | |||
89 | if (dl == 0) | ||
90 | return c; | ||
91 | |||
92 | r += cl; | ||
93 | a += cl; | ||
94 | b += cl; | ||
95 | |||
96 | if (dl < 0) | ||
97 | { | ||
98 | #ifdef BN_COUNT | ||
99 | fprintf(stderr, " bn_sub_part_words %d + %d (dl < 0, c = %d)\n", cl, dl, c); | ||
100 | #endif | ||
101 | for (;;) | ||
102 | { | ||
103 | t = b[0]; | ||
104 | r[0] = (0-t-c)&BN_MASK2; | ||
105 | if (t != 0) c=1; | ||
106 | if (++dl >= 0) break; | ||
107 | |||
108 | t = b[1]; | ||
109 | r[1] = (0-t-c)&BN_MASK2; | ||
110 | if (t != 0) c=1; | ||
111 | if (++dl >= 0) break; | ||
112 | |||
113 | t = b[2]; | ||
114 | r[2] = (0-t-c)&BN_MASK2; | ||
115 | if (t != 0) c=1; | ||
116 | if (++dl >= 0) break; | ||
117 | |||
118 | t = b[3]; | ||
119 | r[3] = (0-t-c)&BN_MASK2; | ||
120 | if (t != 0) c=1; | ||
121 | if (++dl >= 0) break; | ||
122 | |||
123 | b += 4; | ||
124 | r += 4; | ||
125 | } | ||
126 | } | ||
127 | else | ||
128 | { | ||
129 | int save_dl = dl; | ||
130 | #ifdef BN_COUNT | ||
131 | fprintf(stderr, " bn_sub_part_words %d + %d (dl > 0, c = %d)\n", cl, dl, c); | ||
132 | #endif | ||
133 | while(c) | ||
134 | { | ||
135 | t = a[0]; | ||
136 | r[0] = (t-c)&BN_MASK2; | ||
137 | if (t != 0) c=0; | ||
138 | if (--dl <= 0) break; | ||
139 | |||
140 | t = a[1]; | ||
141 | r[1] = (t-c)&BN_MASK2; | ||
142 | if (t != 0) c=0; | ||
143 | if (--dl <= 0) break; | ||
144 | |||
145 | t = a[2]; | ||
146 | r[2] = (t-c)&BN_MASK2; | ||
147 | if (t != 0) c=0; | ||
148 | if (--dl <= 0) break; | ||
149 | |||
150 | t = a[3]; | ||
151 | r[3] = (t-c)&BN_MASK2; | ||
152 | if (t != 0) c=0; | ||
153 | if (--dl <= 0) break; | ||
154 | |||
155 | save_dl = dl; | ||
156 | a += 4; | ||
157 | r += 4; | ||
158 | } | ||
159 | if (dl > 0) | ||
160 | { | ||
161 | #ifdef BN_COUNT | ||
162 | fprintf(stderr, " bn_sub_part_words %d + %d (dl > 0, c == 0)\n", cl, dl); | ||
163 | #endif | ||
164 | if (save_dl > dl) | ||
165 | { | ||
166 | switch (save_dl - dl) | ||
167 | { | ||
168 | case 1: | ||
169 | r[1] = a[1]; | ||
170 | if (--dl <= 0) break; | ||
171 | case 2: | ||
172 | r[2] = a[2]; | ||
173 | if (--dl <= 0) break; | ||
174 | case 3: | ||
175 | r[3] = a[3]; | ||
176 | if (--dl <= 0) break; | ||
177 | } | ||
178 | a += 4; | ||
179 | r += 4; | ||
180 | } | ||
181 | } | ||
182 | if (dl > 0) | ||
183 | { | ||
184 | #ifdef BN_COUNT | ||
185 | fprintf(stderr, " bn_sub_part_words %d + %d (dl > 0, copy)\n", cl, dl); | ||
186 | #endif | ||
187 | for(;;) | ||
188 | { | ||
189 | r[0] = a[0]; | ||
190 | if (--dl <= 0) break; | ||
191 | r[1] = a[1]; | ||
192 | if (--dl <= 0) break; | ||
193 | r[2] = a[2]; | ||
194 | if (--dl <= 0) break; | ||
195 | r[3] = a[3]; | ||
196 | if (--dl <= 0) break; | ||
197 | |||
198 | a += 4; | ||
199 | r += 4; | ||
200 | } | ||
201 | } | ||
202 | } | ||
203 | return c; | ||
204 | } | ||
205 | #endif | ||
206 | |||
207 | BN_ULONG bn_add_part_words(BN_ULONG *r, | ||
208 | const BN_ULONG *a, const BN_ULONG *b, | ||
209 | int cl, int dl) | ||
210 | { | ||
211 | BN_ULONG c, l, t; | ||
212 | |||
213 | assert(cl >= 0); | ||
214 | c = bn_add_words(r, a, b, cl); | ||
215 | |||
216 | if (dl == 0) | ||
217 | return c; | ||
218 | |||
219 | r += cl; | ||
220 | a += cl; | ||
221 | b += cl; | ||
222 | |||
223 | if (dl < 0) | ||
224 | { | ||
225 | int save_dl = dl; | ||
226 | #ifdef BN_COUNT | ||
227 | fprintf(stderr, " bn_add_part_words %d + %d (dl < 0, c = %d)\n", cl, dl, c); | ||
228 | #endif | ||
229 | while (c) | ||
230 | { | ||
231 | l=(c+b[0])&BN_MASK2; | ||
232 | c=(l < c); | ||
233 | r[0]=l; | ||
234 | if (++dl >= 0) break; | ||
235 | |||
236 | l=(c+b[1])&BN_MASK2; | ||
237 | c=(l < c); | ||
238 | r[1]=l; | ||
239 | if (++dl >= 0) break; | ||
240 | |||
241 | l=(c+b[2])&BN_MASK2; | ||
242 | c=(l < c); | ||
243 | r[2]=l; | ||
244 | if (++dl >= 0) break; | ||
245 | |||
246 | l=(c+b[3])&BN_MASK2; | ||
247 | c=(l < c); | ||
248 | r[3]=l; | ||
249 | if (++dl >= 0) break; | ||
250 | |||
251 | save_dl = dl; | ||
252 | b+=4; | ||
253 | r+=4; | ||
254 | } | ||
255 | if (dl < 0) | ||
256 | { | ||
257 | #ifdef BN_COUNT | ||
258 | fprintf(stderr, " bn_add_part_words %d + %d (dl < 0, c == 0)\n", cl, dl); | ||
259 | #endif | ||
260 | if (save_dl < dl) | ||
261 | { | ||
262 | switch (dl - save_dl) | ||
263 | { | ||
264 | case 1: | ||
265 | r[1] = b[1]; | ||
266 | if (++dl >= 0) break; | ||
267 | case 2: | ||
268 | r[2] = b[2]; | ||
269 | if (++dl >= 0) break; | ||
270 | case 3: | ||
271 | r[3] = b[3]; | ||
272 | if (++dl >= 0) break; | ||
273 | } | ||
274 | b += 4; | ||
275 | r += 4; | ||
276 | } | ||
277 | } | ||
278 | if (dl < 0) | ||
279 | { | ||
280 | #ifdef BN_COUNT | ||
281 | fprintf(stderr, " bn_add_part_words %d + %d (dl < 0, copy)\n", cl, dl); | ||
282 | #endif | ||
283 | for(;;) | ||
284 | { | ||
285 | r[0] = b[0]; | ||
286 | if (++dl >= 0) break; | ||
287 | r[1] = b[1]; | ||
288 | if (++dl >= 0) break; | ||
289 | r[2] = b[2]; | ||
290 | if (++dl >= 0) break; | ||
291 | r[3] = b[3]; | ||
292 | if (++dl >= 0) break; | ||
293 | |||
294 | b += 4; | ||
295 | r += 4; | ||
296 | } | ||
297 | } | ||
298 | } | ||
299 | else | ||
300 | { | ||
301 | int save_dl = dl; | ||
302 | #ifdef BN_COUNT | ||
303 | fprintf(stderr, " bn_add_part_words %d + %d (dl > 0)\n", cl, dl); | ||
304 | #endif | ||
305 | while (c) | ||
306 | { | ||
307 | t=(a[0]+c)&BN_MASK2; | ||
308 | c=(t < c); | ||
309 | r[0]=t; | ||
310 | if (--dl <= 0) break; | ||
311 | |||
312 | t=(a[1]+c)&BN_MASK2; | ||
313 | c=(t < c); | ||
314 | r[1]=t; | ||
315 | if (--dl <= 0) break; | ||
316 | |||
317 | t=(a[2]+c)&BN_MASK2; | ||
318 | c=(t < c); | ||
319 | r[2]=t; | ||
320 | if (--dl <= 0) break; | ||
321 | |||
322 | t=(a[3]+c)&BN_MASK2; | ||
323 | c=(t < c); | ||
324 | r[3]=t; | ||
325 | if (--dl <= 0) break; | ||
326 | |||
327 | save_dl = dl; | ||
328 | a+=4; | ||
329 | r+=4; | ||
330 | } | ||
331 | #ifdef BN_COUNT | ||
332 | fprintf(stderr, " bn_add_part_words %d + %d (dl > 0, c == 0)\n", cl, dl); | ||
333 | #endif | ||
334 | if (dl > 0) | ||
335 | { | ||
336 | if (save_dl > dl) | ||
337 | { | ||
338 | switch (save_dl - dl) | ||
339 | { | ||
340 | case 1: | ||
341 | r[1] = a[1]; | ||
342 | if (--dl <= 0) break; | ||
343 | case 2: | ||
344 | r[2] = a[2]; | ||
345 | if (--dl <= 0) break; | ||
346 | case 3: | ||
347 | r[3] = a[3]; | ||
348 | if (--dl <= 0) break; | ||
349 | } | ||
350 | a += 4; | ||
351 | r += 4; | ||
352 | } | ||
353 | } | ||
354 | if (dl > 0) | ||
355 | { | ||
356 | #ifdef BN_COUNT | ||
357 | fprintf(stderr, " bn_add_part_words %d + %d (dl > 0, copy)\n", cl, dl); | ||
358 | #endif | ||
359 | for(;;) | ||
360 | { | ||
361 | r[0] = a[0]; | ||
362 | if (--dl <= 0) break; | ||
363 | r[1] = a[1]; | ||
364 | if (--dl <= 0) break; | ||
365 | r[2] = a[2]; | ||
366 | if (--dl <= 0) break; | ||
367 | r[3] = a[3]; | ||
368 | if (--dl <= 0) break; | ||
369 | |||
370 | a += 4; | ||
371 | r += 4; | ||
372 | } | ||
373 | } | ||
374 | } | ||
375 | return c; | ||
376 | } | ||
377 | |||
63 | #ifdef BN_RECURSION | 378 | #ifdef BN_RECURSION |
64 | /* Karatsuba recursive multiplication algorithm | 379 | /* Karatsuba recursive multiplication algorithm |
65 | * (cf. Knuth, The Art of Computer Programming, Vol. 2) */ | 380 | * (cf. Knuth, The Art of Computer Programming, Vol. 2) */ |
@@ -74,15 +389,17 @@ | |||
74 | * a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0]) | 389 | * a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0]) |
75 | * a[1]*b[1] | 390 | * a[1]*b[1] |
76 | */ | 391 | */ |
392 | /* dnX may not be positive, but n2/2+dnX has to be */ | ||
77 | void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, | 393 | void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, |
78 | BN_ULONG *t) | 394 | int dna, int dnb, BN_ULONG *t) |
79 | { | 395 | { |
80 | int n=n2/2,c1,c2; | 396 | int n=n2/2,c1,c2; |
397 | int tna=n+dna, tnb=n+dnb; | ||
81 | unsigned int neg,zero; | 398 | unsigned int neg,zero; |
82 | BN_ULONG ln,lo,*p; | 399 | BN_ULONG ln,lo,*p; |
83 | 400 | ||
84 | # ifdef BN_COUNT | 401 | # ifdef BN_COUNT |
85 | printf(" bn_mul_recursive %d * %d\n",n2,n2); | 402 | fprintf(stderr," bn_mul_recursive %d%+d * %d%+d\n",n2,dna,n2,dnb); |
86 | # endif | 403 | # endif |
87 | # ifdef BN_MUL_COMBA | 404 | # ifdef BN_MUL_COMBA |
88 | # if 0 | 405 | # if 0 |
@@ -92,34 +409,40 @@ void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, | |||
92 | return; | 409 | return; |
93 | } | 410 | } |
94 | # endif | 411 | # endif |
95 | if (n2 == 8) | 412 | /* Only call bn_mul_comba 8 if n2 == 8 and the |
413 | * two arrays are complete [steve] | ||
414 | */ | ||
415 | if (n2 == 8 && dna == 0 && dnb == 0) | ||
96 | { | 416 | { |
97 | bn_mul_comba8(r,a,b); | 417 | bn_mul_comba8(r,a,b); |
98 | return; | 418 | return; |
99 | } | 419 | } |
100 | # endif /* BN_MUL_COMBA */ | 420 | # endif /* BN_MUL_COMBA */ |
421 | /* Else do normal multiply */ | ||
101 | if (n2 < BN_MUL_RECURSIVE_SIZE_NORMAL) | 422 | if (n2 < BN_MUL_RECURSIVE_SIZE_NORMAL) |
102 | { | 423 | { |
103 | /* This should not happen */ | 424 | bn_mul_normal(r,a,n2+dna,b,n2+dnb); |
104 | bn_mul_normal(r,a,n2,b,n2); | 425 | if ((dna + dnb) < 0) |
426 | memset(&r[2*n2 + dna + dnb], 0, | ||
427 | sizeof(BN_ULONG) * -(dna + dnb)); | ||
105 | return; | 428 | return; |
106 | } | 429 | } |
107 | /* r=(a[0]-a[1])*(b[1]-b[0]) */ | 430 | /* r=(a[0]-a[1])*(b[1]-b[0]) */ |
108 | c1=bn_cmp_words(a,&(a[n]),n); | 431 | c1=bn_cmp_part_words(a,&(a[n]),tna,n-tna); |
109 | c2=bn_cmp_words(&(b[n]),b,n); | 432 | c2=bn_cmp_part_words(&(b[n]),b,tnb,tnb-n); |
110 | zero=neg=0; | 433 | zero=neg=0; |
111 | switch (c1*3+c2) | 434 | switch (c1*3+c2) |
112 | { | 435 | { |
113 | case -4: | 436 | case -4: |
114 | bn_sub_words(t, &(a[n]),a, n); /* - */ | 437 | bn_sub_part_words(t, &(a[n]),a, tna,tna-n); /* - */ |
115 | bn_sub_words(&(t[n]),b, &(b[n]),n); /* - */ | 438 | bn_sub_part_words(&(t[n]),b, &(b[n]),tnb,n-tnb); /* - */ |
116 | break; | 439 | break; |
117 | case -3: | 440 | case -3: |
118 | zero=1; | 441 | zero=1; |
119 | break; | 442 | break; |
120 | case -2: | 443 | case -2: |
121 | bn_sub_words(t, &(a[n]),a, n); /* - */ | 444 | bn_sub_part_words(t, &(a[n]),a, tna,tna-n); /* - */ |
122 | bn_sub_words(&(t[n]),&(b[n]),b, n); /* + */ | 445 | bn_sub_part_words(&(t[n]),&(b[n]),b, tnb,tnb-n); /* + */ |
123 | neg=1; | 446 | neg=1; |
124 | break; | 447 | break; |
125 | case -1: | 448 | case -1: |
@@ -128,21 +451,22 @@ void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, | |||
128 | zero=1; | 451 | zero=1; |
129 | break; | 452 | break; |
130 | case 2: | 453 | case 2: |
131 | bn_sub_words(t, a, &(a[n]),n); /* + */ | 454 | bn_sub_part_words(t, a, &(a[n]),tna,n-tna); /* + */ |
132 | bn_sub_words(&(t[n]),b, &(b[n]),n); /* - */ | 455 | bn_sub_part_words(&(t[n]),b, &(b[n]),tnb,n-tnb); /* - */ |
133 | neg=1; | 456 | neg=1; |
134 | break; | 457 | break; |
135 | case 3: | 458 | case 3: |
136 | zero=1; | 459 | zero=1; |
137 | break; | 460 | break; |
138 | case 4: | 461 | case 4: |
139 | bn_sub_words(t, a, &(a[n]),n); | 462 | bn_sub_part_words(t, a, &(a[n]),tna,n-tna); |
140 | bn_sub_words(&(t[n]),&(b[n]),b, n); | 463 | bn_sub_part_words(&(t[n]),&(b[n]),b, tnb,tnb-n); |
141 | break; | 464 | break; |
142 | } | 465 | } |
143 | 466 | ||
144 | # ifdef BN_MUL_COMBA | 467 | # ifdef BN_MUL_COMBA |
145 | if (n == 4) | 468 | if (n == 4 && dna == 0 && dnb == 0) /* XXX: bn_mul_comba4 could take |
469 | extra args to do this well */ | ||
146 | { | 470 | { |
147 | if (!zero) | 471 | if (!zero) |
148 | bn_mul_comba4(&(t[n2]),t,&(t[n])); | 472 | bn_mul_comba4(&(t[n2]),t,&(t[n])); |
@@ -152,7 +476,9 @@ void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, | |||
152 | bn_mul_comba4(r,a,b); | 476 | bn_mul_comba4(r,a,b); |
153 | bn_mul_comba4(&(r[n2]),&(a[n]),&(b[n])); | 477 | bn_mul_comba4(&(r[n2]),&(a[n]),&(b[n])); |
154 | } | 478 | } |
155 | else if (n == 8) | 479 | else if (n == 8 && dna == 0 && dnb == 0) /* XXX: bn_mul_comba8 could |
480 | take extra args to do this | ||
481 | well */ | ||
156 | { | 482 | { |
157 | if (!zero) | 483 | if (!zero) |
158 | bn_mul_comba8(&(t[n2]),t,&(t[n])); | 484 | bn_mul_comba8(&(t[n2]),t,&(t[n])); |
@@ -167,11 +493,11 @@ void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, | |||
167 | { | 493 | { |
168 | p= &(t[n2*2]); | 494 | p= &(t[n2*2]); |
169 | if (!zero) | 495 | if (!zero) |
170 | bn_mul_recursive(&(t[n2]),t,&(t[n]),n,p); | 496 | bn_mul_recursive(&(t[n2]),t,&(t[n]),n,0,0,p); |
171 | else | 497 | else |
172 | memset(&(t[n2]),0,n2*sizeof(BN_ULONG)); | 498 | memset(&(t[n2]),0,n2*sizeof(BN_ULONG)); |
173 | bn_mul_recursive(r,a,b,n,p); | 499 | bn_mul_recursive(r,a,b,n,0,0,p); |
174 | bn_mul_recursive(&(r[n2]),&(a[n]),&(b[n]),n,p); | 500 | bn_mul_recursive(&(r[n2]),&(a[n]),&(b[n]),n,dna,dnb,p); |
175 | } | 501 | } |
176 | 502 | ||
177 | /* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign | 503 | /* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign |
@@ -220,39 +546,40 @@ void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, | |||
220 | 546 | ||
221 | /* n+tn is the word length | 547 | /* n+tn is the word length |
222 | * t needs to be n*4 in size, as does r */ | 548 | * t needs to be n*4 in size, as does r */ |
223 | void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int tn, | 549 | /* tnX must be non-negative and less than n */ |
224 | int n, BN_ULONG *t) | 550 | void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n, |
551 | int tna, int tnb, BN_ULONG *t) | ||
225 | { | 552 | { |
226 | int i,j,n2=n*2; | 553 | int i,j,n2=n*2; |
227 | int c1,c2,neg,zero; | 554 | int c1,c2,neg,zero; |
228 | BN_ULONG ln,lo,*p; | 555 | BN_ULONG ln,lo,*p; |
229 | 556 | ||
230 | # ifdef BN_COUNT | 557 | # ifdef BN_COUNT |
231 | printf(" bn_mul_part_recursive %d * %d\n",tn+n,tn+n); | 558 | fprintf(stderr," bn_mul_part_recursive (%d%+d) * (%d%+d)\n", |
559 | n, tna, n, tnb); | ||
232 | # endif | 560 | # endif |
233 | if (n < 8) | 561 | if (n < 8) |
234 | { | 562 | { |
235 | i=tn+n; | 563 | bn_mul_normal(r,a,n+tna,b,n+tnb); |
236 | bn_mul_normal(r,a,i,b,i); | ||
237 | return; | 564 | return; |
238 | } | 565 | } |
239 | 566 | ||
240 | /* r=(a[0]-a[1])*(b[1]-b[0]) */ | 567 | /* r=(a[0]-a[1])*(b[1]-b[0]) */ |
241 | c1=bn_cmp_words(a,&(a[n]),n); | 568 | c1=bn_cmp_part_words(a,&(a[n]),tna,n-tna); |
242 | c2=bn_cmp_words(&(b[n]),b,n); | 569 | c2=bn_cmp_part_words(&(b[n]),b,tnb,tnb-n); |
243 | zero=neg=0; | 570 | zero=neg=0; |
244 | switch (c1*3+c2) | 571 | switch (c1*3+c2) |
245 | { | 572 | { |
246 | case -4: | 573 | case -4: |
247 | bn_sub_words(t, &(a[n]),a, n); /* - */ | 574 | bn_sub_part_words(t, &(a[n]),a, tna,tna-n); /* - */ |
248 | bn_sub_words(&(t[n]),b, &(b[n]),n); /* - */ | 575 | bn_sub_part_words(&(t[n]),b, &(b[n]),tnb,n-tnb); /* - */ |
249 | break; | 576 | break; |
250 | case -3: | 577 | case -3: |
251 | zero=1; | 578 | zero=1; |
252 | /* break; */ | 579 | /* break; */ |
253 | case -2: | 580 | case -2: |
254 | bn_sub_words(t, &(a[n]),a, n); /* - */ | 581 | bn_sub_part_words(t, &(a[n]),a, tna,tna-n); /* - */ |
255 | bn_sub_words(&(t[n]),&(b[n]),b, n); /* + */ | 582 | bn_sub_part_words(&(t[n]),&(b[n]),b, tnb,tnb-n); /* + */ |
256 | neg=1; | 583 | neg=1; |
257 | break; | 584 | break; |
258 | case -1: | 585 | case -1: |
@@ -261,16 +588,16 @@ void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int tn, | |||
261 | zero=1; | 588 | zero=1; |
262 | /* break; */ | 589 | /* break; */ |
263 | case 2: | 590 | case 2: |
264 | bn_sub_words(t, a, &(a[n]),n); /* + */ | 591 | bn_sub_part_words(t, a, &(a[n]),tna,n-tna); /* + */ |
265 | bn_sub_words(&(t[n]),b, &(b[n]),n); /* - */ | 592 | bn_sub_part_words(&(t[n]),b, &(b[n]),tnb,n-tnb); /* - */ |
266 | neg=1; | 593 | neg=1; |
267 | break; | 594 | break; |
268 | case 3: | 595 | case 3: |
269 | zero=1; | 596 | zero=1; |
270 | /* break; */ | 597 | /* break; */ |
271 | case 4: | 598 | case 4: |
272 | bn_sub_words(t, a, &(a[n]),n); | 599 | bn_sub_part_words(t, a, &(a[n]),tna,n-tna); |
273 | bn_sub_words(&(t[n]),&(b[n]),b, n); | 600 | bn_sub_part_words(&(t[n]),&(b[n]),b, tnb,tnb-n); |
274 | break; | 601 | break; |
275 | } | 602 | } |
276 | /* The zero case isn't yet implemented here. The speedup | 603 | /* The zero case isn't yet implemented here. The speedup |
@@ -289,54 +616,62 @@ void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int tn, | |||
289 | { | 616 | { |
290 | bn_mul_comba8(&(t[n2]),t,&(t[n])); | 617 | bn_mul_comba8(&(t[n2]),t,&(t[n])); |
291 | bn_mul_comba8(r,a,b); | 618 | bn_mul_comba8(r,a,b); |
292 | bn_mul_normal(&(r[n2]),&(a[n]),tn,&(b[n]),tn); | 619 | bn_mul_normal(&(r[n2]),&(a[n]),tna,&(b[n]),tnb); |
293 | memset(&(r[n2+tn*2]),0,sizeof(BN_ULONG)*(n2-tn*2)); | 620 | memset(&(r[n2+tna+tnb]),0,sizeof(BN_ULONG)*(n2-tna-tnb)); |
294 | } | 621 | } |
295 | else | 622 | else |
296 | { | 623 | { |
297 | p= &(t[n2*2]); | 624 | p= &(t[n2*2]); |
298 | bn_mul_recursive(&(t[n2]),t,&(t[n]),n,p); | 625 | bn_mul_recursive(&(t[n2]),t,&(t[n]),n,0,0,p); |
299 | bn_mul_recursive(r,a,b,n,p); | 626 | bn_mul_recursive(r,a,b,n,0,0,p); |
300 | i=n/2; | 627 | i=n/2; |
301 | /* If there is only a bottom half to the number, | 628 | /* If there is only a bottom half to the number, |
302 | * just do it */ | 629 | * just do it */ |
303 | j=tn-i; | 630 | if (tna > tnb) |
631 | j = tna - i; | ||
632 | else | ||
633 | j = tnb - i; | ||
304 | if (j == 0) | 634 | if (j == 0) |
305 | { | 635 | { |
306 | bn_mul_recursive(&(r[n2]),&(a[n]),&(b[n]),i,p); | 636 | bn_mul_recursive(&(r[n2]),&(a[n]),&(b[n]), |
637 | i,tna-i,tnb-i,p); | ||
307 | memset(&(r[n2+i*2]),0,sizeof(BN_ULONG)*(n2-i*2)); | 638 | memset(&(r[n2+i*2]),0,sizeof(BN_ULONG)*(n2-i*2)); |
308 | } | 639 | } |
309 | else if (j > 0) /* eg, n == 16, i == 8 and tn == 11 */ | 640 | else if (j > 0) /* eg, n == 16, i == 8 and tn == 11 */ |
310 | { | 641 | { |
311 | bn_mul_part_recursive(&(r[n2]),&(a[n]),&(b[n]), | 642 | bn_mul_part_recursive(&(r[n2]),&(a[n]),&(b[n]), |
312 | j,i,p); | 643 | i,tna-i,tnb-i,p); |
313 | memset(&(r[n2+tn*2]),0, | 644 | memset(&(r[n2+tna+tnb]),0, |
314 | sizeof(BN_ULONG)*(n2-tn*2)); | 645 | sizeof(BN_ULONG)*(n2-tna-tnb)); |
315 | } | 646 | } |
316 | else /* (j < 0) eg, n == 16, i == 8 and tn == 5 */ | 647 | else /* (j < 0) eg, n == 16, i == 8 and tn == 5 */ |
317 | { | 648 | { |
318 | memset(&(r[n2]),0,sizeof(BN_ULONG)*n2); | 649 | memset(&(r[n2]),0,sizeof(BN_ULONG)*n2); |
319 | if (tn < BN_MUL_RECURSIVE_SIZE_NORMAL) | 650 | if (tna < BN_MUL_RECURSIVE_SIZE_NORMAL |
651 | && tnb < BN_MUL_RECURSIVE_SIZE_NORMAL) | ||
320 | { | 652 | { |
321 | bn_mul_normal(&(r[n2]),&(a[n]),tn,&(b[n]),tn); | 653 | bn_mul_normal(&(r[n2]),&(a[n]),tna,&(b[n]),tnb); |
322 | } | 654 | } |
323 | else | 655 | else |
324 | { | 656 | { |
325 | for (;;) | 657 | for (;;) |
326 | { | 658 | { |
327 | i/=2; | 659 | i/=2; |
328 | if (i < tn) | 660 | /* these simplified conditions work |
661 | * exclusively because difference | ||
662 | * between tna and tnb is 1 or 0 */ | ||
663 | if (i < tna || i < tnb) | ||
329 | { | 664 | { |
330 | bn_mul_part_recursive(&(r[n2]), | 665 | bn_mul_part_recursive(&(r[n2]), |
331 | &(a[n]),&(b[n]), | 666 | &(a[n]),&(b[n]), |
332 | tn-i,i,p); | 667 | i,tna-i,tnb-i,p); |
333 | break; | 668 | break; |
334 | } | 669 | } |
335 | else if (i == tn) | 670 | else if (i == tna || i == tnb) |
336 | { | 671 | { |
337 | bn_mul_recursive(&(r[n2]), | 672 | bn_mul_recursive(&(r[n2]), |
338 | &(a[n]),&(b[n]), | 673 | &(a[n]),&(b[n]), |
339 | i,p); | 674 | i,tna-i,tnb-i,p); |
340 | break; | 675 | break; |
341 | } | 676 | } |
342 | } | 677 | } |
@@ -397,10 +732,10 @@ void bn_mul_low_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, | |||
397 | int n=n2/2; | 732 | int n=n2/2; |
398 | 733 | ||
399 | # ifdef BN_COUNT | 734 | # ifdef BN_COUNT |
400 | printf(" bn_mul_low_recursive %d * %d\n",n2,n2); | 735 | fprintf(stderr," bn_mul_low_recursive %d * %d\n",n2,n2); |
401 | # endif | 736 | # endif |
402 | 737 | ||
403 | bn_mul_recursive(r,a,b,n,&(t[0])); | 738 | bn_mul_recursive(r,a,b,n,0,0,&(t[0])); |
404 | if (n >= BN_MUL_LOW_RECURSIVE_SIZE_NORMAL) | 739 | if (n >= BN_MUL_LOW_RECURSIVE_SIZE_NORMAL) |
405 | { | 740 | { |
406 | bn_mul_low_recursive(&(t[0]),&(a[0]),&(b[n]),n,&(t[n2])); | 741 | bn_mul_low_recursive(&(t[0]),&(a[0]),&(b[n]),n,&(t[n2])); |
@@ -431,7 +766,7 @@ void bn_mul_high(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, BN_ULONG *l, int n2, | |||
431 | BN_ULONG ll,lc,*lp,*mp; | 766 | BN_ULONG ll,lc,*lp,*mp; |
432 | 767 | ||
433 | # ifdef BN_COUNT | 768 | # ifdef BN_COUNT |
434 | printf(" bn_mul_high %d * %d\n",n2,n2); | 769 | fprintf(stderr," bn_mul_high %d * %d\n",n2,n2); |
435 | # endif | 770 | # endif |
436 | n=n2/2; | 771 | n=n2/2; |
437 | 772 | ||
@@ -484,8 +819,8 @@ void bn_mul_high(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, BN_ULONG *l, int n2, | |||
484 | else | 819 | else |
485 | # endif | 820 | # endif |
486 | { | 821 | { |
487 | bn_mul_recursive(&(t[0]),&(r[0]),&(r[n]),n,&(t[n2])); | 822 | bn_mul_recursive(&(t[0]),&(r[0]),&(r[n]),n,0,0,&(t[n2])); |
488 | bn_mul_recursive(r,&(a[n]),&(b[n]),n,&(t[n2])); | 823 | bn_mul_recursive(r,&(a[n]),&(b[n]),n,0,0,&(t[n2])); |
489 | } | 824 | } |
490 | 825 | ||
491 | /* s0 == low(al*bl) | 826 | /* s0 == low(al*bl) |
@@ -610,19 +945,19 @@ void bn_mul_high(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, BN_ULONG *l, int n2, | |||
610 | 945 | ||
611 | int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) | 946 | int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) |
612 | { | 947 | { |
948 | int ret=0; | ||
613 | int top,al,bl; | 949 | int top,al,bl; |
614 | BIGNUM *rr; | 950 | BIGNUM *rr; |
615 | int ret = 0; | ||
616 | #if defined(BN_MUL_COMBA) || defined(BN_RECURSION) | 951 | #if defined(BN_MUL_COMBA) || defined(BN_RECURSION) |
617 | int i; | 952 | int i; |
618 | #endif | 953 | #endif |
619 | #ifdef BN_RECURSION | 954 | #ifdef BN_RECURSION |
620 | BIGNUM *t; | 955 | BIGNUM *t=NULL; |
621 | int j,k; | 956 | int j=0,k; |
622 | #endif | 957 | #endif |
623 | 958 | ||
624 | #ifdef BN_COUNT | 959 | #ifdef BN_COUNT |
625 | printf("BN_mul %d * %d\n",a->top,b->top); | 960 | fprintf(stderr,"BN_mul %d * %d\n",a->top,b->top); |
626 | #endif | 961 | #endif |
627 | 962 | ||
628 | bn_check_top(a); | 963 | bn_check_top(a); |
@@ -634,7 +969,7 @@ int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) | |||
634 | 969 | ||
635 | if ((al == 0) || (bl == 0)) | 970 | if ((al == 0) || (bl == 0)) |
636 | { | 971 | { |
637 | if (!BN_zero(r)) goto err; | 972 | BN_zero(r); |
638 | return(1); | 973 | return(1); |
639 | } | 974 | } |
640 | top=al+bl; | 975 | top=al+bl; |
@@ -675,21 +1010,55 @@ int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) | |||
675 | #ifdef BN_RECURSION | 1010 | #ifdef BN_RECURSION |
676 | if ((al >= BN_MULL_SIZE_NORMAL) && (bl >= BN_MULL_SIZE_NORMAL)) | 1011 | if ((al >= BN_MULL_SIZE_NORMAL) && (bl >= BN_MULL_SIZE_NORMAL)) |
677 | { | 1012 | { |
678 | if (i == 1 && !BN_get_flags(b,BN_FLG_STATIC_DATA) && bl<b->dmax) | 1013 | if (i >= -1 && i <= 1) |
679 | { | 1014 | { |
680 | #if 0 /* tribute to const-ification, bl<b->dmax above covers for this */ | 1015 | int sav_j =0; |
681 | if (bn_wexpand(b,al) == NULL) goto err; | 1016 | /* Find out the power of two lower or equal |
682 | #endif | 1017 | to the longest of the two numbers */ |
683 | b->d[bl]=0; | 1018 | if (i >= 0) |
1019 | { | ||
1020 | j = BN_num_bits_word((BN_ULONG)al); | ||
1021 | } | ||
1022 | if (i == -1) | ||
1023 | { | ||
1024 | j = BN_num_bits_word((BN_ULONG)bl); | ||
1025 | } | ||
1026 | sav_j = j; | ||
1027 | j = 1<<(j-1); | ||
1028 | assert(j <= al || j <= bl); | ||
1029 | k = j+j; | ||
1030 | t = BN_CTX_get(ctx); | ||
1031 | if (al > j || bl > j) | ||
1032 | { | ||
1033 | bn_wexpand(t,k*4); | ||
1034 | bn_wexpand(rr,k*4); | ||
1035 | bn_mul_part_recursive(rr->d,a->d,b->d, | ||
1036 | j,al-j,bl-j,t->d); | ||
1037 | } | ||
1038 | else /* al <= j || bl <= j */ | ||
1039 | { | ||
1040 | bn_wexpand(t,k*2); | ||
1041 | bn_wexpand(rr,k*2); | ||
1042 | bn_mul_recursive(rr->d,a->d,b->d, | ||
1043 | j,al-j,bl-j,t->d); | ||
1044 | } | ||
1045 | rr->top=top; | ||
1046 | goto end; | ||
1047 | } | ||
1048 | #if 0 | ||
1049 | if (i == 1 && !BN_get_flags(b,BN_FLG_STATIC_DATA)) | ||
1050 | { | ||
1051 | BIGNUM *tmp_bn = (BIGNUM *)b; | ||
1052 | if (bn_wexpand(tmp_bn,al) == NULL) goto err; | ||
1053 | tmp_bn->d[bl]=0; | ||
684 | bl++; | 1054 | bl++; |
685 | i--; | 1055 | i--; |
686 | } | 1056 | } |
687 | else if (i == -1 && !BN_get_flags(a,BN_FLG_STATIC_DATA) && al<a->dmax) | 1057 | else if (i == -1 && !BN_get_flags(a,BN_FLG_STATIC_DATA)) |
688 | { | 1058 | { |
689 | #if 0 /* tribute to const-ification, al<a->dmax above covers for this */ | 1059 | BIGNUM *tmp_bn = (BIGNUM *)a; |
690 | if (bn_wexpand(a,bl) == NULL) goto err; | 1060 | if (bn_wexpand(tmp_bn,bl) == NULL) goto err; |
691 | #endif | 1061 | tmp_bn->d[al]=0; |
692 | a->d[al]=0; | ||
693 | al++; | 1062 | al++; |
694 | i++; | 1063 | i++; |
695 | } | 1064 | } |
@@ -706,26 +1075,17 @@ int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) | |||
706 | if (bn_wexpand(t,k*2) == NULL) goto err; | 1075 | if (bn_wexpand(t,k*2) == NULL) goto err; |
707 | if (bn_wexpand(rr,k*2) == NULL) goto err; | 1076 | if (bn_wexpand(rr,k*2) == NULL) goto err; |
708 | bn_mul_recursive(rr->d,a->d,b->d,al,t->d); | 1077 | bn_mul_recursive(rr->d,a->d,b->d,al,t->d); |
709 | rr->top=top; | ||
710 | goto end; | ||
711 | } | 1078 | } |
712 | #if 0 /* tribute to const-ification, rsa/dsa performance is not affected */ | ||
713 | else | 1079 | else |
714 | { | 1080 | { |
715 | if (bn_wexpand(a,k) == NULL ) goto err; | 1081 | if (bn_wexpand(t,k*4) == NULL) goto err; |
716 | if (bn_wexpand(b,k) == NULL ) goto err; | 1082 | if (bn_wexpand(rr,k*4) == NULL) goto err; |
717 | if (bn_wexpand(t,k*4) == NULL ) goto err; | ||
718 | if (bn_wexpand(rr,k*4) == NULL ) goto err; | ||
719 | for (i=a->top; i<k; i++) | ||
720 | a->d[i]=0; | ||
721 | for (i=b->top; i<k; i++) | ||
722 | b->d[i]=0; | ||
723 | bn_mul_part_recursive(rr->d,a->d,b->d,al-j,j,t->d); | 1083 | bn_mul_part_recursive(rr->d,a->d,b->d,al-j,j,t->d); |
724 | } | 1084 | } |
725 | rr->top=top; | 1085 | rr->top=top; |
726 | goto end; | 1086 | goto end; |
727 | #endif | ||
728 | } | 1087 | } |
1088 | #endif | ||
729 | } | 1089 | } |
730 | #endif /* BN_RECURSION */ | 1090 | #endif /* BN_RECURSION */ |
731 | if (bn_wexpand(rr,top) == NULL) goto err; | 1091 | if (bn_wexpand(rr,top) == NULL) goto err; |
@@ -735,10 +1095,11 @@ int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) | |||
735 | #if defined(BN_MUL_COMBA) || defined(BN_RECURSION) | 1095 | #if defined(BN_MUL_COMBA) || defined(BN_RECURSION) |
736 | end: | 1096 | end: |
737 | #endif | 1097 | #endif |
738 | bn_fix_top(rr); | 1098 | bn_correct_top(rr); |
739 | if (r != rr) BN_copy(r,rr); | 1099 | if (r != rr) BN_copy(r,rr); |
740 | ret=1; | 1100 | ret=1; |
741 | err: | 1101 | err: |
1102 | bn_check_top(r); | ||
742 | BN_CTX_end(ctx); | 1103 | BN_CTX_end(ctx); |
743 | return(ret); | 1104 | return(ret); |
744 | } | 1105 | } |
@@ -748,7 +1109,7 @@ void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb) | |||
748 | BN_ULONG *rr; | 1109 | BN_ULONG *rr; |
749 | 1110 | ||
750 | #ifdef BN_COUNT | 1111 | #ifdef BN_COUNT |
751 | printf(" bn_mul_normal %d * %d\n",na,nb); | 1112 | fprintf(stderr," bn_mul_normal %d * %d\n",na,nb); |
752 | #endif | 1113 | #endif |
753 | 1114 | ||
754 | if (na < nb) | 1115 | if (na < nb) |
@@ -761,7 +1122,13 @@ void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb) | |||
761 | 1122 | ||
762 | } | 1123 | } |
763 | rr= &(r[na]); | 1124 | rr= &(r[na]); |
764 | rr[0]=bn_mul_words(r,a,na,b[0]); | 1125 | if (nb <= 0) |
1126 | { | ||
1127 | (void)bn_mul_words(r,a,na,0); | ||
1128 | return; | ||
1129 | } | ||
1130 | else | ||
1131 | rr[0]=bn_mul_words(r,a,na,b[0]); | ||
765 | 1132 | ||
766 | for (;;) | 1133 | for (;;) |
767 | { | 1134 | { |
@@ -782,7 +1149,7 @@ void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb) | |||
782 | void bn_mul_low_normal(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) | 1149 | void bn_mul_low_normal(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) |
783 | { | 1150 | { |
784 | #ifdef BN_COUNT | 1151 | #ifdef BN_COUNT |
785 | printf(" bn_mul_low_normal %d * %d\n",n,n); | 1152 | fprintf(stderr," bn_mul_low_normal %d * %d\n",n,n); |
786 | #endif | 1153 | #endif |
787 | bn_mul_words(r,a,n,b[0]); | 1154 | bn_mul_words(r,a,n,b[0]); |
788 | 1155 | ||