diff options
Diffstat (limited to 'src/lib/libcrypto/bn/bn_asm.c')
-rw-r--r-- | src/lib/libcrypto/bn/bn_asm.c | 1098 |
1 files changed, 0 insertions, 1098 deletions
diff --git a/src/lib/libcrypto/bn/bn_asm.c b/src/lib/libcrypto/bn/bn_asm.c deleted file mode 100644 index 49f0ba5d7b..0000000000 --- a/src/lib/libcrypto/bn/bn_asm.c +++ /dev/null | |||
@@ -1,1098 +0,0 @@ | |||
1 | /* $OpenBSD: bn_asm.c,v 1.14 2015/02/25 15:39:49 bcook Exp $ */ | ||
2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) | ||
3 | * All rights reserved. | ||
4 | * | ||
5 | * This package is an SSL implementation written | ||
6 | * by Eric Young (eay@cryptsoft.com). | ||
7 | * The implementation was written so as to conform with Netscapes SSL. | ||
8 | * | ||
9 | * This library is free for commercial and non-commercial use as long as | ||
10 | * the following conditions are aheared to. The following conditions | ||
11 | * apply to all code found in this distribution, be it the RC4, RSA, | ||
12 | * lhash, DES, etc., code; not just the SSL code. The SSL documentation | ||
13 | * included with this distribution is covered by the same copyright terms | ||
14 | * except that the holder is Tim Hudson (tjh@cryptsoft.com). | ||
15 | * | ||
16 | * Copyright remains Eric Young's, and as such any Copyright notices in | ||
17 | * the code are not to be removed. | ||
18 | * If this package is used in a product, Eric Young should be given attribution | ||
19 | * as the author of the parts of the library used. | ||
20 | * This can be in the form of a textual message at program startup or | ||
21 | * in documentation (online or textual) provided with the package. | ||
22 | * | ||
23 | * Redistribution and use in source and binary forms, with or without | ||
24 | * modification, are permitted provided that the following conditions | ||
25 | * are met: | ||
26 | * 1. Redistributions of source code must retain the copyright | ||
27 | * notice, this list of conditions and the following disclaimer. | ||
28 | * 2. Redistributions in binary form must reproduce the above copyright | ||
29 | * notice, this list of conditions and the following disclaimer in the | ||
30 | * documentation and/or other materials provided with the distribution. | ||
31 | * 3. All advertising materials mentioning features or use of this software | ||
32 | * must display the following acknowledgement: | ||
33 | * "This product includes cryptographic software written by | ||
34 | * Eric Young (eay@cryptsoft.com)" | ||
35 | * The word 'cryptographic' can be left out if the rouines from the library | ||
36 | * being used are not cryptographic related :-). | ||
37 | * 4. If you include any Windows specific code (or a derivative thereof) from | ||
38 | * the apps directory (application code) you must include an acknowledgement: | ||
39 | * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" | ||
40 | * | ||
41 | * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND | ||
42 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
43 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||
44 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE | ||
45 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | ||
46 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | ||
47 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | ||
48 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | ||
49 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | ||
50 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | ||
51 | * SUCH DAMAGE. | ||
52 | * | ||
53 | * The licence and distribution terms for any publically available version or | ||
54 | * derivative of this code cannot be changed. i.e. this code cannot simply be | ||
55 | * copied and put under another distribution licence | ||
56 | * [including the GNU Public Licence.] | ||
57 | */ | ||
58 | |||
59 | #ifndef BN_DEBUG | ||
60 | # undef NDEBUG /* avoid conflicting definitions */ | ||
61 | # define NDEBUG | ||
62 | #endif | ||
63 | |||
64 | #include <assert.h> | ||
65 | #include <stdio.h> | ||
66 | |||
67 | #include <openssl/opensslconf.h> | ||
68 | |||
69 | #include "bn_lcl.h" | ||
70 | |||
71 | #if defined(BN_LLONG) || defined(BN_UMULT_HIGH) | ||
72 | |||
73 | BN_ULONG | ||
74 | bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) | ||
75 | { | ||
76 | BN_ULONG c1 = 0; | ||
77 | |||
78 | assert(num >= 0); | ||
79 | if (num <= 0) | ||
80 | return (c1); | ||
81 | |||
82 | #ifndef OPENSSL_SMALL_FOOTPRINT | ||
83 | while (num & ~3) { | ||
84 | mul_add(rp[0], ap[0], w, c1); | ||
85 | mul_add(rp[1], ap[1], w, c1); | ||
86 | mul_add(rp[2], ap[2], w, c1); | ||
87 | mul_add(rp[3], ap[3], w, c1); | ||
88 | ap += 4; | ||
89 | rp += 4; | ||
90 | num -= 4; | ||
91 | } | ||
92 | #endif | ||
93 | while (num) { | ||
94 | mul_add(rp[0], ap[0], w, c1); | ||
95 | ap++; | ||
96 | rp++; | ||
97 | num--; | ||
98 | } | ||
99 | |||
100 | return (c1); | ||
101 | } | ||
102 | |||
103 | BN_ULONG | ||
104 | bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) | ||
105 | { | ||
106 | BN_ULONG c1 = 0; | ||
107 | |||
108 | assert(num >= 0); | ||
109 | if (num <= 0) | ||
110 | return (c1); | ||
111 | |||
112 | #ifndef OPENSSL_SMALL_FOOTPRINT | ||
113 | while (num & ~3) { | ||
114 | mul(rp[0], ap[0], w, c1); | ||
115 | mul(rp[1], ap[1], w, c1); | ||
116 | mul(rp[2], ap[2], w, c1); | ||
117 | mul(rp[3], ap[3], w, c1); | ||
118 | ap += 4; | ||
119 | rp += 4; | ||
120 | num -= 4; | ||
121 | } | ||
122 | #endif | ||
123 | while (num) { | ||
124 | mul(rp[0], ap[0], w, c1); | ||
125 | ap++; | ||
126 | rp++; | ||
127 | num--; | ||
128 | } | ||
129 | return (c1); | ||
130 | } | ||
131 | |||
132 | void | ||
133 | bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n) | ||
134 | { | ||
135 | assert(n >= 0); | ||
136 | if (n <= 0) | ||
137 | return; | ||
138 | |||
139 | #ifndef OPENSSL_SMALL_FOOTPRINT | ||
140 | while (n & ~3) { | ||
141 | sqr(r[0], r[1], a[0]); | ||
142 | sqr(r[2], r[3], a[1]); | ||
143 | sqr(r[4], r[5], a[2]); | ||
144 | sqr(r[6], r[7], a[3]); | ||
145 | a += 4; | ||
146 | r += 8; | ||
147 | n -= 4; | ||
148 | } | ||
149 | #endif | ||
150 | while (n) { | ||
151 | sqr(r[0], r[1], a[0]); | ||
152 | a++; | ||
153 | r += 2; | ||
154 | n--; | ||
155 | } | ||
156 | } | ||
157 | |||
158 | #else /* !(defined(BN_LLONG) || defined(BN_UMULT_HIGH)) */ | ||
159 | |||
160 | BN_ULONG | ||
161 | bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) | ||
162 | { | ||
163 | BN_ULONG c = 0; | ||
164 | BN_ULONG bl, bh; | ||
165 | |||
166 | assert(num >= 0); | ||
167 | if (num <= 0) | ||
168 | return ((BN_ULONG)0); | ||
169 | |||
170 | bl = LBITS(w); | ||
171 | bh = HBITS(w); | ||
172 | |||
173 | #ifndef OPENSSL_SMALL_FOOTPRINT | ||
174 | while (num & ~3) { | ||
175 | mul_add(rp[0], ap[0], bl, bh, c); | ||
176 | mul_add(rp[1], ap[1], bl, bh, c); | ||
177 | mul_add(rp[2], ap[2], bl, bh, c); | ||
178 | mul_add(rp[3], ap[3], bl, bh, c); | ||
179 | ap += 4; | ||
180 | rp += 4; | ||
181 | num -= 4; | ||
182 | } | ||
183 | #endif | ||
184 | while (num) { | ||
185 | mul_add(rp[0], ap[0], bl, bh, c); | ||
186 | ap++; | ||
187 | rp++; | ||
188 | num--; | ||
189 | } | ||
190 | return (c); | ||
191 | } | ||
192 | |||
193 | BN_ULONG | ||
194 | bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) | ||
195 | { | ||
196 | BN_ULONG carry = 0; | ||
197 | BN_ULONG bl, bh; | ||
198 | |||
199 | assert(num >= 0); | ||
200 | if (num <= 0) | ||
201 | return ((BN_ULONG)0); | ||
202 | |||
203 | bl = LBITS(w); | ||
204 | bh = HBITS(w); | ||
205 | |||
206 | #ifndef OPENSSL_SMALL_FOOTPRINT | ||
207 | while (num & ~3) { | ||
208 | mul(rp[0], ap[0], bl, bh, carry); | ||
209 | mul(rp[1], ap[1], bl, bh, carry); | ||
210 | mul(rp[2], ap[2], bl, bh, carry); | ||
211 | mul(rp[3], ap[3], bl, bh, carry); | ||
212 | ap += 4; | ||
213 | rp += 4; | ||
214 | num -= 4; | ||
215 | } | ||
216 | #endif | ||
217 | while (num) { | ||
218 | mul(rp[0], ap[0], bl, bh, carry); | ||
219 | ap++; | ||
220 | rp++; | ||
221 | num--; | ||
222 | } | ||
223 | return (carry); | ||
224 | } | ||
225 | |||
226 | void | ||
227 | bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n) | ||
228 | { | ||
229 | assert(n >= 0); | ||
230 | if (n <= 0) | ||
231 | return; | ||
232 | |||
233 | #ifndef OPENSSL_SMALL_FOOTPRINT | ||
234 | while (n & ~3) { | ||
235 | sqr64(r[0], r[1], a[0]); | ||
236 | sqr64(r[2], r[3], a[1]); | ||
237 | sqr64(r[4], r[5], a[2]); | ||
238 | sqr64(r[6], r[7], a[3]); | ||
239 | a += 4; | ||
240 | r += 8; | ||
241 | n -= 4; | ||
242 | } | ||
243 | #endif | ||
244 | while (n) { | ||
245 | sqr64(r[0], r[1], a[0]); | ||
246 | a++; | ||
247 | r += 2; | ||
248 | n--; | ||
249 | } | ||
250 | } | ||
251 | |||
252 | #endif /* !(defined(BN_LLONG) || defined(BN_UMULT_HIGH)) */ | ||
253 | |||
254 | #if defined(BN_LLONG) && defined(BN_DIV2W) | ||
255 | |||
256 | BN_ULONG | ||
257 | bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d) | ||
258 | { | ||
259 | return ((BN_ULONG)(((((BN_ULLONG)h) << BN_BITS2)|l)/(BN_ULLONG)d)); | ||
260 | } | ||
261 | |||
262 | #else | ||
263 | |||
264 | /* Divide h,l by d and return the result. */ | ||
265 | /* I need to test this some more :-( */ | ||
266 | BN_ULONG | ||
267 | bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d) | ||
268 | { | ||
269 | BN_ULONG dh, dl, q,ret = 0, th, tl, t; | ||
270 | int i, count = 2; | ||
271 | |||
272 | if (d == 0) | ||
273 | return (BN_MASK2); | ||
274 | |||
275 | i = BN_num_bits_word(d); | ||
276 | assert((i == BN_BITS2) || (h <= (BN_ULONG)1 << i)); | ||
277 | |||
278 | i = BN_BITS2 - i; | ||
279 | if (h >= d) | ||
280 | h -= d; | ||
281 | |||
282 | if (i) { | ||
283 | d <<= i; | ||
284 | h = (h << i) | (l >> (BN_BITS2 - i)); | ||
285 | l <<= i; | ||
286 | } | ||
287 | dh = (d & BN_MASK2h) >> BN_BITS4; | ||
288 | dl = (d & BN_MASK2l); | ||
289 | for (;;) { | ||
290 | if ((h >> BN_BITS4) == dh) | ||
291 | q = BN_MASK2l; | ||
292 | else | ||
293 | q = h / dh; | ||
294 | |||
295 | th = q * dh; | ||
296 | tl = dl * q; | ||
297 | for (;;) { | ||
298 | t = h - th; | ||
299 | if ((t & BN_MASK2h) || | ||
300 | ((tl) <= ( | ||
301 | (t << BN_BITS4) | | ||
302 | ((l & BN_MASK2h) >> BN_BITS4)))) | ||
303 | break; | ||
304 | q--; | ||
305 | th -= dh; | ||
306 | tl -= dl; | ||
307 | } | ||
308 | t = (tl >> BN_BITS4); | ||
309 | tl = (tl << BN_BITS4) & BN_MASK2h; | ||
310 | th += t; | ||
311 | |||
312 | if (l < tl) | ||
313 | th++; | ||
314 | l -= tl; | ||
315 | if (h < th) { | ||
316 | h += d; | ||
317 | q--; | ||
318 | } | ||
319 | h -= th; | ||
320 | |||
321 | if (--count == 0) | ||
322 | break; | ||
323 | |||
324 | ret = q << BN_BITS4; | ||
325 | h = ((h << BN_BITS4) | (l >> BN_BITS4)) & BN_MASK2; | ||
326 | l = (l & BN_MASK2l) << BN_BITS4; | ||
327 | } | ||
328 | ret |= q; | ||
329 | return (ret); | ||
330 | } | ||
331 | #endif /* !defined(BN_LLONG) && defined(BN_DIV2W) */ | ||
332 | |||
333 | #ifdef BN_LLONG | ||
334 | BN_ULONG | ||
335 | bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) | ||
336 | { | ||
337 | BN_ULLONG ll = 0; | ||
338 | |||
339 | assert(n >= 0); | ||
340 | if (n <= 0) | ||
341 | return ((BN_ULONG)0); | ||
342 | |||
343 | #ifndef OPENSSL_SMALL_FOOTPRINT | ||
344 | while (n & ~3) { | ||
345 | ll += (BN_ULLONG)a[0] + b[0]; | ||
346 | r[0] = (BN_ULONG)ll & BN_MASK2; | ||
347 | ll >>= BN_BITS2; | ||
348 | ll += (BN_ULLONG)a[1] + b[1]; | ||
349 | r[1] = (BN_ULONG)ll & BN_MASK2; | ||
350 | ll >>= BN_BITS2; | ||
351 | ll += (BN_ULLONG)a[2] + b[2]; | ||
352 | r[2] = (BN_ULONG)ll & BN_MASK2; | ||
353 | ll >>= BN_BITS2; | ||
354 | ll += (BN_ULLONG)a[3] + b[3]; | ||
355 | r[3] = (BN_ULONG)ll & BN_MASK2; | ||
356 | ll >>= BN_BITS2; | ||
357 | a += 4; | ||
358 | b += 4; | ||
359 | r += 4; | ||
360 | n -= 4; | ||
361 | } | ||
362 | #endif | ||
363 | while (n) { | ||
364 | ll += (BN_ULLONG)a[0] + b[0]; | ||
365 | r[0] = (BN_ULONG)ll & BN_MASK2; | ||
366 | ll >>= BN_BITS2; | ||
367 | a++; | ||
368 | b++; | ||
369 | r++; | ||
370 | n--; | ||
371 | } | ||
372 | return ((BN_ULONG)ll); | ||
373 | } | ||
374 | #else /* !BN_LLONG */ | ||
375 | BN_ULONG | ||
376 | bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) | ||
377 | { | ||
378 | BN_ULONG c, l, t; | ||
379 | |||
380 | assert(n >= 0); | ||
381 | if (n <= 0) | ||
382 | return ((BN_ULONG)0); | ||
383 | |||
384 | c = 0; | ||
385 | #ifndef OPENSSL_SMALL_FOOTPRINT | ||
386 | while (n & ~3) { | ||
387 | t = a[0]; | ||
388 | t = (t + c) & BN_MASK2; | ||
389 | c = (t < c); | ||
390 | l = (t + b[0]) & BN_MASK2; | ||
391 | c += (l < t); | ||
392 | r[0] = l; | ||
393 | t = a[1]; | ||
394 | t = (t + c) & BN_MASK2; | ||
395 | c = (t < c); | ||
396 | l = (t + b[1]) & BN_MASK2; | ||
397 | c += (l < t); | ||
398 | r[1] = l; | ||
399 | t = a[2]; | ||
400 | t = (t + c) & BN_MASK2; | ||
401 | c = (t < c); | ||
402 | l = (t + b[2]) & BN_MASK2; | ||
403 | c += (l < t); | ||
404 | r[2] = l; | ||
405 | t = a[3]; | ||
406 | t = (t + c) & BN_MASK2; | ||
407 | c = (t < c); | ||
408 | l = (t + b[3]) & BN_MASK2; | ||
409 | c += (l < t); | ||
410 | r[3] = l; | ||
411 | a += 4; | ||
412 | b += 4; | ||
413 | r += 4; | ||
414 | n -= 4; | ||
415 | } | ||
416 | #endif | ||
417 | while (n) { | ||
418 | t = a[0]; | ||
419 | t = (t + c) & BN_MASK2; | ||
420 | c = (t < c); | ||
421 | l = (t + b[0]) & BN_MASK2; | ||
422 | c += (l < t); | ||
423 | r[0] = l; | ||
424 | a++; | ||
425 | b++; | ||
426 | r++; | ||
427 | n--; | ||
428 | } | ||
429 | return ((BN_ULONG)c); | ||
430 | } | ||
431 | #endif /* !BN_LLONG */ | ||
432 | |||
433 | BN_ULONG | ||
434 | bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) | ||
435 | { | ||
436 | BN_ULONG t1, t2; | ||
437 | int c = 0; | ||
438 | |||
439 | assert(n >= 0); | ||
440 | if (n <= 0) | ||
441 | return ((BN_ULONG)0); | ||
442 | |||
443 | #ifndef OPENSSL_SMALL_FOOTPRINT | ||
444 | while (n&~3) { | ||
445 | t1 = a[0]; | ||
446 | t2 = b[0]; | ||
447 | r[0] = (t1 - t2 - c) & BN_MASK2; | ||
448 | if (t1 != t2) | ||
449 | c = (t1 < t2); | ||
450 | t1 = a[1]; | ||
451 | t2 = b[1]; | ||
452 | r[1] = (t1 - t2 - c) & BN_MASK2; | ||
453 | if (t1 != t2) | ||
454 | c = (t1 < t2); | ||
455 | t1 = a[2]; | ||
456 | t2 = b[2]; | ||
457 | r[2] = (t1 - t2 - c) & BN_MASK2; | ||
458 | if (t1 != t2) | ||
459 | c = (t1 < t2); | ||
460 | t1 = a[3]; | ||
461 | t2 = b[3]; | ||
462 | r[3] = (t1 - t2 - c) & BN_MASK2; | ||
463 | if (t1 != t2) | ||
464 | c = (t1 < t2); | ||
465 | a += 4; | ||
466 | b += 4; | ||
467 | r += 4; | ||
468 | n -= 4; | ||
469 | } | ||
470 | #endif | ||
471 | while (n) { | ||
472 | t1 = a[0]; | ||
473 | t2 = b[0]; | ||
474 | r[0] = (t1 - t2 - c) & BN_MASK2; | ||
475 | if (t1 != t2) | ||
476 | c = (t1 < t2); | ||
477 | a++; | ||
478 | b++; | ||
479 | r++; | ||
480 | n--; | ||
481 | } | ||
482 | return (c); | ||
483 | } | ||
484 | |||
485 | #if defined(BN_MUL_COMBA) && !defined(OPENSSL_SMALL_FOOTPRINT) | ||
486 | |||
487 | #undef bn_mul_comba8 | ||
488 | #undef bn_mul_comba4 | ||
489 | #undef bn_sqr_comba8 | ||
490 | #undef bn_sqr_comba4 | ||
491 | |||
492 | /* mul_add_c(a,b,c0,c1,c2) -- c+=a*b for three word number c=(c2,c1,c0) */ | ||
493 | /* mul_add_c2(a,b,c0,c1,c2) -- c+=2*a*b for three word number c=(c2,c1,c0) */ | ||
494 | /* sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0) */ | ||
495 | /* sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */ | ||
496 | |||
497 | #ifdef BN_LLONG | ||
498 | /* | ||
499 | * Keep in mind that additions to multiplication result can not | ||
500 | * overflow, because its high half cannot be all-ones. | ||
501 | */ | ||
502 | #define mul_add_c(a,b,c0,c1,c2) do { \ | ||
503 | BN_ULONG hi; \ | ||
504 | BN_ULLONG t = (BN_ULLONG)(a)*(b); \ | ||
505 | t += c0; /* no carry */ \ | ||
506 | c0 = (BN_ULONG)Lw(t); \ | ||
507 | hi = (BN_ULONG)Hw(t); \ | ||
508 | c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \ | ||
509 | } while(0) | ||
510 | |||
511 | #define mul_add_c2(a,b,c0,c1,c2) do { \ | ||
512 | BN_ULONG hi; \ | ||
513 | BN_ULLONG t = (BN_ULLONG)(a)*(b); \ | ||
514 | BN_ULLONG tt = t+c0; /* no carry */ \ | ||
515 | c0 = (BN_ULONG)Lw(tt); \ | ||
516 | hi = (BN_ULONG)Hw(tt); \ | ||
517 | c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \ | ||
518 | t += c0; /* no carry */ \ | ||
519 | c0 = (BN_ULONG)Lw(t); \ | ||
520 | hi = (BN_ULONG)Hw(t); \ | ||
521 | c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \ | ||
522 | } while(0) | ||
523 | |||
524 | #define sqr_add_c(a,i,c0,c1,c2) do { \ | ||
525 | BN_ULONG hi; \ | ||
526 | BN_ULLONG t = (BN_ULLONG)a[i]*a[i]; \ | ||
527 | t += c0; /* no carry */ \ | ||
528 | c0 = (BN_ULONG)Lw(t); \ | ||
529 | hi = (BN_ULONG)Hw(t); \ | ||
530 | c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \ | ||
531 | } while(0) | ||
532 | |||
533 | #define sqr_add_c2(a,i,j,c0,c1,c2) \ | ||
534 | mul_add_c2((a)[i],(a)[j],c0,c1,c2) | ||
535 | |||
536 | #elif defined(BN_UMULT_LOHI) | ||
537 | /* | ||
538 | * Keep in mind that additions to hi can not overflow, because | ||
539 | * the high word of a multiplication result cannot be all-ones. | ||
540 | */ | ||
541 | #define mul_add_c(a,b,c0,c1,c2) do { \ | ||
542 | BN_ULONG ta = (a), tb = (b); \ | ||
543 | BN_ULONG lo, hi; \ | ||
544 | BN_UMULT_LOHI(lo,hi,ta,tb); \ | ||
545 | c0 += lo; hi += (c0<lo)?1:0; \ | ||
546 | c1 += hi; c2 += (c1<hi)?1:0; \ | ||
547 | } while(0) | ||
548 | |||
549 | #define mul_add_c2(a,b,c0,c1,c2) do { \ | ||
550 | BN_ULONG ta = (a), tb = (b); \ | ||
551 | BN_ULONG lo, hi, tt; \ | ||
552 | BN_UMULT_LOHI(lo,hi,ta,tb); \ | ||
553 | c0 += lo; tt = hi+((c0<lo)?1:0); \ | ||
554 | c1 += tt; c2 += (c1<tt)?1:0; \ | ||
555 | c0 += lo; hi += (c0<lo)?1:0; \ | ||
556 | c1 += hi; c2 += (c1<hi)?1:0; \ | ||
557 | } while(0) | ||
558 | |||
559 | #define sqr_add_c(a,i,c0,c1,c2) do { \ | ||
560 | BN_ULONG ta = (a)[i]; \ | ||
561 | BN_ULONG lo, hi; \ | ||
562 | BN_UMULT_LOHI(lo,hi,ta,ta); \ | ||
563 | c0 += lo; hi += (c0<lo)?1:0; \ | ||
564 | c1 += hi; c2 += (c1<hi)?1:0; \ | ||
565 | } while(0) | ||
566 | |||
567 | #define sqr_add_c2(a,i,j,c0,c1,c2) \ | ||
568 | mul_add_c2((a)[i],(a)[j],c0,c1,c2) | ||
569 | |||
570 | #elif defined(BN_UMULT_HIGH) | ||
571 | /* | ||
572 | * Keep in mind that additions to hi can not overflow, because | ||
573 | * the high word of a multiplication result cannot be all-ones. | ||
574 | */ | ||
575 | #define mul_add_c(a,b,c0,c1,c2) do { \ | ||
576 | BN_ULONG ta = (a), tb = (b); \ | ||
577 | BN_ULONG lo = ta * tb; \ | ||
578 | BN_ULONG hi = BN_UMULT_HIGH(ta,tb); \ | ||
579 | c0 += lo; hi += (c0<lo)?1:0; \ | ||
580 | c1 += hi; c2 += (c1<hi)?1:0; \ | ||
581 | } while(0) | ||
582 | |||
583 | #define mul_add_c2(a,b,c0,c1,c2) do { \ | ||
584 | BN_ULONG ta = (a), tb = (b), tt; \ | ||
585 | BN_ULONG lo = ta * tb; \ | ||
586 | BN_ULONG hi = BN_UMULT_HIGH(ta,tb); \ | ||
587 | c0 += lo; tt = hi + ((c0<lo)?1:0); \ | ||
588 | c1 += tt; c2 += (c1<tt)?1:0; \ | ||
589 | c0 += lo; hi += (c0<lo)?1:0; \ | ||
590 | c1 += hi; c2 += (c1<hi)?1:0; \ | ||
591 | } while(0) | ||
592 | |||
593 | #define sqr_add_c(a,i,c0,c1,c2) do { \ | ||
594 | BN_ULONG ta = (a)[i]; \ | ||
595 | BN_ULONG lo = ta * ta; \ | ||
596 | BN_ULONG hi = BN_UMULT_HIGH(ta,ta); \ | ||
597 | c0 += lo; hi += (c0<lo)?1:0; \ | ||
598 | c1 += hi; c2 += (c1<hi)?1:0; \ | ||
599 | } while(0) | ||
600 | |||
601 | #define sqr_add_c2(a,i,j,c0,c1,c2) \ | ||
602 | mul_add_c2((a)[i],(a)[j],c0,c1,c2) | ||
603 | |||
604 | #else /* !BN_LLONG */ | ||
605 | /* | ||
606 | * Keep in mind that additions to hi can not overflow, because | ||
607 | * the high word of a multiplication result cannot be all-ones. | ||
608 | */ | ||
609 | #define mul_add_c(a,b,c0,c1,c2) do { \ | ||
610 | BN_ULONG lo = LBITS(a), hi = HBITS(a); \ | ||
611 | BN_ULONG bl = LBITS(b), bh = HBITS(b); \ | ||
612 | mul64(lo,hi,bl,bh); \ | ||
613 | c0 = (c0+lo)&BN_MASK2; if (c0<lo) hi++; \ | ||
614 | c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \ | ||
615 | } while(0) | ||
616 | |||
617 | #define mul_add_c2(a,b,c0,c1,c2) do { \ | ||
618 | BN_ULONG tt; \ | ||
619 | BN_ULONG lo = LBITS(a), hi = HBITS(a); \ | ||
620 | BN_ULONG bl = LBITS(b), bh = HBITS(b); \ | ||
621 | mul64(lo,hi,bl,bh); \ | ||
622 | tt = hi; \ | ||
623 | c0 = (c0+lo)&BN_MASK2; if (c0<lo) tt++; \ | ||
624 | c1 = (c1+tt)&BN_MASK2; if (c1<tt) c2++; \ | ||
625 | c0 = (c0+lo)&BN_MASK2; if (c0<lo) hi++; \ | ||
626 | c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \ | ||
627 | } while(0) | ||
628 | |||
629 | #define sqr_add_c(a,i,c0,c1,c2) do { \ | ||
630 | BN_ULONG lo, hi; \ | ||
631 | sqr64(lo,hi,(a)[i]); \ | ||
632 | c0 = (c0+lo)&BN_MASK2; if (c0<lo) hi++; \ | ||
633 | c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \ | ||
634 | } while(0) | ||
635 | |||
636 | #define sqr_add_c2(a,i,j,c0,c1,c2) \ | ||
637 | mul_add_c2((a)[i],(a)[j],c0,c1,c2) | ||
638 | #endif /* !BN_LLONG */ | ||
639 | |||
640 | void | ||
641 | bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | ||
642 | { | ||
643 | BN_ULONG c1, c2, c3; | ||
644 | |||
645 | c1 = 0; | ||
646 | c2 = 0; | ||
647 | c3 = 0; | ||
648 | mul_add_c(a[0], b[0], c1, c2, c3); | ||
649 | r[0] = c1; | ||
650 | c1 = 0; | ||
651 | mul_add_c(a[0], b[1], c2, c3, c1); | ||
652 | mul_add_c(a[1], b[0], c2, c3, c1); | ||
653 | r[1] = c2; | ||
654 | c2 = 0; | ||
655 | mul_add_c(a[2], b[0], c3, c1, c2); | ||
656 | mul_add_c(a[1], b[1], c3, c1, c2); | ||
657 | mul_add_c(a[0], b[2], c3, c1, c2); | ||
658 | r[2] = c3; | ||
659 | c3 = 0; | ||
660 | mul_add_c(a[0], b[3], c1, c2, c3); | ||
661 | mul_add_c(a[1], b[2], c1, c2, c3); | ||
662 | mul_add_c(a[2], b[1], c1, c2, c3); | ||
663 | mul_add_c(a[3], b[0], c1, c2, c3); | ||
664 | r[3] = c1; | ||
665 | c1 = 0; | ||
666 | mul_add_c(a[4], b[0], c2, c3, c1); | ||
667 | mul_add_c(a[3], b[1], c2, c3, c1); | ||
668 | mul_add_c(a[2], b[2], c2, c3, c1); | ||
669 | mul_add_c(a[1], b[3], c2, c3, c1); | ||
670 | mul_add_c(a[0], b[4], c2, c3, c1); | ||
671 | r[4] = c2; | ||
672 | c2 = 0; | ||
673 | mul_add_c(a[0], b[5], c3, c1, c2); | ||
674 | mul_add_c(a[1], b[4], c3, c1, c2); | ||
675 | mul_add_c(a[2], b[3], c3, c1, c2); | ||
676 | mul_add_c(a[3], b[2], c3, c1, c2); | ||
677 | mul_add_c(a[4], b[1], c3, c1, c2); | ||
678 | mul_add_c(a[5], b[0], c3, c1, c2); | ||
679 | r[5] = c3; | ||
680 | c3 = 0; | ||
681 | mul_add_c(a[6], b[0], c1, c2, c3); | ||
682 | mul_add_c(a[5], b[1], c1, c2, c3); | ||
683 | mul_add_c(a[4], b[2], c1, c2, c3); | ||
684 | mul_add_c(a[3], b[3], c1, c2, c3); | ||
685 | mul_add_c(a[2], b[4], c1, c2, c3); | ||
686 | mul_add_c(a[1], b[5], c1, c2, c3); | ||
687 | mul_add_c(a[0], b[6], c1, c2, c3); | ||
688 | r[6] = c1; | ||
689 | c1 = 0; | ||
690 | mul_add_c(a[0], b[7], c2, c3, c1); | ||
691 | mul_add_c(a[1], b[6], c2, c3, c1); | ||
692 | mul_add_c(a[2], b[5], c2, c3, c1); | ||
693 | mul_add_c(a[3], b[4], c2, c3, c1); | ||
694 | mul_add_c(a[4], b[3], c2, c3, c1); | ||
695 | mul_add_c(a[5], b[2], c2, c3, c1); | ||
696 | mul_add_c(a[6], b[1], c2, c3, c1); | ||
697 | mul_add_c(a[7], b[0], c2, c3, c1); | ||
698 | r[7] = c2; | ||
699 | c2 = 0; | ||
700 | mul_add_c(a[7], b[1], c3, c1, c2); | ||
701 | mul_add_c(a[6], b[2], c3, c1, c2); | ||
702 | mul_add_c(a[5], b[3], c3, c1, c2); | ||
703 | mul_add_c(a[4], b[4], c3, c1, c2); | ||
704 | mul_add_c(a[3], b[5], c3, c1, c2); | ||
705 | mul_add_c(a[2], b[6], c3, c1, c2); | ||
706 | mul_add_c(a[1], b[7], c3, c1, c2); | ||
707 | r[8] = c3; | ||
708 | c3 = 0; | ||
709 | mul_add_c(a[2], b[7], c1, c2, c3); | ||
710 | mul_add_c(a[3], b[6], c1, c2, c3); | ||
711 | mul_add_c(a[4], b[5], c1, c2, c3); | ||
712 | mul_add_c(a[5], b[4], c1, c2, c3); | ||
713 | mul_add_c(a[6], b[3], c1, c2, c3); | ||
714 | mul_add_c(a[7], b[2], c1, c2, c3); | ||
715 | r[9] = c1; | ||
716 | c1 = 0; | ||
717 | mul_add_c(a[7], b[3], c2, c3, c1); | ||
718 | mul_add_c(a[6], b[4], c2, c3, c1); | ||
719 | mul_add_c(a[5], b[5], c2, c3, c1); | ||
720 | mul_add_c(a[4], b[6], c2, c3, c1); | ||
721 | mul_add_c(a[3], b[7], c2, c3, c1); | ||
722 | r[10] = c2; | ||
723 | c2 = 0; | ||
724 | mul_add_c(a[4], b[7], c3, c1, c2); | ||
725 | mul_add_c(a[5], b[6], c3, c1, c2); | ||
726 | mul_add_c(a[6], b[5], c3, c1, c2); | ||
727 | mul_add_c(a[7], b[4], c3, c1, c2); | ||
728 | r[11] = c3; | ||
729 | c3 = 0; | ||
730 | mul_add_c(a[7], b[5], c1, c2, c3); | ||
731 | mul_add_c(a[6], b[6], c1, c2, c3); | ||
732 | mul_add_c(a[5], b[7], c1, c2, c3); | ||
733 | r[12] = c1; | ||
734 | c1 = 0; | ||
735 | mul_add_c(a[6], b[7], c2, c3, c1); | ||
736 | mul_add_c(a[7], b[6], c2, c3, c1); | ||
737 | r[13] = c2; | ||
738 | c2 = 0; | ||
739 | mul_add_c(a[7], b[7], c3, c1, c2); | ||
740 | r[14] = c3; | ||
741 | r[15] = c1; | ||
742 | } | ||
743 | |||
744 | void | ||
745 | bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | ||
746 | { | ||
747 | BN_ULONG c1, c2, c3; | ||
748 | |||
749 | c1 = 0; | ||
750 | c2 = 0; | ||
751 | c3 = 0; | ||
752 | mul_add_c(a[0], b[0], c1, c2, c3); | ||
753 | r[0] = c1; | ||
754 | c1 = 0; | ||
755 | mul_add_c(a[0], b[1], c2, c3, c1); | ||
756 | mul_add_c(a[1], b[0], c2, c3, c1); | ||
757 | r[1] = c2; | ||
758 | c2 = 0; | ||
759 | mul_add_c(a[2], b[0], c3, c1, c2); | ||
760 | mul_add_c(a[1], b[1], c3, c1, c2); | ||
761 | mul_add_c(a[0], b[2], c3, c1, c2); | ||
762 | r[2] = c3; | ||
763 | c3 = 0; | ||
764 | mul_add_c(a[0], b[3], c1, c2, c3); | ||
765 | mul_add_c(a[1], b[2], c1, c2, c3); | ||
766 | mul_add_c(a[2], b[1], c1, c2, c3); | ||
767 | mul_add_c(a[3], b[0], c1, c2, c3); | ||
768 | r[3] = c1; | ||
769 | c1 = 0; | ||
770 | mul_add_c(a[3], b[1], c2, c3, c1); | ||
771 | mul_add_c(a[2], b[2], c2, c3, c1); | ||
772 | mul_add_c(a[1], b[3], c2, c3, c1); | ||
773 | r[4] = c2; | ||
774 | c2 = 0; | ||
775 | mul_add_c(a[2], b[3], c3, c1, c2); | ||
776 | mul_add_c(a[3], b[2], c3, c1, c2); | ||
777 | r[5] = c3; | ||
778 | c3 = 0; | ||
779 | mul_add_c(a[3], b[3], c1, c2, c3); | ||
780 | r[6] = c1; | ||
781 | r[7] = c2; | ||
782 | } | ||
783 | |||
784 | void | ||
785 | bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a) | ||
786 | { | ||
787 | BN_ULONG c1, c2, c3; | ||
788 | |||
789 | c1 = 0; | ||
790 | c2 = 0; | ||
791 | c3 = 0; | ||
792 | sqr_add_c(a, 0, c1, c2, c3); | ||
793 | r[0] = c1; | ||
794 | c1 = 0; | ||
795 | sqr_add_c2(a, 1, 0, c2, c3, c1); | ||
796 | r[1] = c2; | ||
797 | c2 = 0; | ||
798 | sqr_add_c(a, 1, c3, c1, c2); | ||
799 | sqr_add_c2(a, 2, 0, c3, c1, c2); | ||
800 | r[2] = c3; | ||
801 | c3 = 0; | ||
802 | sqr_add_c2(a, 3, 0, c1, c2, c3); | ||
803 | sqr_add_c2(a, 2, 1, c1, c2, c3); | ||
804 | r[3] = c1; | ||
805 | c1 = 0; | ||
806 | sqr_add_c(a, 2, c2, c3, c1); | ||
807 | sqr_add_c2(a, 3, 1, c2, c3, c1); | ||
808 | sqr_add_c2(a, 4, 0, c2, c3, c1); | ||
809 | r[4] = c2; | ||
810 | c2 = 0; | ||
811 | sqr_add_c2(a, 5, 0, c3, c1, c2); | ||
812 | sqr_add_c2(a, 4, 1, c3, c1, c2); | ||
813 | sqr_add_c2(a, 3, 2, c3, c1, c2); | ||
814 | r[5] = c3; | ||
815 | c3 = 0; | ||
816 | sqr_add_c(a, 3, c1, c2, c3); | ||
817 | sqr_add_c2(a, 4, 2, c1, c2, c3); | ||
818 | sqr_add_c2(a, 5, 1, c1, c2, c3); | ||
819 | sqr_add_c2(a, 6, 0, c1, c2, c3); | ||
820 | r[6] = c1; | ||
821 | c1 = 0; | ||
822 | sqr_add_c2(a, 7, 0, c2, c3, c1); | ||
823 | sqr_add_c2(a, 6, 1, c2, c3, c1); | ||
824 | sqr_add_c2(a, 5, 2, c2, c3, c1); | ||
825 | sqr_add_c2(a, 4, 3, c2, c3, c1); | ||
826 | r[7] = c2; | ||
827 | c2 = 0; | ||
828 | sqr_add_c(a, 4, c3, c1, c2); | ||
829 | sqr_add_c2(a, 5, 3, c3, c1, c2); | ||
830 | sqr_add_c2(a, 6, 2, c3, c1, c2); | ||
831 | sqr_add_c2(a, 7, 1, c3, c1, c2); | ||
832 | r[8] = c3; | ||
833 | c3 = 0; | ||
834 | sqr_add_c2(a, 7, 2, c1, c2, c3); | ||
835 | sqr_add_c2(a, 6, 3, c1, c2, c3); | ||
836 | sqr_add_c2(a, 5, 4, c1, c2, c3); | ||
837 | r[9] = c1; | ||
838 | c1 = 0; | ||
839 | sqr_add_c(a, 5, c2, c3, c1); | ||
840 | sqr_add_c2(a, 6, 4, c2, c3, c1); | ||
841 | sqr_add_c2(a, 7, 3, c2, c3, c1); | ||
842 | r[10] = c2; | ||
843 | c2 = 0; | ||
844 | sqr_add_c2(a, 7, 4, c3, c1, c2); | ||
845 | sqr_add_c2(a, 6, 5, c3, c1, c2); | ||
846 | r[11] = c3; | ||
847 | c3 = 0; | ||
848 | sqr_add_c(a, 6, c1, c2, c3); | ||
849 | sqr_add_c2(a, 7, 5, c1, c2, c3); | ||
850 | r[12] = c1; | ||
851 | c1 = 0; | ||
852 | sqr_add_c2(a, 7, 6, c2, c3, c1); | ||
853 | r[13] = c2; | ||
854 | c2 = 0; | ||
855 | sqr_add_c(a, 7, c3, c1, c2); | ||
856 | r[14] = c3; | ||
857 | r[15] = c1; | ||
858 | } | ||
859 | |||
860 | void | ||
861 | bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) | ||
862 | { | ||
863 | BN_ULONG c1, c2, c3; | ||
864 | |||
865 | c1 = 0; | ||
866 | c2 = 0; | ||
867 | c3 = 0; | ||
868 | sqr_add_c(a, 0, c1, c2, c3); | ||
869 | r[0] = c1; | ||
870 | c1 = 0; | ||
871 | sqr_add_c2(a, 1, 0, c2, c3, c1); | ||
872 | r[1] = c2; | ||
873 | c2 = 0; | ||
874 | sqr_add_c(a, 1, c3, c1, c2); | ||
875 | sqr_add_c2(a, 2, 0, c3, c1, c2); | ||
876 | r[2] = c3; | ||
877 | c3 = 0; | ||
878 | sqr_add_c2(a, 3, 0, c1, c2, c3); | ||
879 | sqr_add_c2(a, 2, 1, c1, c2, c3); | ||
880 | r[3] = c1; | ||
881 | c1 = 0; | ||
882 | sqr_add_c(a, 2, c2, c3, c1); | ||
883 | sqr_add_c2(a, 3, 1, c2, c3, c1); | ||
884 | r[4] = c2; | ||
885 | c2 = 0; | ||
886 | sqr_add_c2(a, 3, 2, c3, c1, c2); | ||
887 | r[5] = c3; | ||
888 | c3 = 0; | ||
889 | sqr_add_c(a, 3, c1, c2, c3); | ||
890 | r[6] = c1; | ||
891 | r[7] = c2; | ||
892 | } | ||
893 | |||
894 | #ifdef OPENSSL_NO_ASM | ||
895 | #ifdef OPENSSL_BN_ASM_MONT | ||
896 | /* | ||
897 | * This is essentially reference implementation, which may or may not | ||
898 | * result in performance improvement. E.g. on IA-32 this routine was | ||
899 | * observed to give 40% faster rsa1024 private key operations and 10% | ||
900 | * faster rsa4096 ones, while on AMD64 it improves rsa1024 sign only | ||
901 | * by 10% and *worsens* rsa4096 sign by 15%. Once again, it's a | ||
902 | * reference implementation, one to be used as starting point for | ||
903 | * platform-specific assembler. Mentioned numbers apply to compiler | ||
904 | * generated code compiled with and without -DOPENSSL_BN_ASM_MONT and | ||
905 | * can vary not only from platform to platform, but even for compiler | ||
906 | * versions. Assembler vs. assembler improvement coefficients can | ||
907 | * [and are known to] differ and are to be documented elsewhere. | ||
908 | */ | ||
909 | int | ||
910 | bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np, const BN_ULONG *n0p, int num) | ||
911 | { | ||
912 | BN_ULONG c0, c1, ml, *tp, n0; | ||
913 | #ifdef mul64 | ||
914 | BN_ULONG mh; | ||
915 | #endif | ||
916 | int i = 0, j; | ||
917 | |||
918 | #if 0 /* template for platform-specific implementation */ | ||
919 | if (ap == bp) | ||
920 | return bn_sqr_mont(rp, ap, np, n0p, num); | ||
921 | #endif | ||
922 | tp = reallocarray(NULL, num + 2, sizeof(BN_ULONG)); | ||
923 | if (tp == NULL) | ||
924 | return 0; | ||
925 | |||
926 | n0 = *n0p; | ||
927 | |||
928 | c0 = 0; | ||
929 | ml = bp[0]; | ||
930 | #ifdef mul64 | ||
931 | mh = HBITS(ml); | ||
932 | ml = LBITS(ml); | ||
933 | for (j = 0; j < num; ++j) | ||
934 | mul(tp[j], ap[j], ml, mh, c0); | ||
935 | #else | ||
936 | for (j = 0; j < num; ++j) | ||
937 | mul(tp[j], ap[j], ml, c0); | ||
938 | #endif | ||
939 | |||
940 | tp[num] = c0; | ||
941 | tp[num + 1] = 0; | ||
942 | goto enter; | ||
943 | |||
944 | for (i = 0; i < num; i++) { | ||
945 | c0 = 0; | ||
946 | ml = bp[i]; | ||
947 | #ifdef mul64 | ||
948 | mh = HBITS(ml); | ||
949 | ml = LBITS(ml); | ||
950 | for (j = 0; j < num; ++j) | ||
951 | mul_add(tp[j], ap[j], ml, mh, c0); | ||
952 | #else | ||
953 | for (j = 0; j < num; ++j) | ||
954 | mul_add(tp[j], ap[j], ml, c0); | ||
955 | #endif | ||
956 | c1 = (tp[num] + c0) & BN_MASK2; | ||
957 | tp[num] = c1; | ||
958 | tp[num + 1] = (c1 < c0 ? 1 : 0); | ||
959 | enter: | ||
960 | c1 = tp[0]; | ||
961 | ml = (c1 * n0) & BN_MASK2; | ||
962 | c0 = 0; | ||
963 | #ifdef mul64 | ||
964 | mh = HBITS(ml); | ||
965 | ml = LBITS(ml); | ||
966 | mul_add(c1, np[0], ml, mh, c0); | ||
967 | #else | ||
968 | mul_add(c1, ml, np[0], c0); | ||
969 | #endif | ||
970 | for (j = 1; j < num; j++) { | ||
971 | c1 = tp[j]; | ||
972 | #ifdef mul64 | ||
973 | mul_add(c1, np[j], ml, mh, c0); | ||
974 | #else | ||
975 | mul_add(c1, ml, np[j], c0); | ||
976 | #endif | ||
977 | tp[j - 1] = c1 & BN_MASK2; | ||
978 | } | ||
979 | c1 = (tp[num] + c0) & BN_MASK2; | ||
980 | tp[num - 1] = c1; | ||
981 | tp[num] = tp[num + 1] + (c1 < c0 ? 1 : 0); | ||
982 | } | ||
983 | |||
984 | if (tp[num] != 0 || tp[num - 1] >= np[num - 1]) { | ||
985 | c0 = bn_sub_words(rp, tp, np, num); | ||
986 | if (tp[num] != 0 || c0 == 0) { | ||
987 | goto out; | ||
988 | } | ||
989 | } | ||
990 | memcpy(rp, tp, num * sizeof(BN_ULONG)); | ||
991 | out: | ||
992 | explicit_bzero(tp, (num + 2) * sizeof(BN_ULONG)); | ||
993 | free(tp); | ||
994 | return 1; | ||
995 | } | ||
996 | #else | ||
997 | /* | ||
998 | * Return value of 0 indicates that multiplication/convolution was not | ||
999 | * performed to signal the caller to fall down to alternative/original | ||
1000 | * code-path. | ||
1001 | */ | ||
1002 | int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np, const BN_ULONG *n0, int num) | ||
1003 | { return 0; | ||
1004 | } | ||
1005 | #endif /* OPENSSL_BN_ASM_MONT */ | ||
1006 | #endif | ||
1007 | |||
1008 | #else /* !BN_MUL_COMBA */ | ||
1009 | |||
1010 | /* hmm... is it faster just to do a multiply? */ | ||
1011 | #undef bn_sqr_comba4 | ||
1012 | void | ||
1013 | bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) | ||
1014 | { | ||
1015 | BN_ULONG t[8]; | ||
1016 | bn_sqr_normal(r, a, 4, t); | ||
1017 | } | ||
1018 | |||
1019 | #undef bn_sqr_comba8 | ||
1020 | void | ||
1021 | bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a) | ||
1022 | { | ||
1023 | BN_ULONG t[16]; | ||
1024 | bn_sqr_normal(r, a, 8, t); | ||
1025 | } | ||
1026 | |||
1027 | void | ||
1028 | bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | ||
1029 | { | ||
1030 | r[4] = bn_mul_words(&(r[0]), a, 4, b[0]); | ||
1031 | r[5] = bn_mul_add_words(&(r[1]), a, 4, b[1]); | ||
1032 | r[6] = bn_mul_add_words(&(r[2]), a, 4, b[2]); | ||
1033 | r[7] = bn_mul_add_words(&(r[3]), a, 4, b[3]); | ||
1034 | } | ||
1035 | |||
1036 | void | ||
1037 | bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | ||
1038 | { | ||
1039 | r[8] = bn_mul_words(&(r[0]), a, 8, b[0]); | ||
1040 | r[9] = bn_mul_add_words(&(r[1]), a, 8, b[1]); | ||
1041 | r[10] = bn_mul_add_words(&(r[2]), a, 8, b[2]); | ||
1042 | r[11] = bn_mul_add_words(&(r[3]), a, 8, b[3]); | ||
1043 | r[12] = bn_mul_add_words(&(r[4]), a, 8, b[4]); | ||
1044 | r[13] = bn_mul_add_words(&(r[5]), a, 8, b[5]); | ||
1045 | r[14] = bn_mul_add_words(&(r[6]), a, 8, b[6]); | ||
1046 | r[15] = bn_mul_add_words(&(r[7]), a, 8, b[7]); | ||
1047 | } | ||
1048 | |||
1049 | #ifdef OPENSSL_NO_ASM | ||
1050 | #ifdef OPENSSL_BN_ASM_MONT | ||
1051 | int | ||
1052 | bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, | ||
1053 | const BN_ULONG *np, const BN_ULONG *n0p, int num) | ||
1054 | { | ||
1055 | BN_ULONG c0, c1, *tp, n0 = *n0p; | ||
1056 | int i = 0, j; | ||
1057 | |||
1058 | tp = calloc(NULL, num + 2, sizeof(BN_ULONG)); | ||
1059 | if (tp == NULL) | ||
1060 | return 0; | ||
1061 | |||
1062 | for (i = 0; i < num; i++) { | ||
1063 | c0 = bn_mul_add_words(tp, ap, num, bp[i]); | ||
1064 | c1 = (tp[num] + c0) & BN_MASK2; | ||
1065 | tp[num] = c1; | ||
1066 | tp[num + 1] = (c1 < c0 ? 1 : 0); | ||
1067 | |||
1068 | c0 = bn_mul_add_words(tp, np, num, tp[0] * n0); | ||
1069 | c1 = (tp[num] + c0) & BN_MASK2; | ||
1070 | tp[num] = c1; | ||
1071 | tp[num + 1] += (c1 < c0 ? 1 : 0); | ||
1072 | for (j = 0; j <= num; j++) | ||
1073 | tp[j] = tp[j + 1]; | ||
1074 | } | ||
1075 | |||
1076 | if (tp[num] != 0 || tp[num - 1] >= np[num - 1]) { | ||
1077 | c0 = bn_sub_words(rp, tp, np, num); | ||
1078 | if (tp[num] != 0 || c0 == 0) { | ||
1079 | goto out; | ||
1080 | } | ||
1081 | } | ||
1082 | memcpy(rp, tp, num * sizeof(BN_ULONG)); | ||
1083 | out: | ||
1084 | explicit_bzero(tp, (num + 2) * sizeof(BN_ULONG)); | ||
1085 | free(tp); | ||
1086 | return 1; | ||
1087 | } | ||
1088 | #else | ||
1089 | int | ||
1090 | bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, | ||
1091 | const BN_ULONG *np, const BN_ULONG *n0, int num) | ||
1092 | { | ||
1093 | return 0; | ||
1094 | } | ||
1095 | #endif /* OPENSSL_BN_ASM_MONT */ | ||
1096 | #endif | ||
1097 | |||
1098 | #endif /* !BN_MUL_COMBA */ | ||