diff options
Diffstat (limited to 'src/lib/libcrypto/bn/bn_exp.c')
-rw-r--r-- | src/lib/libcrypto/bn/bn_exp.c | 1330 |
1 files changed, 0 insertions, 1330 deletions
diff --git a/src/lib/libcrypto/bn/bn_exp.c b/src/lib/libcrypto/bn/bn_exp.c deleted file mode 100644 index e925d325d2..0000000000 --- a/src/lib/libcrypto/bn/bn_exp.c +++ /dev/null | |||
@@ -1,1330 +0,0 @@ | |||
1 | /* $OpenBSD: bn_exp.c,v 1.58 2025/02/13 11:15:09 tb Exp $ */ | ||
2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) | ||
3 | * All rights reserved. | ||
4 | * | ||
5 | * This package is an SSL implementation written | ||
6 | * by Eric Young (eay@cryptsoft.com). | ||
7 | * The implementation was written so as to conform with Netscapes SSL. | ||
8 | * | ||
9 | * This library is free for commercial and non-commercial use as long as | ||
10 | * the following conditions are aheared to. The following conditions | ||
11 | * apply to all code found in this distribution, be it the RC4, RSA, | ||
12 | * lhash, DES, etc., code; not just the SSL code. The SSL documentation | ||
13 | * included with this distribution is covered by the same copyright terms | ||
14 | * except that the holder is Tim Hudson (tjh@cryptsoft.com). | ||
15 | * | ||
16 | * Copyright remains Eric Young's, and as such any Copyright notices in | ||
17 | * the code are not to be removed. | ||
18 | * If this package is used in a product, Eric Young should be given attribution | ||
19 | * as the author of the parts of the library used. | ||
20 | * This can be in the form of a textual message at program startup or | ||
21 | * in documentation (online or textual) provided with the package. | ||
22 | * | ||
23 | * Redistribution and use in source and binary forms, with or without | ||
24 | * modification, are permitted provided that the following conditions | ||
25 | * are met: | ||
26 | * 1. Redistributions of source code must retain the copyright | ||
27 | * notice, this list of conditions and the following disclaimer. | ||
28 | * 2. Redistributions in binary form must reproduce the above copyright | ||
29 | * notice, this list of conditions and the following disclaimer in the | ||
30 | * documentation and/or other materials provided with the distribution. | ||
31 | * 3. All advertising materials mentioning features or use of this software | ||
32 | * must display the following acknowledgement: | ||
33 | * "This product includes cryptographic software written by | ||
34 | * Eric Young (eay@cryptsoft.com)" | ||
35 | * The word 'cryptographic' can be left out if the rouines from the library | ||
36 | * being used are not cryptographic related :-). | ||
37 | * 4. If you include any Windows specific code (or a derivative thereof) from | ||
38 | * the apps directory (application code) you must include an acknowledgement: | ||
39 | * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" | ||
40 | * | ||
41 | * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND | ||
42 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
43 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||
44 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE | ||
45 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | ||
46 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | ||
47 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | ||
48 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | ||
49 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | ||
50 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | ||
51 | * SUCH DAMAGE. | ||
52 | * | ||
53 | * The licence and distribution terms for any publically available version or | ||
54 | * derivative of this code cannot be changed. i.e. this code cannot simply be | ||
55 | * copied and put under another distribution licence | ||
56 | * [including the GNU Public Licence.] | ||
57 | */ | ||
58 | /* ==================================================================== | ||
59 | * Copyright (c) 1998-2005 The OpenSSL Project. All rights reserved. | ||
60 | * | ||
61 | * Redistribution and use in source and binary forms, with or without | ||
62 | * modification, are permitted provided that the following conditions | ||
63 | * are met: | ||
64 | * | ||
65 | * 1. Redistributions of source code must retain the above copyright | ||
66 | * notice, this list of conditions and the following disclaimer. | ||
67 | * | ||
68 | * 2. Redistributions in binary form must reproduce the above copyright | ||
69 | * notice, this list of conditions and the following disclaimer in | ||
70 | * the documentation and/or other materials provided with the | ||
71 | * distribution. | ||
72 | * | ||
73 | * 3. All advertising materials mentioning features or use of this | ||
74 | * software must display the following acknowledgment: | ||
75 | * "This product includes software developed by the OpenSSL Project | ||
76 | * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" | ||
77 | * | ||
78 | * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to | ||
79 | * endorse or promote products derived from this software without | ||
80 | * prior written permission. For written permission, please contact | ||
81 | * openssl-core@openssl.org. | ||
82 | * | ||
83 | * 5. Products derived from this software may not be called "OpenSSL" | ||
84 | * nor may "OpenSSL" appear in their names without prior written | ||
85 | * permission of the OpenSSL Project. | ||
86 | * | ||
87 | * 6. Redistributions of any form whatsoever must retain the following | ||
88 | * acknowledgment: | ||
89 | * "This product includes software developed by the OpenSSL Project | ||
90 | * for use in the OpenSSL Toolkit (http://www.openssl.org/)" | ||
91 | * | ||
92 | * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY | ||
93 | * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
94 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | ||
95 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR | ||
96 | * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
97 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | ||
98 | * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | ||
99 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | ||
100 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | ||
101 | * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | ||
102 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED | ||
103 | * OF THE POSSIBILITY OF SUCH DAMAGE. | ||
104 | * ==================================================================== | ||
105 | * | ||
106 | * This product includes cryptographic software written by Eric Young | ||
107 | * (eay@cryptsoft.com). This product includes software written by Tim | ||
108 | * Hudson (tjh@cryptsoft.com). | ||
109 | * | ||
110 | */ | ||
111 | |||
112 | #include <stdlib.h> | ||
113 | #include <string.h> | ||
114 | |||
115 | #include <openssl/err.h> | ||
116 | |||
117 | #include "bn_local.h" | ||
118 | #include "constant_time.h" | ||
119 | |||
120 | /* maximum precomputation table size for *variable* sliding windows */ | ||
121 | #define TABLE_SIZE 32 | ||
122 | |||
123 | /* Calculates r = a^p by successive squaring of a. Not constant time. */ | ||
124 | int | ||
125 | BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) | ||
126 | { | ||
127 | BIGNUM *rr, *v; | ||
128 | int i; | ||
129 | int ret = 0; | ||
130 | |||
131 | if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { | ||
132 | BNerror(ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); | ||
133 | return -1; | ||
134 | } | ||
135 | |||
136 | BN_CTX_start(ctx); | ||
137 | |||
138 | if ((v = BN_CTX_get(ctx)) == NULL) | ||
139 | goto err; | ||
140 | |||
141 | rr = r; | ||
142 | if (r == a || r == p) | ||
143 | rr = BN_CTX_get(ctx); | ||
144 | if (rr == NULL) | ||
145 | goto err; | ||
146 | |||
147 | if (!BN_one(rr)) | ||
148 | goto err; | ||
149 | if (BN_is_odd(p)) { | ||
150 | if (!bn_copy(rr, a)) | ||
151 | goto err; | ||
152 | } | ||
153 | |||
154 | if (!bn_copy(v, a)) | ||
155 | goto err; | ||
156 | |||
157 | for (i = 1; i < BN_num_bits(p); i++) { | ||
158 | if (!BN_sqr(v, v, ctx)) | ||
159 | goto err; | ||
160 | if (!BN_is_bit_set(p, i)) | ||
161 | continue; | ||
162 | if (!BN_mul(rr, rr, v, ctx)) | ||
163 | goto err; | ||
164 | } | ||
165 | |||
166 | if (!bn_copy(r, rr)) | ||
167 | goto err; | ||
168 | |||
169 | ret = 1; | ||
170 | |||
171 | err: | ||
172 | BN_CTX_end(ctx); | ||
173 | |||
174 | return ret; | ||
175 | } | ||
176 | LCRYPTO_ALIAS(BN_exp); | ||
177 | |||
178 | /* The old fallback, simple version :-) */ | ||
179 | int | ||
180 | BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, | ||
181 | BN_CTX *ctx) | ||
182 | { | ||
183 | int i, j, bits, wstart, wend, window, wvalue; | ||
184 | int start = 1; | ||
185 | BIGNUM *d, *q; | ||
186 | /* Table of variables obtained from 'ctx' */ | ||
187 | BIGNUM *val[TABLE_SIZE]; | ||
188 | int ret = 0; | ||
189 | |||
190 | if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { | ||
191 | /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ | ||
192 | BNerror(ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); | ||
193 | return -1; | ||
194 | } | ||
195 | |||
196 | if (r == m) { | ||
197 | BNerror(BN_R_INVALID_ARGUMENT); | ||
198 | return 0; | ||
199 | } | ||
200 | |||
201 | bits = BN_num_bits(p); | ||
202 | if (bits == 0) { | ||
203 | /* x**0 mod 1 is still zero. */ | ||
204 | if (BN_abs_is_word(m, 1)) { | ||
205 | ret = 1; | ||
206 | BN_zero(r); | ||
207 | } else | ||
208 | ret = BN_one(r); | ||
209 | return ret; | ||
210 | } | ||
211 | |||
212 | BN_CTX_start(ctx); | ||
213 | if ((d = BN_CTX_get(ctx)) == NULL) | ||
214 | goto err; | ||
215 | if ((q = BN_CTX_get(ctx)) == NULL) | ||
216 | goto err; | ||
217 | if ((val[0] = BN_CTX_get(ctx)) == NULL) | ||
218 | goto err; | ||
219 | |||
220 | if (!BN_nnmod(val[0], a, m, ctx)) | ||
221 | goto err; | ||
222 | if (BN_is_zero(val[0])) { | ||
223 | BN_zero(r); | ||
224 | goto done; | ||
225 | } | ||
226 | if (!bn_copy(q, p)) | ||
227 | goto err; | ||
228 | |||
229 | window = BN_window_bits_for_exponent_size(bits); | ||
230 | if (window > 1) { | ||
231 | if (!BN_mod_mul(d, val[0], val[0], m, ctx)) | ||
232 | goto err; | ||
233 | j = 1 << (window - 1); | ||
234 | for (i = 1; i < j; i++) { | ||
235 | if (((val[i] = BN_CTX_get(ctx)) == NULL) || | ||
236 | !BN_mod_mul(val[i], val[i - 1], d,m, ctx)) | ||
237 | goto err; | ||
238 | } | ||
239 | } | ||
240 | |||
241 | start = 1; /* This is used to avoid multiplication etc | ||
242 | * when there is only the value '1' in the | ||
243 | * buffer. */ | ||
244 | wvalue = 0; /* The 'value' of the window */ | ||
245 | wstart = bits - 1; /* The top bit of the window */ | ||
246 | wend = 0; /* The bottom bit of the window */ | ||
247 | |||
248 | if (!BN_one(r)) | ||
249 | goto err; | ||
250 | |||
251 | for (;;) { | ||
252 | if (BN_is_bit_set(q, wstart) == 0) { | ||
253 | if (!start) | ||
254 | if (!BN_mod_mul(r, r, r, m, ctx)) | ||
255 | goto err; | ||
256 | if (wstart == 0) | ||
257 | break; | ||
258 | wstart--; | ||
259 | continue; | ||
260 | } | ||
261 | /* We now have wstart on a 'set' bit, we now need to work out | ||
262 | * how bit a window to do. To do this we need to scan | ||
263 | * forward until the last set bit before the end of the | ||
264 | * window */ | ||
265 | j = wstart; | ||
266 | wvalue = 1; | ||
267 | wend = 0; | ||
268 | for (i = 1; i < window; i++) { | ||
269 | if (wstart - i < 0) | ||
270 | break; | ||
271 | if (BN_is_bit_set(q, wstart - i)) { | ||
272 | wvalue <<= (i - wend); | ||
273 | wvalue |= 1; | ||
274 | wend = i; | ||
275 | } | ||
276 | } | ||
277 | |||
278 | /* wend is the size of the current window */ | ||
279 | j = wend + 1; | ||
280 | /* add the 'bytes above' */ | ||
281 | if (!start) | ||
282 | for (i = 0; i < j; i++) { | ||
283 | if (!BN_mod_mul(r, r, r, m, ctx)) | ||
284 | goto err; | ||
285 | } | ||
286 | |||
287 | /* wvalue will be an odd number < 2^window */ | ||
288 | if (!BN_mod_mul(r, r, val[wvalue >> 1], m, ctx)) | ||
289 | goto err; | ||
290 | |||
291 | /* move the 'window' down further */ | ||
292 | wstart -= wend + 1; | ||
293 | wvalue = 0; | ||
294 | start = 0; | ||
295 | if (wstart < 0) | ||
296 | break; | ||
297 | } | ||
298 | |||
299 | done: | ||
300 | ret = 1; | ||
301 | |||
302 | err: | ||
303 | BN_CTX_end(ctx); | ||
304 | |||
305 | return ret; | ||
306 | } | ||
307 | |||
308 | /* BN_mod_exp_mont_consttime() stores the precomputed powers in a specific layout | ||
309 | * so that accessing any of these table values shows the same access pattern as far | ||
310 | * as cache lines are concerned. The following functions are used to transfer a BIGNUM | ||
311 | * from/to that table. */ | ||
312 | |||
313 | static int | ||
314 | MOD_EXP_CTIME_COPY_TO_PREBUF(const BIGNUM *b, int top, unsigned char *buf, | ||
315 | int idx, int window) | ||
316 | { | ||
317 | int i, j; | ||
318 | int width = 1 << window; | ||
319 | BN_ULONG *table = (BN_ULONG *)buf; | ||
320 | |||
321 | if (top > b->top) | ||
322 | top = b->top; /* this works because 'buf' is explicitly zeroed */ | ||
323 | |||
324 | for (i = 0, j = idx; i < top; i++, j += width) { | ||
325 | table[j] = b->d[i]; | ||
326 | } | ||
327 | |||
328 | return 1; | ||
329 | } | ||
330 | |||
331 | static int | ||
332 | MOD_EXP_CTIME_COPY_FROM_PREBUF(BIGNUM *b, int top, unsigned char *buf, int idx, | ||
333 | int window) | ||
334 | { | ||
335 | int i, j; | ||
336 | int width = 1 << window; | ||
337 | volatile BN_ULONG *table = (volatile BN_ULONG *)buf; | ||
338 | |||
339 | if (!bn_wexpand(b, top)) | ||
340 | return 0; | ||
341 | |||
342 | if (window <= 3) { | ||
343 | for (i = 0; i < top; i++, table += width) { | ||
344 | BN_ULONG acc = 0; | ||
345 | |||
346 | for (j = 0; j < width; j++) { | ||
347 | acc |= table[j] & | ||
348 | ((BN_ULONG)0 - (constant_time_eq_int(j,idx)&1)); | ||
349 | } | ||
350 | |||
351 | b->d[i] = acc; | ||
352 | } | ||
353 | } else { | ||
354 | int xstride = 1 << (window - 2); | ||
355 | BN_ULONG y0, y1, y2, y3; | ||
356 | |||
357 | i = idx >> (window - 2); /* equivalent of idx / xstride */ | ||
358 | idx &= xstride - 1; /* equivalent of idx % xstride */ | ||
359 | |||
360 | y0 = (BN_ULONG)0 - (constant_time_eq_int(i,0)&1); | ||
361 | y1 = (BN_ULONG)0 - (constant_time_eq_int(i,1)&1); | ||
362 | y2 = (BN_ULONG)0 - (constant_time_eq_int(i,2)&1); | ||
363 | y3 = (BN_ULONG)0 - (constant_time_eq_int(i,3)&1); | ||
364 | |||
365 | for (i = 0; i < top; i++, table += width) { | ||
366 | BN_ULONG acc = 0; | ||
367 | |||
368 | for (j = 0; j < xstride; j++) { | ||
369 | acc |= ( (table[j + 0 * xstride] & y0) | | ||
370 | (table[j + 1 * xstride] & y1) | | ||
371 | (table[j + 2 * xstride] & y2) | | ||
372 | (table[j + 3 * xstride] & y3) ) | ||
373 | & ((BN_ULONG)0 - (constant_time_eq_int(j,idx)&1)); | ||
374 | } | ||
375 | |||
376 | b->d[i] = acc; | ||
377 | } | ||
378 | } | ||
379 | b->top = top; | ||
380 | bn_correct_top(b); | ||
381 | return 1; | ||
382 | } | ||
383 | |||
384 | /* Given a pointer value, compute the next address that is a cache line multiple. */ | ||
385 | #define MOD_EXP_CTIME_ALIGN(x_) \ | ||
386 | ((unsigned char*)(x_) + (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - (((size_t)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK)))) | ||
387 | |||
388 | /* This variant of BN_mod_exp_mont() uses fixed windows and the special | ||
389 | * precomputation memory layout to limit data-dependency to a minimum | ||
390 | * to protect secret exponents (cf. the hyper-threading timing attacks | ||
391 | * pointed out by Colin Percival, | ||
392 | * http://www.daemonology.net/hyperthreading-considered-harmful/) | ||
393 | */ | ||
394 | int | ||
395 | BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, | ||
396 | const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont) | ||
397 | { | ||
398 | int i, bits, ret = 0, window, wvalue; | ||
399 | int top; | ||
400 | BN_MONT_CTX *mont = NULL; | ||
401 | int numPowers; | ||
402 | unsigned char *powerbufFree = NULL; | ||
403 | int powerbufLen = 0; | ||
404 | unsigned char *powerbuf = NULL; | ||
405 | BIGNUM tmp, am; | ||
406 | |||
407 | |||
408 | if (!BN_is_odd(m)) { | ||
409 | BNerror(BN_R_CALLED_WITH_EVEN_MODULUS); | ||
410 | return (0); | ||
411 | } | ||
412 | |||
413 | top = m->top; | ||
414 | |||
415 | bits = BN_num_bits(p); | ||
416 | if (bits == 0) { | ||
417 | /* x**0 mod 1 is still zero. */ | ||
418 | if (BN_abs_is_word(m, 1)) { | ||
419 | ret = 1; | ||
420 | BN_zero(rr); | ||
421 | } else | ||
422 | ret = BN_one(rr); | ||
423 | return ret; | ||
424 | } | ||
425 | |||
426 | BN_CTX_start(ctx); | ||
427 | |||
428 | if ((mont = in_mont) == NULL) | ||
429 | mont = BN_MONT_CTX_create(m, ctx); | ||
430 | if (mont == NULL) | ||
431 | goto err; | ||
432 | |||
433 | /* Get the window size to use with size of p. */ | ||
434 | window = BN_window_bits_for_ctime_exponent_size(bits); | ||
435 | #if defined(OPENSSL_BN_ASM_MONT5) | ||
436 | if (window == 6 && bits <= 1024) | ||
437 | window = 5; /* ~5% improvement of 2048-bit RSA sign */ | ||
438 | #endif | ||
439 | |||
440 | /* Allocate a buffer large enough to hold all of the pre-computed | ||
441 | * powers of am, am itself and tmp. | ||
442 | */ | ||
443 | numPowers = 1 << window; | ||
444 | powerbufLen = sizeof(m->d[0]) * (top * numPowers + | ||
445 | ((2*top) > numPowers ? (2*top) : numPowers)); | ||
446 | if ((powerbufFree = calloc(powerbufLen + | ||
447 | MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH, 1)) == NULL) | ||
448 | goto err; | ||
449 | powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree); | ||
450 | |||
451 | /* lay down tmp and am right after powers table */ | ||
452 | tmp.d = (BN_ULONG *)(powerbuf + sizeof(m->d[0]) * top * numPowers); | ||
453 | am.d = tmp.d + top; | ||
454 | tmp.top = am.top = 0; | ||
455 | tmp.dmax = am.dmax = top; | ||
456 | tmp.neg = am.neg = 0; | ||
457 | tmp.flags = am.flags = BN_FLG_STATIC_DATA; | ||
458 | |||
459 | /* prepare a^0 in Montgomery domain */ | ||
460 | #if 1 | ||
461 | if (!BN_to_montgomery(&tmp, BN_value_one(), mont, ctx)) | ||
462 | goto err; | ||
463 | #else | ||
464 | tmp.d[0] = (0 - m - >d[0]) & BN_MASK2; /* 2^(top*BN_BITS2) - m */ | ||
465 | for (i = 1; i < top; i++) | ||
466 | tmp.d[i] = (~m->d[i]) & BN_MASK2; | ||
467 | tmp.top = top; | ||
468 | #endif | ||
469 | |||
470 | /* prepare a^1 in Montgomery domain */ | ||
471 | if (!BN_nnmod(&am, a, m, ctx)) | ||
472 | goto err; | ||
473 | if (!BN_to_montgomery(&am, &am, mont, ctx)) | ||
474 | goto err; | ||
475 | |||
476 | #if defined(OPENSSL_BN_ASM_MONT5) | ||
477 | /* This optimization uses ideas from http://eprint.iacr.org/2011/239, | ||
478 | * specifically optimization of cache-timing attack countermeasures | ||
479 | * and pre-computation optimization. */ | ||
480 | |||
481 | /* Dedicated window==4 case improves 512-bit RSA sign by ~15%, but as | ||
482 | * 512-bit RSA is hardly relevant, we omit it to spare size... */ | ||
483 | if (window == 5 && top > 1) { | ||
484 | void bn_mul_mont_gather5(BN_ULONG *rp, const BN_ULONG *ap, | ||
485 | const void *table, const BN_ULONG *np, | ||
486 | const BN_ULONG *n0, int num, int power); | ||
487 | void bn_scatter5(const BN_ULONG *inp, size_t num, | ||
488 | void *table, size_t power); | ||
489 | void bn_gather5(BN_ULONG *out, size_t num, | ||
490 | void *table, size_t power); | ||
491 | |||
492 | BN_ULONG *np = mont->N.d, *n0 = mont->n0; | ||
493 | |||
494 | /* BN_to_montgomery can contaminate words above .top | ||
495 | * [in BN_DEBUG[_DEBUG] build]... */ | ||
496 | for (i = am.top; i < top; i++) | ||
497 | am.d[i] = 0; | ||
498 | for (i = tmp.top; i < top; i++) | ||
499 | tmp.d[i] = 0; | ||
500 | |||
501 | bn_scatter5(tmp.d, top, powerbuf, 0); | ||
502 | bn_scatter5(am.d, am.top, powerbuf, 1); | ||
503 | bn_mul_mont(tmp.d, am.d, am.d, np, n0, top); | ||
504 | bn_scatter5(tmp.d, top, powerbuf, 2); | ||
505 | |||
506 | #if 0 | ||
507 | for (i = 3; i < 32; i++) { | ||
508 | /* Calculate a^i = a^(i-1) * a */ | ||
509 | bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, | ||
510 | n0, top, i - 1); | ||
511 | bn_scatter5(tmp.d, top, powerbuf, i); | ||
512 | } | ||
513 | #else | ||
514 | /* same as above, but uses squaring for 1/2 of operations */ | ||
515 | for (i = 4; i < 32; i*=2) { | ||
516 | bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); | ||
517 | bn_scatter5(tmp.d, top, powerbuf, i); | ||
518 | } | ||
519 | for (i = 3; i < 8; i += 2) { | ||
520 | int j; | ||
521 | bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, | ||
522 | n0, top, i - 1); | ||
523 | bn_scatter5(tmp.d, top, powerbuf, i); | ||
524 | for (j = 2 * i; j < 32; j *= 2) { | ||
525 | bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); | ||
526 | bn_scatter5(tmp.d, top, powerbuf, j); | ||
527 | } | ||
528 | } | ||
529 | for (; i < 16; i += 2) { | ||
530 | bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, | ||
531 | n0, top, i - 1); | ||
532 | bn_scatter5(tmp.d, top, powerbuf, i); | ||
533 | bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); | ||
534 | bn_scatter5(tmp.d, top, powerbuf, 2*i); | ||
535 | } | ||
536 | for (; i < 32; i += 2) { | ||
537 | bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, | ||
538 | n0, top, i - 1); | ||
539 | bn_scatter5(tmp.d, top, powerbuf, i); | ||
540 | } | ||
541 | #endif | ||
542 | bits--; | ||
543 | for (wvalue = 0, i = bits % 5; i >= 0; i--, bits--) | ||
544 | wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); | ||
545 | bn_gather5(tmp.d, top, powerbuf, wvalue); | ||
546 | |||
547 | /* Scan the exponent one window at a time starting from the most | ||
548 | * significant bits. | ||
549 | */ | ||
550 | while (bits >= 0) { | ||
551 | for (wvalue = 0, i = 0; i < 5; i++, bits--) | ||
552 | wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); | ||
553 | |||
554 | bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); | ||
555 | bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); | ||
556 | bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); | ||
557 | bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); | ||
558 | bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); | ||
559 | bn_mul_mont_gather5(tmp.d, tmp.d, powerbuf, np, n0, top, wvalue); | ||
560 | } | ||
561 | |||
562 | tmp.top = top; | ||
563 | bn_correct_top(&tmp); | ||
564 | } else | ||
565 | #endif | ||
566 | { | ||
567 | if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 0, | ||
568 | window)) | ||
569 | goto err; | ||
570 | if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&am, top, powerbuf, 1, | ||
571 | window)) | ||
572 | goto err; | ||
573 | |||
574 | /* If the window size is greater than 1, then calculate | ||
575 | * val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1) | ||
576 | * (even powers could instead be computed as (a^(i/2))^2 | ||
577 | * to use the slight performance advantage of sqr over mul). | ||
578 | */ | ||
579 | if (window > 1) { | ||
580 | if (!BN_mod_mul_montgomery(&tmp, &am, &am, mont, ctx)) | ||
581 | goto err; | ||
582 | if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, | ||
583 | 2, window)) | ||
584 | goto err; | ||
585 | for (i = 3; i < numPowers; i++) { | ||
586 | /* Calculate a^i = a^(i-1) * a */ | ||
587 | if (!BN_mod_mul_montgomery(&tmp, &am, &tmp, | ||
588 | mont, ctx)) | ||
589 | goto err; | ||
590 | if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, | ||
591 | powerbuf, i, window)) | ||
592 | goto err; | ||
593 | } | ||
594 | } | ||
595 | |||
596 | bits--; | ||
597 | for (wvalue = 0, i = bits % window; i >= 0; i--, bits--) | ||
598 | wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); | ||
599 | if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&tmp, top, powerbuf, | ||
600 | wvalue, window)) | ||
601 | goto err; | ||
602 | |||
603 | /* Scan the exponent one window at a time starting from the most | ||
604 | * significant bits. | ||
605 | */ | ||
606 | while (bits >= 0) { | ||
607 | wvalue = 0; /* The 'value' of the window */ | ||
608 | |||
609 | /* Scan the window, squaring the result as we go */ | ||
610 | for (i = 0; i < window; i++, bits--) { | ||
611 | if (!BN_mod_mul_montgomery(&tmp, &tmp, &tmp, | ||
612 | mont, ctx)) | ||
613 | goto err; | ||
614 | wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); | ||
615 | } | ||
616 | |||
617 | /* Fetch the appropriate pre-computed value from the pre-buf */ | ||
618 | if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&am, top, powerbuf, | ||
619 | wvalue, window)) | ||
620 | goto err; | ||
621 | |||
622 | /* Multiply the result into the intermediate result */ | ||
623 | if (!BN_mod_mul_montgomery(&tmp, &tmp, &am, mont, ctx)) | ||
624 | goto err; | ||
625 | } | ||
626 | } | ||
627 | |||
628 | /* Convert the final result from montgomery to standard format */ | ||
629 | if (!BN_from_montgomery(rr, &tmp, mont, ctx)) | ||
630 | goto err; | ||
631 | |||
632 | ret = 1; | ||
633 | |||
634 | err: | ||
635 | if (mont != in_mont) | ||
636 | BN_MONT_CTX_free(mont); | ||
637 | BN_CTX_end(ctx); | ||
638 | freezero(powerbufFree, powerbufLen + MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH); | ||
639 | |||
640 | return ret; | ||
641 | } | ||
642 | LCRYPTO_ALIAS(BN_mod_exp_mont_consttime); | ||
643 | |||
644 | static int | ||
645 | BN_mod_exp_mont_internal(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, | ||
646 | BN_CTX *ctx, BN_MONT_CTX *in_mont, int ct) | ||
647 | { | ||
648 | int i, j, bits, ret = 0, wstart, wend, window, wvalue; | ||
649 | int start = 1; | ||
650 | BIGNUM *d, *r; | ||
651 | const BIGNUM *aa; | ||
652 | /* Table of variables obtained from 'ctx' */ | ||
653 | BIGNUM *val[TABLE_SIZE]; | ||
654 | BN_MONT_CTX *mont = NULL; | ||
655 | |||
656 | if (ct) { | ||
657 | return BN_mod_exp_mont_consttime(rr, a, p, m, ctx, in_mont); | ||
658 | } | ||
659 | |||
660 | |||
661 | if (!BN_is_odd(m)) { | ||
662 | BNerror(BN_R_CALLED_WITH_EVEN_MODULUS); | ||
663 | return (0); | ||
664 | } | ||
665 | |||
666 | bits = BN_num_bits(p); | ||
667 | if (bits == 0) { | ||
668 | /* x**0 mod 1 is still zero. */ | ||
669 | if (BN_abs_is_word(m, 1)) { | ||
670 | ret = 1; | ||
671 | BN_zero(rr); | ||
672 | } else | ||
673 | ret = BN_one(rr); | ||
674 | return ret; | ||
675 | } | ||
676 | |||
677 | BN_CTX_start(ctx); | ||
678 | if ((d = BN_CTX_get(ctx)) == NULL) | ||
679 | goto err; | ||
680 | if ((r = BN_CTX_get(ctx)) == NULL) | ||
681 | goto err; | ||
682 | if ((val[0] = BN_CTX_get(ctx)) == NULL) | ||
683 | goto err; | ||
684 | |||
685 | if ((mont = in_mont) == NULL) | ||
686 | mont = BN_MONT_CTX_create(m, ctx); | ||
687 | if (mont == NULL) | ||
688 | goto err; | ||
689 | |||
690 | if (!BN_nnmod(val[0], a,m, ctx)) | ||
691 | goto err; | ||
692 | aa = val[0]; | ||
693 | if (BN_is_zero(aa)) { | ||
694 | BN_zero(rr); | ||
695 | ret = 1; | ||
696 | goto err; | ||
697 | } | ||
698 | if (!BN_to_montgomery(val[0], aa, mont, ctx)) | ||
699 | goto err; | ||
700 | |||
701 | window = BN_window_bits_for_exponent_size(bits); | ||
702 | if (window > 1) { | ||
703 | if (!BN_mod_mul_montgomery(d, val[0], val[0], mont, ctx)) | ||
704 | goto err; | ||
705 | j = 1 << (window - 1); | ||
706 | for (i = 1; i < j; i++) { | ||
707 | if (((val[i] = BN_CTX_get(ctx)) == NULL) || | ||
708 | !BN_mod_mul_montgomery(val[i], val[i - 1], | ||
709 | d, mont, ctx)) | ||
710 | goto err; | ||
711 | } | ||
712 | } | ||
713 | |||
714 | start = 1; /* This is used to avoid multiplication etc | ||
715 | * when there is only the value '1' in the | ||
716 | * buffer. */ | ||
717 | wvalue = 0; /* The 'value' of the window */ | ||
718 | wstart = bits - 1; /* The top bit of the window */ | ||
719 | wend = 0; /* The bottom bit of the window */ | ||
720 | |||
721 | if (!BN_to_montgomery(r, BN_value_one(), mont, ctx)) | ||
722 | goto err; | ||
723 | for (;;) { | ||
724 | if (BN_is_bit_set(p, wstart) == 0) { | ||
725 | if (!start) { | ||
726 | if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) | ||
727 | goto err; | ||
728 | } | ||
729 | if (wstart == 0) | ||
730 | break; | ||
731 | wstart--; | ||
732 | continue; | ||
733 | } | ||
734 | /* We now have wstart on a 'set' bit, we now need to work out | ||
735 | * how bit a window to do. To do this we need to scan | ||
736 | * forward until the last set bit before the end of the | ||
737 | * window */ | ||
738 | j = wstart; | ||
739 | wvalue = 1; | ||
740 | wend = 0; | ||
741 | for (i = 1; i < window; i++) { | ||
742 | if (wstart - i < 0) | ||
743 | break; | ||
744 | if (BN_is_bit_set(p, wstart - i)) { | ||
745 | wvalue <<= (i - wend); | ||
746 | wvalue |= 1; | ||
747 | wend = i; | ||
748 | } | ||
749 | } | ||
750 | |||
751 | /* wend is the size of the current window */ | ||
752 | j = wend + 1; | ||
753 | /* add the 'bytes above' */ | ||
754 | if (!start) | ||
755 | for (i = 0; i < j; i++) { | ||
756 | if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) | ||
757 | goto err; | ||
758 | } | ||
759 | |||
760 | /* wvalue will be an odd number < 2^window */ | ||
761 | if (!BN_mod_mul_montgomery(r, r, val[wvalue >> 1], mont, ctx)) | ||
762 | goto err; | ||
763 | |||
764 | /* move the 'window' down further */ | ||
765 | wstart -= wend + 1; | ||
766 | wvalue = 0; | ||
767 | start = 0; | ||
768 | if (wstart < 0) | ||
769 | break; | ||
770 | } | ||
771 | if (!BN_from_montgomery(rr, r,mont, ctx)) | ||
772 | goto err; | ||
773 | |||
774 | ret = 1; | ||
775 | |||
776 | err: | ||
777 | if (mont != in_mont) | ||
778 | BN_MONT_CTX_free(mont); | ||
779 | BN_CTX_end(ctx); | ||
780 | |||
781 | return ret; | ||
782 | } | ||
783 | |||
784 | int | ||
785 | BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, | ||
786 | BN_CTX *ctx, BN_MONT_CTX *in_mont) | ||
787 | { | ||
788 | return BN_mod_exp_mont_internal(rr, a, p, m, ctx, in_mont, | ||
789 | (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)); | ||
790 | } | ||
791 | LCRYPTO_ALIAS(BN_mod_exp_mont); | ||
792 | |||
793 | int | ||
794 | BN_mod_exp_mont_ct(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, | ||
795 | BN_CTX *ctx, BN_MONT_CTX *in_mont) | ||
796 | { | ||
797 | return BN_mod_exp_mont_internal(rr, a, p, m, ctx, in_mont, 1); | ||
798 | } | ||
799 | |||
800 | int | ||
801 | BN_mod_exp_mont_nonct(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, | ||
802 | BN_CTX *ctx, BN_MONT_CTX *in_mont) | ||
803 | { | ||
804 | return BN_mod_exp_mont_internal(rr, a, p, m, ctx, in_mont, 0); | ||
805 | } | ||
806 | |||
807 | int | ||
808 | BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p, const BIGNUM *m, | ||
809 | BN_CTX *ctx, BN_MONT_CTX *in_mont) | ||
810 | { | ||
811 | BN_MONT_CTX *mont = NULL; | ||
812 | int b, bits, ret = 0; | ||
813 | int r_is_one; | ||
814 | BN_ULONG w, next_w; | ||
815 | BIGNUM *d, *r, *t; | ||
816 | BIGNUM *swap_tmp; | ||
817 | |||
818 | #define BN_MOD_MUL_WORD(r, w, m) \ | ||
819 | (BN_mul_word(r, (w)) && \ | ||
820 | (/* BN_ucmp(r, (m)) < 0 ? 1 :*/ \ | ||
821 | (BN_mod_ct(t, r, m, ctx) && (swap_tmp = r, r = t, t = swap_tmp, 1)))) | ||
822 | /* BN_MOD_MUL_WORD is only used with 'w' large, | ||
823 | * so the BN_ucmp test is probably more overhead | ||
824 | * than always using BN_mod (which uses bn_copy if | ||
825 | * a similar test returns true). */ | ||
826 | /* We can use BN_mod and do not need BN_nnmod because our | ||
827 | * accumulator is never negative (the result of BN_mod does | ||
828 | * not depend on the sign of the modulus). | ||
829 | */ | ||
830 | #define BN_TO_MONTGOMERY_WORD(r, w, mont) \ | ||
831 | (BN_set_word(r, (w)) && BN_to_montgomery(r, r, (mont), ctx)) | ||
832 | |||
833 | if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { | ||
834 | /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ | ||
835 | BNerror(ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); | ||
836 | return -1; | ||
837 | } | ||
838 | |||
839 | |||
840 | if (!BN_is_odd(m)) { | ||
841 | BNerror(BN_R_CALLED_WITH_EVEN_MODULUS); | ||
842 | return (0); | ||
843 | } | ||
844 | if (m->top == 1) | ||
845 | a %= m->d[0]; /* make sure that 'a' is reduced */ | ||
846 | |||
847 | bits = BN_num_bits(p); | ||
848 | if (bits == 0) { | ||
849 | /* x**0 mod 1 is still zero. */ | ||
850 | if (BN_abs_is_word(m, 1)) { | ||
851 | ret = 1; | ||
852 | BN_zero(rr); | ||
853 | } else | ||
854 | ret = BN_one(rr); | ||
855 | return ret; | ||
856 | } | ||
857 | if (a == 0) { | ||
858 | BN_zero(rr); | ||
859 | ret = 1; | ||
860 | return ret; | ||
861 | } | ||
862 | |||
863 | BN_CTX_start(ctx); | ||
864 | if ((d = BN_CTX_get(ctx)) == NULL) | ||
865 | goto err; | ||
866 | if ((r = BN_CTX_get(ctx)) == NULL) | ||
867 | goto err; | ||
868 | if ((t = BN_CTX_get(ctx)) == NULL) | ||
869 | goto err; | ||
870 | |||
871 | if ((mont = in_mont) == NULL) | ||
872 | mont = BN_MONT_CTX_create(m, ctx); | ||
873 | if (mont == NULL) | ||
874 | goto err; | ||
875 | |||
876 | r_is_one = 1; /* except for Montgomery factor */ | ||
877 | |||
878 | /* bits-1 >= 0 */ | ||
879 | |||
880 | /* The result is accumulated in the product r*w. */ | ||
881 | w = a; /* bit 'bits-1' of 'p' is always set */ | ||
882 | for (b = bits - 2; b >= 0; b--) { | ||
883 | /* First, square r*w. */ | ||
884 | next_w = w * w; | ||
885 | if ((next_w / w) != w) /* overflow */ | ||
886 | { | ||
887 | if (r_is_one) { | ||
888 | if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) | ||
889 | goto err; | ||
890 | r_is_one = 0; | ||
891 | } else { | ||
892 | if (!BN_MOD_MUL_WORD(r, w, m)) | ||
893 | goto err; | ||
894 | } | ||
895 | next_w = 1; | ||
896 | } | ||
897 | w = next_w; | ||
898 | if (!r_is_one) { | ||
899 | if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) | ||
900 | goto err; | ||
901 | } | ||
902 | |||
903 | /* Second, multiply r*w by 'a' if exponent bit is set. */ | ||
904 | if (BN_is_bit_set(p, b)) { | ||
905 | next_w = w * a; | ||
906 | if ((next_w / a) != w) /* overflow */ | ||
907 | { | ||
908 | if (r_is_one) { | ||
909 | if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) | ||
910 | goto err; | ||
911 | r_is_one = 0; | ||
912 | } else { | ||
913 | if (!BN_MOD_MUL_WORD(r, w, m)) | ||
914 | goto err; | ||
915 | } | ||
916 | next_w = a; | ||
917 | } | ||
918 | w = next_w; | ||
919 | } | ||
920 | } | ||
921 | |||
922 | /* Finally, set r:=r*w. */ | ||
923 | if (w != 1) { | ||
924 | if (r_is_one) { | ||
925 | if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) | ||
926 | goto err; | ||
927 | r_is_one = 0; | ||
928 | } else { | ||
929 | if (!BN_MOD_MUL_WORD(r, w, m)) | ||
930 | goto err; | ||
931 | } | ||
932 | } | ||
933 | |||
934 | if (r_is_one) /* can happen only if a == 1*/ | ||
935 | { | ||
936 | if (!BN_one(rr)) | ||
937 | goto err; | ||
938 | } else { | ||
939 | if (!BN_from_montgomery(rr, r, mont, ctx)) | ||
940 | goto err; | ||
941 | } | ||
942 | |||
943 | ret = 1; | ||
944 | |||
945 | err: | ||
946 | if (mont != in_mont) | ||
947 | BN_MONT_CTX_free(mont); | ||
948 | BN_CTX_end(ctx); | ||
949 | |||
950 | return ret; | ||
951 | } | ||
952 | |||
953 | int | ||
954 | BN_mod_exp_reciprocal(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, | ||
955 | BN_CTX *ctx) | ||
956 | { | ||
957 | int i, j, bits, wstart, wend, window, wvalue; | ||
958 | int start = 1; | ||
959 | BIGNUM *aa, *q; | ||
960 | /* Table of variables obtained from 'ctx' */ | ||
961 | BIGNUM *val[TABLE_SIZE]; | ||
962 | BN_RECP_CTX *recp = NULL; | ||
963 | int ret = 0; | ||
964 | |||
965 | if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { | ||
966 | /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ | ||
967 | BNerror(ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); | ||
968 | return -1; | ||
969 | } | ||
970 | |||
971 | bits = BN_num_bits(p); | ||
972 | if (bits == 0) { | ||
973 | /* x**0 mod 1 is still zero. */ | ||
974 | if (BN_abs_is_word(m, 1)) { | ||
975 | ret = 1; | ||
976 | BN_zero(r); | ||
977 | } else | ||
978 | ret = BN_one(r); | ||
979 | return ret; | ||
980 | } | ||
981 | |||
982 | BN_CTX_start(ctx); | ||
983 | if ((aa = BN_CTX_get(ctx)) == NULL) | ||
984 | goto err; | ||
985 | if ((q = BN_CTX_get(ctx)) == NULL) | ||
986 | goto err; | ||
987 | if ((val[0] = BN_CTX_get(ctx)) == NULL) | ||
988 | goto err; | ||
989 | |||
990 | if ((recp = BN_RECP_CTX_create(m)) == NULL) | ||
991 | goto err; | ||
992 | |||
993 | if (!BN_nnmod(val[0], a, m, ctx)) | ||
994 | goto err; | ||
995 | if (BN_is_zero(val[0])) { | ||
996 | BN_zero(r); | ||
997 | goto done; | ||
998 | } | ||
999 | if (!bn_copy(q, p)) | ||
1000 | goto err; | ||
1001 | |||
1002 | window = BN_window_bits_for_exponent_size(bits); | ||
1003 | if (window > 1) { | ||
1004 | if (!BN_mod_sqr_reciprocal(aa, val[0], recp, ctx)) | ||
1005 | goto err; | ||
1006 | j = 1 << (window - 1); | ||
1007 | for (i = 1; i < j; i++) { | ||
1008 | if (((val[i] = BN_CTX_get(ctx)) == NULL) || | ||
1009 | !BN_mod_mul_reciprocal(val[i], val[i - 1], | ||
1010 | aa, recp, ctx)) | ||
1011 | goto err; | ||
1012 | } | ||
1013 | } | ||
1014 | |||
1015 | start = 1; /* This is used to avoid multiplication etc | ||
1016 | * when there is only the value '1' in the | ||
1017 | * buffer. */ | ||
1018 | wvalue = 0; /* The 'value' of the window */ | ||
1019 | wstart = bits - 1; /* The top bit of the window */ | ||
1020 | wend = 0; /* The bottom bit of the window */ | ||
1021 | |||
1022 | if (!BN_one(r)) | ||
1023 | goto err; | ||
1024 | |||
1025 | for (;;) { | ||
1026 | if (BN_is_bit_set(q, wstart) == 0) { | ||
1027 | if (!start) | ||
1028 | if (!BN_mod_sqr_reciprocal(r, r, recp, ctx)) | ||
1029 | goto err; | ||
1030 | if (wstart == 0) | ||
1031 | break; | ||
1032 | wstart--; | ||
1033 | continue; | ||
1034 | } | ||
1035 | /* We now have wstart on a 'set' bit, we now need to work out | ||
1036 | * how bit a window to do. To do this we need to scan | ||
1037 | * forward until the last set bit before the end of the | ||
1038 | * window */ | ||
1039 | j = wstart; | ||
1040 | wvalue = 1; | ||
1041 | wend = 0; | ||
1042 | for (i = 1; i < window; i++) { | ||
1043 | if (wstart - i < 0) | ||
1044 | break; | ||
1045 | if (BN_is_bit_set(q, wstart - i)) { | ||
1046 | wvalue <<= (i - wend); | ||
1047 | wvalue |= 1; | ||
1048 | wend = i; | ||
1049 | } | ||
1050 | } | ||
1051 | |||
1052 | /* wend is the size of the current window */ | ||
1053 | j = wend + 1; | ||
1054 | /* add the 'bytes above' */ | ||
1055 | if (!start) | ||
1056 | for (i = 0; i < j; i++) { | ||
1057 | if (!BN_mod_sqr_reciprocal(r, r, recp, ctx)) | ||
1058 | goto err; | ||
1059 | } | ||
1060 | |||
1061 | /* wvalue will be an odd number < 2^window */ | ||
1062 | if (!BN_mod_mul_reciprocal(r, r, val[wvalue >> 1], recp, ctx)) | ||
1063 | goto err; | ||
1064 | |||
1065 | /* move the 'window' down further */ | ||
1066 | wstart -= wend + 1; | ||
1067 | wvalue = 0; | ||
1068 | start = 0; | ||
1069 | if (wstart < 0) | ||
1070 | break; | ||
1071 | } | ||
1072 | |||
1073 | done: | ||
1074 | ret = 1; | ||
1075 | |||
1076 | err: | ||
1077 | BN_CTX_end(ctx); | ||
1078 | BN_RECP_CTX_free(recp); | ||
1079 | |||
1080 | return ret; | ||
1081 | } | ||
1082 | |||
1083 | static int | ||
1084 | BN_mod_exp_internal(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, | ||
1085 | BN_CTX *ctx, int ct) | ||
1086 | { | ||
1087 | int ret; | ||
1088 | |||
1089 | |||
1090 | /* For even modulus m = 2^k*m_odd, it might make sense to compute | ||
1091 | * a^p mod m_odd and a^p mod 2^k separately (with Montgomery | ||
1092 | * exponentiation for the odd part), using appropriate exponent | ||
1093 | * reductions, and combine the results using the CRT. | ||
1094 | * | ||
1095 | * For now, we use Montgomery only if the modulus is odd; otherwise, | ||
1096 | * exponentiation using the reciprocal-based quick remaindering | ||
1097 | * algorithm is used. | ||
1098 | * | ||
1099 | * (Timing obtained with expspeed.c [computations a^p mod m | ||
1100 | * where a, p, m are of the same length: 256, 512, 1024, 2048, | ||
1101 | * 4096, 8192 bits], compared to the running time of the | ||
1102 | * standard algorithm: | ||
1103 | * | ||
1104 | * BN_mod_exp_mont 33 .. 40 % [AMD K6-2, Linux, debug configuration] | ||
1105 | * 55 .. 77 % [UltraSparc processor, but | ||
1106 | * debug-solaris-sparcv8-gcc conf.] | ||
1107 | * | ||
1108 | * BN_mod_exp_recp 50 .. 70 % [AMD K6-2, Linux, debug configuration] | ||
1109 | * 62 .. 118 % [UltraSparc, debug-solaris-sparcv8-gcc] | ||
1110 | * | ||
1111 | * On the Sparc, BN_mod_exp_recp was faster than BN_mod_exp_mont | ||
1112 | * at 2048 and more bits, but at 512 and 1024 bits, it was | ||
1113 | * slower even than the standard algorithm! | ||
1114 | * | ||
1115 | * "Real" timings [linux-elf, solaris-sparcv9-gcc configurations] | ||
1116 | * should be obtained when the new Montgomery reduction code | ||
1117 | * has been integrated into OpenSSL.) | ||
1118 | */ | ||
1119 | |||
1120 | if (BN_is_odd(m)) { | ||
1121 | if (a->top == 1 && !a->neg && !ct) { | ||
1122 | BN_ULONG A = a->d[0]; | ||
1123 | ret = BN_mod_exp_mont_word(r, A,p, m,ctx, NULL); | ||
1124 | } else | ||
1125 | ret = BN_mod_exp_mont_ct(r, a,p, m,ctx, NULL); | ||
1126 | } else { | ||
1127 | ret = BN_mod_exp_reciprocal(r, a,p, m, ctx); | ||
1128 | } | ||
1129 | |||
1130 | return (ret); | ||
1131 | } | ||
1132 | |||
1133 | int | ||
1134 | BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, | ||
1135 | BN_CTX *ctx) | ||
1136 | { | ||
1137 | return BN_mod_exp_internal(r, a, p, m, ctx, | ||
1138 | (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)); | ||
1139 | } | ||
1140 | LCRYPTO_ALIAS(BN_mod_exp); | ||
1141 | |||
1142 | int | ||
1143 | BN_mod_exp_ct(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, | ||
1144 | BN_CTX *ctx) | ||
1145 | { | ||
1146 | return BN_mod_exp_internal(r, a, p, m, ctx, 1); | ||
1147 | } | ||
1148 | |||
1149 | int | ||
1150 | BN_mod_exp_nonct(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, | ||
1151 | BN_CTX *ctx) | ||
1152 | { | ||
1153 | return BN_mod_exp_internal(r, a, p, m, ctx, 0); | ||
1154 | } | ||
1155 | |||
1156 | int | ||
1157 | BN_mod_exp2_mont(BIGNUM *rr, const BIGNUM *a1, const BIGNUM *p1, | ||
1158 | const BIGNUM *a2, const BIGNUM *p2, const BIGNUM *m, BN_CTX *ctx, | ||
1159 | BN_MONT_CTX *in_mont) | ||
1160 | { | ||
1161 | int i, j, bits, b, bits1, bits2, ret = 0, wpos1, wpos2, window1, window2, wvalue1, wvalue2; | ||
1162 | int r_is_one = 1; | ||
1163 | BIGNUM *d, *r; | ||
1164 | const BIGNUM *a_mod_m; | ||
1165 | /* Tables of variables obtained from 'ctx' */ | ||
1166 | BIGNUM *val1[TABLE_SIZE], *val2[TABLE_SIZE]; | ||
1167 | BN_MONT_CTX *mont = NULL; | ||
1168 | |||
1169 | |||
1170 | if (!BN_is_odd(m)) { | ||
1171 | BNerror(BN_R_CALLED_WITH_EVEN_MODULUS); | ||
1172 | return (0); | ||
1173 | } | ||
1174 | bits1 = BN_num_bits(p1); | ||
1175 | bits2 = BN_num_bits(p2); | ||
1176 | if ((bits1 == 0) && (bits2 == 0)) { | ||
1177 | ret = BN_one(rr); | ||
1178 | return ret; | ||
1179 | } | ||
1180 | |||
1181 | bits = (bits1 > bits2) ? bits1 : bits2; | ||
1182 | |||
1183 | BN_CTX_start(ctx); | ||
1184 | if ((d = BN_CTX_get(ctx)) == NULL) | ||
1185 | goto err; | ||
1186 | if ((r = BN_CTX_get(ctx)) == NULL) | ||
1187 | goto err; | ||
1188 | if ((val1[0] = BN_CTX_get(ctx)) == NULL) | ||
1189 | goto err; | ||
1190 | if ((val2[0] = BN_CTX_get(ctx)) == NULL) | ||
1191 | goto err; | ||
1192 | |||
1193 | if ((mont = in_mont) == NULL) | ||
1194 | mont = BN_MONT_CTX_create(m, ctx); | ||
1195 | if (mont == NULL) | ||
1196 | goto err; | ||
1197 | |||
1198 | window1 = BN_window_bits_for_exponent_size(bits1); | ||
1199 | window2 = BN_window_bits_for_exponent_size(bits2); | ||
1200 | |||
1201 | /* | ||
1202 | * Build table for a1: val1[i] := a1^(2*i + 1) mod m for i = 0 .. 2^(window1-1) | ||
1203 | */ | ||
1204 | if (!BN_nnmod(val1[0], a1, m, ctx)) | ||
1205 | goto err; | ||
1206 | a_mod_m = val1[0]; | ||
1207 | if (BN_is_zero(a_mod_m)) { | ||
1208 | BN_zero(rr); | ||
1209 | ret = 1; | ||
1210 | goto err; | ||
1211 | } | ||
1212 | |||
1213 | if (!BN_to_montgomery(val1[0], a_mod_m, mont, ctx)) | ||
1214 | goto err; | ||
1215 | if (window1 > 1) { | ||
1216 | if (!BN_mod_mul_montgomery(d, val1[0], val1[0], mont, ctx)) | ||
1217 | goto err; | ||
1218 | |||
1219 | j = 1 << (window1 - 1); | ||
1220 | for (i = 1; i < j; i++) { | ||
1221 | if (((val1[i] = BN_CTX_get(ctx)) == NULL) || | ||
1222 | !BN_mod_mul_montgomery(val1[i], val1[i - 1], | ||
1223 | d, mont, ctx)) | ||
1224 | goto err; | ||
1225 | } | ||
1226 | } | ||
1227 | |||
1228 | |||
1229 | /* | ||
1230 | * Build table for a2: val2[i] := a2^(2*i + 1) mod m for i = 0 .. 2^(window2-1) | ||
1231 | */ | ||
1232 | if (!BN_nnmod(val2[0], a2, m, ctx)) | ||
1233 | goto err; | ||
1234 | a_mod_m = val2[0]; | ||
1235 | if (BN_is_zero(a_mod_m)) { | ||
1236 | BN_zero(rr); | ||
1237 | ret = 1; | ||
1238 | goto err; | ||
1239 | } | ||
1240 | if (!BN_to_montgomery(val2[0], a_mod_m, mont, ctx)) | ||
1241 | goto err; | ||
1242 | if (window2 > 1) { | ||
1243 | if (!BN_mod_mul_montgomery(d, val2[0], val2[0], mont, ctx)) | ||
1244 | goto err; | ||
1245 | |||
1246 | j = 1 << (window2 - 1); | ||
1247 | for (i = 1; i < j; i++) { | ||
1248 | if (((val2[i] = BN_CTX_get(ctx)) == NULL) || | ||
1249 | !BN_mod_mul_montgomery(val2[i], val2[i - 1], | ||
1250 | d, mont, ctx)) | ||
1251 | goto err; | ||
1252 | } | ||
1253 | } | ||
1254 | |||
1255 | |||
1256 | /* Now compute the power product, using independent windows. */ | ||
1257 | r_is_one = 1; | ||
1258 | wvalue1 = 0; /* The 'value' of the first window */ | ||
1259 | wvalue2 = 0; /* The 'value' of the second window */ | ||
1260 | wpos1 = 0; /* If wvalue1 > 0, the bottom bit of the first window */ | ||
1261 | wpos2 = 0; /* If wvalue2 > 0, the bottom bit of the second window */ | ||
1262 | |||
1263 | if (!BN_to_montgomery(r, BN_value_one(), mont, ctx)) | ||
1264 | goto err; | ||
1265 | for (b = bits - 1; b >= 0; b--) { | ||
1266 | if (!r_is_one) { | ||
1267 | if (!BN_mod_mul_montgomery(r, r,r, mont, ctx)) | ||
1268 | goto err; | ||
1269 | } | ||
1270 | |||
1271 | if (!wvalue1) | ||
1272 | if (BN_is_bit_set(p1, b)) { | ||
1273 | /* consider bits b-window1+1 .. b for this window */ | ||
1274 | i = b - window1 + 1; | ||
1275 | while (!BN_is_bit_set(p1, i)) /* works for i<0 */ | ||
1276 | i++; | ||
1277 | wpos1 = i; | ||
1278 | wvalue1 = 1; | ||
1279 | for (i = b - 1; i >= wpos1; i--) { | ||
1280 | wvalue1 <<= 1; | ||
1281 | if (BN_is_bit_set(p1, i)) | ||
1282 | wvalue1++; | ||
1283 | } | ||
1284 | } | ||
1285 | |||
1286 | if (!wvalue2) | ||
1287 | if (BN_is_bit_set(p2, b)) { | ||
1288 | /* consider bits b-window2+1 .. b for this window */ | ||
1289 | i = b - window2 + 1; | ||
1290 | while (!BN_is_bit_set(p2, i)) | ||
1291 | i++; | ||
1292 | wpos2 = i; | ||
1293 | wvalue2 = 1; | ||
1294 | for (i = b - 1; i >= wpos2; i--) { | ||
1295 | wvalue2 <<= 1; | ||
1296 | if (BN_is_bit_set(p2, i)) | ||
1297 | wvalue2++; | ||
1298 | } | ||
1299 | } | ||
1300 | |||
1301 | if (wvalue1 && b == wpos1) { | ||
1302 | /* wvalue1 is odd and < 2^window1 */ | ||
1303 | if (!BN_mod_mul_montgomery(r, r, val1[wvalue1 >> 1], | ||
1304 | mont, ctx)) | ||
1305 | goto err; | ||
1306 | wvalue1 = 0; | ||
1307 | r_is_one = 0; | ||
1308 | } | ||
1309 | |||
1310 | if (wvalue2 && b == wpos2) { | ||
1311 | /* wvalue2 is odd and < 2^window2 */ | ||
1312 | if (!BN_mod_mul_montgomery(r, r, val2[wvalue2 >> 1], | ||
1313 | mont, ctx)) | ||
1314 | goto err; | ||
1315 | wvalue2 = 0; | ||
1316 | r_is_one = 0; | ||
1317 | } | ||
1318 | } | ||
1319 | if (!BN_from_montgomery(rr, r,mont, ctx)) | ||
1320 | goto err; | ||
1321 | |||
1322 | ret = 1; | ||
1323 | |||
1324 | err: | ||
1325 | if (mont != in_mont) | ||
1326 | BN_MONT_CTX_free(mont); | ||
1327 | BN_CTX_end(ctx); | ||
1328 | |||
1329 | return ret; | ||
1330 | } | ||