summaryrefslogtreecommitdiff
path: root/src/lib/libcrypto/bn/bn_internal.h
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/lib/libcrypto/bn/bn_internal.h568
1 files changed, 0 insertions, 568 deletions
diff --git a/src/lib/libcrypto/bn/bn_internal.h b/src/lib/libcrypto/bn/bn_internal.h
deleted file mode 100644
index fd04bc9f8a..0000000000
--- a/src/lib/libcrypto/bn/bn_internal.h
+++ /dev/null
@@ -1,568 +0,0 @@
1/* $OpenBSD: bn_internal.h,v 1.15 2023/06/25 11:42:26 jsing Exp $ */
2/*
3 * Copyright (c) 2023 Joel Sing <jsing@openbsd.org>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17
18#include <openssl/bn.h>
19
20#include "bn_arch.h"
21
22#ifndef HEADER_BN_INTERNAL_H
23#define HEADER_BN_INTERNAL_H
24
25int bn_word_clz(BN_ULONG w);
26
27int bn_bitsize(const BIGNUM *bn);
28
29#ifndef HAVE_BN_CT_NE_ZERO
30static inline int
31bn_ct_ne_zero(BN_ULONG w)
32{
33 return (w | ~(w - 1)) >> (BN_BITS2 - 1);
34}
35#endif
36
37#ifndef HAVE_BN_CT_NE_ZERO_MASK
38static inline BN_ULONG
39bn_ct_ne_zero_mask(BN_ULONG w)
40{
41 return 0 - bn_ct_ne_zero(w);
42}
43#endif
44
45#ifndef HAVE_BN_CT_EQ_ZERO
46static inline int
47bn_ct_eq_zero(BN_ULONG w)
48{
49 return 1 - bn_ct_ne_zero(w);
50}
51#endif
52
53#ifndef HAVE_BN_CT_EQ_ZERO_MASK
54static inline BN_ULONG
55bn_ct_eq_zero_mask(BN_ULONG w)
56{
57 return 0 - bn_ct_eq_zero(w);
58}
59#endif
60
61#ifndef HAVE_BN_CLZW
62static inline int
63bn_clzw(BN_ULONG w)
64{
65 return bn_word_clz(w);
66}
67#endif
68
69/*
70 * Big number primitives are named as the operation followed by a suffix
71 * that indicates the number of words that it operates on, where 'w' means
72 * single word, 'dw' means double word, 'tw' means triple word and 'qw' means
73 * quadruple word. Unless otherwise noted, the size of the output is implied
74 * based on its inputs, for example bn_mulw() takes two single word inputs
75 * and is going to produce a double word result.
76 *
77 * Where a function implements multiple operations, these are listed in order.
78 * For example, a function that computes (r1:r0) = a * b + c is named
79 * bn_mulw_addw(), producing a double word result.
80 */
81
82/*
83 * Default implementations for BN_ULLONG architectures.
84 *
85 * On these platforms the C compiler is generally better at optimising without
86 * the use of inline assembly primitives. However, it can be difficult for the
87 * compiler to see through primitives in order to combine operations, due to
88 * type changes/narrowing. For this reason compound primitives are usually
89 * explicitly provided.
90 */
91#ifdef BN_ULLONG
92
93#ifndef HAVE_BN_ADDW
94#define HAVE_BN_ADDW
95static inline void
96bn_addw(BN_ULONG a, BN_ULONG b, BN_ULONG *out_r1, BN_ULONG *out_r0)
97{
98 BN_ULLONG r;
99
100 r = (BN_ULLONG)a + (BN_ULLONG)b;
101
102 *out_r1 = r >> BN_BITS2;
103 *out_r0 = r & BN_MASK2;
104}
105#endif
106
107#ifndef HAVE_BN_ADDW_ADDW
108#define HAVE_BN_ADDW_ADDW
109static inline void
110bn_addw_addw(BN_ULONG a, BN_ULONG b, BN_ULONG c, BN_ULONG *out_r1,
111 BN_ULONG *out_r0)
112{
113 BN_ULLONG r;
114
115 r = (BN_ULLONG)a + (BN_ULLONG)b + (BN_ULLONG)c;
116
117 *out_r1 = r >> BN_BITS2;
118 *out_r0 = r & BN_MASK2;
119}
120#endif
121
122#ifndef HAVE_BN_MULW
123#define HAVE_BN_MULW
124static inline void
125bn_mulw(BN_ULONG a, BN_ULONG b, BN_ULONG *out_r1, BN_ULONG *out_r0)
126{
127 BN_ULLONG r;
128
129 r = (BN_ULLONG)a * (BN_ULLONG)b;
130
131 *out_r1 = r >> BN_BITS2;
132 *out_r0 = r & BN_MASK2;
133}
134#endif
135
136#ifndef HAVE_BN_MULW_ADDW
137#define HAVE_BN_MULW_ADDW
138static inline void
139bn_mulw_addw(BN_ULONG a, BN_ULONG b, BN_ULONG c, BN_ULONG *out_r1,
140 BN_ULONG *out_r0)
141{
142 BN_ULLONG r;
143
144 r = (BN_ULLONG)a * (BN_ULLONG)b + (BN_ULLONG)c;
145
146 *out_r1 = r >> BN_BITS2;
147 *out_r0 = r & BN_MASK2;
148}
149#endif
150
151#ifndef HAVE_BN_MULW_ADDW_ADDW
152#define HAVE_BN_MULW_ADDW_ADDW
153static inline void
154bn_mulw_addw_addw(BN_ULONG a, BN_ULONG b, BN_ULONG c, BN_ULONG d,
155 BN_ULONG *out_r1, BN_ULONG *out_r0)
156{
157 BN_ULLONG r;
158
159 r = (BN_ULLONG)a * (BN_ULLONG)b + (BN_ULLONG)c + (BN_ULLONG)d;
160
161 *out_r1 = r >> BN_BITS2;
162 *out_r0 = r & BN_MASK2;
163}
164#endif
165
166#endif /* !BN_ULLONG */
167
168/*
169 * bn_addw() computes (r1:r0) = a + b, where both inputs are single words,
170 * producing a double word result. The value of r1 is the carry from the
171 * addition.
172 */
173#ifndef HAVE_BN_ADDW
174static inline void
175bn_addw(BN_ULONG a, BN_ULONG b, BN_ULONG *out_r1, BN_ULONG *out_r0)
176{
177 BN_ULONG r1, r0, c1, c2;
178
179 c1 = a | b;
180 c2 = a & b;
181 r0 = a + b;
182 r1 = ((c1 & ~r0) | c2) >> (BN_BITS2 - 1); /* carry */
183
184 *out_r1 = r1;
185 *out_r0 = r0;
186}
187#endif
188
189/*
190 * bn_addw_addw() computes (r1:r0) = a + b + c, where all inputs are single
191 * words, producing a double word result.
192 */
193#ifndef HAVE_BN_ADDW_ADDW
194static inline void
195bn_addw_addw(BN_ULONG a, BN_ULONG b, BN_ULONG c, BN_ULONG *out_r1,
196 BN_ULONG *out_r0)
197{
198 BN_ULONG carry, r1, r0;
199
200 bn_addw(a, b, &r1, &r0);
201 bn_addw(r0, c, &carry, &r0);
202 r1 += carry;
203
204 *out_r1 = r1;
205 *out_r0 = r0;
206}
207#endif
208
209/*
210 * bn_qwaddqw() computes
211 * (r4:r3:r2:r1:r0) = (a3:a2:a1:a0) + (b3:b2:b1:b0) + carry, where a is a quad word,
212 * b is a quad word, and carry is a single word with value 0 or 1, producing a four
213 * word result and carry.
214 */
215#ifndef HAVE_BN_QWADDQW
216static inline void
217bn_qwaddqw(BN_ULONG a3, BN_ULONG a2, BN_ULONG a1, BN_ULONG a0, BN_ULONG b3,
218 BN_ULONG b2, BN_ULONG b1, BN_ULONG b0, BN_ULONG carry, BN_ULONG *out_carry,
219 BN_ULONG *out_r3, BN_ULONG *out_r2, BN_ULONG *out_r1, BN_ULONG *out_r0)
220{
221 BN_ULONG r3, r2, r1, r0;
222
223 bn_addw_addw(a0, b0, carry, &carry, &r0);
224 bn_addw_addw(a1, b1, carry, &carry, &r1);
225 bn_addw_addw(a2, b2, carry, &carry, &r2);
226 bn_addw_addw(a3, b3, carry, &carry, &r3);
227
228 *out_carry = carry;
229 *out_r3 = r3;
230 *out_r2 = r2;
231 *out_r1 = r1;
232 *out_r0 = r0;
233}
234#endif
235
236/*
237 * bn_subw() computes r0 = a - b, where both inputs are single words,
238 * producing a single word result and borrow.
239 */
240#ifndef HAVE_BN_SUBW
241static inline void
242bn_subw(BN_ULONG a, BN_ULONG b, BN_ULONG *out_borrow, BN_ULONG *out_r0)
243{
244 BN_ULONG borrow, r0;
245
246 r0 = a - b;
247 borrow = ((r0 | (b & ~a)) & (b | ~a)) >> (BN_BITS2 - 1);
248
249 *out_borrow = borrow;
250 *out_r0 = r0;
251}
252#endif
253
254/*
255 * bn_subw_subw() computes r0 = a - b - c, where all inputs are single words,
256 * producing a single word result and borrow.
257 */
258#ifndef HAVE_BN_SUBW_SUBW
259static inline void
260bn_subw_subw(BN_ULONG a, BN_ULONG b, BN_ULONG c, BN_ULONG *out_borrow,
261 BN_ULONG *out_r0)
262{
263 BN_ULONG b1, b2, r0;
264
265 bn_subw(a, b, &b1, &r0);
266 bn_subw(r0, c, &b2, &r0);
267
268 *out_borrow = b1 + b2;
269 *out_r0 = r0;
270}
271#endif
272
273/*
274 * bn_qwsubqw() computes
275 * (r3:r2:r1:r0) = (a3:a2:a1:a0) - (b3:b2:b1:b0) - borrow, where a is a quad word,
276 * b is a quad word, and borrow is a single word with value 0 or 1, producing a
277 * four word result and borrow.
278 */
279#ifndef HAVE_BN_QWSUBQW
280static inline void
281bn_qwsubqw(BN_ULONG a3, BN_ULONG a2, BN_ULONG a1, BN_ULONG a0, BN_ULONG b3,
282 BN_ULONG b2, BN_ULONG b1, BN_ULONG b0, BN_ULONG borrow, BN_ULONG *out_borrow,
283 BN_ULONG *out_r3, BN_ULONG *out_r2, BN_ULONG *out_r1, BN_ULONG *out_r0)
284{
285 BN_ULONG r3, r2, r1, r0;
286
287 bn_subw_subw(a0, b0, borrow, &borrow, &r0);
288 bn_subw_subw(a1, b1, borrow, &borrow, &r1);
289 bn_subw_subw(a2, b2, borrow, &borrow, &r2);
290 bn_subw_subw(a3, b3, borrow, &borrow, &r3);
291
292 *out_borrow = borrow;
293 *out_r3 = r3;
294 *out_r2 = r2;
295 *out_r1 = r1;
296 *out_r0 = r0;
297}
298#endif
299
300/*
301 * bn_mulw() computes (r1:r0) = a * b, where both inputs are single words,
302 * producing a double word result.
303 */
304#ifndef HAVE_BN_MULW
305/*
306 * Multiply two words (a * b) producing a double word result (h:l).
307 *
308 * This can be rewritten as:
309 *
310 * a * b = (hi32(a) * 2^32 + lo32(a)) * (hi32(b) * 2^32 + lo32(b))
311 * = hi32(a) * hi32(b) * 2^64 +
312 * hi32(a) * lo32(b) * 2^32 +
313 * hi32(b) * lo32(a) * 2^32 +
314 * lo32(a) * lo32(b)
315 *
316 * The multiplication for each part of a and b can be calculated for each of
317 * these four terms without overflowing a BN_ULONG, as the maximum value of a
318 * 32 bit x 32 bit multiplication is 32 + 32 = 64 bits. Once these
319 * multiplications have been performed the result can be partitioned and summed
320 * into a double word (h:l). The same applies on a 32 bit system, substituting
321 * 16 for 32 and 32 for 64.
322 */
323#if 1
324static inline void
325bn_mulw(BN_ULONG a, BN_ULONG b, BN_ULONG *out_r1, BN_ULONG *out_r0)
326{
327 BN_ULONG a1, a0, b1, b0, r1, r0;
328 BN_ULONG carry, x;
329
330 a1 = a >> BN_BITS4;
331 a0 = a & BN_MASK2l;
332 b1 = b >> BN_BITS4;
333 b0 = b & BN_MASK2l;
334
335 r1 = a1 * b1;
336 r0 = a0 * b0;
337
338 /* (a1 * b0) << BN_BITS4, partition the result across r1:r0 with carry. */
339 x = a1 * b0;
340 r1 += x >> BN_BITS4;
341 bn_addw(r0, x << BN_BITS4, &carry, &r0);
342 r1 += carry;
343
344 /* (b1 * a0) << BN_BITS4, partition the result across r1:r0 with carry. */
345 x = b1 * a0;
346 r1 += x >> BN_BITS4;
347 bn_addw(r0, x << BN_BITS4, &carry, &r0);
348 r1 += carry;
349
350 *out_r1 = r1;
351 *out_r0 = r0;
352}
353#else
354
355/*
356 * XXX - this accumulator based version uses fewer instructions, however
357 * requires more variables/registers. It seems to be slower on at least amd64
358 * and i386, however may be faster on other architectures that have more
359 * registers available. Further testing is required and one of the two
360 * implementations should eventually be removed.
361 */
362static inline void
363bn_mulw(BN_ULONG a, BN_ULONG b, BN_ULONG *out_r1, BN_ULONG *out_r0)
364{
365 BN_ULONG a1, a0, b1, b0, r1, r0, x;
366 BN_ULONG acc0, acc1, acc2, acc3;
367
368 a1 = a >> BN_BITS4;
369 b1 = b >> BN_BITS4;
370 a0 = a & BN_MASK2l;
371 b0 = b & BN_MASK2l;
372
373 r1 = a1 * b1;
374 r0 = a0 * b0;
375
376 acc0 = r0 & BN_MASK2l;
377 acc1 = r0 >> BN_BITS4;
378 acc2 = r1 & BN_MASK2l;
379 acc3 = r1 >> BN_BITS4;
380
381 /* (a1 * b0) << BN_BITS4, partition the result across r1:r0. */
382 x = a1 * b0;
383 acc1 += x & BN_MASK2l;
384 acc2 += (acc1 >> BN_BITS4) + (x >> BN_BITS4);
385 acc1 &= BN_MASK2l;
386 acc3 += acc2 >> BN_BITS4;
387 acc2 &= BN_MASK2l;
388
389 /* (b1 * a0) << BN_BITS4, partition the result across r1:r0. */
390 x = b1 * a0;
391 acc1 += x & BN_MASK2l;
392 acc2 += (acc1 >> BN_BITS4) + (x >> BN_BITS4);
393 acc1 &= BN_MASK2l;
394 acc3 += acc2 >> BN_BITS4;
395 acc2 &= BN_MASK2l;
396
397 *out_r1 = (acc3 << BN_BITS4) | acc2;
398 *out_r0 = (acc1 << BN_BITS4) | acc0;
399}
400#endif
401#endif
402
403#ifndef HAVE_BN_MULW_LO
404static inline BN_ULONG
405bn_mulw_lo(BN_ULONG a, BN_ULONG b)
406{
407 return a * b;
408}
409#endif
410
411#ifndef HAVE_BN_MULW_HI
412static inline BN_ULONG
413bn_mulw_hi(BN_ULONG a, BN_ULONG b)
414{
415 BN_ULONG h, l;
416
417 bn_mulw(a, b, &h, &l);
418
419 return h;
420}
421#endif
422
423/*
424 * bn_mulw_addw() computes (r1:r0) = a * b + c with all inputs being single
425 * words, producing a double word result.
426 */
427#ifndef HAVE_BN_MULW_ADDW
428static inline void
429bn_mulw_addw(BN_ULONG a, BN_ULONG b, BN_ULONG c, BN_ULONG *out_r1,
430 BN_ULONG *out_r0)
431{
432 BN_ULONG carry, r1, r0;
433
434 bn_mulw(a, b, &r1, &r0);
435 bn_addw(r0, c, &carry, &r0);
436 r1 += carry;
437
438 *out_r1 = r1;
439 *out_r0 = r0;
440}
441#endif
442
443/*
444 * bn_mulw_addw_addw() computes (r1:r0) = a * b + c + d with all inputs being
445 * single words, producing a double word result.
446 */
447#ifndef HAVE_BN_MULW_ADDW_ADDW
448static inline void
449bn_mulw_addw_addw(BN_ULONG a, BN_ULONG b, BN_ULONG c, BN_ULONG d,
450 BN_ULONG *out_r1, BN_ULONG *out_r0)
451{
452 BN_ULONG carry, r1, r0;
453
454 bn_mulw_addw(a, b, c, &r1, &r0);
455 bn_addw(r0, d, &carry, &r0);
456 r1 += carry;
457
458 *out_r1 = r1;
459 *out_r0 = r0;
460}
461#endif
462
463/*
464 * bn_mulw_addtw() computes (r2:r1:r0) = a * b + (c2:c1:c0), where a and b are
465 * single words and (c2:c1:c0) is a triple word, producing a triple word result.
466 * The caller must ensure that the inputs provided do not result in c2
467 * overflowing.
468 */
469#ifndef HAVE_BN_MULW_ADDTW
470static inline void
471bn_mulw_addtw(BN_ULONG a, BN_ULONG b, BN_ULONG c2, BN_ULONG c1, BN_ULONG c0,
472 BN_ULONG *out_r2, BN_ULONG *out_r1, BN_ULONG *out_r0)
473{
474 BN_ULONG carry, r2, r1, r0, x1;
475
476 bn_mulw_addw(a, b, c0, &x1, &r0);
477 bn_addw(c1, x1, &carry, &r1);
478 r2 = c2 + carry;
479
480 *out_r2 = r2;
481 *out_r1 = r1;
482 *out_r0 = r0;
483}
484#endif
485
486/*
487 * bn_mul2_mulw_addtw() computes (r2:r1:r0) = 2 * a * b + (c2:c1:c0), where a
488 * and b are single words and (c2:c1:c0) is a triple word, producing a triple
489 * word result. The caller must ensure that the inputs provided do not result
490 * in c2 overflowing.
491 */
492#ifndef HAVE_BN_MUL2_MULW_ADDTW
493static inline void
494bn_mul2_mulw_addtw(BN_ULONG a, BN_ULONG b, BN_ULONG c2, BN_ULONG c1, BN_ULONG c0,
495 BN_ULONG *out_r2, BN_ULONG *out_r1, BN_ULONG *out_r0)
496{
497 BN_ULONG r2, r1, r0, x1, x0;
498 BN_ULONG carry;
499
500 bn_mulw(a, b, &x1, &x0);
501 bn_addw(c0, x0, &carry, &r0);
502 bn_addw(c1, x1 + carry, &r2, &r1);
503 bn_addw(c2, r2, &carry, &r2);
504 bn_addw(r0, x0, &carry, &r0);
505 bn_addw(r1, x1 + carry, &carry, &r1);
506 r2 += carry;
507
508 *out_r2 = r2;
509 *out_r1 = r1;
510 *out_r0 = r0;
511}
512#endif
513
514/*
515 * bn_qwmulw_addw() computes (r4:r3:r2:r1:r0) = (a3:a2:a1:a0) * b + c, where a
516 * is a quad word, b is a single word and c is a single word, producing a five
517 * word result.
518 */
519#ifndef HAVE_BN_QWMULW_ADDW
520static inline void
521bn_qwmulw_addw(BN_ULONG a3, BN_ULONG a2, BN_ULONG a1, BN_ULONG a0, BN_ULONG b,
522 BN_ULONG c, BN_ULONG *out_r4, BN_ULONG *out_r3, BN_ULONG *out_r2,
523 BN_ULONG *out_r1, BN_ULONG *out_r0)
524{
525 BN_ULONG r3, r2, r1, r0;
526
527 bn_mulw_addw(a0, b, c, &c, &r0);
528 bn_mulw_addw(a1, b, c, &c, &r1);
529 bn_mulw_addw(a2, b, c, &c, &r2);
530 bn_mulw_addw(a3, b, c, &c, &r3);
531
532 *out_r4 = c;
533 *out_r3 = r3;
534 *out_r2 = r2;
535 *out_r1 = r1;
536 *out_r0 = r0;
537}
538#endif
539
540/*
541 * bn_qwmulw_addqw_addw() computes
542 * (r4:r3:r2:r1:r0) = (a3:a2:a1:a0) * b + (c3:c2:c1:c0) + d, where a
543 * is a quad word, b is a single word, c is a quad word, and d is a single word,
544 * producing a five word result.
545 */
546#ifndef HAVE_BN_QWMULW_ADDQW_ADDW
547static inline void
548bn_qwmulw_addqw_addw(BN_ULONG a3, BN_ULONG a2, BN_ULONG a1, BN_ULONG a0,
549 BN_ULONG b, BN_ULONG c3, BN_ULONG c2, BN_ULONG c1, BN_ULONG c0, BN_ULONG d,
550 BN_ULONG *out_r4, BN_ULONG *out_r3, BN_ULONG *out_r2, BN_ULONG *out_r1,
551 BN_ULONG *out_r0)
552{
553 BN_ULONG r3, r2, r1, r0;
554
555 bn_mulw_addw_addw(a0, b, c0, d, &d, &r0);
556 bn_mulw_addw_addw(a1, b, c1, d, &d, &r1);
557 bn_mulw_addw_addw(a2, b, c2, d, &d, &r2);
558 bn_mulw_addw_addw(a3, b, c3, d, &d, &r3);
559
560 *out_r4 = d;
561 *out_r3 = r3;
562 *out_r2 = r2;
563 *out_r1 = r1;
564 *out_r0 = r0;
565}
566#endif
567
568#endif