summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/lib/libcrypto/bn/bn_internal.h159
1 files changed, 159 insertions, 0 deletions
diff --git a/src/lib/libcrypto/bn/bn_internal.h b/src/lib/libcrypto/bn/bn_internal.h
new file mode 100644
index 0000000000..de5cd22424
--- /dev/null
+++ b/src/lib/libcrypto/bn/bn_internal.h
@@ -0,0 +1,159 @@
1/* $OpenBSD: bn_internal.h,v 1.1 2023/01/31 05:48:39 jsing Exp $ */
2/*
3 * Copyright (c) 2023 Joel Sing <jsing@openbsd.org>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17
18#include <openssl/bn.h>
19
20#include "bn_arch.h"
21
22#ifndef HEADER_BN_INTERNAL_H
23#define HEADER_BN_INTERNAL_H
24
25#ifndef HAVE_BN_UMUL_HILO
26#ifdef BN_LLONG
27static inline void
28bn_umul_hilo(BN_ULONG a, BN_ULONG b, BN_ULONG *out_h, BN_ULONG *out_l)
29{
30 BN_ULLONG r;
31
32 r = (BN_ULLONG)a * (BN_ULLONG)b;
33
34 *out_h = r >> BN_BITS2;
35 *out_l = r & BN_MASK2;
36}
37
38#else /* !BN_LLONG */
39/*
40 * Multiply two words (a * b) producing a double word result (h:l).
41 *
42 * This can be rewritten as:
43 *
44 * a * b = (hi32(a) * 2^32 + lo32(a)) * (hi32(b) * 2^32 + lo32(b))
45 * = hi32(a) * hi32(b) * 2^64 +
46 * hi32(a) * lo32(b) * 2^32 +
47 * hi32(b) * lo32(a) * 2^32 +
48 * lo32(a) * lo32(b)
49 *
50 * The multiplication for each part of a and b can be calculated for each of
51 * these four terms without overflowing a BN_ULONG, as the maximum value of a
52 * 32 bit x 32 bit multiplication is 32 + 32 = 64 bits. Once these
53 * multiplications have been performed the result can be partitioned and summed
54 * into a double word (h:l). The same applies on a 32 bit system, substituting
55 * 16 for 32 and 32 for 64.
56 */
57#if 1
58static inline void
59bn_umul_hilo(BN_ULONG a, BN_ULONG b, BN_ULONG *out_h, BN_ULONG *out_l)
60{
61 BN_ULONG ah, al, bh, bl, h, l, x, c1, c2;
62
63 ah = a >> BN_BITS4;
64 al = a & BN_MASK2l;
65 bh = b >> BN_BITS4;
66 bl = b & BN_MASK2l;
67
68 h = ah * bh;
69 l = al * bl;
70
71 /* (ah * bl) << BN_BITS4, partition the result across h:l with carry. */
72 x = ah * bl;
73 h += x >> BN_BITS4;
74 x <<= BN_BITS4;
75 c1 = l | x;
76 c2 = l & x;
77 l += x;
78 h += ((c1 & ~l) | c2) >> (BN_BITS2 - 1); /* carry */
79
80 /* (bh * al) << BN_BITS4, partition the result across h:l with carry. */
81 x = bh * al;
82 h += x >> BN_BITS4;
83 x <<= BN_BITS4;
84 c1 = l | x;
85 c2 = l & x;
86 l += x;
87 h += ((c1 & ~l) | c2) >> (BN_BITS2 - 1); /* carry */
88
89 *out_h = h;
90 *out_l = l;
91}
92#else
93
94/*
95 * XXX - this accumulator based version uses fewer instructions, however
96 * requires more variables/registers. It seems to be slower on at least amd64
97 * and i386, however may be faster on other architectures that have more
98 * registers available. Further testing is required and one of the two
99 * implementations should eventually be removed.
100 */
101static inline void
102bn_umul_hilo(BN_ULONG a, BN_ULONG b, BN_ULONG *out_h, BN_ULONG *out_l)
103{
104 BN_ULONG ah, bh, al, bl, x, h, l;
105 BN_ULONG acc0, acc1, acc2, acc3;
106
107 ah = a >> BN_BITS4;
108 bh = b >> BN_BITS4;
109 al = a & BN_MASK2l;
110 bl = b & BN_MASK2l;
111
112 h = ah * bh;
113 l = al * bl;
114
115 acc0 = l & BN_MASK2l;
116 acc1 = l >> BN_BITS4;
117 acc2 = h & BN_MASK2l;
118 acc3 = h >> BN_BITS4;
119
120 /* (ah * bl) << BN_BITS4, partition the result across h:l. */
121 x = ah * bl;
122 acc1 += x & BN_MASK2l;
123 acc2 += (acc1 >> BN_BITS4) + (x >> BN_BITS4);
124 acc3 += acc2 >> BN_BITS4;
125
126 /* (bh * al) << BN_BITS4, partition the result across h:l. */
127 x = bh * al;
128 acc1 += x & BN_MASK2l;
129 acc2 += (acc1 >> BN_BITS4) + (x >> BN_BITS4);
130 acc3 += acc2 >> BN_BITS4;
131
132 *out_h = (acc3 << BN_BITS4) | acc2;
133 *out_l = (acc1 << BN_BITS4) | acc0;
134}
135#endif
136#endif /* !BN_LLONG */
137#endif
138
139#ifndef HAVE_BN_UMUL_LO
140static inline BN_ULONG
141bn_umul_lo(BN_ULONG a, BN_ULONG b)
142{
143 return a * b;
144}
145#endif
146
147#ifndef HAVE_BN_UMUL_HI
148static inline BN_ULONG
149bn_umul_hi(BN_ULONG a, BN_ULONG b)
150{
151 BN_ULONG h, l;
152
153 bn_umul_hilo(a, b, &h, &l);
154
155 return h;
156}
157#endif
158
159#endif